获取全球 DNS 统计信息
# -*- coding:UTF-8 -*-
"""Scrape the AAAA-record statistics table and append each row as JSON.

Running the module fetches the statistics page once, writes every
16-column table row to ``dns_status.txt`` and prints the elapsed time in
seconds.
"""
import json
import time

import requests
from bs4 import BeautifulSoup as bp

t3 = time.time()
ths = []  # meant to hold threads; nothing in this script uses it

_STATS_URL = 'http://www.employees.org/~dwing/aaaa-stats/'
_OUTPUT_PATH = 'dns_status.txt'
_REQUEST_TIMEOUT = 30  # seconds; without a timeout the request can hang forever

# Output keys, in the order the values appear in a cleaned-up table row.
_FIELD_NAMES = (
    'date',
    'total_checked',
    'with_A_records_count',
    'with_A_records_rate',
    'with_AAAA_records_count',
    'with_AAAA_records_rate',
    'AAAA_with_IPv4-mapped_count',
    'AAAA_with_IPv4-mapped_rate',
    'AAAA_with_loopback_count',
    'AAAA_with_loopback_rate',
    'valid_AAAA_records_count',
    'valid_AAAA_records_rate',
    'IPv6_connection_ok_count',
    'IPv6_connection_ok_rate',
    'IPv6_connection_failed_count',
    'IPv6_connection_failed_rate',
)


def _cell_values(cell):
    """Return the values contributed by the children of one ``<td>``."""
    values = []
    for child in cell:
        if not child:
            continue
        try:
            text = child.text
        except AttributeError:
            # Some bs4 versions expose no ``.text`` on bare text nodes;
            # keep the node itself, as the original fallback did.
            values.append(child)
            continue
        if not text:
            continue
        text = text.strip().strip('\n')
        if len(text) == 12:
            # A 12-character value is split into its first 5 characters and
            # the remainder, stored as total_checked / with_A_records_count.
            # NOTE(review): presumably two numbers fused in one cell --
            # confirm against the live page.
            values.append(text[0:5])
            values.append(text[5:])
        else:
            values.append(text)
    return values


def _row_record(row):
    """Turn one ``<tr>`` into a record dict, or ``None`` if it is skipped.

    A row is skipped when it contains a '(large run)' value or when, after
    cleanup, it does not hold exactly one value per output field.
    """
    values = []
    for cell in row.find_all('td'):
        if not cell:
            continue
        values.extend(_cell_values(cell))

    # Only the first occurrence of each marker is dropped, as before.
    for marker in ('\n', 'diffs'):
        if marker in values:
            values.remove(marker)
    if '(large run)' in values:
        return None
    if len(values) != len(_FIELD_NAMES):
        return None
    return dict(zip(_FIELD_NAMES, values))


def get(num):
    """Fetch the statistics page and append its rows to ``dns_status.txt``.

    Args:
        num: Unused; kept so existing ``get(1)``-style calls keep working.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # The page is requested with POST and no extra headers or body.
    # NOTE(review): GET is the conventional method for reading a page --
    # confirm the server needs POST before changing it.
    page = requests.post(_STATS_URL, timeout=_REQUEST_TIMEOUT)
    page.raise_for_status()  # do not silently parse an error page

    soup = bp(page.text, 'html.parser')
    dumped = []
    for row in soup.find_all('tr'):
        if not row:
            continue
        record = _row_record(row)
        if record is None:
            continue
        # ensure_ascii=False keeps non-ASCII text readable in the output.
        dumped.append(json.dumps(record, indent=2, ensure_ascii=False))

    if dumped:
        # Open the file once and end every object with a newline so that
        # consecutive records are not fused together as "}{".
        with open(_OUTPUT_PATH, 'a', encoding='utf-8') as f:
            f.writelines(text + '\n' for text in dumped)


get(1)

t4 = time.time()
tt = t4 - t3
print(tt)
结果见github
分类:
爬虫
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· .NET Core 中如何实现缓存的预热?
· 从 HTTP 原因短语缺失研究 HTTP/2 和 HTTP/3 的设计差异
· AI与.NET技术实操系列:向量存储与相似性搜索在 .NET 中的实现
· 基于Microsoft.Extensions.AI核心库实现RAG应用
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· TypeScript + Deepseek 打造卜卦网站:技术与玄学的结合
· .NET Core 中如何实现缓存的预热?
· 阿里巴巴 QwQ-32B真的超越了 DeepSeek R-1吗?
· 如何调用 DeepSeek 的自然语言处理 API 接口并集成到在线客服系统
· 【译】Visual Studio 中新的强大生产力特性