常用的小脚本

1、检测表格中的网站是否能v6访问,是否有AAAA记录

 1 #coding:utf8
 2 import pycurl,xlrd,socket,xlwt
 3 from io import BytesIO
 4 from xlutils.copy import copy
 5 import threadpool
 6 import threading
 7 
 8 
 9 look = threading.Lock()
10 #检测站点是否可以使用v6访问
11 def web_can_ipv6(url):
12     buffer = BytesIO()
13     c = pycurl.Curl()
14     c.setopt(pycurl.OPT_CERTINFO, 1)
15     c.setopt(pycurl.URL, "http://"+url)
16     #设置只使用v6访问
17     c.setopt(pycurl.IPRESOLVE,pycurl.IPRESOLVE_V6)
18     c.setopt(c.WRITEDATA, buffer)
19     try:
20         c.perform()
21         return c.getinfo(pycurl.HTTP_CODE)
22     except:
23         return None
24 
25 #读取表格内容
26 def read_excel():
27     web_list = []
28     # 打开文件
29     workbook = xlrd.open_workbook(r'webs.xls')
30     # 获取所有sheet
31     sheet1 = workbook.sheet_by_name('Sheet1')
32     for row_n in range(1,sheet1.nrows):
33         row = sheet1.row_values(row_n)
34         web_list.append(row)
35     return web_list
36 
37 def write_excel(row,col,text):
38     rb = xlrd.open_workbook(r'webs.xls','wb')  # 打开weng.xls文件
39     wb = copy(rb)  # 利用xlutils.copy下的copy函数复制
40     ws = wb.get_sheet(0)  # 获取表单0
41     ws.write(row, col, text)
42     wb.save(r'webs.xls')  # 保存文件
43 
44 #返回站点的AAAA记录
45 def get_dns(url):
46     try:
47         results = socket.getaddrinfo(url, None)
48         for item in results:
49             if ':' in item[4][0]:
50                 return item[4][0]
51     except:
52         print('%s-error'%url)
53 
54 
55 def main(web):
56     print('正在检测第%s个网站' % web[0])
57     write_excel(web[0],3,get_dns(web[2]))
58     if web_can_ipv6(web[2]):
59         write_excel(web[0],4, web_can_ipv6(web[2]))
60 
61 
62 
63 
64 
65 if __name__ == '__main__':
66     for web in read_excel():
67         main(web)
View Code

 2、检测表格中的网站的返回码,归属运营商,多线程

总共一万多条数据的表格,采用分段处理的方式,大大增加处理速度

  1 ##coding:utf8
  2 import threadpool
  3 import threading
  4 import xlrd,xlwt,requests,socket
  5 from xlutils.copy import copy
  6 from IPy import IP
  7 from contextlib import closing
  8 from concurrent.futures import ThreadPoolExecutor,wait
  9 
 10 lock = threading.Lock()
 11 pool = threadpool.ThreadPool(20)
 12 
 13 #获取网站返回码
 14 def get_web_code(ws,web):
 15     try:
 16         url = str(web[2]).strip()
 17         url = 'http://'+url
 18         headers = {
 19                 'User-Agent': 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0) ',
 20             }
 21         r = requests.head(url=url,timeout=1,headers=headers)
 22 
 23         print('第%s行-->%s' % (web[0], str(r.status_code)))
 24 
 25         if is_url_in_cernet(get_dns(url)):
 26             write_excel(ws,int(web[0]), 4, 'CERNET')
 27         elif is_url_in_cmcc(get_dns(url)):
 28             write_excel(ws,int(web[0]), 4, 'CMCC')
 29         elif is_url_in_cucc(get_dns(url)):
 30             write_excel(ws,int(web[0]), 4, 'CUCC')
 31         elif is_url_in_ctcc(get_dns(url)):
 32             write_excel(ws,int(web[0]), 4, 'CTCC')
 33         else:
 34             write_excel(ws,int(web[0]), 4, 'UNKnow')
 35         write_excel(ws,int(web[0]),3,r.status_code)
 36         return r.status_code
 37     except Exception as e:
 38         write_excel(ws,int(web[0]),3,'ERROR')
 39         print('第%s行-->%s'%(web[0],e))
 40 
 41 
 42 #读取表格数据
 43 def read_excel():
 44     count=0
 45     web_list = []
 46     # 打开文件
 47     workbook = xlrd.open_workbook(r'webs.xls')
 48     # 获取所有sheet
 49     sheet1 = workbook.sheet_by_name('Sheet1')
 50     for row_n in range(1,sheet1.nrows):
 51         row = sheet1.row_values(row_n)
 52         web_list.append(row)
 53     n=100
 54     new_web_list = [web_list[i:i+n] for i in range(0,len(web_list),n)]
 55     return new_web_list
 56 
 57 
 58 # #表格写入数据
 59 # def write_excel(row,col,text):
 60 #     lock.acquire()
 61 #     try:
 62 #         rb = xlrd.open_workbook(r'webs.xls','wb')
 63 #         wb = copy(rb)  # 利用xlutils.copy下的copy函数复制
 64 #         ws = wb.get_sheet(0)  # 获取表单0
 65 #         ws.write(row, col, text)
 66 #         wb.save(r'webs.xls')  # 保存文件
 67 #     except:
 68 #         pass
 69 #     finally:
 70 #         lock.release()
 71 
 72 
 73 #表格写入数据
 74 def write_excel(ws,row,col,text):
 75     lock.acquire()
 76     try:
 77         ws.write(row, col, text)
 78     except:
 79         pass
 80     finally:
 81         lock.release()
 82 
 83 
 84 #返回站点的A记录
 85 def get_dns(url):
 86     lock.acquire()
 87     if 'http://' or 'https://' in  url.lower():
 88         url = url.strip()
 89         url = url.lower()
 90         url = url.replace('http://','')
 91         url = url.replace('https://', '')
 92     if ':' in url:
 93         url = url.split(':')[0]
 94     try:
 95         results = socket.getaddrinfo(url, None)
 96         return results[0][4][0]
 97     except:
 98         return url
 99     finally:
100         lock.release()
101 
102 def is_url_in_cernet(url):
103     lock.acquire()
104     try:
105         with open('cernet_ip.txt','r') as rf:
106             for line in rf.readlines():
107                 if ';' in line:
108                     line = line.replace(';', '')
109                 if ':' in line:
110                     line = line.split(':')[0]
111                 ip_line = IP(line)
112                 if url in ip_line:
113                     return True
114     except:
115         pass
116     finally:
117         lock.release()
118 
119 
120 def is_url_in_cmcc(url):
121     lock.acquire()
122     try:
123         with open('CMCC_ip.txt','r') as rf:
124             for line in rf.readlines():
125                 if ';' in line:
126                     line = line.replace(';','')
127                 if ':' in line:
128                     line = line.split(':')[0]
129                 ip_line = IP(line)
130                 if url in ip_line:
131                     return True
132     except:
133         pass
134     finally:
135         lock.release()
136 
137 def is_url_in_cucc(url):
138     lock.acquire()
139     try:
140         with open('CUCC_ip.txt','r') as rf:
141             for line in rf.readlines():
142                 if ';' in line:
143                     line = line.replace(';', '')
144                 if ':' in line:
145                     line = line.split(':')[0]
146                 ip_line = IP(line)
147                 if url in ip_line:
148                     return True
149     except:
150         pass
151     finally:
152         lock.release()
153 
154 def is_url_in_ctcc(url):
155     lock.acquire()
156     try:
157         with open('CTCC_ip.txt','r') as rf:
158             for line in rf.readlines():
159                 if ';' in line:
160                     line = line.replace(';', '')
161                 if ':' in line:
162                     line = line.split(':')[0]
163                 ip_line = IP(line)
164                 if url in ip_line:
165                     return True
166     except:
167         pass
168     finally:
169         lock.release()
170 
171 
172 
173 
174 if __name__ == '__main__':
175     new_web_list = read_excel()
176     for web_list in new_web_list:
177         rb = xlrd.open_workbook(r'webs.xls','wb')
178         wb = copy(rb)  # 利用xlutils.copy下的copy函数复制
179         ws = wb.get_sheet(0)  # 获取表单0
180         t_list = []
181         for web in web_list:
182             work = threading.Thread(target=get_web_code , args=(ws, web,))
183             work.start()
184             t_list.append(work)
185         for t in t_list:
186             t.join()
187         wb.save(r'webs.xls')
188     print('-----end-----')
189     # fs = []
190     # for web in web_list:
191     #     f = pool.submit(get_web_code, web)
192     #     fs.append(f)
193     # # 相遇与join,阻塞主线程
194     # wait(fs)
View Code

 3、将n个表格中的m个子表的数据全部汇总到一个总表中

测试是将30+个表个汇总,耗时不到2分钟

 1 # -*- coding: utf-8 -*- 
 2 import os,xlrd
 3 from xlutils.copy import copy
 4 import time,re
 5 
 6 #获取需要的表格
 7 def get_file():
 8     file_list=[]
 9     for root,dirs,files in os.walk(os.getcwd()):
10         for file in files:
11             if 'excel_run.' in file:
12                 pass
13             elif file=='total.xls':
14                 pass
15             elif file=='~$total.xls':
16                 pass
17             else:
18                 if re.search(r'/*.xls/*',file):
19                     file_list.append(file)
20     return file_list
21 
22 
23 #读取表格数据
24 def read_excel(filename,sheetindex):
25     web_list = []
26     # 打开文件
27     workbook = xlrd.open_workbook(r'%s'%filename)
28     # 获取所有sheet
29     sheet1 = workbook.sheet_by_index(sheetindex)
30     for row_n in range(2,sheet1.nrows):
31         row = sheet1.row_values(row_n)
32         web_list.append(row)
33     return web_list
34 
35 
36 #表格写入数据
37 def write_excel(sheetindex,file,count):
38     rb = xlrd.open_workbook(r'total.xls','wb')
39     wb = copy(rb)  # 利用xlutils.copy下的copy函数复制
40     ws = wb.get_sheet(sheetindex)  # 获取表单0
41     nrow = rb.sheet_by_index(sheetindex).nrows
42     for row in read_excel(file,sheetindex):
43         count+=1
44         for index_col,col in enumerate(row):
45             ws.write(nrow+count, index_col, col)
46     wb.save(r'total.xls')  # 保存文件
47 
48 
49 if __name__ == '__main__':
50     t1 = time.time()
51     for file in get_file():
52         for sheetindex in [1,2,4,5,6]:
53             count=-1
54             print('正在录入%s的第%s张表的数据' % (file, sheetindex))
55             write_excel(sheetindex,file,count)
56 
57     t2=time.time()
58     print('共耗时%s秒'%(t2-t1))
59     print('---end---')
View Code

 

posted @ 2018-12-20 16:36  隔壁古二蛋  阅读(232)  评论(0编辑  收藏  举报