Python 爬取网页数据,再插入到数据库中,再从数据库中查询需要的数据,统计后最后导出到Excel
1 # _*_ coding: utf-8 _*_ 2 import json 3 import openpyxl 4 import mysql.connector 5 import requests 6 7 # 创建数据库连接 8 from openpyxl.styles import Border, Side, Alignment, PatternFill 9 10 conn = mysql.connector.connect( 11 host="127.0.0.1", # MySQL服务器地址 12 user="root", # 用户名 13 port="3306", # 端口号 14 password="123456", # 密码 15 database="reptile_data" # 数据库名称 16 ) 17 curs = conn.cursor() 18 date = "2024-01-01" 19 http_url = f"https://www.cwl.gov.cn/cwl_admin/front/cwlkj/search/kjxx/findDrawNotice?name=kl8&issueCount=&issueStart=&issueEnd=&dayStart={date}&dayEnd=&pageNo=1&pageSize=500&week=&systemType=PC" 20 21 headers = { 22 'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Mobile Safari/537.36', 23 'Cookie': 'ustat=__180.111.43.199_1717465915_0.83147200; genTime=1717465915; vt=99; Hm_lvt_1b638dec39a4afcbb57dcdf017cd0625=1720417504,1720487365,1720580513,1721704260; HMACCOUNT=8A3F32129C1DE404; Hm_lpvt_1b638dec39a4afcbb57dcdf017cd0625=1721704288' 24 } 25 response = requests.get(url=http_url, headers=headers) 26 info_list = response.json()['result'] 27 print("===========================>>>>>>解析数据结果<<<<<<===========================") 28 29 total_list = [] 30 31 32 # 查询数据结果 33 def select(): 34 find_ball = [] 35 for item in range(1, 81): 36 find_ball.clear() 37 temp_dic = {} 38 sql = "select count(*) as total from ticket where ball1=%s or ball2=%s or ball3=%s or ball4=%s or ball5=%s or ball6=%s or ball7=%s or ball8=%s or ball9=%s or ball10=%s or ball11=%s or ball12=%s or ball13=%s or ball14=%s or ball15=%s or ball16=%s or ball17=%s or ball18=%s or ball19=%s or ball20=%s" 39 find_ball.append(item) 40 result = find_ball * 20 41 curs.execute(sql, result) 42 search = curs.fetchall() 43 temp_dic['key'] = item 44 temp_dic['value'] = search[0][0] 45 total_list.append(temp_dic) 46 str_json = json.dumps(total_list) 47 print(str_json) 48 insert_total_data(total_list) 49 curs.close() 50 conn.close() 51 52 53 # 统计 54 def insert_total_data(all_list): 55 for item in all_list: 56 sql = "INSERT INTO ticket_total(ball, total, flag, remark) VALUES (%s, %s, %s, %s)" 57 key = item["key"] 58 value = item["value"] 59 curs.execute(sql, (key, value, 0, "")) 60 conn.commit() 61 curs.close() 62 conn.close() 63 print("~~~~~~~~~~~~~~>>>>统计数据插入完成<<<<~~~~~~~~~~~~~~") 64 65 66 # 插入数到数据库 67 def insert(): 68 for item in info_list: 69 _code = item['code'] 70 _date = item['date'] 71 _ball = item['red'] 72 _eve = str(item['red']).split(",") 73 sql = "INSERT INTO ticket(code, date_time, ticket_number, ball1, ball2, ball3, ball4, ball5, ball6, ball7, ball8, ball9, ball10, ball11, ball12, ball13, ball14, ball15, ball16, ball17, ball18, ball19, ball20, flag, remark) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)" 74 curs.execute(sql, ( 75 _code, _date, _ball, _eve[0], _eve[1], _eve[2], _eve[3], _eve[4], _eve[5], _eve[6], _eve[7], _eve[8], 76 _eve[9], _eve[10], _eve[11], _eve[12], _eve[13], _eve[14], _eve[15], _eve[16], _eve[17], _eve[18], _eve[19], 77 0, "")) 78 conn.commit() 79 print("~~~~~~~~~~~~~~~~~>>>>>>插入完成<<<<<<~~~~~~~~~~~~~~~~~") 80 curs.close() 81 conn.close() 82 83 84 # 修改数据 85 def search_total_data(): 86 sql = "select ball as '序号',total as '总数',flag as '状态' from ticket_total" 87 curs.execute(sql) 88 rows = curs.fetchall() 89 # 返回值fields是一个元组,其中的每一项元素也是一个元组(子元组)| 这个子元组的第一个元素是字段名 90 field = curs.description 91 curs.close() 92 conn.close() 93 return field, rows 94 95 96 # 导出统计表中的数据到Excel中 97 def export_total_excel(field, table_row, filename): 98 workbook = openpyxl.Workbook() 99 sheet = workbook.create_sheet('统计表', 0) 100 # 设置单元格的居中位置 101 header_align = Alignment(horizontal='center', vertical='center') 102 content_align = Alignment(horizontal='center', vertical='center') 103 # 设置单元格的填充颜色 104 header_fill = PatternFill(patternType='solid', fgColor='E9C342') 105 content_fill = PatternFill(patternType='solid', fgColor='43DE88') 106 # 设置单元格边框样式 107 header_border = Border(bottom=Side('thin'), right=Side('thin'), left=Side('thin'), top=Side('thin')) 108 content_border = Border(bottom=Side('thin'), right=Side('thin'), left=Side('thin'), top=Side('thin')) 109 # 遍历单元格数据 110 for i in range(0, len(field)): 111 cell = sheet.cell(1, i + 1, field[i][0]) 112 cell.alignment = header_align 113 cell.border = header_border 114 cell.fill = header_fill 115 for row in range(0, len(table_row)): 116 for col in range(0, len(field)): 117 sol = sheet.cell(row + 2, col + 1, '%s' % table_row[row][col]) 118 sol.alignment = content_align 119 sol.border = content_border 120 sol.fill = content_fill 121 # 保存数据到Excel中 122 workbook.save(filename) 123 # 关闭Excel工作表 124 workbook.close() 125 126 127 if __name__ == '__main__': 128 # insert() 129 # select() 130 print("============数据导出============") 131 fields, table_rows = search_total_data() 132 export_total_excel(fields, table_rows, './total.xlsx')