Python 爬取网页数据,再插入到数据库中,再从数据库中查询需要的数据,统计后最后导出到Excel

  1 # _*_ coding: utf-8 _*_
  2 import json
  3 import openpyxl
  4 import mysql.connector
  5 import requests
  6 
  7 # 创建数据库连接
  8 from openpyxl.styles import Border, Side, Alignment, PatternFill
  9 
 10 conn = mysql.connector.connect(
 11     host="127.0.0.1",  # MySQL服务器地址
 12     user="root",  # 用户名
 13     port="3306",  # 端口号
 14     password="123456",  # 密码
 15     database="reptile_data"  # 数据库名称
 16 )
 17 curs = conn.cursor()
 18 date = "2024-01-01"
 19 http_url = f"https://www.cwl.gov.cn/cwl_admin/front/cwlkj/search/kjxx/findDrawNotice?name=kl8&issueCount=&issueStart=&issueEnd=&dayStart={date}&dayEnd=&pageNo=1&pageSize=500&week=&systemType=PC"
 20 
 21 headers = {
 22     'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Mobile Safari/537.36',
 23     'Cookie': 'ustat=__180.111.43.199_1717465915_0.83147200; genTime=1717465915; vt=99; Hm_lvt_1b638dec39a4afcbb57dcdf017cd0625=1720417504,1720487365,1720580513,1721704260; HMACCOUNT=8A3F32129C1DE404; Hm_lpvt_1b638dec39a4afcbb57dcdf017cd0625=1721704288'
 24 }
 25 response = requests.get(url=http_url, headers=headers)
 26 info_list = response.json()['result']
 27 print("===========================>>>>>>解析数据结果<<<<<<===========================")
 28 
 29 total_list = []
 30 
 31 
 32 # 查询数据结果
 33 def select():
 34     find_ball = []
 35     for item in range(1, 81):
 36         find_ball.clear()
 37         temp_dic = {}
 38         sql = "select count(*) as total from ticket where ball1=%s or ball2=%s or ball3=%s or ball4=%s or ball5=%s or ball6=%s or ball7=%s or ball8=%s or ball9=%s or ball10=%s or ball11=%s or ball12=%s or ball13=%s or ball14=%s or ball15=%s or ball16=%s or ball17=%s or ball18=%s or ball19=%s or ball20=%s"
 39         find_ball.append(item)
 40         result = find_ball * 20
 41         curs.execute(sql, result)
 42         search = curs.fetchall()
 43         temp_dic['key'] = item
 44         temp_dic['value'] = search[0][0]
 45         total_list.append(temp_dic)
 46     str_json = json.dumps(total_list)
 47     print(str_json)
 48     insert_total_data(total_list)
 49     curs.close()
 50     conn.close()
 51 
 52 
 53 # 统计
 54 def insert_total_data(all_list):
 55     for item in all_list:
 56         sql = "INSERT INTO  ticket_total(ball, total, flag, remark) VALUES (%s, %s, %s, %s)"
 57         key = item["key"]
 58         value = item["value"]
 59         curs.execute(sql, (key, value, 0, ""))
 60         conn.commit()
 61     curs.close()
 62     conn.close()
 63     print("~~~~~~~~~~~~~~>>>>统计数据插入完成<<<<~~~~~~~~~~~~~~")
 64 
 65 
 66 # 插入数到数据库
 67 def insert():
 68     for item in info_list:
 69         _code = item['code']
 70         _date = item['date']
 71         _ball = item['red']
 72         _eve = str(item['red']).split(",")
 73         sql = "INSERT INTO  ticket(code, date_time, ticket_number, ball1, ball2, ball3, ball4, ball5, ball6, ball7, ball8, ball9, ball10, ball11, ball12, ball13, ball14, ball15, ball16, ball17, ball18, ball19, ball20, flag, remark) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
 74         curs.execute(sql, (
 75             _code, _date, _ball, _eve[0], _eve[1], _eve[2], _eve[3], _eve[4], _eve[5], _eve[6], _eve[7], _eve[8],
 76             _eve[9], _eve[10], _eve[11], _eve[12], _eve[13], _eve[14], _eve[15], _eve[16], _eve[17], _eve[18], _eve[19],
 77             0, ""))
 78         conn.commit()
 79     print("~~~~~~~~~~~~~~~~~>>>>>>插入完成<<<<<<~~~~~~~~~~~~~~~~~")
 80     curs.close()
 81     conn.close()
 82 
 83 
 84 # 修改数据
 85 def search_total_data():
 86     sql = "select ball as '序号',total as '总数',flag as '状态' from ticket_total"
 87     curs.execute(sql)
 88     rows = curs.fetchall()
 89     # 返回值fields是一个元组,其中的每一项元素也是一个元组(子元组)| 这个子元组的第一个元素是字段名
 90     field = curs.description
 91     curs.close()
 92     conn.close()
 93     return field, rows
 94 
 95 
 96 # 导出统计表中的数据到Excel中
 97 def export_total_excel(field, table_row, filename):
 98     workbook = openpyxl.Workbook()
 99     sheet = workbook.create_sheet('统计表', 0)
100     # 设置单元格的居中位置
101     header_align = Alignment(horizontal='center', vertical='center')
102     content_align = Alignment(horizontal='center', vertical='center')
103     # 设置单元格的填充颜色
104     header_fill = PatternFill(patternType='solid', fgColor='E9C342')
105     content_fill = PatternFill(patternType='solid', fgColor='43DE88')
106     # 设置单元格边框样式
107     header_border = Border(bottom=Side('thin'), right=Side('thin'), left=Side('thin'), top=Side('thin'))
108     content_border = Border(bottom=Side('thin'), right=Side('thin'), left=Side('thin'), top=Side('thin'))
109     # 遍历单元格数据
110     for i in range(0, len(field)):
111         cell = sheet.cell(1, i + 1, field[i][0])
112         cell.alignment = header_align
113         cell.border = header_border
114         cell.fill = header_fill
115     for row in range(0, len(table_row)):
116         for col in range(0, len(field)):
117             sol = sheet.cell(row + 2, col + 1, '%s' % table_row[row][col])
118             sol.alignment = content_align
119             sol.border = content_border
120             sol.fill = content_fill
121     # 保存数据到Excel中
122     workbook.save(filename)
123     # 关闭Excel工作表
124     workbook.close()
125 
126 
127 if __name__ == '__main__':
128     # insert()
129     # select()
130     print("============数据导出============")
131     fields, table_rows = search_total_data()
132     export_total_excel(fields, table_rows, './total.xlsx')

 

posted @ 2024-07-25 11:06  逍遥散人95  阅读(8)  评论(0编辑  收藏  举报