爬虫 配置文件
# coding:utf-8
"""Shared configuration and helper functions for the spider scripts.

Importing this module creates a Redis client (``r``) and a shared
``requests`` session (``session``), disables urllib3 warnings and, on
Python 2, forces the default string encoding to UTF-8.  It also exposes
small helpers for hashing, text clean-up and MySQL access against the
``epai_spider`` and ``liuyao_spider`` databases.
"""
import hashlib
import json
import multiprocessing
import os
import random
import re
import sys
import threading
import time
import uuid

try:
    from HTMLParser import HTMLParser  # Python 2 module name
except ImportError:
    from html.parser import HTMLParser  # Python 3 module name

import MySQLdb
import MySQLdb.cursors
import redis
import requests
import urllib3
from bs4 import BeautifulSoup
from lxml import etree
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.proxy import ProxyType

if sys.version_info[0] == 2:
    # Python 2 only: make implicit str/unicode conversions use UTF-8.
    reload(sys)
    sys.setdefaultencoding('utf-8')

urllib3.disable_warnings()

# Module-level clients shared by the spider scripts that import this file.
r = redis.Redis(host="10.10.20.110", port=6379)
session = requests.session()

# MySQL connection settings.
# NOTE(review): credentials are hard-coded in source; consider loading them
# from the environment or a private config file.
_DB_HOST = "110.110.110.177"
_DB_USER = "xuchunlin"
_DB_PASSWD = "123"
_DB_CHARSET = "utf8"
_EPAI_DB = "epai_spider"
_LIUYAO_DB = "liuyao_spider"

# Compiled once instead of on every replace() call.
_WHITESPACE_RE = re.compile(r'[\r\n\t ]')
_COMMENT_RE = re.compile('<!--[^>]*-->')


def md5(str):
    """Return the hexadecimal MD5 digest of ``str``.

    Text (unicode) input is encoded as UTF-8 before hashing; byte strings
    are hashed as-is.  The parameter name shadows the builtin but is kept
    so existing keyword callers keep working.
    """
    data = str
    if isinstance(data, type(u"")):
        data = data.encode('utf-8')
    m = hashlib.md5()
    m.update(data)
    return m.hexdigest()


def replace(newline):
    """Return ``newline`` as a string with whitespace and HTML comments removed.

    Removes every carriage return, line feed, tab and space, then every
    ``amp;`` substring, then ``<!-- ... -->`` comments whose body contains
    no ``>``, and finally any leftover ``<!--`` / ``-->`` markers.
    """
    text = _WHITESPACE_RE.sub('', str(newline))
    text = text.replace('amp;', '')
    text = _COMMENT_RE.sub('', text)
    return text.replace('<!--', '').replace('-->', '')


def _connect(db):
    """Open a new MySQL connection to the database named ``db``."""
    return MySQLdb.connect(host=_DB_HOST, user=_DB_USER, passwd=_DB_PASSWD,
                           db=db, charset=_DB_CHARSET)


def _build_insert_sql(table, fields):
    """Build a parameterized INSERT statement for ``table`` and ``fields``."""
    # Join the names explicitly: str(tuple(...)) yields "(a,)" for a single
    # column and "(ua, ub)" for Python 2 unicode keys, both invalid SQL.
    columns = "(" + ", ".join(str(field) for field in fields) + ")"
    placeholders = "(" + ",".join(["%s"] * len(fields)) + ")"
    return """ insert into %s %s values %s """ % (table, columns, placeholders)


def _insert_rows(db, table, rows, many):
    """Insert ``rows`` (a list of dicts) into ``table`` of database ``db``.

    Columns come from the first dict; every row's values are read in that
    same column order.  Returns 1 on success and 0 on any failure, after
    printing the error.  The table and column names are interpolated into
    the SQL text, so they must come from trusted code; only the values are
    passed as query parameters.
    """
    try:
        fields = list(rows[0].keys())
        sql = _build_insert_sql(table, fields)
        # Index by field name so values always line up with the column list.
        params = [tuple(row[field] for field in fields) for row in rows]
        conn = _connect(db)
        try:
            cursor = conn.cursor()
            if many:
                cursor.executemany(sql, params)
            else:
                cursor.execute(sql, params[0])
            conn.commit()
            cursor.close()
        finally:
            # Always release the connection, even when the insert fails.
            conn.close()
        print("===== 插入成功 =====")
        return 1
    except Exception as e:
        print("******** 插入失败 ********")
        print(e)
        return 0


def _select(db, sql):
    """Run ``sql`` against database ``db`` and return the rows as a list of dicts."""
    conn = _connect(db)
    try:
        cursor = conn.cursor(cursorclass=MySQLdb.cursors.DictCursor)
        cursor.execute(sql)
        rows = list(cursor.fetchall())
        cursor.close()
        return rows
    finally:
        conn.close()


def _update(db, sql, data):
    """Execute ``sql`` with parameters ``data`` on ``db``; commit or roll back.

    Errors raised by the statement are printed and rolled back rather than
    propagated.  A failure to connect still raises.
    """
    conn = _connect(db)
    try:
        cursor = conn.cursor(cursorclass=MySQLdb.cursors.DictCursor)
        try:
            cursor.execute(sql, data)
            conn.commit()
            print("更新成功")
        except Exception as e:
            print(e)
            conn.rollback()
    finally:
        conn.close()


def insert_data(dbName, data_dict):
    """Insert one row into table ``dbName`` of the ``epai_spider`` database.

    ``data_dict`` maps column names to values.  Returns 1 on success, 0 on
    failure.  Despite its name, ``dbName`` is the table name.
    """
    return _insert_rows(_EPAI_DB, dbName, [data_dict], False)


def insert_data1(dbName, data_dict):
    """Insert one row into table ``dbName`` of the ``liuyao_spider`` database.

    ``data_dict`` maps column names to values.  Returns 1 on success, 0 on
    failure.  Despite its name, ``dbName`` is the table name.
    """
    return _insert_rows(_LIUYAO_DB, dbName, [data_dict], False)


def insert_data_many(dbName, list_data_dict):
    """Bulk-insert rows into table ``dbName`` of the ``epai_spider`` database.

    ``list_data_dict`` is a list of dicts; the first dict defines the
    columns and every other dict must provide the same keys.  Returns 1 on
    success, 0 on failure (including an empty list).
    """
    return _insert_rows(_EPAI_DB, dbName, list_data_dict, True)


def select_data(sql):
    """Run ``sql`` on ``epai_spider`` and return all rows as a list of dicts."""
    return _select(_EPAI_DB, sql)


def select_data1(sql):
    """Run ``sql`` on ``liuyao_spider`` and return all rows as a list of dicts."""
    return _select(_LIUYAO_DB, sql)


def update_data(sql, data):
    """Execute the parameterized statement ``sql`` with ``data`` on ``epai_spider``."""
    _update(_EPAI_DB, sql, data)


def update_data1(sql, data):
    """Execute the parameterized statement ``sql`` with ``data`` on ``liuyao_spider``."""
    _update(_LIUYAO_DB, sql, data)
如果觉得对您有帮助,麻烦您点一下推荐,谢谢!
好记忆不如烂笔头
好记忆不如烂笔头
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 如何编写易于单元测试的代码
· 10年+ .NET Coder 心语,封装的思维:从隐藏、稳定开始理解其本质意义
· .NET Core 中如何实现缓存的预热?
· 从 HTTP 原因短语缺失研究 HTTP/2 和 HTTP/3 的设计差异
· AI与.NET技术实操系列:向量存储与相似性搜索在 .NET 中的实现
· 10年+ .NET Coder 心语 ── 封装的思维:从隐藏、稳定开始理解其本质意义
· 地球OL攻略 —— 某应届生求职总结
· 周边上新:园子的第一款马克杯温暖上架
· Open-Sora 2.0 重磅开源!
· 提示词工程——AI应用必不可少的技术