import re
import requests
import random
import time
import os.path
from bs4 import BeautifulSoup
import pymysql
# Pool of desktop-browser User-Agent strings to choose from when sending
# HTTP requests.
user_agent_list = [
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
    "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5",
    "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
]

# One User-Agent string is drawn at random from user_agent_list when the
# module is loaded; the same value is then reused for the rest of the run.
UA = random.choice(user_agent_list)

# Request headers carrying the User-Agent chosen above.
headers = {"User-Agent": UA}
# Helper: extract the file extension from a path or URL.
def file_extension(path):
    """Return the extension of *path*, including the leading dot.

    *path* may be a plain file path or a URL. For a URL only the path
    component is inspected, so a query string or fragment is not glued
    onto the extension: ``http://host/a.jpg?v=1`` yields ``".jpg"`` rather
    than ``".jpg?v=1"``. The latter contains ``?``, which is not allowed in
    Windows file names.

    Returns an empty string when there is no extension.
    """
    # Function-scope import so the block does not depend on a new
    # top-of-file import.
    from urllib.parse import urlsplit

    try:
        path_part = urlsplit(path).path
    except ValueError:
        # urlsplit rejects some malformed URLs (e.g. a broken IPv6 host);
        # fall back to the raw string so this helper never raises for them.
        path_part = path
    return os.path.splitext(path_part)[1]
# ---------------------------------------------------------------------------
# Image localisation job.
#
# Reads rows from `sucaix_copy2`, downloads every <img> referenced in each
# row's HTML, stores the file locally under a sequential numeric name,
# rewrites the markup to point at the local copy and saves the result in the
# `rowcode2` column.
# ---------------------------------------------------------------------------

# Local folder that receives the downloaded images.
IMAGE_DIR = 'C:\\Users\\mydell\\Desktop\\sucai365\\'
# Path prefix written into the rewritten markup in place of the remote URL.
IMAGE_URL_PREFIX = '/20200708/'


def _download_image(url, dest_path):
    """Download *url* into *dest_path*; return True on success, else False.

    Network errors, invalid URLs, non-2xx responses and file-system errors
    are reported on stdout instead of being raised, so that one bad image
    does not abort the whole batch.
    """
    try:
        response = requests.get(url, headers=headers, timeout=5)
        # Do not save an HTML error page as if it were the image.
        response.raise_for_status()
    except requests.RequestException as exc:
        print("download failed:", url, exc)
        return False

    try:
        with open(dest_path, 'wb') as f:
            f.write(response.content)
    except OSError as exc:
        print("could not write:", dest_path, exc)
        return False
    return True


# Connect to the database.
# NOTE(review): credentials are hard-coded here; consider reading them from
# the environment or a config file that is kept out of version control.
conn = pymysql.connect(user='root', password='1234', host='127.0.0.1', database='sucai')
try:
    # Create a cursor.
    cursor = conn.cursor()

    # Run the query; execute() returns the number of rows it produced.
    effect_rows = cursor.execute('select * from sucaix_copy2 where id>10851')
    print("受影响的行数", effect_rows)

    # Fetch every result row.
    results = cursor.fetchall()

    xx = 1012642  # starting number for the sequential image file names

    # Keep only the two columns used below: column 0 is used as the row id
    # and column 2 is parsed as HTML.
    datalist = [[row[0], row[2]] for row in results]

    for row_id, rowcode in datalist:
        if rowcode is None:
            # A NULL column cannot be parsed or rewritten.
            print("skipping row with empty markup:", row_id)
            continue

        soup = BeautifulSoup(rowcode, 'lxml')
        # URLs already handled for this row. str.replace() below rewrites
        # every occurrence at once, so a repeated URL needs no second
        # download (which would also waste a file number).
        seen = set()
        for img in soup.find_all('img'):
            url0 = img.get('src')
            if not url0 or url0 in seen:
                # <img> without a src attribute, or a duplicate.
                continue
            seen.add(url0)
            print(url0)

            # Name the local file by the running counter plus the original
            # extension.
            img_name = str(xx) + file_extension(url0)
            if not _download_image(url0, IMAGE_DIR + img_name):
                # Leave the remote URL in place for this image.
                continue

            rowcode = rowcode.replace(url0, IMAGE_URL_PREFIX + img_name)
            xx += 1

        # Store the rewritten markup; parameters are bound by the driver.
        cursor.execute('update sucaix_copy2 set rowcode2=%s where id=%s', [rowcode, row_id])
        conn.commit()
        print(row_id)
finally:
    # Always release the connection, even if a row fails unexpectedly.
    conn.close()