import random
import time
from time import sleep

import pymysql
import requests
import xlsxwriter as xw
from lxml import html
from selenium import webdriver
from selenium.webdriver.common import window
from selenium.webdriver.common.by import By
def save_mysql(title, updated_at, uid, address_id, username, show_money, money, mobile, created_at, prov_id, city_id,
               coun_id, lng,
               lat,
               service_typeid, descs, list_tupian):
    """Persist one scraped second-hand item into three MySQL tables.

    Inserts the item into ``m_app_unused``, then one row per image URL into
    ``m_user_publish_file``, and finally an index row into ``m_user_publish``.
    The auto-increment id of the first insert is reused as ``publish_id`` for
    the other two tables.

    Parameters mirror the table columns being written. ``list_tupian`` is an
    iterable of image URLs; scheme-relative URLs (``//...``) are prefixed
    with ``https:`` before being stored.
    """
    # Publication type code: 1 = demand, 2 = second-hand item, 3 = car, 4 = house.
    item_type = '2'
    # SECURITY NOTE(review): database credentials are hard-coded here; they
    # should be moved to configuration / environment variables.
    connection = pymysql.Connect(
        host='140.210.4.73',
        port=3306,
        user='agr_sql',
        passwd='bj@#agr_sql',
        db='fa_admin',
        charset='utf8mb4'
    )
    try:
        # 1) Main item row.
        with connection.cursor() as cursor:
            sql = """INSERT INTO m_app_unused(title,updated_at,uid,address_id,username,show_money,money,mobile,created_at,prov_id,city_id,coun_id,lng,lat,service_typeid,`desc`)\
             VALUES (%s, %s,%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""
            cursor.execute(sql, (
                title, updated_at, uid, address_id, username, show_money,
                money, mobile, created_at,
                prov_id, city_id, coun_id, lng, lat,
                service_typeid, descs))
            # Auto-increment id of the row just inserted.
            publish_id = connection.insert_id()
            print(publish_id, type(publish_id))
            connection.commit()
            print("<<<<<<<<<第一张表数据存储成功>>>>>>>>", )

        # 2) One file row per image; normalise scheme-relative URLs first.
        for li_tu in list_tupian:
            url = li_tu if 'https:' in li_tu else 'https:' + li_tu
            print(url)
            with connection.cursor() as cursor1:
                sql1 = """INSERT INTO m_user_publish_file(type, uid, created_at, url, publish_id)\
                         VALUES (%s, %s, %s, %s, %s)"""
                cursor1.execute(sql1, (
                    item_type, uid, created_at, url, publish_id
                ))
                connection.commit()
                print("<<<<<<<<<m_user_publish_file数据存储成功>>>>>>>>", )

        # 3) Publication index row.
        with connection.cursor() as cursor2:
            sql2 = """INSERT INTO m_user_publish(type, uid, created_at, publish_id,prov_id,city_id,coun_id)\
                     VALUES ( %s, %s, %s, %s, %s, %s, %s)"""
            cursor2.execute(sql2, (
                item_type, uid, created_at, publish_id,
                prov_id, city_id, coun_id,
            ))
            connection.commit()
            print("<<<<<<<<<m_user_publish数据存储成功>>>>>>>>", )
    finally:
        # Always release the connection, even if an insert fails
        # (the original leaked it on any exception).
        connection.close()
# --- One-time setup: Excel workbook (header row only) and Chrome driver ---
# NOTE(review): nothing else in the visible script writes to or closes this
# workbook, so '闲置物品.xlsx' is never finalised — call wbook.close() when done.
wbook = xw.Workbook('闲置物品.xlsx')
wsheet1 = wbook.add_worksheet('Sheet1')  # create the worksheet
wsheet1.activate()  # make it the active sheet
title = ['title', 'money', '用户名', 'id', '星评', '产品名', '购买类型']  # header labels
wsheet1.write_row('A1', title)  # write the header starting at cell A1
option = webdriver.ChromeOptions()
# option.add_argument(r"user-data-dir=C:\Users\Administrator\AppData\Local\Chromium\Application")  # optional browser profile path
# Initialise the driver.
browser = webdriver.Chrome(options=option)
# ---------------------------------------------------------------------------
# Main scrape loop: walk the 58.com second-hand phone listing pages, open each
# item's detail page, extract its fields and persist them via save_mysql().
# ---------------------------------------------------------------------------

# Category name -> service_typeid mapping. Loop-invariant, so built once
# instead of on every page. NOTE(review): not referenced by the code below;
# kept in case later (unseen) code relies on it.
service_typeid2 = {
    '家居日用': 1,
    '住宅家具': 2,
    '北京二手家具': 2,
    '未拆(全新)': 4,
    '生活电器': 36,
    '电脑数码': 54,
    ' 北京二手台式机/配件': 54,
    '手机3C': 55,
    '五金工具': 56,
    '宠物用品': 57,
    '儿童碗具': 58,
    '户外车品': 59,
    '绘本书籍': 60,
    '服饰配件': 61,
    '仅拆(9.9新)': 62,
    '95成新': 63,
    '9成新': 64
}
# Pool of publisher identities, one picked at random per item:
# [uid, username, mobile, prov_id, city_id, coun_id, lng, lat, address_id]
m_userb = [
    [1, '张三', '18201355004', '110000', '110100', '110106', '116.29560982612', '39.840624161575', '1'],
    [2, '我是曹野', '18600806657', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '2'],
    [3, '陈十一', '18600806656', '110000', '110100', '110106', '116.29768500168', '39.839177478909', '3'],
    [4, '清宁', '18600806655', '110000', '110100', '110106', '116.29595243984', '39.840078179251', '6'],
    [11, '过时的短靴', '18600806652', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '8'],
    [5, '曹野', '18600806654', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '15'],
    [6, '嘉平九', '13103657333', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '14'],
    [9, '山山', '18600806653', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '16'],
    [13, '合适', '12345678902', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '24'],
    [8, '陈建锋', '12345678901', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '18'],
    [10, '雨剑门秋', '12345678091', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '20'],
    [12, '无私的鞋垫', '19921478807', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '21']
]

for urlpage in range(1, 50):
    print(f'********************************第{urlpage}页**********************************')
    browser.get(
        f'https://bj.58.com/shouji/pn{urlpage}/?PGTID=0d300024-0000-1ef3-2708-412acac48714&ClickID=2'
    )
    browser.maximize_window()
    # Scroll down in steps so lazily-loaded listings render before parsing.
    for i in range(5):
        time.sleep(2)
        browser.execute_script('window.scrollBy(0,2200)')
    html2 = html.etree.HTML(browser.page_source)
    summ = 1  # per-page counter of successfully stored items
    for fog in range(1, 21):  # up to 20 listing rows per page
        # Make sure we are back on the listing tab before interacting.
        handles = browser.window_handles
        browser.switch_to.window(handles[-1])
        time.sleep(random.randint(3, 5))
        try:
            updated_at = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
            # Asking price and "original" price both come from the same cell.
            # '面议' (negotiable) is mapped to 0; prices are stored in cents.
            # Extraction is inside the try so a short page (fewer than 20
            # rows) skips the row instead of crashing the whole run.
            show_money = html2.xpath(f'//tr[{fog}]//td//p[3]//b//text()')[0].strip()
            show_money = int(float(''.join(show_money).replace('面议', '0'))) * 100
            money = html2.xpath(f'//tr[{fog}]//td//p[3]//b//text()')[0].strip()
            money = int(float(''.join(money).replace('面议', '0'))) * 100
            # Selenium 4 locator (find_element_by_xpath was removed).
            browser.find_element(
                By.XPATH,
                f'/html/body/div[4]/section/div[4]/table/tbody/tr[{fog}]/td[2]//a[@target="_blank"]'
            ).click()
            handles = browser.window_handles
            browser.switch_to.window(handles[-1])  # jump to the new detail tab
            html1 = html.etree.HTML(browser.page_source)
            # Item title and description.
            title = html1.xpath('//h1[@class="detail-title__name"]//text()')[0].strip()
            descs = html1.xpath('//div[@class="descriptionBox detail-desc__content__desc__box"]//text()')[2].strip()
            service_typeid = random.choice(['55,63,64', '55', '63', '64', '55,63', '55,64'])
            # Pick a random publisher identity.
            sfxx = random.choice(m_userb)
            uid = sfxx[0]
            username = sfxx[1]
            mobile = sfxx[2]
            prov_id = sfxx[3]
            city_id = sfxx[4]
            coun_id = sfxx[5]
            lng = sfxx[6]
            lat = sfxx[7]
            address_id = sfxx[8]
            # The page shows '更新<date>'; strip the label and append midnight.
            created_at = ''.join(
                html1.xpath('//div[@class="detail-title__info"]//div[1]//text()')
            ).replace('更新', '').strip()
            created_at = created_at + ' ' + '00:00:00'
            list_tupian = html1.xpath(
                '//ul[@class="detail-desc__imgPlayer__imgList imgplayerlist"]//li//span//img//@src')
            print(title, updated_at, uid, address_id, username, show_money, money, mobile,
                  created_at, prov_id, city_id, coun_id, lng, lat,
                  service_typeid, descs, list_tupian)
            save_mysql(title, updated_at, uid, address_id, username, show_money, money, mobile,
                       created_at, prov_id, city_id, coun_id, lng, lat,
                       service_typeid, descs, list_tupian)
            print('+++++++++++++++++第', summ, '条数据入库+++++++++++++++++')
            print('\n')
            summ += 1
            time.sleep(random.randint(3, 4))
            browser.close()  # close the detail tab; the listing tab is re-selected next pass
        except Exception:
            # Skip this row on any failure (missing element, malformed price...).
            # The original `fog += 1` here was a no-op — `for` rebinds fog —
            # so it has been dropped. Bare `except:` narrowed so Ctrl-C works.
            print('数目加一,继续!!!!')
            continue