# 58同城闲置物品 — scraper for second-hand (闲置) listings on 58.com

from selenium import webdriver
import xlsxwriter as xw
from time import sleep
import time
import random
import requests
from lxml import html

import pymysql
from selenium.webdriver.common import window


def save_mysql(title, updated_at, uid, address_id, username, show_money, money, mobile, created_at, prov_id, city_id,
               coun_id, lng,
               lat,
               service_typeid, descs, list_tupian):
    """Persist one scraped second-hand listing into MySQL.

    Performs three inserts on the same connection:
      1. the listing itself into ``m_app_unused``;
      2. one row per image URL into ``m_user_publish_file``;
      3. a summary row into ``m_user_publish``;
    rows 2 and 3 are linked to row 1 via its auto-increment id.

    Parameters mirror the scraped fields. ``list_tupian`` is a list of image
    URLs whose scheme may have been stripped by the page; ``https:`` is
    prepended when missing. Prices are expected already converted to cents.
    """
    publish_type = '2'  # record type: 1=需求 2=闲置物品 3=车子 4=房子

    # SECURITY NOTE(review): database credentials are hard-coded in source;
    # they should be moved to environment variables or a config file.
    connection = pymysql.Connect(
        host='140.210.4.73',
        port=3306,
        user='agr_sql',
        passwd='bj@#agr_sql',
        db='fa_admin',
        charset='utf8mb4'
    )
    try:
        # 1) main listing row
        with connection.cursor() as cursor:
            sql = (
                "INSERT INTO m_app_unused(title,updated_at,uid,address_id,username,"
                "show_money,money,mobile,created_at,prov_id,city_id,coun_id,lng,lat,"
                "service_typeid,`desc`) "
                "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
            )
            cursor.execute(sql, (
                title, updated_at, uid, address_id, username, show_money, money,
                mobile, created_at, prov_id, city_id, coun_id, lng, lat,
                service_typeid, descs))
            publish_id = cursor.lastrowid  # auto-increment id of the row just inserted
        print(publish_id, type(publish_id))
        connection.commit()
        print("<<<<<<<<<第一张表数据存储成功>>>>>>>>", )

        # 2) one file row per image; reuse a single cursor and commit once
        with connection.cursor() as cursor:
            sql = (
                "INSERT INTO m_user_publish_file(type, uid, created_at, url, publish_id) "
                "VALUES (%s, %s, %s, %s, %s)"
            )
            for li_tu in list_tupian:
                # restore the scheme when the page delivered protocol-relative URLs
                url = li_tu if 'https:' in li_tu else 'https:' + li_tu
                print(url)
                cursor.execute(sql, (publish_type, uid, created_at, url, publish_id))
        connection.commit()
        print("<<<<<<<<<m_user_publish_file数据存储成功>>>>>>>>", )

        # 3) publish summary row
        with connection.cursor() as cursor:
            sql = (
                "INSERT INTO m_user_publish(type, uid, created_at, publish_id,prov_id,city_id,coun_id) "
                "VALUES ( %s, %s, %s, %s, %s, %s, %s)"
            )
            cursor.execute(sql, (
                publish_type, uid, created_at, publish_id,
                prov_id, city_id, coun_id,
            ))
        connection.commit()
        print("<<<<<<<<<m_user_publish数据存储成功>>>>>>>>", )
    finally:
        # previously the connection leaked when any insert raised; always release it
        connection.close()


# Excel output setup.
# NOTE(review): nothing below the header row is ever written to this workbook
# in this file, and wbook.close() is never called, so the .xlsx file is likely
# never flushed to disk — confirm whether the Excel output is still needed.
wbook = xw.Workbook('闲置物品.xlsx')
wsheet1 = wbook.add_worksheet('Sheet1')  # create worksheet
wsheet1.activate()  # make it the active sheet
title = ['title', 'money', '用户名', 'id', '星评', '产品名', '购买类型']  # header row
wsheet1.write_row('A1', title)  # write header starting at cell A1
i = 2  # next data row to write; NOTE(review): both `i` and `title` are reassigned later and never used for Excel output

option = webdriver.ChromeOptions()
# option.add_argument(r"user-data-dir=C:\Users\Administrator\AppData\Local\Chromium\Application")  # browser profile path

# initialise the Chrome driver
browser = webdriver.Chrome(options=option)
# ---- static lookup data, hoisted out of the page loop (it never changes) ----

# Mapping of scraped category names to internal service-type ids.
# NOTE(review): currently unused — service_typeid below is chosen at random
# from a fixed list; kept for reference / future mapping of real categories.
service_typeid2 = {
    '家居日用': 1,
    '住宅家具': 2,
    '北京二手家具': 2,
    '未拆(全新)': 4,
    '生活电器': 36,
    '电脑数码': 54,
    ' 北京二手台式机/配件': 54,
    '手机3C': 55,
    '五金工具': 56,
    '宠物用品': 57,
    '儿童碗具': 58,
    '户外车品': 59,
    '绘本书籍': 60,
    '服饰配件': 61,
    '仅拆(9.9新)': 62,
    '95成新': 63,
    '9成新': 64
}

# Fake publisher identities, one row per user:
# [uid, username, mobile, prov_id, city_id, coun_id, lng, lat, address_id]
m_userb = [
    [1, '张三', '18201355004', '110000', '110100', '110106', '116.29560982612', '39.840624161575', '1'],
    [2, '我是曹野', '18600806657', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '2'],
    [3, '陈十一', '18600806656', '110000', '110100', '110106', '116.29768500168', '39.839177478909', '3'],
    [4, '清宁', '18600806655', '110000', '110100', '110106', '116.29595243984', '39.840078179251', '6'],
    [11, '过时的短靴', '18600806652', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '8'],
    [5, '曹野', '18600806654', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '15'],
    [6, '嘉平九', '13103657333', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '14'],
    [9, '山山', '18600806653', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '16'],
    [13, '合适', '12345678902', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '24'],
    [8, '陈建锋', '12345678901', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '18'],
    [10, '雨剑门秋', '12345678091', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '20'],
    [12, '无私的鞋垫', '19921478807', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '21']
]

summ = 1  # running count of rows stored (previously reset on every page)

for urlpage in range(1, 50):
    print(f'********************************第{urlpage}页**********************************')
    browser.get(
        f'https://bj.58.com/shouji/pn{urlpage}/?PGTID=0d300024-0000-1ef3-2708-412acac48714&ClickID=2'
    )
    browser.maximize_window()

    # scroll the page down in steps so lazily-loaded listings render
    for _ in range(5):
        time.sleep(2)
        browser.execute_script('window.scrollBy(0,2200)')

    # parse the fully-rendered listing page
    html2 = html.etree.HTML(browser.page_source)

    for fog in range(1, 21):  # up to 20 listings per result page
        updated_at = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

        # displayed price; '面议' (negotiable) becomes 0; stored in cents
        show_money = html2.xpath(f'//tr[{fog}]//td//p[3]//b//text()')[0].strip()
        show_money = int(float(show_money.replace('面议', '0'))) * 100

        # original price — NOTE(review): same xpath as show_money, so both
        # fields always carry the displayed price; confirm the intended source.
        money = html2.xpath(f'//tr[{fog}]//td//p[3]//b//text()')[0].strip()
        money = int(float(money.replace('面议', '0'))) * 100

        # make sure we operate on the most recently opened window
        browser.switch_to.window(browser.window_handles[-1])
        time.sleep(random.randint(3, 5))

        try:
            # open the listing's detail page (opens in a new tab)
            browser.find_element_by_xpath(
                f'/html/body/div[4]/section/div[4]/table/tbody/tr[{fog}]/td[2]//a[@target="_blank"]').click()

            browser.switch_to.window(browser.window_handles[-1])
            html1 = html.etree.HTML(browser.page_source)

            # listing title and description text
            title = html1.xpath('//h1[@class="detail-title__name"]//text()')[0].strip()
            descs = html1.xpath('//div[@class="descriptionBox detail-desc__content__desc__box"]//text()')[2].strip()

            # random service-type id combination (手机3C / 成新 families)
            service_typeid = random.choice(['55,63,64', '55', '63', '64', '55,63', '55,64'])

            # random fake identity used as the publisher of this record
            uid, username, mobile, prov_id, city_id, coun_id, lng, lat, address_id = random.choice(m_userb)

            # publish date from the page ("…更新" suffix stripped) + midnight time
            created_at = ''.join(
                html1.xpath('//div[@class="detail-title__info"]//div[1]//text()')
            ).replace('更新', '').strip()
            created_at = created_at + ' ' + '00:00:00'

            # every gallery image URL on the detail page
            list_tupian = html1.xpath(
                '//ul[@class="detail-desc__imgPlayer__imgList imgplayerlist"]//li//span//img//@src')

            print(title, updated_at, uid, address_id, username, show_money, money, mobile, created_at, prov_id,
                  city_id, coun_id, lng, lat, service_typeid, descs, list_tupian)
            save_mysql(title, updated_at, uid, address_id, username, show_money, money, mobile, created_at, prov_id,
                       city_id, coun_id,
                       lng, lat,
                       service_typeid, descs, list_tupian)
            print('+++++++++++++++++第', summ, '条数据入库+++++++++++++++++')
            print('\n')
            summ += 1

            time.sleep(random.randint(3, 4))
            browser.close()  # close the detail tab; next iteration switches back
        except Exception as exc:
            # was a bare `except:` hiding every error (including Ctrl-C), plus a
            # no-op `fog += 1` (rebinding a for-loop variable has no effect);
            # log the failure and move on to the next listing instead
            print('数目加一,继续!!!!', exc)
            continue

# posted @ 2022-06-13 10:32 布都御魂 (blog-platform footer retained as a comment)