58同城闲置物品 – 58.com second-hand listings crawler
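The script below drives Chrome through Selenium, pages through the second-hand mobile phone listings on bj.58.com/shouji, opens each listing's detail page, extracts the title, price, description and image URLs with lxml, and stores each record into the m_app_unused, m_user_publish_file and m_user_publish MySQL tables via pymysql.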
import random
import time

import pymysql
import xlsxwriter as xw
from lxml import html
from selenium import webdriver
from selenium.webdriver.common.by import By
def save_mysql(title, updated_at, uid, address_id, username, show_money, money, mobile,
               created_at, prov_id, city_id, coun_id, lng, lat,
               service_typeid, descs, list_tupian):
    dic = {}
    dic["title"] = title
    dic["updated_at"] = updated_at
    dic["uid"] = uid
    dic["address_id"] = address_id
    dic["username"] = username
    dic["show_money"] = show_money
    dic["money"] = money
    dic["mobile"] = mobile
    dic["created_at"] = created_at
    dic["prov_id"] = prov_id
    dic["city_id"] = city_id
    dic["coun_id"] = coun_id
    dic["lng"] = lng
    dic["lat"] = lat
    dic["service_typeid"] = service_typeid
    dic["desc"] = descs
    dic["type"] = '2'  # 1 = demand, 2 = second-hand item, 3 = car, 4 = house
    connection = pymysql.Connect(
        host='140.210.4.73',
        port=3306,
        user='agr_sql',
        passwd='bj@#agr_sql',
        db='fa_admin',
        charset='utf8mb4'
    )
    with connection.cursor() as cursor:
        # Insert the listing itself.
        sheet_name = 'm_app_unused'
        sql = """INSERT INTO {}(title,updated_at,uid,address_id,username,show_money,money,mobile,created_at,prov_id,city_id,coun_id,lng,lat,service_typeid,`desc`)
                 VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)""".format(sheet_name)
        cursor.execute(sql, (
            dic["title"], dic["updated_at"], dic["uid"], dic["address_id"], dic["username"],
            dic["show_money"], dic["money"], dic["mobile"], dic["created_at"],
            dic["prov_id"], dic["city_id"], dic["coun_id"], dic["lng"], dic["lat"],
            dic["service_typeid"], dic["desc"]))
        # Primary key of the row just inserted; the two relation tables below reference it.
        ir_idd = connection.insert_id()
        print(ir_idd, type(ir_idd))
        connection.commit()
        print("<<<<<<<<< m_app_unused row saved >>>>>>>>")
        dic["publish_id"] = ir_idd
    # Insert every image URL of the listing.
    for li_tu in list_tupian:
        if 'https:' in li_tu:
            dic["url"] = li_tu
        else:
            dic["url"] = 'https:' + li_tu
        print(dic['url'])
        with connection.cursor() as cursor1:
            sheet_name1 = 'm_user_publish_file'
            sql1 = """INSERT INTO {}(type, uid, created_at, url, publish_id)
                      VALUES (%s, %s, %s, %s, %s)""".format(sheet_name1)
            cursor1.execute(sql1, (
                dic["type"], dic["uid"], dic["created_at"], dic["url"], dic["publish_id"]))
            connection.commit()
            print("<<<<<<<<< m_user_publish_file row saved >>>>>>>>")
    # Insert the publish record that ties the user, region and listing together.
    with connection.cursor() as cursor2:
        sheet_name2 = 'm_user_publish'
        sql2 = """INSERT INTO {}(type, uid, created_at, publish_id, prov_id, city_id, coun_id)
                  VALUES (%s, %s, %s, %s, %s, %s, %s)""".format(sheet_name2)
        cursor2.execute(sql2, (
            dic["type"], dic["uid"], dic["created_at"], dic["publish_id"],
            dic["prov_id"], dic["city_id"], dic["coun_id"]))
        connection.commit()
        print("<<<<<<<<< m_user_publish row saved >>>>>>>>")
    connection.close()
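# Note (added, not part of the original script): because every image row shares the same
# type / uid / created_at / publish_id values, the per-image loop above could also be
# written as a single cursor.executemany() call. A minimal sketch, assuming an already
# open pymysql connection and the same m_user_publish_file column layout:
def save_image_urls(connection, dic, list_tupian):
    """Sketch: batch-insert all image URLs of one listing in a single round trip."""
    rows = []
    for li_tu in list_tupian:
        url = li_tu if 'https:' in li_tu else 'https:' + li_tu
        rows.append((dic["type"], dic["uid"], dic["created_at"], url, dic["publish_id"]))
    sql = ("INSERT INTO m_user_publish_file(type, uid, created_at, url, publish_id) "
           "VALUES (%s, %s, %s, %s, %s)")
    with connection.cursor() as cursor:
        cursor.executemany(sql, rows)  # one round trip instead of one INSERT per image
    connection.commit()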
# Create the Excel workbook (only the header row is written by this script).
wbook = xw.Workbook('闲置物品.xlsx')
wsheet1 = wbook.add_worksheet('Sheet1')  # create the worksheet
wsheet1.activate()  # activate the sheet
title = ['title', 'money', '用户名', 'id', '星评', '产品名', '购买类型']  # header row
wsheet1.write_row('A1', title)  # write the header starting at cell A1
i = 2  # data rows would start at row 2
option = webdriver.ChromeOptions()
# option.add_argument(r"user-data-dir=C:\Users\Administrator\AppData\Local\Chromium\Application")  # browser profile path
# Initialize the Chrome driver
browser = webdriver.Chrome(options=option)
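# Optional (added suggestion, not in the original): when running on a machine without a
# display, Chrome can be started headless by adding the flags below to `option`
# before webdriver.Chrome(options=option) is called:
#     option.add_argument('--headless=new')   # '--headless' on older Chrome builds
#     option.add_argument('--window-size=1920,1080')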
for urlpage in range(1, 50):
    # time.sleep(5)
    print(f'******************************** page {urlpage} **********************************')
    browser.get(
        f'https://bj.58.com/shouji/pn{urlpage}/?PGTID=0d300024-0000-1ef3-2708-412acac48714&ClickID=2'
    )  # listing page number urlpage
    browser.maximize_window()
    # Scroll down in steps so lazily loaded rows are rendered before parsing.
    for i in range(5):
        time.sleep(2)
        browser.execute_script('window.scrollBy(0,2200)')
    html2 = html.etree.HTML(browser.page_source)
    # Mapping from 58.com category names to service_typeid values (defined for reference; not used further below).
    service_typeid2 = {
'家居日用': 1,
'住宅家具': 2,
'北京二手家具': 2,
'未拆(全新)': 4,
'生活电器': 36,
'电脑数码': 54,
' 北京二手台式机/配件': 54,
'手机3C': 55,
'五金工具': 56,
'宠物用品': 57,
'儿童碗具': 58,
'户外车品': 59,
'绘本书籍': 60,
'服饰配件': 61,
'仅拆(9.9新)': 62,
'95成新': 63,
'9成新': 64
}
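    # Possible use (added suggestion): look up the scraped category text in the mapping,
    # falling back to 55 ("手机3C") when the category is unknown, e.g.:
    #     service_typeid = str(service_typeid2.get(category_text, 55))
    # `category_text` is hypothetical here; the script below picks random ids instead.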
    # Account pool: each entry is [uid, username, mobile, prov_id, city_id, coun_id, lng, lat, address_id].
    m_userb = [
[1, '张三', '18201355004', '110000', '110100', '110106', '116.29560982612', '39.840624161575', '1'],
[2, '我是曹野', '18600806657', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '2'],
[3, '陈十一', '18600806656', '110000', '110100', '110106', '116.29768500168', '39.839177478909', '3'],
[4, '清宁', '18600806655', '110000', '110100', '110106', '116.29595243984', '39.840078179251', '6'],
[11, '过时的短靴', '18600806652', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '8'],
[5, '曹野', '18600806654', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '15'],
[6, '嘉平九', '13103657333', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '14'],
[9, '山山', '18600806653', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '16'],
[13, '合适', '12345678902', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '24'],
[8, '陈建锋', '12345678901', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '18'],
[10, '雨剑门秋', '12345678091', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '20'],
[12, '无私的鞋垫', '19921478807', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '21']
]
    summ = 1
    # Each result page holds 20 listing rows.
    for fog in range(1, 21):
        updated_at = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        # Display price, converted to cents; "面议" (negotiable) is treated as 0.
        show_money = html2.xpath(f'//tr[{fog}]//td//p[3]//b//text()')[0].strip()
        show_money = int(float(''.join(show_money).replace('面议', '0'))) * 100
        # Original price (the list page exposes only one price node, so the same value is reused).
        money = html2.xpath(f'//tr[{fog}]//td//p[3]//b//text()')[0].strip()
        money = int(float(''.join(money).replace('面议', '0'))) * 100
        # Make sure the listing window is the active one before clicking the next row.
        h2 = browser.window_handles
        browser.switch_to.window(h2[-1])
        time.sleep(random.randint(3, 5))
        try:
            browser.find_element(
                By.XPATH,
                f'/html/body/div[4]/section/div[4]/table/tbody/tr[{fog}]/td[2]//a[@target="_blank"]').click()
            # The detail page opens in a new tab; switch to it.
            h2 = browser.window_handles
            browser.switch_to.window(h2[-1])
            html1 = html.etree.HTML(browser.page_source)
            # Listing title
            title = html1.xpath('//h1[@class="detail-title__name"]//text()')[0].strip()
            # Listing description
            descs = html1.xpath('//div[@class="descriptionBox detail-desc__content__desc__box"]//text()')[2].strip()
            # Pick a random service_typeid combination for the listing.
            service_typeid = ['55,63,64', '55', '63', '64', '55,63', '55,64']
            service_typeid = random.choice(service_typeid)
            # Identity: pick a random account from the pool defined above.
            sfxx = random.choice(m_userb)
uid = sfxx[0]
username = sfxx[1]
mobile = sfxx[2]
prov_id = sfxx[3]
city_id = sfxx[4]
coun_id = sfxx[5]
lng = sfxx[6]
lat = sfxx[7]
address_id = sfxx[8]
            # Publish date: strip the "更新" (updated) label and append a midnight timestamp.
            created_at = ''.join(
                html1.xpath('//div[@class="detail-title__info"]//div[1]//text()')).replace('更新', '').strip()
            created_at = created_at + ' 00:00:00'
            # All image URLs of the listing.
            list_tupian = html1.xpath(
                '//ul[@class="detail-desc__imgPlayer__imgList imgplayerlist"]//li//span//img//@src')
            print(title, updated_at, uid, address_id, username, show_money, money, mobile,
                  created_at, prov_id, city_id, coun_id, lng, lat,
                  service_typeid, descs, list_tupian)
            save_mysql(title, updated_at, uid, address_id, username, show_money, money, mobile,
                       created_at, prov_id, city_id, coun_id, lng, lat,
                       service_typeid, descs, list_tupian)
            print('+++++++++++++++++ record', summ, 'stored +++++++++++++++++')
            print('\n')
            summ += 1
            # Pause briefly, then close the detail tab; the next iteration switches back to the listing tab.
            t = random.randint(3, 4)
            time.sleep(t)
            browser.close()
        except Exception as exc:
            # If the row has no link, the detail layout changed, or parsing fails, skip this listing.
            print('skipping this listing and moving on:', exc)
            continue
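# Cleanup (added; the original post stops at the loop above): release the browser session
# and close the Excel workbook so the file is actually written to disk.
browser.quit()
wbook.close()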
Author: 布都御魂
Original post: https://www.cnblogs.com/wolvies/p/16369957.html
License: this work is licensed under the Creative Commons Attribution-NonCommercial-NoDerivs 2.5 China Mainland License.