# 58同城需求发布 (58.com demand-posting scraper)

from selenium import webdriver
import xlsxwriter as xw
from time import sleep
import time
import random
import requests
from lxml import html

import pymysql
import time


def time_turn(timenum):
    """Convert a Unix timestamp into a local ``YYYY-MM-DD HH:MM:SS`` string.

    Args:
        timenum: Timestamp in whole seconds, given as a string of 1 to 10
            decimal digits (an ``int`` is accepted too and converted with
            ``str``).

    Returns:
        The timestamp formatted in the machine's local time zone, or
        ``None`` when the input is empty, longer than 10 characters or not
        made only of decimal digits. In that case a hint is printed.
    """
    timenum = str(timenum)
    # isdecimal() rather than isdigit(): isdigit() also accepts characters
    # such as superscript digits, which int() then rejects with ValueError.
    if not (0 < len(timenum) < 11 and timenum.isdecimal()):
        print('请输入11位以内的数字')
        return None
    time_array = time.localtime(int(timenum))
    return time.strftime("%Y-%m-%d %H:%M:%S", time_array)


def save_mysql(title, updated_at, uid, address_id, username, money,
               mobile, created_at, prov_id, city_id,
               coun_id, lng,
               lat,
               service_typeid, descs, service_cateid, starttime, endtime, list_tupian):
    """Store one scraped post and its image URLs in MySQL.

    Three tables are written, in this order:

    1. ``m_app_demand`` - one row holding the post itself.
    2. ``m_user_publish_file`` - one row per entry of ``list_tupian``.
    3. ``m_user_publish`` - one row linking the post to the user and region.

    Each statement is committed on its own, so a failure part-way through
    leaves the rows committed so far in place.

    Args:
        title, updated_at, uid, address_id, username, money, mobile,
        created_at, prov_id, city_id, coun_id, lng, lat, service_typeid,
        service_cateid, starttime, endtime: Values stored in the
            ``m_app_demand`` columns of the same name. ``uid``,
            ``created_at`` and the three region ids are reused for the
            other two tables.
        descs: Text stored in the ``desc`` column of ``m_app_demand``.
        list_tupian: Iterable of image URLs; each one becomes a
            ``m_user_publish_file`` row.

    Raises:
        Any exception raised by pymysql while connecting, executing or
        committing is propagated; the connection is closed either way.
    """
    publish_type = '1'  # 1 = demand, 2 = idle item, 3 = car, 4 = house

    # NOTE(review): host and credentials are hard-coded in source; consider
    # moving them to configuration or environment variables.
    connection = pymysql.Connect(
        host='140.210.4.73',
        port=3306,
        user='agr_sql',
        passwd='bj@#agr_sql',
        db='fa_admin',
        charset='utf8mb4'
    )
    try:
        with connection.cursor() as cursor:
            # Build the INSERT for the main post table.
            sheet_name = 'm_app_demand'
            sql = """INSERT INTO {}(title,updated_at,uid,address_id,username,money,mobile,created_at,prov_id,city_id,coun_id,lng,lat,service_typeid,`desc`,service_cateid,starttime,endtime)\
            VALUES (%s,%s, %s,%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s,%s,%s)""".format(
                sheet_name)

            cursor.execute(sql, (
                title, updated_at, uid, address_id, username,
                money,
                mobile,
                created_at,
                prov_id, city_id, coun_id, lng, lat,
                service_typeid, descs, service_cateid,
                starttime, endtime))
            # Id generated for the row just inserted; the other two tables
            # reference it as publish_id.
            ir_idd = connection.insert_id()
            print(ir_idd, type(ir_idd))
            connection.commit()
            print("<<<<<<<<<第一张表数据存储成功>>>>>>>>", )
        publish_id = ir_idd

        # Insert one row per image URL.
        for li_tu in list_tupian:
            with connection.cursor() as cursor1:
                sheet_name1 = 'm_user_publish_file'
                sql1 = """INSERT INTO {}(type, uid, created_at, url, publish_id)\
                VALUES (%s, %s, %s, %s, %s)""".format(
                    sheet_name1)
                cursor1.execute(sql1, (
                    publish_type, uid, created_at, li_tu, publish_id
                ))
                connection.commit()
                print("<<<<<<<<<数据存储成功>>>>>>>>", )

        with connection.cursor() as cursor2:
            sheet_name2 = 'm_user_publish'
            sql2 = """INSERT INTO {}(type, uid, created_at, publish_id,prov_id,city_id,coun_id)\
            VALUES ( %s, %s, %s, %s, %s, %s, %s)""".format(
                sheet_name2)
            cursor2.execute(sql2, (
                publish_type, uid, created_at, publish_id,
                prov_id, city_id, coun_id,
            ))
            connection.commit()
            print("<<<<<<<<<m_user_publish数据存储成功>>>>>>>>", )
    finally:
        # Always release the connection, including when an insert fails.
        connection.close()


# Create the Excel workbook and write its header row.
# NOTE(review): nothing in the visible script writes further rows or calls
# wbook.close(); xlsxwriter normally needs close() to write the file - confirm
# whether this workbook is still wanted.
wbook = xw.Workbook('闲置物品.xlsx')
wsheet1 = wbook.add_worksheet('Sheet1') # create the worksheet
wsheet1.activate() # make it the active sheet
title = ['title', 'money', '用户名', 'id', '星评', '产品名', '购买类型'] # header row; the name `title` is reassigned later by the scraping loop
wsheet1.write_row('A1', title) # write the header starting at cell A1
i = 2 # intended next data row (row 2); not used anywhere in the visible code

# Scrape listing pages 2..99, open every row's detail page and store the
# extracted post through save_mysql().
option = webdriver.ChromeOptions()
# option.add_argument(r"user-data-dir=C:\Users\Administrator\AppData\Local\Chromium\Application")  # browser path

# Name -> id lookup tables. They are not consulted below: the ids sent to
# save_mysql() are drawn at random from RANDOM_SERVICE_IDS.
SERVICE_CATE_IDS = {
    '代帮忙': 1,
    '家政保洁': 2,
    '维修': 3,
    '专业': 4,
    '陪伴': 5,
}
SERVICE_TYPE_IDS = {
    '代跑腿': 1,
    '代搬东西': 2,
    '代取件': 3,
    '代买早餐': 4,
    '代排队': 5,
    '代接送': 6,
    '保姆': 7,
    '钟点工': 8,
    '保洁': 9,
    '看护老人': 10,
    '看护小孩': 11,
}
RANDOM_SERVICE_IDS = ['1', '2', '3']

# Accounts a post is attributed to at random. Row layout:
# [uid, username, mobile, prov_id, city_id, coun_id, lng, lat, address_id]
m_userb = [
    [1, '张三', '18201355004', '110000', '110100', '110106', '116.29560982612', '39.840624161575', '1'],
    [2, '我是曹野', '18600806657', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '2'],
    [3, '陈十一', '18600806656', '110000', '110100', '110106', '116.29768500168', '39.839177478909', '3'],
    [4, '清宁', '18600806655', '110000', '110100', '110106', '116.29595243984', '39.840078179251', '6'],
    [11, '过时的短靴', '18600806652', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '8'],
    [5, '曹野', '18600806654', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '15'],
    [6, '嘉平九', '13103657333', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '14'],
    [9, '山山', '18600806653', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '16'],
    [13, '合适', '12345678902', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '24'],
    [8, '陈建锋', '12345678901', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '18'],
    [10, '雨剑门秋', '12345678091', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '20'],
    [12, '无私的鞋垫', '19921478807', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '21'],
]

# One browser is shared by all pages and always shut down at the end,
# instead of launching a new Chrome per page and never quitting any of them.
browser = webdriver.Chrome(options=option)
try:
    browser.maximize_window()
    for page in range(2, 100):
        print(f'*******************第{page}页*****************************')
        # The page number is part of the path; it used to be fixed at "pn2",
        # so every iteration re-fetched the same listing page.
        browser.get(
            f'https://bj.58.com/esqgdiannao/pn{page}/?PGTID=0d306d43-0000-12d4-ab80-c34650326432&ClickID=2'
        )
        list_window = browser.current_window_handle
        html2 = html.etree.HTML(browser.page_source)

        summ = 1  # running count of rows stored from this page

        for fog in range(1, 41):
            updated_at = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
            try:
                # Price shown in the listing row: "面议" is treated as 0, a
                # range "a-b" keeps its lower bound, and the result is
                # multiplied by 100.
                money = html2.xpath(f'//tr[{fog}]//td//p[3]//b//text()')[0].strip()
                money = int(float(money.replace('面议', '0').split('-')[0])) * 100

                browser.switch_to.window(list_window)
                time.sleep(random.randint(3, 5))

                # find_element("xpath", ...) works on Selenium 3 and 4;
                # find_element_by_xpath was removed in Selenium 4.3.
                browser.find_element('xpath', f'//tr[{fog}]/td[2]//a').click()
                # Continue in the most recently opened window (the detail page).
                browser.switch_to.window(browser.window_handles[-1])
                html1 = html.etree.HTML(browser.page_source)

                # Title
                title = html1.xpath('//h1[@class="detail-title__name"]//text()')[0].strip()

                # Description
                descs = html1.xpath('//div[@class="descriptionBox detail-desc__content__desc__box"]//text()')[2].strip()

                service_typeid = random.choice(RANDOM_SERVICE_IDS)
                service_cateid = random.choice(RANDOM_SERVICE_IDS)
                starttime = time_turn(str(int(time.time())))
                # End time: a random moment 7 to 15 days from now.
                endtime = time_turn(
                    str(random.randint(int(time.time()) + 7 * 24 * 3600, int(time.time()) + 15 * 24 * 3600)))

                # Identity the post is attributed to
                (uid, username, mobile, prov_id, city_id, coun_id,
                 lng, lat, address_id) = random.choice(m_userb)

                created_at = ''.join(
                    html1.xpath('//div[@class="detail-title__info"]//div[1]//text()')
                ).replace('更新', '').strip()
                created_at = created_at + ' ' + '00:00:00'
                list_tupian = html1.xpath(
                    '//ul[@class="detail-desc__imgPlayer__imgList imgplayerlist"]//li//span//img//@src')

                print(title, updated_at, uid, address_id, username, money, mobile, created_at, prov_id, city_id,
                      coun_id, lng,
                      lat,
                      service_typeid, descs, service_cateid, starttime, endtime, list_tupian)
                save_mysql(title, updated_at, uid, address_id, username, money, mobile, created_at, prov_id,
                           city_id, coun_id,
                           lng, lat,
                           service_typeid, descs, service_cateid, starttime, endtime, list_tupian)
                print('+++++++++++++++++第', summ, '条数据入库+++++++++++++++++')
                print('\n')
                summ += 1
                time.sleep(random.randint(3, 4))
            except Exception as e:
                # Best effort: report the failure and move on to the next row.
                print('错误信息', e)
                print('数目加一,继续!!!!')
            finally:
                # Close whatever extra tab this row opened, also after a
                # failure, so the next row starts from the listing window.
                for handle in browser.window_handles:
                    if handle != list_window:
                        browser.switch_to.window(handle)
                        browser.close()
                browser.switch_to.window(list_window)
finally:
    browser.quit()

# posted @ 2022-06-13 10:33  布都御魂  阅读(45)  评论(0)  编辑  收藏  举报