# 58同城租房 (58.com rental-listing scraper)

import random
import time
from time import sleep

import pymysql
import requests
import xlsxwriter as xw
from lxml import html
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By


def save_mysql(title, uid, address_id, username, money, mobile, created_at, prov_id, city_id, coun_id, lng, lat,
               facility, service_typeid, descs, list_tupian):
    """Store one scraped house listing and its images in MySQL.

    Writes one row to ``m_app_house``, one row per image to
    ``m_user_publish_file`` and one row to ``m_user_publish``. All rows are
    written in a single transaction: either everything is committed or, if
    any statement fails, everything is rolled back and the error is re-raised.
    The connection is always closed before returning.

    Args:
        title, uid, address_id, username, money, mobile, created_at, prov_id,
        city_id, coun_id, lng, lat: Column values for the ``m_app_house`` row
            (``uid``, ``created_at`` and the three region ids are also copied
            into the two publish tables).
        facility: Value for the ``facility`` column (the caller passes a
            comma-separated id string).
        service_typeid: Value for the ``service_typeid`` column (same format).
        descs: Free-text description, stored in the ``desc`` column.
        list_tupian: Iterable of image ``src`` values. Values that do not
            already start with ``http://`` or ``https://`` are prefixed with
            ``https:`` (i.e. protocol-relative ``//host/path`` sources).

    Raises:
        pymysql.MySQLError: If connecting or any statement fails.
    """
    publish_type = '4'  # 1 = request, 2 = second-hand item, 3 = vehicle, 4 = house

    # NOTE(review): credentials are hard-coded in the source; consider moving
    # them to configuration or environment variables.
    connection = pymysql.Connect(
        host='140.210.4.73',
        port=3306,
        user='agr_sql',
        passwd='bj@#agr_sql',
        db='fa_admin',
        charset='utf8mb4'
    )
    try:
        with connection.cursor() as cursor:
            cursor.execute(
                "INSERT INTO m_app_house(title,uid,address_id,username,money,mobile,created_at,"
                "prov_id,city_id,coun_id,lng,lat,facility,service_typeid,`desc`) "
                "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)",
                (title, uid, address_id, username, money, mobile, created_at,
                 prov_id, city_id, coun_id, lng, lat, facility, service_typeid, descs),
            )
            # Auto-increment id of the house row; links the rows inserted below.
            publish_id = connection.insert_id()
            print(publish_id, type(publish_id))

            # One file row per image.
            for src in list_tupian or ():
                # Only add the scheme when the source is not already absolute.
                url = src if src.startswith(('http://', 'https://')) else "https:" + src
                print('dic["url"]', url)
                cursor.execute(
                    "INSERT INTO m_user_publish_file(type, uid, created_at, url, publish_id) "
                    "VALUES (%s, %s, %s, %s, %s)",
                    (publish_type, uid, created_at, url, publish_id),
                )

            cursor.execute(
                "INSERT INTO m_user_publish(type, uid, created_at, publish_id,prov_id,city_id,coun_id) "
                "VALUES ( %s, %s, %s, %s, %s, %s, %s)",
                (publish_type, uid, created_at, publish_id, prov_id, city_id, coun_id),
            )
        connection.commit()
    except Exception:
        # Leave no half-written listing behind, then let the caller see the error.
        connection.rollback()
        raise
    finally:
        connection.close()

    # Reported only after the commit has actually succeeded.
    print("<<<<<<<<<数据存储成功>>>>>>>>", )
    print("<<<<<<<<<m_user_publish数据存储成功>>>>>>>>", )


"""
https://pic1.58cdn.com.cn/anjuke_58/c64c9367a29e5668e7de020e2dc49715?w=696&h=522&crop=1&t=1&srotate=1
https://pic1.58cdn.com.cn/anjuke_58/d264fa6aeb156b7cd461f8f9458089bd?w=696&h=522&crop=1&t=1&srotate=1
"""

# List-page URL; ``{page}`` is the 1-based page number.
LIST_URL_TEMPLATE = 'https://bj.58.com/chuzu/pn{page}/?PGTID=0d3090a7-0000-17e2-48fd-cd342448938e&ClickID=227'

# Absolute XPath of the title link of the n-th <li> on the list page.
LISTING_LINK_XPATH = '/html/body/div[6]/div[2]/ul/li[{}]/div[2]/h2/a'

# Page label -> id stored in the ``service_typeid`` column.
SERVICE_TYPE_IDS = {
    '出租': 21,
    '出售': 22,
    '整租': 23,
    '合租': 24,
    '昌平': 25,
    '自主发布': 26,
    '转租': 27,
    '1室2厅1卫': 28,
    '1室2厅2卫': 29,
    '100平方米': 30,
    '200平方米': 31,
    '毛坯': 32,
    '简单装修': 33,
    '求租': 52,
    '求购': 53,
    '转售': 120,
    '代他人发布': 121,
    '精装修': 122,
    '豪华装修': 123,
    '丰台': 135,
    '朝阳': 136,
    '大兴': 137,
    '3室2厅2卫': 138,
    '2室1厅1卫': 139,
    '2室2厅2卫': 140,
    '3室2厅1卫': 141,
    '300平方米': 142,
    '通州': 143,
    '顺义': 144,
}

# Page label -> id stored in the ``facility`` column.
FACILITY_IDS = {
    '热水器': 34,
    '衣柜': 35,
    '空调': 124,
    '暖气': 125,
    '洗衣机': 126,
    '电视机': 127,
    '燃气灶': 128,
    'wifi': 129,
    '大阳台': 131,
    '微波炉': 132,
    '冰箱': 133,
    '沙发': 134,
}

# Accounts the scraped listings are attributed to. Columns:
# id, username, mobile, prov_id, city_id, coun_id, lng, lat, address_id
PUBLISHERS = [
    [1, '张三', '18201355004', '110000', '110100', '110106', '116.29560982612', '39.840624161575', '1'],
    [2, '我是曹野', '18600806657', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '2'],
    [3, '陈十一', '18600806656', '110000', '110100', '110106', '116.29768500168', '39.839177478909', '3'],
    [4, '清宁', '18600806655', '110000', '110100', '110106', '116.29595243984', '39.840078179251', '6'],
    [11, '过时的短靴', '18600806652', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '8'],
    [5, '曹野', '18600806654', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '15'],
    [6, '嘉平九', '13103657333', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '14'],
    [9, '山山', '18600806653', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '16'],
    [13, '合适', '12345678902', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '23'],
    [8, '陈建锋', '12345678901', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '18'],
    [10, '雨剑门秋', '12345678091', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '20'],
    [12, '无私的鞋垫', '19921478807', '110000', '110100', '110101', '116.42240097766', '39.934827272396', '21'],
]

# The account named '合适' gets one of these address ids at random.
# NOTE(review): presumably that account owns both addresses - confirm.
MULTI_ADDRESS_USERNAME = '合适'
MULTI_ADDRESS_IDS = ['23', '24']


def lookup_ids(table, labels):
    """Return the ids of *labels* found in *table* as a comma-joined string.

    Labels missing from *table* are skipped; the result is ``''`` when none
    match.
    """
    return ','.join(str(table[label]) for label in labels if label in table)


def parse_rent(text):
    """Convert the rent text to an int: the whole-number part times 100.

    NOTE(review): presumably converts yuan to fen for the ``money`` column -
    confirm. Raises ValueError when *text* is not numeric.
    """
    return int(float(text.strip())) * 100


def first_match(doc, xpath):
    """Return the first result of ``doc.xpath(xpath)``, or None if empty."""
    matches = doc.xpath(xpath)
    return matches[0] if matches else None


def parse_listing(doc):
    """Extract the fields of one listing from its parsed detail page.

    Args:
        doc: Parsed page exposing ``xpath()`` (the result of
            ``html.etree.HTML``).

    Returns:
        A dict with keys ``title``, ``money``, ``facility``,
        ``service_typeid``, ``desc`` and ``images``, or None when the title,
        rent, rental mode or description label is missing or the rent is not
        numeric.
    """
    title = first_match(doc, '//div[@class="house-title"]/h1/text()')
    rent_text = first_match(doc, '//div[@class="house-pay-way f16"]/span/b/text()')
    rental_mode = first_match(doc, '//ul[@class="f14"]/li[1]/span[2]/text()')
    intro_label = first_match(doc, '//ul[@class="introduce-item"]/li[2]/span[1]/text()')
    if any(value is None for value in (title, rent_text, rental_mode, intro_label)):
        return None
    try:
        money = parse_rent(rent_text)
    except ValueError:
        return None

    # Labels to map onto service-type ids: the rental mode (text before any
    # '-'), then the optional layout and district.
    service_labels = [rental_mode.split('-')[0]]
    layout = first_match(doc, '//ul[@class="f14"]/li[2]/span[@class="strongbox"]/text()')
    if layout is not None:
        service_labels.append(layout.split(' ')[0])
    district = first_match(doc, '//ul[@class="f14"]/li[5]/span/a[@class="c_333 ah"][1]/text()')
    if district is not None:
        print(district)
        service_labels.append(district)

    # The description is in the 2nd item when that item's label contains
    # '房源', otherwise in the 3rd.
    intro_item = 2 if '房源' in intro_label else 3
    desc = ''.join(doc.xpath('//ul[@class="introduce-item"]/li[{}]/span[2]/text()'.format(intro_item)))

    return {
        'title': title,
        'money': money,
        'facility': lookup_ids(FACILITY_IDS, doc.xpath('//ul[@class="house-disposal"]/li/text()')),
        'service_typeid': lookup_ids(SERVICE_TYPE_IDS, service_labels),
        'desc': desc,
        'images': doc.xpath('//div[@class="basic-pic-list pr"]/ul/li//img/@src'),
    }


def pick_publisher(rows=PUBLISHERS):
    """Pick a random row of *rows* and return it as a dict keyed by column."""
    uid, username, mobile, prov_id, city_id, coun_id, lng, lat, address_id = random.choice(rows)
    if username == MULTI_ADDRESS_USERNAME:
        address_id = random.choice(MULTI_ADDRESS_IDS)
    return {
        'uid': uid,
        'username': username,
        'mobile': mobile,
        'prov_id': prov_id,
        'city_id': city_id,
        'coun_id': coun_id,
        'lng': lng,
        'lat': lat,
        'address_id': address_id,
    }


def return_to_list(browser, list_handle, opened_handles):
    """Close the tabs in *opened_handles* and focus the list tab again.

    When the click opened no new tab (the detail page loaded in the list tab
    itself), navigate back instead.
    """
    if not opened_handles:
        browser.back()
        return
    for handle in opened_handles:
        browser.switch_to.window(handle)
        browser.close()
    browser.switch_to.window(list_handle)


def scrape_page(browser, page, saved_count):
    """Scrape list page *page* and store every listing that parses.

    Args:
        browser: Selenium WebDriver to drive.
        page: List page number to load.
        saved_count: Number of listings stored so far.

    Returns:
        The updated number of stored listings.
    """
    browser.get(LIST_URL_TEMPLATE.format(page=page))
    list_handle = browser.current_window_handle

    for position in range(2, 31):
        print(position)
        time.sleep(1)

        known_handles = set(browser.window_handles)
        try:
            browser.find_element(By.XPATH, LISTING_LINK_XPATH.format(position)).click()
        except WebDriverException:
            # No clickable listing at this position; pause, then try the next.
            print("出错了!!!!", len(browser.window_handles))
            time.sleep(random.randint(2, 3))
            continue

        opened_handles = [h for h in browser.window_handles if h not in known_handles]
        try:
            if opened_handles:
                browser.switch_to.window(opened_handles[-1])
            doc = html.etree.HTML(browser.page_source)
            listing = parse_listing(doc) if doc is not None else None
        finally:
            # Always get back to the list so the next click has a target.
            return_to_list(browser, list_handle, opened_handles)

        if listing is None:
            continue

        publisher = pick_publisher()
        created_at = time.strftime('%Y-%m-%d %H:%M:%S')
        record = (
            listing['title'], publisher['uid'], publisher['address_id'], publisher['username'],
            listing['money'], publisher['mobile'], created_at,
            publisher['prov_id'], publisher['city_id'], publisher['coun_id'],
            publisher['lng'], publisher['lat'],
            listing['facility'], listing['service_typeid'], listing['desc'], listing['images'],
        )
        print(*record)
        save_mysql(*record)
        saved_count += 1
        print('+++++++++++++++++第', saved_count, '条数据入库+++++++++++++++++')
        time.sleep(random.randint(2, 3))

    return saved_count


def main():
    """Scrape list pages 4-20 of bj.58.com/chuzu and store each listing."""
    # Workbook with a header row only; closed in the ``finally`` below so the
    # file is actually written.
    workbook = xw.Workbook('租房.xlsx')
    try:
        sheet = workbook.add_worksheet('Sheet1')
        sheet.activate()
        sheet.write_row('A1', ['title', 'money', '用户名', 'id', '星评', '产品名', '购买类型'])

        options = webdriver.ChromeOptions()
        # Optional tweaks:
        # options.add_argument(r"user-data-dir=C:\Users\Administrator\AppData\Local\Google\Chrome\User Data")  # reuse a Chrome profile
        # options.add_argument("blink-settings=imagesEnabled=false")  # do not load images

        browser = webdriver.Chrome(options=options)
        try:
            saved_count = 0
            for page in range(4, 21):
                saved_count = scrape_page(browser, page, saved_count)
        finally:
            # quit() closes every tab and ends the driver session.
            browser.quit()
    finally:
        workbook.close()


if __name__ == "__main__":
    main()
# posted @ 2022-06-13 10:35  布都御魂  阅读(41)  评论(0)  编辑  收藏  举报