# Scrape Zhuge Zhaofang (诸葛找房, zhugefang.com) housing data -- casting3T
# -*- coding: utf-8 -*-
"""
Created on Wed Dec 20 17:06:40 2017
@author: Administrator
"""
import requests
from bs4 import BeautifulSoup
import pymongo
import datetime
import re
# Raw identifier string; only its digits are kept, in `lgttcc`.
lg = '15001927982ttcc'
# Raw string literal so that \D reaches the regex engine as "any non-digit"
# instead of relying on Python passing an unknown escape through unchanged.
lgttcc = re.sub(r"\D", "", lg)

# Browser-like request headers. The HTTP header is spelled "User-Agent";
# the former key "UserAgent" is not a header name servers look at.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36',
}

# City codes; `ad` turns each one into the sub-domain of a zhugefang.com URL.
# Every code appears once so that no city page is fetched twice.
# NOTE(review): the original list contained 'hz' and 'sz' twice. The second
# occurrences were presumably meant to be two other cities -- confirm the
# intended sub-domain codes and add them here.
a = [
    'bj', 'sh', 'dl', 'dg', 'gz', 'jn', 'jh', 'lz', 'qd', 'tj',
    'xa', 'zz', 'zh', 'zs', 'cd', 'cq', 'cz', 'cs', 'fz', 'hz', 'heb',
    'hf', 'hn', 'km', 'nj', 'nc', 'nb', 'sz', 'sy', 'sjz', 'wh', 'wx', 'yt',
]
def ad(i):
    """Return the zhugefang.com home-page URL for the city code *i*.

    *i* is used verbatim as the sub-domain, e.g. ``'bj'`` gives
    ``'http://bj.zhugefang.com/'``.
    """
    cc = 'http://' + i + '.zhugefang.com/'
    return cc
# One home-page URL per city code in `a`, in the same order.
b = [ad(city_code) for city_code in a]

# Proxy mapping handed to requests; only the "http" scheme is routed.
proxies = {"http": "http://192.168.0.103:3234"}
import pandas  # imported once here rather than on every loop iteration

# Seconds to wait for a city page; without a timeout a single stalled
# connection would hang the whole run.
_REQUEST_TIMEOUT = 30

# CSS path shared by every figure read from the "exponent" box of a page.
_EXPONENT_BOX = (
    'body > div.index_content.content_1200 > div:nth-of-type(1)'
    ' > div.zhuge_exponent_wrap > div.zhuge_exponent_box'
)

# Selectors in the order their values are stored in `total`.
_FIELD_SELECTORS = (
    '.banner_left',                                         # 0: area
    _EXPONENT_BOX + ' > ul > li:nth-of-type(1) > a > p',    # 1: xinshang
    _EXPONENT_BOX + ' > ul > li:nth-of-type(1) > a > h5',   # 2: xinshang_tao
    _EXPONENT_BOX + ' > ul > li:nth-of-type(2) > a > p',    # 3: jiangjia
    _EXPONENT_BOX + ' > ul > li:nth-of-type(2) > a > h5',   # 4: jiangjia_tao
    _EXPONENT_BOX + ' > ul > li:nth-of-type(3) > a > p',    # 5: zhangjia
    _EXPONENT_BOX + ' > ul > li:nth-of-type(3) > a > h5',   # 6: zhangjia_tao
    _EXPONENT_BOX + ' > div > div.show_box > ul > li.line > p',  # 7: junjia
)


def _first_text(soup, selector):
    """Return the stripped text of the first element matching *selector*.

    Raises LookupError when nothing matches, so the caller can tell which
    selector failed instead of getting a bare IndexError.
    """
    matches = soup.select(selector)
    if not matches:
        raise LookupError('no element matches {!r}'.format(selector))
    return matches[0].text.strip()


# Fetch every city page in `b`, pull the eight text fields out of it and
# print them both as a list and as a one-column DataFrame.
for k in b:
    try:
        res = requests.get(k, headers=headers, proxies=proxies,
                           timeout=_REQUEST_TIMEOUT)
        res.raise_for_status()
        soup = BeautifulSoup(res.text, 'html.parser')
        total = [_first_text(soup, selector) for selector in _FIELD_SELECTORS]
    except (requests.RequestException, LookupError) as exc:
        # One unreachable or differently laid-out city page must not abort
        # the remaining cities; report it and move on.
        print('skipping {}: {}'.format(k, exc))
        continue

    print(total)
    deal = pandas.DataFrame(total)
    print(deal)

    # Keep only the digits of the last field (junjia); the disabled
    # persistence code below stores int(pp) as 'city_avg_price'.
    price = total[7]
    pp = re.sub(r"\D", "", price)

# Disabled MongoDB persistence step, kept verbatim inside a bare string
# literal so that it never executes. It reads `total` and `pp`, so if it is
# re-enabled it has to run inside the loop above, once per city.
# NOTE(review): the `try` and `except` branches are identical, so the handler
# merely repeats the write that just failed, and the bare `except:` hides the
# reason for the failure.
# NOTE(review): `date` is hard-coded to "2017-12-20".
# NOTE(review): `Collection.save` was removed in PyMongo 4 -- confirm the
# installed driver version before reviving this code.
"""
try:
now=datetime.datetime.now()
date="2017-12-20"
client1 = pymongo.MongoClient('192.168.0.136',27017)
db1 = client1.fangjia_stat
stat = db1.zgzf_stat
stat.save({"city":total[0],"add_house_count":int(total[2]),"cut_price_house_count":int(total[4]),
"increase_price_houseCount":int(total[6]), "update_date":date, 'city_avg_price':int(pp),
"stat_date":date,"c_date":now})
except:
now=datetime.datetime.now()
date="2017-12-20"
client1 = pymongo.MongoClient('192.168.0.136',27017)
db1 = client1.fangjia_stat
stat = db1.zgzf_stat
stat.save({"city":total[0],"add_house_count":int(total[2]),"cut_price_house_count":int(total[4]),
"increase_price_houseCount":int(total[6]), "update_date":date, 'city_avg_price':int(pp),
"stat_date":date,"c_date":now})
"""
t
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· DeepSeek 开源周回顾「GitHub 热点速览」
· 物流快递公司核心技术能力-地址解析分单基础技术分享
· .NET 10首个预览版发布:重大改进与新特性概览!
· AI与.NET技术实操系列(二):开始使用ML.NET
· .NET10 - 预览版1新功能体验(一)