# Scrape Zhuge Zhaofang (zhugefang.com) housing data -- casting3T
# -*- coding: utf-8 -*-
"""
Created on Wed Dec 20 17:06:40 2017
@author: Administrator
"""
import requests
from bs4 import BeautifulSoup
import pymongo
import datetime
import re
# Sample string and the digits extracted from it.  Neither name is referenced
# by the rest of the code visible in this file.
lg = '15001927982ttcc'
# Raw string: "\D" inside a normal literal is an invalid escape sequence.
lgttcc = re.sub(r"\D", "", lg)

# Browser-like request headers.  The HTTP field name is "User-Agent"; the
# previous key "UserAgent" is not a header any server recognises.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36',
}

# City codes used as the subdomain of zhugefang.com.  Each code is listed
# once: the earlier list repeated 'hz' and 'sz', which only caused the same
# page to be fetched twice.
a = [
    'bj', 'sh', 'dl', 'dg', 'gz', 'jn', 'jh', 'lz', 'qd', 'tj',
    'xa', 'zz', 'zh', 'zs', 'cd', 'cq', 'cz', 'cs', 'fz', 'hz', 'heb',
    'hf', 'hn', 'km', 'nj', 'nc', 'nb', 'sz', 'sy', 'sjz', 'wh', 'wx', 'yt',
]
def ad(i):
    """Return the zhugefang.com landing-page URL for one city code.

    ``i`` is the city's subdomain prefix (for example ``'bj'``); the result
    is ``'http://<i>.zhugefang.com/'``.
    """
    return 'http://' + i + '.zhugefang.com/'
# Landing-page URL of every configured city, in the same order as ``a``.
b = list(map(ad, a))

# HTTP proxy that every page request is routed through.
proxies = {
    "http": "http://192.168.0.103:3234"
}

# pandas is only needed to pretty-print each scraped row, so import it once
# here rather than on every loop iteration.
import pandas

# Seconds to wait on a city page before giving up, so that one unresponsive
# host cannot hang the whole run.
REQUEST_TIMEOUT = 30

# CSS path of the statistics box that every selector below starts from.
EXPONENT_BOX = (
    'body > div.index_content.content_1200 > div:nth-of-type(1)'
    ' > div.zhuge_exponent_wrap > div.zhuge_exponent_box'
)


def _first_text(page, selector):
    """Return the stripped text of the first element matching ``selector``.

    Raises ``IndexError`` when nothing on the page matches.
    """
    return page.select(selector)[0].text.strip()


for k in b:
    try:
        res = requests.get(k, headers=headers, proxies=proxies,
                           timeout=REQUEST_TIMEOUT)
        res.raise_for_status()
        soup = BeautifulSoup(res.text, 'html.parser')

        area = _first_text(soup, '.banner_left')
        # Entries 1-3 of the box's list each hold a <p> and an <h5>; the <h5>
        # values are the ones the disabled MongoDB step converts to int.
        xinshang = _first_text(soup, EXPONENT_BOX + ' > ul > li:nth-of-type(1) > a > p')
        xinshang_tao = _first_text(soup, EXPONENT_BOX + ' > ul > li:nth-of-type(1) > a > h5')
        jiangjia = _first_text(soup, EXPONENT_BOX + ' > ul > li:nth-of-type(2) > a > p')
        jiangjia_tao = _first_text(soup, EXPONENT_BOX + ' > ul > li:nth-of-type(2) > a > h5')
        zhangjia = _first_text(soup, EXPONENT_BOX + ' > ul > li:nth-of-type(3) > a > p')
        zhangjia_tao = _first_text(soup, EXPONENT_BOX + ' > ul > li:nth-of-type(3) > a > h5')
        junjia = _first_text(soup, EXPONENT_BOX + ' > div > div.show_box > ul > li.line > p')
    except (requests.RequestException, IndexError) as err:
        # A failed request or a page without the expected layout only skips
        # that city instead of aborting the remaining ones.
        print('skipping %s: %r' % (k, err))
        continue

    # Row layout (indices are relied on by the persistence step below):
    # 0 area, 1 xinshang, 2 xinshang_tao, 3 jiangjia, 4 jiangjia_tao,
    # 5 zhangjia, 6 zhangjia_tao, 7 junjia.
    total = [area, xinshang, xinshang_tao, jiangjia, jiangjia_tao,
             zhangjia, zhangjia_tao, junjia]
    print(total)
    deal = pandas.DataFrame(total)
    print(deal)

    # Keep only the digits of the junjia text; the disabled step below
    # stores int(pp) as 'city_avg_price'.
    price = total[7]
    pp = re.sub(r"\D", "", price)

    # Persistence is currently disabled.  The script used to carry the
    # MongoDB write below inside a bare string literal (wrapped in a
    # try/bare-except whose handler repeated exactly the same statements).
    # It is kept here once, for reference, because it documents what each
    # scraped value is stored as.
    # NOTE(review): Collection.save() is reportedly gone from PyMongo 4, and
    # the date is hard-coded -- confirm both before re-enabling.
    #
    #   now = datetime.datetime.now()
    #   date = "2017-12-20"
    #   client1 = pymongo.MongoClient('192.168.0.136', 27017)
    #   db1 = client1.fangjia_stat
    #   stat = db1.zgzf_stat
    #   stat.save({"city": total[0], "add_house_count": int(total[2]),
    #              "cut_price_house_count": int(total[4]),
    #              "increase_price_houseCount": int(total[6]),
    #              "update_date": date, 'city_avg_price': int(pp),
    #              "stat_date": date, "c_date": now})