爬诸葛找房数据——casting3T


# -*- coding: utf-8 -*-
"""
Created on Wed Dec 20 17:06:40 2017

@author: Administrator
"""

import requests
from bs4 import BeautifulSoup
import pymongo
import datetime

import re 

# Mixed digit/letter string; only used as input to the substitution below.
lg = '15001927982ttcc'

# Keep only the digits of `lg`.  The pattern is a raw string so that `\D`
# (any non-digit) reaches the regex engine as written instead of relying on
# Python tolerating an unrecognized string escape.
lgttcc = re.sub(r"\D", "", lg)


# Browser-like request headers.  The HTTP header name is 'User-Agent' (with
# the hyphen); the previous 'UserAgent' spelling is not a header servers
# recognize.
# NOTE(review): the crawl loop below does not pass `headers` to
# requests.get -- confirm whether it was meant to.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36',
}

# City codes; each one becomes the subdomain of a zhugefang.com URL.
# NOTE(review): 'hz' and 'sz' each appear twice, so those pages are fetched
# twice -- confirm whether different codes were intended.
a = [
    'bj', 'sh', 'dl', 'dg', 'gz', 'jn', 'jh', 'lz', 'qd', 'tj',
    'xa', 'zz', 'zh', 'zs', 'cd', 'cq', 'cz', 'cs', 'fz', 'hz', 'heb',
    'hf', 'hn', 'hz', 'km', 'nj', 'nc', 'nb', 'sz', 'sy', 'sz', 'sjz', 'wh', 'wx', 'yt',
]
def ad(i):
    """Return the zhugefang.com landing-page URL for city code ``i``.

    ``i`` is used as the subdomain, e.g. ``'bj'`` gives
    ``'http://bj.zhugefang.com/'``.
    """
    pieces = ('http://', i, '.zhugefang.com/')
    return ''.join(pieces)
    

# One landing-page URL per city code, in the same order as `a`.
b = [ad(code) for code in a]

# Proxy mapping handed to requests.get for http:// URLs.
proxies = dict(http="http://192.168.0.103:3234")




# Imported once here rather than on every loop iteration.
import pandas

# Seconds to wait for a city page before giving up on it; without a timeout
# a single stalled connection would block the whole crawl indefinitely.
_REQUEST_TIMEOUT = 30

# CSS path shared by every statistic scraped from the index page.
_EXPONENT_BOX = ('body > div.index_content.content_1200 > div:nth-of-type(1)'
                 ' > div.zhuge_exponent_wrap > div.zhuge_exponent_box')
# Template for the n-th list item of the box; the last slot is the tag
# ('p' or 'h5') read from that item's link.
_STAT_ITEM = _EXPONENT_BOX + ' > ul > li:nth-of-type({}) > a > {}'
# Selector for the element whose text `junjia` is read from.
_AVG_PRICE = _EXPONENT_BOX + ' > div > div.show_box > ul > li.line > p'


def _first_text(soup, selector):
    """Return the stripped text of the first element matching ``selector``.

    Raises:
        LookupError: if no element in ``soup`` matches ``selector``.
    """
    matches = soup.select(selector)
    if not matches:
        raise LookupError('no element matches selector: ' + selector)
    return matches[0].text.strip()


# Fetch each city page, scrape its statistics and print them.
# NOTE(review): the module-level `headers` dict is not sent with the
# request -- confirm whether it should be.
for k in b:
    try:
        res = requests.get(k, proxies=proxies, timeout=_REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        # One unreachable city should not abort the remaining ones.
        print('skipping', k, '- request failed:', exc)
        continue

    soup = BeautifulSoup(res.text, 'html.parser')

    try:
        area = _first_text(soup, '.banner_left')
        xinshang = _first_text(soup, _STAT_ITEM.format(1, 'p'))
        xinshang_tao = _first_text(soup, _STAT_ITEM.format(1, 'h5'))
        jiangjia = _first_text(soup, _STAT_ITEM.format(2, 'p'))
        jiangjia_tao = _first_text(soup, _STAT_ITEM.format(2, 'h5'))
        zhangjia = _first_text(soup, _STAT_ITEM.format(3, 'p'))
        zhangjia_tao = _first_text(soup, _STAT_ITEM.format(3, 'h5'))
        junjia = _first_text(soup, _AVG_PRICE)
    except LookupError as exc:
        # The page lacks an expected element; skip this city instead of
        # crashing the whole run.
        print('skipping', k, '-', exc)
        continue

    # Field order matters: the disabled save step below reads total[0] as
    # the city, total[2] / total[4] / total[6] as the added / price-cut /
    # price-increase house counts, and total[7] as the average price text.
    total = [area, xinshang, xinshang_tao, jiangjia, jiangjia_tao,
             zhangjia, zhangjia_tao, junjia]
    print(total)

    deal = pandas.DataFrame(total)
    print(deal)

    price = total[7]

    # Digits only; raw string so `\D` is passed to the regex engine as-is.
    pp = re.sub(r"\D", "", price)

    # Disabled persistence step, kept for reference.  In the original it was
    # an inert string literal whose try and except branches were identical,
    # so only one copy is shown.
    # NOTE(review): Collection.save() was removed in PyMongo 4; if this is
    # re-enabled on a current driver it needs insert_one()/replace_one().
    #
    #     now=datetime.datetime.now()
    #     date="2017-12-20"
    #     client1 = pymongo.MongoClient('192.168.0.136',27017)
    #     db1 = client1.fangjia_stat
    #     stat = db1.zgzf_stat
    #     stat.save({"city":total[0],"add_house_count":int(total[2]),"cut_price_house_count":int(total[4]),
    #            "increase_price_houseCount":int(total[6]), "update_date":date, 'city_avg_price':int(pp),
    #            "stat_date":date,"c_date":now})

t

posted @ 2022-08-19 22:59  luoganttcc  阅读(17)  评论(0)  编辑  收藏  举报