python爬取手机归属地

用 Python + bs4(BeautifulSoup)爬取了手机归属地数据,代码如下:
 
import urllib.request
from bs4 import BeautifulSoup

def spider1(url, out=None):
    """Scrape phone-number attribution tables and write them as comma-separated rows.

    Fetches the index page at ``url``, follows the first link inside every
    ``<dd>`` element to its ``all/`` sub-page, prints that link's text, and
    writes the first six cells of every table row (skipping the first row,
    presumably the table header) as one comma-terminated line per row.

    Args:
        url: Address of the index page that lists the number segments.
        out: Writable text file object that receives the rows. When omitted,
            the module-level handle ``f`` is used, so existing callers that
            open ``f`` before calling ``spider1(url)`` keep working.
    """
    if out is None:
        # Backward compatibility: the calling script opens a global ``f``.
        out = f

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
        'Accept': 'text/html;q=0.9,*/*;q=0.8',
    }

    opener = urllib.request.build_opener()
    # addheaders must be a list of (name, value) pairs. Wrapping the dict in a
    # list made urllib unpack the dict's two keys as a single pair, sending a
    # bogus "User-agent: Accept" header instead of the intended ones.
    opener.addheaders = list(headers.items())

    # Close each response as soon as its body has been read.
    with opener.open(url) as response:
        index_html = response.read()
    index_soup = BeautifulSoup(index_html, "html.parser", from_encoding="gbk")

    for link in index_soup.find_all('dd'):
        anchor = link.a
        # Skip <dd> entries that carry no usable link instead of crashing.
        if anchor is None or not anchor.get('href'):
            continue

        detail_url = r'http://guisd.com' + anchor['href'] + r'all/'
        print(anchor.text)

        with opener.open(detail_url) as response:
            detail_html = response.read()
        # A separate name keeps the index document distinct from the detail page.
        detail_soup = BeautifulSoup(detail_html, "html.parser", from_encoding="gbk")

        for row in detail_soup.find_all('tr')[1:]:
            cells = row.find_all('td')[0:6]
            # Every cell is followed by a comma (including the last one) to
            # keep the output format of the original script.
            out.write(''.join(cell.get_text() + ',' for cell in cells) + '\n')
# Run the scraper and collect the rows in text.txt. The handle is bound to the
# module-level name ``f`` because spider1 writes through that global by default.
# The context manager closes the file even if the crawl fails part-way through.
# NOTE(review): the file is written in the platform's default text encoding.
with open('text.txt', 'w+') as f:
    spider1('http://guisd.com/lb/')

最终效果如下:

posted @ 2016-11-25 16:02  stephen2016  阅读(1367)  评论(0)  编辑  收藏  举报