Python简单爬取前程无忧招聘信息

# Python 2 only: force the interpreter-wide default string encoding to UTF-8.
# reload(sys) is required first because sys.setdefaultencoding is removed from
# the sys namespace during interpreter startup; reloading the module exposes
# it again.
# NOTE(review): presumably this is here so that the unicode text scraped below
# can be written through csv without an explicit .encode() -- confirm before
# removing or reordering these lines.
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
    
import requests
import csv
from BeautifulSoup import BeautifulSoup

def get_content(page):
    """Download one page of the 51job search results and parse it.

    The search URL is hard-coded (keyword ``python``); only the page number
    varies between calls.

    Args:
        page: Page number to request. It is converted with ``str()`` and
            spliced into the URL path.

    Returns:
        A ``BeautifulSoup`` tree built from the raw response body.

    Raises:
        requests.exceptions.RequestException: If the connection fails, the
            request times out, or the server answers with a 4xx/5xx status.
    """
    url_prefix = 'http://search.51job.com/list/200200,000000,0000,32,9,99,python,2,'
    url_suffix = '.html?lang=c&stype=1&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&lonlat=0%2C0&radius=-1&ord_field=0&confirmdate=9&fromType=&dibiaoid=0&address=&line=&specialarea=00&from=&welfare='
    url = url_prefix + str(page) + url_suffix

    # requests has no default timeout; without one a stalled server would
    # block the whole scrape indefinitely.
    response = requests.get(url, timeout=30)
    # Fail loudly on an HTTP error instead of parsing the error page as if it
    # were a result list.
    response.raise_for_status()

    # Pass the undecoded bytes so BeautifulSoup does its own charset detection.
    return BeautifulSoup(response.content)

def _tag_text(node, tag_name):
    """Return the stripped text of the first ``tag_name`` tag under ``node``, or '' if there is none."""
    child = node.find(tag_name)
    if child is None:
        return ''
    return child.text.strip()


def get(soup):
    """Extract the job postings from one parsed result page and append them to imdb.csv.

    Each ``<a class="e e2 eck">`` inside ``<div class="jblist res">`` becomes
    one CSV row of (title from ``<h3>``, company from ``<aside>``, salary
    from ``<em>``). A field whose tag is missing is written as an empty cell
    so the rest of the posting is still recorded.

    Args:
        soup: Parsed page exposing ``find`` / ``findAll`` (a BeautifulSoup tree).

    Returns:
        None. The rows are appended to ``imdb.csv`` in the working directory.
        Nothing is written (and the file is not opened) when the page has no
        result container or no postings.
    """
    container = soup.find('div', attrs={'class': 'jblist res'})
    if container is None:
        # No result list on this page (past the last page, blocked request,
        # changed markup): there is nothing to record.
        return

    rows = [
        (_tag_text(link, 'h3'), _tag_text(link, 'aside'), _tag_text(link, 'em'))
        for link in container.findAll('a', attrs={'class': 'e e2 eck'})
    ]
    if not rows:
        return

    # Binary append mode: the Python 2 csv module requires the 'b' flag,
    # otherwise rows get "\r\r\n" line endings on Windows and no longer match
    # the header row, which the script writes through a "wb" handle.
    with open("imdb.csv", "ab") as f:
        csv.writer(f).writerows(rows)

# Create (or truncate) the output file and write the header row, then close
# the file BEFORE scraping starts. get() appends through its own handle; if
# this handle stayed open, its still-buffered header would be flushed at
# offset 0 when the block finally exits and overwrite the beginning of the
# rows that had already been appended.
with open("imdb.csv","wb") as f:
    fw = csv.writer(f)
    fw.writerow(['职位','公司','薪资'])

# Scrape result pages 1 through 9; each call to get() appends that page's
# rows to imdb.csv.
for j in range(1, 10):
    # Single parenthesised argument: prints the same text under the Python 2
    # print statement.
    print("-----正在爬第"+str(j)+"页内容---------")
    soup = get_content(j)
    get(soup)

 

posted on 2017-05-13 14:38  Kermit.Li  阅读(472)  评论(0)  编辑  收藏  举报

导航