Python 简单爬取前程无忧招聘信息
# -*- coding: utf-8 -*-
# The declaration above is needed because this script contains non-ASCII
# string literals; Python 2 only honours it on the first or second line of
# the file (PEP 263).
"""Scrape Python job postings from 51job search results into a CSV file.

This is a Python 2 script (it relies on ``reload(sys)``,
``sys.setdefaultencoding`` and the BeautifulSoup 3 package).  Running it
rewrites ``imdb.csv`` with a header row and then appends the title, company
and salary of every posting found on search-result pages 1 through 9.
"""
import sys

# Python 2 only: make UTF-8 the codec used for implicit str/unicode
# conversion.  Presumably this is what lets the scraped text be written by
# csv.writer without explicit encoding -- confirm before removing.
reload(sys)
sys.setdefaultencoding('utf-8')

import csv

import requests
from BeautifulSoup import BeautifulSoup

# The page number is inserted between these two URL fragments.
SEARCH_URL_PREFIX = (
    'http://search.51job.com/list/200200,000000,0000,32,9,99,python,2,'
)
SEARCH_URL_QUERY = (
    '.html?lang=c&stype=1&postchannel=0000&workyear=99&cotype=99'
    # Was "°reefrom=99": "&deg" had been decoded as an HTML entity,
    # which corrupted the "degreefrom" query parameter.
    '&degreefrom=99&jobterm=99&companysize=99&lonlat=0%2C0&radius=-1'
    '&ord_field=0&confirmdate=9&fromType=&dibiaoid=0&address=&line='
    '&specialarea=00&from=&welfare='
)

# File that receives the scraped rows.
OUTPUT_CSV = "imdb.csv"

# Seconds to wait for the server before giving up, so that one stalled
# connection cannot hang the whole run.
REQUEST_TIMEOUT = 10


def get_content(page):
    """Download one search-result page and return it parsed.

    Args:
        page: Page number to insert into the search URL.

    Returns:
        A ``BeautifulSoup`` object built from the raw response body.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within ``REQUEST_TIMEOUT`` seconds.
    """
    url = SEARCH_URL_PREFIX + str(page) + SEARCH_URL_QUERY
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    return BeautifulSoup(response.content)


def get(soup, csv_path=OUTPUT_CSV):
    """Extract the job postings from a parsed page and append them to a CSV.

    Each ``<a class="e e2 eck">`` inside ``<div class="jblist res">`` yields
    one row of (title from ``<h3>``, company from ``<aside>``, salary from
    ``<em>``), with surrounding whitespace stripped.

    Args:
        soup: Parsed page as returned by ``get_content``.
        csv_path: File to append the rows to; defaults to ``OUTPUT_CSV``.

    Returns:
        None.  If the result container is not present on the page, a
        warning is written to stderr and the file is left untouched.
    """
    container = soup.find('div', attrs={'class': 'jblist res'})
    if container is None:
        # find() returns None when nothing matches; skip this page instead
        # of crashing with AttributeError on .findAll below.
        sys.stderr.write("warning: job list container not found; page skipped\n")
        return

    rows = [
        (
            link.find('h3').text.strip(),
            link.find('aside').text.strip(),
            link.find('em').text.strip(),
        )
        for link in container.findAll('a', attrs={'class': 'e e2 eck'})
    ]

    # Binary append mode: the Python 2 csv module requires files opened with
    # the 'b' flag, and this matches the "wb" used for the header row.
    with open(csv_path, "ab") as f:
        csv.writer(f).writerows(rows)


# Start from a fresh file that contains only the header row; get() appends
# the rows of each page afterwards.
with open(OUTPUT_CSV, "wb") as f:
    csv.writer(f).writerow(['职位', '公司', '薪资'])

for j in range(1, 10):
    # Single parenthesised argument: prints the same text as the original
    # Python 2 print statement.
    print("-----正在爬第" + str(j) + "页内容---------")
    get(get_content(j))