Python小爬虫练习
# coding: utf-8
# Crawler exercise: queries lagou.com's position-search JSON endpoint for
# "Java" postings and prints one "company,position,salary" line per posting.
__author__ = 'zhangcx'

from urllib3 import PoolManager
import codecs
import json


class myjob(object):
    """Page through the lagou.com position search and print every posting.

    State kept between requests:
      _page            -- 1-based number of the next page to request.
      _totalPageCount  -- page count reported by the most recent response.
      _first           -- True until the first page has been fetched; sent to
                          the server as the ``first`` form field.
      _hasNextPage     -- False once the last page has been consumed.
      _http            -- urllib3 PoolManager used for every request.
    """

    # Search endpoint. The ``city`` query value is the percent-encoded UTF-8
    # spelling of the city name (it decodes to Wuhan).
    _URL = ('http://www.lagou.com/jobs/positionAjax.json'
            '?px=default&city=%E6%AD%A6%E6%B1%89')

    def __init__(self):
        self._page = 1
        self._totalPageCount = 0
        self._first = True
        self._hasNextPage = True
        self._http = PoolManager()

    def _fetch_page(self):
        """POST the query for the current page and return its ``content`` dict.

        Raises:
            KeyError: if the decoded JSON has no ``content`` entry.
        """
        response = self._http.request(
            'POST',
            self._URL,
            # Passed by keyword: the third positional parameter of
            # ``request`` is ``fields`` in urllib3 1.x but ``body`` in 2.x.
            fields={
                # Always lowercase; '%s' % True would have sent 'True'.
                'first': 'true' if self._first else 'false',
                'pn': '%d' % self._page,
                'kd': 'Java',
            },
        )
        return json.loads(response.data.decode('utf-8'))['content']

    def getjob(self):
        """Fetch every remaining result page and print its postings.

        Each posting is printed as ``companyShortName,positionName,salary``.
        Pagination stops when the server reports no next page or when the
        next page number would exceed the reported total page count.

        Raises:
            KeyError: if a response lacks ``result``, ``hasNextPage`` or
                ``totalPageCount`` under ``content``, or a posting lacks one
                of the printed fields.
        """
        # Iterative on purpose: one stack frame regardless of page count.
        while self._hasNextPage:
            content = self._fetch_page()
            for item in content['result']:
                print("{name},{positionName},{salary}".format(
                    name=item['companyShortName'],
                    positionName=item['positionName'],
                    salary=item['salary']))

            self._hasNextPage = content['hasNextPage']
            self._totalPageCount = content['totalPageCount']
            # Every request after the first one is flagged as a follow-up.
            self._first = False
            if self._page + 1 > self._totalPageCount:
                self._hasNextPage = False
            self._page += 1


if __name__ == "__main__":
    job = myjob()
    job.getjob()