Python_1_requests
1,
安装requests 和 beautifulsoup
2,安装lxml
https://pypi.python.org/pypi/ez_setup
python ez_setup.py
https://pypi.python.org/pypi/pip
python setup.py install
3,
# -*- coding: utf-8 -*-
# Scrape JD.com search results for a keyword and append the product titles
# and prices to out.txt.
#
# Text is handled as unicode throughout and the output file is opened with an
# explicit UTF-8 encoding, so the Python 2-only
# reload(sys) / sys.setdefaultencoding("utf-8") hack is not needed.
import io
import sys  # noqa: F401 -- kept from the original script; no longer used here

import requests
from bs4 import BeautifulSoup


def getHtmlText(url):
    """Download *url* and return the response body as text.

    Prints 'success' or 'false' to report the outcome.  When the request
    fails (connection error, timeout, HTTP error status) the literal string
    'false' is returned instead of raising, so callers can keep going.
    """
    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        r = requests.get(url, timeout=10)
        r.raise_for_status()
    except requests.RequestException:
        print('false')
        return 'false'
    # Decode with the encoding detected from the body itself.
    r.encoding = r.apparent_encoding
    print('success')
    return r.text


def parseHtml(finddatas, html):
    """Append a [title, price] pair to *finddatas* for each product in *html*.

    Every <li class="gl-item"> is treated as one product: the title is the
    'title' attribute of its first <a>, and the price is the string of the
    <i> inside its <div class="p-price">.  The number of <li> items found is
    printed first; an item missing any of those pieces is skipped and a blank
    line is printed for it.
    """
    soup = BeautifulSoup(html, 'lxml')
    items = soup.find_all('li', class_='gl-item')
    print(len(items))
    for item in items:
        try:
            title = item.a['title']
            price = item.find('div', class_='p-price').i.string
        except (AttributeError, KeyError, TypeError):
            # Missing <a>, missing 'title' attribute, or missing price markup.
            print('')
            continue
        finddatas.append([title, price])


def displayHtmlGoods(finddatas):
    """Append a header row and one numbered row per product to out.txt.

    *finddatas* is a sequence of [title, price] pairs.  Rows are written as
    number, price, title, separated by tabs.  Prints 'out ok' when done.
    """
    std = u"{:6}\t{:8}\t{:16}\n"
    # The with-block closes the file even if a write fails; the explicit
    # encoding makes the Chinese text independent of the platform default.
    with io.open('out.txt', 'a', encoding='utf-8') as f:
        f.write(std.format(u'序号', u'价格', u'商品名称'))
        for number, row in enumerate(finddatas, 1):
            title, price = row[0], row[1]
            # Tag.string is None when the price tag has several children;
            # write an empty field rather than failing on format(None).
            f.write(std.format(number, u'' if price is None else price, title))
    print('out ok')


def main():
    """Fetch three result pages for a fixed keyword and save the products."""
    url_basic = 'https://search.jd.com/Search?keyword='
    total_pages = 3
    keyword = u'电脑'
    finddatas = []
    for i in range(total_pages):
        page = 1 + i * 2  # requests pages 1, 3, 5
        url = url_basic + keyword + '&enc=utf-8&wq=' + keyword + '&page=' + str(page)
        print(url)
        html = getHtmlText(url)
        parseHtml(finddatas, html)
    displayHtmlGoods(finddatas)


if __name__ == '__main__':
    main()