Python_1_requests

1,
安装requests 和 beautifulsoup

2,安装lxml
https://pypi.python.org/pypi/ez_setup
python ez_setup.py

https://pypi.python.org/pypi/pip
python setup.py install






3,

import sys

# Python 2 only: make implicit str/unicode conversions use UTF-8.
# ``reload`` and ``sys.setdefaultencoding`` do not exist on Python 3, where
# the default encoding is already UTF-8, so the hack is skipped there.
if sys.version_info[0] == 2:
    # ``site`` removes ``setdefaultencoding`` at startup; reloading the
    # module makes it available again.
    reload(sys)
    sys.setdefaultencoding("utf-8")

 

import requests

from bs4 import BeautifulSoup

 

def getHtmlText(url, timeout=10):
    """Fetch ``url`` with an HTTP GET and return the response body as text.

    Prints ``success`` or ``false`` to report the outcome.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the script forever.

    Returns:
        The decoded response body, or the string ``'false'`` when the
        request fails or the server answers with an error status.
    """
    try:
        r = requests.get(url, timeout=timeout)
        # Turn 4xx/5xx answers into exceptions so they take the failure path.
        r.raise_for_status()
    except requests.RequestException:
        print('false')
        # Callers receive this sentinel string instead of an exception.
        return 'false'
    else:
        # Decode with the encoding detected from the body rather than the
        # one taken from the response headers.
        r.encoding = r.apparent_encoding
        print('success')
        return r.text

 

 

def parseHtml(finddatas, html):
    """Collect product titles and prices from ``html`` into ``finddatas``.

    Every ``<li class="gl-item">`` element is inspected. The title is read
    from the ``title`` attribute of the item's first ``<a>`` tag and the
    price from the ``<i>`` tag inside ``<div class="p-price">``. The number
    of matching items is printed before they are processed.

    Args:
        finddatas: List that receives one ``[title, price]`` entry per
            successfully parsed item; it is modified in place.
        html: Markup to parse with the ``lxml`` parser.
    """
    soup = BeautifulSoup(html, 'lxml')
    lis = soup.find_all('li', class_="gl-item")
    print(len(lis))
    for item in lis:
        try:
            title = item.a['title']
            price = item.find('div', class_='p-price').i.string
        except (AttributeError, KeyError, TypeError):
            # A tag or attribute is missing from this item: skip it and
            # emit the same blank line as before.
            print('')
            continue
        finddatas.append([title, price])

 

def displayHtmlGoods(finddatas):
    """Append the collected goods to ``out.txt`` as a tab-separated table.

    A header row is written first, followed by one row per entry numbered
    from 1. ``out ok`` is printed once the file has been written.

    Args:
        finddatas: Sequence of entries where index 0 is the title and
            index 1 is the price.
    """
    # Function-scope import: ``io.open`` accepts ``encoding`` on both
    # Python 2 and Python 3.
    import io

    std = u"{:6}\t{:8}\t{:16}\n"
    # Explicit UTF-8 so the non-ASCII header and titles do not depend on the
    # locale; ``with`` closes the file even if a write fails.
    with io.open('out.txt', 'a', encoding='utf-8') as f:
        f.write(std.format(u'序号', u'价格', u'商品名称'))
        for index, row in enumerate(finddatas, 1):
            f.write(std.format(index, row[1], row[0]))
    print('out ok')

 

def main():
    """Search JD for a keyword, scrape several result pages and save them.

    For each requested page the search URL is built and printed, the page
    is downloaded and its goods are parsed into one shared list. The list
    is written out once after all pages have been processed.
    """
    url_basic = 'https://search.jd.com/Search?keyword='
    total_pages = 3
    keyword = u'电脑'

    finddatas = []
    for i in range(total_pages):
        # Requests pages 1, 3, 5, ... -- presumably the site numbers its
        # result pages with odd values; confirm against the site.
        page = 1 + i * 2
        url = url_basic + keyword + '&enc=utf-8&wq=' + keyword + '&page=' + str(page)
        print(url)
        html = getHtmlText(url)
        parseHtml(finddatas, html)

    displayHtmlGoods(finddatas)

 

# Run the scraper only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()

 

 



posted @ 2017-05-19 16:27  细雨细语  阅读(159)  评论(0)  编辑  收藏  举报