1 # -*- coding:utf-8-*- 2 import xlwt 3 import urllib2 4 import requests 5 from lxml import etree 6 c=0 7 workbook=xlwt.Workbook(encoding='utf-8') 8 booksheet=workbook.add_sheet('Sheet1', cell_overwrite_ok=True)#建立表1 9 usr_agent='Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' 10 headers={'Usr_Agent':usr_agent} 11 HTML="http://list.pptv.com/?type=1&sort=1" 12 request=urllib2.Request(HTML,headers=headers) 13 html=requests.get(HTML) 14 seletor=etree.HTML(html.text) 15 content_field=seletor.xpath('//div[@class="sear-menu"]') 16 for each in content_field: 17 type=each.xpath('dl/dd/a/@title')[1:26] 18 href=each.xpath('dl/dd/a/@href')[1:26] 19 for j in zip(type,href): 20 kind=j[0] 21 MOVIE_HTML=j[1] 22 for p in range(1,30): 23 Movie_html="http://list.pptv.com/channel_list.html?page=%s"%p+MOVIE_HTML[-17:] 24 html2=requests.get(Movie_html) 25 seletor2=etree.HTML(html2.text) 26 name=seletor2.xpath('// p[@class="ui-txt"]/span/text()') 27 grade=seletor2.xpath('// p[@class="ui-txt"]/em/text()') 28 phtho_url=seletor2.xpath('//p[@class="ui-pic"]/img/@data-src2') 29 movie_html_urls=seletor2.xpath('//a[@class="ui-list-ct"]/@href') 30 for n in zip(name,phtho_url,grade,movie_html_urls): 31 c+=1 32 names=n[0] 33 row0 = ['类型','图片网址','分数','网址','电影网址'] 34 for i in range(0,len(row0)): 35 booksheet.write(0,i,row0[i]) #生成表头 36 booksheet.write(c,0,kind)#插入每行数据 37 booksheet.write(c,1,names) 38 booksheet.write(c,2,n[1]) 39 booksheet.write(c,3,n[2]) 40 booksheet.write(c,4,n[3]) 41 workbook.save('movies.xls')
结果展示: