# 猫眼前100 (Maoyan Top 100 movies)

#mzitu
'''
User-Agent:Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.59 Safari/537.36 115Browser/8.6.2
'''
# -*- coding=utf-8 -*-
import requests
import lxml
import json
from lxml import etree

def getOnePage(n, timeout=10):
    """Download one page of the maoyan.com ``board/4`` listing.

    Args:
        n: Zero-based page index. The request uses ``offset = n * 10``.
        timeout: Seconds ``requests`` waits on the server before giving up.
            Without a timeout a stalled connection would block forever.

    Returns:
        The response body as text (``Response.text``).

    Raises:
        requests.exceptions.RequestException: Propagated unchanged from
            ``requests.get`` (connection failure, timeout, ...).
    """
    url = f'http://maoyan.com/board/4?offset={n*10}'
    # Send a browser-like User-Agent with the request.
    header = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.59 Safari/537.36 115Browser/8.6.2'}
    r = requests.get(url, headers=header, timeout=timeout)
    # Echo the response object as a simple progress trace.
    print(r)
    return r.text
# NOTE(review): leftover commented-out line; it is not valid Python as written.
#global html=''
# Executed at import time: prints a two-line greeting banner.
print('世界,你好!\nhello world!\n')

# Module-level dict, created empty at import time.
item = {}
# Module-level counter starting at 0; run() declares it ``global`` and
# increments it once for every movie it saves.
# NOTE(review): this name shadows the builtin id() for the whole module.
id = 0
def parse(text):
    """Yield one record per movie found in a board page.

    Args:
        text: HTML source of a board page.

    Yields:
        A new ``dict`` for each movie with two keys: ``'名称'`` holds the
        ``title`` attribute of the name link and ``'time'`` holds the text
        of the release-time paragraph. A fresh dict is built for every
        movie so that callers who collect the results (for example with
        ``list(parse(text))``) do not end up with several references to
        one shared, repeatedly overwritten dict.
    """
    html = etree.HTML(text)
    # NOTE(review): guard for the case where the parser produces no root
    # element (e.g. empty input) -- treated as "no movies on this page".
    if html is None:
        return

    # All three queries share the same container path.
    base = '//div[@class="board-item-content"]/div[@class="movie-item-info"]'
    names = html.xpath(base + '/p[@class="name"]/a/@title')
    stars = html.xpath(base + '/p[@class="star"]/text()')
    releasetimes = html.xpath(base + '/p[@class="releasetime"]/text()')

    # The star list is not emitted, but it still takes part in the zip so
    # iteration stops at the shortest of the three lists, as before.
    for name, _star, releasetime in zip(names, stars, releasetimes):
        yield {'名称': name, 'time': releasetime}


def save2file(data, filename='movie.json'):
    """Append *data* to a file as one JSON-encoded line.

    Each call appends the JSON text of *data* followed by a comma and a
    newline, so the file is a sequence of comma-terminated JSON values
    rather than a single JSON document.

    Args:
        data: Any object ``json.dumps`` can serialise.
        filename: Path of the file to append to. Defaults to
            ``'movie.json'``, the path that used to be hard-coded.

    Raises:
        TypeError: If *data* is not JSON-serialisable (from ``json.dumps``).
        OSError: If the file cannot be opened or written.
    """
    # Serialise first so a failure here never creates or touches the file.
    line = json.dumps(data, ensure_ascii=False) + ',\n'
    with open(filename, 'a', encoding='utf-8') as f:
        f.write(line)

def run():
    """Fetch board pages 0 through 9 and print and save every movie found.

    For each page the HTML is downloaded with ``getOnePage`` and handed to
    ``parse``. Every record that comes back bumps the module-level ``id``
    counter, is printed together with that counter, and is appended to the
    output file through ``save2file``.
    """
    # The running movie number lives at module level.
    global id
    for page in range(10):
        page_text = getOnePage(page)
        for movie in parse(page_text):
            id += 1
            print(id, movie)
            save2file(movie)

# Start the scrape only when this file is executed as a script.
if __name__ == "__main__":
    run()

# posted @ 2018-10-11 20:38  快乐多巴胺  阅读(207)  评论(0)  编辑  收藏  举报