python(4) 小程序-异步加载

注:处理异步加载的数据时,需要在请求头中设置 User-Agent 来模拟浏览器访问,然后 import json,用 json.loads 解析返回的 JSON 数据

例如:

代码:

 1 #! /usr/bin/env python
 2 # -*- coding=utf-8 -*-
 3 import requests
 4 import json
 5 import re
 6 import sys
 7 reload(sys)  # Python 2 only (reload is not a builtin in Python 3); presumably needed so the next call is available -- TODO confirm
 8 sys.setdefaultencoding("utf-8")  # make UTF-8 the default encoding for implicit str/unicode conversions
 9 classinfo = []  # not referenced anywhere else in the visible code
10 f = open('info.txt','w')  # shared output file written by write() and write1(); no close() call is visible
11 
12 num = 0  # running count of records written; incremented by write() and write1() via `global num`
def write(htm):
    """Extract movie entries from a fetched page and append them to the output file.

    Every fragment of ``htm.text`` found between ``data-tit`` and
    ``data-enough`` is treated as one movie entry. For each entry the title,
    actors, region, release year, duration and rating attributes are pulled
    out with regular expressions and written to the module-level file ``f``
    as a numbered record. The module-level counter ``num`` is incremented
    once per record written.

    An entry that lacks any of the expected attributes is skipped instead of
    aborting the whole run with an exception.

    Args:
        htm: response-like object whose ``text`` attribute holds the page HTML.
    """
    global num
    # (output label, pattern) pairs, listed in the order they are written.
    # The title pattern starts at 'le="' because the 'data-tit' prefix of
    # 'data-title="' is consumed by the entry-splitting pattern below.
    fields = (
        (u'电影名:', r'le="(.*?)"'),
        (u'主演:', r'data-actors="(.*?)" data-in'),
        (u'电影地区:', r'data-region="(.*?)" data-dir'),
        (u'上映年份:', r'data-release="(.*?)" data'),
        (u'电影时长:', r'data-duration="(.*?)" data-re'),
        (u'评分:', r'data-rate="(.*?)" data-star'),
    )
    for each in re.findall('data-tit(.*?)data-enough', htm.text, re.S):
        print(each)
        matches = [re.search(pattern, each, re.S) for _, pattern in fields]
        if None in matches:
            # Incomplete entry: skip it rather than crash on a missing match.
            continue
        num = num + 1
        record = u'%d\n' % num
        for (label, _), match in zip(fields, matches):
            record += label + match.group(1) + u'\n'
        # A blank line separates consecutive records.
        f.write(record + u'\n')
def write1(info):
    """Append one numbered title/rating/link record to the output file.

    Increments the module-level counter ``num`` and writes the record to the
    module-level file ``f``, followed by a blank line.

    Args:
        info: mapping with the keys ``'title'``, ``'Rating'`` and ``'url'``,
            each holding a text value.

    Raises:
        KeyError: if ``info`` lacks one of the three keys.
    """
    global num
    num = num + 1
    # Build the whole record first and emit it with a single write();
    # writelines() on a plain string would iterate it character by character.
    record = (
        u'%d\n' % num
        + u'电影名:' + info['title'] + u'\n'
        + u'评分:' + info['Rating'] + u'\n'
        + u'链接:' + info['url'] + u'\n\n'
    )
    f.write(record)
def getry():
    """Download the movie home page and pass the response to write()."""
    response = requests.get('http://movie.douban.com/')
    # Force UTF-8 decoding before write() reads the response text.
    response.encoding = 'utf-8'
    write(response)
def getrm():
    """Fetch the JSON movie listing page by page and record every entry.

    Requests 16 consecutive pages (``page_start`` = 0, 20, ..., 300) from the
    search endpoint using a browser-like User-Agent header, parses each
    response body with ``json.loads`` and passes the title, rating and URL of
    every item under the ``'subjects'`` key to ``write1``.
    """
    url = 'http://movie.douban.com/j/search_subjects?type=movie&tag=热门&sort=recommend&page_limit=20&page_start=0'
    head = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0'}
    for page in range(0, 16):
        # No flags argument here: the fourth positional parameter of re.sub
        # is ``count``, so passing re.S there would be read as a count.
        newurl = re.sub(r'start=\d+', 'start=%d' % (page * 20), url)
        jscontent = requests.get(newurl, headers=head).content
        jsdict = json.loads(jscontent)  # parse the JSON payload into a dict
        # Iterate over whatever the page actually returned; a short final
        # page must not raise IndexError.
        for subject in jsdict['subjects']:
            write1({
                'title': subject['title'],
                'Rating': subject['rate'],
                'url': subject['url'],
            })
if __name__ == "__main__":
    # Run both scrapers, then close the shared output file even if one of
    # them raises, so buffered records are flushed to info.txt.
    try:
        getry()
        getrm()
    finally:
        f.close()

效果图:

posted on 2016-01-20 15:07  细雨微光  阅读(375)  评论(0)  编辑  收藏  举报