爬取qingting.fm的频道音频~~

通过学习xmly的爬取

自己琢磨出的qingtingfm频道音频爬取方法

特记录一下

 1 # -*- coding: utf-8 -*-
 2 import requests, time
 3 import os
 4 import sys
 5 reload(sys)
 6 sys.setdefaultencoding('utf-8')
 7 
 8 hd = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:55.0) Gecko/20100101 Firefox/55.0'}
 9 url_top = 'http://i.qingting.fm/wapi/channels/'  # 蜻蜓各个json信息的接口
10 m4aurl_top = 'http://od.qingting.fm/'
11 
12 
def getChannelInfo(channelid, timeout=10):
    """Fetch the metadata JSON of a channel.

    Args:
        channelid: Channel identifier; converted with ``str()`` and appended
            to ``url_top``.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error instead of blocking forever.

    Returns:
        The decoded JSON object when its ``code`` field equals 0 (success),
        otherwise ``None``. ``None`` is also returned when the response body
        is not JSON at all.
    """
    response = requests.get(url_top + str(channelid), headers=hd, timeout=timeout)
    try:
        res = response.json()
    except ValueError:
        # A non-numeric id is answered with a plain 404 page rather than
        # JSON; report it the same way as an unknown channel.
        return None
    # code 0 = OK, code 1 = no data (the numeric id does not exist)
    if int(res['code']) != 0:
        return None
    return res
21 
def getChannelName(channelid):
    """Return the display name of a channel.

    Delegates the request and the status-code check to ``getChannelInfo``
    so the two functions cannot drift apart.

    Args:
        channelid: Channel identifier, passed through to ``getChannelInfo``.

    Returns:
        The value of ``data.name`` from the channel JSON, or ``None`` when
        ``getChannelInfo`` reports that the channel is unavailable.
    """
    info = getChannelInfo(channelid)
    if info is None:
        return None
    return info['data']['name']
29 
def getM4aList(channelid, page):
    """Fetch one page of a channel's programme list as decoded JSON.

    Args:
        channelid: Channel identifier (converted with ``str()``).
        page: Page number; formatted with ``%d`` into the request URL.

    Returns:
        The decoded JSON response, returned as-is without inspecting it.
    """
    # Build the endpoint for the audio listing of this channel page.
    suffix = '%s/programs/page/%d' % (str(channelid), page)
    response = requests.get(url_top + suffix, headers=hd)
    return response.json()
34 
35 
def mkdir(name, base='E:\\蜻蜓fm下载\\'):
    """Make sure the download folder for a channel exists.

    Args:
        name: Folder name (the channel name); appended verbatim to ``base``.
        base: Parent directory prefix. It must end with a path separator
            because ``name`` is concatenated directly. Defaults to the
            download root this script has always used.

    Returns:
        True if the folder was created by this call, False if it already
        existed.
    """
    path = base + name
    if os.path.exists(path):
        print('已存在***%s***文件夹!开始下载' % name)
        return False
    os.makedirs(path)
    print('创建***%s***文件夹成功!开始下载' % name)
    return True
46 
47 
def download(url, name, timeout=30):
    """Download one audio file and save it as ``<name>.m4a``.

    The file is written relative to the current working directory.

    Args:
        url: URL of the audio file.
        name: Programme title used as the file name (without extension).
            Characters that Windows forbids in file names are replaced
            with ``_`` so that ``open()`` does not fail on them.
        timeout: Seconds ``requests`` waits for the server before raising
            a timeout error instead of blocking forever.
    """
    # Titles come from the remote API and may contain characters such as
    # '/' or '?' that cannot appear in a Windows file name.
    safe_name = ''.join('_' if ch in '\\/:*?"<>|' else ch for ch in name)
    response = requests.get(url, headers=hd, stream=True, timeout=timeout)
    try:
        with open(safe_name + '.m4a', 'wb') as f:
            # Stream in chunks so a large file is never held fully in memory.
            for chunk in response.iter_content(chunk_size=64 * 1024):
                if chunk:
                    f.write(chunk)
    finally:
        # Release the connection even if writing fails half-way.
        response.close()
52 
# raw_input shows garbled Chinese in the Windows cmd console, so the prompt
# is converted to unicode and re-encoded as GBK before being displayed.
prompt = unicode('请输入要下载的频道ID(纯数字): ').encode('gbk')

# Keep asking until the input can be converted to an integer.
while True:
    try:
        channelid = int(raw_input(prompt))
        break
    except ValueError:
        # Only a failed int() conversion is retried; Ctrl+C and EOF are no
        # longer swallowed, so the prompt cannot loop forever.
        print(u'ID是数字不是字母!OK?!')

# One request supplies both the existence check and the channel metadata.
res = getChannelInfo(channelid)
if res is None:
    print('频道ID不存在!')
else:
    chname = res['data']['name']
    chtype = res['data']['type']
    chsale = res['data']['sale_type']
    if chtype == 'channel_live':
        # Live channels have no stored audio files to fetch.
        print(u'在线节目,不能下载!')
    elif chsale == 5:
        # sale_type 5 is treated as a paid channel.
        print(u'付费频道,不能下载!')
    else:
        mkdir(chname)
        # download() writes relative to the current directory.
        os.chdir('E:\\蜻蜓fm下载\\' + chname)
        page = 1   # page numbers start at 1
        count = 1  # running index of the file being downloaded
        while True:
            m4alist = getM4aList(channelid, page)
            page += 1
            # code 0 = page returned; code 1 = page number out of range.
            # Any non-zero code ends the paging loop.
            if int(m4alist['code']) != 0:
                break
            m4ainfos = m4alist['data']
            if not m4ainfos:
                # An empty page would otherwise make the loop run forever.
                break
            total = m4alist['total']
            for m4ainfo in m4ainfos:
                m4aname = m4ainfo['name']
                m4aurl = m4aurl_top + m4ainfo['file_path']
                print(u'*********下载第%s个 共%s个*********' % (str(count), total))
                download(m4aurl, m4aname)
                print(m4aname + u'  下载成功!~')
                time.sleep(1)  # pause between downloads
                count += 1

 

posted on 2017-09-30 18:35  枫木林语  阅读(958)  评论(0)  编辑  收藏  举报

导航