Python爬虫(七)

源码:

import re
from urllib.parse import urljoin

import requests

from my_mysql import MysqlConnect

 5 # 获取详情页链接和电影名称
 6 def get_urls(page):
 7     url = 'http://www.dytt8.net/html/gndy/dyzz/list_23_{}.html'.format(page)
 8     response = requests.get(url)
 9     response.encoding = 'gbk'
10     # print(res)
11     pat = r'<a href="(.*?)" class="ulink">(.*?)</a>'
12     res = re.findall(pat, response.text)
13     # print(res)
14     return res
15 
def _first_match(pattern, text):
    """Return group 1 of the first match of *pattern* in *text*, or ''."""
    match = re.search(pattern, text)
    return match.group(1) if match is not None else ''


def get_links(url, timeout=10):
    """Fetch a movie detail page and extract its download links.

    Args:
        url: Absolute URL of the detail page.
        timeout: Seconds to wait for the server before ``requests`` gives
            up.

    Returns:
        A ``(magnet, ftp)`` tuple of strings holding the first
        ``href="magnet..."`` and the first ``href="ftp..."`` value found in
        the page. A link that is not present is returned as ``''`` rather
        than raising, so a page that offers only one kind of link does not
        abort the caller.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    response = requests.get(url, timeout=timeout)
    # The response is decoded as GBK instead of relying on requests'
    # own charset detection.
    response.encoding = 'gbk'
    html = response.text
    magnet = _first_match(r'href="(magnet.*?)"', html)
    ftp = _first_match(r'href="(ftp.*?)"', html)
    return magnet, ftp


BASE_URL = 'http://www.dytt8.net/'


def main(first_page=1, last_page=3):
    """Crawl listing pages and store each movie's links in the ``dytt`` table.

    For every listing page from *first_page* to *last_page* (inclusive),
    fetch the movie entries, resolve each detail page, extract its magnet
    and ftp links, and insert one row per movie.

    Args:
        first_page: First listing page number to crawl.
        last_page: Last listing page number to crawl (inclusive).
    """
    # NOTE(review): credentials are hard-coded; consider moving them to
    # configuration.
    mc = MysqlConnect('127.0.0.1', 'root', '123456', 'homework')
    for page in range(first_page, last_page + 1):
        try:
            movies = get_urls(page)
        except requests.RequestException as err:
            # One unreachable listing page should not end the whole crawl.
            print('skipping list page {}: {}'.format(page, err))
            continue
        for path, name in movies:
            # urljoin avoids the double slash that plain concatenation
            # produces when the href is root-relative.
            detail_url = urljoin(BASE_URL, path)
            try:
                magnet, ftp = get_links(detail_url)
            except requests.RequestException as err:
                print('skipping {}: {}'.format(detail_url, err))
                continue
            # NOTE(review): the statement is assembled from scraped text,
            # using repr() as quoting. That is not SQL escaping and is open
            # to injection or breakage on unusual titles. Switch to a
            # parameterized query once the MysqlConnect.exec signature is
            # confirmed (it is defined outside this file).
            sql = 'insert into dytt(id,name,magnet,ftp) values(null,{},{},{})'.format(repr(name),repr(magnet),repr(ftp))
            print(sql)
            mc.exec(sql)


if __name__ == '__main__':
    main()

 

posted @ 2018-08-19 15:11  _积木城池  阅读(213)  评论(0)  编辑  收藏  举报