PyCharm 安装 Bitbucket Git 管理工具,以及猫眼电影 TOP100 榜单的简单爬虫抓取
File → Settings → Plugins,搜索 Bitbucket Linky
下载安装完成后重启 PyCharm,选中项目后单击右键,即可看到 Git 菜单。
import requests
from bs4 import BeautifulSoup

# Browser-like User-Agent sent with every request.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) "
                  "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.170 Safari/537.36"
}

# Placeholder proxy settings; replace the 'xxxx' values before running.
# NOTE(review): requests looks proxies up by URL scheme ('http', 'https'), so
# the 'Login' / 'Password' entries are presumably ignored. Credentials normally
# go inside the proxy URL (http://user:password@host:port) -- confirm and fix.
PROXIES = {
    'http': 'xxxxxxxxxxxxx',
    # Raw string: '\L' is not a valid escape sequence in a normal literal.
    'Login': r'xxxx\Lxxx',
    'Password': 'xxxx.',
}


def get_one_page(url):
    """Fetch *url* and return the response body as text.

    The HTTP status code is printed for every completed request.

    Args:
        url: Address of the page to download.

    Returns:
        The decoded response body when the status code is 200, otherwise
        None (also None when the request itself fails; the error is printed).
    """
    try:
        # The context manager releases the session's pooled connections.
        with requests.Session() as session:
            # NOTE(review): verify=False disables TLS certificate checks if the
            # request ends up on https; kept to preserve existing behaviour.
            response = session.get(
                url,
                headers=HEADERS,
                verify=False,
                proxies=PROXIES,
                timeout=20,
            )
            print(response.status_code)
            if response.status_code == 200:
                return response.text
            return None
    except requests.RequestException as e:
        print(e)
        return None


def parse_page(html):
    """Print rank, title, actors and release time for each ``<dd>`` in *html*.

    Args:
        html: Page markup as returned by ``get_one_page``; a falsy value
            (failed download) is skipped without parsing.

    Returns:
        Always None; the extracted data is only printed.
    """
    if not html:
        # Nothing was downloaded, so there is nothing to parse.
        return None
    try:
        soup = BeautifulSoup(html, 'lxml')
        for dd in soup.find_all('dd'):
            index = dd.find('i').text
            title = dd.select('div>div>div>p>a')[0].text
            paragraphs = dd.select('div>div>div>p')
            # The slices presumably strip surrounding whitespace and the
            # label prefix from each field -- verify against the live markup.
            actors = paragraphs[1].text[20:-9]
            release_time = paragraphs[2].text[5:]
            print('排名:%s,电影名称:%s,主演:%s,上映时间:%s'
                  % (index, title, actors, release_time))
    except Exception as e:
        # Best-effort scraping: report unexpected markup (or a missing parser)
        # and let the caller continue with the next page.
        print(e)
    return None


def main():
    """Download and print the ten board pages (offsets 0, 10, ..., 90)."""
    for offset in range(0, 100, 10):
        url = "http://maoyan.com/board/4?offset=" + str(offset)
        html = get_one_page(url)
        parse_page(html)


if __name__ == "__main__":
    main()