初学 Python 爬虫,记录一下第一个程序。
-------------- 更新一波 version : 2.0.0 ----------------------------------
现在不残疾了。。。
"""
2019/10/17
version: 2.0.0
by Zeronera
Simple scraper for NITOJ problem statements and their input/output sections.
"""
import os

import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent

# Message printed (and returned) whenever a problem page cannot be shown.
INVALID_ID_MESSAGE = "Invalid ID!!!"


def getHTMLText(url):
    """Fetch *url* and return the response body as text.

    Returns the string "error" if building the headers, the request itself,
    or the HTTP status check fails.
    """
    try:
        headers = {'User-Agent': UserAgent().random}  # random User-Agent per request
        r = requests.get(url, timeout=30, headers=headers)
        r.raise_for_status()  # raise HTTPError on an error status code
        r.encoding = r.apparent_encoding
        return r.text
    except Exception:
        # Best-effort fetch: any failure is reported through the "error"
        # sentinel. `Exception` (rather than a bare `except`) lets Ctrl+C
        # and SystemExit propagate instead of being silently swallowed.
        return "error"


def get_problem_text(id):
    """Print the statement, input and output sections of problem *id*.

    *id* is the problem number as a string; it is appended to the problem
    page URL. Returns INVALID_ID_MESSAGE (after printing it) when the page
    cannot be fetched or has fewer than three content sections, otherwise
    returns None.
    """
    url = 'https://www.nitacm.com/problem_show.php?pid=' + id
    html = getHTMLText(url)
    if html == "error":
        print(INVALID_ID_MESSAGE)
        return INVALID_ID_MESSAGE

    soup = BeautifulSoup(html, 'lxml')
    div_list = soup.find_all('div', {'class': 'content-wrapper well'})
    if len(div_list) < 3:
        print(INVALID_ID_MESSAGE)
        return INVALID_ID_MESSAGE

    # The first three matching divs are printed in order as the statement,
    # the input description and the output description; zip() stops after
    # the three headings even if the page has more matching divs.
    headings = ("\n题面描述:\n\t", "Input:\n\t", "Output:\n\t")
    for heading, div in zip(headings, div_list):
        print(heading + div.text.strip() + '\n')


def main():
    """Prompt for problem numbers in a loop until the user types 'exit'."""
    # `cls` only exists on Windows; use `clear` on every other platform.
    clear_command = "cls" if os.name == "nt" else "clear"
    while True:
        # Strip stray whitespace so it does not end up inside the URL.
        problem_id = input('题目编号[100-22815]:').strip()
        get_problem_text(problem_id)
        print('\n')
        t = input("输入exit退出程序,输入其他字符继续运行程序:")
        if t == 'exit':
            break
        os.system(clear_command)


if __name__ == '__main__':
    main()
这个程序是实现对NITOJ题面信息的简单爬取
因为有些题目的题面、Input、Output 的描述用的是 <p> 标签,有些用 <div> 标签,有些不用标签、直接写在外层的 <div> 下,而这个程序(1.0.0 版)只识别 <p> 标签下的文本,所以才会有那句奇怪的提示 =。=(“对不起,请换一个题”)。
因为初学,所以程序比较残疾
因为是第一次实现,所以想记录一下
路过大佬请无视
"""
2019/8/9
version: 1.0.0
by Zeronera
Simple scraper for NITOJ problem statements and their input/output sections.
"""
import requests
from bs4 import BeautifulSoup


def get_problem_text(id):
    """Print the <p> text of the statement, input and output of problem *id*.

    *id* is the problem number as a string; it is appended to the problem
    page URL. Only text inside <p> tags of the first three content sections
    is printed. If the page cannot be fetched, has fewer than three content
    sections, or contains no <p> text at all, the "please pick another
    problem" message is printed instead.
    """
    sorry_message = "对不起,请换一个题"
    url = 'https://www.nitacm.com/problem_show.php?pid=' + id

    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
    except requests.RequestException:
        # Network failure or HTTP error status: nothing to parse.
        print(sorry_message)
        return

    soup = BeautifulSoup(r.text, 'lxml')
    div_list = soup.find_all('div', {'class': 'content-wrapper well'})
    if len(div_list) < 3:
        # Indexing the three sections below would otherwise raise IndexError.
        print(sorry_message)
        return

    found_text = False
    headings = ("题面描述:", "Input:", "Output:")
    for heading, div in zip(headings, div_list):
        paragraphs = div.find_all('p')
        print(heading)
        for paragraph in paragraphs:
            print(paragraph.text)
            found_text = True

    if not found_text:
        print(sorry_message)


def main():
    """Prompt for problem numbers in a loop until the user types 'exit'."""
    while True:
        problem_id = input('题目编号[100-22815]:')
        get_problem_text(problem_id)
        print('\n')
        t = input("输入exit退出程序,输入其他字符继续运行程序:")
        if t == 'exit':
            break


if __name__ == '__main__':
    main()