XML爬取
# Crawl the Tenable plugin feed and store every plugin not yet recorded.
#
# Steps:
#   1. Download the feed at ``url_str`` and parse it as XML.
#   2. For each <item>, read the title, the link (its last path segment is
#      used as the Nessus plugin id) and the <description>.
#   3. Re-parse the description text as HTML; the first three <span>
#      elements are taken as synopsis, description and solution, in that
#      order.
#   4. Add the record to "CrawlDataForIDbyNessus" through ``mdb`` unless a
#      lookup by ``nessus_id`` already returns something.
#
# ``requests``, ``BeautifulSoup``, ``re`` and ``mdb`` are not defined here;
# they are expected to be in scope from elsewhere in the file.
url_str = 'https://www.tenable.com/plugins/feeds?sort=updated'

# Without a timeout a stalled connection would block the crawl forever.
respose_str = requests.get(url_str, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page as a feed.
respose_str.raise_for_status()
print(respose_str.text)

soup = BeautifulSoup(respose_str.text, 'xml')
soup_items = soup.find_all('item')

# Matches any markup tag; substituting it away leaves only the text between
# tags of a serialized element.
pattern = re.compile(r'<[^>]+>', re.S)

for soup_item in soup_items:
    name = pattern.sub('', str(soup_item.find("title")))
    link = pattern.sub('', str(soup_item.find("link")))
    # The plugin id is the final path segment of the item link.
    nessus_id = link.split("/")[-1]

    total_description = soup_item.find("description")
    if total_description is None:
        # One malformed item must not abort the whole crawl.
        print(f"skip {nessus_id}: item has no <description>")
        continue

    span = BeautifulSoup(total_description.text, "lxml").find_all("span")
    if len(span) < 3:
        # Synopsis, description and solution are read positionally, so an
        # item with fewer than three spans cannot be mapped reliably.
        print(f"skip {nessus_id}: expected 3 <span> elements, found {len(span)}")
        continue

    synopsis, description, solution = (
        pattern.sub('', str(tag)) for tag in span[:3]
    )

    up_dic = {
        "name": name,
        "nessus_id": nessus_id,
        "synopsis": synopsis,
        "description": description,
        "solution": solution,
    }

    # Only add plugins that are not stored yet.
    ne_item = mdb.get_one("CrawlDataForIDbyNessus", {"nessus_id": nessus_id})
    print(ne_item)
    if not ne_item:
        mdb.add("CrawlDataForIDbyNessus", up_dic)
I can feel you forgetting me。。 有一种默契叫做我不理你,你就不理我