两段实际爬虫程序应用
# Fetch a WeChat article and print the first two cells of every table row.
import requests
from bs4 import BeautifulSoup
from lxml import html

etree = html.etree

url = "https://mp.weixin.qq.com/s/drle9K4jgVWxm4v14ETbpQ"
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)
soup = BeautifulSoup(response.text, features='html.parser')
# The page is re-serialised by BeautifulSoup before lxml parses it.
content = soup.prettify()
html_content = etree.HTML(content)

# Reference XPath of the target table, kept from the original notes:
# //*[@id="js_content"]/section[2]/section/section[2]/table
ret_data = html_content.xpath('//tr')
for item in ret_data:
    con = item.xpath("./td[1]/text()")
    con1 = item.xpath("./td[2]/text()")
    # A row whose first or second cell has no direct text node yields an
    # empty list; indexing it would raise IndexError, so skip such rows.
    if not con or not con1:
        continue
    # prettify() pads text nodes with newlines and indentation. A plain
    # strip() removes all surrounding whitespace regardless of its order,
    # unlike a chain of strip("\n").strip(" ") calls.
    print(con[0].strip() + con1[0].strip())
# Fetch a WeChat article and print each table row as "question 答案→ answer".
import requests
from bs4 import BeautifulSoup
from lxml import html

etree = html.etree


def _first_text(nodes):
    """Return the first XPath text result with whitespace stripped, or ''.

    An empty result list yields an empty string instead of raising
    IndexError, so rows with missing cells can still be printed.
    """
    return nodes[0].strip() if nodes else ""


url = "https://mp.weixin.qq.com/s/Zt2K7aOfSr8mrSdArfzWAg"
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)
soup = BeautifulSoup(response.text, features='html.parser')
# The page is re-serialised by BeautifulSoup before lxml parses it.
content = soup.prettify()
html_content = etree.HTML(content)

# Reference XPaths of the target table and one of its cells, kept from the
# original notes:
# //*[@id="js_content"]/section[2]/section/section[2]/table
# //*[@id="js_content"]/section[2]/section/section[2]/section[2]/section/section/table/tbody/tr[6]/td[1]/p/span[1]
ret_data = html_content.xpath('//tr')
for item in ret_data:
    con = item.xpath("./td[1]/p/span[1]/text()")
    con1 = item.xpath("./td[1]/p/span[2]/text()")
    con2 = item.xpath("./td[2]/p/text()")
    con3 = item.xpath("./td[2]/p/span/text()")

    # Nothing to print for rows that match neither span of the first cell.
    if not con and not con1:
        continue

    if con1:
        # Two-span rows are printed together with the second-column text;
        # any missing piece contributes an empty string.
        print(
            _first_text(con)
            + _first_text(con1)
            + " 答案→"
            + _first_text(con2)
            + _first_text(con3)
        )
    else:
        # Single-span rows are printed on their own.
        print(_first_text(con))
下载 CSV 文件，并获取其内容
# Download a CSV file over HTTP and print every parsed row.
import csv
import io

import requests

CSV_URL = 'https://www.remedy-cloud.com/download/csv/CVE-2020-1938'

with requests.Session() as s:
    # A timeout keeps the script from hanging forever on a stalled connection.
    download = s.get(CSV_URL, timeout=30)
    decoded_content = download.content.decode('utf-8')

    # Feed the reader a text stream with newline='' so the csv module does
    # its own line splitting. str.splitlines() would cut quoted fields that
    # contain embedded newlines and also splits on characters such as
    # form feed or U+2028.
    cr = csv.reader(io.StringIO(decoded_content, newline=''), delimiter=',')
    my_list = list(cr)
    for row in my_list:
        print(row)
I can feel you forgetting me。。 有一种默契叫做我不理你,你就不理我