两段实际爬虫程序应用

import requests
from lxml import html
from bs4 import BeautifulSoup

etree = html.etree

# Fetch a WeChat public-account article and print the first two cells of
# every table row, concatenated.
url = "https://mp.weixin.qq.com/s/drle9K4jgVWxm4v14ETbpQ"
response = requests.get(url)
soup = BeautifulSoup(response.text, features='html.parser')
content = soup.prettify()
html_content = etree.HTML(content)
# Target table located via browser dev tools:
# //*[@id="js_content"]/section[2]/section/section[2]/table
ret_data = html_content.xpath('//tr')
for item in ret_data:
    con = item.xpath("./td[1]/text()")
    con1 = item.xpath("./td[2]/text()")
    # Guard: rows without both cells (e.g. header or empty rows) previously
    # raised IndexError on con[0]. A single .strip() also removes any mix of
    # spaces, \n and \r\n, which the chained strip("\n").strip(" ") did not.
    if con and con1:
        print(con[0].strip() + con1[0].strip())

 

import requests
from lxml import html
from bs4 import BeautifulSoup

etree = html.etree

# Fetch a second WeChat article whose table cells hold question/answer pairs
# split across <p>/<span> nodes, and print each pair on one line.
url = "https://mp.weixin.qq.com/s/Zt2K7aOfSr8mrSdArfzWAg"
response = requests.get(url)
soup = BeautifulSoup(response.text, features='html.parser')
content = soup.prettify()
html_content = etree.HTML(content)
# XPaths of interest (from browser dev tools):
# //*[@id="js_content"]/section[2]/section/section[2]/table
# //*[@id="js_content"]/section[2]/section/section[2]/section[2]/section/section/table/tbody/tr[6]/td[1]/p/span[1]
ret_data = html_content.xpath('//tr')
for item in ret_data:
    con = item.xpath("./td[1]/p/span[1]/text()")
    con1 = item.xpath("./td[1]/p/span[2]/text()")
    con2 = item.xpath("./td[2]/p/text()")
    con3 = item.xpath("./td[2]/p/span/text()")
    # Guard: the original indexed con[0] (and con2[0]/con3[0]) without
    # checking the lists, so rows missing any expected node raised
    # IndexError. Skip rows with no first cell; fall back to printing just
    # the first cell when the answer nodes are incomplete.
    if not con:
        continue
    if con1 and con2 and con3:
        # .strip() subsumes the chained strip("\n").strip(" ").strip("\r\n").
        print(con[0].strip() + con1[0].strip() + "      答案→" + con2[0].strip() + con3[0].strip())
    else:
        print(con[0].strip())

 

下载csv文件,并获取其内容

import csv
import requests

# Advisory CSV for CVE-2020-1938 (Ghostcat).
CSV_URL = 'https://www.remedy-cloud.com/download/csv/CVE-2020-1938'

# Download the CSV over a pooled session, decode it as UTF-8, parse it with
# the stdlib csv reader, and print every parsed row.
with requests.Session() as session:
    download = session.get(CSV_URL)
    text = download.content.decode('utf-8')
    reader = csv.reader(text.splitlines(), delimiter=',')
    rows = list(reader)
    for record in rows:
        print(record)

  

 

posted @ 2020-12-11 09:44  离人怎挽_wdj  阅读(80)  评论(0编辑  收藏  举报