BeautifulSoup解析本地文件
import requests
from bs4 import BeautifulSoup

# Script: parse a locally saved HTML page, collect the text of every
# <a class="link-gray-dark"> element whose text contains 'src', sort those
# texts into buckets by the file-type marker they contain, and print each
# bucket under its label.

# One newline-prefixed entry is appended per matching link text.
apex_class = ''
apex_trigger = ''
custome_object = ''
profile = ''
others = ''

path = '/Users/aaa/Downloads/aaa.htm'

# Read the whole page inside a context manager so the file handle is closed
# even if reading raises (the handle was previously left open).
with open(path, 'r') as htmlfile:
    htmlhandle = htmlfile.read()

soup = BeautifulSoup(htmlhandle, "lxml")
# Alternative parser: soup = BeautifulSoup(htmlhandle, "html.parser")

titles = soup.find_all("a", class_="link-gray-dark")

for title in titles:
    text = title.text

    # Only link texts that mention 'src' are categorized; skip the rest.
    if 'src' not in text:
        continue

    entry = '\n' + text

    # Substring checks run in this fixed order; the first match wins.
    if '.cls' in text:
        apex_class += entry
    elif '.trigger' in text:
        apex_trigger += entry
    elif '.object' in text:
        custome_object += entry
    elif '.profile' in text:
        profile += entry
    else:
        others += entry

# Each bucket already starts with '\n', so a blank line follows the label.
print('apex_class:\n' + apex_class)
print('apex_trigger:\n' + apex_trigger)
print('custome_object:\n' + custome_object)
print('profile:\n' + profile)
print('others:\n' + others)