Python 爬虫（一）

import re
from collections import OrderedDict

import numpy as np
import pandas as pd
import requests  # alternative: import urllib.request
from bs4 import BeautifulSoup

# Scraping template: download one page, collect the link attributes and the
# visible text of every <li> in the first <ul> of each <div>, and export the
# collected rows to an Excel workbook.

url = 'http://'  # TODO: fill in the address of the page to scrape
filename = 'result.xlsx'  # output workbook written by DataFrame.to_excel
pattern = re.compile('')  # TODO: fill in the markup fragment to strip out

# Attributes read from the <a> tag of every list item.
LINK_ATTRS = ('href', 'title', 'target')


def fetch_html(url, timeout=10):
    """Download *url* and return the parsed document.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The page parsed into a ``BeautifulSoup`` tree.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of silently parsing an error page.
    response.raise_for_status()
    # Name the parser explicitly: without it BeautifulSoup warns and picks
    # whichever parser happens to be installed.
    return BeautifulSoup(response.text, 'html.parser')


def clean_item(li, pattern):
    """Strip *pattern* from the markup of *li* and re-parse the result.

    Args:
        li: The ``<li>`` tag to clean.
        pattern: Compiled regular expression to remove from the markup.

    Returns:
        *li* itself when the pattern does not match; otherwise the ``<li>``
        found in the cleaned markup, or ``None`` if none is left.
    """
    markup = str(li)
    if not pattern.search(markup):
        return li
    cleaned = pattern.sub('', markup)
    return BeautifulSoup(cleaned, 'html.parser').find('li')


def add_row(feature, row):
    """Append each value of *row* to the list stored under its key in *feature*.

    Missing keys are created on first use, so *feature* may start empty.
    """
    for key, value in row.items():
        feature.setdefault(key, []).append(value)


def collect_features(html, pattern):
    """Gather one row per usable ``<li>`` of the parsed page *html*.

    Args:
        html: Parsed document as returned by ``fetch_html``.
        pattern: Compiled regular expression passed on to ``clean_item``.

    Returns:
        A dict mapping column name to a list of values; every list has one
        entry per collected row, so the dict can be fed to ``pd.DataFrame``.
    """
    feature = {}
    seen = set()
    # To target one container: html.find_all(name='div', attrs={'id': ...})
    for div in html.find_all('div'):
        ul = div.find('ul')
        # Nested <div>s resolve to the same first <ul>; handle each list once.
        if ul is None or id(ul) in seen:
            continue
        seen.add(id(ul))
        for li in ul.find_all('li'):
            li = clean_item(li, pattern)
            a = li.find('a') if li is not None else None
            if a is None:
                # Abnormal item (no link, or destroyed by cleaning): skip it.
                continue
            # Tag.get returns None for a missing attribute, which keeps all
            # columns the same length.
            row = {attr: a.get(attr) for attr in LINK_ATTRS}
            row['text'] = ' '.join(li.stripped_strings)
            add_row(feature, row)
    return feature


def build_frame(feature):
    """Return *feature* as a DataFrame whose columns follow the dict's key order."""
    return pd.DataFrame(data=feature, columns=list(feature))


def main():
    """Scrape ``url`` and write the collected rows to ``filename``."""
    html = fetch_html(url)
    feature = collect_features(html, pattern)
    dw = build_frame(feature)
    dw.to_excel(filename)


if __name__ == '__main__':
    main()

注：查找过程中要注意对异常数据（例如缺失的标签或属性）进行判断，并捕获相应的异常。

posted @ 2019-04-01 12:46 竹心_兰君 阅读(226) 评论(0) 编辑 收藏 举报

刷新页面返回顶部

竹心_兰君