# Author: 骑骡子赶猪
# Use pandas.read_html to extract the tables from a web page.
from io import StringIO

import pandas as pd
import requests
# Headers sent with the request: a desktop-browser User-Agent string, and
# "Connection: close" to ask that the connection not be kept alive.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36",
    "Connection": "close",
}
# Page whose table is scraped.
URL = "http://guj.315i.com/assess/detial?industry=001&classId=001002"
# File the table is written to (CSV-formatted text despite the .txt suffix).
OUTPUT_PATH = "aaa.txt"
# Seconds to wait for the server before giving up; without a timeout
# requests.get can block forever on an unresponsive host.
REQUEST_TIMEOUT = 10


def fetch_page(url=URL, headers=None, timeout=REQUEST_TIMEOUT):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to download.
        headers: Request headers; defaults to the module-level ``HEADERS``.
        timeout: Seconds to wait for the server.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    response = requests.get(
        url=url,
        headers=HEADERS if headers is None else headers,
        timeout=timeout,
    )
    # Fail loudly instead of feeding an error page to the HTML parser.
    response.raise_for_status()
    return response.text


def extract_table(html, table_index=0):
    """Parse *html* and return the table at *table_index* as a DataFrame.

    The default index 0 selects the first table on the page.

    Raises:
        ValueError: If pandas finds no table in *html*.
        IndexError: If *table_index* is beyond the number of tables found.
    """
    # Recent pandas versions deprecate passing literal HTML strings to
    # read_html; wrapping the markup in StringIO is the supported form.
    return pd.read_html(StringIO(html))[table_index]


def save_table(table, path=OUTPUT_PATH):
    """Write *table* to *path* as UTF-8 CSV without header row or index."""
    table.to_csv(path, mode="w", encoding="utf-8", header=False, index=False)


def read_lines(path=OUTPUT_PATH):
    """Return the lines of the UTF-8 text file at *path* as a list of str."""
    with open(path, "r", encoding="utf-8") as f:
        return f.readlines()


def main():
    """Scrape the table, save it to disk, then print the saved lines."""
    page = fetch_page()
    tb = extract_table(page)
    save_table(tb)
    lis_new = read_lines()  # a list with one string per line
    print(lis_new)


if __name__ == "__main__":
    main()
# posted on 2019-07-24 13:36  骑骡子赶猪  阅读(546)  评论(0)  编辑  收藏  举报