Python 存入四大文件（txt、json、csv、Excel）
爬虫数据存入三大文件（txt、json、csv）
import requests
import json,csv
from lxml import etree
# Scrape the first nine listing pages of www.lnzxzb.cn and store the link,
# title and date of every listed item.  Three storage variants are provided
# (txt, json, csv); only the CSV variant is active, as in the original script.

BASE_URL = 'http://www.lnzxzb.cn/gcjyxx/004001/'
HEADERS = {
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'
}
# The original script read list items 1..15 from every page.
ITEMS_PER_PAGE = 15
CSV_HEADER = ['link', 'title', 'times']


def page_url(page):
    """Return the listing URL for the 1-based page number ``page``.

    Page 1 is served as ``subpage.html``; every later page as ``<page>.html``.
    """
    if page == 1:
        return BASE_URL + 'subpage.html'
    return BASE_URL + str(page) + '.html'


def fetch_tree(url):
    """Download ``url`` and return the parsed lxml HTML tree.

    Raises:
        requests.RequestException: on a network error, a timeout, or a
            non-2xx HTTP status (via ``raise_for_status``).
    """
    # A timeout keeps a stalled connection from hanging the whole run.
    res = requests.get(url=url, headers=HEADERS, timeout=10)
    # Fail loudly instead of silently parsing an error page.
    res.raise_for_status()
    return etree.HTML(res.text)


def extract_rows(tree):
    """Return ``[href, title, date]`` for the items under ``ul#showList``.

    At most ``ITEMS_PER_PAGE`` items are read.  An item that lacks any of
    the three fields is skipped instead of raising ``IndexError``, so short
    or irregular pages no longer abort the run.
    """
    rows = []
    for item in tree.xpath('//ul[@id="showList"]/li')[:ITEMS_PER_PAGE]:
        href = item.xpath('./p/a/@href')
        title = item.xpath('./p/a/@title')
        date = item.xpath('./span[1]/text()')
        if href and title and date:
            rows.append([href[0], title[0], date[0]])
    return rows


# ---------------------------------------------------------------------------
# Store as a txt file (variant not used by main())
def write_txt_rows(rows, path='ztb.txt'):
    """Append each row to ``path`` as one line.

    The three fields are concatenated without a separator, as in the
    original snippet.
    """
    with open(path, 'a', encoding='utf-8') as f:
        f.writelines(''.join([*row, '\n']) for row in rows)


# ---------------------------------------------------------------------------
# Store as a json file (variant not used by main())
def write_json_rows(rows, path='ztb.json'):
    """Append each row to ``path`` as an indented JSON object followed by ','.

    NOTE(review): as in the original snippet, the file ends up holding a
    comma-terminated run of objects, which is not a single valid JSON
    document.
    """
    with open(path, 'a', encoding='utf-8') as f:
        for ret, ret1, ret2 in rows:
            dic = {'ret': ret, 'ret1': ret1, 'ret2': ret2}
            f.write(json.dumps(dic, indent=4, ensure_ascii=False) + ',')


# ---------------------------------------------------------------------------
# Store as a CSV file (requires ``import csv``)
def write_csv_rows(rows, path='js_law.csv'):
    """Append ``rows`` to the CSV file at ``path``.

    The header row is written only when the file is still empty, so
    appending page after page (or re-running the script) does not
    duplicate it.
    """
    # newline='' stops the csv module from emitting blank lines between rows.
    with open(path, 'a', encoding='utf-8-sig', newline='') as f:
        # delimiter must be a single character, e.g. a comma or a space.
        wr = csv.writer(f, delimiter=',')
        # In append mode the position right after open() is the file size.
        if f.tell() == 0:
            wr.writerow(CSV_HEADER)
        wr.writerows(rows)


def main():
    """Scrape listing pages 1-9 and append every extracted row to the CSV."""
    for page in range(1, 10):
        tree = fetch_tree(page_url(page))
        write_csv_rows(extract_rows(tree))


if __name__ == '__main__':
    main()
存入 Excel 表格
模板（使用 openpyxl）
import openpyxl
# Template: build a one-row workbook with openpyxl and save it to disk.

# Row to write; each tuple element lands in its own column.
first_row = ("第一列", "第二列", "...")

# Create a new workbook and take the worksheet that is active by default.
workbook = openpyxl.Workbook()
sheet = workbook.active

# Write: append the row after the last used row of the sheet.
sheet.append(first_row)

# Save the workbook to disk.
workbook.save('data.xlsx')