日日行,不怕千万里;常常做,不怕千万事|

__username

园龄:2年4个月粉丝:12关注:2

📂python
🔖Python
2023-02-11 15:50阅读: 14评论: 0推荐: 0

73数据读取or读出excel或csv

读入EXCEL中

# coding=gbk
# -*- coding:uft-8 -*-
# @Time: 2022/12/19
# @Author: 十架bgm
# @FileName: 读入excel中
"""
爬取的网站:https://hangzhou.taoche.com/all/
"""
import requests
from lxml import etree
import re
from faker import Factory # 随机ua
import pandas as pd
import os
# NOTE(review): NO_PROXY is conventionally a comma-separated list of host names
# that should bypass the proxy, not a full URL, and this host differs from the
# site requested below - confirm whether this setting is still needed.
os.environ['NO_PROXY'] = 'https://cc-api.sbaliyun.com/v1/completions'
def collect(url):
    """Scrape car names and prices from ``url`` and save them to an Excel file.

    The page is fetched with a randomly generated User-Agent. Car names are
    taken from the ``span`` text nodes under ``div#carlist`` and prices from
    the ``Total brand_col`` markup; the two lists are paired positionally and
    written to ``车子价格表.xlsx`` in the current working directory.

    Args:
        url: Address of the listing page to scrape.

    Returns:
        None. The result is the workbook written to disk.
    """
    fake = Factory.create()
    headers = {'User-Agent': fake.user_agent()}
    resp = requests.get(url=url, headers=headers)
    tree = etree.HTML(resp.text)
    car_names = tree.xpath('//div[@id="carlist"]//span/text()')
    car_prices = re.findall('<i class="Total brand_col">(.*?)<em>万</em></i>', resp.text)

    # zip() pairs names and prices by position and stops at the shorter list.
    total_list = [
        {'车名': name, '价格': price + '万'}
        for name, price in zip(car_names, car_prices)
    ]

    # Naming the columns keeps the header row even when nothing was scraped.
    pf = pd.DataFrame(total_list, columns=['车名', '价格'])

    # ExcelWriter.save() and the ``encoding`` argument of to_excel() were
    # removed in pandas 2.0; the context manager saves and closes the workbook
    # on exit and works on older and newer pandas alike.
    with pd.ExcelWriter('车子价格表.xlsx') as writer:
        pf.to_excel(writer, index=False)


if __name__ == '__main__':
    url = 'https://hangzhou.taoche.com/all/'
    collect(url)

读入CSV中

# coding=gbk
# -*- coding:uft-8 -*-
# @Time: 2022/12/19
# @Author: 十架bgm
# @FileName: 读入csv中
import requests
from lxml import etree
import re
from faker import Factory # 随机ua
import csv
def collect(url):
    """Scrape car names and prices from ``url`` and append them to a CSV file.

    The page is fetched with a randomly generated User-Agent. Car names are
    taken from the ``span`` text nodes under ``div#carlist`` and prices from
    the ``Total brand_col`` markup; the two lists are paired positionally,
    each pair is printed, and all pairs are appended to ``车子价格表.csv`` in
    the current working directory.

    Args:
        url: Address of the listing page to scrape.

    Returns:
        None. The result is the rows appended to the CSV file.
    """
    fake = Factory.create()
    headers = {'User-Agent': fake.user_agent()}
    resp = requests.get(url=url, headers=headers)
    tree = etree.HTML(resp.text)
    car_names = tree.xpath('//div[@id="carlist"]//span/text()')
    car_prices = re.findall('<i class="Total brand_col">(.*?)<em>万</em></i>', resp.text)

    total_list = []
    # zip() pairs names and prices by position and stops at the shorter list.
    for name, price in zip(car_names, car_prices):
        dic = {'车名': name, '价格': price + '万'}
        total_list.append(dic)
        print(dic)

    header = ['车名', '价格']  # column names
    # 'gbk' replaces 'ANSI': that name is an alias of the Windows-only 'mbcs'
    # codec and raises LookupError elsewhere. On a Chinese-locale Windows
    # machine (assumed from the file's "coding=gbk" header) the bytes match.
    with open('车子价格表.csv', 'a', encoding='gbk', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=header)
        # The file is opened in append mode, so only write the header when the
        # file is still empty; otherwise every run would add another header.
        if f.tell() == 0:
            writer.writeheader()
        # Let the csv writer emit the data rows too, so fields containing
        # commas are quoted and no stray space precedes the price.
        writer.writerows(total_list)


if __name__ == '__main__':
    url = 'https://hangzhou.taoche.com/all/'
    collect(url)

EXCEL的读取

# coding=gbk
# -*- coding:uft-8 -*-
# @Time: 2022/12/19
# @Author: 十架bgm
# @FileName: excel
import pandas as pd
# Load the workbook into a DataFrame and print it.
file_path = r'车子价格表.xlsx' # raw string: backslashes are kept literally (only matters for Windows-style paths; this name has none)
raw_data = pd.read_excel(file_path, header=0) # header=0: the first row supplies the column labels, so it is not returned as data
print(raw_data)
# print(type(raw_data)) #<class 'pandas.core.frame.DataFrame'>

CSV的读取

import csv
# Read the CSV back and print every data row, skipping header rows.
# 'gbk' replaces 'ANSI': that name is an alias of the Windows-only 'mbcs' codec
# and raises LookupError elsewhere. newline='' is what the csv module asks for
# so that it can handle line endings itself.
with open('车子价格表.csv', 'r', encoding='gbk', newline='') as f:
    # 1. Create the reader object.
    reader = csv.reader(f)
    # 2. Iterate over the rows.
    for r in reader:
        # csv.reader yields each row as a list of fields, so the header must be
        # compared as a list; testing for the joined string '车名,价格' inside
        # the list never matched and the header was always printed.
        if r != ['车名', '价格']:
            print(r)

EXCEL转化CSV

import pandas as pd
# Convert sheet 'Sheet1' of 车子价格表.xlsx to a CSV file.
# Passing index_col=0 to read_excel would turn the first column into the row
# index instead of keeping it as a data column, so it is not used here.
data = pd.read_excel('车子价格表.xlsx', sheet_name='Sheet1')
# 'gbk' replaces 'ANSI': that name is an alias of the Windows-only 'mbcs' codec
# and raises LookupError on other platforms.
data.to_csv('excel转化为csv.csv', index=False, encoding='gbk')
posted @   __username  阅读(14)  评论(0)  编辑  收藏  举报

本文作者:DIVMonster

本文链接:https://www.cnblogs.com/guangzan/p/12886111.html

版权声明:本作品采用知识共享署名-非商业性使用-禁止演绎 2.5 中国大陆许可协议进行许可。

点击右上角即可分享
微信分享提示
评论
收藏
关注
推荐
深色
回顶
收起