73 数据的写入与读取:Excel 或 CSV

写入 Excel

# coding=gbk
# -*- coding:utf-8 -*-
# @Time: 2022/12/19
# @Author: 十架bgm
# @FileName: 读入excel中
"""
爬取的网站:https://hangzhou.taoche.com/all/
"""
import requests
from lxml import etree
import re
from faker import Factory  # 随机ua
import pandas as pd
import os
# Set the NO_PROXY environment variable for this process before any request is made.
# NOTE(review): NO_PROXY usually holds host names rather than a full URL, and this
# endpoint is not the site scraped below — confirm whether this line is still needed.
os.environ['NO_PROXY'] = 'https://cc-api.sbaliyun.com/v1/completions'

def collect(url):
    """Scrape car names and prices from a listing page and save them to Excel.

    Downloads ``url`` with a randomly generated User-Agent, extracts car names
    via XPath and prices via a regular expression, pairs them in page order and
    writes the result to ``车子价格表.xlsx`` (relative to the working directory).

    Args:
        url: Address of the listing page to download.

    Raises:
        requests.exceptions.RequestException: If the request fails or exceeds
            the timeout.
    """
    fake = Factory.create()
    headers = {
        'User-Agent': fake.user_agent(),  # fresh random UA for every call
    }
    # A timeout keeps the script from hanging forever on a stalled connection.
    resp = requests.get(url=url, headers=headers, timeout=10)
    tree = etree.HTML(resp.text)
    car_names = tree.xpath('//div[@id="carlist"]//span/text()')
    car_prices = re.findall(r'<i class="Total brand_col">(.*?)<em>万</em></i>', resp.text)

    # zip() stops at the shorter sequence, so unmatched names/prices are dropped.
    total_list = [
        {'车名': name, '价格': price + '万'}
        for name, price in zip(car_names, car_prices)
    ]

    # Pin the column order so an empty scrape still yields a sheet with headers.
    pf = pd.DataFrame(total_list, columns=['车名', '价格'])
    # Passing the path directly lets pandas create, save and close the workbook.
    # The previous ExcelWriter.save() call and the to_excel(encoding=...) keyword
    # were both removed in pandas 2.0 and crash on current versions.
    pf.to_excel('车子价格表.xlsx', index=False)


if __name__ == '__main__':
    # Script entry point: scrape the Hangzhou listing page of taoche.com.
    collect(url='https://hangzhou.taoche.com/all/')

写入 CSV

# coding=gbk
# -*- coding:utf-8 -*-
# @Time: 2022/12/19
# @Author: 十架bgm
# @FileName: 读入csv中
import requests
from lxml import etree
import re
from faker import Factory  # 随机ua
import csv



def collect(url):
    """Scrape car names and prices from a listing page and append them to a CSV.

    Downloads ``url`` with a randomly generated User-Agent, extracts car names
    via XPath and prices via a regular expression, prints each pair and appends
    the rows to ``车子价格表.csv`` (relative to the working directory). The
    header row is written only when the file is new or empty, so repeated runs
    do not scatter header lines through the data.

    Args:
        url: Address of the listing page to download.

    Raises:
        requests.exceptions.RequestException: If the request fails or exceeds
            the timeout.
        UnicodeEncodeError: If a scraped value cannot be represented in GBK.
    """
    import os  # local import: this script's import block does not include os

    fake = Factory.create()
    headers = {
        'User-Agent': fake.user_agent(),  # fresh random UA for every call
    }
    # A timeout keeps the script from hanging forever on a stalled connection.
    resp = requests.get(url=url, headers=headers, timeout=10)
    tree = etree.HTML(resp.text)
    car_names = tree.xpath('//div[@id="carlist"]//span/text()')
    car_prices = re.findall(r'<i class="Total brand_col">(.*?)<em>万</em></i>', resp.text)

    total_list = []
    # zip() stops at the shorter sequence, so unmatched names/prices are dropped.
    for name, price in zip(car_names, car_prices):
        dic = {
            '车名': name,
            '价格': price + '万'
        }
        total_list.append(dic)
        print(dic)

    csv_path = '车子价格表.csv'
    # The file is opened in append mode, so only emit the header the first time.
    needs_header = not os.path.exists(csv_path) or os.path.getsize(csv_path) == 0

    # 'ANSI' is a Windows-only alias (mbcs) and raises LookupError elsewhere.
    # 'gbk' corresponds to the ANSI code page of Simplified-Chinese Windows and
    # is available on every platform.
    with open(csv_path, 'a', encoding='gbk', newline='') as f:
        writer = csv.writer(f)
        if needs_header:
            writer.writerow(['车名', '价格'])  # column names
        # Write data through the csv writer (not f.write) so values containing
        # commas or quotes are escaped and line endings match the header row.
        writer.writerows([car['车名'], car['价格']] for car in total_list)


if __name__ == '__main__':
    # Script entry point: scrape the Hangzhou listing page of taoche.com.
    collect(url='https://hangzhou.taoche.com/all/')

EXCEL的读取

# coding=gbk
# -*- coding:utf-8 -*-
# @Time: 2022/12/19
# @Author: 十架bgm
# @FileName: excel
import pandas as pd

# Load the exported workbook into a DataFrame and print it.
file_path = r'车子价格表.xlsx'   # raw string: backslashes are kept literally (only matters for Windows-style paths)
raw_data = pd.read_excel(file_path, header=0)  # header=0: the first row supplies the column names instead of being data
print(raw_data)
# print(type(raw_data))  #<class 'pandas.core.frame.DataFrame'>

CSV的读取

import csv

# Print every data row of the exported CSV, skipping header rows.
# 'ANSI' is a Windows-only alias (mbcs) and raises LookupError elsewhere; 'gbk'
# corresponds to the ANSI code page of Simplified-Chinese Windows and works on
# every platform. newline='' is the mode the csv module expects for its files.
with open('车子价格表.csv', 'r', encoding='gbk', newline='') as f:
    # 1. Create the reader object.
    reader = csv.reader(f)
    # 2. Iterate over the parsed rows.
    for r in reader:
        # Each row is a list of fields, so compare against the header as a list.
        # The old check "'车名,价格' not in r" looked for one combined string among
        # the fields and therefore never filtered anything. Checking every row
        # (not just the first) also handles files with repeated header lines
        # left behind by several append runs.
        if r != ['车名', '价格']:
            print(r)

EXCEL转化CSV

import pandas as pd
# Convert the first worksheet of the exported workbook to a CSV file.
# (Passing index_col=0 to read_excel would turn the first column into the row
# index, so it would be omitted from the CSV written with index=False.)
data = pd.read_excel('车子价格表.xlsx', sheet_name='Sheet1')
# 'ANSI' is a Windows-only alias (mbcs) and raises LookupError elsewhere; 'gbk'
# corresponds to the ANSI code page of Simplified-Chinese Windows and works on
# every platform.
data.to_csv('excel转化为csv.csv', index=False, encoding='gbk')
posted @ 2023-02-11 15:50  __username  阅读(14)  评论(0)  编辑  收藏  举报

本文作者:DIVMonster

本文链接:https://www.cnblogs.com/guangzan/p/12886111.html

版权声明:本作品采用知识共享署名-非商业性使用-禁止演绎 2.5 中国大陆许可协议进行许可。