将爬取的数据写入 Excel 文件
| |
| |
| |
| |
| |
| """ |
| 爬取的网站:https://hangzhou.taoche.com/all/ |
| """ |
| import requests |
| from lxml import etree |
| import re |
| from faker import Factory |
| import pandas as pd |
| import os |
# Set the NO_PROXY environment variable for this process.
# NOTE(review): NO_PROXY conventionally holds a comma-separated list of host
# names rather than a full URL, and this host is not the site scraped below --
# confirm this line is intended.
os.environ.update(NO_PROXY='https://cc-api.sbaliyun.com/v1/completions')
| |
def collect(url):
    """Scrape car names and prices from a listing page into an Excel file.

    Requests ``url`` with a User-Agent string produced by Faker, extracts the
    car names with an XPath query under the ``carlist`` element and the prices
    with a regular expression over the raw HTML, pairs them by position, and
    writes the rows to ``车子价格表.xlsx`` in the current working directory.

    Args:
        url: Address of the listing page to scrape.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server responds with an HTTP error status.
    """
    headers = {'User-Agent': Factory.create().user_agent()}
    # A timeout keeps the script from hanging forever on a stalled connection.
    resp = requests.get(url=url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of silently writing an empty workbook.
    resp.raise_for_status()

    tree = etree.HTML(resp.text)
    car_names = tree.xpath('//div[@id="carlist"]//span/text()')
    car_prices = re.findall(
        '<i class="Total brand_col">(.*?)<em>万</em></i>', resp.text
    )

    # Names and prices are paired purely by position; zip() stops at the
    # shorter of the two lists.
    total_list = [
        {'车名': name, '价格': price + '万'}
        for name, price in zip(car_names, car_prices)
    ]

    # Explicit columns keep the header row even when nothing was scraped.
    pf = pd.DataFrame(total_list, columns=['车名', '价格'])
    # ExcelWriter.save() and to_excel(encoding=...) were removed in pandas 2.0;
    # the context manager saves and closes the workbook on exit.
    with pd.ExcelWriter('车子价格表.xlsx') as writer:
        pf.to_excel(writer, index=False)
| |
| |
if __name__ == '__main__':
    # Script entry point: scrape the listing page at this address.
    collect(url='https://hangzhou.taoche.com/all/')
将爬取的数据写入 CSV 文件
| |
| |
| |
| |
| |
| import requests |
| from lxml import etree |
| import re |
| from faker import Factory |
| import csv |
| |
| |
| |
def collect(url):
    """Scrape car names and prices from a listing page and append them to a CSV.

    Requests ``url`` with a User-Agent string produced by Faker, extracts the
    car names with an XPath query under the ``carlist`` element and the prices
    with a regular expression over the raw HTML, pairs them by position,
    prints each pair, and appends the rows to ``车子价格表.csv`` in the current
    working directory.

    Args:
        url: Address of the listing page to scrape.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server responds with an HTTP error status.
    """
    headers = {'User-Agent': Factory.create().user_agent()}
    # A timeout keeps the script from hanging forever on a stalled connection.
    resp = requests.get(url=url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of silently appending nothing.
    resp.raise_for_status()

    tree = etree.HTML(resp.text)
    car_names = tree.xpath('//div[@id="carlist"]//span/text()')
    car_prices = re.findall(
        '<i class="Total brand_col">(.*?)<em>万</em></i>', resp.text
    )

    # Names and prices are paired purely by position; zip() stops at the
    # shorter of the two lists.
    total_list = []
    for name, price in zip(car_names, car_prices):
        dic = {'车名': name, '价格': price + '万'}
        total_list.append(dic)
        print(dic)

    header = ['车名', '价格']
    # NOTE(review): the 'ANSI' codec name appears to resolve only on Windows
    # (alias of mbcs) -- confirm before running on another platform.
    with open('车子价格表.csv', 'a', encoding='ANSI', newline='') as f:
        # Every row goes through the csv module so that fields containing
        # commas or quotes are escaped and line endings stay consistent.
        writer = csv.DictWriter(f, fieldnames=header)
        # The file is opened for appending: only emit the header when the
        # file is still empty, so repeated runs do not duplicate it.
        if f.tell() == 0:
            writer.writeheader()
        writer.writerows(total_list)
| |
| |
if __name__ == '__main__':
    # Script entry point: scrape the listing page at this address.
    collect(url='https://hangzhou.taoche.com/all/')
EXCEL的读取
| |
| |
| |
| |
| |
import pandas as pd

# Load the workbook into a DataFrame, using the first row as column names,
# then print the whole table.
file_path = r'车子价格表.xlsx'
raw_data = pd.read_excel(io=file_path, header=0)
print(raw_data)
| |
CSV的读取
import csv

# Print every data row of the scraped CSV, skipping header rows.
header = ['车名', '价格']

# newline='' lets the csv module handle line endings itself, as its
# documentation recommends for files passed to csv.reader.
with open('车子价格表.csv', 'r', encoding='ANSI', newline='') as f:
    reader = csv.reader(f)
    for r in reader:
        # Each row is a list of fields, so the header has to be compared as a
        # list; testing for the joined string '车名,价格' never matched and
        # let the header rows through.
        if r != header:
            print(r)
将 Excel 文件转换为 CSV
import pandas as pd

# Read the 'Sheet1' worksheet of the workbook and write it back out as a CSV
# file, without the DataFrame index column.
data = pd.read_excel('车子价格表.xlsx', sheet_name='Sheet1')
data.to_csv('excel转化为csv.csv', encoding='ANSI', index=False)
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步