Python文件转换问题

文件的打开:

# Open data.txt in text read mode, decoding its contents as UTF-8.
file = open("data.txt", mode="r", encoding="UTF-8")

其中第一个参数为文件的路径和名称;如果文件放在项目根目录(即程序运行时的当前工作目录)下,则只需写文件名,不必指定路径。

 

文本数据清洗:

import csv
import py2neo
from py2neo import Graph,Node,Relationship,NodeMatcher

# -*-coding:utf-8-*-
import re
# Clean the text data: replace every comma with a space and remove every
# double quote, then write the cleaned lines back over the same file.

# Read and clean all lines first. The file is reopened in 'w' mode below,
# which truncates it, so the content must be fully in memory before that.
# `with` guarantees the handle is closed even if reading raises.
with open("data.txt", "r", encoding='utf-8') as file:
    lineList = [
        line.replace(',', ' ').replace('"', '')  # comma -> space, drop quotes
        for line in file
    ]
print("Read file End or Error")

# Overwrite the original file with the cleaned lines (line endings are
# preserved because they were never stripped).
with open(r'data.txt', 'w', encoding='UTF-8') as file:
    file.writelines(lineList)

清洗前后对比:

 

 

 

 文本txt转xls:

# -*-coding:utf-8-*-
import xlwt
# import openpyxl

# Convert the space-separated text file into an Excel .xls workbook:
# 1. create the workbook object
# 2. create the sheet
# 3. prepare the data
# 4. write it out cell by cell (row, column)

# Read every line up front; `with` closes the file even if reading fails.
with open("data.txt", "r", encoding="UTF-8") as file:
    a1 = file.readlines()

workbook1 = xlwt.Workbook(encoding="UTF-8")
worksheet1 = workbook1.add_sheet('dataX')
for index, row in enumerate(a1):
    # Drop the line terminator before splitting so that it does not end up
    # inside the last cell of each row.
    d = row.rstrip('\n').split(' ')
    for col, value in enumerate(d):
        # NOTE(review): xlwt row/column indices are zero-based, so the +1
        # offsets leave row 0 and column 0 empty. Kept to preserve the
        # existing sheet layout -- confirm whether this is intended.
        worksheet1.write(index + 1, col + 1, value)
workbook1.save('datax.xls')


# def write_line_excel():
#     work_book=openpyxl.Workbook()
#     sheet=work_book.create_sheet('new_data')
#     data=open('data.txt', 'r', encoding='utf-8')
#     datas=data.readlines()
#     for index,row in enumerate(datas):
#         d=row.split(',')
#         for col in range(len(d)):
#             sheet.cell(index+1,col+1,d[col])
#
#     work_book.save('data1.xls')
# write_line_excel()

xls转成csv:

########xls转成csv文件############
import pandas as pd

def xlsx_to_csv_pd(xls_file, encoding="GBK"):
    """Convert an Excel workbook to a CSV file stored next to it.

    The workbook is read with pandas using its first column as the index,
    and written to the same path with the final extension replaced by
    ``.csv``. The extension-less path is printed before writing.

    Args:
        xls_file: Path of the Excel file to convert.
        encoding: Text encoding used for the CSV output. Defaults to
            ``"GBK"``.
    """
    import os  # local import: only needed for the path handling below

    data_xls = pd.read_excel(xls_file, index_col=0)
    # Strip only the final extension. Splitting on the first '.' would
    # truncate paths such as "./data1.xls" or "report.v2.xls".
    csv_file = os.path.splitext(xls_file)[0]
    print(csv_file)
    data_xls.to_csv(csv_file + '.csv', encoding=encoding)


# Convert the workbook produced by the txt-to-xls step, which saves
# 'datax.xls'.
xlsx_to_csv_pd('datax.xls')

这里注意:CSV文件本身没有固定的默认编码,但中文版Windows上的Excel会按系统默认编码(GBK)打开CSV文件;如果以不带BOM的UTF-8保存,用Excel打开时中文会出现乱码。因此这里用GBK保存(也可以改用utf-8-sig)。

 

posted @ 2022-02-28 22:40  XA科研  阅读(53)  评论(0)  编辑  收藏  举报