Python根据csv某列数据操作某一后缀的文件

# coding=gbk
import os
import csv
import pandas as pd
import shutil

def get_svsfile(path_svs):
    """Print the base name of every ``.svs`` file found in a folder.

    Args:
        path_svs: Folder whose direct entries are inspected.

    Entries are visited in the order ``os.listdir`` returns them, and the
    extension comparison is case-sensitive. Nothing is returned; each
    matching name (without its extension) is printed on its own line.
    """
    for entry in os.listdir(path_svs):
        # os.path.splitext separates the base name from the extension.
        stem, extension = os.path.splitext(entry)
        if extension != '.svs':
            continue
        print(stem)

def create_csv(path_csv):
    """Create (or overwrite) a CSV file that contains only the header row.

    Args:
        path_csv: Path of the CSV file to write.

    The header columns are ``case_id``, ``slide_id`` and ``label``.
    """
    csv_head = ["case_id", "slide_id", "label"]
    # newline='' lets the csv module control line endings itself; without
    # it, Windows text mode turns the writer's '\r\n' into '\r\r\n', which
    # shows up as an empty line after every row.
    with open(path_csv, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(csv_head)
        # Use writer.writerows(...) here to append several data rows at once.

def create_label_csv(path_svs, path_csv):
    """Write a label-template CSV listing every ``.svs`` file in a folder.

    Args:
        path_svs: Folder whose direct entries are scanned for ``.svs`` files.
        path_csv: Path of the CSV file to create (overwritten if present).

    The file starts with the header ``case_id, slide_id, label``. Each
    ``.svs`` entry contributes one row whose ``slide_id`` is the file name
    without its extension; ``case_id`` and ``label`` are left empty so a
    later step can fill them in.
    """
    # List the folder before opening the output, as the original did.
    entries = os.listdir(path_svs)
    # newline='' lets the csv module control line endings itself; without
    # it, Windows text mode writes an empty line after every row.
    with open(path_csv, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["case_id", "slide_id", "label"])
        for entry in entries:
            # os.path.splitext separates the base name from the extension.
            stem, extension = os.path.splitext(entry)
            if extension == '.svs':
                writer.writerow(['', stem, ''])

def excel_to_csv(path_excel, path_csv='meta_yunnan.csv'):
    """Convert an Excel workbook to a CSV file.

    Args:
        path_excel: Path of the Excel file, read with ``pandas.read_excel``
            using its defaults.
        path_csv: Path of the CSV file to write. Defaults to
            ``'meta_yunnan.csv'``, the name this function always used
            before the parameter was added.

    The CSV is written with ``DataFrame.to_csv`` defaults, so the row index
    is included as the first column.
    """
    file_excel = pd.read_excel(path_excel)
    file_excel.to_csv(path_csv)

def gen_final_csv(path_csv, metadata_csv):
    """Fill in ``case_id`` and ``label`` for each slide from a metadata CSV.

    Args:
        path_csv: CSV with a ``slide_id`` column (one row per slide).
        metadata_csv: CSV holding the columns ``术前病理号`` (matched against
            ``slide_id``), ``病理版编号`` (copied into ``case_id``) and the
            tumour-regression-grade column (used to derive ``label``).

    For every slide whose ``slide_id`` equals some ``术前病理号`` value, the
    LAST matching metadata row supplies ``case_id``; ``label`` becomes 0
    when that row's regression grade equals 0 and 1 otherwise. Slides with
    no matching metadata row are dropped. The result is sorted by ``label``
    and written to ``_final_.csv`` in the current working directory
    (row index included). The total number of (slide, metadata row) matches
    is printed.
    """
    key_col = '术前病理号'
    case_col = '病理版编号'
    grade_col = '肿瘤退缩程度(0:完全退缩,1:单个或小灶癌残余,2:部分癌残留,3:大量癌残留;4 不适合评价)'

    final_csv = pd.read_csv(path_csv, engine='python')
    metadata = pd.read_csv(metadata_csv)

    # An all-empty ``case_id`` column is parsed as float64; writing string
    # IDs into it triggers pandas' incompatible-dtype upcast (deprecated,
    # and an error in newer releases). Holding the column as object lets
    # any kind of ID be stored verbatim.
    if 'case_id' in final_csv.columns:
        final_csv['case_id'] = final_csv['case_id'].astype(object)

    # Hoisted: the key column is the same for every slide.
    metadata_keys = metadata[key_col]

    count = 0
    unmatched = []
    for i, svsname in final_csv[['slide_id']].itertuples():
        # One vectorised comparison replaces the membership test plus the
        # nested row-by-row loop.
        matches = metadata[metadata_keys == svsname]
        if matches.empty:
            unmatched.append(i)
            continue
        count += len(matches)
        # When several metadata rows match, the last one wins (this is what
        # repeated overwriting in a top-to-bottom scan produces).
        last_match = matches.iloc[-1]
        final_csv.loc[i, 'case_id'] = last_match[case_col]
        final_csv.loc[i, 'label'] = 0 if last_match[grade_col] == 0 else 1

    # Remove slides that have no metadata, then order the rows by label.
    final_csv = final_csv.drop(unmatched)
    final_csv = final_csv.sort_values('label')
    print(count)
    final_csv.to_csv('_final_.csv')

def copy_file(path_svs, path_final_csv):
    """Copy each labelled slide into a per-label training folder.

    Args:
        path_svs: Folder that contains the ``<slide_id>.svs`` files. A
            trailing path separator is accepted but not required.
        path_final_csv: CSV with ``slide_id`` and ``label`` columns.

    Rows labelled 0 are copied to ``train_folder/pcr`` and rows labelled 1
    to ``train_folder/non_pcr`` (both relative to the current working
    directory). Rows with any other label are skipped.

    Raises:
        FileNotFoundError: If a slide listed in the CSV is missing from
            ``path_svs`` (raised by ``shutil.copy``).
    """
    destinations = {0: "train_folder/pcr", 1: "train_folder/non_pcr"}
    final_csv = pd.read_csv(path_final_csv)
    for i, svslabel in final_csv[['label']].itertuples():
        target_dir = destinations.get(svslabel)
        if target_dir is None:
            continue
        # If the destination is not an existing directory, shutil.copy
        # treats it as a file name: it either fails or silently overwrites
        # one file called 'pcr'/'non_pcr' for every slide. Create it first.
        os.makedirs(target_dir, exist_ok=True)
        slide_file = str(final_csv.loc[i, 'slide_id']) + '.svs'
        shutil.copy(os.path.join(path_svs, slide_file), target_dir)

if __name__ == '__main__':
    # Script entry point: only the final copy step is active. The earlier
    # pipeline steps were run once and are kept below for reference.
    #
    #   path_csv = "_yunnan_svs_name_.csv"
    #   metadata_csv = "meta_yunnan.csv"
    #   get_svsfile(path_svs)
    #   create_csv(path_csv)
    #   create_label_csv(path_svs, path_csv)
    #   file_csv = pd.read_csv(path_csv, engine='python')
    #   print(file_csv)
    #   excel_to_csv('entireData.xlsx')
    #   gen_final_csv(path_csv, metadata_csv)
    path_final_csv = "_final_.csv"
    path_svs = "F:/数据/附三院直肠癌新辅助肠镜病理/lzhpCR图像/"
    copy_file(path_svs=path_svs, path_final_csv=path_final_csv)

 

posted @ 2021-03-27 08:44  DuanYongchun  阅读(166)  评论(0)  编辑  收藏  举报