批量操作,提高计算速度
# Baseline de-duplication: keep the first occurrence of every line of the
# submission file, in file order, and write the survivors to a second file.
# Lines are compared together with their line terminators, and each incoming
# line is checked against the list of lines kept so far.
import os  # not used by this snippet itself; kept as in the original

submission100_10_f = 'submission100_10.csv'
submission100_10_f_uniq = 'submission100_10_uniq.csv'

unique_lines = []
with open(submission100_10_f, 'r', encoding='utf-8') as src:
    for line in src:
        if line in unique_lines:
            continue
        unique_lines.append(line)

with open(submission100_10_f_uniq, 'w', encoding='utf-8') as dst:
    dst.writelines(unique_lines)
import os  # not used by this snippet itself; kept as in the original


def unique_lines(lines):
    """Return the distinct lines of *lines*, keeping first-seen order.

    Every returned line ends with a newline: a final line that lacks a
    terminator is normalised first, so it is recognised as a duplicate of an
    earlier identical record and can never be glued onto the following record
    when the result is joined and written out.

    Args:
        lines: Iterable of text lines (for example an open text file).

    Returns:
        A list of unique, newline-terminated lines in order of first
        appearance.
    """
    terminated = (
        line if line.endswith('\n') else line + '\n' for line in lines
    )
    # One batch operation: dict keys are unique and keep insertion order,
    # which makes the output deterministic (a plain set() would not be).
    return list(dict.fromkeys(terminated))


if __name__ == '__main__':
    submission100_10_f = 'submission100_10.csv'
    with open(submission100_10_f, 'r', encoding='utf-8') as fr:
        deduplicated = unique_lines(fr)

    submission100_10_f_uniq = 'submission100_10_uniq.csv'
    with open(submission100_10_f_uniq, 'w', encoding='utf-8') as fw:
        fw.write(''.join(deduplicated))
import sys


def filter_rows(rows, allowed_keys):
    """Keep the rows whose key is listed in *allowed_keys*, without duplicates.

    The key of a row is its first two comma-separated fields joined by a
    comma. The line terminator is removed before the key is built, so a row
    that consists of exactly two fields still matches its key. Duplicates are
    detected on the whole row (two rows with the same key but different
    remaining fields are both kept).

    Args:
        rows: Iterable of CSV text lines.
        allowed_keys: Container of "field0,field1" strings supporting ``in``.

    Returns:
        A list of unique, newline-terminated rows in order of first
        appearance.
    """
    kept = []
    for row in rows:
        body = row.rstrip('\n')
        key = ','.join(body.split(',')[0:2])
        if key not in allowed_keys:
            continue
        # Re-terminate every row so an unterminated last line cannot be
        # fused with the next record in the output.
        kept.append(body + '\n')
    # Order-preserving de-duplication; set() would shuffle the rows.
    return list(dict.fromkeys(kept))


if __name__ == '__main__':
    t_f = 'test1.csv'
    with open(t_f, 'r', encoding='utf-8') as fr:
        t_l = [line.rstrip('\n') for line in fr]

    submission100_10_f = 'submission100_10.csv'
    with open(submission100_10_f, 'r', encoding='utf-8') as fr:
        # NOTE(review): t_l is passed as a list, so every row costs a linear
        # scan; this snippet is kept as the per-row-lookup version. Pass
        # set(t_l) instead for constant-time lookups.
        filtered = filter_rows(fr, t_l)

    submission100_10_f_uniq = 'submission100_10_uniq.csv'
    with open(submission100_10_f_uniq, 'w', encoding='utf-8') as fw:
        print(len(filtered))
        fw.write(''.join(filtered))

    # Same exit status as before. Unlike os._exit, sys.exit lets buffered
    # output (the print above) be flushed and does not depend on an
    # `import os` located in a different snippet.
    sys.exit(211)
过滤通过集合运算实现,只需计算一次
def index_rows_by_key(rows):
    """Map each row's key to the last row seen with that key.

    The key is the first two comma-separated fields joined by a comma. The
    line terminator is stripped before parsing and re-added to the stored
    row, so rows with only two fields and an unterminated final row are
    written back intact instead of being corrupted.

    Args:
        rows: Iterable of CSV text lines.

    Returns:
        A dict from "field0,field1" to the newline-terminated row. Keys keep
        the position of their first occurrence; the stored row is the last
        one read for that key.
    """
    row_by_key = {}
    for row in rows:
        body = row.rstrip('\n')
        key = ','.join(body.split(',')[0:2])
        row_by_key[key] = body + '\n'
    return row_by_key


def drop_unlisted_keys(row_by_key, allowed_keys):
    """Return a copy of *row_by_key* without the keys missing from *allowed_keys*.

    The keys to remove are computed with a single set difference rather than
    one membership test per row.

    Args:
        row_by_key: Dict as produced by ``index_rows_by_key``.
        allowed_keys: Iterable of keys that may stay.

    Returns:
        A new dict holding only the allowed entries, in their original order.
    """
    unlisted = row_by_key.keys() - set(allowed_keys)
    return {
        key: row for key, row in row_by_key.items() if key not in unlisted
    }


if __name__ == '__main__':
    t_f = 'test1.csv'
    with open(t_f, 'r', encoding='utf-8') as fr:
        t_l = [line.rstrip('\n') for line in fr]

    submission100_10_f = 'submission100_10.csv'
    with open(submission100_10_f, 'r', encoding='utf-8') as fr:
        v_d = index_rows_by_key(fr)

    print(len(v_d))  # distinct keys before filtering
    v_d = drop_unlisted_keys(v_d, t_l)
    print(len(v_d))  # distinct keys after filtering

    submission100_10_f_uniq = 'submission100_10_uniq.csv'
    with open(submission100_10_f_uniq, 'w', encoding='utf-8') as fw:
        filtered = list(v_d.values())
        print(len(filtered))
        fw.write(''.join(filtered))