批量操作 提高计算速度

 

 

import os

# Copy the input file to the output file, keeping only the first occurrence
# of each line (lines are compared verbatim, including their line ending).
l = []
# Companion set for the membership test: checking `i not in l` against the
# list itself rescans the whole list for every input line (quadratic time).
seen = set()
submission100_10_f = 'submission100_10.csv'
with open(submission100_10_f, 'r', encoding='utf-8') as fr:
    for i in fr:
        if i not in seen:
            seen.add(i)
            l.append(i)

submission100_10_f_uniq = 'submission100_10_uniq.csv'
with open(submission100_10_f_uniq, 'w', encoding='utf-8') as fw:
    # Each kept line still carries its own line ending, so a plain join
    # reproduces the file layout.
    fw.write(''.join(l))

  

 

 

 

import os

# Batch variant of the de-duplication: read every line first, then remove
# duplicates with a single built-in operation instead of a per-line check.
submission100_10_f = 'submission100_10.csv'
with open(submission100_10_f, 'r', encoding='utf-8') as fr:
    l = list(fr)

submission100_10_f_uniq = 'submission100_10_uniq.csv'
with open(submission100_10_f_uniq, 'w', encoding='utf-8') as fw:
    # dict.fromkeys() drops duplicates in one pass like set() does, but a
    # dict keeps keys in insertion order (guaranteed since Python 3.7), so
    # the lines are written in their original order rather than in the
    # arbitrary iteration order of a set.
    l = list(dict.fromkeys(l))
    fw.write(''.join(l))

  

 

# Keep only the submission lines whose key (the first two comma-separated
# fields) appears as a line of the reference file, drop duplicate lines,
# write the result, then stop the script with exit status 211.
t_f = 'test1.csv'
with open(t_f, 'r', encoding='utf-8') as fr:
    # Stored as a set so that every membership test below is a hash lookup
    # rather than a scan over all reference lines.
    t_l = {i.replace('\n', '') for i in fr}

submission100_10_f = 'submission100_10.csv'
with open(submission100_10_f, 'r', encoding='utf-8') as fr:
    l = [i for i in fr if ','.join(i.split(',')[0:2]) in t_l]

# dict.fromkeys() removes duplicate lines while keeping the order in which
# they were read (guaranteed since Python 3.7); set() would scramble it.
l = list(dict.fromkeys(l))
print(len(l))

submission100_10_f_uniq = 'submission100_10_uniq.csv'
with open(submission100_10_f_uniq, 'w', encoding='utf-8') as fw:
    fw.write(''.join(l))

# Same exit status as before, but SystemExit goes through the interpreter's
# normal shutdown, so buffered output (the print above) is flushed;
# os._exit() terminates without flushing stdio buffers.
raise SystemExit(211)

  

通过集合运算实现过滤,只需计算一次

 

# Filter the submission with one set difference instead of a per-line test:
# compute the keys that are absent from the reference file once, then delete
# exactly those entries.
t_f = 'test1.csv'
with open(t_f, 'r', encoding='utf-8') as test_file:
    t_l = [row.replace('\n', '') for row in test_file]

# Map each key (first two comma-separated fields) to the remainder of its
# line; a key that occurs again overwrites the value stored earlier.
submission100_10_f = 'submission100_10.csv'
v_d = {}
with open(submission100_10_f, 'r', encoding='utf-8') as sub_file:
    for row in sub_file:
        fields = row.split(',')
        v_d[','.join(fields[:2])] = ','.join(fields[2:])

# Keys present in the submission but not in the reference file.
set_sub = v_d.keys() - set(t_l)
print(len(v_d))
for unwanted in set_sub:
    del v_d[unwanted]
print(len(v_d))

submission100_10_f_uniq = 'submission100_10_uniq.csv'
with open(submission100_10_f_uniq, 'w', encoding='utf-8') as fw:
    # Re-attach each key to its remainder; the remainder is written exactly
    # as it was read, so no separator is added between entries.
    l = [key + ',' + rest for key, rest in v_d.items()]
    print(len(l))
    fw.write(''.join(l))

  

 

posted @ 2018-05-06 18:41  papering  阅读(309)  评论(0)  编辑  收藏  举报