#!/usr/bin/env python
#encoding=utf-8
"""
cat *.csv >1.txt
iconv -fgb18030 -tutf-8 ./1.txt -o2.txt
cat 2.txt|sort|uniq >3.txt
wc -l 3.txt
The file must be UTF-8 encoded
and must not contain garbled characters.
"""
# Parse the prepared input into the three module-level accumulators that
# calc_index() and calc_head() read.
#
# Each usable line has exactly four comma-separated fields:
#   first-level category, second-level category, shop, numeric value
# Header lines (containing "一级分类"), blank lines, lines with a different
# field count and lines whose fourth field is not a number are skipped.
mylist = []   # "cat1,cat2" keys in first-seen order; one output row each
dict01 = {}   # "cat1,cat2" key -> {shop field: raw fourth field}
shop = set()  # every distinct shop field seen on an accepted line

# `with` guarantees the input handle is closed even if parsing raises;
# iterating the handle streams lines instead of loading them all at once.
with open("/home/mlzboy/out/3.txt", "r") as source:
    for line in source:
        line = line.strip()
        if not line or "一级分类" in line:
            continue
        li = line.split(",")
        if len(li) != 4:
            continue
        # Validate only: the value is stored raw (quotes and padding kept).
        try:
            float(li[3].strip().replace('"', ''))
        except ValueError:
            continue
        key01 = ",".join(li[:2])
        shop.add(li[2])
        if key01 not in dict01:
            # First time this category pair appears: remember its position.
            dict01[key01] = {}
            mylist.append(key01)
        # A later line for the same key and shop overwrites the earlier value.
        dict01[key01][li[2]] = li[3]
def calc_index(elem):
    """Build the CSV row for one category key.

    Reads the module-level ``dict01`` (key -> {shop: raw value}) and
    ``shop`` (iterable of shop names). For every shop, in ``shop``'s
    iteration order, the row gets the raw stored value when it parses as a
    number greater than zero, and an empty cell otherwise (shop missing for
    this key, value not numeric, or value <= 0).

    Unlike the earlier implementation this does not rebind the global
    ``shop``; iterating the unmodified collection yields the same order as
    iterating ``list(shop)``.

    Args:
        elem: a "cat1,cat2" key present in ``dict01``.

    Returns:
        ``elem`` and the per-shop cells joined by commas, or ``None`` when
        every cell is empty.

    Raises:
        KeyError: if ``elem`` is not a key of ``dict01``.
    """
    values_by_shop = dict01[elem]  # loop-invariant, so looked up once
    cells = []
    for s in shop:
        if s not in values_by_shop:
            cells.append("")
            continue
        raw = values_by_shop[s]
        try:
            keep = float(raw.strip().replace('"', '')) > 0
        except ValueError:
            # Single-argument call form prints identically on Python 2 and 3.
            print("not number!")
            keep = False
        # The raw text (quotes/padding included) is emitted, not the float.
        cells.append(raw if keep else "")
    if not any(cells):
        return None
    return ",".join([elem] + cells)
def calc_head():
    """Return the CSV header line, without a line terminator.

    The header is the two fixed category column titles followed by every
    entry of the module-level ``shop`` collection in its iteration order.
    The global ``shop`` is only read: it is no longer rebound to a list as
    a side effect of computing the header.
    """
    return ",".join(["一级分类", "二级分类"] + list(shop))
# Assemble the pivoted table: one header line, then one line per category
# key (in first-seen order) that has at least one non-empty cell. Every
# line is terminated with "\r\n".
header = calc_head() + "\r\n"
lines = [header]
for elem in mylist:
    r = calc_index(elem)
    if r is not None:
        lines.append(r + "\r\n")
# Join once instead of growing the string with += on every row.
content = "".join(lines)

# write() emits the text in one call; writelines() on a plain string would
# iterate it character by character. `with` closes the file even on error.
with open("result.csv", "w") as f:
    f.write(content)