1.寻找指定路径下所有指定格式的文件

def getFileList(dir, Filelist, ext=None):
    """Recursively collect file paths under ``dir`` into ``Filelist``.

    Args:
        dir: Path of a single file or of a directory to walk. (The name
            shadows the builtin but is kept so keyword callers still work.)
        Filelist: List that receives the matching paths; it is extended
            in place.
        ext: File extension to keep, given with or without the leading
            dot (``"jpg"`` and ``".jpg"`` are equivalent). ``None`` or an
            empty string keeps every file.

    Returns:
        The same ``Filelist`` object that was passed in.
    """
    if os.path.isfile(dir):
        if not ext:
            Filelist.append(dir)
        else:
            # Compare against the whole ".ext" suffix. Looking only at the
            # last three characters misses longer extensions such as
            # "jpeg" and wrongly accepts partial ones such as "pg".
            suffix = "." + ext.lstrip(".")
            if dir.endswith(suffix):
                Filelist.append(dir)
    elif os.path.isdir(dir):
        for entry in os.listdir(dir):
            getFileList(os.path.join(dir, entry), Filelist, ext)

    return Filelist

  

 

 

2.对指定数组进行等分,可以用于多线程程序

 

 1     def div_list(ls,n):  
 2        result = []  
 3        cut = int(len(ls)/n)  
 4        if cut == 0:  
 5            ls = [[x] for x in ls]  
 6            none_array = [[] for i in range(0, n-len(ls))]  
 7            return ls+none_array  
 8        for i in range(0, n-1):  
 9            result.append(ls[cut*i:cut*(1+i)])  
10        result.append(ls[cut*(n-1):len(ls)])  
11        return result  

 

 

3.用csv文件处理数据集

 1 import threading
 2 import os
 3 from PIL import Image
 4 import math
 5 import pandas as pd
 6 from tqdm import tqdm
 7 
 8 image_path = "/data0/Manually_Annotated_Images/"  # prefix prepended to each row's 'subDirectory_filePath' in savePic
 9 save_path = "/home/frank/affectNet/val/"  # prefix under which savePic creates one folder per 'expression' value
10 filename = "/data0/validation.csv"  # CSV file loaded with pd.read_csv further down
11 
def savePic(subdata):
    """Re-save every image listed in ``subdata`` into a per-class folder.

    For each row, the image at ``image_path + row['subDirectory_filePath']``
    is opened and written to ``save_path/<expression>/<file name>``, where
    the file name is the last ``/``-separated component of the row's path.
    A row that fails for any reason is reported and skipped so one bad
    image does not stop the rest of the batch.

    Args:
        subdata: Table whose ``iterrows()`` yields ``(index, row)`` pairs;
            each row must provide the 'subDirectory_filePath' and
            'expression' entries as strings.
    """
    worker = threading.current_thread().name
    print("thread %s is running..." % worker)
    for index, row in tqdm(subdata.iterrows(), total=len(subdata)):
        try:
            rel_path = row['subDirectory_filePath']
            # The context manager closes the source file handle even when
            # saving fails.
            with Image.open(image_path + rel_path) as image:
                target_dir = os.path.join(save_path, row['expression'])
                # exist_ok avoids the check-then-create race when several
                # threads need the same class folder at the same moment.
                os.makedirs(target_dir, exist_ok=True)
                image.save(os.path.join(target_dir, rel_path.split('/')[-1]))
        except Exception as exc:
            # Best effort: keep going, but say which row was dropped.
            print("thread %s skipped row %s: %r" % (worker, index, exc))
    print("thread %s is ended....." % worker)
26 
27 
28 
29 
def splitdf(df, num):
    """Split ``df`` into exactly ``num`` consecutive, non-overlapping slices.

    The first ``num - 1`` slices each hold ``len(df) // num`` rows; the
    last slice holds the remaining rows, so every row appears in exactly
    one slice.

    Args:
        df: Object supporting ``len()`` and positional slicing
            (``df[a:b]``), such as a pandas DataFrame or a list.
        num: Number of slices to produce; expected to be a positive
            integer.

    Returns:
        A list of ``num`` slices of ``df`` in original order.
    """
    linenum = len(df) // num
    # Only num - 1 fixed-size slices are cut here; the tail slice below is
    # the num-th one, so the result has no duplicated rows.
    pdlist = [df[i * linenum:(i + 1) * linenum] for i in range(num - 1)]
    pdlist.append(df[(num - 1) * linenum:len(df)])
    return pdlist
41 
# Requested number of slices, and therefore of worker threads.
thread_count = 32

data = pd.read_csv(filename)
# Every cell is converted to str so the column values can be concatenated
# into file-system paths by the workers.
# NOTE(review): DataFrame.applymap is deprecated in recent pandas releases
# in favour of DataFrame.map - confirm the installed version before switching.
data = data.applymap(str)
subData = splitdf(data, thread_count)

# Start one worker per slice actually returned, rather than assuming the
# list length equals thread_count, so no slice is left unprocessed.
th = []
for chunk in subData:
    t = threading.Thread(target=savePic, args=(chunk,))
    t.start()
    th.append(t)

# Wait for every worker before reporting completion.
for t in th:
    t.join()

print("saving is success!")