1.寻找指定路径下所有指定格式的文件
def getFileList(dir, Filelist, ext=None):
    """Recursively collect file paths under ``dir`` into ``Filelist``.

    Args:
        dir: Path of a file or a directory. A directory is walked
            recursively; a path that is neither a file nor a directory
            (for example one that does not exist) contributes nothing.
            (The name shadows the ``dir`` builtin but is kept because it is
            part of the public signature.)
        Filelist: List that matching paths are appended to, in place.
        ext: Extension to keep, given with or without the leading dot
            (``"jpg"`` and ``".jpg"`` are equivalent). ``None`` or ``""``
            keeps every file. The comparison is case-sensitive.

    Returns:
        The same ``Filelist`` object that was passed in.
    """
    if os.path.isfile(dir):
        if not ext:
            Filelist.append(dir)
        else:
            # Compare against the real filename suffix. A substring test on
            # the last three characters cannot match "jpeg" or ".jpg" and
            # wrongly accepts names such as "notjpg".
            suffix = ext if ext.startswith(".") else "." + ext
            if dir.endswith(suffix):
                Filelist.append(dir)
    elif os.path.isdir(dir):
        for entry in os.listdir(dir):
            getFileList(os.path.join(dir, entry), Filelist, ext)
    return Filelist
2. 对指定列表进行等分,可用于多线程程序
def div_list(ls, n):
    """Split ``ls`` into ``n`` consecutive chunks of near-equal size.

    The chunks preserve order and concatenate back to ``ls``. Chunk sizes
    differ by at most one: when ``len(ls)`` is not divisible by ``n`` the
    leftover items are handed out one each to the leading chunks instead of
    all being piled onto the last chunk, so parallel workers receive a
    balanced share.

    Args:
        ls: Sequence to split (anything supporting ``len`` and slicing).
        n: Number of chunks to produce.

    Returns:
        A list of exactly ``n`` chunks. When ``ls`` has fewer than ``n``
        items, each item becomes its own one-element list and the result is
        padded with empty lists.

    Raises:
        ValueError: If ``n`` is negative.
        ZeroDivisionError: If ``n`` is zero.
    """
    if n < 0:
        raise ValueError("n must be a positive integer, got %r" % (n,))

    # Integer arithmetic only; divmod raises ZeroDivisionError for n == 0.
    base, extra = divmod(len(ls), n)

    if base == 0:
        # Fewer items than chunks: one item per chunk, then empty padding.
        return [[item] for item in ls] + [[] for _ in range(n - len(ls))]

    result = []
    start = 0
    for i in range(n):
        # The first `extra` chunks absorb one leftover item each.
        stop = start + base + (1 if i < extra else 0)
        result.append(ls[start:stop])
        start = stop
    return result
3.用csv文件处理数据集
import math
import os
import threading

import pandas as pd
from PIL import Image
from tqdm import tqdm

image_path = "/data0/Manually_Annotated_Images/"
save_path = "/home/frank/affectNet/val/"
filename = "/data0/validation.csv"

# Number of worker threads, and therefore the number of chunks the CSV is
# split into.
NUM_THREADS = 32


def savePic(subdata):
    """Copy every image listed in ``subdata`` into a per-class directory.

    For each row, the image at ``image_path + row['subDirectory_filePath']``
    is opened and saved again as ``save_path/<expression>/<file name>``.
    A row that fails (unreadable image, bad path, ...) is reported and
    skipped so that one bad row does not stop the worker.

    Args:
        subdata: DataFrame chunk with string-valued columns
            ``subDirectory_filePath`` and ``expression``.
    """
    print("thread %s is running..." % threading.current_thread().name)
    for _, row in tqdm(subdata.iterrows(), total=len(subdata)):
        try:
            rel_path = row['subDirectory_filePath']
            class_dir = save_path + row['expression']
            event = rel_path.split('/')[-1]
            # exist_ok avoids the check-then-create race between worker
            # threads, which used to raise FileExistsError and silently drop
            # the image being processed.
            os.makedirs(class_dir, exist_ok=True)
            # The context manager closes the source file handle promptly.
            with Image.open(image_path + rel_path) as image:
                image.save(os.path.join(class_dir, event))
        except Exception as exc:
            # Best effort: keep going, but make the failure visible.
            # tqdm.write prints without corrupting the progress bars.
            tqdm.write("failed to save %s: %r" % (row.get('subDirectory_filePath'), exc))
    print("thread %s is ended....." % threading.current_thread().name)


def splitdf(df, num):
    """Split ``df`` row-wise into exactly ``num`` consecutive chunks.

    Every row lands in exactly one chunk and chunk sizes differ by at most
    one row. (Previously ``num + 1`` overlapping chunks were returned, so a
    caller that consumed only the first ``num`` lost the trailing
    ``len(df) % num`` rows.)

    Args:
        df: DataFrame to split.
        num: Number of chunks to produce; must be a positive integer.

    Returns:
        A list of ``num`` DataFrame slices in the original row order.
    """
    base, extra = divmod(len(df), num)
    pdlist = []
    start = 0
    for i in range(num):
        # The first `extra` chunks take one leftover row each.
        stop = start + base + (1 if i < extra else 0)
        pdlist.append(df.iloc[start:stop])
        start = stop
    return pdlist


data = pd.read_csv(filename)
# Convert every cell to str so that paths and class labels can be
# concatenated. DataFrame.applymap is deprecated in newer pandas in favour of
# DataFrame.map; use whichever this pandas version provides.
_elementwise = data.map if hasattr(data, "map") else data.applymap
data = _elementwise(str)
subData = splitdf(data, NUM_THREADS)

th = []
for chunk in subData:
    t = threading.Thread(target=savePic, args=(chunk,))
    t.start()
    th.append(t)

for t in th:
    t.join()

print("saving is success!")