利用Python将.mol2格式的分子库文件拆分成多个只包含单个分子的.mol2文件,从而把分子库中众多的化合物逐一分离出来,便于对所提取的化合物逐个进行分析。拆分的主要思路如下:
1.用户选择分子库文件所在的目录,以及用于存放拆分后文件的目录
2.读取分子库文件所有内容
3.依照一定的拆分规律(每个分子都以 @<TRIPOS>MOLECULE 记录行开头,其下一行为分子名称),将分子库文件逐个拆分,并把每个分子写入以分子名称命名的新文件
按照以上思路即可实现对.mol2分子库文件的拆分,废话不多说,直接看干货:
import os
import re

# Record type indicator that opens every molecule in a Tripos .mol2 file.
# It is compared case-insensitively so that input using the lower-case
# '@<tripos>MOLECULE' spelling is still recognised.
MOLECULE_TAG = '@<TRIPOS>MOLECULE'

# Characters that cannot appear in a file name on common file systems.
_UNSAFE_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def _is_molecule_tag(line):
    """Return True when *line* starts a new molecule record."""
    return line.upper().startswith(MOLECULE_TAG)


def _split_records(lines):
    """Group *lines* into one ``(name_line, record_lines)`` pair per molecule.

    A record starts at a molecule tag line and runs up to (not including) the
    next tag line.  Lines that precede the first tag are kept at the front of
    the first record so no input is dropped.  ``name_line`` is the line that
    directly follows the tag, or ``None`` when the tag is the last line of
    its record.
    """
    records = []
    current = []
    name_line = None
    tag_seen = False
    expect_name = False
    for line in lines:
        if _is_molecule_tag(line):
            if tag_seen:
                records.append((name_line, current))
                current, name_line = [], None
            tag_seen = True
            expect_name = True
        elif expect_name:
            name_line = line
            expect_name = False
        current.append(line)
    if tag_seen:
        # Flush the final molecule; no trailing sentinel tag is required.
        records.append((name_line, current))
    return records


def _safe_stem(name_line, fallback):
    """Turn a molecule name line into a usable file-name stem."""
    stem = _UNSAFE_CHARS.sub('_', name_line.strip()).rstrip(' .')
    return stem or fallback


def _unique_stem(stem, used):
    """Return *stem*, suffixed with ``_2``, ``_3``... if it was already used.

    *used* holds lower-cased stems so that names differing only by case do
    not collide on case-insensitive file systems.  It is updated in place.
    """
    candidate = stem
    counter = 2
    while candidate.lower() in used:
        candidate = f'{stem}_{counter}'
        counter += 1
    used.add(candidate.lower())
    return candidate


# Split library files
def divide_file(read_fold, write_fold):
    """Split every ``.mol2`` file located directly in *read_fold*.

    Only regular files whose name ends with ``.mol2`` (any case) are read;
    sub-directories and other files are ignored.  Each library is handed to
    :func:`write_file`, which stores its molecules under *write_fold*.

    Args:
        read_fold: Directory containing the multi-molecule .mol2 libraries.
        write_fold: Directory that receives one sub-directory per library.
    """
    for file in sorted(os.listdir(read_fold)):
        file_path = os.path.join(read_fold, file)
        if not os.path.isfile(file_path):
            continue
        if not file.lower().endswith('.mol2'):
            continue
        with open(file_path, 'r') as mol2_file:
            contents = mol2_file.readlines()
        write_file(contents, write_fold, file)


# Write the individual molecule files
def write_file(article, write_fold, file_name):
    """Write each molecule found in *article* to its own ``.mol2`` file.

    The files are created in ``write_fold/<file_name without extension>``;
    that directory is created when missing.  Each output file is named after
    the line following the molecule tag.  Characters that are not valid in
    file names are replaced by ``_``, an empty name falls back to
    ``molecule_<n>``, and repeated names receive a numeric suffix instead of
    overwriting the earlier molecule.

    Args:
        article: Lines of a .mol2 library, each including its line ending.
            A trailing bare tag line (the sentinel older callers appended)
            is accepted and ignored.
        write_fold: Parent directory for the output directory.
        file_name: Name of the library file; its extension is dropped to
            form the output directory name.
    """
    file_path = os.path.join(write_fold, os.path.splitext(file_name)[0])
    if not os.path.exists(file_path):
        os.makedirs(file_path)
        print(file_path, "创建成功")
    else:
        print(file_path, "目录已存在")

    used_stems = set()
    for index, (name_line, lines) in enumerate(_split_records(article), start=1):
        if name_line is None:
            # Nothing follows the tag: a sentinel or truncated record.
            continue
        stem = _unique_stem(_safe_stem(name_line, f'molecule_{index}'), used_stems)
        path = os.path.join(file_path, stem + '.mol2')
        with open(path, 'w') as new_file:
            new_file.writelines(lines)


def main():
    """Ask for the input and output directories, then split the libraries."""
    # Imported here so the splitting functions remain importable on systems
    # where Tk is not installed.
    from tkinter import filedialog

    # Directory that holds the library files
    read_fold = filedialog.askdirectory()
    if not read_fold:
        # The dialog returns an empty value when it is cancelled.
        print("未选择目录,已取消操作")
        return
    # Directory that receives the split files
    write_fold = filedialog.askdirectory()
    if not write_fold:
        print("未选择目录,已取消操作")
        return
    divide_file(read_fold, write_fold)


if __name__ == '__main__':
    main()