Python：读取 ABI（.ab1）测序文件信息并绘制峰图
1. 引入第三方库
from Bio import SeqIO
import matplotlib.pyplot as plt
2. 写函数
def _as_text(value):
    """Return *value* as ``str``, decoding it as UTF-8 when it is ``bytes``."""
    if isinstance(value, bytes):
        return value.decode('utf-8')
    return str(value)


def sequence(file_name):
    """Print the metadata of an ABI trace file and plot its four channels.

    The file is accepted only when its name ends with ``ab1`` and its first
    four bytes are ``ABIF``. For each record parsed from the file, the
    collected metadata, the phred quality list and the ``DATA9`` channel are
    printed, then the ``DATA9``-``DATA12`` channels are drawn as line plots
    and shown with ``plt.show()``.

    Args:
        file_name: Path of the trace file to read.

    Returns:
        The string ``"wrong file format"`` when the suffix or the leading
        bytes do not match; otherwise ``None``.

    Raises:
        OSError: If the file cannot be opened for the format check.
    """
    # Format check: only the 4-byte signature is needed, so read just that in
    # binary mode and close the handle immediately instead of decoding the
    # whole binary file as text.
    with open(file_name, 'rb') as handle:
        magic = handle.read(4)
    if not str(file_name).endswith('ab1') or magic != b'ABIF':
        return "wrong file format"

    # Read the data.
    for record in SeqIO.parse(file_name, "abi"):
        anno = record.annotations
        letter_anno = record.letter_annotations
        abif_raw = anno["abif_raw"]

        # Metadata (insertion order is the order in which it is printed).
        info_dict = {
            "seq": record.seq,
            "name": record.id,
            "date": anno["run_start"] + " to " + anno["run_finish"],
            "spac": "{:.2f}".format(abif_raw["SPAC1"]),  # two decimal places
            "dyep": _as_text(abif_raw["PDMF2"]),
            "mach": _as_text(abif_raw["MCHN1"]),
            "modl": _as_text(anno["machine_model"]),  # bytes -> str
            "bcal": _as_text(abif_raw["SPAC2"]),
            "ver1": _as_text(abif_raw["SVER1"]),
            "ver2": _as_text(abif_raw["SVER2"]),
        }

        # Data for the line plots.
        # NOTE(review): DATA9..DATA12 are treated as the G, A, T, C channels,
        # exactly as the original code did; presumably the real order is
        # recorded in the file (FWO_1 tag) - confirm for your instrument.
        data_g = list(abif_raw["DATA9"])
        data_a = list(abif_raw["DATA10"])
        data_t = list(abif_raw["DATA11"])
        data_c = list(abif_raw["DATA12"])
        qs = letter_anno["phred_quality"]

        # Debug print-out. The f-string formats every value via str(), so a
        # non-str value (e.g. the sequence object) cannot break the output.
        for key, value in info_dict.items():
            print(f"{key} : {value}")
        print("qs:")
        print(qs)
        print("g-data:")
        print(data_g)

        # Draw the figure: one line per channel over a shared sample index.
        plt.figure()
        ticks = range(len(data_g))
        plt.plot(ticks, data_a, c='green')
        plt.plot(ticks, data_c, c='purple')
        plt.plot(ticks, data_g, c='gray')
        plt.plot(ticks, data_t, c='red')
        plt.show()
3. 导入文件（将下面的 '文件' 替换为实际 .ab1 文件的路径）
if __name__ == "__main__":
    # Placeholder path: replace it with the location of a real .ab1 file.
    trace_path = '文件'
    sequence(trace_path)
4. 启动函数
python