DM—SIGMA筛选程序
# -*- coding: utf-8 -*- """ Created on Sat Oct 8 21:49:50 2022 @author: yindejiang """ # 读取数据,建文件夹 path = input("输入文件夹地址:") #### 提取数据 import pandas as pd import os import shutil import re #path = input("请输入文件夹") #path=r'D:\FAST_document\data_process\NGC6517\ACCEL_DM_SIGMA\ACCEL_20\20200426_zmax20' ## e.g,. path=r'D:\FAST_document\data_process\M3\M3\20191214' os.chdir(path) dir = 'DM_Sigma' if os.path.exists(dir): shutil.rmtree(dir) os.makedirs(dir) os.makedirs(".\\DM_Sigma\\candidate") os.makedirs(".\\DM_Sigma\\Max_Sigma") os.makedirs(".\\DM_Sigma\\KnownPsr") # 筛选出所有周期 for fire in os.listdir(): if fire.endswith('_ACCEL_20'): ###文件后缀 firelist=fire.split('_') ##文件名列表 #print(firelist[2][0:]) linelist = [] with open(fire, "r") as f: for line in f.readlines(): line = line.strip('\n') #去掉列表中每一个元素的换行符 linelist.append(line) #print(line) Linelist = [] i = 0 while i < len(linelist): if linelist[i] == '': break Linelist.append(linelist[i]) i+=1 Linelist = Linelist[3:] ##去掉前三行 ACCEL=[] for i in range(len(Linelist)): ACCEL.append(Linelist[i].strip().split(" ")) ##按照空格切分 for i in range (len(ACCEL)): while "" in ACCEL[i] : ACCEL[i].remove("") ACCEL1 = [] #ACCEL1 = pd.DataFrame(ACCEL1) for i in range(len(ACCEL)): ACCEL1.append(ACCEL[i][0:6]) ##选择前几列 #ACCEL.columns = ['Cand', 'Sigma', 'Summed Power', 'Coherent Power', 'Harm',"Period","Frequency","DM",] #C:序号 S:信噪比 c :总功率 d :相关功率 H:拟合所需谐波数 P:候选周期 p:候选周期前5位值 DM :色散 ACCEL1 = pd.DataFrame(ACCEL1) ACCEL1.columns = ['Cand', 'Sigma', 'Summed Power', 'Coherent Power', 'Harm',"Period"] DM = [float(firelist[1][2:])]*len(ACCEL1) ACCEL1.insert(loc=6, column='DM', value=DM) PP = ACCEL1['Period'].str.replace(r"\(.*\)","",regex=True) ##去除括号 ACCEL1.insert(loc=6, column='P', value=PP) p= [] for i in range(len(ACCEL1)): A= ACCEL1['P'][i] #B=re.findall(r"\d{1,}?\.\d{2}", A) B=A[0:5] ###周期位数 p.append(B) ACCEL1.insert(loc=7, column='p', value=p) ACCEL1 = ACCEL1.drop_duplicates(subset="p",keep='first') ###只保留一个p ACCEL1.to_csv(".\\"+'DM_Sigma\\'+firelist[0]+"_"+firelist[1]+"_"+firelist[2][0:]+'_Max.csv',sep=',',index=0,header=1) # 筛选数据 path=path + "\\DM_Sigma" os.chdir(path) DF = pd.DataFrame(columns=['Cand','Sigma','Summed Power', 'Coherent Power','Harm','Period', 'P','p','DM']) i =0 fire_Num = [] for fire in os.listdir(): if fire.endswith('_Max.csv'): firelist=fire.split('_') #print(firelist[2][0:]) fire_Num.append(firelist[2][0:]) while i < len(fire_Num): DF2 = pd.read_csv(fire) DF = pd.concat([DF, DF2], ignore_index=True) i+=1 DF.to_csv(".\\Max_Sigma\\"+firelist[0]+"_"+"_"+'All_Cndidate.csv',sep=',', index=0, header=1) DF_ = DF.drop_duplicates(subset="p",keep='first') DF_.to_csv(".\\\Max_Sigma\\"+firelist[0]+"_"+"_"+'one_Cndidate.csv',sep=',',index=0,header=1) # 去除点数少的 or 色散范围大的 candidate_Name = list(DF_["p"]) CAND = [] Non_CAND = [] # del candidate_Name[0] for i in range(len(DF_)): if (len(DF[DF["p"] == candidate_Name[i]].Sigma) < 8) or ((max(DF[DF["p"] == candidate_Name[i]].DM) - min(DF[DF["p"] == candidate_Name[i]].DM)) > 30 ): Non_CAND.append(candidate_Name[i]) else: CAND.append(candidate_Name[i]) # 输出最好候选体文件 pp = [] dmdm = [] sigma = [] Cand = [] for i in range(len(CAND)): candidate_ = DF[DF["p"] == CAND[i]] #plt.figure(figsize=(6, 4)) AA = [] for j in range(len(candidate_)): #if candidate_Known["Sigma"][i] == max(candidate_Known["Sigma"]): AAA = list(candidate_.Sigma) if AAA[j] == max(candidate_["Sigma"]): AA.append(j) pp.append(str(list(candidate_["p"])[0])) dmdm.append(str(list(candidate_["DM"])[AA[0]])) sigma.append(str(list(candidate_["Sigma"])[AA[0]])) Cand.append(str(list(candidate_["Cand"])[AA[0]])) c ={"CAND":Cand, "DM":dmdm, "p":pp, "Sigma":sigma} best_cand = pd.DataFrame(c) best_cand.to_csv(".\\Max_Sigma\\"+'best_Candidate.csv',sep=',',index=0,header=1) # 画DM-Sigma图 !!! import matplotlib import matplotlib.pyplot as plt for i in range(len(CAND)): candidate_ = DF[DF["p"] == CAND[i]] #plt.figure(figsize=(6, 4)) AA = [] for j in range(len(candidate_)): #if candidate_Known["Sigma"][i] == max(candidate_Known["Sigma"]): AAA = list(candidate_.Sigma) if AAA[j] == max(candidate_["Sigma"]): AA.append(j) plt.figure(figsize=(10, 4),dpi=200) plt.plot(candidate_["DM"], candidate_["Sigma"],color="#791E94", label = "p="+ str(list(candidate_["p"])[0])+ "ms") plt.scatter(candidate_["DM"], candidate_["Sigma"], color="#0080ff") plt.xlim(150,210) #plt.plot(candidate_["DM"], candidate_["Sigma"], color="red", label = "Period="+ str(candidate_Name[i]/list(candidate_["Harm"])[0])+ "ms") #plt.axis('off') #plt.plot(candidate_["DM"], candidate_["Sigma"], color="red", label = "Harm"+ str(list(candidate_["Harm"])[0])) plt.legend(fontsize=14) plt.xlabel("DM" + "="+str(list(candidate_["DM"])[AA[0]]), fontsize=18) plt.ylabel("Sigma"+ "="+str(max(candidate_["Sigma"])),fontsize=18) plt.savefig(".\\candidate\\" + "DM" +str(list(candidate_["DM"])[AA[0]]) + "_" +str(i+1)+ ".png",bbox_inches="tight" ) plt.cla() plt.clf() plt.show() plt.close("all") # 查看单个DM-Sigma图 import matplotlib import matplotlib.pyplot as plt #candidate_Name = list(DF_["P"]) candidate_ = DF[DF["p"] == 2.265] ##已知脉冲星的周期 #plt.figure(figsize=(6, 4)) AA = [] for i in range(len(candidate_)): #if candidate_Known["Sigma"][i] == max(candidate_Known["Sigma"]): AAA = list(candidate_.Sigma) if AAA[i] == max(candidate_["Sigma"]): AA.append(i) plt.figure(figsize=(10, 4),dpi=200) plt.plot(candidate_["DM"], candidate_["Sigma"],color="#791E94", label = "p="+ str(list(candidate_["p"])[0])+ "ms") plt.scatter(candidate_["DM"], candidate_["Sigma"], color="#0080ff") plt.xlim(150,210) #plt.plot(candidate_["DM"], candidate_["Sigma"], color="red", label = "Period="+ str(candidate_Name[i]/list(candidate_["Harm"])[0])+ "ms") #plt.axis('off') #plt.plot(candidate_["DM"], candidate_["Sigma"], color="red", label = "Harm"+ str(list(candidate_["Harm"])[0])) plt.legend(fontsize=14) plt.xlabel("DM" + "="+str(list(candidate_["DM"])[AA[0]]), fontsize=18) plt.ylabel("Sigma"+ "="+str(max(candidate_["Sigma"])),fontsize=18) #plt.savefig(".\\KnownPsr\\" + str(i)+"_"+"p_B="+str(list(candidate_["p"])[0]) + ".png",bbox_inches="tight" ) #plt.close("all") # 查看单个候选详细信息 import matplotlib import matplotlib.pyplot as plt #candidate_Name = list(DF_["P"]) candidate_Known = DF[DF["p"] == 230.1] #print(max(candidate_Known["Sigma"])) candidate_Known
注:
上述代码在jupyter上演示,多出细节不完善!比如筛选周期的小数位等等的不确定。