DM—SIGMA筛选程序

# -*- coding: utf-8 -*-
"""
Created on Sat Oct  8 21:49:50 2022

@author: yindejiang
"""
# 读取数据,建文件夹
path = input("输入文件夹地址:")
####  提取数据
import pandas as pd 
import os
import shutil
import re
#path = input("请输入文件夹")
#path=r'D:\FAST_document\data_process\NGC6517\ACCEL_DM_SIGMA\ACCEL_20\20200426_zmax20'
## e.g,. path=r'D:\FAST_document\data_process\M3\M3\20191214'
os.chdir(path)
dir = 'DM_Sigma'
if os.path.exists(dir):
    shutil.rmtree(dir)
os.makedirs(dir)
os.makedirs(".\\DM_Sigma\\candidate")
os.makedirs(".\\DM_Sigma\\Max_Sigma")
os.makedirs(".\\DM_Sigma\\KnownPsr")


# 筛选出所有周期
for fire in os.listdir():
    if fire.endswith('_ACCEL_20'):                     ###文件后缀
        firelist=fire.split('_')   ##文件名列表
        #print(firelist[2][0:])
        
        linelist = []
        with open(fire, "r") as f:
            for line in f.readlines():
                line = line.strip('\n')  #去掉列表中每一个元素的换行符
                linelist.append(line)
                    #print(line)
                    
        Linelist = []
        i = 0
        while i < len(linelist):
            if linelist[i] == '':
                break    
            Linelist.append(linelist[i])
            i+=1
        Linelist = Linelist[3:]      ##去掉前三行

        
        ACCEL=[]
        for i in range(len(Linelist)):
            ACCEL.append(Linelist[i].strip().split(" "))    ##按照空格切分
        for i in range (len(ACCEL)):
            while "" in ACCEL[i] :
                ACCEL[i].remove("")
        
        
        ACCEL1 = []
        #ACCEL1 = pd.DataFrame(ACCEL1)
        for i in range(len(ACCEL)):
            ACCEL1.append(ACCEL[i][0:6])    ##选择前几列
        #ACCEL.columns = ['Cand', 'Sigma', 'Summed Power', 'Coherent Power', 'Harm',"Period","Frequency","DM",]
        #C:序号 S:信噪比  c :总功率  d :相关功率 H:拟合所需谐波数 P:候选周期 p:候选周期前5位值  DM :色散
        ACCEL1 = pd.DataFrame(ACCEL1)
        ACCEL1.columns = ['Cand', 'Sigma', 'Summed Power', 'Coherent Power', 'Harm',"Period"]
        
        DM = [float(firelist[1][2:])]*len(ACCEL1)
        ACCEL1.insert(loc=6, column='DM', value=DM)
        PP = ACCEL1['Period'].str.replace(r"\(.*\)","",regex=True)      ##去除括号
        ACCEL1.insert(loc=6, column='P', value=PP)
        p= []
        for i in range(len(ACCEL1)):
            A= ACCEL1['P'][i]
            #B=re.findall(r"\d{1,}?\.\d{2}", A)
            B=A[0:5]                                     ###周期位数
            p.append(B)
        ACCEL1.insert(loc=7, column='p', value=p)
        ACCEL1 = ACCEL1.drop_duplicates(subset="p",keep='first')    ###只保留一个p
        ACCEL1.to_csv(".\\"+'DM_Sigma\\'+firelist[0]+"_"+firelist[1]+"_"+firelist[2][0:]+'_Max.csv',sep=',',index=0,header=1)
        
# 筛选数据
path=path + "\\DM_Sigma"
os.chdir(path)
DF = pd.DataFrame(columns=['Cand','Sigma','Summed Power', 'Coherent Power','Harm','Period', 'P','p','DM'])

i =0
fire_Num = []
for fire in os.listdir():
    if fire.endswith('_Max.csv'):
        firelist=fire.split('_')
        #print(firelist[2][0:])
        fire_Num.append(firelist[2][0:])
        
        while i < len(fire_Num): 
            DF2 =  pd.read_csv(fire)
            DF = pd.concat([DF, DF2], ignore_index=True)
            i+=1

DF.to_csv(".\\Max_Sigma\\"+firelist[0]+"_"+"_"+'All_Cndidate.csv',sep=',', index=0, header=1)  

DF_ = DF.drop_duplicates(subset="p",keep='first')
DF_.to_csv(".\\\Max_Sigma\\"+firelist[0]+"_"+"_"+'one_Cndidate.csv',sep=',',index=0,header=1) 


#  去除点数少的  or  色散范围大的
candidate_Name = list(DF_["p"])
CAND = []
Non_CAND = []
# del candidate_Name[0]
for i in range(len(DF_)):
    if (len(DF[DF["p"] == candidate_Name[i]].Sigma) < 8) or ((max(DF[DF["p"] == candidate_Name[i]].DM) - min(DF[DF["p"] == candidate_Name[i]].DM)) > 30 ):
        Non_CAND.append(candidate_Name[i])
    else:
        CAND.append(candidate_Name[i])  

# 输出最好候选体文件
pp = []
dmdm = []
sigma = []
Cand = []
for i in range(len(CAND)):
    candidate_ = DF[DF["p"] == CAND[i]]
    #plt.figure(figsize=(6, 4))
    AA = []
    for j in range(len(candidate_)):
        #if candidate_Known["Sigma"][i] == max(candidate_Known["Sigma"]):
        AAA = list(candidate_.Sigma)
        if AAA[j] == max(candidate_["Sigma"]):
            AA.append(j)   
    pp.append(str(list(candidate_["p"])[0]))
    dmdm.append(str(list(candidate_["DM"])[AA[0]]))
    sigma.append(str(list(candidate_["Sigma"])[AA[0]]))
    Cand.append(str(list(candidate_["Cand"])[AA[0]]))
    
c ={"CAND":Cand,
   "DM":dmdm,
   "p":pp,
   "Sigma":sigma}
best_cand = pd.DataFrame(c)
best_cand.to_csv(".\\Max_Sigma\\"+'best_Candidate.csv',sep=',',index=0,header=1) 


# 画DM-Sigma图 !!!
import matplotlib
import matplotlib.pyplot as plt
for i in range(len(CAND)):
    candidate_ = DF[DF["p"] == CAND[i]]
    #plt.figure(figsize=(6, 4))
    AA = []
    for j in range(len(candidate_)):
        #if candidate_Known["Sigma"][i] == max(candidate_Known["Sigma"]):
        AAA = list(candidate_.Sigma)
        if AAA[j] == max(candidate_["Sigma"]):
            AA.append(j)
    
    plt.figure(figsize=(10, 4),dpi=200)         
    plt.plot(candidate_["DM"],  candidate_["Sigma"],color="#791E94", label = "p="+ str(list(candidate_["p"])[0])+ "ms")
    plt.scatter(candidate_["DM"],  candidate_["Sigma"], color="#0080ff")
    plt.xlim(150,210)
    #plt.plot(candidate_["DM"],  candidate_["Sigma"], color="red", label = "Period="+ str(candidate_Name[i]/list(candidate_["Harm"])[0])+ "ms")
    #plt.axis('off')
    #plt.plot(candidate_["DM"],  candidate_["Sigma"], color="red", label = "Harm"+ str(list(candidate_["Harm"])[0]))
    plt.legend(fontsize=14)
    plt.xlabel("DM" + "="+str(list(candidate_["DM"])[AA[0]]), fontsize=18)
    plt.ylabel("Sigma"+ "="+str(max(candidate_["Sigma"])),fontsize=18) 
    plt.savefig(".\\candidate\\" + "DM" +str(list(candidate_["DM"])[AA[0]]) + "_" +str(i+1)+ ".png",bbox_inches="tight" )
    plt.cla()
    plt.clf() 
    plt.show()
    plt.close("all")



# 查看单个DM-Sigma图
import matplotlib
import matplotlib.pyplot as plt
#candidate_Name = list(DF_["P"])
candidate_ = DF[DF["p"] == 2.265]    ##已知脉冲星的周期
#plt.figure(figsize=(6, 4))
AA = []
for i in range(len(candidate_)):
    #if candidate_Known["Sigma"][i] == max(candidate_Known["Sigma"]):
    AAA = list(candidate_.Sigma)
    if AAA[i] == max(candidate_["Sigma"]):
        AA.append(i)

plt.figure(figsize=(10, 4),dpi=200)         
plt.plot(candidate_["DM"],  candidate_["Sigma"],color="#791E94", label = "p="+ str(list(candidate_["p"])[0])+ "ms")

plt.scatter(candidate_["DM"],  candidate_["Sigma"], color="#0080ff")
plt.xlim(150,210)
#plt.plot(candidate_["DM"],  candidate_["Sigma"], color="red", label = "Period="+ str(candidate_Name[i]/list(candidate_["Harm"])[0])+ "ms")
#plt.axis('off')
#plt.plot(candidate_["DM"],  candidate_["Sigma"], color="red", label = "Harm"+ str(list(candidate_["Harm"])[0]))
plt.legend(fontsize=14)
plt.xlabel("DM" + "="+str(list(candidate_["DM"])[AA[0]]), fontsize=18)
plt.ylabel("Sigma"+ "="+str(max(candidate_["Sigma"])),fontsize=18)
#plt.savefig(".\\KnownPsr\\" + str(i)+"_"+"p_B="+str(list(candidate_["p"])[0]) + ".png",bbox_inches="tight" )
#plt.close("all")

# 查看单个候选详细信息
import matplotlib
import matplotlib.pyplot as plt
#candidate_Name = list(DF_["P"])
candidate_Known = DF[DF["p"] == 230.1]
#print(max(candidate_Known["Sigma"]))
candidate_Known


注:
上述代码在jupyter上演示,多出细节不完善!比如筛选周期的小数位等等的不确定。

  

posted @ 2022-10-08 22:11  yyyddj  阅读(32)  评论(0编辑  收藏  举报