我的代码-data pulling
# coding: utf-8
import datetime
import time
from sqlalchemy.engine import create_engine
from sqlalchemy.pool import NullPool
import pyodbc
import pandas as pd
import numpy as np
import shutil
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import Normalizer
conn = create_engine("mssql+pyodbc://sedaldr:s1e6daldr@MSEDA101\MSEDA/SSMC_IEDA_DEV2?driver=ODBC+Driver+11+for+SQL+Server",connect_args={'connect_timeout':15},poolclass=NullPool)
def main():
#examples for normalizer
def encode_eqpid(eqpid):
return int(eqpid[-2:])-1
def encode_chamber(chamber):
if chamber == 'A':
return 0
else:
return 1
def encode_wafer(wafer):
if wafer > 0:
return wafer-1
data=pd.read_csv(r"D:\Users\sgg91044\Desktop\normalizing_example.csv")
#data=data.drop(columns=['lotid','defect_count','stage','Recipie_Name','finishtime'])
#data=data.drop(columns=['eqpid','Chamber','slotid'])
nz = Normalizer()
data.iloc[:,8:10]=pd.DataFrame(nz.fit_transform(data.iloc[:,8:10]),columns=data.iloc[:,8:10].columns)
data.iloc[:,0:3]=pd.DataFrame(nz.fit_transform(data.iloc[:,0:3]),columns=data.iloc[:,0:3].columns)
#pulling data
now = datetime.datetime.now()
print("1st time to pull the data",now)
now_string = '''%s-%s-%s %s:%s:%s'''%(now.year,now.month,now.day,now.hour,now.minute,now.second)
now_5df = now-datetime.timedelta(minutes=5)
now_5af = now+datetime.timedelta(minutes=5)
now_5af_string= '''%s-%s %s:%s:%s'''%(now_5af.month,now_5af.day,now_5af.hour,now_5af.minute,now_5af.second)
now_5bf_string= '''%s-%s-%s %s:%s:%s'''%(now_5df.year,now_5df.month,now_5df.day,now_5df.hour,now_5df.minute,now_5df.second)
query_time = ''' '%s' AND '%s' '''%(now_5bf_string,now_string)
#put SQL query part as string
query_part1 = "SELECT DISTINCT b.eqpid, SUBSTRING(b.ParameterName, CHARINDEX('-', b.parametername) +1,1) as Chamber, b.lotid,a.slotid, a.waferid, LEFT(b.ParameterName, CHARINDEX('-', b.ParameterName) -1) as Param_Name, SUBSTRING(b.ParameterName, CHARINDEX('-', b.parametername) +3,+1) as Step, SUBSTRING(b.ParameterName, CHARINDEX('-', b.parametername) +5, LEN( b.parametername)) as Recipie_Name, b.parametername, a.[data] as data1,a.finishtime,a.ooc,a.oos FROM [SSMC_RTM].[dbo].[rtm_tbl_massdata] a INNER JOIN [SSMC_RTM].[dbo].[rtm_tbl_datahist] b ON a.[datahist_fno]=b.[fno] WHERE b.parametername LIKE '%-4-%' AND b.eqpid LIKE 'AEM2%' AND a.finishtime BETWEEN"
query_part2 = "AND b.parametername NOT LIKE '%-$' AND( b.parametername LIKE 'ETCM_PHA4_A%' OR b.parametername LIKE 'ETCM_PHA4_B%' OR b.parametername LIKE 'ETCM_PHB4_A%' OR b.parametername LIKE 'ETCM_PHB4_B%' OR b.parametername LIKE 'ETCM_PHC4_A%' OR b.parametername LIKE 'ETCM_PHC4_B%' OR b.parametername LIKE 'HELK_MEAN_A%' OR b.parametername LIKE 'HELK_MEAN_B%' OR b.parametername LIKE 'LOWERCHM_PRESS_A%' OR b.parametername LIKE 'LOWERCHM_PRESS_B%' OR b.parametername LIKE 'PBK4_A%' OR b.parametername LIKE 'PBK4_B%' OR b.parametername LIKE 'RR23_MEAN_A%' OR b.parametername LIKE 'RR23_MEAN_B%' OR b.parametername LIKE 'RR23_MAX._A%' OR b.parametername LIKE 'RR23_MAX._B%' OR b.parametername LIKE 'RR13_MEAN_A%' OR b.parametername LIKE 'RR13_MEAN_B%' OR b.parametername LIKE 'RR13_MAX._A%' OR b.parametername LIKE 'RR13_MAX._B%' OR b.parametername LIKE 'THR3_MAX._A%' OR b.parametername LIKE 'THR3_MAX._B%' OR b.parametername LIKE 'THR3_MAX._DIFF_A%' OR b.parametername LIKE 'THR3_MAX._DIFF_B%' OR b.parametername LIKE 'THR3_MEAN_A%' OR b.parametername LIKE 'THR3_MEAN_B%' OR b.parametername LIKE 'THR3_MEAN_DIFF_A%' OR b.parametername LIKE 'THR3_MEAN_DIFF_B%' OR b.parametername LIKE 'THR3_MEAN_SLOPE_A%' OR b.parametername LIKE 'THR3_MEAN_SLOPE_B%' )"
#sum 3 strings to get query string
query_sum = '''%s %s %s'''%(query_part1,query_time,query_part2)
query_sum
#run SQL query by python
data=pd.read_sql_query(query_sum,conn)
#save the dataset
localtime = time.asctime( time.localtime(time.time()) )
time_name = time.strftime("%m-%d-%H-%M", time.localtime() )
data.to_csv(r'D:\Users\sgg91044\Desktop\deployment\RawData\AEM2_rowdata_%s.csv'%time_name)
#clean
data_clean=pd.read_csv(r'D:\Users\sgg91044\Desktop\deployment\RawData\AEM2_rowdata_%s.csv'%time_name)
data_clean=data_clean.iloc[:,1:]
data_clean.drop(['ooc','oos'],axis=1,inplace=True)
data_clean.drop(["waferid","Step","finishtime","parametername"],axis=1,inplace=True)
data_clean.columns = ["eqpid","chamber","lotid","wafer","param_name","recipe","data"]
#pivot
pivoted = data_clean.pivot_table(index=['eqpid','chamber','lotid','wafer','recipe'],columns="param_name",values="data",aggfunc=np.sum)
pivoted.reset_index(inplace=True)
columns=["eqpid","chamber","lotid","wafer","recipe","ETCM_PHA4","ETCM_PHB4","ETCM_PHC4","HELK_MEAN","LOWERCHM_PRESS","PBK4","RR13_MAX.","RR13_MEAN","RR23_MAX.","RR23_MEAN","THR3_MAX.","THR3_MAX._DIFF","THR3_MEAN","THR3_MEAN_DIFF","THR3_MEAN_SLOPE"]
final = pd.DataFrame(columns = columns)
final = final.merge(pivoted,how="right").reindex_axis(columns, axis=1)
#normalize
final= final.dropna(axis=0, how='any')
Index=final.drop(columns=["ETCM_PHA4","ETCM_PHB4","ETCM_PHC4","HELK_MEAN","LOWERCHM_PRESS","PBK4","RR13_MAX.","RR13_MEAN","RR23_MAX.","RR23_MEAN","THR3_MAX.","THR3_MAX._DIFF","THR3_MEAN","THR3_MEAN_DIFF","THR3_MEAN_SLOPE"])
Index.to_csv(r'D:\Users\sgg91044\Desktop\deployment\PredictIndex\AEM2_pivotindex_%s.csv'%time_name)
final=final.drop(columns=["lotid","recipe"])
final.eqpid = final.eqpid.apply(encode_eqpid)
final.chamber = final.chamber.apply(encode_chamber)
final.wafer = final.wafer.apply(encode_wafer)
final.rename(columns={'eqpid':'eqpid1','chamber':'chamber1','wafer':'wafer1'}, inplace=True)
final.eqpid1 = final.eqpid1.astype("category")
final.chamber1 = final.chamber1.astype("category")
final.wafer1 = final.wafer1.astype("category")
final.iloc[:,11:13]=nz.transform(final.iloc[:,11:13])
final.iloc[:,3:6]=nz.transform(final.iloc[:,3:6])
#SUM_ETCM
final["SUM_ETCM"]=np.array(final.ETCM_PHA4)+np.array(final.ETCM_PHB4)+np.array(final.ETCM_PHC4)
final.to_csv(r'D:\Users\sgg91044\Desktop\deployment\PredictData\AEM2_pivotdata_%s.csv'%time_name)
#shutil.move(r'E:\Data\Project-Etcher\RawData\AEM2_rowdata_%s.csv'%time_name,r'E:\Data\Project-Etcher\RwwData_Processed')
print('1st time to pull the data successfully')
localtime = time.asctime( time.localtime(time.time()) )
last_fini_time = time.strftime("%y-%m-%d %H:%M:%S", time.localtime() )
fo = open("time.txt", "w")
fo.write(last_fini_time)
fo.close()
fo = open("time.txt", "r+")
last_fini_time = fo.read()
last_fini_time1= last_fini_time[3:17]
var=1
while var==1:
for i in range(999999):
print ('%d time try to pull the data today'%(i+2))
# Decide if the time equals to what we setting
correct_time=datetime.datetime.strptime(now_5af_string,'%m-%d %H:%M:%S')
mth = now_5af.month
day = now_5af.day
hour = now_5af.hour
mins = now_5af.minute
print('5 mins after last start:',day,hour,mins)
lastfi_time1 = datetime.datetime.strptime(last_fini_time1,'%m-%d %H:%M:%S')
delta= correct_time - lastfi_time1
print('delta=',delta)
if delta.days == 0:
x=1
while x==1:
now= datetime.datetime.now()
if now.hour == hour and now.minute == mins and now.day == day:
break
# if not equal, sleep 20 seconds then start again
time.sleep(20)
print ('%d time try to pull the data today,test again...'%(i+2),now)
#run SQL query by python
print ('%d time try to pull the data is running,5mins'%(i+2),now)
now_string = '''%s-%s-%s %s:%s:%s'''%(now.year,now.month,now.day,now.hour,now.minute,now.second)
now_5bf = now-datetime.timedelta(minutes=5)
now_5af=now+datetime.timedelta(minutes=5)
now_5af_string= '''%s-%s %s:%s:%s'''%(now_5af.month,now_5af.day,now_5af.hour,now_5af.minute,now_5af.second)
now_5bf_string= '''%s-%s-%s %s:%s:%s'''%(now_5bf.year,now_5bf.month,now_5bf.day,now_5bf.hour,now_5bf.minute,now_5bf.second)
query_time = ''' '%s' AND '%s' '''%(now_5bf_string,now_string)
query_sum = '''%s %s %s'''%(query_part1,query_time,query_part2)
data=pd.read_sql_query(query_sum,conn)
else:
#run SQL query by python
print('%d time to pull the data is running,>5mins'%(i+2),now)
query_time = ''' '%s' AND '20%s' '''%(now_string,last_fini_time)
query_sum = '''%s %s %s'''%(query_part1,query_time,query_part2)
now= datetime.datetime.now()
now_string = '''%s-%s-%s %s:%s:%s'''%(now.year,now.month,now.day,now.hour,now.minute,now.second)
now_5af=now+datetime.timedelta(minutes=5)
now_5af_string= '''%s-%s %s:%s:%s'''%(now_5af.month,now_5af.day,now_5af.hour,now_5af.minute,now_5af.second)
data=pd.read_sql_query(query_sum,conn)
#save the dataset
fo = open("time.txt", "w")
fo.write('')
fo.close()
localtime = time.asctime( time.localtime(time.time()) )
time_name = time.strftime("%m-%d-%H-%M", time.localtime() )
data.to_csv(r'D:\Users\sgg91044\Desktop\deployment\RawData\AEM2_rowdata_%s.csv'%time_name)
#clean
data_clean=pd.read_csv(r'D:\Users\sgg91044\Desktop\deployment\RawData\AEM2_rowdata_%s.csv'%time_name)
data_clean=data_clean.iloc[:,1:]
data_clean.drop(['ooc','oos'],axis=1,inplace=True)
data_clean.drop(["waferid","Step","finishtime","parametername"],axis=1,inplace=True)
data_clean.columns = ["eqpid","chamber","lotid","wafer","param_name","recipe","data"]
#pivot
pivoted = data_clean.pivot_table(index=['eqpid','chamber','lotid','wafer','recipe'],columns="param_name",values="data",aggfunc=np.sum)
pivoted.reset_index(inplace=True)
columns=["eqpid","chamber","lotid","wafer","recipe","ETCM_PHA4","ETCM_PHB4","ETCM_PHC4","HELK_MEAN","LOWERCHM_PRESS","PBK4","RR13_MAX.","RR13_MEAN","RR23_MAX.","RR23_MEAN","THR3_MAX.","THR3_MAX._DIFF","THR3_MEAN","THR3_MEAN_DIFF","THR3_MEAN_SLOPE"]
final = pd.DataFrame(columns = columns)
final = final.merge(pivoted,how="right").reindex_axis(columns, axis=1)
#normalize
final= final.dropna(axis=0, how='any')
Index=final.drop(columns=["ETCM_PHA4","ETCM_PHB4","ETCM_PHC4","HELK_MEAN","LOWERCHM_PRESS","PBK4","RR13_MAX.","RR13_MEAN","RR23_MAX.","RR23_MEAN","THR3_MAX.","THR3_MAX._DIFF","THR3_MEAN","THR3_MEAN_DIFF","THR3_MEAN_SLOPE"])
Index.to_csv(r'D:\Users\sgg91044\Desktop\deployment\PredictIndex\AEM2_pivotindex_%s.csv'%time_name)
final=final.drop(columns=["lotid","recipe"])
final.eqpid = final.eqpid.apply(encode_eqpid)
final.chamber = final.chamber.apply(encode_chamber)
final.wafer = final.wafer.apply(encode_wafer)
final.rename(columns={'eqpid':'eqpid1','chamber':'chamber1','wafer':'wafer1'}, inplace=True)
final.eqpid1 = final.eqpid1.astype("category")
final.chamber1 = final.chamber1.astype("category")
final.wafer1 = final.wafer1.astype("category")
final.iloc[:,11:13]=nz.transform(final.iloc[:,11:13])
final.iloc[:,3:6]=nz.transform(final.iloc[:,3:6])
#SUM_ETCM
final["SUM_ETCM"]=np.array(final.ETCM_PHA4)+np.array(final.ETCM_PHB4)+np.array(final.ETCM_PHC4)
final.to_csv(r'D:\Users\sgg91044\Desktop\deployment\PredictData\AEM2_pivotdata_%s.csv'%time_name)
#shutil.move(r'E:\Data\Project-Etcher\RawData\AEM2_rowdata_%s.csv'%time_name,r'E:\Data\Project-Etcher\RwwData_Processed')
print('%d time to pull the data successfully'%(i+2))
i=+1
#time loop setting
#mins_timing = 5*i
#mins = mins+(mins_timing-((mins_timing//60)*60))
#hour = hour+(mins//60)
#if mins >59:
#mins = mins-60
#else :
#a=1
#if mth != now.month:
#mth = now.month
#day = now.day
#else:
#day = day +(hour//24)
#hour = hour%24
#year=now.year
localtime = time.asctime( time.localtime(time.time()) )
last_fini_time = time.strftime("%y-%m-%d %H:%M:%S", time.localtime() )
fo = open("time.txt", "w")
fo.write(last_fini_time)
fo.close()
fo = open("time.txt", "r+")
last_fini_time = fo.read()
last_fini_time1= last_fini_time[3:17]
main()
import os
localtime = time.asctime( time.localtime(time.time()) )
last_fini_time = time.strftime("%y-%m-%d %H:%M:%S", time.localtime() )
fo = open("time.txt", "w")
fo.write(last_fini_time)
fo.close()
fo = open("time.txt", "w")
fo.write('')
fo.close()
#fo = open("time.txt","r+")
#last_fini_time = fo.read()
#last_fini_time1= last_fini_time[3:17]
#fo.close()
#os.remove(path=r"C:\Users\sgg91044\time.txt")
now= datetime.datetime.now()
now_5af=now+datetime.timedelta(minutes=5)
localtime = time.asctime( time.localtime(time.time()) )
last_fini_time = time.strftime("%y-%m-%d %H:%M:%S", time.localtime() )
fo = open("time.txt", "w")
fo.write(last_fini_time)
fo.close()
fo = open("time.txt", "r+")
last_fini_time = fo.read()
last_fini_time1= last_fini_time[3:17]
query_time = ''' '18-%s' AND '%s' '''%(now_5af_string,last_fini_time)
query_time
import threading
def pulldata()
print('start to pull data')
print('son thread runs successfully')
if __name__ == '__main__':
print('we are in main thread ')
print('pulling data part run in son thread')
sub_thread = threading.Thread(target=pulldata)
print('set up son thread and run')
sub_thread.setDaemon(True)
sub_thread.start()
print('let main thread wait son thread for 3s')
sub_thread.join(3)
print('main thread finished')