文件合并

#!/usr/bin/python
#Filename :smelter_colmerge.py
# -*- coding:utf-8 -*-

import cx_Oracle
import sys
import getopt,string
import re
import os
import datetime
import os.path
import configuration
from Common import *

class kk(object):
    def __init__(self):
        pass
        

    def conn_db(self):
        try: 
            job_config=configuration.configuration()
            user_name=job_config.loadConfigFile('config_test.xml','user_name')
            user_password=job_config.loadConfigFile('config_test.xml','user_password')
            dns=job_config.loadConfigFile('config_test.xml','dns')
            db=cx_Oracle.connect(user_name,user_password,dns)
            return db
        except Exception as e:                                                         
            print (-1,e)
       

    def c_process_id(self,pid):
        db=self.conn_db()
        cursor=db.cursor()
        pro_pid=cursor.var(cx_Oracle.NUMBER)
        cursor.callproc("common.process_id",[pid,pro_pid])
        return pro_pid.getvalue()
        db.close()
        
    def prhelp(self):
        """Print the command-line usage text to standard output."""
        usage_text = ''' Usage: smelter_colmerge.py [options]
        Options:     
        -h, --help            show this help message and exit
        -i, --interid         input file_id with a comma segmentation,can more values input
        -n, --processid       input process id
        -o, --targetdir       input targetdir
        -b, --before          assign to type code
        -t, --tableid         assign to table id 

        Example: python smelter_colmerge.py   -n 100008111 -o /hadoop/pigtmp/yu  -i 'I10173227','I10173228'           
           or    python smelter_colmerge.py   -n 100008111 -o /hadoop/pigtmp/yu  -b FILE_COLLECT -t 26
        '''
        print(usage_text)
    #Parameters processing   
    def fun_param_input(self):
        TYPCD=0
        INTERID=0
        
        try:
            if len(sys.argv) > 6:
                opts, args = getopt.getopt(sys.argv[1:],"i:n:o:b:t:h", ["interid=","processid=","targetdir=","before=","tableid=","help"])
                #print(sys.argv)
                #print(opts)
                #print(args)
                for a,o in opts:
                    if a in ('-i', '--interid'):
                        INTERID=o	
                        #print INTERID
                    elif a in ('-n','--processid'):
                        PROCESSID=o
                        #print PROCESSID
                    elif a in ('-o','--targetdir'):
                        targetdir=o
                        #print targetdir
                    elif a in ('-b','--before'):
                        TYPCD=o
                        #print o
                    elif a in ('-t','--tableid'):
                        table_id=o  
                        #print o  
                                     
                    elif a in ('-h', '--help'):                    
                        self.prhelp()
                        sys.exit()
                        
                
                if os.path.exists(targetdir):
                    if INTERID==0:
                        #print  PROCESSID,targetdir,TYPCD,0,table_id
                        return PROCESSID,targetdir,TYPCD,0,table_id
                        
                    else:
                        dd=INTERID.split(',')
                        #print '###################'
                        #print dd
                        #print '###################'
                        #dd=re.split(',',INTERID)
                        #print  PROCESSID,targetdir,0,dd,0
                        return PROCESSID,targetdir,0,dd,0
                else:
                    meg='%s not exists'%target_directory
                    print (-1,meg)
                    
                
               
            else:
                print 'Please input right parameters'
                self.prhelp()
                sys.exit()
        except Exception as e:
            return (-1,e)
            
            
    # find file        
    def fun_find_file(self):
        PROCESSID,targetdir,bftype,f_list,table_id=self.fun_param_input()
        #print f_list
        process_id=self.c_process_id(PROCESSID)
        db=self.conn_db()
        cursor=db.cursor()
        dm_t=myexec_type()
        tp_id=dm_t.code_to_id('FILE_MERGE')
        
        if bftype==0:
            i=0
            j=0
            source_file=[]
            
            if len(f_list)<=1:
                meg='please input two or more'
                print (-1,meg)
                return 0,0,0,0,0,0
            
            else:
                mg=[]
                while i< len(f_list):
                    #print  tp_id
                    
                    cursor.execute('select distinct if_file_name,table_id,file_id,file_type,current_directory,data_date,hour_id from log_file where file_id=:fl_id and status=\'S\'and type_id <>:tp_id',fl_id=f_list[i],tp_id=tp_id)
                    a=cursor.fetchall()
                    
                    if len(a)==1:                    
                        while j <= i:
                            source_file.append(a[0])                       
                            j+=1                                                
                    else:
                        meg='%s is wrong parameter'%f_list[i]
                        mg.append(meg)                                                                                 
                    i+=1
                if len(mg) > 1:
                    print (-1,mg)
                if len(source_file)==len(f_list):                    
                    #print '$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$'
                    #print i,source_file,tp_id,targetdir,process_id,source_file[0][1]
                    return i,source_file,tp_id,targetdir,process_id,source_file[0][1]
                else:  
                
                    
                    return 0,0,0,0,0,0
                    
        else:
            m=0            
            source_file=[]
            bf_id =dm_t.code_to_id(bftype)  
            #print bf_id
            cursor.execute('select max(process_id) from log_file where type_id=:tpp_id and table_id=:tbb_id and status=\'S\'',tpp_id=bf_id,tbb_id=table_id) 
            c=cursor.fetchall()                                                                                                                          
            pcsid=c[0][0]           
            cursor.execute('select distinct if_file_name, table_id,file_id,file_type,current_directory,data_date,hour_id  from log_file where  status=\'S\'  and process_id=:pdd and table_id=:tbb_id and type_id <>:tp_id',pdd=pcsid,tbb_id=table_id,tp_id=tp_id)
            a=cursor.fetchall() 
            if len(a)>=2:                   
                while m<len(a):              
                    source_file.append(a[m])   
                    m+=1
                #print "############################################################"
                #print m,source_file,tp_id,targetdir,process_id,table_id  
                return m,source_file,tp_id,targetdir,process_id,table_id     
            else:               
                meg='File number less than two'
                print (-1,meg)  
                return 0,0,0,0,0,0
        db.close()            
   
   
    # check whether the merged file already exists
    def fun_targetdir(self):
        f_num,source_file,tp_id,targetdir,process_id,table_id=self.fun_find_file()
        if f_num == 0:
            
            return 0,0,0,0,0,0
        else:
            newname = 'merge'+source_file[0][0]
            
            targetname = os.path.join(targetdir,newname)
            
            if os.path.exists(targetname):
                meg='%s already exits'%targetname
                print (-1,meg)
                return 0,0,0,0,0,0
            else:
                
                #print f_num,source_file,tp_id,targetdir,process_id,table_id
                return f_num,source_file,tp_id,targetdir,process_id,table_id
    
    
    
    # check that the field counts of the source files are consistent
    def fun_colnum(self):
        try:
            f_num,source_file,tp_id,targetdir,process_id,table_id=self.fun_targetdir()
            db=self.conn_db()
            cursor=db.cursor()
            dm_t=myexec_type()
            vf_id=dm_t.code_to_id('IF_FILE_VERIFY_MASK')
            
            cursor.execute('select fields_terminated_by from dm_if_table where table_id=:tbl_id',tbl_id=table_id)
            d=cursor.fetchall()
            f_termin=d[0][0]         
            f_termin=f_termin.replace('\'',"")
            f_termin=f_termin.replace("\"",'')
            
            
            cursor.execute('select max(position_end),max(column_seq) from (select position_start,column_seq,position_end from dm_if_column where table_id=:tbl_id order by 2)',tbl_id=table_id)
            c=cursor.fetchall()
            pos_end=c[0][0]
            column_seq=c[0][1]  
            
            cursor.execute('select fields_terminated_by from dm_if_filemask where table_id=:tbl_id and type_id=:ty_id',tbl_id=table_id,ty_id=vf_id)
            d=cursor.fetchall()
            v_termin=d[0][0]
            v_termin=v_termin.replace('\'',"")
            v_termin=v_termin.replace("\"",'')
                                  
            #print "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
            #print vf_id
            #print f_termin,pos_end,column_seq
            #print v_termin
                      
            if f_num==0:
                pass          
            else:
                w=0
                i_num=0
                v_num=0
                i_file=[]
                v_file=[]
                
                while w < f_num: 
                                 
                    if source_file[w][2].startswith('I'):
                        i_file.append(source_file[w])
                        i_num+=1
                    else:
                        v_file.append(source_file[w])
                        v_num+=1
                    
                    w+=1           
                
                                                
                z=0
                meg=0
                m=900
                h=200 
                j=100
                f_llth=[]
                v_llth=[]
                mgg=[]
                
                while z < f_num:
                    
                    s_name =os.path.join(source_file[z][4],source_file[z][0])
                    cursor.callproc("common.Log_file_initial",[process_id,source_file[z][1],source_file[z][2] ,tp_id,source_file[z][3]])
                    cursor.callproc("common.Log_file_start",[process_id,source_file[z][1],source_file[z][2],tp_id,source_file[z][3],source_file[z][5],source_file[z][6],source_file[z][0],source_file[z][4]])
                    try:                        
                        ff=open(s_name,'r')   
                    except Exception as e:
                        print (-1,'%s open error %s'%(s_name,e))  
                          
                    line=ff.readline()                    
                    line=line.strip('\n')                                                              
                    if source_file[z] in i_file:                                                                 
                        if pos_end > 1:                          
                            #print "?????????????????????????????????"
                            #print len(line)
                            #print pos_end
                            #print line                                                 
                            if len(line)==pos_end:
                                m+=1
                            else:
                                meg='%s  number of field not consistent' %source_file[z][2]
                                mgg.append(meg)
                                                          
                                                                                                                          
                        else:                                                                                                                                                                   
                            f_ll=line.split(f_termin)                                                                                
                            fl_num=len(f_ll)                                                       
                            f_llth.append(fl_num) 
                            if max(f_llth)<>min(f_llth):
                                print'9999999999999999999'
                                cursor.callproc("common.Log_file_fail",[process_id,source_file[z][1],source_file[z][2],tp_id,source_file[z][3],meg,0,0])
                                meg='%s  number of field not consistent' %source_file[z][2]                                
                                mgg.append(meg) 
                            else:
                                h+=1
                                #print h                                                                                                                              
                            ff.close()                                                                                                                                                               
                    
                    
                    if source_file[z] in v_file:                                                                       
                        v_ll=line.split(v_termin)
                        vl_num=len(v_ll)
                        v_llth.append(vl_num)                       
                        if max(v_llth)<>min(v_llth):                                  
                            cursor.callproc("common.Log_file_fail",[process_id,source_file[z][1],source_file[z][2],tp_id,source_file[z][3],meg,0,0])
                            meg='The number of field not consistent'                                
                            print (-1,meg)
                        else:
                            j+=1
                            #print j            
                        ff.close()                                                   
                    z+=1    
                if len(mgg)>1:
                    print (-1,mgg)
                
                if meg==0:                  
                    #print f_num,i_file,v_file,source_file,tp_id,targetdir,process_id,table_id,v_termin
                    return f_num,i_file,v_file,source_file,tp_id,targetdir,process_id,table_id,v_termin
                else:
                    return 0,0,0,0,0,0,0,0
                     
            db.close()
        except Exception as e:
            return (-1,e)
                    
    #merge files 
    def fun_merge(self):
        try:
            db=self.conn_db()
            cursor=db.cursor()
            readsize = 1024
            k=0
            z=0
            f_num,i_file,v_file,source_file,tp_id,targetdir,process_id,table_id,v_termin=self.fun_colnum() 
            #print "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$"
            #print f_num,i_file,v_file,source_file,tp_id,targetdir,process_id,table_id 
                                                                                                 
            if f_num==0:
                pass           
            else:                       
                if len(i_file)==0:
                    pass
                else:
                    newname1 = 'merge'+i_file[0][0]
                    targetname1=os.path.join(targetdir,newname1)
                    t_file1=open(targetname1,'wb')
                     
                    while z< len(i_file):
                        if i_file[z] in i_file:
                            f_name =os.path.join(i_file[z][4],i_file[z][0])                                                                        
                            f_file=open(f_name,'rb')
                            while 1:
                                filebytes = f_file.read(readsize)
                                if not filebytes:
                                    break
                                else:
                                    t_file1.write(filebytes)
                            t_file1.write(os.linesep)
                            cursor.callproc("common.Log_file_success",[process_id,i_file[z][1],i_file[z][2],tp_id,i_file[z][3],0])
                            f_file.close()
                        z+=1                   
                    t_file1.close
                    
                if len(v_file)==0:  
                    pass
                else:
                    newname2 = 'merge'+v_file[0][0] 
                    targetname2=os.path.join(targetdir,newname2)
                    t_file2=open(targetname2,'wb')                                                                        
                    
                    a=0
                    b=0
                    f_min=0
                    lst_f=[]
                    file_line=[]
                    cursor.execute('select column_seq from DM_IF_VERIFY_FILE where table_id=:tbl_id and if_filename=1',tbl_id=table_id)        
                    c=cursor.fetchall()
                    column_seq=c[0][0]
                    while k < len(v_file):
                        v_name =os.path.join(v_file[k][4],v_file[k][0])                    
                        v_f=open(v_name,'rb')                    
                        line=v_f.readline()
                        line=line.strip('\n')
                        ll=line.split(v_termin)                      
                        file_line.append(ll)
                        
                        cursor.callproc("common.Log_file_success",[process_id,v_file[k][1],v_file[k][2],tp_id,v_file[k][3],0])
                        v_f.close()
                        k+=1
                    #print '9999999999999999999999999999999999999999999'
                    #print  file_line 
            
                    while a<len(ll):
                        b=0
                        f_min=0       
                        while b<len(v_file):
                            if a==column_seq-1:
                                f_min=newname2
                            else:                                                            
                                file_line[b][a]=int(file_line[b][a])
                                f_min=f_min+file_line[b][a]                                                   
                            b=b+1
                        lst_f.append(f_min) 
                        a=a+1   
                    
                    #print "!!!!!!!!!!!!!!!!!!!!!!!!!!!!"                        
                    #print lst_f      
                      
                    #print "$$$$$$$$$$$$$$$$$$$$$$$$$$"
                    q=0
                    while q<len(lst_f):
                        lst_f[q]=str(lst_f[q])
                        q+=1
                    #print lst_f
                    fstring="|".join(lst_f)
                    
                    #print fstring
                    t_file2.write(fstring)
                    t_file2.close()
                #print 'dddddddddddddddddddddddddddd'
                #print k                
                #print z    
                if k+z==f_num:
                    print (0,f_num)
                
            db.close()
                      
        except Exception as e:
            return (-1,e)
       
                    
if __name__ == '__main__':
    # Script entry point: parse the options, validate the files and merge them.
    kk().fun_merge()


























 

posted @ 2012-07-27 18:11  菜鸟MM  阅读(233)  评论(0编辑  收藏  举报