X-man

导航

Python:根据现有文件树创建文件树

 

# -*- coding: utf-8 -*-
import os, errno

def fileName(path):
    """Return *path* without its first backslash-separated component.

    Every remaining component is re-emitted followed by a backslash, so a
    non-empty result always ends with a backslash.  A path that contains no
    backslash yields an empty string.
    """
    # Element 0 is whatever precedes the first backslash; it is skipped.
    tail_parts = path.split('\\')[1:]
    return ''.join(part + '\\' for part in tail_parts)
    
def mkdir_p(path):
    """Create directory *path*, including any missing parent directories.

    An already-existing directory is silently accepted.  Every other OSError
    raised by os.makedirs (for example *path* existing as a regular file) is
    re-raised unchanged.
    """
    try:
        os.makedirs(path)
    except OSError as exc:  # the "as" form needs a Python newer than 2.5
        already_a_directory = exc.errno == errno.EEXIST and os.path.isdir(path)
        if not already_a_directory:
            raise

def fileTraverse(filepath):
    """Recursively mirror the directory tree under *filepath* onto drive E:.

    For each sub-directory found, the corresponding directory is created
    under the E: drive root (fileName drops the first backslash-separated
    component of the source path), and the sub-directory is then traversed
    in turn.  Every non-directory entry has its path printed.

    :param filepath: directory to walk; handed to os.listdir.
    """
    for entry in os.listdir(filepath):
        entry_path = os.path.join(filepath, entry)
        if os.path.isdir(entry_path):
            # Recreate the directory on the target drive before descending.
            mkdir_p("E:\\" + fileName(entry_path))
            fileTraverse(entry_path)
        else:
            # entry_path already starts with filepath; joining it onto
            # filepath a second time would duplicate the prefix whenever the
            # root is a relative path.  The parenthesised single-argument
            # print works on both Python 2 and Python 3.
            print(entry_path)


# The directory name contains Chinese characters, so it has to be a unicode
# string.  A u'' literal provides that directly on Python 2 and is also valid
# on Python 3.3+, where str has no decode() method.
root = u'F:\\目标2'
fileTraverse(root)

 

 

# -*- coding: utf-8 -*-
import os, errno
import jieba.posseg as pseg

def fileName(filePath):
    """Return *filePath* without its first backslash-separated component.

    Every remaining component is re-emitted followed by a backslash, so a
    non-empty result always ends with a backslash.  A path that contains no
    backslash yields an empty string.
    """
    # Split once instead of re-splitting on every loop iteration; element 0
    # (the text before the first backslash) is skipped.
    components = filePath.split('\\')
    return ''.join(component + '\\' for component in components[1:])
    
def mkdir_p(path):
    """Create *path* and any missing parents, tolerating an existing directory.

    Only the "already exists and is a directory" case is swallowed; any other
    OSError from os.makedirs propagates to the caller.
    """
    try:
        os.makedirs(path)
    except OSError as exc:  # the "as" form needs a Python newer than 2.5
        # Re-raise unless the failure is exactly "directory already there".
        if exc.errno != errno.EEXIST or not os.path.isdir(path):
            raise

def splitSentence(inputFile):
    """Segment every line of *inputFile* with jieba and write word/flag pairs.

    The output file has the same base name as the input and is written under
    the E: drive, in the directory obtained by dropping the first
    backslash-separated component of the input file's directory (see
    fileName).  Each input line produces one output line in which every token
    is emitted as ``word/flag`` with no separator between consecutive tokens.

    The explicit decode/encode calls assume file objects that yield and
    accept byte strings (Python 2 text-mode files).

    :param inputFile: path of the UTF-8 encoded text file to segment.
    """
    filePath = os.path.dirname(inputFile)   # directory part of the input path
    name = os.path.basename(inputFile)      # file name part of the input path
    outputDir = "E:\\" + fileName(filePath)

    # "with" guarantees both files are closed even if segmentation fails.
    with open(inputFile, 'r') as fin:
        # The traversal only creates target directories for sub-directories
        # it encounters, so the directory for this file may not exist yet.
        mkdir_p(outputDir)
        with open(outputDir + name, 'w') as fout:
            for eachLine in fin:
                # Strip surrounding whitespace, then decode to unicode;
                # undecodable bytes are dropped.
                line = eachLine.strip().decode('utf-8', 'ignore')
                # Dropping bad bytes can expose a newline at either end.
                line = line.strip('\n')
                tagged = ''.join(word.word + '/' + word.flag
                                 for word in pseg.cut(line))
                fout.write(tagged.encode('utf-8'))
                fout.write('\n')
    
        
def fileTraverse(filePath):
    """Recursively walk *filePath*, mirroring directories and segmenting files.

    For each sub-directory found, the corresponding directory is created
    under the E: drive root (fileName drops the first backslash-separated
    component of the source path) and the sub-directory is traversed in turn.
    Every non-directory entry is passed to splitSentence.

    :param filePath: directory to walk; handed to os.listdir.
    """
    for entry in os.listdir(filePath):
        entryPath = os.path.join(filePath, entry)
        if os.path.isdir(entryPath):
            # Create the matching target directory before descending into it.
            mkdir_p("E:\\" + fileName(entryPath))
            fileTraverse(entryPath)
        else:
            splitSentence(entryPath)


# Source directory to process.  It is kept as a unicode string so that
# directory names containing Chinese characters are handled; a u'' literal
# provides that on Python 2 and is also valid on Python 3.3+, where str has
# no decode() method.
root = u'F:\\source'
fileTraverse(root)

 

posted on 2016-03-17 18:45  雨钝风轻  阅读(1193)  评论(0)  编辑  收藏  举报