Python 读取 .doc 后缀的 Word 文件

文件内容如图:

import os

from docx import Document
from win32com import client as wc

def _convert_doc_to_docx(doc_path, docx_path):
    """Save the legacy .doc at *doc_path* as a .docx at *docx_path* via Word."""
    word = wc.Dispatch('Word.Application')
    try:
        doc = word.Documents.Open(doc_path)
        try:
            # 12 is Word's wdFormatXMLDocument (.docx) file format constant.
            doc.SaveAs(docx_path, 12, False, "", True, "", False, False, False, False)
        finally:
            doc.Close()
    finally:
        # Always shut Word down, otherwise a failed conversion leaves an
        # orphaned WINWORD process behind.
        word.Quit()


def word_convert(doc_path='C:/名单.doc', new_file_name='C:/名单2.docx', column=1):
    """Collect one column of text from every table of a legacy .doc file.

    python-docx is used to read the tables, so the .doc file is first saved
    as .docx through Word's COM automation interface. The conversion is
    skipped when *new_file_name* already exists.

    Args:
        doc_path: Path of the source .doc file.
        new_file_name: Path of the converted .docx file that is read.
        column: Zero-based index of the column to collect (default: the
            second column).

    Returns:
        A list with the text of the selected column for every row of every
        table, excluding the first row of each table. Rows that have no
        cell at *column* are skipped.
    """
    if not os.path.exists(new_file_name):
        _convert_doc_to_docx(doc_path, new_file_name)

    account_list = []
    for table in Document(new_file_name).tables:
        for row_index, row in enumerate(table.rows):
            if row_index == 0:
                # The first row of each table is skipped.
                continue
            cells = row.cells
            if 0 <= column < len(cells):
                account_list.append(cells[column].text)
    return account_list

说明:python-docx 只能读取 .docx 文件,不能直接读取 .doc 文件,所以先通过 win32com 调用 Word 把 .doc 另存为 .docx,再用 python-docx 读取其中的表格。

posted @ 2019-01-10 13:56 qukaige 阅读(6328) 评论(0) 收藏举报

刷新页面返回顶部

qukaige

python 后缀doc文件的读取

公告