九、docx, win32com

一、python解析word

pip install python-docx

1.1 判断是不是doc或者docx文件

import fnmatch

def judge_word_file(filename):
    """Return True if *filename* names a Word document that should be processed.

    A file qualifies when its name matches ``*.doc`` or ``*.docx`` and it is
    not one of the ``~$``-prefixed temporary files that Word keeps next to a
    document while that document is open.

    :param filename: file name or path to check
    :return: True for a regular .doc/.docx file, False otherwise
    """
    import os  # local import so the function does not depend on later imports

    if not (fnmatch.fnmatch(filename, "*.doc") or fnmatch.fnmatch(filename, "*.docx")):
        return False
    # Look only at the last path component so "some/dir/~$report.docx" is
    # rejected as well as a bare "~$report.docx".
    if os.path.basename(filename).startswith("~$"):
        return False
    return True

1.2 按照元素解析word

from docx import Document

# Path of the Word document to parse (presumably a placeholder; replace with a real file)
filename = "xxx.docx"


class ParserWord:
    """Extract paragraph text, table cells and text-box text from a Word document.

    Call :meth:`run_parser` to open the document and run every parser, or set
    ``doc`` yourself and call the individual ``parser_*`` methods.
    """
    filename = None  # path of the document to parse
    doc = None  # document object; populated by run_parser()

    def __init__(self, filename):
        """Store the document path; the file is not opened until run_parser()."""
        self.filename = filename

    def run_parser(self):
        """Open the document and run the text, table and text-box parsers in turn."""
        self.doc = Document(self.filename)
        self.parser_text()
        self.parser_tables()
        self.parser_text_box()

    def parser_tables(self, output_path='table.txt'):
        """
        Collect the text of every table cell and write it out, one cell per line.

        Cells are visited table by table, row by row, in document order.

        :param output_path: file the cell texts are written to (UTF-8)
        :return: list with the text of each visited cell
        """
        cell_texts = [
            cell.text
            for tab in self.doc.tables
            for row in tab.rows
            for cell in row.cells
        ]
        with open(output_path, 'w', encoding='utf-8') as f:
            f.writelines(cell_text + '\n' for cell_text in cell_texts)
        return cell_texts

    def parser_text(self, output_path='text.txt'):
        """
        Collect the non-empty paragraphs, write them out and echo them to stdout.

        :param output_path: file the paragraph texts are written to (UTF-8)
        :return: list with the text of each non-empty paragraph
        """
        lines = [para.text for para in self.doc.paragraphs if para.text]
        with open(output_path, 'w', encoding='utf-8') as f:
            for line in lines:
                f.write(line + '\n')
                print(line)
        return lines

    def parser_text_box(self):
        """
        Collect the text found inside the document's text boxes.

        Every element whose tag ends in ``textbox`` is scanned. Inside it, a
        ``pPr`` element starts a new segment and the text of each ``r``
        element is appended to the current segment.

        Note: the text of every visited ``r``/``pPr`` element is reset to ''
        after it has been read, so the in-memory document is modified
        (presumably so the same text is not collected twice when text boxes
        are nested -- confirm before relying on it).

        :return: list of non-empty text segments, in document order
        """
        segments = ['']
        # Walk every element of the body looking for text-box elements
        for _docElement in self.doc.element.body.iter():
            if not _docElement.tag.endswith('textbox'):
                continue
            for _ele in _docElement.iter():
                if _ele.tag.endswith('main}pPr'):
                    segments.append('')
                elif _ele.tag.endswith('main}r'):
                    # An element without text yields None; treat it as empty
                    segments[-1] += _ele.text or ''
                else:
                    continue
                _ele.text = ''
        return [segment for segment in segments if segment]

1.3 word转成txt文件

pip install pywin32

 

 

import os
from win32com import client as wc
def transfer_to_txt(filename, txt_word_folder_path, txt_file_name):
    """Convert a Word document to a text file by automating Microsoft Word.

    Requires Windows with Microsoft Word installed (COM automation).

    :param filename: path of the source document; relative paths are
        resolved against the current working directory
    :param txt_word_folder_path: folder that receives the text file
    :param txt_file_name: name of the text file to create in that folder
    """
    wd_format_dos_text = 4  # WdSaveFormat.wdFormatDOSText
    wd_do_not_save_changes = 0  # WdSaveOptions.wdDoNotSaveChanges

    # Word resolves relative paths against its own working directory, not
    # this process's, so hand it absolute paths for both input and output.
    doc_path = os.path.abspath(filename)
    word_to_txt = os.path.abspath(os.path.join(txt_word_folder_path, txt_file_name))

    wordapp = wc.Dispatch('Word.Application')
    try:
        doc = wordapp.Documents.Open(doc_path)
        try:
            doc.SaveAs(word_to_txt, wd_format_dos_text)
        finally:
            # Close the document even when SaveAs fails, without prompting
            doc.Close(wd_do_not_save_changes)
    finally:
        wordapp.Quit()

 

二、word中创建内容(表格)

from docx import Document
from docx.shared import Inches, Pt
from docx.oxml.ns import qn
from docx.enum.text import WD_TAB_ALIGNMENT


# Build a one-page test-case document: a heading line with the use-case
# number followed by a 5x3 table describing the test case, then save it.

# Values placed into the test-case table
USeCaseNumber = "USeCaseNumber"
Test_Case_Description = "Test_Case_Description"
Test_Method = "Test_Method"
Pre_Condition = "Pre-condition"
Actions = "Actions"
Expected_Result = "Result"


def _add_centered(cell, text):
    """Append a centred paragraph containing *text* to *cell*."""
    # NOTE(review): WD_TAB_ALIGNMENT is the tab-stop enum; python-docx documents
    # WD_ALIGN_PARAGRAPH for paragraph alignment. Kept because it is the enum
    # this snippet imports -- confirm and switch if appropriate.
    cell.add_paragraph(text).alignment = WD_TAB_ALIGNMENT.CENTER


# Create the document object
document = Document()

# Default font; the East Asian font must be set separately on the rFonts element
document.styles['Normal'].font.name = u'宋体'
document.styles['Normal']._element.rPr.rFonts.set(qn('w:eastAsia'), u'宋体')

# Heading paragraph showing the use-case number
p = document.add_paragraph()
p.paragraph_format.alignment = WD_TAB_ALIGNMENT.LEFT  # left-align the text
r = p.add_run(USeCaseNumber)  # paragraph text
r.font.size = Pt(14)  # font size
# r.bold = True  # uncomment to make the text bold

# Table with 5 rows and 3 columns
table = document.add_table(rows=5, cols=3, style='Table Grid')
table.autofit = False

table.columns[0].width = Inches(2)  # width of the first column: 2 inches

# Merge columns 1 and 2 in the first three rows so each of those rows is a
# label cell followed by one wide value cell. cell(x, y): x is the row and
# y the column, both starting at 0.
for row_index in range(3):
    table.cell(row_index, 1).merge(table.cell(row_index, 2))

hdr_cells0 = table.rows[0].cells  # cells of row 0
hdr_cells1 = table.rows[1].cells  # cells of row 1
hdr_cells2 = table.rows[2].cells
hdr_cells3 = table.rows[3].cells
hdr_cells4 = table.rows[4].cells

# Row 0: label "Test Case Description" and its value
_add_centered(hdr_cells0[0], 'Test Case Description')
_add_centered(hdr_cells0[1], Test_Case_Description)

# Row 1: label "Test method" and its value
_add_centered(hdr_cells1[0], 'Test method')
_add_centered(hdr_cells1[1], Test_Method)

# Row 2: label "Pre-condition" and its value
_add_centered(hdr_cells2[0], 'Pre-condition')
_add_centered(hdr_cells2[1], Pre_Condition)

# Row 3: column headers for the step rows
_add_centered(hdr_cells3[0], 'Test Steps')
_add_centered(hdr_cells3[1], 'Actions')
_add_centered(hdr_cells3[2], 'Expected Result')

# Row 4: first test step
_add_centered(hdr_cells4[0], '1')
_add_centered(hdr_cells4[1], Actions)
_add_centered(hdr_cells4[2], Expected_Result)

# Save the document under its file name
document.save("用例文档.docx")

 

posted on 2021-06-08 13:16  软饭攻城狮  阅读(144)  评论(0)  编辑  收藏  举报

导航