Windows下Word转html

安装  win32com  模块。

pip install pypiwin32

代码  t1.py 

from win32com import client as wc
import os

# Shared Word COM automation object, created once when this module is loaded;
# wordsToHtml() opens documents through it and quits it when it is done.
word = wc.Dispatch('Word.Application')


def wordsToHtml(dir):
    """Convert every Word document under ``dir`` to filtered HTML.

    Walks ``dir`` recursively. For each ``.doc`` / ``.docx`` file (suffix
    matched case-insensitively) the module-level ``word`` COM application
    opens the document and saves an ``.html`` file with the same base name
    next to it. Files with any other suffix are never opened in Word; files
    with no suffix at all are reported and skipped. Word is quit when the
    walk ends, even if a conversion raised.

    Args:
        dir: Root directory to scan. It may be relative; it is made absolute
            because Word resolves relative paths against its own working
            directory, not the Python process's.
    """
    # 10 is wdFormatFilteredHTML in Word's WdSaveFormat enumeration.
    filtered_html = 10
    root = os.path.abspath(dir)

    try:
        for path, _subdirs, files in os.walk(root):
            for wordFile in files:
                wordFullName = os.path.join(path, wordFile)

                fileName, dot, fileSuffix = wordFile.rpartition(".")
                if not dot:
                    print(wordFullName + "********************ERROR: 未取得后缀名!")
                    continue
                if fileSuffix.lower() not in ("doc", "docx"):
                    continue
                if wordFile.startswith("~$"):
                    # Owner/lock files Word drops beside an open document;
                    # they carry a Word suffix but are not real documents.
                    continue

                htmlFullName = os.path.join(path, fileName + ".html")
                print("generate html:" + htmlFullName)

                # Only open the file once we know it is a Word document, and
                # always close it so a failed save does not leave it open.
                doc = word.Documents.Open(wordFullName)
                try:
                    doc.SaveAs(htmlFullName, filtered_html)
                finally:
                    doc.Close()
    finally:
        # Quit even on failure; otherwise the Word process keeps running.
        word.Quit()

    print("")
    print("Finished!")


if __name__ == '__main__':
    # Script entry point: expects exactly one command-line argument, the
    # root directory whose Word documents should be converted.
    import sys

    cli_args = sys.argv[1:]
    if len(cli_args) == 1:
        wordsToHtml(cli_args[0])
    else:
        print("Usage: python funcName.py rootdir")
        sys.exit(100)

运行方式:python t1.py <待转换文件所在的目录>

运行后会把该目录(包括子目录)下所有的 Word 文档(.doc / .docx)转换为 html

 

单个文件的转换只需要

# Convert a single document: start Word, open the .docx, save it as HTML.
word = wc.Dispatch('Word.Application')
try:
    doc = word.Documents.Open(r"D:\PycharmProjects\test\N1\bbaa-2022年02月18日.docx")
    try:
        # 10 is wdFormatFilteredHTML in Word's WdSaveFormat enumeration.
        doc.SaveAs(r"D:\PycharmProjects\test\N1\bbaa-2022年02月18日.html", 10)
    finally:
        doc.Close()
finally:
    # Always quit Word; otherwise a failed Open/SaveAs leaves it running.
    word.Quit()

 

posted @ 2022-02-21 15:48  你的小可爱吖  阅读(90)  评论(0)  编辑  收藏  举报