Windows下Word转html
安装 win32com 模块。
pip install pypiwin32
代码 t1.py
from win32com import client as wc
import os

# The Word COM application is started once at import time and shared by
# wordsToHtml(), which shuts it down when it finishes.
word = wc.Dispatch('Word.Application')

# Numeric value of Word's WdSaveFormat.wdFormatFilteredHTML constant.
_WD_FORMAT_FILTERED_HTML = 10

# Extensions (lower-case, without the dot) that are treated as Word documents.
_WORD_SUFFIXES = ("doc", "docx")


def wordsToHtml(dir):
    """Convert every Word document under *dir* (recursively) to HTML.

    Each ``.doc``/``.docx`` file (extension matched case-insensitively) is
    opened in the module-level Word application and saved next to the
    original as ``<name>.html`` in filtered-HTML format. Files without an
    extension are reported and skipped; all other files are ignored.

    The shared Word application is always quit when the function returns
    or raises, so the function can be called only once per process.

    Args:
        dir: Root directory to walk. A relative path is made absolute
            before being handed to Word.
    """
    # Word is a separate process and does not necessarily share Python's
    # working directory, so only absolute paths are passed to it.
    root = os.path.abspath(dir)
    try:
        for path, subdirs, files in os.walk(root):
            for wordFile in files:
                wordFullName = os.path.join(path, wordFile)
                fileName, dot, fileSuffix = wordFile.rpartition(".")
                if not dot:
                    print(wordFullName + "********************ERROR: 未取得后缀名!")
                    continue
                if fileSuffix.lower() not in _WORD_SUFFIXES:
                    continue
                if wordFile.startswith("~$"):
                    # Owner/lock file Word creates beside an open document;
                    # it is not a real document and cannot be converted.
                    continue

                htmlFullName = os.path.join(path, fileName + ".html")
                print("generate html:" + htmlFullName)
                # Open the document only after the extension check so that
                # unrelated files are never loaded into Word.
                doc = word.Documents.Open(wordFullName)
                try:
                    doc.SaveAs(htmlFullName, _WD_FORMAT_FILTERED_HTML)
                finally:
                    doc.Close()
    finally:
        # Never leave a background WINWORD process behind, even on failure.
        word.Quit()
    print("")
    print("Finished!")


if __name__ == '__main__':
    import sys
    if len(sys.argv) != 2:
        print("Usage: python funcName.py rootdir")
        # Word was already started at import time; shut it down before exiting.
        word.Quit()
        sys.exit(100)
    wordsToHtml(sys.argv[1])
运行方式:python t1.py <待转换文件所在的目录>
运行后会把该目录(包括子目录)下所有的 Word 文档转换为 html,生成的 html 文件与原文档位于同一目录。
单个文件的转换只需要以下几行代码:
# Convert a single Word document to HTML.
# `wc` is win32com.client (from win32com import client as wc).
word = wc.Dispatch('Word.Application')
try:
    doc = word.Documents.Open(r"D:\PycharmProjects\test\N1\bbaa-2022年02月18日.docx")
    try:
        # 10 is the value of WdSaveFormat.wdFormatFilteredHTML.
        doc.SaveAs(r"D:\PycharmProjects\test\N1\bbaa-2022年02月18日.html", 10)
    finally:
        # Release the document even if saving fails.
        doc.Close()
finally:
    # Always shut Word down so no background WINWORD process is left running.
    word.Quit()