HTML to PDF

# -*- coding: utf-8 -*-
# @Time : 2018/12/21 1:52 PM
# @Author : cxa
# @File : useapi.py
# @Software: PyCharm
from requests_html import HTMLSession
import pdfkit
import pathlib

# Directory the input HTML files are resolved against. Resolved once instead
# of on every loop iteration.
base_dir = pathlib.Path.cwd()

# One-off download step, kept for reference. It fetched each article, took the
# first node matching the XPath below, and wrote that node's HTML to a file
# named after the last URL segment (e.g. "9775994.html").
# session = HTMLSession()
# urllist = ["https://www.cnblogs.com/Eric15/articles/9775994.html",
#            "https://www.cnblogs.com/Eric15/articles/9770239.html",
#            "https://www.cnblogs.com/Eric15/articles/9769044.html",
#            "https://www.cnblogs.com/Eric15/articles/9749180.html"]
# htmllist = []
#
#
# def get_res(url):
#     req = session.get(url)
#     node = req.html.xpath("//div[@id='cnblogs_post_body']")
#     with  open(url.split("/")[-1],"w",encoding="u8") as fs:
#         fs.write(node[0].html)
#
#
# for item in urllist:
#     get_res(item)

# Options forwarded by pdfkit to wkhtmltopdf.
options = {
    'page-size': 'Letter',
    'margin-top': '0.75in',
    'margin-right': '0.75in',
    'margin-bottom': '0.75in',
    'margin-left': '0.75in',
    'encoding': "UTF-8",
    'no-outline': None,  # None marks an option that takes no value
}

# Input files, handed to pdfkit in this order.
htmllist = ["9749180.html", "9769044.html", "9770239.html", "9775994.html"]

# pdfkit's documented inputs are path strings, so pass str rather than
# pathlib.Path objects; the wkhtmltopdf command line is then built from
# plain strings only.
newlist = [str(base_dir / name) for name in htmllist]

# Render all input files into a single PDF in the working directory.
pdfkit.from_file(newlist, '异步IO.pdf', options=options)

  

posted @ 2018-12-21 14:51  公众号python学习开发  阅读(214)  评论(0)  编辑  收藏  举报