# 网上扒小说的小程序 (a small script for scraping novel chapters from the web)

import requests
from bs4 import BeautifulSoup
 

def get_url(chap):
    """Build the page address for chapter ``chap``.

    The chapter identifier is converted with ``str`` and placed between the
    fixed base address and the ``.html`` suffix.
    """
    base = 'http://www.cnblogs.com/...'
    pieces = (base, str(chap), '.html')
    return ''.join(pieces)



def get_content(url, data=None):
    """Download ``url`` and return the response body decoded as UTF-8.

    Args:
        url: Address of the page to fetch.
        data: Unused by this function; kept so existing callers that pass
            it keep working.

    Returns:
        The response body as text, decoded as UTF-8.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    rep = requests.get(url, timeout=120)
    # Fail loudly on error statuses instead of handing an error page to the
    # HTML parser as if it were chapter content.
    rep.raise_for_status()
    # Decode as UTF-8 explicitly rather than using the encoding requests
    # derives from the response.
    rep.encoding = 'utf-8'
    return rep.text


def get_data(htmltext):
    """Extract the text of the ``<div class="panel-body">`` element.

    Args:
        htmltext: HTML source of the page as a string.

    Returns:
        The text content of the first ``div`` with class ``panel-body``
        found under ``<body>``.

    Raises:
        ValueError: If the document has no ``<body>`` or no matching
            ``div``; previously this surfaced as an ``AttributeError`` on
            ``None``.
    """
    bs = BeautifulSoup(htmltext, "html.parser")
    body = bs.body
    # Both ``bs.body`` and ``find`` yield None when nothing matches, so check
    # each step before dereferencing.
    panel = None
    if body is not None:
        panel = body.find('div', {'class': 'panel-body'})
    if panel is None:
        raise ValueError('no <div class="panel-body"> element found in page')
    return panel.get_text()

def mod_data(Num):
    """Return the extracted text for chapter ``Num``.

    Chains the three steps: build the chapter URL, download the page, and
    pull the text out of the downloaded HTML.
    """
    return get_data(get_content(get_url(Num)))

if __name__ == '__main__':
    # Download each chapter in the range and save it to its own text file
    # named after the chapter number.
    for chapter in range(2, 3):
        name = '第' + str(chapter) + '章.txt'
        data_new = mod_data(chapter)

        # Write as UTF-8 explicitly: the platform default encoding may be
        # unable to represent the downloaded text and would raise
        # UnicodeEncodeError or produce unreadable output.
        with open(name, "w", encoding="utf-8") as f:
            f.write(data_new)

  

# posted @ 2020-09-30 02:41  华小电  阅读(476)  评论(0)  编辑  收藏  举报