摘要: from urllib import request from bs4 import BeautifulSoup as bs import re import codecs import jieba #分词包 import numpy #numpy计算包 import pandas as pd #分词用到 import matplotlib.pyplot as plt #绘图包 imp... 阅读全文
posted @ 2018-11-10 19:24 泰初 阅读(546) 评论(0) 推荐(0) 编辑
摘要: from urllib import request from bs4 import BeautifulSoup as bs import re import codecs import jieba #分词包 import numpy #numpy计算包 import pandas as pd #分词用到 import matplotlib.pyplot as plt #绘图包 imp... 阅读全文
posted @ 2018-11-10 19:23 泰初 阅读(504) 评论(0) 推荐(0) 编辑
摘要: from urllib import request from bs4 import BeautifulSoup as bs #爬取豆瓣最受关注图书榜 resp = request.urlopen('https://book.douban.com/chart?subcat=I') html_data = resp.read().decode('utf-8') #转化为BeautifulSou... 阅读全文
posted @ 2018-11-10 19:20 泰初 阅读(1601) 评论(0) 推荐(0) 编辑
摘要: from urllib import request from bs4 import BeautifulSoup as bs #爬取豆瓣最受关注图书榜 resp = request.urlopen('https://book.douban.com/chart?subcat=I') html_data = resp.read().decode('utf-8') #转化为BeautifulSou... 阅读全文
posted @ 2018-11-10 19:18 泰初 阅读(1580) 评论(0) 推荐(0) 编辑
摘要: import requests #requests模块用于HTTP请求 import codecs #codecs模块用于文件操作 from bs4 import BeautifulSoup #BeautifulSoup库用于对HTML代码进行解析 #要爬取的地址 URL="https://book.douban.com/top250" #user-agent,模仿浏览器,防止被目... 阅读全文
posted @ 2018-11-10 19:14 泰初 阅读(663) 评论(0) 推荐(0) 编辑