熊咪

爬虫学习---美丽汤

#coding:utf-8
#version: 0.1
#note: Lists the resources published on the 10 most recent pages of 0daydown.
import urllib.request

from bs4 import BeautifulSoup

BASE_URL = "http://www.0daydown.com/page/"
PAGE_COUNT = 10  # number of listing pages to walk, starting from page 1


def page_url(page_number):
    """Return the listing URL for *page_number*.

    Each listing page's URL is the base URL followed by the page number.
    """
    return BASE_URL + str(page_number)


def fetch_page(url):
    """Download *url* and return it parsed as a BeautifulSoup document.

    The response is closed even if parsing raises. The parser is named
    explicitly so the result does not depend on which optional parsers
    happen to be installed.
    """
    with urllib.request.urlopen(url) as page:
        return BeautifulSoup(page, "html.parser")


def print_article(article):
    """Print category, title, publication time and note of one article tag.

    Raises AttributeError if the article lacks one of the expected child
    elements (header link, h2, time icon inside the first <p>).
    """
    # The node right after the header link is its text: the category.
    print("Category:".ljust(20), article.header.a.next_element, sep='')
    print("Title:".ljust(20), article.h2.string, sep='')
    # The publication time is the node following the clock icon.
    time_icon = article.p.find('i', class_="icon-time icon12")
    print("Pulished_time:".ljust(19), time_icon.next_element, sep='')
    # The note is the element that follows the first <p>.
    print("Note:", article.p.find_next_sibling().string, sep='')
    print('-' * 50)


def main():
    """Print the resources listed on the first PAGE_COUNT pages."""
    for i in range(1, PAGE_COUNT + 1):
        soup_packtpage = fetch_page(page_url(i))
        # Label which page the following resources belong to.
        print(" The Page of: " + str(i))
        print("#" * 40)
        # find_all collects every resource published on the current page.
        for article in soup_packtpage.find_all('article', class_="excerpt"):
            print_article(article)

    # Wait for input so the console window does not close immediately.
    input()


if __name__ == "__main__":
    main()

posted on 2015-05-16 13:21  熊咪  阅读(169)  评论(0)  编辑  收藏  举报