摘要:
#coding:utf8 __author__ = 'wang' class HtmlOutputer(object): def __init__(self): self.datas = []; def collect_data(self, data): if data is None: return ... 阅读全文
摘要:
#coding:utf8 import urlparse from bs4 import BeautifulSoup import re __author__ = 'wang' class HtmlParser(object): def parse(self, page_url, html_cont): if page_url is None or html_con... 阅读全文
摘要:
spider_main.py 阅读全文
摘要:
#coding:utf8 class UrlManager(object): def __init__(self): self.new_urls = set() self.old_urls = set() def add_new_url(self, url): if url is None: return... 阅读全文
摘要:
import re from bs4 import BeautifulSoup html_doc = """ <html><head><title>The Dormouse's story</title></head> <body> <p class="title"><b>The Dormouse' 阅读全文
摘要:
在 Python 的 Scripts 目录下执行:pip install beautifulsoup4 阅读全文
摘要:
# -*- coding: utf-8 -*- import cookielib import urllib2 url = "http://www.baidu.com" print "第一种方法" response1 = urllib2.urlopen(url) print response1.getcode() print len(response1.read()) print "第二种方... 阅读全文
摘要:
sz /etc/sysconfig/network-scripts/ifcfg-eth1 阅读全文