Python爬虫
1.需求:从网站上获取整个页面
import re
import sys
import urllib
from contextlib import closing

try:
    # Python 3: urlopen lives in urllib.request.
    from urllib.request import urlopen
except ImportError:
    # Python 2: urlopen is exposed directly on the urllib module.
    from urllib import urlopen


def downloadPage(url, name):
    """Fetch the resource at *url* and save it as ``<name>.html``.

    Args:
        url: Address of the page to download, passed to ``urlopen``.
        name: Output file path without extension; ``".html"`` is appended.

    Returns:
        The raw response body exactly as read from the connection
        (``bytes`` on Python 3, ``str`` on Python 2).

    Raises:
        Whatever ``urlopen`` or ``open`` raise (e.g. an unreachable URL or
        an unwritable destination); errors are not swallowed.
    """
    # closing() guarantees the connection is released even if read() fails,
    # and works with Python 2 responses that are not context managers.
    with closing(urlopen(url)) as response:
        html = response.read()

    # Binary mode: the body is raw bytes, so it must be written untouched
    # (no newline translation, no implicit text encoding).
    with open(name + ".html", "wb") as fp:
        fp.write(html)

    return html


if __name__ == "__main__":
    # Only download when run as a script, so importing this module does not
    # trigger network access.
    html = downloadPage("http://hao123.com", "hao123")
    web = downloadPage("http://www.innocellence.com", "Innocellence")
    # On Python 3 this prints the bytes repr (b'...'), since the body is not decoded.
    print(html)