# Python examples: a collection of urllib / urllib2 practice exercises.
#!/usr/bin/python #coding:utf-8 import time import urllib import urllib2 from bs4 import BeautifulSoup import re import cookielib def main0(): unix_timenow = int(time.time()) print '当前时间:%d' % unix_timenow timenow = time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(unix_timenow)) print '当前标准时间:%s' % timenow time.sleep(3) unixtime = time.mktime(time.strptime(timenow,'%Y-%m-%d %H:%M:%S')) print 'unix时间:%s' % unixtime #1.将2015-1-16 12:00:00转化为unix时间,并计算此时间3天前的格式化时间和unix时间。 def main1(): time1 = '2015-1-16 12:00:00' unixtime1 = time.mktime(time.strptime(time1,'%Y-%m-%d %H:%M:%S')) print '1:unix时间戳:%s' % unixtime1 unixtime2 = unixtime1 - 60*60*24*3 print '1:3天前时间戳:%s' % unixtime2 time2 = time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(unixtime2)) print '1:3天前格式化时间:%s' % time2 #2、使用urllib访问百度首页,取得百度的title、返回状态码、内容长度、cookie值等信息。 def main2(): url = 'http://www.baidu.com' response = urllib.urlopen(url) headers = response.info() cookie = headers['Set-Cookie'] #如果有多个cookie.则一次性读完. html = response.read() #就是一个str类型的html源码 soup = BeautifulSoup(html) title = soup.title.string statucode = response.getcode() htmlLength = len(html) print 'title:%s\nstatus:%s\ncontentlength:%s\ncookie:%s' % (title,statucode,htmlLength,cookie) # 3、使用urllib访问http://www.cz88.net/proxy/index.shtml取得代理服务器的IP地址。 def main3(): url = 'http://www.cz88.net/proxy/index.shtml' response = urllib.urlopen(url) html = response.read() soup_html = BeautifulSoup(html) ip_div = soup_html.find_all('div',class_='box694') pattern = re.compile('<li><div class="ip">(.*?)</div><div class="port">(.*?)</div><div class="type">(.*?)</div><div class="addr".*?>(.*?)</div></li>') fwip = open('proxy_ip.txt','w') for i in ip_div[0].ul: #print 'i:%s' % i items = re.findall(pattern,str(i)) if items != [] : #print 'items:%s' % items li = list(items[0]) if li[0] != 'IP': fwip.write(','.join(li)+'\n') fwip.close() # 4、urllib2模块练习 # 使用weak_passwd.txt弱口令文件,暴力破解http://127.0.0.1/www/Login/index.html用户名和密码。 def main4(): fo = 
open('weak_passwd.txt','r') pass_list = fo.readlines() for i in pass_list: i = i.strip() isok = post('admin',i) if isok: print 'pasword:%s' % i return fo.close() def post(name,pwd): data = urllib.urlencode({"username":name,"password":pwd}); content_length = len(data) headers = { "Host":"192.168.2.150", "Pragma":"no-cache", "Content-Length":content_length, "Accept": "text/html, application/xhtml+xml, */*", "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; NMJB; rv:11.0) like Gecko", "Content-Type": "application/x-www-form-urlencoded", "Referer": "http://192.168.1.139/www/Login/index.html", "Cookie": "thinkphp_show_page_trace=0|0; PHPSESSID=dbg5jjc9t76njqp6bimk43hjr4", } # Client = httplib.HTTPConnection("192.168.1.139",80,timeout=5) # Client.request("POST","/www/Login/login/",parames,headers) # response = Client.getresponse() url = 'http://192.168.1.139/www/Login/login/' request = urllib2.Request(url,data,headers) response = urllib2.urlopen(request) if response.geturl() == 'http://192.168.1.139/www/Show/index.html': #根据不同的网站需要判断不同的登陆成功方式 return True else: return False # 5、urllib2模块代理使用练习 # 将练习题3中得到的代理保存在文件中,使用urllib2的代理模块验证哪些代理是可用的。 def main6(): proxy = 'proxy_ip.txt' proxy_list = open(proxy,'r').readlines() available_ip = [] ip_tuple = None for i in proxy_list: i = i.split(',') isok = testProxy(i[0],i[1]) if isok: available_ip.append((i[0],i[1])); for j in available_ip: print "available's IP is %s:%s" % (j[0],j[1]) def testProxy(ip,port): #proxyip = 'http://%s:%s@%s' % (user, passwd, proxyserver) proxyip = 'http://%s:%s' % (ip,port) proxy_handler = urllib2.ProxyHandler({'http':proxyip}) #创建urllib2的ProxyHandler处理对象 opener = urllib2.build_opener(proxy_handler) request = urllib2.Request('http://www.baidu.com') try: response = opener.open(request,timeout=5) statu = response.getcode() if statu == 200: return True else: return False except Exception, e: pass else: return False #6.cookielib模块的使用 def main7(): #cookiefile = 'cookie.txt' #本地要保存的cookie文件名 
#cookie = cookielib.MozillaCookieJar(cookiefile) #传入本地cookie文件名生成cookie对象. #cookie = cookielib.MozillaCookieJar() #加载本地文件中的cookie时用此语句创建cookie对象 #cookie.load(cookiefile) #加载本地文件对象中cookie. cookie = cookielib.CookieJar() #默认cookie创建方式 cookie_handler = urllib2.HTTPCookieProcessor(cookie) #将cookie对象加入HTTP中 opener = urllib2.build_opener(cookie_handler) #创建HTTP请求处理对象Handler url = "http://www.qq.com" request = urllib2.Request(url) response = opener.open(request,timeout=5) print response.getcode() if __name__ == '__main__': # main0() # main1() # main2() main3() # main4() main5() # main6()