Python examples — a collection of urllib/urllib2 practice exercises.

#!/usr/bin/python
#coding:utf-8
import time
import urllib
import urllib2
from bs4 import BeautifulSoup
import re
import cookielib

def main0():
    unix_timenow = int(time.time())
    print '当前时间:%d' % unix_timenow

    timenow = time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(unix_timenow))
    print '当前标准时间:%s' % timenow

    time.sleep(3)
    unixtime = time.mktime(time.strptime(timenow,'%Y-%m-%d %H:%M:%S'))
    print 'unix时间:%s' % unixtime

#1.将2015-1-16 12:00:00转化为unix时间,并计算此时间3天前的格式化时间和unix时间。
def main1():
    time1 = '2015-1-16 12:00:00'
    unixtime1 = time.mktime(time.strptime(time1,'%Y-%m-%d %H:%M:%S'))
    print '1:unix时间戳:%s' % unixtime1

    unixtime2 = unixtime1 - 60*60*24*3
    print '1:3天前时间戳:%s' % unixtime2
    time2 = time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(unixtime2))
    print '1:3天前格式化时间:%s' % time2

#2、使用urllib访问百度首页,取得百度的title、返回状态码、内容长度、cookie值等信息。
def main2():
    url = 'http://www.baidu.com'
    response = urllib.urlopen(url)
    headers = response.info()
    cookie = headers['Set-Cookie']        #如果有多个cookie.则一次性读完.                        
    html = response.read()                #就是一个str类型的html源码
    soup = BeautifulSoup(html)
    title = soup.title.string
    statucode = response.getcode()
    htmlLength = len(html)
    print 'title:%s\nstatus:%s\ncontentlength:%s\ncookie:%s' % (title,statucode,htmlLength,cookie)

# 3、使用urllib访问http://www.cz88.net/proxy/index.shtml取得代理服务器的IP地址。
def main3():
    """Exercise 3: scrape proxy server addresses from cz88.net and write
    them to proxy_ip.txt, one 'ip,port,type,addr' line per proxy.

    Fixes: guard against the page layout changing (find_all returning
    nothing previously caused an IndexError), and use a with-statement so
    the output file is closed even if parsing raises.
    """
    url = 'http://www.cz88.net/proxy/index.shtml'
    response = urllib.urlopen(url)
    html = response.read()
    soup_html = BeautifulSoup(html)
    ip_div = soup_html.find_all('div',class_='box694')
    if not ip_div:
        # Page layout changed or fetch failed -- nothing to extract.
        return
    pattern = re.compile('<li><div class="ip">(.*?)</div><div class="port">(.*?)</div><div class="type">(.*?)</div><div class="addr".*?>(.*?)</div></li>')
    with open('proxy_ip.txt','w') as fwip:
        for i in ip_div[0].ul:
            items = re.findall(pattern,str(i))
            if items:
                li = list(items[0])
                # Skip the table header row ('IP','PORT',...).
                if li[0] != 'IP':
                    fwip.write(','.join(li)+'\n')

# 4、urllib2模块练习
# 使用weak_passwd.txt弱口令文件,暴力破解http://127.0.0.1/www/Login/index.html用户名和密码。
def main4():
    fo = open('weak_passwd.txt','r')
    pass_list = fo.readlines()
    for i in pass_list:
        i = i.strip()
        isok = post('admin',i)
        if isok:
            print 'pasword:%s' % i
            return
    fo.close()

def post(name,pwd):
    """POST one username/password pair to the login endpoint and report
    whether the login succeeded.

    Success is detected by the post-login redirect: the site sends the
    browser to Show/index.html on success (site-specific check).
    Returns True on success, False otherwise.

    NOTE(review): the Host header (192.168.2.150) does not match the
    request URL host (192.168.1.139) -- looks like a leftover from a
    different test box; confirm which address is intended.
    """
    data = urllib.urlencode({"username":name,"password":pwd})
    headers = {
        "Host":"192.168.2.150",
        "Pragma":"no-cache",
        "Content-Length":len(data),
        "Accept": "text/html, application/xhtml+xml, */*",
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; NMJB; rv:11.0) like Gecko",
        "Content-Type": "application/x-www-form-urlencoded",
        "Referer": "http://192.168.1.139/www/Login/index.html",
        "Cookie": "thinkphp_show_page_trace=0|0; PHPSESSID=dbg5jjc9t76njqp6bimk43hjr4",
        }
    url = 'http://192.168.1.139/www/Login/login/'
    request = urllib2.Request(url,data,headers)
    response = urllib2.urlopen(request)
    # Different sites need different success checks; this one redirects
    # to the Show page after a good login.
    return response.geturl() == 'http://192.168.1.139/www/Show/index.html'

# 5、urllib2模块代理使用练习
#    将练习题3中得到的代理保存在文件中,使用urllib2的代理模块验证哪些代理是可用的。
def main6():
    proxy = 'proxy_ip.txt'
    proxy_list = open(proxy,'r').readlines()
    available_ip = []
    ip_tuple = None
    for i in proxy_list:
        i = i.split(',')
        isok = testProxy(i[0],i[1])
        if isok:
            available_ip.append((i[0],i[1]));
    for j in available_ip:
        print "available's IP is %s:%s" % (j[0],j[1])

def testProxy(ip,port):
    """Return True if the HTTP proxy ip:port can fetch baidu.com within
    5 seconds (status 200), False otherwise.

    Fix: the original 'except Exception, e: pass' fell off the end of the
    function and returned None on failure, and its try/else branch was
    unreachable (the try body always returned). Failures now return
    False explicitly.
    """
    # For an authenticated proxy the form would be http://user:pass@host:port
    proxyip = 'http://%s:%s' % (ip,port)
    proxy_handler = urllib2.ProxyHandler({'http':proxyip})   # route http traffic via the proxy
    opener = urllib2.build_opener(proxy_handler)
    request = urllib2.Request('http://www.baidu.com')
    try:
        response = opener.open(request,timeout=5)
    except Exception:
        # Any connection/timeout/protocol failure means the proxy is unusable.
        return False
    return response.getcode() == 200
    
#6.cookielib模块的使用
def main7():
    #cookiefile = 'cookie.txt'    #本地要保存的cookie文件名
    #cookie = cookielib.MozillaCookieJar(cookiefile)     #传入本地cookie文件名生成cookie对象.
    #cookie = cookielib.MozillaCookieJar()            #加载本地文件中的cookie时用此语句创建cookie对象
    #cookie.load(cookiefile)        #加载本地文件对象中cookie.
    cookie = cookielib.CookieJar()    #默认cookie创建方式
    cookie_handler = urllib2.HTTPCookieProcessor(cookie)    #将cookie对象加入HTTP中
    opener = urllib2.build_opener(cookie_handler)        #创建HTTP请求处理对象Handler
    url = "http://www.qq.com"
    request = urllib2.Request(url)
    response = opener.open(request,timeout=5)
    print response.getcode()

if __name__ == '__main__':
    # Bug fix: the original called main5(), which is not defined anywhere
    # in this file (the exercises are main0-main4, main6, main7), so the
    # script died with a NameError. The proxy-validation exercise is
    # main6(), which consumes the proxy_ip.txt produced by main3().
    # main0()
    # main1()
    # main2()
    main3()
    # main4()
    main6()
    # main7()

 

posted @ 2015-10-17 20:48  超超xc  Views(352)  Comments(0)  Edit  收藏  举报
I suppose we were children once.