Python实现的访问http的httpclass类库

#!/usr/bin/env python
#-*- coding:utf-8 -*-

import urllib
import httplib2
import socks
import os
import os.path
import re
import traceback
from poster.encode import multipart_encode
from poster.streaminghttp import register_openers
import urllib2

class Httpclass(object):
    """Small HTTP client built on httplib2, with poster/urllib2 for file uploads.

    A request is configured by assigning attributes on an instance and is
    then sent with ``html(url)``.

    Attributes:
        method: HTTP method used when no form data is set (default ``'GET'``).
        headers: Base request headers. They are copied for every request and
            never modified by ``html()``.
        cookie: Truthy to load cookies from / store cookies to ``cookie_file``.
        cookie_file: Path of the file holding the cookie string.
        proxy_type: Maps the proxy type names ``'http'``, ``'socks4'`` and
            ``'socks5'`` to the matching ``socks.PROXY_TYPE_*`` constants.
        proxy: ``''`` for a direct connection, or a dict with the keys
            ``'type'``, ``'ip'`` and ``'port'``.
        header: Truthy to make ``html()`` return
            ``{'header': response, 'data': content}``.
        formdata: ``''`` for a plain request, or a dict of form fields. A
            non-empty value forces a POST. The special key ``'upfiles'``
            (a dict with ``'name'`` and ``'filename'``) triggers a multipart
            file upload.
        onlyheader: Truthy to make ``html()`` return only the response headers.

    Debugging is switched on either with ``obj.debug(1)`` or ``obj.debug = 1``.
    """

    method = 'GET'
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36'}
    cookie = ''
    cookie_file = 'cookie'
    proxy_type = dict(http=socks.PROXY_TYPE_HTTP, socks4=socks.PROXY_TYPE_SOCKS4, socks5=socks.PROXY_TYPE_SOCKS5)
    proxy = ''
    # NOTE: there is deliberately no ``debug = 0`` attribute here. The method
    # named ``debug`` below would replace it anyway; ``_debug_enabled()``
    # treats "still the method" as "debugging off".
    header = 0
    formdata = ''
    onlyheader = ''

    # Cookie attributes stripped from a Set-Cookie header before it is stored.
    # They are applied in this order; every match is replaced by ''.
    _COOKIE_ATTR_PATTERNS = (
        ' expires=[^;]+;',
        ' path=[^;]+;',
        ' domain=[^,]+,',
        ' domain=[^;]+;',
        ' HttpOnly',
        '; domain=[^,]+',
    )

    def html(self, url=''):
        """Send the configured request to ``url`` and return the result.

        The request is attempted up to three times. ``formdata`` is reset to
        ``''`` afterwards, so it has to be set again for the next request.

        Args:
            url: Target URL; must not be empty.

        Returns:
            ``''`` when all three attempts failed. Otherwise the response
            headers when ``onlyheader`` is truthy, a dict
            ``{'header': response, 'data': content}`` when ``header`` is
            truthy, and the response body in all other cases. For file
            uploads the header part is always ``''``.

        Raises:
            ValueError: ``url`` is empty.
            IOError: ``formdata['upfiles']['filename']`` does not exist.
        """
        if not url:
            raise ValueError('url is empty!')
        if self._debug_enabled():
            httplib2.debuglevel = 4

        client = self._build_http()
        form = self.formdata

        # Work on per-request copies so that neither the shared class-level
        # ``headers`` dict nor ``self.method`` leaks state into later requests.
        request_headers = dict(self.headers)
        method = self.method
        if form:
            method = 'POST'
            request_headers['Content-type'] = 'application/x-www-form-urlencoded'
        method = method.upper()

        if self.cookie and 'Cookie' not in request_headers:
            stored_cookie = self._read_cookie_file()
            if stored_cookie:
                request_headers['Cookie'] = stored_cookie

        upfiles = None
        fields = None
        if isinstance(form, dict) and 'upfiles' in form:
            upfiles = form['upfiles']
            if not os.path.exists(upfiles['filename']):
                raise IOError("====== " + upfiles['filename'] + " is not exists! =======")
            # Register the streaming HTTP handlers with urllib2.
            register_openers()
            # The 'upfiles' descriptor itself must not be sent as a form field.
            fields = dict((key, value) for key, value in form.items() if key != 'upfiles')
            # multipart_encode supplies its own Content-Type and Content-Length.
            request_headers.pop('Content-type', None)

        proxy_kind = self.proxy['type'] if 'type' in self.proxy else ''
        response = ''
        content = ''
        succeeded = False
        for _attempt in range(3):
            try:
                if upfiles is not None:
                    # NOTE: uploads go through urllib2, not the httplib2
                    # client, so the proxy setting is not applied to them.
                    content = self._post_multipart(url, fields, upfiles, request_headers)
                    response = ''
                else:
                    response, content = client.request(
                        url, method, headers=request_headers,
                        body=urllib.urlencode(form))
            except Exception as e:
                print('Failed to receive ' + proxy_kind + ' connect request ack')
                print(e)
                continue
            succeeded = True
            break

        if self.formdata:
            self.formdata = ''
        if not succeeded:
            return ''

        if 'set-cookie' in response and self.cookie:
            self._store_cookie(response)

        if self.onlyheader:
            return response
        if self.header:
            return {'header': response, 'data': content}
        return content

    def _debug_enabled(self):
        """Return True when debugging was switched on for this instance."""
        flag = self.debug
        # Until debug() is called or the attribute is assigned, ``self.debug``
        # is the bound method itself, which must not count as "on".
        if callable(flag):
            return False
        return bool(flag)

    def _build_http(self):
        """Create the httplib2 client, routed through ``self.proxy`` if set."""
        if 'type' in self.proxy:
            proxy_info = httplib2.ProxyInfo(
                self.proxy_type[self.proxy['type']],
                self.proxy['ip'],
                int(self.proxy['port']))
            return httplib2.Http(proxy_info=proxy_info)
        return httplib2.Http()

    def _read_cookie_file(self):
        """Return the content of ``cookie_file``, or '' when it is missing."""
        if not os.path.exists(self.cookie_file):
            return ''
        with open(self.cookie_file, 'r') as cookie_fh:
            return cookie_fh.read()

    def _store_cookie(self, response):
        """Strip cookie attributes from Set-Cookie and write it to ``cookie_file``."""
        cleaned = response['set-cookie']
        for pattern in self._COOKIE_ATTR_PATTERNS:
            cleaned = re.sub(pattern, '', cleaned)
        # The cleaned value is also what callers see in the returned headers.
        response['set-cookie'] = cleaned
        with open(self.cookie_file, 'w') as cookie_fh:
            cookie_fh.write(cleaned)

    def _post_multipart(self, url, fields, upfiles, base_headers):
        """POST ``fields`` plus the file described by ``upfiles``; return the body."""
        # The file is reopened and the body re-encoded on every call, because
        # a failed attempt may already have consumed part of the generator.
        with open(upfiles['filename'], 'rb') as upload_fh:
            params = dict(fields)
            params[upfiles['name']] = upload_fh
            # datagen yields the encoded body; multipart_headers carries the
            # required Content-Type and Content-Length.
            datagen, multipart_headers = multipart_encode(params)
            request = urllib2.Request(url, datagen, dict(base_headers, **multipart_headers))
            reply = urllib2.urlopen(request)
            try:
                return reply.read()
            finally:
                reply.close()

    def debug(self, debug=0):
        """Switch debugging on (truthy) or off (falsy) for this instance.

        The value is stored under the same name, so after the first call
        ``self.debug`` is that value and no longer this method.
        """
        self.debug = debug

    def upload(self, url):
        """Placeholder kept for interface compatibility; not implemented.

        The original definition had no body. File uploads are performed by
        ``html()`` when ``formdata`` contains an ``'upfiles'`` entry.

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError(
            "upload() is not implemented; set formdata['upfiles'] and call html()")

    def get_content_type(self, filename):
        """Return the guessed MIME type of ``filename``.

        Falls back to ``'application/octet-stream'`` when the type cannot be
        guessed from the file name.
        """
        # Imported locally because the module does not import mimetypes.
        import mimetypes
        # guess_type() returns a (type, encoding) tuple; only the type counts.
        return mimetypes.guess_type(filename)[0] or 'application/octet-stream'


#=============== 测试方法 ===============================

#!/usr/bin/env python
#-*- coding:utf-8 -*-

# Demo script: configures an Httpclass instance, posts a file upload to a
# test URL, saves the reply to a file and prints it.

# Python 2 idiom: reload(sys) is called before sys.setdefaultencoding so that
# UTF-8 becomes the process-wide default string encoding.
import sys
reload(sys)
sys.setdefaultencoding('utf-8')

# The bare string below is a no-op statement used as a comment; it says
# "load everything from the httpclass module".
'''
载入httpclass文件所有模块
'''
from httpclass import *
# funlib is not visible here; presumably it provides file_put_contents,
# which is used at the end of this script -- TODO confirm.
from funlib    import *


http = Httpclass()
#print dir(http)
# Enable proxy mode: http, socks4 or socks5
#http.proxy = {'type': 'socks5', 'ip': '192.168.1.254', 'port': 9990}
# Enable debug mode
http.debug(1)
# Return only the HTTP headers
#http.onlyheader = 1
# Return a dict holding both the headers and the body
#http.header = 1
# Enable cookies
http.cookie = 1
http.cookie_file = 'cookie_upload'
url = 'http://www.hacktea8.com/upload.php'
# 'upfiles' describes the file to upload: form field name and local path.
http.formdata = {'www': 'hyxt', 'upfiles': {'name': 'image', 'filename': '/Users/jason/project/apenjav/111.jpg'}}
# File the response is written to.
fname = 'upload.html'

# The bare string below is a no-op statement: it holds disabled alternative
# scenarios (a login POST and a plain GET of the home page).
'''
http.cookie_file = 'cookie_file'
uinfo = {'uname': 'username', 'upwd': 'userpassword'}
http.formdata = {'username': uinfo['uname'], 'password': uinfo['upwd'], 'cookietime': 2592000, 'quickforward': 'yes', 'handlekey': 'ls'}
url = 'http://hacktea8.com/member.php?mod=logging&action=login&loginsubmit=yes&infloat=yes&lssubmit=yes&inajax=1'
fname = 'login_hacktea8.html'

print http.formdata
#exit()
url = 'http://www.hacktea8.com'
fname = 'index_hacktea8.html'
'''

# Send the request, store the result under fname and echo it.
html = http.html(url)
file_put_contents(fname, html)

print html

posted @ 2013-12-17 17:03  编程狂热者  阅读(1004)  评论(0编辑  收藏  举报