# Python实现的访问http的httpclass类库 (httpclass: an HTTP-access class library implemented in Python)
#!/usr/bin/env python
#-*- coding:utf-8 -*-
import urllib
import httplib2
import socks
import os
import os.path
import re
import traceback
from poster.encode import multipart_encode
from poster.streaminghttp import register_openers
import urllib2
class Httpclass(object):
    """Small HTTP client built on httplib2 (plus urllib2 + poster for uploads).

    A request is configured by assigning attributes on the instance and is
    then executed with :meth:`html`.

    Attributes:
        method: HTTP verb used when no form data is set (default ``'GET'``).
        headers: Base request headers. They are copied for every request, so
            per-request additions (Content-type, Cookie, multipart headers)
            never leak into this dict.
        cookie: Truthy to load the ``Cookie`` header from ``cookie_file`` and
            to save any ``set-cookie`` response header back to it.
        cookie_file: Path of the cookie store.
        proxy_type: Maps the proxy type names ``http`` / ``socks4`` /
            ``socks5`` to the matching ``socks.PROXY_TYPE_*`` constants.
        proxy: ``''`` for a direct connection, or a dict with the keys
            ``type`` (a key of ``proxy_type``), ``ip`` and ``port``.
        header: Truthy to make :meth:`html` return
            ``{'header': response, 'data': content}``.
        formdata: Dict of form fields; when truthy the request is sent as a
            POST. An optional ``'upfiles'`` entry of the form
            ``{'name': <field name>, 'filename': <local path>}`` turns the
            request into a multipart file upload.
        onlyheader: Truthy to make :meth:`html` return only the response
            headers.
    """

    method = 'GET'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_1) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/31.0.1650.63 Safari/537.36',
    }
    cookie = ''
    cookie_file = 'cookie'
    proxy_type = dict(http=socks.PROXY_TYPE_HTTP,
                      socks4=socks.PROXY_TYPE_SOCKS4,
                      socks5=socks.PROXY_TYPE_SOCKS5)
    proxy = ''
    # proxy = '192.168.1.254'
    # Debug flag written by debug(). It is stored under a private name
    # because a data attribute called ``debug`` would be shadowed by the
    # ``debug`` method defined below.
    _debug = 0
    header = 0
    formdata = ''
    onlyheader = ''

    # Regexes removed (in this order) from a ``set-cookie`` header before it
    # is written to the cookie file.
    _COOKIE_STRIP_PATTERNS = (
        ' expires=[^;]+;',
        ' path=[^;]+;',
        ' domain=[^,]+,',
        ' domain=[^;]+;',
        ' HttpOnly',
        '; domain=[^,]+',
    )

    # Number of times a failing request is attempted before giving up.
    _MAX_ATTEMPTS = 3

    def html(self, url=''):
        """Perform the configured request against *url*.

        Args:
            url: Target URL. Must be non-empty.

        Returns:
            ``''`` if every attempt failed; otherwise the response headers
            when ``onlyheader`` is truthy, a ``{'header': ..., 'data': ...}``
            dict when ``header`` is truthy, or the response body. For file
            uploads the header value is ``''`` (the urllib2 path does not
            capture response headers).

        Raises:
            SystemExit: if *url* is empty or the upload file does not exist
                (a message is printed first).

        Side effects:
            ``self.formdata`` is reset to ``''`` once the request has been
            attempted; ``self.proxy['port']`` is coerced to ``int``; the
            cookie file is rewritten when the response carries ``set-cookie``
            and ``cookie`` is enabled.
        """
        if not url:
            print('url is empty!')
            # Same effect as the site-provided exit(), but also available
            # when the ``site`` module is not loaded.
            raise SystemExit()

        if self._debug_enabled():
            httplib2.debuglevel = 4

        if 'type' in self.proxy:
            self.proxy['port'] = int(self.proxy['port'])
            proxy_info = httplib2.ProxyInfo(
                self.proxy_type[self.proxy['type']],
                self.proxy['ip'],
                self.proxy['port'])
            h = httplib2.Http(proxy_info=proxy_info)
        else:
            h = httplib2.Http()

        # Work on per-request copies: mutating self.headers / self.method
        # would leak Content-type, Content-Length and a sticky POST into
        # later requests (and, for the class-level dict, other instances).
        request_headers = dict(self.headers)
        method = self.method
        if self.formdata:
            method = 'POST'
            request_headers['Content-type'] = 'application/x-www-form-urlencoded'
        method = method.upper()

        # An explicit Cookie header set by the caller wins over the file.
        if (self.cookie and 'Cookie' not in request_headers
                and os.path.exists(self.cookie_file)):
            with open(self.cookie_file, 'r') as cookie_fh:
                saved_cookie = cookie_fh.read()
            if saved_cookie:
                request_headers['Cookie'] = saved_cookie

        succeeded = False
        response = ''
        content = ''
        datagen = None
        upload_fh = None
        try:
            if 'upfiles' in self.formdata:
                upfiles = self.formdata['upfiles']
                # Register poster's streaming HTTP handlers with urllib2.
                register_openers()
                if not os.path.exists(upfiles['filename']):
                    print("====== " + upfiles['filename'] + " is not exists! =======")
                    raise SystemExit()
                # Build the multipart fields without the 'upfiles'
                # descriptor so it is not transmitted as a form value.
                fields = {}
                for key, value in self.formdata.items():
                    if key != 'upfiles':
                        fields[key] = value
                upload_fh = open(upfiles['filename'], 'rb')
                fields[upfiles['name']] = upload_fh
                # multipart_headers carries the required Content-Type and
                # Content-Length; datagen yields the encoded body.
                datagen, multipart_headers = multipart_encode(fields)
                request_headers.pop('Content-type', None)
                request_headers.update(multipart_headers)

            for _ in range(self._MAX_ATTEMPTS):
                try:
                    if datagen is not None:
                        request = urllib2.Request(url, datagen, request_headers)
                        content = urllib2.urlopen(request).read()
                        response = ''
                    else:
                        response, content = h.request(
                            url, method, headers=request_headers,
                            body=urllib.urlencode(self.formdata))
                except Exception as e:
                    # Best-effort retry: report the failure and try again.
                    types = ''
                    if 'type' in self.proxy:
                        types = self.proxy['type']
                    print('Failed to receive ' + types + ' connect request ack')
                    print(e)
                    continue
                succeeded = True
                break
        finally:
            if upload_fh is not None:
                upload_fh.close()

        if self.formdata:
            self.formdata = ''
        if not succeeded:
            return ''

        if 'set-cookie' in response and self.cookie:
            cleaned = response['set-cookie']
            for pattern in self._COOKIE_STRIP_PATTERNS:
                cleaned = re.sub(pattern, '', cleaned)
            # The cleaned value is also what callers see in the returned
            # headers.
            response['set-cookie'] = cleaned
            with open(self.cookie_file, 'w') as cookie_fh:
                cookie_fh.write(cleaned)

        if self.onlyheader:
            return response
        if self.header:
            return {'header': response, 'data': content}
        return content

    def debug(self, debug=0):
        """Turn httplib2 debug output on (truthy) or off (falsy).

        The flag is kept in ``self._debug`` so this method stays callable on
        repeated use instead of being overwritten by the flag value.
        """
        self._debug = debug

    def _debug_enabled(self):
        """Return True when debug output was requested.

        Honours both ``obj.debug(1)`` and a plain ``obj.debug = 1``
        assignment (which replaces the method with a value on the instance).
        """
        flag = self.debug
        if callable(flag):
            return bool(self._debug)
        return bool(flag)

    def upload(self, url):
        """Placeholder: the original definition had no body.

        File uploads are performed by :meth:`html` when ``formdata``
        contains an ``'upfiles'`` entry.
        """
        raise NotImplementedError(
            "upload() is not implemented; set formdata['upfiles'] and call html()")

    def get_content_type(self, filename):
        """Return the guessed MIME type of *filename*.

        Falls back to ``'application/octet-stream'`` when the type cannot be
        guessed.
        """
        import mimetypes
        # guess_type() returns a (type, encoding) tuple; only the type is
        # wanted, and the tuple itself is always truthy.
        return mimetypes.guess_type(filename)[0] or 'application/octet-stream'
#=============== Test script ===============================
# Driver that exercises Httpclass: configures an upload request, performs it,
# saves the result to a file and prints it. Python 2 only.
#/usr/bin/env python
#-*- coding:utf-8 -*-
import sys
# Force the process-wide default string encoding to UTF-8 (Python 2 only).
reload(sys)
sys.setdefaultencoding('utf-8')
'''
载入httpclass文件所有模块
'''
# (The bare string above is a note meaning "load everything from the
# httpclass file"; it is not executed as code.)
from httpclass import *
from funlib import *
http = Httpclass()
#print dir(http)
# Enable proxy mode: http, socks4 or socks5
#http.proxy = {'type': 'socks5', 'ip': '192.168.1.254', 'port': 9990}
# Enable debug mode
http.debug(1)
# Return only the HTTP response headers
#http.onlyheader = 1
# Return a dict holding both the response headers and the body
#http.header = 1
# Enable cookie persistence and choose the cookie file
http.cookie = 1
http.cookie_file = 'cookie_upload'
url = 'http://www.hacktea8.com/upload.php'
# 'upfiles' describes the file to upload: form field name and local path.
http.formdata = {'www': 'hyxt', 'upfiles': {'name': 'image', 'filename': '/Users/jason/project/apenjav/111.jpg'}}
fname = 'upload.html'
# The triple-quoted string below holds disabled alternative scenarios (a login
# POST and a plain index fetch). It is a bare string expression, never run.
'''
http.cookie_file = 'cookie_file'
uinfo = {'uname': 'username', 'upwd': 'userpassword'}
http.formdata
= {'username': uinfo['uname'], 'password': uinfo['upwd'], 'cookietime':
2592000, 'quickforward': 'yes', 'handlekey': 'ls'}
url =
'http://hacktea8.com/member.php?mod=logging&action=login&loginsubmit=yes&infloat=yes&lssubmit=yes&inajax=1'
fname = 'login_hacktea8.html'
print http.formdata
#exit()
url = 'http://www.hacktea8.com'
fname = 'index_hacktea8.html'
'''
# Perform the request, then save and print whatever html() returned.
html = http.html(url)
# file_put_contents is not defined in this file; presumably it comes from the
# funlib wildcard import -- TODO confirm.
file_put_contents(fname, html)
print html