介绍python的几个內建模块,原文链接
1 python的时间模块datetime
取现在时间
from datetime import datetime
now = datetime.now()
print(now)
print(type(now))
将指定日期转化为时间戳
from datetime import datetime
dt = datetime(2017,12,13,13,7)
# 把datetime转换为timestamp
print( dt.timestamp() )
将时间戳转化为日期
from datetime import datetime
t = 1429417200.0
print(datetime.fromtimestamp(t))
根据时间戳转化为本地时间和utc时间
from datetime import datetime
t = 1429417200.0
# 本地时间
print(datetime.fromtimestamp(t))
# UTC时间
print(datetime.utcfromtimestamp(t))
将字符串转化为时间
from datetime import datetime
cday = datetime.strptime('2017-6-1 18:22:22','%Y-%m-%d %H:%M:%S')
print(day)
将时间戳转化为字符串
from datetime import datetime
now = datetime.now()
print(now.strftime('%a,%b %d %H:%M'))
时间加减
from datetime import datetime , timedelta
now = datetime.now()
print( now )
print(now + timedelta(hours = 10))
print(now + timedelta(days = 1))
print(now + timedelta(days = 2, hours = 12))
设置时区
from datetime import datetime, timedelta, timezone
# 创建时区UTC+8:00
timezone_8 = timezone(timedelta(hours = 8) )
now = datetime.now()
print(now)
# 强制设置为UTC+8:00
dt = now.replace(tzinfo=timezone_8)
print(dt)
获取utc时区和时间,并且转化为别的时区的时间
1 2 3 4 5 6 7 8 9 10 11 12 13
|
from datetime import datetime, timedelta, timezone
|
2命名tuple
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
|
|
3顺序字典
1 2 3 4 5 6 7 8
|
from collections import OrderedDict d = dict([ ['a',1], ['b',2],['c',3]]) print(d) od = OrderedDict([('a',1),('b',2),('c',3)]) print(od)
od2 = OrderedDict([['Bob',90],['Jim',20],['Seve',22]]) print(od2)
|
4计数器
1 2 3 4 5
|
from collections import Counter c = Counter()
for ch in 'programming': c[ch]=c[ch]+1
|
从一开始生成自然数
在生成的可迭代序列中按规则筛选
1 2 3 4
|
natuals = itertools.count(1) ns = itertools.takewhile(lambda x: x <= 10, natuals) print(ns) print(list(ns) )
|
将两个字符串生成一个序列
1 2 3 4
|
for c in itertools.chain('ABC','XYZ'): print(c)
print(list(itertools.chain('ABC','XYZ')) )
|
迭代器把连续的字母放在一起分组
1 2 3 4 5 6
|
for key, group in itertools.groupby('AAABBBCCAAA'): print(key, list(group)) print(key, group)
for key, group in itertools.groupby('AaaBBbcCAAa', lambda c: c.upper() ): print(key,list(group))
|
6 contextmanager
open 返回的对象才可用with,或者在类中实现enter和exit可以使该类对象支持with用法
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
|
class Query(object): def __init__(self, name): self.name = name def __enter__(self): print('Begin') return self
def __exit__(self, exc_type, exc_value, traceback): if exc_type: print('Error') else: print('End')
def query(self): print('Query info about %s...' %self.name)
with Query('BBBB') as q: if q: q.query()
|
简单介绍下原理
1 2 3 4 5 6
|
with EXPR as VAR: 实现原理: 在with语句中, EXPR必须是一个包含__enter__()和__exit__()方法的对象(Context Manager)。 调用EXPR的__enter__()方法申请资源并将结果赋值给VAR变量。 通过try/except确保代码块BLOCK正确调用,否则调用EXPR的__exit__()方法退出并释放资源。 在代码块BLOCK正确执行后,最终执行EXPR的__exit__()方法释放资源。
|
通过python提供的装饰器contextmanager,作用在生成器函数,可以达到with操作的目的
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
|
from contextlib import contextmanager class Query(object): def __init__(self, name): self.name = name
def query(self): print('Query info about %s ...' %self.name)
@contextmanager def create_query(name): print('Begin') q = Query(name) yield q print('End')
with create_query('aaa') as q: if q: print(q.query())
|
可以看看contextmanager源码,了解下
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
|
class GeneratorContextManager(object): def __init__(self, gen): self.gen = gen
def __enter__(self): try: return self.gen.next() except StopIteration: raise RuntimeError("generator didn't yield") def __exit__(self, type, value, traceback): if type is None: try: self.gen.next() except StopIteration: return else: raise RuntimeError("generator didn't stop") else: try: self.gen.throw(type, value, traceback) raise RuntimeError("generator didn't stop after throw()") except StopIteration: return True except:
|
也可以采用closing用法作用在一个对象上支持with open操作
1 2 3 4 5 6
|
from contextlib import closing from urllib.request import urlopen
with closing(urlopen('https://www.python.org')) as page: for line in page: print(line)
|
介绍下closing 实现原理
1 2 3 4 5 6
|
@contextmanager def closing(thing): try: yield thing finally: thing.close()
|
同样可以用contextmanager实现打印指定标签的上下文对象
1 2 3 4 5 6 7 8 9
|
@contextmanager def tag(name): print("<%s>" % name) yield print("</%s>" % name)
with tag("h1"): print("hello") print("world")
|
上述代码执行结果为:
1 2 3 4 5 6 7 8 9
|
<h1> hello world </h1> 代码的执行顺序是:
with语句首先执行yield之前的语句,因此打印出<h1>; yield调用会执行with语句内部的所有语句,因此打印出hello和world; 最后执行yield之后的语句,打印出</h1>。
|
7 urllib库
这是个非常重要的库,做爬虫会用到
采用urllib get网页信息
1 2 3 4 5 6 7
|
from urllib import request with request.urlopen('http://www.limerence2017.com/') as f: data = f.read() print('Status:', f.status, f.reason) for k, v in f.getheaders(): print('%s: %s' %(k,v)) print('Data:', data.decode('utf-8') )
|
在request中添加信息头模拟浏览器发送请求
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
|
from urllib import request req = request.Request('http://www.douban.com/') req.add_header('User-Agent', 'Mozilla/6.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/8.0 Mobile/10A5376e Safari/8536.25') with request.urlopen(req) as f:
print('Status:', f.status, f.reason) for k, v in f.getheaders(): print('%s: %s' %(k,v)) print('Data:', f.read().decode('utf-8'))
from urllib import request, parse print('Login to weibo.cn...') email = input('Email: ') passwd = input('Password: ') login_data = parse.urlencode([ ('username', email), ('password', passwd), ('entry', 'mweibo'), ('client_id', ''), ('savestate', '1'), ('ec', ''), ('pagerefer', 'https://passport.weibo.cn/signin/welcome?entry=mweibo&r=http%3A%2F%2Fm.weibo.cn%2F') ])
|
采用post方式获取信息, request.urlopen(),参数可以携带data发送给网址
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
|
print('Login to weibo.cn...') email = input('Email: ') passwd = input('Password: ') login_data = parse.urlencode([ ('username', email), ('password', passwd), ('entry', 'mweibo'), ('client_id', ''), ('savestate', '1'), ('ec', ''), ('pagerefer', 'https://passport.weibo.cn/signin/welcome?entry=mweibo&r=http%3A%2F%2Fm.weibo.cn%2F') ])
req = request.Request('https://passport.weibo.cn/sso/login') req.add_header('Origin', 'https://passport.weibo.cn') req.add_header('User-Agent', 'Mozilla/6.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/8.0 Mobile/10A5376e Safari/8536.25') req.add_header('Referer', 'https://passport.weibo.cn/signin/login?entry=mweibo&res=wel&wm=3349&r=http%3A%2F%2Fm.weibo.cn%2F')
with request.urlopen(req, data=login_data.encode('utf-8')) as f: print('Status:', f.status, f.reason) for k, v in f.getheaders(): print('%s:%s' %(k,v)) print('Data: ', f.read().decode('utf-8'))
|
采用代理方式获取网页信息
1 2 3 4 5 6 7 8 9 10 11 12 13
|
proxy_handler = urllib.request.ProxyHandler({'http': 'http://www.example.com:3128/'}) proxy_auth_handler = urllib.request.ProxyBasicAuthHandler() proxy_auth_handler.add_password('realm', 'host', 'username', 'password') opener = urllib.request.bulid_opener(proxy_handler, proxy_auth_handler) with opener.open('http://www.example.com/login.html') as f: pass
proxy_auth_handler = urllib.request.ProxyBasicAuthHandler() proxy_auth_handler.add_password('realm', 'host', 'username', 'password') opener = urllib.request.build_opener(proxy_handler, proxy_auth_handler) with opener.open('http://www.example.com/login.html') as f: pass
|
我的公众号: