Python常用模块
一、os模块
os模块是与操作系统交互的一个接口,使用该模块必须先导入该模块。
import os
#操作文件和目录 os.getcwd() #获取当前脚本运行的工作目录路径,'C:\\Users\\Administrator\\代码测试' os.chdir('C:\\Users\\Administrator\\代码测试') #改变当前脚本的工作目录路径 os.curdir #获取当前工作目录,'.' os.pardir #返回当前目录的父目录,'..' os.mkdir("test1") #创建单级目录 os.rmdir("test1") # 删除单级空目录,若目录不为空则无法删除,则会报错 os.makedirs('tes2/test3') #生成多层递归目录 os.removedirs('tes2/test3') #若最里面一层目录为空,则删除,并递归到上一级目录,如若也为空,则删除,依此类推 os.listdir('tes2') #获取指定目录下的所有文件和子目录,包括隐藏文件,并以列表方式打印,['test3'] os.remove('test/1.txt') #删除一个文件 os.rename("test","newtest") #重命名文件/目录 os.stat('tes2/test3') #获取文件/目录信息,os.stat_result(st_mode=16895, st_ino=4785074604532972,
st_dev=3189517348, st_nlink=1, st_uid=0, st_gid=0, st_size=0, st_atime=1570541912,
st_mtime=1570541912, st_ctime=1570541912)
#操作文件或者目录路径,使用os.path模块
os.path.abspath('test3') #返回给定的文件或目录的绝对路径 输出:'C:\\Users\\Administrator\\代码测试\\test3'
os.path.split('tes2/text3/1.txt') #将path分割成目录和文件名二元组返回 输出('tes2/text3', '1.txt')
os.path.dirname('tes2/text3/1.txt') #返回path的目录。其实就是os.path.split(path)的第一个元素 ,输出'tes2/text3'
os.path.basename('tes2/test3/1.txt') #返回path最后的文件名。如果path以/或\结尾,那么就会返回空值。即os.path.split(path)的第二个元素,输出'1.txt'
os.path.join('tes2','test3','test.txt') # 将多个路径组合后返回,输出'tes2\\test3\\test.txt'
os.path.exists('test.txt') #路径存在返回True,否则返回False,文件和目录均可判断
os.path.isdir('tes2') #用来判断给定的path是否是一个目录,如果是目录代表的是最外层的目录
os.path.isfile('test.txt') #用来判断给定的path是否是一个文件 输出:True
os.path.getsize('tes2/test3/1.txt') #返回path所指向的文件或者目录的大小
os.path.getatime('tes2/test3/1.txt') #返回path所指向的文件或者目录的最后访问时间,输出1570588470.5294871
os.path.getmtime('tes2/test3/1.txt') #返回path所指向的文件或者目录的最后修改时间 输出1570588470.5294871
#os模块的属性
os.name #输出字符串指示当前使用平台。win->'nt'; Linux->'posix'
os.sep # 输出操作系统特定的路径分隔符,win下为"\\",Linux下为"/"
os.linesep #输出当前平台使用的行终止符,win下为"\r\n",Linux下为"\n"
os.pathsep # 输出用于分割文件路径的字符串 win下为;,Linux下为:
二、sys模块
sys模块是与python解释器交互的一个接口,使用该模块必须先导入该模块。
1、sys.argv 获取当前正在执行的命令行参数的参数列表(list)
#输出 [ 'c:\\users\\administrator\\envs\\automatic\\lib\\site-packages\\ipykernel_launcher.py', '-f', 'C:\\Users\\Administrator\\AppData\\Roaming\\jupyter\\runtime\\kernel-6c1095fc-19fa-4e5e-872b-192b7fcd1c55.json' ]
sys.argv[]是一个程序获取外部参数的桥梁。sys.argv[0]表示代码本身的文件路径。
2、sys.modules.keys() 返回所有已经导入的模块列表
dict_keys([ 'traitlets.config.application',
'jupyter_client.blocking.channels',
'IPython.utils.terminal',
'errno',
'jupyter_client.localinterfaces',
'ipywidgets.widgets.valuewidget',
'math',
'datetime',
'IPython.core.magics.basic',
'asyncio.windows_utils',
'IPython.core.completer',
'jupyter_client.connect',
'IPython.core.logger',
'jupyter_client.jsonutil',
'_functools',
'tornado.gen',
'encodings.latin_1',
'uuid',
...
])
3、sys.path path是一个目录列表,供Python从中查找第三方扩展模块。
[ 'c:\\users\\administrator\\envs\\automatic\\scripts\\python35.zip', 'c:\\users\\administrator\\envs\\automatic\\DLLs', 'c:\\users\\administrator\\envs\\automatic\\lib', 'c:\\users\\administrator\\envs\\automatic\\scripts', 'e:\\python\\python3.5.2\\progress\\Lib', 'e:\\python\\python3.5.2\\progress\\DLLs', 'c:\\users\\administrator\\envs\\automatic', '', 'c:\\users\\administrator\\envs\\automatic\\lib\\site-packages', 'c:\\users\\administrator\\envs\\automatic\\lib\\site-packages\\pip-19.0.3-py3.5.egg', 'c:\\users\\administrator\\envs\\automatic\\lib\\site-packages\\win32', 'c:\\users\\administrator\\envs\\automatic\\lib\\site-packages\\win32\\lib', 'c:\\users\\administrator\\envs\\automatic\\lib\\site-packages\\Pythonwin', 'c:\\users\\administrator\\envs\\automatic\\lib\\site-packages\\IPython\\extensions', 'C:\\Users\\Administrator\\.ipython' ]
4、sys.exit(0) 调用sys.exit(n)通过抛出SystemExit异常中途退出程序,sys.exit(0)表示正常退出;该异常可以在主程序中捕获处理。
import sys try: sys.exit(1) except SystemExit as e: print(e) #1
5、sys.version 获取python解释程序的版本信息
>>> import sys >>> sys.version '3.5.2 (v3.5.2:4def2a2901a5, Jun 25 2016, 22:18:55) [MSC v.1900 64 bit (AMD64)]'
6、sys.platform 返回操作系统平台名称
>>> import sys >>> sys.platform 'win32' >>>
7、sys.stdin, sys.stdout, sys.stderr 标准输入,标准输出,错误输出
标准输入:一般为键盘输入,stdin对象为解释器提供输入字符流,一般使用input()函数(Python 2中为raw_input())
import sys print("Please input message:") name = sys.stdin.readline() print(name) #输出 """ Please input message: hello hello """
标准输出:一般为屏幕。stdout对象接收到print语句产生的输出
import sys sys.stdout.write("hello") sys.stdout.flush() """ 输出: hello """
调用python中的print函数,事实上是调用了 sys.stdout.write() ,比如print('hello'),等价于sys.stdout.write(‘hello\n’),print 将内容打印到了控制台,并且追加了一个换行符。
三、json & pickle模块
上述两个模块都是序列化模块,什么是序列化呢?把对象(变量)从内存中变成可存储或传输的过程称之为序列化。反过来,把变量内容从序列化的对象重新读到内存里称之为反序列化,即unpickling。
序列化后就可以将数据写入磁盘进行持久化存储。
(一)json模块
r"""JSON (JavaScript Object Notation) <http://json.org> is a subset of JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data interchange format. :mod:`json` exposes an API familiar to users of the standard library :mod:`marshal` and :mod:`pickle` modules. It is derived from a version of the externally maintained simplejson library. Encoding basic Python object hierarchies:: >>> import json >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}]) '["foo", {"bar": ["baz", null, 1.0, 2]}]' >>> print(json.dumps("\"foo\bar")) "\"foo\bar" >>> print(json.dumps('\u1234')) "\u1234" >>> print(json.dumps('\\')) "\\" >>> print(json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True)) {"a": 0, "b": 0, "c": 0} >>> from io import StringIO >>> io = StringIO() >>> json.dump(['streaming API'], io) >>> io.getvalue() '["streaming API"]' Compact encoding:: >>> import json >>> from collections import OrderedDict >>> mydict = OrderedDict([('4', 5), ('6', 7)]) >>> json.dumps([1,2,3,mydict], separators=(',', ':')) '[1,2,3,{"4":5,"6":7}]' Pretty printing:: >>> import json >>> print(json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4)) { "4": 5, "6": 7 } Decoding JSON:: >>> import json >>> obj = ['foo', {'bar': ['baz', None, 1.0, 2]}] >>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj True >>> json.loads('"\\"foo\\bar"') == '"foo\x08ar' True >>> from io import StringIO >>> io = StringIO('["streaming API"]') >>> json.load(io)[0] == 'streaming API' True Specializing JSON object decoding:: >>> import json >>> def as_complex(dct): ... if '__complex__' in dct: ... return complex(dct['real'], dct['imag']) ... return dct ... >>> json.loads('{"__complex__": true, "real": 1, "imag": 2}', ... object_hook=as_complex) (1+2j) >>> from decimal import Decimal >>> json.loads('1.1', parse_float=Decimal) == Decimal('1.1') True Specializing JSON object encoding:: >>> import json >>> def encode_complex(obj): ... if isinstance(obj, complex): ... return [obj.real, obj.imag] ... 
raise TypeError(repr(o) + " is not JSON serializable") ... >>> json.dumps(2 + 1j, default=encode_complex) '[2.0, 1.0]' >>> json.JSONEncoder(default=encode_complex).encode(2 + 1j) '[2.0, 1.0]' >>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j)) '[2.0, 1.0]' Using json.tool from the shell to validate and pretty-print:: $ echo '{"json":"obj"}' | python -m json.tool { "json": "obj" } $ echo '{ 1.2:3.4}' | python -m json.tool Expecting property name enclosed in double quotes: line 1 column 3 (char 2) """ __version__ = '2.0.9' __all__ = [ 'dump', 'dumps', 'load', 'loads', 'JSONDecoder', 'JSONDecodeError', 'JSONEncoder', ] __author__ = 'Bob Ippolito <bob@redivi.com>' from .decoder import JSONDecoder, JSONDecodeError from .encoder import JSONEncoder _default_encoder = JSONEncoder( skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, indent=None, separators=None, default=None, ) def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, cls=None, indent=None, separators=None, default=None, sort_keys=False, **kw): """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a ``.write()``-supporting file-like object). If ``skipkeys`` is true then ``dict`` keys that are not basic types (``str``, ``int``, ``float``, ``bool``, ``None``) will be skipped instead of raising a ``TypeError``. If ``ensure_ascii`` is false, then the strings written to ``fp`` can contain non-ASCII characters if they appear in strings contained in ``obj``. Otherwise, all such characters are escaped in JSON strings. If ``check_circular`` is false, then the circular reference check for container types will be skipped and a circular reference will result in an ``OverflowError`` (or worse). 
If ``allow_nan`` is false, then it will be a ``ValueError`` to serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in strict compliance of the JSON specification, instead of using the JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). If ``indent`` is a non-negative integer, then JSON array elements and object members will be pretty-printed with that indent level. An indent level of 0 will only insert newlines. ``None`` is the most compact representation. If specified, ``separators`` should be an ``(item_separator, key_separator)`` tuple. The default is ``(', ', ': ')`` if *indent* is ``None`` and ``(',', ': ')`` otherwise. To get the most compact JSON representation, you should specify ``(',', ':')`` to eliminate whitespace. ``default(obj)`` is a function that should return a serializable version of obj or raise TypeError. The default simply raises TypeError. If *sort_keys* is ``True`` (default: ``False``), then the output of dictionaries will be sorted by key. To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the ``.default()`` method to serialize additional types), specify it with the ``cls`` kwarg; otherwise ``JSONEncoder`` is used. 
""" # cached encoder if (not skipkeys and ensure_ascii and check_circular and allow_nan and cls is None and indent is None and separators is None and default is None and not sort_keys and not kw): iterable = _default_encoder.iterencode(obj) else: if cls is None: cls = JSONEncoder iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, check_circular=check_circular, allow_nan=allow_nan, indent=indent, separators=separators, default=default, sort_keys=sort_keys, **kw).iterencode(obj) # could accelerate with writelines in some versions of Python, at # a debuggability cost for chunk in iterable: fp.write(chunk) def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, cls=None, indent=None, separators=None, default=None, sort_keys=False, **kw): """Serialize ``obj`` to a JSON formatted ``str``. If ``skipkeys`` is true then ``dict`` keys that are not basic types (``str``, ``int``, ``float``, ``bool``, ``None``) will be skipped instead of raising a ``TypeError``. If ``ensure_ascii`` is false, then the return value can contain non-ASCII characters if they appear in strings contained in ``obj``. Otherwise, all such characters are escaped in JSON strings. If ``check_circular`` is false, then the circular reference check for container types will be skipped and a circular reference will result in an ``OverflowError`` (or worse). If ``allow_nan`` is false, then it will be a ``ValueError`` to serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in strict compliance of the JSON specification, instead of using the JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). If ``indent`` is a non-negative integer, then JSON array elements and object members will be pretty-printed with that indent level. An indent level of 0 will only insert newlines. ``None`` is the most compact representation. If specified, ``separators`` should be an ``(item_separator, key_separator)`` tuple. 
The default is ``(', ', ': ')`` if *indent* is ``None`` and ``(',', ': ')`` otherwise. To get the most compact JSON representation, you should specify ``(',', ':')`` to eliminate whitespace. ``default(obj)`` is a function that should return a serializable version of obj or raise TypeError. The default simply raises TypeError. If *sort_keys* is ``True`` (default: ``False``), then the output of dictionaries will be sorted by key. To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the ``.default()`` method to serialize additional types), specify it with the ``cls`` kwarg; otherwise ``JSONEncoder`` is used. """ # cached encoder if (not skipkeys and ensure_ascii and check_circular and allow_nan and cls is None and indent is None and separators is None and default is None and not sort_keys and not kw): return _default_encoder.encode(obj) if cls is None: cls = JSONEncoder return cls( skipkeys=skipkeys, ensure_ascii=ensure_ascii, check_circular=check_circular, allow_nan=allow_nan, indent=indent, separators=separators, default=default, sort_keys=sort_keys, **kw).encode(obj) _default_decoder = JSONDecoder(object_hook=None, object_pairs_hook=None) def load(fp, cls=None, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, object_pairs_hook=None, **kw): """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing a JSON document) to a Python object. ``object_hook`` is an optional function that will be called with the result of any object literal decode (a ``dict``). The return value of ``object_hook`` will be used instead of the ``dict``. This feature can be used to implement custom decoders (e.g. JSON-RPC class hinting). ``object_pairs_hook`` is an optional function that will be called with the result of any object literal decoded with an ordered list of pairs. The return value of ``object_pairs_hook`` will be used instead of the ``dict``. 
This feature can be used to implement custom decoders that rely on the order that the key and value pairs are decoded (for example, collections.OrderedDict will remember the order of insertion). If ``object_hook`` is also defined, the ``object_pairs_hook`` takes priority. To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` kwarg; otherwise ``JSONDecoder`` is used. """ return loads(fp.read(), cls=cls, object_hook=object_hook, parse_float=parse_float, parse_int=parse_int, parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, **kw) def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, object_pairs_hook=None, **kw): """Deserialize ``s`` (a ``str`` instance containing a JSON document) to a Python object. ``object_hook`` is an optional function that will be called with the result of any object literal decode (a ``dict``). The return value of ``object_hook`` will be used instead of the ``dict``. This feature can be used to implement custom decoders (e.g. JSON-RPC class hinting). ``object_pairs_hook`` is an optional function that will be called with the result of any object literal decoded with an ordered list of pairs. The return value of ``object_pairs_hook`` will be used instead of the ``dict``. This feature can be used to implement custom decoders that rely on the order that the key and value pairs are decoded (for example, collections.OrderedDict will remember the order of insertion). If ``object_hook`` is also defined, the ``object_pairs_hook`` takes priority. ``parse_float``, if specified, will be called with the string of every JSON float to be decoded. By default this is equivalent to float(num_str). This can be used to use another datatype or parser for JSON floats (e.g. decimal.Decimal). ``parse_int``, if specified, will be called with the string of every JSON int to be decoded. By default this is equivalent to int(num_str). 
This can be used to use another datatype or parser for JSON integers (e.g. float). ``parse_constant``, if specified, will be called with one of the following strings: -Infinity, Infinity, NaN, null, true, false. This can be used to raise an exception if invalid JSON numbers are encountered. To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` kwarg; otherwise ``JSONDecoder`` is used. The ``encoding`` argument is ignored and deprecated. """ if not isinstance(s, str): raise TypeError('the JSON object must be str, not {!r}'.format( s.__class__.__name__)) if s.startswith(u'\ufeff'): raise JSONDecodeError("Unexpected UTF-8 BOM (decode using utf-8-sig)", s, 0) if (cls is None and object_hook is None and parse_int is None and parse_float is None and parse_constant is None and object_pairs_hook is None and not kw): return _default_decoder.decode(s) if cls is None: cls = JSONDecoder if object_hook is not None: kw['object_hook'] = object_hook if object_pairs_hook is not None: kw['object_pairs_hook'] = object_pairs_hook if parse_float is not None: kw['parse_float'] = parse_float if parse_int is not None: kw['parse_int'] = parse_int if parse_constant is not None: kw['parse_constant'] = parse_constant return cls(**kw).decode(s)
1、dumps & loads
dumps是将对象(dict、list等)转化成str格式,loads是将str转化成对象(dict、list等)格式。
#进行序列化 >>> import json >>> json.dumps({'username':'root','password':'abc123'}) #dict-->str '{"password": "abc123", "username": "root"}' >>> json.dumps(['root','abc123']) #list-->str '["root", "abc123"]' >>> json.dumps([{'root':12,'flex':25}]) #list(dict)-->str '[{"flex": 25, "root": 12}]' #反序列化 >>> json.loads('[{"flex": 25, "root": 12}]') [{'flex': 25, 'root': 12}]
2、dump & load
#dump将数据对象存储到文件中 >>> import json >>> f = open('json_test.txt','w') #dump方法接收一个文件句柄,直接将字典转换成json字符串写入文件 >>> dict = {'username':'root'} >>> json.dump(dict,f) >>> f.close() #查看文件的路径 >>> import os >>> os.path.abspath('json_test.txt') 'C:\\Users\\Administrator\\json_test.txt' >>> #将文件中存储的json数据转成对象返回 >>> f = open('json_test.txt') >>> json.load(f) #load方法接收一个文件句柄,直接将文件中的json字符串转换成数据结构返回 {'username': 'root'} >>>
3、重要参数说明
- ensure_ascii
如果ensure_ascii为true(默认值),则确保输出中所有传入的非ASCII字符均已转义。如果ensure_ascii为false,则这些字符将按原样输出。
- indent
如果indent是非负整数或字符串,则JSON数组元素和对象成员将使用该缩进级别进行漂亮打印。缩进级别为0,负或""仅插入换行符。 None(默认)选择最紧凑的表示形式。使用正整数缩进会使每个级别缩进多个空格。如果indent是字符串(例如"\t"),则该字符串用于缩进每个级别。
#漂亮打印 >>> import json >>> print(json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4)) { "4": 5, "6": 7 }
- sort_keys
如果sort_keys为true(默认值:False),则字典的输出将按key排序。
4、dumps & dump
dump需要一个类似于文件指针的参数(并不是真的指针,可称之为类文件对象),可以与文件操作结合,也就是说可以将dict转成str然后存入文件中;而dumps直接给的是dict,也就是将字典转成str。
(二)pickle模块
pickle是python中独有的序列化模块,json模块是所有语言都可以使用的,如果以后使用pickle序列化的数据只用于python是可行的,但如果还想让序列化的数据使用到其它语言,就需要使用json模块了。
pickle同样也提供了四个功能:dumps、dump、loads、load。
并且pickle序列化的数据类型也很丰富:
- 支持所有python原生类型:布尔值,整数,浮点数,复数,字符串,字节,None。
- 由任何原生类型组成的列表,元组,字典和集合。
- 函数,类,类的实例
1、dumps & loads
>>> import pickle >>> dict = {'username':'root','password':'abc123'} >>> pickle.dumps(dict) #序列化后是一串二进制 b'\x80\x03}q\x00(X\x08\x00\x00\x00passwordq\x01X\x06\x00\x00\x00abc12 \x00\x00\x00usernameq\x03X\x04\x00\x00\x00rootq\x04u.' >>> ret = pickle.dumps(dict) >>> pickle.loads(ret) #反序列化 {'password': 'abc123', 'username': 'root'} >>>
2、dump & load
>>> import pickle #序列化写入文件中,注意是二进制 >>> dict = {'username':'root','password':'abc123'} >>> f = open('pickle_test.txt','wb') #以二进制的方式写入文件中 >>> pickle.dump(dict,f) >>> f.close() #反序列化,从文件中读出数据对象 >>> f = open('pickle_test.txt','rb') >>> pickle.load(f) {'password': 'abc123', 'username': 'root'} >>>
四、random模块
(一)整数函数
1、random.randrange(stop)、random.randrange(start, stop[, step])
返回从range(start, stop, step)中随机选择的一个元素,相当于choice(range(start, stop, step)),但并不会实际构建range对象。
>>> import random >>> random.randrange(4) #返回一个大于等于0小于4,[0,4)的整数 3 >>> random.randrange(4,10) #规定了起始位置大于等于4小于10,[4,10)的整数 9 >>> random.randrange(4,10,2) #加入步长,大于等于4小于10的偶数 6
2、random.randint(a, b)
返回一个随机整数N,a<= N <=b,相当于randrange(a,b+1)。
>>> import random >>> random.randint(4,9) 8
(二)序列函数
1、random.choice(seq)
从一个非空的序列seq中随机返回一个元素,如果序列seq是空时抛 IndexError异常。
>>> import random >>> l = [5,6,8,{'username':'root'}] >>> random.choice(l) 5 >>> random.choice(l) {'username': 'root'} >>>
2、random.sample(population, k)
返回从总体序列或集合population中随机抽取的k个互不重复元素组成的列表,一般用于随机抽样。
返回一个新列表,其中包含总体中的元素,同时保留原始总体不变。结果列表按选择顺序排列,因此所有子切片也将是有效的随机样本。
>>> import random >>> l = [5,6,8,{'username':'root'}] #从序列中随机抽出两个元素 >>> random.sample(l,2) [5, 6] >>> random.sample(l,2) [6, {'username': 'root'}]
3、random.shuffle(x[, random])
将序列x打乱顺序。可选参数random是一个0参数的函数,返回[0.0,1.0)中的一个随机浮点数;默认情况下,这是function random()。
>>> import random >>> l = [5,6,8,{'username':'root'}] #打乱列表l原先的顺序 >>> random.shuffle(l) >>> l [8, 6, {'username': 'root'}, 5] >>> random.shuffle(l) >>> l [6, 8, {'username': 'root'}, 5] >>>
(三)实值分布
以下函数生成特定的实值分布。函数参数以分布方程式中的相应变量命名,如通常的数学实践中所用;这些方程式中的大多数都可以在任何统计资料中找到。
1、random.random()
返回范围为[0.0,1.0)的下一个随机浮点数。
>>> import random #大于等于0小于1之间的数 >>> random.random() 0.04794861013804974 >>> random.random() 0.8553054814287199 >>>
2、random.uniform(a, b)
用于生成一个指定范围内的随机符点数,两个参数其中一个是上限,一个是下限。如果a > b,则生成的随机数n: b <= n <= a。如果 a <b, 则 a <= n <= b。
>>> import random #生成指定范围的数 >>> random.uniform(1,2) 1.0994241967254763 >>> random.uniform(1,2) 1.9169119648591533 >>>
3、random.triangular(low, high, mode)
返回一个随机浮点数N,满足low <= N <= high,并在这两个边界之间按指定的mode(众数位置)进行分布。low和high默认分别为0和1;mode参数默认为两边界的中点,此时得到对称的三角分布。
>>> import random #对称分布 >>> random.triangular() #使用low,high默认值 0.338741665952415 >>> random.triangular(1,3) #指定low,high的值 2.2142236912771205 >>>
4、random.expovariate(lambd)
指数分布。 lambd是1.0除以所需的均值。它应该不为零。如果lambd为正,返回值的范围从0到正无穷大;如果lambd为负,从负无穷大到0。
>>> import random #指数分布 >>> random.expovariate(0.3) 8.262242039304834 >>>
5、random.normalvariate(mu,sigma)
正态分布。 mu是平均值,而sigma是标准偏差。
>>> import random #正态分布 >>> random.normalvariate(4,0.5) 3.229766077847942 >>>
(四)实例
1、生成随机字符串
#string的使用 >>> import string >>> string.ascii_lowercase 'abcdefghijklmnopqrstuvwxyz' >>> string.digits '0123456789' #生成随机字符串 >>> random.sample(string.ascii_lowercase+string.digits,8) ['k', 'c', 'j', '7', 'z', 'e', 'm', 'y'] >>> str = random.sample(string.ascii_lowercase+string.digits,8) >>> l = random.sample(string.ascii_lowercase+string.digits,8) >>> ''.join(l) 'xitzm5we' >>>
2、生成随机验证码
import random


def v_code(length=5):
    """Return a random verification code of *length* characters.

    Each character is either a digit (0-9) or an uppercase ASCII
    letter (A-Z), the two kinds being chosen with equal probability.

    :param length: number of characters in the code (default 5)
    :return: the generated verification code string
    """
    code = ''
    for _ in range(length):
        num = random.randint(0, 9)            # random digit in [0, 9] — randint is INCLUSIVE on both ends
        alpha = chr(random.randint(65, 90))   # random uppercase letter 'A'..'Z'
        picked = random.choice([num, alpha])  # pick either the digit or the letter
        code = "".join([code, str(picked)])
    return code  # BUG FIX: the original never returned `code`, so print(v_code()) printed None


print(v_code())
五、hashlib模块
(一)摘要算法
Python的hashlib提供了常见的摘要算法,如MD5,SHA1等等。
md5()、sha1()、sha224()、sha256()、sha384()、sha512()、blake2b()、blake2s()、sha3_224()、sha3_256()、sha3_384()、sha3_512()、shake_128()和shake_256()。
什么是摘要算法呢?摘要算法又称哈希算法、散列算法。它通过一个函数,把任意长度的数据转换为一个长度固定的数据串(通常用16进制的字符串表示)。
摘要算法就是通过摘要函数f()对任意长度的数据data计算出固定长度的摘要digest,目的是为了发现原始数据是否被人篡改过。
#. Copyright (C) 2005-2010 Gregory P. Smith (greg@krypto.org) # Licensed to PSF under a Contributor Agreement. # __doc__ = """hashlib module - A common interface to many hash functions. new(name, data=b'') - returns a new hash object implementing the given hash function; initializing the hash using the given binary data. Named constructor functions are also available, these are faster than using new(name): md5(), sha1(), sha224(), sha256(), sha384(), and sha512() More algorithms may be available on your platform but the above are guaranteed to exist. See the algorithms_guaranteed and algorithms_available attributes to find out what algorithm names can be passed to new(). NOTE: If you want the adler32 or crc32 hash functions they are available in the zlib module. Choose your hash function wisely. Some have known collision weaknesses. sha384 and sha512 will be slow on 32 bit platforms. Hash objects have these methods: - update(arg): Update the hash object with the bytes in arg. Repeated calls are equivalent to a single call with the concatenation of all the arguments. - digest(): Return the digest of the bytes passed to the update() method so far. - hexdigest(): Like digest() except the digest is returned as a unicode object of double length, containing only hexadecimal digits. - copy(): Return a copy (clone) of the hash object. This can be used to efficiently compute the digests of strings that share a common initial substring. 
For example, to obtain the digest of the string 'Nobody inspects the spammish repetition': >>> import hashlib >>> m = hashlib.md5() >>> m.update(b"Nobody inspects") >>> m.update(b" the spammish repetition") >>> m.digest() b'\\xbbd\\x9c\\x83\\xdd\\x1e\\xa5\\xc9\\xd9\\xde\\xc9\\xa1\\x8d\\xf0\\xff\\xe9' More condensed: >>> hashlib.sha224(b"Nobody inspects the spammish repetition").hexdigest() 'a4337bc45a8fc544c03f52dc550cd6e1e87021bc896588bd79e901e2' """ # This tuple and __get_builtin_constructor() must be modified if a new # always available algorithm is added. __always_supported = ('md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512') algorithms_guaranteed = set(__always_supported) algorithms_available = set(__always_supported) __all__ = __always_supported + ('new', 'algorithms_guaranteed', 'algorithms_available', 'pbkdf2_hmac') __builtin_constructor_cache = {} def __get_builtin_constructor(name): cache = __builtin_constructor_cache constructor = cache.get(name) if constructor is not None: return constructor try: if name in ('SHA1', 'sha1'): import _sha1 cache['SHA1'] = cache['sha1'] = _sha1.sha1 elif name in ('MD5', 'md5'): import _md5 cache['MD5'] = cache['md5'] = _md5.md5 elif name in ('SHA256', 'sha256', 'SHA224', 'sha224'): import _sha256 cache['SHA224'] = cache['sha224'] = _sha256.sha224 cache['SHA256'] = cache['sha256'] = _sha256.sha256 elif name in ('SHA512', 'sha512', 'SHA384', 'sha384'): import _sha512 cache['SHA384'] = cache['sha384'] = _sha512.sha384 cache['SHA512'] = cache['sha512'] = _sha512.sha512 except ImportError: pass # no extension module, this hash is unsupported. constructor = cache.get(name) if constructor is not None: return constructor raise ValueError('unsupported hash type ' + name) def __get_openssl_constructor(name): try: f = getattr(_hashlib, 'openssl_' + name) # Allow the C module to raise ValueError. The function will be # defined but the hash not actually available thanks to OpenSSL. 
f() # Use the C function directly (very fast) return f except (AttributeError, ValueError): return __get_builtin_constructor(name) def __py_new(name, data=b''): """new(name, data=b'') - Return a new hashing object using the named algorithm; optionally initialized with data (which must be bytes). """ return __get_builtin_constructor(name)(data) def __hash_new(name, data=b''): """new(name, data=b'') - Return a new hashing object using the named algorithm; optionally initialized with data (which must be bytes). """ try: return _hashlib.new(name, data) except ValueError: # If the _hashlib module (OpenSSL) doesn't support the named # hash, try using our builtin implementations. # This allows for SHA224/256 and SHA384/512 support even though # the OpenSSL library prior to 0.9.8 doesn't provide them. return __get_builtin_constructor(name)(data) try: import _hashlib new = __hash_new __get_hash = __get_openssl_constructor algorithms_available = algorithms_available.union( _hashlib.openssl_md_meth_names) except ImportError: new = __py_new __get_hash = __get_builtin_constructor try: # OpenSSL's PKCS5_PBKDF2_HMAC requires OpenSSL 1.0+ with HMAC and SHA from _hashlib import pbkdf2_hmac except ImportError: _trans_5C = bytes((x ^ 0x5C) for x in range(256)) _trans_36 = bytes((x ^ 0x36) for x in range(256)) def pbkdf2_hmac(hash_name, password, salt, iterations, dklen=None): """Password based key derivation function 2 (PKCS #5 v2.0) This Python implementations based on the hmac module about as fast as OpenSSL's PKCS5_PBKDF2_HMAC for short passwords and much faster for long passwords. 
""" if not isinstance(hash_name, str): raise TypeError(hash_name) if not isinstance(password, (bytes, bytearray)): password = bytes(memoryview(password)) if not isinstance(salt, (bytes, bytearray)): salt = bytes(memoryview(salt)) # Fast inline HMAC implementation inner = new(hash_name) outer = new(hash_name) blocksize = getattr(inner, 'block_size', 64) if len(password) > blocksize: password = new(hash_name, password).digest() password = password + b'\x00' * (blocksize - len(password)) inner.update(password.translate(_trans_36)) outer.update(password.translate(_trans_5C)) def prf(msg, inner=inner, outer=outer): # PBKDF2_HMAC uses the password as key. We can re-use the same # digest objects and just update copies to skip initialization. icpy = inner.copy() ocpy = outer.copy() icpy.update(msg) ocpy.update(icpy.digest()) return ocpy.digest() if iterations < 1: raise ValueError(iterations) if dklen is None: dklen = outer.digest_size if dklen < 1: raise ValueError(dklen) dkey = b'' loop = 1 from_bytes = int.from_bytes while len(dkey) < dklen: prev = prf(salt + loop.to_bytes(4, 'big')) # endianess doesn't matter here as long to / from use the same rkey = int.from_bytes(prev, 'big') for i in range(iterations - 1): prev = prf(prev) # rkey = rkey ^ prev rkey ^= from_bytes(prev, 'big') loop += 1 dkey += rkey.to_bytes(inner.digest_size, 'big') return dkey[:dklen] for __func_name in __always_supported: # try them all, some may not work due to the OpenSSL # version not supporting that algorithm. try: globals()[__func_name] = __get_hash(__func_name) except ValueError: import logging logging.exception('code for hash %s was not found.', __func_name) # Cleanup locals() del __always_supported, __func_name, __get_hash del __py_new, __hash_new, __get_openssl_constructor
哈希对象提供以下一些方法:
- update(arg): 使用数据中的字节更新哈希对象
- digest():通过update()方法返回字节的digest
- hexdigest(): 与digest()类似,不同之处在于返回的是两倍长度的字符串,仅包含十六进制数字。
- copy():返回哈希对象的副本(克隆)。可用于高效地计算多条共享相同初始子串的数据的摘要。
以摘要算法MD5为例:
>>> import hashlib >>> m = hashlib.md5() >>> m.update(b'Nobody inspects the spammish repetition') #返回字节 >>> m.digest() b'\xae\xe9\xfc_.\xc6A\xb4\xd6%[\xf1\x1f5S\x05' #返回字符串 >>> m.hexdigest() 'aee9fc5f2ec641b4d6255bf11f355305' >>>
当然还可以更简洁:
>>> import hashlib >>> hashlib.sha224(b"Nobody inspects the spammish repetition").hexdigest() 'a4337bc45a8fc544c03f52dc550cd6e1e87021bc896588bd79e901e2' >>>
(二)应用
可以用于给每一个用户的token进行加密
import hashlib


def get_md5(username):
    """Generate a token for the given user.

    :param username: user name to hash
    :return: 32-character lowercase hex MD5 digest of the username
    """
    m = hashlib.md5()
    # hash the UTF-8 encoding of the username
    m.update(bytes(username, encoding="utf-8"))
    return m.hexdigest()
但是这样虽然给对应的用户名进行MD5加密了,如果别人盗走了数据库的数据,根据用户名照样也是泄露了用户的token,为此可以通过Salt来处理MD5。
import hashlib
import time


def get_md5(username):
    """Generate a salted token for the given user.

    The username is used to seed the MD5 object (acting as a salt),
    and the current timestamp is then mixed in, so even the same
    username produces a different token on each call.

    :param username: user name used as the salt
    :return: 32-character lowercase hex MD5 digest
    """
    # seed the hash with the username so it acts as a per-user salt
    m = hashlib.md5(bytes(username, encoding="utf-8"))
    ctime = str(time.time())
    m.update(bytes(ctime, encoding="utf-8"))  # mix in the current time
    return m.hexdigest()