第5章:Linux系统管理
1.文件读写
1).Python内置的open函数
f = open('data.txt', 'w') f.write('hello, world') f.close()
2).避免文件句柄泄露
# Option 1: explicit try/finally.
# open() is called *before* the try block: if the open itself fails there
# is nothing to close, and keeping it outside prevents the finally clause
# from hitting an unbound `f` (NameError) and masking the real error.
f = open('data.txt')
try:
    print(f.read())
finally:
    f.close()

# Option 2 (preferred): the context manager closes the file automatically,
# even when the body raises.
with open('data.txt') as f:
    print(f.read())
3).常见的文件操作函数
# Python provides three read functions: read, readline and readlines.
#   read:      read the whole content of the file
#   readline:  read one line at a time
#   readlines: return the content as a list, one item per line of the file
with open('data.txt') as f:
    f.read()
    f.seek(0)  # rewind so the next call starts from the beginning again
    f.readline()
    f.seek(0)
    f.readlines()

# Python provides two write functions: write and writelines.
#   write:      write a string to the file and return the number of
#               characters written
#   writelines: write a list of strings to the file
with open('data.txt', 'w') as f:
    f.write('Beautiful is better than ugly')
    # writelines() accepts exactly ONE iterable of strings; passing two
    # separate lists raises TypeError.
    f.writelines(['Explicit is better than implicit',
                  'Simple is better than complex'])
4).Python的文件是一个可迭代对象
使用for循环遍历文件内容的代码如下: with open('data.txt') as inf: for line in inf: print(line.upper())
5).案例:将文件中所有单词的首字母变成大写
# Variant 1: join the capitalized words ourselves and write the line.
with open('data.txt') as inf, open('out.txt', 'w') as outf:
    for line in inf:
        capitalized = (word.capitalize() for word in line.split())
        outf.write(" ".join(capitalized) + "\n")

# Variant 2: let print() insert the separators and the trailing newline.
with open('data.txt') as inf, open('out.txt', 'w') as outf:
    for line in inf:
        words = [word.capitalize() for word in line.split()]
        print(*words, file=outf)
2.文件与文件路径管理
1).使用os.path进行路径和文件管理
拆分路径
import os path = "/var/log/yum.log" os.path.split(path) os.path.dirname(path) os.path.basename(path) os.path.splitext(path)
构建路径
import os os.getcwd() os.path.expanduser('~') os.path.expanduser('~mysql') os.path.expanduser('~allen/data.txt') os.path.abspath('.') os.path.join('~', 't', 'a.py') os.path.join(os.path.expanduser('~mysql'), 't', 'a.py') os.path.isabs('/home/mysql/t/a.py') os.path.isabs('.')
在Python代码中,可以使用__file__这个特殊的变量表示当前代码所在的源文件 import os print("current directory : ", os.getcwd()) path = os.path.abspath(__file__) print("full path of current file : ", path)
获取文件属性
os.path模块包含了若干函数用来获取文件的属性
getatime
getmtime
getctime
getsize
判断文件类型
os.path模块包含了若干判断类函数
exists
isfile
isdir
islink
ismount
2).使用os模块管理文件和目录
import os os.getcwd() os.chdir(os.path.expanduser('~mysql')) os.getcwd()
# Demonstrates os.chmod and os.access.
# The file name is read from the command line. If the file does not exist
# the program exits immediately. If the file is not readable, it is given
# read/write/execute permission for all users; otherwise its content is
# printed.
import os
import sys


def main():
    """Print the file named on the command line, or make it accessible.

    Raises:
        SystemExit: if the argument is missing or is not an existing file.
    """
    # Fall back to a name that cannot be a regular file instead of
    # appending a dummy value to sys.argv.
    filename = sys.argv[1] if len(sys.argv) > 1 else " "
    if not os.path.isfile(filename):
        raise SystemExit(filename + ' does not exist')
    elif not os.access(filename, os.R_OK):
        # 0o777 is the octal literal accepted by both Python 2.6+ and 3;
        # the old spelling 0777 is a SyntaxError on Python 3.
        os.chmod(filename, 0o777)
    else:
        with open(filename) as f:
            print(f.read())


if __name__ == '__main__':
    main()
3).案例:打印最常用的10条Linux命令
import os
from collections import Counter

history_path = os.path.expanduser('~/.bash_history')
with open(history_path) as f:
    # The first whitespace-separated token of every non-blank history line
    # is the command name; count how often each one occurs.
    c = Counter(fields[0] for fields in map(str.split, f) if fields)
print(c.most_common(10))
3.查找文件
1).使用fnmatch找到特定的文件
# The fnmatch module provides the functions fnmatch, fnmatchcase, filter
# and translate.
import os
import fnmatch

names = os.listdir('.')

# fnmatch.fnmatch tests ONE name against a pattern.
for name in names:
    if fnmatch.fnmatch(name, '*.py'):
        print(name)

# fnmatch.filter takes a LIST of names and returns the matching ones.
# Passing a single string would make it match individual characters.
a_names = fnmatch.filter(names, 'a*')
for name in a_names:
    print(name)
2).使用glob找到特定的文件
glob的作用相当于os.listdir加上fnmatch,使用glob以后,不需要调用os.listdir获取文件列表,直接通过模式匹配即可 import os import glob name = glob.glob('*.py') print(name) name1 = glob.glob('o*') print(name1)
3).使用os.walk遍历目录树
# os.walk traverses a directory and all of its sub-directories. For every
# directory it yields a 3-tuple (dirpath, dirnames, filenames): dirpath is
# the directory being visited, dirnames lists its sub-directories and
# filenames lists its files.
import os
import fnmatch

images = ['*.jpg', '*.jpeg', '*.png']
matches = []

home = os.path.expanduser("~")
for root, dirnames, filenames in os.walk(home):
    for pattern in images:
        matches.extend(
            os.path.join(root, filename)
            for filename in fnmatch.filter(filenames, pattern)
        )
print(matches)
4).案例:找到目录下最大(或最老)的十个文件
import os
import fnmatch


def is_file_match(filename, patterns):
    """Return True if *filename* matches at least one fnmatch pattern."""
    return any(fnmatch.fnmatch(filename, pattern) for pattern in patterns)


def find_specific_files(root, patterns=('*',), exclude_dirs=()):
    """Yield the paths of files under *root* whose names match *patterns*.

    Args:
        root: directory where the traversal starts.
        patterns: iterable of fnmatch patterns; a file is yielded when its
            name matches any of them. Defaults to every file.
        exclude_dirs: directory names that must not be descended into.
    """
    for dirpath, dirnames, filenames in os.walk(root):
        # Prune in place: os.walk only skips sub-directories that are
        # removed from the very list object it handed out.
        dirnames[:] = [d for d in dirnames if d not in exclude_dirs]
        for filename in filenames:
            if is_file_match(filename, patterns):
                yield os.path.join(dirpath, filename)


for item in find_specific_files("/home/mysql"):
    print(item)

patterns = ['*.jpg', '*.jpeg', '*.png', '*.tif', '*.tiff']
for item in find_specific_files("/root", patterns):
    print(item)

# Build a dict whose keys are the files found and whose values are the
# file sizes.
files = {}
for name in find_specific_files("/root"):
    try:
        files[name] = os.path.getsize(name)
    except OSError:
        # Dangling symlink, or the file vanished during the walk.
        continue

# Sort the dict items by size in descending order with the built-in
# sorted(); the first ten entries are the ten largest files.
result = sorted(files.items(), key=lambda d: d[1], reverse=True)[:10]
for i, t in enumerate(result, 1):
    print(i, t[0], t[1])
4.高级文件处理接口shutil
os模块是对操作系统的接口进行封装,主要作用是跨平台
shutil模块包含复制、移动、重命名和删除文件及目录的函数,主要作用是管理文件和目录
1).复制文件和文件夹
copy和copytree import shutil shutil.copy('data.txt','mydata.txt') shutil.copytree('/py','/mypy')
2).文件和文件夹的移动与改名
import shutil shutil.move('a.py', 'b.py') shutil.move('b.py', 'dir1')
3).删除目录
shutil.rmtree不管目录是否非空,都直接删除整个目录 import shutil shutil.rmtree('dir1')
5.文件内容管理
1).文件比较
filecmp模块包含了比较目录和文件的操作 filecmp模块最简单的函数是cmp函数,该函数用来比较两个文件是否相同 import filecmp filecmp.cmp('a.txt', 'b.txt')
2).MD5校验
import hashlib

d = hashlib.md5()
# Hash objects consume bytes, so the file is opened in binary mode. In
# text mode Python 3 yields str lines and update() raises TypeError.
with open('/etc/passwd', 'rb') as f:
    for line in f:
        d.update(line)
print(d.hexdigest())
3).案例:找到目录下的重复文件
import hashlib
import sys
import os
import fnmatch

CHUNK_SIZE = 8192


def is_file_match(filename, patterns):
    """Return True if *filename* matches at least one fnmatch pattern."""
    return any(fnmatch.fnmatch(filename, pattern) for pattern in patterns)


def find_specific_files(root, patterns=('*',), exclude_dirs=()):
    """Yield the paths of files under *root* whose names match *patterns*.

    Directory names listed in *exclude_dirs* are not descended into.
    """
    for dirpath, dirnames, filenames in os.walk(root):
        # Prune in place so os.walk skips the excluded sub-directories.
        dirnames[:] = [d for d in dirnames if d not in exclude_dirs]
        for filename in filenames:
            if is_file_match(filename, patterns):
                yield os.path.join(dirpath, filename)


def get_chunk(filename):
    """Yield the content of *filename* in CHUNK_SIZE-byte pieces."""
    # Binary mode: hashing needs bytes, and text mode would also fail to
    # decode arbitrary (non-text) files.
    with open(filename, 'rb') as f:
        while True:
            chunk = f.read(CHUNK_SIZE)
            if not chunk:
                break
            yield chunk


def get_file_checksum(filename):
    """Return the hexadecimal MD5 digest of the content of *filename*."""
    h = hashlib.md5()
    for chunk in get_chunk(filename):
        h.update(chunk)
    return h.hexdigest()


def main():
    """Report files with identical content under the directory in argv[1].

    Raises:
        SystemExit: if the argument is missing or is not a directory.
    """
    # An empty default is never a directory, so a missing argument ends in
    # the same error path without mutating sys.argv.
    directory = sys.argv[1] if len(sys.argv) > 1 else ""
    if not os.path.isdir(directory):
        raise SystemExit("{0} is not a directory".format(directory))

    record = {}  # checksum -> first file seen with that checksum
    for item in find_specific_files(directory):
        checksum = get_file_checksum(item)
        if checksum in record:
            print('find duplicate file : {0} vs {1}'.format(record[checksum], item))
        else:
            record[checksum] = item


if __name__ == '__main__':
    main()
6.使用Python管理压缩包
1).使用tarfile库读取与创建tar包
读取tar包 import tarfile with tarfile.open('tarfile_add.tar') as t: for member_info in t.getnames(): print(member_info) for member_info in t.getmembers(): print(member_info.name)
使用tarfile库读取与创建tar包 创建tar包 import tarfile with tarfile.open('tarfile_add.tar', mode='w') as out: out.add('data.txt') getmembers:获取tar包中的文件列表 getnames:获取tar包中的文件名列表 extract:提取单个文件 extractall:提取所有文件
2).使用tarfile库读取与创建压缩包
读取一个用gzip算法压缩的tar包: with tarfile.open('tarfile_add.tar', mode='r:gz') as out: 创建一个用bzip2算法压缩的tar包: with tarfile.open('tarfile_add.tar', mode='w:bz2') as out:
3).案例:备份指定文件到压缩包
import os
import fnmatch
import tarfile
import datetime


def is_file_match(filename, patterns):
    """Return True if *filename* matches at least one fnmatch pattern."""
    return any(fnmatch.fnmatch(filename, pattern) for pattern in patterns)


def find_specific_files(root, patterns=('*',), exclude_dirs=()):
    """Yield the paths of files under *root* whose names match *patterns*.

    Directory names listed in *exclude_dirs* are not descended into.
    """
    for dirpath, dirnames, filenames in os.walk(root):
        # Prune in place so os.walk skips the excluded sub-directories.
        dirnames[:] = [d for d in dirnames if d not in exclude_dirs]
        for filename in filenames:
            if is_file_match(filename, patterns):
                yield os.path.join(dirpath, filename)


def main():
    """Archive every image under /root into a timestamped .tar.gz file."""
    patterns = ['*.jpg', '*.jpeg', '*.png', '*.tif', '*.tiff']
    # The timestamp keeps successive backups from overwriting each other.
    now = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
    filename = "all_images_{0}.tar.gz".format(now)
    with tarfile.open(filename, 'w:gz') as f:
        for item in find_specific_files("/root", patterns):
            f.add(item)


if __name__ == '__main__':
    main()
4).使用zipfile库创建和读取zip压缩包
读取zip文件 import zipfile example_zip = zipfile.ZipFile('example.zip') example_zip.namelist()
创建zip文件 import zipfile newZip = zipfile.ZipFile('new.zip', 'w') newZip.write('spam.txt') newZip.close()
使用Python的zipfile模块提供的命令行接口,创建、查看和提取zip格式压缩包 python -m zipfile -c monty.zip spam.txt eggs.txt python -m zipfile -e monty.zip / python -m zipfile -l monty.zip
5).案例:暴力破解zip压缩包的密码
import zipfile
import zlib

# NOTE(review): the original snippet used `f` without defining it; the
# archive name below is an assumption - replace it with the real target.
with zipfile.ZipFile('example.zip') as f, open('passwords.txt') as pf:
    for line in pf:
        password = line.strip()
        try:
            # zipfile expects the password as bytes on Python 3.
            f.extractall(pwd=password.encode('utf-8'))
        except (RuntimeError, zipfile.BadZipFile, zlib.error):
            # Wrong password: zipfile normally raises RuntimeError, and a
            # candidate that passes the quick check byte by chance fails
            # later with a CRC or decompression error. Try the next one.
            continue
        print("password is {0}".format(password))
        break  # stop at the first password that works
6).使用shutil创建和读取压缩包
import shutil
import tarfile

# Create backup.tar.gz from the content of /py.
shutil.make_archive('backup', 'gztar', root_dir='/py')

# Open the archive in a with-block so the handle is closed after listing.
with tarfile.open('backup.tar.gz', 'r:gz') as f:
    print(f.getnames())
在Python3中使用shutil读取压缩包 import shutil shutil.unpack_archive('backup.tar.gz')
7.Python中执行外部命令
1).subprocess模块简介
subprocess模块用来创建和管理子进程,它提供了一个名为Popen的类来启动和设置子进程的参数
2).subprocess模块的便利函数
call函数 import subprocess subprocess.call(['ls', '-l']) subprocess.call('ls -l', shell=True)
check_call函数
与call函数类似,区别在于命令执行失败时的处理方式不同:call函数返回非零的退出码,而check_call函数会抛出CalledProcessError异常
# check_output function: use it when the command output needs further
# processing, or when it should be written to a log file.
import subprocess

# universal_newlines=True makes check_output return text rather than
# bytes, so splitting on '\n' works on Python 3 as well as Python 2.
output = subprocess.check_output('ls -l', shell=True, universal_newlines=True)
lines = output.split('\n')
print(lines)
3).subprocess模块的Popen类
# The child process starts running as soon as the Popen object is created.
# The Popen class provides several methods to control the child process:
#   wait:        wait for the child process to finish
#   poll:        check the state of the child process
#   kill:        terminate the child process
#   send_signal: send a signal to the child process
#   communicate: interact with the child process
def execute_cmd(cmd):
    """Run *cmd* through the shell and return ``(returncode, output)``.

    The output element is the captured stderr when the command exits with
    a non-zero status, and the captured stdout otherwise.
    """
    proc = subprocess.Popen(
        cmd,
        shell=True,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    out, err = proc.communicate()
    status = proc.returncode
    return (status, out) if status == 0 else (status, err)
8.综合案例:使用Python部署MongoDB
import os
import shutil
import tarfile
import subprocess


def unpackage_mongo(package, home_dir):
    """Extract the gzip'ed tarball *package* and move it to *home_dir*.

    The tarball is extracted into the current directory; it is expected to
    contain a top-level directory named like the package without its
    extension. Any previous extraction directory and any existing
    *home_dir* are removed first.
    """
    unpackage_dir = os.path.splitext(package)[0]
    if os.path.exists(unpackage_dir):
        shutil.rmtree(unpackage_dir)
    if os.path.exists(home_dir):
        shutil.rmtree(home_dir)

    # The with-block closes the archive even if extraction fails.
    with tarfile.open(package, 'r:gz') as t:
        t.extractall('.')

    shutil.move(unpackage_dir, home_dir)


def create_datadir(data_dir):
    """Create an empty *data_dir*, deleting any existing one first."""
    if os.path.exists(data_dir):
        shutil.rmtree(data_dir)
    os.mkdir(data_dir)


def format_mongod_command(package_dir, data_dir, logfile):
    """Return the shell command line that starts mongod as a daemon."""
    mongod = os.path.join(package_dir, 'bin', 'mongod')
    mongod_format = """{0} --fork --dbpath {1} --logpath {2}"""
    return mongod_format.format(mongod, data_dir, logfile)


def start_mongod(cmd):
    """Execute *cmd*; raise SystemExit with its output if it fails."""
    returncode, out = execute_cmd(cmd)
    if returncode != 0:
        raise SystemExit('execute {0} error : {1}'.format(cmd, out))
    else:
        print("execute command ({0}) successful".format(cmd))


def execute_cmd(cmd):
    """Run *cmd* through the shell and return ``(returncode, output)``.

    The output element is stderr when the command exits with a non-zero
    status, and stdout otherwise.
    """
    p = subprocess.Popen(cmd,
                         shell=True,
                         stdin=subprocess.PIPE,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    stdout, stderr = p.communicate()
    if p.returncode != 0:
        return p.returncode, stderr
    return p.returncode, stdout


def main():
    """Unpack MongoDB under /app, recreate the data directory, start mongod."""
    package = 'mongodb-linux-x86_64-rhel62-3.4.2.tgz'
    base_dir = os.path.abspath('/app')
    home_dir = os.path.join(base_dir, 'mongo')
    data_dir = os.path.join(base_dir, 'mongodata')
    logfile = os.path.join(data_dir, 'mongod.log')
    print("base_dir = ", base_dir)
    print("home_dir = ", home_dir)
    print("data_dir = ", data_dir)

    if not os.path.exists(package):
        raise SystemExit("{0} not found".format(package))

    unpackage_mongo(package, home_dir)
    create_datadir(data_dir)
    start_mongod(format_mongod_command(home_dir, data_dir, logfile))


if __name__ == '__main__':
    main()