代码改变世界

python标准库学习7

2011-11-26 22:30  Rollen Holt  阅读(4335)  评论(0)  编辑  收藏  举报

使用 os.path 模块处理文件名

import os

filename = "my/little/pony"

print "using", os.name, "..."
print "split", "=>", os.path.split(filename)
print "splitext", "=>", os.path.splitext(filename)
print "dirname", "=>", os.path.dirname(filename)
print "basename", "=>", os.path.basename(filename)
print "join", "=>", os.path.join(os.path.dirname(filename),
                                 os.path.basename(filename))

using nt ...
split => ('my/little', 'pony')
splitext => ('my/little/pony', '')
dirname => my/little
basename => pony
join => my/little\pony

  当前目录和上一级目录

>>> os.pardir
'..'
>>> os.curdir
'.'

  

使用 os.path 模块检查文件名的特征

import os

FILES = (
    os.curdir,
    "/",
    "file",
    "/file",
    "samples",
    "samples/sample.jpg",
    "directory/file",
    "../directory/file",
    "/directory/file"
    )

for file in FILES:
    print file, "=>",
    if os.path.exists(file):
        print "EXISTS",
    if os.path.isabs(file):
        print "ISABS",
    if os.path.isdir(file):
        print "ISDIR",
    if os.path.isfile(file):
        print "ISFILE",
    if os.path.islink(file):
        print "ISLINK",
    if os.path.ismount(file):
        print "ISMOUNT",
    print

. => EXISTS ISDIR
/ => EXISTS ISABS ISDIR ISMOUNT
file =>
/file => ISABS
samples => EXISTS ISDIR
samples/sample.jpg => EXISTS ISFILE
directory/file =>
../directory/file =>
/directory/file => ISABS

  expanduser 函数以与大部分 Unix shell 相同的方式处理用户名快捷符号 (~), 不过它在 Windows 下工作不正常.

使用 os.path 模块将用户名插入到文件名

import os

print os.path.expanduser("~/.pythonrc")

# /home/effbot/.pythonrc

  expandvars 函数将文件名中的环境变量替换为对应值

使用 os.path 替换文件名中的环境变量

import os

os.environ["USER"] = "user"

print os.path.expandvars("/home/$USER/config")
print os.path.expandvars("$USER/folders")

/home/user/config
user/folders

  列出目录下所有的文件和目录

>>> a=[file for file in os.listdir("d:\\new")]
>>> for i in a:
	print i

  walk 函数会帮你找出一个目录树下的所有文件. 它的参数依次是目录名, 回调函数, 以及传递给回调函数的数据对象.

使用 os.path 搜索文件系统

import os

def callback(arg, directory, files):
    for file in files:
        print os.path.join(directory, file), repr(arg)

os.path.walk(".", callback, "secret message")

./aifc-example-1.py 'secret message'
./anydbm-example-1.py 'secret message'
./array-example-1.py 'secret message'
...
./samples 'secret message'
./samples/sample.jpg 'secret message'
./samples/sample.txt 'secret message'
./samples/sample.zip 'secret message'
./samples/articles 'secret message'
./samples/articles/article-1.txt 'secret message'
./samples/articles/article-2.txt 'secret message'
...

  index 函数会返回一个文件名列表, 你可以直接使用for-in 循环处理文件.

使用 os.listdir 搜索文件系统

import os

def index(directory):
    """Return a list of every path found below ``directory``.

    Works like os.listdir, but also descends into subdirectories.  Each
    returned path is the entry name joined onto the directory it was
    listed from.  Directories that are symbolic links are listed but not
    descended into.
    """
    pending = [directory]  # directories still to be listed (LIFO)
    found = []
    while pending:
        current = pending.pop()
        for entry in os.listdir(current):
            path = os.path.join(current, entry)
            found.append(path)
            # Only real (non-symlink) directories are queued for listing.
            if os.path.islink(path) or not os.path.isdir(path):
                continue
            pending.append(path)
    return found

for file in index("."):
    print file

.\aifc-example-1.py
.\anydbm-example-1.py
.\array-example-1.py
...

  一次返回一个文件

import os

class DirectoryWalker:
    """Forward iterator that returns every path below a directory tree.

    Iteration uses the legacy sequence protocol: a ``for`` loop calls
    ``__getitem__`` with 0, 1, 2, ... until IndexError is raised.  The
    index argument is ignored, so an instance supports one front-to-back
    pass only; random access does not work.
    """

    def __init__(self, directory):
        # Directories that still have to be listed (used as a LIFO stack).
        self.stack = [directory]
        # Entries of the directory currently being walked, and the
        # position of the next entry to hand out.
        self.files = []
        self.index = 0

    def __getitem__(self, index):
        """Return the next path of the walk; ``index`` is ignored.

        Raises IndexError when every queued directory has been exhausted,
        which is what terminates a ``for`` loop over the instance.
        """
        while True:
            try:
                name = self.files[self.index]
                self.index = self.index + 1
            except IndexError:
                # Current listing is used up: move on to the next queued
                # directory.  pop() on an empty stack raises IndexError,
                # which ends the iteration.
                self.directory = self.stack.pop()
                self.files = os.listdir(self.directory)
                self.index = 0
            else:
                # Got a filename; queue real (non-symlink) directories so
                # their contents are returned later.
                fullname = os.path.join(self.directory, name)
                if os.path.isdir(fullname) and not os.path.islink(fullname):
                    self.stack.append(fullname)
                return fullname

for file in DirectoryWalker("."):
    print file

.\aifc-example-1.py
.\anydbm-example-1.py
.\array-example-1.py
...

  注意 DirectoryWalker 类并不检查传递给 __getitem__ 方法的索引值. 这意味着如果你越界访问序列成员(索引数字过大)的话, 这个类将不能正常工作.

下面这个例子它返回文件名和它的 os.stat 属性(一个元组). 这个版本在每个文件上都能节省一次或两次stat 调用( os.path.isdir 和 os.path.islink 内部都使用了 stat ), 并且在一些平台上运行很快.

使用 DirectoryStatWalker 搜索文件系统

import os, stat

class DirectoryStatWalker:
    """Forward iterator over a directory tree returning (path, stat) pairs.

    Iteration uses the legacy sequence protocol: a ``for`` loop calls
    ``__getitem__`` with 0, 1, 2, ... until IndexError is raised.  The
    index argument is ignored, so an instance supports one front-to-back
    pass only.
    """

    def __init__(self, directory):
        # Directories that still have to be listed (used as a LIFO stack).
        self.stack = [directory]
        # Entries of the directory currently being walked, and the
        # position of the next entry to hand out.
        self.files = []
        self.index = 0

    def __getitem__(self, index):
        """Return ``(fullname, st)`` for the next path; ``index`` is ignored.

        ``st`` is the os.stat() result for ``fullname``.  Raises IndexError
        when every queued directory has been exhausted, which is what
        terminates a ``for`` loop over the instance.
        """
        while True:
            try:
                name = self.files[self.index]
                self.index = self.index + 1
            except IndexError:
                # Current listing is used up: move on to the next queued
                # directory.  pop() on an empty stack raises IndexError,
                # which ends the iteration.
                self.directory = self.stack.pop()
                self.files = os.listdir(self.directory)
                self.index = 0
            else:
                # Got a filename.
                fullname = os.path.join(self.directory, name)
                st = os.stat(fullname)
                mode = st[stat.ST_MODE]
                # os.stat() follows symbolic links, so S_ISLNK(mode) can
                # never be true here.  Ask os.path.islink() instead, and
                # only for directories, so plain files cost no extra call.
                # This keeps the walk from descending through links.
                if stat.S_ISDIR(mode) and not os.path.islink(fullname):
                    self.stack.append(fullname)
                return fullname, st

for file, st in DirectoryStatWalker("."):
    print file, st[stat.ST_SIZE]

.\aifc-example-1.py 336
.\anydbm-example-1.py 244
.\array-example-1.py 526

  

使用 stat 模块

import stat
import os, time

st = os.stat("samples/sample.txt")

print "mode", "=>", oct(stat.S_IMODE(st[stat.ST_MODE]))

print "type", "=>",
if stat.S_ISDIR(st[stat.ST_MODE]):
    print "DIRECTORY",
if stat.S_ISREG(st[stat.ST_MODE]):
    print "REGULAR",
if stat.S_ISLNK(st[stat.ST_MODE]):
    print "LINK",
print

print "size", "=>", st[stat.ST_SIZE]

print "last accessed", "=>", time.ctime(st[stat.ST_ATIME])
print "last modified", "=>", time.ctime(st[stat.ST_MTIME])
print "inode changed", "=>", time.ctime(st[stat.ST_CTIME])

mode => 0664
type => REGULAR
size => 305
last accessed => Sun Oct 10 22:12:30 1999
last modified => Sun Oct 10 18:39:37 1999
inode changed => Sun Oct 10 15:26:38 1999

  

使用 string 模块

import string

text = "Monty Python's Flying Circus"

print "upper", "=>", string.upper(text)
print "lower", "=>", string.lower(text)
print "split", "=>", string.split(text)
print "join", "=>", string.join(string.split(text), "+")
print "replace", "=>", string.replace(text, "Python", "Java")
print "find", "=>", string.find(text, "Python"), string.find(text, "Java")
print "count", "=>", string.count(text, "n")

upper => MONTY PYTHON'S FLYING CIRCUS
lower => monty python's flying circus
split => ['Monty', "Python's", 'Flying', 'Circus']
join => Monty+Python's+Flying+Circus
replace => Monty Java's Flying Circus
find => 6 -1
count => 3

  

使用字符串方法替代 string 模块函数

text = "Monty Python's Flying Circus"

print "upper", "=>", text.upper()
print "lower", "=>", text.lower()
print "split", "=>", text.split()
print "join", "=>", "+".join(text.split())
print "replace", "=>", text.replace("Python", "Perl")
print "find", "=>", text.find("Python"), text.find("Perl")
print "count", "=>", text.count("n")

upper => MONTY PYTHON'S FLYING CIRCUS
lower => monty python's flying circus
split => ['Monty', "Python's", 'Flying', 'Circus']
join => Monty+Python's+Flying+Circus
replace => Monty Perl's Flying Circus
find => 6 -1
count => 3

  

使用 string 模块将字符串转为数字

import string

print int("4711"),
print string.atoi("4711"),
print string.atoi("11147", 8), # octal 八进制
print string.atoi("1267", 16), # hexadecimal 十六进制
print string.atoi("3mv", 36) # whatever...

print string.atoi("4711", 0),
print string.atoi("04711", 0),
print string.atoi("0x4711", 0)

print float("4711"),
print string.atof("1"),
print string.atof("1.23e5")

4711 4711 4711 4711 4711
4711 2505 18193
4711.0 1.0 123000.0

  operator 模块为 Python 提供了一个 "功能性" 的标准操作符接口. 当使用 map 以及 filter 一类的函数的时候, operator 模块中的函数可以替换一些lambda 函式. 而且这些函数在一些喜欢写晦涩代码的程序员中很流行.

使用 operator 模块

print "add", "=>", reduce(operator.add, sequence)
print "sub", "=>", reduce(operator.sub, sequence)
print "mul", "=>", reduce(operator.mul, sequence)
print "concat", "=>", operator.concat("spam", "egg")
print "repeat", "=>", operator.repeat("spam", 5)
print "getitem", "=>", operator.getitem(sequence, 2)
print "indexOf", "=>", operator.indexOf(sequence, 2)
print "sequenceIncludes", "=>", operator.sequenceIncludes(sequence, 3)

add => 7
sub => -5
mul => 8
concat => spamegg
repeat => spamspamspamspamspam

getitem => 4
indexOf => 1
sequenceIncludes => 0

  

使用 operator 模块检查类型

import operator
import UserList

def dump(data):
    print type(data), "=>",
    if operator.isCallable(data):
        print "CALLABLE",
    if operator.isMappingType(data):
        print "MAPPING",
    if operator.isNumberType(data):
        print "NUMBER",
    if operator.isSequenceType(data):
        print "SEQUENCE",
    print
        
dump(0)
dump("string")
dump("string"[0])
dump([1, 2, 3])
dump((1, 2, 3))
dump({"a": 1})
dump(len) # function 函数
dump(UserList) # module 模块
dump(UserList.UserList) # class 类
dump(UserList.UserList()) # instance 实例

<type 'int'> => NUMBER
<type 'string'> => SEQUENCE
<type 'string'> => SEQUENCE
<type 'list'> => SEQUENCE
<type 'tuple'> => SEQUENCE
<type 'dictionary'> => MAPPING
<type 'builtin_function_or_method'> => CALLABLE
<type 'module'> =>
<type 'class'> => CALLABLE
<type 'instance'> => MAPPING NUMBER SEQUENCE

  copy 模块包含两个函数, 用来拷贝对象

使用 copy 模块复制对象

 

import copy

a = [[1],[2],[3]]
b = copy.copy(a)

print "before", "=>"
print a
print b

# modify original
a[0][0] = 0
a[1] = None

print "after", "=>"
print a
print b

before =>
[[1], [2], [3]]
[[1], [2], [3]]
after =>
[[0], None, [3]]
[[0], [2], [3]]

  

使用 copy 模块复制集合(Collections)

import copy

a = [[1],[2],[3]]
b = copy.deepcopy(a)

print "before", "=>"
print a
print b

# modify original
a[0][0] = 0
a[1] = None

print "after", "=>"
print a
print b

before =>
[[1], [2], [3]]
[[1], [2], [3]]
after =>
[[0], None, [3]]
[[1], [2], [3]]

 使用sys模块获得脚本的参数

import sys

print "script name is", sys.argv[0]

if len(sys.argv) > 1:
    print "there are", len(sys.argv)-1, "arguments:"
    for arg in sys.argv[1:]:
        print arg
else:
    print "there are no arguments!"

script name is sys-argv-example-1.py
there are no arguments!

  

使用sys模块操作模块搜索路径

import sys

print "path has", len(sys.path), "members"

# add the sample directory to the path
sys.path.insert(0, "samples")
import sample

# nuke the path
sys.path = []
import random # oops!

path has 7 members
this is the sample module!
Traceback (innermost last):
  File "sys-path-example-1.py", line 11, in ?
    import random # oops!
ImportError: No module named random

  

使用sys模块查找内建模块

import sys

def dump(module):
    print module, "=>",
    if module in sys.builtin_module_names:
        print "<BUILTIN>"
    else:
        module = _ _import_ _(module)
        print module._ _file_ _

dump("os")
dump("sys")
dump("string")
dump("strop")
dump("zlib")

os => C:\python\lib\os.pyc
sys => <BUILTIN>
string => C:\python\lib\string.pyc
strop => <BUILTIN>
zlib => C:\python\zlib.pyd

  

使用sys模块查找已导入的模块

modules 字典包含所有加载的模块. import 语句在从磁盘导入内容之前会先检查这个字典.

import sys

print sys.modules.keys()

['os.path', 'os', 'exceptions', '__main__', 'ntpath', 'strop', 'nt',
'sys', '__builtin__', 'site', 'signal', 'UserDict', 'string', 'stat']

  getrefcount 函数返回给定对象的引用计数 - 也就是这个对象被引用的次数. Python 会跟踪这个值, 当它减少为0的时候, 就销毁这个对象.

使用sys模块获得引用计数

import sys

variable = 1234

print sys.getrefcount(0)
print sys.getrefcount(variable)
print sys.getrefcount(None)

50
3
192

  注意这个值总是比实际的数量大, 因为调用 getrefcount 时, 传入的参数本身也是对该对象的一次(临时)引用.

使用sys模块获得当前平台

import sys

#
# emulate "import os.path" (sort of)...

if sys.platform == "win32":
    import ntpath
    pathmodule = ntpath
elif sys.platform == "mac":
    import macpath
    pathmodule = macpath
else:
    # assume it's a posix platform
    import posixpath
    pathmodule = posixpath

print pathmodule

  setprofile 函数允许你配置一个分析函数(profiling function). 这个函数会在每次调用某个函数或方法时被调用(明确或隐含的), 或是遇到异常的时候被调用. 

使用sys模块配置分析函数

import sys

def test(n):  # demo workload for the profiler; comments stay inline so reported line numbers do not shift
    j = 0
    for i in range(n):
        j = j + i  # running sum of 0..n-1; j is never read afterwards
    return n  # the argument is returned unchanged

def profiler(frame, event, arg):
    print event, frame.f_code.co_name, frame.f_lineno, "->", arg

# profiler is activated on the next call, return, or exception
# 分析函数将在下次函数调用, 返回, 或异常时激活
sys.setprofile(profiler)

# profile this function call
# 分析这次函数调用
test(1)

# disable profiler
# 禁用分析函数
sys.setprofile(None)

# don't profile this call
# 不会分析这次函数调用
test(2)

call test 3 -> None
return test 7 -> 1

  

使用sys模块配置单步跟踪函数

import sys

def test(n):  # demo workload for the tracer; comments stay inline so reported line numbers do not shift
    j = 0
    for i in range(n):
        j = j + i  # running sum of 0..n-1; j is never read afterwards
    return n  # the argument is returned unchanged

def tracer(frame, event, arg):
    print event, frame.f_code.co_name, frame.f_lineno, "->", arg
    return tracer

# tracer is activated on the next call, return, or exception
# 跟踪器将在下次函数调用, 返回, 或异常时激活
sys.settrace(tracer)

# trace this function call
# 跟踪这次函数调用
test(1)

# disable tracing
# 禁用跟踪器
sys.settrace(None)

# don't trace this call
# 不会跟踪这次函数调用
test(2)

call test 3 -> None
line test 3 -> None
line test 4 -> None
line test 5 -> None
line test 5 -> None
line test 6 -> None
line test 5 -> None
line test 7 -> None
return test 7 -> 1