python 学习记录（10）-字符串与正则表达式

1.1 字符串格式化

#!/usr/bin/python
# -*- coding: UTF-8 -*-

# 格式化字符串
str1 = "version"
num = 1.0
format = "%s" % str1
print format
format = "%s %d" % (str1, num)
print format

# 带精度的格式化
print "浮点型数字: %f" % 1.25           控制输出小数位数
print "浮点型数字: %.1f" % 1.25
print "浮点型数字: %.2f" % 1.254

# 使用字典格式化字符串
print "%(version)s: %(num).1f" % {"version": "version", "num": 2}     输出格式括号中的内容与key值对应

# 字符串对齐
word = "version3.0"
print word.center(20)            20个字符的中间居中对齐
print word.center(20, "*")     * 取代空格字符
print word.ljust(0)                   输出左对齐
print word.rjust(20)               右对齐
print "%30s" % word                左侧+文字 30 个字符

>>>
version
version 1
浮点型数字: 1.250000
浮点型数字: 1.2
浮点型数字: 1.25
version: 2.0
     version3.0
*****version3.0*****
version3.0
          version3.0
                    version3.0
>>>
1.2 转义字符

#!/usr/bin/python
# -*- coding: UTF-8 -*-

# 输出转义字符
path = "hello\tworld\n"       一个制表符，一个换行符
print path
print len(path)
path = r"hello\tworld\n"         一个制表符，一个换行符失效，原样输出
print path
print len(path)

# strip()去掉转义字符
word = "\thello world\n"
print "直接输出:", word
print "strip()后输出:", word.strip()                 去掉转义符
print "lstrip()后输出:", word.lstrip()             去掉左边转义符
print "rstrip()后输出:", word.rstrip()

>>>
hello   world

12
hello\tworld\n
14
直接输出:    hello world

strip()后输出: hello world
lstrip()后输出: hello world

rstrip()后输出:    hello world
>>>
1.3 字符连接

#!/usr/bin/python
# -*- coding: UTF-8 -*-

# 使用"+"连接字符串
str1 = "hello "
str2 = "world "
str3 = "hello "
str4 = "China "
result = str1 + str2 + str3
result += str4
print result

# 使用join()连接字符串
strs = ["hello ", "world ", "hello ", "China "]
result = "".join(strs)
print result

# 使用reduce()连接字符串
import operator
strs = ["hello ", "world ", "hello ", "China "]
result = reduce(operator.add, strs, "")
print result

>>>
hello world hello China
hello world hello China
hello world hello China
>>>
1.4 字符串切片
#!/usr/bin/python
# -*- coding: UTF-8 -*-

# 使用索引截取子串
word = "world"
print word[4]

# 使用split()获取子串
sentence = "Bob said: 1, 2, 3, 4"
print "使用空格取子串:", sentence.split()
print "使用逗号取子串:", sentence.split(",")
print "使用两个逗号取子串:", sentence.split(",", 2)

# 字符串连接后将分配新的空间
str1 = "a"
print id(str1)
print id(str1 + "b")

# 特殊切片截取子串
str1 = "hello world"
print word[0:3]
print str1[::2]
print str1[1::2] 以步长为2进行截取

>>>
d
使用空格取子串: ['Bob', 'said:', '1,', '2,', '3,', '4']
使用逗号取子串: ['Bob said: 1', ' 2', ' 3', ' 4']
使用两个逗号取子串: ['Bob said: 1', ' 2', ' 3, 4']
19156480
19847048
wor
hlowrd
el ol
>>>
1.5 字符串的比较

#!/usr/bin/python
# -*- coding: UTF-8 -*-

# 字符串的比较
str1 = 1
str2 = "1"                 两者类型不同
if str1 == str2:
    print "相同"
else:
    print "不相同"
if str(str1) == str2:         类型转换后比较
    print "相同"
else:
    print "不相同"

# 比较字符串的开始和结束处
word = "hello world"
print "hello" == word[0:5]             匹配前5个
print word.startswith("hello")      开始
print word.endswith("ld", 6)           结尾第6个开始
print word.endswith("ld", 6, 10)       从6到9的字符
print word.endswith("ld", 6, len(word))

>>>
不相同
相同
True
True
True
False
True
>>>
1.6 反转字符串

#!/usr/bin/python
# -*- coding: UTF-8 -*-

# 使用list的reverse()
def reverse(s):
    """Return ``s`` reversed, using ``list.reverse()``.

    The items of ``s`` are copied into a list, the list is reversed in
    place, and the items are joined back into a single string.
    """
    li = list(s)
    li.reverse()        # in-place reversal of the list
    return "".join(li)  # join the items back into one string

# Exercise the list.reverse() based implementation defined just above.
print reverse("hello")

# 循环输出反转的字符串即倒序输出
def reverse(s):
    """Return ``s`` reversed, assembled piece by piece in an explicit loop."""
    items = list(s)
    pieces = []
    # Walk the items from the last one to the first.
    for item in reversed(items):
        # Joining a single character is a no-op; the join is kept so each
        # item is processed the same way as before.
        pieces.append("".join(item))
    return "".join(pieces)

# Exercise the loop-based implementation defined just above (it rebinds the name `reverse`).
print reverse("hello")

# 特殊切片反转字符串
def reverse(s):
    """Return ``s`` reversed, using an extended slice."""
    # A step of -1 walks the sequence from the last item to the first;
    # this reverses the order and does not sort anything.
    return s[::-1]

# Exercise the slice-based implementation defined just above (it rebinds the name `reverse`).
print reverse("hello")

>>>
olleh
olleh
olleh
>>>

1.7 字符串的替换和查找

#!/usr/bin/python
# -*- coding: UTF-8 -*-

# 查找字符串
sentence = "This is a apple."
print sentence.find("a")
sentence = "This is a apple."
print sentence.rfind("a") 从右边开始查找

>>>
8
10
>>>

#!/usr/bin/python
# -*- coding: UTF-8 -*-

# 字符串的替换
centence = "hello world, hello China"
print centence.replace("hello", "hi")
print centence.replace("hello", "hi", 1)
print centence.replace("abc", "hi")

>>>
hi world, hi China
hi world, hello China
hello world, hello China
>>>
1.8 时间与字符串的转换

#!/usr/bin/python
# -*- coding: UTF-8 -*-

import time,datetime

# 时间到字符串的转换
print time.strftime("%Y-%m-%d %X", time.localtime())
# 字符串到时间的转换
t = time.strptime("2008-08-08", "%Y-%m-%d")
y, m, d = t[0:3] 取 y m d 的值
print datetime.datetime(y, m, d) 以datetime时间格式输出

>>>
2012-08-20 22:29:25
2008-08-08 00:00:00
>>>
1.9 正则表达式

#!/usr/bin/python
# -*- coding: UTF-8 -*-

import re

s = "hello world"
print re.sub("hello", "hi", s)                        #sub替换
print re.sub("hello", "hi", s[-4:])
print re.sub("world", "China", s[-5:])
>>>
hi world    将 s 中的 "hello" 替换为 "hi"
orld        s 仍为 "hello world"（re.sub 不会修改原字符串）；分片 s[-4:] 为 "orld"，其中没有 "hello"，故原样输出
China       分片 s[-5:] 为 "world"，被替换为 "China" 后输出
>>>

# ^与$的使用
s = "HELLO WORLD"
print re.findall(r"^hello", s) 对小写hello进行匹配，匹配失败，为空
print re.findall(r"^hello", s, re.IGNORECASE)    不区分大小写匹配
print re.findall("WORLD$", s)     不区分大小写匹配
print re.findall(r"wORld$", s, re.I)    不区分大小写匹配
print re.findall(r"\b\w+\b", s)    截取字符
>>>
[]
['HELLO']
['WORLD']
['WORLD']
['HELLO', 'WORLD']
>>>

# 特殊字符的使用
s = "你好 WORLD2"
print "匹配字母数字：" + re.sub(r"\w", "hi", s)            hi替换掉字母和数字，共6个，替换6次
print "替换次数：" + str(re.subn(r"\w", "hi", s)[1])
print "匹配非字母数字的字符：" + re.sub(r"\W", "hi", s) 大写匹配非字母和数字
print "替换次数：" + str(re.subn(r"\W", "hi", s)[1])
print "匹配空白字符：" + re.sub(r"\s", "*", s)                    匹配空白字符
print "替换次数：" + str(re.subn(r"\s", "*", s)[1])
print "匹配非空白字符：" + re.sub(r"\S", "hi", s)             大写匹配非空白字符
print "替换次数：" + str(re.subn(r"\S", "hi", s) [1])
print "匹配数字：" + re.sub(r"\d", "2.0", s)
print "替换次数：" + str(re.subn(r"\d", "2.0", s)[1])
print "匹配非数字：" + re.sub(r"\D", "hi", s)
print "替换次数：" + str(re.subn(r"\D", "hi", s)[1])
print "匹配任意字符：" + re.sub(r".", "hi", s)
print "替换次数：" + str(re.subn(r".", "hi", s)[1])

>>>
匹配字母数字：你好 hihihihihihi
替换次数：6
匹配非字母数字的字符：hihihihihihihiWORLD2
替换次数：7
匹配空白字符：你好*WORLD2
替换次数：1
匹配非空白字符：hihihihihihi hihihihihihi
替换次数：12
匹配数字：你好 WORLD2.0
替换次数：1
匹配非数字：hihihihihihihihihihihihi2
替换次数：12
匹配任意字符：hihihihihihihihihihihihihi
替换次数：13
>>>

# 限定符的使用
tel1 = "0791-1234567"
print re.findall(r"\d{3}-\d{8}|\d{4}-\d{7}", tel1)     用或方式匹配3位加8位和4位加7位的数字，大括号指定位数
tel2 = "010-12345678"
print re.findall(r"\d{3}-\d{8}|\d{4}-\d{7}", tel2)
tel3 = "(010)12345678"
print re.findall(r"[$]?\d{3}[$-]?\d{8}|[$]?\d{4}[$-]?\d{7}", tel3)    匹配括号的方法；
print re.findall(r"a.*?c", "abcabc")
>>>
['0791-1234567']
['010-12345678']
['(010)12345678']
['abc', 'abc']
>>>

# compile()预编译
s = "1abc23def45"
p = re.compile(r"\d+")    对数字类型进行匹配
print p.findall(s)    再在s中寻找
print p.pattern
>>>
['1', '23', '45']
\d+
>>>

# 分组
p = re.compile(r"(abc)\1")   分组abc重复一次
m = p.match("abcabcabc")
print m.group(0)
print m.group(1)
print m.group()    默认分组0

p = re.compile(r"(?P<one>abc)(?P=one)")
m = p.search("abcabcabc")
print m.group(0)
print m.group("one")
print m.groupdict().keys() 分组字典
print m.groupdict().values()

>>>
abcabc
abc
abcabc
abcabc
abc
['one']
['abc']
>>>

posted on 2022-07-05 18:13 我在全球村阅读(62) 评论(0) 收藏举报

刷新页面返回顶部