Python 正则表达式 re.findall()用法

re.findall函数介绍

def findall(pattern, string, flags=0):
    """Return a list of all non-overlapping matches in the string.

    If one or more capturing groups are present in the pattern, return
    a list of groups; this will be a list of tuples if the pattern
    has more than one group.

    Empty matches are included in the result."""

    # Thin wrapper: delegates to the findall() method of the object returned
    # by _compile(pattern, flags); _compile is defined outside this excerpt.
    return _compile(pattern, flags).findall(string)

re.findall(pattern, string, flags=0) 用来返回 string 中所有与 pattern 相匹配的非重叠子串，返回形式为列表（list）；如果 pattern 中包含分组，则返回的是各分组内容组成的列表。

举个例子，有一段英文，想要查询某一个字母出现的次数

import re


def count_word(text, word):
    """Count case-insensitive, non-overlapping occurrences of ``word`` in ``text``.

    ``word`` is matched as literal text, not as a regular expression, so
    characters such as "." or "*" only match themselves.

    Args:
        text: The string to search in.
        word: The literal letter or substring to look for.

    Returns:
        The number of matches; 0 when ``word`` is empty.
    """
    # An empty pattern would match at every position, which is not a
    # meaningful occurrence count.
    if not word:
        return 0
    # re.escape() keeps regex metacharacters in user input from being
    # interpreted as pattern syntax.
    return len(re.findall(re.escape(word), text, flags=re.IGNORECASE))


if __name__ == "__main__":
    # Interactive demo; guarded so importing this module never blocks on input().
    text = "Python is a high-level,general-purpose programming language.Its design philosophy emphasizes code readability with the use of significant indentation . Python is dynamically typed and garbage-collected."
    word = input("请输入要查询次数的字母：")
    result = count_word(text, word)
    print("单词'{}'出现的次数为：{}".format(word, result))

"d" "D" "w" "W"相关用法：

“\d”：匹配0到9之间的任意一个数字

# "\d": matches a single decimal digit (0 to 9 in this example)
import re
str_s = "21Python is a h23igh-level,genera123l-purpose prog4ramming ！、、language."
ret = re.findall(r'\d', str_s)
print(ret)
# Output:
# ['2', '1', '2', '3', '1', '2', '3', '4']

如果想要匹配连续的几个数字，可以在\d后面继续添加\d，需要匹配几个连续的数字就写几个\d（例如 r'\d\d' 匹配连续的两位数字，也可以写成 r'\d{2}'）。下面三个的用法同理。

“\D”：匹配除了0-9以外的所有内容

# "\D": matches any single character that is not a decimal digit
import re
str_s = "21Python is a ！？‘【】language."
ret = re.findall(r'\D', str_s)
print(ret)
# Output:
# ['P', 'y', 't', 'h', 'o', 'n', ' ', 'i', 's', ' ', 'a', ' ', '！', '？', '‘', '【', '】', 'l', 'a', 'n', 'g', 'u', 'a', 'g', 'e', '.']

“\w”：匹配单词字符，即小写a到z、大写A到Z、数字0到9以及下划线“_”（对于str类型的模式，默认还会匹配汉字等Unicode文字字符，可用re.ASCII限制为ASCII范围）

# "\w": matches a single word character - a letter, a digit or the
# underscore. The full-width punctuation and spaces below are not matched.
import re
str_s = "21Python is a ！？‘【】language."
ret = re.findall(r'\w', str_s)
print(ret)
# Output:
# ['2', '1', 'P', 'y', 't', 'h', 'o', 'n', 'i', 's', 'a', 'l', 'a', 'n', 'g', 'u', 'a', 'g', 'e']

“\W”：匹配除了字母、数字和下划线以外的字符（如空格、标点等特殊符号）

# "\W": matches any single character that is not a word character
# (i.e. not a letter, digit or underscore), such as spaces and punctuation.
import re
str_s = "21Python is a ！？‘【】language."
ret = re.findall(r'\W', str_s)
print(ret)
# Output:
# [' ', ' ', ' ', '！', '？', '‘', '【', '】', '.']

[ ]：匹配括号中的任何一个符合条件的字符

# [ ]: matches any single character listed inside the brackets.
# [pn] matches a lowercase "p" or "n"; the uppercase "P" of "Python" is not
# matched because no IGNORECASE flag is passed.
import re
str_s = "Python is a h23igh-level,genera123l-purpose prog4ramming ！、、language."
ret = re.findall(r'[pn]', str_s)
print(ret)
# Output:
# ['n', 'n', 'p', 'p', 'p', 'n', 'n']

^：取非（^ 放在方括号 [ ] 内的开头时，表示匹配不在括号中的任意一个字符）

# ^ as the first character inside [ ] negates the set: [^Python] matches any
# single character other than "P", "y", "t", "h", "o" and "n". It excludes
# those individual characters, not the word "Python" as a whole.
import re
str_s = "Python is a language."
ret = re.findall(r"[^Python]", str_s)
print(ret)
# Output:
# [' ', 'i', 's', ' ', 'a', ' ', 'l', 'a', 'g', 'u', 'a', 'g', 'e', '.']

flags参数：

# re.I (re.IGNORECASE): ignore letter case when matching, so the pattern
# "p" matches both "p" and "P".
import re

s = "Python is a high-level,general-purpose programming language."
ret = re.findall(r'p', s, flags=re.I)
print(ret)
# Output:
# ['P', 'p', 'p', 'p']