re+正则01

Posted on 2019-04-17 15:43  Max404  阅读(275)  评论(0)  编辑  收藏  举报
# 正则
# 正则

'''

正则就是带语法的字符串,
用来匹配目标字符串得到想要的字符串结果

'''

# 部分语法:

    # 1.单独字符

    # \d 范围 [0-9]
    # \D 范围刨除 [^0-9]
    # \w 字母+数字+_
    # ...

    # 2.多个字符

    # zo* == zo{0,}
    # zo+ == zo{1,}
    # zo? == zo{0,1}

# ----------------------------------------------------------------------------------------------

    # 3.多行

    # re.M
    # ^以什么开头
    # $以什么结尾

    # re.S
    # 换行符也可以进行匹配\n

    # re.I
    # 不区分大小写

import re

# --(re模块,正则)--------------------------------------------------------------------------------------------------
# --(import re)--------------------------------------------------------------------------------------------------

# 有语法的字符串,用来匹配获取目标字符串中指定需求的字符串
# \ 一个反斜杠用来转义,\\ 两个反斜杠代表一个 \

    # Ex

        # (find all,查找全部)

        # x = 'asdas64fd65q4f65a4s6d//asdqwr'
        # res = re.findall(r'//',x)
        # print(res)

    # | 代表或,or
    # r''原义内容
    # \+转义内容

# --(大写都是小写的反取)--------------------------------------------------------------------------------------

        # r'[A-Za-z0-9哈]' 区间方法[A-Z]|[a-z]|[0-9]|'哈'
        # r'a' --- 字符串
        # r'ad' --- 字符串
        # r'\D' --- 非数字
        # r'\d' --- 数字
        # r'\w' --- 数字+字母+下划线
        # r'\W' --- 刨除(数字+字母+下划线)
        # r'\s' --- 所有空白 制表符 换行符
        # r'\S' --- 刨除(所有空白 制表符 换行符)
        # r'.' --- 所有单个字符,刨除 \n 换行符


# --(Ex)-----------------------------------------------------------------------------------------------

        # print(re.findall(r'需求',r'查找对象'))
        # print(re.findall(r'需求',查找对象变量名))

        # print(re.findall(r'[条件范围]{个数}',r'查找对象'))
        # print(re.findall(r'[a-z]{2}',r'asdlasjdnsb'))
        # ['as', 'dl', 'as', 'jd', 'ns']


# --(贪婪匹配 条件{n,n})-----------------------------------------------------------------------------------------------
# --( * + ? )-----------------------------------------------------------------------------------------------
# print(re.findall(r'条件{个数起数,个数止数}',r'查找对象'))


        # print(re.findall(r'o{2,}',r'ooaoobooodooqooooooo'))
        # ['oo', 'oo', 'ooo', 'oo', 'ooooooo']


# --(多行匹配 条件 re.M)--
# ---------------------------------------------------------------------------------------------
# 格式: print(re.findall(r'^开头符','fasdasd\nefasd\nffwe\nofo',re.M多行))

        # ^以后面条件 为开头
        # ^.+ 匹配剩余

        # re.M 多行模式:让 ^ 和 $ 匹配每一行的开头和结尾(以 \n 分行)
        # re.S 将\n也能被.匹配
        # re.I 不区分大小写

        # Ex
            # print(re.findall(r'^f.+','faa\nefbb\nffcc\nofo',re.M))
            # ['faa', 'ffcc']



# --(分组匹配)-----------------------------------------------------------------------------------------------

# ?:取消分组(取消当前分组)

    # 1.从左往右数,进行编号

    # baidu = 'http://www.baidu.com/'
    # regexp = re.compile('((http://)(.+)/)') # 生成正则对象 compile编译
    # print(regexp) # re.compile('((http://)(.+)/)')
    # print(regexp.search(baidu)) # <_sre.SRE_Match object; span=(0, 21), match='http://www.baidu.com/'>
    # target = regexp.match(baidu) # match 进行配对处理
    # print(target) # <_sre.SRE_Match object; span=(0, 21), match='http://www.baidu.com/'>
    # print(target.group(0),id(target.group(0))) # http://www.baidu.com/ 自身
    # print(target.group(1),id(target.group(1))) # http://www.baidu.com/
    # print(target.group(2)) # http://
    # print(target.group(3)) # www.baidu.com

# --(替换)-------------------------------------------------------------------

    # 不需要捕获为分组的条件用 (?:...)
    # 被匹配到的内容会被替换为指定字符串(例如 r'\3\2\1' 会按分组重新排列输出)
    # 在指定字符串中用 \num 拿到具体分组
    # 其他字符串都是原样字符串

    # print(re.sub('([a-z]+)(\d+)(.+)',r'\3\2\1','abc123你好')) # 你好123abc
    # print(re.sub('([a-z]+)(\d+)(.+)',r'\1','abc123你好')) # abc
    # print(re.sub('([a-z]+)(\d+)(.+)',r'\2','abc123你好')) # 123
    # print(re.sub('([a-z]+)(\d+)(.+)',r'\3','abc123你好')) # 你好
    #
    # baidu = 'http://www.baidu.com'
    # SOHO = 'http://www.soho.com'
    # GOOLE = 'goole'


# 拿到外部变量名:

# 在r原义模式下字符串拼接 'sth'+变量名+'sth'
# print(re.sub('(http://)(www.)(\w+)(.com)',r'\1\2'+GOOLE+'\\4',baidu))
    # http://www.goole.com

# 在r原义模式下 %s 占位符替换
# print(re.sub('(http://)(www\.)(\w+)(\.com)',r'\1\2%s\4'%GOOLE,baidu))
    # http://www.goole.com

# 在转义模式下,使用占位符 %s
# print(re.sub('(http://)(www\.)(\w+)(\.com)','\\1\\2%s\\4\\n%s'%(GOOLE,baidu),baidu))
    # http://www.goole.com
    # http://www.baidu.com

# 非贪婪匹配的应用场景:结合特定的开头与结尾
# ------------------------------------------------------------------------------------------------------------------------

    # Ex
    #     print(re.findall(r'f+','faa\nefbb\nffcc\nofo'))
    #     # (+ 相同字符尽可能多)f+ ['f', 'f', 'ff', 'f']
    #     print(re.findall(r'f.+','faa\nefbb\nffcc\nofo'))
    #     # (f.+ 中 . 表示任意一个字符(\n 除外),.+ 表示一个或多个,
    #     # 后面有多少加多少)['faa', 'fbb', 'ffcc', 'fo']
    #     res = re.findall('.+','<a>abc</a>')
    #     # print(res) #['<a>abc</a>']
    #     res = re.findall('.*','<a>abc</a>')
    #     # print(res) # ['<a>abc</a>', '']
    #     res = re.findall('.*?','<a>abc</a>')
    #     # print(res) # ['', '', '', '', '', '', '', '', '', '', '']
    #     res = re.findall('<.*>','<a>abc</a>')
    #     # print(res) # ['<a>abc</a>']
    #     res = re.findall('.*?>','<a>abc</a>')
    #     # print(res) # ['<a>', 'abc</a>']
    #     res = re.findall('\w*?>','<a>abc</a>')
    #     # print(res) # ['a>', 'a>']
    #     res = re.findall('(\w*?)>','<a>abc</a>')
    #     # print(res) # ['a', 'a']

# ---------------------------------------------------------------------------------------------------------

# 操作分组的方法
# ?P<name> : 有名分组
#
#     res = re.match('(?P<left>\d{3})(?P<center>\d{3})(\d{3})','123456789')
#     print(res.group(1)) # 123
#     print(res.group('left')) # 123
#     print(res.group(2)) # 456
#     print(res.group('center')) # 456
#     print(res.group(3)) # 789
#     print(res.group(0)) # 123456789