python-正则表达式
程序员文章站
2022-04-09 14:12:49
#!/usr/local/bin/python3 # -*- coding:utf-8 -*- import re # ----------正则表达式符号---------- ''' '.' 默认匹配除\n之外的任意一个字符,若指定flag DOTALL,则匹配任意字符,包括换行 '.+' 匹配任意... ......
#!/usr/local/bin/python3
# -*- coding:utf-8 -*-
r"""Quick reference and runnable examples for Python's ``re`` module.

---------- Regular-expression symbols ----------
'.'      Matches any single character except \n by default; with the DOTALL
         flag it matches any character, including a newline.
'.+'     Matches one or more of any character.
'^'      Matches at the start of the string; with the MULTILINE flag it also
         matches right after each newline, e.g.
         re.search(r"^a", "\nabc\neee", flags=re.MULTILINE).
         Note: redundant with re.match, which already anchors at the start.
'$'      Matches at the end of the string; with MULTILINE it also matches
         before each newline, e.g.
         re.search(r"foo$", "bfoo\nsdfsf", flags=re.MULTILINE).group().
         Note: 'a$' means the searched string ends with 'a'.
'*'      Matches the preceding item 0 or more times:
         re.findall(r"ab*", "cabb3abcbbac") -> ['abb', 'ab', 'a']
'+'      Matches the preceding item 1 or more times:
         re.findall(r"ab+", "ab+cd+abb+bba") -> ['ab', 'abb']
'?'      Matches the preceding item 0 or 1 time.
'{m}'    Matches the preceding item exactly m times.
'{n,m}'  Matches the preceding item n to m times:
         re.findall(r"ab{1,3}", "abb abc abbcbbb") -> ['abb', 'ab', 'abb']
'|'      Matches the alternative on its left or on its right:
         re.search(r"abc|ABC", "ABCBabcCD").group() -> 'ABC'
'(...)'  Grouping:
         re.search(r"(abc){2}a(123|456)c", "abcabca456c").group()
         -> 'abcabca456c'
'\A'     Matches only at the start of the string:
         re.search(r"\Aabc", "alexabc") finds nothing.
'\Z'     Matches only at the very end of the string (similar to '$', but
         '$' also matches just before a trailing newline).
'\d'     Matches a digit (0-9).
'\D'     Matches a non-digit.
'\w'     Matches a word character: [A-Za-z0-9_] (plus Unicode word
         characters for str patterns unless re.ASCII is used).
'\W'     Matches a non-word character.
'\s'     Matches whitespace such as space, \t, \n, \r:
         re.search(r"\s+", "ab\tc1\n3").group() -> '\t'
'(?P<name>...)'  Named group:
         re.search(r"(?P<province>[0-9]{4})(?P<city>[0-9]{2})"
                   r"(?P<birthday>[0-9]{4})",
                   "371481199306143242").groupdict()
         -> {'province': '3714', 'city': '81', 'birthday': '1993'}

---------- Most commonly used functions ----------
re.match    Match only at the beginning of the string.
re.search   Find the first match anywhere in the string.
re.findall  Return every non-overlapping match as a list.
re.split    Split the string, using the matches as separators.
re.sub      Replace the matches.

---------- Flags ----------
1. re.I (re.IGNORECASE): ignore case.
   print(re.search(r"[a-z]+", "abcdA", flags=re.I).group()) -> abcdA
2. re.M (re.MULTILINE): multi-line mode, changes '^' and '$' (see above).
3. re.S (re.DOTALL): makes '.' match any character, including a newline.
   print(re.search(r".+", "\nabc\neee", flags=re.S).group())
"""
import re

# ---------- Matching functions ----------

# match: anchored at the beginning of the string.
res = re.match(r".+", "Zhangyu520Mahongyan")
print(res.group())  # result: Zhangyu520Mahongyan

# search: finds a match anywhere in the string and returns only the first one.
# "M.+n": '.+' is greedy, so the match runs from the first 'M' to the LAST
# 'n' in the string, no matter how many 'n' characters lie in between.
res1 = re.search(r"M.+n", "Zhangyu521MahongyanMahongyan250n")
print(res1.group())  # result: MahongyanMahongyan250n

# "M[a-z]+n": '[a-z]' replaces '.', so the match cannot cross the next
# upper-case 'M'; it ends at the last 'n' of the lower-case run.
res2 = re.search(r"M[a-z]+n", "Zhangyu521MahongyanMahongyan250")
print(res2.group())  # result: Mahongyan

res3 = re.search(r"#[a-zA-Z]+#", "Zhangyu#Mahongyan#Zhangyu")
print(res3.group())  # result: #Mahongyan#

# Search again inside the text returned by res3.group().
res4 = re.search(r"M[a-z]+g", res3.group())
print(res4.group())  # result: Mahong

res5 = re.search(r"aaa?", "aalex")
print(res5.group())  # result: aa

res6 = re.search(r"[0-9]{3}", "aa1x2a345aa")
print(res6.group())  # result: 345

# findall: returns all matches as a list of strings (no group() method).
# {1,3} means 1 to 3 repetitions.
res7 = re.findall(r"[0-9]{1,3}", "aa1x2a3456aa")
print(res7)  # result: ['1', '2', '345', '6']

res8 = re.search(r"abc|ABC", "ABCBabcCD")
print(res8.group())  # result: ABC

res9 = re.findall(r"abc|ABC", "ABCBabcCD")
print(res9)  # result: ['ABC', 'abc']

# Without parentheses the quantifier applies only to the preceding 'c'.
res10 = re.search(r"abc{2}", "alexabccc")
print(res10.group())  # result: abcc

# With parentheses the quantifier applies to the whole group.
res10 = re.search(r"(abc){2}", "alexabcabc")
print(res10.group())  # result: abcabc

# Raw strings keep the backslash escapes intact for the regex engine and
# avoid Python's "invalid escape sequence" warning.
res11 = re.search(r"(abc){2}\|", "alexabcabc|")
print(res11.group())  # result: abcabc|

res12 = re.search(r"(abc){2}(\|\|\=){2}", "alexabcabc||=||=")
print(res12.group())  # result: abcabc||=||=

res13 = re.search(r"\D+", "123$-a")
print(res13.group())  # result: $-a

res14 = re.search(r"\w+", "1ddDFR23$- \r\na")
print(res14.group())  # result: 1ddDFR23

# Whitespace is also a non-word character, so the match includes " \r\n".
res15 = re.search(r"\W+", "1ddDFR23$- \r\na")
print(repr(res15.group()))  # result: '$- \r\n'

# Print the matched text (via repr so the whitespace is visible) rather
# than the Match object itself.
res16 = re.search(r"\s+", "1ddDFR23$- \r\na")
print(repr(res16.group()))  # result: ' \r\n'

# split: use the matches as list separators.
res16 = re.split(r"[0-9]+", "abc12de3f45GH")
print(res16)  # result: ['abc', 'de', 'f', 'GH']

# sub: replace every match.
res17 = re.sub(r"[0-9]+", "|", "abc12de3f45GH")
print(res17)  # result: abc|de|f|GH

# Replace only the first 2 matches.
res18 = re.sub(r"[0-9]+", "|", "abc12de3f45GH", count=2)
print(res18)  # result: abc|de|f45GH

# ---------- Examples ----------

# Named groups.
a = re.search(r"(?P<id>[0-9]+)(?P<name>[a-zA-Z]+)", "abcd1234daf@34").groupdict()
print(a["id"])  # result: 1234
print(a["name"])  # result: daf

# groupdict() takes an optional *default* for groups that did not take part
# in the match, not a group name; every group matches here, so no argument
# is needed.
b = re.search(
    r"(?P<province>[0-9]{4})(?P<city>[0-9]{2})(?P<birthday>[0-9]{4})",
    "371481199306143242",
).groupdict()
print(b)  # result: {'province': '3714', 'city': '81', 'birthday': '1993'}

if __name__ == '__main__':
    pass