python—正则表达式实例
程序员文章站
2022-05-09 18:03:53
如果要学会Python爬虫,那么正则表达式是不可或缺的技能。在下收集了一些关于正则表达式的代码,多多练习,多多学习#match函数应用import reprint(re.match("done|quit",'d!one!done'))print(re.match("\dcom","www.4comrunoob.5com"))#search函数应用import reprint(re.search("done|quit",'d!one!done'))print(re.search("\dcom"...
如果要学会Python爬虫,那么正则表达式是不可或缺的技能。在下收集了一些关于正则表达式的示例代码,供大家多多练习,多多学习。
# Using re.match()
import re
# match() only tries the pattern at the very start of the string, so both
# calls below print None even though the text occurs later in the string.
print(re.match("done|quit", 'd!one!done'))
# Raw string: "\d" in a normal string literal is an invalid escape sequence.
print(re.match(r"\dcom", "www.4comrunoob.5com"))
# Using re.search()
import re
# search() scans the whole string and returns the first match it finds.
print(re.search("done|quit", 'd!one!done'))
# Raw string: "\d" in a normal string literal is an invalid escape sequence.
print(re.search(r"\dcom", "www.4comrunoob.5com"))
import re

# First word character that does not occur again later in the string
# (the negative lookahead rejects any character that reappears).
result = re.compile(r"(\w)(?!.*\1)").search("abc@cslg.edu.cn")
print(result)
# Using match objects with ordinary (numbered) groups, example 1
import re

m = re.compile(r"(\w+) (\w+)").match("Isaac Newton, physicist")
# The whole match first, then each captured word on its own line.
print(m.group(0), m.group(1), m.group(2), sep="\n")
# Asking for several groups at once returns them as a tuple.
print(m.group(1, 2))
# \1 must repeat exactly the text captured by the first group.
m = re.compile(r"(\w+) \1").match("Isaac Isaac, physicist")
print(m.group())
# Using match objects with ordinary (numbered) groups, example 2
import re

# Integer part and fractional part are captured separately.
m = re.compile(r"(\d+)\.(\d+)").match("24.556")
print(m.groups())
# group(0, 1, 2) returns a tuple; unpacking prints the items space-separated.
print(*m.group(0, 1, 2))
# Using match objects with named groups, example 1
import re
# The space between the two groups is required: without it the greedy first
# group takes "Isaa" and the second group is left with only "c".
m = re.match(r"(?P<first_name>\w+) (?P<last_name>\w+)", "Isaac Newton, physicist")
# Named groups are available as a dict, by name, and by position.
print(m.groupdict())
print(m.group())
print(m.group("first_name"))
print(m.group("last_name"))
print(m.groups())
print(m.group(0), m.group(1), m.group(2))
# Using match objects with named groups, example 2
m = re.compile(r"(?P<first_name>\w+) (?P<last_name>\w+)").match("Malcolm Reynolds")
# groupdict() maps each group name to the text it captured.
print(m.groupdict())
# findall(): calling the re module directly
import re

s = "aabc abcd abbcd aacd"
print(re.compile("aa").findall(s))
s = 'aabc abcd abbcd abccd abcdd'
# Whole words containing an immediately repeated run of characters; with two
# groups in the pattern, findall() returns (word, repeated_part) tuples.
print(re.compile(r"(\b\w*(?P<f>\w+)(?P=f)\w*\b)").findall(s))
# Word characters that never occur again later in the string.
result = re.compile(r"(\w)(?!.*\1)").findall("abc@cslg.edu.cn")
print(result)
# findall(): using a compiled regular expression object
import re

tt = "Tina is a good girl, she is cool, clever, and so on..."
pattern = re.compile(r"\w*oo\w*")
# Every word that contains "oo".
print(re.findall(pattern, tt))
s = 'aabc abcd abbcd abccd abcdd'
# Words with an immediately repeated run, returned as (word, repeated_part).
print(re.compile(r"(\b\w*(?P<f>\w+)(?P=f)\w*\b)").findall(s))
# finditer(): calling the re module directly
import re
s = "aabc abcd abbcd aacd"
# finditer() returns an iterator of match objects, so printing it shows only
# the iterator object itself, not the matches.
print(re.finditer("aa", s))
iter_re = re.finditer("aa", s)
for item in iter_re:
    # group(0) and group() are both the whole match; this pattern has no
    # groups, so groups() and groupdict() are empty.
    print(item.group(0), item.group(), item.groups(), item.groupdict())
s = 'aabc abcd abbcd abccd abcdd abab'
# Words containing a doubled character; "abab" has none and is skipped.
print(re.findall(r"(\b\w*(?P<f>\w)(?P=f)\w*\b)", s))
iter_re = re.finditer(r"(\b\w*(?P<f>\w)(?P=f)\w*\b)", s)
for item in iter_re:
    print(item.group(), item.group(0), item.groups(), item.groupdict())
# String processing with the re module: split()
import re
text = "alpha. beta....gamma delta"
# Patterns are raw strings: "\." and "\s" are invalid escape sequences in
# ordinary string literals.
# Split on every run of dots.
result = re.split(r'[\.]+', text)
print(result)
# maxsplit caps the number of splits; the remainder stays in the last item.
result = re.split(r'[\.]+', text, maxsplit=2)
print(result)
result = re.split(r'[\.]+', text, maxsplit=1)
print(result)
# Split on a run of dots or a single whitespace character. maxsplit is passed
# by keyword because passing it positionally is deprecated.
result = re.split(r"\.+|\s", "hello world...d.t", maxsplit=2)
print(result)
result = re.split(r"\.+|\s", "hello world...d.t", maxsplit=3)
print(result)
import re
# Match four dash-separated groups of one to three digits at the start of
# the string.
result = re.match(r"\d{1,3}-\d{1,3}-\d{1,3}-\d{1,3}", "111-234-455-233")
print(result)
# match() returns None when nothing matches; compare by identity.
if result is not None:
    print(result.group(0))
# String replacement with sub()
import re
# Replace only the first two words. count is passed by keyword because
# passing it positionally is deprecated.
a = re.sub(r'\w+', '10', "ji 43 af,geq", count=2, flags=re.I)
# A compiled pattern with no count replaces every word.
exp = re.compile(r'\w+', re.I)
b = exp.sub('10', "ji 43 af,geq")
print(a)
print(b)
# String processing with the re module: sub()
import re
pat = '{name}'
text = "Dear {name}..."
# The placeholder is meant literally, so escape it rather than relying on
# "{name}" not being parsed as a repetition quantifier.
result = re.sub(re.escape(pat), 'Mr.Dong', text)
print(type(result), result)
s = "a s d"
# Alternation: each of the three letters is replaced.
result = re.sub('a|s|d', "good", s)
print(result)
# Raw pattern (avoids invalid "\." / "\s" escapes) and keyword count
# (positional count is deprecated); at most three replacements are made.
result = re.sub(r"\.+|\s", "#", "hello world...d.t", count=3)
print(result)
# String processing with the re module: sub() with back-references and callables
s = "It's a very good good idea"
# Collapse a word followed by a space and the same text into one copy.
result = re.compile(r"(\b\w+) \1").sub(r"\1", s)
print(result)
# Here group 1 includes the trailing space, and only the inner word (group 2)
# is written back.
result = re.compile(r"((\w+) )\1").sub(r"\2", s)
print(result)
print(re.compile('a').findall('aaa abc abcd'))
# The replacement may be a function that receives each match object.
result = re.sub('a', lambda mo: mo.group().upper(), 'aaa abc abcd')
print(result)
result = re.sub('[a-z]', lambda mo: mo.group().upper(), 'aaa abc abcd')
print(result)
# Flip the case of every ASCII letter.
result = re.sub('[a-zA-Z]', lambda mo: mo.group().swapcase(), 'aaa abc Abcd')
print(result)
# subn() returns (new_string, number_of_replacements).
result = re.subn('a', 'dfg', 'aaa abc adde')
print(result)
result = re.sub('a', 'dfg', 'aaa abc adde')
print(result)
# escape() backslash-escapes regex metacharacters in a literal string.
result = re.escape('http://www.python.org')
print(result)
# Example: removing redundant whitespace
import re
s = "aaa bb c d e fff "
# Plain string methods: split on any whitespace run, re-join with one space.
print(" ".join(s.split()))
# The same result with regular expressions. Patterns are raw strings because
# "\s" is an invalid escape sequence in an ordinary string literal.
print(" ".join(re.split(r'\s+', s.strip())))
print(" ".join(re.split(r'[\s]+', s.strip())))
print(re.sub(r'\s+', " ", s.strip()))
# Deleting a given piece of text from a string
import re

email = "tony@tiremove_thisger.net"
m = re.compile("remove_this").search(email)
# Approach 1: stitch together the text before and after the match.
print("".join((email[:m.start()], email[m.end():])))
# Approach 2: substitute the match with an empty string.
print(re.compile("remove_this").sub("", email))
# Approach 3: plain str.replace, no regex needed.
print(email.replace("remove_this", ""))
# Greedy versus non-greedy matching
import re
# All patterns are raw strings: "\w", "\s" and "\d" are invalid escape
# sequences in ordinary string literals.
email = "Beautiful is better than ugly."
# Non-greedy: from a word-initial "b" to the nearest word boundary.
lst = re.findall(r"\bb.+?\b", email)
print(lst)
# Greedy: from a word-initial "b" to the last word boundary in the string.
lst = re.findall(r"\bb.+\b", email)
print(lst)
lst = re.findall(r"\bb\w*\b", email)
print(lst)
# \B requires the "h" to be inside a word rather than at its start.
lst = re.findall(r"\Bh.+?\b", email)
print(lst)
lst = re.findall(r"\b\w.+?\b", email)
print(lst)
lst = re.findall(r"\w+", email)
print(lst)
lst = re.findall(r"\b\w.+?\b", email)
print(lst)
result = re.split(r'\s', email)
print(result)
# Three dot-separated numbers, e.g. a version string.
result = re.findall(r"\d+\.\d+\.\d+", "python 2.7.13")
print(result)
result = re.findall(r"\d+\.\d+\.\d+", "python 2.7.13, python 3.6.0")
print(result)
# Matching a web page
import re

s = "<html><head>This is head</head><body>This is a body</body></html>"
pattern = r'<html><head>(.+)</head><body>(.+)</body></html>'
# Group 1 captures the head text, group 2 the body text.
result = re.compile(pattern).search(s)
print(result.groups())
# group(0, 1, 2) returns a tuple; unpacking prints the items space-separated.
print(*result.group(0, 1, 2))
# Extracting phone numbers with a regular expression
import re
text = "Suppose my Phone No. is 0535-1234567, yours is 010-12345678, his is 025-87654321."
# Two groups in the pattern, so findall() returns (area_code, number) tuples.
result = re.findall(r"(\d{3,4})-(\d{7,8})", text)
print(result)
for item in result:
    # Re-join the two parts with the dash that separated them.
    print(item[0], item[1], sep="-")
#使用正则表达式查找文本中最长的数字字符
import re
def logest1(s):
    """Return the longest run of digits in *s*, or "no" if there is none.

    The digit runs are collected with ``re.findall``. When several runs share
    the maximum length, the first one is returned.
    """
    # Raw string: "\d" is an invalid escape in an ordinary string literal.
    t = re.findall(r"\d+", s)
    if t:
        return max(t, key=len)
    return "no"
def logest2(s):
    """Return the longest run of digits in *s*, or "no" if there is none.

    The string is split on runs of non-digit characters. When several digit
    runs share the maximum length, the first one is returned.
    """
    # re.split() always returns at least one item and yields empty strings at
    # the edges or for digit-free input; drop them so the "no" fallback is
    # actually reachable.
    t = [run for run in re.split(r"[^\d]+", s) if run]
    if t:
        return max(t, key=len)
    return "no"
# Run both implementations on the same sample text, one result per line.
print(logest1("hehe3455cat343535355"), logest2("hehe3455cat343535355"), sep="\n")
import re
def reverse_new(s):
    """Return *s* with its whitespace-separated words in reverse order.

    Leading and trailing whitespace is stripped first, and the words are
    re-joined with single spaces.
    """
    # Raw string: "\s" is an invalid escape in an ordinary string literal.
    t = re.split(r"\s+", s.strip())
    t.reverse()
    return " ".join(t)
# Show the word-order reversal on two sample sentences, one result per line.
print(
    reverse_new("I like beijing."),
    reverse_new('Simple is better than complex.'),
    sep="\n",
)
import re
# Replace only the first two words. count is passed by keyword because
# passing it positionally is deprecated.
a = re.sub(r'\w+', '10', "ji 43 af,geq", count=2, flags=re.I)
# A compiled pattern with no count replaces every word.
exp = re.compile(r'\w+', re.I)
b = exp.sub('10', "ji 43 af,geq")
print(a)
print(b)
import re

# Matching is case-sensitive by default, so "com" does not match "Con...".
print(re.match('com', 'Conwww.runcomoob'))
# With IGNORECASE the leading "Com" matches.
print(re.match('com', 'Comwww.runcomoob', flags=re.IGNORECASE))
# match() is anchored at the start of the string, search() scans all of it.
print(re.match(r'\w+com\w*', 'Comwww.runcomoob', flags=re.IGNORECASE))
print(re.search(r'\w+com\w*', 'Comwww.runcomoob', flags=re.IGNORECASE))
print(re.findall('com', 'Comwww.runcomoob', flags=re.IGNORECASE))
本文地址:https://blog.csdn.net/beginner_liupey/article/details/109959940
上一篇: 轻松管理你的IP