1、正则表达式python
1.使用正则表达式的动机正则表达式
1. 文本处理已经成为计算机常见工做之一sql
2. 对文本内容的搜索,定位,提取是逻辑比较复杂的工做编程
import re
s = "Levi:1994,Sunny:1993"
pattern = r"(\w+):(\d+)"
# re模块调用
# l = re.findall(pattern, s)
# print(l)
# compile对象调用
regex = re.compile(pattern, flags=0)
l = regex.findall(s, 0, 55 )
print(l)
[('Levi', '1994'), ('Sunny', '1993')]
import re s = "hello world how are you" # pattern = r"[^\w]+" pattern = r"\W+" l = re.split(pattern, s) print(l)
import re s = "时间:2019/10/12" ns = re.sub(r'/', '-', s) print(ns)
时间:2019-10-12
import re s = "时间:2019/10/12" ns = re.subn(r'/', '-', s, 4) print(ns) ('时间:2019-10-12', 2)
import re s = '2019年,建国70周年' pattern = r"\d+" ite = re.finditer(pattern, s) # 方法1 print("ite类型", type(ite)) print(ite.__next__().group()) print(ite.__next__().group()) # 方法2 print("=========") for i in ite: print(i.group())
ite类型 <class 'callable_iterator'>
2019
70
import re m = re.fullmatch(r'\w+',"hello1973") print(m.group())
import re
#验证密码是否符合规范---只含字母和数字 m = re.fullmatch(r'[0-9A-Za-z]+', "hello1973") print(m.group()) hello1973
import re m = re.match(r'[A-Z]\w*',"Hello1973") print(m.group()) Hello1973
import re m = re.search(r'\S+', "好\n嗨 哟") print(m.group()) 好
import re pattern = r"(ab)cd(?P<pig>ef)" regex = re.compile(pattern) # 生成match对象 obj = regex.search("abcdefghi", pos=0, endpos=7) # 演示match对象属性变量 print(obj.pos) print(obj.endpos) print(obj.re) print(obj.string) print(obj.lastgroup) print(obj.lastindex) 0 7 re.compile('(ab)cd(?P<pig>ef)') abcdefghi pig 2
import re pattern = r"(ab)cd(?P<pig>ef)" regex = re.compile(pattern) # 生成match对象 obj = regex.search("abcdefghi", pos=0, endpos=7) # 演示match对象方法 print(obj.start()) print(obj.end()) print(obj.span()) print(obj.groupdict()) print(obj.groups()) print(obj.group())#获取整个match对象内容 print(obj.group(1))#获取第一子组内容 print(obj.group('pig'))#获取组名为pig的子组内容 0 6 (0, 6) {'pig': 'ef'} ('ab', 'ef') abcdef ab ef
import re s = """hello world 你好,北京 """ # 只能匹配ASCII码字符 regex = re.compile(r'\w+', flags=re.A) l = regex.findall(s) print(l)
['Hello', 'world']
import re s = """Hello world 你好,北京 """ # 匹配时忽略字母大小写 regex = re.compile(r'[A-Z]+', flags=re.I) l = regex.findall(s) print(l) ['Hello', 'world']
import re s = """Hello world 你好,北京 """ # 匹配时不能够匹配换行 regex = re.compile(r'.+') l = regex.findall(s) print(l) ['Hello world', '你好,北京']
import re s = """Hello world 你好,北京 """ # 匹配时能够匹配换行 regex = re.compile(r'.+',flags=re.S) l = regex.findall(s) print(l) ['Hello world\n你好,北京\n']
import re s = """Hello world 你好,北京 """ # 匹配每一行的开头或者结尾 regex = re.compile(r'world$',flags=re.M) l = regex.findall(s) print(l) ['world']
import re s = """Hello world 你好,北京 """ # 匹配每一行的开头或者结尾 pattern = r"""\w+ # 第一部分 \s+ # 第二部分 \w+ # 第三部分 """ regex = re.compile(pattern,flags=re.X) l = regex.findall(s) print(l) ['Hello world']
""" 匹配每段IP地址,要求: 根据输入的每段首单词,获取IP地址 """ import re import sys port = sys.argv[1] f = open('1.txt') # 找到port段落 while True: data = '' for line in f: if line != '\n': # 不是空行 data += line else: break if not data: # 文件结尾 print("Not Found the %s"%port) break # 匹配字符串首个单词 key_word = re.match(r'\S+',data).group() if port == key_word: # 匹配目标内容 # pattern = r"[0-9a-f]{4}\.[0-9a-f]{4}\.[0-9a-f]{4}" pattern=r"(\d{1,3}\.){3}\d{1,3}/\d+|Unknow" try: address = re.search(pattern,data).group() print(address) except: print("No address") break