这是网页上的 script,我要获取的是 00914 这个数字,直接使用正则表达式即可。
运行结果:
源码:
"""Fetch a web page and pull one quoted value out of its third inline script.

The page is downloaded, the third element matched by the CSS selector
``body script`` is taken, its text is printed, and then the 12th segment
obtained by splitting that text on single quotes is printed.
"""
import re
from urllib.request import urlopen

# Placeholder: replace with the URL of the page to parse.
url = "你要解析的网页URL"

# Zero-based position of the wanted element among the "body script" matches.
SCRIPT_INDEX = 2

# Position of the wanted value after splitting the script text on "'".
# Odd indexes fall inside quotes, so 11 is the 6th single-quoted literal.
QUOTED_SEGMENT_INDEX = 11

# The script of interest must contain a line ending in "var _url = '...';".
# The pattern is only used to confirm that; its capture group is not read.
URL_ASSIGNMENT = re.compile(r"var _url = '(.*?)';$", re.MULTILINE | re.DOTALL)


def fetch_html(page_url: str) -> bytes:
    """Download *page_url* and return the raw response body."""
    # The context manager closes the HTTP response even if read() fails.
    with urlopen(page_url) as response:
        return response.read()


def select_script_text(html, index: int = SCRIPT_INDEX) -> str:
    """Return the text of the *index*-th ``body script`` element in *html*.

    Raises:
        ValueError: if the page has too few matching ``<script>`` elements.
    """
    # Imported here so the pure-string helpers in this module remain
    # importable on machines where BeautifulSoup is not installed.
    from bs4 import BeautifulSoup

    scripts = BeautifulSoup(html, "html.parser").select("body script")
    if index >= len(scripts):
        raise ValueError(
            f"expected at least {index + 1} <script> elements in <body>, "
            f"found {len(scripts)}"
        )
    return scripts[index].get_text()


def extract_quoted_segment(
    script_text: str, index: int = QUOTED_SEGMENT_INDEX
) -> str:
    """Return the *index*-th piece of *script_text* split on single quotes.

    The text must contain a ``var _url = '...';`` assignment at the end of a
    line; this guards against reading a value out of the wrong script.

    Raises:
        ValueError: if the assignment is missing, or the text has too few
            quote-delimited segments.
    """
    if URL_ASSIGNMENT.search(script_text) is None:
        raise ValueError("script has no \"var _url = '...';\" assignment")

    segments = script_text.split("'")
    if index >= len(segments):
        raise ValueError(
            f"expected more than {index} quote-delimited segments, "
            f"found {len(segments)}"
        )
    return segments[index]


def main() -> None:
    """Download the page, print the selected script, then the extracted value."""
    script_text = select_script_text(fetch_html(url))
    print(script_text)
    print(extract_quoted_segment(script_text))


if __name__ == "__main__":
    main()