本节内容为解析库的使用,涵盖 XPath、BeautifulSoup 和 PyQuery 的基础内容。
# Minimal BeautifulSoup example: build a soup object from an HTML string
# using the lxml parser, then read the text of its <p> tag.
from bs4 import BeautifulSoup

soup = BeautifulSoup('<p>Hello</p>', 'lxml')  # initialise the soup object
print(soup.p.string)  # call attributes on the object to parse the document
# BeautifulSoup example: read a tag's name, its attributes and its content.
# NOTE(review): `html` is not defined in this snippet -- presumably it is the
# sample HTML document string defined earlier in the article; confirm.
from bs4 import BeautifulSoup

soup = BeautifulSoup(html, 'lxml')
print(soup.title.name)
print(soup.p.attrs)  # get the attributes
print(soup.p.attrs['name'])
print(soup.p.string)  # get the content
# find_all() API: find_all(name, attrs, recursive, text, **kwargs)