BeautifulSoup(markup, 'html.parser') html
BeautifulSoup(markup, 'lxml') html5
BeautifulSoup(markup, 'xml') ui
BeautifulSoup(markup, 'html5lib') spa
# 基本用法
from bs4 import BeautifulSoup
soup = BeautifulSoup(markup, 'lxml')
print(soup.prettify())
print(soup.title.string)

# 标签选择器
# 选择元素
soup.head
soup.p

# 获取名称
soup.title.name

# 获取属性
soup.p.attrs['name']
soup.p['name']

# 获取内容
soup.p.string

# 嵌套选择
soup.head.title.string

# 子节点和子孙节点
soup.p.contents
soup.p.children
soup.p.descendants
for i, child in enumerate(soup.p.children):
    print(i, child)

# 父节点和祖先节点
soup.a.parent
soup.a.parents

# 兄弟节点
soup.a.next_siblings
soup.a.previous_siblings
#标准选择器code
find_all(name, attrs, recursive, text, **kwargs)
soup.find_all('ul')[0]
soup.find_all(attrs={'id': 'list-1'})
soup.find_all(class_='element')
soup.find_all(text='Foo')
find: 返回单个元素
find_parents
#CSS选择器xml
soup.select('.panel .pandel_a')
soup.select('ul li')
soup.select('#list-2 .element')

for ul in soup.select('ul'):
    print(ul.select('li'))

# 获取属性
print(li.attrs['id'])

# 获取内容
print(ul.get_text())