BeautifulSoup库

BeautifulSoup(markup,'html.parser')

BeautifulSoup(markup,'lxml')

BeautifulSoup(markup,'xml')

BeautifulSoup(markup,'html5lib')

 

#基本用法
from bs4 import BeautifulSoup
soup=BeautifulSoup(markup,'html.parser')
print(soup.prettify())
print(soup.title.string)

#标签选择器
#选择元素
soup.head 
soup.p

#获取名称
soup.title.name

#获取属性
soup.p.attrs['name']
soup.p['name']

#获取内容
soup.p.string

#嵌套选择
soup.head.title.string

#子节点和子孙节点
soup.p.contents

soup.p.children
for i,child in enumerate(soup.p.children):
     print(i,child)

#父节点和祖先节点
soup.a.parent
soup.a.parents

#兄弟节点
soup.a.next_siblings
soup.a.previous_siblings

#标准选择器

find_all(name, attrs, recursive, text, **kwargs)
soup.find_all('ul')[0]


soup.find_all(attrs={'id':'list-1'})
soup.find_all(class_='element')

soup.find_all(text='Foo')

find:返回单个元素

find_parents 

#CSS选择器

soup.select('.panel .pandel_a')
soup.select('ul li')
soup.select('#list-2 .element')

for ul in soup.select('ul'):
    print(ul.select('li'))

#获取属性
print(li.attrs['id'])

#获取内容
print(ul.get_text())
相关文章
相关标签/搜索