from bs4 import BeautifulSoup

# Sample document used by every example below. The closing </body> and
# </html> tags are left out of the literal on purpose or by omission;
# the parser is relied on to cope with that.
html = """
<html><head><title>beautifulsoup</title></head>
<body>
<p class="title" name="dromouse"><b>the beautifusoup</b></p>
<p><a href="www.jianshu.com">简书</a></p>
"""

# Build the parse tree with the lxml parser backend.
soup = BeautifulSoup(html, features='lxml')
选择元素
# Accessing a tag name as an attribute of the soup yields the first tag
# with that name; show the <title> tag, its Python type, <head>, and the
# first <p>.
title_tag = soup.title
for shown in (title_tag, type(title_tag), soup.head, soup.p):
    print(shown)
运行结果为<title>beautifulsoup</title> <class 'bs4.element.Tag'> <head><title>beautifulsoup</title></head> <p class="title" name="dromouse"><b>the beautifusoup</b></p>
获取名称
# Print the tag name of the <title> element. The attribute must be spelled
# "title": a misspelled tag name resolves to None, and None has no .name.
print(soup.title.name)
运行结果为 title
获取内容
# Print the text held by the first <p> tag. That tag wraps a single <b>,
# which in turn wraps a single text node, so .string reaches the text.
first_paragraph = soup.p
print(first_paragraph.string)
运行结果为the beautifusoup
嵌套选择
# Attribute-style selection can be chained: take <head>, then the <title>
# inside it, and print that tag's text.
title_in_head = soup.head.title
print(title_in_head.string)
运行结果为beautifulsoup
子节点和子孙节点
# .contents gives the direct children of a tag as a list. The first <p>
# holds only a <b> tag (it has no <a>), so the list is read from soup.p.
print(soup.p.contents)

# .children iterates over the direct children of <body>; printing it shows
# the iterator object itself, so enumerate it to see each child.
print(soup.body.children)
for i, child in enumerate(soup.body.children):
    print(i, child)

# .descendants walks every nested node under <body> (tags and text),
# not just the direct children.
print(soup.body.descendants)
for i, child in enumerate(soup.body.descendants):
    print(i, child)
运行结果为
[<b>the beautifusoup</b>]
<list_iterator object at 0x04E1A8D0>
0
1 <p class="title" name="dromouse"><b>the beautifusoup</b></p>
2
3 <p><a href="www.jianshu.com">简书</a></p>
4
<generator object descendants at 0x060725D0>
0
1 <p class="title" name="dromouse"><b>the beautifusoup</b></p>
2 <b>the beautifusoup</b>
3 the beautifusoup
4
5 <p><a href="www.jianshu.com">简书</a></p>
6 <a href="www.jianshu.com">简书</a>
7 简书
8