# Get the content of the first matching tag
import re

from bs4 import BeautifulSoup
# Read the saved page as raw bytes; the context manager closes the file
# handle as soon as the content has been read.
with open('baidu.html', 'rb') as file:
    html = file.read()

# Parse the document with the standard-library HTML parser backend.
bs = BeautifulSoup(html, 'html.parser')

# Attribute access on the soup returns the first tag with that name.
print(bs.title)
print(bs.a)

# .string yields the text held by the tag rather than the tag itself.
print(bs.title.string)
print(bs.a.string)  # the printed text does not include HTML comment markers
# Find every <a> tag in the document.
tlist = bs.find_all("a")
print(tlist)

# Use a regular expression as the filter: Beautiful Soup searches each tag
# name with the pattern, so every tag whose name contains "a" is returned.
tlist = bs.find_all(re.compile("a"))
print(tlist)
# Pass a function as the filter.
def name_is_exists(tag):
    """Return whether ``tag`` carries a ``name`` attribute.

    Intended as a predicate for ``find_all``: it is called with a tag and
    its result decides whether that tag is kept.

    Args:
        tag: Any object exposing ``has_attr(attribute_name)``.

    Returns:
        Whatever ``tag.has_attr('name')`` returns.
    """
    return tag.has_attr('name')
# Keep only the tags for which name_is_exists() returns a true value.
tlist = bs.find_all(name_is_exists)
print(tlist)

# Keyword arguments filter on attributes: here, tags whose id is "head".
tlist = bs.find_all(id='head')
print(tlist)

# Use a regular expression to find text that contains a digit.
# The pattern is a raw string so that \d is not treated as a (invalid)
# string escape sequence.
# NOTE(review): newer Beautiful Soup releases name this argument `string`;
# `text` is kept here for compatibility with older installations.
tlist = bs.find_all(text=re.compile(r'\d'))
print(tlist)

# limit parameter: stop after the first 3 matches.
tlist = bs.find_all(text=re.compile(r'\d'), limit=3)
print(tlist)
# CSS selectors: look up elements by tag name, id, class or attribute.
tlist = bs.select('title')  # select by tag name
print(tlist)

tlist = bs.select('.s-manhattan-index')  # select by CSS class
print(tlist)