from bs4 import BeautifulSoup
# Sample markup used by every example in this script: a job-listing table
# made of a header row (class "h"), ten posting rows that alternate between
# class "even" and class "odd", and a final pagination row (class "f").
# Each posting row has five cells: title link, category, headcount,
# location and publish date.
html = """
<table class="tablelist" cellpadding="0" cellspacing="0">
<tbody><tr class="h">
<td class="l" width="374">职位名称</td>
<td>职位类别</td>
<td>人数</td>
<td>地点</td>
<td>发布时间</td>
</tr>
<tr class="even">
<td class="l square"><a target="_blank" href="position_detail.php?id=47342&keywords=python&tid=0&lid=0">TEG05-高级安全策略工程师(深圳)</a></td>
<td>技术类</td>
<td>1</td>
<td>深圳</td>
<td>2019-01-27</td>
</tr>
<tr class="odd">
<td class="l square"><a target="_blank" href="position_detail.php?id=47331&keywords=python&tid=0&lid=0">18428-财付通平台组件测试工程师</a></td>
<td>技术类</td>
<td>1</td>
<td>深圳</td>
<td>2019-01-27</td>
</tr>
<tr class="even">
<td class="l square"><a target="_blank" href="position_detail.php?id=47318&keywords=python&tid=0&lid=0">CSIG07-基础安全威胁情报分析师</a></td>
<td>技术类</td>
<td>4</td>
<td>深圳</td>
<td>2019-01-27</td>
</tr>
<tr class="odd">
<td class="l square"><a target="_blank" href="position_detail.php?id=47319&keywords=python&tid=0&lid=0">CSIG07-业务威胁情报分析师</a></td>
<td>技术类</td>
<td>1</td>
<td>深圳</td>
<td>2019-01-27</td>
</tr>
<tr class="even">
<td class="l square"><a target="_blank" href="position_detail.php?id=47320&keywords=python&tid=0&lid=0">CSIG07-业务威胁情报分析师</a></td>
<td>技术类</td>
<td>2</td>
<td>深圳</td>
<td>2019-01-27</td>
</tr>
<tr class="odd">
<td class="l square"><a target="_blank" href="position_detail.php?id=47317&keywords=python&tid=0&lid=0">25925-数据挖掘工程师</a></td>
<td>技术类</td>
<td>2</td>
<td>深圳</td>
<td>2019-01-27</td>
</tr>
<tr class="even">
<td class="l square"><a target="_blank" href="position_detail.php?id=47311&keywords=python&tid=0&lid=0">PCG04-测试开发高级工程师(深圳)</a></td>
<td>技术类</td>
<td>1</td>
<td>深圳</td>
<td>2019-01-27</td>
</tr>
<tr class="odd">
<td class="l square"><a target="_blank" href="position_detail.php?id=47297&keywords=python&tid=0&lid=0">28603-116 微信支付效能开发工程师(深圳)</a></td>
<td>技术类</td>
<td>1</td>
<td>深圳</td>
<td>2019-01-27</td>
</tr>
<tr class="even">
<td class="l square"><a target="_blank" href="position_detail.php?id=47299&keywords=python&tid=0&lid=0">28601-微信支付行业缴费开发工程师(深圳)</a></td>
<td>技术类</td>
<td>1</td>
<td>深圳</td>
<td>2019-01-27</td>
</tr>
<tr class="odd">
<td class="l square"><a target="_blank" href="position_detail.php?id=47300&keywords=python&tid=0&lid=0">19157-车联物联安全—固件/硬件安全研究员(上海)</a></td>
<td>技术类</td>
<td>1</td>
<td>上海</td>
<td>2019-01-27</td>
</tr>
<tr class="f">
<td colspan="5">
<div class="left">共<span class="lightblue total">550</span>个职位</div>
<div class="right"><div class="pagenav"><a href="javascript:;" class="noactive" id="prev">上一页</a><a class="active" href="javascript:;">1</a><a href="position.php?keywords=python&start=10#a">2</a><a href="position.php?keywords=python&start=20#a">3</a><a href="position.php?keywords=python&start=30#a">4</a><a href="position.php?keywords=python&start=40#a">5</a><a href="position.php?keywords=python&start=50#a">6</a><a href="position.php?keywords=python&start=60#a">7</a><a href="position.php?keywords=python&start=70#a">...</a><a href="position.php?keywords=python&start=540#a">55</a><a href="position.php?keywords=python&start=10#a" id="next">下一页</a><div class="clr"></div></div></div>
<div class="clr"></div>
</td>
</tr>
</tbody></table>
"""
# Parse the sample markup with the lxml parser (requires the lxml package).
soup = BeautifulSoup(html, 'lxml')

# The snippets below are alternative find_all() demonstrations. They are kept
# as real comments (not discarded string literals) with their indentation
# restored, so any one of them can be re-enabled by uncommenting it.

# Get every tr tag except the last one (the pagination row).
# trs = soup.find_all('tr')[:-1]
# for tr in trs:
#     print(tr)

# Get the third tr tag.
# tr = soup.find_all('tr', limit=3)[2]  # limit=3 stops after three matches
# print(tr)

# Get every tr tag whose class is "even".
# `class` is a Python keyword, so the keyword argument is spelled `class_`.
# trs = soup.find_all('tr', class_='even')
# for tr in trs:
#     print(tr)

# Get every td tag whose id is "test" and whose class is also "test".
# The sample markup contains no such tag, so this yields an empty result.
# aLists = soup.find_all('td', id='test', class_='test')
# for aList in aLists:
#     print(aList)

# Read the href attribute of a tags (first ten matches, minus the first one).
# aLists = soup.find_all('a', limit=10)[1:]
# for a in aLists:
#     # 1. Subscript access on the tag
#     # href = a['href']
#     # print(href)
#     # 2. Through the tag's attrs dict
#     href = a.attrs['href']
# Collect every job posting from the table.
# The slice drops the first row (column headers) and the last row
# (pagination footer), leaving only the posting rows.
trs = soup.find_all('tr')[1:-1]
movies = []
for tr in trs:
    # stripped_strings yields each descendant text node with surrounding
    # whitespace removed and whitespace-only nodes skipped, so the list lines
    # up with the columns: title, category, headcount, location, date.
    # (tr.strings would also return the newlines between cells; reading
    # td.string for each td in tr.find_all('td') is a per-cell alternative.)
    infos = list(tr.stripped_strings)
    # Build a fresh dict for every row. Reusing a single dict created outside
    # the loop makes every list entry reference the same object, so all
    # entries end up showing the values of the last row.
    movie = {
        '标题': infos[0],
        '类型': infos[1],
        '人数': infos[2],
        # Index 3 is the location column; the publish date is at index 4.
        '时间': infos[4],
    }
    movies.append(movie)
print(movies)
bs4简单使用
最后编辑于 :
©著作权归作者所有,转载或内容合作请联系作者
- 文/潘晓璐 我一进店门,熙熙楼的掌柜王于贵愁眉苦脸地迎上来,“玉大人,你说我怎么就摊上这事。” “怎么了?”我有些...
- 文/花漫 我一把揭开白布。 她就那样静静地躺着,像睡着了一般。 火红的嫁衣衬着肌肤如雪。 梳的纹丝不乱的头发上,一...
- 文/苍兰香墨 我猛地睁开眼,长吁一口气:“原来是场噩梦啊……” “哼!你这毒妇竟也来了?” 一声冷哼从身侧响起,我...
推荐阅读更多精彩内容
- [Day 668 2016-07-25] Lesson 32-1 Galileo reborn 課文:In his...