开始有一些写代码的思路了,还要继续写下去。
最终成果
我的代码
from bs4 import BeautifulSoup
import requests
import time
# Search-result page URLs. The range starts at -1, so url[k] is page k - 1:
# url[0] is "p-1", url[1] is "p0", ..., url[14] is "p13".
url = ['http://bj.xiaozhu.com/search-duanzufang-p{}-0/'.format(i) for i in range(-1, 14)]

# Headers sent with every request. The value must be the bare browser
# identification string: no "User-Agent:" prefix (that is the header name)
# and no trailing text copied from the browser's developer tools.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'
    ),
}
def get_detail_info(url, nu):
    """Fetch one listing detail page, print its fields and return them.

    Args:
        url: Address of the detail page to download.
        nu: Sequence number stored under the 'number' key of each record.

    Returns:
        A list with every record dict that was printed. The list is empty
        when any of the selectors matches nothing, because the records are
        built by zipping the selector results together.
    """
    # A timeout keeps a stalled connection from blocking the crawl forever.
    wb_data = requests.get(url, headers=headers, timeout=10)
    soup = BeautifulSoup(wb_data.text, 'lxml')

    titles = soup.select('body > div.wrap.clearfix.con_bg > div.con_l > div.pho_info > h4 > em')
    addres = soup.select('body > div.wrap.clearfix.con_bg > div.con_l > div.pho_info > p > span')
    # NOTE(review): 'damo' is whatever text sits in div.day_l > span
    # (presumably the nightly price) - confirm against the page.
    damos = soup.select('div.day_l > span')
    frimgs = soup.select('div.pho_show_l > div.pho_show_big > div > img ')
    homnimgs = soup.select('div.js_box.clearfix > div.member_pic > a > img')
    homnsexs = soup.select('div.js_box.clearfix > div.member_pic > div')
    homnames = soup.select('div.js_box.clearfix > div.w_240 > h6 > a')

    # Label each host badge from its OWN markup. Testing the stringified
    # list as a whole would give every host the same label.
    # Markup containing '1' is labelled '女'; otherwise markup containing '_'
    # is labelled '男' (presumably these come from the badge's CSS class
    # name - TODO confirm).
    sexs = []
    for badge in homnsexs:
        markup = str(badge)
        if '1' in markup:
            sexs.append('女')
        elif '_' in markup:
            sexs.append('男')
        else:
            sexs.append('null')

    # Pause between page downloads.
    time.sleep(3)

    records = []
    for title, addre, damo, frimg, homnimg, sex, homname in zip(
            titles, addres, damos, frimgs, homnimgs, sexs, homnames):
        data = {
            'number': nu,
            'title': title.get_text(),
            'addre': addre.get_text().rstrip(),
            'damo': damo.get_text(),
            'frimg': frimg.get('src'),
            'homnimg': homnimg.get('src'),
            'homnsex': sex,
            'homname': homname.get_text(),
        }
        print(data)
        records.append(data)
    return records
# Driver: walk the search-result pages and scrape every listing they link to.
# `nu` is a running counter across all pages; it is incremented before each
# detail-page call, so the first scraped listing is number 1.
nu = 0
for num in range(1, 14):
    # NOTE(review): with `url` built from range(-1, 14), indices 1..13 are
    # pages p0..p12 - confirm that page 0 is intended and page 13 is not.
    # A timeout keeps a stalled connection from blocking the crawl forever.
    wb_data = requests.get(url[num], headers=headers, timeout=10)
    soup = BeautifulSoup(wb_data.text, 'lxml')
    addinfos = soup.select(' ul > li > a.resule_img_a')
    # Pause between page downloads.
    time.sleep(3)
    for addinfo in addinfos:
        # The href is already a plain string, so it is passed on unchanged;
        # stripping '[', ']', ',' and ' ' from it could only damage a URL.
        href = addinfo.get('href')
        if not href:
            # An anchor without a link target cannot be followed.
            continue
        nu += 1
        get_detail_info(href, nu)
总结
- 自己在类和函数这一块的基础还是很薄弱,都没有想到怎么用魔法方法、属性和迭代器
- 还没有完全掌握异常处理的使用方法
- 还需要更加熟悉各种类型方法的使用