# 一、先看结果
# 二、思路
# 三、上源码
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
import time
from bs4 import BeautifulSoup
import re
import pymysql
class Meituan(object):
    """Scrape paginated listing results from meituan.com with Selenium/Firefox.

    For every listing found on a result page, the title, the evaluation text,
    the address text and the deal/price text are printed to stdout as one
    comma-separated line.

    NOTE: the original draft carried disabled (commented-out) pymysql code for
    creating a table and inserting each row. It built SQL by string
    interpolation; if persistence is reinstated, use parameterized queries
    (``cursor.execute(sql, params)``) instead.
    """

    def __init__(self):
        # The scraper keeps no state; a database connection used to be
        # sketched here but was never enabled.
        pass

    @staticmethod
    def _text_of(node, default=''):
        """Return ``node.get_text()``, or *default* when *node* is ``None``.

        ``find`` on a parsed element yields ``None`` when nothing matches, so
        this keeps one missing field from aborting the whole crawl.
        """
        return default if node is None else node.get_text()

    @staticmethod
    def _open_results(browser, keyword):
        """Navigate from the landing page to a listing page.

        Tries the navigation link first; if that cannot be located or clicked,
        types *keyword* into the search box and submits it through one of the
        two search-button variants the page has been observed to use.
        A failure of the last variant propagates to the caller.
        """
        # Local import: keeps the new dependency inside the block.
        from selenium.common.exceptions import WebDriverException

        try:
            browser.find_element(
                By.XPATH, "//span/span/a[@class='link nav-text']"
            ).click()
            time.sleep(4)
            print('1')
        except WebDriverException:
            search_box = browser.find_element(By.XPATH, "//input[@type='text']")
            search_box.send_keys(keyword)
            try:
                # The search button markup has changed before; try the
                # <button> variant first.
                browser.find_element(
                    By.XPATH, "//div/button[@class='header-search-btn']"
                ).click()
                print('2')
            except WebDriverException:
                # Third way of clicking: the <input value='搜索'> variant.
                browser.find_element(By.XPATH, "//input[@value='搜索']").click()
                print('3')

    @staticmethod
    def _scroll_page(browser, steps=5, step_px=1000, pause=3):
        """Scroll down in *steps* increasing jumps, pausing after each one.

        The jumps are 0, step_px, 2*step_px, ... pixels (relative scrolls),
        matching the original five-step sequence 0..4000.
        """
        for step in range(steps):
            browser.execute_script(
                'window.scrollBy(0,{})'.format(step * step_px)
            )
            time.sleep(pause)

    def _print_items(self, html):
        """Parse *html* and print one line per listing element found."""
        soup = BeautifulSoup(html, 'lxml')
        items = soup.find_all('div', class_='default-list-item clearfix')
        print('items')
        for item in items:
            item_name = self._text_of(item.find('a', class_='link item-title'))
            item_fen = self._text_of(
                item.find('div', class_='item-eval-info clearfix')
            )
            item_location = self._text_of(
                item.find('div', class_='address-info clearfix')
            )
            price_node = item.find('div', class_='deal-info')
            if price_node is None:
                # Previously this path reused the prior item's price (or hit
                # an unbound name on the first item); report and print blank.
                print('item_price error!!')
            item_price = self._text_of(price_node)
            print('{},{},{},{}'.format(
                item_name, item_fen, item_location, item_price))

    def get_meituan(self, keyword='北京', max_pages=None):
        """Open meituan.com in Firefox and print listings page by page.

        Args:
            keyword: Text typed into the search box when the navigation link
                cannot be used. Defaults to the value that was hard-coded.
            max_pages: Stop after this many result pages; ``None`` (default)
                keeps going until no active "next" button is found.

        The browser is always closed on exit. WebDriver failures during
        scraping are reported with ``error`` plus the exception text; other
        exceptions propagate after the browser has been closed.
        """
        from selenium.common.exceptions import (
            NoSuchElementException,
            WebDriverException,
        )

        # NOTE(original author): the '/s/' part of the URL can be changed to
        # pick a city.
        url = 'http://www.meituan.com/'
        browser = webdriver.Firefox()
        try:
            browser.set_window_size(900, 900)
            browser.get(url)
            time.sleep(3)
            self._open_results(browser, keyword)

            footer = 1
            try:
                while True:
                    # Scroll first so content further down the page has a
                    # chance to load before the source is captured.
                    self._scroll_page(browser)
                    html = browser.page_source
                    print('html')
                    self._print_items(html)

                    if max_pages is not None and footer >= max_pages:
                        break
                    try:
                        next_link = browser.find_element(
                            By.XPATH,
                            "//li[@class='pagination-item next-btn active']/a",
                        )
                    except NoSuchElementException:
                        # No active "next" button: last page reached.
                        break
                    next_link.click()
                    footer += 1
                    print('现在是第%s页!' % footer)
            except WebDriverException as exc:
                print('error')
                print(exc)
        finally:
            # Always release the Firefox process, even on failure.
            browser.quit()
# Script entry: build the scraper and start crawling immediately.
# There is no __main__ guard here, so this also runs when the module is imported.
l = Meituan()
l.get_meituan()