介绍
一些网站会在正常的账号密码认证之外加一些验证码,以此来明确地区分人/机行为,从一定程度上达到反爬的效果,对于简单的校验码Tesserocr就可以搞定,如下
但一些网站加入了滑动验证码,最典型的要属极验滑动认证了(极验官网:http://www.geetest.com/ ),下图是极验的登录界面
现在极验验证码已经更新到了 3.0 版本,截至 2017 年 7 月全球已有十六万家企业正在使用极验,每天服务响应超过四亿次,广泛应用于直播视频、金融服务、电子商务、游戏娱乐、政府企业等各大类型网站
对于这类验证,如果我们直接模拟表单请求,繁琐的认证参数与认证流程会让你蛋碎一地,我们可以用selenium驱动浏览器来解决这个问题,大致分为以下几个步骤
#步骤一:点击按钮,弹出没有缺口的图片
#步骤二:获取步骤一的图片
#步骤三:点击滑动按钮,弹出带缺口的图片
#步骤四:获取带缺口的图片
#步骤五:对比两张图片的所有RGB像素点,得到不一样像素点的x值,即要移动的距离
#步骤六:模拟人的行为习惯(先匀加速拖动后匀减速拖动),把需要拖动的总距离分成一段一段小的轨迹
#步骤七:按照轨迹拖动,完成验证
#步骤八:完成登录
实现
#安装:selenium+chrome/phantomjs
#安装:Pillow
Pillow:基于PIL,处理python 3.x的图形图像库.因为PIL只能处理到python 2.x,而这个模块能处理Python3.x,目前用它做图形的很多.
http://www.cnblogs.com/apexchu/p/4231041.html
C:\Users\Administrator>pip3 install pillow
C:\Users\Administrator>python3
Python 3.6.1 (v3.6.1:69c0db5, Mar 21 2017, 18:41:36) [MSC v.1900 64 bit (AMD64)] on win32
Type "help", "copyright", "credits" or "license" for more information.
>>> from PIL import Image
>>>
模拟博客园登录
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from PIL import Image
import time
def get_snap():
    """Capture the browser window and return the capture as a PIL image.

    The screenshot is saved as ``full_snap.png`` (relative path, so any
    earlier capture with that name is overwritten) and then opened with
    Pillow. Uses the module-level ``driver``.
    """
    driver.save_screenshot('full_snap.png')
    return Image.open('full_snap.png')
def get_image():
    """Return the picture currently shown by the slider widget as a PIL image.

    The element with class ``geetest_canvas_img`` is located through the
    module-level ``driver``, a full screenshot is taken with ``get_snap()``
    and the element's bounding box is cropped out of it.

    Returns:
        The cropped PIL image.
    """
    # ``find_element_by_class_name`` was removed in Selenium 4.3; the
    # By-based locator is accepted by both old and new releases.
    img = driver.find_element(By.CLASS_NAME, 'geetest_canvas_img')
    # Presumably lets the widget finish rendering before the screenshot.
    time.sleep(2)

    location = img.location
    size = img.size
    left = location['x']
    top = location['y']
    right = left + size['width']
    bottom = top + size['height']

    # NOTE(review): assumes screenshot pixels map 1:1 onto page coordinates;
    # confirm on displays that use a scaling factor.
    page_snap_obj = get_snap()
    return page_snap_obj.crop((left, top, right, bottom))
def get_distance(image1, image2, start=57, threshold=60, offset=7):
    """Find the x position where two same-sized images first differ.

    Columns are scanned left to right beginning at ``start``; inside each
    column every row is compared. A pixel counts as different when any of
    its first three channels differs by ``threshold`` or more.

    Args:
        image1: Reference image (needs ``size`` and ``load()`` like a PIL
            image, with pixels indexable per channel).
        image2: Image to compare against ``image1``.
        start: First column to inspect; columns left of it are ignored.
            Presumably skips the area where the slider piece starts.
        threshold: Minimum per-channel difference that counts as a change.
        offset: Value subtracted from the column index before returning.
            Presumably an empirically tuned correction; verify if the
            widget layout changes.

    Returns:
        ``x - offset`` for the first differing column ``x``, or
        ``last_column - offset`` when no pixel differs.

    Raises:
        ValueError: If the comparable width is not greater than ``start``,
            so there is no column to inspect.
    """
    # Only the area both images share can be compared.
    width = min(image1.size[0], image2.size[0])
    height = min(image1.size[1], image2.size[1])
    if width <= start:
        raise ValueError(
            'images must be wider than start=%d, got width %d' % (start, width)
        )

    # load() is loop-invariant: fetch the pixel accessors once instead of
    # once per pixel.
    pixels1 = image1.load()
    pixels2 = image2.load()

    for x in range(start, width):
        for y in range(height):
            rgb1 = pixels1[x, y]
            rgb2 = pixels2[x, y]
            if any(abs(rgb1[c] - rgb2[c]) >= threshold for c in range(3)):
                return x - offset

    # No difference found: fall back to the last inspected column.
    return width - 1 - offset
def get_tracks(distance):
distance+=20 #先滑过一点,最后再反着滑动回来
v=0
t=0.2
forward_tracks=[]
current=0
mid=distance*3/5
while current < distance:
if current < mid:
a=2
else:
a=-3
s=v*t+0.5*a*(t**2)
v=v+a*t
current+=s
forward_tracks.append(round(s))
#反着滑动到准确位置
back_tracks=[-3,-3,-2,-2,-2,-2,-2,-1,-1,-1] #总共等于-20
return {'forward_tracks':forward_tracks,'back_tracks':back_tracks}
# Create the browser before entering ``try``: if start-up fails, ``finally``
# must not run with ``driver`` undefined, which would mask the real error.
driver = webdriver.Chrome()
try:
    driver.get('https://passport.cnblogs.com/user/signin')
    driver.implicitly_wait(5)

    # 1. Fill in the account and password, then submit.
    # (find_element_by_* was removed in Selenium 4.3; use By locators.)
    username = driver.find_element(By.ID, 'input1')
    pwd = driver.find_element(By.ID, 'input2')
    signin = driver.find_element(By.ID, 'signin')
    username.send_keys('maxiaotiao')
    pwd.send_keys('xxxxxxx')
    signin.click()

    # 2. Click the verification button to show the picture without the gap.
    button = driver.find_element(By.CLASS_NAME, 'geetest_radar_tip')
    button.click()

    # 3. Capture the picture without the gap.
    image1 = get_image()

    # 4. Click the slider button to show the picture with the gap.
    button = driver.find_element(By.CLASS_NAME, 'geetest_slider_button')
    button.click()

    # 5. Capture the picture with the gap.
    image2 = get_image()

    # 6. Compare both pictures pixel by pixel to find the displacement.
    distance = get_distance(image1, image2) + 20
    print(distance)

    # 7. Turn the total displacement into a sequence of small steps.
    tracks = get_tracks(distance)
    print(tracks)

    # 8. Drag along the track: forward first, then back.
    button = driver.find_element(By.CLASS_NAME, 'geetest_slider_button')
    ActionChains(driver).click_and_hold(button).perform()

    # Forward pass (accelerating, then decelerating past the target).
    for track in tracks['forward_tracks']:
        ActionChains(driver).move_by_offset(xoffset=track, yoffset=0).perform()

    # Short pause, then slide back to undo the overshoot.
    time.sleep(0.5)
    for back_track in tracks['back_tracks']:
        ActionChains(driver).move_by_offset(xoffset=back_track, yoffset=0).perform()

    # Small back-and-forth wiggle around the final position.
    ActionChains(driver).move_by_offset(xoffset=-3, yoffset=0).perform()
    ActionChains(driver).move_by_offset(xoffset=3, yoffset=0).perform()

    # Hold briefly before releasing the mouse button.
    time.sleep(0.5)
    ActionChains(driver).release().perform()

    time.sleep(20)  # wait long enough to see whether the login succeeded
finally:
    # quit() ends the whole WebDriver session; close() only closes the
    # current window.
    driver.quit()
修订版
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from PIL import Image
import time
def get_snap(driver):
    """Capture the browser window of ``driver`` and return it as a PIL image.

    The screenshot is saved as ``full_snap.png`` (relative path, so any
    earlier capture with that name is overwritten) and then opened with
    Pillow.
    """
    driver.save_screenshot('full_snap.png')
    return Image.open('full_snap.png')
def get_image(driver):
    """Return the picture currently shown by the slider widget as a PIL image.

    The element with class ``geetest_canvas_img`` is located, a full
    screenshot is taken with ``get_snap(driver)`` and the element's bounding
    box is cropped out of it.

    Args:
        driver: Selenium WebDriver showing the page.

    Returns:
        The cropped PIL image.
    """
    # ``find_element_by_class_name`` was removed in Selenium 4.3; the
    # By-based locator is accepted by both old and new releases.
    img = driver.find_element(By.CLASS_NAME, 'geetest_canvas_img')
    # Presumably lets the widget finish rendering before the screenshot.
    time.sleep(2)

    location = img.location
    size = img.size
    left = location['x']
    top = location['y']
    right = left + size['width']
    bottom = top + size['height']

    # NOTE(review): assumes screenshot pixels map 1:1 onto page coordinates;
    # confirm on displays that use a scaling factor.
    page_snap_obj = get_snap(driver)
    return page_snap_obj.crop((left, top, right, bottom))
def get_distance(image1, image2, start=57, threshold=60, offset=7):
    """Find the x position where two same-sized images first differ.

    Columns are scanned left to right beginning at ``start``; inside each
    column every row is compared. A pixel counts as different when any of
    its first three channels differs by ``threshold`` or more.

    Args:
        image1: Reference image (needs ``size`` and ``load()`` like a PIL
            image, with pixels indexable per channel).
        image2: Image to compare against ``image1``.
        start: First column to inspect; columns left of it are ignored.
            Presumably skips the area where the slider piece starts.
        threshold: Minimum per-channel difference that counts as a change.
        offset: Value subtracted from the column index before returning.
            Presumably an empirically tuned correction; verify if the
            widget layout changes.

    Returns:
        ``x - offset`` for the first differing column ``x``, or
        ``last_column - offset`` when no pixel differs.

    Raises:
        ValueError: If the comparable width is not greater than ``start``,
            so there is no column to inspect.
    """
    # Only the area both images share can be compared.
    width = min(image1.size[0], image2.size[0])
    height = min(image1.size[1], image2.size[1])
    if width <= start:
        raise ValueError(
            'images must be wider than start=%d, got width %d' % (start, width)
        )

    # load() is loop-invariant: fetch the pixel accessors once instead of
    # once per pixel.
    pixels1 = image1.load()
    pixels2 = image2.load()

    for x in range(start, width):
        for y in range(height):
            rgb1 = pixels1[x, y]
            rgb2 = pixels2[x, y]
            if any(abs(rgb1[c] - rgb2[c]) >= threshold for c in range(3)):
                return x - offset

    # No difference found: fall back to the last inspected column.
    return width - 1 - offset
def get_tracks(distance):
distance+=20 #先滑过一点,最后再反着滑动回来
v=0
t=0.2
forward_tracks=[]
current=0
mid=distance*3/5
while current < distance:
if current < mid:
a=2
else:
a=-3
s=v*t+0.5*a*(t**2)
v=v+a*t
current+=s
forward_tracks.append(round(s))
#反着滑动到准确位置
back_tracks=[-3,-3,-2,-2,-2,-2,-2,-1,-1,-1] #总共等于-20
return {'forward_tracks':forward_tracks,'back_tracks':back_tracks}
def crack(driver):
    """Drive the slider verification shown on the current page.

    Captures the picture before and after the gap appears, derives the drag
    distance with ``get_distance``, converts it to a step sequence with
    ``get_tracks`` and replays that sequence on the slider button.

    Args:
        driver: Selenium WebDriver whose current page shows the widget.
    """
    # ``find_element_by_class_name`` was removed in Selenium 4.3; By-based
    # locators are accepted by both old and new releases.

    # 1. Click the verification button to show the picture without the gap.
    button = driver.find_element(By.CLASS_NAME, 'geetest_radar_tip')
    button.click()

    # 2. Capture the picture without the gap.
    image1 = get_image(driver)

    # 3. Click the slider button to show the picture with the gap.
    button = driver.find_element(By.CLASS_NAME, 'geetest_slider_button')
    button.click()

    # 4. Capture the picture with the gap.
    image2 = get_image(driver)

    # 5. Compare both pictures pixel by pixel to find the displacement.
    distance = get_distance(image1, image2)

    # 6. Turn the total displacement into a sequence of small steps.
    tracks = get_tracks(distance)
    print(tracks)

    # 7. Drag along the track: forward first, then back.
    button = driver.find_element(By.CLASS_NAME, 'geetest_slider_button')
    ActionChains(driver).click_and_hold(button).perform()

    # Forward pass (accelerating, then decelerating past the target).
    for track in tracks['forward_tracks']:
        ActionChains(driver).move_by_offset(xoffset=track, yoffset=0).perform()

    # Short pause, then slide back to undo the overshoot.
    time.sleep(0.5)
    for back_track in tracks['back_tracks']:
        ActionChains(driver).move_by_offset(xoffset=back_track, yoffset=0).perform()

    # Small back-and-forth wiggle around the final position.
    ActionChains(driver).move_by_offset(xoffset=-3, yoffset=0).perform()
    ActionChains(driver).move_by_offset(xoffset=3, yoffset=0).perform()

    # Hold briefly before releasing the mouse button.
    time.sleep(0.5)
    ActionChains(driver).release().perform()
def login_cnblogs(username, password):
    """Open the cnblogs sign-in page, submit credentials and run ``crack``.

    Args:
        username: Account name typed into the ``input1`` field.
        password: Password typed into the ``input2`` field.
    """
    driver = webdriver.Chrome()
    try:
        # 1. Fill in the account and password, then submit.
        driver.implicitly_wait(3)
        driver.get('https://passport.cnblogs.com/user/signin')
        # ``find_element_by_id`` was removed in Selenium 4.3; By-based
        # locators are accepted by both old and new releases.
        input_username = driver.find_element(By.ID, 'input1')
        input_pwd = driver.find_element(By.ID, 'input2')
        signin = driver.find_element(By.ID, 'signin')
        input_username.send_keys(username)
        input_pwd.send_keys(password)
        signin.click()

        # 2. Handle the slider verification.
        crack(driver)

        time.sleep(10)  # wait long enough to see whether the login succeeded
    finally:
        # quit() ends the whole WebDriver session; close() only closes the
        # current window.
        driver.quit()
if __name__ == '__main__':
    # Script entry point; the credentials below are placeholders.
    login_cnblogs(
        username='linhaifeng',
        password='xxxx',
    )
说明
相对于简单的滑动验证码,极验其实还有更复杂的版本,如下所示
机器识别难度高了,大部分屌丝码农搞不定了。然而人类也蒙蔽了,易用性降到极低。
使用了上述验证的网站常常会在用户一片怨声载道中,又将其恢复成易于破解的滑动验证。
验证过程,是个破解难度、用户体验之间的一个平衡点。体验越好的,破解也越容易。
嘲讽验证码无效,破解简单,是很 LOW 的行为。
网站方、验证码平台方,知道你能破解,你牛 B。。。更难的验证码他们也有,只是这会严重降低体验,他们不用而已。