简述
- 在简书-爬虫数据分析学习交流 群里有个哥们 🏿I 🏿(。。他的微信昵称直接粘贴过来就是这样的。。)展示了他的极验验证码破解效果,很流畅,引起了我的兴趣。
- 极验验证码的主要问题在于对人的行为特征的分析,它会在你拖动滑块时以数十毫秒间隔记录你的鼠标移动数据发送到服务器,并使用各种算法(如深度学习)判断你的轨迹特征到底是人还是机器。
- 我突然想到PID(比例-积分-微分)控制模型说不定能模拟人的行为特征,就想试试。
- 实现思路上大家都差不多,简单的方案就是selenium操纵浏览器,由原始图和凹陷图对比得出要移动的距离,然后控制鼠标以某种方案移动即可。(图片的获取有两种方案,一是向服务器请求图片片段,然后拼接起来,我选择了另一种懒办法,在三种情况下截图对比)
- 难一些的思路就是抓包分析,直接请求服务器,发送鼠标轨迹数据,网上有大神直接抓包分析发送xpos数据的样例,详见参考。
- 🏿I 🏿哥们说他的正确率在90%以上,感谢他的热心交流,虽然没有透露他的具体方案 :) (当然,这种东西不好说的)
- 我的成功率不高,要是高了就不太方便写博客了。成功率在40、50%吧,所以我放心地贴出来,仅供学习探索参考,而且极验的3.0也在推进,感觉样式还不错。
- 我将我搜集到的有用资源都列在参考里了,网上分享的经验和代码不少,上手还是比较快的,演示视频中的代码也放上了,在github-geetest里直接附上了chromedriver可能更方便点,有兴趣的朋友可以试着玩玩,调下参数。还试了些其他的,比较乱,就没放上了。
- 我觉得这个对于搞机器学习的朋友会比较有吸引力,毕竟手里有个锤子,看啥都是钉子。现在这么好的陪练出来了,一攻一防,不过注意分寸吧。。。
- 在代码中搜索到 `get_offsets`,用你自己的思路复写它,返回一个可迭代对象表示鼠标每次平移的间隔即可。192行(原文件行号,应为移动循环里的 `time.sleep(...)`)的间隔时间也可相应修改。
效果
连续尝试视频(题外话:视频简单处理参考 FFmpeg实用命令:音频、视频格式转换和其它操作)
参考
视频中代码
# -*- coding: utf-8 -*-
import os
import time
from selenium import webdriver
from io import BytesIO
from PIL import Image
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# NOTE(review): changing to '.' leaves the working directory unchanged.
os.chdir('.')
# Start Chrome using the chromedriver binary at ./chromedriver.exe
# (path is relative to the current working directory).
driver = webdriver.Chrome('./chromedriver.exe')
# Open the Jianshu sign-in page.
driver.get("https://www.jianshu.com/sign_in")
# NOTE(review): indentation was lost in this listing; presumably the
# statements that follow form the body of this endless loop -- confirm
# against the original script.
while 1:
time.sleep(0.2)
# Set the window size
width = 1280
height = 800
driver.set_window_size(width, height)
def get_captcha_image(filename):
    """Screenshot the page and save the captcha widget region to *filename*.

    Uses the module-level ``driver``.  The element with class ``gt_box`` is
    located, its bounding box (from the element's reported location and size)
    is cropped out of a full screenshot, and the crop is written to
    *filename*.  The box and a success message are printed.

    Args:
        filename: Path of the image file to write.

    Raises:
        RuntimeError: If the widget's left edge is reported at x == 0
            (presumably the widget has not been positioned yet -- confirm).
    """
    screenshot = Image.open(BytesIO(driver.get_screenshot_as_png()))
    # find_element(By.CLASS_NAME, ...) works on Selenium 3 and 4, whereas
    # find_element_by_class_name was removed in Selenium 4.3.
    captcha_el = driver.find_element(By.CLASS_NAME, "gt_box")
    location = captcha_el.location
    size = captcha_el.size
    left = location['x']
    top = location['y']
    box = (left, top, left + size['width'], top + size['height'])
    print(box)
    if left == 0:
        # RuntimeError subclasses Exception, so existing handlers still match.
        raise RuntimeError('captcha box reported at x == 0: %r' % (box,))
    screenshot.crop(box).save(filename)
    print(u'截图成功')
time.sleep(1)
# Wait up to 8 seconds for an element with class "gt_box" to be present.
WebDriverWait(driver, 8).until(
EC.presence_of_element_located((By.CLASS_NAME, "gt_box")))
knob = driver.find_element_by_class_name("gt_slider_knob")
action = ActionChains(driver)
# Move the pointer onto the slider knob (offset 21, 21) without pressing.
action.move_to_element_with_offset(knob, 21, 21).perform()
time.sleep(1)
# First screenshot ("f-" prefix): taken with the pointer hovering on the knob.
f_file = 'f-%s.png' % time.strftime("%Y%m%d-%H%M%S")
get_captcha_image(f_file)
# Press and hold the mouse button at the current pointer position.
ActionChains(driver).click_and_hold().perform()
time.sleep(0.5)
# action.drag_and_drop_by_offset(knob, x_offset, y_offset).perform()
# Second screenshot ("s-" prefix): taken while the button is held down.
s_file = 's-%s.png' % time.strftime("%Y%m%d-%H%M%S")
get_captcha_image(s_file)
# action.move_by_offset(50, 0).release().perform()
# action.reset_actions()
# --------------------------------------------------------------
import matplotlib.pylab as plt
from PIL import Image, ImageFilter
from PIL import ImageChops
# Pixel-wise difference between the two screenshots, to see what changed
image_f = Image.open(f_file)
image_s = Image.open(s_file)
diff = ImageChops.difference(image_f, image_s)
# ----------------------show images (debug)----------------------------
# The triple-quoted string below is disabled debug code kept as a bare
# string expression; it is evaluated and discarded.
'''
# if diff.getbbox() is not None:
diff.save('x.png')
# plt.imshow(plt.imread('x.png'))
# plt.show()
fig, axs = plt.subplots(nrows=1, ncols=3)
for im, ax in zip(["f.png", "s.png", "x.png"], axs):
image = plt.imread(im)
ax.imshow(image)
plt.show()
diff_image = Image.open('x.png')
'''
# -------------------------debug--------------------------------
# NOTE(review): no enclosing function is visible here; at module scope a
# `global` statement has no effect.
global first_left
# Initial value; find_offset overwrites it with the left edge it detects.
first_left = 0
def find_offset(diff_image, offset_=62):
    """Estimate the slider travel distance, in pixels, from a difference image.

    The difference image is thresholded into a 1-bit mask (grey level above
    52), which is also saved as a timestamped ``x-*.png`` file.  The left edge
    of the whole mask's bounding box is stored in the global ``first_left``.
    The bounding box of the part of the mask to the right of column
    *offset_* gives the second region.

    Args:
        diff_image: Image object to analyse (as produced by
            ``ImageChops.difference`` in this script).
        offset_: Column at which the mask is cut before looking for the
            second region.

    Returns:
        The estimated distance, or -1 when it cannot be determined: the mask
        is empty, nothing is found to the right of *offset_*, or the region
        found there starts within 4 pixels of the cut.
    """
    global first_left
    mask = diff_image.convert("L").point(lambda i: i > 52, mode='1')
    mask.save('x-%s.png' % time.strftime("%Y%m%d-%H%M%S"))

    # getbbox() yields (left, upper, right, lower), or None for an empty mask.
    whole_box = mask.getbbox()
    if whole_box is None:
        # Identical frames: nothing to measure, so first_left is left as is.
        return -1
    first_left = whole_box[0]

    right_box = mask.crop((offset_, 0, mask.width, mask.height)).getbbox()
    if right_box is None or right_box[0] <= 4:
        return -1
    return right_box[0] + offset_ - whole_box[0] - 2
# diff = diff_image.load()
# http://stackoverflow.com/questions/9038160/break-two-for-loops
# for x in range(61, width):
# for y in range(height):
# if all(i > 40 for i in diff[x, y]):
# return x - 6
# Estimate the drag distance from the hover-vs-pressed difference image.
offset = find_offset(diff)
# Fallback for when find_offset reports failure with a negative value.
# NOTE(review): indentation was lost in this listing; presumably the
# statements down to the second time.sleep(0.5) form the body of this
# `if` -- confirm against the original script.
if offset < 0:
# Drag the slider 160 px to the right, hold it there and take a screenshot
ActionChains(driver).move_by_offset(160, 0).perform()
time.sleep(0.5)
# action.drag_and_drop_by_offset(knob, x_offset, y_offset).perform()
s_file = 's-%s.png' % time.strftime("%Y%m%d-%H%M%S")
get_captcha_image(s_file)
# Release the button
ActionChains(driver).release().perform()
image_s = Image.open(s_file)
diff = ImageChops.difference(image_f, image_s)
d = diff.convert("L").point(lambda i: i > 60, mode='1')
# Left edge of the new difference region minus first_left (set by
# find_offset).
# NOTE(review): getbbox() returns None for an all-zero image, which would
# raise TypeError on the subscript below.
offset = d.getbbox()[0] - first_left
time.sleep(2.5)
# Hover over the knob again and press the button to start the real drag.
ActionChains(driver).move_to_element_with_offset(
knob, 21, 21).click_and_hold().perform()
time.sleep(0.5)
print(offset)
def get_offsets(setpointX):
    """Yield positive integer x-steps that add up to exactly *setpointX*.

    A PID-style loop turns the remaining distance into an acceleration that
    is accumulated into a velocity; the velocity, truncated to an integer, is
    the step for that tick.  Whenever the truncated step drops below 1 a
    random step of 1-3 pixels is used instead.  Every step is capped at the
    remaining distance, so the total never overshoots the target.

    Never yields a fractional number of pixels as long as *setpointX* is an
    integer.

    Args:
        setpointX: Total horizontal distance to cover, expected to be an
            integer pixel count.  Values <= 0 yield nothing.

    Yields:
        The next horizontal step in pixels (int, >= 1).
    """
    kp = 3.0
    ki = 0.0001
    kd = 80.0

    # Overall gain depends only on the total distance, so pick it once.  It
    # is derived from the parameter instead of the module-level `offset`.
    if setpointX < 100:
        K = 0.007
    elif setpointX < 180:
        K = 0.006
    else:
        K = 0.005

    x = 0
    vx = 0
    prevErrorX = 0
    integralX = 0
    while x < setpointX:
        errorX = setpointX - x
        integralX += errorX
        derivativeX = errorX - prevErrorX
        prevErrorX = errorX

        ax = K * (kp * errorX + ki * integralX + kd * derivativeX)
        vx += ax
        if x + vx > setpointX:
            vx = errorX
        vx = int(vx)
        if vx < 1:
            vx = random.randint(1, 3)
        # The random minimum step may exceed what is left; cap it so the
        # steps sum to setpointX exactly.
        vx = min(vx, errorX)
        yield vx
        print('vvvvv - ', vx)
        x += vx
def get_offsets_back(goal):
    """Yield random steps of 10-50 pixels until their sum reaches *goal*.

    The final step is shortened so the running total lands exactly on
    *goal*.  Nothing is yielded when *goal* is zero or negative.

    Args:
        goal: Total distance to cover.

    Yields:
        The next step size.
    """
    travelled = 0
    while travelled < goal:
        step = random.randint(10, 50)
        # Trim the last step instead of overshooting the goal.
        step = goal - travelled if travelled + step > goal else step
        yield step
        travelled += step
import random
# Replay the generated steps as relative mouse moves, each with -1..1 px of
# random vertical jitter.
# NOTE(review): indentation was lost in this listing; presumably the loop
# body ends at the time.sleep(...) call and the release happens once after
# the loop -- confirm against the original script.
for o in get_offsets(offset):
y = random.randint(-1, 1)
ActionChains(driver).move_by_offset(o, y).perform()
# time.sleep(0.03)
# Pause between moves.
# NOTE(review): under Python 2 this is integer division and evaluates to 0;
# confirm the target interpreter.
time.sleep(random.randint(2, 4) / 100)
# Release the mouse button.
ActionChains(driver).release().perform()
# action.drag_and_drop_by_offset(knob, offset, 0).perform()
time.sleep(3)
# Reload the page (presumably to get a fresh captcha for the next pass of
# the outer loop).
driver.refresh()