该酒店有有个获取酒店评价的接口 比如 /AjaxHotelCommentList , 为了避免接口被抓取。
请求评价接口的时候带上一个 authkey的参数
这个参数的值通过一个单独的接口去请求 比如 /getAuthkey
那么我们爬取时就要 先爬 getAuthkey 再 AjaxHotelCommentList
然而 getAuthkey 接口返回了
eval(
function(arr, f) {
if (typeof Array.prototype.map === "function") {
return arr.map(f)
}
var res = [],
j = 0;
for (var i = 0, l = arr.length; i < l; i++) { res[j++] = f(arr[i], i, arr) }
return res
}(
[38684, 38658, 38727, 38742, .........., 38735, 38724, 38741],
function(item) {
return String.fromCharCode(item - 38625) }).join('')
)
在前端加密了 authkey 太天真了吧,看我分分钟破解~
于是我失去了1天,也有了这篇文章。
认真的分割线
上面的代码是把一段代码 Unicode化避免抓取,通过 String.fromCharCode 来还原代码,最后eval执行
我们看还原后的代码
;
! function() {
var map = function(arr, f) {
if (Array.prototype.map) {
return Array.prototype.map.call(arr, f); }
var result = [];
for (var i = 0; i < arr.length; i++) { result.push(f(arr[i])); }
return result; };
eval(map([118, 97, 114, 32, 110, 122, 121, 120, 119, 102, 103, 104, 105, 118, 107, 108, 109, 117, 116, 113, 114, 111, 106, 112, 101, 100, 97, 98, 115, 99, 32, 61, 32, 102, 117, 110, 99, 116, 105, 111, 110, 32, 40, 114, 44, 110, 41, 123, 102, 111, 114, 40, 118, 97, 114, 32, 116, 61, 91, 51, 50, 44, 52, 51, 44, 52, 53, 44, 52, 53, 44, 52, 56, 44, 52, 57, 44, 53, 48, 44, 53, 49, 44, 53, 50, 44, 53, 51, 44, 53, 52, 44, 53, 53, 44, 53, 54, 44, 53, 55, 93, 44, 111, 61, 114, 46, 115, 112, 108, 105, 116, 40, 110, 46, 99, 104, 97, 114, 65, 116, 40, 48, 41, 41, 44, 112, 61, 91, 93, 44, 117, 61, 102, 117, 110, 99, 116, 105, 111, 110, 40, 114, 41, 123, 118, 97, 114, 32, 111, 61, 109, 97, 112, 40, 114, 46, 115, 112, 108, 105, 116, 40, 34, 34, 41, 44, 102, 117, 110, 99, 116, 105, 111, 110, 40, 114, 44, 111, 41, 123, 114, 101, 116, 117, 114, 110, 32, 83, 116, 114, 105, 110, 103, 46, 102, 114, 111, 109, 67, 104, 97, 114, 67, 111, 100, 101, 40, 116, 91, 110, 46, 105, 110, 100, 101, 120, 79, 102, 40, 114, 41, 93, 41, 125, 41, 46, 106, 111, 105, 110, 40, 34, 34, 41, 59, 114, 101, 116, 117, 114, 110, 43, 111, 125, 44, 101, 61, 102, 117, 110, 99, 116, 105, 111, 110, 40, 114, 41, 123, 118, 97, 114, 32, 110, 61, 112, 46, 112, 111, 112, 40, 41, 44, 116, 61, 112, 46, 112, 111, 112, 40, 41, 59, 114, 61, 61, 61, 105, 63, 112, 46, 112, 117, 115, 104, 40, 116, 43, 110, 41, 58, 112, 46, 112, 117, 115, 104, 40, 116, 45, 110, 41, 125, 44, 105, 61, 110, 46, 99, 104, 97, 114, 65, 116, 40, 49, 41, 44, 97, 61, 110, 46, 99, 104, 97, 114, 65, 116, 40, 50, 41, 44, 102, 61, 48, 59, 102, 60, 111, 46, 108, 101, 110, 103, 116, 104, 59, 102, 43, 43, 41, 105, 102, 40, 34, 34, 33, 61, 61, 111, 91, 102, 93, 41, 105, 102, 40, 111, 91, 102, 93, 61, 61, 105, 124, 124, 111, 91, 102, 93, 61, 61, 97, 41, 123, 118, 97, 114, 32, 104, 61, 111, 91, 102, 93, 59, 101, 40, 104, 41, 125, 101, 108, 115, 101, 123, 118, 97, 114, 32, 99, 61, 117, 40, 111, 91, 102, 93, 44, 110, 41, 59, 112, 46, 112, 117, 115, 104, 40, 43, 99, 41, 125, 114, 101, 116, 117, 114, 110, 32, 112, 46, 112, 111, 112, 40, 41, 125], function(item) {
return String.fromCharCode(item); }).join(''));;
var ri = 0;
var tkpefqlmcohqfdoyvmarq = function(ixdbwyfartqejbiksdvmha, gefshzzovbwoxnmsrrctybo) {
if (typeof ixdbwyfartqejbiksdvmha.map === 'function') {
return ixdbwyfartqejbiksdvmha.map(gefshzzovbwoxnmsrrctybo); }
var ahhyxhxdbyydyekxppyyuetk = [],
xwazquorrsmsiybjhselexozg = 0;
for (ri = 0, hdzlisdpuqffypwtnwxzwdbrzy = ixdbwyfartqejbiksdvmha.length; ri < hdzlisdpuqffypwtnwxzwdbrzy; ri++) { ahhyxhxdbyydyekxppyyuetk[xwazquorrsmsiybjhselexozg++] = gefshzzovbwoxnmsrrctybo(ixdbwyfartqejbiksdvmha[ri], ri, ixdbwyfartqejbiksdvmha); }
return ahhyxhxdbyydyekxppyyuetk; };
var result = [32, 50, 103, 312, 632, 602, 312, 353, 350, 310, 364, 364, 632, 341, 632, 618, 599, 310, 344, 624, 355, 313, 340, 312, 341, 620, 320, 307, 346, 329, 328, 338, 312, 314, 330];
console.log(result);
var tjlazzxbkfgbxyqptfcpcdcajvud = [nzyxwfghivklmutqrojpedabsc("l6IUIbInIyIYIbIYzIlIbIUUIzIYIbIYI6IyIYIUUIBIYIbIBIyIYIYUIBIbIbIbIbIlIlIYIUIyIbIbIyIlIbIYdIjIbIYIYInI6IbIYzInIbIbIUBIBIYIYUIBIbIYIYIbIYIUUIyIYIyIzIYIbIUUIUIYIYdIjIbIYIbIYUnIlIbIjIUIYIbIUnIlIYIlIlIbIYIbIYIYBIzIbInIBIYIbIEIdIbIUUIdIYIYIYIYyIyIbIyIdIYIYIEIUIbIYyIjIbIbIbIYIYIb", "IbYg6UElBdzynj0NWoZutJQfrhTS1OL84/7Vvpw3KXmMkFRaeCc2PiDsAx5H*qG-9+"), nzyxwfghivklmutqrojpedabsc("1-t5teto/t5tetothtPtetohtPtetotethht5totPt-totothjthtetoDtPtetotetoto/htjtethhtDtoteto/thtetSt5tetetet-t/tet5t/tetethDtStetRthtotototetethRt5toth5t1tototo-t5teththtotetotoRtjtetoRtRtetet-tRtet/tStetoteteto-tjteth1tPtotetoPtPtet5tDtotetot5tjtoththtetet-tjtotojtRtetetototote", "teo0Sh/Dj-R51PY9U*QTE7FlOXNVwaWdJyBxqZAczGKvf4i3CnLrMkmHs2pu+g86bI"), nzyxwfghivklmutqrojpedabsc("ONiyiEiBiOiririPNihirip8ixiriririNiPiEirpiyiEirip0iNiEiOiyiEiriEiripxiBiriryixiEiEirpi8iEirPiyiEiriri0i0iEiBiPiEiEiOiBiEi8iyiririririEihixiEir0iBiEiriOiNiri0iPiEiEirir0ipiEiBiOiriEirBiOiriPi8iEiEiriEi8iyiripPihiriEir0iNiEiOiOiEiEirir8ixiEi0iNiriEihiNiriOipiriririEiriE", "iErTNpPO0yB8hx12XJFZYbm-tn9Rldgv73GSf4K6UeAoMCLIca*q5QujzWwVD/kH+s"), ];
var pdpabyzvpwurkdouewrofqaftpoqq = [function(list) {
for (var i = 0, length = list.length; i < length; i++) { list[i] = list[i] + (true ? (-eval('19')) : (-eval('20'))); }
return list; }, function(input) {
var zero = 0;
var length = input.length;
var result = [];
var push = Array.prototype.push;
for (var index = zero; index < length; index++) { push.apply(result, [Math.floor(input[index] / (+eval('6')))]); }
return result; }, function(input) {
var decode = function(x) {
var isOdd = (x & 1) == 1;
var one = +! function() {}();
var two = (function(a, b) {}).length;
var three = (function() {
return arguments.length; })(x, x, x);
var oddDecode = function(x) {
var up = x + (1 << three);
up *= 10;
var down = 5 * two;
var result = up / down;
result -= two * three;
return result; };
var evenDecode = function(x) {
var left = x + two;
left *= three;
left /= ((one << one) + one);
left += two;
return left; };
return isOdd ? oddDecode(x) : evenDecode(x); };
var output = [];
for (var i = 0, l = input.length; i < l; i++) { output[i] = decode(input[i]); }
return output; }];
var tcognjiuoczfzgyevfgmwcxfsngdin = (function(unjusanjuumzypqcaciqmiinuyvhdpv, uejybtvzxzqpabungjxiujqejiqxtjta) {
var ahhyxhxdbyydyekxppyyuetkult = {};
for (var ri = 0; ri < unjusanjuumzypqcaciqmiinuyvhdpv.length; ri++) { ahhyxhxdbyydyekxppyyuetkult[unjusanjuumzypqcaciqmiinuyvhdpv[ri]] = (uejybtvzxzqpabungjxiujqejiqxtjta[ri]); }
return ahhyxhxdbyydyekxppyyuetkult; })(tjlazzxbkfgbxyqptfcpcdcajvud, pdpabyzvpwurkdouewrofqaftpoqq);
ri = 0;
var cljlmsamuroeuettnhxhrsljgiqfsscudb = [36, 99, 100, 99, 95, 97, 115, 100, 106, 102, 108, 97, 115, 117, 116, 111, 112, 102, 104, 118, 99, 90, 76, 109, 99, 102, 108, 95];
// result 进行了3次变化 每次去掉了头部的数字
console.log(tcognjiuoczfzgyevfgmwcxfsngdin);
while (ri < 3) {
console.log(result);
var ujrvqiphiokkpqoilovcxxvyvnhwlecvrel = tcognjiuoczfzgyevfgmwcxfsngdin[result[0]];
// 每次去掉了头部的数字
result = result.slice(1);
//进行变化
result = ujrvqiphiokkpqoilovcxxvyvnhwlecvrel(result);
(function() {
// 验证是否在当前酒店页面
var edyitixbzgeuhikzsrszbohpjmkdqdhcwihaj = decodeURIComponent(this.location.href).indexOf(decodeURIComponent("/hotel/452221.html"));
if (edyitixbzgeuhikzsrszbohpjmkdqdhcwihaj == -1) { result.pop();
return;
}
var resultLength = result.length;
console.log(resultLength);
// 验证是否在当前酒店页面
var hdzlisdpuqffypwtnwxzwdbrzyength = decodeURIComponent("/hotel/452221.html").length;
for (var ri = 0; ri < hdzlisdpuqffypwtnwxzwdbrzyength; ri++) {
if (ri >= resultLength) {
continue; }
result[ri] += (decodeURIComponent(this.location.href).charCodeAt(edyitixbzgeuhikzsrszbohpjmkdqdhcwihaj + ri) - decodeURIComponent("/hotel/452221.html").charCodeAt(ri)); } })();
ri++;
};
// 这已经是最后的2进制
console.log(result);
// 这里应该无关紧要
try {
var vxqbzabzdmdqlnpneakddqfhkbcvdzbxzvwozzkq = !!document[tkpefqlmcohqfdoyvmarq(cljlmsamuroeuettnhxhrsljgiqfsscudb, function(ritem) {
return String.fromCharCode(ritem); }).join('')];
new Image().src = ('http://hotels.ctrip.com/domestic/cas/image/id?value=s_' + vxqbzabzdmdqlnpneakddqfhkbcvdzbxzvwozzkq); } catch (err) {}
//
result = tkpefqlmcohqfdoyvmarq(result, function(ritem) {
return String.fromCharCode(ritem); }).join('');
// 最初 是2gĸɸɚĸšŞĶŬŬɸŕɸɪɗĶŘɰţĹŔĸŕɬŀijŚʼnňŒĸĺŊ 35位
// 这是最后的key 居然是错误的
console.log(result); //1fa187099f6fda06e81516d206435 29位
// 这才是正确的
// 1fa187099f6fda06e81516d206435114 32位
// 调用回调
CASGULgRTcHwCqYrlU(new Function('return "' + result + '";')); }();
开始干正事的分割线
代码太长没有看完的就直接到这里
我已经对代码做了一些简要的替换 如 result
在关键位置输出这个值看经过了什么变化
// result 进行了3次变化 每次去掉了头部的数字
console.log(tcognjiuoczfzgyevfgmwcxfsngdin);
while (ri < 3) {
console.log(result);
var ujrvqiphiokkpqoilovcxxvyvnhwlecvrel = tcognjiuoczfzgyevfgmwcxfsngdin[result[0]];
// 每次去掉了头部的数字
result = result.slice(1);
//进行变化
result = ujrvqiphiokkpqoilovcxxvyvnhwlecvrel(result);
(function() {
// 验证是否在当前酒店页面
var edyitixbzgeuhikzsrszbohpjmkdqdhcwihaj = decodeURIComponent(this.location.href).indexOf(decodeURIComponent("/hotel/452221.html"));
if (edyitixbzgeuhikzsrszbohpjmkdqdhcwihaj == -1) { result.pop();
return;
}
var resultLength = result.length;
console.log(resultLength);
// 验证是否在当前酒店页面
var hdzlisdpuqffypwtnwxzwdbrzyength = decodeURIComponent("/hotel/452221.html").length;
for (var ri = 0; ri < hdzlisdpuqffypwtnwxzwdbrzyength; ri++) {
if (ri >= resultLength) {
continue; }
result[ri] += (decodeURIComponent(this.location.href).charCodeAt(edyitixbzgeuhikzsrszbohpjmkdqdhcwihaj + ri) - decodeURIComponent("/hotel/452221.html").charCodeAt(ri)); } })();
ri++;
};
我们发现在这段代码之前 result都没有发生变化
好在这段代码是比较有亲和力的。
var result = [32, 50, 103, 312, 632, 602, 312, 353, 350, 310, 364, 364, 632, 341, 632, 618, 599, 310, 344, 624, 355, 313, 340, 312, 341, 620, 320, 307, 346, 329, 328, 338, 312, 314, 330];
while循环进行了3次每次都先对 result.slice(1); 去了第一位
再经过一个函数变化
//进行变化
var fun = fun_arr[result[0]];
result = fun (result);
这里我修改了变量名 通过注释可在上面代码中看到
fun 来自 一个fun_arr result失去的第一位数字被用做下标取 fun_arr 中的function
实际上 fun_arr 中并不是有100个function。 而是只在那3个特殊的下标下设置了 function
fun_arr 的值其实在 初始化 result的下2行也做了定义。
最后输出的 result 就是经过3次 去第一位 和 变化后的 arr
String.fromCharCode 后 we get it
总结的分割线
如何破解
- String.fromCharCode 出真正代码
- 去除障碍,如 delete process. location.href .
- 在 callback之前 输出真正的authkey。 或者自己node环境中增加 一个同名callback引导authkey进入自己的代码。
- 请求之后的评价接口
下一篇 如何实现这样的反爬虫