java爬虫
模拟登录CSDN
- 抓包得到post请求的5个参数:
username:139******027@163.com
password:*******()
lt:LT-243423-zaxybu6QLbPZfJSXCBOfBkstaiCKml
execution:e3s1
_eventId:submit
Cookie:uuid_tt_dd=5955540397292079753_20170815; UM_distinctid=15e502c8a4d5b5-0ebe367e3c65d5-e313761-e1000-15e502c8a4eaa4; __utma=17226283.1282527090.1508248516.1508248516.1508248516.1; __utmz=17226283.1508248516.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); __message_sys_msg_id=0; __message_gu_msg_id=0; __message_cnel_msg_id=0; __message_district_code=420000; __message_in_school=0; UN=qq_37976565; UE="13949569027@163.com"; BT=1508662667082; JSESSIONID=C2ED5A9B0A01ADD063B0219B5A4953F1.tomcat2; LSSC=LSSC-513836-NbJPY2BrWFZud9WPdsjOlzU2RyQrOe-passport.csdn.net; dc_tos=oy82pe; dc_session_id=1508499845140_0.037202744870221016; Hm_lvt_6bcd52f51e9b3dce32bec4a3997715ac=1508593384,1508659648,1508659652,1508671911; Hm_lpvt_6bcd52f51e9b3dce32bec4a3997715ac=1508671925
步骤
- 1.首先发送一个get请求,得到登录页面以及后3个请求参数(lt、execution、_eventId)
- 2.向登录地址发送post请求,提交上述5个参数完成登录
- 3.分析post返回的内容,判断是否登录成功
用到的API
- HttpClient
- Jsoup
sendGet函数
/**
 * Sends an HTTP GET request to {@code url} and returns the response body as text.
 *
 * <p>The body is decoded with the charset declared by the response. When the response
 * declares none, UTF-8 is used as the fallback rather than the library default, so that
 * Chinese page content is not garbled.
 *
 * @param url address to request
 * @return the response body, or {@code null} if the request failed or the response
 *         carried no body
 */
public static String sendGet(String url){
    HttpEntity entity=null;
    try {
        HttpGet get=new HttpGet(url);
        HttpResponse response=httpClient.execute(get);
        entity=response.getEntity();
        if (entity==null){
            // No body to decode; report "no content" the same way a failure is reported.
            return null;
        }
        return EntityUtils.toString(entity,"UTF-8");
    }
    catch (Exception e) {
        // Best-effort helper: report the failure and hand back null instead of propagating.
        e.printStackTrace();
        return null;
    }
    finally {
        // Drain the entity on every path so the connection is released even when
        // decoding the body failed part-way through.
        if (entity!=null){
            try {
                EntityUtils.consume(entity);
            }
            catch (Exception ignored) {
                // Nothing more can be done if releasing the connection itself fails.
            }
        }
    }
}
sendPost函数
/**
 * Sends an HTTP POST request to {@code url} with the given form parameters and returns
 * the response body as text.
 *
 * <p>The form is encoded as UTF-8. The response body is decoded with the charset declared
 * by the response; when the response declares none, UTF-8 is used as the fallback rather
 * than the library default, so that Chinese page content is not garbled.
 *
 * @param url  address to post to
 * @param nvps form parameters to send; when {@code null} the request is sent without a body
 * @return the response body, or {@code null} if the request failed or the response
 *         carried no body
 */
public static String sendPost(String url, List<NameValuePair> nvps){
    HttpEntity entity=null;
    try {
        // HttpPost is HttpClient's wrapper for a POST request.
        HttpPost post=new HttpPost(url);
        // nvps is the list that carries the request parameters.
        if (nvps!=null){
            post.setEntity(new UrlEncodedFormEntity(nvps,"UTF-8"));
        }
        // The request is sent with the client's execute method.
        HttpResponse response=httpClient.execute(post);
        entity=response.getEntity();
        if (entity==null){
            // No body to decode; report "no content" the same way a failure is reported.
            return null;
        }
        // The decoded text is returned so the caller can analyse the result.
        return EntityUtils.toString(entity,"UTF-8");
    }catch (Exception e){
        // Best-effort helper: report the failure and hand back null instead of propagating.
        e.printStackTrace();
        return null;
    }finally {
        // Drain the entity on every path so the connection is released even when
        // decoding the body failed part-way through.
        if (entity!=null){
            try {
                EntityUtils.consume(entity);
            }catch (Exception ignored){
                // Nothing more can be done if releasing the connection itself fails.
            }
        }
    }
}
setPost()函数(在sendPost的基础上增加Cookie请求头,并验证登录结果)
/*
 * Variant of the POST helper that also sends a fixed Cookie header, reports the login
 * outcome on standard output, and then fetches http://ask.csdn.net/ to check the session.
 *
 * NOTE(review): the method signature line is missing from this listing. The body reads
 * `url` and `nvps` and returns a String, so it presumably has the same parameters as
 * sendPost -- confirm against the real source file.
 *
 * Returns the body of the follow-up GET on success. If an exception is thrown, it is
 * printed and `content` is returned instead: the login response body if it had already
 * been read, otherwise null.
 */
HttpResponse response;
String content=null;
try {
HttpPost post=new HttpPost(url);
if (nvps!=null){
post.setEntity(new UrlEncodedFormEntity(nvps,"UTF-8"));
// Hard-coded Cookie header value; it is only attached when form parameters are given.
post.setHeader("Cookie","uuid_tt_dd=5955540397292079753_20170815; UM_distinctid=15e" +
"502c8a4d5b5-0ebe367e3c65d5-e313761-e1000-15e502c8a4eaa4; __utma=17226283.1282527090.15082" +
"48516.1508248516.1508248516.1; __utmz=17226283.1508248516.1.1.utmcsr=(direct)|utmccn=(direct)" +
"|utmcmd=(none); __message_sys_msg_id=0; __message_gu_msg_id=0; __message_cnel_msg_id=0; __message_district_code=420000; __message_in_school=0; UN=qq_37976565; UE=\"13949569027@163.com\"; BT=1508662667082; JSESSIONID=C2ED5A9B0A01ADD063B0219B5A4953F1.tomcat2; LSSC=LSSC-513836-NbJPY2BrWFZud9WPdsjOlzU2RyQrOe-passport.csdn.net; dc_tos=oy82pe; dc_session_id=1508499845140_0.037202" +
"744870221016; Hm_lvt_6bcd52f51e9b3dce32bec4a3997715ac=1508593384,1508659648,1508659652,1508671911" +
"; Hm_lpvt_6bcd52f51e9b3dce32bec4a3997715ac=1508671925");
}
response=httpClient.execute(post);
HttpEntity entity=response.getEntity();
content=EntityUtils.toString(entity);
EntityUtils.consume(entity);
// The outcome is decided by searching the response body for marker text:
// "redirect_back" is treated as success, "登陆太频繁" as rate limiting, anything else as failure.
if (content.indexOf("redirect_back")>-1){
System.out.println("登陆成功....");
// result=true;
}
else if(content.indexOf("登陆太频繁")>-1){
System.out.println("登陆太频繁");
}else {
System.out.println("登陆失败");
}
// The follow-up request below is sent regardless of which branch was taken above.
HttpGet get=new HttpGet("http://ask.csdn.net/");//verify that the cookie was set successfully
// setCookie is defined outside this listing; presumably it builds a Cookie header value
// from the login response -- TODO confirm.
String c = setCookie(response);
//inject the cookie into the GET request header
get.setHeader("Cookie",c);
HttpResponse r = httpClient.execute(get);
String content1 = EntityUtils.toString(r.getEntity());
return content1;
}catch (Exception e){
e.printStackTrace();
}
return content;
}
主函数Main()
/**
 * Command-line entry point that performs the CSDN login sequence: it fetches the login
 * page, reads the hidden form fields {@code lt}, {@code execution} and {@code _eventId}
 * from it, and posts them together with the account name and password.
 */
public class Main {
    /** Address of the login page; the login form is posted back to the same address. */
    private static final String LOGIN_URL="HTTPS://passport.csdn.net/account/login";

    /**
     * Runs the login sequence and prints the fetched page, the extracted hidden fields
     * and the login response to standard output.
     *
     * @param args unused
     * @throws IllegalStateException if the login page cannot be fetched or does not
     *         contain the expected form or hidden fields
     * @throws Exception declared for compatibility with the original signature
     */
    public static void main(String[] args)throws Exception {
        Logger logger=Logger.getLogger("cyc");
        String username=" ";//account name, to be filled in before running
        String password=" ";//password, to be filled in before running

        logger.info("获取必要的登陆信息.....");
        String html= Zhihu.sendGet(LOGIN_URL);
        if (html==null){
            // sendGet reports failures by returning null; stop here with a clear message
            // instead of letting the HTML parser reject the null input.
            throw new IllegalStateException("Could not fetch the login page from "+LOGIN_URL);
        }
        // Printed before the form lookup so the page is visible even when the lookup fails.
        System.out.println(html);

        Document doc= Jsoup.parse(html);
        Element form=doc.select(".user-pass").first();
        if (form==null){
            throw new IllegalStateException(
                    "Login form (.user-pass) not found in the page from "+LOGIN_URL);
        }
        String lt=requiredInput(form,"lt");
        String execution=requiredInput(form,"execution");
        String _eventId=requiredInput(form,"_eventId");
        System.out.println(lt);
        System.out.println(execution);
        System.out.println(_eventId);
        System.out.println("");
        logger.info("获取成功.....");

        logger.info("开始登录.....");
        List<NameValuePair> nvps=new ArrayList<NameValuePair>();
        nvps.add(new BasicNameValuePair("username",username));
        nvps.add(new BasicNameValuePair("password",password));
        nvps.add(new BasicNameValuePair("lt",lt));
        nvps.add(new BasicNameValuePair("execution",execution));
        nvps.add(new BasicNameValuePair("_eventId",_eventId));
        String ret=Zhihu.sendPost(LOGIN_URL,nvps);
        System.out.println(ret);
    }

    /** Returns the value of the form's input named {@code name}, failing clearly if it is absent. */
    private static String requiredInput(Element form,String name){
        Element input=form.select("input[name="+name+"]").first();
        if (input==null){
            throw new IllegalStateException("Login form has no input named \""+name+"\"");
        }
        return input.val();
    }
}
登录成功时打印的返回结果
<html>
<head>
<meta charset="utf-8" />
<meta name="referrer" content="always">
<script src="/content/loginbox/loginapi.js" ></script>
<script>
function redirect_back(){
var redirect = "http://www.csdn.net/";
var data = {"userId":66850244,"isLocked":false,"mobile":"********","userName":"********","email":"***********","password":"*************","registerIP":"***********","isDeleted":false,"isActived":true,"role":0,"registerTime":"Mar 19, 2017 10:36:22 PM","userType":0,"lastLoginIP":"**********","lastLoginTime":"May 31, 2017 8:34:20 PM","loginTimes":12,"user_status":0,"activeTime":"Mar 19, 2017 10:36:22 PM","passwordStrongLevel":3,"ucSyncStatus":true,"nickName":"守望幸福者","avatar":"http://avatar.csdn.net/F/B/B/1_qq_37976565.jpg","encryptUserInfo":"NJvdNs61FhWDpZYpTXFbIeSjOrxmhO4Qi+jCwWOvjw4Xjh2VoJsScx51TTfIbH5SIg7Jqb7vyW4XEhMleHCtHmfOhtWC6TIG44VG2z0DGPKXgadGoqfEre/oMPYtziEYvf9QDExgQxeXEqMI2vbiXA\u003d\u003d"};
var userInfo = "NJvdNs61FhWDpZYpTXFbIeSjOrxmhO4Qi+jCwWOvjw4Xjh2VoJsScx51TTfIbH5SIg7Jqb7vyW4XEhMleHCtHmfOhtWC6TIG44VG2z0DGPKXgadGoqfEre/oMPYtziEYvf9QDExgQxeXEqMI2vbiXA==";
data.userName = data.userName;
data.encryptUserInfo = userInfo;
csdn.login_param.call = function (){
location.href = redirect;
}
var _data = {};
_data.status = true;
_data.data = data;
var oauth = "";
if(oauth == "true"){
csdn.login_back(_data);
}else{
csdn.login_data = data;
csdn.login_end();
};
}
</script>
</head>
<body onload="redirect_back();"></body>
</html>
登录后携带Cookie访问 http://ask.csdn.net/ 得到的页面(节选)
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta http-equiv="X-UA-Compatible" content="IE=Edge">
<title>编程技术问答-CSDN问答频道</title>
<meta content="CSDN问答频道是领先的技术问答平台,这里有最牛的技术达人,最全的技术疑难问题,包含有编程语言、数据库、移动开发、web前端、网站架构等全方位的技术答疑。" name="description" />
<meta content="技术问答,技术答疑,CSDN问答" name="keywords" />
<link href="http://c.csdnimg.cn/public/favicon.ico" rel="SHORTCUT ICON" />
<link rel="dns-prefetch" href="//www.google-analytics.com">
<link rel="dns-prefetch" href="//avatar.profile.csdn.net">
<link rel="dns-prefetch" href="//my.csdn.net">
<link href="//c.csdnimg.cn/public/common/libs/bootstrap/css/bootstrap.css" media="all" rel="stylesheet" />
<link href="//c.csdnimg.cn/public/static/css/avatar.css" media="all" rel="stylesheet" />
<link href="//c.csdnimg.cn/public/themes/default/css/btn.css" media="all" rel="stylesheet" />
<link href="//c.csdnimg.cn/public/themes/default/css/icon.css" media="all" rel="stylesheet" />
<link href="//c.csdnimg.cn/public/static/css/pagination.css" media="all" rel="stylesheet" />
<link href="//c.csdnimg.cn/public/common/toolbar/css/index.css" media="all" rel="stylesheet" />
<link href="http://c.csdnimg.cn//mini/mini.css" media="all" rel="stylesheet" />
<link href="/assets/font-awesome.min-c873246088eb5658d2a1379d3c334280.css" media="screen" rel="stylesheet" />
<!-- [if IE 7]-->
<link href="/assets/font-awesome-ie7.min-1e0b217d7be56bd5d291e36d59e3f90f.css" media="screen" rel="stylesheet" />
<!-- [endif]-->
<!--[if lte IE 7]>
<style>
.outer{position:relative;}
.middle{position: absolute; top: 50%;}
.inner{position: relative; top:-50%}
</style>
<![endif]-->
<link href="/assets/common-ba6cbbcf1e167f79239fa214cbb7f63d.css" media="screen" rel="stylesheet" />
<link href="/assets/ask_float_block-505f0ef9e74e8998a10227960fc7e1cc.css" media="screen" rel="stylesheet" />
<link href="/assets/editor-1f4b5f7eb599b4f68f6787c0cce7b807.css" media="screen" rel="stylesheet" />
········
/现在要去取消关注
do_unfollow_url = 'http://my.csdn.net/index.php/follow/do_unfollow?username='+now_item_name+'&jsonpcallback=?';
$.ajax({
type: "get",
url: do_unfollow_url,
dataType:"jsonp",
success: function(data){
if(parseInt(data.succ)==1)
{
$(item).removeClass('focus_cancel');
//$(item).html('<i class="icon-plus">'+'</i>'+' 关注');
//$(item).removeAttr("style");
//$(item)[0].onmouseover = null;
//$(item)[0].onmouseout = null;
}
btn_state=1;
}
});
}
}
}
});
}
else
{
$(item)[0].onclick = function(){
window.location.href="https://passport.csdn.net/";
}
}
});
}
showConcern();
</script>
</div>
</div>
<div id="popup_mask" class="marker"></div>
<div id="pop_win" style="position: absolute;background: none repeat scroll 0 0 #FFFFFF;z-index: 10000;border: 1px solid #DCDCDC;"></div>
<!-- script id="noticeScript" type="text/javascript" charset="utf-8" btnId="header_notice_num" wrapId="note1" count="5" subCount="5" src="//c.csdnimg.cn/public/common/toolbar/js/notify.js"></script -->
<script src="http://c.csdnimg.cn/rabbit/exposure-click/main.js?v1.15.221"></script>
<script src="http://c.csdnimg.cn/rabbit/exposure-click/fuckadblock.js"></script>
<script src="http://c.csdnimg.cn/rabbit/exposure-click/block.js"></script>
<script src="/assets/libs/jquery.form-970fa25a8fc7698f768416b39f738706.js"></script>
<script type="text/javascript" src="http://c.csdnimg.cn/pubfooter/js/publib_footer.js"></script>
<script id="csdn-toolbar-id" btnId="header_notice_num" wrapId="note1" count="5" subCount="5" type="text/javascript" src="//c.csdnimg.cn/public/common/toolbar/js/toolbar.js"></script>
<script src="/assets/apps/ask_float_block-0857fdd57287c7b950d30c74e1ac5bb1.js"></script>
<script type="text/javascript">
showMini(".help_peer_con,.technology_hot_person,.mod_glory_info,#scrolldiv,.hot_person_list,.tutor_list,.all_honnor_roll,.honnor_roll,.my_info");
</script>
<script language="javascript" type="text/javascript" src="http://ads.csdn.net/js/async_new.js"></script>
</body>
</html>