HttpClient之自定义HttpRequestRetryHandler(自定义 重试次数以及重试的时候业务处理)

构造 HttpClient 的时候可以调用 setRetryHandler(HttpRequestRetryHandler)。HttpRequestRetryHandler 是 HTTP 请求出错后进行重试处理的接口,对于某些要求比较严格的业务场景,这个参数还是比较重要的。

HttpRequestRetryHandler 的已知实现类有 DefaultHttpRequestRetryHandler,以及继承了 DefaultHttpRequestRetryHandler 的 StandardHttpRequestRetryHandler。

DefaultHttpRequestRetryHandler

/*
 * ====================================================================
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 *
 */

package org.apache.http.impl.client;

import java.io.IOException;
import java.io.InterruptedIOException;
import java.net.ConnectException;
import java.net.UnknownHostException;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;

import javax.net.ssl.SSLException;

import org.apache.http.HttpEntityEnclosingRequest;
import org.apache.http.HttpRequest;
import org.apache.http.annotation.Immutable;
import org.apache.http.client.HttpRequestRetryHandler;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.client.protocol.HttpClientContext;
import org.apache.http.protocol.HttpContext;
import org.apache.http.util.Args;

/**
 * Stock {@link HttpRequestRetryHandler} that request executors consult after an
 * {@link IOException} to decide whether the request should be executed again.
 * <p>
 * The decision is based on the attempt counter, a set of exception types that are
 * never retried, whether the request was aborted, whether it is treated as
 * idempotent, and whether it has already been sent.
 *
 * @since 4.0
 */
@Immutable
public class DefaultHttpRequestRetryHandler implements HttpRequestRetryHandler {

    /** Shared handler built with the no-argument constructor. */
    public static final DefaultHttpRequestRetryHandler INSTANCE = new DefaultHttpRequestRetryHandler();

    /** Upper bound for the execution count at which a retry is still allowed. */
    private final int retryCount;

    /** Whether a request that has already been sent may still be retried. */
    private final boolean requestSentRetryEnabled;

    /** Exception types (including their subtypes) for which a retry is always refused. */
    private final Set<Class<? extends IOException>> nonRetriableClasses;

    /**
     * Builds a handler with a caller-supplied set of exception types that must not be retried.
     *
     * @param retryCount how many times to retry; 0 means no retries
     * @param requestSentRetryEnabled true if it's OK to retry requests that have been sent
     * @param clazzes the IOException types that should not be retried
     * @since 4.3
     */
    protected DefaultHttpRequestRetryHandler(
            final int retryCount,
            final boolean requestSentRetryEnabled,
            final Collection<Class<? extends IOException>> clazzes) {
        super();
        this.retryCount = retryCount;
        this.requestSentRetryEnabled = requestSentRetryEnabled;
        // Private copy of the supplied types.
        this.nonRetriableClasses = new HashSet<Class<? extends IOException>>(clazzes);
    }

    /**
     * Builds a handler that refuses to retry the following exception types:
     * <ul>
     * <li>InterruptedIOException</li>
     * <li>UnknownHostException</li>
     * <li>ConnectException</li>
     * <li>SSLException</li>
     * </ul>
     *
     * @param retryCount how many times to retry; 0 means no retries
     * @param requestSentRetryEnabled true if it's OK to retry non-idempotent requests that have been sent
     */
    @SuppressWarnings("unchecked")
    public DefaultHttpRequestRetryHandler(final int retryCount, final boolean requestSentRetryEnabled) {
        this(retryCount, requestSentRetryEnabled, Arrays.asList(
                InterruptedIOException.class,
                UnknownHostException.class,
                ConnectException.class,
                SSLException.class));
    }

    /**
     * Builds a handler with a retry count of 3, {@code requestSentRetryEnabled} set to
     * {@code false}, and the same non-retriable exception types as
     * {@link #DefaultHttpRequestRetryHandler(int, boolean)}.
     */
    public DefaultHttpRequestRetryHandler() {
        this(3, false);
    }

    /**
     * Decides whether the failed request should be executed again.
     *
     * @param exception the failure that interrupted the previous attempt; must not be null
     * @param executionCount the attempt counter compared against the configured retry count
     * @param context the execution context the request is read from; must not be null
     * @return {@code true} to retry, {@code false} to give up
     */
    @Override
    public boolean retryRequest(
            final IOException exception,
            final int executionCount,
            final HttpContext context) {
        Args.notNull(exception, "Exception parameter");
        Args.notNull(context, "HTTP context");

        // Retry budget exhausted.
        if (executionCount > this.retryCount) {
            return false;
        }
        // Failure type is on the deny list.
        if (isNonRetriable(exception)) {
            return false;
        }

        final HttpClientContext clientContext = HttpClientContext.adapt(context);
        final HttpRequest request = clientContext.getRequest();

        // The abort check runs before the idempotency check.
        if (requestIsAborted(request)) {
            return false;
        }
        if (handleAsIdempotent(request)) {
            return true;
        }
        // Otherwise retry only if the request was not sent, or if retrying sent
        // requests has been explicitly enabled.
        return !clientContext.isRequestSent() || this.requestSentRetryEnabled;
    }

    /**
     * Tells whether the exception is an instance of any non-retriable type.
     * An exact class match is a special case of the instance test, so one scan covers both.
     */
    private boolean isNonRetriable(final IOException exception) {
        for (final Class<? extends IOException> blocked : this.nonRetriableClasses) {
            if (blocked.isInstance(exception)) {
                return true;
            }
        }
        return false;
    }

    /**
     * @return {@code true} if this handler will retry methods that have
     * successfully sent their request, {@code false} otherwise
     */
    public boolean isRequestSentRetryEnabled() {
        return this.requestSentRetryEnabled;
    }

    /**
     * @return the maximum number of times a method will be retried
     */
    public int getRetryCount() {
        return this.retryCount;
    }

    /**
     * Treats a request as idempotent when it is not an {@link HttpEntityEnclosingRequest}.
     *
     * @since 4.2
     */
    protected boolean handleAsIdempotent(final HttpRequest request) {
        if (request instanceof HttpEntityEnclosingRequest) {
            return false;
        }
        return true;
    }

    /**
     * Reports whether the request (or, for a {@code RequestWrapper}, the request it
     * wraps) is an {@link HttpUriRequest} that has been aborted.
     *
     * @since 4.2
     *
     * @deprecated (4.3)
     */
    @Deprecated
    protected boolean requestIsAborted(final HttpRequest request) {
        // A RequestWrapper does not forward the abort state, so inspect the original.
        final HttpRequest target = (request instanceof RequestWrapper)
                ? ((RequestWrapper) request).getOriginal()
                : request;
        if (!(target instanceof HttpUriRequest)) {
            return false;
        }
        return ((HttpUriRequest) target).isAborted();
    }

}

默认构造函数是

    public DefaultHttpRequestRetryHandler() {
        this(3, false);
    }

参数 requestSentRetryEnabled 表示请求已经发送成功之后是否仍然允许重试,这里设置为 false;我认为一般情况下都不要设为 true。
主要实现的方法是

 boolean retryRequest(IOException exception, int executionCount, HttpContext context);

StandardHttpRequestRetryHandler并没有重写该方法

/**
 * {@link DefaultHttpRequestRetryHandler} variant that decides idempotency from the
 * HTTP method name instead of from the request's type. GET, HEAD, PUT, DELETE,
 * OPTIONS and TRACE are treated as idempotent.
 */
@Immutable
public class StandardHttpRequestRetryHandler extends DefaultHttpRequestRetryHandler {

    /** Upper-case method name mapped to TRUE for every method treated as idempotent. */
    private final Map<String, Boolean> idempotentMethods;

    /**
     * @param retryCount how many times to retry
     * @param requestSentRetryEnabled whether requests that have already been sent may be retried
     */
    public StandardHttpRequestRetryHandler(final int retryCount, final boolean requestSentRetryEnabled) {
        super(retryCount, requestSentRetryEnabled);
        this.idempotentMethods = new ConcurrentHashMap<String, Boolean>();
        final String[] verbs = {"GET", "HEAD", "PUT", "DELETE", "OPTIONS", "TRACE"};
        for (final String verb : verbs) {
            this.idempotentMethods.put(verb, Boolean.TRUE);
        }
    }

    /** Builds a handler with a retry count of 3 and {@code requestSentRetryEnabled} off. */
    public StandardHttpRequestRetryHandler() {
        this(3, false);
    }

    /**
     * Looks up the request's method name, upper-cased with {@link Locale#ROOT}, in the
     * idempotent-method table.
     */
    @Override
    protected boolean handleAsIdempotent(final HttpRequest request) {
        final String verb = request.getRequestLine().getMethod().toUpperCase(Locale.ROOT);
        // A missing entry yields null, which compares unequal to TRUE.
        return Boolean.TRUE.equals(this.idempotentMethods.get(verb));
    }

}

只是重写了

protected boolean handleAsIdempotent(final HttpRequest request)

我们参考后完全可以实现自己的HttpRequestRetryHandler

初始化httpClient
在 HttpClient 4.5 中,初始化的方式已经和以前的版本有差异

static  CloseableHttpClient client = HttpClients.createDefault();  
和
static CloseableHttpClient httpClient=HttpClients.custom().build();  
在该方式中可以添加一些网络请求的设置

可以直接使用匿名类

HttpRequestRetryHandler handler = new HttpRequestRetryHandler() {

    /**
     * Custom retry policy: at most five attempts, never retry SSL failures,
     * always retry a few transient network failures, and otherwise retry only
     * requests that carry no entity.
     *
     * @param exception the failure that interrupted the previous attempt
     * @param retryTimes the attempt counter supplied by the executor
     * @param context the execution context the current request is read from
     * @return {@code true} to retry, {@code false} to give up
     */
    @Override
    public boolean retryRequest(IOException exception, int retryTimes, HttpContext context) {
        if (retryTimes > 5) {
            // Retry budget exhausted.
            return false;
        }
        if (exception instanceof SSLException) {
            // SSL failures are not retried. The original test was a negated clause
            // inside the OR chain below, which made that chain true for every
            // non-SSL exception and left its other checks dead.
            return false;
        }
        if (exception instanceof UnknownHostException
                || exception instanceof ConnectTimeoutException
                || exception instanceof NoHttpResponseException) {
            // Transient failures: unresolved host, connect timeout, no response from the server.
            return true;
        }

        HttpClientContext clientContext = HttpClientContext.adapt(context);
        HttpRequest request = clientContext.getRequest();
        // A request without an entity is treated as idempotent, i.e. repeating it
        // has no further effect, so it is safe to retry.
        return !(request instanceof HttpEntityEnclosingRequest);
    }
};

还可以设置路由策略 即设置代理方式访问

// Route requests through a proxy: wrap the proxy host in a route planner, then build
// the client with that planner, the retry handler, a one-day connection
// time-to-live and the cookie store.
HttpHost proxy = new HttpHost("127.0.0.1", 80);// proxy host and port
        DefaultProxyRoutePlanner routePlanner = new DefaultProxyRoutePlanner(proxy);  
       CloseableHttpClient  httpClient = HttpClients.custom().setRoutePlanner(routePlanner).setRetryHandler(handler)  
                .setConnectionTimeToLive(1, TimeUnit.DAYS).setDefaultCookieStore(cookieStore).build();  

附录:
Httpclient4.5后对于get请求方式的改变

// Request settings applied to each demo request via setConfig.
static RequestConfig config = RequestConfig.custom().setConnectTimeout(6000).setSocketTimeout(6000)  
            .setCookieSpec(CookieSpecs.STANDARD).build(); // sets the timeouts and the cookie policy
    /**
     * Sends a GET request to {@code url}, prints the response headers and body,
     * and returns the body as text.
     *
     * @param url the address to fetch
     * @return the response body, or {@code null} when the request fails with an
     *         {@link IOException} or the response carries no entity
     */
    public static String getDemo(String url) {
        HttpGet get = new HttpGet(url);
        get.setConfig(config);
        String html = null;
        try {
            HttpResponse response = client.execute(get);
            // Print every response header.
            for (Header header : response.getAllHeaders()) {
                System.out.println(header);
            }
            HttpEntity entity = response.getEntity();
            if (entity != null) {
                // Decode the body bytes exactly once. The second argument is the
                // fallback charset used when the response does not declare one;
                // pass the site's real encoding here (usually utf-8). Re-encoding
                // the decoded text with getBytes("gb2312") and decoding it again
                // as utf8, as the previous version did, corrupts non-ASCII text.
                html = EntityUtils.toString(entity, "utf-8");
                System.out.println(html);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return html;
    }

大致流程:新建httpget对象->用httpClient执行->解析返回的response得到自己需要的内容
cookieSpec:即cookie策略。参数为cookiespecs的一些字段。作用:1、如果网站header中有set-cookie字段时,采用默认方式可能会被cookie reject,无法写入cookie。将此属性设置成CookieSpecs.STANDARD_STRICT可避免此情况。2、如果要想忽略cookie访问,则将此属性设置成CookieSpecs.IGNORE_COOKIES。
tips:注意网站编码,否则容易出现乱码
执行post请求:

/**
 * Posts a URL-encoded form to {@code url} and prints the response body.
 * An {@link IOException} is reported with {@code printStackTrace} and not rethrown.
 *
 * @param url the address to post to
 */
public static void postDemo(String url) {
    HttpPost request = new HttpPost(url);
    request.setConfig(config);
    request.setHeader("User-Agent",
            "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.93 Safari/537.36");
    request.setHeader("Connection", "keep-alive");

    // Five placeholder form fields, each "key" = "value".
    List<NameValuePair> formFields = new ArrayList<NameValuePair>();
    for (int i = 0; i < 5; i++) {
        formFields.add(new BasicNameValuePair("key", "value"));
    }

    try {
        request.setEntity(new UrlEncodedFormEntity(formFields, "utf-8"));
        HttpResponse response = client.execute(request);
        System.out.println(EntityUtils.toString(response.getEntity()));
    } catch (IOException e) {
        e.printStackTrace();
    }
}

大致流程:新建post对象->新建需要的表单页->将表单内容设置入请求中->执行并获得response
解析response

// Read the response body as text.
String responseHtml = EntityUtils.toString(response.getEntity());  
int statusCode = response.getStatusLine().getStatusCode();// HTTP status code
// Read response headers.
response.getFirstHeader("key");// first header named "key"
            response.getHeaders("key");// all headers named "key", returned as an array
            response.getLastHeader("key");  
// Get the body as an InputStream. Downloading some resources (a captcha image, for
// example) may require cookies, in which case the download has to go through httpClient.
// NOTE(review): the entity is already read by EntityUtils.toString above; presumably
// these snippets are alternatives rather than one sequence — confirm.
InputStream inputStream = response.getEntity().getContent();

管理cookie

// Create a cookie store and register it as the client's default cookie store,
// so that cookies can be read from it or added to it.
CookieStore cookieStore = new BasicCookieStore(); 
CloseableHttpClient httpClient= HttpClients.custom()
.setDefaultCookieStore(cookieStore).build();  

httpClient里默认自动管理cookie,如果想要提取cookie或者发送自定义的cookie,则需要在httpClient对象初始化时设置一个默认的cookiestore来保存。(方法见初始化httpClient对象里的setDefaultCookieStore)。
得到当前所有cookie:

// Print every cookie currently held by the store, framed by separator lines.
List<Cookie> list = cookieStore.getCookies();
System.out.println("cookie is:");
System.out.println("-----------------------");
for (int i = 0; i < list.size(); i++) {
    System.out.println(list.get(i));
}
System.out.println("-----------------------");

清除所有cookie:

cookieStore.clear();

发送自定义cookie:(new了一个对象之后可以设置多种属性。)

// Build a cookie by hand; further attributes can be set on it after construction.
BasicClientCookie cookie = new BasicClientCookie("name", "value");
cookie.setDomain("domain");
// The expiry date must lie in the future (here: one day ahead). A cookie whose
// expiry is "now" is already expired when it reaches the store, and the store
// does not keep expired cookies, so it would never be sent.
cookie.setExpiryDate(new Date(System.currentTimeMillis() + 24L * 60 * 60 * 1000));
cookieStore.addCookie(cookie);

管理header:
在平常抓取过程中,经常需要在请求中加入许多header伪装成一个正常的浏览器。以免被服务器认出是爬虫而被封。
设置一些常见header:

// Send a desktop Chrome User-Agent string and set the Connection header to keep-alive.
post.setHeader("User-Agent",  
                "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.93 Safari/537.36");  
        post.setHeader("Connection", "keep-alive");  

注意:下载某些网站的资源时,服务器会获取你的来源站,并发出对应的响应。如果来源站不对,可能会被服务器拒绝。此时只需要在请求中加个header就行。

get1.setHeader("Referer", "http://www.a.com");  

ps:
1、爬虫也要遵守基本法,为了不给对方服务器造成负担(也避免被封),尽量在多次请求之间sleep一个随机时长。
2、爬取非英文网站时注意编码格式,国内一般为utf-8,也有一些是gb2312.获取时注意转码。
3、多获得一些可靠IP(备胎),一旦自身ip被封,赶快去找备胎。附带一个简单的判断网站是否需要代理方法:

// Checks whether the target site has to be reached through a proxy.
    /**
     * Probes {@code http://apkpure.com/} with a direct request.
     * <p>
     * The answer is taken from the HTTP status instead of
     * {@code getContentLength()}, which is -1 whenever the server sends no
     * Content-Length header (for example a chunked response) and would make a
     * reachable site look unreachable.
     *
     * @return {@code false} when the direct request is answered with a 2xx or
     *         3xx status; {@code true} for any other status or when the request
     *         fails with an {@link IOException}
     */
    private boolean isNeedProxy() {
        HttpURLConnection connection = null;
        try {
            URL url = new URL("http://apkpure.com/");
            connection = (HttpURLConnection) url.openConnection();
            connection.setConnectTimeout(6000);
            // Without a read timeout a server that accepts the connection but
            // never answers would block this call indefinitely.
            connection.setReadTimeout(6000);
            int status = connection.getResponseCode();
            return status < 200 || status >= 400;
        } catch (IOException e) {
            e.printStackTrace();
            return true;
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
    }
最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
  • 序言:七十年代末,一起剥皮案震惊了整个滨河市,随后出现的几起案子,更是在滨河造成了极大的恐慌,老刑警刘岩,带你破解...
    沈念sama阅读 206,602评论 6 481
  • 序言:滨河连续发生了三起死亡事件,死亡现场离奇诡异,居然都是意外死亡,警方通过查阅死者的电脑和手机,发现死者居然都...
    沈念sama阅读 88,442评论 2 382
  • 文/潘晓璐 我一进店门,熙熙楼的掌柜王于贵愁眉苦脸地迎上来,“玉大人,你说我怎么就摊上这事。” “怎么了?”我有些...
    开封第一讲书人阅读 152,878评论 0 344
  • 文/不坏的土叔 我叫张陵,是天一观的道长。 经常有香客问我,道长,这世上最难降的妖魔是什么? 我笑而不...
    开封第一讲书人阅读 55,306评论 1 279
  • 正文 为了忘掉前任,我火速办了婚礼,结果婚礼上,老公的妹妹穿的比我还像新娘。我一直安慰自己,他们只是感情好,可当我...
    茶点故事阅读 64,330评论 5 373
  • 文/花漫 我一把揭开白布。 她就那样静静地躺着,像睡着了一般。 火红的嫁衣衬着肌肤如雪。 梳的纹丝不乱的头发上,一...
    开封第一讲书人阅读 49,071评论 1 285
  • 那天,我揣着相机与录音,去河边找鬼。 笑死,一个胖子当着我的面吹牛,可吹牛的内容都是我干的。 我是一名探鬼主播,决...
    沈念sama阅读 38,382评论 3 400
  • 文/苍兰香墨 我猛地睁开眼,长吁一口气:“原来是场噩梦啊……” “哼!你这毒妇竟也来了?” 一声冷哼从身侧响起,我...
    开封第一讲书人阅读 37,006评论 0 259
  • 序言:老挝万荣一对情侣失踪,失踪者是张志新(化名)和其女友刘颖,没想到半个月后,有当地人在树林里发现了一具尸体,经...
    沈念sama阅读 43,512评论 1 300
  • 正文 独居荒郊野岭守林人离奇死亡,尸身上长有42处带血的脓包…… 初始之章·张勋 以下内容为张勋视角 年9月15日...
    茶点故事阅读 35,965评论 2 325
  • 正文 我和宋清朗相恋三年,在试婚纱的时候发现自己被绿了。 大学时的朋友给我发了我未婚夫和他白月光在一起吃饭的照片。...
    茶点故事阅读 38,094评论 1 333
  • 序言:一个原本活蹦乱跳的男人离奇死亡,死状恐怖,灵堂内的尸体忽然破棺而出,到底是诈尸还是另有隐情,我是刑警宁泽,带...
    沈念sama阅读 33,732评论 4 323
  • 正文 年R本政府宣布,位于F岛的核电站,受9级特大地震影响,放射性物质发生泄漏。R本人自食恶果不足惜,却给世界环境...
    茶点故事阅读 39,283评论 3 307
  • 文/蒙蒙 一、第九天 我趴在偏房一处隐蔽的房顶上张望。 院中可真热闹,春花似锦、人声如沸。这庄子的主人今日做“春日...
    开封第一讲书人阅读 30,286评论 0 19
  • 文/苍兰香墨 我抬头看了看天上的太阳。三九已至,却和暖如春,着一层夹袄步出监牢的瞬间,已是汗流浃背。 一阵脚步声响...
    开封第一讲书人阅读 31,512评论 1 262
  • 我被黑心中介骗来泰国打工, 没想到刚下飞机就差点儿被人妖公主榨干…… 1. 我叫王不留,地道东北人。 一个月前我还...
    沈念sama阅读 45,536评论 2 354
  • 正文 我出身青楼,却偏偏与公主长得像,于是被迫代替她去往敌国和亲。 传闻我的和亲对象是个残疾皇子,可洞房花烛夜当晚...
    茶点故事阅读 42,828评论 2 345

推荐阅读更多精彩内容

  • Spring Cloud为开发人员提供了快速构建分布式系统中一些常见模式的工具(例如配置管理,服务发现,断路器,智...
    卡卡罗2017阅读 134,600评论 18 139
  • 1. Java基础部分 基础部分的顺序:基本语法,类相关的语法,内部类的语法,继承相关的语法,异常的语法,线程的语...
    子非鱼_t_阅读 31,582评论 18 399
  • =========================================================...
    lavor阅读 3,484评论 0 5
  • 本文包括:1、Filter简介2、Filter是如何实现拦截的?3、Filter开发入门4、Filter的生命周期...
    廖少少阅读 7,248评论 3 56
  • 生不如死 ——致亡者山东女孩徐玉玉 文/陌园清 刚刚躲过一支暗箭 一条黑新闻迅速穿透 十万八千里,路 一根沾满仇...
    陌园清阅读 268评论 0 0