public ProtocolResponse getProtocolOutput()

in core/src/main/java/org/apache/stormcrawler/protocol/httpclient/HttpProtocol.java [157:256]


    /**
     * Fetches {@code url} over HTTP and returns whatever the selected response handler produces.
     *
     * <p>A GET request is issued unless the metadata key {@code http.method.head} parses to {@code
     * true}, in which case a HEAD request is sent instead. When a proxy manager is configured, a
     * proxy is obtained from it for every call and installed on the client builder, together with
     * basic-auth credentials if the proxy carries a user name.
     *
     * <p>Metadata keys read here: {@code http.method.head}, {@link HttpHeaders#LAST_MODIFIED}
     * (sent as {@code If-Modified-Since}), {@code etag} under the protocol metadata prefix (sent
     * as {@code If-None-Match}), {@code http.accept}, {@code http.accept.language} and {@code
     * http.content.limit}.
     *
     * @param url the URL to fetch
     * @param md metadata driving the request; when {@code null} a plain GET is sent with no
     *     metadata-derived headers or cookies (note that it is still handed to the proxy manager
     *     as-is)
     * @return the value returned by the response handler for the executed request
     * @throws Exception if the proxy port is not a valid integer, or if building the client or
     *     executing the request fails
     */
    public ProtocolResponse getProtocolOutput(String url, Metadata md) throws Exception {

        LOG.debug("HTTP connection manager stats {}", CONNECTION_MANAGER.getTotalStats());

        // start from the global request config; replaced below when a proxy is used
        RequestConfig reqConfig = requestConfig;

        // conditionally add a dynamic proxy
        // NOTE(review): builder and requestConfigBuilder are not local to this method and are
        // mutated on every call; settings from one call (credentials provider, route planner,
        // preferred auth schemes) appear to carry over to later calls — confirm this is intended
        // and safe if this method can be invoked concurrently.
        if (proxyManager != null) {
            // retrieve proxy from proxy manager
            SCProxy prox = proxyManager.getProxy(md);

            // parse the port once; it is needed for both the auth scope and the proxy host
            final String proxyAddress = prox.getAddress();
            final int proxyPort = Integer.parseInt(prox.getPort());

            // conditionally configure proxy authentication
            if (StringUtils.isNotBlank(prox.getUsername())) {
                List<String> authSchemes = new ArrayList<>();

                // Can make configurable and add more in future
                authSchemes.add(AuthSchemes.BASIC);
                requestConfigBuilder.setProxyPreferredAuthSchemes(authSchemes);

                BasicCredentialsProvider basicAuthCreds = new BasicCredentialsProvider();
                basicAuthCreds.setCredentials(
                        new AuthScope(proxyAddress, proxyPort),
                        new UsernamePasswordCredentials(prox.getUsername(), prox.getPassword()));
                builder.setDefaultCredentialsProvider(basicAuthCreds);
            }

            HttpHost proxy = new HttpHost(proxyAddress, proxyPort);
            builder.setRoutePlanner(new DefaultProxyRoutePlanner(proxy));

            // save start time for debugging speed impact of request config
            // build
            long buildStart = System.currentTimeMillis();

            // set request config to new configuration with dynamic proxy
            reqConfig = requestConfigBuilder.build();

            LOG.debug(
                    "time to build http request config with proxy: {}ms",
                    System.currentTimeMillis() - buildStart);

            // parameterized so that the proxy is only stringified when debug is enabled
            LOG.debug("fetching with {}", prox);
        }

        // Decide on the HTTP method BEFORE adding any header: creating the request object
        // afterwards would silently drop every header set on the discarded instance.
        final HttpRequestBase request;
        if (md != null && Boolean.parseBoolean(md.getFirstValue("http.method.head"))) {
            request = new HttpHead(url);
        } else {
            request = new HttpGet(url);
        }

        ResponseHandler<ProtocolResponse> responseHandler = this;

        if (md != null) {

            addHeadersToRequest(request, md);

            // conditional request headers
            String lastModified = md.getFirstValue(HttpHeaders.LAST_MODIFIED);
            if (StringUtils.isNotBlank(lastModified)) {
                request.addHeader("If-Modified-Since", HttpHeaders.formatHttpDate(lastModified));
            }

            String ifNoneMatch = md.getFirstValue("etag", protocolMDprefix);
            if (StringUtils.isNotBlank(ifNoneMatch)) {
                request.addHeader("If-None-Match", ifNoneMatch);
            }

            // content negotiation headers; setHeader replaces any value set earlier
            String accept = md.getFirstValue("http.accept");
            if (StringUtils.isNotBlank(accept)) {
                request.setHeader(new BasicHeader("Accept", accept));
            }

            String acceptLanguage = md.getFirstValue("http.accept.language");
            if (StringUtils.isNotBlank(acceptLanguage)) {
                request.setHeader(new BasicHeader("Accept-Language", acceptLanguage));
            }

            // per-URL content limit: swap in a dedicated response handler; an unparsable value
            // is logged and the default handler is kept
            String pageMaxContentStr = md.getFirstValue("http.content.limit");
            if (StringUtils.isNotBlank(pageMaxContentStr)) {
                try {
                    int pageMaxContent = Integer.parseInt(pageMaxContentStr);
                    responseHandler = getResponseHandlerWithContentLimit(pageMaxContent);
                } catch (NumberFormatException e) {
                    LOG.warn("Invalid http.content.limit in metadata: {}", pageMaxContentStr);
                }
            }

            if (useCookies) {
                addCookiesToRequest(request, md);
            }
        }

        request.setConfig(reqConfig);

        // no need to release the connection explicitly as this is handled
        // automatically. The client itself must be closed though.
        try (CloseableHttpClient httpclient = builder.build()) {
            return httpclient.execute(request, responseHandler);
        }
    }