public ProtocolResponse getProtocolOutput()

in core/src/main/java/org/apache/stormcrawler/protocol/okhttp/HttpProtocol.java [287:436]


    /**
     * Fetches {@code url} with OkHttp and returns the body, the HTTP status code and the response
     * headers.
     *
     * <p>When a proxy manager is configured, the proxy it returns for this request is applied to a
     * client derived from the default one, so that proxy settings never carry over from one call
     * to the next.
     *
     * <p>The following metadata entries influence the request when present: {@code
     * HttpHeaders.LAST_MODIFIED} (sent as {@code If-Modified-Since}), {@code etag} under the
     * protocol metadata prefix (sent as {@code If-None-Match}), {@code http.accept}, {@code
     * http.accept.language}, {@code http.content.limit} (overrides the global content limit),
     * {@code http.post.json} (sends a POST with that JSON body) and {@code http.method.head}
     * (sends a HEAD request; applied last, so it wins over {@code http.post.json}).
     *
     * @param url the URL to fetch
     * @param metadata metadata associated with the URL; may be {@code null}, in which case only
     *     the configured custom request headers are added to the request
     * @return the (possibly trimmed) content, the status code and the response headers with
     *     lower-cased names; trimming is flagged through {@code
     *     ProtocolResponse.TRIMMED_RESPONSE_KEY} and {@code
     *     ProtocolResponse.TRIMMED_RESPONSE_REASON_KEY}
     * @throws Exception if the request cannot be built or executed, or the body cannot be read
     */
    public ProtocolResponse getProtocolOutput(String url, final Metadata metadata)
            throws Exception {
        // default to the shared client; replaced below when a proxy manager is configured
        OkHttpClient localClient = client;

        // conditionally add a dynamic proxy
        if (proxyManager != null) {
            // retrieve proxy from proxy manager
            final SCProxy prox = proxyManager.getProxy(metadata);

            // Work on a builder derived from the default client instead of mutating the
            // instance-level builder: a proxy or proxy authenticator set there would remain in
            // place for every subsequent call, including calls whose proxy has no address or no
            // credentials. The derived builder starts from the default client's configuration.
            // NOTE(review): assumes 'client' was built from the instance-level builder - confirm.
            final OkHttpClient.Builder proxyBuilder = client.newBuilder();

            // conditionally configure the proxy
            if (StringUtils.isNotBlank(prox.getAddress())) {
                // format SCProxy into native Java proxy
                final Proxy proxy =
                        new Proxy(
                                Proxy.Type.valueOf(prox.getProtocol().toUpperCase(Locale.ROOT)),
                                new InetSocketAddress(
                                        prox.getAddress(), Integer.parseInt(prox.getPort())));

                proxyBuilder.proxy(proxy);

                // conditionally add proxy authentication
                if (StringUtils.isNotBlank(prox.getUsername())) {
                    // answer proxy authentication challenges with basic credentials
                    proxyBuilder.proxyAuthenticator(
                            (Route route, Response response) -> {
                                String credential =
                                        Credentials.basic(prox.getUsername(), prox.getPassword());
                                return response.request()
                                        .newBuilder()
                                        .header("Proxy-Authorization", credential)
                                        .build();
                            });
                }
            }

            // save start time for debugging speed impact of client build
            final long buildStart = System.currentTimeMillis();

            // create new local client for this request only
            localClient = proxyBuilder.build();

            LOG.debug(
                    "time to build okhttp client with proxy: {}ms",
                    System.currentTimeMillis() - buildStart);

            // pass the proxy itself so that it is only formatted when debug logging is enabled
            LOG.debug("fetching with proxy {} - {} ", url, prox);
        }

        final Builder rb = new Request.Builder().url(url);
        customRequestHeaders.forEach(header -> rb.header(header.getKey(), header.getValue()));

        // per-page content limit, overridable through the metadata
        int pageMaxContent = globalMaxContent;

        if (metadata != null) {
            addHeadersToRequest(rb, metadata);

            // conditional request headers based on what was stored from a previous fetch
            final String lastModified = metadata.getFirstValue(HttpHeaders.LAST_MODIFIED);
            if (StringUtils.isNotBlank(lastModified)) {
                rb.header("If-Modified-Since", HttpHeaders.formatHttpDate(lastModified));
            }

            final String ifNoneMatch = metadata.getFirstValue("etag", protocolMDprefix);
            if (StringUtils.isNotBlank(ifNoneMatch)) {
                rb.header("If-None-Match", ifNoneMatch);
            }

            final String accept = metadata.getFirstValue("http.accept");
            if (StringUtils.isNotBlank(accept)) {
                rb.header("Accept", accept);
            }

            final String acceptLanguage = metadata.getFirstValue("http.accept.language");
            if (StringUtils.isNotBlank(acceptLanguage)) {
                rb.header("Accept-Language", acceptLanguage);
            }

            final String pageMaxContentStr = metadata.getFirstValue("http.content.limit");
            if (StringUtils.isNotBlank(pageMaxContentStr)) {
                try {
                    pageMaxContent = Integer.parseInt(pageMaxContentStr);
                } catch (NumberFormatException e) {
                    // keep the global limit when the override is not a valid integer
                    LOG.warn("Invalid http.content.limit in metadata: {}", pageMaxContentStr);
                }
            }

            if (useCookies) {
                addCookiesToRequest(rb, url, metadata);
            }

            final String postJSONData = metadata.getFirstValue("http.post.json");
            if (StringUtils.isNotBlank(postJSONData)) {
                RequestBody body = RequestBody.create(postJSONData, JSON);
                rb.post(body);
            }

            // evaluated after the POST handling: a HEAD request replaces any POST set above
            final String useHead = metadata.getFirstValue("http.method.head");
            if (Boolean.parseBoolean(useHead)) {
                rb.head();
            }
        }

        final Request request = rb.build();

        final Call call = localClient.newCall(request);

        try (Response response = call.execute()) {

            final Metadata responsemetadata = new Metadata();
            final Headers headers = response.headers();

            for (int i = 0, size = headers.size(); i < size; i++) {
                final String key = headers.name(i);
                String value = headers.value(i);

                // these two entries arrive Base64-encoded and are stored decoded
                if (key.equals(ProtocolResponse.REQUEST_HEADERS_KEY)
                        || key.equals(ProtocolResponse.RESPONSE_HEADERS_KEY)) {
                    value =
                            new String(
                                    Base64.getDecoder().decode(value), StandardCharsets.ISO_8859_1);
                }

                responsemetadata.addValue(key.toLowerCase(Locale.ROOT), value);
            }

            final MutableObject trimmed = new MutableObject(TrimmedContentReason.NOT_TRIMMED);
            final byte[] bytes = toByteArray(response.body(), pageMaxContent, trimmed);
            if (trimmed.getValue() != TrimmedContentReason.NOT_TRIMMED) {
                // the rest of the body is not needed: abort the transfer
                if (!call.isCanceled()) {
                    call.cancel();
                }
                responsemetadata.setValue(ProtocolResponse.TRIMMED_RESPONSE_KEY, "true");
                responsemetadata.setValue(
                        ProtocolResponse.TRIMMED_RESPONSE_REASON_KEY,
                        trimmed.getValue().toString().toLowerCase(Locale.ROOT));
                LOG.warn("HTTP content trimmed to {}", bytes.length);
            }

            final Long DNSResolution = DNStimes.remove(call.toString());
            if (DNSResolution != null) {
                responsemetadata.setValue("metrics.dns.resolution.msec", DNSResolution.toString());
            }

            return new ProtocolResponse(bytes, response.code(), responsemetadata);
        } finally {
            // Drop any DNS timing still registered for this call when the request or the body
            // read failed, so that failed fetches do not leave entries behind; this is a no-op
            // after a successful fetch because the entry has already been removed above.
            // NOTE(review): assumes entries are registered under call.toString() - confirm.
            DNStimes.remove(call.toString());
        }
    }