in core/src/main/java/org/apache/stormcrawler/protocol/okhttp/HttpProtocol.java [287:436]
public ProtocolResponse getProtocolOutput(String url, final Metadata metadata)
throws Exception {
// create default local client
OkHttpClient localClient = client;
// conditionally add a dynamic proxy
if (proxyManager != null) {
// retrieve proxy from proxy manager
SCProxy prox = proxyManager.getProxy(metadata);
// conditionally configure proxy authentication
if (StringUtils.isNotBlank(prox.getAddress())) {
// format SCProxy into native Java proxy
Proxy proxy =
new Proxy(
Proxy.Type.valueOf(prox.getProtocol().toUpperCase(Locale.ROOT)),
new InetSocketAddress(
prox.getAddress(), Integer.parseInt(prox.getPort())));
// set proxy in builder
builder.proxy(proxy);
// conditionally add proxy authentication
if (StringUtils.isNotBlank(prox.getUsername())) {
// add proxy authentication header to builder
builder.proxyAuthenticator(
(Route route, Response response) -> {
String credential =
Credentials.basic(prox.getUsername(), prox.getPassword());
return response.request()
.newBuilder()
.header("Proxy-Authorization", credential)
.build();
});
}
}
// save start time for debugging speed impact of client build
long buildStart = System.currentTimeMillis();
// create new local client from builder using proxy
localClient = builder.build();
LOG.debug(
"time to build okhttp client with proxy: {}ms",
System.currentTimeMillis() - buildStart);
LOG.debug("fetching with proxy {} - {} ", url, prox.toString());
}
final Builder rb = new Request.Builder().url(url);
customRequestHeaders.forEach(
(k) -> {
rb.header(k.getKey(), k.getValue());
});
int pageMaxContent = globalMaxContent;
if (metadata != null) {
addHeadersToRequest(rb, metadata);
final String lastModified = metadata.getFirstValue(HttpHeaders.LAST_MODIFIED);
if (StringUtils.isNotBlank(lastModified)) {
rb.header("If-Modified-Since", HttpHeaders.formatHttpDate(lastModified));
}
final String ifNoneMatch = metadata.getFirstValue("etag", protocolMDprefix);
if (StringUtils.isNotBlank(ifNoneMatch)) {
rb.header("If-None-Match", ifNoneMatch);
}
final String accept = metadata.getFirstValue("http.accept");
if (StringUtils.isNotBlank(accept)) {
rb.header("Accept", accept);
}
final String acceptLanguage = metadata.getFirstValue("http.accept.language");
if (StringUtils.isNotBlank(acceptLanguage)) {
rb.header("Accept-Language", acceptLanguage);
}
final String pageMaxContentStr = metadata.getFirstValue("http.content.limit");
if (StringUtils.isNotBlank(pageMaxContentStr)) {
try {
pageMaxContent = Integer.parseInt(pageMaxContentStr);
} catch (NumberFormatException e) {
LOG.warn("Invalid http.content.limit in metadata: {}", pageMaxContentStr);
}
}
if (useCookies) {
addCookiesToRequest(rb, url, metadata);
}
final String postJSONData = metadata.getFirstValue("http.post.json");
if (StringUtils.isNotBlank(postJSONData)) {
RequestBody body = RequestBody.create(postJSONData, JSON);
rb.post(body);
}
final String useHead = metadata.getFirstValue("http.method.head");
if (Boolean.parseBoolean(useHead)) {
rb.head();
}
}
final Request request = rb.build();
final Call call = localClient.newCall(request);
try (Response response = call.execute()) {
final Metadata responsemetadata = new Metadata();
final Headers headers = response.headers();
for (int i = 0, size = headers.size(); i < size; i++) {
final String key = headers.name(i);
String value = headers.value(i);
if (key.equals(ProtocolResponse.REQUEST_HEADERS_KEY)
|| key.equals(ProtocolResponse.RESPONSE_HEADERS_KEY)) {
value =
new String(
Base64.getDecoder().decode(value), StandardCharsets.ISO_8859_1);
}
responsemetadata.addValue(key.toLowerCase(Locale.ROOT), value);
}
final MutableObject trimmed = new MutableObject(TrimmedContentReason.NOT_TRIMMED);
final byte[] bytes = toByteArray(response.body(), pageMaxContent, trimmed);
if (trimmed.getValue() != TrimmedContentReason.NOT_TRIMMED) {
if (!call.isCanceled()) {
call.cancel();
}
responsemetadata.setValue(ProtocolResponse.TRIMMED_RESPONSE_KEY, "true");
responsemetadata.setValue(
ProtocolResponse.TRIMMED_RESPONSE_REASON_KEY,
trimmed.getValue().toString().toLowerCase(Locale.ROOT));
LOG.warn("HTTP content trimmed to {}", bytes.length);
}
final Long DNSResolution = DNStimes.remove(call.toString());
if (DNSResolution != null) {
responsemetadata.setValue("metrics.dns.resolution.msec", DNSResolution.toString());
}
return new ProtocolResponse(bytes, response.code(), responsemetadata);
}
}