in src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java [208:378]
/**
 * Initializes this instance from the given configuration.
 *
 * <p>
 * Reads the proxy, timeout, content-limit, user-agent, <code>Accept*</code>
 * header, HTTP version, TLS and metadata-storage properties into the
 * corresponding fields, passes the configuration on to {@code robots} and
 * {@code logUtil}, and finally calls {@code logConf()}.
 * </p>
 *
 * <p>
 * Two optional resource files are loaded on a best-effort basis; a failure
 * to read either one is logged as a warning and never propagated:
 * </p>
 * <ul>
 * <li>if <code>http.agent.rotate</code> is true, the list of user agent
 * names from <code>http.agent.rotate.file</code> (default
 * <code>agents.txt</code>); {@code userAgentNames} is set to null if the
 * file cannot be read or contains no names,</li>
 * <li>if <code>http.enable.cookie.header</code> is true, the per-host
 * cookies from <code>http.agent.host.cookie.file</code> (default
 * <code>cookies.txt</code>); {@code hostCookies} is set to null if the file
 * cannot be read.</li>
 * </ul>
 *
 * @param conf
 *          the configuration to read all properties and resources from
 * @throws IllegalArgumentException
 *           if <code>http.proxy.type</code> is not the name of a
 *           {@link Proxy.Type} constant
 */
public void setConf(Configuration conf) {
  this.conf = conf;

  // proxy settings
  this.proxyHost = conf.get("http.proxy.host");
  this.proxyPort = conf.getInt("http.proxy.port", 8080);
  this.proxyType = Proxy.Type.valueOf(conf.get("http.proxy.type", "HTTP"));
  this.proxyException = arrayToMap(
      conf.getStrings("http.proxy.exception.list"));
  // the proxy is only used if a non-empty proxy host is configured
  this.useProxy = (this.proxyHost != null && this.proxyHost.length() > 0);

  // limits
  this.timeout = conf.getInt("http.timeout", 10000);
  this.maxContent = conf.getInt("http.content.limit", 1024 * 1024);
  this.maxDuration = conf.getInt("http.time.limit", -1);
  this.partialAsTruncated = conf.getBoolean("http.partial.truncated", false);

  // request headers
  this.userAgent = getAgentString(conf.get("http.agent.name"),
      conf.get("http.agent.version"), conf.get("http.agent.description"),
      conf.get("http.agent.url"), conf.get("http.agent.email"));
  // the current field values serve as defaults for the Accept* headers
  this.acceptLanguage = conf.get("http.accept.language", this.acceptLanguage)
      .trim();
  this.acceptCharset = conf.get("http.accept.charset", this.acceptCharset)
      .trim();
  this.accept = conf.get("http.accept", this.accept).trim();
  this.mimeTypes = new MimeUtil(conf);

  // backward-compatible default setting
  this.useHttp11 = conf.getBoolean("http.useHttp11", true);
  this.useHttp2 = conf.getBoolean("http.useHttp2", false);
  this.tlsCheckCertificate = conf.getBoolean("http.tls.certificates.check",
      false);

  // which request/response details are stored
  this.responseTime = conf.getBoolean("http.store.responsetime", true);
  this.storeIPAddress = conf.getBoolean("store.ip.address", false);
  this.storeHttpRequest = conf.getBoolean("store.http.request", false);
  this.storeHttpHeaders = conf.getBoolean("store.http.headers", false);
  this.enableIfModifiedsinceHeader = conf
      .getBoolean("http.enable.if.modified.since.header", true);
  this.enableCookieHeader = conf.getBoolean("http.enable.cookie.header",
      true);

  this.robots.setConf(conf);
  this.logUtil.setConf(conf);

  // NUTCH-1941: read list of alternating agent names
  if (conf.getBoolean("http.agent.rotate", false)) {
    String agentsFile = conf.get("http.agent.rotate.file", "agents.txt");
    try {
      // build the list locally so that the field never holds a partial list
      ArrayList<String> agentNames = new ArrayList<String>();
      for (String line : readNonBlankConfLines(conf, agentsFile)) {
        agentNames.add(line.trim());
      }
      if (agentNames.isEmpty()) {
        this.logger.warn(
            "Empty list of user agents in http.agent.rotate.file {}",
            agentsFile);
        this.userAgentNames = null;
      } else {
        this.userAgentNames = agentNames;
      }
    } catch (Exception e) {
      // best effort: an unreadable agents file must not break configuration
      this.logger.warn("Failed to read http.agent.rotate.file {}: {}",
          agentsFile, StringUtils.stringifyException(e));
      this.userAgentNames = null;
    }
    if (this.userAgentNames == null) {
      this.logger.warn(
          "Falling back to fixed user agent set via property http.agent.name");
    }
  }

  // If cookies are enabled, try to load a per-host cookie file
  if (this.enableCookieHeader) {
    String cookieFile = conf.get("http.agent.host.cookie.file",
        "cookies.txt");
    try {
      // build the map locally so that the field never holds a partial map
      HashMap<String, String> cookies = new HashMap<String, String>();
      for (String line : readNonBlankConfLines(conf, cookieFile)) {
        // NOTE(review): a line containing '#' anywhere is treated as a
        // comment and skipped, not only lines starting with '#'. This also
        // drops entries whose cookie value contains '#' - confirm intended.
        if (line.indexOf("#") != -1) {
          continue;
        }
        // expected line format: <host> TAB <cookie>
        String[] parts = line.split("\t");
        if (parts.length == 2) {
          cookies.put(parts[0], parts[1]);
        } else {
          this.logger.warn("Unable to parse cookie file correctly at: {}",
              line);
        }
      }
      this.hostCookies = cookies;
    } catch (Exception e) {
      // best effort: an unreadable cookie file must not break configuration
      this.logger.warn("Failed to read http.agent.host.cookie.file {}: {}",
          cookieFile, StringUtils.stringifyException(e));
      this.hostCookies = null;
    }
  }

  // TLS protocols and cipher suites, used unless overridden by configuration
  String[] protocols = conf.getStrings("http.tls.supported.protocols",
      "TLSv1.3", "TLSv1.2", "TLSv1.1", "TLSv1", "SSLv3");
  String[] ciphers = conf.getStrings("http.tls.supported.cipher.suites",
      "ECDHE-ECDSA-AES128-GCM-SHA256", "ECDHE-RSA-AES128-GCM-SHA256",
      "ECDHE-ECDSA-AES256-GCM-SHA384", "ECDHE-RSA-AES256-GCM-SHA384",
      "ECDHE-ECDSA-CHACHA20-POLY1305", "ECDHE-RSA-CHACHA20-POLY1305",
      "DHE-RSA-AES128-GCM-SHA256", "DHE-RSA-AES256-GCM-SHA384",
      "TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384",
      "TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384",
      "TLS_RSA_WITH_AES_256_CBC_SHA256",
      "TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA384",
      "TLS_ECDH_RSA_WITH_AES_256_CBC_SHA384",
      "TLS_DHE_RSA_WITH_AES_256_CBC_SHA256",
      "TLS_DHE_DSS_WITH_AES_256_CBC_SHA256",
      "TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA",
      "TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA", "TLS_RSA_WITH_AES_256_CBC_SHA",
      "TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA",
      "TLS_ECDH_RSA_WITH_AES_256_CBC_SHA", "TLS_DHE_RSA_WITH_AES_256_CBC_SHA",
      "TLS_DHE_DSS_WITH_AES_256_CBC_SHA",
      "TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256",
      "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256",
      "TLS_RSA_WITH_AES_128_CBC_SHA256",
      "TLS_ECDH_ECDSA_WITH_AES_128_CBC_SHA256",
      "TLS_ECDH_RSA_WITH_AES_128_CBC_SHA256",
      "TLS_DHE_RSA_WITH_AES_128_CBC_SHA256",
      "TLS_DHE_DSS_WITH_AES_128_CBC_SHA256",
      "TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA",
      "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA", "TLS_RSA_WITH_AES_128_CBC_SHA",
      "TLS_ECDH_ECDSA_WITH_AES_128_CBC_SHA",
      "TLS_ECDH_RSA_WITH_AES_128_CBC_SHA", "TLS_DHE_RSA_WITH_AES_128_CBC_SHA",
      "TLS_DHE_DSS_WITH_AES_128_CBC_SHA", "TLS_ECDHE_ECDSA_WITH_RC4_128_SHA",
      "TLS_ECDHE_RSA_WITH_RC4_128_SHA", "SSL_RSA_WITH_RC4_128_SHA",
      "TLS_ECDH_ECDSA_WITH_RC4_128_SHA", "TLS_ECDH_RSA_WITH_RC4_128_SHA",
      "TLS_ECDHE_ECDSA_WITH_3DES_EDE_CBC_SHA",
      "TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA", "SSL_RSA_WITH_3DES_EDE_CBC_SHA",
      "TLS_ECDH_ECDSA_WITH_3DES_EDE_CBC_SHA",
      "TLS_ECDH_RSA_WITH_3DES_EDE_CBC_SHA",
      "SSL_DHE_RSA_WITH_3DES_EDE_CBC_SHA",
      "SSL_DHE_DSS_WITH_3DES_EDE_CBC_SHA", "SSL_RSA_WITH_RC4_128_MD5",
      "TLS_EMPTY_RENEGOTIATION_INFO_SCSV", "TLS_RSA_WITH_NULL_SHA256",
      "TLS_ECDHE_ECDSA_WITH_NULL_SHA", "TLS_ECDHE_RSA_WITH_NULL_SHA",
      "SSL_RSA_WITH_NULL_SHA", "TLS_ECDH_ECDSA_WITH_NULL_SHA",
      "TLS_ECDH_RSA_WITH_NULL_SHA", "SSL_RSA_WITH_NULL_MD5",
      "SSL_RSA_WITH_DES_CBC_SHA", "SSL_DHE_RSA_WITH_DES_CBC_SHA",
      "SSL_DHE_DSS_WITH_DES_CBC_SHA", "TLS_KRB5_WITH_RC4_128_SHA",
      "TLS_KRB5_WITH_RC4_128_MD5", "TLS_KRB5_WITH_3DES_EDE_CBC_SHA",
      "TLS_KRB5_WITH_3DES_EDE_CBC_MD5", "TLS_KRB5_WITH_DES_CBC_SHA",
      "TLS_KRB5_WITH_DES_CBC_MD5", "TLS_AES_256_GCM_SHA384",
      "TLS_CHACHA20_POLY1305_SHA256", "TLS_AES_128_GCM_SHA256",
      "TLS_AES_128_CCM_8_SHA256", "TLS_AES_128_CCM_SHA256");
  this.tlsPreferredProtocols = new HashSet<String>(Arrays.asList(protocols));
  this.tlsPreferredCipherSuites = new HashSet<String>(Arrays.asList(ciphers));

  logConf();
}

/**
 * Reads a configuration resource line by line and returns all lines which
 * are not blank. Lines are returned unmodified (not trimmed). The underlying
 * reader is always closed before this method returns.
 *
 * @param conf
 *          configuration used to locate and open the resource
 * @param resource
 *          name of the resource to read
 * @return the non-blank lines of the resource in file order, possibly empty
 * @throws IOException
 *           if no reader could be obtained for the resource, or if reading
 *           or closing the reader fails
 */
private ArrayList<String> readNonBlankConfLines(Configuration conf,
    String resource) throws IOException {
  Reader reader = conf.getConfResourceAsReader(resource);
  if (reader == null) {
    // report the missing resource by name instead of letting
    // new BufferedReader(null) fail with an anonymous NullPointerException
    throw new IOException(
        "Configuration resource not found or not readable: " + resource);
  }
  ArrayList<String> lines = new ArrayList<String>();
  try (BufferedReader br = new BufferedReader(reader)) {
    String line;
    while ((line = br.readLine()) != null) {
      if (!line.trim().isEmpty()) {
        lines.add(line);
      }
    }
  }
  return lines;
}