in src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java [215:346]
/**
 * Stores the given configuration and initializes the HTTP protocol settings
 * of this instance from it.
 *
 * <p>
 * The following groups of properties are read:
 * </p>
 * <ul>
 * <li>proxy: {@code http.proxy.host}, {@code http.proxy.port},
 * {@code http.proxy.type}, {@code http.proxy.exception.list}</li>
 * <li>limits: {@code http.timeout}, {@code http.content.limit},
 * {@code http.time.limit}, {@code http.partial.truncated}</li>
 * <li>request headers: {@code http.agent.*}, {@code http.accept.language},
 * {@code http.accept.charset}, {@code http.accept},
 * {@code http.enable.if.modified.since.header},
 * {@code http.enable.cookie.header}</li>
 * <li>protocol and TLS: {@code http.useHttp11}, {@code http.useHttp2},
 * {@code http.tls.certificates.check},
 * {@code http.tls.supported.protocols},
 * {@code http.tls.supported.cipher.suites}</li>
 * <li>metadata storage: {@code http.store.responsetime},
 * {@code store.ip.address}, {@code store.http.request},
 * {@code store.http.headers}, {@code store.protocol.versions}</li>
 * <li>optional resource files: {@code http.agent.rotate.file} (only if
 * {@code http.agent.rotate} is true) and
 * {@code http.agent.host.cookie.file} (only if cookies are enabled)</li>
 * </ul>
 *
 * <p>
 * Problems with the optional resource files never abort the configuration:
 * a warning is logged and the corresponding field is set to {@code null}.
 * </p>
 *
 * @param conf
 *          the configuration to read the settings from
 * @throws IllegalArgumentException
 *           if {@code http.proxy.type} is not the name of a
 *           {@link Proxy.Type} constant
 */
public void setConf(Configuration conf) {
  this.conf = conf;

  // proxy
  this.proxyHost = conf.get("http.proxy.host");
  this.proxyPort = conf.getInt("http.proxy.port", 8080);
  this.proxyType = Proxy.Type.valueOf(conf.get("http.proxy.type", "HTTP"));
  this.proxyException = arrayToMap(
      conf.getStrings("http.proxy.exception.list"));
  // a proxy is used only if a non-empty host name is configured
  this.useProxy = (this.proxyHost != null && this.proxyHost.length() > 0);

  // limits
  this.timeout = conf.getInt("http.timeout", 10000);
  this.maxContent = conf.getInt("http.content.limit", 1024 * 1024);
  this.maxDuration = conf.getInt("http.time.limit", -1);
  this.partialAsTruncated = conf.getBoolean("http.partial.truncated", false);

  // request headers; the current field values serve as defaults
  this.userAgent = getAgentString(conf.get("http.agent.name"),
      conf.get("http.agent.version"), conf.get("http.agent.description"),
      conf.get("http.agent.url"), conf.get("http.agent.email"));
  this.acceptLanguage = conf.get("http.accept.language", this.acceptLanguage)
      .trim();
  this.acceptCharset = conf.get("http.accept.charset", this.acceptCharset)
      .trim();
  this.accept = conf.get("http.accept", this.accept).trim();
  this.mimeTypes = new MimeUtil(conf);

  // backward-compatible default setting
  this.useHttp11 = conf.getBoolean("http.useHttp11", true);
  this.useHttp2 = conf.getBoolean("http.useHttp2", false);
  this.tlsCheckCertificate = conf.getBoolean("http.tls.certificates.check",
      false);

  // what to record about each fetch
  this.responseTime = conf.getBoolean("http.store.responsetime", true);
  this.storeIPAddress = conf.getBoolean("store.ip.address", false);
  this.storeHttpRequest = conf.getBoolean("store.http.request", false);
  this.storeHttpHeaders = conf.getBoolean("store.http.headers", false);
  this.storeProtocolVersions = conf.getBoolean("store.protocol.versions",
      false);

  this.enableIfModifiedsinceHeader = conf
      .getBoolean("http.enable.if.modified.since.header", true);
  this.enableCookieHeader = conf.getBoolean("http.enable.cookie.header",
      true);

  this.robots.setConf(conf);
  this.logUtil.setConf(conf);

  // NUTCH-1941: read list of alternating agent names
  if (conf.getBoolean("http.agent.rotate", false)) {
    String agentsFile = conf.get("http.agent.rotate.file", "agents.txt");
    try {
      Reader reader = conf.getConfResourceAsReader(agentsFile);
      if (reader == null) {
        // the resource is missing or could not be opened: report this
        // explicitly instead of failing with a NullPointerException below
        this.logger.warn(
            "Failed to read http.agent.rotate.file {}: resource not found",
            agentsFile);
        this.userAgentNames = null;
      } else {
        // collect into a local list, the field is set once reading is done
        ArrayList<String> agentNames = new ArrayList<>();
        try (BufferedReader br = new BufferedReader(reader)) {
          String line;
          while ((line = br.readLine()) != null) {
            String name = line.trim();
            if (!name.isEmpty()) {
              agentNames.add(name);
            }
          }
        }
        if (agentNames.isEmpty()) {
          this.logger.warn(
              "Empty list of user agents in http.agent.rotate.file {}",
              agentsFile);
          this.userAgentNames = null;
        } else {
          this.userAgentNames = agentNames;
        }
      }
    } catch (Exception e) {
      // best effort: any failure (including on close) disables rotation
      this.logger.warn("Failed to read http.agent.rotate.file {}: {}",
          agentsFile, StringUtils.stringifyException(e));
      this.userAgentNames = null;
    }
    if (this.userAgentNames == null) {
      this.logger.warn(
          "Falling back to fixed user agent set via property http.agent.name");
    }
  }

  // If cookies are enabled, try to load a per-host cookie file
  if (this.enableCookieHeader) {
    String cookieFile = conf.get("http.agent.host.cookie.file",
        "cookies.txt");
    try {
      Reader reader = conf.getConfResourceAsReader(cookieFile);
      if (reader == null) {
        // the resource is missing or could not be opened: report this
        // explicitly instead of failing with a NullPointerException below
        this.logger.warn(
            "Failed to read http.agent.host.cookie.file {}: resource not found",
            cookieFile);
        this.hostCookies = null;
      } else {
        // collect into a local map, the field is set once reading is done
        HashMap<String, String> cookies = new HashMap<>();
        try (BufferedReader br = new BufferedReader(reader)) {
          String line;
          while ((line = br.readLine()) != null) {
            if (line.trim().isEmpty()) {
              continue;
            }
            // skip comment
            // NOTE(review): this skips every line containing '#' anywhere,
            // so a cookie value with a '#' is dropped as well - confirm
            // whether only lines starting with '#' should be skipped
            if (line.indexOf("#") != -1) {
              continue;
            }
            // expected format: <host> TAB <cookie>
            String[] parts = line.split("\t");
            if (parts.length == 2) {
              cookies.put(parts[0], parts[1]);
            } else {
              LOG.warn("Unable to parse cookie file correctly at: {}", line);
            }
          }
        }
        this.hostCookies = cookies;
      }
    } catch (Exception e) {
      // best effort: any failure (including on close) disables host cookies
      this.logger.warn("Failed to read http.agent.host.cookie.file {}: {}",
          cookieFile, StringUtils.stringifyException(e));
      this.hostCookies = null;
    }
  }

  String[] protocols = conf.getStrings("http.tls.supported.protocols");
  String[] ciphers = conf.getStrings("http.tls.supported.cipher.suites");
  if (protocols == null) {
    // use SSL3 or above by default
    // NOTE(review): the default list includes legacy protocol versions -
    // confirm that this is still wanted
    protocols = new String[] { "TLSv1.3", "TLSv1.2", "TLSv1.1", "TLSv1",
        "SSLv3" };
  }
  if (ciphers == null) {
    // use default ciphers by default unless manually specified otherwise in
    // the config
    ciphers = ((SSLSocketFactory) SSLSocketFactory.getDefault())
        .getDefaultCipherSuites();
  }
  this.tlsPreferredProtocols = new HashSet<>(Arrays.asList(protocols));
  this.tlsPreferredCipherSuites = new HashSet<>(Arrays.asList(ciphers));

  logConf();
}