public void setConf()

in src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java [208:378]


  public void setConf(Configuration conf) {
    this.conf = conf;
    this.proxyHost = conf.get("http.proxy.host");
    this.proxyPort = conf.getInt("http.proxy.port", 8080);
    this.proxyType = Proxy.Type.valueOf(conf.get("http.proxy.type", "HTTP"));
    this.proxyException = arrayToMap(
        conf.getStrings("http.proxy.exception.list"));
    this.useProxy = (this.proxyHost != null && this.proxyHost.length() > 0);
    this.timeout = conf.getInt("http.timeout", 10000);
    this.maxContent = conf.getInt("http.content.limit", 1024 * 1024);
    this.maxDuration = conf.getInt("http.time.limit", -1);
    this.partialAsTruncated = conf.getBoolean("http.partial.truncated", false);
    this.userAgent = getAgentString(conf.get("http.agent.name"),
        conf.get("http.agent.version"), conf.get("http.agent.description"),
        conf.get("http.agent.url"), conf.get("http.agent.email"));
    this.acceptLanguage = conf.get("http.accept.language", this.acceptLanguage)
        .trim();
    this.acceptCharset = conf.get("http.accept.charset", this.acceptCharset).trim();
    this.accept = conf.get("http.accept", this.accept).trim();
    this.mimeTypes = new MimeUtil(conf);
    // backward-compatible default setting
    this.useHttp11 = conf.getBoolean("http.useHttp11", true);
    this.useHttp2 = conf.getBoolean("http.useHttp2", false);
    this.tlsCheckCertificate = conf.getBoolean("http.tls.certificates.check",
        false);
    this.responseTime = conf.getBoolean("http.store.responsetime", true);
    this.storeIPAddress = conf.getBoolean("store.ip.address", false);
    this.storeHttpRequest = conf.getBoolean("store.http.request", false);
    this.storeHttpHeaders = conf.getBoolean("store.http.headers", false);
    this.enableIfModifiedsinceHeader = conf
        .getBoolean("http.enable.if.modified.since.header", true);
    this.enableCookieHeader = conf.getBoolean("http.enable.cookie.header",
        true);
    this.robots.setConf(conf);

    this.logUtil.setConf(conf);

    // NUTCH-1941: read list of alternating agent names
    if (conf.getBoolean("http.agent.rotate", false)) {
      String agentsFile = conf.get("http.agent.rotate.file", "agents.txt");
      @SuppressWarnings("resource")
      BufferedReader br = null;
      try {
        Reader reader = conf.getConfResourceAsReader(agentsFile);
        br = new BufferedReader(reader);
        this.userAgentNames = new ArrayList<String>();
        String word = "";
        while ((word = br.readLine()) != null) {
          if (!word.trim().isEmpty())
            this.userAgentNames.add(word.trim());
        }

        if (this.userAgentNames.size() == 0) {
          this.logger.warn("Empty list of user agents in http.agent.rotate.file {}",
              agentsFile);
          this.userAgentNames = null;
        }

      } catch (Exception e) {
        this.logger.warn("Failed to read http.agent.rotate.file {}: {}", agentsFile,
            StringUtils.stringifyException(e));
        this.userAgentNames = null;
      } finally {
        if (br != null) {
          try {
            br.close();
          } catch (IOException e) {
            // ignore
          }
        }
      }
      if (this.userAgentNames == null) {
        this.logger.warn(
            "Falling back to fixed user agent set via property http.agent.name");
      }
    }

    // If cookies are enabled, try to load a per-host cookie file
    if (this.enableCookieHeader) {
      String cookieFile = conf.get("http.agent.host.cookie.file",
          "cookies.txt");
      @SuppressWarnings("resource")
      BufferedReader br = null;
      try {
        Reader reader = conf.getConfResourceAsReader(cookieFile);
        br = new BufferedReader(reader);
        this.hostCookies = new HashMap<String, String>();
        String word = "";
        while ((word = br.readLine()) != null) {
          if (!word.trim().isEmpty()) {
            if (word.indexOf("#") == -1) { // skip comment
              String[] parts = word.split("\t");
              if (parts.length == 2) {
                this.hostCookies.put(parts[0], parts[1]);
              } else {
                LOG.warn("Unable to parse cookie file correctly at: " + word);
              }
            }
          }
        }
      } catch (Exception e) {
        this.logger.warn("Failed to read http.agent.host.cookie.file {}: {}",
            cookieFile, StringUtils.stringifyException(e));
        this.hostCookies = null;
      } finally {
        if (br != null) {
          try {
            br.close();
          } catch (IOException e) {
            // ignore
          }
        }
      }
    }

    String[] protocols = conf.getStrings("http.tls.supported.protocols",
        "TLSv1.3", "TLSv1.2", "TLSv1.1", "TLSv1", "SSLv3");
    String[] ciphers = conf.getStrings("http.tls.supported.cipher.suites",
        "ECDHE-ECDSA-AES128-GCM-SHA256", "ECDHE-RSA-AES128-GCM-SHA256",
        "ECDHE-ECDSA-AES256-GCM-SHA384", "ECDHE-RSA-AES256-GCM-SHA384",
        "ECDHE-ECDSA-CHACHA20-POLY1305", "ECDHE-RSA-CHACHA20-POLY1305",
        "DHE-RSA-AES128-GCM-SHA256", "DHE-RSA-AES256-GCM-SHA384",
        "TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384",
        "TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384",
        "TLS_RSA_WITH_AES_256_CBC_SHA256",
        "TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA384",
        "TLS_ECDH_RSA_WITH_AES_256_CBC_SHA384",
        "TLS_DHE_RSA_WITH_AES_256_CBC_SHA256",
        "TLS_DHE_DSS_WITH_AES_256_CBC_SHA256",
        "TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA",
        "TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA", "TLS_RSA_WITH_AES_256_CBC_SHA",
        "TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA",
        "TLS_ECDH_RSA_WITH_AES_256_CBC_SHA", "TLS_DHE_RSA_WITH_AES_256_CBC_SHA",
        "TLS_DHE_DSS_WITH_AES_256_CBC_SHA",
        "TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256",
        "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256",
        "TLS_RSA_WITH_AES_128_CBC_SHA256",
        "TLS_ECDH_ECDSA_WITH_AES_128_CBC_SHA256",
        "TLS_ECDH_RSA_WITH_AES_128_CBC_SHA256",
        "TLS_DHE_RSA_WITH_AES_128_CBC_SHA256",
        "TLS_DHE_DSS_WITH_AES_128_CBC_SHA256",
        "TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA",
        "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA", "TLS_RSA_WITH_AES_128_CBC_SHA",
        "TLS_ECDH_ECDSA_WITH_AES_128_CBC_SHA",
        "TLS_ECDH_RSA_WITH_AES_128_CBC_SHA", "TLS_DHE_RSA_WITH_AES_128_CBC_SHA",
        "TLS_DHE_DSS_WITH_AES_128_CBC_SHA", "TLS_ECDHE_ECDSA_WITH_RC4_128_SHA",
        "TLS_ECDHE_RSA_WITH_RC4_128_SHA", "SSL_RSA_WITH_RC4_128_SHA",
        "TLS_ECDH_ECDSA_WITH_RC4_128_SHA", "TLS_ECDH_RSA_WITH_RC4_128_SHA",
        "TLS_ECDHE_ECDSA_WITH_3DES_EDE_CBC_SHA",
        "TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA", "SSL_RSA_WITH_3DES_EDE_CBC_SHA",
        "TLS_ECDH_ECDSA_WITH_3DES_EDE_CBC_SHA",
        "TLS_ECDH_RSA_WITH_3DES_EDE_CBC_SHA",
        "SSL_DHE_RSA_WITH_3DES_EDE_CBC_SHA",
        "SSL_DHE_DSS_WITH_3DES_EDE_CBC_SHA", "SSL_RSA_WITH_RC4_128_MD5",
        "TLS_EMPTY_RENEGOTIATION_INFO_SCSV", "TLS_RSA_WITH_NULL_SHA256",
        "TLS_ECDHE_ECDSA_WITH_NULL_SHA", "TLS_ECDHE_RSA_WITH_NULL_SHA",
        "SSL_RSA_WITH_NULL_SHA", "TLS_ECDH_ECDSA_WITH_NULL_SHA",
        "TLS_ECDH_RSA_WITH_NULL_SHA", "SSL_RSA_WITH_NULL_MD5",
        "SSL_RSA_WITH_DES_CBC_SHA", "SSL_DHE_RSA_WITH_DES_CBC_SHA",
        "SSL_DHE_DSS_WITH_DES_CBC_SHA", "TLS_KRB5_WITH_RC4_128_SHA",
        "TLS_KRB5_WITH_RC4_128_MD5", "TLS_KRB5_WITH_3DES_EDE_CBC_SHA",
        "TLS_KRB5_WITH_3DES_EDE_CBC_MD5", "TLS_KRB5_WITH_DES_CBC_SHA",
        "TLS_KRB5_WITH_DES_CBC_MD5", "TLS_AES_256_GCM_SHA384",
        "TLS_CHACHA20_POLY1305_SHA256", "TLS_AES_128_GCM_SHA256",
        "TLS_AES_128_CCM_8_SHA256", "TLS_AES_128_CCM_SHA256");

    this.tlsPreferredProtocols = new HashSet<String>(Arrays.asList(protocols));
    this.tlsPreferredCipherSuites = new HashSet<String>(Arrays.asList(ciphers));

    logConf();
  }