src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/HttpResponse.java [68:274]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  private StringBuffer httpHeaders;

  /**
   * URL schemes this response implementation can fetch. The constructor maps
   * the URL's protocol string onto one of these constants and rejects any
   * other protocol with an {@code HttpException}.
   */
  protected enum Scheme {
    /** Chosen for {@code http} URLs; port 80 is used when the URL has none. */
    HTTP,
    /** Chosen for {@code https} URLs; port 443 is used when the URL has none. */
    HTTPS
  }

  /** The Nutch configuration, obtained from the {@link Http} instance. */
  private Configuration conf = null;

  public HttpResponse(Http http, URL url, CrawlDatum datum)
      throws ProtocolException, IOException {

    this.conf = http.getConf();
    this.http = http;
    this.url = url;
    this.orig = url.toString();
    this.base = url.toString();
    Scheme scheme = null;

    if ("http".equals(url.getProtocol())) {
      scheme = Scheme.HTTP;
    } else if ("https".equals(url.getProtocol())) {
      scheme = Scheme.HTTPS;
    } else {
      throw new HttpException("Unknown scheme (not http/https) for url:" + url);
    }

    if (Http.LOG.isTraceEnabled()) {
      Http.LOG.trace("fetching " + url);
    }

    String path = "".equals(url.getFile()) ? "/" : url.getFile();

    // Some servers will redirect a request with a host line like
    // "Host: <hostname>:80" to "http://<hostname>/<orig_path>" - they
    // don't want the :80. The port is therefore only appended to the host
    // when the URL specifies one explicitly.

    String host = url.getHost();
    int port;
    String portString;
    if (url.getPort() == -1) {
      if (scheme == Scheme.HTTP) {
        port = 80;
      } else {
        port = 443;
      }
      portString = "";
    } else {
      port = url.getPort();
      portString = ":" + port;
    }
    Socket socket = null;

    try {
      socket = new Socket(); // create the socket
      socket.setSoTimeout(http.getTimeout());

      // connect
      String sockHost = http.useProxy(url) ? http.getProxyHost() : host;
      int sockPort = http.useProxy(url) ? http.getProxyPort() : port;
      InetSocketAddress sockAddr = new InetSocketAddress(sockHost, sockPort);
      socket.connect(sockAddr, http.getTimeout());

      if (scheme == Scheme.HTTPS) {

        // Optionally skip TLS/SSL certificate validation
        SSLSocketFactory factory;
        if (http.isTlsCheckCertificates()) {
          factory = (SSLSocketFactory) SSLSocketFactory.getDefault();
        } else {
          SSLContext sslContext = SSLContext.getInstance("TLS");
          sslContext.init(null,
              new TrustManager[] { new DummyX509TrustManager(null) }, null);
          factory = sslContext.getSocketFactory();
        }

        SSLSocket sslsocket = (SSLSocket) factory.createSocket(socket, sockHost,
            sockPort, true);
        sslsocket.setUseClientMode(true);

        // Get the protocols and ciphers supported by this JVM
        Set<String> protocols = new HashSet<String>(
            Arrays.asList(sslsocket.getSupportedProtocols()));
        Set<String> ciphers = new HashSet<String>(
            Arrays.asList(sslsocket.getSupportedCipherSuites()));

        // Intersect with preferred protocols and ciphers
        protocols.retainAll(http.getTlsPreferredProtocols());
        ciphers.retainAll(http.getTlsPreferredCipherSuites());

        sslsocket.setEnabledProtocols(
            protocols.toArray(new String[protocols.size()]));
        sslsocket.setEnabledCipherSuites(
            ciphers.toArray(new String[ciphers.size()]));

        sslsocket.startHandshake();
        socket = sslsocket;
      }

      if (sockAddr != null
          && conf.getBoolean("store.ip.address", false) == true) {
        headers.add("_ip_", sockAddr.getAddress().getHostAddress());
      }
      // make request
      OutputStream req = socket.getOutputStream();

      StringBuffer reqStr = new StringBuffer("GET ");
      if (http.useProxy(url)) {
        reqStr.append(url.getProtocol() + "://" + host + portString + path);
      } else {
        reqStr.append(path);
      }

      reqStr.append(" HTTP/1.0\r\n");

      reqStr.append("Host: ");
      reqStr.append(host);
      reqStr.append(portString);
      reqStr.append("\r\n");

      reqStr.append("Accept-Encoding: x-gzip, gzip, deflate\r\n");

      String userAgent = http.getUserAgent();
      if ((userAgent == null) || (userAgent.length() == 0)) {
        if (Http.LOG.isErrorEnabled()) {
          Http.LOG.error("User-agent is not set!");
        }
      } else {
        reqStr.append("User-Agent: ");
        reqStr.append(userAgent);
        reqStr.append("\r\n");
      }

      String acceptLanguage = http.getAcceptLanguage();
      if (!acceptLanguage.isEmpty()) {
        reqStr.append("Accept-Language: ");
        reqStr.append(acceptLanguage);
        reqStr.append("\r\n");
      }

      String acceptCharset = http.getAcceptCharset();
      if (!acceptCharset.isEmpty()) {
        reqStr.append("Accept-Charset: ");
        reqStr.append(acceptCharset);
        reqStr.append("\r\n");
      }

      String accept = http.getAccept();
      if (!accept.isEmpty()) {
        reqStr.append("Accept: ");
        reqStr.append(accept);
        reqStr.append("\r\n");
      }

      if (http.isCookieEnabled()
          && datum.getMetaData().containsKey(HttpBase.COOKIE)) {
        String cookie = ((Text) datum.getMetaData().get(HttpBase.COOKIE))
            .toString();
        reqStr.append("Cookie: ");
        reqStr.append(cookie);
        reqStr.append("\r\n");
      }

      if (http.isIfModifiedSinceEnabled() && datum.getModifiedTime() > 0) {
        reqStr.append("If-Modified-Since: "
            + HttpDateFormat.toString(datum.getModifiedTime()));
        reqStr.append("\r\n");
      }
      reqStr.append("\r\n");

      // store the request in the metadata?
      if (conf.getBoolean("store.http.request", false) == true) {
        headers.add("_request_", reqStr.toString());
      }

      byte[] reqBytes = reqStr.toString().getBytes();

      req.write(reqBytes);
      req.flush();

      PushbackInputStream in = // process response
          new PushbackInputStream(new BufferedInputStream(
              socket.getInputStream(), Http.BUFFER_SIZE), Http.BUFFER_SIZE);

      StringBuffer line = new StringBuffer();

      // store the http headers verbatim
      if (conf.getBoolean("store.http.headers", false) == true) {
        httpHeaders = new StringBuffer();
      }

      headers.add("nutch.fetch.time",
          Long.toString(System.currentTimeMillis()));

      boolean haveSeenNonContinueStatus = false;
      while (!haveSeenNonContinueStatus) {
        // parse status code line
        this.code = parseStatusLine(in, line);
        if (httpHeaders != null)
          httpHeaders.append(line).append("\n");
        // parse headers
        parseHeaders(in, line);
        haveSeenNonContinueStatus = code != 100; // 100 is "Continue"
      }

      // Get Content type header
      String contentType = getHeader(Response.CONTENT_TYPE);
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



src/plugin/protocol-selenium/src/java/org/apache/nutch/protocol/selenium/HttpResponse.java [63:269]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  private StringBuffer httpHeaders;

  /**
   * URL schemes this response implementation can fetch. The constructor maps
   * the URL's protocol string onto one of these constants and rejects any
   * other protocol with an {@code HttpException}.
   */
  protected enum Scheme {
    /** Chosen for {@code http} URLs; port 80 is used when the URL has none. */
    HTTP,
    /** Chosen for {@code https} URLs; port 443 is used when the URL has none. */
    HTTPS
  }

  /** The Nutch configuration, obtained from the {@link Http} instance. */
  private Configuration conf = null;

  public HttpResponse(Http http, URL url, CrawlDatum datum)
      throws ProtocolException, IOException {

    this.conf = http.getConf();
    this.http = http;
    this.url = url;
    this.orig = url.toString();
    this.base = url.toString();
    Scheme scheme = null;

    if ("http".equals(url.getProtocol())) {
      scheme = Scheme.HTTP;
    } else if ("https".equals(url.getProtocol())) {
      scheme = Scheme.HTTPS;
    } else {
      throw new HttpException("Unknown scheme (not http/https) for url:" + url);
    }

    if (Http.LOG.isTraceEnabled()) {
      Http.LOG.trace("fetching " + url);
    }

    String path = "".equals(url.getFile()) ? "/" : url.getFile();

    // Some servers will redirect a request with a host line like
    // "Host: <hostname>:80" to "http://<hostname>/<orig_path>" - they
    // don't want the :80. The port is therefore only appended to the host
    // when the URL specifies one explicitly.

    String host = url.getHost();
    int port;
    String portString;
    if (url.getPort() == -1) {
      if (scheme == Scheme.HTTP) {
        port = 80;
      } else {
        port = 443;
      }
      portString = "";
    } else {
      port = url.getPort();
      portString = ":" + port;
    }
    Socket socket = null;

    try {
      socket = new Socket(); // create the socket
      socket.setSoTimeout(http.getTimeout());

      // connect
      String sockHost = http.useProxy(url) ? http.getProxyHost() : host;
      int sockPort = http.useProxy(url) ? http.getProxyPort() : port;
      InetSocketAddress sockAddr = new InetSocketAddress(sockHost, sockPort);
      socket.connect(sockAddr, http.getTimeout());

      if (scheme == Scheme.HTTPS) {

        // Optionally skip TLS/SSL certificate validation
        SSLSocketFactory factory;
        if (http.isTlsCheckCertificates()) {
          factory = (SSLSocketFactory) SSLSocketFactory.getDefault();
        } else {
          SSLContext sslContext = SSLContext.getInstance("TLS");
          sslContext.init(null,
              new TrustManager[] { new DummyX509TrustManager(null) }, null);
          factory = sslContext.getSocketFactory();
        }

        SSLSocket sslsocket = (SSLSocket) factory.createSocket(socket, sockHost,
            sockPort, true);
        sslsocket.setUseClientMode(true);

        // Get the protocols and ciphers supported by this JVM
        Set<String> protocols = new HashSet<String>(
            Arrays.asList(sslsocket.getSupportedProtocols()));
        Set<String> ciphers = new HashSet<String>(
            Arrays.asList(sslsocket.getSupportedCipherSuites()));

        // Intersect with preferred protocols and ciphers
        protocols.retainAll(http.getTlsPreferredProtocols());
        ciphers.retainAll(http.getTlsPreferredCipherSuites());

        sslsocket.setEnabledProtocols(
            protocols.toArray(new String[protocols.size()]));
        sslsocket.setEnabledCipherSuites(
            ciphers.toArray(new String[ciphers.size()]));

        sslsocket.startHandshake();
        socket = sslsocket;
      }

      if (sockAddr != null
          && conf.getBoolean("store.ip.address", false) == true) {
        headers.add("_ip_", sockAddr.getAddress().getHostAddress());
      }
      // make request
      OutputStream req = socket.getOutputStream();

      StringBuffer reqStr = new StringBuffer("GET ");
      if (http.useProxy(url)) {
        reqStr.append(url.getProtocol() + "://" + host + portString + path);
      } else {
        reqStr.append(path);
      }

      reqStr.append(" HTTP/1.0\r\n");

      reqStr.append("Host: ");
      reqStr.append(host);
      reqStr.append(portString);
      reqStr.append("\r\n");

      reqStr.append("Accept-Encoding: x-gzip, gzip, deflate\r\n");

      String userAgent = http.getUserAgent();
      if ((userAgent == null) || (userAgent.length() == 0)) {
        if (Http.LOG.isErrorEnabled()) {
          Http.LOG.error("User-agent is not set!");
        }
      } else {
        reqStr.append("User-Agent: ");
        reqStr.append(userAgent);
        reqStr.append("\r\n");
      }

      String acceptLanguage = http.getAcceptLanguage();
      if (!acceptLanguage.isEmpty()) {
        reqStr.append("Accept-Language: ");
        reqStr.append(acceptLanguage);
        reqStr.append("\r\n");
      }

      String acceptCharset = http.getAcceptCharset();
      if (!acceptCharset.isEmpty()) {
        reqStr.append("Accept-Charset: ");
        reqStr.append(acceptCharset);
        reqStr.append("\r\n");
      }

      String accept = http.getAccept();
      if (!accept.isEmpty()) {
        reqStr.append("Accept: ");
        reqStr.append(accept);
        reqStr.append("\r\n");
      }

      if (http.isCookieEnabled()
          && datum.getMetaData().containsKey(HttpBase.COOKIE)) {
        String cookie = ((Text) datum.getMetaData().get(HttpBase.COOKIE))
            .toString();
        reqStr.append("Cookie: ");
        reqStr.append(cookie);
        reqStr.append("\r\n");
      }

      if (http.isIfModifiedSinceEnabled() && datum.getModifiedTime() > 0) {
        reqStr.append("If-Modified-Since: "
            + HttpDateFormat.toString(datum.getModifiedTime()));
        reqStr.append("\r\n");
      }
      reqStr.append("\r\n");

      // store the request in the metadata?
      if (conf.getBoolean("store.http.request", false) == true) {
        headers.add("_request_", reqStr.toString());
      }

      byte[] reqBytes = reqStr.toString().getBytes();

      req.write(reqBytes);
      req.flush();

      PushbackInputStream in = // process response
          new PushbackInputStream(new BufferedInputStream(
              socket.getInputStream(), Http.BUFFER_SIZE), Http.BUFFER_SIZE);

      StringBuffer line = new StringBuffer();

      // store the http headers verbatim
      if (conf.getBoolean("store.http.headers", false) == true) {
        httpHeaders = new StringBuffer();
      }

      headers.add("nutch.fetch.time",
          Long.toString(System.currentTimeMillis()));

      boolean haveSeenNonContinueStatus = false;
      while (!haveSeenNonContinueStatus) {
        // parse status code line
        this.code = parseStatusLine(in, line);
        if (httpHeaders != null)
          httpHeaders.append(line).append("\n");
        // parse headers
        parseHeaders(in, line);
        haveSeenNonContinueStatus = code != 100; // 100 is "Continue"
      }

      // Get Content type header
      String contentType = getHeader(Response.CONTENT_TYPE);
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



