public static final String fixHttpHeaders()

in src/java/org/apache/nutch/tools/WARCUtils.java [189:280]


  /**
   * Rewrites a block of HTTP headers so that it is consistent with the
   * content actually stored.
   *
   * <p>
   * The header block is scanned line by line (lines are separated by
   * {@code CRLF}):
   * <ul>
   * <li>The first line is always kept, even if it contains no colon (status
   * line).</li>
   * <li>Any later line without a colon is dropped, except for a last line
   * which is directly followed by {@code CRLF CRLF} at the very end of the
   * header block.</li>
   * <li>A header whose name matches {@code PROBLEMATIC_HEADERS} is kept but
   * its line is prefixed with {@code X_HIDE_HEADER}. A
   * <code>Content-Length</code> header is left untouched if its value equals
   * <code>contentLength</code>; otherwise it is prefixed as well and a new
   * <code>Content-Length</code> header holding <code>contentLength</code> is
   * added right after it.</li>
   * </ul>
   * If anything was changed, or if the block was not terminated by an empty
   * line, the returned string is padded with {@code CRLF} so that it ends
   * with an empty line.
   *
   * @param headers
   *          HTTP headers (status line and header fields) as a single
   *          string, may be null
   * @param contentLength
   *          length of the content the headers should describe
   * @return the fixed headers, the unmodified <code>headers</code> instance
   *         if nothing had to be changed, or null if <code>headers</code> is
   *         null
   */
  public static final String fixHttpHeaders(String headers, int contentLength) {
    if (headers == null) {
      return null;
    }
    /*
     * start: begin of the current line, lineEnd: position of the CRLF ending
     * the current line (or end of input), last: end of the input already
     * copied or deliberately skipped, trailingCrLf: number of CRLFs
     * terminating the output produced so far
     */
    int start = 0, lineEnd = 0, last = 0, trailingCrLf = 0;
    StringBuilder replace = new StringBuilder();
    while (start < headers.length()) {
      lineEnd = headers.indexOf(CRLF, start);
      trailingCrLf = 1;
      if (lineEnd == -1) {
        // last line, not terminated by CRLF
        lineEnd = headers.length();
        trailingCrLf = 0;
      }
      // look for the colon separating header name and value in this line
      int colonPos = -1;
      for (int i = start; i < lineEnd; i++) {
        if (headers.charAt(i) == ':') {
          colonPos = i;
          break;
        }
      }
      if (colonPos == -1) {
        boolean valid = true;
        if (start == 0) {
          // status line (without colon)
          // TODO: http/2
        } else if ((lineEnd + 4) == headers.length()
            && headers.endsWith(CRLF + CRLF)) {
          // ok, trailing empty line
          trailingCrLf = 2;
        } else {
          valid = false;
        }
        if (!valid) {
          // copy everything pending before the invalid line, then skip it
          if (last < start) {
            replace.append(headers.substring(last, start));
          }
          last = lineEnd + 2 * trailingCrLf;
        }
        start = lineEnd + 2 * trailingCrLf;
        if (!valid && trailingCrLf == 0) {
          /*
           * The dropped line was the last one and had no CRLF of its own.
           * Because it is not the first line, the retained text before it
           * already ends with one CRLF: count it, so that only a single
           * CRLF is added below to terminate the header block.
           */
          trailingCrLf = 1;
        }
        /*
         * skip over invalid header line, no further check for problematic
         * headers required
         */
        continue;
      }
      String name = headers.substring(start, colonPos);
      if (PROBLEMATIC_HEADERS.matcher(name).matches()) {
        boolean needsFix = true;
        if (name.equalsIgnoreCase("content-length")) {
          // a Content-Length header is fine if it states the given length
          String value = headers.substring(colonPos + 1, lineEnd).trim();
          try {
            int l = Integer.parseInt(value);
            if (l == contentLength) {
              needsFix = false;
            }
          } catch (NumberFormatException e) {
            // needs to be fixed
          }
        }
        if (needsFix) {
          // copy pending unmodified lines preceding this header
          if (last < start) {
            replace.append(headers.substring(last, start));
          }
          last = lineEnd + 2 * trailingCrLf;
          // keep the original header line but hide it behind a prefix
          replace.append(X_HIDE_HEADER)
              .append(headers.substring(start, lineEnd + 2 * trailingCrLf));
          if (trailingCrLf == 0) {
            // terminate the line before anything else is appended
            replace.append(CRLF);
            trailingCrLf = 1;
          }
          if (name.equalsIgnoreCase("content-length")) {
            // add effective uncompressed and unchunked length of content
            replace.append("Content-Length").append(COLONSP)
                .append(contentLength).append(CRLF);
          }
        }
      }
      start = lineEnd + 2 * trailingCrLf;
    }
    if (last > 0 || trailingCrLf != 2) {
      if (last < headers.length()) {
        // append trailing headers
        replace.append(headers.substring(last));
      }
      // make sure the header block is terminated by an empty line
      while (trailingCrLf < 2) {
        replace.append(CRLF);
        trailingCrLf++;
      }
      return replace.toString();
    }
    // nothing modified and properly terminated: return input as is
    return headers;
  }