src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HTMLMetaProcessor.java [147:177]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                if (idx == -1) { // assume a mis-formatted entry with just the
                                 // url
                  idx = content.indexOf(';') + 1;
                } else
                  idx += 4;
                if (idx != -1) {
                  String url = content.substring(idx);
                  try {
                    refreshUrl = new URL(url);
                  } catch (Exception e) {
                    // XXX according to the spec, this has to be an absolute
                    // XXX url. However, many websites use relative URLs and
                    // XXX expect browsers to handle that.
                    // XXX Unfortunately, in some cases this may create
                    // XXX infinitely recursive paths (a crawler trap)...
                    // if (!url.startsWith("/")) url = "/" + url;
                    try {
                      refreshUrl = new URL(currURL, url);
                    } catch (Exception e1) {
                      refreshUrl = null;
                    }
                  }
                }
              }
              if (metaTags.getRefresh()) {
                if (refreshUrl == null) {
                  // apparently only refresh time was present. set the URL
                  // to the same URL.
                  refreshUrl = currURL;
                }
                metaTags.setRefreshHref(refreshUrl);
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -


src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/HTMLMetaProcessor.java [216:246]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
      if (idx == -1) { // assume a mis-formatted entry with just the
                       // url
        idx = content.indexOf(';') + 1;
      } else
        idx += 4;
      if (idx != -1) {
        String url = content.substring(idx);
        try {
          refreshUrl = new URL(url);
        } catch (Exception e) {
          // XXX according to the spec, this has to be an absolute
          // XXX url. However, many websites use relative URLs and
          // XXX expect browsers to handle that.
          // XXX Unfortunately, in some cases this may create
          // XXX infinitely recursive paths (a crawler trap)...
          // if (!url.startsWith("/")) url = "/" + url;
          try {
            refreshUrl = new URL(currURL, url);
          } catch (Exception e1) {
            refreshUrl = null;
          }
        }
      }
    }
    if (metaTags.getRefresh()) {
      if (refreshUrl == null) {
        // apparently only refresh time was present. set the URL
        // to the same URL.
        refreshUrl = currURL;
      }
      metaTags.setRefreshHref(refreshUrl);
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -