public void map()

in src/java/org/apache/nutch/crawl/LinkDb.java [103:171]


   public void map(Text key, ParseData parseData,
            Context context)
                    throws IOException, InterruptedException {
      String fromUrl = key.toString();
      String fromHost = getHost(fromUrl);
      if (urlNormalizers != null) {
        try {
          fromUrl = urlNormalizers
                  .normalize(fromUrl, URLNormalizers.SCOPE_LINKDB); // normalize the url
        } catch (Exception e) {
          LOG.warn("Skipping {} :", fromUrl, e);
          fromUrl = null;
        }
      }
      if (fromUrl != null && urlFilters != null) {
        try {
          fromUrl = urlFilters.filter(fromUrl); // filter the url
        } catch (Exception e) {
          LOG.warn("Skipping {} :", fromUrl, e);
          fromUrl = null;
        }
      }
      if (fromUrl == null)
        return; // discard all outlinks
      Outlink[] outlinks = parseData.getOutlinks();
      Inlinks inlinks = new Inlinks();
      for (int i = 0; i < outlinks.length; i++) {
        Outlink outlink = outlinks[i];
        String toUrl = outlink.getToUrl();

        if (ignoreInternalLinks) {
          String toHost = getHost(toUrl);
          if (toHost == null || toHost.equals(fromHost)) { // internal link
            continue; // skip it
          }
        } else if (ignoreExternalLinks) {
          String toHost = getHost(toUrl);
          if (toHost == null || !toHost.equals(fromHost)) { // external link skip it
            continue;
          }
        }
        if (urlNormalizers != null) {
          try {
            // normalize the url
            toUrl = urlNormalizers.normalize(toUrl, URLNormalizers.SCOPE_LINKDB); 
          } catch (Exception e) {
            LOG.warn("Skipping {} :", toUrl, e);
            toUrl = null;
          }
        }
        if (toUrl != null && urlFilters != null) {
          try {
            toUrl = urlFilters.filter(toUrl); // filter the url
          } catch (Exception e) {
            LOG.warn("Skipping {} :", toUrl, e);
            toUrl = null;
          }
        }
        if (toUrl == null)
          continue;
        inlinks.clear();
        String anchor = outlink.getAnchor(); // truncate long anchors
        if (anchor.length() > maxAnchorLength) {
          anchor = anchor.substring(0, maxAnchorLength);
        }
        inlinks.add(new Inlink(fromUrl, anchor)); // collect inverted link
        context.write(new Text(toUrl), inlinks);
      }
    }