private ParseStatus output()

in src/java/org/apache/nutch/tools/arc/ArcSegmentCreator.java [159:260]


    /**
     * Records the fetch outcome for one entry and, when content is available,
     * parses it and writes the datum, the content and the parse output to the
     * given context.
     *
     * <p>The datum always receives the supplied status and the current time as
     * its fetch time; a non-null protocol status is stored in the datum
     * metadata under {@code Nutch.WRITABLE_PROTO_STATUS_KEY}. When
     * {@code content} is {@code null} nothing is written to the context.
     *
     * <p>Failures of the scoring filters and of the parser are logged and do
     * not abort processing. An {@link IOException} raised while writing is
     * logged and not rethrown.
     *
     * @param context     the context that receives the datum, the content and
     *                    one {@code ParseImpl} per parse result entry
     * @param segmentName the segment name stored in the content metadata and in
     *                    the content metadata of every parse
     * @param key         the key under which datum and content are written
     * @param datum       the crawl datum to update and write
     * @param content     the content to parse and write; may be {@code null}
     * @param pstatus     the protocol status to attach to the datum; may be
     *                    {@code null}
     * @param status      the status value set on the datum
     * @return the parse status of the parse registered under
     *         {@code content.getUrl()}, or {@code null} when there is no
     *         content, no parse result, or no parse for that URL
     * @throws InterruptedException declared for the writes to {@code context}
     */
    private ParseStatus output(Context context,
        String segmentName, Text key, CrawlDatum datum, Content content,
        ProtocolStatus pstatus, int status) throws InterruptedException {

      // set the fetch status and the fetch time
      datum.setStatus(status);
      datum.setFetchTime(System.currentTimeMillis());
      if (pstatus != null) {
        datum.getMetaData().put(Nutch.WRITABLE_PROTO_STATUS_KEY, pstatus);
      }

      if (content == null) {
        // without content there is nothing to parse or to write
        return null;
      }

      // add segment to metadata
      Metadata metadata = content.getMetadata();
      metadata.set(Nutch.SEGMENT_NAME_KEY, segmentName);

      // add score to content metadata so that ParseSegment can pick it up.
      try {
        scfilters.passScoreBeforeParsing(key, datum, content);
      } catch (Exception e) {
        if (LOG.isWarnEnabled()) {
          LOG.warn("Couldn't pass score, url " + key + " (" + e + ")");
        }
      }

      // parse the content; a parser failure leaves parseResult null
      ParseResult parseResult = null;
      try {
        parseResult = parseUtil.parse(content);
      } catch (Exception e) {
        LOG.warn("Error parsing: " + key + ": "
            + StringUtils.stringifyException(e));
      }

      // No parse result: sign the content against an empty parse so the datum
      // still carries a signature when it is written below.
      if (parseResult == null) {
        byte[] signature = SignatureFactory.getSignature(conf).calculate(
            content, new ParseStatus().getEmptyParse(conf));
        datum.setSignature(signature);
      }

      try {
        context.write(key, new NutchWritable(datum));
        context.write(key, new NutchWritable(content));

        if (parseResult != null) {
          for (Entry<Text, Parse> entry : parseResult) {
            Text url = entry.getKey();
            Parse parse = entry.getValue();
            ParseStatus parseStatus = parse.getData().getStatus();

            // an unsuccessful parse is replaced by the status' empty parse
            if (!parseStatus.isSuccess()) {
              LOG.warn("Error parsing: " + key + ": " + parseStatus);
              parse = parseStatus.getEmptyParse(conf);
            }

            // Calculate page signature.
            byte[] signature = SignatureFactory.getSignature(conf)
                .calculate(content, parse);

            // Ensure segment name, signature and fetch time are in the
            // content metadata of the parse data
            Metadata contentMeta = parse.getData().getContentMeta();
            contentMeta.set(Nutch.SEGMENT_NAME_KEY, segmentName);
            contentMeta.set(Nutch.SIGNATURE_KEY,
                StringUtil.toHexString(signature));
            contentMeta.set(Nutch.FETCH_TIME_KEY,
                Long.toString(datum.getFetchTime()));

            // only the parse stored under the written key updates the datum
            if (url.equals(key)) {
              datum.setSignature(signature);
            }

            try {
              scfilters.passScoreAfterParsing(url, content, parse);
            } catch (Exception e) {
              if (LOG.isWarnEnabled()) {
                LOG.warn("Couldn't pass score, url " + key + " (" + e + ")");
              }
            }

            context.write(url, new NutchWritable(new ParseImpl(new ParseText(
                parse.getText()), parse.getData(), parse.isCanonical())));
          }
        }
      } catch (IOException e) {
        // best effort: a failed write is logged and the status is still returned
        if (LOG.isErrorEnabled()) {
          LOG.error("ArcSegmentCreator caught:"
              + StringUtils.stringifyException(e));
        }
      }

      if (parseResult != null && !parseResult.isEmpty()) {
        Parse p = parseResult.get(content.getUrl());
        if (p != null) {
          return p.getData().getStatus();
        }
      }

      return null;
    }