private void parseConf()

in src/plugin/index-replace/src/java/org/apache/nutch/indexer/replace/ReplaceIndexer.java [128:230]


  /**
   * Parses the replacement configuration and registers the resulting
   * {@link FieldReplacer}s.
   *
   * <p>
   * The property is broken into lines with {@code LINE_SPLIT}; each trimmed
   * line is split into a name and a value with {@code NAME_VALUE_SPLIT}. Lines
   * are then handled in order:
   * </p>
   * <ul>
   * <li>A name equal to {@code HOSTMATCH} starts a new match set: the value
   * becomes the current host pattern and any current URL pattern is cleared.</li>
   * <li>A name equal to {@code URLMATCH} makes the value the current URL
   * pattern.</li>
   * <li>Any other name is a replacement rule. The name is either
   * {@code field} or {@code sourceField:targetField}. The first character of
   * the value is the separator, followed by
   * {@code pattern<sep>replacement[<sep>flags]}, where {@code flags} is an
   * integer.</li>
   * </ul>
   * <p>
   * Rules are stored in {@code FIELDREPLACERS_BY_URL} under the current URL
   * pattern when one is set, otherwise in {@code FIELDREPLACERS_BY_HOST} under
   * the current host pattern. Malformed lines are logged and skipped; they
   * never abort parsing of the remaining lines.
   * </p>
   *
   * @param propertyValue
   *          the raw multi-line configuration value; {@code null} or blank
   *          input is ignored
   */
  private void parseConf(String propertyValue) {
    if (propertyValue == null || propertyValue.trim().length() == 0) {
      return;
    }

    // At the start, all replacements apply globally to every host.
    Pattern hostPattern = Pattern.compile(".*");
    Pattern urlPattern = null;

    // Split the property into lines
    Matcher lineMatcher = LINE_SPLIT.matcher(propertyValue);
    while (lineMatcher.find()) {
      String line = lineMatcher.group();
      if (line == null || line.length() == 0) {
        continue;
      }

      // Split the line into field and value
      Matcher nameValueMatcher = NAME_VALUE_SPLIT.matcher(line.trim());
      if (!nameValueMatcher.find()) {
        continue;
      }

      // Matcher.group(int) yields null for a group that did not take part in
      // the match, so check for null BEFORE trimming instead of after.
      String rawFieldName = nameValueMatcher.group(1);
      String value = nameValueMatcher.group(2);
      if (rawFieldName == null || value == null) {
        continue;
      }
      String fieldName = rawFieldName.trim();

      // Check if the field name is one of our special cases.
      if (HOSTMATCH.equals(fieldName)) {
        // A new host section always resets the URL restriction.
        urlPattern = null;
        try {
          hostPattern = Pattern.compile(value);
        } catch (PatternSyntaxException pse) {
          LOG.error("hostmatch pattern " + value + " does not compile: "
              + pse.getMessage());
          // Deactivate this invalid match set by making it match no host.
          hostPattern = Pattern.compile("willnotmatchanyhost");
        }
        continue;
      }

      if (URLMATCH.equals(fieldName)) {
        try {
          urlPattern = Pattern.compile(value);
        } catch (PatternSyntaxException pse) {
          LOG.error("urlmatch pattern " + value + " does not compile: "
              + pse.getMessage());
          // Deactivate this invalid match set by making it match no url.
          urlPattern = Pattern.compile("willnotmatchanyurl");
        }
        continue;
      }

      // Values of three characters or fewer are not treated as replacement
      // rules. Report them rather than dropping the line without a trace.
      if (value.length() <= 3) {
        LOG.error("Pattern '" + line + "', not parseable.  Value is too short.");
        continue;
      }

      // If the fieldname has a colon, this indicates a different target
      // field.
      String toFieldName = fieldName;
      int colon = fieldName.indexOf(':');
      if (colon > 0) {
        toFieldName = fieldName.substring(colon + 1);
        fieldName = fieldName.substring(0, colon);
      }

      // The first character of the value is the separator for the rest.
      String sep = value.substring(0, 1);
      String rest = value.substring(1);

      // Divide the value into pattern / replacement / flags.
      int patternEnd = rest.indexOf(sep);
      if (patternEnd < 0) {
        LOG.error("Pattern '" + line
            + "', not parseable.  Missing separator " + sep);
        continue;
      }
      String pattern = rest.substring(0, patternEnd);
      rest = rest.substring(patternEnd + 1);

      // Without a further separator the whole remainder is the replacement.
      int replacementEnd = rest.indexOf(sep);
      String replacement = (replacementEnd < 0) ? rest
          : rest.substring(0, replacementEnd);

      // Anything after the separator that closes the replacement is the
      // flags component.
      int flags = 0;
      if (rest.length() > replacement.length() + 1) {
        String flagsText = rest.substring(replacement.length() + 1).trim();
        try {
          flags = Integer.parseInt(flagsText);
        } catch (NumberFormatException e) {
          LOG.error("Pattern " + line + ", has invalid flags component");
          continue;
        }
      }
      // Only positive flag values are passed on; otherwise null is used.
      Integer iFlags = (flags > 0) ? Integer.valueOf(flags) : null;

      // Make a FieldReplacer out of these params.
      FieldReplacer fr = new FieldReplacer(fieldName, toFieldName, pattern,
          replacement, iFlags);

      // Add this field replacer to the list for this host or URL. A URL
      // pattern, when present, takes precedence over the host pattern.
      boolean byUrl = (urlPattern != null);
      List<FieldReplacer> replacers = byUrl
          ? FIELDREPLACERS_BY_URL.get(urlPattern)
          : FIELDREPLACERS_BY_HOST.get(hostPattern);
      if (replacers == null) {
        replacers = new ArrayList<FieldReplacer>();
        if (byUrl) {
          FIELDREPLACERS_BY_URL.put(urlPattern, replacers);
        } else {
          FIELDREPLACERS_BY_HOST.put(hostPattern, replacers);
        }
      }
      replacers.add(fr);
    }
  }