public Filter()

in connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java [5614:5849]


    /** Build a filter from a document specification.
    * The specification is walked twice.  The first pass gathers everything needed to
    * canonicalize URLs (mapping rules, the exclusion list, and the per-URL canonicalization
    * policies); the second pass uses those rules to canonicalize the seed feeds and also picks
    * up access tokens, rescan intervals, the feed timeout, and the chromed/dechromed content modes.
    *@param spec is the specification whose child nodes are interpreted.
    *@param warnOnBadSeed is true if a warning should be logged for each seed feed URL that
    * cannot be turned into a document identifier.
    *@throws ManifoldCFException if a regular expression in the specification does not compile,
    * or if a numeric value is malformed or out of range.
    */
    public Filter(Specification spec, boolean warnOnBadSeed)
      throws ManifoldCFException
    {
      String excludes = "";

      // To save allocation, preallocate the seeds set assuming that it will require 1.5x the number of nodes in the spec
      int initialSize = spec.getChildCount();
      if (initialSize == 0)
        initialSize = 1;
      seeds = new HashSet<String>((initialSize * 3) >> 1);

      // First pass.  Find all of the rules (which are necessary to canonicalize the seeds, etc.)
      for (int i = 0; i < spec.getChildCount(); i++)
      {
        SpecificationNode n = spec.getChild(i);
        String nodeType = n.getType();
        if (nodeType.equals(RSSConfig.NODE_MAP))
        {
          String match = n.getAttributeValue(RSSConfig.ATTR_MATCH);
          String map = n.getAttributeValue(RSSConfig.ATTR_MAP);
          // A mapping node without a match expression contributes nothing.
          if (match != null && match.length() > 0)
          {
            Pattern p;
            try
            {
              p = Pattern.compile(match);
            }
            catch (java.util.regex.PatternSyntaxException e)
            {
              throw new ManifoldCFException("Regular expression '"+match+"' is illegal: "+e.getMessage(),e);
            }
            if (map == null)
              map = "";
            mappings.add(new MappingRule(p,map));
          }
        }
        else if (nodeType.equals(RSSConfig.NODE_EXCLUDES))
        {
          // If several exclusion nodes are present, the last one wins.
          excludes = n.getValue();
          if (excludes == null)
            excludes = "";
        }
        else if (nodeType.equals(RSSConfig.NODE_URLSPEC))
        {
          String urlRegexp = n.getAttributeValue(RSSConfig.ATTR_REGEXP);
          if (urlRegexp == null)
            urlRegexp = "";
          // Each flag is on only when its attribute is present and equal to VALUE_YES.
          boolean reorderValue = isYesValue(n.getAttributeValue(RSSConfig.ATTR_REORDER));
          boolean javaSessionValue = isYesValue(n.getAttributeValue(RSSConfig.ATTR_JAVASESSIONREMOVAL));
          boolean aspSessionValue = isYesValue(n.getAttributeValue(RSSConfig.ATTR_ASPSESSIONREMOVAL));
          boolean phpSessionValue = isYesValue(n.getAttributeValue(RSSConfig.ATTR_PHPSESSIONREMOVAL));
          boolean bvSessionValue = isYesValue(n.getAttributeValue(RSSConfig.ATTR_BVSESSIONREMOVAL));
          try
          {
            canonicalizationPolicies.addRule(new CanonicalizationPolicy(Pattern.compile(urlRegexp),reorderValue,javaSessionValue,aspSessionValue,
              phpSessionValue, bvSessionValue));
          }
          catch (java.util.regex.PatternSyntaxException e)
          {
            throw new ManifoldCFException("Canonicalization regular expression '"+urlRegexp+"' is illegal: "+e.getMessage(),e);
          }
        }
      }

      compileList(excludePatterns,stringToArray(excludes));

      // Second pass.  Do the rest of the work, now that the canonicalization rules are complete.
      for (int i = 0; i < spec.getChildCount(); i++)
      {
        SpecificationNode n = spec.getChild(i);
        String nodeType = n.getType();
        if (nodeType.equals(RSSConfig.NODE_FEED))
        {
          String rssURL = n.getAttributeValue(RSSConfig.ATTR_URL);
          if (rssURL != null && rssURL.length() > 0)
          {
            String canonicalURL = makeDocumentIdentifier(canonicalizationPolicies,null,rssURL);
            if (canonicalURL != null)
            {
              seeds.add(canonicalURL);
            }
            else if (warnOnBadSeed)
            {
              Logging.connectors.warn("RSS: Illegal seed feed '"+rssURL+"'");
            }
          }
        }
        else if (nodeType.equals(RSSConfig.NODE_ACCESS))
        {
          // NOTE(review): the token is added even when the attribute is absent (null) -- confirm that is intended.
          String token = n.getAttributeValue(RSSConfig.ATTR_TOKEN);
          acls.add(token);
        }
        else if (nodeType.equals(RSSConfig.NODE_FEEDRESCAN))
        {
          String interval = n.getAttributeValue(RSSConfig.ATTR_VALUE);
          if (interval != null && interval.length() > 0)
            defaultRescanInterval = parseIntegerValue(interval);
        }
        else if (nodeType.equals(RSSConfig.NODE_MINFEEDRESCAN))
        {
          String interval = n.getAttributeValue(RSSConfig.ATTR_VALUE);
          if (interval != null && interval.length() > 0)
            minimumRescanInterval = parseIntegerValue(interval);
        }
        else if (nodeType.equals(RSSConfig.NODE_BADFEEDRESCAN))
        {
          String interval = n.getAttributeValue(RSSConfig.ATTR_VALUE);
          if (interval != null && interval.length() > 0)
            badFeedRescanInterval = parseIntegerValue(interval);
        }
        else if (nodeType.equals(RSSConfig.NODE_FEEDTIMEOUT))
        {
          String value = n.getAttributeValue(RSSConfig.ATTR_VALUE);
          if (value != null && value.length() > 0)
          {
            try
            {
              // Multiply in long arithmetic: an int multiply would silently wrap for large
              // values and produce a meaningless (possibly negative) timeout.
              long scaledTimeout = Integer.parseInt(value) * 1000L;
              if (scaledTimeout > Integer.MAX_VALUE || scaledTimeout < Integer.MIN_VALUE)
                throw new NumberFormatException("Feed timeout value '"+value+"' is out of range");
              feedTimeoutValue = (int)scaledTimeout;
            }
            catch (NumberFormatException e)
            {
              throw new ManifoldCFException("Bad number: "+e.getMessage(),e);
            }
          }
        }
        else if (nodeType.equals(RSSConfig.NODE_DECHROMEDMODE))
        {
          String mode = n.getAttributeValue(RSSConfig.ATTR_MODE);
          if (mode != null && mode.length() > 0)
          {
            // Unrecognized mode strings leave the current setting untouched.
            if (mode.equals(RSSConfig.VALUE_NONE))
              dechromedContentMode = DECHROMED_NONE;
            else if (mode.equals(RSSConfig.VALUE_DESCRIPTION))
              dechromedContentMode = DECHROMED_DESCRIPTION;
            else if (mode.equals(RSSConfig.VALUE_CONTENT))
              dechromedContentMode = DECHROMED_CONTENT;
          }
        }
        else if (nodeType.equals(RSSConfig.NODE_CHROMEDMODE))
        {
          String mode = n.getAttributeValue(RSSConfig.ATTR_MODE);
          if (mode != null && mode.length() > 0)
          {
            // Unrecognized mode strings leave the current setting untouched.
            if (mode.equals(RSSConfig.VALUE_USE))
              chromedContentMode = CHROMED_USE;
            else if (mode.equals(RSSConfig.VALUE_SKIP))
              chromedContentMode = CHROMED_SKIP;
            else if (mode.equals(RSSConfig.VALUE_METADATA))
              chromedContentMode = CHROMED_METADATA_ONLY;
          }
        }
      }
    }

    /** Interpret a yes/no attribute value.
    * Declared as an instance method so that it is legal regardless of how the enclosing
    * class is nested.
    *@param attributeValue is the raw attribute value, or null if the attribute is absent.
    *@return true only if the value is present and equal to RSSConfig.VALUE_YES.
    */
    private boolean isYesValue(String attributeValue)
    {
      return attributeValue != null && attributeValue.equals(RSSConfig.VALUE_YES);
    }

    /** Parse a non-empty numeric attribute value.
    *@param value is the text to parse.
    *@return the parsed value.
    *@throws ManifoldCFException if the text is not a legal integer.
    */
    private Integer parseIntegerValue(String value)
      throws ManifoldCFException
    {
      try
      {
        return Integer.valueOf(value);
      }
      catch (NumberFormatException e)
      {
        throw new ManifoldCFException("Bad number: "+e.getMessage(),e);
      }
    }