public String processSpecificationPost()

in connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java [2720:3116]


  /**
   * Applies the posted form parameters for one connection to the given specification.
   * <p>
   * Every parameter name is prefixed with {@code "s" + connectionSequenceNumber + "_"}.
   * Each section of the form is handled independently: a section is only touched when its
   * marker parameter is present in the post, in which case the existing child nodes of the
   * corresponding type are removed from {@code ds} and fresh nodes are appended at the end.
   * Sections handled, in order: URL mappings, excluded headers, seeds, inclusions,
   * index inclusions, the limit-to-seeds switch, the force-inclusion switch, exclusions,
   * index exclusions, content index exclusions, URL canonicalization specs and access tokens.
   * <p>
   * Count parameters ({@code rssmapcount}, {@code urlregexpcount}, {@code tokencount}) that
   * are absent or empty leave their section untouched. A non-numeric count, or a missing or
   * non-numeric {@code rssindex} / {@code urlregexpnumber} on a delete operation, results in
   * an unchecked {@link NumberFormatException} from {@link Integer#parseInt(String)}.
   *
   * @param variableContext source of the posted form parameters
   * @param locale not used by this method
   * @param ds the specification to modify in place
   * @param connectionSequenceNumber number used to build the parameter-name prefix
   * @return always {@code null} (presumably meaning "no error"; confirm against the interface contract)
   * @throws ManifoldCFException declared by the signature; not thrown directly by the code in this method
   */
  public String processSpecificationPost(IPostParameters variableContext, Locale locale, Specification ds,
    int connectionSequenceNumber)
    throws ManifoldCFException
  {
    String seqPrefix = "s"+connectionSequenceNumber+"_";

    // ---- URL mappings ----
    // An empty count is treated like an absent one, matching the urlregexpcount handling below.
    String mapCountText = variableContext.getParameter(seqPrefix+"rssmapcount");
    if (mapCountText != null && mapCountText.length() > 0)
    {
      // Parse before clearing so a malformed count leaves the existing map nodes intact.
      int mapCount = Integer.parseInt(mapCountText);
      removeSpecChildrenOfType(ds, WebcrawlerConfig.NODE_MAP);

      for (int row = 0; row < mapCount; row++)
      {
        String rowPrefix = seqPrefix+"rssregexp_"+Integer.toString(row)+"_";
        String match = variableContext.getParameter(rowPrefix+"match");
        String map = variableContext.getParameter(rowPrefix+"map");
        if (map == null)
          map = "";
        appendSpecMapNode(ds, match, map);
      }
    }

    // Now, do whatever mapping action we were told to do.
    String rssop = variableContext.getParameter(seqPrefix+"rssop");
    if ("Add".equals(rssop))
    {
      // Add a match to the end; unlike the rows above, a missing map is passed through as null.
      appendSpecMapNode(ds,
        variableContext.getParameter(seqPrefix+"rssmatch"),
        variableContext.getParameter(seqPrefix+"rssmap"));
    }
    else if ("Delete".equals(rssop))
    {
      // rssindex counts map nodes only, so walk all children and count down on each map node.
      int remaining = Integer.parseInt(variableContext.getParameter(seqPrefix+"rssindex"));
      for (int j = 0; j < ds.getChildCount(); j++)
      {
        if (ds.getChild(j).getType().equals(WebcrawlerConfig.NODE_MAP))
        {
          if (remaining == 0)
          {
            ds.removeChild(j);
            break;
          }
          remaining--;
        }
      }
    }

    // ---- Excluded headers ----
    if (variableContext.getParameter(seqPrefix+"excludedheaders_present") != null)
    {
      removeSpecChildrenOfType(ds, WebcrawlerConfig.NODE_EXCLUDEHEADER);
      String[] excludedHeaders = variableContext.getParameterValues(seqPrefix+"excludedheaders");
      if (excludedHeaders != null)
      {
        for (String excludedHeader : excludedHeaders)
        {
          SpecificationNode headerNode = new SpecificationNode(WebcrawlerConfig.NODE_EXCLUDEHEADER);
          headerNode.setAttribute(WebcrawlerConfig.ATTR_VALUE, excludedHeader);
          ds.addChild(ds.getChildCount(),headerNode);
        }
      }
    }

    // ---- Seeds ----
    String seeds = variableContext.getParameter(seqPrefix+"seeds");
    if (seeds != null)
      replaceSpecValueNode(ds, WebcrawlerConfig.NODE_SEEDS, seeds);

    // ---- Inclusions ----
    String inclusions = variableContext.getParameter(seqPrefix+"inclusions");
    if (inclusions != null)
      replaceSpecValueNode(ds, WebcrawlerConfig.NODE_INCLUDES, inclusions);

    // ---- Index inclusions ----
    String inclusionsIndex = variableContext.getParameter(seqPrefix+"inclusionsindex");
    if (inclusionsIndex != null)
      replaceSpecValueNode(ds, WebcrawlerConfig.NODE_INCLUDESINDEX, inclusionsIndex);

    // ---- Seeds-only switch ----
    if (variableContext.getParameter(seqPrefix+"matchinghosts_present") != null)
    {
      replaceSpecSwitchNode(ds, WebcrawlerConfig.NODE_LIMITTOSEEDS,
        variableContext.getParameter(seqPrefix+"matchinghosts"));
    }

    // ---- Force-inclusion switch ----
    // NOTE(review): the marker parameter is spelled "forceinclusion_present" (lower-case i) while
    // the value parameter is "forceInclusion" (upper-case I). Both are reproduced unchanged here;
    // confirm against the form template that this is intended.
    if (variableContext.getParameter(seqPrefix + "forceinclusion_present") != null)
    {
      replaceSpecSwitchNode(ds, WebcrawlerConfig.NODE_FORCEINCLUSION,
        variableContext.getParameter(seqPrefix + "forceInclusion"));
    }

    // ---- Exclusions ----
    String exclusions = variableContext.getParameter(seqPrefix+"exclusions");
    if (exclusions != null)
      replaceSpecValueNode(ds, WebcrawlerConfig.NODE_EXCLUDES, exclusions);

    // ---- Index exclusions ----
    String exclusionsIndex = variableContext.getParameter(seqPrefix+"exclusionsindex");
    if (exclusionsIndex != null)
      replaceSpecValueNode(ds, WebcrawlerConfig.NODE_EXCLUDESINDEX, exclusionsIndex);

    // ---- Content index exclusions ----
    String exclusionsContentIndex = variableContext.getParameter(seqPrefix+"exclusionscontentindex");
    if (exclusionsContentIndex != null)
      replaceSpecValueNode(ds, WebcrawlerConfig.NODE_EXCLUDESCONTENTINDEX, exclusionsContentIndex);

    // ---- URL specs ----
    String urlRegexpCount = variableContext.getParameter(seqPrefix+"urlregexpcount");
    if (urlRegexpCount != null && urlRegexpCount.length() > 0)
    {
      int regexpCount = Integer.parseInt(urlRegexpCount);
      removeSpecChildrenOfType(ds, WebcrawlerConfig.NODE_URLSPEC);

      // Grab the operation and the index (if any)
      String operation = variableContext.getParameter(seqPrefix+"urlregexpop");
      if (operation == null)
        operation = "Continue";
      // Stays at -1 unless a delete was requested, so it can never match a row index below.
      int deletedRow = -1;
      if (operation.equals("Delete"))
        deletedRow = Integer.parseInt(variableContext.getParameter(seqPrefix+"urlregexpnumber"));

      // Reconstruct urlspec nodes from the posted rows, skipping the row being deleted.
      for (int row = 0; row < regexpCount; row++)
      {
        if (row != deletedRow)
          ds.addChild(ds.getChildCount(), buildUrlSpecNode(variableContext, seqPrefix, "_"+Integer.toString(row)));
      }

      if (operation.equals("Add"))
      {
        // The new row's fields are posted without a row suffix; add it at the end.
        ds.addChild(ds.getChildCount(), buildUrlSpecNode(variableContext, seqPrefix, ""));
      }
    }

    // ---- Access tokens ----
    // An empty count is treated like an absent one, matching the urlregexpcount handling above.
    String tokenCountText = variableContext.getParameter(seqPrefix+"tokencount");
    if (tokenCountText != null && tokenCountText.length() > 0)
    {
      // Delete all tokens first
      removeSpecChildrenOfType(ds, WebcrawlerConfig.NODE_ACCESS);

      int accessCount = Integer.parseInt(tokenCountText);
      for (int row = 0; row < accessCount; row++)
      {
        String rowSuffix = "_"+Integer.toString(row);
        String rowOp = variableContext.getParameter(seqPrefix+"accessop"+rowSuffix);
        if (rowOp != null && rowOp.equals("Delete"))
        {
          // This row is being deleted; do not re-add it.
          continue;
        }
        appendSpecAccessNode(ds, variableContext.getParameter(seqPrefix+"spectoken"+rowSuffix));
      }

      String accessOp = variableContext.getParameter(seqPrefix+"accessop");
      if (accessOp != null && accessOp.equals("Add"))
        appendSpecAccessNode(ds, variableContext.getParameter(seqPrefix+"spectoken"));
    }

    return null;
  }

  /** Removes every direct child of {@code ds} whose type equals {@code nodeType}. */
  private static void removeSpecChildrenOfType(Specification ds, String nodeType)
    throws ManifoldCFException
  {
    int i = 0;
    while (i < ds.getChildCount())
    {
      // Only advance when nothing was removed; removal shifts the next child into slot i.
      if (ds.getChild(i).getType().equals(nodeType))
        ds.removeChild(i);
      else
        i++;
    }
  }

  /** Replaces all children of type {@code nodeType} with a single node carrying {@code value} as its value. */
  private static void replaceSpecValueNode(Specification ds, String nodeType, String value)
    throws ManifoldCFException
  {
    removeSpecChildrenOfType(ds, nodeType);
    SpecificationNode valueNode = new SpecificationNode(nodeType);
    valueNode.setValue(value);
    ds.addChild(ds.getChildCount(),valueNode);
  }

  /**
   * Replaces all children of type {@code nodeType} with a single node whose value attribute is
   * "false" when {@code rawValue} is null or "false", and "true" otherwise.
   */
  private static void replaceSpecSwitchNode(Specification ds, String nodeType, String rawValue)
    throws ManifoldCFException
  {
    removeSpecChildrenOfType(ds, nodeType);
    SpecificationNode switchNode = new SpecificationNode(nodeType);
    switchNode.setAttribute(WebcrawlerConfig.ATTR_VALUE,(rawValue==null||rawValue.equals("false"))?"false":"true");
    ds.addChild(ds.getChildCount(),switchNode);
  }

  /** Appends a map node with the given match and map attributes to the end of {@code ds}. */
  private static void appendSpecMapNode(Specification ds, String match, String map)
    throws ManifoldCFException
  {
    SpecificationNode mapNode = new SpecificationNode(WebcrawlerConfig.NODE_MAP);
    mapNode.setAttribute(WebcrawlerConfig.ATTR_MATCH,match);
    mapNode.setAttribute(WebcrawlerConfig.ATTR_MAP,map);
    ds.addChild(ds.getChildCount(),mapNode);
  }

  /** Appends an access node with the given token attribute to the end of {@code ds}. */
  private static void appendSpecAccessNode(Specification ds, String token)
    throws ManifoldCFException
  {
    SpecificationNode accessNode = new SpecificationNode(WebcrawlerConfig.NODE_ACCESS);
    accessNode.setAttribute(WebcrawlerConfig.ATTR_TOKEN,token);
    ds.addChild(ds.getChildCount(),accessNode);
  }

  /**
   * Builds a urlspec node from the posted "urlregexp*" parameters.
   * {@code suffix} is appended to each parameter name: "_" plus the row number for an existing
   * row, or the empty string for the row being added. The regexp attribute is always set; the
   * remaining attributes are set only when their parameter is present and non-empty.
   */
  private static SpecificationNode buildUrlSpecNode(IPostParameters variableContext, String seqPrefix, String suffix)
    throws ManifoldCFException
  {
    String regexp = variableContext.getParameter(seqPrefix+"urlregexp"+suffix);
    String regexpDescription = variableContext.getParameter(seqPrefix+"urlregexpdesc"+suffix);
    String reorder = variableContext.getParameter(seqPrefix+"urlregexpreorder"+suffix);
    String javaSession = variableContext.getParameter(seqPrefix+"urlregexpjava"+suffix);
    String aspSession = variableContext.getParameter(seqPrefix+"urlregexpasp"+suffix);
    String phpSession = variableContext.getParameter(seqPrefix+"urlregexpphp"+suffix);
    String bvSession = variableContext.getParameter(seqPrefix+"urlregexpbv"+suffix);
    String lowercasing = variableContext.getParameter(seqPrefix+"urlregexplowercasing"+suffix);

    SpecificationNode urlSpecNode = new SpecificationNode(WebcrawlerConfig.NODE_URLSPEC);
    urlSpecNode.setAttribute(WebcrawlerConfig.ATTR_REGEXP,regexp);
    setSpecAttributeIfNonEmpty(urlSpecNode, WebcrawlerConfig.ATTR_DESCRIPTION, regexpDescription);
    setSpecAttributeIfNonEmpty(urlSpecNode, WebcrawlerConfig.ATTR_REORDER, reorder);
    setSpecAttributeIfNonEmpty(urlSpecNode, WebcrawlerConfig.ATTR_JAVASESSIONREMOVAL, javaSession);
    setSpecAttributeIfNonEmpty(urlSpecNode, WebcrawlerConfig.ATTR_ASPSESSIONREMOVAL, aspSession);
    setSpecAttributeIfNonEmpty(urlSpecNode, WebcrawlerConfig.ATTR_PHPSESSIONREMOVAL, phpSession);
    setSpecAttributeIfNonEmpty(urlSpecNode, WebcrawlerConfig.ATTR_BVSESSIONREMOVAL, bvSession);
    setSpecAttributeIfNonEmpty(urlSpecNode, WebcrawlerConfig.ATTR_LOWERCASE, lowercasing);
    return urlSpecNode;
  }

  /** Sets {@code attribute} on {@code node} only when {@code value} is non-null and non-empty. */
  private static void setSpecAttributeIfNonEmpty(SpecificationNode node, String attribute, String value)
  {
    if (value != null && value.length() > 0)
      node.setAttribute(attribute,value);
  }