in connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java [2720:3116]
/**
 * Applies the values posted from the specification editing form to the given specification.
 *
 * <p>Every form parameter is looked up under the prefix {@code "s" + connectionSequenceNumber + "_"}.
 * Each section (mappings, excluded headers, seeds, inclusions, switches, exclusions, URL
 * canonicalization specs, access tokens) is only touched when its own marker/count parameter is
 * present in the post; otherwise the corresponding nodes in {@code ds} are left as they are.
 * When a section is present, all existing nodes of that type are removed and rebuilt from the
 * posted values, with the new nodes appended at the end of the child list.
 *
 * @param variableContext source of the posted form parameters.
 * @param locale not used by this method.
 * @param ds the specification to modify in place.
 * @param connectionSequenceNumber number used to build the parameter-name prefix.
 * @return always {@code null}.
 * @throws ManifoldCFException declared by the method contract; nothing in this method raises it directly.
 * @throws NumberFormatException if a posted count or index parameter ({@code rssmapcount},
 *         {@code rssindex}, {@code urlregexpcount}, {@code urlregexpnumber}, {@code tokencount})
 *         cannot be parsed as an integer.
 */
public String processSpecificationPost(IPostParameters variableContext, Locale locale, Specification ds,
  int connectionSequenceNumber)
  throws ManifoldCFException
{
  final String seqPrefix = "s"+connectionSequenceNumber+"_";

  // ---- URL mappings: rebuild the full list from the posted rows.
  final String mapCountValue = variableContext.getParameter(seqPrefix+"rssmapcount");
  if (mapCountValue != null)
  {
    // The count is parsed before anything is removed, so an unparsable count throws
    // while the existing map nodes are still in place.
    final int mapSize = Integer.parseInt(mapCountValue);
    removeSpecNodesOfType(ds,WebcrawlerConfig.NODE_MAP);
    for (int row = 0; row < mapSize; row++)
    {
      final String rowPrefix = seqPrefix+"rssregexp_"+Integer.toString(row)+"_";
      final String match = variableContext.getParameter(rowPrefix+"match");
      String map = variableContext.getParameter(rowPrefix+"map");
      if (map == null)
      {
        map = "";
      }
      appendMapNode(ds,match,map);
    }
  }

  // ---- URL mappings: apply the requested add/delete operation, if any.
  final String rssop = variableContext.getParameter(seqPrefix+"rssop");
  if ("Add".equals(rssop))
  {
    // Unlike the rebuilt rows above, a missing "rssmap" value is passed through as null here.
    appendMapNode(ds,
      variableContext.getParameter(seqPrefix+"rssmatch"),
      variableContext.getParameter(seqPrefix+"rssmap"));
  }
  else if ("Delete".equals(rssop))
  {
    // "rssindex" counts map nodes only, so skip over nodes of other types while counting down.
    int remaining = Integer.parseInt(variableContext.getParameter(seqPrefix+"rssindex"));
    for (int pos = 0; pos < ds.getChildCount(); pos++)
    {
      if (!ds.getChild(pos).getType().equals(WebcrawlerConfig.NODE_MAP))
      {
        continue;
      }
      if (remaining == 0)
      {
        ds.removeChild(pos);
        break;
      }
      remaining--;
    }
  }

  // ---- Excluded headers: one node per posted value.
  if (variableContext.getParameter(seqPrefix+"excludedheaders_present") != null)
  {
    removeSpecNodesOfType(ds,WebcrawlerConfig.NODE_EXCLUDEHEADER);
    final String[] excludedHeaders = variableContext.getParameterValues(seqPrefix+"excludedheaders");
    if (excludedHeaders != null)
    {
      for (String excludedHeader : excludedHeaders)
      {
        final SpecificationNode headerNode = new SpecificationNode(WebcrawlerConfig.NODE_EXCLUDEHEADER);
        headerNode.setAttribute(WebcrawlerConfig.ATTR_VALUE,excludedHeader);
        ds.addChild(ds.getChildCount(),headerNode);
      }
    }
  }

  // ---- Single-valued text nodes and switches.  The order below determines the order in which
  // the replacement nodes are appended to the specification, so it must not be rearranged.
  replaceValueNode(ds,WebcrawlerConfig.NODE_SEEDS,
    variableContext.getParameter(seqPrefix+"seeds"));
  replaceValueNode(ds,WebcrawlerConfig.NODE_INCLUDES,
    variableContext.getParameter(seqPrefix+"inclusions"));
  replaceValueNode(ds,WebcrawlerConfig.NODE_INCLUDESINDEX,
    variableContext.getParameter(seqPrefix+"inclusionsindex"));

  // Seeds-only switch.
  if (variableContext.getParameter(seqPrefix+"matchinghosts_present") != null)
  {
    replaceSwitchNode(ds,WebcrawlerConfig.NODE_LIMITTOSEEDS,
      variableContext.getParameter(seqPrefix+"matchinghosts"));
  }

  // Force-inclusion switch.
  // NOTE(review): the value parameter is spelled "forceInclusion" (capital I) while its marker is
  // "forceinclusion_present"; kept exactly as-is -- confirm against the form template.
  if (variableContext.getParameter(seqPrefix + "forceinclusion_present") != null)
  {
    replaceSwitchNode(ds,WebcrawlerConfig.NODE_FORCEINCLUSION,
      variableContext.getParameter(seqPrefix + "forceInclusion"));
  }

  replaceValueNode(ds,WebcrawlerConfig.NODE_EXCLUDES,
    variableContext.getParameter(seqPrefix+"exclusions"));
  replaceValueNode(ds,WebcrawlerConfig.NODE_EXCLUDESINDEX,
    variableContext.getParameter(seqPrefix+"exclusionsindex"));
  replaceValueNode(ds,WebcrawlerConfig.NODE_EXCLUDESCONTENTINDEX,
    variableContext.getParameter(seqPrefix+"exclusionscontentindex"));

  // ---- URL specs: rebuild from the posted rows, honoring a Delete or Add operation.
  final String urlRegexpCount = variableContext.getParameter(seqPrefix+"urlregexpcount");
  if (urlRegexpCount != null && urlRegexpCount.length() > 0)
  {
    // Parsed before the existing nodes are removed (see the mapping section above).
    final int regexpCount = Integer.parseInt(urlRegexpCount);
    removeSpecNodesOfType(ds,WebcrawlerConfig.NODE_URLSPEC);

    String operation = variableContext.getParameter(seqPrefix+"urlregexpop");
    if (operation == null)
    {
      operation = "Continue";
    }
    final boolean deleting = operation.equals("Delete");
    int opIndex = -1;
    if (deleting)
    {
      opIndex = Integer.parseInt(variableContext.getParameter(seqPrefix+"urlregexpnumber"));
    }

    for (int row = 0; row < regexpCount; row++)
    {
      // The row selected for deletion is simply not re-added.
      if (deleting && row == opIndex)
      {
        continue;
      }
      ds.addChild(ds.getChildCount(),readUrlSpecNode(variableContext,seqPrefix,"_"+Integer.toString(row)));
    }

    if (operation.equals("Add"))
    {
      // The "new row" fields use the same names without a row suffix.
      ds.addChild(ds.getChildCount(),readUrlSpecNode(variableContext,seqPrefix,""));
    }
  }

  // ---- Access tokens.
  final String tokenCountValue = variableContext.getParameter(seqPrefix+"tokencount");
  if (tokenCountValue != null)
  {
    // Here the existing tokens are removed before the count is parsed, matching the
    // established behavior of this section.
    removeSpecNodesOfType(ds,WebcrawlerConfig.NODE_ACCESS);
    final int accessCount = Integer.parseInt(tokenCountValue);
    for (int row = 0; row < accessCount; row++)
    {
      final String rowSuffix = "_"+Integer.toString(row);
      final String rowOp = variableContext.getParameter(seqPrefix+"accessop"+rowSuffix);
      if ("Delete".equals(rowOp))
      {
        // A row flagged for deletion is skipped rather than re-added.
        continue;
      }
      appendAccessNode(ds,variableContext.getParameter(seqPrefix+"spectoken"+rowSuffix));
    }

    if ("Add".equals(variableContext.getParameter(seqPrefix+"accessop")))
    {
      appendAccessNode(ds,variableContext.getParameter(seqPrefix+"spectoken"));
    }
  }

  return null;
}

/** Removes every direct child of {@code ds} whose type equals {@code nodeType}. */
private static void removeSpecNodesOfType(Specification ds, String nodeType)
  throws ManifoldCFException
{
  int pos = 0;
  while (pos < ds.getChildCount())
  {
    if (ds.getChild(pos).getType().equals(nodeType))
    {
      // Do not advance: the next child has shifted into this position.
      ds.removeChild(pos);
    }
    else
    {
      pos++;
    }
  }
}

/** Appends a map node carrying the given match and map attribute values to {@code ds}. */
private static void appendMapNode(Specification ds, String match, String map)
  throws ManifoldCFException
{
  final SpecificationNode node = new SpecificationNode(WebcrawlerConfig.NODE_MAP);
  node.setAttribute(WebcrawlerConfig.ATTR_MATCH,match);
  node.setAttribute(WebcrawlerConfig.ATTR_MAP,map);
  ds.addChild(ds.getChildCount(),node);
}

/** Appends an access node carrying the given token attribute value to {@code ds}. */
private static void appendAccessNode(Specification ds, String token)
  throws ManifoldCFException
{
  final SpecificationNode node = new SpecificationNode(WebcrawlerConfig.NODE_ACCESS);
  node.setAttribute(WebcrawlerConfig.ATTR_TOKEN,token);
  ds.addChild(ds.getChildCount(),node);
}

/** If {@code value} is non-null, replaces all nodes of {@code nodeType} with one node whose value is {@code value}. */
private static void replaceValueNode(Specification ds, String nodeType, String value)
  throws ManifoldCFException
{
  if (value == null)
  {
    return;
  }
  removeSpecNodesOfType(ds,nodeType);
  final SpecificationNode node = new SpecificationNode(nodeType);
  node.setValue(value);
  ds.addChild(ds.getChildCount(),node);
}

/** Replaces all nodes of {@code nodeType} with one node whose value attribute is "false" (posted value null or "false") or "true". */
private static void replaceSwitchNode(Specification ds, String nodeType, String posted)
  throws ManifoldCFException
{
  removeSpecNodesOfType(ds,nodeType);
  final SpecificationNode node = new SpecificationNode(nodeType);
  node.setAttribute(WebcrawlerConfig.ATTR_VALUE,(posted == null || posted.equals("false")) ? "false" : "true");
  ds.addChild(ds.getChildCount(),node);
}

/** Builds a URL-spec node from the posted "urlregexp*" fields whose names end with {@code suffix}. */
private static SpecificationNode readUrlSpecNode(IPostParameters variableContext, String seqPrefix, String suffix)
  throws ManifoldCFException
{
  final SpecificationNode node = new SpecificationNode(WebcrawlerConfig.NODE_URLSPEC);
  // The regexp attribute is always set; the remaining attributes only when a non-empty value was posted.
  node.setAttribute(WebcrawlerConfig.ATTR_REGEXP,
    variableContext.getParameter(seqPrefix+"urlregexp"+suffix));
  setIfNonEmpty(node,WebcrawlerConfig.ATTR_DESCRIPTION,
    variableContext.getParameter(seqPrefix+"urlregexpdesc"+suffix));
  setIfNonEmpty(node,WebcrawlerConfig.ATTR_REORDER,
    variableContext.getParameter(seqPrefix+"urlregexpreorder"+suffix));
  setIfNonEmpty(node,WebcrawlerConfig.ATTR_JAVASESSIONREMOVAL,
    variableContext.getParameter(seqPrefix+"urlregexpjava"+suffix));
  setIfNonEmpty(node,WebcrawlerConfig.ATTR_ASPSESSIONREMOVAL,
    variableContext.getParameter(seqPrefix+"urlregexpasp"+suffix));
  setIfNonEmpty(node,WebcrawlerConfig.ATTR_PHPSESSIONREMOVAL,
    variableContext.getParameter(seqPrefix+"urlregexpphp"+suffix));
  setIfNonEmpty(node,WebcrawlerConfig.ATTR_BVSESSIONREMOVAL,
    variableContext.getParameter(seqPrefix+"urlregexpbv"+suffix));
  setIfNonEmpty(node,WebcrawlerConfig.ATTR_LOWERCASE,
    variableContext.getParameter(seqPrefix+"urlregexplowercasing"+suffix));
  return node;
}

/** Sets the named attribute on {@code node} only when {@code value} is non-null and non-empty. */
private static void setIfNonEmpty(SpecificationNode node, String attribute, String value)
{
  if (value != null && value.length() > 0)
  {
    node.setAttribute(attribute,value);
  }
}