in connectors/filenet/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filenet/FilenetConnector.java [933:1323]
public void processDocuments(String[] documentIdentifiers, IExistingVersions statuses, Specification spec,
  IProcessActivity activities, int jobMode, boolean usesDefaultAuthority)
  throws ManifoldCFException, ServiceInterruption
{
  // Processes every identifier in documentIdentifiers, one at a time.
  //
  // Two identifier shapes are handled:
  //   "<versionId>,<elementNumber>"  - a single content element.  Its document information is
  //                                    fetched, checked against the match criteria of its document
  //                                    class, turned into a version string, and then the content is
  //                                    downloaded to a temporary file and ingested.
  //   "<identifier>" (no comma)      - the number of content elements is looked up and one child
  //                                    reference "<identifier>,<n>" is added per element; the
  //                                    identifier itself is then recorded with noDocument().
  //
  // Parameters:
  //   documentIdentifiers  identifiers to process.
  //   statuses             not consulted by this method.
  //   spec                 job specification; wrapped in a SpecInfo to obtain acls, metadata fields
  //                        and per-document-class settings.
  //   activities           callback used to check job liveness, test url/length indexability, add
  //                        references, ingest, delete, and record fetch activity.
  //   jobMode              not consulted by this method.
  //   usesDefaultAuthority not consulted by this method.
  //
  // Throws:
  //   ServiceInterruption   when a FilenetException of TYPE_SERVICEINTERRUPTION is seen.
  //   ManifoldCFException   for a malformed element number, for any other FilenetException type
  //                         except TYPE_NOTALLOWED (which causes the identifier to be skipped), and
  //                         for whatever handleIOException() and the activities callbacks raise.
  Logging.connectors.debug("FileNet: Inside processDocuments");

  SpecInfo dSpec = new SpecInfo(spec);
  String[] acls = dSpec.getAcls();

  for (String documentIdentifier : documentIdentifiers)
  {
    // For each document, be sure to confirm job still active
    activities.checkJobStillActive();

    if (Logging.connectors.isDebugEnabled())
      Logging.connectors.debug("Filenet: Getting version for identifier '"+documentIdentifier+"'");

    int cIndex = documentIdentifier.indexOf(",");
    if (cIndex != -1)
    {
      // ---- Content element: "<versionId>,<elementNumber>" ----
      String vId = documentIdentifier.substring(0,cIndex);
      int elementNumber;
      try
      {
        elementNumber = Integer.parseInt(documentIdentifier.substring(cIndex+1));
      }
      catch (NumberFormatException e)
      {
        throw new ManifoldCFException("Bad number in identifier: "+documentIdentifier,e);
      }

      FileInfo fileInfo;
      try
      {
        fileInfo = doGetDocumentInformation(vId, dSpec.getMetadataFields());
        if (fileInfo == null)
        {
          if (Logging.connectors.isDebugEnabled())
            Logging.connectors.debug("FileNet: Skipping document '"+documentIdentifier+"' because not a current document");
          activities.deleteDocument(documentIdentifier);
          continue;
        }
      }
      catch (FilenetException e)
      {
        // Base our treatment on the kind of error it is.
        long currentTime = System.currentTimeMillis();
        if (e.getType() == FilenetException.TYPE_SERVICEINTERRUPTION)
          throw new ServiceInterruption(e.getMessage(),e,currentTime+300000L,currentTime+12*60*60000L,-1,true);
        else if (e.getType() == FilenetException.TYPE_NOTALLOWED)
        {
          if (Logging.connectors.isDebugEnabled())
            Logging.connectors.debug("FileNet: Skipping file '"+documentIdentifier+"' because: "+e.getMessage(),e);
          activities.deleteDocument(documentIdentifier);
          continue;
        }
        else
          throw new ManifoldCFException(e.getMessage(),e);
      }

      if (Logging.connectors.isDebugEnabled())
        Logging.connectors.debug("Filenet: Document '"+documentIdentifier+"' is a current document");

      // Form a version string based on the info in fileInfo.
      // Version string will consist of, in this order:
      // (a) metadata names, then metadata values
      // (b) acl info ('+' followed by acls and deny acls, or '-' when there is no security)
      // (c) the document class
      // (d) the url prefix to use
      StringBuilder versionBuffer = new StringBuilder();
      String docClass = fileInfo.getDocClass();
      DocClassSpec docclassspec = dSpec.getDocClassSpec(docClass);

      // First, verify that this document matches the match criteria.  All criteria must pass;
      // stop at the first one that does not.
      boolean docMatches = true;
      for (int q = 0; q < docclassspec.getMatchCount(); q++)
      {
        String matchType = docclassspec.getMatchType(q);
        String matchField = docclassspec.getMatchField(q);
        String matchValue = docclassspec.getMatchValue(q);
        // Grab the appropriate field value from the fileinfo.  We know it is there because we explicitly
        // folded the match fields into the server request.
        String matchDocValue = fileInfo.getMetadataValue(matchField);
        docMatches = performMatch(matchType,matchDocValue,matchValue);
        if (!docMatches)
          break;
      }
      if (!docMatches)
      {
        if (Logging.connectors.isDebugEnabled())
          Logging.connectors.debug("FileNet: Skipping document '"+documentIdentifier+"' because doesn't match field criteria");
        activities.deleteDocument(documentIdentifier);
        continue;
      }

      // Metadata info: gather the included field names in one pass, then sort them so the
      // version string does not depend on iteration order.
      java.util.List<String> includedFields = new java.util.ArrayList<String>();
      Iterator iter = fileInfo.getMetadataIterator();
      while (iter.hasNext())
      {
        String field = (String)iter.next();
        if (docclassspec.checkMetadataIncluded(field))
          includedFields.add(field);
      }
      String[] metadataFieldNames = includedFields.toArray(new String[includedFields.size()]);
      java.util.Arrays.sort(metadataFieldNames);

      // Pack field names and values.
      // For sanity, pack the names first and then the values!
      packList(versionBuffer,metadataFieldNames,'+');
      String[] metadataFieldValues = new String[metadataFieldNames.length];
      for (int q = 0; q < metadataFieldValues.length; q++)
      {
        String value = fileInfo.getMetadataValue(metadataFieldNames[q]);
        // A missing value is represented as the empty string.
        metadataFieldValues[q] = (value == null) ? "" : value;
      }
      packList(versionBuffer,metadataFieldValues,'+');

      // Acl info.
      //   acls == null        -> no security; neither acl array is populated
      //   acls.length == 0    -> security is on, use the acls that came back from filenet
      //   acls.length  > 0    -> forced acls from the specification
      String[] aclValues = null;
      String[] denyAclValues = null;
      if (acls != null && acls.length == 0)
      {
        aclValues = new String[fileInfo.getAclCount()];
        int j = 0;
        iter = fileInfo.getAclIterator();
        while (iter.hasNext())
        {
          aclValues[j++] = (String)iter.next();
        }
        denyAclValues = new String[fileInfo.getDenyAclCount()];
        j = 0;
        iter = fileInfo.getDenyAclIterator();
        while (iter.hasNext())
        {
          denyAclValues[j++] = (String)iter.next();
        }
      }
      else if (acls != null && acls.length > 0)
      {
        // Forced acls.  Work on a copy: the array is sorted below, and the array handed back by
        // getAcls() may be shared with the SpecInfo, so it should not be reordered in place.
        aclValues = acls.clone();
        denyAclValues = new String[]{defaultAuthorityDenyToken};
      }

      // A single leading character signals whether security is on ('+') or off ('-').
      if (aclValues != null)
      {
        versionBuffer.append('+');
        java.util.Arrays.sort(aclValues);
        packList(versionBuffer,aclValues,'+');
        if (denyAclValues == null)
          denyAclValues = new String[0];
        java.util.Arrays.sort(denyAclValues);
        packList(versionBuffer,denyAclValues,'+');
      }
      else
        versionBuffer.append('-');

      // Document class
      pack(versionBuffer,docClass,'+');
      // Document URI
      pack(versionBuffer,docURIPrefix,'+');
      String versionString = versionBuffer.toString();

      if (Logging.connectors.isDebugEnabled())
        Logging.connectors.debug("FileNet: Document identifier '"+documentIdentifier+"' is a document attachment");

      // State for the fetch activity record written in the finally block below.  A null
      // errorCode suppresses the record.
      String errorCode = null;
      String errorDesc = null;
      long startTime = System.currentTimeMillis();
      Long fileLengthLong = null;
      try
      {
        String uri = convertToURI(docURIPrefix,vId,elementNumber,docClass);
        if (!activities.checkURLIndexable(uri))
        {
          errorCode = activities.EXCLUDED_URL;
          errorDesc = "Excluded because of url ('"+uri+"')";
          activities.noDocument(documentIdentifier,versionString);
          continue;
        }

        File objFileTemp;
        try
        {
          objFileTemp = File.createTempFile("_mc_fln_", null);
        }
        catch (IOException e)
        {
          errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
          errorDesc = e.getMessage();
          handleIOException(e,documentIdentifier,"creating temporary file");
          // Reached only if the handler returns normally; without a temp file we cannot go on.
          throw new ManifoldCFException(e.getMessage(),e);
        }

        try
        {
          try
          {
            doGetDocumentContents(vId,elementNumber,objFileTemp.getCanonicalPath());
          }
          catch (IOException e)
          {
            errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
            errorDesc = e.getMessage();
            handleIOException(e,documentIdentifier,"reading document");
          }
          catch (FilenetException e)
          {
            errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
            errorDesc = e.getMessage();
            // Base our treatment on the kind of error it is.
            long currentTime = System.currentTimeMillis();
            if (e.getType() == FilenetException.TYPE_SERVICEINTERRUPTION)
            {
              throw new ServiceInterruption(e.getMessage(),e,currentTime+300000L,currentTime+12*60*60000L,-1,true);
            }
            else if (e.getType() == FilenetException.TYPE_NOTALLOWED)
            {
              if (Logging.connectors.isDebugEnabled())
                Logging.connectors.debug("FileNet: Removing file '"+documentIdentifier+"' because: "+e.getMessage(),e);
              activities.noDocument(documentIdentifier,versionString);
              continue;
            }
            else
            {
              throw new ManifoldCFException(e.getMessage(),e);
            }
          }

          // Document fetch completed
          long fileLength = objFileTemp.length();
          if (!activities.checkLengthIndexable(fileLength))
          {
            errorCode = activities.EXCLUDED_LENGTH;
            errorDesc = "Excluded document because of length ("+fileLength+")";
            activities.noDocument(documentIdentifier,versionString);
            continue;
          }

          RepositoryDocument rd = new RepositoryDocument();

          // Apply metadata
          for (int k = 0; k < metadataFieldNames.length; k++)
          {
            rd.addField(metadataFieldNames[k],metadataFieldValues[k]);
          }

          // Apply acls
          if (aclValues != null)
          {
            rd.setSecurityACL(RepositoryDocument.SECURITY_TYPE_DOCUMENT,aclValues);
          }
          if (denyAclValues != null)
          {
            rd.setSecurityDenyACL(RepositoryDocument.SECURITY_TYPE_DOCUMENT,denyAclValues);
          }

          InputStream is;
          try
          {
            is = new FileInputStream(objFileTemp);
          }
          catch (IOException e)
          {
            errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
            errorDesc = e.getMessage();
            handleIOException(e,documentIdentifier,"Opening temporary file");
            // Reached only if the handler returns normally; there is no stream to ingest from.
            throw new ManifoldCFException(e.getMessage(),e);
          }

          try
          {
            rd.setBinary(is, fileLength);
            try
            {
              // Ingest
              activities.ingestDocumentWithException(documentIdentifier,versionString,uri,rd);
              errorCode = "OK";
              fileLengthLong = Long.valueOf(fileLength);
            }
            catch (IOException e)
            {
              errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
              errorDesc = e.getMessage();
              handleIOException(e,documentIdentifier,"ingesting document");
            }
          }
          finally
          {
            try
            {
              is.close();
            }
            catch (IOException e)
            {
              errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
              errorDesc = e.getMessage();
              handleIOException(e,documentIdentifier,"closing input stream");
            }
          }
        }
        finally
        {
          // Delete temp file
          objFileTemp.delete();
        }
      }
      catch (ManifoldCFException e)
      {
        // An interruption is not a fetch result, so do not record an activity for it.
        if (e.getErrorCode() == ManifoldCFException.INTERRUPTED)
          errorCode = null;
        throw e;
      }
      finally
      {
        if (errorCode != null)
          activities.recordActivity(Long.valueOf(startTime),ACTIVITY_FETCH,
            fileLengthLong,documentIdentifier,errorCode,errorDesc,null);
      }
    }
    else
    {
      // ---- Identifier without an element number: expand into one child per content element ----
      Integer count;
      try
      {
        count = doGetDocumentContentCount(documentIdentifier);
        if (count == null)
        {
          if (Logging.connectors.isDebugEnabled())
            Logging.connectors.debug("FileNet: Removing version '"+documentIdentifier+"' because it seems to no longer exist");
          activities.deleteDocument(documentIdentifier);
          continue;
        }
      }
      catch (FilenetException e)
      {
        // Base our treatment on the kind of error it is.
        long currentTime = System.currentTimeMillis();
        if (e.getType() == FilenetException.TYPE_SERVICEINTERRUPTION)
          throw new ServiceInterruption(e.getMessage(),e,currentTime+300000L,currentTime+12*60*60000L,-1,true);
        else if (e.getType() == FilenetException.TYPE_NOTALLOWED)
        {
          if (Logging.connectors.isDebugEnabled())
            Logging.connectors.debug("FileNet: Skipping file '"+documentIdentifier+"' because: "+e.getMessage(),e);
          activities.deleteDocument(documentIdentifier);
          continue;
        }
        else
          throw new ManifoldCFException(e.getMessage(),e);
      }

      if (Logging.connectors.isDebugEnabled())
        Logging.connectors.debug("FileNet: There are "+count.toString()+" content values for '"+documentIdentifier+"'");

      // Loop through all document content identifiers and add a child identifier for each
      int elementCount = count.intValue();
      for (int q = 0; q < elementCount; q++)
      {
        String childIdentifier = documentIdentifier + "," + Integer.toString(q);
        if (Logging.connectors.isDebugEnabled())
          Logging.connectors.debug("Filenet: Adding document identifier '"+childIdentifier+"'");
        activities.addDocumentReference(childIdentifier);
      }

      // No more processing is necessary for document identifiers.
      activities.noDocument(documentIdentifier,"");
    }
  }
}