public void processDocuments()

in connectors/filenet/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filenet/FilenetConnector.java [933:1323]


  /**
   * Process a batch of document identifiers.
   *
   * <p>Two identifier shapes are handled:
   * <ul>
   *   <li><b>{@code versionId,elementNumber}</b> (contains a comma): the document information is
   *       fetched, checked against the document class match criteria, a version string is built
   *       from the metadata, ACLs, document class and URI prefix, and the content element is
   *       downloaded to a temporary file and ingested.</li>
   *   <li><b>{@code versionId}</b> (no comma): the number of content elements is looked up, one
   *       child reference {@code versionId,q} is added per element, and the identifier itself is
   *       recorded via {@code noDocument} with an empty version string.</li>
   * </ul>
   *
   * @param documentIdentifiers the identifiers to process.
   * @param statuses existing version information; not consulted by this implementation.
   * @param spec the document specification, wrapped in a {@code SpecInfo} for lookups.
   * @param activities the callback object used to check job status, delete/skip/ingest documents,
   *        add references, and record fetch activity.
   * @param jobMode the job mode; not consulted by this implementation.
   * @param usesDefaultAuthority not consulted by this implementation.
   * @throws ManifoldCFException on a malformed identifier, on a FileNet error that is neither a
   *         service interruption nor a "not allowed" error, or when propagated from a callee.
   * @throws ServiceInterruption when FileNet reports a service interruption, or when propagated
   *         from a callee.
   */
  public void processDocuments(String[] documentIdentifiers, IExistingVersions statuses, Specification spec,
    IProcessActivity activities, int jobMode, boolean usesDefaultAuthority)
    throws ManifoldCFException, ServiceInterruption
  {
    Logging.connectors.debug("FileNet: Inside processDocuments");

    SpecInfo dSpec = new SpecInfo(spec);

    // null: security off; empty: use the acls returned by FileNet; non-empty: forced acls.
    String[] acls = dSpec.getAcls();

    for (String documentIdentifier : documentIdentifiers)
    {
      // For each document, be sure to confirm job still active
      activities.checkJobStillActive();

      if (Logging.connectors.isDebugEnabled())
        Logging.connectors.debug("Filenet: Getting version for identifier '"+documentIdentifier+"'");

      int cIndex = documentIdentifier.indexOf(",");
      if (cIndex != -1)
      {
        // Identifier has the form "<version id>,<element number>"
        String vId = documentIdentifier.substring(0,cIndex);
        int elementNumber;
        try
        {
          elementNumber = Integer.parseInt(documentIdentifier.substring(cIndex+1));
        }
        catch (NumberFormatException e)
        {
          throw new ManifoldCFException("Bad number in identifier: "+documentIdentifier,e);
        }

        FileInfo fileInfo;
        try
        {
          fileInfo = doGetDocumentInformation(vId, dSpec.getMetadataFields());
          if (fileInfo == null)
          {
            if (Logging.connectors.isDebugEnabled())
              Logging.connectors.debug("FileNet: Skipping document '"+documentIdentifier+"' because not a current document");
            activities.deleteDocument(documentIdentifier);
            continue;
          }
        }
        catch (FilenetException e)
        {
          // Base our treatment on the kind of error it is.
          long currentTime = System.currentTimeMillis();
          if (e.getType() == FilenetException.TYPE_SERVICEINTERRUPTION)
            // Timestamps passed are 5 minutes and 12 hours from now.
            throw new ServiceInterruption(e.getMessage(),e,currentTime+300000L,currentTime+12*60*60000L,-1,true);
          else if (e.getType() == FilenetException.TYPE_NOTALLOWED)
          {
            if (Logging.connectors.isDebugEnabled())
              Logging.connectors.debug("FileNet: Skipping file '"+documentIdentifier+"' because: "+e.getMessage(),e);
            activities.deleteDocument(documentIdentifier);
            continue;
          }
          else
            throw new ManifoldCFException(e.getMessage(),e);
        }

        if (Logging.connectors.isDebugEnabled())
          Logging.connectors.debug("Filenet: Document '"+documentIdentifier+"' is a current document");

        // Form a version string based on the info in fileInfo
        // Version string will consist of:
        // (a) metadata info
        // (b) acl info
        // (c) the document class
        // (d) the url prefix to use
        StringBuilder versionBuffer = new StringBuilder();

        String docClass = fileInfo.getDocClass();
        DocClassSpec docclassspec = dSpec.getDocClassSpec(docClass);

        // First, verify that this document matches the match criteria.  All criteria must match;
        // stop at the first one that does not.
        boolean docMatches = true;
        for (int q = 0; q < docclassspec.getMatchCount(); q++)
        {
          String matchType = docclassspec.getMatchType(q);
          String matchField = docclassspec.getMatchField(q);
          String matchValue = docclassspec.getMatchValue(q);
          // Grab the appropriate field value from the fileinfo.  We know it is there because we explicitly
          // folded the match fields into the server request.
          String matchDocValue = fileInfo.getMetadataValue(matchField);
          docMatches = performMatch(matchType,matchDocValue,matchValue);
          if (!docMatches)
            break;
        }

        if (!docMatches)
        {
          if (Logging.connectors.isDebugEnabled())
            Logging.connectors.debug("FileNet: Skipping document '"+documentIdentifier+"' because doesn't match field criteria");
          activities.deleteDocument(documentIdentifier);
          continue;
        }

        // Metadata info: collect the included field names in a single pass, then sort them so the
        // version string does not depend on iteration order.
        java.util.List<String> includedFields = new java.util.ArrayList<String>();
        Iterator iter = fileInfo.getMetadataIterator();
        while (iter.hasNext())
        {
          String field = (String)iter.next();
          if (docclassspec.checkMetadataIncluded(field))
            includedFields.add(field);
        }
        String[] metadataFieldNames = includedFields.toArray(new String[includedFields.size()]);
        java.util.Arrays.sort(metadataFieldNames);
        // Pack field names and values
        // For sanity, pack the names first and then the values!
        packList(versionBuffer,metadataFieldNames,'+');
        String[] metadataFieldValues = new String[metadataFieldNames.length];
        for (int q = 0; q < metadataFieldValues.length; q++)
        {
          String value = fileInfo.getMetadataValue(metadataFieldNames[q]);
          // A missing value is represented as the empty string
          metadataFieldValues[q] = (value == null) ? "" : value;
        }
        packList(versionBuffer,metadataFieldValues,'+');

        // Acl info
        // A single character ('+' or '-') signals whether security is on or off.
        String[] aclValues = null;
        String[] denyAclValues = null;
        if (acls != null && acls.length == 0)
        {
          // Security is on, so use the acls that came back from filenet
          aclValues = new String[fileInfo.getAclCount()];
          int j = 0;
          iter = fileInfo.getAclIterator();
          while (iter.hasNext())
          {
            aclValues[j++] = (String)iter.next();
          }
          denyAclValues = new String[fileInfo.getDenyAclCount()];
          j = 0;
          iter = fileInfo.getDenyAclIterator();
          while (iter.hasNext())
          {
            denyAclValues[j++] = (String)iter.next();
          }
        }
        else if (acls != null && acls.length > 0)
        {
          // Forced acls
          aclValues = acls;
          denyAclValues = new String[]{defaultAuthorityDenyToken};
        }

        if (aclValues != null)
        {
          versionBuffer.append('+');
          java.util.Arrays.sort(aclValues);
          packList(versionBuffer,aclValues,'+');
          if (denyAclValues == null)
            denyAclValues = new String[0];
          java.util.Arrays.sort(denyAclValues);
          packList(versionBuffer,denyAclValues,'+');
        }
        else
          versionBuffer.append('-');

        // Document class
        pack(versionBuffer,docClass,'+');
        // Document URI
        pack(versionBuffer,docURIPrefix,'+');

        String versionString = versionBuffer.toString();

        if (Logging.connectors.isDebugEnabled())
          Logging.connectors.debug("FileNet: Document identifier '"+documentIdentifier+"' is a document attachment");

        // errorCode doubles as the "record a fetch activity" flag: the outer finally records an
        // activity only when it is non-null.
        String errorCode = null;
        String errorDesc = null;
        long startTime = System.currentTimeMillis();
        Long fileLengthLong = null;

        try
        {
          String uri = convertToURI(docURIPrefix,vId,elementNumber,docClass);
          if (!activities.checkURLIndexable(uri))
          {
            errorCode = activities.EXCLUDED_URL;
            errorDesc = "Excluded because of url ('"+uri+"')";
            activities.noDocument(documentIdentifier,versionString);
            continue;
          }

          // NOTE(review): the statements following each handleIOException call assume that
          // handleIOException always throws -- confirm against its definition.
          File objFileTemp = null;
          try
          {
            objFileTemp = File.createTempFile("_mc_fln_", null);
          }
          catch (IOException e)
          {
            errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
            errorDesc = e.getMessage();
            handleIOException(e,documentIdentifier,"creating temporary file");
          }
          try
          {
            try
            {
              doGetDocumentContents(vId,elementNumber,objFileTemp.getCanonicalPath());
            }
            catch (IOException e)
            {
              errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
              errorDesc = e.getMessage();
              handleIOException(e,documentIdentifier,"reading document");
            }
            catch (FilenetException e)
            {
              errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
              errorDesc = e.getMessage();
              // Base our treatment on the kind of error it is.
              long currentTime = System.currentTimeMillis();
              if (e.getType() == FilenetException.TYPE_SERVICEINTERRUPTION)
              {
                throw new ServiceInterruption(e.getMessage(),e,currentTime+300000L,currentTime+12*60*60000L,-1,true);
              }
              else if (e.getType() == FilenetException.TYPE_NOTALLOWED)
              {
                if (Logging.connectors.isDebugEnabled())
                  Logging.connectors.debug("FileNet: Removing file '"+documentIdentifier+"' because: "+e.getMessage(),e);
                activities.noDocument(documentIdentifier,versionString);
                continue;
              }
              else
              {
                throw new ManifoldCFException(e.getMessage(),e);
              }
            }

            // Document fetch completed
            long fileLength = objFileTemp.length();
            if (!activities.checkLengthIndexable(fileLength))
            {
              errorCode = activities.EXCLUDED_LENGTH;
              errorDesc = "Excluded document because of length ("+fileLength+")";
              activities.noDocument(documentIdentifier,versionString);
              continue;
            }

            RepositoryDocument rd = new RepositoryDocument();
            // Apply metadata
            for (int k = 0; k < metadataFieldNames.length; k++)
            {
              rd.addField(metadataFieldNames[k],metadataFieldValues[k]);
            }

            // Apply acls
            if (aclValues != null)
            {
              rd.setSecurityACL(RepositoryDocument.SECURITY_TYPE_DOCUMENT,aclValues);
            }
            if (denyAclValues != null)
            {
              rd.setSecurityDenyACL(RepositoryDocument.SECURITY_TYPE_DOCUMENT,denyAclValues);
            }

            InputStream is = null;
            try
            {
              is = new FileInputStream(objFileTemp);
            }
            catch (IOException e)
            {
              errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
              errorDesc = e.getMessage();
              handleIOException(e,documentIdentifier,"Opening temporary file");
            }
            try
            {
              rd.setBinary(is, fileLength);

              try
              {
                // Ingest
                activities.ingestDocumentWithException(documentIdentifier,versionString,uri,rd);
                errorCode = "OK";
                fileLengthLong = Long.valueOf(fileLength);
              }
              catch (IOException e)
              {
                errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
                errorDesc = e.getMessage();
                handleIOException(e,documentIdentifier,"ingesting document");
              }
            }
            finally
            {
              // Guard against a stream that was never opened so cleanup cannot mask the
              // exception that is already propagating.
              if (is != null)
              {
                try
                {
                  is.close();
                }
                catch (IOException e)
                {
                  errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
                  errorDesc = e.getMessage();
                  handleIOException(e,documentIdentifier,"closing input stream");
                }
              }
            }
          }
          finally
          {
            // Delete temp file (if one was actually created)
            if (objFileTemp != null)
              objFileTemp.delete();
          }
        }
        catch (ManifoldCFException e)
        {
          // Do not record an activity when the exception carries the INTERRUPTED error code
          if (e.getErrorCode() == ManifoldCFException.INTERRUPTED)
            errorCode = null;
          throw e;
        }
        finally
        {
          // Runs for normal completion, for each 'continue' above, and for thrown exceptions
          if (errorCode != null)
            activities.recordActivity(Long.valueOf(startTime),ACTIVITY_FETCH,
              fileLengthLong,documentIdentifier,errorCode,errorDesc,null);
        }
      }
      else
      {
        // Identifier is a bare version id: expand it into one child identifier per content element
        Integer count;
        try
        {
          count = doGetDocumentContentCount(documentIdentifier);
          if (count == null)
          {
            if (Logging.connectors.isDebugEnabled())
              Logging.connectors.debug("FileNet: Removing version '"+documentIdentifier+"' because it seems to no longer exist");
            activities.deleteDocument(documentIdentifier);
            continue;
          }
        }
        catch (FilenetException e)
        {
          // Base our treatment on the kind of error it is.
          long currentTime = System.currentTimeMillis();
          if (e.getType() == FilenetException.TYPE_SERVICEINTERRUPTION)
            throw new ServiceInterruption(e.getMessage(),e,currentTime+300000L,currentTime+12*60*60000L,-1,true);
          else if (e.getType() == FilenetException.TYPE_NOTALLOWED)
          {
            if (Logging.connectors.isDebugEnabled())
              Logging.connectors.debug("FileNet: Skipping file '"+documentIdentifier+"' because: "+e.getMessage(),e);
            activities.deleteDocument(documentIdentifier);
            continue;
          }
          else
            throw new ManifoldCFException(e.getMessage(),e);
        }

        if (Logging.connectors.isDebugEnabled())
          Logging.connectors.debug("FileNet: There are "+count.toString()+" content values for '"+documentIdentifier+"'");

        // Loop through all document content identifiers and add a child identifier for each
        for (int q = 0; q < count.intValue(); q++)
        {
          if (Logging.connectors.isDebugEnabled())
            Logging.connectors.debug("Filenet: Adding document identifier '"+documentIdentifier+","+Integer.toString(q)+"'");

          activities.addDocumentReference(documentIdentifier + "," + Integer.toString(q));
        }

        // No more processing is necessary for document identifiers.
        activities.noDocument(documentIdentifier,"");
      }
    }
  }