public void processDocuments()

in connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java [749:1631]
659 lines of code
124 McCabe index (conditional complexity)

  public void processDocuments(String[] documentIdentifiers, IExistingVersions statuses, Specification spec,
    IProcessActivity activities, int jobMode, boolean usesDefaultAuthority)
    throws ManifoldCFException, ServiceInterruption
  {
    // Get the forced acls.  (We need this only for the case where documents have their own acls)
    String[] forcedAcls = getAcls(spec);
    
    SystemMetadataDescription sDesc = new SystemMetadataDescription(spec);

    // Look at the specification nodes that affect the version string: the path attribute name
    // and the path mappings, which are appended to the match map in specification order.
    String pathAttributeName = null;
    MatchMap matchMap = new MatchMap();
    int i = 0;
    while (i < spec.getChildCount())
    {
      SpecificationNode n = spec.getChild(i++);
      if (n.getType().equals("pathnameattribute"))
        pathAttributeName = n.getAttributeValue("value");
      else if (n.getType().equals("pathmap"))
      {
        // Path mapping info also needs to be looked at, because it affects what is
        // ingested.
        String pathMatch = n.getAttributeValue("match");
        String pathReplace = n.getAttributeValue("replace");
        matchMap.appendMatchPair(pathMatch,pathReplace);
      }

    }

    // Calculate the part of the version string that comes from path name and mapping.
    // This starts with = since ; is used by another optional component (the forced acls)
    StringBuilder pathNameAttributeVersion = new StringBuilder();
    if (pathAttributeName != null)
      pathNameAttributeVersion.append("=").append(pathAttributeName).append(":").append(matchMap);

    for (String documentIdentifier : documentIdentifiers)
    {
      // Check if we should abort
      activities.checkJobStillActive();

      getSession();

      if (Logging.connectors.isDebugEnabled())
        Logging.connectors.debug( "SharePoint: Getting version of '" + documentIdentifier + "'");
      if ( documentIdentifier.startsWith("D") || documentIdentifier.startsWith("S") )
      {
        // Old-style document identifier.  We don't recognize these anymore, so signal deletion.
        if (Logging.connectors.isDebugEnabled())
          Logging.connectors.debug("SharePoint: Removing old-style document identifier '"+documentIdentifier+"'");
        activities.deleteDocument(documentIdentifier);
        continue;
      }
      else if (documentIdentifier.startsWith("/"))
      {
        // New-style document identifier.  A double-slash marks the separation between the library and folder/file levels.
        // A triple-slash marks the separation between a list name and list row ID.
        int dListSeparatorIndex = documentIdentifier.indexOf("///");
        int dLibSeparatorIndex = documentIdentifier.indexOf("//");
        if (dListSeparatorIndex != -1)
        {
          // === List-style identifier ===
          if (dListSeparatorIndex == documentIdentifier.length() - 3)
          {
            // == List path! ==
            if (!checkIncludeList(documentIdentifier.substring(0,documentIdentifier.length()-3),spec))
            {
              if (Logging.connectors.isDebugEnabled())
                Logging.connectors.debug("SharePoint: List specification no longer includes list '"+documentIdentifier+"' - removing");
              activities.deleteDocument(documentIdentifier);
              continue;
            }
            
            // Version string for a list
            String versionString = "";

            // Chained connectors always scan parent nodes, so they don't bother setting a version
            String siteListPath = documentIdentifier.substring(0,documentIdentifier.length()-3);
            int listCutoff = siteListPath.lastIndexOf( "/" );
            String site = siteListPath.substring(0,listCutoff);
            String listName = siteListPath.substring( listCutoff + 1 );

            if (Logging.connectors.isDebugEnabled())
              Logging.connectors.debug( "SharePoint: Document identifier is a list: '" + siteListPath + "'" );

            String listID = proxy.getListID( encodePath(site), site, listName );
            if (listID == null)
            {
              if (Logging.connectors.isDebugEnabled())
                Logging.connectors.debug("SharePoint: GUID lookup failed for list '"+siteListPath+"' - deleting");
              activities.deleteDocument(documentIdentifier);
              continue;
            }
            
            String encodedSitePath = encodePath(site);
                
            // Get the list's fields
            Map<String,String> fieldNames = proxy.getFieldList( encodedSitePath, listID );
            if (fieldNames == null)
            {
              if (Logging.connectors.isDebugEnabled())
                Logging.connectors.debug("SharePoint: Field list lookup failed for list '"+siteListPath+"' - deleting");
              activities.deleteDocument(documentIdentifier);
              continue;
            }
            
            // Note well: There's been a lot of back-and-forth about this code.
            // See CONNECTORS-1324.
            // The fieldNames map returned by proxy.getFieldList() has the internal name as a key, and the display name as a value.
            // Since we want the complete list of fields here, by *internal* name, we iterate over the keySet(), not the values.
            String[] fields = new String[fieldNames.size()];
            int j = 0;
            for (String field : fieldNames.keySet())
            {
              fields[j++] = field;
            }
                  
            String[] accessTokens;
            String[] denyTokens;
                  
            if (forcedAcls == null)
            {
              // Security is off
              accessTokens = new String[0];
              denyTokens = new String[0];
            }
            else if (forcedAcls.length != 0)
            {
              // Forced security
              accessTokens = forcedAcls;
              denyTokens = new String[0];
            }
            else
            {
              // Security enabled, native security
              accessTokens = proxy.getACLs( encodedSitePath, listID, activeDirectoryAuthority );
              denyTokens = new String[]{defaultAuthorityDenyToken};
            }

            if (accessTokens == null)
            {
              if (Logging.connectors.isDebugEnabled())
                Logging.connectors.debug("SharePoint: Access token lookup failed for list '"+siteListPath+"' - deleting");
              activities.deleteDocument(documentIdentifier);
              continue;
            }
            
            ListItemStream fs = new ListItemStream( activities, encodedServerLocation, site, siteListPath, spec,
              documentIdentifier, accessTokens, denyTokens, listID, fields );
            boolean success = proxy.getChildren( fs, encodedSitePath , listID, dspStsWorks );
            if (!success)
            {
              // Site/list no longer exists, so delete entry
              if (Logging.connectors.isDebugEnabled())
                Logging.connectors.debug("SharePoint: No list found for list '"+siteListPath+"' - deleting");
              activities.deleteDocument(documentIdentifier);
              continue;
            }
            activities.noDocument(documentIdentifier,versionString);
          }
          else
          {
            // == List item or attachment path! ==
            // Convert the modified document path to an unmodified one, plus a library path.
            String decodedListPath = documentIdentifier.substring(0,dListSeparatorIndex);
            String itemAndAttachment = documentIdentifier.substring(dListSeparatorIndex+2);
            String decodedItemPath = decodedListPath + itemAndAttachment;
            
            int cutoff = decodedListPath.lastIndexOf("/");
            String sitePath = decodedListPath.substring(0,cutoff);
            String list = decodedListPath.substring(cutoff+1);

            String encodedSitePath = encodePath(sitePath);

            int attachmentSeparatorIndex = itemAndAttachment.indexOf("//",1);
            if (attachmentSeparatorIndex == -1)
            {
              // == List item path! ==
              if (!checkIncludeListItem(decodedItemPath,spec))
              {
                if (Logging.connectors.isDebugEnabled())
                  Logging.connectors.debug("SharePoint: List item '"+documentIdentifier+"' is no longer included - removing");
                activities.deleteDocument(documentIdentifier);
                continue;
              }

              // This file is included, so calculate a version string.  This will include metadata info, so get that first.
              MetadataInformation metadataInfo = getMetadataSpecification(decodedItemPath,spec);

              String[] accessTokens = activities.retrieveParentData(documentIdentifier, "accessTokens");
              String[] denyTokens = activities.retrieveParentData(documentIdentifier, "denyTokens");
              String[] listIDs = activities.retrieveParentData(documentIdentifier, "guids");
              String[] listFields = activities.retrieveParentData(documentIdentifier, "fields");
              String[] displayURLs = activities.retrieveParentData(documentIdentifier, "displayURLs");
                
              String listID;
              if (listIDs.length >= 1)
                listID = listIDs[0];
              else
                listID = null;

              String displayURL;
              if (displayURLs.length >= 1)
                displayURL = displayURLs[0];
              else
                displayURL = null;

              if (listID == null)
              {
                if (Logging.connectors.isDebugEnabled())
                  Logging.connectors.debug("SharePoint: Can't get version of '"+documentIdentifier+"' because list '"+decodedListPath+"' does not exist - removing");
                activities.deleteDocument(documentIdentifier);
                continue;
              }
              
              // Get the fields we want (internal field names only at this point), given the list of all fields (internal field names again).
              String[] sortedMetadataFields = getInterestingFieldSetSorted(metadataInfo,listFields);
                  
              // Sort access tokens so they are comparable in the version string
              java.util.Arrays.sort(accessTokens);
              java.util.Arrays.sort(denyTokens);

              // Next, get the actual timestamp field for the file.
              List<String> metadataDescription = new ArrayList<String>();
              metadataDescription.add("Modified");
              metadataDescription.add("Created");
              metadataDescription.add("ID");
              metadataDescription.add("GUID");
              // The item path includes the list name, with no leading slash, and is decoded.
              String decodedItemPathWithoutSite = decodedItemPath.substring(cutoff+1);
              Map<String,String> values = proxy.getFieldValues( metadataDescription.toArray(new String[0]), encodedSitePath, listID, "/Lists/" + decodedItemPathWithoutSite, dspStsWorks );
              if (values == null) {
                if (Logging.connectors.isDebugEnabled())
                  Logging.connectors.debug("SharePoint: Can't get version of '"+documentIdentifier+"' because of bad XML characters(?)");
                activities.deleteDocument(documentIdentifier);
                continue;
              }
              String modifiedDate = values.get("Modified");
              String createdDate = values.get("Created");
              String id = values.get("ID");
              String guid = values.get("GUID");
              if (modifiedDate == null)
              {
                if (Logging.connectors.isDebugEnabled())
                  Logging.connectors.debug("SharePoint: Can't get version of '"+documentIdentifier+"' because it has no modify date");
                activities.deleteDocument(documentIdentifier);
                continue;
              }
              
              // Item has a modified date so we presume it exists.
                  
              Date modifiedDateValue = DateParser.parseISO8601Date(modifiedDate);
              Date createdDateValue = DateParser.parseISO8601Date(createdDate);
                    
              // Build version string
              String versionToken = modifiedDate;
                      
              // Revamped version string on 9/21/2013 to make parseability better
              
              StringBuilder sb = new StringBuilder();

              packList(sb,sortedMetadataFields,'+');
              packList(sb,accessTokens,'+');
              packList(sb,denyTokens,'+');
              packDate(sb,modifiedDateValue);
              packDate(sb,createdDateValue);
              pack(sb,id,'+');
              pack(sb,guid,'+');
              pack(sb,displayURL,'+');
              // The rest of this is unparseable
              sb.append(versionToken);
              sb.append(pathNameAttributeVersion);
              // Added 9/7/07
              sb.append("_").append(fileBaseUrl);
              //
              String versionString = sb.toString();
              if (Logging.connectors.isDebugEnabled())
                Logging.connectors.debug( "SharePoint: Complete version string for '"+documentIdentifier+"': " + versionString);
              
              // Before we index, we queue up any attachments

              // Now, do any queuing that is needed.
              if (attachmentsSupported)
              {
                String itemNumber = id;


                List<NameValue> attachmentNames = proxy.getAttachmentNames( sitePath, listID, itemNumber );
                // Now, queue up each attachment as a separate entry
                for (NameValue attachmentName : attachmentNames)
                {
                  // For attachments, we use the carry-down feature to get the data where we need it.  That's why
                  // we unpacked the version information early above.
                  
                  // No check for inclusion; if the list item is included, so is this
                  String[][] dataValues = new String[attachmentDataNames.length][];
                  if (createdDateValue == null)
                    dataValues[0] = new String[0];
                  else
                    dataValues[0] = new String[]{new Long(createdDateValue.getTime()).toString()};
                  if (modifiedDateValue == null)
                    dataValues[1] = new String[0];
                  else
                    dataValues[1] = new String[]{new Long(modifiedDateValue.getTime()).toString()};
                  if (accessTokens == null)
                    dataValues[2] = new String[0];
                  else
                    dataValues[2] = accessTokens;
                  if (denyTokens == null)
                    dataValues[3] = new String[0];
                  else
                    dataValues[3] = denyTokens;
                  dataValues[4] = new String[]{attachmentName.getPrettyName()};
                  dataValues[5] = new String[]{guid};

                  activities.addDocumentReference(documentIdentifier + "//" + attachmentName.getValue(),
                    documentIdentifier, null, attachmentDataNames, dataValues);
                  
                }
              }

              if (!activities.checkDocumentNeedsReindexing(documentIdentifier,versionString))
                continue;
                            
              // Convert the modified document path to an unmodified one, plus a library path.
              String encodedItemPath = encodePath(decodedListPath.substring(0,cutoff) + "/Lists/" + decodedItemPath.substring(cutoff+1));
                
              // Generate the URL we are going to use
              String itemUrl = serverUrl + displayURL;  //fileBaseUrl + encodedItemPath;
                
              if (Logging.connectors.isDebugEnabled())
                Logging.connectors.debug( "SharePoint: Processing list item '"+documentIdentifier+"'; url: '" + itemUrl + "'" );

              // Fetch the metadata we will be indexing
              Map<String,String> metadataValues = null;
              if (sortedMetadataFields.length > 0)
              {
                metadataValues = proxy.getFieldValues( sortedMetadataFields, encodePath(sitePath), listID, "/Lists/" + decodedItemPath.substring(cutoff+1), dspStsWorks );
                if (metadataValues == null)
                {
                  // Item has vanished
                  if (Logging.connectors.isDebugEnabled())
                    Logging.connectors.debug("SharePoint: Item metadata fetch failure indicated that item is gone: '"+documentIdentifier+"' - removing");
                  activities.recordActivity(null,ACTIVITY_FETCH,null,documentIdentifier,"NOMETADATA","List item metadata is missing",null);
                  activities.noDocument(documentIdentifier,versionString);
                  continue;
                }
              }
                
              if (!activities.checkLengthIndexable(0L))
              {
                // Document too long (should never happen; length is 0)
                activities.recordActivity(null,ACTIVITY_FETCH,null,documentIdentifier,activities.EXCLUDED_LENGTH,"List item excluded due to content length (0)",null);
                activities.noDocument( documentIdentifier, versionString );
                continue;
              }
                
              InputStream is = new ByteArrayInputStream(new byte[0]);
              try
              {
                RepositoryDocument data = new RepositoryDocument();
                data.setBinary( is, 0L );

                if (modifiedDateValue != null)
                  data.setModifiedDate(modifiedDateValue);
                if (createdDateValue != null)
                  data.setCreatedDate(createdDateValue);
                    
                setDataACLs(data,accessTokens,denyTokens);
                
                setPathAttribute(data,sDesc,documentIdentifier);

                if (metadataValues != null)
                {
                  Iterator<String> iter = metadataValues.keySet().iterator();
                  while (iter.hasNext())
                  {
                    String fieldName = iter.next();
                    String fieldData = metadataValues.get(fieldName);
                    data.addField(fieldName,fieldData);
                  }
                }
                data.addField("GUID",guid);
                try
                {
                  activities.ingestDocumentWithException( documentIdentifier, versionString, itemUrl , data );
                }
                catch (IOException e)
                {
                  handleIOException(e,"reading document");
                }
              }
              finally
              {
                try
                {
                  is.close();
                }
                catch (IOException e)
                {
                  handleIOException(e,"closing stream");
                }
              }

            }
            else
            {
              // == List item attachment path! ==
              if (!checkIncludeListItemAttachment(decodedItemPath,spec))
              {
                if (Logging.connectors.isDebugEnabled())
                  Logging.connectors.debug("SharePoint: List item attachment '"+documentIdentifier+"' is no longer included - removing");
                activities.deleteDocument(documentIdentifier);
                continue;
              }
              
              // To save work, we retrieve most of what we need in version info from the parent.

              // Retrieve modified and created dates
              String[] modifiedDateSet = activities.retrieveParentData(documentIdentifier, "modifiedDate");
              String[] createdDateSet = activities.retrieveParentData(documentIdentifier, "createdDate");
              String[] accessTokens = activities.retrieveParentData(documentIdentifier, "accessTokens");
              String[] denyTokens = activities.retrieveParentData(documentIdentifier, "denyTokens");
              String[] urlSet = activities.retrieveParentData(documentIdentifier, "url");

              // Only one modifiedDate and createdDate can be used.  If there's more than one, just pick one - the item will be reindexed
              // anyhow.
              String modifiedDate;
              if (modifiedDateSet.length >= 1)
                modifiedDate = modifiedDateSet[0];
              else
                modifiedDate = null;
              String createdDate;
              if (createdDateSet.length >= 1)
                createdDate = createdDateSet[0];
              else
                createdDate = null;
              String url;
              if (urlSet.length >=1)
                url = urlSet[0];
              else
                url = null;

              // If we have no modified date or no attachment url, it means that the parent has gone away, so we go away too.
              if (modifiedDate == null || url == null)
              {
                // The carry-down data from the parent is missing, so delete
                if (Logging.connectors.isDebugEnabled())
                  Logging.connectors.debug("SharePoint: Can't get version of '"+documentIdentifier+"' because modified date or attachment url not found");
                activities.deleteDocument(documentIdentifier);
                continue;
              }
              
              // Item has a modified date so we presume it exists.
                      
              Date modifiedDateValue;
              if (modifiedDate != null)
                modifiedDateValue = new Date(new Long(modifiedDate).longValue());
              else
                modifiedDateValue = null;
              Date createdDateValue;
              if (createdDate != null)
                createdDateValue = new Date(new Long(createdDate).longValue());
              else
                createdDateValue = null;
                      
              // Build version string
              String versionToken = modifiedDate;
                      
              StringBuilder sb = new StringBuilder();

              // Pack the URL to get the data from
              pack(sb,url,'+');
                  
              // Do the acls.  If we get this far, we are guaranteed to have them, but we need to sort.
              java.util.Arrays.sort(accessTokens);
              java.util.Arrays.sort(denyTokens);
                  
              packList(sb,accessTokens,'+');
              packList(sb,denyTokens,'+');
              packDate(sb,modifiedDateValue);
              packDate(sb,createdDateValue);

              // The rest of this is unparseable
              sb.append(versionToken);
              sb.append(pathNameAttributeVersion);
              sb.append("_").append(fileBaseUrl);
              //
              String versionString = sb.toString();
              if (Logging.connectors.isDebugEnabled())
                Logging.connectors.debug( "SharePoint: Complete version string for '"+documentIdentifier+"': " + versionString);

              if (!activities.checkDocumentNeedsReindexing(documentIdentifier,versionString))
                continue;
              
              // We need the GUID carried down from the parent (the "guids" parent data), so grab that.
              String[] guids = activities.retrieveParentData(documentIdentifier, "guids");
              String guid;
              if (guids.length >= 1)
                guid = guids[0];
              else
                guid = null;
                
              if (guid == null)
              {
                if (Logging.connectors.isDebugEnabled())
                  Logging.connectors.debug("SharePoint: Skipping attachment '"+documentIdentifier+"' because no parent guid found");
                activities.recordActivity(null,ACTIVITY_FETCH,null,documentIdentifier,"NOGUID","List item attachment GUID is missing",null);
                activities.noDocument(documentIdentifier,versionString);
                continue;
              }
              
              int lastIndex = url.lastIndexOf("/");
              guid = guid + ":" + url.substring(lastIndex+1);
                  
              // Fetch and index.  This also filters documents based on output connector restrictions.
              String fileUrl = serverUrl + encodePath(url);
              String fetchUrl = fileUrl;
              fetchAndIndexFile(activities, documentIdentifier, versionString, fileUrl, fetchUrl,
                accessTokens, denyTokens, createdDateValue, modifiedDateValue, null, guid, sDesc);
            }
          }
        }
        else if (dLibSeparatorIndex != -1)
        {
          // === Library-style identifier ===
          if (dLibSeparatorIndex == documentIdentifier.length() - 2)
          {
            // Library path!
            if (!checkIncludeLibrary(documentIdentifier.substring(0,documentIdentifier.length()-2),spec))
            {
              if (Logging.connectors.isDebugEnabled())
                Logging.connectors.debug("SharePoint: Library specification no longer includes library '"+documentIdentifier+"' - removing");
              activities.deleteDocument(documentIdentifier);
              continue;
            }
              
            // This is the path for the library: No versioning
            String versionString = "";
            // Chained document parents are always rescanned
            String siteLibPath = documentIdentifier.substring(0,documentIdentifier.length()-2);
            int libCutoff = siteLibPath.lastIndexOf( "/" );
            String site = siteLibPath.substring(0,libCutoff);
            String libName = siteLibPath.substring( libCutoff + 1 );

            if (Logging.connectors.isDebugEnabled())
              Logging.connectors.debug( "SharePoint: Document identifier is a library: '" + siteLibPath + "'" );

            String libID = proxy.getDocLibID( encodePath(site), site, libName );
            if (libID == null)
            {
              if (Logging.connectors.isDebugEnabled())
                Logging.connectors.debug("SharePoint: GUID lookup failed for library '"+siteLibPath+"' - deleting");
              activities.deleteDocument(documentIdentifier);
              continue;
            }
            
            String encodedSitePath = encodePath(site);
              
            // Get the lib's fields
            Map<String,String> fieldNames = proxy.getFieldList( encodedSitePath, libID );
            if (fieldNames == null)
            {
              if (Logging.connectors.isDebugEnabled())
                Logging.connectors.debug("SharePoint: Field list lookup failed for library '"+siteLibPath+"' - deleting");
              activities.deleteDocument(documentIdentifier);
              continue;
            }
            
            // See CONNECTORS-1324.  We want internal field names only,
            // which are the keys of the map.
            String[] fields = new String[fieldNames.size()];
            int j = 0;
            for (String field : fieldNames.keySet())
            {
              fields[j++] = field;
            }
                  
            String[] accessTokens;
            String[] denyTokens;
                  
            if (forcedAcls == null)
            {
              // Security is off
              accessTokens = new String[0];
              denyTokens = new String[0];
            }
            else if (forcedAcls.length != 0)
            {
              // Forced security
              accessTokens = forcedAcls;
              denyTokens = new String[0];
            }
            else
            {
              // Security enabled, native security
              accessTokens = proxy.getACLs( encodedSitePath, libID, activeDirectoryAuthority );
              denyTokens = new String[]{defaultAuthorityDenyToken};
            }

            if (accessTokens == null)
            {
              if (Logging.connectors.isDebugEnabled())
                Logging.connectors.debug("SharePoint: Access token lookup failed for library '"+siteLibPath+"' - deleting");
              activities.deleteDocument(documentIdentifier);
              continue;
            }
            
            FileStream fs = new FileStream( activities, encodedServerLocation, site, siteLibPath, spec,
              documentIdentifier, accessTokens, denyTokens, libID, fields );
            
            boolean success = proxy.getChildren( fs, encodedSitePath , libID, dspStsWorks );
            
            if (!success)
            {
              // Site/library no longer exists, so delete entry
              if (Logging.connectors.isDebugEnabled())
                Logging.connectors.debug("SharePoint: No list found for library '"+siteLibPath+"' - deleting");
              activities.deleteDocument(documentIdentifier);
              continue;
            }

            activities.noDocument(documentIdentifier,versionString);
          }
          else
          {
            // == Document path ==
            // Convert the modified document path to an unmodified one, plus a library path.
            String decodedLibPath = documentIdentifier.substring(0,dLibSeparatorIndex);
            String decodedDocumentPath = decodedLibPath + documentIdentifier.substring(dLibSeparatorIndex+1);
            if (!checkIncludeFile(decodedDocumentPath,spec))
            {
              if (Logging.connectors.isDebugEnabled())
                Logging.connectors.debug("SharePoint: Document '"+documentIdentifier+"' is no longer included - removing");
              activities.deleteDocument(documentIdentifier);
              continue;
            }
            
            // This file is included, so calculate a version string.  This will include metadata info, so get that first.
            MetadataInformation metadataInfo = getMetadataSpecification(decodedDocumentPath,spec);
            
            int lastIndex = decodedLibPath.lastIndexOf("/");
            String sitePath = decodedLibPath.substring(0,lastIndex);
            String lib = decodedLibPath.substring(lastIndex+1);

            // Retrieve the carry-down data we will be using.
            // Note well: for sharepoint versions that include document/folder acls, these access tokens will be ignored,
            // but they will still be carried down nonetheless, in case someone switches versions on us.
            String[] accessTokens = activities.retrieveParentData(documentIdentifier, "accessTokens");
            String[] denyTokens = activities.retrieveParentData(documentIdentifier, "denyTokens");
            String[] libIDs = activities.retrieveParentData(documentIdentifier, "guids");
            String[] libFields = activities.retrieveParentData(documentIdentifier, "fields");

            String libID;
            if (libIDs.length >= 1)
              libID = libIDs[0];
            else
              libID = null;

            if (libID == null)
            {
              if (Logging.connectors.isDebugEnabled())
                Logging.connectors.debug("SharePoint: Can't get version of '"+documentIdentifier+"' because library '"+decodedLibPath+"' does not exist - removing");
              activities.deleteDocument(documentIdentifier);
              continue;
            }
            
            String encodedSitePath = encodePath(sitePath);
            // Get the fields we want (internal field names only at this point), given the list of all fields (internal field names again).
            String[] sortedMetadataFields = getInterestingFieldSetSorted(metadataInfo,libFields);
                
            // Sort access tokens
            java.util.Arrays.sort(accessTokens);
            java.util.Arrays.sort(denyTokens);

            // Next, get the actual timestamp field for the file.
            List<String> metadataDescription = new ArrayList<String>();
            metadataDescription.add("Last_x0020_Modified");
            metadataDescription.add("Modified");
            metadataDescription.add("Created");
            metadataDescription.add("GUID");
            // The document path includes the library, with no leading slash, and is decoded.
            int cutoff = decodedLibPath.lastIndexOf("/");
            String decodedDocumentPathWithoutSite = decodedDocumentPath.substring(cutoff);
            Map<String,String> values = proxy.getFieldValues( metadataDescription.toArray(new String[0]), encodedSitePath, libID, decodedDocumentPathWithoutSite, dspStsWorks );
            if (values == null)
            {
              if (Logging.connectors.isDebugEnabled())
                Logging.connectors.debug("SharePoint: Can't get version of '"+documentIdentifier+"' because it has bad characters(?)");
              activities.deleteDocument(documentIdentifier);
              continue;
            }

            String modifiedDate = values.get("Modified");
            String createdDate = values.get("Created");
            String guid = values.get("GUID");
            String modifyDate = values.get("Last_x0020_Modified");

            if (modifyDate == null)
            {
              if (Logging.connectors.isDebugEnabled())
                Logging.connectors.debug("SharePoint: Can't get version of '"+documentIdentifier+"' because it has no modify date");
              activities.deleteDocument(documentIdentifier);
              continue;
            }
            
            // Item has a modified date, so we presume it exists
            Date modifiedDateValue = DateParser.parseISO8601Date(modifiedDate);
            Date createdDateValue = DateParser.parseISO8601Date(createdDate);

            // Build version string
            String versionToken = modifyDate;

            if (supportsItemSecurity)
            {
              // Do the acls.
              if (forcedAcls == null)
              {
                // Security is off
                accessTokens = new String[0];
                denyTokens = new String[0];
              }
              else if (forcedAcls.length > 0)
              {
                // Security on, forced acls
                accessTokens = forcedAcls;
                denyTokens = new String[0];
              }
              else
              {
                // Security on, is native
                accessTokens = proxy.getDocumentACLs( encodedSitePath, encodePath(decodedDocumentPath), activeDirectoryAuthority );
                denyTokens = new String[]{defaultAuthorityDenyToken};
              }
            }
                  
            if (accessTokens == null)
            {
              if (Logging.connectors.isDebugEnabled())
                Logging.connectors.debug("SharePoint: Couldn't get access tokens for item '"+decodedDocumentPath+"'; removing document '"+documentIdentifier+"'");
              activities.deleteDocument(documentIdentifier);
              continue;
            }
            
            // Revamped version string on 9/21/2013 to make parseability better

            StringBuilder sb = new StringBuilder();
            packList(sb,sortedMetadataFields,'+');
            packList(sb,accessTokens,'+');
            packList(sb,denyTokens,'+');
            packDate(sb,modifiedDateValue);
            packDate(sb,createdDateValue);
            pack(sb,guid,'+');
            // The rest of this is unparseable
            sb.append(versionToken);
            sb.append(pathNameAttributeVersion);
            // Added 9/7/07
            sb.append("_").append(fileBaseUrl);
            //
            String versionString = sb.toString();
            if (Logging.connectors.isDebugEnabled())
              Logging.connectors.debug( "SharePoint: Complete version string for '"+documentIdentifier+"': " + versionString);
            
            if (!activities.checkDocumentNeedsReindexing(documentIdentifier,versionString))
              continue;
            
            // Encode the decoded document path (via encodePath) so it can be appended to the base URLs below.
            String encodedDocumentPath = encodePath(decodedDocumentPath);

            // Nothing needs to be parsed out of the version string here; the values used below are still held in local variables.

            // Generate the URL we are going to use
            String fileUrl = fileBaseUrl + encodedDocumentPath;
            if (Logging.connectors.isDebugEnabled())
              Logging.connectors.debug( "SharePoint: Processing file '"+documentIdentifier+"'; url: '" + fileUrl + "'" );

            // First, fetch the metadata we plan to index.
            Map<String,String> metadataValues = null;
            if (sortedMetadataFields.length > 0)
            {
              metadataValues = proxy.getFieldValues( sortedMetadataFields, encodePath(sitePath), libID, decodedDocumentPath.substring(cutoff), dspStsWorks );
              if (metadataValues == null)
              {
                // Document has vanished
                if (Logging.connectors.isDebugEnabled())
                  Logging.connectors.debug("SharePoint: Document metadata fetch failure indicated that document is gone: '"+documentIdentifier+"' - removing");
                activities.recordActivity(null,ACTIVITY_FETCH,null,documentIdentifier,"NOMETADATA","Document metadata is missing",null);
                activities.noDocument(documentIdentifier,versionString);
                continue;
              }
            }

            // Fetch and index.  This also filters documents based on output connector restrictions.
            fetchAndIndexFile(activities, documentIdentifier, versionString, fileUrl, serverUrl + encodedServerLocation + encodedDocumentPath,
              accessTokens, denyTokens, createdDateValue, modifiedDateValue, metadataValues, guid, sDesc);
          }
        }
        else
        {
          // === Site-style identifier ===
          String sitePath = documentIdentifier.substring(0,documentIdentifier.length()-1);
          if (sitePath.length() == 0)
            sitePath = "/";
          if (!checkIncludeSite(sitePath,spec))
          {
            if (Logging.connectors.isDebugEnabled())
              Logging.connectors.debug("SharePoint: Site specification no longer includes site '"+documentIdentifier+"' - removing");
            activities.deleteDocument(documentIdentifier);
            continue;
          }
          
          String versionString = "";
          activities.noDocument(documentIdentifier,versionString);

          // Strip off the trailing "/" to get the site name.
          String decodedSitePath = documentIdentifier.substring(0,documentIdentifier.length()-1);

          if (Logging.connectors.isDebugEnabled())
            Logging.connectors.debug( "SharePoint: Document identifier is a site: '" + decodedSitePath + "'" );

          // Look at subsites
          List<NameValue> subsites = proxy.getSites( encodePath(decodedSitePath) );
          if (subsites != null)
          {
            for (NameValue subSiteName : subsites)
            {
              String newPath = decodedSitePath + "/" + subSiteName.getValue();

              String encodedNewPath = encodePath(newPath);
              if ( checkIncludeSite(newPath,spec) )
                activities.addDocumentReference(newPath + "/");
            }
          }
          else
          {
            if (Logging.connectors.isDebugEnabled())
              Logging.connectors.debug("SharePoint: No permissions to access subsites of '"+decodedSitePath+"' - skipping");
          }

          // Look at libraries
          List<NameValue> libraries = proxy.getDocumentLibraries( encodePath(decodedSitePath), decodedSitePath );
          if (libraries != null)
          {
            for (NameValue library : libraries)
            {
              String newPath = decodedSitePath + "/" + library.getValue();

              if (checkIncludeLibrary(newPath,spec))
                activities.addDocumentReference(newPath + "//");

            }
          }
          else
          {
            if (Logging.connectors.isDebugEnabled())
              Logging.connectors.debug("SharePoint: No permissions to access libraries of '"+decodedSitePath+"' - skipping");
          }

          // Look at lists
          List<NameValue> lists = proxy.getLists( encodePath(decodedSitePath), decodedSitePath );
          if (lists != null)
          {
            for (NameValue list : lists)
            {
              String newPath = decodedSitePath + "/" + list.getValue();

              if (checkIncludeList(newPath,spec))
                activities.addDocumentReference(newPath + "///");

            }
          }
          else
          {
            if (Logging.connectors.isDebugEnabled())
              Logging.connectors.debug("SharePoint: No permissions to access lists of '"+decodedSitePath+"' - skipping");
          }

        }
      }
      else
        throw new ManifoldCFException("Invalid document identifier discovered: '"+documentIdentifier+"'");
    }
  }