in connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java [749:1631]
/** Process a set of SharePoint document identifiers.
* Each identifier is classified by its syntax and handed to a dedicated helper:
* <ul>
* <li>leading "D" or "S": old-style identifier, deleted;</li>
* <li>"/.../list///": a list, whose items are enumerated;</li>
* <li>"/.../list///item": a list item, indexed as an empty document with metadata;</li>
* <li>"/.../list///item//attachment": a list item attachment, fetched and indexed;</li>
* <li>"/.../library//": a library, whose files are enumerated;</li>
* <li>"/.../library//path": a library document, fetched and indexed;</li>
* <li>"/.../site/": a site, whose subsites, libraries and lists are queued.</li>
* </ul>
*@param documentIdentifiers is the set of document identifiers to process.
*@param statuses is not consulted by this implementation.
*@param spec is the document specification (inclusion rules, metadata, path mapping, security).
*@param activities is the interface used to queue, index, delete and log documents.
*@param jobMode is not consulted by this implementation.
*@param usesDefaultAuthority is not consulted by this implementation.
*@throws ManifoldCFException if an identifier does not match any recognized syntax, or on connector errors.
*@throws ServiceInterruption if the SharePoint server cannot currently be reached.
*/
public void processDocuments(String[] documentIdentifiers, IExistingVersions statuses, Specification spec,
  IProcessActivity activities, int jobMode, boolean usesDefaultAuthority)
  throws ManifoldCFException, ServiceInterruption
{
  // Forced acls encode the security mode: null means security is off, an empty array means
  // native SharePoint security, and a non-empty array is the forced token set.
  String[] forcedAcls = getAcls(spec);

  SystemMetadataDescription sDesc = new SystemMetadataDescription(spec);

  // Collect the path attribute name and the path mappings; both affect what is ingested and
  // therefore must be part of every version string.
  String pathAttributeName = null;
  MatchMap matchMap = new MatchMap();
  for (int i = 0; i < spec.getChildCount(); i++)
  {
    SpecificationNode n = spec.getChild(i);
    String nodeType = n.getType();
    if (nodeType.equals("pathnameattribute"))
      pathAttributeName = n.getAttributeValue("value");
    else if (nodeType.equals("pathmap"))
      matchMap.appendMatchPair(n.getAttributeValue("match"),n.getAttributeValue("replace"));
  }

  // The part of the version string that comes from path name and mapping.
  // This starts with = since ; is used by another optional component (the forced acls)
  StringBuilder pathNameAttributeVersion = new StringBuilder();
  if (pathAttributeName != null)
    pathNameAttributeVersion.append("=").append(pathAttributeName).append(":").append(matchMap);
  String pathVersionSuffix = pathNameAttributeVersion.toString();

  for (String documentIdentifier : documentIdentifiers)
  {
    // Check if we should abort
    activities.checkJobStillActive();

    getSession();

    if (Logging.connectors.isDebugEnabled())
      Logging.connectors.debug( "SharePoint: Getting version of '" + documentIdentifier + "'");

    if ( documentIdentifier.startsWith("D") || documentIdentifier.startsWith("S") )
    {
      // Old-style document identifier.  We don't recognize these anymore, so signal deletion.
      if (Logging.connectors.isDebugEnabled())
        Logging.connectors.debug("SharePoint: Removing old-style document identifier '"+documentIdentifier+"'");
      activities.deleteDocument(documentIdentifier);
      continue;
    }

    if (!documentIdentifier.startsWith("/"))
      throw new ManifoldCFException("Invalid document identifier discovered: '"+documentIdentifier+"'");

    // New-style document identifier.  A double-slash marks the separation between the library and folder/file levels.
    // A triple-slash marks the separation between a list name and list row ID.
    int dListSeparatorIndex = documentIdentifier.indexOf("///");
    int dLibSeparatorIndex = documentIdentifier.indexOf("//");

    if (dListSeparatorIndex != -1)
    {
      // === List-style identifier ===
      if (dListSeparatorIndex == documentIdentifier.length() - 3)
      {
        processListNode(documentIdentifier, spec, activities, forcedAcls);
      }
      else
      {
        // Convert the modified document path to an unmodified one, plus a list path.
        // itemAndAttachment keeps one leading slash, so it can be appended directly.
        String decodedListPath = documentIdentifier.substring(0,dListSeparatorIndex);
        String itemAndAttachment = documentIdentifier.substring(dListSeparatorIndex+2);
        String decodedItemPath = decodedListPath + itemAndAttachment;
        // A further "//" (past the leading slash) separates the item from an attachment name.
        if (itemAndAttachment.indexOf("//",1) == -1)
          processListItemNode(documentIdentifier, decodedListPath, decodedItemPath, spec, activities, sDesc, pathVersionSuffix);
        else
          processListItemAttachmentNode(documentIdentifier, decodedItemPath, spec, activities, sDesc, pathVersionSuffix);
      }
    }
    else if (dLibSeparatorIndex != -1)
    {
      // === Library-style identifier ===
      if (dLibSeparatorIndex == documentIdentifier.length() - 2)
        processLibraryNode(documentIdentifier, spec, activities, forcedAcls);
      else
        processLibraryDocumentNode(documentIdentifier, dLibSeparatorIndex, spec, activities, forcedAcls, sDesc, pathVersionSuffix);
    }
    else
    {
      // === Site-style identifier ===
      processSiteNode(documentIdentifier, spec, activities);
    }
  }
}

/** Return the first element of a carry-down value set, or null if the set is empty. */
private static String firstValueOrNull(String[] values)
{
  return (values.length >= 1) ? values[0] : null;
}

/** Compute the deny tokens implied by the security mode encoded in the forced acls.
* Only native security (an empty, non-null forced acl array) produces a deny token.
*/
private String[] denyTokensForSecurityMode(String[] forcedAcls)
{
  if (forcedAcls != null && forcedAcls.length == 0)
    return new String[]{defaultAuthorityDenyToken};
  return new String[0];
}

/** Compute the access tokens for a list or library according to the security mode.
*@return an empty array when security is off, the forced acls when they are specified, or
* whatever proxy.getACLs() returns (callers treat null as a lookup failure) for native security.
*/
private String[] containerAccessTokens(String[] forcedAcls, String encodedSitePath, String containerID)
  throws ManifoldCFException, ServiceInterruption
{
  if (forcedAcls == null)
  {
    // Security is off
    return new String[0];
  }
  if (forcedAcls.length != 0)
  {
    // Forced security
    return forcedAcls;
  }
  // Security enabled, native security
  return proxy.getACLs( encodedSitePath, containerID, activeDirectoryAuthority );
}

/** Process a list identifier ("/site/list///"): enumerate its items, handing each one to a ListItemStream. */
private void processListNode(String documentIdentifier, Specification spec, IProcessActivity activities, String[] forcedAcls)
  throws ManifoldCFException, ServiceInterruption
{
  String siteListPath = documentIdentifier.substring(0,documentIdentifier.length()-3);
  if (!checkIncludeList(siteListPath,spec))
  {
    if (Logging.connectors.isDebugEnabled())
      Logging.connectors.debug("SharePoint: List specification no longer includes list '"+documentIdentifier+"' - removing");
    activities.deleteDocument(documentIdentifier);
    return;
  }

  // Chained connectors always scan parent nodes, so they don't bother setting a version
  String versionString = "";

  int listCutoff = siteListPath.lastIndexOf( "/" );
  String site = siteListPath.substring(0,listCutoff);
  String listName = siteListPath.substring( listCutoff + 1 );
  String encodedSitePath = encodePath(site);

  if (Logging.connectors.isDebugEnabled())
    Logging.connectors.debug( "SharePoint: Document identifier is a list: '" + siteListPath + "'" );

  String listID = proxy.getListID( encodedSitePath, site, listName );
  if (listID == null)
  {
    if (Logging.connectors.isDebugEnabled())
      Logging.connectors.debug("SharePoint: GUID lookup failed for list '"+siteListPath+"' - deleting");
    activities.deleteDocument(documentIdentifier);
    return;
  }

  // Get the list's fields
  Map<String,String> fieldNames = proxy.getFieldList( encodedSitePath, listID );
  if (fieldNames == null)
  {
    if (Logging.connectors.isDebugEnabled())
      Logging.connectors.debug("SharePoint: Field list lookup failed for list '"+siteListPath+"' - deleting");
    activities.deleteDocument(documentIdentifier);
    return;
  }

  // Note well: There's been a lot of back-and forth about this code.
  // See CONNECTORS-1324.
  // The fieldNames map returned by proxy.getFieldList() has the internal name as a key, and the display name as a value.
  // Since we want the complete list of fields here, by *internal* name, we take the keySet(), not the values.
  String[] fields = fieldNames.keySet().toArray(new String[0]);

  String[] accessTokens = containerAccessTokens(forcedAcls, encodedSitePath, listID);
  String[] denyTokens = denyTokensForSecurityMode(forcedAcls);
  if (accessTokens == null)
  {
    if (Logging.connectors.isDebugEnabled())
      Logging.connectors.debug("SharePoint: Access token lookup failed for list '"+siteListPath+"' - deleting");
    activities.deleteDocument(documentIdentifier);
    return;
  }

  ListItemStream fs = new ListItemStream( activities, encodedServerLocation, site, siteListPath, spec,
    documentIdentifier, accessTokens, denyTokens, listID, fields );
  boolean success = proxy.getChildren( fs, encodedSitePath , listID, dspStsWorks );
  if (!success)
  {
    // Site/list no longer exists, so delete entry
    if (Logging.connectors.isDebugEnabled())
      Logging.connectors.debug("SharePoint: No list found for list '"+siteListPath+"' - deleting");
    activities.deleteDocument(documentIdentifier);
    return;
  }
  activities.noDocument(documentIdentifier,versionString);
}

/** Process a list item identifier ("/site/list///item"): build its version string, queue its
* attachments (when supported), and index it as a zero-length document carrying metadata.
*@param decodedListPath is the "/site/list" part of the identifier.
*@param decodedItemPath is the identifier with the "///" separator collapsed to "/".
*@param pathVersionSuffix is the version string component derived from path attribute and mappings.
*/
private void processListItemNode(String documentIdentifier, String decodedListPath, String decodedItemPath,
  Specification spec, IProcessActivity activities, SystemMetadataDescription sDesc, String pathVersionSuffix)
  throws ManifoldCFException, ServiceInterruption
{
  if (!checkIncludeListItem(decodedItemPath,spec))
  {
    if (Logging.connectors.isDebugEnabled())
      Logging.connectors.debug("SharePoint: List item '"+documentIdentifier+"' is no longer included - removing");
    activities.deleteDocument(documentIdentifier);
    return;
  }

  // This file is included, so calculate a version string.  This will include metadata info, so get that first.
  MetadataInformation metadataInfo = getMetadataSpecification(decodedItemPath,spec);

  // Carry-down data supplied by the parent list
  String[] accessTokens = activities.retrieveParentData(documentIdentifier, "accessTokens");
  String[] denyTokens = activities.retrieveParentData(documentIdentifier, "denyTokens");
  String[] listIDs = activities.retrieveParentData(documentIdentifier, "guids");
  String[] listFields = activities.retrieveParentData(documentIdentifier, "fields");
  String[] displayURLs = activities.retrieveParentData(documentIdentifier, "displayURLs");

  String listID = firstValueOrNull(listIDs);
  String displayURL = firstValueOrNull(displayURLs);

  if (listID == null)
  {
    if (Logging.connectors.isDebugEnabled())
      Logging.connectors.debug("SharePoint: Can't get version of '"+documentIdentifier+"' because list '"+decodedListPath+"' does not exist - removing");
    activities.deleteDocument(documentIdentifier);
    return;
  }

  // Get the fields we want (internal field names only at this point), given the list of all fields (internal field names again).
  String[] sortedMetadataFields = getInterestingFieldSetSorted(metadataInfo,listFields);

  // Sort access tokens so they are comparable in the version string
  java.util.Arrays.sort(accessTokens);
  java.util.Arrays.sort(denyTokens);

  int cutoff = decodedListPath.lastIndexOf("/");
  String sitePath = decodedListPath.substring(0,cutoff);
  String encodedSitePath = encodePath(sitePath);
  // The item path relative to the site: "/Lists/" followed by the decoded list name and item.
  String listRelativeItemPath = "/Lists/" + decodedItemPath.substring(cutoff+1);

  // Next, get the actual timestamp field for the file.
  Map<String,String> values = proxy.getFieldValues( new String[]{"Modified","Created","ID","GUID"},
    encodedSitePath, listID, listRelativeItemPath, dspStsWorks );
  if (values == null)
  {
    if (Logging.connectors.isDebugEnabled())
      Logging.connectors.debug("SharePoint: Can't get version of '"+documentIdentifier+"' because of bad XML characters(?)");
    activities.deleteDocument(documentIdentifier);
    return;
  }

  String modifiedDate = values.get("Modified");
  String createdDate = values.get("Created");
  String id = values.get("ID");
  String guid = values.get("GUID");
  if (modifiedDate == null)
  {
    if (Logging.connectors.isDebugEnabled())
      Logging.connectors.debug("SharePoint: Can't get version of '"+documentIdentifier+"' because it has no modify date");
    activities.deleteDocument(documentIdentifier);
    return;
  }

  // Item has a modified date so we presume it exists.
  Date modifiedDateValue = DateParser.parseISO8601Date(modifiedDate);
  Date createdDateValue = DateParser.parseISO8601Date(createdDate);

  // Build version string
  // Revamped version string on 9/21/2013 to make parseability better
  StringBuilder sb = new StringBuilder();
  packList(sb,sortedMetadataFields,'+');
  packList(sb,accessTokens,'+');
  packList(sb,denyTokens,'+');
  packDate(sb,modifiedDateValue);
  packDate(sb,createdDateValue);
  pack(sb,id,'+');
  pack(sb,guid,'+');
  pack(sb,displayURL,'+');
  // The rest of this is unparseable
  sb.append(modifiedDate);
  sb.append(pathVersionSuffix);
  // Added 9/7/07
  sb.append("_").append(fileBaseUrl);
  String versionString = sb.toString();

  if (Logging.connectors.isDebugEnabled())
    Logging.connectors.debug( "SharePoint: Complete version string for '"+documentIdentifier+"': " + versionString);

  // Before we index, we queue up any attachments.  This happens even if the item itself
  // turns out not to need reindexing.
  if (attachmentsSupported)
  {
    List<NameValue> attachmentNames = proxy.getAttachmentNames( sitePath, listID, id );
    // For attachments, we use the carry-down feature to get the data where we need it.
    // Everything except the pretty name is the same for every attachment of this item.
    String[] createdValues = (createdDateValue == null) ? new String[0] : new String[]{Long.toString(createdDateValue.getTime())};
    String[] modifiedValues = (modifiedDateValue == null) ? new String[0] : new String[]{Long.toString(modifiedDateValue.getTime())};
    String[] guidValues = new String[]{guid};
    // Now, queue up each attachment as a separate entry
    for (NameValue attachmentName : attachmentNames)
    {
      // No check for inclusion; if the list item is included, so is this
      String[][] dataValues = new String[attachmentDataNames.length][];
      dataValues[0] = createdValues;
      dataValues[1] = modifiedValues;
      dataValues[2] = accessTokens;
      dataValues[3] = denyTokens;
      dataValues[4] = new String[]{attachmentName.getPrettyName()};
      dataValues[5] = guidValues;
      activities.addDocumentReference(documentIdentifier + "//" + attachmentName.getValue(),
        documentIdentifier, null, attachmentDataNames, dataValues);
    }
  }

  if (!activities.checkDocumentNeedsReindexing(documentIdentifier,versionString))
    return;

  // Generate the URL we are going to use
  String itemUrl = serverUrl + displayURL;
  if (Logging.connectors.isDebugEnabled())
    Logging.connectors.debug( "SharePoint: Processing list item '"+documentIdentifier+"'; url: '" + itemUrl + "'" );

  // Fetch the metadata we will be indexing
  Map<String,String> metadataValues = null;
  if (sortedMetadataFields.length > 0)
  {
    metadataValues = proxy.getFieldValues( sortedMetadataFields, encodedSitePath, listID, listRelativeItemPath, dspStsWorks );
    if (metadataValues == null)
    {
      // Item has vanished
      if (Logging.connectors.isDebugEnabled())
        Logging.connectors.debug("SharePoint: Item metadata fetch failure indicated that item is gone: '"+documentIdentifier+"' - removing");
      activities.recordActivity(null,ACTIVITY_FETCH,null,documentIdentifier,"NOMETADATA","List item metadata is missing",null);
      activities.noDocument(documentIdentifier,versionString);
      return;
    }
  }

  if (!activities.checkLengthIndexable(0L))
  {
    // Document too long (should never happen; length is 0)
    activities.recordActivity(null,ACTIVITY_FETCH,null,documentIdentifier,activities.EXCLUDED_LENGTH,"List item excluded due to content length (0)",null);
    activities.noDocument( documentIdentifier, versionString );
    return;
  }

  // A list item has no content of its own, so it is indexed with an empty body.
  InputStream is = new ByteArrayInputStream(new byte[0]);
  try
  {
    RepositoryDocument data = new RepositoryDocument();
    data.setBinary( is, 0L );

    if (modifiedDateValue != null)
      data.setModifiedDate(modifiedDateValue);
    if (createdDateValue != null)
      data.setCreatedDate(createdDateValue);

    setDataACLs(data,accessTokens,denyTokens);
    setPathAttribute(data,sDesc,documentIdentifier);

    if (metadataValues != null)
    {
      for (Map.Entry<String,String> field : metadataValues.entrySet())
      {
        data.addField(field.getKey(),field.getValue());
      }
    }
    data.addField("GUID",guid);

    try
    {
      activities.ingestDocumentWithException( documentIdentifier, versionString, itemUrl , data );
    }
    catch (IOException e)
    {
      handleIOException(e,"reading document");
    }
  }
  finally
  {
    try
    {
      is.close();
    }
    catch (IOException e)
    {
      handleIOException(e,"closing stream");
    }
  }
}

/** Process a list item attachment identifier ("/site/list///item//attachment"), using the
* carry-down data of the parent list item to version, fetch and index the attachment.
*@param decodedItemPath is the identifier with the "///" separator collapsed to "/".
*@param pathVersionSuffix is the version string component derived from path attribute and mappings.
*/
private void processListItemAttachmentNode(String documentIdentifier, String decodedItemPath,
  Specification spec, IProcessActivity activities, SystemMetadataDescription sDesc, String pathVersionSuffix)
  throws ManifoldCFException, ServiceInterruption
{
  if (!checkIncludeListItemAttachment(decodedItemPath,spec))
  {
    if (Logging.connectors.isDebugEnabled())
      Logging.connectors.debug("SharePoint: List item attachment '"+documentIdentifier+"' is no longer included - removing");
    activities.deleteDocument(documentIdentifier);
    return;
  }

  // To save work, we retrieve most of what we need in version info from the parent.
  String[] modifiedDateSet = activities.retrieveParentData(documentIdentifier, "modifiedDate");
  String[] createdDateSet = activities.retrieveParentData(documentIdentifier, "createdDate");
  String[] accessTokens = activities.retrieveParentData(documentIdentifier, "accessTokens");
  String[] denyTokens = activities.retrieveParentData(documentIdentifier, "denyTokens");
  String[] urlSet = activities.retrieveParentData(documentIdentifier, "url");

  // Only one modifiedDate and createdDate can be used.  If there's more than one, just pick one - the item will be reindexed
  // anyhow.
  String modifiedDate = firstValueOrNull(modifiedDateSet);
  String createdDate = firstValueOrNull(createdDateSet);
  String url = firstValueOrNull(urlSet);

  // If we have no modified date or url, it means that the parent has gone away, so we go away too.
  if (modifiedDate == null || url == null)
  {
    if (Logging.connectors.isDebugEnabled())
      Logging.connectors.debug("SharePoint: Can't get version of '"+documentIdentifier+"' because modified date or attachment url not found");
    activities.deleteDocument(documentIdentifier);
    return;
  }

  // Item has a modified date so we presume it exists.  The carried-down dates are epoch milliseconds.
  Date modifiedDateValue = new Date(Long.parseLong(modifiedDate));
  Date createdDateValue = (createdDate != null) ? new Date(Long.parseLong(createdDate)) : null;

  // Build version string
  StringBuilder sb = new StringBuilder();
  // Pack the URL to get the data from
  pack(sb,url,'+');
  // Do the acls.  If we get this far, we are guaranteed to have them, but we need to sort.
  java.util.Arrays.sort(accessTokens);
  java.util.Arrays.sort(denyTokens);
  packList(sb,accessTokens,'+');
  packList(sb,denyTokens,'+');
  packDate(sb,modifiedDateValue);
  packDate(sb,createdDateValue);
  // The rest of this is unparseable
  sb.append(modifiedDate);
  sb.append(pathVersionSuffix);
  sb.append("_").append(fileBaseUrl);
  String versionString = sb.toString();

  if (Logging.connectors.isDebugEnabled())
    Logging.connectors.debug( "SharePoint: Complete version string for '"+documentIdentifier+"': " + versionString);

  if (!activities.checkDocumentNeedsReindexing(documentIdentifier,versionString))
    return;

  // We need the parent's GUID, which was carried down to us.
  String guid = firstValueOrNull(activities.retrieveParentData(documentIdentifier, "guids"));
  if (guid == null)
  {
    if (Logging.connectors.isDebugEnabled())
      Logging.connectors.debug("SharePoint: Skipping attachment '"+documentIdentifier+"' because no parent guid found");
    activities.recordActivity(null,ACTIVITY_FETCH,null,documentIdentifier,"NOGUID","List item attachment GUID is missing",null);
    activities.noDocument(documentIdentifier,versionString);
    return;
  }

  // Qualify the parent guid with the last path segment of the attachment url.
  guid = guid + ":" + url.substring(url.lastIndexOf("/")+1);

  // Fetch and index.  This also filters documents based on output connector restrictions.
  String fileUrl = serverUrl + encodePath(url);
  fetchAndIndexFile(activities, documentIdentifier, versionString, fileUrl, fileUrl,
    accessTokens, denyTokens, createdDateValue, modifiedDateValue, null, guid, sDesc);
}

/** Process a library identifier ("/site/library//"): enumerate its files, handing each one to a FileStream. */
private void processLibraryNode(String documentIdentifier, Specification spec, IProcessActivity activities, String[] forcedAcls)
  throws ManifoldCFException, ServiceInterruption
{
  String siteLibPath = documentIdentifier.substring(0,documentIdentifier.length()-2);
  if (!checkIncludeLibrary(siteLibPath,spec))
  {
    if (Logging.connectors.isDebugEnabled())
      Logging.connectors.debug("SharePoint: Library specification no longer includes library '"+documentIdentifier+"' - removing");
    activities.deleteDocument(documentIdentifier);
    return;
  }

  // This is the path for the library: No versioning.  Chained document parents are always rescanned.
  String versionString = "";

  int libCutoff = siteLibPath.lastIndexOf( "/" );
  String site = siteLibPath.substring(0,libCutoff);
  String libName = siteLibPath.substring( libCutoff + 1 );
  String encodedSitePath = encodePath(site);

  if (Logging.connectors.isDebugEnabled())
    Logging.connectors.debug( "SharePoint: Document identifier is a library: '" + siteLibPath + "'" );

  String libID = proxy.getDocLibID( encodedSitePath, site, libName );
  if (libID == null)
  {
    if (Logging.connectors.isDebugEnabled())
      Logging.connectors.debug("SharePoint: GUID lookup failed for library '"+siteLibPath+"' - deleting");
    activities.deleteDocument(documentIdentifier);
    return;
  }

  // Get the lib's fields
  Map<String,String> fieldNames = proxy.getFieldList( encodedSitePath, libID );
  if (fieldNames == null)
  {
    if (Logging.connectors.isDebugEnabled())
      Logging.connectors.debug("SharePoint: Field list lookup failed for library '"+siteLibPath+"' - deleting");
    activities.deleteDocument(documentIdentifier);
    return;
  }

  // See CONNECTORS-1324.  We want internal field names only,
  // which are the keys of the map.
  String[] fields = fieldNames.keySet().toArray(new String[0]);

  String[] accessTokens = containerAccessTokens(forcedAcls, encodedSitePath, libID);
  String[] denyTokens = denyTokensForSecurityMode(forcedAcls);
  if (accessTokens == null)
  {
    if (Logging.connectors.isDebugEnabled())
      Logging.connectors.debug("SharePoint: Access token lookup failed for library '"+siteLibPath+"' - deleting");
    activities.deleteDocument(documentIdentifier);
    return;
  }

  FileStream fs = new FileStream( activities, encodedServerLocation, site, siteLibPath, spec,
    documentIdentifier, accessTokens, denyTokens, libID, fields );
  boolean success = proxy.getChildren( fs, encodedSitePath , libID, dspStsWorks );
  if (!success)
  {
    // Site/library no longer exists, so delete entry
    if (Logging.connectors.isDebugEnabled())
      Logging.connectors.debug("SharePoint: No list found for library '"+siteLibPath+"' - deleting");
    activities.deleteDocument(documentIdentifier);
    return;
  }
  activities.noDocument(documentIdentifier,versionString);
}

/** Process a library document identifier ("/site/library//path"): build its version string and,
* if it needs reindexing, fetch its metadata and content and index it.
*@param dLibSeparatorIndex is the index of the "//" separator within the identifier.
*@param pathVersionSuffix is the version string component derived from path attribute and mappings.
*/
private void processLibraryDocumentNode(String documentIdentifier, int dLibSeparatorIndex, Specification spec,
  IProcessActivity activities, String[] forcedAcls, SystemMetadataDescription sDesc, String pathVersionSuffix)
  throws ManifoldCFException, ServiceInterruption
{
  // Convert the modified document path to an unmodified one, plus a library path.
  String decodedLibPath = documentIdentifier.substring(0,dLibSeparatorIndex);
  String decodedDocumentPath = decodedLibPath + documentIdentifier.substring(dLibSeparatorIndex+1);
  if (!checkIncludeFile(decodedDocumentPath,spec))
  {
    if (Logging.connectors.isDebugEnabled())
      Logging.connectors.debug("SharePoint: Document '"+documentIdentifier+"' is no longer included - removing");
    activities.deleteDocument(documentIdentifier);
    return;
  }

  // This file is included, so calculate a version string.  This will include metadata info, so get that first.
  MetadataInformation metadataInfo = getMetadataSpecification(decodedDocumentPath,spec);

  int cutoff = decodedLibPath.lastIndexOf("/");
  String sitePath = decodedLibPath.substring(0,cutoff);

  // Retrieve the carry-down data we will be using.
  // Note well: for sharepoint versions that include document/folder acls, these access tokens will be ignored,
  // but they will still be carried down nonetheless, in case someone switches versions on us.
  String[] accessTokens = activities.retrieveParentData(documentIdentifier, "accessTokens");
  String[] denyTokens = activities.retrieveParentData(documentIdentifier, "denyTokens");
  String[] libIDs = activities.retrieveParentData(documentIdentifier, "guids");
  String[] libFields = activities.retrieveParentData(documentIdentifier, "fields");

  String libID = firstValueOrNull(libIDs);
  if (libID == null)
  {
    if (Logging.connectors.isDebugEnabled())
      Logging.connectors.debug("SharePoint: Can't get version of '"+documentIdentifier+"' because library '"+decodedLibPath+"' does not exist - removing");
    activities.deleteDocument(documentIdentifier);
    return;
  }

  String encodedSitePath = encodePath(sitePath);

  // Get the fields we want (internal field names only at this point), given the list of all fields (internal field names again).
  String[] sortedMetadataFields = getInterestingFieldSetSorted(metadataInfo,libFields);

  // Sort access tokens
  java.util.Arrays.sort(accessTokens);
  java.util.Arrays.sort(denyTokens);

  // The document path relative to the site: it starts with "/" followed by the library name, and is decoded.
  String libraryRelativeDocumentPath = decodedDocumentPath.substring(cutoff);

  // Next, get the actual timestamp field for the file.
  Map<String,String> values = proxy.getFieldValues( new String[]{"Last_x0020_Modified","Modified","Created","GUID"},
    encodedSitePath, libID, libraryRelativeDocumentPath, dspStsWorks );
  if (values == null)
  {
    if (Logging.connectors.isDebugEnabled())
      Logging.connectors.debug("SharePoint: Can't get version of '"+documentIdentifier+"' because it has bad characters(?)");
    activities.deleteDocument(documentIdentifier);
    return;
  }

  String modifiedDate = values.get("Modified");
  String createdDate = values.get("Created");
  String guid = values.get("GUID");
  // "Last_x0020_Modified" is the value that decides existence and acts as the version token;
  // "Modified" and "Created" only supply the indexed dates.
  String modifyDate = values.get("Last_x0020_Modified");
  if (modifyDate == null)
  {
    if (Logging.connectors.isDebugEnabled())
      Logging.connectors.debug("SharePoint: Can't get version of '"+documentIdentifier+"' because it has no modify date");
    activities.deleteDocument(documentIdentifier);
    return;
  }

  // Item has a modified date, so we presume it exists
  Date modifiedDateValue = DateParser.parseISO8601Date(modifiedDate);
  Date createdDateValue = DateParser.parseISO8601Date(createdDate);

  if (supportsItemSecurity)
  {
    // Document-level security replaces the tokens carried down from the library.
    // NOTE(review): the replacement tokens are not re-sorted here before being packed.
    if (forcedAcls == null)
    {
      // Security is off
      accessTokens = new String[0];
    }
    else if (forcedAcls.length > 0)
    {
      // Security on, forced acls
      accessTokens = forcedAcls;
    }
    else
    {
      // Security on, is native
      accessTokens = proxy.getDocumentACLs( encodedSitePath, encodePath(decodedDocumentPath), activeDirectoryAuthority );
    }
    denyTokens = denyTokensForSecurityMode(forcedAcls);
  }

  if (accessTokens == null)
  {
    if (Logging.connectors.isDebugEnabled())
      Logging.connectors.debug("SharePoint: Couldn't get access tokens for item '"+decodedDocumentPath+"'; removing document '"+documentIdentifier+"'");
    activities.deleteDocument(documentIdentifier);
    return;
  }

  // Build version string
  // Revamped version string on 9/21/2013 to make parseability better
  StringBuilder sb = new StringBuilder();
  packList(sb,sortedMetadataFields,'+');
  packList(sb,accessTokens,'+');
  packList(sb,denyTokens,'+');
  packDate(sb,modifiedDateValue);
  packDate(sb,createdDateValue);
  pack(sb,guid,'+');
  // The rest of this is unparseable
  sb.append(modifyDate);
  sb.append(pathVersionSuffix);
  // Added 9/7/07
  sb.append("_").append(fileBaseUrl);
  String versionString = sb.toString();

  if (Logging.connectors.isDebugEnabled())
    Logging.connectors.debug( "SharePoint: Complete version string for '"+documentIdentifier+"': " + versionString);

  if (!activities.checkDocumentNeedsReindexing(documentIdentifier,versionString))
    return;

  String encodedDocumentPath = encodePath(decodedDocumentPath);

  // Generate the URL we are going to use
  String fileUrl = fileBaseUrl + encodedDocumentPath;
  if (Logging.connectors.isDebugEnabled())
    Logging.connectors.debug( "SharePoint: Processing file '"+documentIdentifier+"'; url: '" + fileUrl + "'" );

  // First, fetch the metadata we plan to index.
  Map<String,String> metadataValues = null;
  if (sortedMetadataFields.length > 0)
  {
    metadataValues = proxy.getFieldValues( sortedMetadataFields, encodedSitePath, libID, libraryRelativeDocumentPath, dspStsWorks );
    if (metadataValues == null)
    {
      // Document has vanished
      if (Logging.connectors.isDebugEnabled())
        Logging.connectors.debug("SharePoint: Document metadata fetch failure indicated that document is gone: '"+documentIdentifier+"' - removing");
      activities.recordActivity(null,ACTIVITY_FETCH,null,documentIdentifier,"NOMETADATA","Document metadata is missing",null);
      activities.noDocument(documentIdentifier,versionString);
      return;
    }
  }

  // Fetch and index.  This also filters documents based on output connector restrictions.
  fetchAndIndexFile(activities, documentIdentifier, versionString, fileUrl, serverUrl + encodedServerLocation + encodedDocumentPath,
    accessTokens, denyTokens, createdDateValue, modifiedDateValue, metadataValues, guid, sDesc);
}

/** Process a site identifier ("/site/"): queue the included subsites, libraries and lists beneath it. */
private void processSiteNode(String documentIdentifier, Specification spec, IProcessActivity activities)
  throws ManifoldCFException, ServiceInterruption
{
  // Strip off the trailing "/" to get the site name.
  String decodedSitePath = documentIdentifier.substring(0,documentIdentifier.length()-1);
  // For the inclusion check only, the root site (empty name) is expressed as "/".
  String sitePath = (decodedSitePath.length() == 0) ? "/" : decodedSitePath;
  if (!checkIncludeSite(sitePath,spec))
  {
    if (Logging.connectors.isDebugEnabled())
      Logging.connectors.debug("SharePoint: Site specification no longer includes site '"+documentIdentifier+"' - removing");
    activities.deleteDocument(documentIdentifier);
    return;
  }

  // Sites carry no content and no version.
  activities.noDocument(documentIdentifier,"");

  if (Logging.connectors.isDebugEnabled())
    Logging.connectors.debug( "SharePoint: Document identifier is a site: '" + decodedSitePath + "'" );

  String encodedSitePath = encodePath(decodedSitePath);

  // Look at subsites
  List<NameValue> subsites = proxy.getSites( encodedSitePath );
  if (subsites != null)
  {
    for (NameValue subSiteName : subsites)
    {
      String newPath = decodedSitePath + "/" + subSiteName.getValue();
      if ( checkIncludeSite(newPath,spec) )
        activities.addDocumentReference(newPath + "/");
    }
  }
  else
  {
    if (Logging.connectors.isDebugEnabled())
      Logging.connectors.debug("SharePoint: No permissions to access subsites of '"+decodedSitePath+"' - skipping");
  }

  // Look at libraries
  List<NameValue> libraries = proxy.getDocumentLibraries( encodedSitePath, decodedSitePath );
  if (libraries != null)
  {
    for (NameValue library : libraries)
    {
      String newPath = decodedSitePath + "/" + library.getValue();
      if (checkIncludeLibrary(newPath,spec))
        activities.addDocumentReference(newPath + "//");
    }
  }
  else
  {
    if (Logging.connectors.isDebugEnabled())
      Logging.connectors.debug("SharePoint: No permissions to access libraries of '"+decodedSitePath+"' - skipping");
  }

  // Look at lists
  List<NameValue> lists = proxy.getLists( encodedSitePath, decodedSitePath );
  if (lists != null)
  {
    for (NameValue list : lists)
    {
      String newPath = decodedSitePath + "/" + list.getValue();
      if (checkIncludeList(newPath,spec))
        activities.addDocumentReference(newPath + "///");
    }
  }
  else
  {
    if (Logging.connectors.isDebugEnabled())
      Logging.connectors.debug("SharePoint: No permissions to access lists of '"+decodedSitePath+"' - skipping");
  }
}