public void processDocuments()

in connectors/googledrive/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/googledrive/GoogleDriveRepositoryConnector.java [973:1308]


  /**
   * Processes a batch of Google Drive document identifiers.
   *
   * <p>For each identifier the corresponding Drive object is fetched and then:
   * <ul>
   *   <li>a missing, explicitly trashed, or shortcut object is deleted via {@code activities};</li>
   *   <li>a folder is always processed: its children are queued as document references;</li>
   *   <li>a file gets a version string built from the forced ACLs and its modified time, and is
   *       fetched and ingested only when {@code activities} reports that this version needs
   *       reindexing and the length, URL, mime type and date checks all pass.</li>
   * </ul>
   * For files, one {@code ACTIVITY_READ} record is written whenever a result code was set.
   *
   * @param documentIdentifiers the identifiers of the documents to process
   * @param statuses existing version information (not consulted by this implementation)
   * @param spec the job specification; the forced ACLs are read from it
   * @param activities the handle used to queue, ingest, skip and delete documents and to record activity
   * @param jobMode the job mode (not consulted by this implementation)
   * @param usesDefaultAuthority whether the default authority is in use (not consulted by this implementation)
   * @throws ManifoldCFException on interruption or on errors raised by the framework or helpers
   * @throws ServiceInterruption when a helper signals that processing should be retried later
   */
  public void processDocuments(String[] documentIdentifiers, IExistingVersions statuses, Specification spec,
    IProcessActivity activities, int jobMode, boolean usesDefaultAuthority)
    throws ManifoldCFException, ServiceInterruption {

    // Forced acls
    String[] acls = getAcls(spec);
    // Sorted because they are packed, in this order, into every file's version string.
    java.util.Arrays.sort(acls);

    for (String documentIdentifier : documentIdentifiers) {
      File googleFile = getObject(documentIdentifier);

      if (Logging.connectors.isDebugEnabled()) {
        Logging.connectors.debug("GOOGLEDRIVE: Processing document identifier '"
            + documentIdentifier + "'");
        // The lookup may have found nothing; only log the name when there is a file to name.
        if (googleFile != null) {
          Logging.connectors.debug("GOOGLEDRIVE: have this file:\t" + googleFile.getName());
        }
      }

      if (googleFile == null
          || (googleFile.containsKey("explicitlyTrashed") && Boolean.TRUE.equals(googleFile.getExplicitlyTrashed()))
          || "application/vnd.google-apps.shortcut".equals(googleFile.getMimeType())) {
        // Missing, trashed, or a shortcut: treat it as deleted and move on
        activities.deleteDocument(documentIdentifier);
        continue;
      }

      String versionString;
      if (!isDir(googleFile)) {
        DateTime modifiedTime = googleFile.getModifiedTime();
        String rev = (modifiedTime == null) ? null : modifiedTime.toStringRfc3339();
        if (!StringUtils.isNotEmpty(rev)) {
          // A google document that doesn't contain versioning information will NEVER be processed.
          activities.deleteDocument(documentIdentifier);
          continue;
        }
        versionString = buildVersionString(acls, rev);
      } else {
        // A google folder will always be processed
        versionString = StringUtils.EMPTY;
      }

      // An empty version string (folders) forces processing; otherwise ask the framework.
      if (versionString.length() != 0
          && !activities.checkDocumentNeedsReindexing(documentIdentifier, versionString)) {
        continue;
      }

      long startTime = System.currentTimeMillis();
      String errorCode = null;
      String errorDesc = StringUtils.EMPTY;
      Long fileSize = null;
      boolean doLog = false;

      try {
        if (Logging.connectors.isDebugEnabled()) {
          Logging.connectors.debug("GOOGLEDRIVE: Processing document identifier '"
              + documentIdentifier + "'");
          Logging.connectors.debug("GOOGLEDRIVE: have this file:\t" + googleFile.getName());
        }

        if ("application/vnd.google-apps.folder".equals(googleFile.getMimeType())) {
          // A directory: queue its children

          if (Logging.connectors.isDebugEnabled()) {
            Logging.connectors.debug("GOOGLEDRIVE: its a directory");
          }

          queueFolderChildren(documentIdentifier, activities);

        } else {
          // A file; only files produce an activity record
          doLog = true;

          if (Logging.connectors.isDebugEnabled()) {
            Logging.connectors.debug("GOOGLEDRIVE: its a file");
          }

          // Get the file length; a file that reports no size is treated as zero bytes long
          Long sizeObject = googleFile.getSize();
          long fileLength = (sizeObject == null) ? 0L : sizeObject.longValue();

          // Now do standard stuff
          String mimeType = googleFile.getMimeType();
          DateTime createdDateObject = googleFile.getCreatedTime();
          DateTime modifiedDateObject = googleFile.getModifiedTime();
          String extension = googleFile.getFileExtension();
          String title = cleanupFileFolderName(googleFile.getName());
          Date createdDate = (createdDateObject == null) ? null : new Date(createdDateObject.getValue());
          Date modifiedDate = (modifiedDateObject == null) ? null : new Date(modifiedDateObject.getValue());

          String documentURI = selectExportUri(googleFile, mimeType);

          // Google native format documents may exist, but have 0 byte in size.
          // In cases like this, there is no way to export it, and because of that, it is going to be ignored
          if (documentURI == null) {
            errorCode = "NOLENGTH";
            errorDesc = "Document "+documentIdentifier+" had no length; skipping";
            continue;
          }

          String fullContentPath = getDocumentContentPath(googleFile, documentURI);

          // Append the content path as an extra parameter in the query string
          if (StringUtils.contains(documentURI, '?')) {
            documentURI = documentURI + "&" + CONTENT_PATH_PARAM + "=" + fullContentPath;
          } else {
            documentURI = documentURI + "?" + CONTENT_PATH_PARAM + "=" + fullContentPath;
          }

          if (!activities.checkLengthIndexable(fileLength)) {
            errorCode = activities.EXCLUDED_LENGTH;
            errorDesc = "Excluding document because of file length ('"+fileLength+"')";
            activities.noDocument(documentIdentifier, versionString);
            continue;
          }

          if (!activities.checkURLIndexable(documentURI)) {
            errorCode = activities.EXCLUDED_URL;
            errorDesc = "Excluding document because of URL ('"+documentURI+"')";
            activities.noDocument(documentIdentifier, versionString);
            continue;
          }

          if (!activities.checkMimeTypeIndexable(mimeType)) {
            errorCode = activities.EXCLUDED_MIMETYPE;
            errorDesc = "Excluding document because of mime type ("+mimeType+")";
            activities.noDocument(documentIdentifier, versionString);
            continue;
          }

          if (!activities.checkDateIndexable(modifiedDate)) {
            errorCode = activities.EXCLUDED_DATE;
            errorDesc = "Excluding document because of date ("+modifiedDate+")";
            activities.noDocument(documentIdentifier, versionString);
            continue;
          }

          RepositoryDocument rd = new RepositoryDocument();

          // acls cannot be null here: it was already sorted above.
          rd.setSecurityACL(RepositoryDocument.SECURITY_TYPE_DOCUMENT, acls);
          if (acls.length > 0) {
            String[] denyAclArray = new String[]{defaultAuthorityDenyToken};
            rd.setSecurityDenyACL(RepositoryDocument.SECURITY_TYPE_DOCUMENT, denyAclArray);
          }

          if (mimeType != null)
            rd.setMimeType(getFixedMimeType(mimeType));
          if (createdDate != null)
            rd.setCreatedDate(createdDate);
          if (modifiedDate != null)
            rd.setModifiedDate(modifiedDate);
          rd.setFileName(composeFileName(title, extension, mimeType));

          // Get general document metadata; entries without a value are skipped
          for (Entry<String, Object> entry : googleFile.entrySet()) {
            Object value = entry.getValue();
            if (value != null) {
              rd.addField(entry.getKey(), value.toString());
            }
          }

          // Fire up the document reading thread
          DocumentReadingThread t = new DocumentReadingThread(documentURI);
          try {
            t.start();
            boolean wasInterrupted = false;
            try {
              InputStream is = t.getSafeInputStream();
              try {
                // Can only index while background thread is running!
                List<String> sourcePath = new ArrayList<>();
                sourcePath.add(fullContentPath);
                rd.setSourcePath(sourcePath);

                // Ingestion
                rd.setBinary(is, fileLength);
                activities.ingestDocumentWithException(documentIdentifier, versionString, documentURI, rd);
              } finally {
                is.close();
              }
            } catch (ManifoldCFException e) {
              if (e.getErrorCode() == ManifoldCFException.INTERRUPTED)
                wasInterrupted = true;
              throw e;
            } catch (java.net.SocketTimeoutException e) {
              // SocketTimeoutException is an InterruptedIOException; rethrow it here so that a
              // timeout is not mistaken for an interruption by the next catch clause.
              throw e;
            } catch (InterruptedIOException e) {
              wasInterrupted = true;
              throw e;
            } finally {
              if (!wasInterrupted)
                t.finishUp();
            }

            // No errors.  Record the fact that we made it.
            fileSize = Long.valueOf(fileLength);
            errorCode = "OK";
          } catch (InterruptedException e) {
            t.interrupt();
            throw new ManifoldCFException("Interrupted: " + e.getMessage(), e,
              ManifoldCFException.INTERRUPTED);
          } catch (java.net.SocketTimeoutException e) {
            Logging.connectors.warn("GOOGLEDRIVE: Socket timeout reading document: " + e.getMessage(), e);
            handleIOException(e);
          } catch (InterruptedIOException e) {
            t.interrupt();
            throw new ManifoldCFException("Interrupted: " + e.getMessage(), e,
              ManifoldCFException.INTERRUPTED);
          } catch (IOException e) {
            errorCode = "IOEXCEPTION";
            errorDesc = e.getMessage();
            Logging.connectors.warn("GOOGLEDRIVE: Error reading document: " + e.getMessage(), e);
            handleIOException(e);
          }
        }
      } finally {
        // Runs for every exit from the try above, including the "continue" statements.
        if (doLog && errorCode != null)
          activities.recordActivity(Long.valueOf(startTime), ACTIVITY_READ,
            fileSize, documentIdentifier, errorCode, errorDesc, null);
      }
    }

  }

  /**
   * Builds the version string of a file: the packed forced ACLs, then either '+' and the packed
   * default deny token (when there are ACLs) or '-', then the revision.
   *
   * @param acls the sorted forced ACLs; must not be null
   * @param rev the non-empty revision string of the file
   * @return the version string
   */
  private String buildVersionString(String[] acls, String rev) {
    StringBuilder sb = new StringBuilder();

    // Acls
    packList(sb,acls,'+');
    if (acls.length > 0) {
      sb.append('+');
      pack(sb,defaultAuthorityDenyToken,'+');
    } else {
      sb.append('-');
    }

    sb.append(rev);
    return sb.toString();
  }

  /**
   * Queues every child of a folder as a document reference, reading the children from a
   * background thread.
   *
   * @param folderId the identifier of the folder whose children are queued
   * @param activities the handle the child references are added to
   * @throws ManifoldCFException on interruption or framework errors
   * @throws ServiceInterruption when a helper signals that processing should be retried later
   */
  private void queueFolderChildren(String folderId, IProcessActivity activities)
    throws ManifoldCFException, ServiceInterruption {
    getSession();
    GetChildrenThread t = new GetChildrenThread(folderId);
    try {
      t.start();
      boolean wasInterrupted = false;
      try {
        XThreadStringBuffer childBuffer = t.getBuffer();
        // Pick up the children, and add them to the activities, before we join with the child thread.
        while (true) {
          // The only kind of exceptions this can throw are going to shut the process down.
          String child = childBuffer.fetch();
          if (child == null)
            break;
          // Queue the child
          activities.addDocumentReference(child, folderId, RELATIONSHIP_CHILD);
        }
      } catch (InterruptedException e) {
        wasInterrupted = true;
        throw e;
      } catch (ManifoldCFException e) {
        if (e.getErrorCode() == ManifoldCFException.INTERRUPTED)
          wasInterrupted = true;
        throw e;
      } finally {
        if (!wasInterrupted)
          t.finishUp();
      }
    } catch (InterruptedException e) {
      t.interrupt();
      throw new ManifoldCFException("Interrupted: " + e.getMessage(), e,
        ManifoldCFException.INTERRUPTED);
    } catch (java.net.SocketTimeoutException e) {
      Logging.connectors.warn("GOOGLEDRIVE: Socket timeout adding child documents: " + e.getMessage(), e);
      handleIOException(e);
    } catch (InterruptedIOException e) {
      t.interrupt();
      throw new ManifoldCFException("Interrupted: " + e.getMessage(), e,
        ManifoldCFException.INTERRUPTED);
    } catch (IOException e) {
      Logging.connectors.warn("GOOGLEDRIVE: Error adding child documents: " + e.getMessage(), e);
      handleIOException(e);
    }
  }

  /**
   * Picks the URI a file is fetched from: Google spreadsheets, documents and presentations are
   * requested in the matching Office Open XML format, everything else as PDF.
   *
   * @param googleFile the file to fetch
   * @param mimeType the mime type of the file
   * @return whatever {@code getUrl} returns for the chosen format; the caller treats null as
   *         "cannot be fetched"
   * @throws ManifoldCFException if {@code getUrl} raises it
   * @throws ServiceInterruption if {@code getUrl} raises it
   */
  private String selectExportUri(File googleFile, String mimeType)
    throws ManifoldCFException, ServiceInterruption {
    switch (mimeType) {
      case "application/vnd.google-apps.spreadsheet":
        return getUrl(googleFile, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");

      case "application/vnd.google-apps.document":
        return getUrl(googleFile, "application/vnd.openxmlformats-officedocument.wordprocessingml.document");

      case "application/vnd.google-apps.presentation":
        return getUrl(googleFile, "application/vnd.openxmlformats-officedocument.presentationml.presentation");

      default:
        return getUrl(googleFile, "application/pdf");
    }
  }

  /**
   * Composes the file name stored on the repository document. The title is kept as is when it
   * already ends with ".extension" (ignoring case); otherwise the extension, or, when the file
   * has none, the extension derived from the mime type, is appended. A name that ends up with a
   * trailing "." has that dot removed.
   *
   * @param title the cleaned-up file title; null is treated as empty
   * @param extension the file extension reported by the file, or null
   * @param mimeType the mime type used to derive an extension when {@code extension} is null
   * @return the file name
   * @throws ManifoldCFException if {@code getExtensionByMimeType} raises it
   * @throws ServiceInterruption if {@code getExtensionByMimeType} raises it
   */
  private String composeFileName(String title, String extension, String mimeType)
    throws ManifoldCFException, ServiceInterruption {
    String baseName = (title == null) ? "" : title;
    if (extension != null && StringUtils.endsWithIgnoreCase(baseName, "." + extension)) {
      return baseName;
    }

    String suffix = (extension != null) ? extension : getExtensionByMimeType(mimeType);
    String name = baseName + "." + suffix;
    if (StringUtils.endsWithIgnoreCase(name, ".")) {
      name = StringUtils.chomp(name, ".");
    }
    return name;
  }