def fetch_drive_items()

in ees_sharepoint/sync_sharepoint.py [0:0]


    def fetch_drive_items(self, libraries, ids):
        """Fetch the files and folders of every document library and build
        the documents to be indexed.

        For each library the items are requested from SharePoint. For file
        items the file content is additionally downloaded and passed through
        the text extractor. When permissions are enabled, the document level
        permissions are fetched for every item. If nothing can be fetched, a
        proper message is logged.

        :param libraries: mapping of library GUID to a sequence holding the
            site url at index 0, the library title at index 1 and a timestamp
            string at index 2 (presumably the library's last-modified time --
            confirm against the caller)
        :param ids: structure containing id's of all objects; its
            ``"drive_items"`` entry is updated in place with the GUID of every
            fetched item, grouped by site url and then by library GUID
        :returns: dictionary with ``"type"`` set to ``DRIVE_ITEMS`` and
            ``"data"`` set to the list of documents that were built
        """
        responses = []
        self.logger.info("Fetching all the files for the library")
        if not libraries:
            self.logger.info(
                "No file was created in this interval: start time: %s and end time: %s"
                % (self.start_time, self.end_time)
            )
            return {"type": DRIVE_ITEMS, "data": responses}

        schema_drive = self.get_schema_fields(DRIVE_ITEMS)
        # The start time is the same for every library, so parse it only once.
        interval_start = parse(self.start_time)
        for lib_content, value in libraries.items():
            # Skip libraries whose timestamp lies before the sync interval.
            if interval_start > parse(value[2]):
                continue
            site_url, library_title = value[0], value[1]

            # Make sure there is a (non-empty or fresh) bucket for this site.
            site_ids = ids["drive_items"].get(site_url)
            if not site_ids:
                site_ids = {}
                ids["drive_items"][site_url] = site_ids

            rel_url = f"{site_url}/_api/web/lists(guid'{lib_content}')/items?$select=Modified,Id,GUID,File,Folder&$expand=File,Folder"
            self.logger.info(
                "Fetching the items for libraries: %s from url: %s"
                % (library_title, rel_url)
            )
            query = self.sharepoint_client.get_query(
                self.start_time, self.end_time, DRIVE_ITEMS
            )
            response = self.sharepoint_client.get(rel_url, query, DRIVE_ITEMS)
            response_data = get_results(self.logger, response, DRIVE_ITEMS)
            if not response_data:
                self.logger.info(
                    "No item was created for the library %s in this interval: start time: %s and end time: %s"
                    % (library_title, self.start_time, self.end_time)
                )
                continue
            self.logger.info(
                "Successfully fetched and parsed %s drive item response for library: %s from SharePoint"
                % (len(response_data), library_title)
            )

            # The per-library id list is only created once items were found.
            library_ids = site_ids.get(lib_content)
            if not library_ids:
                library_ids = []
                site_ids[lib_content] = library_ids

            for item in response_data:
                # An item whose "File" entry carries a modification time is
                # treated as a file; everything else is treated as a folder.
                if item["File"].get("TimeLastModified"):
                    obj_type = "File"
                    doc = {"type": "file"}
                    file_relative_url = item["File"]["ServerRelativeUrl"]
                    url_s = f"{site_url}/_api/web/GetFileByServerRelativeUrl('{encode(file_relative_url)}')/$value"
                    file_response = self.sharepoint_client.get(
                        url_s, query="", param_name="attachment"
                    )
                    # The body stays empty when the download or the text
                    # extraction fails; the item is still indexed.
                    doc["body"] = {}
                    if file_response and file_response.ok:
                        try:
                            doc["body"] = extract(file_response.content)
                        except TikaException as exception:
                            # The item itself has no "Url" key (it is not part
                            # of $select), so report the file's relative url.
                            self.logger.error(
                                "Error while extracting the contents from the file at %s, Error %s"
                                % (file_relative_url, exception)
                            )
                else:
                    obj_type = "Folder"
                    doc = {"type": "folder"}

                item_fields = item[obj_type]
                for field, response_field in schema_drive.items():
                    doc[field] = item_fields.get(response_field)
                doc["id"] = item.get("GUID")
                if self.enable_permission is True:
                    doc["_allow_permissions"] = self.fetch_permissions(
                        key=DRIVE_ITEMS,
                        list_id=lib_content,
                        list_url=site_url,
                        itemid=str(item.get("ID")),
                    )
                doc["url"] = urljoin(
                    self.sharepoint_host,
                    item_fields["ServerRelativeUrl"],
                )
                responses.append(doc)
                # Record each item id once per library.
                if doc["id"] not in library_ids:
                    library_ids.append(doc["id"])

        return {"type": DRIVE_ITEMS, "data": responses}