def download_function()

in connectors/sources/sharepoint_online.py [0:0]


    def download_function(self, drive_item, max_drive_item_age):
        if "deleted" in drive_item:
            # deleted drive items do not contain `name` property in the payload
            # so drive_item['id'] is used
            self._logger.debug(
                f"Not downloading the item id={drive_item['id']} because it has been deleted"
            )

            return None

        if "folder" in drive_item:
            self._logger.debug(f"Not downloading folder {drive_item['name']}")
            return None

        if "@microsoft.graph.downloadUrl" not in drive_item:
            self._logger.debug(
                f"Not downloading file {drive_item['name']}: field \"@microsoft.graph.downloadUrl\" is missing"
            )
            return None

        if not self.is_supported_format(drive_item["name"]):
            self._logger.debug(
                f"Not downloading file {drive_item['name']}: file type is not supported"
            )
            return None

        if "lastModifiedDateTime" not in drive_item:
            self._logger.debug(
                f"Not downloading file {drive_item['name']}: field \"lastModifiedDateTime\" is missing"
            )
            return None

        modified_date = datetime.strptime(
            drive_item["lastModifiedDateTime"], TIMESTAMP_FORMAT
        )

        if max_drive_item_age and modified_date < datetime.utcnow() - timedelta(
            days=max_drive_item_age
        ):
            self._logger.warning(
                f"Not downloading file {drive_item['name']}: last modified on {drive_item['lastModifiedDateTime']}"
            )

            return None
        elif (
            drive_item["size"] > MAX_DOCUMENT_SIZE
            and not self.configuration["use_text_extraction_service"]
        ):
            self._logger.warning(
                f"Not downloading file {drive_item['name']} of size {drive_item['size']}"
            )

            return None
        else:
            drive_item["_original_filename"] = drive_item.get("name", "")
            return partial(self.get_drive_item_content, drive_item)