in connectors/sources/sharepoint_online.py [0:0]
def download_function(self, drive_item, max_drive_item_age):
    """Return a callable that downloads ``drive_item``, or ``None`` to skip it.

    The item is skipped (and the reason is logged) when any of these holds,
    checked in this order:

    * it carries a ``deleted`` key;
    * it carries a ``folder`` key;
    * it has no ``@microsoft.graph.downloadUrl`` key;
    * ``self.is_supported_format`` rejects its ``name``;
    * it has no ``lastModifiedDateTime`` key;
    * ``max_drive_item_age`` is truthy and the item was last modified more
      than that many days before the current UTC time;
    * its ``size`` exceeds ``MAX_DOCUMENT_SIZE`` while the
      ``use_text_extraction_service`` configuration value is falsy.

    Args:
        drive_item: Mapping describing the drive item. Depending on the
            path taken, the keys ``id``, ``name``, ``size`` and
            ``lastModifiedDateTime`` are read from it.
        max_drive_item_age: Maximum accepted age in days. A falsy value
            (``None`` or ``0``) disables the age check.

    Returns:
        ``None`` when the item must not be downloaded; otherwise a
        ``functools.partial`` binding ``self.get_drive_item_content`` to
        ``drive_item``.

    Raises:
        KeyError: If a key that is read without a prior membership check
            (``id``, ``name`` or ``size``) is absent.
        ValueError: If ``lastModifiedDateTime`` does not match
            ``TIMESTAMP_FORMAT``.

    Side effects:
        On the download path, ``drive_item["_original_filename"]`` is set
        to the item's name.
    """
    # Imported locally so the method does not depend on a module-level
    # import of ``timezone`` being present.
    from datetime import timezone

    if "deleted" in drive_item:
        # deleted drive items do not contain `name` property in the payload
        # so drive_item['id'] is used
        self._logger.debug(
            f"Not downloading the item id={drive_item['id']} because it has been deleted"
        )
        return None

    if "folder" in drive_item:
        self._logger.debug(f"Not downloading folder {drive_item['name']}")
        return None

    if "@microsoft.graph.downloadUrl" not in drive_item:
        self._logger.debug(
            f"Not downloading file {drive_item['name']}: field \"@microsoft.graph.downloadUrl\" is missing"
        )
        return None

    if not self.is_supported_format(drive_item["name"]):
        self._logger.debug(
            f"Not downloading file {drive_item['name']}: file type is not supported"
        )
        return None

    if "lastModifiedDateTime" not in drive_item:
        self._logger.debug(
            f"Not downloading file {drive_item['name']}: field \"lastModifiedDateTime\" is missing"
        )
        return None

    modified_date = datetime.strptime(
        drive_item["lastModifiedDateTime"], TIMESTAMP_FORMAT
    )

    if max_drive_item_age:
        # ``datetime.utcnow()`` is deprecated since Python 3.12. Take an
        # aware "now" in UTC and drop the tzinfo to obtain the same naive
        # UTC value, keeping the comparison with the parsed timestamp valid.
        # NOTE(review): assumes TIMESTAMP_FORMAT yields a naive datetime
        # expressed in UTC - confirm against its definition.
        utc_now = datetime.now(timezone.utc).replace(tzinfo=None)
        if modified_date < utc_now - timedelta(days=max_drive_item_age):
            self._logger.warning(
                f"Not downloading file {drive_item['name']}: last modified on {drive_item['lastModifiedDateTime']}"
            )
            return None

    # Oversized files are only accepted when the text extraction service is
    # enabled in the configuration.
    if (
        drive_item["size"] > MAX_DOCUMENT_SIZE
        and not self.configuration["use_text_extraction_service"]
    ):
        self._logger.warning(
            f"Not downloading file {drive_item['name']} of size {drive_item['size']}"
        )
        return None

    drive_item["_original_filename"] = drive_item.get("name", "")
    return partial(self.get_drive_item_content, drive_item)