in ees_network_drive/files.py [0:0]
def extract_files(self, smb_connection, service_name, path, time_range, indexing_rules):
    """Collect details of the indexable files directly inside ``path``.

    Directory entries are skipped. A file is kept when
    ``indexing_rules.should_index`` accepts its details and its
    ``updated_at`` timestamp satisfies
    ``start_time < updated_at <= end_time``.

    :param smb_connection: SMB connection object
    :param service_name: name of the drive
    :param path: Path of the Network Drives
    :param time_range: Start and End Time, read from the ``start_time`` and
        ``end_time`` keys
    :param indexing_rules: object of indexing_rules
    :returns: dictionary of ids and file details for the files fetched;
        empty when the folder could not be listed
    """
    storage = {}
    try:
        file_list = smb_connection.listPath(service_name, rf'{path}')
    except Exception as exception:
        # Best effort: a folder that cannot be listed is logged and skipped.
        self.logger.exception(f"Unknown error while extracting files from folder {path}.Error {exception}")
        return storage

    # The window bounds and the timestamp format are the same for every
    # entry, so resolve them once instead of once per file.
    start_time = parse(time_range.get('start_time'))
    end_time = parse(time_range.get('end_time'))
    datetime_format = constant.RFC_3339_DATETIME_FORMAT

    for entry in file_list:
        if entry.isDirectory:
            continue

        file_name = entry.filename
        updated_at = time.strftime(datetime_format, time.gmtime(entry.last_attr_change_time))
        created_at = time.strftime(datetime_format, time.gmtime(entry.create_time))
        file_path = os.path.join(path, file_name)
        file_details = {
            'updated_at': updated_at,
            'file_type': os.path.splitext(file_name)[1],
            'file_size': entry.file_size,
            'created_at': created_at,
            'file_name': file_name,
            'file_path': file_path,
            'web_path': f"file://{self.server_ip}/{service_name}/{file_path}"
        }

        if not indexing_rules.should_index(file_details):
            continue

        # Compare through the formatted string so the comparison uses the
        # same value that is stored in ``file_details``; the chained form
        # parses it only once.
        if start_time < parse(updated_at) <= end_time:
            # Fall back to a derived id when the entry carries no file id.
            file_id = entry.file_id or hash_id(file_name, path)
            storage[file_id] = file_details
    return storage