in collection_manager/collection_manager/services/history_manager/FileIngestionHistory.py [0:0]
def _purge(self):
    """Remove duplicate entries from the history file, keeping the latest.

    Every line of the history file is keyed by its first comma-separated
    field (the file name). Lines are scanned from the end of the file to
    its beginning, so for each file name only the entry closest to the end
    of the file survives. The surviving lines are written to
    ``<history file>.buff`` in scan order (last line of the original
    first), and that buffer then replaces the history file through
    ``os.replace``.

    A ``FileNotFoundError`` raised anywhere in the process (typically
    because there is no history file yet) is logged and swallowed; in that
    case the history file is left untouched.
    """
    logger.info("purge the history file from duplicates")
    history_path = self._history_file_path
    buffer_path = f"{history_path}.buff"
    seen_file_names = set()
    try:
        # Read the history *before* creating the buffer file: if there is
        # no history yet we bail out without leaving an empty ".buff" file
        # behind. The context manager also guarantees the handle is closed
        # when an error occurs.
        with open(history_path, "r") as history_file:
            lines = history_file.readlines()

        with open(buffer_path, "w") as buffer_file:
            # Walk backwards so the most recent entry of each file wins.
            for line in reversed(lines):
                file_name = line.split(",")[0]
                if file_name in seen_file_names:
                    logger.info(f"skip file {file_name} in purge")
                    continue
                seen_file_names.add(file_name)
                # The final line of the original file may lack a trailing
                # newline; since it is written first here, it would
                # otherwise be glued to the entry that follows it.
                buffer_file.write(line if line.endswith("\n") else f"{line}\n")

        logger.info(f"purge done in file {history_path}.buff replace in {history_path}")
        os.replace(buffer_path, history_path)
    except FileNotFoundError:
        logger.info(f"no history file {history_path} to purge")