in backend/ecs_tasks/delete_files/json_handler.py [0:0]
def delete_matches_from_json_file(input_file, to_delete, compressed=False):
    """
    Copy a JSON-lines payload into a new output buffer, leaving out every
    row that matches at least one of the supplied column matchers.

    The whole input is read and decoded as UTF-8 before being iterated
    row by row. Each row that is kept is written back as its original
    (unparsed) line followed by a newline, encoded as UTF-8.

    :param input_file: source handed to ``initialize``; the reader that
        ``initialize`` returns must expose ``read()`` returning bytes
    :param to_delete: iterable of matcher dicts. A matcher whose "Type" is
        "Simple" carries "Column" and "MatchIds"; any other matcher carries
        "Columns" and "MatchIds"
    :param compressed: forwarded to ``initialize``; when truthy the writer
        is closed once every row has been processed
    :return: tuple of the output stream and a ``Counter`` holding the
        "ProcessedRows" and "DeletedRows" totals
    """
    removed = 0
    with BufferOutputStream() as out_stream:
        reader, writer = initialize(input_file, out_stream, compressed)
        text = reader.read().decode("utf-8")
        processed = 0
        for parsed, line in json_lines_iterator(text, include_unparsed=True):
            processed += 1
            if _row_matches_any_column(parsed, to_delete):
                # Matching rows are counted and simply never written out.
                removed += 1
                continue
            writer.write((line + "\n").encode("utf-8"))
        if compressed:
            # NOTE(review): presumably finalizes the compressed stream so the
            # buffer is complete - confirm against initialize().
            writer.close()
        stats = Counter({"ProcessedRows": processed, "DeletedRows": removed})
        return out_stream, stats


def _row_matches_any_column(parsed, to_delete):
    """
    Tell whether a parsed row is selected by any matcher in ``to_delete``.

    Matchers are evaluated in order and evaluation stops at the first hit.
    Falsy looked-up values never match: a "Simple" matcher ignores them, and
    a multi-column matcher leaves them out of the list it compares.
    """
    for column in to_delete:
        if column["Type"] == "Simple":
            value = get_value(column["Column"], parsed)
            if value and value in column["MatchIds"]:
                return True
            continue
        # Multi-column matcher: the truthy values, in "Columns" order, must
        # appear as a single list entry inside "MatchIds".
        found = [
            value
            for value in (get_value(name, parsed) for name in column["Columns"])
            if value
        ]
        if found in column["MatchIds"]:
            return True
    return False