in probe_scraper/runner.py [0:0]
def dedupe_probes(results: Dict[str, Any]) -> Dict[str, Any]:
    """Swap probe definitions for identical copies that were seen earlier.

    Most probes have exactly the same contents across revisions, so sharing
    a single object per distinct definition gives significant memory savings
    across the entire history.

    NOTE(review): ``lookup_table`` is not defined in this block; it is
    resolved from an outer scope and is presumably a dict that persists
    between calls -- confirm at its definition site. This function stores
    entries in it as ``{name: {hash: definition}}``.

    Args:
        results: Mapping of probe name to its JSON-serializable definition.

    Returns:
        A new dict with the same keys as ``results``. Each value is either
        the value that was passed in, or a value previously stored under the
        same name whose sorted-key JSON serialization is identical.

    Raises:
        TypeError: Propagated from ``json.dumps`` when a definition is not
            JSON-serializable.
    """
    deduped = {}
    for key, value in results.items():
        # Sorting the keys makes the serialization, and therefore its hash,
        # independent of dict insertion order.
        serialized = json.dumps(value, sort_keys=True)
        probe_hash = hash(serialized)

        seen_for_name = lookup_table.setdefault(key, {})
        existing_probe = seen_for_name.get(probe_hash)

        if existing_probe is None:
            # First time this definition shows up under this name.
            seen_for_name[probe_hash] = value
            deduped[key] = value
        elif existing_probe == value or (
            json.dumps(existing_probe, sort_keys=True) == serialized
        ):
            # A matching hash alone is not proof of identical content, so the
            # hit is confirmed before the stored object is reused. The cheap
            # equality check covers the common case; the serialized
            # comparison keeps sharing values that only differ in ways JSON
            # does not distinguish (e.g. tuple vs. list).
            deduped[key] = existing_probe
        else:
            # Hash collision between two different definitions: keep the
            # caller's value and leave the stored entry untouched.
            deduped[key] = value
    return deduped