in moz_kinto_publisher/main.py [0:0]
def crlite_determine_publish(*, existing_records, run_db, channel):
    """Decide whether to publish a full filter or an incremental update.

    The fallback plan is to clear every published record and upload a full
    filter built from the most recent run. An incremental update is chosen
    only if every check below passes.

    Args:
        existing_records: The currently published records. Each one is read
            as ``record["attachment"]["filename"]``; the run ID is taken to
            be everything before the last ``"-"`` in that filename.
        run_db: The run database. This function uses ``len(run_db)``,
            ``run_db.most_recent_id()``, ``run_db.run_identifiers`` and
            ``run_db.get_run_timestamp(run_id)``.
        channel: The channel being published. ``channel.max_filter_age_days``
            is read here, and the object is forwarded to the consistency
            checks.

    Returns:
        A dict with two keys: ``"clear_all"`` (bool) and ``"upload"`` (list
        of run IDs). ``{"clear_all": True, "upload": [<most recent run>]}``
        requests a full filter; ``{"clear_all": False, "upload": [...]}``
        requests an incremental update made of the runs newer than the
        last existing record.

    Raises:
        AssertionError: If ``run_db`` is empty (when assertions are enabled).
        ValueError: If a run ID that has to be parsed is not of the form
            ``"<int>-<int>"``.
    """
    assert len(run_db) > 0, "There must be run identifiers"

    def run_key(run_id):
        # A run ID is a "YYYYMMDD" date and an index, e.g. "20210101-3".
        # Compare run IDs through this numeric key rather than as strings:
        # as strings, "20210101-10" would sort before "20210101-2".
        run_date, run_idx = [int(x) for x in run_id.split("-")]
        return run_date, run_idx

    # The default behavior is to clear all records and upload a full
    # filter based on the most recent run. We'll check if we can do
    # an incremental update instead.
    new_run_id = run_db.most_recent_id()
    default = {"clear_all": True, "upload": [new_run_id]}

    # If there are no existing records, publish a full filter.
    if not existing_records:
        log.info("No existing records")
        return default

    # If the existing records are bad, publish a full filter.
    try:
        crlite_verify_record_consistency(
            existing_records=existing_records, channel=channel
        )
    except ConsistencyException as se:
        log.error(f"Failed to verify existing record consistency: {se}")
        return default

    # The record["attachment"]["filename"] field of an existing record is
    # in the format "<run id>-channel.filter", "<run id>-channel.filter.stash",
    # or "<run id>-channel.filter.delta".
    record_run_ids = [
        record["attachment"]["filename"].rsplit("-", 1)[0]
        for record in existing_records
    ]

    # Split the known runs into those at or before the last existing record
    # and those after it. The newer ones are candidates for inclusion in an
    # incremental update.
    # NOTE(review): this treats the last existing record as the newest one,
    # presumably guaranteed by the record consistency check above -- confirm.
    cut_key = run_key(record_run_ids[-1])
    old_run_ids = []
    new_run_ids = []
    for run_id in run_db.run_identifiers:
        if run_key(run_id) <= cut_key:
            old_run_ids.append(run_id)
        else:
            new_run_ids.append(run_id)

    # If we don't have data from old runs, publish a full filter.
    known_old_run_ids = set(old_run_ids)
    if any(run_id not in known_old_run_ids for run_id in record_run_ids):
        log.error("We do not have data to support existing records.")
        return default

    # If the new runs fail a consistency check, publish a full filter.
    try:
        crlite_verify_run_id_consistency(
            run_db=run_db, identifiers_to_check=new_run_ids, channel=channel
        )
    except ConsistencyException as se:
        log.error(f"Failed to verify run ID consistency: {se}")
        return default

    # If the full filter is too old, publish a full filter.
    # Every record run ID was found among the parsed old runs above, so
    # using run_key here cannot raise for an ID that got this far.
    earliest_run_id = min(record_run_ids, key=run_key)
    earliest_timestamp = run_db.get_run_timestamp(earliest_run_id)
    new_timestamp = run_db.get_run_timestamp(new_run_id)
    max_age = timedelta(days=channel.max_filter_age_days)
    if new_timestamp - earliest_timestamp >= max_age:
        log.info(f"Published full filter is >= {channel.max_filter_age_days} days old")
        return default

    return {"clear_all": False, "upload": new_run_ids}