in core/lib/payload/copy.py [0:0]
def checksum(self):
    """
    Run checksum for all existing data in the new table (Stage 4).

    Another round of checksum is performed later, covering only the
    changes that happened in between. This stage is skipped entirely
    when checksumming is not needed, or when --eliminate-duplicate was
    specified (the new table may then legitimately contain fewer rows
    than the old one, so comparing checksums is meaningless).

    Side effects: replays pending changes, opens and commits a
    transaction, updates ``self.last_checksumed_id`` and
    ``self.stats["time_in_table_checksum"]``.
    """
    log.info("== Stage 4: Checksum ==")
    if not self.need_checksum():
        return
    stage_start_time = time.time()
    # --eliminate-duplicate intentionally drops rows, so old/new
    # checksums would never match; bail out with a warning instead.
    if self.eliminate_dups:
        log.warning("Skip checksum, because --eliminate-duplicate " "specified")
        return
    # Replay outside of a transaction so that we won't hit the max
    # allowed transaction time.
    log.info("= Stage 4.1: Catch up before generating checksum =")
    self.replay_till_good2go(checksum=False)
    log.info("= Stage 4.2: Comparing checksum =")
    self.start_transaction()
    # To fill the gap between old and new table since the last replay;
    # single_trx keeps the final catch-up inside this transaction so the
    # two tables are in a consistent, comparable state.
    log.info("Replay changes to bring two tables to a comparable state")
    self.replay_changes(single_trx=True)
    # If we don't have a PK on the old schema, then we are not able to
    # checksum by chunk. We'll do a full table scan for checksum instead.
    # NOTE(review): this path returns early, so last_checksumed_id and
    # time_in_table_checksum are not updated here — presumably handled
    # inside checksum_full_table; confirm.
    if self.is_full_table_dump:
        return self.checksum_full_table()
    if not self.detailed_mismatch_info:
        log.info("Checksuming data from old table")
        old_table_checksum = self.checksum_by_chunk(
            self.table_name, dump_after_checksum=self.dump_after_checksum
        )
        # We can calculate the checksum for the new table outside the
        # transaction, because the data in the new table is static
        # without replaying changes.
        self.commit()
        log.info("Checksuming data from new table")
        new_table_checksum = self.checksum_by_chunk(
            self.new_table_name, dump_after_checksum=self.dump_after_checksum
        )
        log.info("Compare checksum")
        self.compare_checksum(old_table_checksum, new_table_checksum)
    else:
        # Caller asked for detailed mismatch info: use the slower
        # detailed checksum path instead of the chunked comparison.
        self.detailed_checksum()
    # Record how far replay had progressed when this checksum was taken,
    # so the later delta checksum only needs to cover newer changes.
    self.last_checksumed_id = self.last_replayed_id
    log.info("Checksum match between new and old table")
    self.stats["time_in_table_checksum"] = time.time() - stage_start_time