backend/code_review_backend/issues/migrations/0014_unique_hash.py (21 lines of code) (raw):

# Generated by Django 5.1.1 on 2024-10-14 13:36 from django.db import migrations, models # This query aims to update all IssueLink in-place # moving them from duplicated issues (by hash) towards the single one remaining # It uses a first simple query `dupes` to partition issues by hash, identifying # the remaining issue as (idx=1) # The second query `translations` is a mapping of duplicates (idx > 1) # towards the remaining issue for every hash (idx=1) # That mapping is finally used to update all issue links in a single query QUERY_UPDATE_LINKS = """ with dupes as ( select id, hash, row_number() over (partition by hash order by id) as idx from issues_issue ), translations as ( select src.id as src_id, dest.id as dest_id from dupes as src inner join dupes as dest on (src.hash=dest.hash and dest.idx=1) where src.idx > 1 ) update issues_issuelink as l set issue_id = t.dest_id from translations as t where t.src_id = l.issue_id; """ # This query runs after the update described above and reuse the exact same # `dupes` query to identify then delete duplicated issues (idx > 1) QUERY_DELETE_ISSUES = """ with dupes as ( select id, hash, row_number() over (partition by hash order by id) as idx from issues_issue ) delete from issues_issue where id in (select id from dupes where idx > 1); """ class Migration(migrations.Migration): atomic = False dependencies = [ ("issues", "0013_move_issues_attributes_part_2"), ] operations = [ migrations.AddIndex( model_name="issue", index=models.Index(fields=["hash"], name="issue_hash_idx"), ), migrations.RunSQL(QUERY_UPDATE_LINKS), migrations.RunSQL(QUERY_DELETE_ISSUES), migrations.AlterField( model_name="issue", name="hash", field=models.CharField(max_length=32, unique=True), ), ]