backend/code_review_backend/issues/migrations/0014_unique_hash.py (21 lines of code) (raw):
# Generated by Django 5.1.1 on 2024-10-14 13:36
from django.db import migrations, models
# This query aims to update all IssueLink in-place
# moving them from duplicated issues (by hash) towards the single one remaining
# It uses a first simple query `dupes` to partition issues by hash, identifying
# the remaining issue as (idx=1)
# The second query `translations` is a mapping of duplicates (idx > 1)
# towards the remaining issue for every hash (idx=1)
# That mapping is finally used to update all issue links in a single query
QUERY_UPDATE_LINKS = """
with dupes as (
select id, hash, row_number() over (partition by hash order by id) as idx
from issues_issue
),
translations as (
select src.id as src_id, dest.id as dest_id
from dupes as src
inner join dupes as dest on (src.hash=dest.hash and dest.idx=1)
where src.idx > 1
)
update issues_issuelink as l
set issue_id = t.dest_id
from translations as t
where t.src_id = l.issue_id;
"""
# This query runs after the update described above and reuse the exact same
# `dupes` query to identify then delete duplicated issues (idx > 1)
QUERY_DELETE_ISSUES = """
with dupes as (
select id, hash, row_number() over (partition by hash order by id) as idx
from issues_issue
)
delete from issues_issue where id in (select id from dupes where idx > 1);
"""
class Migration(migrations.Migration):
atomic = False
dependencies = [
("issues", "0013_move_issues_attributes_part_2"),
]
operations = [
migrations.AddIndex(
model_name="issue",
index=models.Index(fields=["hash"], name="issue_hash_idx"),
),
migrations.RunSQL(QUERY_UPDATE_LINKS),
migrations.RunSQL(QUERY_DELETE_ISSUES),
migrations.AlterField(
model_name="issue",
name="hash",
field=models.CharField(max_length=32, unique=True),
),
]