def populate_pretranslation_quality()

in pontoon/insights/migrations/0014_pretranslation_quality_data.py [0:0]


def populate_pretranslation_quality(apps, schema_editor):
    """Backfill pretranslation quality data on existing insights snapshots.

    Collects "created", "approved" and "rejected" actions logged for
    translations authored by the pretranslation users (the TM and GT
    accounts filtered on below) in public, non-system projects, skipping
    actions performed by the sync user. The actions are grouped per
    (locale, day) and per (project, locale, day), and the aggregated
    counters plus the mean chrF++ score are written to the matching
    LocaleInsightsSnapshot and ProjectLocaleInsightsSnapshot rows.

    Args:
        apps: App registry used to resolve the models via ``get_model``.
        schema_editor: Not used by this function.
    """
    ActionLog = apps.get_model("actionlog", "ActionLog")
    LocaleInsightsSnapshot = apps.get_model("insights", "LocaleInsightsSnapshot")
    ProjectLocaleInsightsSnapshot = apps.get_model(
        "insights", "ProjectLocaleInsightsSnapshot"
    )
    Translation = apps.get_model("base", "Translation")

    # Maps each handled action type to the set that collects its translations.
    buckets_by_action_type = {
        "translation:created": "pretranslations_new",
        "translation:approved": "pretranslations_approved",
        "translation:rejected": "pretranslations_rejected",
    }

    def get_chrf_score(action):
        """Score the action's translation string against the approved one.

        Returns the chrF++ score as a float, or None when the entity has no
        approved translation in the action's locale.
        """
        # NOTE(review): `get()` raises MultipleObjectsReturned when several
        # approved translations match; only DoesNotExist is handled here.
        # Confirm that cannot happen for the data being migrated.
        try:
            approved_translation = Translation.objects.get(
                entity=action["translation__entity"],
                locale=action["translation__locale"],
                approved=True,
            ).string
        except Translation.DoesNotExist:
            return None

        score = chrfpp.sentence_score(
            action["translation__string"], [approved_translation]
        )
        return float(score.format(score_only=True))

    def get_action_score(action):
        """Return the score an action contributes to the mean, or None."""
        action_type = action["action_type"]

        if action_type == "translation:approved":
            # Translation has been approved, no need to calculate the chrF++ score
            return 100

        if action_type == "translation:rejected":
            return get_chrf_score(action)

        return None

    def store_data(key, action_data, action, score):
        """Record one action (and its score, if any) under `key`."""
        data = action_data.get(key)
        if data is None:
            data = action_data[key] = {
                "pretranslations_chrf_scores": [],
                "pretranslations_approved": set(),
                "pretranslations_rejected": set(),
                "pretranslations_new": set(),
            }

        bucket = buckets_by_action_type.get(action["action_type"])
        if bucket is not None:
            # Sets deduplicate repeated actions on the same translation.
            data[bucket].add(action["translation"])

        # Compare against None explicitly: 0.0 is a legitimate chrF++ score
        # and must be included in the mean.
        if score is not None:
            data["pretranslations_chrf_scores"].append(score)

    def update_snapshots(Model, action_data):
        """Write the aggregated data to the existing snapshots of `Model`.

        Keys of `action_data` are either (locale, date) or
        (project, locale, date). Keys without a matching snapshot are skipped.
        """
        changed_snapshots = []

        for key, data in action_data.items():
            if len(key) == 2:
                locale, created_at = key
                lookup = {"locale": locale, "created_at": created_at}
            else:
                project, locale, created_at = key
                lookup = {
                    "project_locale__project": project,
                    "project_locale__locale": locale,
                    "created_at": created_at,
                }

            try:
                snapshot = Model.objects.get(**lookup)
            except Model.DoesNotExist:
                continue

            scores = data["pretranslations_chrf_scores"]
            snapshot.pretranslations_chrf_score = (
                statistics.mean(scores) if scores else None
            )
            snapshot.pretranslations_approved = len(data["pretranslations_approved"])
            snapshot.pretranslations_rejected = len(data["pretranslations_rejected"])
            snapshot.pretranslations_new = len(data["pretranslations_new"])

            changed_snapshots.append(snapshot)

        Model.objects.bulk_update(
            changed_snapshots,
            [
                "pretranslations_chrf_score",
                "pretranslations_new",
                "pretranslations_approved",
                "pretranslations_rejected",
            ],
            batch_size=1000,
        )

    actions = (
        ActionLog.objects.filter(
            translation__entity__resource__project__system_project=False,
            translation__entity__resource__project__visibility="public",
            translation__user__email__in=[
                "pontoon-tm@example.com",
                "pontoon-gt@example.com",
            ],
            action_type__in=list(buckets_by_action_type),
        )
        .exclude(performed_by__email="pontoon-sync@example.com")
        .values(
            "created_at",
            "action_type",
            "translation",
            "translation__entity",
            "translation__locale",
            "translation__string",
            project=F("translation__entity__resource__project"),
        )
    )

    # Store action data in a dict for faster matching with snapshots
    locale_action_data = {}
    project_locale_action_data = {}

    for action in actions:
        locale = action["translation__locale"]
        day = action["created_at"].date()

        # Compute the score once per action; both buckets share it, which
        # avoids a duplicate database lookup and chrF++ computation.
        score = get_action_score(action)

        store_data((locale, day), locale_action_data, action, score)
        store_data(
            (action["project"], locale, day),
            project_locale_action_data,
            action,
            score,
        )

    update_snapshots(LocaleInsightsSnapshot, locale_action_data)
    update_snapshots(ProjectLocaleInsightsSnapshot, project_locale_action_data)