def pretranslate()

in pontoon/pretranslation/tasks.py [0:0]


def pretranslate(project: Project, paths: set[str] | None):
    """
    Identifies strings without any translations and any suggestions.
    Engages TheAlgorithm (bug 1552796) to gather pretranslations.
    Stores pretranslations as suggestions (approved=False) to DB.

    Work is done one locale at a time: for each enabled locale, the new
    Translation objects are bulk-created, a TRANSLATION_CREATED ActionLog
    entry is recorded for each of them, checks are run, translations without
    errors and warnings are marked as changed, and finally
    update_changed_instances() is called for the affected TranslatedResources.

    Returns early (after logging) if pretranslation is not enabled for the
    project, or for none of its non-readonly locales.

    :arg project: The project to be pretranslated
    :arg paths: Paths of the project resources to be pretranslated,
      or None to pretranslate all resources.
      NOTE: an empty set is falsy and is therefore treated like None.

    :returns: None
    """
    if not project.pretranslation_enabled:
        log.info(f"Pretranslation not enabled for project {project.name}")
        return

    locales = project.locales.filter(
        project_locale__pretranslation_enabled=True,
        project_locale__readonly=False,
    )

    if not locales:
        log.info(
            f"Pretranslation not enabled for any locale within project {project.name}"
        )
        return

    log.info(f"Fetching pretranslations for project {project.name} started")

    entities = Entity.objects.filter(resource__project=project, obsolete=False)
    if paths:
        entities = entities.filter(resource__path__in=paths)
    entities = entities.prefetch_related("resource")

    # Fetch all available locale-resource pairs (TranslatedResource objects),
    # encoded as "<locale_id>-<resource_id>" strings.
    tr_pairs = (
        TranslatedResource.objects.filter(
            resource__project=project,
            locale__in=locales,
        )
        .annotate(
            locale_resource=Concat(
                "locale_id", V("-"), "resource_id", output_field=CharField()
            )
        )
        .values_list("locale_resource", flat=True)
        .distinct()
    )

    # Fetch all locale-entity pairs with non-rejected or pretranslated translations,
    # encoded as "<locale_id>-<entity_id>" strings.
    pt_authors = [User.objects.get(email=email) for email in AUTHORS.values()]
    translated_entities = (
        Translation.objects.filter(
            locale__in=locales,
            entity__in=entities,
        )
        .filter(Q(rejected=False) | Q(user__in=pt_authors))
        .annotate(
            locale_entity=Concat(
                "locale_id", V("-"), "entity_id", output_field=CharField()
            )
        )
        .values_list("locale_entity", flat=True)
        .distinct()
    )

    # Both collections are probed once per (locale, entity) combination below.
    # Materialize them as sets so that each probe is an O(1) hash lookup
    # rather than a linear scan over a list / evaluated QuerySet.
    translated_entities = set(translated_entities)
    tr_pairs = set(tr_pairs)

    for locale in locales:
        log.info(f"Fetching pretranslations for locale {locale.code} started")

        translations = []

        # To keep track of changed TranslatedResources and their latest_translation:
        # tr_dict maps a "<locale_id>-<resource_id>" key to the index (within
        # `translations`) of the last translation created for that pair;
        # tr_filter collects one Q object per such pair.
        tr_dict = {}
        tr_filter = []

        for entity in entities:
            locale_entity = f"{locale.id}-{entity.id}"
            locale_resource = f"{locale.id}-{entity.resource.id}"

            # Skip entities that already have a qualifying translation, and
            # entities whose resource is not available for this locale.
            if locale_entity in translated_entities or locale_resource not in tr_pairs:
                continue

            pretranslations = get_pretranslations(entity, locale)

            if not pretranslations:
                continue

            # Only the string of the first pretranslation is checked here.
            failed_checks = run_checks(
                entity,
                locale.code,
                entity.string,
                pretranslations[0][0],
                use_tt_checks=False,
            )

            # If the checks fail, request the pretranslations again with
            # preserve_placeables=True and use that result instead.
            if failed_checks:
                pretranslations = get_pretranslations(
                    entity, locale, preserve_placeables=True
                )

            for string, plural_form, user in pretranslations:
                translations.append(
                    Translation(
                        entity=entity,
                        locale=locale,
                        string=string,
                        user=user,
                        approved=False,
                        pretranslated=True,
                        active=True,
                        plural_form=plural_form,
                    )
                )

                if locale_resource not in tr_dict:
                    # Add query for fetching respective TranslatedResource.
                    tr_filter.append(
                        Q(locale__id=locale.id) & Q(resource__id=entity.resource.id)
                    )

                # Update the latest translation index
                tr_dict[locale_resource] = len(translations) - 1

        if not translations:
            log.info(
                f"Fetching pretranslations for locale {locale.code} done: No pretranslation fetched"
            )
            continue

        translations = Translation.objects.bulk_create(translations)

        # Log creating actions
        actions_to_log = [
            ActionLog(
                action_type=ActionLog.ActionType.TRANSLATION_CREATED,
                performed_by=t.user,
                translation=t,
            )
            for t in translations
        ]

        ActionLog.objects.bulk_create(actions_to_log)

        # Run checks on all translations
        translation_pks = {translation.pk for translation in translations}
        bulk_run_checks(Translation.objects.for_checks().filter(pk__in=translation_pks))

        # Mark translations as changed
        changed_translations = Translation.objects.filter(
            pk__in=translation_pks,
            # Do not sync translations with errors and warnings
            errors__isnull=True,
            warnings__isnull=True,
        )
        changed_translations.bulk_mark_changed()

        # Update latest activity and stats for changed instances.
        update_changed_instances(tr_filter, tr_dict, translations)

        log.info(f"Fetching pretranslations for locale {locale.code} done")

    log.info(f"Fetching pretranslations for project {project.name} done")