pontoon/base/models/entity.py (589 lines of code) (raw):

from collections.abc import Iterable
from functools import reduce
from operator import ior
from re import escape, findall, match

from dirtyfields import DirtyFieldsMixin
from jsonfield import JSONField

from django.db import models
from django.db.models import F, Prefetch, Q
from django.utils import timezone

from pontoon.base import utils
from pontoon.base.models.locale import Locale
from pontoon.base.models.project import Project
from pontoon.base.models.project_locale import ProjectLocale
from pontoon.base.models.resource import Resource


def get_word_count(string):
    """Compute the number of words in a given string."""
    return len(findall(r"[\w,.-]+", string))


def combine_entity_filters(entities, filter_choices, filters, *args):
    """Return a combination of filters to apply to an Entity object.

    The content for each filter is defined in the EntityQuerySet helper class, using methods
    that have the same name as the filter. Each subset of filters is combined with the others
    with the OR operator.

    :arg EntityQuerySet entities: an Entity query set object with predefined filters
    :arg list filter_choices: list of valid choices, used to sanitize the content of `filters`
    :arg list filters: the filters to get and combine
    :arg *args: arguments that will be passed to the filter methods of the EntityQuerySet class

    :returns: a combination of django ORM Q() objects containing all the required filters

    """
    # Start from an empty Q() so that `reduce` has something to return when no
    # requested filter survives sanitization.
    combined = [Q()]

    # Only names listed in `filter_choices` are accepted from the user. Dashes are
    # mapped to underscores to obtain the EntityQuerySet method name.
    combined.extend(
        getattr(entities, filter_name.replace("-", "_"))(*args)
        for filter_name in filters
        if filter_name in filter_choices
    )

    # Combine all generated filters with an OR operator.
    # `operator.ior` is the pipe (|) Python operator, which turns into a logical OR
    # when used between django ORM query objects.
    return reduce(ior, combined)


class EntityQuerySet(models.QuerySet):
    def get_filtered_entities(
        self, locale, query, rule, project=None, match_all=True, prefetch=None
    ):
        """Return a QuerySet of values of entity PKs matching the locale, query and rule.

        Filter entities that match the given filter provided by the `locale` and `query`
        parameters. For performance reasons the `rule` parameter is also provided to filter
        entities in python instead of the DB.

        :arg Locale locale: a Locale object to get translations for
        :arg Q query: a django ORM Q() object describing translations to filter
        :arg function rule: a lambda function implementing the `query` logic
        :arg Project project: if given and its slug is not "all-projects", translations
            are additionally restricted to this project
        :arg boolean match_all: if true, all plural forms must match the rule.
            Otherwise, only one matching is enough
        :arg prefetch django.db.models.Prefetch prefetch: if set, it's used to control the
            operation of prefetch_related() on the query.

        :returns: a QuerySet of values of entity PKs

        """
        from pontoon.base.models.translation import Translation

        # First, separately filter entities with plurals (for performance reasons)
        plural_pks = []

        if locale.nplurals:
            # For each entity with plurals, fetch translations matching the query.
            plural_candidates = self.exclude(string_plural="").prefetch_related(
                Prefetch(
                    "translation_set",
                    queryset=(
                        Translation.objects.filter(locale=locale)
                        .filter(query)
                        .prefetch_related(prefetch)
                    ),
                    to_attr="fetched_translations",
                )
            )

            # Walk through the plural forms one by one and check that:
            #  - they have a translation
            #  - the translation matches the rule
            for candidate in plural_candidates:
                count = 0
                for i in range(locale.nplurals):
                    candidate_translations = [
                        translation
                        for translation in candidate.fetched_translations
                        if translation.plural_form == i
                    ]
                    if candidate_translations and rule(candidate_translations[0]):
                        count += 1

                        # No point going on if we don't care about matching all.
                        if not match_all:
                            break
                    elif match_all:
                        # One plural form failed, so `count` can no longer reach
                        # `locale.nplurals`; the remaining forms are irrelevant.
                        break

                # If `match_all` is True, we want all plural forms to have a match.
                # Otherwise, just one of them matching is enough.
                if (match_all and count == locale.nplurals) or (
                    not match_all and count
                ):
                    plural_pks.append(candidate.pk)

        translations = Translation.objects.filter(locale=locale)

        # Additional filter on the project field speeds things up because it makes faster
        # to execute a SQL subquery generated by Django.
        if project and project.slug != "all-projects":
            translations = translations.filter(entity__resource__project=project)

        # Finally, we return a query that returns both the matching entities with no
        # plurals and the entities with plurals that were stored earlier.
        return translations.filter(
            Q(Q(entity__string_plural="") & query) | Q(entity_id__in=plural_pks)
        ).values("entity")

    def missing(self, locale, project=None):
        """Return a filter to be used to select entities marked as "missing".

        An entity is marked as "missing" if at least one of its plural forms
        has no approved or pretranslated translations.

        :arg Locale locale: a Locale object to get translations for

        :returns: a django ORM Q object to use as a filter

        """
        return ~Q(
            pk__in=self.get_filtered_entities(
                locale,
                Q(approved=True) | Q(pretranslated=True),
                lambda x: x.approved or x.pretranslated,
                project=project,
            )
        )

    def warnings(self, locale, project=None):
        """Return a filter to be used to select entities with translations with warnings.

        This filter will return an entity if at least one of its plural forms
        has an approved, pretranslated or fuzzy translation with a warning.

        :arg Locale locale: a Locale object to get translations for

        :returns: a django ORM Q object to use as a filter

        """
        return Q(
            pk__in=self.get_filtered_entities(
                locale,
                (Q(approved=True) | Q(pretranslated=True) | Q(fuzzy=True))
                & Q(warnings__isnull=False),
                lambda x: (x.approved or x.pretranslated or x.fuzzy)
                and x.warnings.count(),
                match_all=False,
                prefetch=Prefetch("warnings"),
                project=project,
            )
        )

    def errors(self, locale, project=None):
        """Return a filter to be used to select entities with translations with errors.

        This filter will return an entity if at least one of its plural forms
        has an approved, pretranslated or fuzzy translation with an error.

        :arg Locale locale: a Locale object to get translations for

        :returns: a django ORM Q object to use as a filter

        """
        return Q(
            pk__in=self.get_filtered_entities(
                locale,
                (Q(approved=True) | Q(pretranslated=True) | Q(fuzzy=True))
                & Q(errors__isnull=False),
                lambda x: (x.approved or x.pretranslated or x.fuzzy)
                and x.errors.count(),
                match_all=False,
                prefetch=Prefetch("errors"),
                project=project,
            )
        )

    def pretranslated(self, locale, project=None):
        """Return a filter to be used to select entities marked as "pretranslated".

        An entity is marked as "pretranslated" if all of its plural forms have a
        pretranslated translation.

        :arg Locale locale: a Locale object to get translations for

        :returns: a django ORM Q object to use as a filter

        """
        return Q(
            pk__in=self.get_filtered_entities(
                locale,
                Q(pretranslated=True, warnings__isnull=True, errors__isnull=True),
                lambda x: x.pretranslated,
                project=project,
            )
        )

    def translated(self, locale, project=None):
        """Return a filter to be used to select entities marked as "approved".

        An entity is marked as "approved" if all of its plural forms have an approved
        translation.

        :arg Locale locale: a Locale object to get translations for

        :returns: a django ORM Q object to use as a filter

        """
        return Q(
            pk__in=self.get_filtered_entities(
                locale,
                Q(approved=True, warnings__isnull=True, errors__isnull=True),
                lambda x: x.approved,
                project=project,
            )
        )

    def unreviewed(self, locale, project=None):
        """Return a filter to be used to select entities with suggested translations.

        An entity is said to have suggestions if at least one of its plural forms
        has at least one unreviewed suggestion (not approved, not rejected, not
        pretranslated, not fuzzy).

        :arg Locale locale: a Locale object to get translations for

        :returns: a django ORM Q object to use as a filter

        """
        return Q(
            pk__in=self.get_filtered_entities(
                locale,
                Q(approved=False, rejected=False, pretranslated=False, fuzzy=False),
                lambda x: not x.approved
                and not x.rejected
                and not x.pretranslated
                and not x.fuzzy,
                match_all=False,
                project=project,
            )
        )

    def rejected(self, locale, project=None):
        """Return a filter to be used to select entities with rejected translations.

        This filter will return all entities that have a rejected translation.

        :arg Locale locale: a Locale object to get translations for

        :returns: a django ORM Q object to use as a filter

        """
        return Q(
            pk__in=self.get_filtered_entities(
                locale,
                Q(rejected=True),
                lambda x: x.rejected,
                match_all=False,
                project=project,
            )
        )

    def missing_without_unreviewed(self, locale, project=None):
        """Return a filter to be used to select entities with no or only rejected translations.

        This filter will return all entities that have no or only rejected translations.

        :arg Locale locale: a Locale object to get translations for

        :returns: a django ORM Q object to use as a filter

        """
        return ~Q(
            pk__in=self.get_filtered_entities(
                locale,
                Q(approved=True) | Q(pretranslated=True) | Q(rejected=False),
                lambda x: x.approved or x.pretranslated or not x.rejected,
                project=project,
            )
        )

    def fuzzy(self, locale, project=None):
        """Return a filter to be used to select entities marked as "fuzzy".

        An entity is marked as "fuzzy" if all of its plural forms have a fuzzy
        translation.

        :arg Locale locale: a Locale object to get translations for

        :returns: a django ORM Q object to use as a filter

        """
        return Q(
            pk__in=self.get_filtered_entities(
                locale,
                Q(fuzzy=True, warnings__isnull=True, errors__isnull=True),
                lambda x: x.fuzzy,
                project=project,
            )
        )

    def empty(self, locale, project=None):
        """Return a filter to be used to select empty translations.

        :arg Locale locale: a Locale object to get translations for

        :returns: a django ORM Q object to use as a filter

        """
        return Q(
            pk__in=self.get_filtered_entities(
                locale,
                Q(string=""),
                lambda x: x.string == "",
                match_all=False,
                project=project,
            )
        )

    def unchanged(self, locale, project=None):
        """Return a filter to be used to select entities that have unchanged translations.

        An entity is marked as "unchanged" if all of its plural forms have translations
        equal to the source string.

        :arg Locale locale: a Locale object to get translations for

        :returns: a django ORM Q object to use as a filter

        """
        return Q(
            pk__in=self.get_filtered_entities(
                locale,
                Q(active=True)
                & (
                    Q(string=F("entity__string"))
                    | Q(string=F("entity__string_plural"))
                ),
                lambda x: x.active
                and (x.string == x.entity.string or x.string == x.entity.string_plural),
                match_all=False,
                project=project,
            )
        )

    def authored_by(self, locale, emails):
        """Return a filter selecting entities with translations by the given authors.

        :arg Locale locale: a Locale object to get translations for
        :arg list emails: author email addresses; the special value "imported"
            selects translations that have no user set

        :returns: a django ORM Q object to use as a filter; an empty Q() when
            `emails` contains neither a valid email nor "imported"

        """
        # Validate if user passed a real email. This must be a list: a bare
        # `filter` object is always truthy, which would make the checks below
        # pass even when no valid email was given.
        sanitized_emails = [email for email in emails if utils.is_email(email)]
        include_imported = "imported" in emails

        if not sanitized_emails and not include_imported:
            return Q()

        query = Q()

        if sanitized_emails:
            query |= Q(translation__user__email__in=sanitized_emails)

        if include_imported:
            query |= Q(translation__user__isnull=True)

        return query & Q(translation__locale=locale)

    def reviewed_by(self, locale, emails):
        """Return a filter selecting entities with translations reviewed by given users.

        A translation counts as reviewed by a user if that user either approved
        or rejected it.

        :arg Locale locale: a Locale object to get translations for
        :arg list emails: reviewer email addresses

        :returns: a django ORM Q object to use as a filter; an empty Q() when
            `emails` contains no valid email

        """
        # Validate if user passed a real email. This must be a list: a bare
        # `filter` object is always truthy, even when it yields nothing.
        sanitized_emails = [email for email in emails if utils.is_email(email)]

        if not sanitized_emails:
            return Q()

        return Q(translation__locale=locale) & (
            Q(translation__approved_user__email__in=sanitized_emails)
            | Q(translation__rejected_user__email__in=sanitized_emails)
        )

    def between_time_interval(self, locale, start, end):
        """Return a filter selecting entities with translations dated within [start, end].

        :arg Locale locale: a Locale object to get translations for
        :arg start: lower bound passed to the `translation__date__range` lookup
        :arg end: upper bound passed to the `translation__date__range` lookup

        :returns: a django ORM Q object to use as a filter

        """
        return Q(translation__locale=locale, translation__date__range=(start, end))

    def between_review_time_interval(self, locale, start, end):
        """Return a filter selecting entities with translations reviewed within [start, end].

        A translation matches if either its approved date or its rejected date
        falls in the range.

        :arg Locale locale: a Locale object to get translations for
        :arg start: lower bound passed to the `__range` lookups
        :arg end: upper bound passed to the `__range` lookups

        :returns: a django ORM Q object to use as a filter

        """
        return Q(
            Q(translation__locale=locale)
            & (
                Q(translation__approved_date__range=(start, end))
                | Q(translation__rejected_date__range=(start, end))
            )
        )

    def prefetch_entities_data(self, locale, preferred_source_locale):
        """Return this query set with the data needed to serialize entities prefetched.

        The following attributes are populated on each entity:
          - `active_translations`: active translations for `locale`, with their
            errors and warnings
          - `resource.project.projectlocale`: ProjectLocale objects for `locale`
          - `alternative_originals`: approved translations for the locale with code
            `preferred_source_locale`; only set when that code is not ""

        :arg Locale locale: a Locale object to get translations for
        :arg str preferred_source_locale: locale code, or "" for none

        :returns: an EntityQuerySet with the prefetches applied

        """
        # Prefetch active translations for given locale
        from pontoon.base.models.translation import Translation

        entities = self.prefetch_related(
            Prefetch(
                "translation_set",
                queryset=(
                    Translation.objects.filter(
                        locale=locale, active=True
                    ).prefetch_related(
                        "errors",
                        "warnings",
                    )
                ),
                to_attr="active_translations",
            )
        )

        # Prefetch related Translations, Resources, Projects and ProjectLocales
        entities = entities.prefetch_related(
            Prefetch(
                "resource__project__project_locale",
                queryset=ProjectLocale.objects.filter(locale=locale),
                to_attr="projectlocale",
            )
        )

        # Prefetch approved translations for given preferred source locale
        if preferred_source_locale != "":
            entities = entities.prefetch_related(
                Prefetch(
                    "translation_set",
                    queryset=(
                        Translation.objects.filter(
                            locale__code=preferred_source_locale, approved=True
                        )
                    ),
                    to_attr="alternative_originals",
                )
            )

        return entities

    def get_or_create(self, defaults=None, **kwargs):
        """Add `word_count`, computed from the `string` lookup, before delegating.

        The word count is added to the lookup kwargs, as before. When no `string`
        lookup is given, the call is passed through unchanged instead of raising
        a KeyError.
        """
        if "string" in kwargs:
            kwargs["word_count"] = get_word_count(kwargs["string"])
        return super().get_or_create(defaults=defaults, **kwargs)

    def bulk_update(self, objs, fields, batch_size=None):
        """Recompute `word_count` on each object when `string` is being updated.

        If "string" is in `fields`, "word_count" is added to the fields that get
        written. The caller's `fields` sequence is never modified.
        """
        # Work on copies: `fields` may be a tuple or a list the caller reuses, and
        # `objs` is iterated here before being handed to Django.
        fields = list(fields)

        if "string" in fields:
            objs = list(objs)
            for obj in objs:
                obj.word_count = get_word_count(obj.string)
            if "word_count" not in fields:
                fields.append("word_count")

        return super().bulk_update(objs, fields=fields, batch_size=batch_size)


class Entity(DirtyFieldsMixin, models.Model):
    resource = models.ForeignKey(Resource, models.CASCADE, related_name="entities")
    string = models.TextField()
    string_plural = models.TextField(blank=True)
    # Unique identifier, used to compare DB and VCS objects
    key = models.TextField()
    # Format-specific value, used to provide more context
    context = models.TextField(blank=True)
    comment = models.TextField(blank=True)
    group_comment = models.TextField(blank=True)
    resource_comment = models.TextField(blank=True)
    order = models.PositiveIntegerField(default=0)
    source = JSONField(blank=True, default=list)  # List of paths to source code files
    obsolete = models.BooleanField(default=False)
    # Recomputed from `string` on every save()
    word_count = models.PositiveIntegerField(default=0)

    date_created = models.DateTimeField(default=timezone.now)
    date_obsoleted = models.DateTimeField(null=True, blank=True)

    changed_locales = models.ManyToManyField(
        Locale,
        through="ChangedEntityLocale",
        help_text="List of locales in which translations for this entity have "
        "changed since the last sync.",
    )

    objects = EntityQuerySet.as_manager()

    class Meta:
        indexes = [
            models.Index(fields=["resource", "obsolete", "string_plural"]),
        ]

    def __str__(self):
        return self.string

    def save(self, *args, **kwargs):
        """Recompute `word_count` from `string`, then save."""
        self.word_count = get_word_count(self.string)
        super().save(*args, **kwargs)

    def get_stats(self, locale) -> dict[str, int]:
        """
        Get stats for a single (entity, locale) pair.

        :arg Locale locale: filter translations for this locale.
        :return: a dictionary with stats for the Entity+Locale
        """
        approved = 0
        pretranslated = 0
        errors = 0
        warnings = 0
        unreviewed = 0

        for t in self.translation_set.filter(locale=locale).prefetch_related(
            "errors",
            "warnings",
        ):
            # Each translation lands in at most one of errors / warnings /
            # approved / pretranslated; errors take precedence over warnings.
            if t.errors.exists():
                if t.approved or t.pretranslated or t.fuzzy:
                    errors += 1
            elif t.warnings.exists():
                if t.approved or t.pretranslated or t.fuzzy:
                    warnings += 1
            elif t.approved:
                approved += 1
            elif t.pretranslated:
                pretranslated += 1

            # Counted independently of the buckets above.
            if not (t.approved or t.pretranslated or t.fuzzy or t.rejected):
                unreviewed += 1

        return {
            "approved": approved,
            "pretranslated": pretranslated,
            "errors": errors,
            "warnings": warnings,
            "unreviewed": unreviewed,
        }

    def has_changed(self, locale):
        """
        Check if translations in the given locale have changed since the
        last sync.
        """
        return locale in self.changed_locales.all()

    def get_active_translation(self, plural_form=None):
        """
        Get active translation for a given entity and plural form.
        Active translations must be prefetched for the requested locale.
        """
        from pontoon.base.models.translation import Translation

        translations = self.active_translations

        if plural_form is not None:
            translations = [t for t in translations if t.plural_form == plural_form]

        return translations[0] if translations else Translation()

    def reset_active_translation(self, locale, plural_form=None):
        """
        Reset active translation for given entity, locale and plural form.
        Return active translation if exists or empty Translation instance.
        """
        from pontoon.base.models.translation import Translation

        translations = self.translation_set.filter(locale=locale)

        if plural_form is not None:
            translations = translations.filter(plural_form=plural_form)

        translations.update(active=False)

        # Only the best candidate is needed, so fetch a single row instead of
        # evaluating the whole query set.
        active_translation = (
            translations.filter(rejected=False)
            .order_by("-approved", "-pretranslated", "-fuzzy", "-date")
            .first()
        )

        if active_translation is None:
            return Translation()

        active_translation.active = True

        # Do not trigger the overridden Translation.save() method
        super(Translation, active_translation).save(update_fields=["active"])

        return active_translation

    def reset_term_translation(self, locale):
        """
        When translation in the "Terminology" project changes, update the corresponding
        TermTranslation:
        - If approved translation exists, use it as TermTranslation
        - If approved translation doesn't exist, remove any TermTranslation instance

        This method is also executed in the process of deleting a term translation,
        because it needs to be rejected first, which triggers the call to this
        function.
        """
        from pontoon.base.models.translation import Translation

        term = self.term

        try:
            approved_translation = self.translation_set.get(
                locale=locale, approved=True
            )
            term_translation, _ = term.translations.get_or_create(locale=locale)
            term_translation.text = approved_translation.string
            term_translation.save(update_fields=["text"])
        except Translation.DoesNotExist:
            term.translations.filter(locale=locale).delete()

    @classmethod
    def for_project_locale(
        cls,
        user,
        project,
        locale,
        paths=None,
        status=None,
        tag=None,
        search=None,
        extra=None,
        search_identifiers=None,
        search_exclude_source_strings=None,
        search_rejected_translations=None,
        search_match_case=None,
        search_match_whole_word=None,
        time=None,
        author=None,
        review_time=None,
        reviewer=None,
        exclude_self_reviewed=None,
    ):
        """Get project entities with locale translations.

        :arg user: passed to `Project.objects.visible_for()` for "all-projects"
        :arg Project project: project to get entities for; the slug "all-projects"
            selects every visible, non-system project
        :arg Locale locale: a Locale object to get translations for
        :arg list paths: if set, only entities of resources with these paths
        :arg str status: comma-separated status filter names, combined with OR
        :arg str tag: comma-separated tag slugs
        :arg str search: search phrase(s), split by `utils.get_search_phrases()`
        :arg str extra: comma-separated extra filter names, combined with OR
        :arg search_identifiers: if truthy, also search in entity keys
        :arg search_exclude_source_strings: if truthy, do not search source strings
        :arg search_rejected_translations: if truthy, also search rejected translations
        :arg search_match_case: if truthy, search is case sensitive
        :arg search_match_whole_word: if truthy, only match whole words
        :arg str time: translation time interval, two 12-digit timestamps joined
            by "-"; values in any other form are ignored
        :arg str author: comma-separated author emails (or "imported")
        :arg str review_time: review time interval, same form as `time`
        :arg str reviewer: comma-separated reviewer emails
        :arg exclude_self_reviewed: if truthy, exclude entities with translations
            approved or rejected by their own author

        :returns: an ordered Entity query set

        """
        # `\Z` rather than `$`: `$` would also accept a value ending in a newline,
        # which would then be passed on to the interval parser.
        interval_pattern = r"^[0-9]{12}-[0-9]{12}\Z"

        # Time & author filters have to be applied before the aggregation
        # (with_status_counts) and the status & extra filters to avoid
        # unnecessary joins causing performance and logic issues.
        pre_filters = []
        post_filters = []

        if time:
            if match(interval_pattern, time):
                start, end = utils.parse_time_interval(time)
                pre_filters.append(
                    Entity.objects.between_time_interval(locale, start, end)
                )

        if review_time:
            if match(interval_pattern, review_time):
                start, end = utils.parse_time_interval(review_time)
                pre_filters.append(
                    Entity.objects.between_review_time_interval(locale, start, end)
                )

        if author:
            pre_filters.append(Entity.objects.authored_by(locale, author.split(",")))

        if reviewer:
            pre_filters.append(Entity.objects.reviewed_by(locale, reviewer.split(",")))

        if exclude_self_reviewed:
            pre_filters.append(
                ~Q(
                    Q(translation__approved_user=F("translation__user"))
                    | Q(translation__rejected_user=F("translation__user"))
                )
            )

        if pre_filters:
            entities = Entity.objects.filter(
                pk__in=Entity.objects.filter(Q(*pre_filters))
            )
        else:
            entities = Entity.objects.all()

        entities = entities.filter(
            resource__translatedresources__locale=locale,
            resource__project__disabled=False,
            obsolete=False,
        )

        if project.slug == "all-projects":
            visible_projects = Project.objects.visible_for(user)
            entities = entities.filter(
                resource__project__system_project=False,
                resource__project__in=visible_projects,
            )
        else:
            entities = entities.filter(resource__project=project)

        # Filter by path
        if paths:
            entities = entities.filter(resource__path__in=paths)

        if status:
            # Apply a combination of filters based on the list of statuses the user sent.
            status_filter_choices = (
                "missing",
                "warnings",
                "errors",
                "pretranslated",
                "translated",
                "unreviewed",
            )
            post_filters.append(
                combine_entity_filters(
                    entities, status_filter_choices, status.split(","), locale, project
                )
            )

        if extra:
            # Apply a combination of filters based on the list of extras the user sent.
            extra_filter_choices = (
                "rejected",
                "unchanged",
                "empty",
                "fuzzy",
                "missing-without-unreviewed",
            )
            post_filters.append(
                combine_entity_filters(
                    entities, extra_filter_choices, extra.split(","), locale
                )
            )

        if tag:
            post_filters.append(Q(resource__tag__slug__in=tag.split(",")))

        if post_filters:
            entities = entities.filter(Q(*post_filters))
            if tag:
                # only tag needs `distinct` as it traverses m2m fields
                entities = entities.distinct()

        # Filter by search parameters
        if search:
            search_list = utils.get_search_phrases(search)

            # Modify query based on case & match sensitivity filters
            i = "" if search_match_case else "i"
            y = r"\y" if search_match_whole_word else ""

            # Use regex to ignore context identifiers by default
            r = "" if search_identifiers else "=.*"
            o = "" if search_identifiers else ".*"

            # One Q per search phrase; passing them all to filter() ANDs them.
            translation_filters = (
                (
                    Q(
                        Q(resource__format="ftl")
                        & (
                            Q(
                                **{
                                    f"translation__string__{i}regex": rf"{r}{y}{escape(s)}{y}{o}"
                                }
                            )
                        )
                    )
                    | Q(
                        ~Q(resource__format="ftl")
                        & Q(**{f"translation__string__{i}regex": rf"{y}{escape(s)}{y}"})
                    )
                )
                & Q(translation__locale=locale)
                & (
                    Q()
                    if search_rejected_translations
                    else Q(translation__rejected=False)
                )
                for s in search_list
            )

            translation_matches = entities.filter(*translation_filters).values_list(
                "id", flat=True
            )

            # Search in source strings
            entity_filters = (
                (
                    Q(pk__in=[])  # Ensures that no source strings are returned
                    if search_exclude_source_strings
                    else (
                        Q(
                            Q(resource__format="ftl")
                            & (Q(**{f"string__{i}regex": rf"{r}{y}{escape(s)}{y}{o}"}))
                        )
                        | Q(
                            ~Q(resource__format="ftl")
                            & (
                                Q(**{f"string__{i}regex": rf"{y}{escape(s)}{y}"})
                                | Q(
                                    **{
                                        f"string_plural__{i}regex": rf"{y}{escape(s)}{y}"
                                    }
                                )
                            )
                        )
                    )
                )
                | (
                    Q(**{f"key__{i}regex": rf"{y}{escape(s)}{y}"})
                    if search_identifiers
                    else Q()
                )
                for s in search_list
            )

            entity_matches = entities.filter(*entity_filters).values_list(
                "id", flat=True
            )

            # An entity is a hit if either its translations or its source matched.
            entities = Entity.objects.filter(
                pk__in=set(translation_matches) | set(entity_matches)
            )

        order_fields = ("resource__order", "order")
        if project.slug == "all-projects":
            order_fields = ("resource__project__name",) + order_fields

        return entities.order_by(*order_fields)

    @classmethod
    def map_entities(
        cls,
        locale,
        preferred_source_locale,
        entities,
        is_sibling=False,
        requested_entity=None,
    ):
        """Serialize entities and their active translations into a list of dicts.

        :arg Locale locale: a Locale object to get translations for
        :arg str preferred_source_locale: locale code whose approved translations
            replace the original strings, or "" for none
        :arg EntityQuerySet entities: entities to serialize
        :arg bool is_sibling: copied as-is into the "is_sibling" key of every item
        :arg requested_entity: PK of an entity to append if `entities` lacks it

        :returns: a list of dictionaries, one per entity that has a ProjectLocale
            for `locale`

        """
        entities_array = []
        # Separator between the key proper and what follows it in `Entity.key`;
        # only the part before it is exposed.
        key_separator = "\x04"

        entities: Iterable[Entity] = entities.prefetch_entities_data(
            locale, preferred_source_locale
        )

        # If requested entity not in the current page
        if requested_entity and requested_entity not in [e.pk for e in entities]:
            entities = list(entities) + list(
                Entity.objects.filter(pk=requested_entity).prefetch_entities_data(
                    locale, preferred_source_locale
                )
            )

        for entity in entities:
            translation_array = []

            try:
                readonly = entity.resource.project.projectlocale[0].readonly
            except IndexError:
                # Entities explicitly requested using `string` or `list` URL parameters
                # might not be enabled for localization for the given locale. If the
                # project is given in the URL, the 404 page shows up, but if the All
                # Projects view is used, we hit the IndexError here, because the
                # `projectlocale` list is empty. In this case, we skip the entity.
                continue

            original = entity.string
            original_plural = entity.string_plural

            if original_plural == "":
                translation = entity.get_active_translation().serialize()
                translation_array.append(translation)
            else:
                for plural_form in range(0, locale.nplurals or 1):
                    translation = entity.get_active_translation(plural_form).serialize()
                    translation_array.append(translation)

            # `alternative_originals` is only prefetched when a preferred source
            # locale is set, hence the check order.
            if preferred_source_locale != "" and entity.alternative_originals:
                original = entity.alternative_originals[0].string
                if original_plural != "":
                    original_plural = entity.alternative_originals[-1].string

            cleaned_key = entity.key.split(key_separator)[0]
            if cleaned_key == entity.string:
                cleaned_key = ""

            entities_array.append(
                {
                    "pk": entity.pk,
                    "original": original,
                    "original_plural": original_plural,
                    "machinery_original": entity.string,
                    "key": cleaned_key,
                    "context": entity.context,
                    "path": entity.resource.path,
                    "project": entity.resource.project.serialize(),
                    "format": entity.resource.format,
                    "comment": entity.comment,
                    "group_comment": entity.group_comment,
                    "resource_comment": entity.resource_comment,
                    "order": entity.order,
                    "source": entity.source,
                    "obsolete": entity.obsolete,
                    "translation": translation_array,
                    "readonly": readonly,
                    "is_sibling": is_sibling,
                    "date_created": entity.date_created,
                }
            )

        return entities_array