def update_categorization_data()

in elkserver/docker/redelk-base/redelkinstalldata/scripts/modules/enrich_domainscategorization/module.py [0:0]


    def update_categorization_data(self, domains, checked_domains):
        """Update the categorization data for each domain.

        For every key in ``domains`` the freshly checked ``categories_str`` is
        compared with the one stored on the document. When they differ, the
        document's ``_source.domainslist.categorization`` is replaced in place,
        the document is pushed through ``es.update`` and
        ``self.add_bluecheck_entry`` is called with the document and the
        previous categorization.

        Args:
            domains: Mapping of domain name to document. Each document is read
                through its ``_index``, ``_id`` and ``_source`` keys
                (presumably an Elasticsearch hit -- confirm against caller).
                Documents whose categories changed are mutated in place.
            checked_domains: Mapping of domain name to check result. It must
                contain every key of ``domains``, and a ``categorization``
                entry for every domain whose categories changed.

        Raises:
            KeyError: If a domain is missing from ``checked_domains``, or a
                changed domain has no ``categorization`` entry there, or the
                document lacks ``_index`` / ``_id``.
        """
        for domain in domains:
            self.logger.debug("Updating categorization data for %s", domain)
            checked = checked_domains[domain]
            doc = domains[domain]

            # Check if current categorization data is different from the new one
            # NOTE(review): get_value is assumed to return its third argument
            # when the dotted path is absent -- confirm in the helper module.
            new_categories = get_value("categorization.categories_str", checked, "")
            old_categories = get_value(
                "_source.domainslist.categorization.categories_str", doc, ""
            )
            self.logger.debug("New categories: %s", new_categories)
            self.logger.debug("Old categories: %s", old_categories)

            # Nothing to do when the categories string is unchanged
            if new_categories == old_categories:
                continue

            self.logger.debug(
                "Updating categorization data for %s with %s",
                domain,
                new_categories,
            )

            # Get old categorization data to add in bluecheck. A deep copy is
            # taken because the document is overwritten right below.
            try:
                old_categorization = copy.deepcopy(
                    doc["_source"]["domainslist"]["categorization"]
                )
            except Exception as err:  # pylint: disable=broad-except
                # Best effort: fall back to whatever can still be read.
                self.logger.error(
                    "Error getting old categorization data for %s: %s", domain, err
                )
                old_categorization = {
                    "categories_str": old_categories,
                    "categories": get_value(
                        "_source.domainslist.categorization.categories",
                        doc,
                        [],
                    ),
                }

            # Resolve the new value first so a missing "categorization" entry
            # raises before the document is touched.
            new_categorization = checked["categorization"]
            # setdefault keeps the write-back from raising KeyError on the very
            # path whose absence the fallback above has just tolerated.
            domainslist = doc.setdefault("_source", {}).setdefault("domainslist", {})
            domainslist["categorization"] = new_categorization

            es.update(
                index=doc["_index"],
                id=doc["_id"],
                body={"doc": doc["_source"]},
            )

            self.add_bluecheck_entry(doc, old_categorization)