def run()

in treeherder/etl/files_bugzilla_map.py [0:0]


    def run(self) -> None:
        """Synchronise the stored path -> Bugzilla component map with freshly fetched data.

        For every project returned by ``get_projects_to_import()`` the mapping is
        fetched with ``fetch_data()``. Projects are processed in the order given;
        a path already seen in an earlier project keeps that project's
        product/component. The database is then reconciled:

        * ``FilesBugzillaMap`` rows whose path was not ingested are deleted,
        * rows whose product/component differ from the ingested data are updated,
        * rows are created for newly ingested paths,
        * ``BugzillaComponent`` rows no longer referenced by any
          ``FilesBugzillaMap`` row are deleted.

        A failed fetch for one project is logged and that project is skipped. If
        at least one fetch was attempted and none succeeded, the method returns
        without modifying the database, so that an outage of the data source does
        not wipe the whole mapping.
        """
        projects = self.get_projects_to_import()

        paths_ingested_all = set()
        paths_bugzilla_ingested_all = set()
        any_fetch_succeeded = False
        any_fetch_failed = False
        for project in projects:
            data_returned = self.fetch_data(project)
            if data_returned["exception"] is not None:
                any_fetch_failed = True
                logger.error(
                    "error fetching file with map of source paths to Bugzilla products and components: url: %s ; %s",
                    data_returned["url"],
                    data_returned["exception"],
                )
                continue
            any_fetch_succeeded = True
            files_bugzilla_data = data_returned["files_bugzilla_data"]
            # Only paths not provided by an earlier project are taken from this one.
            paths_ingested_this_project = set(files_bugzilla_data).difference(paths_ingested_all)
            # Each entry is a (path, product, component) tuple.
            paths_bugzilla_ingested_all.update(
                (path, files_bugzilla_data[path][0], files_bugzilla_data[path][1])
                for path in paths_ingested_this_project
            )
            paths_ingested_all |= paths_ingested_this_project

        if any_fetch_failed and not any_fetch_succeeded:
            # Nothing could be fetched: continuing would treat every stored path
            # as removed and delete the complete mapping and all components.
            logger.error(
                "no file with map of source paths to Bugzilla products and components "
                "could be fetched, keeping the existing data"
            )
            return

        paths_old = set(FilesBugzillaMap.objects.values_list("path", flat=True))

        # Drop rows for paths which are no longer part of the ingested data.
        paths_removed = paths_old - paths_ingested_all
        FilesBugzillaMap.objects.filter(path__in=paths_removed).delete()

        # Read after the deletion above, so removed paths are not part of this set.
        paths_bugzilla_old = set(
            FilesBugzillaMap.objects.values_list(
                "path", "bugzilla_component__product", "bugzilla_component__component"
            )
        )
        # Ingested tuples without an identical stored tuple are either new paths
        # or existing paths whose product/component changed.
        paths_bugzilla_changed_or_added = paths_bugzilla_ingested_all - paths_bugzilla_old
        paths_changed_or_added = {path for path, _, _ in paths_bugzilla_changed_or_added}
        paths_added = paths_ingested_all - paths_old
        paths_changed = paths_changed_or_added - paths_added

        # Lookup table handed to get_or_add_bugzilla_component().
        path_bugzilla_data = {
            path: [product, component]
            for path, product, component in paths_bugzilla_ingested_all
        }

        paths_bugzilla_update_needed = []
        for path in paths_changed:
            bugzilla_component_data = self.get_or_add_bugzilla_component(path_bugzilla_data, path)
            if not bugzilla_component_data:
                continue
            # Only the foreign key column is rewritten, so the related component
            # row does not have to be joined in.
            path_bugzilla_update_needed = FilesBugzillaMap.objects.filter(path=path)[0]
            path_bugzilla_update_needed.bugzilla_component_id = bugzilla_component_data.id
            paths_bugzilla_update_needed.append(path_bugzilla_update_needed)
        FilesBugzillaMap.objects.bulk_update(
            paths_bugzilla_update_needed, ["bugzilla_component_id"], batch_size=1000
        )

        paths_bugzilla_addition_needed = []
        for path in paths_added:
            bugzilla_component_data = self.get_or_add_bugzilla_component(path_bugzilla_data, path)
            if not bugzilla_component_data:
                continue
            # The file name is the part of the path after the last "/".
            file_name = path.rsplit("/", 1)[-1]
            paths_bugzilla_addition_needed.append(
                FilesBugzillaMap(
                    path=path,
                    file_name=file_name,
                    bugzilla_component=bugzilla_component_data,
                )
            )
        FilesBugzillaMap.objects.bulk_create(paths_bugzilla_addition_needed, batch_size=1000)

        # Remove components which no path refers to anymore.
        bugzilla_components_used = set(
            FilesBugzillaMap.objects.values_list("bugzilla_component_id", flat=True).distinct()
        )
        bugzilla_components_all = set(BugzillaComponent.objects.values_list("id", flat=True))
        bugzilla_components_unused = bugzilla_components_all - bugzilla_components_used
        BugzillaComponent.objects.filter(id__in=bugzilla_components_unused).delete()