treeherder/etl/files_bugzilla_map.py (241 lines of code) (raw):

import logging import sys from treeherder.model.models import ( BugzillaComponent, BugzillaSecurityGroup, FilesBugzillaMap, Repository, ) from treeherder.utils.github import fetch_json logger = logging.getLogger(__name__) class FilesBugzillaMapProcess: bugzilla_components = {} max_path_length = FilesBugzillaMap._meta.get_field("path").max_length max_product_length = BugzillaComponent._meta.get_field("product").max_length max_component_length = BugzillaComponent._meta.get_field("component").max_length run_id = None def get_or_add_bugzilla_component(self, files_bugzilla_data, path): product_component_data = files_bugzilla_data[path] product_component_str = product_component_data[0] + " :: " + product_component_data[1] if product_component_str in self.bugzilla_components: return self.bugzilla_components[product_component_str] try: product = product_component_data[0] component = product_component_data[1] if len(product) > self.max_product_length: logger.error( "error inserting Bugzilla product and component \"'%s' :: '%s'\" into db (file skipped: '%s'): product is too long (has %d characters, max is %d)", product, component, path, len(product), self.max_product_length, ) return if len(component) > self.max_component_length: logger.error( "error inserting Bugzilla product and component \"'%s' :: '%s'\" into db (file skipped: '%s'): component is too long (has %d characters, max is %d)", product, component, path, len(component), self.max_component_length, ) return if len(path) > self.max_path_length: logger.error( "error inserting Bugzilla product and component \"'%s' :: '%s'\" into db (file skipped: '%s'): path is too long (has %d characters, max is %d)", product, component, path, len(path), self.max_path_length, ) bugzilla_component_data, _ = BugzillaComponent.objects.get_or_create( product=product, component=component, ) self.bugzilla_components[product_component_str] = bugzilla_component_data except Exception as e: logger.error( "error inserting Bugzilla product and component \"'%s' :: '%s'\" into db (file skipped: '%s'): %s", product, component, path, e, ) return return bugzilla_component_data def get_projects_to_import(self): return list( Repository.objects.filter(codebase="gecko") .filter(active_status="active") .filter(life_cycle_order__isnull=False) .values_list("name", flat=True) .order_by("life_cycle_order") ) def fetch_data(self, project): url = f"https://firefox-ci-tc.services.mozilla.com/api/index/v1/task/gecko.v2.{project}.latest.source.source-bugzilla-info/artifacts/public/components.json" files_bugzilla_data = None exception = None try: files_bugzilla_data = fetch_json(url) except Exception as e: exception = e return { "url": url, "files_bugzilla_data": files_bugzilla_data, "exception": exception, } def run(self): projects = self.get_projects_to_import() paths_ingested_all = set() paths_bugzilla_ingested_all = set() for project in projects: data_returned = self.fetch_data(project) if data_returned["exception"] is not None: logger.error( "error fetching file with map of source paths to Bugzilla products and components: url: %s ; %s", data_returned["url"], data_returned["exception"], ) continue files_bugzilla_data = data_returned["files_bugzilla_data"] paths_ingested_this_project = set(path for path in files_bugzilla_data).difference( paths_ingested_all ) paths_bugzilla_ingested_project = set() for path in paths_ingested_this_project: paths_bugzilla_ingested_project.add( ( path, files_bugzilla_data[path][0], files_bugzilla_data[path][1], ) ) paths_ingested_all |= paths_ingested_this_project paths_bugzilla_ingested_all |= paths_bugzilla_ingested_project paths_old = set(FilesBugzillaMap.objects.values_list("path", flat=True)) paths_removed = paths_old - paths_ingested_all FilesBugzillaMap.objects.filter(path__in=paths_removed).delete() paths_bugzilla_old = set( FilesBugzillaMap.objects.select_related("bugzilla_component").values_list( "path", "bugzilla_component__product", "bugzilla_component__component" ) ) paths_bugzilla_unchanged = paths_bugzilla_old.intersection(paths_bugzilla_ingested_all) paths_bugzilla_changed_or_added = paths_bugzilla_ingested_all.difference( paths_bugzilla_unchanged ) paths_changed_or_added = set( path_bugzilla[0] for path_bugzilla in paths_bugzilla_changed_or_added ) paths_added = paths_ingested_all.difference(paths_old) paths_changed = paths_changed_or_added.difference(paths_added) path_bugzilla_data = {} for path_bugzilla in paths_bugzilla_ingested_all: path = path_bugzilla[0] product = path_bugzilla[1] component = path_bugzilla[2] path_bugzilla_data[path] = [product, component] paths_bugzilla_update_needed = [] for path in paths_changed: bugzilla_component_data = self.get_or_add_bugzilla_component(path_bugzilla_data, path) if not bugzilla_component_data: continue path_bugzilla_update_needed = FilesBugzillaMap.objects.select_related( "bugzilla_component" ).filter(path=path)[0] path_bugzilla_update_needed.bugzilla_component_id = bugzilla_component_data.id paths_bugzilla_update_needed.append(path_bugzilla_update_needed) FilesBugzillaMap.objects.bulk_update( paths_bugzilla_update_needed, ["bugzilla_component_id"], batch_size=1000 ) paths_bugzilla_addition_needed = [] for path in paths_added: bugzilla_component_data = self.get_or_add_bugzilla_component(path_bugzilla_data, path) if not bugzilla_component_data: continue file_name = (path.rsplit("/", 1))[-1] paths_bugzilla_addition_needed.append( FilesBugzillaMap( path=path, file_name=file_name, bugzilla_component=bugzilla_component_data, ) ) FilesBugzillaMap.objects.bulk_create(paths_bugzilla_addition_needed, batch_size=1000) bugzilla_components_used = set( FilesBugzillaMap.objects.values_list("bugzilla_component_id", flat=True).distinct() ) bugzilla_components_all = set( BugzillaComponent.objects.all().values_list("id", flat=True).distinct() ) bugzilla_components_unused = bugzilla_components_all.difference(bugzilla_components_used) (BugzillaComponent.objects.filter(id__in=bugzilla_components_unused).delete()) class ProductSecurityGroupProcess: max_product_length = BugzillaSecurityGroup._meta.get_field("product").max_length max_security_group_length = BugzillaSecurityGroup._meta.get_field("security_group").max_length def fetch_data(self): url = "https://bugzilla.mozilla.org/latest/configuration" product_security_group_data = None exception = None try: product_security_group_data = fetch_json(url) except Exception as e: exception = e return { "url": url, "product_security_group_data": product_security_group_data, "exception": exception, } def run(self): data_returned = self.fetch_data() if data_returned["exception"] is not None: logger.error( "error fetching file with map of source paths to Bugzilla products and components: url: %s ; %s", data_returned["url"], data_returned["exception"], ) sys.exit() fields_data = data_returned["product_security_group_data"]["field"]["product"]["values"] groups_data = data_returned["product_security_group_data"]["group"] products = set() for field_data in fields_data: product_name = str(field_data["name"]) security_group_id = str(field_data["security_group_id"]) if security_group_id in groups_data: security_group_name = str(groups_data[security_group_id]["name"]) products.add(product_name) try: if len(product_name) > self.max_product_length: logger.error( "error inserting Bugzilla product and security group \"'%s' :: '%s'\" into db: product is too long (has %d characters, max is %d)", product_name, security_group_name, len(product_name), self.max_product_length, ) continue if len(security_group_name) > self.max_security_group_length: logger.error( "error inserting Bugzilla product and security group \"'%s' :: '%s'\" into db: security group is too long (has %d characters, max is %d)", product_name, security_group_name, len(security_group_name), self.max_security_group_length, ) continue BugzillaSecurityGroup.objects.get_or_create( product=product_name, security_group=security_group_name, ) except Exception as e: logger.error( "error inserting Bugzilla product and security group \"'%s' :: '%s'\" into db: %s", product_name, security_group_name, e, ) continue BugzillaSecurityGroup.objects.exclude(product__in=products).delete()