in treeherder/etl/files_bugzilla_map.py [0:0]
def run(self):
    """Synchronize ``FilesBugzillaMap`` with the data fetched for every project.

    Steps, in order:

    1. Fetch the path -> (product, component) data of each project returned by
       ``self.get_projects_to_import()``. A path provided by several projects
       keeps the data of the first project that provided it.
    2. Delete rows whose path was not ingested from any project.
    3. Update rows whose product/component differs from the ingested data.
    4. Create rows for paths that are not stored yet.
    5. Delete ``BugzillaComponent`` rows no longer referenced by any
       ``FilesBugzillaMap`` row.

    Projects whose fetch reported an exception are logged and skipped.
    Paths for which ``self.get_or_add_bugzilla_component`` returns a falsy
    value are neither updated nor created.
    """
    # All ingested paths, and the matching (path, product, component) triples.
    paths_ingested_all = set()
    paths_bugzilla_ingested_all = set()
    for project in self.get_projects_to_import():
        data_returned = self.fetch_data(project)
        if data_returned["exception"] is not None:
            logger.error(
                "error fetching file with map of source paths to Bugzilla products and components: url: %s ; %s",
                data_returned["url"],
                data_returned["exception"],
            )
            # NOTE(review): paths only provided by this project are treated as
            # removed further down; if every fetch fails, all rows get deleted.
            # Confirm this is intended.
            continue
        files_bugzilla_data = data_returned["files_bugzilla_data"]
        # Only take paths no earlier project has provided.
        paths_ingested_this_project = set(files_bugzilla_data) - paths_ingested_all
        paths_bugzilla_ingested_all.update(
            (path, files_bugzilla_data[path][0], files_bugzilla_data[path][1])
            for path in paths_ingested_this_project
        )
        paths_ingested_all |= paths_ingested_this_project

    paths_old = set(FilesBugzillaMap.objects.values_list("path", flat=True))

    paths_removed = paths_old - paths_ingested_all
    FilesBugzillaMap.objects.filter(path__in=paths_removed).delete()

    # Stored (path, product, component) triples which survived the deletion.
    # values_list() follows the relation by itself; select_related() would
    # have no effect here.
    paths_bugzilla_old = set(
        FilesBugzillaMap.objects.values_list(
            "path", "bugzilla_component__product", "bugzilla_component__component"
        )
    )
    # Triples not stored as-is: either the path is new or its component changed.
    paths_bugzilla_changed_or_added = paths_bugzilla_ingested_all - paths_bugzilla_old
    paths_added = paths_ingested_all - paths_old
    paths_changed = {
        path_bugzilla[0] for path_bugzilla in paths_bugzilla_changed_or_added
    } - paths_added

    # Lookup table in the shape get_or_add_bugzilla_component() is called with.
    path_bugzilla_data = {
        path: [product, component]
        for path, product, component in paths_bugzilla_ingested_all
    }

    paths_bugzilla_update_needed = []
    if paths_changed:
        # Load all rows to update with a single query instead of one per path.
        # Assumes at most one row per path (TODO confirm uniqueness on the model).
        files_bugzilla_by_path = {
            file_bugzilla.path: file_bugzilla
            for file_bugzilla in FilesBugzillaMap.objects.filter(path__in=paths_changed)
        }
        for path in paths_changed:
            bugzilla_component_data = self.get_or_add_bugzilla_component(
                path_bugzilla_data, path
            )
            if not bugzilla_component_data:
                continue
            path_bugzilla_update_needed = files_bugzilla_by_path.get(path)
            if path_bugzilla_update_needed is None:
                # The row vanished since paths_old was read; nothing to update.
                logger.warning("path to update is not in the database anymore: %s", path)
                continue
            path_bugzilla_update_needed.bugzilla_component_id = bugzilla_component_data.id
            paths_bugzilla_update_needed.append(path_bugzilla_update_needed)
    FilesBugzillaMap.objects.bulk_update(
        paths_bugzilla_update_needed, ["bugzilla_component_id"], batch_size=1000
    )

    paths_bugzilla_addition_needed = []
    for path in paths_added:
        bugzilla_component_data = self.get_or_add_bugzilla_component(path_bugzilla_data, path)
        if not bugzilla_component_data:
            continue
        paths_bugzilla_addition_needed.append(
            FilesBugzillaMap(
                path=path,
                # Last path segment; the whole path if it contains no "/".
                file_name=path.rsplit("/", 1)[-1],
                bugzilla_component=bugzilla_component_data,
            )
        )
    FilesBugzillaMap.objects.bulk_create(paths_bugzilla_addition_needed, batch_size=1000)

    # Drop components which no file maps to anymore.
    bugzilla_components_used = set(
        FilesBugzillaMap.objects.values_list("bugzilla_component_id", flat=True).distinct()
    )
    bugzilla_components_all = set(BugzillaComponent.objects.values_list("id", flat=True))
    bugzilla_components_unused = bugzilla_components_all - bugzilla_components_used
    BugzillaComponent.objects.filter(id__in=bugzilla_components_unused).delete()