mozilla_schema_generator/probes.py (167 lines of code) (raw):

# -*- coding: utf-8 -*- # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. from __future__ import annotations import json from datetime import datetime from typing import Any, List from .schema import SchemaException from .utils import _get class Probe(object): type_key = "type" name_key = "name" history_key = "history" in_source_key = "in-source" def __init__(self, identifier: str, definition: dict): self.id = identifier self.type = definition[self.type_key] self.name = definition[self.name_key] def __repr__(self): return json.dumps( { "id": self.id, "type": self.type, "name": self.name, "description": self.description, } ) def get_type(self) -> str: return self.type def get_name(self) -> str: return self.name def get_description(self) -> str: return self.description def get_last_change(self) -> datetime: raise NotImplementedError("Last Change is not available on generic probe") def get_first_added(self) -> datetime: raise NotImplementedError("First added is not available on generic probe") def get_schema(self, addtlProps: Any) -> Any: raise NotImplementedError("Get Schema is not available on generic probe") def get(self, *k) -> Any: return _get(self.definition, k) def __lt__(self, other: Probe) -> bool: if self.get_first_added() == other.get_first_added(): return self.get_name() < other.get_name() return self.get_first_added() < other.get_first_added() class MainProbe(Probe): first_added_key = "first_added" histogram_schema = {"type": "string"} parent_processes = {"main"} child_processes = {"content", "gpu", "extension", "dynamic", "socket"} processes_map = { "all_childs": child_processes, "all_children": child_processes, "all": child_processes | parent_processes, } def __init__(self, identifier: str, definition: dict): self._set_dates(definition[self.first_added_key]) self._set_definition(definition) self._set_description(self.definition) super().__init__(identifier, definition) def _set_definition(self, full_defn: dict): history = [d for arr in full_defn[self.history_key].values() for d in arr] self.definition = max(history, key=lambda x: int(x["versions"]["first"])) self.definition["name"] = full_defn[self.name_key] self._set_processes(history) def _set_processes(self, history): # Include all historical processes processes = { p for d in history for p in d["details"].get("record_in_processes", []) } processes = { sub_p for p in processes for sub_p in self.processes_map.get(p, [p]) } self.definition["details"]["record_in_processes"] = processes def _set_dates(self, first_added_value: dict): vals = [datetime.fromisoformat(v) for v in first_added_value.values()] self.first_added = min(vals) self.last_change = max(vals) def _set_description(self, definition): self.description = None if "description" in definition: self.description = definition["description"] # BigQuery limits descriptions to a maximum of 1024 characters, # so we truncate anything longer than 1000. if len(self.description) >= 1000: self.description = self.description[:1000] + "…" def get_first_added(self) -> datetime: return self.first_added def get_last_change(self) -> datetime: return self.last_change def get_schema(self, addtlProps: Any) -> Any: # Get the schema based on the probe type if self.get_type() == "scalar": ptype = self.get("details", "kind") if ptype == "boolean": pschema = {"type": "boolean"} elif ptype == "string": pschema = {"type": "string"} elif ptype == "uint": pschema = {"type": "integer"} else: raise Exception("Unknown scalar type " + ptype) elif self.get_type() == "histogram": pschema = self.histogram_schema if self.description is not None: pschema["description"] = self.description # Add nested level if keyed if self.get("details", "keyed"): final_schema = {"type": "object", "additionalProperties": pschema} else: final_schema = pschema return final_schema class GleanProbe(Probe): all_pings_keywords = ("all-pings", "all_pings") first_added_key = "first_added" def __init__(self, identifier: str, definition: dict, *, pings: List[str] = None): self._set_dates(definition) self._set_definition(definition) self._set_description(self.definition) self._in_source = definition.get(self.in_source_key, False) super().__init__(identifier, definition) defn_pings = set( [ p for d in definition[self.history_key] for p in d.get("send_in_pings", ["metrics"]) ] ) self.definition["send_in_pings"] = defn_pings if pings is not None: self._update_all_pings(pings) def _update_all_pings(self, pings: List[str]): if any( [ kw in self.definition["send_in_pings"] for kw in GleanProbe.all_pings_keywords ] ): self.definition["send_in_pings"] = set(pings) def _set_definition(self, full_defn: dict): # Expose the entire history, for special casing of the probe. self.definition_history = list( sorted( full_defn[self.history_key], key=lambda x: datetime.fromisoformat(x["dates"]["last"]), reverse=True, ) ) # The canonical definition for up-to-date schemas self.definition = self.definition_history[0] self.definition["name"] = full_defn[self.name_key] def _set_dates(self, definition: dict): vals = [ datetime.fromisoformat(d["dates"]["first"]) for d in definition[self.history_key] ] self.first_added = min(vals) self.last_change = max(vals) def _set_description(self, definition): if "description" in definition: self.description = definition["description"] else: self.description = None def is_in_source(self) -> bool: return self._in_source def get_first_added(self) -> datetime: return self.first_added def get_last_change(self) -> datetime: return self.last_change def get_schema(self, addtlProps: Any) -> Any: if addtlProps is None: raise SchemaException( "Additional Properties cannot be missing for Glean probes" ) if self.description: addtlProps["description"] = self.description return addtlProps