# glean-core/python/glean/_loader.py
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
"""
Utilities for loading metrics.yaml and pings.yaml files and creating a tree
of metric types.
"""
import enum
from dataclasses import field, make_dataclass
from pathlib import Path
from typing import Any, Dict, Generator, List, Optional, Tuple, Union
from glean_parser.parser import parse_objects # type: ignore
import glean_parser.lint # type: ignore
import glean_parser.metrics as gp_metrics # type: ignore
from glean_parser.util import Camelize, extra_metric_args # type: ignore
from . import metrics

# A mapping from the name of the metric type as it appears in the metrics.yaml
# to the Python class for that metric type.
_TYPE_MAPPING = {
"boolean": metrics.BooleanMetricType,
"counter": metrics.CounterMetricType,
"datetime": metrics.DatetimeMetricType,
"event": metrics.EventMetricType,
"labeled_boolean": metrics.LabeledBooleanMetricType,
"labeled_counter": metrics.LabeledCounterMetricType,
"labeled_string": metrics.LabeledStringMetricType,
"memory_distribution": metrics.MemoryDistributionMetricType,
"object": metrics.ObjectMetricType,
"ping": metrics.PingType,
"quantity": metrics.QuantityMetricType,
"string": metrics.StringMetricType,
"string_list": metrics.StringListMetricType,
"timespan": metrics.TimespanMetricType,
"timing_distribution": metrics.TimingDistributionMetricType,
"uuid": metrics.UuidMetricType,
}

# The arguments that should be passed to the constructor for the metric types.
_ARGS = [
"allowed_extra_keys",
"bucket_count",
"category",
"disabled",
"histogram_type",
"include_client_id",
"send_if_empty",
"lifetime",
"memory_unit",
"name",
"range_max",
"range_min",
"reason_codes",
"send_in_pings",
"precise_timestamps",
"include_info_sections",
"schedules_pings",
"enabled",
"follows_collection_enabled",
"time_unit",
"uploader_capabilities",
]

_ARG_CONVERSION = {
"lifetime": {
gp_metrics.Lifetime.ping: metrics.Lifetime.PING,
gp_metrics.Lifetime.application: metrics.Lifetime.APPLICATION,
gp_metrics.Lifetime.user: metrics.Lifetime.USER,
},
"time_unit": {
gp_metrics.TimeUnit.nanosecond: metrics.TimeUnit.NANOSECOND,
gp_metrics.TimeUnit.microsecond: metrics.TimeUnit.MICROSECOND,
gp_metrics.TimeUnit.millisecond: metrics.TimeUnit.MILLISECOND,
gp_metrics.TimeUnit.second: metrics.TimeUnit.SECOND,
gp_metrics.TimeUnit.minute: metrics.TimeUnit.MINUTE,
gp_metrics.TimeUnit.hour: metrics.TimeUnit.HOUR,
gp_metrics.TimeUnit.day: metrics.TimeUnit.DAY,
},
"memory_unit": {
gp_metrics.MemoryUnit.byte: metrics.MemoryUnit.BYTE,
gp_metrics.MemoryUnit.kilobyte: metrics.MemoryUnit.KILOBYTE,
gp_metrics.MemoryUnit.megabyte: metrics.MemoryUnit.MEGABYTE,
gp_metrics.MemoryUnit.gigabyte: metrics.MemoryUnit.GIGABYTE,
},
}


def getattr_conv(metric, arg):
"""
Get an attribute from the parsed metric
and optionally convert it to the correct glean-py type.
"""
val = getattr(metric, arg)
if arg in _ARG_CONVERSION:
val = _ARG_CONVERSION[arg].get(val, val)
return val
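

# For illustration: for a metric declared with ``lifetime: ping`` in
# metrics.yaml, the parser exposes ``gp_metrics.Lifetime.ping`` and this helper
# hands back the bindings' own enum value instead (a sketch; ``my_metric``
# stands for any parsed metric object):
#
#     getattr_conv(my_metric, "lifetime")  # -> metrics.Lifetime.PING
#     getattr_conv(my_metric, "name")      # returned unchanged, no conversion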


def _normalize_name(name):
"""
Convert kebab-case to snake_case.
"""
return name.replace("-", "_")
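

# For example, a ping declared as "deletion-request" becomes the attribute
# ``deletion_request`` on the loaded object:
#
#     _normalize_name("deletion-request")  # -> "deletion_request"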


class UnsupportedMetricType:
"""
A placeholder class for unsupported metric types.
It raises a `TypeError` when trying to do anything with it, but this lets
us load the entire `metrics.yaml` even when it contains metric types that
aren't yet implemented.
"""
def __init__(self, type: str):
self._type = type
def __getattr__(self, attr):
raise TypeError(
f"The metric type '{self._type}' is not supported by the Glean Python bindings"
)
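

# Illustrative sketch of the failure mode (the metric type name here is
# hypothetical; any type missing from _TYPE_MAPPING behaves the same way):
#
#     placeholder = UnsupportedMetricType("custom_distribution")
#     placeholder.accumulate_samples([1])  # raises TypeError on attribute access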


def _event_extra_factory(name: str, argnames: List[Tuple[str, str]]) -> Any:
"""
Generate a new class, inheriting from `metrics.EventExtras`
and implementing the `to_ffi_extra` method,
which serializes expected attributes to pass over FFI.
"""
def __init__(self, **kwargs):
for key, value in kwargs.items():
typ = next((t for (k, t) in argnames if key == k), None)
if typ is None:
raise TypeError(f"Argument '{key}' not valid for {self.__class__.__name__}")
elif typ == "boolean" and isinstance(value, bool):
pass
elif typ == "string" and isinstance(value, str):
pass
elif typ == "quantity" and isinstance(value, int):
pass
else:
raise TypeError(f"Field '{key}' requires type {typ} in {self.__class__.__name__}")
setattr(self, key, value)
def to_ffi_extra(self):
extras = {}
for name, typ in argnames:
attr = getattr(self, name, None)
if attr is not None:
if typ == "boolean" and isinstance(attr, bool):
# Special-case needed for booleans to turn them lowercase (true/false)
extras[name] = str(attr).lower()
elif typ == "string" and isinstance(attr, str):
extras[name] = str(attr)
elif typ == "quantity" and isinstance(attr, int):
extras[name] = str(attr)
# Don't support other data types
else:
raise TypeError(f"Type {type(attr)} not supported for {name}")
return extras
attr = {name: None for (name, _) in argnames} # type: Dict[str, Any]
attr["__init__"] = __init__
attr["to_ffi_extra"] = to_ffi_extra
newclass = type(name, (metrics.EventExtras,), attr)
return newclass
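

# Illustrative sketch of a generated extras class (the event and its extra
# keys here are hypothetical):
#
#     ClickExtra = _event_extra_factory(
#         "ClickExtra", [("button_id", "string"), ("count", "quantity")]
#     )
#     extra = ClickExtra(button_id="ok", count=2)
#     extra.to_ffi_extra()     # -> {"button_id": "ok", "count": "2"}
#     ClickExtra(button_id=2)  # raises TypeError: wrong type for the field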


def _struct_type(typ) -> type:
    """
    Map a type name from an object metric's structure definition to the
    corresponding Python type.
    """
if typ == "boolean":
return bool
elif typ == "string":
return str
elif typ == "number":
return int
else:
raise ValueError(f"Unsupported struct type '{typ}'")


def _object_factory(
    name: str, structure: Dict[str, Any]
) -> Generator[Tuple[str, type], None, None]:
    """
    Generate new classes inheriting from `metrics.ObjectSerialize`, whose
    `into_serialized_object` method serializes the object into JSON.

    Yields `(name, class)` pairs for the top-level class and for any nested
    item classes.
    """
if structure["type"] == "array":
newclass = type(
name,
(
list,
metrics.ObjectSerialize,
),
{},
)
yield (name, newclass)
yield from _object_factory(f"{name}Item", structure["items"])
elif structure["type"] == "object":
fields = [] # list[tuple[str, type, Any]]
for itemname, val in structure["properties"].items():
if val["type"] == "object":
fct = _object_factory(f"{name}Item{Camelize(itemname)}Object", val)
n, ty = next(fct)
yield n, ty
yield from fct
fields.append((itemname, ty, field(default=None)))
elif val["type"] == "array":
fct = _object_factory(f"{name}Item{Camelize(itemname)}", val)
n, ty = next(fct)
yield n, ty
yield from fct
fields.append((itemname, ty, field(default=None)))
else:
fields.append((itemname, _struct_type(val["type"]), field(default=None)))
newclass = make_dataclass(name, fields, bases=(metrics.ObjectSerialize,))
yield (name, newclass)
else:
ty = structure["type"]
raise ValueError(f"Unsupported object type '{ty}'")


def _split_ctor_args(args: Dict[str, Any]) -> Tuple[Dict[str, Any], Dict[str, Any]]:
    """
    Split constructor arguments into the ones that make up `CommonMetricData`
    and the extra arguments that are passed to the metric type directly.
    """
meta_args = {}
extra_args = {}
for k, v in args.items():
if k in extra_metric_args:
extra_args[k] = v
else:
meta_args[k] = v
return (meta_args, extra_args)
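

# For illustration (assuming ``time_unit`` is listed in glean_parser's
# ``extra_metric_args`` while ``name`` and ``category`` are not):
#
#     meta, rest = _split_ctor_args(
#         {"category": "ui", "name": "load_time", "time_unit": time_unit}
#     )
#     # meta == {"category": "ui", "name": "load_time"}
#     # rest == {"time_unit": time_unit}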


def _get_metric_objects(
    name: str, metric: glean_parser.metrics.Metric
) -> Generator[Tuple[str, Any], None, None]:
    """
    Given a `glean_parser.metrics.Metric` instance, yield `(name, object)`
    pairs for the Glean Python bindings metric instances for that metric,
    along with any generated helper classes (event extras classes, object
    structure classes, and ping reason-code enums).
    """
args = {}
for arg in _ARGS:
if hasattr(metric, arg):
args[arg] = getattr_conv(metric, arg)
metric_type = _TYPE_MAPPING.get(metric.type)
if metric_type is None:
glean_metric = UnsupportedMetricType(metric.type) # type: ignore
elif metric.type == "ping":
# Special-case Ping, doesn't take CommonMetricData
glean_metric = metrics.PingType(**args) # type: ignore
elif metric.type == "object":
# Special-case object metric, it needs the type
class_name = name + "_object"
class_name = Camelize(class_name)
obj_cls = None
for cls_name, cls in _object_factory(class_name, metric._generate_structure):
yield cls_name, cls
if obj_cls is None:
obj_cls = cls
glean_metric = metrics.ObjectMetricType(metrics.CommonMetricData(**args), obj_cls) # type: ignore
else:
        # Hack for the time being: `CommonMetricData` expects a
        # `dynamic_label` field that the parser does not provide, so default
        # it to `None`.
if "dynamic_label" not in args:
args["dynamic_label"] = None
meta_args, rest = _split_ctor_args(args)
if getattr(metric, "labeled", False):
glean_metric = metric_type(
metrics.LabeledMetricData.COMMON(metrics.CommonMetricData(**meta_args)), **rest
)
else:
glean_metric = metric_type(metrics.CommonMetricData(**meta_args), **rest)
glean_metric.__doc__ = metric.description
yield name, glean_metric
    # Events also get a generated extras class, and pings a reason-codes
    # enumeration
if metric.type == "event":
class_name = name + "_extra"
class_name = Camelize(class_name)
values = metric.allowed_extra_keys_with_types
keys_class = _event_extra_factory(class_name, values) # type: ignore
yield class_name, keys_class
elif metric.type == "ping":
enum_name = name + "_reason_codes"
class_name = Camelize(enum_name)
values = dict((x.upper(), i) for (i, x) in enumerate(metric.reason_codes))
keys_enum = enum.Enum(class_name, values) # type: ignore
yield enum_name, keys_enum
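

# Illustrative sketch (hypothetical event parsed from a metrics.yaml):
#
#     for attr_name, obj in _get_metric_objects("login", login_metric):
#         ...
#     # yields ("login", metrics.EventMetricType instance) followed by
#     # ("LoginExtra", generated EventExtras subclass); a ping would instead be
#     # followed by a ("<name>_reason_codes", enum.Enum) pair.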


def load_metrics(
    filepath: Union[Union[str, Path], List[Union[str, Path]]],
    config: Optional[dict] = None,
) -> Any:
    """
    Load metrics from a `metrics.yaml` file.

    Args:
        filepath (Path): The path to the file, or a list of paths, to load.
        config (dict): A dictionary of options that change parsing behavior.
            These are documented in glean_parser:
            https://mozilla.github.io/glean_parser/glean_parser.html#glean_parser.parser.parse_objects

    Returns:
        metrics (object): An object containing a tree of metrics, as defined in
            the `metrics.yaml` file.

    Example:
        >>> metrics = load_metrics("metrics.yaml")
        >>> metrics.category.name.set("value")
    """
if config is None:
config = {}
if not isinstance(filepath, list):
filepath = [filepath]
filepath = [Path(x) for x in filepath]
result = parse_objects(filepath, config)
errors = list(result)
if len(errors):
raise ValueError("\n\n".join(errors))
metrics = result.value
if len(metrics) == 0:
raise ValueError(f"Didn't find any metrics in '{filepath}'")
root = type("Metrics", (object,), {})
for category_name, category in metrics.items():
cursor = root
for part in category_name.split("."):
if not hasattr(cursor, part):
setattr(cursor, part, type(category_name, (object,), {}))
cursor = getattr(cursor, part)
for name, metric in category.items():
for actual_name, glean_metric in _get_metric_objects(name, metric):
setattr(cursor, _normalize_name(actual_name), glean_metric)
return root
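

# End-to-end sketch of the generated tree (the metrics.yaml contents here are
# hypothetical: an event ``ui.click`` with a ``button_id: string`` extra):
#
#     metrics = load_metrics("metrics.yaml")
#     metrics.ui.click.record(metrics.ui.ClickExtra(button_id="ok"))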


def load_pings(
    filepath: Union[Union[str, Path], List[Union[str, Path]]],
    config: Optional[dict] = None,
) -> Any:
    """
    Load pings from a `pings.yaml` file.

    Args:
        filepath (Path): The path to the file, or a list of paths, to load.
        config (dict): A dictionary of options that change parsing behavior.
            These are documented in glean_parser:
            https://mozilla.github.io/glean_parser/glean_parser.html#glean_parser.parser.parse_objects

    Returns:
        pings (object): An object where the attributes are pings, as defined in
            the `pings.yaml` file.

    Example:
        >>> pings = load_pings("pings.yaml")
        >>> pings.baseline.submit()
    """
metrics = load_metrics(filepath, config)
return metrics.pings


__all__ = ["load_metrics", "load_pings"]