in etl/glean_etl.py [0:0]
def _get_metric_sample_data(experiment_data) -> dict:
    """Collect per-metric sampling details from active "glean" experiments.

    An experiment is used when ``"glean"`` is in its ``featureIds``, its ``endDate`` is
    ``None``, and either its ``startDate`` is set or ``isEnrollmentPaused`` is ``False``.

    Args:
        experiment_data: Iterable of experiment records (dict-like). The keys read here are
            ``featureIds``, ``startDate``, ``isEnrollmentPaused``, ``endDate``, ``appName``,
            ``bucketConfig`` (``count`` and ``total``), ``channel``, ``branches``, ``slug``
            and ``targeting``.

    Returns:
        A nested dict ``{app_name: {metric_key: {channel: details}}}`` where ``details``
        holds ``sample_size``, ``experiment_id``, ``start_date``, ``end_date``,
        ``targeting`` and ``experimenter_link``. When several experiments configure the
        same metric key on the same app and channel, the one processed last wins.

    Raises:
        KeyError: If a record is missing one of the keys listed above.
        ZeroDivisionError: If an active experiment's ``bucketConfig["total"]`` is zero.
    """
    sampling_data = {}
    for experiment in experiment_data:
        if "glean" not in experiment["featureIds"]:
            continue
        started_or_enrolling = (
            experiment["startDate"] is not None or experiment["isEnrollmentPaused"] is False
        )
        if not started_or_enrolling or experiment["endDate"] is not None:
            continue

        app_name = experiment["appName"]
        bucket_config = experiment["bucketConfig"]
        # Fraction of the bucket range that this experiment enrolls.
        sample_size = bucket_config["count"] / bucket_config["total"]
        channel = experiment["channel"]
        # The app entry is registered even when no metric configuration is found below.
        app_metrics = sampling_data.setdefault(app_name, {})

        for branch in experiment["branches"]:
            for config in branch["features"]:
                if config["featureId"] != "glean":
                    continue
                metric_config = config["value"].get("gleanMetricConfiguration")
                if metric_config is None:
                    continue
                for metric_key in metric_config:
                    details = app_metrics.setdefault(metric_key, {}).setdefault(channel, {})
                    # Built per key (not hoisted) so that "slug"/"targeting" are only
                    # required on experiments that actually configure a metric.
                    details.update(
                        {
                            "sample_size": sample_size,
                            "experiment_id": experiment["slug"],
                            "start_date": experiment["startDate"],
                            "end_date": experiment["endDate"],
                            "targeting": experiment["targeting"],
                            "experimenter_link": EXPERIMENTER_URL_TEMPLATE.format(
                                experiment["slug"]
                            ),
                        }
                    )
    return sampling_data