auto_sizing/targets.py (294 lines of code) (raw):
import re
from datetime import datetime
from typing import Dict, List, Literal, Optional, TextIO
import attr
import toml
from mozanalysis.config import ConfigLoader
from mozanalysis.metrics import DataSource, Metric
from mozanalysis.segments import Segment, SegmentDataSource
from mozanalysis.utils import add_days
from .errors import MetricsTagNotFoundException, SegmentsTagNotFoundException
from .utils import default_dates_dict, dict_combinations
# Application identifiers accepted as ``app_id`` by the ``from_repo`` builders in this module.
ALLOWED_APPS = Literal["firefox_desktop", "firefox_ios", "fenix"]
class SegmentsList:
    """Builds lists of ``Segment`` objects.

    Segments are built either from a target dictionary plus an application id
    (``from_repo``) or from the ``segments`` table of a parsed config file
    (``from_file``).
    """

    # Matches a value that already contains a parenthesised or bracketed list,
    # e.g. "('US', 'CA')"; such values are used verbatim as the IN operand.
    _LIST_LITERAL = re.compile(r"[\(\[]([^\)\]]+)[\)\]]")

    def from_repo(
        self, target_list: Dict, app_id: ALLOWED_APPS, start_date: str = ""
    ) -> List[Segment]:
        """Build the segments for one target of the given application.

        Args:
            target_list: Mapping of dimension name to value; must contain a
                ``"user_type"`` key.
            app_id: One of ``"firefox_desktop"``, ``"firefox_ios"``, ``"fenix"``.
            start_date: Date string interpolated into the user-type filters.

        Returns:
            The ``clients_daily_filter`` segment, followed by a
            ``clients_last_seen_filter`` segment when ``user_type`` is
            ``"new"`` or ``"existing"``.

        Raises:
            ValueError: If ``app_id`` is not one of the supported applications.
        """
        if app_id == "firefox_desktop":
            return self._make_desktop_targets(target_list, start_date)
        if app_id == "firefox_ios":
            return self._make_ios_targets(target_list, start_date)
        if app_id == "fenix":
            return self._make_fenix_targets(target_list, start_date)
        raise ValueError(
            "Invalid app_id: must be in ('firefox_desktop', 'firefox_ios', 'fenix')"
        )

    def from_file(self, target_dict: Dict, path: TextIO) -> List[Segment]:
        """Build segments from the ``segments`` table of a parsed config.

        Entries listed under ``import_from_metric_hub`` (for segments and for
        ``segments.data_sources``) are resolved through ``ConfigLoader`` and
        stored back into ``target_dict["segments"]``, which is therefore
        modified in place. Every remaining entry is turned into a ``Segment``
        from its ``data_source`` and ``select_expression`` keys.

        Args:
            target_dict: Parsed configuration.
            path: The config file; only used in the error raised below.

        Returns:
            One ``Segment`` per entry of the ``segments`` table (excluding the
            ``data_sources`` sub-table).

        Raises:
            SegmentsTagNotFoundException: If ``target_dict`` has no
                ``"segments"`` key.
            KeyError: If a segment refers to a data source that is not declared
                under ``segments.data_sources``.
        """
        if "segments" not in target_dict:
            raise SegmentsTagNotFoundException(path)
        segments_dict = target_dict["segments"]

        if "import_from_metric_hub" in segments_dict:
            for app_id, names in segments_dict["import_from_metric_hub"].items():
                for name in names:
                    segments_dict[name] = ConfigLoader.get_segment(name, app_id)
            segments_dict.pop("import_from_metric_hub")

        if (
            "data_sources" in segments_dict
            and "import_from_metric_hub" in segments_dict["data_sources"]
        ):
            declared_sources = segments_dict["data_sources"]
            for app_id, names in declared_sources["import_from_metric_hub"].items():
                for name in names:
                    declared_sources[name] = ConfigLoader.get_segment_data_source(
                        name, app_id
                    )
            declared_sources.pop("import_from_metric_hub")

        segments = []
        for key, value in segments_dict.items():
            if key == "data_sources":
                continue
            if isinstance(value, Segment):
                # Imported segment: already fully built.
                segments.append(value)
                continue

            data_source = segments_dict["data_sources"][value["data_source"]]
            if not isinstance(data_source, SegmentDataSource):
                # Declared inline as a table; imported data sources are already
                # SegmentDataSource objects and must not be subscripted.
                data_source = SegmentDataSource(
                    name="", from_expr=data_source["from_expression"]
                )
            # The select expression is rendered through the template
            # environment provided by ConfigLoader.
            select_expr = (
                ConfigLoader.configs.get_env()
                .from_string(value["select_expression"])
                .render()
            )
            segments.append(
                Segment(name=key, data_source=data_source, select_expr=select_expr)
            )
        return segments

    def _make_clients_daily_filter(self, target: Dict[str, str]) -> str:
        """Return the SQL select expression filtering on the target's dimensions.

        Every dimension except ``user_type`` whose value is not ``"all"``
        becomes a case-insensitive ``IN`` condition; ``"release"`` in a
        dimension name is replaced by ``"normalized"``. A value that does not
        already look like a list literal is wrapped as ``('value')``. When no
        dimension restricts the population the condition is ``TRUE`` so that the
        generated SQL stays valid.
        """
        conditions = []
        for dimension, value in target.items():
            dimension = dimension.replace("release", "normalized")
            if value == "all" or dimension == "user_type":
                continue
            if not self._LIST_LITERAL.search(value):
                value = f"('{value}')"
            conditions.append(f"(UPPER({dimension}) IN {value.upper()})")

        # An empty LOGICAL_OR() is not valid SQL, so fall back to a no-op filter.
        condition_string = "\n AND ".join(conditions) if conditions else "TRUE"
        return (
            "\n"
            "COALESCE(\n"
            "LOGICAL_OR(\n"
            f"{condition_string}\n"
            ")\n"
            ")\n"
        )

    def _make_targets(
        self,
        target: Dict[str, str],
        start_date: str,
        clients_daily: SegmentDataSource,
        first_seen_source: SegmentDataSource,
        last_seen_source: SegmentDataSource,
    ) -> List[Segment]:
        """Assemble the dimension filter and the optional user-type filter."""
        segments = [
            Segment(
                name="clients_daily_filter",
                data_source=clients_daily,
                select_expr=self._make_clients_daily_filter(target),
            )
        ]

        user_type = target["user_type"]
        if user_type == "new":
            segments.append(
                Segment(
                    name="clients_last_seen_filter",
                    data_source=first_seen_source,
                    select_expr=f"COALESCE(MIN(first_seen_date) >= '{start_date}', TRUE)",
                )
            )
        elif user_type == "existing":
            # Existing clients were first seen on or before 28 days ahead of start_date.
            first_day = add_days(start_date, -28)
            segments.append(
                Segment(
                    name="clients_last_seen_filter",
                    data_source=last_seen_source,
                    select_expr=(
                        f"COALESCE(MIN(first_seen_date) <= '{first_day}', TRUE)\n"
                        "AND COALESCE(MIN(days_since_seen) = 0)"
                    ),
                )
            )
        return segments

    def _make_mobile_targets(
        self, target: Dict[str, str], start_date: str, dataset: str
    ) -> List[Segment]:
        """Build targets from the ``baseline_clients_*`` tables of ``dataset``."""
        clients_daily = SegmentDataSource(
            name="clients_daily",
            from_expr=f"mozdata.{dataset}.baseline_clients_daily",
        )
        first_seen = SegmentDataSource(
            name="baseline_clients_first_seen",
            from_expr=f"`moz-fx-data-shared-prod.{dataset}.baseline_clients_first_seen`",
        )
        last_seen = SegmentDataSource(
            name="baseline_clients_last_seen",
            from_expr=f"`moz-fx-data-shared-prod.{dataset}.baseline_clients_last_seen`",
        )
        return self._make_targets(target, start_date, clients_daily, first_seen, last_seen)

    def _make_desktop_targets(
        self, target: Dict[str, str], start_date: str = ""
    ) -> List[Segment]:
        """Build the Firefox desktop segments for ``target``."""
        clients_daily = ConfigLoader.get_segment_data_source("clients_daily", "firefox_desktop")
        # Desktop uses the same table for both the "new" and "existing" filters.
        clients_last_seen = SegmentDataSource(
            name="clients_last_seen",
            from_expr="`moz-fx-data-shared-prod.telemetry.clients_last_seen`",
        )
        return self._make_targets(
            target, start_date, clients_daily, clients_last_seen, clients_last_seen
        )

    def _make_ios_targets(self, target: Dict[str, str], start_date: str) -> List[Segment]:
        """Build the Firefox iOS segments for ``target``."""
        return self._make_mobile_targets(target, start_date, "org_mozilla_ios_firefox")

    def _make_fenix_targets(self, target: Dict[str, str], start_date: str) -> List[Segment]:
        """Build the Fenix segments for ``target``."""
        return self._make_mobile_targets(target, start_date, "org_mozilla_firefox")
@attr.s(auto_attribs=True)
class MetricsLists:
    """Builds lists of ``Metric`` objects from a parsed config file or from metric-hub."""

    def from_file(self, target_dict: Dict, path: TextIO) -> List[Metric]:
        """Build metrics from the ``metrics`` table of a parsed config.

        Metrics listed under ``metrics.import_from_metric_hub`` and data sources
        listed under ``data_sources.import_from_metric_hub`` are resolved
        through ``ConfigLoader`` and stored back into ``target_dict``, which is
        therefore modified in place. Every remaining metric entry is built from
        its ``data_source`` and ``select_expression`` keys.

        Args:
            target_dict: Parsed configuration.
            path: The config file; only used in the error raised below.

        Returns:
            One ``Metric`` per entry of the ``metrics`` table.

        Raises:
            MetricsTagNotFoundException: If ``target_dict`` has no ``"metrics"`` key.
            KeyError: If a metric refers to a data source that is not declared
                under ``data_sources``.
        """
        if "metrics" not in target_dict:
            raise MetricsTagNotFoundException(path)
        metrics_dict = target_dict["metrics"]

        if "import_from_metric_hub" in metrics_dict:
            for app_id, names in metrics_dict["import_from_metric_hub"].items():
                for name in names:
                    metrics_dict[name] = ConfigLoader.get_metric(name, app_id)
            metrics_dict.pop("import_from_metric_hub")

        if (
            "data_sources" in target_dict
            and "import_from_metric_hub" in target_dict["data_sources"]
        ):
            declared_sources = target_dict["data_sources"]
            for app_id, names in declared_sources["import_from_metric_hub"].items():
                for name in names:
                    declared_sources[name] = ConfigLoader.get_data_source(name, app_id)

        metrics = []
        for key, value in metrics_dict.items():
            if isinstance(value, Metric):
                # Imported metric: already fully built.
                metrics.append(value)
                continue

            data_source = target_dict["data_sources"][value["data_source"]]
            if not isinstance(data_source, DataSource):
                # Declared inline as a table; imported data sources are already
                # DataSource objects and must not be subscripted.
                data_source = DataSource(
                    name=value["data_source"],
                    from_expr=data_source["from_expression"],
                )
            # The select expression is rendered through the template
            # environment provided by ConfigLoader.
            select_expr = (
                ConfigLoader.configs.get_env()
                .from_string(value["select_expression"])
                .render()
            )
            metrics.append(Metric(name=key, data_source=data_source, select_expr=select_expr))
        return metrics

    def from_repo(self, target_dict: Dict, app_id: ALLOWED_APPS) -> List[Metric]:
        """Look up every metric named in ``target_dict["metrics"][app_id]`` via ``ConfigLoader``."""
        return [
            ConfigLoader.get_metric(metric, app_id)
            for metric in target_dict["metrics"][app_id]
        ]
@attr.s(auto_attribs=True)
class SizingConfiguration:
    """attrs-generated record grouping the inputs of one sizing configuration.

    The order of the annotated attributes below defines the positional order
    of the generated ``__init__`` arguments.
    """

    # Segments making up the target.
    target_list: List[Segment]
    # NOTE(review): presumably a short identifier for the target — confirm with callers.
    target_slug: str
    # Metrics associated with this configuration.
    metric_list: List[Metric]
    # Start date as a string; the format is not validated here.
    start_date: str
    # NOTE(review): presumably the number of enrollment dates (days) — confirm.
    num_dates_enrollment: int
    # NOTE(review): presumably the length of the analysis window — confirm the unit.
    analysis_length: int
    # List of parameter dictionaries.
    parameters: List[Dict]
    # Optional config file handle; defaults to None.
    config_file: Optional[TextIO] = None
@attr.s(auto_attribs=True)
class SizingCollection:
    """Targets, metrics, parameters and dates gathered for sizing.

    Instances are normally created through ``from_repo`` (target and job
    dictionaries) or ``from_file`` (a TOML config file).
    """

    sizing_targets: List[Segment] = attr.Factory(list)
    sizing_metrics: List[Metric] = attr.Factory(list)
    sizing_parameters: List[Dict] = attr.Factory(list)
    sizing_dates: Dict = attr.Factory(dict)

    # Not annotated, so these stay ordinary class attributes rather than
    # attrs fields; the classmethods below use them as builders.
    segments_list = SegmentsList()
    metrics_list = MetricsLists()

    @classmethod
    def from_repo(
        cls,
        target: Dict,
        jobs_dict: Dict,
        app_id: ALLOWED_APPS = "firefox_desktop",
    ) -> "SizingCollection":
        """Create a collection from a target dictionary and a jobs dictionary.

        Dates come from ``default_dates_dict`` evaluated for today; the
        parameter combinations are read from the ``"parameters"`` entry of
        ``jobs_dict``.
        """
        dates = default_dates_dict(datetime.today())
        targets = cls.segments_list.from_repo(
            target,
            app_id,
            dates["start_date"],  # type: ignore[arg-type]
        )
        return cls(
            sizing_targets=targets,
            sizing_metrics=cls.metrics_list.from_repo(jobs_dict, app_id),
            sizing_parameters=dict_combinations(jobs_dict, "parameters"),
            sizing_dates=dates,
        )

    @classmethod
    def from_file(cls, path: TextIO) -> "SizingCollection":
        """Create a collection from a TOML config file.

        When the file has no ``parameters`` table, a built-in set of power and
        effect-size values and the default dates for today are used instead.
        """
        config = toml.load(path)
        targets = cls.segments_list.from_file(config, path)
        metrics = cls.metrics_list.from_file(config, path)

        if "parameters" not in config:
            fallback = {
                "parameters": {"power": [0.8], "effect_size": [0.005, 0.01, 0.02, 0.05]}
            }
            parameters = dict_combinations(fallback, "parameters")
            dates = default_dates_dict(datetime.today())
        else:
            parameters = dict_combinations(config["parameters"], "sizing")
            dates = config["parameters"]["dates"]

        return cls(
            sizing_targets=targets,
            sizing_metrics=metrics,
            sizing_parameters=parameters,
            sizing_dates=dates,
        )