etl/looker.py (249 lines of code) (raw):
import json
import stringcase
from furl import furl
from etl.utils import get_event_name_and_category
from .bigquery import get_bigquery_column_name, get_bigquery_ping_table_name
from .glean import GLEAN_DISTRIBUTION_TYPES
SUPPORTED_LOOKER_METRIC_TYPES = GLEAN_DISTRIBUTION_TYPES | {
"boolean",
"counter",
"datetime",
"jwe",
"labeled_counter",
"quantity",
"string",
"rate",
"timespan",
"uuid",
}
EVENT_MONITORING_DASHBOARD_URL = "https://mozilla.cloud.looker.com/dashboards/1452"
def _looker_explore_exists(looker_namespaces, app_name, explore_name):
return (
looker_namespaces.get(app_name)
and looker_namespaces[app_name].get("glean_app")
and looker_namespaces[app_name]["explores"].get(explore_name)
)
def _get_looker_ping_explore(
looker_namespaces, app_name, ping_name, _table_name, app_channel, app_group
):
ping_name_snakecase = stringcase.snakecase(ping_name)
if _looker_explore_exists(looker_namespaces, app_name, ping_name_snakecase):
url = furl(f"https://mozilla.cloud.looker.com/explore/{app_name}/{ping_name_snakecase}")
# if there are multiple channels, we need a channel identifier
if len(app_group["app_ids"]) > 1 and app_channel:
url = url.add({f"f[{ping_name_snakecase}.channel]": app_channel})
return {"name": ping_name_snakecase, "url": url.url}
return None
def _get_looker_event_explore(looker_namespaces, app_name, app_channel, app_group):
# firefox_desktop has an "events" explore that is for legacy telemetry,
# not Glean
if (
_looker_explore_exists(looker_namespaces, app_name, "events")
and app_name != "firefox_desktop"
):
url = furl(f"https://mozilla.cloud.looker.com/explore/{app_name}/event_counts").add(
{"fields": "events.submission_date,events.event_count,events.client_count"}
)
if len(app_group["app_ids"]) > 1 and app_channel:
url.add({"f[events.normalized_channel]": app_channel})
return {"name": "event_counts", "url": url.url}
# firefox_desktop Glean events explore is glean_event_counts
elif _looker_explore_exists(looker_namespaces, app_name, "glean_event_counts"):
url = furl(f"https://mozilla.cloud.looker.com/explore/{app_name}/glean_event_counts").add(
{
"fields": "glean_events.submission_date,"
+ "glean_events.event_count,glean_events.client_count"
}
)
if len(app_group["app_ids"]) > 1 and app_channel:
url.add({"f[events.normalized_channel]": app_channel})
return {"name": "glean_event_counts", "url": url.url}
elif _looker_explore_exists(looker_namespaces, app_name, "funnel_analysis"):
url = furl(f"https://mozilla.cloud.looker.com/explore/{app_name}/funnel_analysis").add(
{"fields": "funnel_analysis.count_completed_step_1"}
)
if len(app_group["app_ids"]) > 1 and app_channel:
url.add({"f[funnel_analysis.app_channel]": app_channel})
return {"name": "funnel_analysis", "url": url.url}
return None
def get_looker_explore_metadata_for_ping(looker_namespaces, app, app_group, ping):
if ping.identifier == "events":
return _get_looker_event_explore(
looker_namespaces, app.app_name, app.app.get("app_channel"), app_group
)
return _get_looker_ping_explore(
looker_namespaces,
app.app_name,
ping.identifier,
get_bigquery_ping_table_name(app.app["bq_dataset_family"], ping.identifier),
app.app.get("app_channel"),
app_group,
)
def get_looker_explore_metadata_for_metric(
looker_namespaces, app, app_group, metric, ping_name, ping_has_client_id
):
metric_type = metric.definition["type"]
metric_name_snakecase = stringcase.snakecase(metric.identifier)
ping_name_snakecase = stringcase.snakecase(ping_name)
base_looker_explore = (
_get_looker_event_explore(
looker_namespaces, app.app_name, app.app.get("app_channel"), app_group
)
if metric_type == "event"
else _get_looker_ping_explore(
looker_namespaces,
app.app_name,
ping_name,
get_bigquery_ping_table_name(app.app["bq_dataset_family"], ping_name),
app.app.get("app_channel"),
app_group,
)
)
# we deliberately don't show looker information for deprecated applications
if not app.app.get("deprecated") and base_looker_explore:
looker_metric_link = None
if metric_type == "event":
(metric_category, metric_name) = get_event_name_and_category(metric.identifier)
if base_looker_explore["name"] == "glean_event_counts":
looker_metric_link = furl(base_looker_explore["url"]).add(
{
"f[glean_events.event_name]": f'"{metric_name}"',
"f[glean_events.event_category]": f'"{metric_category}"',
}
)
elif base_looker_explore["name"] == "event_counts":
looker_metric_link = furl(base_looker_explore["url"]).add(
{
"f[events.event_name]": f'"{metric_name}"',
"f[events.event_category]": f'"{metric_category}"',
}
)
elif base_looker_explore["name"] == "funnel_analysis":
looker_metric_link = furl(base_looker_explore["url"]).add(
{
"f[step_1.event]": f'"{metric_name}"',
"f[step_1.category]": f'"{metric_category}"',
}
)
else:
# this should never happen (unless we made a mistake in getting the
# base looker explore link)
raise Exception(f"Unexpected base looker explore {base_looker_explore['name']}")
# for counters, we can use measures directly
if metric_type == "counter":
looker_metric_link = furl(base_looker_explore["url"]).add(
{
"fields": ",".join(
[
f"{ping_name_snakecase}.submission_date",
f"{ping_name_snakecase}.{metric_name_snakecase}",
]
)
}
)
elif metric_type == "labeled_counter":
counter_field_base = (
f"{ping_name_snakecase}"
+ "__metrics__labeled_counter__"
+ f"{metric_name_snakecase}"
)
looker_metric_link = furl(base_looker_explore["url"]).add(
{
"fields": ",".join(
[
f"{ping_name_snakecase}.submission_date",
f"{counter_field_base}.label",
f"{counter_field_base}.count",
]
),
"pivots": f"{counter_field_base}.label",
}
)
elif metric_type == "timespan":
# Timespans are currently implemented as a dimension rather than a metric.
# Let's derive the median value
looker_dimension_name = "{}.{}__value".format(
ping_name_snakecase, get_bigquery_column_name(metric).replace(".", "__")
)
custom_field_name = f"median_of_{metric_name_snakecase}"
dynamic_fields = [
dict(
measure=custom_field_name,
label=f"Median of {metric.identifier}",
based_on=looker_dimension_name,
expression="",
type="median",
)
]
looker_metric_link = furl(base_looker_explore["url"]).add(
{
"fields": ",".join(
[
f"{ping_name_snakecase}.submission_date",
custom_field_name,
]
),
"dynamic_fields": json.dumps(dynamic_fields),
}
)
elif metric_type in SUPPORTED_LOOKER_METRIC_TYPES:
base_looker_dimension_name = "{}.{}".format(
ping_name_snakecase, get_bigquery_column_name(metric).replace(".", "__")
) + ("_date" if metric_type == "datetime" else "")
# For distribution types, we'll aggregate the sum of all distributions per
# day. In most cases, this isn't super meaningful, but provides a starting
# place for further analysis
if metric_type in GLEAN_DISTRIBUTION_TYPES:
looker_dimension_name = base_looker_dimension_name + "__sum"
custom_field_name = f"sum_of_{metric_name_snakecase}"
dynamic_fields = [
dict(
measure=custom_field_name,
label=f"Sum of {metric.identifier}",
based_on=looker_dimension_name,
expression="",
type="sum",
)
]
looker_metric_link = furl(base_looker_explore["url"]).add(
{
"fields": ",".join(
[
f"{ping_name_snakecase}.submission_date",
custom_field_name,
]
),
"dynamic_fields": json.dumps(dynamic_fields),
}
)
else:
# otherwise pivoting on the dimension is the best we can do (this works
# well for boolean measures)
looker_metric_link = furl(base_looker_explore["url"]).add(
{
"fields": ",".join(
[
f"{ping_name_snakecase}.submission_date",
base_looker_dimension_name,
(
f"{ping_name_snakecase}.clients"
if ping_has_client_id
else f"{ping_name_snakecase}.ping_count"
),
]
),
"pivots": base_looker_dimension_name,
}
)
if looker_metric_link:
return {
"base": base_looker_explore,
"metric": {
"name": metric.identifier,
"url": looker_metric_link.add({"toggle": "vis"}).url,
},
}
return None
def get_looker_monitoring_metadata_for_event(app, app_group, metric):
metric_type = metric.definition["type"]
if metric_type != "event":
return None
(metric_category, metric_name) = get_event_name_and_category(metric.identifier)
event_identifier = ".".join([metric_category, metric_name])
url = furl(EVENT_MONITORING_DASHBOARD_URL).add(
{"App Name": app.app["canonical_app_name"], "Event Name": '"' + event_identifier + '"'}
)
app_channel = app.app.get("app_channel")
if len(app_group["app_ids"]) > 1 and app_channel:
url.add({"Channel": app_channel})
return {
"event": {
"name": metric_name,
"url": url.url,
},
}