# jobs/webcompat-kb/webcompat_kb/metric.py
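"""Record WebCompat topline metrics in BigQuery.

update_metric_history snapshots the per-cohort metric tables (global_1000,
sightline, all) into matching *_history tables, stamped with today's date.
update_metric_daily appends one aggregate row per day, computed from
scored_site_reports, to webcompat_topline_metric_daily. Both steps skip
their insert when data has already been recorded today.
"""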
import argparse
import logging
from datetime import date

from google.cloud import bigquery

from .base import EtlJob
from .bqhelpers import BigQuery


def update_metric_history(client: BigQuery, bq_dataset_id: str, write: bool) -> None:
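    """Snapshot each topline metric table into its history table.

    For every cohort suffix, copy the current rows of
    webcompat_topline_metric_<suffix> into
    webcompat_topline_metric_<suffix>_history with recorded_date set to
    today, skipping any table that already has a snapshot for today.

    Note: the write flag is unused here; dry-run gating is assumed to be
    handled inside the BigQuery helper's write methods (ensure_table and
    insert_rows).
    """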
    for suffix in ["global_1000", "sightline", "all"]:
        metrics_table_name = f"webcompat_topline_metric_{suffix}"
        history_table_name = f"webcompat_topline_metric_{suffix}_history"
        history_schema = [
            bigquery.SchemaField("recorded_date", "DATE", mode="REQUIRED"),
            bigquery.SchemaField("date", "DATE", mode="REQUIRED"),
            bigquery.SchemaField("bug_count", "INTEGER", mode="REQUIRED"),
            bigquery.SchemaField("needs_diagnosis_score", "NUMERIC", mode="REQUIRED"),
            bigquery.SchemaField("platform_score", "NUMERIC", mode="REQUIRED"),
            bigquery.SchemaField("not_supported_score", "NUMERIC", mode="REQUIRED"),
            bigquery.SchemaField("total_score", "NUMERIC", mode="REQUIRED"),
        ]
        history_table = client.ensure_table(
            history_table_name, history_schema, recreate=False
        )
        query = f"""
            SELECT recorded_date
            FROM `{bq_dataset_id}.{history_table_name}`
            ORDER BY recorded_date DESC
            LIMIT 1
        """
        rows = list(client.query(query))
        today = date.today()
        if rows and rows[0]["recorded_date"] >= today:
            # We've already recorded historic data today
            logging.info(
                f"Already recorded historic data in {history_table_name} today, skipping"
            )
            continue
        query = f"""
            SELECT *
            FROM `{bq_dataset_id}.{metrics_table_name}`
        """
        rows = [
            {
                "recorded_date": today,
                "date": row.date,
                "bug_count": row.bug_count,
                "needs_diagnosis_score": row.needs_diagnosis_score,
                "platform_score": row.platform_score,
                "not_supported_score": row.not_supported_score,
                "total_score": row.total_score,
            }
            for row in client.query(query)
        ]
        client.insert_rows(history_table, rows)


def update_metric_daily(client: BigQuery, bq_dataset_id: str, write: bool) -> None:
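    """Append today's aggregate topline metrics to the daily table.

    Computes a single row of bug counts and scores over all open
    scored_site_reports (overall, sightline, and global_1000 cohorts) and
    inserts it into webcompat_topline_metric_daily. With write=False the
    row is only logged, not inserted.
    """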
    history_table = f"{bq_dataset_id}.webcompat_topline_metric_daily"
    query = f"""
        SELECT date
        FROM `{history_table}`
        ORDER BY date DESC
        LIMIT 1
    """
    rows = list(client.query(query))
    today = date.today()
    if rows and rows[0]["date"] >= today:
        # We've already recorded the daily metric today
        logging.info(
            f"Already recorded daily metrics in {history_table} today, skipping"
        )
        return
    metrics_query = f"""
        SELECT
            CURRENT_DATE() AS date,
            COUNT(bugs.number) AS bug_count_all,
            SUM(IF(bugs.metric_type_needs_diagnosis, bugs.score, 0)) AS needs_diagnosis_score_all,
            SUM(IF(bugs.metric_type_firefox_not_supported, bugs.score, 0)) AS not_supported_score_all,
            SUM(bugs.score) AS total_score_all,
            COUNTIF(bugs.is_sightline) AS bug_count_sightline,
            SUM(IF(bugs.is_sightline AND bugs.metric_type_needs_diagnosis, bugs.score, 0)) AS needs_diagnosis_score_sightline,
            SUM(IF(bugs.is_sightline AND bugs.metric_type_firefox_not_supported, bugs.score, 0)) AS not_supported_score_sightline,
            SUM(IF(bugs.is_sightline, bugs.score, 0)) AS total_score_sightline,
            COUNTIF(bugs.is_global_1000) AS bug_count_global_1000,
            SUM(IF(bugs.is_global_1000 AND bugs.metric_type_needs_diagnosis, bugs.score, 0)) AS needs_diagnosis_score_global_1000,
            SUM(IF(bugs.is_global_1000 AND bugs.metric_type_firefox_not_supported, bugs.score, 0)) AS not_supported_score_global_1000,
            SUM(IF(bugs.is_global_1000, bugs.score, 0)) AS total_score_global_1000
        FROM
            `{bq_dataset_id}.scored_site_reports` AS bugs
        WHERE bugs.resolution = ""
    """
    if write:
        insert_query = f"""INSERT `{bq_dataset_id}.webcompat_topline_metric_daily`
            (date,
             bug_count_all,
             needs_diagnosis_score_all,
             not_supported_score_all,
             total_score_all,
             bug_count_sightline,
             needs_diagnosis_score_sightline,
             not_supported_score_sightline,
             total_score_sightline,
             bug_count_global_1000,
             needs_diagnosis_score_global_1000,
             not_supported_score_global_1000,
             total_score_global_1000)
            ({metrics_query})"""
        client.query(insert_query)
        logging.info("Updated daily metric")
    else:
        result = client.query(metrics_query)
        logging.info(f"Would insert {list(result)[0]}")


class MetricJob(EtlJob):
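    """ETL job that records the topline metric history and daily rollup."""
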
    name = "metric"

    def default_dataset(self, args: argparse.Namespace) -> str:
        return args.bq_kb_dataset
    def main(self, client: BigQuery, args: argparse.Namespace) -> None:
        update_metric_history(client, args.bq_kb_dataset, args.write)
        update_metric_daily(client, args.bq_kb_dataset, args.write)
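

# A minimal sketch of how this job is likely run; the CLI wiring lives in
# EtlJob and the package entry point, so the command and flag spellings
# below are assumptions inferred from args.bq_kb_dataset / args.write:
#
#     python -m webcompat_kb metric --bq-kb-dataset <dataset> --write
#
# Without the write flag, update_metric_daily only logs the row it would
# insert, and update_metric_history relies on the client's dry-run handling.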