def main()

in bigquery_etl/glam/generate.py [0:0]
215 lines of code
15 McCabe index (conditional complexity)

def main():
    """Generate GLAM ETL queries."""
    parser = ArgumentParser(description=main.__doc__)
    parser.add_argument("--prefix")
    parser.add_argument("--project", default="glam-fenix-dev")
    parser.add_argument("--dataset", default="glam_etl")
    parser.add_argument("--sql-root", default="sql/")
    parser.add_argument("--daily-view-only", action="store_true", default=False)
    parser.add_argument("--use-sample-id", action="store_true", default=False)
    args = parser.parse_args()

    env = Environment(loader=PackageLoader("bigquery_etl", "glam/templates"))

    dataset_path = Path(args.sql_root) / args.project / args.dataset
    if not dataset_path.is_dir():
        raise NotADirectoryError(f"path to {dataset_path} not found")

    # curry functions for convenience
    template = partial(
        from_template, environment=env, dataset_path=dataset_path, args=args
    )
    view = partial(template, QueryType.VIEW)
    table = partial(template, QueryType.TABLE)
    init = partial(template, QueryType.INIT)

    # If this is a logical app id, generate it. Assert that the daily view for
    # the app exists. This assumes that both scalar and histogram aggregates
    # exist and will break down in the case where a glean app only contains one
    # of the scalar or histogram view.
    for daily_view in [
        "view_clients_daily_scalar_aggregates_v1",
        "view_clients_daily_histogram_aggregates_v1",
    ]:
        try:
            view(f"logical_app_id/{args.prefix}__{daily_view}")
        except TemplateNotFound:
            print(f"{args.prefix} is not a logical app id")
            # generate the view for the app id directly
            view(daily_view)

        if not (dataset_path / f"{args.prefix}__{daily_view}").is_dir():
            raise ValueError(f"missing {daily_view}")

    # exit early if we're only generating a daily view
    if args.daily_view_only:
        return

    # Supported fenix/firefox for android products. These are logical ids that
    # are formed from the union of several app_ids (sometimes across date
    # boundaries).
    config_schema = {
        "type": "object",
        "additionalProperties": {
            "type": "object",
            "properties": {
                "build_date_udf": {
                    "type": "string",
                    "description": (
                        "The fully qualified path to a UDF that accepts the "
                        "client_info.app_build_id field and returns a datetime."
                    ),
                },
                "filter_version": {
                    "type": "boolean",
                    "description": (
                        "Whether the integer extracted from the "
                        "client_info.app_display_version should be used to "
                        "filter incremental aggregates."
                    ),
                },
                "num_versions_to_keep": {
                    "type": "integer",
                    "minimum": 1,
                    "description": "The number of versions to keep.",
                },
                "total_users": {
                    "type": "integer",
                    "minimum": 1,
                    "description": "The number of users to filter the data on.",
                },
                "minimum_client_count": {
                    "type": "integer",
                    "minimum": 0,
                    "description": "The minimum client count for each build id."
                    "We generally want this to be roughly 0.5% of WAU."
                    "For context see https://github.com/mozilla/glam/issues/1575#issuecomment-946880387",  # noqa E501
                },
            },
            "required": [
                "build_date_udf",
                "filter_version",
                "num_versions_to_keep",
                "total_users",
            ],
        },
    }
    config = {
        "org_mozilla_fenix_glam_nightly": {
            "build_date_udf": "mozfun.glam.build_hour_to_datetime",
            "filter_version": True,
            "num_versions_to_keep": 3,
            "total_users": 10,
            "minimum_client_count": 800,
        },
        "org_mozilla_fenix_glam_beta": {
            "build_date_udf": "mozfun.glam.build_hour_to_datetime",
            "filter_version": True,
            "num_versions_to_keep": 3,
            "total_users": 10,
            "minimum_client_count": 2000,
        },
        "org_mozilla_fenix_glam_release": {
            "build_date_udf": "mozfun.glam.build_hour_to_datetime",
            "filter_version": True,
            "num_versions_to_keep": 3,
            "total_users": 10,
            "minimum_client_count": 90000,
        },
        "firefox_desktop_glam_nightly": {
            "build_date_udf": "mozfun.glam.build_hour_to_datetime",
            "filter_version": True,
            "num_versions_to_keep": 3,
            "total_users": 10,
            "minimum_client_count": 100,
        },
        "firefox_desktop_glam_beta": {
            "build_date_udf": "mozfun.glam.build_hour_to_datetime",
            "filter_version": True,
            "num_versions_to_keep": 3,
            "total_users": 100,
            "minimum_client_count": 1000,
        },
        "firefox_desktop_glam_release": {
            "build_date_udf": "mozfun.glam.build_hour_to_datetime",
            "filter_version": True,
            "num_versions_to_keep": 3,
            "total_users": 100,
            "minimum_client_count": 100000,
        },
    }

    channel_prefixes = {
        "firefox_desktop_glam_nightly": "nightly",
        "firefox_desktop_glam_beta": "beta",
        "firefox_desktop_glam_release": "release",
        "org_mozilla_fenix_glam_nightly": "nightly",
        "org_mozilla_fenix_glam_beta": "beta",
        "org_mozilla_fenix_glam_release": "release",
    }
    validate(instance=config, schema=config_schema)

    if not config.get(args.prefix, {}).get("build_date_udf"):
        raise ValueError(f"build date udf for {args.prefix} was not found")

    [
        table(
            "latest_versions_v1",
            **dict(app_id_channel=(f"'{channel_prefixes[args.prefix]}'")),
        ),
        init(
            "clients_scalar_aggregates_v1",
            **models.clients_scalar_aggregates(
                source_table=(
                    f"glam_etl.{args.prefix}__view_clients_daily_scalar_aggregates_v1"
                ),
                destination_table=(
                    f"glam_etl.{args.prefix}__clients_scalar_aggregates_v1"
                ),
            ),
        ),
        table(
            "clients_scalar_aggregates_v1",
            **models.clients_scalar_aggregates(
                source_table=(
                    f"glam_etl.{args.prefix}__view_clients_daily_scalar_aggregates_v1"
                ),
                destination_table=(
                    f"glam_etl.{args.prefix}__clients_scalar_aggregates_v1"
                ),
                **config[args.prefix],
            ),
        ),
        init(
            "clients_histogram_aggregates_v1",
            **models.clients_histogram_aggregates(parameterize=True),
        ),
        table(
            "clients_histogram_aggregates_v1",
            **models.clients_histogram_aggregates(
                parameterize=True, **config[args.prefix]
            ),
        ),
        table(
            "scalar_bucket_counts_v1",
            **models.scalar_bucket_counts(
                source_table=f"glam_etl.{args.prefix}__clients_scalar_aggregates_v1",
                **config[args.prefix],
            ),
        ),
        table(
            "histogram_bucket_counts_v1",
            **models.histogram_bucket_counts(
                source_table=f"glam_etl.{args.prefix}__clients_histogram_aggregates_v1",
                **config[args.prefix],
            ),
            channel=channel_prefixes[args.prefix],
            use_sample_id=args.use_sample_id,
        ),
        table(
            "probe_counts_v1",
            query_name_prefix="scalar",
            **models.probe_counts(
                source_table=f"glam_etl.{args.prefix}__scalar_bucket_counts_v1",
                is_scalar=True,
            ),
            channel=channel_prefixes[args.prefix],
        ),
        table(
            "probe_counts_v1",
            query_name_prefix="histogram",
            **models.probe_counts(
                source_table=f"glam_etl.{args.prefix}__histogram_bucket_counts_v1",
                is_scalar=False,
            ),
            channel=channel_prefixes[args.prefix],
        ),
        view("view_probe_counts_v1"),
        view("view_user_counts_v1", **models.user_counts()),
        view(
            "view_sample_counts_v1",
            **models.sample_counts(),
            channel=channel_prefixes[args.prefix],
        ),
        table("extract_user_counts_v1", **config[args.prefix]),
        table("extract_probe_counts_v1", **config[args.prefix]),
    ]