mozilla_schema_generator/__main__.py (224 lines of code) (raw):

# -*- coding: utf-8 -*-

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import json
import re
import sys
from pathlib import Path

import click
import yaml

from . import subset_pings
from .bhr_ping import BhrPing
from .common_ping import CommonPing
from .config import Config
from .glean_ping import GleanPing
from .main_ping import MainPing
from .schema import SchemaEncoder

ROOT_DIR = Path(__file__).parent
CONFIGS_DIR = ROOT_DIR / "configs"
# Captures the ping name (group 1) and the schema version (group 2) from a
# path or URL ending in "<name>.<version>.schema.json".
SCHEMA_NAME_RE = re.compile(r".+/([a-zA-Z0-9_-]+)\.([0-9]+)\.schema\.json")


def _apply_options(func, options):
    """Apply each decorator in ``options`` to ``func``, in order.

    Returns the result of the last decorator (or ``func`` itself when
    ``options`` is empty).
    """
    for option in options:
        func = option(func)
    return func


def common_options(func):
    """Decorate ``func`` with the options shared by all generator commands.

    Adds ``--out-dir``, ``--pretty`` and ``--mps-branch``; the decorated
    function receives them as ``out_dir``, ``pretty`` and ``mps_branch``.
    """
    return _apply_options(
        func,
        [
            click.option(
                "--out-dir",
                help=(
                    "The directory to write the schema files to. "
                    "If not provided, writes the schemas to stdout."
                ),
                type=click.Path(dir_okay=True, file_okay=False, writable=True),
                required=False,
            ),
            click.option(
                "--pretty",
                is_flag=True,
                help=(
                    "If specified, pretty-prints the JSON "
                    "schemas that are outputted. Otherwise "
                    "the schemas will be on one line."
                ),
            ),
            click.option(
                "--mps-branch",
                help=(
                    "If specified, the source branch of "
                    "mozilla-pipeline-schemas to reference"
                ),
                required=False,
                type=str,
                default="main",
            ),
        ],
    )


# NOTE: the click command functions below are documented with comments rather
# than docstrings on purpose: click uses a command's docstring as its --help
# text, so adding one would change the CLI output.


# Generate the schema(s) for the "main" ping from a YAML config file and dump
# them with schema version 4.
@click.command()
@click.argument(
    "config",
    type=click.Path(dir_okay=False, file_okay=True, writable=False, exists=True),
    default=CONFIGS_DIR / "main.yaml",
)
@common_options
def generate_main_ping(config, out_dir, pretty, mps_branch):
    schema_generator = MainPing(mps_branch=mps_branch)
    if out_dir:
        out_dir = Path(out_dir)

    with open(config, "r") as f:
        config_data = yaml.safe_load(f)

    config = Config("main", config_data)
    schemas = schema_generator.generate_schema(config)
    # schemas introduces an extra layer to the actual schema
    dump_schema(schemas, out_dir, pretty, version=4)


# Generate the schema(s) for the "bhr" ping using an empty config and dump
# them with schema version 4.
@click.command()
@common_options
def generate_bhr_ping(out_dir, pretty, mps_branch):
    schema_generator = BhrPing(mps_branch=mps_branch)
    if out_dir:
        out_dir = Path(out_dir)

    config = Config("bhr", {})
    schemas = schema_generator.generate_schema(config)
    dump_schema(schemas, out_dir, pretty, version=4)


# Generate schemas for every entry of the JSON file given by
# --common-pings-config. Each entry must provide "schema_url" and may provide
# "config", a YAML file name resolved relative to CONFIG-DIR.
@click.command()
@click.argument(
    "config-dir",
    type=click.Path(dir_okay=True, file_okay=False, writable=False, exists=True),
    default=CONFIGS_DIR,
)
@common_options
@click.option(
    "--common-pings-config",
    default="common_pings.json",
    help=(
        "File containing URLs to schemas and configs "
        "of pings in the common ping format."
    ),
)
def generate_common_pings(config_dir, out_dir, pretty, mps_branch, common_pings_config):
    if out_dir:
        out_dir = Path(out_dir)

    # click.Path yields a plain str for user-supplied values (only the default
    # is already a Path), and str / str raises TypeError, so normalise first.
    config_dir = Path(config_dir)

    with open(common_pings_config, "r") as f:
        common_pings = json.load(f)

    for common_ping in common_pings:
        schema_url = common_ping["schema_url"]

        # Validate the URL before doing any work for this ping so that a
        # malformed entry produces a readable CLI error instead of an
        # AttributeError on a None match.
        m = SCHEMA_NAME_RE.match(schema_url)
        if m is None:
            raise click.ClickException(
                "Cannot determine ping name and version from schema URL: "
                "{}".format(schema_url)
            )
        name = m.group(1)
        version = m.group(2)

        schema_generator = CommonPing(schema_url, mps_branch=mps_branch)

        config_data = {}
        if "config" in common_ping:
            with open(config_dir / common_ping["config"], "r") as f:
                config_data = yaml.safe_load(f)

        config = Config(name, config_data)
        schemas = schema_generator.generate_schema(config)

        dump_schema(schemas, out_dir, pretty, version=int(version))


# Generate Glean ping schemas for all repositories returned by
# GleanPing.get_repos(), or only for the one whose "app_id" equals --repo.
@click.command()
@click.argument(
    "config",
    type=click.Path(dir_okay=False, file_okay=True, writable=False, exists=True),
    default=CONFIGS_DIR / "glean.yaml",
)
@common_options
@click.option(
    "--repo",
    help=(
        "The repository id to write the schemas of. "
        "If not specified, writes the schemas of all "
        "repositories."
    ),
    required=False,
    type=str,
)
@click.option(
    "--generic-schema",
    is_flag=True,
    help=(
        "When specified, schemas are not filled in, "
        "but instead the generic schema is used for "
        "every application's glean pings."
    ),
)
def generate_glean_pings(config, out_dir, pretty, mps_branch, repo, generic_schema):
    if out_dir:
        out_dir = Path(out_dir)

    repos = GleanPing.get_repos()

    if repo is not None:
        repos = [r for r in repos if r["app_id"] == repo]

    # The validation below needs at least one repository (repos[0]); fail
    # with a clear CLI error rather than an IndexError when there is none.
    if not repos:
        if repo is not None:
            raise click.ClickException(
                "No Glean repository found with app_id: {}".format(repo)
            )
        raise click.ClickException("No Glean repositories found")

    with open(config, "r") as f:
        config_data = yaml.safe_load(f)

    glean_config = Config("glean", config_data)

    # validate that the config has mappings for every single metric type specified in the
    # Glean schema (see: https://bugzilla.mozilla.org/show_bug.cgi?id=1739239)
    glean_schema = GleanPing(repos[0]).get_schema()

    glean_matched_metrics_in_config = set(config_data["metrics"].keys())
    glean_metrics_in_schema = set(
        glean_schema.get(["properties", "metrics", "properties"]).keys()
    )
    new_unmatched_glean_types = (
        glean_metrics_in_schema - glean_matched_metrics_in_config
    )
    if new_unmatched_glean_types:
        raise click.ClickException(
            "Unknown metric types in Glean Schema: {}. Please add them to {}".format(
                ", ".join(sorted(new_unmatched_glean_types)), config
            )
        )

    # Distinct loop name so the --repo parameter is not shadowed.
    for glean_repo in repos:
        write_schema(
            glean_repo,
            glean_config,
            out_dir,
            pretty,
            generic_schema,
            mps_branch,
        )


def write_schema(repo, config, out_dir, pretty, generic_schema, mps_branch):
    """Generate and dump the Glean schemas for a single repository.

    When ``out_dir`` is truthy the schemas are written below
    ``out_dir/<repo["app_id"]>``; otherwise the falsy ``out_dir`` is passed
    through and ``dump_schema`` prints to stdout.
    """
    schema_generator = GleanPing(repo, mps_branch=mps_branch)
    schemas = schema_generator.generate_schema(config, generic_schema=generic_schema)
    dump_schema(schemas, out_dir and out_dir.joinpath(repo["app_id"]), pretty)


@click.command()
@click.argument(
    "config",
    type=click.Path(dir_okay=False, file_okay=True, writable=False, exists=True),
    default=CONFIGS_DIR / "subset.yaml",
)
@common_options
def generate_subset_pings(config, out_dir, pretty, mps_branch):
    """Read in pings from disk and move fields to new subset pings.

    If configured, also create a remainder ping with all the fields that weren't moved.

    Ignore mps_branch and use the schemas on disk, because those will be populated with probes.
    """
    if not out_dir:
        raise NotImplementedError(
            "Generating subset pings without out_dir is not supported."
        )
    out_dir = Path(out_dir)
    with open(config, "r") as f:
        config_data = yaml.safe_load(f)
    schemas = subset_pings.generate(config_data, out_dir)
    # The result is iterated as namespace -> doctype -> version -> schema.
    for namespace, doctypes in schemas.items():
        for doctype, versions in doctypes.items():
            for version, schema in versions.items():
                dump_schema(
                    {doctype: schema}, out_dir / namespace, pretty, version=version
                )


def dump_schema(schemas, out_dir, pretty, *, version=1):
    """Serialise ``schemas`` as JSON to stdout or to files under ``out_dir``.

    ``schemas`` is a mapping of ping name to schema. If ``out_dir`` is falsy
    the whole mapping is printed as a single JSON document. Otherwise each
    schema is written to ``<out_dir>/<name>/<name>.<version>.schema.json``,
    with underscores in ``name`` replaced by hyphens.

    ``pretty`` switches to sorted keys with a four-space indent; ``version``
    is only used in the output file name.
    """
    json_dump_args = {"cls": SchemaEncoder}
    if pretty:
        json_dump_args.update(
            {"indent": 4, "separators": (",", ":"), "sort_keys": True}
        )

    if not out_dir:
        print(json.dumps(schemas, **json_dump_args))
        return

    for name, schema in schemas.items():
        # Bug 1601270; we transform ping names from snake_case to kebab-case;
        # we can remove this line once all snake_case probes have converted.
        name = name.replace("_", "-")
        ping_out_dir = out_dir.joinpath(name)
        # exist_ok avoids the check-then-create race of a separate exists().
        ping_out_dir.mkdir(parents=True, exist_ok=True)

        fname = ping_out_dir.joinpath("{}.{}.schema.json".format(name, version))
        with open(fname, "w") as f:
            f.write(json.dumps(schema, **json_dump_args))


@click.group()
def main(args=None):
    """Command line utility for mozilla-schema-generator."""
    import logging

    logging.basicConfig(stream=sys.stderr, level=logging.INFO)


# Register every generator as a sub-command of the ``main`` group.
main.add_command(generate_main_ping)
main.add_command(generate_bhr_ping)
main.add_command(generate_glean_pings)
main.add_command(generate_common_pings)
main.add_command(generate_subset_pings)


if __name__ == "__main__":
    sys.exit(main())