in mozilla_schema_generator/glean_ping.py [0:0]
def generate_schema(self, config, generic_schema=False) -> Dict[str, Schema]:
    """Assemble the schemas for every ping known to the pipeline metadata.

    For each ping returned by ``get_pings_and_pipeline_metadata``, the
    matchers of ``config`` are cloned into a table group named after the
    ping, their ``send_in_pings`` ``contains`` condition is set to that
    ping, and a per-ping ``Config`` is built from them. The ping's pipeline
    metadata is attached to each resulting schema under the
    ``mozPipelineMetadata`` key.

    :param config: configuration whose ``matchers`` mapping is cloned for
        each ping.
    :param generic_schema: when true, the generic glean ping schema is used
        for every ping (stored under the per-ping config name); otherwise
        the parent class's ``generate_schema`` is called with the per-ping
        config and its results are merged in.
    :return: the schemas accumulated over all pings.
    """
    schemas = {}
    ping_metadata = self.get_pings_and_pipeline_metadata()

    for ping, pipeline_meta in ping_metadata.items():
        # Clone each configured matcher into this ping's table group.
        ping_matchers = {}
        for location, template in config.matchers.items():
            ping_matchers[location] = template.clone(new_table_group=ping)

        # Four newly introduced metric types were incorrectly deployed
        # as repeated key/value structs in all Glean ping tables existing prior
        # to November 2021. We maintain the incorrect fields for existing tables
        # by disabling the associated matchers.
        # Note that each of these types now has a "2" matcher ("text2", "url2", etc.)
        # defined that will allow metrics of these types to be injected into proper
        # structs. The gcp-ingestion repository includes logic to rewrite these
        # metrics under the "2" names.
        # See https://bugzilla.mozilla.org/show_bug.cgi?id=1737656
        table_id = "{bq_dataset_family}.{bq_table}".format(**pipeline_meta)
        if table_id in self.bug_1737656_affected_tables:
            # Drop the flagged matchers from this ping's private mapping.
            for location in list(ping_matchers):
                if ping_matchers[location].matcher.get("bug_1737656_affected"):
                    del ping_matchers[location]

        for ping_matcher in ping_matchers.values():
            ping_matcher.matcher["send_in_pings"]["contains"] = ping
        ping_config = Config(ping, matchers=ping_matchers)

        defaults = {"mozPipelineMetadata": pipeline_meta}

        # Adjust the schema path if the ping does not require info sections.
        # This has to happen before any schema is fetched or generated below.
        self.set_schema_url(pipeline_meta)

        if generic_schema:
            # Use the generic glean ping schema; defaults replace whole keys.
            generic = self.get_schema(generic_schema=True)
            generic.schema.update(defaults)
            schemas[ping_config.name] = generic
            continue

        generated = super().generate_schema(ping_config)
        for generated_schema in generated.values():
            contents = generated_schema.schema
            # Override each individual key with the assembled defaults, but
            # keep values _inside_ a key that the schema already provides.
            for key, value in defaults.items():
                contents.setdefault(key, {}).update(value)
        schemas.update(generated)

    return schemas