in mozilla_schema_generator/schema.py [0:0]
def _get_schema_size(schema: dict, key=None) -> int:
if key is None:
key = tuple()
if isinstance(schema, list):
return sum(Schema._get_schema_size(s) for s in schema)
if "type" not in schema:
# A JSON column is just that: one column
if schema.get("format") == "json":
return 1
raise Exception("Missing type for schema element at key " + "/".join(key))
if isinstance(schema["type"], list):
max_size = 0
for t in schema["type"]:
s = copy.deepcopy(schema)
s["type"] = t
max_size = max(max_size, Schema._get_schema_size(s, key))
return max_size
# TODO: Tests and finalize the different types available and how they map to BQ
# e.g. (allOf, anyOf, etc.)
if schema["type"] == "object":
# Sometimes the "properties" field is empty...
if "properties" in schema and schema["properties"]:
# A ROW type with a known set of fields
return sum(
(
Schema._get_schema_size(p, key=key + (n,))
for n, p in schema["properties"].items()
)
)
# A MAP type with key and value groups
return 2
if schema["type"] == "array":
if "items" not in schema:
raise Exception(
"Missing items for array schema element at key " + "/".join(key)
)
# Arrays are repeated fields, get its size
return Schema._get_schema_size(schema["items"], key=key + ("arr-items",))
# Otherwise, assume a scalar value
return 1