generator/spoke.py (138 lines of code) (raw):
"""Generate directories and models for new namespaces."""
import logging
import os
import shutil
from collections import defaultdict
from pathlib import Path
from typing import Dict, List, TypedDict
import click
import lkml
import looker_sdk
import yaml
from .lookml import ViewDict
MODEL_SETS_BY_INSTANCE: Dict[str, List[str]] = {
"https://mozilladev.cloud.looker.com": ["mozilla_confidential"],
"https://mozillastaging.cloud.looker.com": ["mozilla_confidential"],
"https://mozilla.cloud.looker.com": ["mozilla_confidential"],
}
DEFAULT_DB_CONNECTION = "telemetry"
class ExploreDict(TypedDict):
"""Represent an explore definition."""
type: str
views: List[Dict[str, str]]
class NamespaceDict(TypedDict):
"""Represent a Namespace definition."""
views: ViewDict
explores: ExploreDict
pretty_name: str
glean_app: bool
connection: str
spoke: str
def setup_env_with_looker_creds() -> bool:
"""
Set up env with looker credentials.
Returns TRUE if the config is complete.
"""
client_id = os.environ.get("LOOKER_API_CLIENT_ID")
client_secret = os.environ.get("LOOKER_API_CLIENT_SECRET")
instance = os.environ.get("LOOKER_INSTANCE_URI")
if client_id is None or client_secret is None or instance is None:
return False
os.environ["LOOKERSDK_BASE_URL"] = instance
os.environ["LOOKERSDK_API_VERSION"] = "4.0"
os.environ["LOOKERSDK_VERIFY_SSL"] = "true"
os.environ["LOOKERSDK_TIMEOUT"] = "120"
os.environ["LOOKERSDK_CLIENT_ID"] = client_id
os.environ["LOOKERSDK_CLIENT_SECRET"] = client_secret
return True
def generate_model(
spoke_path: Path, name: str, namespace_defn: NamespaceDict, db_connection: str
) -> Path:
"""
Generate a model file for a namespace.
We want these to have a nice label and a unique name.
We only import explores and dashboards, as we want those
to auto-import upon generation.
Views are not imported by default, since they should
be added one-by-one if they are included in an explore.
"""
logging.info(f"Generating model {name}...")
model_defn = {
"connection": db_connection,
"label": namespace_defn["pretty_name"],
}
# automatically import generated explores for new glean apps
has_explores = len(namespace_defn.get("explores", {})) > 0
path = spoke_path / name / f"{name}.model.lkml"
# lkml.dump may return None, in which case write an empty file
footer_text = f"""
# Include files from looker-hub or spoke-default below. For example:
{'' if has_explores else '# '}include: "//looker-hub/{name}/explores/*"
# include: "//looker-hub/{name}/dashboards/*"
# include: "views/*"
# include: "explores/*"
# include: "dashboards/*"
"""
model_text = lkml.dump(model_defn)
if model_text is None:
path.write_text("")
else:
path.write_text(model_text + footer_text)
return path
def configure_model(
sdk: looker_sdk.methods40.Looker40SDK,
model_name: str,
db_connection: str,
spoke_project: str,
):
"""Configure a Looker model by name."""
instance = os.environ["LOOKER_INSTANCE_URI"]
logging.info(f"Configuring model {model_name}...")
try:
sdk.lookml_model(model_name)
logging.info("Model is configured!")
return
except looker_sdk.error.SDKError:
pass
sdk.create_lookml_model(
looker_sdk.models40.WriteLookmlModel(
allowed_db_connection_names=[db_connection],
name=model_name,
project_name=spoke_project,
)
)
for model_set_name in MODEL_SETS_BY_INSTANCE[instance]:
model_sets = sdk.search_model_sets(name=model_set_name)
if len(model_sets) != 1:
raise click.ClickException("Error: Found more than one matching model set")
model_set = model_sets[0]
models, _id = model_set.models, model_set.id
if models is None or _id is None:
raise click.ClickException("Error: Missing models or name from model_set")
sdk.update_model_set(
_id, looker_sdk.models40.WriteModelSet(models=list(models) + [model_name])
)
def generate_directories(
namespaces: Dict[str, NamespaceDict], base_dir: Path, sdk_setup=False
):
"""Generate directories and model for a namespace, if it doesn't exist."""
seen_spoke_namespaces = defaultdict(list)
for namespace, defn in namespaces.items():
spoke = defn["spoke"]
seen_spoke_namespaces[spoke].append(namespace)
spoke_dir = base_dir / spoke
spoke_dir.mkdir(parents=True, exist_ok=True)
print(f"Writing {namespace} to {spoke_dir}")
existing_dirs = {p.name for p in spoke_dir.iterdir()}
if namespace in existing_dirs:
continue
(spoke_dir / namespace).mkdir()
for dirname in ("views", "explores", "dashboards"):
(spoke_dir / namespace / dirname).mkdir()
(spoke_dir / namespace / dirname / ".gitkeep").touch()
db_connection: str = defn.get("connection", DEFAULT_DB_CONNECTION)
generate_model(spoke_dir, namespace, defn, db_connection)
if sdk_setup:
spoke_project = spoke.lstrip("looker-")
sdk = looker_sdk.init40()
logging.info("Looker SDK 4.0 initialized successfully.")
configure_model(sdk, namespace, db_connection, spoke_project)
# remove directories for namespaces that got removed
for spoke in seen_spoke_namespaces.keys():
spoke_dir = base_dir / spoke
existing_dirs = {p.name for p in spoke_dir.iterdir()}
for existing_dir in existing_dirs:
# make sure the directory belongs to a namespace by checking if a model file exists
if (spoke_dir / existing_dir / f"{existing_dir}.model.lkml").is_file():
if existing_dir not in seen_spoke_namespaces[spoke]:
# namespace does not exists anymore, remove directory
print(f"Removing {existing_dir} from {spoke_dir}")
shutil.rmtree(spoke_dir / existing_dir)
@click.command(help=__doc__)
@click.option(
"--namespaces",
default="namespaces.yaml",
type=click.File(),
help="Path to the namespaces.yaml file.",
)
@click.option(
"--spoke-dir",
default=".",
type=click.Path(file_okay=False, dir_okay=True, writable=True),
help="Directory containing the Looker spoke.",
)
def update_spoke(namespaces, spoke_dir):
"""Generate updates to spoke project."""
_namespaces = yaml.safe_load(namespaces)
sdk_setup = setup_env_with_looker_creds()
generate_directories(_namespaces, Path(spoke_dir), sdk_setup)