images/airflow/generate-dockerfiles.py (112 lines of code) (raw):

""" Generate a Dockerfile based on the Jinja2-templated Dockerfile.j2 file. Dockerfile is very limited in nature, with just primitive commands. This usually results in Dockerfiles becoming lengthy, repetitive, and error prone, resulting in quality degradation. To work around this limitation, we use Jinja2 template engine which offers a lot of futures, e.g. if statements, for loops, etc., and enable integration with Python (via data variables) resulting in a way more powerful Dockerfile. When executed, this script takes the Dockerfile.j2 and pass it to Jinja2 engine to produce a Dockerfile. The reader is referred to the code below for a better understanding of the working mechanism of this. """ import os import sys from datetime import datetime from typing import Any, List from pathlib import Path try: from jinja2 import Environment, FileSystemLoader except ImportError: print( """ jinja2 pip library is required. Please install it with: pip3 install jinja2 """.strip() ) sys.exit(1) def raise_helper(msg: str) -> None: """ Helper method to enable Jinja2 templates to raise an exception. """ raise RuntimeError(msg) def remove_repeated_empty_lines(text: str) -> str: """ Removes repeated empty lines from a given text, leaving at most one empty line between non-empty lines. :param text: The input text from which repeated empty lines should be removed. :returns: The cleaned text with no more than one consecutive empty line. """ lines = text.split(os.linesep) # Split the text into lines previous_line_empty = False # Track if the previous line was empty cleaned_lines: List[str] = [] for line in lines: # Check if the current line is empty if not line.strip(): if not previous_line_empty: # If the current line is empty but the previous one wasn't, add # the empty line cleaned_lines.append(line) previous_line_empty = True else: # If the current line is not empty, add it and reset the flag cleaned_lines.append(line) previous_line_empty = False # Join the cleaned lines back into a single string cleaned_text = os.linesep.join(cleaned_lines) return cleaned_text def generate_dockerfile( image_root_dir: Path, template_filename: str, output_file: str, data: dict[str, Any] ) -> None: # Load Dockerfile Jinja template. file_loader = FileSystemLoader(image_root_dir) env = Environment(loader=file_loader, autoescape=True) env.globals["raise"] = raise_helper # type: ignore template = env.get_template(template_filename) # Render the template and generate the Dockerfile output = template.render(data) with open(os.path.join(image_root_dir, "Dockerfiles", output_file), "w") as f: f.write( f""" # # WARNING: Don't change this file manually. This file is auto-generated from # the Jinja2-templated Dockerfile.j2 file, so you need to change that file # instead. # """.strip() ) f.write(os.linesep) f.write(os.linesep) f.write(remove_repeated_empty_lines(output)) def generate_base_dockerfile(image_root_dir: Path) -> None: """Generate the Dockerfile.base file based on the Dockerfile.base.j2 template. We generate multiple Docker images for different purposes, as explained below under the documentation of `generate_derivative_dockerfiles`. However, these derivative images actually share most of the setup. So, to reduce build time and avoid duplication, we generate a "base" Docker image, and then derive the rest of the images from them. :param image_root_dir: The root directory of the Docker image, i.e. where the `Dockerfile` resides. """ # Template data data = { "bootstrapping_scripts_root_firstpass": sorted( [ os.path.join("bootstrap/01-root-firstpass", file.name) for file in (image_root_dir / "bootstrap/01-root-firstpass").iterdir() if file.is_file() ] ), "bootstrapping_scripts_airflow": sorted( [ os.path.join("bootstrap/02-airflow", file.name) for file in (image_root_dir / "bootstrap/02-airflow").iterdir() if file.is_file() ] ), "bootstrapping_scripts_root_secondpass": sorted( [ os.path.join("bootstrap/03-root-secondpass", file.name) for file in (image_root_dir / "bootstrap/03-root-secondpass").iterdir() if file.is_file() ] ), } template_name = "Dockerfile.base.j2" dockerfile_name = "Dockerfile.base" generate_dockerfile(image_root_dir, template_name, dockerfile_name, data) def generate_derivative_dockerfiles( image_root_dir: Path, build_type: str = "standard", dev: bool = False ) -> None: """Generate a Dockerfile based on the given build arguments. :param build_type: Specifies the build type. This can have the following values: - standard: This is the standard build type. it is what customer uses. - explorer: The 'explorer' build type is almost identical to the 'standard' build type but it doesn't include the entrypoint. This is useful for debugging purposes to run the image and look around its content without starting airflow, which might require further setup. - explorer-root: This is similar to the 'explorer' build type, but additionally uses the root user, giving the user of this Docker image elevated permissions. The user can, thus, install packages, remove packages, or anything else. :param dev: Whether to produce a development image or a production one. Development images have extra stuff that are useful during development, e.g. editors, sudo, etc. """ template_name = "Dockerfile.derivatives.j2" dockerfile_name = "Dockerfile" if build_type != "standard": dockerfile_name = f"{dockerfile_name}-{build_type}" if dev: dockerfile_name = f"{dockerfile_name}-dev" data = { "bootstrapping_scripts_dev": ( sorted( [ os.path.join("bootstrap-dev", file.name) for file in (image_root_dir / "bootstrap-dev").iterdir() if file.is_file() ] ) if dev else [] ), "build_type": build_type, } generate_dockerfile(image_root_dir, template_name, dockerfile_name, data) def generate_airflow_dockerfiles(image_root_dir: Path): # Generate the base Dockerfile file (Dockerfile.base). generate_base_dockerfile(image_root_dir) # Generate the derivative Dockerfiles (multiple Dockerfiles based on # the build arguments.) for dev in [True, False]: for build_type in ["standard", "explorer", "explorer-privileged"]: generate_derivative_dockerfiles( image_root_dir, build_type=build_type, dev=dev ) def main(): """Start execution of the script.""" for x in Path(__file__).parent.iterdir(): if not x.is_dir(): continue generate_airflow_dockerfiles(x) if __name__ == "__main__": main() else: print("This module cannot be imported.") sys.exit(1)