scripts/gen_dockerfile.py (138 lines of code) (raw):

#!/usr/bin/env python3

# Copyright 2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Generate a Dockerfile and helper files for a Python application."""

import argparse
import collections
import collections.abc
import functools
import io
import os
import re
import sys

import yaml

import validation_utils

# Validate characters for dockerfile image names.
#
# This roots out obvious mistakes, the full gory details are here:
# https://github.com/docker/distribution/blob/master/reference/regexp.go
#
# The pattern is anchored with \Z rather than $ because $ also matches
# just before a trailing newline, which would let a name ending in a
# newline slip through validation.
IMAGE_REGEX = re.compile(r"""(?x)
    ^
    [a-zA-Z0-9]             # First char must be alphanumeric
    [a-zA-Z0-9\-_./:@+]*    # Punctuation allowed after that
    \Z
""")

# `entrypoint` is specified as free-form text parsed as a unix shell
# command line, which limits the sanity checking possible.  We
# disallow newlines and control characters which would break the
# Dockerfile format.
#
# Anchored with \Z (not $) so that a single trailing newline is rejected
# along with every other control character.
PRINTABLE_REGEX = re.compile(r"""^[^\x00-\x1f]*\Z""")

# Map from app.yaml "python_version" to {python_version} in Dockerfile
PYTHON_INTERPRETER_VERSION_MAP = {
    '': '',  # == 2.7
    '2': '',  # == 2.7
    '3': '3.6',
    '3.4': '3.4',
    '3.5': '3.5',
    '3.6': '3.6',
    '3.7': '3.7',
}

# Name of environment variable potentially set by gcloud
GAE_APPLICATION_YAML_PATH = 'GAE_APPLICATION_YAML_PATH'

# Validated application configuration
AppConfig = collections.namedtuple(
    'AppConfig',
    'base_image dockerfile_python_version entrypoint has_requirements_txt is_python_compat'
)


def get_app_config(raw_config, base_image, config_file, source_dir):
    """Read and validate the application runtime configuration.

    We validate the user input for security and better error messages.

    Consider parsing a yaml file which has a string value where we
    expected a list.  Python will happily use the string as a sequence
    of individual characters, at least for a while, leading to
    confusing results when it finally fails.

    We also try to prevent Dockerfile and Bash injection attacks.  For
    example, specifying entrypoint as "true\\nADD /etc/passwd /pwned"
    would allow the user to inject arbitrary directives into the
    Dockerfile, which is a support problem if nothing else.

    Args:
        raw_config (dict): deserialized app.yaml
        base_image (str): Docker image name to build on top of
        config_file (str): Path to user's app.yaml (might be <service>.yaml)
        source_dir (str): Directory containing user's source code

    Returns:
        AppConfig: valid configuration

    Raises:
        ValueError: if `raw_config` is not a mapping, if `entrypoint`
            contains control characters, or if `python_version` is not
            one of the keys of PYTHON_INTERPRETER_VERSION_MAP.
            (validation_utils.get_field_value presumably raises for
            fields of the wrong type as well -- confirm in that module.)
    """
    # Examine app.yaml
    if not isinstance(raw_config, collections.abc.Mapping):
        raise ValueError(
            'Expected {} contents to be a Mapping type, but found type "{}"'.
            format(config_file, type(raw_config)))

    # Short circuit for python compat.  None of the other fields are
    # consulted for this runtime, so they are left as None.
    if validation_utils.get_field_value(
            raw_config, 'runtime', str) == 'python-compat':
        return AppConfig(
            base_image=None,
            dockerfile_python_version=None,
            entrypoint=None,
            has_requirements_txt=None,
            is_python_compat=True)

    entrypoint = validation_utils.get_field_value(
        raw_config, 'entrypoint', str)
    if not PRINTABLE_REGEX.match(entrypoint):
        raise ValueError(
            'Invalid "entrypoint" value in app.yaml: {!r}'.format(entrypoint))

    # Mangle entrypoint in the same way as the Cloud SDK
    # (googlecloudsdk/third_party/appengine/api/validation.py)
    #
    # We could handle both string ("shell form") and list ("exec
    # form") but it appears that gcloud only handles string form.
    if entrypoint and not entrypoint.startswith('exec '):
        entrypoint = 'exec ' + entrypoint

    raw_runtime_config = validation_utils.get_field_value(
        raw_config, 'runtime_config', dict)
    python_version = validation_utils.get_field_value(
        raw_runtime_config, 'python_version', str)

    # The map's values include the empty string (meaning Python 2.7), so
    # an unknown version must be detected with `is None`, not truthiness.
    dockerfile_python_version = PYTHON_INTERPRETER_VERSION_MAP.get(
        python_version)
    if dockerfile_python_version is None:
        valid_versions = str(sorted(PYTHON_INTERPRETER_VERSION_MAP))
        raise ValueError(
            'Invalid "python_version" field in "runtime_config" section '
            'of app.yaml: {!r}.  Valid options are: {}'.
            format(python_version, valid_versions))

    # Examine user's files
    has_requirements_txt = os.path.isfile(
        os.path.join(source_dir, 'requirements.txt'))

    return AppConfig(
        base_image=base_image,
        dockerfile_python_version=dockerfile_python_version,
        entrypoint=entrypoint,
        has_requirements_txt=has_requirements_txt,
        is_python_compat=False)


def get_data(name):
    """Return the contents of the named data resource

    These templates are copied from the Google Cloud SDK at
    google-cloud-sdk/platform/ext-runtime/python/data
    and the two should be kept in sync.

    Args:
        name (str): Name of file, without directory

    Returns:
        str: Contents of data file
    """
    # Data files live in a "data" directory next to this script.
    filename = os.path.join(os.path.dirname(__file__), 'data', name)
    with io.open(filename, 'r', encoding='utf8') as template_file:
        return template_file.read()


def generate_files(app_config):
    """Generate a Dockerfile and helper files for an application.

    Args:
        app_config (AppConfig): Validated configuration

    Returns:
        dict: Map of filename to desired file contents
    """
    # Optional fragments collapse to the empty string when not needed so
    # they can be joined into the Dockerfile unconditionally below.
    if app_config.has_requirements_txt:
        optional_requirements_txt = get_data('Dockerfile.requirements_txt')
    else:
        optional_requirements_txt = ''

    if app_config.entrypoint:
        optional_entrypoint = get_data(
            'Dockerfile.entrypoint.template').format(
                entrypoint=app_config.entrypoint)
    else:
        optional_entrypoint = ''

    if app_config.is_python_compat:
        # The python-compat runtime uses fixed files with no substitutions.
        dockerfile = get_data('Dockerfile.python_compat')
        dockerignore = get_data('dockerignore.python_compat')
    else:
        dockerfile = ''.join([
            get_data('Dockerfile.preamble.template').format(
                base_image=app_config.base_image),
            get_data('Dockerfile.virtualenv.template').format(
                python_version=app_config.dockerfile_python_version),
            optional_requirements_txt,
            get_data('Dockerfile.install_app'),
            optional_entrypoint,
        ])
        dockerignore = get_data('dockerignore')

    return {
        'Dockerfile': dockerfile,
        '.dockerignore': dockerignore,
    }


def generate_dockerfile_command(base_image, config_file, source_dir):
    """Write a Dockerfile and helper files for an application.

    Args:
        base_image (str): Docker image name to build on top of
        config_file (str): Path to user's app.yaml (might be <service>.yaml)
        source_dir (str): Directory containing user's source code

    Raises:
        ValueError: if the configuration fails validation in
            get_app_config.
    """
    # Read yaml file.  Does not currently support multiple services
    # with configuration filenames besides app.yaml
    with io.open(config_file, 'r', encoding='utf8') as yaml_config_file:
        raw_config = yaml.safe_load(yaml_config_file)

    # Determine complete configuration
    app_config = get_app_config(raw_config, base_image, config_file,
                                source_dir)

    # Generate list of filenames and their textual contents
    files = generate_files(app_config)

    # Write files
    for filename, contents in files.items():
        full_filename = os.path.join(source_dir, filename)
        with io.open(full_filename, 'w', encoding='utf8') as outfile:
            outfile.write(contents)


def parse_args(argv):
    """Parse and validate command line flags

    Args:
        argv (list): Full argument vector; argv[0] (the program name)
            is skipped.

    Returns:
        argparse.Namespace: with attributes `base_image`, `config` and
            `source_dir`
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--base-image',
        type=functools.partial(
            validation_utils.validate_arg_regex, flag_regex=IMAGE_REGEX),
        default='gcr.io/google-appengine/python:latest',
        help='Name of Docker image to use as base')
    # In some cases, gcloud sets an environment variable to indicate
    # the location of the application configuration file, rather than
    # using the --config flag.  The order of precedence from highest
    # to lowest is:
    #
    # 1) --config flag
    # 2) $GAE_APPLICATION_YAML_PATH environment variable
    # 3) a file named "app.yaml" in the current working directory
    parser.add_argument(
        '--config',
        type=functools.partial(
            validation_utils.validate_arg_regex, flag_regex=PRINTABLE_REGEX),
        default=(os.environ.get(GAE_APPLICATION_YAML_PATH) or 'app.yaml'),
        help='Path to application configuration file'
        )
    parser.add_argument(
        '--source-dir',
        type=functools.partial(
            validation_utils.validate_arg_regex, flag_regex=PRINTABLE_REGEX),
        default='.',
        help=('Application source and output directory'))
    args = parser.parse_args(argv[1:])
    return args


def main():
    """Parse the command line flags and write the generated files."""
    args = parse_args(sys.argv)
    generate_dockerfile_command(args.base_image, args.config, args.source_dir)


if __name__ == '__main__':
    main()