def strip_layer()

in docker/util/config_stripper.py [0:0]
61 lines of code
3 McCabe index (conditional complexity)

def strip_layer(path):
    """Rewrite a layer tar with a fixed mtime and store it gzip-compressed.

    Every member of the tar at `path` is copied into a new tar with its mtime
    forced to a constant. The new tar is piped through an external `gzip -nf`
    process and the compressed result is written next to the layer directory
    under the name `sha256:<hex digest of the compressed bytes>`. Finally the
    directory containing `path` is removed.

    Neither the tar nor the gzip'd tar is ever held fully in memory; data is
    moved in `_BUF_SIZE` chunks because images can be quite large.

    Args:
        path: Path to the layer tar, of the form <random string>/layer.tar.

    Returns:
        A tuple `(new_name, diffid)` where `new_name` is
        `sha256:<digest of the compressed tar>` (the file name created in the
        parent of the layer directory) and `diffid` is
        `sha256:<digest of the uncompressed stripped tar>`.

    Raises:
        RuntimeError: if gzip exits with a non-zero status.
        Exception: whatever was raised while reading gzip's output or writing
            the compressed file is re-raised in the calling thread.
    """
    # The original layer tar is of the form <random string>/layer.tar, the
    # working directory is one level up from where layer.tar is.
    original_dir = os.path.normpath(os.path.join(os.path.dirname(path), '..'))

    # Write compressed tar to a temporary name. We'll rename it to the correct
    # name after we compute the hash.
    gz_out = tempfile.NamedTemporaryFile(dir=original_dir, delete=False)

    # Set once the temp file has been renamed into place; until then any
    # failure must remove the temp file (it was created with delete=False).
    committed = False
    try:
        # Keep track of sha hash for both the compressed and uncompressed tar
        uncompressed_sha = hashlib.sha256()
        compressed_sha = hashlib.sha256()

        # Start a gzip process that we'll use to compress tar output.
        # Shelling out to gzip is noticeably faster than using python's gzip.
        gzip_process = subprocess.Popen(
            ['gzip', '-nf'],
            stdout=subprocess.PIPE,
            stdin=subprocess.PIPE,
            stderr=subprocess.PIPE)

        # Read the gzip'd output and accumulate the sha hash, and save the
        # compressed copy under the temporary name.
        gzip_stdout_exc = []

        def do_gzip_stdout():
            try:
                while True:
                    buf = gzip_process.stdout.read(_BUF_SIZE)
                    if not buf:
                        break
                    if gzip_stdout_exc:
                        # A previous chunk failed. Keep draining so gzip never
                        # blocks on a full pipe, which would in turn block the
                        # main thread's stdin.write() forever.
                        continue
                    try:
                        compressed_sha.update(buf)
                        gz_out.write(buf)
                    except Exception as e:
                        gzip_stdout_exc.append(e)
            except Exception as e:
                # Reading from the pipe itself failed; nothing left to drain.
                gzip_stdout_exc.append(e)

        # Read the gzip stderr for error reporting.
        gzip_stderr_buf = io.BytesIO()

        def do_gzip_stderr():
            # Don't bother incrementally reading stderr.
            gzip_stderr_buf.write(gzip_process.stderr.read())

        # Start all of the threads to prepare for producing the tar file.
        gzip_stdout = threading.Thread(target=do_gzip_stdout)
        gzip_stdout.start()
        gzip_stderr = threading.Thread(target=do_gzip_stderr)
        gzip_stderr.start()
        try:
            # Go through each file/dir in the layer
            # Set its mtime to a fixed value
            # If it's a file, copy its content
            # Add it to the new tar.
            with tempfile.TemporaryFile() as t:
                with tarfile.open(name=path, mode='r') as it:
                    with tarfile.open(
                            fileobj=t, encoding='utf-8', mode='w') as ot:
                        for tarinfo in it:
                            # Use a deterministic mtime that doesn't confuse
                            # other programs, e.g. Python. Also see
                            # https://github.com/bazelbuild/bazel/issues/1299
                            # 946684800 == 2000-01-01 00:00:00.000 UTC
                            tarinfo.mtime = 946684800
                            if tarinfo.isfile():
                                f = it.extractfile(tarinfo)
                                ot.addfile(tarinfo, f)
                            else:
                                ot.addfile(tarinfo)

                # Read the stripped tarfile. Accumulate a hash of the
                # uncompressed file and send data on to the gzip process for
                # compression.
                t.seek(0)
                while True:
                    buf = t.read(_BUF_SIZE)
                    if not buf:
                        break
                    uncompressed_sha.update(buf)
                    gzip_process.stdin.write(buf)
        finally:
            try:
                gzip_process.stdin.close()  # Causes gzip to terminate.
            finally:
                # Always reap the threads and the child, even if closing stdin
                # raised (e.g. a broken pipe while flushing).
                gzip_stdout.join()  # Terminates after gzip closes stdout.
                gzip_stderr.join()  # Terminates after gzip closes stderr.
                gzip_process.wait()  # gzip terminated by now.
                gzip_process.stdout.close()
                gzip_process.stderr.close()

        # Check if any of our threads or processes failed.
        if gzip_stdout_exc:
            raise gzip_stdout_exc[0]
        if gzip_process.returncode != 0:
            raise RuntimeError(
                'Failed to gzip stripped layer %s. '
                'gzip exited with status %d: %s' % (
                    path, gzip_process.returncode,
                    gzip_stderr_buf.getvalue()))

        # Flush and close the compressed file before publishing it so that a
        # late write error surfaces here rather than being lost.
        gz_out.close()

        # Create the new diff_id for the config
        diffid = 'sha256:%s' % uncompressed_sha.hexdigest()

        # Rename into correct location now that we know the hash.
        new_name = 'sha256:%s' % compressed_sha.hexdigest()
        os.rename(gz_out.name, os.path.join(original_dir, new_name))
        committed = True
    finally:
        if not committed:
            # Best-effort cleanup of the partial temp file; the exception
            # already in flight is the one worth reporting.
            try:
                gz_out.close()
            except (IOError, OSError):
                pass
            try:
                os.unlink(gz_out.name)
            except OSError:
                pass

    shutil.rmtree(os.path.dirname(path))
    return (new_name, diffid)