ci/scripts/bundle.py (326 lines of code) (raw):
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import io
import os
import pathlib
import re
def read_content(path_or_content):
if isinstance(path_or_content, pathlib.Path):
with open(path_or_content) as f:
return f.read()
else:
return str(path_or_content)
def write_content(path_or_content, out_path):
with open(out_path, "w") as f:
f.write(read_content(path_or_content))
def configure_content(paths_or_content, args):
content = read_content(paths_or_content)
for key, value in args.items():
replace_key = f"@{key}@"
if content.count(replace_key) != 1:
raise ValueError(
"Expected exactly one occurrence of "
f"'{replace_key}' in '{paths_or_content}'"
)
content = content.replace(replace_key, str(value))
return content
def concatenate_content(paths_or_content):
out = io.StringIO()
for path in paths_or_content:
out.write(read_content(path))
return out.getvalue()
def cmakelist_version(path_or_content):
content = read_content(path_or_content)
version_match = re.search(r'set\(NANOARROW_VERSION "(.*?)"\)', content)
if version_match is None:
raise ValueError(f"Can't find NANOARROW_VERSION in '{path_or_content}'")
version = version_match.group(1)
component_match = re.search(r"^([0-9]+)\.([0-9]+)\.([0-9]+)", version)
return (version,) + tuple(int(component) for component in component_match.groups())
def namespace_nanoarrow_includes(path_or_content, header_namespace="nanoarrow"):
content = read_content(path_or_content)
return re.sub(
r'#include "nanoarrow/([^"]+)"', f'#include "{header_namespace}\\1"', content
)
def bundle_nanoarrow(
root_dir,
symbol_namespace=None,
header_namespace="nanoarrow/",
output_source_dir="src",
output_include_dir="include",
cpp=False,
):
root_dir = pathlib.Path(root_dir)
src_dir = root_dir / "src" / "nanoarrow"
output_source_dir = pathlib.Path(output_source_dir)
output_include_dir = pathlib.Path(output_include_dir) / header_namespace
version, major, minor, patch = cmakelist_version(root_dir / "CMakeLists.txt")
if symbol_namespace is None:
namespace_define = "// #define NANOARROW_NAMESPACE YourNamespaceHere"
else:
namespace_define = f"#define NANOARROW_NAMESPACE {symbol_namespace}"
nanoarrow_config_h = configure_content(
src_dir / "nanoarrow_config.h.in",
{
"NANOARROW_VERSION": version,
"NANOARROW_VERSION_MAJOR": major,
"NANOARROW_VERSION_MINOR": minor,
"NANOARROW_VERSION_PATCH": patch,
"NANOARROW_NAMESPACE_DEFINE": namespace_define,
},
)
# Generate nanoarrow/nanoarrow.h
nanoarrow_h = concatenate_content(
[
nanoarrow_config_h,
src_dir / "common" / "inline_types.h",
src_dir / "nanoarrow.h",
src_dir / "common" / "inline_buffer.h",
src_dir / "common" / "inline_array.h",
]
)
nanoarrow_h = re.sub(r'#include "(nanoarrow/)?[a-z_./]+"', "", nanoarrow_h)
yield f"{output_include_dir}/nanoarrow.h", nanoarrow_h
# Generate nanoarrow/nanoarrow.hpp
nanoarrow_hpp = concatenate_content(
[
src_dir / "nanoarrow.hpp",
src_dir / "hpp" / "exception.hpp",
src_dir / "hpp" / "operators.hpp",
src_dir / "hpp" / "unique.hpp",
src_dir / "hpp" / "array_stream.hpp",
src_dir / "hpp" / "buffer.hpp",
src_dir / "hpp" / "view.hpp",
]
)
nanoarrow_hpp = re.sub(r'#include "(nanoarrow/)?hpp/[a-z_./]+"', "", nanoarrow_hpp)
nanoarrow_hpp = namespace_nanoarrow_includes(nanoarrow_hpp, header_namespace)
yield f"{output_include_dir}/nanoarrow.hpp", nanoarrow_hpp
# Generate nanoarrow/nanoarrow.c
nanoarrow_c = concatenate_content(
[
src_dir / "common" / "utils.c",
src_dir / "common" / "schema.c",
src_dir / "common" / "array.c",
src_dir / "common" / "array_stream.c",
]
)
nanoarrow_c = namespace_nanoarrow_includes(nanoarrow_c, header_namespace)
if cpp:
yield f"{output_source_dir}/nanoarrow.cc", nanoarrow_c
else:
yield f"{output_source_dir}/nanoarrow.c", nanoarrow_c
def bundle_nanoarrow_device(
root_dir,
header_namespace="nanoarrow/",
output_source_dir="src",
output_include_dir="include",
):
root_dir = pathlib.Path(root_dir)
src_dir = root_dir / "src" / "nanoarrow"
output_source_dir = pathlib.Path(output_source_dir)
output_include_dir = pathlib.Path(output_include_dir) / header_namespace
# Generate headers
for filename in ["nanoarrow_device.h", "nanoarrow_device.hpp"]:
content = read_content(src_dir / filename)
content = namespace_nanoarrow_includes(content, header_namespace)
yield f"{output_include_dir}/{filename}", content
# Generate sources
content = concatenate_content(
[src_dir / "device" / "device.c", src_dir / "device" / "cuda.c"]
)
content = namespace_nanoarrow_includes(content, header_namespace)
yield f"{output_source_dir}/nanoarrow_device.c", content
def bundle_nanoarrow_ipc(
root_dir,
header_namespace="nanoarrow/",
output_source_dir="src",
output_include_dir="include",
):
root_dir = pathlib.Path(root_dir)
src_dir = root_dir / "src" / "nanoarrow"
output_source_dir = pathlib.Path(output_source_dir)
output_include_dir = pathlib.Path(output_include_dir) / header_namespace
# Generate headers
for filename in [
"nanoarrow_ipc.h",
"nanoarrow_ipc.hpp",
]:
content = read_content(src_dir / filename)
content = namespace_nanoarrow_includes(content, header_namespace)
yield f"{output_include_dir}/{filename}", content
nanoarrow_ipc_c = concatenate_content(
[
src_dir / "ipc" / "flatcc_generated.h",
src_dir / "ipc" / "codecs.c",
src_dir / "ipc" / "decoder.c",
src_dir / "ipc" / "encoder.c",
src_dir / "ipc" / "reader.c",
src_dir / "ipc" / "writer.c",
]
)
nanoarrow_ipc_c = nanoarrow_ipc_c.replace(
'#include "nanoarrow/ipc/flatcc_generated.h"', ""
)
nanoarrow_ipc_c = namespace_nanoarrow_includes(nanoarrow_ipc_c, header_namespace)
yield f"{output_source_dir}/nanoarrow_ipc.c", nanoarrow_ipc_c
def bundle_nanoarrow_testing(
root_dir,
header_namespace="nanoarrow/",
output_source_dir="src",
output_include_dir="include",
):
root_dir = pathlib.Path(root_dir)
src_dir = root_dir / "src" / "nanoarrow"
output_source_dir = pathlib.Path(output_source_dir)
output_include_dir = pathlib.Path(output_include_dir) / header_namespace
# Generate headers
for filename in [
"nanoarrow_testing.hpp",
"nanoarrow_gtest_util.hpp",
]:
content = read_content(src_dir / filename)
content = namespace_nanoarrow_includes(content, header_namespace)
yield f"{output_include_dir}/{filename}", content
nanoarrow_testing_cc = concatenate_content(
[
src_dir / "testing" / "testing.cc",
]
)
nanoarrow_testing_cc = namespace_nanoarrow_includes(
nanoarrow_testing_cc, header_namespace
)
yield f"{output_source_dir}/nanoarrow_testing.cc", nanoarrow_testing_cc
def bundle_flatcc(
root_dir,
output_source_dir="src",
output_include_dir="include",
):
root_dir = pathlib.Path(root_dir)
flatcc_dir = root_dir / "thirdparty" / "flatcc"
output_source_dir = pathlib.Path(output_source_dir)
output_include_dir = pathlib.Path(output_include_dir)
# Generate headers
include_dir = flatcc_dir / "include"
for abs_filename in include_dir.glob("flatcc/**/*.h"):
filename = abs_filename.relative_to(include_dir)
yield f"{output_include_dir}/{filename}", read_content(
flatcc_dir / "include" / filename
)
# Generate sources
src_dir = flatcc_dir / "src" / "runtime"
flatcc_c = concatenate_content(
[
src_dir / "builder.c",
src_dir / "emitter.c",
src_dir / "verifier.c",
src_dir / "refmap.c",
]
)
yield f"{output_source_dir}/flatcc.c", flatcc_c
def ensure_output_path_exists(out_path: pathlib.Path):
if out_path.is_dir() and out_path.exists():
return
if out_path.is_file() and out_path.exists():
raise ValueError(f"Can't create directory '{out_path}': exists and is a file")
ensure_output_path_exists(out_path.parent)
os.mkdir(out_path)
def do_bundle(bundler):
for out_file, out_content in bundler:
out_path = pathlib.Path(out_file)
ensure_output_path_exists(out_path.parent)
write_content(out_content, out_path)
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(description="Bundled nanoarrow distribution")
parser.add_argument(
"--include-output-dir",
help="include/ directory in which nanoarrow headers should be placed",
)
parser.add_argument(
"--source-output-dir",
help="Directory in which nanoarrow source files should be placed",
)
parser.add_argument(
"--symbol-namespace", help="A value with which symbols should be prefixed"
)
parser.add_argument(
"--header-namespace",
help=(
"The directory within include-output-dir that nanoarrow headers should be"
"placed"
),
default="nanoarrow/",
)
parser.add_argument(
"--output-dir",
help=(
"If include-output-dir or source-output-dir are missing, ensures a single "
"output directory with include/ and src/ subdirectories containing the "
"headers and sources, respectively"
),
default="dist",
)
parser.add_argument(
"--cpp", help="Bundle sources as C++ where possible", action="store_true"
)
parser.add_argument(
"--with-device",
help="Include nanoarrow_device sources/headers",
action="store_true",
)
parser.add_argument(
"--with-ipc",
help="Include nanoarrow_ipc sources/headers",
action="store_true",
)
parser.add_argument(
"--with-testing",
help="Include nanoarrow_testing sources/headers",
action="store_true",
)
parser.add_argument(
"--with-flatcc",
help="Include flatcc sources/headers",
action="store_true",
)
args = parser.parse_args()
if args.include_output_dir is None:
args.include_output_dir = pathlib.Path(args.output_dir) / "include"
if args.source_output_dir is None:
args.source_output_dir = pathlib.Path(args.output_dir) / "src"
root_dir = pathlib.Path(__file__).parent.parent.parent
# Bundle nanoarrow
do_bundle(
bundle_nanoarrow(
root_dir,
symbol_namespace=args.symbol_namespace,
header_namespace=args.header_namespace,
output_source_dir=args.source_output_dir,
output_include_dir=args.include_output_dir,
cpp=args.cpp,
)
)
# Bundle nanoarrow_device
if args.with_device:
do_bundle(
bundle_nanoarrow_device(
root_dir,
header_namespace=args.header_namespace,
output_source_dir=args.source_output_dir,
output_include_dir=args.include_output_dir,
)
)
# Bundle nanoarrow_ipc
if args.with_ipc:
do_bundle(
bundle_nanoarrow_ipc(
root_dir,
header_namespace=args.header_namespace,
output_source_dir=args.source_output_dir,
output_include_dir=args.include_output_dir,
)
)
# Bundle nanoarrow_testing
if args.with_testing:
do_bundle(
bundle_nanoarrow_testing(
root_dir,
header_namespace=args.header_namespace,
output_source_dir=args.source_output_dir,
output_include_dir=args.include_output_dir,
)
)
# Bundle flatcc
if args.with_flatcc:
do_bundle(
bundle_flatcc(
root_dir,
output_source_dir=args.source_output_dir,
output_include_dir=args.include_output_dir,
)
)