bazel/arrow/pyarrow_configure.bzl (232 lines of code) (raw):
"""Setup pyarrow as external dependency."""
# This file is derived from https://github.com/tensorflow/tensorflow/blob/5a244072f2b33d2347e803146c244c179c1ddb75/third_party/py/python_configure.bzl.
def _fail(msg):
    """Aborts evaluation with a colorized configuration error message.

    Args:
      msg: the text to print after the red error banner.
    """

    # ANSI escape sequences: switch to red for the banner, then reset.
    fail("%sPython Configuration Error:%s %s\n" % ("\033[0;31m", "\033[0m", msg))
def _is_windows(repository_ctx):
    """Tells whether the host operating system is Windows.

    Args:
      repository_ctx: the repository_ctx object.

    Returns:
      True if the lower-cased host OS name contains "windows", else False.
    """
    return "windows" in repository_ctx.os.name.lower()
def _execute(
        repository_ctx,
        cmdline,
        error_msg = None,
        error_details = None,
        empty_stdout_fine = False):
    """Runs a command via repository_ctx and aborts if it did not succeed.

    A run counts as failed when the return code is non-zero, or when stdout
    is empty and empty_stdout_fine is False.

    Args:
      repository_ctx: the repository_ctx object.
      cmdline: list of strings, the command to execute.
      error_msg: string, a summary of the error if the command fails.
      error_details: string, details about the error or steps to fix it.
      empty_stdout_fine: bool, if True, an empty stdout result is acceptable;
        otherwise it is treated as an error.

    Returns:
      The result of repository_ctx.execute(cmdline).
    """
    result = repository_ctx.execute(cmdline)

    # Happy path: clean exit and (if required) some output on stdout.
    if result.return_code == 0 and (empty_stdout_fine or result.stdout):
        return result

    summary = error_msg.strip() if error_msg else "Repository command failed"
    details = error_details if error_details else ""
    report = [
        summary,
        "return code: " + str(result.return_code),
        result.stderr.strip(),
        result.stdout.strip(),
        details,
    ]
    _fail("\n".join(report))
    return result
def _read_dir(repository_ctx, src_dir):
    """Lists every file below a directory as a newline-separated string.

    On non-Windows hosts this runs `find <src_dir> -follow -type f`, so
    subfolders are traversed and symlinks are followed. On Windows it runs
    `dir /b /s /a-d` through cmd.exe and converts the output to forward
    slashes.

    Args:
      repository_ctx: the repository_ctx object.
      src_dir: directory to find files from.

    Returns:
      A string with the path of each file found, separated by line breaks.
    """
    if not _is_windows(repository_ctx):
        listing = _execute(
            repository_ctx,
            ["find", src_dir, "-follow", "-type", "f"],
            empty_stdout_fine = True,
        )
        return listing.stdout

    # cmd.exe's `dir` is handed a backslash-separated path.
    windows_dir = src_dir.replace("/", "\\")
    listing = _execute(
        repository_ctx,
        ["cmd.exe", "/c", "dir", windows_dir, "/b", "/s", "/a-d"],
        empty_stdout_fine = True,
    )

    # The listing ends up in genrule.outs, where paths must use forward
    # slashes.
    return listing.stdout.replace("\\", "/")
def _genrule(genrule_name, command, outs):
    """Renders the BUILD-file text of a genrule target.

    The genrule executes the given command and produces the given outputs.

    Args:
      genrule_name: A unique name for the genrule target.
      command: The command to run, inserted verbatim into the cmd attribute.
      outs: A string with the already-formatted entries of the outs list.

    Returns:
      A string containing the genrule definition.
    """
    name_attr = ' name = "' + genrule_name + '",\n'
    outs_attr = " outs = [\n" + outs + "\n ],\n"
    cmd_attr = ' cmd = """\n' + command + '\n """,\n'
    return "".join(["genrule(\n", name_attr, outs_attr, cmd_attr, ")\n"])
def _norm_path(path):
    """Returns the path with '/' separators and without one trailing slash.

    Args:
      path: string, a file system path using either '/' or '\\' separators.
        May be empty.

    Returns:
      The path with every backslash replaced by a forward slash and, if it
      ended with a slash, that single trailing slash removed. An empty path
      is returned unchanged.
    """
    path = path.replace("\\", "/")

    # endswith() is safe on an empty string, unlike indexing path[-1].
    if path.endswith("/"):
        path = path[:-1]
    return path
def _symlink_genrule_for_dir(
        repository_ctx,
        src_dir,
        dest_dir,
        genrule_name,
        src_files = [],
        dest_files = []):
    """Returns a genrule that copies a set of files into the repository.

    Despite the name, the generated command copies each file with `cp -f`
    rather than symlinking it.

    If src_dir is passed, files will be read from the given directory;
    otherwise we assume files are in src_files and dest_files.

    Args:
      repository_ctx: the repository_ctx object.
      src_dir: source directory, or None to use src_files/dest_files.
      dest_dir: directory (relative to the genrule output dir) to copy into.
      genrule_name: genrule name.
      src_files: list of source files, used instead of src_dir.
      dest_files: list of corresponding destination files; entry i is the
        destination of src_files[i].

    Returns:
      A string with the genrule target that performs the copies.
    """
    if src_dir != None:
        src_dir = _norm_path(src_dir)
        dest_dir = _norm_path(dest_dir)
        src_files = sorted(_read_dir(repository_ctx, src_dir).splitlines())

        # Strip only the leading src_dir to obtain the output-relative path.
        # A blanket replace() would also delete later occurrences of the
        # same string inside a path and corrupt the destination. Paths that
        # unexpectedly lack the prefix keep the previous replace() handling.
        dest_files = [
            path[len(src_dir):] if path.startswith(src_dir) else path.replace(src_dir, "")
            for path in src_files
        ]

    # If we have only one file to copy we do not want to use the dest_dir,
    # as $(@D) will include the full path to the file.
    single_file = len(dest_files) == 1

    command = []
    outs = []
    for i, dest_file in enumerate(dest_files):
        if dest_file == "":
            continue
        if single_file:
            dest = "$(@D)/" + dest_file
        else:
            dest = "$(@D)/" + dest_dir + dest_file

        # Copy (rather than link) the files to create a sandboxable setup.
        command.append('cp -f "%s" "%s"' % (src_files[i], dest))
        outs.append(' "' + dest_dir + dest_file + '",')

    return _genrule(
        genrule_name,
        " && ".join(command),
        "\n".join(outs),
    )
def _get_pyarrow_include(repository_ctx, python_bin = "python3"):
    """Gets the pyarrow include path.

    Runs `pyarrow.get_include()` in the given Python interpreter.

    Args:
      repository_ctx: the repository_ctx object.
      python_bin: name or path of the Python interpreter to run.

    Returns:
      The first line printed by the interpreter, with backslashes converted
      to forward slashes.
    """
    result = _execute(
        repository_ctx,
        [python_bin, "-c", "import pyarrow;print(pyarrow.get_include())"],
        error_msg = "Problem getting pyarrow include path.",
        # The probe imports pyarrow, so that is the package to point the
        # user at when it fails.
        error_details = (
            "Is the Python binary path set up right? " + "(See ./configure or " +
            python_bin + ".) " + "Is pyarrow installed?"
        ),
    )
    return result.stdout.splitlines()[0].replace("\\", "/")
def _get_pyarrow_shared_library(repository_ctx, library_name, python_bin = "python3"):
    """Gets the path of a shared library shipped inside the pyarrow package.

    Args:
      repository_ctx: the repository_ctx object.
      library_name: file name or glob pattern (e.g. "libarrow.*") looked up
        in the directory that contains pyarrow/__init__.py.
      python_bin: name or path of the Python interpreter to run.

    Returns:
      The path of the matching library, with backslashes converted to
      forward slashes. When several files match the pattern, the
      lexicographically smallest path is returned.
    """

    # glob.glob() returns matches in arbitrary order, so sort before taking
    # the first one to keep the generated repository deterministic.
    code = (
        "import pyarrow, os, glob;" +
        "print(sorted(glob.glob(os.path.join(" +
        "os.path.dirname(pyarrow.__file__), '{}')))[0])".format(library_name)
    )
    result = _execute(
        repository_ctx,
        [python_bin, "-c", code],
        error_msg = "Problem getting pyarrow shared library path.",
        # The probe imports pyarrow, so that is the package to point the
        # user at when it fails.
        error_details = (
            "Is the Python binary path set up right? " + "(See ./configure or " +
            python_bin + ".) " + "Is pyarrow installed?"
        ),
    )
    return result.stdout.splitlines()[0].replace("\\", "/")
#python numpy include
def _get_python_numpy_include(repository_ctx, python_bin = "python3"):
    """Gets the python numpy include path.

    Runs `numpy.get_include()` in the given Python interpreter.

    Args:
      repository_ctx: the repository_ctx object.
      python_bin: name or path of the Python interpreter to run.

    Returns:
      The first line printed by the interpreter, with backslashes converted
      to forward slashes.
    """
    result = _execute(
        repository_ctx,
        [
            python_bin,
            "-c",
            "from __future__ import print_function;import numpy;print(numpy.get_include())",
        ],
        error_msg = "Problem getting python numpy include path.",
        # The probe imports numpy, so that is the package to point the user
        # at when it fails.
        error_details = (
            "Is the Python binary path set up right? " + "(See ./configure or " +
            python_bin + ".) " + "Is numpy installed?"
        ),
    )
    return result.stdout.splitlines()[0].replace("\\", "/")
def _pyarrow_pip_impl(repository_ctx):
    """Generates the BUILD file of the pyarrow external repository.

    Queries the host Python for the pyarrow headers, the arrow and
    arrow_python shared libraries and the numpy headers, renders one copy
    genrule for each, and substitutes them into the platform's BUILD
    template. On Windows the arrow and arrow_python import libraries (.lib)
    are added as well.

    Args:
      repository_ctx: the repository_ctx object.
    """
    on_windows = _is_windows(repository_ctx)

    # python 3.x is usually named as `python` by default on windows.
    python_bin = "python" if on_windows else "python3"

    # pyarrow headers.
    header_dir = _get_pyarrow_include(repository_ctx, python_bin)
    header_rule = _symlink_genrule_for_dir(
        repository_ctx,
        header_dir,
        "include",
        "arrow_header_include",
    )

    # Core arrow shared library.
    lib_path = _get_pyarrow_shared_library(
        repository_ctx,
        "arrow.dll" if on_windows else "libarrow.*",
        python_bin,
    )
    lib_rule = _symlink_genrule_for_dir(
        repository_ctx,
        None,
        "",
        "libarrow",
        [lib_path],
        [lib_path.rsplit("/", 1)[-1]],
    )

    # arrow_python shared library.
    py_lib_path = _get_pyarrow_shared_library(
        repository_ctx,
        "arrow_python.dll" if on_windows else "libarrow_python.*",
        python_bin,
    )
    py_lib_rule = _symlink_genrule_for_dir(
        repository_ctx,
        None,
        "",
        "libarrow_python",
        [py_lib_path],
        [py_lib_path.rsplit("/", 1)[-1]],
    )

    # numpy headers.
    numpy_dir = _get_python_numpy_include(repository_ctx, python_bin)
    numpy_rule = _symlink_genrule_for_dir(
        repository_ctx,
        numpy_dir,
        "python_numpy_include",
        "python_numpy_include",
    )

    if not on_windows:
        repository_ctx.template(
            "BUILD",
            repository_ctx.path(Label("//bazel/arrow:BUILD.tpl.bzl")),
            {
                "%{ARROW_HEADER_GENRULE}": header_rule,
                "%{ARROW_LIBRARY_GENRULE}": lib_rule,
                "%{ARROW_PYTHON_LIBRARY_GENRULE}": py_lib_rule,
                "%{PYTHON_NUMPY_INCLUDE_GENRULE}": numpy_rule,
            },
        )
        return

    # Windows additionally needs the import (.lib) libraries.
    itf_path = _get_pyarrow_shared_library(repository_ctx, "arrow.lib", python_bin)
    itf_rule = _symlink_genrule_for_dir(
        repository_ctx,
        None,
        "",
        "libarrow_interface",
        [itf_path],
        [itf_path.rsplit("/", 1)[-1]],
    )
    py_itf_path = _get_pyarrow_shared_library(repository_ctx, "arrow_python.lib", python_bin)
    py_itf_rule = _symlink_genrule_for_dir(
        repository_ctx,
        None,
        "",
        "libarrow_python_interface",
        [py_itf_path],
        [py_itf_path.rsplit("/", 1)[-1]],
    )

    repository_ctx.template(
        "BUILD",
        repository_ctx.path(Label("//bazel/arrow:BUILD.windows.bzl")),
        {
            "%{ARROW_HEADER_GENRULE}": header_rule,
            "%{ARROW_LIBRARY_GENRULE}": lib_rule,
            "%{ARROW_ITF_LIBRARY_GENRULE}": itf_rule,
            "%{ARROW_PYTHON_LIBRARY_GENRULE}": py_lib_rule,
            "%{ARROW_PYTHON_ITF_LIB_GENRULE}": py_itf_rule,
            "%{PYTHON_NUMPY_INCLUDE_GENRULE}": numpy_rule,
        },
    )
# Repository rule implemented by _pyarrow_pip_impl: it probes the host Python
# for pyarrow and numpy and writes a BUILD file, rendered from a template,
# containing genrules for their headers and shared libraries. The rule
# declares no attributes.
pyarrow_configure = repository_rule(
implementation = _pyarrow_pip_impl,
)