bazel/arrow/pyarrow_configure.bzl (232 lines of code) (raw):

"""Setup pyarrow as external dependency."""

# This file is derived from https://github.com/tensorflow/tensorflow/blob/5a244072f2b33d2347e803146c244c179c1ddb75/third_party/py/python_configure.bzl.

def _fail(msg):
    """Aborts the repository rule with a highlighted configuration error.

    Args:
      msg: string, description of what went wrong.
    """
    red = "\033[0;31m"
    no_color = "\033[0m"
    fail("%sPython Configuration Error:%s %s\n" % (red, no_color, msg))

def _is_windows(repository_ctx):
    """Returns True if the host operating system is Windows."""
    return "windows" in repository_ctx.os.name.lower()

def _execute(
        repository_ctx,
        cmdline,
        error_msg = None,
        error_details = None,
        empty_stdout_fine = False):
    """Executes an arbitrary command and fails the rule if it does not succeed.

    Args:
      repository_ctx: the repository_ctx object.
      cmdline: list of strings, the command to execute.
      error_msg: string, a summary of the error if the command fails.
      error_details: string, details about the error or steps to fix it.
      empty_stdout_fine: bool, if True, an empty stdout result is fine,
        otherwise it's an error.

    Returns:
      The result of repository_ctx.execute(cmdline).
    """
    result = repository_ctx.execute(cmdline)

    # A non-zero exit code is always an error; an empty stdout is an error
    # unless the caller explicitly allowed it.
    if (result.return_code != 0) or not (empty_stdout_fine or result.stdout):
        _fail("\n".join([
            error_msg.strip() if error_msg else "Repository command failed",
            "return code: " + str(result.return_code),
            result.stderr.strip(),
            result.stdout.strip(),
            error_details if error_details else "",
        ]))
    return result

def _read_dir(repository_ctx, src_dir):
    """Returns a string with all files in a directory.

    Finds all files inside a directory, traversing subfolders (and, on
    non-Windows hosts, following symlinks via `find -follow`). The returned
    string contains the full path of all files separated by line breaks.

    Args:
      repository_ctx: the repository_ctx object.
      src_dir: directory to find files from.

    Returns:
      A string of all files inside the given dir.
    """
    if _is_windows(repository_ctx):
        src_dir = src_dir.replace("/", "\\")
        find_result = _execute(
            repository_ctx,
            ["cmd.exe", "/c", "dir", src_dir, "/b", "/s", "/a-d"],
            empty_stdout_fine = True,
        )

        # src_files will be used in genrule.outs where the paths must
        # use forward slashes.
        result = find_result.stdout.replace("\\", "/")
    else:
        find_result = _execute(
            repository_ctx,
            ["find", src_dir, "-follow", "-type", "f"],
            empty_stdout_fine = True,
        )
        result = find_result.stdout
    return result

def _genrule(genrule_name, command, outs):
    """Returns a string with a genrule.

    Genrule executes the given command and produces the given outputs.

    Args:
      genrule_name: A unique name for genrule target.
      command: The command to run.
      outs: A string holding the (already quoted, comma-terminated) output
        entries, one per line.

    Returns:
      The text of a genrule target.
    """
    return (
        "genrule(\n" +
        ' name = "' +
        genrule_name + '",\n' +
        " outs = [\n" +
        outs +
        "\n ],\n" +
        ' cmd = """\n' +
        command +
        '\n """,\n' +
        ")\n"
    )

def _norm_path(path):
    """Returns a path using '/' separators and without a trailing slash."""
    path = path.replace("\\", "/")

    # endswith() is safe on an empty string, unlike indexing path[-1].
    if path.endswith("/"):
        path = path[:-1]
    return path

def _symlink_genrule_for_dir(
        repository_ctx,
        src_dir,
        dest_dir,
        genrule_name,
        src_files = [],
        dest_files = []):
    """Returns a genrule that copies a set of files into the repository.

    If src_dir is passed, files will be read from the given directory;
    otherwise we assume files are in src_files and dest_files. The generated
    command copies each file with `cp -f`.

    Args:
      repository_ctx: the repository_ctx object.
      src_dir: source directory, or None to use src_files/dest_files.
      dest_dir: directory (relative to the genrule output dir) to copy into.
      genrule_name: genrule name.
      src_files: list of source files instead of src_dir.
      dest_files: list of corresponding destination files.

    Returns:
      The text of a genrule target that copies the files.
    """
    if src_dir != None:
        src_dir = _norm_path(src_dir)
        dest_dir = _norm_path(dest_dir)
        src_files = sorted(_read_dir(repository_ctx, src_dir).splitlines())

        # Strip only the leading src_dir from each path to obtain the output
        # path; a later occurrence of the same text inside a path must be kept.
        dest_files = [
            path[len(src_dir):] if path.startswith(src_dir) else path
            for path in src_files
        ]

    # If we have only one file to copy we do not want to use the dest_dir, as
    # $(@D) will include the full path to the file.
    single_file = len(dest_files) == 1

    command = []
    outs = []
    for i, rel_path in enumerate(dest_files):
        if rel_path == "":
            continue
        if single_file:
            dest = "$(@D)/" + rel_path
        else:
            dest = "$(@D)/" + dest_dir + rel_path

        # Copy the files to create a sandboxable setup.
        command.append('cp -f "%s" "%s"' % (src_files[i], dest))
        outs.append(' "' + dest_dir + rel_path + '",')
    return _genrule(
        genrule_name,
        " && ".join(command),
        "\n".join(outs),
    )

def _query_python(repository_ctx, python_bin, code, error_msg, package):
    """Runs `code` with `python_bin` and returns the first stdout line using '/' separators.

    Args:
      repository_ctx: the repository_ctx object.
      python_bin: string, the Python interpreter to invoke.
      code: string, the Python source passed to `-c`.
      error_msg: string, summary reported if the command fails.
      package: string, name of the Python package the snippet imports; used
        in the hint printed on failure.

    Returns:
      The first line printed by the snippet, with backslashes replaced by '/'.
    """
    result = _execute(
        repository_ctx,
        [python_bin, "-c", code],
        error_msg = error_msg,
        error_details = (
            "Is the Python binary path set up right? " +
            "(See ./configure or " + python_bin + ".) " +
            "Is " + package + " installed?"
        ),
    )
    return result.stdout.splitlines()[0].replace("\\", "/")

def _get_pyarrow_include(repository_ctx, python_bin = "python3"):
    """Gets the pyarrow include path."""
    return _query_python(
        repository_ctx,
        python_bin,
        "import pyarrow;print(pyarrow.get_include())",
        "Problem getting pyarrow include path.",
        "pyarrow",
    )

def _get_pyarrow_shared_library(repository_ctx, library_name, python_bin = "python3"):
    """Gets the pyarrow shared library path.

    Args:
      repository_ctx: the repository_ctx object.
      library_name: string, file name or glob pattern looked up inside the
        directory that contains the pyarrow package.
      python_bin: string, the Python interpreter to invoke.

    Returns:
      The first path matched by the glob, using '/' separators.
    """
    code = (
        "import pyarrow, os, glob;print(glob.glob(os.path.join(" +
        "os.path.dirname(pyarrow.__file__), '{}'))[0])".format(library_name)
    )
    return _query_python(
        repository_ctx,
        python_bin,
        code,
        "Problem getting pyarrow shared library path.",
        "pyarrow",
    )

def _get_python_numpy_include(repository_ctx, python_bin = "python3"):
    """Gets the python numpy include path."""
    return _query_python(
        repository_ctx,
        python_bin,
        "from __future__ import print_function;import numpy;print(numpy.get_include())",
        "Problem getting python numpy include path.",
        "numpy",
    )

def _pyarrow_library_genrule(repository_ctx, library_name, genrule_name, python_bin):
    """Returns a genrule that copies one pyarrow library file into the repository root."""
    library_path = _get_pyarrow_shared_library(repository_ctx, library_name, python_bin)
    library_file = library_path.rsplit("/", 1)[-1]
    return _symlink_genrule_for_dir(
        repository_ctx,
        None,
        "",
        genrule_name,
        [library_path],
        [library_file],
    )

def _pyarrow_pip_impl(repository_ctx):
    """Generates the BUILD file exposing the locally installed pyarrow and numpy.

    Queries the host Python for the pyarrow headers, the pyarrow libraries
    and the numpy headers, then instantiates the platform-specific BUILD
    template with genrules that copy them into this repository.

    Args:
      repository_ctx: the repository_ctx object.
    """
    is_windows = _is_windows(repository_ctx)

    # python 3.x is usually named as `python` by default on windows.
    python_bin = "python" if is_windows else "python3"

    arrow_header_rule = _symlink_genrule_for_dir(
        repository_ctx,
        _get_pyarrow_include(repository_ctx, python_bin),
        "include",
        "arrow_header_include",
    )
    arrow_library_rule = _pyarrow_library_genrule(
        repository_ctx,
        "arrow.dll" if is_windows else "libarrow.*",
        "libarrow",
        python_bin,
    )
    arrow_python_library_rule = _pyarrow_library_genrule(
        repository_ctx,
        "arrow_python.dll" if is_windows else "libarrow_python.*",
        "libarrow_python",
        python_bin,
    )
    python_numpy_include_rule = _symlink_genrule_for_dir(
        repository_ctx,
        _get_python_numpy_include(repository_ctx, python_bin),
        "python_numpy_include",
        "python_numpy_include",
    )

    substitutions = {
        "%{ARROW_HEADER_GENRULE}": arrow_header_rule,
        "%{ARROW_LIBRARY_GENRULE}": arrow_library_rule,
        "%{ARROW_PYTHON_LIBRARY_GENRULE}": arrow_python_library_rule,
        "%{PYTHON_NUMPY_INCLUDE_GENRULE}": python_numpy_include_rule,
    }
    if is_windows:
        # The Windows template additionally takes the .lib import libraries.
        substitutions["%{ARROW_ITF_LIBRARY_GENRULE}"] = _pyarrow_library_genrule(
            repository_ctx,
            "arrow.lib",
            "libarrow_interface",
            python_bin,
        )
        substitutions["%{ARROW_PYTHON_ITF_LIB_GENRULE}"] = _pyarrow_library_genrule(
            repository_ctx,
            "arrow_python.lib",
            "libarrow_python_interface",
            python_bin,
        )
        build_tpl = repository_ctx.path(Label("//bazel/arrow:BUILD.windows.bzl"))
    else:
        build_tpl = repository_ctx.path(Label("//bazel/arrow:BUILD.tpl.bzl"))
    repository_ctx.template("BUILD", build_tpl, substitutions)

pyarrow_configure = repository_rule(
    implementation = _pyarrow_pip_impl,
)