python/setup.py (284 lines of code) (raw):
#!/usr/bin/env python
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import contextlib
import os
import os.path
from os.path import join as pjoin
import re
import shlex
import sys
import warnings
if sys.version_info >= (3, 10):
import sysconfig
else:
# Get correct EXT_SUFFIX on Windows (https://bugs.python.org/issue39825)
from distutils import sysconfig
from setuptools import setup, Extension, Distribution
from Cython.Distutils import build_ext as _build_ext
import Cython
# Check if we're running 64-bit Python
is_64_bit = sys.maxsize > 2**32
# We can't use sys.platform in a cross-compiling situation
# as here it may be set to the host not target platform
is_emscripten = (
sysconfig.get_config_var("SOABI")
and sysconfig.get_config_var("SOABI").find("emscripten") != -1
)
if Cython.__version__ < '3':
raise Exception(
'Please update your Cython version. Supported Cython >= 3')
setup_dir = os.path.abspath(os.path.dirname(__file__))
ext_suffix = sysconfig.get_config_var('EXT_SUFFIX')
@contextlib.contextmanager
def changed_dir(dirname):
oldcwd = os.getcwd()
os.chdir(dirname)
try:
yield
finally:
os.chdir(oldcwd)
def strtobool(val):
"""Convert a string representation of truth to true (1) or false (0).
True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values
are 'n', 'no', 'f', 'false', 'off', and '0'. Raises ValueError if
'val' is anything else.
"""
# Copied from distutils
val = val.lower()
if val in ('y', 'yes', 't', 'true', 'on', '1'):
return 1
elif val in ('n', 'no', 'f', 'false', 'off', '0'):
return 0
else:
raise ValueError("invalid truth value %r" % (val,))
MSG_DEPR_SETUP_BUILD_FLAGS = """
!!
***********************************************************************
The '{}' flag is being passed to setup.py, but this is
deprecated.
If a certain component is available in Arrow C++, it will automatically
be enabled for the PyArrow build as well. If you want to force the
build of a certain component, you can still use the
PYARROW_WITH_$COMPONENT environment variable.
***********************************************************************
!!
"""
class build_ext(_build_ext):
_found_names = ()
def build_extensions(self):
import numpy
numpy_incl = numpy.get_include()
self.extensions = [ext for ext in self.extensions
if ext.name != '__dummy__']
for ext in self.extensions:
if (hasattr(ext, 'include_dirs') and
numpy_incl not in ext.include_dirs):
ext.include_dirs.append(numpy_incl)
_build_ext.build_extensions(self)
def run(self):
self._run_cmake()
_build_ext.run(self)
# adapted from cmake_build_ext in dynd-python
# github.com/libdynd/dynd-python
description = "Build the C-extensions for arrow"
user_options = ([('cmake-generator=', None, 'CMake generator'),
('extra-cmake-args=', None, 'extra arguments for CMake'),
('build-type=', None,
'build type (debug or release), default release'),
('boost-namespace=', None,
'namespace of boost (default: boost)'),
('with-cuda', None, 'build the Cuda extension'),
('with-flight', None, 'build the Flight extension'),
('with-substrait', None, 'build the Substrait extension'),
('with-acero', None, 'build the Acero Engine extension'),
('with-dataset', None, 'build the Dataset extension'),
('with-parquet', None, 'build the Parquet extension'),
('with-parquet-encryption', None,
'build the Parquet encryption extension'),
('with-azure', None,
'build the Azure Blob Storage extension'),
('with-gcs', None,
'build the Google Cloud Storage (GCS) extension'),
('with-s3', None, 'build the Amazon S3 extension'),
('with-static-parquet', None, 'link parquet statically'),
('with-static-boost', None, 'link boost statically'),
('with-orc', None, 'build the ORC extension'),
('with-gandiva', None, 'build the Gandiva extension'),
('generate-coverage', None,
'enable Cython code coverage'),
('bundle-boost', None,
'bundle the (shared) Boost libraries'),
('bundle-cython-cpp', None,
'bundle generated Cython C++ code '
'(used for code coverage)'),
('bundle-arrow-cpp', None,
'bundle the Arrow C++ libraries'),
('bundle-arrow-cpp-headers', None,
'bundle the Arrow C++ headers')] +
_build_ext.user_options)
def initialize_options(self):
_build_ext.initialize_options(self)
self.cmake_generator = os.environ.get('PYARROW_CMAKE_GENERATOR')
if not self.cmake_generator and sys.platform == 'win32':
self.cmake_generator = 'Visual Studio 15 2017 Win64'
self.extra_cmake_args = os.environ.get('PYARROW_CMAKE_OPTIONS', '')
self.build_type = os.environ.get('PYARROW_BUILD_TYPE',
'release').lower()
self.cmake_cxxflags = os.environ.get('PYARROW_CXXFLAGS', '')
if sys.platform == 'win32':
# Cannot do debug builds in Windows unless Python itself is a debug
# build
if not hasattr(sys, 'gettotalrefcount'):
self.build_type = 'release'
self.with_azure = None
self.with_gcs = None
self.with_s3 = None
self.with_hdfs = None
self.with_cuda = None
self.with_substrait = None
self.with_flight = None
self.with_acero = None
self.with_dataset = None
self.with_parquet = None
self.with_parquet_encryption = None
self.with_orc = None
self.with_gandiva = None
self.generate_coverage = strtobool(
os.environ.get('PYARROW_GENERATE_COVERAGE', '0'))
self.bundle_arrow_cpp = strtobool(
os.environ.get('PYARROW_BUNDLE_ARROW_CPP', '0'))
self.bundle_cython_cpp = strtobool(
os.environ.get('PYARROW_BUNDLE_CYTHON_CPP', '0'))
CYTHON_MODULE_NAMES = [
'lib',
'_fs',
'_csv',
'_json',
'_compute',
'_cuda',
'_flight',
'_dataset',
'_dataset_orc',
'_dataset_parquet',
'_acero',
'_feather',
'_parquet',
'_parquet_encryption',
'_pyarrow_cpp_tests',
'_orc',
'_azurefs',
'_gcsfs',
'_s3fs',
'_substrait',
'_hdfs',
'gandiva']
def _run_cmake(self):
# check if build_type is correctly passed / set
if self.build_type.lower() not in ('release', 'debug',
'relwithdebinfo'):
raise ValueError("--build-type (or PYARROW_BUILD_TYPE) needs to "
"be 'release', 'debug' or 'relwithdebinfo'")
# The directory containing this setup.py
source = os.path.dirname(os.path.abspath(__file__))
# The staging directory for the module being built
build_cmd = self.get_finalized_command('build')
saved_cwd = os.getcwd()
build_temp = pjoin(saved_cwd, build_cmd.build_temp)
build_lib = pjoin(saved_cwd, build_cmd.build_lib)
if not os.path.isdir(build_temp):
self.mkpath(build_temp)
if self.inplace:
# a bit hacky
build_lib = saved_cwd
install_prefix = pjoin(build_lib, "pyarrow")
# Change to the build directory
with changed_dir(build_temp):
# Detect if we built elsewhere
if os.path.isfile('CMakeCache.txt'):
cachefile = open('CMakeCache.txt', 'r')
cachedir = re.search('CMAKE_CACHEFILE_DIR:INTERNAL=(.*)',
cachefile.read()).group(1)
cachefile.close()
if (cachedir != build_temp):
build_base = pjoin(saved_cwd, build_cmd.build_base)
print(f"-- Skipping build. Temp build {build_temp} does "
f"not match cached dir {cachedir}")
print("---- For a clean build you might want to delete "
f"{build_base}.")
return
cmake_options = [
f'-DCMAKE_INSTALL_PREFIX={install_prefix}',
f'-DPYTHON_EXECUTABLE={sys.executable}',
f'-DPython3_EXECUTABLE={sys.executable}',
f'-DPYARROW_CXXFLAGS={self.cmake_cxxflags}',
]
def append_cmake_bool(value, varname):
cmake_options.append('-D{0}={1}'.format(
varname, 'on' if value else 'off'))
def append_cmake_component(flag, varname):
# only pass this to cmake if the user pass the --with-component
# flag to setup.py build_ext
if flag is not None:
flag_name = (
"--with-"
+ varname.removeprefix("PYARROW_").lower().replace("_", "-"))
warnings.warn(
MSG_DEPR_SETUP_BUILD_FLAGS.format(flag_name),
UserWarning, stacklevel=2
)
append_cmake_bool(flag, varname)
if self.cmake_generator:
cmake_options += ['-G', self.cmake_generator]
append_cmake_component(self.with_cuda, 'PYARROW_CUDA')
append_cmake_component(self.with_substrait, 'PYARROW_SUBSTRAIT')
append_cmake_component(self.with_flight, 'PYARROW_FLIGHT')
append_cmake_component(self.with_gandiva, 'PYARROW_GANDIVA')
append_cmake_component(self.with_acero, 'PYARROW_ACERO')
append_cmake_component(self.with_dataset, 'PYARROW_DATASET')
append_cmake_component(self.with_orc, 'PYARROW_ORC')
append_cmake_component(self.with_parquet, 'PYARROW_PARQUET')
append_cmake_component(self.with_parquet_encryption,
'PYARROW_PARQUET_ENCRYPTION')
append_cmake_component(self.with_azure, 'PYARROW_AZURE')
append_cmake_component(self.with_gcs, 'PYARROW_GCS')
append_cmake_component(self.with_s3, 'PYARROW_S3')
append_cmake_component(self.with_hdfs, 'PYARROW_HDFS')
append_cmake_bool(self.bundle_arrow_cpp,
'PYARROW_BUNDLE_ARROW_CPP')
append_cmake_bool(self.bundle_cython_cpp,
'PYARROW_BUNDLE_CYTHON_CPP')
append_cmake_bool(self.generate_coverage,
'PYARROW_GENERATE_COVERAGE')
cmake_options.append(
f'-DCMAKE_BUILD_TYPE={self.build_type.lower()}')
extra_cmake_args = shlex.split(self.extra_cmake_args)
build_tool_args = []
if sys.platform == 'win32':
if not is_64_bit:
raise RuntimeError('Not supported on 32-bit Windows')
else:
build_tool_args.append('--')
if os.environ.get('PYARROW_BUILD_VERBOSE', '0') == '1':
cmake_options.append('-DCMAKE_VERBOSE_MAKEFILE=ON')
parallel = os.environ.get('PYARROW_PARALLEL')
if parallel:
build_tool_args.append(f'-j{parallel}')
# Generate the build files
if is_emscripten:
print("-- Running emcmake cmake for PyArrow on Emscripten")
self.spawn(['emcmake', 'cmake'] + extra_cmake_args +
cmake_options + [source])
else:
print("-- Running cmake for PyArrow")
self.spawn(['cmake'] + extra_cmake_args + cmake_options + [source])
print("-- Finished cmake for PyArrow")
print("-- Running cmake --build for PyArrow")
self.spawn(['cmake', '--build', '.', '--config', self.build_type] +
build_tool_args)
print("-- Finished cmake --build for PyArrow")
print("-- Running cmake --build --target install for PyArrow")
self.spawn(['cmake', '--build', '.', '--config', self.build_type] +
['--target', 'install'] + build_tool_args)
print("-- Finished cmake --build --target install for PyArrow")
self._found_names = []
for name in self.CYTHON_MODULE_NAMES:
built_path = pjoin(install_prefix, name + ext_suffix)
if os.path.exists(built_path):
self._found_names.append(name)
def _get_build_dir(self):
# Get the package directory from build_py
build_py = self.get_finalized_command('build_py')
return build_py.get_package_dir('pyarrow')
def _get_cmake_ext_path(self, name):
# This is the name of the arrow C-extension
filename = name + ext_suffix
return pjoin(self._get_build_dir(), filename)
def get_ext_generated_cpp_source(self, name):
if sys.platform == 'win32':
head, tail = os.path.split(name)
return pjoin(head, tail + ".cpp")
else:
return pjoin(name + ".cpp")
def get_ext_built_api_header(self, name):
if sys.platform == 'win32':
head, tail = os.path.split(name)
return pjoin(head, tail + "_api.h")
else:
return pjoin(name + "_api.h")
def get_names(self):
return self._found_names
def get_outputs(self):
# Just the C extensions
# regular_exts = _build_ext.get_outputs(self)
return [self._get_cmake_ext_path(name)
for name in self.get_names()]
class BinaryDistribution(Distribution):
def has_ext_modules(foo):
return True
setup(
distclass=BinaryDistribution,
# Dummy extension to trigger build_ext
ext_modules=[Extension('__dummy__', sources=[])],
cmdclass={
'build_ext': build_ext
},
)