#!/bin/bash
GPHOME=${GPHOME:=/usr/local/greenplum-db-devel}
PXF_HOME=${PXF_HOME:=${GPHOME}/pxf}
MDD_VALUE=/data/gpdata/master/gpseg-1
PXF_COMMON_SRC_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
PXF_VERSION=${PXF_VERSION:=6}
PROXY_USER=${PROXY_USER:-pxfuser}
PROTOCOL=${PROTOCOL:-}
GOOGLE_PROJECT_ID=${GOOGLE_PROJECT_ID:-data-gpdb-ud}
PXF_SRC=$(find /tmp/build -name pxf_src -type d)
# intentionally not named PXF_CONF or PXF_BASE so that the value is not picked up during pxf operations
if [[ ${PXF_VERSION} == 5 ]]; then
BASE_DIR=~gpadmin/pxf
SHARE_DIR="${PXF_HOME}/lib"
TEMPLATES_DIR=${BASE_DIR}
else
BASE_DIR=${PXF_BASE_DIR:-$PXF_HOME}
SHARE_DIR="${PXF_HOME}/share"
TEMPLATES_DIR=${PXF_HOME}
fi
if [[ -f ~/.pxfrc ]]; then
source <(grep JAVA_HOME ~/.pxfrc)
echo "JAVA_HOME found in ${HOME}/.pxfrc, set to ${JAVA_HOME}..."
else
JAVA_HOME=$(find /usr/lib/jvm -name 'java-1.8.0-openjdk*' | head -1)
fi
# java home for hadoop services
HADOOP_JAVA_HOME=${HADOOP_JAVA_HOME:-$JAVA_HOME}
if [[ -d gpdb_src/gpAux/extensions/pxf ]]; then
PXF_EXTENSIONS_DIR=gpdb_src/gpAux/extensions/pxf
else
PXF_EXTENSIONS_DIR=gpdb_src/gpcontrib/pxf
fi
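# This file is a function library: Concourse task scripts are expected to source it
# rather than execute it (PXF_COMMON_SRC_DIR above is derived from BASH_SOURCE for
# that reason). A hypothetical caller might look like:
#
#   source "/path/to/pxf_src/concourse/scripts/pxf_common.bash"
#   install_gpdb_binary
#   setup_gpadmin_user
#   install_pxf_server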
function inflate_dependencies() {
local tarballs=() files_to_link=()
if [[ -f pxf-build-dependencies/pxf-build-dependencies.tar.gz ]]; then
tarballs+=(pxf-build-dependencies/pxf-build-dependencies.tar.gz)
files_to_link+=(~gpadmin/.{go-mod-cached-sources,gradle})
fi
if [[ -f pxf-automation-dependencies/pxf-automation-dependencies.tar.gz ]]; then
tarballs+=(pxf-automation-dependencies/pxf-automation-dependencies.tar.gz)
files_to_link+=(~gpadmin/.m2)
fi
if [[ -f regression-tools/regression-tools.tar.gz ]]; then
tarballs+=(regression-tools/regression-tools.tar.gz)
fi
# when running automation against GP7, we need the python2 dependencies shipped with GP6 for Tinc to work
# if required, these libraries will be fetched by a CI pipeline into the gp6-python-libs directory
if [[ -f gp6-python-libs/gp6-python-libs.tar.gz ]]; then
tarballs+=(gp6-python-libs/gp6-python-libs.tar.gz)
fi
(( ${#tarballs[@]} == 0 )) && return
for t in "${tarballs[@]}"; do
tar -xzf "${t}" -C ~gpadmin
done
ln -s "${files_to_link[@]}" ~root
chown -R gpadmin:gpadmin ~gpadmin
}
function inflate_singlecluster() {
local singlecluster=$(find singlecluster -name 'singlecluster*.tar.gz')
if [[ ! -f ${singlecluster} ]]; then
echo "Didn't find ${PWD}/singlecluster directory... skipping tarball inflation..."
return
fi
tar zxf "${singlecluster}" -C /
mv /singlecluster-* /singlecluster
chmod a+w /singlecluster
mkdir -p /etc/hadoop/conf /etc/hive/conf /etc/hbase/conf
ln -s /singlecluster/hadoop/etc/hadoop/*-site.xml /etc/hadoop/conf
ln -s /singlecluster/hive/conf/hive-site.xml /etc/hive/conf
ln -s /singlecluster/hbase/conf/hbase-site.xml /etc/hbase/conf
}
function set_env() {
export TERM=xterm-256color
export TIMEFORMAT=$'\e[4;33mIt took %R seconds to complete this step\e[0m';
}
function run_regression_test() {
ln -s "${PWD}/gpdb_src" ~gpadmin/gpdb_src
cat > ~gpadmin/run_regression_test.sh <<-EOF
#!/bin/bash
source /opt/gcc_env.sh || true
source ${GPHOME}/greenplum_path.sh
source gpdb_src/gpAux/gpdemo/gpdemo-env.sh
export PATH=\$PATH:${GPHD_ROOT}/bin
cd "${PXF_EXTENSIONS_DIR}"
make installcheck USE_PGXS=1
[[ -s regression.diffs ]] && cat regression.diffs && exit 1
exit 0
EOF
chown -R gpadmin:gpadmin "${PXF_EXTENSIONS_DIR}"
chown gpadmin:gpadmin ~gpadmin/run_regression_test.sh
chmod a+x ~gpadmin/run_regression_test.sh
su gpadmin -c ~gpadmin/run_regression_test.sh
}
function build_install_gpdb() {
bash -c "
source /opt/gcc_env.sh || true
cd '${PWD}/gpdb_src' || return 1
CC=\$(command -v gcc) CXX=\$(command -v g++) ./configure \
--with-{perl,python,libxml,zstd} \
--disable-{gpfdist,orca} \
'--prefix=${GPHOME}'
make -j4 -s
make -s install
"
}
function install_gpdb_binary() {
if [[ -d bin_gpdb ]]; then
mkdir -p ${GPHOME}
tar -xzf bin_gpdb/*.tar.gz -C ${GPHOME}
else
build_install_gpdb
fi
local gphome python_dir python_version=2.7 export_pythonpath='export PYTHONPATH=$PYTHONPATH'
# CentOS and RHEL both provide /etc/redhat-release (on CentOS it is a symlink to /etc/centos-release)
if [[ -f /etc/redhat-release ]]; then
# We can't use service sshd restart as service is not installed on CentOS 7 or RHEL 8.
/usr/sbin/sshd &
python_dir=python${python_version}/site-packages
export_pythonpath+=:/usr/lib/${python_dir}:/usr/lib64/$python_dir
elif [[ -f /etc/debian_version ]]; then
service ssh start
python_dir=python${python_version}/dist-packages
export_pythonpath+=:/usr/local/lib/$python_dir
fi
echo "$export_pythonpath" >> "${PXF_SRC}/automation/tinc/main/tinc_env.sh"
}
function install_gpdb_package() {
local gphome python_dir python_version=2.7 export_pythonpath='export PYTHONPATH=$PYTHONPATH' pkg_file version
gpdb_package=${PWD}/${GPDB_PKG_DIR:-gpdb_package}
if command -v rpm; then
# install GPDB RPM
pkg_file=$(find "${gpdb_package}" -name 'greenplum-db-*x86_64.rpm')
if [[ -z ${pkg_file} ]]; then
echo "Couldn't find RPM file in ${gpdb_package}. Skipping install..."
return 1
fi
echo "Installing ${pkg_file}..."
rpm --quiet -ivh "${pkg_file}" >/dev/null
# We can't use service sshd restart as service is not installed on CentOS 7 or RHEL 8.
/usr/sbin/sshd &
python_dir=python${python_version}/site-packages
export_pythonpath+=:/usr/lib/${python_dir}:/usr/lib64/${python_dir}
elif command -v apt-get; then
# install GPDB DEB, apt-get wants an absolute path
pkg_file=$(find "${gpdb_package}" -name 'greenplum-db-*-ubuntu18.04-amd64.deb')
if [[ -z ${pkg_file} ]]; then
echo "Couldn't find DEB file in ${gpdb_package}. Skipping install..."
return 1
fi
echo "Installing ${pkg_file}..."
apt-get install -qq "${pkg_file}" >/dev/null
service ssh start
python_dir=python${python_version}/dist-packages
export_pythonpath+=:/usr/local/lib/$python_dir
else
echo "Unsupported operating system '$(source /etc/os-release && echo "${PRETTY_NAME}")'. Exiting..."
exit 1
fi
echo "$export_pythonpath" >> "${PXF_SRC}/automation/tinc/main/tinc_env.sh"
# create symlink to allow pg_regress to run (hardcoded to look for /usr/local/greenplum-db-devel/psql)
rm -rf /usr/local/greenplum-db-devel
# get version from the package file name
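# e.g. a file named greenplum-db-6.21.0-rhel8-x86_64.rpm (illustrative name) yields version 6.21.0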
: "${pkg_file#*greenplum-db-}"
version=${_%%-*}
gphome_dir=$(find /usr/local/ -name "greenplum-db-${version}*" -type d)
ln -sf "${gphome_dir}" /usr/local/greenplum-db-devel
# change permissions to gpadmin
chown -R gpadmin:gpadmin /usr/local/greenplum-db*
}
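# Example invocation (hypothetical input directory name): install from a non-default Concourse input:
#   GPDB_PKG_DIR=gpdb_rpm_latest install_gpdb_package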
function remote_access_to_gpdb() {
# Copy cluster keys to root user
passwd -u root
cp -Rf cluster_env_files/.ssh/* /root/.ssh
cp -f cluster_env_files/private_key.pem /root/.ssh/id_rsa
cp -f cluster_env_files/public_key.pem /root/.ssh/id_rsa.pub
cp -f cluster_env_files/public_key.openssh /root/.ssh/authorized_keys
sed 's/edw0/hadoop/' cluster_env_files/etc_hostfile >> /etc/hosts
# Copy cluster keys to gpadmin user
rm -rf /home/gpadmin/.ssh/*
cp cluster_env_files/.ssh/* /home/gpadmin/.ssh
cp cluster_env_files/.ssh/*.pem /home/gpadmin/.ssh/id_rsa
cp cluster_env_files/public_key.openssh /home/gpadmin/.ssh/authorized_keys
{ ssh-keyscan localhost; ssh-keyscan 0.0.0.0; } >> /home/gpadmin/.ssh/known_hosts
ssh "${SSH_OPTS[@]}" gpadmin@mdw "
source ${GPHOME}/greenplum_path.sh &&
export MASTER_DATA_DIRECTORY=${MDD_VALUE} &&
echo 'host all all 10.0.0.0/16 trust' >> ${MDD_VALUE}/pg_hba.conf &&
psql -d template1 <<-EOF && gpstop -u
CREATE EXTENSION pxf;
CREATE DATABASE gpadmin;
CREATE ROLE root LOGIN;
EOF
"
}
function create_gpdb_cluster() {
su gpadmin -c "source ${GPHOME}/greenplum_path.sh && make -C gpdb_src/gpAux/gpdemo create-demo-cluster"
}
function add_remote_user_access_for_gpdb() {
local username=${1}
# load local cluster configuration
echo "Adding access entry for ${username} to pg_hba.conf and restarting GPDB for change to take effect"
su gpadmin -c "
if [[ -f gpdb_src/gpAux/gpdemo/gpdemo-env.sh ]]; then
source gpdb_src/gpAux/gpdemo/gpdemo-env.sh
else
export MASTER_DATA_DIRECTORY=~gpadmin/data/master/gpseg-1
fi
echo 'local all ${username} trust' >> \${MASTER_DATA_DIRECTORY}/pg_hba.conf
source ${GPHOME}/greenplum_path.sh
gpstop -u
"
}
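# Example invocation (hypothetical): allow the proxy user to connect locally without a password:
#   add_remote_user_access_for_gpdb "${PROXY_USER}"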
function setup_gpadmin_user() {
# Don't create gpadmin user if already exists
if ! id -u gpadmin; then
groupadd -g 1000 gpadmin && useradd -u 1000 -g 1000 -M gpadmin
echo "gpadmin ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/gpadmin
groupadd supergroup && usermod -a -G supergroup gpadmin
mkdir -p ~gpadmin/.ssh
ssh-keygen -t rsa -N "" -f ~gpadmin/.ssh/id_rsa
cat /home/gpadmin/.ssh/id_rsa.pub >> ~gpadmin/.ssh/authorized_keys
chmod 0600 /home/gpadmin/.ssh/authorized_keys
{ ssh-keyscan localhost; ssh-keyscan 0.0.0.0; } >> ~gpadmin/.ssh/known_hosts
chown -R gpadmin:gpadmin ${GPHOME} ~gpadmin/.ssh # don't chown cached dirs ~/.m2, etc.
echo -e "password\npassword" | passwd gpadmin 2> /dev/null
fi
cat <<-EOF >> /etc/security/limits.d/gpadmin-limits.conf
gpadmin soft core unlimited
gpadmin soft nproc 131072
gpadmin soft nofile 65536
EOF
echo "export JAVA_HOME=${JAVA_HOME}" >> ~gpadmin/.bashrc
if [[ -d gpdb_src/gpAux/gpdemo ]]; then
chown -R gpadmin:gpadmin gpdb_src/gpAux/gpdemo
fi
if grep -i ubuntu /etc/os-release; then
echo '[[ -f ~/.bashrc ]] && . ~/.bashrc' >> ~gpadmin/.bash_profile
chown gpadmin:gpadmin ~gpadmin/.bash_profile
fi
}
function install_pxf_client() {
[[ ${TEST_ENV} == dev ]] || return 0
# recompile pxf.so file for dev environments only
bash -c "
source '${GPHOME}/greenplum_path.sh'
source /opt/gcc_env.sh || true
USE_PGXS=1 make -C '${PXF_EXTENSIONS_DIR}' install
"
}
function install_pxf_server() {
if [[ ! -d ${PXF_HOME} ]]; then
if [[ -d pxf_tarball ]]; then
tar -xzf pxf_tarball/pxf.tar.gz -C ${GPHOME}
else
# requires login shell so that Go's dep is on PATH
bash --login -c "
export JAVA_TOOL_OPTIONS=-Dfile.encoding=UTF8
make -C '${PWD}/pxf_src' install
"
fi
fi
chown -R gpadmin:gpadmin "${PXF_HOME}"
}
function install_pxf_tarball() {
local tarball_dir=${PXF_PKG_DIR:-pxf_tarball}
tar -xzf "${tarball_dir}/"pxf-*.tar.gz -C /tmp
/tmp/pxf*/install_component
chown -R gpadmin:gpadmin "${PXF_HOME}"
}
function install_pxf_package() {
if command -v rpm; then
# install PXF RPM
pkg_file=$(find pxf_package -name 'pxf-gp*.x86_64.rpm')
if [[ -z ${pkg_file} ]]; then
echo "Couldn't find PXF RPM file in pxf_package. Skipping install..."
return 1
fi
echo "Installing ${pkg_file}..."
rpm --quiet -ivh "${pkg_file}" >/dev/null
elif command -v dpkg; then
# install PXF DEB
pkg_file=$(find pxf_package -name 'pxf-gp*amd64.deb')
if [[ -z ${pkg_file} ]]; then
echo "Couldn't find PXF DEB file in pxf_package. Skipping install..."
return 1
fi
echo "Installing ${pkg_file}..."
dpkg --install "${pkg_file}" >/dev/null
fi
chown -R gpadmin:gpadmin "${PXF_HOME}"
}
function setup_impersonation() {
local GPHD_ROOT=${1}
# enable impersonation by gpadmin user
if [[ ${IMPERSONATION} == true ]]; then
echo 'Impersonation is enabled, adding support for gpadmin proxy user'
cat > proxy-config.xml <<-EOF
<property>
<name>hadoop.proxyuser.${PROXY_USER}.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.${PROXY_USER}.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.security.authorization</name>
<value>true</value>
</property>
<property>
<name>hbase.security.authorization</name>
<value>true</value>
</property>
<property>
<name>hbase.rpc.protection</name>
<value>authentication</value>
</property>
<property>
<name>hbase.coprocessor.master.classes</name>
<value>org.apache.hadoop.hbase.security.access.AccessController</value>
</property>
<property>
<name>hbase.coprocessor.region.classes</name>
<value>org.apache.hadoop.hbase.security.access.AccessController,org.apache.hadoop.hbase.security.access.SecureBulkLoadEndpoint</value>
</property>
<property>
<name>hbase.coprocessor.regionserver.classes</name>
<value>org.apache.hadoop.hbase.security.access.AccessController</value>
</property>
EOF
sed -i -e '/<configuration>/r proxy-config.xml' "${GPHD_ROOT}/hadoop/etc/hadoop/core-site.xml" "${GPHD_ROOT}/hbase/conf/hbase-site.xml"
rm proxy-config.xml
elif [[ ${IMPERSONATION} == false ]]; then
echo 'Impersonation is disabled, no proxy user setup performed.'
else
echo "ERROR: Invalid or missing CI property value: IMPERSONATION=${IMPERSONATION}"
exit 1
fi
if ! find "${GPHD_ROOT}/hbase/lib" -name 'pxf-hbase-*.jar' | grep pxf-hbase; then
cp "${SHARE_DIR}"/pxf-hbase-*.jar "${GPHD_ROOT}/hbase/lib"
fi
}
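# Example invocation (hypothetical, assuming the layout created by inflate_singlecluster):
#   IMPERSONATION=true setup_impersonation /singlecluster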
function adjust_for_hadoop3() {
local GPHD_ROOT=${1}
# remove deprecated conf from hive-env.sh
sed -i -e 's|-hiveconf hive.log.dir=$LOGS_ROOT ||g' "${GPHD_ROOT}/hive/conf/hive-env.sh"
# add properties to hive-site.xml
cat > patch.xml <<-EOF
<property>
<name>hive.tez.container.size</name>
<value>2048</value>
</property>
<property>
<name>datanucleus.schema.autoCreateAll</name>
<value>True</value>
</property>
<property>
<name>metastore.metastore.event.db.notification.api.auth</name>
<value>false</value>
</property>
EOF
sed -i -e '/<configuration>/r patch.xml' -e 's|>mr|>tez|g' "${GPHD_ROOT}/hive/conf/hive-site.xml"
# add properties to tez-site.xml
cat > patch.xml <<-EOF
<property>
<name>tez.use.cluster.hadoop-libs</name>
<value>true</value>
</property>
EOF
sed -i -e '/<configuration supports_final="true">/r patch.xml' "${GPHD_ROOT}/tez/conf/tez-site.xml"
rm patch.xml
# update properties in yarn-site.xml
sed -i -e 's|HADOOP_CONF|HADOOP_CONF_DIR|g' \
-e 's|HADOOP_ROOT|HADOOP_HOME|g' "${GPHD_ROOT}/hadoop/etc/hadoop/yarn-site.xml"
}
function start_hadoop_services() {
local GPHD_ROOT=${1}
# Start all hadoop services
JAVA_HOME=${HADOOP_JAVA_HOME} "${GPHD_ROOT}/bin/init-gphd.sh"
JAVA_HOME=${HADOOP_JAVA_HOME} "${GPHD_ROOT}/bin/start-hdfs.sh"
JAVA_HOME=${HADOOP_JAVA_HOME} "${GPHD_ROOT}/bin/start-zookeeper.sh"
JAVA_HOME=${HADOOP_JAVA_HOME} "${GPHD_ROOT}/bin/start-yarn.sh" &
JAVA_HOME=${HADOOP_JAVA_HOME} "${GPHD_ROOT}/bin/start-hbase.sh" &
init_hive_metastore "${GPHD_ROOT}"
JAVA_HOME=${HADOOP_JAVA_HOME} "${GPHD_ROOT}/bin/start-hive.sh" &
wait
export PATH=$PATH:${GPHD_ROOT}/bin
# list running Hadoop daemons
JAVA_HOME=${HADOOP_JAVA_HOME} jps
# grant gpadmin user admin privilege for feature tests to be able to run on secured cluster
if [[ ${IMPERSONATION} == true ]]; then
echo 'Granting gpadmin user admin privileges for HBase'
echo "grant 'gpadmin', 'RWXCA'" | hbase shell
fi
}
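# Example invocation (hypothetical JDK path, assuming the single-cluster layout):
#   HADOOP_JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk start_hadoop_services /singlecluster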
# explicitly init the hive metastore to ensure necessary system tables have been created
function init_hive_metastore() {
local GPHD_ROOT=${1}
mkdir -p "${GPHD_ROOT}/storage/hive"
pushd "${GPHD_ROOT}/storage/hive"
JAVA_HOME=${HADOOP_JAVA_HOME} ${GPHD_ROOT}/hive/bin/schematool -dbType derby -initSchema
popd
}
function init_pxf() {
echo 'Ensure pxf version can be run before pxf init'
su gpadmin -c "${PXF_HOME}/bin/pxf version | grep -E '^PXF version [0-9]+.[0-9]+.[0-9]+'" || exit 1
echo 'Initializing PXF service'
# requires a login shell to source startup scripts (JAVA_HOME)
su --login gpadmin -c "PXF_CONF=${BASE_DIR} ${PXF_HOME}/bin/pxf init"
}
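# Note: init_pxf uses the PXF_CONF / `pxf init` flow (PXF 5 style); configure_pxf_server
# below covers the `pxf register` / `pxf prepare` flow used for PXF 6.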
function configure_pxf_server() {
echo 'Ensure pxf version can be run before configuring pxf'
su gpadmin -c "${PXF_HOME}/bin/pxf version | grep -E '^PXF version [0-9]+.[0-9]+.[0-9]+'" || exit 1
echo 'Register PXF extension in Greenplum'
# requires a login shell to source startup scripts (JAVA_HOME)
su --login gpadmin -c "${PXF_HOME}/bin/pxf register"
# prepare pxf if BASE_DIR is different from PXF_HOME
if [[ "$BASE_DIR" != "$PXF_HOME" ]]; then
echo "Prepare PXF in $BASE_DIR"
su --login gpadmin -c "PXF_BASE=${BASE_DIR} pxf prepare"
export PXF_BASE=${BASE_DIR}
echo "export PXF_BASE=${BASE_DIR}" >> ~/.pxfrc
echo "export PXF_BASE=${BASE_DIR}" >> ~gpadmin/.bashrc
fi
# update impersonation value based on CI parameter
if [[ ! ${IMPERSONATION} == true ]]; then
echo 'Impersonation is disabled, updating pxf-site.xml property'
if [[ ! -f ${BASE_DIR}/servers/default/pxf-site.xml ]]; then
cp ${TEMPLATES_DIR}/templates/pxf-site.xml ${BASE_DIR}/servers/default/pxf-site.xml
fi
sed -i -e "s|<value>true</value>|<value>false</value>|g" ${BASE_DIR}/servers/default/pxf-site.xml
elif [[ -z ${PROTOCOL} ]]; then
# Copy pxf-site.xml to the server configuration and update the
# pxf.service.user.name property value to use the PROXY_USER
# Only copy this file when testing against non-cloud
if [[ ! -f ${BASE_DIR}/servers/default/pxf-site.xml ]]; then
cp ${TEMPLATES_DIR}/templates/pxf-site.xml ${BASE_DIR}/servers/default/pxf-site.xml
sed -i -e "s|</configuration>|<property><name>pxf.service.user.name</name><value>${PROXY_USER}</value></property></configuration>|g" ${BASE_DIR}/servers/default/pxf-site.xml
fi
fi
# update runtime JDK value based on CI parameter
RUN_JDK_VERSION=${RUN_JDK_VERSION:-8}
if [[ $RUN_JDK_VERSION == 11 ]]; then
echo 'JDK 11 requested for runtime, setting PXF JAVA_HOME=/usr/lib/jvm/jdk-11 in pxf-env.sh'
su gpadmin -c "echo 'export JAVA_HOME=/usr/lib/jvm/jdk-11' >> ${BASE_DIR}/conf/pxf-env.sh"
fi
}
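# Example invocation (hypothetical CI parameters): disable impersonation and run PXF on JDK 11:
#   IMPERSONATION=false RUN_JDK_VERSION=11 configure_pxf_server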
function configure_hdfs_client_for_s3() {
S3_CORE_SITE_XML=$(mktemp)
cat <<-EOF > "${S3_CORE_SITE_XML}"
<property>
<name>fs.s3a.access.key</name>
<value>${ACCESS_KEY_ID}</value>
</property>
<property>
<name>fs.s3a.secret.key</name>
<value>${SECRET_ACCESS_KEY}</value>
</property>
EOF
sed -i -e "/<configuration>/r ${S3_CORE_SITE_XML}" "${GPHD_ROOT}/hadoop/etc/hadoop/core-site.xml"
}
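# Example invocation (hypothetical credentials; GPHD_ROOT must point at the Hadoop install):
#   ACCESS_KEY_ID=AKIA... SECRET_ACCESS_KEY=... GPHD_ROOT=/singlecluster configure_hdfs_client_for_s3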
function configure_hdfs_client_for_minio() {
MINIO_CORE_SITE_XML=$(mktemp)
cat <<-EOF > "${MINIO_CORE_SITE_XML}"
<property>
<name>fs.s3a.endpoint</name>
<value>http://localhost:9000</value>
</property>
<property>
<name>fs.s3a.access.key</name>
<value>${ACCESS_KEY_ID}</value>
</property>
<property>
<name>fs.s3a.secret.key</name>
<value>${SECRET_ACCESS_KEY}</value>
</property>
EOF
sed -i -e "/<configuration>/r ${MINIO_CORE_SITE_XML}" "${GPHD_ROOT}/hadoop/etc/hadoop/core-site.xml"
}
function configure_hdfs_client_for_gs() {
cp "${PXF_HOME}/lib/shared/"gcs-connector-hadoop2-*-shaded.jar \
"${GPHD_ROOT}/hadoop/share/hadoop/hdfs/lib"
GS_CORE_SITE_XML=$(mktemp)
cat <<-EOF > "${GS_CORE_SITE_XML}"
<property>
<name>fs.AbstractFileSystem.gs.impl</name>
<value>com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS</value>
<description>The AbstractFileSystem for gs: uris.</description>
</property>
<property>
<name>google.cloud.auth.service.account.enable</name>
<value>true</value>
<description>
Whether to use a service account for GCS authorization.
Setting this property to \`false\` will disable use of service accounts for
authentication.
</description>
</property>
<property>
<name>google.cloud.auth.service.account.json.keyfile</name>
<value>${GOOGLE_KEYFILE}</value>
<description>
The JSON key file of the service account used for GCS
access when google.cloud.auth.service.account.enable is true.
</description>
</property>
EOF
sed -i -e "/<configuration>/r ${GS_CORE_SITE_XML}" "${GPHD_ROOT}/hadoop/etc/hadoop/core-site.xml"
}
function configure_hdfs_client_for_adl() {
cp "${PXF_HOME}/lib/shared/"azure-data-lake-store-sdk-*.jar \
"${PXF_HOME}/lib/shared/"hadoop-azure-*.jar \
"${PXF_HOME}/lib/shared/"hadoop-azure-datalake-*.jar \
"${PXF_HOME}/lib/shared/"hadoop-common-*.jar \
"${PXF_HOME}/lib/shared/"htrace-core4-*-incubating.jar \
"${PXF_HOME}/lib/shared/"stax2-api-*.jar \
"${PXF_HOME}/lib/shared/"woodstox-core-*.jar "${GPHD_ROOT}/hadoop/share/hadoop/common/lib"
ADL_CORE_SITE_XML=$(mktemp)
cat <<-EOF > "${ADL_CORE_SITE_XML}"
<property>
<name>fs.adl.oauth2.access.token.provider.type</name>
<value>ClientCredential</value>
</property>
<property>
<name>fs.adl.oauth2.refresh.url</name>
<value>${ADL_OAUTH2_REFRESH_URL}</value>
</property>
<property>
<name>fs.adl.oauth2.client.id</name>
<value>${ADL_OAUTH2_CLIENT_ID}</value>
</property>
<property>
<name>fs.adl.oauth2.credential</name>
<value>${ADL_OAUTH2_CREDENTIAL}</value>
</property>
EOF
sed -i -e "/<configuration>/r ${ADL_CORE_SITE_XML}" "${GPHD_ROOT}/hadoop/etc/hadoop/core-site.xml"
}
function configure_hdfs_client_for_wasbs() {
WASBS_CORE_SITE_XML=$(mktemp)
cat <<-EOF > "${WASBS_CORE_SITE_XML}"
<property>
<name>fs.azure.account.key.${WASBS_ACCOUNT_NAME}.blob.core.windows.net</name>
<value>${WASBS_ACCOUNT_KEY}</value>
</property>
EOF
sed -i -e "/<configuration>/r ${WASBS_CORE_SITE_XML}" "${GPHD_ROOT}/hadoop/etc/hadoop/core-site.xml"
}
function configure_pxf_gs_server() {
mkdir -p ${BASE_DIR}/servers/gs
GOOGLE_KEYFILE=$(mktemp)
echo "${GOOGLE_CREDENTIALS}" > "${GOOGLE_KEYFILE}"
chown gpadmin "${GOOGLE_KEYFILE}"
sed -e "s|YOUR_GOOGLE_STORAGE_KEYFILE|${GOOGLE_KEYFILE}|" \
${TEMPLATES_DIR}/templates/gs-site.xml >"${BASE_DIR}/servers/gs/gs-site.xml"
}
function configure_pxf_s3_server() {
mkdir -p ${BASE_DIR}/servers/s3
sed -e "s|YOUR_AWS_ACCESS_KEY_ID|${ACCESS_KEY_ID}|" \
-e "s|YOUR_AWS_SECRET_ACCESS_KEY|${SECRET_ACCESS_KEY}|" \
${TEMPLATES_DIR}/templates/s3-site.xml >${BASE_DIR}/servers/s3/s3-site.xml
mkdir -p ${BASE_DIR}/servers/s3-invalid
cp ${TEMPLATES_DIR}/templates/s3-site.xml ${BASE_DIR}/servers/s3-invalid/s3-site.xml
chown -R gpadmin:gpadmin "${BASE_DIR}/servers/s3" "${BASE_DIR}/servers/s3-invalid"
}
function configure_pxf_minio_server() {
mkdir -p ${BASE_DIR}/servers/minio
sed -e "s|YOUR_AWS_ACCESS_KEY_ID|${ACCESS_KEY_ID}|" \
-e "s|YOUR_AWS_SECRET_ACCESS_KEY|${SECRET_ACCESS_KEY}|" \
-e "s|YOUR_MINIO_URL|http://localhost:9000|" \
${TEMPLATES_DIR}/templates/minio-site.xml >${BASE_DIR}/servers/minio/minio-site.xml
}
function configure_pxf_adl_server() {
mkdir -p "${BASE_DIR}/servers/adl"
sed -e "s|YOUR_ADL_REFRESH_URL|${ADL_OAUTH2_REFRESH_URL}|g" \
-e "s|YOUR_ADL_CLIENT_ID|${ADL_OAUTH2_CLIENT_ID}|g" \
-e "s|YOUR_ADL_CREDENTIAL|${ADL_OAUTH2_CREDENTIAL}|g" \
"${TEMPLATES_DIR}/templates/adl-site.xml" >"${BASE_DIR}/servers/adl/adl-site.xml"
}
function configure_pxf_wasbs_server() {
mkdir -p ${BASE_DIR}/servers/wasbs
sed -e "s|YOUR_AZURE_BLOB_STORAGE_ACCOUNT_NAME|${WASBS_ACCOUNT_NAME}|g" \
-e "s|YOUR_AZURE_BLOB_STORAGE_ACCOUNT_KEY|${WASBS_ACCOUNT_KEY}|g" \
"${TEMPLATES_DIR}/templates/wasbs-site.xml" >"${BASE_DIR}/servers/wasbs/wasbs-site.xml"
}
function configure_pxf_default_server() {
AMBARI_DIR=$(find /tmp/build/ -name ambari_env_files)
if [[ -n $AMBARI_DIR ]]; then
AMBARI_KEYTAB_FILE=$(find "$AMBARI_DIR" -name "*.keytab")
cp "${AMBARI_DIR}"/conf/*-site.xml "${BASE_DIR}/servers/default"
if [[ -n $AMBARI_KEYTAB_FILE ]]; then
REALM=$(cat "$AMBARI_DIR"/REALM)
HADOOP_USER=$(cat "$AMBARI_DIR"/HADOOP_USER)
cp ${TEMPLATES_DIR}/templates/mapred-site.xml ${BASE_DIR}/servers/default/mapred1-site.xml
cp ${TEMPLATES_DIR}/templates/pxf-site.xml ${BASE_DIR}/servers/default/pxf-site.xml
sed -i -e "s|gpadmin/_HOST@EXAMPLE.COM|${HADOOP_USER}@${REALM}|g" ${BASE_DIR}/servers/default/pxf-site.xml
if [[ ${PXF_VERSION} == 5 ]]; then
sed -i -e "s|\${pxf.conf}/keytabs/pxf.service.keytab|$AMBARI_KEYTAB_FILE|g" ${BASE_DIR}/servers/default/pxf-site.xml
else
sed -i -e "s|\${pxf.base}/keytabs/pxf.service.keytab|$AMBARI_KEYTAB_FILE|g" ${BASE_DIR}/servers/default/pxf-site.xml
fi
sed -i -e "s|\${user.name}||g" ${BASE_DIR}/servers/default/pxf-site.xml
sudo mkdir -p /etc/security/keytabs/
sudo cp "$AMBARI_KEYTAB_FILE" /etc/security/keytabs/"${HADOOP_USER}".headless.keytab
sudo chown gpadmin:gpadmin /etc/security/keytabs/"${HADOOP_USER}".headless.keytab
mkdir -p ${BASE_DIR}/servers/db-hive/
cp ${BASE_DIR}/servers/default/pxf-site.xml ${BASE_DIR}/servers/db-hive/
cp ${TEMPLATES_DIR}/templates/jdbc-site.xml ${BASE_DIR}/servers/db-hive/
REALM=$(cat "$AMBARI_DIR"/REALM)
HIVE_HOSTNAME=$(grep < "$AMBARI_DIR"/etc_hostfile ambari-2 | awk '{print $2}')
KERBERIZED_HADOOP_URI="hive/${HIVE_HOSTNAME}.c.${GOOGLE_PROJECT_ID}.internal@${REALM};saslQop=auth" # quoted because of semicolon
sed -i -e 's|YOUR_DATABASE_JDBC_DRIVER_CLASS_NAME|org.apache.hive.jdbc.HiveDriver|' \
-e "s|YOUR_DATABASE_JDBC_URL|jdbc:hive2://${HIVE_HOSTNAME}:10000/default;principal=${KERBERIZED_HADOOP_URI}|" \
-e 's|YOUR_DATABASE_JDBC_USER||' \
-e 's|YOUR_DATABASE_JDBC_PASSWORD||' \
-e 's|</configuration>|<property><name>hadoop.security.authentication</name><value>kerberos</value></property></configuration>|g' \
${BASE_DIR}/servers/db-hive/jdbc-site.xml
cp "${PXF_SRC}"/automation/src/test/resources/hive-report.sql ${BASE_DIR}/servers/db-hive/
fi
else
# copy hadoop config files to BASE_DIR/servers/default
if [[ -d /etc/hadoop/conf/ ]]; then
cp /etc/hadoop/conf/*-site.xml "${BASE_DIR}/servers/default"
fi
if [[ -d /etc/hive/conf/ ]]; then
cp /etc/hive/conf/*-site.xml "${BASE_DIR}/servers/default"
fi
if [[ -d /etc/hbase/conf/ ]]; then
cp /etc/hbase/conf/*-site.xml "${BASE_DIR}/servers/default"
fi
fi
if [[ ${IMPERSONATION} == true ]]; then
cp -r ${BASE_DIR}/servers/default ${BASE_DIR}/servers/default-no-impersonation
if [[ ! -f ${BASE_DIR}/servers/default-no-impersonation/pxf-site.xml ]]; then
cp ${TEMPLATES_DIR}/templates/pxf-site.xml ${BASE_DIR}/servers/default-no-impersonation/pxf-site.xml
fi
sed -i \
-e "/<name>pxf.service.user.impersonation<\/name>/ {n;s|<value>.*</value>|<value>false</value>|g;}" \
-e "s|</configuration>|<property><name>pxf.service.user.name</name><value>foobar</value></property></configuration>|g" \
${BASE_DIR}/servers/default-no-impersonation/pxf-site.xml
fi
chown -R gpadmin:gpadmin "${BASE_DIR}/servers"
}
function start_pxf_server() {
# Check if some other process is listening on 5888
netstat -tlpna | grep 5888 || true
echo 'Starting PXF service'
su --login gpadmin -c "${PXF_HOME}/bin/pxf start"
# grep with regex to avoid catching grep process itself
if [[ ${PXF_VERSION} == 5 ]]; then
ps -aef | grep '[t]omcat'
else
ps -aef | grep '[p]xf-app'
fi
}
function setup_s3_for_pg_regress() {
configure_pxf_s3_server
configure_hdfs_client_for_s3
HCFS_BUCKET=gpdb-ud-scratch
}
function setup_gs_for_pg_regress() {
configure_pxf_gs_server
configure_hdfs_client_for_gs
HCFS_BUCKET=data-gpdb-ud-tpch
}
function setup_adl_for_pg_regress() {
configure_pxf_adl_server
configure_hdfs_client_for_adl
HCFS_BUCKET=${ADL_ACCOUNT}.azuredatalakestore.net
}
function setup_wasbs_for_pg_regress() {
configure_pxf_wasbs_server
configure_hdfs_client_for_wasbs
HCFS_BUCKET=pxf-container@${WASBS_ACCOUNT_NAME}.blob.core.windows.net
}
function setup_minio_for_pg_regress() {
configure_pxf_minio_server
configure_hdfs_client_for_minio
# this is set in setup_minio()
HCFS_BUCKET=gpdb-ud-scratch
}
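# Example ordering for a Minio-backed run (hypothetical): start the local Minio server first
# (setup_minio below) so its credentials and the gpdb-ud-scratch bucket exist, then configure
# the HDFS client and PXF server against it:
#   setup_minio
#   setup_minio_for_pg_regress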
function setup_minio() {
echo 'Adding test bucket gpdb-ud-scratch to Minio ...'
mkdir -p /opt/minio/data/gpdb-ud-scratch
export MINIO_ACCESS_KEY=admin MINIO_SECRET_KEY=password
echo "Minio credentials: accessKey=${MINIO_ACCESS_KEY} secretKey=${MINIO_SECRET_KEY}"
echo 'Starting Minio ...'
MINIO_DOMAIN=localhost /opt/minio/bin/minio server /opt/minio/data &
# export minio credentials as access environment variables
export ACCESS_KEY_ID=${MINIO_ACCESS_KEY} SECRET_ACCESS_KEY=${MINIO_SECRET_KEY}
}