concourse/scripts/install_pxf.bash (259 lines of code) (raw):
#!/bin/bash
set -euxo pipefail
SCRIPT_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
GPHOME=/usr/local/greenplum-db-devel
# whether PXF is being installed from a new component-based packaging
PXF_COMPONENT=${PXF_COMPONENT:=false}
if [[ ${PXF_COMPONENT} == "true" ]]; then
PXF_HOME=/usr/local/pxf-gp${GP_VER}
else
PXF_HOME=${GPHOME}/pxf
fi
PXF_VERSION=${PXF_VERSION:=6}
# read default user from terraform metadata file
DEFAULT_OS_USER=$(jq --raw-output ".ami_default_user" terraform/metadata)
# we need word boundary in case of standby master (smdw)
MASTER_HOSTNAME=$(grep < cluster_env_files/etc_hostfile '\bmdw' | awk '{print $2}')
REALM=${REALM:-}
REALM_2=${REALM_2:-}
GOOGLE_PROJECT_ID=${GOOGLE_PROJECT_ID:-data-gpdb-ud}
KERBEROS=${KERBEROS:-false}
function get_hadoop_ip() {
if [[ $SKIP_HADOOP_SETUP == true ]]; then
HADOOP_IP=''
return
fi
if [[ -f terraform_dataproc/name ]]; then
HADOOP_HOSTNAME=ccp-$(< terraform_dataproc/name)-m
HADOOP_IP=$(getent hosts "${HADOOP_HOSTNAME}.c.${GOOGLE_PROJECT_ID}.internal" | awk '{ print $1 }')
elif [[ -f dataproc_env_files/name ]]; then
HADOOP_HOSTNAME=$(< dataproc_env_files/name)
HADOOP_IP=$(getent hosts "${HADOOP_HOSTNAME}.c.${GOOGLE_PROJECT_ID}.internal" | awk '{ print $1 }')
REALM=$(< dataproc_env_files/REALM)
else
HADOOP_HOSTNAME=hadoop
HADOOP_IP=$(grep < cluster_env_files/etc_hostfile edw0 | awk '{print $1}')
fi
}
SKIP_HADOOP_SETUP=${SKIP_HADOOP_SETUP:-false}
get_hadoop_ip
PROXY_USER=${PROXY_USER:-pxfuser}
if [[ ${PXF_VERSION} == 5 ]]; then
BASE_DIR=~gpadmin/pxf
else
BASE_DIR=${PXF_BASE_DIR:-$PXF_HOME}
fi
INSTALL_GPHDFS=${INSTALL_GPHDFS:-true}
cat << EOF
############################
# #
# PXF Installation #
# #
############################
EOF
function create_pxf_installer_scripts() {
cat > /tmp/configure_pxf.sh <<-EOFF
#!/bin/bash
set -euxo pipefail
GPHOME=${GPHOME}
PXF_HOME=${PXF_HOME}
if [[ ${PXF_VERSION} == 5 ]]; then
PXF_CONF=${BASE_DIR}
else
PXF_BASE=${BASE_DIR}
fi
function setup_pxf_env() {
#Check if some other process is listening on 5888
netstat -tlpna | grep 5888 || true
if [[ $IMPERSONATION == false ]]; then
if [[ ${PXF_VERSION} == 5 ]]; then
echo 'Impersonation is disabled, updating pxf-env.sh property'
# sed -ie 's|^[[:blank:]]*export PXF_USER_IMPERSONATION=.*$|export PXF_USER_IMPERSONATION=false|g' "\${PXF_CONF}/conf/pxf-env.sh"
echo 'PXF_USER_IMPERSONATION=false' >> "\${PXF_CONF}/conf/pxf-env.sh"
else
echo 'Impersonation is disabled, updating pxf-site.xml property'
if [[ ! -f \${PXF_BASE}/servers/default/pxf-site.xml ]]; then
cp \${PXF_HOME}/templates/pxf-site.xml \${PXF_BASE}/servers/default/pxf-site.xml
fi
sed -i -e "s|<value>true</value>|<value>false</value>|g" \${PXF_BASE}/servers/default/pxf-site.xml
fi
fi
if [[ -n "${PXF_JVM_OPTS}" ]]; then
if [[ ${PXF_VERSION} == 5 ]]; then
echo 'export PXF_JVM_OPTS="${PXF_JVM_OPTS}"' >> "\${PXF_CONF}/conf/pxf-env.sh"
else
echo 'export PXF_JVM_OPTS="${PXF_JVM_OPTS}"' >> "\${PXF_BASE}/conf/pxf-env.sh"
fi
fi
if [[ $KERBEROS == true ]]; then
cp ~/dataproc_env_files/krb5.conf /tmp/krb5.conf
if [[ -f ~/dataproc_2_env_files/krb5.conf ]]; then
# Merge krb5.conf files from two different REALMS
diff --line-format %L /tmp/krb5.conf ~/dataproc_2_env_files/krb5.conf > /tmp/krb5.conf-tmp || true
rm -f /tmp/krb5.conf && mv /tmp/krb5.conf-tmp /tmp/krb5.conf
# Remove the second instance of default_realm from the file
awk '!/default_realm/ || !f++' /tmp/krb5.conf > /tmp/krb5.conf-tmp
rm -f /tmp/krb5.conf && mv /tmp/krb5.conf-tmp /tmp/krb5.conf
# Add missing } to the new REALM
REALM_2=\$(cat ~/dataproc_2_env_files/REALM)
sed -i "s/\${REALM_2} =/}\n\t\${REALM_2} =/g" /tmp/krb5.conf
fi
if [[ -d ~/ipa_env_files ]]; then
REALM_3="\$(< ipa_env_files/REALM)"
sed -i \
-e '/^\[libdefaults.*/a \\\\tforwardable=true' \
-e '/^\[realms/ r ipa_env_files/krb5_realm' \
-e '/^\[domain_realm/ r ipa_env_files/krb5_domain_realm' /tmp/krb5.conf
fi
if [[ ${PXF_VERSION} == 5 ]]; then
echo 'export PXF_KEYTAB="\${PXF_CONF}/keytabs/pxf.service.keytab"' >> "\${PXF_CONF}/conf/pxf-env.sh"
echo 'export PXF_PRINCIPAL="gpadmin@${REALM}"' >> "\${PXF_CONF}/conf/pxf-env.sh"
gpscp -f ~gpadmin/hostfile_all -v -r -u gpadmin ~/dataproc_env_files/pxf.service.keytab =:/home/gpadmin/pxf/keytabs/
else
if [[ ! -f \${PXF_BASE}/servers/default/pxf-site.xml ]]; then
cp \${PXF_HOME}/templates/pxf-site.xml \${PXF_BASE}/servers/default/pxf-site.xml
fi
sed -i -e "s|gpadmin/_HOST@EXAMPLE.COM|gpadmin@${REALM}|g" ${BASE_DIR}/servers/default/pxf-site.xml
gpscp -f ~gpadmin/hostfile_all -v -r -u gpadmin ~/dataproc_env_files/pxf.service.keytab =:${BASE_DIR}/keytabs/
fi
gpscp -f ~gpadmin/hostfile_all -v -r -u ${DEFAULT_OS_USER} /tmp/krb5.conf =:/tmp/krb5.conf
gpssh -f ~gpadmin/hostfile_all -v -u ${DEFAULT_OS_USER} -s -e 'sudo mv /tmp/krb5.conf /etc/krb5.conf'
fi
}
function main() {
if [[ ${PXF_VERSION} == 5 ]]; then
rm -rf \$PXF_CONF/servers/default/*-site.xml
if [[ -d ~/dataproc_env_files/conf ]]; then
cp ~/dataproc_env_files/conf/*-site.xml "\$PXF_CONF/servers/default"
# required for recursive directories tests
cp "\$PXF_CONF/templates/mapred-site.xml" "\$PXF_CONF/servers/default/mapred1-site.xml"
else
cp \$PXF_CONF/templates/{hdfs,mapred,yarn,core,hbase,hive,pxf}-site.xml "\$PXF_CONF/servers/default"
sed -i -e 's/\(0.0.0.0\|localhost\|127.0.0.1\)/${HADOOP_IP}/g' \$PXF_CONF/servers/default/*-site.xml
sed -i -e 's|\${user.name}|${PROXY_USER}|g' \$PXF_CONF/servers/default/pxf-site.xml
fi
else
if [[ "\$PXF_BASE" != "\$PXF_HOME" ]]; then
echo 'Prepare PXF in $BASE_DIR'
PXF_BASE=\$PXF_BASE \$PXF_HOME/bin/pxf cluster prepare
echo "export PXF_BASE=${BASE_DIR}" >> ~gpadmin/.bashrc
fi
rm -rf \$PXF_BASE/servers/default/*-site.xml
if [[ -d ~/dataproc_env_files/conf ]]; then
cp ~/dataproc_env_files/conf/*-site.xml "\$PXF_BASE/servers/default"
# required for recursive directories tests
cp "\$PXF_HOME/templates/mapred-site.xml" "\$PXF_BASE/servers/default/mapred1-site.xml"
else
cp \$PXF_HOME/templates/{hdfs,mapred,yarn,core,hbase,hive,pxf}-site.xml "\$PXF_BASE/servers/default"
sed -i -e 's/\(0.0.0.0\|localhost\|127.0.0.1\)/${HADOOP_IP}/g' \$PXF_BASE/servers/default/*-site.xml
sed -i -e 's|\${user.name}|${PROXY_USER}|g' \$PXF_BASE/servers/default/pxf-site.xml
fi
fi
setup_pxf_env
}
main
EOFF
cat > /tmp/install_pxf_dependencies.sh <<-EOFF
#!/bin/bash
set -euxo pipefail
GPHOME=${GPHOME}
PXF_HOME=${PXF_HOME}
if [[ ${PXF_VERSION} == 5 ]]; then
PXF_CONF=${BASE_DIR}
else
PXF_BASE=${BASE_DIR}
fi
export HADOOP_VER=2.6.5.0-292
function install_java() {
yum install -y -q -e 0 java-1.8.0-openjdk
echo 'export JAVA_HOME=/usr/lib/jvm/jre' | sudo tee -a ~gpadmin/.bashrc
echo 'export JAVA_HOME=/usr/lib/jvm/jre' | sudo tee -a ~${DEFAULT_OS_USER}/.bashrc
}
# this function is only used for GPHDFS, which we only test with centos.
function install_hadoop_client() {
cat > /etc/yum.repos.d/hdp.repo <<EOF
[HDP-2.6.5.0]
name=HDP Version - HDP-2.6.5.0
baseurl=http://public-repo-1.hortonworks.com/HDP/centos7/2.x/updates/2.6.5.0
gpgcheck=1
gpgkey=http://public-repo-1.hortonworks.com/HDP/centos7/2.x/updates/2.6.5.0/RPM-GPG-KEY/RPM-GPG-KEY-Jenkins
enabled=1
priority=1
EOF
yum install -y -d 1 hadoop-client
echo "export HADOOP_VERSION=\${HADOOP_VER}" | sudo tee -a ~gpadmin/.bash_profile
echo "export HADOOP_HOME=/usr/hdp/\${HADOOP_VER}" | sudo tee -a ~gpadmin/.bash_profile
echo "export HADOOP_HOME=/usr/hdp/\${HADOOP_VER}" | sudo tee -a ~${DEFAULT_OS_USER}/.bash_profile
}
function main() {
install_java
if [[ $INSTALL_GPHDFS == true ]]; then
install_hadoop_client
fi
}
main
EOFF
chmod +x /tmp/{install_pxf_dependencies,configure_pxf}.sh
scp /tmp/{install_pxf_dependencies,configure_pxf}.sh "${MASTER_HOSTNAME}:~gpadmin"
}
function run_pxf_installer_scripts() {
ssh "${MASTER_HOSTNAME}" "
source ${GPHOME}/greenplum_path.sh &&
export JAVA_HOME=/usr/lib/jvm/jre &&
export MASTER_DATA_DIRECTORY=/data/gpdata/master/gpseg-1/ &&
export PXF_COMPONENT=${PXF_COMPONENT} &&
if [[ $INSTALL_GPHDFS == true ]]; then
gpconfig -c gp_hadoop_home -v '/usr/hdp/2.6.5.0-292'
gpconfig -c gp_hadoop_target_version -v hdp
gpstop -u
fi &&
sed -i '/edw/d' hostfile_all &&
gpscp -f ~gpadmin/hostfile_all -v -u ${DEFAULT_OS_USER} -r ~/pxf_installer ~gpadmin/install_pxf_dependencies.sh ${DEFAULT_OS_USER}@=: &&
gpssh -f ~gpadmin/hostfile_all -v -u ${DEFAULT_OS_USER} -s -e 'sudo ~${DEFAULT_OS_USER}/install_pxf_dependencies.sh' &&
gpssh -f ~gpadmin/hostfile_all -v -u ${DEFAULT_OS_USER} -s -e 'sudo GPHOME=${GPHOME} ~${DEFAULT_OS_USER}/pxf_installer/install_component'
gpssh -f ~gpadmin/hostfile_all -v -u ${DEFAULT_OS_USER} -s -e 'sudo chown -R gpadmin:gpadmin ${PXF_HOME}'
if [[ ${PXF_VERSION} == 5 ]]; then
GPHOME=${GPHOME} PXF_CONF=${BASE_DIR} ${PXF_HOME}/bin/pxf cluster init
else
${PXF_HOME}/bin/pxf cluster register
fi &&
if [[ -d ~/dataproc_env_files ]]; then
gpscp -f ~gpadmin/hostfile_init -v -r -u gpadmin ~/dataproc_env_files =:
fi &&
~gpadmin/configure_pxf.sh &&
source ~gpadmin/.bashrc &&
gpssh -f ~gpadmin/hostfile_all -v -u ${DEFAULT_OS_USER} -s -e \"sudo sed -i -e 's/edw0/edw0 hadoop/' /etc/hosts\" &&
echo \"PXF_BASE=\${PXF_BASE}\" &&
${PXF_HOME}/bin/pxf cluster sync &&
${PXF_HOME}/bin/pxf cluster start &&
if [[ $INSTALL_GPHDFS == true ]]; then
gpssh -f ~gpadmin/hostfile_all -v -u ${DEFAULT_OS_USER} -s -e '
sudo cp ${BASE_DIR}/servers/default/{core,hdfs}-site.xml /etc/hadoop/conf
'
fi
"
# Create a database for PXF extension upgrade testing
if [[ ${PXF_VERSION} == 5 ]]; then
ssh "${MASTER_HOSTNAME}" "
source ${GPHOME}/greenplum_path.sh &&
createdb testupgrade &&
psql -d testupgrade -c 'CREATE EXTENSION IF NOT EXISTS pxf'
"
fi
}
function _main() {
mkdir -p /tmp/pxf_installer/
if [[ -d pxf_tarball ]]; then
if [[ ${PXF_COMPONENT} == true ]]; then
mkdir -p /tmp/pxf_inflate
tar -xzf pxf_tarball/pxf-*.tar.gz -C /tmp/pxf_inflate
cp /tmp/pxf_inflate/pxf*/* /tmp/pxf_installer/
else
cp pxf_tarball/pxf.tar.gz /tmp/pxf_installer
cat << EOF > /tmp/pxf_installer/install_component
#!/usr/bin/env bash
tar -xzf ~/pxf_tarball/pxf.tar.gz -C \${GPHOME}
EOF
fi
elif [[ -d pxf_artifact ]]; then
cp pxf_artifact/*.rpm /tmp/pxf_installer
cp pxf_src/package/install_rpm /tmp/pxf_installer/install_component
else
echo "Unable to find a suitable PXF installer"
exit 1
fi
chmod +x /tmp/pxf_installer/install_component
local SCP_FILES=(/tmp/pxf_installer cluster_env_files/*)
if [[ -d dataproc_env_files ]]; then
SCP_FILES+=(dataproc_env_files)
fi
if [[ -d dataproc_2_env_files ]]; then
SCP_FILES+=(dataproc_2_env_files)
fi
if [[ -d ipa_env_files ]]; then
SCP_FILES+=(ipa_env_files)
fi
scp -r "${SCP_FILES[@]}" "${MASTER_HOSTNAME}:~gpadmin"
create_pxf_installer_scripts
run_pxf_installer_scripts
}
_main