in infrastructure-provisioning/src/general/scripts/gcp/dataengine-service_configure.py [0:0]
def _abort_dataengine_service(message, err, dataproc_conf):
    """Record a failed configuration stage, remove the cluster and exit.

    Appends ``message`` together with the text of ``err`` to the result via
    ``datalab.fab.append_result``, calls
    ``GCPActions.delete_dataproc_cluster`` for ``dataproc_conf['cluster_name']``
    in the region taken from ``os.environ['gcp_region']``, and then terminates
    the process with exit status 1.
    """
    datalab.fab.append_result(message, str(err))
    GCPActions.delete_dataproc_cluster(dataproc_conf['cluster_name'], os.environ['gcp_region'])
    sys.exit(1)


def configure_dataengine_service(instance, dataproc_conf):
    """Configure a Data Engine service cluster in three stages.

    1. Run the ``common_configure_proxy`` script against the instance.
    2. Open a DataLab connection to the instance and run the Livy / pip
       configuration helpers from ``datalab.fab``.
    3. Run the ``common_configure_reverse_proxy`` script for the edge node.

    If any stage fails, the failure is appended to the result, the Dataproc
    cluster is deleted and the process exits with status 1.

    :param instance: instance identifier passed to
        ``GCPMeta.get_private_ip_address`` to resolve the target IP.
    :param dataproc_conf: configuration dict. This function writes
        ``instance_ip`` into it and reads ``edge_instance_name``,
        ``cluster_name``, ``key_path``, ``datalab_ssh_user``,
        ``cluster_core_instances``, ``computational_name``, ``master_ip``,
        ``master_name``, ``edge_instance_hostname`` and ``exploratory_name``.
    """
    # The connection is published as a module-level global, as before.
    global conn

    dataproc_conf['instance_ip'] = GCPMeta.get_private_ip_address(instance)

    # configuring proxy on Data Engine service
    try:
        logging.info('[CONFIGURE PROXY ON DATAENGINE SERVICE]')
        additional_config = {"proxy_host": dataproc_conf['edge_instance_name'], "proxy_port": "3128"}
        params = "--hostname {} --instance_name {} --keyfile {} --additional_config '{}' --os_user {}" \
            .format(dataproc_conf['instance_ip'], dataproc_conf['cluster_name'], dataproc_conf['key_path'],
                    json.dumps(additional_config), dataproc_conf['datalab_ssh_user'])
        try:
            # shell=True is kept on purpose: the shell expands the leading "~".
            subprocess.run("~/scripts/{}.py {}".format('common_configure_proxy', params), shell=True, check=True)
        except BaseException as exc:
            # BaseException mirrors the previous bare ``except:`` so that
            # SystemExit / KeyboardInterrupt still trigger the cleanup below;
            # the cause's text is now carried along instead of being dropped.
            traceback.print_exc()
            raise Exception(str(exc)) from exc
    except Exception as err:
        _abort_dataengine_service("Failed to configure proxy.", err, dataproc_conf)

    try:
        logging.info('[CONFIGURE DATAENGINE SERVICE]')
        try:
            conn = datalab.fab.init_datalab_connection(dataproc_conf['instance_ip'],
                                                       dataproc_conf['datalab_ssh_user'],
                                                       dataproc_conf['key_path'])
            datalab.fab.configure_data_engine_service_livy(dataproc_conf['instance_ip'],
                                                           dataproc_conf['datalab_ssh_user'],
                                                           dataproc_conf['key_path'])
            datalab.notebook_lib.install_os_pkg([['python3-pip', 'N/A']])
            datalab.fab.configure_data_engine_service_pip(dataproc_conf['instance_ip'],
                                                          dataproc_conf['datalab_ssh_user'],
                                                          dataproc_conf['key_path'])
        except BaseException as exc:
            # Same catch scope as the previous bare ``except:``; keep the detail.
            traceback.print_exc()
            raise Exception(str(exc)) from exc
    except Exception as err:
        _abort_dataengine_service("Failed to configure dataengine service.", err, dataproc_conf)

    try:
        logging.info('[SETUP EDGE REVERSE PROXY TEMPLATE]')
        # One entry per core instance, numbered from 1. The loop variable is
        # deliberately not called ``instance`` so the parameter is not shadowed.
        slaves = [
            {
                'name': 'datanode{}'.format(position),
                'ip': GCPMeta.get_private_ip_address(core_instance),
                'dns': "{0}.c.{1}.internal".format(core_instance, os.environ['gcp_project_id'])
            }
            for position, core_instance in enumerate(dataproc_conf['cluster_core_instances'], start=1)
        ]
        additional_info = {
            "computational_name": dataproc_conf['computational_name'],
            "master_ip": dataproc_conf['master_ip'],
            "master_dns": "{0}.c.{1}.internal".format(dataproc_conf['master_name'], os.environ['gcp_project_id']),
            "slaves": slaves,
            "tensor": False
        }
        params = "--edge_hostname {} " \
                 "--keyfile {} " \
                 "--os_user {} " \
                 "--type {} " \
                 "--exploratory_name {} " \
                 "--additional_info '{}'"\
            .format(dataproc_conf['edge_instance_hostname'],
                    dataproc_conf['key_path'],
                    dataproc_conf['datalab_ssh_user'],
                    'dataengine-service',
                    dataproc_conf['exploratory_name'],
                    json.dumps(additional_info))
        try:
            subprocess.run("~/scripts/{}.py {}".format('common_configure_reverse_proxy', params), shell=True, check=True)
        except BaseException as exc:
            # This stage records its own result entry before the outer handler
            # adds the generic one; that double entry is preserved.
            datalab.fab.append_result("Failed edge reverse proxy template")
            raise Exception(str(exc)) from exc
    except Exception as err:
        _abort_dataengine_service("Failed to configure reverse proxy.", err, dataproc_conf)