def configure_dataengine_service()

in infrastructure-provisioning/src/general/scripts/gcp/dataengine-service_configure.py [0:0]


def _abort_dataengine_service_setup(message, err, dataproc_conf):
    """Report a configuration failure, delete the Dataproc cluster and exit.

    Passes *message* and the text of *err* to ``datalab.fab.append_result``,
    deletes the cluster named by ``dataproc_conf['cluster_name']`` in the
    region given by the ``gcp_region`` environment variable, then terminates
    the process with exit status 1.
    """
    datalab.fab.append_result(message, str(err))
    GCPActions.delete_dataproc_cluster(dataproc_conf['cluster_name'], os.environ['gcp_region'])
    sys.exit(1)


def configure_dataengine_service(instance, dataproc_conf):
    """Configure proxy, Livy/pip and the edge reverse proxy for a Data Engine service.

    The function runs three stages in order:

    1. runs ``~/scripts/common_configure_proxy.py`` against the instance;
    2. opens a DataLab connection (stored in the module-level ``conn``) and
       runs the Livy, ``python3-pip`` and pip configuration helpers;
    3. runs ``~/scripts/common_configure_reverse_proxy.py`` with the master
       and core node details.

    If any stage fails, the failure is reported through
    ``datalab.fab.append_result``, the Dataproc cluster is deleted and the
    process exits with status 1.

    Args:
        instance: Instance identifier passed to
            ``GCPMeta.get_private_ip_address`` to resolve the target IP.
        dataproc_conf: Mutable configuration mapping. ``'instance_ip'`` is
            written by this function. Keys read: ``edge_instance_name``,
            ``cluster_name``, ``key_path``, ``datalab_ssh_user``,
            ``cluster_core_instances``, ``computational_name``, ``master_ip``,
            ``master_name``, ``edge_instance_hostname``, ``exploratory_name``.

    Environment:
        ``gcp_project_id`` is used to build internal DNS names;
        ``gcp_region`` is used when deleting the cluster after a failure.
    """
    global conn

    dataproc_conf['instance_ip'] = GCPMeta.get_private_ip_address(instance)

    # configuring proxy on Data Engine service
    try:
        logging.info('[CONFIGURE PROXY ON DATAENGINE SERVICE]')
        additional_config = {"proxy_host": dataproc_conf['edge_instance_name'], "proxy_port": "3128"}
        params = "--hostname {} --instance_name {} --keyfile {} --additional_config '{}' --os_user {}" \
            .format(dataproc_conf['instance_ip'], dataproc_conf['cluster_name'], dataproc_conf['key_path'],
                    json.dumps(additional_config), dataproc_conf['datalab_ssh_user'])
        try:
            subprocess.run("~/scripts/{}.py {}".format('common_configure_proxy', params), shell=True, check=True)
        except BaseException as err:
            # BaseException (not Exception) is deliberate: SystemExit or
            # KeyboardInterrupt must still trigger the cluster cleanup below.
            traceback.print_exc()
            # Keep the cause's type and message so the failure report is not empty.
            raise Exception("{}: {}".format(type(err).__name__, err)) from err
    except Exception as err:
        _abort_dataengine_service_setup("Failed to configure proxy.", err, dataproc_conf)

    try:
        logging.info('[CONFIGURE DATAENGINE SERVICE]')
        try:
            conn = datalab.fab.init_datalab_connection(dataproc_conf['instance_ip'],
                                                       dataproc_conf['datalab_ssh_user'],
                                                       dataproc_conf['key_path'])
            datalab.fab.configure_data_engine_service_livy(dataproc_conf['instance_ip'],
                                                           dataproc_conf['datalab_ssh_user'],
                                                           dataproc_conf['key_path'])
            datalab.notebook_lib.install_os_pkg([['python3-pip', 'N/A']])
            datalab.fab.configure_data_engine_service_pip(dataproc_conf['instance_ip'],
                                                          dataproc_conf['datalab_ssh_user'],
                                                          dataproc_conf['key_path'])
        except BaseException as err:
            # Catch SystemExit too, so an exit raised inside a helper still
            # leads to the cluster being deleted.
            traceback.print_exc()
            raise Exception("{}: {}".format(type(err).__name__, err)) from err
    except Exception as err:
        _abort_dataengine_service_setup("Failed to configure dataengine service.", err, dataproc_conf)

    try:
        logging.info('[SETUP EDGE REVERSE PROXY TEMPLATE]')
        project_id = os.environ['gcp_project_id']
        # A distinct loop name avoids shadowing the ``instance`` parameter.
        slaves = [
            {
                'name': 'datanode{}'.format(node_number),
                'ip': GCPMeta.get_private_ip_address(core_instance),
                'dns': "{0}.c.{1}.internal".format(core_instance, project_id)
            }
            for node_number, core_instance in enumerate(dataproc_conf['cluster_core_instances'], start=1)
        ]
        additional_info = {
            "computational_name": dataproc_conf['computational_name'],
            "master_ip": dataproc_conf['master_ip'],
            "master_dns": "{0}.c.{1}.internal".format(dataproc_conf['master_name'], project_id),
            "slaves": slaves,
            "tensor": False
        }
        params = "--edge_hostname {} " \
                 "--keyfile {} " \
                 "--os_user {} " \
                 "--type {} " \
                 "--exploratory_name {} " \
                 "--additional_info '{}'"\
            .format(dataproc_conf['edge_instance_hostname'],
                    dataproc_conf['key_path'],
                    dataproc_conf['datalab_ssh_user'],
                    'dataengine-service',
                    dataproc_conf['exploratory_name'],
                    json.dumps(additional_info))
        try:
            subprocess.run("~/scripts/{}.py {}".format('common_configure_reverse_proxy', params), shell=True, check=True)
        except BaseException as err:
            # Same deliberate breadth as the other stages; see above.
            datalab.fab.append_result("Failed edge reverse proxy template")
            raise Exception("{}: {}".format(type(err).__name__, err)) from err
    except Exception as err:
        _abort_dataengine_service_setup("Failed to configure reverse proxy.", err, dataproc_conf)