in aws_emr_blog_v2/code/launch-cluster/cremr.py [0:0]
# Script-runner jar used by every EMR step built in this module.
_SCRIPT_RUNNER_JAR = "s3://elasticmapreduce/libs/script-runner/script-runner.jar"
# On-cluster directory holding the step scripts referenced by the steps below.
_EMR_STEPS_DIR = "/mnt/tmp/aws-blog-emr-ranger/scripts/emr-steps/"
# Hive metastore client factory used when the Glue Data Catalog is selected.
_GLUE_FACTORY_CLASS = "com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory"


def _script_runner_step(name, script, *args):
    """Return an EMR step dict that runs ``script`` (from the emr-steps dir) with ``args``."""
    return {
        "Name": name,
        "ActionOnFailure": "CONTINUE",
        "HadoopJarStep": {
            "Jar": _SCRIPT_RUNNER_JAR,
            "Args": [_EMR_STEPS_DIR + script] + list(args),
        },
    }


def _bootstrap_action(name, path, args=None):
    """Return a bootstrap-action dict; the ``Args`` key is only present when ``args`` is given."""
    script_action = {"Path": path}
    if args is not None:
        script_action["Args"] = list(args)
    return {"Name": name, "ScriptBootstrapAction": script_action}


def _parse_applications(apps):
    """Split the comma-separated ``apps`` string into EMR application entries.

    Returns ``(applist, presto_requested, presto_engine, spark_requested)``.
    ``presto_engine`` defaults to ``"Presto"`` and becomes the last of
    ``"Presto"``/``"PrestoSQL"`` found in the list. Blank entries (for example
    from a trailing comma) are skipped instead of producing ``{"Name": ""}``.
    """
    applist = []
    presto_requested = False
    presto_engine = "Presto"
    spark_requested = False
    for raw_name in apps.split(","):
        name = raw_name.strip()
        if not name:
            continue
        applist.append({"Name": name})
        if name in ("Presto", "PrestoSQL"):
            presto_requested = True
            presto_engine = name
        if name == "Spark":
            spark_requested = True
    return applist, presto_requested, presto_engine, spark_requested


def _static_configurations(props):
    """Return the configuration classifications that are always applied (livy, webhcat, kms, spark-env, hue)."""
    return [
        {
            "Classification": "livy-conf",
            "Properties": {
                "livy.superusers": "knox,hue,livy",
                "livy.impersonation.enabled": "true",
                "livy.repl.enable-hive-context": "true"
            },
            "Configurations": []
        },
        {
            "Classification": "hcatalog-webhcat-site",
            "Properties": {
                "webhcat.proxyuser.knox.groups": "*",
                "webhcat.proxyuser.knox.hosts": "*",
                "webhcat.proxyuser.livy.groups": "*",
                "webhcat.proxyuser.livy.hosts": "*",
                "webhcat.proxyuser.hive.groups": "*",
                "webhcat.proxyuser.hive.hosts": "*",
                "webhcat.proxyuser.presto.hosts": "*",
                "webhcat.proxyuser.presto.groups": "*"
            }
        },
        {
            "Classification": "hadoop-kms-site",
            "Properties": {
                "hadoop.kms.proxyuser.knox.hosts": "*",
                "hadoop.kms.proxyuser.knox.groups": "*",
                "hadoop.kms.proxyuser.knox.users": "*",
                "hadoop.kms.proxyuser.livy.users": "*",
                "hadoop.kms.proxyuser.livy.groups": "*",
                "hadoop.kms.proxyuser.livy.hosts": "*",
                "hadoop.kms.proxyuser.hive.users": "*",
                "hadoop.kms.proxyuser.hive.groups": "*",
                "hadoop.kms.proxyuser.hive.hosts": "*",
                "hadoop.kms.proxyuser.presto.hosts": "*",
                "hadoop.kms.proxyuser.presto.groups": "*"
            },
            "Configurations": []
        },
        {
            "Classification": "spark-env",
            "Configurations": [
                {
                    "Classification": "export",
                    "Configurations": [],
                    "Properties": {
                        "SPARK_HISTORY_OPTS": "\"-Dspark.ui.proxyBase=/gateway/emr-cluster-top/sparkhistory\""
                    }
                }
            ],
            "Properties": {}
        },
        {
            "Classification": "hue-ini",
            "Configurations": [
                {
                    "Classification": "desktop",
                    "Configurations": [
                        {
                            "Classification": "auth",
                            "Properties": {
                                "backend": "desktop.auth.backend.LdapBackend"
                            }
                        },
                        {
                            "Classification": "ldap",
                            "Properties": {
                                "base_dn": props["LDAPGroupSearchBase"],
                                "bind_dn": props["ADDomainUser"] + '@' + props["DomainDNSName"],
                                "bind_password": props["ADDomainJoinPassword"],
                                "debug": "true",
                                "force_username_lowercase": "true",
                                "ignore_username_case": "true",
                                "ldap_url": "ldap://" + props["LDAPHostPrivateIP"],
                                "ldap_username_pattern": "uid:<username>," + props["LDAPSearchBase"],
                                "nt_domain": props["DomainDNSName"],
                                "search_bind_authentication": "true",
                                "trace_level": "0",
                                "sync_groups_on_login": "true",
                                "create_users_on_login": "true",
                                "use_start_tls": "false"
                            }
                        }
                    ]
                }
            ],
            "Properties": {}
        }
    ]


def _hive_site_configuration(props, use_glue):
    """Return the ``hive-site`` classification for either the Glue catalog or the MySQL-backed metastore."""
    if use_glue:
        properties = {}
    else:
        # The database properties are only read when Glue is not selected.
        properties = {
            "javax.jdo.option.ConnectionURL": "jdbc:mysql://" + props["DBHostName"]
                                              + ":3306/hive?createDatabaseIfNotExist=true",
            "javax.jdo.option.ConnectionDriverName": "org.mariadb.jdbc.Driver",
            "javax.jdo.option.ConnectionUserName": props["DBUserName"],
            "javax.jdo.option.ConnectionPassword": props["DBRootPassword"],
        }
    # HiveServer2 settings shared by both metastore variants.
    properties.update({
        "hive.server2.thrift.http.port": "10001",
        "hive.server2.thrift.http.path": "cliservice",
        "hive.server2.transport.mode": "binary",
        "hive.server2.allow.user.substitution": "true",
        "hive.server2.authentication.kerberos.principal": "hive/_HOST@EC2.INTERNAL",
        "hive.server2.enable.doAs": "false",
    })
    if use_glue:
        properties["hive.metastore.client.factory.class"] = _GLUE_FACTORY_CLASS
    return {"Classification": "hive-site", "Properties": properties}


def _core_site_configuration():
    """Return the ``core-site`` classification with the Hadoop proxy-user grants.

    Only proxy-user entries are set; no LDAP group mapping is configured here.
    """
    return {
        "Classification": "core-site",
        "Properties": {
            "hadoop.proxyuser.knox.groups": "*",
            "hadoop.proxyuser.knox.hosts": "*",
            "hadoop.proxyuser.livy.groups": "*",
            "hadoop.proxyuser.livy.hosts": "*",
            "hadoop.proxyuser.hive.hosts": "*",
            "hadoop.proxyuser.hive.groups": "*",
            "hadoop.proxyuser.hue_hive.groups": "*",
            "hadoop.proxyuser.presto.hosts": "*",
            "hadoop.proxyuser.presto.groups": "*",
            "hadoop.proxyuser.hbase.hosts": "*",
            "hadoop.proxyuser.hbase.groups": "*"
        }
    }


def create(event, context):
    """Build the EMR ``run_job_flow`` request from the event and launch the cluster.

    All settings are read from ``event["ResourceProperties"]`` (string values;
    feature switches are compared against the literal ``"true"``/``"false"``).
    Presumably this is the create handler of a CloudFormation custom resource;
    confirm against the caller.

    Args:
        event: Mapping with a ``"ResourceProperties"`` mapping. Properties that
            belong to an optional feature (Ranger, Privacera, Kerberos, the
            metastore database, Presto) are only read when that feature is
            switched on.
        context: Unused; kept for the handler signature.

    Returns:
        ``(physical_resource_id, response_data)`` where ``physical_resource_id``
        is the ``JobFlowId`` returned by EMR and ``response_data`` is
        ``{"ClusterID": <JobFlowId>}``.

    Raises:
        KeyError: A required resource property is missing.
        ValueError: An instance count property is not an integer string.
        Exception: Anything raised by the boto3 EMR client propagates unchanged.
    """
    props = event["ResourceProperties"]
    applist, presto_requested, presto_engine, spark_requested = _parse_applications(props["AppsEMR"])
    s3_bucket = props["S3Bucket"]
    release_label = props["emrReleaseLabel"]

    client = boto3.client("emr", region_name=props["StackRegion"])

    # S3 prefixes reused by bootstrap actions and step arguments.
    s3_key_root = "s3://" + s3_bucket + "/" + props["S3Key"]
    project_root = s3_key_root + "/" + props["ProjectVersion"]
    use_glue = props["UseAWSGlueForHiveMetastore"] == "true"

    bootstrap_actions = [
        _bootstrap_action("Install packages", project_root + "/scripts/install-required-packages.sh"),
        _bootstrap_action("Download scripts", project_root + "/scripts/download-scripts.sh", [project_root]),
        _bootstrap_action("Setup HDFS home dir", project_root + "/scripts/create-hdfs-home-ba.sh"),
    ]

    # Steps run in list order, so the order of the appends below is significant.
    steps = [
        _script_runner_step("CreateDefaultHiveTables", "createHiveTables.sh", props["StackRegion"]),
        _script_runner_step("CreateExtendedHiveTables", "createdExtendedHiveTables.sh", props["StackRegion"]),
        _script_runner_step("LoadHDFSData", "loadDataIntoHDFS.sh", props["StackRegion"]),
        _script_runner_step("Cloudformation-Signal", "send-cf-signal.sh", props["SignalURL"]),
    ]

    configurations = _static_configurations(props)

    instance_groups = [
        {
            "Name": "Master nodes",
            "Market": "ON_DEMAND",
            "InstanceRole": "MASTER",
            "InstanceType": props["MasterInstanceType"],
            "InstanceCount": int(props["MasterInstanceCount"]),
        }
    ]
    core_instance_count = int(props["CoreInstanceCount"])
    if core_instance_count > 0:
        instance_groups.append(
            {
                "Name": "Slave nodes",
                "Market": "ON_DEMAND",
                "InstanceRole": "CORE",
                "InstanceType": props["CoreInstanceType"],
                "InstanceCount": core_instance_count,
            }
        )

    cluster_parameters = {
        'Name': "EMR-" + props["StackName"],
        'ReleaseLabel': release_label,
        'LogUri': props["LogFolder"],
        'BootstrapActions': bootstrap_actions,
        'Applications': applist,
        'EbsRootVolumeSize': 100,
        'Steps': steps,
        'VisibleToAllUsers': True,
        'JobFlowRole': props["JobFlowRole"],
        'ServiceRole': props["ServiceRole"],
        'Tags': [
            {
                "Key": "Name",
                "Value": "EMREC2Instance"
            }
        ],
        'Configurations': configurations,
        'Instances': {
            "InstanceGroups": instance_groups,
            "Ec2KeyName": props["KeyName"],
            "KeepJobFlowAliveWhenNoSteps": True,
            "TerminationProtected": False,
            "Ec2SubnetId": props["subnetID"],
            "AdditionalMasterSecurityGroups": [props["masterSG"]]
        },
    }

    # Ranger and Privacera plugins are mutually exclusive; Ranger wins when both are set.
    # NOTE(review): RangerAdminPassword is passed as a plain step argument and
    # is therefore part of the step definition sent to EMR - consider a secret store.
    if props["InstallRangerPlugins"] == "true":
        steps.append(_script_runner_step(
            "InstallHiveHDFSRangerPlugin",
            "install-hive-hdfs-ranger-plugin.sh",
            props["RangerHostname"],
            props["RangerVersion"],
            s3_key_root,
            props["ProjectVersion"],
            props["emrReleaseLabel"],
            props["RangerHttpProtocol"],
            props["InstallCloudWatchAgentForAudit"],
        ))
        steps.append(_script_runner_step(
            "InstallRangerServiceDef",
            "install-ranger-servicedef.sh",
            props["RangerHostname"],
            project_root + "/inputdata",
            props["RangerHttpProtocol"],
            props["RangerVersion"],
            props["RangerAdminPassword"],
        ))
        steps.append(_script_runner_step(
            "InstallRangerPolicies",
            "install-ranger-policies.sh",
            props["RangerHostname"],
            project_root + "/inputdata",
            props["RangerHttpProtocol"],
            props["RangerVersion"],
            props["RangerAdminPassword"],
        ))
    elif props["InstallPrivaceraPlugins"] == "true":
        steps.append(_script_runner_step(
            "InstallPrivaceraRangerPlugin",
            "install-privacera-plugins.sh",
            props["PrivaceraPluginURL"],
        ))

    if props["InstallCloudWatchAgentForAudit"] == "true":
        bootstrap_actions.append(_bootstrap_action(
            "Install cloudwatch agent",
            project_root + "/scripts/install-cloudwatch-agent.sh",
        ))

    # The literal string "false" means "no EMR security configuration".
    if props["EMRSecurityConfig"] != "false":
        cluster_parameters['SecurityConfiguration'] = props["EMRSecurityConfig"]
        cluster_parameters['KerberosAttributes'] = {
            "Realm": props["KerberosRealm"],
            "KdcAdminPassword": props["KdcAdminPassword"],
            "CrossRealmTrustPrincipalPassword": props["CrossRealmTrustPrincipalPassword"],
            "ADDomainJoinUser": props["ADDomainUser"],
            "ADDomainJoinPassword": props["ADDomainJoinPassword"]
        }

    # Hive LDAP authentication is not configured here; only the metastore
    # backend differs between the two hive-site variants.
    configurations.append(_hive_site_configuration(props, use_glue))
    configurations.append(_core_site_configuration())

    if presto_requested:
        if props["EnablePrestoKerberos"] == "true":
            # NOTE(review): KdcAdminPassword is passed as a plain bootstrap argument.
            bootstrap_actions.append(_bootstrap_action(
                "Setup Presto Kerberos",
                project_root + "/scripts/configure_presto_kerberos_ba.sh",
                [
                    project_root,
                    props["KdcAdminPassword"],
                    presto_engine,
                    'glue' if use_glue else 'database',
                ],
            ))
            # NOTE(review): these two SSL follow-up steps are treated as part of
            # the Kerberos branch - confirm against the intended nesting.
            steps.append(_script_runner_step(
                "PrestoSSLUpdate",
                "presto-cli-kerberos_fix.sh",
                props["emrReleaseLabel"],
                presto_engine,
            ))
            steps.append(_script_runner_step(
                "UpdateHueConfigurationForPrestoSSL",
                "hue-update.sh",
                props["emrReleaseLabel"],
                presto_engine,
            ))
        if use_glue:
            # The connector classification name depends on the Presto flavour.
            if presto_engine == "PrestoSQL":
                connector_classification = "prestosql-connector-hive"
            else:
                connector_classification = "presto-connector-hive"
            configurations.append(
                {
                    "Classification": connector_classification,
                    "Properties": {
                        "hive.metastore": "glue"
                    }
                })

    if spark_requested and use_glue:
        configurations.append(
            {
                "Classification": "spark-hive-site",
                "Properties": {
                    "hive.server2.enable.doAs": "true",
                    "hive.metastore.client.factory.class": _GLUE_FACTORY_CLASS
                }
            })

    if props["InstallHBasePlugin"] == "true":
        configurations.append(
            {
                "Classification": "hbase-site",
                "Properties": {
                    "hbase.superuser": "hbase"
                }
            })
        # The HBase Ranger plugin is installed as a bootstrap action, not a step.
        bootstrap_actions.append(_bootstrap_action(
            "Install Ranger HBase Plugin",
            project_root + "/scripts/install-hbase-plugin-ba.sh",
            [
                props["RangerHostname"],
                props["RangerVersion"],
                s3_key_root,
                props["ProjectVersion"],
                props["emrReleaseLabel"],
                props["RangerHttpProtocol"],
                props["InstallCloudWatchAgentForAudit"],
            ],
        ))

    # InstallPrestoPlugin is only read when a Presto engine was requested.
    if presto_requested and props["InstallPrestoPlugin"] == "true":
        steps.append(_script_runner_step(
            "InstallRangerPrestoPlugin",
            "install-presto-ranger-plugin.sh",
            props["RangerHostname"],
            props["RangerVersion"],
            s3_key_root,
            props["ProjectVersion"],
            props["emrReleaseLabel"],
            presto_engine,
            props["RangerHttpProtocol"],
            props["InstallCloudWatchAgentForAudit"],
        ))

    # Any failure propagates to the caller unchanged.
    cluster_id = client.run_job_flow(**cluster_parameters)
    physical_resource_id = cluster_id["JobFlowId"]
    response_data = {
        "ClusterID": cluster_id["JobFlowId"]
    }
    return physical_resource_id, response_data