in aws_emr_blog_v3/code/launch-cluster/cremr.py [0:0]
import boto3


def create(event, context):
apps = event["ResourceProperties"]["AppsEMR"]
emrReleaseLabel = event["ResourceProperties"]["emrReleaseLabel"]
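    # Build the EMR Applications list from the comma-separated AppsEMR property and
    # note whether Presto/PrestoSQL or Spark were requested; those flags feed the
    # optional (currently commented-out) Glue and Ranger-plugin branches below.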
prestoEngineRequested = "Presto"
isPrestoAppRequested = False
isSparkAppRequested = False
    raw_applist = apps.split(",")
    applist = []
    for app in raw_applist:
        app = app.strip()
        applist.append({"Name": app})
        if app in ("Presto", "PrestoSQL"):
            isPrestoAppRequested = True
            prestoEngineRequested = app
        elif app == "Spark":
            isSparkAppRequested = True
try:
        emrVersion = emrReleaseLabel.split("-")[1].split(".")  # e.g. "emr-5.32.0" -> ["5", "32", "0"]; parsed but currently unused
client = boto3.client("emr", region_name=event["ResourceProperties"]["StackRegion"])
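        # Region-specific script-runner.jar, used by every shell-script step below.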
scriptRunnerJar = "s3://"+event["ResourceProperties"]["StackRegion"]+".elasticmapreduce/libs/script-runner/script-runner.jar"
cluster_name = "EMR-" + event["ResourceProperties"]["StackName"]
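        # Base RunJobFlow request; optional pieces (core instance group, security
        # configuration, metastore and proxyuser settings) are appended conditionally below.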
cluster_parameters = {'Name': cluster_name, 'ReleaseLabel': emrReleaseLabel,
'LogUri': event["ResourceProperties"]["LogFolder"],
'AdditionalInfo': '{"clusterType":"development"}',
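                              # Bootstrap actions run on every node before applications start; the active one
                              # stages the step scripts from S3 (the steps below expect them under /mnt/tmp).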
'BootstrapActions': [
# {
# "Name": "Install packages",
# "ScriptBootstrapAction": {
# "Path": "s3://" + event["ResourceProperties"]["S3ArtifactBucket"] + "/" + event["ResourceProperties"][
# "S3Key"] + "/" + event["ResourceProperties"][
# "ProjectVersion"] + "/scripts/install-required-packages.sh"
# }
# },
{
"Name": "Download scripts",
"ScriptBootstrapAction": {
"Path": "s3://" + event["ResourceProperties"]["S3Bucket"] + "/" + event["ResourceProperties"][
"S3Key"] + "/" + event["ResourceProperties"][
"ProjectVersion"] + "/scripts/download-scripts.sh",
"Args": [
"s3://" + event["ResourceProperties"]["S3ArtifactBucket"] + "/" + event["ResourceProperties"][
"S3ArtifactKey"] + "/" + event["ResourceProperties"][
"ProjectVersion"]
]
}
}
# ,
# {
# "Name": "Setup HDFS home dir",
# "ScriptBootstrapAction": {
# "Path": "s3://" + event["ResourceProperties"]["S3ArtifactBucket"] + "/" + event["ResourceProperties"][
# "S3ArtifactKey"] + "/" + event["ResourceProperties"][
# "ProjectVersion"] + "/scripts/create-hdfs-home-ba.sh"
# }
# }
],
'Applications': applist,
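                              # Steps run sequentially on the master node once the cluster is up; each uses
                              # script-runner.jar to execute a shell script staged by the bootstrap action.
                              # ActionOnFailure=CONTINUE keeps the cluster alive even if a step fails.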
'Steps': [
{
"Name": "CreateDefaultHiveTables",
"ActionOnFailure": "CONTINUE",
"HadoopJarStep": {
"Jar": scriptRunnerJar,
"Args": [
"/mnt/tmp/aws-blog-emr-ranger/scripts/emr-steps/createHiveTables.sh",
event["ResourceProperties"]["StackRegion"]
]
}
},
# {
# "Name": "CreateExtendedHiveTables",
# "ActionOnFailure": "CONTINUE",
# "HadoopJarStep": {
# "Jar": scriptRunnerJar,
# "Args": [
# "/mnt/tmp/aws-blog-emr-ranger/scripts/emr-steps/createdExtendedHiveTables.sh",
# event["ResourceProperties"]["StackRegion"]
# ]
# }
# },
{
"Name": "LoadHDFSData",
"ActionOnFailure": "CONTINUE",
"HadoopJarStep": {
"Jar": scriptRunnerJar,
"Args": [
"/mnt/tmp/aws-blog-emr-ranger/scripts/emr-steps/loadDataIntoHDFS.sh",
event["ResourceProperties"]["StackRegion"]
]
}
},
# {
# "Name": "InstallHiveHDFSRangerPlugin",
# "ActionOnFailure": "CONTINUE",
# "HadoopJarStep": {
# "Jar": scriptRunnerJar,
# "Args": [
# "/mnt/tmp/aws-blog-emr-ranger/scripts/emr-steps/install-hive-hdfs-ranger-plugin.sh",
# event["ResourceProperties"]["RangerHostname"],
# event["ResourceProperties"]["RangerVersion"],
# "s3://" + event["ResourceProperties"]["S3ArtifactBucket"] + "/" + event["ResourceProperties"]["S3ArtifactKey"],
# event["ResourceProperties"][
# "ProjectVersion"],
# event["ResourceProperties"]["emrReleaseLabel"],
# event["ResourceProperties"]["RangerHttpProtocol"],
# event["ResourceProperties"]["InstallCloudWatchAgentForAudit"]
# ]
# }
# },
{
"Name": "InstallRangerServiceDef",
"ActionOnFailure": "CONTINUE",
"HadoopJarStep": {
"Jar": scriptRunnerJar,
"Args": [
"/mnt/tmp/aws-blog-emr-ranger/scripts/emr-steps/install-ranger-servicedef.sh",
event["ResourceProperties"]["RangerHostname"],
"s3://" + event["ResourceProperties"]["S3ArtifactBucket"] + "/" +
event["ResourceProperties"][
"S3ArtifactKey"] + "/" + event["ResourceProperties"][
"ProjectVersion"] + "/inputdata",
event["ResourceProperties"]["RangerHttpProtocol"],
event["ResourceProperties"]["RangerVersion"],
event["ResourceProperties"]["RangerAdminPassword"],
str(event["ResourceProperties"]["DefaultDomain"]).lower()
]
}
},
{
"Name": "InstallRangerPolicies",
"ActionOnFailure": "CONTINUE",
"HadoopJarStep": {
"Jar": scriptRunnerJar,
"Args": [
"/mnt/tmp/aws-blog-emr-ranger/scripts/emr-steps/install-ranger-policies.sh",
event["ResourceProperties"]["RangerHostname"],
"s3://" + event["ResourceProperties"]["S3ArtifactBucket"] + "/" +
event["ResourceProperties"][
"S3ArtifactKey"] + "/" + event["ResourceProperties"][
"ProjectVersion"] + "/inputdata",
event["ResourceProperties"]["RangerHttpProtocol"],
event["ResourceProperties"]["RangerVersion"],
event["ResourceProperties"]["RangerAdminPassword"],
str(event["ResourceProperties"]["DefaultDomain"]).lower()
]
}
},
{
"Name": "Hue-Permission-Update",
"ActionOnFailure": "CONTINUE",
"HadoopJarStep": {
"Jar": scriptRunnerJar,
"Args": [
"/mnt/tmp/aws-blog-emr-ranger/scripts/emr-steps/hue-update.sh"
]
}
},
{
"Name": "Cloudformation-Signal",
"ActionOnFailure": "CONTINUE",
"HadoopJarStep": {
"Jar": scriptRunnerJar,
"Args": [
"/mnt/tmp/aws-blog-emr-ranger/scripts/emr-steps/send-cf-signal.sh",
event["ResourceProperties"]["SignalURL"]
]
}
}
], 'VisibleToAllUsers': True, 'JobFlowRole': event["ResourceProperties"]["JobFlowRole"],
'ServiceRole': event["ResourceProperties"]["ServiceRole"],
'Tags': [
{
"Key": "Name",
"Value": "EMREC2Instance"
}
],
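                              # Configuration classifications applied at launch: Livy superusers and
                              # impersonation, WebHCat and KMS proxyuser rules for Knox/Livy/Hive, a Spark
                              # history-server proxy base for Knox, and Hue LDAP authentication (below).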
'Configurations': [
{
"Classification": "livy-conf",
"Properties": {
"livy.superusers": "knox,hue,livy",
"livy.impersonation.enabled": "true",
"livy.repl.enable-hive-context": "true"
},
"Configurations": []
},
{
"Classification": "hcatalog-webhcat-site",
"Properties": {
"webhcat.proxyuser.knox.groups": "*",
"webhcat.proxyuser.knox.hosts": "*",
"webhcat.proxyuser.livy.groups": "*",
"webhcat.proxyuser.livy.hosts": "*",
"webhcat.proxyuser.hive.groups": "*",
"webhcat.proxyuser.hive.hosts": "*"
}
},
{
"Classification": "hadoop-kms-site",
"Properties": {
"hadoop.kms.proxyuser.knox.hosts": "*",
"hadoop.kms.proxyuser.knox.groups": "*",
"hadoop.kms.proxyuser.knox.users": "*",
"hadoop.kms.proxyuser.livy.users": "*",
"hadoop.kms.proxyuser.livy.groups": "*",
"hadoop.kms.proxyuser.livy.hosts": "*",
"hadoop.kms.proxyuser.hive.users": "*",
"hadoop.kms.proxyuser.hive.groups": "*",
"hadoop.kms.proxyuser.hive.hosts": "*"
},
"Configurations": []
},
{
"Classification": "spark-env",
"Configurations": [
{
"Classification": "export",
"Configurations": [
],
"Properties": {
"SPARK_HISTORY_OPTS": "\"-Dspark.ui.proxyBase=/gateway/emr-cluster-top/sparkhistory\""
}
}
],
"Properties": {
}
},
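                                  # Point Hue at the AD/LDAP server so users log in with directory credentials.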
{
"Classification": "hue-ini",
"Configurations": [
{
"Classification": "desktop",
"Configurations": [
{
"Classification": "auth",
"Properties": {
"backend": "desktop.auth.backend.LdapBackend"
}
},
{
"Classification": "ldap",
"Properties": {
"base_dn": event["ResourceProperties"]["LDAPGroupSearchBase"],
"bind_dn": event["ResourceProperties"]["LDAPBindUserName"] + '@' +
event["ResourceProperties"]["DomainDNSName"],
"bind_password": event["ResourceProperties"][
"LDAPBindPassword"],
"debug": "true",
"force_username_lowercase": "true",
"ignore_username_case": "true",
"ldap_url": "ldap://" + event["ResourceProperties"][
"LDAPHostPrivateIP"],
"ldap_username_pattern": "uid:<username>," +
event["ResourceProperties"][
"LDAPSearchBase"],
"nt_domain": event["ResourceProperties"]["DomainDNSName"],
"search_bind_authentication": "true",
"trace_level": "0",
"sync_groups_on_login": "true",
"create_users_on_login": "true",
"use_start_tls": "false"
}
}
]
}
],
"Properties": {
}
}
], 'Instances': {
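                                  # Master-only group by default; a CORE group is appended after this dict
                                  # when CoreInstanceCount > 0.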
"InstanceGroups": [
{
"Name": "Master nodes",
"Market": "ON_DEMAND",
"InstanceRole": "MASTER",
"InstanceType": event["ResourceProperties"]["MasterInstanceType"],
"InstanceCount": int(event["ResourceProperties"]["MasterInstanceCount"]),
}
],
"Ec2KeyName": event["ResourceProperties"]["KeyPairName"],
"KeepJobFlowAliveWhenNoSteps": True,
"TerminationProtected": False,
"Ec2SubnetId": event["ResourceProperties"]["subnetID"],
"AdditionalMasterSecurityGroups": [event["ResourceProperties"]["masterSG"]]
}}
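        # Add core (worker) nodes only when a nonzero CoreInstanceCount was requested.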
        if int(event["ResourceProperties"]["CoreInstanceCount"]) > 0:
cluster_parameters['Instances']['InstanceGroups'].append(
{
"Name": "Slave nodes",
"Market": "ON_DEMAND",
"InstanceRole": "CORE",
"InstanceType": event["ResourceProperties"]["CoreInstanceType"],
"InstanceCount": int(event["ResourceProperties"]["CoreInstanceCount"])
}
)
# if event["ResourceProperties"]["InstallCloudWatchAgentForAudit"] == "true":
# cluster_parameters['BootstrapActions'].append(
# {
# "Name": "Install cloudwatch agent",
# "ScriptBootstrapAction": {
# "Path": "s3://" + event["ResourceProperties"]["S3ArtifactBucket"] + "/" + event["ResourceProperties"][
# "S3ArtifactKey"] + "/" + event["ResourceProperties"][
# "ProjectVersion"] + "/scripts/install-cloudwatch-agent.sh"
# }
# })
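        # Attach the named EMR security configuration and the Kerberos (AD domain-join)
        # attributes unless the template passed the string "false" to disable them.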
if event["ResourceProperties"]["EMRSecurityConfig"] != "false":
cluster_parameters['SecurityConfiguration'] = event["ResourceProperties"]["EMRSecurityConfig"]
cluster_parameters['KerberosAttributes'] = {
"Realm": event["ResourceProperties"]["KerberosRealm"],
"KdcAdminPassword": event["ResourceProperties"]["KdcAdminPassword"],
"CrossRealmTrustPrincipalPassword": event["ResourceProperties"]["CrossRealmTrustPrincipalPassword"],
"ADDomainJoinUser": event["ResourceProperties"]["ADDomainUser"],
"ADDomainJoinPassword": event["ResourceProperties"]["ADDomainJoinPassword"]
}
# if event["ResourceProperties"]["UseAWSGlueForHiveMetastore"] == "true":
# cluster_parameters['Configurations'].append({
# "Classification": "hive-site",
# "Properties": {
# "hive.server2.thrift.http.port": "10001",
# "hive.server2.thrift.http.path": "cliservice",
# "hive.server2.transport.mode": "binary",
# "hive.server2.allow.user.substitution": "true",
# "hive.server2.authentication.kerberos.principal": "hive/_HOST@"+event["ResourceProperties"]["DefaultDomain"],
# "hive.server2.enable.doAs": "false",
# "hive.metastore.client.factory.class": "com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory"
# }
# })
# else:
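        # The Glue-metastore branch above is commented out, so the cluster always points
        # the Hive metastore at the external MySQL-compatible database configured here.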
cluster_parameters['Configurations'].append({
"Classification": "hive-site",
"Properties": {
"javax.jdo.option.ConnectionURL": "jdbc:mysql://" + event["ResourceProperties"][
"DBHostName"] + ":3306/hive?createDatabaseIfNotExist=true",
"javax.jdo.option.ConnectionDriverName": "org.mariadb.jdbc.Driver",
"javax.jdo.option.ConnectionUserName": event["ResourceProperties"]["DBUserName"],
"javax.jdo.option.ConnectionPassword": event["ResourceProperties"]["DBRootPassword"],
"hive.server2.thrift.http.port": "10001",
"hive.server2.thrift.http.path": "cliservice",
"hive.server2.transport.mode": "binary",
"hive.server2.allow.user.substitution": "true",
"hive.server2.authentication.kerberos.principal": "hive/_HOST@"+event["ResourceProperties"]["DefaultDomain"],
"hive.server2.enable.doAs": "false"
}
})
# ## If Hive LDAP
# cluster_parameters['Configurations'].append({
# "Classification": "hive-site",
# "Properties": {
# "hive.server2.authentication": "LDAP",
# "hive.server2.authentication.ldap.url": "ldap://" + event["ResourceProperties"][
# "LDAPHostPrivateIP"],
# "hive.server2.authentication.ldap.baseDN": event["ResourceProperties"]["LDAPGroupSearchBase"]
# }
# })
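        # core-site proxyuser rules let the knox, livy, hive, and hue_hive service users
        # impersonate end users from any host or group.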
cluster_parameters['Configurations'].append({
"Classification": "core-site",
"Properties": {
# "hadoop.security.group.mapping": "org.apache.hadoop.security.LdapGroupsMapping",
# "hadoop.security.group.mapping.ldap.bind.user": event["ResourceProperties"]["ADDomainUser"],
# "hadoop.security.group.mapping.ldap.bind.password": event["ResourceProperties"]["ADDomainJoinPassword"],
# "hadoop.security.group.mapping.ldap.url": "ldap://" + event["ResourceProperties"]["LDAPHostPrivateIP"],
# "hadoop.security.group.mapping.ldap.base": event["ResourceProperties"]["LDAPGroupSearchBase"],
# "hadoop.security.group.mapping.ldap.search.filter.user": "(objectclass=*)",
# "hadoop.security.group.mapping.ldap.search.filter.group": "(objectclass=*)",
# "hadoop.security.group.mapping.ldap.search.attr.member": "member",
# "hadoop.security.group.mapping.ldap.search.attr.group.name": "cn",
"hadoop.proxyuser.knox.groups": "*",
"hadoop.proxyuser.knox.hosts": "*",
"hadoop.proxyuser.livy.groups": "*",
"hadoop.proxyuser.livy.hosts": "*",
"hadoop.proxyuser.hive.hosts": "*",
"hadoop.proxyuser.hive.groups": "*",
"hadoop.proxyuser.hue_hive.groups": "*"
}
})
# if isPrestoAppRequested:
# if event["ResourceProperties"]["UseAWSGlueForHiveMetastore"] == "false":
# cluster_parameters['BootstrapActions'].append(
# {
# "Name": "Setup Presto Kerberos",
# "ScriptBootstrapAction": {
# "Path": "s3://" + event["ResourceProperties"]["S3ArtifactBucket"] + "/" + event["ResourceProperties"][
# "S3ArtifactKey"] + "/" + event["ResourceProperties"][
# "ProjectVersion"] + "/scripts/configure_presto_kerberos_ba.sh",
# "Args": [
# "s3://" + event["ResourceProperties"]["S3ArtifactBucket"] + "/" + event["ResourceProperties"][
# "S3ArtifactKey"] + "/" + event["ResourceProperties"][
# "ProjectVersion"],
# event["ResourceProperties"]["KdcAdminPassword"]
# ]
# }
# })
# if event["ResourceProperties"]["UseAWSGlueForHiveMetastore"] == "true":
# if prestoEngineRequested == "PrestoSQL":
# cluster_parameters['Configurations'].append(
# {
# "Classification": "prestosql-connector-hive",
# "Properties": {
# "hive.metastore": "glue"
# }
# });
# else:
# cluster_parameters['Configurations'].append(
# {
# "Classification": "presto-connector-hive",
# "Properties": {
# "hive.metastore": "glue"
# }
# });
# if isSparkAppRequested and event["ResourceProperties"]["UseAWSGlueForHiveMetastore"] == "true":
# cluster_parameters['Configurations'].append(
# {
# "Classification": "spark-hive-site",
# "Properties": {
# "hive.server2.enable.doAs": "true",
# "hive.metastore.client.factory.class": "com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory"
# }
# });
# if isPrestoAppRequested and event["ResourceProperties"]["InstallPrestoPlugin"] == "true":
# cluster_parameters['Steps'].append({
# "Name": "InstallRangerPrestoPlugin",
# "ActionOnFailure": "CONTINUE",
# "HadoopJarStep": {
# "Jar": scriptRunnerJar,
# "Args": [
# "/mnt/tmp/aws-blog-emr-ranger/scripts/emr-steps/install-presto-ranger-plugin.sh",
# event["ResourceProperties"]["RangerHostname"],
# event["ResourceProperties"]["RangerVersion"],
# "s3://" + event["ResourceProperties"]["S3ArtifactBucket"] + "/" + event["ResourceProperties"]["S3ArtifactKey"],
# event["ResourceProperties"][
# "ProjectVersion"],
# event["ResourceProperties"]["emrReleaseLabel"],
# prestoEngineRequested,
# event["ResourceProperties"]["RangerHttpProtocol"],
# event["ResourceProperties"]["InstallCloudWatchAgentForAudit"]
# ]
# }
# })
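        # Launch the cluster. RunJobFlow returns the new JobFlowId, which doubles as the
        # CloudFormation custom resource's physical resource ID.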
        response = client.run_job_flow(**cluster_parameters)
        physical_resource_id = response["JobFlowId"]
        response_data = {
            "ClusterID": response["JobFlowId"]
        }
return physical_resource_id, response_data
    except Exception:
        # No local handling; re-raise so the caller can report the failure.
        raise