def create()

in aws_emr_blog_v2/code/launch-cluster/cremr.py [0:0]


_SCRIPT_RUNNER_JAR = "s3://elasticmapreduce/libs/script-runner/script-runner.jar"
_EMR_STEPS_DIR = "/mnt/tmp/aws-blog-emr-ranger/scripts/emr-steps/"
_GLUE_HIVE_CLIENT_FACTORY = "com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory"


def _parse_applications(apps):
    """Turn the comma-separated application string into run_job_flow form.

    Returns ``(applications, presto_engine, spark_requested)``:

    * ``applications`` -- list of ``{"Name": <app>}`` dicts, in input order.
      Blank entries (e.g. from a trailing comma) are skipped so that no
      application with an empty name is sent to EMR.
    * ``presto_engine`` -- ``"Presto"`` or ``"PrestoSQL"`` (the last one
      listed wins), or ``None`` when neither was requested.
    * ``spark_requested`` -- ``True`` when ``"Spark"`` is in the list.
    """
    applications = []
    presto_engine = None
    spark_requested = False
    for raw_name in apps.split(","):
        name = raw_name.strip()
        if not name:
            continue
        applications.append({"Name": name})
        if name in ("Presto", "PrestoSQL"):
            presto_engine = name
        elif name == "Spark":
            spark_requested = True
    return applications, presto_engine, spark_requested


def _script_runner_step(name, script, args):
    """Build an EMR step that runs ``script`` from the emr-steps dir via script-runner."""
    return {
        "Name": name,
        "ActionOnFailure": "CONTINUE",
        "HadoopJarStep": {
            "Jar": _SCRIPT_RUNNER_JAR,
            "Args": [_EMR_STEPS_DIR + script] + list(args),
        },
    }


def _bootstrap_action(name, path, args=None):
    """Build a bootstrap action; the ``Args`` key is only emitted when args are given."""
    script = {"Path": path}
    if args is not None:
        script["Args"] = list(args)
    return {"Name": name, "ScriptBootstrapAction": script}


def _base_configurations(props):
    """Return the application configurations that are applied to every cluster."""
    return [
        {
            "Classification": "livy-conf",
            "Properties": {
                "livy.superusers": "knox,hue,livy",
                "livy.impersonation.enabled": "true",
                "livy.repl.enable-hive-context": "true",
            },
            "Configurations": [],
        },
        {
            "Classification": "hcatalog-webhcat-site",
            "Properties": {
                "webhcat.proxyuser.knox.groups": "*",
                "webhcat.proxyuser.knox.hosts": "*",
                "webhcat.proxyuser.livy.groups": "*",
                "webhcat.proxyuser.livy.hosts": "*",
                "webhcat.proxyuser.hive.groups": "*",
                "webhcat.proxyuser.hive.hosts": "*",
                "webhcat.proxyuser.presto.hosts": "*",
                "webhcat.proxyuser.presto.groups": "*",
            },
        },
        {
            "Classification": "hadoop-kms-site",
            "Properties": {
                "hadoop.kms.proxyuser.knox.hosts": "*",
                "hadoop.kms.proxyuser.knox.groups": "*",
                "hadoop.kms.proxyuser.knox.users": "*",
                "hadoop.kms.proxyuser.livy.users": "*",
                "hadoop.kms.proxyuser.livy.groups": "*",
                "hadoop.kms.proxyuser.livy.hosts": "*",
                "hadoop.kms.proxyuser.hive.users": "*",
                "hadoop.kms.proxyuser.hive.groups": "*",
                "hadoop.kms.proxyuser.hive.hosts": "*",
                "hadoop.kms.proxyuser.presto.hosts": "*",
                "hadoop.kms.proxyuser.presto.groups": "*",
            },
            "Configurations": [],
        },
        {
            "Classification": "spark-env",
            "Configurations": [
                {
                    "Classification": "export",
                    "Configurations": [],
                    "Properties": {
                        "SPARK_HISTORY_OPTS": "\"-Dspark.ui.proxyBase=/gateway/emr-cluster-top/sparkhistory\"",
                    },
                },
            ],
            "Properties": {},
        },
        {
            "Classification": "hue-ini",
            "Configurations": [
                {
                    "Classification": "desktop",
                    "Configurations": [
                        {
                            "Classification": "auth",
                            "Properties": {
                                "backend": "desktop.auth.backend.LdapBackend",
                            },
                        },
                        {
                            "Classification": "ldap",
                            "Properties": {
                                "base_dn": props["LDAPGroupSearchBase"],
                                "bind_dn": props["ADDomainUser"] + "@" + props["DomainDNSName"],
                                "bind_password": props["ADDomainJoinPassword"],
                                "debug": "true",
                                "force_username_lowercase": "true",
                                "ignore_username_case": "true",
                                "ldap_url": "ldap://" + props["LDAPHostPrivateIP"],
                                "ldap_username_pattern": "uid:<username>," + props["LDAPSearchBase"],
                                "nt_domain": props["DomainDNSName"],
                                "search_bind_authentication": "true",
                                "trace_level": "0",
                                "sync_groups_on_login": "true",
                                "create_users_on_login": "true",
                                "use_start_tls": "false",
                            },
                        },
                    ],
                },
            ],
            "Properties": {},
        },
    ]


def _hive_site_configuration(props, use_glue):
    """Return the hive-site classification for a Glue or JDBC (MySQL) metastore."""
    hive_properties = {
        "hive.server2.thrift.http.port": "10001",
        "hive.server2.thrift.http.path": "cliservice",
        "hive.server2.transport.mode": "binary",
        "hive.server2.allow.user.substitution": "true",
        "hive.server2.authentication.kerberos.principal": "hive/_HOST@EC2.INTERNAL",
        "hive.server2.enable.doAs": "false",
    }
    if use_glue:
        hive_properties["hive.metastore.client.factory.class"] = _GLUE_HIVE_CLIENT_FACTORY
    else:
        # The database connection properties are only read when Glue is not used.
        hive_properties.update({
            "javax.jdo.option.ConnectionURL": "jdbc:mysql://" + props["DBHostName"]
                                              + ":3306/hive?createDatabaseIfNotExist=true",
            "javax.jdo.option.ConnectionDriverName": "org.mariadb.jdbc.Driver",
            "javax.jdo.option.ConnectionUserName": props["DBUserName"],
            "javax.jdo.option.ConnectionPassword": props["DBRootPassword"],
        })
    return {"Classification": "hive-site", "Properties": hive_properties}


def create(event, context):
    """Launch an EMR cluster described by ``event["ResourceProperties"]``.

    The request passed to ``run_job_flow`` is assembled from a fixed baseline
    (bootstrap actions, data-loading steps, proxy-user configurations, master
    instance group) plus optional pieces switched on by string flags
    (``"true"``/``"false"``) in the resource properties: Ranger or Privacera
    plugins, CloudWatch agent, security configuration with Kerberos
    attributes, Glue vs. database Hive metastore, Presto Kerberos, HBase and
    Presto Ranger plugins.

    Args:
        event: Mapping with a ``"ResourceProperties"`` dict holding the
            cluster settings (presumably a CloudFormation custom-resource
            event -- confirm against the caller).
        context: Not used by this function.

    Returns:
        A ``(physical_resource_id, response_data)`` tuple where
        ``physical_resource_id`` is the ``JobFlowId`` returned by EMR and
        ``response_data`` is ``{"ClusterID": <JobFlowId>}``.

    Raises:
        KeyError: If a required resource property is missing.
        ValueError: If an instance count property is not an integer string.
        Exception: Any error raised by boto3 propagates unchanged.
    """
    props = event["ResourceProperties"]

    applications, presto_engine, spark_requested = _parse_applications(props["AppsEMR"])
    presto_requested = presto_engine is not None

    release_label = props["emrReleaseLabel"]
    region = props["StackRegion"]
    use_glue = props["UseAWSGlueForHiveMetastore"] == "true"

    # s3://<bucket>/<key> holds all project artifacts; the versioned
    # sub-folder holds the scripts and input data for this release.
    artifacts_root = "s3://" + props["S3Bucket"] + "/" + props["S3Key"]
    project_root = artifacts_root + "/" + props["ProjectVersion"]
    scripts_root = project_root + "/scripts/"

    bootstrap_actions = [
        _bootstrap_action("Install packages", scripts_root + "install-required-packages.sh"),
        _bootstrap_action("Download scripts", scripts_root + "download-scripts.sh", [project_root]),
        _bootstrap_action("Setup HDFS home dir", scripts_root + "create-hdfs-home-ba.sh"),
    ]

    steps = [
        _script_runner_step("CreateDefaultHiveTables", "createHiveTables.sh", [region]),
        _script_runner_step("CreateExtendedHiveTables", "createdExtendedHiveTables.sh", [region]),
        _script_runner_step("LoadHDFSData", "loadDataIntoHDFS.sh", [region]),
        _script_runner_step("Cloudformation-Signal", "send-cf-signal.sh", [props["SignalURL"]]),
    ]

    configurations = _base_configurations(props)

    instance_groups = [
        {
            "Name": "Master nodes",
            "Market": "ON_DEMAND",
            "InstanceRole": "MASTER",
            "InstanceType": props["MasterInstanceType"],
            "InstanceCount": int(props["MasterInstanceCount"]),
        },
    ]
    core_instance_count = int(props["CoreInstanceCount"])
    if core_instance_count > 0:
        instance_groups.append({
            "Name": "Slave nodes",
            "Market": "ON_DEMAND",
            "InstanceRole": "CORE",
            "InstanceType": props["CoreInstanceType"],
            "InstanceCount": core_instance_count,
        })

    cluster_parameters = {
        "Name": "EMR-" + props["StackName"],
        "ReleaseLabel": release_label,
        "LogUri": props["LogFolder"],
        "BootstrapActions": bootstrap_actions,
        "Applications": applications,
        "EbsRootVolumeSize": 100,
        "Steps": steps,
        "VisibleToAllUsers": True,
        "JobFlowRole": props["JobFlowRole"],
        "ServiceRole": props["ServiceRole"],
        "Tags": [
            {
                "Key": "Name",
                "Value": "EMREC2Instance",
            },
        ],
        "Configurations": configurations,
        "Instances": {
            "InstanceGroups": instance_groups,
            "Ec2KeyName": props["KeyName"],
            "KeepJobFlowAliveWhenNoSteps": True,
            "TerminationProtected": False,
            "Ec2SubnetId": props["subnetID"],
            "AdditionalMasterSecurityGroups": [props["masterSG"]],
        },
    }

    # Ranger and Privacera plugins are mutually exclusive; Ranger wins when
    # both flags are set.
    if props["InstallRangerPlugins"] == "true":
        ranger_admin_args = [
            props["RangerHostname"],
            project_root + "/inputdata",
            props["RangerHttpProtocol"],
            props["RangerVersion"],
            props["RangerAdminPassword"],
        ]
        steps.append(_script_runner_step(
            "InstallHiveHDFSRangerPlugin",
            "install-hive-hdfs-ranger-plugin.sh",
            [
                props["RangerHostname"],
                props["RangerVersion"],
                artifacts_root,
                props["ProjectVersion"],
                release_label,
                props["RangerHttpProtocol"],
                props["InstallCloudWatchAgentForAudit"],
            ],
        ))
        steps.append(_script_runner_step(
            "InstallRangerServiceDef", "install-ranger-servicedef.sh", ranger_admin_args))
        steps.append(_script_runner_step(
            "InstallRangerPolicies", "install-ranger-policies.sh", ranger_admin_args))
    elif props["InstallPrivaceraPlugins"] == "true":
        steps.append(_script_runner_step(
            "InstallPrivaceraRangerPlugin",
            "install-privacera-plugins.sh",
            [props["PrivaceraPluginURL"]],
        ))

    if props["InstallCloudWatchAgentForAudit"] == "true":
        bootstrap_actions.append(_bootstrap_action(
            "Install cloudwatch agent", scripts_root + "install-cloudwatch-agent.sh"))

    # Any value other than the literal "false" is treated as the name of an
    # EMR security configuration, which also requires Kerberos attributes.
    if props["EMRSecurityConfig"] != "false":
        cluster_parameters["SecurityConfiguration"] = props["EMRSecurityConfig"]
        cluster_parameters["KerberosAttributes"] = {
            "Realm": props["KerberosRealm"],
            "KdcAdminPassword": props["KdcAdminPassword"],
            "CrossRealmTrustPrincipalPassword": props["CrossRealmTrustPrincipalPassword"],
            "ADDomainJoinUser": props["ADDomainUser"],
            "ADDomainJoinPassword": props["ADDomainJoinPassword"],
        }

    configurations.append(_hive_site_configuration(props, use_glue))

    # NOTE: Hive LDAP authentication and Hadoop LDAP group mapping
    # (hadoop.security.group.mapping.*) are not configured here; only the
    # proxy-user grants below are applied to core-site.
    configurations.append({
        "Classification": "core-site",
        "Properties": {
            "hadoop.proxyuser.knox.groups": "*",
            "hadoop.proxyuser.knox.hosts": "*",
            "hadoop.proxyuser.livy.groups": "*",
            "hadoop.proxyuser.livy.hosts": "*",
            "hadoop.proxyuser.hive.hosts": "*",
            "hadoop.proxyuser.hive.groups": "*",
            "hadoop.proxyuser.hue_hive.groups": "*",
            "hadoop.proxyuser.presto.hosts": "*",
            "hadoop.proxyuser.presto.groups": "*",
            "hadoop.proxyuser.hbase.hosts": "*",
            "hadoop.proxyuser.hbase.groups": "*",
        },
    })

    if presto_requested:
        if props["EnablePrestoKerberos"] == "true":
            bootstrap_actions.append(_bootstrap_action(
                "Setup Presto Kerberos",
                scripts_root + "configure_presto_kerberos_ba.sh",
                [
                    project_root,
                    props["KdcAdminPassword"],
                    presto_engine,
                    "glue" if use_glue else "database",
                ],
            ))
            steps.append(_script_runner_step(
                "PrestoSSLUpdate",
                "presto-cli-kerberos_fix.sh",
                [release_label, presto_engine],
            ))
            steps.append(_script_runner_step(
                "UpdateHueConfigurationForPrestoSSL",
                "hue-update.sh",
                [release_label, presto_engine],
            ))

        if use_glue:
            # PrestoSQL uses its own classification name for the Hive connector.
            if presto_engine == "PrestoSQL":
                connector_classification = "prestosql-connector-hive"
            else:
                connector_classification = "presto-connector-hive"
            configurations.append({
                "Classification": connector_classification,
                "Properties": {
                    "hive.metastore": "glue",
                },
            })

    if spark_requested and use_glue:
        configurations.append({
            "Classification": "spark-hive-site",
            "Properties": {
                "hive.server2.enable.doAs": "true",
                "hive.metastore.client.factory.class": _GLUE_HIVE_CLIENT_FACTORY,
            },
        })

    if props["InstallHBasePlugin"] == "true":
        configurations.append({
            "Classification": "hbase-site",
            "Properties": {
                "hbase.superuser": "hbase",
            },
        })
        # The HBase Ranger plugin is installed as a bootstrap action, not as a step.
        bootstrap_actions.append(_bootstrap_action(
            "Install Ranger HBase Plugin",
            scripts_root + "install-hbase-plugin-ba.sh",
            [
                props["RangerHostname"],
                props["RangerVersion"],
                artifacts_root,
                props["ProjectVersion"],
                release_label,
                props["RangerHttpProtocol"],
                props["InstallCloudWatchAgentForAudit"],
            ],
        ))

    if presto_requested and props["InstallPrestoPlugin"] == "true":
        steps.append(_script_runner_step(
            "InstallRangerPrestoPlugin",
            "install-presto-ranger-plugin.sh",
            [
                props["RangerHostname"],
                props["RangerVersion"],
                artifacts_root,
                props["ProjectVersion"],
                release_label,
                presto_engine,
                props["RangerHttpProtocol"],
                props["InstallCloudWatchAgentForAudit"],
            ],
        ))

    client = boto3.client("emr", region_name=region)
    job_flow = client.run_job_flow(**cluster_parameters)

    physical_resource_id = job_flow["JobFlowId"]
    response_data = {
        "ClusterID": physical_resource_id,
    }
    return physical_resource_id, response_data