def setupgluejob()

in src/graph_notebook/notebooks/01-Neptune-Database/03-Sample-Applications/03-Identity-Graphs/03-Jumpstart-Identity-Graphs-Using-Canonical-Model-and-ETL/glue_utils.py [0:0]


    def setupgluejob(self):
        """Create the Glue catalog entries and stage the ETL inputs in S3.

        The work happens in this order:

        1. Create the Glue database named ``self.glue_database_name``.
        2. For each name in ``self.jobs``, upload that job's CSV file and its
           Glue script to ``self.s3_bucket``. The uploaded script key has
           ``self.etlformatted`` inserted before the ``.py`` suffix.
        3. Upload ``lib/neptune_python_utils.zip`` to the same bucket.
        4. For each name in ``self.jobs``, create a catalog table of the same
           name whose location is the job's ``data/<job>/`` prefix.

        Every column of every table is declared with type ``string``.

        Raises:
            KeyError: at the table-creation step, if a name in ``self.jobs``
                is not one of ``demographics``, ``telemetry`` or
                ``transactions``.
        """
        self.glue_client.create_database(
            DatabaseInput={
                "Name": self.glue_database_name,
                "Description": "Database to define tables for glue jobs",
            }
        )

        # Column names per table; the Glue column definitions are derived
        # from these below since every column shares the same type.
        column_names = {
            "demographics": (
                "id", "name", "phone", "email", "city", "state", "country",
                "pincode", "address", "joinedDate", "updatedDate",
            ),
            "telemetry": (
                "session_id", "user_id", "user_agent", "ip_address",
                "siteid", "pageid", "session_start",
            ),
            "transactions": (
                "transaction_id", "user_id", "product_id", "product_name",
                "purchased_date", "review",
            ),
        }
        table_columns = {
            table: [{"Name": column, "Type": "string"} for column in columns]
            for table, columns in column_names.items()
        }

        bucket = self.s3_bucket
        upload = self.s3.meta.client.upload_file

        # Stage each job's sample data and its ETL script.
        for job in self.jobs:
            upload(f"source/{job}/{job}.csv", bucket, f"data/{job}/{job}.csv")
            upload(
                f"script/neptune-glue-{job}.py",
                bucket,
                f"script/neptune-glue-{job}{self.etlformatted}.py",
            )

        # Shared helper library, uploaded once under an unchanged key.
        library_key = "lib/neptune_python_utils.zip"
        upload(library_key, bucket, library_key)

        # Register one CSV-backed table per job over the uploaded data prefix.
        for job in self.jobs:
            csv_serde = {
                "SerializationLibrary": "org.apache.hadoop.hive.serde2.OpenCSVSerde",
                "Parameters": {"separatorChar": ",", "quoteChar": '"'},
            }
            storage = {
                "Columns": table_columns[job],
                "Location": f"s3://{bucket}/data/{job}/",
                "InputFormat": "org.apache.hadoop.mapred.TextInputFormat",
                "SerdeInfo": csv_serde,
            }
            self.glue_client.create_table(
                DatabaseName=self.glue_database_name,
                TableInput={
                    "Name": job,
                    "Description": job,
                    "StorageDescriptor": storage,
                    # Table-level property; the value is the string "1".
                    "Parameters": {"skip.header.line.count": "1"},
                },
            )