in src/graph_notebook/notebooks/01-Neptune-Database/03-Sample-Applications/03-Identity-Graphs/03-Jumpstart-Identity-Graphs-Using-Canonical-Model-and-ETL/glue_utils.py [0:0]
def setupgluejob(self):
    """Provision the Glue catalog entries and S3 objects used by the ETL jobs.

    Steps, in order:

    1. Create the Glue database named ``self.glue_database_name``.
    2. For every entry in ``self.jobs``, upload its local source CSV
       (``source/<job>/<job>.csv``) to ``data/<job>/<job>.csv`` and its
       local ETL script (``script/neptune-glue-<job>.py``) to
       ``self.s3_bucket``; the script's object key additionally carries
       ``self.etlformatted`` right before the ``.py`` suffix.
    3. Upload the shared ``lib/neptune_python_utils.zip`` archive once.
    4. Create one Glue table per job whose location is that job's
       ``data/<job>/`` prefix in the bucket, read via the OpenCSV SerDe.

    Raises:
        KeyError: If a name in ``self.jobs`` has no column list defined
            below. This only surfaces during table creation, i.e. after
            all uploads have already been issued.

    Errors raised by the Glue or S3 clients are not caught here.
    """
    database_input = {
        "Name": self.glue_database_name,
        "Description": "Database to define tables for glue jobs"
    }
    self.glue_client.create_database(DatabaseInput=database_input)

    # Every column in every table is declared as a Glue "string", so only
    # the column names need to be spelled out per table.
    column_names = {
        "demographics": (
            "id", "name", "phone", "email", "city", "state", "country",
            "pincode", "address", "joinedDate", "updatedDate",
        ),
        "telemetry": (
            "session_id", "user_id", "user_agent", "ip_address", "siteid",
            "pageid", "session_start",
        ),
        "transactions": (
            "transaction_id", "user_id", "product_id", "product_name",
            "purchased_date", "review",
        ),
    }
    table_columns = {
        table: [{"Name": column, "Type": "string"} for column in columns]
        for table, columns in column_names.items()
    }

    upload = self.s3.meta.client.upload_file
    bucket = self.s3_bucket

    for job in self.jobs:
        # Source data: source/<job>/<job>.csv -> data/<job>/<job>.csv
        csv_name = job + ".csv"
        upload("/".join(("source", job, csv_name)), bucket,
               "/".join(("data", job, csv_name)))

        # ETL script: the uploaded key gets the etlformatted suffix
        # inserted ahead of the extension.
        script_stem = "script/neptune-glue-" + job
        upload(script_stem + ".py", bucket,
               script_stem + self.etlformatted + ".py")

    # The helper library is shared by all jobs, so it is uploaded once.
    upload("lib/neptune_python_utils.zip", bucket, "lib/neptune_python_utils.zip")

    data_root = "s3://" + bucket + "/data/"
    csv_serde = {
        "SerializationLibrary": "org.apache.hadoop.hive.serde2.OpenCSVSerde",
        "Parameters": {
            "separatorChar": ",",
            "quoteChar": '"'
        }
    }

    for job in self.jobs:
        storage_descriptor = {
            "Columns": table_columns[job],
            "Location": data_root + job + "/",
            "InputFormat": "org.apache.hadoop.mapred.TextInputFormat",
            "SerdeInfo": csv_serde
        }
        table_input = {
            "Name": job,
            "Description": job,
            "StorageDescriptor": storage_descriptor,
            # Table property telling readers to skip one header line
            # in each CSV file.
            "Parameters": {
                "skip.header.line.count": "1"
            }
        }
        self.glue_client.create_table(
            DatabaseName=self.glue_database_name,
            TableInput=table_input
        )