in backend/lambdas/tasks/generate_queries.py [0:0]
def write_partitions(partitions):
    """
    Register the manifests as Glue partitions so that Athena can use them
    in a JOIN. Every partition is identified by a Job and DataMapperId tuple.

    The first two elements of each tuple are also used to build the S3
    location of the manifests: ``s3://<bucket>/manifests/<first>/<second>/``.
    Partitions are submitted in batches of at most 100 per
    ``batch_create_partition`` call; the response of each call is not
    inspected here.
    """
    batch_limit = 100

    def build_partition_input(values):
        # One PartitionInput entry: the partition values plus the storage
        # descriptor pointing at the manifests stored for those values.
        manifest_location = "s3://{}/manifests/{}/{}/".format(
            manifests_bucket_name,
            values[0],
            values[1],
        )
        storage_descriptor = {
            "Columns": [
                {"Name": "columns", "Type": "array<string>"},
                {"Name": "matchid", "Type": "array<string>"},
                {"Name": "deletionqueueitemid", "Type": "string"},
                {"Name": "createdat", "Type": "int"},
                {"Name": "queryablecolumns", "Type": "string"},
                {"Name": "queryablematchid", "Type": "string"},
            ],
            "Location": manifest_location,
            "InputFormat": "org.apache.hadoop.mapred.TextInputFormat",
            "OutputFormat": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
            "Compressed": False,
            "SerdeInfo": {
                "SerializationLibrary": "org.openx.data.jsonserde.JsonSerDe",
            },
            "StoredAsSubDirectories": False,
        }
        return {"Values": values, "StorageDescriptor": storage_descriptor}

    # Walk the partitions in consecutive slices of `batch_limit` entries;
    # an empty input results in no calls at all.
    for start in range(0, len(partitions), batch_limit):
        batch = partitions[start : start + batch_limit]
        glue_client.batch_create_partition(
            DatabaseName=glue_db,
            TableName=glue_table,
            PartitionInputList=[build_partition_input(entry) for entry in batch],
        )