def write_partitions()

in backend/lambdas/tasks/generate_queries.py [0:0]


def write_partitions(partitions):
    """
    In order for the manifests to be used by Athena in a JOIN, we make them
    available as partitions with Job and DataMapperId tuple.
    """
    # Glue's BatchCreatePartition API accepts at most 100 partitions per
    # request, so create the partitions in batches of that size
    max_create_batch_size = 100
    for i in range(0, len(partitions), max_create_batch_size):
        glue_client.batch_create_partition(
            DatabaseName=glue_db,
            TableName=glue_table,
            PartitionInputList=[
                {
                    "Values": partition_tuple,
                    "StorageDescriptor": {
                        "Columns": [
                            {"Name": "columns", "Type": "array<string>"},
                            {"Name": "matchid", "Type": "array<string>"},
                            {"Name": "deletionqueueitemid", "Type": "string"},
                            {"Name": "createdat", "Type": "int"},
                            {"Name": "queryablecolumns", "Type": "string"},
                            {"Name": "queryablematchid", "Type": "string"},
                        ],
                        "Location": "s3://{}/manifests/{}/{}/".format(
                            manifests_bucket_name,
                            partition_tuple[0],
                            partition_tuple[1],
                        ),
                        "InputFormat": "org.apache.hadoop.mapred.TextInputFormat",
                        "OutputFormat": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
                        "Compressed": False,
                        "SerdeInfo": {
                            "SerializationLibrary": "org.openx.data.jsonserde.JsonSerDe",
                        },
                        "StoredAsSubDirectories": False,
                    },
                }
                for partition_tuple in partitions[i : i + max_create_batch_size]
            ],
        )
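
A minimal, hypothetical usage sketch follows. The job and data mapper identifiers are made up for illustration, and the function is assumed to rely on glue_client, glue_db, glue_table and manifests_bucket_name being defined at module level elsewhere in generate_queries.py, as the snippet above implies. Each inner list becomes a partition's Values and determines its manifest Location in the manifests bucket.


# Hypothetical call: one (Job, DataMapperId) pair per manifest written earlier
# in the workflow. The identifiers below are illustrative only.
example_partitions = [
    ["job-1234567890", "data_mapper_a"],
    ["job-1234567890", "data_mapper_b"],
]
write_partitions(example_partitions)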