public static createGlueInvTable()

in packages/utilities/s3-inventory-helper/lib/index.ts [75:202]


  /**
   * Creates a Glue `CfnTable` describing inventory data stored under one S3 location.
   *
   * The table is declared as an external table partitioned by `bucket_inventory`
   * and `dt`. Partitions are not registered individually; instead the table
   * carries `projection.*` parameters plus a `storage.location.template` so that
   * partition locations are derived from the partition values.
   *
   * @param scope - Construct scope in which the `CfnTable` is created.
   * @param catalogId - Value passed through as the table's `catalogId`.
   * @param tableNamePrefix - Prefix for the table name (`<prefix>_inv`, with every
   *   `-` replaced by `_`) and for the construct id (`inv-table-<prefix>`).
   * @param database - Database whose `databaseName` the table is created in.
   * @param locationBucketName - Name of the bucket used to build the `s3://` location.
   * @param bucketInventories - Each entry contributes one `<bucketName>/<inventoryName>`
   *   value to the `bucket_inventory` projection enum.
   * @param locationPrefix - Optional prefix inside the bucket; when provided it is
   *   normalised with `MdaaBucket.formatS3Prefix` and appended to the location.
   * @returns The newly created `CfnTable`.
   */
  public static createGlueInvTable(
    scope: Construct,
    catalogId: string,
    tableNamePrefix: string,
    database: Database,
    locationBucketName: string,
    bucketInventories: BucketInventory[],
    locationPrefix?: string,
  ): CfnTable {
    // Base location of the table; always ends with a trailing slash.
    const bucketRoot = `s3://${locationBucketName}/`;
    const location = locationPrefix ? `${bucketRoot}${MdaaBucket.formatS3Prefix(locationPrefix)}/` : bucketRoot;

    // Turns a compact [name, type] pair into the { name, type } column shape.
    const toColumn = ([name, type]: readonly [string, string]) => ({ name, type });

    const partitionKeySpecs: ReadonlyArray<readonly [string, string]> = [
      ['bucket_inventory', 'string'],
      ['dt', 'string'],
    ];

    const columnSpecs: ReadonlyArray<readonly [string, string]> = [
      ['bucket', 'string'],
      ['key', 'string'],
      ['version_id', 'string'],
      ['is_latest', 'boolean'],
      ['is_delete_marker', 'boolean'],
      ['size', 'bigint'],
      ['last_modified_date', 'timestamp'],
      ['e_tag', 'string'],
      ['storage_class', 'string'],
      ['is_multipart_uploaded', 'boolean'],
      ['replication_status', 'string'],
      ['encryption_status', 'string'],
      ['intelligent_tiering_tier', 'string'],
      // Object-lock columns are currently disabled:
      // ['object_lock_retain_until_date', 'timestamp'],
      // ['object_lock_mode', 'string'],
      // ['object_lock_legal_hold_status', 'string'],
    ];

    // One enum value per inventory, in the form "<bucketName>/<inventoryName>".
    const inventoryEnumValues = bucketInventories
      .map(inventory => `${inventory.bucketName}/${inventory.inventoryName}`)
      .join(',');

    // The ${...} placeholders below are literal text for the location template;
    // they must NOT be interpolated by TypeScript, hence the single-quoted string.
    const partitionPathTemplate = '${bucket_inventory}/hive/dt=${dt}';

    const parameters = {
      EXTERNAL: 'TRUE',
      'projection.enabled': 'true',
      'projection.dt.type': 'date',
      'projection.dt.range': '2022-01-01-00-00,NOW',
      'projection.dt.format': 'yyyy-MM-dd-HH-mm',
      'projection.dt.interval': '1',
      'projection.dt.interval.unit': 'HOURS',
      'projection.bucket_inventory.type': 'enum',
      'projection.bucket_inventory.values': inventoryEnumValues,
      'storage.location.template': `${location}${partitionPathTemplate}`,
    };

    const storageDescriptor = {
      columns: columnSpecs.map(toColumn),
      location,
      inputFormat: 'org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat',
      outputFormat: 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',
      compressed: false,
      numberOfBuckets: -1,
      serdeInfo: {
        serializationLibrary: 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe',
        parameters: {
          'serialization.format': '1',
        },
      },
    };

    const tableInput = {
      // Hyphens in the prefix are replaced with underscores in the table name.
      name: `${tableNamePrefix}_inv`.replace(/-/gi, '_'),
      parameters,
      tableType: 'EXTERNAL_TABLE',
      partitionKeys: partitionKeySpecs.map(toColumn),
      storageDescriptor,
    };

    const tableProps: CfnTableProps = {
      catalogId,
      databaseName: database.databaseName,
      tableInput,
    };

    return new CfnTable(scope, `inv-table-${tableNamePrefix}`, tableProps);
  }