def transform_storage_descriptors()

in utilities/Hive_metastore_migration/src/hive_metastore_migration.py [0:0]


    def transform_storage_descriptors(self, ms_sds, ms_sd_params, ms_columns, ms_bucketing_cols, ms_serdes,
                                      ms_serde_params, ms_skewed_col_names, ms_skewed_string_list_values,
                                      ms_skewed_col_value_loc_map, ms_sort_cols):
        """
        Assemble the storage descriptor frame from ``ms_sds`` and its related metastore frames.

        Each related input is first run through its dedicated ``transform_*`` helper, and the
        results are left-outer joined onto ``ms_sds`` (on ``SD_ID``, ``CD_ID`` or ``SERDE_ID``).
        The serde and skewed results are attached through ``join_other_to_single_column`` under
        the names ``serdeInfo`` and ``skewedInfo``. Afterwards the ``LOCATION`` column is passed
        through ``s3a_or_s3n_to_s3_in_location``, the upper-case columns are renamed to their
        camelCase names, ``sortColumns`` is passed through ``fill_none_with_empty_list``, and the
        ``SERDE_ID`` / ``CD_ID`` join keys are dropped.

        :param ms_sds: base frame; must carry ``SD_ID``, ``CD_ID`` and ``SERDE_ID`` for the joins
        :param ms_sd_params: input to ``transform_params`` keyed by ``SD_ID``
        :param ms_columns: input to ``transform_ms_columns``
        :param ms_bucketing_cols: input to ``transform_ms_bucketing_cols``
        :param ms_serdes: input to ``transform_ms_serde_info``
        :param ms_serde_params: input to ``transform_ms_serde_info``
        :param ms_skewed_col_names: input to ``transform_skewed_info``
        :param ms_skewed_string_list_values: input to ``transform_skewed_info``
        :param ms_skewed_col_value_loc_map: input to ``transform_skewed_info``
        :param ms_sort_cols: input to ``transform_ms_sort_cols``
        :return: the joined, renamed frame without the ``SERDE_ID`` and ``CD_ID`` columns
        """
        # Build every side frame up front, in the same order the joins consume them
        # (sort columns are joined last).
        bucketing_df = self.transform_ms_bucketing_cols(ms_bucketing_cols)
        columns_df = self.transform_ms_columns(ms_columns)
        sd_params_df = self.transform_params(params_df=ms_sd_params, id_col='SD_ID')
        serde_df = self.transform_ms_serde_info(ms_serdes=ms_serdes, ms_serde_params=ms_serde_params)
        skewed_df = self.transform_skewed_info(
            ms_skewed_col_names=ms_skewed_col_names,
            ms_skewed_string_list_values=ms_skewed_string_list_values,
            ms_skewed_col_value_loc_map=ms_skewed_col_value_loc_map)
        sort_df = self.transform_ms_sort_cols(ms_sort_cols)

        # Every join keeps all rows of the base frame.
        join_type = 'left_outer'

        joined = ms_sds.join(other=bucketing_df, on='SD_ID', how=join_type)
        joined = joined.join(other=columns_df, on='CD_ID', how=join_type)
        joined = joined.join(other=sd_params_df, on='SD_ID', how=join_type)
        joined = joined.join_other_to_single_column(
            other=serde_df, on='SERDE_ID', how=join_type, new_column_name='serdeInfo')
        joined = joined.join_other_to_single_column(
            other=skewed_df, on='SD_ID', how=join_type, new_column_name='skewedInfo')
        joined = joined.join(other=sort_df, on='SD_ID', how=join_type)

        # The location fix-up runs before the rename, so it still addresses the upper-case column.
        location_fixed = HiveMetastoreTransformer.s3a_or_s3n_to_s3_in_location(joined, 'LOCATION')

        column_renames = [
            ('INPUT_FORMAT', 'inputFormat'),
            ('OUTPUT_FORMAT', 'outputFormat'),
            ('LOCATION', 'location'),
            ('NUM_BUCKETS', 'numberOfBuckets'),
            ('IS_COMPRESSED', 'compressed'),
            ('IS_STOREDASSUBDIRECTORIES', 'storedAsSubDirectories'),
        ]
        renamed = location_fixed.rename_columns(rename_tuples=column_renames)

        # NOTE(review): presumably rows with no sort-column match have a null 'sortColumns' after
        # the left outer join, which this call replaces with an empty list - confirm in the helper.
        with_sort_columns = HiveMetastoreTransformer.fill_none_with_empty_list(renamed, 'sortColumns')

        # The join keys are only needed for assembly and are removed from the result.
        return with_sort_columns.drop_columns(['SERDE_ID', 'CD_ID'])