in utilities/Hive_metastore_migration/src/hive_metastore_migration.py [0:0]
def transform_storage_descriptors(self, ms_sds, ms_sd_params, ms_columns, ms_bucketing_cols, ms_serdes,
                                  ms_serde_params, ms_skewed_col_names, ms_skewed_string_list_values,
                                  ms_skewed_col_value_loc_map, ms_sort_cols):
    """
    Build the storage descriptor frame from the metastore SDS frame and its satellite frames.

    Each satellite input is first run through its own ``transform_*`` helper, then
    left-outer joined onto ``ms_sds`` so that every row of ``ms_sds`` is kept even when a
    satellite has no matching row. After joining, the ``LOCATION`` column is passed through
    ``s3a_or_s3n_to_s3_in_location``, the upper-case metastore columns are renamed to their
    camelCase names, ``sortColumns`` is passed through ``fill_none_with_empty_list``, and the
    ``SERDE_ID`` / ``CD_ID`` join keys are dropped.

    :param ms_sds: storage descriptor frame; joined on ``SD_ID``, ``CD_ID`` and ``SERDE_ID``
    :param ms_sd_params: storage descriptor parameters, keyed by ``SD_ID``
    :param ms_columns: input to ``transform_ms_columns``; the result is joined on ``CD_ID``
    :param ms_bucketing_cols: input to ``transform_ms_bucketing_cols``; result joined on ``SD_ID``
    :param ms_serdes: serde frame passed to ``transform_ms_serde_info``
    :param ms_serde_params: serde parameter frame passed to ``transform_ms_serde_info``
    :param ms_skewed_col_names: passed to ``transform_skewed_info``
    :param ms_skewed_string_list_values: passed to ``transform_skewed_info``
    :param ms_skewed_col_value_loc_map: passed to ``transform_skewed_info``
    :param ms_sort_cols: input to ``transform_ms_sort_cols``; result joined on ``SD_ID``
    :return: the joined, renamed frame without the ``SERDE_ID`` and ``CD_ID`` columns
    """
    # Prepare every satellite frame up front, in the same order as they are joined below.
    bucket_cols_df = self.transform_ms_bucketing_cols(ms_bucketing_cols)
    columns_df = self.transform_ms_columns(ms_columns)
    sd_params_df = self.transform_params(params_df=ms_sd_params, id_col='SD_ID')
    serde_df = self.transform_ms_serde_info(ms_serdes=ms_serdes, ms_serde_params=ms_serde_params)
    skewed_df = self.transform_skewed_info(
        ms_skewed_col_names=ms_skewed_col_names,
        ms_skewed_string_list_values=ms_skewed_string_list_values,
        ms_skewed_col_value_loc_map=ms_skewed_col_value_loc_map)
    sort_cols_df = self.transform_ms_sort_cols(ms_sort_cols)

    # Attach the satellites one at a time; all joins are left outer so no SDS row is lost.
    sds = ms_sds.join(other=bucket_cols_df, on='SD_ID', how='left_outer')
    sds = sds.join(other=columns_df, on='CD_ID', how='left_outer')
    sds = sds.join(other=sd_params_df, on='SD_ID', how='left_outer')
    # Serde and skewed info are joined through the project helper that takes a target
    # column name ('serdeInfo' / 'skewedInfo') for the joined-in data.
    sds = sds.join_other_to_single_column(other=serde_df, on='SERDE_ID', how='left_outer',
                                          new_column_name='serdeInfo')
    sds = sds.join_other_to_single_column(other=skewed_df, on='SD_ID', how='left_outer',
                                          new_column_name='skewedInfo')
    sds = sds.join(other=sort_cols_df, on='SD_ID', how='left_outer')

    # NOTE(review): by its name this helper rewrites s3a:// / s3n:// schemes to s3:// in
    # LOCATION -- confirm against its definition.
    sds = HiveMetastoreTransformer.s3a_or_s3n_to_s3_in_location(sds, 'LOCATION')

    # Metastore column name -> output field name.
    column_renames = [
        ('INPUT_FORMAT', 'inputFormat'),
        ('OUTPUT_FORMAT', 'outputFormat'),
        ('LOCATION', 'location'),
        ('NUM_BUCKETS', 'numberOfBuckets'),
        ('IS_COMPRESSED', 'compressed'),
        ('IS_STOREDASSUBDIRECTORIES', 'storedAsSubDirectories')
    ]
    sds = sds.rename_columns(rename_tuples=column_renames)

    # The left outer join on sort columns can leave 'sortColumns' unset for some rows.
    sds = HiveMetastoreTransformer.fill_none_with_empty_list(sds, 'sortColumns')

    # The join keys are only needed for assembly and are not part of the result.
    return sds.drop_columns(['SERDE_ID', 'CD_ID'])