in data_validation/config_manager.py [0:0]
def add_rstrip_to_comp_fields(self, comparison_fields: List[str]) -> List[str]:
    """Swap string comparison fields for rstrip calculated-field aliases (see #1190).

    Every comparison field is matched case-insensitively (via ``casefold``)
    against the columns of the source and target calculated tables. When the
    column is a string type on either side, and ``_comp_field_cast`` does not
    return a truthy value for it, an rstrip calculated field is built under the
    alias ``rstrip__<casefolded field>`` and that alias takes the field's place
    in the returned list. Any other field is passed through unchanged. The
    collected calculated-field configs are registered in one call to
    ``append_calculated_fields`` after all fields have been processed.

    Parameters:
        comparison_fields: List[str] of comparison field columns
    Returns:
        List[str] of comparison field columns, in input order, with rstrip
        aliases substituted where an rstrip calculated field was added
    Raises:
        ValueError: if a field has no case-insensitive match among the source
            or the target table columns
    """
    src_table = self.get_source_ibis_calculated_table()
    tgt_table = self.get_target_ibis_calculated_table()
    src_schema = dict(src_table.schema().items())
    tgt_schema = dict(tgt_table.schema().items())
    # Casefolded column name -> column name as reported by the table.
    src_columns = {col.casefold(): str(col) for col in src_table.columns}
    tgt_columns = {col.casefold(): str(col) for col in tgt_table.columns}

    rstrip_configs = []
    resolved_fields = []
    for field in comparison_fields:
        key = field.casefold()
        if key not in src_columns:
            raise ValueError(f"Column DNE in source: {field}")
        if key not in tgt_columns:
            raise ValueError(f"Column DNE in target: {field}")

        src_name = src_columns[key]
        tgt_name = tgt_columns[key]
        src_type = src_table[src_name].type()
        tgt_type = tgt_table[tgt_name].type()

        either_is_string = src_type.is_string() or tgt_type.is_string()
        # Do not add rstrip if the column is a bool or UUID hiding in a string.
        # The cast check is only consulted when at least one side is a string.
        if not either_is_string or self._comp_field_cast(
            src_schema,
            tgt_schema,
            field,
        ):
            resolved_fields.append(field)
            continue

        logging.info(
            f"Adding rtrim() to string comparison field `{key}` due to Teradata CHAR padding."
        )
        alias = f"rstrip__{key}"
        rstrip_config = self.build_config_calculated_fields(
            [src_name],
            [tgt_name],
            consts.CALC_FIELD_RSTRIP,
            alias,
            0,
        )
        rstrip_configs.append(rstrip_config)
        resolved_fields.append(alias)

    self.append_calculated_fields(rstrip_configs)
    return resolved_fields