def add_rstrip_to_comp_fields()

in data_validation/config_manager.py [0:0]


    def add_rstrip_to_comp_fields(self, comparison_fields: List[str]) -> List[str]:
        """Swap string comparison fields for rstrip calculated-field aliases (see #1190).

        Each requested field is matched case-insensitively against the columns of
        the source and target calculated tables. When either side's column is a
        string type, and `_comp_field_cast` does not report a cast for it, an
        rstrip calculated field is built and its alias (`rstrip__<casefolded name>`)
        takes the field's place in the returned list. All other fields are
        returned as given. The calculated-field configs built along the way are
        handed to `append_calculated_fields` once, after every field has been
        processed.

        Parameters:
            comparison_fields: List[str] of comparison field columns
        Returns:
            List[str] of comparison field columns, with rstrip aliases substituted
            where an rstrip calculated field was added
        Raises:
            ValueError: if a field has no case-insensitive match in the source
                table or in the target table columns
        """
        src_table = self.get_source_ibis_calculated_table()
        tgt_table = self.get_target_ibis_calculated_table()
        src_schema = dict(src_table.schema().items())
        tgt_schema = dict(tgt_table.schema().items())
        # Map casefolded names back to the real column names on each side.
        src_names = {col.casefold(): str(col) for col in src_table.columns}
        tgt_names = {col.casefold(): str(col) for col in tgt_table.columns}

        rstrip_configs = []
        resolved_fields = []
        for field in comparison_fields:
            folded = field.casefold()
            if folded not in src_names:
                raise ValueError(f"Column DNE in source: {field}")
            if folded not in tgt_names:
                raise ValueError(f"Column DNE in target: {field}")

            src_column = src_names[folded]
            tgt_column = tgt_names[folded]
            src_type = src_table[src_column].type()
            tgt_type = tgt_table[tgt_column].type()

            either_is_string = src_type.is_string() or tgt_type.is_string()
            # Do not add rstrip if the column is a bool or UUID hiding in a string;
            # the cast check is only consulted when at least one side is a string.
            if not either_is_string or self._comp_field_cast(
                src_schema, tgt_schema, field
            ):
                resolved_fields.append(field)
                continue

            logging.info(
                f"Adding rtrim() to string comparison field `{field.casefold()}` due to Teradata CHAR padding."
            )
            alias = f"rstrip__{field.casefold()}"
            rstrip_config = self.build_config_calculated_fields(
                [src_column],
                [tgt_column],
                consts.CALC_FIELD_RSTRIP,
                alias,
                0,
            )
            rstrip_configs.append(rstrip_config)
            resolved_fields.append(alias)

        self.append_calculated_fields(rstrip_configs)
        return resolved_fields