in data_validation/__main__.py [0:0]
def build_config_from_args(args: "Namespace", config_manager: ConfigManager):
    """Populate ``config_manager`` from parsed CLI arguments and return it.

    Shared by every validation command and by generate-table-partitions.
    Which settings get appended depends on ``config_manager.validation_type``:

    * schema validation: excluded columns (case-folded) and the allow list;
    * custom query: the custom query type plus the source and target queries,
      each taken from either an inline query or a query file argument;
    * column validation (including custom-query column): aggregates, and for
      non-custom-query column validation the grouped columns when supplied;
    * row validation (including custom-query row): calculated fields, primary
      keys (case-folded) and, when supplied, comparison fields.

    generate-table-partitions does not have separate table and custom-query
    commands; the user passes either tables or a source/target query/file, but
    not both. In the custom-query case the user does not supply
    ``args.custom_query_type``; the calling code injects it as 'row' before
    this function is invoked.

    Args:
        args (Namespace): User specified arguments.
        config_manager (ConfigManager): Validation config manager instance.

    Returns:
        The same ``config_manager`` instance that was passed in.

    Raises:
        ValueError: For a non-custom-query row validation when no primary keys
            were supplied and ``config_manager.auto_list_primary_keys()``
            returns nothing either.
    """

    def _targets(native_type):
        """Tell whether this run is ``native_type`` or a custom query of that type."""
        current_type = config_manager.validation_type
        if current_type == native_type:
            return True
        # args.custom_query_type is only read for custom-query runs.
        return (
            current_type == consts.CUSTOM_QUERY
            and args.custom_query_type == native_type.lower()
        )

    # --- Schema validation settings ---
    if config_manager.validation_type == consts.SCHEMA_VALIDATION:
        if args.exclusion_columns:
            excluded_names = cli_tools.get_arg_list(args.exclusion_columns)
            config_manager.append_exclusion_columns(
                [name.casefold() for name in excluded_names]
            )
        config_manager.append_allow_list(args.allow_list, args.allow_list_file)

    # --- Custom query settings (inline query strings or query files) ---
    if config_manager.validation_type == consts.CUSTOM_QUERY:
        config_manager.append_custom_query_type(args.custom_query_type)

        # Source side: inline query or query file.
        source_sql = cli_tools.get_query_from_query_args(
            args.source_query, args.source_query_file
        )
        config_manager.append_source_query(source_sql)

        # Target side: inline query or query file.
        target_sql = cli_tools.get_query_from_query_args(
            args.target_query, args.target_query_file
        )
        config_manager.append_target_query(target_sql)

    # --- Column validation settings (also custom-query column validation) ---
    if _targets(consts.COLUMN_VALIDATION):
        aggregate_configs = get_aggregate_config(args, config_manager)
        config_manager.append_aggregates(aggregate_configs)

        # Grouped columns are not supported for custom queries (for now), so
        # they are only honoured for plain column validation.
        if (
            config_manager.validation_type == consts.COLUMN_VALIDATION
            and args.grouped_columns
        ):
            group_names = cli_tools.get_arg_list(args.grouped_columns)
            group_configs = config_manager.build_column_configs(group_names)
            config_manager.append_query_groups(group_configs)

    # --- Row validation settings (also custom-query row validation) ---
    if not _targets(consts.ROW_VALIDATION):
        return config_manager

    # Calculated fields: --hash/--concat
    calculated_configs = _get_calculated_config(args, config_manager)
    config_manager.append_calculated_fields(calculated_configs)

    # Primary keys: use what the user passed; otherwise, except for custom
    # queries, ask the config manager to list them.
    key_names = cli_tools.get_arg_list(args.primary_keys)
    if not key_names and config_manager.validation_type != consts.CUSTOM_QUERY:
        key_names = config_manager.auto_list_primary_keys()
        if not key_names:
            raise ValueError(
                "No primary keys were provided and neither the source or target tables have primary keys. Please include --primary-keys argument"
            )
    key_names = [key_name.casefold() for key_name in key_names]
    key_configs = config_manager.build_column_configs(key_names)
    config_manager.append_primary_keys(key_configs)

    # Comparison fields, only when requested.
    if args.comparison_fields:
        comparison_configs = _get_comparison_config(args, config_manager, key_names)
        config_manager.append_comparison_fields(comparison_configs)

    return config_manager