data_validation/consts.py (190 lines of code) (raw):

# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # Configuration Fields # These are all keys into the config dict. Where these values come from the command line, these would # match with the command line argument - so --secret-manager-type value would be inserted with the # 'secret_manager_type' key. # This is true for the most part, except where a typo gets introduced, i.e. --use-random-row became use_random_rows # Fixing this would be a breaking change. A future cleanup to prevent this would be to use # f'--{config_constant.repl('_','-')}' as the name argument to argparse. SOURCE_TYPE = "source_type" SECRET_MANAGER_TYPE = "secret_manager_type" SECRET_MANAGER_PROJECT_ID = "secret_manager_project_id" CONFIG = "config" CONFIG_FILE = "config_file" CONFIG_FILE_JSON = "config_file_json" CONFIG_SOURCE_CONN_NAME = "source_conn_name" CONFIG_TARGET_CONN_NAME = "target_conn_name" CONFIG_SOURCE_CONN = "source_conn" CONFIG_TARGET_CONN = "target_conn" CONFIG_TYPE = "type" CONFIG_DEFAULT_CAST = "default_cast" CONFIG_CUSTOM = "custom" CONFIG_CUSTOM_IBIS_EXPR = "ibis_expr" CONFIG_CUSTOM_PARAMS = "params" CONFIG_CUSTOM_PARAM_FORMAT_STR = "format_str" CONFIG_SCHEMA_NAME = "schema_name" CONFIG_TABLE_NAME = "table_name" CONFIG_TARGET_SCHEMA_NAME = "target_schema_name" CONFIG_TARGET_TABLE_NAME = "target_table_name" CONFIG_LABELS = "labels" CONFIG_COMPARISON_FIELDS = "comparison_fields" CONFIG_FIELD_ALIAS = "field_alias" CONFIG_AGGREGATES = "aggregates" CONFIG_CALCULATED_FIELDS = "calculated_fields" CONFIG_GROUPED_COLUMNS = "grouped_columns" CONFIG_CALCULATED_SOURCE_COLUMNS = "source_calculated_columns" CONFIG_CALCULATED_TARGET_COLUMNS = "target_calculated_columns" CONFIG_USE_RANDOM_ROWS = "use_random_rows" CONFIG_RANDOM_ROW_BATCH_SIZE = "random_row_batch_size" CONFIG_PRIMARY_KEYS = "primary_keys" CONFIG_TRIM_STRING_PKS = "trim_string_pks" CONFIG_CASE_INSENSITIVE_MATCH = "case_insensitive_match" CONFIG_ROW_CONCAT = "concat" CONFIG_ROW_HASH = "hash" CONFIG_RUN_ID = "run_id" CONFIG_START_TIME = "start_time" CONFIG_END_TIME = "end_time" CONFIG_SOURCE_COLUMN = "source_column" CONFIG_TARGET_COLUMN = "target_column" CONFIG_THRESHOLD = "threshold" CONFIG_CAST = "cast" CONFIG_CAST_BOOL_STRING = "bool_string" CONFIG_CAST_UUID_STRING = "uuid_string" CONFIG_DEPTH = "depth" CONFIG_FORMAT = "format" CONFIG_LIMIT = "limit" CONFIG_FILTERS = "filters" CONFIG_FILTER_SOURCE = "source" CONFIG_FILTER_TARGET = "target" CONFIG_MAX_RECURSIVE_QUERY_SIZE = "max_recursive_query_size" CONFIG_SOURCE_QUERY = "source_query" CONFIG_SOURCE_QUERY_FILE = "source_query_file" CONFIG_TARGET_QUERY = "target_query" CONFIG_TARGET_QUERY_FILE = "target_query_file" CONFIG_CUSTOM_QUERY_TYPE = "custom_query_type" CONFIG_FILTER_SOURCE_COLUMN = "source_column" CONFIG_FILTER_SOURCE_VALUE = "source_value" CONFIG_FILTER_TARGET_COLUMN = "target_column" CONFIG_FILTER_TARGET_VALUE = "target_value" CONFIG_EXCLUSION_COLUMNS = "exclusion_columns" CONFIG_ALLOW_LIST = "allow_list" CONFIG_FILTER_STATUS = "filter_status" CONFIG_RESULT_HANDLER = "result_handler" CONFIG_TYPE_COUNT = "count" CONFIG_TYPE_SUM = "sum" # Default values DEFAULT_NUM_RANDOM_ROWS = 10000 # Filter Type Options FILTER_TYPE_CUSTOM = "custom" FILTER_TYPE_EQUALS = "equals" FILTER_TYPE_ISIN = "isin" # Validation Types COLUMN_VALIDATION = "Column" GROUPED_COLUMN_VALIDATION = "GroupedColumn" ROW_VALIDATION = "Row" SCHEMA_VALIDATION = "Schema" CUSTOM_QUERY = "Custom-query" CONFIG_TYPES = [ COLUMN_VALIDATION, GROUPED_COLUMN_VALIDATION, ROW_VALIDATION, SCHEMA_VALIDATION, CUSTOM_QUERY, ] # State Manager Fields DEFAULT_ENV_DIRECTORY = "~/.config/google-pso-data-validator/" ENV_DIRECTORY_VAR = "PSO_DV_CONN_HOME" # Yaml File Config Fields YAML_RESULT_HANDLER = "result_handler" YAML_SOURCE = "source" YAML_TARGET = "target" YAML_VALIDATIONS = "validations" # Connection key constants. SOURCE_TYPE_BIGQUERY = "BigQuery" SOURCE_TYPE_DB2 = "DB2" SOURCE_TYPE_FILESYSTEM = "FileSystem" SOURCE_TYPE_IMPALA = "Impala" SOURCE_TYPE_MSSQL = "MSSQL" SOURCE_TYPE_MYSQL = "MySQL" SOURCE_TYPE_ORACLE = "Oracle" SOURCE_TYPE_POSTGRES = "Postgres" SOURCE_TYPE_REDSHIFT = "Redshift" SOURCE_TYPE_SNOWFLAKE = "Snowflake" SOURCE_TYPE_SPANNER = "Spanner" SOURCE_TYPE_TERADATA = "Teradata" # BigQuery Result Handler Configs RH_TYPE = "type" RH_CONN = "connection" PROJECT_ID = "project_id" TABLE_ID = "table_id" GOOGLE_SERVICE_ACCOUNT_KEY_PATH = "google_service_account_key_path" API_ENDPOINT = "api_endpoint" # Result Handler Output Table Fields VALIDATION_TYPE = "validation_type" VALIDATION_NAME = "validation_name" AGGREGATION_TYPE = "aggregation_type" GROUP_BY_COLUMNS = "group_by_columns" SOURCE_TABLE_NAME = "source_table_name" SOURCE_COLUMN_NAME = "source_column_name" SOURCE_AGG_VALUE = "source_agg_value" TARGET_TABLE_NAME = "target_table_name" TARGET_COLUMN_NAME = "target_column_name" TARGET_AGG_VALUE = "target_agg_value" VALIDATION_STATUS = "validation_status" VALIDATION_STATUS_SUCCESS = "success" VALIDATION_STATUS_FAIL = "fail" VALIDATION_STATUSES = [ VALIDATION_STATUS_SUCCESS, VALIDATION_STATUS_FAIL, ] VALIDATION_DIFFERENCE = "difference" VALIDATION_PCT_DIFFERENCE = "pct_difference" VALIDATION_PCT_THRESHOLD = "pct_threshold" NUM_RANDOM_ROWS = "num_random_rows" # Summary stats of Row Validation results TOTAL_SOURCE_ROWS = "total_source_rows" TOTAL_TARGET_ROWS = "total_target_rows" TOTAL_ROWS_VALIDATED = "total_rows_validated" TOTAL_ROWS_SUCCESS = "total_rows_success_validation_status" TOTAL_ROWS_FAIL = "total_rows_fail_validation_status" FAILED_SOURCE_NOT_IN_TARGET = "failed_rows_present_in_source_not_in_target" FAILED_TARGET_NOT_IN_SOURCE = "failed_rows_present_in_target_not_in_source" FAILED_PRESENT_IN_BOTH_TABLES = "failed_rows_present_in_both_source_and_target" # Combiner only constants COMBINER_TABLE_NAME = "dvt_table_name" COMBINER_COLUMN_NAME = "dvt_column_name" COMBINER_AGG_VALUE = "dvt_agg_value" # SQL Template Formatting # TODO: should this be managed in query_builder if that is the only place its used? COUNT_STAR = "{count_star}" # Validation metadata RESULT_TYPE_SOURCE = "source" RESULT_TYPE_TARGET = "target" RESULT_TYPE_DIFFERENCES = "differences" FORMAT_TYPE_CSV = "csv" FORMAT_TYPE_JSON = "json" FORMAT_TYPE_MINIMAL = "minimal" FORMAT_TYPE_PYTHON = "python" FORMAT_TYPE_TABLE = "table" FORMAT_TYPE_TEXT = "text" FORMAT_TYPES = [FORMAT_TYPE_CSV, FORMAT_TYPE_JSON, FORMAT_TYPE_TABLE, FORMAT_TYPE_TEXT] RAW_QUERY_FORMAT_TYPES = [FORMAT_TYPE_MINIMAL, FORMAT_TYPE_PYTHON] # Text Result Handler column filter list COLUMN_FILTER_LIST = [ AGGREGATION_TYPE, CONFIG_END_TIME, CONFIG_LABELS, VALIDATION_PCT_THRESHOLD, CONFIG_START_TIME, TARGET_TABLE_NAME, TARGET_COLUMN_NAME, VALIDATION_DIFFERENCE, CONFIG_PRIMARY_KEYS, GROUP_BY_COLUMNS, NUM_RANDOM_ROWS, ] SCHEMA_VALIDATION_COLUMN_FILTER_LIST = [ CONFIG_START_TIME, CONFIG_END_TIME, AGGREGATION_TYPE, VALIDATION_DIFFERENCE, CONFIG_PRIMARY_KEYS, GROUP_BY_COLUMNS, NUM_RANDOM_ROWS, VALIDATION_PCT_THRESHOLD, ] # Constants for the named column used in generate partitions # this cannot conflict with primary key column names DVT_POS_COL = "dvt_pos_num" # Default limit for the number of columns we will attempt in a single validation. MAX_CONCAT_COLUMNS_DEFAULTS = { # Preventing: The concat function requires 2 to 254 arguments "mssql": 254, # Minimizing risk of: ORA-01489: result of string concatenation is too long "oracle": 125, # Preventing: cannot pass more than 100 arguments to a function "postgres": 99, # Minimizing risk of: [Error 3556] Too many columns defined for this table. "teradata": 500, } # CalculatedField expression constants. CALC_FIELD_CAST = "cast" CALC_FIELD_CONCAT = "concat" CALC_FIELD_BYTE_LENGTH = "byte_length" CALC_FIELD_EPOCH_SECONDS = "epoch_seconds" CALC_FIELD_HASH = "hash" CALC_FIELD_IFNULL = "ifnull" CALC_FIELD_LENGTH = "length" CALC_FIELD_PADDED_CHAR_LENGTH = "padded_char_length" CALC_FIELD_RSTRIP = "rstrip" CALC_FIELD_UPPER = "upper"