in datascan/bulk-creation-scripts/dataquality /lib.py [0:0]
def validateConfigFile(config_path) -> list:
    """
    Load a (multi-document) YAML config file and validate every document.

    Each document must be a mapping that defines 'projectId', 'locationId',
    'bqTable' and 'dataQualitySpec'; 'dataQualitySpec' must contain a
    'rules' key; and 'bqTable' must look like
    'project_id.dataset_id.table_id'. Further checks are delegated to
    validateConfigFields().

    Args:
        config_path: path of the YAML config file to read.

    Returns:
        A list with one entry per YAML document, each passed through
        removeLineKeys() (presumably stripping the '__line__' bookkeeping
        keys added by the loader - confirm against that helper).

    Raises:
        ValueError: if a document is not a mapping, is missing a required
            field, has no 'rules' in its 'dataQualitySpec', or has a
            malformed 'bqTable'. The message includes the line number
            stored under '__line__' when available.
    """
    required_fields = {'projectId', 'locationId', 'bqTable', 'dataQualitySpec'}

    # load the config file
    # NOTE(review): yaml.load with a custom Loader can construct arbitrary
    # objects unless LineNumberLoader derives from SafeLoader - confirm
    # before feeding untrusted files to this function.
    with open(config_path, 'r') as f:
        config_file = list(yaml.load_all(f, Loader=LineNumberLoader))

    # validate the config file
    for config in config_file:
        # An empty YAML document loads as None; report it as a config
        # error instead of failing with AttributeError on .keys().
        if not isinstance(config, dict):
            raise ValueError(
                "Config file must contain only mapping blocks, "
                f"found {type(config).__name__}"
            )

        line = config.get('__line__')

        if not required_fields <= config.keys():
            raise ValueError(
                "Config file must define all the required config fields "
                "('projectId', 'locationId', 'bqTable', 'dataQualitySpec') "
                f"at line {line}"
            )

        # A 'dataQualitySpec' key with no value (None) cannot hold rules.
        spec = config['dataQualitySpec']
        if not isinstance(spec, dict) or 'rules' not in spec:
            raise ValueError(
                f"Config file must define at least 1 rule for the block at line {line}"
            )

        # validate format for bqTable
        # fullmatch (rather than match with '$') also rejects a trailing
        # newline; the isinstance guard keeps non-string values a ValueError.
        full_table_name = config['bqTable']
        if not isinstance(full_table_name, str) or not re.fullmatch(
            r'[a-zA-Z0-9_-]+\.[a-zA-Z0-9_-]+\.[a-zA-Z0-9_-]+', full_table_name
        ):
            raise ValueError(
                f"bqTable - {full_table_name} does not match the expected format "
                f"'project_id.dataset_id.table_id' at line {line}"
            )

        # validate nested fields
        validateConfigFields(config)

    return [removeLineKeys(config) for config in config_file]