# data_validation/metadata.py (48 lines of code) (raw):
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Metadata classes with data about the validation run."""
import dataclasses
import datetime
import typing
from data_validation import consts
@dataclasses.dataclass
class ValidationMetadata(object):
    """Metadata describing one validation within a run.

    A plain dataclass: the fields below are stored exactly as passed to the
    constructor (in this positional order) and no validation is performed on
    them. The two accessor methods select the source-side or target-side
    value based on a ``result_type`` that is compared against
    ``consts.RESULT_TYPE_SOURCE`` and ``consts.RESULT_TYPE_TARGET``.
    """

    validation_type: str
    aggregation_type: str
    source_table_schema: str
    source_table_name: str
    target_table_schema: str
    target_table_name: str
    source_column_name: str
    target_column_name: str
    primary_keys: list
    num_random_rows: int
    threshold: float

    def get_table_name(self, result_type: str) -> str:
        """Return the table name for the requested side of the validation.

        Args:
            result_type: ``consts.RESULT_TYPE_SOURCE`` or
                ``consts.RESULT_TYPE_TARGET``.

        Returns:
            ``"<schema>.<table>"`` when the schema for that side is truthy,
            otherwise just the table name.

        Raises:
            ValueError: If ``result_type`` matches neither constant.
        """
        if result_type == consts.RESULT_TYPE_SOURCE:
            schema, table = self.source_table_schema, self.source_table_name
        elif result_type == consts.RESULT_TYPE_TARGET:
            schema, table = self.target_table_schema, self.target_table_name
        else:
            raise ValueError(f"Unexpected result_type: {result_type}")

        # An empty/None schema means the table name is used unqualified.
        if not schema:
            return table
        return schema + "." + table

    def get_column_name(self, result_type: str) -> str:
        """Return the column name for the requested side of the validation.

        Args:
            result_type: ``consts.RESULT_TYPE_SOURCE`` or
                ``consts.RESULT_TYPE_TARGET``.

        Returns:
            ``source_column_name`` or ``target_column_name`` respectively.

        Raises:
            ValueError: If ``result_type`` matches neither constant.
        """
        if result_type == consts.RESULT_TYPE_SOURCE:
            return self.source_column_name
        if result_type == consts.RESULT_TYPE_TARGET:
            return self.target_column_name
        raise ValueError(f"Unexpected result_type: {result_type}")
def _utc_now() -> datetime.datetime:
    """Return the current time as a timezone-aware UTC datetime."""
    return datetime.datetime.now(tz=datetime.timezone.utc)


@dataclasses.dataclass
class RunMetadata(object):
    """Metadata about a validation run as a whole.

    Every field has a default, so ``RunMetadata()`` is valid:

    * ``run_id`` defaults to an empty string.
    * ``validations`` and ``labels`` default to a fresh empty dict / list
      per instance (never shared between instances).
    * ``start_time`` defaults to the timezone-aware UTC time at which the
      instance is created.
    * ``end_time`` defaults to ``None``.
    """

    run_id: str = dataclasses.field(default_factory=str)
    validations: dict = dataclasses.field(default_factory=dict)
    labels: list = dataclasses.field(default_factory=list)
    # Evaluated per instance, so each run records its own creation time.
    start_time: typing.Optional[datetime.datetime] = dataclasses.field(
        default_factory=_utc_now
    )
    end_time: typing.Optional[datetime.datetime] = None