sql-scripts/rideshare_lakehouse_curated/sp_demo_data_quality_columns.sql (30 lines of code) (raw):

/*################################################################################## # Copyright 2022 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # https://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ###################################################################################*/ /* Use Cases: - Gets the Data Quality (Dataplex) results for each column in a table - This will be called by Airflow DAG: sample-dataplex-run-data-quality Description: - Shows gathering the data which will be ingeted into the Data Catalog Tag Template - You could make this procedure more generic and pass in the table as a parameter (this is just a demo) - You could also deploy this to the dataplex_data_quality dataset (instead of rideshare) Show: - References: - Clean up / Reset script: */ --SELECT * from `${project_id}.dataplex_data_quality.data_quality_results` ; WITH LatestExecution AS ( SELECT MAX(execution_ts) AS latest_execution_ts FROM `${project_id}.dataplex_data_quality.data_quality_results` WHERE table_id = '${project_id}.rideshare_lakehouse_curated.bigquery_rideshare_trip' ) , TableData AS ( SELECT invocation_id, execution_ts, column_id, rule_binding_id, rule_id, dimension, rows_validated, success_count, success_percentage, failed_count, failed_percentage, null_count, null_percentage FROM `${project_id}.dataplex_data_quality.data_quality_results` AS data_quality_results CROSS JOIN LatestExecution WHERE invocation_id = (SELECT DISTINCT invocation_id FROM `${project_id}.dataplex_data_quality.data_quality_results` WHERE execution_ts = (SELECT latest_execution_ts FROM LatestExecution)) AND column_id IS NOT NULL ) SELECT * FROM TableData;