# Copyright 2020-2024 Google, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import requests, configparser, time
from datetime import datetime, date
from datetime import time as dtime
import pytz
from operator import itemgetter
import pandas as pd
from pyarrow import parquet
import json
import os

from google.api_core.gapic_v1.client_info import ClientInfo
from google.protobuf.timestamp_pb2 import Timestamp
from google.cloud import datacatalog
from google.cloud.datacatalog_v1 import types
from google.cloud.datacatalog import DataCatalogClient
from google.cloud import bigquery
from google.cloud import storage

import Resources as res
import BigQueryUtils as bq
import constants
from common import log_error, log_error_tag_dict, log_info, log_info_tag_dict

config = configparser.ConfigParser()
config.read("tagengine.ini")

BIGQUERY_REGION = config['DEFAULT']['BIGQUERY_REGION']

USER_AGENT = 'cloud-solutions/datacatalog-tag-engine-v2'

class DataCatalogController:
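    """Controller for creating, updating, and deleting Data Catalog tags.

    Wraps the Data Catalog, BigQuery, Cloud Storage, and Policy Tag Manager clients
    and applies Tag Engine configs (static, dynamic, glossary, sensitive, entry,
    export, and import) to BigQuery, GCS fileset, and Spanner entries."""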
    
    def __init__(self, credentials, tag_creator_account=None, tag_invoker_account=None, \
                 template_id=None, template_project=None, template_region=None):
        self.credentials = credentials
        self.tag_creator_account = tag_creator_account
        self.tag_invoker_account = tag_invoker_account
        self.template_id = template_id
        self.template_project = template_project
        self.template_region = template_region
        self.client = DataCatalogClient(credentials=self.credentials, client_info=ClientInfo(user_agent=USER_AGENT))
        
        if template_id is not None and template_project is not None and template_region is not None:
            self.template_path = self.client.tag_template_path(template_project, template_region, template_id)
        else:
            self.template_path = None
            
        self.bq_client = bigquery.Client(credentials=self.credentials, location=BIGQUERY_REGION, client_info=ClientInfo(user_agent=USER_AGENT))
        self.gcs_client = storage.Client(credentials=self.credentials, client_info=ClientInfo(user_agent=USER_AGENT))
        self.ptm_client = datacatalog.PolicyTagManagerClient(credentials=self.credentials, client_info=ClientInfo(user_agent=USER_AGENT))
    
    def get_template(self, included_fields=None):
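        """Fetch the tag template referenced by self.template_path and return its fields
        as a list of dicts (field_id, display_name, field_type, is_required, order, and
        enum_values for enum fields), sorted by descending display order. If included_fields
        is given, only the listed fields are returned, and any field_value or
        query_expression entries are carried over into the result. Returns an empty
        list if the template cannot be retrieved."""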
        
        fields = []
        
        try:
            tag_template = self.client.get_tag_template(name=self.template_path)
        
        except Exception as e:
            msg = 'Error retrieving tag template {}'.format(self.template_path)
            log_error(msg, e)
            return fields
        
        for field_id, field_value in tag_template.fields.items():
            
            field_id = str(field_id)
            
            if included_fields:
                match_found = False
                for included_field in included_fields:
                    if included_field['field_id'] == field_id:
                        match_found = True
                        
                        if 'field_value' in included_field:
                            assigned_value = included_field['field_value']
                        else:
                            assigned_value = None
                            
                        if 'query_expression' in included_field:
                            query_expression = included_field['query_expression']
                        else:
                            query_expression = None
                        
                        break
                
                if not match_found:
                    continue
            
            display_name = field_value.display_name
            is_required = field_value.is_required
            order = field_value.order
     
            enum_values = []
            
            field_type = None
            
            if field_value.type_.primitive_type == datacatalog.FieldType.PrimitiveType.DOUBLE:
                field_type = "double"
            elif field_value.type_.primitive_type == datacatalog.FieldType.PrimitiveType.STRING:
                field_type = "string"
            elif field_value.type_.primitive_type == datacatalog.FieldType.PrimitiveType.BOOL:
                field_type = "bool"
            elif field_value.type_.primitive_type == datacatalog.FieldType.PrimitiveType.TIMESTAMP:
                field_type = "datetime"
            elif field_value.type_.primitive_type == datacatalog.FieldType.PrimitiveType.RICHTEXT:
                field_type = "richtext"
            elif field_value.type_.primitive_type == datacatalog.FieldType.PrimitiveType.PRIMITIVE_TYPE_UNSPECIFIED:
                # an unspecified primitive type indicates an enum field
                field_type = "enum"
                
                for allowed_value in field_value.type_.enum_type.allowed_values:
                    enum_values.append(allowed_value.display_name)
            
            # populate dict
            field = {}
            field['field_id'] = field_id
            field['display_name'] = display_name
            field['field_type'] = field_type
            field['is_required'] = is_required
            field['order'] = order
            
            if field_type == "enum":
                field['enum_values'] = enum_values
                
            if included_fields:
                if assigned_value:
                    field['field_value'] = assigned_value
                if query_expression:
                    field['query_expression'] = query_expression

            fields.append(field)
                          
        return sorted(fields, key=itemgetter('order'), reverse=True)
    
        
    def check_if_tag_exists(self, parent, column=None):
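        """Check whether a tag from this controller's template is already attached to
        the entry identified by parent. If column is empty or None, look for a
        table-level tag; otherwise look for a tag on that column.
        Returns a (tag_exists, tag_id) tuple."""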
        
        print(f'enter check_if_tag_exists, parent: {parent}')
        
        tag_exists = False
        tag_id = ""
        
        tag_list = self.client.list_tags(parent=parent, timeout=120)
        
        for tag_instance in tag_list:
            
            tagged_column = tag_instance.column          
            tagged_template_project = tag_instance.template.split('/')[1]
            tagged_template_location = tag_instance.template.split('/')[3]
            tagged_template_id = tag_instance.template.split('/')[5]
            
            if column == '' or column is None:
                # looking for a table-level tag
                if tagged_template_id == self.template_id and tagged_template_project == self.template_project and \
                    tagged_template_location == self.template_region and tagged_column == "":
                    tag_exists = True
                    tag_id = tag_instance.name
                    break
            else:
                # looking for a column-level tag
                if column.lower() == tagged_column and tagged_template_id == self.template_id and tagged_template_project == self.template_project and \
                    tagged_template_location == self.template_region:
                    tag_exists = True
                    tag_id = tag_instance.name
                    break
         
        return tag_exists, tag_id
    
    
    def apply_static_asset_config(self, fields, uri, job_uuid, config_uuid, template_uuid, tag_history, overwrite=False):
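        """Create or update a static tag on a BQ table/view (uri given as a string) or
        a GCS file (uri given as a [bucket, filename] list). An existing tag is left
        untouched unless overwrite is True. Returns constants.SUCCESS or constants.ERROR."""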
        
        # uri is either a BQ table/view path or GCS file path       
        op_status = constants.SUCCESS
        column = ''
        
        is_gcs = False
        is_bq = False
        
        # look up the entry based on the resource type
        if isinstance(uri, list):
            is_gcs = True
            bucket = uri[0].replace('-', '_')
            filename = uri[1].split('.')[0].replace('/', '_') # extract the filename without extension, replace '/' with '_'
            gcs_resource = '//datacatalog.googleapis.com/projects/' + self.template_project + '/locations/' + self.template_region + '/entryGroups/' + bucket + '/entries/' + filename
            print('gcs_resource: ', gcs_resource)
            request = datacatalog.LookupEntryRequest()
            request.linked_resource=gcs_resource
            uri = '/'.join(uri)
            #print('uri:', uri)
            
            try:
                entry = self.client.lookup_entry(request)
                print('GCS entry:', entry.name)
            except Exception as e:
                msg = 'Unable to find the entry in the catalog. Entry {} does not exist'.format(gcs_resource)
                log_error(msg, e)
                op_status = constants.ERROR
                return op_status
                
        elif isinstance(uri, str):
            is_bq = True
            bigquery_resource = '//bigquery.googleapis.com/projects/' + uri
            print("bigquery_resource: " + bigquery_resource)
        
            request = datacatalog.LookupEntryRequest()
            request.linked_resource=bigquery_resource
            entry = self.client.lookup_entry(request)
            print('entry: ', entry.name)
        
        else:
            msg = 'Error: uri {} is neither a BQ path (str) nor a GCS path (list)'.format(uri)
            log_error(msg, None, job_uuid)
            op_status = constants.ERROR
            return op_status
        
        try:    
            tag_exists, tag_id = self.check_if_tag_exists(parent=entry.name)
            print('tag exists: ', tag_exists)
        
        except Exception as e:
            msg = 'Error during check_if_tag_exists {}'.format(entry.name)
            log_error(msg, e, job_uuid)
            op_status = constants.ERROR
            return op_status

        if tag_exists and not overwrite:
            msg = 'Tag already exists and overwrite is False'
            log_info(msg)
            op_status = constants.SUCCESS
            return op_status
        
        op_status = self.create_update_delete_tag(fields, tag_exists, tag_id, job_uuid, config_uuid, 'STATIC_ASSET_TAG', tag_history, entry, uri)    
           
        return op_status


    def apply_dynamic_table_config(self, fields, uri, job_uuid, config_uuid, template_uuid, tag_history, batch_mode=False):
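        """Create or update a table-level tag whose field values come from running each
        field's query_expression in BigQuery. Fields whose queries fail or return no
        rows are skipped; if every field fails, the tag is not written and
        constants.ERROR is returned."""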
        
        print('*** apply_dynamic_table_config ***')

        op_status = constants.SUCCESS
        error_exists = False
        
        bigquery_resource = '//bigquery.googleapis.com/projects/' + uri
        
        #print('bigquery_resource: ', bigquery_resource)
        
        request = datacatalog.LookupEntryRequest()
        request.linked_resource=bigquery_resource
        entry = self.client.lookup_entry(request)

        tag_exists, tag_id = self.check_if_tag_exists(parent=entry.name)
        print("tag_exists: " + str(tag_exists))
        
        # create new tag
        tag = datacatalog.Tag()
        tag.template = self.template_path
        verified_field_count = 0
        
        for field in fields:
            field_id = field['field_id']
            field_type = field['field_type']
            query_expression = field['query_expression']

            # parse and run query in BQ
            query_str = self.parse_query_expression(uri, query_expression)
            print('returned query_str: ' + query_str)
            
            # note: field_values is of type list
            field_values, error_exists = self.run_query(query_str, field_type, batch_mode, job_uuid)
            print('field_values: ', field_values)
            print('error_exists: ', error_exists)
    
            if error_exists or field_values == []:
                continue
            
            tag, error_exists = self.populate_tag_field(tag, field_id, field_type, field_values, job_uuid)
    
            if error_exists:
                continue
                                    
            verified_field_count = verified_field_count + 1
            #print('verified_field_count: ' + str(verified_field_count))    
            
            # store the value back in the dict, so that it can be accessed by the exporter
            #print('field_value: ' + str(field_value))
            if field_type == 'richtext':
                formatted_value = ', '.join(str(v) for v in field_values)
            else:
                formatted_value = field_values[0]
                
            field['field_value'] = formatted_value
            
        # for loop ends here
                
        if error_exists:
            # error was encountered while running SQL expression
            # proceed with tag creation / update, but return error to user
            op_status = constants.ERROR
            
        if verified_field_count == 0:
            # tag is empty due to errors, skip tag creation
            op_status = constants.ERROR
            return op_status
                        
        if tag_exists:
            tag.name = tag_id
            op_status = self.do_create_update_delete_action(job_uuid, 'update', tag)
        else:
            op_status = self.do_create_update_delete_action(job_uuid, 'create', tag, entry)
            
        if op_status == constants.SUCCESS and tag_history:
            bqu = bq.BigQueryUtils(self.credentials, BIGQUERY_REGION)
            template_fields = self.get_template()
            bqu.copy_tag(self.tag_creator_account, self.tag_invoker_account, job_uuid, self.template_id, template_fields, uri, None, fields)
               
        return op_status

    
    def column_exists_in_table(self, target_column, entry_columns):
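        """Return True if target_column is present in entry_columns. A dotted name such
        as parent.child is treated as a nested column and matched against the parent
        column's subcolumns (one level of nesting is supported)."""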
        
        column_exists = False
        
        for catalog_column in entry_columns:
            #print('column:', catalog_column.column)
            #print('subcolumns:', catalog_column.subcolumns)
            
            is_nested_column = False
            
            # figure out if the column is nested (only one level of nesting is handled)
            if len(target_column.split('.')) > 1:
                is_nested_column = True
                parent_column = target_column.split('.')[0]
                nested_column = target_column.split('.')[1]
            
            if is_nested_column:
                if catalog_column.column == parent_column:
                    for subcolumn in catalog_column.subcolumns:
                        if nested_column == subcolumn.column:
                            column_exists = True
                            break 
            else:
                if catalog_column.column == target_column:
                    column_exists = True
                    break
                    
        return column_exists
            
    
    def apply_dynamic_column_config(self, fields, columns_query, uri, job_uuid, config_uuid, template_uuid, tag_history, batch_mode=False):
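        """Create or update column-level tags on the columns returned by columns_query.
        For each target column, the per-field query expressions are combined into a
        single BigQuery job, and the resulting tags are written in bulk through the
        reconcile_tags API."""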
        
        print('*** apply_dynamic_column_config ***')

        tag_work_queue = [] # collection of Tag objects that will be passed to the API to be created or updated 
        
        op_status = constants.SUCCESS
        error_exists = False
        
        target_columns = [] # columns in the table which need to be tagged
        columns_query = self.parse_query_expression(uri, columns_query)
        print('columns_query:', columns_query)
        
        rows = self.bq_client.query(columns_query).result()

        num_columns = 0
        for row in rows:    
            for column in row:
                print('column:', column)
                target_columns.append(column)
                num_columns += 1
                 
        if num_columns == 0:
            # no columns to tag
            msg = f"Error could not find columns to tag. Please check column_query parameter in your config. Current value: {columns_query}"
            log_error(msg, None, job_uuid)
            op_status = constants.ERROR
            return op_status
                
        #print('columns to be tagged:', target_columns)
                    
        bigquery_resource = '//bigquery.googleapis.com/projects/' + uri
        #print('bigquery_resource: ', bigquery_resource)
        
        request = datacatalog.LookupEntryRequest()
        request.linked_resource=bigquery_resource
        entry = self.client.lookup_entry(request)
                    
        column_fields_list = [] # list<dictionaries> where dict = {column, fields}

        for target_column in target_columns:
            
            #print('target_column:', target_column)
            
            # fail quickly if a column is not found in the entry's schema
            column_exists = self.column_exists_in_table(target_column, entry.schema.columns)
            
            if not column_exists:
                msg = f"Error: could not find column {target_column} in {uri}"
                log_error(msg, None, job_uuid)
                op_status = constants.ERROR
                return op_status

            # initialize the new column-level tag
            tag = datacatalog.Tag()
            tag.template = self.template_path
            tag.column = target_column
            
            verified_field_count = 0
            query_strings = []
            
            for field in fields:
                query_expression = field['query_expression']
                query_str = self.parse_query_expression(uri, query_expression, target_column)
                query_strings.append(query_str)

            # combine query expressions 
            combined_query = self.combine_queries(query_strings)
            
            # run combined query, adding the results to the field_values for each field
            # Note: field_values is of type list
            fields, error_exists = self.run_combined_query(combined_query, target_column, fields, job_uuid)

            if error_exists:
                op_status = constants.ERROR
                continue
            
            # populate tag fields
            tag, error_exists = self.populate_tag_fields(tag, fields, job_uuid)
    
            if error_exists:
                op_status = constants.ERROR
                continue
                                                         
            column_fields_list.append({"column": target_column, "fields": fields})
            tag_work_queue.append(tag)

        # outer loop ends here
        if len(tag_work_queue) == 0:
            op_status = constants.ERROR
            return op_status
            
        # ready to create or update all the tags in work queue         
        rec_request = datacatalog.ReconcileTagsRequest(
            parent=entry.name,
            tag_template=self.template_path,
            tags=tag_work_queue
        )

        #print('rec_request:', rec_request)
        
        try:
            operation = self.client.reconcile_tags(request=rec_request)
            print("Waiting for operation to complete...")
            resp = operation.result()
            #print("resp:", resp)
        except Exception as e:
            msg = 'Error during reconcile_tags on entry {}'.format(entry.name)
            log_error(msg, e, job_uuid)
            op_status = constants.ERROR
            return op_status
                        
        if tag_history and op_status != constants.ERROR:
            bqu = bq.BigQueryUtils(self.credentials, BIGQUERY_REGION)
            success = bqu.copy_tags(self.tag_creator_account, self.tag_invoker_account, job_uuid, self.template_id, self.get_template(), uri, column_fields_list)
            print('Tag history completed successfully:', success) 
            
            if success:
                op_status = constants.SUCCESS
            else:
                op_status = constants.ERROR
                                              
        return op_status

    
    def combine_queries(self, query_strings):
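        """Merge the per-field query expressions into a single
        'select (q1), (q2), ...' statement so that all field values for a column
        can be fetched with one BigQuery job."""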
        
        large_query = "select "
        
        for query in query_strings:
             large_query += "({}), ".format(query)
        
        return large_query[0:-2]  
        
    
    def apply_entry_config(self, fields, uri, job_uuid, config_uuid, template_uuid, tag_history):
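        """Create a FILESET entry for a parquet file in GCS (uri is a [bucket, filename]
        list), deriving the entry's schema from the parquet metadata, then attach a
        file metadata tag (name, bucket, path, type, size, num_rows, timestamps, etc.).
        The entry group is created first if it does not already exist."""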
        
        print('** apply_entry_config **')
        
        op_status = constants.SUCCESS        
        bucket_name, filename = uri
        bucket = self.gcs_client.get_bucket(bucket_name)
        blob = bucket.get_blob(filename)
        
        entry_group_short_name = bucket_name.replace('-', '_')
        entry_group_full_name = 'projects/' + self.template_project + '/locations/' + self.template_region + '/entryGroups/' + bucket_name.replace('-', '_')
        
        # create the entry group    
        is_entry_group = self.entry_group_exists(entry_group_full_name)
        print('is_entry_group: ', is_entry_group)
        
        if not is_entry_group:
            self.create_entry_group(entry_group_short_name)
        
        # generate the entry id, replace '/' with '_' and remove the file extension from the name
        entry_id = filename.split('.')[0].replace('/', '_')
         
        try:
            entry_name = entry_group_full_name + '/entries/' + entry_id
            print('Info: entry_name: ', entry_name)
            
            entry = self.client.get_entry(name=entry_name)
            print('Info: entry already exists: ', entry.name)
            
        except Exception as e: 
            msg = 'Entry does not exist {}'.format(entry_name)
            log_error(msg, e, job_uuid)
         
            # populate the entry
            entry = datacatalog.Entry()
            entry.name = filename
            
            entry.display_name = entry_id 
            entry.type_ = 'FILESET'
            entry.gcs_fileset_spec.file_patterns = ['gs://' + bucket_name + '/' + filename]
            entry.fully_qualified_name = 'gs://' + bucket_name + '/' + filename
            entry.source_system_timestamps.create_time = datetime.utcnow() 
            entry.source_system_timestamps.update_time = datetime.utcnow() 
            
            # get the file's schema
            # download the file to App Engine's tmp directory 
            tmp_file = '/tmp/' + entry_id
            blob.download_to_filename(filename=tmp_file)
        
            # validate that it's a parquet file
            try:
                parquet.ParquetFile(tmp_file)
            except Exception as e:
                # not a parquet file, ignore it
                msg = 'Error: {} is not a parquet file, ignoring it'.format(filename)
                log_error(msg, e, job_uuid)
                op_status = constants.ERROR
                return op_status   
        
            schema = parquet.read_schema(tmp_file, memory_map=True)
            df = pd.DataFrame(({"column": name, "datatype": str(pa_dtype)} for name, pa_dtype in zip(schema.names, schema.types)))
            df = df.reindex(columns=["column", "datatype"], fill_value=pd.NA)  
            #print('df: ', df)

            for index, row in df.iterrows():                            
                entry.schema.columns.append(
                   types.ColumnSchema(
                       column=row['column'],
                       type_=row['datatype'],
                       description=None,
                       mode=None
                   )
                ) 
                                         
            # create the entry
            #print('entry request: ', entry)            
            created_entry = self.client.create_entry(parent=entry_group_full_name, entry_id=entry_id, entry=entry)
            print('Info: created entry: ', created_entry.name)
            
            # get the number of rows in the file
            num_rows = parquet.ParquetFile(tmp_file).metadata.num_rows
            #print('num_rows: ', num_rows)
            
            # delete the tmp file ASAP to free up memory
            os.remove(tmp_file)
            
            # create the file metadata tag
            template_path = self.client.tag_template_path(self.template_project, self.template_region, self.template_id)
            tag = datacatalog.Tag()
            tag.template = template_path
    
            for field in fields:
                
                if field['field_id'] == 'name':
                    string_field = datacatalog.TagField()
                    string_field.string_value = filename
                    tag.fields['name'] = string_field
                    field['field_value'] = filename # field_value is used by the BQ exporter
                    
                if field['field_id'] == 'bucket':
                    string_field = datacatalog.TagField()
                    string_field.string_value = bucket_name
                    tag.fields['bucket'] = string_field
                    field['field_value'] = bucket_name # field_value is used by the BQ exporter
                    
                if field['field_id'] == 'path':
                    string_field = datacatalog.TagField()
                    string_field.string_value = 'gs://' + bucket_name + '/' + filename
                    tag.fields['path'] = string_field
                    field['field_value'] = 'gs://' + bucket_name + '/' + filename # field_value is used by the BQ exporter
    
                if field['field_id'] == 'type':
                    enum_field = datacatalog.TagField()
                    enum_field.enum_value.display_name = 'PARQUET' # hardcode file extension for now
                    tag.fields['type'] = enum_field
                    field['field_value'] = 'PARQUET' # field_value is used by the BQ exporter
    
                if field['field_id'] == 'size':
                    double_field = datacatalog.TagField()
                    double_field.double_value = blob.size
                    tag.fields['size'] = double_field
                    field['field_value'] = blob.size # field_value is used by the BQ exporter

                if field['field_id'] == 'num_rows':
                    double_field = datacatalog.TagField()
                    double_field.double_value = num_rows
                    tag.fields['num_rows'] = double_field
                    field['field_value'] = num_rows # field_value is used by the BQ exporter

                if field['field_id'] == 'created_time':
                    datetime_field = datacatalog.TagField()
                    datetime_field.timestamp_value = blob.time_created
                    tag.fields['created_time'] = datetime_field
                    field['field_value'] = blob.time_created # field_value is used by the BQ exporter

                if field['field_id'] == 'updated_time':
                    datetime_field = datacatalog.TagField()
                    datetime_field.timestamp_value = blob.updated # last-modified time of the blob
                    tag.fields['updated_time'] = datetime_field
                    field['field_value'] = blob.updated # field_value is used by the BQ exporter

                if field['field_id'] == 'storage_class':
                    string_field = datacatalog.TagField()
                    string_field.string_value = blob.storage_class
                    tag.fields['storage_class'] = string_field
                    field['field_value'] = blob.storage_class # field_value is used by the BQ exporter
            
                if field['field_id'] == 'content_encoding':   
                    if blob.content_encoding:
                        string_field = datacatalog.TagField()
                        string_field.string_value = blob.content_encoding
                        tag.fields['content_encoding'] = string_field
                        field['field_value'] = blob.content_encoding # field_value is used by the BQ exporter
            
                if field['field_id'] == 'content_language':
                    if blob.content_language:
                        string_field = datacatalog.TagField()
                        string_field.string_value = blob.content_language
                        tag.fields['content_language'] = string_field
                        field['field_value'] = blob.content_language # field_value is used by the BQ exporter

                if field['field_id'] == 'media_link':            
                    string_field = datacatalog.TagField()
                    string_field.string_value = blob.media_link
                    tag.fields['media_link'] = string_field
                    field['field_value'] = blob.media_link # field_value is used by the BQ exporter

            #print('tag request: ', tag)
            created_tag = self.client.create_tag(parent=entry_name, tag=tag)
            #print('created_tag: ', created_tag)
            
            if tag_history:
                bqu = bq.BigQueryUtils(self.credentials, BIGQUERY_REGION)
                template_fields = self.get_template()
                bqu.copy_tag(self.tag_creator_account, self.tag_invoker_account, job_uuid, self.template_id, template_fields, '/'.join(uri), None, fields)
                                                
        return op_status


    def entry_group_exists(self, entry_group_full_name):
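        """Return True if the entry group exists in Data Catalog, False otherwise."""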
    
        request = datacatalog.GetEntryGroupRequest(name=entry_group_full_name)
        
        try:
            response = self.client.get_entry_group(request=request)
            return True
        except Exception as e:
            msg = 'Error: entry group does not exist {}'.format(entry_group_full_name)
            log_error(msg, e)
            return False
    
    
    def create_entry_group(self, entry_group_short_name):
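        """Create an entry group in the template's project and region and return its
        fully qualified name."""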
    
        eg = datacatalog.EntryGroup()
        eg.display_name = entry_group_short_name
        
        entry_group = self.client.create_entry_group(
                    parent='projects/' + self.template_project + '/locations/' + self.template_region,
                    entry_group_id=entry_group_short_name,
                    entry_group=eg)
        
        print('created entry_group: ', entry_group.name)
        return entry_group.name
           

    def apply_glossary_asset_config(self, fields, mapping_table, uri, job_uuid, config_uuid, template_uuid, tag_history, overwrite=False):
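        """Tag an asset with boolean glossary fields. The entry's column names are
        matched against the mapping table (source_name -> canonical_name) in BigQuery,
        and each matched canonical_name sets the corresponding template field to True."""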
        
        # uri is either a BQ table/view path or GCS file path    
        op_status = constants.SUCCESS
        
        is_gcs = False
        is_bq = False
        
        # look up the entry based on the resource type
        if isinstance(uri, list):
            is_gcs = True
            bucket = uri[0].replace('-', '_')
            filename = uri[1].split('.')[0].replace('/', '_') # extract the filename without the extension, replace '/' with '_'
            gcs_resource = '//datacatalog.googleapis.com/projects/' + self.template_project + '/locations/' + self.template_region + '/entryGroups/' + bucket + '/entries/' + filename
            #print('gcs_resource: ', gcs_resource)
            request = datacatalog.LookupEntryRequest()
            request.linked_resource=gcs_resource
            
            try:
                entry = self.client.lookup_entry(request)
                print('entry: ', entry.name)
            except Exception as e:
                msg = 'Unable to find entry in the catalog. Entry {} does not exist: {}'.format(gcs_resource, e)
                log_error(msg, e, job_uuid)
                op_status = constants.ERROR
                return op_status
                #print('entry found: ', entry)
        
        elif isinstance(uri, str):
            is_bq = True        
            bigquery_resource = '//bigquery.googleapis.com/projects/' + uri
            print("bigquery_resource: " + bigquery_resource)
        
            request = datacatalog.LookupEntryRequest()
            request.linked_resource=bigquery_resource
            entry = self.client.lookup_entry(request)
            print('entry: ', entry.name)
        
        else:
            msg = 'Error: uri {} is neither a BQ path (str) nor a GCS path (list)'.format(uri)
            log_error(msg, None, job_uuid)
            op_status = constants.ERROR
            return op_status
        
        try:    
            tag_exists, tag_id = self.check_if_tag_exists(parent=entry.name)
            print('tag_exists: ', tag_exists)
        
        except Exception as e:
            msg = 'Error during check_if_tag_exists: {}'.format(e)
            log_error(msg, e, job_uuid)
            op_status = constants.ERROR
            return op_status

        if tag_exists and not overwrite:
            msg = 'Info: tag already exists and overwrite set to False'
            info = {'job_uuid': job_uuid, 'msg': msg}
            print(json.dumps(info))
            
            op_status = constants.SUCCESS
            return op_status
         
        if entry.schema is None or len(entry.schema.columns) == 0:
            msg = 'Error: entry {} does not have a schema in the catalog'.format(entry.name)
            error = {'job_uuid': job_uuid, 'msg': msg}
            print(json.dumps(error))
            
            op_status = constants.ERROR
            return op_status
        
        # retrieve the schema columns from the entry
        column_schema_str = ''
        for column_schema in entry.schema.columns: 
            column_schema_str += "'" + column_schema.column + "',"
        
        #print('column_schema_str: ', column_schema_str)
             
        mapping_table_formatted = mapping_table.replace('bigquery/project/', '').replace('/dataset/', '.').replace('/', '.')
                
        query_str = 'select canonical_name from `' + mapping_table_formatted + '` where source_name in (' + column_schema_str[0:-1] + ')'
        #print('query_str: ', query_str)

        rows = self.bq_client.query(query_str).result()
        
        tag = datacatalog.Tag()
        tag.template = self.template_path
        
        tag_is_empty = True
        
        for row in rows:
            canonical_name = row['canonical_name']
            #print('canonical_name: ', canonical_name)
        
            for field in fields:
                if field['field_id'] == canonical_name:
                    #print('found match')
                    bool_field = datacatalog.TagField()
                    bool_field.bool_value = True
                    tag.fields[canonical_name] = bool_field
                    field['field_value'] = True
                    tag_is_empty = False
                    break
                    
        if tag_is_empty:
            print("Error: can't create the tag because it's empty")
            op_status = constants.ERROR
            return op_status
                            
        if tag_exists:
            # tag already exists and overwrite is True
            tag.name = tag_id
            op_status = self.do_create_update_delete_action(job_uuid, 'update', tag)
        else:
            op_status = self.do_create_update_delete_action(job_uuid, 'create', tag, entry)
                    
        if tag_history:
            bqu = bq.BigQueryUtils(self.credentials, BIGQUERY_REGION)
            template_fields = self.get_template()
            if is_gcs:
                bqu.copy_tag(self.tag_creator_account, self.tag_invoker_account, job_uuid, self.template_id, template_fields, '/'.join(uri), None, fields)
            if is_bq:
                bqu.copy_tag(self.tag_creator_account, self.tag_invoker_account, job_uuid, self.template_id, template_fields, uri, None, fields)
                   
        return op_status
      
                 
    def apply_sensitive_column_config(self, fields, dlp_dataset, infotype_selection_table, infotype_classification_table, \
                                      uri, create_policy_tags, taxonomy_id, job_uuid, config_uuid, template_uuid, \
                                      tag_history, overwrite=False):
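        """Tag sensitive columns based on DLP findings. Aggregates the infotypes found
        per column from the DLP findings table, maps them to a notable infotype via
        infotype_selection_table, derives a single classification for the table via
        infotype_classification_table, then writes a column-level tag (and, optionally,
        a policy tag from the given taxonomy) on each flagged column."""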
        
        if create_policy_tags:

            request = datacatalog.ListPolicyTagsRequest(
                parent=taxonomy_id
            )

            try:
                page_result = self.ptm_client.list_policy_tags(request=request)
            except Exception as e:
                msg = 'Unable to retrieve the policy tag taxonomy for taxonomy_id {}'.format(taxonomy_id)
                log_error(msg, e, job_uuid)
                op_status = constants.ERROR
                return op_status    

            policy_tag_names = [] # list of fully qualified policy tag names and sensitive categories

            for response in page_result:
                policy_tag_names.append((response.name, response.display_name))

            policy_tag_requests = [] # stores the list of fully qualified policy tag names and table column names, 
                                     # so that we can create the policy tags on the various sensitive fields
 
        # uri is a BQ table path       
        op_status = constants.SUCCESS
        column = ''
        
        if not isinstance(uri, str):
            print('Error: uri ' + str(uri) + ' is not of type string.')
            op_status = constants.ERROR
            return op_status
            
        bigquery_resource = '//bigquery.googleapis.com/projects/' + uri
        #print("bigquery_resource: ", bigquery_resource)
        
        request = datacatalog.LookupEntryRequest()
        request.linked_resource=bigquery_resource
        
        try:
            entry = self.client.lookup_entry(request)
        except Exception as e:
            msg = 'Error looking up entry {} in the catalog: {}'.format(bigquery_resource, e)
            log_error(msg, e, job_uuid)
            op_status = constants.ERROR
            return op_status
           
        dlp_dataset = dlp_dataset.replace('bigquery/project/', '').replace('/dataset/', '.').replace('/', '.')        
        infotype_selection_table = infotype_selection_table.replace('bigquery/project/', '').replace('/dataset/', '.').replace('/', '.')
        infotype_classification_table = infotype_classification_table.replace('bigquery/project/', '').replace('/dataset/', '.').replace('/', '.')
        dlp_table = dlp_dataset + '.' + uri.split('/')[4]
               
        infotype_fields = []
        notable_infotypes = []
    
        # get an array of infotypes associated with each field in the DLP findings table
        dlp_sql = 'select field, array_agg(infotype) infotypes '
        dlp_sql += 'from (select distinct cl.record_location.field_id.name as field, info_type.name as infotype '
        dlp_sql += 'from ' + dlp_table + ', unnest(location.content_locations) as cl '
        dlp_sql += 'order by cl.record_location.field_id.name) '
        dlp_sql += 'group by field'
        
        try:
            dlp_rows = self.bq_client.query(dlp_sql).result()
        
        except Exception as e:
            msg = 'Error querying DLP findings table: {}'.format(dlp_sql)
            log_error(msg, e, job_uuid)
            op_status = constants.ERROR
            return op_status

        dlp_row_count = 0
    
        for dlp_row in dlp_rows:
        
            dlp_row_count += 1
        
            field = dlp_row['field']
            infotype_fields.append(field)
            infotypes = dlp_row['infotypes']
        
            print('field ', field, ', infotypes [', infotypes, ']')
        
            is_sql = 'select notable_infotype '
            is_sql += 'from ' + infotype_selection_table + ' i, '
        
            infotype_count = len(infotypes)
        
            for i in range(0, infotype_count):
            
                is_sql += 'unnest(i.field_infotypes) as i' + str(i) + ', '
        
            is_sql = is_sql[:-2] + ' '
            
            for i, infotype in enumerate(infotypes):
            
                if i == 0:
                    is_sql += 'where i' + str(i) + ' = "' + infotype + '" ' 
                else:
                    is_sql += 'and i' + str(i) + ' = "' + infotype + '" ' 
        
            is_sql += 'order by array_length(i.field_infotypes) '
            is_sql += 'limit 1'
        
            #print('is_sql: ', is_sql)
            
            try:
                ni_rows = self.bq_client.query(is_sql).result()
            except Exception as e:
                msg = 'Error querying infotype selection table: {}'.format(is_sql)
                log_error(msg, e, job_uuid)
                op_status = constants.ERROR
                return op_status
        
            for ni_row in ni_rows:
                notable_infotypes.append(ni_row['notable_infotype']) # there should be just one notable infotype per field
    
        # there are no DLP findings
        if dlp_row_count == 0:
            op_status = constants.SUCCESS
            return op_status
    
        # remove duplicate infotypes from notable list
        final_set = list(set(notable_infotypes))
        print('final_set: ', final_set)
        
        # lookup classification using set of notable infotypes   
        c_sql = 'select classification_result '
        c_sql += 'from ' + infotype_classification_table + ' c, '
    
        for i in range(0, len(final_set)):
            c_sql += 'unnest(c.notable_infotypes) as c' + str(i) + ', '
    
        c_sql = c_sql[:-2] + ' '
    
        for i, notable_infotype in enumerate(final_set):
        
            if i == 0:
                c_sql += 'where c' + str(i) + ' = "' + notable_infotype + '" '
            else:
                c_sql += 'and c' + str(i) + ' = "' + notable_infotype + '" '

        c_sql += 'order by array_length(c.notable_infotypes) '
        c_sql += 'limit 1'  

        #print('c_sql: ', c_sql)
    
        try:
            c_rows = self.bq_client.query(c_sql).result()
        except Exception as e:
            msg = 'Error querying infotype classification table: {}'.format(c_sql)
            log_error(msg, e, job_uuid)
            op_status = constants.ERROR
            return op_status
        
        classification_result = None
    
        for c_row in c_rows:
            classification_result = c_row['classification_result'] # we should end up with one classification result per table
    
        print('classification_result: ', classification_result)
        
        tag = datacatalog.Tag()
        tag.template = self.template_path
        
        # each element represents a field which needs to be tagged
        for infotype_field in infotype_fields:
            
            for field in fields:
                if 'sensitive_field' in field['field_id']:
                    bool_field = datacatalog.TagField()
                    
                    if classification_result == 'Public_Information':
                        bool_field.bool_value = False
                        field['field_value'] = False
                    else:
                        bool_field.bool_value = True
                        field['field_value'] = True
                    
                    tag.fields['sensitive_field'] = bool_field
                    
                if 'sensitive_type' in field['field_id']:
                    enum_field = datacatalog.TagField()
                    enum_field.enum_value.display_name = classification_result
                    tag.fields['sensitive_type'] = enum_field
                    field['field_value'] = classification_result
           
            tag.column = infotype_field # due to a DLP quirk, the infotype field name sometimes differs from the column name in the table
            print('tag.column: ', infotype_field)
            
            # check if a tag already exists on this column
            try:    
                tag_exists, tag_id = self.check_if_tag_exists(parent=entry.name, column=infotype_field)
        
            except Exception as e:
                msg = 'Error during check_if_tag_exists: {}'.format(entry.name)
                log_error(msg, e, job_uuid)
                op_status = constants.ERROR
                return op_status   
            
            # tag already exists    
            if tag_exists:
                
                if not overwrite:
                    # skip this sensitive column because it is already tagged
                    continue
                
                tag.name = tag_id
                op_status = self.do_create_update_delete_action(job_uuid, 'update', tag)
            else:
                op_status = self.do_create_update_delete_action(job_uuid, 'create', tag, entry)
                                    
            if op_status == constants.SUCCESS and create_policy_tags and classification_result != 'Public_Information':
                # add the column name and policy tag name to a list
                for policy_tag_name, policy_tag_category in policy_tag_names:
                    if policy_tag_category == classification_result:
                        policy_tag_requests.append((infotype_field, policy_tag_name))
                    
                            
            if op_status == constants.SUCCESS and tag_history:
                bqu = bq.BigQueryUtils(self.credentials, BIGQUERY_REGION)
                template_fields = self.get_template()
                bqu.copy_tag(self.tag_creator_account, self.tag_invoker_account, job_uuid, self.template_id, template_fields, uri, infotype_field, fields)
        
                        
        # once we have created the regular tags, we can create/update the policy tags
        if create_policy_tags and len(policy_tag_requests) > 0:
            table_id = uri.replace('/datasets/', '.').replace('/tables/', '.')
            op_status = self.apply_policy_tags(table_id, policy_tag_requests, job_uuid)
        
        if op_status != constants.SUCCESS:
            msg = 'Error occurred when tagging {}'.format(uri) 
            error = {'job_uuid': job_uuid, 'msg': msg}
            print(json.dumps(error))
                
        return op_status

    
    def apply_policy_tags(self, table_id, policy_tag_requests, job_uuid=None):
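        """Update the BigQuery table's schema so that each (column, policy_tag_name)
        pair in policy_tag_requests attaches that policy tag to the column; all other
        columns are carried over unchanged."""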
        op_status = constants.SUCCESS
        table = self.bq_client.get_table(table_id) 
        schema = table.schema

        new_schema = []
        
        for field in schema:
            
            field_match = False
            
            for column, policy_tag_name in policy_tag_requests:
                
                if field.name == column:
                    print('applying policy tag on', field.name)
                    policy = bigquery.schema.PolicyTagList(names=[policy_tag_name,])
                    new_schema.append(bigquery.schema.SchemaField(field.name, field.field_type, field.mode, policy_tags=policy)) 
                    field_match = True
                    break
        
            if not field_match:    
                new_schema.append(field)
                
        table.schema = new_schema
        
        try:
            table = self.bq_client.update_table(table, ["schema"])  
        
        except Exception as e:
            msg = 'Error occurred while updating the schema of {}'.format(table_id)
            log_error(msg, e, job_uuid)
            op_status = constants.ERROR
        
        return op_status
        
            
    def apply_export_config(self, config_uuid, target_project, target_dataset, target_region, uri):
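        """Export the tags attached to a BQ dataset or table entry into the
        catalog_report_* reporting tables (dataset, table, and column level) in the
        target BigQuery project and dataset."""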
        
        column_tag_records = []
        table_tag_records = []
        dataset_tag_records = []
        
        export_status = constants.SUCCESS
        bqu = bq.BigQueryUtils(self.credentials, target_region)
        
        if not isinstance(uri, str):
            print('Error: uri ' + str(uri) + ' is not of type string.')
            export_status = constants.ERROR
            return export_status
        
        tagged_project = uri.split('/')[0]
        tagged_dataset = uri.split('/')[2]
        
        if '/tables/' in uri:
            target_table_id = 'catalog_report_table_tags'
            tagged_table = uri.split('/')[4]
        else:
            target_table_id = 'catalog_report_dataset_tags'
            tagged_table = None
            
        bigquery_resource = '//bigquery.googleapis.com/projects/' + uri
        #print("bigquery_resource: ", bigquery_resource)
        
        request = datacatalog.LookupEntryRequest()
        request.linked_resource=bigquery_resource
        
        try:
            entry = self.client.lookup_entry(request)
        except Exception as e:
            msg = 'Error looking up entry {} in catalog'.format(bigquery_resource)
            log_error(msg, e)
            export_status = constants.ERROR
            return export_status

        tag_list = self.client.list_tags(parent=entry.name, timeout=120)
    
        for tag in tag_list:
            print('tag.template:', tag.template)
            print('tag.column:', tag.column)
            
            # get tag template fields
            self.template_id = tag.template.split('/')[5]
            self.template_project = tag.template.split('/')[1]
            self.template_region = tag.template.split('/')[3]
            self.template_path = tag.template
            template_fields = self.get_template()
            
            if tag.column and len(tag.column) > 1:
                tagged_column = tag.column
                target_table_id = 'catalog_report_column_tags'
            else:
                tagged_column = None
                target_table_id = 'catalog_report_table_tags'
            
            for template_field in template_fields:
    
                #print('template_field:', template_field)
                field_id = template_field['field_id']
                
                if field_id not in tag.fields:
                    continue
                    
                tagged_field = tag.fields[field_id]
                tagged_field_str = str(tagged_field)
                tagged_field_split = tagged_field_str.split('\n')
                #print('tagged_field_split:', tagged_field_split)
                
                split_index = 0
                field_value = '' # default in case no value can be extracted from the tagged field
                
                for split in tagged_field_split:
                    if '_value:' in split:
                        start_index = split.index(':', 0) + 1
                        #print('start_index:', start_index)
                        field_value = split[start_index:].strip().replace('"', '').replace('<br>', ',')
                        print('extracted field_value:', field_value)
                        break
                    elif 'enum_value' in split:
                        field_value = tagged_field_split[split_index+1].replace('display_name:', '').replace('"', '').strip()
                        print('extracted field_value:', field_value)
                        break
                    
                    split_index += 1                    
                    
                # format record to be written
                current_ts = datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S") + " UTC"
                
                if target_table_id == 'catalog_report_column_tags':
                    column_tag_records.append({"project": tagged_project, "dataset": tagged_dataset, "table": tagged_table, "column": tagged_column, "tag_template": self.template_id, "tag_field": field_id, "tag_value": field_value, "export_time": current_ts})
                
                elif target_table_id == 'catalog_report_table_tags':
                    table_tag_records.append({"project": tagged_project, "dataset": tagged_dataset, "table": tagged_table, "tag_template": self.template_id, "tag_field": field_id, "tag_value": field_value, "export_time": current_ts})
                
                elif target_table_id == 'catalog_report_dataset_tags':
                    dataset_tag_records.append({"project": tagged_project, "dataset": tagged_dataset, "tag_template": self.template_id, "tag_field": field_id, "tag_value": field_value, "export_time": current_ts})
                      
        # write exported records to BQ
        if len(dataset_tag_records) > 0:
            target_table_id = target_project + '.' + target_dataset + '.catalog_report_dataset_tags'
            success = bqu.insert_exported_records(target_table_id, dataset_tag_records)
        
        if len(table_tag_records) > 0:
            target_table_id = target_project + '.' + target_dataset + '.catalog_report_table_tags'
            success = bqu.insert_exported_records(target_table_id, table_tag_records)
                    
        if len(column_tag_records) > 0:
            target_table_id = target_project + '.' + target_dataset + '.catalog_report_column_tags'
            success = bqu.insert_exported_records(target_table_id, column_tag_records)
                     
        return export_status
        
            
    def apply_import_config(self, job_uuid, config_uuid, data_asset_type, data_asset_region, tag_dict, tag_history, overwrite=False):
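        """Create or update a tag from one CSV record (tag_dict). Resolves the target
        entry for a BigQuery, fileset, or Spanner asset, validates the column if one is
        given, maps the remaining CSV fields onto the tag template's fields, and writes
        the tag. Returns constants.SUCCESS or constants.ERROR."""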
    
        #print(f'apply_import_config: {job_uuid}, {config_uuid}, {data_asset_type}, {data_asset_region}, {tag_dict}, {tag_history}')
        
        op_status = constants.SUCCESS
        
        if 'project' in tag_dict:
            project = tag_dict['project']
        else:
            msg = "Error: project info missing from CSV"
            log_error_tag_dict(msg, None, job_uuid, tag_dict)
            op_status = constants.ERROR
            return op_status
        
        if data_asset_type == constants.BQ_ASSET:
            if 'dataset' not in tag_dict:
                msg = "Error: could not find the required dataset field in the CSV"
                log_error_tag_dict(msg, None, job_uuid, tag_dict)
                op_status = constants.ERROR
                return op_status
            else:
                entry_type = constants.DATASET
                dataset = tag_dict['dataset']
                
                if 'table' in tag_dict:
                    table = tag_dict['table']
                    entry_type = constants.BQ_TABLE
            
        if data_asset_type == constants.FILESET_ASSET:
            if 'entry_group' not in tag_dict or 'fileset' not in tag_dict:
                msg = "Error: could not find the required fields in the CSV. Missing entry_group or fileset or both"
                log_error_tag_dict(msg, None, job_uuid, tag_dict)
                op_status = constants.ERROR
                return op_status
            else:
                entry_type = constants.FILESET
                entry_group = tag_dict['entry_group']
                fileset = tag_dict['fileset']
        
        if data_asset_type == constants.SPAN_ASSET:
            if 'instance' not in tag_dict or 'database' not in tag_dict or 'table' not in tag_dict:
                msg = "Error: could not find the required fields in the CSV. The required fields for Spanner are instance, database, and table"
                log_error_tag_dict(msg, None, job_uuid, tag_dict)
                op_status = constants.ERROR
                return op_status
            else:
               entry_type = constants.SPAN_TABLE
               instance = tag_dict['instance']
               database = tag_dict['database']
               
               if 'schema' in tag_dict:
                   schema = tag_dict['schema']
                   table = tag_dict['table']
                   table = f"`{schema}.{table}`"
               else:
                   table = tag_dict['table']
                                
        if entry_type == constants.DATASET:
            resource = f'//bigquery.googleapis.com/projects/{project}/datasets/{dataset}'
            request = datacatalog.LookupEntryRequest()
            request.linked_resource=resource
            
        if entry_type == constants.BQ_TABLE:
            resource = f'//bigquery.googleapis.com/projects/{project}/datasets/{dataset}/tables/{table}'
            request = datacatalog.LookupEntryRequest()
            request.linked_resource=resource
         
        if entry_type == constants.FILESET:
            resource = f'//datacatalog.googleapis.com/projects/{project}/locations/{data_asset_region}/entryGroups/{entry_group}/entries/{fileset}'
            request = datacatalog.LookupEntryRequest()
            request.linked_resource=resource
            
        if entry_type == constants.SPAN_TABLE:
            resource = f'spanner:{project}.regional-{data_asset_region}.{instance}.{database}.{table}'
            request = datacatalog.LookupEntryRequest()
            request.fully_qualified_name=resource
            request.project=project
            request.location=data_asset_region

        try:
            entry = self.client.lookup_entry(request)
        except Exception as e:
            msg = "Error could not find {} entry for {}".format(entry_type, resource)
            log_error_tag_dict(msg, e, job_uuid, tag_dict)
            op_status = constants.ERROR
            return op_status

        # format uri for storing in tag history table
        if data_asset_type == constants.BQ_ASSET:
            uri = entry.linked_resource.replace('//bigquery.googleapis.com/projects/', '')
        if data_asset_type == constants.SPAN_ASSET:
            uri = entry.linked_resource.replace('///projects/', '').replace('instances', 'instance').replace('databases', 'database') + '/table/' + table.replace('`', '')
        if data_asset_type == constants.FILESET_ASSET:
            uri = entry.linked_resource.replace('//datacatalog.googleapis.com/projects/', '').replace('locations', 'location').replace('entryGroups', 'entry_group').replace('entries', 'entry')
        
        target_column = None
        
        if 'column' in tag_dict:
            target_column = tag_dict['column'] 
            
            column_exists = self.column_exists_in_table(target_column, entry.schema.columns)
            
            if column_exists == False:
                msg = f"Error could not find column {target_column} in {resource}"
                log_error_tag_dict(msg, None, job_uuid, tag_dict)
                op_status = constants.ERROR
                return op_status
            
            uri = uri + '/column/' + target_column
  
        try:    
            tag_exists, tag_id = self.check_if_tag_exists(parent=entry.name, column=target_column)

        except Exception as e:
            msg = f"Error during check_if_tag_exists: {entry.name}"
            log_error_tag_dict(msg, e, job_uuid, tag_dict)
            op_status = constants.ERROR
            return op_status

        if tag_exists and overwrite == False:
            msg = "Info: Tag already exists and overwrite flag is False"
            log_info_tag_dict(msg, job_uuid, tag_dict)
            op_status = constants.SUCCESS
            return op_status
        
        tag_fields = []
        template_fields = self.get_template()
        
        for field_name in tag_dict:
           
            if field_name in ('project', 'dataset', 'table', 'column', 'entry_group',
                              'fileset', 'instance', 'database', 'schema'):
                continue
        
            field_type = None
            field_value = tag_dict[field_name].strip()
            
            for template_field in template_fields:
                if template_field['field_id'] == field_name:
                    field_type = template_field['field_type']
                    break
    
            if field_type == None:
                print(f'Error while preparing the tag: field {field_name} was not found in tag template {self.template_id}')
                op_status = constants.ERROR
                return op_status
    
            # allow tags with empty enum fields to be created; otherwise Data Catalog rejects the tag because an empty string is not a valid enum value
            if field_type == 'enum' and field_value == '':
                continue
                
            field = {'field_id': field_name, 'field_type': field_type, 'field_value': field_value}    
            tag_fields.append(field)
            
        
        op_status = self.create_update_delete_tag(tag_fields, tag_exists, tag_id, job_uuid, config_uuid, 'IMPORT_TAG', tag_history, \
                                                  entry, uri, target_column)
                                
        return op_status
    

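    # Restores tags from a Data Catalog export file. Each json_obj in tag_extract is expected to
    # follow the export shape consumed below; a sketch with hypothetical values:
    # {
    #   "projectId": "my-project", "locationId": "us-central1",
    #   "entryGroupId": "@bigquery", "id": "my_entry",
    #   "tags": [{"fields": [{"name": "data_owner", "type": "string", "value": "etl-team"}]}],
    #   "columns": [{"name": "my_table:my_column",
    #                "tags": [{"fields": [{"name": "sensitive_field", "type": "bool", "value": "TRUE"}]}]}]
    # }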
    def apply_restore_config(self, job_uuid, config_uuid, tag_extract, tag_history, overwrite=False):
             
        op_status = constants.SUCCESS
        
        for json_obj in tag_extract:
            #print('json_obj: ', json_obj)
        
            entry_group = json_obj['entryGroupId']
            entry_id = json_obj['id']
            location_id = json_obj['locationId']
            project_id = json_obj['projectId']
    
            #print('entry_group: ', entry_group)
            #print('entry_id: ', entry_id)
        
            entry_name = 'projects/' + project_id + '/locations/' + location_id + '/entryGroups/' + entry_group + '/entries/' + entry_id
            print('entry_name: ', entry_name)
    
            try:
                entry = self.client.get_entry(name=entry_name)
                
            except Exception as e:
                msg = "Error couldn't find the entry: {}".format(entry_name)
                log_error(msg, e, job_uuid)
                op_status = constants.ERROR
                return op_status
            
            if 'columns' in json_obj:
                # column-level tag
                json_columns = json_obj['columns']
                #print('json_columns: ', json_columns)
                
                for column_obj in json_columns:
                
                    column_name = column_obj['name'].split(':')[1]
                    column_tags = column_obj['tags']
                    fields = column_tags[0]['fields']
                                     
                    try:    
                        tag_exists, tag_id = self.check_if_tag_exists(parent=entry.name, column=column_name)
        
                    except Exception as e:
                        msg = 'Error during check_if_tag_exists: {}'.format(entry.name)
                        log_error(msg, e, job_uuid)
                        op_status = constants.ERROR
                        return op_status

                    if tag_exists and overwrite == False:
                        msg = 'Info: Tag already exists and overwrite flag is False'
                        info = {'job_uuid': job_uuid, 'msg': msg}
                        print(json.dumps(info))
                        
                        # skip this tag and keep processing the remaining columns
                        continue
            
                    # create or update column-level tag
                    uri = entry.linked_resource.replace('//bigquery.googleapis.com/projects/', '') + '/column/' + column_name
                    op_status = self.create_update_delete_tag(fields, tag_exists, tag_id, job_uuid, config_uuid, 'RESTORE_TAG', tag_history, \
                                                                     entry, uri, column_name)
            
            if 'tags' in json_obj:
                # table-level tag
                json_tags = json_obj['tags'] 
                fields = json_tags[0]['fields']
                #print('fields: ', fields)  
                
                try:    
                    tag_exists, tag_id = self.check_if_tag_exists(parent=entry.name, column='')
    
                except Exception as e:
                    msg = 'Error during check_if_tag_exists: {}'.format(entry.name)
                    log_error(msg, e, job_uuid)
                    op_status = constants.ERROR
                    return op_status

                if tag_exists and overwrite == False:
                    msg = 'Info: Tag already exists and overwrite flag is False'
                    info = {'job_uuid': job_uuid, 'msg': msg}
                    print(json.dumps(info))
                    
                    # skip this entry's table-level tag and keep processing the remaining entries
                    continue
                
                # create or update table-level tag
                uri = entry.linked_resource.replace('//bigquery.googleapis.com/projects/', '')
                op_status = self.create_update_delete_tag(fields, tag_exists, tag_id, job_uuid, config_uuid, 'RESTORE_TAG', tag_history, \
                                                          entry, uri)                     
                    
        return op_status
        
    # used by multiple apply methods
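    # Accepts fields in either of two dict shapes (illustrative values):
    #   export shape: {'name': 'data_owner', 'type': 'string', 'value': 'etl-team'}
    #   config shape: {'field_id': 'data_owner', 'field_type': 'string', 'field_value': 'etl-team'}
    # Export-shape keys are renamed in place to the config shape when tag_history is enabled.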
    def create_update_delete_tag(self, fields, tag_exists, tag_id, job_uuid, config_uuid, config_type, tag_history, entry, uri, column_name=''):
        
        op_status = constants.SUCCESS
        valid_field = False
        
        num_fields = len(fields)
        num_empty_values = 0
        
        tag = datacatalog.Tag()
        tag.template = self.template_path

        for field in fields:
            
            if 'name' in field:
                valid_field = True
                field_id = field['name']
                field_type = field['type']
                field_value = field['value']
                                
                # rename the keys, which will be used by tag history
                if tag_history:
                    field['field_id'] = field['name']
                    field['field_type'] = field['type']
                    field['field_value'] = field['value']
                    del field['name']
                    del field['type']
                    del field['value']
                
            elif 'field_id' in field:
                valid_field = True
                field_id = field['field_id']
                field_type = field['field_type'].upper()
                field_value = field['field_value']
                
            else:
                # export file contains invalid tags (e.g. a tagged field without a name)
                continue
            
            # keep track of empty values
            if field_value == '':
                num_empty_values += 1
            
            if field_type == 'BOOL':
                bool_field = datacatalog.TagField()

                if isinstance(field_value, str):
                    if field_value.upper() == 'TRUE':
                        bool_field.bool_value = True
                    else:
                        bool_field.bool_value = False
                else:
                    bool_field.bool_value = field_value

                tag.fields[field_id] = bool_field

            if field_type == 'STRING':
                string_field = datacatalog.TagField()
                string_field.string_value = str(field_value)
                tag.fields[field_id] = string_field
            if field_type == 'DOUBLE':
                float_field = datacatalog.TagField()
                float_field.double_value = float(field_value)
                tag.fields[field_id] = float_field
            if field_type == 'RICHTEXT':
                richtext_field = datacatalog.TagField()
                richtext_field.richtext_value = field_value.replace(',', '<br>')
                tag.fields[field_id] = richtext_field
                
                # for richtext values, replace '<br>' with ', ' in the copy that gets exported to the tag history table in BQ
                field['field_value'] = field_value.replace('<br>', ', ')
                
            if field_type == 'ENUM':
                enum_field = datacatalog.TagField()
                enum_field.enum_value.display_name = field_value
                tag.fields[field_id] = enum_field
            
            if field_type == 'DATETIME' or field_type == 'TIMESTAMP': 
                
                # field_value may be empty or date value e.g. "2022-05-08" or datetime value e.g. "2022-05-08 15:00:00"
                if field_value == '':
                    timestamp = ''
                
                else:
                
                    if len(field_value) == 10:
                        d = date(int(field_value[0:4]), int(field_value[5:7]), int(field_value[8:10]))
                        dt = datetime.combine(d, dtime(00, 00)) # when no time is supplied, default to 12:00:00 AM UTC  
                
                    else:
                        # raw timestamp format: 2022-05-11 21:18:20
                        d = date(int(field_value[0:4]), int(field_value[5:7]), int(field_value[8:10]))
                        t = dtime(int(field_value[11:13]), int(field_value[14:16]))
                        dt = datetime.combine(d, t)
            
                    utc = pytz.timezone('UTC')
                    timestamp = utc.localize(dt)
                
                    datetime_field = datacatalog.TagField()
                    datetime_field.timestamp_value = timestamp
                    tag.fields[field_id] = datetime_field
                
                field['field_value'] = timestamp  # store this value back in the field, so it can be recorded in tag history
    
        # an export file from Data Catalog can contain invalid tags (e.g. a field without a name); skip tag creation in that case
        if valid_field == False:
            msg = f"Invalid fields in tag request: {fields}"
            log_error(msg, error='', job_uuid=job_uuid)
            op_status = constants.ERROR
            return op_status
        
        if column_name != '':
            tag.column = column_name 
    
        if tag_exists == True:
            tag.name = tag_id
            
            # delete tag if every field in it is empty
            if num_fields == num_empty_values:
                op_status = self.do_create_update_delete_action(job_uuid, 'delete', tag)
            else:
                op_status = self.do_create_update_delete_action(job_uuid, 'update', tag)
        else:
            # create the tag only if it has at least one non-empty field
            if num_fields != num_empty_values:
                op_status = self.do_create_update_delete_action(job_uuid, 'create', tag, entry)
        
        # only write to tag history if the operation was successful
        if tag_history and op_status == constants.SUCCESS:
            bqu = bq.BigQueryUtils(self.credentials, BIGQUERY_REGION)
            template_fields = self.get_template()
            success = bqu.copy_tag(self.tag_creator_account, self.tag_invoker_account, job_uuid, self.template_id, template_fields, uri, column_name, fields)
            
            if success == False:
                msg = 'Error occurred while writing to tag history table'
                log_error(msg, error='', job_uuid=job_uuid)
                op_status = constants.ERROR
       
        return op_status
        
    
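    # action is one of 'create' | 'update' | 'delete'; 'create' also requires the entry argument.
    # Illustrative call: op_status = self.do_create_update_delete_action(job_uuid, 'update', tag)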
    def do_create_update_delete_action(self, job_uuid, action, tag, entry=None):
       
        op_status = constants.SUCCESS
        
        try:
            print('do {}, tag: {}'.format(action, tag))
            
            if action == 'delete':
                response = self.client.delete_tag(name=tag.name)
          
            if action == 'update':
                response = self.client.update_tag(tag=tag)
             
            if action == 'create':
                response = self.client.create_tag(parent=entry.name, tag=tag)
                                         
        except Exception as e:
            msg = f'Error occurred during tag {action}: {tag}'
            log_error(msg, e, job_uuid)
          
            # if it's a quota issue (429) or the service is unavailable (503), sleep and retry the operation once
            if '429' in str(e) or '503' in str(e):
                msg = 'Info: sleep for 2 minutes due to {}'.format(e)
                log_info(msg, job_uuid)
                time.sleep(120)
  
                try:
                    if action == 'delete':
                        response = self.client.delete_tag(name=tag.name)
          
                    if action == 'update':
                        response = self.client.update_tag(tag=tag)
             
                    if action == 'create':
                        response = self.client.create_tag(parent=entry.name, tag=tag)
              
                except Exception as e:
                    msg = f'Error occurred during tag {action} after sleep: {tag}'
                    log_error(msg, e, job_uuid)
                    op_status = constants.ERROR
                    return op_status
            else:
                op_status = constants.ERROR
          
        return op_status
       
            
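    # Returns a dict mapping each matching table name to its tag count, e.g. (hypothetical values):
    #   {'staff': 2, 'customer': 0}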
    def search_catalog(self, bigquery_project, bigquery_dataset):
        
        linked_resources = {}
        
        scope = datacatalog.SearchCatalogRequest.Scope()
        scope.include_project_ids.append(bigquery_project)
        
        request = datacatalog.SearchCatalogRequest()
        request.scope = scope
    
        query = 'parent:' + bigquery_project + '.' + bigquery_dataset
        print('query string: ' + query)
    
        request.query = query
        request.page_size = 1
    
        for result in self.client.search_catalog(request):
            print('result: ' + str(result))
            
            resp = self.client.list_tags(parent=result.relative_resource_name)
            tags = list(resp.tags)
            tag_count = len(tags)
            
            index = result.linked_resource.rfind('/')
            table_name = result.linked_resource[index+1:]
            linked_resources[table_name] = tag_count
            
        return linked_resources

  
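    # Worked example (hypothetical values): given
    #   uri = 'my-project/datasets/sakila/tables/staff'
    #   query_expression = "select count(*) from `$table` where $column is null"
    #   column = 'first_name'
    # this method returns
    #   "select count(*) from `my-project.sakila.staff` where first_name is null"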
    def parse_query_expression(self, uri, query_expression, column=None):
        
        query_str = None
        
        # analyze query expression
        from_index = query_expression.rfind(" from ", 0)
        where_index = query_expression.rfind(" where ", 0)
        project_index = query_expression.rfind("$project", 0)
        dataset_index = query_expression.rfind("$dataset", 0)
        table_index = query_expression.rfind("$table", 0)
        from_clause_table_index = query_expression.rfind(" from $table", 0)
        from_clause_backticks_table_index = query_expression.rfind(" from `$table`", 0)
        column_index = query_expression.rfind("$column", 0)
        
        #print('table_index: ', table_index)
        #print('column_index: ', column_index)
        
        if project_index != -1:
            project_end = uri.find('/') 
            project = uri[0:project_end]
            #print('project: ' + project)
            #print('project_index: ', project_index)
            
        if dataset_index != -1:
            dataset_start = uri.find('/datasets/') + 10
            dataset_string = uri[dataset_start:]
            dataset_end = dataset_string.find('/') 
            
            if dataset_end == -1:
                dataset = dataset_string[0:]
            else:
                dataset = dataset_string[0:dataset_end]
            #print('dataset:', dataset)
            #print('dataset_end:', dataset_end)
            #print('dataset_index:', dataset_index)
        
        # $table referenced in from clause, use fully qualified table
        if from_clause_table_index > 0 or from_clause_backticks_table_index > 0:
             #print('$table referenced in from clause')
             qualified_table = uri.replace('/project/', '.').replace('/datasets/', '.').replace('/tables/', '.')
             #print('qualified_table:', qualified_table)
             #print('query_expression:', query_expression)
             query_str = query_expression.replace('$table', qualified_table)
             #print('query_str:', query_str)
             
        # $table is referenced somewhere in the expression, replace $table with actual table name
        else:
        
            if table_index != -1:
                #print('$table referenced somewhere, but not in the from clause')
                table_start = uri.rfind('/') + 1
                table_name = uri[table_start:]
                #print('table_name: ' + table_name)
                query_str = query_expression.replace('$table', table_name)
            
            # $project referenced in where clause too
            if project_index > -1:
                
                if query_str == None:
                    query_str = query_expression.replace('$project', project)
                else:
                    query_str = query_str.replace('$project', project)
                
                #print('query_str: ', query_str)
            
            # $dataset referenced in where clause too    
            if dataset_index > -1:

                if query_str == None:
                    query_str = query_expression.replace('$dataset', dataset)
                else:
                    query_str = query_str.replace('$dataset', dataset)
                    
                #print('query_str: ', query_str)
            
        # table not in query expression (e.g. select 'string')
        if table_index == -1 and query_str == None:
            query_str = query_expression
            
        if column_index != -1:
            
            if query_str == None:
                query_str = query_expression.replace('$column', column)
            else:
                query_str = query_str.replace('$column', column)
        
        #print('returning query_str:', query_str)            
        return query_str
    
    
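    # Illustrative call (hypothetical query): field_values, error_exists = self.run_query(
    #     "select count(*) from `my-project.sakila.staff`", 'double', False, job_uuid)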
    def run_query(self, query_str, field_type, batch_mode, job_uuid):
        
        field_values = []
        error_exists = False
            
        try:
            
            if batch_mode:
                
                batch_config = bigquery.QueryJobConfig(
                    # run at batch priority which won't count toward concurrent rate limit
                    priority=bigquery.QueryPriority.BATCH
                )
                
                query_job = self.bq_client.query_and_wait(query_str, job_config=batch_config)
                job = self.bq_client.get_job(query_job.job_id, location=query_job.location)
                rows = job.result()
            
            else:
                print('query_str:', query_str)
                rows = self.bq_client.query_and_wait(query_str)
            
            # if the query expression is well-formed, it returns a single row with a single field_value.
            # However, the user may mistakenly run a query that returns multiple rows; in that case, grab only the top row.
            row_count = 0

            for row in rows:
                row_count = row_count + 1
                field_values.append(row[0])
            
                if field_type != 'richtext' and row_count == 1:
                    return field_values, error_exists
        
            # check row_count
            if row_count == 0:
                #error_exists = True
                print('sql query returned nothing:', query_str)
        
        except Exception as e:
            error_exists = True
            msg = 'Error occurred during run_query {}'.format(query_str)
            log_error(msg, e, job_uuid)
            
        #print('field_values: ', field_values)
        
        return field_values, error_exists
        

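    # The combined query is expected to return one column per field, in field order. Illustrative:
    # with fields [{'field_id': 'row_count'}, {'field_id': 'data_owner'}], the query
    # "select 100, 'etl-team'" populates row_count=100 and data_owner='etl-team'.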
    def run_combined_query(self, combined_query, column, fields, job_uuid):
        
        error_exists = False
            
        try:
            rows = self.bq_client.query_and_wait(combined_query)
            row_count = 0

            for row in rows:
                for i, field in enumerate(fields):
                    field['field_value'] = row[i]
            
                row_count += 1    
        
            if row_count == 0:
                error_exists = True
                print('sql query returned empty set:', combined_query)
        
        except Exception as e:
            error_exists = True
            msg = 'Error occurred during run_combined_query {}'.format(combined_query)
            log_error(msg, e, job_uuid)
            
        return fields, error_exists


    def populate_tag_fields(self, tag, fields, job_uuid=None):
        
        error_exists = False
        
        for field in fields:
            tag, field_error = self.populate_tag_field(tag, field['field_id'], field['field_type'], field['field_value'], job_uuid)
            error_exists = error_exists or field_error
        
        return tag, error_exists
        
        
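    # field_values may be a scalar or, for richtext, a list of values. Datetime-like inputs this
    # method is written to accept (illustrative): '2022-05-08' (date string),
    # '2022-12-05 15:05:26' (datetime string), or a datetime/date object returned by BigQuery.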
    def populate_tag_field(self, tag, field_id, field_type, field_values, job_uuid=None):
        
        error_exists = False
        
        if field_values == None or field_values == []:
            print('Cannot store null value in tag field', field_id)
            return tag, error_exists
        
        # richtext values arrive as a list; use the first element for scalar field types
        if isinstance(field_values, list):
            field_value = field_values[0]
        else:
            field_value = field_values
        
        try:             
            if field_type == "bool":
                bool_field = datacatalog.TagField()
                bool_field.bool_value = bool(field_value)
                tag.fields[field_id] = bool_field
            if field_type == "string":
                string_field = datacatalog.TagField()
                string_field.string_value = str(field_value)
                tag.fields[field_id] = string_field
            if field_type == "richtext":
                richtext_field = datacatalog.TagField()
                formatted_value = '<br>'.join(str(v) for v in field_values)
                richtext_field.richtext_value = str(formatted_value)
                tag.fields[field_id] = richtext_field
            if field_type == "double":
                float_field = datacatalog.TagField()
                float_field.double_value = float(field_value)
                tag.fields[field_id] = float_field
            if field_type == "enum":
                enum_field = datacatalog.TagField()
                enum_field.enum_value.display_name = field_value
                tag.fields[field_id] = enum_field
            if field_type == "datetime" or field_type == "timestamp":
                # expected format for datetime values in DC: 2020-12-02T16:34:14Z
                # however, field_value can be a date value e.g. "2022-05-08", a datetime value e.g. "2022-05-08 15:00:00"
                # or timestamp value e.g. datetime.datetime(2022, 9, 14, 18, 24, 31, 615000, tzinfo=datetime.timezone.utc)
                #print('field_value:', field_value)
                #print('field_value type:', type(field_value))
                
                # we have a datetime value
                # example: 2024-03-30 18:29:48.621617+00:00 
                if type(field_value) == datetime:
                    timestamp = Timestamp()
                    timestamp.FromDatetime(field_value)
                # we have a date value
                elif type(field_value) == date:
                    dt = datetime.combine(field_value, datetime.min.time())
                    timestamp = pytz.utc.localize(dt)
                # we have a date cast as a string
                elif len(str(field_value)) == 10:
                    utc = pytz.timezone('UTC')
                    d = date(int(field_value[0:4]), int(field_value[5:7]), int(field_value[8:10]))
                    dt = datetime.combine(d, dtime(00, 00)) # when no time is supplied, default to 12:00:00 AM UTC
                    timestamp = utc.localize(dt)
                # we have a timestamp with this format: '2022-12-05 15:05:26'
                elif len(str(field_value)) == 19:
                    year = int(field_value[0:4])
                    month = int(field_value[5:7])
                    day = int(field_value[8:10])
                    hour = int(field_value[11:13])
                    minute = int(field_value[14:16])
                    second = int(field_value[17:19])
                    dt = datetime(year, month, day, hour, minute, second)
                    timestamp = pytz.utc.localize(dt) 
                # we have some other datetime-like value (e.g. a pandas Timestamp); normalize it to the JSON timestamp format
                else:
                    timestamp_value = field_value.isoformat()
                    field_value = timestamp_value[0:19] + "Z"
                    timestamp = Timestamp()
                    timestamp.FromJsonString(field_value)
                
                #print('timestamp:', timestamp)
                datetime_field = datacatalog.TagField()
                datetime_field.timestamp_value = timestamp
                tag.fields[field_id] = datetime_field
                
        except Exception as e:
            error_exists = True
            msg = "Error storing values {} into field {}".format(field_values, field_id)
            log_error(msg, e, job_uuid)
        
        return tag, error_exists
    
    
    def copy_tags(self, source_project, source_dataset, source_table, target_project, target_dataset, target_table, include_policy_tags=False):
        
        success = True
        
        # lookup the source entry
        linked_resource = '//bigquery.googleapis.com/projects/{0}/datasets/{1}/tables/{2}'.format(source_project, source_dataset, source_table)
        
        request = datacatalog.LookupEntryRequest()
        request.linked_resource = linked_resource
        source_entry = self.client.lookup_entry(request)
        
        if source_entry.bigquery_table_spec.table_source_type != types.TableSourceType.BIGQUERY_TABLE:
            success = False
            msg = 'Error {} is not a BQ table'.format(source_table)
            log_info(msg, None)
            print(json.dumps(msg))
            return success
        
        # lookup the target entry
        linked_resource = '//bigquery.googleapis.com/projects/{0}/datasets/{1}/tables/{2}'.format(target_project, target_dataset, target_table)
        
        request = datacatalog.LookupEntryRequest()
        request.linked_resource = linked_resource
        target_entry = self.client.lookup_entry(request)
        
        if target_entry.bigquery_table_spec.table_source_type != types.TableSourceType.BIGQUERY_TABLE:
            success = False
            msg = 'Error {} is not a BQ table'.format(target_table)
            log_info(msg, None)
            print(json.dumps(msg))
            return success
        
        # look to see if the source table is tagged
        tag_list = self.client.list_tags(parent=source_entry.name, timeout=120)
    
        for source_tag in tag_list:
            print('source_tag.template:', source_tag.template)
            print('source_tag.column:', source_tag.column)
            
            # get tag template fields
            self.template_id = source_tag.template.split('/')[5]
            self.template_project = source_tag.template.split('/')[1]
            self.template_region = source_tag.template.split('/')[3]
            self.template_path = source_tag.template
            template_fields = self.get_template()
            
            # start a new target tag
            target_tag = datacatalog.Tag()
            target_tag.template = source_tag.template
            
            if source_tag.column:
                target_tag.column = source_tag.column
            
            for template_field in template_fields:
    
                #print('template_field:', template_field)
                
                if template_field['field_id'] in source_tag.fields:
                    field_id = template_field['field_id']
                    tagged_field = source_tag.fields[field_id]
                    
                    print('field_id:', field_id)
                    
                    if tagged_field.bool_value:
                        field_type = 'bool'
                        field_value = tagged_field.bool_value
                    elif tagged_field.double_value:
                        field_type = 'double'
                        field_value = tagged_field.double_value
                    elif tagged_field.string_value:
                        field_type = 'string'
                        field_value = tagged_field.string_value
                    elif tagged_field.enum_value:
                        field_type = 'enum'
                        field_value = tagged_field.enum_value.display_name
                    elif tagged_field.timestamp_value:
                        field_type = 'timestamp'
                        field_value = tagged_field.timestamp_value
                    elif tagged_field.richtext_value:
                        field_type = 'richtext'
                        field_value = tagged_field.richtext_value
                    else:
                        # unset or falsy field value (e.g. a bool set to False); skip it rather than carry over a stale value
                        continue
                        
                    target_tag, error_exists = self.populate_tag_field(target_tag, field_id, field_type, [field_value], None)
            
            # create the target tag            
            tag_exists, tag_id = self.check_if_tag_exists(parent=target_entry.name, column=source_tag.column)
		
            if tag_exists == True:
                target_tag.name = tag_id
            
                try:
                    print('tag update request: ', target_tag)
                    response = self.client.update_tag(tag=target_tag)
                except Exception as e:
                    success = False
                    msg = 'Error occurred during tag update: {}'.format(target_tag)
                    log_error(msg, e)
            
            else:
                try:
                    print('tag create request: ', target_tag)
                    response = self.client.create_tag(parent=target_entry.name, tag=target_tag)
                except Exception as e:
                    success = False
                    msg = 'Error occurred during tag create: {}'.format(target_tag)
                    log_error(msg, e)
                        
        # copy policy tags            
        success = self.copy_policy_tags(source_project, source_dataset, source_table, target_project, target_dataset, target_table)    
        
        return success

    
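    # Copies the policy tags attached to the source table's schema onto the target table.
    # Each element of policy_tag_list is a (column_name, policy_tag_resource) tuple, e.g. (hypothetical):
    #   ('ssn', 'projects/my-project/locations/us-central1/taxonomies/123/policyTags/456')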
    def copy_policy_tags(self, source_project, source_dataset, source_table, target_project, target_dataset, target_table):
    
        success = True
        source_table_id = source_project + '.' + source_dataset + '.' + source_table
        target_table_id = target_project + '.' + target_dataset + '.' + target_table
    
        try:
            source_schema = self.bq_client.get_table(source_table_id).schema
        except Exception as e:
            success = False
            msg = 'Error occurred while retrieving the schema of {}'.format(source_table_id)
            log_error(msg, e)
            return success 
    
        policy_tag_list = []
    
        for field in source_schema:
            if field.policy_tags != None:
                policy_tag = field.policy_tags.names[0]
                pt_tuple = (field.name, policy_tag)
                policy_tag_list.append(pt_tuple)
	
        if len(policy_tag_list) == 0:
            return success
    
        print('policy_tag_list:', policy_tag_list)
        success = self.apply_policy_tags(target_table_id, policy_tag_list)
    
        return success
    
    # used to update the status of a data product tag as part of the product_registration_pipeline
    # https://github.com/GoogleCloudPlatform/datacatalog-tag-engine/tree/main/examples/product_registration_pipeline    
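    # changed_fields is a list of dicts keyed by field_id and field_value, e.g. (hypothetical):
    #   [{'field_id': 'status', 'field_value': 'approved'}]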
    def update_tag_subset(self, template_id, template_project, template_region, entry_name, changed_fields):
        
        success = True
        
        tag_list = self.client.list_tags(parent=entry_name, timeout=120)
    
        for tag in tag_list:
            print('tag.template:', tag.template)
            
            # get tag template fields
            tagged_template_id = tag.template.split('/')[5]
            tagged_template_project = tag.template.split('/')[1]
            tagged_template_region = tag.template.split('/')[3]
            
            if tagged_template_id != template_id:
                continue
            
            if tagged_template_project != template_project:
                continue
                
            if tagged_template_region != template_region:
                continue
                
            # start a new target tag to overwrite the existing one
            target_tag = datacatalog.Tag()
            target_tag.template = tag.template
            target_tag.name = tag.name
            
            self.template_path = tag.template
            template_fields = self.get_template()
            
            for template_field in template_fields:
    
                #print('template_field:', template_field)
                field_id = template_field['field_id']
                
                # skip this field if it's not in the tag
                if field_id not in tag.fields:
                    continue
                    
                tagged_field = tag.fields[field_id]
                    
                if tagged_field.bool_value:
                    field_type = 'bool'
                    field_value = str(tagged_field.bool_value)
                elif tagged_field.double_value:
                    field_type = 'double'
                    field_value = str(tagged_field.double_value)
                elif tagged_field.string_value:
                    field_type = 'string'
                    field_value = tagged_field.string_value
                elif tagged_field.enum_value:
                    field_type = 'enum'
                    field_value = str(tagged_field.enum_value.display_name)
                elif tagged_field.timestamp_value:
                    field_type = 'timestamp'
                    field_value = str(tagged_field.timestamp_value)
                    print('orig timestamp:', field_value)
                elif tagged_field.richtext_value:
                    field_type = 'richtext'
                    field_value = str(tagged_field.richtext_value)
                else:
                    # unset or falsy field value; skip it rather than carry over a stale value
                    continue
                
                # overwrite logic
                for changed_field in changed_fields: 
                    if changed_field['field_id'] == field_id:
                        field_value = changed_field['field_value']
                        break
                
                target_tag, error_exists = self.populate_tag_field(target_tag, field_id, field_type, [field_value], None)
                
                if error_exists:
                    msg = 'Error while populating the tag field. Aborting tag update.'
                    error = {'msg': msg}
                    print(json.dumps(error))
                    
                    success = False
                    return success

            # update the tag
            try:
                print('tag update request: ', target_tag)
                response = self.client.update_tag(tag=target_tag)
            except Exception as e:
                success = False
                msg = 'Error occurred during tag update: {}'.format(target_tag)
                log_error(msg, e)
 
        return success 
                        
        
if __name__ == '__main__':
    
    import google.auth
    from google.auth import impersonated_credentials
    SCOPES = ['openid', 'https://www.googleapis.com/auth/cloud-platform', 'https://www.googleapis.com/auth/userinfo.email']
    
    source_credentials, _ = google.auth.default() 
    target_service_account = config['DEFAULT']['TAG_CREATOR_SA']
     
    credentials = impersonated_credentials.Credentials(source_credentials=source_credentials,
        target_principal=target_service_account,
        target_scopes=SCOPES,
        lifetime=1200)
        
    template_id = 'data_sensitivity'
    template_project = 'tag-engine-run'
    template_region = 'us-central1' 
    
    job_uuid = 'df0ddb3e477511ef95dc42004e494300' 
    config_uuid = '3404d03a477a11ef995442004e494300'
    data_asset_type = 'fileset'
    data_asset_region = 'us-central1'
    tag_dict = {'project': 'tag-engine-run', 'entry_group': 'sakila_eg', 'fileset': 'staff', 'column': 'first_name', 'sensitive_field': 'TRUE', 'sensitive_type': 'Sensitive_Personal_Identifiable_Information'}
    tag_history = True
    overwrite = True
    
    dcu = DataCatalogController(credentials, target_service_account, 'scohen@gcp.solutions', template_id, template_project, template_region)
    dcu.apply_import_config(job_uuid, config_uuid, data_asset_type, data_asset_region, tag_dict, tag_history, overwrite)
    
        