def return_column_schema_sql()

in dbconnectors/BQConnector.py [0:0]


    def return_column_schema_sql(self, dataset, table_names=None): 
        """
        Returns the SQL query to be run on 'Source DB' to get the column schema 
         
        The SQL query below returns a df containing the cols table_schema, table_name, column_name, data_type, column_description, table_description, primary_key, column_constraints
        for the schema specified above, e.g. 'retail'
        - table_schema: e.g. retail 
        - table_name: name of the tables inside the schema, e.g. products 
        - column_name: name of each col in each table in the schema, e.g. id_product 
        - data_type: data type of each col 
        - column_description: col descriptor, can be empty 
        - table_description: text descriptor, can be empty 
        - primary_key: whether the col is PK; if yes, the field contains the col_name 
        - column_constraints: e.g. "Primary key for this table"
        """

        user_dataset = self.project_id + '.' + dataset

        table_filter_clause = ""

        if table_names:
            
            # table_names = [name.strip() for name in table_names[1:-1].split(",")]  # Handle the string as a list
            formatted_table_names = [f"'{name}'" for name in table_names]
            table_filter_clause = f"""AND C.TABLE_NAME IN ({', '.join(formatted_table_names)})"""
            
        column_schema_sql = f"""
        SELECT
            C.TABLE_CATALOG as project_id, C.TABLE_SCHEMA as table_schema, C.TABLE_NAME as table_name, C.COLUMN_NAME as column_name,
            C.DATA_TYPE as data_type, C.DESCRIPTION as column_description, CASE WHEN T.CONSTRAINT_TYPE="PRIMARY KEY" THEN "This Column is a Primary Key for this table" WHEN 
            T.CONSTRAINT_TYPE = "FOREIGN_KEY" THEN "This column is Foreign Key" ELSE NULL END as column_constraints
        FROM
            `{user_dataset}.INFORMATION_SCHEMA.COLUMN_FIELD_PATHS` C 
        LEFT JOIN 
            `{user_dataset}.INFORMATION_SCHEMA.TABLE_CONSTRAINTS` T 
            ON C.TABLE_CATALOG = T.TABLE_CATALOG AND
            C.TABLE_SCHEMA = T.TABLE_SCHEMA AND 
            C.TABLE_NAME = T.TABLE_NAME AND  
            T.ENFORCED ='YES'
        LEFT JOIN 
            `{user_dataset}.INFORMATION_SCHEMA.KEY_COLUMN_USAGE` K
            ON K.CONSTRAINT_NAME=T.CONSTRAINT_NAME AND C.COLUMN_NAME = K.COLUMN_NAME 
        WHERE
            1=1
            {table_filter_clause} 
        ORDER BY
            project_id, table_schema, table_name, column_name;
    """

        return column_schema_sql