UI/dbai_src/dbai.py [9:138]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    GenerativeModel,
    Part,
    Tool,
    # ToolConfig
)
import streamlit as st
from dbai_src.bot_functions import (
    list_tables_func,
    get_table_metadata_func,
    sql_query_func,
    plot_chart_auto_func
)

# Harm categories for which Gemini should block only high-severity content.
_BLOCKED_CATEGORIES = (
    generative_models.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
    generative_models.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
    generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
    generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT,
)

# Same threshold for every category, so build the mapping in one pass.
safety_settings = {
    category: generative_models.HarmBlockThreshold.BLOCK_ONLY_HIGH
    for category in _BLOCKED_CATEGORIES
}

# Module-level Gemini model (no tools attached); low temperature keeps
# generations near-deterministic.
gemini = GenerativeModel(
    "gemini-1.5-pro-001",
    generation_config={"temperature": 0.05},
    safety_settings=safety_settings,
)


class Response:
    """The base response template class for DBAI output.

    Bundles the agent's final answer together with the intermediate
    tool-call steps that produced it, so callers can render both.
    """
    def __init__(self, text, interim_steps) -> None:
        # Final natural-language answer from the agent.
        self.text = text
        # Ordered trace of intermediate tool calls/results (for display/debug).
        self.interim_steps = interim_steps

    def __repr__(self) -> str:
        # interim_steps can be large; show only a count when it has a length.
        steps = self.interim_steps
        count = len(steps) if hasattr(steps, '__len__') else '?'
        return (f"{type(self).__name__}(text={self.text!r}, "
                f"interim_steps=<{count} steps>)")


class DBAI:
    """
    The base class for DBAI agent which is the multi-turn chat
    and can plot graphs
    """
    def __init__(
            self,
            proj_id="proj-kous",
            dataset_id="Albertsons",
            tables_list=None
            ):
        """Set up the DBAI agent: Vertex AI, tool bundle, model, BQ client.

        Args:
            proj_id: GCP project id used for Vertex AI and BigQuery.
            dataset_id: BigQuery dataset the agent is allowed to query.
            tables_list: tables to expose to the agent. ``None`` (the
                default) selects the two standard CA main tables; an
                empty value makes ``create_metadata_cache`` discover all
                tables in the dataset.
        """
        self.proj_id = proj_id
        self.dataset_id = dataset_id
        # None sentinel instead of a mutable-list default: the old
        # ``tables_list=[...]`` default object was shared across all calls.
        self.tables_list = (['camain_oracle_hcm', 'camain_ps']
                            if tables_list is None else tables_list)

        # Initialize Vertex AI before building the model or touching
        # metadata (the original ran this last, after both had happened).
        vertexai.init(project=self.proj_id)

        # Function declarations the agent may call during a turn.
        self.sql_query_tool = Tool(
            function_declarations=[
                list_tables_func,
                get_table_metadata_func,
                sql_query_func,
                plot_chart_auto_func,
                # plot_chart_func,
            ],
        )

        # Low temperature keeps SQL generation near-deterministic.
        self.agent = GenerativeModel(
            "gemini-1.5-pro-001",
            generation_config={"temperature": 0.05},
            safety_settings=safety_settings,
            tools=[self.sql_query_tool],
        )

        self.bq_client = bigquery.Client(project=self.proj_id)
        self.system_prompt = """
        You are a fluent person who efficiently communicates with the user
        over different Database queries. Please always call the functions
        at your disposal whenever you need to know something,
        and do not reply unless you feel you have all information to answer
        the question satisfactorily.
        Only use information that you learn from BigQuery,
        do not make up information.
        Always use date or time functions instead of hard-coded values in SQL
        to reflect true current value.
        """
        # Populates self.metadata (may hit BigQuery to build the disk cache).
        self.load_metadata()

    def load_metadata(self):
        """
        Load the metadata cache file from the defined path if exists
        else creates
        """
        metdata_cache_path = f"./metadata_cache_{self.dataset_id}.json"
        if not os.path.exists(metdata_cache_path):
            self.metadata = self.create_metadata_cache()
            with open(metdata_cache_path, 'w') as f:
                # pylint: disable=unspecified-encoding
                f.write(json.dumps(self.metadata))
        else:
            with open(metdata_cache_path, 'r') as f:
                # pylint: disable=unspecified-encoding
                self.metadata = json.load(f)

    def create_metadata_cache(self):
        """
        create the metadata cache file for the specified Tables in DB
        for all columns
        """
        gen_description_prompt = """
        Based on the columns information of this table.
        Generate a very brief description for this table.
        TABLE: {table_id}
        columns_info: {columns_info}
        """

        if self.tables_list in [[], [''], '']:
            api_response = self.bq_client.list_tables(self.dataset_id)
            self.tables_list = [table.table_id for table in api_response]

        metadata = {}
        for table_id in self.tables_list:
            columns_info = self.bq_client.get_table(
                    f'{self.dataset_id}.{table_id}'
                ).to_api_repr()['schema']
            # remove unwanted details like 'mode'
            for field in columns_info.get('fields', []):
                field.pop('mode', None)

            metadata[table_id] = {}
            metadata[table_id]["table_name"] = table_id
            metadata[table_id]["columns_info"] = columns_info
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



dbai_src/dbai.py [9:136]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    GenerativeModel,
    Part,
    Tool,
    # ToolConfig
)
import streamlit as st
from bot_functions import (
    list_tables_func,
    get_table_metadata_func,
    sql_query_func,
    plot_chart_auto_func
)

# Harm categories for which Gemini should block only high-severity content.
_BLOCKED_CATEGORIES = (
    generative_models.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
    generative_models.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
    generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
    generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT,
)

# Same threshold for every category, so build the mapping in one pass.
safety_settings = {
    category: generative_models.HarmBlockThreshold.BLOCK_ONLY_HIGH
    for category in _BLOCKED_CATEGORIES
}

# Module-level Gemini model (no tools attached); low temperature keeps
# generations near-deterministic.
gemini = GenerativeModel(
    "gemini-1.5-pro-001",
    generation_config={"temperature": 0.05},
    safety_settings=safety_settings,
)


class Response:
    """The base response template class for DBAI output.

    Bundles the agent's final answer together with the intermediate
    tool-call steps that produced it, so callers can render both.
    """
    def __init__(self, text, interim_steps) -> None:
        # Final natural-language answer from the agent.
        self.text = text
        # Ordered trace of intermediate tool calls/results (for display/debug).
        self.interim_steps = interim_steps

    def __repr__(self) -> str:
        # interim_steps can be large; show only a count when it has a length.
        steps = self.interim_steps
        count = len(steps) if hasattr(steps, '__len__') else '?'
        return (f"{type(self).__name__}(text={self.text!r}, "
                f"interim_steps=<{count} steps>)")


class DBAI:
    """
    The base class for DBAI agent which is the multi-turn chat
    and can plot graphs.
    """
    def __init__(
            self,
            proj_id="proj-kous",
            dataset_id="Albertsons",
            tables_list=['camain_oracle_hcm', 'camain_ps']
            ):

        self.proj_id = proj_id
        self.dataset_id = dataset_id
        self.tables_list = tables_list

        self.sql_query_tool = Tool(
            function_declarations=[
                list_tables_func,
                get_table_metadata_func,
                sql_query_func,
                plot_chart_auto_func,
                # plot_chart_func,
            ],
        )

        self.agent = GenerativeModel("gemini-1.5-pro-001",
                                     generation_config={"temperature": 0.05},
                                     safety_settings=safety_settings,
                                     tools=[self.sql_query_tool],
                                     )

        self.bq_client = bigquery.Client(project=self.proj_id)
        self.system_prompt = """
        You are a fluent person who efficiently communicates with the user over
 different Database queries. Please always call the functions at your disposal
 whenever you need to know something, and do not reply unless you feel you have
 all information to answer the question satisfactorily.
 Only use information that you learn from BigQuery, do not make up information.
 Always use date or time functions instead of hard-coded values in SQL
 to reflect true current value.
        """
        self.load_metadata()

        vertexai.init(project=self.proj_id)

    def load_metadata(self):
        """
        Load the metadata cache file from the defined path if exists
        else creates.
        """
        metdata_cache_path = f"./metadata_cache_{self.dataset_id}.json"
        if not os.path.exists(metdata_cache_path):
            self.metadata = self.create_metadata_cache()
            with open(metdata_cache_path, 'w') as f:
                # pylint: disable=unspecified-encoding
                f.write(json.dumps(self.metadata))
        else:
            with open(metdata_cache_path, 'r') as f:
                # pylint: disable=unspecified-encoding
                self.metadata = json.load(f)

    def create_metadata_cache(self):
        """
        create the metadata cache file for the specified Tables in DB
        for all columns.
        """
        gen_description_prompt = """
        Based on the columns information of this table.
        Generate a very brief description for this table.
        TABLE: {table_id}
        columns_info: {columns_info}
        """

        if self.tables_list in [[], [''], '']:
            api_response = self.bq_client.list_tables(self.dataset_id)
            self.tables_list = [table.table_id for table in api_response]

        metadata = {}
        for table_id in self.tables_list:
            columns_info = self.bq_client.get_table(
                    f'{self.dataset_id}.{table_id}'
                ).to_api_repr()['schema']
            # remove unwanted details like 'mode'
            for field in columns_info.get('fields', []):
                field.pop('mode', None)

            metadata[table_id] = {}
            metadata[table_id]["table_name"] = table_id
            metadata[table_id]["columns_info"] = columns_info
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



