# env_setup.py — create_vector_store()

def create_vector_store():
    """
    Initializes the environment and sets up the vector store for Open Data QnA.

    This function performs the following steps:
        
    1. Loads configurations from the "config.ini" file.
    2. Determines the data source (BigQuery or CloudSQL PostgreSQL) and sets the dataset region accordingly.
    3. If the vector store is "cloudsql-pgvector" and the data source is not CloudSQL PostgreSQL, it creates a new PostgreSQL dataset for the vector store.
    4. If logging is enabled or the vector store is "bigquery-vector", it creates a BigQuery dataset for the vector store and logging table.
    5. It creates a Vertex AI connection for the specified model and embeds the table schemas and columns into the vector database.
    6. If embeddings are stored in BigQuery, creates a table column_details_embeddings in the BigQuery Dataset.
    7. It generates the embeddings for the table schemas and column descriptions, and then inserts those embeddings into the BigQuery table.
   

    Configuration:
        - Requires the following environment variables to be set in "config.ini":
            - `DATA_SOURCE`: The data source (e.g., "bigquery" or "cloudsql-pg").
            - `VECTOR_STORE`: The type of vector store (e.g., "bigquery-vector" or "cloudsql-pgvector").
            - `BQ_REGION`: The BigQuery region.
            - `PROJECT_ID`: The Google Cloud project ID.
            - `BQ_OPENDATAQNA_DATASET_NAME`: The name of the BigQuery dataset for Open Data QnA.
            - `LOGGING`: Whether logging is enabled.

        - If `VECTOR_STORE` is "cloudsql-pgvector" and `DATA_SOURCE` is not "cloudsql-pg":
            - Requires additional environment variables for PostgreSQL instance setup.

    Returns:
        None

    Raises:
        RuntimeError: If there are errors during the setup process (e.g., dataset creation failure).
    """

    # NOTE(review): this function reads module-level names not defined in the
    # visible span (VECTOR_STORE, LOGGING, PROJECT_ID, DATASET_REGION,
    # BQ_OPENDATAQNA_DATASET_NAME, setup_postgresql, bigquery, google) —
    # presumably populated/imported elsewhere in env_setup.py when config.ini
    # is loaded; confirm against the rest of the file.
    print("Initializing environment setup.")
    print("Loading configurations from config.ini file.")



    print("Vector Store source set to: ", VECTOR_STORE)

    # Provision a CloudSQL PostgreSQL instance when pgvector is the chosen
    # vector store (per the docstring, only needed when the data source itself
    # is not already CloudSQL PostgreSQL).
    if VECTOR_STORE == 'cloudsql-pgvector' :
        print("Generating pg dataset for vector store.")
        # Parameters for PostgreSQL Instance
        # NOTE(review): hard-coded demo credentials (pg_user/pg_password) —
        # consider sourcing these from config.ini or Secret Manager instead
        # of committing them in source.
        pg_region = DATASET_REGION
        pg_instance = "pg15-opendataqna"
        pg_database = "opendataqna-db"
        pg_user = "pguser"
        pg_password = "pg123"
        # NOTE(review): pg_schema is assigned but not passed to
        # setup_postgresql here — it may be used later in this function
        # (the chunk appears truncated); verify it is not dead code.
        pg_schema = 'pg-vector-store' 

        setup_postgresql(pg_instance, pg_region, pg_database, pg_user, pg_password)


    # A BigQuery dataset is needed either for the logs table (LOGGING) or to
    # hold the embeddings table (VECTOR_STORE == 'bigquery-vector').
    if LOGGING or VECTOR_STORE == 'bigquery-vector':
        if LOGGING: 
            print("Logging is enabled")

        if VECTOR_STORE == 'bigquery-vector':
            print("Vector store set to 'bigquery-vector'")

        print(f"Generating Big Query dataset {BQ_OPENDATAQNA_DATASET_NAME}")
        client=bigquery.Client(project=PROJECT_ID)
        # Fully-qualified dataset reference: "<project>.<dataset>".
        dataset_ref = f"{PROJECT_ID}.{BQ_OPENDATAQNA_DATASET_NAME}"


        # Create the dataset if it does not exist already
        # (EAFP: get_dataset raises NotFound when the dataset is missing,
        # in which case we create it in DATASET_REGION).
        try:
            client.get_dataset(dataset_ref)
            print("Destination Dataset exists")
        except google.api_core.exceptions.NotFound:
            print("Cannot find the dataset hence creating.......")
            dataset=bigquery.Dataset(dataset_ref)
            dataset.location=DATASET_REGION
            client.create_dataset(dataset)
            print(str(dataset_ref)+" is created")