def generate_missing_descriptions()

in agents/DescriptionAgent.py [0:0]


    def generate_missing_descriptions(self, source, table_desc_df, column_name_df):
        """Fill in missing table and column descriptions with LLM-generated text.

        Every row of ``table_desc_df`` whose ``table_description`` is missing
        (``None``, NaN/NA, or the placeholder ``'NA'``) and every row of
        ``column_name_df`` whose ``column_description`` is missing (``None``,
        NaN/NA, or the empty string) gets a prompt built from the available
        metadata; the answer of ``self.generate_llm_response`` is written back
        into the frame. Tables are processed first, so column prompts already
        see the freshly generated table descriptions.

        Args:
            source: Name of the data source. For ``'bigquery'`` the object
                names in the prompts are prefixed with the row's
                ``project_id``; for any other value ``project_id`` is not read.
            table_desc_df: DataFrame with at least ``table_schema``,
                ``table_name`` and ``table_description`` columns.
            column_name_df: DataFrame with at least ``table_schema``,
                ``table_name``, ``column_name``, ``data_type``,
                ``column_constraints`` and ``column_description`` columns.

        Returns:
            The tuple ``(table_desc_df, column_name_df)``. These are the same
            objects that were passed in; they are modified in place.

        Progress and summary counts are printed to stdout.
        """
        # Function-scope import keeps the method self-contained regardless of
        # what the surrounding module imports.
        import pandas as pd

        def is_missing(value, placeholder):
            # Missing values may arrive as None, NaN or pd.NA depending on how
            # the frame was built, so an `is None` check alone is not enough.
            if value is None:
                return True
            if pd.api.types.is_scalar(value) and pd.isna(value):
                return True
            return value == placeholder

        def rows_for_table(frame, row):
            # Boolean mask instead of a string-built `query` expression, so
            # names containing quotes cannot break the lookup.
            same_table = frame['table_name'] == row['table_name']
            same_schema = frame['table_schema'] == row['table_schema']
            return frame[same_table & same_schema]

        def as_markdown(frame):
            # `to_markdown` needs the optional `tabulate` package; fall back
            # to a plain-text table rather than aborting the whole run.
            try:
                return frame.to_markdown(index=False)
            except ImportError:
                return frame.to_string(index=False)

        def qualified_table_name(row):
            name = f"{row['table_schema']}.{row['table_name']}"
            # Only BigQuery metadata is expected to carry a project id.
            if source == 'bigquery':
                name = f"{row['project_id']}.{name}"
            return name

        llm_generated = 0
        print("\n\n")
        for index, row in table_desc_df.iterrows():
            if not is_missing(row['table_description'], 'NA'):
                continue
            context_prompt = f"""
                Generate short and crisp description for the table {qualified_table_name(row)}
                Remember that this description should help LLMs to help build better SQL for any queries related to this table.
                Parameters:
                - column metadata: {as_markdown(rows_for_table(column_name_df, row))}
                - table metadata: {as_markdown(rows_for_table(table_desc_df, row))}

                DO NOT generate description that is more than two lines
            """
            table_desc_df.at[index, 'table_description'] = self.generate_llm_response(prompt=context_prompt)
            print(f"Generated table description for {row['table_schema']}.{row['table_name']}")
            llm_generated += 1
        print(f"LLM generated {llm_generated} Table Descriptions")

        llm_generated = 0
        print("\n\n")
        for index, row in column_name_df.iterrows():
            if not is_missing(row['column_description'], ''):
                continue
            context_prompt = f"""
                Generate short and crisp description for the column {qualified_table_name(row)}.{row['column_name']}
                Remember that this description should help LLMs to help generate better SQL for any queries related to these columns.

                Consider the below information while generating the description
                    Name of the column : {row['column_name']}
                    Data type of the column is : {row['data_type']}
                    Details of the table of this column are below:
                    {as_markdown(rows_for_table(table_desc_df, row))}
                    Column constraints of this column are : {row['column_constraints']}

                DO NOT generate description that is more than two lines
            """
            column_name_df.at[index, 'column_description'] = self.generate_llm_response(prompt=context_prompt)
            print(f"Generated column description for {row['table_schema']}.{row['table_name']}.{row['column_name']}")
            llm_generated += 1

        print(f"LLM generated {llm_generated} Column Descriptions")
        return table_desc_df, column_name_df