src/databao_context_engine/plugins/databases/database_chunker.py (52 lines of code) (raw):

from dataclasses import dataclass from databao_context_engine.pluginlib.build_plugin import EmbeddableChunk from databao_context_engine.plugins.databases.databases_types import ( DatabaseColumn, DatabaseIntrospectionResult, DatabaseTable, ) @dataclass class DatabaseTableChunkContent: catalog_name: str schema_name: str table: DatabaseTable @dataclass class DatabaseColumnChunkContent: catalog_name: str schema_name: str table_name: str column: DatabaseColumn def build_database_chunks(result: DatabaseIntrospectionResult) -> list[EmbeddableChunk]: chunks = [] for catalog in result.catalogs: for schema in catalog.schemas: for table in schema.tables: chunks.append(_create_table_chunk(catalog.name, schema.name, table)) for column in table.columns: chunks.append(_create_column_chunk(catalog.name, schema.name, table.name, column)) return chunks def _create_table_chunk(catalog_name: str, schema_name: str, table: DatabaseTable) -> EmbeddableChunk: return EmbeddableChunk( embeddable_text=_build_table_chunk_text(table), content=DatabaseTableChunkContent( catalog_name=catalog_name, schema_name=schema_name, table=table, ), ) def _create_column_chunk( catalog_name: str, schema_name: str, table_name: str, column: DatabaseColumn ) -> EmbeddableChunk: return EmbeddableChunk( embeddable_text=_build_column_chunk_text(table_name, column), content=DatabaseColumnChunkContent( catalog_name=catalog_name, schema_name=schema_name, table_name=table_name, column=column, ), ) def _build_table_chunk_text(database_table: DatabaseTable) -> str: return f"Table {database_table.name} with columns {','.join([column.name for column in database_table.columns])}" def _build_column_chunk_text(table_name: str, database_object: DatabaseColumn) -> str: return f"Column {database_object.name} in table {table_name}"