in agora/cerebral_api/src/tools/tools.py [0:0]
def search_content(self,
                   query: str = None,
                   document_name: str = None,
                   limit: int = 5,
                   show_scores: bool = False) -> List[dict]:
    """
    Search the index semantically by query or by exact document name.

    When both ``query`` and ``document_name`` are supplied, the document-name
    lookup takes precedence and ``query`` is ignored.

    Args:
        query: Optional semantic search query
        document_name: Optional document name, matched exactly against the
            ``source`` metadata field
        limit: Maximum number of results returned by a semantic query.
            Document-name lookups are not capped and return every match.
        show_scores: Whether to add a ``relevance_score`` to each semantic
            query result (has no effect on document-name lookups)
    Returns:
        List[dict]: One dict per match with the keys ``content``, ``source``
        (basename of the ``source`` metadata, or ``'Unknown'`` when absent)
        and ``metadata``. An empty list is returned when neither search
        argument is given, when nothing matches, or when the search raises.
    """
    try:
        if not query and not document_name:
            logger.error("Must provide either query or document_name")
            return []

        if document_name:
            logger.info(f"Searching content by document name: {document_name}")
            # Exact match on the 'source' metadata field.
            results = self.indexer.collection.get(
                where={"source": {"$eq": document_name}}
            ) or {}
            # get() returns flat lists, one entry per matching record.
            documents = results.get('documents') or []
            metadatas = results.get('metadatas') or []
            distances = []  # get() performs no ranking, so there are no scores
        else:
            logger.info(f"Searching content by query: {query}")
            # Semantic search. No where_document filter: results are ranked
            # purely by embedding distance and capped at `limit`.
            results = self.indexer.collection.query(
                query_texts=[query],
                n_results=limit,
                include=['documents', 'metadatas', 'distances'],
            ) or {}
            # query() nests results as one inner list per query text; exactly
            # one query text was sent, so only the first inner list matters.
            documents = (results.get('documents') or [[]])[0] or []
            metadatas = (results.get('metadatas') or [[]])[0] or []
            distances = (results.get('distances') or [[]])[0] or []

        formatted_results = []
        for idx, doc in enumerate(documents):
            # Records stored without metadata come back as None; treat them
            # as empty so a single bare record does not discard every result.
            meta = (metadatas[idx] if idx < len(metadatas) else None) or {}
            result = {
                'content': doc,
                'source': os.path.basename(meta.get('source', 'Unknown')),
                'metadata': meta,
            }
            if show_scores and idx < len(distances):
                # Convert distance to a similarity score.
                # NOTE(review): 1 - distance presumably assumes a metric
                # bounded by 1 (e.g. cosine); confirm against the collection's
                # configured distance function.
                result['relevance_score'] = 1 - distances[idx]
            formatted_results.append(result)

        logger.info(f"Found {len(formatted_results)} matching documents")
        return formatted_results
    except Exception as e:
        # Best-effort API: callers get an empty list rather than an exception.
        logger.error(f"Error searching content: {str(e)}")
        if VERBOSE:
            import traceback
            logger.debug(traceback.format_exc())
        return []