def search_content()

in agora/cerebral_api/src/tools/tools.py [0:0]


    def search_content(self, 
                      query: str = None, 
                      document_name: str = None, 
                      limit: int = 5, 
                      show_scores: bool = False) -> List[dict]:
        """
        Search the index either by exact document name or by semantic query.

        When both ``document_name`` and ``query`` are supplied, the document
        name lookup takes precedence and the query is ignored.

        Args:
            query: Optional semantic search text, passed to the collection's
                ``query()`` call.
            document_name: Optional value matched exactly against the
                ``source`` metadata field via the collection's ``get()`` call.
            limit: Maximum number of results to return; must be >= 1.
            show_scores: When True and the search is by query, add a
                ``relevance_score`` entry (``1 - distance``) to each result.
                Name lookups carry no distance, so no score is added there.

        Returns:
            List[dict]: One dict per match with the keys ``content``,
            ``source`` (basename of the ``source`` metadata value, or
            ``'Unknown'``) and ``metadata``. An empty list is returned when
            the arguments are invalid, nothing matches, or any error occurs;
            this method logs failures instead of raising.
        """
        try:
            if not query and not document_name:
                logger.error("Must provide either query or document_name")
                return []

            if limit is None or limit < 1:
                logger.error(f"limit must be a positive integer, got {limit!r}")
                return []

            if document_name:
                logger.info(f"Searching content by document name: {document_name}")
                # Exact match on the 'source' metadata field.
                results = self.indexer.collection.get(
                    where={"source": {"$eq": document_name}},
                    limit=limit,
                ) or {}
                # get() returns flat, parallel lists.
                documents = results.get('documents') or []
                metadatas = results.get('metadatas') or []
                distances = []
            else:
                logger.info(f"Searching content by query: {query}")
                # Semantic search. No where_document filter is applied: the
                # previous placeholder filter restricted matches to documents
                # containing the literal text "search_string".
                results = self.indexer.collection.query(
                    query_texts=[query],
                    n_results=limit,
                    include=['documents', 'metadatas', 'distances'],
                ) or {}

                def first_batch(key):
                    # query() returns one inner list per query text; a single
                    # query text is sent, so only the first list is relevant.
                    batches = results.get(key) or []
                    return (batches[0] if batches else None) or []

                documents = first_batch('documents')
                metadatas = first_batch('metadatas')
                distances = first_batch('distances') if show_scores else []

            formatted_results = []
            for idx, doc in enumerate(documents):
                # Tolerate missing or None metadata instead of failing the
                # whole search on a single bare record.
                meta = (metadatas[idx] if idx < len(metadatas) else None) or {}
                result = {
                    'content': doc,
                    'source': os.path.basename(meta.get('source', 'Unknown')),
                    'metadata': meta,
                }
                if show_scores and idx < len(distances):
                    # NOTE(review): 1 - distance is only a bounded similarity
                    # if the collection's distance metric is bounded that way;
                    # confirm against the collection configuration.
                    result['relevance_score'] = 1 - distances[idx]
                formatted_results.append(result)

            logger.info(f"Found {len(formatted_results)} matching documents")
            return formatted_results

        except Exception as e:
            # Deliberate best-effort boundary: callers receive an empty list
            # rather than an exception.
            logger.error(f"Error searching content: {str(e)}")
            if VERBOSE:
                import traceback
                logger.debug(traceback.format_exc())
            return []