def search_knn()

in supporting-blog-content/elasticsearch_llm_cache/elasticRAG_with_cache.py [0:0]


def search_knn(query_text, es):
    """Run a combined lexical + kNN search and return the top hit's body and URL.

    The request sends a ``match`` query on ``body_content`` together with a
    kNN clause on ``chunk-vector`` (the query vector is built server-side from
    ``query_text`` by the configured text-embedding model), and asks for the
    two result sets to be combined with ``rank={"rrf": {}}``. Both clauses are
    restricted to documents whose ``url_path_dir3`` is ``"elasticsearch"``.

    The target index is not a parameter: it is read from the name ``index`` in
    the enclosing (module) scope, which must be defined before calling.

    Args:
        query_text: The user's search text; used for both the lexical match
            and as the text to embed for the kNN clause.
        es: Client object exposing ``search(**kwargs)`` — presumably an
            ``elasticsearch.Elasticsearch`` instance (verify against caller).

    Returns:
        A ``(body, url)`` tuple holding the first value of the
        ``body_content`` and ``url`` fields of the highest-ranked hit.

    Raises:
        IndexError: If the search returns no hits.
        KeyError: If the top hit lacks the ``body_content`` or ``url`` field.
    """
    # The same document restriction applies to the lexical and vector sides.
    section_filter = {"term": {"url_path_dir3": "elasticsearch"}}

    query = {
        "bool": {
            "must": [{"match": {"body_content": {"query": query_text}}}],
            "filter": [section_filter],
        }
    }

    knn = [
        {
            "field": "chunk-vector",
            "k": 10,
            # NOTE(review): num_candidates equals k here; confirm this is
            # intentional, since it limits the candidate pool per shard.
            "num_candidates": 10,
            "filter": {
                "bool": {
                    "filter": [
                        {"range": {"chunklength": {"gte": 0}}},
                        section_filter,
                    ]
                }
            },
            "query_vector_builder": {
                "text_embedding": {
                    "model_id": "sentence-transformers__msmarco-minilm-l-12-v3",
                    "model_text": query_text,
                }
            },
        }
    ]

    rank = {"rrf": {}}

    fields = ["title", "url", "position", "url_path_dir3", "body_content"]

    resp = es.search(
        index=index,
        query=query,
        knn=knn,
        rank=rank,
        fields=fields,
        size=10,
        source=False,  # only the requested ``fields`` are needed, not _source
    )

    hits = resp["hits"]["hits"]
    if not hits:
        # Same exception type the bare ``[0]`` lookup used to raise, but with
        # a message that tells the caller what actually went wrong.
        raise IndexError(
            f"search_knn: no hits returned for query {query_text!r}"
        )

    # Field values come back as lists; take the first value of each.
    top_fields = hits[0]["fields"]
    body = top_fields["body_content"][0]
    url = top_fields["url"][0]

    return body, url