rally-custom/custom_tracks/opensearch/dense_vector/track.py [14:62]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def compute_percentile(data: List[Any], percentile):
    """Return the element of ``data`` found at the requested percentile rank.

    The values are sorted ascending and a 1-based rank is computed as
    ``round(percentile * len(data) / 100)``. That rank is converted to a
    0-based position and clamped into the valid index range, so percentiles
    at or below 0 yield the smallest value and percentiles at or above 100
    yield the largest.

    Note: the built-in ``round`` rounds exact halves to the nearest even
    integer, so e.g. the 50th percentile of five values selects the second
    smallest element, not the third.

    Args:
        data: Mutually comparable values. The input is not modified.
        percentile: Requested percentile, expressed on a 0-100 scale.

    Returns:
        The selected element, or ``None`` when ``data`` is empty.
    """
    count = len(data)
    if count <= 0:
        return None

    # Sort a copy so the caller's sequence is left untouched.
    ordered = list(data)
    ordered.sort()

    rank = int(round(percentile * count / 100))
    position = rank - 1

    # Clamp into [0, count - 1].
    last = count - 1
    if position > last:
        position = last
    if position < 0:
        position = 0
    return ordered[position]


def load_query_vectors(queries_file) -> Dict[int, List[float]]:
    """Read a file holding one JSON document per line into a dictionary.

    Each line is decoded with ``json.loads`` and stored under its zero-based
    line position. The return annotation suggests every line is expected to
    be a JSON array of floats; the content is not validated here.

    Args:
        queries_file: Path of the file to read.

    Returns:
        Mapping of line position to the decoded value of that line.

    Raises:
        ValueError: If ``queries_file`` does not exist or is not a regular file.
    """
    # isfile() is already False for paths that do not exist.
    if not os.path.isfile(queries_file):
        raise ValueError(f"Provided queries file '{queries_file}' does not exist or is not a file")

    query_vectors: Dict[int, List[float]] = {}
    with open(queries_file, "r") as handle:
        logger.debug(f"Reading query vectors from '{queries_file}'")
        for position, raw_line in enumerate(handle):
            query_vectors[position] = json.loads(raw_line)
        logger.debug(f"Finished reading query vectors from '{queries_file}'")
    return query_vectors


async def extract_exact_neighbors(
    query_vector: List[float], index: str, max_size: int, vector_field: str, request_cache: bool, client
) -> List[str]:
    """Run a script-scored search for ``query_vector`` and return the hit IDs.

    The request body is a ``script_score`` query wrapping ``match_all``; the
    script uses the ``knn`` language with the ``knn_score`` source and the
    ``cosinesimil`` space type. Document sources are not requested
    (``_source`` is ``False``) because only the IDs are used.

    Args:
        query_vector: Vector handed to the script as ``query_value``.
        index: Index name forwarded to ``client.search``.
        max_size: Forwarded to ``client.search`` as ``size``.
        vector_field: Document field handed to the script as ``field``.
        request_cache: Forwarded to ``client.search`` as ``request_cache``.
        client: Object exposing an awaitable ``search`` method that accepts
            ``body``, ``index``, ``request_cache`` and ``size`` keyword
            arguments.

    Returns:
        The ``_id`` of every entry in the response's ``hits.hits`` list, in
        the order the response provides them.
    """
    scoring_params = {
        "field": vector_field,
        "query_value": query_vector,
        "space_type": "cosinesimil",
    }
    scoring_script = {
        "source": "knn_score",
        "lang": "knn",
        "params": scoring_params,
    }
    search_body = {
        "_source": False,
        "query": {
            "script_score": {
                "query": {"match_all": {}},
                "script": scoring_script,
            },
        },
    }

    response = await client.search(
        body=search_body,
        index=index,
        request_cache=request_cache,
        size=max_size,
    )
    found = response["hits"]["hits"]
    return [entry["_id"] for entry in found]
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



rally-custom/custom_tracks/opensearch/openai_vector/track.py [16:63]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def compute_percentile(data: List[Any], percentile):
    """Return the element of ``data`` found at the requested percentile rank.

    The values are sorted ascending and a 1-based rank is computed as
    ``round(percentile * len(data) / 100)``. That rank is converted to a
    0-based position and clamped into the valid index range, so percentiles
    at or below 0 yield the smallest value and percentiles at or above 100
    yield the largest.

    Note: the built-in ``round`` rounds exact halves to the nearest even
    integer, so e.g. the 50th percentile of five values selects the second
    smallest element, not the third.

    Args:
        data: Mutually comparable values. The input is not modified.
        percentile: Requested percentile, expressed on a 0-100 scale.

    Returns:
        The selected element, or ``None`` when ``data`` is empty.
    """
    count = len(data)
    if count <= 0:
        return None

    # Sort a copy so the caller's sequence is left untouched.
    ordered = list(data)
    ordered.sort()

    rank = int(round(percentile * count / 100))
    position = rank - 1

    # Clamp into [0, count - 1].
    last = count - 1
    if position > last:
        position = last
    if position < 0:
        position = 0
    return ordered[position]


def load_query_vectors(queries_file) -> Dict[int, List[float]]:
    """Read a file holding one JSON document per line into a dictionary.

    Each line is decoded with ``json.loads`` and stored under its zero-based
    line position. The return annotation suggests every line is expected to
    be a JSON array of floats; the content is not validated here.

    Args:
        queries_file: Path of the file to read.

    Returns:
        Mapping of line position to the decoded value of that line.

    Raises:
        ValueError: If ``queries_file`` does not exist or is not a regular file.
    """
    # isfile() is already False for paths that do not exist.
    if not os.path.isfile(queries_file):
        raise ValueError(f"Provided queries file '{queries_file}' does not exist or is not a file")

    query_vectors: Dict[int, List[float]] = {}
    with open(queries_file, "r") as handle:
        logger.debug(f"Reading query vectors from '{queries_file}'")
        for position, raw_line in enumerate(handle):
            query_vectors[position] = json.loads(raw_line)
        logger.debug(f"Finished reading query vectors from '{queries_file}'")
    return query_vectors


async def extract_exact_neighbors(
    query_vector: List[float], index: str, max_size: int, vector_field: str, request_cache: bool, client
) -> List[str]:
    """Run a script-scored search for ``query_vector`` and return the hit IDs.

    The request body is a ``script_score`` query wrapping ``match_all``; the
    script uses the ``knn`` language with the ``knn_score`` source and the
    ``cosinesimil`` space type. Document sources are not requested
    (``_source`` is ``False``) because only the IDs are used.

    Args:
        query_vector: Vector handed to the script as ``query_value``.
        index: Index name forwarded to ``client.search``.
        max_size: Forwarded to ``client.search`` as ``size``.
        vector_field: Document field handed to the script as ``field``.
        request_cache: Forwarded to ``client.search`` as ``request_cache``.
        client: Object exposing an awaitable ``search`` method that accepts
            ``body``, ``index``, ``request_cache`` and ``size`` keyword
            arguments.

    Returns:
        The ``_id`` of every entry in the response's ``hits.hits`` list, in
        the order the response provides them.
    """
    scoring_params = {
        "field": vector_field,
        "query_value": query_vector,
        "space_type": "cosinesimil",
    }
    scoring_script = {
        "source": "knn_score",
        "lang": "knn",
        "params": scoring_params,
    }
    search_body = {
        "_source": False,
        "query": {
            "script_score": {
                "query": {"match_all": {}},
                "script": scoring_script,
            },
        },
    }

    response = await client.search(
        body=search_body,
        index=index,
        request_cache=request_cache,
        size=max_size,
    )
    found = response["hits"]["hits"]
    return [entry["_id"] for entry in found]
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



