def _rank()

in experiments/google/cloud/ml/applied/categories/category.py [0:0]


def _rank(desc: str, candidates: list[list[str]]) -> list[list[str]]:
    """Ask the LLM to order candidate categories by relevance to a description.

    See rank() for the public-facing docstring.

    Each candidate is a category path (a list of path segments). The paths are
    rendered as ``segment->segment`` lines in the prompt, and the model's
    reply is parsed back into paths, one per line.

    Args:
        desc: Product description that the categories are ranked against.
        candidates: Category paths to rank. The length of the first path is
            used as the expected length of every returned path.

    Returns:
        The de-duplicated category paths parsed from the model's reply, in
        the order the model listed them. Empty if ``candidates`` is empty.

    Raises:
        ValueError: If the model returns no text, or if no line of its reply
            parses into a path of the expected length.
    """
    logging.info("Candidates:\n%s", candidates)
    if not candidates:
        return []

    # Build the category list outside the f-string: backslashes are not
    # allowed inside f-string expressions before Python 3.12.
    category_lines = "\n  ".join("->".join(cat) for cat in candidates)
    query = f"""
  Given the following product description:
  {desc}

  Rank the following categories from most relevant to least:
  {category_lines}

  Do not include any commentary in the result.
  """

    llm_parameters = {
        "max_output_tokens": 256,
        "temperature": 0.0,
    }
    response = llm.predict(query, **llm_parameters)
    lines = response.text.splitlines()
    if not lines:
        raise ValueError(
            "ERROR: No LLM response returned. This seems to be an intermittent bug"
        )
    logging.info("Response:\n%s", lines)

    # Strips a leading list marker such as "1. ", "* " or "- ".
    list_marker = re.compile(r"^\s*(\d+\.|\*|-)\s+")
    expected_len = len(candidates[0])

    parsed: list[list[str]] = []
    for line in lines:
        text = list_marker.sub("", line.strip())
        if not text:
            # A blank line would otherwise parse to [""] and be accepted as
            # a category whenever the expected path length is 1.
            continue
        # Tolerate replies formatted as "a -> b" instead of "a->b".
        segments = [segment.strip() for segment in text.split("->")]
        if len(segments) == expected_len:  # drop lines that are not a full path
            parsed.append(segments)

    # De-duplicate while preserving the model's order; tuples are only needed
    # for hashing, so convert back to lists to honour the declared return type.
    # NOTE(review): parsed paths are not checked against `candidates`, so a
    # path invented by the model is returned as-is.
    unique_res = [list(path) for path in dict.fromkeys(map(tuple, parsed))]
    logging.info("Formatted Response:\n %s", unique_res)
    if not unique_res:
        raise ValueError("ERROR: No responses returned in expected format")
    return unique_res