in experiments/google/cloud/ml/applied/categories/category.py [0:0]
def _rank(desc: str, candidates: list[list[str]]) -> list[list[str]]:
    """Ask the LLM to order candidate category paths by relevance to ``desc``.

    See rank() for the public docstring.

    Each candidate is rendered as its elements joined by ``->``, one per line,
    inside a prompt sent through the module-level ``llm``. The reply is parsed
    line by line: blank lines are skipped, a leading list marker (``1.``,
    ``*`` or ``-``) is removed, the line is split on ``->`` and every segment
    is trimmed of surrounding whitespace.

    Args:
        desc: Product description inserted into the prompt.
        candidates: Category paths to rank. The length of the first path is
            the length a parsed reply line must have in order to be kept.

    Returns:
        The distinct parsed paths, in the order the LLM listed them, each as a
        list of strings. An empty list when ``candidates`` is empty. Paths are
        filtered by length only; they are not checked against ``candidates``.

    Raises:
        ValueError: If the reply contains no non-blank lines, or if no line
            parses to a path of the expected length.
    """
    logging.info("Candidates:\n%s", candidates)
    if not candidates:
        return []

    # Joined outside the f-string: a backslash inside an f-string expression
    # is a syntax error before Python 3.12.
    category_lines = "\n ".join("->".join(cat) for cat in candidates)
    query = (
        "\n"
        "Given the following product description:\n"
        f"{desc}\n"
        "Rank the following categories from most relevant to least:\n"
        f"{category_lines}\n"
        "Do not include any commentary in the result.\n"
    )
    llm_parameters = {
        "max_output_tokens": 256,
        "temperature": 0.0,
    }
    response = llm.predict(query, **llm_parameters)

    # Drop blank lines up front so a whitespace-only reply is reported as
    # "no response" and an empty line can never be parsed as a category.
    reply_lines = [
        stripped
        for stripped in (raw.strip() for raw in response.text.splitlines())
        if stripped
    ]
    if not reply_lines:
        raise ValueError(
            "ERROR: No LLM response returned. This seems to be an intermittent bug"
        )
    logging.info("Response:\n%s", reply_lines)

    # The model sometimes answers with a numbered or bulleted list.
    list_marker = re.compile(r"^\s*(\d+\.|\*|-)\s+")
    expected_depth = len(candidates[0])

    parsed_paths = []
    for line in reply_lines:
        # Trim each segment so "A -> B" and "A->B" parse to the same path.
        segments = [part.strip() for part in list_marker.sub("", line).split("->")]
        if len(segments) == expected_depth:  # discard commentary / malformed lines
            parsed_paths.append(tuple(segments))

    # dict.fromkeys de-duplicates while keeping first-seen (ranked) order; the
    # tuples are only needed for hashing and are turned back into lists so the
    # result matches the declared return type.
    unique_paths = [list(path) for path in dict.fromkeys(parsed_paths)]
    logging.info("Formatted Response:\n %s", unique_paths)
    if not unique_paths:
        raise ValueError("ERROR: No responses returned in expected format")
    return unique_paths