def _rank()

in experiments/legacy/backend/category.py [0:0]


def _rank(desc: str, candidates: list[list[str]]) -> list[list[str]]:
  """See rank() for docstring."""
  logging.info(f'Candidates:\n{candidates}')
  if not candidates:
    return []

  query = f"""
  Given the following product description:
  {desc}

  Rank the following categories from most relevant to least:
  {(chr(10)+'  ').join(['->'.join(cat) for cat in candidates])}

  Do not include any commentary in the result.
  """
  # chr(10) == \n. workaround since backslash not allowed in f-string in python < 3.12

  llm_parameters = {
    "max_output_tokens": 256,
    "temperature": 0.0,
  }
  response = llm.predict(
      query,
      **llm_parameters
  )
  res = response.text.splitlines()
  if not res:
    raise ValueError('ERROR: No LLM response returned. This seems to be an intermittent bug')
  
  logging.info(f'Response:\n{res}')
  formatted_res = [re.sub(r"^\s*(\d+\.|\*|-)\s+", "", line.strip()).split('->') for line in res]
  formatted_res = [res for res in formatted_res if len(res) == len(candidates[0])] #remove answers that don't match expected length
  
  unique_res = list(dict.fromkeys([tuple(l) for l in formatted_res]))
  logging.info(f'Formatted Response:\n {unique_res}')
  if not unique_res:
    raise ValueError('ERROR: No responses returned in expected format')
  return unique_res