in optimum/utils/preprocessing/text_classification.py [0:0]
def try_to_guess_data_keys(self, column_names: List[str]) -> Optional[Dict[str, str]]:
    """
    Guess which dataset columns hold the primary and (optionally) secondary text input.

    A column matches when its name contains one of the candidate substrings (the comparison is
    case-sensitive). Columns are scanned in the order given by `column_names` and the first match wins.

    Args:
        column_names (`List[str]`):
            Names of the columns available in the dataset.

    Returns:
        `Optional[Dict[str, str]]`: `None` when no primary column could be inferred. Otherwise a dictionary
        with a `"primary"` entry and, when a distinct secondary column was found, a `"secondary"` entry.
    """
    primary_key_name_candidates = ("sentence", "text", "premise")
    secondary_key_name_candidates = ("hypothesis",)

    def first_matching_column(candidates, excluded=None):
        # First column, in dataset order, whose name contains any candidate substring.
        return next(
            (
                name
                for name in column_names
                if name != excluded and any(candidate in name for candidate in candidates)
            ),
            None,
        )

    primary_key_name = first_matching_column(primary_key_name_candidates)
    if primary_key_name is None:
        return None

    # The primary column is skipped here: a name such as "hypothesis_text" matches both candidate
    # lists, and returning the same column for both roles would pair every example with itself.
    secondary_key_name = first_matching_column(secondary_key_name_candidates, excluded=primary_key_name)
    if secondary_key_name is None:
        logger.info(
            "Could not infer the secondary key in the dataset, if it does contain one, please provide it manually."
        )
        return {"primary": primary_key_name}

    return {"primary": primary_key_name, "secondary": secondary_key_name}