def ensure_tokenized()

in arctic_inference/suffix_decoding/simulator.py [0:0]


def ensure_tokenized(dataset: pd.DataFrame):
    """Validate that every row of *dataset* is already tokenized.

    A row counts as tokenized when both its ``"prompt"`` and its
    ``"response"`` entries are ``list`` objects whose elements are all
    ``int`` instances. A dataset with no rows passes trivially.

    Args:
        dataset: Frame whose rows are checked one by one via ``iterrows()``.

    Raises:
        ValueError: If any row's prompt or response is not a list of ints.
    """

    def _is_token_list(value) -> bool:
        # Must be a real list (not a tuple/array/str) made up solely of ints.
        return isinstance(value, list) and all(
            isinstance(token, int) for token in value)

    # Prompt is examined before response, and evaluation stops at the first
    # failing row, so the response of a row with a bad prompt is never read.
    every_row_tokenized = all(
        _is_token_list(record["prompt"])
        and _is_token_list(record["response"])
        for _, record in dataset.iterrows()
    )
    if not every_row_tokenized:
        raise ValueError(
            "Dataset must be tokenized or a tokenizer must be provided")