in grok/data.py [0:0]
def make_data(cls, operator, operands=None, shuffle=True, seed=0) -> List[str]:
    """Build the list of equation strings for ``operator``.

    The raw ``operator`` argument is first split into a base operator and a
    noise level by ``cls._get_operator_and_noise_level``. Equations are then
    generated, optionally shuffled, optionally corrupted with noise, and
    finally wrapped in ``EOS_TOKEN`` on both sides.

    Args:
        operator: Operator specification understood by
            ``cls._get_operator_and_noise_level`` (presumably the operator
            name with an optional noise suffix -- confirm against that
            helper). The resolved operator must be in ``VALID_OPERATORS``.
        operands: Forwarded only to ``cls._make_unary_operation_data`` for
            the unary operators ("sort", "reverse", "copy"); ignored for
            every other operator.
        shuffle: When true, shuffle the generated equations in place.
        seed: Seed for the ``np.random.RandomState`` that drives both the
            shuffle and the noise sampling.

    Returns:
        A list of strings of the form ``EOS_TOKEN + " " + eq + " " +
        EOS_TOKEN``, one per equation.

    Raises:
        AssertionError: If the resolved operator is not in
            ``VALID_OPERATORS``.
        ValueError: If the noise level is larger than the number of
            generated equations.
    """
    operator, noise_level = cls._get_operator_and_noise_level(operator)
    assert operator in VALID_OPERATORS, f"unsupported operator: {operator!r}"

    if operator in ("sort", "reverse", "copy"):
        data = cls._make_unary_operation_data(operator, operands)
    else:
        data = cls._make_binary_operation_data(operator)

    # A single seeded generator drives both the shuffle and the noise
    # sampling, so the output is reproducible for a given seed.
    rng = np.random.RandomState(seed=seed)
    if shuffle:
        rng.shuffle(data)

    if noise_level > 0:
        # The first `noise_level` equations are overwritten below; fail with
        # a clear message up front instead of an IndexError part-way through
        # the rewrite.
        if noise_level > len(data):
            raise ValueError(
                f"noise level {noise_level} exceeds the number of "
                f"generated equations ({len(data)})"
            )
        # Sample donor equations (numpy samples with replacement by default)
        # and keep only their right-hand sides.
        donor_eqns = rng.choice(data, size=noise_level)
        noisy_answers = [donor.split(" = ")[1] for donor in donor_eqns]
        # Keep each left-hand side and swap in a donor answer.
        for i, noisy_answer in enumerate(noisy_answers):
            lhs = data[i].split(" = ")[0]
            data[i] = lhs + " = " + noisy_answer

    return [EOS_TOKEN + " " + eq + " " + EOS_TOKEN for eq in data]