in src/run_sentiment.py [0:0]
def _read_sst2_tsv(path, roberta):
    """Read a tab-separated ``sentence<TAB>label`` file with a header row.

    Returns a list of ``(roberta.encode(sentence), int(label))`` pairs.
    """
    examples = []
    with open(path) as f:
        next(f, None)  # the first line is a header, not an example
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank lines instead of failing the unpack below.
                continue
            # Split on the LAST tab so a tab inside the sentence does not
            # break the two-field unpacking; the label is the final field.
            sentence, label = line.rsplit('\t', 1)
            examples.append((roberta.encode(sentence.strip()), int(label)))
    return examples


def _read_labeled_csv(path, roberta):
    """Read a CSV file whose rows are ``label, text`` (no header is skipped).

    Returns a list of ``(roberta.encode(text), int(label))`` pairs.
    """
    examples = []
    with open(path, newline='') as f:
        for row in csv.reader(f):
            if not row:
                # csv.reader yields [] for blank lines; skip them rather
                # than raising IndexError on row[1].
                continue
            examples.append((roberta.encode(row[1]), int(row[0])))
    return examples


def read_data(dataset_name, roberta, num_examples=None):
    """Load an evaluation set as a list of ``(encoded_text, label)`` pairs.

    Args:
        dataset_name: One of ``'sst2'`` (read from ``SST2_DEV_FILE``),
            ``'mr'`` (``MR_TEST_FILE``) or ``'cr'`` (``CR_TEST_FILE``).
        roberta: Object exposing ``encode(str)``; its return value is stored
            as-is as the first element of each pair.
        num_examples: If truthy, the data is shuffled in place with the
            module-level ``random`` state and cut to at most this many
            examples. ``None`` (the default) or ``0`` keeps everything in
            file order.

    Returns:
        A list of ``(roberta.encode(text), int_label)`` tuples.

    Raises:
        NotImplementedError: If ``dataset_name`` is not a supported dataset.
    """
    if dataset_name == 'sst2':
        data = _read_sst2_tsv(SST2_DEV_FILE, roberta)
    elif dataset_name == 'mr':
        data = _read_labeled_csv(MR_TEST_FILE, roberta)
    elif dataset_name == 'cr':
        data = _read_labeled_csv(CR_TEST_FILE, roberta)
    else:
        # Name the offending value so the failure is diagnosable.
        raise NotImplementedError(f'Unsupported dataset: {dataset_name!r}')
    print(f'Read {len(data)} examples')
    if num_examples:
        # Shuffle before truncating so the subset is a random sample rather
        # than the first rows of the file.
        random.shuffle(data)
        data = data[:num_examples]
        print(f'Truncated to {len(data)} examples')
    return data