in leaderboard/linear.py [0:0]
def generate_examples(self):
    """Yield one VW-style example line per (model, question) pair.

    Walks every question under ``self._dev_squad.data`` and, for each entry
    of ``self._predictions.scored_predictions``, builds a space-separated
    line: the label (``1`` when the exact-match score is 1, ``-1`` when it
    is 0) followed by one ``|namespace ...`` segment for each feature group
    enabled in ``self._feature_set``.  The segment order is fixed: ids,
    m_id, ex_id, question, stats, context, title, IRT, baseline, topics.
    (The ``vw_`` helper names suggest Vowpal Wabbit input format -- confirm
    against the consumer of these lines.)

    The feature set is read once, when iteration starts.

    Yields:
        tuple[str, str]: ``("train", line)`` if ``(model_id, question.id)``
        is in ``self._train_items``, ``("test", line)`` if it is in
        ``self._test_items``.

    Raises:
        ValueError: if IRT features are requested but ``self._irt`` is
            ``None``; if an exact-match score is neither 0 nor 1 after
            ``int()`` conversion; if topic features are requested but no
            topic feature is available for a question; or if a pair belongs
            to neither the train nor the test split.

    Once every example has been yielded, the label counts (and, when at
    least one example exists, the label proportions) are logged through
    ``console``.
    """
    # Resolve the enabled feature groups once; they are invariant per example.
    feature_set = self._feature_set
    use_guids = Feature.GUIDS in feature_set
    use_m_id = Feature.M_ID in feature_set
    use_ex_id = Feature.EX_ID in feature_set
    use_qwords = Feature.QWORDS in feature_set
    use_stats = Feature.STATS in feature_set
    use_cwords = Feature.CWORDS in feature_set
    use_title = Feature.TITLE in feature_set
    use_irt = Feature.IRT in feature_set
    use_baseline = Feature.BASELINE in feature_set
    use_topics = Feature.TOPICS in feature_set

    # Fail with a clear message instead of an AttributeError on ``None.diff``
    # later on; mirrors the explicit check done for topic features.
    if use_irt and self._irt is None:
        raise ValueError("IRT features requested but no IRT model is available")

    label_dist = {1: 0, -1: 0}
    for page in track(self._dev_squad.data):
        # Page-level namespace: identical for every question on the page.
        title_namespace = f"|title {vw_escape(page.title)}" if use_title else None

        for paragraph in page.paragraphs:
            # Paragraph-level values: identical for every question/model.
            context = paragraph.context
            c_char_length = len(context)
            c_word_length = len(context.split())
            context_namespace = (
                f"|context {vw_escape(context)}" if use_cwords else None
            )

            for question in paragraph.qas:
                question_id = question.id

                if self._irt is None:
                    example_stats = None
                else:
                    example_stats = self._irt.example_stats[question_id]

                if self._topic_model is None:
                    vw_topic_feature = None
                else:
                    vw_topic_feature = self._topic_model.vw_features(question_id)

                # Questions without answers get zeroed answer statistics.
                answers = question.answers
                if answers:
                    answer_position = min(a.answer_start for a in answers)
                    answer_length = min(len(a.text) for a in answers)
                else:
                    answer_position = 0
                    answer_length = 0

                # Missing hardness happens for impossible questions.
                hardness = self._hardness.get(question_id, "NA")

                # Question-level values, shared by every model's example.
                question_text = question.question
                q_char_length = len(question_text)
                q_word_length = len(question_text.split())
                escaped_question = vw_escape(question_text)

                if use_stats:
                    vw_impossible = (
                        "impossible" if question.is_impossible else "possible"
                    )
                    stats_namespace = " ".join(
                        [
                            "|stats",
                            f"q_word_length:{q_word_length}",
                            f"q_char_length:{q_char_length}",
                            f"answer_position:{answer_position}",
                            f"answer_char_length:{answer_length}",
                            f"c_char_length:{c_char_length}",
                            f"c_word_length:{c_word_length}",
                            vw_impossible,
                            hardness,
                        ]
                    )
                else:
                    stats_namespace = None

                for model_id, preds in self._predictions.scored_predictions.items():
                    if self._irt is None:
                        model_stats = None
                    else:
                        model_stats = self._irt.model_stats[model_id]

                    # Exact-match score must be binary; map {1, 0} -> {1, -1}.
                    score = int(preds["exact_match"][question_id])
                    if score == 1:
                        label = 1
                    elif score == 0:
                        label = -1
                    else:
                        raise ValueError("Invalid label")
                    label_dist[label] += 1

                    # Assemble the line; namespace order is kept stable.
                    features = [f"{label}"]
                    if use_guids:
                        features.append(f"|ids {question_id} {model_id}")
                    if use_m_id:
                        features.append(f"|m_id {model_id}")
                    if use_ex_id:
                        features.append(f"|ex_id {question_id}")
                    if use_qwords:
                        features.append(f"|question {escaped_question}")
                    if use_stats:
                        features.append(stats_namespace)
                    if use_cwords:
                        features.append(context_namespace)
                    if use_title:
                        features.append(title_namespace)
                    if use_irt:
                        features.append(
                            create_vw_irt_features(
                                diff=example_stats.diff,
                                disc=example_stats.disc,
                                ability=model_stats.skill,
                                lambda_=example_stats.lambda_,
                            )
                        )
                    if use_baseline:
                        features.append("|baseline a")
                    if use_topics:
                        if vw_topic_feature is None:
                            raise ValueError("Invalid topic feature")
                        features.append(vw_topic_feature)
                    example = " ".join(features)

                    item = (model_id, question_id)
                    if item in self._train_items:
                        yield "train", example
                    elif item in self._test_items:
                        yield "test", example
                    else:
                        raise ValueError(f"Item not in train or test: {item}")

    console.log("Label Distribution")
    total = sum(label_dist.values())
    console.log(label_dist)
    if total:
        console.log({k: v / total for k, v in label_dist.items()})
    else:
        # No example was produced, so proportions would divide by zero.
        console.log("No examples generated; label proportions are undefined")
    console.log("dataset summary")