in dpr/data/biencoder_data.py [0:0]
def __getitem__(self, index) -> BiEncoderSample:
    """Build the bi-encoder sample for the record stored at ``index``.

    The record's ``"question"`` is passed through ``self._process_query``
    and every context entry is wrapped in a ``BiEncoderPassage``. The
    stored record is only read, never modified.

    Args:
        index: Position of the record in ``self.data``.

    Returns:
        A ``BiEncoderSample`` with ``query``, ``positive_passages``,
        ``negative_passages`` and ``hard_negative_passages`` populated.
        The two negative lists are empty when the record has no
        ``"negative_ctxs"`` / ``"hard_negative_ctxs"`` entry.

    Raises:
        KeyError: If the record lacks ``"question"`` or
            ``"positive_ctxs"``, or a context lacks ``"text"``
            (assuming records and contexts are plain dicts).
    """
    json_sample = self.data[index]

    def create_passage(ctx: dict) -> BiEncoderPassage:
        text = ctx["text"]
        if self.normalize:
            text = normalize_passage(text)
        # A missing title becomes None. Using .get() instead of assigning
        # ctx["title"] = None keeps the record held in self.data untouched.
        return BiEncoderPassage(text, ctx.get("title"))

    r = BiEncoderSample()
    r.query = self._process_query(json_sample["question"])
    # Positive contexts are mandatory; both kinds of negatives are optional.
    r.positive_passages = [
        create_passage(ctx) for ctx in json_sample["positive_ctxs"]
    ]
    r.negative_passages = [
        create_passage(ctx) for ctx in json_sample.get("negative_ctxs", [])
    ]
    r.hard_negative_passages = [
        create_passage(ctx) for ctx in json_sample.get("hard_negative_ctxs", [])
    ]
    return r