in src/fmeval/data_loaders/json_data_loader.py [0:0]
def _read_stream(self, f: "pyarrow.NativeFile", path: str) -> pyarrow.Table:  # pragma: no cover
    """
    Reads the JSON or JSON Lines dataset file given by `f`, parses the JSON/JSON Lines,
    then yields a single pyarrow.Table representing the dataset.

    Note that this method is a generator: it yields the table rather than returning it.

    :param f: The file object to read. Note that pyarrow.NativeFile objects differ
        slightly from regular Python files.
    :param path: Unused. Required so that this class conforms to FileBasedDatasource.
    :return: A generator yielding one pyarrow.Table built from the parsed dataset columns.
    :raises EvalAlgorithmInternalError: If the configured dataset MIME type is
        neither JSON nor JSON Lines.
    """
    parser = self.config.parser
    mime_type = self.config.dataset_mime_type
    if mime_type == MIME_TYPE_JSON:
        dataset = json.load(f)
        pydict = parser.parse_dataset_columns(
            dataset=dataset, dataset_mime_type=MIME_TYPE_JSON, dataset_name=self.config.dataset_name
        )
        yield pyarrow.Table.from_pydict(pydict)
    elif mime_type == MIME_TYPE_JSONLINES:
        # Split on "\n" only: str.splitlines() would also break on characters such as
        # U+2028, which may appear unescaped inside a JSON string.
        raw_lines = f.readall().decode().strip().split("\n")
        # Skip blank/whitespace-only lines (empty file, consecutive newlines, stray "\r")
        # so that they do not surface as an opaque JSONDecodeError.
        json_lines = [json.loads(line) for line in raw_lines if line.strip()]
        parsed_json_lines = [
            parser.parse_dataset_columns(
                dataset=line, dataset_mime_type=MIME_TYPE_JSONLINES, dataset_name=self.config.dataset_name
            )
            for line in json_lines
        ]
        yield pyarrow.Table.from_pylist(parsed_json_lines)
    else:  # pragma: no cover
        raise EvalAlgorithmInternalError(
            f"Got an unexpected dataset MIME type {mime_type} "
            "that is not JSON or JSON Lines."
        )