def read_file()

in misc/reference_datasets/monolingual/fr/download_croissant.py [0:0]


    def read_file(self, filepath: str):
        """Yield documents parsed from one Arrow IPC stream file.

        The file is opened in binary mode through ``self.data_folder.open``,
        read completely into an Arrow table, converted to a pandas DataFrame,
        and every row is handed as a dict to ``self.get_document_from_dict``
        together with ``filepath`` and a running index. Rows for which that
        call returns a falsy value are skipped and do not advance the index.

        When ``self.read_metadata`` is false, only the ``self.text_key`` and
        ``self.id_key`` columns (those that exist in the file) are kept before
        the conversion, so no other column reaches
        ``get_document_from_dict``.

        Args:
            filepath: Path passed to ``self.data_folder.open``.

        Yields:
            The truthy objects returned by ``self.get_document_from_dict``,
            in row order.
        """
        import pyarrow as pa

        with self.data_folder.open(filepath, "rb") as f:
            reader = pa.ipc.open_stream(f)
            li = 0
            documents = []
            with self.track_time("table"):
                table = reader.read_all()
                if not self.read_metadata:
                    # Real column projection. The previous code passed this
                    # list as ``categories=`` to ``read_pandas``, which only
                    # turns those columns into pandas Categoricals and does
                    # not drop any other column.
                    wanted = {self.text_key, self.id_key}
                    # Select by position so a missing key is simply ignored
                    # instead of raising on an unknown column name.
                    keep = [
                        index
                        for index, name in enumerate(table.column_names)
                        if name in wanted
                    ]
                    table = table.select(keep)
                df = table.to_pandas()
                for _, row in df.iterrows():
                    document = self.get_document_from_dict(row.to_dict(), filepath, li)
                    if not document:
                        continue
                    documents.append(document)
                    li += 1
            # Documents are buffered and yielded only after the timed section
            # has closed; presumably so consumer time is not attributed to
            # "table" (confirm against track_time's implementation).
            yield from documents