path # lines of code # active days days since first update days since last update # commits # contributors first updated last updated first contributor last contributor utils/release.py 62 3 1478 726 3 3 2021-06-14 2023-07-06 8515462+albertvillanova@users.noreply.github.com mariosasko777@gmail.com benchmarks/benchmark_indices_mapping.py 41 4 1769 1478 5 2 2020-08-27 2021-06-14 thomwolf@users.noreply.github.com 8515462+albertvillanova@users.noreply.github.com benchmarks/benchmark_iterating.py 79 3 1769 1478 4 2 2020-08-27 2021-06-14 thomwolf@users.noreply.github.com 8515462+albertvillanova@users.noreply.github.com benchmarks/benchmark_map_filter.py 42 3 1768 1478 4 2 2020-08-28 2021-06-14 thomwolf@users.noreply.github.com 8515462+albertvillanova@users.noreply.github.com benchmarks/benchmark_getitem_100B.py 53 2 1547 1478 2 2 2021-04-06 2021-06-14 42851186+lhoestq@users.noreply.github.com 8515462+albertvillanova@users.noreply.github.com benchmarks/format.py 34 6 1769 868 6 4 2020-08-27 2023-02-14 thomwolf@users.noreply.github.com mariosasko777@gmail.com benchmarks/utils.py 47 5 1769 1208 6 4 2020-08-27 2022-03-11 thomwolf@users.noreply.github.com mariosasko777@gmail.com benchmarks/benchmark_array_xd.py 108 4 1769 1478 5 2 2020-08-27 2021-06-14 thomwolf@users.noreply.github.com 8515462+albertvillanova@users.noreply.github.com src/datasets/iterable_dataset.py 2750 96 1469 6 105 37 2021-06-23 2025-06-25 42851186+lhoestq@users.noreply.github.com 42851186+lhoestq@users.noreply.github.com src/datasets/io/abc.py 46 5 1572 797 5 5 2021-03-12 2023-04-26 8515462+albertvillanova@users.noreply.github.com maddie.dawson@databricks.com src/datasets/io/__init__.py 1 1 1572 1572 1 1 2021-03-12 2021-03-12 8515462+albertvillanova@users.noreply.github.com 8515462+albertvillanova@users.noreply.github.com src/datasets/io/csv.py 122 19 1572 476 19 8 2021-03-12 2024-03-12 8515462+albertvillanova@users.noreply.github.com mariosasko777@gmail.com src/datasets/io/generator.py 52 8 1019 340 8 6 2022-09-16 2024-07-26 mariosasko777@gmail.com piercus@users.noreply.github.com src/datasets/io/text.py 53 6 1566 476 6 4 2021-03-18 2024-03-12 8515462+albertvillanova@users.noreply.github.com mariosasko777@gmail.com src/datasets/io/parquet.py 103 19 1462 246 19 11 2021-06-30 2024-10-28 42851186+lhoestq@users.noreply.github.com 42851186+lhoestq@users.noreply.github.com src/datasets/io/sql.py 101 8 1002 476 8 3 2022-10-03 2024-03-12 frederic.branchaud.charron@gmail.com mariosasko777@gmail.com src/datasets/io/json.py 148 25 1566 225 25 12 2021-03-18 2024-11-18 8515462+albertvillanova@users.noreply.github.com varadhbhatnagar@rediffmail.com src/datasets/io/spark.py 46 3 797 768 3 1 2023-04-26 2023-05-25 maddie.dawson@databricks.com maddie.dawson@databricks.com src/datasets/fingerprint.py 258 31 1755 118 32 15 2020-09-10 2025-03-05 thomwolf@users.noreply.github.com cyyever@outlook.com src/datasets/utils/_dataset_viewer.py 70 4 449 118 4 3 2024-04-08 2025-03-05 sylvain.lesage@huggingface.co cyyever@outlook.com src/datasets/utils/file_utils.py 807 105 1755 22 121 26 2020-09-10 2025-06-09 thomwolf@users.noreply.github.com 42851186+lhoestq@users.noreply.github.com src/datasets/utils/extract.py 253 19 1454 118 21 8 2021-07-08 2025-03-05 8515462+albertvillanova@users.noreply.github.com cyyever@outlook.com src/datasets/utils/info_utils.py 67 9 1755 386 9 5 2020-09-10 2024-06-10 thomwolf@users.noreply.github.com 8515462+albertvillanova@users.noreply.github.com src/datasets/utils/__init__.py 11 21 1755 392 22 7 2020-09-10 2024-06-04 thomwolf@users.noreply.github.com 8515462+albertvillanova@users.noreply.github.com src/datasets/utils/resources/__init__.py 1 1 1527 1527 1 1 2021-04-26 2021-04-26 theo-m@users.noreply.github.com theo-m@users.noreply.github.com src/datasets/utils/resources/readme_structure.yaml 116 2 1513 1509 2 1 2021-05-10 2021-05-14 chhablani.gunjan@gmail.com chhablani.gunjan@gmail.com src/datasets/utils/stratify.py 46 3 1133 173 3 2 2022-05-25 2025-01-09 48522685+nandwalritik@users.noreply.github.com 42851186+lhoestq@users.noreply.github.com src/datasets/utils/hub.py 9 9 965 315 9 4 2022-11-09 2024-08-20 8515462+albertvillanova@users.noreply.github.com 8515462+albertvillanova@users.noreply.github.com src/datasets/utils/typing.py 6 3 1572 118 3 2 2021-03-12 2025-03-05 8515462+albertvillanova@users.noreply.github.com cyyever@outlook.com src/datasets/utils/track.py 45 3 560 118 3 2 2023-12-19 2025-03-05 42851186+lhoestq@users.noreply.github.com cyyever@outlook.com src/datasets/utils/deprecation_utils.py 70 7 1588 970 7 5 2021-02-24 2022-11-04 33657802+sbrandeis@users.noreply.github.com mariosasko777@gmail.com src/datasets/utils/py_utils.py 402 75 1755 22 79 19 2020-09-10 2025-06-09 thomwolf@users.noreply.github.com 42851186+lhoestq@users.noreply.github.com src/datasets/utils/doc_utils.py 6 1 1495 1495 1 1 2021-05-28 2021-05-28 lewis.c.tunstall@gmail.com lewis.c.tunstall@gmail.com src/datasets/utils/filelock.py 8 17 1688 581 18 8 2020-11-16 2023-11-28 42851186+lhoestq@users.noreply.github.com mariosasko777@gmail.com src/datasets/utils/_dill.py 334 7 581 15 8 4 2023-11-28 2025-06-16 mariosasko777@gmail.com 45557362+qgallouedec@users.noreply.github.com src/datasets/utils/sharding.py 56 4 965 118 4 4 2022-11-09 2025-03-05 teven.lescao@gmail.com cyyever@outlook.com src/datasets/utils/experimental.py 12 2 748 487 2 2 2023-06-14 2024-03-01 ying.chen@databricks.com 42851186+lhoestq@users.noreply.github.com src/datasets/utils/version.py 52 11 1755 935 11 8 2020-09-10 2022-12-09 thomwolf@users.noreply.github.com 59462357+stevhliu@users.noreply.github.com src/datasets/utils/tqdm.py 40 2 587 487 2 2 2023-11-22 2024-03-01 mariosasko777@gmail.com 42851186+lhoestq@users.noreply.github.com src/datasets/utils/logging.py 69 20 1755 22 21 7 2020-09-10 2025-06-09 thomwolf@users.noreply.github.com 42851186+lhoestq@users.noreply.github.com src/datasets/utils/patching.py 65 9 1449 588 9 5 2021-07-13 2023-11-21 8515462+albertvillanova@users.noreply.github.com mariosasko777@gmail.com src/datasets/utils/tf_utils.py 390 9 1121 118 9 6 2022-06-06 2025-03-05 rocketknight1@users.noreply.github.com cyyever@outlook.com src/datasets/utils/_filelock.py 30 4 586 118 4 3 2023-11-23 2025-03-05 mariosasko777@gmail.com cyyever@outlook.com src/datasets/utils/metadata.py 180 40 1527 118 44 16 2021-04-26 2025-03-05 42851186+lhoestq@users.noreply.github.com cyyever@outlook.com src/datasets/__init__.py 32 128 1755 14 215 13 2020-09-10 2025-06-17 lhoest.q@gmail.com 42851186+lhoestq@users.noreply.github.com src/datasets/info.py 254 42 1755 6 45 16 2020-09-10 2025-06-25 thomwolf@users.noreply.github.com 42851186+lhoestq@users.noreply.github.com src/datasets/search.py 393 41 1755 6 42 25 2020-09-10 2025-06-25 thomwolf@users.noreply.github.com 42851186+lhoestq@users.noreply.github.com src/datasets/splits.py 260 30 1755 118 32 13 2020-09-10 2025-03-05 thomwolf@users.noreply.github.com cyyever@outlook.com src/datasets/exceptions.py 61 7 630 118 7 3 2023-10-10 2025-03-05 8515462+albertvillanova@users.noreply.github.com cyyever@outlook.com src/datasets/dataset_dict.py 1075 134 1755 6 151 42 2020-09-10 2025-06-25 thomwolf@users.noreply.github.com 42851186+lhoestq@users.noreply.github.com src/datasets/commands/__init__.py 10 2 1755 1513 2 2 2020-09-10 2021-05-10 thomwolf@users.noreply.github.com mariosasko777@gmail.com src/datasets/commands/delete_from_hub.py 35 1 427 427 1 1 2024-04-30 2024-04-30 8515462+albertvillanova@users.noreply.github.com 8515462+albertvillanova@users.noreply.github.com src/datasets/commands/datasets_cli.py 25 8 1586 22 8 3 2021-02-26 2025-06-09 mariosasko777@gmail.com 42851186+lhoestq@users.noreply.github.com src/datasets/commands/env.py 31 7 1755 614 7 5 2020-09-10 2023-10-26 thomwolf@users.noreply.github.com mariosasko777@gmail.com src/datasets/builder.py 1185 144 1755 6 165 33 2020-09-10 2025-06-25 thomwolf@users.noreply.github.com 42851186+lhoestq@users.noreply.github.com src/datasets/formatting/__init__.py 84 17 1607 64 17 8 2021-02-05 2025-04-28 42851186+lhoestq@users.noreply.github.com 35225576+afuetterer@users.noreply.github.com src/datasets/formatting/np_formatter.py 79 8 995 12 9 5 2022-10-10 2025-06-19 42851186+lhoestq@users.noreply.github.com 49127578+tytodd@users.noreply.github.com src/datasets/formatting/tf_formatter.py 83 14 1607 12 14 7 2021-02-05 2025-06-19 42851186+lhoestq@users.noreply.github.com 49127578+tytodd@users.noreply.github.com src/datasets/formatting/torch_formatter.py 82 14 1607 12 14 8 2021-02-05 2025-06-19 42851186+lhoestq@users.noreply.github.com 49127578+tytodd@users.noreply.github.com src/datasets/formatting/jax_formatter.py 116 13 1471 12 14 8 2021-06-21 2025-06-19 42851186+lhoestq@users.noreply.github.com 49127578+tytodd@users.noreply.github.com src/datasets/formatting/formatting.py 464 39 1607 22 39 14 2021-02-05 2025-06-09 42851186+lhoestq@users.noreply.github.com 42851186+lhoestq@users.noreply.github.com src/datasets/formatting/polars_formatter.py 88 2 480 152 2 2 2024-03-08 2025-01-30 psmyth1994@gmail.com 42851186+lhoestq@users.noreply.github.com src/datasets/hub.py 100 4 427 22 4 2 2024-04-30 2025-06-09 8515462+albertvillanova@users.noreply.github.com 42851186+lhoestq@users.noreply.github.com src/datasets/combine.py 90 23 1469 118 24 12 2021-06-23 2025-03-05 42851186+lhoestq@users.noreply.github.com cyyever@outlook.com src/datasets/streaming.py 83 37 1469 22 41 7 2021-06-23 2025-06-09 42851186+lhoestq@users.noreply.github.com 42851186+lhoestq@users.noreply.github.com src/datasets/features/audio.py 163 43 1357 12 44 12 2021-10-13 2025-06-19 8515462+albertvillanova@users.noreply.github.com 49127578+tytodd@users.noreply.github.com src/datasets/features/__init__.py 24 10 1357 6 10 5 2021-10-13 2025-06-25 8515462+albertvillanova@users.noreply.github.com 42851186+lhoestq@users.noreply.github.com src/datasets/features/pdf.py 141 6 105 14 7 4 2025-03-18 2025-06-17 42851186+lhoestq@users.noreply.github.com 42851186+lhoestq@users.noreply.github.com src/datasets/features/features.py 1354 91 1357 6 107 32 2021-10-13 2025-06-25 8515462+albertvillanova@users.noreply.github.com 42851186+lhoestq@users.noreply.github.com src/datasets/features/translation.py 52 14 1357 6 14 9 2021-10-13 2025-06-25 8515462+albertvillanova@users.noreply.github.com 42851186+lhoestq@users.noreply.github.com src/datasets/features/video.py 186 8 250 12 11 5 2024-10-24 2025-06-19 42851186+lhoestq@users.noreply.github.com 49127578+tytodd@users.noreply.github.com src/datasets/features/_torchcodec.py 13 1 12 12 1 1 2025-06-19 2025-06-19 49127578+tytodd@users.noreply.github.com 49127578+tytodd@users.noreply.github.com src/datasets/features/image.py 250 33 1303 12 34 10 2021-12-06 2025-06-19 mario@huggingface.co 49127578+tytodd@users.noreply.github.com src/datasets/inspect.py 149 45 1755 22 51 14 2020-09-10 2025-06-09 thomwolf@users.noreply.github.com 42851186+lhoestq@users.noreply.github.com src/datasets/data_files.py 470 58 1359 50 63 17 2021-10-11 2025-05-12 42851186+lhoestq@users.noreply.github.com matthew@protopia.ai src/datasets/config.py 176 69 1602 6 76 20 2021-02-10 2025-06-25 42851186+lhoestq@users.noreply.github.com 42851186+lhoestq@users.noreply.github.com src/datasets/arrow_writer.py 469 72 1755 64 75 26 2020-09-10 2025-04-28 thomwolf@users.noreply.github.com 35225576+afuetterer@users.noreply.github.com src/datasets/keyhash.py 38 5 1513 55 5 5 2021-05-10 2025-05-07 nikhilbartwal1234@gmail.com 73196164+giraffacarp@users.noreply.github.com src/datasets/naming.py 47 11 1755 487 11 8 2020-09-10 2024-03-01 thomwolf@users.noreply.github.com 42851186+lhoestq@users.noreply.github.com src/datasets/parallel/parallel.py 65 2 748 442 2 2 2023-06-14 2024-04-15 ying.chen@databricks.com 42851186+lhoestq@users.noreply.github.com src/datasets/parallel/__init__.py 1 2 748 392 2 2 2023-06-14 2024-06-04 ying.chen@databricks.com 8515462+albertvillanova@users.noreply.github.com src/datasets/arrow_reader.py 309 34 1755 95 34 16 2020-09-10 2025-03-28 thomwolf@users.noreply.github.com 42851186+lhoestq@users.noreply.github.com src/datasets/packaged_modules/csv/__init__.py 1 1 1624 1624 1 1 2021-01-19 2021-01-19 42851186+lhoestq@users.noreply.github.com 42851186+lhoestq@users.noreply.github.com src/datasets/packaged_modules/csv/csv.py 164 27 1624 118 28 10 2021-01-19 2025-03-05 42851186+lhoestq@users.noreply.github.com cyyever@outlook.com src/datasets/packaged_modules/arrow/__init__.py 1 1 749 749 1 1 2023-06-13 2023-06-13 mariusz.jachimowicz.83@gmail.com mariusz.jachimowicz.83@gmail.com src/datasets/packaged_modules/arrow/arrow.py 56 6 749 173 6 5 2023-06-13 2025-01-09 mariusz.jachimowicz.83@gmail.com 42851186+lhoestq@users.noreply.github.com src/datasets/packaged_modules/sql/__init__.py 1 1 1002 1002 1 1 2022-10-03 2022-10-03 frederic.branchaud.charron@gmail.com frederic.branchaud.charron@gmail.com src/datasets/packaged_modules/sql/sql.py 92 5 1002 118 5 5 2022-10-03 2025-03-05 frederic.branchaud.charron@gmail.com cyyever@outlook.com src/datasets/packaged_modules/generator/__init__.py 1 1 1019 1019 1 1 2022-09-16 2022-09-16 mariosasko777@gmail.com mariosasko777@gmail.com src/datasets/packaged_modules/generator/generator.py 23 5 1019 118 5 5 2022-09-16 2025-03-05 mariosasko777@gmail.com cyyever@outlook.com src/datasets/packaged_modules/pandas/pandas.py 47 9 1624 390 9 5 2021-01-19 2024-06-06 42851186+lhoestq@users.noreply.github.com 8515462+albertvillanova@users.noreply.github.com src/datasets/packaged_modules/pandas/__init__.py 1 1 1624 1624 1 1 2021-01-19 2021-01-19 42851186+lhoestq@users.noreply.github.com 42851186+lhoestq@users.noreply.github.com src/datasets/packaged_modules/text/__init__.py 1 1 1624 1624 1 1 2021-01-19 2021-01-19 42851186+lhoestq@users.noreply.github.com 42851186+lhoestq@users.noreply.github.com src/datasets/packaged_modules/text/text.py 83 19 1624 314 19 5 2021-01-19 2024-08-21 42851186+lhoestq@users.noreply.github.com 8515462+albertvillanova@users.noreply.github.com src/datasets/packaged_modules/__init__.py 85 28 1624 105 30 13 2021-01-19 2025-03-18 42851186+lhoestq@users.noreply.github.com yabran.muvdi@gmail.com src/datasets/packaged_modules/webdataset/webdataset.py 263 12 581 118 13 4 2023-11-28 2025-03-05 42851186+lhoestq@users.noreply.github.com cyyever@outlook.com src/datasets/packaged_modules/webdataset/__init__.py 1 1 581 581 1 1 2023-11-28 2023-11-28 42851186+lhoestq@users.noreply.github.com 42851186+lhoestq@users.noreply.github.com src/datasets/packaged_modules/webdataset/_tenbin.py 167 1 581 581 1 1 2023-11-28 2023-11-28 42851186+lhoestq@users.noreply.github.com 42851186+lhoestq@users.noreply.github.com src/datasets/packaged_modules/spark/__init__.py 1 1 797 797 1 1 2023-04-26 2023-04-26 maddie.dawson@databricks.com maddie.dawson@databricks.com src/datasets/packaged_modules/spark/spark.py 289 9 797 118 10 5 2023-04-26 2025-03-05 42851186+lhoestq@users.noreply.github.com cyyever@outlook.com src/datasets/packaged_modules/json/__init__.py 1 1 1624 1624 1 1 2021-01-19 2021-01-19 42851186+lhoestq@users.noreply.github.com 42851186+lhoestq@users.noreply.github.com src/datasets/packaged_modules/json/json.py 141 31 1624 377 32 7 2021-01-19 2024-06-19 42851186+lhoestq@users.noreply.github.com 8515462+albertvillanova@users.noreply.github.com src/datasets/packaged_modules/videofolder/__init__.py 1 1 250 250 1 1 2024-10-24 2024-10-24 42851186+lhoestq@users.noreply.github.com 42851186+lhoestq@users.noreply.github.com src/datasets/packaged_modules/videofolder/videofolder.py 21 2 250 118 2 2 2024-10-24 2025-03-05 42851186+lhoestq@users.noreply.github.com cyyever@outlook.com src/datasets/packaged_modules/audiofolder/__init__.py 1 1 1044 1044 1 1 2022-08-22 2022-08-22 polina@huggingface.co polina@huggingface.co src/datasets/packaged_modules/audiofolder/audiofolder.py 61 8 1044 12 8 5 2022-08-22 2025-06-19 polina@huggingface.co 49127578+tytodd@users.noreply.github.com src/datasets/packaged_modules/pdffolder/pdffolder.py 13 1 105 105 1 1 2025-03-18 2025-03-18 yabran.muvdi@gmail.com yabran.muvdi@gmail.com src/datasets/packaged_modules/pdffolder/__init__.py 1 1 105 105 1 1 2025-03-18 2025-03-18 yabran.muvdi@gmail.com yabran.muvdi@gmail.com src/datasets/packaged_modules/imagefolder/__init__.py 1 1 1218 1218 1 1 2022-03-01 2022-03-01 nxr9266@g.rit.edu nxr9266@g.rit.edu src/datasets/packaged_modules/imagefolder/imagefolder.py 77 13 1218 118 14 6 2022-03-01 2025-03-05 nxr9266@g.rit.edu cyyever@outlook.com src/datasets/packaged_modules/xml/__init__.py 1 1 250 250 1 1 2024-10-24 2024-10-24 42851186+lhoestq@users.noreply.github.com 42851186+lhoestq@users.noreply.github.com src/datasets/packaged_modules/xml/xml.py 46 1 250 250 1 1 2024-10-24 2024-10-24 42851186+lhoestq@users.noreply.github.com 42851186+lhoestq@users.noreply.github.com src/datasets/packaged_modules/parquet/__init__.py 1 2 1462 1446 2 2 2021-06-30 2021-07-16 42851186+lhoestq@users.noreply.github.com stevhliu@gmail.com src/datasets/packaged_modules/parquet/parquet.py 86 19 1462 118 19 8 2021-06-30 2025-03-05 42851186+lhoestq@users.noreply.github.com cyyever@outlook.com src/datasets/packaged_modules/folder_based_builder/folder_based_builder.py 347 19 1044 6 20 8 2022-08-22 2025-06-25 polina@huggingface.co 42851186+lhoestq@users.noreply.github.com src/datasets/packaged_modules/folder_based_builder/__init__.py 1 1 1044 1044 1 1 2022-08-22 2022-08-22 polina@huggingface.co polina@huggingface.co src/datasets/table.py 942 63 1558 6 72 21 2021-03-26 2025-06-25 42851186+lhoestq@users.noreply.github.com 42851186+lhoestq@users.noreply.github.com src/datasets/distributed.py 9 2 897 249 2 1 2023-01-16 2024-10-25 42851186+lhoestq@users.noreply.github.com 42851186+lhoestq@users.noreply.github.com src/datasets/download/download_manager.py 172 23 1133 118 24 7 2022-05-25 2025-03-05 8515462+albertvillanova@users.noreply.github.com cyyever@outlook.com src/datasets/download/__init__.py 9 1 1133 1133 1 1 2022-05-25 2022-05-25 8515462+albertvillanova@users.noreply.github.com 8515462+albertvillanova@users.noreply.github.com src/datasets/download/streaming_download_manager.py 106 36 1133 118 42 9 2022-05-25 2025-03-05 8515462+albertvillanova@users.noreply.github.com cyyever@outlook.com src/datasets/download/download_config.py 33 15 1133 64 17 9 2022-05-25 2025-04-28 8515462+albertvillanova@users.noreply.github.com 35225576+afuetterer@users.noreply.github.com src/datasets/filesystems/__init__.py 26 18 1617 118 18 12 2021-01-26 2025-03-05 32632186+philschmid@users.noreply.github.com cyyever@outlook.com src/datasets/filesystems/compression.py 80 9 1408 414 9 5 2021-08-23 2024-05-13 42851186+lhoestq@users.noreply.github.com 42851186+lhoestq@users.noreply.github.com src/datasets/load.py 952 181 1755 6 217 46 2020-09-10 2025-06-25 thomwolf@users.noreply.github.com 42851186+lhoestq@users.noreply.github.com pyproject.toml 23 9 988 368 9 5 2022-10-17 2024-06-28 mariosasko777@gmail.com 8515462+albertvillanova@users.noreply.github.com setup.py 131 278 1904 12 437 57 2020-04-14 2025-06-19 thomwolf@users.noreply.github.com 49127578+tytodd@users.noreply.github.com