huggingface / dataset-viewer
File Size

The distribution of file sizes (measured in lines of code).

Intro
Learn more...
File Size Overall
4% | 20% | 26% | 17% | 31%
Legend:
1001+
501-1000
201-500
101-200
1-100


explore: grouped by folders | grouped by size | sunburst | 3D view
File Size per Extension
1001+
501-1000
201-500
101-200
1-100
py: 4% | 22% | 27% | 19% | 25%
yaml: 0% | 21% | 31% | 0% | 46%
tpl: 0% | 0% | 0% | 17% | 82%
toml: 0% | 0% | 0% | 14% | 85%
html: 0% | 0% | 0% | 0% | 100%
File Size per Logical Decomposition
primary
1001+
501-1000
201-500
101-200
1-100
services: 10% | 12% | 23% | 21% | 31%
libs: 0% | 33% | 35% | 17% | 12%
front: 0% | 97% | 0% | 0% | 2%
chart: 0% | 14% | 21% | 5% | 57%
jobs: 0% | 0% | 16% | 19% | 64%
Longest Files (Top 50)
File | # lines | # units
parquet_and_info.py
in services/worker/src/worker/job_runners/config
1039 46
orchestrator.py
in libs/libcommon/src/libcommon
926 48
statistics_utils.py
in libs/libcommon/src/libcommon
677 43
app.py
in front/admin_ui
672 10
compatible_libraries.py
in services/worker/src/worker/job_runners/dataset
646 13
simple_cache.py
in libs/libcommon/src/libcommon
620 38
presidio_scan.py
in services/worker/src/worker/job_runners/split
579 11
jobs.py
in libs/libcommon/src/libcommon/queue
573 31
prod.yaml
in chart/env
515 -
parquet_utils.py
in libs/libcommon/src/libcommon
482 17
features.py
in libs/libcommon/src/libcommon/viewer_utils
476 13
processing_graph.py
in libs/libcommon/src/libcommon
469 21
values.yaml
in chart
454 -
collector.py
in jobs/mongodb_migration/src/mongodb_migration
414 1
exceptions.py
in libs/libcommon/src/libcommon
357 60
state.py
in libs/libcommon/src/libcommon
301 12
config.py
in services/worker/src/worker
297 13
staging.yaml
in chart/env
290 -
modalities.py
in services/worker/src/worker/job_runners/dataset
264 7
duckdb.py
in libs/libapi/src/libapi
254 7
descriptive_statistics.py
in services/worker/src/worker/job_runners/split
251 7
dtos.py
in services/worker/src/worker
248 1
utils.py
in services/worker/src/worker
233 18
operations.py
in libs/libcommon/src/libcommon
232 11
first_rows.py
in services/worker/src/worker/job_runners/split
225 7
search.py
in services/search/src/search/routes
225 2
job_runner_factory.py
in services/worker/src/worker
215 3
opt_in_out_urls_scan_from_streaming.py
in services/worker/src/worker/job_runners/split
208 4
job_manager.py
in services/worker/src/worker
206 14
utils.py
in libs/libcommon/src/libcommon
205 19
asset.py
in libs/libcommon/src/libcommon/viewer_utils
202 6
croissant_crumbs.py
in services/worker/src/worker/job_runners/dataset
198 6
filter.py
in services/search/src/search/routes
195 3
executor.py
in services/worker/src/worker
191 11
endpoint.py
in services/api/src/api/routes
187 4
tpl
_helpers.tpl
in chart/templates/_common
185 -
app.py
in services/admin/src/admin
184 2
webhook.py
in services/webhook/src/webhook/routes
182 3
utils.py
in libs/libapi/src/libapi
181 10
config.py
in libs/libcommon/src/libcommon
176 11
jwt_token.py
in libs/libapi/src/libapi
166 8
deletion_migrations.py
in jobs/mongodb_migration/src/mongodb_migration
155 33
watcher.py
in services/sse-api/src/sse_api
140 10
rows.py
in services/rows/src/rows/routes
139 1
croissant_utils.py
in libs/libcommon/src/libcommon
138 5
app.py
in services/search/src/search
133 3
rows.py
in libs/libcommon/src/libcommon/viewer_utils
129 3
lock.py
in libs/libcommon/src/libcommon/queue
125 9
parquet_metadata.py
in services/worker/src/worker/job_runners/config
123 5
url_preparator.py
in libs/libcommon/src/libcommon
122 10
Files With Most Units (Top 50)
File | # lines | # units
exceptions.py
in libs/libcommon/src/libcommon
357 60
orchestrator.py
in libs/libcommon/src/libcommon
926 48
parquet_and_info.py
in services/worker/src/worker/job_runners/config
1039 46
statistics_utils.py
in libs/libcommon/src/libcommon
677 43
simple_cache.py
in libs/libcommon/src/libcommon
620 38
deletion_migrations.py
in jobs/mongodb_migration/src/mongodb_migration
155 33
jobs.py
in libs/libcommon/src/libcommon/queue
573 31
exceptions.py
in libs/libapi/src/libapi
106 21
processing_graph.py
in libs/libcommon/src/libcommon
469 21
utils.py
in libs/libcommon/src/libcommon
205 19
utils.py
in services/worker/src/worker
233 18
parquet_utils.py
in libs/libcommon/src/libcommon
482 17
job_manager.py
in services/worker/src/worker
206 14
config.py
in services/worker/src/worker
297 13
compatible_libraries.py
in services/worker/src/worker/job_runners/dataset
646 13
features.py
in libs/libcommon/src/libcommon/viewer_utils
476 13
state.py
in libs/libcommon/src/libcommon
301 12
executor.py
in services/worker/src/worker
191 11
presidio_scan.py
in services/worker/src/worker/job_runners/split
579 11
config.py
in libs/libcommon/src/libcommon
176 11
prometheus.py
in libs/libcommon/src/libcommon
107 11
operations.py
in libs/libcommon/src/libcommon
232 11
resources.py
in libs/libcommon/src/libcommon
67 11
storage_client.py
in libs/libcommon/src/libcommon
102 11
app.py
in front/admin_ui
672 10
watcher.py
in services/sse-api/src/sse_api
140 10
utils.py
in libs/libapi/src/libapi
181 10
url_preparator.py
in libs/libcommon/src/libcommon
122 10
lock.py
in libs/libcommon/src/libcommon/queue
125 9
metrics.py
in libs/libcommon/src/libcommon/queue
87 9
renaming_migrations.py
in jobs/mongodb_migration/src/mongodb_migration
93 8
migration.py
in jobs/mongodb_migration/src/mongodb_migration
47 8
plan.py
in jobs/mongodb_migration/src/mongodb_migration
77 8
backfill.py
in jobs/cache_maintenance/src/cache_maintenance
115 8
loop.py
in services/worker/src/worker
98 8
jwt_token.py
in libs/libapi/src/libapi
166 8
storage.py
in libs/libcommon/src/libcommon
70 8
discussions.py
in jobs/cache_maintenance/src/cache_maintenance
118 7
modalities.py
in services/worker/src/worker/job_runners/dataset
264 7
filetypes.py
in services/worker/src/worker/job_runners/dataset
80 7
descriptive_statistics.py
in services/worker/src/worker/job_runners/split
251 7
first_rows.py
in services/worker/src/worker/job_runners/split
225 7
duckdb.py
in libs/libapi/src/libapi
254 7
croissant_crumbs.py
in services/worker/src/worker/job_runners/dataset
198 6
job_runner.py
in services/worker/src/worker
23 6
asset.py
in libs/libcommon/src/libcommon/viewer_utils
202 6
parquet_metadata.py
in services/worker/src/worker/job_runners/config
123 5
dataset_blockages.py
in libs/libcommon/src/libcommon/queue
44 5
croissant_utils.py
in libs/libcommon/src/libcommon
138 5
drop_migrations.py
in jobs/mongodb_migration/src/mongodb_migration
20 4
Files With Long Lines (Top 50)

There are 52 files with lines longer than 120 characters. In total, there are 306 long lines.

File | # lines | # units | # long lines
presidio_scan.py
in services/worker/src/worker/job_runners/split
579 11 200
tpl
_helpers.tpl
in chart/templates/_common
185 - 10
app.py
in front/admin_ui
672 10 9
split_names.py
in services/worker/src/worker/job_runners/config
91 4 6
prod.yaml
in chart/env
515 - 5
pyproject.toml
in services/worker
86 - 5
orchestrator.py
in libs/libcommon/src/libcommon
926 48 5
backfill.py
in jobs/cache_maintenance/src/cache_maintenance
115 8 4
compatible_libraries.py
in services/worker/src/worker/job_runners/dataset
646 13 4
parquet_and_info.py
in services/worker/src/worker/job_runners/config
1039 46 3
collector.py
in jobs/mongodb_migration/src/mongodb_migration
414 1 2
staging.yaml
in chart/env
290 - 2
ingress-internal.yaml
in chart/templates/services/webhook
23 - 2
tpl
_envCloudfront.tpl
in chart/templates/_env
26 - 2
tpl
_envHf.tpl
in chart/templates/_env
32 - 2
tpl
_envS3.tpl
in chart/templates/_env
26 - 2
hub_cache.py
in services/worker/src/worker/job_runners/dataset
114 3 2
search.py
in services/search/src/search/routes
225 2 2
pyproject.toml
in libs/libcommon
103 - 2
state.py
in libs/libcommon/src/libcommon
301 12 2
operations.py
in libs/libcommon/src/libcommon
232 11 2
storage.py
in libs/libcommon/src/libcommon
70 8 2
storage_client.py
in libs/libcommon/src/libcommon
102 11 2
_20240221160700_cache_merge_split_first_rows.py
in jobs/mongodb_migration/src/mongodb_migration/migrations
63 3 1
_20240206153000_cache_add_tags_in_hub_cache.py
in jobs/mongodb_migration/src/mongodb_migration/migrations
23 3 1
_20240221103200_cache_merge_config_split_names.py
in jobs/mongodb_migration/src/mongodb_migration/migrations
62 3 1
values.yaml
in chart
454 - 1
ingress.yaml
in chart/templates/services/webhook
23 - 1
ingress-internal.yaml
in chart/templates/services/api
41 - 1
ingress.yaml
in chart/templates/services/api
41 - 1
ingress-internal.yaml
in chart/templates/services/sse-api
23 - 1
ingress.yaml
in chart/templates/services/sse-api
23 - 1
ingress-internal.yaml
in chart/templates/services/rows
23 - 1
ingress.yaml
in chart/templates/services/rows
23 - 1
ingress-internal.yaml
in chart/templates/services/admin
32 - 1
ingress.yaml
in chart/templates/services/admin
32 - 1
ingress-internal.yaml
in chart/templates/services/search
33 - 1
ingress.yaml
in chart/templates/services/search
33 - 1
tpl
_envDiscussions.tpl
in chart/templates/_env
19 - 1
tpl
_envCommitter.tpl
in chart/templates/_env
14 - 1
tpl
_envWorker.tpl
in chart/templates/_env
79 - 1
tpl
_envCommon.tpl
in chart/templates/_env
35 - 1
croissant_crumbs.py
in services/worker/src/worker/job_runners/dataset
198 6 1
descriptive_statistics.py
in services/worker/src/worker/job_runners/split
251 7 1
first_rows.py
in services/worker/src/worker/job_runners/split
225 7 1
webhook.py
in services/webhook/src/webhook/routes
182 3 1
endpoint.py
in services/api/src/api/routes
187 4 1
authentication.py
in services/admin/src/admin
68 - 1
parquet_utils.py
in libs/libcommon/src/libcommon
482 17 1
croissant_utils.py
in libs/libcommon/src/libcommon
138 5 1