libs/libcommon/src/libcommon/orchestrator.py (4 lines): - line 42: # TODO: clean dangling cache entries - line 580: "job_id": "not used", # TODO: remove this field - line 950: }: # TODO: maybe support .huggingface.yaml later - line 1080: # TODO: delete the other files: metadata parquet services/api/src/api/routes/endpoint.py (3 lines): - line 54: # TODO: remove once full scan is implemented for spawning urls scan - line 65: # TODO: remove once full scan is implemented for spawning urls scan - line 139: # TODO: remove once full scan is implemented for spawning urls scan services/worker/src/worker/resources.py (1 line): - line 45: # TODO: check here if huggingface_hub and datasets use the same endpoint libs/libcommon/src/libcommon/prometheus.py (1 line): - line 101: # TODO: move to metrics, as for the other metrics (queue, cache) services/worker/src/worker/job_runners/_job_runner_with_cache.py (1 line): - line 39: # TODO: Refactor, need a way to generate payload based only on provided params services/worker/src/worker/utils.py (1 line): - line 143: # TODO: use huggingface_hub's hf_hub_url after libs/libcommon/src/libcommon/state.py (1 line): - line 20: # TODO: assets, cached_assets, parquet files services/admin/src/admin/routes/dataset_status.py (1 line): - line 47: # TODO: add information about the assets, cached_assets, and other files (metadata, parquet, duckdb) libs/libcommon/src/libcommon/exceptions.py (1 line): - line 536: ) # TODO: Change URL after next datasets release services/admin/src/admin/routes/metrics.py (1 line): - line 30: # TODO: Update disk usage from fsspec services/search/src/search/routes/filter.py (1 line): - line 166: # TODO: Will be moved to another process in parallel services/search/src/search/routes/search.py (1 line): - line 221: # TODO: Will be moved to another process in parallel services/worker/src/worker/job_runners/split/opt_in_out_urls_scan_from_streaming.py (1 line): - line 265: # ^ TODO: Change step name referring to image URLs scan specifically. services/api/src/api/config.py (1 line): - line 102: # TODO: allow passing the mapping between endpoint and processing steps via env vars services/sse-api/src/sse_api/watcher.py (1 line): - line 124: TODO: we don't want to send to all the subscribers jobs/mongodb_migration/src/mongodb_migration/collector.py (1 line): - line 105: # TODO: add a way to automatically collect migrations from the migrations/ folder libs/libapi/src/libapi/exceptions.py (1 line): - line 75: TODO: should we return DatasetNotFoundError instead? maybe the error code is leaking existence of private datasets.