libs/libcommon/src/libcommon/orchestrator.py (4 lines): - line 42: # TODO: clean dangling cache entries - line 580: "job_id": "not used", # TODO: remove this field - line 950: }: # TODO: maybe support .huggingface.yaml later - line 1080: # TODO: delete the other files: metadata parquet services/api/src/api/routes/endpoint.py (3 lines): - line 54: # TODO: remove once full scan is implemented for spawning urls scan - line 65: # TODO: remove once full scan is implemented for spawning urls scan - line 139: # TODO: remove once full scan is implemented for spawning urls scan services/worker/src/worker/resources.py (1 line): - line 45: # TODO: check here if huggingface_hub and datasets use the same endpoint libs/libcommon/src/libcommon/prometheus.py (1 line): - line 101: # TODO: move to metrics, as for the other metrics (queue, cache) services/worker/src/worker/job_runners/_job_runner_with_cache.py (1 line): - line 39: # TODO: Refactor, need a way to generate payload based only on provided params services/worker/src/worker/utils.py (1 line): - line 143: # TODO: use huggingface_hub's hf_hub_url after libs/libcommon/src/libcommon/state.py (1 line): - line 20: # TODO: assets, cached_assets, parquet files services/admin/src/admin/routes/dataset_status.py (1 line): - line 47: # TODO: add information about the assets, cached_assets, and other files (metadata, parquet, duckdb) libs/libcommon/src/libcommon/exceptions.py (1 line): - line 536: ) # TODO: Change URL after next datasets release services/admin/src/admin/routes/metrics.py (1 line): - line 30: # TODO: Update disk usage from fsspec services/search/src/search/routes/filter.py (1 line): - line 166: # TODO: Will be moved to another process in parallel services/search/src/search/routes/search.py (1 line): - line 221: # TODO: Will be moved to another process in parallel services/worker/src/worker/job_runners/split/opt_in_out_urls_scan_from_streaming.py (1 line): - line 265: # ^ TODO: Change step name referring to image URLs scan specifically. services/api/src/api/config.py (1 line): - line 102: # TODO: allow passing the mapping between endpoint and processing steps via env vars services/sse-api/src/sse_api/watcher.py (1 line): - line 124: TODO: we don't want to send to all the subscribers jobs/mongodb_migration/src/mongodb_migration/collector.py (1 line): - line 105: # TODO: add a way to automatically collect migrations from the migrations/ folder libs/libapi/src/libapi/exceptions.py (1 line): - line 75: TODO: should we return DatasetNotFoundError instead? maybe the error code is leaking existence of private datasets.