# chart/env/prod.yaml
# SPDX-License-Identifier: Apache-2.0
# Copyright 2022 The HuggingFace Authors.
# --- common parameters ---
# Values shared with the umbrella chart / sibling charts.
global:
  huggingface:
    service:
      ports:
        # Static NodePort numbers for each datasets-server service
        # (NodePorts must be unique within the cluster's NodePort range).
        datasetsServer:
          admin: 32702
          api: 31370
          rows: 31371
          search: 31372
          sseApi: 31373
# Container images. Every component is pinned to the same immutable commit
# tag (sha-fb3399a), so a deploy rolls all jobs and services together.
images:
  pullPolicy: IfNotPresent
  pullSecrets: []
  # One-off jobs (run as Helm hooks / cron jobs).
  jobs:
    mongodbMigration:
      registry: huggingface
      useGlobalRegistry: false
      repository: datasets-server-jobs-mongodb_migration
      tag: sha-fb3399a
    cacheMaintenance:
      registry: huggingface
      useGlobalRegistry: false
      repository: datasets-server-jobs-cache_maintenance
      tag: sha-fb3399a
  # Long-running services.
  services:
    admin:
      registry: huggingface
      useGlobalRegistry: false
      repository: datasets-server-services-admin
      tag: sha-fb3399a
    api:
      registry: huggingface
      useGlobalRegistry: false
      repository: datasets-server-services-api
      tag: sha-fb3399a
    rows:
      registry: huggingface
      useGlobalRegistry: false
      repository: datasets-server-services-rows
      tag: sha-fb3399a
    search:
      registry: huggingface
      useGlobalRegistry: false
      repository: datasets-server-services-search
      tag: sha-fb3399a
    sseApi:
      registry: huggingface
      useGlobalRegistry: false
      repository: datasets-server-services-sse-api
      tag: sha-fb3399a
    worker:
      registry: huggingface
      useGlobalRegistry: false
      repository: datasets-server-services-worker
      tag: sha-fb3399a
    webhook:
      registry: huggingface
      useGlobalRegistry: false
      repository: datasets-server-services-webhook
      tag: sha-fb3399a
# Secret material. Every entry here uses fromSecret: true with an empty
# secretName, i.e. no literal secret values are stored in this file; the
# actual values come from pre-existing Kubernetes secrets (empty secretName
# leaves the name to the chart's defaulting) and from Infisical.
secrets:
  infisical:
    enabled: true
    env: "prod-us-east-1"
  # Connection string of the external MongoDB (the in-cluster mongodb
  # subchart is disabled below).
  mongoUrl:
    fromSecret: true
    secretName: ""
  appHfToken:
    fromSecret: true
    secretName: ""
  appParquetConverterHfToken:
    fromSecret: true
    secretName: ""
  # Shared secret used to authenticate Hub webhook calls.
  hfWebhookSecret:
    fromSecret: true
    secretName: ""
  hfJwtAdditionalPublicKeys:
    fromSecret: true
    secretName: ""
  spawningToken:
    fromSecret: true
    secretName: ""
  # Credentials for the S3 bucket holding assets/cached-assets.
  s3:
    accessKeyId:
      fromSecret: true
      secretName: ""
    secretAccessKey:
      fromSecret: true
      secretName: ""
  # CloudFront signed-URL key pair.
  cloudfront:
    keyPairId:
      fromSecret: true
      secretName: ""
    privateKey:
      fromSecret: true
      secretName: ""
# Pre-existing PVCs (created outside this chart) backing on-disk storage.
persistence:
  duckDBIndex:
    existingClaim: "datasets-server-duckdb-pvc"
  parquetMetadata:
    existingClaim: "datasets-server-parquet-pvc"
# Metrics/monitoring integration is enabled in prod.
monitoring:
  enabled: true
# In-cluster MongoDB subchart is disabled; prod connects via the
# secrets.mongoUrl secret above.
mongodb:
  enabled: false
common:
  # URL of the HuggingFace Hub
  hfEndpoint: "https://huggingface.co"
log:
  # Log level
  level: "INFO"
firstRows:
  # Quoted numeric strings (with underscore separators) are passed to the
  # application as strings and parsed there.
  maxBytes: "200_000"
parquetAndInfo:
  maxDatasetSizeBytes: "5_000_000_000"
  maxRowGroupByteSizeForCopy: "300_000_000"
optInOutUrlsScan:
  maxConcurrentRequestsNumber: 100
  maxRequestsPerSecond: 50
  # NOTE(review): unquoted 100_000 — YAML 1.1 parsers read this as the
  # integer 100000, unlike the quoted "..._..." string values in this file;
  # confirm the intended type is consistent with the consumer.
  rowsMaxNumber: 100_000
  urlsNumberPerBatch: 1000
# Static assets are stored on S3 (bucket/prefix, no scheme).
assets:
  storageRoot: "hf-datasets-server-statics/assets"
  storageProtocol: "s3"
cachedAssets:
  storageRoot: "hf-datasets-server-statics/cached-assets"
  storageProtocol: "s3"
duckDBIndex:
  maxSplitSizeBytes: "5_000_000_000"
rowsIndex:
  maxArrowDataInMemory: "300_000_000"
descriptiveStatistics:
  maxSplitSizeBytes: "5_000_000_000"
# --- jobs (pre-install/upgrade hooks) ---
# Pre-install/upgrade hook job: applies MongoDB migrations before rollout.
mongodbMigration:
  nodeSelector:
    role-datasets-server: "true"
  # Tolerate the dedicated datasets-server taint so the pod can land on the
  # reserved nodes selected above.
  tolerations:
    - key: "huggingface.co/datasets-server"
      operator: "Exists"
      effect: "NoSchedule"
  resources:
    requests:
      cpu: 1
    limits:
      cpu: 1
# --- cron jobs ---
# Cron job: full backfill of the cache.
backfill:
  enabled: true
  log:
    # NOTE(review): lowercase "debug" while the global log.level is "INFO"
    # (uppercase) — confirm the consumer is case-insensitive.
    level: "debug"
  action: "backfill"
  schedule: "05 12 * * *"
  # every day, at 12:05
  nodeSelector:
    role-datasets-server: "true"
  tolerations:
    - key: "huggingface.co/datasets-server"
      operator: "Exists"
      effect: "NoSchedule"
  # requests == limits for memory → stable, non-burstable memory footprint.
  resources:
    requests:
      cpu: 1
      memory: "16Gi"
    limits:
      cpu: 2
      memory: "16Gi"
# Cron job: re-processes cache entries that failed with retryable errors.
backfillRetryableErrors:
  enabled: true
  log:
    # NOTE(review): lowercase "debug" while the global log.level is "INFO"
    # (uppercase) — confirm the consumer is case-insensitive.
    level: "debug"
  action: "backfill-retryable-errors"
  schedule: "0 */2 * * *"
  # every 2 hours, at minute 0 (previous comment said "every 30 minutes",
  # which did not match the schedule)
  nodeSelector:
    role-datasets-server: "true"
  tolerations:
    - key: "huggingface.co/datasets-server"
      operator: "Exists"
      effect: "NoSchedule"
  resources:
    requests:
      cpu: 1
      memory: "8Gi"
    limits:
      cpu: 2
      memory: "8Gi"
# NOTE(review): only placement is overridden here — no action/schedule/
# resources; presumably the chart's defaults apply. Verify in the templates.
postMessages:
  nodeSelector:
    role-datasets-server: "true"
  tolerations:
    - key: "huggingface.co/datasets-server"
      operator: "Exists"
      effect: "NoSchedule"
# Cron job: publishes queue-size metrics (consumed by the workers'
# queue-based autoscaling below).
queueMetricsCollector:
  action: "collect-queue-metrics"
  schedule: "*/10 * * * *"
  # every ten minutes
  nodeSelector:
    role-datasets-server: "true"
  tolerations:
    - key: "huggingface.co/datasets-server"
      operator: "Exists"
      effect: "NoSchedule"
  resources:
    requests:
      cpu: 1
    limits:
      cpu: 1
      memory: "512Mi"
# Cron job: publishes cache-content metrics.
cacheMetricsCollector:
  enabled: true
  action: "collect-cache-metrics"
  schedule: "50 */3 * * *"
  # every 3 hours, at minute 50
  nodeSelector:
    role-datasets-server: "true"
  tolerations:
    - key: "huggingface.co/datasets-server"
      operator: "Exists"
      effect: "NoSchedule"
  resources:
    requests:
      cpu: 1
    limits:
      cpu: 1
      memory: "512Mi"
# --- ALB ---
# Shared AWS ALB (aws-load-balancer-controller) settings for the public
# "datasets-server" ingress group; the per-service ingresses below join the
# same group with their own group.order.
ingress:
  tls:
    - hosts:
        - "datasets-server.huggingface.co"
  annotations:
    # ACM certificate used for TLS termination at the ALB.
    alb.ingress.kubernetes.io/certificate-arn: arn:aws:acm:us-east-1:707930574880:certificate/134d1ed3-7ac1-481d-b2e4-31e17fbacb28
    alb.ingress.kubernetes.io/load-balancer-name: "hub-datasets-server-prod"
    alb.ingress.kubernetes.io/tags: "Env=prod,Project=datasets-server,Terraform=true"
    # Register only nodes carrying the datasets-server role as targets.
    alb.ingress.kubernetes.io/target-node-labels: role-datasets-server=true
    alb.ingress.kubernetes.io/healthcheck-path: "/healthcheck"
    alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80, "HTTPS": 443}]'
    alb.ingress.kubernetes.io/scheme: "internet-facing"
    alb.ingress.kubernetes.io/group.name: "datasets-server"
    kubernetes.io/ingress.class: "alb"
    # High order = lowest rule priority: the service-specific rules
    # (group.order 1-5 below) are evaluated first.
    alb.ingress.kubernetes.io/group.order: "100"
# --- services ---
# Admin service (ops endpoints: cache reports, metrics, ...).
admin:
  # Number of reports in /cache-reports/... endpoints
  cacheReportsNumResults: 1000
  # Number of reports in /cache-reports-with-content/... endpoints
  cacheReportsWithContentNumResults: 100
  # the timeout in seconds for the requests to the Hugging Face Hub.
  hfTimeoutSeconds: "10"
  # Number of uvicorn workers for running the application
  # (2 x $num_cores) + 1
  # https://docs.gunicorn.org/en/stable/design.html#how-many-workers
  uvicornNumWorkers: "9"
  nodeSelector:
    role-datasets-server: "true"
  tolerations:
    - key: "huggingface.co/datasets-server"
      operator: "Exists"
      effect: "NoSchedule"
  replicas: 2
  service:
    type: NodePort
  # Public ingress, highest rule priority in the shared ALB group.
  ingress:
    enabled: true
    annotations:
      alb.ingress.kubernetes.io/group.order: "1"
      alb.ingress.kubernetes.io/target-node-labels: role-datasets-server=true
      # ALB fixed-response action: serve a plain 401 (used to block
      # unauthenticated access to metrics).
      alb.ingress.kubernetes.io/actions.metrics-unauthorized: '{"type":"fixed-response","fixedResponseConfig":{"contentType":"text/plain","statusCode":"401","messageBody":"401 Unauthorized"}}'
  # Second, internal-only ALB for in-VPC callers.
  ingressInternal:
    enabled: true
    annotations:
      external-dns.alpha.kubernetes.io/hostname: "internal.datasets-server.huggingface.co"
      alb.ingress.kubernetes.io/scheme: "internal"
      alb.ingress.kubernetes.io/load-balancer-name: "hub-datasets-server-prod-int"
      alb.ingress.kubernetes.io/group.name: "datasets-server-internal"
  resources:
    requests:
      cpu: 1
      memory: "8Gi"
    limits:
      cpu: 4
      memory: "8Gi"
hf:
  timeoutSeconds: "10"
# Main public API service (dedicated node pool, 20 replicas).
api:
  # Number of uvicorn workers for running the application
  # (2 x $num_cores) + 1
  # https://docs.gunicorn.org/en/stable/design.html#how-many-workers
  uvicornNumWorkers: "9"
  nodeSelector:
    role-datasets-server-api: "true"
  tolerations:
    - key: "huggingface.co/datasets-server-api"
      operator: "Exists"
      effect: "NoSchedule"
  replicas: 20
  service:
    type: NodePort
  ingress:
    enabled: true
    annotations:
      alb.ingress.kubernetes.io/target-node-labels: role-datasets-server-api=true
      # ALB redirect action: 302 to the OpenAPI spec hosted on GitHub.
      alb.ingress.kubernetes.io/actions.openapi-redirect: '{"Type":"redirect","RedirectConfig":{"Host":"raw.githubusercontent.com","Path":"/huggingface/dataset-viewer/main/docs/source/openapi.json","Port":"443","Protocol":"HTTPS","Query":"#{query}","StatusCode":"HTTP_302"}}'
      # ALB fixed-response action: plain 401 for unauthenticated metrics.
      alb.ingress.kubernetes.io/actions.metrics-unauthorized: '{"type":"fixed-response","fixedResponseConfig":{"contentType":"text/plain","statusCode":"401","messageBody":"401 Unauthorized"}}'
  ingressInternal:
    enabled: true
    annotations:
      external-dns.alpha.kubernetes.io/hostname: "internal.datasets-server.huggingface.co"
      alb.ingress.kubernetes.io/scheme: "internal"
      alb.ingress.kubernetes.io/load-balancer-name: "hub-datasets-server-prod-int"
      alb.ingress.kubernetes.io/group.name: "datasets-server-internal"
  # requests == limits → Guaranteed QoS for this latency-sensitive service.
  resources:
    requests:
      cpu: 4
      memory: "14Gi"
    limits:
      cpu: 4
      memory: "14Gi"
# /rows service (pagination over parquet data; dedicated node pool).
rows:
  # Number of uvicorn workers for running the application
  # (2 x $num_cores) + 1
  # https://docs.gunicorn.org/en/stable/design.html#how-many-workers
  # but we only set to 2 to avoid OOM
  uvicornNumWorkers: "2"
  nodeSelector:
    role-datasets-server-rows: "true"
  tolerations:
    - key: "huggingface.co/datasets-server-rows"
      operator: "Exists"
      effect: "NoSchedule"
  replicas: 12
  service:
    type: NodePort
  ingress:
    enabled: true
    annotations:
      alb.ingress.kubernetes.io/group.order: "2"
      alb.ingress.kubernetes.io/target-node-labels: role-datasets-server-rows=true
  ingressInternal:
    enabled: true
    annotations:
      external-dns.alpha.kubernetes.io/hostname: "internal.datasets-server.huggingface.co"
      alb.ingress.kubernetes.io/group.order: "2"
      alb.ingress.kubernetes.io/scheme: "internal"
      alb.ingress.kubernetes.io/load-balancer-name: "hub-datasets-server-prod-int"
      alb.ingress.kubernetes.io/group.name: "datasets-server-internal"
  # Memory request == limit (no burst); CPU may burst 1 → 4.
  resources:
    requests:
      cpu: 1
      memory: "6500Mi"
    limits:
      cpu: 4
      memory: "6500Mi"
# /search and /filter service (dedicated node pool).
search:
  # Number of uvicorn workers for running the application
  # (2 x $num_cores) + 1
  # https://docs.gunicorn.org/en/stable/design.html#how-many-workers
  # but we only set to 2 to avoid OOM
  uvicornNumWorkers: "2"
  nodeSelector:
    role-datasets-server-search: "true"
  tolerations:
    - key: "huggingface.co/datasets-server-search"
      operator: "Exists"
      effect: "NoSchedule"
  replicas: 3
  service:
    type: NodePort
  ingress:
    enabled: true
    annotations:
      alb.ingress.kubernetes.io/group.order: "3"
      alb.ingress.kubernetes.io/target-node-labels: role-datasets-server-search=true
      # LB-cookie stickiness (5 min) so follow-up queries reach the same
      # target; target-type ip registers pod IPs directly instead of
      # NodePorts (required for per-pod stickiness).
      alb.ingress.kubernetes.io/target-group-attributes: stickiness.enabled=true,stickiness.lb_cookie.duration_seconds=300
      alb.ingress.kubernetes.io/target-type: ip
  ingressInternal:
    enabled: true
    annotations:
      external-dns.alpha.kubernetes.io/hostname: "internal.datasets-server.huggingface.co"
      alb.ingress.kubernetes.io/group.order: "3"
      alb.ingress.kubernetes.io/scheme: "internal"
      alb.ingress.kubernetes.io/load-balancer-name: "hub-datasets-server-prod-int"
      alb.ingress.kubernetes.io/group.name: "datasets-server-internal"
  # NOTE(review): very large memory request (250Gi) — presumably sized for
  # in-memory DuckDB indexes; confirm against node instance size.
  resources:
    requests:
      cpu: 1
      memory: "250Gi"
    limits:
      cpu: 64
      memory: "250Gi"
# Server-sent-events API (shares the generic datasets-server node pool).
sseApi:
  # Number of uvicorn workers for running the application
  # (2 x $num_cores) + 1
  # https://docs.gunicorn.org/en/stable/design.html#how-many-workers
  uvicornNumWorkers: "1"
  nodeSelector:
    role-datasets-server: "true"
  tolerations:
    - key: "huggingface.co/datasets-server"
      operator: "Exists"
      effect: "NoSchedule"
  replicas: 2
  service:
    type: NodePort
  ingress:
    enabled: true
    annotations:
      alb.ingress.kubernetes.io/group.order: "4"
      alb.ingress.kubernetes.io/target-node-labels: role-datasets-server=true
  ingressInternal:
    enabled: true
    annotations:
      external-dns.alpha.kubernetes.io/hostname: "internal.datasets-server.huggingface.co"
      alb.ingress.kubernetes.io/group.order: "4"
      alb.ingress.kubernetes.io/scheme: "internal"
      alb.ingress.kubernetes.io/load-balancer-name: "hub-datasets-server-prod-int"
      alb.ingress.kubernetes.io/group.name: "datasets-server-internal"
  resources:
    requests:
      cpu: 1
      memory: "512Mi"
    limits:
      cpu: 4
      memory: "4Gi"
# Job workers, split into three pools by job difficulty. The
# workerDifficultyMin/Max values partition the 0-100 scale:
# light 0-40, medium 40-70, heavy 70-100. Each pool autoscales on the
# "worker_size_jobs_count" queue metric (see queueMetricsCollector above).
workers:
  - deployName: "heavy"
    prometheusMultiprocDirectory: "/tmp"
    uvicornHostname: "0.0.0.0"
    uvicornNumWorkers: "1"
    uvicornPort: 8080
    workerDifficultyMax: 100
    workerDifficultyMin: 70
    nodeSelector:
      role-datasets-server-worker: "true"
    tolerations:
      - key: "huggingface.co/datasets-server-worker"
        operator: "Exists"
        effect: "NoSchedule"
    # Baseline replica count; autoscaling keeps it within 4..40.
    replicas: 4
    autoscaling:
      enabled: true
      minReplicas: 4
      maxReplicas: 40
      targets:
        - targetQueueName: "worker_size_jobs_count"
          targetQueueLength: 50
          targetWorkerSize: "heavy"
    # Memory limit above request allows some burst (34Gi → 44Gi).
    resources:
      requests:
        cpu: 8
        memory: "34Gi"
      limits:
        cpu: 8
        memory: "44Gi"
  - deployName: "medium"
    prometheusMultiprocDirectory: "/tmp"
    uvicornHostname: "0.0.0.0"
    uvicornNumWorkers: "1"
    uvicornPort: 8080
    workerDifficultyMax: 70
    workerDifficultyMin: 40
    nodeSelector:
      role-datasets-server-worker: "true"
    tolerations:
      - key: "huggingface.co/datasets-server-worker"
        operator: "Exists"
        effect: "NoSchedule"
    # Baseline replica count; autoscaling keeps it within 10..80.
    replicas: 10
    autoscaling:
      enabled: true
      minReplicas: 10
      maxReplicas: 80
      targets:
        - targetQueueName: "worker_size_jobs_count"
          targetQueueLength: 150
          targetWorkerSize: "medium"
    resources:
      requests:
        cpu: 2
        memory: "14Gi"
      limits:
        cpu: 2
        memory: "14Gi"
  - deployName: "light"
    prometheusMultiprocDirectory: "/tmp"
    uvicornHostname: "0.0.0.0"
    uvicornNumWorkers: "1"
    uvicornPort: 8080
    workerDifficultyMax: 40
    workerDifficultyMin: 0
    # Light jobs run on a separate, cheaper node pool.
    nodeSelector:
      role-datasets-server-worker-light: "true"
    tolerations:
      - key: "huggingface.co/datasets-server-worker-light"
        operator: "Exists"
        effect: "NoSchedule"
    # Baseline replica count; autoscaling keeps it within 4..50.
    replicas: 4
    autoscaling:
      enabled: true
      minReplicas: 4
      maxReplicas: 50
      targets:
        - targetQueueName: "worker_size_jobs_count"
          targetQueueLength: 150
          targetWorkerSize: "light"
    resources:
      requests:
        cpu: 200m
        memory: "100Mi"
      limits:
        cpu: 2
        memory: "1Gi"
# Webhook receiver for Hub events (dedicated node pool; authenticated via
# secrets.hfWebhookSecret above).
webhook:
  # Number of uvicorn workers for running the application
  # (2 x $num_cores) + 1
  # https://docs.gunicorn.org/en/stable/design.html#how-many-workers
  uvicornNumWorkers: "9"
  nodeSelector:
    role-datasets-server-webhook: "true"
  tolerations:
    - key: "huggingface.co/datasets-server-webhook"
      operator: "Exists"
      effect: "NoSchedule"
  replicas: 4
  service:
    type: NodePort
  ingress:
    enabled: true
    annotations:
      alb.ingress.kubernetes.io/group.order: "5"
      alb.ingress.kubernetes.io/target-node-labels: role-datasets-server-webhook=true
  ingressInternal:
    enabled: true
    annotations:
      external-dns.alpha.kubernetes.io/hostname: "internal.datasets-server.huggingface.co"
      alb.ingress.kubernetes.io/group.order: "5"
      alb.ingress.kubernetes.io/scheme: "internal"
      alb.ingress.kubernetes.io/load-balancer-name: "hub-datasets-server-prod-int"
      alb.ingress.kubernetes.io/group.name: "datasets-server-internal"
  # requests == limits → Guaranteed QoS.
  resources:
    requests:
      cpu: 4
      memory: "4Gi"
    limits:
      cpu: 4
      memory: "4Gi"