chart/templates/_env/_envWorker.tpl (79 lines of code) (raw):

# SPDX-License-Identifier: Apache-2.0 # Copyright 2022 The HuggingFace Authors. {{- define "envWorker" -}} - name: WORKER_CONTENT_MAX_BYTES value: {{ .Values.worker.contentMaxBytes | quote}} - name: WORKER_HEARTBEAT_INTERVAL_SECONDS value: {{ .Values.worker.heartbeatIntervalSeconds | quote}} - name: WORKER_KILL_ZOMBIES_INTERVAL_SECONDS value: {{ .Values.worker.killZombiesIntervalSeconds | quote}} - name: WORKER_KILL_LONG_JOB_INTERVAL_SECONDS value: {{ .Values.worker.killLongJobIntervalSeconds | quote}} - name: WORKER_MAX_JOB_DURATION_SECONDS value: {{ .Values.worker.maxJobDurationSeconds | quote }} - name: WORKER_MAX_LOAD_PCT value: {{ .Values.worker.maxLoadPct | quote }} - name: WORKER_MAX_MEMORY_PCT value: {{ .Values.worker.maxMemoryPct | quote }} - name: WORKER_MAX_MISSING_HEARTBEATS value: {{ .Values.worker.maxMissingHeartbeats | quote }} - name: WORKER_SLEEP_SECONDS value: {{ .Values.worker.sleepSeconds | quote }} - name: TMPDIR value: "/tmp" # ^ensure the temporary files are created in /tmp, which is writable # specific to the /first-rows job runner - name: FIRST_ROWS_MAX_BYTES value: {{ .Values.firstRows.maxBytes | quote }} - name: FIRST_ROWS_MIN_CELL_BYTES value: {{ .Values.firstRows.minCellBytes | quote }} - name: FIRST_ROWS_MIN_NUMBER value: {{ .Values.firstRows.minNumber| quote }} - name: FIRST_ROWS_COLUMNS_MAX_NUMBER value: {{ .Values.firstRows.columnsMaxNumber| quote }} # specific to 'config-parquet-and-info' job runner - name: PARQUET_AND_INFO_COMMIT_MESSAGE value: {{ .Values.parquetAndInfo.commitMessage | quote }} - name: PARQUET_AND_INFO_MAX_DATASET_SIZE_BYTES value: {{ .Values.parquetAndInfo.maxDatasetSizeBytes | quote }} - name: PARQUET_AND_INFO_MAX_ROW_GROUP_BYTE_SIZE_FOR_COPY value: {{ .Values.parquetAndInfo.maxRowGroupByteSizeForCopy | quote }} - name: PARQUET_AND_INFO_SOURCE_REVISION value: {{ .Values.parquetAndInfo.sourceRevision | quote }} - name: PARQUET_AND_INFO_TARGET_REVISION value: {{ .Values.parquetAndInfo.targetRevision | quote }} - name: PARQUET_AND_INFO_URL_TEMPLATE value: {{ .Values.parquetAndInfo.urlTemplate | quote }} # specific to the split-opt-in-out-urls-scan job runner - name: OPT_IN_OUT_URLS_SCAN_COLUMNS_MAX_NUMBER value: {{ .Values.optInOutUrlsScan.columnsMaxNumber | quote }} - name: OPT_IN_OUT_URLS_SCAN_MAX_CONCURRENT_REQUESTS_NUMBER value: {{ .Values.optInOutUrlsScan.maxConcurrentRequestsNumber | quote }} - name: OPT_IN_OUT_URLS_SCAN_MAX_REQUESTS_PER_SECOND value: {{ .Values.optInOutUrlsScan.maxRequestsPerSecond | quote }} - name: OPT_IN_OUT_URLS_SCAN_ROWS_MAX_NUMBER value: {{ .Values.optInOutUrlsScan.rowsMaxNumber | quote }} - name: OPT_IN_OUT_URLS_SCAN_SPAWNING_TOKEN {{- if .Values.secrets.spawningToken.fromSecret }} valueFrom: secretKeyRef: name: {{ .Values.secrets.spawningToken.secretName | default (include "datasetsServer.infisical.secretName" $) | quote }} key: SPAWNING_TOKEN optional: false {{- else }} value: {{ .Values.secrets.spawningToken.value }} {{- end }} - name: OPT_IN_OUT_URLS_SCAN_URLS_NUMBER_PER_BATCH value: {{ .Values.optInOutUrlsScan.urlsNumberPerBatch | quote }} - name: OPT_IN_OUT_URLS_SCAN_SPAWNING_URL value: {{ .Values.optInOutUrlsScan.spawningUrl | quote }} - name: CONFIG_NAMES_MAX_NUMBER value: {{ .Values.configNames.maxNumber | quote }} # specific to 'split-descriptive-statistics' - name: DESCRIPTIVE_STATISTICS_MAX_SPLIT_SIZE_BYTES value: {{ .Values.descriptiveStatistics.maxSplitSizeBytes | quote }} - name: DESCRIPTIVE_STATISTICS_CACHE_DIRECTORY value: {{ .Values.descriptiveStatistics.cacheDirectory | quote }} - name: HF_HUB_ENABLE_HF_TRANSFER value: "1" {{- end -}}