# k8s_spark/parquet.spark.helm/values.yaml (87 lines of code) (raw):

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Default values for parquet.spark.helm.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

replicaCount: 4

# Settings for the Flask application.
# NOTE(review): spark_config_dict and geospatial_interval_by_platform are kept
# nested under flask_env here; the collapsed source lost its indentation, so
# confirm the nesting against the chart templates.
flask_env:
  parquet_file_name: "s3a://cdms-dev-in-situ-parquet/CDMS_insitu.parquet"
  spark_app_name: "parquet_flask_demo"
  log_level: "DEBUG"
  flask_prefix: "insitu"
  es_url: "https://aws-es-url/"
  es_port: "443"
  parquet_metadata_tbl: "cdms_parquet_meta_dev_v1"

  ## Specify Spark config options
  ## When using AWS EKS IRSA for AWS auth, use: {"spark.hadoop.fs.s3a.aws.credentials.provider": "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider"}
  ## NOTE(review): the provider named above is identical to the default set
  ## below; IRSA setups commonly use a web-identity token provider instead.
  ## Confirm which class is intended.
  spark_config_dict:
    # "spark.shuffle.service.enabled": "true", This should be "true", but "shuffle.service" not setup yet.
    "spark.shuffle.service.enabled": "false"
    "spark.dynamicAllocation.shuffleTracking.enabled": "true"
    "spark.dynamicAllocation.minExecutors": "4"
    "spark.dynamicAllocation.maxExecutors": "4"
    "spark.dynamicAllocation.initialExecutors": "4"
    "spark.dynamicAllocation.executorAllocationRatio": "1"
    "spark.dynamicAllocation.enabled": "true"
    "spark.hadoop.fs.s3a.aws.credentials.provider": "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider"
    "spark.memory.offHeap.enabled": "true"
    "spark.driver.memoryOverhead": "512M"
    "spark.hadoop.fs.s3a.block.size": "1073741824"
    "fs.s3a.block.size": "1073741824"
    "spark.sql.sources.partitionOverwriteMode": "dynamic"
    "spark.memory.offHeap.size": "512M"

  # The config, geospatial_interval_by_platform, allows specifying the
  # geospatial partition interval/limit at platform level. The dict is
  # organized like this:
  #   {<project>: {<platform_code_01>: <interval>, <platform_code_02>: <interval>}}
  # If not specified, 30 (which is the value defined in ingest_new_file.py) is
  # the default value.
  geospatial_interval_by_platform:
    "ICOADS Release 3.0":
      "23": "100"
      "31": "50"

# AWS EKS IRSA is favored over AWS IAM User credentials when possible
# Uncomment to enable IAM User Credential authentication
# aws_creds:
#   awskey: "xxxxxx"
#   awssecret: "xxxxxx"
#   awstoken: "xxxxxx"

image:
  repository: "waiphyojpl/cdms.parquet.flask"
  pullPolicy: IfNotPresent
  # Overrides the image tag whose default is the chart appVersion.
  tag: "t25"

imagePullSecrets: []
nameOverride: ""
fullnameOverride: ""

serviceAccount:
  # Specifies whether a service account should be created
  create: true
  # Annotations to add to the service account
  annotations: {}
  ## When using AWS EKS IRSA for AWS auth, set the below annotation using the configured IAM Role ARN.
  #
  #   eks.amazonaws.com/role-arn: 'arn:aws:iam::xxxxxxxxxxxxxx:role/parquet-spark'
  # The name of the service account to use.
  # If not set and create is true, a name is generated using the fullname template
  name: ""

# Extra environment variables for the application container.
# NOTE(review): the default is an empty map while the example below is a list
# of name/value entries; confirm which shape the templates expect.
extraEnvs: {}
#  - name: master_spark_url
#    value: spark://my.external.spark.service:7077

podAnnotations: {}

podSecurityContext: {}
  # fsGroup: 2000

securityContext: {}
  # capabilities:
  #   drop:
  #     - ALL
  # readOnlyRootFilesystem: true
  # runAsNonRoot: true
  # runAsUser: 1000

service:
  ## Service Type: ClusterIP / NodePort
  ##
  type: NodePort
  port: 9801
  nodePort: 30801

ingress:
  enabled: false
  annotations: {}
    # kubernetes.io/ingress.class: nginx
    # kubernetes.io/tls-acme: "true"
  hosts:
    - host: chart-example.local
      paths: []
      # - path: "/"
      #   pathType: "ImplementationSpecific"
  tls: []
  #  - secretName: chart-example-tls
  #    hosts:
  #      - chart-example.local

resources: {}
  # We usually recommend not to specify default resources and to leave this as a conscious
  # choice for the user. This also increases chances charts run on environments with little
  # resources, such as Minikube. If you do want to specify resources, uncomment the following
  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
  # limits:
  #   cpu: 100m
  #   memory: 128Mi
  # requests:
  #   cpu: 100m
  #   memory: 128Mi

autoscaling:
  enabled: false
  minReplicas: 1
  maxReplicas: 100
  targetCPUUtilizationPercentage: 80
  # targetMemoryUtilizationPercentage: 80

nodeSelector: {}

tolerations: []

affinity: {}

# livenessProbe:
#   httpGet:
#     path: '/1.0/doc/'
#     port: 9801
#   initialDelaySeconds: 5

# readinessProbe:
#   httpGet:
#     path: '/1.0/doc/'
#     port: 9801
#   initialDelaySeconds: 5

## The following are sane bitnami-spark defaults when being paired with a parquet-spark-helm deployment.
##
bitnami-spark:
  enabled: true
  ## To retain bitnami-spark but render ONLY default bitnami-spark helm chart values, remove the following keys and values.
  ##
  image:
    tag: "3.2.0-debian-10-r44"

  # Explicit empty map: a bare `master:` parses as null, which Helm treats as
  # "delete this key from the defaults" rather than "no overrides".
  master: {}
    ## When using AWS EKS IRSA for AWS auth, disable securityContext. Can potentially also keep
    ## enabled and set 'fsGroup: 65534', though this is currently untested.
    ##
    # securityContext:
    #   enabled: false
    #   fsGroup: 1001
    #   runAsUser: 1001
    #   runAsGroup: 0
    #   seLinuxOptions: {}

  worker:
    ## @param worker.memoryLimit Set the maximum memory the worker is allowed to use
    ##
    memoryLimit: "4g"
    ## @param worker.coreLimit Set the maximum number of cores that the worker can use
    ##
    coreLimit: "2"
    ## @param worker.replicaCount Number of spark workers (will be the minimum number when autoscaling is enabled)
    ##
    replicaCount: 8
    ## When using AWS EKS IRSA for AWS auth, disable securityContext. Can potentially also keep
    ## enabled and set 'fsGroup: 65534', though this is currently untested.
    ##
    # securityContext:
    #   enabled: false
    #   fsGroup: 1001
    #   runAsUser: 1001
    #   runAsGroup: 0
    #   seLinuxOptions: {}

  ## Service parameters
  ##
  service:
    ## @param service.type Kubernetes Service type
    ##
    type: NodePort
    ## Specify the nodePort(s) value(s) for the LoadBalancer and NodePort service types.
    ## ref: https://kubernetes.io/docs/concepts/services-networking/service/#type-nodeport
    ## @param service.nodePorts.cluster Kubernetes cluster node port
    ## @param service.nodePorts.web Kubernetes web node port
    ##
    nodePorts:
      cluster: "32131"
      web: "31140"

  ## Configure the ingress resource that allows you to access the
  ## Spark installation. Set up the URL
  ## ref: http://kubernetes.io/docs/user-guide/ingress/
  ##
  ingress:
    ## @param ingress.hostname Default host for the ingress resource
    ##
    hostname: tt.spark.local.test1

  serviceAccount:
    ## @param serviceAccount.create Enable the creation of a ServiceAccount for Spark pods
    ##
    create: true
    ## @param serviceAccount.name The name of the ServiceAccount to use.
    ## If not set and create is true, a name is generated using the spark.fullname template
    ##
    name: ""
    ## @param serviceAccount.annotations Annotations for Spark Service Account
    ##
    annotations: {}
    ## When using AWS EKS IRSA for AWS auth, set the below annotation using the configured IAM Role ARN.
    #
    #   eks.amazonaws.com/role-arn: 'arn:aws:iam::xxxxxxxxxxxxxx:role/parquet-spark'
    ## @param serviceAccount.automountServiceAccountToken Automount API credentials for a service account.
    ##
    automountServiceAccountToken: true