use-cases/rag-pipeline/backend/manifests/deployment.yaml (173 lines of code) (raw):

# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Manifests for the RAG backend and its OpenTelemetry collector.
# Values written as ${NAME} are placeholders; presumably they are rendered by
# an envsubst-style step before `kubectl apply` -- confirm against the deploy
# tooling. Placeholders are quoted so an empty expansion stays a string.

---
# RAG backend workload: a single replica of the `rag` container.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: rag-backend
spec:
  replicas: 1
  selector:
    matchLabels:
      app: rag-backend
  template:
    metadata:
      labels:
        app: rag-backend
    spec:
      serviceAccountName: "${KUBERNETES_SERVICE_ACCOUNT}"
      containers:
        - name: rag
          image: "${CONTAINER_IMAGE_URL}"
          imagePullPolicy: Always
          env:
            - name: CATALOG_DB
              value: "${CATALOG_DB}"
            - name: CATALOG_TABLE_NAME
              value: "${CATALOG_TABLE_NAME}"
            - name: MLP_DB_INSTANCE_URI
              value: "${DB_INSTANCE_URI}"
            - name: GEMMA_IT_ENDPOINT
              value: "${GEMMA_IT_ENDPOINT}"
            - name: MLP_KUBERNETES_NAMESPACE
              value: "${MLP_KUBERNETES_NAMESPACE}"
            - name: TEXT_EMBEDDING_ENDPOINT
              value: "${EMBEDDING_ENDPOINT_TEXT}"
            - name: IMAGE_EMBEDDING_ENDPOINT
              value: "${EMBEDDING_ENDPOINT_IMAGE}"
            - name: MULTIMODAL_EMBEDDING_ENDPOINT
              value: "${EMBEDDING_ENDPOINT_MULTIMODAL}"
            - name: EMBEDDING_COLUMN_TEXT
              value: "${EMBEDDING_COLUMN_TEXT}"
            - name: EMBEDDING_COLUMN_IMAGE
              value: "${EMBEDDING_COLUMN_IMAGE}"
            - name: EMBEDDING_COLUMN_MULTIMODAL
              value: "${EMBEDDING_COLUMN_MULTIMODAL}"
            - name: ROW_COUNT
              value: "${ROW_COUNT}"
            # Holds the name of the collector Service defined further down so
            # it can be expanded into the endpoint below.
            # NOTE(review): OTEL_SERVICE_NAME is also the standard OpenTelemetry
            # SDK variable for the `service.name` resource attribute, so the
            # backend's telemetry may be reported as "opentelemetry-collector"
            # -- confirm this is intended.
            - name: OTEL_SERVICE_NAME
              value: "opentelemetry-collector"
            # $(OTEL_SERVICE_NAME) is Kubernetes dependent-variable expansion;
            # it only resolves because OTEL_SERVICE_NAME is declared earlier
            # in this list. 4317 is the collector Service's gRPC port.
            - name: OTEL_EXPORTER_OTLP_ENDPOINT
              value: "http://$(OTEL_SERVICE_NAME):4317"
            - name: OTEL_TRACES_EXPORTER
              value: "otlp"
          # Requests are set equal to limits for every resource.
          resources:
            requests:
              cpu: "2"
              memory: "25Gi"
              ephemeral-storage: "25Gi"
            limits:
              cpu: "2"
              memory: "25Gi"
              ephemeral-storage: "25Gi"
      # Schedule only onto nodes labelled resource-type=cpu ...
      nodeSelector:
        resource-type: "cpu"
      # ... and tolerate the on-demand=true:NoSchedule taint.
      tolerations:
        - key: "on-demand"
          value: "true"
          operator: "Equal"
          effect: "NoSchedule"

---
# In-cluster endpoint for the RAG backend Pods on TCP 8000.
apiVersion: v1
kind: Service
metadata:
  name: rag-backend
spec:
  selector:
    app: rag-backend
  type: ClusterIP
  ports:
    - protocol: TCP
      port: 8000
      targetPort: 8000

---
# OpenTelemetry collector, configured from the `otel-config` ConfigMap.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: otel-collector
spec:
  replicas: 1
  selector:
    matchLabels:
      app: otel-collector
  template:
    metadata:
      labels:
        app: otel-collector
    spec:
      serviceAccountName: "${OTEL_KUBERNETES_SERVICE_ACCOUNT}"
      containers:
        - name: otel-collector
          imagePullPolicy: Always
          image: otel/opentelemetry-collector-contrib:0.108.0
          # Points at the file projected by the `otel-config` volume below.
          args:
            - --config
            - /etc/otel/config.yaml
          env:
            # Pod IP from the downward API; the collector config references
            # it as ${env:MY_POD_IP} for its receiver endpoints.
            - name: MY_POD_IP
              valueFrom:
                fieldRef:
                  apiVersion: v1
                  fieldPath: status.podIP
          volumeMounts:
            - mountPath: /etc/otel/
              name: otel-config
      volumes:
        # Projects the ConfigMap key `collector.yaml` as `config.yaml`, i.e.
        # /etc/otel/config.yaml inside the container.
        - name: otel-config
          configMap:
            name: otel-config
            items:
              - key: collector.yaml
                path: config.yaml

---
# Service in front of the collector Pods. Its name is the hostname the RAG
# backend uses in OTEL_EXPORTER_OTLP_ENDPOINT. Note the selector targets the
# Pod label `app: otel-collector`, which differs from this Service's own label.
apiVersion: v1
kind: Service
metadata:
  name: opentelemetry-collector
  labels:
    app: opentelemetry-collector
spec:
  type: ClusterIP
  selector:
    app: otel-collector
  internalTrafficPolicy: Cluster
  ports:
    - name: otel-grpc
      protocol: TCP
      port: 4317
      targetPort: 4317
    - name: otlp-http
      port: 4318
      targetPort: 4318
      protocol: TCP

---
# Collector configuration. Everything under `collector.yaml: |` is a literal
# string handed to the collector, so comments inside it are part of the data.
apiVersion: v1
kind: ConfigMap
metadata:
  name: otel-config
data:
  collector.yaml: |
    receivers:
      # Enable endpoints for receiving data
      # supplied in OTLP format.
      otlp:
        protocols:
          http:
            endpoint: ${env:MY_POD_IP}:4318
          grpc:
            endpoint: ${env:MY_POD_IP}:4317
    exporters:
      # Export traces using the standard googlecloud exporter
      googlecloud:
        user_agent: Google-Cloud-OTLP
    processors:
      # Batch telemetry together to more efficiently send to GCP
      batch:
        send_batch_max_size: 500
        send_batch_size: 500
        timeout: 1s
      # If running on GCP (e.g. on GKE), detect resource attributes from the environment.
      resourcedetection:
        detectors: ["env", "gcp"]
    service:
      pipelines:
        traces:
          receivers: ["otlp"]
          processors: ["batch", "resourcedetection"]
          exporters: ["googlecloud"]
        metrics:
          receivers: ["otlp"]
          processors: ["batch", "resourcedetection"]
          exporters: ["googlecloud"]