# use-cases/rag-pipeline/backend/manifests/deployment.yaml (173 lines of code) (raw):
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Deployment running a single replica of the RAG backend container.
#
# Values written as ${NAME} are placeholders that Kubernetes does not expand
# itself; they must be substituted before this manifest is applied
# (presumably with envsubst or a similar tool -- confirm against the deploy
# scripts). They are quoted so that an empty or number-like substitution is
# still parsed as a string.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: rag-backend
spec:
  replicas: 1
  selector:
    matchLabels:
      app: rag-backend
  template:
    metadata:
      labels:
        app: rag-backend
    spec:
      serviceAccountName: "${KUBERNETES_SERVICE_ACCOUNT}"
      containers:
        - name: rag
          image: "${CONTAINER_IMAGE_URL}"
          imagePullPolicy: Always
          env:
            # Catalog database settings.
            - name: CATALOG_DB
              value: "${CATALOG_DB}"
            - name: CATALOG_TABLE_NAME
              value: "${CATALOG_TABLE_NAME}"
            # Note: the container variable is MLP_DB_INSTANCE_URI while the
            # placeholder it is filled from is DB_INSTANCE_URI.
            - name: MLP_DB_INSTANCE_URI
              value: "${DB_INSTANCE_URI}"
            - name: GEMMA_IT_ENDPOINT
              value: "${GEMMA_IT_ENDPOINT}"
            - name: MLP_KUBERNETES_NAMESPACE
              value: "${MLP_KUBERNETES_NAMESPACE}"
            # Embedding endpoints; the container-side names
            # (<KIND>_EMBEDDING_ENDPOINT) differ from the placeholder names
            # (EMBEDDING_ENDPOINT_<KIND>).
            - name: TEXT_EMBEDDING_ENDPOINT
              value: "${EMBEDDING_ENDPOINT_TEXT}"
            - name: IMAGE_EMBEDDING_ENDPOINT
              value: "${EMBEDDING_ENDPOINT_IMAGE}"
            - name: MULTIMODAL_EMBEDDING_ENDPOINT
              value: "${EMBEDDING_ENDPOINT_MULTIMODAL}"
            # Embedding column names, one per modality.
            - name: EMBEDDING_COLUMN_TEXT
              value: "${EMBEDDING_COLUMN_TEXT}"
            - name: EMBEDDING_COLUMN_IMAGE
              value: "${EMBEDDING_COLUMN_IMAGE}"
            - name: EMBEDDING_COLUMN_MULTIMODAL
              value: "${EMBEDDING_COLUMN_MULTIMODAL}"
            - name: ROW_COUNT
              value: "${ROW_COUNT}"
            # OTEL_SERVICE_NAME is the standard OpenTelemetry SDK variable
            # for the `service.name` resource attribute, so it must identify
            # this workload, not the collector that telemetry is sent to.
            - name: OTEL_SERVICE_NAME
              value: "rag-backend"
            # In-cluster collector Service (`opentelemetry-collector`) on
            # its OTLP/gRPC port. Written out literally instead of being
            # interpolated from OTEL_SERVICE_NAME.
            - name: OTEL_EXPORTER_OTLP_ENDPOINT
              value: "http://opentelemetry-collector:4317"
            - name: OTEL_TRACES_EXPORTER
              value: "otlp"
          # Requests are set equal to limits for every resource.
          resources:
            requests:
              cpu: "2"
              memory: "25Gi"
              ephemeral-storage: "25Gi"
            limits:
              cpu: "2"
              memory: "25Gi"
              ephemeral-storage: "25Gi"
      # Schedule only onto nodes labelled resource-type=cpu ...
      nodeSelector:
        resource-type: "cpu"
      # ... and tolerate the on-demand=true:NoSchedule taint.
      tolerations:
        - key: "on-demand"
          operator: "Equal"
          value: "true"
          effect: "NoSchedule"
---
# ClusterIP Service that routes TCP port 8000 to port 8000 of the pods
# labelled app=rag-backend.
apiVersion: v1
kind: Service
metadata:
  name: rag-backend
spec:
  type: ClusterIP
  selector:
    app: rag-backend
  ports:
    # Service port and pod target port are the same number.
    - port: 8000
      targetPort: 8000
      protocol: TCP
---
# Deployment running a single replica of the OpenTelemetry Collector
# (contrib distribution), configured from the `otel-config` ConfigMap.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: otel-collector
spec:
  replicas: 1
  selector:
    matchLabels:
      app: otel-collector
  template:
    metadata:
      labels:
        app: otel-collector
    spec:
      # ${...} is a placeholder that must be substituted before applying.
      serviceAccountName: ${OTEL_KUBERNETES_SERVICE_ACCOUNT}
      volumes:
        # Project only the `collector.yaml` key of the ConfigMap, renamed
        # to `config.yaml` inside the volume.
        - name: otel-config
          configMap:
            name: otel-config
            items:
              - key: collector.yaml
                path: config.yaml
      containers:
        - name: otel-collector
          image: otel/opentelemetry-collector-contrib:0.108.0
          imagePullPolicy: Always
          # Points at the file projected by the volume mounted below.
          args: ["--config", "/etc/otel/config.yaml"]
          env:
            # Pod IP from the downward API; the collector configuration
            # reads it as ${env:MY_POD_IP} for its receiver endpoints.
            - name: MY_POD_IP
              valueFrom:
                fieldRef:
                  apiVersion: v1
                  fieldPath: status.podIP
          volumeMounts:
            - name: otel-config
              mountPath: /etc/otel/
---
# ClusterIP Service in front of the collector pods. Note that the Service's
# own label (app=opentelemetry-collector) differs from the pod label it
# selects on (app=otel-collector).
apiVersion: v1
kind: Service
metadata:
  name: opentelemetry-collector
  labels:
    app: opentelemetry-collector
spec:
  type: ClusterIP
  internalTrafficPolicy: Cluster
  selector:
    app: otel-collector
  ports:
    # Port 4317: matches the collector's OTLP gRPC receiver.
    - name: otel-grpc
      port: 4317
      targetPort: 4317
      protocol: TCP
    # Port 4318: matches the collector's OTLP HTTP receiver.
    - name: otlp-http
      port: 4318
      targetPort: 4318
      protocol: TCP
---
# ConfigMap holding the OpenTelemetry Collector configuration. The
# `otel-collector` Deployment projects the `collector.yaml` key into its
# container as /etc/otel/config.yaml.
apiVersion: v1
kind: ConfigMap
metadata:
  name: otel-config
data:
  # Everything below the `|` is a literal string handed to the collector,
  # so lines starting with `#` inside it are part of that string (they are
  # comments only to the collector's own parser).
  #
  # Summary of the embedded configuration:
  #   - OTLP receivers on HTTP :4318 and gRPC :4317, bound to the pod IP
  #     taken from the MY_POD_IP environment variable.
  #   - Two pipelines, traces and metrics, each running the `batch` and
  #     `resourcedetection` processors and exporting via `googlecloud`.
  collector.yaml: |
    receivers:
      # Enable endpoints for receiving data
      # supplied in OTLP format.
      otlp:
        protocols:
          http:
            endpoint: ${env:MY_POD_IP}:4318
          grpc:
            endpoint: ${env:MY_POD_IP}:4317
    exporters:
      # Export traces using the standard googlecloud exporter
      googlecloud:
        user_agent: Google-Cloud-OTLP
    processors:
      # Batch telemetry together to more efficiently send to GCP
      batch:
        send_batch_max_size: 500
        send_batch_size: 500
        timeout: 1s
      # If running on GCP (e.g. on GKE), detect resource attributes from the environment.
      resourcedetection:
        detectors: ["env", "gcp"]
    service:
      pipelines:
        traces:
          receivers: ["otlp"]
          processors: ["batch", "resourcedetection"]
          exporters: ["googlecloud"]
        metrics:
          receivers: ["otlp"]
          processors: ["batch", "resourcedetection"]
          exporters: ["googlecloud"]