# TensorRT Inference Server (TRTIS)

In [None]:
%%writefile trtis_service.yaml

---
# Cluster-internal Service in front of the inference-server pods.
apiVersion: v1
kind: Service
metadata:
  name: inference-server
  namespace: default
  labels:
    name: inference-server
spec:
  type: ClusterIP
  # Quoted so no parser can mistake the literal string "None" for a null.
  sessionAffinity: "None"
  # externalTrafficPolicy: Cluster
  # Traffic is routed to pods carrying this label.
  selector:
    app: inference-server
  # Each port is forwarded to the same port number on the pod.
  ports:
    - name: http-inference-server
      protocol: TCP
      port: 8000
      targetPort: 8000
    - name: grpc-inference-server
      protocol: TCP
      port: 8001
      targetPort: 8001
    - name: metrics-inference-server
      protocol: TCP
      port: 8002
      targetPort: 8002

In [None]:
%%writefile trtis_deploy.yaml

---
# Deployment running a single TensorRT Inference Server replica with one GPU.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: inference-server
  labels:
    name: inference-server
spec:
  replicas: 1
  selector:
    matchLabels:
      app: inference-server
  template:
    metadata:
      labels:
        # Must equal spec.selector.matchLabels; this is also the label the
        # inference-server Service selects on.
        app: inference-server
    spec:
      dnsPolicy: ClusterFirst
      # Registry credentials for nvcr.io; a Secret named "ngc" must exist in
      # the namespace this Deployment is created in.
      imagePullSecrets:
      - name: ngc
      restartPolicy: Always
      schedulerName: default-scheduler
      securityContext: {}
      # Only serviceAccountName is set: `serviceAccount` is a deprecated alias
      # for it. The `priority` field is omitted as well, because it is
      # populated by the Priority admission controller, which may reject pods
      # that set it by hand.
      serviceAccountName: default
      terminationGracePeriodSeconds: 30
      containers:
      - name: inference-server
        image: nvcr.io/nvidia/tensorrtserver:19.05-py3
        imagePullPolicy: IfNotPresent
        args:
        - trtserver
        # NOTE(review): Kubernetes does not perform shell-style ${VAR}
        # expansion in args (only $(VAR) references to container env vars), so
        # ${BUCKET_NAME} reaches trtserver literally. Substitute the real
        # bucket name before creating this Deployment.
        - --model-store=gs://${BUCKET_NAME}/resnet/
        ports:
        # 8000/8001/8002 are exposed by the inference-server Service as its
        # http, grpc and metrics ports respectively.
        - containerPort: 8000
          protocol: TCP
        - containerPort: 8001
          protocol: TCP
        - containerPort: 8002
          protocol: TCP
        # Restart the container after 3 consecutive failed liveness checks.
        livenessProbe:
          failureThreshold: 3
          httpGet:
            path: /api/health/live
            port: 8000
            scheme: HTTP
          initialDelaySeconds: 5
          periodSeconds: 5
          successThreshold: 1
          timeoutSeconds: 1
        # Keep the pod out of the Service endpoints until this check passes.
        readinessProbe:
          failureThreshold: 3
          httpGet:
            path: /api/health/ready
            port: 8000
            scheme: HTTP
          initialDelaySeconds: 5
          periodSeconds: 5
          successThreshold: 1
          timeoutSeconds: 1
        resources:
          limits:
            nvidia.com/gpu: "1"
          requests:
            cpu: 1000m
            nvidia.com/gpu: "1"
        securityContext:
          # `procMount: Default` was dropped: it is the default value and the
          # field is only honoured behind the ProcMountType feature gate.
          runAsUser: 1000
        terminationMessagePath: /dev/termination-log
        terminationMessagePolicy: File


In [None]:
# Create the inference-server Service from the manifest written above.
!kubectl create --filename=trtis_service.yaml

In [None]:
# Create the inference-server Deployment from the manifest written above.
!kubectl create --filename=trtis_deploy.yaml

In [None]:
# Print the inference-server Service address as CLUSTER_IP:PORT, using the
# first entry (index 0) of spec.ports.
!kubectl get svc inference-server -o "jsonpath={.spec['clusterIP']}:{.spec['ports'][0]['port']}"

In [None]:
# List the pods in the current namespace of the active kubectl context.
!kubectl get pods

In [None]:
# Show the server logs. Target the Deployment rather than a concrete pod:
# generated pod names (e.g. inference-server-6dd698b787-nzs4h) change on every
# rollout, so a hard-coded name only works for the run that produced it.
!kubectl logs deployment/inference-server

# Prometheus

In [None]:
%%writefile clusterRole.yml

---
# Read-only cluster-wide permissions for Prometheus.
# rbac.authorization.k8s.io/v1 replaces the v1beta1 API, which is no longer
# served as of Kubernetes 1.22.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus
rules:
- apiGroups: [""]
  resources:
  - nodes
  - nodes/proxy
  - services
  - endpoints
  - pods
  verbs: ["get", "list", "watch"]
# Ingresses are served from networking.k8s.io on current clusters; the legacy
# "extensions" group is kept so the role still works on older ones.
- apiGroups:
  - extensions
  - networking.k8s.io
  resources:
  - ingresses
  verbs: ["get", "list", "watch"]
- nonResourceURLs: ["/metrics"]
  verbs: ["get"]
---
# Grants the ClusterRole above to the "default" ServiceAccount of the
# "monitoring" namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus
subjects:
- kind: ServiceAccount
  name: default
  namespace: monitoring

In [None]:
# Print the inference-server Service address as CLUSTER_IP:PORT, using the
# third entry (index 2) of spec.ports, which trtis_service.yaml declares as
# the metrics port.
!kubectl get svc inference-server -o "jsonpath={.spec['clusterIP']}:{.spec['ports'][2]['port']}"

In [None]:
%%writefile prometheus-configmap.yml

---
# Prometheus server configuration. prometheus-deployment.yml mounts this
# ConfigMap at /etc/prometheus, so the key below becomes
# /etc/prometheus/prometheus.yml.
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-server-conf
  labels:
    name: prometheus-server-conf
  namespace: monitoring
data:
  prometheus.yml: |-
    # my global config
    global:
      scrape_interval: 10s
      evaluation_interval: 10s
      # scrape_timeout is set to the global default (10s).

    # Alertmanager configuration
    alerting:
      alertmanagers:
      - static_configs:
        - targets:
          # - alertmanager:9093

    # Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
    rule_files:
      # - "first_rules.yml"
      # - "second_rules.yml"

    # A scrape configuration containing exactly one endpoint to scrape:
    # the metrics port of the inference-server Service.
    scrape_configs:
      # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
      - job_name: 'prometheus'

        # metrics_path defaults to '/metrics'
        # scheme defaults to 'http'.

        # Address the Service by its cluster DNS name (<service>.<namespace>.svc)
        # instead of a pasted cluster IP: the name stays valid if the Service
        # is recreated and needs no manual editing.
        static_configs:
        - targets: ['inference-server.default.svc:8002']

In [None]:
%%writefile prometheus-deployment.yml

---
# Single-replica Prometheus server.
# apps/v1 replaces extensions/v1beta1, which stopped serving Deployments in
# Kubernetes 1.16; apps/v1 requires an explicit spec.selector.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus-deployment
  namespace: monitoring
spec:
  replicas: 1
  selector:
    matchLabels:
      app: prometheus-server
  template:
    metadata:
      labels:
        # Must equal spec.selector.matchLabels; prometheus-service.yml selects
        # on the same label.
        app: prometheus-server
    spec:
      containers:
        - name: prometheus
          image: prom/prometheus:latest
          args:
            - "--config.file=/etc/prometheus/prometheus.yml"
            - "--storage.tsdb.path=/prometheus/"
          ports:
            - containerPort: 9090
          volumeMounts:
            - name: prometheus-config-volume
              mountPath: /etc/prometheus
            - name: prometheus-storage-volume
              mountPath: /prometheus
      volumes:
        # Configuration comes from the prometheus-server-conf ConfigMap.
        - name: prometheus-config-volume
          configMap:
            # 420 decimal == 0644 octal.
            defaultMode: 420
            name: prometheus-server-conf
        # emptyDir: the TSDB data does not survive the pod being deleted.
        - name: prometheus-storage-volume
          emptyDir: {}

In [None]:
%%writefile prometheus-service.yml

---
# Cluster-internal Service exposing Prometheus on port 8080.
apiVersion: v1
kind: Service
metadata:
  name: prometheus-service
  # Declared explicitly, like the Prometheus Deployment and ConfigMap, so the
  # Service cannot end up in a different namespace than the pods it selects.
  namespace: monitoring
spec:
  selector:
    app: prometheus-server
  type: ClusterIP
  ports:
    # Service port 8080 forwards to the Prometheus container port 9090.
    - port: 8080
      targetPort: 9090

In [None]:
# Create the monitoring namespace first; everything below is created in it.
!kubectl create namespace monitoring
# RBAC for Prometheus (cluster-scoped objects).
!kubectl create --filename=clusterRole.yml
# Prometheus configuration, server and Service.
!kubectl create --filename=prometheus-configmap.yml --namespace=monitoring
!kubectl create --filename=prometheus-deployment.yml --namespace=monitoring
!kubectl create --filename=prometheus-service.yml --namespace=monitoring

In [None]:
# Print the prometheus-service address in the monitoring namespace as
# CLUSTER_IP:PORT, using the first entry (index 0) of spec.ports.
!kubectl get svc prometheus-service -o "jsonpath={.spec['clusterIP']}:{.spec['ports'][0]['port']}" -n monitoring

# Grafana

In [None]:
%%writefile grafana-deployment.yml

---
# Single-replica Grafana server.
# apps/v1 replaces extensions/v1beta1, which stopped serving Deployments in
# Kubernetes 1.16; apps/v1 requires an explicit spec.selector.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: grafana-deployment
  namespace: monitoring
spec:
  replicas: 1
  selector:
    matchLabels:
      app: grafana-server
  template:
    metadata:
      labels:
        # Must equal spec.selector.matchLabels; grafana-service.yml selects on
        # the same label.
        app: grafana-server
    spec:
      containers:
        - name: grafana
          image: grafana/grafana:latest
          ports:
            - containerPort: 3000

In [None]:
%%writefile grafana-service.yml

---
# LoadBalancer Service exposing Grafana on port 8100.
apiVersion: v1
kind: Service
metadata:
  name: grafana-service
  # Declared explicitly, like the Grafana Deployment, so the Service cannot
  # end up in a different namespace than the pods it selects.
  namespace: monitoring
spec:
  selector:
    app: grafana-server
  type: LoadBalancer
  ports:
    # Service port 8100 forwards to the Grafana container port 3000.
    - port: 8100
      targetPort: 3000

In [None]:
# Create the Grafana Service and Deployment in the monitoring namespace.
!kubectl create --filename=grafana-service.yml --namespace=monitoring
!kubectl create --filename=grafana-deployment.yml --namespace=monitoring

In [None]:
# Print the prometheus-service address (CLUSTER_IP:PORT, first port entry) in
# the monitoring namespace.
# NOTE(review): presumably shown here so it can be entered as the Prometheus
# data source in Grafana — confirm.
!kubectl get svc prometheus-service -o "jsonpath={.spec['clusterIP']}:{.spec['ports'][0]['port']}" -n monitoring

In [None]:
# List the pods running in the monitoring namespace.
!kubectl get pods --namespace=monitoring