helm_chart/HyperPodHelmChart/charts/health-monitoring-agent/templates/health-monitoring-agent.yaml (162 lines of code) (raw):

# rbac.yaml
#
# RBAC objects and the DaemonSet for the health-monitoring-agent.
# Everything namespaced is placed in `.Values.namespace`; the container image
# is taken from `.Values.hmaimage`.
---
# ClusterRole: permissions the agent's ServiceAccount is granted cluster-wide.
#   - read Node objects
#   - patch Node objects and their status subresource
#   - create/patch/update Events (core and events.k8s.io API groups)
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: health-monitoring-agent
rules:
  - apiGroups:
      - ""
    resources:
      - nodes
    verbs:
      - get
  - apiGroups:
      - ""
    resources:
      - nodes
      - nodes/status
    verbs:
      - patch
  - apiGroups:
      - ""
      - events.k8s.io
    resources:
      - events
    verbs:
      - create
      - patch
      - update
---
# ServiceAccount the DaemonSet pods run as.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: health-monitoring-agent
  namespace: {{ .Values.namespace | quote }}
---
# Binds the ClusterRole above to the ServiceAccount.
# ClusterRoleBinding is a cluster-scoped resource, so it carries no
# metadata.namespace; only the subject is namespaced.
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: health-monitoring-agent
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: health-monitoring-agent
subjects:
  - kind: ServiceAccount
    name: health-monitoring-agent
    namespace: {{ .Values.namespace | quote }}
---
# DaemonSet: one agent pod per node whose instance type is in the list below.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: health-monitoring-agent
  namespace: {{ .Values.namespace | quote }}
  labels:
    app: health-monitoring-agent
spec:
  selector:
    matchLabels:
      app: health-monitoring-agent
  template:
    metadata:
      labels:
        app: health-monitoring-agent
    spec:
      affinity:
        nodeAffinity:
          # Hard scheduling requirement: pods are only placed on nodes whose
          # node.kubernetes.io/instance-type label matches one of these values.
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: node.kubernetes.io/instance-type
                    operator: In
                    values:
                      - ml.p5en.48xlarge
                      - ml.p5e.48xlarge
                      - ml.p5.48xlarge
                      - ml.p4d.24xlarge
                      - ml.p4de.24xlarge
                      - ml.g5.xlarge
                      - ml.g5.2xlarge
                      - ml.g5.4xlarge
                      - ml.g5.8xlarge
                      - ml.g5.12xlarge
                      - ml.g5.16xlarge
                      - ml.g5.24xlarge
                      - ml.g5.48xlarge
                      - ml.inf2.xlarge
                      - ml.inf2.8xlarge
                      - ml.inf2.24xlarge
                      - ml.inf2.48xlarge
                      - ml.trn1.32xlarge
                      - ml.trn1n.32xlarge
                      - ml.g6.xlarge
                      - ml.g6.2xlarge
                      - ml.g6.4xlarge
                      - ml.g6.8xlarge
                      - ml.g6.16xlarge
                      - ml.g6.12xlarge
                      - ml.g6.24xlarge
                      - ml.g6.48xlarge
                      - ml.gr6.4xlarge
                      - ml.gr6.8xlarge
                      - ml.g6e.xlarge
                      - ml.g6e.2xlarge
                      - ml.g6e.4xlarge
                      - ml.g6e.8xlarge
                      - ml.g6e.16xlarge
                      - ml.g6e.12xlarge
                      - ml.g6e.24xlarge
                      - ml.g6e.48xlarge
                      - ml.trn2.48xlarge
      containers:
        - name: health-monitoring-agent
          args:
            - --enable-k8s-exporter=false
            - --config.system-log-monitor=/config/system-message-monitor.json
          image: {{ .Values.hmaimage | quote }}
          # Requests equal limits for both CPU and memory.
          resources:
            limits:
              cpu: 500m
              memory: 512Mi
            requests:
              cpu: 500m
              memory: 512Mi
          imagePullPolicy: IfNotPresent
          # Run as a fixed non-root UID/GID.
          securityContext:
            runAsUser: 1000
            runAsGroup: 2000
          env:
            # Downward-API values: the node this pod is scheduled on and
            # that node's IP address.
            - name: NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            - name: NODE_IP
              valueFrom:
                fieldRef:
                  fieldPath: status.hostIP
          volumeMounts:
            # NOTE(review): unlike the other two mounts this one is not
            # marked readOnly -- confirm whether the agent writes here.
            - name: log
              mountPath: /var/log
            - name: kmsg
              mountPath: /dev/kmsg
              readOnly: true
            # Make sure node problem detector is in the same timezone
            # with the host.
            - name: localtime
              mountPath: /etc/localtime
              readOnly: true
      serviceAccountName: health-monitoring-agent
      volumes:
        - name: log
          # Config `log` to your system log directory
          hostPath:
            path: /var/log/
        - name: kmsg
          hostPath:
            path: /dev/kmsg
        - name: localtime
          hostPath:
            path: /etc/localtime
      # `operator: Exists` with no key tolerates every taint of the given
      # effect, so the agent is neither kept off nor evicted from tainted
      # nodes.
      tolerations:
        - effect: NoSchedule
          operator: Exists
        - effect: NoExecute
          operator: Exists