anthos-bm-edge-deployment/roles/abm-post-install/tasks/setup-google-observability.yaml (229 lines of code) (raw):

# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

---
# Installs and configures the Google monitoring (stackdriver-agent) and logging
# (google-fluentd) agents, then restarts them. The tasks after the separator at
# the bottom of the file remove the agents again and only run when the
# `full_reset` variable is defined and equals the string "True".

# Gather service state so the install tasks below can skip agents whose
# systemd unit is already enabled.
- name: Get service facts
  ansible.builtin.service_facts:
  become: true
  tags:
    - observability-agent-install
    - observability-setup

- name: Create stackdriver config folder if not exist
  become: true
  ansible.builtin.file:
    path: /etc/stackdriver
    state: directory
    owner: root
    group: root
    mode: '0755'
  tags:
    - observability-agent-install
    - observability-setup

- name: Create fluentd config folder if not exist
  become: true
  ansible.builtin.file:
    path: /etc/google-fluentd
    state: directory
    owner: root
    group: root
    mode: '0755'
  tags:
    - observability-agent-install
    - observability-setup

### Configure Monitoring agent
- name: Copy collectd config
  become: true
  ansible.builtin.template:
    src: collectd.conf.j2
    dest: "/etc/stackdriver/collectd.conf"
    owner: root
    group: root
    mode: '0644'
  tags:
    - monitoring-agent-config
    - observability-setup

- name: Copy stackdriver initd script
  become: true
  ansible.builtin.template:
    src: stackdriver-agent.j2
    dest: "/etc/init.d/stackdriver-agent"
    owner: root
    group: root
    mode: '0755'
  tags:
    - monitoring-agent-config
    - observability-setup

### Download install scripts
# Only fetched for agents whose service is missing or not yet enabled.
- name: Download Agent scripts
  become: true
  ansible.builtin.get_url:
    url: "https://dl.google.com/cloudagents/{{ item.script }}"
    dest: "/tmp/{{ item.script }}"
    mode: '0744'
  when: >-
    (ansible_facts.services[item.service + '.service'] is undefined) or
    (ansible_facts.services[item.service + '.service'].status != "enabled")
  loop:
    - { service: 'stackdriver-agent', script: 'add-monitoring-agent-repo.sh' }
    - { service: 'google-fluentd', script: 'add-logging-agent-repo.sh' }
  tags:
    - observability-agent-install
    - observability-removal  # make available if running the removal
    - observability-setup

### Run scripts
- name: Run Agent install scripts
  become: true
  # The installer tries to start google-fluentd without configuration, which
  # is set up later, so a failure here is expected and tolerated.
  ignore_errors: true
  ansible.builtin.command:
    cmd: "bash /tmp/{{ item.script }} --also-install"
  timeout: 90  # (in seconds) 1.5 minutes to install (and likely fail)
  when: >-
    (ansible_facts.services[item.service + '.service'] is undefined) or
    (ansible_facts.services[item.service + '.service'].status != "enabled")
  loop:
    - { service: 'stackdriver-agent', script: 'add-monitoring-agent-repo.sh' }
    - { service: 'google-fluentd', script: 'add-logging-agent-repo.sh' }
  register: agent_results
  environment:
    GOOGLE_APPLICATION_CREDENTIALS: "/var/keys/gsa-key.json"
  tags:
    - observability-agent-install
    - observability-setup

# Runs once per failed install-script invocation recorded in agent_results.
- name: Reset dpkg on failure only
  ignore_errors: true
  become: true
  ansible.builtin.command:
    cmd: dpkg --configure -a
  environment:
    DEBIAN_FRONTEND: "noninteractive"
    # Quoted so the value is passed as the literal string "true" instead of
    # being parsed as a YAML boolean.
    DEBCONF_NONINTERACTIVE_SEEN: "true"
  when: item is failed
  loop: "{{ agent_results.results }}"
  tags:
    - observability-agent-install
    - observability-setup
    - observability-cleanup

### Configure Logging agent
### (copies over the top of the "default" installed .deb configuration)
- name: Copy fluent.d config
  become: true
  ansible.builtin.template:
    src: google-fluentd.conf.j2
    dest: "/etc/google-fluentd/google-fluentd.conf"
    owner: root
    group: root
    mode: '0644'
  tags:
    - fluentd-config
    - logging-agent-config
    - observability-setup

- name: Copy fluentd initd script
  become: true
  ansible.builtin.template:
    src: google-fluentd.j2
    dest: "/etc/init.d/google-fluentd"
    owner: root
    group: root
    mode: '0755'
  tags:
    - fluentd-config
    - logging-agent-config
    - observability-setup

# Copies a file that already exists on the target host (remote_src); errors
# are ignored, e.g. when the .dpkg-new file is not present.
- name: Enable the syslog plugin
  become: true
  ignore_errors: true
  ansible.builtin.copy:
    src: /etc/google-fluentd/config.d/syslog.conf.dpkg-new
    dest: /etc/google-fluentd/config.d/syslog.conf
    remote_src: true
    owner: root
    group: root
    mode: '0644'
  tags:
    - fluentd-config
    - logging-agent-config
    - observability-setup

- name: Copy auditd config for fluentd plugin
  become: true
  ignore_errors: true
  ansible.builtin.template:
    src: auditd.conf.j2
    dest: "/etc/google-fluentd/config.d/auditd.conf"
    owner: root
    group: root
    # Plain config file: same mode as the other config templates in this file.
    mode: '0644'
  tags:
    - fluentd-config
    - auditd-config
    - logging-agent-config
    - observability-setup

- name: Enable the KV plugin for auditd
  become: true
  ansible.builtin.command:
    cmd: /opt/google-fluentd/embedded/bin/gem install fluent-plugin-kv-parser
  tags:
    - fluentd-config
    - auditd-config
    - logging-agent-config
    - observability-setup

### Restart both agents after config changes
- name: Restart observability services
  become: true
  ansible.builtin.service:
    name: "{{ item }}"
    enabled: true
    state: restarted
  loop:
    - stackdriver-agent
    - google-fluentd
  tags:
    - observability-agent-install
    - observability-service-reset
    - observability-setup

# apt-updates to google-fluentd hang because the incoming config
# overwrites the /etc/init.d script and therefore removes the google
# credentials line (roughly line 20).
# It is a legacy product anyway, so no real updates.
- name: Mark google-fluentd as not updateable
  become: true
  ansible.builtin.command:
    cmd: apt-mark hold google-fluentd
  tags:
    - observability-agent-install
    - observability-service-reset
    - observability-setup

- name: Reset daemon status for systemctl
  become: true
  ansible.builtin.command:
    cmd: systemctl daemon-reload
  tags:
    - observability-agent-install
    - observability-service-reset
    - observability-setup

# Automatically restarting the agent with cron
# NOTE(review): mode 0744 is kept from the original; a cron.d entry presumably
# does not need the execute bit -- confirm before tightening to 0644.
- name: Create cron-job to check if the agent is running and then restart the agent in the event that it crashed.
  # Writes a root-owned file under /etc/cron.d, so escalate like the other
  # tasks that write system paths.
  become: true
  ansible.builtin.template:
    src: stackdriver-agent-check-cron-script.j2
    dest: /etc/cron.d/stackdriver-agent-supervisor-cron-job
    owner: root
    group: root
    mode: '0744'
  tags:
    - observability-agent-install
    - observability-setup

##############################
# Removal: every task below runs only when full_reset is defined and equals
# the string "True". Each one escalates privileges because it removes
# packages or files under /etc.
##############################

- name: Uninstall packages
  become: true
  ansible.builtin.apt:
    pkg:
      - google-fluentd
      - google-fluentd-catch-all-config
    state: absent
    force: true
    purge: true
  when: (full_reset is defined) and (full_reset == "True")
  tags:
    - observability-removal

# Relies on the install scripts having been downloaded to /tmp earlier.
- name: Remove agents
  become: true
  ansible.builtin.command:
    cmd: "bash /tmp/{{ item.script }} --uninstall"
  loop:
    - { service: 'stackdriver-agent', script: 'add-monitoring-agent-repo.sh' }
    - { service: 'google-fluentd', script: 'add-logging-agent-repo.sh' }
  when: (full_reset is defined) and (full_reset == "True")
  tags:
    - observability-removal

# NOTE(review): the original list named /etc/init.d/google-fluentd twice; the
# duplicate is dropped here. Possibly another path (for example
# /etc/google-fluentd/google-fluentd.conf) was intended -- confirm.
- name: Remove all of the config files
  become: true
  ansible.builtin.file:
    path: "{{ item }}"
    state: absent
  loop:
    - '/etc/init.d/google-fluentd'
    - '/etc/init.d/stackdriver-agent'
    - '/etc/stackdriver/collectd.conf'
    # default installed during google-fluentd install
    - '/etc/google-fluentd/baseline/google-fluentd.conf'
  when: (full_reset is defined) and (full_reset == "True")
  tags:
    - observability-removal