cookbooks/aws-parallelcluster-platform/resources/fetch_config.rb (132 lines of code) (raw):

# frozen_string_literal: true

# Custom resource that makes the cluster configuration available on the node
# and loads it through `load_cluster_config`.
#
# Behaviour depends on node['cluster']['node_type']:
# * HeadNode     - fetches the cluster config, change set and instance types
#                  data, shares the config with login nodes and finally
#                  writes the config version files used as synchronization
#                  point by the other nodes.
# * ComputeFleet - on update waits for the config version file written by the
#                  head node; on create only checks that the config exists.
# * LoginNode    - same as ComputeFleet, using the login nodes paths.
#
# The action is a no-op when `on_docker?` is true.
resource_name :fetch_config
provides :fetch_config
unified_mode true

# true when the resource is invoked during a cluster update, false on create.
property :update, [true, false], default: false

default_action :run

action :run do
  return if on_docker?

  Chef::Log.debug("Called fetch_config with update (#{new_resource.update})")

  sync_file_compute_nodes = "#{node['cluster']['shared_dir']}/cluster-config-version"
  sync_file_login_nodes = "#{node['cluster']['shared_dir_login_nodes']}/cluster-config-version"

  case node['cluster']['node_type']
  when 'HeadNode'
    if new_resource.update
      Chef::Log.info("Backing up old configuration from (#{node['cluster']['cluster_config_path']}) to (#{node['cluster']['previous_cluster_config_path']})")
      ::FileUtils.cp_r(node['cluster']['cluster_config_path'], node['cluster']['previous_cluster_config_path'], remove_destination: true)
      fetch_cluster_config(node['cluster']['cluster_config_path'])
      Chef::Log.info("Cluster config is:\n#{::File.read(node['cluster']['cluster_config_path'])}")

      Chef::Log.info("Backing up old instance types data from (#{node['cluster']['instance_types_data_path']}) to (#{node['cluster']['previous_instance_types_data_path']})")
      ::FileUtils.cp_r(node['cluster']['instance_types_data_path'], node['cluster']['previous_instance_types_data_path'], remove_destination: true)
      fetch_change_set
      Chef::Log.info("Changeset is:\n#{::File.read(node['cluster']['change_set_path'])}")

      # Instance types data is fetched again only when the cluster config actually changed.
      fetch_instance_type_data unless ::FileUtils.identical?(node['cluster']['previous_cluster_config_path'], node['cluster']['cluster_config_path'])

      backup_shared_storages_mapping
    else
      fetch_cluster_config(node['cluster']['cluster_config_path']) unless ::File.exist?(node['cluster']['cluster_config_path'])
      fetch_instance_type_data unless ::File.exist?(node['cluster']['instance_types_data_path'])
    end

    # ensure config is shared also with login nodes
    share_config_with_login_nodes # TODO: Improvement - do this only if cluster has login nodes

    # load cluster config into a node object
    load_cluster_config(node['cluster']['cluster_config_path'])

    # Write config version file to signal other cluster nodes that all configuration files within the shared folder
    # are aligned with the latest config version.
    write_config_version_file(sync_file_compute_nodes)
    write_config_version_file(sync_file_login_nodes) # TODO: Improvement - do this only if cluster has login nodes
  when 'ComputeFleet'
    if kitchen_test?
      fetch_cluster_config(node['cluster']['cluster_config_path']) unless ::File.exist?(node['cluster']['cluster_config_path'])
      write_config_version_file(sync_file_compute_nodes)
    end

    if new_resource.update
      # Wait for the head node to write the config version file, which is the signal that
      # all configuration files within the shared folder are aligned with the latest config version.
      # This is required only on update because on create it is guaranteed that this recipe is executed on compute node
      # only after it has completed on head node.
      wait_cluster_config_file(sync_file_compute_nodes)

      # TODO: If the shared storage mapping files contain cluster-wide information and not node-specific data,
      #   then make the head node write the shared storage mapping files in the shared folder
      #   and let compute node consume them.
      backup_shared_storages_mapping
    else
      raise "Cluster config not found in #{node['cluster']['cluster_config_path']}" unless ::File.exist?(node['cluster']['cluster_config_path'])
    end

    # load cluster config into a node object
    load_cluster_config(node['cluster']['cluster_config_path'])
  when 'LoginNode'
    if kitchen_test?
      fetch_cluster_config(node['cluster']['login_cluster_config_path']) unless ::File.exist?(node['cluster']['login_cluster_config_path'])
      write_config_version_file(sync_file_login_nodes)
    end

    if new_resource.update
      # Wait for the head node to write the config version file, which is the signal that
      # all configuration files (cluster config, change-set, ...) have been written on the shared folder
      # and are aligned with the latest config version.
      # This is required only on update because on create it is guaranteed that this recipe is executed on login node
      # only after it has completed on head node.
      wait_cluster_config_file(sync_file_login_nodes)

      # TODO: If the shared storage mapping files contain cluster-wide information and not node-specific data,
      #   then make the head node write the shared storage mapping files in the shared folder
      #   and let login node consume them.
      backup_shared_storages_mapping
    else
      raise "Cluster config not found in #{node['cluster']['login_cluster_config_path']}" unless ::File.exist?(node['cluster']['login_cluster_config_path'])
    end

    # load cluster config into a node object
    load_cluster_config(node['cluster']['login_cluster_config_path'])
  else
    raise "node_type must be HeadNode, LoginNode or ComputeFleet"
  end
end

action_class do # rubocop:disable Metrics/BlockLength
  # Runs a shell command through an `execute` resource, retrying up to 3 times
  # (5 seconds apart) with a 300 seconds timeout.
  #
  # @param label [String] name of the execute resource
  # @param run_command [String] command line to run
  def execute_command(label, run_command)
    execute label do
      command run_command
      retries 3
      retry_delay 5
      timeout 300
    end
  end

  # Downloads an object from the cluster S3 bucket using `aws s3api get-object`.
  #
  # @param command_label [String] name of the execute resource
  # @param key [String] S3 key of the object
  # @param output [String] local path the object is written to
  # @param version_id [String, nil] object version; omitted from the command when nil
  def fetch_s3_object(command_label, key, output, version_id = nil)
    fetch_s3_object_command = "#{cookbook_virtualenv_path}/bin/aws s3api get-object" \
                              " --bucket #{node['cluster']['cluster_s3_bucket']}" \
                              " --key #{key}" \
                              " --region #{node['cluster']['region']}" \
                              " #{output}"
    fetch_s3_object_command += " --version-id #{version_id}" unless version_id.nil?
    execute_command(command_label, fetch_s3_object_command)
  end

  # Retrieves the cluster configuration and stores it in config_path.
  # In kitchen tests that do not interact with S3 a local fixture is copied instead.
  #
  # @param config_path [String] destination path of the cluster config
  def fetch_cluster_config(config_path)
    if kitchen_test? && !node['interact_with_s3']
      remote_file "copy fake cluster config" do
        # Honor the requested destination: callers (e.g. the LoginNode branch) check and
        # load the config from the path they pass in, which may differ from cluster_config_path.
        path config_path
        source "file://#{kitchen_cluster_config_path}"
      end
    else
      # Copy cluster config file from S3 URI
      version_id = node['cluster']['cluster_config_version']
      fetch_s3_object("copy_cluster_config_from_s3", node['cluster']['cluster_config_s3_key'], config_path, version_id)
    end
  end

  # Downloads the change set of the current update into node['cluster']['change_set_path'].
  def fetch_change_set
    # Copy change set file from S3 URI
    fetch_s3_object("copy_change_set_from_s3", node['cluster']['change_set_s3_key'], node['cluster']['change_set_path'])
  end

  # Copies the current and the previous cluster config to the login nodes paths.
  # Each file is copied only if it exists.
  def share_config_with_login_nodes
    # Share cluster config with login nodes (only if they exist)
    Chef::Log.info("Sharing cluster config with login nodes")
    if ::File.exist?(node['cluster']['cluster_config_path'])
      ::FileUtils.cp_r(node['cluster']['cluster_config_path'], node['cluster']['login_cluster_config_path'], remove_destination: true)
    end
    if ::File.exist?(node['cluster']['previous_cluster_config_path'])
      ::FileUtils.cp_r(node['cluster']['previous_cluster_config_path'], node['cluster']['login_previous_cluster_config_path'], remove_destination: true)
    end
  end

  # Retrieves the instance types data and stores it in node['cluster']['instance_types_data_path'].
  # In kitchen tests that do not interact with S3 a local fixture is copied instead.
  def fetch_instance_type_data
    if kitchen_test? && !node['interact_with_s3']
      remote_file "copy fake instance type data" do
        path node['cluster']['instance_types_data_path']
        source "file://#{kitchen_instance_types_data_path}"
      end
    else
      # Copy instance type infos file from S3 URI
      instance_version_id = node['cluster']['instance_types_data_version']
      fetch_s3_object("copy_instance_type_data_from_s3", node['cluster']['instance_types_data_s3_key'], node['cluster']['instance_types_data_path'], instance_version_id)
    end
  end

  # Saves a copy of the shared storages mapping file to its "previous" path,
  # replacing any existing destination.
  def backup_shared_storages_mapping
    Chef::Log.info("Backing up old shared storages data from (#{node['cluster']['shared_storages_mapping_path']}) to (#{node['cluster']['previous_shared_storages_mapping_path']})")
    ::FileUtils.cp_r(node['cluster']['shared_storages_mapping_path'], node['cluster']['previous_shared_storages_mapping_path'], remove_destination: true)
  end

  # Writes the cluster config version into the specified file.
  #
  # @param path [String] path of the config version file
  def write_config_version_file(path)
    # Write the cluster config version into the specified file.
    # This file is used as a synchronization point between the head node and the other cluster nodes.
    # In particular, the head node uses this file to signal to other cluster nodes that all files
    # in the shared folder related to the cluster config have been updated with the current config version.
    file path do
      content node['cluster']['cluster_config_version']
      mode '0644'
      owner 'root'
      group 'root'
    end
  end

  # Blocks until the config version file contains the current cluster config version.
  # The check is retried up to 30 times, 15 seconds apart.
  #
  # @param path [String] path of the config version file to poll
  def wait_cluster_config_file(path)
    # Wait for the config version file to contain the current cluster config version.
    bash "Wait cluster config files to be updated by the head node" do
      code "[[ \"$(cat #{path})\" == \"#{node['cluster']['cluster_config_version']}\" ]] || exit 1"
      retries 30
      retry_delay 15
      timeout 5
    end
  end
end