cookbooks/aws-parallelcluster-platform/resources/gdrcopy/partial/_gdrcopy_common.rb (80 lines of code) (raw):

# frozen_string_literal: true # # Copyright:: 2013-2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). # You may not use this file except in compliance with the License. # A copy of the License is located at # # http://aws.amazon.com/apache2.0/ # # or in the "LICENSE.txt" file accompanying this file. # This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. # See the License for the specific language governing permissions and limitations under the License. def gdrcopy_version '2.4.4' end def gdrcopy_checksum '8802f7bc4a589a610118023bdcdd83c10a56dea399acf6eeaac32e8cc10739a8' end unified_mode true default_action :setup action :setup do return unless gdrcopy_enabled? return if on_docker? # Save gdrcopy version for InSpec tests node.default['cluster']['nvidia']['gdrcopy']['version'] = gdrcopy_version node.default['cluster']['nvidia']['gdrcopy']['service'] = gdrcopy_service node_attributes 'dump node attributes' gdrcopy_tarball = "#{node['cluster']['sources_dir']}/gdrcopy-#{gdrcopy_version}.tar.gz" directory node['cluster']['sources_dir'] do recursive true end remote_file gdrcopy_tarball do source gdrcopy_url mode '0644' retries 3 retry_delay 5 checksum gdrcopy_checksum action :create_if_missing end package_repos 'update package repos' do action :update end package gdrcopy_build_dependencies do retries 3 retry_delay 5 end bash 'Install NVIDIA GDRCopy' do user 'root' group 'root' cwd Chef::Config[:file_cache_path] code <<-GDRCOPY_INSTALL set -e tar -xf #{gdrcopy_tarball} cd gdrcopy-#{gdrcopy_version}/packages #{installation_code} GDRCOPY_INSTALL end service gdrcopy_service do action %i(disable stop) end end action :verify do %w(copybw).each do |command| bash "Verify NVIDIA GDRCopy: #{command}" do user 'root' group 'root' cwd Chef::Config[:file_cache_path] code <<-GDRCOPY_VERIFY set -e #{command} GDRCOPY_VERIFY end end end action :configure do return if on_docker? # Save gdrcopy version for InSpec tests node.default['cluster']['nvidia']['gdrcopy']['version'] = gdrcopy_version node.default['cluster']['nvidia']['gdrcopy']['service'] = gdrcopy_service node_attributes 'dump node attributes' if graphic_instance? && is_service_installed?(gdrcopy_service) # NVIDIA GDRCopy execute "enable #{gdrcopy_service} service" do # Using command in place of service resource because of: https://github.com/chef/chef/issues/12053 command "systemctl enable #{gdrcopy_service}" end service gdrcopy_service do action :start supports status: true end end end def gdrcopy_version_extended "#{gdrcopy_version}-1" end def gdrcopy_url "#{node['cluster']['artifacts_s3_url']}/dependencies/gdr_copy/v#{gdrcopy_version}.tar.gz" end