lib/runit.rb (196 lines of code) (raw):

# frozen_string_literal: true

require 'pathname'

require_relative 'gdk_src'
require_relative 'utils'

# Thin wrapper around the runit tools (`runsvdir`, `sv`) used to supervise
# GDK services. Every method is a module-level (`self.`) method.
module Runit
  # Maps a user-facing shortcut name to a glob that is resolved relative to
  # SERVICES_DIR (for services) or LOG_DIR (for log files). Globs must not
  # contain '/'; see shortcut_glob.
  SERVICE_SHORTCUTS = {
    'rails' => 'rails-*',
    'praefect' => 'praefect*',
    'gitaly' => '{gitaly,praefect*}',
    'db' => '{redis,redis-cluster,postgresql,postgresql-geo,clickhouse}',
    'rails-migration-dependencies' => '{redis,redis-cluster,postgresql,postgresql-geo,gitaly,praefect*,minio}',
    'workhorse' => 'gitlab-workhorse'
  }.freeze

  SERVICES_DIR = GDK::SRC.join('services')
  LOG_DIR = GDK::SRC.join('log')

  # `stop` walks this list in the order given and `start` walks it in reverse.
  ALL_DATA_ORIENTED_SERVICE_NAMES = %w[
    minio openldap gitaly praefect redis redis-cluster postgresql-geo postgresql
  ].freeze

  # Number of `sv force-stop` attempts made by stop_services.
  STOP_RETRY_COUNT = 3

  # Renders the runit configuration and launches a detached `runsvdir` for
  # this GDK installation, unless one is already running.
  #
  # Aborts (via runit_installed!) when `runsvdir` is not available.
  def self.start_runsvdir
    runit_installed!

    runit_config = Runit::Config.new(GDK.root)

    # To make the transition easier, we merge legacy services that haven't
    # been migrated yet so that using Ruby services will always work, even
    # when only partially migrated.
    services = GDK::Services.enabled
    legacy_services = GDK::Services.legacy.select(&:enabled?)

    runit_config.render(services: services + legacy_services)

    # It is important that we use an absolute path with `runsvdir`: this
    # allows us to distinguish processes belonging to different GDK
    # installations on the same machine.
    args = runsvdir_base_args
    return if runsvdir_pid(args)

    dots = '.' * 395

    Process.fork do
      Dir.chdir('/')
      Process.setsid

      # Cargo-culting the use of 395 periods from omnibus-gitlab.
      # https://gitlab.com/gitlab-org/omnibus-gitlab/blob/5dfdcafa30ad6e203a04a917f180b630d5121cf6/config/templates/runit/runsvdir-start.erb#L42
      args << "log: #{dots}"

      spawn(cleaned_path_env, *args, in: '/dev/null', out: '/dev/null', err: '/dev/null')
    end
  end

  # Builds the environment override passed to `spawn`: PATH reduced to the
  # entries that are existing directories.
  #
  # Runit does not handle ENOTDIR from execve well, so let's try to
  # prevent that.
  # https://gitlab.com/gitlab-org/gitlab-development-kit/issues/666#note_241939982
  #
  # @return [Hash{String => String}] a single-entry hash keyed by 'PATH'
  def self.cleaned_path_env
    # An unset PATH is treated as empty instead of raising NoMethodError.
    path_entries = ENV.fetch('PATH', '').split(File::PATH_SEPARATOR)
    valid_path_entries = path_entries.select { |dir| File.directory?(dir) }

    { 'PATH' => valid_path_entries.join(File::PATH_SEPARATOR) }
  end

  # @return [Array<String>] the `runsvdir` command line, without the trailing
  #   "log: ..." argument that start_runsvdir appends
  def self.runsvdir_base_args
    ['runsvdir', '-P', GDK.root.join('services').to_s]
  end

  # Looks for a running `runsvdir` whose command line starts with +args+.
  #
  # @param args [Array<String>] expected leading arguments of the process
  # @return [Integer, nil] the first matching pid, or nil when `pgrep`
  #   reports nothing or no candidate's command line matches
  def self.runsvdir_pid(args)
    pgrep = GDK::Shellout.new(%w[pgrep runsvdir]).run
    return if pgrep.empty?

    pids = pgrep.split("\n").map { |str| Integer(str) }
    # The trailing space makes the prefix match end on an argument boundary.
    runsvdir_ps = "#{args.join(' ')} "

    pids.find do |pid|
      GDK::Shellout.new(%W[ps -o args= -p #{pid}]).run.start_with?(runsvdir_ps)
    end
  end

  # Aborts with installation instructions unless `runsvdir` is executable.
  def self.runit_installed!
    return if Utils.executable_exist?('runsvdir')

    abort <<~MESSAGE

      ERROR: gitlab-development-kit requires Runit to be installed.
      You can install Runit with:

        #{runit_instructions}

    MESSAGE
  end

  # @return [String] an install command for the first available package
  #   manager (Homebrew, MacPorts, apt), or a placeholder when none is found
  def self.runit_instructions
    if GDK::Dependencies.homebrew_available?
      'brew install runit'
    elsif GDK::Dependencies.macports_available?
      'sudo port install runit'
    elsif GDK::Dependencies.linux_apt_available?
      'sudo apt install runit'
    else
      '(no copy-paste Runit installation snippet available for your OS)'
    end
  end

  # Starts the given services. When +services+ is empty, the data-oriented
  # services are started one at a time first (stopping at the first one whose
  # start reports failure), followed by every other service.
  #
  # @param services [String, Array<String>, nil] service names or shortcuts
  # @param quiet [Boolean] forwarded to sv
  # @return [Boolean] result of the final `sv start` call
  def self.start(services, quiet: false)
    services = Array(services)

    if services.empty?
      # Redis, PostgreSQL, etc should be started first.
      data_oriented_service_names.reverse_each.all? { |service_name| sv('start', [service_name], quiet: quiet) }
      services = non_data_oriented_service_names
    end

    sv('start', services, quiet: quiet)
  end

  # Stops every service and then signals `runsvdir` to unload.
  #
  # @param quiet [Boolean] forwarded to sv
  # @return [Boolean] result of unload_runsvdir!
  def self.stop(quiet: false)
    # Redis, PostgreSQL, etc should be stopped last.
    stop_services(non_data_oriented_service_names, quiet: quiet)
    data_oriented_service_names.all? { |service_name| stop_services([service_name], quiet: quiet) }

    unload_runsvdir!
  end

  # Runs `sv force-stop` for +services+, making up to STOP_RETRY_COUNT
  # attempts and emitting a notice after each failed one.
  #
  # @param services [Array<String>] service names or shortcuts
  # @param quiet [Boolean] forwarded to sv
  # @return [true] always, whether or not any attempt succeeded; `stop`
  #   chains calls through `all?`, so a falsy value here would end that chain
  def self.stop_services(services, quiet: false)
    STOP_RETRY_COUNT.times do |i|
      # From http://smarden.org/runit/sv.8.html:
      #
      # down: If the service is running, send it the TERM signal, and the CONT signal. If ./run exits, start ./finish if it exists. After it stops, do not restart service.
      # force-stop: Same as down, but wait up to (default) 7 seconds for the service to become down. Then report the status, and on timeout send the service the kill command.
      #
      break if sv('force-stop', services, quiet: quiet)

      GDK::Output.notice("Retrying stop (#{i + 1}/#{STOP_RETRY_COUNT})")
    end

    true
  end

  # Sends HUP to this installation's `runsvdir`, if one is running.
  #
  # @return [Boolean] true when the signal was sent, and also when there was
  #   no matching process left to signal
  def self.unload_runsvdir!
    # Unload runsvdir: this is safe because we have just stopped all services.
    pid = runsvdir_pid(runsvdir_base_args)
    # runsvdir_pid returns nil when nothing matches; Process.kill would raise
    # TypeError on a nil pid.
    return true unless pid

    !Process.kill('HUP', pid).nil?
  rescue Errno::ESRCH
    # The process exited between the lookup and the signal.
    true
  end

  # Prepares an `sv` invocation for +services+ after making sure `runsvdir`
  # is running and every resolved service directory is supervised.
  #
  # @param cmd [String] the `sv` sub-command, e.g. 'start' or 'force-stop'
  # @param services [Array<String>] service names or shortcuts
  # @return [GDK::Shellout, nil] the command, not yet run; nil when no
  #   services remain after filtering
  def self.sv_shellout(cmd, services)
    start_runsvdir
    expanded_services = expand_services(services)
    ensure_services_are_supervised(expanded_services)

    unless GDK.config.redis_cluster.enabled?
      expanded_services = expanded_services.reject { |es| es.to_s.include?('redis-cluster') }
    end

    return nil if expanded_services.empty? # silent skip assuming successful

    command = ['sv', '-w', GDK.config.gdk.runit_wait_secs.to_s, cmd, *expanded_services.map(&:to_s)]
    GDK::Shellout.new(command)
  end

  # Runs `sv <cmd>` for +services+.
  #
  # @param cmd [String] the `sv` sub-command
  # @param services [Array<String>] service names or shortcuts
  # @param quiet [Boolean] when true the command is executed with `run`,
  #   otherwise with `stream`
  # @return [Boolean] true when there was nothing to run, otherwise the
  #   shellout's `success?`
  def self.sv(cmd, services, quiet: false)
    sh = sv_shellout(cmd, services)
    return true if sh.nil?

    quiet ? sh.run : sh.stream
    sh.success?
  end

  # Waits for each service directory to be supervised; aborts via
  # wait_runsv_supervise_ok! on an unknown service or a timeout.
  #
  # @param services [Array<Pathname>] service directories
  def self.ensure_services_are_supervised(services)
    services.each { |svc| wait_runsv_supervise_ok!(svc) }
  end

  # @return [Array<String>] entries of ALL_DATA_ORIENTED_SERVICE_NAMES that
  #   exist under SERVICES_DIR, in the constant's order
  def self.data_oriented_service_names
    ALL_DATA_ORIENTED_SERVICE_NAMES.select do |service_name|
      SERVICES_DIR.join(service_name).exist?
    end
  end

  # @return [Array<String>] all service names minus the data-oriented ones
  def self.non_data_oriented_service_names
    all_service_names - data_oriented_service_names
  end

  # @return [Array<String>] sorted names of the directories in SERVICES_DIR,
  #   excluding those prefixed 'praefect-gitaly-'; empty when SERVICES_DIR
  #   does not exist
  def self.all_service_names
    return [] unless SERVICES_DIR.exist?

    # praefect-gitaly-* services are stopped/started automatically.
    Pathname.new(SERVICES_DIR).children.filter_map do |path|
      name = path.basename.to_s
      name if path.directory? && !name.start_with?('praefect-gitaly-')
    end.sort
  end

  # Resolves service names and shortcuts to paths under SERVICES_DIR.
  #
  # @param services [Array<String>] names or shortcuts; when empty, every
  #   entry of SERVICES_DIR is returned
  # @return [Array<Pathname>] unique, sorted paths
  def self.expand_services(services)
    return SERVICES_DIR.glob('*').sort if services.empty?

    services.flat_map do |svc|
      service_shortcut(svc) || SERVICES_DIR.join(svc)
    end.uniq.sort
  end

  # @param svc [String] a possible key of SERVICE_SHORTCUTS
  # @return [Array<Pathname>, nil] paths under SERVICES_DIR matching the
  #   shortcut; nil when +svc+ is not a shortcut or nothing matches
  def self.service_shortcut(svc)
    glob = shortcut_glob(svc)
    return unless glob

    shortcut_services = SERVICES_DIR.glob(glob)
    shortcut_services unless shortcut_services.empty?
  end

  # Blocks until `supervise/ok` inside +service_dir+ can be opened for
  # writing, retrying up to 50 times with a 0.1s pause after each failure.
  # Aborts when +service_dir+ is not a directory or all attempts fail.
  #
  # @param service_dir [Pathname] the service directory
  def self.wait_runsv_supervise_ok!(service_dir)
    unless service_dir.directory?
      GDK::Output.error "unknown runit service: #{service_dir}"
      abort
    end

    50.times do
      service_dir.join('supervise', 'ok').open(File::WRONLY | File::NONBLOCK).close
      return
    rescue StandardError
      sleep 0.1
    end

    GDK::Output.error "timeout waiting for runsv in #{service_dir}"
    abort
  end

  # Replaces the current process with `tail -qF` over the `current` log of
  # each requested service.
  #
  # @param services [Array<String>] names or shortcuts; empty means all logs
  # @return [true] only when there is nothing to tail (a warning is printed);
  #   otherwise `exec` takes over the process
  def self.tail(services)
    log_files_for_services = log_files(services)

    if log_files_for_services.empty?
      GDK::Output.warn(<<~MSG)
        No matching services to tail.

        To view a list of services and shortcuts, run `gdk tail --help`.
      MSG

      return true
    end

    exec('tail', '-qF', *log_files_for_services.map(&:to_s))
  end

  # @param services [Array<String>] names or shortcuts; when empty, every
  #   `*/current` under LOG_DIR is returned
  # @return [Array<Pathname>] unique `current` log files; a plain name whose
  #   log file does not exist is skipped
  def self.log_files(services)
    return LOG_DIR.glob(File.join('*', 'current')) if services.empty?

    services.flat_map do |svc|
      shortcut = log_shortcut(svc)
      next shortcut if shortcut

      current_log = LOG_DIR.join(svc, 'current')
      current_log if current_log.exist?
    end.compact.uniq
  end

  # @param svc [String] a possible key of SERVICE_SHORTCUTS
  # @return [Array<Pathname>, nil] `current` log files matching the shortcut;
  #   nil when +svc+ is not a shortcut or nothing matches
  def self.log_shortcut(svc)
    glob = shortcut_glob(svc)
    return unless glob

    shortcut_logs = LOG_DIR.glob(File.join(glob, 'current'))
    shortcut_logs unless shortcut_logs.empty?
  end

  # Sends TERM to each pid. A SystemCallError raised for one pid is swallowed
  # so the remaining pids are still signalled.
  #
  # @param pids [Array<Integer>] process ids
  def self.kill_processes(pids)
    pids.each do |pid|
      Process.kill('TERM', pid)
    rescue SystemCallError
      # Deliberately ignored (best effort).
    end
  end

  # Shared lookup for service_shortcut and log_shortcut: fetches the glob for
  # +svc+ and aborts if it contains '/'.
  #
  # @param svc [String] a possible key of SERVICE_SHORTCUTS
  # @return [String, nil] the glob, or nil when +svc+ is not a shortcut
  def self.shortcut_glob(svc)
    glob = SERVICE_SHORTCUTS[svc]
    return unless glob
    return glob unless glob.include?('/')

    GDK::Output.error "invalid service shortcut: #{svc} -> #{glob}"
    abort
  end
  private_class_method :shortcut_glob
end