lib/runit.rb (196 lines of code) (raw):
# frozen_string_literal: true
require 'pathname'
require_relative 'gdk_src'
require_relative 'utils'
module Runit
# Shortcut name => glob pattern. `service_shortcut` resolves the pattern
# against SERVICES_DIR and `log_shortcut` against LOG_DIR; both reject
# patterns that contain a '/'.
SERVICE_SHORTCUTS = {
  'rails' => 'rails-*',
  'praefect' => 'praefect*',
  'gitaly' => '{gitaly,praefect*}',
  'db' => '{redis,redis-cluster,postgresql,postgresql-geo,clickhouse}',
  'rails-migration-dependencies' => '{redis,redis-cluster,postgresql,postgresql-geo,gitaly,praefect*,minio}',
  'workhorse' => 'gitlab-workhorse'
}.freeze

# Directory whose sub-directories are treated as the runit services.
SERVICES_DIR = GDK::SRC.join('services')

# Directory searched for `<service>/current` log files.
LOG_DIR = GDK::SRC.join('log')

# Services that are stopped one by one, in this order, after everything else
# (see `stop`) and started one by one, in reverse order, before everything
# else (see `start`).
ALL_DATA_ORIENTED_SERVICE_NAMES = %w[
  minio
  openldap
  gitaly
  praefect
  redis
  redis-cluster
  postgresql-geo
  postgresql
].freeze

# Maximum number of `sv force-stop` attempts made by `stop_services`.
STOP_RETRY_COUNT = 3
# Renders the runit service configuration and, unless a `runsvdir` for this
# GDK installation is already running, launches one in a forked, detached
# session.
#
# Aborts through `runit_installed!` when runit is missing. Returns nil when a
# matching `runsvdir` was found, otherwise whatever `Process.fork` returns.
def self.start_runsvdir
  runit_installed!

  config = Runit::Config.new(GDK.root)

  # To make the transition easier, legacy services that have not been
  # migrated yet are rendered together with the Ruby services, so that Ruby
  # services always work even when only partially migrated.
  enabled_services = GDK::Services.enabled
  enabled_legacy_services = GDK::Services.legacy.select(&:enabled?)
  config.render(services: enabled_services + enabled_legacy_services)

  # It is important that `runsvdir` gets an absolute path: this lets us tell
  # apart processes that belong to different GDK installations on the same
  # machine.
  base_args = runsvdir_base_args
  return if runsvdir_pid(base_args)

  # Cargo-culting the use of 395 periods from omnibus-gitlab.
  # https://gitlab.com/gitlab-org/omnibus-gitlab/blob/5dfdcafa30ad6e203a04a917f180b630d5121cf6/config/templates/runit/runsvdir-start.erb#L42
  log_arg = "log: #{'.' * 395}"

  Process.fork do
    Dir.chdir('/')
    Process.setsid
    spawn(cleaned_path_env, *base_args, log_arg, in: '/dev/null', out: '/dev/null', err: '/dev/null')
  end
end
# Builds the environment override passed to `spawn`: PATH reduced to the
# entries that are existing directories.
#
# Runit does not handle ENOTDIR from execve well, so let's try to
# prevent that.
# https://gitlab.com/gitlab-org/gitlab-development-kit/issues/666#note_241939982
#
# An unset PATH is treated as an empty PATH instead of raising NoMethodError.
#
# @return [Hash{String => String}] single-entry hash holding the cleaned 'PATH'
def self.cleaned_path_env
  path_entries = ENV.fetch('PATH', '').split(File::PATH_SEPARATOR)
  existing_dirs = path_entries.select { |dir| File.directory?(dir) }

  { 'PATH' => existing_dirs.join(File::PATH_SEPARATOR) }
end
# Command line (without the trailing `log:` argument) used to launch and to
# recognise this installation's `runsvdir`.
#
# @return [Array<String>] a fresh, mutable argument list
def self.runsvdir_base_args
  services_path = GDK.root.join('services').to_s

  ['runsvdir', '-P', services_path]
end
# Finds the PID of a running `runsvdir` whose command line starts with the
# given arguments followed by a space.
#
# @param args [Array<String>] expected leading command-line arguments
# @return [Integer, nil] first matching PID, or nil when `pgrep` prints
#   nothing or no candidate matches
def self.runsvdir_pid(args)
  pgrep_output = GDK::Shellout.new(%w[pgrep runsvdir]).run
  return if pgrep_output.empty?

  candidate_pids = pgrep_output.split("\n").map { |line| Integer(line) }
  # The trailing space keeps a path from matching a longer path that merely
  # shares the same prefix.
  expected_prefix = "#{args.join(' ')} "

  candidate_pids.find do |candidate|
    command_line = GDK::Shellout.new(['ps', '-o', 'args=', '-p', candidate.to_s]).run
    command_line.start_with?(expected_prefix)
  end
end
# Aborts the process with installation instructions unless the `runsvdir`
# executable can be found. Returns nil when it is available.
def self.runit_installed!
  return if Utils.executable_exist?('runsvdir')

  message = <<~MESSAGE
    ERROR: gitlab-development-kit requires Runit to be installed.
    You can install Runit with:
    #{runit_instructions}
  MESSAGE

  abort message
end
# Picks the Runit installation command for the first available package
# manager, checked in the order Homebrew, MacPorts, apt.
#
# @return [String] a shell command, or a placeholder when none is available
def self.runit_instructions
  dependencies = GDK::Dependencies

  return 'brew install runit' if dependencies.homebrew_available?
  return 'sudo port install runit' if dependencies.macports_available?
  return 'sudo apt install runit' if dependencies.linux_apt_available?

  '(no copy-paste Runit installation snippet available for your OS)'
end
# Starts the requested services, or every service when none is given.
#
# @param services [String, Array<String>, nil] service names or shortcuts
# @param quiet [Boolean] forwarded to `sv`
# @return [Boolean] result of the final `sv start` call
def self.start(services, quiet: false)
  requested = Array(services)
  return sv('start', requested, quiet: quiet) unless requested.empty?

  # Redis, PostgreSQL, etc. should be started first, one at a time; the first
  # failure stops the data-oriented phase.
  data_oriented_service_names.reverse_each do |service_name|
    break unless sv('start', [service_name], quiet: quiet)
  end

  sv('start', non_data_oriented_service_names, quiet: quiet)
end
# Stops every service and then unloads `runsvdir`.
#
# @param quiet [Boolean] forwarded to `stop_services`
# @return the result of `unload_runsvdir!`
def self.stop(quiet: false)
  # Redis, PostgreSQL, etc. should be stopped last.
  stop_services(non_data_oriented_service_names, quiet: quiet)

  # Data-oriented services go down one at a time; a falsy result ends the
  # iteration early.
  data_oriented_service_names.each do |service_name|
    break unless stop_services([service_name], quiet: quiet)
  end

  unload_runsvdir!
end
# Force-stops the given services, making up to STOP_RETRY_COUNT attempts.
#
# @param services [Array<String>] service names or shortcuts, passed to `sv`
# @param quiet [Boolean] forwarded to `sv`
# @return [Boolean] truthy as soon as one attempt succeeds, falsy when every
#   attempt failed (previously this always returned true, hiding failures
#   from callers)
def self.stop_services(services, quiet: false)
  # A single stop attempt may fail, so only the last attempt's outcome counts.
  stopped = false

  STOP_RETRY_COUNT.times do |i|
    # From http://smarden.org/runit/sv.8.html:
    #
    # down: If the service is running, send it the TERM signal, and the CONT signal. If ./run exits, start ./finish if it exists. After it stops, do not restart service.
    # force-stop: Same as down, but wait up to (default) 7 seconds for the service to become down. Then report the status, and on timeout send the service the kill command.
    #
    stopped = sv('force-stop', services, quiet: quiet)
    break if stopped

    GDK::Output.notice("Retrying stop (#{i + 1}/#{STOP_RETRY_COUNT})")
  end

  stopped
end
# Sends HUP to this installation's `runsvdir` so that it unloads.
#
# Unloading is safe because all services have just been stopped.
#
# @return [Boolean] true when the signal was delivered, and also when there
#   is nothing to unload: no matching `runsvdir` was found (previously a
#   TypeError from `Process.kill('HUP', nil)`) or the process exited before
#   the signal could be sent
def self.unload_runsvdir!
  pid = runsvdir_pid(runsvdir_base_args)
  return true unless pid

  Process.kill('HUP', pid)
  true
rescue Errno::ESRCH
  # The process disappeared between lookup and signal: already unloaded.
  true
end
# Prepares (without running) the `sv` invocation for the given command.
#
# Makes sure `runsvdir` is started and every expanded service is supervised
# before the command is built.
#
# @param cmd [String] `sv` sub-command, e.g. 'start' or 'force-stop'
# @param services [Array<String>] service names or shortcuts
# @return [GDK::Shellout, nil] nil when no service is left after filtering;
#   callers treat that as a silent, successful skip
def self.sv_shellout(cmd, services)
  start_runsvdir

  service_dirs = expand_services(services)
  ensure_services_are_supervised(service_dirs)

  # Leave out anything whose path mentions redis-cluster while it is disabled.
  unless GDK.config.redis_cluster.enabled?
    service_dirs = service_dirs.reject { |dir| dir.to_s.include?('redis-cluster') }
  end
  return if service_dirs.empty?

  wait_secs = GDK.config.gdk.runit_wait_secs.to_s
  GDK::Shellout.new(['sv', '-w', wait_secs, cmd] + service_dirs.map(&:to_s))
end
# Runs an `sv` command against the given services.
#
# @param cmd [String] `sv` sub-command
# @param services [Array<String>] service names or shortcuts
# @param quiet [Boolean] when true the command is executed with `run`,
#   otherwise with `stream`
# @return true when there was nothing to run, otherwise `success?` of the
#   executed shellout
def self.sv(cmd, services, quiet: false)
  shellout = sv_shellout(cmd, services)
  return true if shellout.nil?

  if quiet
    shellout.run
  else
    shellout.stream
  end

  shellout.success?
end
# Waits, service by service, until each one is supervised
# (see `wait_runsv_supervise_ok!`).
#
# @param services [Array] service directories
# @return the collection that was passed in
def self.ensure_services_are_supervised(services)
  services.each do |service_dir|
    wait_runsv_supervise_ok!(service_dir)
  end
end
# Data-oriented services that have an entry under SERVICES_DIR, in the order
# given by ALL_DATA_ORIENTED_SERVICE_NAMES.
#
# @return [Array<String>]
def self.data_oriented_service_names
  present = ->(service_name) { SERVICES_DIR.join(service_name).exist? }

  ALL_DATA_ORIENTED_SERVICE_NAMES.select(&present)
end
# Every service name that is not a data-oriented service.
#
# @return [Array<String>]
def self.non_data_oriented_service_names
  every_service = all_service_names
  data_oriented = data_oriented_service_names

  every_service.reject { |service_name| data_oriented.include?(service_name) }
end
# Sorted names of the directories directly under SERVICES_DIR, leaving out
# the `praefect-gitaly-*` ones.
#
# @return [Array<String>] empty when SERVICES_DIR does not exist
def self.all_service_names
  return [] unless SERVICES_DIR.exist?

  directory_names = Pathname.new(SERVICES_DIR).children.select(&:directory?).map { |dir| dir.basename.to_s }

  # praefect-gitaly-* services are stopped/started automatically.
  directory_names.reject { |name| name.start_with?('praefect-gitaly-') }.sort
end
# Turns service names and shortcuts into a sorted, duplicate-free list of
# service directories.
#
# @param services [Array<String>] names or shortcuts; empty means every entry
#   of SERVICES_DIR
# @return [Array<Pathname>] a name that is not a matching shortcut maps to
#   SERVICES_DIR/<name>, whether or not that path exists
def self.expand_services(services)
  return SERVICES_DIR.glob('*').sort if services.empty?

  service_dirs = services.flat_map { |name| service_shortcut(name) || SERVICES_DIR.join(name) }
  service_dirs.uniq.sort
end
# Resolves a shortcut from SERVICE_SHORTCUTS to the matching entries of
# SERVICES_DIR.
#
# Aborts when the shortcut's pattern contains a '/'.
#
# @param svc [String] possible shortcut name
# @return [Array<Pathname>, nil] nil when `svc` is not a shortcut or nothing
#   matches its pattern
def self.service_shortcut(svc)
  pattern = SERVICE_SHORTCUTS[svc]
  return unless pattern

  if pattern.include?('/')
    GDK::Output.error "invalid service shortcut: #{svc} -> #{pattern}"
    abort
  end

  matches = SERVICES_DIR.glob(pattern)
  matches unless matches.empty?
end
# Blocks until `<service_dir>/supervise/ok` can be opened for writing.
#
# Makes up to 50 attempts, sleeping 0.1s after each failed one. Aborts when
# `service_dir` is not a directory or when every attempt fails.
#
# @param service_dir [Pathname] service directory
# @return [nil] once the open succeeds
def self.wait_runsv_supervise_ok!(service_dir)
  unless service_dir.directory?
    GDK::Output.error "unknown runit service: #{service_dir}"
    abort
  end

  50.times do
    # Non-blocking open, so a failed attempt raises instead of hanging.
    service_dir.join('supervise', 'ok').open(File::WRONLY | File::NONBLOCK).close
    return
  rescue StandardError
    sleep 0.1
  end

  GDK::Output.error "timeout waiting for runsv in #{service_dir}"
  abort
end
# Replaces the current process with `tail -qF` over the matching log files.
#
# @param services [Array<String>] service names or shortcuts
# @return [true] only when there is nothing to tail (a warning is printed)
def self.tail(services)
  matching_logs = log_files(services)

  # `exec` replaces the current process, so the lines below only run when
  # there was nothing to tail.
  exec('tail', '-qF', *matching_logs.map(&:to_s)) unless matching_logs.empty?

  GDK::Output.warn(<<~MSG)
    No matching services to tail.
    To view a list of services and shortcuts, run `gdk tail --help`.
  MSG

  true
end
# Collects the `current` log files for the given services.
#
# @param services [Array<String>] service names or shortcuts; empty means
#   every `*/current` under LOG_DIR
# @return [Array<Pathname>] duplicate-free; names without a matching
#   shortcut or an existing log file contribute nothing
def self.log_files(services)
  return LOG_DIR.glob(File.join('*', 'current')) if services.empty?

  logs = services.flat_map do |name|
    # A matching shortcut wins; otherwise fall back to the service's own log,
    # if that file exists.
    log_shortcut(name) || [LOG_DIR.join(name, 'current')].select(&:exist?)
  end

  logs.uniq
end
# Resolves a shortcut from SERVICE_SHORTCUTS to the matching
# `<service>/current` files under LOG_DIR.
#
# Aborts when the shortcut's pattern contains a '/'.
#
# @param svc [String] possible shortcut name
# @return [Array<Pathname>, nil] nil when `svc` is not a shortcut or no log
#   file matches
def self.log_shortcut(svc)
  pattern = SERVICE_SHORTCUTS[svc]
  return unless pattern

  if pattern.include?('/')
    GDK::Output.error "invalid service shortcut: #{svc} -> #{pattern}"
    abort
  end

  matching_logs = LOG_DIR.glob(File.join(pattern, 'current'))
  matching_logs.empty? ? nil : matching_logs
end
# Sends TERM to every given PID on a best-effort basis.
#
# @param pids [Array<Integer>] process IDs to signal
# @return the collection that was passed in
def self.kill_processes(pids)
  pids.each do |target_pid|
    Process.kill('TERM', target_pid)
  rescue SystemCallError
    # A process that cannot be signalled is skipped on purpose.
    next
  end
end
end