files/gitlab-ctl-commands-ee/lib/geo/promote.rb (252 lines of code) (raw):
require 'io/console'
require 'rainbow/ext/string'
module Geo
class Promote
# Geo site roles that #node_role accepts when parsing the output of the
# `geo:site:role` task.
GEO_SITE_ROLES = %i[primary secondary].freeze
# Patroni roles that #patroni_node_role accepts as a successful detection.
PATRONI_NODE_ROLES = %i[leader replica standby_leader].freeze
# Patroni roles that #patroni_leader? treats as "this node is a leader".
PATRONI_LEADER_ROLES = %i[leader standby_leader].freeze
# Services inspected by #check_running_services: when none of them is enabled
# on this node, the command exits without performing any promotion step.
SERVICE_NAMES = %w[
geo-logcursor
geo-postgresql
gitaly
gitlab-workhorse
patroni
postgresql
praefect
puma
sidekiq
].freeze
# base_path is derived from ctl in #initialize; options is the hash read via
# options[:force] and options[:enable_silent_mode] below.
attr_accessor :base_path, :ctl, :options
# Builds a promotion command.
#
# ctl     - the gitlab-ctl context; it must respond to #base_path and is used
#           later to query services, log, and run reconfigure.
# options - hash of command options (this class reads :force and
#           :enable_silent_mode).
def initialize(ctl, options)
  @ctl = ctl
  @options = options
  # Cached so command paths can be built without going through ctl each time.
  @base_path = ctl.base_path
end
# Runs the whole promotion sequence for this node. The steps are
# order-dependent: configuration is changed first, reconfigure then applies
# it, and the services are restarted last.
def execute
# Exits with status 1 unless the operator confirms (skipped with options[:force]).
ask_for_confirmation
# Exits with status 0 when none of SERVICE_NAMES is enabled on this node.
check_running_services
promote_database
toggle_geo_services
promote_to_primary
run_reconfigure
restart_services
print_success_message
end
private
# Verifies that at least one of SERVICE_NAMES is enabled on this node.
#
# When none is enabled there is nothing to promote: an informational message
# is printed and the process exits with status 0.
def check_running_services
  anything_to_promote = progress_message('Checking if we need to promote any service running on this node') do
    SERVICE_NAMES.any? { |name| service_enabled?(name) }
  end
  return if anything_to_promote

  print_no_actions_required_message
  exit 0
end
# Asks the operator to confirm the promotion, unless options[:force] is set.
#
# Returns normally only when the answer is "y" (case-insensitive). Any other
# answer exits the process with status 1. A closed or exhausted standard
# input is treated as a refusal.
def ask_for_confirmation
  return if options[:force]

  puts
  puts 'WARNING: The current secondary node will now be promoted to a primary node. '\
       'Are you sure you want to proceed? (y/n)'.color(:yellow)

  # IO#gets returns nil at end of input (e.g. stdin closed or redirected from
  # /dev/null); coerce to a string so that case aborts cleanly instead of
  # raising NoMethodError on nil.
  answer = $stdin.gets.to_s.chomp
  return if answer.casecmp('y').zero?

  exit 1
end
# Promotes whichever database flavour is enabled on this node: a plain
# PostgreSQL service, a Patroni-managed one, or neither.
def promote_database
  promote_postgresql if pg_enabled?
  return unless patroni_enabled?

  promote_patroni_standby_cluster
end
# Promotion path for a node running the plain `postgresql` service (called
# from #promote_database when pg_enabled? is true): logs what was detected
# and then ends standby mode if the database is still in recovery.
def promote_postgresql
log('Detected a PostgreSQL Standby server cluster.')
promote_postgresql_read_write
end
# Promotes the local PostgreSQL through Geo::PromoteDb, but only when the
# server reports that it is still in recovery; otherwise does nothing.
#
# Returns nil when no promotion was needed, else whatever the progress
# helper returns for the promotion block.
def promote_postgresql_read_write
  return unless pg_is_in_recovery?

  message = 'Promoting the PostgreSQL to end standby mode and begin read-write operations'
  progress_message(message) { Geo::PromoteDb.new(ctl).execute }
end
# Promotion path for a Patroni-managed node (called from #promote_database
# when patroni_enabled? is true).
def promote_patroni_standby_cluster
# Only a node detected as leader / standby leader promotes PostgreSQL itself.
promote_postgresql_read_write if patroni_leader?
# Every Patroni node turns the standby-cluster setting off in the cluster
# configuration and then reconfigures to apply it.
disable_patroni_standby_cluster
run_reconfigure
end
# Sets patroni -> standby_cluster -> enable to false in the cluster
# configuration and saves it. Aborts via #die when the save reports failure.
def disable_patroni_standby_cluster
  saved = progress_message('Disabling Patroni Standby server settings in the cluster configuration file') do
    GitlabCluster.config.set('patroni', 'standby_cluster', 'enable', false)
    GitlabCluster.config.save
  end

  die("Unable to write to #{GitlabCluster::JSON_FILE}.") unless saved
end
# Pauses Patroni auto-failover through `gitlab-ctl patroni pause`.
#
# Does nothing unless Patroni is enabled on this node and the node was
# detected as a leader. Aborts via #die when the command reports an error.
def pause_patroni_cluster
  return unless patroni_enabled? && patroni_leader?

  paused = progress_message('Disabling Patroni auto-failover') do
    # run_command returns a result object, which is truthy even when the
    # command failed, so the outcome has to be read from #error? (as the
    # rake-task callers in this class do).
    !run_command("#{base_path}/bin/gitlab-ctl patroni pause").error?
  end

  die('Unable to disable Patroni auto-failover') unless paused
end
# Resumes Patroni auto-failover through `gitlab-ctl patroni resume`.
#
# Does nothing unless Patroni is enabled on this node and the node was
# detected as a leader. Aborts via #die when the command reports an error.
def resume_patroni_cluster
  return unless patroni_enabled? && patroni_leader?

  resumed = progress_message('Resuming Patroni auto-failover') do
    # run_command returns a result object, which is truthy even when the
    # command failed, so the outcome has to be read from #error? (as the
    # rake-task callers in this class do).
    !run_command("#{base_path}/bin/gitlab-ctl patroni resume").error?
  end

  die('Unable to resume Patroni auto-failover') unless resumed
end
# Whether the detected Patroni role of this node is one of
# PATRONI_LEADER_ROLES. The answer is computed once and cached.
def patroni_leader?
  # `||=` would not cache a false answer; use the same defined? guard as the
  # role-detection methods so a non-leader result is memoized too.
  return @patroni_leader if defined?(@patroni_leader)

  @patroni_leader = PATRONI_LEADER_ROLES.include?(patroni_node_role)
end
# Detects and caches the Patroni role of this node by asking a
# Patroni::Client three questions, always in this order: standby_leader?,
# leader?, replica?. Every positive answer overwrites the previous one, so
# the last matching role wins.
#
# Aborts via #die when no role in PATRONI_NODE_ROLES was detected.
def patroni_node_role
  return @patroni_node_role if defined?(@patroni_node_role)

  detected = progress_message('Attempting to detect the Patroni role of this node') do
    node = Patroni::Client.new
    # Insertion order of this hash is the probing order.
    probes = { standby_leader: :standby_leader?, leader: :leader?, replica: :replica? }
    probes.each do |role, predicate|
      @patroni_node_role = role if node.public_send(predicate)
    end

    PATRONI_NODE_ROLES.include?(@patroni_node_role)
  end
  die('Unable to detect the Patroni role of this node.') unless detected

  @patroni_node_role
end
# Rewrites the cluster configuration so this node stops acting as a Geo
# secondary. Only runs on nodes with puma, sidekiq, geo-logcursor or
# geo-postgresql enabled; aborts via #die when the configuration cannot be
# saved.
def toggle_geo_services
  return unless puma_enabled? || sidekiq_enabled? || geo_logcursor_enabled? || geo_postgresql_enabled?

  log('Detected an application or a Sidekiq or a Geo log cursor or a Geo PostgreSQL node.')

  saved = progress_message('Disabling the secondary services and enabling the primary services in the cluster configuration file') do
    if single_server_site?
      # The geo_secondary_role is convenient only for single-server Geo
      # secondary sites, where flipping the two role flags is enough.
      GitlabCluster.config.set('primary', true)
      GitlabCluster.config.set('secondary', false)
    else
      # The geo_secondary_role must not be used in a multiple-server setup,
      # so each secondary-only component is switched off individually.
      GitlabCluster.config.set('geo_secondary', 'enable', false) if puma_enabled? || sidekiq_enabled?
      GitlabCluster.config.set('geo_logcursor', 'enable', false) if geo_logcursor_enabled?
      GitlabCluster.config.set('geo_postgresql', 'enable', false) if geo_postgresql_enabled?
    end

    GitlabCluster.config.save
  end

  die("Unable to write to #{GitlabCluster::JSON_FILE}.") unless saved
end
# Runs the `geo:set_secondary_as_primary` task on an application (puma) node
# whose Geo role is still :secondary. The task receives ENABLE_SILENT_MODE,
# taken from options[:enable_silent_mode] and converted to a string.
#
# Aborts via #die when the task reports an error.
def promote_to_primary
  return unless puma_enabled? && secondary_node?

  log('Detected an application node.')

  promoted = progress_message('Promoting secondary site to primary site') do
    task_env = { ENABLE_SILENT_MODE: options[:enable_silent_mode].to_s }
    task = run_task('geo:set_secondary_as_primary', env: task_env)
    !task.error?
  end

  die("Unable to promote secondary site to primary site.") unless promoted
end
# Restarts, in this order, gitaly, praefect, puma and gitlab-workhorse.
# Each helper is a no-op when its service is not enabled on this node.
def restart_services
restart_gitaly
restart_praefect
restart_puma
restart_workhorse
end
# Restarts the gitaly service when it is enabled on this node.
def restart_gitaly
sv_progress('restart', 'gitaly') if gitaly_enabled?
end
# Restarts the praefect service when it is enabled on this node.
def restart_praefect
sv_progress('restart', 'praefect') if praefect_enabled?
end
# Restarts the puma service when it is enabled on this node.
def restart_puma
sv_progress('restart', 'puma') if puma_enabled?
end
# Restarts the gitlab-workhorse service when it is enabled on this node.
def restart_workhorse
sv_progress('restart', 'gitlab-workhorse') if workhorse_enabled?
end
# True when the Geo role detected by #node_role is :secondary.
def secondary_node?
node_role == :secondary
end
# Detects and caches the Geo role of this node by running the
# `geo:site:role` task and converting its trimmed output to a symbol.
#
# Aborts via #die when the task reports an error or prints something that is
# not listed in GEO_SITE_ROLES.
def node_role
  return @node_role if defined?(@node_role)

  detected = progress_message('Attempting to detect the role of this Geo node') do
    task = run_task('geo:site:role')
    if task.error?
      false
    else
      @node_role = task.stdout.strip.to_sym
      GEO_SITE_ROLES.include?(@node_role)
    end
  end
  die('Unable to detect the role of this Geo node.') unless detected

  @node_role
end
# Runs reconfigure (a Chef run against the embedded dna.json) to apply the
# configuration changes made by the earlier steps.
#
# The value returned by the reconfigure progress block is not inspected
# here, so when run_chef returns, the Patroni resume step runs whether or
# not the run reported success.
def run_reconfigure
# If the current node is a Patroni leader, we need to enable Patroni
# maintenance mode to prevent an automatic failover during reconfigure.
pause_patroni_cluster
progress_message('Running reconfigure to apply changes') do
ctl.run_chef("#{base_path}/embedded/cookbooks/dna.json").success?
end
resume_patroni_cluster
end
# Prints a blank line followed by a green notice saying that none of
# SERVICE_NAMES is enabled, so there is nothing to promote on this node.
def print_no_actions_required_message
  services = SERVICE_NAMES.join(' or ')
  notice = "The #{services} services are not enabled. No actions are required to promote this node."

  puts
  puts notice.color(:green)
end
# Prints a blank line followed by the green final success notice.
def print_success_message
  notice = 'You successfully promoted the current node! It might take some time to reload the services, and for the changes to take effect.'

  puts
  puts notice.color(:green)
end
# Asks PostgreSQL whether it is in recovery by running
# `SELECT pg_is_in_recovery();`.
#
# Returns true only when the trimmed query output is exactly 't'.
# Raises PgIsInRecoveryError, with the trimmed stderr appended to the
# message, when the query reports an error.
def pg_is_in_recovery?
  result = run_query('SELECT pg_is_in_recovery();')
  if result.error?
    raise PgIsInRecoveryError, "Unable to check if PostgreSQL is in recovery #{result.stderr.strip}"
  end

  result.stdout.strip == 't'
end
# Whether the roles reported by GitlabCtl::Util.roles for this installation
# include 'geo-secondary'. The answer is computed once and cached.
def single_server_site?
  # `||=` would repeat the roles lookup on every call whenever the answer is
  # false; the defined? guard memoizes both outcomes.
  return @single_server if defined?(@single_server)

  @single_server = GitlabCtl::Util.roles(base_path).include?('geo-secondary')
end
# Whether the geo-logcursor service is enabled on this node. The answer is
# looked up once and cached, including a negative one.
def geo_logcursor_enabled?
  # `||=` never caches false, so a disabled service would be re-queried on
  # every call; the defined? guard memoizes both outcomes.
  return @geo_logcursor_enabled if defined?(@geo_logcursor_enabled)

  @geo_logcursor_enabled = service_enabled?('geo-logcursor')
end
# Whether the geo-postgresql service is enabled on this node. The answer is
# looked up once and cached, including a negative one.
def geo_postgresql_enabled?
  # `||=` never caches false, so a disabled service would be re-queried on
  # every call; the defined? guard memoizes both outcomes.
  return @geo_postgresql_enabled if defined?(@geo_postgresql_enabled)

  @geo_postgresql_enabled = service_enabled?('geo-postgresql')
end
# Whether the gitaly service is enabled on this node. The answer is looked
# up once and cached, including a negative one.
def gitaly_enabled?
  # `||=` never caches false, so a disabled service would be re-queried on
  # every call; the defined? guard memoizes both outcomes.
  return @gitaly_enabled if defined?(@gitaly_enabled)

  @gitaly_enabled = service_enabled?('gitaly')
end
# Whether the patroni service is enabled on this node. The answer is looked
# up once and cached, including a negative one.
def patroni_enabled?
  # `||=` never caches false, so on a node without Patroni every pause/resume
  # check would query the service again; the defined? guard memoizes both
  # outcomes.
  return @patroni_enabled if defined?(@patroni_enabled)

  @patroni_enabled = service_enabled?('patroni')
end
# Whether the postgresql service is enabled on this node. The answer is
# looked up once and cached, including a negative one.
def pg_enabled?
  # `||=` never caches false, so a disabled service would be re-queried on
  # every call; the defined? guard memoizes both outcomes.
  return @pg_enabled if defined?(@pg_enabled)

  @pg_enabled = service_enabled?('postgresql')
end
# Whether the praefect service is enabled on this node. The answer is looked
# up once and cached, including a negative one.
def praefect_enabled?
  # `||=` never caches false, so a disabled service would be re-queried on
  # every call; the defined? guard memoizes both outcomes.
  return @praefect_enabled if defined?(@praefect_enabled)

  @praefect_enabled = service_enabled?('praefect')
end
# Whether the puma service is enabled on this node. The answer is looked up
# once and cached, including a negative one.
def puma_enabled?
  # `||=` never caches false, so on a node without puma each of the several
  # callers would query the service again; the defined? guard memoizes both
  # outcomes.
  return @puma_enabled if defined?(@puma_enabled)

  @puma_enabled = service_enabled?('puma')
end
# Whether the sidekiq service is enabled on this node. The answer is looked
# up once and cached, including a negative one.
def sidekiq_enabled?
  # `||=` never caches false, so a disabled service would be re-queried on
  # every call; the defined? guard memoizes both outcomes.
  return @sidekiq_enabled if defined?(@sidekiq_enabled)

  @sidekiq_enabled = service_enabled?('sidekiq')
end
# Whether the gitlab-workhorse service is enabled on this node. The answer
# is looked up once and cached, including a negative one.
def workhorse_enabled?
  # `||=` never caches false, so a disabled service would be re-queried on
  # every call; the defined? guard memoizes both outcomes.
  return @workhorse_enabled if defined?(@workhorse_enabled)

  @workhorse_enabled = service_enabled?('gitlab-workhorse')
end
# Asks ctl whether the named service is enabled on this node.
# service - service name string, e.g. one of SERVICE_NAMES.
def service_enabled?(service)
ctl.service_enabled?(service)
end
# Delegates to GitlabCtl::Util.progress_message, forwarding the block.
# Callers in this class treat a falsy return value as "the step failed".
# NOTE(review): presumably the helper returns the block's result — confirm
# against GitlabCtl::Util.
def progress_message(message, &block)
GitlabCtl::Util.progress_message(message, &block)
end
# Emits the message through Kernel#warn and terminates the process with
# exit status 1.
def die(message)
warn(message)
exit 1
end
# Forwards all arguments unchanged to ctl.log.
def log(*args)
ctl.log(*args)
end
# Node attributes loaded through GitlabCtl::Util.get_node_attributes for
# base_path, cached after the first truthy result.
# NOTE(review): no caller of this method is visible in this file.
def attributes
@attributes ||= GitlabCtl::Util.get_node_attributes(base_path)
end
# Runs a shell command through GitlabCtl::Util.run_command and returns its
# result object; callers in this class read #error?, #stdout and #stderr
# from it.
# cmd  - command line string.
# live - forwarded as-is (defaults to true here).
# env  - hash of environment variables forwarded as-is.
def run_command(cmd, live: true, env: {})
GitlabCtl::Util.run_command(cmd, live: live, env: env)
end
# Runs an SQL statement through `gitlab-psql -c "<query>" -q -t` and returns
# the command result (live output defaults to false here).
#
# The query is interpolated into a double-quoted shell argument without any
# escaping, so only trusted statements that contain no double quotes or
# other shell-special characters may be passed.
def run_query(query, live: false, env: {})
run_command("#{base_path}/bin/gitlab-psql -c \"#{query}\" -q -t", live: live, env: env)
end
# Runs a rake task through `gitlab-rake <task>` and returns the command
# result.
# task - task name, interpolated into the command line unescaped.
# env  - environment variables passed through to the command.
def run_task(task, live: true, env: {})
run_command("#{base_path}/bin/gitlab-rake #{task}", live: live, env: env)
end
# Runs an sv action (e.g. 'restart') for a service, wrapped in a progress
# message. The step counts as successful when
# ctl.run_sv_command_for_service returns zero.
# NOTE(review): presumably that value is the command's exit status — confirm.
def sv_progress(action, service)
progress_message("Running #{action} on #{service}") do
ctl.run_sv_command_for_service(action, service).zero?
end
end
end
# Raised by Promote#pg_is_in_recovery? when the recovery-state query reports
# an error.
PgIsInRecoveryError = Class.new(StandardError)
end