lib/gitlab/database/partitioning/partition_manager.rb (237 lines of code) (raw):
# frozen_string_literal: true
module Gitlab
module Database
module Partitioning
class PartitionManager
include ::Gitlab::Utils::StrongMemoize
include ::Gitlab::Database::MigrationHelpers::LooseForeignKeyHelpers
# Raised by assert_partition_detachable! when a foreign key references the partition's parent table.
UnsafeToDetachPartitionError = Class.new(StandardError)
# Timeout passed to the exclusive lease that guards partition management for a table.
LEASE_TIMEOUT = 1.hour
# Statement timeout applied to the transaction that runs ANALYZE on the partitioned table.
STATEMENT_TIMEOUT = 1.hour
# Lease key template; the `%s` placeholder is filled with the model's table name.
MANAGEMENT_LEASE_KEY = 'database_partition_management_%s'
# Offset from now used for the `drop_after` value recorded when a partition is detached.
RETAIN_DETACHED_PARTITIONS_FOR = 1.week
# Size threshold; for partitions above it, `drop_after` is moved to the next Saturday after the retention offset.
MAX_PARTITION_SIZE = 150.gigabytes
# Sets up a manager for one partitioned model.
#
# model      - the model whose table's partitions are managed.
# connection - optional connection to operate on; when omitted (or falsy),
#              the model's own connection is used.
def initialize(model, connection: nil)
  @model = model
  @connection = connection
  @connection ||= model.connection
  # Database configuration name of the chosen connection; included in log entries.
  @connection_name = @connection.pool.db_config.name
end
# Runs the given SQL on the connection this manager operates on and returns
# whatever the connection's `execute` returns.
# NOTE(review): presumably public because the included helpers call `execute` — confirm.
def execute(sql)
  connection.execute(sql)
end
# Brings the partitions of the model's table in line with its partitioning strategy.
#
# When the table is not partitioned, only a warning is logged. Otherwise, inside the
# management lease: the strategy is validated, missing partitions are created, extra
# partitions are detached and, if `analyze` is truthy, ANALYZE is attempted.
#
# ArgumentError is handed to error tracking; any other StandardError is logged and
# not re-raised.
def sync_partitions(analyze: true)
  if table_partitioned?
    Gitlab::AppLogger.info(
      message: "Checking state of dynamic postgres partitions",
      table_name: model.table_name,
      connection_name: @connection_name
    )

    only_with_exclusive_lease(model, lease_key: MANAGEMENT_LEASE_KEY) do
      model.partitioning_strategy.validate_and_fix

      # Work out both lists before changing anything.
      to_create = missing_partitions
      to_detach = extra_partitions

      create(to_create) unless to_create.empty?
      detach(to_detach) unless to_detach.empty?

      run_analyze_on_partitioned_table if analyze
    end
  else
    skip_syncing_partitions
  end
rescue ArgumentError => error
  Gitlab::ErrorTracking.track_and_raise_for_dev_exception(error)
rescue StandardError => error
  Gitlab::AppLogger.error(
    message: "Failed to create / detach partition(s)",
    table_name: model.table_name,
    exception_class: error.class,
    exception_message: error.message,
    connection_name: @connection_name
  )
end
private
attr_reader :model, :connection
# Create all partition tables (doesn't take any lock on parent).
# Executes each partition's `to_create_sql` on the connection, in order.
def create_partition_tables(partitions)
  partitions.each do |new_partition|
    creation_sql = new_partition.to_create_sql
    connection.execute(creation_sql)
  end
end
# Attach all partitions (takes SHARE UPDATE EXCLUSIVE lock).
# Each partition is post-processed right after its own attach statement has run.
def attach_partition_tables(partitions)
  partitions.each do |new_partition|
    attach_sql = new_partition.to_attach_sql
    connection.execute(attach_sql)

    process_created_partition(new_partition)
  end
end
# Post-processing for a freshly created partition: logs it, locks it for writes
# when should_lock_for_writes? says so, and attaches the loose foreign key trigger
# when the parent table has loose foreign keys.
def process_created_partition(partition)
  Gitlab::AppLogger.info(
    message: "Created partition",
    partition_name: partition.partition_name,
    table_name: partition.table,
    connection_name: @connection_name
  )

  lock_partitions_for_writes(partition) if should_lock_for_writes?
  return unless parent_table_has_loose_foreign_key?

  attach_loose_foreign_key_trigger(partition)
end
# Partitions the strategy reports as missing; an empty list when the model's
# table does not exist on this connection.
def missing_partitions
  if connection.table_exists?(model.table_name)
    model.partitioning_strategy.missing_partitions
  else
    []
  end
end
# Partitions the strategy reports as extra; an empty list when the model's
# table does not exist on this connection.
def extra_partitions
  if connection.table_exists?(model.table_name)
    model.partitioning_strategy.extra_partitions
  else
    []
  end
end
# Yields only when the exclusive lease for the model's table could be obtained.
# The lease key is `lease_key` with the table name substituted in. `cancel` is
# called on the lease afterwards in every case, including when the block raises.
#
# Returns the block's value, or nil when the lease was not obtained.
def only_with_exclusive_lease(model, lease_key:)
  table_lease_key = lease_key % model.table_name
  lease = Gitlab::ExclusiveLease.new(table_lease_key, timeout: LEASE_TIMEOUT)
  return unless lease.try_obtain

  yield
ensure
  lease&.cancel
end
# Creates the given partitions inside a lock-retried transaction, then notifies the
# partitioning strategy through `after_adding_partitions`.
#
# With the :reduce_lock_usage_during_partition_creation flag enabled, tables are
# created first and attached afterwards; otherwise the parent table is locked
# exclusively and each partition is created with a single statement.
def create(partitions)
  # with_lock_retries starts a requires_new transaction most of the time, but not on the last iteration,
  with_lock_retries do
    # so a transaction is opened here when none is in progress yet.
    connection.transaction(requires_new: false) do
      if Feature.enabled?(:reduce_lock_usage_during_partition_creation)
        create_partition_tables(partitions)
        attach_partition_tables(partitions)
      else
        create_under_exclusive_parent_lock(partitions)
      end

      model.partitioning_strategy.after_adding_partitions
    end
  end
end

# Flag-disabled path of `create`: lock the parent table, then create and post-process each partition.
def create_under_exclusive_parent_lock(partitions)
  # Partitions might not get created (IF NOT EXISTS) so explicit locking will not happen.
  # This LOCK TABLE ensures the exclusive lock is taken as the first step.
  parent_table = connection.quote_table_name(model.table_name)
  connection.execute("LOCK TABLE #{parent_table} IN ACCESS EXCLUSIVE MODE")

  partitions.each do |partition|
    connection.execute(partition.to_sql)
    process_created_partition(partition)
  end
end
# Detaches each of the given partitions inside one lock-retried transaction.
def detach(partitions)
  # with_lock_retries starts a requires_new transaction most of the time, but not on the last iteration,
  with_lock_retries do
    # so a transaction is opened here when none is in progress yet.
    connection.transaction(requires_new: false) do
      partitions.each do |partition|
        detach_one_partition(partition)
      end
    end
  end
end
# Detaches a single partition: checks it is safe to detach, records the cleanup
# entry, runs the partition's detach SQL and logs the result.
def detach_one_partition(partition)
  assert_partition_detachable!(partition)
  schedule_detached_partition_cleanup(partition)

  connection.execute(partition.to_detach_sql)

  log_payload = {
    message: "Detached Partition",
    partition_name: partition.partition_name,
    table_name: partition.table,
    connection_name: @connection_name
  }
  Gitlab::AppLogger.info(**log_payload)
end
# Raises UnsafeToDetachPartitionError when at least one foreign key references the
# partition's parent table (looked up as "<current schema>.<table>").
# Returns nil when no such foreign key is found.
def assert_partition_detachable!(partition)
  referenced_table = "#{connection.current_schema}.#{partition.table}"
  blocking_fk = PostgresForeignKey.by_referenced_table_identifier(referenced_table).first
  return unless blocking_fk

  raise UnsafeToDetachPartitionError, "Cannot detach #{partition.partition_name}, it would block while " \
    "checking foreign key #{blocking_fk.name} on #{blocking_fk.constrained_table_identifier}"
end
# Runs the block through WithPartitioningLockRetries on this manager's connection,
# passing `raise_on_exhaustion: true`.
def with_lock_retries(&block)
  retries = Gitlab::Database::Partitioning::WithPartitioningLockRetries.new(
    klass: self.class,
    logger: Gitlab::AppLogger,
    connection: connection
  )

  retries.run(raise_on_exhaustion: true, &block)
end
# Whether a partitioned table with the model's table name is found in the current
# schema, looked up while SharedModel uses this manager's connection.
def table_partitioned?
  Gitlab::Database::SharedModel.using_connection(connection) do
    partitioned_table =
      Gitlab::Database::PostgresPartitionedTable.find_by_name_in_current_schema(model.table_name)

    partitioned_table.present?
  end
end
# Logs a warning that partition syncing is skipped for the model's table.
def skip_syncing_partitions
  log_payload = {
    message: "Skipping syncing partitions",
    table_name: model.table_name,
    connection_name: @connection_name
  }

  Gitlab::AppLogger.warn(**log_payload)
end
# Runs ANALYZE (SKIP_LOCKED) on the partitioned table inside a primary transaction
# with STATEMENT_TIMEOUT, unless ineligible_for_analyzing? says to skip it.
def run_analyze_on_partitioned_table
  return if ineligible_for_analyzing?

  primary_transaction(statement_timeout: STATEMENT_TIMEOUT) do
    # Running ANALYZE on partitioned table will go through itself and its partitions
    analyze_sql = "ANALYZE (SKIP_LOCKED) #{model.quoted_table_name}"
    connection.execute(analyze_sql)
  end
end
# ANALYZE is skipped when no analyze interval is configured, when the table has no
# partition yet, or when the last analyze is still within the interval.
# The checks run in that order and stop at the first one that applies.
def ineligible_for_analyzing?
  return true if analyze_interval.blank?
  return true if first_model_partition.blank?

  last_analyzed_at_within_interval?
end
# Whether the first partition was analyzed no longer ago than the analyze interval.
# False when no last analyze time is reported.
def last_analyzed_at_within_interval?
  partition_identifier = first_model_partition.identifier

  primary_transaction do
    # We don't need to get the last_analyze_time from partitioned table,
    # because it's not supported and always returns NULL for PG version below 14
    # Therefore, we can always get the last_analyze_time from the first partition
    lookup_sql = "SELECT pg_stat_get_last_analyze_time('#{partition_identifier}'::regclass)"
    analyzed_at = connection.select_value(lookup_sql)

    analyzed_at.present? && analyzed_at >= ::Time.current - analyze_interval
  end
end
# First partition returned for the model's table, looked up while SharedModel uses
# this manager's connection. Memoized through strong_memoize_attr.
def first_model_partition
  Gitlab::Database::SharedModel.using_connection(connection) do
    table_partitions = Gitlab::Database::PostgresPartition.for_parent_table(model.table_name)
    table_partitions.first
  end
end
strong_memoize_attr :first_model_partition
# Analyze interval as configured on the model's partitioning strategy.
def analyze_interval
  strategy = model.partitioning_strategy
  strategy.analyze_interval
end
# Yields inside a transaction on this connection while the load balancing session
# is told to use the primary. When a statement timeout is given, it is applied
# with SET LOCAL before yielding.
def primary_transaction(statement_timeout: nil)
  session = Gitlab::Database::LoadBalancing::SessionMap.current(connection.load_balancer)

  session.use_primary do
    connection.transaction(requires_new: false) do
      if statement_timeout.present?
        timeout_sql = format("SET LOCAL statement_timeout TO '%ds'", statement_timeout)
        connection.execute(timeout_sql)
      end

      yield
    end
  end
end
# New partitions are locked for writes only when the ops flag
# :automatic_lock_writes_on_partition_tables is enabled, the database mode is
# MODE_MULTIPLE_DATABASES, and this manager's connection differs from the model's
# own connection. Memoized through strong_memoize_attr.
def should_lock_for_writes?
  return false unless Feature.enabled?(:automatic_lock_writes_on_partition_tables, type: :ops)
  return false unless Gitlab::Database.database_mode == Gitlab::Database::MODE_MULTIPLE_DATABASES

  connection != model.connection
end
strong_memoize_attr :should_lock_for_writes?
# Locks the given partition for writes through LockWritesManager.
#
# The schema-qualified partition name comes from the shared `identifier` helper,
# so it is built in one place only.
def lock_partitions_for_writes(partition)
  Gitlab::Database::LockWritesManager.new(
    table_name: identifier(partition),
    connection: connection,
    database_name: @connection_name,
    # Retries are requested only when no transaction is currently open on the connection.
    with_retries: !connection.transaction_open?
  ).lock_writes
end
# Attaches the record-deletion tracking trigger to the partition when its parent
# table has loose foreign keys; does nothing otherwise.
#
# The guard runs first so the partition identifier is only built when needed, and
# it comes from the shared `identifier` helper rather than being built by hand.
def attach_loose_foreign_key_trigger(partition)
  return unless has_loose_foreign_key?(partition.table)

  track_record_deletions_override_table_name(identifier(partition), partition.table)
end
# Whether the model's table has loose foreign keys, as reported by
# has_loose_foreign_key?. Memoized through strong_memoize_attr.
def parent_table_has_loose_foreign_key?
  parent_table = model.table_name
  has_loose_foreign_key?(parent_table)
end
strong_memoize_attr :parent_table_has_loose_foreign_key?
# Records a DetachedPartition entry for the partition with its `drop_after` time.
#
# `drop_after` is RETAIN_DETACHED_PARTITIONS_FOR from now; for partitions above the
# size threshold it is moved on to the next Saturday after that point.
def schedule_detached_partition_cleanup(partition)
  oversized = above_threshold?(identifier(partition))

  retention_end = RETAIN_DETACHED_PARTITIONS_FOR.from_now
  drop_after = oversized ? retention_end.next_occurring(:saturday) : retention_end

  Postgresql::DetachedPartition.create!(
    table_name: partition.partition_name,
    drop_after: drop_after
  )
end
# Whether a partition with the given identifier exists in the `above_threshold`
# scope for MAX_PARTITION_SIZE, queried while SharedModel uses this manager's connection.
def above_threshold?(identifier)
  Gitlab::Database::SharedModel.using_connection(connection) do
    oversized_partitions = Gitlab::Database::PostgresPartition
      .for_identifier(identifier)
      .above_threshold(MAX_PARTITION_SIZE)

    oversized_partitions.exists?
  end
end
# Schema-qualified name of the partition: "<DYNAMIC_PARTITIONS_SCHEMA>.<partition name>".
def identifier(partition)
  format('%s.%s', Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA, partition.partition_name)
end
end
end
end
end