spec/lib/gitlab/database/partitioning/partition_manager_spec.rb (448 lines of code) (raw):

# frozen_string_literal: true require 'spec_helper' RSpec.describe Gitlab::Database::Partitioning::PartitionManager, feature_category: :database do include ActiveSupport::Testing::TimeHelpers include Database::PartitioningHelpers include ExclusiveLeaseHelpers include Gitlab::Database::MigrationHelpers::LooseForeignKeyHelpers using RSpec::Parameterized::TableSyntax let(:partitioned_table_name) { :_test_gitlab_main_my_model_example_table } context 'creating partitions (mocked)' do subject(:sync_partitions) { manager.sync_partitions } let(:manager) { described_class.new(model) } let(:model) { double(partitioning_strategy: partitioning_strategy, table_name: table, connection: connection) } let(:connection) { ActiveRecord::Base.connection } let(:table) { partitioned_table_name } let(:partitioning_strategy) do double(missing_partitions: partitions, extra_partitions: [], after_adding_partitions: nil, analyze_interval: nil) end let(:partitions) do [ instance_double(Gitlab::Database::Partitioning::TimePartition, table: 'bar', partition_name: 'foo', to_sql: "SELECT 1", to_create_sql: "CREATE TABLE _partition_1", to_attach_sql: "ALTER TABLE foo ATTACH PARTITION _partition_1"), instance_double(Gitlab::Database::Partitioning::TimePartition, table: 'bar', partition_name: 'foo2', to_sql: "SELECT 2", to_create_sql: "CREATE TABLE _partition_2", to_attach_sql: "ALTER TABLE foo2 ATTACH PARTITION _partition_2") ] end context 'when the given table is partitioned' do before do create_partitioned_table(connection, table) allow(connection).to receive(:table_exists?).and_call_original allow(connection).to receive(:table_exists?).with(table).and_return(true) allow(connection).to receive(:execute).and_call_original expect(partitioning_strategy).to receive(:validate_and_fix) stub_exclusive_lease(described_class::MANAGEMENT_LEASE_KEY % table, timeout: described_class::LEASE_TIMEOUT) stub_feature_flags(reduce_lock_usage_during_partition_creation: false) end context 'with reduce_lock_usage_during_partition_creation feature flag enabled' do before do stub_feature_flags(reduce_lock_usage_during_partition_creation: true) end it 'creates and attaches the partition in 2 steps', :aggregate_failures do expect(connection).not_to receive(:execute).with("LOCK TABLE \"#{table}\" IN ACCESS EXCLUSIVE MODE") expect(manager).to receive(:create_partition_tables).with(partitions) expect(manager).to receive(:attach_partition_tables).with(partitions) sync_partitions end end it 'creates the partition' do expect(connection).to receive(:execute).with("LOCK TABLE \"#{table}\" IN ACCESS EXCLUSIVE MODE") expect(connection).to receive(:execute).with(partitions.first.to_sql) expect(connection).to receive(:execute).with(partitions.second.to_sql) sync_partitions end context 'with explicitly provided connection' do let(:connection) { Ci::ApplicationRecord.connection } it 'uses the explicitly provided connection when any' do skip_if_multiple_databases_not_setup(:ci) expect(connection).to receive(:execute).with("LOCK TABLE \"#{table}\" IN ACCESS EXCLUSIVE MODE") expect(connection).to receive(:execute).with(partitions.first.to_sql) expect(connection).to receive(:execute).with(partitions.second.to_sql) described_class.new(model, connection: connection).sync_partitions end end context 'when an ArgumentError occurs during partition management' do it 'raises error' do expect(partitioning_strategy).to receive(:missing_partitions).and_raise(ArgumentError) expect { sync_partitions }.to raise_error(ArgumentError) end end context 'when an error occurs during partition management' do it 'does not raise an error' do expect(partitioning_strategy).to receive(:missing_partitions).and_raise('this should never happen (tm)') expect { sync_partitions }.not_to raise_error end end end context 'when the table is not partitioned' do let(:table) { 'this_does_not_need_to_be_real_table' } it 'does not try creating the partitions' do expect(connection).not_to receive(:execute).with("LOCK TABLE \"#{table}\" IN ACCESS EXCLUSIVE MODE") expect(Gitlab::AppLogger).to receive(:warn).with( { message: 'Skipping syncing partitions', table_name: table, connection_name: 'main' } ) sync_partitions end end end context 'creating partitions' do subject(:sync_partitions) { described_class.new(my_model).sync_partitions } let(:connection) { ActiveRecord::Base.connection } let(:my_model) do Class.new(ApplicationRecord) do include PartitionedTable partitioned_by :created_at, strategy: :monthly end end context 'when single database is configured' do before do skip_if_database_exists(:ci) my_model.table_name = partitioned_table_name create_partitioned_table(connection, partitioned_table_name) end it 'creates partitions' do expect { sync_partitions }.to change { find_partitions(my_model.table_name, schema: Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA).size }.from(0) end end context 'when partitioned table has a loose foreign key trigger' do before do my_model.table_name = partitioned_table_name create_partitioned_table(connection, partitioned_table_name) track_record_deletions(my_model.table_name) end it 'attaches LFK trigger on the newly created partitions' do expect(trigger_exists?(my_model.table_name, record_deletion_trigger_name(my_model.table_name))).to eq(true) expect { sync_partitions }.to change { find_partitions(my_model.table_name, schema: Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA).size }.from(0) partitions = find_partitions(my_model.table_name, schema: Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA) partitions.each do |partition| partition_name = partition.first expect(trigger_exists?(partition_name, record_deletion_trigger_name(partition_name), Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA)).to eq(true) end end end context 'when multiple databases are configured' do before do skip_if_shared_database(:ci) my_model.table_name = partitioned_table_name create_partitioned_table(connection, partitioned_table_name) stub_feature_flags(automatic_lock_writes_on_partition_tables: ff_enabled) end where(:gitlab_schema, :database, :expectation) do :gitlab_main | :main | false :gitlab_main | :ci | true :gitlab_ci | :main | true :gitlab_ci | :ci | false end with_them do subject(:sync_partitions) { described_class.new(my_model, connection: connection).sync_partitions } let(:partitioned_table_name) { "_test_gitlab_#{database}_my_model_example_#{gitlab_schema}" } let(:base_model) { Gitlab::Database.schemas_to_base_models[gitlab_schema].first } let(:connection) { Gitlab::Database.database_base_models[database.to_s].connection } let(:my_model) do Class.new(base_model) do include PartitionedTable partitioned_by :created_at, strategy: :monthly end end let(:partitions) do Gitlab::Database::PostgresPartition.using_connection(connection) { Gitlab::Database::PostgresPartition.for_parent_table(partitioned_table_name).to_a } end let(:partitions_locked_for_writes?) do partitions.map do |partition| Gitlab::Database::LockWritesManager.new( table_name: "#{partition.schema}.#{partition.name}", connection: connection, database_name: gitlab_schema ).table_locked_for_writes? end.all?(true) end context 'when feature flag is enabled' do let(:ff_enabled) { true } it "matches expectation" do sync_partitions expect(partitions_locked_for_writes?).to eq(expectation) end end context 'when feature flag is disabled' do let(:ff_enabled) { false } it "will not lock created partition" do sync_partitions expect(partitions_locked_for_writes?).to eq(false) end end end end end context 'detaching partitions (mocked)' do subject(:sync_partitions) { manager.sync_partitions } let(:manager) { described_class.new(model) } let(:model) { double(partitioning_strategy: partitioning_strategy, table_name: table, connection: connection) } let(:connection) { ActiveRecord::Base.connection } let(:table) { :_test_foo } let(:partitioning_strategy) do double(extra_partitions: extra_partitions, missing_partitions: [], after_adding_partitions: nil, analyze_interval: nil) end before do create_partitioned_table(connection, table) allow(connection).to receive(:table_exists?).and_call_original allow(connection).to receive(:table_exists?).with(table).and_return(true) expect(partitioning_strategy).to receive(:validate_and_fix) stub_exclusive_lease(described_class::MANAGEMENT_LEASE_KEY % table, timeout: described_class::LEASE_TIMEOUT) end let(:extra_partitions) do [ instance_double(Gitlab::Database::Partitioning::TimePartition, table: table, partition_name: 'foo1', to_detach_sql: 'SELECT 1'), instance_double(Gitlab::Database::Partitioning::TimePartition, table: table, partition_name: 'foo2', to_detach_sql: 'SELECT 2') ] end it 'detaches each extra partition' do extra_partitions.each { |p| expect(manager).to receive(:detach_one_partition).with(p) } sync_partitions end it 'logs an error if the partitions are not detachable' do allow(Gitlab::Database::PostgresForeignKey).to receive(:by_referenced_table_identifier).with("public._test_foo") .and_return([double(name: "fk_1", constrained_table_identifier: "public.constrainted_table_1")]) expect(Gitlab::AppLogger).to receive(:error).with( { message: "Failed to create / detach partition(s)", connection_name: "main", exception_class: Gitlab::Database::Partitioning::PartitionManager::UnsafeToDetachPartitionError, exception_message: "Cannot detach foo1, it would block while checking foreign key fk_1 on public.constrainted_table_1", table_name: :_test_foo } ) sync_partitions end end describe '#detach_partitions' do around do |ex| travel_to(Date.parse('2021-06-23')) do ex.run end end subject { described_class.new(my_model).sync_partitions } let(:connection) { ActiveRecord::Base.connection } let(:my_model) do Class.new(ApplicationRecord) do include PartitionedTable partitioned_by :created_at, strategy: :monthly, retain_for: 1.month end end before do connection.execute(<<~SQL) CREATE TABLE #{partitioned_table_name} (id serial not null, created_at timestamptz not null, primary key (id, created_at)) PARTITION BY RANGE (created_at); CREATE TABLE #{Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA}.#{partitioned_table_name}_202104 PARTITION OF #{partitioned_table_name} FOR VALUES FROM ('2021-04-01') TO ('2021-05-01'); CREATE TABLE #{Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA}.#{partitioned_table_name}_202105 PARTITION OF #{partitioned_table_name} FOR VALUES FROM ('2021-05-01') TO ('2021-06-01'); SQL my_model.table_name = partitioned_table_name # Also create all future partitions so that the sync is only trying to detach old partitions my_model.partitioning_strategy.missing_partitions.each do |p| connection.execute p.to_sql end end def num_tables connection.select_value(<<~SQL) SELECT COUNT(*) FROM pg_class where relkind IN ('r', 'p') SQL end it 'detaches exactly one partition' do expect { subject }.to change { find_partitions(my_model.table_name).size }.from(9).to(8) end it 'detaches the old partition' do expect { subject }.to change { has_partition(my_model, 2.months.ago.beginning_of_month) }.from(true).to(false) end it 'deletes zero tables' do expect { subject }.not_to change { num_tables } end it 'creates the appropriate PendingPartitionDrop entry' do subject pending_drop = Postgresql::DetachedPartition.find_by!(table_name: "#{partitioned_table_name}_202104") expect(pending_drop.drop_after).to eq(Time.current + described_class::RETAIN_DETACHED_PARTITIONS_FOR) end context 'when the model is the target of a foreign key' do before do connection.execute(<<~SQL) create unique index idx_for_fk ON #{partitioned_table_name}(created_at); create table _test_gitlab_main_referencing_table ( id bigserial primary key not null, referencing_created_at timestamptz references #{partitioned_table_name}(created_at) ); SQL end it 'does not detach partitions with a referenced foreign key' do expect { subject }.not_to change { find_partitions(my_model.table_name).size } end end context 'when scheduling partition drops for large partitions' do before do stub_const("#{described_class}::MAX_PARTITION_SIZE", 1.byte) connection.execute(<<~SQL) INSERT INTO #{partitioned_table_name} (created_at) VALUES ('2021-04-15'); SQL end it 'schedules large partitions for weekend drops' do next_saturday = Date.parse('2021-07-03') expect(Postgresql::DetachedPartition).to receive(:create!) .with(table_name: "#{partitioned_table_name}_202104", drop_after: next_saturday) subject end end end describe 'analyze partitioned table' do let(:analyze) { true } let(:analyze_table) { partitioned_table_name } let(:analyze_partition) { "#{partitioned_table_name}_1" } let(:analyze_regex) { /ANALYZE \(SKIP_LOCKED\) "#{analyze_table}"/ } let(:analyze_interval) { 1.week } let(:connection) { my_model.connection } let(:create_partition) { true } let(:my_model) do interval = analyze_interval Class.new(ApplicationRecord) do include PartitionedTable partitioned_by :partition_id, strategy: :ci_sliding_list, next_partition_if: proc { false }, detach_partition_if: proc { false }, analyze_interval: interval end end shared_examples_for 'run only once analyze within interval' do before do allow_next_instance_of(described_class) do |instance| # Checking of LFK trigger affects the analyze tests allow(instance).to receive(:parent_table_has_loose_foreign_key?).and_return(false) end end specify do control = ActiveRecord::QueryRecorder.new { described_class.new(my_model, connection: connection).sync_partitions(analyze: analyze) } expect(control.occurrences).to include(analyze_regex) control = ActiveRecord::QueryRecorder.new { described_class.new(my_model, connection: connection).sync_partitions(analyze: analyze) } expect(control.occurrences).not_to include(analyze_regex) travel_to((analyze_interval * 2).since) do control = ActiveRecord::QueryRecorder.new { described_class.new(my_model, connection: connection).sync_partitions(analyze: analyze) } expect(control.occurrences).to include(analyze_regex) end end end shared_examples_for 'not to run the analyze at all' do specify do control = ActiveRecord::QueryRecorder.new { described_class.new(my_model, connection: connection).sync_partitions(analyze: analyze) } expect(control.occurrences).not_to include(analyze_regex) control = ActiveRecord::QueryRecorder.new { described_class.new(my_model, connection: connection).sync_partitions(analyze: analyze) } expect(control.occurrences).not_to include(analyze_regex) travel_to((analyze_interval * 2).since) do control = ActiveRecord::QueryRecorder.new { described_class.new(my_model, connection: connection).sync_partitions(analyze: analyze) } expect(control.occurrences).not_to include(analyze_regex) end end end before do my_model.table_name = partitioned_table_name connection.execute(<<~SQL) CREATE TABLE #{analyze_table}(id serial) PARTITION BY LIST (id); SQL connection.execute(<<~SQL) if create_partition CREATE TABLE IF NOT EXISTS #{analyze_partition} PARTITION OF #{analyze_table} FOR VALUES IN (1); SQL allow(connection).to receive(:select_value).and_return(nil, Time.current, Time.current) end it_behaves_like 'run only once analyze within interval' context 'when analyze is false' do let(:analyze) { false } it_behaves_like 'not to run the analyze at all' end context 'when model does not set analyze_interval' do let(:my_model) do Class.new(ApplicationRecord) do include PartitionedTable partitioned_by :partition_id, strategy: :ci_sliding_list, next_partition_if: proc { false }, detach_partition_if: proc { false } end end it_behaves_like 'not to run the analyze at all' end context 'when no partition is created' do let(:create_partition) { false } it_behaves_like 'run only once analyze within interval' end end describe 'strategies that support analyze_interval' do [ ::Gitlab::Database::Partitioning::Time::MonthlyStrategy, ::Gitlab::Database::Partitioning::SlidingListStrategy, ::Gitlab::Database::Partitioning::CiSlidingListStrategy ].each do |klass| specify "#{klass} supports analyze_interval" do expect(klass).to be_method_defined(:analyze_interval) end end end context 'creating and then detaching partitions for a table' do let(:connection) { ActiveRecord::Base.connection } let(:my_model) do Class.new(ApplicationRecord) do include PartitionedTable partitioned_by :created_at, strategy: :monthly, retain_for: 1.month end end before do my_model.table_name = partitioned_table_name connection.execute(<<~SQL) CREATE TABLE #{partitioned_table_name} (id serial not null, created_at timestamptz not null, primary key (id, created_at)) PARTITION BY RANGE (created_at); SQL end def num_partitions(model) find_partitions(model.table_name, schema: Gitlab::Database::DYNAMIC_PARTITIONS_SCHEMA).size end it 'creates partitions for the future then drops the oldest one after a month' do # 1 month for the current month, 1 month for the old month that we're retaining data for, headroom expected_num_partitions = (Gitlab::Database::Partitioning::Time::MonthlyStrategy::HEADROOM + 2.months) / 1.month expect { described_class.new(my_model).sync_partitions }.to change { num_partitions(my_model) }.from(0).to(expected_num_partitions) travel 1.month expect { described_class.new(my_model).sync_partitions }.to change { has_partition(my_model, 2.months.ago.beginning_of_month) }.from(true).to(false).and(change { num_partitions(my_model) }.by(0)) end end def has_partition(model, month) Gitlab::Database::PostgresPartition.for_parent_table(model.table_name).any? do |partition| Gitlab::Database::Partitioning::TimePartition.from_sql( model.table_name, partition.name, partition.condition ).from == month end end def create_partitioned_table(connection, table) connection.execute(<<~SQL) CREATE TABLE #{table} (id serial not null, created_at timestamptz not null, primary key (id, created_at)) PARTITION BY RANGE (created_at); SQL end # Needed by track_record_deletions def execute(sql) connection.execute(sql) end end