in src/backend/access/heap/vacuumlazy.c [421:905]
static void lazy_scan_heap(LVRelState *vacrel, VacuumParams *params,
bool aggressive);
static void lazy_scan_prune(LVRelState *vacrel, Buffer buf,
BlockNumber blkno, Page page,
GlobalVisState *vistest,
LVPagePruneState *prunestate);
static void lazy_vacuum(LVRelState *vacrel);
static bool lazy_vacuum_all_indexes(LVRelState *vacrel);
static void lazy_vacuum_heap_rel(LVRelState *vacrel);
static int lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno,
Buffer buffer, int tupindex, Buffer *vmbuffer);
static bool lazy_check_needs_freeze(Buffer buf, bool *hastup,
LVRelState *vacrel);
static bool lazy_check_wraparound_failsafe(LVRelState *vacrel);
static void do_parallel_lazy_vacuum_all_indexes(LVRelState *vacrel);
static void do_parallel_lazy_cleanup_all_indexes(LVRelState *vacrel);
static void do_parallel_vacuum_or_cleanup(LVRelState *vacrel, int nworkers);
static void do_parallel_processing(LVRelState *vacrel,
LVShared *lvshared);
static void do_serial_processing_for_unsafe_indexes(LVRelState *vacrel,
LVShared *lvshared);
static IndexBulkDeleteResult *parallel_process_one_index(Relation indrel,
IndexBulkDeleteResult *istat,
LVShared *lvshared,
LVSharedIndStats *shared_indstats,
LVRelState *vacrel);
static void lazy_cleanup_all_indexes(LVRelState *vacrel);
static IndexBulkDeleteResult *lazy_vacuum_one_index(Relation indrel,
IndexBulkDeleteResult *istat,
double reltuples,
LVRelState *vacrel);
static IndexBulkDeleteResult *lazy_cleanup_one_index(Relation indrel,
IndexBulkDeleteResult *istat,
double reltuples,
bool estimated_count,
LVRelState *vacrel);
static bool should_attempt_truncation(LVRelState *vacrel);
static void lazy_truncate_heap(LVRelState *vacrel);
static BlockNumber count_nondeletable_pages(LVRelState *vacrel,
bool *lock_waiter_detected);
static long compute_max_dead_tuples(BlockNumber relblocks, bool hasindex);
static void lazy_space_alloc(LVRelState *vacrel, int nworkers,
BlockNumber relblocks);
static void lazy_space_free(LVRelState *vacrel);
static bool lazy_tid_reaped(ItemPointer itemptr, void *state);
static int vac_cmp_itemptr(const void *left, const void *right);
static bool heap_page_is_all_visible(LVRelState *vacrel, Buffer buf,
TransactionId *visibility_cutoff_xid, bool *all_frozen);
#if 0
static int compute_parallel_vacuum_workers(LVRelState *vacrel,
int nrequested,
bool *will_parallel_vacuum);
#endif
static void update_index_statistics(LVRelState *vacrel);
#if 0
static LVParallelState *begin_parallel_vacuum(LVRelState *vacrel,
BlockNumber nblocks,
int nrequested);
#endif
static void end_parallel_vacuum(LVRelState *vacrel);
static LVSharedIndStats *parallel_stats_for_idx(LVShared *lvshared, int getidx);
static bool parallel_processing_is_safe(Relation indrel, LVShared *lvshared);
static void vacuum_error_callback(void *arg);
static void update_vacuum_error_info(LVRelState *vacrel,
LVSavedErrInfo *saved_vacrel,
int phase, BlockNumber blkno,
OffsetNumber offnum);
static void restore_vacuum_error_info(LVRelState *vacrel,
const LVSavedErrInfo *saved_vacrel);
/*
 * heap_vacuum_rel() -- perform VACUUM for one heap relation
*
* This routine vacuums a single heap, cleans out its indexes, and
* updates its relpages and reltuples statistics.
*
* At entry, we have already established a transaction and opened
* and locked the relation.
*/
void
heap_vacuum_rel(Relation rel, VacuumParams *params,
BufferAccessStrategy bstrategy)
{
LVRelState *vacrel;
PGRUsage ru0;
TimestampTz starttime = 0;
WalUsage walusage_start = pgWalUsage;
WalUsage walusage = {0, 0, 0};
long secs;
int usecs;
double read_rate,
write_rate;
bool aggressive; /* should we scan all unfrozen pages? */
bool scanned_all_unfrozen; /* actually scanned all such pages? */
char **indnames = NULL;
TransactionId xidFullScanLimit;
MultiXactId mxactFullScanLimit;
BlockNumber new_rel_pages;
BlockNumber new_rel_allvisible;
double new_live_tuples;
TransactionId new_frozen_xid;
MultiXactId new_min_multi;
ErrorContextCallback errcallback;
PgStat_Counter startreadtime = 0;
PgStat_Counter startwritetime = 0;
TransactionId OldestXmin;
TransactionId FreezeLimit;
MultiXactId MultiXactCutoff;
/* measure elapsed time iff autovacuum logging requires it */
if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
{
pg_rusage_init(&ru0);
starttime = GetCurrentTimestamp();
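		/*
		 * Capture baseline I/O timing counters now, so that the log
		 * message emitted at the end can report the deltas.
		 */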
if (track_io_timing)
{
startreadtime = pgStatBlockReadTime;
startwritetime = pgStatBlockWriteTime;
}
}
if (params->options & VACOPT_VERBOSE)
elevel = INFO;
else
elevel = DEBUG2;
if (Gp_role == GP_ROLE_DISPATCH)
elevel = DEBUG2; /* vacuum and analyze messages aren't interesting from the QD */
pgstat_progress_start_command(PROGRESS_COMMAND_VACUUM,
RelationGetRelid(rel));
/*
* MPP-23647. Update xid limits for heap as well as appendonly
* relations. This allows setting relfrozenxid to correct value
* for an appendonly (AO/CO) table.
*/
vacuum_set_xid_limits(rel,
params->freeze_min_age,
params->freeze_table_age,
params->multixact_freeze_min_age,
params->multixact_freeze_table_age,
&OldestXmin, &FreezeLimit, &xidFullScanLimit,
&MultiXactCutoff, &mxactFullScanLimit);
/*
* We request an aggressive scan if the table's frozen Xid is now older
* than or equal to the requested Xid full-table scan limit; or if the
* table's minimum MultiXactId is older than or equal to the requested
* mxid full-table scan limit; or if DISABLE_PAGE_SKIPPING was specified.
*/
aggressive = TransactionIdPrecedesOrEquals(rel->rd_rel->relfrozenxid,
xidFullScanLimit);
aggressive |= MultiXactIdPrecedesOrEquals(rel->rd_rel->relminmxid,
mxactFullScanLimit);
if (params->options & VACOPT_DISABLE_PAGE_SKIPPING)
aggressive = true;
vacrel = (LVRelState *) palloc0(sizeof(LVRelState));
/* Set up high level stuff about rel */
vacrel->rel = rel;
vac_open_indexes(vacrel->rel, RowExclusiveLock, &vacrel->nindexes,
&vacrel->indrels);
vacrel->failsafe_active = false;
vacrel->consider_bypass_optimization = true;
/*
* The index_cleanup param either disables index vacuuming and cleanup or
* forces it to go ahead when we would otherwise apply the index bypass
* optimization. The default is 'auto', which leaves the final decision
* up to lazy_vacuum().
*
 * The truncate param allows the user to avoid attempting relation truncation,
* though it can't force truncation to happen.
*/
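	/* Callers must have resolved both reloptions before reaching here */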
Assert(params->index_cleanup != VACOPTVALUE_UNSPECIFIED);
Assert(params->truncate != VACOPTVALUE_UNSPECIFIED &&
params->truncate != VACOPTVALUE_AUTO);
vacrel->do_index_vacuuming = true;
vacrel->do_index_cleanup = true;
vacrel->do_rel_truncate = (params->truncate != VACOPTVALUE_DISABLED);
if (params->index_cleanup == VACOPTVALUE_DISABLED)
{
/* Force disable index vacuuming up-front */
vacrel->do_index_vacuuming = false;
vacrel->do_index_cleanup = false;
}
else if (params->index_cleanup == VACOPTVALUE_ENABLED)
{
/* Force index vacuuming. Note that failsafe can still bypass. */
vacrel->consider_bypass_optimization = false;
}
else
{
/* Default/auto, make all decisions dynamically */
Assert(params->index_cleanup == VACOPTVALUE_AUTO);
}
vacrel->bstrategy = bstrategy;
vacrel->old_rel_pages = rel->rd_rel->relpages;
vacrel->old_live_tuples = rel->rd_rel->reltuples;
vacrel->relfrozenxid = rel->rd_rel->relfrozenxid;
vacrel->relminmxid = rel->rd_rel->relminmxid;
/* Set cutoffs for entire VACUUM */
vacrel->OldestXmin = OldestXmin;
vacrel->FreezeLimit = FreezeLimit;
vacrel->MultiXactCutoff = MultiXactCutoff;
vacrel->relnamespace = get_namespace_name(RelationGetNamespace(rel));
vacrel->relname = pstrdup(RelationGetRelationName(rel));
vacrel->indname = NULL;
vacrel->phase = VACUUM_ERRCB_PHASE_UNKNOWN;
/* Save index names iff autovacuum logging requires it */
if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0 &&
vacrel->nindexes > 0)
{
indnames = palloc(sizeof(char *) * vacrel->nindexes);
for (int i = 0; i < vacrel->nindexes; i++)
indnames[i] =
pstrdup(RelationGetRelationName(vacrel->indrels[i]));
}
/*
* Setup error traceback support for ereport(). The idea is to set up an
* error context callback to display additional information on any error
* during a vacuum. During different phases of vacuum (heap scan, heap
* vacuum, index vacuum, index clean up, heap truncate), we update the
* error context callback to display appropriate information.
*
* Note that the index vacuum and heap vacuum phases may be called
* multiple times in the middle of the heap scan phase. So the old phase
* information is restored at the end of those phases.
*/
errcallback.callback = vacuum_error_callback;
errcallback.arg = vacrel;
errcallback.previous = error_context_stack;
error_context_stack = &errcallback;
/* Do the vacuuming */
lazy_scan_heap(vacrel, params, aggressive);
/* Done with indexes */
vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
/*
 * Compute whether we actually scanned all the unfrozen pages. If we did,
* we can adjust relfrozenxid and relminmxid.
*
* NB: We need to check this before truncating the relation, because that
* will change ->rel_pages.
*/
if ((vacrel->scanned_pages + vacrel->frozenskipped_pages)
< vacrel->rel_pages)
{
Assert(!aggressive);
scanned_all_unfrozen = false;
}
else
scanned_all_unfrozen = true;
/*
* Optionally truncate the relation.
*/
if (should_attempt_truncation(vacrel))
{
/*
* Update error traceback information. This is the last phase during
* which we add context information to errors, so we don't need to
* revert to the previous phase.
*/
update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_TRUNCATE,
vacrel->nonempty_pages,
InvalidOffsetNumber);
lazy_truncate_heap(vacrel);
}
/* Pop the error context stack */
error_context_stack = errcallback.previous;
/* Report that we are now doing final cleanup */
pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
PROGRESS_VACUUM_PHASE_FINAL_CLEANUP);
/*
* Update statistics in pg_class.
*
* In principle new_live_tuples could be -1 indicating that we (still)
* don't know the tuple count. In practice that probably can't happen,
* since we'd surely have scanned some pages if the table is new and
* nonempty.
*
* For safety, clamp relallvisible to be not more than what we're setting
* relpages to.
*
* Also, don't change relfrozenxid/relminmxid if we skipped any pages,
* since then we don't know for certain that all tuples have a newer xmin.
*/
new_rel_pages = vacrel->rel_pages;
new_live_tuples = vacrel->new_live_tuples;
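	/* Ask the visibility map how many pages are currently all-visible */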
visibilitymap_count(rel, &new_rel_allvisible, NULL);
if (new_rel_allvisible > new_rel_pages)
new_rel_allvisible = new_rel_pages;
new_frozen_xid = scanned_all_unfrozen ? FreezeLimit : InvalidTransactionId;
new_min_multi = scanned_all_unfrozen ? MultiXactCutoff : InvalidMultiXactId;
vac_update_relstats(rel,
new_rel_pages,
new_live_tuples,
new_rel_allvisible,
vacrel->nindexes > 0,
new_frozen_xid,
new_min_multi,
false,
true /* isvacuum */);
/*
* Report results to the stats collector, too.
*
* Deliberately avoid telling the stats collector about LP_DEAD items that
* remain in the table due to VACUUM bypassing index and heap vacuuming.
* ANALYZE will consider the remaining LP_DEAD items to be dead tuples. It
* seems like a good idea to err on the side of not vacuuming again too
* soon in cases where the failsafe prevented significant amounts of heap
* vacuuming.
*/
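	/* new_live_tuples can in principle still be -1 (unknown); report 0 then */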
pgstat_report_vacuum(RelationGetRelid(rel),
rel->rd_rel->relisshared,
Max(new_live_tuples, 0),
vacrel->new_dead_tuples);
pgstat_progress_end_command();
/* and log the action if appropriate */
if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
{
TimestampTz endtime = GetCurrentTimestamp();
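		/*
		 * A log_min_duration of zero means log every autovacuum;
		 * otherwise only log runs that exceeded the threshold.
		 */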
if (params->log_min_duration == 0 ||
TimestampDifferenceExceeds(starttime, endtime,
params->log_min_duration))
{
StringInfoData buf;
char *msgfmt;
BlockNumber orig_rel_pages;
TimestampDifference(starttime, endtime, &secs, &usecs);
memset(&walusage, 0, sizeof(WalUsage));
WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
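			/*
			 * Derive average I/O rates in MB/s: each buffer miss reads
			 * one BLCKSZ-sized block and each dirtied buffer implies
			 * one block written, divided by elapsed wall-clock time.
			 */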
read_rate = 0;
write_rate = 0;
if ((secs > 0) || (usecs > 0))
{
read_rate = (double) BLCKSZ * VacuumPageMiss / (1024 * 1024) /
(secs + usecs / 1000000.0);
write_rate = (double) BLCKSZ * VacuumPageDirty / (1024 * 1024) /
(secs + usecs / 1000000.0);
}
/*
* This is pretty messy, but we split it up so that we can skip
* emitting individual parts of the message when not applicable.
*/
initStringInfo(&buf);
if (params->is_wraparound)
{
/*
* While it's possible for a VACUUM to be both is_wraparound
* and !aggressive, that's just a corner-case -- is_wraparound
* implies aggressive. Produce distinct output for the corner
* case all the same, just in case.
*/
if (aggressive)
msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
else
msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
}
else
{
if (aggressive)
msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
else
msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
}
appendStringInfo(&buf, msgfmt,
get_database_name(MyDatabaseId),
vacrel->relnamespace,
vacrel->relname,
vacrel->num_index_scans);
appendStringInfo(&buf, _("pages: %u removed, %u remain, %u skipped due to pins, %u skipped frozen\n"),
vacrel->pages_removed,
vacrel->rel_pages,
vacrel->pinskipped_pages,
vacrel->frozenskipped_pages);
appendStringInfo(&buf,
_("tuples: %lld removed, %lld remain, %lld are dead but not yet removable, oldest xmin: %u\n"),
(long long) vacrel->tuples_deleted,
(long long) vacrel->new_rel_tuples,
(long long) vacrel->new_dead_tuples,
OldestXmin);
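			/*
			 * Reconstruct the relation's pre-truncation size; the
			 * percentage of pages with LP_DEAD items reported below is
			 * relative to it.
			 */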
orig_rel_pages = vacrel->rel_pages + vacrel->pages_removed;
if (orig_rel_pages > 0)
{
if (vacrel->do_index_vacuuming)
{
if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
appendStringInfoString(&buf, _("index scan not needed: "));
else
appendStringInfoString(&buf, _("index scan needed: "));
msgfmt = _("%u pages from table (%.2f%% of total) had %lld dead item identifiers removed\n");
}
else
{
if (!vacrel->failsafe_active)
appendStringInfoString(&buf, _("index scan bypassed: "));
else
appendStringInfoString(&buf, _("index scan bypassed by failsafe: "));
msgfmt = _("%u pages from table (%.2f%% of total) have %lld dead item identifiers\n");
}
appendStringInfo(&buf, msgfmt,
vacrel->lpdead_item_pages,
100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
(long long) vacrel->lpdead_items);
}
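			/*
			 * Per-index page accounting.  Indexes that returned no
			 * statistics (istat is NULL) are skipped.
			 */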
for (int i = 0; i < vacrel->nindexes; i++)
{
IndexBulkDeleteResult *istat = vacrel->indstats[i];
if (!istat)
continue;
appendStringInfo(&buf,
_("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
indnames[i],
istat->num_pages,
istat->pages_newly_deleted,
istat->pages_deleted,
istat->pages_free);
}
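			/*
			 * The cumulative block I/O timing counters are tracked in
			 * microseconds; convert to milliseconds for display.
			 */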
if (track_io_timing)
{
double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
read_ms, write_ms);
}
appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
read_rate, write_rate);
appendStringInfo(&buf,
_("buffer usage: %lld hits, %lld misses, %lld dirtied\n"),
(long long) VacuumPageHit,
(long long) VacuumPageMiss,
(long long) VacuumPageDirty);
appendStringInfo(&buf,
_("WAL usage: %lld records, %lld full page images, %llu bytes\n"),
(long long) walusage.wal_records,
(long long) walusage.wal_fpi,
(unsigned long long) walusage.wal_bytes);
appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
ereport(LOG,
(errmsg_internal("%s", buf.data)));
pfree(buf.data);
}
}
/* Cleanup index statistics and index names */
for (int i = 0; i < vacrel->nindexes; i++)
{
if (vacrel->indstats[i])
pfree(vacrel->indstats[i]);
if (indnames && indnames[i])
pfree(indnames[i]);
}
}