smgrclose(dstrel);
}
+static void
+heapam_scan_analyze_next_block(TableScanDesc sscan, BlockNumber blockno, BufferAccessStrategy bstrategy)
+{
+ HeapScanDesc scan = (HeapScanDesc) sscan;
+
+ /*
+ * We must maintain a pin on the target page's buffer to ensure that the
+ * maxoffset value stays good (else concurrent VACUUM might delete tuples
+ * out from under us). Hence, pin the page until we are done looking at
+ * it. We also choose to hold sharelock on the buffer throughout --- we
+ * could release and re-acquire sharelock for each tuple, but since we
+ * aren't doing much work per tuple, the extra lock traffic is probably
+ * better avoided.
+ */
+ scan->rs_cblock = blockno;
+ scan->rs_cbuf = ReadBufferExtended(scan->rs_scan.rs_rd, MAIN_FORKNUM, blockno,
+ RBM_NORMAL, bstrategy);
+ scan->rs_cindex = FirstOffsetNumber;
+ LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+}
+
+static bool
+heapam_scan_analyze_next_tuple(TableScanDesc sscan, TransactionId OldestXmin, double *liverows, double *deadrows, TupleTableSlot *slot)
+{
+ HeapScanDesc scan = (HeapScanDesc) sscan;
+ Page targpage;
+ OffsetNumber maxoffset;
+ BufferHeapTupleTableSlot *hslot;
+
+ Assert(TTS_IS_BUFFERTUPLE(slot));
+
+ hslot = (BufferHeapTupleTableSlot *) slot;
+ targpage = BufferGetPage(scan->rs_cbuf);
+ maxoffset = PageGetMaxOffsetNumber(targpage);
+
+ /* Inner loop over all tuples on the selected page */
+ for (; scan->rs_cindex <= maxoffset; scan->rs_cindex++)
+ {
+ ItemId itemid;
+ HeapTuple targtuple = &hslot->base.tupdata;
+ bool sample_it = false;
+
+ itemid = PageGetItemId(targpage, scan->rs_cindex);
+
+ /*
+ * We ignore unused and redirect line pointers. DEAD line pointers
+ * should be counted as dead, because we need vacuum to run to get rid
+ * of them. Note that this rule agrees with the way that
+ * heap_page_prune() counts things.
+ */
+ if (!ItemIdIsNormal(itemid))
+ {
+ if (ItemIdIsDead(itemid))
+ *deadrows += 1;
+ continue;
+ }
+
+ ItemPointerSet(&targtuple->t_self, scan->rs_cblock, scan->rs_cindex);
+
+ targtuple->t_tableOid = RelationGetRelid(scan->rs_scan.rs_rd);
+ targtuple->t_data = (HeapTupleHeader) PageGetItem(targpage, itemid);
+ targtuple->t_len = ItemIdGetLength(itemid);
+
+ switch (HeapTupleSatisfiesVacuum(targtuple, OldestXmin, scan->rs_cbuf))
+ {
+ case HEAPTUPLE_LIVE:
+ sample_it = true;
+ *liverows += 1;
+ break;
+
+ case HEAPTUPLE_DEAD:
+ case HEAPTUPLE_RECENTLY_DEAD:
+ /* Count dead and recently-dead rows */
+ *deadrows += 1;
+ break;
+
+ case HEAPTUPLE_INSERT_IN_PROGRESS:
+
+ /*
+ * Insert-in-progress rows are not counted. We assume that
+ * when the inserting transaction commits or aborts, it will
+ * send a stats message to increment the proper count. This
+ * works right only if that transaction ends after we finish
+ * analyzing the table; if things happen in the other order,
+ * its stats update will be overwritten by ours. However, the
+ * error will be large only if the other transaction runs long
+ * enough to insert many tuples, so assuming it will finish
+ * after us is the safer option.
+ *
+ * A special case is that the inserting transaction might be
+ * our own. In this case we should count and sample the row,
+ * to accommodate users who load a table and analyze it in one
+ * transaction. (pgstat_report_analyze has to adjust the
+ * numbers we send to the stats collector to make this come
+ * out right.)
+ */
+ if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(targtuple->t_data)))
+ {
+ sample_it = true;
+ *liverows += 1;
+ }
+ break;
+
+ case HEAPTUPLE_DELETE_IN_PROGRESS:
+
+ /*
+ * We count and sample delete-in-progress rows the same as
+ * live ones, so that the stats counters come out right if the
+ * deleting transaction commits after us, per the same
+ * reasoning given above.
+ *
+ * If the delete was done by our own transaction, however, we
+ * must count the row as dead to make pgstat_report_analyze's
+ * stats adjustments come out right. (Note: this works out
+ * properly when the row was both inserted and deleted in our
+ * xact.)
+ *
+ * The net effect of these choices is that we act as though an
+ * IN_PROGRESS transaction hasn't happened yet, except if it
+ * is our own transaction, which we assume has happened.
+ *
+ * This approach ensures that we behave sanely if we see both
+ * the pre-image and post-image rows for a row being updated
+ * by a concurrent transaction: we will sample the pre-image
+ * but not the post-image. We also get sane results if the
+ * concurrent transaction never commits.
+ */
+ if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(targtuple->t_data)))
+ *deadrows += 1;
+ else
+ {
+ sample_it = true;
+ *liverows += 1;
+ }
+ break;
+
+ default:
+ elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
+ break;
+ }
+
+ if (sample_it)
+ {
+ ExecStoreBufferHeapTuple(targtuple, slot, scan->rs_cbuf);
+ scan->rs_cindex++;
+
+ /* note that we leave the buffer locked here! */
+ return true;
+ }
+ }
+
+ /* Now release the lock and pin on the page */
+ UnlockReleaseBuffer(scan->rs_cbuf);
+ scan->rs_cbuf = InvalidBuffer;
+
+ return false;
+}
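For reference, the two callbacks above are meant to be driven as a pair through the tableam wrappers added later in this patch: the block callback pins and share-locks the sampled page, and the tuple callback keeps that lock while it hands back rows, dropping the pin and lock once it returns false. A minimal caller-side sketch follows (illustration only; the function name and parameter list are made up, and the usual includes such as access/tableam.h and executor/tuptable.h are assumed):

static void
analyze_sample_block(TableScanDesc scan, BlockNumber targblock,
					 BufferAccessStrategy bstrategy, TransactionId OldestXmin,
					 TupleTableSlot *slot, double *liverows, double *deadrows)
{
	/* pin and share-lock the chosen block */
	table_scan_analyze_next_block(scan, targblock, bstrategy);

	/* returns true, with the buffer still locked, for every sampled row */
	while (table_scan_analyze_next_tuple(scan, OldestXmin, liverows, deadrows, slot))
	{
		/*
		 * The row is available in "slot"; a caller would typically copy it
		 * into its sample, e.g. with ExecCopySlotHeapTuple(slot) as
		 * acquire_sample_rows() does below.
		 */
	}

	/* at this point the callback has released the buffer pin and lock */
}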
static void
heapam_copy_for_cluster(Relation OldHeap, Relation NewHeap, Relation OldIndex,
.relation_set_new_filenode = heapam_set_new_filenode,
.relation_nontransactional_truncate = heapam_relation_nontransactional_truncate,
.relation_copy_data = heapam_relation_copy_data,
+ .relation_vacuum = heap_vacuum_rel,
+ .scan_analyze_next_block = heapam_scan_analyze_next_block,
+ .scan_analyze_next_tuple = heapam_scan_analyze_next_tuple,
.relation_copy_for_cluster = heapam_copy_for_cluster,
.index_build_range_scan = heapam_index_build_range_scan,
.index_validate_scan = heapam_index_validate_scan,
#include <math.h>
#include "access/genam.h"
-#include "access/heapam.h"
#include "access/multixact.h"
#include "access/relation.h"
#include "access/sysattr.h"
#include "access/table.h"
+#include "access/tableam.h"
#include "access/transam.h"
#include "access/tupconvert.h"
#include "access/tuptoaster.h"
TransactionId OldestXmin;
BlockSamplerData bs;
ReservoirStateData rstate;
+ TupleTableSlot *slot;
+ TableScanDesc scan;
Assert(targrows > 0);
/* Prepare for sampling rows */
reservoir_init_selection_state(&rstate, targrows);
+ scan = table_beginscan_analyze(onerel);
+ slot = table_gimmegimmeslot(onerel, NULL);
+
/* Outer loop over blocks to sample */
while (BlockSampler_HasMore(&bs))
{
BlockNumber targblock = BlockSampler_Next(&bs);
- Buffer targbuffer;
- Page targpage;
- OffsetNumber targoffset,
- maxoffset;
vacuum_delay_point();
/*
- * We must maintain a pin on the target page's buffer to ensure that
- * the maxoffset value stays good (else concurrent VACUUM might delete
- * tuples out from under us). Hence, pin the page until we are done
- * looking at it. We also choose to hold sharelock on the buffer
- * throughout --- we could release and re-acquire sharelock for each
- * tuple, but since we aren't doing much work per tuple, the extra
- * lock traffic is probably better avoided.
+ * XXX: we could have this function return a boolean, instead of
+ * forcing such checks to happen in next_tuple().
*/
- targbuffer = ReadBufferExtended(onerel, MAIN_FORKNUM, targblock,
- RBM_NORMAL, vac_strategy);
- LockBuffer(targbuffer, BUFFER_LOCK_SHARE);
- targpage = BufferGetPage(targbuffer);
- maxoffset = PageGetMaxOffsetNumber(targpage);
-
- /* Inner loop over all tuples on the selected page */
- for (targoffset = FirstOffsetNumber; targoffset <= maxoffset; targoffset++)
- {
- ItemId itemid;
- HeapTupleData targtuple;
- bool sample_it = false;
-
- itemid = PageGetItemId(targpage, targoffset);
+ table_scan_analyze_next_block(scan, targblock, vac_strategy);
+ while (table_scan_analyze_next_tuple(scan, OldestXmin, &liverows, &deadrows, slot))
+ {
/*
- * We ignore unused and redirect line pointers. DEAD line
- * pointers should be counted as dead, because we need vacuum to
- * run to get rid of them. Note that this rule agrees with the
- * way that heap_page_prune() counts things.
+ * The first targrows sample rows are simply copied into the
+ * reservoir. Then we start replacing tuples in the sample
+ * until we reach the end of the relation. This algorithm is
+ * from Jeff Vitter's paper (see full citation below). It
+ * works by repeatedly computing the number of tuples to skip
+ * before selecting a tuple, which replaces a randomly chosen
+ * element of the reservoir (current set of tuples). At all
+ * times the reservoir is a true random sample of the tuples
+ * we've passed over so far, so when we fall off the end of
+ * the relation we're done.
*/
- if (!ItemIdIsNormal(itemid))
- {
- if (ItemIdIsDead(itemid))
- deadrows += 1;
- continue;
- }
-
- ItemPointerSet(&targtuple.t_self, targblock, targoffset);
-
- targtuple.t_tableOid = RelationGetRelid(onerel);
- targtuple.t_data = (HeapTupleHeader) PageGetItem(targpage, itemid);
- targtuple.t_len = ItemIdGetLength(itemid);
-
- switch (HeapTupleSatisfiesVacuum(&targtuple,
- OldestXmin,
- targbuffer))
- {
- case HEAPTUPLE_LIVE:
- sample_it = true;
- liverows += 1;
- break;
-
- case HEAPTUPLE_DEAD:
- case HEAPTUPLE_RECENTLY_DEAD:
- /* Count dead and recently-dead rows */
- deadrows += 1;
- break;
-
- case HEAPTUPLE_INSERT_IN_PROGRESS:
-
- /*
- * Insert-in-progress rows are not counted. We assume
- * that when the inserting transaction commits or aborts,
- * it will send a stats message to increment the proper
- * count. This works right only if that transaction ends
- * after we finish analyzing the table; if things happen
- * in the other order, its stats update will be
- * overwritten by ours. However, the error will be large
- * only if the other transaction runs long enough to
- * insert many tuples, so assuming it will finish after us
- * is the safer option.
- *
- * A special case is that the inserting transaction might
- * be our own. In this case we should count and sample
- * the row, to accommodate users who load a table and
- * analyze it in one transaction. (pgstat_report_analyze
- * has to adjust the numbers we send to the stats
- * collector to make this come out right.)
- */
- if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(targtuple.t_data)))
- {
- sample_it = true;
- liverows += 1;
- }
- break;
-
- case HEAPTUPLE_DELETE_IN_PROGRESS:
-
- /*
- * We count and sample delete-in-progress rows the same as
- * live ones, so that the stats counters come out right if
- * the deleting transaction commits after us, per the same
- * reasoning given above.
- *
- * If the delete was done by our own transaction, however,
- * we must count the row as dead to make
- * pgstat_report_analyze's stats adjustments come out
- * right. (Note: this works out properly when the row was
- * both inserted and deleted in our xact.)
- *
- * The net effect of these choices is that we act as
- * though an IN_PROGRESS transaction hasn't happened yet,
- * except if it is our own transaction, which we assume
- * has happened.
- *
- * This approach ensures that we behave sanely if we see
- * both the pre-image and post-image rows for a row being
- * updated by a concurrent transaction: we will sample the
- * pre-image but not the post-image. We also get sane
- * results if the concurrent transaction never commits.
- */
- if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(targtuple.t_data)))
- deadrows += 1;
- else
- {
- sample_it = true;
- liverows += 1;
- }
- break;
-
- default:
- elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
- break;
- }
-
- if (sample_it)
+ if (numrows < targrows)
+ rows[numrows++] = ExecCopySlotHeapTuple(slot);
+ else
{
/*
- * The first targrows sample rows are simply copied into the
- * reservoir. Then we start replacing tuples in the sample
- * until we reach the end of the relation. This algorithm is
- * from Jeff Vitter's paper (see full citation below). It
- * works by repeatedly computing the number of tuples to skip
- * before selecting a tuple, which replaces a randomly chosen
- * element of the reservoir (current set of tuples). At all
- * times the reservoir is a true random sample of the tuples
- * we've passed over so far, so when we fall off the end of
- * the relation we're done.
+ * t in Vitter's paper is the number of records already
+ * processed. If we need to compute a new S value, we
+ * must use the not-yet-incremented value of samplerows as
+ * t.
*/
- if (numrows < targrows)
- rows[numrows++] = heap_copytuple(&targtuple);
- else
+ if (rowstoskip < 0)
+ rowstoskip = reservoir_get_next_S(&rstate, samplerows, targrows);
+
+ if (rowstoskip <= 0)
{
/*
- * t in Vitter's paper is the number of records already
- * processed. If we need to compute a new S value, we
- * must use the not-yet-incremented value of samplerows as
- * t.
+ * Found a suitable tuple, so save it, replacing one
+ * old tuple at random
*/
- if (rowstoskip < 0)
- rowstoskip = reservoir_get_next_S(&rstate, samplerows, targrows);
+ int k = (int) (targrows * sampler_random_fract(rstate.randstate));
- if (rowstoskip <= 0)
- {
- /*
- * Found a suitable tuple, so save it, replacing one
- * old tuple at random
- */
- int k = (int) (targrows * sampler_random_fract(rstate.randstate));
-
- Assert(k >= 0 && k < targrows);
- heap_freetuple(rows[k]);
- rows[k] = heap_copytuple(&targtuple);
- }
-
- rowstoskip -= 1;
+ Assert(k >= 0 && k < targrows);
+ heap_freetuple(rows[k]);
+ rows[k] = ExecCopySlotHeapTuple(slot);
}
- samplerows += 1;
+ rowstoskip -= 1;
}
- }
- /* Now release the lock and pin on the page */
- UnlockReleaseBuffer(targbuffer);
+ samplerows += 1;
+ }
}
+ ExecDropSingleTupleTableSlot(slot);
+ table_endscan(scan);
+
/*
* If we didn't find as many tuples as we wanted then we're done. No sort
* is needed, since they're already in order.
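The restructured loop above still implements the reservoir sampling described in the comment: the first targrows rows fill the reservoir, after which each new row replaces a randomly chosen reservoir slot, with reservoir_get_next_S() supplying Vitter-style skip counts. As a standalone illustration of just the replacement step (plain C, independent of PostgreSQL; rand() stands in for a proper PRNG, and modulo bias and the skip optimization are ignored for brevity):

#include <stdlib.h>

/*
 * Algorithm R style reservoir update: during the fill phase the first k
 * items are stored directly; afterwards the item with 0-based index "nseen"
 * replaces a random slot with probability k/(nseen+1), which keeps the
 * reservoir a uniform random sample of everything seen so far.
 */
static void
reservoir_update(int *reservoir, int k, long nseen, int item)
{
	if (nseen < k)
		reservoir[nseen] = item;	/* fill phase */
	else
	{
		long		j = rand() % (nseen + 1);

		if (j < k)
			reservoir[j] = item;	/* replace a random slot */
	}
}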
extern bool synchronize_seqscans;
+struct VacuumParams;
struct ValidateIndexState;
struct BulkInsertStateData;
MultiXactId *minmulti);
void (*relation_nontransactional_truncate) (Relation rel);
void (*relation_copy_data) (Relation rel, RelFileNode newrnode);
+ void (*relation_vacuum) (Relation onerel, int options,
+ struct VacuumParams *params, BufferAccessStrategy bstrategy);
+ void (*scan_analyze_next_block) (TableScanDesc scan, BlockNumber blockno,
+ BufferAccessStrategy bstrategy);
+ bool (*scan_analyze_next_tuple) (TableScanDesc scan, TransactionId OldestXmin,
+ double *liverows, double *deadrows, TupleTableSlot *slot);
void (*relation_copy_for_cluster) (Relation NewHeap, Relation OldHeap, Relation OldIndex,
bool use_sort,
TransactionId OldestXmin, TransactionId FreezeXid, MultiXactId MultiXactCutoff,
rel->rd_tableam->relation_copy_data(rel, newrnode);
}
+static inline void
+table_vacuum_rel(Relation rel, int options,
+ struct VacuumParams *params, BufferAccessStrategy bstrategy)
+{
+ rel->rd_tableam->relation_vacuum(rel, options, params, bstrategy);
+}
+
+static inline void
+table_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno, BufferAccessStrategy bstrategy)
+{
+ scan->rs_rd->rd_tableam->scan_analyze_next_block(scan, blockno, bstrategy);
+}
+
+static inline bool
+table_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, double *liverows, double *deadrows, TupleTableSlot *slot)
+{
+ return scan->rs_rd->rd_tableam->scan_analyze_next_tuple(scan, OldestXmin, liverows, deadrows, slot);
+}
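The table_vacuum_rel() wrapper implies that the VACUUM driver dispatches through the relation's access method instead of calling heap_vacuum_rel() directly; that call-site change is not part of the hunks shown here, so the following is only a sketch of what it presumably looks like (the wrapper name and signature are taken from the hunk above; the surrounding function is illustrative):

#include "postgres.h"
#include "access/tableam.h"
#include "commands/vacuum.h"

/* Sketch only: dispatch VACUUM through the relation's access method. */
static void
vacuum_via_tableam(Relation onerel, int options, struct VacuumParams *params,
				   BufferAccessStrategy bstrategy)
{
	/*
	 * For heap relations this ends up in heap_vacuum_rel() via the
	 * relation_vacuum callback registered above.
	 */
	table_vacuum_rel(onerel, options, params, bstrategy);
}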
/* XXX: Move arguments to struct? */
static inline void