From ab27a8fb0c93498157de40a7cf7ab30feaf2c1e5 Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Sun, 20 Jan 2019 00:08:58 -0800 Subject: [PATCH] tableam: Sample Scan Support. Author: Reviewed-By: Discussion: https://postgr.es/m/ Backpatch: --- contrib/tsm_system_rows/tsm_system_rows.c | 86 ++------ contrib/tsm_system_time/tsm_system_time.c | 13 +- src/backend/access/heap/heapam_handler.c | 217 +++++++++++++++++++ src/backend/access/tablesample/system.c | 11 +- src/backend/executor/nodeSamplescan.c | 249 +++------------------- src/include/access/tableam.h | 31 +++ src/include/access/tsmapi.h | 2 +- src/include/nodes/execnodes.h | 4 + src/include/nodes/tidbitmap.h | 2 +- 9 files changed, 307 insertions(+), 308 deletions(-) diff --git a/contrib/tsm_system_rows/tsm_system_rows.c b/contrib/tsm_system_rows/tsm_system_rows.c index 1d35ea3c53..3611d05833 100644 --- a/contrib/tsm_system_rows/tsm_system_rows.c +++ b/contrib/tsm_system_rows/tsm_system_rows.c @@ -28,7 +28,6 @@ #include "postgres.h" -#include "access/heapam.h" #include "access/relscan.h" #include "access/tsmapi.h" #include "catalog/pg_type.h" @@ -46,7 +45,6 @@ typedef struct { uint32 seed; /* random seed */ int64 ntuples; /* number of tuples to return */ - int64 donetuples; /* number of tuples already returned */ OffsetNumber lt; /* last tuple returned from current block */ BlockNumber doneblocks; /* number of already-scanned blocks */ BlockNumber lb; /* last block visited */ @@ -67,11 +65,10 @@ static void system_rows_beginsamplescan(SampleScanState *node, Datum *params, int nparams, uint32 seed); -static BlockNumber system_rows_nextsampleblock(SampleScanState *node); +static BlockNumber system_rows_nextsampleblock(SampleScanState *node, BlockNumber nblocks); static OffsetNumber system_rows_nextsampletuple(SampleScanState *node, BlockNumber blockno, OffsetNumber maxoffset); -static bool SampleOffsetVisible(OffsetNumber tupoffset, HeapScanDesc scan); static uint32 random_relative_prime(uint32 n, SamplerRandomState randstate); @@ -187,7 +184,6 @@ system_rows_beginsamplescan(SampleScanState *node, sampler->seed = seed; sampler->ntuples = ntuples; - sampler->donetuples = 0; sampler->lt = InvalidOffsetNumber; sampler->doneblocks = 0; /* lb will be initialized during first NextSampleBlock call */ @@ -206,11 +202,9 @@ system_rows_beginsamplescan(SampleScanState *node, * Uses linear probing algorithm for picking next block. */ static BlockNumber -system_rows_nextsampleblock(SampleScanState *node) +system_rows_nextsampleblock(SampleScanState *node, BlockNumber nblocks) { SystemRowsSamplerData *sampler = (SystemRowsSamplerData *) node->tsm_state; - TableScanDesc scan = node->ss.ss_currentScanDesc; - HeapScanDesc hscan = (HeapScanDesc) scan; /* First call within scan? */ if (sampler->doneblocks == 0) @@ -222,14 +216,14 @@ system_rows_nextsampleblock(SampleScanState *node) SamplerRandomState randstate; /* If relation is empty, there's nothing to scan */ - if (hscan->rs_nblocks == 0) + if (nblocks == 0) return InvalidBlockNumber; /* We only need an RNG during this setup step */ sampler_random_init_state(sampler->seed, randstate); /* Compute nblocks/firstblock/step only once per query */ - sampler->nblocks = hscan->rs_nblocks; + sampler->nblocks = nblocks; /* Choose random starting block within the relation */ /* (Actually this is the predecessor of the first block visited) */ @@ -246,7 +240,7 @@ system_rows_nextsampleblock(SampleScanState *node) /* If we've read all blocks or returned all needed tuples, we're done */ if (++sampler->doneblocks > sampler->nblocks || - sampler->donetuples >= sampler->ntuples) + node->donetuples >= sampler->ntuples) return InvalidBlockNumber; /* @@ -259,7 +253,7 @@ system_rows_nextsampleblock(SampleScanState *node) { /* Advance lb, using uint64 arithmetic to forestall overflow */ sampler->lb = ((uint64) sampler->lb + sampler->step) % sampler->nblocks; - } while (sampler->lb >= hscan->rs_nblocks); + } while (sampler->lb >= nblocks); return sampler->lb; } @@ -279,77 +273,27 @@ system_rows_nextsampletuple(SampleScanState *node, OffsetNumber maxoffset) { SystemRowsSamplerData *sampler = (SystemRowsSamplerData *) node->tsm_state; - TableScanDesc scan = node->ss.ss_currentScanDesc; - HeapScanDesc hscan = (HeapScanDesc) scan; OffsetNumber tupoffset = sampler->lt; /* Quit if we've returned all needed tuples */ - if (sampler->donetuples >= sampler->ntuples) + if (node->donetuples >= sampler->ntuples) return InvalidOffsetNumber; - /* - * Because we should only count visible tuples as being returned, we need - * to search for a visible tuple rather than just let the core code do it. - */ - - /* We rely on the data accumulated in pagemode access */ - Assert(scan->rs_pageatatime); - for (;;) - { - /* Advance to next possible offset on page */ - if (tupoffset == InvalidOffsetNumber) - tupoffset = FirstOffsetNumber; - else - tupoffset++; - - /* Done? */ - if (tupoffset > maxoffset) - { - tupoffset = InvalidOffsetNumber; - break; - } + /* Advance to next possible offset on page */ + if (tupoffset == InvalidOffsetNumber) + tupoffset = FirstOffsetNumber; + else + tupoffset++; - /* Found a candidate? */ - if (SampleOffsetVisible(tupoffset, hscan)) - { - sampler->donetuples++; - break; - } - } + /* Done? */ + if (tupoffset > maxoffset) + tupoffset = InvalidOffsetNumber; sampler->lt = tupoffset; return tupoffset; } -/* - * Check if tuple offset is visible - * - * In pageatatime mode, heapgetpage() already did visibility checks, - * so just look at the info it left in rs_vistuples[]. - */ -static bool -SampleOffsetVisible(OffsetNumber tupoffset, HeapScanDesc scan) -{ - int start = 0, - end = scan->rs_ntuples - 1; - - while (start <= end) - { - int mid = (start + end) / 2; - OffsetNumber curoffset = scan->rs_vistuples[mid]; - - if (tupoffset == curoffset) - return true; - else if (tupoffset < curoffset) - end = mid - 1; - else - start = mid + 1; - } - - return false; -} - /* * Compute greatest common divisor of two uint32's. */ diff --git a/contrib/tsm_system_time/tsm_system_time.c b/contrib/tsm_system_time/tsm_system_time.c index 1cc7264e08..ab9f685f0a 100644 --- a/contrib/tsm_system_time/tsm_system_time.c +++ b/contrib/tsm_system_time/tsm_system_time.c @@ -26,7 +26,6 @@ #include -#include "access/heapam.h" #include "access/relscan.h" #include "access/tsmapi.h" #include "catalog/pg_type.h" @@ -66,7 +65,7 @@ static void system_time_beginsamplescan(SampleScanState *node, Datum *params, int nparams, uint32 seed); -static BlockNumber system_time_nextsampleblock(SampleScanState *node); +static BlockNumber system_time_nextsampleblock(SampleScanState *node, BlockNumber nblocks); static OffsetNumber system_time_nextsampletuple(SampleScanState *node, BlockNumber blockno, OffsetNumber maxoffset); @@ -213,11 +212,9 @@ system_time_beginsamplescan(SampleScanState *node, * Uses linear probing algorithm for picking next block. */ static BlockNumber -system_time_nextsampleblock(SampleScanState *node) +system_time_nextsampleblock(SampleScanState *node, BlockNumber nblocks) { SystemTimeSamplerData *sampler = (SystemTimeSamplerData *) node->tsm_state; - TableScanDesc scan = node->ss.ss_currentScanDesc; - HeapScanDesc hscan = (HeapScanDesc) scan; instr_time cur_time; /* First call within scan? */ @@ -230,14 +227,14 @@ system_time_nextsampleblock(SampleScanState *node) SamplerRandomState randstate; /* If relation is empty, there's nothing to scan */ - if (hscan->rs_nblocks == 0) + if (nblocks == 0) return InvalidBlockNumber; /* We only need an RNG during this setup step */ sampler_random_init_state(sampler->seed, randstate); /* Compute nblocks/firstblock/step only once per query */ - sampler->nblocks = hscan->rs_nblocks; + sampler->nblocks = nblocks; /* Choose random starting block within the relation */ /* (Actually this is the predecessor of the first block visited) */ @@ -273,7 +270,7 @@ system_time_nextsampleblock(SampleScanState *node) { /* Advance lb, using uint64 arithmetic to forestall overflow */ sampler->lb = ((uint64) sampler->lb + sampler->step) % sampler->nblocks; - } while (sampler->lb >= hscan->rs_nblocks); + } while (sampler->lb >= nblocks); return sampler->lb; } diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index b3d453b957..2118532db2 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -27,6 +27,7 @@ #include "access/heapam.h" #include "access/multixact.h" #include "access/tableam.h" +#include "access/tsmapi.h" #include "access/xact.h" #include "catalog/catalog.h" #include "catalog/index.h" @@ -35,6 +36,7 @@ #include "catalog/storage_xlog.h" #include "executor/executor.h" #include "optimizer/plancat.h" +#include "pgstat.h" #include "storage/bufmgr.h" #include "storage/bufpage.h" #include "storage/bufmgr.h" @@ -1794,6 +1796,218 @@ heapam_index_validate_scan(Relation heapRelation, indexInfo->ii_PredicateState = NULL; } +/* + * Check visibility of the tuple. + */ +static bool +SampleHeapTupleVisible(HeapScanDesc scan, Buffer buffer, + HeapTuple tuple, + OffsetNumber tupoffset) +{ + if (scan->rs_scan.rs_pageatatime) + { + /* + * In pageatatime mode, heapgetpage() already did visibility checks, + * so just look at the info it left in rs_vistuples[]. + * + * We use a binary search over the known-sorted array. Note: we could + * save some effort if we insisted that NextSampleTuple select tuples + * in increasing order, but it's not clear that there would be enough + * gain to justify the restriction. + */ + int start = 0, + end = scan->rs_ntuples - 1; + + while (start <= end) + { + int mid = (start + end) / 2; + OffsetNumber curoffset = scan->rs_vistuples[mid]; + + if (tupoffset == curoffset) + return true; + else if (tupoffset < curoffset) + end = mid - 1; + else + start = mid + 1; + } + + return false; + } + else + { + /* Otherwise, we have to check the tuple individually. */ + return HeapTupleSatisfiesVisibility(tuple, scan->rs_scan.rs_snapshot, + buffer); + } +} + +static bool +heapam_scan_sample_next_block(TableScanDesc sscan, struct SampleScanState *scanstate) +{ + HeapScanDesc scan = (HeapScanDesc) sscan; + TsmRoutine *tsm = scanstate->tsmroutine; + BlockNumber blockno; + + /* return false immediately if relation is empty */ + if (scan->rs_nblocks == 0) + return false; + + if (tsm->NextSampleBlock) + { + blockno = tsm->NextSampleBlock(scanstate, scan->rs_nblocks); + scan->rs_cblock = blockno; + } + else + { + /* scanning table sequentially */ + + if (scan->rs_cblock == InvalidBlockNumber) + { + Assert(!scan->rs_inited); + blockno = scan->rs_startblock; + } + else + { + Assert(scan->rs_inited); + + blockno = scan->rs_cblock + 1; + + if (blockno >= scan->rs_nblocks) + { + /* wrap to begining of rel, might not have started at 0 */ + blockno = 0; + } + + /* + * Report our new scan position for synchronization purposes. + * + * Note: we do this before checking for end of scan so that the + * final state of the position hint is back at the start of the + * rel. That's not strictly necessary, but otherwise when you run + * the same query multiple times the starting position would shift + * a little bit backwards on every invocation, which is confusing. + * We don't guarantee any specific ordering in general, though. + */ + if (scan->rs_scan.rs_syncscan) + ss_report_location(scan->rs_scan.rs_rd, blockno); + + if (blockno == scan->rs_startblock) + { + blockno = InvalidBlockNumber; + } + } + } + + if (!BlockNumberIsValid(blockno)) + { + if (BufferIsValid(scan->rs_cbuf)) + ReleaseBuffer(scan->rs_cbuf); + scan->rs_cbuf = InvalidBuffer; + scan->rs_cblock = InvalidBlockNumber; + scan->rs_inited = false; + + return false; + } + + heapgetpage(sscan, blockno); + scan->rs_inited = true; + + return true; +} + +static bool +heapam_scan_sample_next_tuple(TableScanDesc sscan, struct SampleScanState *scanstate, TupleTableSlot *slot) +{ + HeapScanDesc scan = (HeapScanDesc) sscan; + TsmRoutine *tsm = scanstate->tsmroutine; + BlockNumber blockno = scan->rs_cblock; + bool pagemode = scan->rs_scan.rs_pageatatime; + + Page page; + bool all_visible; + OffsetNumber maxoffset; + + ExecClearTuple(slot); + + /* + * When not using pagemode, we must lock the buffer during tuple + * visibility checks. + */ + if (!pagemode) + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); + + page = (Page) BufferGetPage(scan->rs_cbuf); + all_visible = PageIsAllVisible(page) && !scan->rs_scan.rs_snapshot->takenDuringRecovery; + maxoffset = PageGetMaxOffsetNumber(page); + + for (;;) + { + OffsetNumber tupoffset; + + CHECK_FOR_INTERRUPTS(); + + /* Ask the tablesample method which tuples to check on this page. */ + tupoffset = tsm->NextSampleTuple(scanstate, + blockno, + maxoffset); + + if (OffsetNumberIsValid(tupoffset)) + { + ItemId itemid; + bool visible; + HeapTuple tuple = &(scan->rs_ctup); + + /* Skip invalid tuple pointers. */ + itemid = PageGetItemId(page, tupoffset); + if (!ItemIdIsNormal(itemid)) + continue; + + tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid); + tuple->t_len = ItemIdGetLength(itemid); + ItemPointerSet(&(tuple->t_self), blockno, tupoffset); + + + if (all_visible) + visible = true; + else + visible = SampleHeapTupleVisible(scan, scan->rs_cbuf, tuple, tupoffset); + + /* in pagemode, heapgetpage did this for us */ + if (!pagemode) + CheckForSerializableConflictOut(visible, scan->rs_scan.rs_rd, tuple, + scan->rs_cbuf, scan->rs_scan.rs_snapshot); + + /* Try next tuple from same page. */ + if (!visible) + continue; + + /* Found visible tuple, return it. */ + if (!pagemode) + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); + + ExecStoreBufferHeapTuple(tuple, slot, scan->rs_cbuf); + + /* Count successfully-fetched tuples as heap fetches */ + pgstat_count_heap_getnext(scan->rs_scan.rs_rd); + + return true; + } + else + { + /* + * If we get here, it means we've exhausted the items on this page + * and it's time to move to the next. + */ + if (!pagemode) + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); + + break; + } + } + + return false; +} + /* * Reconstruct and rewrite the given tuple * @@ -1983,6 +2197,9 @@ static const TableAmRoutine heapam_methods = { .index_validate_scan = heapam_index_validate_scan, .relation_estimate_size = heapam_estimate_rel_size, + + .scan_sample_next_block = heapam_scan_sample_next_block, + .scan_sample_next_tuple = heapam_scan_sample_next_tuple }; diff --git a/src/backend/access/tablesample/system.c b/src/backend/access/tablesample/system.c index fe62a73341..476411caf1 100644 --- a/src/backend/access/tablesample/system.c +++ b/src/backend/access/tablesample/system.c @@ -27,7 +27,6 @@ #include #include "access/hash.h" -#include "access/heapam.h" #include "access/relscan.h" #include "access/tsmapi.h" #include "catalog/pg_type.h" @@ -56,7 +55,7 @@ static void system_beginsamplescan(SampleScanState *node, Datum *params, int nparams, uint32 seed); -static BlockNumber system_nextsampleblock(SampleScanState *node); +static BlockNumber system_nextsampleblock(SampleScanState *node, BlockNumber nblocks); static OffsetNumber system_nextsampletuple(SampleScanState *node, BlockNumber blockno, OffsetNumber maxoffset); @@ -177,11 +176,9 @@ system_beginsamplescan(SampleScanState *node, * Select next block to sample. */ static BlockNumber -system_nextsampleblock(SampleScanState *node) +system_nextsampleblock(SampleScanState *node, BlockNumber nblocks) { SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state; - TableScanDesc scan = node->ss.ss_currentScanDesc; - HeapScanDesc hscan = (HeapScanDesc) scan; BlockNumber nextblock = sampler->nextblock; uint32 hashinput[2]; @@ -200,7 +197,7 @@ system_nextsampleblock(SampleScanState *node) * Loop over block numbers until finding suitable block or reaching end of * relation. */ - for (; nextblock < hscan->rs_nblocks; nextblock++) + for (; nextblock < nblocks; nextblock++) { uint32 hash; @@ -212,7 +209,7 @@ system_nextsampleblock(SampleScanState *node) break; } - if (nextblock < hscan->rs_nblocks) + if (nextblock < nblocks) { /* Found a suitable block; remember where we should start next time */ sampler->nextblock = nextblock + 1; diff --git a/src/backend/executor/nodeSamplescan.c b/src/backend/executor/nodeSamplescan.c index b311eb72ff..ce0d3bfa57 100644 --- a/src/backend/executor/nodeSamplescan.c +++ b/src/backend/executor/nodeSamplescan.c @@ -15,7 +15,6 @@ #include "postgres.h" #include "access/hash.h" -#include "access/heapam.h" #include "access/relscan.h" #include "access/tableam.h" #include "access/tsmapi.h" @@ -29,9 +28,7 @@ static TupleTableSlot *SampleNext(SampleScanState *node); static void tablesample_init(SampleScanState *scanstate); -static HeapTuple tablesample_getnext(SampleScanState *scanstate); -static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, - HeapScanDesc scan); +static TupleTableSlot *tablesample_getnext(SampleScanState *scanstate); /* ---------------------------------------------------------------- * Scan Support @@ -47,10 +44,6 @@ static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, static TupleTableSlot * SampleNext(SampleScanState *node) { - HeapTuple tuple; - TupleTableSlot *slot; - HeapScanDesc hscan; - /* * if this is first call within a scan, initialize */ @@ -60,19 +53,7 @@ SampleNext(SampleScanState *node) /* * get the next tuple, and store it in our result slot */ - tuple = tablesample_getnext(node); - - slot = node->ss.ss_ScanTupleSlot; - hscan = (HeapScanDesc) node->ss.ss_currentScanDesc; - - if (tuple) - ExecStoreBufferHeapTuple(tuple, /* tuple to store */ - slot, /* slot to store in */ - hscan->rs_cbuf); /* tuple's buffer */ - else - ExecClearTuple(slot); - - return slot; + return tablesample_getnext(node); } /* @@ -237,6 +218,9 @@ ExecReScanSampleScan(SampleScanState *node) { /* Remember we need to do BeginSampleScan again (if we did it at all) */ node->begun = false; + node->done = false; + node->haveblock = false; + node->donetuples = 0; ExecScanReScan(&node->ss); } @@ -258,6 +242,7 @@ tablesample_init(SampleScanState *scanstate) int i; ListCell *arg; + scanstate->donetuples = 0; params = (Datum *) palloc(list_length(scanstate->args) * sizeof(Datum)); i = 0; @@ -345,225 +330,49 @@ tablesample_init(SampleScanState *scanstate) /* * Get next tuple from TABLESAMPLE method. - * - * Note: an awful lot of this is copied-and-pasted from heapam.c. It would - * perhaps be better to refactor to share more code. */ -static HeapTuple +static TupleTableSlot * tablesample_getnext(SampleScanState *scanstate) { - TsmRoutine *tsm = scanstate->tsmroutine; TableScanDesc scan = scanstate->ss.ss_currentScanDesc; - HeapScanDesc hscan = (HeapScanDesc) scan; - HeapTuple tuple = &(hscan->rs_ctup); - Snapshot snapshot = scan->rs_snapshot; - bool pagemode = scan->rs_pageatatime; - BlockNumber blockno; - Page page; - bool all_visible; - OffsetNumber maxoffset; - - if (!hscan->rs_inited) - { - /* - * return null immediately if relation is empty - */ - if (hscan->rs_nblocks == 0) - { - Assert(!BufferIsValid(hscan->rs_cbuf)); - tuple->t_data = NULL; - return NULL; - } - if (tsm->NextSampleBlock) - { - blockno = tsm->NextSampleBlock(scanstate); - if (!BlockNumberIsValid(blockno)) - { - tuple->t_data = NULL; - return NULL; - } - } - else - blockno = hscan->rs_startblock; - Assert(blockno < hscan->rs_nblocks); - heapgetpage(scan, blockno); - hscan->rs_inited = true; - } - else - { - /* continue from previously returned page/tuple */ - blockno = hscan->rs_cblock; /* current page */ - } + TupleTableSlot *slot = scanstate->ss.ss_ScanTupleSlot; - /* - * When not using pagemode, we must lock the buffer during tuple - * visibility checks. - */ - if (!pagemode) - LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE); + ExecClearTuple(slot); - page = (Page) BufferGetPage(hscan->rs_cbuf); - all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery; - maxoffset = PageGetMaxOffsetNumber(page); + if (scanstate->done) + return NULL; for (;;) { - OffsetNumber tupoffset; - bool finished; - - CHECK_FOR_INTERRUPTS(); - - /* Ask the tablesample method which tuples to check on this page. */ - tupoffset = tsm->NextSampleTuple(scanstate, - blockno, - maxoffset); - - if (OffsetNumberIsValid(tupoffset)) + if (!scanstate->haveblock) { - ItemId itemid; - bool visible; - - /* Skip invalid tuple pointers. */ - itemid = PageGetItemId(page, tupoffset); - if (!ItemIdIsNormal(itemid)) - continue; - - tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid); - tuple->t_len = ItemIdGetLength(itemid); - ItemPointerSet(&(tuple->t_self), blockno, tupoffset); - - if (all_visible) - visible = true; - else - visible = SampleTupleVisible(tuple, tupoffset, hscan); - - /* in pagemode, heapgetpage did this for us */ - if (!pagemode) - CheckForSerializableConflictOut(visible, scan->rs_rd, tuple, - hscan->rs_cbuf, snapshot); - - if (visible) - { - /* Found visible tuple, return it. */ - if (!pagemode) - LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK); - break; - } - else + if (!table_scan_sample_next_block(scan, scanstate)) { - /* Try next tuple from same page. */ - continue; - } - } + scanstate->haveblock = false; + scanstate->done = true; - /* - * if we get here, it means we've exhausted the items on this page and - * it's time to move to the next. - */ - if (!pagemode) - LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK); + /* exhausted relation */ + return NULL; + } - if (tsm->NextSampleBlock) - { - blockno = tsm->NextSampleBlock(scanstate); - Assert(!scan->rs_syncscan); - finished = !BlockNumberIsValid(blockno); + scanstate->haveblock = true; } - else - { - /* Without NextSampleBlock, just do a plain forward seqscan. */ - blockno++; - if (blockno >= hscan->rs_nblocks) - blockno = 0; + if (!table_scan_sample_next_tuple(scan, scanstate, slot)) + { /* - * Report our new scan position for synchronization purposes. - * - * Note: we do this before checking for end of scan so that the - * final state of the position hint is back at the start of the - * rel. That's not strictly necessary, but otherwise when you run - * the same query multiple times the starting position would shift - * a little bit backwards on every invocation, which is confusing. - * We don't guarantee any specific ordering in general, though. + * If we get here, it means we've exhausted the items on this page + * and it's time to move to the next. */ - if (scan->rs_syncscan) - ss_report_location(scan->rs_rd, blockno); - - finished = (blockno == hscan->rs_startblock); + scanstate->haveblock = false; + continue; } - /* - * Reached end of scan? - */ - if (finished) - { - if (BufferIsValid(hscan->rs_cbuf)) - ReleaseBuffer(hscan->rs_cbuf); - hscan->rs_cbuf = InvalidBuffer; - hscan->rs_cblock = InvalidBlockNumber; - tuple->t_data = NULL; - hscan->rs_inited = false; - return NULL; - } - - Assert(blockno < hscan->rs_nblocks); - heapgetpage(scan, blockno); - - /* Re-establish state for new page */ - if (!pagemode) - LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE); - - page = (Page) BufferGetPage(hscan->rs_cbuf); - all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery; - maxoffset = PageGetMaxOffsetNumber(page); + /* Found visible tuple, return it. */ + break; } - /* Count successfully-fetched tuples as heap fetches */ - pgstat_count_heap_getnext(scan->rs_rd); - - return &(hscan->rs_ctup); -} + scanstate->donetuples++; -/* - * Check visibility of the tuple. - */ -static bool -SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan) -{ - if (scan->rs_scan.rs_pageatatime) - { - /* - * In pageatatime mode, heapgetpage() already did visibility checks, - * so just look at the info it left in rs_vistuples[]. - * - * We use a binary search over the known-sorted array. Note: we could - * save some effort if we insisted that NextSampleTuple select tuples - * in increasing order, but it's not clear that there would be enough - * gain to justify the restriction. - */ - int start = 0, - end = scan->rs_ntuples - 1; - - while (start <= end) - { - int mid = (start + end) / 2; - OffsetNumber curoffset = scan->rs_vistuples[mid]; - - if (tupoffset == curoffset) - return true; - else if (tupoffset < curoffset) - end = mid - 1; - else - start = mid + 1; - } - - return false; - } - else - { - /* Otherwise, we have to check the tuple individually. */ - return HeapTupleSatisfiesVisibility(tuple, - scan->rs_scan.rs_snapshot, - scan->rs_cbuf); - } + return slot; } diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 4a1d83a87c..e8ab0839df 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -29,6 +29,7 @@ extern bool synchronize_seqscans; struct VacuumParams; struct ValidateIndexState; +struct SampleScanState; struct BulkInsertStateData; /* @@ -239,6 +240,18 @@ typedef struct TableAmRoutine void (*relation_estimate_size) (Relation rel, int32 *attr_widths, BlockNumber *pages, double *tuples, double *allvisfrac); + + + /* ------------------------------------------------------------------------ + * Executor related functions. + * ------------------------------------------------------------------------ + */ + + bool (*scan_sample_next_block) (TableScanDesc scan, + struct SampleScanState *scanstate); + bool (*scan_sample_next_tuple) (TableScanDesc scan, + struct SampleScanState *scanstate, + TupleTableSlot *slot); } TableAmRoutine; @@ -710,6 +723,24 @@ table_estimate_size(Relation rel, int32 *attr_widths, } +/* ---------------------------------------------------------------------------- + * Executor related functionality + * ---------------------------------------------------------------------------- + */ + +static inline bool +table_scan_sample_next_block(TableScanDesc scan, struct SampleScanState *scanstate) +{ + return scan->rs_rd->rd_tableam->scan_sample_next_block(scan, scanstate); +} + +static inline bool +table_scan_sample_next_tuple(TableScanDesc scan, struct SampleScanState *scanstate, TupleTableSlot *slot) +{ + return scan->rs_rd->rd_tableam->scan_sample_next_tuple(scan, scanstate, slot); +} + + /* ---------------------------------------------------------------------------- * Functions to make modifications a bit simpler. * ---------------------------------------------------------------------------- diff --git a/src/include/access/tsmapi.h b/src/include/access/tsmapi.h index a5c0b4cafe..ccef65aedb 100644 --- a/src/include/access/tsmapi.h +++ b/src/include/access/tsmapi.h @@ -34,7 +34,7 @@ typedef void (*BeginSampleScan_function) (SampleScanState *node, int nparams, uint32 seed); -typedef BlockNumber (*NextSampleBlock_function) (SampleScanState *node); +typedef BlockNumber (*NextSampleBlock_function) (SampleScanState *node, BlockNumber nblocks); typedef OffsetNumber (*NextSampleTuple_function) (SampleScanState *node, BlockNumber blockno, diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index fb61a339ac..5ad1bdfd9b 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1298,6 +1298,9 @@ typedef struct SampleScanState bool use_pagemode; /* use page-at-a-time visibility checking? */ bool begun; /* false means need to call BeginSampleScan */ uint32 seed; /* random seed */ + int64 donetuples; /* number of tuples already returned */ + bool haveblock; /* has a block for sampling been determined */ + bool done; /* exhausted all tuples? */ } SampleScanState; /* @@ -1526,6 +1529,7 @@ typedef struct BitmapHeapScanState Buffer pvmbuffer; long exact_pages; long lossy_pages; + int return_empty_tuples; TBMIterator *prefetch_iterator; int prefetch_pages; int prefetch_target; diff --git a/src/include/nodes/tidbitmap.h b/src/include/nodes/tidbitmap.h index 2645085b34..6a7f3054a4 100644 --- a/src/include/nodes/tidbitmap.h +++ b/src/include/nodes/tidbitmap.h @@ -37,7 +37,7 @@ typedef struct TBMIterator TBMIterator; typedef struct TBMSharedIterator TBMSharedIterator; /* Result structure for tbm_iterate */ -typedef struct +typedef struct TBMIterateResult { BlockNumber blockno; /* page number containing tuples */ int ntuples; /* -1 indicates lossy result */ -- 2.39.5