From: Andres Freund
Date: Tue, 11 Dec 2018 01:36:11 +0000 (-0800)
Subject: Pluggable Storage.
X-Git-Url: http://git.postgresql.org/gitweb/?a=commitdiff_plain;h=ade97164b96c516487e00f75e8de8d0f8900d09a;p=users%2Fandresfreund%2Fpostgres.git

Pluggable Storage.

Author: Haribabu Kommi, Andres Freund and others.
Reviewed-By:
Discussion: https://postgr.es/m/
Backpatch:
---
diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c index 14ed31753f..aa2ac1b852 100644 --- a/contrib/amcheck/verify_nbtree.c +++ b/contrib/amcheck/verify_nbtree.c @@ -25,6 +25,7 @@ #include "access/htup_details.h" #include "access/nbtree.h" +#include "access/tableam.h" #include "access/transam.h" #include "access/xact.h" #include "catalog/index.h" @@ -35,6 +36,7 @@ #include "storage/lmgr.h" #include "utils/memutils.h" #include "utils/snapmgr.h" +#include "utils/tqual.h" PG_MODULE_MAGIC; @@ -478,7 +480,7 @@ bt_check_every_level(Relation rel, Relation heaprel, bool readonly, if (state->heapallindexed) { IndexInfo *indexinfo = BuildIndexInfo(state->rel); - HeapScanDesc scan; + TableScanDesc scan; /* Report on extra downlink checks performed in readonly case */ if (state->readonly) @@ -497,7 +499,7 @@ bt_check_every_level(Relation rel, Relation heaprel, bool readonly, * * Note that IndexBuildHeapScan() calls heap_endscan() for us. */ - scan = heap_beginscan_strat(state->heaprel, /* relation */ + scan = table_beginscan_strat(state->heaprel, /* relation */ snapshot, /* snapshot */ 0, /* number of keys */ NULL, /* scan key */ @@ -531,8 +533,8 @@ bt_check_every_level(Relation rel, Relation heaprel, bool readonly, RelationGetRelationName(state->rel), RelationGetRelationName(state->heaprel)); - IndexBuildHeapScan(state->heaprel, state->rel, indexinfo, true, - bt_tuple_present_callback, (void *) state, scan); + table_index_build_scan(state->heaprel, state->rel, indexinfo, true, + bt_tuple_present_callback, (void *) state, scan); ereport(DEBUG1, (errmsg_internal("finished verifying presence of " INT64_FORMAT " tuples from table \"%s\" with bitset %.2f%% set", diff --git a/contrib/bloom/blinsert.c b/contrib/bloom/blinsert.c index 9f223d3b2a..413828818b 100644 --- a/contrib/bloom/blinsert.c +++ b/contrib/bloom/blinsert.c @@ -14,6 +14,7 @@ #include "access/genam.h" #include "access/generic_xlog.h" +#include "access/tableam.h" #include "catalog/index.h" #include "miscadmin.h" #include "storage/bufmgr.h" @@ -69,7 +70,7 @@ initCachedPage(BloomBuildState *buildstate) } /* - * Per-tuple callback from IndexBuildHeapScan. + * Per-tuple callback from table_index_build_scan.
*/ static void bloomBuildCallback(Relation index, HeapTuple htup, Datum *values, @@ -141,7 +142,7 @@ blbuild(Relation heap, Relation index, IndexInfo *indexInfo) initCachedPage(&buildstate); /* Do the heap scan */ - reltuples = IndexBuildHeapScan(heap, index, indexInfo, true, + reltuples = table_index_build_scan(heap, index, indexInfo, true, bloomBuildCallback, (void *) &buildstate, NULL); diff --git a/contrib/pg_visibility/pg_visibility.c b/contrib/pg_visibility/pg_visibility.c index c1aae9d655..ce9ca704f6 100644 --- a/contrib/pg_visibility/pg_visibility.c +++ b/contrib/pg_visibility/pg_visibility.c @@ -13,6 +13,7 @@ #include "access/htup_details.h" #include "access/visibilitymap.h" #include "catalog/pg_type.h" +#include "catalog/pg_am_d.h" #include "catalog/storage_xlog.h" #include "funcapi.h" #include "miscadmin.h" @@ -565,6 +566,11 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen) /* Only some relkinds have a visibility map */ check_relation_relkind(rel); + if (rel->rd_rel->relam != HEAP_TABLE_AM_OID) + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("only heap AM is supported"))); + + nblocks = RelationGetNumberOfBlocks(rel); /* diff --git a/contrib/pgrowlocks/pgrowlocks.c b/contrib/pgrowlocks/pgrowlocks.c index 94e051d642..852adba359 100644 --- a/contrib/pgrowlocks/pgrowlocks.c +++ b/contrib/pgrowlocks/pgrowlocks.c @@ -26,8 +26,10 @@ #include "access/multixact.h" #include "access/relscan.h" +#include "access/tableam.h" #include "access/xact.h" #include "catalog/namespace.h" +#include "catalog/pg_am_d.h" #include "catalog/pg_authid.h" #include "funcapi.h" #include "miscadmin.h" @@ -55,7 +57,7 @@ PG_FUNCTION_INFO_V1(pgrowlocks); typedef struct { Relation rel; - HeapScanDesc scan; + TableScanDesc scan; int ncolumns; } MyData; @@ -70,7 +72,8 @@ Datum pgrowlocks(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; - HeapScanDesc scan; + TableScanDesc scan; + HeapScanDesc hscan; HeapTuple tuple; TupleDesc tupdesc; AttInMetadata *attinmeta; @@ -99,6 +102,10 @@ pgrowlocks(PG_FUNCTION_ARGS) relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); rel = relation_openrv(relrv, AccessShareLock); + if (rel->rd_rel->relam != HEAP_TABLE_AM_OID) + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("only heap AM is supported"))); + if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), @@ -124,7 +131,8 @@ pgrowlocks(PG_FUNCTION_ARGS) aclcheck_error(aclresult, get_relkind_objtype(rel->rd_rel->relkind), RelationGetRelationName(rel)); - scan = heap_beginscan(rel, GetActiveSnapshot(), 0, NULL); + scan = table_beginscan(rel, GetActiveSnapshot(), 0, NULL); + hscan = (HeapScanDesc) scan; mydata = palloc(sizeof(*mydata)); mydata->rel = rel; mydata->scan = scan; @@ -138,20 +146,20 @@ pgrowlocks(PG_FUNCTION_ARGS) attinmeta = funcctx->attinmeta; mydata = (MyData *) funcctx->user_fctx; scan = mydata->scan; + hscan = (HeapScanDesc) scan; - /* scan the relation */ - while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + /* scan the relation (will error if not heap) */ + while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL) { HTSU_Result htsu; TransactionId xmax; uint16 infomask; /* must hold a buffer lock to call HeapTupleSatisfiesUpdate */ - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); + LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE); - htsu = HeapTupleSatisfiesUpdate(tuple, - GetCurrentCommandId(false), - scan->rs_cbuf); + htsu = HeapTupleSatisfiesUpdate(tuple, 
GetCurrentCommandId(false), + hscan->rs_cbuf); xmax = HeapTupleHeaderGetRawXmax(tuple->t_data); infomask = tuple->t_data->t_infomask; @@ -284,7 +292,7 @@ pgrowlocks(PG_FUNCTION_ARGS) BackendXidGetPid(xmax)); } - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); + LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK); /* build a tuple */ tuple = BuildTupleFromCStrings(attinmeta, values); @@ -301,11 +309,11 @@ pgrowlocks(PG_FUNCTION_ARGS) } else { - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); + LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK); } } - heap_endscan(scan); + table_endscan(scan); heap_close(mydata->rel, AccessShareLock); SRF_RETURN_DONE(funcctx); diff --git a/contrib/pgstattuple/pgstatapprox.c b/contrib/pgstattuple/pgstatapprox.c index ef33cacec6..c59fd10dc1 100644 --- a/contrib/pgstattuple/pgstatapprox.c +++ b/contrib/pgstattuple/pgstatapprox.c @@ -12,12 +12,14 @@ */ #include "postgres.h" -#include "access/visibilitymap.h" #include "access/transam.h" +#include "access/visibilitymap.h" #include "access/xact.h" #include "access/multixact.h" #include "access/htup_details.h" #include "catalog/namespace.h" +#include "catalog/pg_am_d.h" +#include "commands/vacuum.h" #include "funcapi.h" #include "miscadmin.h" #include "storage/bufmgr.h" @@ -26,7 +28,7 @@ #include "storage/lmgr.h" #include "utils/builtins.h" #include "utils/tqual.h" -#include "commands/vacuum.h" + PG_FUNCTION_INFO_V1(pgstattuple_approx); PG_FUNCTION_INFO_V1(pgstattuple_approx_v1_5); @@ -287,6 +289,10 @@ pgstattuple_approx_internal(Oid relid, FunctionCallInfo fcinfo) errmsg("\"%s\" is not a table or materialized view", RelationGetRelationName(rel)))); + if (rel->rd_rel->relam != HEAP_TABLE_AM_OID) + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("only heap AM is supported"))); + statapprox_heap(rel, &stat); relation_close(rel, AccessShareLock); diff --git a/contrib/pgstattuple/pgstattuple.c b/contrib/pgstattuple/pgstattuple.c index 6d67bd8271..520438d779 100644 --- a/contrib/pgstattuple/pgstattuple.c +++ b/contrib/pgstattuple/pgstattuple.c @@ -28,6 +28,7 @@ #include "access/hash.h" #include "access/nbtree.h" #include "access/relscan.h" +#include "access/tableam.h" #include "catalog/namespace.h" #include "catalog/pg_am.h" #include "funcapi.h" @@ -317,7 +318,8 @@ pgstat_relation(Relation rel, FunctionCallInfo fcinfo) static Datum pgstat_heap(Relation rel, FunctionCallInfo fcinfo) { - HeapScanDesc scan; + TableScanDesc scan; + HeapScanDesc hscan; HeapTuple tuple; BlockNumber nblocks; BlockNumber block = 0; /* next block to count free space in */ @@ -327,20 +329,22 @@ pgstat_heap(Relation rel, FunctionCallInfo fcinfo) SnapshotData SnapshotDirty; /* Disable syncscan because we assume we scan from block zero upwards */ - scan = heap_beginscan_strat(rel, SnapshotAny, 0, NULL, true, false); + scan = table_beginscan_strat(rel, SnapshotAny, 0, NULL, true, false); + hscan = (HeapScanDesc) scan; + InitDirtySnapshot(SnapshotDirty); nblocks = scan->rs_nblocks; /* # blocks to be scanned */ - /* scan the relation */ - while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + /* scan the relation (will error if not heap) */ + while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL) { CHECK_FOR_INTERRUPTS(); /* must hold a buffer lock to call HeapTupleSatisfiesVisibility */ - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); + LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE); - if (HeapTupleSatisfiesVisibility(tuple, &SnapshotDirty, scan->rs_cbuf)) + if (HeapTupleSatisfies(tuple, &SnapshotDirty, 
hscan->rs_cbuf)) { stat.tuple_len += tuple->t_len; stat.tuple_count++; @@ -351,7 +355,7 @@ pgstat_heap(Relation rel, FunctionCallInfo fcinfo) stat.dead_tuple_count++; } - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); + LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK); /* * To avoid physically reading the table twice, try to do the @@ -366,7 +370,7 @@ pgstat_heap(Relation rel, FunctionCallInfo fcinfo) CHECK_FOR_INTERRUPTS(); buffer = ReadBufferExtended(rel, MAIN_FORKNUM, block, - RBM_NORMAL, scan->rs_strategy); + RBM_NORMAL, hscan->rs_strategy); LockBuffer(buffer, BUFFER_LOCK_SHARE); stat.free_space += PageGetHeapFreeSpace((Page) BufferGetPage(buffer)); UnlockReleaseBuffer(buffer); @@ -379,14 +383,14 @@ pgstat_heap(Relation rel, FunctionCallInfo fcinfo) CHECK_FOR_INTERRUPTS(); buffer = ReadBufferExtended(rel, MAIN_FORKNUM, block, - RBM_NORMAL, scan->rs_strategy); + RBM_NORMAL, hscan->rs_strategy); LockBuffer(buffer, BUFFER_LOCK_SHARE); stat.free_space += PageGetHeapFreeSpace((Page) BufferGetPage(buffer)); UnlockReleaseBuffer(buffer); block++; } - heap_endscan(scan); + table_endscan(scan); relation_close(rel, AccessShareLock); stat.table_len = (uint64) nblocks * BLCKSZ; diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index 674eb982d0..cc5b928950 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -3927,7 +3927,7 @@ apply_returning_filter(PgFdwDirectModifyState *dmstate, /* * Use the trigger tuple slot as a place to store the result tuple. */ - resultSlot = estate->es_trig_tuple_slot; + resultSlot = ExecTriggerGetReturnSlot(estate, dmstate->resultRel); if (resultSlot->tts_tupleDescriptor != resultTupType) ExecSetSlotDescriptor(resultSlot, resultTupType); diff --git a/contrib/tsm_system_rows/tsm_system_rows.c b/contrib/tsm_system_rows/tsm_system_rows.c index 83f841f0c2..9f26c76ba6 100644 --- a/contrib/tsm_system_rows/tsm_system_rows.c +++ b/contrib/tsm_system_rows/tsm_system_rows.c @@ -46,7 +46,6 @@ typedef struct { uint32 seed; /* random seed */ int64 ntuples; /* number of tuples to return */ - int64 donetuples; /* number of tuples already returned */ OffsetNumber lt; /* last tuple returned from current block */ BlockNumber doneblocks; /* number of already-scanned blocks */ BlockNumber lb; /* last block visited */ @@ -67,11 +66,10 @@ static void system_rows_beginsamplescan(SampleScanState *node, Datum *params, int nparams, uint32 seed); -static BlockNumber system_rows_nextsampleblock(SampleScanState *node); +static BlockNumber system_rows_nextsampleblock(SampleScanState *node, BlockNumber nblocks); static OffsetNumber system_rows_nextsampletuple(SampleScanState *node, BlockNumber blockno, OffsetNumber maxoffset); -static bool SampleOffsetVisible(OffsetNumber tupoffset, HeapScanDesc scan); static uint32 random_relative_prime(uint32 n, SamplerRandomState randstate); @@ -187,7 +185,6 @@ system_rows_beginsamplescan(SampleScanState *node, sampler->seed = seed; sampler->ntuples = ntuples; - sampler->donetuples = 0; sampler->lt = InvalidOffsetNumber; sampler->doneblocks = 0; /* lb will be initialized during first NextSampleBlock call */ @@ -206,10 +203,9 @@ system_rows_beginsamplescan(SampleScanState *node, * Uses linear probing algorithm for picking next block. 
*/ static BlockNumber -system_rows_nextsampleblock(SampleScanState *node) +system_rows_nextsampleblock(SampleScanState *node, BlockNumber nblocks) { SystemRowsSamplerData *sampler = (SystemRowsSamplerData *) node->tsm_state; - HeapScanDesc scan = node->ss.ss_currentScanDesc; /* First call within scan? */ if (sampler->doneblocks == 0) @@ -221,14 +217,14 @@ system_rows_nextsampleblock(SampleScanState *node) SamplerRandomState randstate; /* If relation is empty, there's nothing to scan */ - if (scan->rs_nblocks == 0) + if (nblocks == 0) return InvalidBlockNumber; /* We only need an RNG during this setup step */ sampler_random_init_state(sampler->seed, randstate); /* Compute nblocks/firstblock/step only once per query */ - sampler->nblocks = scan->rs_nblocks; + sampler->nblocks = nblocks; /* Choose random starting block within the relation */ /* (Actually this is the predecessor of the first block visited) */ @@ -245,7 +241,7 @@ system_rows_nextsampleblock(SampleScanState *node) /* If we've read all blocks or returned all needed tuples, we're done */ if (++sampler->doneblocks > sampler->nblocks || - sampler->donetuples >= sampler->ntuples) + node->donetuples >= sampler->ntuples) return InvalidBlockNumber; /* @@ -258,7 +254,7 @@ system_rows_nextsampleblock(SampleScanState *node) { /* Advance lb, using uint64 arithmetic to forestall overflow */ sampler->lb = ((uint64) sampler->lb + sampler->step) % sampler->nblocks; - } while (sampler->lb >= scan->rs_nblocks); + } while (sampler->lb >= nblocks); return sampler->lb; } @@ -278,76 +274,27 @@ system_rows_nextsampletuple(SampleScanState *node, OffsetNumber maxoffset) { SystemRowsSamplerData *sampler = (SystemRowsSamplerData *) node->tsm_state; - HeapScanDesc scan = node->ss.ss_currentScanDesc; OffsetNumber tupoffset = sampler->lt; /* Quit if we've returned all needed tuples */ - if (sampler->donetuples >= sampler->ntuples) + if (node->donetuples >= sampler->ntuples) return InvalidOffsetNumber; - /* - * Because we should only count visible tuples as being returned, we need - * to search for a visible tuple rather than just let the core code do it. - */ - - /* We rely on the data accumulated in pagemode access */ - Assert(scan->rs_pageatatime); - for (;;) - { - /* Advance to next possible offset on page */ - if (tupoffset == InvalidOffsetNumber) - tupoffset = FirstOffsetNumber; - else - tupoffset++; - - /* Done? */ - if (tupoffset > maxoffset) - { - tupoffset = InvalidOffsetNumber; - break; - } + /* Advance to next possible offset on page */ + if (tupoffset == InvalidOffsetNumber) + tupoffset = FirstOffsetNumber; + else + tupoffset++; - /* Found a candidate? */ - if (SampleOffsetVisible(tupoffset, scan)) - { - sampler->donetuples++; - break; - } - } + /* Done? */ + if (tupoffset > maxoffset) + tupoffset = InvalidOffsetNumber; sampler->lt = tupoffset; return tupoffset; } -/* - * Check if tuple offset is visible - * - * In pageatatime mode, heapgetpage() already did visibility checks, - * so just look at the info it left in rs_vistuples[]. - */ -static bool -SampleOffsetVisible(OffsetNumber tupoffset, HeapScanDesc scan) -{ - int start = 0, - end = scan->rs_ntuples - 1; - - while (start <= end) - { - int mid = (start + end) / 2; - OffsetNumber curoffset = scan->rs_vistuples[mid]; - - if (tupoffset == curoffset) - return true; - else if (tupoffset < curoffset) - end = mid - 1; - else - start = mid + 1; - } - - return false; -} - /* * Compute greatest common divisor of two uint32's. 
*/ diff --git a/contrib/tsm_system_time/tsm_system_time.c b/contrib/tsm_system_time/tsm_system_time.c index 249d6f4d46..ee6f4b95a2 100644 --- a/contrib/tsm_system_time/tsm_system_time.c +++ b/contrib/tsm_system_time/tsm_system_time.c @@ -66,7 +66,7 @@ static void system_time_beginsamplescan(SampleScanState *node, Datum *params, int nparams, uint32 seed); -static BlockNumber system_time_nextsampleblock(SampleScanState *node); +static BlockNumber system_time_nextsampleblock(SampleScanState *node, BlockNumber nblocks); static OffsetNumber system_time_nextsampletuple(SampleScanState *node, BlockNumber blockno, OffsetNumber maxoffset); @@ -213,10 +213,9 @@ system_time_beginsamplescan(SampleScanState *node, * Uses linear probing algorithm for picking next block. */ static BlockNumber -system_time_nextsampleblock(SampleScanState *node) +system_time_nextsampleblock(SampleScanState *node, BlockNumber nblocks) { SystemTimeSamplerData *sampler = (SystemTimeSamplerData *) node->tsm_state; - HeapScanDesc scan = node->ss.ss_currentScanDesc; instr_time cur_time; /* First call within scan? */ @@ -229,14 +228,14 @@ system_time_nextsampleblock(SampleScanState *node) SamplerRandomState randstate; /* If relation is empty, there's nothing to scan */ - if (scan->rs_nblocks == 0) + if (nblocks == 0) return InvalidBlockNumber; /* We only need an RNG during this setup step */ sampler_random_init_state(sampler->seed, randstate); /* Compute nblocks/firstblock/step only once per query */ - sampler->nblocks = scan->rs_nblocks; + sampler->nblocks = nblocks; /* Choose random starting block within the relation */ /* (Actually this is the predecessor of the first block visited) */ @@ -272,7 +271,7 @@ system_time_nextsampleblock(SampleScanState *node) { /* Advance lb, using uint64 arithmetic to forestall overflow */ sampler->lb = ((uint64) sampler->lb + sampler->step) % sampler->nblocks; - } while (sampler->lb >= scan->rs_nblocks); + } while (sampler->lb >= nblocks); return sampler->lb; } diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml index 4ce88dd77c..12769f3288 100644 --- a/doc/src/sgml/fdwhandler.sgml +++ b/doc/src/sgml/fdwhandler.sgml @@ -988,23 +988,25 @@ GetForeignRowMarkType(RangeTblEntry *rte, -HeapTuple +TupleTableSlot * RefetchForeignRow(EState *estate, ExecRowMark *erm, Datum rowid, + TupleTableSlot *slot, bool *updated); - Re-fetch one tuple from the foreign table, after locking it if required. + Re-fetch one tuple slot from the foreign table, after locking it if required. estate is global execution state for the query. erm is the ExecRowMark struct describing the target foreign table and the row lock type (if any) to acquire. rowid identifies the tuple to be fetched. - updated is an output parameter. + slot contains nothing useful upon call, but can be used to + hold the returned tuple. updated is an output parameter. - This function should return a palloc'ed copy of the fetched tuple, + This function should return a slot containing the fetched tuple or NULL if the row lock couldn't be obtained. The row lock type to acquire is defined by erm->markType, which is the value previously returned by GetForeignRowMarkType. diff --git a/src/backend/access/Makefile b/src/backend/access/Makefile index bd93a6a8d1..0880e0a8bb 100644 --- a/src/backend/access/Makefile +++ b/src/backend/access/Makefile @@ -9,6 +9,6 @@ top_builddir = ../../.. 
include $(top_builddir)/src/Makefile.global SUBDIRS = brin common gin gist hash heap index nbtree rmgrdesc spgist \ - tablesample transam + table tablesample transam include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c index e95fbbcea7..b70737a7a6 100644 --- a/src/backend/access/brin/brin.c +++ b/src/backend/access/brin/brin.c @@ -21,6 +21,7 @@ #include "access/brin_xlog.h" #include "access/reloptions.h" #include "access/relscan.h" +#include "access/tableam.h" #include "access/xloginsert.h" #include "catalog/index.h" #include "catalog/pg_am.h" @@ -585,7 +586,7 @@ brinendscan(IndexScanDesc scan) } /* - * Per-heap-tuple callback for IndexBuildHeapScan. + * Per-heap-tuple callback for table_index_build_scan. * * Note we don't worry about the page range at the end of the table here; it is * present in the build state struct after we're called the last time, but not @@ -716,8 +717,8 @@ brinbuild(Relation heap, Relation index, IndexInfo *indexInfo) * Now scan the relation. No syncscan allowed here because we want the * heap blocks in physical order. */ - reltuples = IndexBuildHeapScan(heap, index, indexInfo, false, - brinbuildCallback, (void *) state, NULL); + reltuples = table_index_build_scan(heap, index, indexInfo, false, + brinbuildCallback, (void *) state, NULL); /* process the final batch */ form_and_insert_tuple(state); @@ -1228,13 +1229,16 @@ summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel, * short of brinbuildCallback creating the new index entry. * * Note that it is critical we use the "any visible" mode of - * IndexBuildHeapRangeScan here: otherwise, we would miss tuples inserted - * by transactions that are still in progress, among other corner cases. + * table_index_build_range_scan here: otherwise, we would miss tuples + * inserted by transactions that are still in progress, among other corner + * cases. + * + * ZBORKED? */ state->bs_currRangeStart = heapBlk; - IndexBuildHeapRangeScan(heapRel, state->bs_irel, indexInfo, false, true, - heapBlk, scanNumBlks, - brinbuildCallback, (void *) state, NULL); + table_index_build_range_scan(heapRel, state->bs_irel, indexInfo, false, true, + heapBlk, scanNumBlks, + brinbuildCallback, (void *) state, NULL); /* * Now we update the values obtained by the scan with the placeholder diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c index 5281eb6823..621bd93ccd 100644 --- a/src/backend/access/gin/gininsert.c +++ b/src/backend/access/gin/gininsert.c @@ -17,6 +17,7 @@ #include "access/gin_private.h" #include "access/ginxlog.h" #include "access/xloginsert.h" +#include "access/tableam.h" #include "catalog/index.h" #include "miscadmin.h" #include "storage/bufmgr.h" @@ -394,8 +395,8 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo) * Do the heap scan. We disallow sync scan here because dataPlaceToPage * prefers to receive tuples in TID order. 
*/ - reltuples = IndexBuildHeapScan(heap, index, indexInfo, false, - ginBuildCallback, (void *) &buildstate, NULL); + reltuples = table_index_build_scan(heap, index, indexInfo, false, + ginBuildCallback, (void *) &buildstate, NULL); /* dump remaining entries to the index */ oldCtx = MemoryContextSwitchTo(buildstate.tmpCtx); diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c index 434f15f014..c39ddc910c 100644 --- a/src/backend/access/gist/gistbuild.c +++ b/src/backend/access/gist/gistbuild.c @@ -19,6 +19,7 @@ #include "access/genam.h" #include "access/gist_private.h" #include "access/gistxlog.h" +#include "access/tableam.h" #include "access/xloginsert.h" #include "catalog/index.h" #include "miscadmin.h" @@ -202,8 +203,8 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo) /* * Do the heap scan. */ - reltuples = IndexBuildHeapScan(heap, index, indexInfo, true, - gistBuildCallback, (void *) &buildstate, NULL); + reltuples = table_index_build_scan(heap, index, indexInfo, true, + gistBuildCallback, (void *) &buildstate, NULL); /* * If buffering was used, flush out all the tuples that are still in the @@ -452,7 +453,7 @@ calculatePagesPerBuffer(GISTBuildState *buildstate, int levelStep) } /* - * Per-tuple callback from IndexBuildHeapScan. + * Per-tuple callback from table_index_build_scan. */ static void gistBuildCallback(Relation index, diff --git a/src/backend/access/gist/gistget.c b/src/backend/access/gist/gistget.c index e4a3786be0..7c75461dd1 100644 --- a/src/backend/access/gist/gistget.c +++ b/src/backend/access/gist/gistget.c @@ -561,7 +561,7 @@ getNextNearest(IndexScanDesc scan) if (GISTSearchItemIsHeap(*item)) { /* found a heap item at currently minimal distance */ - scan->xs_ctup.t_self = item->data.heap.heapPtr; + scan->xs_heaptid = item->data.heap.heapPtr; scan->xs_recheck = item->data.heap.recheck; index_store_float8_orderby_distances(scan, so->orderByTypes, @@ -650,7 +650,7 @@ gistgettuple(IndexScanDesc scan, ScanDirection dir) so->pageData[so->curPageData - 1].offnum; } /* continuing to return tuples from a leaf page */ - scan->xs_ctup.t_self = so->pageData[so->curPageData].heapPtr; + scan->xs_heaptid = so->pageData[so->curPageData].heapPtr; scan->xs_recheck = so->pageData[so->curPageData].recheck; /* in an index-only scan, also return the reconstructed tuple */ diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c index 0002df30c0..18b26d9026 100644 --- a/src/backend/access/hash/hash.c +++ b/src/backend/access/hash/hash.c @@ -21,6 +21,7 @@ #include "access/hash.h" #include "access/hash_xlog.h" #include "access/relscan.h" +#include "access/tableam.h" #include "catalog/index.h" #include "commands/vacuum.h" #include "miscadmin.h" @@ -159,7 +160,7 @@ hashbuild(Relation heap, Relation index, IndexInfo *indexInfo) buildstate.heapRel = heap; /* do the heap scan */ - reltuples = IndexBuildHeapScan(heap, index, indexInfo, true, + reltuples = table_index_build_scan(heap, index, indexInfo, true, hashbuildCallback, (void *) &buildstate, NULL); if (buildstate.spool) @@ -190,7 +191,7 @@ hashbuildempty(Relation index) } /* - * Per-tuple callback from IndexBuildHeapScan + * Per-tuple callback from table_index_build_scan */ static void hashbuildCallback(Relation index, diff --git a/src/backend/access/hash/hashsearch.c b/src/backend/access/hash/hashsearch.c index 650041db0a..edda7991ad 100644 --- a/src/backend/access/hash/hashsearch.c +++ b/src/backend/access/hash/hashsearch.c @@ -119,7 +119,7 @@ _hash_next(IndexScanDesc 
scan, ScanDirection dir) /* OK, itemIndex says what to return */ currItem = &so->currPos.items[so->currPos.itemIndex]; - scan->xs_ctup.t_self = currItem->heapTid; + scan->xs_heaptid = currItem->heapTid; return true; } @@ -432,7 +432,7 @@ _hash_first(IndexScanDesc scan, ScanDirection dir) /* OK, itemIndex says what to return */ currItem = &so->currPos.items[so->currPos.itemIndex]; - scan->xs_ctup.t_self = currItem->heapTid; + scan->xs_heaptid = currItem->heapTid; /* if we're here, _hash_readpage found a valid tuples */ return true; diff --git a/src/backend/access/heap/Makefile b/src/backend/access/heap/Makefile index 7e7324a916..aee7bfd834 100644 --- a/src/backend/access/heap/Makefile +++ b/src/backend/access/heap/Makefile @@ -12,7 +12,7 @@ subdir = src/backend/access/heap top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global -OBJS = heapam.o hio.o pruneheap.o rewriteheap.o syncscan.o tuptoaster.o \ - vacuumlazy.o visibilitymap.o +OBJS = heapam.o heapam_handler.o heapam_visibility.o hio.o pruneheap.o \ + rewriteheap.o syncscan.o tuptoaster.o vacuumlazy.o visibilitymap.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 9650145642..f769d828ff 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -45,6 +45,7 @@ #include "access/multixact.h" #include "access/parallel.h" #include "access/relscan.h" +#include "access/tableam.h" #include "access/sysattr.h" #include "access/transam.h" #include "access/tuptoaster.h" @@ -79,22 +80,6 @@ #include "nodes/execnodes.h" #include "executor/executor.h" -/* GUC variable */ -bool synchronize_seqscans = true; - - -static HeapScanDesc heap_beginscan_internal(Relation relation, - Snapshot snapshot, - int nkeys, ScanKey key, - ParallelHeapScanDesc parallel_scan, - bool allow_strat, - bool allow_sync, - bool allow_pagemode, - bool is_bitmapscan, - bool is_samplescan, - bool temp_snap); -static void heap_parallelscan_startblock_init(HeapScanDesc scan); -static BlockNumber heap_parallelscan_nextpage(HeapScanDesc scan); static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid, CommandId cid, int options); static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf, @@ -233,10 +218,10 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock) * results for a non-MVCC snapshot, the caller must hold some higher-level * lock that ensures the interesting tuple(s) won't change.) */ - if (scan->rs_parallel != NULL) - scan->rs_nblocks = scan->rs_parallel->phs_nblocks; + if (scan->rs_scan.rs_parallel != NULL) + scan->rs_scan.rs_nblocks = scan->rs_scan.rs_parallel->phs_nblocks; else - scan->rs_nblocks = RelationGetNumberOfBlocks(scan->rs_rd); + scan->rs_scan.rs_nblocks = RelationGetNumberOfBlocks(scan->rs_scan.rs_rd); /* * If the table is large relative to NBuffers, use a bulk-read access @@ -250,11 +235,11 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock) * Note that heap_parallelscan_initialize has a very similar test; if you * change this, consider changing that one, too. 
*/ - if (!RelationUsesLocalBuffers(scan->rs_rd) && - scan->rs_nblocks > NBuffers / 4) + if (!RelationUsesLocalBuffers(scan->rs_scan.rs_rd) && + scan->rs_scan.rs_nblocks > NBuffers / 4) { - allow_strat = scan->rs_allow_strat; - allow_sync = scan->rs_allow_sync; + allow_strat = scan->rs_scan.rs_allow_strat; + allow_sync = scan->rs_scan.rs_allow_sync; } else allow_strat = allow_sync = false; @@ -272,10 +257,10 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock) scan->rs_strategy = NULL; } - if (scan->rs_parallel != NULL) + if (scan->rs_scan.rs_parallel != NULL) { - /* For parallel scan, believe whatever ParallelHeapScanDesc says. */ - scan->rs_syncscan = scan->rs_parallel->phs_syncscan; + /* For parallel scan, believe whatever ParallelTableScanDesc says. */ + scan->rs_scan.rs_syncscan = scan->rs_scan.rs_parallel->phs_syncscan; } else if (keep_startblock) { @@ -284,20 +269,20 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock) * so that rewinding a cursor doesn't generate surprising results. * Reset the active syncscan setting, though. */ - scan->rs_syncscan = (allow_sync && synchronize_seqscans); + scan->rs_scan.rs_syncscan = (allow_sync && synchronize_seqscans); } else if (allow_sync && synchronize_seqscans) { - scan->rs_syncscan = true; - scan->rs_startblock = ss_get_location(scan->rs_rd, scan->rs_nblocks); + scan->rs_scan.rs_syncscan = true; + scan->rs_scan.rs_startblock = ss_get_location(scan->rs_scan.rs_rd, scan->rs_scan.rs_nblocks); } else { - scan->rs_syncscan = false; - scan->rs_startblock = 0; + scan->rs_scan.rs_syncscan = false; + scan->rs_scan.rs_startblock = 0; } - scan->rs_numblocks = InvalidBlockNumber; + scan->rs_scan.rs_numblocks = InvalidBlockNumber; scan->rs_inited = false; scan->rs_ctup.t_data = NULL; ItemPointerSetInvalid(&scan->rs_ctup.t_self); @@ -310,15 +295,15 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock) * copy the scan key, if appropriate */ if (key != NULL) - memcpy(scan->rs_key, key, scan->rs_nkeys * sizeof(ScanKeyData)); + memcpy(scan->rs_scan.rs_key, key, scan->rs_scan.rs_nkeys * sizeof(ScanKeyData)); /* * Currently, we don't have a stats counter for bitmap heap scans (but the * underlying bitmap index scans will be counted) or sample scans (we only * update stats for tuple fetches there) */ - if (!scan->rs_bitmapscan && !scan->rs_samplescan) - pgstat_count_heap_scan(scan->rs_rd); + if (!scan->rs_scan.rs_bitmapscan && !scan->rs_scan.rs_samplescan) + pgstat_count_heap_scan(scan->rs_scan.rs_rd); } /* @@ -328,16 +313,19 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock) * numBlks is number of pages to scan (InvalidBlockNumber means "all") */ void -heap_setscanlimits(HeapScanDesc scan, BlockNumber startBlk, BlockNumber numBlks) +heap_setscanlimits(TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks) { + HeapScanDesc scan = (HeapScanDesc) sscan; + Assert(!scan->rs_inited); /* else too late to change */ - Assert(!scan->rs_syncscan); /* else rs_startblock is significant */ + Assert(!scan->rs_scan.rs_syncscan); /* else rs_startblock is + * significant */ /* Check startBlk is valid (but allow case of zero blocks...) 
*/ - Assert(startBlk == 0 || startBlk < scan->rs_nblocks); + Assert(startBlk == 0 || startBlk < scan->rs_scan.rs_nblocks); - scan->rs_startblock = startBlk; - scan->rs_numblocks = numBlks; + scan->rs_scan.rs_startblock = startBlk; + scan->rs_scan.rs_numblocks = numBlks; } /* @@ -348,8 +336,9 @@ heap_setscanlimits(HeapScanDesc scan, BlockNumber startBlk, BlockNumber numBlks) * which tuples on the page are visible. */ void -heapgetpage(HeapScanDesc scan, BlockNumber page) +heapgetpage(TableScanDesc sscan, BlockNumber page) { + HeapScanDesc scan = (HeapScanDesc) sscan; Buffer buffer; Snapshot snapshot; Page dp; @@ -359,7 +348,7 @@ heapgetpage(HeapScanDesc scan, BlockNumber page) ItemId lpp; bool all_visible; - Assert(page < scan->rs_nblocks); + Assert(page < scan->rs_scan.rs_nblocks); /* release previous scan buffer, if any */ if (BufferIsValid(scan->rs_cbuf)) @@ -376,20 +365,20 @@ heapgetpage(HeapScanDesc scan, BlockNumber page) CHECK_FOR_INTERRUPTS(); /* read page using selected strategy */ - scan->rs_cbuf = ReadBufferExtended(scan->rs_rd, MAIN_FORKNUM, page, - RBM_NORMAL, scan->rs_strategy); + scan->rs_cbuf = ReadBufferExtended(scan->rs_scan.rs_rd, MAIN_FORKNUM, page, + RBM_NORMAL, scan->rs_strategy); scan->rs_cblock = page; - if (!scan->rs_pageatatime) + if (!scan->rs_scan.rs_pageatatime) return; buffer = scan->rs_cbuf; - snapshot = scan->rs_snapshot; + snapshot = scan->rs_scan.rs_snapshot; /* * Prune and repair fragmentation for the whole page, if possible. */ - heap_page_prune_opt(scan->rs_rd, buffer); + heap_page_prune_opt(scan->rs_scan.rs_rd, buffer); /* * We must hold share lock on the buffer content while examining tuple @@ -399,7 +388,7 @@ heapgetpage(HeapScanDesc scan, BlockNumber page) LockBuffer(buffer, BUFFER_LOCK_SHARE); dp = BufferGetPage(buffer); - TestForOldSnapshot(snapshot, scan->rs_rd, dp); + TestForOldSnapshot(snapshot, scan->rs_scan.rs_rd, dp); lines = PageGetMaxOffsetNumber(dp); ntup = 0; @@ -434,7 +423,7 @@ heapgetpage(HeapScanDesc scan, BlockNumber page) HeapTupleData loctup; bool valid; - loctup.t_tableOid = RelationGetRelid(scan->rs_rd); + loctup.t_tableOid = RelationGetRelid(scan->rs_scan.rs_rd); loctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp); loctup.t_len = ItemIdGetLength(lpp); ItemPointerSet(&(loctup.t_self), page, lineoff); @@ -442,9 +431,9 @@ heapgetpage(HeapScanDesc scan, BlockNumber page) if (all_visible) valid = true; else - valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer); + valid = HeapTupleSatisfies(&loctup, snapshot, buffer); - CheckForSerializableConflictOut(valid, scan->rs_rd, &loctup, + CheckForSerializableConflictOut(valid, scan->rs_scan.rs_rd, &loctup, buffer, snapshot); if (valid) @@ -488,7 +477,7 @@ heapgettup(HeapScanDesc scan, ScanKey key) { HeapTuple tuple = &(scan->rs_ctup); - Snapshot snapshot = scan->rs_snapshot; + Snapshot snapshot = scan->rs_scan.rs_snapshot; bool backward = ScanDirectionIsBackward(dir); BlockNumber page; bool finished; @@ -508,17 +497,17 @@ heapgettup(HeapScanDesc scan, /* * return null immediately if relation is empty */ - if (scan->rs_nblocks == 0 || scan->rs_numblocks == 0) + if (scan->rs_scan.rs_nblocks == 0 || scan->rs_scan.rs_numblocks == 0) { Assert(!BufferIsValid(scan->rs_cbuf)); tuple->t_data = NULL; return; } - if (scan->rs_parallel != NULL) + if (scan->rs_scan.rs_parallel != NULL) { - heap_parallelscan_startblock_init(scan); + table_parallelscan_startblock_init(&scan->rs_scan); - page = heap_parallelscan_nextpage(scan); + page = table_parallelscan_nextpage(&scan->rs_scan); /* 
Other processes might have already finished the scan. */ if (page == InvalidBlockNumber) @@ -529,8 +518,8 @@ heapgettup(HeapScanDesc scan, } } else - page = scan->rs_startblock; /* first page */ - heapgetpage(scan, page); + page = scan->rs_scan.rs_startblock; /* first page */ + heapgetpage((TableScanDesc) scan, page); lineoff = FirstOffsetNumber; /* first offnum */ scan->rs_inited = true; } @@ -545,7 +534,7 @@ heapgettup(HeapScanDesc scan, LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); dp = BufferGetPage(scan->rs_cbuf); - TestForOldSnapshot(snapshot, scan->rs_rd, dp); + TestForOldSnapshot(snapshot, scan->rs_scan.rs_rd, dp); lines = PageGetMaxOffsetNumber(dp); /* page and lineoff now reference the physically next tid */ @@ -554,14 +543,14 @@ heapgettup(HeapScanDesc scan, else if (backward) { /* backward parallel scan not supported */ - Assert(scan->rs_parallel == NULL); + Assert(scan->rs_scan.rs_parallel == NULL); if (!scan->rs_inited) { /* * return null immediately if relation is empty */ - if (scan->rs_nblocks == 0 || scan->rs_numblocks == 0) + if (scan->rs_scan.rs_nblocks == 0 || scan->rs_scan.rs_numblocks == 0) { Assert(!BufferIsValid(scan->rs_cbuf)); tuple->t_data = NULL; @@ -574,13 +563,13 @@ heapgettup(HeapScanDesc scan, * time, and much more likely that we'll just bollix things for * forward scanners. */ - scan->rs_syncscan = false; + scan->rs_scan.rs_syncscan = false; /* start from last page of the scan */ - if (scan->rs_startblock > 0) - page = scan->rs_startblock - 1; + if (scan->rs_scan.rs_startblock > 0) + page = scan->rs_scan.rs_startblock - 1; else - page = scan->rs_nblocks - 1; - heapgetpage(scan, page); + page = scan->rs_scan.rs_nblocks - 1; + heapgetpage((TableScanDesc) scan, page); } else { @@ -591,7 +580,7 @@ heapgettup(HeapScanDesc scan, LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); dp = BufferGetPage(scan->rs_cbuf); - TestForOldSnapshot(snapshot, scan->rs_rd, dp); + TestForOldSnapshot(snapshot, scan->rs_scan.rs_rd, dp); lines = PageGetMaxOffsetNumber(dp); if (!scan->rs_inited) @@ -622,11 +611,11 @@ heapgettup(HeapScanDesc scan, page = ItemPointerGetBlockNumber(&(tuple->t_self)); if (page != scan->rs_cblock) - heapgetpage(scan, page); + heapgetpage((TableScanDesc) scan, page); /* Since the tuple was previously fetched, needn't lock page here */ dp = BufferGetPage(scan->rs_cbuf); - TestForOldSnapshot(snapshot, scan->rs_rd, dp); + TestForOldSnapshot(snapshot, scan->rs_scan.rs_rd, dp); lineoff = ItemPointerGetOffsetNumber(&(tuple->t_self)); lpp = PageGetItemId(dp, lineoff); Assert(ItemIdIsNormal(lpp)); @@ -657,15 +646,13 @@ heapgettup(HeapScanDesc scan, /* * if current tuple qualifies, return it. */ - valid = HeapTupleSatisfiesVisibility(tuple, - snapshot, - scan->rs_cbuf); + valid = HeapTupleSatisfies(tuple, snapshot, scan->rs_cbuf); - CheckForSerializableConflictOut(valid, scan->rs_rd, tuple, + CheckForSerializableConflictOut(valid, scan->rs_scan.rs_rd, tuple, scan->rs_cbuf, snapshot); if (valid && key != NULL) - HeapKeyTest(tuple, RelationGetDescr(scan->rs_rd), + HeapKeyTest(tuple, RelationGetDescr(scan->rs_scan.rs_rd), nkeys, key, valid); if (valid) @@ -702,24 +689,24 @@ heapgettup(HeapScanDesc scan, */ if (backward) { - finished = (page == scan->rs_startblock) || - (scan->rs_numblocks != InvalidBlockNumber ? --scan->rs_numblocks == 0 : false); + finished = (page == scan->rs_scan.rs_startblock) || + (scan->rs_scan.rs_numblocks != InvalidBlockNumber ? 
--scan->rs_scan.rs_numblocks == 0 : false); if (page == 0) - page = scan->rs_nblocks; + page = scan->rs_scan.rs_nblocks; page--; } - else if (scan->rs_parallel != NULL) + else if (scan->rs_scan.rs_parallel != NULL) { - page = heap_parallelscan_nextpage(scan); + page = table_parallelscan_nextpage(&scan->rs_scan); finished = (page == InvalidBlockNumber); } else { page++; - if (page >= scan->rs_nblocks) + if (page >= scan->rs_scan.rs_nblocks) page = 0; - finished = (page == scan->rs_startblock) || - (scan->rs_numblocks != InvalidBlockNumber ? --scan->rs_numblocks == 0 : false); + finished = (page == scan->rs_scan.rs_startblock) || + (scan->rs_scan.rs_numblocks != InvalidBlockNumber ? --scan->rs_scan.rs_numblocks == 0 : false); /* * Report our new scan position for synchronization purposes. We @@ -733,8 +720,8 @@ heapgettup(HeapScanDesc scan, * a little bit backwards on every invocation, which is confusing. * We don't guarantee any specific ordering in general, though. */ - if (scan->rs_syncscan) - ss_report_location(scan->rs_rd, page); + if (scan->rs_scan.rs_syncscan) + ss_report_location(scan->rs_scan.rs_rd, page); } /* @@ -751,12 +738,12 @@ heapgettup(HeapScanDesc scan, return; } - heapgetpage(scan, page); + heapgetpage((TableScanDesc) scan, page); LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); dp = BufferGetPage(scan->rs_cbuf); - TestForOldSnapshot(snapshot, scan->rs_rd, dp); + TestForOldSnapshot(snapshot, scan->rs_scan.rs_rd, dp); lines = PageGetMaxOffsetNumber((Page) dp); linesleft = lines; if (backward) @@ -812,17 +799,17 @@ heapgettup_pagemode(HeapScanDesc scan, /* * return null immediately if relation is empty */ - if (scan->rs_nblocks == 0 || scan->rs_numblocks == 0) + if (scan->rs_scan.rs_nblocks == 0 || scan->rs_scan.rs_numblocks == 0) { Assert(!BufferIsValid(scan->rs_cbuf)); tuple->t_data = NULL; return; } - if (scan->rs_parallel != NULL) + if (scan->rs_scan.rs_parallel != NULL) { - heap_parallelscan_startblock_init(scan); + table_parallelscan_startblock_init(&scan->rs_scan); - page = heap_parallelscan_nextpage(scan); + page = table_parallelscan_nextpage(&scan->rs_scan); /* Other processes might have already finished the scan. */ if (page == InvalidBlockNumber) @@ -833,8 +820,8 @@ heapgettup_pagemode(HeapScanDesc scan, } } else - page = scan->rs_startblock; /* first page */ - heapgetpage(scan, page); + page = scan->rs_scan.rs_startblock; /* first page */ + heapgetpage((TableScanDesc) scan, page); lineindex = 0; scan->rs_inited = true; } @@ -845,8 +832,9 @@ heapgettup_pagemode(HeapScanDesc scan, lineindex = scan->rs_cindex + 1; } + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); dp = BufferGetPage(scan->rs_cbuf); - TestForOldSnapshot(scan->rs_snapshot, scan->rs_rd, dp); + TestForOldSnapshot(scan->rs_scan.rs_snapshot, scan->rs_scan.rs_rd, dp); lines = scan->rs_ntuples; /* page and lineindex now reference the next visible tid */ @@ -855,14 +843,14 @@ heapgettup_pagemode(HeapScanDesc scan, else if (backward) { /* backward parallel scan not supported */ - Assert(scan->rs_parallel == NULL); + Assert(scan->rs_scan.rs_parallel == NULL); if (!scan->rs_inited) { /* * return null immediately if relation is empty */ - if (scan->rs_nblocks == 0 || scan->rs_numblocks == 0) + if (scan->rs_scan.rs_nblocks == 0 || scan->rs_scan.rs_numblocks == 0) { Assert(!BufferIsValid(scan->rs_cbuf)); tuple->t_data = NULL; @@ -875,13 +863,13 @@ heapgettup_pagemode(HeapScanDesc scan, * time, and much more likely that we'll just bollix things for * forward scanners. 
*/ - scan->rs_syncscan = false; + scan->rs_scan.rs_syncscan = false; /* start from last page of the scan */ - if (scan->rs_startblock > 0) - page = scan->rs_startblock - 1; + if (scan->rs_scan.rs_startblock > 0) + page = scan->rs_scan.rs_startblock - 1; else - page = scan->rs_nblocks - 1; - heapgetpage(scan, page); + page = scan->rs_scan.rs_nblocks - 1; + heapgetpage((TableScanDesc) scan, page); } else { @@ -889,8 +877,9 @@ heapgettup_pagemode(HeapScanDesc scan, page = scan->rs_cblock; /* current page */ } + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); dp = BufferGetPage(scan->rs_cbuf); - TestForOldSnapshot(scan->rs_snapshot, scan->rs_rd, dp); + TestForOldSnapshot(scan->rs_scan.rs_snapshot, scan->rs_scan.rs_rd, dp); lines = scan->rs_ntuples; if (!scan->rs_inited) @@ -920,11 +909,11 @@ heapgettup_pagemode(HeapScanDesc scan, page = ItemPointerGetBlockNumber(&(tuple->t_self)); if (page != scan->rs_cblock) - heapgetpage(scan, page); + heapgetpage((TableScanDesc) scan, page); /* Since the tuple was previously fetched, needn't lock page here */ dp = BufferGetPage(scan->rs_cbuf); - TestForOldSnapshot(scan->rs_snapshot, scan->rs_rd, dp); + TestForOldSnapshot(scan->rs_scan.rs_snapshot, scan->rs_scan.rs_rd, dp); lineoff = ItemPointerGetOffsetNumber(&(tuple->t_self)); lpp = PageGetItemId(dp, lineoff); Assert(ItemIdIsNormal(lpp)); @@ -962,17 +951,19 @@ heapgettup_pagemode(HeapScanDesc scan, { bool valid; - HeapKeyTest(tuple, RelationGetDescr(scan->rs_rd), + HeapKeyTest(tuple, RelationGetDescr(scan->rs_scan.rs_rd), nkeys, key, valid); if (valid) { scan->rs_cindex = lineindex; + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); return; } } else { scan->rs_cindex = lineindex; + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); return; } @@ -986,30 +977,36 @@ heapgettup_pagemode(HeapScanDesc scan, ++lineindex; } + /* + * if we get here, it means we've exhausted the items on this page and + * it's time to move to the next. + */ + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); + /* * if we get here, it means we've exhausted the items on this page and * it's time to move to the next. */ if (backward) { - finished = (page == scan->rs_startblock) || - (scan->rs_numblocks != InvalidBlockNumber ? --scan->rs_numblocks == 0 : false); + finished = (page == scan->rs_scan.rs_startblock) || + (scan->rs_scan.rs_numblocks != InvalidBlockNumber ? --scan->rs_scan.rs_numblocks == 0 : false); if (page == 0) - page = scan->rs_nblocks; + page = scan->rs_scan.rs_nblocks; page--; } - else if (scan->rs_parallel != NULL) + else if (scan->rs_scan.rs_parallel != NULL) { - page = heap_parallelscan_nextpage(scan); + page = table_parallelscan_nextpage(&scan->rs_scan); finished = (page == InvalidBlockNumber); } else { page++; - if (page >= scan->rs_nblocks) + if (page >= scan->rs_scan.rs_nblocks) page = 0; - finished = (page == scan->rs_startblock) || - (scan->rs_numblocks != InvalidBlockNumber ? --scan->rs_numblocks == 0 : false); + finished = (page == scan->rs_scan.rs_startblock) || + (scan->rs_scan.rs_numblocks != InvalidBlockNumber ? --scan->rs_scan.rs_numblocks == 0 : false); /* * Report our new scan position for synchronization purposes. We @@ -1023,8 +1020,8 @@ heapgettup_pagemode(HeapScanDesc scan, * a little bit backwards on every invocation, which is confusing. * We don't guarantee any specific ordering in general, though. 
*/ - if (scan->rs_syncscan) - ss_report_location(scan->rs_rd, page); + if (scan->rs_scan.rs_syncscan) + ss_report_location(scan->rs_scan.rs_rd, page); } /* @@ -1041,10 +1038,11 @@ heapgettup_pagemode(HeapScanDesc scan, return; } - heapgetpage(scan, page); + heapgetpage((TableScanDesc) scan, page); + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); dp = BufferGetPage(scan->rs_cbuf); - TestForOldSnapshot(scan->rs_snapshot, scan->rs_rd, dp); + TestForOldSnapshot(scan->rs_scan.rs_snapshot, scan->rs_scan.rs_rd, dp); lines = scan->rs_ntuples; linesleft = lines; if (backward) @@ -1387,87 +1385,16 @@ heap_openrv_extended(const RangeVar *relation, LOCKMODE lockmode, return r; } - -/* ---------------- - * heap_beginscan - begin relation scan - * - * heap_beginscan is the "standard" case. - * - * heap_beginscan_catalog differs in setting up its own temporary snapshot. - * - * heap_beginscan_strat offers an extended API that lets the caller control - * whether a nondefault buffer access strategy can be used, and whether - * syncscan can be chosen (possibly resulting in the scan not starting from - * block zero). Both of these default to true with plain heap_beginscan. - * - * heap_beginscan_bm is an alternative entry point for setting up a - * HeapScanDesc for a bitmap heap scan. Although that scan technology is - * really quite unlike a standard seqscan, there is just enough commonality - * to make it worth using the same data structure. - * - * heap_beginscan_sampling is an alternative entry point for setting up a - * HeapScanDesc for a TABLESAMPLE scan. As with bitmap scans, it's worth - * using the same data structure although the behavior is rather different. - * In addition to the options offered by heap_beginscan_strat, this call - * also allows control of whether page-mode visibility checking is used. 
- * ---------------- - */ -HeapScanDesc +TableScanDesc heap_beginscan(Relation relation, Snapshot snapshot, - int nkeys, ScanKey key) -{ - return heap_beginscan_internal(relation, snapshot, nkeys, key, NULL, - true, true, true, false, false, false); -} - -HeapScanDesc -heap_beginscan_catalog(Relation relation, int nkeys, ScanKey key) -{ - Oid relid = RelationGetRelid(relation); - Snapshot snapshot = RegisterSnapshot(GetCatalogSnapshot(relid)); - - return heap_beginscan_internal(relation, snapshot, nkeys, key, NULL, - true, true, true, false, false, true); -} - -HeapScanDesc -heap_beginscan_strat(Relation relation, Snapshot snapshot, - int nkeys, ScanKey key, - bool allow_strat, bool allow_sync) -{ - return heap_beginscan_internal(relation, snapshot, nkeys, key, NULL, - allow_strat, allow_sync, true, - false, false, false); -} - -HeapScanDesc -heap_beginscan_bm(Relation relation, Snapshot snapshot, - int nkeys, ScanKey key) -{ - return heap_beginscan_internal(relation, snapshot, nkeys, key, NULL, - false, false, true, true, false, false); -} - -HeapScanDesc -heap_beginscan_sampling(Relation relation, Snapshot snapshot, - int nkeys, ScanKey key, - bool allow_strat, bool allow_sync, bool allow_pagemode) -{ - return heap_beginscan_internal(relation, snapshot, nkeys, key, NULL, - allow_strat, allow_sync, allow_pagemode, - false, true, false); -} - -static HeapScanDesc -heap_beginscan_internal(Relation relation, Snapshot snapshot, - int nkeys, ScanKey key, - ParallelHeapScanDesc parallel_scan, - bool allow_strat, - bool allow_sync, - bool allow_pagemode, - bool is_bitmapscan, - bool is_samplescan, - bool temp_snap) + int nkeys, ScanKey key, + ParallelTableScanDesc parallel_scan, + bool allow_strat, + bool allow_sync, + bool allow_pagemode, + bool is_bitmapscan, + bool is_samplescan, + bool temp_snap) { HeapScanDesc scan; @@ -1485,21 +1412,21 @@ heap_beginscan_internal(Relation relation, Snapshot snapshot, */ scan = (HeapScanDesc) palloc(sizeof(HeapScanDescData)); - scan->rs_rd = relation; - scan->rs_snapshot = snapshot; - scan->rs_nkeys = nkeys; - scan->rs_bitmapscan = is_bitmapscan; - scan->rs_samplescan = is_samplescan; + scan->rs_scan.rs_rd = relation; + scan->rs_scan.rs_snapshot = snapshot; + scan->rs_scan.rs_nkeys = nkeys; + scan->rs_scan.rs_bitmapscan = is_bitmapscan; + scan->rs_scan.rs_samplescan = is_samplescan; scan->rs_strategy = NULL; /* set in initscan */ - scan->rs_allow_strat = allow_strat; - scan->rs_allow_sync = allow_sync; - scan->rs_temp_snap = temp_snap; - scan->rs_parallel = parallel_scan; + scan->rs_scan.rs_allow_strat = allow_strat; + scan->rs_scan.rs_allow_sync = allow_sync; + scan->rs_scan.rs_temp_snap = temp_snap; + scan->rs_scan.rs_parallel = parallel_scan; /* * we can use page-at-a-time mode if it's an MVCC-safe snapshot */ - scan->rs_pageatatime = allow_pagemode && IsMVCCSnapshot(snapshot); + scan->rs_scan.rs_pageatatime = allow_pagemode && snapshot && IsMVCCSnapshot(snapshot); /* * For a seqscan in a serializable transaction, acquire a predicate lock @@ -1512,7 +1439,7 @@ heap_beginscan_internal(Relation relation, Snapshot snapshot, * covering the predicate. But in that case we still have to lock any * matching heap tuples. 
*/ - if (!is_bitmapscan) + if (!is_bitmapscan && snapshot) PredicateLockRelation(relation, snapshot); /* we only need to set this up once */ @@ -1523,13 +1450,13 @@ heap_beginscan_internal(Relation relation, Snapshot snapshot, * initscan() and we don't want to allocate memory again */ if (nkeys > 0) - scan->rs_key = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys); + scan->rs_scan.rs_key = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys); else - scan->rs_key = NULL; + scan->rs_scan.rs_key = NULL; initscan(scan, key, false); - return scan; + return (TableScanDesc) scan; } /* ---------------- @@ -1537,9 +1464,18 @@ heap_beginscan_internal(Relation relation, Snapshot snapshot, * ---------------- */ void -heap_rescan(HeapScanDesc scan, - ScanKey key) +heap_rescan(TableScanDesc sscan, ScanKey key, bool set_params, + bool allow_strat, bool allow_sync, bool allow_pagemode) { + HeapScanDesc scan = (HeapScanDesc) sscan; + + if (set_params) + { + scan->rs_scan.rs_allow_strat = allow_strat; + scan->rs_scan.rs_allow_sync = allow_sync; + scan->rs_scan.rs_pageatatime = allow_pagemode && IsMVCCSnapshot(scan->rs_scan.rs_snapshot); + } + /* * unpin scan buffers */ @@ -1550,27 +1486,21 @@ heap_rescan(HeapScanDesc scan, * reinitialize scan descriptor */ initscan(scan, key, true); -} -/* ---------------- - * heap_rescan_set_params - restart a relation scan after changing params - * - * This call allows changing the buffer strategy, syncscan, and pagemode - * options before starting a fresh scan. Note that although the actual use - * of syncscan might change (effectively, enabling or disabling reporting), - * the previously selected startblock will be kept. - * ---------------- - */ -void -heap_rescan_set_params(HeapScanDesc scan, ScanKey key, - bool allow_strat, bool allow_sync, bool allow_pagemode) -{ - /* adjust parameters */ - scan->rs_allow_strat = allow_strat; - scan->rs_allow_sync = allow_sync; - scan->rs_pageatatime = allow_pagemode && IsMVCCSnapshot(scan->rs_snapshot); - /* ... and rescan */ - heap_rescan(scan, key); + /* + * reset parallel scan, if present + */ + if (scan->rs_scan.rs_parallel != NULL) + { + ParallelTableScanDesc parallel_scan; + + /* + * Caller is responsible for making sure that all workers have + * finished the scan before calling this. + */ + parallel_scan = scan->rs_scan.rs_parallel; + pg_atomic_write_u64(¶llel_scan->phs_nallocated, 0); + } } /* ---------------- @@ -1581,8 +1511,10 @@ heap_rescan_set_params(HeapScanDesc scan, ScanKey key, * ---------------- */ void -heap_endscan(HeapScanDesc scan) +heap_endscan(TableScanDesc sscan) { + HeapScanDesc scan = (HeapScanDesc) sscan; + /* Note: no locking manipulations needed */ /* @@ -1594,215 +1526,20 @@ heap_endscan(HeapScanDesc scan) /* * decrement relation reference count and free scan descriptor storage */ - RelationDecrementReferenceCount(scan->rs_rd); + RelationDecrementReferenceCount(scan->rs_scan.rs_rd); - if (scan->rs_key) - pfree(scan->rs_key); + if (scan->rs_scan.rs_key) + pfree(scan->rs_scan.rs_key); if (scan->rs_strategy != NULL) FreeAccessStrategy(scan->rs_strategy); - if (scan->rs_temp_snap) - UnregisterSnapshot(scan->rs_snapshot); + if (scan->rs_scan.rs_temp_snap) + UnregisterSnapshot(scan->rs_scan.rs_snapshot); pfree(scan); } -/* ---------------- - * heap_parallelscan_estimate - estimate storage for ParallelHeapScanDesc - * - * Sadly, this doesn't reduce to a constant, because the size required - * to serialize the snapshot can vary. 
- * ---------------- - */ -Size -heap_parallelscan_estimate(Snapshot snapshot) -{ - return add_size(offsetof(ParallelHeapScanDescData, phs_snapshot_data), - EstimateSnapshotSpace(snapshot)); -} - -/* ---------------- - * heap_parallelscan_initialize - initialize ParallelHeapScanDesc - * - * Must allow as many bytes of shared memory as returned by - * heap_parallelscan_estimate. Call this just once in the leader - * process; then, individual workers attach via heap_beginscan_parallel. - * ---------------- - */ -void -heap_parallelscan_initialize(ParallelHeapScanDesc target, Relation relation, - Snapshot snapshot) -{ - target->phs_relid = RelationGetRelid(relation); - target->phs_nblocks = RelationGetNumberOfBlocks(relation); - /* compare phs_syncscan initialization to similar logic in initscan */ - target->phs_syncscan = synchronize_seqscans && - !RelationUsesLocalBuffers(relation) && - target->phs_nblocks > NBuffers / 4; - SpinLockInit(&target->phs_mutex); - target->phs_startblock = InvalidBlockNumber; - pg_atomic_init_u64(&target->phs_nallocated, 0); - if (IsMVCCSnapshot(snapshot)) - { - SerializeSnapshot(snapshot, target->phs_snapshot_data); - target->phs_snapshot_any = false; - } - else - { - Assert(snapshot == SnapshotAny); - target->phs_snapshot_any = true; - } -} - -/* ---------------- - * heap_parallelscan_reinitialize - reset a parallel scan - * - * Call this in the leader process. Caller is responsible for - * making sure that all workers have finished the scan beforehand. - * ---------------- - */ -void -heap_parallelscan_reinitialize(ParallelHeapScanDesc parallel_scan) -{ - pg_atomic_write_u64(¶llel_scan->phs_nallocated, 0); -} - -/* ---------------- - * heap_beginscan_parallel - join a parallel scan - * - * Caller must hold a suitable lock on the correct relation. - * ---------------- - */ -HeapScanDesc -heap_beginscan_parallel(Relation relation, ParallelHeapScanDesc parallel_scan) -{ - Snapshot snapshot; - - Assert(RelationGetRelid(relation) == parallel_scan->phs_relid); - - if (!parallel_scan->phs_snapshot_any) - { - /* Snapshot was serialized -- restore it */ - snapshot = RestoreSnapshot(parallel_scan->phs_snapshot_data); - RegisterSnapshot(snapshot); - } - else - { - /* SnapshotAny passed by caller (not serialized) */ - snapshot = SnapshotAny; - } - - return heap_beginscan_internal(relation, snapshot, 0, NULL, parallel_scan, - true, true, true, false, false, - !parallel_scan->phs_snapshot_any); -} - -/* ---------------- - * heap_parallelscan_startblock_init - find and set the scan's startblock - * - * Determine where the parallel seq scan should start. This function may - * be called many times, once by each parallel worker. We must be careful - * only to set the startblock once. - * ---------------- - */ -static void -heap_parallelscan_startblock_init(HeapScanDesc scan) -{ - BlockNumber sync_startpage = InvalidBlockNumber; - ParallelHeapScanDesc parallel_scan; - - Assert(scan->rs_parallel); - parallel_scan = scan->rs_parallel; - -retry: - /* Grab the spinlock. */ - SpinLockAcquire(¶llel_scan->phs_mutex); - - /* - * If the scan's startblock has not yet been initialized, we must do so - * now. If this is not a synchronized scan, we just start at block 0, but - * if it is a synchronized scan, we must get the starting position from - * the synchronized scan machinery. We can't hold the spinlock while - * doing that, though, so release the spinlock, get the information we - * need, and retry. 
If nobody else has initialized the scan in the - * meantime, we'll fill in the value we fetched on the second time - * through. - */ - if (parallel_scan->phs_startblock == InvalidBlockNumber) - { - if (!parallel_scan->phs_syncscan) - parallel_scan->phs_startblock = 0; - else if (sync_startpage != InvalidBlockNumber) - parallel_scan->phs_startblock = sync_startpage; - else - { - SpinLockRelease(¶llel_scan->phs_mutex); - sync_startpage = ss_get_location(scan->rs_rd, scan->rs_nblocks); - goto retry; - } - } - SpinLockRelease(¶llel_scan->phs_mutex); -} - -/* ---------------- - * heap_parallelscan_nextpage - get the next page to scan - * - * Get the next page to scan. Even if there are no pages left to scan, - * another backend could have grabbed a page to scan and not yet finished - * looking at it, so it doesn't follow that the scan is done when the - * first backend gets an InvalidBlockNumber return. - * ---------------- - */ -static BlockNumber -heap_parallelscan_nextpage(HeapScanDesc scan) -{ - BlockNumber page; - ParallelHeapScanDesc parallel_scan; - uint64 nallocated; - - Assert(scan->rs_parallel); - parallel_scan = scan->rs_parallel; - - /* - * phs_nallocated tracks how many pages have been allocated to workers - * already. When phs_nallocated >= rs_nblocks, all blocks have been - * allocated. - * - * Because we use an atomic fetch-and-add to fetch the current value, the - * phs_nallocated counter will exceed rs_nblocks, because workers will - * still increment the value, when they try to allocate the next block but - * all blocks have been allocated already. The counter must be 64 bits - * wide because of that, to avoid wrapping around when rs_nblocks is close - * to 2^32. - * - * The actual page to return is calculated by adding the counter to the - * starting block number, modulo nblocks. - */ - nallocated = pg_atomic_fetch_add_u64(¶llel_scan->phs_nallocated, 1); - if (nallocated >= scan->rs_nblocks) - page = InvalidBlockNumber; /* all blocks have been allocated */ - else - page = (nallocated + parallel_scan->phs_startblock) % scan->rs_nblocks; - - /* - * Report scan location. Normally, we report the current page number. - * When we reach the end of the scan, though, we report the starting page, - * not the ending page, just so the starting positions for later scans - * doesn't slew backwards. We only report the position at the end of the - * scan once, though: subsequent callers will report nothing. 
- */ - if (scan->rs_syncscan) - { - if (page != InvalidBlockNumber) - ss_report_location(scan->rs_rd, page); - else if (nallocated == scan->rs_nblocks) - ss_report_location(scan->rs_rd, parallel_scan->phs_startblock); - } - - return page; -} - /* ---------------- * heap_update_snapshot * @@ -1810,13 +1547,15 @@ heap_parallelscan_nextpage(HeapScanDesc scan) * ---------------- */ void -heap_update_snapshot(HeapScanDesc scan, Snapshot snapshot) +heap_update_snapshot(TableScanDesc sscan, Snapshot snapshot) { + HeapScanDesc scan = (HeapScanDesc) sscan; + Assert(IsMVCCSnapshot(snapshot)); RegisterSnapshot(snapshot); - scan->rs_snapshot = snapshot; - scan->rs_temp_snap = true; + scan->rs_scan.rs_snapshot = snapshot; + scan->rs_scan.rs_temp_snap = true; } /* ---------------- @@ -1842,19 +1581,20 @@ heap_update_snapshot(HeapScanDesc scan, Snapshot snapshot) #define HEAPDEBUG_3 #endif /* !defined(HEAPDEBUGALL) */ - HeapTuple -heap_getnext(HeapScanDesc scan, ScanDirection direction) +heap_getnext(TableScanDesc sscan, ScanDirection direction) { + HeapScanDesc scan = (HeapScanDesc) sscan; + /* Note: no locking manipulations needed */ HEAPDEBUG_1; /* heap_getnext( info ) */ - if (scan->rs_pageatatime) + if (scan->rs_scan.rs_pageatatime) heapgettup_pagemode(scan, direction, - scan->rs_nkeys, scan->rs_key); + scan->rs_scan.rs_nkeys, scan->rs_scan.rs_key); else - heapgettup(scan, direction, scan->rs_nkeys, scan->rs_key); + heapgettup(scan, direction, scan->rs_scan.rs_nkeys, scan->rs_scan.rs_key); if (scan->rs_ctup.t_data == NULL) { @@ -1868,9 +1608,57 @@ heap_getnext(HeapScanDesc scan, ScanDirection direction) */ HEAPDEBUG_3; /* heap_getnext returning tuple */ - pgstat_count_heap_getnext(scan->rs_rd); + pgstat_count_heap_getnext(scan->rs_scan.rs_rd); + + return &scan->rs_ctup; +} + +#ifdef HEAPAMSLOTDEBUGALL +#define HEAPAMSLOTDEBUG_1 \ + elog(DEBUG2, "heapam_getnext([%s,nkeys=%d],dir=%d) called", \ + RelationGetRelationName(scan->rs_scan.rs_rd), scan->rs_scan.rs_nkeys, (int) direction) +#define HEAPAMSLOTDEBUG_2 \ + elog(DEBUG2, "heapam_getnext returning EOS") +#define HEAPAMSLOTDEBUG_3 \ + elog(DEBUG2, "heapam_getnext returning tuple") +#else +#define HEAPAMSLOTDEBUG_1 +#define HEAPAMSLOTDEBUG_2 +#define HEAPAMSLOTDEBUG_3 +#endif + +TupleTableSlot * +heap_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot) +{ + HeapScanDesc scan = (HeapScanDesc) sscan; + + /* Note: no locking manipulations needed */ + + HEAPAMSLOTDEBUG_1; /* heap_getnext( info ) */ + + if (scan->rs_scan.rs_pageatatime) + heapgettup_pagemode(scan, direction, + scan->rs_scan.rs_nkeys, scan->rs_scan.rs_key); + else + heapgettup(scan, direction, scan->rs_scan.rs_nkeys, scan->rs_scan.rs_key); - return &(scan->rs_ctup); + if (scan->rs_ctup.t_data == NULL) + { + HEAPAMSLOTDEBUG_2; /* heap_getnext returning EOS */ + ExecClearTuple(slot); + return NULL; + } + + /* + * if we get here it means we have a new current scan tuple, so point to + * the proper return buffer and return the tuple. + */ + HEAPAMSLOTDEBUG_3; /* heap_getnext returning tuple */ + + pgstat_count_heap_getnext(scan->rs_scan.rs_rd); + + return ExecStoreBufferHeapTuple(&scan->rs_ctup, slot, + scan->rs_cbuf); } /* @@ -1890,10 +1678,8 @@ heap_getnext(HeapScanDesc scan, ScanDirection direction) * If the tuple is found but fails the time qual check, then false is returned * but tuple->t_data is left pointing to the tuple. * - * keep_buf determines what is done with the buffer in the false-result cases. 
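A minimal sketch of a slot-based scan loop driven by the new heap_getnextslot() (illustrative only; rel and snapshot are assumed to be set up by the caller):

    TupleTableSlot *slot = MakeSingleTupleTableSlot(RelationGetDescr(rel),
                                                    &TTSOpsBufferHeapTuple);
    TableScanDesc scan = table_beginscan(rel, snapshot, 0, NULL);

    while (heap_getnextslot(scan, ForwardScanDirection, slot) != NULL)
    {
        /* process the buffer heap tuple stored in slot */
    }

    table_endscan(scan);
    ExecDropSingleTupleTableSlot(slot);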
- * When the caller specifies keep_buf = true, we retain the pin on the buffer - * and return it in *userbuf (so the caller must eventually unpin it); when - * keep_buf = false, the pin is released and *userbuf is set to InvalidBuffer. + * In the false-result cases the buffer pin is released and *userbuf is set to + * InvalidBuffer. * * stats_relation is the relation to charge the heap_fetch operation against * for statistical purposes. (This could be the heap rel itself, an @@ -1913,13 +1699,12 @@ heap_getnext(HeapScanDesc scan, ScanDirection direction) */ bool heap_fetch(Relation relation, + ItemPointer tid, Snapshot snapshot, HeapTuple tuple, Buffer *userbuf, - bool keep_buf, Relation stats_relation) { - ItemPointer tid = &(tuple->t_self); ItemId lp; Buffer buffer; Page page; @@ -1946,13 +1731,8 @@ heap_fetch(Relation relation, if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(page)) { LockBuffer(buffer, BUFFER_LOCK_UNLOCK); - if (keep_buf) - *userbuf = buffer; - else - { - ReleaseBuffer(buffer); - *userbuf = InvalidBuffer; - } + ReleaseBuffer(buffer); + *userbuf = InvalidBuffer; tuple->t_data = NULL; return false; } @@ -1968,20 +1748,16 @@ heap_fetch(Relation relation, if (!ItemIdIsNormal(lp)) { LockBuffer(buffer, BUFFER_LOCK_UNLOCK); - if (keep_buf) - *userbuf = buffer; - else - { - ReleaseBuffer(buffer); - *userbuf = InvalidBuffer; - } + ReleaseBuffer(buffer); + *userbuf = InvalidBuffer; tuple->t_data = NULL; return false; } /* - * fill in *tuple fields + * fill in tuple fields and place it in stuple */ + ItemPointerCopy(tid, &(tuple->t_self)); tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp); tuple->t_len = ItemIdGetLength(lp); tuple->t_tableOid = RelationGetRelid(relation); @@ -1989,7 +1765,7 @@ heap_fetch(Relation relation, /* * check time qualification of tuple, then release lock */ - valid = HeapTupleSatisfiesVisibility(tuple, snapshot, buffer); + valid = HeapTupleSatisfies(tuple, snapshot, buffer); if (valid) PredicateLockTuple(relation, tuple, snapshot); @@ -2013,14 +1789,9 @@ heap_fetch(Relation relation, return true; } - /* Tuple failed time qual, but maybe caller wants to see it anyway. */ - if (keep_buf) - *userbuf = buffer; - else - { - ReleaseBuffer(buffer); - *userbuf = InvalidBuffer; - } + /* Tuple failed time qual */ + ReleaseBuffer(buffer); + *userbuf = InvalidBuffer; return false; } @@ -2136,7 +1907,7 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, ItemPointerSet(&(heapTuple->t_self), BufferGetBlockNumber(buffer), offnum); /* If it's visible per the snapshot, we must return it */ - valid = HeapTupleSatisfiesVisibility(heapTuple, snapshot, buffer); + valid = HeapTupleSatisfies(heapTuple, snapshot, buffer); CheckForSerializableConflictOut(valid, relation, heapTuple, buffer, snapshot); /* reset to original, non-redirected, tid */ @@ -2310,7 +2081,7 @@ heap_get_latest_tid(Relation relation, * Check time qualification of tuple; if visible, set it as the new * result candidate. 
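Example call under the new heap_fetch() signature (a sketch; rel, tid and snapshot are assumed): the TID is now passed explicitly and, with keep_buf gone, no pin is retained on failure:

    HeapTupleData tup;
    Buffer      buf;

    if (heap_fetch(rel, tid, snapshot, &tup, &buf, NULL))
    {
        /* tuple is visible; the caller now owns the pin on buf */
        ReleaseBuffer(buf);
    }
    else
        Assert(buf == InvalidBuffer);   /* no pin is held on failure */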
*/ - valid = HeapTupleSatisfiesVisibility(&tp, snapshot, buffer); + valid = HeapTupleSatisfies(&tp, snapshot, buffer); CheckForSerializableConflictOut(valid, relation, &tp, buffer, snapshot); if (valid) *tid = ctid; @@ -2333,7 +2104,6 @@ heap_get_latest_tid(Relation relation, } /* end of loop */ } - /* * UpdateXmaxHintBits - update tuple hint bits after xmax transaction ends * @@ -2380,7 +2150,7 @@ GetBulkInsertState(void) bistate = (BulkInsertState) palloc(sizeof(BulkInsertStateData)); bistate->strategy = GetAccessStrategy(BAS_BULKWRITE); bistate->current_buf = InvalidBuffer; - return bistate; + return (void *)bistate; } /* @@ -2694,7 +2464,7 @@ heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid, * temporary context before calling this, if that's a problem. */ void -heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples, +heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, CommandId cid, int options, BulkInsertState bistate) { TransactionId xid = GetCurrentTransactionId(); @@ -2715,12 +2485,17 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples, saveFreeSpace = RelationGetTargetPageFreeSpace(relation, HEAP_DEFAULT_FILLFACTOR); - /* Toast and set header data in all the tuples */ + /* Toast and set header data in all the slots */ heaptuples = palloc(ntuples * sizeof(HeapTuple)); for (i = 0; i < ntuples; i++) - heaptuples[i] = heap_prepare_insert(relation, tuples[i], + { + heaptuples[i] = heap_prepare_insert(relation, ExecFetchSlotHeapTuple(slots[i], true, NULL), xid, cid, options); + if (slots[i]->tts_tableOid != InvalidOid) + heaptuples[i]->t_tableOid = slots[i]->tts_tableOid; + } + /* * We're about to do the actual inserts -- but check for conflict first, * to minimize the possibility of having to roll back work we've just @@ -2955,7 +2730,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples, * probably faster to always copy than check. 
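Caller-level sketch of the slot-based heap_multi_insert() (illustrative; slots[], nslots and bistate are assumed to have been prepared by the caller, e.g. by COPY):

    heap_multi_insert(rel, slots, nslots, GetCurrentCommandId(true), 0, bistate);

    /* on return, slots[i]->tts_tid holds the TID each tuple was written to */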
*/ for (i = 0; i < ntuples; i++) - tuples[i]->t_self = heaptuples[i]->t_self; + slots[i]->tts_tid = heaptuples[i]->t_self; pgstat_count_heap_insert(relation, ntuples); } @@ -3225,7 +3000,7 @@ l1: if (crosscheck != InvalidSnapshot && result == HeapTupleMayBeUpdated) { /* Perform additional check for transaction-snapshot mode RI updates */ - if (!HeapTupleSatisfiesVisibility(&tp, crosscheck, buffer)) + if (!HeapTupleSatisfies(&tp, crosscheck, buffer)) result = HeapTupleUpdated; } @@ -3233,6 +3008,7 @@ l1: { Assert(result == HeapTupleSelfUpdated || result == HeapTupleUpdated || + result == HeapTupleDeleted || result == HeapTupleBeingUpdated); Assert(!(tp.t_data->t_infomask & HEAP_XMAX_INVALID)); hufd->ctid = tp.t_data->t_ctid; @@ -3246,6 +3022,8 @@ l1: UnlockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive); if (vmbuffer != InvalidBuffer) ReleaseBuffer(vmbuffer); + if (result == HeapTupleUpdated && ItemPointerEquals(tid, &hufd->ctid)) + result = HeapTupleDeleted; return result; } @@ -3463,6 +3241,10 @@ simple_heap_delete(Relation relation, ItemPointer tid) elog(ERROR, "tuple concurrently updated"); break; + case HeapTupleDeleted: + elog(ERROR, "tuple concurrently deleted"); + break; + default: elog(ERROR, "unrecognized heap_delete status: %u", result); break; @@ -3860,7 +3642,7 @@ l2: if (crosscheck != InvalidSnapshot && result == HeapTupleMayBeUpdated) { /* Perform additional check for transaction-snapshot mode RI updates */ - if (!HeapTupleSatisfiesVisibility(&oldtup, crosscheck, buffer)) + if (!HeapTupleSatisfies(&oldtup, crosscheck, buffer)) result = HeapTupleUpdated; } @@ -3868,6 +3650,7 @@ l2: { Assert(result == HeapTupleSelfUpdated || result == HeapTupleUpdated || + result == HeapTupleDeleted || result == HeapTupleBeingUpdated); Assert(!(oldtup.t_data->t_infomask & HEAP_XMAX_INVALID)); hufd->ctid = oldtup.t_data->t_ctid; @@ -3887,6 +3670,8 @@ l2: bms_free(id_attrs); bms_free(modified_attrs); bms_free(interesting_attrs); + if (result == HeapTupleUpdated && ItemPointerEquals(otid, &hufd->ctid)) + result = HeapTupleDeleted; return result; } @@ -4593,6 +4378,10 @@ simple_heap_update(Relation relation, ItemPointer otid, HeapTuple tup) elog(ERROR, "tuple concurrently updated"); break; + case HeapTupleDeleted: + elog(ERROR, "tuple concurrently deleted"); + break; + default: elog(ERROR, "unrecognized heap_update status: %u", result); break; @@ -4627,7 +4416,7 @@ get_mxact_status_for_lock(LockTupleMode mode, bool is_update) * * Input parameters: * relation: relation containing tuple (caller must hold suitable lock) - * tuple->t_self: TID of tuple to lock (rest of struct need not be valid) + * tid: TID of tuple to lock * cid: current command ID (used for visibility test, and stored into * tuple's cmax if lock is successful) * mode: indicates if shared or exclusive tuple lock is desired @@ -4645,6 +4434,7 @@ get_mxact_status_for_lock(LockTupleMode mode, bool is_update) * HeapTupleInvisible: lock failed because tuple was never visible to us * HeapTupleSelfUpdated: lock failed because tuple updated by self * HeapTupleUpdated: lock failed because tuple updated by other xact + * HeapTupleDeleted: lock failed because tuple deleted by other xact * HeapTupleWouldBlock: lock couldn't be acquired and wait_policy is skip * * In the failure cases other than HeapTupleInvisible, the routine fills @@ -4657,13 +4447,12 @@ get_mxact_status_for_lock(LockTupleMode mode, bool is_update) * See README.tuplock for a thorough explanation of this mechanism. 
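Caller-side sketch for the revised heap_lock_tuple() (illustrative; rel, tid and cid are assumed): the TID is passed separately, the locked tuple comes back through a caller-provided HeapTuple, and HeapTupleDeleted is now reported distinctly from HeapTupleUpdated:

    HeapTupleData locked;
    Buffer      buf;
    HeapUpdateFailureData hufd;
    HTSU_Result res;

    res = heap_lock_tuple(rel, tid, cid, LockTupleExclusive, LockWaitBlock,
                          false /* follow_updates */, &locked, &buf, &hufd);
    if (res == HeapTupleDeleted)
    {
        /* the row is gone for good; there is no newer version to chase via hufd.ctid */
    }
    ReleaseBuffer(buf);         /* heap_lock_tuple returns with the buffer pinned */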
*/ HTSU_Result -heap_lock_tuple(Relation relation, HeapTuple tuple, +heap_lock_tuple(Relation relation, ItemPointer tid, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, bool follow_updates, - Buffer *buffer, HeapUpdateFailureData *hufd) + HeapTuple tuple, Buffer *buffer, HeapUpdateFailureData *hufd) { HTSU_Result result; - ItemPointer tid = &(tuple->t_self); ItemId lp; Page page; Buffer vmbuffer = InvalidBuffer; @@ -4698,6 +4487,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple, tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp); tuple->t_len = ItemIdGetLength(lp); tuple->t_tableOid = RelationGetRelid(relation); + tuple->t_self = *tid; l3: result = HeapTupleSatisfiesUpdate(tuple, cid, *buffer); @@ -4713,7 +4503,7 @@ l3: result = HeapTupleInvisible; goto out_locked; } - else if (result == HeapTupleBeingUpdated || result == HeapTupleUpdated) + else if (result == HeapTupleBeingUpdated || result == HeapTupleUpdated || result == HeapTupleDeleted) { TransactionId xwait; uint16 infomask; @@ -4993,7 +4783,7 @@ l3: * or we must wait for the locking transaction or multixact; so below * we ensure that we grab buffer lock after the sleep. */ - if (require_sleep && result == HeapTupleUpdated) + if (require_sleep && (result == HeapTupleUpdated || result == HeapTupleDeleted)) { LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); goto failed; @@ -5153,6 +4943,8 @@ l3: HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask) || HeapTupleHeaderIsOnlyLocked(tuple->t_data)) result = HeapTupleMayBeUpdated; + else if (ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid)) + result = HeapTupleDeleted; else result = HeapTupleUpdated; } @@ -5161,7 +4953,7 @@ failed: if (result != HeapTupleMayBeUpdated) { Assert(result == HeapTupleSelfUpdated || result == HeapTupleUpdated || - result == HeapTupleWouldBlock); + result == HeapTupleWouldBlock || result == HeapTupleDeleted); Assert(!(tuple->t_data->t_infomask & HEAP_XMAX_INVALID)); hufd->ctid = tuple->t_data->t_ctid; hufd->xmax = HeapTupleHeaderGetUpdateXid(tuple->t_data); @@ -5765,9 +5557,8 @@ heap_lock_updated_tuple_rec(Relation rel, ItemPointer tid, TransactionId xid, new_infomask = 0; new_xmax = InvalidTransactionId; block = ItemPointerGetBlockNumber(&tupid); - ItemPointerCopy(&tupid, &(mytup.t_self)); - if (!heap_fetch(rel, SnapshotAny, &mytup, &buf, false, NULL)) + if (!heap_fetch(rel, &tupid, SnapshotAny, &mytup, &buf, NULL)) { /* * if we fail to find the updated version of the tuple, it's @@ -6050,6 +5841,10 @@ next: result = HeapTupleMayBeUpdated; out_locked: + + if (result == HeapTupleUpdated && ItemPointerEquals(&mytup.t_self, &mytup.t_data->t_ctid)) + result = HeapTupleDeleted; + UnlockReleaseBuffer(buf); out_unlocked: @@ -6228,6 +6023,10 @@ heap_abort_speculative(Relation relation, HeapTuple tuple) BlockNumber block; Buffer buffer; + /* + * Assert(slot->tts_speculativeToken != 0); This needs some update in + * toast + */ Assert(ItemPointerIsValid(tid)); block = ItemPointerGetBlockNumber(tid); diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c new file mode 100644 index 0000000000..49f8d833f6 --- /dev/null +++ b/src/backend/access/heap/heapam_handler.c @@ -0,0 +1,2122 @@ +/*------------------------------------------------------------------------- + * + * heapam_handler.c + * heap table access method code + * + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * 
src/backend/access/heap/heapam_handler.c + * + * + * NOTES + * This file contains the heap_ routines which implement + * the POSTGRES heap table access method used for all POSTGRES + * relations. + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "miscadmin.h" + +#include "access/heapam.h" +#include "access/relscan.h" +#include "access/rewriteheap.h" +#include "access/tableam.h" +#include "access/tsmapi.h" +#include "catalog/catalog.h" +#include "catalog/index.h" +#include "catalog/pg_am_d.h" +#include "executor/executor.h" +#include "pgstat.h" +#include "storage/lmgr.h" +#include "utils/builtins.h" +#include "utils/rel.h" +#include "utils/tqual.h" +#include "storage/bufpage.h" +#include "storage/bufmgr.h" +#include "storage/predicate.h" +#include "storage/procarray.h" +#include "storage/smgr.h" +#include "access/xact.h" + + +/* ---------------------------------------------------------------- + * storage AM support routines for heapam + * ---------------------------------------------------------------- + */ + +static bool +heapam_fetch_row_version(Relation relation, + ItemPointer tid, + Snapshot snapshot, + TupleTableSlot *slot, + Relation stats_relation) +{ + BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot; + Buffer buffer; + + Assert(TTS_IS_BUFFERTUPLE(slot)); + + if (heap_fetch(relation, tid, snapshot, &bslot->base.tupdata, &buffer, stats_relation)) + { + ExecStoreBufferHeapTuple(&bslot->base.tupdata, slot, buffer); + ReleaseBuffer(buffer); + + slot->tts_tableOid = RelationGetRelid(relation); + + return true; + } + + slot->tts_tableOid = RelationGetRelid(relation); + + return false; +} + +/* + * Insert a heap tuple from a slot, which may contain an OID and speculative + * insertion token. 
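Illustrative use of the callback above through the table AM layer (a sketch; table_fetch_row_version() is assumed to be the dispatching wrapper in tableam.h, so treat that name as hypothetical):

    TupleTableSlot *slot = MakeSingleTupleTableSlot(RelationGetDescr(rel),
                                                    &TTSOpsBufferHeapTuple);

    if (table_fetch_row_version(rel, tid, snapshot, slot, NULL))
    {
        /* slot holds the visible version of the row; tts_tableOid is set */
    }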
+ */ +static void +heapam_heap_insert(Relation relation, TupleTableSlot *slot, CommandId cid, + int options, BulkInsertState bistate) +{ + bool shouldFree = true; + HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); + + /* Update the tuple with table oid */ + slot->tts_tableOid = RelationGetRelid(relation); + if (slot->tts_tableOid != InvalidOid) + tuple->t_tableOid = slot->tts_tableOid; + + /* Perform the insertion, and copy the resulting ItemPointer */ + heap_insert(relation, tuple, cid, options, bistate); + ItemPointerCopy(&tuple->t_self, &slot->tts_tid); + + if (shouldFree) + pfree(tuple); +} + +static void +heapam_heap_insert_speculative(Relation relation, TupleTableSlot *slot, CommandId cid, + int options, BulkInsertState bistate, uint32 specToken) +{ + bool shouldFree = true; + HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); + + /* Update the tuple with table oid */ + slot->tts_tableOid = RelationGetRelid(relation); + if (slot->tts_tableOid != InvalidOid) + tuple->t_tableOid = slot->tts_tableOid; + + HeapTupleHeaderSetSpeculativeToken(tuple->t_data, specToken); + + /* Perform the insertion, and copy the resulting ItemPointer */ + heap_insert(relation, tuple, cid, options, bistate); + ItemPointerCopy(&tuple->t_self, &slot->tts_tid); + + if (shouldFree) + pfree(tuple); +} + +static void +heapam_heap_complete_speculative(Relation relation, TupleTableSlot *slot, uint32 spekToken, + bool succeeded) +{ + bool shouldFree = true; + HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); + + /* adjust the tuple's state accordingly */ + if (!succeeded) + heap_finish_speculative(relation, tuple); + else + { + heap_abort_speculative(relation, tuple); + } + + if (shouldFree) + pfree(tuple); +} + + +static HTSU_Result +heapam_heap_delete(Relation relation, ItemPointer tid, CommandId cid, + Snapshot snapshot, Snapshot crosscheck, bool wait, + HeapUpdateFailureData *hufd, bool changingPart) +{ + /* + * Currently Deleting of index tuples are handled at vacuum, in case + * if the storage itself is cleaning the dead tuples by itself, it is + * the time to call the index tuple deletion also. + */ + return heap_delete(relation, tid, cid, crosscheck, wait, hufd, changingPart); +} + + +/* + * Locks tuple and fetches its newest version and TID. 
+ * + * relation - table containing tuple + * tid - TID of tuple to lock + * snapshot - snapshot indentifying required version (used for assert check only) + * slot - tuple to be returned + * cid - current command ID (used for visibility test, and stored into + * tuple's cmax if lock is successful) + * mode - indicates if shared or exclusive tuple lock is desired + * wait_policy - what to do if tuple lock is not available + * flags – indicating how do we handle updated tuples + * *hufd - filled in failure cases + * + * Function result may be: + * HeapTupleMayBeUpdated: lock was successfully acquired + * HeapTupleInvisible: lock failed because tuple was never visible to us + * HeapTupleSelfUpdated: lock failed because tuple updated by self + * HeapTupleUpdated: lock failed because tuple updated by other xact + * HeapTupleDeleted: lock failed because tuple deleted by other xact + * HeapTupleWouldBlock: lock couldn't be acquired and wait_policy is skip + * + * In the failure cases other than HeapTupleInvisible, the routine fills + * *hufd with the tuple's t_ctid, t_xmax (resolving a possible MultiXact, + * if necessary), and t_cmax (the last only for HeapTupleSelfUpdated, + * since we cannot obtain cmax from a combocid generated by another + * transaction). + * See comments for struct HeapUpdateFailureData for additional info. + */ +static HTSU_Result +heapam_lock_tuple(Relation relation, ItemPointer tid, Snapshot snapshot, + TupleTableSlot *slot, CommandId cid, LockTupleMode mode, + LockWaitPolicy wait_policy, uint8 flags, + HeapUpdateFailureData *hufd) +{ + BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot; + HTSU_Result result; + Buffer buffer; + HeapTuple tuple = &bslot->base.tupdata; + + hufd->traversed = false; + + Assert(TTS_IS_BUFFERTUPLE(slot)); + +retry: + result = heap_lock_tuple(relation, tid, cid, mode, wait_policy, + (flags & TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS) ? true : false, + tuple, &buffer, hufd); + + if (result == HeapTupleUpdated && + (flags & TUPLE_LOCK_FLAG_FIND_LAST_VERSION)) + { + ReleaseBuffer(buffer); + /* Should not encounter speculative tuple on recheck */ + Assert(!HeapTupleHeaderIsSpeculative(tuple->t_data)); + + if (!ItemPointerEquals(&hufd->ctid, &tuple->t_self)) + { + SnapshotData SnapshotDirty; + TransactionId priorXmax; + + /* it was updated, so look at the updated version */ + *tid = hufd->ctid; + /* updated row should have xmin matching this xmax */ + priorXmax = hufd->xmax; + + /* + * fetch target tuple + * + * Loop here to deal with updated or busy tuples + */ + InitDirtySnapshot(SnapshotDirty); + for (;;) + { + if (ItemPointerIndicatesMovedPartitions(tid)) + ereport(ERROR, + (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), + errmsg("tuple to be locked was already moved to another partition due to concurrent update"))); + + + if (heap_fetch(relation, tid, &SnapshotDirty, tuple, &buffer, NULL)) + { + /* + * If xmin isn't what we're expecting, the slot must have been + * recycled and reused for an unrelated tuple. This implies that + * the latest version of the row was deleted, so we need do + * nothing. (Should be safe to examine xmin without getting + * buffer's content lock. We assume reading a TransactionId to be + * atomic, and Xmin never changes in an existing tuple, except to + * invalid or frozen, and neither of those can match priorXmax.) + */ + if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data), + priorXmax)) + { + ReleaseBuffer(buffer); + return HeapTupleDeleted; + } + + /* otherwise xmin should not be dirty... 
*/ + if (TransactionIdIsValid(SnapshotDirty.xmin)) + elog(ERROR, "t_xmin is uncommitted in tuple to be updated"); + + /* + * If tuple is being updated by other transaction then we have to + * wait for its commit/abort, or die trying. + */ + if (TransactionIdIsValid(SnapshotDirty.xmax)) + { + ReleaseBuffer(buffer); + switch (wait_policy) + { + case LockWaitBlock: + XactLockTableWait(SnapshotDirty.xmax, + relation, &tuple->t_self, + XLTW_FetchUpdated); + break; + case LockWaitSkip: + if (!ConditionalXactLockTableWait(SnapshotDirty.xmax)) + return result; /* skip instead of waiting */ + break; + case LockWaitError: + if (!ConditionalXactLockTableWait(SnapshotDirty.xmax)) + ereport(ERROR, + (errcode(ERRCODE_LOCK_NOT_AVAILABLE), + errmsg("could not obtain lock on row in relation \"%s\"", + RelationGetRelationName(relation)))); + break; + } + continue; /* loop back to repeat heap_fetch */ + } + + /* + * If tuple was inserted by our own transaction, we have to check + * cmin against es_output_cid: cmin >= current CID means our + * command cannot see the tuple, so we should ignore it. Otherwise + * heap_lock_tuple() will throw an error, and so would any later + * attempt to update or delete the tuple. (We need not check cmax + * because HeapTupleSatisfiesDirty will consider a tuple deleted + * by our transaction dead, regardless of cmax.) We just checked + * that priorXmax == xmin, so we can test that variable instead of + * doing HeapTupleHeaderGetXmin again. + */ + if (TransactionIdIsCurrentTransactionId(priorXmax) && + HeapTupleHeaderGetCmin(tuple->t_data) >= cid) + { + ReleaseBuffer(buffer); + return result; + } + + hufd->traversed = true; + *tid = tuple->t_data->t_ctid; + ReleaseBuffer(buffer); + goto retry; + } + + /* + * If the referenced slot was actually empty, the latest version of + * the row must have been deleted, so we need do nothing. + */ + if (tuple->t_data == NULL) + { + return HeapTupleDeleted; + } + + /* + * As above, if xmin isn't what we're expecting, do nothing. + */ + if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data), + priorXmax)) + { + if (BufferIsValid(buffer)) + ReleaseBuffer(buffer); + return HeapTupleDeleted; + } + + /* + * If we get here, the tuple was found but failed SnapshotDirty. + * Assuming the xmin is either a committed xact or our own xact (as it + * certainly should be if we're trying to modify the tuple), this must + * mean that the row was updated or deleted by either a committed xact + * or our own xact. If it was deleted, we can ignore it; if it was + * updated then chain up to the next version and repeat the whole + * process. + * + * As above, it should be safe to examine xmax and t_ctid without the + * buffer content lock, because they can't be changing. + */ + if (ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid)) + { + /* deleted, so forget about it */ + if (BufferIsValid(buffer)) + ReleaseBuffer(buffer); + return HeapTupleDeleted; + } + + /* updated, so look at the updated row */ + *tid = tuple->t_data->t_ctid; + /* updated row should have xmin matching this xmax */ + priorXmax = HeapTupleHeaderGetUpdateXid(tuple->t_data); + if (BufferIsValid(buffer)) + ReleaseBuffer(buffer); + /* loop back to fetch next in chain */ + } + } + else + { + /* tuple was deleted, so give up */ + return HeapTupleDeleted; + } + } + + slot->tts_tableOid = RelationGetRelid(relation); + ExecStoreBufferHeapTuple(tuple, slot, buffer); + ReleaseBuffer(buffer); // FIXME: invent option to just transfer pin? 
+ + return result; +} + + +static HTSU_Result +heapam_heap_update(Relation relation, ItemPointer otid, TupleTableSlot *slot, + CommandId cid, Snapshot snapshot, Snapshot crosscheck, + bool wait, HeapUpdateFailureData *hufd, + LockTupleMode *lockmode, bool *update_indexes) +{ + bool shouldFree = true; + HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); + HTSU_Result result; + + /* Update the tuple with table oid */ + if (slot->tts_tableOid != InvalidOid) + tuple->t_tableOid = slot->tts_tableOid; + + result = heap_update(relation, otid, tuple, cid, crosscheck, wait, + hufd, lockmode); + ItemPointerCopy(&tuple->t_self, &slot->tts_tid); + + slot->tts_tableOid = RelationGetRelid(relation); + + /* + * Note: instead of having to update the old index tuples associated with + * the heap tuple, all we do is form and insert new index tuples. This is + * because UPDATEs are actually DELETEs and INSERTs, and index tuple + * deletion is done later by VACUUM (see notes in ExecDelete). All we do + * here is insert new index tuples. -cim 9/27/89 + */ + + /* + * insert index entries for tuple + * + * Note: heap_update returns the tid (location) of the new tuple in the + * t_self field. + * + * If it's a HOT update, we mustn't insert new index entries. + */ + *update_indexes = result == HeapTupleMayBeUpdated && + !HeapTupleIsHeapOnly(tuple); + + if (shouldFree) + pfree(tuple); + + return result; +} + +static const TupleTableSlotOps * +heapam_slot_callbacks(Relation relation) +{ + return &TTSOpsBufferHeapTuple; +} + +HeapTuple +heap_scan_getnext(TableScanDesc sscan, ScanDirection direction) +{ + if (unlikely(sscan->rs_rd->rd_rel->relam != HEAP_TABLE_AM_OID)) + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("only heap AM is supported"))); + return heap_getnext(sscan, direction); +} + +static bool +heapam_satisfies(Relation rel, TupleTableSlot *slot, Snapshot snapshot) +{ + BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot; + bool res; + + Assert(TTS_IS_BUFFERTUPLE(slot)); + Assert(BufferIsValid(bslot->buffer)); + + /* + * We need buffer pin and lock to call HeapTupleSatisfiesVisibility. + * Caller should be holding pin, but not lock. 
+ */ + LockBuffer(bslot->buffer, BUFFER_LOCK_SHARE); + res = HeapTupleSatisfies(bslot->base.tuple, snapshot, bslot->buffer); + LockBuffer(bslot->buffer, BUFFER_LOCK_UNLOCK); + + return res; +} + +static IndexFetchTableData* +heapam_begin_index_fetch(Relation rel) +{ + IndexFetchHeapData *hscan = palloc0(sizeof(IndexFetchHeapData)); + + hscan->xs_base.rel = rel; + hscan->xs_cbuf = InvalidBuffer; + //hscan->xs_continue_hot = false; + + return &hscan->xs_base; +} + + +static void +heapam_reset_index_fetch(IndexFetchTableData* scan) +{ + IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan; + + if (BufferIsValid(hscan->xs_cbuf)) + { + ReleaseBuffer(hscan->xs_cbuf); + hscan->xs_cbuf = InvalidBuffer; + } + + //hscan->xs_continue_hot = false; +} + +static void +heapam_end_index_fetch(IndexFetchTableData* scan) +{ + IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan; + + heapam_reset_index_fetch(scan); + + pfree(hscan); +} + +static bool +heapam_fetch_follow(struct IndexFetchTableData *scan, + ItemPointer tid, + Snapshot snapshot, + TupleTableSlot *slot, + bool *call_again, bool *all_dead) +{ + IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan; + BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot; + bool got_heap_tuple; + + Assert(TTS_IS_BUFFERTUPLE(slot)); + + /* We can skip the buffer-switching logic if we're in mid-HOT chain. */ + if (!*call_again) + { + /* Switch to correct buffer if we don't have it already */ + Buffer prev_buf = hscan->xs_cbuf; + + hscan->xs_cbuf = ReleaseAndReadBuffer(hscan->xs_cbuf, + hscan->xs_base.rel, + ItemPointerGetBlockNumber(tid)); + + /* + * Prune page, but only if we weren't already on this page + */ + if (prev_buf != hscan->xs_cbuf) + heap_page_prune_opt(hscan->xs_base.rel, hscan->xs_cbuf); + } + + /* Obtain share-lock on the buffer so we can examine visibility */ + LockBuffer(hscan->xs_cbuf, BUFFER_LOCK_SHARE); + got_heap_tuple = heap_hot_search_buffer(tid, + hscan->xs_base.rel, + hscan->xs_cbuf, + snapshot, + &bslot->base.tupdata, + all_dead, + !*call_again); + bslot->base.tupdata.t_self = *tid; + LockBuffer(hscan->xs_cbuf, BUFFER_LOCK_UNLOCK); + + if (got_heap_tuple) + { + /* + * Only in a non-MVCC snapshot can more than one member of the HOT + * chain be visible. + */ + *call_again = !IsMVCCSnapshot(snapshot); + // FIXME pgstat_count_heap_fetch(scan->indexRelation); + + slot->tts_tableOid = RelationGetRelid(scan->rel); + ExecStoreBufferHeapTuple(&bslot->base.tupdata, slot, hscan->xs_cbuf); + } + else + { + /* We've reached the end of the HOT chain. */ + *call_again = false; + } + + return got_heap_tuple; +} + +/* + * As above, except that instead of scanning the complete heap, only the given + * number of blocks are scanned. Scan to end-of-rel can be signalled by + * passing InvalidBlockNumber as numblocks. Note that restricting the range + * to scan cannot be done when requesting syncscan. + * + * When "anyvisible" mode is requested, all tuples visible to any transaction + * are indexed and counted as live, including those inserted or deleted by + * transactions that are still in progress. 
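Conceptual driver loop for the index-fetch callbacks above (a sketch only; in the server these are reached through the table AM dispatch during index scans rather than called directly, and tid (an ItemPointer), snapshot and slot are assumed):

    IndexFetchTableData *fetch = heapam_begin_index_fetch(rel);
    bool        call_again = false;
    bool        all_dead = false;

    while (heapam_fetch_follow(fetch, tid, snapshot, slot,
                               &call_again, &all_dead))
    {
        /* slot holds the next visible member of the HOT chain at tid */
        if (!call_again)
            break;              /* MVCC snapshot: at most one visible member */
    }

    heapam_end_index_fetch(fetch);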
+ */ +static double +IndexBuildHeapRangeScan(Relation heapRelation, + Relation indexRelation, + IndexInfo *indexInfo, + bool allow_sync, + bool anyvisible, + BlockNumber start_blockno, + BlockNumber numblocks, + IndexBuildCallback callback, + void *callback_state, + TableScanDesc sscan) +{ + HeapScanDesc scan = (HeapScanDesc) sscan; + bool is_system_catalog; + bool checking_uniqueness; + HeapTuple heapTuple; + Datum values[INDEX_MAX_KEYS]; + bool isnull[INDEX_MAX_KEYS]; + double reltuples; + ExprState *predicate; + TupleTableSlot *slot; + EState *estate; + ExprContext *econtext; + Snapshot snapshot; + bool need_unregister_snapshot = false; + TransactionId OldestXmin; + BlockNumber root_blkno = InvalidBlockNumber; + OffsetNumber root_offsets[MaxHeapTuplesPerPage]; + + /* + * sanity checks + */ + Assert(OidIsValid(indexRelation->rd_rel->relam)); + + /* Remember if it's a system catalog */ + is_system_catalog = IsSystemRelation(heapRelation); + + /* See whether we're verifying uniqueness/exclusion properties */ + checking_uniqueness = (indexInfo->ii_Unique || + indexInfo->ii_ExclusionOps != NULL); + + /* + * "Any visible" mode is not compatible with uniqueness checks; make sure + * only one of those is requested. + */ + Assert(!(anyvisible && checking_uniqueness)); + + /* + * Need an EState for evaluation of index expressions and partial-index + * predicates. Also a slot to hold the current tuple. + */ + estate = CreateExecutorState(); + econtext = GetPerTupleExprContext(estate); + slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation), + &TTSOpsHeapTuple); + + /* Arrange for econtext's scan tuple to be the tuple under test */ + econtext->ecxt_scantuple = slot; + + /* Set up execution state for predicate, if any. */ + predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate); + + /* + * Prepare for scan of the base relation. In a normal index build, we use + * SnapshotAny because we must retrieve all tuples and do our own time + * qual checks (because we have to index RECENTLY_DEAD tuples). In a + * concurrent build, or during bootstrap, we take a regular MVCC snapshot + * and index whatever's live according to that. + */ + OldestXmin = InvalidTransactionId; + + /* okay to ignore lazy VACUUMs here */ + if (!IsBootstrapProcessingMode() && !indexInfo->ii_Concurrent) + OldestXmin = GetOldestXmin(heapRelation, PROCARRAY_FLAGS_VACUUM); + + if (!scan) + { + /* + * Serial index build. + * + * Must begin our own heap scan in this case. We may also need to + * register a snapshot whose lifetime is under our direct control. + */ + if (!TransactionIdIsValid(OldestXmin)) + { + snapshot = RegisterSnapshot(GetTransactionSnapshot()); + need_unregister_snapshot = true; + } + else + snapshot = SnapshotAny; + + sscan = table_beginscan_strat(heapRelation, /* relation */ + snapshot, /* snapshot */ + 0, /* number of keys */ + NULL, /* scan key */ + true, /* buffer access strategy OK */ + allow_sync); /* syncscan OK? */ + scan = (HeapScanDesc) sscan; + } + else + { + /* + * Parallel index build. + * + * Parallel case never registers/unregisters own snapshot. Snapshot + * is taken from parallel heap scan, and is SnapshotAny or an MVCC + * snapshot, based on same criteria as serial case. + */ + Assert(!IsBootstrapProcessingMode()); + Assert(allow_sync); + snapshot = scan->rs_scan.rs_snapshot; + } + + /* + * Must call GetOldestXmin() with SnapshotAny. Should never call + * GetOldestXmin() with MVCC snapshot. 
(It's especially worth checking + * this for parallel builds, since ambuild routines that support parallel + * builds must work these details out for themselves.) + */ + Assert(snapshot == SnapshotAny || IsMVCCSnapshot(snapshot)); + Assert(snapshot == SnapshotAny ? TransactionIdIsValid(OldestXmin) : + !TransactionIdIsValid(OldestXmin)); + Assert(snapshot == SnapshotAny || !anyvisible); + + /* set our scan endpoints */ + if (!allow_sync) + table_setscanlimits(sscan, start_blockno, numblocks); + else + { + /* syncscan can only be requested on whole relation */ + Assert(start_blockno == 0); + Assert(numblocks == InvalidBlockNumber); + } + + reltuples = 0; + + /* + * Scan all tuples in the base relation. + */ + while ((heapTuple = heap_scan_getnext(sscan, ForwardScanDirection)) != NULL) + { + bool tupleIsAlive; + + CHECK_FOR_INTERRUPTS(); + + /* + * When dealing with a HOT-chain of updated tuples, we want to index + * the values of the live tuple (if any), but index it under the TID + * of the chain's root tuple. This approach is necessary to preserve + * the HOT-chain structure in the heap. So we need to be able to find + * the root item offset for every tuple that's in a HOT-chain. When + * first reaching a new page of the relation, call + * heap_get_root_tuples() to build a map of root item offsets on the + * page. + * + * It might look unsafe to use this information across buffer + * lock/unlock. However, we hold ShareLock on the table so no + * ordinary insert/update/delete should occur; and we hold pin on the + * buffer continuously while visiting the page, so no pruning + * operation can occur either. + * + * Also, although our opinions about tuple liveness could change while + * we scan the page (due to concurrent transaction commits/aborts), + * the chain root locations won't, so this info doesn't need to be + * rebuilt after waiting for another transaction. + * + * Note the implied assumption that there is no more than one live + * tuple per HOT-chain --- else we could create more than one index + * entry pointing to the same root tuple. + */ + if (scan->rs_cblock != root_blkno) + { + Page page = BufferGetPage(scan->rs_cbuf); + + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); + heap_get_root_tuples(page, root_offsets); + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); + + root_blkno = scan->rs_cblock; + } + + if (snapshot == SnapshotAny) + { + /* do our own time qual check */ + bool indexIt; + TransactionId xwait; + + recheck: + + /* + * We could possibly get away with not locking the buffer here, + * since caller should hold ShareLock on the relation, but let's + * be conservative about it. (This remark is still correct even + * with HOT-pruning: our pin on the buffer prevents pruning.) + */ + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); + + /* + * The criteria for counting a tuple as live in this block need to + * match what analyze.c's acquire_sample_rows() does, otherwise + * CREATE INDEX and ANALYZE may produce wildly different reltuples + * values, e.g. when there are many recently-dead tuples. 
+ */ + switch (HeapTupleSatisfiesVacuum(heapTuple, OldestXmin, scan->rs_cbuf)) + { + case HEAPTUPLE_DEAD: + /* Definitely dead, we can ignore it */ + indexIt = false; + tupleIsAlive = false; + break; + case HEAPTUPLE_LIVE: + /* Normal case, index and unique-check it */ + indexIt = true; + tupleIsAlive = true; + /* Count it as live, too */ + reltuples += 1; + break; + case HEAPTUPLE_RECENTLY_DEAD: + + /* + * If tuple is recently deleted then we must index it + * anyway to preserve MVCC semantics. (Pre-existing + * transactions could try to use the index after we finish + * building it, and may need to see such tuples.) + * + * However, if it was HOT-updated then we must only index + * the live tuple at the end of the HOT-chain. Since this + * breaks semantics for pre-existing snapshots, mark the + * index as unusable for them. + * + * We don't count recently-dead tuples in reltuples, even + * if we index them; see acquire_sample_rows(). + */ + if (HeapTupleIsHotUpdated(heapTuple)) + { + indexIt = false; + /* mark the index as unsafe for old snapshots */ + indexInfo->ii_BrokenHotChain = true; + } + else + indexIt = true; + /* In any case, exclude the tuple from unique-checking */ + tupleIsAlive = false; + break; + case HEAPTUPLE_INSERT_IN_PROGRESS: + + /* + * In "anyvisible" mode, this tuple is visible and we + * don't need any further checks. + */ + if (anyvisible) + { + indexIt = true; + tupleIsAlive = true; + reltuples += 1; + break; + } + + /* + * Since caller should hold ShareLock or better, normally + * the only way to see this is if it was inserted earlier + * in our own transaction. However, it can happen in + * system catalogs, since we tend to release write lock + * before commit there. Give a warning if neither case + * applies. + */ + xwait = HeapTupleHeaderGetXmin(heapTuple->t_data); + if (!TransactionIdIsCurrentTransactionId(xwait)) + { + if (!is_system_catalog) + elog(WARNING, "concurrent insert in progress within table \"%s\"", + RelationGetRelationName(heapRelation)); + + /* + * If we are performing uniqueness checks, indexing + * such a tuple could lead to a bogus uniqueness + * failure. In that case we wait for the inserting + * transaction to finish and check again. + */ + if (checking_uniqueness) + { + /* + * Must drop the lock on the buffer before we wait + */ + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); + XactLockTableWait(xwait, heapRelation, + &heapTuple->t_self, + XLTW_InsertIndexUnique); + CHECK_FOR_INTERRUPTS(); + goto recheck; + } + } + else + { + /* + * For consistency with acquire_sample_rows(), count + * HEAPTUPLE_INSERT_IN_PROGRESS tuples as live only + * when inserted by our own transaction. + */ + reltuples += 1; + } + + /* + * We must index such tuples, since if the index build + * commits then they're good. + */ + indexIt = true; + tupleIsAlive = true; + break; + case HEAPTUPLE_DELETE_IN_PROGRESS: + + /* + * As with INSERT_IN_PROGRESS case, this is unexpected + * unless it's our own deletion or a system catalog; but + * in anyvisible mode, this tuple is visible. + */ + if (anyvisible) + { + indexIt = true; + tupleIsAlive = false; + reltuples += 1; + break; + } + + xwait = HeapTupleHeaderGetUpdateXid(heapTuple->t_data); + if (!TransactionIdIsCurrentTransactionId(xwait)) + { + if (!is_system_catalog) + elog(WARNING, "concurrent delete in progress within table \"%s\"", + RelationGetRelationName(heapRelation)); + + /* + * If we are performing uniqueness checks, assuming + * the tuple is dead could lead to missing a + * uniqueness violation. 
In that case we wait for the + * deleting transaction to finish and check again. + * + * Also, if it's a HOT-updated tuple, we should not + * index it but rather the live tuple at the end of + * the HOT-chain. However, the deleting transaction + * could abort, possibly leaving this tuple as live + * after all, in which case it has to be indexed. The + * only way to know what to do is to wait for the + * deleting transaction to finish and check again. + */ + if (checking_uniqueness || + HeapTupleIsHotUpdated(heapTuple)) + { + /* + * Must drop the lock on the buffer before we wait + */ + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); + XactLockTableWait(xwait, heapRelation, + &heapTuple->t_self, + XLTW_InsertIndexUnique); + CHECK_FOR_INTERRUPTS(); + goto recheck; + } + + /* + * Otherwise index it but don't check for uniqueness, + * the same as a RECENTLY_DEAD tuple. + */ + indexIt = true; + + /* + * Count HEAPTUPLE_DELETE_IN_PROGRESS tuples as live, + * if they were not deleted by the current + * transaction. That's what acquire_sample_rows() + * does, and we want the behavior to be consistent. + */ + reltuples += 1; + } + else if (HeapTupleIsHotUpdated(heapTuple)) + { + /* + * It's a HOT-updated tuple deleted by our own xact. + * We can assume the deletion will commit (else the + * index contents don't matter), so treat the same as + * RECENTLY_DEAD HOT-updated tuples. + */ + indexIt = false; + /* mark the index as unsafe for old snapshots */ + indexInfo->ii_BrokenHotChain = true; + } + else + { + /* + * It's a regular tuple deleted by our own xact. Index + * it, but don't check for uniqueness nor count in + * reltuples, the same as a RECENTLY_DEAD tuple. + */ + indexIt = true; + } + /* In any case, exclude the tuple from unique-checking */ + tupleIsAlive = false; + break; + default: + elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result"); + indexIt = tupleIsAlive = false; /* keep compiler quiet */ + break; + } + + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); + + if (!indexIt) + continue; + } + else + { + /* heap_getnext did the time qual check */ + tupleIsAlive = true; + reltuples += 1; + } + + MemoryContextReset(econtext->ecxt_per_tuple_memory); + + /* Set up for predicate or expression evaluation */ + ExecStoreHeapTuple(heapTuple, slot, false); + + /* + * In a partial index, discard tuples that don't satisfy the + * predicate. + */ + if (predicate != NULL) + { + if (!ExecQual(predicate, econtext)) + continue; + } + + /* + * For the current heap tuple, extract all the attributes we use in + * this index, and note which are null. This also performs evaluation + * of any expressions needed. + */ + FormIndexDatum(indexInfo, + slot, + estate, + values, + isnull); + + /* + * You'd think we should go ahead and build the index tuple here, but + * some index AMs want to do further processing on the data first. So + * pass the values[] and isnull[] arrays, instead. + */ + + if (HeapTupleIsHeapOnly(heapTuple)) + { + /* + * For a heap-only tuple, pretend its TID is that of the root. See + * src/backend/access/heap/README.HOT for discussion. 
+ */ + HeapTupleData rootTuple; + OffsetNumber offnum; + + rootTuple = *heapTuple; + offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self); + + if (!OffsetNumberIsValid(root_offsets[offnum - 1])) + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"", + ItemPointerGetBlockNumber(&heapTuple->t_self), + offnum, + RelationGetRelationName(heapRelation)))); + + ItemPointerSetOffsetNumber(&rootTuple.t_self, + root_offsets[offnum - 1]); + + /* Call the AM's callback routine to process the tuple */ + callback(indexRelation, &rootTuple, values, isnull, tupleIsAlive, + callback_state); + } + else + { + /* Call the AM's callback routine to process the tuple */ + callback(indexRelation, heapTuple, values, isnull, tupleIsAlive, + callback_state); + } + } + + table_endscan(sscan); + + /* we can now forget our snapshot, if set and registered by us */ + if (need_unregister_snapshot) + UnregisterSnapshot(snapshot); + + ExecDropSingleTupleTableSlot(slot); + + FreeExecutorState(estate); + + /* These may have been pointing to the now-gone estate */ + indexInfo->ii_ExpressionsState = NIL; + indexInfo->ii_PredicateState = NULL; + + return reltuples; +} + +/* + * validate_index_heapscan - second table scan for concurrent index build + * + * This has much code in common with IndexBuildHeapScan, but it's enough + * different that it seems cleaner to have two routines not one. + */ +static void +validate_index_heapscan(Relation heapRelation, + Relation indexRelation, + IndexInfo *indexInfo, + Snapshot snapshot, + ValidateIndexState *state) +{ + TableScanDesc sscan; + HeapScanDesc scan; + HeapTuple heapTuple; + Datum values[INDEX_MAX_KEYS]; + bool isnull[INDEX_MAX_KEYS]; + ExprState *predicate; + TupleTableSlot *slot; + EState *estate; + ExprContext *econtext; + BlockNumber root_blkno = InvalidBlockNumber; + OffsetNumber root_offsets[MaxHeapTuplesPerPage]; + bool in_index[MaxHeapTuplesPerPage]; + + /* state variables for the merge */ + ItemPointer indexcursor = NULL; + ItemPointerData decoded; + bool tuplesort_empty = false; + + /* + * sanity checks + */ + Assert(OidIsValid(indexRelation->rd_rel->relam)); + + /* + * Need an EState for evaluation of index expressions and partial-index + * predicates. Also a slot to hold the current tuple. + */ + estate = CreateExecutorState(); + econtext = GetPerTupleExprContext(estate); + slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation), + &TTSOpsHeapTuple); + + /* Arrange for econtext's scan tuple to be the tuple under test */ + econtext->ecxt_scantuple = slot; + + /* Set up execution state for predicate, if any. */ + predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate); + + /* + * Prepare for scan of the base relation. We need just those tuples + * satisfying the passed-in reference snapshot. We must disable syncscan + * here, because it's critical that we read from block zero forward to + * match the sorted TIDs. + */ + sscan = table_beginscan_strat(heapRelation, /* relation */ + snapshot, /* snapshot */ + 0, /* number of keys */ + NULL, /* scan key */ + true, /* buffer access strategy OK */ + false); /* syncscan not OK */ + scan = (HeapScanDesc) sscan; + + /* + * Scan all tuples matching the snapshot. 
+ * + * PBORKED: Slotify + */ + while ((heapTuple = heap_scan_getnext(sscan, ForwardScanDirection)) != NULL) + { + ItemPointer heapcursor = &heapTuple->t_self; + ItemPointerData rootTuple; + OffsetNumber root_offnum; + + CHECK_FOR_INTERRUPTS(); + + state->htups += 1; + + /* + * As commented in IndexBuildHeapScan, we should index heap-only + * tuples under the TIDs of their root tuples; so when we advance onto + * a new heap page, build a map of root item offsets on the page. + * + * This complicates merging against the tuplesort output: we will + * visit the live tuples in order by their offsets, but the root + * offsets that we need to compare against the index contents might be + * ordered differently. So we might have to "look back" within the + * tuplesort output, but only within the current page. We handle that + * by keeping a bool array in_index[] showing all the + * already-passed-over tuplesort output TIDs of the current page. We + * clear that array here, when advancing onto a new heap page. + */ + if (scan->rs_cblock != root_blkno) + { + Page page = BufferGetPage(scan->rs_cbuf); + + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); + heap_get_root_tuples(page, root_offsets); + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); + + memset(in_index, 0, sizeof(in_index)); + + root_blkno = scan->rs_cblock; + } + + /* Convert actual tuple TID to root TID */ + rootTuple = *heapcursor; + root_offnum = ItemPointerGetOffsetNumber(heapcursor); + + if (HeapTupleIsHeapOnly(heapTuple)) + { + root_offnum = root_offsets[root_offnum - 1]; + if (!OffsetNumberIsValid(root_offnum)) + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"", + ItemPointerGetBlockNumber(heapcursor), + ItemPointerGetOffsetNumber(heapcursor), + RelationGetRelationName(heapRelation)))); + ItemPointerSetOffsetNumber(&rootTuple, root_offnum); + } + + /* + * "merge" by skipping through the index tuples until we find or pass + * the current root tuple. + */ + while (!tuplesort_empty && + (!indexcursor || + ItemPointerCompare(indexcursor, &rootTuple) < 0)) + { + Datum ts_val; + bool ts_isnull; + + if (indexcursor) + { + /* + * Remember index items seen earlier on the current heap page + */ + if (ItemPointerGetBlockNumber(indexcursor) == root_blkno) + in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true; + } + + tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true, + &ts_val, &ts_isnull, NULL); + Assert(tuplesort_empty || !ts_isnull); + if (!tuplesort_empty) + { + itemptr_decode(&decoded, DatumGetInt64(ts_val)); + indexcursor = &decoded; + + /* If int8 is pass-by-ref, free (encoded) TID Datum memory */ +#ifndef USE_FLOAT8_BYVAL + pfree(DatumGetPointer(ts_val)); +#endif + } + else + { + /* Be tidy */ + indexcursor = NULL; + } + } + + /* + * If the tuplesort has overshot *and* we didn't see a match earlier, + * then this tuple is missing from the index, so insert it. + */ + if ((tuplesort_empty || + ItemPointerCompare(indexcursor, &rootTuple) > 0) && + !in_index[root_offnum - 1]) + { + MemoryContextReset(econtext->ecxt_per_tuple_memory); + + /* Set up for predicate or expression evaluation */ + ExecStoreHeapTuple(heapTuple, slot, false); + + /* + * In a partial index, discard tuples that don't satisfy the + * predicate. + */ + if (predicate != NULL) + { + if (!ExecQual(predicate, econtext)) + continue; + } + + /* + * For the current heap tuple, extract all the attributes we use + * in this index, and note which are null. 
This also performs + * evaluation of any expressions needed. + */ + FormIndexDatum(indexInfo, + slot, + estate, + values, + isnull); + + /* + * You'd think we should go ahead and build the index tuple here, + * but some index AMs want to do further processing on the data + * first. So pass the values[] and isnull[] arrays, instead. + */ + + /* + * If the tuple is already committed dead, you might think we + * could suppress uniqueness checking, but this is no longer true + * in the presence of HOT, because the insert is actually a proxy + * for a uniqueness check on the whole HOT-chain. That is, the + * tuple we have here could be dead because it was already + * HOT-updated, and if so the updating transaction will not have + * thought it should insert index entries. The index AM will + * check the whole HOT-chain and correctly detect a conflict if + * there is one. + */ + + index_insert(indexRelation, + values, + isnull, + &rootTuple, + heapRelation, + indexInfo->ii_Unique ? + UNIQUE_CHECK_YES : UNIQUE_CHECK_NO, + indexInfo); + + state->tups_inserted += 1; + } + } + + table_endscan(sscan); + + ExecDropSingleTupleTableSlot(slot); + + FreeExecutorState(estate); + + /* These may have been pointing to the now-gone estate */ + indexInfo->ii_ExpressionsState = NIL; + indexInfo->ii_PredicateState = NULL; +} + +static bool +heapam_scan_bitmap_pagescan(TableScanDesc sscan, + TBMIterateResult *tbmres) +{ + HeapScanDesc scan = (HeapScanDesc) sscan; + BlockNumber page = tbmres->blockno; + Buffer buffer; + Snapshot snapshot; + int ntup; + + scan->rs_cindex = 0; + scan->rs_ntuples = 0; + + /* + * Ignore any claimed entries past what we think is the end of the + * relation. (This is probably not necessary given that we got at + * least AccessShareLock on the table before performing any of the + * indexscans, but let's be safe.) + */ + if (page >= scan->rs_scan.rs_nblocks) + return false; + + scan->rs_cbuf = ReleaseAndReadBuffer(scan->rs_cbuf, + scan->rs_scan.rs_rd, + page); + scan->rs_cblock = page; + buffer = scan->rs_cbuf; + snapshot = scan->rs_scan.rs_snapshot; + + ntup = 0; + + /* + * Prune and repair fragmentation for the whole page, if possible. + */ + heap_page_prune_opt(scan->rs_scan.rs_rd, buffer); + + /* + * We must hold share lock on the buffer content while examining tuple + * visibility. Afterwards, however, the tuples we have found to be + * visible are guaranteed good as long as we hold the buffer pin. + */ + LockBuffer(buffer, BUFFER_LOCK_SHARE); + + /* + * We need two separate strategies for lossy and non-lossy cases. + */ + if (tbmres->ntuples >= 0) + { + /* + * Bitmap is non-lossy, so we just look through the offsets listed in + * tbmres; but we have to follow any HOT chain starting at each such + * offset. + */ + int curslot; + + for (curslot = 0; curslot < tbmres->ntuples; curslot++) + { + OffsetNumber offnum = tbmres->offsets[curslot]; + ItemPointerData tid; + HeapTupleData heapTuple; + + ItemPointerSet(&tid, page, offnum); + if (heap_hot_search_buffer(&tid, sscan->rs_rd, buffer, snapshot, + &heapTuple, NULL, true)) + scan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid); + } + } + else + { + /* + * Bitmap is lossy, so we must examine each item pointer on the page. + * But we can ignore HOT chains, since we'll check each tuple anyway. 
+ */ + Page dp = (Page) BufferGetPage(buffer); + OffsetNumber maxoff = PageGetMaxOffsetNumber(dp); + OffsetNumber offnum; + + for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum)) + { + ItemId lp; + HeapTupleData loctup; + bool valid; + + lp = PageGetItemId(dp, offnum); + if (!ItemIdIsNormal(lp)) + continue; + loctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp); + loctup.t_len = ItemIdGetLength(lp); + loctup.t_tableOid = scan->rs_scan.rs_rd->rd_id; + ItemPointerSet(&loctup.t_self, page, offnum); + valid = HeapTupleSatisfies(&loctup, snapshot, buffer); + if (valid) + { + scan->rs_vistuples[ntup++] = offnum; + PredicateLockTuple(scan->rs_scan.rs_rd, &loctup, snapshot); + } + CheckForSerializableConflictOut(valid, scan->rs_scan.rs_rd, &loctup, + buffer, snapshot); + } + } + + LockBuffer(buffer, BUFFER_LOCK_UNLOCK); + + Assert(ntup <= MaxHeapTuplesPerPage); + scan->rs_ntuples = ntup; + + return ntup > 0; +} + +static bool +heapam_scan_bitmap_pagescan_next(TableScanDesc sscan, TupleTableSlot *slot) +{ + HeapScanDesc scan = (HeapScanDesc) sscan; + OffsetNumber targoffset; + Page dp; + ItemId lp; + + if (scan->rs_cindex < 0 || scan->rs_cindex >= scan->rs_ntuples) + return false; + + targoffset = scan->rs_vistuples[scan->rs_cindex]; + dp = (Page) BufferGetPage(scan->rs_cbuf); + lp = PageGetItemId(dp, targoffset); + Assert(ItemIdIsNormal(lp)); + + scan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp); + scan->rs_ctup.t_len = ItemIdGetLength(lp); + scan->rs_ctup.t_tableOid = scan->rs_scan.rs_rd->rd_id; + ItemPointerSet(&scan->rs_ctup.t_self, scan->rs_cblock, targoffset); + + pgstat_count_heap_fetch(scan->rs_scan.rs_rd); + + /* + * Set up the result slot to point to this tuple. Note that the + * slot acquires a pin on the buffer. + */ + ExecStoreBufferHeapTuple(&scan->rs_ctup, + slot, + scan->rs_cbuf); + + scan->rs_cindex++; + + return true; +} + +/* + * Check visibility of the tuple. + */ +static bool +SampleHeapTupleVisible(HeapScanDesc scan, Buffer buffer, + HeapTuple tuple, + OffsetNumber tupoffset) +{ + if (scan->rs_scan.rs_pageatatime) + { + /* + * In pageatatime mode, heapgetpage() already did visibility checks, + * so just look at the info it left in rs_vistuples[]. + * + * We use a binary search over the known-sorted array. Note: we could + * save some effort if we insisted that NextSampleTuple select tuples + * in increasing order, but it's not clear that there would be enough + * gain to justify the restriction. + */ + int start = 0, + end = scan->rs_ntuples - 1; + + while (start <= end) + { + int mid = (start + end) / 2; + OffsetNumber curoffset = scan->rs_vistuples[mid]; + + if (tupoffset == curoffset) + return true; + else if (tupoffset < curoffset) + end = mid - 1; + else + start = mid + 1; + } + + return false; + } + else + { + /* Otherwise, we have to check the tuple individually. 
*/ + return HeapTupleSatisfies(tuple, scan->rs_scan.rs_snapshot, buffer); + } +} + +static bool +heapam_scan_sample_next_block(TableScanDesc sscan, struct SampleScanState *scanstate) +{ + HeapScanDesc scan = (HeapScanDesc) sscan; + TsmRoutine *tsm = scanstate->tsmroutine; + BlockNumber blockno; + + /* return false immediately if relation is empty */ + if (scan->rs_scan.rs_nblocks == 0) + return false; + + if (tsm->NextSampleBlock) + { + blockno = tsm->NextSampleBlock(scanstate, scan->rs_scan.rs_nblocks); + scan->rs_cblock = blockno; + } + else + { + /* scanning table sequentially */ + + if (scan->rs_cblock == InvalidBlockNumber) + { + Assert(!scan->rs_inited); + blockno = scan->rs_scan.rs_startblock; + } + else + { + Assert(scan->rs_inited); + + blockno = scan->rs_cblock + 1; + + if (blockno >= scan->rs_scan.rs_nblocks) + { + /* wrap to begining of rel, might not have started at 0 */ + blockno = 0; + } + + /* + * Report our new scan position for synchronization purposes. + * + * Note: we do this before checking for end of scan so that the + * final state of the position hint is back at the start of the + * rel. That's not strictly necessary, but otherwise when you run + * the same query multiple times the starting position would shift + * a little bit backwards on every invocation, which is confusing. + * We don't guarantee any specific ordering in general, though. + */ + if (scan->rs_scan.rs_syncscan) + ss_report_location(scan->rs_scan.rs_rd, blockno); + + if (blockno == scan->rs_scan.rs_startblock) + { + blockno = InvalidBlockNumber; + } + } + } + + if (!BlockNumberIsValid(blockno)) + { + if (BufferIsValid(scan->rs_cbuf)) + ReleaseBuffer(scan->rs_cbuf); + scan->rs_cbuf = InvalidBuffer; + scan->rs_cblock = InvalidBlockNumber; + scan->rs_inited = false; + + return false; + } + + heapgetpage(sscan, blockno); + scan->rs_inited = true; + + return true; +} + +static bool +heapam_scan_sample_next_tuple(TableScanDesc sscan, struct SampleScanState *scanstate, TupleTableSlot *slot) +{ + HeapScanDesc scan = (HeapScanDesc) sscan; + TsmRoutine *tsm = scanstate->tsmroutine; + BlockNumber blockno = scan->rs_cblock; + bool pagemode = scan->rs_scan.rs_pageatatime; + + Page page; + bool all_visible; + OffsetNumber maxoffset; + + ExecClearTuple(slot); + + /* + * When not using pagemode, we must lock the buffer during tuple + * visibility checks. + */ + if (!pagemode) + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); + + page = (Page) BufferGetPage(scan->rs_cbuf); + all_visible = PageIsAllVisible(page) && !scan->rs_scan.rs_snapshot->takenDuringRecovery; + maxoffset = PageGetMaxOffsetNumber(page); + + for (;;) + { + OffsetNumber tupoffset; + + CHECK_FOR_INTERRUPTS(); + + /* Ask the tablesample method which tuples to check on this page. */ + tupoffset = tsm->NextSampleTuple(scanstate, + blockno, + maxoffset); + + if (OffsetNumberIsValid(tupoffset)) + { + ItemId itemid; + bool visible; + HeapTuple tuple = &(scan->rs_ctup); + + /* Skip invalid tuple pointers. 
*/ + itemid = PageGetItemId(page, tupoffset); + if (!ItemIdIsNormal(itemid)) + continue; + + tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid); + tuple->t_len = ItemIdGetLength(itemid); + ItemPointerSet(&(tuple->t_self), blockno, tupoffset); + + + if (all_visible) + visible = true; + else + visible = SampleHeapTupleVisible(scan, scan->rs_cbuf, tuple, tupoffset); + + /* in pagemode, heapgetpage did this for us */ + if (!pagemode) + CheckForSerializableConflictOut(visible, scan->rs_scan.rs_rd, tuple, + scan->rs_cbuf, scan->rs_scan.rs_snapshot); + + /* Try next tuple from same page. */ + if (!visible) + continue; + + ExecStoreBufferHeapTuple(tuple, slot, scan->rs_cbuf); + + /* Found visible tuple, return it. */ + if (!pagemode) + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); + + /* Count successfully-fetched tuples as heap fetches */ + pgstat_count_heap_getnext(scan->rs_scan.rs_rd); + + return true; + } + else + { + /* + * If we get here, it means we've exhausted the items on this page and + * it's time to move to the next. + */ + if (!pagemode) + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); + + break; + } + } + + return false; +} + +static void +heapam_scan_analyze_next_block(TableScanDesc sscan, BlockNumber blockno, BufferAccessStrategy bstrategy) +{ + HeapScanDesc scan = (HeapScanDesc) sscan; + + /* + * We must maintain a pin on the target page's buffer to ensure that + * the maxoffset value stays good (else concurrent VACUUM might delete + * tuples out from under us). Hence, pin the page until we are done + * looking at it. We also choose to hold sharelock on the buffer + * throughout --- we could release and re-acquire sharelock for each + * tuple, but since we aren't doing much work per tuple, the extra + * lock traffic is probably better avoided. + */ + scan->rs_cblock = blockno; + scan->rs_cbuf = ReadBufferExtended(scan->rs_scan.rs_rd, MAIN_FORKNUM, blockno, + RBM_NORMAL, bstrategy); + scan->rs_cindex = FirstOffsetNumber; + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); +} + +static bool +heapam_scan_analyze_next_tuple(TableScanDesc sscan, TransactionId OldestXmin, double *liverows, double *deadrows, TupleTableSlot *slot) +{ + HeapScanDesc scan = (HeapScanDesc) sscan; + Page targpage; + OffsetNumber maxoffset; + BufferHeapTupleTableSlot *hslot; + + Assert(TTS_IS_BUFFERTUPLE(slot)); + + hslot = (BufferHeapTupleTableSlot *) slot; + targpage = BufferGetPage(scan->rs_cbuf); + maxoffset = PageGetMaxOffsetNumber(targpage); + + /* Inner loop over all tuples on the selected page */ + for (; scan->rs_cindex <= maxoffset; scan->rs_cindex++) + { + ItemId itemid; + HeapTuple targtuple = &hslot->base.tupdata; + bool sample_it = false; + + itemid = PageGetItemId(targpage, scan->rs_cindex); + + /* + * We ignore unused and redirect line pointers. DEAD line + * pointers should be counted as dead, because we need vacuum to + * run to get rid of them. Note that this rule agrees with the + * way that heap_page_prune() counts things. 
+ */ + if (!ItemIdIsNormal(itemid)) + { + if (ItemIdIsDead(itemid)) + *deadrows += 1; + continue; + } + + ItemPointerSet(&targtuple->t_self, scan->rs_cblock, scan->rs_cindex); + + targtuple->t_tableOid = RelationGetRelid(scan->rs_scan.rs_rd); + targtuple->t_data = (HeapTupleHeader) PageGetItem(targpage, itemid); + targtuple->t_len = ItemIdGetLength(itemid); + + switch (HeapTupleSatisfiesVacuum(targtuple, OldestXmin, scan->rs_cbuf)) + { + case HEAPTUPLE_LIVE: + sample_it = true; + *liverows += 1; + break; + + case HEAPTUPLE_DEAD: + case HEAPTUPLE_RECENTLY_DEAD: + /* Count dead and recently-dead rows */ + *deadrows += 1; + break; + + case HEAPTUPLE_INSERT_IN_PROGRESS: + + /* + * Insert-in-progress rows are not counted. We assume + * that when the inserting transaction commits or aborts, + * it will send a stats message to increment the proper + * count. This works right only if that transaction ends + * after we finish analyzing the table; if things happen + * in the other order, its stats update will be + * overwritten by ours. However, the error will be large + * only if the other transaction runs long enough to + * insert many tuples, so assuming it will finish after us + * is the safer option. + * + * A special case is that the inserting transaction might + * be our own. In this case we should count and sample + * the row, to accommodate users who load a table and + * analyze it in one transaction. (pgstat_report_analyze + * has to adjust the numbers we send to the stats + * collector to make this come out right.) + */ + if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(targtuple->t_data))) + { + sample_it = true; + *liverows += 1; + } + break; + + case HEAPTUPLE_DELETE_IN_PROGRESS: + + /* + * We count delete-in-progress rows as still live, using + * the same reasoning given above; but we don't bother to + * include them in the sample. + * + * If the delete was done by our own transaction, however, + * we must count the row as dead to make + * pgstat_report_analyze's stats adjustments come out + * right. (Note: this works out properly when the row was + * both inserted and deleted in our xact.) + */ + if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(targtuple->t_data))) + *deadrows += 1; + else + *liverows += 1; + break; + + default: + elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result"); + break; + } + + if (sample_it) + { + ExecStoreBufferHeapTuple(targtuple, slot, scan->rs_cbuf); + scan->rs_cindex++; + + /* note that we leave the buffer locked here! */ + return true; + } + } + + /* Now release the lock and pin on the page */ + UnlockReleaseBuffer(scan->rs_cbuf); + scan->rs_cbuf = InvalidBuffer; + + return false; +} + +/* + * Reconstruct and rewrite the given tuple + * + * We cannot simply copy the tuple as-is, for several reasons: + * + * 1. We'd like to squeeze out the values of any dropped columns, both + * to save space and to ensure we have no corner-case failures. (It's + * possible for example that the new table hasn't got a TOAST table + * and so is unable to store any large values of dropped cols.) + * + * 2. The tuple might not even be legal for the new table; this is + * currently only known to happen as an after-effect of ALTER TABLE + * SET WITHOUT OIDS. + * + * So, we must reconstruct the tuple from component Datums. 
+ */ +static void +reform_and_rewrite_tuple(HeapTuple tuple, + Relation OldHeap, Relation NewHeap, + Datum *values, bool *isnull, RewriteState rwstate) +{ + TupleDesc oldTupDesc = RelationGetDescr(OldHeap); + TupleDesc newTupDesc = RelationGetDescr(NewHeap); + HeapTuple copiedTuple; + int i; + + heap_deform_tuple(tuple, oldTupDesc, values, isnull); + + /* Be sure to null out any dropped columns */ + for (i = 0; i < newTupDesc->natts; i++) + { + if (TupleDescAttr(newTupDesc, i)->attisdropped) + isnull[i] = true; + } + + copiedTuple = heap_form_tuple(newTupDesc, values, isnull); + + /* The heap rewrite module does the rest */ + rewrite_heap_tuple(rwstate, tuple, copiedTuple); + + heap_freetuple(copiedTuple); +} + +static void +heap_copy_for_cluster(Relation OldHeap, Relation NewHeap, Relation OldIndex, + bool use_sort, + TransactionId OldestXmin, TransactionId FreezeXid, MultiXactId MultiXactCutoff, + double *num_tuples, double *tups_vacuumed, double *tups_recently_dead) +{ + RewriteState rwstate; + IndexScanDesc indexScan; + TableScanDesc heapScan; + bool use_wal; + bool is_system_catalog; + Tuplesortstate *tuplesort; + TupleDesc oldTupDesc = RelationGetDescr(OldHeap); + TupleDesc newTupDesc = RelationGetDescr(NewHeap); + TupleTableSlot *slot; + int natts; + Datum *values; + bool *isnull; + BufferHeapTupleTableSlot *hslot; + + /* Remember if it's a system catalog */ + is_system_catalog = IsSystemRelation(OldHeap); + + /* + * We need to log the copied data in WAL iff WAL archiving/streaming is + * enabled AND it's a WAL-logged rel. + */ + use_wal = XLogIsNeeded() && RelationNeedsWAL(NewHeap); + + /* use_wal off requires smgr_targblock be initially invalid */ + Assert(RelationGetTargetBlock(NewHeap) == InvalidBlockNumber); + + /* Preallocate values/isnull arrays */ + natts = newTupDesc->natts; + values = (Datum *) palloc(natts * sizeof(Datum)); + isnull = (bool *) palloc(natts * sizeof(bool)); + + /* Initialize the rewrite operation */ + rwstate = begin_heap_rewrite(OldHeap, NewHeap, OldestXmin, FreezeXid, + MultiXactCutoff, use_wal); + + + /* Set up sorting if wanted */ + if (use_sort) + tuplesort = tuplesort_begin_cluster(oldTupDesc, OldIndex, + maintenance_work_mem, + NULL, false); + else + tuplesort = NULL; + + /* + * Prepare to scan the OldHeap. To ensure we see recently-dead tuples + * that still need to be copied, we scan with SnapshotAny and use + * HeapTupleSatisfiesVacuum for the visibility test. + */ + if (OldIndex != NULL && !use_sort) + { + heapScan = NULL; + indexScan = index_beginscan(OldHeap, OldIndex, SnapshotAny, 0, 0); + index_rescan(indexScan, NULL, 0, NULL, 0); + } + else + { + heapScan = table_beginscan(OldHeap, SnapshotAny, 0, (ScanKey) NULL); + indexScan = NULL; + } + + slot = table_gimmegimmeslot(OldHeap, NULL); + hslot = (BufferHeapTupleTableSlot *) slot; + + /* + * Scan through the OldHeap, either in OldIndex order or sequentially; + * copy each tuple into the NewHeap, or transiently to the tuplesort + * module. Note that we don't bother sorting dead tuples (they won't get + * to the new table anyway). 
+ */ + for (;;) + { + bool isdead; + TransactionId xid; + + CHECK_FOR_INTERRUPTS(); + + if (indexScan != NULL) + { + if (!index_getnext_slot(indexScan, ForwardScanDirection, slot)) + break; + + /* Since we used no scan keys, should never need to recheck */ + if (indexScan->xs_recheck) + elog(ERROR, "CLUSTER does not support lossy index conditions"); + } + else + { + if (!table_scan_getnextslot(heapScan, ForwardScanDirection, slot)) + break; + } + + LockBuffer(hslot->buffer, BUFFER_LOCK_SHARE); + + switch (HeapTupleSatisfiesVacuum(hslot->base.tuple, OldestXmin, hslot->buffer)) + { + case HEAPTUPLE_DEAD: + /* Definitely dead */ + isdead = true; + break; + case HEAPTUPLE_RECENTLY_DEAD: + *tups_recently_dead += 1; + /* fall through */ + case HEAPTUPLE_LIVE: + /* Live or recently dead, must copy it */ + isdead = false; + break; + case HEAPTUPLE_INSERT_IN_PROGRESS: + + /* + * Since we hold exclusive lock on the relation, normally the + * only way to see this is if it was inserted earlier in our + * own transaction. However, it can happen in system + * catalogs, since we tend to release write lock before commit + * there. Give a warning if neither case applies; but in any + * case we had better copy it. + */ + xid = HeapTupleHeaderGetXmin(hslot->base.tuple->t_data); + if (!is_system_catalog && !TransactionIdIsCurrentTransactionId(xid)) + elog(WARNING, "concurrent insert in progress within table \"%s\"", + RelationGetRelationName(OldHeap)); + /* treat as live */ + isdead = false; + break; + case HEAPTUPLE_DELETE_IN_PROGRESS: + + /* + * Similar situation to INSERT_IN_PROGRESS case. + */ + xid = HeapTupleHeaderGetUpdateXid(hslot->base.tuple->t_data); + if (!is_system_catalog && !TransactionIdIsCurrentTransactionId(xid)) + elog(WARNING, "concurrent delete in progress within table \"%s\"", + RelationGetRelationName(OldHeap)); + /* treat as recently dead */ + *tups_recently_dead += 1; + isdead = false; + break; + default: + elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result"); + isdead = false; /* keep compiler quiet */ + break; + } + + LockBuffer(hslot->buffer, BUFFER_LOCK_UNLOCK); + + if (isdead) + { + *tups_vacuumed += 1; + /* heap rewrite module still needs to see it... */ + if (rewrite_heap_dead_tuple(rwstate, ExecFetchSlotHeapTuple(slot, false, NULL))) + { + /* A previous recently-dead tuple is now known dead */ + *tups_vacuumed += 1; + *tups_recently_dead -= 1; + } + continue; + } + + *num_tuples += 1; + if (tuplesort != NULL) + tuplesort_puttupleslot(tuplesort, slot); + else + reform_and_rewrite_tuple(ExecFetchSlotHeapTuple(slot, false, NULL), + OldHeap, NewHeap, + values, isnull, rwstate); + } + + if (indexScan != NULL) + index_endscan(indexScan); + if (heapScan != NULL) + table_endscan(heapScan); + + ExecDropSingleTupleTableSlot(slot); + + /* + * In scan-and-sort mode, complete the sort, then read out all live tuples + * from the tuplestore and write them to the new relation. 
+ */ + if (tuplesort != NULL) + { + tuplesort_performsort(tuplesort); + + for (;;) + { + HeapTuple tuple; + + CHECK_FOR_INTERRUPTS(); + + tuple = tuplesort_getheaptuple(tuplesort, true); + if (tuple == NULL) + break; + + reform_and_rewrite_tuple(tuple, + OldHeap, NewHeap, + values, isnull, rwstate); + } + + tuplesort_end(tuplesort); + } + + /* Write out any remaining tuples, and fsync if needed */ + end_heap_rewrite(rwstate); + + /* Clean up */ + pfree(values); + pfree(isnull); +} + +static const TableAmRoutine heapam_methods = { + .type = T_TableAmRoutine, + + .slot_callbacks = heapam_slot_callbacks, + + .snapshot_satisfies = heapam_satisfies, + + .scan_begin = heap_beginscan, + .scansetlimits = heap_setscanlimits, + .scan_getnextslot = heap_getnextslot, + .scan_end = heap_endscan, + .scan_rescan = heap_rescan, + .scan_update_snapshot = heap_update_snapshot, + + .scan_bitmap_pagescan = heapam_scan_bitmap_pagescan, + .scan_bitmap_pagescan_next = heapam_scan_bitmap_pagescan_next, + + .scan_sample_next_block = heapam_scan_sample_next_block, + .scan_sample_next_tuple = heapam_scan_sample_next_tuple, + + .tuple_fetch_row_version = heapam_fetch_row_version, + .tuple_fetch_follow = heapam_fetch_follow, + .tuple_insert = heapam_heap_insert, + .tuple_insert_speculative = heapam_heap_insert_speculative, + .tuple_complete_speculative = heapam_heap_complete_speculative, + .tuple_delete = heapam_heap_delete, + .tuple_update = heapam_heap_update, + .tuple_lock = heapam_lock_tuple, + .multi_insert = heap_multi_insert, + + .tuple_get_latest_tid = heap_get_latest_tid, + + .relation_vacuum = heap_vacuum_rel, + .scan_analyze_next_block = heapam_scan_analyze_next_block, + .scan_analyze_next_tuple = heapam_scan_analyze_next_tuple, + .relation_copy_for_cluster = heap_copy_for_cluster, + .relation_sync = heap_sync, + + .begin_index_fetch = heapam_begin_index_fetch, + .reset_index_fetch = heapam_reset_index_fetch, + .end_index_fetch = heapam_end_index_fetch, + + .index_build_range_scan = IndexBuildHeapRangeScan, + + .index_validate_scan = validate_index_heapscan +}; + +const TableAmRoutine * +GetHeapamTableAmRoutine(void) +{ + return &heapam_methods; +} + +Datum +heap_tableam_handler(PG_FUNCTION_ARGS) +{ + PG_RETURN_POINTER(&heapam_methods); +} diff --git a/src/backend/utils/time/tqual.c b/src/backend/access/heap/heapam_visibility.c similarity index 96% rename from src/backend/utils/time/tqual.c rename to src/backend/access/heap/heapam_visibility.c index f7c4c9188c..1ac1a20c1d 100644 --- a/src/backend/utils/time/tqual.c +++ b/src/backend/access/heap/heapam_visibility.c @@ -1,7 +1,6 @@ /*------------------------------------------------------------------------- * - * tqual.c - * POSTGRES "time qualification" code, ie, tuple visibility rules. + * POSTGRES "time qualification" code, ie, tuple visibility rules. 
* * NOTE: all the HeapTupleSatisfies routines will update the tuple's * "hint" status bits if we see that the inserting or deleting transaction @@ -56,13 +55,14 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * src/backend/utils/time/tqual.c + * src/backend/access/heap/heapam_visibilty.c * *------------------------------------------------------------------------- */ #include "postgres.h" +#include "access/heapam.h" #include "access/htup_details.h" #include "access/multixact.h" #include "access/subtrans.h" @@ -76,11 +76,9 @@ #include "utils/snapmgr.h" #include "utils/tqual.h" - /* Static variables representing various special snapshot semantics */ -SnapshotData SnapshotSelfData = {HeapTupleSatisfiesSelf}; -SnapshotData SnapshotAnyData = {HeapTupleSatisfiesAny}; - +SnapshotData SnapshotSelfData = {SELF_VISIBILITY}; +SnapshotData SnapshotAnyData = {ANY_VISIBILITY}; /* * SetHintBits() @@ -117,6 +115,9 @@ static inline void SetHintBits(HeapTupleHeader tuple, Buffer buffer, uint16 infomask, TransactionId xid) { + if (!BufferIsValid(buffer)) + return; + if (TransactionIdIsValid(xid)) { /* NB: xid must be known committed here! */ @@ -172,7 +173,7 @@ HeapTupleSetHintBits(HeapTupleHeader tuple, Buffer buffer, * (Xmax != my-transaction && the row was deleted by another transaction * Xmax is not committed))) that has not been committed */ -bool +static bool HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer) { HeapTupleHeader tuple = htup->t_data; @@ -342,7 +343,7 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer) * HeapTupleSatisfiesAny * Dummy "satisfies" routine: any tuple satisfies SnapshotAny. */ -bool +static bool HeapTupleSatisfiesAny(HeapTuple htup, Snapshot snapshot, Buffer buffer) { return true; @@ -362,7 +363,7 @@ HeapTupleSatisfiesAny(HeapTuple htup, Snapshot snapshot, Buffer buffer) * Among other things, this means you can't do UPDATEs of rows in a TOAST * table. */ -bool +static bool HeapTupleSatisfiesToast(HeapTuple htup, Snapshot snapshot, Buffer buffer) { @@ -612,7 +613,11 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, { if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) return HeapTupleMayBeUpdated; - return HeapTupleUpdated; /* updated by other */ + /* updated by other */ + if (ItemPointerEquals(&htup->t_self, &tuple->t_ctid)) + return HeapTupleDeleted; + else + return HeapTupleUpdated; } if (tuple->t_infomask & HEAP_XMAX_IS_MULTI) @@ -653,7 +658,12 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, return HeapTupleBeingUpdated; if (TransactionIdDidCommit(xmax)) - return HeapTupleUpdated; + { + if (ItemPointerEquals(&htup->t_self, &tuple->t_ctid)) + return HeapTupleDeleted; + else + return HeapTupleUpdated; + } /* * By here, the update in the Xmax is either aborted or crashed, but @@ -709,7 +719,12 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED, HeapTupleHeaderGetRawXmax(tuple)); - return HeapTupleUpdated; /* updated by other */ + + /* updated by other */ + if (ItemPointerEquals(&htup->t_self, &tuple->t_ctid)) + return HeapTupleDeleted; + else + return HeapTupleUpdated; } /* @@ -735,7 +750,7 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, * on the insertion without aborting the whole transaction, the associated * token is also returned in snapshot->speculativeToken. 
*/ -bool +static bool HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, Buffer buffer) { @@ -959,7 +974,7 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, * inserting/deleting transaction was still running --- which was more cycles * and more contention on the PGXACT array. */ -bool +static bool HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, Buffer buffer) { @@ -1161,9 +1176,10 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, * even if we see that the deleting transaction has committed. */ HTSV_Result -HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin, +HeapTupleSatisfiesVacuum(HeapTuple stup, TransactionId OldestXmin, Buffer buffer) { + HeapTuple htup = (HeapTuple) stup; HeapTupleHeader tuple = htup->t_data; Assert(ItemPointerIsValid(&htup->t_self)); @@ -1383,18 +1399,17 @@ HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin, return HEAPTUPLE_DEAD; } - /* * HeapTupleSatisfiesNonVacuumable * - * True if tuple might be visible to some transaction; false if it's - * surely dead to everyone, ie, vacuumable. + * True if tuple might be visible to some transaction; false if it's + * surely dead to everyone, ie, vacuumable. * - * This is an interface to HeapTupleSatisfiesVacuum that meets the - * SnapshotSatisfiesFunc API, so it can be used through a Snapshot. - * snapshot->xmin must have been set up with the xmin horizon to use. + * This is an interface to HeapTupleSatisfiesVacuum that meets the + * SnapshotSatisfiesFunc API, so it can be used through a Snapshot. + * snapshot->xmin must have been set up with the xmin horizon to use. */ -bool +static bool HeapTupleSatisfiesNonVacuumable(HeapTuple htup, Snapshot snapshot, Buffer buffer) { @@ -1402,65 +1417,59 @@ HeapTupleSatisfiesNonVacuumable(HeapTuple htup, Snapshot snapshot, != HEAPTUPLE_DEAD; } - /* - * HeapTupleIsSurelyDead + * Is the tuple really only locked? That is, is it not updated? * - * Cheaply determine whether a tuple is surely dead to all onlookers. - * We sometimes use this in lieu of HeapTupleSatisfiesVacuum when the - * tuple has just been tested by another visibility routine (usually - * HeapTupleSatisfiesMVCC) and, therefore, any hint bits that can be set - * should already be set. We assume that if no hint bits are set, the xmin - * or xmax transaction is still running. This is therefore faster than - * HeapTupleSatisfiesVacuum, because we don't consult PGXACT nor CLOG. - * It's okay to return false when in doubt, but we must return true only - * if the tuple is removable. + * It's easy to check just infomask bits if the locker is not a multi; but + * otherwise we need to verify that the updating transaction has not aborted. + * + * This function is here because it follows the same time qualification rules + * laid out at the top of this file. */ bool -HeapTupleIsSurelyDead(HeapTuple htup, TransactionId OldestXmin) +HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple) { - HeapTupleHeader tuple = htup->t_data; + TransactionId xmax; - Assert(ItemPointerIsValid(&htup->t_self)); - Assert(htup->t_tableOid != InvalidOid); + /* if there's no valid Xmax, then there's obviously no update either */ + if (tuple->t_infomask & HEAP_XMAX_INVALID) + return true; - /* - * If the inserting transaction is marked invalid, then it aborted, and - * the tuple is definitely dead. If it's marked neither committed nor - * invalid, then we assume it's still alive (since the presumption is that - * all relevant hint bits were just set moments ago). 
- */ - if (!HeapTupleHeaderXminCommitted(tuple)) - return HeapTupleHeaderXminInvalid(tuple) ? true : false; + if (tuple->t_infomask & HEAP_XMAX_LOCK_ONLY) + return true; - /* - * If the inserting transaction committed, but any deleting transaction - * aborted, the tuple is still alive. - */ - if (tuple->t_infomask & HEAP_XMAX_INVALID) - return false; + /* invalid xmax means no update */ + if (!TransactionIdIsValid(HeapTupleHeaderGetRawXmax(tuple))) + return true; /* - * If the XMAX is just a lock, the tuple is still alive. + * if HEAP_XMAX_LOCK_ONLY is not set and not a multi, then this must + * necessarily have been updated */ - if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) + if (!(tuple->t_infomask & HEAP_XMAX_IS_MULTI)) return false; - /* - * If the Xmax is a MultiXact, it might be dead or alive, but we cannot - * know without checking pg_multixact. - */ - if (tuple->t_infomask & HEAP_XMAX_IS_MULTI) - return false; + /* ... but if it's a multi, then perhaps the updating Xid aborted. */ + xmax = HeapTupleGetUpdateXid(tuple); - /* If deleter isn't known to have committed, assume it's still running. */ - if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED)) + /* not LOCKED_ONLY, so it has to have an xmax */ + Assert(TransactionIdIsValid(xmax)); + + if (TransactionIdIsCurrentTransactionId(xmax)) + return false; + if (TransactionIdIsInProgress(xmax)) + return false; + if (TransactionIdDidCommit(xmax)) return false; - /* Deleter committed, so tuple is dead if the XID is old enough. */ - return TransactionIdPrecedes(HeapTupleHeaderGetRawXmax(tuple), OldestXmin); + /* + * not current, not in progress, not committed -- must have aborted or + * crashed + */ + return true; } + /* * XidInMVCCSnapshot * Is the given XID still-in-progress according to the snapshot? @@ -1584,55 +1593,61 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot) } /* - * Is the tuple really only locked? That is, is it not updated? - * - * It's easy to check just infomask bits if the locker is not a multi; but - * otherwise we need to verify that the updating transaction has not aborted. + * HeapTupleIsSurelyDead * - * This function is here because it follows the same time qualification rules - * laid out at the top of this file. + * Cheaply determine whether a tuple is surely dead to all onlookers. + * We sometimes use this in lieu of HeapTupleSatisfiesVacuum when the + * tuple has just been tested by another visibility routine (usually + * HeapTupleSatisfiesMVCC) and, therefore, any hint bits that can be set + * should already be set. We assume that if no hint bits are set, the xmin + * or xmax transaction is still running. This is therefore faster than + * HeapTupleSatisfiesVacuum, because we don't consult PGXACT nor CLOG. + * It's okay to return false when in doubt, but we must return TRUE only + * if the tuple is removable. */ bool -HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple) +HeapTupleIsSurelyDead(HeapTuple htup, TransactionId OldestXmin) { - TransactionId xmax; - - /* if there's no valid Xmax, then there's obviously no update either */ - if (tuple->t_infomask & HEAP_XMAX_INVALID) - return true; + HeapTupleHeader tuple = htup->t_data; - if (tuple->t_infomask & HEAP_XMAX_LOCK_ONLY) - return true; + Assert(ItemPointerIsValid(&htup->t_self)); + Assert(htup->t_tableOid != InvalidOid); - /* invalid xmax means no update */ - if (!TransactionIdIsValid(HeapTupleHeaderGetRawXmax(tuple))) - return true; + /* + * If the inserting transaction is marked invalid, then it aborted, and + * the tuple is definitely dead. 
If it's marked neither committed nor + * invalid, then we assume it's still alive (since the presumption is that + * all relevant hint bits were just set moments ago). + */ + if (!HeapTupleHeaderXminCommitted(tuple)) + return HeapTupleHeaderXminInvalid(tuple) ? true : false; /* - * if HEAP_XMAX_LOCK_ONLY is not set and not a multi, then this must - * necessarily have been updated + * If the inserting transaction committed, but any deleting transaction + * aborted, the tuple is still alive. */ - if (!(tuple->t_infomask & HEAP_XMAX_IS_MULTI)) + if (tuple->t_infomask & HEAP_XMAX_INVALID) return false; - /* ... but if it's a multi, then perhaps the updating Xid aborted. */ - xmax = HeapTupleGetUpdateXid(tuple); - - /* not LOCKED_ONLY, so it has to have an xmax */ - Assert(TransactionIdIsValid(xmax)); - - if (TransactionIdIsCurrentTransactionId(xmax)) - return false; - if (TransactionIdIsInProgress(xmax)) - return false; - if (TransactionIdDidCommit(xmax)) + /* + * If the XMAX is just a lock, the tuple is still alive. + */ + if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) return false; /* - * not current, not in progress, not committed -- must have aborted or - * crashed + * If the Xmax is a MultiXact, it might be dead or alive, but we cannot + * know without checking pg_multixact. */ - return true; + if (tuple->t_infomask & HEAP_XMAX_IS_MULTI) + return false; + + /* If deleter isn't known to have committed, assume it's still running. */ + if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED)) + return false; + + /* Deleter committed, so tuple is dead if the XID is old enough. */ + return TransactionIdPrecedes(HeapTupleHeaderGetRawXmax(tuple), OldestXmin); } /* @@ -1659,7 +1674,7 @@ TransactionIdInArray(TransactionId xid, TransactionId *xip, Size num) * dangerous to do so as the semantics of doing so during timetravel are more * complicated than when dealing "only" with the present. 
*/ -bool +static bool HeapTupleSatisfiesHistoricMVCC(HeapTuple htup, Snapshot snapshot, Buffer buffer) { @@ -1796,3 +1811,34 @@ HeapTupleSatisfiesHistoricMVCC(HeapTuple htup, Snapshot snapshot, else return true; } + +bool +HeapTupleSatisfies(HeapTuple stup, Snapshot snapshot, Buffer buffer) +{ + switch (snapshot->visibility_type) + { + case MVCC_VISIBILITY: + return HeapTupleSatisfiesMVCC(stup, snapshot, buffer); + break; + case SELF_VISIBILITY: + return HeapTupleSatisfiesSelf(stup, snapshot, buffer); + break; + case ANY_VISIBILITY: + return HeapTupleSatisfiesAny(stup, snapshot, buffer); + break; + case TOAST_VISIBILITY: + return HeapTupleSatisfiesToast(stup, snapshot, buffer); + break; + case DIRTY_VISIBILITY: + return HeapTupleSatisfiesDirty(stup, snapshot, buffer); + break; + case HISTORIC_MVCC_VISIBILITY: + return HeapTupleSatisfiesHistoricMVCC(stup, snapshot, buffer); + break; + case NON_VACUUMABLE_VISIBILTY: + return HeapTupleSatisfiesNonVacuumable(stup, snapshot, buffer); + break; + } + + return false; /* keep compiler quiet */ +} diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c index 44caeca336..327e277422 100644 --- a/src/backend/access/heap/rewriteheap.c +++ b/src/backend/access/heap/rewriteheap.c @@ -110,6 +110,7 @@ #include "access/heapam.h" #include "access/heapam_xlog.h" #include "access/rewriteheap.h" +#include "access/tableam.h" #include "access/transam.h" #include "access/tuptoaster.h" #include "access/xact.h" @@ -126,13 +127,13 @@ #include "storage/bufmgr.h" #include "storage/fd.h" +#include "storage/procarray.h" #include "storage/smgr.h" #include "utils/memutils.h" #include "utils/rel.h" #include "utils/tqual.h" -#include "storage/procarray.h" /* * State associated with a rewrite operation. This is opaque to the user @@ -357,7 +358,7 @@ end_heap_rewrite(RewriteState state) * wrote before the checkpoint. 
*/ if (RelationNeedsWAL(state->rs_new_rel)) - heap_sync(state->rs_new_rel); + table_sync(state->rs_new_rel); logical_end_heap_rewrite(state); diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c index d1dad998d2..486cde4aff 100644 --- a/src/backend/access/heap/tuptoaster.c +++ b/src/backend/access/heap/tuptoaster.c @@ -32,6 +32,7 @@ #include "access/genam.h" #include "access/heapam.h" +#include "access/tableam.h" #include "access/tuptoaster.h" #include "access/xact.h" #include "catalog/catalog.h" diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c index 9d08775687..5f033c5ee4 100644 --- a/src/backend/access/index/genam.c +++ b/src/backend/access/index/genam.c @@ -20,6 +20,7 @@ #include "postgres.h" #include "access/relscan.h" +#include "access/tableam.h" #include "access/transam.h" #include "catalog/index.h" #include "lib/stringinfo.h" @@ -82,6 +83,7 @@ RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys) scan = (IndexScanDesc) palloc(sizeof(IndexScanDescData)); scan->heapRelation = NULL; /* may be set later */ + scan->xs_heapfetch = NULL; scan->indexRelation = indexRelation; scan->xs_snapshot = InvalidSnapshot; /* caller must initialize this */ scan->numberOfKeys = nkeys; @@ -122,11 +124,6 @@ RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys) scan->xs_hitup = NULL; scan->xs_hitupdesc = NULL; - ItemPointerSetInvalid(&scan->xs_ctup.t_self); - scan->xs_ctup.t_data = NULL; - scan->xs_cbuf = InvalidBuffer; - scan->xs_continue_hot = false; - return scan; } @@ -334,6 +331,7 @@ systable_beginscan(Relation heapRelation, sysscan->heap_rel = heapRelation; sysscan->irel = irel; + sysscan->slot = table_gimmegimmeslot(heapRelation, NULL); if (snapshot == NULL) { @@ -383,9 +381,9 @@ systable_beginscan(Relation heapRelation, * disadvantage; and there are no compensating advantages, because * it's unlikely that such scans will occur in parallel. */ - sysscan->scan = heap_beginscan_strat(heapRelation, snapshot, - nkeys, key, - true, false); + sysscan->scan = table_beginscan_strat(heapRelation, snapshot, + nkeys, key, + true, false); sysscan->iscan = NULL; } @@ -400,15 +398,18 @@ systable_beginscan(Relation heapRelation, * Note that returned tuple is a reference to data in a disk buffer; * it must not be modified, and should be presumed inaccessible after * next getnext() or endscan() call. + * + * FIXME: Change to be slot based. 
*/ HeapTuple systable_getnext(SysScanDesc sysscan) { - HeapTuple htup; + HeapTuple htup = NULL; if (sysscan->irel) { - htup = index_getnext(sysscan->iscan, ForwardScanDirection); + if (index_getnext_slot(sysscan->iscan, ForwardScanDirection, sysscan->slot)) + htup = ExecFetchSlotHeapTuple(sysscan->slot, false, NULL); /* * We currently don't need to support lossy index operators for any @@ -421,7 +422,7 @@ systable_getnext(SysScanDesc sysscan) elog(ERROR, "system catalog scans with lossy index conditions are not implemented"); } else - htup = heap_getnext(sysscan->scan, ForwardScanDirection); + htup = heap_scan_getnext(sysscan->scan, ForwardScanDirection); return htup; } @@ -454,26 +455,26 @@ systable_recheck_tuple(SysScanDesc sysscan, HeapTuple tup) if (sysscan->irel) { - IndexScanDesc scan = sysscan->iscan; + IndexFetchHeapData *hscan = (IndexFetchHeapData *) sysscan->iscan->xs_heapfetch; - Assert(IsMVCCSnapshot(scan->xs_snapshot)); - Assert(tup == &scan->xs_ctup); - Assert(BufferIsValid(scan->xs_cbuf)); + Assert(IsMVCCSnapshot(sysscan->iscan->xs_snapshot)); + //Assert(tup == &hscan->xs_ctup); replace by peeking into slot? + Assert(BufferIsValid(hscan->xs_cbuf)); /* must hold a buffer lock to call HeapTupleSatisfiesVisibility */ - LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE); - result = HeapTupleSatisfiesVisibility(tup, freshsnap, scan->xs_cbuf); - LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK); + LockBuffer(hscan->xs_cbuf, BUFFER_LOCK_SHARE); + result = HeapTupleSatisfies(tup, freshsnap, hscan->xs_cbuf); + LockBuffer(hscan->xs_cbuf, BUFFER_LOCK_UNLOCK); } else { - HeapScanDesc scan = sysscan->scan; + HeapScanDesc scan = (HeapScanDesc) sysscan->scan; - Assert(IsMVCCSnapshot(scan->rs_snapshot)); - Assert(tup == &scan->rs_ctup); + Assert(IsMVCCSnapshot(scan->rs_scan.rs_snapshot)); + /* hari Assert(tup == &scan->rs_ctup); */ Assert(BufferIsValid(scan->rs_cbuf)); /* must hold a buffer lock to call HeapTupleSatisfiesVisibility */ LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); - result = HeapTupleSatisfiesVisibility(tup, freshsnap, scan->rs_cbuf); + result = HeapTupleSatisfies(tup, freshsnap, scan->rs_cbuf); LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); } return result; @@ -487,13 +488,19 @@ systable_recheck_tuple(SysScanDesc sysscan, HeapTuple tup) void systable_endscan(SysScanDesc sysscan) { + if (sysscan->slot) + { + ExecDropSingleTupleTableSlot(sysscan->slot); + sysscan->slot = NULL; + } + if (sysscan->irel) { index_endscan(sysscan->iscan); index_close(sysscan->irel, AccessShareLock); } else - heap_endscan(sysscan->scan); + table_endscan(sysscan->scan); if (sysscan->snapshot) UnregisterSnapshot(sysscan->snapshot); @@ -540,6 +547,7 @@ systable_beginscan_ordered(Relation heapRelation, sysscan->heap_rel = heapRelation; sysscan->irel = indexRelation; + sysscan->slot = table_gimmegimmeslot(heapRelation, NULL); if (snapshot == NULL) { @@ -585,10 +593,12 @@ systable_beginscan_ordered(Relation heapRelation, HeapTuple systable_getnext_ordered(SysScanDesc sysscan, ScanDirection direction) { - HeapTuple htup; + HeapTuple htup = NULL; Assert(sysscan->irel); - htup = index_getnext(sysscan->iscan, direction); + if (index_getnext_slot(sysscan->iscan, direction, sysscan->slot)) + htup = ExecFetchSlotHeapTuple(sysscan->slot, false, NULL); + /* See notes in systable_getnext */ if (htup && sysscan->iscan->xs_recheck) elog(ERROR, "system catalog scans with lossy index conditions are not implemented"); @@ -602,6 +612,12 @@ systable_getnext_ordered(SysScanDesc sysscan, ScanDirection direction) void 
systable_endscan_ordered(SysScanDesc sysscan) { + if (sysscan->slot) + { + ExecDropSingleTupleTableSlot(sysscan->slot); + sysscan->slot = NULL; + } + Assert(sysscan->irel); index_endscan(sysscan->iscan); if (sysscan->snapshot) diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index eade540ef5..fe5af31f87 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -71,6 +71,7 @@ #include "access/amapi.h" #include "access/relscan.h" +#include "access/tableam.h" #include "access/transam.h" #include "access/xlog.h" #include "catalog/index.h" @@ -235,6 +236,8 @@ index_beginscan(Relation heapRelation, scan->heapRelation = heapRelation; scan->xs_snapshot = snapshot; + scan->xs_heapfetch = table_begin_index_fetch_table(heapRelation); + return scan; } @@ -318,16 +321,12 @@ index_rescan(IndexScanDesc scan, Assert(nkeys == scan->numberOfKeys); Assert(norderbys == scan->numberOfOrderBys); - /* Release any held pin on a heap page */ - if (BufferIsValid(scan->xs_cbuf)) - { - ReleaseBuffer(scan->xs_cbuf); - scan->xs_cbuf = InvalidBuffer; - } - - scan->xs_continue_hot = false; + /* Release resources (like buffer pins) for heap accesses */ + if (scan->xs_heapfetch) + table_reset_index_fetch_table(scan->xs_heapfetch); scan->kill_prior_tuple = false; /* for safety */ + scan->xs_heap_continue = false; scan->indexRelation->rd_amroutine->amrescan(scan, keys, nkeys, orderbys, norderbys); @@ -343,11 +342,11 @@ index_endscan(IndexScanDesc scan) SCAN_CHECKS; CHECK_SCAN_PROCEDURE(amendscan); - /* Release any held pin on a heap page */ - if (BufferIsValid(scan->xs_cbuf)) + /* Release resources (like buffer pins) for heap accesses */ + if (scan->xs_heapfetch) { - ReleaseBuffer(scan->xs_cbuf); - scan->xs_cbuf = InvalidBuffer; + table_end_index_fetch_table(scan->xs_heapfetch); + scan->xs_heapfetch = NULL; } /* End the AM's scan */ @@ -380,7 +379,7 @@ index_markpos(IndexScanDesc scan) * index_restrpos - restore a scan position * * NOTE: this only restores the internal scan state of the index AM. - * The current result tuple (scan->xs_ctup) doesn't change. See comments + * The current result tuple (scan->xs_ctup) doesn't change FIXME. See comments * for ExecRestrPos(). * * NOTE: in the presence of HOT chains, mark/restore only works correctly @@ -400,9 +399,12 @@ index_restrpos(IndexScanDesc scan) SCAN_CHECKS; CHECK_SCAN_PROCEDURE(amrestrpos); - scan->xs_continue_hot = false; + /* release resources (like buffer pins) for heap accesses */ + if (scan->xs_heapfetch) + table_reset_index_fetch_table(scan->xs_heapfetch); scan->kill_prior_tuple = false; /* for safety */ + scan->xs_heap_continue = false; scan->indexRelation->rd_amroutine->amrestrpos(scan); } @@ -483,6 +485,9 @@ index_parallelrescan(IndexScanDesc scan) { SCAN_CHECKS; + if (scan->xs_heapfetch) + table_reset_index_fetch_table(scan->xs_heapfetch); + /* amparallelrescan is optional; assume no-op if not provided by AM */ if (scan->indexRelation->rd_amroutine->amparallelrescan != NULL) scan->indexRelation->rd_amroutine->amparallelrescan(scan); @@ -513,6 +518,8 @@ index_beginscan_parallel(Relation heaprel, Relation indexrel, int nkeys, scan->heapRelation = heaprel; scan->xs_snapshot = snapshot; + scan->xs_heapfetch = table_begin_index_fetch_table(heaprel); + return scan; } @@ -535,7 +542,7 @@ index_getnext_tid(IndexScanDesc scan, ScanDirection direction) /* * The AM's amgettuple proc finds the next index entry matching the scan - * keys, and puts the TID into scan->xs_ctup.t_self. 
It should also set + * keys, and puts the TID into scan->xs_heaptid. It should also set * scan->xs_recheck and possibly scan->xs_itup/scan->xs_hitup, though we * pay no attention to those fields here. */ @@ -543,23 +550,23 @@ index_getnext_tid(IndexScanDesc scan, ScanDirection direction) /* Reset kill flag immediately for safety */ scan->kill_prior_tuple = false; + scan->xs_heap_continue = false; /* If we're out of index entries, we're done */ if (!found) { - /* ... but first, release any held pin on a heap page */ - if (BufferIsValid(scan->xs_cbuf)) - { - ReleaseBuffer(scan->xs_cbuf); - scan->xs_cbuf = InvalidBuffer; - } + /* release resources (like buffer pins) for heap accesses */ + if (scan->xs_heapfetch) + table_reset_index_fetch_table(scan->xs_heapfetch); + return NULL; } + Assert(ItemPointerIsValid(&scan->xs_heaptid)); pgstat_count_index_tuples(scan->indexRelation, 1); /* Return the TID of the tuple we found. */ - return &scan->xs_ctup.t_self; + return &scan->xs_heaptid; } /* ---------------- @@ -580,53 +587,17 @@ index_getnext_tid(IndexScanDesc scan, ScanDirection direction) * enough information to do it efficiently in the general case. * ---------------- */ -HeapTuple -index_fetch_heap(IndexScanDesc scan) +bool +index_fetch_heap(IndexScanDesc scan, TupleTableSlot *slot) { - ItemPointer tid = &scan->xs_ctup.t_self; bool all_dead = false; - bool got_heap_tuple; - - /* We can skip the buffer-switching logic if we're in mid-HOT chain. */ - if (!scan->xs_continue_hot) - { - /* Switch to correct buffer if we don't have it already */ - Buffer prev_buf = scan->xs_cbuf; - - scan->xs_cbuf = ReleaseAndReadBuffer(scan->xs_cbuf, - scan->heapRelation, - ItemPointerGetBlockNumber(tid)); + bool found; - /* - * Prune page, but only if we weren't already on this page - */ - if (prev_buf != scan->xs_cbuf) - heap_page_prune_opt(scan->heapRelation, scan->xs_cbuf); - } + found = table_fetch_follow(scan->xs_heapfetch, &scan->xs_heaptid, scan->xs_snapshot, + slot, &scan->xs_heap_continue, &all_dead); - /* Obtain share-lock on the buffer so we can examine visibility */ - LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE); - got_heap_tuple = heap_hot_search_buffer(tid, scan->heapRelation, - scan->xs_cbuf, - scan->xs_snapshot, - &scan->xs_ctup, - &all_dead, - !scan->xs_continue_hot); - LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK); - - if (got_heap_tuple) - { - /* - * Only in a non-MVCC snapshot can more than one member of the HOT - * chain be visible. - */ - scan->xs_continue_hot = !IsMVCCSnapshot(scan->xs_snapshot); + if (found) pgstat_count_heap_fetch(scan->indexRelation); - return &scan->xs_ctup; - } - - /* We've reached the end of the HOT chain. */ - scan->xs_continue_hot = false; /* * If we scanned a whole HOT chain and found only dead tuples, tell index @@ -638,50 +609,41 @@ index_fetch_heap(IndexScanDesc scan) if (!scan->xactStartedInRecovery) scan->kill_prior_tuple = all_dead; - return NULL; + return found; } /* ---------------- - * index_getnext - get the next heap tuple from a scan + * index_getnext_slot - get the next tuple from a scan * - * The result is the next heap tuple satisfying the scan keys and the - * snapshot, or NULL if no more matching tuples exist. + * The result is true if a tuple satisfying the scan keys and the snapshot was + * found, false otherwise. The tuple is stored in the specified slot. * * On success, the buffer containing the heap tup is pinned (the pin will be * dropped in a future index_getnext_tid, index_fetch_heap or index_endscan - * call). + * call). 
XXX * * Note: caller must check scan->xs_recheck, and perform rechecking of the * scan keys if required. We do not do that here because we don't have * enough information to do it efficiently in the general case. * ---------------- */ -HeapTuple -index_getnext(IndexScanDesc scan, ScanDirection direction) +bool +index_getnext_slot(IndexScanDesc scan, ScanDirection direction, TupleTableSlot *slot) { - HeapTuple heapTuple; - ItemPointer tid; - for (;;) { - if (scan->xs_continue_hot) - { - /* - * We are resuming scan of a HOT chain after having returned an - * earlier member. Must still hold pin on current heap page. - */ - Assert(BufferIsValid(scan->xs_cbuf)); - Assert(ItemPointerGetBlockNumber(&scan->xs_ctup.t_self) == - BufferGetBlockNumber(scan->xs_cbuf)); - } - else + if (!scan->xs_heap_continue) { + ItemPointer tid; + /* Time to fetch the next TID from the index */ tid = index_getnext_tid(scan, direction); /* If we're out of index entries, we're done */ if (tid == NULL) break; + + Assert(ItemPointerEquals(tid, &scan->xs_heaptid)); } /* @@ -689,14 +651,15 @@ index_getnext(IndexScanDesc scan, ScanDirection direction) * If we don't find anything, loop around and grab the next TID from * the index. */ - heapTuple = index_fetch_heap(scan); - if (heapTuple != NULL) - return heapTuple; + Assert(ItemPointerIsValid(&scan->xs_heaptid)); + if (index_fetch_heap(scan, slot)) + return true; } - return NULL; /* failure exit */ + return false; } + /* ---------------- * index_getbitmap - get all tuples at once from an index scan * diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index 582e5b0652..b2ad95f970 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -18,6 +18,7 @@ #include "access/heapam.h" #include "access/nbtree.h" #include "access/nbtxlog.h" +#include "access/tableam.h" #include "access/transam.h" #include "access/xloginsert.h" #include "miscadmin.h" @@ -415,8 +416,8 @@ _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel, * that satisfies SnapshotDirty. This is necessary because we * have just a single index entry for the entire chain. */ - else if (heap_hot_search(&htid, heapRel, &SnapshotDirty, - &all_dead)) + else if (table_fetch_follow_check(heapRel, &htid, &SnapshotDirty, + &all_dead)) { TransactionId xwait; @@ -469,7 +470,7 @@ _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel, * entry. 
*/ htid = itup->t_tid; - if (heap_hot_search(&htid, heapRel, SnapshotSelf, NULL)) + if (table_fetch_follow_check(heapRel, &htid, SnapshotSelf, NULL)) { /* Normal case --- it's still live */ } diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index e8725fbbe1..935a412faf 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -310,7 +310,7 @@ btgetbitmap(IndexScanDesc scan, TIDBitmap *tbm) if (_bt_first(scan, ForwardScanDirection)) { /* Save tuple ID, and continue scanning */ - heapTid = &scan->xs_ctup.t_self; + heapTid = &scan->xs_heaptid; tbm_add_tuples(tbm, heapTid, 1, false); ntids++; diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index 16223d01ec..ecbf5482d8 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -1136,7 +1136,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) readcomplete: /* OK, itemIndex says what to return */ currItem = &so->currPos.items[so->currPos.itemIndex]; - scan->xs_ctup.t_self = currItem->heapTid; + scan->xs_heaptid = currItem->heapTid; if (scan->xs_want_itup) scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset); @@ -1186,7 +1186,7 @@ _bt_next(IndexScanDesc scan, ScanDirection dir) /* OK, itemIndex says what to return */ currItem = &so->currPos.items[so->currPos.itemIndex]; - scan->xs_ctup.t_self = currItem->heapTid; + scan->xs_heaptid = currItem->heapTid; if (scan->xs_want_itup) scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset); @@ -1965,7 +1965,7 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir) /* OK, itemIndex says what to return */ currItem = &so->currPos.items[so->currPos.itemIndex]; - scan->xs_ctup.t_self = currItem->heapTid; + scan->xs_heaptid = currItem->heapTid; if (scan->xs_want_itup) scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset); diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c index 16f5755777..a4b287aff7 100644 --- a/src/backend/access/nbtree/nbtsort.c +++ b/src/backend/access/nbtree/nbtsort.c @@ -60,6 +60,7 @@ #include "access/nbtree.h" #include "access/parallel.h" #include "access/relscan.h" +#include "access/tableam.h" #include "access/xact.h" #include "access/xlog.h" #include "access/xloginsert.h" @@ -71,7 +72,7 @@ #include "utils/rel.h" #include "utils/sortsupport.h" #include "utils/tuplesort.h" - +#include "utils/tqual.h" /* Magic numbers for parallel state sharing */ #define PARALLEL_KEY_BTREE_SHARED UINT64CONST(0xA000000000000001) @@ -159,7 +160,7 @@ typedef struct BTShared * * See _bt_parallel_estimate_shared(). 
*/ - ParallelHeapScanDescData heapdesc; + ParallelTableScanDescData paralleldesc; } BTShared; /* @@ -469,9 +470,9 @@ _bt_spools_heapscan(Relation heap, Relation index, BTBuildState *buildstate, /* Fill spool using either serial or parallel heap scan */ if (!buildstate->btleader) - reltuples = IndexBuildHeapScan(heap, index, indexInfo, true, - _bt_build_callback, (void *) buildstate, - NULL); + reltuples = table_index_build_scan(heap, index, indexInfo, true, + _bt_build_callback, (void *) buildstate, + NULL); else reltuples = _bt_parallel_heapscan(buildstate, &indexInfo->ii_BrokenHotChain); @@ -546,7 +547,7 @@ _bt_leafbuild(BTSpool *btspool, BTSpool *btspool2) } /* - * Per-tuple callback from IndexBuildHeapScan + * Per-tuple callback from table_index_build_scan */ static void _bt_build_callback(Relation index, @@ -1315,7 +1316,7 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request) btshared->havedead = false; btshared->indtuples = 0.0; btshared->brokenhotchain = false; - heap_parallelscan_initialize(&btshared->heapdesc, btspool->heap, snapshot); + table_parallelscan_initialize(&btshared->paralleldesc, btspool->heap, snapshot); /* * Store shared tuplesort-private state, for which we reserved space. @@ -1410,8 +1411,8 @@ _bt_parallel_estimate_shared(Snapshot snapshot) return sizeof(BTShared); } - return add_size(offsetof(BTShared, heapdesc) + - offsetof(ParallelHeapScanDescData, phs_snapshot_data), + return add_size(offsetof(BTShared, paralleldesc) + + offsetof(ParallelTableScanDescData, phs_snapshot_data), EstimateSnapshotSpace(snapshot)); } @@ -1623,7 +1624,7 @@ _bt_parallel_scan_and_sort(BTSpool *btspool, BTSpool *btspool2, { SortCoordinate coordinate; BTBuildState buildstate; - HeapScanDesc scan; + TableScanDesc scan; double reltuples; IndexInfo *indexInfo; @@ -1676,10 +1677,10 @@ _bt_parallel_scan_and_sort(BTSpool *btspool, BTSpool *btspool2, /* Join parallel scan */ indexInfo = BuildIndexInfo(btspool->index); indexInfo->ii_Concurrent = btshared->isconcurrent; - scan = heap_beginscan_parallel(btspool->heap, &btshared->heapdesc); - reltuples = IndexBuildHeapScan(btspool->heap, btspool->index, indexInfo, - true, _bt_build_callback, - (void *) &buildstate, scan); + scan = table_beginscan_parallel(btspool->heap, &btshared->paralleldesc); + reltuples = table_index_build_scan(btspool->heap, btspool->index, indexInfo, + true, _bt_build_callback, + (void *) &buildstate, scan); /* * Execute this worker's part of the sort. 
diff --git a/src/backend/access/spgist/spginsert.c b/src/backend/access/spgist/spginsert.c index 7dd0d61fbb..2a4d0c9a54 100644 --- a/src/backend/access/spgist/spginsert.c +++ b/src/backend/access/spgist/spginsert.c @@ -19,6 +19,7 @@ #include "access/genam.h" #include "access/spgist_private.h" #include "access/spgxlog.h" +#include "access/tableam.h" #include "access/xlog.h" #include "access/xloginsert.h" #include "catalog/index.h" @@ -37,7 +38,7 @@ typedef struct } SpGistBuildState; -/* Callback to process one heap tuple during IndexBuildHeapScan */ +/* Callback to process one heap tuple during table_index_build_scan */ static void spgistBuildCallback(Relation index, HeapTuple htup, Datum *values, bool *isnull, bool tupleIsAlive, void *state) @@ -142,9 +143,9 @@ spgbuild(Relation heap, Relation index, IndexInfo *indexInfo) "SP-GiST build temporary context", ALLOCSET_DEFAULT_SIZES); - reltuples = IndexBuildHeapScan(heap, index, indexInfo, true, - spgistBuildCallback, (void *) &buildstate, - NULL); + reltuples = table_index_build_scan(heap, index, indexInfo, true, + spgistBuildCallback, (void *) &buildstate, + NULL); MemoryContextDelete(buildstate.tmpCtx); diff --git a/src/backend/access/spgist/spgscan.c b/src/backend/access/spgist/spgscan.c index c883ae95e4..e84be1adea 100644 --- a/src/backend/access/spgist/spgscan.c +++ b/src/backend/access/spgist/spgscan.c @@ -927,7 +927,7 @@ spggettuple(IndexScanDesc scan, ScanDirection dir) if (so->iPtr < so->nPtrs) { /* continuing to return reported tuples */ - scan->xs_ctup.t_self = so->heapPtrs[so->iPtr]; + scan->xs_heaptid = so->heapPtrs[so->iPtr]; scan->xs_recheck = so->recheck[so->iPtr]; scan->xs_hitup = so->reconTups[so->iPtr]; diff --git a/src/backend/access/table/Makefile b/src/backend/access/table/Makefile new file mode 100644 index 0000000000..006ba99182 --- /dev/null +++ b/src/backend/access/table/Makefile @@ -0,0 +1,17 @@ +#------------------------------------------------------------------------- +# +# Makefile-- +# Makefile for access/table +# +# IDENTIFICATION +# src/backend/access/table/Makefile +# +#------------------------------------------------------------------------- + +subdir = src/backend/access/table +top_builddir = ../../../.. +include $(top_builddir)/src/Makefile.global + +OBJS = tableam.o tableamapi.o + +include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c new file mode 100644 index 0000000000..af99264df9 --- /dev/null +++ b/src/backend/access/table/tableam.c @@ -0,0 +1,187 @@ +/*---------------------------------------------------------------------- + * + * tableam.c + * Table access method routines too big to be inline functions. + * + * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/backend/access/table/tableam.c + *---------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/tableam.h" +#include "storage/bufmgr.h" +#include "storage/shmem.h" + +/* GUC variable */ +bool synchronize_seqscans = true; + +char *default_table_access_method = DEFAULT_TABLE_ACCESS_METHOD; + + +/* ---------------- + * table_parallelscan_estimate - estimate storage for ParallelTableScanDesc + * + * Sadly, this doesn't reduce to a constant, because the size required + * to serialize the snapshot can vary. 
+ * ---------------- + */ +Size +table_parallelscan_estimate(Snapshot snapshot) +{ + return add_size(offsetof(ParallelTableScanDescData, phs_snapshot_data), + EstimateSnapshotSpace(snapshot)); +} + +/* ---------------- + * table_parallelscan_initialize - initialize ParallelTableScanDesc + * + * Must allow as many bytes of shared memory as returned by + * table_parallelscan_estimate. Call this just once in the leader + * process; then, individual workers attach via table_beginscan_parallel. + * ---------------- + */ +void +table_parallelscan_initialize(ParallelTableScanDesc target, Relation relation, + Snapshot snapshot) +{ + target->phs_relid = RelationGetRelid(relation); + target->phs_nblocks = RelationGetNumberOfBlocks(relation); + /* compare phs_syncscan initialization to similar logic in initscan */ + target->phs_syncscan = synchronize_seqscans && + !RelationUsesLocalBuffers(relation) && + target->phs_nblocks > NBuffers / 4; + SpinLockInit(&target->phs_mutex); + target->phs_startblock = InvalidBlockNumber; + pg_atomic_init_u64(&target->phs_nallocated, 0); + if (IsMVCCSnapshot(snapshot)) + { + SerializeSnapshot(snapshot, target->phs_snapshot_data); + target->phs_snapshot_any = false; + } + else + { + Assert(snapshot == SnapshotAny); + target->phs_snapshot_any = true; + } +} + +/* ---------------- + * table_parallelscan_reinitialize - reset a parallel scan + * + * Call this in the leader process. Caller is responsible for + * making sure that all workers have finished the scan beforehand. + * ---------------- + */ +void +table_parallelscan_reinitialize(ParallelTableScanDesc parallel_scan) +{ + pg_atomic_write_u64(¶llel_scan->phs_nallocated, 0); +} + +/* ---------------- + * table_parallelscan_startblock_init - find and set the scan's startblock + * + * Determine where the parallel seq scan should start. This function may + * be called many times, once by each parallel worker. We must be careful + * only to set the startblock once. + * ---------------- + */ +void +table_parallelscan_startblock_init(TableScanDesc scan) +{ + BlockNumber sync_startpage = InvalidBlockNumber; + ParallelTableScanDesc parallel_scan; + + Assert(scan->rs_parallel); + parallel_scan = scan->rs_parallel; + +retry: + /* Grab the spinlock. */ + SpinLockAcquire(¶llel_scan->phs_mutex); + + /* + * If the scan's startblock has not yet been initialized, we must do so + * now. If this is not a synchronized scan, we just start at block 0, but + * if it is a synchronized scan, we must get the starting position from + * the synchronized scan machinery. We can't hold the spinlock while + * doing that, though, so release the spinlock, get the information we + * need, and retry. If nobody else has initialized the scan in the + * meantime, we'll fill in the value we fetched on the second time + * through. + */ + if (parallel_scan->phs_startblock == InvalidBlockNumber) + { + if (!parallel_scan->phs_syncscan) + parallel_scan->phs_startblock = 0; + else if (sync_startpage != InvalidBlockNumber) + parallel_scan->phs_startblock = sync_startpage; + else + { + SpinLockRelease(¶llel_scan->phs_mutex); + sync_startpage = ss_get_location(scan->rs_rd, scan->rs_nblocks); + goto retry; + } + } + SpinLockRelease(¶llel_scan->phs_mutex); +} + +/* ---------------- + * table_parallelscan_nextpage - get the next page to scan + * + * Get the next page to scan. 
Even if there are no pages left to scan, + * another backend could have grabbed a page to scan and not yet finished + * looking at it, so it doesn't follow that the scan is done when the + * first backend gets an InvalidBlockNumber return. + * ---------------- + */ +BlockNumber +table_parallelscan_nextpage(TableScanDesc scan) +{ + BlockNumber page; + ParallelTableScanDesc parallel_scan; + uint64 nallocated; + + Assert(scan->rs_parallel); + parallel_scan = scan->rs_parallel; + + /* + * phs_nallocated tracks how many pages have been allocated to workers + * already. When phs_nallocated >= rs_nblocks, all blocks have been + * allocated. + * + * Because we use an atomic fetch-and-add to fetch the current value, the + * phs_nallocated counter will exceed rs_nblocks, because workers will + * still increment the value, when they try to allocate the next block but + * all blocks have been allocated already. The counter must be 64 bits + * wide because of that, to avoid wrapping around when rs_nblocks is close + * to 2^32. + * + * The actual page to return is calculated by adding the counter to the + * starting block number, modulo nblocks. + */ + nallocated = pg_atomic_fetch_add_u64(¶llel_scan->phs_nallocated, 1); + if (nallocated >= scan->rs_nblocks) + page = InvalidBlockNumber; /* all blocks have been allocated */ + else + page = (nallocated + parallel_scan->phs_startblock) % scan->rs_nblocks; + + /* + * Report scan location. Normally, we report the current page number. + * When we reach the end of the scan, though, we report the starting page, + * not the ending page, just so the starting positions for later scans + * doesn't slew backwards. We only report the position at the end of the + * scan once, though: subsequent callers will report nothing. + */ + if (scan->rs_syncscan) + { + if (page != InvalidBlockNumber) + ss_report_location(scan->rs_rd, page); + else if (nallocated == scan->rs_nblocks) + ss_report_location(scan->rs_rd, parallel_scan->phs_startblock); + } + + return page; +} diff --git a/src/backend/access/table/tableamapi.c b/src/backend/access/table/tableamapi.c new file mode 100644 index 0000000000..b2d283a500 --- /dev/null +++ b/src/backend/access/table/tableamapi.c @@ -0,0 +1,187 @@ +/*---------------------------------------------------------------------- + * + * tableamapi.c + * Support routines for API for Postgres table access methods + * + * FIXME: looks like this should be in amapi.c. + * + * Copyright (c) 2016, PostgreSQL Global Development Group + * + * src/backend/access/table/tableamapi.c + *---------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/htup_details.h" +#include "access/tableam.h" +#include "access/xact.h" +#include "catalog/pg_am.h" +#include "catalog/pg_proc.h" +#include "utils/fmgroids.h" +#include "utils/syscache.h" +#include "utils/memutils.h" + +static Oid get_table_am_oid(const char *tableamname, bool missing_ok); + +TupleTableSlot* +table_gimmegimmeslot(Relation relation, List **reglist) +{ + const TupleTableSlotOps *tts_cb; + TupleTableSlot *slot; + + tts_cb = table_slot_callbacks(relation); + slot = MakeSingleTupleTableSlot(RelationGetDescr(relation), tts_cb); + + if (reglist) + *reglist = lappend(*reglist, slot); + + return slot; +} + + +/* + * GetTableAmRoutine + * Call the specified access method handler routine to get its + * TableAmRoutine struct, which will be palloc'd in the caller's + * memory context. 
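
The GetTableAmRoutine() code that follows calls an AM's handler function through OidFunctionCall0() and checks that it got a TableAmRoutine node back. A handler is therefore just an ordinary SQL-callable C function returning such a node; the amcmds.c hunks further down teach CREATE ACCESS METHOD to accept TYPE TABLE handlers whose return type is TABLE_AM_HANDLEROID. A minimal sketch of a handler, with a hypothetical name (my_tableam_handler) and the callback assignments elided, since the actual TableAmRoutine members are declared in the tableam headers added elsewhere in this patch:

    #include "postgres.h"

    #include "access/tableam.h"
    #include "fmgr.h"
    #include "nodes/nodes.h"

    PG_FUNCTION_INFO_V1(my_tableam_handler);

    Datum
    my_tableam_handler(PG_FUNCTION_ARGS)
    {
        /* makeNode() pallocs the struct, zeroes it and sets its node tag */
        TableAmRoutine *routine = makeNode(TableAmRoutine);

        /* assign the callbacks declared in TableAmRoutine here */

        PG_RETURN_POINTER(routine);
    }
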
+ */ +const TableAmRoutine * +GetTableAmRoutine(Oid amhandler) +{ + Datum datum; + const TableAmRoutine *routine; + + datum = OidFunctionCall0(amhandler); + routine = (TableAmRoutine *) DatumGetPointer(datum); + + if (routine == NULL || !IsA(routine, TableAmRoutine)) + elog(ERROR, "Table access method handler %u did not return a TableAmRoutine struct", + amhandler); + + return routine; +} + +/* + * GetTableAmRoutineByAmId - look up the handler of the table access + * method with the given OID, and get its TableAmRoutine struct. + */ +const TableAmRoutine * +GetTableAmRoutineByAmId(Oid amoid) +{ + regproc amhandler; + HeapTuple tuple; + Form_pg_am amform; + + /* Get handler function OID for the access method */ + tuple = SearchSysCache1(AMOID, ObjectIdGetDatum(amoid)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for access method %u", + amoid); + amform = (Form_pg_am) GETSTRUCT(tuple); + + /* Check that it is a table access method */ + if (amform->amtype != AMTYPE_TABLE) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("access method \"%s\" is not of type %s", + NameStr(amform->amname), "TABLE"))); + + amhandler = amform->amhandler; + + /* Complain if handler OID is invalid */ + if (!RegProcedureIsValid(amhandler)) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("table access method \"%s\" does not have a handler", + NameStr(amform->amname)))); + + ReleaseSysCache(tuple); + + /* And finally, call the handler function to get the API struct. */ + return GetTableAmRoutine(amhandler); +} + +/* + * get_table_am_oid - given a table access method name, look up the OID + * + * If missing_ok is false, throw an error if table access method name not + * found. If true, just return InvalidOid. + */ +static Oid +get_table_am_oid(const char *tableamname, bool missing_ok) +{ + Oid result; + Relation rel; + TableScanDesc scandesc; + HeapTuple tuple; + ScanKeyData entry[1]; + + /* + * Search pg_tablespace. We use a heapscan here even though there is an + * index on name, on the theory that pg_tablespace will usually have just + * a few entries and so an indexed lookup is a waste of effort. + */ + rel = heap_open(AccessMethodRelationId, AccessShareLock); + + ScanKeyInit(&entry[0], + Anum_pg_am_amname, + BTEqualStrategyNumber, F_NAMEEQ, + CStringGetDatum(tableamname)); + scandesc = table_beginscan_catalog(rel, 1, entry); + tuple = heap_scan_getnext(scandesc, ForwardScanDirection); + + /* We assume that there can be at most one matching tuple */ + if (HeapTupleIsValid(tuple) && + ((Form_pg_am) GETSTRUCT(tuple))->amtype == AMTYPE_TABLE) + result = ((Form_pg_am) GETSTRUCT(tuple))->oid; + else + result = InvalidOid; + + table_endscan(scandesc); + heap_close(rel, AccessShareLock); + + if (!OidIsValid(result) && !missing_ok) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("table access method \"%s\" does not exist", + tableamname))); + + return result; +} + +/* check_hook: validate new default_table_access_method */ +bool +check_default_table_access_method(char **newval, void **extra, GucSource source) +{ + /* + * If we aren't inside a transaction, we cannot do database access so + * cannot verify the name. Must accept the value on faith. + */ + if (IsTransactionState()) + { + if (**newval != '\0' && + !OidIsValid(get_table_am_oid(*newval, true))) + { + /* + * When source == PGC_S_TEST, don't throw a hard error for a + * nonexistent table access method, only a NOTICE. + * See comments in guc.h. 
+ */ + if (source == PGC_S_TEST) + { + ereport(NOTICE, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("Table access method \"%s\" does not exist", + *newval))); + } + else + { + GUC_check_errdetail("Table access method \"%s\" does not exist.", + *newval); + return false; + } + } + } + + return true; +} diff --git a/src/backend/access/tablesample/system.c b/src/backend/access/tablesample/system.c index 4d937b4258..1354e3f54a 100644 --- a/src/backend/access/tablesample/system.c +++ b/src/backend/access/tablesample/system.c @@ -56,7 +56,7 @@ static void system_beginsamplescan(SampleScanState *node, Datum *params, int nparams, uint32 seed); -static BlockNumber system_nextsampleblock(SampleScanState *node); +static BlockNumber system_nextsampleblock(SampleScanState *node, BlockNumber nblocks); static OffsetNumber system_nextsampletuple(SampleScanState *node, BlockNumber blockno, OffsetNumber maxoffset); @@ -177,10 +177,9 @@ system_beginsamplescan(SampleScanState *node, * Select next block to sample. */ static BlockNumber -system_nextsampleblock(SampleScanState *node) +system_nextsampleblock(SampleScanState *node, BlockNumber nblocks) { SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state; - HeapScanDesc scan = node->ss.ss_currentScanDesc; BlockNumber nextblock = sampler->nextblock; uint32 hashinput[2]; @@ -199,7 +198,7 @@ system_nextsampleblock(SampleScanState *node) * Loop over block numbers until finding suitable block or reaching end of * relation. */ - for (; nextblock < scan->rs_nblocks; nextblock++) + for (; nextblock < nblocks; nextblock++) { uint32 hash; @@ -211,7 +210,7 @@ system_nextsampleblock(SampleScanState *node) break; } - if (nextblock < scan->rs_nblocks) + if (nextblock < nblocks) { /* Found a suitable block; remember where we should start next time */ sampler->nextblock = nextblock + 1; diff --git a/src/backend/bootstrap/bootparse.y b/src/backend/bootstrap/bootparse.y index 71c3714c48..e4208f4068 100644 --- a/src/backend/bootstrap/bootparse.y +++ b/src/backend/bootstrap/bootparse.y @@ -220,6 +220,7 @@ Boot_CreateStmt: shared_relation ? 
GLOBALTABLESPACE_OID : 0, $3, InvalidOid, + HEAP_TABLE_AM_OID, tupdesc, RELKIND_RELATION, RELPERSISTENCE_PERMANENT, @@ -239,6 +240,7 @@ Boot_CreateStmt: $6, InvalidOid, BOOTSTRAP_SUPERUSERID, + HEAP_TABLE_AM_OID, tupdesc, NIL, RELKIND_RELATION, diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c index 7caab64ce7..c55060e8f7 100644 --- a/src/backend/bootstrap/bootstrap.c +++ b/src/backend/bootstrap/bootstrap.c @@ -18,6 +18,7 @@ #include #include "access/htup_details.h" +#include "access/tableam.h" #include "access/xact.h" #include "access/xlog_internal.h" #include "bootstrap/bootstrap.h" @@ -593,7 +594,7 @@ boot_openrel(char *relname) int i; struct typmap **app; Relation rel; - HeapScanDesc scan; + TableScanDesc scan; HeapTuple tup; if (strlen(relname) >= NAMEDATALEN) @@ -603,18 +604,18 @@ boot_openrel(char *relname) { /* We can now load the pg_type data */ rel = heap_open(TypeRelationId, NoLock); - scan = heap_beginscan_catalog(rel, 0, NULL); + scan = table_beginscan_catalog(rel, 0, NULL); i = 0; - while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL) + while ((tup = heap_scan_getnext(scan, ForwardScanDirection)) != NULL) ++i; - heap_endscan(scan); + table_endscan(scan); app = Typ = ALLOC(struct typmap *, i + 1); while (i-- > 0) *app++ = ALLOC(struct typmap, 1); *app = NULL; - scan = heap_beginscan_catalog(rel, 0, NULL); + scan = table_beginscan_catalog(rel, 0, NULL); app = Typ; - while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL) + while ((tup = heap_scan_getnext(scan, ForwardScanDirection)) != NULL) { (*app)->am_oid = ((Form_pg_type) GETSTRUCT(tup))->oid; memcpy((char *) &(*app)->am_typ, @@ -622,7 +623,7 @@ boot_openrel(char *relname) sizeof((*app)->am_typ)); app++; } - heap_endscan(scan); + table_endscan(scan); heap_close(rel, NoLock); } @@ -905,7 +906,7 @@ gettype(char *type) { int i; Relation rel; - HeapScanDesc scan; + TableScanDesc scan; HeapTuple tup; struct typmap **app; @@ -929,25 +930,25 @@ gettype(char *type) } elog(DEBUG4, "external type: %s", type); rel = heap_open(TypeRelationId, NoLock); - scan = heap_beginscan_catalog(rel, 0, NULL); + scan = table_beginscan_catalog(rel, 0, NULL); i = 0; - while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL) + while ((tup = heap_scan_getnext(scan, ForwardScanDirection)) != NULL) ++i; - heap_endscan(scan); + table_endscan(scan); app = Typ = ALLOC(struct typmap *, i + 1); while (i-- > 0) *app++ = ALLOC(struct typmap, 1); *app = NULL; - scan = heap_beginscan_catalog(rel, 0, NULL); + scan = table_beginscan_catalog(rel, 0, NULL); app = Typ; - while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL) + while ((tup = heap_scan_getnext(scan, ForwardScanDirection)) != NULL) { (*app)->am_oid = ((Form_pg_type) GETSTRUCT(tup))->oid; memmove((char *) &(*app++)->am_typ, (char *) GETSTRUCT(tup), sizeof((*app)->am_typ)); } - heap_endscan(scan); + table_endscan(scan); heap_close(rel, NoLock); return gettype(type); } diff --git a/src/backend/catalog/aclchk.c b/src/backend/catalog/aclchk.c index 1dd70bb9c6..a5068cef5d 100644 --- a/src/backend/catalog/aclchk.c +++ b/src/backend/catalog/aclchk.c @@ -20,6 +20,7 @@ #include "access/genam.h" #include "access/heapam.h" #include "access/htup_details.h" +#include "access/tableam.h" #include "access/sysattr.h" #include "access/xact.h" #include "catalog/binary_upgrade.h" @@ -822,7 +823,7 @@ objectsInSchemaToOids(ObjectType objtype, List *nspnames) ScanKeyData key[2]; int keycount; Relation rel; - HeapScanDesc scan; + TableScanDesc scan; 
HeapTuple tuple; keycount = 0; @@ -844,16 +845,16 @@ objectsInSchemaToOids(ObjectType objtype, List *nspnames) CharGetDatum(PROKIND_PROCEDURE)); rel = heap_open(ProcedureRelationId, AccessShareLock); - scan = heap_beginscan_catalog(rel, keycount, key); + scan = table_beginscan_catalog(rel, keycount, key); - while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL) { Oid oid = ((Form_pg_proc) GETSTRUCT(tuple))->oid; objects = lappend_oid(objects, oid); } - heap_endscan(scan); + table_endscan(scan); heap_close(rel, AccessShareLock); } break; @@ -878,7 +879,7 @@ getRelationsInNamespace(Oid namespaceId, char relkind) List *relations = NIL; ScanKeyData key[2]; Relation rel; - HeapScanDesc scan; + TableScanDesc scan; HeapTuple tuple; ScanKeyInit(&key[0], @@ -891,16 +892,16 @@ getRelationsInNamespace(Oid namespaceId, char relkind) CharGetDatum(relkind)); rel = heap_open(RelationRelationId, AccessShareLock); - scan = heap_beginscan_catalog(rel, 2, key); + scan = table_beginscan_catalog(rel, 2, key); - while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL) { Oid oid = ((Form_pg_class) GETSTRUCT(tuple))->oid; relations = lappend_oid(relations, oid); } - heap_endscan(scan); + table_endscan(scan); heap_close(rel, AccessShareLock); return relations; diff --git a/src/backend/catalog/genbki.pl b/src/backend/catalog/genbki.pl index edc8ea9f53..552aa46657 100644 --- a/src/backend/catalog/genbki.pl +++ b/src/backend/catalog/genbki.pl @@ -163,7 +163,9 @@ my $BOOTSTRAP_SUPERUSERID = my $PG_CATALOG_NAMESPACE = Catalog::FindDefinedSymbolFromData($catalog_data{pg_namespace}, 'PG_CATALOG_NAMESPACE'); - +my $PG_HEAP_AM = + Catalog::FindDefinedSymbolFromData($catalog_data{pg_am}, + 'HEAP_TABLE_AM_OID'); # Build lookup tables for OID macro substitutions and for pg_attribute # copies of pg_type values. @@ -426,6 +428,7 @@ EOM # (It's intentional that this can apply to parts of a field). $bki_values{$attname} =~ s/\bPGUID\b/$BOOTSTRAP_SUPERUSERID/g; $bki_values{$attname} =~ s/\bPGNSP\b/$PG_CATALOG_NAMESPACE/g; + $bki_values{$attname} =~ s/\bPGHEAPAM\b/$PG_HEAP_AM/g; # Replace OID synonyms with OIDs per the appropriate lookup rule. 
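
The aclchk.c hunks above, like the bootstrap and catalog hunks elsewhere in this patch, apply the same mechanical conversion: heap_beginscan_catalog()/heap_getnext()/heap_endscan() become table_beginscan_catalog()/heap_scan_getnext()/table_endscan(), where heap_scan_getnext() keeps returning HeapTuples and is therefore only usable against heap-backed relations such as the system catalogs. A condensed sketch of the converted pattern (the counting function itself is illustrative, not part of the patch):

    #include "postgres.h"

    #include "access/heapam.h"
    #include "access/tableam.h"
    #include "catalog/pg_class.h"
    #include "storage/lockdefs.h"

    /* illustrative only: count pg_class rows using the new catalog-scan calls */
    static int
    count_pg_class_rows(void)
    {
        Relation    rel;
        TableScanDesc scan;
        HeapTuple   tuple;
        int         n = 0;

        rel = heap_open(RelationRelationId, AccessShareLock);
        scan = table_beginscan_catalog(rel, 0, NULL);

        while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
            n++;

        table_endscan(scan);
        heap_close(rel, AccessShareLock);

        return n;
    }
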
# diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 11debaa780..f85eab1714 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -42,6 +42,7 @@ #include "catalog/index.h" #include "catalog/objectaccess.h" #include "catalog/partition.h" +#include "catalog/pg_am.h" #include "catalog/pg_attrdef.h" #include "catalog/pg_collation.h" #include "catalog/pg_constraint.h" @@ -292,6 +293,7 @@ heap_create(const char *relname, Oid reltablespace, Oid relid, Oid relfilenode, + Oid accessmtd, TupleDesc tupDesc, char relkind, char relpersistence, @@ -394,6 +396,7 @@ heap_create(const char *relname, relnamespace, tupDesc, relid, + accessmtd, relfilenode, reltablespace, shared_relation, @@ -1052,6 +1055,7 @@ heap_create_with_catalog(const char *relname, Oid reltypeid, Oid reloftypeid, Oid ownerid, + Oid accessmtd, TupleDesc tupdesc, List *cooked_constraints, char relkind, @@ -1193,6 +1197,7 @@ heap_create_with_catalog(const char *relname, reltablespace, relid, InvalidOid, + accessmtd, tupdesc, relkind, relpersistence, @@ -1349,6 +1354,22 @@ heap_create_with_catalog(const char *relname, referenced.objectSubId = 0; recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); } + + /* + * Make a dependency link to force the relation to be deleted if its + * access method is. Do this only for relation and materialized views. + * + * No need to add an explicit dependency with toast, as the original + * table depends on it. + */ + if ((relkind == RELKIND_RELATION) || + (relkind == RELKIND_MATVIEW)) + { + referenced.classId = AccessMethodRelationId; + referenced.objectId = accessmtd; + referenced.objectSubId = 0; + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + } } /* Post creation hook for new relation */ diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 8709e8c22c..6c541cbe6e 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -27,6 +27,7 @@ #include "access/multixact.h" #include "access/relscan.h" #include "access/reloptions.h" +#include "access/tableam.h" #include "access/sysattr.h" #include "access/transam.h" #include "access/visibilitymap.h" @@ -81,16 +82,6 @@ /* Potentially set by pg_upgrade_support functions */ Oid binary_upgrade_next_index_pg_class_oid = InvalidOid; -/* state info for validate_index bulkdelete callback */ -typedef struct -{ - Tuplesortstate *tuplesort; /* for sorting the index TIDs */ - /* statistics (for debug purposes only): */ - double htups, - itups, - tups_inserted; -} v_i_state; - /* * Pointer-free representation of variables used when reindexing system * catalogs; we use this to propagate those values to parallel workers. 
@@ -131,14 +122,7 @@ static void index_update_stats(Relation rel, static void IndexCheckExclusion(Relation heapRelation, Relation indexRelation, IndexInfo *indexInfo); -static inline int64 itemptr_encode(ItemPointer itemptr); -static inline void itemptr_decode(ItemPointer itemptr, int64 encoded); static bool validate_index_callback(ItemPointer itemptr, void *opaque); -static void validate_index_heapscan(Relation heapRelation, - Relation indexRelation, - IndexInfo *indexInfo, - Snapshot snapshot, - v_i_state *state); static bool ReindexIsCurrentlyProcessingIndex(Oid indexOid); static void SetReindexProcessing(Oid heapOid, Oid indexOid); static void ResetReindexProcessing(void); @@ -909,6 +893,7 @@ index_create(Relation heapRelation, tableSpaceId, indexRelationId, relFileNode, + accessMethodObjectId, indexTupDesc, relkind, relpersistence, @@ -2130,7 +2115,7 @@ index_update_stats(Relation rel, ReindexIsProcessingHeap(RelationRelationId)) { /* don't assume syscache will work */ - HeapScanDesc pg_class_scan; + TableScanDesc pg_class_scan; ScanKeyData key[1]; ScanKeyInit(&key[0], @@ -2138,10 +2123,10 @@ index_update_stats(Relation rel, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(relid)); - pg_class_scan = heap_beginscan_catalog(pg_class, 1, key); - tuple = heap_getnext(pg_class_scan, ForwardScanDirection); + pg_class_scan = table_beginscan_catalog(pg_class, 1, key); + tuple = heap_scan_getnext(pg_class_scan, ForwardScanDirection); tuple = heap_copytuple(tuple); - heap_endscan(pg_class_scan); + table_endscan(pg_class_scan); } else { @@ -2397,555 +2382,6 @@ index_build(Relation heapRelation, SetUserIdAndSecContext(save_userid, save_sec_context); } - -/* - * IndexBuildHeapScan - scan the heap relation to find tuples to be indexed - * - * This is called back from an access-method-specific index build procedure - * after the AM has done whatever setup it needs. The parent heap relation - * is scanned to find tuples that should be entered into the index. Each - * such tuple is passed to the AM's callback routine, which does the right - * things to add it to the new index. After we return, the AM's index - * build procedure does whatever cleanup it needs. - * - * The total count of live heap tuples is returned. This is for updating - * pg_class statistics. (It's annoying not to be able to do that here, but we - * want to merge that update with others; see index_update_stats.) Note that - * the index AM itself must keep track of the number of index tuples; we don't - * do so here because the AM might reject some of the tuples for its own - * reasons, such as being unable to store NULLs. - * - * A side effect is to set indexInfo->ii_BrokenHotChain to true if we detect - * any potentially broken HOT chains. Currently, we set this if there are - * any RECENTLY_DEAD or DELETE_IN_PROGRESS entries in a HOT chain, without - * trying very hard to detect whether they're really incompatible with the - * chain tip. - */ -double -IndexBuildHeapScan(Relation heapRelation, - Relation indexRelation, - IndexInfo *indexInfo, - bool allow_sync, - IndexBuildCallback callback, - void *callback_state, - HeapScanDesc scan) -{ - return IndexBuildHeapRangeScan(heapRelation, indexRelation, - indexInfo, allow_sync, - false, - 0, InvalidBlockNumber, - callback, callback_state, scan); -} - -/* - * As above, except that instead of scanning the complete heap, only the given - * number of blocks are scanned. Scan to end-of-rel can be signalled by - * passing InvalidBlockNumber as numblocks. 
Note that restricting the range - * to scan cannot be done when requesting syncscan. - * - * When "anyvisible" mode is requested, all tuples visible to any transaction - * are indexed and counted as live, including those inserted or deleted by - * transactions that are still in progress. - */ -double -IndexBuildHeapRangeScan(Relation heapRelation, - Relation indexRelation, - IndexInfo *indexInfo, - bool allow_sync, - bool anyvisible, - BlockNumber start_blockno, - BlockNumber numblocks, - IndexBuildCallback callback, - void *callback_state, - HeapScanDesc scan) -{ - bool is_system_catalog; - bool checking_uniqueness; - HeapTuple heapTuple; - Datum values[INDEX_MAX_KEYS]; - bool isnull[INDEX_MAX_KEYS]; - double reltuples; - ExprState *predicate; - TupleTableSlot *slot; - EState *estate; - ExprContext *econtext; - Snapshot snapshot; - bool need_unregister_snapshot = false; - TransactionId OldestXmin; - BlockNumber root_blkno = InvalidBlockNumber; - OffsetNumber root_offsets[MaxHeapTuplesPerPage]; - - /* - * sanity checks - */ - Assert(OidIsValid(indexRelation->rd_rel->relam)); - - /* Remember if it's a system catalog */ - is_system_catalog = IsSystemRelation(heapRelation); - - /* See whether we're verifying uniqueness/exclusion properties */ - checking_uniqueness = (indexInfo->ii_Unique || - indexInfo->ii_ExclusionOps != NULL); - - /* - * "Any visible" mode is not compatible with uniqueness checks; make sure - * only one of those is requested. - */ - Assert(!(anyvisible && checking_uniqueness)); - - /* - * Need an EState for evaluation of index expressions and partial-index - * predicates. Also a slot to hold the current tuple. - */ - estate = CreateExecutorState(); - econtext = GetPerTupleExprContext(estate); - slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation), - &TTSOpsHeapTuple); - - /* Arrange for econtext's scan tuple to be the tuple under test */ - econtext->ecxt_scantuple = slot; - - /* Set up execution state for predicate, if any. */ - predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate); - - /* - * Prepare for scan of the base relation. In a normal index build, we use - * SnapshotAny because we must retrieve all tuples and do our own time - * qual checks (because we have to index RECENTLY_DEAD tuples). In a - * concurrent build, or during bootstrap, we take a regular MVCC snapshot - * and index whatever's live according to that. - */ - OldestXmin = InvalidTransactionId; - - /* okay to ignore lazy VACUUMs here */ - if (!IsBootstrapProcessingMode() && !indexInfo->ii_Concurrent) - OldestXmin = GetOldestXmin(heapRelation, PROCARRAY_FLAGS_VACUUM); - - if (!scan) - { - /* - * Serial index build. - * - * Must begin our own heap scan in this case. We may also need to - * register a snapshot whose lifetime is under our direct control. - */ - if (!TransactionIdIsValid(OldestXmin)) - { - snapshot = RegisterSnapshot(GetTransactionSnapshot()); - need_unregister_snapshot = true; - } - else - snapshot = SnapshotAny; - - scan = heap_beginscan_strat(heapRelation, /* relation */ - snapshot, /* snapshot */ - 0, /* number of keys */ - NULL, /* scan key */ - true, /* buffer access strategy OK */ - allow_sync); /* syncscan OK? */ - } - else - { - /* - * Parallel index build. - * - * Parallel case never registers/unregisters own snapshot. Snapshot - * is taken from parallel heap scan, and is SnapshotAny or an MVCC - * snapshot, based on same criteria as serial case. 
- */ - Assert(!IsBootstrapProcessingMode()); - Assert(allow_sync); - snapshot = scan->rs_snapshot; - } - - /* - * Must call GetOldestXmin() with SnapshotAny. Should never call - * GetOldestXmin() with MVCC snapshot. (It's especially worth checking - * this for parallel builds, since ambuild routines that support parallel - * builds must work these details out for themselves.) - */ - Assert(snapshot == SnapshotAny || IsMVCCSnapshot(snapshot)); - Assert(snapshot == SnapshotAny ? TransactionIdIsValid(OldestXmin) : - !TransactionIdIsValid(OldestXmin)); - Assert(snapshot == SnapshotAny || !anyvisible); - - /* set our scan endpoints */ - if (!allow_sync) - heap_setscanlimits(scan, start_blockno, numblocks); - else - { - /* syncscan can only be requested on whole relation */ - Assert(start_blockno == 0); - Assert(numblocks == InvalidBlockNumber); - } - - reltuples = 0; - - /* - * Scan all tuples in the base relation. - */ - while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL) - { - bool tupleIsAlive; - - CHECK_FOR_INTERRUPTS(); - - /* - * When dealing with a HOT-chain of updated tuples, we want to index - * the values of the live tuple (if any), but index it under the TID - * of the chain's root tuple. This approach is necessary to preserve - * the HOT-chain structure in the heap. So we need to be able to find - * the root item offset for every tuple that's in a HOT-chain. When - * first reaching a new page of the relation, call - * heap_get_root_tuples() to build a map of root item offsets on the - * page. - * - * It might look unsafe to use this information across buffer - * lock/unlock. However, we hold ShareLock on the table so no - * ordinary insert/update/delete should occur; and we hold pin on the - * buffer continuously while visiting the page, so no pruning - * operation can occur either. - * - * Also, although our opinions about tuple liveness could change while - * we scan the page (due to concurrent transaction commits/aborts), - * the chain root locations won't, so this info doesn't need to be - * rebuilt after waiting for another transaction. - * - * Note the implied assumption that there is no more than one live - * tuple per HOT-chain --- else we could create more than one index - * entry pointing to the same root tuple. - */ - if (scan->rs_cblock != root_blkno) - { - Page page = BufferGetPage(scan->rs_cbuf); - - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); - heap_get_root_tuples(page, root_offsets); - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); - - root_blkno = scan->rs_cblock; - } - - if (snapshot == SnapshotAny) - { - /* do our own time qual check */ - bool indexIt; - TransactionId xwait; - - recheck: - - /* - * We could possibly get away with not locking the buffer here, - * since caller should hold ShareLock on the relation, but let's - * be conservative about it. (This remark is still correct even - * with HOT-pruning: our pin on the buffer prevents pruning.) - */ - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); - - /* - * The criteria for counting a tuple as live in this block need to - * match what analyze.c's acquire_sample_rows() does, otherwise - * CREATE INDEX and ANALYZE may produce wildly different reltuples - * values, e.g. when there are many recently-dead tuples. 
- */ - switch (HeapTupleSatisfiesVacuum(heapTuple, OldestXmin, - scan->rs_cbuf)) - { - case HEAPTUPLE_DEAD: - /* Definitely dead, we can ignore it */ - indexIt = false; - tupleIsAlive = false; - break; - case HEAPTUPLE_LIVE: - /* Normal case, index and unique-check it */ - indexIt = true; - tupleIsAlive = true; - /* Count it as live, too */ - reltuples += 1; - break; - case HEAPTUPLE_RECENTLY_DEAD: - - /* - * If tuple is recently deleted then we must index it - * anyway to preserve MVCC semantics. (Pre-existing - * transactions could try to use the index after we finish - * building it, and may need to see such tuples.) - * - * However, if it was HOT-updated then we must only index - * the live tuple at the end of the HOT-chain. Since this - * breaks semantics for pre-existing snapshots, mark the - * index as unusable for them. - * - * We don't count recently-dead tuples in reltuples, even - * if we index them; see acquire_sample_rows(). - */ - if (HeapTupleIsHotUpdated(heapTuple)) - { - indexIt = false; - /* mark the index as unsafe for old snapshots */ - indexInfo->ii_BrokenHotChain = true; - } - else - indexIt = true; - /* In any case, exclude the tuple from unique-checking */ - tupleIsAlive = false; - break; - case HEAPTUPLE_INSERT_IN_PROGRESS: - - /* - * In "anyvisible" mode, this tuple is visible and we - * don't need any further checks. - */ - if (anyvisible) - { - indexIt = true; - tupleIsAlive = true; - reltuples += 1; - break; - } - - /* - * Since caller should hold ShareLock or better, normally - * the only way to see this is if it was inserted earlier - * in our own transaction. However, it can happen in - * system catalogs, since we tend to release write lock - * before commit there. Give a warning if neither case - * applies. - */ - xwait = HeapTupleHeaderGetXmin(heapTuple->t_data); - if (!TransactionIdIsCurrentTransactionId(xwait)) - { - if (!is_system_catalog) - elog(WARNING, "concurrent insert in progress within table \"%s\"", - RelationGetRelationName(heapRelation)); - - /* - * If we are performing uniqueness checks, indexing - * such a tuple could lead to a bogus uniqueness - * failure. In that case we wait for the inserting - * transaction to finish and check again. - */ - if (checking_uniqueness) - { - /* - * Must drop the lock on the buffer before we wait - */ - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); - XactLockTableWait(xwait, heapRelation, - &heapTuple->t_self, - XLTW_InsertIndexUnique); - CHECK_FOR_INTERRUPTS(); - goto recheck; - } - } - else - { - /* - * For consistency with acquire_sample_rows(), count - * HEAPTUPLE_INSERT_IN_PROGRESS tuples as live only - * when inserted by our own transaction. - */ - reltuples += 1; - } - - /* - * We must index such tuples, since if the index build - * commits then they're good. - */ - indexIt = true; - tupleIsAlive = true; - break; - case HEAPTUPLE_DELETE_IN_PROGRESS: - - /* - * As with INSERT_IN_PROGRESS case, this is unexpected - * unless it's our own deletion or a system catalog; but - * in anyvisible mode, this tuple is visible. - */ - if (anyvisible) - { - indexIt = true; - tupleIsAlive = false; - reltuples += 1; - break; - } - - xwait = HeapTupleHeaderGetUpdateXid(heapTuple->t_data); - if (!TransactionIdIsCurrentTransactionId(xwait)) - { - if (!is_system_catalog) - elog(WARNING, "concurrent delete in progress within table \"%s\"", - RelationGetRelationName(heapRelation)); - - /* - * If we are performing uniqueness checks, assuming - * the tuple is dead could lead to missing a - * uniqueness violation. 
In that case we wait for the - * deleting transaction to finish and check again. - * - * Also, if it's a HOT-updated tuple, we should not - * index it but rather the live tuple at the end of - * the HOT-chain. However, the deleting transaction - * could abort, possibly leaving this tuple as live - * after all, in which case it has to be indexed. The - * only way to know what to do is to wait for the - * deleting transaction to finish and check again. - */ - if (checking_uniqueness || - HeapTupleIsHotUpdated(heapTuple)) - { - /* - * Must drop the lock on the buffer before we wait - */ - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); - XactLockTableWait(xwait, heapRelation, - &heapTuple->t_self, - XLTW_InsertIndexUnique); - CHECK_FOR_INTERRUPTS(); - goto recheck; - } - - /* - * Otherwise index it but don't check for uniqueness, - * the same as a RECENTLY_DEAD tuple. - */ - indexIt = true; - - /* - * Count HEAPTUPLE_DELETE_IN_PROGRESS tuples as live, - * if they were not deleted by the current - * transaction. That's what acquire_sample_rows() - * does, and we want the behavior to be consistent. - */ - reltuples += 1; - } - else if (HeapTupleIsHotUpdated(heapTuple)) - { - /* - * It's a HOT-updated tuple deleted by our own xact. - * We can assume the deletion will commit (else the - * index contents don't matter), so treat the same as - * RECENTLY_DEAD HOT-updated tuples. - */ - indexIt = false; - /* mark the index as unsafe for old snapshots */ - indexInfo->ii_BrokenHotChain = true; - } - else - { - /* - * It's a regular tuple deleted by our own xact. Index - * it, but don't check for uniqueness nor count in - * reltuples, the same as a RECENTLY_DEAD tuple. - */ - indexIt = true; - } - /* In any case, exclude the tuple from unique-checking */ - tupleIsAlive = false; - break; - default: - elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result"); - indexIt = tupleIsAlive = false; /* keep compiler quiet */ - break; - } - - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); - - if (!indexIt) - continue; - } - else - { - /* heap_getnext did the time qual check */ - tupleIsAlive = true; - reltuples += 1; - } - - MemoryContextReset(econtext->ecxt_per_tuple_memory); - - /* Set up for predicate or expression evaluation */ - ExecStoreHeapTuple(heapTuple, slot, false); - - /* - * In a partial index, discard tuples that don't satisfy the - * predicate. - */ - if (predicate != NULL) - { - if (!ExecQual(predicate, econtext)) - continue; - } - - /* - * For the current heap tuple, extract all the attributes we use in - * this index, and note which are null. This also performs evaluation - * of any expressions needed. - */ - FormIndexDatum(indexInfo, - slot, - estate, - values, - isnull); - - /* - * You'd think we should go ahead and build the index tuple here, but - * some index AMs want to do further processing on the data first. So - * pass the values[] and isnull[] arrays, instead. - */ - - if (HeapTupleIsHeapOnly(heapTuple)) - { - /* - * For a heap-only tuple, pretend its TID is that of the root. See - * src/backend/access/heap/README.HOT for discussion. 
- */ - HeapTupleData rootTuple; - OffsetNumber offnum; - - rootTuple = *heapTuple; - offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self); - - if (!OffsetNumberIsValid(root_offsets[offnum - 1])) - ereport(ERROR, - (errcode(ERRCODE_DATA_CORRUPTED), - errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"", - ItemPointerGetBlockNumber(&heapTuple->t_self), - offnum, - RelationGetRelationName(heapRelation)))); - - ItemPointerSetOffsetNumber(&rootTuple.t_self, - root_offsets[offnum - 1]); - - /* Call the AM's callback routine to process the tuple */ - callback(indexRelation, &rootTuple, values, isnull, tupleIsAlive, - callback_state); - } - else - { - /* Call the AM's callback routine to process the tuple */ - callback(indexRelation, heapTuple, values, isnull, tupleIsAlive, - callback_state); - } - } - - heap_endscan(scan); - - /* we can now forget our snapshot, if set and registered by us */ - if (need_unregister_snapshot) - UnregisterSnapshot(snapshot); - - ExecDropSingleTupleTableSlot(slot); - - FreeExecutorState(estate); - - /* These may have been pointing to the now-gone estate */ - indexInfo->ii_ExpressionsState = NIL; - indexInfo->ii_PredicateState = NULL; - - return reltuples; -} - - /* * IndexCheckExclusion - verify that a new exclusion constraint is satisfied * @@ -2963,8 +2399,7 @@ IndexCheckExclusion(Relation heapRelation, Relation indexRelation, IndexInfo *indexInfo) { - HeapScanDesc scan; - HeapTuple heapTuple; + TableScanDesc scan; Datum values[INDEX_MAX_KEYS]; bool isnull[INDEX_MAX_KEYS]; ExprState *predicate; @@ -2987,8 +2422,7 @@ IndexCheckExclusion(Relation heapRelation, */ estate = CreateExecutorState(); econtext = GetPerTupleExprContext(estate); - slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation), - &TTSOpsHeapTuple); + slot = table_gimmegimmeslot(heapRelation, NULL); /* Arrange for econtext's scan tuple to be the tuple under test */ econtext->ecxt_scantuple = slot; @@ -3000,22 +2434,17 @@ IndexCheckExclusion(Relation heapRelation, * Scan all live tuples in the base relation. */ snapshot = RegisterSnapshot(GetLatestSnapshot()); - scan = heap_beginscan_strat(heapRelation, /* relation */ - snapshot, /* snapshot */ - 0, /* number of keys */ - NULL, /* scan key */ - true, /* buffer access strategy OK */ - true); /* syncscan OK */ - - while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + scan = table_beginscan_strat(heapRelation, /* relation */ + snapshot, /* snapshot */ + 0, /* number of keys */ + NULL, /* scan key */ + true, /* buffer access strategy OK */ + true); /* syncscan OK */ + + while (table_scan_getnextslot(scan, ForwardScanDirection, slot)) { CHECK_FOR_INTERRUPTS(); - MemoryContextReset(econtext->ecxt_per_tuple_memory); - - /* Set up for predicate or expression evaluation */ - ExecStoreHeapTuple(heapTuple, slot, false); - /* * In a partial index, ignore tuples that don't satisfy the predicate. 
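
The IndexCheckExclusion rewrite above also shows the new slot-based scan loop: instead of pulling HeapTuples out of heap_getnext(), the caller asks the table AM for a compatible slot and lets table_scan_getnextslot() fill it, taking the row's TID from slot->tts_tid rather than from heapTuple->t_self. A condensed sketch of that caller-side pattern, wrapped in a hypothetical helper that is not part of the patch:

    #include "postgres.h"

    #include "access/tableam.h"
    #include "executor/tuptable.h"
    #include "utils/snapmgr.h"

    /* illustrative only: slot-based full scan of a relation */
    static void
    scan_with_slot(Relation rel)
    {
        TupleTableSlot *slot;
        TableScanDesc scan;
        Snapshot    snapshot;

        /* slot whose ops match the relation's table AM */
        slot = table_gimmegimmeslot(rel, NULL);

        snapshot = RegisterSnapshot(GetLatestSnapshot());
        scan = table_beginscan_strat(rel, snapshot,
                                     0, NULL,   /* no scan keys */
                                     true,      /* buffer access strategy OK */
                                     true);     /* syncscan OK */

        while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
        {
            ItemPointer tid = &slot->tts_tid;  /* row identity lives in the slot */

            /* inspect the slot's contents / tid here */
            (void) tid;
        }

        table_endscan(scan);
        UnregisterSnapshot(snapshot);
        ExecDropSingleTupleTableSlot(slot);
    }
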
*/ @@ -3039,11 +2468,13 @@ IndexCheckExclusion(Relation heapRelation, */ check_exclusion_constraint(heapRelation, indexRelation, indexInfo, - &(heapTuple->t_self), values, isnull, + &(slot->tts_tid), values, isnull, estate, true); + + MemoryContextReset(econtext->ecxt_per_tuple_memory); } - heap_endscan(scan); + table_endscan(scan); UnregisterSnapshot(snapshot); ExecDropSingleTupleTableSlot(slot); @@ -3126,7 +2557,7 @@ validate_index(Oid heapId, Oid indexId, Snapshot snapshot) indexRelation; IndexInfo *indexInfo; IndexVacuumInfo ivinfo; - v_i_state state; + ValidateIndexState state; Oid save_userid; int save_sec_context; int save_nestlevel; @@ -3187,11 +2618,11 @@ validate_index(Oid heapId, Oid indexId, Snapshot snapshot) /* * Now scan the heap and "merge" it with the index */ - validate_index_heapscan(heapRelation, - indexRelation, - indexInfo, - snapshot, - &state); + table_index_validate_scan(heapRelation, + indexRelation, + indexInfo, + snapshot, + &state); /* Done with tuplesort object */ tuplesort_end(state.tuplesort); @@ -3211,53 +2642,13 @@ validate_index(Oid heapId, Oid indexId, Snapshot snapshot) heap_close(heapRelation, NoLock); } -/* - * itemptr_encode - Encode ItemPointer as int64/int8 - * - * This representation must produce values encoded as int64 that sort in the - * same order as their corresponding original TID values would (using the - * default int8 opclass to produce a result equivalent to the default TID - * opclass). - * - * As noted in validate_index(), this can be significantly faster. - */ -static inline int64 -itemptr_encode(ItemPointer itemptr) -{ - BlockNumber block = ItemPointerGetBlockNumber(itemptr); - OffsetNumber offset = ItemPointerGetOffsetNumber(itemptr); - int64 encoded; - - /* - * Use the 16 least significant bits for the offset. 32 adjacent bits are - * used for the block number. Since remaining bits are unused, there - * cannot be negative encoded values (We assume a two's complement - * representation). - */ - encoded = ((uint64) block << 16) | (uint16) offset; - - return encoded; -} - -/* - * itemptr_decode - Decode int64/int8 representation back to ItemPointer - */ -static inline void -itemptr_decode(ItemPointer itemptr, int64 encoded) -{ - BlockNumber block = (BlockNumber) (encoded >> 16); - OffsetNumber offset = (OffsetNumber) (encoded & 0xFFFF); - - ItemPointerSet(itemptr, block, offset); -} - /* * validate_index_callback - bulkdelete callback to collect the index TIDs */ static bool validate_index_callback(ItemPointer itemptr, void *opaque) { - v_i_state *state = (v_i_state *) opaque; + ValidateIndexState *state = (ValidateIndexState *) opaque; int64 encoded = itemptr_encode(itemptr); tuplesort_putdatum(state->tuplesort, Int64GetDatum(encoded), false); @@ -3265,243 +2656,6 @@ validate_index_callback(ItemPointer itemptr, void *opaque) return false; /* never actually delete anything */ } -/* - * validate_index_heapscan - second table scan for concurrent index build - * - * This has much code in common with IndexBuildHeapScan, but it's enough - * different that it seems cleaner to have two routines not one. 
- */ -static void -validate_index_heapscan(Relation heapRelation, - Relation indexRelation, - IndexInfo *indexInfo, - Snapshot snapshot, - v_i_state *state) -{ - HeapScanDesc scan; - HeapTuple heapTuple; - Datum values[INDEX_MAX_KEYS]; - bool isnull[INDEX_MAX_KEYS]; - ExprState *predicate; - TupleTableSlot *slot; - EState *estate; - ExprContext *econtext; - BlockNumber root_blkno = InvalidBlockNumber; - OffsetNumber root_offsets[MaxHeapTuplesPerPage]; - bool in_index[MaxHeapTuplesPerPage]; - - /* state variables for the merge */ - ItemPointer indexcursor = NULL; - ItemPointerData decoded; - bool tuplesort_empty = false; - - /* - * sanity checks - */ - Assert(OidIsValid(indexRelation->rd_rel->relam)); - - /* - * Need an EState for evaluation of index expressions and partial-index - * predicates. Also a slot to hold the current tuple. - */ - estate = CreateExecutorState(); - econtext = GetPerTupleExprContext(estate); - slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation), - &TTSOpsHeapTuple); - - /* Arrange for econtext's scan tuple to be the tuple under test */ - econtext->ecxt_scantuple = slot; - - /* Set up execution state for predicate, if any. */ - predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate); - - /* - * Prepare for scan of the base relation. We need just those tuples - * satisfying the passed-in reference snapshot. We must disable syncscan - * here, because it's critical that we read from block zero forward to - * match the sorted TIDs. - */ - scan = heap_beginscan_strat(heapRelation, /* relation */ - snapshot, /* snapshot */ - 0, /* number of keys */ - NULL, /* scan key */ - true, /* buffer access strategy OK */ - false); /* syncscan not OK */ - - /* - * Scan all tuples matching the snapshot. - */ - while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL) - { - ItemPointer heapcursor = &heapTuple->t_self; - ItemPointerData rootTuple; - OffsetNumber root_offnum; - - CHECK_FOR_INTERRUPTS(); - - state->htups += 1; - - /* - * As commented in IndexBuildHeapScan, we should index heap-only - * tuples under the TIDs of their root tuples; so when we advance onto - * a new heap page, build a map of root item offsets on the page. - * - * This complicates merging against the tuplesort output: we will - * visit the live tuples in order by their offsets, but the root - * offsets that we need to compare against the index contents might be - * ordered differently. So we might have to "look back" within the - * tuplesort output, but only within the current page. We handle that - * by keeping a bool array in_index[] showing all the - * already-passed-over tuplesort output TIDs of the current page. We - * clear that array here, when advancing onto a new heap page. 
- */ - if (scan->rs_cblock != root_blkno) - { - Page page = BufferGetPage(scan->rs_cbuf); - - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); - heap_get_root_tuples(page, root_offsets); - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); - - memset(in_index, 0, sizeof(in_index)); - - root_blkno = scan->rs_cblock; - } - - /* Convert actual tuple TID to root TID */ - rootTuple = *heapcursor; - root_offnum = ItemPointerGetOffsetNumber(heapcursor); - - if (HeapTupleIsHeapOnly(heapTuple)) - { - root_offnum = root_offsets[root_offnum - 1]; - if (!OffsetNumberIsValid(root_offnum)) - ereport(ERROR, - (errcode(ERRCODE_DATA_CORRUPTED), - errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"", - ItemPointerGetBlockNumber(heapcursor), - ItemPointerGetOffsetNumber(heapcursor), - RelationGetRelationName(heapRelation)))); - ItemPointerSetOffsetNumber(&rootTuple, root_offnum); - } - - /* - * "merge" by skipping through the index tuples until we find or pass - * the current root tuple. - */ - while (!tuplesort_empty && - (!indexcursor || - ItemPointerCompare(indexcursor, &rootTuple) < 0)) - { - Datum ts_val; - bool ts_isnull; - - if (indexcursor) - { - /* - * Remember index items seen earlier on the current heap page - */ - if (ItemPointerGetBlockNumber(indexcursor) == root_blkno) - in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true; - } - - tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true, - &ts_val, &ts_isnull, NULL); - Assert(tuplesort_empty || !ts_isnull); - if (!tuplesort_empty) - { - itemptr_decode(&decoded, DatumGetInt64(ts_val)); - indexcursor = &decoded; - - /* If int8 is pass-by-ref, free (encoded) TID Datum memory */ -#ifndef USE_FLOAT8_BYVAL - pfree(DatumGetPointer(ts_val)); -#endif - } - else - { - /* Be tidy */ - indexcursor = NULL; - } - } - - /* - * If the tuplesort has overshot *and* we didn't see a match earlier, - * then this tuple is missing from the index, so insert it. - */ - if ((tuplesort_empty || - ItemPointerCompare(indexcursor, &rootTuple) > 0) && - !in_index[root_offnum - 1]) - { - MemoryContextReset(econtext->ecxt_per_tuple_memory); - - /* Set up for predicate or expression evaluation */ - ExecStoreHeapTuple(heapTuple, slot, false); - - /* - * In a partial index, discard tuples that don't satisfy the - * predicate. - */ - if (predicate != NULL) - { - if (!ExecQual(predicate, econtext)) - continue; - } - - /* - * For the current heap tuple, extract all the attributes we use - * in this index, and note which are null. This also performs - * evaluation of any expressions needed. - */ - FormIndexDatum(indexInfo, - slot, - estate, - values, - isnull); - - /* - * You'd think we should go ahead and build the index tuple here, - * but some index AMs want to do further processing on the data - * first. So pass the values[] and isnull[] arrays, instead. - */ - - /* - * If the tuple is already committed dead, you might think we - * could suppress uniqueness checking, but this is no longer true - * in the presence of HOT, because the insert is actually a proxy - * for a uniqueness check on the whole HOT-chain. That is, the - * tuple we have here could be dead because it was already - * HOT-updated, and if so the updating transaction will not have - * thought it should insert index entries. The index AM will - * check the whole HOT-chain and correctly detect a conflict if - * there is one. - */ - - index_insert(indexRelation, - values, - isnull, - &rootTuple, - heapRelation, - indexInfo->ii_Unique ? 
- UNIQUE_CHECK_YES : UNIQUE_CHECK_NO, - indexInfo); - - state->tups_inserted += 1; - } - } - - heap_endscan(scan); - - ExecDropSingleTupleTableSlot(slot); - - FreeExecutorState(estate); - - /* These may have been pointing to the now-gone estate */ - indexInfo->ii_ExpressionsState = NIL; - indexInfo->ii_PredicateState = NULL; -} - - /* * index_set_state_flags - adjust pg_index state flags * diff --git a/src/backend/catalog/partition.c b/src/backend/catalog/partition.c index 558022647c..4e6c5df158 100644 --- a/src/backend/catalog/partition.c +++ b/src/backend/catalog/partition.c @@ -17,6 +17,7 @@ #include "access/genam.h" #include "access/heapam.h" #include "access/htup_details.h" +#include "access/tableam.h" #include "access/tupconvert.h" #include "access/sysattr.h" #include "catalog/indexing.h" diff --git a/src/backend/catalog/pg_conversion.c b/src/backend/catalog/pg_conversion.c index c692958d91..d668c9b021 100644 --- a/src/backend/catalog/pg_conversion.c +++ b/src/backend/catalog/pg_conversion.c @@ -16,6 +16,7 @@ #include "access/heapam.h" #include "access/htup_details.h" +#include "access/tableam.h" #include "access/sysattr.h" #include "catalog/catalog.h" #include "catalog/dependency.h" @@ -153,7 +154,7 @@ RemoveConversionById(Oid conversionOid) { Relation rel; HeapTuple tuple; - HeapScanDesc scan; + TableScanDesc scan; ScanKeyData scanKeyData; ScanKeyInit(&scanKeyData, @@ -164,14 +165,14 @@ RemoveConversionById(Oid conversionOid) /* open pg_conversion */ rel = heap_open(ConversionRelationId, RowExclusiveLock); - scan = heap_beginscan_catalog(rel, 1, &scanKeyData); + scan = table_beginscan_catalog(rel, 1, &scanKeyData); /* search for the target tuple */ - if (HeapTupleIsValid(tuple = heap_getnext(scan, ForwardScanDirection))) + if (HeapTupleIsValid(tuple = heap_scan_getnext(scan, ForwardScanDirection))) CatalogTupleDelete(rel, &tuple->t_self); else elog(ERROR, "could not find tuple for conversion %u", conversionOid); - heap_endscan(scan); + table_endscan(scan); heap_close(rel, RowExclusiveLock); } diff --git a/src/backend/catalog/pg_db_role_setting.c b/src/backend/catalog/pg_db_role_setting.c index e123691923..fec0c9e2ec 100644 --- a/src/backend/catalog/pg_db_role_setting.c +++ b/src/backend/catalog/pg_db_role_setting.c @@ -13,6 +13,7 @@ #include "access/genam.h" #include "access/heapam.h" #include "access/htup_details.h" +#include "access/tableam.h" #include "catalog/indexing.h" #include "catalog/objectaccess.h" #include "catalog/pg_db_role_setting.h" @@ -170,7 +171,7 @@ void DropSetting(Oid databaseid, Oid roleid) { Relation relsetting; - HeapScanDesc scan; + TableScanDesc scan; ScanKeyData keys[2]; HeapTuple tup; int numkeys = 0; @@ -196,12 +197,12 @@ DropSetting(Oid databaseid, Oid roleid) numkeys++; } - scan = heap_beginscan_catalog(relsetting, numkeys, keys); - while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection))) + scan = table_beginscan_catalog(relsetting, numkeys, keys); + while (HeapTupleIsValid(tup = heap_scan_getnext(scan, ForwardScanDirection))) { CatalogTupleDelete(relsetting, &tup->t_self); } - heap_endscan(scan); + table_endscan(scan); heap_close(relsetting, RowExclusiveLock); } diff --git a/src/backend/catalog/pg_publication.c b/src/backend/catalog/pg_publication.c index a9fbb73165..51183f9a2a 100644 --- a/src/backend/catalog/pg_publication.c +++ b/src/backend/catalog/pg_publication.c @@ -21,6 +21,7 @@ #include "access/hash.h" #include "access/heapam.h" #include "access/htup_details.h" +#include "access/tableam.h" #include "access/xact.h" #include 
"catalog/catalog.h" @@ -329,7 +330,7 @@ GetAllTablesPublicationRelations(void) { Relation classRel; ScanKeyData key[1]; - HeapScanDesc scan; + TableScanDesc scan; HeapTuple tuple; List *result = NIL; @@ -340,9 +341,9 @@ GetAllTablesPublicationRelations(void) BTEqualStrategyNumber, F_CHAREQ, CharGetDatum(RELKIND_RELATION)); - scan = heap_beginscan_catalog(classRel, 1, key); + scan = table_beginscan_catalog(classRel, 1, key); - while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL) { Form_pg_class relForm = (Form_pg_class) GETSTRUCT(tuple); Oid relid = relForm->oid; @@ -351,7 +352,7 @@ GetAllTablesPublicationRelations(void) result = lappend_oid(result, relid); } - heap_endscan(scan); + table_endscan(scan); heap_close(classRel, AccessShareLock); return result; diff --git a/src/backend/catalog/pg_subscription.c b/src/backend/catalog/pg_subscription.c index e136aa6a0b..f1dc998f48 100644 --- a/src/backend/catalog/pg_subscription.c +++ b/src/backend/catalog/pg_subscription.c @@ -19,6 +19,7 @@ #include "access/genam.h" #include "access/heapam.h" #include "access/htup_details.h" +#include "access/tableam.h" #include "access/xact.h" #include "catalog/indexing.h" @@ -390,7 +391,7 @@ void RemoveSubscriptionRel(Oid subid, Oid relid) { Relation rel; - HeapScanDesc scan; + TableScanDesc scan; ScanKeyData skey[2]; HeapTuple tup; int nkeys = 0; @@ -416,12 +417,12 @@ RemoveSubscriptionRel(Oid subid, Oid relid) } /* Do the search and delete what we found. */ - scan = heap_beginscan_catalog(rel, nkeys, skey); - while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection))) + scan = table_beginscan_catalog(rel, nkeys, skey); + while (HeapTupleIsValid(tup = heap_scan_getnext(scan, ForwardScanDirection))) { CatalogTupleDelete(rel, &tup->t_self); } - heap_endscan(scan); + table_endscan(scan); heap_close(rel, RowExclusiveLock); } diff --git a/src/backend/catalog/toasting.c b/src/backend/catalog/toasting.c index 462969a838..60143f0e44 100644 --- a/src/backend/catalog/toasting.c +++ b/src/backend/catalog/toasting.c @@ -266,6 +266,7 @@ create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid, toast_typid, InvalidOid, rel->rd_rel->relowner, + rel->rd_rel->relam, tupdesc, NIL, RELKIND_TOASTVALUE, diff --git a/src/backend/commands/amcmds.c b/src/backend/commands/amcmds.c index 4367290a27..b55f8e708b 100644 --- a/src/backend/commands/amcmds.c +++ b/src/backend/commands/amcmds.c @@ -30,7 +30,7 @@ #include "utils/syscache.h" -static Oid lookup_index_am_handler_func(List *handler_name, char amtype); +static Oid lookup_am_handler_func(List *handler_name, char amtype); static const char *get_am_type_string(char amtype); @@ -74,7 +74,7 @@ CreateAccessMethod(CreateAmStmt *stmt) /* * Get the handler function oid, verifying the AM type while at it. */ - amhandler = lookup_index_am_handler_func(stmt->handler_name, stmt->amtype); + amhandler = lookup_am_handler_func(stmt->handler_name, stmt->amtype); /* * Insert tuple into pg_am. @@ -229,6 +229,8 @@ get_am_type_string(char amtype) { case AMTYPE_INDEX: return "INDEX"; + case AMTYPE_TABLE: + return "TABLE"; default: /* shouldn't happen */ elog(ERROR, "invalid access method type '%c'", amtype); @@ -243,7 +245,7 @@ get_am_type_string(char amtype) * This function either return valid function Oid or throw an error. 
*/ static Oid -lookup_index_am_handler_func(List *handler_name, char amtype) +lookup_am_handler_func(List *handler_name, char amtype) { Oid handlerOid; static const Oid funcargtypes[1] = {INTERNALOID}; @@ -267,6 +269,15 @@ lookup_index_am_handler_func(List *handler_name, char amtype) NameListToString(handler_name), "index_am_handler"))); break; + /* XXX refactor duplicate error */ + case AMTYPE_TABLE: + if (get_func_rettype(handlerOid) != TABLE_AM_HANDLEROID) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("function %s must return type %s", + NameListToString(handler_name), + "storage_am_handler"))); + break; default: elog(ERROR, "unrecognized access method type \"%c\"", amtype); } diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index b8445dc372..29e2377b52 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -18,6 +18,7 @@ #include "access/multixact.h" #include "access/sysattr.h" +#include "access/tableam.h" #include "access/transam.h" #include "access/tupconvert.h" #include "access/tuptoaster.h" @@ -1000,6 +1001,8 @@ acquire_sample_rows(Relation onerel, int elevel, TransactionId OldestXmin; BlockSamplerData bs; ReservoirStateData rstate; + TupleTableSlot *slot; + TableScanDesc scan; Assert(targrows > 0); @@ -1013,178 +1016,72 @@ acquire_sample_rows(Relation onerel, int elevel, /* Prepare for sampling rows */ reservoir_init_selection_state(&rstate, targrows); + scan = table_beginscan_analyze(onerel); + slot = table_gimmegimmeslot(onerel, NULL); + /* Outer loop over blocks to sample */ while (BlockSampler_HasMore(&bs)) { BlockNumber targblock = BlockSampler_Next(&bs); - Buffer targbuffer; - Page targpage; - OffsetNumber targoffset, - maxoffset; vacuum_delay_point(); /* - * We must maintain a pin on the target page's buffer to ensure that - * the maxoffset value stays good (else concurrent VACUUM might delete - * tuples out from under us). Hence, pin the page until we are done - * looking at it. We also choose to hold sharelock on the buffer - * throughout --- we could release and re-acquire sharelock for each - * tuple, but since we aren't doing much work per tuple, the extra - * lock traffic is probably better avoided. + * XXX: we could have this function return a boolean, instead of + * forcing such checks to happen in next_tuple(). */ - targbuffer = ReadBufferExtended(onerel, MAIN_FORKNUM, targblock, - RBM_NORMAL, vac_strategy); - LockBuffer(targbuffer, BUFFER_LOCK_SHARE); - targpage = BufferGetPage(targbuffer); - maxoffset = PageGetMaxOffsetNumber(targpage); - - /* Inner loop over all tuples on the selected page */ - for (targoffset = FirstOffsetNumber; targoffset <= maxoffset; targoffset++) - { - ItemId itemid; - HeapTupleData targtuple; - bool sample_it = false; - - itemid = PageGetItemId(targpage, targoffset); + table_scan_analyze_next_block(scan, targblock, vac_strategy); + while (table_scan_analyze_next_tuple(scan, OldestXmin, &liverows, &deadrows, slot)) + { /* - * We ignore unused and redirect line pointers. DEAD line - * pointers should be counted as dead, because we need vacuum to - * run to get rid of them. Note that this rule agrees with the - * way that heap_page_prune() counts things. + * The first targrows sample rows are simply copied into the + * reservoir. Then we start replacing tuples in the sample + * until we reach the end of the relation. This algorithm is + * from Jeff Vitter's paper (see full citation below). 
It + * works by repeatedly computing the number of tuples to skip + * before selecting a tuple, which replaces a randomly chosen + * element of the reservoir (current set of tuples). At all + * times the reservoir is a true random sample of the tuples + * we've passed over so far, so when we fall off the end of + * the relation we're done. */ - if (!ItemIdIsNormal(itemid)) - { - if (ItemIdIsDead(itemid)) - deadrows += 1; - continue; - } - - ItemPointerSet(&targtuple.t_self, targblock, targoffset); - - targtuple.t_tableOid = RelationGetRelid(onerel); - targtuple.t_data = (HeapTupleHeader) PageGetItem(targpage, itemid); - targtuple.t_len = ItemIdGetLength(itemid); - - switch (HeapTupleSatisfiesVacuum(&targtuple, - OldestXmin, - targbuffer)) - { - case HEAPTUPLE_LIVE: - sample_it = true; - liverows += 1; - break; - - case HEAPTUPLE_DEAD: - case HEAPTUPLE_RECENTLY_DEAD: - /* Count dead and recently-dead rows */ - deadrows += 1; - break; - - case HEAPTUPLE_INSERT_IN_PROGRESS: - - /* - * Insert-in-progress rows are not counted. We assume - * that when the inserting transaction commits or aborts, - * it will send a stats message to increment the proper - * count. This works right only if that transaction ends - * after we finish analyzing the table; if things happen - * in the other order, its stats update will be - * overwritten by ours. However, the error will be large - * only if the other transaction runs long enough to - * insert many tuples, so assuming it will finish after us - * is the safer option. - * - * A special case is that the inserting transaction might - * be our own. In this case we should count and sample - * the row, to accommodate users who load a table and - * analyze it in one transaction. (pgstat_report_analyze - * has to adjust the numbers we send to the stats - * collector to make this come out right.) - */ - if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(targtuple.t_data))) - { - sample_it = true; - liverows += 1; - } - break; - - case HEAPTUPLE_DELETE_IN_PROGRESS: - - /* - * We count delete-in-progress rows as still live, using - * the same reasoning given above; but we don't bother to - * include them in the sample. - * - * If the delete was done by our own transaction, however, - * we must count the row as dead to make - * pgstat_report_analyze's stats adjustments come out - * right. (Note: this works out properly when the row was - * both inserted and deleted in our xact.) - */ - if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(targtuple.t_data))) - deadrows += 1; - else - liverows += 1; - break; - - default: - elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result"); - break; - } - - if (sample_it) + if (numrows < targrows) + rows[numrows++] = ExecCopySlotHeapTuple(slot); + else { /* - * The first targrows sample rows are simply copied into the - * reservoir. Then we start replacing tuples in the sample - * until we reach the end of the relation. This algorithm is - * from Jeff Vitter's paper (see full citation below). It - * works by repeatedly computing the number of tuples to skip - * before selecting a tuple, which replaces a randomly chosen - * element of the reservoir (current set of tuples). At all - * times the reservoir is a true random sample of the tuples - * we've passed over so far, so when we fall off the end of - * the relation we're done. + * t in Vitter's paper is the number of records already + * processed. If we need to compute a new S value, we + * must use the not-yet-incremented value of samplerows as + * t. 
*/ - if (numrows < targrows) - rows[numrows++] = heap_copytuple(&targtuple); - else + if (rowstoskip < 0) + rowstoskip = reservoir_get_next_S(&rstate, samplerows, targrows); + + if (rowstoskip <= 0) { /* - * t in Vitter's paper is the number of records already - * processed. If we need to compute a new S value, we - * must use the not-yet-incremented value of samplerows as - * t. + * Found a suitable tuple, so save it, replacing one + * old tuple at random */ - if (rowstoskip < 0) - rowstoskip = reservoir_get_next_S(&rstate, samplerows, targrows); + int k = (int) (targrows * sampler_random_fract(rstate.randstate)); - if (rowstoskip <= 0) - { - /* - * Found a suitable tuple, so save it, replacing one - * old tuple at random - */ - int k = (int) (targrows * sampler_random_fract(rstate.randstate)); - - Assert(k >= 0 && k < targrows); - heap_freetuple(rows[k]); - rows[k] = heap_copytuple(&targtuple); - } - - rowstoskip -= 1; + Assert(k >= 0 && k < targrows); + heap_freetuple(rows[k]); + rows[k] = ExecCopySlotHeapTuple(slot); } - samplerows += 1; + rowstoskip -= 1; } - } - /* Now release the lock and pin on the page */ - UnlockReleaseBuffer(targbuffer); + samplerows += 1; + } } + ExecDropSingleTupleTableSlot(slot); + table_endscan(scan); + /* * If we didn't find as many tuples as we wanted then we're done. No sort * is needed, since they're already in order. diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index 610e425a56..1b8d03642c 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -20,7 +20,7 @@ #include "access/amapi.h" #include "access/multixact.h" #include "access/relscan.h" -#include "access/rewriteheap.h" +#include "access/tableam.h" #include "access/transam.h" #include "access/tuptoaster.h" #include "access/xact.h" @@ -41,7 +41,6 @@ #include "storage/bufmgr.h" #include "storage/lmgr.h" #include "storage/predicate.h" -#include "storage/smgr.h" #include "utils/acl.h" #include "utils/fmgroids.h" #include "utils/inval.h" @@ -68,14 +67,10 @@ typedef struct static void rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose); -static void copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, +static void copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, bool *pSwapToastByContent, TransactionId *pFreezeXid, MultiXactId *pCutoffMulti); static List *get_tables_to_cluster(MemoryContext cluster_context); -static void reform_and_rewrite_tuple(HeapTuple tuple, - TupleDesc oldTupDesc, TupleDesc newTupDesc, - Datum *values, bool *isnull, - RewriteState rwstate); /*--------------------------------------------------------------------------- @@ -597,7 +592,7 @@ rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose) AccessExclusiveLock); /* Copy the heap data into the new table in the desired order */ - copy_heap_data(OIDNewHeap, tableOid, indexOid, verbose, + copy_table_data(OIDNewHeap, tableOid, indexOid, verbose, &swap_toast_by_content, &frozenXid, &cutoffMulti); /* @@ -682,6 +677,7 @@ make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, char relpersistence, InvalidOid, InvalidOid, OldHeap->rd_rel->relowner, + OldHeap->rd_rel->relam, OldHeapDesc, NIL, RELKIND_RELATION, @@ -739,7 +735,7 @@ make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, char relpersistence, } /* - * Do the physical copying of heap data. + * Do the physical copying of table data. * * There are three output parameters: * *pSwapToastByContent is set true if toast tables must be swapped by content. 
@@ -747,7 +743,7 @@ make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, char relpersistence, * *pCutoffMulti receives the MultiXactId used as a cutoff point. */ static void -copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, +copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, bool *pSwapToastByContent, TransactionId *pFreezeXid, MultiXactId *pCutoffMulti) { @@ -757,21 +753,12 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, Relation relRelation; HeapTuple reltup; Form_pg_class relform; - TupleDesc oldTupDesc; - TupleDesc newTupDesc; - int natts; - Datum *values; - bool *isnull; - IndexScanDesc indexScan; - HeapScanDesc heapScan; - bool use_wal; - bool is_system_catalog; + TupleDesc oldTupDesc PG_USED_FOR_ASSERTS_ONLY; + TupleDesc newTupDesc PG_USED_FOR_ASSERTS_ONLY; TransactionId OldestXmin; TransactionId FreezeXid; MultiXactId MultiXactCutoff; - RewriteState rwstate; bool use_sort; - Tuplesortstate *tuplesort; double num_tuples = 0, tups_vacuumed = 0, tups_recently_dead = 0; @@ -799,11 +786,6 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, newTupDesc = RelationGetDescr(NewHeap); Assert(newTupDesc->natts == oldTupDesc->natts); - /* Preallocate values/isnull arrays */ - natts = newTupDesc->natts; - values = (Datum *) palloc(natts * sizeof(Datum)); - isnull = (bool *) palloc(natts * sizeof(bool)); - /* * If the OldHeap has a toast table, get lock on the toast table to keep * it from being vacuumed. This is needed because autovacuum processes @@ -820,15 +802,6 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, if (OldHeap->rd_rel->reltoastrelid) LockRelationOid(OldHeap->rd_rel->reltoastrelid, AccessExclusiveLock); - /* - * We need to log the copied data in WAL iff WAL archiving/streaming is - * enabled AND it's a WAL-logged rel. - */ - use_wal = XLogIsNeeded() && RelationNeedsWAL(NewHeap); - - /* use_wal off requires smgr_targblock be initially invalid */ - Assert(RelationGetTargetBlock(NewHeap) == InvalidBlockNumber); - /* * If both tables have TOAST tables, perform toast swap by content. It is * possible that the old table has a toast table but the new one doesn't, @@ -889,13 +862,6 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, *pFreezeXid = FreezeXid; *pCutoffMulti = MultiXactCutoff; - /* Remember if it's a system catalog */ - is_system_catalog = IsSystemRelation(OldHeap); - - /* Initialize the rewrite operation */ - rwstate = begin_heap_rewrite(OldHeap, NewHeap, OldestXmin, FreezeXid, - MultiXactCutoff, use_wal); - /* * Decide whether to use an indexscan or seqscan-and-optional-sort to scan * the OldHeap. We know how to use a sort to duplicate the ordering of a @@ -908,39 +874,14 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, else use_sort = false; - /* Set up sorting if wanted */ - if (use_sort) - tuplesort = tuplesort_begin_cluster(oldTupDesc, OldIndex, - maintenance_work_mem, - NULL, false); - else - tuplesort = NULL; - - /* - * Prepare to scan the OldHeap. To ensure we see recently-dead tuples - * that still need to be copied, we scan with SnapshotAny and use - * HeapTupleSatisfiesVacuum for the visibility test. 
- */ - if (OldIndex != NULL && !use_sort) - { - heapScan = NULL; - indexScan = index_beginscan(OldHeap, OldIndex, SnapshotAny, 0, 0); - index_rescan(indexScan, NULL, 0, NULL, 0); - } - else - { - heapScan = heap_beginscan(OldHeap, SnapshotAny, 0, (ScanKey) NULL); - indexScan = NULL; - } - /* Log what we're doing */ - if (indexScan != NULL) + if (OldIndex != NULL && !use_sort) ereport(elevel, (errmsg("clustering \"%s.%s\" using index scan on \"%s\"", get_namespace_name(RelationGetNamespace(OldHeap)), RelationGetRelationName(OldHeap), RelationGetRelationName(OldIndex)))); - else if (tuplesort != NULL) + else if (use_sort) ereport(elevel, (errmsg("clustering \"%s.%s\" using sequential scan and sort", get_namespace_name(RelationGetNamespace(OldHeap)), @@ -952,150 +893,12 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, RelationGetRelationName(OldHeap)))); /* - * Scan through the OldHeap, either in OldIndex order or sequentially; - * copy each tuple into the NewHeap, or transiently to the tuplesort - * module. Note that we don't bother sorting dead tuples (they won't get - * to the new table anyway). + * Hand of the actual copying to AM specific function, the generic code + * cannot know how to deal with visibility across AMs. */ - for (;;) - { - HeapTuple tuple; - Buffer buf; - bool isdead; - - CHECK_FOR_INTERRUPTS(); - - if (indexScan != NULL) - { - tuple = index_getnext(indexScan, ForwardScanDirection); - if (tuple == NULL) - break; - - /* Since we used no scan keys, should never need to recheck */ - if (indexScan->xs_recheck) - elog(ERROR, "CLUSTER does not support lossy index conditions"); - - buf = indexScan->xs_cbuf; - } - else - { - tuple = heap_getnext(heapScan, ForwardScanDirection); - if (tuple == NULL) - break; - - buf = heapScan->rs_cbuf; - } - - LockBuffer(buf, BUFFER_LOCK_SHARE); - - switch (HeapTupleSatisfiesVacuum(tuple, OldestXmin, buf)) - { - case HEAPTUPLE_DEAD: - /* Definitely dead */ - isdead = true; - break; - case HEAPTUPLE_RECENTLY_DEAD: - tups_recently_dead += 1; - /* fall through */ - case HEAPTUPLE_LIVE: - /* Live or recently dead, must copy it */ - isdead = false; - break; - case HEAPTUPLE_INSERT_IN_PROGRESS: - - /* - * Since we hold exclusive lock on the relation, normally the - * only way to see this is if it was inserted earlier in our - * own transaction. However, it can happen in system - * catalogs, since we tend to release write lock before commit - * there. Give a warning if neither case applies; but in any - * case we had better copy it. - */ - if (!is_system_catalog && - !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple->t_data))) - elog(WARNING, "concurrent insert in progress within table \"%s\"", - RelationGetRelationName(OldHeap)); - /* treat as live */ - isdead = false; - break; - case HEAPTUPLE_DELETE_IN_PROGRESS: - - /* - * Similar situation to INSERT_IN_PROGRESS case. - */ - if (!is_system_catalog && - !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tuple->t_data))) - elog(WARNING, "concurrent delete in progress within table \"%s\"", - RelationGetRelationName(OldHeap)); - /* treat as recently dead */ - tups_recently_dead += 1; - isdead = false; - break; - default: - elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result"); - isdead = false; /* keep compiler quiet */ - break; - } - - LockBuffer(buf, BUFFER_LOCK_UNLOCK); - - if (isdead) - { - tups_vacuumed += 1; - /* heap rewrite module still needs to see it... 
*/ - if (rewrite_heap_dead_tuple(rwstate, tuple)) - { - /* A previous recently-dead tuple is now known dead */ - tups_vacuumed += 1; - tups_recently_dead -= 1; - } - continue; - } - - num_tuples += 1; - if (tuplesort != NULL) - tuplesort_putheaptuple(tuplesort, tuple); - else - reform_and_rewrite_tuple(tuple, - oldTupDesc, newTupDesc, - values, isnull, - rwstate); - } - - if (indexScan != NULL) - index_endscan(indexScan); - if (heapScan != NULL) - heap_endscan(heapScan); - - /* - * In scan-and-sort mode, complete the sort, then read out all live tuples - * from the tuplestore and write them to the new relation. - */ - if (tuplesort != NULL) - { - tuplesort_performsort(tuplesort); - - for (;;) - { - HeapTuple tuple; - - CHECK_FOR_INTERRUPTS(); - - tuple = tuplesort_getheaptuple(tuplesort, true); - if (tuple == NULL) - break; - - reform_and_rewrite_tuple(tuple, - oldTupDesc, newTupDesc, - values, isnull, - rwstate); - } - - tuplesort_end(tuplesort); - } - - /* Write out any remaining tuples, and fsync if needed */ - end_heap_rewrite(rwstate); + table_copy_for_cluster(OldHeap, NewHeap, OldIndex, use_sort, + OldestXmin, FreezeXid, MultiXactCutoff, + &num_tuples, &tups_vacuumed, &tups_recently_dead); /* Reset rd_toastoid just to be tidy --- it shouldn't be looked at again */ NewHeap->rd_toastoid = InvalidOid; @@ -1113,10 +916,6 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, tups_recently_dead, pg_rusage_show(&ru0)))); - /* Clean up */ - pfree(values); - pfree(isnull); - if (OldIndex != NULL) index_close(OldIndex, NoLock); heap_close(OldHeap, NoLock); @@ -1693,7 +1492,7 @@ static List * get_tables_to_cluster(MemoryContext cluster_context) { Relation indRelation; - HeapScanDesc scan; + TableScanDesc scan; ScanKeyData entry; HeapTuple indexTuple; Form_pg_index index; @@ -1712,8 +1511,8 @@ get_tables_to_cluster(MemoryContext cluster_context) Anum_pg_index_indisclustered, BTEqualStrategyNumber, F_BOOLEQ, BoolGetDatum(true)); - scan = heap_beginscan_catalog(indRelation, 1, &entry); - while ((indexTuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + scan = table_beginscan_catalog(indRelation, 1, &entry); + while ((indexTuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL) { index = (Form_pg_index) GETSTRUCT(indexTuple); @@ -1733,52 +1532,9 @@ get_tables_to_cluster(MemoryContext cluster_context) MemoryContextSwitchTo(old_context); } - heap_endscan(scan); + table_endscan(scan); relation_close(indRelation, AccessShareLock); return rvs; } - - -/* - * Reconstruct and rewrite the given tuple - * - * We cannot simply copy the tuple as-is, for several reasons: - * - * 1. We'd like to squeeze out the values of any dropped columns, both - * to save space and to ensure we have no corner-case failures. (It's - * possible for example that the new table hasn't got a TOAST table - * and so is unable to store any large values of dropped cols.) - * - * 2. The tuple might not even be legal for the new table; this is - * currently only known to happen as an after-effect of ALTER TABLE - * SET WITHOUT OIDS (in an older version, via pg_upgrade). - * - * So, we must reconstruct the tuple from component Datums. 
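For reference, the reconstruction this comment describes (done by the reform_and_rewrite_tuple() helper removed below, and with this patch expected to happen inside the AM-specific copy routine) boils down to the following fragment, with tuple, the descriptors, the values/isnull arrays and rwstate supplied by the caller:

heap_deform_tuple(tuple, oldTupDesc, values, isnull);

/* be sure to null out any dropped columns */
for (int i = 0; i < newTupDesc->natts; i++)
{
    if (TupleDescAttr(newTupDesc, i)->attisdropped)
        isnull[i] = true;
}

copiedTuple = heap_form_tuple(newTupDesc, values, isnull);
rewrite_heap_tuple(rwstate, tuple, copiedTuple);
heap_freetuple(copiedTuple);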
- */ -static void -reform_and_rewrite_tuple(HeapTuple tuple, - TupleDesc oldTupDesc, TupleDesc newTupDesc, - Datum *values, bool *isnull, - RewriteState rwstate) -{ - HeapTuple copiedTuple; - int i; - - heap_deform_tuple(tuple, oldTupDesc, values, isnull); - - /* Be sure to null out any dropped columns */ - for (i = 0; i < newTupDesc->natts; i++) - { - if (TupleDescAttr(newTupDesc, i)->attisdropped) - isnull[i] = true; - } - - copiedTuple = heap_form_tuple(newTupDesc, values, isnull); - - /* The heap rewrite module does the rest */ - rewrite_heap_tuple(rwstate, tuple, copiedTuple); - - heap_freetuple(copiedTuple); -} diff --git a/src/backend/commands/constraint.c b/src/backend/commands/constraint.c index b0b2cb2a14..9fbea0b4d8 100644 --- a/src/backend/commands/constraint.c +++ b/src/backend/commands/constraint.c @@ -13,6 +13,8 @@ */ #include "postgres.h" +#include "access/tableam.h" +#include "access/relscan.h" #include "catalog/index.h" #include "commands/trigger.h" #include "executor/executor.h" @@ -69,6 +71,9 @@ unique_key_recheck(PG_FUNCTION_ARGS) /* * Get the new data that was inserted/updated. + * + * PBORKED: should use slot API, otherwise we'll not work correctly + * for zheap et al. */ if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event)) new_row = trigdata->tg_trigtuple; @@ -83,6 +88,8 @@ unique_key_recheck(PG_FUNCTION_ARGS) new_row = NULL; /* keep compiler quiet */ } + slot = table_gimmegimmeslot(trigdata->tg_relation, NULL); + /* * If the new_row is now dead (ie, inserted and then deleted within our * transaction), we can skip the check. However, we have to be careful, @@ -102,12 +109,20 @@ unique_key_recheck(PG_FUNCTION_ARGS) * removed. */ tmptid = new_row->t_self; - if (!heap_hot_search(&tmptid, trigdata->tg_relation, SnapshotSelf, NULL)) { - /* - * All rows in the HOT chain are dead, so skip the check. - */ - return PointerGetDatum(NULL); + IndexFetchTableData *scan = table_begin_index_fetch_table(trigdata->tg_relation); + bool call_again = false; + + if (!table_fetch_follow(scan, &tmptid, SnapshotSelf, slot, &call_again, NULL)) + { + /* + * All rows referenced by the index are dead, so skip the check. + */ + ExecDropSingleTupleTableSlot(slot); + table_end_index_fetch_table(scan); + return PointerGetDatum(NULL); + } + table_end_index_fetch_table(scan); } /* @@ -119,14 +134,6 @@ unique_key_recheck(PG_FUNCTION_ARGS) RowExclusiveLock); indexInfo = BuildIndexInfo(indexRel); - /* - * The heap tuple must be put into a slot for FormIndexDatum. - */ - slot = MakeSingleTupleTableSlot(RelationGetDescr(trigdata->tg_relation), - &TTSOpsHeapTuple); - - ExecStoreHeapTuple(new_row, slot, false); - /* * Typically the index won't have expressions, but if it does we need an * EState to evaluate them. 
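Aside: stripped of the trigger plumbing above, the AM-independent "fetch the row a TID points at" pattern that unique_key_recheck() now relies on looks like this sketch; rel and tid stand in for the trigger's relation and the ItemPointer being rechecked, and error handling is omitted:

IndexFetchTableData *fetch = table_begin_index_fetch_table(rel);
TupleTableSlot *slot = table_gimmegimmeslot(rel, NULL);
bool        call_again = false;

if (table_fetch_follow(fetch, &tid, SnapshotSelf, slot, &call_again, NULL))
{
    /* slot now holds a visible row version for tid; inspect it here */
}

table_end_index_fetch_table(fetch);
ExecDropSingleTupleTableSlot(slot);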
We need it for exclusion constraints too, diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 4aa8890fe8..587e166c2a 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -21,6 +21,7 @@ #include "access/heapam.h" #include "access/htup_details.h" +#include "access/tableam.h" #include "access/sysattr.h" #include "access/xact.h" #include "access/xlog.h" @@ -315,9 +316,9 @@ static void CopyOneRowTo(CopyState cstate, Datum *values, bool *nulls); static void CopyFromInsertBatch(CopyState cstate, EState *estate, CommandId mycid, int hi_options, - ResultRelInfo *resultRelInfo, TupleTableSlot *myslot, + ResultRelInfo *resultRelInfo, BulkInsertState bistate, - int nBufferedTuples, HeapTuple *bufferedTuples, + int nBufferedTuples, TupleTableSlot **bufferedSlots, uint64 firstBufferedLineNo); static bool CopyReadLine(CopyState cstate); static bool CopyReadLineText(CopyState cstate); @@ -2046,33 +2047,27 @@ CopyTo(CopyState cstate) if (cstate->rel) { - Datum *values; - bool *nulls; - HeapScanDesc scandesc; - HeapTuple tuple; - - values = (Datum *) palloc(num_phys_attrs * sizeof(Datum)); - nulls = (bool *) palloc(num_phys_attrs * sizeof(bool)); - - scandesc = heap_beginscan(cstate->rel, GetActiveSnapshot(), 0, NULL); + TupleTableSlot *slot; + TableScanDesc scandesc; + scandesc = table_beginscan(cstate->rel, GetActiveSnapshot(), 0, NULL); + slot = table_gimmegimmeslot(cstate->rel, NULL); processed = 0; - while ((tuple = heap_getnext(scandesc, ForwardScanDirection)) != NULL) + + while (table_scan_getnextslot(scandesc, ForwardScanDirection, slot)) { CHECK_FOR_INTERRUPTS(); - /* Deconstruct the tuple ... faster than repeated heap_getattr */ - heap_deform_tuple(tuple, tupDesc, values, nulls); + /* Deconstruct the tuple ... 
*/ + slot_getallattrs(slot); /* Format and send the data */ - CopyOneRowTo(cstate, values, nulls); + CopyOneRowTo(cstate, slot->tts_values, slot->tts_isnull); processed++; } - heap_endscan(scandesc); - - pfree(values); - pfree(nulls); + ExecDropSingleTupleTableSlot(slot); + table_endscan(scandesc); } else { @@ -2284,17 +2279,13 @@ limit_printout_length(const char *str) uint64 CopyFrom(CopyState cstate) { - HeapTuple tuple; - TupleDesc tupDesc; - Datum *values; - bool *nulls; ResultRelInfo *resultRelInfo; ResultRelInfo *target_resultRelInfo; ResultRelInfo *prevResultRelInfo = NULL; EState *estate = CreateExecutorState(); /* for ExecConstraints() */ ModifyTableState *mtstate; ExprContext *econtext; - TupleTableSlot *myslot; + TupleTableSlot *singleslot; MemoryContext oldcontext = CurrentMemoryContext; PartitionTupleRouting *proute = NULL; @@ -2302,8 +2293,8 @@ CopyFrom(CopyState cstate) ErrorContextCallback errcallback; CommandId mycid = GetCurrentCommandId(true); int hi_options = 0; /* start with default heap_insert options */ - BulkInsertState bistate; CopyInsertMethod insertMethod; + BulkInsertState bistate; uint64 processed = 0; int nBufferedTuples = 0; bool has_before_insert_row_trig; @@ -2312,8 +2303,8 @@ CopyFrom(CopyState cstate) #define MAX_BUFFERED_TUPLES 1000 #define RECHECK_MULTI_INSERT_THRESHOLD 1000 - HeapTuple *bufferedTuples = NULL; /* initialize to silence warning */ - Size bufferedTuplesSize = 0; + TupleTableSlot **bufferedSlots = NULL; /* initialize to silence warning */ + Size bufferedSlotsSize = 0; uint64 firstBufferedLineNo = 0; uint64 lastPartitionSampleLineNo = 0; uint64 nPartitionChanges = 0; @@ -2355,8 +2346,6 @@ CopyFrom(CopyState cstate) RelationGetRelationName(cstate->rel)))); } - tupDesc = RelationGetDescr(cstate->rel); - /*---------- * Check to see if we can avoid writing WAL * @@ -2486,13 +2475,6 @@ CopyFrom(CopyState cstate) ExecInitRangeTable(estate, cstate->range_table); - /* Set up a tuple slot too */ - myslot = ExecInitExtraTupleSlot(estate, tupDesc, - &TTSOpsHeapTuple); - /* Triggers might need a slot as well */ - estate->es_trig_tuple_slot = ExecInitExtraTupleSlot(estate, NULL, - &TTSOpsHeapTuple); - /* * Set up a ModifyTableState so we can let FDW(s) init themselves for * foreign-table result relation(s). @@ -2611,7 +2593,17 @@ CopyFrom(CopyState cstate) else insertMethod = CIM_MULTI; - bufferedTuples = palloc(MAX_BUFFERED_TUPLES * sizeof(HeapTuple)); + bufferedSlots = palloc0(MAX_BUFFERED_TUPLES * sizeof(TupleTableSlot *)); + } + + /* + * If not using batch mode (which allocates slots as needed), Set up a + * tuple slot too. 
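The slot-based scan pattern that replaces heap_beginscan()/heap_getnext() throughout this patch (CopyTo() above, and ATRewriteTable() and validateCheckConstraint() further down) is, in outline, the following sketch; rel and the choice of snapshot are placeholders:

TableScanDesc scan;
TupleTableSlot *slot;

slot = table_gimmegimmeslot(rel, NULL);
scan = table_beginscan(rel, GetActiveSnapshot(), 0, NULL);

while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
{
    slot_getallattrs(slot);     /* make tts_values / tts_isnull valid */

    /* ... per-row work using slot->tts_values / slot->tts_isnull ... */
}

table_endscan(scan);
ExecDropSingleTupleTableSlot(slot);

Using a slot obtained from the table AM, rather than a hard-coded heap tuple, is what lets the same loop work for non-heap AMs.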
+ */ + if (insertMethod == CIM_SINGLE || insertMethod == CIM_MULTI_CONDITIONAL) + { + singleslot = table_gimmegimmeslot(resultRelInfo->ri_RelationDesc, + &estate->es_tupleTable); } has_before_insert_row_trig = (resultRelInfo->ri_TrigDesc && @@ -2628,9 +2620,6 @@ CopyFrom(CopyState cstate) */ ExecBSInsertTriggers(estate, resultRelInfo); - values = (Datum *) palloc(tupDesc->natts * sizeof(Datum)); - nulls = (bool *) palloc(tupDesc->natts * sizeof(bool)); - bistate = GetBulkInsertState(); econtext = GetPerTupleExprContext(estate); @@ -2642,7 +2631,7 @@ CopyFrom(CopyState cstate) for (;;) { - TupleTableSlot *slot; + TupleTableSlot *myslot; bool skip_tuple; CHECK_FOR_INTERRUPTS(); @@ -2657,28 +2646,46 @@ CopyFrom(CopyState cstate) ResetPerTupleExprContext(estate); } + if (insertMethod == CIM_SINGLE || proute) + { + myslot = singleslot; + Assert(myslot != NULL); + } + else + { + if (bufferedSlots[nBufferedTuples] == NULL) + { + const TupleTableSlotOps *tts_cb; + + tts_cb = table_slot_callbacks(resultRelInfo->ri_RelationDesc); + + bufferedSlots[nBufferedTuples] = + MakeSingleTupleTableSlot(RelationGetDescr(resultRelInfo->ri_RelationDesc), + tts_cb); + } + myslot = bufferedSlots[nBufferedTuples]; + } + /* Switch into its memory context */ MemoryContextSwitchTo(GetPerTupleMemoryContext(estate)); - if (!NextCopyFrom(cstate, econtext, values, nulls)) + ExecClearTuple(myslot); + + /* Directly store the values/nulls array in the slot */ + if (!NextCopyFrom(cstate, econtext, myslot->tts_values, myslot->tts_isnull)) break; - /* And now we can form the input tuple. */ - tuple = heap_form_tuple(tupDesc, values, nulls); + ExecStoreVirtualTuple(myslot); /* * Constraints might reference the tableoid column, so initialize * t_tableOid before evaluating them. */ - tuple->t_tableOid = RelationGetRelid(target_resultRelInfo->ri_RelationDesc); + myslot->tts_tableOid = RelationGetRelid(target_resultRelInfo->ri_RelationDesc); /* Triggers and stuff need to be invoked in query context. */ MemoryContextSwitchTo(oldcontext); - /* Place tuple in tuple slot --- but slot shouldn't free it */ - slot = myslot; - ExecStoreHeapTuple(tuple, slot, false); - /* Determine the partition to heap_insert the tuple into */ if (proute) { @@ -2690,7 +2697,7 @@ CopyFrom(CopyState cstate) * if the found partition is not suitable for INSERTs. */ resultRelInfo = ExecFindPartition(mtstate, target_resultRelInfo, - proute, slot, estate); + proute, myslot, estate); if (prevResultRelInfo != resultRelInfo) { @@ -2707,11 +2714,19 @@ CopyFrom(CopyState cstate) ExprContext *swapcontext; CopyFromInsertBatch(cstate, estate, mycid, hi_options, - prevResultRelInfo, myslot, bistate, - nBufferedTuples, bufferedTuples, + prevResultRelInfo, bistate, + nBufferedTuples, bufferedSlots, firstBufferedLineNo); nBufferedTuples = 0; - bufferedTuplesSize = 0; + + /* force new slots to be used */ + for (int i = 0; i < MAX_BUFFERED_TUPLES; i++) + { + if (bufferedSlots[i] == NULL) + continue; + ExecDropSingleTupleTableSlot(bufferedSlots[i]); + bufferedSlots[i] = NULL; + } Assert(secondaryExprContext); @@ -2820,36 +2835,57 @@ CopyFrom(CopyState cstate) * Otherwise, just remember the original unconverted * tuple, to avoid a needless round trip conversion. */ - cstate->transition_capture->tcs_original_insert_tuple = tuple; + cstate->transition_capture->tcs_original_insert_tuple = + ExecFetchSlotHeapTuple(myslot, false, NULL); cstate->transition_capture->tcs_map = NULL; } } + /* * We might need to convert from the root rowtype to the partition * rowtype. 
*/ map = resultRelInfo->ri_PartitionInfo->pi_RootToPartitionMap; - if (map != NULL) + if (insertMethod == CIM_SINGLE || + (insertMethod == CIM_MULTI_CONDITIONAL && !leafpart_use_multi_insert)) + { + if (map != NULL) + { + TupleTableSlot *new_slot; + + new_slot = resultRelInfo->ri_PartitionInfo->pi_PartitionTupleSlot; + myslot = execute_attr_map_slot(map->attrMap, myslot, new_slot); + } + } + else if (insertMethod == CIM_MULTI_CONDITIONAL) { TupleTableSlot *new_slot; - MemoryContext oldcontext; - new_slot = resultRelInfo->ri_PartitionInfo->pi_PartitionTupleSlot; - Assert(new_slot != NULL); + if (bufferedSlots[nBufferedTuples] == NULL) + { + const TupleTableSlotOps *tts_cb; + + tts_cb = table_slot_callbacks(resultRelInfo->ri_RelationDesc); + bufferedSlots[nBufferedTuples] = + MakeSingleTupleTableSlot(RelationGetDescr(resultRelInfo->ri_RelationDesc), + tts_cb); + } - slot = execute_attr_map_slot(map->attrMap, slot, new_slot); + new_slot = bufferedSlots[nBufferedTuples]; - /* - * Get the tuple in the per-tuple context, so that it will be - * freed after each batch insert. - */ - oldcontext = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate)); - tuple = ExecCopySlotHeapTuple(slot); - MemoryContextSwitchTo(oldcontext); + if (map != NULL) + myslot = execute_attr_map_slot(map->attrMap, myslot, new_slot); + else + { + ExecCopySlot(new_slot, myslot); + myslot = new_slot; + } + } + else + { + elog(ERROR, "huh"); } - - tuple->t_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc); } skip_tuple = false; @@ -2857,12 +2893,8 @@ CopyFrom(CopyState cstate) /* BEFORE ROW INSERT Triggers */ if (has_before_insert_row_trig) { - slot = ExecBRInsertTriggers(estate, resultRelInfo, slot); - - if (slot == NULL) /* "do nothing" */ - skip_tuple = true; - else /* trigger might have changed tuple */ - tuple = ExecFetchSlotHeapTuple(slot, true, NULL); + if (!ExecBRInsertTriggers(estate, resultRelInfo, myslot)) + skip_tuple = true; /* "do nothing" */ } if (!skip_tuple) @@ -2870,7 +2902,7 @@ CopyFrom(CopyState cstate) if (has_instead_insert_row_trig) { /* Pass the data to the INSTEAD ROW INSERT trigger */ - ExecIRInsertTriggers(estate, resultRelInfo, slot); + ExecIRInsertTriggers(estate, resultRelInfo, myslot); } else { @@ -2880,7 +2912,7 @@ CopyFrom(CopyState cstate) */ if (resultRelInfo->ri_FdwRoutine == NULL && resultRelInfo->ri_RelationDesc->rd_att->constr) - ExecConstraints(resultRelInfo, slot, estate); + ExecConstraints(resultRelInfo, myslot, estate); /* * Also check the tuple against the partition constraint, if @@ -2890,7 +2922,7 @@ CopyFrom(CopyState cstate) */ if (resultRelInfo->ri_PartitionCheck && (proute == NULL || has_before_insert_row_trig)) - ExecPartitionCheck(resultRelInfo, slot, estate, true); + ExecPartitionCheck(resultRelInfo, myslot, estate, true); /* * Perform multi-inserts when enabled, or when loading a @@ -2902,8 +2934,10 @@ CopyFrom(CopyState cstate) /* Add this tuple to the tuple buffer */ if (nBufferedTuples == 0) firstBufferedLineNo = cstate->cur_lineno; - bufferedTuples[nBufferedTuples++] = tuple; - bufferedTuplesSize += tuple->t_len; + + Assert(bufferedSlots[nBufferedTuples] == myslot); + nBufferedTuples++; + bufferedSlotsSize += cstate->line_buf.len; /* * If the buffer filled up, flush it. Also flush if the @@ -2912,14 +2946,14 @@ CopyFrom(CopyState cstate) * buffer when the tuples are exceptionally wide. 
*/ if (nBufferedTuples == MAX_BUFFERED_TUPLES || - bufferedTuplesSize > 65535) + bufferedSlotsSize > 65535) { CopyFromInsertBatch(cstate, estate, mycid, hi_options, - resultRelInfo, myslot, bistate, - nBufferedTuples, bufferedTuples, + resultRelInfo, bistate, + nBufferedTuples, bufferedSlots, firstBufferedLineNo); nBufferedTuples = 0; - bufferedTuplesSize = 0; + bufferedSlotsSize = 0; } } else @@ -2929,39 +2963,38 @@ CopyFrom(CopyState cstate) /* OK, store the tuple */ if (resultRelInfo->ri_FdwRoutine != NULL) { - slot = resultRelInfo->ri_FdwRoutine->ExecForeignInsert(estate, - resultRelInfo, - slot, - NULL); + myslot = resultRelInfo->ri_FdwRoutine->ExecForeignInsert(estate, + resultRelInfo, + myslot, + NULL); - if (slot == NULL) /* "do nothing" */ + if (myslot == NULL) /* "do nothing" */ continue; /* next tuple please */ - /* FDW might have changed tuple */ - tuple = ExecFetchSlotHeapTuple(slot, true, NULL); - /* * AFTER ROW Triggers might reference the tableoid * column, so initialize t_tableOid before evaluating * them. */ - tuple->t_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc); + myslot->tts_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc); } else - heap_insert(resultRelInfo->ri_RelationDesc, tuple, - mycid, hi_options, bistate); + { + /* OK, store the tuple and create index entries for it */ + table_insert(resultRelInfo->ri_RelationDesc, myslot, mycid, hi_options, + bistate); + } /* And create index entries for it */ if (resultRelInfo->ri_NumIndices > 0) - recheckIndexes = ExecInsertIndexTuples(slot, - &(tuple->t_self), + recheckIndexes = ExecInsertIndexTuples(myslot, estate, false, NULL, NIL); /* AFTER ROW INSERT Triggers */ - ExecARInsertTriggers(estate, resultRelInfo, tuple, + ExecARInsertTriggers(estate, resultRelInfo, myslot, recheckIndexes, cstate->transition_capture); list_free(recheckIndexes); @@ -2983,21 +3016,33 @@ CopyFrom(CopyState cstate) if (insertMethod == CIM_MULTI_CONDITIONAL) { CopyFromInsertBatch(cstate, estate, mycid, hi_options, - prevResultRelInfo, myslot, bistate, - nBufferedTuples, bufferedTuples, + prevResultRelInfo, bistate, + nBufferedTuples, bufferedSlots, firstBufferedLineNo); } else CopyFromInsertBatch(cstate, estate, mycid, hi_options, - resultRelInfo, myslot, bistate, - nBufferedTuples, bufferedTuples, + resultRelInfo, bistate, + nBufferedTuples, bufferedSlots, firstBufferedLineNo); } + /* free slots */ + if (bufferedSlots) + { + for (int i = 0; i < MAX_BUFFERED_TUPLES; i++) + { + if (bufferedSlots[i] == NULL) + continue; + ExecDropSingleTupleTableSlot(bufferedSlots[i]); + bufferedSlots[i] = NULL; + } + } + /* Done, clean up */ error_context_stack = errcallback.previous; - FreeBulkInsertState(bistate); + ReleaseBulkInsertStatePin(bistate); MemoryContextSwitchTo(oldcontext); @@ -3014,9 +3059,6 @@ CopyFrom(CopyState cstate) /* Handle queued AFTER triggers */ AfterTriggerEndQuery(estate); - pfree(values); - pfree(nulls); - ExecResetTupleTable(estate->es_tupleTable, false); /* Allow the FDW to shut down */ @@ -3041,7 +3083,7 @@ CopyFrom(CopyState cstate) * indexes since those use WAL anyway) */ if (hi_options & HEAP_INSERT_SKIP_WAL) - heap_sync(cstate->rel); + table_sync(cstate->rel); return processed; } @@ -3054,8 +3096,7 @@ CopyFrom(CopyState cstate) static void CopyFromInsertBatch(CopyState cstate, EState *estate, CommandId mycid, int hi_options, ResultRelInfo *resultRelInfo, - TupleTableSlot *myslot, BulkInsertState bistate, - int nBufferedTuples, HeapTuple *bufferedTuples, + BulkInsertState bistate, int nBufferedTuples, 
TupleTableSlot **bufferedSlots, uint64 firstBufferedLineNo) { MemoryContext oldcontext; @@ -3075,12 +3116,12 @@ CopyFromInsertBatch(CopyState cstate, EState *estate, CommandId mycid, * before calling it. */ oldcontext = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate)); - heap_multi_insert(resultRelInfo->ri_RelationDesc, - bufferedTuples, - nBufferedTuples, - mycid, - hi_options, - bistate); + table_multi_insert(resultRelInfo->ri_RelationDesc, + bufferedSlots, + nBufferedTuples, + mycid, + hi_options, + bistate); MemoryContextSwitchTo(oldcontext); /* @@ -3094,12 +3135,10 @@ CopyFromInsertBatch(CopyState cstate, EState *estate, CommandId mycid, List *recheckIndexes; cstate->cur_lineno = firstBufferedLineNo + i; - ExecStoreHeapTuple(bufferedTuples[i], myslot, false); recheckIndexes = - ExecInsertIndexTuples(myslot, &(bufferedTuples[i]->t_self), - estate, false, NULL, NIL); + ExecInsertIndexTuples(bufferedSlots[i], estate, false, NULL, NIL); ExecARInsertTriggers(estate, resultRelInfo, - bufferedTuples[i], + bufferedSlots[i], recheckIndexes, cstate->transition_capture); list_free(recheckIndexes); } @@ -3117,7 +3156,7 @@ CopyFromInsertBatch(CopyState cstate, EState *estate, CommandId mycid, { cstate->cur_lineno = firstBufferedLineNo + i; ExecARInsertTriggers(estate, resultRelInfo, - bufferedTuples[i], + bufferedSlots[i], NIL, cstate->transition_capture); } } diff --git a/src/backend/commands/createas.c b/src/backend/commands/createas.c index d01b258b65..d346bf0749 100644 --- a/src/backend/commands/createas.c +++ b/src/backend/commands/createas.c @@ -26,6 +26,7 @@ #include "access/reloptions.h" #include "access/htup_details.h" +#include "access/tableam.h" #include "access/sysattr.h" #include "access/xact.h" #include "access/xlog.h" @@ -59,7 +60,8 @@ typedef struct ObjectAddress reladdr; /* address of rel, for ExecCreateTableAs */ CommandId output_cid; /* cmin to insert in output tuples */ int hi_options; /* heap_insert performance options */ - BulkInsertState bistate; /* bulk insert state */ + void *bistate; /* bulk insert state */ + TupleTableSlot *slot; } DR_intorel; /* utility functions for CTAS definition creation */ @@ -107,6 +109,9 @@ create_ctas_internal(List *attrList, IntoClause *into) create->oncommit = into->onCommit; create->tablespacename = into->tableSpaceName; create->if_not_exists = false; + create->accessMethod = into->accessMethod; + + // PBORKED: toast options /* * Create the relation. (This will error out if there's an existing view, @@ -550,6 +555,7 @@ intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo) myState->rel = intoRelationDesc; myState->reladdr = intoRelationAddr; myState->output_cid = GetCurrentCommandId(true); + myState->slot = table_gimmegimmeslot(intoRelationDesc, NULL); /* * We can skip WAL-logging the insertions, unless PITR or streaming @@ -570,19 +576,21 @@ static bool intorel_receive(TupleTableSlot *slot, DestReceiver *self) { DR_intorel *myState = (DR_intorel *) self; - HeapTuple tuple; /* - * get the heap tuple out of the tuple table slot, making sure we have a - * writable copy + * Ensure input tuple is the right format for the target relation. 
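Design note: the incoming slot may use a different slot implementation than the target table's AM (a virtual slot from the executor, or a non-heap slot once AMs such as zheap exist), so the receiver copies into an AM-provided slot only when the slot ops differ before calling table_insert(). The receive path that follows amounts to:

if (slot->tts_ops != myState->slot->tts_ops)
{
    ExecCopySlot(myState->slot, slot);
    slot = myState->slot;
}

table_insert(myState->rel, slot, myState->output_cid,
             myState->hi_options, myState->bistate);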
*/ - tuple = ExecCopySlotHeapTuple(slot); + if (slot->tts_ops != myState->slot->tts_ops) + { + ExecCopySlot(myState->slot, slot); + slot = myState->slot; + } - heap_insert(myState->rel, - tuple, - myState->output_cid, - myState->hi_options, - myState->bistate); + table_insert(myState->rel, + slot, + myState->output_cid, + myState->hi_options, + myState->bistate); /* We know this is a newly created relation, so there are no indexes */ @@ -597,11 +605,12 @@ intorel_shutdown(DestReceiver *self) { DR_intorel *myState = (DR_intorel *) self; + ExecDropSingleTupleTableSlot(myState->slot); FreeBulkInsertState(myState->bistate); /* If we skipped using WAL, must heap_sync before commit */ if (myState->hi_options & HEAP_INSERT_SKIP_WAL) - heap_sync(myState->rel); + table_sync(myState->rel); /* close rel, but keep lock until commit */ heap_close(myState->rel, NoLock); diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index f640f46972..e198a7319c 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -26,6 +26,7 @@ #include "access/genam.h" #include "access/heapam.h" #include "access/htup_details.h" +#include "access/tableam.h" #include "access/xact.h" #include "access/xloginsert.h" #include "access/xlogutils.h" @@ -98,7 +99,7 @@ static int errdetail_busy_db(int notherbackends, int npreparedxacts); Oid createdb(ParseState *pstate, const CreatedbStmt *stmt) { - HeapScanDesc scan; + TableScanDesc scan; Relation rel; Oid src_dboid; Oid src_owner; @@ -590,8 +591,8 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) * each one to the new database. */ rel = heap_open(TableSpaceRelationId, AccessShareLock); - scan = heap_beginscan_catalog(rel, 0, NULL); - while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + scan = table_beginscan_catalog(rel, 0, NULL); + while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL) { Form_pg_tablespace spaceform = (Form_pg_tablespace) GETSTRUCT(tuple); Oid srctablespace = spaceform->oid; @@ -644,7 +645,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) XLOG_DBASE_CREATE | XLR_SPECIAL_REL_UPDATE); } } - heap_endscan(scan); + table_endscan(scan); heap_close(rel, AccessShareLock); /* @@ -1871,12 +1872,12 @@ static void remove_dbtablespaces(Oid db_id) { Relation rel; - HeapScanDesc scan; + TableScanDesc scan; HeapTuple tuple; rel = heap_open(TableSpaceRelationId, AccessShareLock); - scan = heap_beginscan_catalog(rel, 0, NULL); - while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + scan = table_beginscan_catalog(rel, 0, NULL); + while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL) { Form_pg_tablespace spcform = (Form_pg_tablespace) GETSTRUCT(tuple); Oid dsttablespace = spcform->oid; @@ -1918,7 +1919,7 @@ remove_dbtablespaces(Oid db_id) pfree(dstpath); } - heap_endscan(scan); + table_endscan(scan); heap_close(rel, AccessShareLock); } @@ -1939,12 +1940,12 @@ check_db_file_conflict(Oid db_id) { bool result = false; Relation rel; - HeapScanDesc scan; + TableScanDesc scan; HeapTuple tuple; rel = heap_open(TableSpaceRelationId, AccessShareLock); - scan = heap_beginscan_catalog(rel, 0, NULL); - while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + scan = table_beginscan_catalog(rel, 0, NULL); + while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL) { Form_pg_tablespace spcform = (Form_pg_tablespace) GETSTRUCT(tuple); Oid dsttablespace = spcform->oid; @@ -1968,7 +1969,7 @@ check_db_file_conflict(Oid db_id) 
pfree(dstpath); } - heap_endscan(scan); + table_endscan(scan); heap_close(rel, AccessShareLock); return result; diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index 6c06167fb2..1420c12af8 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -18,6 +18,7 @@ #include "access/amapi.h" #include "access/htup_details.h" #include "access/reloptions.h" +#include "access/tableam.h" #include "access/sysattr.h" #include "access/xact.h" #include "catalog/catalog.h" @@ -2338,7 +2339,7 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind, { Oid objectOid; Relation relationRelation; - HeapScanDesc scan; + TableScanDesc scan; ScanKeyData scan_keys[1]; HeapTuple tuple; MemoryContext private_context; @@ -2412,8 +2413,8 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind, * rels will be processed indirectly by reindex_relation). */ relationRelation = heap_open(RelationRelationId, AccessShareLock); - scan = heap_beginscan_catalog(relationRelation, num_keys, scan_keys); - while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + scan = table_beginscan_catalog(relationRelation, num_keys, scan_keys); + while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL) { Form_pg_class classtuple = (Form_pg_class) GETSTRUCT(tuple); Oid relid = classtuple->oid; @@ -2471,7 +2472,7 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind, MemoryContextSwitchTo(old); } - heap_endscan(scan); + table_endscan(scan); heap_close(relationRelation, AccessShareLock); /* Now reindex each rel in a separate transaction */ diff --git a/src/backend/commands/matview.c b/src/backend/commands/matview.c index a171ebabf8..20cc2ae597 100644 --- a/src/backend/commands/matview.c +++ b/src/backend/commands/matview.c @@ -16,6 +16,7 @@ #include "access/htup_details.h" #include "access/multixact.h" +#include "access/tableam.h" #include "access/xact.h" #include "access/xlog.h" #include "catalog/catalog.h" @@ -52,7 +53,8 @@ typedef struct Relation transientrel; /* relation to write to */ CommandId output_cid; /* cmin to insert in output tuples */ int hi_options; /* heap_insert performance options */ - BulkInsertState bistate; /* bulk insert state */ + void *bistate; /* bulk insert state */ + TupleTableSlot *slot; } DR_transientrel; static int matview_maintenance_depth = 0; @@ -454,6 +456,7 @@ transientrel_startup(DestReceiver *self, int operation, TupleDesc typeinfo) */ myState->transientrel = transientrel; myState->output_cid = GetCurrentCommandId(true); + myState->slot = table_gimmegimmeslot(transientrel, NULL); /* * We can skip WAL-logging the insertions, unless PITR or streaming @@ -475,25 +478,24 @@ static bool transientrel_receive(TupleTableSlot *slot, DestReceiver *self) { DR_transientrel *myState = (DR_transientrel *) self; - HeapTuple tuple; /* - * get the heap tuple out of the tuple table slot, making sure we have a - * writable copy + * Ensure input tuple is the right format for the target relation. */ - tuple = ExecCopySlotHeapTuple(slot); + if (slot->tts_ops != myState->slot->tts_ops) + { + ExecCopySlot(myState->slot, slot); + slot = myState->slot; + } - heap_insert(myState->transientrel, - tuple, - myState->output_cid, - myState->hi_options, - myState->bistate); + table_insert(myState->transientrel, + slot, + myState->output_cid, + myState->hi_options, + myState->bistate); /* We know this is a newly created relation, so there are no indexes */ - /* Free the copied tuple. 
*/ - heap_freetuple(tuple); - return true; } @@ -505,11 +507,12 @@ transientrel_shutdown(DestReceiver *self) { DR_transientrel *myState = (DR_transientrel *) self; + ExecDropSingleTupleTableSlot(myState->slot); FreeBulkInsertState(myState->bistate); /* If we skipped using WAL, must heap_sync before commit */ if (myState->hi_options & HEAP_INSERT_SKIP_WAL) - heap_sync(myState->transientrel); + table_sync(myState->transientrel); /* close transientrel, but keep lock until commit */ heap_close(myState->transientrel, NoLock); diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index d6d0de1b01..20f1e5f2c6 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -20,6 +20,7 @@ #include "access/multixact.h" #include "access/reloptions.h" #include "access/relscan.h" +#include "access/tableam.h" #include "access/sysattr.h" #include "access/tupconvert.h" #include "access/xact.h" @@ -536,6 +537,8 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, static char *validnsps[] = HEAP_RELOPT_NAMESPACES; Oid ofTypeId; ObjectAddress address; + const char *accessMethod = NULL; + Oid accessMethodId = InvalidOid; /* * Truncate relname to appropriate length (probably a waste of time, as @@ -717,6 +720,35 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, attr->attidentity = colDef->identity; } + /* + * If the statement hasn't specified an access method, but we're defining + * a type of relation that needs one, use the default. + */ + if (stmt->accessMethod != NULL) + accessMethod = stmt->accessMethod; + else if (relkind == RELKIND_RELATION || + relkind == RELKIND_TOASTVALUE || + relkind == RELKIND_MATVIEW || + relkind == RELKIND_PARTITIONED_TABLE) + accessMethod = default_table_access_method; + + /* + * look up the access method, verify it can handle the requested features + */ + if (accessMethod != NULL) + { + HeapTuple tuple; + + tuple = SearchSysCache1(AMNAME, PointerGetDatum(accessMethod)); + if (!HeapTupleIsValid(tuple)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("table access method \"%s\" does not exist", + accessMethod))); + accessMethodId = ((Form_pg_am) GETSTRUCT(tuple))->oid; + ReleaseSysCache(tuple); + } + /* * Create the relation. Inherited defaults and constraints are passed in * for immediate handling --- since they don't need parsing, they can be @@ -729,6 +761,7 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, InvalidOid, ofTypeId, ownerId, + accessMethodId, descriptor, list_concat(cookedDefaults, old_constraints), @@ -1576,6 +1609,7 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged, */ CheckTableForSerializableConflictIn(rel); + // PBORKED: Need to abstract this minmulti = GetOldestMultiXactId(); /* @@ -1584,6 +1618,8 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged, * Create a new empty storage file for the relation, and assign it * as the relfilenode value. The old storage file is scheduled for * deletion at commit. 
+ * + * PBORKED: needs to be a callback */ RelationSetNewRelfilenode(rel, rel->rd_rel->relpersistence, RecentXmin, minmulti); @@ -4523,7 +4559,7 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode) ListCell *l; EState *estate; CommandId mycid; - BulkInsertState bistate; + void *bistate; int hi_options; ExprState *partqualstate = NULL; @@ -4627,12 +4663,9 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode) if (newrel || needscan) { ExprContext *econtext; - Datum *values; - bool *isnull; TupleTableSlot *oldslot; TupleTableSlot *newslot; - HeapScanDesc scan; - HeapTuple tuple; + TableScanDesc scan; MemoryContext oldCxt; List *dropped_attrs = NIL; ListCell *lc; @@ -4664,15 +4697,16 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode) * tuples are the same, the tupDescs might not be (consider ADD COLUMN * without a default). */ - oldslot = MakeSingleTupleTableSlot(oldTupDesc, &TTSOpsHeapTuple); - newslot = MakeSingleTupleTableSlot(newTupDesc, &TTSOpsHeapTuple); + // PBORKED: Explain about using oldTupDesc when not rewriting + oldslot = MakeSingleTupleTableSlot(tab->rewrite > 0 ? oldTupDesc : newTupDesc, + table_slot_callbacks(oldrel)); + newslot = MakeSingleTupleTableSlot(newTupDesc, + table_slot_callbacks(newrel ? newrel : oldrel)); - /* Preallocate values/isnull arrays */ - i = Max(newTupDesc->natts, oldTupDesc->natts); - values = (Datum *) palloc(i * sizeof(Datum)); - isnull = (bool *) palloc(i * sizeof(bool)); - memset(values, 0, i * sizeof(Datum)); - memset(isnull, true, i * sizeof(bool)); + memset(newslot->tts_values, 0, + sizeof(Datum) * newTupDesc->natts); + memset(newslot->tts_isnull, 0, + sizeof(bool) * newTupDesc->natts); /* * Any attributes that are dropped according to the new tuple @@ -4690,7 +4724,7 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode) * checking all the constraints. */ snapshot = RegisterSnapshot(GetLatestSnapshot()); - scan = heap_beginscan(oldrel, snapshot, 0, NULL); + scan = table_beginscan(oldrel, snapshot, 0, NULL); /* * Switch to per-tuple memory context and reset it for each tuple @@ -4698,55 +4732,69 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode) */ oldCxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate)); - while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + while (table_scan_getnextslot(scan, ForwardScanDirection, oldslot)) { + TupleTableSlot *insertslot; + if (tab->rewrite > 0) { /* Extract data from old tuple */ - heap_deform_tuple(tuple, oldTupDesc, values, isnull); + slot_getallattrs(oldslot); + ExecClearTuple(newslot); + + /* copy attributes */ + memcpy(newslot->tts_values, oldslot->tts_values, + sizeof(Datum) * oldslot->tts_nvalid); + memcpy(newslot->tts_isnull, oldslot->tts_isnull, + sizeof(bool) * oldslot->tts_nvalid); /* Set dropped attributes to null in new tuple */ foreach(lc, dropped_attrs) - isnull[lfirst_int(lc)] = true; + newslot->tts_isnull[lfirst_int(lc)] = true; /* * Process supplied expressions to replace selected columns. * Expression inputs come from the old tuple. */ - ExecStoreHeapTuple(tuple, oldslot, false); econtext->ecxt_scantuple = oldslot; foreach(l, tab->newvals) { NewColumnValue *ex = lfirst(l); - values[ex->attnum - 1] = ExecEvalExpr(ex->exprstate, - econtext, - &isnull[ex->attnum - 1]); + newslot->tts_values[ex->attnum - 1] + = ExecEvalExpr(ex->exprstate, + econtext, + &newslot->tts_isnull[ex->attnum - 1]); } - /* - * Form the new tuple. 
Note that we don't explicitly pfree it, - * since the per-tuple memory context will be reset shortly. - */ - tuple = heap_form_tuple(newTupDesc, values, isnull); + ExecStoreVirtualTuple(newslot); /* * Constraints might reference the tableoid column, so * initialize t_tableOid before evaluating them. */ - tuple->t_tableOid = RelationGetRelid(oldrel); + newslot->tts_tableOid = RelationGetRelid(oldrel); + insertslot = newslot; + } + else + { + /* + * If there's no rewrite, old and new table are guaranteed to + * have the same AM, so we can just use the old slot to + * verify new constraints etc. + */ + insertslot = oldslot; } /* Now check any constraints on the possibly-changed tuple */ - ExecStoreHeapTuple(tuple, newslot, false); - econtext->ecxt_scantuple = newslot; + econtext->ecxt_scantuple = insertslot; foreach(l, notnull_attrs) { int attn = lfirst_int(l); - if (heap_attisnull(tuple, attn + 1, newTupDesc)) + if (slot_attisnull(insertslot, attn + 1)) { Form_pg_attribute attr = TupleDescAttr(newTupDesc, attn); @@ -4795,7 +4843,7 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode) /* Write the tuple out to the new relation */ if (newrel) - heap_insert(newrel, tuple, mycid, hi_options, bistate); + table_insert(newrel, insertslot, mycid, hi_options, bistate); ResetExprContext(econtext); @@ -4803,7 +4851,7 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode) } MemoryContextSwitchTo(oldCxt); - heap_endscan(scan); + table_endscan(scan); UnregisterSnapshot(snapshot); ExecDropSingleTupleTableSlot(oldslot); @@ -4819,7 +4867,7 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode) /* If we skipped writing WAL, then we need to sync the heap. */ if (hi_options & HEAP_INSERT_SKIP_WAL) - heap_sync(newrel); + table_sync(newrel); heap_close(newrel, NoLock); } @@ -5198,7 +5246,7 @@ find_typed_table_dependencies(Oid typeOid, const char *typeName, DropBehavior be { Relation classRel; ScanKeyData key[1]; - HeapScanDesc scan; + TableScanDesc scan; HeapTuple tuple; List *result = NIL; @@ -5209,9 +5257,9 @@ find_typed_table_dependencies(Oid typeOid, const char *typeName, DropBehavior be BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(typeOid)); - scan = heap_beginscan_catalog(classRel, 1, key); + scan = table_beginscan_catalog(classRel, 1, key); - while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL) { Form_pg_class classform = (Form_pg_class) GETSTRUCT(tuple); @@ -5225,7 +5273,7 @@ find_typed_table_dependencies(Oid typeOid, const char *typeName, DropBehavior be result = lappend_oid(result, classform->oid); } - heap_endscan(scan); + table_endscan(scan); heap_close(classRel, AccessShareLock); return result; @@ -8323,9 +8371,7 @@ validateCheckConstraint(Relation rel, HeapTuple constrtup) char *conbin; Expr *origexpr; ExprState *exprstate; - TupleDesc tupdesc; - HeapScanDesc scan; - HeapTuple tuple; + TableScanDesc scan; ExprContext *econtext; MemoryContext oldcxt; TupleTableSlot *slot; @@ -8360,12 +8406,11 @@ validateCheckConstraint(Relation rel, HeapTuple constrtup) exprstate = ExecPrepareExpr(origexpr, estate); econtext = GetPerTupleExprContext(estate); - tupdesc = RelationGetDescr(rel); - slot = MakeSingleTupleTableSlot(tupdesc, &TTSOpsHeapTuple); + slot = table_gimmegimmeslot(rel, NULL); econtext->ecxt_scantuple = slot; snapshot = RegisterSnapshot(GetLatestSnapshot()); - scan = heap_beginscan(rel, snapshot, 0, NULL); + scan = table_beginscan(rel, snapshot, 
0, NULL); /* * Switch to per-tuple memory context and reset it for each tuple @@ -8373,10 +8418,8 @@ validateCheckConstraint(Relation rel, HeapTuple constrtup) */ oldcxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate)); - while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + while (table_scan_getnextslot(scan, ForwardScanDirection, slot)) { - ExecStoreHeapTuple(tuple, slot, false); - if (!ExecCheck(exprstate, econtext)) ereport(ERROR, (errcode(ERRCODE_CHECK_VIOLATION), @@ -8388,7 +8431,7 @@ validateCheckConstraint(Relation rel, HeapTuple constrtup) } MemoryContextSwitchTo(oldcxt); - heap_endscan(scan); + table_endscan(scan); UnregisterSnapshot(snapshot); ExecDropSingleTupleTableSlot(slot); FreeExecutorState(estate); @@ -8407,8 +8450,8 @@ validateForeignKeyConstraint(char *conname, Oid pkindOid, Oid constraintOid) { - HeapScanDesc scan; - HeapTuple tuple; + TableScanDesc scan; + TupleTableSlot *slot; Trigger trig; Snapshot snapshot; @@ -8443,9 +8486,10 @@ validateForeignKeyConstraint(char *conname, * ereport(ERROR) and that's that. */ snapshot = RegisterSnapshot(GetLatestSnapshot()); - scan = heap_beginscan(rel, snapshot, 0, NULL); + scan = table_beginscan(rel, snapshot, 0, NULL); + slot = table_gimmegimmeslot(rel, NULL); - while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + while (table_scan_getnextslot(scan, ForwardScanDirection, slot)) { FunctionCallInfoData fcinfo; TriggerData trigdata; @@ -8463,19 +8507,19 @@ validateForeignKeyConstraint(char *conname, trigdata.type = T_TriggerData; trigdata.tg_event = TRIGGER_EVENT_INSERT | TRIGGER_EVENT_ROW; trigdata.tg_relation = rel; - trigdata.tg_trigtuple = tuple; + trigdata.tg_trigtuple = ExecFetchSlotHeapTuple(slot, true, NULL); + trigdata.tg_trigslot = slot; trigdata.tg_newtuple = NULL; trigdata.tg_trigger = &trig; - trigdata.tg_trigtuplebuf = scan->rs_cbuf; - trigdata.tg_newtuplebuf = InvalidBuffer; fcinfo.context = (Node *) &trigdata; RI_FKey_check_ins(&fcinfo); } - heap_endscan(scan); + table_endscan(scan); UnregisterSnapshot(snapshot); + ExecDropSingleTupleTableSlot(slot); } static void @@ -11001,7 +11045,7 @@ AlterTableMoveAll(AlterTableMoveAllStmt *stmt) ListCell *l; ScanKeyData key[1]; Relation rel; - HeapScanDesc scan; + TableScanDesc scan; HeapTuple tuple; Oid orig_tablespaceoid; Oid new_tablespaceoid; @@ -11066,8 +11110,8 @@ AlterTableMoveAll(AlterTableMoveAllStmt *stmt) ObjectIdGetDatum(orig_tablespaceoid)); rel = heap_open(RelationRelationId, AccessShareLock); - scan = heap_beginscan_catalog(rel, 1, key); - while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + scan = table_beginscan_catalog(rel, 1, key); + while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL) { Form_pg_class relForm = (Form_pg_class) GETSTRUCT(tuple); Oid relOid = relForm->oid; @@ -11125,7 +11169,7 @@ AlterTableMoveAll(AlterTableMoveAllStmt *stmt) relations = lappend_oid(relations, relOid); } - heap_endscan(scan); + table_endscan(scan); heap_close(rel, AccessShareLock); if (relations == NIL) diff --git a/src/backend/commands/tablespace.c b/src/backend/commands/tablespace.c index 4a714f6e2b..ca429731d4 100644 --- a/src/backend/commands/tablespace.c +++ b/src/backend/commands/tablespace.c @@ -53,6 +53,7 @@ #include "access/heapam.h" #include "access/reloptions.h" #include "access/htup_details.h" +#include "access/tableam.h" #include "access/sysattr.h" #include "access/xact.h" #include "access/xlog.h" @@ -406,7 +407,7 @@ DropTableSpace(DropTableSpaceStmt *stmt) { #ifdef HAVE_SYMLINK char 
*tablespacename = stmt->tablespacename; - HeapScanDesc scandesc; + TableScanDesc scandesc; Relation rel; HeapTuple tuple; Form_pg_tablespace spcform; @@ -422,8 +423,8 @@ DropTableSpace(DropTableSpaceStmt *stmt) Anum_pg_tablespace_spcname, BTEqualStrategyNumber, F_NAMEEQ, CStringGetDatum(tablespacename)); - scandesc = heap_beginscan_catalog(rel, 1, entry); - tuple = heap_getnext(scandesc, ForwardScanDirection); + scandesc = table_beginscan_catalog(rel, 1, entry); + tuple = heap_scan_getnext(scandesc, ForwardScanDirection); if (!HeapTupleIsValid(tuple)) { @@ -440,7 +441,7 @@ DropTableSpace(DropTableSpaceStmt *stmt) (errmsg("tablespace \"%s\" does not exist, skipping", tablespacename))); /* XXX I assume I need one or both of these next two calls */ - heap_endscan(scandesc); + table_endscan(scandesc); heap_close(rel, NoLock); } return; @@ -468,7 +469,7 @@ DropTableSpace(DropTableSpaceStmt *stmt) */ CatalogTupleDelete(rel, &tuple->t_self); - heap_endscan(scandesc); + table_endscan(scandesc); /* * Remove any comments or security labels on this tablespace. @@ -919,7 +920,7 @@ RenameTableSpace(const char *oldname, const char *newname) Oid tspId; Relation rel; ScanKeyData entry[1]; - HeapScanDesc scan; + TableScanDesc scan; HeapTuple tup; HeapTuple newtuple; Form_pg_tablespace newform; @@ -932,8 +933,8 @@ RenameTableSpace(const char *oldname, const char *newname) Anum_pg_tablespace_spcname, BTEqualStrategyNumber, F_NAMEEQ, CStringGetDatum(oldname)); - scan = heap_beginscan_catalog(rel, 1, entry); - tup = heap_getnext(scan, ForwardScanDirection); + scan = table_beginscan_catalog(rel, 1, entry); + tup = heap_scan_getnext(scan, ForwardScanDirection); if (!HeapTupleIsValid(tup)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), @@ -944,7 +945,7 @@ RenameTableSpace(const char *oldname, const char *newname) newform = (Form_pg_tablespace) GETSTRUCT(newtuple); tspId = newform->oid; - heap_endscan(scan); + table_endscan(scan); /* Must be owner */ if (!pg_tablespace_ownercheck(tspId, GetUserId())) @@ -962,15 +963,15 @@ RenameTableSpace(const char *oldname, const char *newname) Anum_pg_tablespace_spcname, BTEqualStrategyNumber, F_NAMEEQ, CStringGetDatum(newname)); - scan = heap_beginscan_catalog(rel, 1, entry); - tup = heap_getnext(scan, ForwardScanDirection); + scan = table_beginscan_catalog(rel, 1, entry); + tup = heap_scan_getnext(scan, ForwardScanDirection); if (HeapTupleIsValid(tup)) ereport(ERROR, (errcode(ERRCODE_DUPLICATE_OBJECT), errmsg("tablespace \"%s\" already exists", newname))); - heap_endscan(scan); + table_endscan(scan); /* OK, update the entry */ namestrcpy(&(newform->spcname), newname); @@ -994,7 +995,7 @@ AlterTableSpaceOptions(AlterTableSpaceOptionsStmt *stmt) { Relation rel; ScanKeyData entry[1]; - HeapScanDesc scandesc; + TableScanDesc scandesc; HeapTuple tup; Oid tablespaceoid; Datum datum; @@ -1012,8 +1013,8 @@ AlterTableSpaceOptions(AlterTableSpaceOptionsStmt *stmt) Anum_pg_tablespace_spcname, BTEqualStrategyNumber, F_NAMEEQ, CStringGetDatum(stmt->tablespacename)); - scandesc = heap_beginscan_catalog(rel, 1, entry); - tup = heap_getnext(scandesc, ForwardScanDirection); + scandesc = table_beginscan_catalog(rel, 1, entry); + tup = heap_scan_getnext(scandesc, ForwardScanDirection); if (!HeapTupleIsValid(tup)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), @@ -1054,7 +1055,7 @@ AlterTableSpaceOptions(AlterTableSpaceOptionsStmt *stmt) heap_freetuple(newtuple); /* Conclude heap scan. 
*/ - heap_endscan(scandesc); + table_endscan(scandesc); heap_close(rel, NoLock); return tablespaceoid; @@ -1388,7 +1389,7 @@ get_tablespace_oid(const char *tablespacename, bool missing_ok) { Oid result; Relation rel; - HeapScanDesc scandesc; + TableScanDesc scandesc; HeapTuple tuple; ScanKeyData entry[1]; @@ -1403,8 +1404,8 @@ get_tablespace_oid(const char *tablespacename, bool missing_ok) Anum_pg_tablespace_spcname, BTEqualStrategyNumber, F_NAMEEQ, CStringGetDatum(tablespacename)); - scandesc = heap_beginscan_catalog(rel, 1, entry); - tuple = heap_getnext(scandesc, ForwardScanDirection); + scandesc = table_beginscan_catalog(rel, 1, entry); + tuple = heap_scan_getnext(scandesc, ForwardScanDirection); /* We assume that there can be at most one matching tuple */ if (HeapTupleIsValid(tuple)) @@ -1412,7 +1413,7 @@ get_tablespace_oid(const char *tablespacename, bool missing_ok) else result = InvalidOid; - heap_endscan(scandesc); + table_endscan(scandesc); heap_close(rel, AccessShareLock); if (!OidIsValid(result) && !missing_ok) @@ -1434,7 +1435,7 @@ get_tablespace_name(Oid spc_oid) { char *result; Relation rel; - HeapScanDesc scandesc; + TableScanDesc scandesc; HeapTuple tuple; ScanKeyData entry[1]; @@ -1449,8 +1450,8 @@ get_tablespace_name(Oid spc_oid) Anum_pg_tablespace_oid, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(spc_oid)); - scandesc = heap_beginscan_catalog(rel, 1, entry); - tuple = heap_getnext(scandesc, ForwardScanDirection); + scandesc = table_beginscan_catalog(rel, 1, entry); + tuple = heap_scan_getnext(scandesc, ForwardScanDirection); /* We assume that there can be at most one matching tuple */ if (HeapTupleIsValid(tuple)) @@ -1458,7 +1459,7 @@ get_tablespace_name(Oid spc_oid) else result = NULL; - heap_endscan(scandesc); + table_endscan(scandesc); heap_close(rel, AccessShareLock); return result; diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index bcdd86ce92..6a00a96f59 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -15,6 +15,7 @@ #include "access/genam.h" #include "access/heapam.h" +#include "access/tableam.h" #include "access/sysattr.h" #include "access/htup_details.h" #include "access/xact.h" @@ -80,16 +81,18 @@ static int MyTriggerDepth = 0; /* Local function prototypes */ static void ConvertTriggerToFK(CreateTrigStmt *stmt, Oid funcoid); static void SetTriggerFlags(TriggerDesc *trigdesc, Trigger *trigger); -static HeapTuple GetTupleForTrigger(EState *estate, +static bool GetTupleForTrigger(EState *estate, EPQState *epqstate, ResultRelInfo *relinfo, ItemPointer tid, LockTupleMode lockmode, - TupleTableSlot **newSlot); + TupleTableSlot *oldslot, + TupleTableSlot *newslot, + bool *is_epq); static bool TriggerEnabled(EState *estate, ResultRelInfo *relinfo, Trigger *trigger, TriggerEvent event, Bitmapset *modifiedCols, - HeapTuple oldtup, HeapTuple newtup); + TupleTableSlot *oldslot, TupleTableSlot *newslot); static HeapTuple ExecCallTriggerFunc(TriggerData *trigdata, int tgindx, FmgrInfo *finfo, @@ -97,7 +100,7 @@ static HeapTuple ExecCallTriggerFunc(TriggerData *trigdata, MemoryContext per_tuple_context); static void AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, int event, bool row_trigger, - HeapTuple oldtup, HeapTuple newtup, + TupleTableSlot *oldtup, TupleTableSlot *newtup, List *recheckIndexes, Bitmapset *modifiedCols, TransitionCaptureState *transition_capture); static void AfterTriggerEnlargeQueryState(void); @@ -2470,10 +2473,11 @@ ExecBSInsertTriggers(EState *estate, ResultRelInfo 
*relinfo) LocTriggerData.tg_relation = relinfo->ri_RelationDesc; LocTriggerData.tg_trigtuple = NULL; LocTriggerData.tg_newtuple = NULL; + LocTriggerData.tg_trigslot = NULL; + LocTriggerData.tg_newslot = NULL; LocTriggerData.tg_oldtable = NULL; LocTriggerData.tg_newtable = NULL; - LocTriggerData.tg_trigtuplebuf = InvalidBuffer; - LocTriggerData.tg_newtuplebuf = InvalidBuffer; + for (i = 0; i < trigdesc->numtriggers; i++) { Trigger *trigger = &trigdesc->triggers[i]; @@ -2513,7 +2517,7 @@ ExecASInsertTriggers(EState *estate, ResultRelInfo *relinfo, false, NULL, NULL, NIL, NULL, transition_capture); } -TupleTableSlot * +bool ExecBRInsertTriggers(EState *estate, ResultRelInfo *relinfo, TupleTableSlot *slot) { @@ -2530,10 +2534,13 @@ ExecBRInsertTriggers(EState *estate, ResultRelInfo *relinfo, TRIGGER_EVENT_ROW | TRIGGER_EVENT_BEFORE; LocTriggerData.tg_relation = relinfo->ri_RelationDesc; + LocTriggerData.tg_trigtuple = NULL; LocTriggerData.tg_newtuple = NULL; + LocTriggerData.tg_trigslot = NULL; + LocTriggerData.tg_newslot = NULL; LocTriggerData.tg_oldtable = NULL; LocTriggerData.tg_newtable = NULL; - LocTriggerData.tg_newtuplebuf = InvalidBuffer; + for (i = 0; i < trigdesc->numtriggers; i++) { Trigger *trigger = &trigdesc->triggers[i]; @@ -2544,65 +2551,54 @@ ExecBRInsertTriggers(EState *estate, ResultRelInfo *relinfo, TRIGGER_TYPE_INSERT)) continue; if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event, - NULL, NULL, newtuple)) + NULL, NULL, slot)) continue; + LocTriggerData.tg_trigslot = slot; LocTriggerData.tg_trigtuple = oldtuple = newtuple; - LocTriggerData.tg_trigtuplebuf = InvalidBuffer; + LocTriggerData.tg_trigger = trigger; newtuple = ExecCallTriggerFunc(&LocTriggerData, i, relinfo->ri_TrigFunctions, relinfo->ri_TrigInstrument, GetPerTupleMemoryContext(estate)); - if (oldtuple != newtuple && oldtuple != slottuple) + if (false && oldtuple != newtuple && oldtuple != slottuple) heap_freetuple(oldtuple); if (newtuple == NULL) { if (should_free) heap_freetuple(slottuple); - return NULL; /* "do nothing" */ + return false; + } + if (newtuple != oldtuple) + { + ExecForceStoreHeapTuple(newtuple, slot); + newtuple = ExecFetchSlotHeapTuple(slot, true, NULL); } } - if (newtuple != slottuple) - { - /* - * Return the modified tuple using the es_trig_tuple_slot. We assume - * the tuple was allocated in per-tuple memory context, and therefore - * will go away by itself. The tuple table slot should not try to - * clear it. 
- */ - TupleTableSlot *newslot = estate->es_trig_tuple_slot; - TupleDesc tupdesc = RelationGetDescr(relinfo->ri_RelationDesc); - - if (newslot->tts_tupleDescriptor != tupdesc) - ExecSetSlotDescriptor(newslot, tupdesc); - ExecStoreHeapTuple(newtuple, newslot, false); - slot = newslot; - } - - if (should_free) - heap_freetuple(slottuple); - return slot; + return true; } void ExecARInsertTriggers(EState *estate, ResultRelInfo *relinfo, - HeapTuple trigtuple, List *recheckIndexes, + TupleTableSlot *slot, List *recheckIndexes, TransitionCaptureState *transition_capture) { TriggerDesc *trigdesc = relinfo->ri_TrigDesc; if ((trigdesc && trigdesc->trig_insert_after_row) || (transition_capture && transition_capture->tcs_insert_new_table)) + { AfterTriggerSaveEvent(estate, relinfo, TRIGGER_EVENT_INSERT, - true, NULL, trigtuple, + true, NULL, slot, recheckIndexes, NULL, transition_capture); + } } -TupleTableSlot * +bool ExecIRInsertTriggers(EState *estate, ResultRelInfo *relinfo, TupleTableSlot *slot) { @@ -2619,10 +2615,13 @@ ExecIRInsertTriggers(EState *estate, ResultRelInfo *relinfo, TRIGGER_EVENT_ROW | TRIGGER_EVENT_INSTEAD; LocTriggerData.tg_relation = relinfo->ri_RelationDesc; + LocTriggerData.tg_trigtuple = NULL; LocTriggerData.tg_newtuple = NULL; + LocTriggerData.tg_trigslot = NULL; + LocTriggerData.tg_newslot = NULL; LocTriggerData.tg_oldtable = NULL; LocTriggerData.tg_newtable = NULL; - LocTriggerData.tg_newtuplebuf = InvalidBuffer; + for (i = 0; i < trigdesc->numtriggers; i++) { Trigger *trigger = &trigdesc->triggers[i]; @@ -2633,47 +2632,33 @@ ExecIRInsertTriggers(EState *estate, ResultRelInfo *relinfo, TRIGGER_TYPE_INSERT)) continue; if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event, - NULL, NULL, newtuple)) + NULL, NULL, slot)) continue; + LocTriggerData.tg_trigslot = slot; LocTriggerData.tg_trigtuple = oldtuple = newtuple; - LocTriggerData.tg_trigtuplebuf = InvalidBuffer; LocTriggerData.tg_trigger = trigger; newtuple = ExecCallTriggerFunc(&LocTriggerData, i, relinfo->ri_TrigFunctions, relinfo->ri_TrigInstrument, GetPerTupleMemoryContext(estate)); - if (oldtuple != newtuple && oldtuple != slottuple) + if (false && oldtuple != newtuple && oldtuple != slottuple) heap_freetuple(oldtuple); if (newtuple == NULL) { if (should_free) heap_freetuple(slottuple); - return NULL; /* "do nothing" */ + return false; /* "do nothing" */ + } + if (oldtuple != newtuple) + { + ExecForceStoreHeapTuple(newtuple, LocTriggerData.tg_trigslot); + newtuple = ExecFetchSlotHeapTuple(slot, true, NULL); } } - if (newtuple != slottuple) - { - /* - * Return the modified tuple using the es_trig_tuple_slot. We assume - * the tuple was allocated in per-tuple memory context, and therefore - * will go away by itself. The tuple table slot should not try to - * clear it. 
- */ - TupleTableSlot *newslot = estate->es_trig_tuple_slot; - TupleDesc tupdesc = RelationGetDescr(relinfo->ri_RelationDesc); - - if (newslot->tts_tupleDescriptor != tupdesc) - ExecSetSlotDescriptor(newslot, tupdesc); - ExecStoreHeapTuple(newtuple, newslot, false); - slot = newslot; - } - - if (should_free) - heap_freetuple(slottuple); - return slot; + return true; } void @@ -2701,10 +2686,11 @@ ExecBSDeleteTriggers(EState *estate, ResultRelInfo *relinfo) LocTriggerData.tg_relation = relinfo->ri_RelationDesc; LocTriggerData.tg_trigtuple = NULL; LocTriggerData.tg_newtuple = NULL; + LocTriggerData.tg_trigslot = NULL; + LocTriggerData.tg_newslot = NULL; LocTriggerData.tg_oldtable = NULL; LocTriggerData.tg_newtable = NULL; - LocTriggerData.tg_trigtuplebuf = InvalidBuffer; - LocTriggerData.tg_newtuplebuf = InvalidBuffer; + for (i = 0; i < trigdesc->numtriggers; i++) { Trigger *trigger = &trigdesc->triggers[i]; @@ -2758,20 +2744,20 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate, HeapTuple fdw_trigtuple, TupleTableSlot **epqslot) { + TupleTableSlot *slot = ExecTriggerGetOldSlot(estate, relinfo->ri_RelationDesc); TriggerDesc *trigdesc = relinfo->ri_TrigDesc; bool result = true; TriggerData LocTriggerData; HeapTuple trigtuple; - HeapTuple newtuple; - TupleTableSlot *newSlot; int i; Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid)); if (fdw_trigtuple == NULL) { - trigtuple = GetTupleForTrigger(estate, epqstate, relinfo, tupleid, - LockTupleExclusive, &newSlot); - if (trigtuple == NULL) + bool is_epqtuple; + + if (!GetTupleForTrigger(estate, epqstate, relinfo, tupleid, + LockTupleExclusive, slot, NULL, &is_epqtuple)) return false; /* @@ -2779,27 +2765,36 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate, * function requested for the updated tuple, skip the trigger * execution. 
*/ - if (newSlot != NULL && epqslot != NULL) + if (is_epqtuple && epqslot != NULL) { - *epqslot = newSlot; - heap_freetuple(trigtuple); + *epqslot = slot; return false; } + + trigtuple = ExecFetchSlotHeapTuple(slot, true, NULL); + } else + { trigtuple = fdw_trigtuple; + ExecForceStoreHeapTuple(trigtuple, slot); + } LocTriggerData.type = T_TriggerData; LocTriggerData.tg_event = TRIGGER_EVENT_DELETE | TRIGGER_EVENT_ROW | TRIGGER_EVENT_BEFORE; LocTriggerData.tg_relation = relinfo->ri_RelationDesc; + LocTriggerData.tg_trigtuple = NULL; LocTriggerData.tg_newtuple = NULL; + LocTriggerData.tg_trigslot = NULL; + LocTriggerData.tg_newslot = NULL; LocTriggerData.tg_oldtable = NULL; LocTriggerData.tg_newtable = NULL; - LocTriggerData.tg_newtuplebuf = InvalidBuffer; + for (i = 0; i < trigdesc->numtriggers; i++) { + HeapTuple newtuple; Trigger *trigger = &trigdesc->triggers[i]; if (!TRIGGER_TYPE_MATCHES(trigger->tgtype, @@ -2808,11 +2803,11 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate, TRIGGER_TYPE_DELETE)) continue; if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event, - NULL, trigtuple, NULL)) + NULL, slot, NULL)) continue; + LocTriggerData.tg_trigslot = slot; LocTriggerData.tg_trigtuple = trigtuple; - LocTriggerData.tg_trigtuplebuf = InvalidBuffer; LocTriggerData.tg_trigger = trigger; newtuple = ExecCallTriggerFunc(&LocTriggerData, i, @@ -2824,10 +2819,10 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate, result = false; /* tell caller to suppress delete */ break; } - if (newtuple != trigtuple) + if (false && newtuple != trigtuple) heap_freetuple(newtuple); } - if (trigtuple != fdw_trigtuple) + if (false && trigtuple != fdw_trigtuple) heap_freetuple(trigtuple); return result; @@ -2840,28 +2835,31 @@ ExecARDeleteTriggers(EState *estate, ResultRelInfo *relinfo, TransitionCaptureState *transition_capture) { TriggerDesc *trigdesc = relinfo->ri_TrigDesc; + TupleTableSlot *slot = ExecTriggerGetOldSlot(estate, relinfo->ri_RelationDesc); if ((trigdesc && trigdesc->trig_delete_after_row) || (transition_capture && transition_capture->tcs_delete_old_table)) { - HeapTuple trigtuple; - Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid)); if (fdw_trigtuple == NULL) - trigtuple = GetTupleForTrigger(estate, - NULL, - relinfo, - tupleid, - LockTupleExclusive, - NULL); + { + GetTupleForTrigger(estate, + NULL, + relinfo, + tupleid, + LockTupleExclusive, + slot, + NULL, + NULL); + } else - trigtuple = fdw_trigtuple; + { + ExecForceStoreHeapTuple(fdw_trigtuple, slot); + } AfterTriggerSaveEvent(estate, relinfo, TRIGGER_EVENT_DELETE, - true, trigtuple, NULL, NIL, NULL, + true, slot, NULL, NIL, NULL, transition_capture); - if (trigtuple != fdw_trigtuple) - heap_freetuple(trigtuple); } } @@ -2870,8 +2868,8 @@ ExecIRDeleteTriggers(EState *estate, ResultRelInfo *relinfo, HeapTuple trigtuple) { TriggerDesc *trigdesc = relinfo->ri_TrigDesc; + TupleTableSlot *slot = ExecTriggerGetOldSlot(estate, relinfo->ri_RelationDesc); TriggerData LocTriggerData; - HeapTuple rettuple; int i; LocTriggerData.type = T_TriggerData; @@ -2879,12 +2877,18 @@ ExecIRDeleteTriggers(EState *estate, ResultRelInfo *relinfo, TRIGGER_EVENT_ROW | TRIGGER_EVENT_INSTEAD; LocTriggerData.tg_relation = relinfo->ri_RelationDesc; + LocTriggerData.tg_trigtuple = NULL; LocTriggerData.tg_newtuple = NULL; + LocTriggerData.tg_trigslot = NULL; + LocTriggerData.tg_newslot = NULL; LocTriggerData.tg_oldtable = NULL; LocTriggerData.tg_newtable = NULL; - LocTriggerData.tg_newtuplebuf = InvalidBuffer; + + 
ExecForceStoreHeapTuple(trigtuple, slot); + for (i = 0; i < trigdesc->numtriggers; i++) { + HeapTuple rettuple; Trigger *trigger = &trigdesc->triggers[i]; if (!TRIGGER_TYPE_MATCHES(trigger->tgtype, @@ -2893,11 +2897,11 @@ ExecIRDeleteTriggers(EState *estate, ResultRelInfo *relinfo, TRIGGER_TYPE_DELETE)) continue; if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event, - NULL, trigtuple, NULL)) + NULL, slot, NULL)) continue; + LocTriggerData.tg_trigslot = slot; LocTriggerData.tg_trigtuple = trigtuple; - LocTriggerData.tg_trigtuplebuf = InvalidBuffer; LocTriggerData.tg_trigger = trigger; rettuple = ExecCallTriggerFunc(&LocTriggerData, i, @@ -2906,7 +2910,7 @@ ExecIRDeleteTriggers(EState *estate, ResultRelInfo *relinfo, GetPerTupleMemoryContext(estate)); if (rettuple == NULL) return false; /* Delete was suppressed */ - if (rettuple != trigtuple) + if (false && rettuple != trigtuple) heap_freetuple(rettuple); } return true; @@ -2940,10 +2944,11 @@ ExecBSUpdateTriggers(EState *estate, ResultRelInfo *relinfo) LocTriggerData.tg_relation = relinfo->ri_RelationDesc; LocTriggerData.tg_trigtuple = NULL; LocTriggerData.tg_newtuple = NULL; + LocTriggerData.tg_trigslot = NULL; + LocTriggerData.tg_newslot = NULL; LocTriggerData.tg_oldtable = NULL; LocTriggerData.tg_newtable = NULL; - LocTriggerData.tg_trigtuplebuf = InvalidBuffer; - LocTriggerData.tg_newtuplebuf = InvalidBuffer; + for (i = 0; i < trigdesc->numtriggers; i++) { Trigger *trigger = &trigdesc->triggers[i]; @@ -2985,20 +2990,19 @@ ExecASUpdateTriggers(EState *estate, ResultRelInfo *relinfo, transition_capture); } -TupleTableSlot * +bool ExecBRUpdateTriggers(EState *estate, EPQState *epqstate, ResultRelInfo *relinfo, ItemPointer tupleid, HeapTuple fdw_trigtuple, - TupleTableSlot *slot) + TupleTableSlot *newslot) { TriggerDesc *trigdesc = relinfo->ri_TrigDesc; - HeapTuple slottuple = ExecFetchSlotHeapTuple(slot, true, NULL); + TupleTableSlot *oldslot = ExecTriggerGetOldSlot(estate, relinfo->ri_RelationDesc); + HeapTuple slottuple = ExecFetchSlotHeapTuple(newslot, true, NULL); HeapTuple newtuple = slottuple; TriggerData LocTriggerData; HeapTuple trigtuple; - HeapTuple oldtuple; - TupleTableSlot *newSlot; int i; Bitmapset *updatedCols; LockTupleMode lockmode; @@ -3009,37 +3013,41 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate, Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid)); if (fdw_trigtuple == NULL) { + bool is_epqtuple = false; + /* get a copy of the on-disk tuple we are planning to update */ - trigtuple = GetTupleForTrigger(estate, epqstate, relinfo, tupleid, - lockmode, &newSlot); - if (trigtuple == NULL) - return NULL; /* cancel the update action */ + if (!GetTupleForTrigger(estate, epqstate, relinfo, tupleid, + lockmode, oldslot, newslot, &is_epqtuple)) + return false; /* cancel the update action */ + + /* + * In READ COMMITTED isolation level it's possible that target tuple was + * changed due to concurrent update. In that case we have a raw subplan + * output tuple in newSlot, and need to run it through the junk filter to + * produce an insertable tuple. + * + * Caution: more than likely, the passed-in slot is the same as the + * junkfilter's output slot, so we are clobbering the original value of + * slottuple by doing the filtering. This is OK since neither we nor our + * caller have any more interest in the prior contents of that slot. 
+ */ + if (is_epqtuple) + { + TupleTableSlot *slot = ExecFilterJunk(relinfo->ri_junkFilter, newslot); + + ExecCopySlot(newslot, slot); + slottuple = ExecFetchSlotHeapTuple(newslot, true, NULL); + newtuple = slottuple; + } + + trigtuple = ExecFetchSlotHeapTuple(oldslot, true, NULL); } else { + ExecForceStoreHeapTuple(fdw_trigtuple, oldslot); trigtuple = fdw_trigtuple; - newSlot = NULL; - } - - /* - * In READ COMMITTED isolation level it's possible that target tuple was - * changed due to concurrent update. In that case we have a raw subplan - * output tuple in newSlot, and need to run it through the junk filter to - * produce an insertable tuple. - * - * Caution: more than likely, the passed-in slot is the same as the - * junkfilter's output slot, so we are clobbering the original value of - * slottuple by doing the filtering. This is OK since neither we nor our - * caller have any more interest in the prior contents of that slot. - */ - if (newSlot != NULL) - { - slot = ExecFilterJunk(relinfo->ri_junkFilter, newSlot); - slottuple = ExecFetchSlotHeapTuple(slot, true, NULL); - newtuple = slottuple; } - LocTriggerData.type = T_TriggerData; LocTriggerData.tg_event = TRIGGER_EVENT_UPDATE | TRIGGER_EVENT_ROW | @@ -3051,6 +3059,7 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate, for (i = 0; i < trigdesc->numtriggers; i++) { Trigger *trigger = &trigdesc->triggers[i]; + HeapTuple oldtuple; if (!TRIGGER_TYPE_MATCHES(trigger->tgtype, TRIGGER_TYPE_ROW, @@ -3058,67 +3067,55 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate, TRIGGER_TYPE_UPDATE)) continue; if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event, - updatedCols, trigtuple, newtuple)) + updatedCols, oldslot, newslot)) continue; + LocTriggerData.tg_trigslot = oldslot; LocTriggerData.tg_trigtuple = trigtuple; LocTriggerData.tg_newtuple = oldtuple = newtuple; - LocTriggerData.tg_trigtuplebuf = InvalidBuffer; - LocTriggerData.tg_newtuplebuf = InvalidBuffer; + LocTriggerData.tg_newslot = newslot; LocTriggerData.tg_trigger = trigger; newtuple = ExecCallTriggerFunc(&LocTriggerData, i, relinfo->ri_TrigFunctions, relinfo->ri_TrigInstrument, GetPerTupleMemoryContext(estate)); - if (oldtuple != newtuple && oldtuple != slottuple) + if (false && oldtuple != newtuple && oldtuple != slottuple) heap_freetuple(oldtuple); if (newtuple == NULL) { - if (trigtuple != fdw_trigtuple) + if (false && trigtuple != fdw_trigtuple) heap_freetuple(trigtuple); - return NULL; /* "do nothing" */ + return false; /* "do nothing" */ } + + if (newtuple != oldtuple) + ExecForceStoreHeapTuple(newtuple, newslot); } - if (trigtuple != fdw_trigtuple && trigtuple != newtuple) + if (false && trigtuple != fdw_trigtuple && trigtuple != newtuple) heap_freetuple(trigtuple); - if (newtuple != slottuple) - { - /* - * Return the modified tuple using the es_trig_tuple_slot. We assume - * the tuple was allocated in per-tuple memory context, and therefore - * will go away by itself. The tuple table slot should not try to - * clear it. 
- */ - TupleTableSlot *newslot = estate->es_trig_tuple_slot; - TupleDesc tupdesc = RelationGetDescr(relinfo->ri_RelationDesc); - - if (newslot->tts_tupleDescriptor != tupdesc) - ExecSetSlotDescriptor(newslot, tupdesc); - ExecStoreHeapTuple(newtuple, newslot, false); - slot = newslot; - } - return slot; + return true; } void ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo, ItemPointer tupleid, HeapTuple fdw_trigtuple, - HeapTuple newtuple, + TupleTableSlot *newslot, List *recheckIndexes, TransitionCaptureState *transition_capture) { TriggerDesc *trigdesc = relinfo->ri_TrigDesc; + TupleTableSlot *oldslot = ExecTriggerGetOldSlot(estate, relinfo->ri_RelationDesc); + + ExecClearTuple(oldslot); if ((trigdesc && trigdesc->trig_update_after_row) || (transition_capture && (transition_capture->tcs_update_old_table || transition_capture->tcs_update_new_table))) { - HeapTuple trigtuple; - /* * Note: if the UPDATE is converted into a DELETE+INSERT as part of * update-partition-key operation, then this function is also called @@ -3126,30 +3123,31 @@ ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo, * In such case, either old tuple or new tuple can be NULL. */ if (fdw_trigtuple == NULL && ItemPointerIsValid(tupleid)) - trigtuple = GetTupleForTrigger(estate, - NULL, - relinfo, - tupleid, - LockTupleExclusive, - NULL); - else - trigtuple = fdw_trigtuple; + GetTupleForTrigger(estate, + NULL, + relinfo, + tupleid, + LockTupleExclusive, + oldslot, + NULL, + NULL); + else if (fdw_trigtuple != NULL) + ExecForceStoreHeapTuple(fdw_trigtuple, oldslot); AfterTriggerSaveEvent(estate, relinfo, TRIGGER_EVENT_UPDATE, - true, trigtuple, newtuple, recheckIndexes, + true, oldslot, newslot, recheckIndexes, GetUpdatedColumns(relinfo, estate), transition_capture); - if (trigtuple != fdw_trigtuple) - heap_freetuple(trigtuple); } } -TupleTableSlot * +bool ExecIRUpdateTriggers(EState *estate, ResultRelInfo *relinfo, - HeapTuple trigtuple, TupleTableSlot *slot) + HeapTuple trigtuple, TupleTableSlot *newslot) { TriggerDesc *trigdesc = relinfo->ri_TrigDesc; - HeapTuple slottuple = ExecFetchSlotHeapTuple(slot, true, NULL); + TupleTableSlot *oldslot = ExecTriggerGetOldSlot(estate, relinfo->ri_RelationDesc); + HeapTuple slottuple = ExecFetchSlotHeapTuple(newslot, true, NULL); HeapTuple newtuple = slottuple; TriggerData LocTriggerData; HeapTuple oldtuple; @@ -3162,6 +3160,9 @@ ExecIRUpdateTriggers(EState *estate, ResultRelInfo *relinfo, LocTriggerData.tg_relation = relinfo->ri_RelationDesc; LocTriggerData.tg_oldtable = NULL; LocTriggerData.tg_newtable = NULL; + + ExecForceStoreHeapTuple(trigtuple, oldslot); + for (i = 0; i < trigdesc->numtriggers; i++) { Trigger *trigger = &trigdesc->triggers[i]; @@ -3172,42 +3173,30 @@ ExecIRUpdateTriggers(EState *estate, ResultRelInfo *relinfo, TRIGGER_TYPE_UPDATE)) continue; if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event, - NULL, trigtuple, newtuple)) + NULL, oldslot, newslot)) continue; + LocTriggerData.tg_trigslot = oldslot; LocTriggerData.tg_trigtuple = trigtuple; + LocTriggerData.tg_newslot = newslot; LocTriggerData.tg_newtuple = oldtuple = newtuple; - LocTriggerData.tg_trigtuplebuf = InvalidBuffer; - LocTriggerData.tg_newtuplebuf = InvalidBuffer; + LocTriggerData.tg_trigger = trigger; newtuple = ExecCallTriggerFunc(&LocTriggerData, i, relinfo->ri_TrigFunctions, relinfo->ri_TrigInstrument, GetPerTupleMemoryContext(estate)); - if (oldtuple != newtuple && oldtuple != slottuple) + if (false && oldtuple != newtuple && oldtuple != slottuple) 
heap_freetuple(oldtuple); if (newtuple == NULL) - return NULL; /* "do nothing" */ - } - - if (newtuple != slottuple) - { - /* - * Return the modified tuple using the es_trig_tuple_slot. We assume - * the tuple was allocated in per-tuple memory context, and therefore - * will go away by itself. The tuple table slot should not try to - * clear it. - */ - TupleTableSlot *newslot = estate->es_trig_tuple_slot; - TupleDesc tupdesc = RelationGetDescr(relinfo->ri_RelationDesc); + return false; /* "do nothing" */ - if (newslot->tts_tupleDescriptor != tupdesc) - ExecSetSlotDescriptor(newslot, tupdesc); - ExecStoreHeapTuple(newtuple, newslot, false); - slot = newslot; + if (oldtuple != newtuple) + ExecForceStoreHeapTuple(newtuple, newslot); } - return slot; + + return true; } void @@ -3230,10 +3219,11 @@ ExecBSTruncateTriggers(EState *estate, ResultRelInfo *relinfo) LocTriggerData.tg_relation = relinfo->ri_RelationDesc; LocTriggerData.tg_trigtuple = NULL; LocTriggerData.tg_newtuple = NULL; + LocTriggerData.tg_trigslot = NULL; + LocTriggerData.tg_newslot = NULL; LocTriggerData.tg_oldtable = NULL; LocTriggerData.tg_newtable = NULL; - LocTriggerData.tg_trigtuplebuf = InvalidBuffer; - LocTriggerData.tg_newtuplebuf = InvalidBuffer; + for (i = 0; i < trigdesc->numtriggers; i++) { Trigger *trigger = &trigdesc->triggers[i]; @@ -3273,25 +3263,24 @@ ExecASTruncateTriggers(EState *estate, ResultRelInfo *relinfo) } -static HeapTuple +static bool GetTupleForTrigger(EState *estate, EPQState *epqstate, ResultRelInfo *relinfo, ItemPointer tid, LockTupleMode lockmode, - TupleTableSlot **newSlot) + TupleTableSlot *oldslot, + TupleTableSlot *newslot, + bool *is_epqtuple) { Relation relation = relinfo->ri_RelationDesc; - HeapTupleData tuple; - HeapTuple result; - Buffer buffer; - if (newSlot != NULL) + if (is_epqtuple) { HTSU_Result test; HeapUpdateFailureData hufd; - *newSlot = NULL; + *is_epqtuple = false; /* caller must pass an epqstate if EvalPlanQual is possible */ Assert(epqstate != NULL); @@ -3299,12 +3288,13 @@ GetTupleForTrigger(EState *estate, /* * lock tuple for update */ -ltrmark:; - tuple.t_self = *tid; - test = heap_lock_tuple(relation, &tuple, - estate->es_output_cid, - lockmode, LockWaitBlock, - false, &buffer, &hufd); + test = table_lock_tuple(relation, tid, estate->es_snapshot, oldslot, + estate->es_output_cid, + lockmode, LockWaitBlock, + IsolationUsesXactSnapshot() ? 
0 : TUPLE_LOCK_FLAG_FIND_LAST_VERSION, + &hufd); + + // FIXME: result = tuple; switch (test) { case HeapTupleSelfUpdated: @@ -3324,103 +3314,71 @@ ltrmark:; errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows."))); /* treat it as deleted; do not process */ - ReleaseBuffer(buffer); - return NULL; + return false; case HeapTupleMayBeUpdated: - break; - - case HeapTupleUpdated: - ReleaseBuffer(buffer); - if (IsolationUsesXactSnapshot()) - ereport(ERROR, - (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), - errmsg("could not serialize access due to concurrent update"))); - if (ItemPointerIndicatesMovedPartitions(&hufd.ctid)) - ereport(ERROR, - (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), - errmsg("tuple to be locked was already moved to another partition due to concurrent update"))); - if (!ItemPointerEquals(&hufd.ctid, &tuple.t_self)) + if (hufd.traversed) { - /* it was updated, so look at the updated version */ + TupleTableSlot *testslot; TupleTableSlot *epqslot; + EvalPlanQualBegin(epqstate, estate); + + testslot = EvalPlanQualSlot(epqstate, relation, relinfo->ri_RangeTableIndex); + ExecCopySlot(testslot, oldslot); + epqslot = EvalPlanQual(estate, epqstate, relation, relinfo->ri_RangeTableIndex, - lockmode, - &hufd.ctid, - hufd.xmax); - if (!TupIsNull(epqslot)) - { - *tid = hufd.ctid; - *newSlot = epqslot; - - /* - * EvalPlanQual already locked the tuple, but we - * re-call heap_lock_tuple anyway as an easy way of - * re-fetching the correct tuple. Speed is hardly a - * criterion in this path anyhow. - */ - goto ltrmark; - } + testslot); + + /* If PlanQual failed for updated tuple - we must not process this tuple!*/ + if (TupIsNull(epqslot)) + return false; + + if (newslot) + ExecCopySlot(newslot, epqslot); + else + ExecCopySlot(oldslot, epqslot); + + *is_epqtuple = true; } + break; - /* - * if tuple was deleted or PlanQual failed for updated tuple - - * we must not process this tuple! - */ - return NULL; + case HeapTupleUpdated: + if (IsolationUsesXactSnapshot()) + ereport(ERROR, + (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), + errmsg("could not serialize access due to concurrent update"))); + elog(ERROR, "wrong heap_lock_tuple status: %u", test); + break; + + case HeapTupleDeleted: + if (IsolationUsesXactSnapshot()) + ereport(ERROR, + (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), + errmsg("could not serialize access due to concurrent update"))); + /* tuple was deleted */ + return false; case HeapTupleInvisible: elog(ERROR, "attempted to lock invisible tuple"); break; default: - ReleaseBuffer(buffer); elog(ERROR, "unrecognized heap_lock_tuple status: %u", test); - return NULL; /* keep compiler quiet */ + return false; /* keep compiler quiet */ } } else { - Page page; - ItemId lp; - - buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid)); - - /* - * Although we already know this tuple is valid, we must lock the - * buffer to ensure that no one has a buffer cleanup lock; otherwise - * they might move the tuple while we try to copy it. But we can - * release the lock before actually doing the heap_copytuple call, - * since holding pin is sufficient to prevent anyone from getting a - * cleanup lock they don't already hold. 
- */ - LockBuffer(buffer, BUFFER_LOCK_SHARE); - - page = BufferGetPage(buffer); - lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid)); - - Assert(ItemIdIsNormal(lp)); - - tuple.t_data = (HeapTupleHeader) PageGetItem(page, lp); - tuple.t_len = ItemIdGetLength(lp); - tuple.t_self = *tid; - tuple.t_tableOid = RelationGetRelid(relation); - - LockBuffer(buffer, BUFFER_LOCK_UNLOCK); + if (!table_fetch_row_version(relation, tid, SnapshotAny, oldslot, NULL)) + elog(ERROR, "couldn't fetch tuple"); } - if (HeapTupleHeaderGetNatts(tuple.t_data) < relation->rd_att->natts) - result = heap_expand_tuple(&tuple, relation->rd_att); - else - result = heap_copytuple(&tuple); - ReleaseBuffer(buffer); - - return result; + return true; } /* @@ -3430,7 +3388,7 @@ static bool TriggerEnabled(EState *estate, ResultRelInfo *relinfo, Trigger *trigger, TriggerEvent event, Bitmapset *modifiedCols, - HeapTuple oldtup, HeapTuple newtup) + TupleTableSlot *oldslot, TupleTableSlot *newslot) { /* Check replication-role-dependent enable state */ if (SessionReplicationRole == SESSION_REPLICATION_ROLE_REPLICA) @@ -3472,11 +3430,8 @@ TriggerEnabled(EState *estate, ResultRelInfo *relinfo, /* Check for WHEN clause */ if (trigger->tgqual) { - TupleDesc tupdesc = RelationGetDescr(relinfo->ri_RelationDesc); ExprState **predicate; ExprContext *econtext; - TupleTableSlot *oldslot = NULL; - TupleTableSlot *newslot = NULL; MemoryContext oldContext; int i; @@ -3515,40 +3470,6 @@ TriggerEnabled(EState *estate, ResultRelInfo *relinfo, */ econtext = GetPerTupleExprContext(estate); - /* - * Put OLD and NEW tuples into tupleslots for expression evaluation. - * These slots can be shared across the whole estate, but be careful - * that they have the current resultrel's tupdesc. - */ - if (HeapTupleIsValid(oldtup)) - { - if (estate->es_trig_oldtup_slot == NULL) - { - oldContext = MemoryContextSwitchTo(estate->es_query_cxt); - estate->es_trig_oldtup_slot = - ExecInitExtraTupleSlot(estate, NULL, &TTSOpsHeapTuple); - MemoryContextSwitchTo(oldContext); - } - oldslot = estate->es_trig_oldtup_slot; - if (oldslot->tts_tupleDescriptor != tupdesc) - ExecSetSlotDescriptor(oldslot, tupdesc); - ExecStoreHeapTuple(oldtup, oldslot, false); - } - if (HeapTupleIsValid(newtup)) - { - if (estate->es_trig_newtup_slot == NULL) - { - oldContext = MemoryContextSwitchTo(estate->es_query_cxt); - estate->es_trig_newtup_slot = - ExecInitExtraTupleSlot(estate, NULL, &TTSOpsHeapTuple); - MemoryContextSwitchTo(oldContext); - } - newslot = estate->es_trig_newtup_slot; - if (newslot->tts_tupleDescriptor != tupdesc) - ExecSetSlotDescriptor(newslot, tupdesc); - ExecStoreHeapTuple(newtup, newslot, false); - } - /* * Finally evaluate the expression, making the old and/or new tuples * available as INNER_VAR/OUTER_VAR respectively. 
@@ -3882,7 +3803,8 @@ struct AfterTriggersTableData static AfterTriggersData afterTriggers; -static void AfterTriggerExecute(AfterTriggerEvent event, +static void AfterTriggerExecute(EState *estate, + AfterTriggerEvent event, Relation rel, TriggerDesc *trigdesc, FmgrInfo *finfo, Instrumentation *instr, @@ -4217,7 +4139,8 @@ afterTriggerDeleteHeadEventChunk(AfterTriggersQueryData *qs) * ---------- */ static void -AfterTriggerExecute(AfterTriggerEvent event, +AfterTriggerExecute(EState *estate, + AfterTriggerEvent event, Relation rel, TriggerDesc *trigdesc, FmgrInfo *finfo, Instrumentation *instr, MemoryContext per_tuple_context, @@ -4227,17 +4150,16 @@ AfterTriggerExecute(AfterTriggerEvent event, AfterTriggerShared evtshared = GetTriggerSharedData(event); Oid tgoid = evtshared->ats_tgoid; TriggerData LocTriggerData; - HeapTupleData tuple1; - HeapTupleData tuple2; HeapTuple rettuple; - Buffer buffer1 = InvalidBuffer; - Buffer buffer2 = InvalidBuffer; int tgindx; /* * Locate trigger in trigdesc. */ LocTriggerData.tg_trigger = NULL; + LocTriggerData.tg_trigslot = NULL; + LocTriggerData.tg_newslot = NULL; + for (tgindx = 0; tgindx < trigdesc->numtriggers; tgindx++) { if (trigdesc->triggers[tgindx].tgoid == tgoid) @@ -4287,31 +4209,31 @@ AfterTriggerExecute(AfterTriggerEvent event, * that is stored as a heap tuple, constructed in different memory * context, in the slot anyway. */ - LocTriggerData.tg_trigtuple = ExecFetchSlotHeapTuple(trig_tuple_slot1, - true, NULL); - LocTriggerData.tg_trigtuplebuf = InvalidBuffer; + LocTriggerData.tg_trigslot = trig_tuple_slot1; // FIXME + LocTriggerData.tg_trigtuple = + ExecFetchSlotHeapTuple(trig_tuple_slot1, true, NULL); + LocTriggerData.tg_newslot = trig_tuple_slot2; // FIXME LocTriggerData.tg_newtuple = ((evtshared->ats_event & TRIGGER_EVENT_OPMASK) == TRIGGER_EVENT_UPDATE) ? 
ExecFetchSlotHeapTuple(trig_tuple_slot2, true, NULL) : NULL; - LocTriggerData.tg_newtuplebuf = InvalidBuffer; break; default: + if (ItemPointerIsValid(&(event->ate_ctid1))) { - ItemPointerCopy(&(event->ate_ctid1), &(tuple1.t_self)); - if (!heap_fetch(rel, SnapshotAny, &tuple1, &buffer1, false, NULL)) + LocTriggerData.tg_trigslot = ExecTriggerGetOldSlot(estate, rel); + if (!table_fetch_row_version(rel, &(event->ate_ctid1), SnapshotAny, LocTriggerData.tg_trigslot, NULL)) elog(ERROR, "failed to fetch tuple1 for AFTER trigger"); - LocTriggerData.tg_trigtuple = &tuple1; - LocTriggerData.tg_trigtuplebuf = buffer1; + LocTriggerData.tg_trigtuple = ExecFetchSlotHeapTuple(LocTriggerData.tg_trigslot, false, NULL); + } else { LocTriggerData.tg_trigtuple = NULL; - LocTriggerData.tg_trigtuplebuf = InvalidBuffer; } /* don't touch ctid2 if not there */ @@ -4319,16 +4241,14 @@ AfterTriggerExecute(AfterTriggerEvent event, AFTER_TRIGGER_2CTID && ItemPointerIsValid(&(event->ate_ctid2))) { - ItemPointerCopy(&(event->ate_ctid2), &(tuple2.t_self)); - if (!heap_fetch(rel, SnapshotAny, &tuple2, &buffer2, false, NULL)) + LocTriggerData.tg_newslot = ExecTriggerGetNewSlot(estate, rel); + if (!table_fetch_row_version(rel, &(event->ate_ctid2), SnapshotAny, LocTriggerData.tg_newslot, NULL)) elog(ERROR, "failed to fetch tuple2 for AFTER trigger"); - LocTriggerData.tg_newtuple = &tuple2; - LocTriggerData.tg_newtuplebuf = buffer2; + LocTriggerData.tg_newtuple = ExecFetchSlotHeapTuple(LocTriggerData.tg_newslot, false, NULL); } else { LocTriggerData.tg_newtuple = NULL; - LocTriggerData.tg_newtuplebuf = InvalidBuffer; } } @@ -4380,12 +4300,12 @@ AfterTriggerExecute(AfterTriggerEvent event, heap_freetuple(rettuple); /* - * Release buffers + * Release resources */ - if (buffer1 != InvalidBuffer) - ReleaseBuffer(buffer1); - if (buffer2 != InvalidBuffer) - ReleaseBuffer(buffer2); + if (LocTriggerData.tg_trigslot) + ExecClearTuple(LocTriggerData.tg_trigslot); + if (LocTriggerData.tg_newslot) + ExecClearTuple(LocTriggerData.tg_newslot); /* * If doing EXPLAIN ANALYZE, stop charging time to this trigger, and count @@ -4552,6 +4472,7 @@ afterTriggerInvokeEvents(AfterTriggerEventList *events, slot2 = MakeSingleTupleTableSlot(rel->rd_att, &TTSOpsMinimalTuple); } + if (trigdesc == NULL) /* should not happen */ elog(ERROR, "relation %u has no triggers", evtshared->ats_relid); @@ -4562,7 +4483,7 @@ afterTriggerInvokeEvents(AfterTriggerEventList *events, * still set, so recursive examinations of the event list * won't try to re-fire it. */ - AfterTriggerExecute(event, rel, trigdesc, finfo, instr, + AfterTriggerExecute(estate, event, rel, trigdesc, finfo, instr, per_tuple_context, slot1, slot2); /* @@ -4606,6 +4527,7 @@ afterTriggerInvokeEvents(AfterTriggerEventList *events, if (local_estate) { ExecCleanUpTriggerState(estate); + ExecResetTupleTable(estate->es_tupleTable, false); FreeExecutorState(estate); } @@ -5743,7 +5665,7 @@ AfterTriggerPendingOnRel(Oid relid) static void AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, int event, bool row_trigger, - HeapTuple oldtup, HeapTuple newtup, + TupleTableSlot *oldslot, TupleTableSlot *newslot, List *recheckIndexes, Bitmapset *modifiedCols, TransitionCaptureState *transition_capture) { @@ -5792,11 +5714,11 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, * deleted. 
*/ Assert(!(event == TRIGGER_EVENT_DELETE && delete_old_table && - oldtup == NULL)); + TupIsNull(oldslot))); Assert(!(event == TRIGGER_EVENT_INSERT && insert_new_table && - newtup == NULL)); + TupIsNull(newslot))); - if (oldtup != NULL && + if (!TupIsNull(oldslot) && ((event == TRIGGER_EVENT_DELETE && delete_old_table) || (event == TRIGGER_EVENT_UPDATE && update_old_table))) { @@ -5806,15 +5728,17 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, if (map != NULL) { - HeapTuple converted = execute_attr_map_tuple(oldtup, map); - + HeapTuple converted; + // PBORKED + converted = execute_attr_map_tuple(ExecFetchSlotHeapTuple(oldslot, true, NULL), + map); tuplestore_puttuple(old_tuplestore, converted); pfree(converted); } else - tuplestore_puttuple(old_tuplestore, oldtup); + tuplestore_puttupleslot(old_tuplestore, oldslot); } - if (newtup != NULL && + if (!TupIsNull(newslot) && ((event == TRIGGER_EVENT_INSERT && insert_new_table) || (event == TRIGGER_EVENT_UPDATE && update_new_table))) { @@ -5826,13 +5750,15 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, tuplestore_puttuple(new_tuplestore, original_insert_tuple); else if (map != NULL) { - HeapTuple converted = execute_attr_map_tuple(newtup, map); + HeapTuple converted; + converted = execute_attr_map_tuple(ExecFetchSlotHeapTuple(newslot, true, NULL), + map); tuplestore_puttuple(new_tuplestore, converted); pfree(converted); } else - tuplestore_puttuple(new_tuplestore, newtup); + tuplestore_puttupleslot(new_tuplestore, newslot); } /* @@ -5846,7 +5772,7 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, (event == TRIGGER_EVENT_DELETE && !trigdesc->trig_delete_after_row) || (event == TRIGGER_EVENT_INSERT && !trigdesc->trig_insert_after_row) || (event == TRIGGER_EVENT_UPDATE && !trigdesc->trig_update_after_row) || - (event == TRIGGER_EVENT_UPDATE && ((oldtup == NULL) ^ (newtup == NULL)))) + (event == TRIGGER_EVENT_UPDATE && (TupIsNull(oldslot) ^ TupIsNull(newslot)))) return; } @@ -5868,15 +5794,15 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, tgtype_event = TRIGGER_TYPE_INSERT; if (row_trigger) { - Assert(oldtup == NULL); - Assert(newtup != NULL); - ItemPointerCopy(&(newtup->t_self), &(new_event.ate_ctid1)); + Assert(oldslot == NULL); + Assert(newslot != NULL); + ItemPointerCopy(&(newslot->tts_tid), &(new_event.ate_ctid1)); ItemPointerSetInvalid(&(new_event.ate_ctid2)); } else { - Assert(oldtup == NULL); - Assert(newtup == NULL); + Assert(oldslot == NULL); + Assert(newslot == NULL); ItemPointerSetInvalid(&(new_event.ate_ctid1)); ItemPointerSetInvalid(&(new_event.ate_ctid2)); cancel_prior_stmt_triggers(RelationGetRelid(rel), @@ -5887,15 +5813,15 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, tgtype_event = TRIGGER_TYPE_DELETE; if (row_trigger) { - Assert(oldtup != NULL); - Assert(newtup == NULL); - ItemPointerCopy(&(oldtup->t_self), &(new_event.ate_ctid1)); + Assert(oldslot != NULL); + Assert(newslot == NULL); + ItemPointerCopy(&(oldslot->tts_tid), &(new_event.ate_ctid1)); ItemPointerSetInvalid(&(new_event.ate_ctid2)); } else { - Assert(oldtup == NULL); - Assert(newtup == NULL); + Assert(oldslot == NULL); + Assert(newslot == NULL); ItemPointerSetInvalid(&(new_event.ate_ctid1)); ItemPointerSetInvalid(&(new_event.ate_ctid2)); cancel_prior_stmt_triggers(RelationGetRelid(rel), @@ -5906,15 +5832,15 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, tgtype_event = TRIGGER_TYPE_UPDATE; if (row_trigger) { - Assert(oldtup != NULL); - Assert(newtup != NULL); - 
ItemPointerCopy(&(oldtup->t_self), &(new_event.ate_ctid1)); - ItemPointerCopy(&(newtup->t_self), &(new_event.ate_ctid2)); + Assert(oldslot != NULL); + Assert(newslot != NULL); + ItemPointerCopy(&(oldslot->tts_tid), &(new_event.ate_ctid1)); + ItemPointerCopy(&(newslot->tts_tid), &(new_event.ate_ctid2)); } else { - Assert(oldtup == NULL); - Assert(newtup == NULL); + Assert(oldslot == NULL); + Assert(newslot == NULL); ItemPointerSetInvalid(&(new_event.ate_ctid1)); ItemPointerSetInvalid(&(new_event.ate_ctid2)); cancel_prior_stmt_triggers(RelationGetRelid(rel), @@ -5923,8 +5849,8 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, break; case TRIGGER_EVENT_TRUNCATE: tgtype_event = TRIGGER_TYPE_TRUNCATE; - Assert(oldtup == NULL); - Assert(newtup == NULL); + Assert(oldslot == NULL); + Assert(newslot == NULL); ItemPointerSetInvalid(&(new_event.ate_ctid1)); ItemPointerSetInvalid(&(new_event.ate_ctid2)); break; @@ -5951,7 +5877,7 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, tgtype_event)) continue; if (!TriggerEnabled(estate, relinfo, trigger, event, - modifiedCols, oldtup, newtup)) + modifiedCols, oldslot, newslot)) continue; if (relkind == RELKIND_FOREIGN_TABLE && row_trigger) @@ -5978,7 +5904,7 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, case RI_TRIGGER_PK: /* Update or delete on trigger's PK table */ if (!RI_FKey_pk_upd_check_required(trigger, rel, - oldtup, newtup)) + oldslot, newslot)) { /* skip queuing this event */ continue; @@ -5988,7 +5914,7 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, case RI_TRIGGER_FK: /* Update on trigger's FK table */ if (!RI_FKey_fk_upd_check_required(trigger, rel, - oldtup, newtup)) + oldslot, newslot)) { /* skip queuing this event */ continue; @@ -6042,10 +5968,10 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, */ if (fdw_tuplestore) { - if (oldtup != NULL) - tuplestore_puttuple(fdw_tuplestore, oldtup); - if (newtup != NULL) - tuplestore_puttuple(fdw_tuplestore, newtup); + if (oldslot != NULL) + tuplestore_puttupleslot(fdw_tuplestore, oldslot); + if (newslot != NULL) + tuplestore_puttupleslot(fdw_tuplestore, newslot); } } diff --git a/src/backend/commands/typecmds.c b/src/backend/commands/typecmds.c index 1ffc8231d4..24e8f0c279 100644 --- a/src/backend/commands/typecmds.c +++ b/src/backend/commands/typecmds.c @@ -32,6 +32,7 @@ #include "postgres.h" #include "access/htup_details.h" +#include "access/tableam.h" #include "access/xact.h" #include "catalog/binary_upgrade.h" #include "catalog/catalog.h" @@ -2358,14 +2359,16 @@ AlterDomainNotNull(List *names, bool notNull) RelToCheck *rtc = (RelToCheck *) lfirst(rt); Relation testrel = rtc->rel; TupleDesc tupdesc = RelationGetDescr(testrel); - HeapScanDesc scan; - HeapTuple tuple; + TableScanDesc scan; + TupleTableSlot *slot; Snapshot snapshot; /* Scan all tuples in this relation */ snapshot = RegisterSnapshot(GetLatestSnapshot()); - scan = heap_beginscan(testrel, snapshot, 0, NULL); - while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + scan = table_beginscan(testrel, snapshot, 0, NULL); + slot = table_gimmegimmeslot(testrel, NULL); + + while (table_scan_getnextslot(scan, ForwardScanDirection, slot)) { int i; @@ -2375,7 +2378,7 @@ AlterDomainNotNull(List *names, bool notNull) int attnum = rtc->atts[i]; Form_pg_attribute attr = TupleDescAttr(tupdesc, attnum - 1); - if (heap_attisnull(tuple, attnum, tupdesc)) + if (slot_attisnull(slot, attnum)) { /* * In principle the auxiliary information for this @@ -2394,7 +2397,9 
@@ AlterDomainNotNull(List *names, bool notNull) } } } - heap_endscan(scan); + + ExecDropSingleTupleTableSlot(slot); + table_endscan(scan); UnregisterSnapshot(snapshot); /* Close each rel after processing, but keep lock */ @@ -2757,14 +2762,16 @@ validateDomainConstraint(Oid domainoid, char *ccbin) RelToCheck *rtc = (RelToCheck *) lfirst(rt); Relation testrel = rtc->rel; TupleDesc tupdesc = RelationGetDescr(testrel); - HeapScanDesc scan; - HeapTuple tuple; + TableScanDesc scan; + TupleTableSlot *slot; Snapshot snapshot; /* Scan all tuples in this relation */ snapshot = RegisterSnapshot(GetLatestSnapshot()); - scan = heap_beginscan(testrel, snapshot, 0, NULL); - while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + scan = table_beginscan(testrel, snapshot, 0, NULL); + slot = table_gimmegimmeslot(testrel, NULL); + + while (table_scan_getnextslot(scan, ForwardScanDirection, slot)) { int i; @@ -2777,7 +2784,7 @@ validateDomainConstraint(Oid domainoid, char *ccbin) Datum conResult; Form_pg_attribute attr = TupleDescAttr(tupdesc, attnum - 1); - d = heap_getattr(tuple, attnum, tupdesc, &isNull); + d = slot_getattr(slot, attnum, &isNull); econtext->domainValue_datum = d; econtext->domainValue_isNull = isNull; @@ -2807,7 +2814,9 @@ validateDomainConstraint(Oid domainoid, char *ccbin) ResetExprContext(econtext); } - heap_endscan(scan); + + ExecDropSingleTupleTableSlot(slot); + table_endscan(scan); UnregisterSnapshot(snapshot); /* Hold relation lock till commit (XXX bad for concurrency) */ diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 15eec19418..fcae282044 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -28,6 +28,7 @@ #include "access/heapam.h" #include "access/htup_details.h" #include "access/multixact.h" +#include "access/tableam.h" #include "access/transam.h" #include "access/xact.h" #include "catalog/namespace.h" @@ -746,14 +747,14 @@ get_all_vacuum_rels(int options) { List *vacrels = NIL; Relation pgclass; - HeapScanDesc scan; + TableScanDesc scan; HeapTuple tuple; pgclass = heap_open(RelationRelationId, AccessShareLock); - scan = heap_beginscan_catalog(pgclass, 0, NULL); + scan = table_beginscan_catalog(pgclass, 0, NULL); - while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL) { Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple); MemoryContext oldcontext; @@ -785,7 +786,7 @@ get_all_vacuum_rels(int options) MemoryContextSwitchTo(oldcontext); } - heap_endscan(scan); + table_endscan(scan); heap_close(pgclass, AccessShareLock); return vacrels; @@ -1382,7 +1383,7 @@ vac_truncate_clog(TransactionId frozenXID, { TransactionId nextXID = ReadNewTransactionId(); Relation relation; - HeapScanDesc scan; + TableScanDesc scan; HeapTuple tuple; Oid oldestxid_datoid; Oid minmulti_datoid; @@ -1413,9 +1414,9 @@ vac_truncate_clog(TransactionId frozenXID, */ relation = heap_open(DatabaseRelationId, AccessShareLock); - scan = heap_beginscan_catalog(relation, 0, NULL); + scan = table_beginscan_catalog(relation, 0, NULL); - while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL) { volatile FormData_pg_database *dbform = (Form_pg_database) GETSTRUCT(tuple); TransactionId datfrozenxid = dbform->datfrozenxid; @@ -1452,7 +1453,7 @@ vac_truncate_clog(TransactionId frozenXID, } } - heap_endscan(scan); + table_endscan(scan); heap_close(relation, AccessShareLock); 
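[ Illustrative sketch, not part of the patch: the slot-based scan loop that
  converted callers such as AlterDomainNotNull and validateDomainConstraint
  now follow instead of heap_beginscan()/heap_getnext()/heap_attisnull().
  Assumes "rel" is an already-opened Relation; error handling omitted. ]

	TableScanDesc	scan;
	TupleTableSlot *slot;
	Snapshot	snapshot;

	snapshot = RegisterSnapshot(GetLatestSnapshot());
	scan = table_beginscan(rel, snapshot, 0, NULL);
	slot = table_gimmegimmeslot(rel, NULL);		/* slot owned by the table's AM */

	while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
	{
		Datum	d;
		bool	isnull;

		/* fetch column 1 through the slot rather than via heap_getattr() */
		d = slot_getattr(slot, 1, &isnull);
		(void) d;
	}

	ExecDropSingleTupleTableSlot(slot);
	table_endscan(scan);
	UnregisterSnapshot(snapshot);
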
@@ -1711,7 +1712,7 @@ vacuum_rel(Oid relid, RangeVar *relation, int options, VacuumParams *params) cluster_rel(relid, InvalidOid, cluster_options); } else - heap_vacuum_rel(onerel, options, params, vac_strategy); + table_vacuum_rel(onerel, options, params, vac_strategy); /* Roll back any GUC changes executed by index functions */ AtEOXact_GUC(false, save_nestlevel); diff --git a/src/backend/executor/execAmi.c b/src/backend/executor/execAmi.c index 9e78421978..33951c9f6d 100644 --- a/src/backend/executor/execAmi.c +++ b/src/backend/executor/execAmi.c @@ -544,7 +544,7 @@ static bool IndexSupportsBackwardScan(Oid indexid) { bool result; - HeapTuple ht_idxrel; + HeapTuple ht_idxrel; Form_pg_class idxrelrec; IndexAmRoutine *amroutine; diff --git a/src/backend/executor/execCurrent.c b/src/backend/executor/execCurrent.c index 39c462a4e5..35048400cf 100644 --- a/src/backend/executor/execCurrent.c +++ b/src/backend/executor/execCurrent.c @@ -203,7 +203,7 @@ execCurrentOf(CurrentOfExpr *cexpr, */ IndexScanDesc scan = ((IndexOnlyScanState *) scanstate)->ioss_ScanDesc; - *current_tid = scan->xs_ctup.t_self; + *current_tid = scan->xs_heaptid; } else { diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c index ec4a2506f1..6cac1cf99c 100644 --- a/src/backend/executor/execExprInterp.c +++ b/src/backend/executor/execExprInterp.c @@ -57,6 +57,7 @@ #include "postgres.h" #include "access/tuptoaster.h" +#include "access/sysattr.h" #include "catalog/pg_type.h" #include "commands/sequence.h" #include "executor/execExpr.h" diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c index 8b35bb458d..66d838dbce 100644 --- a/src/backend/executor/execIndexing.c +++ b/src/backend/executor/execIndexing.c @@ -107,6 +107,7 @@ #include "postgres.h" #include "access/relscan.h" +#include "access/tableam.h" #include "access/xact.h" #include "catalog/index.h" #include "executor/executor.h" @@ -269,12 +270,12 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo) */ List * ExecInsertIndexTuples(TupleTableSlot *slot, - ItemPointer tupleid, EState *estate, bool noDupErr, bool *specConflict, List *arbiterIndexes) { + ItemPointer tupleid = &slot->tts_tid; List *result = NIL; ResultRelInfo *resultRelInfo; int i; @@ -286,6 +287,8 @@ ExecInsertIndexTuples(TupleTableSlot *slot, Datum values[INDEX_MAX_KEYS]; bool isnull[INDEX_MAX_KEYS]; + Assert(ItemPointerIsValid(tupleid)); + /* * Get information from the result relation info structure. */ @@ -650,7 +653,6 @@ check_exclusion_or_unique_constraint(Relation heap, Relation index, Oid *index_collations = index->rd_indcollation; int indnkeyatts = IndexRelationGetNumberOfKeyAttributes(index); IndexScanDesc index_scan; - HeapTuple tup; ScanKeyData scankeys[INDEX_MAX_KEYS]; SnapshotData DirtySnapshot; int i; @@ -706,8 +708,7 @@ check_exclusion_or_unique_constraint(Relation heap, Relation index, * to this slot. Be sure to save and restore caller's value for * scantuple. 
*/ - existing_slot = MakeSingleTupleTableSlot(RelationGetDescr(heap), - &TTSOpsHeapTuple); + existing_slot = table_gimmegimmeslot(heap, NULL); econtext = GetPerTupleExprContext(estate); save_scantuple = econtext->ecxt_scantuple; @@ -723,11 +724,9 @@ retry: index_scan = index_beginscan(heap, index, &DirtySnapshot, indnkeyatts, 0); index_rescan(index_scan, scankeys, indnkeyatts, NULL, 0); - while ((tup = index_getnext(index_scan, - ForwardScanDirection)) != NULL) + while (index_getnext_slot(index_scan, ForwardScanDirection, existing_slot)) { TransactionId xwait; - ItemPointerData ctid_wait; XLTW_Oper reason_wait; Datum existing_values[INDEX_MAX_KEYS]; bool existing_isnull[INDEX_MAX_KEYS]; @@ -738,7 +737,7 @@ retry: * Ignore the entry for the tuple we're trying to check. */ if (ItemPointerIsValid(tupleid) && - ItemPointerEquals(tupleid, &tup->t_self)) + ItemPointerEquals(tupleid, &existing_slot->tts_tid)) { if (found_self) /* should not happen */ elog(ERROR, "found self tuple multiple times in index \"%s\"", @@ -751,7 +750,6 @@ retry: * Extract the index column values and isnull flags from the existing * tuple. */ - ExecStoreHeapTuple(tup, existing_slot, false); FormIndexDatum(indexInfo, existing_slot, estate, existing_values, existing_isnull); @@ -786,7 +784,10 @@ retry: DirtySnapshot.speculativeToken && TransactionIdPrecedes(GetCurrentTransactionId(), xwait)))) { - ctid_wait = tup->t_data->t_ctid; + /* + * PBORKED? When waiting, we used to use t_ctid, rather than + * t_self, but I don't see a need for that? + */ reason_wait = indexInfo->ii_ExclusionOps ? XLTW_RecheckExclusionConstr : XLTW_InsertIndex; index_endscan(index_scan); @@ -794,7 +795,9 @@ retry: SpeculativeInsertionWait(DirtySnapshot.xmin, DirtySnapshot.speculativeToken); else - XactLockTableWait(xwait, heap, &ctid_wait, reason_wait); + XactLockTableWait(xwait, heap, + &existing_slot->tts_tid, reason_wait); + goto retry; } @@ -806,7 +809,9 @@ retry: { conflict = true; if (conflictTid) - *conflictTid = tup->t_self; + { + *conflictTid = existing_slot->tts_tid; + } break; } diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index d83d296d82..9fe420bfa8 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -38,6 +38,7 @@ #include "postgres.h" #include "access/htup_details.h" +#include "access/tableam.h" #include "access/sysattr.h" #include "access/transam.h" #include "access/xact.h" @@ -976,12 +977,12 @@ InitPlan(QueryDesc *queryDesc, int eflags) * Initialize the executor's tuple table to empty. 
*/ estate->es_tupleTable = NIL; - estate->es_trig_tuple_slot = NULL; + estate->es_trig_return_slot = NULL; estate->es_trig_oldtup_slot = NULL; estate->es_trig_newtup_slot = NULL; /* mark EvalPlanQual not active */ - estate->es_epqTuple = NULL; + estate->es_epqTupleSlot = NULL; estate->es_epqTupleSet = NULL; estate->es_epqScanDone = NULL; @@ -2403,6 +2404,30 @@ ExecBuildAuxRowMark(ExecRowMark *erm, List *targetlist) return aerm; } +TupleTableSlot * +EvalPlanQualSlot(EPQState *epqstate, + Relation relation, Index rti) +{ + TupleTableSlot **slot = &epqstate->estate->es_epqTupleSlot[rti - 1]; + + if (*slot == NULL) + { + MemoryContext oldcontext; + + oldcontext = MemoryContextSwitchTo(epqstate->estate->es_query_cxt); + + if (relation) + *slot = table_gimmegimmeslot(relation, &epqstate->estate->es_tupleTable); + else + *slot = MakeTupleTableSlot(epqstate->origslot->tts_tupleDescriptor, &TTSOpsVirtual); + + epqstate->estate->es_epqTupleSet[rti - 1] = true; + MemoryContextSwitchTo(oldcontext); + } + + return *slot; +} + /* * EvalPlanQual logic --- recheck modified tuple(s) to see if we want to @@ -2420,9 +2445,7 @@ ExecBuildAuxRowMark(ExecRowMark *erm, List *targetlist) * epqstate - state for EvalPlanQual rechecking * relation - table containing tuple * rti - rangetable index of table containing tuple - * lockmode - requested tuple lock mode - * *tid - t_ctid from the outdated tuple (ie, next updated version) - * priorXmax - t_xmax from the outdated tuple + * tuple - tuple for processing * * *tid is also an output parameter: it's modified to hold the TID of the * latest version of the tuple (note this may be changed even on failure) @@ -2435,39 +2458,25 @@ ExecBuildAuxRowMark(ExecRowMark *erm, List *targetlist) */ TupleTableSlot * EvalPlanQual(EState *estate, EPQState *epqstate, - Relation relation, Index rti, int lockmode, - ItemPointer tid, TransactionId priorXmax) + Relation relation, Index rti, TupleTableSlot *testslot) { TupleTableSlot *slot; - HeapTuple copyTuple; Assert(rti > 0); - /* - * Get and lock the updated version of the row; if fail, return NULL. - */ - copyTuple = EvalPlanQualFetch(estate, relation, lockmode, LockWaitBlock, - tid, priorXmax); - - if (copyTuple == NULL) - return NULL; - - /* - * For UPDATE/DELETE we have to return tid of actual row we're executing - * PQ for. - */ - *tid = copyTuple->t_self; - /* * Need to run a recheck subquery. Initialize or reinitialize EPQ state. */ EvalPlanQualBegin(epqstate, estate); +#if FIXME /* * Free old test tuple, if any, and store new tuple where relation's scan * node will see it */ - EvalPlanQualSetTuple(epqstate, rti, copyTuple); + EvalPlanQualSetTuple(epqstate, rti, testslot); +#endif + Assert(testslot == epqstate->estate->es_epqTupleSlot[rti - 1]); /* * Fetch any non-locked source rows @@ -2489,272 +2498,20 @@ EvalPlanQual(EState *estate, EPQState *epqstate, if (!TupIsNull(slot)) ExecMaterializeSlot(slot); +#if FIXME /* * Clear out the test tuple. This is needed in case the EPQ query is * re-used to test a tuple for a different relation. (Not clear that can * really happen, but let's be safe.) 
*/ EvalPlanQualSetTuple(epqstate, rti, NULL); +#else + ExecClearTuple(epqstate->estate->es_epqTupleSlot[rti - 1]); +#endif return slot; } -/* - * Fetch a copy of the newest version of an outdated tuple - * - * estate - executor state data - * relation - table containing tuple - * lockmode - requested tuple lock mode - * wait_policy - requested lock wait policy - * *tid - t_ctid from the outdated tuple (ie, next updated version) - * priorXmax - t_xmax from the outdated tuple - * - * Returns a palloc'd copy of the newest tuple version, or NULL if we find - * that there is no newest version (ie, the row was deleted not updated). - * We also return NULL if the tuple is locked and the wait policy is to skip - * such tuples. - * - * If successful, we have locked the newest tuple version, so caller does not - * need to worry about it changing anymore. - * - * Note: properly, lockmode should be declared as enum LockTupleMode, - * but we use "int" to avoid having to include heapam.h in executor.h. - */ -HeapTuple -EvalPlanQualFetch(EState *estate, Relation relation, int lockmode, - LockWaitPolicy wait_policy, - ItemPointer tid, TransactionId priorXmax) -{ - HeapTuple copyTuple = NULL; - HeapTupleData tuple; - SnapshotData SnapshotDirty; - - /* - * fetch target tuple - * - * Loop here to deal with updated or busy tuples - */ - InitDirtySnapshot(SnapshotDirty); - tuple.t_self = *tid; - for (;;) - { - Buffer buffer; - - if (heap_fetch(relation, &SnapshotDirty, &tuple, &buffer, true, NULL)) - { - HTSU_Result test; - HeapUpdateFailureData hufd; - - /* - * If xmin isn't what we're expecting, the slot must have been - * recycled and reused for an unrelated tuple. This implies that - * the latest version of the row was deleted, so we need do - * nothing. (Should be safe to examine xmin without getting - * buffer's content lock. We assume reading a TransactionId to be - * atomic, and Xmin never changes in an existing tuple, except to - * invalid or frozen, and neither of those can match priorXmax.) - */ - if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data), - priorXmax)) - { - ReleaseBuffer(buffer); - return NULL; - } - - /* otherwise xmin should not be dirty... */ - if (TransactionIdIsValid(SnapshotDirty.xmin)) - elog(ERROR, "t_xmin is uncommitted in tuple to be updated"); - - /* - * If tuple is being updated by other transaction then we have to - * wait for its commit/abort, or die trying. - */ - if (TransactionIdIsValid(SnapshotDirty.xmax)) - { - ReleaseBuffer(buffer); - switch (wait_policy) - { - case LockWaitBlock: - XactLockTableWait(SnapshotDirty.xmax, - relation, &tuple.t_self, - XLTW_FetchUpdated); - break; - case LockWaitSkip: - if (!ConditionalXactLockTableWait(SnapshotDirty.xmax)) - return NULL; /* skip instead of waiting */ - break; - case LockWaitError: - if (!ConditionalXactLockTableWait(SnapshotDirty.xmax)) - ereport(ERROR, - (errcode(ERRCODE_LOCK_NOT_AVAILABLE), - errmsg("could not obtain lock on row in relation \"%s\"", - RelationGetRelationName(relation)))); - break; - } - continue; /* loop back to repeat heap_fetch */ - } - - /* - * If tuple was inserted by our own transaction, we have to check - * cmin against es_output_cid: cmin >= current CID means our - * command cannot see the tuple, so we should ignore it. Otherwise - * heap_lock_tuple() will throw an error, and so would any later - * attempt to update or delete the tuple. (We need not check cmax - * because HeapTupleSatisfiesDirty will consider a tuple deleted - * by our transaction dead, regardless of cmax.) 
We just checked - * that priorXmax == xmin, so we can test that variable instead of - * doing HeapTupleHeaderGetXmin again. - */ - if (TransactionIdIsCurrentTransactionId(priorXmax) && - HeapTupleHeaderGetCmin(tuple.t_data) >= estate->es_output_cid) - { - ReleaseBuffer(buffer); - return NULL; - } - - /* - * This is a live tuple, so now try to lock it. - */ - test = heap_lock_tuple(relation, &tuple, - estate->es_output_cid, - lockmode, wait_policy, - false, &buffer, &hufd); - /* We now have two pins on the buffer, get rid of one */ - ReleaseBuffer(buffer); - - switch (test) - { - case HeapTupleSelfUpdated: - - /* - * The target tuple was already updated or deleted by the - * current command, or by a later command in the current - * transaction. We *must* ignore the tuple in the former - * case, so as to avoid the "Halloween problem" of - * repeated update attempts. In the latter case it might - * be sensible to fetch the updated tuple instead, but - * doing so would require changing heap_update and - * heap_delete to not complain about updating "invisible" - * tuples, which seems pretty scary (heap_lock_tuple will - * not complain, but few callers expect - * HeapTupleInvisible, and we're not one of them). So for - * now, treat the tuple as deleted and do not process. - */ - ReleaseBuffer(buffer); - return NULL; - - case HeapTupleMayBeUpdated: - /* successfully locked */ - break; - - case HeapTupleUpdated: - ReleaseBuffer(buffer); - if (IsolationUsesXactSnapshot()) - ereport(ERROR, - (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), - errmsg("could not serialize access due to concurrent update"))); - if (ItemPointerIndicatesMovedPartitions(&hufd.ctid)) - ereport(ERROR, - (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), - errmsg("tuple to be locked was already moved to another partition due to concurrent update"))); - - /* Should not encounter speculative tuple on recheck */ - Assert(!HeapTupleHeaderIsSpeculative(tuple.t_data)); - if (!ItemPointerEquals(&hufd.ctid, &tuple.t_self)) - { - /* it was updated, so look at the updated version */ - tuple.t_self = hufd.ctid; - /* updated row should have xmin matching this xmax */ - priorXmax = hufd.xmax; - continue; - } - /* tuple was deleted, so give up */ - return NULL; - - case HeapTupleWouldBlock: - ReleaseBuffer(buffer); - return NULL; - - case HeapTupleInvisible: - elog(ERROR, "attempted to lock invisible tuple"); - break; - - default: - ReleaseBuffer(buffer); - elog(ERROR, "unrecognized heap_lock_tuple status: %u", - test); - return NULL; /* keep compiler quiet */ - } - - /* - * We got tuple - now copy it for use by recheck query. - */ - copyTuple = heap_copytuple(&tuple); - ReleaseBuffer(buffer); - break; - } - - /* - * If the referenced slot was actually empty, the latest version of - * the row must have been deleted, so we need do nothing. - */ - if (tuple.t_data == NULL) - { - ReleaseBuffer(buffer); - return NULL; - } - - /* - * As above, if xmin isn't what we're expecting, do nothing. - */ - if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data), - priorXmax)) - { - ReleaseBuffer(buffer); - return NULL; - } - - /* - * If we get here, the tuple was found but failed SnapshotDirty. - * Assuming the xmin is either a committed xact or our own xact (as it - * certainly should be if we're trying to modify the tuple), this must - * mean that the row was updated or deleted by either a committed xact - * or our own xact. If it was deleted, we can ignore it; if it was - * updated then chain up to the next version and repeat the whole - * process. 
- * - * As above, it should be safe to examine xmax and t_ctid without the - * buffer content lock, because they can't be changing. - */ - - /* check whether next version would be in a different partition */ - if (HeapTupleHeaderIndicatesMovedPartitions(tuple.t_data)) - ereport(ERROR, - (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), - errmsg("tuple to be locked was already moved to another partition due to concurrent update"))); - - /* check whether tuple has been deleted */ - if (ItemPointerEquals(&tuple.t_self, &tuple.t_data->t_ctid)) - { - /* deleted, so forget about it */ - ReleaseBuffer(buffer); - return NULL; - } - - /* updated, so look at the updated row */ - tuple.t_self = tuple.t_data->t_ctid; - /* updated row should have xmin matching this xmax */ - priorXmax = HeapTupleHeaderGetUpdateXid(tuple.t_data); - ReleaseBuffer(buffer); - /* loop back to fetch next in chain */ - } - - /* - * Return the copied tuple - */ - return copyTuple; -} - /* * EvalPlanQualInit -- initialize during creation of a plan state node * that might need to invoke EPQ processing. @@ -2792,40 +2549,34 @@ EvalPlanQualSetPlan(EPQState *epqstate, Plan *subplan, List *auxrowmarks) epqstate->arowMarks = auxrowmarks; } +#if 0 /* * Install one test tuple into EPQ state, or clear test tuple if tuple == NULL * * NB: passed tuple must be palloc'd; it may get freed later */ void -EvalPlanQualSetTuple(EPQState *epqstate, Index rti, HeapTuple tuple) +EvalPlanQualSetTuple(EPQState *epqstate, Index rti, TupleTableSlot *slot) { EState *estate = epqstate->estate; Assert(rti > 0); - /* - * free old test tuple, if any, and store new tuple where relation's scan - * node will see it - */ - if (estate->es_epqTuple[rti - 1] != NULL) - heap_freetuple(estate->es_epqTuple[rti - 1]); - estate->es_epqTuple[rti - 1] = tuple; + if (estate->es_epqTupleSlot[rti - 1] != NULL) + ExecClearTuple(estate->es_epqTupleSlot[rti - 1]); + if (slot) + { + if (!estate->es_epqTupleSlot[rti]) + { + slot = table_gimmegimmeslot(erm->relation); + epqstate->estate->es_epqTupleSlot[erm->rti] = slot; + } + // XXX: It'd be better if we could work around needing to copy. 
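/*
 * Illustrative sketch, not part of the patch: with EvalPlanQualSlot()
 * handing out one cached slot per range-table index, the caller-side
 * pattern this WIP code appears to be moving toward is roughly
 *
 *     TupleTableSlot *epqslot = EvalPlanQualSlot(epqstate, relation, rti);
 *
 *     ExecCopySlot(epqslot, testslot);    -- store the test tuple
 *     slot = EvalPlanQual(estate, epqstate, relation, rti, epqslot);
 *     ...
 *     ExecClearTuple(epqslot);            -- reset before reuse
 *
 * The names relation, rti and testslot are assumed for illustration;
 * EvalPlanQual's new signature and the per-RTI slot array are taken from
 * the hunks above.
 */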
+ ExecCopySlot(estate->es_epqTupleSlot[rti - 1], slot); + } estate->es_epqTupleSet[rti - 1] = true; } - -/* - * Fetch back the current test tuple (if any) for the specified RTI - */ -HeapTuple -EvalPlanQualGetTuple(EPQState *epqstate, Index rti) -{ - EState *estate = epqstate->estate; - - Assert(rti > 0); - - return estate->es_epqTuple[rti - 1]; -} +#endif /* * Fetch the current row values for any non-locked relations that need @@ -2845,13 +2596,14 @@ EvalPlanQualFetchRowMarks(EPQState *epqstate) ExecRowMark *erm = aerm->rowmark; Datum datum; bool isNull; - HeapTupleData tuple; + TupleTableSlot *slot; if (RowMarkRequiresRowShareLock(erm->markType)) elog(ERROR, "EvalPlanQual doesn't support locking rowmarks"); /* clear any leftover test tuple for this rel */ - EvalPlanQualSetTuple(epqstate, erm->rti, NULL); + slot = EvalPlanQualSlot(epqstate, erm->relation, erm->rti); + ExecClearTuple(slot); /* if child rel, must check whether it produced this row */ if (erm->rti != erm->prti) @@ -2876,8 +2628,6 @@ EvalPlanQualFetchRowMarks(EPQState *epqstate) if (erm->markType == ROW_MARK_REFERENCE) { - HeapTuple copyTuple; - Assert(erm->relation != NULL); /* fetch the tuple's ctid */ @@ -2895,17 +2645,20 @@ EvalPlanQualFetchRowMarks(EPQState *epqstate) bool updated = false; fdwroutine = GetFdwRoutineForRelation(erm->relation, false); + /* this should have been checked already, but let's be safe */ if (fdwroutine->RefetchForeignRow == NULL) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot lock rows in foreign table \"%s\"", RelationGetRelationName(erm->relation)))); - copyTuple = fdwroutine->RefetchForeignRow(epqstate->estate, - erm, - datum, - &updated); - if (copyTuple == NULL) + + slot = fdwroutine->RefetchForeignRow(epqstate->estate, + erm, + datum, + slot, + &updated); + if (slot == NULL) elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck"); /* @@ -2917,25 +2670,14 @@ EvalPlanQualFetchRowMarks(EPQState *epqstate) else { /* ordinary table, fetch the tuple */ - Buffer buffer; - tuple.t_self = *((ItemPointer) DatumGetPointer(datum)); - if (!heap_fetch(erm->relation, SnapshotAny, &tuple, &buffer, - false, NULL)) + if (!table_fetch_row_version(erm->relation, (ItemPointer) DatumGetPointer(datum), + SnapshotAny, slot, NULL)) elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck"); - - /* successful, copy tuple */ - copyTuple = heap_copytuple(&tuple); - ReleaseBuffer(buffer); } - - /* store tuple */ - EvalPlanQualSetTuple(epqstate, erm->rti, copyTuple); } else { - HeapTupleHeader td; - Assert(erm->markType == ROW_MARK_COPY); /* fetch the whole-row Var for the relation */ @@ -2945,19 +2687,8 @@ EvalPlanQualFetchRowMarks(EPQState *epqstate) /* non-locked rels could be on the inside of outer joins */ if (isNull) continue; - td = DatumGetHeapTupleHeader(datum); - - /* build a temporary HeapTuple control structure */ - tuple.t_len = HeapTupleHeaderGetDatumLength(td); - tuple.t_data = td; - /* relation might be a foreign table, if so provide tableoid */ - tuple.t_tableOid = erm->relid; - /* also copy t_ctid in case there's valid data there */ - tuple.t_self = td->t_ctid; - - /* copy and store tuple */ - EvalPlanQualSetTuple(epqstate, erm->rti, - heap_copytuple(&tuple)); + + ExecForceStoreHeapTupleDatum(datum, slot); } } } @@ -3153,15 +2884,15 @@ EvalPlanQualStart(EPQState *epqstate, EState *parentestate, Plan *planTree) * sub-rechecks to inherit the values being examined by an outer recheck. 
*/ estate->es_epqScanDone = (bool *) palloc0(rtsize * sizeof(bool)); - if (parentestate->es_epqTuple != NULL) + if (parentestate->es_epqTupleSlot != NULL) { - estate->es_epqTuple = parentestate->es_epqTuple; + estate->es_epqTupleSlot = parentestate->es_epqTupleSlot; estate->es_epqTupleSet = parentestate->es_epqTupleSet; } else { - estate->es_epqTuple = (HeapTuple *) - palloc0(rtsize * sizeof(HeapTuple)); + estate->es_epqTupleSlot = (TupleTableSlot **) + palloc0(rtsize * sizeof(TupleTableSlot *)); estate->es_epqTupleSet = (bool *) palloc0(rtsize * sizeof(bool)); } diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c index 31f7288b46..f0a6318a25 100644 --- a/src/backend/executor/execPartition.c +++ b/src/backend/executor/execPartition.c @@ -13,6 +13,7 @@ */ #include "postgres.h" +#include "access/tableam.h" #include "catalog/partition.h" #include "catalog/pg_inherits.h" #include "catalog/pg_type.h" @@ -899,8 +900,7 @@ ExecInitRoutingInfo(ModifyTableState *mtstate, * end of the command. */ partrouteinfo->pi_PartitionTupleSlot = - ExecInitExtraTupleSlot(estate, RelationGetDescr(partrel), - &TTSOpsHeapTuple); + table_gimmegimmeslot(partrel, &estate->es_tupleTable); } else partrouteinfo->pi_PartitionTupleSlot = NULL; diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c index 5bd3bbc35e..553159b08e 100644 --- a/src/backend/executor/execReplication.c +++ b/src/backend/executor/execReplication.c @@ -15,6 +15,7 @@ #include "postgres.h" #include "access/relscan.h" +#include "access/tableam.h" #include "access/transam.h" #include "access/xact.h" #include "commands/trigger.h" @@ -117,7 +118,6 @@ RelationFindReplTupleByIndex(Relation rel, Oid idxoid, TupleTableSlot *searchslot, TupleTableSlot *outslot) { - HeapTuple scantuple; ScanKeyData skey[INDEX_MAX_KEYS]; IndexScanDesc scan; SnapshotData snap; @@ -143,10 +143,9 @@ retry: index_rescan(scan, skey, IndexRelationGetNumberOfKeyAttributes(idxrel), NULL, 0); /* Try to find the tuple */ - if ((scantuple = index_getnext(scan, ForwardScanDirection)) != NULL) + if (index_getnext_slot(scan, ForwardScanDirection, outslot)) { found = true; - ExecStoreHeapTuple(scantuple, outslot, false); ExecMaterializeSlot(outslot); xwait = TransactionIdIsValid(snap.xmin) ? @@ -166,25 +165,18 @@ retry: /* Found tuple, try to lock it in the lockmode. */ if (found) { - Buffer buf; HeapUpdateFailureData hufd; HTSU_Result res; - HeapTupleData locktup; - HeapTupleTableSlot *hslot = (HeapTupleTableSlot *)outslot; - - /* Only a heap tuple has item pointers. 
*/ - Assert(TTS_IS_HEAPTUPLE(outslot) || TTS_IS_BUFFERTUPLE(outslot)); - ItemPointerCopy(&hslot->tuple->t_self, &locktup.t_self); PushActiveSnapshot(GetLatestSnapshot()); - res = heap_lock_tuple(rel, &locktup, GetCurrentCommandId(false), - lockmode, - LockWaitBlock, - false /* don't follow updates */ , - &buf, &hufd); - /* the tuple slot already has the buffer pinned */ - ReleaseBuffer(buf); + res = table_lock_tuple(rel, &(outslot->tts_tid), GetLatestSnapshot(), + outslot, + GetCurrentCommandId(false), + lockmode, + LockWaitBlock, + 0 /* don't follow updates */ , + &hufd); PopActiveSnapshot(); @@ -203,6 +195,12 @@ retry: (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("concurrent update, retrying"))); goto retry; + case HeapTupleDeleted: + /* XXX: Improve handling here */ + ereport(LOG, + (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), + errmsg("concurrent delete, retrying"))); + goto retry; case HeapTupleInvisible: elog(ERROR, "attempted to lock invisible tuple"); break; @@ -220,59 +218,6 @@ retry: return found; } -/* - * Compare the tuple and slot and check if they have equal values. - * - * We use binary datum comparison which might return false negatives but - * that's the best we can do here as there may be multiple notions of - * equality for the data types and table columns don't specify which one - * to use. - */ -static bool -tuple_equals_slot(TupleDesc desc, HeapTuple tup, TupleTableSlot *slot) -{ - Datum values[MaxTupleAttributeNumber]; - bool isnull[MaxTupleAttributeNumber]; - int attrnum; - - heap_deform_tuple(tup, desc, values, isnull); - - /* Check equality of the attributes. */ - for (attrnum = 0; attrnum < desc->natts; attrnum++) - { - Form_pg_attribute att; - TypeCacheEntry *typentry; - - /* - * If one value is NULL and other is not, then they are certainly not - * equal - */ - if (isnull[attrnum] != slot->tts_isnull[attrnum]) - return false; - - /* - * If both are NULL, they can be considered equal. - */ - if (isnull[attrnum]) - continue; - - att = TupleDescAttr(desc, attrnum); - - typentry = lookup_type_cache(att->atttypid, TYPECACHE_EQ_OPR_FINFO); - if (!OidIsValid(typentry->eq_opr_finfo.fn_oid)) - ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_FUNCTION), - errmsg("could not identify an equality operator for type %s", - format_type_be(att->atttypid)))); - - if (!DatumGetBool(FunctionCall2(&typentry->eq_opr_finfo, - values[attrnum], - slot->tts_values[attrnum]))) - return false; - } - - return true; -} /* * Search the relation 'rel' for tuple using the sequential scan. @@ -288,33 +233,34 @@ bool RelationFindReplTupleSeq(Relation rel, LockTupleMode lockmode, TupleTableSlot *searchslot, TupleTableSlot *outslot) { - HeapTuple scantuple; - HeapScanDesc scan; + TupleTableSlot *scanslot; + TableScanDesc scan; SnapshotData snap; TransactionId xwait; bool found; - TupleDesc desc = RelationGetDescr(rel); + TupleDesc desc PG_USED_FOR_ASSERTS_ONLY = RelationGetDescr(rel); Assert(equalTupleDescs(desc, outslot->tts_tupleDescriptor)); /* Start a heap scan. 
*/ InitDirtySnapshot(snap); - scan = heap_beginscan(rel, &snap, 0, NULL); + scan = table_beginscan(rel, &snap, 0, NULL); + + scanslot = table_gimmegimmeslot(rel, NULL); retry: found = false; - heap_rescan(scan, NULL); + table_rescan(scan, NULL); /* Try to find the tuple */ - while ((scantuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + while (table_scan_getnextslot(scan, ForwardScanDirection, scanslot)) { - if (!tuple_equals_slot(desc, scantuple, searchslot)) + if (!ExecSlotCompare(scanslot, searchslot)) continue; found = true; - ExecStoreHeapTuple(scantuple, outslot, false); - ExecMaterializeSlot(outslot); + ExecCopySlot(outslot, scanslot); xwait = TransactionIdIsValid(snap.xmin) ? snap.xmin : snap.xmax; @@ -333,25 +279,18 @@ retry: /* Found tuple, try to lock it in the lockmode. */ if (found) { - Buffer buf; HeapUpdateFailureData hufd; HTSU_Result res; - HeapTupleData locktup; - HeapTupleTableSlot *hslot = (HeapTupleTableSlot *)outslot; - - /* Only a heap tuple has item pointers. */ - Assert(TTS_IS_HEAPTUPLE(outslot) || TTS_IS_BUFFERTUPLE(outslot)); - ItemPointerCopy(&hslot->tuple->t_self, &locktup.t_self); PushActiveSnapshot(GetLatestSnapshot()); - res = heap_lock_tuple(rel, &locktup, GetCurrentCommandId(false), - lockmode, - LockWaitBlock, - false /* don't follow updates */ , - &buf, &hufd); - /* the tuple slot already has the buffer pinned */ - ReleaseBuffer(buf); + res = table_lock_tuple(rel, &(outslot->tts_tid), GetLatestSnapshot(), + outslot, + GetCurrentCommandId(false), + lockmode, + LockWaitBlock, + 0 /* don't follow updates */ , + &hufd); PopActiveSnapshot(); @@ -370,6 +309,12 @@ retry: (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("concurrent update, retrying"))); goto retry; + case HeapTupleDeleted: + /* XXX: Improve handling here */ + ereport(LOG, + (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), + errmsg("concurrent delete, retrying"))); + goto retry; case HeapTupleInvisible: elog(ERROR, "attempted to lock invisible tuple"); break; @@ -379,7 +324,8 @@ retry: } } - heap_endscan(scan); + table_endscan(scan); + ExecDropSingleTupleTableSlot(scanslot); return found; } @@ -394,7 +340,6 @@ void ExecSimpleRelationInsert(EState *estate, TupleTableSlot *slot) { bool skip_tuple = false; - HeapTuple tuple; ResultRelInfo *resultRelInfo = estate->es_result_relation_info; Relation rel = resultRelInfo->ri_RelationDesc; @@ -407,10 +352,8 @@ ExecSimpleRelationInsert(EState *estate, TupleTableSlot *slot) if (resultRelInfo->ri_TrigDesc && resultRelInfo->ri_TrigDesc->trig_insert_before_row) { - slot = ExecBRInsertTriggers(estate, resultRelInfo, slot); - - if (slot == NULL) /* "do nothing" */ - skip_tuple = true; + if (!ExecBRInsertTriggers(estate, resultRelInfo, slot)) + skip_tuple = true; /* "do nothing" */ } if (!skip_tuple) @@ -423,19 +366,15 @@ ExecSimpleRelationInsert(EState *estate, TupleTableSlot *slot) if (resultRelInfo->ri_PartitionCheck) ExecPartitionCheck(resultRelInfo, slot, estate, true); - /* Materialize slot into a tuple that we can scribble upon. 
*/ - tuple = ExecFetchSlotHeapTuple(slot, true, NULL); - - /* OK, store the tuple and create index entries for it */ - simple_heap_insert(rel, tuple); + table_insert(resultRelInfo->ri_RelationDesc, slot, + GetCurrentCommandId(true), 0, NULL); if (resultRelInfo->ri_NumIndices > 0) - recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self), - estate, false, NULL, + recheckIndexes = ExecInsertIndexTuples(slot, estate, false, NULL, NIL); /* AFTER ROW INSERT Triggers */ - ExecARInsertTriggers(estate, resultRelInfo, tuple, + ExecARInsertTriggers(estate, resultRelInfo, slot, recheckIndexes, NULL); /* @@ -459,15 +398,9 @@ ExecSimpleRelationUpdate(EState *estate, EPQState *epqstate, TupleTableSlot *searchslot, TupleTableSlot *slot) { bool skip_tuple = false; - HeapTuple tuple; ResultRelInfo *resultRelInfo = estate->es_result_relation_info; Relation rel = resultRelInfo->ri_RelationDesc; - HeapTupleTableSlot *hsearchslot = (HeapTupleTableSlot *)searchslot; - HeapTupleTableSlot *hslot = (HeapTupleTableSlot *)slot; - - /* We expect both searchslot and the slot to contain a heap tuple. */ - Assert(TTS_IS_HEAPTUPLE(searchslot) || TTS_IS_BUFFERTUPLE(searchslot)); - Assert(TTS_IS_HEAPTUPLE(slot) || TTS_IS_BUFFERTUPLE(slot)); + ItemPointer tid = &(searchslot->tts_tid); /* For now we support only tables. */ Assert(rel->rd_rel->relkind == RELKIND_RELATION); @@ -478,16 +411,18 @@ ExecSimpleRelationUpdate(EState *estate, EPQState *epqstate, if (resultRelInfo->ri_TrigDesc && resultRelInfo->ri_TrigDesc->trig_update_before_row) { - slot = ExecBRUpdateTriggers(estate, epqstate, resultRelInfo, - &hsearchslot->tuple->t_self, NULL, slot); - - if (slot == NULL) /* "do nothing" */ - skip_tuple = true; + if (!ExecBRUpdateTriggers(estate, epqstate, resultRelInfo, + tid, + NULL, slot)) + skip_tuple = true; /* "do nothing" */ } if (!skip_tuple) { List *recheckIndexes = NIL; + HeapUpdateFailureData hufd; + LockTupleMode lockmode; + bool update_indexes; /* Check the constraints of the tuple */ if (rel->rd_att->constr) @@ -495,22 +430,22 @@ ExecSimpleRelationUpdate(EState *estate, EPQState *epqstate, if (resultRelInfo->ri_PartitionCheck) ExecPartitionCheck(resultRelInfo, slot, estate, true); - /* Materialize slot into a tuple that we can scribble upon. 
*/ - tuple = ExecFetchSlotHeapTuple(slot, true, NULL); + table_update(rel, tid, slot, GetCurrentCommandId(true), estate->es_snapshot, + InvalidSnapshot, true, &hufd, &lockmode, &update_indexes); - /* OK, update the tuple and index entries for it */ - simple_heap_update(rel, &hsearchslot->tuple->t_self, hslot->tuple); + /* + * FIXME: move from simple_heap_update to table_update removes + * concurrency handling + */ - if (resultRelInfo->ri_NumIndices > 0 && - !HeapTupleIsHeapOnly(hslot->tuple)) - recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self), - estate, false, NULL, + if (resultRelInfo->ri_NumIndices > 0 && update_indexes) + recheckIndexes = ExecInsertIndexTuples(slot, estate, false, NULL, NIL); /* AFTER ROW UPDATE Triggers */ ExecARUpdateTriggers(estate, resultRelInfo, - &hsearchslot->tuple->t_self, NULL, tuple, - recheckIndexes, NULL); + tid, + NULL, slot, recheckIndexes, NULL); list_free(recheckIndexes); } @@ -529,7 +464,7 @@ ExecSimpleRelationDelete(EState *estate, EPQState *epqstate, bool skip_tuple = false; ResultRelInfo *resultRelInfo = estate->es_result_relation_info; Relation rel = resultRelInfo->ri_RelationDesc; - HeapTupleTableSlot *hsearchslot = (HeapTupleTableSlot *)searchslot; + ItemPointer tid = &(searchslot->tts_tid); /* For now we support only tables and heap tuples. */ Assert(rel->rd_rel->relkind == RELKIND_RELATION); @@ -542,20 +477,24 @@ ExecSimpleRelationDelete(EState *estate, EPQState *epqstate, resultRelInfo->ri_TrigDesc->trig_delete_before_row) { skip_tuple = !ExecBRDeleteTriggers(estate, epqstate, resultRelInfo, - &hsearchslot->tuple->t_self, NULL, - NULL); + tid, NULL, NULL); + } if (!skip_tuple) { List *recheckIndexes = NIL; + HeapUpdateFailureData hufd; /* OK, delete the tuple */ - simple_heap_delete(rel, &hsearchslot->tuple->t_self); + /* FIXME: needs checks for return codes */ + table_delete(rel, tid, GetCurrentCommandId(true), + estate->es_snapshot, InvalidSnapshot, + true, &hufd, false); /* AFTER ROW DELETE Triggers */ ExecARDeleteTriggers(estate, resultRelInfo, - &hsearchslot->tuple->t_self, NULL, NULL); + tid, NULL, NULL); list_free(recheckIndexes); } diff --git a/src/backend/executor/execScan.c b/src/backend/executor/execScan.c index d90bb16b57..a3349099b6 100644 --- a/src/backend/executor/execScan.c +++ b/src/backend/executor/execScan.c @@ -40,7 +40,7 @@ ExecScanFetch(ScanState *node, CHECK_FOR_INTERRUPTS(); - if (estate->es_epqTuple != NULL) + if (estate->es_epqTupleSlot != NULL) { /* * We are inside an EvalPlanQual recheck. 
Return the test tuple if @@ -73,17 +73,15 @@ ExecScanFetch(ScanState *node, /* Else mark to remember that we shouldn't return more */ estate->es_epqScanDone[scanrelid - 1] = true; + slot = estate->es_epqTupleSlot[scanrelid - 1]; + /* Return empty slot if we haven't got a test tuple */ - if (estate->es_epqTuple[scanrelid - 1] == NULL) + if (TupIsNull(slot)) return ExecClearTuple(slot); - /* Store test tuple in the plan node's scan slot */ - ExecForceStoreHeapTuple(estate->es_epqTuple[scanrelid - 1], - slot); - /* Check if it meets the access-method conditions */ if (!(*recheckMtd) (node, slot)) - ExecClearTuple(slot); /* would not be returned by scan */ + return ExecClearTuple(slot); /* would not be returned by scan */ return slot; } diff --git a/src/backend/executor/execTuples.c b/src/backend/executor/execTuples.c index 472a5f39cf..d91a71a7c1 100644 --- a/src/backend/executor/execTuples.c +++ b/src/backend/executor/execTuples.c @@ -453,6 +453,7 @@ tts_heap_store_tuple(TupleTableSlot *slot, HeapTuple tuple, bool shouldFree) hslot->tuple = tuple; hslot->off = 0; slot->tts_flags &= ~TTS_FLAG_EMPTY; + slot->tts_tid = tuple->t_self; if (shouldFree) slot->tts_flags |= TTS_FLAG_SHOULDFREE; @@ -717,20 +718,31 @@ tts_buffer_heap_materialize(TupleTableSlot *slot) * associated with it, unless it's materialized (which would've returned * above). */ + // PBORKED: restore +#if 0 Assert(bslot->base.tuple); +#endif oldContext = MemoryContextSwitchTo(slot->tts_mcxt); - bslot->base.tuple = heap_copytuple(bslot->base.tuple); +#if 1 + if (!bslot->base.tuple) + { + bslot->base.tuple = heap_form_tuple(slot->tts_tupleDescriptor, + slot->tts_values, + slot->tts_isnull); + } +#endif + else + { + bslot->base.tuple = heap_copytuple(bslot->base.tuple); + } MemoryContextSwitchTo(oldContext); - /* - * A heap tuple stored in a BufferHeapTupleTableSlot should have a buffer - * associated with it, unless it's materialized. - */ - Assert(BufferIsValid(bslot->buffer)); - if (likely(BufferIsValid(bslot->buffer))) + if (BufferIsValid(bslot->buffer)) + { ReleaseBuffer(bslot->buffer); - bslot->buffer = InvalidBuffer; + bslot->buffer = InvalidBuffer; + } /* * Have to deform from scratch, otherwise tts_values[] entries could point @@ -764,6 +776,10 @@ tts_buffer_heap_copyslot(TupleTableSlot *dstslot, TupleTableSlot *srcslot) } else { + // PBORKED: shouldn't be required + if (!bsrcslot->base.tuple) + tts_buffer_heap_materialize(srcslot); + tts_buffer_heap_store_tuple(dstslot, bsrcslot->base.tuple, bsrcslot->buffer); /* * Need to materialize because the HeapTupleData portion of the tuple @@ -858,6 +874,7 @@ tts_buffer_heap_store_tuple(TupleTableSlot *slot, HeapTuple tuple, Buffer buffer slot->tts_nvalid = 0; bslot->base.tuple = tuple; bslot->base.off = 0; + slot->tts_tid = tuple->t_self; /* * If tuple is on a disk page, keep the page pinned as long as we hold a @@ -873,7 +890,9 @@ tts_buffer_heap_store_tuple(TupleTableSlot *slot, HeapTuple tuple, Buffer buffer if (BufferIsValid(bslot->buffer)) ReleaseBuffer(bslot->buffer); bslot->buffer = buffer; - IncrBufferRefCount(buffer); + // PBORKED: Should always be valid + if (BufferIsValid(buffer)) + IncrBufferRefCount(buffer); } } @@ -1211,6 +1230,56 @@ MakeSingleTupleTableSlot(TupleDesc tupdesc, return slot; } +// FIXME this definitely does not belong here. +/* -------------------------------- + * ExecSlotCompare + * + * This is a slot comparision function to find out + * whether both the slots are same or not? 
+ * -------------------------------- + */ +bool +ExecSlotCompare(TupleTableSlot *slot1, TupleTableSlot *slot2) +{ + int attrnum; + + Assert(slot1->tts_tupleDescriptor->natts == slot2->tts_tupleDescriptor->natts); + + slot_getallattrs(slot1); + slot_getallattrs(slot2); + + /* Check equality of the attributes. */ + for (attrnum = 0; attrnum < slot1->tts_tupleDescriptor->natts; attrnum++) + { + Form_pg_attribute att; + TypeCacheEntry *typentry; + + /* + * If one value is NULL and other is not, then they are certainly not + * equal + */ + if (slot1->tts_isnull[attrnum] != slot2->tts_isnull[attrnum]) + return false; + + att = TupleDescAttr(slot1->tts_tupleDescriptor, attrnum); + + typentry = lookup_type_cache(att->atttypid, TYPECACHE_EQ_OPR_FINFO); + if (!OidIsValid(typentry->eq_opr_finfo.fn_oid)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("could not identify an equality operator for type %s", + format_type_be(att->atttypid)))); + + if (!DatumGetBool(FunctionCall2(&typentry->eq_opr_finfo, + slot1->tts_values[attrnum], + slot2->tts_values[attrnum]))) + return false; + } + + return true; +} + + /* -------------------------------- * ExecDropSingleTupleTableSlot * @@ -1328,9 +1397,15 @@ ExecStoreHeapTuple(HeapTuple tuple, Assert(slot != NULL); Assert(slot->tts_tupleDescriptor != NULL); - if (unlikely(!TTS_IS_HEAPTUPLE(slot))) + // PBORKED: should onlyneed heaptuples here. + if (TTS_IS_BUFFERTUPLE(slot)) + tts_buffer_heap_store_tuple(slot, tuple, InvalidBuffer); + else if (TTS_IS_HEAPTUPLE(slot)) + tts_heap_store_tuple(slot, tuple, shouldFree); + else elog(ERROR, "trying to store a heap tuple into wrong type of slot"); - tts_heap_store_tuple(slot, tuple, shouldFree); + + slot->tts_tableOid = tuple->t_tableOid; return slot; } @@ -1371,6 +1446,8 @@ ExecStoreBufferHeapTuple(HeapTuple tuple, elog(ERROR, "trying to store an on-disk heap tuple into wrong type of slot"); tts_buffer_heap_store_tuple(slot, tuple, buffer); + slot->tts_tableOid = tuple->t_tableOid; + return slot; } diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index 2a47abc02e..4031642b80 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -47,6 +47,7 @@ #include "access/parallel.h" #include "access/relscan.h" +#include "access/tableam.h" #include "access/transam.h" #include "executor/executor.h" #include "jit/jit.h" @@ -130,7 +131,7 @@ CreateExecutorState(void) estate->es_tuple_routing_result_relations = NIL; estate->es_trig_target_relations = NIL; - estate->es_trig_tuple_slot = NULL; + estate->es_trig_return_slot = NULL; estate->es_trig_oldtup_slot = NULL; estate->es_trig_newtup_slot = NULL; @@ -157,7 +158,7 @@ CreateExecutorState(void) estate->es_per_tuple_exprcontext = NULL; - estate->es_epqTuple = NULL; + estate->es_epqTupleSlot = NULL; estate->es_epqTupleSet = NULL; estate->es_epqScanDone = NULL; estate->es_sourceText = NULL; @@ -419,6 +420,63 @@ MakePerTupleExprContext(EState *estate) return estate->es_per_tuple_exprcontext; } +TupleTableSlot * +ExecTriggerGetOldSlot(EState *estate, Relation rel) +{ + TupleDesc reldesc = RelationGetDescr(rel); + MemoryContext oldcontext = MemoryContextSwitchTo(estate->es_query_cxt); + + /* PBORKED: This needs to handle switching slot types between partitions */ + if (estate->es_trig_oldtup_slot == NULL) + estate->es_trig_oldtup_slot = ExecInitExtraTupleSlot(estate, NULL, + table_slot_callbacks(rel)); + + if (estate->es_trig_oldtup_slot->tts_tupleDescriptor != reldesc) + 
ExecSetSlotDescriptor(estate->es_trig_oldtup_slot, reldesc); + + MemoryContextSwitchTo(oldcontext); + + return estate->es_trig_oldtup_slot; +} + +TupleTableSlot * +ExecTriggerGetNewSlot(EState *estate, Relation rel) +{ + TupleDesc reldesc = RelationGetDescr(rel); + MemoryContext oldcontext = MemoryContextSwitchTo(estate->es_query_cxt); + + /* PBORKED: This needs to handle switching slot types between partitions */ + if (estate->es_trig_newtup_slot == NULL) + estate->es_trig_newtup_slot = ExecInitExtraTupleSlot(estate, NULL, + table_slot_callbacks(rel)); + + if (estate->es_trig_newtup_slot->tts_tupleDescriptor != reldesc) + ExecSetSlotDescriptor(estate->es_trig_newtup_slot, reldesc); + + MemoryContextSwitchTo(oldcontext); + + return estate->es_trig_newtup_slot; +} + +TupleTableSlot * +ExecTriggerGetReturnSlot(EState *estate, Relation rel) +{ + TupleDesc reldesc = RelationGetDescr(rel); + MemoryContext oldcontext = MemoryContextSwitchTo(estate->es_query_cxt); + + /* PBORKED: This needs to handle switching slot types between partitions */ + if (estate->es_trig_return_slot == NULL) + estate->es_trig_return_slot = ExecInitExtraTupleSlot(estate, NULL, + table_slot_callbacks(rel)); + + if (estate->es_trig_return_slot->tts_tupleDescriptor != reldesc) + ExecSetSlotDescriptor(estate->es_trig_return_slot, reldesc); + + MemoryContextSwitchTo(oldcontext); + + return estate->es_trig_return_slot; +} + /* ---------------------------------------------------------------- * miscellaneous node-init support functions diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c index 00d02fd50f..8c0625eb2e 100644 --- a/src/backend/executor/nodeBitmapHeapscan.c +++ b/src/backend/executor/nodeBitmapHeapscan.c @@ -38,6 +38,7 @@ #include #include "access/relscan.h" +#include "access/tableam.h" #include "access/transam.h" #include "access/visibilitymap.h" #include "executor/execdebug.h" @@ -54,14 +55,13 @@ static TupleTableSlot *BitmapHeapNext(BitmapHeapScanState *node); -static void bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres); static inline void BitmapDoneInitializingSharedState( ParallelBitmapHeapState *pstate); static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node, TBMIterateResult *tbmres); static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node); static inline void BitmapPrefetch(BitmapHeapScanState *node, - HeapScanDesc scan); + TableScanDesc scan); static bool BitmapShouldInitializeSharedState( ParallelBitmapHeapState *pstate); @@ -76,12 +76,12 @@ static TupleTableSlot * BitmapHeapNext(BitmapHeapScanState *node) { ExprContext *econtext; - HeapScanDesc scan; + TableScanDesc scan; + TIDBitmap *tbm; TBMIterator *tbmiterator = NULL; TBMSharedIterator *shared_tbmiterator = NULL; TBMIterateResult *tbmres; - OffsetNumber targoffset; TupleTableSlot *slot; ParallelBitmapHeapState *pstate = node->pstate; dsa_area *dsa = node->ss.ps.state->es_query_dsa; @@ -191,16 +191,27 @@ BitmapHeapNext(BitmapHeapScanState *node) for (;;) { - Page dp; - ItemId lp; - CHECK_FOR_INTERRUPTS(); - /* - * Get next page of results if needed - */ - if (tbmres == NULL) + if (node->return_empty_tuples > 0) + { + ExecStoreAllNullTuple(slot); + node->return_empty_tuples--; + } + else if (tbmres) + { + if (!table_scan_bitmap_pagescan_next(scan, slot)) + { + node->tbmres = tbmres = NULL; + continue; + } + } + else { + /* + * Get next page of results if needed + */ + if (!pstate) node->tbmres = tbmres = tbm_iterate(tbmiterator); else @@ -213,18 +224,6 @@ 
BitmapHeapNext(BitmapHeapScanState *node) BitmapAdjustPrefetchIterator(node, tbmres); - /* - * Ignore any claimed entries past what we think is the end of the - * relation. (This is probably not necessary given that we got at - * least AccessShareLock on the table before performing any of the - * indexscans, but let's be safe.) - */ - if (tbmres->blockno >= scan->rs_nblocks) - { - node->tbmres = tbmres = NULL; - continue; - } - /* * We can skip fetching the heap page if we don't need any fields * from the heap, and the bitmap entries don't need rechecking, @@ -240,16 +239,21 @@ BitmapHeapNext(BitmapHeapScanState *node) { /* * The number of tuples on this page is put into - * scan->rs_ntuples; note we don't fill scan->rs_vistuples. + * node->return_empty_tuples; note we don't fill + * scan->rs_vistuples. */ - scan->rs_ntuples = tbmres->ntuples; + node->return_empty_tuples = tbmres->ntuples; } else { /* * Fetch the current heap page and identify candidate tuples. */ - bitgetpage(scan, tbmres); + if (!table_scan_bitmap_pagescan(scan, tbmres)) + { + /* AM doesn't think this block is valid, skip */ + continue; + } } if (tbmres->ntuples >= 0) @@ -257,51 +261,37 @@ BitmapHeapNext(BitmapHeapScanState *node) else node->lossy_pages++; - /* - * Set rs_cindex to first slot to examine - */ - scan->rs_cindex = 0; - /* Adjust the prefetch target */ BitmapAdjustPrefetchTarget(node); - } - else - { - /* - * Continuing in previously obtained page; advance rs_cindex - */ - scan->rs_cindex++; - -#ifdef USE_PREFETCH /* - * Try to prefetch at least a few pages even before we get to the - * second page if we don't stop reading after the first tuple. + * XXX: Note we do not prefetch here. */ - if (!pstate) - { - if (node->prefetch_target < node->prefetch_maximum) - node->prefetch_target++; - } - else if (pstate->prefetch_target < node->prefetch_maximum) - { - /* take spinlock while updating shared state */ - SpinLockAcquire(&pstate->mutex); - if (pstate->prefetch_target < node->prefetch_maximum) - pstate->prefetch_target++; - SpinLockRelease(&pstate->mutex); - } -#endif /* USE_PREFETCH */ + + continue; } + +#ifdef USE_PREFETCH + /* - * Out of range? If so, nothing more to look at on this page + * Try to prefetch at least a few pages even before we get to the + * second page if we don't stop reading after the first tuple. */ - if (scan->rs_cindex < 0 || scan->rs_cindex >= scan->rs_ntuples) + if (!pstate) { - node->tbmres = tbmres = NULL; - continue; + if (node->prefetch_target < node->prefetch_maximum) + node->prefetch_target++; } + else if (pstate->prefetch_target < node->prefetch_maximum) + { + /* take spinlock while updating shared state */ + SpinLockAcquire(&pstate->mutex); + if (pstate->prefetch_target < node->prefetch_maximum) + pstate->prefetch_target++; + SpinLockRelease(&pstate->mutex); + } +#endif /* USE_PREFETCH */ /* * We issue prefetch requests *after* fetching the current page to try @@ -312,52 +302,19 @@ BitmapHeapNext(BitmapHeapScanState *node) */ BitmapPrefetch(node, scan); - if (node->skip_fetch) - { - /* - * If we don't have to fetch the tuple, just return nulls. - */ - ExecStoreAllNullTuple(slot); - } - else + /* + * If we are using lossy info, we have to recheck the qual + * conditions at every tuple. + */ + if (tbmres->recheck) { - /* - * Okay to fetch the tuple. 
- */ - targoffset = scan->rs_vistuples[scan->rs_cindex]; - dp = (Page) BufferGetPage(scan->rs_cbuf); - lp = PageGetItemId(dp, targoffset); - Assert(ItemIdIsNormal(lp)); - - scan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp); - scan->rs_ctup.t_len = ItemIdGetLength(lp); - scan->rs_ctup.t_tableOid = scan->rs_rd->rd_id; - ItemPointerSet(&scan->rs_ctup.t_self, tbmres->blockno, targoffset); - - pgstat_count_heap_fetch(scan->rs_rd); - - /* - * Set up the result slot to point to this tuple. Note that the - * slot acquires a pin on the buffer. - */ - ExecStoreBufferHeapTuple(&scan->rs_ctup, - slot, - scan->rs_cbuf); - - /* - * If we are using lossy info, we have to recheck the qual - * conditions at every tuple. - */ - if (tbmres->recheck) + econtext->ecxt_scantuple = slot; + if (!ExecQualAndReset(node->bitmapqualorig, econtext)) { - econtext->ecxt_scantuple = slot; - if (!ExecQualAndReset(node->bitmapqualorig, econtext)) - { - /* Fails recheck, so drop it and loop back for another */ - InstrCountFiltered2(node, 1); - ExecClearTuple(slot); - continue; - } + /* Fails recheck, so drop it and loop back for another */ + InstrCountFiltered2(node, 1); + ExecClearTuple(slot); + continue; } } @@ -371,110 +328,6 @@ BitmapHeapNext(BitmapHeapScanState *node) return ExecClearTuple(slot); } -/* - * bitgetpage - subroutine for BitmapHeapNext() - * - * This routine reads and pins the specified page of the relation, then - * builds an array indicating which tuples on the page are both potentially - * interesting according to the bitmap, and visible according to the snapshot. - */ -static void -bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres) -{ - BlockNumber page = tbmres->blockno; - Buffer buffer; - Snapshot snapshot; - int ntup; - - /* - * Acquire pin on the target heap page, trading in any pin we held before. - */ - Assert(page < scan->rs_nblocks); - - scan->rs_cbuf = ReleaseAndReadBuffer(scan->rs_cbuf, - scan->rs_rd, - page); - buffer = scan->rs_cbuf; - snapshot = scan->rs_snapshot; - - ntup = 0; - - /* - * Prune and repair fragmentation for the whole page, if possible. - */ - heap_page_prune_opt(scan->rs_rd, buffer); - - /* - * We must hold share lock on the buffer content while examining tuple - * visibility. Afterwards, however, the tuples we have found to be - * visible are guaranteed good as long as we hold the buffer pin. - */ - LockBuffer(buffer, BUFFER_LOCK_SHARE); - - /* - * We need two separate strategies for lossy and non-lossy cases. - */ - if (tbmres->ntuples >= 0) - { - /* - * Bitmap is non-lossy, so we just look through the offsets listed in - * tbmres; but we have to follow any HOT chain starting at each such - * offset. - */ - int curslot; - - for (curslot = 0; curslot < tbmres->ntuples; curslot++) - { - OffsetNumber offnum = tbmres->offsets[curslot]; - ItemPointerData tid; - HeapTupleData heapTuple; - - ItemPointerSet(&tid, page, offnum); - if (heap_hot_search_buffer(&tid, scan->rs_rd, buffer, snapshot, - &heapTuple, NULL, true)) - scan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid); - } - } - else - { - /* - * Bitmap is lossy, so we must examine each item pointer on the page. - * But we can ignore HOT chains, since we'll check each tuple anyway. 
- */ - Page dp = (Page) BufferGetPage(buffer); - OffsetNumber maxoff = PageGetMaxOffsetNumber(dp); - OffsetNumber offnum; - - for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum)) - { - ItemId lp; - HeapTupleData loctup; - bool valid; - - lp = PageGetItemId(dp, offnum); - if (!ItemIdIsNormal(lp)) - continue; - loctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp); - loctup.t_len = ItemIdGetLength(lp); - loctup.t_tableOid = scan->rs_rd->rd_id; - ItemPointerSet(&loctup.t_self, page, offnum); - valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer); - if (valid) - { - scan->rs_vistuples[ntup++] = offnum; - PredicateLockTuple(scan->rs_rd, &loctup, snapshot); - } - CheckForSerializableConflictOut(valid, scan->rs_rd, &loctup, - buffer, snapshot); - } - } - - LockBuffer(buffer, BUFFER_LOCK_UNLOCK); - - Assert(ntup <= MaxHeapTuplesPerPage); - scan->rs_ntuples = ntup; -} - /* * BitmapDoneInitializingSharedState - Shared state is initialized * @@ -598,7 +451,7 @@ BitmapAdjustPrefetchTarget(BitmapHeapScanState *node) * BitmapPrefetch - Prefetch, if prefetch_pages are behind prefetch_target */ static inline void -BitmapPrefetch(BitmapHeapScanState *node, HeapScanDesc scan) +BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan) { #ifdef USE_PREFETCH ParallelBitmapHeapState *pstate = node->pstate; @@ -741,7 +594,7 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node) PlanState *outerPlan = outerPlanState(node); /* rescan to release any page pin */ - heap_rescan(node->ss.ss_currentScanDesc, NULL); + table_rescan(node->ss.ss_currentScanDesc, NULL); /* release bitmaps and buffers if any */ if (node->tbmiterator) @@ -785,7 +638,7 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node) void ExecEndBitmapHeapScan(BitmapHeapScanState *node) { - HeapScanDesc scanDesc; + TableScanDesc scanDesc; /* * extract information from the node @@ -830,7 +683,7 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node) /* * close heap scan */ - heap_endscan(scanDesc); + table_endscan(scanDesc); } /* ---------------------------------------------------------------- @@ -914,8 +767,7 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags) */ ExecInitScanTupleSlot(estate, &scanstate->ss, RelationGetDescr(currentRelation), - &TTSOpsBufferHeapTuple); - + table_slot_callbacks(currentRelation)); /* * Initialize result type and projection. @@ -953,10 +805,10 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags) * Even though we aren't going to do a conventional seqscan, it is useful * to create a HeapScanDesc --- most of the fields in it are usable. */ - scanstate->ss.ss_currentScanDesc = heap_beginscan_bm(currentRelation, - estate->es_snapshot, - 0, - NULL); + scanstate->ss.ss_currentScanDesc = table_beginscan_bm(currentRelation, + estate->es_snapshot, + 0, + NULL); /* * all done. 
@@ -1104,5 +956,5 @@ ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node, node->pstate = pstate; snapshot = RestoreSnapshot(pstate->phs_snapshot_data); - heap_update_snapshot(node->ss.ss_currentScanDesc, snapshot); + table_scan_update_snapshot(node->ss.ss_currentScanDesc, snapshot); } diff --git a/src/backend/executor/nodeForeignscan.c b/src/backend/executor/nodeForeignscan.c index fab752058a..c96db36e0d 100644 --- a/src/backend/executor/nodeForeignscan.c +++ b/src/backend/executor/nodeForeignscan.c @@ -62,9 +62,12 @@ ForeignNext(ForeignScanState *node) */ if (plan->fsSystemCol && !TupIsNull(slot)) { - HeapTuple tup = ExecFetchSlotHeapTuple(slot, true, NULL); - - tup->t_tableOid = RelationGetRelid(node->ss.ss_currentRelation); + ExecMaterializeSlot(slot); +#if 0 + ExecSlotUpdateTupleTableoid(slot, + RelationGetRelid(node->ss.ss_currentRelation)); +#endif + slot->tts_tableOid = RelationGetRelid(node->ss.ss_currentRelation); } return slot; diff --git a/src/backend/executor/nodeGather.c b/src/backend/executor/nodeGather.c index e6367ade76..1dd8bb3f3a 100644 --- a/src/backend/executor/nodeGather.c +++ b/src/backend/executor/nodeGather.c @@ -266,7 +266,7 @@ gather_getnext(GatherState *gatherstate) PlanState *outerPlan = outerPlanState(gatherstate); TupleTableSlot *outerTupleSlot; TupleTableSlot *fslot = gatherstate->funnel_slot; - HeapTuple tup; + HeapTuple tup; while (gatherstate->nreaders > 0 || gatherstate->need_to_scan_locally) { @@ -316,7 +316,7 @@ gather_readnext(GatherState *gatherstate) for (;;) { TupleQueueReader *reader; - HeapTuple tup; + HeapTuple tup; bool readerdone; /* Check for async events, particularly messages from workers. */ diff --git a/src/backend/executor/nodeGatherMerge.c b/src/backend/executor/nodeGatherMerge.c index 51d910bd5e..54ef0ca7b7 100644 --- a/src/backend/executor/nodeGatherMerge.c +++ b/src/backend/executor/nodeGatherMerge.c @@ -45,7 +45,7 @@ */ typedef struct GMReaderTupleBuffer { - HeapTuple *tuple; /* array of length MAX_TUPLE_STORE */ + HeapTuple *tuple; /* array of length MAX_TUPLE_STORE */ int nTuples; /* number of tuples currently stored */ int readCounter; /* index of next tuple to extract */ bool done; /* true if reader is known exhausted */ @@ -55,7 +55,7 @@ static TupleTableSlot *ExecGatherMerge(PlanState *pstate); static int32 heap_compare_slots(Datum a, Datum b, void *arg); static TupleTableSlot *gather_merge_getnext(GatherMergeState *gm_state); static HeapTuple gm_readnext_tuple(GatherMergeState *gm_state, int nreader, - bool nowait, bool *done); + bool nowait, bool *done); static void ExecShutdownGatherMergeWorkers(GatherMergeState *node); static void gather_merge_setup(GatherMergeState *gm_state); static void gather_merge_init(GatherMergeState *gm_state); @@ -637,7 +637,7 @@ static bool gather_merge_readnext(GatherMergeState *gm_state, int reader, bool nowait) { GMReaderTupleBuffer *tuple_buffer; - HeapTuple tup; + HeapTuple tup; /* * If we're being asked to generate a tuple from the leader, then we just @@ -716,7 +716,7 @@ gm_readnext_tuple(GatherMergeState *gm_state, int nreader, bool nowait, bool *done) { TupleQueueReader *reader; - HeapTuple tup; + HeapTuple tup; /* Check for async events, particularly messages from workers. 
*/ CHECK_FOR_INTERRUPTS(); diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c index 8498273207..c39c4f453d 100644 --- a/src/backend/executor/nodeIndexonlyscan.c +++ b/src/backend/executor/nodeIndexonlyscan.c @@ -31,6 +31,7 @@ #include "postgres.h" #include "access/relscan.h" +#include "access/tableam.h" #include "access/visibilitymap.h" #include "executor/execdebug.h" #include "executor/nodeIndexonlyscan.h" @@ -117,7 +118,7 @@ IndexOnlyNext(IndexOnlyScanState *node) */ while ((tid = index_getnext_tid(scandesc, direction)) != NULL) { - HeapTuple tuple = NULL; + bool tuple_from_heap = false; CHECK_FOR_INTERRUPTS(); @@ -163,17 +164,18 @@ IndexOnlyNext(IndexOnlyScanState *node) * Rats, we have to visit the heap to check visibility. */ InstrCountTuples2(node, 1); - tuple = index_fetch_heap(scandesc); - if (tuple == NULL) + if (!index_fetch_heap(scandesc, slot)) continue; /* no visible tuple, try next index entry */ + ExecClearTuple(slot); + /* * Only MVCC snapshots are supported here, so there should be no * need to keep following the HOT chain once a visible entry has * been found. If we did want to allow that, we'd need to keep * more state to remember not to call index_getnext_tid next time. */ - if (scandesc->xs_continue_hot) + if (scandesc->xs_heap_continue) elog(ERROR, "non-MVCC snapshots are not supported in index-only scans"); /* @@ -182,13 +184,15 @@ IndexOnlyNext(IndexOnlyScanState *node) * but it's not clear whether it's a win to do so. The next index * entry might require a visit to the same heap page. */ + + tuple_from_heap = true; } /* * Fill the scan tuple slot with data from the index. This might be - * provided in either HeapTuple or IndexTuple format. Conceivably an - * index AM might fill both fields, in which case we prefer the heap - * format, since it's probably a bit cheaper to fill a slot from. + * provided in either HeapTuple or IndexTuple format. Conceivably + * an index AM might fill both fields, in which case we prefer the + * heap format, since it's probably a bit cheaper to fill a slot from. */ if (scandesc->xs_hitup) { @@ -200,6 +204,7 @@ IndexOnlyNext(IndexOnlyScanState *node) Assert(slot->tts_tupleDescriptor->natts == scandesc->xs_hitupdesc->natts); ExecForceStoreHeapTuple(scandesc->xs_hitup, slot); + slot->tts_tableOid = RelationGetRelid(scandesc->heapRelation); } else if (scandesc->xs_itup) StoreIndexTuple(slot, scandesc->xs_itup, scandesc->xs_itupdesc); @@ -242,7 +247,7 @@ IndexOnlyNext(IndexOnlyScanState *node) * anyway, then we already have the tuple-level lock and can skip the * page lock. */ - if (tuple == NULL) + if (!tuple_from_heap) PredicateLockPage(scandesc->heapRelation, ItemPointerGetBlockNumber(tid), estate->es_snapshot); @@ -424,7 +429,7 @@ ExecIndexOnlyMarkPos(IndexOnlyScanState *node) { EState *estate = node->ss.ps.state; - if (estate->es_epqTuple != NULL) + if (estate->es_epqTupleSlot != NULL) { /* * We are inside an EvalPlanQual recheck. If a test tuple exists for @@ -459,7 +464,7 @@ ExecIndexOnlyRestrPos(IndexOnlyScanState *node) { EState *estate = node->ss.ps.state; - if (estate->es_epqTuple != NULL) + if (estate->es_epqTupleSlot != NULL) { /* See comments in ExecIndexOnlyMarkPos */ Index scanrelid = ((Scan *) node->ss.ps.plan)->scanrelid; @@ -527,7 +532,8 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags) * suitable data anyway.) 
*/ tupDesc = ExecTypeFromTL(node->indextlist); - ExecInitScanTupleSlot(estate, &indexstate->ss, tupDesc, &TTSOpsHeapTuple); + ExecInitScanTupleSlot(estate, &indexstate->ss, tupDesc, + table_slot_callbacks(currentRelation)); /* * Initialize result type and projection info. The node's targetlist will diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c index 6b222a9f64..b38dadaa9a 100644 --- a/src/backend/executor/nodeIndexscan.c +++ b/src/backend/executor/nodeIndexscan.c @@ -31,6 +31,7 @@ #include "access/nbtree.h" #include "access/relscan.h" +#include "access/tableam.h" #include "catalog/pg_am.h" #include "executor/execdebug.h" #include "executor/nodeIndexscan.h" @@ -51,7 +52,7 @@ typedef struct { pairingheap_node ph_node; - HeapTuple htup; + HeapTuple htup; Datum *orderbyvals; bool *orderbynulls; } ReorderTuple; @@ -84,7 +85,6 @@ IndexNext(IndexScanState *node) ExprContext *econtext; ScanDirection direction; IndexScanDesc scandesc; - HeapTuple tuple; TupleTableSlot *slot; /* @@ -131,20 +131,10 @@ IndexNext(IndexScanState *node) /* * ok, now that we have what we need, fetch the next tuple. */ - while ((tuple = index_getnext(scandesc, direction)) != NULL) + while (index_getnext_slot(scandesc, direction, slot)) { CHECK_FOR_INTERRUPTS(); - /* - * Store the scanned tuple in the scan tuple slot of the scan state. - * Note: we pass 'false' because tuples returned by amgetnext are - * pointers onto disk pages and must not be pfree()'d. - */ - ExecStoreBufferHeapTuple(tuple, /* tuple to store */ - slot, /* slot to store in */ - scandesc->xs_cbuf); /* buffer containing - * tuple */ - /* * If the index was lossy, we have to recheck the index quals using * the fetched tuple. @@ -184,7 +174,6 @@ IndexNextWithReorder(IndexScanState *node) EState *estate; ExprContext *econtext; IndexScanDesc scandesc; - HeapTuple tuple; TupleTableSlot *slot; ReorderTuple *topmost = NULL; bool was_exact; @@ -253,9 +242,12 @@ IndexNextWithReorder(IndexScanState *node) scandesc->xs_orderbynulls, node) <= 0) { + HeapTuple tuple; + tuple = reorderqueue_pop(node); /* Pass 'true', as the tuple in the queue is a palloc'd copy */ + slot->tts_tableOid = RelationGetRelid(scandesc->heapRelation); ExecStoreHeapTuple(tuple, slot, true); return slot; } @@ -272,8 +264,7 @@ IndexNextWithReorder(IndexScanState *node) */ next_indextuple: slot = node->ss.ss_ScanTupleSlot; - tuple = index_getnext(scandesc, ForwardScanDirection); - if (!tuple) + if (!index_getnext_slot(scandesc, ForwardScanDirection, slot)) { /* * No more tuples from the index. But we still need to drain any @@ -283,14 +274,6 @@ next_indextuple: continue; } - /* - * Store the scanned tuple in the scan tuple slot of the scan state. - */ - ExecStoreBufferHeapTuple(tuple, /* tuple to store */ - slot, /* slot to store in */ - scandesc->xs_cbuf); /* buffer containing - * tuple */ - /* * If the index was lossy, we have to recheck the index quals and * ORDER BY expressions using the fetched tuple. 
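
Editorial note, not part of the patch: the index-scan hunks above drop the old pattern of fetching a HeapTuple with index_getnext() and storing it together with xs_cbuf in a buffer slot. With the slot-based interface the scan node only needs a slot created through table_slot_callbacks(), so the same executor code works for any table access method. A minimal consumer loop built only from functions that appear in this patch might look like the sketch below; recheck_quals() is a placeholder for the lossy-index recheck the real node performs.

/*
 * Sketch only -- assumes scandesc, direction and a slot created with
 * table_slot_callbacks(heapRelation) are already set up, as in IndexNext().
 */
while (index_getnext_slot(scandesc, direction, slot))
{
    CHECK_FOR_INTERRUPTS();

    /* a lossy index still requires rechecking the original quals here */
    if (scandesc->xs_recheck && !recheck_quals(slot))
        continue;               /* recheck_quals() is a placeholder */

    return slot;                /* visible, qualifying tuple */
}
return ExecClearTuple(slot);    /* no more matches */
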
@@ -358,6 +341,8 @@ next_indextuple: topmost->orderbynulls, node) > 0)) { + HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, NULL); + /* Put this tuple to the queue */ reorderqueue_push(node, tuple, lastfetched_vals, lastfetched_nulls); continue; @@ -515,7 +500,7 @@ reorderqueue_push(IndexScanState *node, HeapTuple tuple, static HeapTuple reorderqueue_pop(IndexScanState *node) { - HeapTuple result; + HeapTuple result; ReorderTuple *topmost; int i; @@ -851,7 +836,7 @@ ExecIndexMarkPos(IndexScanState *node) { EState *estate = node->ss.ps.state; - if (estate->es_epqTuple != NULL) + if (estate->es_epqTupleSlot != NULL) { /* * We are inside an EvalPlanQual recheck. If a test tuple exists for @@ -886,7 +871,7 @@ ExecIndexRestrPos(IndexScanState *node) { EState *estate = node->ss.ps.state; - if (estate->es_epqTuple != NULL) + if (estate->es_epqTupleSlot != NULL) { /* See comments in ExecIndexMarkPos */ Index scanrelid = ((Scan *) node->ss.ps.plan)->scanrelid; @@ -950,7 +935,7 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) */ ExecInitScanTupleSlot(estate, &indexstate->ss, RelationGetDescr(currentRelation), - &TTSOpsBufferHeapTuple); + table_slot_callbacks(currentRelation)); if (node->indexorderby != NIL) indexstate->ss.ps.scanopsfixed = false; diff --git a/src/backend/executor/nodeLockRows.c b/src/backend/executor/nodeLockRows.c index 7887388b9e..4451779ff8 100644 --- a/src/backend/executor/nodeLockRows.c +++ b/src/backend/executor/nodeLockRows.c @@ -22,6 +22,7 @@ #include "postgres.h" #include "access/htup_details.h" +#include "access/tableam.h" #include "access/xact.h" #include "executor/executor.h" #include "executor/nodeLockRows.h" @@ -66,6 +67,8 @@ lnext: /* We don't need EvalPlanQual unless we get updated tuple version(s) */ epq_needed = false; + EvalPlanQualBegin(&node->lr_epqstate, estate); + /* * Attempt to lock the source tuple(s). (Note we only have locking * rowmarks in lr_arowMarks.) @@ -74,21 +77,19 @@ lnext: { ExecAuxRowMark *aerm = (ExecAuxRowMark *) lfirst(lc); ExecRowMark *erm = aerm->rowmark; - HeapTuple *testTuple; + TupleTableSlot *markSlot; Datum datum; bool isNull; - HeapTupleData tuple; - Buffer buffer; HeapUpdateFailureData hufd; LockTupleMode lockmode; HTSU_Result test; - HeapTuple copyTuple; + ItemPointerData tid; /* clear any leftover test tuple for this rel */ - testTuple = &(node->lr_curtuples[erm->rti - 1]); - if (*testTuple != NULL) - heap_freetuple(*testTuple); - *testTuple = NULL; + // used to be: - can we skip having lr_curtuples? 
+ //testSlot = node->lr_curtuples[erm->rti - 1]; + markSlot = EvalPlanQualSlot(&node->lr_epqstate, erm->relation, erm->rti); + ExecClearTuple(markSlot); /* if child rel, must check whether it produced this row */ if (erm->rti != erm->prti) @@ -129,25 +130,25 @@ lnext: bool updated = false; fdwroutine = GetFdwRoutineForRelation(erm->relation, false); + /* this should have been checked already, but let's be safe */ if (fdwroutine->RefetchForeignRow == NULL) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot lock rows in foreign table \"%s\"", RelationGetRelationName(erm->relation)))); - copyTuple = fdwroutine->RefetchForeignRow(estate, - erm, - datum, - &updated); - if (copyTuple == NULL) + + markSlot = fdwroutine->RefetchForeignRow(estate, + erm, + datum, + markSlot, + &updated); + if (markSlot == NULL) { /* couldn't get the lock, so skip this row */ goto lnext; } - /* save locked tuple for possible EvalPlanQual testing below */ - *testTuple = copyTuple; - /* * if FDW says tuple was updated before getting locked, we need to * perform EPQ testing to see if quals are still satisfied @@ -159,7 +160,7 @@ lnext: } /* okay, try to lock the tuple */ - tuple.t_self = *((ItemPointer) DatumGetPointer(datum)); + tid = *((ItemPointer) DatumGetPointer(datum)); switch (erm->markType) { case ROW_MARK_EXCLUSIVE: @@ -180,11 +181,13 @@ lnext: break; } - test = heap_lock_tuple(erm->relation, &tuple, - estate->es_output_cid, - lockmode, erm->waitPolicy, true, - &buffer, &hufd); - ReleaseBuffer(buffer); + test = table_lock_tuple(erm->relation, &tid, estate->es_snapshot, + markSlot, estate->es_output_cid, + lockmode, erm->waitPolicy, + (IsolationUsesXactSnapshot() ? 0 : TUPLE_LOCK_FLAG_FIND_LAST_VERSION) + | TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS, + &hufd); + switch (test) { case HeapTupleWouldBlock: @@ -211,6 +214,15 @@ lnext: case HeapTupleMayBeUpdated: /* got the lock successfully */ + if (hufd.traversed) + { + /* locked tuple saved in markSlot for EvalPlanQual testing below */ + + /* Remember we need to do EPQ testing */ + epq_needed = true; + + /* Continue loop until we have all target tuples */ + } break; case HeapTupleUpdated: @@ -218,41 +230,19 @@ lnext: ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to concurrent update"))); - if (ItemPointerIndicatesMovedPartitions(&hufd.ctid)) + /* skip lock */ + goto lnext; + + case HeapTupleDeleted: + if (IsolationUsesXactSnapshot()) ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), - errmsg("tuple to be locked was already moved to another partition due to concurrent update"))); - - if (ItemPointerEquals(&hufd.ctid, &tuple.t_self)) - { - /* Tuple was deleted, so don't return it */ - goto lnext; - } - - /* updated, so fetch and lock the updated version */ - copyTuple = EvalPlanQualFetch(estate, erm->relation, - lockmode, erm->waitPolicy, - &hufd.ctid, hufd.xmax); - - if (copyTuple == NULL) - { - /* - * Tuple was deleted; or it's locked and we're under SKIP - * LOCKED policy, so don't return it - */ - goto lnext; - } - /* remember the actually locked tuple's TID */ - tuple.t_self = copyTuple->t_self; - - /* Save locked tuple for EvalPlanQual testing below */ - *testTuple = copyTuple; - - /* Remember we need to do EPQ testing */ - epq_needed = true; - - /* Continue loop until we have all target tuples */ - break; + errmsg("could not serialize access due to concurrent update"))); + /* + * Tuple was deleted; or it's locked and we're under SKIP + * LOCKED policy, so don't return it + */ + 
goto lnext; case HeapTupleInvisible: elog(ERROR, "attempted to lock invisible tuple"); @@ -264,7 +254,7 @@ lnext: } /* Remember locked tuple's TID for EPQ testing and WHERE CURRENT OF */ - erm->curCtid = tuple.t_self; + erm->curCtid = tid; } /* @@ -273,7 +263,7 @@ lnext: if (epq_needed) { /* Initialize EPQ machinery */ - EvalPlanQualBegin(&node->lr_epqstate, estate); + //EvalPlanQualBegin(&node->lr_epqstate, estate); /* * Transfer any already-fetched tuples into the EPQ state, and fetch a @@ -286,26 +276,31 @@ lnext: { ExecAuxRowMark *aerm = (ExecAuxRowMark *) lfirst(lc); ExecRowMark *erm = aerm->rowmark; - HeapTupleData tuple; - Buffer buffer; + TupleTableSlot *markSlot; + + markSlot = EvalPlanQualSlot(&node->lr_epqstate, erm->relation, erm->rti); /* skip non-active child tables, but clear their test tuples */ if (!erm->ermActive) { Assert(erm->rti != erm->prti); /* check it's child table */ - EvalPlanQualSetTuple(&node->lr_epqstate, erm->rti, NULL); + ExecClearTuple(markSlot); continue; } /* was tuple updated and fetched above? */ - if (node->lr_curtuples[erm->rti - 1] != NULL) + //node->lr_curtuples[erm->rti - 1] != NULL + if (!TupIsNull(markSlot)) { +// elog(ERROR, "frak"); +#if FIXME /* yes, so set it as the EPQ test tuple for this rel */ EvalPlanQualSetTuple(&node->lr_epqstate, erm->rti, node->lr_curtuples[erm->rti - 1]); /* freeing this tuple is now the responsibility of EPQ */ node->lr_curtuples[erm->rti - 1] = NULL; +#endif continue; } @@ -314,15 +309,13 @@ lnext: Assert(ItemPointerIsValid(&(erm->curCtid))); /* okay, fetch the tuple */ - tuple.t_self = erm->curCtid; - if (!heap_fetch(erm->relation, SnapshotAny, &tuple, &buffer, - false, NULL)) + if (!table_fetch_row_version(erm->relation, &erm->curCtid, SnapshotAny, markSlot, + NULL)) elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck"); /* successful, copy and store tuple */ - EvalPlanQualSetTuple(&node->lr_epqstate, erm->rti, - heap_copytuple(&tuple)); - ReleaseBuffer(buffer); + //EvalPlanQualSetTuple(&node->lr_epqstate, erm->rti, tuple); + // ReleaseBuffer(buffer); } /* @@ -405,8 +398,8 @@ ExecInitLockRows(LockRows *node, EState *estate, int eflags) * Create workspace in which we can remember per-RTE locked tuples */ lrstate->lr_ntables = estate->es_range_table_size; - lrstate->lr_curtuples = (HeapTuple *) - palloc0(lrstate->lr_ntables * sizeof(HeapTuple)); + lrstate->lr_curtuples = (TupleTableSlot **) + palloc0(lrstate->lr_ntables * sizeof(TupleTableSlot *)); /* * Locate the ExecRowMark(s) that this node is responsible for, and diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 3aa0d4d0eb..d1ac9fc2e9 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -38,8 +38,10 @@ #include "postgres.h" #include "access/htup_details.h" +#include "access/tableam.h" #include "access/xact.h" #include "catalog/catalog.h" +#include "catalog/pg_am.h" #include "commands/trigger.h" #include "executor/execPartition.h" #include "executor/executor.h" @@ -168,15 +170,12 @@ ExecProcessReturning(ResultRelInfo *resultRelInfo, econtext->ecxt_scantuple = tupleSlot; else { - HeapTuple tuple; - /* * RETURNING expressions might reference the tableoid column, so * initialize t_tableOid before evaluating them. 
*/ Assert(!TupIsNull(econtext->ecxt_scantuple)); - tuple = ExecFetchSlotHeapTuple(econtext->ecxt_scantuple, true, NULL); - tuple->t_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc); + econtext->ecxt_scantuple->tts_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc); } econtext->ecxt_outertuple = planSlot; @@ -194,31 +193,33 @@ ExecProcessReturning(ResultRelInfo *resultRelInfo, */ static void ExecCheckHeapTupleVisible(EState *estate, - HeapTuple tuple, - Buffer buffer) + Relation rel, + TupleTableSlot *slot) { if (!IsolationUsesXactSnapshot()) return; - /* - * We need buffer pin and lock to call HeapTupleSatisfiesVisibility. - * Caller should be holding pin, but not lock. - */ - LockBuffer(buffer, BUFFER_LOCK_SHARE); - if (!HeapTupleSatisfiesVisibility(tuple, estate->es_snapshot, buffer)) + if (!table_satisfies_snapshot(rel, slot, estate->es_snapshot)) { + Datum xminDatum; + TransactionId xmin; + bool isnull; + + xminDatum = slot_getsysattr(slot, MinTransactionIdAttributeNumber, &isnull); + Assert(!isnull); + xmin = DatumGetTransactionId(xminDatum); + /* * We should not raise a serialization failure if the conflict is * against a tuple inserted by our own transaction, even if it's not * visible to our snapshot. (This would happen, for example, if * conflicting keys are proposed for insertion in a single command.) */ - if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple->t_data))) + if (!TransactionIdIsCurrentTransactionId(xmin)) ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to concurrent update"))); } - LockBuffer(buffer, BUFFER_LOCK_UNLOCK); } /* @@ -227,21 +228,19 @@ ExecCheckHeapTupleVisible(EState *estate, static void ExecCheckTIDVisible(EState *estate, ResultRelInfo *relinfo, - ItemPointer tid) + ItemPointer tid, + TupleTableSlot *tempSlot) { Relation rel = relinfo->ri_RelationDesc; - Buffer buffer; - HeapTupleData tuple; /* Redundantly check isolation level */ if (!IsolationUsesXactSnapshot()) return; - tuple.t_self = *tid; - if (!heap_fetch(rel, SnapshotAny, &tuple, &buffer, false, NULL)) + if (!table_fetch_row_version(rel, tid, SnapshotAny, tempSlot, NULL)) elog(ERROR, "failed to fetch conflicting tuple for ON CONFLICT"); - ExecCheckHeapTupleVisible(estate, &tuple, buffer); - ReleaseBuffer(buffer); + ExecCheckHeapTupleVisible(estate, rel, tempSlot); + ExecClearTuple(tempSlot); } /* ---------------------------------------------------------------- @@ -260,7 +259,6 @@ ExecInsert(ModifyTableState *mtstate, EState *estate, bool canSetTag) { - HeapTuple tuple; ResultRelInfo *resultRelInfo; Relation resultRelationDesc; List *recheckIndexes = NIL; @@ -269,11 +267,7 @@ ExecInsert(ModifyTableState *mtstate, ModifyTable *node = (ModifyTable *) mtstate->ps.plan; OnConflictAction onconflict = node->onConflictAction; - /* - * get the heap tuple out of the tuple table slot, making sure we have a - * writable copy - */ - tuple = ExecFetchSlotHeapTuple(slot, true, NULL); + ExecMaterializeSlot(slot); /* * get information on the (current) result relation @@ -293,26 +287,16 @@ ExecInsert(ModifyTableState *mtstate, if (resultRelInfo->ri_TrigDesc && resultRelInfo->ri_TrigDesc->trig_insert_before_row) { - slot = ExecBRInsertTriggers(estate, resultRelInfo, slot); - - if (slot == NULL) /* "do nothing" */ - return NULL; - - /* trigger might have changed tuple */ - tuple = ExecFetchSlotHeapTuple(slot, true, NULL); + if (!ExecBRInsertTriggers(estate, resultRelInfo, slot)) + return NULL; /* "do nothing" */ } /* INSTEAD OF 
ROW INSERT Triggers */ if (resultRelInfo->ri_TrigDesc && resultRelInfo->ri_TrigDesc->trig_insert_instead_row) { - slot = ExecIRInsertTriggers(estate, resultRelInfo, slot); - - if (slot == NULL) /* "do nothing" */ - return NULL; - - /* trigger might have changed tuple */ - tuple = ExecFetchSlotHeapTuple(slot, true, NULL); + if (!ExecIRInsertTriggers(estate, resultRelInfo, slot)) + return NULL; /* "do nothing" */ } else if (resultRelInfo->ri_FdwRoutine) { @@ -327,14 +311,11 @@ ExecInsert(ModifyTableState *mtstate, if (slot == NULL) /* "do nothing" */ return NULL; - /* FDW might have changed tuple */ - tuple = ExecFetchSlotHeapTuple(slot, true, NULL); - /* * AFTER ROW Triggers or RETURNING expressions might reference the * tableoid column, so initialize t_tableOid before evaluating them. */ - tuple->t_tableOid = RelationGetRelid(resultRelationDesc); + slot->tts_tableOid = RelationGetRelid(resultRelationDesc); } else { @@ -344,7 +325,7 @@ ExecInsert(ModifyTableState *mtstate, * Constraints might reference the tableoid column, so initialize * t_tableOid before evaluating them. */ - tuple->t_tableOid = RelationGetRelid(resultRelationDesc); + slot->tts_tableOid = RelationGetRelid(resultRelationDesc); /* * Check any RLS WITH CHECK policies. @@ -436,9 +417,11 @@ ExecInsert(ModifyTableState *mtstate, * In case of ON CONFLICT DO NOTHING, do nothing. However, * verify that the tuple is visible to the executor's MVCC * snapshot at higher isolation levels. + * + * Can reuse the input slot here (XXX). */ Assert(onconflict == ONCONFLICT_NOTHING); - ExecCheckTIDVisible(estate, resultRelInfo, &conflictTid); + ExecCheckTIDVisible(estate, resultRelInfo, &conflictTid, slot); InstrCountTuples2(&mtstate->ps, 1); return NULL; } @@ -451,24 +434,22 @@ ExecInsert(ModifyTableState *mtstate, * waiting for the whole transaction to complete. */ specToken = SpeculativeInsertionLockAcquire(GetCurrentTransactionId()); - HeapTupleHeaderSetSpeculativeToken(tuple->t_data, specToken); /* insert the tuple, with the speculative token */ - heap_insert(resultRelationDesc, tuple, - estate->es_output_cid, - HEAP_INSERT_SPECULATIVE, - NULL); + table_insert_speculative(resultRelationDesc, slot, + estate->es_output_cid, + HEAP_INSERT_SPECULATIVE, + NULL, + specToken); /* insert index entries for tuple */ - recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self), + recheckIndexes = ExecInsertIndexTuples(slot, estate, true, &specConflict, arbiterIndexes); /* adjust the tuple's state accordingly */ - if (!specConflict) - heap_finish_speculative(resultRelationDesc, tuple); - else - heap_abort_speculative(resultRelationDesc, tuple); + table_complete_speculative(resultRelationDesc, slot, + specToken, specConflict); /* * Wake up anyone waiting for our decision. They will re-check @@ -496,26 +477,23 @@ ExecInsert(ModifyTableState *mtstate, { /* * insert the tuple normally. - * - * Note: heap_insert returns the tid (location) of the new tuple - * in the t_self field. 
*/ - heap_insert(resultRelationDesc, tuple, - estate->es_output_cid, - 0, NULL); + table_insert(resultRelationDesc, slot, + estate->es_output_cid, + 0, NULL); /* insert index entries for tuple */ if (resultRelInfo->ri_NumIndices > 0) - recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self), - estate, false, NULL, + recheckIndexes = ExecInsertIndexTuples(slot, estate, false, NULL, NIL); + } } if (canSetTag) { (estate->es_processed)++; - setLastTid(&(tuple->t_self)); + setLastTid(&(slot->tts_tid)); } /* @@ -530,7 +508,7 @@ ExecInsert(ModifyTableState *mtstate, { ExecARUpdateTriggers(estate, resultRelInfo, NULL, NULL, - tuple, + slot, NULL, mtstate->mt_transition_capture); @@ -542,7 +520,7 @@ ExecInsert(ModifyTableState *mtstate, } /* AFTER ROW INSERT Triggers */ - ExecARInsertTriggers(estate, resultRelInfo, tuple, recheckIndexes, + ExecARInsertTriggers(estate, resultRelInfo, slot, recheckIndexes, ar_insert_trig_tcs); list_free(recheckIndexes); @@ -602,7 +580,7 @@ ExecDelete(ModifyTableState *mtstate, bool canSetTag, bool changingPart, bool *tupleDeleted, - TupleTableSlot **epqslot) + TupleTableSlot **epqreturnslot) { ResultRelInfo *resultRelInfo; Relation resultRelationDesc; @@ -627,7 +605,7 @@ ExecDelete(ModifyTableState *mtstate, bool dodelete; dodelete = ExecBRDeleteTriggers(estate, epqstate, resultRelInfo, - tupleid, oldtuple, epqslot); + tupleid, oldtuple, epqreturnslot); if (!dodelete) /* "do nothing" */ return NULL; @@ -647,8 +625,6 @@ ExecDelete(ModifyTableState *mtstate, } else if (resultRelInfo->ri_FdwRoutine) { - HeapTuple tuple; - /* * delete from foreign table: let the FDW do it * @@ -656,10 +632,7 @@ ExecDelete(ModifyTableState *mtstate, * although the FDW can return some other slot if it wants. Set up * the slot's tupdesc so the FDW doesn't need to do that for itself. */ - slot = estate->es_trig_tuple_slot; - if (slot->tts_tupleDescriptor != RelationGetDescr(resultRelationDesc)) - ExecSetSlotDescriptor(slot, RelationGetDescr(resultRelationDesc)); - + slot = ExecTriggerGetReturnSlot(estate, resultRelationDesc); slot = resultRelInfo->ri_FdwRoutine->ExecForeignDelete(estate, resultRelInfo, slot, @@ -674,8 +647,9 @@ ExecDelete(ModifyTableState *mtstate, */ if (TTS_EMPTY(slot)) ExecStoreAllNullTuple(slot); - tuple = ExecFetchSlotHeapTuple(slot, true, NULL); - tuple->t_tableOid = RelationGetRelid(resultRelationDesc); + + ExecMaterializeSlot(slot); + slot->tts_tableOid = RelationGetRelid(resultRelationDesc); } else { @@ -689,12 +663,58 @@ ExecDelete(ModifyTableState *mtstate, * mode transactions. */ ldelete:; - result = heap_delete(resultRelationDesc, tupleid, + result = table_delete(resultRelationDesc, tupleid, estate->es_output_cid, + estate->es_snapshot, estate->es_crosscheck_snapshot, true /* wait for commit */ , &hufd, changingPart); + + if (result == HeapTupleUpdated && !IsolationUsesXactSnapshot()) + { + EvalPlanQualBegin(epqstate, estate); + slot = EvalPlanQualSlot(epqstate, resultRelationDesc, resultRelInfo->ri_RangeTableIndex); + + result = table_lock_tuple(resultRelationDesc, tupleid, + estate->es_snapshot, + slot, estate->es_output_cid, + LockTupleExclusive, LockWaitBlock, + TUPLE_LOCK_FLAG_FIND_LAST_VERSION, + &hufd); + /*hari FIXME*/ + /*Assert(result != HeapTupleUpdated && hufd.traversed);*/ + if (result == HeapTupleMayBeUpdated) + { + TupleTableSlot *epqslot; + + epqslot = EvalPlanQual(estate, + epqstate, + resultRelationDesc, + resultRelInfo->ri_RangeTableIndex, + slot); + if (TupIsNull(epqslot)) + { + /* Tuple no more passing quals, exiting... 
*/ + return NULL; + } + + /**/ + if (epqreturnslot) + { + *epqreturnslot = epqslot; + return NULL; + } + + goto ldelete; + } + else if (result == HeapTupleInvisible) + { + /* tuple is not visible; nothing to do */ + return NULL; + } + } + switch (result) { case HeapTupleSelfUpdated: @@ -740,39 +760,16 @@ ldelete:; ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to concurrent update"))); - if (ItemPointerIndicatesMovedPartitions(&hufd.ctid)) + else + /* shouldn't get there */ + elog(ERROR, "wrong heap_delete status: %u", result); + break; + + case HeapTupleDeleted: + if (IsolationUsesXactSnapshot()) ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), - errmsg("tuple to be deleted was already moved to another partition due to concurrent update"))); - - if (!ItemPointerEquals(tupleid, &hufd.ctid)) - { - TupleTableSlot *my_epqslot; - - my_epqslot = EvalPlanQual(estate, - epqstate, - resultRelationDesc, - resultRelInfo->ri_RangeTableIndex, - LockTupleExclusive, - &hufd.ctid, - hufd.xmax); - if (!TupIsNull(my_epqslot)) - { - *tupleid = hufd.ctid; - - /* - * If requested, skip delete and pass back the updated - * row. - */ - if (epqslot) - { - *epqslot = my_epqslot; - return NULL; - } - else - goto ldelete; - } - } + errmsg("could not serialize access due to concurrent delete"))); /* tuple already deleted; nothing to do */ return NULL; @@ -834,36 +831,31 @@ ldelete:; * gotta fetch it. We can use the trigger tuple slot. */ TupleTableSlot *rslot; - HeapTupleData deltuple; - Buffer delbuffer; if (resultRelInfo->ri_FdwRoutine) { /* FDW must have provided a slot containing the deleted row */ Assert(!TupIsNull(slot)); - delbuffer = InvalidBuffer; } else { - slot = estate->es_trig_tuple_slot; + slot = ExecTriggerGetReturnSlot(estate, resultRelationDesc); if (oldtuple != NULL) { - deltuple = *oldtuple; - delbuffer = InvalidBuffer; + ExecForceStoreHeapTuple(oldtuple, slot); } else { - deltuple.t_self = *tupleid; - if (!heap_fetch(resultRelationDesc, SnapshotAny, - &deltuple, &delbuffer, false, NULL)) + if (!table_fetch_row_version(resultRelationDesc, tupleid, SnapshotAny, + slot, NULL)) elog(ERROR, "failed to fetch deleted tuple for DELETE RETURNING"); } - - if (slot->tts_tupleDescriptor != RelationGetDescr(resultRelationDesc)) - ExecSetSlotDescriptor(slot, RelationGetDescr(resultRelationDesc)); - ExecStoreHeapTuple(&deltuple, slot, false); } + // FIXME: centralize + slot->tts_tableOid = RelationGetRelid(resultRelationDesc); + planSlot->tts_tableOid = RelationGetRelid(resultRelationDesc); + rslot = ExecProcessReturning(resultRelInfo, slot, planSlot); /* @@ -873,8 +865,6 @@ ldelete:; ExecMaterializeSlot(rslot); ExecClearTuple(slot); - if (BufferIsValid(delbuffer)) - ReleaseBuffer(delbuffer); return rslot; } @@ -914,7 +904,6 @@ ExecUpdate(ModifyTableState *mtstate, EState *estate, bool canSetTag) { - HeapTuple tuple; ResultRelInfo *resultRelInfo; Relation resultRelationDesc; HTSU_Result result; @@ -928,11 +917,7 @@ ExecUpdate(ModifyTableState *mtstate, if (IsBootstrapProcessingMode()) elog(ERROR, "cannot UPDATE during bootstrap"); - /* - * get the heap tuple out of the tuple table slot, making sure we have a - * writable copy - */ - tuple = ExecFetchSlotHeapTuple(slot, true, NULL); + ExecMaterializeSlot(slot); /* * get information on the (current) result relation @@ -944,28 +929,18 @@ ExecUpdate(ModifyTableState *mtstate, if (resultRelInfo->ri_TrigDesc && resultRelInfo->ri_TrigDesc->trig_update_before_row) { - slot = ExecBRUpdateTriggers(estate, 
epqstate, resultRelInfo, - tupleid, oldtuple, slot); - - if (slot == NULL) /* "do nothing" */ - return NULL; - - /* trigger might have changed tuple */ - tuple = ExecFetchSlotHeapTuple(slot, true, NULL); + if (!ExecBRUpdateTriggers(estate, epqstate, resultRelInfo, + tupleid, oldtuple, slot)) + return NULL; /* "do nothing" */ } /* INSTEAD OF ROW UPDATE Triggers */ if (resultRelInfo->ri_TrigDesc && resultRelInfo->ri_TrigDesc->trig_update_instead_row) { - slot = ExecIRUpdateTriggers(estate, resultRelInfo, - oldtuple, slot); - - if (slot == NULL) /* "do nothing" */ - return NULL; - - /* trigger might have changed tuple */ - tuple = ExecFetchSlotHeapTuple(slot, true, NULL); + if (!ExecIRUpdateTriggers(estate, resultRelInfo, + oldtuple, slot)) + return NULL; /* "do nothing" */ } else if (resultRelInfo->ri_FdwRoutine) { @@ -980,25 +955,23 @@ ExecUpdate(ModifyTableState *mtstate, if (slot == NULL) /* "do nothing" */ return NULL; - /* FDW might have changed tuple */ - tuple = ExecFetchSlotHeapTuple(slot, true, NULL); - /* * AFTER ROW Triggers or RETURNING expressions might reference the * tableoid column, so initialize t_tableOid before evaluating them. */ - tuple->t_tableOid = RelationGetRelid(resultRelationDesc); + slot->tts_tableOid = RelationGetRelid(resultRelationDesc); } else { LockTupleMode lockmode; bool partition_constraint_failed; + bool update_indexes; /* * Constraints might reference the tableoid column, so initialize * t_tableOid before evaluating them. */ - tuple->t_tableOid = RelationGetRelid(resultRelationDesc); + slot->tts_tableOid = RelationGetRelid(resultRelationDesc); /* * Check any RLS UPDATE WITH CHECK policies @@ -1011,6 +984,9 @@ ExecUpdate(ModifyTableState *mtstate, */ lreplace:; + /* ensure slot is independent, consider e.g. EPQ */ + ExecMaterializeSlot(slot); + /* * If partition constraint fails, this row might get moved to another * partition, in which case we should check the RLS CHECK policy just @@ -1108,7 +1084,6 @@ lreplace:; else { slot = ExecFilterJunk(resultRelInfo->ri_junkFilter, epqslot); - tuple = ExecFetchSlotHeapTuple(slot, true, NULL); goto lreplace; } } @@ -1179,11 +1154,54 @@ lreplace:; * needed for referential integrity updates in transaction-snapshot * mode transactions. */ - result = heap_update(resultRelationDesc, tupleid, tuple, - estate->es_output_cid, - estate->es_crosscheck_snapshot, - true /* wait for commit */ , - &hufd, &lockmode); + result = table_update(resultRelationDesc, tupleid, slot, + estate->es_output_cid, + estate->es_snapshot, + estate->es_crosscheck_snapshot, + true /* wait for commit */, + &hufd, &lockmode, &update_indexes); + + if (result == HeapTupleUpdated && !IsolationUsesXactSnapshot()) + { + TupleTableSlot *inputslot; + + EvalPlanQualBegin(epqstate, estate); + + inputslot = EvalPlanQualSlot(epqstate, resultRelationDesc, resultRelInfo->ri_RangeTableIndex); + ExecCopySlot(inputslot, slot); + + result = table_lock_tuple(resultRelationDesc, tupleid, + estate->es_snapshot, + inputslot, estate->es_output_cid, + lockmode, LockWaitBlock, + TUPLE_LOCK_FLAG_FIND_LAST_VERSION, + &hufd); + /* hari FIXME*/ + /*Assert(result != HeapTupleUpdated && hufd.traversed);*/ + if (result == HeapTupleMayBeUpdated) + { + TupleTableSlot *epqslot; + + epqslot = EvalPlanQual(estate, + epqstate, + resultRelationDesc, + resultRelInfo->ri_RangeTableIndex, + inputslot); + if (TupIsNull(epqslot)) + { + /* Tuple no more passing quals, exiting... 
*/ + return NULL; + } + slot = ExecFilterJunk(resultRelInfo->ri_junkFilter, epqslot); + goto lreplace; + } + else if (result == HeapTupleInvisible) + { + /* tuple is not visible; nothing to do */ + return NULL; + } + } + switch (result) { case HeapTupleSelfUpdated: @@ -1224,34 +1242,21 @@ lreplace:; break; case HeapTupleUpdated: + /* + * The lower level isolation case for HeapTupleUpdated is + * handled above. + */ + Assert(IsolationUsesXactSnapshot()); + ereport(ERROR, + (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), + errmsg("could not serialize access due to concurrent update"))); + break; + + case HeapTupleDeleted: if (IsolationUsesXactSnapshot()) ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), - errmsg("could not serialize access due to concurrent update"))); - if (ItemPointerIndicatesMovedPartitions(&hufd.ctid)) - ereport(ERROR, - (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), - errmsg("tuple to be updated was already moved to another partition due to concurrent update"))); - - if (!ItemPointerEquals(tupleid, &hufd.ctid)) - { - TupleTableSlot *epqslot; - - epqslot = EvalPlanQual(estate, - epqstate, - resultRelationDesc, - resultRelInfo->ri_RangeTableIndex, - lockmode, - &hufd.ctid, - hufd.xmax); - if (!TupIsNull(epqslot)) - { - *tupleid = hufd.ctid; - slot = ExecFilterJunk(resultRelInfo->ri_junkFilter, epqslot); - tuple = ExecFetchSlotHeapTuple(slot, true, NULL); - goto lreplace; - } - } + errmsg("could not serialize access due to concurrent delete"))); /* tuple already deleted; nothing to do */ return NULL; @@ -1260,6 +1265,7 @@ lreplace:; return NULL; } + /* * Note: instead of having to update the old index tuples associated * with the heap tuple, all we do is form and insert new index tuples. @@ -1272,20 +1278,19 @@ lreplace:; * insert index entries for tuple * * Note: heap_update returns the tid (location) of the new tuple in - * the t_self field. + * the t_self field. FIXME * * If it's a HOT update, we mustn't insert new index entries. */ - if (resultRelInfo->ri_NumIndices > 0 && !HeapTupleIsHeapOnly(tuple)) - recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self), - estate, false, NULL, NIL); + if (resultRelInfo->ri_NumIndices > 0 && update_indexes) + recheckIndexes = ExecInsertIndexTuples(slot, estate, false, NULL, NIL); } if (canSetTag) (estate->es_processed)++; /* AFTER ROW UPDATE Triggers */ - ExecARUpdateTriggers(estate, resultRelInfo, tupleid, oldtuple, tuple, + ExecARUpdateTriggers(estate, resultRelInfo, tupleid, oldtuple, slot, recheckIndexes, mtstate->operation == CMD_INSERT ? mtstate->mt_oc_transition_capture : @@ -1336,11 +1341,12 @@ ExecOnConflictUpdate(ModifyTableState *mtstate, ExprContext *econtext = mtstate->ps.ps_ExprContext; Relation relation = resultRelInfo->ri_RelationDesc; ExprState *onConflictSetWhere = resultRelInfo->ri_onConflict->oc_WhereClause; - HeapTupleData tuple; HeapUpdateFailureData hufd; LockTupleMode lockmode; HTSU_Result test; - Buffer buffer; + Datum xminDatum; + TransactionId xmin; + bool isnull; /* Determine lock mode to use */ lockmode = ExecUpdateLockMode(estate, resultRelInfo); @@ -1351,10 +1357,11 @@ ExecOnConflictUpdate(ModifyTableState *mtstate, * previous conclusion that the tuple is conclusively committed is not * true anymore. 
*/ - tuple.t_self = *conflictTid; - test = heap_lock_tuple(relation, &tuple, estate->es_output_cid, - lockmode, LockWaitBlock, false, &buffer, - &hufd); + test = table_lock_tuple(relation, conflictTid, + estate->es_snapshot, + mtstate->mt_existing, estate->es_output_cid, + lockmode, LockWaitBlock, TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS, + &hufd); switch (test) { case HeapTupleMayBeUpdated: @@ -1379,7 +1386,13 @@ ExecOnConflictUpdate(ModifyTableState *mtstate, * that for SQL MERGE, an exception must be raised in the event of * an attempt to update the same row twice. */ - if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple.t_data))) + xminDatum = slot_getsysattr(mtstate->mt_existing, + MinTransactionIdAttributeNumber, + &isnull); + Assert(!isnull); + xmin = DatumGetTransactionId(xminDatum); + + if (TransactionIdIsCurrentTransactionId(xmin)) ereport(ERROR, (errcode(ERRCODE_CARDINALITY_VIOLATION), errmsg("ON CONFLICT DO UPDATE command cannot affect row a second time"), @@ -1420,7 +1433,16 @@ ExecOnConflictUpdate(ModifyTableState *mtstate, * loop here, as the new version of the row might not conflict * anymore, or the conflicting tuple has actually been deleted. */ - ReleaseBuffer(buffer); + ExecClearTuple(mtstate->mt_existing); + return false; + + case HeapTupleDeleted: + if (IsolationUsesXactSnapshot()) + ereport(ERROR, + (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), + errmsg("could not serialize access due to concurrent delete"))); + + ExecClearTuple(mtstate->mt_existing); return false; default: @@ -1442,10 +1464,7 @@ ExecOnConflictUpdate(ModifyTableState *mtstate, * snapshot. This is in line with the way UPDATE deals with newer tuple * versions. */ - ExecCheckHeapTupleVisible(estate, &tuple, buffer); - - /* Store target's existing tuple in the state's dedicated slot */ - ExecStoreBufferHeapTuple(&tuple, mtstate->mt_existing, buffer); + ExecCheckHeapTupleVisible(estate, relation, mtstate->mt_existing); /* * Make tuple and any needed join variables available to ExecQual and @@ -1460,7 +1479,7 @@ ExecOnConflictUpdate(ModifyTableState *mtstate, if (!ExecQual(onConflictSetWhere, econtext)) { - ReleaseBuffer(buffer); + ExecClearTuple(mtstate->mt_existing); InstrCountFiltered1(&mtstate->ps, 1); return true; /* done with the tuple */ } @@ -1500,12 +1519,11 @@ ExecOnConflictUpdate(ModifyTableState *mtstate, */ /* Execute UPDATE with projection */ - *returning = ExecUpdate(mtstate, &tuple.t_self, NULL, + *returning = ExecUpdate(mtstate, conflictTid, NULL, mtstate->mt_conflproj, planSlot, &mtstate->mt_epqstate, mtstate->ps.state, canSetTag); - - ReleaseBuffer(buffer); + ExecClearTuple(mtstate->mt_existing); return true; } @@ -1688,6 +1706,7 @@ ExecPrepareTupleRouting(ModifyTableState *mtstate, estate->es_result_relation_info = partrel; /* Get the heap tuple out of the given slot. */ + // PBORKED: this'll leak memory for some slot types tuple = ExecFetchSlotHeapTuple(slot, true, NULL); /* @@ -1826,7 +1845,7 @@ ExecModifyTable(PlanState *pstate) ItemPointer tupleid; ItemPointerData tuple_ctid; HeapTupleData oldtupdata; - HeapTuple oldtuple; + HeapTuple oldtuple; CHECK_FOR_INTERRUPTS(); @@ -1839,7 +1858,7 @@ ExecModifyTable(PlanState *pstate) * case it is within a CTE subplan. Hence this test must be here, not in * ExecInitModifyTable.) */ - if (estate->es_epqTuple != NULL) + if (estate->es_epqTupleSlot != NULL) elog(ERROR, "ModifyTable should not be called during EvalPlanQual"); /* @@ -1982,7 +2001,7 @@ ExecModifyTable(PlanState *pstate) &isNull); /* shouldn't ever get a null result... 
*/ if (isNull) - elog(ERROR, "ctid is NULL"); + elog(PANIC, "ctid is NULL"); tupleid = (ItemPointer) DatumGetPointer(datum); tuple_ctid = *tupleid; /* be sure we don't free ctid!! */ @@ -2123,6 +2142,8 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) mtstate->resultRelInfo = estate->es_result_relations + node->resultRelIndex; mtstate->mt_scans = (TupleTableSlot **) palloc0(sizeof(TupleTableSlot *) * nplans); + mtstate->mt_scans = (TupleTableSlot **) palloc0(sizeof(TupleTableSlot *) * nplans); + /* If modifying a partitioned table, initialize the root table info */ if (node->rootResultRelIndex >= 0) mtstate->rootResultRelInfo = estate->es_root_result_relations + @@ -2190,7 +2211,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) mtstate->mt_plans[i] = ExecInitNode(subplan, estate, eflags); mtstate->mt_scans[i] = ExecInitExtraTupleSlot(mtstate->ps.state, ExecGetResultType(mtstate->mt_plans[i]), - &TTSOpsHeapTuple); + table_slot_callbacks(resultRelInfo->ri_RelationDesc)); /* Also let FDWs init themselves for foreign-table result rels */ if (!resultRelInfo->ri_usesFdwDirectModify && @@ -2250,8 +2271,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) if (update_tuple_routing_needed) { ExecSetupChildParentMapForSubplan(mtstate); - mtstate->mt_root_tuple_slot = MakeTupleTableSlot(RelationGetDescr(rel), - &TTSOpsHeapTuple); + mtstate->mt_root_tuple_slot = table_gimmegimmeslot(rel, NULL); } /* @@ -2344,6 +2364,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) ExprContext *econtext; TupleDesc relationDesc; TupleDesc tupDesc; + const TupleTableSlotOps *tts_cb; /* insert may only have one plan, inheritance is not expanded */ Assert(nplans == 1); @@ -2354,6 +2375,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) econtext = mtstate->ps.ps_ExprContext; relationDesc = resultRelInfo->ri_RelationDesc->rd_att; + tts_cb = table_slot_callbacks(resultRelInfo->ri_RelationDesc); /* * Initialize slot for the existing tuple. If we'll be performing @@ -2364,7 +2386,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) mtstate->mt_existing = ExecInitExtraTupleSlot(mtstate->ps.state, mtstate->mt_partition_tuple_routing ? - NULL : relationDesc, &TTSOpsBufferHeapTuple); + NULL : relationDesc, tts_cb); /* carried forward solely for the benefit of explain */ mtstate->mt_excludedtlist = node->exclRelTlist; @@ -2385,7 +2407,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) mtstate->mt_conflproj = ExecInitExtraTupleSlot(mtstate->ps.state, mtstate->mt_partition_tuple_routing ? 
- NULL : tupDesc, &TTSOpsHeapTuple); + NULL : tupDesc, tts_cb); resultRelInfo->ri_onConflict->oc_ProjTupdesc = tupDesc; /* build UPDATE SET projection state */ @@ -2488,15 +2510,18 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) for (i = 0; i < nplans; i++) { JunkFilter *j; + TupleTableSlot *junkresslot; subplan = mtstate->mt_plans[i]->plan; if (operation == CMD_INSERT || operation == CMD_UPDATE) ExecCheckPlanOutput(resultRelInfo->ri_RelationDesc, subplan->targetlist); + junkresslot = + ExecInitExtraTupleSlot(estate, NULL, + table_slot_callbacks(resultRelInfo->ri_RelationDesc)); j = ExecInitJunkFilter(subplan->targetlist, - ExecInitExtraTupleSlot(estate, NULL, - &TTSOpsHeapTuple)); + junkresslot); if (operation == CMD_UPDATE || operation == CMD_DELETE) { @@ -2540,16 +2565,6 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) } } - /* - * Set up a tuple table slot for use for trigger output tuples. In a plan - * containing multiple ModifyTable nodes, all can share one such slot, so - * we keep it in the estate. The tuple being inserted doesn't come from a - * buffer. - */ - if (estate->es_trig_tuple_slot == NULL) - estate->es_trig_tuple_slot = ExecInitExtraTupleSlot(estate, NULL, - &TTSOpsHeapTuple); - /* * Lastly, if this is not the primary (canSetTag) ModifyTable node, add it * to estate->es_auxmodifytables so that it will be run to completion by diff --git a/src/backend/executor/nodeSamplescan.c b/src/backend/executor/nodeSamplescan.c index 78735fa15b..5351ec347f 100644 --- a/src/backend/executor/nodeSamplescan.c +++ b/src/backend/executor/nodeSamplescan.c @@ -16,6 +16,7 @@ #include "access/hash.h" #include "access/relscan.h" +#include "access/tableam.h" #include "access/tsmapi.h" #include "executor/executor.h" #include "executor/nodeSamplescan.h" @@ -28,9 +29,7 @@ static TupleTableSlot *SampleNext(SampleScanState *node); static void tablesample_init(SampleScanState *scanstate); -static HeapTuple tablesample_getnext(SampleScanState *scanstate); -static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, - HeapScanDesc scan); +static TupleTableSlot *tablesample_getnext(SampleScanState *scanstate); /* ---------------------------------------------------------------- * Scan Support @@ -46,9 +45,6 @@ static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, static TupleTableSlot * SampleNext(SampleScanState *node) { - HeapTuple tuple; - TupleTableSlot *slot; - /* * if this is first call within a scan, initialize */ @@ -58,18 +54,7 @@ SampleNext(SampleScanState *node) /* * get the next tuple, and store it in our result slot */ - tuple = tablesample_getnext(node); - - slot = node->ss.ss_ScanTupleSlot; - - if (tuple) - ExecStoreBufferHeapTuple(tuple, /* tuple to store */ - slot, /* slot to store in */ - node->ss.ss_currentScanDesc->rs_cbuf); /* tuple's buffer */ - else - ExecClearTuple(slot); - - return slot; + return tablesample_getnext(node); } /* @@ -147,7 +132,7 @@ ExecInitSampleScan(SampleScan *node, EState *estate, int eflags) /* and create slot with appropriate rowtype */ ExecInitScanTupleSlot(estate, &scanstate->ss, RelationGetDescr(scanstate->ss.ss_currentRelation), - &TTSOpsBufferHeapTuple); + table_slot_callbacks(scanstate->ss.ss_currentRelation)); /* * Initialize result type and projection. 
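
Editorial note, not part of the patch: the nodeModifyTable.c hunks above change how UPDATE and DELETE react to a concurrent update. Instead of following hufd.ctid by hand, the executor now asks the access method to chase the update chain: table_update()/table_delete() report HeapTupleUpdated, and under READ COMMITTED the row is re-locked with table_lock_tuple() using TUPLE_LOCK_FLAG_FIND_LAST_VERSION into an EvalPlanQualSlot(), after which EvalPlanQual() re-evaluates the quals. A condensed sketch of that shared shape follows; rel, rti, cid, snapshot and crosscheck are shorthand placeholders for the values used in the patched code.

/* Sketch of the retry shape shared by ExecDelete()/ExecUpdate(); not literal patch code. */
ldelete:
result = table_delete(rel, tupleid, cid, snapshot, crosscheck,
                      true /* wait */, &hufd, changingPart);
if (result == HeapTupleUpdated && !IsolationUsesXactSnapshot())
{
    TupleTableSlot *epqslot;

    slot = EvalPlanQualSlot(epqstate, rel, rti);
    result = table_lock_tuple(rel, tupleid, snapshot, slot, cid,
                              LockTupleExclusive, LockWaitBlock,
                              TUPLE_LOCK_FLAG_FIND_LAST_VERSION, &hufd);
    if (result == HeapTupleMayBeUpdated)
    {
        epqslot = EvalPlanQual(estate, epqstate, rel, rti, slot);
        if (!TupIsNull(epqslot))
            goto ldelete;       /* quals still pass: retry on the new version */
        return NULL;            /* row no longer qualifies */
    }
}
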
@@ -219,7 +204,7 @@ ExecEndSampleScan(SampleScanState *node) * close heap scan */ if (node->ss.ss_currentScanDesc) - heap_endscan(node->ss.ss_currentScanDesc); + table_endscan(node->ss.ss_currentScanDesc); } /* ---------------------------------------------------------------- @@ -234,6 +219,9 @@ ExecReScanSampleScan(SampleScanState *node) { /* Remember we need to do BeginSampleScan again (if we did it at all) */ node->begun = false; + node->done = false; + node->haveblock = false; + node->donetuples = 0; ExecScanReScan(&node->ss); } @@ -255,6 +243,7 @@ tablesample_init(SampleScanState *scanstate) int i; ListCell *arg; + scanstate->donetuples = 0; params = (Datum *) palloc(list_length(scanstate->args) * sizeof(Datum)); i = 0; @@ -319,19 +308,19 @@ tablesample_init(SampleScanState *scanstate) if (scanstate->ss.ss_currentScanDesc == NULL) { scanstate->ss.ss_currentScanDesc = - heap_beginscan_sampling(scanstate->ss.ss_currentRelation, - scanstate->ss.ps.state->es_snapshot, - 0, NULL, - scanstate->use_bulkread, - allow_sync, - scanstate->use_pagemode); + table_beginscan_sampling(scanstate->ss.ss_currentRelation, + scanstate->ss.ps.state->es_snapshot, + 0, NULL, + scanstate->use_bulkread, + allow_sync, + scanstate->use_pagemode); } else { - heap_rescan_set_params(scanstate->ss.ss_currentScanDesc, NULL, - scanstate->use_bulkread, - allow_sync, - scanstate->use_pagemode); + table_rescan_set_params(scanstate->ss.ss_currentScanDesc, NULL, + scanstate->use_bulkread, + allow_sync, + scanstate->use_pagemode); } pfree(params); @@ -342,224 +331,49 @@ tablesample_init(SampleScanState *scanstate) /* * Get next tuple from TABLESAMPLE method. - * - * Note: an awful lot of this is copied-and-pasted from heapam.c. It would - * perhaps be better to refactor to share more code. */ -static HeapTuple +static TupleTableSlot* tablesample_getnext(SampleScanState *scanstate) { - TsmRoutine *tsm = scanstate->tsmroutine; - HeapScanDesc scan = scanstate->ss.ss_currentScanDesc; - HeapTuple tuple = &(scan->rs_ctup); - Snapshot snapshot = scan->rs_snapshot; - bool pagemode = scan->rs_pageatatime; - BlockNumber blockno; - Page page; - bool all_visible; - OffsetNumber maxoffset; - - if (!scan->rs_inited) - { - /* - * return null immediately if relation is empty - */ - if (scan->rs_nblocks == 0) - { - Assert(!BufferIsValid(scan->rs_cbuf)); - tuple->t_data = NULL; - return NULL; - } - if (tsm->NextSampleBlock) - { - blockno = tsm->NextSampleBlock(scanstate); - if (!BlockNumberIsValid(blockno)) - { - tuple->t_data = NULL; - return NULL; - } - } - else - blockno = scan->rs_startblock; - Assert(blockno < scan->rs_nblocks); - heapgetpage(scan, blockno); - scan->rs_inited = true; - } - else - { - /* continue from previously returned page/tuple */ - blockno = scan->rs_cblock; /* current page */ - } + TableScanDesc scan = scanstate->ss.ss_currentScanDesc; + TupleTableSlot *slot = scanstate->ss.ss_ScanTupleSlot; - /* - * When not using pagemode, we must lock the buffer during tuple - * visibility checks. - */ - if (!pagemode) - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); + ExecClearTuple(slot); - page = (Page) BufferGetPage(scan->rs_cbuf); - all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery; - maxoffset = PageGetMaxOffsetNumber(page); + if (scanstate->done) + return NULL; for (;;) { - OffsetNumber tupoffset; - bool finished; - - CHECK_FOR_INTERRUPTS(); - - /* Ask the tablesample method which tuples to check on this page. 
*/ - tupoffset = tsm->NextSampleTuple(scanstate, - blockno, - maxoffset); - - if (OffsetNumberIsValid(tupoffset)) + if (!scanstate->haveblock) { - ItemId itemid; - bool visible; - - /* Skip invalid tuple pointers. */ - itemid = PageGetItemId(page, tupoffset); - if (!ItemIdIsNormal(itemid)) - continue; - - tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid); - tuple->t_len = ItemIdGetLength(itemid); - ItemPointerSet(&(tuple->t_self), blockno, tupoffset); - - if (all_visible) - visible = true; - else - visible = SampleTupleVisible(tuple, tupoffset, scan); - - /* in pagemode, heapgetpage did this for us */ - if (!pagemode) - CheckForSerializableConflictOut(visible, scan->rs_rd, tuple, - scan->rs_cbuf, snapshot); - - if (visible) - { - /* Found visible tuple, return it. */ - if (!pagemode) - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); - break; - } - else + if (!table_scan_sample_next_block(scan, scanstate)) { - /* Try next tuple from same page. */ - continue; - } - } + scanstate->haveblock = false; + scanstate->done = true; - /* - * if we get here, it means we've exhausted the items on this page and - * it's time to move to the next. - */ - if (!pagemode) - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); + /* exhausted relation */ + return NULL; + } - if (tsm->NextSampleBlock) - { - blockno = tsm->NextSampleBlock(scanstate); - Assert(!scan->rs_syncscan); - finished = !BlockNumberIsValid(blockno); + scanstate->haveblock = true; } - else - { - /* Without NextSampleBlock, just do a plain forward seqscan. */ - blockno++; - if (blockno >= scan->rs_nblocks) - blockno = 0; + if (!table_scan_sample_next_tuple(scan, scanstate, slot)) + { /* - * Report our new scan position for synchronization purposes. - * - * Note: we do this before checking for end of scan so that the - * final state of the position hint is back at the start of the - * rel. That's not strictly necessary, but otherwise when you run - * the same query multiple times the starting position would shift - * a little bit backwards on every invocation, which is confusing. - * We don't guarantee any specific ordering in general, though. + * If we get here, it means we've exhausted the items on this page + * and it's time to move to the next. */ - if (scan->rs_syncscan) - ss_report_location(scan->rs_rd, blockno); - - finished = (blockno == scan->rs_startblock); + scanstate->haveblock = false; + continue; } - /* - * Reached end of scan? - */ - if (finished) - { - if (BufferIsValid(scan->rs_cbuf)) - ReleaseBuffer(scan->rs_cbuf); - scan->rs_cbuf = InvalidBuffer; - scan->rs_cblock = InvalidBlockNumber; - tuple->t_data = NULL; - scan->rs_inited = false; - return NULL; - } - - Assert(blockno < scan->rs_nblocks); - heapgetpage(scan, blockno); - - /* Re-establish state for new page */ - if (!pagemode) - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); - - page = (Page) BufferGetPage(scan->rs_cbuf); - all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery; - maxoffset = PageGetMaxOffsetNumber(page); + /* Found visible tuple, return it. */ + break; } - /* Count successfully-fetched tuples as heap fetches */ - pgstat_count_heap_getnext(scan->rs_rd); - - return &(scan->rs_ctup); -} + scanstate->donetuples++; -/* - * Check visibility of the tuple. - */ -static bool -SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan) -{ - if (scan->rs_pageatatime) - { - /* - * In pageatatime mode, heapgetpage() already did visibility checks, - * so just look at the info it left in rs_vistuples[]. 
- * - * We use a binary search over the known-sorted array. Note: we could - * save some effort if we insisted that NextSampleTuple select tuples - * in increasing order, but it's not clear that there would be enough - * gain to justify the restriction. - */ - int start = 0, - end = scan->rs_ntuples - 1; - - while (start <= end) - { - int mid = (start + end) / 2; - OffsetNumber curoffset = scan->rs_vistuples[mid]; - - if (tupoffset == curoffset) - return true; - else if (tupoffset < curoffset) - end = mid - 1; - else - start = mid + 1; - } - - return false; - } - else - { - /* Otherwise, we have to check the tuple individually. */ - return HeapTupleSatisfiesVisibility(tuple, - scan->rs_snapshot, - scan->rs_cbuf); - } + return slot; } diff --git a/src/backend/executor/nodeSeqscan.c b/src/backend/executor/nodeSeqscan.c index 55377add6e..a6e89bf2af 100644 --- a/src/backend/executor/nodeSeqscan.c +++ b/src/backend/executor/nodeSeqscan.c @@ -28,6 +28,7 @@ #include "postgres.h" #include "access/relscan.h" +#include "access/tableam.h" #include "executor/execdebug.h" #include "executor/nodeSeqscan.h" #include "utils/rel.h" @@ -48,8 +49,7 @@ static TupleTableSlot *SeqNext(SeqScanState *node); static TupleTableSlot * SeqNext(SeqScanState *node) { - HeapTuple tuple; - HeapScanDesc scandesc; + TableScanDesc scandesc; EState *estate; ScanDirection direction; TupleTableSlot *slot; @@ -68,34 +68,16 @@ SeqNext(SeqScanState *node) * We reach here if the scan is not parallel, or if we're serially * executing a scan that was planned to be parallel. */ - scandesc = heap_beginscan(node->ss.ss_currentRelation, - estate->es_snapshot, - 0, NULL); + scandesc = table_beginscan(node->ss.ss_currentRelation, + estate->es_snapshot, + 0, NULL); node->ss.ss_currentScanDesc = scandesc; } /* * get the next tuple from the table */ - tuple = heap_getnext(scandesc, direction); - - /* - * save the tuple and the buffer returned to us by the access methods in - * our scan tuple slot and return the slot. Note: we pass 'false' because - * tuples returned by heap_getnext() are pointers onto disk pages and were - * not created with palloc() and so should not be pfree()'d. Note also - * that ExecStoreHeapTuple will increment the refcount of the buffer; the - * refcount will not be dropped until the tuple table slot is cleared. - */ - if (tuple) - ExecStoreBufferHeapTuple(tuple, /* tuple to store */ - slot, /* slot to store in */ - scandesc->rs_cbuf); /* buffer associated - * with this tuple */ - else - ExecClearTuple(slot); - - return slot; + return table_scan_getnextslot(scandesc, direction, slot); } /* @@ -173,7 +155,7 @@ ExecInitSeqScan(SeqScan *node, EState *estate, int eflags) /* and create slot with the appropriate rowtype */ ExecInitScanTupleSlot(estate, &scanstate->ss, RelationGetDescr(scanstate->ss.ss_currentRelation), - &TTSOpsBufferHeapTuple); + table_slot_callbacks(scanstate->ss.ss_currentRelation)); /* * Initialize result type and projection. 
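
Editorial note, not part of the patch: SeqNext() above becomes a thin wrapper because the scan loop itself is now generic. For reference, a caller iterating over an arbitrary table AM follows the pattern sketched below, composed only of functions visible in this patch; rel and snapshot stand for whatever relation and snapshot the caller already holds.

/* Sketch only -- generic scan over any table access method. */
TupleTableSlot *slot = MakeTupleTableSlot(RelationGetDescr(rel),
                                          table_slot_callbacks(rel));
TableScanDesc   scan = table_beginscan(rel, snapshot, 0, NULL);

while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
{
    /* work with the slot; its storage format is chosen by the AM */
}

table_endscan(scan);
ExecDropSingleTupleTableSlot(slot);
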
@@ -199,7 +181,7 @@ ExecInitSeqScan(SeqScan *node, EState *estate, int eflags) void ExecEndSeqScan(SeqScanState *node) { - HeapScanDesc scanDesc; + TableScanDesc scanDesc; /* * get information from node @@ -222,7 +204,7 @@ ExecEndSeqScan(SeqScanState *node) * close heap scan */ if (scanDesc != NULL) - heap_endscan(scanDesc); + table_endscan(scanDesc); } /* ---------------------------------------------------------------- @@ -239,13 +221,13 @@ ExecEndSeqScan(SeqScanState *node) void ExecReScanSeqScan(SeqScanState *node) { - HeapScanDesc scan; + TableScanDesc scan; scan = node->ss.ss_currentScanDesc; if (scan != NULL) - heap_rescan(scan, /* scan desc */ - NULL); /* new scan keys */ + table_rescan(scan, /* scan desc */ + NULL); /* new scan keys */ ExecScanReScan((ScanState *) node); } @@ -268,7 +250,7 @@ ExecSeqScanEstimate(SeqScanState *node, { EState *estate = node->ss.ps.state; - node->pscan_len = heap_parallelscan_estimate(estate->es_snapshot); + node->pscan_len = table_parallelscan_estimate(estate->es_snapshot); shm_toc_estimate_chunk(&pcxt->estimator, node->pscan_len); shm_toc_estimate_keys(&pcxt->estimator, 1); } @@ -284,15 +266,15 @@ ExecSeqScanInitializeDSM(SeqScanState *node, ParallelContext *pcxt) { EState *estate = node->ss.ps.state; - ParallelHeapScanDesc pscan; + ParallelTableScanDesc pscan; pscan = shm_toc_allocate(pcxt->toc, node->pscan_len); - heap_parallelscan_initialize(pscan, - node->ss.ss_currentRelation, - estate->es_snapshot); + table_parallelscan_initialize(pscan, + node->ss.ss_currentRelation, + estate->es_snapshot); shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pscan); node->ss.ss_currentScanDesc = - heap_beginscan_parallel(node->ss.ss_currentRelation, pscan); + table_beginscan_parallel(node->ss.ss_currentRelation, pscan); } /* ---------------------------------------------------------------- @@ -305,9 +287,10 @@ void ExecSeqScanReInitializeDSM(SeqScanState *node, ParallelContext *pcxt) { - HeapScanDesc scan = node->ss.ss_currentScanDesc; + ParallelTableScanDesc pscan; - heap_parallelscan_reinitialize(scan->rs_parallel); + pscan = node->ss.ss_currentScanDesc->rs_parallel; + table_parallelscan_reinitialize(pscan); } /* ---------------------------------------------------------------- @@ -320,9 +303,9 @@ void ExecSeqScanInitializeWorker(SeqScanState *node, ParallelWorkerContext *pwcxt) { - ParallelHeapScanDesc pscan; + ParallelTableScanDesc pscan; pscan = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false); node->ss.ss_currentScanDesc = - heap_beginscan_parallel(node->ss.ss_currentRelation, pscan); + table_beginscan_parallel(node->ss.ss_currentRelation, pscan); } diff --git a/src/backend/executor/nodeTidscan.c b/src/backend/executor/nodeTidscan.c index afec097bc8..c8bcf97f1d 100644 --- a/src/backend/executor/nodeTidscan.c +++ b/src/backend/executor/nodeTidscan.c @@ -22,6 +22,7 @@ */ #include "postgres.h" +#include "access/tableam.h" #include "access/sysattr.h" #include "catalog/pg_type.h" #include "executor/execdebug.h" @@ -306,9 +307,7 @@ TidNext(TidScanState *node) ScanDirection direction; Snapshot snapshot; Relation heapRelation; - HeapTuple tuple; TupleTableSlot *slot; - Buffer buffer = InvalidBuffer; ItemPointerData *tidList; int numTids; bool bBackward; @@ -331,12 +330,6 @@ TidNext(TidScanState *node) tidList = node->tss_TidList; numTids = node->tss_NumTids; - /* - * We use node->tss_htup as the tuple pointer; note this can't just be a - * local variable here, as the scan tuple slot will keep a pointer to it. 
- */ - tuple = &(node->tss_htup); - /* * Initialize or advance scan position, depending on direction. */ @@ -364,7 +357,7 @@ TidNext(TidScanState *node) while (node->tss_TidPtr >= 0 && node->tss_TidPtr < numTids) { - tuple->t_self = tidList[node->tss_TidPtr]; + ItemPointerData tid = tidList[node->tss_TidPtr]; /* * For WHERE CURRENT OF, the tuple retrieved from the cursor might @@ -372,28 +365,11 @@ TidNext(TidScanState *node) * current according to our snapshot. */ if (node->tss_isCurrentOf) - heap_get_latest_tid(heapRelation, snapshot, &tuple->t_self); - - if (heap_fetch(heapRelation, snapshot, tuple, &buffer, false, NULL)) - { - /* - * Store the scanned tuple in the scan tuple slot of the scan - * state. Eventually we will only do this and not return a tuple. - */ - ExecStoreBufferHeapTuple(tuple, /* tuple to store */ - slot, /* slot to store in */ - buffer); /* buffer associated with - * tuple */ - - /* - * At this point we have an extra pin on the buffer, because - * ExecStoreHeapTuple incremented the pin count. Drop our local - * pin. - */ - ReleaseBuffer(buffer); + table_get_latest_tid(heapRelation, snapshot, &tid); + if (table_fetch_row_version(heapRelation, &tid, snapshot, slot, NULL)) return slot; - } + /* Bad TID or failed snapshot qual; try next */ if (bBackward) node->tss_TidPtr--; @@ -544,7 +520,7 @@ ExecInitTidScan(TidScan *node, EState *estate, int eflags) */ ExecInitScanTupleSlot(estate, &tidstate->ss, RelationGetDescr(currentRelation), - &TTSOpsBufferHeapTuple); + table_slot_callbacks(currentRelation)); /* * Initialize result type and projection. diff --git a/src/backend/executor/spi.c b/src/backend/executor/spi.c index ad726676d8..34664e76d1 100644 --- a/src/backend/executor/spi.c +++ b/src/backend/executor/spi.c @@ -762,7 +762,7 @@ HeapTuple SPI_copytuple(HeapTuple tuple) { MemoryContext oldcxt; - HeapTuple ctuple; + HeapTuple ctuple; if (tuple == NULL) { @@ -983,7 +983,7 @@ char * SPI_gettype(TupleDesc tupdesc, int fnumber) { Oid typoid; - HeapTuple typeTuple; + HeapTuple typeTuple; char *result; SPI_result = 0; @@ -1844,7 +1844,7 @@ spi_printtup(TupleTableSlot *slot, DestReceiver *self) tuptable->free = tuptable->alloced; tuptable->alloced += tuptable->free; tuptable->vals = (HeapTuple *) repalloc_huge(tuptable->vals, - tuptable->alloced * sizeof(HeapTuple)); + tuptable->alloced * sizeof(HeapTuple)); } tuptable->vals[tuptable->alloced - tuptable->free] = diff --git a/src/backend/executor/tqueue.c b/src/backend/executor/tqueue.c index e47ef49192..e2b596cf74 100644 --- a/src/backend/executor/tqueue.c +++ b/src/backend/executor/tqueue.c @@ -59,6 +59,8 @@ tqueueReceiveSlot(TupleTableSlot *slot, DestReceiver *self) bool should_free; /* Send the tuple itself. */ + // PBORKED: this shouldn't rely on heaptuples. If we need it to be tuple + // formed, it should be a minimal tuple. 
tuple = ExecFetchSlotHeapTuple(slot, true, &should_free); result = shm_mq_send(tqueue->queue, tuple->t_len, tuple->t_data, false); diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index db49968409..2848634e00 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -3332,6 +3332,7 @@ CopyCreateStmtFields(const CreateStmt *from, CreateStmt *newnode) COPY_NODE_FIELD(options); COPY_SCALAR_FIELD(oncommit); COPY_STRING_FIELD(tablespacename); + COPY_STRING_FIELD(accessMethod); COPY_SCALAR_FIELD(if_not_exists); } diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index a570ac0aab..58e8c7cc5f 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -21,6 +21,7 @@ #include "access/heapam.h" #include "access/htup_details.h" #include "access/nbtree.h" +#include "access/tableam.h" #include "access/sysattr.h" #include "access/transam.h" #include "access/xlog.h" @@ -271,7 +272,8 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, info->amsearchnulls = amroutine->amsearchnulls; info->amcanparallel = amroutine->amcanparallel; info->amhasgettuple = (amroutine->amgettuple != NULL); - info->amhasgetbitmap = (amroutine->amgetbitmap != NULL); + info->amhasgetbitmap = ((amroutine->amgetbitmap != NULL) + && (relation->rd_tableamroutine->scan_bitmap_pagescan != NULL)); info->amcostestimate = amroutine->amcostestimate; Assert(info->amcostestimate != NULL); diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 2c2208ffb7..71bedf34f3 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -48,6 +48,7 @@ #include #include +#include "access/tableam.h" #include "catalog/index.h" #include "catalog/namespace.h" #include "catalog/pg_am.h" @@ -322,6 +323,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type OptSchemaName %type OptSchemaEltList +%type am_type + %type TriggerForSpec TriggerForType %type TriggerActionTime %type TriggerEvents TriggerOneEvent @@ -337,7 +340,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type copy_file_name database_name access_method_clause access_method attr_name - name cursor_name file_name + table_access_method_clause name cursor_name file_name index_name opt_index_name cluster_index_specification %type func_name handler_name qual_Op qual_all_Op subquery_Op @@ -3170,7 +3173,8 @@ copy_generic_opt_arg_list_item: *****************************************************************************/ CreateStmt: CREATE OptTemp TABLE qualified_name '(' OptTableElementList ')' - OptInherit OptPartitionSpec OptWith OnCommitOption OptTableSpace + OptInherit OptPartitionSpec table_access_method_clause OptWith + OnCommitOption OptTableSpace { CreateStmt *n = makeNode(CreateStmt); $4->relpersistence = $2; @@ -3180,15 +3184,16 @@ CreateStmt: CREATE OptTemp TABLE qualified_name '(' OptTableElementList ')' n->partspec = $9; n->ofTypename = NULL; n->constraints = NIL; - n->options = $10; - n->oncommit = $11; - n->tablespacename = $12; + n->accessMethod = $10; + n->options = $11; + n->oncommit = $12; + n->tablespacename = $13; n->if_not_exists = false; $$ = (Node *)n; } | CREATE OptTemp TABLE IF_P NOT EXISTS qualified_name '(' - OptTableElementList ')' OptInherit OptPartitionSpec OptWith - OnCommitOption OptTableSpace + OptTableElementList ')' OptInherit OptPartitionSpec table_access_method_clause + OptWith OnCommitOption OptTableSpace { CreateStmt *n 
= makeNode(CreateStmt); $7->relpersistence = $2; @@ -3198,15 +3203,16 @@ CreateStmt: CREATE OptTemp TABLE qualified_name '(' OptTableElementList ')' n->partspec = $12; n->ofTypename = NULL; n->constraints = NIL; - n->options = $13; - n->oncommit = $14; - n->tablespacename = $15; + n->accessMethod = $13; + n->options = $14; + n->oncommit = $15; + n->tablespacename = $16; n->if_not_exists = true; $$ = (Node *)n; } | CREATE OptTemp TABLE qualified_name OF any_name - OptTypedTableElementList OptPartitionSpec OptWith OnCommitOption - OptTableSpace + OptTypedTableElementList OptPartitionSpec table_access_method_clause + OptWith OnCommitOption OptTableSpace { CreateStmt *n = makeNode(CreateStmt); $4->relpersistence = $2; @@ -3217,15 +3223,16 @@ CreateStmt: CREATE OptTemp TABLE qualified_name '(' OptTableElementList ')' n->ofTypename = makeTypeNameFromNameList($6); n->ofTypename->location = @6; n->constraints = NIL; - n->options = $9; - n->oncommit = $10; - n->tablespacename = $11; + n->accessMethod = $9; + n->options = $10; + n->oncommit = $11; + n->tablespacename = $12; n->if_not_exists = false; $$ = (Node *)n; } | CREATE OptTemp TABLE IF_P NOT EXISTS qualified_name OF any_name - OptTypedTableElementList OptPartitionSpec OptWith OnCommitOption - OptTableSpace + OptTypedTableElementList OptPartitionSpec table_access_method_clause + OptWith OnCommitOption OptTableSpace { CreateStmt *n = makeNode(CreateStmt); $7->relpersistence = $2; @@ -3236,15 +3243,16 @@ CreateStmt: CREATE OptTemp TABLE qualified_name '(' OptTableElementList ')' n->ofTypename = makeTypeNameFromNameList($9); n->ofTypename->location = @9; n->constraints = NIL; - n->options = $12; - n->oncommit = $13; - n->tablespacename = $14; + n->accessMethod = $12; + n->options = $13; + n->oncommit = $14; + n->tablespacename = $15; n->if_not_exists = true; $$ = (Node *)n; } | CREATE OptTemp TABLE qualified_name PARTITION OF qualified_name - OptTypedTableElementList PartitionBoundSpec OptPartitionSpec OptWith - OnCommitOption OptTableSpace + OptTypedTableElementList PartitionBoundSpec OptPartitionSpec + table_access_method_clause OptWith OnCommitOption OptTableSpace { CreateStmt *n = makeNode(CreateStmt); $4->relpersistence = $2; @@ -3255,15 +3263,16 @@ CreateStmt: CREATE OptTemp TABLE qualified_name '(' OptTableElementList ')' n->partspec = $10; n->ofTypename = NULL; n->constraints = NIL; - n->options = $11; - n->oncommit = $12; - n->tablespacename = $13; + n->accessMethod = $11; + n->options = $12; + n->oncommit = $13; + n->tablespacename = $14; n->if_not_exists = false; $$ = (Node *)n; } | CREATE OptTemp TABLE IF_P NOT EXISTS qualified_name PARTITION OF qualified_name OptTypedTableElementList PartitionBoundSpec OptPartitionSpec - OptWith OnCommitOption OptTableSpace + table_access_method_clause OptWith OnCommitOption OptTableSpace { CreateStmt *n = makeNode(CreateStmt); $7->relpersistence = $2; @@ -3274,9 +3283,10 @@ CreateStmt: CREATE OptTemp TABLE qualified_name '(' OptTableElementList ')' n->partspec = $13; n->ofTypename = NULL; n->constraints = NIL; - n->options = $14; - n->oncommit = $15; - n->tablespacename = $16; + n->accessMethod = $14; + n->options = $15; + n->oncommit = $16; + n->tablespacename = $17; n->if_not_exists = true; $$ = (Node *)n; } @@ -3921,6 +3931,12 @@ part_elem: ColId opt_collate opt_class $$ = n; } ; + +table_access_method_clause: + USING access_method { $$ = $2; } + | /*EMPTY*/ { $$ = NULL; } + ; + /* WITHOUT OIDS is legacy only */ OptWith: WITH reloptions { $$ = $2; } @@ -4026,14 +4042,16 @@ CreateAsStmt: ; 
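The grammar changes above add an optional USING access-method clause to CREATE TABLE, CREATE TABLE AS and CREATE MATERIALIZED VIEW, carried in the new accessMethod fields of CreateStmt and IntoClause, and extend CREATE ACCESS METHOD with TYPE TABLE. How a NULL accessMethod is resolved is not shown in these hunks; the sketch below is an assumption about that step, and get_table_am_oid() is a hypothetical lookup helper. Only the accessMethod field and the default_table_access_method GUC (added in guc.c later in this patch) come from the patch itself.

/*
 * Assumed, not shown in this patch: fall back to default_table_access_method
 * when no USING clause was given, then resolve the name to a pg_am OID.
 * get_table_am_oid() is a hypothetical helper used for illustration.
 */
static Oid
choose_table_am(const char *accessMethod)
{
	if (accessMethod == NULL)
		accessMethod = default_table_access_method;

	return get_table_am_oid(accessMethod, false);	/* errors if not found */
}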
create_as_target: - qualified_name opt_column_list OptWith OnCommitOption OptTableSpace + qualified_name opt_column_list table_access_method_clause + OptWith OnCommitOption OptTableSpace { $$ = makeNode(IntoClause); $$->rel = $1; $$->colNames = $2; - $$->options = $3; - $$->onCommit = $4; - $$->tableSpaceName = $5; + $$->accessMethod = $3; + $$->options = $4; + $$->onCommit = $5; + $$->tableSpaceName = $6; $$->viewQuery = NULL; $$->skipData = false; /* might get changed later */ } @@ -4083,14 +4101,15 @@ CreateMatViewStmt: ; create_mv_target: - qualified_name opt_column_list opt_reloptions OptTableSpace + qualified_name opt_column_list table_access_method_clause opt_reloptions OptTableSpace { $$ = makeNode(IntoClause); $$->rel = $1; $$->colNames = $2; - $$->options = $3; + $$->accessMethod = $3; + $$->options = $4; $$->onCommit = ONCOMMIT_NOOP; - $$->tableSpaceName = $4; + $$->tableSpaceName = $5; $$->viewQuery = NULL; /* filled at analysis time */ $$->skipData = false; /* might get changed later */ } @@ -5298,16 +5317,21 @@ row_security_cmd: * *****************************************************************************/ -CreateAmStmt: CREATE ACCESS METHOD name TYPE_P INDEX HANDLER handler_name +CreateAmStmt: CREATE ACCESS METHOD name TYPE_P am_type HANDLER handler_name { CreateAmStmt *n = makeNode(CreateAmStmt); n->amname = $4; n->handler_name = $8; - n->amtype = AMTYPE_INDEX; + n->amtype = $6; $$ = (Node *) n; } ; +am_type: + INDEX { $$ = AMTYPE_INDEX; } + | TABLE { $$ = AMTYPE_TABLE; } + ; + /***************************************************************************** * * QUERIES : diff --git a/src/backend/partitioning/partbounds.c b/src/backend/partitioning/partbounds.c index eeaab2f4c9..cb9e252f41 100644 --- a/src/backend/partitioning/partbounds.c +++ b/src/backend/partitioning/partbounds.c @@ -13,6 +13,7 @@ */ #include "postgres.h" +#include "access/tableam.h" #include "catalog/partition.h" #include "catalog/pg_inherits.h" #include "catalog/pg_type.h" @@ -1200,12 +1201,10 @@ check_default_partition_contents(Relation parent, Relation default_rel, Expr *constr; Expr *partition_constraint; EState *estate; - HeapTuple tuple; ExprState *partqualstate = NULL; Snapshot snapshot; - TupleDesc tupdesc; ExprContext *econtext; - HeapScanDesc scan; + TableScanDesc scan; MemoryContext oldCxt; TupleTableSlot *tupslot; @@ -1252,7 +1251,6 @@ check_default_partition_contents(Relation parent, Relation default_rel, continue; } - tupdesc = CreateTupleDescCopy(RelationGetDescr(part_rel)); constr = linitial(def_part_constraints); partition_constraint = (Expr *) map_partition_varattnos((List *) constr, @@ -1264,8 +1262,8 @@ check_default_partition_contents(Relation parent, Relation default_rel, econtext = GetPerTupleExprContext(estate); snapshot = RegisterSnapshot(GetLatestSnapshot()); - scan = heap_beginscan(part_rel, snapshot, 0, NULL); - tupslot = MakeSingleTupleTableSlot(tupdesc, &TTSOpsHeapTuple); + scan = table_beginscan(part_rel, snapshot, 0, NULL); + tupslot = table_gimmegimmeslot(parent, &estate->es_tupleTable); /* * Switch to per-tuple memory context and reset it for each tuple @@ -1273,9 +1271,8 @@ check_default_partition_contents(Relation parent, Relation default_rel, */ oldCxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate)); - while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + while (table_scan_getnextslot(scan, ForwardScanDirection, tupslot)) { - ExecStoreHeapTuple(tuple, tupslot, false); econtext->ecxt_scantuple = tupslot; if (!ExecCheck(partqualstate, econtext)) 
@@ -1289,7 +1286,7 @@ check_default_partition_contents(Relation parent, Relation default_rel, } MemoryContextSwitchTo(oldCxt); - heap_endscan(scan); + table_endscan(scan); UnregisterSnapshot(snapshot); ExecDropSingleTupleTableSlot(tupslot); FreeExecutorState(estate); diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index 2d5086d406..cb7ee7d53a 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -69,6 +69,7 @@ #include "access/htup_details.h" #include "access/multixact.h" #include "access/reloptions.h" +#include "access/tableam.h" #include "access/transam.h" #include "access/xact.h" #include "catalog/dependency.h" @@ -1866,7 +1867,7 @@ get_database_list(void) { List *dblist = NIL; Relation rel; - HeapScanDesc scan; + TableScanDesc scan; HeapTuple tup; MemoryContext resultcxt; @@ -1884,9 +1885,9 @@ get_database_list(void) (void) GetTransactionSnapshot(); rel = heap_open(DatabaseRelationId, AccessShareLock); - scan = heap_beginscan_catalog(rel, 0, NULL); + scan = table_beginscan_catalog(rel, 0, NULL); - while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection))) + while (HeapTupleIsValid(tup = heap_scan_getnext(scan, ForwardScanDirection))) { Form_pg_database pgdatabase = (Form_pg_database) GETSTRUCT(tup); avw_dbase *avdb; @@ -1913,7 +1914,7 @@ get_database_list(void) MemoryContextSwitchTo(oldcxt); } - heap_endscan(scan); + table_endscan(scan); heap_close(rel, AccessShareLock); CommitTransactionCommand(); @@ -1932,7 +1933,7 @@ do_autovacuum(void) { Relation classRel; HeapTuple tuple; - HeapScanDesc relScan; + TableScanDesc relScan; Form_pg_database dbForm; List *table_oids = NIL; List *orphan_oids = NIL; @@ -2044,13 +2045,13 @@ do_autovacuum(void) * wide tables there might be proportionally much more activity in the * TOAST table than in its parent. */ - relScan = heap_beginscan_catalog(classRel, 0, NULL); + relScan = table_beginscan_catalog(classRel, 0, NULL); /* * On the first pass, we collect main tables to vacuum, and also the main * table relid to TOAST relid mapping. 
*/ - while ((tuple = heap_getnext(relScan, ForwardScanDirection)) != NULL) + while ((tuple = heap_scan_getnext(relScan, ForwardScanDirection)) != NULL) { Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple); PgStat_StatTabEntry *tabentry; @@ -2133,7 +2134,7 @@ do_autovacuum(void) } } - heap_endscan(relScan); + table_endscan(relScan); /* second pass: check TOAST tables */ ScanKeyInit(&key, @@ -2141,8 +2142,8 @@ do_autovacuum(void) BTEqualStrategyNumber, F_CHAREQ, CharGetDatum(RELKIND_TOASTVALUE)); - relScan = heap_beginscan_catalog(classRel, 1, &key); - while ((tuple = heap_getnext(relScan, ForwardScanDirection)) != NULL) + relScan = table_beginscan_catalog(classRel, 1, &key); + while ((tuple = heap_scan_getnext(relScan, ForwardScanDirection)) != NULL) { Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple); PgStat_StatTabEntry *tabentry; @@ -2188,7 +2189,7 @@ do_autovacuum(void) table_oids = lappend_oid(table_oids, relid); } - heap_endscan(relScan); + table_endscan(relScan); heap_close(classRel, AccessShareLock); /* diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index 8676088e57..7762dbc44b 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -36,6 +36,7 @@ #include "access/heapam.h" #include "access/htup_details.h" +#include "access/tableam.h" #include "access/transam.h" #include "access/twophase_rmgr.h" #include "access/xact.h" @@ -1206,7 +1207,7 @@ pgstat_collect_oids(Oid catalogid, AttrNumber anum_oid) HTAB *htab; HASHCTL hash_ctl; Relation rel; - HeapScanDesc scan; + TableScanDesc scan; HeapTuple tup; Snapshot snapshot; @@ -1221,8 +1222,8 @@ pgstat_collect_oids(Oid catalogid, AttrNumber anum_oid) rel = heap_open(catalogid, AccessShareLock); snapshot = RegisterSnapshot(GetLatestSnapshot()); - scan = heap_beginscan(rel, snapshot, 0, NULL); - while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL) + scan = table_beginscan(rel, snapshot, 0, NULL); + while ((tup = heap_scan_getnext(scan, ForwardScanDirection)) != NULL) { Oid thisoid; bool isnull; @@ -1234,7 +1235,7 @@ pgstat_collect_oids(Oid catalogid, AttrNumber anum_oid) (void) hash_search(htab, (void *) &thisoid, HASH_ENTER, NULL); } - heap_endscan(scan); + table_endscan(scan); UnregisterSnapshot(snapshot); heap_close(rel, AccessShareLock); diff --git a/src/backend/replication/logical/launcher.c b/src/backend/replication/logical/launcher.c index 3a84d8ca86..8d452474c1 100644 --- a/src/backend/replication/logical/launcher.c +++ b/src/backend/replication/logical/launcher.c @@ -24,6 +24,7 @@ #include "access/heapam.h" #include "access/htup.h" #include "access/htup_details.h" +#include "access/tableam.h" #include "access/xact.h" #include "catalog/pg_subscription.h" @@ -118,7 +119,7 @@ get_subscription_list(void) { List *res = NIL; Relation rel; - HeapScanDesc scan; + TableScanDesc scan; HeapTuple tup; MemoryContext resultcxt; @@ -136,9 +137,9 @@ get_subscription_list(void) (void) GetTransactionSnapshot(); rel = heap_open(SubscriptionRelationId, AccessShareLock); - scan = heap_beginscan_catalog(rel, 0, NULL); + scan = table_beginscan_catalog(rel, 0, NULL); - while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection))) + while (HeapTupleIsValid(tup = heap_scan_getnext(scan, ForwardScanDirection))) { Form_pg_subscription subform = (Form_pg_subscription) GETSTRUCT(tup); Subscription *sub; @@ -164,7 +165,7 @@ get_subscription_list(void) MemoryContextSwitchTo(oldcxt); } - heap_endscan(scan); + table_endscan(scan); heap_close(rel, AccessShareLock); 
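get_database_list(), do_autovacuum(), pgstat_collect_oids() and get_subscription_list() above all follow the same converted pattern: the scan is opened through the table AM wrapper, but rows are still pulled as HeapTuples with heap_scan_getnext(), on the assumption that system catalogs always use the heap AM. A condensed sketch:

/*
 * Sketch only: the catalog-scan pattern used by the call sites above.
 */
static void
scan_catalog(Oid catalogId)
{
	Relation	rel;
	TableScanDesc scan;
	HeapTuple	tup;

	rel = heap_open(catalogId, AccessShareLock);
	scan = table_beginscan_catalog(rel, 0, NULL);

	while ((tup = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
	{
		/* decode with GETSTRUCT(tup) / heap_getattr() as before */
	}

	table_endscan(scan);
	heap_close(rel, AccessShareLock);
}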
CommitTransactionCommand(); diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c index 363ddf4505..363b82e1b5 100644 --- a/src/backend/replication/logical/snapbuild.c +++ b/src/backend/replication/logical/snapbuild.c @@ -376,7 +376,7 @@ static void SnapBuildFreeSnapshot(Snapshot snap) { /* make sure we don't get passed an external snapshot */ - Assert(snap->satisfies == HeapTupleSatisfiesHistoricMVCC); + Assert(snap->visibility_type == HISTORIC_MVCC_VISIBILITY); /* make sure nobody modified our snapshot */ Assert(snap->curcid == FirstCommandId); @@ -434,7 +434,7 @@ void SnapBuildSnapDecRefcount(Snapshot snap) { /* make sure we don't get passed an external snapshot */ - Assert(snap->satisfies == HeapTupleSatisfiesHistoricMVCC); + Assert(snap->visibility_type == HISTORIC_MVCC_VISIBILITY); /* make sure nobody modified our snapshot */ Assert(snap->curcid == FirstCommandId); @@ -476,7 +476,7 @@ SnapBuildBuildSnapshot(SnapBuild *builder) snapshot = MemoryContextAllocZero(builder->context, ssize); - snapshot->satisfies = HeapTupleSatisfiesHistoricMVCC; + snapshot->visibility_type = HISTORIC_MVCC_VISIBILITY; /* * We misuse the original meaning of SnapshotData's xip and subxip fields diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c index 893f1f008e..247e03aec4 100644 --- a/src/backend/replication/logical/worker.c +++ b/src/backend/replication/logical/worker.c @@ -27,6 +27,7 @@ #include "pgstat.h" #include "funcapi.h" +#include "access/tableam.h" #include "access/xact.h" #include "access/xlog_internal.h" @@ -211,11 +212,6 @@ create_estate_for_relation(LogicalRepRelMapEntry *rel) estate->es_output_cid = GetCurrentCommandId(true); - /* Triggers might need a slot */ - if (resultRelInfo->ri_TrigDesc) - estate->es_trig_tuple_slot = ExecInitExtraTupleSlot(estate, NULL, - &TTSOpsVirtual); - /* Prepare to catch AFTER triggers. 
*/ AfterTriggerBeginQuery(); @@ -718,10 +714,8 @@ apply_handle_update(StringInfo s) estate = create_estate_for_relation(rel); remoteslot = ExecInitExtraTupleSlot(estate, RelationGetDescr(rel->localrel), - &TTSOpsHeapTuple); - localslot = ExecInitExtraTupleSlot(estate, - RelationGetDescr(rel->localrel), - &TTSOpsHeapTuple); + &TTSOpsVirtual); + localslot = table_gimmegimmeslot(rel->localrel, &estate->es_tupleTable); EvalPlanQualInit(&epqstate, estate, NULL, NIL, -1); PushActiveSnapshot(GetTransactionSnapshot()); @@ -839,9 +833,7 @@ apply_handle_delete(StringInfo s) remoteslot = ExecInitExtraTupleSlot(estate, RelationGetDescr(rel->localrel), &TTSOpsVirtual); - localslot = ExecInitExtraTupleSlot(estate, - RelationGetDescr(rel->localrel), - &TTSOpsHeapTuple); + localslot = table_gimmegimmeslot(rel->localrel, &estate->es_tupleTable); EvalPlanQualInit(&epqstate, estate, NULL, NIL, -1); PushActiveSnapshot(GetTransactionSnapshot()); diff --git a/src/backend/rewrite/rewriteDefine.c b/src/backend/rewrite/rewriteDefine.c index 1061947950..2001190e27 100644 --- a/src/backend/rewrite/rewriteDefine.c +++ b/src/backend/rewrite/rewriteDefine.c @@ -17,6 +17,7 @@ #include "access/heapam.h" #include "access/htup_details.h" #include "access/multixact.h" +#include "access/tableam.h" #include "access/transam.h" #include "access/xact.h" #include "catalog/catalog.h" @@ -424,8 +425,9 @@ DefineQueryRewrite(const char *rulename, if (event_relation->rd_rel->relkind != RELKIND_VIEW && event_relation->rd_rel->relkind != RELKIND_MATVIEW) { - HeapScanDesc scanDesc; + TableScanDesc scanDesc; Snapshot snapshot; + TupleTableSlot *slot; if (event_relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) ereport(ERROR, @@ -440,13 +442,15 @@ DefineQueryRewrite(const char *rulename, RelationGetRelationName(event_relation)))); snapshot = RegisterSnapshot(GetLatestSnapshot()); - scanDesc = heap_beginscan(event_relation, snapshot, 0, NULL); - if (heap_getnext(scanDesc, ForwardScanDirection) != NULL) + scanDesc = table_beginscan(event_relation, snapshot, 0, NULL); + slot = table_gimmegimmeslot(event_relation, NULL); + if (table_scan_getnextslot(scanDesc, ForwardScanDirection, slot)) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("could not convert table \"%s\" to a view because it is not empty", RelationGetRelationName(event_relation)))); - heap_endscan(scanDesc); + ExecDropSingleTupleTableSlot(slot); + table_endscan(scanDesc); UnregisterSnapshot(snapshot); if (event_relation->rd_rel->relhastriggers) diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c index e8390311d0..2960e21340 100644 --- a/src/backend/storage/lmgr/predicate.c +++ b/src/backend/storage/lmgr/predicate.c @@ -188,6 +188,7 @@ #include "access/htup_details.h" #include "access/slru.h" #include "access/subtrans.h" +#include "access/tableam.h" #include "access/transam.h" #include "access/twophase.h" #include "access/twophase_rmgr.h" diff --git a/src/backend/utils/adt/pseudotypes.c b/src/backend/utils/adt/pseudotypes.c index dbe67cdb4c..89aac13c80 100644 --- a/src/backend/utils/adt/pseudotypes.c +++ b/src/backend/utils/adt/pseudotypes.c @@ -418,3 +418,4 @@ PSEUDOTYPE_DUMMY_IO_FUNCS(internal); PSEUDOTYPE_DUMMY_IO_FUNCS(opaque); PSEUDOTYPE_DUMMY_IO_FUNCS(anyelement); PSEUDOTYPE_DUMMY_IO_FUNCS(anynonarray); +PSEUDOTYPE_DUMMY_IO_FUNCS(table_am_handler); diff --git a/src/backend/utils/adt/ri_triggers.c b/src/backend/utils/adt/ri_triggers.c index cdda860e73..747602b571 100644 --- a/src/backend/utils/adt/ri_triggers.c 
+++ b/src/backend/utils/adt/ri_triggers.c @@ -31,6 +31,7 @@ #include "postgres.h" #include "access/htup_details.h" +#include "access/tableam.h" #include "access/sysattr.h" #include "access/xact.h" #include "catalog/pg_collation.h" @@ -191,7 +192,7 @@ static int ri_constraint_cache_valid_count = 0; * ---------- */ static bool ri_Check_Pk_Match(Relation pk_rel, Relation fk_rel, - HeapTuple old_row, + TupleTableSlot *oldslot, const RI_ConstraintInfo *riinfo); static Datum ri_restrict(TriggerData *trigdata, bool is_no_action); static Datum ri_setnull(TriggerData *trigdata); @@ -204,12 +205,12 @@ static void ri_GenerateQual(StringInfo buf, Oid opoid, const char *rightop, Oid rightoptype); static void ri_GenerateQualCollation(StringInfo buf, Oid collation); -static int ri_NullCheck(TupleDesc tupdesc, HeapTuple tup, +static int ri_NullCheck(TupleDesc tupdesc, TupleTableSlot *slot, const RI_ConstraintInfo *riinfo, bool rel_is_pk); static void ri_BuildQueryKey(RI_QueryKey *key, const RI_ConstraintInfo *riinfo, int32 constr_queryno); -static bool ri_KeysEqual(Relation rel, HeapTuple oldtup, HeapTuple newtup, +static bool ri_KeysEqual(Relation rel, TupleTableSlot *oldslot, TupleTableSlot *newslot, const RI_ConstraintInfo *riinfo, bool rel_is_pk); static bool ri_AttributesEqual(Oid eq_opr, Oid typeid, Datum oldvalue, Datum newvalue); @@ -231,14 +232,14 @@ static SPIPlanPtr ri_PlanCheck(const char *querystr, int nargs, Oid *argtypes, static bool ri_PerformCheck(const RI_ConstraintInfo *riinfo, RI_QueryKey *qkey, SPIPlanPtr qplan, Relation fk_rel, Relation pk_rel, - HeapTuple old_tuple, HeapTuple new_tuple, + TupleTableSlot *oldslot, TupleTableSlot *newslot, bool detectNewRows, int expect_OK); -static void ri_ExtractValues(Relation rel, HeapTuple tup, +static void ri_ExtractValues(Relation rel, TupleTableSlot *slot, const RI_ConstraintInfo *riinfo, bool rel_is_pk, Datum *vals, char *nulls); static void ri_ReportViolation(const RI_ConstraintInfo *riinfo, Relation pk_rel, Relation fk_rel, - HeapTuple violator, TupleDesc tupdesc, + TupleTableSlot *violator, TupleDesc tupdesc, int queryno) pg_attribute_noreturn(); @@ -254,8 +255,11 @@ RI_FKey_check(TriggerData *trigdata) const RI_ConstraintInfo *riinfo; Relation fk_rel; Relation pk_rel; +#if 0 HeapTuple new_row; Buffer new_row_buf; +#endif + TupleTableSlot *newslot; RI_QueryKey qkey; SPIPlanPtr qplan; int i; @@ -267,15 +271,9 @@ RI_FKey_check(TriggerData *trigdata) trigdata->tg_relation, false); if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event)) - { - new_row = trigdata->tg_newtuple; - new_row_buf = trigdata->tg_newtuplebuf; - } + newslot = trigdata->tg_newslot; else - { - new_row = trigdata->tg_trigtuple; - new_row_buf = trigdata->tg_trigtuplebuf; - } + newslot = trigdata->tg_trigslot; /* * We should not even consider checking the row if it is no longer valid, @@ -285,13 +283,8 @@ RI_FKey_check(TriggerData *trigdata) * and lock on the buffer to call HeapTupleSatisfiesVisibility. Caller * should be holding pin, but not lock. */ - LockBuffer(new_row_buf, BUFFER_LOCK_SHARE); - if (!HeapTupleSatisfiesVisibility(new_row, SnapshotSelf, new_row_buf)) - { - LockBuffer(new_row_buf, BUFFER_LOCK_UNLOCK); + if (!table_satisfies_snapshot(trigdata->tg_relation, newslot, SnapshotSelf)) return PointerGetDatum(NULL); - } - LockBuffer(new_row_buf, BUFFER_LOCK_UNLOCK); /* * Get the relation descriptors of the FK and PK tables. 
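In RI_FKey_check() above, the caller no longer takes a buffer lock and calls HeapTupleSatisfiesVisibility() itself; the row is identified by the trigger slot and the visibility test is delegated to the access method. A minimal sketch of that check, with an illustrative wrapper name:

/*
 * Sketch only: ask the table AM whether the slot's row is visible to
 * SnapshotSelf, replacing the buffer-locked visibility call the old code used.
 */
static bool
row_passes_self_snapshot(Relation rel, TupleTableSlot *slot)
{
	return table_satisfies_snapshot(rel, slot, SnapshotSelf);
}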
@@ -307,7 +300,7 @@ RI_FKey_check(TriggerData *trigdata) (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("MATCH PARTIAL not yet implemented"))); - switch (ri_NullCheck(RelationGetDescr(fk_rel), new_row, riinfo, false)) + switch (ri_NullCheck(RelationGetDescr(fk_rel), newslot, riinfo, false)) { case RI_KEYS_ALL_NULL: @@ -437,7 +430,7 @@ RI_FKey_check(TriggerData *trigdata) */ ri_PerformCheck(riinfo, &qkey, qplan, fk_rel, pk_rel, - NULL, new_row, + NULL, newslot, false, SPI_OK_SELECT); @@ -505,7 +498,7 @@ RI_FKey_check_upd(PG_FUNCTION_ARGS) */ static bool ri_Check_Pk_Match(Relation pk_rel, Relation fk_rel, - HeapTuple old_row, + TupleTableSlot *oldslot, const RI_ConstraintInfo *riinfo) { SPIPlanPtr qplan; @@ -514,7 +507,7 @@ ri_Check_Pk_Match(Relation pk_rel, Relation fk_rel, bool result; /* Only called for non-null rows */ - Assert(ri_NullCheck(RelationGetDescr(pk_rel), old_row, riinfo, true) == RI_KEYS_NONE_NULL); + Assert(ri_NullCheck(RelationGetDescr(pk_rel), oldslot, riinfo, true) == RI_KEYS_NONE_NULL); if (SPI_connect() != SPI_OK_CONNECT) elog(ERROR, "SPI_connect failed"); @@ -572,7 +565,7 @@ ri_Check_Pk_Match(Relation pk_rel, Relation fk_rel, */ result = ri_PerformCheck(riinfo, &qkey, qplan, fk_rel, pk_rel, - old_row, NULL, + oldslot, NULL, true, /* treat like update */ SPI_OK_SELECT); @@ -690,7 +683,7 @@ ri_restrict(TriggerData *trigdata, bool is_no_action) const RI_ConstraintInfo *riinfo; Relation fk_rel; Relation pk_rel; - HeapTuple old_row; + TupleTableSlot *old_slot; RI_QueryKey qkey; SPIPlanPtr qplan; @@ -708,7 +701,7 @@ ri_restrict(TriggerData *trigdata, bool is_no_action) */ fk_rel = heap_open(riinfo->fk_relid, RowShareLock); pk_rel = trigdata->tg_relation; - old_row = trigdata->tg_trigtuple; + old_slot = trigdata->tg_trigslot; switch (riinfo->confmatchtype) { @@ -732,7 +725,7 @@ ri_restrict(TriggerData *trigdata, bool is_no_action) * allow another row to be substituted. 
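The RI trigger functions above now read the old and new row images as slots (tg_trigslot / tg_newslot) rather than as HeapTuples. A small sketch of how a trigger body picks them up, using only fields that appear in this patch; the helper itself is illustrative:

/*
 * Sketch only: fetch old/new row slots from TriggerData.
 */
static void
get_row_slots(TriggerData *trigdata,
			  TupleTableSlot **old_slot, TupleTableSlot **new_slot)
{
	*old_slot = trigdata->tg_trigslot;
	*new_slot = TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event)
		? trigdata->tg_newslot
		: NULL;
}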
*/ if (is_no_action && - ri_Check_Pk_Match(pk_rel, fk_rel, old_row, riinfo)) + ri_Check_Pk_Match(pk_rel, fk_rel, old_slot, riinfo)) { heap_close(fk_rel, RowShareLock); return PointerGetDatum(NULL); @@ -800,7 +793,7 @@ ri_restrict(TriggerData *trigdata, bool is_no_action) */ ri_PerformCheck(riinfo, &qkey, qplan, fk_rel, pk_rel, - old_row, NULL, + old_slot, NULL, true, /* must detect new rows */ SPI_OK_SELECT); @@ -844,7 +837,7 @@ RI_FKey_cascade_del(PG_FUNCTION_ARGS) const RI_ConstraintInfo *riinfo; Relation fk_rel; Relation pk_rel; - HeapTuple old_row; + TupleTableSlot *old_slot; RI_QueryKey qkey; SPIPlanPtr qplan; int i; @@ -868,7 +861,7 @@ RI_FKey_cascade_del(PG_FUNCTION_ARGS) */ fk_rel = heap_open(riinfo->fk_relid, RowExclusiveLock); pk_rel = trigdata->tg_relation; - old_row = trigdata->tg_trigtuple; + old_slot = trigdata->tg_trigslot; switch (riinfo->confmatchtype) { @@ -940,7 +933,7 @@ RI_FKey_cascade_del(PG_FUNCTION_ARGS) */ ri_PerformCheck(riinfo, &qkey, qplan, fk_rel, pk_rel, - old_row, NULL, + old_slot, NULL, true, /* must detect new rows */ SPI_OK_DELETE); @@ -984,8 +977,8 @@ RI_FKey_cascade_upd(PG_FUNCTION_ARGS) const RI_ConstraintInfo *riinfo; Relation fk_rel; Relation pk_rel; - HeapTuple new_row; - HeapTuple old_row; + TupleTableSlot *new_slot; + TupleTableSlot *old_slot; RI_QueryKey qkey; SPIPlanPtr qplan; int i; @@ -1011,8 +1004,8 @@ RI_FKey_cascade_upd(PG_FUNCTION_ARGS) */ fk_rel = heap_open(riinfo->fk_relid, RowExclusiveLock); pk_rel = trigdata->tg_relation; - new_row = trigdata->tg_newtuple; - old_row = trigdata->tg_trigtuple; + new_slot = trigdata->tg_newslot; + old_slot = trigdata->tg_trigslot; switch (riinfo->confmatchtype) { @@ -1096,7 +1089,7 @@ RI_FKey_cascade_upd(PG_FUNCTION_ARGS) */ ri_PerformCheck(riinfo, &qkey, qplan, fk_rel, pk_rel, - old_row, new_row, + old_slot, new_slot, true, /* must detect new rows */ SPI_OK_UPDATE); @@ -1179,7 +1172,7 @@ ri_setnull(TriggerData *trigdata) const RI_ConstraintInfo *riinfo; Relation fk_rel; Relation pk_rel; - HeapTuple old_row; + TupleTableSlot *old_slot; RI_QueryKey qkey; SPIPlanPtr qplan; int i; @@ -1198,7 +1191,7 @@ ri_setnull(TriggerData *trigdata) */ fk_rel = heap_open(riinfo->fk_relid, RowExclusiveLock); pk_rel = trigdata->tg_relation; - old_row = trigdata->tg_trigtuple; + old_slot = trigdata->tg_trigslot; switch (riinfo->confmatchtype) { @@ -1283,7 +1276,7 @@ ri_setnull(TriggerData *trigdata) */ ri_PerformCheck(riinfo, &qkey, qplan, fk_rel, pk_rel, - old_row, NULL, + old_slot, NULL, true, /* must detect new rows */ SPI_OK_UPDATE); @@ -1366,7 +1359,7 @@ ri_setdefault(TriggerData *trigdata) const RI_ConstraintInfo *riinfo; Relation fk_rel; Relation pk_rel; - HeapTuple old_row; + TupleTableSlot *old_slot; RI_QueryKey qkey; SPIPlanPtr qplan; @@ -1384,7 +1377,7 @@ ri_setdefault(TriggerData *trigdata) */ fk_rel = heap_open(riinfo->fk_relid, RowExclusiveLock); pk_rel = trigdata->tg_relation; - old_row = trigdata->tg_trigtuple; + old_slot = trigdata->tg_trigslot; switch (riinfo->confmatchtype) { @@ -1470,7 +1463,7 @@ ri_setdefault(TriggerData *trigdata) */ ri_PerformCheck(riinfo, &qkey, qplan, fk_rel, pk_rel, - old_row, NULL, + old_slot, NULL, true, /* must detect new rows */ SPI_OK_UPDATE); @@ -1529,7 +1522,7 @@ ri_setdefault(TriggerData *trigdata) */ bool RI_FKey_pk_upd_check_required(Trigger *trigger, Relation pk_rel, - HeapTuple old_row, HeapTuple new_row) + TupleTableSlot *old_slot, TupleTableSlot *new_slot) { const RI_ConstraintInfo *riinfo; @@ -1547,11 +1540,11 @@ RI_FKey_pk_upd_check_required(Trigger *trigger, Relation 
pk_rel, * If any old key value is NULL, the row could not have been * referenced by an FK row, so no check is needed. */ - if (ri_NullCheck(RelationGetDescr(pk_rel), old_row, riinfo, true) != RI_KEYS_NONE_NULL) + if (ri_NullCheck(RelationGetDescr(pk_rel), old_slot, riinfo, true) != RI_KEYS_NONE_NULL) return false; /* If all old and new key values are equal, no check is needed */ - if (new_row && ri_KeysEqual(pk_rel, old_row, new_row, riinfo, true)) + if (new_slot && ri_KeysEqual(pk_rel, old_slot, new_slot, riinfo, true)) return false; /* Else we need to fire the trigger. */ @@ -1586,9 +1579,12 @@ RI_FKey_pk_upd_check_required(Trigger *trigger, Relation pk_rel, */ bool RI_FKey_fk_upd_check_required(Trigger *trigger, Relation fk_rel, - HeapTuple old_row, HeapTuple new_row) + TupleTableSlot *old_slot, TupleTableSlot *new_slot) { const RI_ConstraintInfo *riinfo; + Datum xminDatum; + TransactionId xmin; + bool isnull; /* * Get arguments. @@ -1603,7 +1599,7 @@ RI_FKey_fk_upd_check_required(Trigger *trigger, Relation fk_rel, * If any new key value is NULL, the row must satisfy the * constraint, so no check is needed. */ - if (ri_NullCheck(RelationGetDescr(fk_rel), new_row, riinfo, false) != RI_KEYS_NONE_NULL) + if (ri_NullCheck(RelationGetDescr(fk_rel), new_slot, riinfo, false) != RI_KEYS_NONE_NULL) return false; /* @@ -1614,11 +1610,14 @@ RI_FKey_fk_upd_check_required(Trigger *trigger, Relation fk_rel, * UPDATE check. (We could skip this if we knew the INSERT * trigger already fired, but there is no easy way to know that.) */ - if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(old_row->t_data))) + xminDatum = slot_getsysattr(old_slot, MinTransactionIdAttributeNumber, &isnull); + Assert(!isnull); + xmin = DatumGetTransactionId(xminDatum); + if (TransactionIdIsCurrentTransactionId(xmin)) return true; /* If all old and new key values are equal, no check is needed */ - if (ri_KeysEqual(fk_rel, old_row, new_row, riinfo, false)) + if (ri_KeysEqual(fk_rel, old_slot, new_slot, riinfo, false)) return false; /* Else we need to fire the trigger. */ @@ -1634,7 +1633,7 @@ RI_FKey_fk_upd_check_required(Trigger *trigger, Relation fk_rel, * invalidated before the constraint is to be checked, but we * should queue the event to apply the check later. */ - switch (ri_NullCheck(RelationGetDescr(fk_rel), new_row, riinfo, false)) + switch (ri_NullCheck(RelationGetDescr(fk_rel), new_slot, riinfo, false)) { case RI_KEYS_ALL_NULL: return false; @@ -1652,11 +1651,14 @@ RI_FKey_fk_upd_check_required(Trigger *trigger, Relation fk_rel, * UPDATE check. (We could skip this if we knew the INSERT * trigger already fired, but there is no easy way to know that.) */ - if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(old_row->t_data))) + xminDatum = slot_getsysattr(old_slot, MinTransactionIdAttributeNumber, &isnull); + Assert(!isnull); + xmin = DatumGetTransactionId(xminDatum); + if (TransactionIdIsCurrentTransactionId(xmin)) return true; /* If all old and new key values are equal, no check is needed */ - if (ri_KeysEqual(fk_rel, old_row, new_row, riinfo, false)) + if (ri_KeysEqual(fk_rel, old_slot, new_slot, riinfo, false)) return false; /* Else we need to fire the trigger. */ @@ -1910,10 +1912,17 @@ RI_Initial_Check(Trigger *trigger, Relation fk_rel, Relation pk_rel) /* Did we find a tuple violating the constraint? 
*/ if (SPI_processed > 0) { + TupleTableSlot *slot; HeapTuple tuple = SPI_tuptable->vals[0]; TupleDesc tupdesc = SPI_tuptable->tupdesc; RI_ConstraintInfo fake_riinfo; + slot = MakeSingleTupleTableSlot(tupdesc, &TTSOpsVirtual); + + heap_deform_tuple(tuple, tupdesc, + slot->tts_values, slot->tts_isnull); + ExecStoreVirtualTuple(slot); + /* * The columns to look at in the result tuple are 1..N, not whatever * they are in the fk_rel. Hack up riinfo so that the subroutines @@ -1933,7 +1942,7 @@ RI_Initial_Check(Trigger *trigger, Relation fk_rel, Relation pk_rel) * disallows partially-null FK rows. */ if (fake_riinfo.confmatchtype == FKCONSTR_MATCH_FULL && - ri_NullCheck(tupdesc, tuple, &fake_riinfo, false) != RI_KEYS_NONE_NULL) + ri_NullCheck(tupdesc, slot, &fake_riinfo, false) != RI_KEYS_NONE_NULL) ereport(ERROR, (errcode(ERRCODE_FOREIGN_KEY_VIOLATION), errmsg("insert or update on table \"%s\" violates foreign key constraint \"%s\"", @@ -1950,8 +1959,10 @@ RI_Initial_Check(Trigger *trigger, Relation fk_rel, Relation pk_rel) */ ri_ReportViolation(&fake_riinfo, pk_rel, fk_rel, - tuple, tupdesc, + slot, tupdesc, RI_PLAN_CHECK_LOOKUPPK); + + ExecDropSingleTupleTableSlot(slot); } if (SPI_finish() != SPI_OK_FINISH) @@ -2429,7 +2440,7 @@ static bool ri_PerformCheck(const RI_ConstraintInfo *riinfo, RI_QueryKey *qkey, SPIPlanPtr qplan, Relation fk_rel, Relation pk_rel, - HeapTuple old_tuple, HeapTuple new_tuple, + TupleTableSlot *old_slot, TupleTableSlot *new_slot, bool detectNewRows, int expect_OK) { Relation query_rel, @@ -2472,17 +2483,17 @@ ri_PerformCheck(const RI_ConstraintInfo *riinfo, } /* Extract the parameters to be passed into the query */ - if (new_tuple) + if (new_slot) { - ri_ExtractValues(source_rel, new_tuple, riinfo, source_is_pk, + ri_ExtractValues(source_rel, new_slot, riinfo, source_is_pk, vals, nulls); - if (old_tuple) - ri_ExtractValues(source_rel, old_tuple, riinfo, source_is_pk, + if (old_slot) + ri_ExtractValues(source_rel, old_slot, riinfo, source_is_pk, vals + riinfo->nkeys, nulls + riinfo->nkeys); } else { - ri_ExtractValues(source_rel, old_tuple, riinfo, source_is_pk, + ri_ExtractValues(source_rel, old_slot, riinfo, source_is_pk, vals, nulls); } @@ -2552,7 +2563,7 @@ ri_PerformCheck(const RI_ConstraintInfo *riinfo, (SPI_processed == 0) == (qkey->constr_queryno == RI_PLAN_CHECK_LOOKUPPK)) ri_ReportViolation(riinfo, pk_rel, fk_rel, - new_tuple ? new_tuple : old_tuple, + new_slot ? new_slot : old_slot, NULL, qkey->constr_queryno); @@ -2563,11 +2574,10 @@ ri_PerformCheck(const RI_ConstraintInfo *riinfo, * Extract fields from a tuple into Datum/nulls arrays */ static void -ri_ExtractValues(Relation rel, HeapTuple tup, +ri_ExtractValues(Relation rel, TupleTableSlot *slot, const RI_ConstraintInfo *riinfo, bool rel_is_pk, Datum *vals, char *nulls) { - TupleDesc tupdesc = rel->rd_att; const int16 *attnums; int i; bool isnull; @@ -2579,8 +2589,7 @@ ri_ExtractValues(Relation rel, HeapTuple tup, for (i = 0; i < riinfo->nkeys; i++) { - vals[i] = heap_getattr(tup, attnums[i], tupdesc, - &isnull); + vals[i] = slot_getattr(slot, attnums[i], &isnull); nulls[i] = isnull ? 
'n' : ' '; } } @@ -2597,7 +2606,7 @@ ri_ExtractValues(Relation rel, HeapTuple tup, static void ri_ReportViolation(const RI_ConstraintInfo *riinfo, Relation pk_rel, Relation fk_rel, - HeapTuple violator, TupleDesc tupdesc, + TupleTableSlot *violatorslot, TupleDesc tupdesc, int queryno) { StringInfoData key_names; @@ -2676,7 +2685,8 @@ ri_ReportViolation(const RI_ConstraintInfo *riinfo, *val; name = SPI_fname(tupdesc, fnum); - val = SPI_getvalue(violator, tupdesc, fnum); + // PBORKED: avoid heaptuple conversion + val = SPI_getvalue(ExecFetchSlotHeapTuple(violatorslot, false, NULL), tupdesc, fnum); if (!val) val = "null"; @@ -2730,7 +2740,7 @@ ri_ReportViolation(const RI_ConstraintInfo *riinfo, */ static int ri_NullCheck(TupleDesc tupDesc, - HeapTuple tup, + TupleTableSlot *slot, const RI_ConstraintInfo *riinfo, bool rel_is_pk) { const int16 *attnums; @@ -2745,7 +2755,7 @@ ri_NullCheck(TupleDesc tupDesc, for (i = 0; i < riinfo->nkeys; i++) { - if (heap_attisnull(tup, attnums[i], tupDesc)) + if (slot_attisnull(slot, attnums[i])) nonenull = false; else allnull = false; @@ -2896,10 +2906,9 @@ ri_HashPreparedPlan(RI_QueryKey *key, SPIPlanPtr plan) * ---------- */ static bool -ri_KeysEqual(Relation rel, HeapTuple oldtup, HeapTuple newtup, +ri_KeysEqual(Relation rel, TupleTableSlot *oldslot, TupleTableSlot *newslot, const RI_ConstraintInfo *riinfo, bool rel_is_pk) { - TupleDesc tupdesc = RelationGetDescr(rel); const int16 *attnums; const Oid *eq_oprs; int i; @@ -2915,6 +2924,7 @@ ri_KeysEqual(Relation rel, HeapTuple oldtup, HeapTuple newtup, eq_oprs = riinfo->ff_eq_oprs; } + /* XXX: could be worthwhile to fetch all necessary attrs at once */ for (i = 0; i < riinfo->nkeys; i++) { Datum oldvalue; @@ -2924,14 +2934,14 @@ ri_KeysEqual(Relation rel, HeapTuple oldtup, HeapTuple newtup, /* * Get one attribute's oldvalue. If it is NULL - they're not equal. */ - oldvalue = heap_getattr(oldtup, attnums[i], tupdesc, &isnull); + oldvalue = slot_getattr(oldslot, attnums[i], &isnull); if (isnull) return false; /* * Get one attribute's newvalue. If it is NULL - they're not equal. 
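ri_NullCheck(), ri_ExtractValues() and ri_KeysEqual() above switch from heap_getattr()/heap_attisnull() on a HeapTuple to the slot accessors, and the xmin test in RI_FKey_fk_upd_check_required() goes through slot_getsysattr(). A compact sketch of those accessors; the helper and its name are illustrative only:

/*
 * Sketch only: is the given key column non-null and was the row inserted by
 * the current transaction?  User columns go through slot_getattr() /
 * slot_attisnull(); system columns such as xmin through slot_getsysattr().
 */
static bool
key_set_by_current_xact(TupleTableSlot *slot, AttrNumber attnum)
{
	bool		isnull;
	Datum		xminDatum;
	TransactionId xmin;

	if (slot_attisnull(slot, attnum))
		return false;

	xminDatum = slot_getsysattr(slot, MinTransactionIdAttributeNumber, &isnull);
	Assert(!isnull);
	xmin = DatumGetTransactionId(xminDatum);

	return TransactionIdIsCurrentTransactionId(xmin);
}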
*/ - newvalue = heap_getattr(newtup, attnums[i], tupdesc, &isnull); + newvalue = slot_getattr(newslot, attnums[i], &isnull); if (isnull) return false; diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index ffca0fe5bb..270b01909e 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -103,6 +103,7 @@ #include "access/brin.h" #include "access/gin.h" #include "access/htup_details.h" +#include "access/tableam.h" #include "access/sysattr.h" #include "catalog/index.h" #include "catalog/pg_am.h" @@ -5528,7 +5529,6 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, bool typByVal; ScanKeyData scankeys[1]; IndexScanDesc index_scan; - HeapTuple tup; Datum values[INDEX_MAX_KEYS]; bool isnull[INDEX_MAX_KEYS]; SnapshotData SnapshotNonVacuumable; @@ -5551,8 +5551,7 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, indexInfo = BuildIndexInfo(indexRel); /* some other stuff */ - slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRel), - &TTSOpsHeapTuple); + slot = table_gimmegimmeslot(heapRel, NULL); econtext->ecxt_scantuple = slot; get_typlenbyval(vardata->atttype, &typLen, &typByVal); InitNonVacuumableSnapshot(SnapshotNonVacuumable, RecentGlobalXmin); @@ -5604,11 +5603,9 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, index_rescan(index_scan, scankeys, 1, NULL, 0); /* Fetch first tuple in sortop's direction */ - if ((tup = index_getnext(index_scan, - indexscandir)) != NULL) + if (index_getnext_slot(index_scan, indexscandir, slot)) { - /* Extract the index column values from the heap tuple */ - ExecStoreHeapTuple(tup, slot, false); + /* Extract the index column values from the slot */ FormIndexDatum(indexInfo, slot, estate, values, isnull); @@ -5637,11 +5634,9 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, index_rescan(index_scan, scankeys, 1, NULL, 0); /* Fetch first tuple in reverse direction */ - if ((tup = index_getnext(index_scan, - -indexscandir)) != NULL) + if (index_getnext_slot(index_scan, -indexscandir, slot)) { - /* Extract the index column values from the heap tuple */ - ExecStoreHeapTuple(tup, slot, false); + /* Extract the index column values from the slot */ FormIndexDatum(indexInfo, slot, estate, values, isnull); diff --git a/src/backend/utils/adt/tid.c b/src/backend/utils/adt/tid.c index 41d540b46e..bb8a683b44 100644 --- a/src/backend/utils/adt/tid.c +++ b/src/backend/utils/adt/tid.c @@ -22,6 +22,7 @@ #include "access/heapam.h" #include "access/sysattr.h" +#include "access/tableam.h" #include "catalog/namespace.h" #include "catalog/pg_type.h" #include "libpq/pqformat.h" @@ -352,7 +353,7 @@ currtid_byreloid(PG_FUNCTION_ARGS) ItemPointerCopy(tid, result); snapshot = RegisterSnapshot(GetLatestSnapshot()); - heap_get_latest_tid(rel, snapshot, result); + table_get_latest_tid(rel, snapshot, result); UnregisterSnapshot(snapshot); heap_close(rel, AccessShareLock); @@ -387,7 +388,7 @@ currtid_byrelname(PG_FUNCTION_ARGS) ItemPointerCopy(tid, result); snapshot = RegisterSnapshot(GetLatestSnapshot()); - heap_get_latest_tid(rel, snapshot, result); + table_get_latest_tid(rel, snapshot, result); UnregisterSnapshot(snapshot); heap_close(rel, AccessShareLock); diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index c3071db1cd..8b79e59077 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -36,6 +36,7 @@ #include "access/nbtree.h" #include "access/reloptions.h" 
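get_actual_variable_range() above now lets index_getnext_slot() deliver the matching table row straight into a slot obtained from table_gimmegimmeslot(), instead of index_getnext() plus ExecStoreHeapTuple(). The generic loop looks roughly like this; scan-key and estate setup are omitted and the helper is illustrative only:

/*
 * Sketch only: index scan feeding rows into an AM-appropriate slot.
 */
static void
scan_index_into_slot(Relation heapRel, Relation indexRel, Snapshot snapshot)
{
	IndexScanDesc scan;
	TupleTableSlot *slot;

	slot = table_gimmegimmeslot(heapRel, NULL);

	scan = index_beginscan(heapRel, indexRel, snapshot, 0, 0);
	index_rescan(scan, NULL, 0, NULL, 0);

	while (index_getnext_slot(scan, ForwardScanDirection, slot))
	{
		/* column values are available via slot_getattr() */
	}

	index_endscan(scan);
	ExecDropSingleTupleTableSlot(slot);
}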
#include "access/sysattr.h" +#include "access/tableam.h" #include "access/tupdesc_details.h" #include "access/xact.h" #include "access/xlog.h" @@ -1196,10 +1197,29 @@ RelationBuildDesc(Oid targetRelId, bool insertIt) } /* - * if it's an index, initialize index-related information + * initialize access method information */ - if (OidIsValid(relation->rd_rel->relam)) - RelationInitIndexAccessInfo(relation); + switch (relation->rd_rel->relkind) + { + case RELKIND_INDEX: + case RELKIND_PARTITIONED_INDEX: + Assert(relation->rd_rel->relam != InvalidOid); + RelationInitIndexAccessInfo(relation); + break; + case RELKIND_RELATION: + case RELKIND_SEQUENCE: + case RELKIND_TOASTVALUE: + case RELKIND_VIEW: /* Not exactly the storage, but underlying + * tuple access, it is required */ + case RELKIND_MATVIEW: + case RELKIND_PARTITIONED_TABLE: + case RELKIND_FOREIGN_TABLE: /* hari FIXME :To support COPY on foreign tables */ + RelationInitTableAccessMethod(relation); + break; + default: + /* nothing to do in other cases */ + break; + } /* extract reloptions if any */ RelationParseRelOptions(relation, pg_class_tuple); @@ -1701,6 +1721,52 @@ LookupOpclassInfo(Oid operatorClassOid, return opcentry; } +/* + * Fill in the TableAmRoutine for a relation + * + * relation's rd_tableamhandler must be valid already. + */ +static void +InitTableAmRoutine(Relation relation) +{ + relation->rd_tableamroutine = GetTableAmRoutine(relation->rd_tableamhandler); +} + +/* + * Initialize table-access-method support data for a heap relation + */ +void +RelationInitTableAccessMethod(Relation relation) +{ + HeapTuple tuple; + Form_pg_am aform; + + if (IsCatalogRelation(relation) || + !OidIsValid(relation->rd_rel->relam)) + { + relation->rd_tableamhandler = HEAP_TABLE_AM_HANDLER_OID; + } + else + { + /* + * Look up the table access method, save the OID of its handler + * function. 
+ */ + tuple = SearchSysCache1(AMOID, + ObjectIdGetDatum(relation->rd_rel->relam)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for access method %u", + relation->rd_rel->relam); + aform = (Form_pg_am) GETSTRUCT(tuple); + relation->rd_tableamhandler = aform->amhandler; + ReleaseSysCache(tuple); + } + + /* + * Now we can fetch the table AM's API struct + */ + InitTableAmRoutine(relation); +} /* * formrdesc @@ -1787,6 +1853,7 @@ formrdesc(const char *relationName, Oid relationReltype, relation->rd_rel->relallvisible = 0; relation->rd_rel->relkind = RELKIND_RELATION; relation->rd_rel->relnatts = (int16) natts; + relation->rd_rel->relam = HEAP_TABLE_AM_OID; /* * initialize attribute tuple form @@ -1854,6 +1921,12 @@ formrdesc(const char *relationName, Oid relationReltype, */ RelationInitPhysicalAddr(relation); + /* + * initialize the table am handler + */ + relation->rd_rel->relam = HEAP_TABLE_AM_OID; + relation->rd_tableamroutine = GetHeapamTableAmRoutine(); + /* * initialize the rel-has-index flag, using hardwired knowledge */ @@ -3089,6 +3162,7 @@ RelationBuildLocalRelation(const char *relname, Oid relnamespace, TupleDesc tupDesc, Oid relid, + Oid accessmtd, Oid relfilenode, Oid reltablespace, bool shared_relation, @@ -3268,6 +3342,16 @@ RelationBuildLocalRelation(const char *relname, RelationInitPhysicalAddr(rel); + rel->rd_rel->relam = accessmtd; + + if (relkind == RELKIND_RELATION || + relkind == RELKIND_MATVIEW || + relkind == RELKIND_VIEW || /* Not exactly the storage, but underlying + * tuple access, it is required */ + relkind == RELKIND_PARTITIONED_TABLE || + relkind == RELKIND_TOASTVALUE) + RelationInitTableAccessMethod(rel); + /* * Okay to insert into the relcache hash table. * @@ -3788,6 +3872,19 @@ RelationCacheInitializePhase3(void) restart = true; } + if (relation->rd_tableamroutine == NULL && + (relation->rd_rel->relkind == RELKIND_RELATION || + relation->rd_rel->relkind == RELKIND_MATVIEW || + relation->rd_rel->relkind == RELKIND_VIEW || + relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE || + relation->rd_rel->relkind == RELKIND_TOASTVALUE)) + { + RelationInitTableAccessMethod(relation); + Assert(relation->rd_tableamroutine != NULL); + + restart = true; + } + /* Release hold on the relation */ RelationDecrementReferenceCount(relation); @@ -5563,6 +5660,9 @@ load_relcache_init_file(bool shared) if (rel->rd_isnailed) nailed_rels++; + /* Load table AM stuff */ + RelationInitTableAccessMethod(rel); + Assert(rel->rd_index == NULL); Assert(rel->rd_indextuple == NULL); Assert(rel->rd_indexcxt == NULL); diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index b636b1e262..1d57177cb5 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -22,6 +22,7 @@ #include "access/heapam.h" #include "access/htup_details.h" #include "access/session.h" +#include "access/tableam.h" #include "access/sysattr.h" #include "access/xact.h" #include "access/xlog.h" @@ -1246,15 +1247,15 @@ static bool ThereIsAtLeastOneRole(void) { Relation pg_authid_rel; - HeapScanDesc scan; + TableScanDesc scan; bool result; pg_authid_rel = heap_open(AuthIdRelationId, AccessShareLock); - scan = heap_beginscan_catalog(pg_authid_rel, 0, NULL); - result = (heap_getnext(scan, ForwardScanDirection) != NULL); + scan = table_beginscan_catalog(pg_authid_rel, 0, NULL); + result = (heap_scan_getnext(scan, ForwardScanDirection) != NULL); - heap_endscan(scan); + table_endscan(scan); heap_close(pg_authid_rel, AccessShareLock); return result; 
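A note on the snapshot changes in snapbuild.c above and in snapmgr.c below: SnapshotData no longer carries a "satisfies" function pointer, only a visibility_type tag, and heap code dispatches on that tag (heapam.h later in this patch declares a HeapTupleSatisfies() wrapper for this). The switch below is a guess at what such a dispatcher looks like, limited to the two tag values that actually occur in this patch; it is not copied from the patch.

/*
 * Assumed shape of the visibility dispatcher, for illustration only.
 */
static bool
satisfies_sketch(HeapTuple tup, Snapshot snapshot, Buffer buffer)
{
	switch (snapshot->visibility_type)
	{
		case MVCC_VISIBILITY:
			return HeapTupleSatisfiesMVCC(tup, snapshot, buffer);
		case HISTORIC_MVCC_VISIBILITY:
			return HeapTupleSatisfiesHistoricMVCC(tup, snapshot, buffer);
		default:
			elog(ERROR, "unsupported snapshot visibility type");
			return false;		/* keep compiler quiet */
	}
}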
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 6fe1939881..11b6df209a 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -29,6 +29,7 @@ #include "access/commit_ts.h" #include "access/gin.h" #include "access/rmgr.h" +#include "access/tableam.h" #include "access/transam.h" #include "access/twophase.h" #include "access/xact.h" @@ -3511,6 +3512,17 @@ static struct config_string ConfigureNamesString[] = check_datestyle, assign_datestyle, NULL }, + { + {"default_table_access_method", PGC_USERSET, CLIENT_CONN_STATEMENT, + gettext_noop("Sets the default table access method for new tables."), + NULL, + GUC_IS_NAME + }, + &default_table_access_method, + DEFAULT_TABLE_ACCESS_METHOD, + check_default_table_access_method, NULL, NULL + }, + { {"default_tablespace", PGC_USERSET, CLIENT_CONN_STATEMENT, gettext_noop("Sets the default tablespace to create tables and indexes in."), diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c index ee7fd83c02..7d2b6facf2 100644 --- a/src/backend/utils/sort/tuplesort.c +++ b/src/backend/utils/sort/tuplesort.c @@ -3818,12 +3818,13 @@ comparetup_cluster(const SortTuple *a, const SortTuple *b, static void copytup_cluster(Tuplesortstate *state, SortTuple *stup, void *tup) { - HeapTuple tuple = (HeapTuple) tup; Datum original; MemoryContext oldcontext = MemoryContextSwitchTo(state->tuplecontext); + TupleTableSlot *slot = (TupleTableSlot *) tup; + HeapTuple tuple; /* copy the tuple into sort storage */ - tuple = heap_copytuple(tuple); + tuple = ExecCopySlotHeapTuple(slot); stup->tuple = (void *) tuple; USEMEM(state, GetMemoryChunkSpace(tuple)); diff --git a/src/backend/utils/time/Makefile b/src/backend/utils/time/Makefile index 5a6e6fa4c8..f17b1c5324 100644 --- a/src/backend/utils/time/Makefile +++ b/src/backend/utils/time/Makefile @@ -12,6 +12,6 @@ subdir = src/backend/utils/time top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global -OBJS = combocid.o tqual.o snapmgr.o +OBJS = combocid.o snapmgr.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c index edf59efc29..9c59545961 100644 --- a/src/backend/utils/time/snapmgr.c +++ b/src/backend/utils/time/snapmgr.c @@ -141,9 +141,9 @@ static volatile OldSnapshotControlData *oldSnapshotControl; * These SnapshotData structs are static to simplify memory allocation * (see the hack in GetSnapshotData to avoid repeated malloc/free). */ -static SnapshotData CurrentSnapshotData = {HeapTupleSatisfiesMVCC}; -static SnapshotData SecondarySnapshotData = {HeapTupleSatisfiesMVCC}; -SnapshotData CatalogSnapshotData = {HeapTupleSatisfiesMVCC}; +static SnapshotData CurrentSnapshotData = {MVCC_VISIBILITY}; +static SnapshotData SecondarySnapshotData = {MVCC_VISIBILITY}; +SnapshotData CatalogSnapshotData = {MVCC_VISIBILITY}; /* Pointers to valid snapshots */ static Snapshot CurrentSnapshot = NULL; @@ -2046,7 +2046,7 @@ EstimateSnapshotSpace(Snapshot snap) Size size; Assert(snap != InvalidSnapshot); - Assert(snap->satisfies == HeapTupleSatisfiesMVCC); + Assert(snap->visibility_type == MVCC_VISIBILITY); /* We allocate any XID arrays needed in the same palloc block. 
*/ size = add_size(sizeof(SerializedSnapshotData), @@ -2143,7 +2143,7 @@ RestoreSnapshot(char *start_address) /* Copy all required fields */ snapshot = (Snapshot) MemoryContextAlloc(TopTransactionContext, size); - snapshot->satisfies = HeapTupleSatisfiesMVCC; + snapshot->visibility_type = MVCC_VISIBILITY; snapshot->xmin = serialized_snapshot.xmin; snapshot->xmax = serialized_snapshot.xmax; snapshot->xip = NULL; diff --git a/src/include/access/genam.h b/src/include/access/genam.h index 534fac7bf2..0aa107f4b4 100644 --- a/src/include/access/genam.h +++ b/src/include/access/genam.h @@ -159,8 +159,10 @@ extern IndexScanDesc index_beginscan_parallel(Relation heaprel, ParallelIndexScanDesc pscan); extern ItemPointer index_getnext_tid(IndexScanDesc scan, ScanDirection direction); -extern HeapTuple index_fetch_heap(IndexScanDesc scan); -extern HeapTuple index_getnext(IndexScanDesc scan, ScanDirection direction); +struct TupleTableSlot; +extern bool index_fetch_heap(IndexScanDesc scan, struct TupleTableSlot *slot); +//extern HeapTuple index_getnext(IndexScanDesc scan, ScanDirection direction); +extern bool index_getnext_slot(IndexScanDesc scan, ScanDirection direction, struct TupleTableSlot *slot); extern int64 index_getbitmap(IndexScanDesc scan, TIDBitmap *bitmap); extern IndexBulkDeleteResult *index_bulk_delete(IndexVacuumInfo *info, diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 108e4f1067..a309db1a1c 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -33,6 +33,8 @@ typedef struct BulkInsertStateData *BulkInsertState; +struct TupleTableSlot; + /* * Possible lock modes for a tuple. */ @@ -71,8 +73,21 @@ typedef struct HeapUpdateFailureData ItemPointerData ctid; TransactionId xmax; CommandId cmax; + bool traversed; } HeapUpdateFailureData; +/* Result codes for HeapTupleSatisfiesVacuum */ +typedef enum +{ + HEAPTUPLE_DEAD, /* tuple is dead and deletable */ + HEAPTUPLE_LIVE, /* tuple is live (committed, no deleter) */ + HEAPTUPLE_RECENTLY_DEAD, /* tuple is dead, but not deletable yet */ + HEAPTUPLE_INSERT_IN_PROGRESS, /* inserting xact is still in progress */ + HEAPTUPLE_DELETE_IN_PROGRESS /* deleting xact is still in progress */ +} HTSV_Result; + +/* struct definition is private to rewriteheap.c */ +typedef struct RewriteStateData *RewriteState; /* ---------------- * function prototypes for heap access method @@ -98,8 +113,9 @@ extern Relation heap_openrv_extended(const RangeVar *relation, #define heap_close(r,l) relation_close(r,l) /* struct definitions appear in relscan.h */ +typedef struct TableScanDescData *TableScanDesc; typedef struct HeapScanDescData *HeapScanDesc; -typedef struct ParallelHeapScanDescData *ParallelHeapScanDesc; +typedef struct ParallelTableScanDescData *ParallelTableScanDesc; /* * HeapScanIsValid @@ -107,53 +123,47 @@ typedef struct ParallelHeapScanDescData *ParallelHeapScanDesc; */ #define HeapScanIsValid(scan) PointerIsValid(scan) -extern HeapScanDesc heap_beginscan(Relation relation, Snapshot snapshot, - int nkeys, ScanKey key); -extern HeapScanDesc heap_beginscan_catalog(Relation relation, int nkeys, - ScanKey key); -extern HeapScanDesc heap_beginscan_strat(Relation relation, Snapshot snapshot, - int nkeys, ScanKey key, - bool allow_strat, bool allow_sync); -extern HeapScanDesc heap_beginscan_bm(Relation relation, Snapshot snapshot, - int nkeys, ScanKey key); -extern HeapScanDesc heap_beginscan_sampling(Relation relation, - Snapshot snapshot, int nkeys, ScanKey key, - bool allow_strat, bool allow_sync, bool 
allow_pagemode); -extern void heap_setscanlimits(HeapScanDesc scan, BlockNumber startBlk, +extern TableScanDesc heap_beginscan(Relation relation, Snapshot snapshot, + int nkeys, ScanKey key, + ParallelTableScanDesc parallel_scan, + bool allow_strat, + bool allow_sync, + bool allow_pagemode, + bool is_bitmapscan, + bool is_samplescan, + bool temp_snap); +extern void heap_setscanlimits(TableScanDesc scan, BlockNumber startBlk, BlockNumber endBlk); -extern void heapgetpage(HeapScanDesc scan, BlockNumber page); -extern void heap_rescan(HeapScanDesc scan, ScanKey key); -extern void heap_rescan_set_params(HeapScanDesc scan, ScanKey key, +extern void heapgetpage(TableScanDesc scan, BlockNumber page); +extern void heap_rescan(TableScanDesc scan, ScanKey key, bool set_params, + bool allow_strat, bool allow_sync, bool allow_pagemode); +extern void heap_rescan_set_params(TableScanDesc scan, ScanKey key, bool allow_strat, bool allow_sync, bool allow_pagemode); -extern void heap_endscan(HeapScanDesc scan); -extern HeapTuple heap_getnext(HeapScanDesc scan, ScanDirection direction); - -extern Size heap_parallelscan_estimate(Snapshot snapshot); -extern void heap_parallelscan_initialize(ParallelHeapScanDesc target, - Relation relation, Snapshot snapshot); -extern void heap_parallelscan_reinitialize(ParallelHeapScanDesc parallel_scan); -extern HeapScanDesc heap_beginscan_parallel(Relation, ParallelHeapScanDesc); - -extern bool heap_fetch(Relation relation, Snapshot snapshot, - HeapTuple tuple, Buffer *userbuf, bool keep_buf, +extern void heap_endscan(TableScanDesc scan); +extern HeapTuple heap_getnext(TableScanDesc scan, ScanDirection direction); +extern struct TupleTableSlot *heap_getnextslot(TableScanDesc sscan, ScanDirection direction, + struct TupleTableSlot *slot); +extern HeapTuple heap_scan_getnext(TableScanDesc sscan, ScanDirection direction); + +extern bool heap_fetch(Relation relation, ItemPointer tid, Snapshot snapshot, + HeapTuple tuple, Buffer *userbuf, Relation stats_relation); extern bool heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, bool *all_dead, bool first_call); extern bool heap_hot_search(ItemPointer tid, Relation relation, Snapshot snapshot, bool *all_dead); - extern void heap_get_latest_tid(Relation relation, Snapshot snapshot, ItemPointer tid); extern void setLastTid(const ItemPointer tid); extern BulkInsertState GetBulkInsertState(void); -extern void FreeBulkInsertState(BulkInsertState); +extern void FreeBulkInsertState(BulkInsertState bistate); extern void ReleaseBulkInsertStatePin(BulkInsertState bistate); extern void heap_insert(Relation relation, HeapTuple tup, CommandId cid, int options, BulkInsertState bistate); -extern void heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples, +extern void heap_multi_insert(Relation relation, struct TupleTableSlot **slots, int ntuples, CommandId cid, int options, BulkInsertState bistate); extern HTSU_Result heap_delete(Relation relation, ItemPointer tid, CommandId cid, Snapshot crosscheck, bool wait, @@ -164,10 +174,11 @@ extern HTSU_Result heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, CommandId cid, Snapshot crosscheck, bool wait, HeapUpdateFailureData *hufd, LockTupleMode *lockmode); -extern HTSU_Result heap_lock_tuple(Relation relation, HeapTuple tuple, +extern HTSU_Result heap_lock_tuple(Relation relation, ItemPointer tid, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, - bool follow_update, + bool follow_update, 
HeapTuple tuple, Buffer *buffer, HeapUpdateFailureData *hufd); + extern void heap_inplace_update(Relation relation, HeapTuple tuple); extern bool heap_freeze_tuple(HeapTupleHeader tuple, TransactionId relfrozenxid, TransactionId relminmxid, @@ -182,7 +193,7 @@ extern void simple_heap_update(Relation relation, ItemPointer otid, HeapTuple tup); extern void heap_sync(Relation relation); -extern void heap_update_snapshot(HeapScanDesc scan, Snapshot snapshot); +extern void heap_update_snapshot(TableScanDesc scan, Snapshot snapshot); /* in heap/pruneheap.c */ extern void heap_page_prune_opt(Relation relation, Buffer buffer); @@ -205,4 +216,26 @@ extern Size SyncScanShmemSize(void); struct VacuumParams; extern void heap_vacuum_rel(Relation onerel, int options, struct VacuumParams *params, BufferAccessStrategy bstrategy); + +/* in heap/heapam_visibility.c */ +extern bool HeapTupleSatisfies(HeapTuple stup, Snapshot snapshot, Buffer buffer); +extern HTSU_Result HeapTupleSatisfiesUpdate(HeapTuple stup, CommandId curcid, + Buffer buffer); +extern HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple stup, TransactionId OldestXmin, + Buffer buffer); +extern void HeapTupleSetHintBits(HeapTupleHeader tuple, Buffer buffer, + uint16 infomask, TransactionId xid); +extern bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple); +extern bool XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot); +extern bool HeapTupleIsSurelyDead(HeapTuple htup, TransactionId OldestXmin); + +/* in heap/rewriteheap.c */ +extern RewriteState begin_heap_rewrite(Relation OldHeap, Relation NewHeap, + TransactionId OldestXmin, TransactionId FreezeXid, + MultiXactId MultiXactCutoff, bool use_wal); +extern void end_heap_rewrite(RewriteState state); +extern void rewrite_heap_tuple(RewriteState state, HeapTuple oldTuple, + HeapTuple newTuple); +extern bool rewrite_heap_dead_tuple(RewriteState state, HeapTuple oldTuple); + #endif /* HEAPAM_H */ diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h index e5289b8aa7..51a3ad74fa 100644 --- a/src/include/access/relscan.h +++ b/src/include/access/relscan.h @@ -22,15 +22,15 @@ #include "storage/spin.h" /* - * Shared state for parallel heap scan. + * Shared state for parallel table scan. * - * Each backend participating in a parallel heap scan has its own - * HeapScanDesc in backend-private memory, and those objects all contain - * a pointer to this structure. The information here must be sufficient - * to properly initialize each new HeapScanDesc as workers join the scan, - * and it must act as a font of block numbers for those workers. + * Each backend participating in a parallel table scan has its own + * TableScanDesc in backend-private memory, and those objects all contain a + * pointer to this structure. The information here must be sufficient to + * properly initialize each new TableScanDesc as workers join the scan, and it + * must act as a font of block numbers for those workers. */ -typedef struct ParallelHeapScanDescData +typedef struct ParallelTableScanDescData { Oid phs_relid; /* OID of relation to scan */ bool phs_syncscan; /* report location to syncscan logic? */ @@ -41,9 +41,9 @@ typedef struct ParallelHeapScanDescData * workers so far. */ bool phs_snapshot_any; /* SnapshotAny, not phs_snapshot_data? 
*/ char phs_snapshot_data[FLEXIBLE_ARRAY_MEMBER]; -} ParallelHeapScanDescData; +} ParallelTableScanDescData; -typedef struct HeapScanDescData +typedef struct TableScanDescData { /* scan parameters */ Relation rs_rd; /* heap relation descriptor */ @@ -62,16 +62,27 @@ typedef struct HeapScanDescData BlockNumber rs_startblock; /* block # to start at */ BlockNumber rs_numblocks; /* max number of blocks to scan */ /* rs_numblocks is usually InvalidBlockNumber, meaning "scan whole rel" */ - BufferAccessStrategy rs_strategy; /* access strategy for reads */ bool rs_syncscan; /* report location to syncscan logic? */ + ParallelTableScanDesc rs_parallel; /* parallel scan information */ + +} TableScanDescData; + +typedef struct HeapScanDescData +{ + /* scan parameters */ + TableScanDescData rs_scan; /* AM independent part of the descriptor; must be first */ + /* scan current state */ bool rs_inited; /* false = scan not init'd yet */ - HeapTupleData rs_ctup; /* current tuple in scan, if any */ BlockNumber rs_cblock; /* current block # in scan, if any */ Buffer rs_cbuf; /* current buffer in scan, if any */ /* NB: if rs_cbuf is not InvalidBuffer, we hold a pin on that buffer */ - ParallelHeapScanDesc rs_parallel; /* parallel scan information */ + + BufferAccessStrategy rs_strategy; /* access strategy for reads */ + + HeapTupleData rs_ctup; /* current tuple in scan, if any */ /* these fields only used in page-at-a-time mode and for bitmap scans */ int rs_cindex; /* current tuple's index in vistuples */ @@ -79,6 +90,21 @@ typedef struct HeapScanDescData OffsetNumber rs_vistuples[MaxHeapTuplesPerPage]; /* their offsets */ } HeapScanDescData; + +typedef struct IndexFetchTableData +{ + Relation rel; +} IndexFetchTableData; + + +typedef struct IndexFetchHeapData +{ + IndexFetchTableData xs_base; + + Buffer xs_cbuf; /* current heap buffer in scan, if any */ + /* NB: if xs_cbuf is not InvalidBuffer, we hold a pin on that buffer */ +} IndexFetchHeapData; + /* * We use the same IndexScanDescData structure for both amgettuple-based * and amgetbitmap-based index scans. 
Some fields are only relevant in @@ -117,10 +143,10 @@ typedef struct IndexScanDescData HeapTuple xs_hitup; /* index data returned by AM, as HeapTuple */ TupleDesc xs_hitupdesc; /* rowtype descriptor of xs_hitup */ - /* xs_ctup/xs_cbuf/xs_recheck are valid after a successful index_getnext */ - HeapTupleData xs_ctup; /* current heap tuple, if any */ - Buffer xs_cbuf; /* current heap buffer in scan, if any */ - /* NB: if xs_cbuf is not InvalidBuffer, we hold a pin on that buffer */ + ItemPointerData xs_heaptid; /* result */ + bool xs_heap_continue; /* T if must keep walking, potential further results */ + IndexFetchTableData *xs_heapfetch; + bool xs_recheck; /* T means scan keys must be rechecked */ /* @@ -134,9 +160,6 @@ typedef struct IndexScanDescData bool *xs_orderbynulls; bool xs_recheckorderby; - /* state data for traversing HOT chains in index_getnext */ - bool xs_continue_hot; /* T if must keep walking HOT chain */ - /* parallel index scan information, in shared memory */ ParallelIndexScanDesc parallel_scan; } IndexScanDescData; @@ -150,14 +173,17 @@ typedef struct ParallelIndexScanDescData char ps_snapshot_data[FLEXIBLE_ARRAY_MEMBER]; } ParallelIndexScanDescData; -/* Struct for heap-or-index scans of system tables */ +struct TupleTableSlot; + +/* Struct for storage-or-index scans of system tables */ typedef struct SysScanDescData { Relation heap_rel; /* catalog being scanned */ Relation irel; /* NULL if doing heap scan */ - HeapScanDesc scan; /* only valid in heap-scan case */ + TableScanDesc scan; /* only valid in storage-scan case */ IndexScanDesc iscan; /* only valid in index-scan case */ Snapshot snapshot; /* snapshot to unregister at end of scan */ + struct TupleTableSlot *slot; } SysScanDescData; #endif /* RELSCAN_H */ diff --git a/src/include/access/rewriteheap.h b/src/include/access/rewriteheap.h index cfdf33b4bd..cc74012f72 100644 --- a/src/include/access/rewriteheap.h +++ b/src/include/access/rewriteheap.h @@ -18,17 +18,6 @@ #include "storage/relfilenode.h" #include "utils/relcache.h" -/* struct definition is private to rewriteheap.c */ -typedef struct RewriteStateData *RewriteState; - -extern RewriteState begin_heap_rewrite(Relation OldHeap, Relation NewHeap, - TransactionId OldestXmin, TransactionId FreezeXid, - MultiXactId MultiXactCutoff, bool use_wal); -extern void end_heap_rewrite(RewriteState state); -extern void rewrite_heap_tuple(RewriteState state, HeapTuple oldTuple, - HeapTuple newTuple); -extern bool rewrite_heap_dead_tuple(RewriteState state, HeapTuple oldTuple); - /* * On-Disk data format for an individual logical rewrite mapping. */ diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h new file mode 100644 index 0000000000..7364afa340 --- /dev/null +++ b/src/include/access/tableam.h @@ -0,0 +1,750 @@ +/*------------------------------------------------------------------------- + * + * tableam.h + * POSTGRES table access method definitions. 
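+ *
+ *	  This header declares the TableAmRoutine callback struct implemented by a
+ *	  table access method, static-inline wrappers that dispatch through a
+ *	  relation's rd_tableamroutine, and the default_table_access_method GUC.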
+ * + * + * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/tableam.h + * + *------------------------------------------------------------------------- + */ +#ifndef TABLEAM_H +#define TABLEAM_H + +#include "postgres.h" + +#include "access/heapam.h" +#include "access/relscan.h" +#include "catalog/index.h" +#include "executor/tuptable.h" +#include "nodes/execnodes.h" +#include "nodes/nodes.h" +#include "fmgr.h" +#include "utils/guc.h" +#include "utils/rel.h" +#include "utils/snapmgr.h" +#include "utils/snapshot.h" +#include "utils/tqual.h" + + +#define DEFAULT_TABLE_ACCESS_METHOD "heap" + +extern char *default_table_access_method; +extern bool synchronize_seqscans; + +/* + * Storage routine function hooks + */ +typedef bool (*SnapshotSatisfies_function) (Relation rel, + TupleTableSlot *slot, + Snapshot snapshot); + +typedef void (*TupleInsert_function) (Relation rel, TupleTableSlot *slot, CommandId cid, + int options, BulkInsertState bistate); + +typedef void (*TupleInsertSpeculative_function) (Relation rel, + TupleTableSlot *slot, + CommandId cid, + int options, + BulkInsertState bistate, + uint32 specToken); + + +typedef void (*TupleCompleteSpeculative_function) (Relation rel, + TupleTableSlot *slot, + uint32 specToken, + bool succeeded); + +typedef HTSU_Result (*TupleDelete_function) (Relation relation, + ItemPointer tid, + CommandId cid, + Snapshot snapshot, + Snapshot crosscheck, + bool wait, + HeapUpdateFailureData *hufd, + bool changingPart); + +typedef HTSU_Result (*TupleUpdate_function) (Relation relation, + ItemPointer otid, + TupleTableSlot *slot, + CommandId cid, + Snapshot snapshot, + Snapshot crosscheck, + bool wait, + HeapUpdateFailureData *hufd, + LockTupleMode *lockmode, + bool *update_indexes); + +typedef bool (*TupleFetchRowVersion_function) (Relation relation, + ItemPointer tid, + Snapshot snapshot, + TupleTableSlot *slot, + Relation stats_relation); + +typedef HTSU_Result (*TupleLock_function) (Relation relation, + ItemPointer tid, + Snapshot snapshot, + TupleTableSlot *slot, + CommandId cid, + LockTupleMode mode, + LockWaitPolicy wait_policy, + uint8 flags, + HeapUpdateFailureData *hufd); + +typedef void (*MultiInsert_function) (Relation relation, TupleTableSlot **slots, int nslots, + CommandId cid, int options, BulkInsertState bistate); + +typedef void (*TupleGetLatestTid_function) (Relation relation, + Snapshot snapshot, + ItemPointer tid); + +struct VacuumParams; +typedef void (*RelationVacuum_function)(Relation onerel, int options, + struct VacuumParams *params, BufferAccessStrategy bstrategy); +typedef void (*RelationScanAnalyzeNextBlock_function)(TableScanDesc scan, BlockNumber blockno, + BufferAccessStrategy bstrategy); +typedef bool (*RelationScanAnalyzeNextTuple_function)(TableScanDesc scan, TransactionId OldestXmin, + double *liverows, double *deadrows, TupleTableSlot *slot); + +typedef void (*RelationCopyForCluster_function)(Relation NewHeap, Relation OldHeap, Relation OldIndex, + bool use_sort, + TransactionId OldestXmin, TransactionId FreezeXid, MultiXactId MultiXactCutoff, + double *num_tuples, double *tups_vacuumed, double *tups_recently_dead); + +typedef void (*RelationSync_function) (Relation relation); + +typedef const TupleTableSlotOps* (*SlotCallbacks_function) (Relation relation); + +typedef TableScanDesc (*ScanBegin_function) (Relation relation, + Snapshot snapshot, + int nkeys, ScanKey key, + ParallelTableScanDesc 
parallel_scan, + bool allow_strat, + bool allow_sync, + bool allow_pagemode, + bool is_bitmapscan, + bool is_samplescan, + bool temp_snap); + +typedef struct IndexFetchTableData* (*BeginIndexFetchTable_function) (Relation relation); +typedef void (*ResetIndexFetchTable_function) (struct IndexFetchTableData* data); +typedef void (*EndIndexFetchTable_function) (struct IndexFetchTableData* data); + +typedef void (*ScanSetlimits_function) (TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks); + +typedef TupleTableSlot *(*ScanGetnextSlot_function) (TableScanDesc scan, + ScanDirection direction, TupleTableSlot *slot); + +typedef void (*ScanEnd_function) (TableScanDesc scan); + + +typedef void (*ScanRescan_function) (TableScanDesc scan, ScanKey key, bool set_params, + bool allow_strat, bool allow_sync, bool allow_pagemode); +typedef void (*ScanUpdateSnapshot_function) (TableScanDesc scan, Snapshot snapshot); + +typedef bool (*TupleFetchFollow_function)(struct IndexFetchTableData *scan, + ItemPointer tid, + Snapshot snapshot, + TupleTableSlot *slot, + bool *call_again, bool *all_dead); + +typedef double (*IndexBuildRangeScan_function)(Relation heapRelation, + Relation indexRelation, + IndexInfo *indexInfo, + bool allow_sync, + bool anyvisible, + BlockNumber start_blockno, + BlockNumber end_blockno, + IndexBuildCallback callback, + void *callback_state, + TableScanDesc scan); +struct ValidateIndexState; +typedef void (*IndexValidateScan_function)(Relation heapRelation, + Relation indexRelation, + IndexInfo *indexInfo, + Snapshot snapshot, + struct ValidateIndexState *state); + +typedef bool (*BitmapPagescan_function)(TableScanDesc scan, + TBMIterateResult *tbmres); + +typedef bool (*BitmapPagescanNext_function)(TableScanDesc scan, + TupleTableSlot *slot); + +struct SampleScanState; +typedef bool (*SampleScanNextBlock_function)(TableScanDesc scan, struct SampleScanState *scanstate); +typedef bool (*SampleScanNextTuple_function)(TableScanDesc scan, struct SampleScanState *scanstate, TupleTableSlot *slot); + +/* + * API struct for a table AM. Note this must be allocated in a + * server-lifetime manner, typically as a static const struct. 
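+ *
+ * As a sketch (placeholder names, not declared anywhere in this header), an
+ * AM handler function, such as the heap_tableam_handler registered later in
+ * this patch, would typically hand back a pointer to such a struct:
+ *
+ *		static const TableAmRoutine myam_methods = {
+ *			.type = T_TableAmRoutine,
+ *			.slot_callbacks = myam_slot_callbacks,
+ *			... remaining callbacks ...
+ *		};
+ *
+ *		Datum
+ *		myam_tableam_handler(PG_FUNCTION_ARGS)
+ *		{
+ *			PG_RETURN_POINTER(&myam_methods);
+ *		}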
+ */ +typedef struct TableAmRoutine +{ + NodeTag type; + + SlotCallbacks_function slot_callbacks; + + SnapshotSatisfies_function snapshot_satisfies; + + /* Operations on physical tuples */ + TupleInsert_function tuple_insert; + TupleInsertSpeculative_function tuple_insert_speculative; + TupleCompleteSpeculative_function tuple_complete_speculative; + TupleUpdate_function tuple_update; + TupleDelete_function tuple_delete; + TupleFetchRowVersion_function tuple_fetch_row_version; + TupleLock_function tuple_lock; + MultiInsert_function multi_insert; + TupleGetLatestTid_function tuple_get_latest_tid; + TupleFetchFollow_function tuple_fetch_follow; + + RelationVacuum_function relation_vacuum; + RelationScanAnalyzeNextBlock_function scan_analyze_next_block; + RelationScanAnalyzeNextTuple_function scan_analyze_next_tuple; + RelationCopyForCluster_function relation_copy_for_cluster; + RelationSync_function relation_sync; + + /* Operations on relation scans */ + ScanBegin_function scan_begin; + ScanSetlimits_function scansetlimits; + ScanGetnextSlot_function scan_getnextslot; + + BitmapPagescan_function scan_bitmap_pagescan; + BitmapPagescanNext_function scan_bitmap_pagescan_next; + + SampleScanNextBlock_function scan_sample_next_block; + SampleScanNextTuple_function scan_sample_next_tuple; + + ScanEnd_function scan_end; + ScanRescan_function scan_rescan; + ScanUpdateSnapshot_function scan_update_snapshot; + + BeginIndexFetchTable_function begin_index_fetch; + ResetIndexFetchTable_function reset_index_fetch; + EndIndexFetchTable_function end_index_fetch; + + + IndexBuildRangeScan_function index_build_range_scan; + IndexValidateScan_function index_validate_scan; +} TableAmRoutine; + +static inline const TupleTableSlotOps* +table_slot_callbacks(Relation relation) +{ + const TupleTableSlotOps *tts_cb; + + tts_cb = relation->rd_tableamroutine->slot_callbacks(relation); + + return tts_cb; +} + +/* + * INLINE functions + */ +extern TupleTableSlot* table_gimmegimmeslot(Relation relation, List **reglist); + +/* + * table_fetch_row_version - retrieve tuple with given tid + * + * XXX: This shouldn't just take a tid, but tid + additional information + */ +static inline bool +table_fetch_row_version(Relation r, + ItemPointer tid, + Snapshot snapshot, + TupleTableSlot *slot, + Relation stats_relation) +{ + return r->rd_tableamroutine->tuple_fetch_row_version(r, tid, + snapshot, slot, + stats_relation); +} + + +/* + * table_lock_tuple - lock a tuple in shared or exclusive mode + * + * XXX: This shouldn't just take a tid, but tid + additional information + */ +static inline HTSU_Result +table_lock_tuple(Relation relation, ItemPointer tid, Snapshot snapshot, + TupleTableSlot *slot, CommandId cid, LockTupleMode mode, + LockWaitPolicy wait_policy, uint8 flags, + HeapUpdateFailureData *hufd) +{ + return relation->rd_tableamroutine->tuple_lock(relation, tid, snapshot, slot, + cid, mode, wait_policy, + flags, hufd); +} + +/* ---------------- + * table_beginscan_parallel - join a parallel scan + * + * Caller must hold a suitable lock on the correct relation. 
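+ *
+ * The ParallelTableScanDesc is expected to have been set up by the scan
+ * leader, e.g. sized with table_parallelscan_estimate() and filled in with
+ * table_parallelscan_initialize() (both declared near the end of this
+ * header).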
+ * ---------------- + */ +static inline TableScanDesc +table_beginscan_parallel(Relation relation, ParallelTableScanDesc parallel_scan) +{ + Snapshot snapshot; + + Assert(RelationGetRelid(relation) == parallel_scan->phs_relid); + + if (!parallel_scan->phs_snapshot_any) + { + /* Snapshot was serialized -- restore it */ + snapshot = RestoreSnapshot(parallel_scan->phs_snapshot_data); + RegisterSnapshot(snapshot); + } + else + { + /* SnapshotAny passed by caller (not serialized) */ + snapshot = SnapshotAny; + } + + return relation->rd_tableamroutine->scan_begin(relation, snapshot, 0, NULL, parallel_scan, + true, true, true, false, false, !parallel_scan->phs_snapshot_any); +} + +/* + * table_setscanlimits - restrict range of a table scan + * + * startBlk is the page to start at + * numBlks is number of pages to scan (InvalidBlockNumber means "all") + */ +static inline void +table_setscanlimits(TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks) +{ + sscan->rs_rd->rd_tableamroutine->scansetlimits(sscan, startBlk, numBlks); +} + + +/* ---------------- + * table_beginscan - begin relation scan + * + * table_beginscan is the "standard" case. + * + * table_beginscan_catalog differs in setting up its own temporary snapshot. + * + * table_beginscan_strat offers an extended API that lets the caller control + * whether a nondefault buffer access strategy can be used, and whether + * syncscan can be chosen (possibly resulting in the scan not starting from + * block zero). Both of these default to true with plain table_beginscan. + * + * table_beginscan_bm is an alternative entry point for setting up a + * TableScanDesc for a bitmap heap scan. Although that scan technology is + * really quite unlike a standard seqscan, there is just enough commonality + * to make it worth using the same data structure. + * + * table_beginscan_sampling is an alternative entry point for setting up a + * TableScanDesc for a TABLESAMPLE scan. As with bitmap scans, it's worth + * using the same data structure although the behavior is rather different. + * In addition to the options offered by table_beginscan_strat, this call + * also allows control of whether page-mode visibility checking is used. 
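+ *
+ * A minimal caller-side sketch (this assumes the AM's scan_getnextslot
+ * callback returns NULL once the scan is exhausted, and that "slot" was
+ * created beforehand, e.g. with table_gimmegimmeslot()):
+ *
+ *		scan = table_beginscan(rel, GetActiveSnapshot(), 0, NULL);
+ *		while (table_scan_getnextslot(scan, ForwardScanDirection, slot) != NULL)
+ *			... process the tuple stored in slot ...
+ *		table_endscan(scan);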
+ * ---------------- + */ +static inline TableScanDesc +table_beginscan(Relation relation, Snapshot snapshot, + int nkeys, ScanKey key) +{ + return relation->rd_tableamroutine->scan_begin(relation, snapshot, nkeys, key, NULL, + true, true, true, false, false, false); +} + +static inline TableScanDesc +table_beginscan_catalog(Relation relation, int nkeys, ScanKey key) +{ + Oid relid = RelationGetRelid(relation); + Snapshot snapshot = RegisterSnapshot(GetCatalogSnapshot(relid)); + + return relation->rd_tableamroutine->scan_begin(relation, snapshot, nkeys, key, NULL, + true, true, true, false, false, true); +} + +static inline TableScanDesc +table_beginscan_strat(Relation relation, Snapshot snapshot, + int nkeys, ScanKey key, + bool allow_strat, bool allow_sync) +{ + return relation->rd_tableamroutine->scan_begin(relation, snapshot, nkeys, key, NULL, + allow_strat, allow_sync, true, + false, false, false); +} + +static inline TableScanDesc +table_beginscan_bm(Relation relation, Snapshot snapshot, + int nkeys, ScanKey key) +{ + return relation->rd_tableamroutine->scan_begin(relation, snapshot, nkeys, key, NULL, + false, false, true, true, false, false); +} + +static inline TableScanDesc +table_beginscan_sampling(Relation relation, Snapshot snapshot, + int nkeys, ScanKey key, + bool allow_strat, bool allow_sync, bool allow_pagemode) +{ + return relation->rd_tableamroutine->scan_begin(relation, snapshot, nkeys, key, NULL, + allow_strat, allow_sync, allow_pagemode, + false, true, false); +} + +static inline TableScanDesc +table_beginscan_analyze(Relation relation) +{ + return relation->rd_tableamroutine->scan_begin(relation, NULL, 0, NULL, NULL, + true, false, true, + false, true, false); +} + + +/* ---------------- + * table_rescan - restart a relation scan + * ---------------- + */ +static inline void +table_rescan(TableScanDesc scan, + ScanKey key) +{ + scan->rs_rd->rd_tableamroutine->scan_rescan(scan, key, false, false, false, false); +} + +/* ---------------- + * table_rescan_set_params - restart a relation scan after changing params + * + * This call allows changing the buffer strategy, syncscan, and pagemode + * options before starting a fresh scan. Note that although the actual use + * of syncscan might change (effectively, enabling or disabling reporting), + * the previously selected startblock will be kept. + * ---------------- + */ +static inline void +table_rescan_set_params(TableScanDesc scan, ScanKey key, + bool allow_strat, bool allow_sync, bool allow_pagemode) +{ + scan->rs_rd->rd_tableamroutine->scan_rescan(scan, key, true, + allow_strat, allow_sync, (allow_pagemode && IsMVCCSnapshot(scan->rs_snapshot))); +} + +/* ---------------- + * table_endscan - end relation scan + * + * See how to integrate with index scans. + * Check handling of reldesc caching. + * ---------------- + */ +static inline void +table_endscan(TableScanDesc scan) +{ + scan->rs_rd->rd_tableamroutine->scan_end(scan); +} + + +/* ---------------- + * table_scan_update_snapshot + * + * Update snapshot info in a table scan descriptor. 
+ * ---------------- + */ +static inline void +table_scan_update_snapshot(TableScanDesc scan, Snapshot snapshot) +{ + scan->rs_rd->rd_tableamroutine->scan_update_snapshot(scan, snapshot); +} + + +static inline bool +table_scan_bitmap_pagescan(TableScanDesc scan, + TBMIterateResult *tbmres) +{ + return scan->rs_rd->rd_tableamroutine->scan_bitmap_pagescan(scan, tbmres); +} + +static inline bool +table_scan_bitmap_pagescan_next(TableScanDesc scan, TupleTableSlot *slot) +{ + return scan->rs_rd->rd_tableamroutine->scan_bitmap_pagescan_next(scan, slot); +} + +static inline bool +table_scan_sample_next_block(TableScanDesc scan, struct SampleScanState *scanstate) +{ + return scan->rs_rd->rd_tableamroutine->scan_sample_next_block(scan, scanstate); +} + +static inline bool +table_scan_sample_next_tuple(TableScanDesc scan, struct SampleScanState *scanstate, TupleTableSlot *slot) +{ + return scan->rs_rd->rd_tableamroutine->scan_sample_next_tuple(scan, scanstate, slot); +} + +static inline void +table_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno, BufferAccessStrategy bstrategy) +{ + scan->rs_rd->rd_tableamroutine->scan_analyze_next_block(scan, blockno, bstrategy); +} + +static inline bool +table_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, double *liverows, double *deadrows, TupleTableSlot *slot) +{ + return scan->rs_rd->rd_tableamroutine->scan_analyze_next_tuple(scan, OldestXmin, liverows, deadrows, slot); +} + +static inline TupleTableSlot * +table_scan_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot) +{ + slot->tts_tableOid = RelationGetRelid(sscan->rs_rd); + return sscan->rs_rd->rd_tableamroutine->scan_getnextslot(sscan, direction, slot); +} + +static inline IndexFetchTableData* +table_begin_index_fetch_table(Relation rel) +{ + return rel->rd_tableamroutine->begin_index_fetch(rel); +} + +static inline void +table_reset_index_fetch_table(struct IndexFetchTableData* scan) +{ + scan->rel->rd_tableamroutine->reset_index_fetch(scan); +} + +static inline void +table_end_index_fetch_table(struct IndexFetchTableData* scan) +{ + scan->rel->rd_tableamroutine->end_index_fetch(scan); +} + +/* + * Insert a tuple from a slot into table AM routine + */ +static inline void +table_insert(Relation relation, TupleTableSlot *slot, CommandId cid, + int options, BulkInsertState bistate) +{ + relation->rd_tableamroutine->tuple_insert(relation, slot, cid, options, + bistate); +} + +static inline void +table_insert_speculative(Relation relation, TupleTableSlot *slot, CommandId cid, + int options, BulkInsertState bistate, uint32 specToken) +{ + relation->rd_tableamroutine->tuple_insert_speculative(relation, slot, cid, options, + bistate, specToken); +} + +static inline void +table_complete_speculative(Relation relation, TupleTableSlot *slot, uint32 specToken, + bool succeeded) +{ + return relation->rd_tableamroutine->tuple_complete_speculative(relation, slot, specToken, succeeded); +} + +/* + * Delete a tuple from tid using table AM routine + */ +static inline HTSU_Result +table_delete(Relation relation, ItemPointer tid, CommandId cid, + Snapshot snapshot, Snapshot crosscheck, bool wait, + HeapUpdateFailureData *hufd, bool changingPart) +{ + return relation->rd_tableamroutine->tuple_delete(relation, tid, cid, + snapshot, crosscheck, + wait, hufd, changingPart); +} + +/* + * update a tuple from tid using table AM routine + */ +static inline HTSU_Result +table_update(Relation relation, ItemPointer otid, TupleTableSlot *slot, + CommandId cid, Snapshot 
snapshot, Snapshot crosscheck, bool wait, + HeapUpdateFailureData *hufd, LockTupleMode *lockmode, + bool *update_indexes) +{ + return relation->rd_tableamroutine->tuple_update(relation, otid, slot, + cid, snapshot, crosscheck, + wait, hufd, + lockmode, update_indexes); +} + +static inline bool +table_fetch_follow(struct IndexFetchTableData *scan, + ItemPointer tid, + Snapshot snapshot, + TupleTableSlot *slot, + bool *call_again, bool *all_dead) +{ + + return scan->rel->rd_tableamroutine->tuple_fetch_follow(scan, tid, snapshot, + slot, call_again, + all_dead); +} + +static inline bool +table_fetch_follow_check(Relation rel, + ItemPointer tid, + Snapshot snapshot, + bool *all_dead) +{ + IndexFetchTableData *scan = table_begin_index_fetch_table(rel); + TupleTableSlot *slot = table_gimmegimmeslot(rel, NULL); + bool call_again = false; + bool found; + + found = table_fetch_follow(scan, tid, snapshot, slot, &call_again, all_dead); + + table_end_index_fetch_table(scan); + ExecDropSingleTupleTableSlot(slot); + + return found; +} + +/* + * table_multi_insert - insert multiple tuple into a table + */ +static inline void +table_multi_insert(Relation relation, TupleTableSlot **slots, int nslots, + CommandId cid, int options, BulkInsertState bistate) +{ + relation->rd_tableamroutine->multi_insert(relation, slots, nslots, + cid, options, bistate); +} + +static inline void +table_get_latest_tid(Relation relation, + Snapshot snapshot, + ItemPointer tid) +{ + relation->rd_tableamroutine->tuple_get_latest_tid(relation, snapshot, tid); +} + + +static inline void +table_vacuum_rel(Relation rel, int options, + struct VacuumParams *params, BufferAccessStrategy bstrategy) +{ + rel->rd_tableamroutine->relation_vacuum(rel, options, params, bstrategy); +} + + +/* XXX: Move arguments to struct? 
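+ *
+ * table_copy_for_cluster - copy the live contents of OldHeap into NewHeap,
+ * optionally reorganized in OldIndex order (via an explicit sort when
+ * use_sort is set), filling the num_tuples/tups_vacuumed/tups_recently_dead
+ * output counters; this is the rewrite step behind CLUSTER-style operations.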
*/ +static inline void +table_copy_for_cluster(Relation OldHeap, Relation NewHeap, Relation OldIndex, + bool use_sort, + TransactionId OldestXmin, TransactionId FreezeXid, MultiXactId MultiXactCutoff, + double *num_tuples, double *tups_vacuumed, double *tups_recently_dead) +{ + OldHeap->rd_tableamroutine->relation_copy_for_cluster(OldHeap, NewHeap, OldIndex, + use_sort, + OldestXmin, FreezeXid, MultiXactCutoff, + num_tuples, tups_vacuumed, tups_recently_dead); +} + +/* + * table_sync - sync a heap, for use when no WAL has been written + */ +static inline void +table_sync(Relation rel) +{ + rel->rd_tableamroutine->relation_sync(rel); +} + +static inline double +table_index_build_scan(Relation heapRelation, + Relation indexRelation, + IndexInfo *indexInfo, + bool allow_sync, + IndexBuildCallback callback, + void *callback_state, + TableScanDesc scan) +{ + return heapRelation->rd_tableamroutine->index_build_range_scan( + heapRelation, + indexRelation, + indexInfo, + allow_sync, + false, + 0, + InvalidBlockNumber, + callback, + callback_state, + scan); +} + +static inline void +table_index_validate_scan(Relation heapRelation, + Relation indexRelation, + IndexInfo *indexInfo, + Snapshot snapshot, + struct ValidateIndexState *state) +{ + heapRelation->rd_tableamroutine->index_validate_scan( + heapRelation, + indexRelation, + indexInfo, + snapshot, + state); +} + +static inline double +table_index_build_range_scan(Relation heapRelation, + Relation indexRelation, + IndexInfo *indexInfo, + bool allow_sync, + bool anyvisible, + BlockNumber start_blockno, + BlockNumber numblocks, + IndexBuildCallback callback, + void *callback_state, + TableScanDesc scan) +{ + return heapRelation->rd_tableamroutine->index_build_range_scan( + heapRelation, + indexRelation, + indexInfo, + allow_sync, + anyvisible, + start_blockno, + numblocks, + callback, + callback_state, + scan); +} + +/* + * Return true iff tuple in slot satisfies the snapshot. + * + * Notes: + * Assumes slot's tuple is valid. + * Hint bits in the HeapTuple's t_infomask may be updated as a side effect; + * if so, the indicated buffer is marked dirty. + * + * XXX: Add _tuple_ to name? 
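+ *
+ * This takes over the role of the HeapTupleSatisfiesVisibility() macro for
+ * arbitrary table AMs: callers hand in a slot rather than a HeapTuple and
+ * buffer pair.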
+ */ +static inline bool +table_satisfies_snapshot(Relation rel, TupleTableSlot *slot, Snapshot snapshot) +{ + return rel->rd_tableamroutine->snapshot_satisfies(rel, slot, snapshot); +} + +extern BlockNumber table_parallelscan_nextpage(TableScanDesc scan); +extern void table_parallelscan_startblock_init(TableScanDesc scan); +extern Size table_parallelscan_estimate(Snapshot snapshot); +extern void table_parallelscan_initialize(ParallelTableScanDesc target, + Relation relation, Snapshot snapshot); +extern void table_parallelscan_reinitialize(ParallelTableScanDesc parallel_scan); + +extern const TableAmRoutine * GetTableAmRoutine(Oid amhandler); +extern const TableAmRoutine * GetTableAmRoutineByAmId(Oid amoid); +extern const TableAmRoutine * GetHeapamTableAmRoutine(void); + +extern bool check_default_table_access_method(char **newval, void **extra, + GucSource source); + +#endif /* TABLEAM_H */ diff --git a/src/include/access/tsmapi.h b/src/include/access/tsmapi.h index 3ecd4737e5..3947d7b915 100644 --- a/src/include/access/tsmapi.h +++ b/src/include/access/tsmapi.h @@ -34,7 +34,7 @@ typedef void (*BeginSampleScan_function) (SampleScanState *node, int nparams, uint32 seed); -typedef BlockNumber (*NextSampleBlock_function) (SampleScanState *node); +typedef BlockNumber (*NextSampleBlock_function) (SampleScanState *node, BlockNumber nblocks); typedef OffsetNumber (*NextSampleTuple_function) (SampleScanState *node, BlockNumber blockno, diff --git a/src/include/catalog/heap.h b/src/include/catalog/heap.h index 56a341a622..cedc19fbcf 100644 --- a/src/include/catalog/heap.h +++ b/src/include/catalog/heap.h @@ -45,6 +45,7 @@ extern Relation heap_create(const char *relname, Oid reltablespace, Oid relid, Oid relfilenode, + Oid accessmtd, TupleDesc tupDesc, char relkind, char relpersistence, @@ -59,6 +60,7 @@ extern Oid heap_create_with_catalog(const char *relname, Oid reltypeid, Oid reloftypeid, Oid ownerid, + Oid accessmtd, TupleDesc tupdesc, List *cooked_constraints, char relkind, diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h index 35a29f3498..5e53001d78 100644 --- a/src/include/catalog/index.h +++ b/src/include/catalog/index.h @@ -20,7 +20,7 @@ #define DEFAULT_INDEX_TYPE "btree" -/* Typedef for callback function for IndexBuildHeapScan */ +/* Typedef for callback function for table_index_build_scan */ typedef void (*IndexBuildCallback) (Relation index, HeapTuple htup, Datum *values, @@ -37,6 +37,15 @@ typedef enum INDEX_DROP_SET_DEAD } IndexStateFlagsAction; +/* state info for validate_index bulkdelete callback */ +typedef struct ValidateIndexState +{ + Tuplesortstate *tuplesort; /* for sorting the index TIDs */ + /* statistics (for debug purposes only): */ + double htups, + itups, + tups_inserted; +} ValidateIndexState; extern void index_check_primary_key(Relation heapRel, IndexInfo *indexInfo, @@ -111,24 +120,6 @@ extern void index_build(Relation heapRelation, bool isreindex, bool parallel); -extern double IndexBuildHeapScan(Relation heapRelation, - Relation indexRelation, - IndexInfo *indexInfo, - bool allow_sync, - IndexBuildCallback callback, - void *callback_state, - HeapScanDesc scan); -extern double IndexBuildHeapRangeScan(Relation heapRelation, - Relation indexRelation, - IndexInfo *indexInfo, - bool allow_sync, - bool anyvisible, - BlockNumber start_blockno, - BlockNumber end_blockno, - IndexBuildCallback callback, - void *callback_state, - HeapScanDesc scan); - extern void validate_index(Oid heapId, Oid indexId, Snapshot snapshot); extern void 
index_set_state_flags(Oid indexId, IndexStateFlagsAction action); @@ -155,4 +146,45 @@ extern void RestoreReindexState(void *reindexstate); extern void IndexSetParentIndex(Relation idx, Oid parentOid); + +/* + * itemptr_encode - Encode ItemPointer as int64/int8 + * + * This representation must produce values encoded as int64 that sort in the + * same order as their corresponding original TID values would (using the + * default int8 opclass to produce a result equivalent to the default TID + * opclass). + * + * As noted in validate_index(), this can be significantly faster. + */ +static inline int64 +itemptr_encode(ItemPointer itemptr) +{ + BlockNumber block = ItemPointerGetBlockNumber(itemptr); + OffsetNumber offset = ItemPointerGetOffsetNumber(itemptr); + int64 encoded; + + /* + * Use the 16 least significant bits for the offset. 32 adjacent bits are + * used for the block number. Since remaining bits are unused, there + * cannot be negative encoded values (We assume a two's complement + * representation). + */ + encoded = ((uint64) block << 16) | (uint16) offset; + + return encoded; +} + +/* + * itemptr_decode - Decode int64/int8 representation back to ItemPointer + */ +static inline void +itemptr_decode(ItemPointer itemptr, int64 encoded) +{ + BlockNumber block = (BlockNumber) (encoded >> 16); + OffsetNumber offset = (OffsetNumber) (encoded & 0xFFFF); + + ItemPointerSet(itemptr, block, offset); +} + #endif /* INDEX_H */ diff --git a/src/include/catalog/pg_am.dat b/src/include/catalog/pg_am.dat index bef53a319a..0f44c42094 100644 --- a/src/include/catalog/pg_am.dat +++ b/src/include/catalog/pg_am.dat @@ -30,5 +30,8 @@ { oid => '3580', oid_symbol => 'BRIN_AM_OID', descr => 'block range index (BRIN) access method', amname => 'brin', amhandler => 'brinhandler', amtype => 'i' }, +{ oid => '4001', oid_symbol => 'HEAP_TABLE_AM_OID', + descr => 'heap table access method', + amname => 'heap', amhandler => 'heap_tableam_handler', amtype => 't' }, ] diff --git a/src/include/catalog/pg_am.h b/src/include/catalog/pg_am.h index 57d65f830f..6db7e4b21e 100644 --- a/src/include/catalog/pg_am.h +++ b/src/include/catalog/pg_am.h @@ -53,6 +53,7 @@ typedef FormData_pg_am *Form_pg_am; * Allowed values for amtype */ #define AMTYPE_INDEX 'i' /* index access method */ +#define AMTYPE_TABLE 't' /* table access method */ #endif /* EXPOSE_TO_CLIENT_CODE */ diff --git a/src/include/catalog/pg_class.dat b/src/include/catalog/pg_class.dat index 5a884a852b..b43c37ff14 100644 --- a/src/include/catalog/pg_class.dat +++ b/src/include/catalog/pg_class.dat @@ -22,7 +22,7 @@ { oid => '1247', relname => 'pg_type', relnamespace => 'PGNSP', reltype => '71', - reloftype => '0', relowner => 'PGUID', relam => '0', relfilenode => '0', + reloftype => '0', relowner => 'PGUID', relam => 'PGHEAPAM', relfilenode => '0', reltablespace => '0', relpages => '0', reltuples => '0', relallvisible => '0', reltoastrelid => '0', relhasindex => 'f', relisshared => 'f', relpersistence => 'p', relkind => 'r', relnatts => '31', relchecks => '0', @@ -33,7 +33,7 @@ reloptions => '_null_', relpartbound => '_null_' }, { oid => '1249', relname => 'pg_attribute', relnamespace => 'PGNSP', reltype => '75', - reloftype => '0', relowner => 'PGUID', relam => '0', relfilenode => '0', + reloftype => '0', relowner => 'PGUID', relam => 'PGHEAPAM', relfilenode => '0', reltablespace => '0', relpages => '0', reltuples => '0', relallvisible => '0', reltoastrelid => '0', relhasindex => 'f', relisshared => 'f', relpersistence => 'p', relkind => 'r', relnatts => '24', 
relchecks => '0', @@ -44,7 +44,7 @@ reloptions => '_null_', relpartbound => '_null_' }, { oid => '1255', relname => 'pg_proc', relnamespace => 'PGNSP', reltype => '81', - reloftype => '0', relowner => 'PGUID', relam => '0', relfilenode => '0', + reloftype => '0', relowner => 'PGUID', relam => 'PGHEAPAM', relfilenode => '0', reltablespace => '0', relpages => '0', reltuples => '0', relallvisible => '0', reltoastrelid => '0', relhasindex => 'f', relisshared => 'f', relpersistence => 'p', relkind => 'r', relnatts => '29', relchecks => '0', @@ -55,7 +55,7 @@ reloptions => '_null_', relpartbound => '_null_' }, { oid => '1259', relname => 'pg_class', relnamespace => 'PGNSP', reltype => '83', - reloftype => '0', relowner => 'PGUID', relam => '0', relfilenode => '0', + reloftype => '0', relowner => 'PGUID', relam => 'PGHEAPAM', relfilenode => '0', reltablespace => '0', relpages => '0', reltuples => '0', relallvisible => '0', reltoastrelid => '0', relhasindex => 'f', relisshared => 'f', relpersistence => 'p', relkind => 'r', relnatts => '33', relchecks => '0', diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h index 84e63c6d06..873a5b8d22 100644 --- a/src/include/catalog/pg_class.h +++ b/src/include/catalog/pg_class.h @@ -36,7 +36,7 @@ CATALOG(pg_class,1259,RelationRelationId) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83,Relat Oid reloftype; /* OID of entry in pg_type for underlying * composite type */ Oid relowner; /* class owner */ - Oid relam; /* index access method; 0 if not an index */ + Oid relam; /* access method; 0 if not a table / index */ Oid relfilenode; /* identifier of physical storage file */ /* relfilenode == 0 means it is a "mapped" relation, see relmapper.c */ diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index f79fcfe029..112fe90ada 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -816,6 +816,12 @@ proname => 'int4', prorettype => 'int4', proargtypes => 'float4', prosrc => 'ftoi4' }, +# Table access method handlers +{ oid => '4002', oid_symbol => 'HEAP_TABLE_AM_HANDLER_OID', + descr => 'row-oriented heap table access method handler', + proname => 'heap_tableam_handler', provolatile => 'v', prorettype => 'table_am_handler', + proargtypes => 'internal', prosrc => 'heap_tableam_handler' }, + # Index access method handlers { oid => '330', descr => 'btree index access method handler', proname => 'bthandler', provolatile => 'v', prorettype => 'index_am_handler', @@ -6918,6 +6924,13 @@ { oid => '3312', descr => 'I/O', proname => 'tsm_handler_out', prorettype => 'cstring', proargtypes => 'tsm_handler', prosrc => 'tsm_handler_out' }, +{ oid => '3425', descr => 'I/O', + proname => 'table_am_handler_in', proisstrict => 'f', + prorettype => 'table_am_handler', proargtypes => 'cstring', + prosrc => 'table_am_handler_in' }, +{ oid => '3426', descr => 'I/O', + proname => 'table_am_handler_out', prorettype => 'cstring', + proargtypes => 'table_am_handler', prosrc => 'table_am_handler_out' }, # tablesample method handlers { oid => '3313', descr => 'BERNOULLI tablesample method handler', diff --git a/src/include/catalog/pg_type.dat b/src/include/catalog/pg_type.dat index d295eae1b9..f37856d2b4 100644 --- a/src/include/catalog/pg_type.dat +++ b/src/include/catalog/pg_type.dat @@ -580,6 +580,11 @@ typcategory => 'P', typinput => 'tsm_handler_in', typoutput => 'tsm_handler_out', typreceive => '-', typsend => '-', typalign => 'i' }, +{ oid => '3998', + typname => 'table_am_handler', typlen => '4', typbyval => 't', 
typtype => 'p', + typcategory => 'P', typinput => 'table_am_handler_in', + typoutput => 'table_am_handler_out', typreceive => '-', typsend => '-', + typalign => 'i' }, { oid => '3831', descr => 'pseudo-type representing a polymorphic base type that is a range', typname => 'anyrange', typlen => '-1', typbyval => 'f', typtype => 'p', diff --git a/src/include/commands/trigger.h b/src/include/commands/trigger.h index 1031448c14..0f02baee14 100644 --- a/src/include/commands/trigger.h +++ b/src/include/commands/trigger.h @@ -35,8 +35,8 @@ typedef struct TriggerData HeapTuple tg_trigtuple; HeapTuple tg_newtuple; Trigger *tg_trigger; - Buffer tg_trigtuplebuf; - Buffer tg_newtuplebuf; + TupleTableSlot *tg_trigslot; + TupleTableSlot *tg_newslot; Tuplestorestate *tg_oldtable; Tuplestorestate *tg_newtable; } TriggerData; @@ -186,15 +186,15 @@ extern void ExecBSInsertTriggers(EState *estate, extern void ExecASInsertTriggers(EState *estate, ResultRelInfo *relinfo, TransitionCaptureState *transition_capture); -extern TupleTableSlot *ExecBRInsertTriggers(EState *estate, +extern bool ExecBRInsertTriggers(EState *estate, ResultRelInfo *relinfo, TupleTableSlot *slot); extern void ExecARInsertTriggers(EState *estate, ResultRelInfo *relinfo, - HeapTuple trigtuple, + TupleTableSlot *slot, List *recheckIndexes, TransitionCaptureState *transition_capture); -extern TupleTableSlot *ExecIRInsertTriggers(EState *estate, +extern bool ExecIRInsertTriggers(EState *estate, ResultRelInfo *relinfo, TupleTableSlot *slot); extern void ExecBSDeleteTriggers(EState *estate, @@ -221,7 +221,7 @@ extern void ExecBSUpdateTriggers(EState *estate, extern void ExecASUpdateTriggers(EState *estate, ResultRelInfo *relinfo, TransitionCaptureState *transition_capture); -extern TupleTableSlot *ExecBRUpdateTriggers(EState *estate, +extern bool ExecBRUpdateTriggers(EState *estate, EPQState *epqstate, ResultRelInfo *relinfo, ItemPointer tupleid, @@ -231,10 +231,10 @@ extern void ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo, ItemPointer tupleid, HeapTuple fdw_trigtuple, - HeapTuple newtuple, + TupleTableSlot *slot, List *recheckIndexes, TransitionCaptureState *transition_capture); -extern TupleTableSlot *ExecIRUpdateTriggers(EState *estate, +extern bool ExecIRUpdateTriggers(EState *estate, ResultRelInfo *relinfo, HeapTuple trigtuple, TupleTableSlot *slot); @@ -258,9 +258,9 @@ extern bool AfterTriggerPendingOnRel(Oid relid); * in utils/adt/ri_triggers.c */ extern bool RI_FKey_pk_upd_check_required(Trigger *trigger, Relation pk_rel, - HeapTuple old_row, HeapTuple new_row); + TupleTableSlot *old_slot, TupleTableSlot *new_slot); extern bool RI_FKey_fk_upd_check_required(Trigger *trigger, Relation fk_rel, - HeapTuple old_row, HeapTuple new_row); + TupleTableSlot *old_slot, TupleTableSlot *new_slot); extern bool RI_Initial_Check(Trigger *trigger, Relation fk_rel, Relation pk_rel); diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 2feec628c0..ded4baf004 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -183,19 +183,14 @@ extern void ExecWithCheckOptions(WCOKind kind, ResultRelInfo *resultRelInfo, extern LockTupleMode ExecUpdateLockMode(EState *estate, ResultRelInfo *relinfo); extern ExecRowMark *ExecFindRowMark(EState *estate, Index rti, bool missing_ok); extern ExecAuxRowMark *ExecBuildAuxRowMark(ExecRowMark *erm, List *targetlist); +extern TupleTableSlot *EvalPlanQualSlot(EPQState *epqstate, + Relation relation, Index rti); extern TupleTableSlot 
*EvalPlanQual(EState *estate, EPQState *epqstate, - Relation relation, Index rti, int lockmode, - ItemPointer tid, TransactionId priorXmax); -extern HeapTuple EvalPlanQualFetch(EState *estate, Relation relation, - int lockmode, LockWaitPolicy wait_policy, ItemPointer tid, - TransactionId priorXmax); + Relation relation, Index rti, TupleTableSlot *slot); extern void EvalPlanQualInit(EPQState *epqstate, EState *estate, Plan *subplan, List *auxrowmarks, int epqParam); extern void EvalPlanQualSetPlan(EPQState *epqstate, Plan *subplan, List *auxrowmarks); -extern void EvalPlanQualSetTuple(EPQState *epqstate, Index rti, - HeapTuple tuple); -extern HeapTuple EvalPlanQualGetTuple(EPQState *epqstate, Index rti); #define EvalPlanQualSetSlot(epqstate, slot) ((epqstate)->origslot = (slot)) extern void EvalPlanQualFetchRowMarks(EPQState *epqstate); @@ -486,6 +481,10 @@ extern void ReScanExprContext(ExprContext *econtext); extern ExprContext *MakePerTupleExprContext(EState *estate); +extern TupleTableSlot *ExecTriggerGetOldSlot(EState *estate, Relation rel); +extern TupleTableSlot *ExecTriggerGetNewSlot(EState *estate, Relation rel); +extern TupleTableSlot *ExecTriggerGetReturnSlot(EState *estate, Relation rel); + /* Get an EState's per-output-tuple exprcontext, making it if first use */ #define GetPerTupleExprContext(estate) \ ((estate)->es_per_tuple_exprcontext ? \ @@ -554,9 +553,8 @@ extern int ExecCleanTargetListLength(List *targetlist); */ extern void ExecOpenIndices(ResultRelInfo *resultRelInfo, bool speculative); extern void ExecCloseIndices(ResultRelInfo *resultRelInfo); -extern List *ExecInsertIndexTuples(TupleTableSlot *slot, ItemPointer tupleid, - EState *estate, bool noDupErr, bool *specConflict, - List *arbiterIndexes); +extern List *ExecInsertIndexTuples(TupleTableSlot *slot, EState *estate, bool noDupErr, + bool *specConflict, List *arbiterIndexes); extern bool ExecCheckIndexConstraints(TupleTableSlot *slot, EState *estate, ItemPointer conflictTid, List *arbiterIndexes); extern void check_exclusion_constraint(Relation heap, Relation index, diff --git a/src/include/executor/spi.h b/src/include/executor/spi.h index d2616968ac..d17d0de64f 100644 --- a/src/include/executor/spi.h +++ b/src/include/executor/spi.h @@ -25,7 +25,7 @@ typedef struct SPITupleTable uint64 alloced; /* # of alloced vals */ uint64 free; /* # of free vals */ TupleDesc tupdesc; /* tuple descriptor */ - HeapTuple *vals; /* tuples */ + HeapTuple *vals; /* tuples */ slist_node next; /* link for internal bookkeeping */ SubTransactionId subid; /* subxact in which tuptable was created */ } SPITupleTable; @@ -122,7 +122,7 @@ extern CachedPlan *SPI_plan_get_cached_plan(SPIPlanPtr plan); extern HeapTuple SPI_copytuple(HeapTuple tuple); extern HeapTupleHeader SPI_returntuple(HeapTuple tuple, TupleDesc tupdesc); extern HeapTuple SPI_modifytuple(Relation rel, HeapTuple tuple, int natts, - int *attnum, Datum *Values, const char *Nulls); + int *attnum, Datum *Values, const char *Nulls); extern int SPI_fnumber(TupleDesc tupdesc, const char *fname); extern char *SPI_fname(TupleDesc tupdesc, int fnumber); extern char *SPI_getvalue(HeapTuple tuple, TupleDesc tupdesc, int fnumber); diff --git a/src/include/executor/tqueue.h b/src/include/executor/tqueue.h index 0fe3639252..8bcbe2fbff 100644 --- a/src/include/executor/tqueue.h +++ b/src/include/executor/tqueue.h @@ -27,6 +27,6 @@ extern DestReceiver *CreateTupleQueueDestReceiver(shm_mq_handle *handle); extern TupleQueueReader *CreateTupleQueueReader(shm_mq_handle *handle); extern void 
DestroyTupleQueueReader(TupleQueueReader *reader); extern HeapTuple TupleQueueReaderNext(TupleQueueReader *reader, - bool nowait, bool *done); + bool nowait, bool *done); #endif /* TQUEUE_H */ diff --git a/src/include/executor/tuptable.h b/src/include/executor/tuptable.h index 5c390a9669..c87689b3dd 100644 --- a/src/include/executor/tuptable.h +++ b/src/include/executor/tuptable.h @@ -15,6 +15,7 @@ #define TUPTABLE_H #include "access/htup.h" +#include "access/sysattr.h" #include "access/tupdesc.h" #include "storage/buf.h" @@ -125,6 +126,10 @@ typedef struct TupleTableSlot #define FIELDNO_TUPLETABLESLOT_ISNULL 6 bool *tts_isnull; /* current per-attribute isnull flags */ MemoryContext tts_mcxt; /* slot itself is in this context */ + + ItemPointerData tts_tid; /* XXX describe */ + Oid tts_tableOid; /* XXX describe */ + } TupleTableSlot; /* routines for a TupleTableSlot implementation */ @@ -238,6 +243,7 @@ typedef struct VirtualTupleTableSlot char *data; /* data for materialized slots */ } VirtualTupleTableSlot; +#include typedef struct HeapTupleTableSlot { TupleTableSlot base; @@ -246,6 +252,7 @@ typedef struct HeapTupleTableSlot HeapTuple tuple; /* physical tuple */ #define FIELDNO_HEAPTUPLETABLESLOT_OFF 2 uint32 off; /* saved state for slot_deform_heap_tuple */ + HeapTupleData tupdata; } HeapTupleTableSlot; /* heap tuple residing in a buffer */ @@ -323,6 +330,9 @@ extern void slot_getmissingattrs(TupleTableSlot *slot, int startAttNum, extern void slot_getsomeattrs_int(TupleTableSlot *slot, int attnum); +// FIXME: remove +extern bool ExecSlotCompare(TupleTableSlot *slot1, TupleTableSlot *slot2); + #ifndef FRONTEND /* @@ -395,6 +405,12 @@ slot_getsysattr(TupleTableSlot *slot, int attnum, bool *isnull) { AssertArg(attnum < 0); /* caller error */ + if (attnum == TableOidAttributeNumber) + { + *isnull = false; + return slot->tts_tableOid; + } + /* Fetch the system attribute from the underlying tuple. */ return slot->tts_ops->getsysattr(slot, attnum, isnull); } diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h index c14eb546c6..508b0eece8 100644 --- a/src/include/foreign/fdwapi.h +++ b/src/include/foreign/fdwapi.h @@ -121,10 +121,11 @@ typedef void (*EndDirectModify_function) (ForeignScanState *node); typedef RowMarkType (*GetForeignRowMarkType_function) (RangeTblEntry *rte, LockClauseStrength strength); -typedef HeapTuple (*RefetchForeignRow_function) (EState *estate, - ExecRowMark *erm, - Datum rowid, - bool *updated); +typedef TupleTableSlot *(*RefetchForeignRow_function) (EState *estate, + ExecRowMark *erm, + Datum rowid, + TupleTableSlot *slot, + bool *updated); typedef void (*ExplainForeignScan_function) (ForeignScanState *node, struct ExplainState *es); diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 5ed0f40f69..359ed0a86c 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -525,7 +525,7 @@ typedef struct EState /* Stuff used for firing triggers: */ List *es_trig_target_relations; /* trigger-only ResultRelInfos */ - TupleTableSlot *es_trig_tuple_slot; /* for trigger output tuples */ + TupleTableSlot *es_trig_return_slot; /* for trigger output tuples */ TupleTableSlot *es_trig_oldtup_slot; /* for TriggerEnabled */ TupleTableSlot *es_trig_newtup_slot; /* for TriggerEnabled */ @@ -568,7 +568,8 @@ typedef struct EState * remember if the tuple has been returned already. Arrays are of size * es_range_table_size and are indexed by scan node scanrelid - 1. 
*/ - HeapTuple *es_epqTuple; /* array of EPQ substitute tuples */ + //TableTuple *es_epqTuple; /* array of EPQ substitute tuples */ + TupleTableSlot **es_epqTupleSlot; bool *es_epqTupleSet; /* true if EPQ tuple is provided */ bool *es_epqScanDone; /* true if EPQ tuple has been fetched */ @@ -1268,7 +1269,7 @@ typedef struct ScanState { PlanState ps; /* its first field is NodeTag */ Relation ss_currentRelation; - HeapScanDesc ss_currentScanDesc; + TableScanDesc ss_currentScanDesc; TupleTableSlot *ss_ScanTupleSlot; } ScanState; @@ -1298,6 +1299,9 @@ typedef struct SampleScanState bool use_pagemode; /* use page-at-a-time visibility checking? */ bool begun; /* false means need to call BeginSampleScan */ uint32 seed; /* random seed */ + int64 donetuples; /* number of tuples already returned */ + bool haveblock; /* has a block for sampling been determined */ + bool done; /* exhausted all tuples? */ } SampleScanState; /* @@ -1526,6 +1530,7 @@ typedef struct BitmapHeapScanState Buffer pvmbuffer; long exact_pages; long lossy_pages; + int return_empty_tuples; TBMIterator *prefetch_iterator; int prefetch_pages; int prefetch_target; @@ -2256,7 +2261,7 @@ typedef struct LockRowsState PlanState ps; /* its first field is NodeTag */ List *lr_arowMarks; /* List of ExecAuxRowMarks */ EPQState lr_epqstate; /* for evaluating EvalPlanQual rechecks */ - HeapTuple *lr_curtuples; /* locked tuples (one entry per RT entry) */ + TupleTableSlot **lr_curtuples; /* locked tuples (one entry per RT entry) */ int lr_ntables; /* length of lr_curtuples[] array */ } LockRowsState; diff --git a/src/include/nodes/lockoptions.h b/src/include/nodes/lockoptions.h index 24afd6efd4..5f579781a1 100644 --- a/src/include/nodes/lockoptions.h +++ b/src/include/nodes/lockoptions.h @@ -43,4 +43,9 @@ typedef enum LockWaitPolicy LockWaitError } LockWaitPolicy; +/* Follow tuples whose update is in progress if lock modes don't conflict */ +#define TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS (1 << 0) +/* Follow update chain and lock lastest version of tuple */ +#define TUPLE_LOCK_FLAG_FIND_LAST_VERSION (1 << 1) + #endif /* LOCKOPTIONS_H */ diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index cac6ff0eda..5391c41d9e 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -503,6 +503,7 @@ typedef enum NodeTag T_InlineCodeBlock, /* in nodes/parsenodes.h */ T_FdwRoutine, /* in foreign/fdwapi.h */ T_IndexAmRoutine, /* in access/amapi.h */ + T_TableAmRoutine, /* in access/tableam.h */ T_TsmRoutine, /* in access/tsmapi.h */ T_ForeignKeyCacheInfo, /* in utils/rel.h */ T_CallContext /* in nodes/parsenodes.h */ diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index e5bdc1cec5..bb20fdd488 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -2032,6 +2032,7 @@ typedef struct CreateStmt List *options; /* options from WITH clause */ OnCommitAction oncommit; /* what do we do at COMMIT? */ char *tablespacename; /* table space to use, or NULL */ + char *accessMethod; /* table access method */ bool if_not_exists; /* just do nothing if it already exists? 
*/ } CreateStmt; diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index b886ed3534..0bdc0bbf50 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -111,6 +111,7 @@ typedef struct IntoClause RangeVar *rel; /* target relation name */ List *colNames; /* column names to assign, or NIL */ + char *accessMethod; /* table access method */ List *options; /* options from WITH clause */ OnCommitAction onCommit; /* what do we do at COMMIT? */ char *tableSpaceName; /* table space to use, or NULL */ diff --git a/src/include/nodes/tidbitmap.h b/src/include/nodes/tidbitmap.h index 31532e9769..f6b829da2c 100644 --- a/src/include/nodes/tidbitmap.h +++ b/src/include/nodes/tidbitmap.h @@ -37,7 +37,7 @@ typedef struct TBMIterator TBMIterator; typedef struct TBMSharedIterator TBMSharedIterator; /* Result structure for tbm_iterate */ -typedef struct +typedef struct TBMIterateResult { BlockNumber blockno; /* page number containing tuples */ int ntuples; /* -1 indicates lossy result */ diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index 3cce3906a0..95915bdc92 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -20,7 +20,6 @@ #include "storage/relfilenode.h" #include "utils/relcache.h" #include "utils/snapmgr.h" -#include "utils/tqual.h" typedef void *Block; @@ -268,8 +267,8 @@ TestForOldSnapshot(Snapshot snapshot, Relation relation, Page page) if (old_snapshot_threshold >= 0 && (snapshot) != NULL - && ((snapshot)->satisfies == HeapTupleSatisfiesMVCC - || (snapshot)->satisfies == HeapTupleSatisfiesToast) + && ((snapshot)->visibility_type == MVCC_VISIBILITY + || (snapshot)->visibility_type == TOAST_VISIBILITY) && !XLogRecPtrIsInvalid((snapshot)->lsn) && PageGetLSN(page) > (snapshot)->lsn) TestForOldSnapshot_impl(snapshot, relation); diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index 2217081dcc..9187cbbcf3 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -131,6 +131,12 @@ typedef struct RelationData /* use "struct" here to avoid needing to include htup.h: */ struct HeapTupleData *rd_indextuple; /* all of pg_index tuple */ + /* + * Underlying table access method support + */ + Oid rd_tableamhandler; /* OID of table AM handler function */ + const struct TableAmRoutine *rd_tableamroutine; /* table AM's API struct */ + /* * index access support info (used only for an index relation) * @@ -432,6 +438,12 @@ typedef struct ViewOptions */ #define RelationGetDescr(relation) ((relation)->rd_att) +/* + * RelationGetTableamRoutine + * Returns the table AM routine for a relation. + */ +#define RelationGettableamRoutine(relation) ((relation)->rd_tableamroutine) + /* * RelationGetRelationName * Returns the rel's name. 
diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h index a99d6b6681..de5b096bf7 100644 --- a/src/include/utils/relcache.h +++ b/src/include/utils/relcache.h @@ -76,6 +76,8 @@ extern void RelationInitIndexAccessInfo(Relation relation); struct PublicationActions; extern struct PublicationActions *GetRelationPublicationActions(Relation relation); +extern void RelationInitTableAccessMethod(Relation relation); + /* * Routines to support ereport() reports of relation-related errors */ @@ -98,6 +100,7 @@ extern Relation RelationBuildLocalRelation(const char *relname, Oid relnamespace, TupleDesc tupDesc, Oid relid, + Oid accessmtd, Oid relfilenode, Oid reltablespace, bool shared_relation, diff --git a/src/include/utils/snapshot.h b/src/include/utils/snapshot.h index a8a5a8f4c0..77e25fb561 100644 --- a/src/include/utils/snapshot.h +++ b/src/include/utils/snapshot.h @@ -19,6 +19,16 @@ #include "lib/pairingheap.h" #include "storage/buf.h" +typedef enum tuple_visibility_type +{ + MVCC_VISIBILITY = 0, /* HeapTupleSatisfiesMVCC */ + SELF_VISIBILITY, /* HeapTupleSatisfiesSelf */ + ANY_VISIBILITY, /* HeapTupleSatisfiesAny */ + TOAST_VISIBILITY, /* HeapTupleSatisfiesToast */ + DIRTY_VISIBILITY, /* HeapTupleSatisfiesDirty */ + HISTORIC_MVCC_VISIBILITY, /* HeapTupleSatisfiesHistoricMVCC */ + NON_VACUUMABLE_VISIBILTY /* HeapTupleSatisfiesNonVacuumable */ +} tuple_visibility_type; typedef struct SnapshotData *Snapshot; @@ -52,7 +62,7 @@ typedef bool (*SnapshotSatisfiesFunc) (HeapTuple htup, */ typedef struct SnapshotData { - SnapshotSatisfiesFunc satisfies; /* tuple test function */ + tuple_visibility_type visibility_type; /* tuple visibility test type */ /* * The remaining fields are used only for MVCC snapshots, and are normally @@ -124,6 +134,7 @@ typedef enum HeapTupleInvisible, HeapTupleSelfUpdated, HeapTupleUpdated, + HeapTupleDeleted, HeapTupleBeingUpdated, HeapTupleWouldBlock /* can be returned by heap_tuple_lock */ } HTSU_Result; diff --git a/src/include/utils/tqual.h b/src/include/utils/tqual.h index d3b6e99bb4..1fe9cc6402 100644 --- a/src/include/utils/tqual.h +++ b/src/include/utils/tqual.h @@ -18,7 +18,6 @@ #include "utils/snapshot.h" #include "access/xlogdefs.h" - /* Static variables representing various special snapshot semantics */ extern PGDLLIMPORT SnapshotData SnapshotSelfData; extern PGDLLIMPORT SnapshotData SnapshotAnyData; @@ -29,60 +28,8 @@ extern PGDLLIMPORT SnapshotData CatalogSnapshotData; /* This macro encodes the knowledge of which snapshots are MVCC-safe */ #define IsMVCCSnapshot(snapshot) \ - ((snapshot)->satisfies == HeapTupleSatisfiesMVCC || \ - (snapshot)->satisfies == HeapTupleSatisfiesHistoricMVCC) - -/* - * HeapTupleSatisfiesVisibility - * True iff heap tuple satisfies a time qual. - * - * Notes: - * Assumes heap tuple is valid. - * Beware of multiple evaluations of snapshot argument. - * Hint bits in the HeapTuple's t_infomask may be updated as a side effect; - * if so, the indicated buffer is marked dirty. 
- */ -#define HeapTupleSatisfiesVisibility(tuple, snapshot, buffer) \ - ((*(snapshot)->satisfies) (tuple, snapshot, buffer)) - -/* Result codes for HeapTupleSatisfiesVacuum */ -typedef enum -{ - HEAPTUPLE_DEAD, /* tuple is dead and deletable */ - HEAPTUPLE_LIVE, /* tuple is live (committed, no deleter) */ - HEAPTUPLE_RECENTLY_DEAD, /* tuple is dead, but not deletable yet */ - HEAPTUPLE_INSERT_IN_PROGRESS, /* inserting xact is still in progress */ - HEAPTUPLE_DELETE_IN_PROGRESS /* deleting xact is still in progress */ -} HTSV_Result; - -/* These are the "satisfies" test routines for the various snapshot types */ -extern bool HeapTupleSatisfiesMVCC(HeapTuple htup, - Snapshot snapshot, Buffer buffer); -extern bool HeapTupleSatisfiesSelf(HeapTuple htup, - Snapshot snapshot, Buffer buffer); -extern bool HeapTupleSatisfiesAny(HeapTuple htup, - Snapshot snapshot, Buffer buffer); -extern bool HeapTupleSatisfiesToast(HeapTuple htup, - Snapshot snapshot, Buffer buffer); -extern bool HeapTupleSatisfiesDirty(HeapTuple htup, - Snapshot snapshot, Buffer buffer); -extern bool HeapTupleSatisfiesNonVacuumable(HeapTuple htup, - Snapshot snapshot, Buffer buffer); -extern bool HeapTupleSatisfiesHistoricMVCC(HeapTuple htup, - Snapshot snapshot, Buffer buffer); - -/* Special "satisfies" routines with different APIs */ -extern HTSU_Result HeapTupleSatisfiesUpdate(HeapTuple htup, - CommandId curcid, Buffer buffer); -extern HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple htup, - TransactionId OldestXmin, Buffer buffer); -extern bool HeapTupleIsSurelyDead(HeapTuple htup, - TransactionId OldestXmin); -extern bool XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot); - -extern void HeapTupleSetHintBits(HeapTupleHeader tuple, Buffer buffer, - uint16 infomask, TransactionId xid); -extern bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple); + ((snapshot)->visibility_type == MVCC_VISIBILITY || \ + (snapshot)->visibility_type == HISTORIC_MVCC_VISIBILITY) /* * To avoid leaking too much knowledge about reorderbuffer implementation @@ -101,14 +48,14 @@ extern bool ResolveCminCmaxDuringDecoding(struct HTAB *tuplecid_data, * local variable of type SnapshotData, and initialize it with this macro. */ #define InitDirtySnapshot(snapshotdata) \ - ((snapshotdata).satisfies = HeapTupleSatisfiesDirty) + ((snapshotdata).visibility_type = DIRTY_VISIBILITY) /* * Similarly, some initialization is required for a NonVacuumable snapshot. * The caller must supply the xmin horizon to use (e.g., RecentGlobalXmin). */ #define InitNonVacuumableSnapshot(snapshotdata, xmin_horizon) \ - ((snapshotdata).satisfies = HeapTupleSatisfiesNonVacuumable, \ + ((snapshotdata).visibility_type = NON_VACUUMABLE_VISIBILTY, \ (snapshotdata).xmin = (xmin_horizon)) /* @@ -116,7 +63,7 @@ extern bool ResolveCminCmaxDuringDecoding(struct HTAB *tuplecid_data, * to set lsn and whenTaken correctly to support snapshot_too_old. */ #define InitToastSnapshot(snapshotdata, l, w) \ - ((snapshotdata).satisfies = HeapTupleSatisfiesToast, \ + ((snapshotdata).visibility_type = TOAST_VISIBILITY, \ (snapshotdata).lsn = (l), \ (snapshotdata).whenTaken = (w)) diff --git a/src/include/utils/tuplesort.h b/src/include/utils/tuplesort.h index 32908b6625..334096e979 100644 --- a/src/include/utils/tuplesort.h +++ b/src/include/utils/tuplesort.h @@ -105,7 +105,7 @@ typedef struct TuplesortInstrumentation * * The "cluster" API stores/sorts full HeapTuples including all visibility * info. 
The sort keys are specified by reference to a btree index that is - * defined on the relation to be sorted. Note that putheaptuple/getheaptuple + * defined on the relation to be sorted. Note that putheaptupleslot/getheaptuple * go with this API, not the "begin_heap" one! * * The "index_btree" API stores/sorts IndexTuples (preserving all their diff --git a/src/test/isolation/expected/partition-key-update-1.out b/src/test/isolation/expected/partition-key-update-1.out index 37fe6a7b27..a632d7f7ba 100644 --- a/src/test/isolation/expected/partition-key-update-1.out +++ b/src/test/isolation/expected/partition-key-update-1.out @@ -15,7 +15,7 @@ step s1u: UPDATE foo SET a=2 WHERE a=1; step s2d: DELETE FROM foo WHERE a=1; step s1c: COMMIT; step s2d: <... completed> -error in steps s1c s2d: ERROR: tuple to be deleted was already moved to another partition due to concurrent update +error in steps s1c s2d: ERROR: tuple to be locked was already moved to another partition due to concurrent update step s2c: COMMIT; starting permutation: s1b s2b s2d s1u s2c s1c diff --git a/src/test/regress/expected/create_am.out b/src/test/regress/expected/create_am.out index 47dd885c4e..e15ba33a08 100644 --- a/src/test/regress/expected/create_am.out +++ b/src/test/regress/expected/create_am.out @@ -99,3 +99,82 @@ HINT: Use DROP ... CASCADE to drop the dependent objects too. -- Drop access method cascade DROP ACCESS METHOD gist2 CASCADE; NOTICE: drop cascades to index grect2ind2 +-- Create a heap2 table am handler with heapam handler +CREATE ACCESS METHOD heap2 TYPE TABLE HANDLER heap_tableam_handler; +SELECT amname, amhandler, amtype FROM pg_am where amtype = 't' ORDER BY 1, 2; + amname | amhandler | amtype +--------+----------------------+-------- + heap | heap_tableam_handler | t + heap2 | heap_tableam_handler | t +(2 rows) + +CREATE TABLE tbl_heap2(f1 int, f2 char(100)) using heap2; +INSERT INTO tbl_heap2 VALUES(generate_series(1,10), 'Test series'); +SELECT count(*) FROM tbl_heap2; + count +------- + 10 +(1 row) + +SELECT r.relname, r.relkind, a.amname from pg_class as r, pg_am as a + where a.oid = r.relam AND r.relname = 'tbl_heap2'; + relname | relkind | amname +-----------+---------+-------- + tbl_heap2 | r | heap2 +(1 row) + +-- create table as using heap2 +CREATE TABLE tblas_heap2 using heap2 AS select * from tbl_heap2; +SELECT r.relname, r.relkind, a.amname from pg_class as r, pg_am as a + where a.oid = r.relam AND r.relname = 'tblas_heap2'; + relname | relkind | amname +-------------+---------+-------- + tblas_heap2 | r | heap2 +(1 row) + +-- +-- select into doesn't support new syntax, so it should be +-- default access method. +-- +SELECT INTO tblselectinto_heap from tbl_heap2; +SELECT r.relname, r.relkind, a.amname = current_setting('default_table_access_method') +from pg_class as r, pg_am as a + where a.oid = r.relam AND r.relname = 'tblselectinto_heap'; + relname | relkind | ?column? 
+--------------------+---------+---------- + tblselectinto_heap | r | t +(1 row) + +DROP TABLE tblselectinto_heap; +-- create materialized view using heap2 +CREATE MATERIALIZED VIEW mv_heap2 USING heap2 AS + SELECT * FROM tbl_heap2; +SELECT r.relname, r.relkind, a.amname from pg_class as r, pg_am as a + where a.oid = r.relam AND r.relname = 'mv_heap2'; + relname | relkind | amname +----------+---------+-------- + mv_heap2 | m | heap2 +(1 row) + +-- Try creating unsupported relation kinds with the USING syntax +CREATE VIEW test_view USING heap2 AS SELECT * FROM tbl_heap2; +ERROR: syntax error at or near "USING" +LINE 1: CREATE VIEW test_view USING heap2 AS SELECT * FROM tbl_heap2... + ^ +CREATE SEQUENCE test_seq USING heap2; +ERROR: syntax error at or near "USING" +LINE 1: CREATE SEQUENCE test_seq USING heap2; + ^ +-- Drop table access method, but it fails as objects depend on it +DROP ACCESS METHOD heap2; +ERROR: cannot drop access method heap2 because other objects depend on it +DETAIL: table tbl_heap2 depends on access method heap2 +table tblas_heap2 depends on access method heap2 +materialized view mv_heap2 depends on access method heap2 +HINT: Use DROP ... CASCADE to drop the dependent objects too. +-- Drop table access method with cascade +DROP ACCESS METHOD heap2 CASCADE; +NOTICE: drop cascades to 3 other objects +DETAIL: drop cascades to table tbl_heap2 +drop cascades to table tblas_heap2 +drop cascades to materialized view mv_heap2 diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out index 6072f6bdb1..4cd92c20dd 100644 --- a/src/test/regress/expected/opr_sanity.out +++ b/src/test/regress/expected/opr_sanity.out @@ -1741,11 +1741,24 @@ WHERE p1.amhandler = 0; -----+-------- (0 rows) --- Check for amhandler functions with the wrong signature +-- Check for index amhandler functions with the wrong signature SELECT p1.oid, p1.amname, p2.oid, p2.proname FROM pg_am AS p1, pg_proc AS p2 -WHERE p2.oid = p1.amhandler AND - (p2.prorettype != 'index_am_handler'::regtype OR p2.proretset +WHERE p2.oid = p1.amhandler AND p1.amtype = 'i' AND + (p2.prorettype != 'index_am_handler'::regtype + OR p2.proretset + OR p2.pronargs != 1 + OR p2.proargtypes[0] != 'internal'::regtype); + oid | amname | oid | proname +-----+--------+-----+--------- +(0 rows) + +-- Check for table amhandler functions with the wrong signature +SELECT p1.oid, p1.amname, p2.oid, p2.proname +FROM pg_am AS p1, pg_proc AS p2 +WHERE p2.oid = p1.amhandler AND p1.amtype = 't' AND + (p2.prorettype != 'table_am_handler'::regtype + OR p2.proretset OR p2.pronargs != 1 OR p2.proargtypes[0] != 'internal'::regtype); oid | amname | oid | proname diff --git a/src/test/regress/expected/type_sanity.out b/src/test/regress/expected/type_sanity.out index b1419d4bc2..0e38d571c0 100644 --- a/src/test/regress/expected/type_sanity.out +++ b/src/test/regress/expected/type_sanity.out @@ -502,11 +502,11 @@ WHERE relkind NOT IN ('r', 'i', 'S', 't', 'v', 'm', 'c', 'f', 'p') OR -----+--------- (0 rows) --- Indexes should have an access method, others not. +-- All tables and indexes should have an access method.
SELECT p1.oid, p1.relname FROM pg_class as p1 -WHERE (p1.relkind = 'i' AND p1.relam = 0) OR - (p1.relkind != 'i' AND p1.relam != 0); +WHERE p1.relkind NOT IN ('S', 'v', 'f', 'c') and + p1.relam = 0; oid | relname -----+--------- (0 rows) diff --git a/src/test/regress/sql/create_am.sql b/src/test/regress/sql/create_am.sql index 3e0ac104f3..2c7b4813e8 100644 --- a/src/test/regress/sql/create_am.sql +++ b/src/test/regress/sql/create_am.sql @@ -66,3 +66,50 @@ DROP ACCESS METHOD gist2; -- Drop access method cascade DROP ACCESS METHOD gist2 CASCADE; + +-- Create a heap2 table am handler with heapam handler +CREATE ACCESS METHOD heap2 TYPE TABLE HANDLER heap_tableam_handler; + +SELECT amname, amhandler, amtype FROM pg_am where amtype = 't' ORDER BY 1, 2; + +CREATE TABLE tbl_heap2(f1 int, f2 char(100)) using heap2; +INSERT INTO tbl_heap2 VALUES(generate_series(1,10), 'Test series'); +SELECT count(*) FROM tbl_heap2; + +SELECT r.relname, r.relkind, a.amname from pg_class as r, pg_am as a + where a.oid = r.relam AND r.relname = 'tbl_heap2'; + +-- create table as using heap2 +CREATE TABLE tblas_heap2 using heap2 AS select * from tbl_heap2; +SELECT r.relname, r.relkind, a.amname from pg_class as r, pg_am as a + where a.oid = r.relam AND r.relname = 'tblas_heap2'; + +-- +-- select into doesn't support new syntax, so it should be +-- default access method. +-- +SELECT INTO tblselectinto_heap from tbl_heap2; +SELECT r.relname, r.relkind, a.amname = current_setting('default_table_access_method') +from pg_class as r, pg_am as a + where a.oid = r.relam AND r.relname = 'tblselectinto_heap'; + +DROP TABLE tblselectinto_heap; + +-- create materialized view using heap2 +CREATE MATERIALIZED VIEW mv_heap2 USING heap2 AS + SELECT * FROM tbl_heap2; + +SELECT r.relname, r.relkind, a.amname from pg_class as r, pg_am as a + where a.oid = r.relam AND r.relname = 'mv_heap2'; + +-- Try creating unsupported relation kinds with the USING syntax +CREATE VIEW test_view USING heap2 AS SELECT * FROM tbl_heap2; + +CREATE SEQUENCE test_seq USING heap2; + + +-- Drop table access method, but it fails as objects depend on it +DROP ACCESS METHOD heap2; + +-- Drop table access method with cascade +DROP ACCESS METHOD heap2 CASCADE; diff --git a/src/test/regress/sql/opr_sanity.sql b/src/test/regress/sql/opr_sanity.sql index 91c68f4204..2664252d94 100644 --- a/src/test/regress/sql/opr_sanity.sql +++ b/src/test/regress/sql/opr_sanity.sql @@ -1169,15 +1169,25 @@ SELECT p1.oid, p1.amname FROM pg_am AS p1 WHERE p1.amhandler = 0; --- Check for amhandler functions with the wrong signature +-- Check for index amhandler functions with the wrong signature SELECT p1.oid, p1.amname, p2.oid, p2.proname FROM pg_am AS p1, pg_proc AS p2 -WHERE p2.oid = p1.amhandler AND - (p2.prorettype != 'index_am_handler'::regtype OR p2.proretset +WHERE p2.oid = p1.amhandler AND p1.amtype = 'i' AND + (p2.prorettype != 'index_am_handler'::regtype + OR p2.proretset OR p2.pronargs != 1 OR p2.proargtypes[0] != 'internal'::regtype); +-- Check for table amhandler functions with the wrong signature + +SELECT p1.oid, p1.amname, p2.oid, p2.proname +FROM pg_am AS p1, pg_proc AS p2 +WHERE p2.oid = p1.amhandler AND p1.amtype = 't' AND + (p2.prorettype != 'table_am_handler'::regtype + OR p2.proretset + OR p2.pronargs != 1 + OR p2.proargtypes[0] != 'internal'::regtype); -- **************** pg_amop **************** diff --git a/src/test/regress/sql/type_sanity.sql b/src/test/regress/sql/type_sanity.sql index f9aeea3214..2efa229d40 100644 --- a/src/test/regress/sql/type_sanity.sql +++ 
b/src/test/regress/sql/type_sanity.sql @@ -367,12 +367,11 @@ WHERE relkind NOT IN ('r', 'i', 'S', 't', 'v', 'm', 'c', 'f', 'p') OR relpersistence NOT IN ('p', 'u', 't') OR relreplident NOT IN ('d', 'n', 'f', 'i'); --- Indexes should have an access method, others not. - +-- All tables and indexes should have an access method. SELECT p1.oid, p1.relname FROM pg_class as p1 -WHERE (p1.relkind = 'i' AND p1.relam = 0) OR - (p1.relkind != 'i' AND p1.relam != 0); +WHERE p1.relkind NOT IN ('S', 'v', 'f', 'c') and + p1.relam = 0; -- **************** pg_attribute **************** diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 9fe950b29d..e4c9f81fe2 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -1608,8 +1608,8 @@ ParallelHashGrowth ParallelHashJoinBatch ParallelHashJoinBatchAccessor ParallelHashJoinState -ParallelHeapScanDesc -ParallelHeapScanDescData +ParallelTableScanDesc +ParallelTableScanDescData ParallelIndexScanDesc ParallelSlot ParallelState
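
Since the snapshot now carries a visibility_type tag instead of a "satisfies" function pointer (see the snapshot.h, bufmgr.h, and tqual.h hunks above), the actual tuple test has to be dispatched inside the heap code rather than through the removed HeapTupleSatisfiesVisibility macro. The function below is only an illustrative sketch, not code from this patch: the name is made up, and it merely shows how the enum values map back onto the HeapTupleSatisfies* routines named in the enum's own comments.

/*
 * Illustrative sketch only (not part of this patch): map the new
 * snapshot->visibility_type tag onto the pre-existing HeapTupleSatisfies*
 * routines.  Assumes those routines remain declared in a heap-internal
 * header after their removal from utils/tqual.h; enum spellings are kept
 * exactly as declared above.
 */
static bool
heap_tuple_satisfies_snapshot(HeapTuple htup, Snapshot snapshot, Buffer buffer)
{
	switch (snapshot->visibility_type)
	{
		case MVCC_VISIBILITY:
			return HeapTupleSatisfiesMVCC(htup, snapshot, buffer);
		case SELF_VISIBILITY:
			return HeapTupleSatisfiesSelf(htup, snapshot, buffer);
		case ANY_VISIBILITY:
			return HeapTupleSatisfiesAny(htup, snapshot, buffer);
		case TOAST_VISIBILITY:
			return HeapTupleSatisfiesToast(htup, snapshot, buffer);
		case DIRTY_VISIBILITY:
			return HeapTupleSatisfiesDirty(htup, snapshot, buffer);
		case HISTORIC_MVCC_VISIBILITY:
			return HeapTupleSatisfiesHistoricMVCC(htup, snapshot, buffer);
		case NON_VACUUMABLE_VISIBILTY:
			return HeapTupleSatisfiesNonVacuumable(htup, snapshot, buffer);
	}
	return false;				/* keep compiler quiet */
}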
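
For the table AM pieces (pg_am entries with amtype 't', the T_TableAmRoutine node tag, rd_tableamroutine, and the create_am tests above), an out-of-tree access method would presumably follow the same handler convention as index AMs: a SQL-callable function of type table_am_handler that returns a pointer to a TableAmRoutine. The sketch below is an assumption about the general shape only; the real callback fields live in access/tableam.h and are omitted here, and all names are hypothetical.

/*
 * Hypothetical sketch of an extension-provided table AM handler, modelled on
 * the way the regression test above reuses heap_tableam_handler for "heap2".
 * The TableAmRoutine callback members (scans, inserts, updates, etc.) are
 * defined in access/tableam.h and are deliberately left out of this sketch.
 */
#include "postgres.h"

#include "access/tableam.h"
#include "fmgr.h"

PG_MODULE_MAGIC;

PG_FUNCTION_INFO_V1(mytable_tableam_handler);

static const TableAmRoutine mytable_methods = {
	.type = T_TableAmRoutine,
	/* ... table AM callbacks from access/tableam.h would be filled in here ... */
};

Datum
mytable_tableam_handler(PG_FUNCTION_ARGS)
{
	PG_RETURN_POINTER(&mytable_methods);
}

At the SQL level such a handler would then be wired up the same way the heap2 test AM is: CREATE ACCESS METHOD mytable TYPE TABLE HANDLER mytable_tableam_handler, followed by CREATE TABLE ... USING mytable.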