Pluggable Storage.
author     Andres Freund <andres@anarazel.de>
           Tue, 11 Dec 2018 01:36:11 +0000 (17:36 -0800)
committer  Andres Freund <andres@anarazel.de>
           Tue, 11 Dec 2018 01:36:11 +0000 (17:36 -0800)
Author: Haribabu Kommi, Andres Freund and others.
Reviewed-By:
Discussion: https://postgr.es/m/
Backpatch:
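
This commit adds the basic infrastructure for pluggable table access
methods: a new src/backend/access/table/ directory (tableam.c,
tableamapi.c), the access/tableam.h callback interface, a heap
implementation of that interface in heapam_handler.c, and a move of
src/backend/utils/time/tqual.c to access/heap/heapam_visibility.c.
Callers are switched from the heap-specific scan API (heap_beginscan()
and friends, returning HeapScanDesc) to AM-independent table_*
wrappers returning TableScanDesc; code that still depends on heap
internals now checks the relation's access method and downcasts.

[Editor's sketch, not part of the commit: the caller-side migration
pattern used throughout this diff, assuming only functions that appear
in the diff itself; scan_heap_only is a hypothetical caller.]

#include "postgres.h"

#include "access/heapam.h"
#include "access/tableam.h"
#include "catalog/pg_am_d.h"
#include "utils/rel.h"
#include "utils/snapshot.h"

static void
scan_heap_only(Relation rel, Snapshot snapshot)
{
	TableScanDesc scan;
	HeapScanDesc hscan;
	HeapTuple	tuple;

	/* heap-internal code must now verify the relation's access method */
	if (rel->rd_rel->relam != HEAP_TABLE_AM_OID)
		ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						errmsg("only heap AM is supported")));

	scan = table_beginscan(rel, snapshot, 0, NULL); /* was heap_beginscan() */
	hscan = (HeapScanDesc) scan;	/* downcast for heap-only fields */

	while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
	{
		/* hscan->rs_cbuf etc. remain available for heap-specific checks */
		(void) hscan;
	}

	table_endscan(scan);		/* was heap_endscan() */
}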

145 files changed:
contrib/amcheck/verify_nbtree.c
contrib/bloom/blinsert.c
contrib/pg_visibility/pg_visibility.c
contrib/pgrowlocks/pgrowlocks.c
contrib/pgstattuple/pgstatapprox.c
contrib/pgstattuple/pgstattuple.c
contrib/postgres_fdw/postgres_fdw.c
contrib/tsm_system_rows/tsm_system_rows.c
contrib/tsm_system_time/tsm_system_time.c
doc/src/sgml/fdwhandler.sgml
src/backend/access/Makefile
src/backend/access/brin/brin.c
src/backend/access/gin/gininsert.c
src/backend/access/gist/gistbuild.c
src/backend/access/gist/gistget.c
src/backend/access/hash/hash.c
src/backend/access/hash/hashsearch.c
src/backend/access/heap/Makefile
src/backend/access/heap/heapam.c
src/backend/access/heap/heapam_handler.c [new file with mode: 0644]
src/backend/access/heap/heapam_visibility.c [moved from src/backend/utils/time/tqual.c with 96% similarity]
src/backend/access/heap/rewriteheap.c
src/backend/access/heap/tuptoaster.c
src/backend/access/index/genam.c
src/backend/access/index/indexam.c
src/backend/access/nbtree/nbtinsert.c
src/backend/access/nbtree/nbtree.c
src/backend/access/nbtree/nbtsearch.c
src/backend/access/nbtree/nbtsort.c
src/backend/access/spgist/spginsert.c
src/backend/access/spgist/spgscan.c
src/backend/access/table/Makefile [new file with mode: 0644]
src/backend/access/table/tableam.c [new file with mode: 0644]
src/backend/access/table/tableamapi.c [new file with mode: 0644]
src/backend/access/tablesample/system.c
src/backend/bootstrap/bootparse.y
src/backend/bootstrap/bootstrap.c
src/backend/catalog/aclchk.c
src/backend/catalog/genbki.pl
src/backend/catalog/heap.c
src/backend/catalog/index.c
src/backend/catalog/partition.c
src/backend/catalog/pg_conversion.c
src/backend/catalog/pg_db_role_setting.c
src/backend/catalog/pg_publication.c
src/backend/catalog/pg_subscription.c
src/backend/catalog/toasting.c
src/backend/commands/amcmds.c
src/backend/commands/analyze.c
src/backend/commands/cluster.c
src/backend/commands/constraint.c
src/backend/commands/copy.c
src/backend/commands/createas.c
src/backend/commands/dbcommands.c
src/backend/commands/indexcmds.c
src/backend/commands/matview.c
src/backend/commands/tablecmds.c
src/backend/commands/tablespace.c
src/backend/commands/trigger.c
src/backend/commands/typecmds.c
src/backend/commands/vacuum.c
src/backend/executor/execAmi.c
src/backend/executor/execCurrent.c
src/backend/executor/execExprInterp.c
src/backend/executor/execIndexing.c
src/backend/executor/execMain.c
src/backend/executor/execPartition.c
src/backend/executor/execReplication.c
src/backend/executor/execScan.c
src/backend/executor/execTuples.c
src/backend/executor/execUtils.c
src/backend/executor/nodeBitmapHeapscan.c
src/backend/executor/nodeForeignscan.c
src/backend/executor/nodeGather.c
src/backend/executor/nodeGatherMerge.c
src/backend/executor/nodeIndexonlyscan.c
src/backend/executor/nodeIndexscan.c
src/backend/executor/nodeLockRows.c
src/backend/executor/nodeModifyTable.c
src/backend/executor/nodeSamplescan.c
src/backend/executor/nodeSeqscan.c
src/backend/executor/nodeTidscan.c
src/backend/executor/spi.c
src/backend/executor/tqueue.c
src/backend/nodes/copyfuncs.c
src/backend/optimizer/util/plancat.c
src/backend/parser/gram.y
src/backend/partitioning/partbounds.c
src/backend/postmaster/autovacuum.c
src/backend/postmaster/pgstat.c
src/backend/replication/logical/launcher.c
src/backend/replication/logical/snapbuild.c
src/backend/replication/logical/worker.c
src/backend/rewrite/rewriteDefine.c
src/backend/storage/lmgr/predicate.c
src/backend/utils/adt/pseudotypes.c
src/backend/utils/adt/ri_triggers.c
src/backend/utils/adt/selfuncs.c
src/backend/utils/adt/tid.c
src/backend/utils/cache/relcache.c
src/backend/utils/init/postinit.c
src/backend/utils/misc/guc.c
src/backend/utils/sort/tuplesort.c
src/backend/utils/time/Makefile
src/backend/utils/time/snapmgr.c
src/include/access/genam.h
src/include/access/heapam.h
src/include/access/relscan.h
src/include/access/rewriteheap.h
src/include/access/tableam.h [new file with mode: 0644]
src/include/access/tsmapi.h
src/include/catalog/heap.h
src/include/catalog/index.h
src/include/catalog/pg_am.dat
src/include/catalog/pg_am.h
src/include/catalog/pg_class.dat
src/include/catalog/pg_class.h
src/include/catalog/pg_proc.dat
src/include/catalog/pg_type.dat
src/include/commands/trigger.h
src/include/executor/executor.h
src/include/executor/spi.h
src/include/executor/tqueue.h
src/include/executor/tuptable.h
src/include/foreign/fdwapi.h
src/include/nodes/execnodes.h
src/include/nodes/lockoptions.h
src/include/nodes/nodes.h
src/include/nodes/parsenodes.h
src/include/nodes/primnodes.h
src/include/nodes/tidbitmap.h
src/include/storage/bufmgr.h
src/include/utils/rel.h
src/include/utils/relcache.h
src/include/utils/snapshot.h
src/include/utils/tqual.h
src/include/utils/tuplesort.h
src/test/isolation/expected/partition-key-update-1.out
src/test/regress/expected/create_am.out
src/test/regress/expected/opr_sanity.out
src/test/regress/expected/type_sanity.out
src/test/regress/sql/create_am.sql
src/test/regress/sql/opr_sanity.sql
src/test/regress/sql/type_sanity.sql
src/tools/pgindent/typedefs.list

diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c
index 14ed31753fb54646f3dd3c4b18cb89f6cb0dbdfc..aa2ac1b852de8d0a7ab2ce376cc06e096e60731f 100644
--- a/contrib/amcheck/verify_nbtree.c
+++ b/contrib/amcheck/verify_nbtree.c
@@ -25,6 +25,7 @@
 
 #include "access/htup_details.h"
 #include "access/nbtree.h"
+#include "access/tableam.h"
 #include "access/transam.h"
 #include "access/xact.h"
 #include "catalog/index.h"
@@ -35,6 +36,7 @@
 #include "storage/lmgr.h"
 #include "utils/memutils.h"
 #include "utils/snapmgr.h"
+#include "utils/tqual.h"
 
 
 PG_MODULE_MAGIC;
@@ -478,7 +480,7 @@ bt_check_every_level(Relation rel, Relation heaprel, bool readonly,
    if (state->heapallindexed)
    {
        IndexInfo  *indexinfo = BuildIndexInfo(state->rel);
-       HeapScanDesc scan;
+       TableScanDesc scan;
 
        /* Report on extra downlink checks performed in readonly case */
        if (state->readonly)
@@ -497,7 +499,7 @@ bt_check_every_level(Relation rel, Relation heaprel, bool readonly,
         *
-        * Note that IndexBuildHeapScan() calls heap_endscan() for us.
+        * Note that table_index_build_scan() calls heap_endscan() for us.
         */
-       scan = heap_beginscan_strat(state->heaprel, /* relation */
+       scan = table_beginscan_strat(state->heaprel, /* relation */
                                    snapshot,   /* snapshot */
                                    0,  /* number of keys */
                                    NULL,   /* scan key */
@@ -531,8 +533,8 @@ bt_check_every_level(Relation rel, Relation heaprel, bool readonly,
             RelationGetRelationName(state->rel),
             RelationGetRelationName(state->heaprel));
 
-       IndexBuildHeapScan(state->heaprel, state->rel, indexinfo, true,
-                          bt_tuple_present_callback, (void *) state, scan);
+       table_index_build_scan(state->heaprel, state->rel, indexinfo, true,
+                              bt_tuple_present_callback, (void *) state, scan);
 
        ereport(DEBUG1,
                (errmsg_internal("finished verifying presence of " INT64_FORMAT " tuples from table \"%s\" with bitset %.2f%% set",
diff --git a/contrib/bloom/blinsert.c b/contrib/bloom/blinsert.c
index 9f223d3b2a7bd4c4247016b11557850b8c25b9df..413828818b5941062c2e5f5112803b6f4fab2ca2 100644
--- a/contrib/bloom/blinsert.c
+++ b/contrib/bloom/blinsert.c
@@ -14,6 +14,7 @@
 
 #include "access/genam.h"
 #include "access/generic_xlog.h"
+#include "access/tableam.h"
 #include "catalog/index.h"
 #include "miscadmin.h"
 #include "storage/bufmgr.h"
@@ -69,7 +70,7 @@ initCachedPage(BloomBuildState *buildstate)
 }
 
 /*
- * Per-tuple callback from IndexBuildHeapScan.
+ * Per-tuple callback from table_index_build_scan.
  */
 static void
 bloomBuildCallback(Relation index, HeapTuple htup, Datum *values,
@@ -141,7 +142,7 @@ blbuild(Relation heap, Relation index, IndexInfo *indexInfo)
    initCachedPage(&buildstate);
 
    /* Do the heap scan */
-   reltuples = IndexBuildHeapScan(heap, index, indexInfo, true,
+   reltuples = table_index_build_scan(heap, index, indexInfo, true,
                                   bloomBuildCallback, (void *) &buildstate,
                                   NULL);
 
diff --git a/contrib/pg_visibility/pg_visibility.c b/contrib/pg_visibility/pg_visibility.c
index c1aae9d655121a10892c69f29689087173e7bc04..ce9ca704f6a26ae0e579de994c8f0deaf823b6d9 100644
--- a/contrib/pg_visibility/pg_visibility.c
+++ b/contrib/pg_visibility/pg_visibility.c
@@ -13,6 +13,7 @@
 #include "access/htup_details.h"
 #include "access/visibilitymap.h"
 #include "catalog/pg_type.h"
+#include "catalog/pg_am_d.h"
 #include "catalog/storage_xlog.h"
 #include "funcapi.h"
 #include "miscadmin.h"
@@ -565,6 +566,10 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen)
    /* Only some relkinds have a visibility map */
    check_relation_relkind(rel);
 
+   if (rel->rd_rel->relam != HEAP_TABLE_AM_OID)
+       ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                       errmsg("only heap AM is supported")));
+
    nblocks = RelationGetNumberOfBlocks(rel);
 
    /*
diff --git a/contrib/pgrowlocks/pgrowlocks.c b/contrib/pgrowlocks/pgrowlocks.c
index 94e051d642b435b88baee4b0e931715b5c0f2852..852adba35949dcf79f4113cae02447ac9f6fc73e 100644
--- a/contrib/pgrowlocks/pgrowlocks.c
+++ b/contrib/pgrowlocks/pgrowlocks.c
 
 #include "access/multixact.h"
 #include "access/relscan.h"
+#include "access/tableam.h"
 #include "access/xact.h"
 #include "catalog/namespace.h"
+#include "catalog/pg_am_d.h"
 #include "catalog/pg_authid.h"
 #include "funcapi.h"
 #include "miscadmin.h"
@@ -55,7 +57,7 @@ PG_FUNCTION_INFO_V1(pgrowlocks);
 typedef struct
 {
    Relation    rel;
-   HeapScanDesc scan;
+   TableScanDesc scan;
    int         ncolumns;
 } MyData;
 
@@ -70,7 +72,8 @@ Datum
 pgrowlocks(PG_FUNCTION_ARGS)
 {
    FuncCallContext *funcctx;
-   HeapScanDesc scan;
+   TableScanDesc scan;
+   HeapScanDesc hscan;
    HeapTuple   tuple;
    TupleDesc   tupdesc;
    AttInMetadata *attinmeta;
@@ -99,6 +102,10 @@ pgrowlocks(PG_FUNCTION_ARGS)
        relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
        rel = relation_openrv(relrv, AccessShareLock);
 
+       if (rel->rd_rel->relam != HEAP_TABLE_AM_OID)
+           ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                           errmsg("only heap AM is supported")));
+
        if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
            ereport(ERROR,
                    (errcode(ERRCODE_WRONG_OBJECT_TYPE),
@@ -124,7 +131,8 @@ pgrowlocks(PG_FUNCTION_ARGS)
            aclcheck_error(aclresult, get_relkind_objtype(rel->rd_rel->relkind),
                           RelationGetRelationName(rel));
 
-       scan = heap_beginscan(rel, GetActiveSnapshot(), 0, NULL);
+       scan = table_beginscan(rel, GetActiveSnapshot(), 0, NULL);
+       hscan = (HeapScanDesc) scan;
        mydata = palloc(sizeof(*mydata));
        mydata->rel = rel;
        mydata->scan = scan;
@@ -138,20 +146,20 @@ pgrowlocks(PG_FUNCTION_ARGS)
    attinmeta = funcctx->attinmeta;
    mydata = (MyData *) funcctx->user_fctx;
    scan = mydata->scan;
+   hscan = (HeapScanDesc) scan;
 
-   /* scan the relation */
-   while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+   /* scan the relation (will error if not heap) */
+   while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
    {
        HTSU_Result htsu;
        TransactionId xmax;
        uint16      infomask;
 
        /* must hold a buffer lock to call HeapTupleSatisfiesUpdate */
-       LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+       LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
 
-       htsu = HeapTupleSatisfiesUpdate(tuple,
-                                       GetCurrentCommandId(false),
-                                       scan->rs_cbuf);
+       htsu = HeapTupleSatisfiesUpdate(tuple, GetCurrentCommandId(false),
+                                       hscan->rs_cbuf);
        xmax = HeapTupleHeaderGetRawXmax(tuple->t_data);
        infomask = tuple->t_data->t_infomask;
 
@@ -284,7 +292,7 @@ pgrowlocks(PG_FUNCTION_ARGS)
                         BackendXidGetPid(xmax));
            }
 
-           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+           LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
 
            /* build a tuple */
            tuple = BuildTupleFromCStrings(attinmeta, values);
@@ -301,11 +309,11 @@ pgrowlocks(PG_FUNCTION_ARGS)
        }
        else
        {
-           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+           LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
        }
    }
 
-   heap_endscan(scan);
+   table_endscan(scan);
    heap_close(mydata->rel, AccessShareLock);
 
    SRF_RETURN_DONE(funcctx);
diff --git a/contrib/pgstattuple/pgstatapprox.c b/contrib/pgstattuple/pgstatapprox.c
index ef33cacec6af983d92547021dabaeebd566faa60..c59fd10dc17608d3c8c6cf10edfef0ab8222d70f 100644
--- a/contrib/pgstattuple/pgstatapprox.c
+++ b/contrib/pgstattuple/pgstatapprox.c
  */
 #include "postgres.h"
 
-#include "access/visibilitymap.h"
 #include "access/transam.h"
+#include "access/visibilitymap.h"
 #include "access/xact.h"
 #include "access/multixact.h"
 #include "access/htup_details.h"
 #include "catalog/namespace.h"
+#include "catalog/pg_am_d.h"
+#include "commands/vacuum.h"
 #include "funcapi.h"
 #include "miscadmin.h"
 #include "storage/bufmgr.h"
@@ -26,7 +28,7 @@
 #include "storage/lmgr.h"
 #include "utils/builtins.h"
 #include "utils/tqual.h"
-#include "commands/vacuum.h"
+
 
 PG_FUNCTION_INFO_V1(pgstattuple_approx);
 PG_FUNCTION_INFO_V1(pgstattuple_approx_v1_5);
@@ -287,6 +289,10 @@ pgstattuple_approx_internal(Oid relid, FunctionCallInfo fcinfo)
                 errmsg("\"%s\" is not a table or materialized view",
                        RelationGetRelationName(rel))));
 
+   if (rel->rd_rel->relam != HEAP_TABLE_AM_OID)
+       ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                       errmsg("only heap AM is supported")));
+
    statapprox_heap(rel, &stat);
 
    relation_close(rel, AccessShareLock);
diff --git a/contrib/pgstattuple/pgstattuple.c b/contrib/pgstattuple/pgstattuple.c
index 6d67bd8271c63c732360910c865956809593f18e..520438d779e9d073d6dab7a26610782960cb48bf 100644
--- a/contrib/pgstattuple/pgstattuple.c
+++ b/contrib/pgstattuple/pgstattuple.c
@@ -28,6 +28,7 @@
 #include "access/hash.h"
 #include "access/nbtree.h"
 #include "access/relscan.h"
+#include "access/tableam.h"
 #include "catalog/namespace.h"
 #include "catalog/pg_am.h"
 #include "funcapi.h"
@@ -317,7 +318,8 @@ pgstat_relation(Relation rel, FunctionCallInfo fcinfo)
 static Datum
 pgstat_heap(Relation rel, FunctionCallInfo fcinfo)
 {
-   HeapScanDesc scan;
+   TableScanDesc scan;
+   HeapScanDesc hscan;
    HeapTuple   tuple;
    BlockNumber nblocks;
    BlockNumber block = 0;      /* next block to count free space in */
@@ -327,20 +329,22 @@ pgstat_heap(Relation rel, FunctionCallInfo fcinfo)
    SnapshotData SnapshotDirty;
 
    /* Disable syncscan because we assume we scan from block zero upwards */
-   scan = heap_beginscan_strat(rel, SnapshotAny, 0, NULL, true, false);
+   scan = table_beginscan_strat(rel, SnapshotAny, 0, NULL, true, false);
+   hscan = (HeapScanDesc) scan;
+
    InitDirtySnapshot(SnapshotDirty);
 
    nblocks = scan->rs_nblocks; /* # blocks to be scanned */
 
-   /* scan the relation */
-   while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+   /* scan the relation (will error if not heap) */
+   while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
    {
        CHECK_FOR_INTERRUPTS();
 
-       /* must hold a buffer lock to call HeapTupleSatisfiesVisibility */
+       /* must hold a buffer lock to call HeapTupleSatisfies */
-       LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+       LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
 
-       if (HeapTupleSatisfiesVisibility(tuple, &SnapshotDirty, scan->rs_cbuf))
+       if (HeapTupleSatisfies(tuple, &SnapshotDirty, hscan->rs_cbuf))
        {
            stat.tuple_len += tuple->t_len;
            stat.tuple_count++;
@@ -351,7 +355,7 @@ pgstat_heap(Relation rel, FunctionCallInfo fcinfo)
            stat.dead_tuple_count++;
        }
 
-       LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+       LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
 
        /*
         * To avoid physically reading the table twice, try to do the
@@ -366,7 +370,7 @@ pgstat_heap(Relation rel, FunctionCallInfo fcinfo)
            CHECK_FOR_INTERRUPTS();
 
            buffer = ReadBufferExtended(rel, MAIN_FORKNUM, block,
-                                       RBM_NORMAL, scan->rs_strategy);
+                                       RBM_NORMAL, hscan->rs_strategy);
            LockBuffer(buffer, BUFFER_LOCK_SHARE);
            stat.free_space += PageGetHeapFreeSpace((Page) BufferGetPage(buffer));
            UnlockReleaseBuffer(buffer);
@@ -379,14 +383,14 @@ pgstat_heap(Relation rel, FunctionCallInfo fcinfo)
        CHECK_FOR_INTERRUPTS();
 
        buffer = ReadBufferExtended(rel, MAIN_FORKNUM, block,
-                                   RBM_NORMAL, scan->rs_strategy);
+                                   RBM_NORMAL, hscan->rs_strategy);
        LockBuffer(buffer, BUFFER_LOCK_SHARE);
        stat.free_space += PageGetHeapFreeSpace((Page) BufferGetPage(buffer));
        UnlockReleaseBuffer(buffer);
        block++;
    }
 
-   heap_endscan(scan);
+   table_endscan(scan);
    relation_close(rel, AccessShareLock);
 
    stat.table_len = (uint64) nblocks * BLCKSZ;
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 674eb982d06bf70ca91532d0eedd4cfc2d7db971..cc5b928950a5a1a665e5f150b1e0e606a5ebf97d 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -3927,7 +3927,7 @@ apply_returning_filter(PgFdwDirectModifyState *dmstate,
    /*
     * Use the trigger tuple slot as a place to store the result tuple.
     */
-   resultSlot = estate->es_trig_tuple_slot;
+   resultSlot = ExecTriggerGetReturnSlot(estate, dmstate->resultRel);
    if (resultSlot->tts_tupleDescriptor != resultTupType)
        ExecSetSlotDescriptor(resultSlot, resultTupType);
 
diff --git a/contrib/tsm_system_rows/tsm_system_rows.c b/contrib/tsm_system_rows/tsm_system_rows.c
index 83f841f0c2ea056ef41a4a3900dfdadd40a400a8..9f26c76ba6bf1f05b062d9659d7517b5b2ebc023 100644
--- a/contrib/tsm_system_rows/tsm_system_rows.c
+++ b/contrib/tsm_system_rows/tsm_system_rows.c
@@ -46,7 +46,6 @@ typedef struct
 {
    uint32      seed;           /* random seed */
    int64       ntuples;        /* number of tuples to return */
-   int64       donetuples;     /* number of tuples already returned */
    OffsetNumber lt;            /* last tuple returned from current block */
    BlockNumber doneblocks;     /* number of already-scanned blocks */
    BlockNumber lb;             /* last block visited */
@@ -67,11 +66,10 @@ static void system_rows_beginsamplescan(SampleScanState *node,
                            Datum *params,
                            int nparams,
                            uint32 seed);
-static BlockNumber system_rows_nextsampleblock(SampleScanState *node);
+static BlockNumber system_rows_nextsampleblock(SampleScanState *node, BlockNumber nblocks);
 static OffsetNumber system_rows_nextsampletuple(SampleScanState *node,
                            BlockNumber blockno,
                            OffsetNumber maxoffset);
-static bool SampleOffsetVisible(OffsetNumber tupoffset, HeapScanDesc scan);
 static uint32 random_relative_prime(uint32 n, SamplerRandomState randstate);
 
 
@@ -187,7 +185,6 @@ system_rows_beginsamplescan(SampleScanState *node,
 
    sampler->seed = seed;
    sampler->ntuples = ntuples;
-   sampler->donetuples = 0;
    sampler->lt = InvalidOffsetNumber;
    sampler->doneblocks = 0;
    /* lb will be initialized during first NextSampleBlock call */
@@ -206,10 +203,9 @@ system_rows_beginsamplescan(SampleScanState *node,
  * Uses linear probing algorithm for picking next block.
  */
 static BlockNumber
-system_rows_nextsampleblock(SampleScanState *node)
+system_rows_nextsampleblock(SampleScanState *node, BlockNumber nblocks)
 {
    SystemRowsSamplerData *sampler = (SystemRowsSamplerData *) node->tsm_state;
-   HeapScanDesc scan = node->ss.ss_currentScanDesc;
 
    /* First call within scan? */
    if (sampler->doneblocks == 0)
@@ -221,14 +217,14 @@ system_rows_nextsampleblock(SampleScanState *node)
            SamplerRandomState randstate;
 
            /* If relation is empty, there's nothing to scan */
-           if (scan->rs_nblocks == 0)
+           if (nblocks == 0)
                return InvalidBlockNumber;
 
            /* We only need an RNG during this setup step */
            sampler_random_init_state(sampler->seed, randstate);
 
            /* Compute nblocks/firstblock/step only once per query */
-           sampler->nblocks = scan->rs_nblocks;
+           sampler->nblocks = nblocks;
 
            /* Choose random starting block within the relation */
            /* (Actually this is the predecessor of the first block visited) */
@@ -245,7 +241,7 @@ system_rows_nextsampleblock(SampleScanState *node)
 
    /* If we've read all blocks or returned all needed tuples, we're done */
    if (++sampler->doneblocks > sampler->nblocks ||
-       sampler->donetuples >= sampler->ntuples)
+       node->donetuples >= sampler->ntuples)
        return InvalidBlockNumber;
 
    /*
@@ -258,7 +254,7 @@ system_rows_nextsampleblock(SampleScanState *node)
    {
        /* Advance lb, using uint64 arithmetic to forestall overflow */
        sampler->lb = ((uint64) sampler->lb + sampler->step) % sampler->nblocks;
-   } while (sampler->lb >= scan->rs_nblocks);
+   } while (sampler->lb >= nblocks);
 
    return sampler->lb;
 }
@@ -278,76 +274,27 @@ system_rows_nextsampletuple(SampleScanState *node,
                            OffsetNumber maxoffset)
 {
    SystemRowsSamplerData *sampler = (SystemRowsSamplerData *) node->tsm_state;
-   HeapScanDesc scan = node->ss.ss_currentScanDesc;
    OffsetNumber tupoffset = sampler->lt;
 
    /* Quit if we've returned all needed tuples */
-   if (sampler->donetuples >= sampler->ntuples)
+   if (node->donetuples >= sampler->ntuples)
        return InvalidOffsetNumber;
 
-   /*
-    * Because we should only count visible tuples as being returned, we need
-    * to search for a visible tuple rather than just let the core code do it.
-    */
-
-   /* We rely on the data accumulated in pagemode access */
-   Assert(scan->rs_pageatatime);
-   for (;;)
-   {
-       /* Advance to next possible offset on page */
-       if (tupoffset == InvalidOffsetNumber)
-           tupoffset = FirstOffsetNumber;
-       else
-           tupoffset++;
-
-       /* Done? */
-       if (tupoffset > maxoffset)
-       {
-           tupoffset = InvalidOffsetNumber;
-           break;
-       }
+   /* Advance to next possible offset on page */
+   if (tupoffset == InvalidOffsetNumber)
+       tupoffset = FirstOffsetNumber;
+   else
+       tupoffset++;
 
-       /* Found a candidate? */
-       if (SampleOffsetVisible(tupoffset, scan))
-       {
-           sampler->donetuples++;
-           break;
-       }
-   }
+   /* Done? */
+   if (tupoffset > maxoffset)
+       tupoffset = InvalidOffsetNumber;
 
    sampler->lt = tupoffset;
 
    return tupoffset;
 }
 
-/*
- * Check if tuple offset is visible
- *
- * In pageatatime mode, heapgetpage() already did visibility checks,
- * so just look at the info it left in rs_vistuples[].
- */
-static bool
-SampleOffsetVisible(OffsetNumber tupoffset, HeapScanDesc scan)
-{
-   int         start = 0,
-               end = scan->rs_ntuples - 1;
-
-   while (start <= end)
-   {
-       int         mid = (start + end) / 2;
-       OffsetNumber curoffset = scan->rs_vistuples[mid];
-
-       if (tupoffset == curoffset)
-           return true;
-       else if (tupoffset < curoffset)
-           end = mid - 1;
-       else
-           start = mid + 1;
-   }
-
-   return false;
-}
-
 /*
  * Compute greatest common divisor of two uint32's.
  */
diff --git a/contrib/tsm_system_time/tsm_system_time.c b/contrib/tsm_system_time/tsm_system_time.c
index 249d6f4d463181e5fd05bf853b09e5a3543f4ba3..ee6f4b95a2450527f7b220a0b5936604a2278af7 100644
--- a/contrib/tsm_system_time/tsm_system_time.c
+++ b/contrib/tsm_system_time/tsm_system_time.c
@@ -66,7 +66,7 @@ static void system_time_beginsamplescan(SampleScanState *node,
                            Datum *params,
                            int nparams,
                            uint32 seed);
-static BlockNumber system_time_nextsampleblock(SampleScanState *node);
+static BlockNumber system_time_nextsampleblock(SampleScanState *node, BlockNumber nblocks);
 static OffsetNumber system_time_nextsampletuple(SampleScanState *node,
                            BlockNumber blockno,
                            OffsetNumber maxoffset);
@@ -213,10 +213,9 @@ system_time_beginsamplescan(SampleScanState *node,
  * Uses linear probing algorithm for picking next block.
  */
 static BlockNumber
-system_time_nextsampleblock(SampleScanState *node)
+system_time_nextsampleblock(SampleScanState *node, BlockNumber nblocks)
 {
    SystemTimeSamplerData *sampler = (SystemTimeSamplerData *) node->tsm_state;
-   HeapScanDesc scan = node->ss.ss_currentScanDesc;
    instr_time  cur_time;
 
    /* First call within scan? */
@@ -229,14 +228,14 @@ system_time_nextsampleblock(SampleScanState *node)
            SamplerRandomState randstate;
 
            /* If relation is empty, there's nothing to scan */
-           if (scan->rs_nblocks == 0)
+           if (nblocks == 0)
                return InvalidBlockNumber;
 
            /* We only need an RNG during this setup step */
            sampler_random_init_state(sampler->seed, randstate);
 
            /* Compute nblocks/firstblock/step only once per query */
-           sampler->nblocks = scan->rs_nblocks;
+           sampler->nblocks = nblocks;
 
            /* Choose random starting block within the relation */
            /* (Actually this is the predecessor of the first block visited) */
@@ -272,7 +271,7 @@ system_time_nextsampleblock(SampleScanState *node)
    {
        /* Advance lb, using uint64 arithmetic to forestall overflow */
        sampler->lb = ((uint64) sampler->lb + sampler->step) % sampler->nblocks;
-   } while (sampler->lb >= scan->rs_nblocks);
+   } while (sampler->lb >= nblocks);
 
    return sampler->lb;
 }
diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml
index 4ce88dd77c19edefca0121e66d8ff4e584b8c07c..12769f3288d5617ed453e0c3a359af2ea6ea3c87 100644
--- a/doc/src/sgml/fdwhandler.sgml
+++ b/doc/src/sgml/fdwhandler.sgml
@@ -988,23 +988,25 @@ GetForeignRowMarkType(RangeTblEntry *rte,
 
     <para>
 <programlisting>
-HeapTuple
+TupleTableSlot *
 RefetchForeignRow(EState *estate,
                   ExecRowMark *erm,
                   Datum rowid,
+                  TupleTableSlot *slot,
                   bool *updated);
 </programlisting>
 
-     Re-fetch one tuple from the foreign table, after locking it if required.
+     Re-fetch one tuple slot from the foreign table, after locking it if required.
      <literal>estate</literal> is global execution state for the query.
      <literal>erm</literal> is the <structname>ExecRowMark</structname> struct describing
      the target foreign table and the row lock type (if any) to acquire.
      <literal>rowid</literal> identifies the tuple to be fetched.
-     <literal>updated</literal> is an output parameter.
+     <literal>slot</literal> contains nothing useful upon call, but can be used to
+     hold the returned tuple. <literal>updated</literal> is an output parameter.
     </para>
 
     <para>
-     This function should return a palloc'ed copy of the fetched tuple,
+     This function should return a slot containing the fetched tuple
      or <literal>NULL</literal> if the row lock couldn't be obtained.  The row lock
      type to acquire is defined by <literal>erm-&gt;markType</literal>, which is the
      value previously returned by <function>GetForeignRowMarkType</function>.
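
[Editor's sketch, not part of the commit: an FDW callback shaped per the
signature documented above; exampleRefetchForeignRow and
example_fetch_and_lock_row are hypothetical names.]

/* hypothetical helper: fetch and lock the remote row, filling the slot */
extern bool example_fetch_and_lock_row(Relation rel, Datum rowid,
									   RowMarkType markType,
									   TupleTableSlot *slot);

static TupleTableSlot *
exampleRefetchForeignRow(EState *estate, ExecRowMark *erm, Datum rowid,
						 TupleTableSlot *slot, bool *updated)
{
	/*
	 * Re-fetch the row identified by rowid, acquiring the row lock dictated
	 * by erm->markType, and store the result in the caller-supplied slot.
	 */
	if (!example_fetch_and_lock_row(erm->relation, rowid, erm->markType, slot))
		return NULL;			/* row lock couldn't be obtained */

	*updated = false;			/* true only if a newer row version was read */
	return slot;
}
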
diff --git a/src/backend/access/Makefile b/src/backend/access/Makefile
index bd93a6a8d1e606fcb6ce30deeddd5068c97b3f85..0880e0a8bbb63901164aef4a2de577db59c98b25 100644
--- a/src/backend/access/Makefile
+++ b/src/backend/access/Makefile
@@ -9,6 +9,6 @@ top_builddir = ../../..
 include $(top_builddir)/src/Makefile.global
 
 SUBDIRS        = brin common gin gist hash heap index nbtree rmgrdesc spgist \
-             tablesample transam
+             table tablesample transam
 
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c
index e95fbbcea74e3c233318d78e9c0d966cd23e58a5..b70737a7a65e2d4db8116ae8a9cc0073affc8245 100644
--- a/src/backend/access/brin/brin.c
+++ b/src/backend/access/brin/brin.c
@@ -21,6 +21,7 @@
 #include "access/brin_xlog.h"
 #include "access/reloptions.h"
 #include "access/relscan.h"
+#include "access/tableam.h"
 #include "access/xloginsert.h"
 #include "catalog/index.h"
 #include "catalog/pg_am.h"
@@ -585,7 +586,7 @@ brinendscan(IndexScanDesc scan)
 }
 
 /*
- * Per-heap-tuple callback for IndexBuildHeapScan.
+ * Per-heap-tuple callback for table_index_build_scan.
  *
  * Note we don't worry about the page range at the end of the table here; it is
  * present in the build state struct after we're called the last time, but not
@@ -716,8 +717,8 @@ brinbuild(Relation heap, Relation index, IndexInfo *indexInfo)
     * Now scan the relation.  No syncscan allowed here because we want the
     * heap blocks in physical order.
     */
-   reltuples = IndexBuildHeapScan(heap, index, indexInfo, false,
-                                  brinbuildCallback, (void *) state, NULL);
+   reltuples = table_index_build_scan(heap, index, indexInfo, false,
+                                      brinbuildCallback, (void *) state, NULL);
 
    /* process the final batch */
    form_and_insert_tuple(state);
@@ -1228,13 +1229,16 @@ summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel,
     * short of brinbuildCallback creating the new index entry.
     *
     * Note that it is critical we use the "any visible" mode of
-    * IndexBuildHeapRangeScan here: otherwise, we would miss tuples inserted
-    * by transactions that are still in progress, among other corner cases.
+    * table_index_build_range_scan here: otherwise, we would miss tuples
+    * inserted by transactions that are still in progress, among other corner
+    * cases.
+    *
+    * ZBORKED?
     */
    state->bs_currRangeStart = heapBlk;
-   IndexBuildHeapRangeScan(heapRel, state->bs_irel, indexInfo, false, true,
-                           heapBlk, scanNumBlks,
-                           brinbuildCallback, (void *) state, NULL);
+   table_index_build_range_scan(heapRel, state->bs_irel, indexInfo, false, true,
+                                heapBlk, scanNumBlks,
+                                brinbuildCallback, (void *) state, NULL);
 
    /*
     * Now we update the values obtained by the scan with the placeholder
diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c
index 5281eb682382ff170fadb0f91abde8a5842974b4..621bd93ccd5984ef8ee62bea39fb662c4a2f80dc 100644
--- a/src/backend/access/gin/gininsert.c
+++ b/src/backend/access/gin/gininsert.c
@@ -17,6 +17,7 @@
 #include "access/gin_private.h"
 #include "access/ginxlog.h"
 #include "access/xloginsert.h"
+#include "access/tableam.h"
 #include "catalog/index.h"
 #include "miscadmin.h"
 #include "storage/bufmgr.h"
@@ -394,8 +395,8 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
     * Do the heap scan.  We disallow sync scan here because dataPlaceToPage
     * prefers to receive tuples in TID order.
     */
-   reltuples = IndexBuildHeapScan(heap, index, indexInfo, false,
-                                  ginBuildCallback, (void *) &buildstate, NULL);
+   reltuples = table_index_build_scan(heap, index, indexInfo, false,
+                                      ginBuildCallback, (void *) &buildstate, NULL);
 
    /* dump remaining entries to the index */
    oldCtx = MemoryContextSwitchTo(buildstate.tmpCtx);
diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c
index 434f15f0148e0ff90dd131d36783edb405982960..c39ddc910cc694cfaba7d480910fe4cd305299bd 100644
--- a/src/backend/access/gist/gistbuild.c
+++ b/src/backend/access/gist/gistbuild.c
@@ -19,6 +19,7 @@
 #include "access/genam.h"
 #include "access/gist_private.h"
 #include "access/gistxlog.h"
+#include "access/tableam.h"
 #include "access/xloginsert.h"
 #include "catalog/index.h"
 #include "miscadmin.h"
@@ -202,8 +203,8 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
    /*
     * Do the heap scan.
     */
-   reltuples = IndexBuildHeapScan(heap, index, indexInfo, true,
-                                  gistBuildCallback, (void *) &buildstate, NULL);
+   reltuples = table_index_build_scan(heap, index, indexInfo, true,
+                                      gistBuildCallback, (void *) &buildstate, NULL);
 
    /*
     * If buffering was used, flush out all the tuples that are still in the
@@ -452,7 +453,7 @@ calculatePagesPerBuffer(GISTBuildState *buildstate, int levelStep)
 }
 
 /*
- * Per-tuple callback from IndexBuildHeapScan.
+ * Per-tuple callback from table_index_build_scan.
  */
 static void
 gistBuildCallback(Relation index,
diff --git a/src/backend/access/gist/gistget.c b/src/backend/access/gist/gistget.c
index e4a3786be01cfc5d9ecea9441a16435edbc6b339..7c75461dd12096c13c05b9f3a834e17e2af6581e 100644
--- a/src/backend/access/gist/gistget.c
+++ b/src/backend/access/gist/gistget.c
@@ -561,7 +561,7 @@ getNextNearest(IndexScanDesc scan)
        if (GISTSearchItemIsHeap(*item))
        {
            /* found a heap item at currently minimal distance */
-           scan->xs_ctup.t_self = item->data.heap.heapPtr;
+           scan->xs_heaptid = item->data.heap.heapPtr;
            scan->xs_recheck = item->data.heap.recheck;
 
            index_store_float8_orderby_distances(scan, so->orderByTypes,
@@ -650,7 +650,7 @@ gistgettuple(IndexScanDesc scan, ScanDirection dir)
                            so->pageData[so->curPageData - 1].offnum;
                }
                /* continuing to return tuples from a leaf page */
-               scan->xs_ctup.t_self = so->pageData[so->curPageData].heapPtr;
+               scan->xs_heaptid = so->pageData[so->curPageData].heapPtr;
                scan->xs_recheck = so->pageData[so->curPageData].recheck;
 
                /* in an index-only scan, also return the reconstructed tuple */
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index 0002df30c0d67fcb199fe3e574be8b725ad1c0f7..18b26d902663ab9213c817ca1725559a67b7895c 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -21,6 +21,7 @@
 #include "access/hash.h"
 #include "access/hash_xlog.h"
 #include "access/relscan.h"
+#include "access/tableam.h"
 #include "catalog/index.h"
 #include "commands/vacuum.h"
 #include "miscadmin.h"
@@ -159,7 +160,7 @@ hashbuild(Relation heap, Relation index, IndexInfo *indexInfo)
    buildstate.heapRel = heap;
 
    /* do the heap scan */
-   reltuples = IndexBuildHeapScan(heap, index, indexInfo, true,
+   reltuples = table_index_build_scan(heap, index, indexInfo, true,
                                   hashbuildCallback, (void *) &buildstate, NULL);
 
    if (buildstate.spool)
@@ -190,7 +191,7 @@ hashbuildempty(Relation index)
 }
 
 /*
- * Per-tuple callback from IndexBuildHeapScan
+ * Per-tuple callback from table_index_build_scan
  */
 static void
 hashbuildCallback(Relation index,
diff --git a/src/backend/access/hash/hashsearch.c b/src/backend/access/hash/hashsearch.c
index 650041db0a5ca3a0dabb9429326916d07b0ef2c3..edda7991ad0cc920a751eae21ef1e48cffae3997 100644
--- a/src/backend/access/hash/hashsearch.c
+++ b/src/backend/access/hash/hashsearch.c
@@ -119,7 +119,7 @@ _hash_next(IndexScanDesc scan, ScanDirection dir)
 
    /* OK, itemIndex says what to return */
    currItem = &so->currPos.items[so->currPos.itemIndex];
-   scan->xs_ctup.t_self = currItem->heapTid;
+   scan->xs_heaptid = currItem->heapTid;
 
    return true;
 }
@@ -432,7 +432,7 @@ _hash_first(IndexScanDesc scan, ScanDirection dir)
 
    /* OK, itemIndex says what to return */
    currItem = &so->currPos.items[so->currPos.itemIndex];
-   scan->xs_ctup.t_self = currItem->heapTid;
+   scan->xs_heaptid = currItem->heapTid;
 
    /* if we're here, _hash_readpage found a valid tuple */
    return true;
diff --git a/src/backend/access/heap/Makefile b/src/backend/access/heap/Makefile
index 7e7324a9166f330f2d87c44f27d6890f86ad2cdb..aee7bfd8346bd5cac71361896f090f47ff4e1a3c 100644
--- a/src/backend/access/heap/Makefile
+++ b/src/backend/access/heap/Makefile
@@ -12,7 +12,7 @@ subdir = src/backend/access/heap
 top_builddir = ../../../..
 include $(top_builddir)/src/Makefile.global
 
-OBJS = heapam.o hio.o pruneheap.o rewriteheap.o syncscan.o tuptoaster.o \
-   vacuumlazy.o visibilitymap.o
+OBJS = heapam.o heapam_handler.o heapam_visibility.o hio.o pruneheap.o \
+   rewriteheap.o syncscan.o tuptoaster.o vacuumlazy.o visibilitymap.o
 
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 9650145642209bfcc8493dbabc385d2545860be5..f769d828ff75746f6451b5fdea32958bb3e6853d 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -45,6 +45,7 @@
 #include "access/multixact.h"
 #include "access/parallel.h"
 #include "access/relscan.h"
+#include "access/tableam.h"
 #include "access/sysattr.h"
 #include "access/transam.h"
 #include "access/tuptoaster.h"
 #include "nodes/execnodes.h"
 #include "executor/executor.h"
 
-/* GUC variable */
-bool       synchronize_seqscans = true;
-
-
-static HeapScanDesc heap_beginscan_internal(Relation relation,
-                       Snapshot snapshot,
-                       int nkeys, ScanKey key,
-                       ParallelHeapScanDesc parallel_scan,
-                       bool allow_strat,
-                       bool allow_sync,
-                       bool allow_pagemode,
-                       bool is_bitmapscan,
-                       bool is_samplescan,
-                       bool temp_snap);
-static void heap_parallelscan_startblock_init(HeapScanDesc scan);
-static BlockNumber heap_parallelscan_nextpage(HeapScanDesc scan);
 static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup,
                    TransactionId xid, CommandId cid, int options);
 static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf,
@@ -233,10 +218,10 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
     * results for a non-MVCC snapshot, the caller must hold some higher-level
     * lock that ensures the interesting tuple(s) won't change.)
     */
-   if (scan->rs_parallel != NULL)
-       scan->rs_nblocks = scan->rs_parallel->phs_nblocks;
+   if (scan->rs_scan.rs_parallel != NULL)
+       scan->rs_scan.rs_nblocks = scan->rs_scan.rs_parallel->phs_nblocks;
    else
-       scan->rs_nblocks = RelationGetNumberOfBlocks(scan->rs_rd);
+       scan->rs_scan.rs_nblocks = RelationGetNumberOfBlocks(scan->rs_scan.rs_rd);
 
    /*
     * If the table is large relative to NBuffers, use a bulk-read access
@@ -250,11 +235,11 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
     * Note that heap_parallelscan_initialize has a very similar test; if you
     * change this, consider changing that one, too.
     */
-   if (!RelationUsesLocalBuffers(scan->rs_rd) &&
-       scan->rs_nblocks > NBuffers / 4)
+   if (!RelationUsesLocalBuffers(scan->rs_scan.rs_rd) &&
+       scan->rs_scan.rs_nblocks > NBuffers / 4)
    {
-       allow_strat = scan->rs_allow_strat;
-       allow_sync = scan->rs_allow_sync;
+       allow_strat = scan->rs_scan.rs_allow_strat;
+       allow_sync = scan->rs_scan.rs_allow_sync;
    }
    else
        allow_strat = allow_sync = false;
@@ -272,10 +257,10 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
        scan->rs_strategy = NULL;
    }
 
-   if (scan->rs_parallel != NULL)
+   if (scan->rs_scan.rs_parallel != NULL)
    {
-       /* For parallel scan, believe whatever ParallelHeapScanDesc says. */
-       scan->rs_syncscan = scan->rs_parallel->phs_syncscan;
+       /* For parallel scan, believe whatever ParallelTableScanDesc says. */
+       scan->rs_scan.rs_syncscan = scan->rs_scan.rs_parallel->phs_syncscan;
    }
    else if (keep_startblock)
    {
@@ -284,20 +269,20 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
         * so that rewinding a cursor doesn't generate surprising results.
         * Reset the active syncscan setting, though.
         */
-       scan->rs_syncscan = (allow_sync && synchronize_seqscans);
+       scan->rs_scan.rs_syncscan = (allow_sync && synchronize_seqscans);
    }
    else if (allow_sync && synchronize_seqscans)
    {
-       scan->rs_syncscan = true;
-       scan->rs_startblock = ss_get_location(scan->rs_rd, scan->rs_nblocks);
+       scan->rs_scan.rs_syncscan = true;
+       scan->rs_scan.rs_startblock = ss_get_location(scan->rs_scan.rs_rd, scan->rs_scan.rs_nblocks);
    }
    else
    {
-       scan->rs_syncscan = false;
-       scan->rs_startblock = 0;
+       scan->rs_scan.rs_syncscan = false;
+       scan->rs_scan.rs_startblock = 0;
    }
 
-   scan->rs_numblocks = InvalidBlockNumber;
+   scan->rs_scan.rs_numblocks = InvalidBlockNumber;
    scan->rs_inited = false;
    scan->rs_ctup.t_data = NULL;
    ItemPointerSetInvalid(&scan->rs_ctup.t_self);
@@ -310,15 +295,15 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
     * copy the scan key, if appropriate
     */
    if (key != NULL)
-       memcpy(scan->rs_key, key, scan->rs_nkeys * sizeof(ScanKeyData));
+       memcpy(scan->rs_scan.rs_key, key, scan->rs_scan.rs_nkeys * sizeof(ScanKeyData));
 
    /*
     * Currently, we don't have a stats counter for bitmap heap scans (but the
     * underlying bitmap index scans will be counted) or sample scans (we only
     * update stats for tuple fetches there)
     */
-   if (!scan->rs_bitmapscan && !scan->rs_samplescan)
-       pgstat_count_heap_scan(scan->rs_rd);
+   if (!scan->rs_scan.rs_bitmapscan && !scan->rs_scan.rs_samplescan)
+       pgstat_count_heap_scan(scan->rs_scan.rs_rd);
 }
 
 /*
@@ -328,16 +313,19 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
  * numBlks is number of pages to scan (InvalidBlockNumber means "all")
  */
 void
-heap_setscanlimits(HeapScanDesc scan, BlockNumber startBlk, BlockNumber numBlks)
+heap_setscanlimits(TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks)
 {
+   HeapScanDesc scan = (HeapScanDesc) sscan;
+
    Assert(!scan->rs_inited);   /* else too late to change */
-   Assert(!scan->rs_syncscan); /* else rs_startblock is significant */
+   Assert(!scan->rs_scan.rs_syncscan); /* else rs_startblock is
+                                            * significant */
 
    /* Check startBlk is valid (but allow case of zero blocks...) */
-   Assert(startBlk == 0 || startBlk < scan->rs_nblocks);
+   Assert(startBlk == 0 || startBlk < scan->rs_scan.rs_nblocks);
 
-   scan->rs_startblock = startBlk;
-   scan->rs_numblocks = numBlks;
+   scan->rs_scan.rs_startblock = startBlk;
+   scan->rs_scan.rs_numblocks = numBlks;
 }
 
 /*
@@ -348,8 +336,9 @@ heap_setscanlimits(HeapScanDesc scan, BlockNumber startBlk, BlockNumber numBlks)
  * which tuples on the page are visible.
  */
 void
-heapgetpage(HeapScanDesc scan, BlockNumber page)
+heapgetpage(TableScanDesc sscan, BlockNumber page)
 {
+   HeapScanDesc scan = (HeapScanDesc) sscan;
    Buffer      buffer;
    Snapshot    snapshot;
    Page        dp;
@@ -359,7 +348,7 @@ heapgetpage(HeapScanDesc scan, BlockNumber page)
    ItemId      lpp;
    bool        all_visible;
 
-   Assert(page < scan->rs_nblocks);
+   Assert(page < scan->rs_scan.rs_nblocks);
 
    /* release previous scan buffer, if any */
    if (BufferIsValid(scan->rs_cbuf))
@@ -376,20 +365,20 @@ heapgetpage(HeapScanDesc scan, BlockNumber page)
    CHECK_FOR_INTERRUPTS();
 
    /* read page using selected strategy */
-   scan->rs_cbuf = ReadBufferExtended(scan->rs_rd, MAIN_FORKNUM, page,
-                                      RBM_NORMAL, scan->rs_strategy);
+   scan->rs_cbuf = ReadBufferExtended(scan->rs_scan.rs_rd, MAIN_FORKNUM, page,
+                                              RBM_NORMAL, scan->rs_strategy);
    scan->rs_cblock = page;
 
-   if (!scan->rs_pageatatime)
+   if (!scan->rs_scan.rs_pageatatime)
        return;
 
    buffer = scan->rs_cbuf;
-   snapshot = scan->rs_snapshot;
+   snapshot = scan->rs_scan.rs_snapshot;
 
    /*
     * Prune and repair fragmentation for the whole page, if possible.
     */
-   heap_page_prune_opt(scan->rs_rd, buffer);
+   heap_page_prune_opt(scan->rs_scan.rs_rd, buffer);
 
    /*
     * We must hold share lock on the buffer content while examining tuple
@@ -399,7 +388,7 @@ heapgetpage(HeapScanDesc scan, BlockNumber page)
    LockBuffer(buffer, BUFFER_LOCK_SHARE);
 
    dp = BufferGetPage(buffer);
-   TestForOldSnapshot(snapshot, scan->rs_rd, dp);
+   TestForOldSnapshot(snapshot, scan->rs_scan.rs_rd, dp);
    lines = PageGetMaxOffsetNumber(dp);
    ntup = 0;
 
@@ -434,7 +423,7 @@ heapgetpage(HeapScanDesc scan, BlockNumber page)
            HeapTupleData loctup;
            bool        valid;
 
-           loctup.t_tableOid = RelationGetRelid(scan->rs_rd);
+           loctup.t_tableOid = RelationGetRelid(scan->rs_scan.rs_rd);
            loctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp);
            loctup.t_len = ItemIdGetLength(lpp);
            ItemPointerSet(&(loctup.t_self), page, lineoff);
@@ -442,9 +431,9 @@ heapgetpage(HeapScanDesc scan, BlockNumber page)
            if (all_visible)
                valid = true;
            else
-               valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer);
+               valid = HeapTupleSatisfies(&loctup, snapshot, buffer);
 
-           CheckForSerializableConflictOut(valid, scan->rs_rd, &loctup,
+           CheckForSerializableConflictOut(valid, scan->rs_scan.rs_rd, &loctup,
                                            buffer, snapshot);
 
            if (valid)
@@ -488,7 +477,7 @@ heapgettup(HeapScanDesc scan,
           ScanKey key)
 {
    HeapTuple   tuple = &(scan->rs_ctup);
-   Snapshot    snapshot = scan->rs_snapshot;
+   Snapshot    snapshot = scan->rs_scan.rs_snapshot;
    bool        backward = ScanDirectionIsBackward(dir);
    BlockNumber page;
    bool        finished;
@@ -508,17 +497,17 @@ heapgettup(HeapScanDesc scan,
            /*
             * return null immediately if relation is empty
             */
-           if (scan->rs_nblocks == 0 || scan->rs_numblocks == 0)
+           if (scan->rs_scan.rs_nblocks == 0 || scan->rs_scan.rs_numblocks == 0)
            {
                Assert(!BufferIsValid(scan->rs_cbuf));
                tuple->t_data = NULL;
                return;
            }
-           if (scan->rs_parallel != NULL)
+           if (scan->rs_scan.rs_parallel != NULL)
            {
-               heap_parallelscan_startblock_init(scan);
+               table_parallelscan_startblock_init(&scan->rs_scan);
 
-               page = heap_parallelscan_nextpage(scan);
+               page = table_parallelscan_nextpage(&scan->rs_scan);
 
                /* Other processes might have already finished the scan. */
                if (page == InvalidBlockNumber)
@@ -529,8 +518,8 @@ heapgettup(HeapScanDesc scan,
                }
            }
            else
-               page = scan->rs_startblock; /* first page */
-           heapgetpage(scan, page);
+               page = scan->rs_scan.rs_startblock; /* first page */
+           heapgetpage((TableScanDesc) scan, page);
            lineoff = FirstOffsetNumber;    /* first offnum */
            scan->rs_inited = true;
        }
@@ -545,7 +534,7 @@ heapgettup(HeapScanDesc scan,
        LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
 
        dp = BufferGetPage(scan->rs_cbuf);
-       TestForOldSnapshot(snapshot, scan->rs_rd, dp);
+       TestForOldSnapshot(snapshot, scan->rs_scan.rs_rd, dp);
        lines = PageGetMaxOffsetNumber(dp);
        /* page and lineoff now reference the physically next tid */
 
@@ -554,14 +543,14 @@ heapgettup(HeapScanDesc scan,
    else if (backward)
    {
        /* backward parallel scan not supported */
-       Assert(scan->rs_parallel == NULL);
+       Assert(scan->rs_scan.rs_parallel == NULL);
 
        if (!scan->rs_inited)
        {
            /*
             * return null immediately if relation is empty
             */
-           if (scan->rs_nblocks == 0 || scan->rs_numblocks == 0)
+           if (scan->rs_scan.rs_nblocks == 0 || scan->rs_scan.rs_numblocks == 0)
            {
                Assert(!BufferIsValid(scan->rs_cbuf));
                tuple->t_data = NULL;
@@ -574,13 +563,13 @@ heapgettup(HeapScanDesc scan,
             * time, and much more likely that we'll just bollix things for
             * forward scanners.
             */
-           scan->rs_syncscan = false;
+           scan->rs_scan.rs_syncscan = false;
            /* start from last page of the scan */
-           if (scan->rs_startblock > 0)
-               page = scan->rs_startblock - 1;
+           if (scan->rs_scan.rs_startblock > 0)
+               page = scan->rs_scan.rs_startblock - 1;
            else
-               page = scan->rs_nblocks - 1;
-           heapgetpage(scan, page);
+               page = scan->rs_scan.rs_nblocks - 1;
+           heapgetpage((TableScanDesc) scan, page);
        }
        else
        {
@@ -591,7 +580,7 @@ heapgettup(HeapScanDesc scan,
        LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
 
        dp = BufferGetPage(scan->rs_cbuf);
-       TestForOldSnapshot(snapshot, scan->rs_rd, dp);
+       TestForOldSnapshot(snapshot, scan->rs_scan.rs_rd, dp);
        lines = PageGetMaxOffsetNumber(dp);
 
        if (!scan->rs_inited)
@@ -622,11 +611,11 @@ heapgettup(HeapScanDesc scan,
 
        page = ItemPointerGetBlockNumber(&(tuple->t_self));
        if (page != scan->rs_cblock)
-           heapgetpage(scan, page);
+           heapgetpage((TableScanDesc) scan, page);
 
        /* Since the tuple was previously fetched, needn't lock page here */
        dp = BufferGetPage(scan->rs_cbuf);
-       TestForOldSnapshot(snapshot, scan->rs_rd, dp);
+       TestForOldSnapshot(snapshot, scan->rs_scan.rs_rd, dp);
        lineoff = ItemPointerGetOffsetNumber(&(tuple->t_self));
        lpp = PageGetItemId(dp, lineoff);
        Assert(ItemIdIsNormal(lpp));
@@ -657,15 +646,13 @@ heapgettup(HeapScanDesc scan,
                /*
                 * if current tuple qualifies, return it.
                 */
-               valid = HeapTupleSatisfiesVisibility(tuple,
-                                                    snapshot,
-                                                    scan->rs_cbuf);
+               valid = HeapTupleSatisfies(tuple, snapshot, scan->rs_cbuf);
 
-               CheckForSerializableConflictOut(valid, scan->rs_rd, tuple,
+               CheckForSerializableConflictOut(valid, scan->rs_scan.rs_rd, tuple,
                                                scan->rs_cbuf, snapshot);
 
                if (valid && key != NULL)
-                   HeapKeyTest(tuple, RelationGetDescr(scan->rs_rd),
+                   HeapKeyTest(tuple, RelationGetDescr(scan->rs_scan.rs_rd),
                                nkeys, key, valid);
 
                if (valid)
@@ -702,24 +689,24 @@ heapgettup(HeapScanDesc scan,
         */
        if (backward)
        {
-           finished = (page == scan->rs_startblock) ||
-               (scan->rs_numblocks != InvalidBlockNumber ? --scan->rs_numblocks == 0 : false);
+           finished = (page == scan->rs_scan.rs_startblock) ||
+               (scan->rs_scan.rs_numblocks != InvalidBlockNumber ? --scan->rs_scan.rs_numblocks == 0 : false);
            if (page == 0)
-               page = scan->rs_nblocks;
+               page = scan->rs_scan.rs_nblocks;
            page--;
        }
-       else if (scan->rs_parallel != NULL)
+       else if (scan->rs_scan.rs_parallel != NULL)
        {
-           page = heap_parallelscan_nextpage(scan);
+           page = table_parallelscan_nextpage(&scan->rs_scan);
            finished = (page == InvalidBlockNumber);
        }
        else
        {
            page++;
-           if (page >= scan->rs_nblocks)
+           if (page >= scan->rs_scan.rs_nblocks)
                page = 0;
-           finished = (page == scan->rs_startblock) ||
-               (scan->rs_numblocks != InvalidBlockNumber ? --scan->rs_numblocks == 0 : false);
+           finished = (page == scan->rs_scan.rs_startblock) ||
+               (scan->rs_scan.rs_numblocks != InvalidBlockNumber ? --scan->rs_scan.rs_numblocks == 0 : false);
 
            /*
             * Report our new scan position for synchronization purposes. We
@@ -733,8 +720,8 @@ heapgettup(HeapScanDesc scan,
             * a little bit backwards on every invocation, which is confusing.
             * We don't guarantee any specific ordering in general, though.
             */
-           if (scan->rs_syncscan)
-               ss_report_location(scan->rs_rd, page);
+           if (scan->rs_scan.rs_syncscan)
+               ss_report_location(scan->rs_scan.rs_rd, page);
        }
 
        /*
@@ -751,12 +738,12 @@ heapgettup(HeapScanDesc scan,
            return;
        }
 
-       heapgetpage(scan, page);
+       heapgetpage((TableScanDesc) scan, page);
 
        LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
 
        dp = BufferGetPage(scan->rs_cbuf);
-       TestForOldSnapshot(snapshot, scan->rs_rd, dp);
+       TestForOldSnapshot(snapshot, scan->rs_scan.rs_rd, dp);
        lines = PageGetMaxOffsetNumber((Page) dp);
        linesleft = lines;
        if (backward)
@@ -812,17 +799,17 @@ heapgettup_pagemode(HeapScanDesc scan,
            /*
             * return null immediately if relation is empty
             */
-           if (scan->rs_nblocks == 0 || scan->rs_numblocks == 0)
+           if (scan->rs_scan.rs_nblocks == 0 || scan->rs_scan.rs_numblocks == 0)
            {
                Assert(!BufferIsValid(scan->rs_cbuf));
                tuple->t_data = NULL;
                return;
            }
-           if (scan->rs_parallel != NULL)
+           if (scan->rs_scan.rs_parallel != NULL)
            {
-               heap_parallelscan_startblock_init(scan);
+               table_parallelscan_startblock_init(&scan->rs_scan);
 
-               page = heap_parallelscan_nextpage(scan);
+               page = table_parallelscan_nextpage(&scan->rs_scan);
 
                /* Other processes might have already finished the scan. */
                if (page == InvalidBlockNumber)
@@ -833,8 +820,8 @@ heapgettup_pagemode(HeapScanDesc scan,
                }
            }
            else
-               page = scan->rs_startblock; /* first page */
-           heapgetpage(scan, page);
+               page = scan->rs_scan.rs_startblock; /* first page */
+           heapgetpage((TableScanDesc) scan, page);
            lineindex = 0;
            scan->rs_inited = true;
        }
@@ -845,8 +832,9 @@ heapgettup_pagemode(HeapScanDesc scan,
            lineindex = scan->rs_cindex + 1;
        }
 
+       LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
        dp = BufferGetPage(scan->rs_cbuf);
-       TestForOldSnapshot(scan->rs_snapshot, scan->rs_rd, dp);
+       TestForOldSnapshot(scan->rs_scan.rs_snapshot, scan->rs_scan.rs_rd, dp);
        lines = scan->rs_ntuples;
        /* page and lineindex now reference the next visible tid */
 
@@ -855,14 +843,14 @@ heapgettup_pagemode(HeapScanDesc scan,
    else if (backward)
    {
        /* backward parallel scan not supported */
-       Assert(scan->rs_parallel == NULL);
+       Assert(scan->rs_scan.rs_parallel == NULL);
 
        if (!scan->rs_inited)
        {
            /*
             * return null immediately if relation is empty
             */
-           if (scan->rs_nblocks == 0 || scan->rs_numblocks == 0)
+           if (scan->rs_scan.rs_nblocks == 0 || scan->rs_scan.rs_numblocks == 0)
            {
                Assert(!BufferIsValid(scan->rs_cbuf));
                tuple->t_data = NULL;
@@ -875,13 +863,13 @@ heapgettup_pagemode(HeapScanDesc scan,
             * time, and much more likely that we'll just bollix things for
             * forward scanners.
             */
-           scan->rs_syncscan = false;
+           scan->rs_scan.rs_syncscan = false;
            /* start from last page of the scan */
-           if (scan->rs_startblock > 0)
-               page = scan->rs_startblock - 1;
+           if (scan->rs_scan.rs_startblock > 0)
+               page = scan->rs_scan.rs_startblock - 1;
            else
-               page = scan->rs_nblocks - 1;
-           heapgetpage(scan, page);
+               page = scan->rs_scan.rs_nblocks - 1;
+           heapgetpage((TableScanDesc) scan, page);
        }
        else
        {
@@ -889,8 +877,9 @@ heapgettup_pagemode(HeapScanDesc scan,
            page = scan->rs_cblock; /* current page */
        }
 
+       LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
        dp = BufferGetPage(scan->rs_cbuf);
-       TestForOldSnapshot(scan->rs_snapshot, scan->rs_rd, dp);
+       TestForOldSnapshot(scan->rs_scan.rs_snapshot, scan->rs_scan.rs_rd, dp);
        lines = scan->rs_ntuples;
 
        if (!scan->rs_inited)
@@ -920,11 +909,11 @@ heapgettup_pagemode(HeapScanDesc scan,
 
        page = ItemPointerGetBlockNumber(&(tuple->t_self));
        if (page != scan->rs_cblock)
-           heapgetpage(scan, page);
+           heapgetpage((TableScanDesc) scan, page);
 
        /* Since the tuple was previously fetched, needn't lock page here */
        dp = BufferGetPage(scan->rs_cbuf);
-       TestForOldSnapshot(scan->rs_snapshot, scan->rs_rd, dp);
+       TestForOldSnapshot(scan->rs_scan.rs_snapshot, scan->rs_scan.rs_rd, dp);
        lineoff = ItemPointerGetOffsetNumber(&(tuple->t_self));
        lpp = PageGetItemId(dp, lineoff);
        Assert(ItemIdIsNormal(lpp));
@@ -962,17 +951,19 @@ heapgettup_pagemode(HeapScanDesc scan,
            {
                bool        valid;
 
-               HeapKeyTest(tuple, RelationGetDescr(scan->rs_rd),
+               HeapKeyTest(tuple, RelationGetDescr(scan->rs_scan.rs_rd),
                            nkeys, key, valid);
                if (valid)
                {
                    scan->rs_cindex = lineindex;
+                   LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
                    return;
                }
            }
            else
            {
                scan->rs_cindex = lineindex;
+               LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
                return;
            }
 
@@ -986,30 +977,36 @@ heapgettup_pagemode(HeapScanDesc scan,
                ++lineindex;
        }
 
+       /*
+        * We've exhausted the items on this page; release the buffer content
+        * lock taken above before moving to the next page.
+        */
+       LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+
        /*
         * if we get here, it means we've exhausted the items on this page and
         * it's time to move to the next.
         */
        if (backward)
        {
-           finished = (page == scan->rs_startblock) ||
-               (scan->rs_numblocks != InvalidBlockNumber ? --scan->rs_numblocks == 0 : false);
+           finished = (page == scan->rs_scan.rs_startblock) ||
+               (scan->rs_scan.rs_numblocks != InvalidBlockNumber ? --scan->rs_scan.rs_numblocks == 0 : false);
            if (page == 0)
-               page = scan->rs_nblocks;
+               page = scan->rs_scan.rs_nblocks;
            page--;
        }
-       else if (scan->rs_parallel != NULL)
+       else if (scan->rs_scan.rs_parallel != NULL)
        {
-           page = heap_parallelscan_nextpage(scan);
+           page = table_parallelscan_nextpage(&scan->rs_scan);
            finished = (page == InvalidBlockNumber);
        }
        else
        {
            page++;
-           if (page >= scan->rs_nblocks)
+           if (page >= scan->rs_scan.rs_nblocks)
                page = 0;
-           finished = (page == scan->rs_startblock) ||
-               (scan->rs_numblocks != InvalidBlockNumber ? --scan->rs_numblocks == 0 : false);
+           finished = (page == scan->rs_scan.rs_startblock) ||
+               (scan->rs_scan.rs_numblocks != InvalidBlockNumber ? --scan->rs_scan.rs_numblocks == 0 : false);
 
            /*
             * Report our new scan position for synchronization purposes. We
@@ -1023,8 +1020,8 @@ heapgettup_pagemode(HeapScanDesc scan,
             * a little bit backwards on every invocation, which is confusing.
             * We don't guarantee any specific ordering in general, though.
             */
-           if (scan->rs_syncscan)
-               ss_report_location(scan->rs_rd, page);
+           if (scan->rs_scan.rs_syncscan)
+               ss_report_location(scan->rs_scan.rs_rd, page);
        }
 
        /*
@@ -1041,10 +1038,11 @@ heapgettup_pagemode(HeapScanDesc scan,
            return;
        }
 
-       heapgetpage(scan, page);
+       heapgetpage((TableScanDesc) scan, page);
 
+       LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
        dp = BufferGetPage(scan->rs_cbuf);
-       TestForOldSnapshot(scan->rs_snapshot, scan->rs_rd, dp);
+       TestForOldSnapshot(scan->rs_scan.rs_snapshot, scan->rs_scan.rs_rd, dp);
        lines = scan->rs_ntuples;
        linesleft = lines;
        if (backward)
@@ -1387,87 +1385,16 @@ heap_openrv_extended(const RangeVar *relation, LOCKMODE lockmode,
    return r;
 }
 
-
-/* ----------------
- *     heap_beginscan  - begin relation scan
- *
- * heap_beginscan is the "standard" case.
- *
- * heap_beginscan_catalog differs in setting up its own temporary snapshot.
- *
- * heap_beginscan_strat offers an extended API that lets the caller control
- * whether a nondefault buffer access strategy can be used, and whether
- * syncscan can be chosen (possibly resulting in the scan not starting from
- * block zero).  Both of these default to true with plain heap_beginscan.
- *
- * heap_beginscan_bm is an alternative entry point for setting up a
- * HeapScanDesc for a bitmap heap scan.  Although that scan technology is
- * really quite unlike a standard seqscan, there is just enough commonality
- * to make it worth using the same data structure.
- *
- * heap_beginscan_sampling is an alternative entry point for setting up a
- * HeapScanDesc for a TABLESAMPLE scan.  As with bitmap scans, it's worth
- * using the same data structure although the behavior is rather different.
- * In addition to the options offered by heap_beginscan_strat, this call
- * also allows control of whether page-mode visibility checking is used.
- * ----------------
- */
-HeapScanDesc
+TableScanDesc
 heap_beginscan(Relation relation, Snapshot snapshot,
-              int nkeys, ScanKey key)
-{
-   return heap_beginscan_internal(relation, snapshot, nkeys, key, NULL,
-                                  true, true, true, false, false, false);
-}
-
-HeapScanDesc
-heap_beginscan_catalog(Relation relation, int nkeys, ScanKey key)
-{
-   Oid         relid = RelationGetRelid(relation);
-   Snapshot    snapshot = RegisterSnapshot(GetCatalogSnapshot(relid));
-
-   return heap_beginscan_internal(relation, snapshot, nkeys, key, NULL,
-                                  true, true, true, false, false, true);
-}
-
-HeapScanDesc
-heap_beginscan_strat(Relation relation, Snapshot snapshot,
-                    int nkeys, ScanKey key,
-                    bool allow_strat, bool allow_sync)
-{
-   return heap_beginscan_internal(relation, snapshot, nkeys, key, NULL,
-                                  allow_strat, allow_sync, true,
-                                  false, false, false);
-}
-
-HeapScanDesc
-heap_beginscan_bm(Relation relation, Snapshot snapshot,
-                 int nkeys, ScanKey key)
-{
-   return heap_beginscan_internal(relation, snapshot, nkeys, key, NULL,
-                                  false, false, true, true, false, false);
-}
-
-HeapScanDesc
-heap_beginscan_sampling(Relation relation, Snapshot snapshot,
-                       int nkeys, ScanKey key,
-                       bool allow_strat, bool allow_sync, bool allow_pagemode)
-{
-   return heap_beginscan_internal(relation, snapshot, nkeys, key, NULL,
-                                  allow_strat, allow_sync, allow_pagemode,
-                                  false, true, false);
-}
-
-static HeapScanDesc
-heap_beginscan_internal(Relation relation, Snapshot snapshot,
-                       int nkeys, ScanKey key,
-                       ParallelHeapScanDesc parallel_scan,
-                       bool allow_strat,
-                       bool allow_sync,
-                       bool allow_pagemode,
-                       bool is_bitmapscan,
-                       bool is_samplescan,
-                       bool temp_snap)
+              int nkeys, ScanKey key,
+              ParallelTableScanDesc parallel_scan,
+              bool allow_strat,
+              bool allow_sync,
+              bool allow_pagemode,
+              bool is_bitmapscan,
+              bool is_samplescan,
+              bool temp_snap)
 {
    HeapScanDesc scan;
 
@@ -1485,21 +1412,21 @@ heap_beginscan_internal(Relation relation, Snapshot snapshot,
     */
    scan = (HeapScanDesc) palloc(sizeof(HeapScanDescData));
 
-   scan->rs_rd = relation;
-   scan->rs_snapshot = snapshot;
-   scan->rs_nkeys = nkeys;
-   scan->rs_bitmapscan = is_bitmapscan;
-   scan->rs_samplescan = is_samplescan;
+   scan->rs_scan.rs_rd = relation;
+   scan->rs_scan.rs_snapshot = snapshot;
+   scan->rs_scan.rs_nkeys = nkeys;
+   scan->rs_scan.rs_bitmapscan = is_bitmapscan;
+   scan->rs_scan.rs_samplescan = is_samplescan;
    scan->rs_strategy = NULL;   /* set in initscan */
-   scan->rs_allow_strat = allow_strat;
-   scan->rs_allow_sync = allow_sync;
-   scan->rs_temp_snap = temp_snap;
-   scan->rs_parallel = parallel_scan;
+   scan->rs_scan.rs_allow_strat = allow_strat;
+   scan->rs_scan.rs_allow_sync = allow_sync;
+   scan->rs_scan.rs_temp_snap = temp_snap;
+   scan->rs_scan.rs_parallel = parallel_scan;
 
    /*
     * we can use page-at-a-time mode if it's an MVCC-safe snapshot
     */
-   scan->rs_pageatatime = allow_pagemode && IsMVCCSnapshot(snapshot);
+   scan->rs_scan.rs_pageatatime = allow_pagemode && snapshot && IsMVCCSnapshot(snapshot);
 
    /*
     * For a seqscan in a serializable transaction, acquire a predicate lock
@@ -1512,7 +1439,7 @@ heap_beginscan_internal(Relation relation, Snapshot snapshot,
     * covering the predicate. But in that case we still have to lock any
     * matching heap tuples.
     */
-   if (!is_bitmapscan)
+   if (!is_bitmapscan && snapshot)
        PredicateLockRelation(relation, snapshot);
 
    /* we only need to set this up once */
@@ -1523,13 +1450,13 @@ heap_beginscan_internal(Relation relation, Snapshot snapshot,
     * initscan() and we don't want to allocate memory again
     */
    if (nkeys > 0)
-       scan->rs_key = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys);
+       scan->rs_scan.rs_key = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys);
    else
-       scan->rs_key = NULL;
+       scan->rs_scan.rs_key = NULL;
 
    initscan(scan, key, false);
 
-   return scan;
+   return (TableScanDesc) scan;
 }
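+
+/*
+ * Illustrative sketch, not part of this patch (helper name hypothetical):
+ * with the consolidated signature above, the defaults the old plain
+ * heap_beginscan() wrapper supplied are now spelled out by the caller.
+ */
+#ifdef NOT_USED
+static TableScanDesc
+example_beginscan_default(Relation relation, Snapshot snapshot)
+{
+   /*
+    * allow_strat/allow_sync/allow_pagemode are true and the bitmapscan,
+    * samplescan and temp_snap flags false, matching what plain
+    * heap_beginscan() used to default to.
+    */
+   return heap_beginscan(relation, snapshot, 0, NULL, NULL,
+                         true, true, true, false, false, false);
+}
+#endif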
 
 /* ----------------
@@ -1537,9 +1464,18 @@ heap_beginscan_internal(Relation relation, Snapshot snapshot,
  * ----------------
  */
 void
-heap_rescan(HeapScanDesc scan,
-           ScanKey key)
+heap_rescan(TableScanDesc sscan, ScanKey key, bool set_params,
+           bool allow_strat, bool allow_sync, bool allow_pagemode)
 {
+   HeapScanDesc scan = (HeapScanDesc) sscan;
+
+   if (set_params)
+   {
+       scan->rs_scan.rs_allow_strat = allow_strat;
+       scan->rs_scan.rs_allow_sync = allow_sync;
+       scan->rs_scan.rs_pageatatime = allow_pagemode &&
+           scan->rs_scan.rs_snapshot && IsMVCCSnapshot(scan->rs_scan.rs_snapshot);
+   }
+
    /*
     * unpin scan buffers
     */
@@ -1550,27 +1486,21 @@ heap_rescan(HeapScanDesc scan,
     * reinitialize scan descriptor
     */
    initscan(scan, key, true);
-}
 
-/* ----------------
- *     heap_rescan_set_params  - restart a relation scan after changing params
- *
- * This call allows changing the buffer strategy, syncscan, and pagemode
- * options before starting a fresh scan.  Note that although the actual use
- * of syncscan might change (effectively, enabling or disabling reporting),
- * the previously selected startblock will be kept.
- * ----------------
- */
-void
-heap_rescan_set_params(HeapScanDesc scan, ScanKey key,
-                      bool allow_strat, bool allow_sync, bool allow_pagemode)
-{
-   /* adjust parameters */
-   scan->rs_allow_strat = allow_strat;
-   scan->rs_allow_sync = allow_sync;
-   scan->rs_pageatatime = allow_pagemode && IsMVCCSnapshot(scan->rs_snapshot);
-   /* ... and rescan */
-   heap_rescan(scan, key);
+   /*
+    * reset parallel scan, if present
+    */
+   if (scan->rs_scan.rs_parallel != NULL)
+   {
+       ParallelTableScanDesc parallel_scan;
+
+       /*
+        * Caller is responsible for making sure that all workers have
+        * finished the scan before calling this.
+        */
+       parallel_scan = scan->rs_scan.rs_parallel;
+       pg_atomic_write_u64(&parallel_scan->phs_nallocated, 0);
+   }
 }
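+
+/*
+ * Illustrative sketch, not part of this patch (helper name hypothetical):
+ * the removed heap_rescan_set_params() is now a heap_rescan() call with
+ * set_params = true.
+ */
+#ifdef NOT_USED
+static void
+example_rescan(TableScanDesc scan, ScanKey key)
+{
+   /* plain rescan; the previously selected parameters are kept */
+   heap_rescan(scan, key, false, false, false, false);
+
+   /* rescan while adjusting the strategy/syncscan/pagemode options */
+   heap_rescan(scan, key, true, true, false, true);
+}
+#endif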
 
 /* ----------------
@@ -1581,8 +1511,10 @@ heap_rescan_set_params(HeapScanDesc scan, ScanKey key,
  * ----------------
  */
 void
-heap_endscan(HeapScanDesc scan)
+heap_endscan(TableScanDesc sscan)
 {
+   HeapScanDesc scan = (HeapScanDesc) sscan;
+
    /* Note: no locking manipulations needed */
 
    /*
@@ -1594,215 +1526,20 @@ heap_endscan(HeapScanDesc scan)
    /*
     * decrement relation reference count and free scan descriptor storage
     */
-   RelationDecrementReferenceCount(scan->rs_rd);
+   RelationDecrementReferenceCount(scan->rs_scan.rs_rd);
 
-   if (scan->rs_key)
-       pfree(scan->rs_key);
+   if (scan->rs_scan.rs_key)
+       pfree(scan->rs_scan.rs_key);
 
    if (scan->rs_strategy != NULL)
        FreeAccessStrategy(scan->rs_strategy);
 
-   if (scan->rs_temp_snap)
-       UnregisterSnapshot(scan->rs_snapshot);
+   if (scan->rs_scan.rs_temp_snap)
+       UnregisterSnapshot(scan->rs_scan.rs_snapshot);
 
    pfree(scan);
 }
 
-/* ----------------
- *     heap_parallelscan_estimate - estimate storage for ParallelHeapScanDesc
- *
- *     Sadly, this doesn't reduce to a constant, because the size required
- *     to serialize the snapshot can vary.
- * ----------------
- */
-Size
-heap_parallelscan_estimate(Snapshot snapshot)
-{
-   return add_size(offsetof(ParallelHeapScanDescData, phs_snapshot_data),
-                   EstimateSnapshotSpace(snapshot));
-}
-
-/* ----------------
- *     heap_parallelscan_initialize - initialize ParallelHeapScanDesc
- *
- *     Must allow as many bytes of shared memory as returned by
- *     heap_parallelscan_estimate.  Call this just once in the leader
- *     process; then, individual workers attach via heap_beginscan_parallel.
- * ----------------
- */
-void
-heap_parallelscan_initialize(ParallelHeapScanDesc target, Relation relation,
-                            Snapshot snapshot)
-{
-   target->phs_relid = RelationGetRelid(relation);
-   target->phs_nblocks = RelationGetNumberOfBlocks(relation);
-   /* compare phs_syncscan initialization to similar logic in initscan */
-   target->phs_syncscan = synchronize_seqscans &&
-       !RelationUsesLocalBuffers(relation) &&
-       target->phs_nblocks > NBuffers / 4;
-   SpinLockInit(&target->phs_mutex);
-   target->phs_startblock = InvalidBlockNumber;
-   pg_atomic_init_u64(&target->phs_nallocated, 0);
-   if (IsMVCCSnapshot(snapshot))
-   {
-       SerializeSnapshot(snapshot, target->phs_snapshot_data);
-       target->phs_snapshot_any = false;
-   }
-   else
-   {
-       Assert(snapshot == SnapshotAny);
-       target->phs_snapshot_any = true;
-   }
-}
-
-/* ----------------
- *     heap_parallelscan_reinitialize - reset a parallel scan
- *
- *     Call this in the leader process.  Caller is responsible for
- *     making sure that all workers have finished the scan beforehand.
- * ----------------
- */
-void
-heap_parallelscan_reinitialize(ParallelHeapScanDesc parallel_scan)
-{
-   pg_atomic_write_u64(&parallel_scan->phs_nallocated, 0);
-}
-
-/* ----------------
- *     heap_beginscan_parallel - join a parallel scan
- *
- *     Caller must hold a suitable lock on the correct relation.
- * ----------------
- */
-HeapScanDesc
-heap_beginscan_parallel(Relation relation, ParallelHeapScanDesc parallel_scan)
-{
-   Snapshot    snapshot;
-
-   Assert(RelationGetRelid(relation) == parallel_scan->phs_relid);
-
-   if (!parallel_scan->phs_snapshot_any)
-   {
-       /* Snapshot was serialized -- restore it */
-       snapshot = RestoreSnapshot(parallel_scan->phs_snapshot_data);
-       RegisterSnapshot(snapshot);
-   }
-   else
-   {
-       /* SnapshotAny passed by caller (not serialized) */
-       snapshot = SnapshotAny;
-   }
-
-   return heap_beginscan_internal(relation, snapshot, 0, NULL, parallel_scan,
-                                  true, true, true, false, false,
-                                  !parallel_scan->phs_snapshot_any);
-}
-
-/* ----------------
- *     heap_parallelscan_startblock_init - find and set the scan's startblock
- *
- *     Determine where the parallel seq scan should start.  This function may
- *     be called many times, once by each parallel worker.  We must be careful
- *     only to set the startblock once.
- * ----------------
- */
-static void
-heap_parallelscan_startblock_init(HeapScanDesc scan)
-{
-   BlockNumber sync_startpage = InvalidBlockNumber;
-   ParallelHeapScanDesc parallel_scan;
-
-   Assert(scan->rs_parallel);
-   parallel_scan = scan->rs_parallel;
-
-retry:
-   /* Grab the spinlock. */
-   SpinLockAcquire(&parallel_scan->phs_mutex);
-
-   /*
-    * If the scan's startblock has not yet been initialized, we must do so
-    * now.  If this is not a synchronized scan, we just start at block 0, but
-    * if it is a synchronized scan, we must get the starting position from
-    * the synchronized scan machinery.  We can't hold the spinlock while
-    * doing that, though, so release the spinlock, get the information we
-    * need, and retry.  If nobody else has initialized the scan in the
-    * meantime, we'll fill in the value we fetched on the second time
-    * through.
-    */
-   if (parallel_scan->phs_startblock == InvalidBlockNumber)
-   {
-       if (!parallel_scan->phs_syncscan)
-           parallel_scan->phs_startblock = 0;
-       else if (sync_startpage != InvalidBlockNumber)
-           parallel_scan->phs_startblock = sync_startpage;
-       else
-       {
-           SpinLockRelease(&parallel_scan->phs_mutex);
-           sync_startpage = ss_get_location(scan->rs_rd, scan->rs_nblocks);
-           goto retry;
-       }
-   }
-   SpinLockRelease(&parallel_scan->phs_mutex);
-}
-
-/* ----------------
- *     heap_parallelscan_nextpage - get the next page to scan
- *
- *     Get the next page to scan.  Even if there are no pages left to scan,
- *     another backend could have grabbed a page to scan and not yet finished
- *     looking at it, so it doesn't follow that the scan is done when the
- *     first backend gets an InvalidBlockNumber return.
- * ----------------
- */
-static BlockNumber
-heap_parallelscan_nextpage(HeapScanDesc scan)
-{
-   BlockNumber page;
-   ParallelHeapScanDesc parallel_scan;
-   uint64      nallocated;
-
-   Assert(scan->rs_parallel);
-   parallel_scan = scan->rs_parallel;
-
-   /*
-    * phs_nallocated tracks how many pages have been allocated to workers
-    * already.  When phs_nallocated >= rs_nblocks, all blocks have been
-    * allocated.
-    *
-    * Because we use an atomic fetch-and-add to fetch the current value, the
-    * phs_nallocated counter will exceed rs_nblocks, because workers will
-    * still increment the value, when they try to allocate the next block but
-    * all blocks have been allocated already. The counter must be 64 bits
-    * wide because of that, to avoid wrapping around when rs_nblocks is close
-    * to 2^32.
-    *
-    * The actual page to return is calculated by adding the counter to the
-    * starting block number, modulo nblocks.
-    */
-   nallocated = pg_atomic_fetch_add_u64(&parallel_scan->phs_nallocated, 1);
-   if (nallocated >= scan->rs_nblocks)
-       page = InvalidBlockNumber;  /* all blocks have been allocated */
-   else
-       page = (nallocated + parallel_scan->phs_startblock) % scan->rs_nblocks;
-
-   /*
-    * Report scan location.  Normally, we report the current page number.
-    * When we reach the end of the scan, though, we report the starting page,
-    * not the ending page, just so the starting positions for later scans
-    * doesn't slew backwards.  We only report the position at the end of the
-    * scan once, though: subsequent callers will report nothing.
-    */
-   if (scan->rs_syncscan)
-   {
-       if (page != InvalidBlockNumber)
-           ss_report_location(scan->rs_rd, page);
-       else if (nallocated == scan->rs_nblocks)
-           ss_report_location(scan->rs_rd, parallel_scan->phs_startblock);
-   }
-
-   return page;
-}
-
 /* ----------------
  *     heap_update_snapshot
  *
@@ -1810,13 +1547,15 @@ heap_parallelscan_nextpage(HeapScanDesc scan)
  * ----------------
  */
 void
-heap_update_snapshot(HeapScanDesc scan, Snapshot snapshot)
+heap_update_snapshot(TableScanDesc sscan, Snapshot snapshot)
 {
+   HeapScanDesc scan = (HeapScanDesc) sscan;
+
    Assert(IsMVCCSnapshot(snapshot));
 
    RegisterSnapshot(snapshot);
-   scan->rs_snapshot = snapshot;
-   scan->rs_temp_snap = true;
+   scan->rs_scan.rs_snapshot = snapshot;
+   scan->rs_scan.rs_temp_snap = true;
 }
 
 /* ----------------
@@ -1842,19 +1581,20 @@ heap_update_snapshot(HeapScanDesc scan, Snapshot snapshot)
 #define HEAPDEBUG_3
 #endif                         /* !defined(HEAPDEBUGALL) */
 
-
 HeapTuple
-heap_getnext(HeapScanDesc scan, ScanDirection direction)
+heap_getnext(TableScanDesc sscan, ScanDirection direction)
 {
+   HeapScanDesc scan = (HeapScanDesc) sscan;
+
    /* Note: no locking manipulations needed */
 
    HEAPDEBUG_1;                /* heap_getnext( info ) */
 
-   if (scan->rs_pageatatime)
+   if (scan->rs_scan.rs_pageatatime)
        heapgettup_pagemode(scan, direction,
-                           scan->rs_nkeys, scan->rs_key);
+                           scan->rs_scan.rs_nkeys, scan->rs_scan.rs_key);
    else
-       heapgettup(scan, direction, scan->rs_nkeys, scan->rs_key);
+       heapgettup(scan, direction, scan->rs_scan.rs_nkeys, scan->rs_scan.rs_key);
 
    if (scan->rs_ctup.t_data == NULL)
    {
@@ -1868,9 +1608,57 @@ heap_getnext(HeapScanDesc scan, ScanDirection direction)
     */
    HEAPDEBUG_3;                /* heap_getnext returning tuple */
 
-   pgstat_count_heap_getnext(scan->rs_rd);
+   pgstat_count_heap_getnext(scan->rs_scan.rs_rd);
+
+   return &scan->rs_ctup;
+}
+
+#ifdef HEAPAMSLOTDEBUGALL
+#define HEAPAMSLOTDEBUG_1 \
+   elog(DEBUG2, "heapam_getnext([%s,nkeys=%d],dir=%d) called", \
+        RelationGetRelationName(scan->rs_scan.rs_rd), scan->rs_scan.rs_nkeys, (int) direction)
+#define HEAPAMSLOTDEBUG_2 \
+   elog(DEBUG2, "heapam_getnext returning EOS")
+#define HEAPAMSLOTDEBUG_3 \
+   elog(DEBUG2, "heapam_getnext returning tuple")
+#else
+#define HEAPAMSLOTDEBUG_1
+#define HEAPAMSLOTDEBUG_2
+#define HEAPAMSLOTDEBUG_3
+#endif
+
+TupleTableSlot *
+heap_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
+{
+   HeapScanDesc scan = (HeapScanDesc) sscan;
+
+   /* Note: no locking manipulations needed */
+
+   HEAPAMSLOTDEBUG_1;          /* heap_getnextslot( info ) */
+
+   if (scan->rs_scan.rs_pageatatime)
+       heapgettup_pagemode(scan, direction,
+                           scan->rs_scan.rs_nkeys, scan->rs_scan.rs_key);
+   else
+       heapgettup(scan, direction, scan->rs_scan.rs_nkeys, scan->rs_scan.rs_key);
 
-   return &(scan->rs_ctup);
+   if (scan->rs_ctup.t_data == NULL)
+   {
+       HEAPAMSLOTDEBUG_2;      /* heap_getnextslot returning EOS */
+       ExecClearTuple(slot);
+       return NULL;
+   }
+
+   /*
+    * if we get here it means we have a new current scan tuple, so point to
+    * the proper return buffer and return the tuple.
+    */
+   HEAPAMSLOTDEBUG_3;          /* heap_getnextslot returning tuple */
+
+   pgstat_count_heap_getnext(scan->rs_scan.rs_rd);
+
+   return ExecStoreBufferHeapTuple(&scan->rs_ctup, slot,
+                                   scan->rs_cbuf);
 }
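+
+/*
+ * Illustrative sketch, not part of this patch (helper name hypothetical):
+ * a minimal consumer loop for the slot-based scan API above.  The slot is
+ * created with the heap AM's slot type and reused across calls.
+ */
+#ifdef NOT_USED
+static void
+example_scan_with_slots(Relation rel, Snapshot snapshot)
+{
+   TableScanDesc scan = heap_beginscan(rel, snapshot, 0, NULL, NULL,
+                                       true, true, true,
+                                       false, false, false);
+   TupleTableSlot *slot = MakeSingleTupleTableSlot(RelationGetDescr(rel),
+                                                   &TTSOpsBufferHeapTuple);
+
+   while (heap_getnextslot(scan, ForwardScanDirection, slot) != NULL)
+   {
+       /* process slot; its contents remain valid until the next call */
+   }
+
+   ExecDropSingleTupleTableSlot(slot);
+   heap_endscan(scan);
+}
+#endif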
 
 /*
@@ -1890,10 +1678,8 @@ heap_getnext(HeapScanDesc scan, ScanDirection direction)
  * If the tuple is found but fails the time qual check, then false is returned
  * but tuple->t_data is left pointing to the tuple.
  *
- * keep_buf determines what is done with the buffer in the false-result cases.
- * When the caller specifies keep_buf = true, we retain the pin on the buffer
- * and return it in *userbuf (so the caller must eventually unpin it); when
- * keep_buf = false, the pin is released and *userbuf is set to InvalidBuffer.
+ * In the false-result cases the buffer pin is released and *userbuf is set to
+ * InvalidBuffer.
  *
  * stats_relation is the relation to charge the heap_fetch operation against
  * for statistical purposes.  (This could be the heap rel itself, an
@@ -1913,13 +1699,12 @@ heap_getnext(HeapScanDesc scan, ScanDirection direction)
  */
 bool
 heap_fetch(Relation relation,
+          ItemPointer tid,
           Snapshot snapshot,
           HeapTuple tuple,
           Buffer *userbuf,
-          bool keep_buf,
           Relation stats_relation)
 {
-   ItemPointer tid = &(tuple->t_self);
    ItemId      lp;
    Buffer      buffer;
    Page        page;
@@ -1946,13 +1731,8 @@ heap_fetch(Relation relation,
    if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(page))
    {
        LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
-       if (keep_buf)
-           *userbuf = buffer;
-       else
-       {
-           ReleaseBuffer(buffer);
-           *userbuf = InvalidBuffer;
-       }
+       ReleaseBuffer(buffer);
+       *userbuf = InvalidBuffer;
        tuple->t_data = NULL;
        return false;
    }
@@ -1968,20 +1748,16 @@ heap_fetch(Relation relation,
    if (!ItemIdIsNormal(lp))
    {
        LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
-       if (keep_buf)
-           *userbuf = buffer;
-       else
-       {
-           ReleaseBuffer(buffer);
-           *userbuf = InvalidBuffer;
-       }
+       ReleaseBuffer(buffer);
+       *userbuf = InvalidBuffer;
        tuple->t_data = NULL;
        return false;
    }
 
    /*
-    * fill in *tuple fields
+    * fill in *tuple fields, copying the caller-supplied tid into t_self
     */
+   ItemPointerCopy(tid, &(tuple->t_self));
    tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
    tuple->t_len = ItemIdGetLength(lp);
    tuple->t_tableOid = RelationGetRelid(relation);
@@ -1989,7 +1765,7 @@ heap_fetch(Relation relation,
    /*
     * check time qualification of tuple, then release lock
     */
-   valid = HeapTupleSatisfiesVisibility(tuple, snapshot, buffer);
+   valid = HeapTupleSatisfies(tuple, snapshot, buffer);
 
    if (valid)
        PredicateLockTuple(relation, tuple, snapshot);
@@ -2013,14 +1789,9 @@ heap_fetch(Relation relation,
        return true;
    }
 
-   /* Tuple failed time qual, but maybe caller wants to see it anyway. */
-   if (keep_buf)
-       *userbuf = buffer;
-   else
-   {
-       ReleaseBuffer(buffer);
-       *userbuf = InvalidBuffer;
-   }
+   /* Tuple failed time qual */
+   ReleaseBuffer(buffer);
+   *userbuf = InvalidBuffer;
 
    return false;
 }
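+
+/*
+ * Illustrative sketch, not part of this patch (helper name hypothetical):
+ * with the tid now passed explicitly, callers no longer pre-load
+ * tuple->t_self, and there is no keep_buf mode to consider.
+ */
+#ifdef NOT_USED
+static bool
+example_fetch(Relation rel, ItemPointer tid, Snapshot snapshot)
+{
+   HeapTupleData tuple;
+   Buffer      buffer;
+
+   if (heap_fetch(rel, tid, snapshot, &tuple, &buffer, NULL))
+   {
+       /* tuple is valid while we hold the pin returned in buffer */
+       ReleaseBuffer(buffer);
+       return true;
+   }
+   return false;
+}
+#endif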
@@ -2136,7 +1907,7 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
            ItemPointerSet(&(heapTuple->t_self), BufferGetBlockNumber(buffer), offnum);
 
            /* If it's visible per the snapshot, we must return it */
-           valid = HeapTupleSatisfiesVisibility(heapTuple, snapshot, buffer);
+           valid = HeapTupleSatisfies(heapTuple, snapshot, buffer);
            CheckForSerializableConflictOut(valid, relation, heapTuple,
                                            buffer, snapshot);
            /* reset to original, non-redirected, tid */
@@ -2310,7 +2081,7 @@ heap_get_latest_tid(Relation relation,
         * Check time qualification of tuple; if visible, set it as the new
         * result candidate.
         */
-       valid = HeapTupleSatisfiesVisibility(&tp, snapshot, buffer);
+       valid = HeapTupleSatisfies(&tp, snapshot, buffer);
        CheckForSerializableConflictOut(valid, relation, &tp, buffer, snapshot);
        if (valid)
            *tid = ctid;
@@ -2333,7 +2104,6 @@ heap_get_latest_tid(Relation relation,
    }                           /* end of loop */
 }
 
-
 /*
  * UpdateXmaxHintBits - update tuple hint bits after xmax transaction ends
  *
@@ -2380,7 +2150,7 @@ GetBulkInsertState(void)
    bistate = (BulkInsertState) palloc(sizeof(BulkInsertStateData));
    bistate->strategy = GetAccessStrategy(BAS_BULKWRITE);
    bistate->current_buf = InvalidBuffer;
-   return bistate;
+   return (void *) bistate;
 }
 
 /*
@@ -2694,7 +2464,7 @@ heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid,
  * temporary context before calling this, if that's a problem.
  */
 void
-heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
+heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
                  CommandId cid, int options, BulkInsertState bistate)
 {
    TransactionId xid = GetCurrentTransactionId();
@@ -2715,12 +2485,17 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
    saveFreeSpace = RelationGetTargetPageFreeSpace(relation,
                                                   HEAP_DEFAULT_FILLFACTOR);
 
-   /* Toast and set header data in all the tuples */
+   /* Toast and set header data in all the slots */
    heaptuples = palloc(ntuples * sizeof(HeapTuple));
    for (i = 0; i < ntuples; i++)
-       heaptuples[i] = heap_prepare_insert(relation, tuples[i],
+   {
+       heaptuples[i] = heap_prepare_insert(relation, ExecFetchSlotHeapTuple(slots[i], true, NULL),
                                            xid, cid, options);
 
+       if (slots[i]->tts_tableOid != InvalidOid)
+           heaptuples[i]->t_tableOid = slots[i]->tts_tableOid;
+   }
+
    /*
     * We're about to do the actual inserts -- but check for conflict first,
     * to minimize the possibility of having to roll back work we've just
@@ -2955,7 +2730,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
     * probably faster to always copy than check.
     */
    for (i = 0; i < ntuples; i++)
-       tuples[i]->t_self = heaptuples[i]->t_self;
+       slots[i]->tts_tid = heaptuples[i]->t_self;
 
    pgstat_count_heap_insert(relation, ntuples);
 }
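+
+/*
+ * Illustrative sketch, not part of this patch (helper name hypothetical):
+ * callers now hand heap_multi_insert() an array of TupleTableSlots; the
+ * assigned TIDs come back in each slot's tts_tid.
+ */
+#ifdef NOT_USED
+static void
+example_multi_insert(Relation rel, TupleTableSlot **slots, int nslots,
+                    CommandId cid)
+{
+   heap_multi_insert(rel, slots, nslots, cid, 0, NULL);
+   /* slots[i]->tts_tid now holds the TID assigned to slots[i] */
+}
+#endif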
@@ -3225,7 +3000,7 @@ l1:
    if (crosscheck != InvalidSnapshot && result == HeapTupleMayBeUpdated)
    {
        /* Perform additional check for transaction-snapshot mode RI updates */
-       if (!HeapTupleSatisfiesVisibility(&tp, crosscheck, buffer))
+       if (!HeapTupleSatisfies(&tp, crosscheck, buffer))
            result = HeapTupleUpdated;
    }
 
@@ -3233,6 +3008,7 @@ l1:
    {
        Assert(result == HeapTupleSelfUpdated ||
               result == HeapTupleUpdated ||
+              result == HeapTupleDeleted ||
               result == HeapTupleBeingUpdated);
        Assert(!(tp.t_data->t_infomask & HEAP_XMAX_INVALID));
        hufd->ctid = tp.t_data->t_ctid;
@@ -3246,6 +3022,8 @@ l1:
            UnlockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive);
        if (vmbuffer != InvalidBuffer)
            ReleaseBuffer(vmbuffer);
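+       /*
+        * A deleted tuple's t_ctid points at itself, so report a concurrent
+        * delete distinctly from a concurrent update.
+        */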
+       if (result == HeapTupleUpdated && ItemPointerEquals(tid, &hufd->ctid))
+           result = HeapTupleDeleted;
        return result;
    }
 
@@ -3463,6 +3241,10 @@ simple_heap_delete(Relation relation, ItemPointer tid)
            elog(ERROR, "tuple concurrently updated");
            break;
 
+       case HeapTupleDeleted:
+           elog(ERROR, "tuple concurrently deleted");
+           break;
+
        default:
            elog(ERROR, "unrecognized heap_delete status: %u", result);
            break;
@@ -3860,7 +3642,7 @@ l2:
    if (crosscheck != InvalidSnapshot && result == HeapTupleMayBeUpdated)
    {
        /* Perform additional check for transaction-snapshot mode RI updates */
-       if (!HeapTupleSatisfiesVisibility(&oldtup, crosscheck, buffer))
+       if (!HeapTupleSatisfies(&oldtup, crosscheck, buffer))
            result = HeapTupleUpdated;
    }
 
@@ -3868,6 +3650,7 @@ l2:
    {
        Assert(result == HeapTupleSelfUpdated ||
               result == HeapTupleUpdated ||
+              result == HeapTupleDeleted ||
               result == HeapTupleBeingUpdated);
        Assert(!(oldtup.t_data->t_infomask & HEAP_XMAX_INVALID));
        hufd->ctid = oldtup.t_data->t_ctid;
@@ -3887,6 +3670,8 @@ l2:
        bms_free(id_attrs);
        bms_free(modified_attrs);
        bms_free(interesting_attrs);
+       if (result == HeapTupleUpdated && ItemPointerEquals(otid, &hufd->ctid))
+           result = HeapTupleDeleted;
        return result;
    }
 
@@ -4593,6 +4378,10 @@ simple_heap_update(Relation relation, ItemPointer otid, HeapTuple tup)
            elog(ERROR, "tuple concurrently updated");
            break;
 
+       case HeapTupleDeleted:
+           elog(ERROR, "tuple concurrently deleted");
+           break;
+
        default:
            elog(ERROR, "unrecognized heap_update status: %u", result);
            break;
@@ -4627,7 +4416,7 @@ get_mxact_status_for_lock(LockTupleMode mode, bool is_update)
  *
  * Input parameters:
  * relation: relation containing tuple (caller must hold suitable lock)
- * tuple->t_self: TID of tuple to lock (rest of struct need not be valid)
+ * tid: TID of tuple to lock
  * cid: current command ID (used for visibility test, and stored into
  *     tuple's cmax if lock is successful)
  * mode: indicates if shared or exclusive tuple lock is desired
@@ -4645,6 +4434,7 @@ get_mxact_status_for_lock(LockTupleMode mode, bool is_update)
  * HeapTupleInvisible: lock failed because tuple was never visible to us
  * HeapTupleSelfUpdated: lock failed because tuple updated by self
  * HeapTupleUpdated: lock failed because tuple updated by other xact
+ * HeapTupleDeleted: lock failed because tuple deleted by other xact
  * HeapTupleWouldBlock: lock couldn't be acquired and wait_policy is skip
  *
  * In the failure cases other than HeapTupleInvisible, the routine fills
@@ -4657,13 +4447,12 @@ get_mxact_status_for_lock(LockTupleMode mode, bool is_update)
  * See README.tuplock for a thorough explanation of this mechanism.
  */
 HTSU_Result
-heap_lock_tuple(Relation relation, HeapTuple tuple,
+heap_lock_tuple(Relation relation, ItemPointer tid,
                CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy,
                bool follow_updates,
-               Buffer *buffer, HeapUpdateFailureData *hufd)
+               HeapTuple tuple, Buffer *buffer, HeapUpdateFailureData *hufd)
 {
    HTSU_Result result;
-   ItemPointer tid = &(tuple->t_self);
    ItemId      lp;
    Page        page;
    Buffer      vmbuffer = InvalidBuffer;
@@ -4698,6 +4487,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
    tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
    tuple->t_len = ItemIdGetLength(lp);
    tuple->t_tableOid = RelationGetRelid(relation);
+   tuple->t_self = *tid;
 
 l3:
    result = HeapTupleSatisfiesUpdate(tuple, cid, *buffer);
@@ -4713,7 +4503,7 @@ l3:
        result = HeapTupleInvisible;
        goto out_locked;
    }
-   else if (result == HeapTupleBeingUpdated || result == HeapTupleUpdated)
+   else if (result == HeapTupleBeingUpdated || result == HeapTupleUpdated || result == HeapTupleDeleted)
    {
        TransactionId xwait;
        uint16      infomask;
@@ -4993,7 +4783,7 @@ l3:
         * or we must wait for the locking transaction or multixact; so below
         * we ensure that we grab buffer lock after the sleep.
         */
-       if (require_sleep && result == HeapTupleUpdated)
+       if (require_sleep && (result == HeapTupleUpdated || result == HeapTupleDeleted))
        {
            LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
            goto failed;
@@ -5153,6 +4943,8 @@ l3:
            HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask) ||
            HeapTupleHeaderIsOnlyLocked(tuple->t_data))
            result = HeapTupleMayBeUpdated;
+       else if (ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid))
+           result = HeapTupleDeleted;
        else
            result = HeapTupleUpdated;
    }
@@ -5161,7 +4953,7 @@ failed:
    if (result != HeapTupleMayBeUpdated)
    {
        Assert(result == HeapTupleSelfUpdated || result == HeapTupleUpdated ||
-              result == HeapTupleWouldBlock);
+              result == HeapTupleWouldBlock || result == HeapTupleDeleted);
        Assert(!(tuple->t_data->t_infomask & HEAP_XMAX_INVALID));
        hufd->ctid = tuple->t_data->t_ctid;
        hufd->xmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
@@ -5765,9 +5557,8 @@ heap_lock_updated_tuple_rec(Relation rel, ItemPointer tid, TransactionId xid,
        new_infomask = 0;
        new_xmax = InvalidTransactionId;
        block = ItemPointerGetBlockNumber(&tupid);
-       ItemPointerCopy(&tupid, &(mytup.t_self));
 
-       if (!heap_fetch(rel, SnapshotAny, &mytup, &buf, false, NULL))
+       if (!heap_fetch(rel, &tupid, SnapshotAny, &mytup, &buf, NULL))
        {
            /*
             * if we fail to find the updated version of the tuple, it's
@@ -6050,6 +5841,10 @@ next:
    result = HeapTupleMayBeUpdated;
 
 out_locked:
+
+   if (result == HeapTupleUpdated && ItemPointerEquals(&mytup.t_self, &mytup.t_data->t_ctid))
+       result = HeapTupleDeleted;
+
    UnlockReleaseBuffer(buf);
 
 out_unlocked:
@@ -6228,6 +6023,10 @@ heap_abort_speculative(Relation relation, HeapTuple tuple)
    BlockNumber block;
    Buffer      buffer;
 
+   /*
+    * XXX: ideally we would Assert(slot->tts_speculativeToken != 0) here,
+    * but the toast code does not yet set the token.
+    */
    Assert(ItemPointerIsValid(tid));
 
    block = ItemPointerGetBlockNumber(tid);
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
new file mode 100644 (file)
index 0000000..49f8d83
--- /dev/null
@@ -0,0 +1,2122 @@
+/*-------------------------------------------------------------------------
+ *
+ * heapam_handler.c
+ *   heap table access method code
+ *
+ * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *   src/backend/access/heap/heapam_handler.c
+ *
+ *
+ * NOTES
+ *   This file contains the heapam_ routines which implement the
+ *   POSTGRES heap table access method, exposed to the rest of the
+ *   system through the table AM API.
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "miscadmin.h"
+
+#include "access/heapam.h"
+#include "access/relscan.h"
+#include "access/rewriteheap.h"
+#include "access/tableam.h"
+#include "access/tsmapi.h"
+#include "catalog/catalog.h"
+#include "catalog/index.h"
+#include "catalog/pg_am_d.h"
+#include "executor/executor.h"
+#include "pgstat.h"
+#include "storage/lmgr.h"
+#include "utils/builtins.h"
+#include "utils/rel.h"
+#include "utils/tqual.h"
+#include "storage/bufpage.h"
+#include "storage/bufmgr.h"
+#include "storage/predicate.h"
+#include "storage/procarray.h"
+#include "storage/smgr.h"
+#include "access/xact.h"
+
+
+/* ----------------------------------------------------------------
+ *             table AM support routines for heapam
+ * ----------------------------------------------------------------
+ */
+
+static bool
+heapam_fetch_row_version(Relation relation,
+                        ItemPointer tid,
+                        Snapshot snapshot,
+                        TupleTableSlot *slot,
+                        Relation stats_relation)
+{
+   BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
+   Buffer buffer;
+
+   Assert(TTS_IS_BUFFERTUPLE(slot));
+
+   if (heap_fetch(relation, tid, snapshot, &bslot->base.tupdata, &buffer, stats_relation))
+   {
+       ExecStoreBufferHeapTuple(&bslot->base.tupdata, slot, buffer);
+       ReleaseBuffer(buffer);
+
+       slot->tts_tableOid = RelationGetRelid(relation);
+
+       return true;
+   }
+
+   slot->tts_tableOid = RelationGetRelid(relation);
+
+   return false;
+}
+
+/*
+ * Insert a heap tuple from a slot, which may contain an OID and speculative
+ * insertion token.
+ */
+static void
+heapam_heap_insert(Relation relation, TupleTableSlot *slot, CommandId cid,
+                  int options, BulkInsertState bistate)
+{
+   bool        shouldFree = true;
+   HeapTuple   tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
+
+   /* Update the tuple with table oid */
+   slot->tts_tableOid = RelationGetRelid(relation);
+   if (slot->tts_tableOid != InvalidOid)
+       tuple->t_tableOid = slot->tts_tableOid;
+
+   /* Perform the insertion, and copy the resulting ItemPointer */
+   heap_insert(relation, tuple, cid, options, bistate);
+   ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
+
+   if (shouldFree)
+       pfree(tuple);
+}
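+
+/*
+ * Illustrative sketch, not part of this patch: executor code reaches the
+ * callback above through the generic wrapper in tableam.h (assuming the
+ * table_insert() wrapper this patch introduces there), roughly:
+ *
+ *     slot->tts_tableOid = RelationGetRelid(rel);
+ *     table_insert(rel, slot, GetCurrentCommandId(true), 0, NULL);
+ */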
+
+static void
+heapam_heap_insert_speculative(Relation relation, TupleTableSlot *slot, CommandId cid,
+                              int options, BulkInsertState bistate, uint32 specToken)
+{
+   bool        shouldFree = true;
+   HeapTuple   tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
+
+   /* Update the tuple with table oid */
+   slot->tts_tableOid = RelationGetRelid(relation);
+   if (slot->tts_tableOid != InvalidOid)
+       tuple->t_tableOid = slot->tts_tableOid;
+
+   HeapTupleHeaderSetSpeculativeToken(tuple->t_data, specToken);
+
+   /* Perform the insertion, and copy the resulting ItemPointer */
+   heap_insert(relation, tuple, cid, options, bistate);
+   ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
+
+   if (shouldFree)
+       pfree(tuple);
+}
+
+static void
+heapam_heap_complete_speculative(Relation relation, TupleTableSlot *slot, uint32 specToken,
+                                bool succeeded)
+{
+   bool        shouldFree = true;
+   HeapTuple   tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
+
+   /* adjust the tuple's state accordingly */
+   if (succeeded)
+       heap_finish_speculative(relation, tuple);
+   else
+       heap_abort_speculative(relation, tuple);
+
+   if (shouldFree)
+       pfree(tuple);
+}
+
+
+static HTSU_Result
+heapam_heap_delete(Relation relation, ItemPointer tid, CommandId cid,
+                  Snapshot snapshot, Snapshot crosscheck, bool wait,
+                  HeapUpdateFailureData *hufd, bool changingPart)
+{
+   /*
+    * Currently, deleting index tuples is left to VACUUM.  If a table AM
+    * cleans up dead tuples itself, that would also be the time to delete
+    * the corresponding index tuples.
+    */
+   return heap_delete(relation, tid, cid, crosscheck, wait, hufd, changingPart);
+}
+
+
+/*
+ * Locks tuple and fetches its newest version and TID.
+ *
+ * relation - table containing tuple
+ * tid - TID of tuple to lock
+ * snapshot - snapshot identifying required version (used for assert check only)
+ * slot - tuple to be returned
+ * cid - current command ID (used for visibility test, and stored into
+ *       tuple's cmax if lock is successful)
+ * mode - indicates if shared or exclusive tuple lock is desired
+ * wait_policy - what to do if tuple lock is not available
+ * flags - indicates how to handle updated tuples
+ * hufd - output parameter, filled in failure cases
+ *
+ * Function result may be:
+ * HeapTupleMayBeUpdated: lock was successfully acquired
+ * HeapTupleInvisible: lock failed because tuple was never visible to us
+ * HeapTupleSelfUpdated: lock failed because tuple updated by self
+ * HeapTupleUpdated: lock failed because tuple updated by other xact
+ * HeapTupleDeleted: lock failed because tuple deleted by other xact
+ * HeapTupleWouldBlock: lock couldn't be acquired and wait_policy is skip
+ *
+ * In the failure cases other than HeapTupleInvisible, the routine fills
+ * *hufd with the tuple's t_ctid, t_xmax (resolving a possible MultiXact,
+ * if necessary), and t_cmax (the last only for HeapTupleSelfUpdated,
+ * since we cannot obtain cmax from a combocid generated by another
+ * transaction).
+ * See comments for struct HeapUpdateFailureData for additional info.
+ */
+static HTSU_Result
+heapam_lock_tuple(Relation relation, ItemPointer tid, Snapshot snapshot,
+               TupleTableSlot *slot, CommandId cid, LockTupleMode mode,
+               LockWaitPolicy wait_policy, uint8 flags,
+               HeapUpdateFailureData *hufd)
+{
+   BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
+   HTSU_Result     result;
+   Buffer          buffer;
+   HeapTuple       tuple = &bslot->base.tupdata;
+
+   hufd->traversed = false;
+
+   Assert(TTS_IS_BUFFERTUPLE(slot));
+
+retry:
+   result = heap_lock_tuple(relation, tid, cid, mode, wait_policy,
+                            (flags & TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS) != 0,
+                            tuple, &buffer, hufd);
+
+   if (result == HeapTupleUpdated &&
+       (flags & TUPLE_LOCK_FLAG_FIND_LAST_VERSION))
+   {
+       ReleaseBuffer(buffer);
+       /* Should not encounter speculative tuple on recheck */
+       Assert(!HeapTupleHeaderIsSpeculative(tuple->t_data));
+
+       if (!ItemPointerEquals(&hufd->ctid, &tuple->t_self))
+       {
+           SnapshotData    SnapshotDirty;
+           TransactionId   priorXmax;
+
+           /* it was updated, so look at the updated version */
+           *tid = hufd->ctid;
+           /* updated row should have xmin matching this xmax */
+           priorXmax = hufd->xmax;
+
+           /*
+            * fetch target tuple
+            *
+            * Loop here to deal with updated or busy tuples
+            */
+           InitDirtySnapshot(SnapshotDirty);
+           for (;;)
+           {
+               if (ItemPointerIndicatesMovedPartitions(tid))
+                   ereport(ERROR,
+                           (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+                            errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
+
+               if (heap_fetch(relation, tid, &SnapshotDirty, tuple, &buffer, NULL))
+               {
+                   /*
+                    * If xmin isn't what we're expecting, the slot must have been
+                    * recycled and reused for an unrelated tuple.  This implies that
+                    * the latest version of the row was deleted, so we need do
+                    * nothing.  (Should be safe to examine xmin without getting
+                    * buffer's content lock.  We assume reading a TransactionId to be
+                    * atomic, and Xmin never changes in an existing tuple, except to
+                    * invalid or frozen, and neither of those can match priorXmax.)
+                    */
+                   if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data),
+                                            priorXmax))
+                   {
+                       ReleaseBuffer(buffer);
+                       return HeapTupleDeleted;
+                   }
+
+                   /* otherwise xmin should not be dirty... */
+                   if (TransactionIdIsValid(SnapshotDirty.xmin))
+                       elog(ERROR, "t_xmin is uncommitted in tuple to be updated");
+
+                   /*
+                    * If tuple is being updated by other transaction then we have to
+                    * wait for its commit/abort, or die trying.
+                    */
+                   if (TransactionIdIsValid(SnapshotDirty.xmax))
+                   {
+                       ReleaseBuffer(buffer);
+                       switch (wait_policy)
+                       {
+                           case LockWaitBlock:
+                               XactLockTableWait(SnapshotDirty.xmax,
+                                                 relation, &tuple->t_self,
+                                                 XLTW_FetchUpdated);
+                               break;
+                           case LockWaitSkip:
+                               if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
+                                   return result;  /* skip instead of waiting */
+                               break;
+                           case LockWaitError:
+                               if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
+                                   ereport(ERROR,
+                                           (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
+                                            errmsg("could not obtain lock on row in relation \"%s\"",
+                                                   RelationGetRelationName(relation))));
+                               break;
+                       }
+                       continue;       /* loop back to repeat heap_fetch */
+                   }
+
+                   /*
+                    * If tuple was inserted by our own transaction, we have to check
+                    * cmin against es_output_cid: cmin >= current CID means our
+                    * command cannot see the tuple, so we should ignore it. Otherwise
+                    * heap_lock_tuple() will throw an error, and so would any later
+                    * attempt to update or delete the tuple.  (We need not check cmax
+                    * because HeapTupleSatisfiesDirty will consider a tuple deleted
+                    * by our transaction dead, regardless of cmax.) We just checked
+                    * that priorXmax == xmin, so we can test that variable instead of
+                    * doing HeapTupleHeaderGetXmin again.
+                    */
+                   if (TransactionIdIsCurrentTransactionId(priorXmax) &&
+                       HeapTupleHeaderGetCmin(tuple->t_data) >= cid)
+                   {
+                       ReleaseBuffer(buffer);
+                       return result;
+                   }
+
+                   hufd->traversed = true;
+                   *tid = tuple->t_data->t_ctid;
+                   ReleaseBuffer(buffer);
+                   goto retry;
+               }
+
+               /*
+                * If the referenced slot was actually empty, the latest version of
+                * the row must have been deleted, so we need do nothing.
+                */
+               if (tuple->t_data == NULL)
+               {
+                   return HeapTupleDeleted;
+               }
+
+               /*
+                * As above, if xmin isn't what we're expecting, do nothing.
+                */
+               if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data),
+                                        priorXmax))
+               {
+                   if (BufferIsValid(buffer))
+                       ReleaseBuffer(buffer);
+                   return HeapTupleDeleted;
+               }
+
+               /*
+                * If we get here, the tuple was found but failed SnapshotDirty.
+                * Assuming the xmin is either a committed xact or our own xact (as it
+                * certainly should be if we're trying to modify the tuple), this must
+                * mean that the row was updated or deleted by either a committed xact
+                * or our own xact.  If it was deleted, we can ignore it; if it was
+                * updated then chain up to the next version and repeat the whole
+                * process.
+                *
+                * As above, it should be safe to examine xmax and t_ctid without the
+                * buffer content lock, because they can't be changing.
+                */
+               if (ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid))
+               {
+                   /* deleted, so forget about it */
+                   if (BufferIsValid(buffer))
+                       ReleaseBuffer(buffer);
+                   return HeapTupleDeleted;
+               }
+
+               /* updated, so look at the updated row */
+               *tid = tuple->t_data->t_ctid;
+               /* updated row should have xmin matching this xmax */
+               priorXmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
+               if (BufferIsValid(buffer))
+                   ReleaseBuffer(buffer);
+               /* loop back to fetch next in chain */
+           }
+       }
+       else
+       {
+           /* tuple was deleted, so give up */
+           return HeapTupleDeleted;
+       }
+   }
+
+   slot->tts_tableOid = RelationGetRelid(relation);
+   ExecStoreBufferHeapTuple(tuple, slot, buffer);
+   ReleaseBuffer(buffer);      /* FIXME: invent option to just transfer pin? */
+
+   return result;
+}
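+
+/*
+ * Illustrative note: EvalPlanQual-style callers are expected to pass
+ * TUPLE_LOCK_FLAG_FIND_LAST_VERSION, in which case the function above,
+ * rather than failing with HeapTupleUpdated, follows the update chain,
+ * locks the latest visible version, and sets hufd->traversed.
+ */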
+
+
+static HTSU_Result
+heapam_heap_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
+                  CommandId cid, Snapshot snapshot, Snapshot crosscheck,
+                  bool wait, HeapUpdateFailureData *hufd,
+                  LockTupleMode *lockmode, bool *update_indexes)
+{
+   bool        shouldFree = true;
+   HeapTuple   tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
+   HTSU_Result result;
+
+   /* Update the tuple with table oid */
+   if (slot->tts_tableOid != InvalidOid)
+       tuple->t_tableOid = slot->tts_tableOid;
+
+   result = heap_update(relation, otid, tuple, cid, crosscheck, wait,
+                        hufd, lockmode);
+   ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
+
+   slot->tts_tableOid = RelationGetRelid(relation);
+
+   /*
+    * Note: instead of having to update the old index tuples associated with
+    * the heap tuple, all we do is form and insert new index tuples. This is
+    * because UPDATEs are actually DELETEs and INSERTs, and index tuple
+    * deletion is done later by VACUUM (see notes in ExecDelete). All we do
+    * here is insert new index tuples.  -cim 9/27/89
+    */
+
+   /*
+    * Decide whether the caller needs to insert new index entries.
+    *
+    * Note: heap_update returns the tid (location) of the new tuple in the
+    * t_self field.
+    *
+    * If it's a HOT update, no new index entries are needed.
+    */
+   *update_indexes = result == HeapTupleMayBeUpdated &&
+       !HeapTupleIsHeapOnly(tuple);
+
+   if (shouldFree)
+       pfree(tuple);
+
+   return result;
+}
+
+static const TupleTableSlotOps *
+heapam_slot_callbacks(Relation relation)
+{
+   return &TTSOpsBufferHeapTuple;
+}
+
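+/*
+ * Variant of heap_getnext() for callers that must get back a heap tuple
+ * and therefore can only work against the heap AM; errors out for any
+ * other table access method.
+ */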
+HeapTuple
+heap_scan_getnext(TableScanDesc sscan, ScanDirection direction)
+{
+   if (unlikely(sscan->rs_rd->rd_rel->relam != HEAP_TABLE_AM_OID))
+       ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                       errmsg("only heap AM is supported")));
+   return heap_getnext(sscan, direction);
+}
+
+static bool
+heapam_satisfies(Relation rel, TupleTableSlot *slot, Snapshot snapshot)
+{
+   BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
+   bool res;
+
+   Assert(TTS_IS_BUFFERTUPLE(slot));
+   Assert(BufferIsValid(bslot->buffer));
+
+   /*
+    * We need buffer pin and lock to call HeapTupleSatisfies.
+    * Caller should be holding pin, but not lock.
+    */
+   LockBuffer(bslot->buffer, BUFFER_LOCK_SHARE);
+   res = HeapTupleSatisfies(bslot->base.tuple, snapshot, bslot->buffer);
+   LockBuffer(bslot->buffer, BUFFER_LOCK_UNLOCK);
+
+   return res;
+}
+
+static IndexFetchTableData *
+heapam_begin_index_fetch(Relation rel)
+{
+   IndexFetchHeapData *hscan = palloc0(sizeof(IndexFetchHeapData));
+
+   hscan->xs_base.rel = rel;
+   hscan->xs_cbuf = InvalidBuffer;
+   /* hscan->xs_continue_hot = false; */
+
+   return &hscan->xs_base;
+}
+
+
+static void
+heapam_reset_index_fetch(IndexFetchTableData *scan)
+{
+   IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
+
+   if (BufferIsValid(hscan->xs_cbuf))
+   {
+       ReleaseBuffer(hscan->xs_cbuf);
+       hscan->xs_cbuf = InvalidBuffer;
+   }
+
+   /* hscan->xs_continue_hot = false; */
+}
+
+static void
+heapam_end_index_fetch(IndexFetchTableData *scan)
+{
+   IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
+
+   heapam_reset_index_fetch(scan);
+
+   pfree(hscan);
+}
+
+static bool
+heapam_fetch_follow(struct IndexFetchTableData *scan,
+                   ItemPointer tid,
+                   Snapshot snapshot,
+                   TupleTableSlot *slot,
+                   bool *call_again, bool *all_dead)
+{
+   IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
+   BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
+   bool got_heap_tuple;
+
+   Assert(TTS_IS_BUFFERTUPLE(slot));
+
+   /* We can skip the buffer-switching logic if we're in mid-HOT chain. */
+   if (!*call_again)
+   {
+       /* Switch to correct buffer if we don't have it already */
+       Buffer      prev_buf = hscan->xs_cbuf;
+
+       hscan->xs_cbuf = ReleaseAndReadBuffer(hscan->xs_cbuf,
+                                             hscan->xs_base.rel,
+                                             ItemPointerGetBlockNumber(tid));
+
+       /*
+        * Prune page, but only if we weren't already on this page
+        */
+       if (prev_buf != hscan->xs_cbuf)
+           heap_page_prune_opt(hscan->xs_base.rel, hscan->xs_cbuf);
+   }
+
+   /* Obtain share-lock on the buffer so we can examine visibility */
+   LockBuffer(hscan->xs_cbuf, BUFFER_LOCK_SHARE);
+   got_heap_tuple = heap_hot_search_buffer(tid,
+                                           hscan->xs_base.rel,
+                                           hscan->xs_cbuf,
+                                           snapshot,
+                                           &bslot->base.tupdata,
+                                           all_dead,
+                                           !*call_again);
+   bslot->base.tupdata.t_self = *tid;
+   LockBuffer(hscan->xs_cbuf, BUFFER_LOCK_UNLOCK);
+
+   if (got_heap_tuple)
+   {
+       /*
+        * Only in a non-MVCC snapshot can more than one member of the HOT
+        * chain be visible.
+        */
+       *call_again = !IsMVCCSnapshot(snapshot);
+       /* FIXME pgstat_count_heap_fetch(scan->indexRelation); */
+
+       slot->tts_tableOid = RelationGetRelid(scan->rel);
+       ExecStoreBufferHeapTuple(&bslot->base.tupdata, slot, hscan->xs_cbuf);
+   }
+   else
+   {
+       /* We've reached the end of the HOT chain. */
+       *call_again = false;
+   }
+
+   return got_heap_tuple;
+}
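+
+/*
+ * Illustrative sketch, not part of this patch (helper name hypothetical):
+ * the index-fetch callbacks above are used in a begin/fetch/end cycle,
+ * roughly as follows.
+ */
+#ifdef NOT_USED
+static void
+example_index_fetch(Relation heaprel, ItemPointer tid, Snapshot snapshot,
+                   TupleTableSlot *slot)
+{
+   IndexFetchTableData *fetch = heapam_begin_index_fetch(heaprel);
+   bool        call_again = false;
+   bool        all_dead = false;
+
+   if (heapam_fetch_follow(fetch, tid, snapshot, slot,
+                           &call_again, &all_dead))
+   {
+       /*
+        * slot now holds a pinned buffer tuple; call_again is set if
+        * further HOT chain members may still be visible.
+        */
+   }
+
+   heapam_end_index_fetch(fetch);
+}
+#endif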
+
+/*
+ * Like IndexBuildHeapScan(), except that instead of scanning the complete
+ * heap, only the given number of blocks are scanned.  Scan to end-of-rel
+ * can be signalled by
+ * passing InvalidBlockNumber as numblocks.  Note that restricting the range
+ * to scan cannot be done when requesting syncscan.
+ *
+ * When "anyvisible" mode is requested, all tuples visible to any transaction
+ * are indexed and counted as live, including those inserted or deleted by
+ * transactions that are still in progress.
+ */
+static double
+IndexBuildHeapRangeScan(Relation heapRelation,
+                       Relation indexRelation,
+                       IndexInfo *indexInfo,
+                       bool allow_sync,
+                       bool anyvisible,
+                       BlockNumber start_blockno,
+                       BlockNumber numblocks,
+                       IndexBuildCallback callback,
+                       void *callback_state,
+                       TableScanDesc sscan)
+{
+   HeapScanDesc scan = (HeapScanDesc) sscan;
+   bool        is_system_catalog;
+   bool        checking_uniqueness;
+   HeapTuple   heapTuple;
+   Datum       values[INDEX_MAX_KEYS];
+   bool        isnull[INDEX_MAX_KEYS];
+   double      reltuples;
+   ExprState  *predicate;
+   TupleTableSlot *slot;
+   EState     *estate;
+   ExprContext *econtext;
+   Snapshot    snapshot;
+   bool        need_unregister_snapshot = false;
+   TransactionId OldestXmin;
+   BlockNumber root_blkno = InvalidBlockNumber;
+   OffsetNumber root_offsets[MaxHeapTuplesPerPage];
+
+   /*
+    * sanity checks
+    */
+   Assert(OidIsValid(indexRelation->rd_rel->relam));
+
+   /* Remember if it's a system catalog */
+   is_system_catalog = IsSystemRelation(heapRelation);
+
+   /* See whether we're verifying uniqueness/exclusion properties */
+   checking_uniqueness = (indexInfo->ii_Unique ||
+                          indexInfo->ii_ExclusionOps != NULL);
+
+   /*
+    * "Any visible" mode is not compatible with uniqueness checks; make sure
+    * only one of those is requested.
+    */
+   Assert(!(anyvisible && checking_uniqueness));
+
+   /*
+    * Need an EState for evaluation of index expressions and partial-index
+    * predicates.  Also a slot to hold the current tuple.
+    */
+   estate = CreateExecutorState();
+   econtext = GetPerTupleExprContext(estate);
+   slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation),
+                                   &TTSOpsHeapTuple);
+
+   /* Arrange for econtext's scan tuple to be the tuple under test */
+   econtext->ecxt_scantuple = slot;
+
+   /* Set up execution state for predicate, if any. */
+   predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
+
+   /*
+    * Prepare for scan of the base relation.  In a normal index build, we use
+    * SnapshotAny because we must retrieve all tuples and do our own time
+    * qual checks (because we have to index RECENTLY_DEAD tuples). In a
+    * concurrent build, or during bootstrap, we take a regular MVCC snapshot
+    * and index whatever's live according to that.
+    */
+   OldestXmin = InvalidTransactionId;
+
+   /* okay to ignore lazy VACUUMs here */
+   if (!IsBootstrapProcessingMode() && !indexInfo->ii_Concurrent)
+       OldestXmin = GetOldestXmin(heapRelation, PROCARRAY_FLAGS_VACUUM);
+
+   if (!scan)
+   {
+       /*
+        * Serial index build.
+        *
+        * Must begin our own heap scan in this case.  We may also need to
+        * register a snapshot whose lifetime is under our direct control.
+        */
+       if (!TransactionIdIsValid(OldestXmin))
+       {
+           snapshot = RegisterSnapshot(GetTransactionSnapshot());
+           need_unregister_snapshot = true;
+       }
+       else
+           snapshot = SnapshotAny;
+
+       sscan = table_beginscan_strat(heapRelation, /* relation */
+                                     snapshot, /* snapshot */
+                                     0,    /* number of keys */
+                                     NULL, /* scan key */
+                                     true, /* buffer access strategy OK */
+                                     allow_sync);  /* syncscan OK? */
+       scan = (HeapScanDesc) sscan;
+   }
+   else
+   {
+       /*
+        * Parallel index build.
+        *
+        * Parallel case never registers/unregisters own snapshot.  Snapshot
+        * is taken from parallel heap scan, and is SnapshotAny or an MVCC
+        * snapshot, based on same criteria as serial case.
+        */
+       Assert(!IsBootstrapProcessingMode());
+       Assert(allow_sync);
+       snapshot = scan->rs_scan.rs_snapshot;
+   }
+
+   /*
+    * Must call GetOldestXmin() with SnapshotAny.  Should never call
+    * GetOldestXmin() with MVCC snapshot. (It's especially worth checking
+    * this for parallel builds, since ambuild routines that support parallel
+    * builds must work these details out for themselves.)
+    */
+   Assert(snapshot == SnapshotAny || IsMVCCSnapshot(snapshot));
+   Assert(snapshot == SnapshotAny ? TransactionIdIsValid(OldestXmin) :
+          !TransactionIdIsValid(OldestXmin));
+   Assert(snapshot == SnapshotAny || !anyvisible);
+
+   /* set our scan endpoints */
+   if (!allow_sync)
+       table_setscanlimits(sscan, start_blockno, numblocks);
+   else
+   {
+       /* syncscan can only be requested on whole relation */
+       Assert(start_blockno == 0);
+       Assert(numblocks == InvalidBlockNumber);
+   }
+
+   reltuples = 0;
+
+   /*
+    * Scan all tuples in the base relation.
+    */
+   while ((heapTuple = heap_scan_getnext(sscan, ForwardScanDirection)) != NULL)
+   {
+       bool        tupleIsAlive;
+
+       CHECK_FOR_INTERRUPTS();
+
+       /*
+        * When dealing with a HOT-chain of updated tuples, we want to index
+        * the values of the live tuple (if any), but index it under the TID
+        * of the chain's root tuple.  This approach is necessary to preserve
+        * the HOT-chain structure in the heap. So we need to be able to find
+        * the root item offset for every tuple that's in a HOT-chain.  When
+        * first reaching a new page of the relation, call
+        * heap_get_root_tuples() to build a map of root item offsets on the
+        * page.
+        *
+        * It might look unsafe to use this information across buffer
+        * lock/unlock.  However, we hold ShareLock on the table so no
+        * ordinary insert/update/delete should occur; and we hold pin on the
+        * buffer continuously while visiting the page, so no pruning
+        * operation can occur either.
+        *
+        * Also, although our opinions about tuple liveness could change while
+        * we scan the page (due to concurrent transaction commits/aborts),
+        * the chain root locations won't, so this info doesn't need to be
+        * rebuilt after waiting for another transaction.
+        *
+        * Note the implied assumption that there is no more than one live
+        * tuple per HOT-chain --- else we could create more than one index
+        * entry pointing to the same root tuple.
+        */
+       if (scan->rs_cblock != root_blkno)
+       {
+           Page        page = BufferGetPage(scan->rs_cbuf);
+
+           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+           heap_get_root_tuples(page, root_offsets);
+           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+
+           root_blkno = scan->rs_cblock;
+       }
+
+       if (snapshot == SnapshotAny)
+       {
+           /* do our own time qual check */
+           bool        indexIt;
+           TransactionId xwait;
+
+   recheck:
+
+           /*
+            * We could possibly get away with not locking the buffer here,
+            * since caller should hold ShareLock on the relation, but let's
+            * be conservative about it.  (This remark is still correct even
+            * with HOT-pruning: our pin on the buffer prevents pruning.)
+            */
+           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+
+           /*
+            * The criteria for counting a tuple as live in this block need to
+            * match what analyze.c's acquire_sample_rows() does, otherwise
+            * CREATE INDEX and ANALYZE may produce wildly different reltuples
+            * values, e.g. when there are many recently-dead tuples.
+            */
+           switch (HeapTupleSatisfiesVacuum(heapTuple, OldestXmin, scan->rs_cbuf))
+           {
+               case HEAPTUPLE_DEAD:
+                   /* Definitely dead, we can ignore it */
+                   indexIt = false;
+                   tupleIsAlive = false;
+                   break;
+               case HEAPTUPLE_LIVE:
+                   /* Normal case, index and unique-check it */
+                   indexIt = true;
+                   tupleIsAlive = true;
+                   /* Count it as live, too */
+                   reltuples += 1;
+                   break;
+               case HEAPTUPLE_RECENTLY_DEAD:
+
+                   /*
+                    * If tuple is recently deleted then we must index it
+                    * anyway to preserve MVCC semantics.  (Pre-existing
+                    * transactions could try to use the index after we finish
+                    * building it, and may need to see such tuples.)
+                    *
+                    * However, if it was HOT-updated then we must only index
+                    * the live tuple at the end of the HOT-chain.  Since this
+                    * breaks semantics for pre-existing snapshots, mark the
+                    * index as unusable for them.
+                    *
+                    * We don't count recently-dead tuples in reltuples, even
+                    * if we index them; see acquire_sample_rows().
+                    */
+                   if (HeapTupleIsHotUpdated(heapTuple))
+                   {
+                       indexIt = false;
+                       /* mark the index as unsafe for old snapshots */
+                       indexInfo->ii_BrokenHotChain = true;
+                   }
+                   else
+                       indexIt = true;
+                   /* In any case, exclude the tuple from unique-checking */
+                   tupleIsAlive = false;
+                   break;
+               case HEAPTUPLE_INSERT_IN_PROGRESS:
+
+                   /*
+                    * In "anyvisible" mode, this tuple is visible and we
+                    * don't need any further checks.
+                    */
+                   if (anyvisible)
+                   {
+                       indexIt = true;
+                       tupleIsAlive = true;
+                       reltuples += 1;
+                       break;
+                   }
+
+                   /*
+                    * Since caller should hold ShareLock or better, normally
+                    * the only way to see this is if it was inserted earlier
+                    * in our own transaction.  However, it can happen in
+                    * system catalogs, since we tend to release write lock
+                    * before commit there.  Give a warning if neither case
+                    * applies.
+                    */
+                   xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);
+                   if (!TransactionIdIsCurrentTransactionId(xwait))
+                   {
+                       if (!is_system_catalog)
+                           elog(WARNING, "concurrent insert in progress within table \"%s\"",
+                                RelationGetRelationName(heapRelation));
+
+                       /*
+                        * If we are performing uniqueness checks, indexing
+                        * such a tuple could lead to a bogus uniqueness
+                        * failure.  In that case we wait for the inserting
+                        * transaction to finish and check again.
+                        */
+                       if (checking_uniqueness)
+                       {
+                           /*
+                            * Must drop the lock on the buffer before we wait
+                            */
+                           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+                           XactLockTableWait(xwait, heapRelation,
+                                             &heapTuple->t_self,
+                                             XLTW_InsertIndexUnique);
+                           CHECK_FOR_INTERRUPTS();
+                           goto recheck;
+                       }
+                   }
+                   else
+                   {
+                       /*
+                        * For consistency with acquire_sample_rows(), count
+                        * HEAPTUPLE_INSERT_IN_PROGRESS tuples as live only
+                        * when inserted by our own transaction.
+                        */
+                       reltuples += 1;
+                   }
+
+                   /*
+                    * We must index such tuples, since if the index build
+                    * commits then they're good.
+                    */
+                   indexIt = true;
+                   tupleIsAlive = true;
+                   break;
+               case HEAPTUPLE_DELETE_IN_PROGRESS:
+
+                   /*
+                    * As with INSERT_IN_PROGRESS case, this is unexpected
+                    * unless it's our own deletion or a system catalog; but
+                    * in anyvisible mode, this tuple is visible.
+                    */
+                   if (anyvisible)
+                   {
+                       indexIt = true;
+                       tupleIsAlive = false;
+                       reltuples += 1;
+                       break;
+                   }
+
+                   xwait = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
+                   if (!TransactionIdIsCurrentTransactionId(xwait))
+                   {
+                       if (!is_system_catalog)
+                           elog(WARNING, "concurrent delete in progress within table \"%s\"",
+                                RelationGetRelationName(heapRelation));
+
+                       /*
+                        * If we are performing uniqueness checks, assuming
+                        * the tuple is dead could lead to missing a
+                        * uniqueness violation.  In that case we wait for the
+                        * deleting transaction to finish and check again.
+                        *
+                        * Also, if it's a HOT-updated tuple, we should not
+                        * index it but rather the live tuple at the end of
+                        * the HOT-chain.  However, the deleting transaction
+                        * could abort, possibly leaving this tuple as live
+                        * after all, in which case it has to be indexed. The
+                        * only way to know what to do is to wait for the
+                        * deleting transaction to finish and check again.
+                        */
+                       if (checking_uniqueness ||
+                           HeapTupleIsHotUpdated(heapTuple))
+                       {
+                           /*
+                            * Must drop the lock on the buffer before we wait
+                            */
+                           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+                           XactLockTableWait(xwait, heapRelation,
+                                             &heapTuple->t_self,
+                                             XLTW_InsertIndexUnique);
+                           CHECK_FOR_INTERRUPTS();
+                           goto recheck;
+                       }
+
+                       /*
+                        * Otherwise index it but don't check for uniqueness,
+                        * the same as a RECENTLY_DEAD tuple.
+                        */
+                       indexIt = true;
+
+                       /*
+                        * Count HEAPTUPLE_DELETE_IN_PROGRESS tuples as live,
+                        * if they were not deleted by the current
+                        * transaction.  That's what acquire_sample_rows()
+                        * does, and we want the behavior to be consistent.
+                        */
+                       reltuples += 1;
+                   }
+                   else if (HeapTupleIsHotUpdated(heapTuple))
+                   {
+                       /*
+                        * It's a HOT-updated tuple deleted by our own xact.
+                        * We can assume the deletion will commit (else the
+                        * index contents don't matter), so treat the same as
+                        * RECENTLY_DEAD HOT-updated tuples.
+                        */
+                       indexIt = false;
+                       /* mark the index as unsafe for old snapshots */
+                       indexInfo->ii_BrokenHotChain = true;
+                   }
+                   else
+                   {
+                       /*
+                        * It's a regular tuple deleted by our own xact. Index
+                        * it, but don't check for uniqueness nor count in
+                        * reltuples, the same as a RECENTLY_DEAD tuple.
+                        */
+                       indexIt = true;
+                   }
+                   /* In any case, exclude the tuple from unique-checking */
+                   tupleIsAlive = false;
+                   break;
+               default:
+                   elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
+                   indexIt = tupleIsAlive = false; /* keep compiler quiet */
+                   break;
+           }
+
+           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+
+           if (!indexIt)
+               continue;
+       }
+       else
+       {
+           /* heap_getnext did the time qual check */
+           tupleIsAlive = true;
+           reltuples += 1;
+       }
+
+       MemoryContextReset(econtext->ecxt_per_tuple_memory);
+
+       /* Set up for predicate or expression evaluation */
+       ExecStoreHeapTuple(heapTuple, slot, false);
+
+       /*
+        * In a partial index, discard tuples that don't satisfy the
+        * predicate.
+        */
+       if (predicate != NULL)
+       {
+           if (!ExecQual(predicate, econtext))
+               continue;
+       }
+
+       /*
+        * For the current heap tuple, extract all the attributes we use in
+        * this index, and note which are null.  This also performs evaluation
+        * of any expressions needed.
+        */
+       FormIndexDatum(indexInfo,
+                      slot,
+                      estate,
+                      values,
+                      isnull);
+
+       /*
+        * You'd think we should go ahead and build the index tuple here, but
+        * some index AMs want to do further processing on the data first.  So
+        * pass the values[] and isnull[] arrays, instead.
+        */
+
+       if (HeapTupleIsHeapOnly(heapTuple))
+       {
+           /*
+            * For a heap-only tuple, pretend its TID is that of the root. See
+            * src/backend/access/heap/README.HOT for discussion.
+            */
+           HeapTupleData rootTuple;
+           OffsetNumber offnum;
+
+           rootTuple = *heapTuple;
+           offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);
+
+           if (!OffsetNumberIsValid(root_offsets[offnum - 1]))
+               ereport(ERROR,
+                       (errcode(ERRCODE_DATA_CORRUPTED),
+                        errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
+                                        ItemPointerGetBlockNumber(&heapTuple->t_self),
+                                        offnum,
+                                        RelationGetRelationName(heapRelation))));
+
+           ItemPointerSetOffsetNumber(&rootTuple.t_self,
+                                      root_offsets[offnum - 1]);
+
+           /* Call the AM's callback routine to process the tuple */
+           callback(indexRelation, &rootTuple, values, isnull, tupleIsAlive,
+                    callback_state);
+       }
+       else
+       {
+           /* Call the AM's callback routine to process the tuple */
+           callback(indexRelation, heapTuple, values, isnull, tupleIsAlive,
+                    callback_state);
+       }
+   }
+
+   table_endscan(sscan);
+
+   /* we can now forget our snapshot, if set and registered by us */
+   if (need_unregister_snapshot)
+       UnregisterSnapshot(snapshot);
+
+   ExecDropSingleTupleTableSlot(slot);
+
+   FreeExecutorState(estate);
+
+   /* These may have been pointing to the now-gone estate */
+   indexInfo->ii_ExpressionsState = NIL;
+   indexInfo->ii_PredicateState = NULL;
+
+   return reltuples;
+}
+
+/*
+ * validate_index_heapscan - second table scan for concurrent index build
+ *
+ * This has much code in common with IndexBuildHeapRangeScan, but it's enough
+ * different that it seems cleaner to have two routines not one.
+ */
+static void
+validate_index_heapscan(Relation heapRelation,
+                       Relation indexRelation,
+                       IndexInfo *indexInfo,
+                       Snapshot snapshot,
+                       ValidateIndexState *state)
+{
+   TableScanDesc sscan;
+   HeapScanDesc scan;
+   HeapTuple   heapTuple;
+   Datum       values[INDEX_MAX_KEYS];
+   bool        isnull[INDEX_MAX_KEYS];
+   ExprState  *predicate;
+   TupleTableSlot *slot;
+   EState     *estate;
+   ExprContext *econtext;
+   BlockNumber root_blkno = InvalidBlockNumber;
+   OffsetNumber root_offsets[MaxHeapTuplesPerPage];
+   bool        in_index[MaxHeapTuplesPerPage];
+
+   /* state variables for the merge */
+   ItemPointer indexcursor = NULL;
+   ItemPointerData decoded;
+   bool        tuplesort_empty = false;
+
+   /*
+    * sanity checks
+    */
+   Assert(OidIsValid(indexRelation->rd_rel->relam));
+
+   /*
+    * Need an EState for evaluation of index expressions and partial-index
+    * predicates.  Also a slot to hold the current tuple.
+    */
+   estate = CreateExecutorState();
+   econtext = GetPerTupleExprContext(estate);
+   slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation),
+                                   &TTSOpsHeapTuple);
+
+   /* Arrange for econtext's scan tuple to be the tuple under test */
+   econtext->ecxt_scantuple = slot;
+
+   /* Set up execution state for predicate, if any. */
+   predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
+
+   /*
+    * Prepare for scan of the base relation.  We need just those tuples
+    * satisfying the passed-in reference snapshot.  We must disable syncscan
+    * here, because it's critical that we read from block zero forward to
+    * match the sorted TIDs.
+    */
+   sscan = table_beginscan_strat(heapRelation, /* relation */
+                                  snapshot,    /* snapshot */
+                                  0,   /* number of keys */
+                                  NULL,    /* scan key */
+                                  true,    /* buffer access strategy OK */
+                                  false);  /* syncscan not OK */
+   scan = (HeapScanDesc) sscan;
+
+   /*
+    * Scan all tuples matching the snapshot.
+    *
+    * PBORKED: convert this scan to use slots ("slotify")
+    */
+   while ((heapTuple = heap_scan_getnext(sscan, ForwardScanDirection)) != NULL)
+   {
+       ItemPointer heapcursor = &heapTuple->t_self;
+       ItemPointerData rootTuple;
+       OffsetNumber root_offnum;
+
+       CHECK_FOR_INTERRUPTS();
+
+       state->htups += 1;
+
+       /*
+        * As commented in IndexBuildHeapRangeScan, we should index heap-only
+        * tuples under the TIDs of their root tuples; so when we advance onto
+        * a new heap page, build a map of root item offsets on the page.
+        *
+        * This complicates merging against the tuplesort output: we will
+        * visit the live tuples in order by their offsets, but the root
+        * offsets that we need to compare against the index contents might be
+        * ordered differently.  So we might have to "look back" within the
+        * tuplesort output, but only within the current page.  We handle that
+        * by keeping a bool array in_index[] showing all the
+        * already-passed-over tuplesort output TIDs of the current page. We
+        * clear that array here, when advancing onto a new heap page.
+        */
+       if (scan->rs_cblock != root_blkno)
+       {
+           Page        page = BufferGetPage(scan->rs_cbuf);
+
+           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+           heap_get_root_tuples(page, root_offsets);
+           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+
+           memset(in_index, 0, sizeof(in_index));
+
+           root_blkno = scan->rs_cblock;
+       }
+
+       /* Convert actual tuple TID to root TID */
+       rootTuple = *heapcursor;
+       root_offnum = ItemPointerGetOffsetNumber(heapcursor);
+
+       if (HeapTupleIsHeapOnly(heapTuple))
+       {
+           root_offnum = root_offsets[root_offnum - 1];
+           if (!OffsetNumberIsValid(root_offnum))
+               ereport(ERROR,
+                       (errcode(ERRCODE_DATA_CORRUPTED),
+                        errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
+                                        ItemPointerGetBlockNumber(heapcursor),
+                                        ItemPointerGetOffsetNumber(heapcursor),
+                                        RelationGetRelationName(heapRelation))));
+           ItemPointerSetOffsetNumber(&rootTuple, root_offnum);
+       }
+
+       /*
+        * "merge" by skipping through the index tuples until we find or pass
+        * the current root tuple.
+        */
+       while (!tuplesort_empty &&
+              (!indexcursor ||
+               ItemPointerCompare(indexcursor, &rootTuple) < 0))
+       {
+           Datum       ts_val;
+           bool        ts_isnull;
+
+           if (indexcursor)
+           {
+               /*
+                * Remember index items seen earlier on the current heap page
+                */
+               if (ItemPointerGetBlockNumber(indexcursor) == root_blkno)
+                   in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true;
+           }
+
+           tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
+                                                 &ts_val, &ts_isnull, NULL);
+           Assert(tuplesort_empty || !ts_isnull);
+           if (!tuplesort_empty)
+           {
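+               /* TIDs were encoded as int64 datums for the sort; decode. */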
+               itemptr_decode(&decoded, DatumGetInt64(ts_val));
+               indexcursor = &decoded;
+
+               /* If int8 is pass-by-ref, free (encoded) TID Datum memory */
+#ifndef USE_FLOAT8_BYVAL
+               pfree(DatumGetPointer(ts_val));
+#endif
+           }
+           else
+           {
+               /* Be tidy */
+               indexcursor = NULL;
+           }
+       }
+
+       /*
+        * If the tuplesort has overshot *and* we didn't see a match earlier,
+        * then this tuple is missing from the index, so insert it.
+        */
+       if ((tuplesort_empty ||
+            ItemPointerCompare(indexcursor, &rootTuple) > 0) &&
+           !in_index[root_offnum - 1])
+       {
+           MemoryContextReset(econtext->ecxt_per_tuple_memory);
+
+           /* Set up for predicate or expression evaluation */
+           ExecStoreHeapTuple(heapTuple, slot, false);
+
+           /*
+            * In a partial index, discard tuples that don't satisfy the
+            * predicate.
+            */
+           if (predicate != NULL)
+           {
+               if (!ExecQual(predicate, econtext))
+                   continue;
+           }
+
+           /*
+            * For the current heap tuple, extract all the attributes we use
+            * in this index, and note which are null.  This also performs
+            * evaluation of any expressions needed.
+            */
+           FormIndexDatum(indexInfo,
+                          slot,
+                          estate,
+                          values,
+                          isnull);
+
+           /*
+            * You'd think we should go ahead and build the index tuple here,
+            * but some index AMs want to do further processing on the data
+            * first. So pass the values[] and isnull[] arrays, instead.
+            */
+
+           /*
+            * If the tuple is already committed dead, you might think we
+            * could suppress uniqueness checking, but this is no longer true
+            * in the presence of HOT, because the insert is actually a proxy
+            * for a uniqueness check on the whole HOT-chain.  That is, the
+            * tuple we have here could be dead because it was already
+            * HOT-updated, and if so the updating transaction will not have
+            * thought it should insert index entries.  The index AM will
+            * check the whole HOT-chain and correctly detect a conflict if
+            * there is one.
+            */
+
+           index_insert(indexRelation,
+                        values,
+                        isnull,
+                        &rootTuple,
+                        heapRelation,
+                        indexInfo->ii_Unique ?
+                        UNIQUE_CHECK_YES : UNIQUE_CHECK_NO,
+                        indexInfo);
+
+           state->tups_inserted += 1;
+       }
+   }
+
+   table_endscan(sscan);
+
+   ExecDropSingleTupleTableSlot(slot);
+
+   FreeExecutorState(estate);
+
+   /* These may have been pointing to the now-gone estate */
+   indexInfo->ii_ExpressionsState = NIL;
+   indexInfo->ii_PredicateState = NULL;
+}
+
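+/*
+ * Collect the visible tuples on the page of the relation indicated by
+ * tbmres, in preparation for returning them one by one.  Returns true if
+ * at least one visible tuple was found on the page.
+ */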
+static bool
+heapam_scan_bitmap_pagescan(TableScanDesc sscan,
+                           TBMIterateResult *tbmres)
+{
+   HeapScanDesc scan = (HeapScanDesc) sscan;
+   BlockNumber page = tbmres->blockno;
+   Buffer      buffer;
+   Snapshot    snapshot;
+   int         ntup;
+
+   scan->rs_cindex = 0;
+   scan->rs_ntuples = 0;
+
+   /*
+    * Ignore any claimed entries past what we think is the end of the
+    * relation.  (This is probably not necessary given that we got at
+    * least AccessShareLock on the table before performing any of the
+    * indexscans, but let's be safe.)
+    */
+   if (page >= scan->rs_scan.rs_nblocks)
+       return false;
+
+   scan->rs_cbuf = ReleaseAndReadBuffer(scan->rs_cbuf,
+                                        scan->rs_scan.rs_rd,
+                                        page);
+   scan->rs_cblock = page;
+   buffer = scan->rs_cbuf;
+   snapshot = scan->rs_scan.rs_snapshot;
+
+   ntup = 0;
+
+   /*
+    * Prune and repair fragmentation for the whole page, if possible.
+    */
+   heap_page_prune_opt(scan->rs_scan.rs_rd, buffer);
+
+   /*
+    * We must hold share lock on the buffer content while examining tuple
+    * visibility.  Afterwards, however, the tuples we have found to be
+    * visible are guaranteed good as long as we hold the buffer pin.
+    */
+   LockBuffer(buffer, BUFFER_LOCK_SHARE);
+
+   /*
+    * We need two separate strategies for lossy and non-lossy cases.
+    */
+   if (tbmres->ntuples >= 0)
+   {
+       /*
+        * Bitmap is non-lossy, so we just look through the offsets listed in
+        * tbmres; but we have to follow any HOT chain starting at each such
+        * offset.
+        */
+       int         curslot;
+
+       for (curslot = 0; curslot < tbmres->ntuples; curslot++)
+       {
+           OffsetNumber offnum = tbmres->offsets[curslot];
+           ItemPointerData tid;
+           HeapTupleData heapTuple;
+
+           ItemPointerSet(&tid, page, offnum);
+           if (heap_hot_search_buffer(&tid, sscan->rs_rd, buffer, snapshot,
+                                      &heapTuple, NULL, true))
+               scan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid);
+       }
+   }
+   else
+   {
+       /*
+        * Bitmap is lossy, so we must examine each item pointer on the page.
+        * But we can ignore HOT chains, since we'll check each tuple anyway.
+        */
+       Page        dp = (Page) BufferGetPage(buffer);
+       OffsetNumber maxoff = PageGetMaxOffsetNumber(dp);
+       OffsetNumber offnum;
+
+       for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
+       {
+           ItemId      lp;
+           HeapTupleData loctup;
+           bool        valid;
+
+           lp = PageGetItemId(dp, offnum);
+           if (!ItemIdIsNormal(lp))
+               continue;
+           loctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
+           loctup.t_len = ItemIdGetLength(lp);
+           loctup.t_tableOid = scan->rs_scan.rs_rd->rd_id;
+           ItemPointerSet(&loctup.t_self, page, offnum);
+           valid = HeapTupleSatisfies(&loctup, snapshot, buffer);
+           if (valid)
+           {
+               scan->rs_vistuples[ntup++] = offnum;
+               PredicateLockTuple(scan->rs_scan.rs_rd, &loctup, snapshot);
+           }
+           CheckForSerializableConflictOut(valid, scan->rs_scan.rs_rd, &loctup,
+                                           buffer, snapshot);
+       }
+   }
+
+   LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+
+   Assert(ntup <= MaxHeapTuplesPerPage);
+   scan->rs_ntuples = ntup;
+
+   return ntup > 0;
+}
+
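+/*
+ * Return the next tuple collected by heapam_scan_bitmap_pagescan, storing
+ * it into the given slot.
+ */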
+static bool
+heapam_scan_bitmap_pagescan_next(TableScanDesc sscan, TupleTableSlot *slot)
+{
+   HeapScanDesc scan = (HeapScanDesc) sscan;
+   OffsetNumber targoffset;
+   Page        dp;
+   ItemId      lp;
+
+   if (scan->rs_cindex < 0 || scan->rs_cindex >= scan->rs_ntuples)
+       return false;
+
+   targoffset = scan->rs_vistuples[scan->rs_cindex];
+   dp = (Page) BufferGetPage(scan->rs_cbuf);
+   lp = PageGetItemId(dp, targoffset);
+   Assert(ItemIdIsNormal(lp));
+
+   scan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
+   scan->rs_ctup.t_len = ItemIdGetLength(lp);
+   scan->rs_ctup.t_tableOid = scan->rs_scan.rs_rd->rd_id;
+   ItemPointerSet(&scan->rs_ctup.t_self, scan->rs_cblock, targoffset);
+
+   pgstat_count_heap_fetch(scan->rs_scan.rs_rd);
+
+   /*
+    * Set up the result slot to point to this tuple.  Note that the
+    * slot acquires a pin on the buffer.
+    */
+   ExecStoreBufferHeapTuple(&scan->rs_ctup,
+                            slot,
+                            scan->rs_cbuf);
+
+   scan->rs_cindex++;
+
+   return true;
+}
+
+/*
+ * Check visibility of the tuple during a sample scan.
+ */
+static bool
+SampleHeapTupleVisible(HeapScanDesc scan, Buffer buffer,
+                       HeapTuple tuple,
+                       OffsetNumber tupoffset)
+{
+   if (scan->rs_scan.rs_pageatatime)
+   {
+       /*
+        * In pageatatime mode, heapgetpage() already did visibility checks,
+        * so just look at the info it left in rs_vistuples[].
+        *
+        * We use a binary search over the known-sorted array.  Note: we could
+        * save some effort if we insisted that NextSampleTuple select tuples
+        * in increasing order, but it's not clear that there would be enough
+        * gain to justify the restriction.
+        */
+       int         start = 0,
+                   end = scan->rs_ntuples - 1;
+
+       while (start <= end)
+       {
+           int         mid = (start + end) / 2;
+           OffsetNumber curoffset = scan->rs_vistuples[mid];
+
+           if (tupoffset == curoffset)
+               return true;
+           else if (tupoffset < curoffset)
+               end = mid - 1;
+           else
+               start = mid + 1;
+       }
+
+       return false;
+   }
+   else
+   {
+       /* Otherwise, we have to check the tuple individually. */
+       return HeapTupleSatisfies(tuple, scan->rs_scan.rs_snapshot, buffer);
+   }
+}
+
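+/*
+ * Select the next block of the relation to sample, as directed by the
+ * tablesample method; returns false once the scan is complete.
+ */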
+static bool
+heapam_scan_sample_next_block(TableScanDesc sscan,
+                              struct SampleScanState *scanstate)
+{
+   HeapScanDesc scan = (HeapScanDesc) sscan;
+   TsmRoutine *tsm = scanstate->tsmroutine;
+   BlockNumber blockno;
+
+   /* return false immediately if relation is empty */
+   if (scan->rs_scan.rs_nblocks == 0)
+       return false;
+
+   if (tsm->NextSampleBlock)
+   {
+       blockno = tsm->NextSampleBlock(scanstate, scan->rs_scan.rs_nblocks);
+       scan->rs_cblock = blockno;
+   }
+   else
+   {
+       /* scanning table sequentially */
+
+       if (scan->rs_cblock == InvalidBlockNumber)
+       {
+           Assert(!scan->rs_inited);
+           blockno = scan->rs_scan.rs_startblock;
+       }
+       else
+       {
+           Assert(scan->rs_inited);
+
+           blockno = scan->rs_cblock + 1;
+
+           if (blockno >= scan->rs_scan.rs_nblocks)
+           {
+               /* wrap to beginning of rel, might not have started at 0 */
+               blockno = 0;
+           }
+
+           /*
+            * Report our new scan position for synchronization purposes.
+            *
+            * Note: we do this before checking for end of scan so that the
+            * final state of the position hint is back at the start of the
+            * rel.  That's not strictly necessary, but otherwise when you run
+            * the same query multiple times the starting position would shift
+            * a little bit backwards on every invocation, which is confusing.
+            * We don't guarantee any specific ordering in general, though.
+            */
+           if (scan->rs_scan.rs_syncscan)
+               ss_report_location(scan->rs_scan.rs_rd, blockno);
+
+           if (blockno == scan->rs_scan.rs_startblock)
+           {
+               blockno = InvalidBlockNumber;
+           }
+       }
+   }
+
+   if (!BlockNumberIsValid(blockno))
+   {
+       if (BufferIsValid(scan->rs_cbuf))
+           ReleaseBuffer(scan->rs_cbuf);
+       scan->rs_cbuf = InvalidBuffer;
+       scan->rs_cblock = InvalidBlockNumber;
+       scan->rs_inited = false;
+
+       return false;
+   }
+
+   heapgetpage(sscan, blockno);
+   scan->rs_inited = true;
+
+   return true;
+}
+
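+/*
+ * Return the next visible sample tuple from the current block, as selected
+ * by the tablesample method; returns false when the block is exhausted.
+ */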
+static bool
+heapam_scan_sample_next_tuple(TableScanDesc sscan,
+                              struct SampleScanState *scanstate,
+                              TupleTableSlot *slot)
+{
+   HeapScanDesc scan = (HeapScanDesc) sscan;
+   TsmRoutine *tsm = scanstate->tsmroutine;
+   BlockNumber blockno = scan->rs_cblock;
+   bool        pagemode = scan->rs_scan.rs_pageatatime;
+
+   Page        page;
+   bool        all_visible;
+   OffsetNumber maxoffset;
+
+   ExecClearTuple(slot);
+
+   /*
+    * When not using pagemode, we must lock the buffer during tuple
+    * visibility checks.
+    */
+   if (!pagemode)
+       LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+
+   page = (Page) BufferGetPage(scan->rs_cbuf);
+   all_visible = PageIsAllVisible(page) &&
+       !scan->rs_scan.rs_snapshot->takenDuringRecovery;
+   maxoffset = PageGetMaxOffsetNumber(page);
+
+   for (;;)
+   {
+       OffsetNumber tupoffset;
+
+       CHECK_FOR_INTERRUPTS();
+
+       /* Ask the tablesample method which tuples to check on this page. */
+       tupoffset = tsm->NextSampleTuple(scanstate,
+                                        blockno,
+                                        maxoffset);
+
+       if (OffsetNumberIsValid(tupoffset))
+       {
+           ItemId      itemid;
+           bool        visible;
+           HeapTuple   tuple = &(scan->rs_ctup);
+
+           /* Skip invalid tuple pointers. */
+           itemid = PageGetItemId(page, tupoffset);
+           if (!ItemIdIsNormal(itemid))
+               continue;
+
+           tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid);
+           tuple->t_len = ItemIdGetLength(itemid);
+           ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
+
+           if (all_visible)
+               visible = true;
+           else
+               visible = SampleHeapTupleVisible(scan, scan->rs_cbuf, tuple, tupoffset);
+
+           /* in pagemode, heapgetpage did this for us */
+           if (!pagemode)
+               CheckForSerializableConflictOut(visible, scan->rs_scan.rs_rd, tuple,
+                                               scan->rs_cbuf, scan->rs_scan.rs_snapshot);
+
+           /* Try next tuple from same page. */
+           if (!visible)
+               continue;
+
+           ExecStoreBufferHeapTuple(tuple, slot, scan->rs_cbuf);
+
+           /* Found visible tuple, return it. */
+           if (!pagemode)
+               LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+
+           /* Count successfully-fetched tuples as heap fetches */
+           pgstat_count_heap_getnext(scan->rs_scan.rs_rd);
+
+           return true;
+       }
+       else
+       {
+           /*
+            * If we get here, it means we've exhausted the items on this
+            * page and it's time to move to the next.
+            */
+           if (!pagemode)
+               LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+
+           break;
+       }
+   }
+
+   return false;
+}
+
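+/*
+ * Prepare to analyze the given block: read it in and keep it pinned and
+ * share-locked while its tuples are examined.
+ */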
+static void
+heapam_scan_analyze_next_block(TableScanDesc sscan, BlockNumber blockno,
+                               BufferAccessStrategy bstrategy)
+{
+   HeapScanDesc scan = (HeapScanDesc) sscan;
+
+   /*
+    * We must maintain a pin on the target page's buffer to ensure that
+    * the maxoffset value stays good (else concurrent VACUUM might delete
+    * tuples out from under us).  Hence, pin the page until we are done
+    * looking at it.  We also choose to hold sharelock on the buffer
+    * throughout --- we could release and re-acquire sharelock for each
+    * tuple, but since we aren't doing much work per tuple, the extra
+    * lock traffic is probably better avoided.
+    */
+   scan->rs_cblock = blockno;
+   scan->rs_cbuf = ReadBufferExtended(scan->rs_scan.rs_rd, MAIN_FORKNUM, blockno,
+                                      RBM_NORMAL, bstrategy);
+   scan->rs_cindex = FirstOffsetNumber;
+   LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+}
+
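+/*
+ * Return the next tuple to sample from the block prepared by
+ * heapam_scan_analyze_next_block, counting live and dead rows as tuples
+ * are classified.
+ */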
+static bool
+heapam_scan_analyze_next_tuple(TableScanDesc sscan, TransactionId OldestXmin,
+                               double *liverows, double *deadrows,
+                               TupleTableSlot *slot)
+{
+   HeapScanDesc scan = (HeapScanDesc) sscan;
+   Page        targpage;
+   OffsetNumber maxoffset;
+   BufferHeapTupleTableSlot *hslot;
+
+   Assert(TTS_IS_BUFFERTUPLE(slot));
+
+   hslot = (BufferHeapTupleTableSlot *) slot;
+   targpage = BufferGetPage(scan->rs_cbuf);
+   maxoffset = PageGetMaxOffsetNumber(targpage);
+
+   /* Inner loop over all tuples on the selected page */
+   for (; scan->rs_cindex <= maxoffset; scan->rs_cindex++)
+   {
+       ItemId      itemid;
+       HeapTuple   targtuple = &hslot->base.tupdata;
+       bool        sample_it = false;
+
+       itemid = PageGetItemId(targpage, scan->rs_cindex);
+
+       /*
+        * We ignore unused and redirect line pointers.  DEAD line
+        * pointers should be counted as dead, because we need vacuum to
+        * run to get rid of them.  Note that this rule agrees with the
+        * way that heap_page_prune() counts things.
+        */
+       if (!ItemIdIsNormal(itemid))
+       {
+           if (ItemIdIsDead(itemid))
+               *deadrows += 1;
+           continue;
+       }
+
+       ItemPointerSet(&targtuple->t_self, scan->rs_cblock, scan->rs_cindex);
+
+       targtuple->t_tableOid = RelationGetRelid(scan->rs_scan.rs_rd);
+       targtuple->t_data = (HeapTupleHeader) PageGetItem(targpage, itemid);
+       targtuple->t_len = ItemIdGetLength(itemid);
+
+       switch (HeapTupleSatisfiesVacuum(targtuple, OldestXmin, scan->rs_cbuf))
+       {
+           case HEAPTUPLE_LIVE:
+               sample_it = true;
+               *liverows += 1;
+               break;
+
+           case HEAPTUPLE_DEAD:
+           case HEAPTUPLE_RECENTLY_DEAD:
+               /* Count dead and recently-dead rows */
+               *deadrows += 1;
+               break;
+
+           case HEAPTUPLE_INSERT_IN_PROGRESS:
+
+               /*
+                * Insert-in-progress rows are not counted.  We assume
+                * that when the inserting transaction commits or aborts,
+                * it will send a stats message to increment the proper
+                * count.  This works right only if that transaction ends
+                * after we finish analyzing the table; if things happen
+                * in the other order, its stats update will be
+                * overwritten by ours.  However, the error will be large
+                * only if the other transaction runs long enough to
+                * insert many tuples, so assuming it will finish after us
+                * is the safer option.
+                *
+                * A special case is that the inserting transaction might
+                * be our own.  In this case we should count and sample
+                * the row, to accommodate users who load a table and
+                * analyze it in one transaction.  (pgstat_report_analyze
+                * has to adjust the numbers we send to the stats
+                * collector to make this come out right.)
+                */
+               if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(targtuple->t_data)))
+               {
+                   sample_it = true;
+                   *liverows += 1;
+               }
+               break;
+
+           case HEAPTUPLE_DELETE_IN_PROGRESS:
+
+               /*
+                * We count delete-in-progress rows as still live, using
+                * the same reasoning given above; but we don't bother to
+                * include them in the sample.
+                *
+                * If the delete was done by our own transaction, however,
+                * we must count the row as dead to make
+                * pgstat_report_analyze's stats adjustments come out
+                * right.  (Note: this works out properly when the row was
+                * both inserted and deleted in our xact.)
+                */
+               if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(targtuple->t_data)))
+                   *deadrows += 1;
+               else
+                   *liverows += 1;
+               break;
+
+           default:
+               elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
+               break;
+       }
+
+       if (sample_it)
+       {
+           ExecStoreBufferHeapTuple(targtuple, slot, scan->rs_cbuf);
+           scan->rs_cindex++;
+
+           /* note that we leave the buffer locked here! */
+           return true;
+       }
+   }
+
+   /* Now release the lock and pin on the page */
+   UnlockReleaseBuffer(scan->rs_cbuf);
+   scan->rs_cbuf = InvalidBuffer;
+
+   return false;
+}
+
+/*
+ * Reconstruct and rewrite the given tuple
+ *
+ * We cannot simply copy the tuple as-is, for several reasons:
+ *
+ * 1. We'd like to squeeze out the values of any dropped columns, both
+ * to save space and to ensure we have no corner-case failures. (It's
+ * possible for example that the new table hasn't got a TOAST table
+ * and so is unable to store any large values of dropped cols.)
+ *
+ * 2. The tuple might not even be legal for the new table; this is
+ * currently only known to happen as an after-effect of ALTER TABLE
+ * SET WITHOUT OIDS.
+ *
+ * So, we must reconstruct the tuple from component Datums.
+ */
+static void
+reform_and_rewrite_tuple(HeapTuple tuple,
+                        Relation OldHeap, Relation NewHeap,
+                        Datum *values, bool *isnull, RewriteState rwstate)
+{
+   TupleDesc oldTupDesc = RelationGetDescr(OldHeap);
+   TupleDesc newTupDesc = RelationGetDescr(NewHeap);
+   HeapTuple   copiedTuple;
+   int         i;
+
+   heap_deform_tuple(tuple, oldTupDesc, values, isnull);
+
+   /* Be sure to null out any dropped columns */
+   for (i = 0; i < newTupDesc->natts; i++)
+   {
+       if (TupleDescAttr(newTupDesc, i)->attisdropped)
+           isnull[i] = true;
+   }
+
+   copiedTuple = heap_form_tuple(newTupDesc, values, isnull);
+
+   /* The heap rewrite module does the rest */
+   rewrite_heap_tuple(rwstate, tuple, copiedTuple);
+
+   heap_freetuple(copiedTuple);
+}
+
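+/*
+ * Copy the contents of OldHeap into NewHeap for CLUSTER / VACUUM FULL,
+ * scanning either in OldIndex order, via an explicit sort, or sequentially.
+ */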
+static void
+heap_copy_for_cluster(Relation OldHeap, Relation NewHeap, Relation OldIndex,
+                      bool use_sort,
+                      TransactionId OldestXmin, TransactionId FreezeXid,
+                      MultiXactId MultiXactCutoff,
+                      double *num_tuples, double *tups_vacuumed,
+                      double *tups_recently_dead)
+{
+   RewriteState rwstate;
+   IndexScanDesc indexScan;
+   TableScanDesc heapScan;
+   bool        use_wal;
+   bool        is_system_catalog;
+   Tuplesortstate *tuplesort;
+   TupleDesc   oldTupDesc = RelationGetDescr(OldHeap);
+   TupleDesc   newTupDesc = RelationGetDescr(NewHeap);
+   TupleTableSlot *slot;
+   int         natts;
+   Datum      *values;
+   bool       *isnull;
+   BufferHeapTupleTableSlot *hslot;
+
+   /* Remember if it's a system catalog */
+   is_system_catalog = IsSystemRelation(OldHeap);
+
+   /*
+    * We need to log the copied data in WAL iff WAL archiving/streaming is
+    * enabled AND it's a WAL-logged rel.
+    */
+   use_wal = XLogIsNeeded() && RelationNeedsWAL(NewHeap);
+
+   /* use_wal off requires smgr_targblock be initially invalid */
+   Assert(RelationGetTargetBlock(NewHeap) == InvalidBlockNumber);
+
+   /* Preallocate values/isnull arrays */
+   natts = newTupDesc->natts;
+   values = (Datum *) palloc(natts * sizeof(Datum));
+   isnull = (bool *) palloc(natts * sizeof(bool));
+
+   /* Initialize the rewrite operation */
+   rwstate = begin_heap_rewrite(OldHeap, NewHeap, OldestXmin, FreezeXid,
+                                MultiXactCutoff, use_wal);
+
+   /* Set up sorting if wanted */
+   if (use_sort)
+       tuplesort = tuplesort_begin_cluster(oldTupDesc, OldIndex,
+                                           maintenance_work_mem,
+                                           NULL, false);
+   else
+       tuplesort = NULL;
+
+   /*
+    * Prepare to scan the OldHeap.  To ensure we see recently-dead tuples
+    * that still need to be copied, we scan with SnapshotAny and use
+    * HeapTupleSatisfiesVacuum for the visibility test.
+    */
+   if (OldIndex != NULL && !use_sort)
+   {
+       heapScan = NULL;
+       indexScan = index_beginscan(OldHeap, OldIndex, SnapshotAny, 0, 0);
+       index_rescan(indexScan, NULL, 0, NULL, 0);
+   }
+   else
+   {
+       heapScan = table_beginscan(OldHeap, SnapshotAny, 0, (ScanKey) NULL);
+       indexScan = NULL;
+   }
+
+   slot = table_gimmegimmeslot(OldHeap, NULL);
+   hslot = (BufferHeapTupleTableSlot *) slot;
+
+   /*
+    * Scan through the OldHeap, either in OldIndex order or sequentially;
+    * copy each tuple into the NewHeap, or transiently to the tuplesort
+    * module.  Note that we don't bother sorting dead tuples (they won't get
+    * to the new table anyway).
+    */
+   for (;;)
+   {
+       bool        isdead;
+       TransactionId xid;
+
+       CHECK_FOR_INTERRUPTS();
+
+       if (indexScan != NULL)
+       {
+           if (!index_getnext_slot(indexScan, ForwardScanDirection, slot))
+               break;
+
+           /* Since we used no scan keys, should never need to recheck */
+           if (indexScan->xs_recheck)
+               elog(ERROR, "CLUSTER does not support lossy index conditions");
+       }
+       else
+       {
+           if (!table_scan_getnextslot(heapScan, ForwardScanDirection, slot))
+               break;
+       }
+
+       LockBuffer(hslot->buffer, BUFFER_LOCK_SHARE);
+
+       switch (HeapTupleSatisfiesVacuum(hslot->base.tuple, OldestXmin, hslot->buffer))
+       {
+           case HEAPTUPLE_DEAD:
+               /* Definitely dead */
+               isdead = true;
+               break;
+           case HEAPTUPLE_RECENTLY_DEAD:
+               *tups_recently_dead += 1;
+               /* fall through */
+           case HEAPTUPLE_LIVE:
+               /* Live or recently dead, must copy it */
+               isdead = false;
+               break;
+           case HEAPTUPLE_INSERT_IN_PROGRESS:
+
+               /*
+                * Since we hold exclusive lock on the relation, normally the
+                * only way to see this is if it was inserted earlier in our
+                * own transaction.  However, it can happen in system
+                * catalogs, since we tend to release write lock before commit
+                * there.  Give a warning if neither case applies; but in any
+                * case we had better copy it.
+                */
+               xid = HeapTupleHeaderGetXmin(hslot->base.tuple->t_data);
+               if (!is_system_catalog && !TransactionIdIsCurrentTransactionId(xid))
+                   elog(WARNING, "concurrent insert in progress within table \"%s\"",
+                        RelationGetRelationName(OldHeap));
+               /* treat as live */
+               isdead = false;
+               break;
+           case HEAPTUPLE_DELETE_IN_PROGRESS:
+
+               /*
+                * Similar situation to INSERT_IN_PROGRESS case.
+                */
+               xid = HeapTupleHeaderGetUpdateXid(hslot->base.tuple->t_data);
+               if (!is_system_catalog && !TransactionIdIsCurrentTransactionId(xid))
+                   elog(WARNING, "concurrent delete in progress within table \"%s\"",
+                        RelationGetRelationName(OldHeap));
+               /* treat as recently dead */
+               *tups_recently_dead += 1;
+               isdead = false;
+               break;
+           default:
+               elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
+               isdead = false; /* keep compiler quiet */
+               break;
+       }
+
+       LockBuffer(hslot->buffer, BUFFER_LOCK_UNLOCK);
+
+       if (isdead)
+       {
+           *tups_vacuumed += 1;
+           /* heap rewrite module still needs to see it... */
+           if (rewrite_heap_dead_tuple(rwstate, ExecFetchSlotHeapTuple(slot, false, NULL)))
+           {
+               /* A previous recently-dead tuple is now known dead */
+               *tups_vacuumed += 1;
+               *tups_recently_dead -= 1;
+           }
+           continue;
+       }
+
+       *num_tuples += 1;
+       if (tuplesort != NULL)
+           tuplesort_puttupleslot(tuplesort, slot);
+       else
+           reform_and_rewrite_tuple(ExecFetchSlotHeapTuple(slot, false, NULL),
+                                    OldHeap, NewHeap,
+                                    values, isnull, rwstate);
+   }
+
+   if (indexScan != NULL)
+       index_endscan(indexScan);
+   if (heapScan != NULL)
+       table_endscan(heapScan);
+
+   ExecDropSingleTupleTableSlot(slot);
+
+   /*
+    * In scan-and-sort mode, complete the sort, then read out all live tuples
+    * from the tuplesort and write them to the new relation.
+    */
+   if (tuplesort != NULL)
+   {
+       tuplesort_performsort(tuplesort);
+
+       for (;;)
+       {
+           HeapTuple   tuple;
+
+           CHECK_FOR_INTERRUPTS();
+
+           tuple = tuplesort_getheaptuple(tuplesort, true);
+           if (tuple == NULL)
+               break;
+
+           reform_and_rewrite_tuple(tuple,
+                                    OldHeap, NewHeap,
+                                    values, isnull, rwstate);
+       }
+
+       tuplesort_end(tuplesort);
+   }
+
+   /* Write out any remaining tuples, and fsync if needed */
+   end_heap_rewrite(rwstate);
+
+   /* Clean up */
+   pfree(values);
+   pfree(isnull);
+}
+
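+/*
+ * Definition of the heap table access method, wiring the callbacks defined
+ * above into a TableAmRoutine.
+ */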
+static const TableAmRoutine heapam_methods = {
+   .type = T_TableAmRoutine,
+
+   .slot_callbacks = heapam_slot_callbacks,
+
+   .snapshot_satisfies = heapam_satisfies,
+
+   .scan_begin = heap_beginscan,
+   .scansetlimits = heap_setscanlimits,
+   .scan_getnextslot = heap_getnextslot,
+   .scan_end = heap_endscan,
+   .scan_rescan = heap_rescan,
+   .scan_update_snapshot = heap_update_snapshot,
+
+   .scan_bitmap_pagescan = heapam_scan_bitmap_pagescan,
+   .scan_bitmap_pagescan_next = heapam_scan_bitmap_pagescan_next,
+
+   .scan_sample_next_block = heapam_scan_sample_next_block,
+   .scan_sample_next_tuple = heapam_scan_sample_next_tuple,
+
+   .tuple_fetch_row_version = heapam_fetch_row_version,
+   .tuple_fetch_follow = heapam_fetch_follow,
+   .tuple_insert = heapam_heap_insert,
+   .tuple_insert_speculative = heapam_heap_insert_speculative,
+   .tuple_complete_speculative = heapam_heap_complete_speculative,
+   .tuple_delete = heapam_heap_delete,
+   .tuple_update = heapam_heap_update,
+   .tuple_lock = heapam_lock_tuple,
+   .multi_insert = heap_multi_insert,
+
+   .tuple_get_latest_tid = heap_get_latest_tid,
+
+   .relation_vacuum = heap_vacuum_rel,
+   .scan_analyze_next_block = heapam_scan_analyze_next_block,
+   .scan_analyze_next_tuple = heapam_scan_analyze_next_tuple,
+   .relation_copy_for_cluster = heap_copy_for_cluster,
+   .relation_sync = heap_sync,
+
+   .begin_index_fetch = heapam_begin_index_fetch,
+   .reset_index_fetch = heapam_reset_index_fetch,
+   .end_index_fetch = heapam_end_index_fetch,
+
+   .index_build_range_scan = IndexBuildHeapRangeScan,
+
+   .index_validate_scan = validate_index_heapscan
+};
+
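+/*
+ * Return the heap table access method's routine table directly, for callers
+ * that do not want to go through the handler function.
+ */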
+const TableAmRoutine *
+GetHeapamTableAmRoutine(void)
+{
+   return &heapam_methods;
+}
+
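+/*
+ * SQL-callable handler function that returns the heap TableAmRoutine.
+ */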
+Datum
+heap_tableam_handler(PG_FUNCTION_ARGS)
+{
+   PG_RETURN_POINTER(&heapam_methods);
+}
similarity index 96%
rename from src/backend/utils/time/tqual.c
rename to src/backend/access/heap/heapam_visibility.c
index f7c4c9188ce2dd66a67d62cc91c1c18e4fd7518e..1ac1a20c1dc7b0846f7feedb2ed5e2e8d447ece1 100644 (file)
@@ -1,7 +1,6 @@
 /*-------------------------------------------------------------------------
  *
- * tqual.c
- *   POSTGRES "time qualification" code, ie, tuple visibility rules.
+ * POSTGRES "time qualification" code, i.e., tuple visibility rules.
  *
  * NOTE: all the HeapTupleSatisfies routines will update the tuple's
  * "hint" status bits if we see that the inserting or deleting transaction
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *   src/backend/utils/time/tqual.c
+ *   src/backend/access/heap/heapam_visibility.c
  *
  *-------------------------------------------------------------------------
  */
 
 #include "postgres.h"
 
+#include "access/heapam.h"
 #include "access/htup_details.h"
 #include "access/multixact.h"
 #include "access/subtrans.h"
 #include "utils/snapmgr.h"
 #include "utils/tqual.h"
 
-
 /* Static variables representing various special snapshot semantics */
-SnapshotData SnapshotSelfData = {HeapTupleSatisfiesSelf};
-SnapshotData SnapshotAnyData = {HeapTupleSatisfiesAny};
-
+SnapshotData SnapshotSelfData = {SELF_VISIBILITY};
+SnapshotData SnapshotAnyData = {ANY_VISIBILITY};
 
 /*
  * SetHintBits()
@@ -117,6 +115,9 @@ static inline void
 SetHintBits(HeapTupleHeader tuple, Buffer buffer,
            uint16 infomask, TransactionId xid)
 {
+   if (!BufferIsValid(buffer))
+       return;
+
    if (TransactionIdIsValid(xid))
    {
        /* NB: xid must be known committed here! */
@@ -172,7 +173,7 @@ HeapTupleSetHintBits(HeapTupleHeader tuple, Buffer buffer,
  *         (Xmax != my-transaction &&          the row was deleted by another transaction
  *          Xmax is not committed)))           that has not been committed
  */
-bool
+static bool
 HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer)
 {
    HeapTupleHeader tuple = htup->t_data;
@@ -342,7 +343,7 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer)
  * HeapTupleSatisfiesAny
  *     Dummy "satisfies" routine: any tuple satisfies SnapshotAny.
  */
-bool
+static bool
 HeapTupleSatisfiesAny(HeapTuple htup, Snapshot snapshot, Buffer buffer)
 {
    return true;
@@ -362,7 +363,7 @@ HeapTupleSatisfiesAny(HeapTuple htup, Snapshot snapshot, Buffer buffer)
  * Among other things, this means you can't do UPDATEs of rows in a TOAST
  * table.
  */
-bool
+static bool
 HeapTupleSatisfiesToast(HeapTuple htup, Snapshot snapshot,
                        Buffer buffer)
 {
@@ -612,7 +613,11 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid,
    {
        if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
            return HeapTupleMayBeUpdated;
-       return HeapTupleUpdated;    /* updated by other */
+       /* updated by other */
+       if (ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
+           return HeapTupleDeleted;
+       else
+           return HeapTupleUpdated;
    }
 
    if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
@@ -653,7 +658,12 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid,
            return HeapTupleBeingUpdated;
 
        if (TransactionIdDidCommit(xmax))
-           return HeapTupleUpdated;
+       {
+           if (ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
+               return HeapTupleDeleted;
+           else
+               return HeapTupleUpdated;
+       }
 
        /*
         * By here, the update in the Xmax is either aborted or crashed, but
@@ -709,7 +719,12 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid,
 
    SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
                HeapTupleHeaderGetRawXmax(tuple));
-   return HeapTupleUpdated;    /* updated by other */
+
+   /* updated by other */
+   if (ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
+       return HeapTupleDeleted;
+   else
+       return HeapTupleUpdated;
 }
 
 /*
@@ -735,7 +750,7 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid,
  * on the insertion without aborting the whole transaction, the associated
  * token is also returned in snapshot->speculativeToken.
  */
-bool
+static bool
 HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot,
                        Buffer buffer)
 {
@@ -959,7 +974,7 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot,
  * inserting/deleting transaction was still running --- which was more cycles
  * and more contention on the PGXACT array.
  */
-bool
+static bool
 HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
                       Buffer buffer)
 {
@@ -1161,9 +1176,10 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
  * even if we see that the deleting transaction has committed.
  */
 HTSV_Result
-HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin,
+HeapTupleSatisfiesVacuum(HeapTuple stup, TransactionId OldestXmin,
                         Buffer buffer)
 {
+   HeapTuple   htup = (HeapTuple) stup;
    HeapTupleHeader tuple = htup->t_data;
 
    Assert(ItemPointerIsValid(&htup->t_self));
@@ -1383,18 +1399,17 @@ HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin,
    return HEAPTUPLE_DEAD;
 }
 
-
 /*
  * HeapTupleSatisfiesNonVacuumable
  *
- * True if tuple might be visible to some transaction; false if it's
- * surely dead to everyone, ie, vacuumable.
+ *     True if tuple might be visible to some transaction; false if it's
+ *     surely dead to everyone, ie, vacuumable.
  *
- * This is an interface to HeapTupleSatisfiesVacuum that meets the
- * SnapshotSatisfiesFunc API, so it can be used through a Snapshot.
- * snapshot->xmin must have been set up with the xmin horizon to use.
+ *     This is an interface to HeapTupleSatisfiesVacuum that meets the
+ *     SnapshotSatisfiesFunc API, so it can be used through a Snapshot.
+ *     snapshot->xmin must have been set up with the xmin horizon to use.
  */
-bool
+static bool
 HeapTupleSatisfiesNonVacuumable(HeapTuple htup, Snapshot snapshot,
                                Buffer buffer)
 {
@@ -1402,65 +1417,59 @@ HeapTupleSatisfiesNonVacuumable(HeapTuple htup, Snapshot snapshot,
        != HEAPTUPLE_DEAD;
 }
 
-
 /*
- * HeapTupleIsSurelyDead
+ * Is the tuple really only locked?  That is, is it not updated?
  *
- * Cheaply determine whether a tuple is surely dead to all onlookers.
- * We sometimes use this in lieu of HeapTupleSatisfiesVacuum when the
- * tuple has just been tested by another visibility routine (usually
- * HeapTupleSatisfiesMVCC) and, therefore, any hint bits that can be set
- * should already be set.  We assume that if no hint bits are set, the xmin
- * or xmax transaction is still running.  This is therefore faster than
- * HeapTupleSatisfiesVacuum, because we don't consult PGXACT nor CLOG.
- * It's okay to return false when in doubt, but we must return true only
- * if the tuple is removable.
+ * It's easy to check just infomask bits if the locker is not a multi; but
+ * otherwise we need to verify that the updating transaction has not aborted.
+ *
+ * This function is here because it follows the same time qualification rules
+ * laid out at the top of this file.
  */
 bool
-HeapTupleIsSurelyDead(HeapTuple htup, TransactionId OldestXmin)
+HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple)
 {
-   HeapTupleHeader tuple = htup->t_data;
+   TransactionId xmax;
 
-   Assert(ItemPointerIsValid(&htup->t_self));
-   Assert(htup->t_tableOid != InvalidOid);
+   /* if there's no valid Xmax, then there's obviously no update either */
+   if (tuple->t_infomask & HEAP_XMAX_INVALID)
+       return true;
 
-   /*
-    * If the inserting transaction is marked invalid, then it aborted, and
-    * the tuple is definitely dead.  If it's marked neither committed nor
-    * invalid, then we assume it's still alive (since the presumption is that
-    * all relevant hint bits were just set moments ago).
-    */
-   if (!HeapTupleHeaderXminCommitted(tuple))
-       return HeapTupleHeaderXminInvalid(tuple) ? true : false;
+   if (tuple->t_infomask & HEAP_XMAX_LOCK_ONLY)
+       return true;
 
-   /*
-    * If the inserting transaction committed, but any deleting transaction
-    * aborted, the tuple is still alive.
-    */
-   if (tuple->t_infomask & HEAP_XMAX_INVALID)
-       return false;
+   /* invalid xmax means no update */
+   if (!TransactionIdIsValid(HeapTupleHeaderGetRawXmax(tuple)))
+       return true;
 
    /*
-    * If the XMAX is just a lock, the tuple is still alive.
+    * if HEAP_XMAX_LOCK_ONLY is not set and not a multi, then this must
+    * necessarily have been updated
     */
-   if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
+   if (!(tuple->t_infomask & HEAP_XMAX_IS_MULTI))
        return false;
 
-   /*
-    * If the Xmax is a MultiXact, it might be dead or alive, but we cannot
-    * know without checking pg_multixact.
-    */
-   if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
-       return false;
+   /* ... but if it's a multi, then perhaps the updating Xid aborted. */
+   xmax = HeapTupleGetUpdateXid(tuple);
 
-   /* If deleter isn't known to have committed, assume it's still running. */
-   if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
+   /* not LOCKED_ONLY, so it has to have an xmax */
+   Assert(TransactionIdIsValid(xmax));
+
+   if (TransactionIdIsCurrentTransactionId(xmax))
+       return false;
+   if (TransactionIdIsInProgress(xmax))
+       return false;
+   if (TransactionIdDidCommit(xmax))
        return false;
 
-   /* Deleter committed, so tuple is dead if the XID is old enough. */
-   return TransactionIdPrecedes(HeapTupleHeaderGetRawXmax(tuple), OldestXmin);
+   /*
+    * not current, not in progress, not committed -- must have aborted or
+    * crashed
+    */
+   return true;
 }
 
+
 /*
  * XidInMVCCSnapshot
  *     Is the given XID still-in-progress according to the snapshot?
@@ -1584,55 +1593,61 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
 }
 
 /*
- * Is the tuple really only locked?  That is, is it not updated?
- *
- * It's easy to check just infomask bits if the locker is not a multi; but
- * otherwise we need to verify that the updating transaction has not aborted.
+ * HeapTupleIsSurelyDead
  *
- * This function is here because it follows the same time qualification rules
- * laid out at the top of this file.
+ * Cheaply determine whether a tuple is surely dead to all onlookers.
+ * We sometimes use this in lieu of HeapTupleSatisfiesVacuum when the
+ * tuple has just been tested by another visibility routine (usually
+ * HeapTupleSatisfiesMVCC) and, therefore, any hint bits that can be set
+ * should already be set.  We assume that if no hint bits are set, the xmin
+ * or xmax transaction is still running.  This is therefore faster than
+ * HeapTupleSatisfiesVacuum, because we don't consult PGXACT nor CLOG.
+ * It's okay to return false when in doubt, but we must return true only
+ * if the tuple is removable.
  */
 bool
-HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple)
+HeapTupleIsSurelyDead(HeapTuple htup, TransactionId OldestXmin)
 {
-   TransactionId xmax;
-
-   /* if there's no valid Xmax, then there's obviously no update either */
-   if (tuple->t_infomask & HEAP_XMAX_INVALID)
-       return true;
+   HeapTupleHeader tuple = htup->t_data;
 
-   if (tuple->t_infomask & HEAP_XMAX_LOCK_ONLY)
-       return true;
+   Assert(ItemPointerIsValid(&htup->t_self));
+   Assert(htup->t_tableOid != InvalidOid);
 
-   /* invalid xmax means no update */
-   if (!TransactionIdIsValid(HeapTupleHeaderGetRawXmax(tuple)))
-       return true;
+   /*
+    * If the inserting transaction is marked invalid, then it aborted, and
+    * the tuple is definitely dead.  If it's marked neither committed nor
+    * invalid, then we assume it's still alive (since the presumption is that
+    * all relevant hint bits were just set moments ago).
+    */
+   if (!HeapTupleHeaderXminCommitted(tuple))
+       return HeapTupleHeaderXminInvalid(tuple) ? true : false;
 
    /*
-    * if HEAP_XMAX_LOCK_ONLY is not set and not a multi, then this must
-    * necessarily have been updated
+    * If the inserting transaction committed, but any deleting transaction
+    * aborted, the tuple is still alive.
     */
-   if (!(tuple->t_infomask & HEAP_XMAX_IS_MULTI))
+   if (tuple->t_infomask & HEAP_XMAX_INVALID)
        return false;
 
-   /* ... but if it's a multi, then perhaps the updating Xid aborted. */
-   xmax = HeapTupleGetUpdateXid(tuple);
-
-   /* not LOCKED_ONLY, so it has to have an xmax */
-   Assert(TransactionIdIsValid(xmax));
-
-   if (TransactionIdIsCurrentTransactionId(xmax))
-       return false;
-   if (TransactionIdIsInProgress(xmax))
-       return false;
-   if (TransactionIdDidCommit(xmax))
+   /*
+    * If the XMAX is just a lock, the tuple is still alive.
+    */
+   if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
        return false;
 
    /*
-    * not current, not in progress, not committed -- must have aborted or
-    * crashed
+    * If the Xmax is a MultiXact, it might be dead or alive, but we cannot
+    * know without checking pg_multixact.
     */
-   return true;
+   if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
+       return false;
+
+   /* If deleter isn't known to have committed, assume it's still running. */
+   if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
+       return false;
+
+   /* Deleter committed, so tuple is dead if the XID is old enough. */
+   return TransactionIdPrecedes(HeapTupleHeaderGetRawXmax(tuple), OldestXmin);
 }
 
 /*
@@ -1659,7 +1674,7 @@ TransactionIdInArray(TransactionId xid, TransactionId *xip, Size num)
  * dangerous to do so as the semantics of doing so during timetravel are more
  * complicated than when dealing "only" with the present.
  */
-bool
+static bool
 HeapTupleSatisfiesHistoricMVCC(HeapTuple htup, Snapshot snapshot,
                               Buffer buffer)
 {
@@ -1796,3 +1811,34 @@ HeapTupleSatisfiesHistoricMVCC(HeapTuple htup, Snapshot snapshot,
    else
        return true;
 }
+
+bool
+HeapTupleSatisfies(HeapTuple stup, Snapshot snapshot, Buffer buffer)
+{
+   switch (snapshot->visibility_type)
+   {
+       case MVCC_VISIBILITY:
+           return HeapTupleSatisfiesMVCC(stup, snapshot, buffer);
+           break;
+       case SELF_VISIBILITY:
+           return HeapTupleSatisfiesSelf(stup, snapshot, buffer);
+           break;
+       case ANY_VISIBILITY:
+           return HeapTupleSatisfiesAny(stup, snapshot, buffer);
+           break;
+       case TOAST_VISIBILITY:
+           return HeapTupleSatisfiesToast(stup, snapshot, buffer);
+           break;
+       case DIRTY_VISIBILITY:
+           return HeapTupleSatisfiesDirty(stup, snapshot, buffer);
+           break;
+       case HISTORIC_MVCC_VISIBILITY:
+           return HeapTupleSatisfiesHistoricMVCC(stup, snapshot, buffer);
+           break;
+       case NON_VACUUMABLE_VISIBILTY:
+           return HeapTupleSatisfiesNonVacuumable(stup, snapshot, buffer);
+           break;
+   }
+
+   return false; /* keep compiler quiet */
+}
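
With the function pointer gone from SnapshotData, every visibility check funnels through this one dispatch routine, keyed off the snapshot's enum tag. A sketch of what a hypothetical caller looks like, under the same locking rule the HeapTupleSatisfies* routines document (buffer pinned, share lock held across the check):

    /* Sketch: visibility test against SnapshotSelf; buf must be pinned. */
    static bool
    tuple_visible_to_self(HeapTuple tup, Buffer buf)
    {
        bool        visible;

        LockBuffer(buf, BUFFER_LOCK_SHARE);
        visible = HeapTupleSatisfies(tup, SnapshotSelf, buf);
        LockBuffer(buf, BUFFER_LOCK_UNLOCK);

        return visible;
    }
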
index 44caeca336c19a2dcbf0bc26f603a1e3cbca3546..327e277422ef803e55d58b6dd367920b550cff35 100644 (file)
 #include "access/heapam.h"
 #include "access/heapam_xlog.h"
 #include "access/rewriteheap.h"
+#include "access/tableam.h"
 #include "access/transam.h"
 #include "access/tuptoaster.h"
 #include "access/xact.h"
 
 #include "storage/bufmgr.h"
 #include "storage/fd.h"
+#include "storage/procarray.h"
 #include "storage/smgr.h"
 
 #include "utils/memutils.h"
 #include "utils/rel.h"
 #include "utils/tqual.h"
 
-#include "storage/procarray.h"
 
 /*
  * State associated with a rewrite operation. This is opaque to the user
@@ -357,7 +358,7 @@ end_heap_rewrite(RewriteState state)
     * wrote before the checkpoint.
     */
    if (RelationNeedsWAL(state->rs_new_rel))
-       heap_sync(state->rs_new_rel);
+       table_sync(state->rs_new_rel);
 
    logical_end_heap_rewrite(state);
 
index d1dad998d28c9053a262212ca301e30a5ffb544f..486cde4aff8bb045245c08a5a6f70196f79418df 100644 (file)
@@ -32,6 +32,7 @@
 
 #include "access/genam.h"
 #include "access/heapam.h"
+#include "access/tableam.h"
 #include "access/tuptoaster.h"
 #include "access/xact.h"
 #include "catalog/catalog.h"
index 9d08775687985313681bfc8b8bf5d9b5ef8b9a41..5f033c5ee46ddc25f1a8653f750b8c9ababd4476 100644 (file)
@@ -20,6 +20,7 @@
 #include "postgres.h"
 
 #include "access/relscan.h"
+#include "access/tableam.h"
 #include "access/transam.h"
 #include "catalog/index.h"
 #include "lib/stringinfo.h"
@@ -82,6 +83,7 @@ RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
    scan = (IndexScanDesc) palloc(sizeof(IndexScanDescData));
 
    scan->heapRelation = NULL;  /* may be set later */
+   scan->xs_heapfetch = NULL;
    scan->indexRelation = indexRelation;
    scan->xs_snapshot = InvalidSnapshot;    /* caller must initialize this */
    scan->numberOfKeys = nkeys;
@@ -122,11 +124,6 @@ RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
    scan->xs_hitup = NULL;
    scan->xs_hitupdesc = NULL;
 
-   ItemPointerSetInvalid(&scan->xs_ctup.t_self);
-   scan->xs_ctup.t_data = NULL;
-   scan->xs_cbuf = InvalidBuffer;
-   scan->xs_continue_hot = false;
-
    return scan;
 }
 
@@ -334,6 +331,7 @@ systable_beginscan(Relation heapRelation,
 
    sysscan->heap_rel = heapRelation;
    sysscan->irel = irel;
+   sysscan->slot = table_gimmegimmeslot(heapRelation, NULL);
 
    if (snapshot == NULL)
    {
@@ -383,9 +381,9 @@ systable_beginscan(Relation heapRelation,
         * disadvantage; and there are no compensating advantages, because
         * it's unlikely that such scans will occur in parallel.
         */
-       sysscan->scan = heap_beginscan_strat(heapRelation, snapshot,
-                                            nkeys, key,
-                                            true, false);
+       sysscan->scan = table_beginscan_strat(heapRelation, snapshot,
+                                               nkeys, key,
+                                               true, false);
        sysscan->iscan = NULL;
    }
 
@@ -400,15 +398,18 @@ systable_beginscan(Relation heapRelation,
  * Note that returned tuple is a reference to data in a disk buffer;
  * it must not be modified, and should be presumed inaccessible after
  * next getnext() or endscan() call.
+ *
+ * FIXME: Change to be slot based.
  */
 HeapTuple
 systable_getnext(SysScanDesc sysscan)
 {
-   HeapTuple   htup;
+   HeapTuple   htup = NULL;
 
    if (sysscan->irel)
    {
-       htup = index_getnext(sysscan->iscan, ForwardScanDirection);
+       if (index_getnext_slot(sysscan->iscan, ForwardScanDirection, sysscan->slot))
+           htup = ExecFetchSlotHeapTuple(sysscan->slot, false, NULL);
 
        /*
         * We currently don't need to support lossy index operators for any
@@ -421,7 +422,7 @@ systable_getnext(SysScanDesc sysscan)
            elog(ERROR, "system catalog scans with lossy index conditions are not implemented");
    }
    else
-       htup = heap_getnext(sysscan->scan, ForwardScanDirection);
+       htup = heap_scan_getnext(sysscan->scan, ForwardScanDirection);
 
    return htup;
 }
@@ -454,26 +455,26 @@ systable_recheck_tuple(SysScanDesc sysscan, HeapTuple tup)
 
    if (sysscan->irel)
    {
-       IndexScanDesc scan = sysscan->iscan;
+       IndexFetchHeapData *hscan = (IndexFetchHeapData *) sysscan->iscan->xs_heapfetch;
 
-       Assert(IsMVCCSnapshot(scan->xs_snapshot));
-       Assert(tup == &scan->xs_ctup);
-       Assert(BufferIsValid(scan->xs_cbuf));
+       Assert(IsMVCCSnapshot(sysscan->iscan->xs_snapshot));
+       /* Assert(tup == &hscan->xs_ctup); FIXME: replace by peeking into the slot? */
+       Assert(BufferIsValid(hscan->xs_cbuf));
        /* must hold a buffer lock to call HeapTupleSatisfiesVisibility */
-       LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE);
-       result = HeapTupleSatisfiesVisibility(tup, freshsnap, scan->xs_cbuf);
-       LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);
+       LockBuffer(hscan->xs_cbuf, BUFFER_LOCK_SHARE);
+       result = HeapTupleSatisfies(tup, freshsnap, hscan->xs_cbuf);
+       LockBuffer(hscan->xs_cbuf, BUFFER_LOCK_UNLOCK);
    }
    else
    {
-       HeapScanDesc scan = sysscan->scan;
+       HeapScanDesc scan = (HeapScanDesc) sysscan->scan;
 
-       Assert(IsMVCCSnapshot(scan->rs_snapshot));
-       Assert(tup == &scan->rs_ctup);
+       Assert(IsMVCCSnapshot(scan->rs_scan.rs_snapshot));
+       /* FIXME: Assert(tup == &scan->rs_ctup); */
        Assert(BufferIsValid(scan->rs_cbuf));
        /* must hold a buffer lock to call HeapTupleSatisfiesVisibility */
        LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
-       result = HeapTupleSatisfiesVisibility(tup, freshsnap, scan->rs_cbuf);
+       result = HeapTupleSatisfies(tup, freshsnap, scan->rs_cbuf);
        LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
    }
    return result;
@@ -487,13 +488,19 @@ systable_recheck_tuple(SysScanDesc sysscan, HeapTuple tup)
 void
 systable_endscan(SysScanDesc sysscan)
 {
+   if (sysscan->slot)
+   {
+       ExecDropSingleTupleTableSlot(sysscan->slot);
+       sysscan->slot = NULL;
+   }
+
    if (sysscan->irel)
    {
        index_endscan(sysscan->iscan);
        index_close(sysscan->irel, AccessShareLock);
    }
    else
-       heap_endscan(sysscan->scan);
+       table_endscan(sysscan->scan);
 
    if (sysscan->snapshot)
        UnregisterSnapshot(sysscan->snapshot);
@@ -540,6 +547,7 @@ systable_beginscan_ordered(Relation heapRelation,
 
    sysscan->heap_rel = heapRelation;
    sysscan->irel = indexRelation;
+   sysscan->slot = table_gimmegimmeslot(heapRelation, NULL);
 
    if (snapshot == NULL)
    {
@@ -585,10 +593,12 @@ systable_beginscan_ordered(Relation heapRelation,
 HeapTuple
 systable_getnext_ordered(SysScanDesc sysscan, ScanDirection direction)
 {
-   HeapTuple   htup;
+   HeapTuple   htup = NULL;
 
    Assert(sysscan->irel);
-   htup = index_getnext(sysscan->iscan, direction);
+   if (index_getnext_slot(sysscan->iscan, direction, sysscan->slot))
+       htup = ExecFetchSlotHeapTuple(sysscan->slot, false, NULL);
+
    /* See notes in systable_getnext */
    if (htup && sysscan->iscan->xs_recheck)
        elog(ERROR, "system catalog scans with lossy index conditions are not implemented");
@@ -602,6 +612,12 @@ systable_getnext_ordered(SysScanDesc sysscan, ScanDirection direction)
 void
 systable_endscan_ordered(SysScanDesc sysscan)
 {
+   if (sysscan->slot)
+   {
+       ExecDropSingleTupleTableSlot(sysscan->slot);
+       sysscan->slot = NULL;
+   }
+
    Assert(sysscan->irel);
    index_endscan(sysscan->iscan);
    if (sysscan->snapshot)
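
The SysScanDesc now owns a slot of the table's native slot type, created via table_gimmegimmeslot() at beginscan and dropped at endscan, so callers are unaffected. A sketch of the unchanged consumer pattern (amrel is assumed to be an open pg_am relation):

    /* Sketch: catalog scan; the slot shuffling happens inside genam.c now. */
    ScanKeyData key;
    SysScanDesc scan;
    HeapTuple   tup;

    ScanKeyInit(&key, Anum_pg_am_amname,
                BTEqualStrategyNumber, F_NAMEEQ,
                CStringGetDatum("heap"));

    scan = systable_beginscan(amrel, AmNameIndexId, true, NULL, 1, &key);
    while ((tup = systable_getnext(scan)) != NULL)
    {
        /* tup points into the scan's slot; treat as read-only */
    }
    systable_endscan(scan);
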
index eade540ef5da6ec87e742c5194d9e6d86326e4fb..fe5af31f8701d6ae893be78703593cba9f7b60b5 100644 (file)
@@ -71,6 +71,7 @@
 
 #include "access/amapi.h"
 #include "access/relscan.h"
+#include "access/tableam.h"
 #include "access/transam.h"
 #include "access/xlog.h"
 #include "catalog/index.h"
@@ -235,6 +236,8 @@ index_beginscan(Relation heapRelation,
    scan->heapRelation = heapRelation;
    scan->xs_snapshot = snapshot;
 
+   scan->xs_heapfetch = table_begin_index_fetch_table(heapRelation);
+
    return scan;
 }
 
@@ -318,16 +321,12 @@ index_rescan(IndexScanDesc scan,
    Assert(nkeys == scan->numberOfKeys);
    Assert(norderbys == scan->numberOfOrderBys);
 
-   /* Release any held pin on a heap page */
-   if (BufferIsValid(scan->xs_cbuf))
-   {
-       ReleaseBuffer(scan->xs_cbuf);
-       scan->xs_cbuf = InvalidBuffer;
-   }
-
-   scan->xs_continue_hot = false;
+   /* Release resources (like buffer pins) for heap accesses */
+   if (scan->xs_heapfetch)
+       table_reset_index_fetch_table(scan->xs_heapfetch);
 
    scan->kill_prior_tuple = false; /* for safety */
+   scan->xs_heap_continue = false;
 
    scan->indexRelation->rd_amroutine->amrescan(scan, keys, nkeys,
                                                orderbys, norderbys);
@@ -343,11 +342,11 @@ index_endscan(IndexScanDesc scan)
    SCAN_CHECKS;
    CHECK_SCAN_PROCEDURE(amendscan);
 
-   /* Release any held pin on a heap page */
-   if (BufferIsValid(scan->xs_cbuf))
+   /* Release resources (like buffer pins) for heap accesses */
+   if (scan->xs_heapfetch)
    {
-       ReleaseBuffer(scan->xs_cbuf);
-       scan->xs_cbuf = InvalidBuffer;
+       table_end_index_fetch_table(scan->xs_heapfetch);
+       scan->xs_heapfetch = NULL;
    }
 
    /* End the AM's scan */
@@ -380,7 +379,7 @@ index_markpos(IndexScanDesc scan)
  *     index_restrpos  - restore a scan position
  *
  * NOTE: this only restores the internal scan state of the index AM.
- * The current result tuple (scan->xs_ctup) doesn't change.  See comments
+ * The current result tuple (scan->xs_heaptid) doesn't change.  See comments
  * for ExecRestrPos().
  *
  * NOTE: in the presence of HOT chains, mark/restore only works correctly
@@ -400,9 +399,12 @@ index_restrpos(IndexScanDesc scan)
    SCAN_CHECKS;
    CHECK_SCAN_PROCEDURE(amrestrpos);
 
-   scan->xs_continue_hot = false;
+   /* release resources (like buffer pins) for heap accesses */
+   if (scan->xs_heapfetch)
+       table_reset_index_fetch_table(scan->xs_heapfetch);
 
    scan->kill_prior_tuple = false; /* for safety */
+   scan->xs_heap_continue = false;
 
    scan->indexRelation->rd_amroutine->amrestrpos(scan);
 }
@@ -483,6 +485,9 @@ index_parallelrescan(IndexScanDesc scan)
 {
    SCAN_CHECKS;
 
+   if (scan->xs_heapfetch)
+       table_reset_index_fetch_table(scan->xs_heapfetch);
+
    /* amparallelrescan is optional; assume no-op if not provided by AM */
    if (scan->indexRelation->rd_amroutine->amparallelrescan != NULL)
        scan->indexRelation->rd_amroutine->amparallelrescan(scan);
@@ -513,6 +518,8 @@ index_beginscan_parallel(Relation heaprel, Relation indexrel, int nkeys,
    scan->heapRelation = heaprel;
    scan->xs_snapshot = snapshot;
 
+   scan->xs_heapfetch = table_begin_index_fetch_table(heaprel);
+
    return scan;
 }
 
@@ -535,7 +542,7 @@ index_getnext_tid(IndexScanDesc scan, ScanDirection direction)
 
    /*
     * The AM's amgettuple proc finds the next index entry matching the scan
-    * keys, and puts the TID into scan->xs_ctup.t_self.  It should also set
+    * keys, and puts the TID into scan->xs_heaptid.  It should also set
     * scan->xs_recheck and possibly scan->xs_itup/scan->xs_hitup, though we
     * pay no attention to those fields here.
     */
@@ -543,23 +550,23 @@ index_getnext_tid(IndexScanDesc scan, ScanDirection direction)
 
    /* Reset kill flag immediately for safety */
    scan->kill_prior_tuple = false;
+   scan->xs_heap_continue = false;
 
    /* If we're out of index entries, we're done */
    if (!found)
    {
-       /* ... but first, release any held pin on a heap page */
-       if (BufferIsValid(scan->xs_cbuf))
-       {
-           ReleaseBuffer(scan->xs_cbuf);
-           scan->xs_cbuf = InvalidBuffer;
-       }
+       /* release resources (like buffer pins) for heap accesses */
+       if (scan->xs_heapfetch)
+           table_reset_index_fetch_table(scan->xs_heapfetch);
+
        return NULL;
    }
+   Assert(ItemPointerIsValid(&scan->xs_heaptid));
 
    pgstat_count_index_tuples(scan->indexRelation, 1);
 
    /* Return the TID of the tuple we found. */
-   return &scan->xs_ctup.t_self;
+   return &scan->xs_heaptid;
 }
 
 /* ----------------
@@ -580,53 +587,17 @@ index_getnext_tid(IndexScanDesc scan, ScanDirection direction)
  * enough information to do it efficiently in the general case.
  * ----------------
  */
-HeapTuple
-index_fetch_heap(IndexScanDesc scan)
+bool
+index_fetch_heap(IndexScanDesc scan, TupleTableSlot *slot)
 {
-   ItemPointer tid = &scan->xs_ctup.t_self;
    bool        all_dead = false;
-   bool        got_heap_tuple;
-
-   /* We can skip the buffer-switching logic if we're in mid-HOT chain. */
-   if (!scan->xs_continue_hot)
-   {
-       /* Switch to correct buffer if we don't have it already */
-       Buffer      prev_buf = scan->xs_cbuf;
-
-       scan->xs_cbuf = ReleaseAndReadBuffer(scan->xs_cbuf,
-                                            scan->heapRelation,
-                                            ItemPointerGetBlockNumber(tid));
+   bool        found;
 
-       /*
-        * Prune page, but only if we weren't already on this page
-        */
-       if (prev_buf != scan->xs_cbuf)
-           heap_page_prune_opt(scan->heapRelation, scan->xs_cbuf);
-   }
+   found = table_fetch_follow(scan->xs_heapfetch, &scan->xs_heaptid, scan->xs_snapshot,
+                              slot, &scan->xs_heap_continue, &all_dead);
 
-   /* Obtain share-lock on the buffer so we can examine visibility */
-   LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE);
-   got_heap_tuple = heap_hot_search_buffer(tid, scan->heapRelation,
-                                           scan->xs_cbuf,
-                                           scan->xs_snapshot,
-                                           &scan->xs_ctup,
-                                           &all_dead,
-                                           !scan->xs_continue_hot);
-   LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);
-
-   if (got_heap_tuple)
-   {
-       /*
-        * Only in a non-MVCC snapshot can more than one member of the HOT
-        * chain be visible.
-        */
-       scan->xs_continue_hot = !IsMVCCSnapshot(scan->xs_snapshot);
+   if (found)
        pgstat_count_heap_fetch(scan->indexRelation);
-       return &scan->xs_ctup;
-   }
-
-   /* We've reached the end of the HOT chain. */
-   scan->xs_continue_hot = false;
 
    /*
     * If we scanned a whole HOT chain and found only dead tuples, tell index
@@ -638,50 +609,41 @@ index_fetch_heap(IndexScanDesc scan)
    if (!scan->xactStartedInRecovery)
        scan->kill_prior_tuple = all_dead;
 
-   return NULL;
+   return found;
 }
 
 /* ----------------
- *     index_getnext - get the next heap tuple from a scan
+ *     index_getnext_slot - get the next tuple from a scan
  *
- * The result is the next heap tuple satisfying the scan keys and the
- * snapshot, or NULL if no more matching tuples exist.
+ * The result is true if a tuple satisfying the scan keys and the snapshot was
+ * found, false otherwise.  The tuple is stored in the specified slot.
  *
  * On success, the buffer containing the heap tup is pinned (the pin will be
  * dropped in a future index_getnext_tid, index_fetch_heap or index_endscan
- * call).
+ * call).  XXX
  *
  * Note: caller must check scan->xs_recheck, and perform rechecking of the
  * scan keys if required.  We do not do that here because we don't have
  * enough information to do it efficiently in the general case.
  * ----------------
  */
-HeapTuple
-index_getnext(IndexScanDesc scan, ScanDirection direction)
+bool
+index_getnext_slot(IndexScanDesc scan, ScanDirection direction, TupleTableSlot *slot)
 {
-   HeapTuple   heapTuple;
-   ItemPointer tid;
-
    for (;;)
    {
-       if (scan->xs_continue_hot)
-       {
-           /*
-            * We are resuming scan of a HOT chain after having returned an
-            * earlier member.  Must still hold pin on current heap page.
-            */
-           Assert(BufferIsValid(scan->xs_cbuf));
-           Assert(ItemPointerGetBlockNumber(&scan->xs_ctup.t_self) ==
-                  BufferGetBlockNumber(scan->xs_cbuf));
-       }
-       else
+       if (!scan->xs_heap_continue)
        {
+           ItemPointer tid;
+
            /* Time to fetch the next TID from the index */
            tid = index_getnext_tid(scan, direction);
 
            /* If we're out of index entries, we're done */
            if (tid == NULL)
                break;
+
+           Assert(ItemPointerEquals(tid, &scan->xs_heaptid));
        }
 
        /*
@@ -689,14 +651,15 @@ index_getnext(IndexScanDesc scan, ScanDirection direction)
         * If we don't find anything, loop around and grab the next TID from
         * the index.
         */
-       heapTuple = index_fetch_heap(scan);
-       if (heapTuple != NULL)
-           return heapTuple;
+       Assert(ItemPointerIsValid(&scan->xs_heaptid));
+       if (index_fetch_heap(scan, slot))
+           return true;
    }
 
-   return NULL;                /* failure exit */
+   return false;
 }
 
+
 /* ----------------
  *     index_getbitmap - get all tuples at once from an index scan
  *
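
Former index_getnext() callers now supply a slot of the heap relation's preferred type and loop on the boolean interface instead. A sketch of the new pattern (heapRel, indexRel, snapshot, keys, and nkeys are assumptions):

    /* Sketch: slot-based replacement for an old index_getnext() loop. */
    TupleTableSlot *slot = table_gimmegimmeslot(heapRel, NULL);
    IndexScanDesc scan = index_beginscan(heapRel, indexRel, snapshot, nkeys, 0);

    index_rescan(scan, keys, nkeys, NULL, 0);
    while (index_getnext_slot(scan, ForwardScanDirection, slot))
    {
        /* tuple is in 'slot'; recheck quals if scan->xs_recheck is set */
    }
    index_endscan(scan);
    ExecDropSingleTupleTableSlot(slot);
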
index 582e5b0652d40e72272d5f941fb776a79f0ff1f3..b2ad95f9706dfec4bb9d593b5c68b7006b7aeb3e 100644 (file)
@@ -18,6 +18,7 @@
 #include "access/heapam.h"
 #include "access/nbtree.h"
 #include "access/nbtxlog.h"
+#include "access/tableam.h"
 #include "access/transam.h"
 #include "access/xloginsert.h"
 #include "miscadmin.h"
@@ -415,8 +416,8 @@ _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel,
                 * that satisfies SnapshotDirty.  This is necessary because we
                 * have just a single index entry for the entire chain.
                 */
-               else if (heap_hot_search(&htid, heapRel, &SnapshotDirty,
-                                        &all_dead))
+               else if (table_fetch_follow_check(heapRel, &htid, &SnapshotDirty,
+                                                 &all_dead))
                {
                    TransactionId xwait;
 
@@ -469,7 +470,7 @@ _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel,
                     * entry.
                     */
                    htid = itup->t_tid;
-                   if (heap_hot_search(&htid, heapRel, SnapshotSelf, NULL))
+                   if (table_fetch_follow_check(heapRel, &htid, SnapshotSelf, NULL))
                    {
                        /* Normal case --- it's still live */
                    }
index e8725fbbe1eec84fc9b8e03092b54d96b3bbd8cd..935a412fafa4103c0b13a55745c99081185dbed2 100644 (file)
@@ -310,7 +310,7 @@ btgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
        if (_bt_first(scan, ForwardScanDirection))
        {
            /* Save tuple ID, and continue scanning */
-           heapTid = &scan->xs_ctup.t_self;
+           heapTid = &scan->xs_heaptid;
            tbm_add_tuples(tbm, heapTid, 1, false);
            ntids++;
 
index 16223d01ec40d50089fab31ff04e3438d88f22ee..ecbf5482d8829d289fc484d3c293621f1e0ea21a 100644 (file)
@@ -1136,7 +1136,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
 readcomplete:
    /* OK, itemIndex says what to return */
    currItem = &so->currPos.items[so->currPos.itemIndex];
-   scan->xs_ctup.t_self = currItem->heapTid;
+   scan->xs_heaptid = currItem->heapTid;
    if (scan->xs_want_itup)
        scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset);
 
@@ -1186,7 +1186,7 @@ _bt_next(IndexScanDesc scan, ScanDirection dir)
 
    /* OK, itemIndex says what to return */
    currItem = &so->currPos.items[so->currPos.itemIndex];
-   scan->xs_ctup.t_self = currItem->heapTid;
+   scan->xs_heaptid = currItem->heapTid;
    if (scan->xs_want_itup)
        scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset);
 
@@ -1965,7 +1965,7 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir)
 
    /* OK, itemIndex says what to return */
    currItem = &so->currPos.items[so->currPos.itemIndex];
-   scan->xs_ctup.t_self = currItem->heapTid;
+   scan->xs_heaptid = currItem->heapTid;
    if (scan->xs_want_itup)
        scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset);
 
index 16f57557776908519ea376f8ea2a5803acc50131..a4b287aff774f115be69371045167d5ba947b03c 100644 (file)
@@ -60,6 +60,7 @@
 #include "access/nbtree.h"
 #include "access/parallel.h"
 #include "access/relscan.h"
+#include "access/tableam.h"
 #include "access/xact.h"
 #include "access/xlog.h"
 #include "access/xloginsert.h"
@@ -71,7 +72,7 @@
 #include "utils/rel.h"
 #include "utils/sortsupport.h"
 #include "utils/tuplesort.h"
-
+#include "utils/tqual.h"
 
 /* Magic numbers for parallel state sharing */
 #define PARALLEL_KEY_BTREE_SHARED      UINT64CONST(0xA000000000000001)
@@ -159,7 +160,7 @@ typedef struct BTShared
     *
     * See _bt_parallel_estimate_shared().
     */
-   ParallelHeapScanDescData heapdesc;
+   ParallelTableScanDescData paralleldesc;
 } BTShared;
 
 /*
@@ -469,9 +470,9 @@ _bt_spools_heapscan(Relation heap, Relation index, BTBuildState *buildstate,
 
    /* Fill spool using either serial or parallel heap scan */
    if (!buildstate->btleader)
-       reltuples = IndexBuildHeapScan(heap, index, indexInfo, true,
-                                      _bt_build_callback, (void *) buildstate,
-                                      NULL);
+       reltuples = table_index_build_scan(heap, index, indexInfo, true,
+                                          _bt_build_callback, (void *) buildstate,
+                                          NULL);
    else
        reltuples = _bt_parallel_heapscan(buildstate,
                                          &indexInfo->ii_BrokenHotChain);
@@ -546,7 +547,7 @@ _bt_leafbuild(BTSpool *btspool, BTSpool *btspool2)
 }
 
 /*
- * Per-tuple callback from IndexBuildHeapScan
+ * Per-tuple callback from table_index_build_scan
  */
 static void
 _bt_build_callback(Relation index,
@@ -1315,7 +1316,7 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request)
    btshared->havedead = false;
    btshared->indtuples = 0.0;
    btshared->brokenhotchain = false;
-   heap_parallelscan_initialize(&btshared->heapdesc, btspool->heap, snapshot);
+   table_parallelscan_initialize(&btshared->paralleldesc, btspool->heap, snapshot);
 
    /*
     * Store shared tuplesort-private state, for which we reserved space.
@@ -1410,8 +1411,8 @@ _bt_parallel_estimate_shared(Snapshot snapshot)
        return sizeof(BTShared);
    }
 
-   return add_size(offsetof(BTShared, heapdesc) +
-                   offsetof(ParallelHeapScanDescData, phs_snapshot_data),
+   return add_size(offsetof(BTShared, paralleldesc) +
+                   offsetof(ParallelTableScanDescData, phs_snapshot_data),
                    EstimateSnapshotSpace(snapshot));
 }
 
@@ -1623,7 +1624,7 @@ _bt_parallel_scan_and_sort(BTSpool *btspool, BTSpool *btspool2,
 {
    SortCoordinate coordinate;
    BTBuildState buildstate;
-   HeapScanDesc scan;
+   TableScanDesc scan;
    double      reltuples;
    IndexInfo  *indexInfo;
 
@@ -1676,10 +1677,10 @@ _bt_parallel_scan_and_sort(BTSpool *btspool, BTSpool *btspool2,
    /* Join parallel scan */
    indexInfo = BuildIndexInfo(btspool->index);
    indexInfo->ii_Concurrent = btshared->isconcurrent;
-   scan = heap_beginscan_parallel(btspool->heap, &btshared->heapdesc);
-   reltuples = IndexBuildHeapScan(btspool->heap, btspool->index, indexInfo,
-                                  true, _bt_build_callback,
-                                  (void *) &buildstate, scan);
+   scan = table_beginscan_parallel(btspool->heap, &btshared->paralleldesc);
+   reltuples = table_index_build_scan(btspool->heap, btspool->index, indexInfo,
+                                      true, _bt_build_callback,
+                                      (void *) &buildstate, scan);
 
    /*
     * Execute this worker's part of the sort.
index 7dd0d61fbbca6365dae442c4978555574368d712..2a4d0c9a54870f56e4b8357871475d5a6b2d0d37 100644 (file)
@@ -19,6 +19,7 @@
 #include "access/genam.h"
 #include "access/spgist_private.h"
 #include "access/spgxlog.h"
+#include "access/tableam.h"
 #include "access/xlog.h"
 #include "access/xloginsert.h"
 #include "catalog/index.h"
@@ -37,7 +38,7 @@ typedef struct
 } SpGistBuildState;
 
 
-/* Callback to process one heap tuple during IndexBuildHeapScan */
+/* Callback to process one heap tuple during table_index_build_scan */
 static void
 spgistBuildCallback(Relation index, HeapTuple htup, Datum *values,
                    bool *isnull, bool tupleIsAlive, void *state)
@@ -142,9 +143,9 @@ spgbuild(Relation heap, Relation index, IndexInfo *indexInfo)
                                              "SP-GiST build temporary context",
                                              ALLOCSET_DEFAULT_SIZES);
 
-   reltuples = IndexBuildHeapScan(heap, index, indexInfo, true,
-                                  spgistBuildCallback, (void *) &buildstate,
-                                  NULL);
+   reltuples = table_index_build_scan(heap, index, indexInfo, true,
+                                      spgistBuildCallback, (void *) &buildstate,
+                                      NULL);
 
    MemoryContextDelete(buildstate.tmpCtx);
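
All of these index build sites now share the table_index_build_scan() entry point with the same per-tuple callback shape shown in spgistBuildCallback above. A sketch of a trivial callback (hypothetical, counting live tuples only):

    /* Sketch: minimal table_index_build_scan() callback. */
    static void
    count_tuples_callback(Relation index, HeapTuple htup, Datum *values,
                          bool *isnull, bool tupleIsAlive, void *state)
    {
        if (tupleIsAlive)
            (*(double *) state)++;
    }

    /* usage: */
    double      ntuples = 0;

    (void) table_index_build_scan(heap, index, indexInfo, true,
                                  count_tuples_callback, &ntuples, NULL);
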
 
index c883ae95e48c0e9f61b0cc552170cf781da90662..e84be1adead6cb77f7be305e5f0f10203bc28d49 100644 (file)
@@ -927,7 +927,7 @@ spggettuple(IndexScanDesc scan, ScanDirection dir)
        if (so->iPtr < so->nPtrs)
        {
            /* continuing to return reported tuples */
-           scan->xs_ctup.t_self = so->heapPtrs[so->iPtr];
+           scan->xs_heaptid = so->heapPtrs[so->iPtr];
            scan->xs_recheck = so->recheck[so->iPtr];
            scan->xs_hitup = so->reconTups[so->iPtr];
 
diff --git a/src/backend/access/table/Makefile b/src/backend/access/table/Makefile
new file mode 100644 (file)
index 0000000..006ba99
--- /dev/null
@@ -0,0 +1,17 @@
+#-------------------------------------------------------------------------
+#
+# Makefile--
+#    Makefile for access/table
+#
+# IDENTIFICATION
+#    src/backend/access/table/Makefile
+#
+#-------------------------------------------------------------------------
+
+subdir = src/backend/access/table
+top_builddir = ../../../..
+include $(top_builddir)/src/Makefile.global
+
+OBJS = tableam.o tableamapi.o
+
+include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c
new file mode 100644 (file)
index 0000000..af99264
--- /dev/null
@@ -0,0 +1,187 @@
+/*----------------------------------------------------------------------
+ *
+ * tableam.c
+ *     Table access method routines too big to be inline functions.
+ *
+ * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/backend/access/table/tableam.c
+ *----------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/tableam.h"
+#include "storage/bufmgr.h"
+#include "storage/shmem.h"
+
+/* GUC variable */
+bool       synchronize_seqscans = true;
+
+char *default_table_access_method = DEFAULT_TABLE_ACCESS_METHOD;
+
+
+/* ----------------
+ *     table_parallelscan_estimate - estimate storage for ParallelTableScanDesc
+ *
+ *     Sadly, this doesn't reduce to a constant, because the size required
+ *     to serialize the snapshot can vary.
+ * ----------------
+ */
+Size
+table_parallelscan_estimate(Snapshot snapshot)
+{
+   return add_size(offsetof(ParallelTableScanDescData, phs_snapshot_data),
+                   EstimateSnapshotSpace(snapshot));
+}
+
+/* ----------------
+ *     table_parallelscan_initialize - initialize ParallelTableScanDesc
+ *
+ *     Must allow as many bytes of shared memory as returned by
+ *     table_parallelscan_estimate.  Call this just once in the leader
+ *     process; then, individual workers attach via table_beginscan_parallel.
+ * ----------------
+ */
+void
+table_parallelscan_initialize(ParallelTableScanDesc target, Relation relation,
+                            Snapshot snapshot)
+{
+   target->phs_relid = RelationGetRelid(relation);
+   target->phs_nblocks = RelationGetNumberOfBlocks(relation);
+   /* compare phs_syncscan initialization to similar logic in initscan */
+   target->phs_syncscan = synchronize_seqscans &&
+       !RelationUsesLocalBuffers(relation) &&
+       target->phs_nblocks > NBuffers / 4;
+   SpinLockInit(&target->phs_mutex);
+   target->phs_startblock = InvalidBlockNumber;
+   pg_atomic_init_u64(&target->phs_nallocated, 0);
+   if (IsMVCCSnapshot(snapshot))
+   {
+       SerializeSnapshot(snapshot, target->phs_snapshot_data);
+       target->phs_snapshot_any = false;
+   }
+   else
+   {
+       Assert(snapshot == SnapshotAny);
+       target->phs_snapshot_any = true;
+   }
+}
+
+/* ----------------
+ *     table_parallelscan_reinitialize - reset a parallel scan
+ *
+ *     Call this in the leader process.  Caller is responsible for
+ *     making sure that all workers have finished the scan beforehand.
+ * ----------------
+ */
+void
+table_parallelscan_reinitialize(ParallelTableScanDesc parallel_scan)
+{
+   pg_atomic_write_u64(&parallel_scan->phs_nallocated, 0);
+}
+
+/* ----------------
+ *     table_parallelscan_startblock_init - find and set the scan's startblock
+ *
+ *     Determine where the parallel seq scan should start.  This function may
+ *     be called many times, once by each parallel worker.  We must be careful
+ *     only to set the startblock once.
+ * ----------------
+ */
+void
+table_parallelscan_startblock_init(TableScanDesc scan)
+{
+   BlockNumber sync_startpage = InvalidBlockNumber;
+   ParallelTableScanDesc parallel_scan;
+
+   Assert(scan->rs_parallel);
+   parallel_scan = scan->rs_parallel;
+
+retry:
+   /* Grab the spinlock. */
+   SpinLockAcquire(&parallel_scan->phs_mutex);
+
+   /*
+    * If the scan's startblock has not yet been initialized, we must do so
+    * now.  If this is not a synchronized scan, we just start at block 0, but
+    * if it is a synchronized scan, we must get the starting position from
+    * the synchronized scan machinery.  We can't hold the spinlock while
+    * doing that, though, so release the spinlock, get the information we
+    * need, and retry.  If nobody else has initialized the scan in the
+    * meantime, we'll fill in the value we fetched on the second time
+    * through.
+    */
+   if (parallel_scan->phs_startblock == InvalidBlockNumber)
+   {
+       if (!parallel_scan->phs_syncscan)
+           parallel_scan->phs_startblock = 0;
+       else if (sync_startpage != InvalidBlockNumber)
+           parallel_scan->phs_startblock = sync_startpage;
+       else
+       {
+           SpinLockRelease(&parallel_scan->phs_mutex);
+           sync_startpage = ss_get_location(scan->rs_rd, scan->rs_nblocks);
+           goto retry;
+       }
+   }
+   SpinLockRelease(&parallel_scan->phs_mutex);
+}
+
+/* ----------------
+ *     table_parallelscan_nextpage - get the next page to scan
+ *
+ *     Get the next page to scan.  Even if there are no pages left to scan,
+ *     another backend could have grabbed a page to scan and not yet finished
+ *     looking at it, so it doesn't follow that the scan is done when the
+ *     first backend gets an InvalidBlockNumber return.
+ * ----------------
+ */
+BlockNumber
+table_parallelscan_nextpage(TableScanDesc scan)
+{
+   BlockNumber page;
+   ParallelTableScanDesc parallel_scan;
+   uint64      nallocated;
+
+   Assert(scan->rs_parallel);
+   parallel_scan = scan->rs_parallel;
+
+   /*
+    * phs_nallocated tracks how many pages have been allocated to workers
+    * already.  When phs_nallocated >= rs_nblocks, all blocks have been
+    * allocated.
+    *
+    * Because we use an atomic fetch-and-add to fetch the current value, the
+    * phs_nallocated counter will exceed rs_nblocks, because workers will
+    * still increment the value when they try to allocate the next block
+    * after all blocks have been allocated already. The counter must be 64 bits
+    * wide because of that, to avoid wrapping around when rs_nblocks is close
+    * to 2^32.
+    *
+    * The actual page to return is calculated by adding the counter to the
+    * starting block number, modulo nblocks.
+    */
+   nallocated = pg_atomic_fetch_add_u64(&parallel_scan->phs_nallocated, 1);
+   if (nallocated >= scan->rs_nblocks)
+       page = InvalidBlockNumber;  /* all blocks have been allocated */
+   else
+       page = (nallocated + parallel_scan->phs_startblock) % scan->rs_nblocks;
+
+   /*
+    * Report scan location.  Normally, we report the current page number.
+    * When we reach the end of the scan, though, we report the starting page,
+    * not the ending page, just so the starting positions for later scans
+    * doesn't slew backwards.  We only report the position at the end of the
+    * scan once, though: subsequent callers will report nothing.
+    */
+   if (scan->rs_syncscan)
+   {
+       if (page != InvalidBlockNumber)
+           ss_report_location(scan->rs_rd, page);
+       else if (nallocated == scan->rs_nblocks)
+           ss_report_location(scan->rs_rd, parallel_scan->phs_startblock);
+   }
+
+   return page;
+}
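
Taken together, the leader/worker handshake for a parallel table scan looks roughly like this (a sketch; the shm_toc key bookkeeping and error handling are elided, and pcxt is an assumed ParallelContext):

    /* Leader: size, allocate, and fill the shared scan descriptor. */
    Size        sz = table_parallelscan_estimate(snapshot);
    ParallelTableScanDesc pscan = shm_toc_allocate(pcxt->toc, sz);

    table_parallelscan_initialize(pscan, rel, snapshot);

    /* Worker (and leader, if participating): attach and consume blocks. */
    TableScanDesc scan = table_beginscan_parallel(rel, pscan);
    BlockNumber blockno;

    table_parallelscan_startblock_init(scan);
    while ((blockno = table_parallelscan_nextpage(scan)) != InvalidBlockNumber)
    {
        /* read blockno and emit its visible tuples */
    }
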
diff --git a/src/backend/access/table/tableamapi.c b/src/backend/access/table/tableamapi.c
new file mode 100644 (file)
index 0000000..b2d283a
--- /dev/null
@@ -0,0 +1,187 @@
+/*----------------------------------------------------------------------
+ *
+ * tableamapi.c
+ *     Support routines for API for Postgres table access methods
+ *
+ * FIXME: looks like this should be in amapi.c.
+ *
+ * Copyright (c) 2018, PostgreSQL Global Development Group
+ *
+ * src/backend/access/table/tableamapi.c
+ *----------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/htup_details.h"
+#include "access/tableam.h"
+#include "access/xact.h"
+#include "catalog/pg_am.h"
+#include "catalog/pg_proc.h"
+#include "utils/fmgroids.h"
+#include "utils/syscache.h"
+#include "utils/memutils.h"
+
+static Oid get_table_am_oid(const char *tableamname, bool missing_ok);
+
+TupleTableSlot *
+table_gimmegimmeslot(Relation relation, List **reglist)
+{
+   const TupleTableSlotOps *tts_cb;
+   TupleTableSlot *slot;
+
+   tts_cb = table_slot_callbacks(relation);
+   slot = MakeSingleTupleTableSlot(RelationGetDescr(relation), tts_cb);
+
+   if (reglist)
+       *reglist = lappend(*reglist, slot);
+
+   return slot;
+}
+
+
+/*
+ * GetTableAmRoutine
+ *     Call the specified access method handler routine to get its
+ *     TableAmRoutine struct, which will be palloc'd in the caller's
+ *     memory context.
+ */
+const TableAmRoutine *
+GetTableAmRoutine(Oid amhandler)
+{
+   Datum       datum;
+   const TableAmRoutine *routine;
+
+   datum = OidFunctionCall0(amhandler);
+   routine = (TableAmRoutine *) DatumGetPointer(datum);
+
+   if (routine == NULL || !IsA(routine, TableAmRoutine))
+       elog(ERROR, "Table access method handler %u did not return a TableAmRoutine struct",
+            amhandler);
+
+   return routine;
+}
+
+/*
+ * GetTableAmRoutineByAmId - look up the handler of the table access
+ * method with the given OID, and get its TableAmRoutine struct.
+ */
+const TableAmRoutine *
+GetTableAmRoutineByAmId(Oid amoid)
+{
+   regproc     amhandler;
+   HeapTuple   tuple;
+   Form_pg_am  amform;
+
+   /* Get handler function OID for the access method */
+   tuple = SearchSysCache1(AMOID, ObjectIdGetDatum(amoid));
+   if (!HeapTupleIsValid(tuple))
+       elog(ERROR, "cache lookup failed for access method %u",
+            amoid);
+   amform = (Form_pg_am) GETSTRUCT(tuple);
+
+   /* Check that it is a table access method */
+   if (amform->amtype != AMTYPE_TABLE)
+       ereport(ERROR,
+               (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                errmsg("access method \"%s\" is not of type %s",
+                       NameStr(amform->amname), "TABLE")));
+
+   amhandler = amform->amhandler;
+
+   /* Complain if handler OID is invalid */
+   if (!RegProcedureIsValid(amhandler))
+       ereport(ERROR,
+               (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                errmsg("table access method \"%s\" does not have a handler",
+                       NameStr(amform->amname))));
+
+   ReleaseSysCache(tuple);
+
+   /* And finally, call the handler function to get the API struct. */
+   return GetTableAmRoutine(amhandler);
+}
+
+/*
+ * get_table_am_oid - given a table access method name, look up the OID
+ *
+ * If missing_ok is false, throw an error if table access method name not
+ * found. If true, just return InvalidOid.
+ */
+static Oid
+get_table_am_oid(const char *tableamname, bool missing_ok)
+{
+   Oid         result;
+   Relation    rel;
+   TableScanDesc scandesc;
+   HeapTuple   tuple;
+   ScanKeyData entry[1];
+
+   /*
+    * Search pg_am.  We use a heapscan here even though there is an index on
+    * name, on the theory that pg_am will usually have just a few entries and
+    * so an indexed lookup is a waste of effort.
+    */
+   rel = heap_open(AccessMethodRelationId, AccessShareLock);
+
+   ScanKeyInit(&entry[0],
+               Anum_pg_am_amname,
+               BTEqualStrategyNumber, F_NAMEEQ,
+               CStringGetDatum(tableamname));
+   scandesc = table_beginscan_catalog(rel, 1, entry);
+   tuple = heap_scan_getnext(scandesc, ForwardScanDirection);
+
+   /* We assume that there can be at most one matching tuple */
+   if (HeapTupleIsValid(tuple) &&
+       ((Form_pg_am) GETSTRUCT(tuple))->amtype == AMTYPE_TABLE)
+       result = ((Form_pg_am) GETSTRUCT(tuple))->oid;
+   else
+       result = InvalidOid;
+
+   table_endscan(scandesc);
+   heap_close(rel, AccessShareLock);
+
+   if (!OidIsValid(result) && !missing_ok)
+       ereport(ERROR,
+               (errcode(ERRCODE_UNDEFINED_OBJECT),
+                errmsg("table access method \"%s\" does not exist",
+                        tableamname)));
+
+   return result;
+}
+
+/* check_hook: validate new default_table_access_method */
+bool
+check_default_table_access_method(char **newval, void **extra, GucSource source)
+{
+   /*
+    * If we aren't inside a transaction, we cannot do database access so
+    * cannot verify the name.  Must accept the value on faith.
+    */
+   if (IsTransactionState())
+   {
+       if (**newval != '\0' &&
+           !OidIsValid(get_table_am_oid(*newval, true)))
+       {
+           /*
+            * When source == PGC_S_TEST, don't throw a hard error for a
+            * nonexistent table access method, only a NOTICE.
+            * See comments in guc.h.
+            */
+           if (source == PGC_S_TEST)
+           {
+               ereport(NOTICE,
+                       (errcode(ERRCODE_UNDEFINED_OBJECT),
+                        errmsg("Table access method \"%s\" does not exist",
+                               *newval)));
+           }
+           else
+           {
+               GUC_check_errdetail("Table access method \"%s\" does not exist.",
+                                   *newval);
+               return false;
+           }
+       }
+   }
+
+   return true;
+}
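
A sketch of how a relation's AM routine would be resolved from its pg_class entry (an assumption about the relcache side, which is not part of this file):

    /* Sketch: pick the table AM routine for an open relation. */
    const TableAmRoutine *routine;

    if (OidIsValid(relation->rd_rel->relam))
        routine = GetTableAmRoutineByAmId(relation->rd_rel->relam);
    else
        routine = GetHeapamTableAmRoutine();    /* fall back to heap */
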
index 4d937b4258af037e9c335d9b48378768c59cd658..1354e3f54aaef3ac1e4d1e002801a35432ac5cc7 100644 (file)
@@ -56,7 +56,7 @@ static void system_beginsamplescan(SampleScanState *node,
                       Datum *params,
                       int nparams,
                       uint32 seed);
-static BlockNumber system_nextsampleblock(SampleScanState *node);
+static BlockNumber system_nextsampleblock(SampleScanState *node, BlockNumber nblocks);
 static OffsetNumber system_nextsampletuple(SampleScanState *node,
                       BlockNumber blockno,
                       OffsetNumber maxoffset);
@@ -177,10 +177,9 @@ system_beginsamplescan(SampleScanState *node,
  * Select next block to sample.
  */
 static BlockNumber
-system_nextsampleblock(SampleScanState *node)
+system_nextsampleblock(SampleScanState *node, BlockNumber nblocks)
 {
    SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
-   HeapScanDesc scan = node->ss.ss_currentScanDesc;
    BlockNumber nextblock = sampler->nextblock;
    uint32      hashinput[2];
 
@@ -199,7 +198,7 @@ system_nextsampleblock(SampleScanState *node)
     * Loop over block numbers until finding suitable block or reaching end of
     * relation.
     */
-   for (; nextblock < scan->rs_nblocks; nextblock++)
+   for (; nextblock < nblocks; nextblock++)
    {
        uint32      hash;
 
@@ -211,7 +210,7 @@ system_nextsampleblock(SampleScanState *node)
            break;
    }
 
-   if (nextblock < scan->rs_nblocks)
+   if (nextblock < nblocks)
    {
        /* Found a suitable block; remember where we should start next time */
        sampler->nextblock = nextblock + 1;
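
Since sample-scan AMs can no longer peek into a HeapScanDesc, the relation's block count now arrives as a parameter. A sketch of the simplest possible nextsampleblock under the new signature (MyState is a hypothetical per-scan state struct):

    /* Sketch: return every block once under the new callback signature. */
    typedef struct MyState
    {
        BlockNumber nextblock;
    } MyState;

    static BlockNumber
    my_nextsampleblock(SampleScanState *node, BlockNumber nblocks)
    {
        MyState    *s = (MyState *) node->tsm_state;

        if (s->nextblock < nblocks)
            return s->nextblock++;
        return InvalidBlockNumber;      /* no more blocks to sample */
    }
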
index 71c3714c48c9f838af6aa890e271a17fd89c56c6..e4208f406848d6602bf60c9423fd2d3cd7afc142 100644 (file)
@@ -220,6 +220,7 @@ Boot_CreateStmt:
                                                   shared_relation ? GLOBALTABLESPACE_OID : 0,
                                                   $3,
                                                   InvalidOid,
+                                                  HEAP_TABLE_AM_OID,
                                                   tupdesc,
                                                   RELKIND_RELATION,
                                                   RELPERSISTENCE_PERMANENT,
@@ -239,6 +240,7 @@ Boot_CreateStmt:
                                                      $6,
                                                      InvalidOid,
                                                      BOOTSTRAP_SUPERUSERID,
+                                                     HEAP_TABLE_AM_OID,
                                                      tupdesc,
                                                      NIL,
                                                      RELKIND_RELATION,
index 7caab64ce7808be7b18c5a555dbca9930fa49e49..c55060e8f7c98a67df00e3b3959ccd0b174e66c9 100644 (file)
@@ -18,6 +18,7 @@
 #include <signal.h>
 
 #include "access/htup_details.h"
+#include "access/tableam.h"
 #include "access/xact.h"
 #include "access/xlog_internal.h"
 #include "bootstrap/bootstrap.h"
@@ -593,7 +594,7 @@ boot_openrel(char *relname)
    int         i;
    struct typmap **app;
    Relation    rel;
-   HeapScanDesc scan;
+   TableScanDesc scan;
    HeapTuple   tup;
 
    if (strlen(relname) >= NAMEDATALEN)
@@ -603,18 +604,18 @@ boot_openrel(char *relname)
    {
        /* We can now load the pg_type data */
        rel = heap_open(TypeRelationId, NoLock);
-       scan = heap_beginscan_catalog(rel, 0, NULL);
+       scan = table_beginscan_catalog(rel, 0, NULL);
        i = 0;
-       while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL)
+       while ((tup = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
            ++i;
-       heap_endscan(scan);
+       table_endscan(scan);
        app = Typ = ALLOC(struct typmap *, i + 1);
        while (i-- > 0)
            *app++ = ALLOC(struct typmap, 1);
        *app = NULL;
-       scan = heap_beginscan_catalog(rel, 0, NULL);
+       scan = table_beginscan_catalog(rel, 0, NULL);
        app = Typ;
-       while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL)
+       while ((tup = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
        {
            (*app)->am_oid = ((Form_pg_type) GETSTRUCT(tup))->oid;
            memcpy((char *) &(*app)->am_typ,
@@ -622,7 +623,7 @@ boot_openrel(char *relname)
                   sizeof((*app)->am_typ));
            app++;
        }
-       heap_endscan(scan);
+       table_endscan(scan);
        heap_close(rel, NoLock);
    }
 
@@ -905,7 +906,7 @@ gettype(char *type)
 {
    int         i;
    Relation    rel;
-   HeapScanDesc scan;
+   TableScanDesc scan;
    HeapTuple   tup;
    struct typmap **app;
 
@@ -929,25 +930,25 @@ gettype(char *type)
        }
        elog(DEBUG4, "external type: %s", type);
        rel = heap_open(TypeRelationId, NoLock);
-       scan = heap_beginscan_catalog(rel, 0, NULL);
+       scan = table_beginscan_catalog(rel, 0, NULL);
        i = 0;
-       while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL)
+       while ((tup = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
            ++i;
-       heap_endscan(scan);
+       table_endscan(scan);
        app = Typ = ALLOC(struct typmap *, i + 1);
        while (i-- > 0)
            *app++ = ALLOC(struct typmap, 1);
        *app = NULL;
-       scan = heap_beginscan_catalog(rel, 0, NULL);
+       scan = table_beginscan_catalog(rel, 0, NULL);
        app = Typ;
-       while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL)
+       while ((tup = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
        {
            (*app)->am_oid = ((Form_pg_type) GETSTRUCT(tup))->oid;
            memmove((char *) &(*app++)->am_typ,
                    (char *) GETSTRUCT(tup),
                    sizeof((*app)->am_typ));
        }
-       heap_endscan(scan);
+       table_endscan(scan);
        heap_close(rel, NoLock);
        return gettype(type);
    }
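
This is the conversion pattern applied throughout the commit; condensed, it reads as below, with heap_scan_getnext() kept as a HeapTuple-returning compatibility wrapper for callers not yet converted to slots:

    TableScanDesc scan;
    HeapTuple     tup;

    scan = table_beginscan_catalog(rel, 0, NULL);
    while ((tup = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
    {
        /* ... inspect GETSTRUCT(tup) ... */
    }
    table_endscan(scan);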
index 1dd70bb9c693e9a9516ead77020cd09e9f0bbab2..a5068cef5d92eea41e44ec9d623566d6a31f3dfe 100644 (file)
@@ -20,6 +20,7 @@
 #include "access/genam.h"
 #include "access/heapam.h"
 #include "access/htup_details.h"
+#include "access/tableam.h"
 #include "access/sysattr.h"
 #include "access/xact.h"
 #include "catalog/binary_upgrade.h"
@@ -822,7 +823,7 @@ objectsInSchemaToOids(ObjectType objtype, List *nspnames)
                    ScanKeyData key[2];
                    int         keycount;
                    Relation    rel;
-                   HeapScanDesc scan;
+                   TableScanDesc scan;
                    HeapTuple   tuple;
 
                    keycount = 0;
@@ -844,16 +845,16 @@ objectsInSchemaToOids(ObjectType objtype, List *nspnames)
                                    CharGetDatum(PROKIND_PROCEDURE));
 
                    rel = heap_open(ProcedureRelationId, AccessShareLock);
-                   scan = heap_beginscan_catalog(rel, keycount, key);
+                   scan = table_beginscan_catalog(rel, keycount, key);
 
-                   while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+                   while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
                    {
                        Oid     oid = ((Form_pg_proc) GETSTRUCT(tuple))->oid;
 
                        objects = lappend_oid(objects, oid);
                    }
 
-                   heap_endscan(scan);
+                   table_endscan(scan);
                    heap_close(rel, AccessShareLock);
                }
                break;
@@ -878,7 +879,7 @@ getRelationsInNamespace(Oid namespaceId, char relkind)
    List       *relations = NIL;
    ScanKeyData key[2];
    Relation    rel;
-   HeapScanDesc scan;
+   TableScanDesc scan;
    HeapTuple   tuple;
 
    ScanKeyInit(&key[0],
@@ -891,16 +892,16 @@ getRelationsInNamespace(Oid namespaceId, char relkind)
                CharGetDatum(relkind));
 
    rel = heap_open(RelationRelationId, AccessShareLock);
-   scan = heap_beginscan_catalog(rel, 2, key);
+   scan = table_beginscan_catalog(rel, 2, key);
 
-   while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+   while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
    {
        Oid     oid  = ((Form_pg_class) GETSTRUCT(tuple))->oid;
 
        relations = lappend_oid(relations, oid);
    }
 
-   heap_endscan(scan);
+   table_endscan(scan);
    heap_close(rel, AccessShareLock);
 
    return relations;
index edc8ea9f53393166033f8513f66debdf5c6b59fc..552aa46657a60e359288083b32f6de23227b77b1 100644 (file)
@@ -163,7 +163,9 @@ my $BOOTSTRAP_SUPERUSERID =
 my $PG_CATALOG_NAMESPACE =
   Catalog::FindDefinedSymbolFromData($catalog_data{pg_namespace},
    'PG_CATALOG_NAMESPACE');
-
+my $PG_HEAP_AM =
+  Catalog::FindDefinedSymbolFromData($catalog_data{pg_am},
+   'HEAP_TABLE_AM_OID');
 
 # Build lookup tables for OID macro substitutions and for pg_attribute
 # copies of pg_type values.
@@ -426,6 +428,7 @@ EOM
            # (It's intentional that this can apply to parts of a field).
            $bki_values{$attname} =~ s/\bPGUID\b/$BOOTSTRAP_SUPERUSERID/g;
            $bki_values{$attname} =~ s/\bPGNSP\b/$PG_CATALOG_NAMESPACE/g;
+           $bki_values{$attname} =~ s/\bPGHEAPAM\b/$PG_HEAP_AM/g;
 
            # Replace OID synonyms with OIDs per the appropriate lookup rule.
            #
index 11debaa780e7f3c47f90ba19dd003352c49324b6..f85eab1714c2a15210bcc3f07b7ec85631d11d33 100644 (file)
@@ -42,6 +42,7 @@
 #include "catalog/index.h"
 #include "catalog/objectaccess.h"
 #include "catalog/partition.h"
+#include "catalog/pg_am.h"
 #include "catalog/pg_attrdef.h"
 #include "catalog/pg_collation.h"
 #include "catalog/pg_constraint.h"
@@ -292,6 +293,7 @@ heap_create(const char *relname,
            Oid reltablespace,
            Oid relid,
            Oid relfilenode,
+           Oid accessmtd,
            TupleDesc tupDesc,
            char relkind,
            char relpersistence,
@@ -394,6 +396,7 @@ heap_create(const char *relname,
                                     relnamespace,
                                     tupDesc,
                                     relid,
+                                    accessmtd,
                                     relfilenode,
                                     reltablespace,
                                     shared_relation,
@@ -1052,6 +1055,7 @@ heap_create_with_catalog(const char *relname,
                         Oid reltypeid,
                         Oid reloftypeid,
                         Oid ownerid,
+                        Oid accessmtd,
                         TupleDesc tupdesc,
                         List *cooked_constraints,
                         char relkind,
@@ -1193,6 +1197,7 @@ heap_create_with_catalog(const char *relname,
                               reltablespace,
                               relid,
                               InvalidOid,
+                              accessmtd,
                               tupdesc,
                               relkind,
                               relpersistence,
@@ -1349,6 +1354,22 @@ heap_create_with_catalog(const char *relname,
            referenced.objectSubId = 0;
            recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
        }
+
+       /*
+        * Make a dependency link to force the relation to be deleted if its
+        * access method is.  Do this only for plain relations and
+        * materialized views.
+        *
+        * No need to add an explicit dependency for the TOAST table, since
+        * it is dropped along with the main table.
+        */
+       if ((relkind == RELKIND_RELATION) ||
+               (relkind == RELKIND_MATVIEW))
+       {
+           referenced.classId = AccessMethodRelationId;
+           referenced.objectId = accessmtd;
+           referenced.objectSubId = 0;
+           recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
+       }
    }
 
    /* Post creation hook for new relation */
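
Condensed with the ObjectAddressSet() helper, the new dependency amounts to the sketch below; the practical effect is that a table access method cannot be dropped while plain tables or materialized views still use it, short of CASCADE:

    ObjectAddress myself, referenced;

    ObjectAddressSet(myself, RelationRelationId, relid);
    ObjectAddressSet(referenced, AccessMethodRelationId, accessmtd);

    /* table -> access method; severed only by dropping the table */
    recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);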
index 8709e8c22c77edaf3ffbd2c677a5e22cb8231bff..6c541cbe6ea67ac1f36ef6316566e735134206f6 100644 (file)
@@ -27,6 +27,7 @@
 #include "access/multixact.h"
 #include "access/relscan.h"
 #include "access/reloptions.h"
+#include "access/tableam.h"
 #include "access/sysattr.h"
 #include "access/transam.h"
 #include "access/visibilitymap.h"
 /* Potentially set by pg_upgrade_support functions */
 Oid            binary_upgrade_next_index_pg_class_oid = InvalidOid;
 
-/* state info for validate_index bulkdelete callback */
-typedef struct
-{
-   Tuplesortstate *tuplesort;  /* for sorting the index TIDs */
-   /* statistics (for debug purposes only): */
-   double      htups,
-               itups,
-               tups_inserted;
-} v_i_state;
-
 /*
  * Pointer-free representation of variables used when reindexing system
  * catalogs; we use this to propagate those values to parallel workers.
@@ -131,14 +122,7 @@ static void index_update_stats(Relation rel,
 static void IndexCheckExclusion(Relation heapRelation,
                    Relation indexRelation,
                    IndexInfo *indexInfo);
-static inline int64 itemptr_encode(ItemPointer itemptr);
-static inline void itemptr_decode(ItemPointer itemptr, int64 encoded);
 static bool validate_index_callback(ItemPointer itemptr, void *opaque);
-static void validate_index_heapscan(Relation heapRelation,
-                       Relation indexRelation,
-                       IndexInfo *indexInfo,
-                       Snapshot snapshot,
-                       v_i_state *state);
 static bool ReindexIsCurrentlyProcessingIndex(Oid indexOid);
 static void SetReindexProcessing(Oid heapOid, Oid indexOid);
 static void ResetReindexProcessing(void);
@@ -909,6 +893,7 @@ index_create(Relation heapRelation,
                                tableSpaceId,
                                indexRelationId,
                                relFileNode,
+                               accessMethodObjectId,
                                indexTupDesc,
                                relkind,
                                relpersistence,
@@ -2130,7 +2115,7 @@ index_update_stats(Relation rel,
        ReindexIsProcessingHeap(RelationRelationId))
    {
        /* don't assume syscache will work */
-       HeapScanDesc pg_class_scan;
+       TableScanDesc pg_class_scan;
        ScanKeyData key[1];
 
        ScanKeyInit(&key[0],
@@ -2138,10 +2123,10 @@ index_update_stats(Relation rel,
                    BTEqualStrategyNumber, F_OIDEQ,
                    ObjectIdGetDatum(relid));
 
-       pg_class_scan = heap_beginscan_catalog(pg_class, 1, key);
-       tuple = heap_getnext(pg_class_scan, ForwardScanDirection);
+       pg_class_scan = table_beginscan_catalog(pg_class, 1, key);
+       tuple = heap_scan_getnext(pg_class_scan, ForwardScanDirection);
        tuple = heap_copytuple(tuple);
-       heap_endscan(pg_class_scan);
+       table_endscan(pg_class_scan);
    }
    else
    {
@@ -2397,555 +2382,6 @@ index_build(Relation heapRelation,
    SetUserIdAndSecContext(save_userid, save_sec_context);
 }
 
-
-/*
- * IndexBuildHeapScan - scan the heap relation to find tuples to be indexed
- *
- * This is called back from an access-method-specific index build procedure
- * after the AM has done whatever setup it needs.  The parent heap relation
- * is scanned to find tuples that should be entered into the index.  Each
- * such tuple is passed to the AM's callback routine, which does the right
- * things to add it to the new index.  After we return, the AM's index
- * build procedure does whatever cleanup it needs.
- *
- * The total count of live heap tuples is returned.  This is for updating
- * pg_class statistics.  (It's annoying not to be able to do that here, but we
- * want to merge that update with others; see index_update_stats.)  Note that
- * the index AM itself must keep track of the number of index tuples; we don't
- * do so here because the AM might reject some of the tuples for its own
- * reasons, such as being unable to store NULLs.
- *
- * A side effect is to set indexInfo->ii_BrokenHotChain to true if we detect
- * any potentially broken HOT chains.  Currently, we set this if there are
- * any RECENTLY_DEAD or DELETE_IN_PROGRESS entries in a HOT chain, without
- * trying very hard to detect whether they're really incompatible with the
- * chain tip.
- */
-double
-IndexBuildHeapScan(Relation heapRelation,
-                  Relation indexRelation,
-                  IndexInfo *indexInfo,
-                  bool allow_sync,
-                  IndexBuildCallback callback,
-                  void *callback_state,
-                  HeapScanDesc scan)
-{
-   return IndexBuildHeapRangeScan(heapRelation, indexRelation,
-                                  indexInfo, allow_sync,
-                                  false,
-                                  0, InvalidBlockNumber,
-                                  callback, callback_state, scan);
-}
-
-/*
- * As above, except that instead of scanning the complete heap, only the given
- * number of blocks are scanned.  Scan to end-of-rel can be signalled by
- * passing InvalidBlockNumber as numblocks.  Note that restricting the range
- * to scan cannot be done when requesting syncscan.
- *
- * When "anyvisible" mode is requested, all tuples visible to any transaction
- * are indexed and counted as live, including those inserted or deleted by
- * transactions that are still in progress.
- */
-double
-IndexBuildHeapRangeScan(Relation heapRelation,
-                       Relation indexRelation,
-                       IndexInfo *indexInfo,
-                       bool allow_sync,
-                       bool anyvisible,
-                       BlockNumber start_blockno,
-                       BlockNumber numblocks,
-                       IndexBuildCallback callback,
-                       void *callback_state,
-                       HeapScanDesc scan)
-{
-   bool        is_system_catalog;
-   bool        checking_uniqueness;
-   HeapTuple   heapTuple;
-   Datum       values[INDEX_MAX_KEYS];
-   bool        isnull[INDEX_MAX_KEYS];
-   double      reltuples;
-   ExprState  *predicate;
-   TupleTableSlot *slot;
-   EState     *estate;
-   ExprContext *econtext;
-   Snapshot    snapshot;
-   bool        need_unregister_snapshot = false;
-   TransactionId OldestXmin;
-   BlockNumber root_blkno = InvalidBlockNumber;
-   OffsetNumber root_offsets[MaxHeapTuplesPerPage];
-
-   /*
-    * sanity checks
-    */
-   Assert(OidIsValid(indexRelation->rd_rel->relam));
-
-   /* Remember if it's a system catalog */
-   is_system_catalog = IsSystemRelation(heapRelation);
-
-   /* See whether we're verifying uniqueness/exclusion properties */
-   checking_uniqueness = (indexInfo->ii_Unique ||
-                          indexInfo->ii_ExclusionOps != NULL);
-
-   /*
-    * "Any visible" mode is not compatible with uniqueness checks; make sure
-    * only one of those is requested.
-    */
-   Assert(!(anyvisible && checking_uniqueness));
-
-   /*
-    * Need an EState for evaluation of index expressions and partial-index
-    * predicates.  Also a slot to hold the current tuple.
-    */
-   estate = CreateExecutorState();
-   econtext = GetPerTupleExprContext(estate);
-   slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation),
-                                   &TTSOpsHeapTuple);
-
-   /* Arrange for econtext's scan tuple to be the tuple under test */
-   econtext->ecxt_scantuple = slot;
-
-   /* Set up execution state for predicate, if any. */
-   predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
-
-   /*
-    * Prepare for scan of the base relation.  In a normal index build, we use
-    * SnapshotAny because we must retrieve all tuples and do our own time
-    * qual checks (because we have to index RECENTLY_DEAD tuples). In a
-    * concurrent build, or during bootstrap, we take a regular MVCC snapshot
-    * and index whatever's live according to that.
-    */
-   OldestXmin = InvalidTransactionId;
-
-   /* okay to ignore lazy VACUUMs here */
-   if (!IsBootstrapProcessingMode() && !indexInfo->ii_Concurrent)
-       OldestXmin = GetOldestXmin(heapRelation, PROCARRAY_FLAGS_VACUUM);
-
-   if (!scan)
-   {
-       /*
-        * Serial index build.
-        *
-        * Must begin our own heap scan in this case.  We may also need to
-        * register a snapshot whose lifetime is under our direct control.
-        */
-       if (!TransactionIdIsValid(OldestXmin))
-       {
-           snapshot = RegisterSnapshot(GetTransactionSnapshot());
-           need_unregister_snapshot = true;
-       }
-       else
-           snapshot = SnapshotAny;
-
-       scan = heap_beginscan_strat(heapRelation,   /* relation */
-                                   snapshot,   /* snapshot */
-                                   0,  /* number of keys */
-                                   NULL,   /* scan key */
-                                   true,   /* buffer access strategy OK */
-                                   allow_sync);    /* syncscan OK? */
-   }
-   else
-   {
-       /*
-        * Parallel index build.
-        *
-        * Parallel case never registers/unregisters own snapshot.  Snapshot
-        * is taken from parallel heap scan, and is SnapshotAny or an MVCC
-        * snapshot, based on same criteria as serial case.
-        */
-       Assert(!IsBootstrapProcessingMode());
-       Assert(allow_sync);
-       snapshot = scan->rs_snapshot;
-   }
-
-   /*
-    * Must call GetOldestXmin() with SnapshotAny.  Should never call
-    * GetOldestXmin() with MVCC snapshot. (It's especially worth checking
-    * this for parallel builds, since ambuild routines that support parallel
-    * builds must work these details out for themselves.)
-    */
-   Assert(snapshot == SnapshotAny || IsMVCCSnapshot(snapshot));
-   Assert(snapshot == SnapshotAny ? TransactionIdIsValid(OldestXmin) :
-          !TransactionIdIsValid(OldestXmin));
-   Assert(snapshot == SnapshotAny || !anyvisible);
-
-   /* set our scan endpoints */
-   if (!allow_sync)
-       heap_setscanlimits(scan, start_blockno, numblocks);
-   else
-   {
-       /* syncscan can only be requested on whole relation */
-       Assert(start_blockno == 0);
-       Assert(numblocks == InvalidBlockNumber);
-   }
-
-   reltuples = 0;
-
-   /*
-    * Scan all tuples in the base relation.
-    */
-   while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
-   {
-       bool        tupleIsAlive;
-
-       CHECK_FOR_INTERRUPTS();
-
-       /*
-        * When dealing with a HOT-chain of updated tuples, we want to index
-        * the values of the live tuple (if any), but index it under the TID
-        * of the chain's root tuple.  This approach is necessary to preserve
-        * the HOT-chain structure in the heap. So we need to be able to find
-        * the root item offset for every tuple that's in a HOT-chain.  When
-        * first reaching a new page of the relation, call
-        * heap_get_root_tuples() to build a map of root item offsets on the
-        * page.
-        *
-        * It might look unsafe to use this information across buffer
-        * lock/unlock.  However, we hold ShareLock on the table so no
-        * ordinary insert/update/delete should occur; and we hold pin on the
-        * buffer continuously while visiting the page, so no pruning
-        * operation can occur either.
-        *
-        * Also, although our opinions about tuple liveness could change while
-        * we scan the page (due to concurrent transaction commits/aborts),
-        * the chain root locations won't, so this info doesn't need to be
-        * rebuilt after waiting for another transaction.
-        *
-        * Note the implied assumption that there is no more than one live
-        * tuple per HOT-chain --- else we could create more than one index
-        * entry pointing to the same root tuple.
-        */
-       if (scan->rs_cblock != root_blkno)
-       {
-           Page        page = BufferGetPage(scan->rs_cbuf);
-
-           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
-           heap_get_root_tuples(page, root_offsets);
-           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
-
-           root_blkno = scan->rs_cblock;
-       }
-
-       if (snapshot == SnapshotAny)
-       {
-           /* do our own time qual check */
-           bool        indexIt;
-           TransactionId xwait;
-
-   recheck:
-
-           /*
-            * We could possibly get away with not locking the buffer here,
-            * since caller should hold ShareLock on the relation, but let's
-            * be conservative about it.  (This remark is still correct even
-            * with HOT-pruning: our pin on the buffer prevents pruning.)
-            */
-           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
-
-           /*
-            * The criteria for counting a tuple as live in this block need to
-            * match what analyze.c's acquire_sample_rows() does, otherwise
-            * CREATE INDEX and ANALYZE may produce wildly different reltuples
-            * values, e.g. when there are many recently-dead tuples.
-            */
-           switch (HeapTupleSatisfiesVacuum(heapTuple, OldestXmin,
-                                            scan->rs_cbuf))
-           {
-               case HEAPTUPLE_DEAD:
-                   /* Definitely dead, we can ignore it */
-                   indexIt = false;
-                   tupleIsAlive = false;
-                   break;
-               case HEAPTUPLE_LIVE:
-                   /* Normal case, index and unique-check it */
-                   indexIt = true;
-                   tupleIsAlive = true;
-                   /* Count it as live, too */
-                   reltuples += 1;
-                   break;
-               case HEAPTUPLE_RECENTLY_DEAD:
-
-                   /*
-                    * If tuple is recently deleted then we must index it
-                    * anyway to preserve MVCC semantics.  (Pre-existing
-                    * transactions could try to use the index after we finish
-                    * building it, and may need to see such tuples.)
-                    *
-                    * However, if it was HOT-updated then we must only index
-                    * the live tuple at the end of the HOT-chain.  Since this
-                    * breaks semantics for pre-existing snapshots, mark the
-                    * index as unusable for them.
-                    *
-                    * We don't count recently-dead tuples in reltuples, even
-                    * if we index them; see acquire_sample_rows().
-                    */
-                   if (HeapTupleIsHotUpdated(heapTuple))
-                   {
-                       indexIt = false;
-                       /* mark the index as unsafe for old snapshots */
-                       indexInfo->ii_BrokenHotChain = true;
-                   }
-                   else
-                       indexIt = true;
-                   /* In any case, exclude the tuple from unique-checking */
-                   tupleIsAlive = false;
-                   break;
-               case HEAPTUPLE_INSERT_IN_PROGRESS:
-
-                   /*
-                    * In "anyvisible" mode, this tuple is visible and we
-                    * don't need any further checks.
-                    */
-                   if (anyvisible)
-                   {
-                       indexIt = true;
-                       tupleIsAlive = true;
-                       reltuples += 1;
-                       break;
-                   }
-
-                   /*
-                    * Since caller should hold ShareLock or better, normally
-                    * the only way to see this is if it was inserted earlier
-                    * in our own transaction.  However, it can happen in
-                    * system catalogs, since we tend to release write lock
-                    * before commit there.  Give a warning if neither case
-                    * applies.
-                    */
-                   xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);
-                   if (!TransactionIdIsCurrentTransactionId(xwait))
-                   {
-                       if (!is_system_catalog)
-                           elog(WARNING, "concurrent insert in progress within table \"%s\"",
-                                RelationGetRelationName(heapRelation));
-
-                       /*
-                        * If we are performing uniqueness checks, indexing
-                        * such a tuple could lead to a bogus uniqueness
-                        * failure.  In that case we wait for the inserting
-                        * transaction to finish and check again.
-                        */
-                       if (checking_uniqueness)
-                       {
-                           /*
-                            * Must drop the lock on the buffer before we wait
-                            */
-                           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
-                           XactLockTableWait(xwait, heapRelation,
-                                             &heapTuple->t_self,
-                                             XLTW_InsertIndexUnique);
-                           CHECK_FOR_INTERRUPTS();
-                           goto recheck;
-                       }
-                   }
-                   else
-                   {
-                       /*
-                        * For consistency with acquire_sample_rows(), count
-                        * HEAPTUPLE_INSERT_IN_PROGRESS tuples as live only
-                        * when inserted by our own transaction.
-                        */
-                       reltuples += 1;
-                   }
-
-                   /*
-                    * We must index such tuples, since if the index build
-                    * commits then they're good.
-                    */
-                   indexIt = true;
-                   tupleIsAlive = true;
-                   break;
-               case HEAPTUPLE_DELETE_IN_PROGRESS:
-
-                   /*
-                    * As with INSERT_IN_PROGRESS case, this is unexpected
-                    * unless it's our own deletion or a system catalog; but
-                    * in anyvisible mode, this tuple is visible.
-                    */
-                   if (anyvisible)
-                   {
-                       indexIt = true;
-                       tupleIsAlive = false;
-                       reltuples += 1;
-                       break;
-                   }
-
-                   xwait = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
-                   if (!TransactionIdIsCurrentTransactionId(xwait))
-                   {
-                       if (!is_system_catalog)
-                           elog(WARNING, "concurrent delete in progress within table \"%s\"",
-                                RelationGetRelationName(heapRelation));
-
-                       /*
-                        * If we are performing uniqueness checks, assuming
-                        * the tuple is dead could lead to missing a
-                        * uniqueness violation.  In that case we wait for the
-                        * deleting transaction to finish and check again.
-                        *
-                        * Also, if it's a HOT-updated tuple, we should not
-                        * index it but rather the live tuple at the end of
-                        * the HOT-chain.  However, the deleting transaction
-                        * could abort, possibly leaving this tuple as live
-                        * after all, in which case it has to be indexed. The
-                        * only way to know what to do is to wait for the
-                        * deleting transaction to finish and check again.
-                        */
-                       if (checking_uniqueness ||
-                           HeapTupleIsHotUpdated(heapTuple))
-                       {
-                           /*
-                            * Must drop the lock on the buffer before we wait
-                            */
-                           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
-                           XactLockTableWait(xwait, heapRelation,
-                                             &heapTuple->t_self,
-                                             XLTW_InsertIndexUnique);
-                           CHECK_FOR_INTERRUPTS();
-                           goto recheck;
-                       }
-
-                       /*
-                        * Otherwise index it but don't check for uniqueness,
-                        * the same as a RECENTLY_DEAD tuple.
-                        */
-                       indexIt = true;
-
-                       /*
-                        * Count HEAPTUPLE_DELETE_IN_PROGRESS tuples as live,
-                        * if they were not deleted by the current
-                        * transaction.  That's what acquire_sample_rows()
-                        * does, and we want the behavior to be consistent.
-                        */
-                       reltuples += 1;
-                   }
-                   else if (HeapTupleIsHotUpdated(heapTuple))
-                   {
-                       /*
-                        * It's a HOT-updated tuple deleted by our own xact.
-                        * We can assume the deletion will commit (else the
-                        * index contents don't matter), so treat the same as
-                        * RECENTLY_DEAD HOT-updated tuples.
-                        */
-                       indexIt = false;
-                       /* mark the index as unsafe for old snapshots */
-                       indexInfo->ii_BrokenHotChain = true;
-                   }
-                   else
-                   {
-                       /*
-                        * It's a regular tuple deleted by our own xact. Index
-                        * it, but don't check for uniqueness nor count in
-                        * reltuples, the same as a RECENTLY_DEAD tuple.
-                        */
-                       indexIt = true;
-                   }
-                   /* In any case, exclude the tuple from unique-checking */
-                   tupleIsAlive = false;
-                   break;
-               default:
-                   elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
-                   indexIt = tupleIsAlive = false; /* keep compiler quiet */
-                   break;
-           }
-
-           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
-
-           if (!indexIt)
-               continue;
-       }
-       else
-       {
-           /* heap_getnext did the time qual check */
-           tupleIsAlive = true;
-           reltuples += 1;
-       }
-
-       MemoryContextReset(econtext->ecxt_per_tuple_memory);
-
-       /* Set up for predicate or expression evaluation */
-       ExecStoreHeapTuple(heapTuple, slot, false);
-
-       /*
-        * In a partial index, discard tuples that don't satisfy the
-        * predicate.
-        */
-       if (predicate != NULL)
-       {
-           if (!ExecQual(predicate, econtext))
-               continue;
-       }
-
-       /*
-        * For the current heap tuple, extract all the attributes we use in
-        * this index, and note which are null.  This also performs evaluation
-        * of any expressions needed.
-        */
-       FormIndexDatum(indexInfo,
-                      slot,
-                      estate,
-                      values,
-                      isnull);
-
-       /*
-        * You'd think we should go ahead and build the index tuple here, but
-        * some index AMs want to do further processing on the data first.  So
-        * pass the values[] and isnull[] arrays, instead.
-        */
-
-       if (HeapTupleIsHeapOnly(heapTuple))
-       {
-           /*
-            * For a heap-only tuple, pretend its TID is that of the root. See
-            * src/backend/access/heap/README.HOT for discussion.
-            */
-           HeapTupleData rootTuple;
-           OffsetNumber offnum;
-
-           rootTuple = *heapTuple;
-           offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);
-
-           if (!OffsetNumberIsValid(root_offsets[offnum - 1]))
-               ereport(ERROR,
-                       (errcode(ERRCODE_DATA_CORRUPTED),
-                        errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
-                                        ItemPointerGetBlockNumber(&heapTuple->t_self),
-                                        offnum,
-                                        RelationGetRelationName(heapRelation))));
-
-           ItemPointerSetOffsetNumber(&rootTuple.t_self,
-                                      root_offsets[offnum - 1]);
-
-           /* Call the AM's callback routine to process the tuple */
-           callback(indexRelation, &rootTuple, values, isnull, tupleIsAlive,
-                    callback_state);
-       }
-       else
-       {
-           /* Call the AM's callback routine to process the tuple */
-           callback(indexRelation, heapTuple, values, isnull, tupleIsAlive,
-                    callback_state);
-       }
-   }
-
-   heap_endscan(scan);
-
-   /* we can now forget our snapshot, if set and registered by us */
-   if (need_unregister_snapshot)
-       UnregisterSnapshot(snapshot);
-
-   ExecDropSingleTupleTableSlot(slot);
-
-   FreeExecutorState(estate);
-
-   /* These may have been pointing to the now-gone estate */
-   indexInfo->ii_ExpressionsState = NIL;
-   indexInfo->ii_PredicateState = NULL;
-
-   return reltuples;
-}
-
-
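
IndexBuildHeapScan() and IndexBuildHeapRangeScan() move behind the table AM interface together with their HOT-chain handling (see heapam_handler.c in this commit). An ambuild routine now reaches the same logic through a tableam wrapper; a sketch, with the wrapper name and argument list taken from this patch series and therefore subject to change:

    /* serial build: let the AM start its own scan (last argument NULL) */
    reltuples = table_index_build_scan(heapRelation,
                                       indexRelation,
                                       indexInfo,
                                       true,            /* allow_sync */
                                       build_callback,  /* per-tuple callback */
                                       (void *) &buildstate,
                                       NULL);           /* no parallel scan */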
 /*
  * IndexCheckExclusion - verify that a new exclusion constraint is satisfied
  *
@@ -2963,8 +2399,7 @@ IndexCheckExclusion(Relation heapRelation,
                    Relation indexRelation,
                    IndexInfo *indexInfo)
 {
-   HeapScanDesc scan;
-   HeapTuple   heapTuple;
+   TableScanDesc scan;
    Datum       values[INDEX_MAX_KEYS];
    bool        isnull[INDEX_MAX_KEYS];
    ExprState  *predicate;
@@ -2987,8 +2422,7 @@ IndexCheckExclusion(Relation heapRelation,
     */
    estate = CreateExecutorState();
    econtext = GetPerTupleExprContext(estate);
-   slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation),
-                                   &TTSOpsHeapTuple);
+   slot = table_gimmegimmeslot(heapRelation, NULL);
 
    /* Arrange for econtext's scan tuple to be the tuple under test */
    econtext->ecxt_scantuple = slot;
@@ -3000,22 +2434,17 @@ IndexCheckExclusion(Relation heapRelation,
     * Scan all live tuples in the base relation.
     */
    snapshot = RegisterSnapshot(GetLatestSnapshot());
-   scan = heap_beginscan_strat(heapRelation,   /* relation */
-                               snapshot,   /* snapshot */
-                               0,  /* number of keys */
-                               NULL,   /* scan key */
-                               true,   /* buffer access strategy OK */
-                               true);  /* syncscan OK */
-
-   while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+   scan = table_beginscan_strat(heapRelation,  /* relation */
+                                  snapshot,    /* snapshot */
+                                  0,   /* number of keys */
+                                  NULL,    /* scan key */
+                                  true,    /* buffer access strategy OK */
+                                  true);   /* syncscan OK */
+
+   while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
    {
        CHECK_FOR_INTERRUPTS();
 
-       MemoryContextReset(econtext->ecxt_per_tuple_memory);
-
-       /* Set up for predicate or expression evaluation */
-       ExecStoreHeapTuple(heapTuple, slot, false);
-
        /*
         * In a partial index, ignore tuples that don't satisfy the predicate.
         */
@@ -3039,11 +2468,13 @@ IndexCheckExclusion(Relation heapRelation,
         */
        check_exclusion_constraint(heapRelation,
                                   indexRelation, indexInfo,
-                                  &(heapTuple->t_self), values, isnull,
+                                  &(slot->tts_tid), values, isnull,
                                   estate, true);
+
+       MemoryContextReset(econtext->ecxt_per_tuple_memory);
    }
 
-   heap_endscan(scan);
+   table_endscan(scan);
    UnregisterSnapshot(snapshot);
 
    ExecDropSingleTupleTableSlot(slot);
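
IndexCheckExclusion() also shows the slot-based form of the scan idiom: table_gimmegimmeslot() (this series' placeholder name for a helper returning a slot of the AM's preferred class) replaces the hard-coded heap-tuple slot, and table_scan_getnextslot() stores each tuple directly into it, so the explicit ExecStoreHeapTuple() step disappears. Condensed, and assuming the plain table_beginscan() variant:

    TupleTableSlot *slot = table_gimmegimmeslot(rel, NULL);
    TableScanDesc   scan = table_beginscan(rel, snapshot, 0, NULL);

    while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
    {
        /* use the slot; the tuple's TID is available as slot->tts_tid */
    }

    table_endscan(scan);
    ExecDropSingleTupleTableSlot(slot);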
@@ -3126,7 +2557,7 @@ validate_index(Oid heapId, Oid indexId, Snapshot snapshot)
                indexRelation;
    IndexInfo  *indexInfo;
    IndexVacuumInfo ivinfo;
-   v_i_state   state;
+   ValidateIndexState state;
    Oid         save_userid;
    int         save_sec_context;
    int         save_nestlevel;
@@ -3187,11 +2618,11 @@ validate_index(Oid heapId, Oid indexId, Snapshot snapshot)
    /*
     * Now scan the heap and "merge" it with the index
     */
-   validate_index_heapscan(heapRelation,
-                           indexRelation,
-                           indexInfo,
-                           snapshot,
-                           &state);
+   table_index_validate_scan(heapRelation,
+                             indexRelation,
+                             indexInfo,
+                             snapshot,
+                             &state);
 
    /* Done with tuplesort object */
    tuplesort_end(state.tuplesort);
@@ -3211,53 +2642,13 @@ validate_index(Oid heapId, Oid indexId, Snapshot snapshot)
    heap_close(heapRelation, NoLock);
 }
 
-/*
- * itemptr_encode - Encode ItemPointer as int64/int8
- *
- * This representation must produce values encoded as int64 that sort in the
- * same order as their corresponding original TID values would (using the
- * default int8 opclass to produce a result equivalent to the default TID
- * opclass).
- *
- * As noted in validate_index(), this can be significantly faster.
- */
-static inline int64
-itemptr_encode(ItemPointer itemptr)
-{
-   BlockNumber block = ItemPointerGetBlockNumber(itemptr);
-   OffsetNumber offset = ItemPointerGetOffsetNumber(itemptr);
-   int64       encoded;
-
-   /*
-    * Use the 16 least significant bits for the offset.  32 adjacent bits are
-    * used for the block number.  Since remaining bits are unused, there
-    * cannot be negative encoded values (We assume a two's complement
-    * representation).
-    */
-   encoded = ((uint64) block << 16) | (uint16) offset;
-
-   return encoded;
-}
-
-/*
- * itemptr_decode - Decode int64/int8 representation back to ItemPointer
- */
-static inline void
-itemptr_decode(ItemPointer itemptr, int64 encoded)
-{
-   BlockNumber block = (BlockNumber) (encoded >> 16);
-   OffsetNumber offset = (OffsetNumber) (encoded & 0xFFFF);
-
-   ItemPointerSet(itemptr, block, offset);
-}
-
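
itemptr_encode() is still called by validate_index_callback() below, so these helpers move to a shared header rather than disappearing. A worked round trip of the encoding: block 7, offset 5 packs to (7 << 16) | 5 = 458757, and decoding shifts the block number back out of the high bits:

    ItemPointerData tid;
    int64       encoded;

    ItemPointerSet(&tid, 7, 5);         /* block 7, offset 5 */
    encoded = itemptr_encode(&tid);     /* (7 << 16) | 5 == 458757 */
    itemptr_decode(&tid, encoded);

    Assert(ItemPointerGetBlockNumber(&tid) == 7);
    Assert(ItemPointerGetOffsetNumber(&tid) == 5);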
 /*
  * validate_index_callback - bulkdelete callback to collect the index TIDs
  */
 static bool
 validate_index_callback(ItemPointer itemptr, void *opaque)
 {
-   v_i_state  *state = (v_i_state *) opaque;
+   ValidateIndexState *state = (ValidateIndexState *) opaque;
    int64       encoded = itemptr_encode(itemptr);
 
    tuplesort_putdatum(state->tuplesort, Int64GetDatum(encoded), false);
@@ -3265,243 +2656,6 @@ validate_index_callback(ItemPointer itemptr, void *opaque)
    return false;               /* never actually delete anything */
 }
 
-/*
- * validate_index_heapscan - second table scan for concurrent index build
- *
- * This has much code in common with IndexBuildHeapScan, but it's enough
- * different that it seems cleaner to have two routines not one.
- */
-static void
-validate_index_heapscan(Relation heapRelation,
-                       Relation indexRelation,
-                       IndexInfo *indexInfo,
-                       Snapshot snapshot,
-                       v_i_state *state)
-{
-   HeapScanDesc scan;
-   HeapTuple   heapTuple;
-   Datum       values[INDEX_MAX_KEYS];
-   bool        isnull[INDEX_MAX_KEYS];
-   ExprState  *predicate;
-   TupleTableSlot *slot;
-   EState     *estate;
-   ExprContext *econtext;
-   BlockNumber root_blkno = InvalidBlockNumber;
-   OffsetNumber root_offsets[MaxHeapTuplesPerPage];
-   bool        in_index[MaxHeapTuplesPerPage];
-
-   /* state variables for the merge */
-   ItemPointer indexcursor = NULL;
-   ItemPointerData decoded;
-   bool        tuplesort_empty = false;
-
-   /*
-    * sanity checks
-    */
-   Assert(OidIsValid(indexRelation->rd_rel->relam));
-
-   /*
-    * Need an EState for evaluation of index expressions and partial-index
-    * predicates.  Also a slot to hold the current tuple.
-    */
-   estate = CreateExecutorState();
-   econtext = GetPerTupleExprContext(estate);
-   slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation),
-                                   &TTSOpsHeapTuple);
-
-   /* Arrange for econtext's scan tuple to be the tuple under test */
-   econtext->ecxt_scantuple = slot;
-
-   /* Set up execution state for predicate, if any. */
-   predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
-
-   /*
-    * Prepare for scan of the base relation.  We need just those tuples
-    * satisfying the passed-in reference snapshot.  We must disable syncscan
-    * here, because it's critical that we read from block zero forward to
-    * match the sorted TIDs.
-    */
-   scan = heap_beginscan_strat(heapRelation,   /* relation */
-                               snapshot,   /* snapshot */
-                               0,  /* number of keys */
-                               NULL,   /* scan key */
-                               true,   /* buffer access strategy OK */
-                               false); /* syncscan not OK */
-
-   /*
-    * Scan all tuples matching the snapshot.
-    */
-   while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
-   {
-       ItemPointer heapcursor = &heapTuple->t_self;
-       ItemPointerData rootTuple;
-       OffsetNumber root_offnum;
-
-       CHECK_FOR_INTERRUPTS();
-
-       state->htups += 1;
-
-       /*
-        * As commented in IndexBuildHeapScan, we should index heap-only
-        * tuples under the TIDs of their root tuples; so when we advance onto
-        * a new heap page, build a map of root item offsets on the page.
-        *
-        * This complicates merging against the tuplesort output: we will
-        * visit the live tuples in order by their offsets, but the root
-        * offsets that we need to compare against the index contents might be
-        * ordered differently.  So we might have to "look back" within the
-        * tuplesort output, but only within the current page.  We handle that
-        * by keeping a bool array in_index[] showing all the
-        * already-passed-over tuplesort output TIDs of the current page. We
-        * clear that array here, when advancing onto a new heap page.
-        */
-       if (scan->rs_cblock != root_blkno)
-       {
-           Page        page = BufferGetPage(scan->rs_cbuf);
-
-           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
-           heap_get_root_tuples(page, root_offsets);
-           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
-
-           memset(in_index, 0, sizeof(in_index));
-
-           root_blkno = scan->rs_cblock;
-       }
-
-       /* Convert actual tuple TID to root TID */
-       rootTuple = *heapcursor;
-       root_offnum = ItemPointerGetOffsetNumber(heapcursor);
-
-       if (HeapTupleIsHeapOnly(heapTuple))
-       {
-           root_offnum = root_offsets[root_offnum - 1];
-           if (!OffsetNumberIsValid(root_offnum))
-               ereport(ERROR,
-                       (errcode(ERRCODE_DATA_CORRUPTED),
-                        errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
-                                        ItemPointerGetBlockNumber(heapcursor),
-                                        ItemPointerGetOffsetNumber(heapcursor),
-                                        RelationGetRelationName(heapRelation))));
-           ItemPointerSetOffsetNumber(&rootTuple, root_offnum);
-       }
-
-       /*
-        * "merge" by skipping through the index tuples until we find or pass
-        * the current root tuple.
-        */
-       while (!tuplesort_empty &&
-              (!indexcursor ||
-               ItemPointerCompare(indexcursor, &rootTuple) < 0))
-       {
-           Datum       ts_val;
-           bool        ts_isnull;
-
-           if (indexcursor)
-           {
-               /*
-                * Remember index items seen earlier on the current heap page
-                */
-               if (ItemPointerGetBlockNumber(indexcursor) == root_blkno)
-                   in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true;
-           }
-
-           tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
-                                                 &ts_val, &ts_isnull, NULL);
-           Assert(tuplesort_empty || !ts_isnull);
-           if (!tuplesort_empty)
-           {
-               itemptr_decode(&decoded, DatumGetInt64(ts_val));
-               indexcursor = &decoded;
-
-               /* If int8 is pass-by-ref, free (encoded) TID Datum memory */
-#ifndef USE_FLOAT8_BYVAL
-               pfree(DatumGetPointer(ts_val));
-#endif
-           }
-           else
-           {
-               /* Be tidy */
-               indexcursor = NULL;
-           }
-       }
-
-       /*
-        * If the tuplesort has overshot *and* we didn't see a match earlier,
-        * then this tuple is missing from the index, so insert it.
-        */
-       if ((tuplesort_empty ||
-            ItemPointerCompare(indexcursor, &rootTuple) > 0) &&
-           !in_index[root_offnum - 1])
-       {
-           MemoryContextReset(econtext->ecxt_per_tuple_memory);
-
-           /* Set up for predicate or expression evaluation */
-           ExecStoreHeapTuple(heapTuple, slot, false);
-
-           /*
-            * In a partial index, discard tuples that don't satisfy the
-            * predicate.
-            */
-           if (predicate != NULL)
-           {
-               if (!ExecQual(predicate, econtext))
-                   continue;
-           }
-
-           /*
-            * For the current heap tuple, extract all the attributes we use
-            * in this index, and note which are null.  This also performs
-            * evaluation of any expressions needed.
-            */
-           FormIndexDatum(indexInfo,
-                          slot,
-                          estate,
-                          values,
-                          isnull);
-
-           /*
-            * You'd think we should go ahead and build the index tuple here,
-            * but some index AMs want to do further processing on the data
-            * first. So pass the values[] and isnull[] arrays, instead.
-            */
-
-           /*
-            * If the tuple is already committed dead, you might think we
-            * could suppress uniqueness checking, but this is no longer true
-            * in the presence of HOT, because the insert is actually a proxy
-            * for a uniqueness check on the whole HOT-chain.  That is, the
-            * tuple we have here could be dead because it was already
-            * HOT-updated, and if so the updating transaction will not have
-            * thought it should insert index entries.  The index AM will
-            * check the whole HOT-chain and correctly detect a conflict if
-            * there is one.
-            */
-
-           index_insert(indexRelation,
-                        values,
-                        isnull,
-                        &rootTuple,
-                        heapRelation,
-                        indexInfo->ii_Unique ?
-                        UNIQUE_CHECK_YES : UNIQUE_CHECK_NO,
-                        indexInfo);
-
-           state->tups_inserted += 1;
-       }
-   }
-
-   heap_endscan(scan);
-
-   ExecDropSingleTupleTableSlot(slot);
-
-   FreeExecutorState(estate);
-
-   /* These may have been pointing to the now-gone estate */
-   indexInfo->ii_ExpressionsState = NIL;
-   indexInfo->ii_PredicateState = NULL;
-}
-
-
 /*
  * index_set_state_flags - adjust pg_index state flags
  *
index 558022647ccc2af182f549328e48bd250a0444a1..4e6c5df15802d88c377a81e8a215fe5099d017d3 100644 (file)
@@ -17,6 +17,7 @@
 #include "access/genam.h"
 #include "access/heapam.h"
 #include "access/htup_details.h"
+#include "access/tableam.h"
 #include "access/tupconvert.h"
 #include "access/sysattr.h"
 #include "catalog/indexing.h"
index c692958d917188a341281b84661a05e80986f1c1..d668c9b02188373127ad4ab22a5417d8029d505a 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "access/heapam.h"
 #include "access/htup_details.h"
+#include "access/tableam.h"
 #include "access/sysattr.h"
 #include "catalog/catalog.h"
 #include "catalog/dependency.h"
@@ -153,7 +154,7 @@ RemoveConversionById(Oid conversionOid)
 {
    Relation    rel;
    HeapTuple   tuple;
-   HeapScanDesc scan;
+   TableScanDesc scan;
    ScanKeyData scanKeyData;
 
    ScanKeyInit(&scanKeyData,
@@ -164,14 +165,14 @@ RemoveConversionById(Oid conversionOid)
    /* open pg_conversion */
    rel = heap_open(ConversionRelationId, RowExclusiveLock);
 
-   scan = heap_beginscan_catalog(rel, 1, &scanKeyData);
+   scan = table_beginscan_catalog(rel, 1, &scanKeyData);
 
    /* search for the target tuple */
-   if (HeapTupleIsValid(tuple = heap_getnext(scan, ForwardScanDirection)))
+   if (HeapTupleIsValid(tuple = heap_scan_getnext(scan, ForwardScanDirection)))
        CatalogTupleDelete(rel, &tuple->t_self);
    else
        elog(ERROR, "could not find tuple for conversion %u", conversionOid);
-   heap_endscan(scan);
+   table_endscan(scan);
    heap_close(rel, RowExclusiveLock);
 }
 
index e123691923c728be89a774b1359133822319f13d..fec0c9e2ec209c82923b4eac6856bc1df63d6cd8 100644 (file)
@@ -13,6 +13,7 @@
 #include "access/genam.h"
 #include "access/heapam.h"
 #include "access/htup_details.h"
+#include "access/tableam.h"
 #include "catalog/indexing.h"
 #include "catalog/objectaccess.h"
 #include "catalog/pg_db_role_setting.h"
@@ -170,7 +171,7 @@ void
 DropSetting(Oid databaseid, Oid roleid)
 {
    Relation    relsetting;
-   HeapScanDesc scan;
+   TableScanDesc scan;
    ScanKeyData keys[2];
    HeapTuple   tup;
    int         numkeys = 0;
@@ -196,12 +197,12 @@ DropSetting(Oid databaseid, Oid roleid)
        numkeys++;
    }
 
-   scan = heap_beginscan_catalog(relsetting, numkeys, keys);
-   while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection)))
+   scan = table_beginscan_catalog(relsetting, numkeys, keys);
+   while (HeapTupleIsValid(tup = heap_scan_getnext(scan, ForwardScanDirection)))
    {
        CatalogTupleDelete(relsetting, &tup->t_self);
    }
-   heap_endscan(scan);
+   table_endscan(scan);
 
    heap_close(relsetting, RowExclusiveLock);
 }
index a9fbb731654eb069e0e5c3e0a9426991c64d11a4..51183f9a2aea2e27cfa6677109631417ec3cfeda 100644 (file)
@@ -21,6 +21,7 @@
 #include "access/hash.h"
 #include "access/heapam.h"
 #include "access/htup_details.h"
+#include "access/tableam.h"
 #include "access/xact.h"
 
 #include "catalog/catalog.h"
@@ -329,7 +330,7 @@ GetAllTablesPublicationRelations(void)
 {
    Relation    classRel;
    ScanKeyData key[1];
-   HeapScanDesc scan;
+   TableScanDesc scan;
    HeapTuple   tuple;
    List       *result = NIL;
 
@@ -340,9 +341,9 @@ GetAllTablesPublicationRelations(void)
                BTEqualStrategyNumber, F_CHAREQ,
                CharGetDatum(RELKIND_RELATION));
 
-   scan = heap_beginscan_catalog(classRel, 1, key);
+   scan = table_beginscan_catalog(classRel, 1, key);
 
-   while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+   while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
    {
        Form_pg_class relForm = (Form_pg_class) GETSTRUCT(tuple);
        Oid         relid = relForm->oid;
@@ -351,7 +352,7 @@ GetAllTablesPublicationRelations(void)
            result = lappend_oid(result, relid);
    }
 
-   heap_endscan(scan);
+   table_endscan(scan);
    heap_close(classRel, AccessShareLock);
 
    return result;
index e136aa6a0b22de2e4575a63b418f9704406e433c..f1dc998f48907476b6af5e11ab48924e334adf21 100644 (file)
@@ -19,6 +19,7 @@
 #include "access/genam.h"
 #include "access/heapam.h"
 #include "access/htup_details.h"
+#include "access/tableam.h"
 #include "access/xact.h"
 
 #include "catalog/indexing.h"
@@ -390,7 +391,7 @@ void
 RemoveSubscriptionRel(Oid subid, Oid relid)
 {
    Relation    rel;
-   HeapScanDesc scan;
+   TableScanDesc scan;
    ScanKeyData skey[2];
    HeapTuple   tup;
    int         nkeys = 0;
@@ -416,12 +417,12 @@ RemoveSubscriptionRel(Oid subid, Oid relid)
    }
 
    /* Do the search and delete what we found. */
-   scan = heap_beginscan_catalog(rel, nkeys, skey);
-   while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection)))
+   scan = table_beginscan_catalog(rel, nkeys, skey);
+   while (HeapTupleIsValid(tup = heap_scan_getnext(scan, ForwardScanDirection)))
    {
        CatalogTupleDelete(rel, &tup->t_self);
    }
-   heap_endscan(scan);
+   table_endscan(scan);
 
    heap_close(rel, RowExclusiveLock);
 }
index 462969a83843c2021340427a812c58f805774c79..60143f0e44d9ec6b6f889ce857b8fd349f309086 100644 (file)
@@ -266,6 +266,7 @@ create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid,
                                           toast_typid,
                                           InvalidOid,
                                           rel->rd_rel->relowner,
+                                          rel->rd_rel->relam,
                                           tupdesc,
                                           NIL,
                                           RELKIND_TOASTVALUE,
index 4367290a27cab24103f60d3cf643f4c55f7938c4..b55f8e708bbf46e8cf9fbf8e98d21ac31ebc0467 100644 (file)
@@ -30,7 +30,7 @@
 #include "utils/syscache.h"
 
 
-static Oid lookup_index_am_handler_func(List *handler_name, char amtype);
+static Oid lookup_am_handler_func(List *handler_name, char amtype);
 static const char *get_am_type_string(char amtype);
 
 
@@ -74,7 +74,7 @@ CreateAccessMethod(CreateAmStmt *stmt)
    /*
     * Get the handler function oid, verifying the AM type while at it.
     */
-   amhandler = lookup_index_am_handler_func(stmt->handler_name, stmt->amtype);
+   amhandler = lookup_am_handler_func(stmt->handler_name, stmt->amtype);
 
    /*
     * Insert tuple into pg_am.
@@ -229,6 +229,8 @@ get_am_type_string(char amtype)
    {
        case AMTYPE_INDEX:
            return "INDEX";
+       case AMTYPE_TABLE:
+           return "TABLE";
        default:
            /* shouldn't happen */
            elog(ERROR, "invalid access method type '%c'", amtype);
@@ -243,7 +245,7 @@ get_am_type_string(char amtype)
  * This function either returns a valid function Oid or throws an error.
  */
 static Oid
-lookup_index_am_handler_func(List *handler_name, char amtype)
+lookup_am_handler_func(List *handler_name, char amtype)
 {
    Oid         handlerOid;
    static const Oid funcargtypes[1] = {INTERNALOID};
@@ -267,6 +269,15 @@ lookup_index_am_handler_func(List *handler_name, char amtype)
                                NameListToString(handler_name),
                                "index_am_handler")));
            break;
+           /* XXX refactor duplicate error */
+       case AMTYPE_TABLE:
+           if (get_func_rettype(handlerOid) != TABLE_AM_HANDLEROID)
+               ereport(ERROR,
+                       (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                        errmsg("function %s must return type %s",
+                               NameListToString(handler_name),
+                               "table_am_handler")));
+           break;
        default:
            elog(ERROR, "unrecognized access method type \"%c\"", amtype);
    }
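
With AMTYPE_TABLE accepted by lookup_am_handler_func(), a table AM
registers itself through a handler function shaped like an index AM
handler: it takes "internal" and returns the table-AM handler type, as the
validation above requires. A minimal sketch, assuming the handler simply
returns a pointer to the AM's statically allocated callback table (the
routine struct itself lives in the new tableamapi.c, not shown in this
excerpt):

#include "postgres.h"
#include "fmgr.h"

PG_MODULE_MAGIC;

PG_FUNCTION_INFO_V1(my_tableam_handler);

Datum
my_tableam_handler(PG_FUNCTION_ARGS)
{
    /*
     * A real AM would return &its_routine here; NULL is a placeholder to
     * keep this sketch self-contained.
     */
    PG_RETURN_POINTER(NULL);
}
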
index b8445dc372881acb4cd314c19072e0fa41b1a588..29e2377b52008d49f3687dc7838c6d605d53820c 100644 (file)
@@ -18,6 +18,7 @@
 
 #include "access/multixact.h"
 #include "access/sysattr.h"
+#include "access/tableam.h"
 #include "access/transam.h"
 #include "access/tupconvert.h"
 #include "access/tuptoaster.h"
@@ -1000,6 +1001,8 @@ acquire_sample_rows(Relation onerel, int elevel,
    TransactionId OldestXmin;
    BlockSamplerData bs;
    ReservoirStateData rstate;
+   TupleTableSlot *slot;
+   TableScanDesc scan;
 
    Assert(targrows > 0);
 
@@ -1013,178 +1016,72 @@ acquire_sample_rows(Relation onerel, int elevel,
    /* Prepare for sampling rows */
    reservoir_init_selection_state(&rstate, targrows);
 
+   scan = table_beginscan_analyze(onerel);
+   slot = table_gimmegimmeslot(onerel, NULL);
+
    /* Outer loop over blocks to sample */
    while (BlockSampler_HasMore(&bs))
    {
        BlockNumber targblock = BlockSampler_Next(&bs);
-       Buffer      targbuffer;
-       Page        targpage;
-       OffsetNumber targoffset,
-                   maxoffset;
 
        vacuum_delay_point();
 
        /*
-        * We must maintain a pin on the target page's buffer to ensure that
-        * the maxoffset value stays good (else concurrent VACUUM might delete
-        * tuples out from under us).  Hence, pin the page until we are done
-        * looking at it.  We also choose to hold sharelock on the buffer
-        * throughout --- we could release and re-acquire sharelock for each
-        * tuple, but since we aren't doing much work per tuple, the extra
-        * lock traffic is probably better avoided.
+        * XXX: we could have this function return a boolean, instead of
+        * forcing such checks to happen in next_tuple().
         */
-       targbuffer = ReadBufferExtended(onerel, MAIN_FORKNUM, targblock,
-                                       RBM_NORMAL, vac_strategy);
-       LockBuffer(targbuffer, BUFFER_LOCK_SHARE);
-       targpage = BufferGetPage(targbuffer);
-       maxoffset = PageGetMaxOffsetNumber(targpage);
-
-       /* Inner loop over all tuples on the selected page */
-       for (targoffset = FirstOffsetNumber; targoffset <= maxoffset; targoffset++)
-       {
-           ItemId      itemid;
-           HeapTupleData targtuple;
-           bool        sample_it = false;
-
-           itemid = PageGetItemId(targpage, targoffset);
+       table_scan_analyze_next_block(scan, targblock, vac_strategy);
 
+       while (table_scan_analyze_next_tuple(scan, OldestXmin, &liverows, &deadrows, slot))
+       {
            /*
-            * We ignore unused and redirect line pointers.  DEAD line
-            * pointers should be counted as dead, because we need vacuum to
-            * run to get rid of them.  Note that this rule agrees with the
-            * way that heap_page_prune() counts things.
+            * The first targrows sample rows are simply copied into the
+            * reservoir. Then we start replacing tuples in the sample
+            * until we reach the end of the relation.  This algorithm is
+            * from Jeff Vitter's paper (see full citation below). It
+            * works by repeatedly computing the number of tuples to skip
+            * before selecting a tuple, which replaces a randomly chosen
+            * element of the reservoir (current set of tuples).  At all
+            * times the reservoir is a true random sample of the tuples
+            * we've passed over so far, so when we fall off the end of
+            * the relation we're done.
             */
-           if (!ItemIdIsNormal(itemid))
-           {
-               if (ItemIdIsDead(itemid))
-                   deadrows += 1;
-               continue;
-           }
-
-           ItemPointerSet(&targtuple.t_self, targblock, targoffset);
-
-           targtuple.t_tableOid = RelationGetRelid(onerel);
-           targtuple.t_data = (HeapTupleHeader) PageGetItem(targpage, itemid);
-           targtuple.t_len = ItemIdGetLength(itemid);
-
-           switch (HeapTupleSatisfiesVacuum(&targtuple,
-                                            OldestXmin,
-                                            targbuffer))
-           {
-               case HEAPTUPLE_LIVE:
-                   sample_it = true;
-                   liverows += 1;
-                   break;
-
-               case HEAPTUPLE_DEAD:
-               case HEAPTUPLE_RECENTLY_DEAD:
-                   /* Count dead and recently-dead rows */
-                   deadrows += 1;
-                   break;
-
-               case HEAPTUPLE_INSERT_IN_PROGRESS:
-
-                   /*
-                    * Insert-in-progress rows are not counted.  We assume
-                    * that when the inserting transaction commits or aborts,
-                    * it will send a stats message to increment the proper
-                    * count.  This works right only if that transaction ends
-                    * after we finish analyzing the table; if things happen
-                    * in the other order, its stats update will be
-                    * overwritten by ours.  However, the error will be large
-                    * only if the other transaction runs long enough to
-                    * insert many tuples, so assuming it will finish after us
-                    * is the safer option.
-                    *
-                    * A special case is that the inserting transaction might
-                    * be our own.  In this case we should count and sample
-                    * the row, to accommodate users who load a table and
-                    * analyze it in one transaction.  (pgstat_report_analyze
-                    * has to adjust the numbers we send to the stats
-                    * collector to make this come out right.)
-                    */
-                   if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(targtuple.t_data)))
-                   {
-                       sample_it = true;
-                       liverows += 1;
-                   }
-                   break;
-
-               case HEAPTUPLE_DELETE_IN_PROGRESS:
-
-                   /*
-                    * We count delete-in-progress rows as still live, using
-                    * the same reasoning given above; but we don't bother to
-                    * include them in the sample.
-                    *
-                    * If the delete was done by our own transaction, however,
-                    * we must count the row as dead to make
-                    * pgstat_report_analyze's stats adjustments come out
-                    * right.  (Note: this works out properly when the row was
-                    * both inserted and deleted in our xact.)
-                    */
-                   if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(targtuple.t_data)))
-                       deadrows += 1;
-                   else
-                       liverows += 1;
-                   break;
-
-               default:
-                   elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
-                   break;
-           }
-
-           if (sample_it)
+           if (numrows < targrows)
+               rows[numrows++] = ExecCopySlotHeapTuple(slot);
+           else
            {
                /*
-                * The first targrows sample rows are simply copied into the
-                * reservoir. Then we start replacing tuples in the sample
-                * until we reach the end of the relation.  This algorithm is
-                * from Jeff Vitter's paper (see full citation below). It
-                * works by repeatedly computing the number of tuples to skip
-                * before selecting a tuple, which replaces a randomly chosen
-                * element of the reservoir (current set of tuples).  At all
-                * times the reservoir is a true random sample of the tuples
-                * we've passed over so far, so when we fall off the end of
-                * the relation we're done.
+                * t in Vitter's paper is the number of records already
+                * processed.  If we need to compute a new S value, we
+                * must use the not-yet-incremented value of samplerows as
+                * t.
                 */
-               if (numrows < targrows)
-                   rows[numrows++] = heap_copytuple(&targtuple);
-               else
+               if (rowstoskip < 0)
+                   rowstoskip = reservoir_get_next_S(&rstate, samplerows, targrows);
+
+               if (rowstoskip <= 0)
                {
                    /*
-                    * t in Vitter's paper is the number of records already
-                    * processed.  If we need to compute a new S value, we
-                    * must use the not-yet-incremented value of samplerows as
-                    * t.
+                    * Found a suitable tuple, so save it, replacing one
+                    * old tuple at random
                     */
-                   if (rowstoskip < 0)
-                       rowstoskip = reservoir_get_next_S(&rstate, samplerows, targrows);
+                   int         k = (int) (targrows * sampler_random_fract(rstate.randstate));
 
-                   if (rowstoskip <= 0)
-                   {
-                       /*
-                        * Found a suitable tuple, so save it, replacing one
-                        * old tuple at random
-                        */
-                       int         k = (int) (targrows * sampler_random_fract(rstate.randstate));
-
-                       Assert(k >= 0 && k < targrows);
-                       heap_freetuple(rows[k]);
-                       rows[k] = heap_copytuple(&targtuple);
-                   }
-
-                   rowstoskip -= 1;
+                   Assert(k >= 0 && k < targrows);
+                   heap_freetuple(rows[k]);
+                   rows[k] = ExecCopySlotHeapTuple(slot);
                }
 
-               samplerows += 1;
+               rowstoskip -= 1;
            }
-       }
 
-       /* Now release the lock and pin on the page */
-       UnlockReleaseBuffer(targbuffer);
+           samplerows += 1;
+       }
    }
 
+   ExecDropSingleTupleTableSlot(slot);
+   table_endscan(scan);
+
    /*
     * If we didn't find as many tuples as we wanted then we're done. No sort
     * is needed, since they're already in order.
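
The rewritten loop keeps the reservoir bookkeeping in the generic code and
pushes only block reading and tuple visibility down into the AM. The
reservoir technique itself (Vitter's Algorithm R, simplified here with
plain per-row randomness instead of reservoir_get_next_S()'s skip
distances) is easy to see in isolation; a standalone sketch:

#include <stdio.h>
#include <stdlib.h>

#define TARGROWS 5

int
main(void)
{
    int     reservoir[TARGROWS];
    int     seen = 0;

    srandom(42);

    for (int row = 0; row < 1000; row++)
    {
        if (seen < TARGROWS)
        {
            /* fill phase: the first TARGROWS rows are copied verbatim */
            reservoir[seen] = row;
        }
        else
        {
            /* replacement phase: keep row with probability TARGROWS/(seen+1) */
            long    k = random() % (seen + 1);

            if (k < TARGROWS)
                reservoir[k] = row;
        }
        seen++;
    }

    /* at any cutoff, reservoir[] is a uniform sample of the rows seen */
    for (int i = 0; i < TARGROWS; i++)
        printf("%d\n", reservoir[i]);
    return 0;
}
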
index 610e425a566d5621784ff386df024fdc84959f56..1b8d03642c54482c36207f3559e1bac7418a74b3 100644 (file)
@@ -20,7 +20,7 @@
 #include "access/amapi.h"
 #include "access/multixact.h"
 #include "access/relscan.h"
-#include "access/rewriteheap.h"
+#include "access/tableam.h"
 #include "access/transam.h"
 #include "access/tuptoaster.h"
 #include "access/xact.h"
@@ -41,7 +41,6 @@
 #include "storage/bufmgr.h"
 #include "storage/lmgr.h"
 #include "storage/predicate.h"
-#include "storage/smgr.h"
 #include "utils/acl.h"
 #include "utils/fmgroids.h"
 #include "utils/inval.h"
@@ -68,14 +67,10 @@ typedef struct
 
 
 static void rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose);
-static void copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
+static void copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
               bool verbose, bool *pSwapToastByContent,
               TransactionId *pFreezeXid, MultiXactId *pCutoffMulti);
 static List *get_tables_to_cluster(MemoryContext cluster_context);
-static void reform_and_rewrite_tuple(HeapTuple tuple,
-                        TupleDesc oldTupDesc, TupleDesc newTupDesc,
-                        Datum *values, bool *isnull,
-                        RewriteState rwstate);
 
 
 /*---------------------------------------------------------------------------
@@ -597,7 +592,7 @@ rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose)
                               AccessExclusiveLock);
 
    /* Copy the heap data into the new table in the desired order */
-   copy_heap_data(OIDNewHeap, tableOid, indexOid, verbose,
+   copy_table_data(OIDNewHeap, tableOid, indexOid, verbose,
                   &swap_toast_by_content, &frozenXid, &cutoffMulti);
 
    /*
@@ -682,6 +677,7 @@ make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, char relpersistence,
                                          InvalidOid,
                                          InvalidOid,
                                          OldHeap->rd_rel->relowner,
+                                         OldHeap->rd_rel->relam,
                                          OldHeapDesc,
                                          NIL,
                                          RELKIND_RELATION,
@@ -739,7 +735,7 @@ make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, char relpersistence,
 }
 
 /*
- * Do the physical copying of heap data.
+ * Do the physical copying of table data.
  *
  * There are three output parameters:
  * *pSwapToastByContent is set true if toast tables must be swapped by content.
@@ -747,7 +743,7 @@ make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, char relpersistence,
  * *pCutoffMulti receives the MultiXactId used as a cutoff point.
  */
 static void
-copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
+copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
               bool *pSwapToastByContent, TransactionId *pFreezeXid,
               MultiXactId *pCutoffMulti)
 {
@@ -757,21 +753,12 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
    Relation    relRelation;
    HeapTuple   reltup;
    Form_pg_class relform;
-   TupleDesc   oldTupDesc;
-   TupleDesc   newTupDesc;
-   int         natts;
-   Datum      *values;
-   bool       *isnull;
-   IndexScanDesc indexScan;
-   HeapScanDesc heapScan;
-   bool        use_wal;
-   bool        is_system_catalog;
+   TupleDesc   oldTupDesc PG_USED_FOR_ASSERTS_ONLY;
+   TupleDesc   newTupDesc PG_USED_FOR_ASSERTS_ONLY;
    TransactionId OldestXmin;
    TransactionId FreezeXid;
    MultiXactId MultiXactCutoff;
-   RewriteState rwstate;
    bool        use_sort;
-   Tuplesortstate *tuplesort;
    double      num_tuples = 0,
                tups_vacuumed = 0,
                tups_recently_dead = 0;
@@ -799,11 +786,6 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
    newTupDesc = RelationGetDescr(NewHeap);
    Assert(newTupDesc->natts == oldTupDesc->natts);
 
-   /* Preallocate values/isnull arrays */
-   natts = newTupDesc->natts;
-   values = (Datum *) palloc(natts * sizeof(Datum));
-   isnull = (bool *) palloc(natts * sizeof(bool));
-
    /*
     * If the OldHeap has a toast table, get lock on the toast table to keep
     * it from being vacuumed.  This is needed because autovacuum processes
@@ -820,15 +802,6 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
    if (OldHeap->rd_rel->reltoastrelid)
        LockRelationOid(OldHeap->rd_rel->reltoastrelid, AccessExclusiveLock);
 
-   /*
-    * We need to log the copied data in WAL iff WAL archiving/streaming is
-    * enabled AND it's a WAL-logged rel.
-    */
-   use_wal = XLogIsNeeded() && RelationNeedsWAL(NewHeap);
-
-   /* use_wal off requires smgr_targblock be initially invalid */
-   Assert(RelationGetTargetBlock(NewHeap) == InvalidBlockNumber);
-
    /*
     * If both tables have TOAST tables, perform toast swap by content.  It is
     * possible that the old table has a toast table but the new one doesn't,
@@ -889,13 +862,6 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
    *pFreezeXid = FreezeXid;
    *pCutoffMulti = MultiXactCutoff;
 
-   /* Remember if it's a system catalog */
-   is_system_catalog = IsSystemRelation(OldHeap);
-
-   /* Initialize the rewrite operation */
-   rwstate = begin_heap_rewrite(OldHeap, NewHeap, OldestXmin, FreezeXid,
-                                MultiXactCutoff, use_wal);
-
    /*
     * Decide whether to use an indexscan or seqscan-and-optional-sort to scan
     * the OldHeap.  We know how to use a sort to duplicate the ordering of a
@@ -908,39 +874,14 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
    else
        use_sort = false;
 
-   /* Set up sorting if wanted */
-   if (use_sort)
-       tuplesort = tuplesort_begin_cluster(oldTupDesc, OldIndex,
-                                           maintenance_work_mem,
-                                           NULL, false);
-   else
-       tuplesort = NULL;
-
-   /*
-    * Prepare to scan the OldHeap.  To ensure we see recently-dead tuples
-    * that still need to be copied, we scan with SnapshotAny and use
-    * HeapTupleSatisfiesVacuum for the visibility test.
-    */
-   if (OldIndex != NULL && !use_sort)
-   {
-       heapScan = NULL;
-       indexScan = index_beginscan(OldHeap, OldIndex, SnapshotAny, 0, 0);
-       index_rescan(indexScan, NULL, 0, NULL, 0);
-   }
-   else
-   {
-       heapScan = heap_beginscan(OldHeap, SnapshotAny, 0, (ScanKey) NULL);
-       indexScan = NULL;
-   }
-
    /* Log what we're doing */
-   if (indexScan != NULL)
+   if (OldIndex != NULL && !use_sort)
        ereport(elevel,
                (errmsg("clustering \"%s.%s\" using index scan on \"%s\"",
                        get_namespace_name(RelationGetNamespace(OldHeap)),
                        RelationGetRelationName(OldHeap),
                        RelationGetRelationName(OldIndex))));
-   else if (tuplesort != NULL)
+   else if (use_sort)
        ereport(elevel,
                (errmsg("clustering \"%s.%s\" using sequential scan and sort",
                        get_namespace_name(RelationGetNamespace(OldHeap)),
@@ -952,150 +893,12 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
                        RelationGetRelationName(OldHeap))));
 
    /*
-    * Scan through the OldHeap, either in OldIndex order or sequentially;
-    * copy each tuple into the NewHeap, or transiently to the tuplesort
-    * module.  Note that we don't bother sorting dead tuples (they won't get
-    * to the new table anyway).
+    * Hand off the actual copying to the AM-specific function; the generic
+    * code cannot know how to deal with visibility across AMs.
     */
-   for (;;)
-   {
-       HeapTuple   tuple;
-       Buffer      buf;
-       bool        isdead;
-
-       CHECK_FOR_INTERRUPTS();
-
-       if (indexScan != NULL)
-       {
-           tuple = index_getnext(indexScan, ForwardScanDirection);
-           if (tuple == NULL)
-               break;
-
-           /* Since we used no scan keys, should never need to recheck */
-           if (indexScan->xs_recheck)
-               elog(ERROR, "CLUSTER does not support lossy index conditions");
-
-           buf = indexScan->xs_cbuf;
-       }
-       else
-       {
-           tuple = heap_getnext(heapScan, ForwardScanDirection);
-           if (tuple == NULL)
-               break;
-
-           buf = heapScan->rs_cbuf;
-       }
-
-       LockBuffer(buf, BUFFER_LOCK_SHARE);
-
-       switch (HeapTupleSatisfiesVacuum(tuple, OldestXmin, buf))
-       {
-           case HEAPTUPLE_DEAD:
-               /* Definitely dead */
-               isdead = true;
-               break;
-           case HEAPTUPLE_RECENTLY_DEAD:
-               tups_recently_dead += 1;
-               /* fall through */
-           case HEAPTUPLE_LIVE:
-               /* Live or recently dead, must copy it */
-               isdead = false;
-               break;
-           case HEAPTUPLE_INSERT_IN_PROGRESS:
-
-               /*
-                * Since we hold exclusive lock on the relation, normally the
-                * only way to see this is if it was inserted earlier in our
-                * own transaction.  However, it can happen in system
-                * catalogs, since we tend to release write lock before commit
-                * there.  Give a warning if neither case applies; but in any
-                * case we had better copy it.
-                */
-               if (!is_system_catalog &&
-                   !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple->t_data)))
-                   elog(WARNING, "concurrent insert in progress within table \"%s\"",
-                        RelationGetRelationName(OldHeap));
-               /* treat as live */
-               isdead = false;
-               break;
-           case HEAPTUPLE_DELETE_IN_PROGRESS:
-
-               /*
-                * Similar situation to INSERT_IN_PROGRESS case.
-                */
-               if (!is_system_catalog &&
-                   !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tuple->t_data)))
-                   elog(WARNING, "concurrent delete in progress within table \"%s\"",
-                        RelationGetRelationName(OldHeap));
-               /* treat as recently dead */
-               tups_recently_dead += 1;
-               isdead = false;
-               break;
-           default:
-               elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
-               isdead = false; /* keep compiler quiet */
-               break;
-       }
-
-       LockBuffer(buf, BUFFER_LOCK_UNLOCK);
-
-       if (isdead)
-       {
-           tups_vacuumed += 1;
-           /* heap rewrite module still needs to see it... */
-           if (rewrite_heap_dead_tuple(rwstate, tuple))
-           {
-               /* A previous recently-dead tuple is now known dead */
-               tups_vacuumed += 1;
-               tups_recently_dead -= 1;
-           }
-           continue;
-       }
-
-       num_tuples += 1;
-       if (tuplesort != NULL)
-           tuplesort_putheaptuple(tuplesort, tuple);
-       else
-           reform_and_rewrite_tuple(tuple,
-                                    oldTupDesc, newTupDesc,
-                                    values, isnull,
-                                    rwstate);
-   }
-
-   if (indexScan != NULL)
-       index_endscan(indexScan);
-   if (heapScan != NULL)
-       heap_endscan(heapScan);
-
-   /*
-    * In scan-and-sort mode, complete the sort, then read out all live tuples
-    * from the tuplestore and write them to the new relation.
-    */
-   if (tuplesort != NULL)
-   {
-       tuplesort_performsort(tuplesort);
-
-       for (;;)
-       {
-           HeapTuple   tuple;
-
-           CHECK_FOR_INTERRUPTS();
-
-           tuple = tuplesort_getheaptuple(tuplesort, true);
-           if (tuple == NULL)
-               break;
-
-           reform_and_rewrite_tuple(tuple,
-                                    oldTupDesc, newTupDesc,
-                                    values, isnull,
-                                    rwstate);
-       }
-
-       tuplesort_end(tuplesort);
-   }
-
-   /* Write out any remaining tuples, and fsync if needed */
-   end_heap_rewrite(rwstate);
+   table_copy_for_cluster(OldHeap, NewHeap, OldIndex, use_sort,
+                          OldestXmin, FreezeXid, MultiXactCutoff,
+                          &num_tuples, &tups_vacuumed, &tups_recently_dead);
 
    /* Reset rd_toastoid just to be tidy --- it shouldn't be looked at again */
    NewHeap->rd_toastoid = InvalidOid;
@@ -1113,10 +916,6 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
                       tups_recently_dead,
                       pg_rusage_show(&ru0))));
 
-   /* Clean up */
-   pfree(values);
-   pfree(isnull);
-
    if (OldIndex != NULL)
        index_close(OldIndex, NoLock);
    heap_close(OldHeap, NoLock);
@@ -1693,7 +1492,7 @@ static List *
 get_tables_to_cluster(MemoryContext cluster_context)
 {
    Relation    indRelation;
-   HeapScanDesc scan;
+   TableScanDesc scan;
    ScanKeyData entry;
    HeapTuple   indexTuple;
    Form_pg_index index;
@@ -1712,8 +1511,8 @@ get_tables_to_cluster(MemoryContext cluster_context)
                Anum_pg_index_indisclustered,
                BTEqualStrategyNumber, F_BOOLEQ,
                BoolGetDatum(true));
-   scan = heap_beginscan_catalog(indRelation, 1, &entry);
-   while ((indexTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+   scan = table_beginscan_catalog(indRelation, 1, &entry);
+   while ((indexTuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
    {
        index = (Form_pg_index) GETSTRUCT(indexTuple);
 
@@ -1733,52 +1532,9 @@ get_tables_to_cluster(MemoryContext cluster_context)
 
        MemoryContextSwitchTo(old_context);
    }
-   heap_endscan(scan);
+   table_endscan(scan);
 
    relation_close(indRelation, AccessShareLock);
 
    return rvs;
 }
-
-
-/*
- * Reconstruct and rewrite the given tuple
- *
- * We cannot simply copy the tuple as-is, for several reasons:
- *
- * 1. We'd like to squeeze out the values of any dropped columns, both
- * to save space and to ensure we have no corner-case failures. (It's
- * possible for example that the new table hasn't got a TOAST table
- * and so is unable to store any large values of dropped cols.)
- *
- * 2. The tuple might not even be legal for the new table; this is
- * currently only known to happen as an after-effect of ALTER TABLE
- * SET WITHOUT OIDS (in an older version, via pg_upgrade).
- *
- * So, we must reconstruct the tuple from component Datums.
- */
-static void
-reform_and_rewrite_tuple(HeapTuple tuple,
-                        TupleDesc oldTupDesc, TupleDesc newTupDesc,
-                        Datum *values, bool *isnull,
-                        RewriteState rwstate)
-{
-   HeapTuple   copiedTuple;
-   int         i;
-
-   heap_deform_tuple(tuple, oldTupDesc, values, isnull);
-
-   /* Be sure to null out any dropped columns */
-   for (i = 0; i < newTupDesc->natts; i++)
-   {
-       if (TupleDescAttr(newTupDesc, i)->attisdropped)
-           isnull[i] = true;
-   }
-
-   copiedTuple = heap_form_tuple(newTupDesc, values, isnull);
-
-   /* The heap rewrite module does the rest */
-   rewrite_heap_tuple(rwstate, tuple, copiedTuple);
-
-   heap_freetuple(copiedTuple);
-}
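
The deleted scan loop moves wholesale into the heap AM; the generic
CLUSTER code retains only the counters and the use_sort decision. A
comment-level skeleton of what the callback inherits, inferred from the
call site above (the exact name and signature live in access/tableam.h and
are assumptions here):

#include "postgres.h"

#include "access/multixact.h"
#include "utils/rel.h"

static void
my_copy_for_cluster(Relation OldTable, Relation NewTable, Relation OldIndex,
                    bool use_sort,
                    TransactionId OldestXmin, TransactionId FreezeXid,
                    MultiXactId MultiXactCutoff,
                    double *num_tuples, double *tups_vacuumed,
                    double *tups_recently_dead)
{
    /* 1. begin a SnapshotAny scan, via OldIndex when given and !use_sort */
    /* 2. classify each tuple live/dead/recently dead; bump the counters */
    /* 3. feed live tuples to a tuplesort when use_sort, else write directly */
    /* 4. drain the sort, if any, then flush/fsync the new relation */
}
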
index b0b2cb2a1461bb0792b161f36cb6e70074a51138..9fbea0b4d8e162c815e8ccbf822543f6c67b967b 100644 (file)
@@ -13,6 +13,8 @@
  */
 #include "postgres.h"
 
+#include "access/relscan.h"
+#include "access/tableam.h"
 #include "catalog/index.h"
 #include "commands/trigger.h"
 #include "executor/executor.h"
@@ -69,6 +71,9 @@ unique_key_recheck(PG_FUNCTION_ARGS)
 
    /*
     * Get the new data that was inserted/updated.
+    *
+    * PBORKED: should use the slot API, otherwise this won't work correctly
+    * for zheap et al.
     */
    if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
        new_row = trigdata->tg_trigtuple;
@@ -83,6 +88,8 @@ unique_key_recheck(PG_FUNCTION_ARGS)
        new_row = NULL;         /* keep compiler quiet */
    }
 
+   slot = table_gimmegimmeslot(trigdata->tg_relation, NULL);
+
    /*
     * If the new_row is now dead (ie, inserted and then deleted within our
     * transaction), we can skip the check.  However, we have to be careful,
@@ -102,12 +109,20 @@ unique_key_recheck(PG_FUNCTION_ARGS)
     * removed.
     */
    tmptid = new_row->t_self;
-   if (!heap_hot_search(&tmptid, trigdata->tg_relation, SnapshotSelf, NULL))
    {
-       /*
-        * All rows in the HOT chain are dead, so skip the check.
-        */
-       return PointerGetDatum(NULL);
+       IndexFetchTableData *scan = table_begin_index_fetch_table(trigdata->tg_relation);
+       bool call_again = false;
+
+       if (!table_fetch_follow(scan, &tmptid, SnapshotSelf, slot, &call_again, NULL))
+       {
+           /*
+            * All rows referenced by the index are dead, so skip the check.
+            */
+           ExecDropSingleTupleTableSlot(slot);
+           table_end_index_fetch_table(scan);
+           return PointerGetDatum(NULL);
+       }
+       table_end_index_fetch_table(scan);
    }
 
    /*
@@ -119,14 +134,6 @@ unique_key_recheck(PG_FUNCTION_ARGS)
                          RowExclusiveLock);
    indexInfo = BuildIndexInfo(indexRel);
 
-   /*
-    * The heap tuple must be put into a slot for FormIndexDatum.
-    */
-   slot = MakeSingleTupleTableSlot(RelationGetDescr(trigdata->tg_relation),
-                                   &TTSOpsHeapTuple);
-
-   ExecStoreHeapTuple(new_row, slot, false);
-
    /*
     * Typically the index won't have expressions, but if it does we need an
     * EState to evaluate them.  We need it for exclusion constraints too,
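
The heap_hot_search() call is replaced by a begin/fetch/end triple so a
non-heap AM can resolve TID chains its own way. The same idiom reduced to
a skeleton; tid_is_visible() is a hypothetical wrapper, not part of the
patch:

#include "postgres.h"

#include "access/tableam.h"
#include "executor/tuptable.h"
#include "storage/itemptr.h"
#include "utils/rel.h"
#include "utils/snapshot.h"

static bool
tid_is_visible(Relation rel, ItemPointer tid, Snapshot snapshot,
               TupleTableSlot *slot)
{
    IndexFetchTableData *scan = table_begin_index_fetch_table(rel);
    bool        call_again = false;
    bool        found;

    /* fetches the tuple (and any successor the AM chains to) into slot */
    found = table_fetch_follow(scan, tid, snapshot, slot, &call_again, NULL);
    table_end_index_fetch_table(scan);

    return found;
}
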
index 4aa8890fe816b2a50c91f73acb66435f3dfded00..587e166c2a2a8176d052d025c0be22030fa346d9 100644 (file)
@@ -21,6 +21,7 @@
 
 #include "access/heapam.h"
 #include "access/htup_details.h"
+#include "access/tableam.h"
 #include "access/sysattr.h"
 #include "access/xact.h"
 #include "access/xlog.h"
@@ -315,9 +316,9 @@ static void CopyOneRowTo(CopyState cstate,
             Datum *values, bool *nulls);
 static void CopyFromInsertBatch(CopyState cstate, EState *estate,
                    CommandId mycid, int hi_options,
-                   ResultRelInfo *resultRelInfo, TupleTableSlot *myslot,
+                   ResultRelInfo *resultRelInfo,
                    BulkInsertState bistate,
-                   int nBufferedTuples, HeapTuple *bufferedTuples,
+                   int nBufferedTuples, TupleTableSlot **bufferedSlots,
                    uint64 firstBufferedLineNo);
 static bool CopyReadLine(CopyState cstate);
 static bool CopyReadLineText(CopyState cstate);
@@ -2046,33 +2047,27 @@ CopyTo(CopyState cstate)
 
    if (cstate->rel)
    {
-       Datum      *values;
-       bool       *nulls;
-       HeapScanDesc scandesc;
-       HeapTuple   tuple;
-
-       values = (Datum *) palloc(num_phys_attrs * sizeof(Datum));
-       nulls = (bool *) palloc(num_phys_attrs * sizeof(bool));
-
-       scandesc = heap_beginscan(cstate->rel, GetActiveSnapshot(), 0, NULL);
+       TupleTableSlot *slot;
+       TableScanDesc scandesc;
 
+       scandesc = table_beginscan(cstate->rel, GetActiveSnapshot(), 0, NULL);
+       slot = table_gimmegimmeslot(cstate->rel, NULL);
        processed = 0;
-       while ((tuple = heap_getnext(scandesc, ForwardScanDirection)) != NULL)
+
+       while (table_scan_getnextslot(scandesc, ForwardScanDirection, slot))
        {
            CHECK_FOR_INTERRUPTS();
 
-           /* Deconstruct the tuple ... faster than repeated heap_getattr */
-           heap_deform_tuple(tuple, tupDesc, values, nulls);
+           /* Deconstruct the tuple ... */
+           slot_getallattrs(slot);
 
            /* Format and send the data */
-           CopyOneRowTo(cstate, values, nulls);
+           CopyOneRowTo(cstate, slot->tts_values, slot->tts_isnull);
            processed++;
        }
 
-       heap_endscan(scandesc);
-
-       pfree(values);
-       pfree(nulls);
+       ExecDropSingleTupleTableSlot(slot);
+       table_endscan(scandesc);
    }
    else
    {
@@ -2284,17 +2279,13 @@ limit_printout_length(const char *str)
 uint64
 CopyFrom(CopyState cstate)
 {
-   HeapTuple   tuple;
-   TupleDesc   tupDesc;
-   Datum      *values;
-   bool       *nulls;
    ResultRelInfo *resultRelInfo;
    ResultRelInfo *target_resultRelInfo;
    ResultRelInfo *prevResultRelInfo = NULL;
    EState     *estate = CreateExecutorState(); /* for ExecConstraints() */
    ModifyTableState *mtstate;
    ExprContext *econtext;
-   TupleTableSlot *myslot;
+   TupleTableSlot *singleslot;
    MemoryContext oldcontext = CurrentMemoryContext;
 
    PartitionTupleRouting *proute = NULL;
@@ -2302,8 +2293,8 @@ CopyFrom(CopyState cstate)
    ErrorContextCallback errcallback;
    CommandId   mycid = GetCurrentCommandId(true);
    int         hi_options = 0; /* start with default heap_insert options */
-   BulkInsertState bistate;
    CopyInsertMethod insertMethod;
+   BulkInsertState bistate;
    uint64      processed = 0;
    int         nBufferedTuples = 0;
    bool        has_before_insert_row_trig;
@@ -2312,8 +2303,8 @@ CopyFrom(CopyState cstate)
 
 #define MAX_BUFFERED_TUPLES 1000
 #define RECHECK_MULTI_INSERT_THRESHOLD 1000
-   HeapTuple  *bufferedTuples = NULL;  /* initialize to silence warning */
-   Size        bufferedTuplesSize = 0;
+   TupleTableSlot  **bufferedSlots = NULL; /* initialize to silence warning */
+   Size        bufferedSlotsSize = 0;
    uint64      firstBufferedLineNo = 0;
    uint64      lastPartitionSampleLineNo = 0;
    uint64      nPartitionChanges = 0;
@@ -2355,8 +2346,6 @@ CopyFrom(CopyState cstate)
                            RelationGetRelationName(cstate->rel))));
    }
 
-   tupDesc = RelationGetDescr(cstate->rel);
-
    /*----------
     * Check to see if we can avoid writing WAL
     *
@@ -2486,13 +2475,6 @@ CopyFrom(CopyState cstate)
 
    ExecInitRangeTable(estate, cstate->range_table);
 
-   /* Set up a tuple slot too */
-   myslot = ExecInitExtraTupleSlot(estate, tupDesc,
-                                   &TTSOpsHeapTuple);
-   /* Triggers might need a slot as well */
-   estate->es_trig_tuple_slot = ExecInitExtraTupleSlot(estate, NULL,
-                                                       &TTSOpsHeapTuple);
-
    /*
     * Set up a ModifyTableState so we can let FDW(s) init themselves for
     * foreign-table result relation(s).
@@ -2611,7 +2593,17 @@ CopyFrom(CopyState cstate)
        else
            insertMethod = CIM_MULTI;
 
-       bufferedTuples = palloc(MAX_BUFFERED_TUPLES * sizeof(HeapTuple));
+       bufferedSlots = palloc0(MAX_BUFFERED_TUPLES * sizeof(TupleTableSlot *));
+   }
+
+   /*
+    * If not using batch mode (which allocates slots as needed), set up a
+    * tuple slot too.
+    */
+   if (insertMethod == CIM_SINGLE || insertMethod == CIM_MULTI_CONDITIONAL)
+   {
+       singleslot = table_gimmegimmeslot(resultRelInfo->ri_RelationDesc,
+                                         &estate->es_tupleTable);
    }
 
    has_before_insert_row_trig = (resultRelInfo->ri_TrigDesc &&
@@ -2628,9 +2620,6 @@ CopyFrom(CopyState cstate)
     */
    ExecBSInsertTriggers(estate, resultRelInfo);
 
-   values = (Datum *) palloc(tupDesc->natts * sizeof(Datum));
-   nulls = (bool *) palloc(tupDesc->natts * sizeof(bool));
-
    bistate = GetBulkInsertState();
    econtext = GetPerTupleExprContext(estate);
 
@@ -2642,7 +2631,7 @@ CopyFrom(CopyState cstate)
 
    for (;;)
    {
-       TupleTableSlot *slot;
+       TupleTableSlot *myslot;
        bool        skip_tuple;
 
        CHECK_FOR_INTERRUPTS();
@@ -2657,28 +2646,46 @@ CopyFrom(CopyState cstate)
            ResetPerTupleExprContext(estate);
        }
 
+       if (insertMethod == CIM_SINGLE || proute)
+       {
+           myslot = singleslot;
+           Assert(myslot != NULL);
+       }
+       else
+       {
+           if (bufferedSlots[nBufferedTuples] == NULL)
+           {
+               const TupleTableSlotOps *tts_cb;
+
+               tts_cb = table_slot_callbacks(resultRelInfo->ri_RelationDesc);
+
+               bufferedSlots[nBufferedTuples] =
+                   MakeSingleTupleTableSlot(RelationGetDescr(resultRelInfo->ri_RelationDesc),
+                                            tts_cb);
+           }
+           myslot = bufferedSlots[nBufferedTuples];
+       }
+
        /* Switch into its memory context */
        MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
 
-       if (!NextCopyFrom(cstate, econtext, values, nulls))
+       ExecClearTuple(myslot);
+
+       /* Directly store the values/nulls array in the slot */
+       if (!NextCopyFrom(cstate, econtext, myslot->tts_values, myslot->tts_isnull))
            break;
 
-       /* And now we can form the input tuple. */
-       tuple = heap_form_tuple(tupDesc, values, nulls);
+       ExecStoreVirtualTuple(myslot);
 
        /*
         * Constraints might reference the tableoid column, so initialize
         * t_tableOid before evaluating them.
         */
-       tuple->t_tableOid = RelationGetRelid(target_resultRelInfo->ri_RelationDesc);
+       myslot->tts_tableOid = RelationGetRelid(target_resultRelInfo->ri_RelationDesc);
 
        /* Triggers and stuff need to be invoked in query context. */
        MemoryContextSwitchTo(oldcontext);
 
-       /* Place tuple in tuple slot --- but slot shouldn't free it */
-       slot = myslot;
-       ExecStoreHeapTuple(tuple, slot, false);
-
        /* Determine the partition to heap_insert the tuple into */
        if (proute)
        {
@@ -2690,7 +2697,7 @@ CopyFrom(CopyState cstate)
             * if the found partition is not suitable for INSERTs.
             */
            resultRelInfo = ExecFindPartition(mtstate, target_resultRelInfo,
-                                             proute, slot, estate);
+                                             proute, myslot, estate);
 
            if (prevResultRelInfo != resultRelInfo)
            {
@@ -2707,11 +2714,19 @@ CopyFrom(CopyState cstate)
                        ExprContext *swapcontext;
 
                        CopyFromInsertBatch(cstate, estate, mycid, hi_options,
-                                           prevResultRelInfo, myslot, bistate,
-                                           nBufferedTuples, bufferedTuples,
+                                           prevResultRelInfo, bistate,
+                                           nBufferedTuples, bufferedSlots,
                                            firstBufferedLineNo);
                        nBufferedTuples = 0;
-                       bufferedTuplesSize = 0;
+
+                       /* force new slots to be used */
+                       for (int i = 0; i < MAX_BUFFERED_TUPLES; i++)
+                       {
+                           if (bufferedSlots[i] == NULL)
+                               continue;
+                           ExecDropSingleTupleTableSlot(bufferedSlots[i]);
+                           bufferedSlots[i] = NULL;
+                       }
 
                        Assert(secondaryExprContext);
 
@@ -2820,36 +2835,57 @@ CopyFrom(CopyState cstate)
                     * Otherwise, just remember the original unconverted
                     * tuple, to avoid a needless round trip conversion.
                     */
-                   cstate->transition_capture->tcs_original_insert_tuple = tuple;
+                   cstate->transition_capture->tcs_original_insert_tuple =
+                       ExecFetchSlotHeapTuple(myslot, false, NULL);
                    cstate->transition_capture->tcs_map = NULL;
                }
            }
 
+
            /*
             * We might need to convert from the root rowtype to the partition
             * rowtype.
             */
            map = resultRelInfo->ri_PartitionInfo->pi_RootToPartitionMap;
-           if (map != NULL)
+           if (insertMethod == CIM_SINGLE ||
+               (insertMethod == CIM_MULTI_CONDITIONAL && !leafpart_use_multi_insert))
+           {
+               if (map != NULL)
+               {
+                   TupleTableSlot *new_slot;
+
+                   new_slot = resultRelInfo->ri_PartitionInfo->pi_PartitionTupleSlot;
+                   myslot = execute_attr_map_slot(map->attrMap, myslot, new_slot);
+               }
+           }
+           else if (insertMethod == CIM_MULTI_CONDITIONAL)
            {
                TupleTableSlot *new_slot;
-               MemoryContext oldcontext;
 
-               new_slot = resultRelInfo->ri_PartitionInfo->pi_PartitionTupleSlot;
-               Assert(new_slot != NULL);
+               if (bufferedSlots[nBufferedTuples] == NULL)
+               {
+                   const TupleTableSlotOps *tts_cb;
+
+                   tts_cb = table_slot_callbacks(resultRelInfo->ri_RelationDesc);
+                   bufferedSlots[nBufferedTuples] =
+                       MakeSingleTupleTableSlot(RelationGetDescr(resultRelInfo->ri_RelationDesc),
+                                                tts_cb);
+               }
 
-               slot = execute_attr_map_slot(map->attrMap, slot, new_slot);
+               new_slot = bufferedSlots[nBufferedTuples];
 
-               /*
-                * Get the tuple in the per-tuple context, so that it will be
-                * freed after each batch insert.
-                */
-               oldcontext = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
-               tuple = ExecCopySlotHeapTuple(slot);
-               MemoryContextSwitchTo(oldcontext);
+               if (map != NULL)
+                   myslot = execute_attr_map_slot(map->attrMap, myslot, new_slot);
+               else
+               {
+                   ExecCopySlot(new_slot, myslot);
+                   myslot = new_slot;
+               }
+           }
+           else
+           {
+               elog(ERROR, "unexpected COPY insert method");
            }
-
-           tuple->t_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
        }
 
        skip_tuple = false;
@@ -2857,12 +2893,8 @@ CopyFrom(CopyState cstate)
        /* BEFORE ROW INSERT Triggers */
        if (has_before_insert_row_trig)
        {
-           slot = ExecBRInsertTriggers(estate, resultRelInfo, slot);
-
-           if (slot == NULL)   /* "do nothing" */
-               skip_tuple = true;
-           else                /* trigger might have changed tuple */
-               tuple = ExecFetchSlotHeapTuple(slot, true, NULL);
+           if (!ExecBRInsertTriggers(estate, resultRelInfo, myslot))
+               skip_tuple = true;  /* "do nothing" */
        }
 
        if (!skip_tuple)
@@ -2870,7 +2902,7 @@ CopyFrom(CopyState cstate)
            if (has_instead_insert_row_trig)
            {
                /* Pass the data to the INSTEAD ROW INSERT trigger */
-               ExecIRInsertTriggers(estate, resultRelInfo, slot);
+               ExecIRInsertTriggers(estate, resultRelInfo, myslot);
            }
            else
            {
@@ -2880,7 +2912,7 @@ CopyFrom(CopyState cstate)
                 */
                if (resultRelInfo->ri_FdwRoutine == NULL &&
                    resultRelInfo->ri_RelationDesc->rd_att->constr)
-                   ExecConstraints(resultRelInfo, slot, estate);
+                   ExecConstraints(resultRelInfo, myslot, estate);
 
                /*
                 * Also check the tuple against the partition constraint, if
@@ -2890,7 +2922,7 @@ CopyFrom(CopyState cstate)
                 */
                if (resultRelInfo->ri_PartitionCheck &&
                    (proute == NULL || has_before_insert_row_trig))
-                   ExecPartitionCheck(resultRelInfo, slot, estate, true);
+                   ExecPartitionCheck(resultRelInfo, myslot, estate, true);
 
                /*
                 * Perform multi-inserts when enabled, or when loading a
@@ -2902,8 +2934,10 @@ CopyFrom(CopyState cstate)
                    /* Add this tuple to the tuple buffer */
                    if (nBufferedTuples == 0)
                        firstBufferedLineNo = cstate->cur_lineno;
-                   bufferedTuples[nBufferedTuples++] = tuple;
-                   bufferedTuplesSize += tuple->t_len;
+
+                   Assert(bufferedSlots[nBufferedTuples] == myslot);
+                   nBufferedTuples++;
+                   bufferedSlotsSize += cstate->line_buf.len;
 
                    /*
                     * If the buffer filled up, flush it.  Also flush if the
@@ -2912,14 +2946,14 @@ CopyFrom(CopyState cstate)
                     * buffer when the tuples are exceptionally wide.
                     */
                    if (nBufferedTuples == MAX_BUFFERED_TUPLES ||
-                       bufferedTuplesSize > 65535)
+                       bufferedSlotsSize > 65535)
                    {
                        CopyFromInsertBatch(cstate, estate, mycid, hi_options,
-                                           resultRelInfo, myslot, bistate,
-                                           nBufferedTuples, bufferedTuples,
+                                           resultRelInfo, bistate,
+                                           nBufferedTuples, bufferedSlots,
                                            firstBufferedLineNo);
                        nBufferedTuples = 0;
-                       bufferedTuplesSize = 0;
+                       bufferedSlotsSize = 0;
                    }
                }
                else
@@ -2929,39 +2963,38 @@ CopyFrom(CopyState cstate)
                    /* OK, store the tuple */
                    if (resultRelInfo->ri_FdwRoutine != NULL)
                    {
-                       slot = resultRelInfo->ri_FdwRoutine->ExecForeignInsert(estate,
-                                                                              resultRelInfo,
-                                                                              slot,
-                                                                              NULL);
+                       myslot = resultRelInfo->ri_FdwRoutine->ExecForeignInsert(estate,
+                                                                                resultRelInfo,
+                                                                                myslot,
+                                                                                NULL);
 
-                       if (slot == NULL)   /* "do nothing" */
+                       if (myslot == NULL) /* "do nothing" */
                            continue;   /* next tuple please */
 
-                       /* FDW might have changed tuple */
-                       tuple = ExecFetchSlotHeapTuple(slot, true, NULL);
-
                        /*
                         * AFTER ROW Triggers might reference the tableoid
                         * column, so initialize t_tableOid before evaluating
                         * them.
                         */
-                       tuple->t_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
+                       myslot->tts_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
                    }
                    else
-                       heap_insert(resultRelInfo->ri_RelationDesc, tuple,
-                                   mycid, hi_options, bistate);
+                   {
+                       /* OK, store the tuple via the table AM */
+                       table_insert(resultRelInfo->ri_RelationDesc, myslot, mycid, hi_options,
+                                    bistate);
+                   }
 
                    /* And create index entries for it */
                    if (resultRelInfo->ri_NumIndices > 0)
-                       recheckIndexes = ExecInsertIndexTuples(slot,
-                                                              &(tuple->t_self),
+                       recheckIndexes = ExecInsertIndexTuples(myslot,
                                                               estate,
                                                               false,
                                                               NULL,
                                                               NIL);
 
                    /* AFTER ROW INSERT Triggers */
-                   ExecARInsertTriggers(estate, resultRelInfo, tuple,
+                   ExecARInsertTriggers(estate, resultRelInfo, myslot,
                                         recheckIndexes, cstate->transition_capture);
 
                    list_free(recheckIndexes);
@@ -2983,21 +3016,33 @@ CopyFrom(CopyState cstate)
        if (insertMethod == CIM_MULTI_CONDITIONAL)
        {
            CopyFromInsertBatch(cstate, estate, mycid, hi_options,
-                               prevResultRelInfo, myslot, bistate,
-                               nBufferedTuples, bufferedTuples,
+                               prevResultRelInfo, bistate,
+                               nBufferedTuples, bufferedSlots,
                                firstBufferedLineNo);
        }
        else
            CopyFromInsertBatch(cstate, estate, mycid, hi_options,
-                               resultRelInfo, myslot, bistate,
-                               nBufferedTuples, bufferedTuples,
+                               resultRelInfo, bistate,
+                               nBufferedTuples, bufferedSlots,
                                firstBufferedLineNo);
    }
 
+   /* free slots */
+   if (bufferedSlots)
+   {
+       for (int i = 0; i < MAX_BUFFERED_TUPLES; i++)
+       {
+           if (bufferedSlots[i] == NULL)
+               continue;
+           ExecDropSingleTupleTableSlot(bufferedSlots[i]);
+           bufferedSlots[i] = NULL;
+       }
+   }
+
    /* Done, clean up */
    error_context_stack = errcallback.previous;
 
-   FreeBulkInsertState(bistate);
+   ReleaseBulkInsertStatePin(bistate);
 
    MemoryContextSwitchTo(oldcontext);
 
@@ -3014,9 +3059,6 @@ CopyFrom(CopyState cstate)
    /* Handle queued AFTER triggers */
    AfterTriggerEndQuery(estate);
 
-   pfree(values);
-   pfree(nulls);
-
    ExecResetTupleTable(estate->es_tupleTable, false);
 
    /* Allow the FDW to shut down */
@@ -3041,7 +3083,7 @@ CopyFrom(CopyState cstate)
     * indexes since those use WAL anyway)
     */
    if (hi_options & HEAP_INSERT_SKIP_WAL)
-       heap_sync(cstate->rel);
+       table_sync(cstate->rel);
 
    return processed;
 }
@@ -3054,8 +3096,7 @@ CopyFrom(CopyState cstate)
 static void
 CopyFromInsertBatch(CopyState cstate, EState *estate, CommandId mycid,
                    int hi_options, ResultRelInfo *resultRelInfo,
-                   TupleTableSlot *myslot, BulkInsertState bistate,
-                   int nBufferedTuples, HeapTuple *bufferedTuples,
+                   BulkInsertState bistate, int nBufferedTuples, TupleTableSlot **bufferedSlots,
                    uint64 firstBufferedLineNo)
 {
    MemoryContext oldcontext;
@@ -3075,12 +3116,12 @@ CopyFromInsertBatch(CopyState cstate, EState *estate, CommandId mycid,
     * before calling it.
     */
    oldcontext = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
-   heap_multi_insert(resultRelInfo->ri_RelationDesc,
-                     bufferedTuples,
-                     nBufferedTuples,
-                     mycid,
-                     hi_options,
-                     bistate);
+   table_multi_insert(resultRelInfo->ri_RelationDesc,
+                      bufferedSlots,
+                      nBufferedTuples,
+                      mycid,
+                      hi_options,
+                      bistate);
    MemoryContextSwitchTo(oldcontext);
 
    /*
@@ -3094,12 +3135,10 @@ CopyFromInsertBatch(CopyState cstate, EState *estate, CommandId mycid,
            List       *recheckIndexes;
 
            cstate->cur_lineno = firstBufferedLineNo + i;
-           ExecStoreHeapTuple(bufferedTuples[i], myslot, false);
            recheckIndexes =
-               ExecInsertIndexTuples(myslot, &(bufferedTuples[i]->t_self),
-                                     estate, false, NULL, NIL);
+               ExecInsertIndexTuples(bufferedSlots[i], estate, false, NULL, NIL);
            ExecARInsertTriggers(estate, resultRelInfo,
-                                bufferedTuples[i],
+                                bufferedSlots[i],
                                 recheckIndexes, cstate->transition_capture);
            list_free(recheckIndexes);
        }
@@ -3117,7 +3156,7 @@ CopyFromInsertBatch(CopyState cstate, EState *estate, CommandId mycid,
        {
            cstate->cur_lineno = firstBufferedLineNo + i;
            ExecARInsertTriggers(estate, resultRelInfo,
-                                bufferedTuples[i],
+                                bufferedSlots[i],
                                 NIL, cstate->transition_capture);
        }
    }
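
CopyFrom() now buffers whole slots rather than HeapTuples, creating each
slot lazily with the AM's preferred slot callbacks and flushing the batch
through table_multi_insert(). The pattern in miniature; flush_buffered()
and get_buffer_slot() are hypothetical reductions of CopyFromInsertBatch()
and the lazy-allocation branch above:

#include "postgres.h"

#include "access/heapam.h"
#include "access/tableam.h"
#include "executor/tuptable.h"
#include "utils/rel.h"

#define MAX_BUFFERED 1000

static TupleTableSlot *
get_buffer_slot(Relation rel, TupleTableSlot **slots, int n)
{
    /* create slots on demand, using the table AM's slot implementation */
    if (slots[n] == NULL)
        slots[n] = MakeSingleTupleTableSlot(RelationGetDescr(rel),
                                            table_slot_callbacks(rel));
    return slots[n];
}

static void
flush_buffered(Relation rel, TupleTableSlot **slots, int nslots,
               CommandId cid, int options, BulkInsertState bistate)
{
    if (nslots == 0)
        return;
    /* one AM call inserts the whole batch */
    table_multi_insert(rel, slots, nslots, cid, options, bistate);
}
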
index d01b258b65444a47ff23773e07fe73a14ade567b..d346bf0749adefc6d16e8b1ee298d5259499e786 100644 (file)
@@ -26,6 +26,7 @@
 
 #include "access/reloptions.h"
 #include "access/htup_details.h"
+#include "access/tableam.h"
 #include "access/sysattr.h"
 #include "access/xact.h"
 #include "access/xlog.h"
@@ -59,7 +60,8 @@ typedef struct
    ObjectAddress reladdr;      /* address of rel, for ExecCreateTableAs */
    CommandId   output_cid;     /* cmin to insert in output tuples */
    int         hi_options;     /* heap_insert performance options */
-   BulkInsertState bistate;    /* bulk insert state */
+   void       *bistate;        /* bulk insert state */
+   TupleTableSlot *slot;
 } DR_intorel;
 
 /* utility functions for CTAS definition creation */
@@ -107,6 +109,9 @@ create_ctas_internal(List *attrList, IntoClause *into)
    create->oncommit = into->onCommit;
    create->tablespacename = into->tableSpaceName;
    create->if_not_exists = false;
+   create->accessMethod = into->accessMethod;
+
+   /* PBORKED: toast options */
 
    /*
     * Create the relation.  (This will error out if there's an existing view,
@@ -550,6 +555,7 @@ intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
    myState->rel = intoRelationDesc;
    myState->reladdr = intoRelationAddr;
    myState->output_cid = GetCurrentCommandId(true);
+   myState->slot = table_gimmegimmeslot(intoRelationDesc, NULL);
 
    /*
     * We can skip WAL-logging the insertions, unless PITR or streaming
@@ -570,19 +576,21 @@ static bool
 intorel_receive(TupleTableSlot *slot, DestReceiver *self)
 {
    DR_intorel *myState = (DR_intorel *) self;
-   HeapTuple   tuple;
 
    /*
-    * get the heap tuple out of the tuple table slot, making sure we have a
-    * writable copy
+    * Ensure input tuple is the right format for the target relation.
     */
-   tuple = ExecCopySlotHeapTuple(slot);
+   if (slot->tts_ops != myState->slot->tts_ops)
+   {
+       ExecCopySlot(myState->slot, slot);
+       slot = myState->slot;
+   }
 
-   heap_insert(myState->rel,
-               tuple,
-               myState->output_cid,
-               myState->hi_options,
-               myState->bistate);
+   table_insert(myState->rel,
+                slot,
+                myState->output_cid,
+                myState->hi_options,
+                myState->bistate);
 
    /* We know this is a newly created relation, so there are no indexes */
 
@@ -597,11 +605,12 @@ intorel_shutdown(DestReceiver *self)
 {
    DR_intorel *myState = (DR_intorel *) self;
 
+   ExecDropSingleTupleTableSlot(myState->slot);
    FreeBulkInsertState(myState->bistate);
 
    /* If we skipped using WAL, must heap_sync before commit */
    if (myState->hi_options & HEAP_INSERT_SKIP_WAL)
-       heap_sync(myState->rel);
+       table_sync(myState->rel);
 
    /* close rel, but keep lock until commit */
    heap_close(myState->rel, NoLock);
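
intorel_receive() now converts the incoming slot only when its
implementation (tts_ops) differs from the AM-owned slot's, instead of
materializing a HeapTuple for every row. The idiom in isolation;
store_slot() is a hypothetical reduction of the DestReceiver callback:

#include "postgres.h"

#include "access/heapam.h"
#include "access/tableam.h"
#include "executor/tuptable.h"
#include "utils/rel.h"

static void
store_slot(Relation rel, TupleTableSlot *src, TupleTableSlot *amslot,
           CommandId cid, int options, BulkInsertState bistate)
{
    /* copy only when the source slot is not in the AM's native format */
    if (src->tts_ops != amslot->tts_ops)
    {
        ExecCopySlot(amslot, src);
        src = amslot;
    }

    table_insert(rel, src, cid, options, bistate);
}
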
index f640f4697294563bd35890d0f346eaab16a97e44..e198a7319c2e8da0ab90788640175bc9a4302737 100644 (file)
@@ -26,6 +26,7 @@
 #include "access/genam.h"
 #include "access/heapam.h"
 #include "access/htup_details.h"
+#include "access/tableam.h"
 #include "access/xact.h"
 #include "access/xloginsert.h"
 #include "access/xlogutils.h"
@@ -98,7 +99,7 @@ static int    errdetail_busy_db(int notherbackends, int npreparedxacts);
 Oid
 createdb(ParseState *pstate, const CreatedbStmt *stmt)
 {
-   HeapScanDesc scan;
+   TableScanDesc scan;
    Relation    rel;
    Oid         src_dboid;
    Oid         src_owner;
@@ -590,8 +591,8 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
         * each one to the new database.
         */
        rel = heap_open(TableSpaceRelationId, AccessShareLock);
-       scan = heap_beginscan_catalog(rel, 0, NULL);
-       while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+       scan = table_beginscan_catalog(rel, 0, NULL);
+       while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
        {
            Form_pg_tablespace spaceform = (Form_pg_tablespace) GETSTRUCT(tuple);
            Oid         srctablespace = spaceform->oid;
@@ -644,7 +645,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
                                  XLOG_DBASE_CREATE | XLR_SPECIAL_REL_UPDATE);
            }
        }
-       heap_endscan(scan);
+       table_endscan(scan);
        heap_close(rel, AccessShareLock);
 
        /*
@@ -1871,12 +1872,12 @@ static void
 remove_dbtablespaces(Oid db_id)
 {
    Relation    rel;
-   HeapScanDesc scan;
+   TableScanDesc scan;
    HeapTuple   tuple;
 
    rel = heap_open(TableSpaceRelationId, AccessShareLock);
-   scan = heap_beginscan_catalog(rel, 0, NULL);
-   while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+   scan = table_beginscan_catalog(rel, 0, NULL);
+   while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
    {
        Form_pg_tablespace spcform = (Form_pg_tablespace) GETSTRUCT(tuple);
        Oid         dsttablespace = spcform->oid;
@@ -1918,7 +1919,7 @@ remove_dbtablespaces(Oid db_id)
        pfree(dstpath);
    }
 
-   heap_endscan(scan);
+   table_endscan(scan);
    heap_close(rel, AccessShareLock);
 }
 
@@ -1939,12 +1940,12 @@ check_db_file_conflict(Oid db_id)
 {
    bool        result = false;
    Relation    rel;
-   HeapScanDesc scan;
+   TableScanDesc scan;
    HeapTuple   tuple;
 
    rel = heap_open(TableSpaceRelationId, AccessShareLock);
-   scan = heap_beginscan_catalog(rel, 0, NULL);
-   while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+   scan = table_beginscan_catalog(rel, 0, NULL);
+   while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
    {
        Form_pg_tablespace spcform = (Form_pg_tablespace) GETSTRUCT(tuple);
        Oid         dsttablespace = spcform->oid;
@@ -1968,7 +1969,7 @@ check_db_file_conflict(Oid db_id)
        pfree(dstpath);
    }
 
-   heap_endscan(scan);
+   table_endscan(scan);
    heap_close(rel, AccessShareLock);
 
    return result;
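
All three dbcommands.c call sites follow the same shape: begin the scan
through the table-AM entry point, but keep consuming HeapTuples, which stays
safe here because system catalogs are always heap tables.  A minimal sketch
of that shape (example_scan_tablespaces is a hypothetical name):

    static void
    example_scan_tablespaces(void)
    {
        Relation      rel;
        TableScanDesc scan;
        HeapTuple     tuple;

        rel = heap_open(TableSpaceRelationId, AccessShareLock);
        scan = table_beginscan_catalog(rel, 0, NULL);
        while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
        {
            Form_pg_tablespace form = (Form_pg_tablespace) GETSTRUCT(tuple);

            /* ... act on form->oid here ... */
        }
        table_endscan(scan);
        heap_close(rel, AccessShareLock);
    }
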
index 6c06167fb2ae39e81e0cf64e6069cceb9e689dca..1420c12af874be4a6e02044ad9a4cdab65527bb3 100644 (file)
@@ -18,6 +18,7 @@
 #include "access/amapi.h"
 #include "access/htup_details.h"
 #include "access/reloptions.h"
+#include "access/tableam.h"
 #include "access/sysattr.h"
 #include "access/xact.h"
 #include "catalog/catalog.h"
@@ -2338,7 +2339,7 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind,
 {
    Oid         objectOid;
    Relation    relationRelation;
-   HeapScanDesc scan;
+   TableScanDesc scan;
    ScanKeyData scan_keys[1];
    HeapTuple   tuple;
    MemoryContext private_context;
@@ -2412,8 +2413,8 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind,
     * rels will be processed indirectly by reindex_relation).
     */
    relationRelation = heap_open(RelationRelationId, AccessShareLock);
-   scan = heap_beginscan_catalog(relationRelation, num_keys, scan_keys);
-   while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+   scan = table_beginscan_catalog(relationRelation, num_keys, scan_keys);
+   while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
    {
        Form_pg_class classtuple = (Form_pg_class) GETSTRUCT(tuple);
        Oid         relid = classtuple->oid;
@@ -2471,7 +2472,7 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind,
 
        MemoryContextSwitchTo(old);
    }
-   heap_endscan(scan);
+   table_endscan(scan);
    heap_close(relationRelation, AccessShareLock);
 
    /* Now reindex each rel in a separate transaction */
index a171ebabf8fb6d12ad5be4e6ebe77392133b3817..20cc2ae597095b2e447f9b39919a9b59bf7fd941 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "access/htup_details.h"
 #include "access/multixact.h"
+#include "access/tableam.h"
 #include "access/xact.h"
 #include "access/xlog.h"
 #include "catalog/catalog.h"
@@ -52,7 +53,8 @@ typedef struct
    Relation    transientrel;   /* relation to write to */
    CommandId   output_cid;     /* cmin to insert in output tuples */
    int         hi_options;     /* heap_insert performance options */
-   BulkInsertState bistate;    /* bulk insert state */
+   void       *bistate;        /* bulk insert state */
+   TupleTableSlot *slot;
 } DR_transientrel;
 
 static int matview_maintenance_depth = 0;
@@ -454,6 +456,7 @@ transientrel_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
     */
    myState->transientrel = transientrel;
    myState->output_cid = GetCurrentCommandId(true);
+   myState->slot = table_gimmegimmeslot(transientrel, NULL);
 
    /*
     * We can skip WAL-logging the insertions, unless PITR or streaming
@@ -475,25 +478,24 @@ static bool
 transientrel_receive(TupleTableSlot *slot, DestReceiver *self)
 {
    DR_transientrel *myState = (DR_transientrel *) self;
-   HeapTuple   tuple;
 
    /*
-    * get the heap tuple out of the tuple table slot, making sure we have a
-    * writable copy
+    * Ensure input tuple is the right format for the target relation.
     */
-   tuple = ExecCopySlotHeapTuple(slot);
+   if (slot->tts_ops != myState->slot->tts_ops)
+   {
+       ExecCopySlot(myState->slot, slot);
+       slot = myState->slot;
+   }
 
-   heap_insert(myState->transientrel,
-               tuple,
-               myState->output_cid,
-               myState->hi_options,
-               myState->bistate);
+   table_insert(myState->transientrel,
+                slot,
+                myState->output_cid,
+                myState->hi_options,
+                myState->bistate);
 
    /* We know this is a newly created relation, so there are no indexes */
 
-   /* Free the copied tuple. */
-   heap_freetuple(tuple);
-
    return true;
 }
 
@@ -505,11 +507,12 @@ transientrel_shutdown(DestReceiver *self)
 {
    DR_transientrel *myState = (DR_transientrel *) self;
 
+   ExecDropSingleTupleTableSlot(myState->slot);
    FreeBulkInsertState(myState->bistate);
 
    /* If we skipped using WAL, must heap_sync before commit */
    if (myState->hi_options & HEAP_INSERT_SKIP_WAL)
-       heap_sync(myState->transientrel);
+       table_sync(myState->transientrel);
 
    /* close transientrel, but keep lock until commit */
    heap_close(myState->transientrel, NoLock);
index d6d0de1b01b1ea57a8b927237e2e688b6c3a487a..20f1e5f2c618640a449dc5a166be562ad0488a66 100644 (file)
@@ -20,6 +20,7 @@
 #include "access/multixact.h"
 #include "access/reloptions.h"
 #include "access/relscan.h"
+#include "access/tableam.h"
 #include "access/sysattr.h"
 #include "access/tupconvert.h"
 #include "access/xact.h"
@@ -536,6 +537,8 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
    static char *validnsps[] = HEAP_RELOPT_NAMESPACES;
    Oid         ofTypeId;
    ObjectAddress address;
+   const char *accessMethod = NULL;
+   Oid         accessMethodId = InvalidOid;
 
    /*
     * Truncate relname to appropriate length (probably a waste of time, as
@@ -717,6 +720,35 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
            attr->attidentity = colDef->identity;
    }
 
+   /*
+    * If the statement hasn't specified an access method, but we're defining
+    * a type of relation that needs one, use the default.
+    */
+   if (stmt->accessMethod != NULL)
+       accessMethod = stmt->accessMethod;
+   else if (relkind == RELKIND_RELATION ||
+            relkind == RELKIND_TOASTVALUE ||
+            relkind == RELKIND_MATVIEW ||
+            relkind == RELKIND_PARTITIONED_TABLE)
+       accessMethod = default_table_access_method;
+
+   /*
+    * look up the access method, verify it can handle the requested features
+    */
+   if (accessMethod != NULL)
+   {
+       HeapTuple   tuple;
+
+       tuple = SearchSysCache1(AMNAME, PointerGetDatum(accessMethod));
+       if (!HeapTupleIsValid(tuple))
+           ereport(ERROR,
+                   (errcode(ERRCODE_UNDEFINED_OBJECT),
+                    errmsg("table access method \"%s\" does not exist",
+                           accessMethod)));
+       accessMethodId = ((Form_pg_am) GETSTRUCT(tuple))->oid;
+       ReleaseSysCache(tuple);
+   }
+
    /*
     * Create the relation.  Inherited defaults and constraints are passed in
     * for immediate handling --- since they don't need parsing, they can be
@@ -729,6 +761,7 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
                                          InvalidOid,
                                          ofTypeId,
                                          ownerId,
+                                         accessMethodId,
                                          descriptor,
                                          list_concat(cookedDefaults,
                                                      old_constraints),
@@ -1576,6 +1609,7 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
             */
            CheckTableForSerializableConflictIn(rel);
 
+           /* PBORKED: Need to abstract this */
            minmulti = GetOldestMultiXactId();
 
            /*
@@ -1584,6 +1618,8 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
             * Create a new empty storage file for the relation, and assign it
             * as the relfilenode value. The old storage file is scheduled for
             * deletion at commit.
+            *
+            * PBORKED: needs to be a callback
             */
            RelationSetNewRelfilenode(rel, rel->rd_rel->relpersistence,
                                      RecentXmin, minmulti);
@@ -4523,7 +4559,7 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
    ListCell   *l;
    EState     *estate;
    CommandId   mycid;
-   BulkInsertState bistate;
+   void       *bistate;
    int         hi_options;
    ExprState  *partqualstate = NULL;
 
@@ -4627,12 +4663,9 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
    if (newrel || needscan)
    {
        ExprContext *econtext;
-       Datum      *values;
-       bool       *isnull;
        TupleTableSlot *oldslot;
        TupleTableSlot *newslot;
-       HeapScanDesc scan;
-       HeapTuple   tuple;
+       TableScanDesc scan;
        MemoryContext oldCxt;
        List       *dropped_attrs = NIL;
        ListCell   *lc;
@@ -4664,15 +4697,16 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
         * tuples are the same, the tupDescs might not be (consider ADD COLUMN
         * without a default).
         */
-       oldslot = MakeSingleTupleTableSlot(oldTupDesc, &TTSOpsHeapTuple);
-       newslot = MakeSingleTupleTableSlot(newTupDesc, &TTSOpsHeapTuple);
+       /*
+        * When rewriting, the scan returns tuples in the old format, so
+        * oldslot must be built with oldTupDesc; without a rewrite the old
+        * and new descriptors are physically compatible and oldslot doubles
+        * as the slot the constraints are checked against, so use newTupDesc.
+        */
+       oldslot = MakeSingleTupleTableSlot(tab->rewrite > 0 ? oldTupDesc : newTupDesc,
+                                          table_slot_callbacks(oldrel));
+       newslot = MakeSingleTupleTableSlot(newTupDesc,
+                                          table_slot_callbacks(newrel ? newrel : oldrel));
 
-       /* Preallocate values/isnull arrays */
-       i = Max(newTupDesc->natts, oldTupDesc->natts);
-       values = (Datum *) palloc(i * sizeof(Datum));
-       isnull = (bool *) palloc(i * sizeof(bool));
-       memset(values, 0, i * sizeof(Datum));
-       memset(isnull, true, i * sizeof(bool));
+       memset(newslot->tts_values, 0,
+              sizeof(Datum) * newTupDesc->natts);
+       memset(newslot->tts_isnull, 0,
+              sizeof(bool) * newTupDesc->natts);
 
        /*
         * Any attributes that are dropped according to the new tuple
@@ -4690,7 +4724,7 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
         * checking all the constraints.
         */
        snapshot = RegisterSnapshot(GetLatestSnapshot());
-       scan = heap_beginscan(oldrel, snapshot, 0, NULL);
+       scan = table_beginscan(oldrel, snapshot, 0, NULL);
 
        /*
         * Switch to per-tuple memory context and reset it for each tuple
@@ -4698,55 +4732,69 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
         */
        oldCxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
 
-       while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+       while (table_scan_getnextslot(scan, ForwardScanDirection, oldslot))
        {
+           TupleTableSlot *insertslot;
+
            if (tab->rewrite > 0)
            {
                /* Extract data from old tuple */
-               heap_deform_tuple(tuple, oldTupDesc, values, isnull);
+               slot_getallattrs(oldslot);
+               ExecClearTuple(newslot);
+
+               /* copy attributes */
+               memcpy(newslot->tts_values, oldslot->tts_values,
+                      sizeof(Datum) * oldslot->tts_nvalid);
+               memcpy(newslot->tts_isnull, oldslot->tts_isnull,
+                      sizeof(bool) * oldslot->tts_nvalid);
 
                /* Set dropped attributes to null in new tuple */
                foreach(lc, dropped_attrs)
-                   isnull[lfirst_int(lc)] = true;
+                   newslot->tts_isnull[lfirst_int(lc)] = true;
 
                /*
                 * Process supplied expressions to replace selected columns.
                 * Expression inputs come from the old tuple.
                 */
-               ExecStoreHeapTuple(tuple, oldslot, false);
                econtext->ecxt_scantuple = oldslot;
 
                foreach(l, tab->newvals)
                {
                    NewColumnValue *ex = lfirst(l);
 
-                   values[ex->attnum - 1] = ExecEvalExpr(ex->exprstate,
-                                                         econtext,
-                                                         &isnull[ex->attnum - 1]);
+                   newslot->tts_values[ex->attnum - 1]
+                       = ExecEvalExpr(ex->exprstate,
+                                      econtext,
+                                      &newslot->tts_isnull[ex->attnum - 1]);
                }
 
-               /*
-                * Form the new tuple. Note that we don't explicitly pfree it,
-                * since the per-tuple memory context will be reset shortly.
-                */
-               tuple = heap_form_tuple(newTupDesc, values, isnull);
+               ExecStoreVirtualTuple(newslot);
 
                /*
                 * Constraints might reference the tableoid column, so
                 * initialize t_tableOid before evaluating them.
                 */
-               tuple->t_tableOid = RelationGetRelid(oldrel);
+               newslot->tts_tableOid = RelationGetRelid(oldrel);
+               insertslot = newslot;
+           }
+           else
+           {
+               /*
+                * If there's no rewrite, the old and new tables are
+                * guaranteed to have the same AM, so we can just use the
+                * old slot to verify the new constraints etc.
+                */
+               insertslot = oldslot;
            }
 
            /* Now check any constraints on the possibly-changed tuple */
-           ExecStoreHeapTuple(tuple, newslot, false);
-           econtext->ecxt_scantuple = newslot;
+           econtext->ecxt_scantuple = insertslot;
 
            foreach(l, notnull_attrs)
            {
                int         attn = lfirst_int(l);
 
-               if (heap_attisnull(tuple, attn + 1, newTupDesc))
+               if (slot_attisnull(insertslot, attn + 1))
                {
                    Form_pg_attribute attr = TupleDescAttr(newTupDesc, attn);
 
@@ -4795,7 +4843,7 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
 
            /* Write the tuple out to the new relation */
            if (newrel)
-               heap_insert(newrel, tuple, mycid, hi_options, bistate);
+               table_insert(newrel, insertslot, mycid, hi_options, bistate);
 
            ResetExprContext(econtext);
 
@@ -4803,7 +4851,7 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
        }
 
        MemoryContextSwitchTo(oldCxt);
-       heap_endscan(scan);
+       table_endscan(scan);
        UnregisterSnapshot(snapshot);
 
        ExecDropSingleTupleTableSlot(oldslot);
@@ -4819,7 +4867,7 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
 
        /* If we skipped writing WAL, then we need to sync the heap. */
        if (hi_options & HEAP_INSERT_SKIP_WAL)
-           heap_sync(newrel);
+           table_sync(newrel);
 
        heap_close(newrel, NoLock);
    }
@@ -5198,7 +5246,7 @@ find_typed_table_dependencies(Oid typeOid, const char *typeName, DropBehavior be
 {
    Relation    classRel;
    ScanKeyData key[1];
-   HeapScanDesc scan;
+   TableScanDesc scan;
    HeapTuple   tuple;
    List       *result = NIL;
 
@@ -5209,9 +5257,9 @@ find_typed_table_dependencies(Oid typeOid, const char *typeName, DropBehavior be
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(typeOid));
 
-   scan = heap_beginscan_catalog(classRel, 1, key);
+   scan = table_beginscan_catalog(classRel, 1, key);
 
-   while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+   while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
    {
        Form_pg_class classform = (Form_pg_class) GETSTRUCT(tuple);
 
@@ -5225,7 +5273,7 @@ find_typed_table_dependencies(Oid typeOid, const char *typeName, DropBehavior be
            result = lappend_oid(result, classform->oid);
    }
 
-   heap_endscan(scan);
+   table_endscan(scan);
    heap_close(classRel, AccessShareLock);
 
    return result;
@@ -8323,9 +8371,7 @@ validateCheckConstraint(Relation rel, HeapTuple constrtup)
    char       *conbin;
    Expr       *origexpr;
    ExprState  *exprstate;
-   TupleDesc   tupdesc;
-   HeapScanDesc scan;
-   HeapTuple   tuple;
+   TableScanDesc scan;
    ExprContext *econtext;
    MemoryContext oldcxt;
    TupleTableSlot *slot;
@@ -8360,12 +8406,11 @@ validateCheckConstraint(Relation rel, HeapTuple constrtup)
    exprstate = ExecPrepareExpr(origexpr, estate);
 
    econtext = GetPerTupleExprContext(estate);
-   tupdesc = RelationGetDescr(rel);
-   slot = MakeSingleTupleTableSlot(tupdesc, &TTSOpsHeapTuple);
+   slot = table_gimmegimmeslot(rel, NULL);
    econtext->ecxt_scantuple = slot;
 
    snapshot = RegisterSnapshot(GetLatestSnapshot());
-   scan = heap_beginscan(rel, snapshot, 0, NULL);
+   scan = table_beginscan(rel, snapshot, 0, NULL);
 
    /*
     * Switch to per-tuple memory context and reset it for each tuple
@@ -8373,10 +8418,8 @@ validateCheckConstraint(Relation rel, HeapTuple constrtup)
     */
    oldcxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
 
-   while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+   while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
    {
-       ExecStoreHeapTuple(tuple, slot, false);
-
        if (!ExecCheck(exprstate, econtext))
            ereport(ERROR,
                    (errcode(ERRCODE_CHECK_VIOLATION),
@@ -8388,7 +8431,7 @@ validateCheckConstraint(Relation rel, HeapTuple constrtup)
    }
 
    MemoryContextSwitchTo(oldcxt);
-   heap_endscan(scan);
+   table_endscan(scan);
    UnregisterSnapshot(snapshot);
    ExecDropSingleTupleTableSlot(slot);
    FreeExecutorState(estate);
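
The conversion above removes the intermediate HeapTuple entirely: the
expression machinery reads columns straight out of the scan slot.  The
essential shape, condensed from validateCheckConstraint (error message
shortened for the sketch):

    slot = table_gimmegimmeslot(rel, NULL);
    econtext->ecxt_scantuple = slot;

    scan = table_beginscan(rel, snapshot, 0, NULL);
    while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
    {
        if (!ExecCheck(exprstate, econtext))
            ereport(ERROR,
                    (errcode(ERRCODE_CHECK_VIOLATION),
                     errmsg("check constraint is violated by some row")));

        ResetExprContext(econtext);
    }
    table_endscan(scan);
    ExecDropSingleTupleTableSlot(slot);
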
@@ -8407,8 +8450,8 @@ validateForeignKeyConstraint(char *conname,
                             Oid pkindOid,
                             Oid constraintOid)
 {
-   HeapScanDesc scan;
-   HeapTuple   tuple;
+   TableScanDesc scan;
+   TupleTableSlot *slot;
    Trigger     trig;
    Snapshot    snapshot;
 
@@ -8443,9 +8486,10 @@ validateForeignKeyConstraint(char *conname,
     * ereport(ERROR) and that's that.
     */
    snapshot = RegisterSnapshot(GetLatestSnapshot());
-   scan = heap_beginscan(rel, snapshot, 0, NULL);
+   scan = table_beginscan(rel, snapshot, 0, NULL);
+   slot = table_gimmegimmeslot(rel, NULL);
 
-   while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+   while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
    {
        FunctionCallInfoData fcinfo;
        TriggerData trigdata;
@@ -8463,19 +8507,19 @@ validateForeignKeyConstraint(char *conname,
        trigdata.type = T_TriggerData;
        trigdata.tg_event = TRIGGER_EVENT_INSERT | TRIGGER_EVENT_ROW;
        trigdata.tg_relation = rel;
-       trigdata.tg_trigtuple = tuple;
+       trigdata.tg_trigtuple = ExecFetchSlotHeapTuple(slot, true, NULL);
+       trigdata.tg_trigslot = slot;
        trigdata.tg_newtuple = NULL;
        trigdata.tg_trigger = &trig;
-       trigdata.tg_trigtuplebuf = scan->rs_cbuf;
-       trigdata.tg_newtuplebuf = InvalidBuffer;
 
        fcinfo.context = (Node *) &trigdata;
 
        RI_FKey_check_ins(&fcinfo);
    }
 
-   heap_endscan(scan);
+   table_endscan(scan);
    UnregisterSnapshot(snapshot);
+   ExecDropSingleTupleTableSlot(slot);
 }
 
 static void
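
validateForeignKeyConstraint shows the bridge for code that still needs a
HeapTuple: materialize one inside the slot with ExecFetchSlotHeapTuple.  A
minimal sketch of that bridge, assuming (as the code above suggests) that a
NULL shouldFree argument leaves the tuple owned by the slot:

    while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
    {
        /* Materialize a HeapTuple owned by the slot; do not free it. */
        HeapTuple   tuple = ExecFetchSlotHeapTuple(slot, true, NULL);

        /* ... pass both tuple and slot to HeapTuple-based consumers ... */
    }
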
@@ -11001,7 +11045,7 @@ AlterTableMoveAll(AlterTableMoveAllStmt *stmt)
    ListCell   *l;
    ScanKeyData key[1];
    Relation    rel;
-   HeapScanDesc scan;
+   TableScanDesc scan;
    HeapTuple   tuple;
    Oid         orig_tablespaceoid;
    Oid         new_tablespaceoid;
@@ -11066,8 +11110,8 @@ AlterTableMoveAll(AlterTableMoveAllStmt *stmt)
                ObjectIdGetDatum(orig_tablespaceoid));
 
    rel = heap_open(RelationRelationId, AccessShareLock);
-   scan = heap_beginscan_catalog(rel, 1, key);
-   while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+   scan = table_beginscan_catalog(rel, 1, key);
+   while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
    {
        Form_pg_class relForm = (Form_pg_class) GETSTRUCT(tuple);
        Oid         relOid = relForm->oid;
@@ -11125,7 +11169,7 @@ AlterTableMoveAll(AlterTableMoveAllStmt *stmt)
        relations = lappend_oid(relations, relOid);
    }
 
-   heap_endscan(scan);
+   table_endscan(scan);
    heap_close(rel, AccessShareLock);
 
    if (relations == NIL)
index 4a714f6e2be15b2461c502245e684e9e4ed326ef..ca429731d4091f94c1acdd288047a6e383e7de00 100644 (file)
@@ -53,6 +53,7 @@
 #include "access/heapam.h"
 #include "access/reloptions.h"
 #include "access/htup_details.h"
+#include "access/tableam.h"
 #include "access/sysattr.h"
 #include "access/xact.h"
 #include "access/xlog.h"
@@ -406,7 +407,7 @@ DropTableSpace(DropTableSpaceStmt *stmt)
 {
 #ifdef HAVE_SYMLINK
    char       *tablespacename = stmt->tablespacename;
-   HeapScanDesc scandesc;
+   TableScanDesc scandesc;
    Relation    rel;
    HeapTuple   tuple;
    Form_pg_tablespace spcform;
@@ -422,8 +423,8 @@ DropTableSpace(DropTableSpaceStmt *stmt)
                Anum_pg_tablespace_spcname,
                BTEqualStrategyNumber, F_NAMEEQ,
                CStringGetDatum(tablespacename));
-   scandesc = heap_beginscan_catalog(rel, 1, entry);
-   tuple = heap_getnext(scandesc, ForwardScanDirection);
+   scandesc = table_beginscan_catalog(rel, 1, entry);
+   tuple = heap_scan_getnext(scandesc, ForwardScanDirection);
 
    if (!HeapTupleIsValid(tuple))
    {
@@ -440,7 +441,7 @@ DropTableSpace(DropTableSpaceStmt *stmt)
                    (errmsg("tablespace \"%s\" does not exist, skipping",
                            tablespacename)));
            /* XXX I assume I need one or both of these next two calls */
-           heap_endscan(scandesc);
+           table_endscan(scandesc);
            heap_close(rel, NoLock);
        }
        return;
@@ -468,7 +469,7 @@ DropTableSpace(DropTableSpaceStmt *stmt)
     */
    CatalogTupleDelete(rel, &tuple->t_self);
 
-   heap_endscan(scandesc);
+   table_endscan(scandesc);
 
    /*
     * Remove any comments or security labels on this tablespace.
@@ -919,7 +920,7 @@ RenameTableSpace(const char *oldname, const char *newname)
    Oid         tspId;
    Relation    rel;
    ScanKeyData entry[1];
-   HeapScanDesc scan;
+   TableScanDesc scan;
    HeapTuple   tup;
    HeapTuple   newtuple;
    Form_pg_tablespace newform;
@@ -932,8 +933,8 @@ RenameTableSpace(const char *oldname, const char *newname)
                Anum_pg_tablespace_spcname,
                BTEqualStrategyNumber, F_NAMEEQ,
                CStringGetDatum(oldname));
-   scan = heap_beginscan_catalog(rel, 1, entry);
-   tup = heap_getnext(scan, ForwardScanDirection);
+   scan = table_beginscan_catalog(rel, 1, entry);
+   tup = heap_scan_getnext(scan, ForwardScanDirection);
    if (!HeapTupleIsValid(tup))
        ereport(ERROR,
                (errcode(ERRCODE_UNDEFINED_OBJECT),
@@ -944,7 +945,7 @@ RenameTableSpace(const char *oldname, const char *newname)
    newform = (Form_pg_tablespace) GETSTRUCT(newtuple);
    tspId = newform->oid;
 
-   heap_endscan(scan);
+   table_endscan(scan);
 
    /* Must be owner */
    if (!pg_tablespace_ownercheck(tspId, GetUserId()))
@@ -962,15 +963,15 @@ RenameTableSpace(const char *oldname, const char *newname)
                Anum_pg_tablespace_spcname,
                BTEqualStrategyNumber, F_NAMEEQ,
                CStringGetDatum(newname));
-   scan = heap_beginscan_catalog(rel, 1, entry);
-   tup = heap_getnext(scan, ForwardScanDirection);
+   scan = table_beginscan_catalog(rel, 1, entry);
+   tup = heap_scan_getnext(scan, ForwardScanDirection);
    if (HeapTupleIsValid(tup))
        ereport(ERROR,
                (errcode(ERRCODE_DUPLICATE_OBJECT),
                 errmsg("tablespace \"%s\" already exists",
                        newname)));
 
-   heap_endscan(scan);
+   table_endscan(scan);
 
    /* OK, update the entry */
    namestrcpy(&(newform->spcname), newname);
@@ -994,7 +995,7 @@ AlterTableSpaceOptions(AlterTableSpaceOptionsStmt *stmt)
 {
    Relation    rel;
    ScanKeyData entry[1];
-   HeapScanDesc scandesc;
+   TableScanDesc scandesc;
    HeapTuple   tup;
    Oid         tablespaceoid;
    Datum       datum;
@@ -1012,8 +1013,8 @@ AlterTableSpaceOptions(AlterTableSpaceOptionsStmt *stmt)
                Anum_pg_tablespace_spcname,
                BTEqualStrategyNumber, F_NAMEEQ,
                CStringGetDatum(stmt->tablespacename));
-   scandesc = heap_beginscan_catalog(rel, 1, entry);
-   tup = heap_getnext(scandesc, ForwardScanDirection);
+   scandesc = table_beginscan_catalog(rel, 1, entry);
+   tup = heap_scan_getnext(scandesc, ForwardScanDirection);
    if (!HeapTupleIsValid(tup))
        ereport(ERROR,
                (errcode(ERRCODE_UNDEFINED_OBJECT),
@@ -1054,7 +1055,7 @@ AlterTableSpaceOptions(AlterTableSpaceOptionsStmt *stmt)
    heap_freetuple(newtuple);
 
    /* Conclude heap scan. */
-   heap_endscan(scandesc);
+   table_endscan(scandesc);
    heap_close(rel, NoLock);
 
    return tablespaceoid;
@@ -1388,7 +1389,7 @@ get_tablespace_oid(const char *tablespacename, bool missing_ok)
 {
    Oid         result;
    Relation    rel;
-   HeapScanDesc scandesc;
+   TableScanDesc scandesc;
    HeapTuple   tuple;
    ScanKeyData entry[1];
 
@@ -1403,8 +1404,8 @@ get_tablespace_oid(const char *tablespacename, bool missing_ok)
                Anum_pg_tablespace_spcname,
                BTEqualStrategyNumber, F_NAMEEQ,
                CStringGetDatum(tablespacename));
-   scandesc = heap_beginscan_catalog(rel, 1, entry);
-   tuple = heap_getnext(scandesc, ForwardScanDirection);
+   scandesc = table_beginscan_catalog(rel, 1, entry);
+   tuple = heap_scan_getnext(scandesc, ForwardScanDirection);
 
    /* We assume that there can be at most one matching tuple */
    if (HeapTupleIsValid(tuple))
@@ -1412,7 +1413,7 @@ get_tablespace_oid(const char *tablespacename, bool missing_ok)
    else
        result = InvalidOid;
 
-   heap_endscan(scandesc);
+   table_endscan(scandesc);
    heap_close(rel, AccessShareLock);
 
    if (!OidIsValid(result) && !missing_ok)
@@ -1434,7 +1435,7 @@ get_tablespace_name(Oid spc_oid)
 {
    char       *result;
    Relation    rel;
-   HeapScanDesc scandesc;
+   TableScanDesc scandesc;
    HeapTuple   tuple;
    ScanKeyData entry[1];
 
@@ -1449,8 +1450,8 @@ get_tablespace_name(Oid spc_oid)
                Anum_pg_tablespace_oid,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(spc_oid));
-   scandesc = heap_beginscan_catalog(rel, 1, entry);
-   tuple = heap_getnext(scandesc, ForwardScanDirection);
+   scandesc = table_beginscan_catalog(rel, 1, entry);
+   tuple = heap_scan_getnext(scandesc, ForwardScanDirection);
 
    /* We assume that there can be at most one matching tuple */
    if (HeapTupleIsValid(tuple))
@@ -1458,7 +1459,7 @@ get_tablespace_name(Oid spc_oid)
    else
        result = NULL;
 
-   heap_endscan(scandesc);
+   table_endscan(scandesc);
    heap_close(rel, AccessShareLock);
 
    return result;
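
Each tablespace.c routine above performs a single-tuple lookup through the
same sequence; get_tablespace_oid is the clearest instance.  Condensed into a
sketch (example_lookup_tablespace is a hypothetical name):

    static Oid
    example_lookup_tablespace(const char *name)
    {
        Relation      rel;
        TableScanDesc scan;
        ScanKeyData   key[1];
        HeapTuple     tuple;
        Oid           result;

        rel = heap_open(TableSpaceRelationId, AccessShareLock);
        ScanKeyInit(&key[0],
                    Anum_pg_tablespace_spcname,
                    BTEqualStrategyNumber, F_NAMEEQ,
                    CStringGetDatum(name));
        scan = table_beginscan_catalog(rel, 1, key);
        tuple = heap_scan_getnext(scan, ForwardScanDirection);

        /* At most one tuple can match a tablespace name. */
        result = HeapTupleIsValid(tuple) ?
            ((Form_pg_tablespace) GETSTRUCT(tuple))->oid : InvalidOid;

        table_endscan(scan);
        heap_close(rel, AccessShareLock);

        return result;
    }
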
index bcdd86ce92f73519e3fcf7d3758d806db2e6966a..6a00a96f59c7869b2c500ccf9c635cfbebf2757f 100644 (file)
@@ -15,6 +15,7 @@
 
 #include "access/genam.h"
 #include "access/heapam.h"
+#include "access/tableam.h"
 #include "access/sysattr.h"
 #include "access/htup_details.h"
 #include "access/xact.h"
@@ -80,16 +81,18 @@ static int  MyTriggerDepth = 0;
 /* Local function prototypes */
 static void ConvertTriggerToFK(CreateTrigStmt *stmt, Oid funcoid);
 static void SetTriggerFlags(TriggerDesc *trigdesc, Trigger *trigger);
-static HeapTuple GetTupleForTrigger(EState *estate,
+static bool GetTupleForTrigger(EState *estate,
                   EPQState *epqstate,
                   ResultRelInfo *relinfo,
                   ItemPointer tid,
                   LockTupleMode lockmode,
-                  TupleTableSlot **newSlot);
+                  TupleTableSlot *oldslot,
+                  TupleTableSlot *newslot,
+                  bool *is_epqtuple);
 static bool TriggerEnabled(EState *estate, ResultRelInfo *relinfo,
               Trigger *trigger, TriggerEvent event,
               Bitmapset *modifiedCols,
-              HeapTuple oldtup, HeapTuple newtup);
+              TupleTableSlot *oldslot, TupleTableSlot *newslot);
 static HeapTuple ExecCallTriggerFunc(TriggerData *trigdata,
                    int tgindx,
                    FmgrInfo *finfo,
@@ -97,7 +100,7 @@ static HeapTuple ExecCallTriggerFunc(TriggerData *trigdata,
                    MemoryContext per_tuple_context);
 static void AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
                      int event, bool row_trigger,
-                     HeapTuple oldtup, HeapTuple newtup,
+                     TupleTableSlot *oldtup, TupleTableSlot *newtup,
                      List *recheckIndexes, Bitmapset *modifiedCols,
                      TransitionCaptureState *transition_capture);
 static void AfterTriggerEnlargeQueryState(void);
@@ -2470,10 +2473,11 @@ ExecBSInsertTriggers(EState *estate, ResultRelInfo *relinfo)
    LocTriggerData.tg_relation = relinfo->ri_RelationDesc;
    LocTriggerData.tg_trigtuple = NULL;
    LocTriggerData.tg_newtuple = NULL;
+   LocTriggerData.tg_trigslot = NULL;
+   LocTriggerData.tg_newslot = NULL;
    LocTriggerData.tg_oldtable = NULL;
    LocTriggerData.tg_newtable = NULL;
-   LocTriggerData.tg_trigtuplebuf = InvalidBuffer;
-   LocTriggerData.tg_newtuplebuf = InvalidBuffer;
+
    for (i = 0; i < trigdesc->numtriggers; i++)
    {
        Trigger    *trigger = &trigdesc->triggers[i];
@@ -2513,7 +2517,7 @@ ExecASInsertTriggers(EState *estate, ResultRelInfo *relinfo,
                              false, NULL, NULL, NIL, NULL, transition_capture);
 }
 
-TupleTableSlot *
+bool
 ExecBRInsertTriggers(EState *estate, ResultRelInfo *relinfo,
                     TupleTableSlot *slot)
 {
@@ -2530,10 +2534,13 @@ ExecBRInsertTriggers(EState *estate, ResultRelInfo *relinfo,
        TRIGGER_EVENT_ROW |
        TRIGGER_EVENT_BEFORE;
    LocTriggerData.tg_relation = relinfo->ri_RelationDesc;
+   LocTriggerData.tg_trigtuple = NULL;
    LocTriggerData.tg_newtuple = NULL;
+   LocTriggerData.tg_trigslot = NULL;
+   LocTriggerData.tg_newslot = NULL;
    LocTriggerData.tg_oldtable = NULL;
    LocTriggerData.tg_newtable = NULL;
-   LocTriggerData.tg_newtuplebuf = InvalidBuffer;
+
    for (i = 0; i < trigdesc->numtriggers; i++)
    {
        Trigger    *trigger = &trigdesc->triggers[i];
@@ -2544,65 +2551,54 @@ ExecBRInsertTriggers(EState *estate, ResultRelInfo *relinfo,
                                  TRIGGER_TYPE_INSERT))
            continue;
        if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event,
-                           NULL, NULL, newtuple))
+                           NULL, NULL, slot))
            continue;
 
+       LocTriggerData.tg_trigslot = slot;
        LocTriggerData.tg_trigtuple = oldtuple = newtuple;
-       LocTriggerData.tg_trigtuplebuf = InvalidBuffer;
+
        LocTriggerData.tg_trigger = trigger;
        newtuple = ExecCallTriggerFunc(&LocTriggerData,
                                       i,
                                       relinfo->ri_TrigFunctions,
                                       relinfo->ri_TrigInstrument,
                                       GetPerTupleMemoryContext(estate));
-       if (oldtuple != newtuple && oldtuple != slottuple)
+       if (false && oldtuple != newtuple && oldtuple != slottuple)
            heap_freetuple(oldtuple);
        if (newtuple == NULL)
        {
            if (should_free)
                heap_freetuple(slottuple);
-           return NULL;        /* "do nothing" */
+           return false;
+       }
+       if (newtuple != oldtuple)
+       {
+           ExecForceStoreHeapTuple(newtuple, slot);
+           newtuple = ExecFetchSlotHeapTuple(slot, true, NULL);
        }
    }
 
-   if (newtuple != slottuple)
-   {
-       /*
-        * Return the modified tuple using the es_trig_tuple_slot.  We assume
-        * the tuple was allocated in per-tuple memory context, and therefore
-        * will go away by itself. The tuple table slot should not try to
-        * clear it.
-        */
-       TupleTableSlot *newslot = estate->es_trig_tuple_slot;
-       TupleDesc   tupdesc = RelationGetDescr(relinfo->ri_RelationDesc);
-
-       if (newslot->tts_tupleDescriptor != tupdesc)
-           ExecSetSlotDescriptor(newslot, tupdesc);
-       ExecStoreHeapTuple(newtuple, newslot, false);
-       slot = newslot;
-   }
-
-   if (should_free)
-       heap_freetuple(slottuple);
-   return slot;
+   return true;
 }
 
 void
 ExecARInsertTriggers(EState *estate, ResultRelInfo *relinfo,
-                    HeapTuple trigtuple, List *recheckIndexes,
+                    TupleTableSlot *slot, List *recheckIndexes,
                     TransitionCaptureState *transition_capture)
 {
    TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
 
    if ((trigdesc && trigdesc->trig_insert_after_row) ||
        (transition_capture && transition_capture->tcs_insert_new_table))
+   {
        AfterTriggerSaveEvent(estate, relinfo, TRIGGER_EVENT_INSERT,
-                             true, NULL, trigtuple,
+                             true, NULL, slot,
                              recheckIndexes, NULL,
                              transition_capture);
+   }
 }
 
-TupleTableSlot *
+bool
 ExecIRInsertTriggers(EState *estate, ResultRelInfo *relinfo,
                     TupleTableSlot *slot)
 {
@@ -2619,10 +2615,13 @@ ExecIRInsertTriggers(EState *estate, ResultRelInfo *relinfo,
        TRIGGER_EVENT_ROW |
        TRIGGER_EVENT_INSTEAD;
    LocTriggerData.tg_relation = relinfo->ri_RelationDesc;
+   LocTriggerData.tg_trigtuple = NULL;
    LocTriggerData.tg_newtuple = NULL;
+   LocTriggerData.tg_trigslot = NULL;
+   LocTriggerData.tg_newslot = NULL;
    LocTriggerData.tg_oldtable = NULL;
    LocTriggerData.tg_newtable = NULL;
-   LocTriggerData.tg_newtuplebuf = InvalidBuffer;
+
    for (i = 0; i < trigdesc->numtriggers; i++)
    {
        Trigger    *trigger = &trigdesc->triggers[i];
@@ -2633,47 +2632,33 @@ ExecIRInsertTriggers(EState *estate, ResultRelInfo *relinfo,
                                  TRIGGER_TYPE_INSERT))
            continue;
        if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event,
-                           NULL, NULL, newtuple))
+                           NULL, NULL, slot))
            continue;
 
+       LocTriggerData.tg_trigslot = slot;
        LocTriggerData.tg_trigtuple = oldtuple = newtuple;
-       LocTriggerData.tg_trigtuplebuf = InvalidBuffer;
        LocTriggerData.tg_trigger = trigger;
        newtuple = ExecCallTriggerFunc(&LocTriggerData,
                                       i,
                                       relinfo->ri_TrigFunctions,
                                       relinfo->ri_TrigInstrument,
                                       GetPerTupleMemoryContext(estate));
-       if (oldtuple != newtuple && oldtuple != slottuple)
+       if (false && oldtuple != newtuple && oldtuple != slottuple)
            heap_freetuple(oldtuple);
        if (newtuple == NULL)
        {
            if (should_free)
                heap_freetuple(slottuple);
-           return NULL;        /* "do nothing" */
+           return false;       /* "do nothing" */
+       }
+       if (oldtuple != newtuple)
+       {
+           ExecForceStoreHeapTuple(newtuple, LocTriggerData.tg_trigslot);
+           newtuple = ExecFetchSlotHeapTuple(slot, true, NULL);
        }
    }
 
-   if (newtuple != slottuple)
-   {
-       /*
-        * Return the modified tuple using the es_trig_tuple_slot.  We assume
-        * the tuple was allocated in per-tuple memory context, and therefore
-        * will go away by itself. The tuple table slot should not try to
-        * clear it.
-        */
-       TupleTableSlot *newslot = estate->es_trig_tuple_slot;
-       TupleDesc   tupdesc = RelationGetDescr(relinfo->ri_RelationDesc);
-
-       if (newslot->tts_tupleDescriptor != tupdesc)
-           ExecSetSlotDescriptor(newslot, tupdesc);
-       ExecStoreHeapTuple(newtuple, newslot, false);
-       slot = newslot;
-   }
-
-   if (should_free)
-       heap_freetuple(slottuple);
-   return slot;
+   return true;
 }
 
 void
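
The signature change from TupleTableSlot * to bool inverts the caller
contract: rather than handing back a possibly-different slot, the trigger
code now stores any replacement tuple into the slot it was given.  A sketch
of what an ExecInsert-like caller would do under the new contract (condensed;
error paths omitted):

    if (resultRelInfo->ri_TrigDesc &&
        resultRelInfo->ri_TrigDesc->trig_insert_before_row)
    {
        /* On false, a trigger suppressed the insert ("do nothing"). */
        if (!ExecBRInsertTriggers(estate, resultRelInfo, slot))
            return NULL;

        /* slot now holds the (possibly trigger-modified) tuple */
    }

    table_insert(resultRelationDesc, slot, estate->es_output_cid,
                 0, NULL);
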
@@ -2701,10 +2686,11 @@ ExecBSDeleteTriggers(EState *estate, ResultRelInfo *relinfo)
    LocTriggerData.tg_relation = relinfo->ri_RelationDesc;
    LocTriggerData.tg_trigtuple = NULL;
    LocTriggerData.tg_newtuple = NULL;
+   LocTriggerData.tg_trigslot = NULL;
+   LocTriggerData.tg_newslot = NULL;
    LocTriggerData.tg_oldtable = NULL;
    LocTriggerData.tg_newtable = NULL;
-   LocTriggerData.tg_trigtuplebuf = InvalidBuffer;
-   LocTriggerData.tg_newtuplebuf = InvalidBuffer;
+
    for (i = 0; i < trigdesc->numtriggers; i++)
    {
        Trigger    *trigger = &trigdesc->triggers[i];
@@ -2758,20 +2744,20 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate,
                     HeapTuple fdw_trigtuple,
                     TupleTableSlot **epqslot)
 {
+   TupleTableSlot *slot = ExecTriggerGetOldSlot(estate, relinfo->ri_RelationDesc);
    TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
    bool        result = true;
    TriggerData LocTriggerData;
    HeapTuple   trigtuple;
-   HeapTuple   newtuple;
-   TupleTableSlot *newSlot;
    int         i;
 
    Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid));
    if (fdw_trigtuple == NULL)
    {
-       trigtuple = GetTupleForTrigger(estate, epqstate, relinfo, tupleid,
-                                      LockTupleExclusive, &newSlot);
-       if (trigtuple == NULL)
+       bool is_epqtuple;
+
+       if (!GetTupleForTrigger(estate, epqstate, relinfo, tupleid,
+                               LockTupleExclusive, slot, NULL, &is_epqtuple))
            return false;
 
        /*
@@ -2779,27 +2765,36 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate,
         * function requested for the updated tuple, skip the trigger
         * execution.
         */
-       if (newSlot != NULL && epqslot != NULL)
+       if (is_epqtuple && epqslot != NULL)
        {
-           *epqslot = newSlot;
-           heap_freetuple(trigtuple);
+           *epqslot = slot;
            return false;
        }
+
+       trigtuple = ExecFetchSlotHeapTuple(slot, true, NULL);
    }
    else
+   {
        trigtuple = fdw_trigtuple;
+       ExecForceStoreHeapTuple(trigtuple, slot);
+   }
 
    LocTriggerData.type = T_TriggerData;
    LocTriggerData.tg_event = TRIGGER_EVENT_DELETE |
        TRIGGER_EVENT_ROW |
        TRIGGER_EVENT_BEFORE;
    LocTriggerData.tg_relation = relinfo->ri_RelationDesc;
+   LocTriggerData.tg_trigtuple = NULL;
    LocTriggerData.tg_newtuple = NULL;
+   LocTriggerData.tg_trigslot = NULL;
+   LocTriggerData.tg_newslot = NULL;
    LocTriggerData.tg_oldtable = NULL;
    LocTriggerData.tg_newtable = NULL;
-   LocTriggerData.tg_newtuplebuf = InvalidBuffer;
+
    for (i = 0; i < trigdesc->numtriggers; i++)
    {
+       HeapTuple   newtuple;
        Trigger    *trigger = &trigdesc->triggers[i];
 
        if (!TRIGGER_TYPE_MATCHES(trigger->tgtype,
@@ -2808,11 +2803,11 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate,
                                  TRIGGER_TYPE_DELETE))
            continue;
        if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event,
-                           NULL, trigtuple, NULL))
+                           NULL, slot, NULL))
            continue;
 
+       LocTriggerData.tg_trigslot = slot;
        LocTriggerData.tg_trigtuple = trigtuple;
-       LocTriggerData.tg_trigtuplebuf = InvalidBuffer;
        LocTriggerData.tg_trigger = trigger;
        newtuple = ExecCallTriggerFunc(&LocTriggerData,
                                       i,
@@ -2824,10 +2819,10 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate,
            result = false;     /* tell caller to suppress delete */
            break;
        }
-       if (newtuple != trigtuple)
+       if (false && newtuple != trigtuple)
            heap_freetuple(newtuple);
    }
-   if (trigtuple != fdw_trigtuple)
+   if (false && trigtuple != fdw_trigtuple)
        heap_freetuple(trigtuple);
 
    return result;
@@ -2840,28 +2835,31 @@ ExecARDeleteTriggers(EState *estate, ResultRelInfo *relinfo,
                     TransitionCaptureState *transition_capture)
 {
    TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
+   TupleTableSlot *slot = ExecTriggerGetOldSlot(estate, relinfo->ri_RelationDesc);
 
    if ((trigdesc && trigdesc->trig_delete_after_row) ||
        (transition_capture && transition_capture->tcs_delete_old_table))
    {
-       HeapTuple   trigtuple;
-
        Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid));
        if (fdw_trigtuple == NULL)
-           trigtuple = GetTupleForTrigger(estate,
-                                          NULL,
-                                          relinfo,
-                                          tupleid,
-                                          LockTupleExclusive,
-                                          NULL);
+       {
+           GetTupleForTrigger(estate,
+                              NULL,
+                              relinfo,
+                              tupleid,
+                              LockTupleExclusive,
+                              slot,
+                              NULL,
+                              NULL);
+       }
        else
-           trigtuple = fdw_trigtuple;
+       {
+           ExecForceStoreHeapTuple(fdw_trigtuple, slot);
+       }
 
        AfterTriggerSaveEvent(estate, relinfo, TRIGGER_EVENT_DELETE,
-                             true, trigtuple, NULL, NIL, NULL,
+                             true, slot, NULL, NIL, NULL,
                              transition_capture);
-       if (trigtuple != fdw_trigtuple)
-           heap_freetuple(trigtuple);
    }
 }
 
@@ -2870,8 +2868,8 @@ ExecIRDeleteTriggers(EState *estate, ResultRelInfo *relinfo,
                     HeapTuple trigtuple)
 {
    TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
+   TupleTableSlot *slot = ExecTriggerGetOldSlot(estate, relinfo->ri_RelationDesc);
    TriggerData LocTriggerData;
-   HeapTuple   rettuple;
    int         i;
 
    LocTriggerData.type = T_TriggerData;
@@ -2879,12 +2877,18 @@ ExecIRDeleteTriggers(EState *estate, ResultRelInfo *relinfo,
        TRIGGER_EVENT_ROW |
        TRIGGER_EVENT_INSTEAD;
    LocTriggerData.tg_relation = relinfo->ri_RelationDesc;
+   LocTriggerData.tg_trigtuple = NULL;
    LocTriggerData.tg_newtuple = NULL;
+   LocTriggerData.tg_trigslot = NULL;
+   LocTriggerData.tg_newslot = NULL;
    LocTriggerData.tg_oldtable = NULL;
    LocTriggerData.tg_newtable = NULL;
-   LocTriggerData.tg_newtuplebuf = InvalidBuffer;
+
+   ExecForceStoreHeapTuple(trigtuple, slot);
+
    for (i = 0; i < trigdesc->numtriggers; i++)
    {
+       HeapTuple   rettuple;
        Trigger    *trigger = &trigdesc->triggers[i];
 
        if (!TRIGGER_TYPE_MATCHES(trigger->tgtype,
@@ -2893,11 +2897,11 @@ ExecIRDeleteTriggers(EState *estate, ResultRelInfo *relinfo,
                                  TRIGGER_TYPE_DELETE))
            continue;
        if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event,
-                           NULL, trigtuple, NULL))
+                           NULL, slot, NULL))
            continue;
 
+       LocTriggerData.tg_trigslot = slot;
        LocTriggerData.tg_trigtuple = trigtuple;
-       LocTriggerData.tg_trigtuplebuf = InvalidBuffer;
        LocTriggerData.tg_trigger = trigger;
        rettuple = ExecCallTriggerFunc(&LocTriggerData,
                                       i,
@@ -2906,7 +2910,7 @@ ExecIRDeleteTriggers(EState *estate, ResultRelInfo *relinfo,
                                       GetPerTupleMemoryContext(estate));
        if (rettuple == NULL)
            return false;       /* Delete was suppressed */
-       if (rettuple != trigtuple)
+       if (false && rettuple != trigtuple)
            heap_freetuple(rettuple);
    }
    return true;
@@ -2940,10 +2944,11 @@ ExecBSUpdateTriggers(EState *estate, ResultRelInfo *relinfo)
    LocTriggerData.tg_relation = relinfo->ri_RelationDesc;
    LocTriggerData.tg_trigtuple = NULL;
    LocTriggerData.tg_newtuple = NULL;
+   LocTriggerData.tg_trigslot = NULL;
+   LocTriggerData.tg_newslot = NULL;
    LocTriggerData.tg_oldtable = NULL;
    LocTriggerData.tg_newtable = NULL;
-   LocTriggerData.tg_trigtuplebuf = InvalidBuffer;
-   LocTriggerData.tg_newtuplebuf = InvalidBuffer;
+
    for (i = 0; i < trigdesc->numtriggers; i++)
    {
        Trigger    *trigger = &trigdesc->triggers[i];
@@ -2985,20 +2990,19 @@ ExecASUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
                              transition_capture);
 }
 
-TupleTableSlot *
+bool
 ExecBRUpdateTriggers(EState *estate, EPQState *epqstate,
                     ResultRelInfo *relinfo,
                     ItemPointer tupleid,
                     HeapTuple fdw_trigtuple,
-                    TupleTableSlot *slot)
+                    TupleTableSlot *newslot)
 {
    TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
-   HeapTuple   slottuple = ExecFetchSlotHeapTuple(slot, true, NULL);
+   TupleTableSlot *oldslot = ExecTriggerGetOldSlot(estate, relinfo->ri_RelationDesc);
+   HeapTuple   slottuple = ExecFetchSlotHeapTuple(newslot, true, NULL);
    HeapTuple   newtuple = slottuple;
    TriggerData LocTriggerData;
    HeapTuple   trigtuple;
-   HeapTuple   oldtuple;
-   TupleTableSlot *newSlot;
    int         i;
    Bitmapset  *updatedCols;
    LockTupleMode lockmode;
@@ -3009,37 +3013,41 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate,
    Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid));
    if (fdw_trigtuple == NULL)
    {
+       bool        is_epqtuple = false;
+
        /* get a copy of the on-disk tuple we are planning to update */
-       trigtuple = GetTupleForTrigger(estate, epqstate, relinfo, tupleid,
-                                      lockmode, &newSlot);
-       if (trigtuple == NULL)
-           return NULL;        /* cancel the update action */
+       if (!GetTupleForTrigger(estate, epqstate, relinfo, tupleid,
+                               lockmode, oldslot, newslot, &is_epqtuple))
+           return false;       /* cancel the update action */
+
+       /*
+        * In READ COMMITTED isolation level, it's possible that the target
+        * tuple was changed due to a concurrent update.  In that case we
+        * have a raw subplan output tuple in newslot, and need to run it
+        * through the junk filter to produce an insertable tuple.
+        *
+        * Caution: more than likely, the passed-in newslot is the same as
+        * the junkfilter's output slot, so we are clobbering the original
+        * value of slottuple by doing the filtering.  This is OK since
+        * neither we nor our caller have any more interest in the prior
+        * contents of that slot.
+        */
+       if (is_epqtuple)
+       {
+           TupleTableSlot *slot = ExecFilterJunk(relinfo->ri_junkFilter, newslot);
+
+           ExecCopySlot(newslot, slot);
+           slottuple = ExecFetchSlotHeapTuple(newslot, true, NULL);
+           newtuple = slottuple;
+       }
+
+       trigtuple = ExecFetchSlotHeapTuple(oldslot, true, NULL);
    }
    else
    {
+       ExecForceStoreHeapTuple(fdw_trigtuple, oldslot);
        trigtuple = fdw_trigtuple;
-       newSlot = NULL;
-   }
-
-   /*
-    * In READ COMMITTED isolation level it's possible that target tuple was
-    * changed due to concurrent update.  In that case we have a raw subplan
-    * output tuple in newSlot, and need to run it through the junk filter to
-    * produce an insertable tuple.
-    *
-    * Caution: more than likely, the passed-in slot is the same as the
-    * junkfilter's output slot, so we are clobbering the original value of
-    * slottuple by doing the filtering.  This is OK since neither we nor our
-    * caller have any more interest in the prior contents of that slot.
-    */
-   if (newSlot != NULL)
-   {
-       slot = ExecFilterJunk(relinfo->ri_junkFilter, newSlot);
-       slottuple = ExecFetchSlotHeapTuple(slot, true, NULL);
-       newtuple = slottuple;
    }
 
-
    LocTriggerData.type = T_TriggerData;
    LocTriggerData.tg_event = TRIGGER_EVENT_UPDATE |
        TRIGGER_EVENT_ROW |
@@ -3051,6 +3059,7 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate,
    for (i = 0; i < trigdesc->numtriggers; i++)
    {
        Trigger    *trigger = &trigdesc->triggers[i];
+       HeapTuple   oldtuple;
 
        if (!TRIGGER_TYPE_MATCHES(trigger->tgtype,
                                  TRIGGER_TYPE_ROW,
@@ -3058,67 +3067,55 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate,
                                  TRIGGER_TYPE_UPDATE))
            continue;
        if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event,
-                           updatedCols, trigtuple, newtuple))
+                           updatedCols, oldslot, newslot))
            continue;
 
+       LocTriggerData.tg_trigslot = oldslot;
        LocTriggerData.tg_trigtuple = trigtuple;
        LocTriggerData.tg_newtuple = oldtuple = newtuple;
-       LocTriggerData.tg_trigtuplebuf = InvalidBuffer;
-       LocTriggerData.tg_newtuplebuf = InvalidBuffer;
+       LocTriggerData.tg_newslot = newslot;
        LocTriggerData.tg_trigger = trigger;
        newtuple = ExecCallTriggerFunc(&LocTriggerData,
                                       i,
                                       relinfo->ri_TrigFunctions,
                                       relinfo->ri_TrigInstrument,
                                       GetPerTupleMemoryContext(estate));
-       if (oldtuple != newtuple && oldtuple != slottuple)
+       if (false && oldtuple != newtuple && oldtuple != slottuple)
            heap_freetuple(oldtuple);
        if (newtuple == NULL)
        {
-           if (trigtuple != fdw_trigtuple)
+           if (false && trigtuple != fdw_trigtuple)
                heap_freetuple(trigtuple);
-           return NULL;        /* "do nothing" */
+           return false;       /* "do nothing" */
        }
+
+       if (newtuple != oldtuple)
+           ExecForceStoreHeapTuple(newtuple, newslot);
    }
-   if (trigtuple != fdw_trigtuple && trigtuple != newtuple)
+   if (false && trigtuple != fdw_trigtuple && trigtuple != newtuple)
        heap_freetuple(trigtuple);
 
-   if (newtuple != slottuple)
-   {
-       /*
-        * Return the modified tuple using the es_trig_tuple_slot.  We assume
-        * the tuple was allocated in per-tuple memory context, and therefore
-        * will go away by itself. The tuple table slot should not try to
-        * clear it.
-        */
-       TupleTableSlot *newslot = estate->es_trig_tuple_slot;
-       TupleDesc   tupdesc = RelationGetDescr(relinfo->ri_RelationDesc);
-
-       if (newslot->tts_tupleDescriptor != tupdesc)
-           ExecSetSlotDescriptor(newslot, tupdesc);
-       ExecStoreHeapTuple(newtuple, newslot, false);
-       slot = newslot;
-   }
-   return slot;
+   return true;
 }
 
 void
 ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
                     ItemPointer tupleid,
                     HeapTuple fdw_trigtuple,
-                    HeapTuple newtuple,
+                    TupleTableSlot *newslot,
                     List *recheckIndexes,
                     TransitionCaptureState *transition_capture)
 {
    TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
+   TupleTableSlot *oldslot = ExecTriggerGetOldSlot(estate, relinfo->ri_RelationDesc);
+
+   ExecClearTuple(oldslot);
 
    if ((trigdesc && trigdesc->trig_update_after_row) ||
        (transition_capture &&
         (transition_capture->tcs_update_old_table ||
          transition_capture->tcs_update_new_table)))
    {
-       HeapTuple   trigtuple;
-
        /*
         * Note: if the UPDATE is converted into a DELETE+INSERT as part of
         * update-partition-key operation, then this function is also called
@@ -3126,30 +3123,31 @@ ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
         * In such case, either old tuple or new tuple can be NULL.
         */
        if (fdw_trigtuple == NULL && ItemPointerIsValid(tupleid))
-           trigtuple = GetTupleForTrigger(estate,
-                                          NULL,
-                                          relinfo,
-                                          tupleid,
-                                          LockTupleExclusive,
-                                          NULL);
-       else
-           trigtuple = fdw_trigtuple;
+           GetTupleForTrigger(estate,
+                              NULL,
+                              relinfo,
+                              tupleid,
+                              LockTupleExclusive,
+                              oldslot,
+                              NULL,
+                              NULL);
+       else if (fdw_trigtuple != NULL)
+           ExecForceStoreHeapTuple(fdw_trigtuple, oldslot);
 
        AfterTriggerSaveEvent(estate, relinfo, TRIGGER_EVENT_UPDATE,
-                             true, trigtuple, newtuple, recheckIndexes,
+                             true, oldslot, newslot, recheckIndexes,
                              GetUpdatedColumns(relinfo, estate),
                              transition_capture);
-       if (trigtuple != fdw_trigtuple)
-           heap_freetuple(trigtuple);
    }
 }
 
-TupleTableSlot *
+bool
 ExecIRUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
-                    HeapTuple trigtuple, TupleTableSlot *slot)
+                    HeapTuple trigtuple, TupleTableSlot *newslot)
 {
    TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
-   HeapTuple   slottuple = ExecFetchSlotHeapTuple(slot, true, NULL);
+   TupleTableSlot *oldslot = ExecTriggerGetOldSlot(estate, relinfo->ri_RelationDesc);
+   HeapTuple   slottuple = ExecFetchSlotHeapTuple(newslot, true, NULL);
    HeapTuple   newtuple = slottuple;
    TriggerData LocTriggerData;
    HeapTuple   oldtuple;
@@ -3162,6 +3160,9 @@ ExecIRUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
    LocTriggerData.tg_relation = relinfo->ri_RelationDesc;
    LocTriggerData.tg_oldtable = NULL;
    LocTriggerData.tg_newtable = NULL;
+
+   ExecForceStoreHeapTuple(trigtuple, oldslot);
+
    for (i = 0; i < trigdesc->numtriggers; i++)
    {
        Trigger    *trigger = &trigdesc->triggers[i];
@@ -3172,42 +3173,30 @@ ExecIRUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
                                  TRIGGER_TYPE_UPDATE))
            continue;
        if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event,
-                           NULL, trigtuple, newtuple))
+                           NULL, oldslot, newslot))
            continue;
 
+       LocTriggerData.tg_trigslot = oldslot;
        LocTriggerData.tg_trigtuple = trigtuple;
+       LocTriggerData.tg_newslot = newslot;
        LocTriggerData.tg_newtuple = oldtuple = newtuple;
-       LocTriggerData.tg_trigtuplebuf = InvalidBuffer;
-       LocTriggerData.tg_newtuplebuf = InvalidBuffer;
+
        LocTriggerData.tg_trigger = trigger;
        newtuple = ExecCallTriggerFunc(&LocTriggerData,
                                       i,
                                       relinfo->ri_TrigFunctions,
                                       relinfo->ri_TrigInstrument,
                                       GetPerTupleMemoryContext(estate));
-       if (oldtuple != newtuple && oldtuple != slottuple)
+       if (false && oldtuple != newtuple && oldtuple != slottuple)
            heap_freetuple(oldtuple);
        if (newtuple == NULL)
-           return NULL;        /* "do nothing" */
-   }
-
-   if (newtuple != slottuple)
-   {
-       /*
-        * Return the modified tuple using the es_trig_tuple_slot.  We assume
-        * the tuple was allocated in per-tuple memory context, and therefore
-        * will go away by itself. The tuple table slot should not try to
-        * clear it.
-        */
-       TupleTableSlot *newslot = estate->es_trig_tuple_slot;
-       TupleDesc   tupdesc = RelationGetDescr(relinfo->ri_RelationDesc);
+           return false;       /* "do nothing" */
 
-       if (newslot->tts_tupleDescriptor != tupdesc)
-           ExecSetSlotDescriptor(newslot, tupdesc);
-       ExecStoreHeapTuple(newtuple, newslot, false);
-       slot = newslot;
+       if (oldtuple != newtuple)
+           ExecForceStoreHeapTuple(newtuple, newslot);
    }
-   return slot;
+
+   return true;
 }
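+
+/*
+ * Editorial sketch (assumed caller shape, not part of the patch itself):
+ * with the new bool return convention a caller looks roughly like
+ *
+ *     if (!ExecIRUpdateTriggers(estate, relinfo, oldtuple, newslot))
+ *         return NULL;            -- trigger asked to "do nothing"
+ *     -- newslot now holds the (possibly trigger-modified) new row
+ */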
 
 void
@@ -3230,10 +3219,11 @@ ExecBSTruncateTriggers(EState *estate, ResultRelInfo *relinfo)
    LocTriggerData.tg_relation = relinfo->ri_RelationDesc;
    LocTriggerData.tg_trigtuple = NULL;
    LocTriggerData.tg_newtuple = NULL;
+   LocTriggerData.tg_trigslot = NULL;
+   LocTriggerData.tg_newslot = NULL;
    LocTriggerData.tg_oldtable = NULL;
    LocTriggerData.tg_newtable = NULL;
-   LocTriggerData.tg_trigtuplebuf = InvalidBuffer;
-   LocTriggerData.tg_newtuplebuf = InvalidBuffer;
+
    for (i = 0; i < trigdesc->numtriggers; i++)
    {
        Trigger    *trigger = &trigdesc->triggers[i];
@@ -3273,25 +3263,24 @@ ExecASTruncateTriggers(EState *estate, ResultRelInfo *relinfo)
 }
 
 
-static HeapTuple
+static bool
 GetTupleForTrigger(EState *estate,
                   EPQState *epqstate,
                   ResultRelInfo *relinfo,
                   ItemPointer tid,
                   LockTupleMode lockmode,
-                  TupleTableSlot **newSlot)
+                  TupleTableSlot *oldslot,
+                  TupleTableSlot *newslot,
+                  bool *is_epqtuple)
 {
    Relation    relation = relinfo->ri_RelationDesc;
-   HeapTupleData tuple;
-   HeapTuple   result;
-   Buffer      buffer;
 
-   if (newSlot != NULL)
+   if (is_epqtuple)
    {
        HTSU_Result test;
        HeapUpdateFailureData hufd;
 
-       *newSlot = NULL;
+       *is_epqtuple = false;
 
        /* caller must pass an epqstate if EvalPlanQual is possible */
        Assert(epqstate != NULL);
@@ -3299,12 +3288,13 @@ GetTupleForTrigger(EState *estate,
        /*
         * lock tuple for update
         */
-ltrmark:;
-       tuple.t_self = *tid;
-       test = heap_lock_tuple(relation, &tuple,
-                              estate->es_output_cid,
-                              lockmode, LockWaitBlock,
-                              false, &buffer, &hufd);
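+       /*
+        * In READ COMMITTED we ask the AM to follow the update chain to the
+        * latest row version (see the hufd.traversed handling below); under
+        * a transaction snapshot a concurrent update raises a serialization
+        * failure instead, so no chasing is wanted.
+        */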
+       test = table_lock_tuple(relation, tid, estate->es_snapshot, oldslot,
+                               estate->es_output_cid,
+                               lockmode, LockWaitBlock,
+                               IsolationUsesXactSnapshot() ? 0 : TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
+                               &hufd);
+
+       /* FIXME: result = tuple; */
        switch (test)
        {
            case HeapTupleSelfUpdated:
@@ -3324,103 +3314,71 @@ ltrmark:;
                             errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
 
                /* treat it as deleted; do not process */
-               ReleaseBuffer(buffer);
-               return NULL;
+               return false;
 
            case HeapTupleMayBeUpdated:
-               break;
-
-           case HeapTupleUpdated:
-               ReleaseBuffer(buffer);
-               if (IsolationUsesXactSnapshot())
-                   ereport(ERROR,
-                           (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
-                            errmsg("could not serialize access due to concurrent update")));
-               if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
-                   ereport(ERROR,
-                           (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
-                            errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
 
-               if (!ItemPointerEquals(&hufd.ctid, &tuple.t_self))
+               if (hufd.traversed)
                {
-                   /* it was updated, so look at the updated version */
+                   TupleTableSlot *testslot;
                    TupleTableSlot *epqslot;
 
+                   EvalPlanQualBegin(epqstate, estate);
+
+                   testslot = EvalPlanQualSlot(epqstate, relation, relinfo->ri_RangeTableIndex);
+                   ExecCopySlot(testslot, oldslot);
+
                    epqslot = EvalPlanQual(estate,
                                           epqstate,
                                           relation,
                                           relinfo->ri_RangeTableIndex,
-                                          lockmode,
-                                          &hufd.ctid,
-                                          hufd.xmax);
-                   if (!TupIsNull(epqslot))
-                   {
-                       *tid = hufd.ctid;
-                       *newSlot = epqslot;
-
-                       /*
-                        * EvalPlanQual already locked the tuple, but we
-                        * re-call heap_lock_tuple anyway as an easy way of
-                        * re-fetching the correct tuple.  Speed is hardly a
-                        * criterion in this path anyhow.
-                        */
-                       goto ltrmark;
-                   }
+                                          testslot);
+
+                   /*
+                    * If PlanQual failed for the updated tuple, we must not
+                    * process this tuple!
+                    */
+                   if (TupIsNull(epqslot))
+                       return false;
+
+                   if (newslot)
+                       ExecCopySlot(newslot, epqslot);
+                   else
+                       ExecCopySlot(oldslot, epqslot);
+
+                   *is_epqtuple = true;
                }
+               break;
 
-               /*
-                * if tuple was deleted or PlanQual failed for updated tuple -
-                * we must not process this tuple!
-                */
-               return NULL;
+           case HeapTupleUpdated:
+               if (IsolationUsesXactSnapshot())
+                   ereport(ERROR,
+                           (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+                            errmsg("could not serialize access due to concurrent update")));
+               elog(ERROR, "wrong heap_lock_tuple status: %u", test);
+               break;
+
+           case HeapTupleDeleted:
+               if (IsolationUsesXactSnapshot())
+                   ereport(ERROR,
+                           (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+                            errmsg("could not serialize access due to concurrent update")));
+               /* tuple was deleted */
+               return false;
 
            case HeapTupleInvisible:
                elog(ERROR, "attempted to lock invisible tuple");
                break;
 
            default:
-               ReleaseBuffer(buffer);
                elog(ERROR, "unrecognized heap_lock_tuple status: %u", test);
-               return NULL;    /* keep compiler quiet */
+               return false;   /* keep compiler quiet */
        }
    }
    else
    {
-       Page        page;
-       ItemId      lp;
-
-       buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
-
-       /*
-        * Although we already know this tuple is valid, we must lock the
-        * buffer to ensure that no one has a buffer cleanup lock; otherwise
-        * they might move the tuple while we try to copy it.  But we can
-        * release the lock before actually doing the heap_copytuple call,
-        * since holding pin is sufficient to prevent anyone from getting a
-        * cleanup lock they don't already hold.
-        */
-       LockBuffer(buffer, BUFFER_LOCK_SHARE);
-
-       page = BufferGetPage(buffer);
-       lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid));
-
-       Assert(ItemIdIsNormal(lp));
-
-       tuple.t_data = (HeapTupleHeader) PageGetItem(page, lp);
-       tuple.t_len = ItemIdGetLength(lp);
-       tuple.t_self = *tid;
-       tuple.t_tableOid = RelationGetRelid(relation);
-
-       LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+       if (!table_fetch_row_version(relation, tid, SnapshotAny, oldslot, NULL))
+           elog(ERROR, "couldn't fetch tuple");
    }
 
-   if (HeapTupleHeaderGetNatts(tuple.t_data) < relation->rd_att->natts)
-       result = heap_expand_tuple(&tuple, relation->rd_att);
-   else
-       result = heap_copytuple(&tuple);
-   ReleaseBuffer(buffer);
-
-   return result;
+   return true;
 }
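+
+/*
+ * Editorial sketch: a BEFORE ROW UPDATE caller now supplies slots and tests
+ * the boolean result, e.g.
+ *
+ *     if (!GetTupleForTrigger(estate, epqstate, relinfo, tupleid,
+ *                             LockTupleExclusive, oldslot, newslot,
+ *                             &is_epqtuple))
+ *         return false;   -- row is gone, do not fire the trigger
+ */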
 
 /*
@@ -3430,7 +3388,7 @@ static bool
 TriggerEnabled(EState *estate, ResultRelInfo *relinfo,
               Trigger *trigger, TriggerEvent event,
               Bitmapset *modifiedCols,
-              HeapTuple oldtup, HeapTuple newtup)
+              TupleTableSlot *oldslot, TupleTableSlot *newslot)
 {
    /* Check replication-role-dependent enable state */
    if (SessionReplicationRole == SESSION_REPLICATION_ROLE_REPLICA)
@@ -3472,11 +3430,8 @@ TriggerEnabled(EState *estate, ResultRelInfo *relinfo,
    /* Check for WHEN clause */
    if (trigger->tgqual)
    {
-       TupleDesc   tupdesc = RelationGetDescr(relinfo->ri_RelationDesc);
        ExprState **predicate;
        ExprContext *econtext;
-       TupleTableSlot *oldslot = NULL;
-       TupleTableSlot *newslot = NULL;
        MemoryContext oldContext;
        int         i;
 
@@ -3515,40 +3470,6 @@ TriggerEnabled(EState *estate, ResultRelInfo *relinfo,
         */
        econtext = GetPerTupleExprContext(estate);
 
-       /*
-        * Put OLD and NEW tuples into tupleslots for expression evaluation.
-        * These slots can be shared across the whole estate, but be careful
-        * that they have the current resultrel's tupdesc.
-        */
-       if (HeapTupleIsValid(oldtup))
-       {
-           if (estate->es_trig_oldtup_slot == NULL)
-           {
-               oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
-               estate->es_trig_oldtup_slot =
-                   ExecInitExtraTupleSlot(estate, NULL, &TTSOpsHeapTuple);
-               MemoryContextSwitchTo(oldContext);
-           }
-           oldslot = estate->es_trig_oldtup_slot;
-           if (oldslot->tts_tupleDescriptor != tupdesc)
-               ExecSetSlotDescriptor(oldslot, tupdesc);
-           ExecStoreHeapTuple(oldtup, oldslot, false);
-       }
-       if (HeapTupleIsValid(newtup))
-       {
-           if (estate->es_trig_newtup_slot == NULL)
-           {
-               oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
-               estate->es_trig_newtup_slot =
-                   ExecInitExtraTupleSlot(estate, NULL, &TTSOpsHeapTuple);
-               MemoryContextSwitchTo(oldContext);
-           }
-           newslot = estate->es_trig_newtup_slot;
-           if (newslot->tts_tupleDescriptor != tupdesc)
-               ExecSetSlotDescriptor(newslot, tupdesc);
-           ExecStoreHeapTuple(newtup, newslot, false);
-       }
-
        /*
         * Finally evaluate the expression, making the old and/or new tuples
         * available as INNER_VAR/OUTER_VAR respectively.
@@ -3882,7 +3803,8 @@ struct AfterTriggersTableData
 
 static AfterTriggersData afterTriggers;
 
-static void AfterTriggerExecute(AfterTriggerEvent event,
+static void AfterTriggerExecute(EState *estate,
+                   AfterTriggerEvent event,
                    Relation rel, TriggerDesc *trigdesc,
                    FmgrInfo *finfo,
                    Instrumentation *instr,
@@ -4217,7 +4139,8 @@ afterTriggerDeleteHeadEventChunk(AfterTriggersQueryData *qs)
  * ----------
  */
 static void
-AfterTriggerExecute(AfterTriggerEvent event,
+AfterTriggerExecute(EState *estate,
+                   AfterTriggerEvent event,
                    Relation rel, TriggerDesc *trigdesc,
                    FmgrInfo *finfo, Instrumentation *instr,
                    MemoryContext per_tuple_context,
@@ -4227,17 +4150,16 @@ AfterTriggerExecute(AfterTriggerEvent event,
    AfterTriggerShared evtshared = GetTriggerSharedData(event);
    Oid         tgoid = evtshared->ats_tgoid;
    TriggerData LocTriggerData;
-   HeapTupleData tuple1;
-   HeapTupleData tuple2;
    HeapTuple   rettuple;
-   Buffer      buffer1 = InvalidBuffer;
-   Buffer      buffer2 = InvalidBuffer;
    int         tgindx;
 
    /*
     * Locate trigger in trigdesc.
     */
    LocTriggerData.tg_trigger = NULL;
+   LocTriggerData.tg_trigslot = NULL;
+   LocTriggerData.tg_newslot = NULL;
+
    for (tgindx = 0; tgindx < trigdesc->numtriggers; tgindx++)
    {
        if (trigdesc->triggers[tgindx].tgoid == tgoid)
@@ -4287,31 +4209,31 @@ AfterTriggerExecute(AfterTriggerEvent event,
             * that is stored as a heap tuple, constructed in different memory
             * context, in the slot anyway.
             */
-           LocTriggerData.tg_trigtuple = ExecFetchSlotHeapTuple(trig_tuple_slot1,
-                                                                   true, NULL);
-           LocTriggerData.tg_trigtuplebuf = InvalidBuffer;
+           LocTriggerData.tg_trigslot = trig_tuple_slot1;  /* FIXME */
+           LocTriggerData.tg_trigtuple =
+               ExecFetchSlotHeapTuple(trig_tuple_slot1, true, NULL);
 
+           LocTriggerData.tg_newslot = trig_tuple_slot2;   /* FIXME */
            LocTriggerData.tg_newtuple =
                ((evtshared->ats_event & TRIGGER_EVENT_OPMASK) ==
                 TRIGGER_EVENT_UPDATE) ?
                ExecFetchSlotHeapTuple(trig_tuple_slot2, true, NULL) : NULL;
-           LocTriggerData.tg_newtuplebuf = InvalidBuffer;
 
            break;
 
        default:
+
            if (ItemPointerIsValid(&(event->ate_ctid1)))
            {
-               ItemPointerCopy(&(event->ate_ctid1), &(tuple1.t_self));
-               if (!heap_fetch(rel, SnapshotAny, &tuple1, &buffer1, false, NULL))
+               LocTriggerData.tg_trigslot = ExecTriggerGetOldSlot(estate, rel);
+               if (!table_fetch_row_version(rel, &(event->ate_ctid1), SnapshotAny, LocTriggerData.tg_trigslot, NULL))
                    elog(ERROR, "failed to fetch tuple1 for AFTER trigger");
-               LocTriggerData.tg_trigtuple = &tuple1;
-               LocTriggerData.tg_trigtuplebuf = buffer1;
+               LocTriggerData.tg_trigtuple = ExecFetchSlotHeapTuple(LocTriggerData.tg_trigslot, false, NULL);
+
            }
            else
            {
                LocTriggerData.tg_trigtuple = NULL;
-               LocTriggerData.tg_trigtuplebuf = InvalidBuffer;
            }
 
            /* don't touch ctid2 if not there */
@@ -4319,16 +4241,14 @@ AfterTriggerExecute(AfterTriggerEvent event,
                AFTER_TRIGGER_2CTID &&
                ItemPointerIsValid(&(event->ate_ctid2)))
            {
-               ItemPointerCopy(&(event->ate_ctid2), &(tuple2.t_self));
-               if (!heap_fetch(rel, SnapshotAny, &tuple2, &buffer2, false, NULL))
+               LocTriggerData.tg_newslot = ExecTriggerGetNewSlot(estate, rel);
+               if (!table_fetch_row_version(rel, &(event->ate_ctid2), SnapshotAny, LocTriggerData.tg_newslot, NULL))
                    elog(ERROR, "failed to fetch tuple2 for AFTER trigger");
-               LocTriggerData.tg_newtuple = &tuple2;
-               LocTriggerData.tg_newtuplebuf = buffer2;
+               LocTriggerData.tg_newtuple = ExecFetchSlotHeapTuple(LocTriggerData.tg_newslot, false, NULL);
            }
            else
            {
                LocTriggerData.tg_newtuple = NULL;
-               LocTriggerData.tg_newtuplebuf = InvalidBuffer;
            }
    }
 
@@ -4380,12 +4300,12 @@ AfterTriggerExecute(AfterTriggerEvent event,
        heap_freetuple(rettuple);
 
    /*
-    * Release buffers
+    * Release resources
     */
-   if (buffer1 != InvalidBuffer)
-       ReleaseBuffer(buffer1);
-   if (buffer2 != InvalidBuffer)
-       ReleaseBuffer(buffer2);
+   if (LocTriggerData.tg_trigslot)
+       ExecClearTuple(LocTriggerData.tg_trigslot);
+   if (LocTriggerData.tg_newslot)
+       ExecClearTuple(LocTriggerData.tg_newslot);
 
    /*
     * If doing EXPLAIN ANALYZE, stop charging time to this trigger, and count
@@ -4552,6 +4472,7 @@ afterTriggerInvokeEvents(AfterTriggerEventList *events,
                        slot2 = MakeSingleTupleTableSlot(rel->rd_att,
                                                         &TTSOpsMinimalTuple);
                    }
+
                    if (trigdesc == NULL)   /* should not happen */
                        elog(ERROR, "relation %u has no triggers",
                             evtshared->ats_relid);
@@ -4562,7 +4483,7 @@ afterTriggerInvokeEvents(AfterTriggerEventList *events,
                 * still set, so recursive examinations of the event list
                 * won't try to re-fire it.
                 */
-               AfterTriggerExecute(event, rel, trigdesc, finfo, instr,
+               AfterTriggerExecute(estate, event, rel, trigdesc, finfo, instr,
                                    per_tuple_context, slot1, slot2);
 
                /*
@@ -4606,6 +4527,7 @@ afterTriggerInvokeEvents(AfterTriggerEventList *events,
    if (local_estate)
    {
        ExecCleanUpTriggerState(estate);
+       ExecResetTupleTable(estate->es_tupleTable, false);
        FreeExecutorState(estate);
    }
 
@@ -5743,7 +5665,7 @@ AfterTriggerPendingOnRel(Oid relid)
 static void
 AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
                      int event, bool row_trigger,
-                     HeapTuple oldtup, HeapTuple newtup,
+                     TupleTableSlot *oldslot, TupleTableSlot *newslot,
                      List *recheckIndexes, Bitmapset *modifiedCols,
                      TransitionCaptureState *transition_capture)
 {
@@ -5792,11 +5714,11 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
         * deleted.
         */
        Assert(!(event == TRIGGER_EVENT_DELETE && delete_old_table &&
-                oldtup == NULL));
+                TupIsNull(oldslot)));
        Assert(!(event == TRIGGER_EVENT_INSERT && insert_new_table &&
-                newtup == NULL));
+                TupIsNull(newslot)));
 
-       if (oldtup != NULL &&
+       if (!TupIsNull(oldslot) &&
            ((event == TRIGGER_EVENT_DELETE && delete_old_table) ||
             (event == TRIGGER_EVENT_UPDATE && update_old_table)))
        {
@@ -5806,15 +5728,17 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
 
            if (map != NULL)
            {
-               HeapTuple   converted = execute_attr_map_tuple(oldtup, map);
-
+               HeapTuple   converted;
+               /* PBORKED */
+               converted = execute_attr_map_tuple(ExecFetchSlotHeapTuple(oldslot, true, NULL),
+                                                  map);
                tuplestore_puttuple(old_tuplestore, converted);
                pfree(converted);
            }
            else
-               tuplestore_puttuple(old_tuplestore, oldtup);
+               tuplestore_puttupleslot(old_tuplestore, oldslot);
        }
-       if (newtup != NULL &&
+       if (!TupIsNull(newslot) &&
            ((event == TRIGGER_EVENT_INSERT && insert_new_table) ||
             (event == TRIGGER_EVENT_UPDATE && update_new_table)))
        {
@@ -5826,13 +5750,15 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
                tuplestore_puttuple(new_tuplestore, original_insert_tuple);
            else if (map != NULL)
            {
-               HeapTuple   converted = execute_attr_map_tuple(newtup, map);
+               HeapTuple   converted;
 
+               converted = execute_attr_map_tuple(ExecFetchSlotHeapTuple(newslot, true, NULL),
+                                                  map);
                tuplestore_puttuple(new_tuplestore, converted);
                pfree(converted);
            }
            else
-               tuplestore_puttuple(new_tuplestore, newtup);
+               tuplestore_puttupleslot(new_tuplestore, newslot);
        }
 
        /*
@@ -5846,7 +5772,7 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
            (event == TRIGGER_EVENT_DELETE && !trigdesc->trig_delete_after_row) ||
            (event == TRIGGER_EVENT_INSERT && !trigdesc->trig_insert_after_row) ||
            (event == TRIGGER_EVENT_UPDATE && !trigdesc->trig_update_after_row) ||
-           (event == TRIGGER_EVENT_UPDATE && ((oldtup == NULL) ^ (newtup == NULL))))
+           (event == TRIGGER_EVENT_UPDATE && (TupIsNull(oldslot) ^ TupIsNull(newslot))))
            return;
    }
 
@@ -5868,15 +5794,15 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
            tgtype_event = TRIGGER_TYPE_INSERT;
            if (row_trigger)
            {
-               Assert(oldtup == NULL);
-               Assert(newtup != NULL);
-               ItemPointerCopy(&(newtup->t_self), &(new_event.ate_ctid1));
+               Assert(oldslot == NULL);
+               Assert(newslot != NULL);
+               ItemPointerCopy(&(newslot->tts_tid), &(new_event.ate_ctid1));
                ItemPointerSetInvalid(&(new_event.ate_ctid2));
            }
            else
            {
-               Assert(oldtup == NULL);
-               Assert(newtup == NULL);
+               Assert(oldslot == NULL);
+               Assert(newslot == NULL);
                ItemPointerSetInvalid(&(new_event.ate_ctid1));
                ItemPointerSetInvalid(&(new_event.ate_ctid2));
                cancel_prior_stmt_triggers(RelationGetRelid(rel),
@@ -5887,15 +5813,15 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
            tgtype_event = TRIGGER_TYPE_DELETE;
            if (row_trigger)
            {
-               Assert(oldtup != NULL);
-               Assert(newtup == NULL);
-               ItemPointerCopy(&(oldtup->t_self), &(new_event.ate_ctid1));
+               Assert(oldslot != NULL);
+               Assert(newslot == NULL);
+               ItemPointerCopy(&(oldslot->tts_tid), &(new_event.ate_ctid1));
                ItemPointerSetInvalid(&(new_event.ate_ctid2));
            }
            else
            {
-               Assert(oldtup == NULL);
-               Assert(newtup == NULL);
+               Assert(oldslot == NULL);
+               Assert(newslot == NULL);
                ItemPointerSetInvalid(&(new_event.ate_ctid1));
                ItemPointerSetInvalid(&(new_event.ate_ctid2));
                cancel_prior_stmt_triggers(RelationGetRelid(rel),
@@ -5906,15 +5832,15 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
            tgtype_event = TRIGGER_TYPE_UPDATE;
            if (row_trigger)
            {
-               Assert(oldtup != NULL);
-               Assert(newtup != NULL);
-               ItemPointerCopy(&(oldtup->t_self), &(new_event.ate_ctid1));
-               ItemPointerCopy(&(newtup->t_self), &(new_event.ate_ctid2));
+               Assert(oldslot != NULL);
+               Assert(newslot != NULL);
+               ItemPointerCopy(&(oldslot->tts_tid), &(new_event.ate_ctid1));
+               ItemPointerCopy(&(newslot->tts_tid), &(new_event.ate_ctid2));
            }
            else
            {
-               Assert(oldtup == NULL);
-               Assert(newtup == NULL);
+               Assert(oldslot == NULL);
+               Assert(newslot == NULL);
                ItemPointerSetInvalid(&(new_event.ate_ctid1));
                ItemPointerSetInvalid(&(new_event.ate_ctid2));
                cancel_prior_stmt_triggers(RelationGetRelid(rel),
@@ -5923,8 +5849,8 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
            break;
        case TRIGGER_EVENT_TRUNCATE:
            tgtype_event = TRIGGER_TYPE_TRUNCATE;
-           Assert(oldtup == NULL);
-           Assert(newtup == NULL);
+           Assert(oldslot == NULL);
+           Assert(newslot == NULL);
            ItemPointerSetInvalid(&(new_event.ate_ctid1));
            ItemPointerSetInvalid(&(new_event.ate_ctid2));
            break;
@@ -5951,7 +5877,7 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
                                  tgtype_event))
            continue;
        if (!TriggerEnabled(estate, relinfo, trigger, event,
-                           modifiedCols, oldtup, newtup))
+                           modifiedCols, oldslot, newslot))
            continue;
 
        if (relkind == RELKIND_FOREIGN_TABLE && row_trigger)
@@ -5978,7 +5904,7 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
                case RI_TRIGGER_PK:
                    /* Update or delete on trigger's PK table */
                    if (!RI_FKey_pk_upd_check_required(trigger, rel,
-                                                      oldtup, newtup))
+                                                      oldslot, newslot))
                    {
                        /* skip queuing this event */
                        continue;
@@ -5988,7 +5914,7 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
                case RI_TRIGGER_FK:
                    /* Update on trigger's FK table */
                    if (!RI_FKey_fk_upd_check_required(trigger, rel,
-                                                      oldtup, newtup))
+                                                      oldslot, newslot))
                    {
                        /* skip queuing this event */
                        continue;
@@ -6042,10 +5968,10 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
     */
    if (fdw_tuplestore)
    {
-       if (oldtup != NULL)
-           tuplestore_puttuple(fdw_tuplestore, oldtup);
-       if (newtup != NULL)
-           tuplestore_puttuple(fdw_tuplestore, newtup);
+       if (oldslot != NULL)
+           tuplestore_puttupleslot(fdw_tuplestore, oldslot);
+       if (newslot != NULL)
+           tuplestore_puttupleslot(fdw_tuplestore, newslot);
    }
 }
 
index 1ffc8231d467a7df86b223c1a6c02ffc439ad4c0..24e8f0c279c55fa194e8c6dec38baf138ef96090 100644 (file)
@@ -32,6 +32,7 @@
 #include "postgres.h"
 
 #include "access/htup_details.h"
+#include "access/tableam.h"
 #include "access/xact.h"
 #include "catalog/binary_upgrade.h"
 #include "catalog/catalog.h"
@@ -2358,14 +2359,16 @@ AlterDomainNotNull(List *names, bool notNull)
            RelToCheck *rtc = (RelToCheck *) lfirst(rt);
            Relation    testrel = rtc->rel;
            TupleDesc   tupdesc = RelationGetDescr(testrel);
-           HeapScanDesc scan;
-           HeapTuple   tuple;
+           TableScanDesc scan;
+           TupleTableSlot *slot;
            Snapshot    snapshot;
 
            /* Scan all tuples in this relation */
            snapshot = RegisterSnapshot(GetLatestSnapshot());
-           scan = heap_beginscan(testrel, snapshot, 0, NULL);
-           while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+           scan = table_beginscan(testrel, snapshot, 0, NULL);
+           slot = table_gimmegimmeslot(testrel, NULL);
+
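+           /*
+            * The slot comes from the table AM (table_gimmegimmeslot above),
+            * so this loop works unchanged for any storage engine; the slot
+            * must be dropped once the scan is done.
+            */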
+           while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
            {
                int         i;
 
@@ -2375,7 +2378,7 @@ AlterDomainNotNull(List *names, bool notNull)
                    int         attnum = rtc->atts[i];
                    Form_pg_attribute attr = TupleDescAttr(tupdesc, attnum - 1);
 
-                   if (heap_attisnull(tuple, attnum, tupdesc))
+                   if (slot_attisnull(slot, attnum))
                    {
                        /*
                         * In principle the auxiliary information for this
@@ -2394,7 +2397,9 @@ AlterDomainNotNull(List *names, bool notNull)
                    }
                }
            }
-           heap_endscan(scan);
+
+           ExecDropSingleTupleTableSlot(slot);
+           table_endscan(scan);
            UnregisterSnapshot(snapshot);
 
            /* Close each rel after processing, but keep lock */
@@ -2757,14 +2762,16 @@ validateDomainConstraint(Oid domainoid, char *ccbin)
        RelToCheck *rtc = (RelToCheck *) lfirst(rt);
        Relation    testrel = rtc->rel;
        TupleDesc   tupdesc = RelationGetDescr(testrel);
-       HeapScanDesc scan;
-       HeapTuple   tuple;
+       TableScanDesc scan;
+       TupleTableSlot *slot;
        Snapshot    snapshot;
 
        /* Scan all tuples in this relation */
        snapshot = RegisterSnapshot(GetLatestSnapshot());
-       scan = heap_beginscan(testrel, snapshot, 0, NULL);
-       while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+       scan = table_beginscan(testrel, snapshot, 0, NULL);
+       slot = table_gimmegimmeslot(testrel, NULL);
+
+       while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
        {
            int         i;
 
@@ -2777,7 +2784,7 @@ validateDomainConstraint(Oid domainoid, char *ccbin)
                Datum       conResult;
                Form_pg_attribute attr = TupleDescAttr(tupdesc, attnum - 1);
 
-               d = heap_getattr(tuple, attnum, tupdesc, &isNull);
+               d = slot_getattr(slot, attnum, &isNull);
 
                econtext->domainValue_datum = d;
                econtext->domainValue_isNull = isNull;
@@ -2807,7 +2814,9 @@ validateDomainConstraint(Oid domainoid, char *ccbin)
 
            ResetExprContext(econtext);
        }
-       heap_endscan(scan);
+
+       ExecDropSingleTupleTableSlot(slot);
+       table_endscan(scan);
        UnregisterSnapshot(snapshot);
 
        /* Hold relation lock till commit (XXX bad for concurrency) */
index 15eec19418c8e5358155f3317260ff8e0ac0474b..fcae282044b2567500dba943ca17ef76db4727a7 100644 (file)
@@ -28,6 +28,7 @@
 #include "access/heapam.h"
 #include "access/htup_details.h"
 #include "access/multixact.h"
+#include "access/tableam.h"
 #include "access/transam.h"
 #include "access/xact.h"
 #include "catalog/namespace.h"
@@ -746,14 +747,14 @@ get_all_vacuum_rels(int options)
 {
    List       *vacrels = NIL;
    Relation    pgclass;
-   HeapScanDesc scan;
+   TableScanDesc scan;
    HeapTuple   tuple;
 
    pgclass = heap_open(RelationRelationId, AccessShareLock);
 
-   scan = heap_beginscan_catalog(pgclass, 0, NULL);
+   scan = table_beginscan_catalog(pgclass, 0, NULL);
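+
+   /*
+    * Note: catalog scans still hand back HeapTuples (heap_scan_getnext
+    * below) rather than slots, since system catalogs are always heap
+    * tables.
+    */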
 
-   while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+   while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
    {
        Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
        MemoryContext oldcontext;
@@ -785,7 +786,7 @@ get_all_vacuum_rels(int options)
        MemoryContextSwitchTo(oldcontext);
    }
 
-   heap_endscan(scan);
+   table_endscan(scan);
    heap_close(pgclass, AccessShareLock);
 
    return vacrels;
@@ -1382,7 +1383,7 @@ vac_truncate_clog(TransactionId frozenXID,
 {
    TransactionId nextXID = ReadNewTransactionId();
    Relation    relation;
-   HeapScanDesc scan;
+   TableScanDesc scan;
    HeapTuple   tuple;
    Oid         oldestxid_datoid;
    Oid         minmulti_datoid;
@@ -1413,9 +1414,9 @@ vac_truncate_clog(TransactionId frozenXID,
     */
    relation = heap_open(DatabaseRelationId, AccessShareLock);
 
-   scan = heap_beginscan_catalog(relation, 0, NULL);
+   scan = table_beginscan_catalog(relation, 0, NULL);
 
-   while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+   while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
    {
        volatile FormData_pg_database *dbform = (Form_pg_database) GETSTRUCT(tuple);
        TransactionId datfrozenxid = dbform->datfrozenxid;
@@ -1452,7 +1453,7 @@ vac_truncate_clog(TransactionId frozenXID,
        }
    }
 
-   heap_endscan(scan);
+   table_endscan(scan);
 
    heap_close(relation, AccessShareLock);
 
@@ -1711,7 +1712,7 @@ vacuum_rel(Oid relid, RangeVar *relation, int options, VacuumParams *params)
        cluster_rel(relid, InvalidOid, cluster_options);
    }
    else
-       heap_vacuum_rel(onerel, options, params, vac_strategy);
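+       /* dispatch through the relation's table AM instead of assuming heap */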
+       table_vacuum_rel(onerel, options, params, vac_strategy);
 
    /* Roll back any GUC changes executed by index functions */
    AtEOXact_GUC(false, save_nestlevel);
index 39c462a4e59232ed30b86fed96d63f7098713e48..35048400cfcfa998b51c47e10628ae9792179b7f 100644 (file)
@@ -203,7 +203,7 @@ execCurrentOf(CurrentOfExpr *cexpr,
             */
            IndexScanDesc scan = ((IndexOnlyScanState *) scanstate)->ioss_ScanDesc;
 
-           *current_tid = scan->xs_ctup.t_self;
+           *current_tid = scan->xs_heaptid;
        }
        else
        {
index ec4a2506f151e43102c33644c0003b0ec7fcac08..6cac1cf99cd490e5df2c2a4c3f80b6b03ec5c5e1 100644 (file)
@@ -57,6 +57,7 @@
 #include "postgres.h"
 
 #include "access/tuptoaster.h"
+#include "access/sysattr.h"
 #include "catalog/pg_type.h"
 #include "commands/sequence.h"
 #include "executor/execExpr.h"
index 8b35bb458de3992cde113a7dee8561d72f73a84d..66d838dbcef8c11a9090fe3552c01895aa41745a 100644 (file)
 #include "postgres.h"
 
 #include "access/relscan.h"
+#include "access/tableam.h"
 #include "access/xact.h"
 #include "catalog/index.h"
 #include "executor/executor.h"
@@ -269,12 +270,12 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo)
  */
 List *
 ExecInsertIndexTuples(TupleTableSlot *slot,
-                     ItemPointer tupleid,
                      EState *estate,
                      bool noDupErr,
                      bool *specConflict,
                      List *arbiterIndexes)
 {
+   ItemPointer tupleid = &slot->tts_tid;
    List       *result = NIL;
    ResultRelInfo *resultRelInfo;
    int         i;
@@ -286,6 +287,8 @@ ExecInsertIndexTuples(TupleTableSlot *slot,
    Datum       values[INDEX_MAX_KEYS];
    bool        isnull[INDEX_MAX_KEYS];
 
+   Assert(ItemPointerIsValid(tupleid));
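+
+   /*
+    * Editorial note: the TID is taken from the slot itself now, so the
+    * tuple must already have been stored into the table (setting tts_tid)
+    * before index insertion.
+    */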
+
    /*
     * Get information from the result relation info structure.
     */
@@ -650,7 +653,6 @@ check_exclusion_or_unique_constraint(Relation heap, Relation index,
    Oid        *index_collations = index->rd_indcollation;
    int         indnkeyatts = IndexRelationGetNumberOfKeyAttributes(index);
    IndexScanDesc index_scan;
-   HeapTuple   tup;
    ScanKeyData scankeys[INDEX_MAX_KEYS];
    SnapshotData DirtySnapshot;
    int         i;
@@ -706,8 +708,7 @@ check_exclusion_or_unique_constraint(Relation heap, Relation index,
     * to this slot.  Be sure to save and restore caller's value for
     * scantuple.
     */
-   existing_slot = MakeSingleTupleTableSlot(RelationGetDescr(heap),
-                                            &TTSOpsHeapTuple);
+   existing_slot = table_gimmegimmeslot(heap, NULL);
 
    econtext = GetPerTupleExprContext(estate);
    save_scantuple = econtext->ecxt_scantuple;
@@ -723,11 +724,9 @@ retry:
    index_scan = index_beginscan(heap, index, &DirtySnapshot, indnkeyatts, 0);
    index_rescan(index_scan, scankeys, indnkeyatts, NULL, 0);
 
-   while ((tup = index_getnext(index_scan,
-                               ForwardScanDirection)) != NULL)
+   while (index_getnext_slot(index_scan, ForwardScanDirection, existing_slot))
    {
        TransactionId xwait;
-       ItemPointerData ctid_wait;
        XLTW_Oper   reason_wait;
        Datum       existing_values[INDEX_MAX_KEYS];
        bool        existing_isnull[INDEX_MAX_KEYS];
@@ -738,7 +737,7 @@ retry:
         * Ignore the entry for the tuple we're trying to check.
         */
        if (ItemPointerIsValid(tupleid) &&
-           ItemPointerEquals(tupleid, &tup->t_self))
+           ItemPointerEquals(tupleid, &existing_slot->tts_tid))
        {
            if (found_self)     /* should not happen */
                elog(ERROR, "found self tuple multiple times in index \"%s\"",
@@ -751,7 +750,6 @@ retry:
         * Extract the index column values and isnull flags from the existing
         * tuple.
         */
-       ExecStoreHeapTuple(tup, existing_slot, false);
        FormIndexDatum(indexInfo, existing_slot, estate,
                       existing_values, existing_isnull);
 
@@ -786,7 +784,10 @@ retry:
              DirtySnapshot.speculativeToken &&
              TransactionIdPrecedes(GetCurrentTransactionId(), xwait))))
        {
-           ctid_wait = tup->t_data->t_ctid;
+           /*
+            * PBORKED? When waiting, we used to use t_ctid rather than
+            * t_self, but I don't see a need for that?
+            */
            reason_wait = indexInfo->ii_ExclusionOps ?
                XLTW_RecheckExclusionConstr : XLTW_InsertIndex;
            index_endscan(index_scan);
@@ -794,7 +795,9 @@ retry:
                SpeculativeInsertionWait(DirtySnapshot.xmin,
                                         DirtySnapshot.speculativeToken);
            else
-               XactLockTableWait(xwait, heap, &ctid_wait, reason_wait);
+               XactLockTableWait(xwait, heap,
+                                 &existing_slot->tts_tid, reason_wait);
+
            goto retry;
        }
 
@@ -806,7 +809,9 @@ retry:
        {
            conflict = true;
            if (conflictTid)
-               *conflictTid = tup->t_self;
+           {
+               *conflictTid = existing_slot->tts_tid;
+           }
            break;
        }
 
index d83d296d82c283a1c3a92dbb76afb864ea607b01..9fe420bfa87e01d53cdd7018fbe32a30ebf02820 100644 (file)
@@ -38,6 +38,7 @@
 #include "postgres.h"
 
 #include "access/htup_details.h"
+#include "access/tableam.h"
 #include "access/sysattr.h"
 #include "access/transam.h"
 #include "access/xact.h"
@@ -976,12 +977,12 @@ InitPlan(QueryDesc *queryDesc, int eflags)
     * Initialize the executor's tuple table to empty.
     */
    estate->es_tupleTable = NIL;
-   estate->es_trig_tuple_slot = NULL;
+   estate->es_trig_return_slot = NULL;
    estate->es_trig_oldtup_slot = NULL;
    estate->es_trig_newtup_slot = NULL;
 
    /* mark EvalPlanQual not active */
-   estate->es_epqTuple = NULL;
+   estate->es_epqTupleSlot = NULL;
    estate->es_epqTupleSet = NULL;
    estate->es_epqScanDone = NULL;
 
@@ -2403,6 +2404,30 @@ ExecBuildAuxRowMark(ExecRowMark *erm, List *targetlist)
    return aerm;
 }
 
+TupleTableSlot *
+EvalPlanQualSlot(EPQState *epqstate,
+                Relation relation, Index rti)
+{
+   TupleTableSlot **slot = &epqstate->estate->es_epqTupleSlot[rti - 1];
+
+   if (*slot == NULL)
+   {
+       MemoryContext oldcontext;
+
+       oldcontext = MemoryContextSwitchTo(epqstate->estate->es_query_cxt);
+
+       if (relation)
+           *slot = table_gimmegimmeslot(relation, &epqstate->estate->es_tupleTable);
+       else
+           *slot = MakeTupleTableSlot(epqstate->origslot->tts_tupleDescriptor, &TTSOpsVirtual);
+
+       epqstate->estate->es_epqTupleSet[rti - 1] = true;
+       MemoryContextSwitchTo(oldcontext);
+   }
+
+   return *slot;
+}
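+
+/*
+ * Editorial sketch: GetTupleForTrigger, for example, uses this as
+ *
+ *     EvalPlanQualBegin(epqstate, estate);
+ *     testslot = EvalPlanQualSlot(epqstate, relation, rti);
+ *     ExecCopySlot(testslot, oldslot);
+ *     epqslot = EvalPlanQual(estate, epqstate, relation, rti, testslot);
+ */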
+
 
 /*
  * EvalPlanQual logic --- recheck modified tuple(s) to see if we want to
@@ -2420,9 +2445,7 @@ ExecBuildAuxRowMark(ExecRowMark *erm, List *targetlist)
  * epqstate - state for EvalPlanQual rechecking
  * relation - table containing tuple
  * rti - rangetable index of table containing tuple
- * lockmode - requested tuple lock mode
- * *tid - t_ctid from the outdated tuple (ie, next updated version)
- * priorXmax - t_xmax from the outdated tuple
+ * testslot - slot containing the updated row version to recheck
  *
- * *tid is also an output parameter: it's modified to hold the TID of the
- * latest version of the tuple (note this may be changed even on failure)
@@ -2435,39 +2458,25 @@ ExecBuildAuxRowMark(ExecRowMark *erm, List *targetlist)
  */
 TupleTableSlot *
 EvalPlanQual(EState *estate, EPQState *epqstate,
-            Relation relation, Index rti, int lockmode,
-            ItemPointer tid, TransactionId priorXmax)
+            Relation relation, Index rti, TupleTableSlot *testslot)
 {
    TupleTableSlot *slot;
-   HeapTuple   copyTuple;
 
    Assert(rti > 0);
 
-   /*
-    * Get and lock the updated version of the row; if fail, return NULL.
-    */
-   copyTuple = EvalPlanQualFetch(estate, relation, lockmode, LockWaitBlock,
-                                 tid, priorXmax);
-
-   if (copyTuple == NULL)
-       return NULL;
-
-   /*
-    * For UPDATE/DELETE we have to return tid of actual row we're executing
-    * PQ for.
-    */
-   *tid = copyTuple->t_self;
-
    /*
     * Need to run a recheck subquery.  Initialize or reinitialize EPQ state.
     */
    EvalPlanQualBegin(epqstate, estate);
 
+#if FIXME
    /*
     * Free old test tuple, if any, and store new tuple where relation's scan
     * node will see it
     */
-   EvalPlanQualSetTuple(epqstate, rti, copyTuple);
+   EvalPlanQualSetTuple(epqstate, rti, testslot);
+#endif
+   Assert(testslot == epqstate->estate->es_epqTupleSlot[rti - 1]);
 
    /*
     * Fetch any non-locked source rows
@@ -2489,272 +2498,20 @@ EvalPlanQual(EState *estate, EPQState *epqstate,
    if (!TupIsNull(slot))
        ExecMaterializeSlot(slot);
 
+#if FIXME
    /*
     * Clear out the test tuple.  This is needed in case the EPQ query is
     * re-used to test a tuple for a different relation.  (Not clear that can
     * really happen, but let's be safe.)
     */
    EvalPlanQualSetTuple(epqstate, rti, NULL);
+#else
+   ExecClearTuple(epqstate->estate->es_epqTupleSlot[rti - 1]);
+#endif
 
    return slot;
 }
 
-/*
- * Fetch a copy of the newest version of an outdated tuple
- *
- * estate - executor state data
- * relation - table containing tuple
- * lockmode - requested tuple lock mode
- * wait_policy - requested lock wait policy
- * *tid - t_ctid from the outdated tuple (ie, next updated version)
- * priorXmax - t_xmax from the outdated tuple
- *
- * Returns a palloc'd copy of the newest tuple version, or NULL if we find
- * that there is no newest version (ie, the row was deleted not updated).
- * We also return NULL if the tuple is locked and the wait policy is to skip
- * such tuples.
- *
- * If successful, we have locked the newest tuple version, so caller does not
- * need to worry about it changing anymore.
- *
- * Note: properly, lockmode should be declared as enum LockTupleMode,
- * but we use "int" to avoid having to include heapam.h in executor.h.
- */
-HeapTuple
-EvalPlanQualFetch(EState *estate, Relation relation, int lockmode,
-                 LockWaitPolicy wait_policy,
-                 ItemPointer tid, TransactionId priorXmax)
-{
-   HeapTuple   copyTuple = NULL;
-   HeapTupleData tuple;
-   SnapshotData SnapshotDirty;
-
-   /*
-    * fetch target tuple
-    *
-    * Loop here to deal with updated or busy tuples
-    */
-   InitDirtySnapshot(SnapshotDirty);
-   tuple.t_self = *tid;
-   for (;;)
-   {
-       Buffer      buffer;
-
-       if (heap_fetch(relation, &SnapshotDirty, &tuple, &buffer, true, NULL))
-       {
-           HTSU_Result test;
-           HeapUpdateFailureData hufd;
-
-           /*
-            * If xmin isn't what we're expecting, the slot must have been
-            * recycled and reused for an unrelated tuple.  This implies that
-            * the latest version of the row was deleted, so we need do
-            * nothing.  (Should be safe to examine xmin without getting
-            * buffer's content lock.  We assume reading a TransactionId to be
-            * atomic, and Xmin never changes in an existing tuple, except to
-            * invalid or frozen, and neither of those can match priorXmax.)
-            */
-           if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
-                                    priorXmax))
-           {
-               ReleaseBuffer(buffer);
-               return NULL;
-           }
-
-           /* otherwise xmin should not be dirty... */
-           if (TransactionIdIsValid(SnapshotDirty.xmin))
-               elog(ERROR, "t_xmin is uncommitted in tuple to be updated");
-
-           /*
-            * If tuple is being updated by other transaction then we have to
-            * wait for its commit/abort, or die trying.
-            */
-           if (TransactionIdIsValid(SnapshotDirty.xmax))
-           {
-               ReleaseBuffer(buffer);
-               switch (wait_policy)
-               {
-                   case LockWaitBlock:
-                       XactLockTableWait(SnapshotDirty.xmax,
-                                         relation, &tuple.t_self,
-                                         XLTW_FetchUpdated);
-                       break;
-                   case LockWaitSkip:
-                       if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
-                           return NULL;    /* skip instead of waiting */
-                       break;
-                   case LockWaitError:
-                       if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
-                           ereport(ERROR,
-                                   (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
-                                    errmsg("could not obtain lock on row in relation \"%s\"",
-                                           RelationGetRelationName(relation))));
-                       break;
-               }
-               continue;       /* loop back to repeat heap_fetch */
-           }
-
-           /*
-            * If tuple was inserted by our own transaction, we have to check
-            * cmin against es_output_cid: cmin >= current CID means our
-            * command cannot see the tuple, so we should ignore it. Otherwise
-            * heap_lock_tuple() will throw an error, and so would any later
-            * attempt to update or delete the tuple.  (We need not check cmax
-            * because HeapTupleSatisfiesDirty will consider a tuple deleted
-            * by our transaction dead, regardless of cmax.) We just checked
-            * that priorXmax == xmin, so we can test that variable instead of
-            * doing HeapTupleHeaderGetXmin again.
-            */
-           if (TransactionIdIsCurrentTransactionId(priorXmax) &&
-               HeapTupleHeaderGetCmin(tuple.t_data) >= estate->es_output_cid)
-           {
-               ReleaseBuffer(buffer);
-               return NULL;
-           }
-
-           /*
-            * This is a live tuple, so now try to lock it.
-            */
-           test = heap_lock_tuple(relation, &tuple,
-                                  estate->es_output_cid,
-                                  lockmode, wait_policy,
-                                  false, &buffer, &hufd);
-           /* We now have two pins on the buffer, get rid of one */
-           ReleaseBuffer(buffer);
-
-           switch (test)
-           {
-               case HeapTupleSelfUpdated:
-
-                   /*
-                    * The target tuple was already updated or deleted by the
-                    * current command, or by a later command in the current
-                    * transaction.  We *must* ignore the tuple in the former
-                    * case, so as to avoid the "Halloween problem" of
-                    * repeated update attempts.  In the latter case it might
-                    * be sensible to fetch the updated tuple instead, but
-                    * doing so would require changing heap_update and
-                    * heap_delete to not complain about updating "invisible"
-                    * tuples, which seems pretty scary (heap_lock_tuple will
-                    * not complain, but few callers expect
-                    * HeapTupleInvisible, and we're not one of them).  So for
-                    * now, treat the tuple as deleted and do not process.
-                    */
-                   ReleaseBuffer(buffer);
-                   return NULL;
-
-               case HeapTupleMayBeUpdated:
-                   /* successfully locked */
-                   break;
-
-               case HeapTupleUpdated:
-                   ReleaseBuffer(buffer);
-                   if (IsolationUsesXactSnapshot())
-                       ereport(ERROR,
-                               (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
-                                errmsg("could not serialize access due to concurrent update")));
-                   if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
-                       ereport(ERROR,
-                               (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
-                                errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
-
-                   /* Should not encounter speculative tuple on recheck */
-                   Assert(!HeapTupleHeaderIsSpeculative(tuple.t_data));
-                   if (!ItemPointerEquals(&hufd.ctid, &tuple.t_self))
-                   {
-                       /* it was updated, so look at the updated version */
-                       tuple.t_self = hufd.ctid;
-                       /* updated row should have xmin matching this xmax */
-                       priorXmax = hufd.xmax;
-                       continue;
-                   }
-                   /* tuple was deleted, so give up */
-                   return NULL;
-
-               case HeapTupleWouldBlock:
-                   ReleaseBuffer(buffer);
-                   return NULL;
-
-               case HeapTupleInvisible:
-                   elog(ERROR, "attempted to lock invisible tuple");
-                   break;
-
-               default:
-                   ReleaseBuffer(buffer);
-                   elog(ERROR, "unrecognized heap_lock_tuple status: %u",
-                        test);
-                   return NULL;    /* keep compiler quiet */
-           }
-
-           /*
-            * We got tuple - now copy it for use by recheck query.
-            */
-           copyTuple = heap_copytuple(&tuple);
-           ReleaseBuffer(buffer);
-           break;
-       }
-
-       /*
-        * If the referenced slot was actually empty, the latest version of
-        * the row must have been deleted, so we need do nothing.
-        */
-       if (tuple.t_data == NULL)
-       {
-           ReleaseBuffer(buffer);
-           return NULL;
-       }
-
-       /*
-        * As above, if xmin isn't what we're expecting, do nothing.
-        */
-       if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
-                                priorXmax))
-       {
-           ReleaseBuffer(buffer);
-           return NULL;
-       }
-
-       /*
-        * If we get here, the tuple was found but failed SnapshotDirty.
-        * Assuming the xmin is either a committed xact or our own xact (as it
-        * certainly should be if we're trying to modify the tuple), this must
-        * mean that the row was updated or deleted by either a committed xact
-        * or our own xact.  If it was deleted, we can ignore it; if it was
-        * updated then chain up to the next version and repeat the whole
-        * process.
-        *
-        * As above, it should be safe to examine xmax and t_ctid without the
-        * buffer content lock, because they can't be changing.
-        */
-
-       /* check whether next version would be in a different partition */
-       if (HeapTupleHeaderIndicatesMovedPartitions(tuple.t_data))
-           ereport(ERROR,
-                   (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
-                    errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
-
-       /* check whether tuple has been deleted */
-       if (ItemPointerEquals(&tuple.t_self, &tuple.t_data->t_ctid))
-       {
-           /* deleted, so forget about it */
-           ReleaseBuffer(buffer);
-           return NULL;
-       }
-
-       /* updated, so look at the updated row */
-       tuple.t_self = tuple.t_data->t_ctid;
-       /* updated row should have xmin matching this xmax */
-       priorXmax = HeapTupleHeaderGetUpdateXid(tuple.t_data);
-       ReleaseBuffer(buffer);
-       /* loop back to fetch next in chain */
-   }
-
-   /*
-    * Return the copied tuple
-    */
-   return copyTuple;
-}
-
 /*
  * EvalPlanQualInit -- initialize during creation of a plan state node
  * that might need to invoke EPQ processing.
@@ -2792,40 +2549,34 @@ EvalPlanQualSetPlan(EPQState *epqstate, Plan *subplan, List *auxrowmarks)
    epqstate->arowMarks = auxrowmarks;
 }
 
+#if 0
 /*
  * Install one test tuple into EPQ state, or clear test tuple if tuple == NULL
  *
  * NB: passed tuple must be palloc'd; it may get freed later
  */
 void
-EvalPlanQualSetTuple(EPQState *epqstate, Index rti, HeapTuple tuple)
+EvalPlanQualSetTuple(EPQState *epqstate, Index rti, TupleTableSlot *slot)
 {
    EState     *estate = epqstate->estate;
 
    Assert(rti > 0);
 
-   /*
-    * free old test tuple, if any, and store new tuple where relation's scan
-    * node will see it
-    */
-   if (estate->es_epqTuple[rti - 1] != NULL)
-       heap_freetuple(estate->es_epqTuple[rti - 1]);
-   estate->es_epqTuple[rti - 1] = tuple;
+   if (estate->es_epqTupleSlot[rti - 1] != NULL)
+       ExecClearTuple(estate->es_epqTupleSlot[rti - 1]);
+   if (slot)
+   {
+       if (!estate->es_epqTupleSlot[rti - 1])
+       {
+           /* XXX: dead code; no ExecRowMark is at hand here for the relation */
+           estate->es_epqTupleSlot[rti - 1] =
+               table_gimmegimmeslot(erm->relation, NULL);
+       }
+       /* XXX: It'd be better if we could work around needing to copy. */
+       ExecCopySlot(estate->es_epqTupleSlot[rti - 1], slot);
+   }
    estate->es_epqTupleSet[rti - 1] = true;
 }
-
-/*
- * Fetch back the current test tuple (if any) for the specified RTI
- */
-HeapTuple
-EvalPlanQualGetTuple(EPQState *epqstate, Index rti)
-{
-   EState     *estate = epqstate->estate;
-
-   Assert(rti > 0);
-
-   return estate->es_epqTuple[rti - 1];
-}
+#endif
 
 /*
  * Fetch the current row values for any non-locked relations that need
@@ -2845,13 +2596,14 @@ EvalPlanQualFetchRowMarks(EPQState *epqstate)
        ExecRowMark *erm = aerm->rowmark;
        Datum       datum;
        bool        isNull;
-       HeapTupleData tuple;
+       TupleTableSlot *slot;
 
        if (RowMarkRequiresRowShareLock(erm->markType))
            elog(ERROR, "EvalPlanQual doesn't support locking rowmarks");
 
        /* clear any leftover test tuple for this rel */
-       EvalPlanQualSetTuple(epqstate, erm->rti, NULL);
+       slot = EvalPlanQualSlot(epqstate, erm->relation, erm->rti);
+       ExecClearTuple(slot);
 
        /* if child rel, must check whether it produced this row */
        if (erm->rti != erm->prti)
@@ -2876,8 +2628,6 @@ EvalPlanQualFetchRowMarks(EPQState *epqstate)
 
        if (erm->markType == ROW_MARK_REFERENCE)
        {
-           HeapTuple   copyTuple;
-
            Assert(erm->relation != NULL);
 
            /* fetch the tuple's ctid */
@@ -2895,17 +2645,20 @@ EvalPlanQualFetchRowMarks(EPQState *epqstate)
                bool        updated = false;
 
                fdwroutine = GetFdwRoutineForRelation(erm->relation, false);
+
                /* this should have been checked already, but let's be safe */
                if (fdwroutine->RefetchForeignRow == NULL)
                    ereport(ERROR,
                            (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                             errmsg("cannot lock rows in foreign table \"%s\"",
                                    RelationGetRelationName(erm->relation))));
-               copyTuple = fdwroutine->RefetchForeignRow(epqstate->estate,
-                                                         erm,
-                                                         datum,
-                                                         &updated);
-               if (copyTuple == NULL)
+
+               slot = fdwroutine->RefetchForeignRow(epqstate->estate,
+                                                    erm,
+                                                    datum,
+                                                    slot,
+                                                    &updated);
+               if (slot == NULL)
                    elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
 
                /*
@@ -2917,25 +2670,14 @@ EvalPlanQualFetchRowMarks(EPQState *epqstate)
            else
            {
                /* ordinary table, fetch the tuple */
-               Buffer      buffer;
 
-               tuple.t_self = *((ItemPointer) DatumGetPointer(datum));
-               if (!heap_fetch(erm->relation, SnapshotAny, &tuple, &buffer,
-                               false, NULL))
+               if (!table_fetch_row_version(erm->relation, (ItemPointer) DatumGetPointer(datum),
+                                            SnapshotAny, slot, NULL))
                    elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
-
-               /* successful, copy tuple */
-               copyTuple = heap_copytuple(&tuple);
-               ReleaseBuffer(buffer);
            }
-
-           /* store tuple */
-           EvalPlanQualSetTuple(epqstate, erm->rti, copyTuple);
        }
        else
        {
-           HeapTupleHeader td;
-
            Assert(erm->markType == ROW_MARK_COPY);
 
            /* fetch the whole-row Var for the relation */
@@ -2945,19 +2687,8 @@ EvalPlanQualFetchRowMarks(EPQState *epqstate)
            /* non-locked rels could be on the inside of outer joins */
            if (isNull)
                continue;
-           td = DatumGetHeapTupleHeader(datum);
-
-           /* build a temporary HeapTuple control structure */
-           tuple.t_len = HeapTupleHeaderGetDatumLength(td);
-           tuple.t_data = td;
-           /* relation might be a foreign table, if so provide tableoid */
-           tuple.t_tableOid = erm->relid;
-           /* also copy t_ctid in case there's valid data there */
-           tuple.t_self = td->t_ctid;
-
-           /* copy and store tuple */
-           EvalPlanQualSetTuple(epqstate, erm->rti,
-                                heap_copytuple(&tuple));
+
+           ExecForceStoreHeapTupleDatum(datum, slot);
        }
    }
 }
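
/*
 * Editorial sketch, not part of the patch: the slot-based refetch pattern
 * EvalPlanQualFetchRowMarks() now follows for ordinary tables.  Instead of
 * building a HeapTuple copy and storing it via the removed
 * EvalPlanQualSetTuple(), callers obtain the per-RTI slot from
 * EvalPlanQualSlot() and let the table AM fill it.  Call signatures are
 * taken from this diff; the helper name is invented for illustration.
 */
static void
epq_refetch_sketch(EPQState *epqstate, ExecRowMark *erm, Datum ctid_datum)
{
    TupleTableSlot *slot = EvalPlanQualSlot(epqstate, erm->relation, erm->rti);

    /* clear any leftover test tuple, then refetch through the table AM */
    ExecClearTuple(slot);
    if (!table_fetch_row_version(erm->relation,
                                 (ItemPointer) DatumGetPointer(ctid_datum),
                                 SnapshotAny, slot, NULL))
        elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
}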
@@ -3153,15 +2884,15 @@ EvalPlanQualStart(EPQState *epqstate, EState *parentestate, Plan *planTree)
     * sub-rechecks to inherit the values being examined by an outer recheck.
     */
    estate->es_epqScanDone = (bool *) palloc0(rtsize * sizeof(bool));
-   if (parentestate->es_epqTuple != NULL)
+   if (parentestate->es_epqTupleSlot != NULL)
    {
-       estate->es_epqTuple = parentestate->es_epqTuple;
+       estate->es_epqTupleSlot = parentestate->es_epqTupleSlot;
        estate->es_epqTupleSet = parentestate->es_epqTupleSet;
    }
    else
    {
-       estate->es_epqTuple = (HeapTuple *)
-           palloc0(rtsize * sizeof(HeapTuple));
+       estate->es_epqTupleSlot = (TupleTableSlot **)
+           palloc0(rtsize * sizeof(TupleTableSlot *));
        estate->es_epqTupleSet = (bool *)
            palloc0(rtsize * sizeof(bool));
    }
index 31f7288b46071fdeb8424d46472f65ec19d4ddd2..f0a6318a255c7b4836222ccc6dbe908a6beb5b87 100644 (file)
@@ -13,6 +13,7 @@
  */
 #include "postgres.h"
 
+#include "access/tableam.h"
 #include "catalog/partition.h"
 #include "catalog/pg_inherits.h"
 #include "catalog/pg_type.h"
@@ -899,8 +900,7 @@ ExecInitRoutingInfo(ModifyTableState *mtstate,
         * end of the command.
         */
        partrouteinfo->pi_PartitionTupleSlot =
-           ExecInitExtraTupleSlot(estate, RelationGetDescr(partrel),
-                                  &TTSOpsHeapTuple);
+           table_gimmegimmeslot(partrel, &estate->es_tupleTable);
    }
    else
        partrouteinfo->pi_PartitionTupleSlot = NULL;
index 5bd3bbc35e96ba44552504b9364b27f9f300c22f..553159b08e2a3bd9cc57cbc801ae1dc32309195b 100644 (file)
@@ -15,6 +15,7 @@
 #include "postgres.h"
 
 #include "access/relscan.h"
+#include "access/tableam.h"
 #include "access/transam.h"
 #include "access/xact.h"
 #include "commands/trigger.h"
@@ -117,7 +118,6 @@ RelationFindReplTupleByIndex(Relation rel, Oid idxoid,
                             TupleTableSlot *searchslot,
                             TupleTableSlot *outslot)
 {
-   HeapTuple   scantuple;
    ScanKeyData skey[INDEX_MAX_KEYS];
    IndexScanDesc scan;
    SnapshotData snap;
@@ -143,10 +143,9 @@ retry:
    index_rescan(scan, skey, IndexRelationGetNumberOfKeyAttributes(idxrel), NULL, 0);
 
    /* Try to find the tuple */
-   if ((scantuple = index_getnext(scan, ForwardScanDirection)) != NULL)
+   if (index_getnext_slot(scan, ForwardScanDirection, outslot))
    {
        found = true;
-       ExecStoreHeapTuple(scantuple, outslot, false);
        ExecMaterializeSlot(outslot);
 
        xwait = TransactionIdIsValid(snap.xmin) ?
@@ -166,25 +165,18 @@ retry:
    /* Found tuple, try to lock it in the lockmode. */
    if (found)
    {
-       Buffer      buf;
        HeapUpdateFailureData hufd;
        HTSU_Result res;
-       HeapTupleData locktup;
-       HeapTupleTableSlot *hslot = (HeapTupleTableSlot *)outslot;
-
-       /* Only a heap tuple has item pointers. */
-       Assert(TTS_IS_HEAPTUPLE(outslot) || TTS_IS_BUFFERTUPLE(outslot));
-       ItemPointerCopy(&hslot->tuple->t_self, &locktup.t_self);
 
        PushActiveSnapshot(GetLatestSnapshot());
 
-       res = heap_lock_tuple(rel, &locktup, GetCurrentCommandId(false),
-                             lockmode,
-                             LockWaitBlock,
-                             false /* don't follow updates */ ,
-                             &buf, &hufd);
-       /* the tuple slot already has the buffer pinned */
-       ReleaseBuffer(buf);
+       res = table_lock_tuple(rel, &(outslot->tts_tid), GetLatestSnapshot(),
+                                outslot,
+                                GetCurrentCommandId(false),
+                                lockmode,
+                                LockWaitBlock,
+                                0 /* don't follow updates */ ,
+                                &hufd);
 
        PopActiveSnapshot();
 
@@ -203,6 +195,12 @@ retry:
                            (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
                             errmsg("concurrent update, retrying")));
                goto retry;
+           case HeapTupleDeleted:
+               /* XXX: Improve handling here */
+               ereport(LOG,
+                       (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+                        errmsg("concurrent delete, retrying")));
+               goto retry;
            case HeapTupleInvisible:
                elog(ERROR, "attempted to lock invisible tuple");
                break;
@@ -220,59 +218,6 @@ retry:
    return found;
 }
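
/*
 * Editorial sketch of the lock-and-refetch shape both RelationFindReplTuple*
 * variants share after this change: the TID comes from the slot itself
 * (tts_tid, now maintained by the slot store routines, see execTuples.c
 * below), and table_lock_tuple() locks the row while refetching the locked
 * version into the same slot.  The helper name is invented; the call
 * signature matches the diff.
 */
static bool
lock_found_tuple_sketch(Relation rel, TupleTableSlot *outslot,
                        LockTupleMode lockmode)
{
    HeapUpdateFailureData hufd;
    HTSU_Result res;

    PushActiveSnapshot(GetLatestSnapshot());
    res = table_lock_tuple(rel, &(outslot->tts_tid), GetLatestSnapshot(),
                           outslot,
                           GetCurrentCommandId(false),
                           lockmode,
                           LockWaitBlock,
                           0 /* don't follow updates */ ,
                           &hufd);
    PopActiveSnapshot();

    /* callers retry the whole search on concurrent update/delete */
    return res == HeapTupleMayBeUpdated;
}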
 
-/*
- * Compare the tuple and slot and check if they have equal values.
- *
- * We use binary datum comparison which might return false negatives but
- * that's the best we can do here as there may be multiple notions of
- * equality for the data types and table columns don't specify which one
- * to use.
- */
-static bool
-tuple_equals_slot(TupleDesc desc, HeapTuple tup, TupleTableSlot *slot)
-{
-   Datum       values[MaxTupleAttributeNumber];
-   bool        isnull[MaxTupleAttributeNumber];
-   int         attrnum;
-
-   heap_deform_tuple(tup, desc, values, isnull);
-
-   /* Check equality of the attributes. */
-   for (attrnum = 0; attrnum < desc->natts; attrnum++)
-   {
-       Form_pg_attribute att;
-       TypeCacheEntry *typentry;
-
-       /*
-        * If one value is NULL and other is not, then they are certainly not
-        * equal
-        */
-       if (isnull[attrnum] != slot->tts_isnull[attrnum])
-           return false;
-
-       /*
-        * If both are NULL, they can be considered equal.
-        */
-       if (isnull[attrnum])
-           continue;
-
-       att = TupleDescAttr(desc, attrnum);
-
-       typentry = lookup_type_cache(att->atttypid, TYPECACHE_EQ_OPR_FINFO);
-       if (!OidIsValid(typentry->eq_opr_finfo.fn_oid))
-           ereport(ERROR,
-                   (errcode(ERRCODE_UNDEFINED_FUNCTION),
-                    errmsg("could not identify an equality operator for type %s",
-                           format_type_be(att->atttypid))));
-
-       if (!DatumGetBool(FunctionCall2(&typentry->eq_opr_finfo,
-                                       values[attrnum],
-                                       slot->tts_values[attrnum])))
-           return false;
-   }
-
-   return true;
-}
 
 /*
  * Search the relation 'rel' for tuple using the sequential scan.
@@ -288,33 +233,34 @@ bool
 RelationFindReplTupleSeq(Relation rel, LockTupleMode lockmode,
                         TupleTableSlot *searchslot, TupleTableSlot *outslot)
 {
-   HeapTuple   scantuple;
-   HeapScanDesc scan;
+   TupleTableSlot *scanslot;
+   TableScanDesc scan;
    SnapshotData snap;
    TransactionId xwait;
    bool        found;
-   TupleDesc   desc = RelationGetDescr(rel);
+   TupleDesc   desc PG_USED_FOR_ASSERTS_ONLY = RelationGetDescr(rel);
 
    Assert(equalTupleDescs(desc, outslot->tts_tupleDescriptor));
 
    /* Start a heap scan. */
    InitDirtySnapshot(snap);
-   scan = heap_beginscan(rel, &snap, 0, NULL);
+   scan = table_beginscan(rel, &snap, 0, NULL);
+
+   scanslot = table_gimmegimmeslot(rel, NULL);
 
 retry:
    found = false;
 
-   heap_rescan(scan, NULL);
+   table_rescan(scan, NULL);
 
    /* Try to find the tuple */
-   while ((scantuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+   while (table_scan_getnextslot(scan, ForwardScanDirection, scanslot))
    {
-       if (!tuple_equals_slot(desc, scantuple, searchslot))
+       if (!ExecSlotCompare(scanslot, searchslot))
            continue;
 
        found = true;
-       ExecStoreHeapTuple(scantuple, outslot, false);
-       ExecMaterializeSlot(outslot);
+       ExecCopySlot(outslot, scanslot);
 
        xwait = TransactionIdIsValid(snap.xmin) ?
            snap.xmin : snap.xmax;
@@ -333,25 +279,18 @@ retry:
    /* Found tuple, try to lock it in the lockmode. */
    if (found)
    {
-       Buffer      buf;
        HeapUpdateFailureData hufd;
        HTSU_Result res;
-       HeapTupleData locktup;
-       HeapTupleTableSlot *hslot = (HeapTupleTableSlot *)outslot;
-
-       /* Only a heap tuple has item pointers. */
-       Assert(TTS_IS_HEAPTUPLE(outslot) || TTS_IS_BUFFERTUPLE(outslot));
-       ItemPointerCopy(&hslot->tuple->t_self, &locktup.t_self);
 
        PushActiveSnapshot(GetLatestSnapshot());
 
-       res = heap_lock_tuple(rel, &locktup, GetCurrentCommandId(false),
-                             lockmode,
-                             LockWaitBlock,
-                             false /* don't follow updates */ ,
-                             &buf, &hufd);
-       /* the tuple slot already has the buffer pinned */
-       ReleaseBuffer(buf);
+       res = table_lock_tuple(rel, &(outslot->tts_tid), GetLatestSnapshot(),
+                              outslot,
+                              GetCurrentCommandId(false),
+                              lockmode,
+                              LockWaitBlock,
+                              0 /* don't follow updates */ ,
+                              &hufd);
 
        PopActiveSnapshot();
 
@@ -370,6 +309,12 @@ retry:
                            (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
                             errmsg("concurrent update, retrying")));
                goto retry;
+           case HeapTupleDeleted:
+               /* XXX: Improve handling here */
+               ereport(LOG,
+                       (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+                        errmsg("concurrent delete, retrying")));
+               goto retry;
            case HeapTupleInvisible:
                elog(ERROR, "attempted to lock invisible tuple");
                break;
@@ -379,7 +324,8 @@ retry:
        }
    }
 
-   heap_endscan(scan);
+   table_endscan(scan);
+   ExecDropSingleTupleTableSlot(scanslot);
 
    return found;
 }
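
/*
 * Editorial sketch condensing the AM-independent control flow that
 * RelationFindReplTupleSeq() now uses: the scan slot comes from the table
 * AM via table_gimmegimmeslot(), rows arrive through
 * table_scan_getnextslot(), and equality is decided by the new
 * ExecSlotCompare() (added to execTuples.c below).  Locking and retry
 * handling are omitted; names match the diff.
 */
static bool
find_tuple_seq_sketch(Relation rel, Snapshot snap,
                      TupleTableSlot *searchslot, TupleTableSlot *outslot)
{
    TableScanDesc scan = table_beginscan(rel, snap, 0, NULL);
    TupleTableSlot *scanslot = table_gimmegimmeslot(rel, NULL);
    bool        found = false;

    while (table_scan_getnextslot(scan, ForwardScanDirection, scanslot))
    {
        if (!ExecSlotCompare(scanslot, searchslot))
            continue;           /* not the row we are looking for */
        found = true;
        ExecCopySlot(outslot, scanslot);
        break;
    }

    table_endscan(scan);
    ExecDropSingleTupleTableSlot(scanslot);
    return found;
}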
@@ -394,7 +340,6 @@ void
 ExecSimpleRelationInsert(EState *estate, TupleTableSlot *slot)
 {
    bool        skip_tuple = false;
-   HeapTuple   tuple;
    ResultRelInfo *resultRelInfo = estate->es_result_relation_info;
    Relation    rel = resultRelInfo->ri_RelationDesc;
 
@@ -407,10 +352,8 @@ ExecSimpleRelationInsert(EState *estate, TupleTableSlot *slot)
    if (resultRelInfo->ri_TrigDesc &&
        resultRelInfo->ri_TrigDesc->trig_insert_before_row)
    {
-       slot = ExecBRInsertTriggers(estate, resultRelInfo, slot);
-
-       if (slot == NULL)       /* "do nothing" */
-           skip_tuple = true;
+       if (!ExecBRInsertTriggers(estate, resultRelInfo, slot))
+           skip_tuple = true;      /* "do nothing" */
    }
 
    if (!skip_tuple)
@@ -423,19 +366,15 @@ ExecSimpleRelationInsert(EState *estate, TupleTableSlot *slot)
        if (resultRelInfo->ri_PartitionCheck)
            ExecPartitionCheck(resultRelInfo, slot, estate, true);
 
-       /* Materialize slot into a tuple that we can scribble upon. */
-       tuple = ExecFetchSlotHeapTuple(slot, true, NULL);
-
-       /* OK, store the tuple and create index entries for it */
-       simple_heap_insert(rel, tuple);
+       table_insert(resultRelInfo->ri_RelationDesc, slot,
+                      GetCurrentCommandId(true), 0, NULL);
 
        if (resultRelInfo->ri_NumIndices > 0)
-           recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
-                                                  estate, false, NULL,
+           recheckIndexes = ExecInsertIndexTuples(slot, estate, false, NULL,
                                                   NIL);
 
        /* AFTER ROW INSERT Triggers */
-       ExecARInsertTriggers(estate, resultRelInfo, tuple,
+       ExecARInsertTriggers(estate, resultRelInfo, slot,
                             recheckIndexes, NULL);
 
        /*
@@ -459,15 +398,9 @@ ExecSimpleRelationUpdate(EState *estate, EPQState *epqstate,
                         TupleTableSlot *searchslot, TupleTableSlot *slot)
 {
    bool        skip_tuple = false;
-   HeapTuple   tuple;
    ResultRelInfo *resultRelInfo = estate->es_result_relation_info;
    Relation    rel = resultRelInfo->ri_RelationDesc;
-   HeapTupleTableSlot *hsearchslot = (HeapTupleTableSlot *)searchslot;
-   HeapTupleTableSlot *hslot = (HeapTupleTableSlot *)slot;
-
-   /* We expect both searchslot and the slot to contain a heap tuple. */
-   Assert(TTS_IS_HEAPTUPLE(searchslot) || TTS_IS_BUFFERTUPLE(searchslot));
-   Assert(TTS_IS_HEAPTUPLE(slot) || TTS_IS_BUFFERTUPLE(slot));
+   ItemPointer tid = &(searchslot->tts_tid);
 
    /* For now we support only tables. */
    Assert(rel->rd_rel->relkind == RELKIND_RELATION);
@@ -478,16 +411,18 @@ ExecSimpleRelationUpdate(EState *estate, EPQState *epqstate,
    if (resultRelInfo->ri_TrigDesc &&
        resultRelInfo->ri_TrigDesc->trig_update_before_row)
    {
-       slot = ExecBRUpdateTriggers(estate, epqstate, resultRelInfo,
-                                   &hsearchslot->tuple->t_self, NULL, slot);
-
-       if (slot == NULL)       /* "do nothing" */
-           skip_tuple = true;
+       if (!ExecBRUpdateTriggers(estate, epqstate, resultRelInfo,
+                                   tid,
+                                 NULL, slot))
+           skip_tuple = true;      /* "do nothing" */
    }
 
    if (!skip_tuple)
    {
        List       *recheckIndexes = NIL;
+       HeapUpdateFailureData hufd;
+       LockTupleMode lockmode;
+       bool update_indexes;
 
        /* Check the constraints of the tuple */
        if (rel->rd_att->constr)
@@ -495,22 +430,22 @@ ExecSimpleRelationUpdate(EState *estate, EPQState *epqstate,
        if (resultRelInfo->ri_PartitionCheck)
            ExecPartitionCheck(resultRelInfo, slot, estate, true);
 
-       /* Materialize slot into a tuple that we can scribble upon. */
-       tuple = ExecFetchSlotHeapTuple(slot, true, NULL);
+       table_update(rel, tid, slot, GetCurrentCommandId(true), estate->es_snapshot,
+                    InvalidSnapshot, true, &hufd, &lockmode, &update_indexes);
 
-       /* OK, update the tuple and index entries for it */
-       simple_heap_update(rel, &hsearchslot->tuple->t_self, hslot->tuple);
+       /*
+        * FIXME: moving from simple_heap_update to table_update removes the
+        * concurrency handling
+        */
 
-       if (resultRelInfo->ri_NumIndices > 0 &&
-           !HeapTupleIsHeapOnly(hslot->tuple))
-           recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
-                                                  estate, false, NULL,
+       if (resultRelInfo->ri_NumIndices > 0 && update_indexes)
+           recheckIndexes = ExecInsertIndexTuples(slot, estate, false, NULL,
                                                   NIL);
 
        /* AFTER ROW UPDATE Triggers */
        ExecARUpdateTriggers(estate, resultRelInfo,
-                            &hsearchslot->tuple->t_self, NULL, tuple,
-                            recheckIndexes, NULL);
+                            tid,
+                            NULL, slot, recheckIndexes, NULL);
 
        list_free(recheckIndexes);
    }
@@ -529,7 +464,7 @@ ExecSimpleRelationDelete(EState *estate, EPQState *epqstate,
    bool        skip_tuple = false;
    ResultRelInfo *resultRelInfo = estate->es_result_relation_info;
    Relation    rel = resultRelInfo->ri_RelationDesc;
-   HeapTupleTableSlot *hsearchslot = (HeapTupleTableSlot *)searchslot;
+   ItemPointer tid = &(searchslot->tts_tid);
 
    /* For now we support only tables and heap tuples. */
    Assert(rel->rd_rel->relkind == RELKIND_RELATION);
@@ -542,20 +477,24 @@ ExecSimpleRelationDelete(EState *estate, EPQState *epqstate,
        resultRelInfo->ri_TrigDesc->trig_delete_before_row)
    {
        skip_tuple = !ExecBRDeleteTriggers(estate, epqstate, resultRelInfo,
-                                          &hsearchslot->tuple->t_self, NULL,
-                                          NULL);
+                                          tid, NULL, NULL);
+
    }
 
    if (!skip_tuple)
    {
        List       *recheckIndexes = NIL;
+       HeapUpdateFailureData hufd;
 
        /* OK, delete the tuple */
-       simple_heap_delete(rel, &hsearchslot->tuple->t_self);
+       /* FIXME: needs checks for return codes */
+       table_delete(rel, tid, GetCurrentCommandId(true),
+                    estate->es_snapshot, InvalidSnapshot,
+                    true,  &hufd, false);
 
        /* AFTER ROW DELETE Triggers */
        ExecARDeleteTriggers(estate, resultRelInfo,
-                            &hsearchslot->tuple->t_self, NULL, NULL);
+                            tid, NULL, NULL);
 
        list_free(recheckIndexes);
    }
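
/*
 * Editorial sketch of the three table-AM DML call shapes the
 * ExecSimpleRelation* paths above now use, with arguments exactly as they
 * appear in this diff.  The wrapper exists only to display the signatures;
 * per the FIXMEs above, handling of concurrency failures reported through
 * hufd is still an open item in this patch.
 */
static void
simple_dml_sketch(Relation rel, ItemPointer tid, TupleTableSlot *slot,
                  EState *estate)
{
    HeapUpdateFailureData hufd;
    LockTupleMode lockmode;
    bool        update_indexes;

    /* insert: no TID input; the AM records the new row's TID in the slot */
    table_insert(rel, slot, GetCurrentCommandId(true), 0, NULL);

    /* update: replace the row version at tid with the slot's contents */
    table_update(rel, tid, slot, GetCurrentCommandId(true),
                 estate->es_snapshot, InvalidSnapshot, true,
                 &hufd, &lockmode, &update_indexes);

    /* delete: remove the row version at tid */
    table_delete(rel, tid, GetCurrentCommandId(true),
                 estate->es_snapshot, InvalidSnapshot,
                 true, &hufd, false);
}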
index d90bb16b570d14efeb8a53e17e7c455a3e851d53..a3349099b6ee5cec2f618f9531d9319ec419161b 100644 (file)
@@ -40,7 +40,7 @@ ExecScanFetch(ScanState *node,
 
    CHECK_FOR_INTERRUPTS();
 
-   if (estate->es_epqTuple != NULL)
+   if (estate->es_epqTupleSlot != NULL)
    {
        /*
         * We are inside an EvalPlanQual recheck.  Return the test tuple if
@@ -73,17 +73,15 @@ ExecScanFetch(ScanState *node,
            /* Else mark to remember that we shouldn't return more */
            estate->es_epqScanDone[scanrelid - 1] = true;
 
+           slot = estate->es_epqTupleSlot[scanrelid - 1];
+
            /* Return empty slot if we haven't got a test tuple */
-           if (estate->es_epqTuple[scanrelid - 1] == NULL)
+           if (TupIsNull(slot))
                return ExecClearTuple(slot);
 
-           /* Store test tuple in the plan node's scan slot */
-           ExecForceStoreHeapTuple(estate->es_epqTuple[scanrelid - 1],
-                                   slot);
-
            /* Check if it meets the access-method conditions */
            if (!(*recheckMtd) (node, slot))
-               ExecClearTuple(slot);   /* would not be returned by scan */
+               return ExecClearTuple(slot);    /* would not be returned by scan */
 
            return slot;
        }
index 472a5f39cfc682aa5cad0f6d83317856f091db9d..d91a71a7c192a1b8d374f618208e6d96c8babb46 100644 (file)
@@ -453,6 +453,7 @@ tts_heap_store_tuple(TupleTableSlot *slot, HeapTuple tuple, bool shouldFree)
    hslot->tuple = tuple;
    hslot->off = 0;
    slot->tts_flags &= ~TTS_FLAG_EMPTY;
+   slot->tts_tid = tuple->t_self;
 
    if (shouldFree)
        slot->tts_flags |= TTS_FLAG_SHOULDFREE;
@@ -717,20 +718,31 @@ tts_buffer_heap_materialize(TupleTableSlot *slot)
     * associated with it, unless it's materialized (which would've returned
     * above).
     */
+   // PBORKED: restore
+#if 0
    Assert(bslot->base.tuple);
+#endif
 
    oldContext = MemoryContextSwitchTo(slot->tts_mcxt);
-   bslot->base.tuple = heap_copytuple(bslot->base.tuple);
+#if 1
+   if (!bslot->base.tuple)
+   {
+       bslot->base.tuple = heap_form_tuple(slot->tts_tupleDescriptor,
+                                           slot->tts_values,
+                                           slot->tts_isnull);
+   }
+#endif
+   else
+   {
+       bslot->base.tuple = heap_copytuple(bslot->base.tuple);
+   }
    MemoryContextSwitchTo(oldContext);
 
-   /*
-    * A heap tuple stored in a BufferHeapTupleTableSlot should have a buffer
-    * associated with it, unless it's materialized.
-    */
-   Assert(BufferIsValid(bslot->buffer));
-   if (likely(BufferIsValid(bslot->buffer)))
+   if (BufferIsValid(bslot->buffer))
+   {
        ReleaseBuffer(bslot->buffer);
-   bslot->buffer = InvalidBuffer;
+       bslot->buffer = InvalidBuffer;
+   }
 
    /*
     * Have to deform from scratch, otherwise tts_values[] entries could point
@@ -764,6 +776,10 @@ tts_buffer_heap_copyslot(TupleTableSlot *dstslot, TupleTableSlot *srcslot)
    }
    else
    {
+       // PBORKED: shouldn't be required
+       if (!bsrcslot->base.tuple)
+           tts_buffer_heap_materialize(srcslot);
+
        tts_buffer_heap_store_tuple(dstslot, bsrcslot->base.tuple, bsrcslot->buffer);
        /*
         * Need to materialize because the HeapTupleData portion of the tuple
@@ -858,6 +874,7 @@ tts_buffer_heap_store_tuple(TupleTableSlot *slot, HeapTuple tuple, Buffer buffer
    slot->tts_nvalid = 0;
    bslot->base.tuple = tuple;
    bslot->base.off = 0;
+   slot->tts_tid = tuple->t_self;
 
    /*
     * If tuple is on a disk page, keep the page pinned as long as we hold a
@@ -873,7 +890,9 @@ tts_buffer_heap_store_tuple(TupleTableSlot *slot, HeapTuple tuple, Buffer buffer
        if (BufferIsValid(bslot->buffer))
            ReleaseBuffer(bslot->buffer);
        bslot->buffer = buffer;
-       IncrBufferRefCount(buffer);
+       // PBORKED: Should always be valid
+       if (BufferIsValid(buffer))
+           IncrBufferRefCount(buffer);
    }
 }
 
@@ -1211,6 +1230,56 @@ MakeSingleTupleTableSlot(TupleDesc tupdesc,
    return slot;
 }
 
+// FIXME this definitely does not belong here.
+/* --------------------------------
+ *     ExecSlotCompare
+ *
+ *     Compare two slots and report whether they
+ *     contain the same values.
+ * --------------------------------
+ */
+bool
+ExecSlotCompare(TupleTableSlot *slot1, TupleTableSlot *slot2)
+{
+   int         attrnum;
+
+   Assert(slot1->tts_tupleDescriptor->natts == slot2->tts_tupleDescriptor->natts);
+
+   slot_getallattrs(slot1);
+   slot_getallattrs(slot2);
+
+   /* Check equality of the attributes. */
+   for (attrnum = 0; attrnum < slot1->tts_tupleDescriptor->natts; attrnum++)
+   {
+       Form_pg_attribute att;
+       TypeCacheEntry *typentry;
+
+       /*
+        * If one value is NULL and other is not, then they are certainly not
+        * equal
+        */
+       if (slot1->tts_isnull[attrnum] != slot2->tts_isnull[attrnum])
+           return false;
+
+       att = TupleDescAttr(slot1->tts_tupleDescriptor, attrnum);
+
+       typentry = lookup_type_cache(att->atttypid, TYPECACHE_EQ_OPR_FINFO);
+       if (!OidIsValid(typentry->eq_opr_finfo.fn_oid))
+           ereport(ERROR,
+                   (errcode(ERRCODE_UNDEFINED_FUNCTION),
+                    errmsg("could not identify an equality operator for type %s",
+                           format_type_be(att->atttypid))));
+
+       if (!DatumGetBool(FunctionCall2(&typentry->eq_opr_finfo,
+                                       slot1->tts_values[attrnum],
+                                       slot2->tts_values[attrnum])))
+           return false;
+   }
+
+   return true;
+}
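
/*
 * Editorial note: like the tuple_equals_slot() it replaces in
 * execReplication.c, ExecSlotCompare() resolves equality through the
 * equality operator the type cache records for each attribute
 * (TYPECACHE_EQ_OPR_FINFO), erroring out for types without one.  Minimal
 * usage, as in RelationFindReplTupleSeq() above:
 *
 *     if (ExecSlotCompare(scanslot, searchslot))
 *         ... the scanned row matches the search values ...
 */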
+
+
 /* --------------------------------
  *     ExecDropSingleTupleTableSlot
  *
@@ -1328,9 +1397,15 @@ ExecStoreHeapTuple(HeapTuple tuple,
    Assert(slot != NULL);
    Assert(slot->tts_tupleDescriptor != NULL);
 
-   if (unlikely(!TTS_IS_HEAPTUPLE(slot)))
+   // PBORKED: should only need heap tuples here.
+   if (TTS_IS_BUFFERTUPLE(slot))
+       tts_buffer_heap_store_tuple(slot, tuple, InvalidBuffer);
+   else if (TTS_IS_HEAPTUPLE(slot))
+       tts_heap_store_tuple(slot, tuple, shouldFree);
+   else
        elog(ERROR, "trying to store a heap tuple into wrong type of slot");
-   tts_heap_store_tuple(slot, tuple, shouldFree);
+
+   slot->tts_tableOid = tuple->t_tableOid;
 
    return slot;
 }
@@ -1371,6 +1446,8 @@ ExecStoreBufferHeapTuple(HeapTuple tuple,
        elog(ERROR, "trying to store an on-disk heap tuple into wrong type of slot");
    tts_buffer_heap_store_tuple(slot, tuple, buffer);
 
+   slot->tts_tableOid = tuple->t_tableOid;
+
    return slot;
 }
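
/*
 * Editorial note: the tts_tid assignments added to tts_heap_store_tuple()
 * and tts_buffer_heap_store_tuple() in the hunks above are what allow
 * AM-independent callers to lock or re-find a row from the slot alone,
 * e.g. (as in execReplication.c earlier in this diff):
 *
 *     table_lock_tuple(rel, &(slot->tts_tid), snapshot, slot, ...);
 */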
 
index 2a47abc02effbcf1eb5bd9ff5e8e68a2d9f0c512..4031642b8097cad51d15b8323ad6b73c893d32e5 100644 (file)
@@ -47,6 +47,7 @@
 
 #include "access/parallel.h"
 #include "access/relscan.h"
+#include "access/tableam.h"
 #include "access/transam.h"
 #include "executor/executor.h"
 #include "jit/jit.h"
@@ -130,7 +131,7 @@ CreateExecutorState(void)
    estate->es_tuple_routing_result_relations = NIL;
 
    estate->es_trig_target_relations = NIL;
-   estate->es_trig_tuple_slot = NULL;
+   estate->es_trig_return_slot = NULL;
    estate->es_trig_oldtup_slot = NULL;
    estate->es_trig_newtup_slot = NULL;
 
@@ -157,7 +158,7 @@ CreateExecutorState(void)
 
    estate->es_per_tuple_exprcontext = NULL;
 
-   estate->es_epqTuple = NULL;
+   estate->es_epqTupleSlot = NULL;
    estate->es_epqTupleSet = NULL;
    estate->es_epqScanDone = NULL;
    estate->es_sourceText = NULL;
@@ -419,6 +420,63 @@ MakePerTupleExprContext(EState *estate)
    return estate->es_per_tuple_exprcontext;
 }
 
+TupleTableSlot *
+ExecTriggerGetOldSlot(EState *estate, Relation rel)
+{
+   TupleDesc reldesc = RelationGetDescr(rel);
+   MemoryContext oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
+
+   /* PBORKED: This needs to handle switching slot types between partitions */
+   if (estate->es_trig_oldtup_slot == NULL)
+       estate->es_trig_oldtup_slot = ExecInitExtraTupleSlot(estate, NULL,
+                                                            table_slot_callbacks(rel));
+
+   if (estate->es_trig_oldtup_slot->tts_tupleDescriptor != reldesc)
+       ExecSetSlotDescriptor(estate->es_trig_oldtup_slot, reldesc);
+
+   MemoryContextSwitchTo(oldcontext);
+
+   return estate->es_trig_oldtup_slot;
+}
+
+TupleTableSlot *
+ExecTriggerGetNewSlot(EState *estate, Relation rel)
+{
+   TupleDesc reldesc = RelationGetDescr(rel);
+   MemoryContext oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
+
+   /* PBORKED: This needs to handle switching slot types between partitions */
+   if (estate->es_trig_newtup_slot == NULL)
+       estate->es_trig_newtup_slot = ExecInitExtraTupleSlot(estate, NULL,
+                                                            table_slot_callbacks(rel));
+
+   if (estate->es_trig_newtup_slot->tts_tupleDescriptor != reldesc)
+       ExecSetSlotDescriptor(estate->es_trig_newtup_slot, reldesc);
+
+   MemoryContextSwitchTo(oldcontext);
+
+   return estate->es_trig_newtup_slot;
+}
+
+TupleTableSlot *
+ExecTriggerGetReturnSlot(EState *estate, Relation rel)
+{
+   TupleDesc reldesc = RelationGetDescr(rel);
+   MemoryContext oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
+
+   /* PBORKED: This needs to handle switching slot types between partitions */
+   if (estate->es_trig_return_slot == NULL)
+       estate->es_trig_return_slot = ExecInitExtraTupleSlot(estate, NULL,
+                                                            table_slot_callbacks(rel));
+
+   if (estate->es_trig_return_slot->tts_tupleDescriptor != reldesc)
+       ExecSetSlotDescriptor(estate->es_trig_return_slot, reldesc);
+
+   MemoryContextSwitchTo(oldcontext);
+
+   return estate->es_trig_return_slot;
+}
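
/*
 * Editorial note: the three helpers above differ only in which EState field
 * they populate.  Each lazily creates a slot whose callbacks come from the
 * relation's table AM and re-points the descriptor when the target relation
 * changes (see the PBORKED notes about partitions with differing slot
 * types).  A hypothetical common helper, shown only to illustrate the
 * shared shape:
 */
static TupleTableSlot *
trigger_slot_sketch(EState *estate, Relation rel, TupleTableSlot **slotp)
{
    TupleDesc   reldesc = RelationGetDescr(rel);
    MemoryContext oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

    if (*slotp == NULL)
        *slotp = ExecInitExtraTupleSlot(estate, NULL,
                                        table_slot_callbacks(rel));
    if ((*slotp)->tts_tupleDescriptor != reldesc)
        ExecSetSlotDescriptor(*slotp, reldesc);

    MemoryContextSwitchTo(oldcontext);
    return *slotp;
}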
+
 
 /* ----------------------------------------------------------------
  *              miscellaneous node-init support functions
index 00d02fd50f043be89308df6b475cf0b94973f9d9..8c0625eb2e8e690bf33ac3de88071c55f1bbf1b2 100644 (file)
@@ -38,6 +38,7 @@
 #include <math.h>
 
 #include "access/relscan.h"
+#include "access/tableam.h"
 #include "access/transam.h"
 #include "access/visibilitymap.h"
 #include "executor/execdebug.h"
 
 
 static TupleTableSlot *BitmapHeapNext(BitmapHeapScanState *node);
-static void bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres);
 static inline void BitmapDoneInitializingSharedState(
                                  ParallelBitmapHeapState *pstate);
 static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
                             TBMIterateResult *tbmres);
 static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node);
 static inline void BitmapPrefetch(BitmapHeapScanState *node,
-              HeapScanDesc scan);
+              TableScanDesc scan);
 static bool BitmapShouldInitializeSharedState(
                                  ParallelBitmapHeapState *pstate);
 
@@ -76,12 +76,12 @@ static TupleTableSlot *
 BitmapHeapNext(BitmapHeapScanState *node)
 {
    ExprContext *econtext;
-   HeapScanDesc scan;
+   TableScanDesc scan;
+
    TIDBitmap  *tbm;
    TBMIterator *tbmiterator = NULL;
    TBMSharedIterator *shared_tbmiterator = NULL;
    TBMIterateResult *tbmres;
-   OffsetNumber targoffset;
    TupleTableSlot *slot;
    ParallelBitmapHeapState *pstate = node->pstate;
    dsa_area   *dsa = node->ss.ps.state->es_query_dsa;
@@ -191,16 +191,27 @@ BitmapHeapNext(BitmapHeapScanState *node)
 
    for (;;)
    {
-       Page        dp;
-       ItemId      lp;
-
        CHECK_FOR_INTERRUPTS();
 
-       /*
-        * Get next page of results if needed
-        */
-       if (tbmres == NULL)
+       if (node->return_empty_tuples > 0)
+       {
+           ExecStoreAllNullTuple(slot);
+           node->return_empty_tuples--;
+       }
+       else if (tbmres)
+       {
+           if (!table_scan_bitmap_pagescan_next(scan, slot))
+           {
+               node->tbmres = tbmres = NULL;
+               continue;
+           }
+       }
+       else
        {
+           /*
+            * Get next page of results if needed
+            */
+
            if (!pstate)
                node->tbmres = tbmres = tbm_iterate(tbmiterator);
            else
@@ -213,18 +224,6 @@ BitmapHeapNext(BitmapHeapScanState *node)
 
            BitmapAdjustPrefetchIterator(node, tbmres);
 
-           /*
-            * Ignore any claimed entries past what we think is the end of the
-            * relation.  (This is probably not necessary given that we got at
-            * least AccessShareLock on the table before performing any of the
-            * indexscans, but let's be safe.)
-            */
-           if (tbmres->blockno >= scan->rs_nblocks)
-           {
-               node->tbmres = tbmres = NULL;
-               continue;
-           }
-
            /*
             * We can skip fetching the heap page if we don't need any fields
             * from the heap, and the bitmap entries don't need rechecking,
@@ -240,16 +239,21 @@ BitmapHeapNext(BitmapHeapScanState *node)
            {
                /*
                 * The number of tuples on this page is put into
-                * scan->rs_ntuples; note we don't fill scan->rs_vistuples.
+                * node->return_empty_tuples; note we don't fill
+                * scan->rs_vistuples.
                 */
-               scan->rs_ntuples = tbmres->ntuples;
+               node->return_empty_tuples = tbmres->ntuples;
            }
            else
            {
                /*
                 * Fetch the current heap page and identify candidate tuples.
                 */
-               bitgetpage(scan, tbmres);
+               if (!table_scan_bitmap_pagescan(scan, tbmres))
+               {
+                   /* AM doesn't think this block is valid, skip */
+                   continue;
+               }
            }
 
            if (tbmres->ntuples >= 0)
@@ -257,51 +261,37 @@ BitmapHeapNext(BitmapHeapScanState *node)
            else
                node->lossy_pages++;
 
-           /*
-            * Set rs_cindex to first slot to examine
-            */
-           scan->rs_cindex = 0;
-
            /* Adjust the prefetch target */
            BitmapAdjustPrefetchTarget(node);
-       }
-       else
-       {
-           /*
-            * Continuing in previously obtained page; advance rs_cindex
-            */
-           scan->rs_cindex++;
-
-#ifdef USE_PREFETCH
 
            /*
-            * Try to prefetch at least a few pages even before we get to the
-            * second page if we don't stop reading after the first tuple.
+            * XXX: Note we do not prefetch here.
             */
-           if (!pstate)
-           {
-               if (node->prefetch_target < node->prefetch_maximum)
-                   node->prefetch_target++;
-           }
-           else if (pstate->prefetch_target < node->prefetch_maximum)
-           {
-               /* take spinlock while updating shared state */
-               SpinLockAcquire(&pstate->mutex);
-               if (pstate->prefetch_target < node->prefetch_maximum)
-                   pstate->prefetch_target++;
-               SpinLockRelease(&pstate->mutex);
-           }
-#endif                         /* USE_PREFETCH */
+
+           continue;
        }
 
+
+#ifdef USE_PREFETCH
+
        /*
-        * Out of range?  If so, nothing more to look at on this page
+        * Try to prefetch at least a few pages even before we get to the
+        * second page if we don't stop reading after the first tuple.
         */
-       if (scan->rs_cindex < 0 || scan->rs_cindex >= scan->rs_ntuples)
+       if (!pstate)
        {
-           node->tbmres = tbmres = NULL;
-           continue;
+           if (node->prefetch_target < node->prefetch_maximum)
+               node->prefetch_target++;
        }
+       else if (pstate->prefetch_target < node->prefetch_maximum)
+       {
+           /* take spinlock while updating shared state */
+           SpinLockAcquire(&pstate->mutex);
+           if (pstate->prefetch_target < node->prefetch_maximum)
+               pstate->prefetch_target++;
+           SpinLockRelease(&pstate->mutex);
+       }
+#endif                         /* USE_PREFETCH */
 
        /*
         * We issue prefetch requests *after* fetching the current page to try
@@ -312,52 +302,19 @@ BitmapHeapNext(BitmapHeapScanState *node)
         */
        BitmapPrefetch(node, scan);
 
-       if (node->skip_fetch)
-       {
-           /*
-            * If we don't have to fetch the tuple, just return nulls.
-            */
-           ExecStoreAllNullTuple(slot);
-       }
-       else
+       /*
+        * If we are using lossy info, we have to recheck the qual
+        * conditions at every tuple.
+        */
+       if (tbmres->recheck)
        {
-           /*
-            * Okay to fetch the tuple.
-            */
-           targoffset = scan->rs_vistuples[scan->rs_cindex];
-           dp = (Page) BufferGetPage(scan->rs_cbuf);
-           lp = PageGetItemId(dp, targoffset);
-           Assert(ItemIdIsNormal(lp));
-
-           scan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
-           scan->rs_ctup.t_len = ItemIdGetLength(lp);
-           scan->rs_ctup.t_tableOid = scan->rs_rd->rd_id;
-           ItemPointerSet(&scan->rs_ctup.t_self, tbmres->blockno, targoffset);
-
-           pgstat_count_heap_fetch(scan->rs_rd);
-
-           /*
-            * Set up the result slot to point to this tuple.  Note that the
-            * slot acquires a pin on the buffer.
-            */
-           ExecStoreBufferHeapTuple(&scan->rs_ctup,
-                                    slot,
-                                    scan->rs_cbuf);
-
-           /*
-            * If we are using lossy info, we have to recheck the qual
-            * conditions at every tuple.
-            */
-           if (tbmres->recheck)
+           econtext->ecxt_scantuple = slot;
+           if (!ExecQualAndReset(node->bitmapqualorig, econtext))
            {
-               econtext->ecxt_scantuple = slot;
-               if (!ExecQualAndReset(node->bitmapqualorig, econtext))
-               {
-                   /* Fails recheck, so drop it and loop back for another */
-                   InstrCountFiltered2(node, 1);
-                   ExecClearTuple(slot);
-                   continue;
-               }
+               /* Fails recheck, so drop it and loop back for another */
+               InstrCountFiltered2(node, 1);
+               ExecClearTuple(slot);
+               continue;
            }
        }
 
@@ -371,110 +328,6 @@ BitmapHeapNext(BitmapHeapScanState *node)
    return ExecClearTuple(slot);
 }
 
-/*
- * bitgetpage - subroutine for BitmapHeapNext()
- *
- * This routine reads and pins the specified page of the relation, then
- * builds an array indicating which tuples on the page are both potentially
- * interesting according to the bitmap, and visible according to the snapshot.
- */
-static void
-bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres)
-{
-   BlockNumber page = tbmres->blockno;
-   Buffer      buffer;
-   Snapshot    snapshot;
-   int         ntup;
-
-   /*
-    * Acquire pin on the target heap page, trading in any pin we held before.
-    */
-   Assert(page < scan->rs_nblocks);
-
-   scan->rs_cbuf = ReleaseAndReadBuffer(scan->rs_cbuf,
-                                        scan->rs_rd,
-                                        page);
-   buffer = scan->rs_cbuf;
-   snapshot = scan->rs_snapshot;
-
-   ntup = 0;
-
-   /*
-    * Prune and repair fragmentation for the whole page, if possible.
-    */
-   heap_page_prune_opt(scan->rs_rd, buffer);
-
-   /*
-    * We must hold share lock on the buffer content while examining tuple
-    * visibility.  Afterwards, however, the tuples we have found to be
-    * visible are guaranteed good as long as we hold the buffer pin.
-    */
-   LockBuffer(buffer, BUFFER_LOCK_SHARE);
-
-   /*
-    * We need two separate strategies for lossy and non-lossy cases.
-    */
-   if (tbmres->ntuples >= 0)
-   {
-       /*
-        * Bitmap is non-lossy, so we just look through the offsets listed in
-        * tbmres; but we have to follow any HOT chain starting at each such
-        * offset.
-        */
-       int         curslot;
-
-       for (curslot = 0; curslot < tbmres->ntuples; curslot++)
-       {
-           OffsetNumber offnum = tbmres->offsets[curslot];
-           ItemPointerData tid;
-           HeapTupleData heapTuple;
-
-           ItemPointerSet(&tid, page, offnum);
-           if (heap_hot_search_buffer(&tid, scan->rs_rd, buffer, snapshot,
-                                      &heapTuple, NULL, true))
-               scan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid);
-       }
-   }
-   else
-   {
-       /*
-        * Bitmap is lossy, so we must examine each item pointer on the page.
-        * But we can ignore HOT chains, since we'll check each tuple anyway.
-        */
-       Page        dp = (Page) BufferGetPage(buffer);
-       OffsetNumber maxoff = PageGetMaxOffsetNumber(dp);
-       OffsetNumber offnum;
-
-       for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
-       {
-           ItemId      lp;
-           HeapTupleData loctup;
-           bool        valid;
-
-           lp = PageGetItemId(dp, offnum);
-           if (!ItemIdIsNormal(lp))
-               continue;
-           loctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
-           loctup.t_len = ItemIdGetLength(lp);
-           loctup.t_tableOid = scan->rs_rd->rd_id;
-           ItemPointerSet(&loctup.t_self, page, offnum);
-           valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer);
-           if (valid)
-           {
-               scan->rs_vistuples[ntup++] = offnum;
-               PredicateLockTuple(scan->rs_rd, &loctup, snapshot);
-           }
-           CheckForSerializableConflictOut(valid, scan->rs_rd, &loctup,
-                                           buffer, snapshot);
-       }
-   }
-
-   LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
-
-   Assert(ntup <= MaxHeapTuplesPerPage);
-   scan->rs_ntuples = ntup;
-}
-
 /*
  * BitmapDoneInitializingSharedState - Shared state is initialized
  *
@@ -598,7 +451,7 @@ BitmapAdjustPrefetchTarget(BitmapHeapScanState *node)
  * BitmapPrefetch - Prefetch, if prefetch_pages are behind prefetch_target
  */
 static inline void
-BitmapPrefetch(BitmapHeapScanState *node, HeapScanDesc scan)
+BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
 {
 #ifdef USE_PREFETCH
    ParallelBitmapHeapState *pstate = node->pstate;
@@ -741,7 +594,7 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
    PlanState  *outerPlan = outerPlanState(node);
 
    /* rescan to release any page pin */
-   heap_rescan(node->ss.ss_currentScanDesc, NULL);
+   table_rescan(node->ss.ss_currentScanDesc, NULL);
 
    /* release bitmaps and buffers if any */
    if (node->tbmiterator)
@@ -785,7 +638,7 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
 void
 ExecEndBitmapHeapScan(BitmapHeapScanState *node)
 {
-   HeapScanDesc scanDesc;
+   TableScanDesc scanDesc;
 
    /*
     * extract information from the node
@@ -830,7 +683,7 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node)
    /*
     * close heap scan
     */
-   heap_endscan(scanDesc);
+   table_endscan(scanDesc);
 }
 
 /* ----------------------------------------------------------------
@@ -914,8 +767,7 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
     */
    ExecInitScanTupleSlot(estate, &scanstate->ss,
                          RelationGetDescr(currentRelation),
-                         &TTSOpsBufferHeapTuple);
-
+                         table_slot_callbacks(currentRelation));
 
    /*
     * Initialize result type and projection.
@@ -953,10 +805,10 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
     * Even though we aren't going to do a conventional seqscan, it is useful
     * to create a HeapScanDesc --- most of the fields in it are usable.
     */
-   scanstate->ss.ss_currentScanDesc = heap_beginscan_bm(currentRelation,
-                                                        estate->es_snapshot,
-                                                        0,
-                                                        NULL);
+   scanstate->ss.ss_currentScanDesc = table_beginscan_bm(currentRelation,
+                                                         estate->es_snapshot,
+                                                         0,
+                                                         NULL);
 
    /*
     * all done.
@@ -1104,5 +956,5 @@ ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node,
    node->pstate = pstate;
 
    snapshot = RestoreSnapshot(pstate->phs_snapshot_data);
-   heap_update_snapshot(node->ss.ss_currentScanDesc, snapshot);
+   table_scan_update_snapshot(node->ss.ss_currentScanDesc, snapshot);
 }
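
/*
 * Editorial sketch of the page/tuple iteration BitmapHeapNext() now
 * delegates to the table AM: table_scan_bitmap_pagescan() takes over the
 * job of the removed bitgetpage() (pin the page, collect visible
 * candidates), and table_scan_bitmap_pagescan_next() replaces the manual
 * rs_cindex/ExecStoreBufferHeapTuple advance.  Skip-fetch and prefetch
 * logic are omitted; the helper name is invented, the callbacks match the
 * diff.
 */
static bool
bitmap_next_sketch(TableScanDesc scan, TBMIterator *iterator,
                   TBMIterateResult **tbmres, TupleTableSlot *slot)
{
    for (;;)
    {
        if (*tbmres == NULL)
        {
            if ((*tbmres = tbm_iterate(iterator)) == NULL)
                return false;   /* bitmap exhausted */
            if (!table_scan_bitmap_pagescan(scan, *tbmres))
            {
                *tbmres = NULL; /* AM rejected this block, skip it */
                continue;
            }
        }
        if (table_scan_bitmap_pagescan_next(scan, slot))
            return true;        /* slot holds the next candidate tuple */
        *tbmres = NULL;         /* page exhausted, move to the next one */
    }
}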
index fab752058aeed8ec4844a77685f09c5a2f3a9dbc..c96db36e0dfdbf58ce95d49e13575429be7f30e2 100644 (file)
@@ -62,9 +62,12 @@ ForeignNext(ForeignScanState *node)
     */
    if (plan->fsSystemCol && !TupIsNull(slot))
    {
-       HeapTuple   tup = ExecFetchSlotHeapTuple(slot, true, NULL);
-
-       tup->t_tableOid = RelationGetRelid(node->ss.ss_currentRelation);
+       ExecMaterializeSlot(slot);
+#if 0
+       ExecSlotUpdateTupleTableoid(slot,
+                                   RelationGetRelid(node->ss.ss_currentRelation));
+#endif
+       slot->tts_tableOid = RelationGetRelid(node->ss.ss_currentRelation);
    }
 
    return slot;
index e6367ade76d9325ec33a1e0143e1b15ade54a624..1dd8bb3f3a60b7989a5671050cf8cf8a209ee555 100644 (file)
@@ -266,7 +266,7 @@ gather_getnext(GatherState *gatherstate)
    PlanState  *outerPlan = outerPlanState(gatherstate);
    TupleTableSlot *outerTupleSlot;
    TupleTableSlot *fslot = gatherstate->funnel_slot;
-   HeapTuple   tup;
+   HeapTuple tup;
 
    while (gatherstate->nreaders > 0 || gatherstate->need_to_scan_locally)
    {
@@ -316,7 +316,7 @@ gather_readnext(GatherState *gatherstate)
    for (;;)
    {
        TupleQueueReader *reader;
-       HeapTuple   tup;
+       HeapTuple tup;
        bool        readerdone;
 
        /* Check for async events, particularly messages from workers. */
index 51d910bd5ee0854b7d81a5806752c4e6123a9b9f..54ef0ca7b7d68418468e53807d6e5d6f19dc6911 100644 (file)
@@ -45,7 +45,7 @@
  */
 typedef struct GMReaderTupleBuffer
 {
-   HeapTuple  *tuple;          /* array of length MAX_TUPLE_STORE */
+   HeapTuple *tuple;       /* array of length MAX_TUPLE_STORE */
    int         nTuples;        /* number of tuples currently stored */
    int         readCounter;    /* index of next tuple to extract */
    bool        done;           /* true if reader is known exhausted */
@@ -55,7 +55,7 @@ static TupleTableSlot *ExecGatherMerge(PlanState *pstate);
 static int32 heap_compare_slots(Datum a, Datum b, void *arg);
 static TupleTableSlot *gather_merge_getnext(GatherMergeState *gm_state);
 static HeapTuple gm_readnext_tuple(GatherMergeState *gm_state, int nreader,
-                 bool nowait, bool *done);
+                                     bool nowait, bool *done);
 static void ExecShutdownGatherMergeWorkers(GatherMergeState *node);
 static void gather_merge_setup(GatherMergeState *gm_state);
 static void gather_merge_init(GatherMergeState *gm_state);
@@ -637,7 +637,7 @@ static bool
 gather_merge_readnext(GatherMergeState *gm_state, int reader, bool nowait)
 {
    GMReaderTupleBuffer *tuple_buffer;
-   HeapTuple   tup;
+   HeapTuple tup;
 
    /*
     * If we're being asked to generate a tuple from the leader, then we just
@@ -716,7 +716,7 @@ gm_readnext_tuple(GatherMergeState *gm_state, int nreader, bool nowait,
                  bool *done)
 {
    TupleQueueReader *reader;
-   HeapTuple   tup;
+   HeapTuple tup;
 
    /* Check for async events, particularly messages from workers. */
    CHECK_FOR_INTERRUPTS();
index 8498273207b4334b3dac10fbe15f51ab7807b2c4..c39c4f453dbaea1716d492268d551be65bfb9ddf 100644 (file)
@@ -31,6 +31,7 @@
 #include "postgres.h"
 
 #include "access/relscan.h"
+#include "access/tableam.h"
 #include "access/visibilitymap.h"
 #include "executor/execdebug.h"
 #include "executor/nodeIndexonlyscan.h"
@@ -117,7 +118,7 @@ IndexOnlyNext(IndexOnlyScanState *node)
     */
    while ((tid = index_getnext_tid(scandesc, direction)) != NULL)
    {
-       HeapTuple   tuple = NULL;
+       bool    tuple_from_heap = false;
 
        CHECK_FOR_INTERRUPTS();
 
@@ -163,17 +164,18 @@ IndexOnlyNext(IndexOnlyScanState *node)
             * Rats, we have to visit the heap to check visibility.
             */
            InstrCountTuples2(node, 1);
-           tuple = index_fetch_heap(scandesc);
-           if (tuple == NULL)
+           if (!index_fetch_heap(scandesc, slot))
                continue;       /* no visible tuple, try next index entry */
 
+           ExecClearTuple(slot);
+
            /*
             * Only MVCC snapshots are supported here, so there should be no
             * need to keep following the HOT chain once a visible entry has
             * been found.  If we did want to allow that, we'd need to keep
             * more state to remember not to call index_getnext_tid next time.
             */
-           if (scandesc->xs_continue_hot)
+           if (scandesc->xs_heap_continue)
                elog(ERROR, "non-MVCC snapshots are not supported in index-only scans");
 
            /*
@@ -182,13 +184,15 @@ IndexOnlyNext(IndexOnlyScanState *node)
             * but it's not clear whether it's a win to do so.  The next index
             * entry might require a visit to the same heap page.
             */
+
+           tuple_from_heap = true;
        }
 
        /*
         * Fill the scan tuple slot with data from the index.  This might be
-        * provided in either HeapTuple or IndexTuple format.  Conceivably an
-        * index AM might fill both fields, in which case we prefer the heap
-        * format, since it's probably a bit cheaper to fill a slot from.
+        * provided in either HeapTuple or IndexTuple format.  Conceivably
+        * an index AM might fill both fields, in which case we prefer the
+        * heap format, since it's probably a bit cheaper to fill a slot from.
         */
        if (scandesc->xs_hitup)
        {
@@ -200,6 +204,7 @@ IndexOnlyNext(IndexOnlyScanState *node)
            Assert(slot->tts_tupleDescriptor->natts ==
                   scandesc->xs_hitupdesc->natts);
            ExecForceStoreHeapTuple(scandesc->xs_hitup, slot);
+           slot->tts_tableOid = RelationGetRelid(scandesc->heapRelation);
        }
        else if (scandesc->xs_itup)
            StoreIndexTuple(slot, scandesc->xs_itup, scandesc->xs_itupdesc);
@@ -242,7 +247,7 @@ IndexOnlyNext(IndexOnlyScanState *node)
         * anyway, then we already have the tuple-level lock and can skip the
         * page lock.
         */
-       if (tuple == NULL)
+       if (!tuple_from_heap)
            PredicateLockPage(scandesc->heapRelation,
                              ItemPointerGetBlockNumber(tid),
                              estate->es_snapshot);
@@ -424,7 +429,7 @@ ExecIndexOnlyMarkPos(IndexOnlyScanState *node)
 {
    EState     *estate = node->ss.ps.state;
 
-   if (estate->es_epqTuple != NULL)
+   if (estate->es_epqTupleSlot != NULL)
    {
        /*
         * We are inside an EvalPlanQual recheck.  If a test tuple exists for
@@ -459,7 +464,7 @@ ExecIndexOnlyRestrPos(IndexOnlyScanState *node)
 {
    EState     *estate = node->ss.ps.state;
 
-   if (estate->es_epqTuple != NULL)
+   if (estate->es_epqTupleSlot != NULL)
    {
        /* See comments in ExecIndexOnlyMarkPos */
        Index       scanrelid = ((Scan *) node->ss.ps.plan)->scanrelid;
@@ -527,7 +532,8 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
     * suitable data anyway.)
     */
    tupDesc = ExecTypeFromTL(node->indextlist);
-   ExecInitScanTupleSlot(estate, &indexstate->ss, tupDesc, &TTSOpsHeapTuple);
+   ExecInitScanTupleSlot(estate, &indexstate->ss, tupDesc,
+                         table_slot_callbacks(currentRelation));
 
    /*
     * Initialize result type and projection info.  The node's targetlist will
index 6b222a9f64cb863c13b12590d9ebeb1093243d98..b38dadaa9a53e5219981bf59273fa822185e4e92 100644 (file)
@@ -31,6 +31,7 @@
 
 #include "access/nbtree.h"
 #include "access/relscan.h"
+#include "access/tableam.h"
 #include "catalog/pg_am.h"
 #include "executor/execdebug.h"
 #include "executor/nodeIndexscan.h"
@@ -51,7 +52,7 @@
 typedef struct
 {
    pairingheap_node ph_node;
-   HeapTuple   htup;
+   HeapTuple htup;
    Datum      *orderbyvals;
    bool       *orderbynulls;
 } ReorderTuple;
@@ -84,7 +85,6 @@ IndexNext(IndexScanState *node)
    ExprContext *econtext;
    ScanDirection direction;
    IndexScanDesc scandesc;
-   HeapTuple   tuple;
    TupleTableSlot *slot;
 
    /*
@@ -131,20 +131,10 @@ IndexNext(IndexScanState *node)
    /*
     * ok, now that we have what we need, fetch the next tuple.
     */
-   while ((tuple = index_getnext(scandesc, direction)) != NULL)
+   while (index_getnext_slot(scandesc, direction, slot))
    {
        CHECK_FOR_INTERRUPTS();
 
-       /*
-        * Store the scanned tuple in the scan tuple slot of the scan state.
-        * Note: we pass 'false' because tuples returned by amgetnext are
-        * pointers onto disk pages and must not be pfree()'d.
-        */
-       ExecStoreBufferHeapTuple(tuple, /* tuple to store */
-                                slot,  /* slot to store in */
-                                scandesc->xs_cbuf);    /* buffer containing
-                                                        * tuple */
-
        /*
         * If the index was lossy, we have to recheck the index quals using
         * the fetched tuple.
@@ -184,7 +174,6 @@ IndexNextWithReorder(IndexScanState *node)
    EState     *estate;
    ExprContext *econtext;
    IndexScanDesc scandesc;
-   HeapTuple   tuple;
    TupleTableSlot *slot;
    ReorderTuple *topmost = NULL;
    bool        was_exact;
@@ -253,9 +242,12 @@ IndexNextWithReorder(IndexScanState *node)
                                scandesc->xs_orderbynulls,
                                node) <= 0)
            {
+               HeapTuple tuple;
+
                tuple = reorderqueue_pop(node);
 
                /* Pass 'true', as the tuple in the queue is a palloc'd copy */
+               slot->tts_tableOid = RelationGetRelid(scandesc->heapRelation);
                ExecStoreHeapTuple(tuple, slot, true);
                return slot;
            }
@@ -272,8 +264,7 @@ IndexNextWithReorder(IndexScanState *node)
         */
 next_indextuple:
        slot = node->ss.ss_ScanTupleSlot;
-       tuple = index_getnext(scandesc, ForwardScanDirection);
-       if (!tuple)
+       if (!index_getnext_slot(scandesc, ForwardScanDirection, slot))
        {
            /*
             * No more tuples from the index.  But we still need to drain any
@@ -283,14 +274,6 @@ next_indextuple:
            continue;
        }
 
-       /*
-        * Store the scanned tuple in the scan tuple slot of the scan state.
-        */
-       ExecStoreBufferHeapTuple(tuple, /* tuple to store */
-                                slot,  /* slot to store in */
-                                scandesc->xs_cbuf);    /* buffer containing
-                                                        * tuple */
-
        /*
         * If the index was lossy, we have to recheck the index quals and
         * ORDER BY expressions using the fetched tuple.
@@ -358,6 +341,8 @@ next_indextuple:
                                                      topmost->orderbynulls,
                                                      node) > 0))
        {
+           HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, NULL);
+
            /* Put this tuple to the queue */
            reorderqueue_push(node, tuple, lastfetched_vals, lastfetched_nulls);
            continue;
@@ -515,7 +500,7 @@ reorderqueue_push(IndexScanState *node, HeapTuple tuple,
 static HeapTuple
 reorderqueue_pop(IndexScanState *node)
 {
-   HeapTuple   result;
+   HeapTuple result;
    ReorderTuple *topmost;
    int         i;
 
@@ -851,7 +836,7 @@ ExecIndexMarkPos(IndexScanState *node)
 {
    EState     *estate = node->ss.ps.state;
 
-   if (estate->es_epqTuple != NULL)
+   if (estate->es_epqTupleSlot != NULL)
    {
        /*
         * We are inside an EvalPlanQual recheck.  If a test tuple exists for
@@ -886,7 +871,7 @@ ExecIndexRestrPos(IndexScanState *node)
 {
    EState     *estate = node->ss.ps.state;
 
-   if (estate->es_epqTuple != NULL)
+   if (estate->es_epqTupleSlot != NULL)
    {
        /* See comments in ExecIndexMarkPos */
        Index       scanrelid = ((Scan *) node->ss.ps.plan)->scanrelid;
@@ -950,7 +935,7 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
     */
    ExecInitScanTupleSlot(estate, &indexstate->ss,
                          RelationGetDescr(currentRelation),
-                         &TTSOpsBufferHeapTuple);
+                         table_slot_callbacks(currentRelation));
 
    if (node->indexorderby != NIL)
        indexstate->ss.ps.scanopsfixed = false;
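
/*
 * Editorial sketch: the scan-loop shape IndexNext() uses after this change.
 * index_getnext_slot() subsumes the old index_getnext() +
 * ExecStoreBufferHeapTuple() pair, fetching the heap row directly into a
 * slot that was created with the relation's table_slot_callbacks().  Lossy
 * qual rechecking is elided; the helper name is invented.
 */
static TupleTableSlot *
index_next_sketch(IndexScanDesc scandesc, ScanDirection direction,
                  TupleTableSlot *slot)
{
    while (index_getnext_slot(scandesc, direction, slot))
    {
        CHECK_FOR_INTERRUPTS();
        /* recheck lossy-index quals against the slot here, else: */
        return slot;
    }
    return NULL;                /* index exhausted */
}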
index 7887388b9e9fb3bf2356f09904adc57e7b12f641..4451779ff8a744454fcae3671448a34f1836bad7 100644 (file)
@@ -22,6 +22,7 @@
 #include "postgres.h"
 
 #include "access/htup_details.h"
+#include "access/tableam.h"
 #include "access/xact.h"
 #include "executor/executor.h"
 #include "executor/nodeLockRows.h"
@@ -66,6 +67,8 @@ lnext:
    /* We don't need EvalPlanQual unless we get updated tuple version(s) */
    epq_needed = false;
 
+   EvalPlanQualBegin(&node->lr_epqstate, estate);
+
    /*
     * Attempt to lock the source tuple(s).  (Note we only have locking
     * rowmarks in lr_arowMarks.)
@@ -74,21 +77,19 @@ lnext:
    {
        ExecAuxRowMark *aerm = (ExecAuxRowMark *) lfirst(lc);
        ExecRowMark *erm = aerm->rowmark;
-       HeapTuple  *testTuple;
+       TupleTableSlot *markSlot;
        Datum       datum;
        bool        isNull;
-       HeapTupleData tuple;
-       Buffer      buffer;
        HeapUpdateFailureData hufd;
        LockTupleMode lockmode;
        HTSU_Result test;
-       HeapTuple   copyTuple;
+       ItemPointerData tid;
 
        /* clear any leftover test tuple for this rel */
-       testTuple = &(node->lr_curtuples[erm->rti - 1]);
-       if (*testTuple != NULL)
-           heap_freetuple(*testTuple);
-       *testTuple = NULL;
+       // used to be the following - can we skip having lr_curtuples?
+       //testSlot = node->lr_curtuples[erm->rti - 1];
+       markSlot = EvalPlanQualSlot(&node->lr_epqstate, erm->relation, erm->rti);
+       ExecClearTuple(markSlot);
 
        /* if child rel, must check whether it produced this row */
        if (erm->rti != erm->prti)
@@ -129,25 +130,25 @@ lnext:
            bool        updated = false;
 
            fdwroutine = GetFdwRoutineForRelation(erm->relation, false);
+
            /* this should have been checked already, but let's be safe */
            if (fdwroutine->RefetchForeignRow == NULL)
                ereport(ERROR,
                        (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                         errmsg("cannot lock rows in foreign table \"%s\"",
                                RelationGetRelationName(erm->relation))));
-           copyTuple = fdwroutine->RefetchForeignRow(estate,
-                                                     erm,
-                                                     datum,
-                                                     &updated);
-           if (copyTuple == NULL)
+
+           markSlot = fdwroutine->RefetchForeignRow(estate,
+                                                    erm,
+                                                    datum,
+                                                    markSlot,
+                                                    &updated);
+           if (markSlot == NULL)
            {
                /* couldn't get the lock, so skip this row */
                goto lnext;
            }
 
-           /* save locked tuple for possible EvalPlanQual testing below */
-           *testTuple = copyTuple;
-
            /*
             * if FDW says tuple was updated before getting locked, we need to
             * perform EPQ testing to see if quals are still satisfied
@@ -159,7 +160,7 @@ lnext:
        }
 
        /* okay, try to lock the tuple */
-       tuple.t_self = *((ItemPointer) DatumGetPointer(datum));
+       tid = *((ItemPointer) DatumGetPointer(datum));
        switch (erm->markType)
        {
            case ROW_MARK_EXCLUSIVE:
@@ -180,11 +181,13 @@ lnext:
                break;
        }
 
-       test = heap_lock_tuple(erm->relation, &tuple,
-                              estate->es_output_cid,
-                              lockmode, erm->waitPolicy, true,
-                              &buffer, &hufd);
-       ReleaseBuffer(buffer);
+       test = table_lock_tuple(erm->relation, &tid, estate->es_snapshot,
+                               markSlot, estate->es_output_cid,
+                               lockmode, erm->waitPolicy,
+                               (IsolationUsesXactSnapshot() ? 0 : TUPLE_LOCK_FLAG_FIND_LAST_VERSION)
+                               | TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS,
+                               &hufd);
+
        switch (test)
        {
            case HeapTupleWouldBlock:
@@ -211,6 +214,15 @@ lnext:
 
            case HeapTupleMayBeUpdated:
                /* got the lock successfully */
+               if (hufd.traversed)
+               {
+                   /* locked tuple saved in markSlot for EvalPlanQual testing below */
+
+                   /* Remember we need to do EPQ testing */
+                   epq_needed = true;
+
+                   /* Continue loop until we have all target tuples */
+               }
                break;
 
            case HeapTupleUpdated:
@@ -218,41 +230,19 @@ lnext:
                    ereport(ERROR,
                            (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
                             errmsg("could not serialize access due to concurrent update")));
-               if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
+               /* tuple was concurrently updated; skip this row */
+               goto lnext;
+
+           case HeapTupleDeleted:
+               if (IsolationUsesXactSnapshot())
                    ereport(ERROR,
                            (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
-                            errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
-
-               if (ItemPointerEquals(&hufd.ctid, &tuple.t_self))
-               {
-                   /* Tuple was deleted, so don't return it */
-                   goto lnext;
-               }
-
-               /* updated, so fetch and lock the updated version */
-               copyTuple = EvalPlanQualFetch(estate, erm->relation,
-                                             lockmode, erm->waitPolicy,
-                                             &hufd.ctid, hufd.xmax);
-
-               if (copyTuple == NULL)
-               {
-                   /*
-                    * Tuple was deleted; or it's locked and we're under SKIP
-                    * LOCKED policy, so don't return it
-                    */
-                   goto lnext;
-               }
-               /* remember the actually locked tuple's TID */
-               tuple.t_self = copyTuple->t_self;
-
-               /* Save locked tuple for EvalPlanQual testing below */
-               *testTuple = copyTuple;
-
-               /* Remember we need to do EPQ testing */
-               epq_needed = true;
-
-               /* Continue loop until we have all target tuples */
-               break;
+                            errmsg("could not serialize access due to concurrent update")));
+               /*
+                * Tuple was deleted; or it's locked and we're under SKIP
+                * LOCKED policy, so don't return it
+                */
+               goto lnext;
 
            case HeapTupleInvisible:
                elog(ERROR, "attempted to lock invisible tuple");
@@ -264,7 +254,7 @@ lnext:
        }
 
        /* Remember locked tuple's TID for EPQ testing and WHERE CURRENT OF */
-       erm->curCtid = tuple.t_self;
+       erm->curCtid = tid;
    }
 
    /*
@@ -273,7 +263,7 @@ lnext:
    if (epq_needed)
    {
        /* Initialize EPQ machinery */
-       EvalPlanQualBegin(&node->lr_epqstate, estate);
+       /* (EvalPlanQualBegin() already ran before the locking loop above) */
 
        /*
         * Transfer any already-fetched tuples into the EPQ state, and fetch a
@@ -286,26 +276,31 @@ lnext:
        {
            ExecAuxRowMark *aerm = (ExecAuxRowMark *) lfirst(lc);
            ExecRowMark *erm = aerm->rowmark;
-           HeapTupleData tuple;
-           Buffer      buffer;
+           TupleTableSlot *markSlot;
+
+           markSlot = EvalPlanQualSlot(&node->lr_epqstate, erm->relation, erm->rti);
 
            /* skip non-active child tables, but clear their test tuples */
            if (!erm->ermActive)
            {
                Assert(erm->rti != erm->prti);  /* check it's child table */
-               EvalPlanQualSetTuple(&node->lr_epqstate, erm->rti, NULL);
+               ExecClearTuple(markSlot);
                continue;
            }
 
            /* was tuple updated and fetched above? */
-           if (node->lr_curtuples[erm->rti - 1] != NULL)
+           if (!TupIsNull(markSlot))
            {
+#if FIXME
                /* yes, so set it as the EPQ test tuple for this rel */
                EvalPlanQualSetTuple(&node->lr_epqstate,
                                     erm->rti,
                                     node->lr_curtuples[erm->rti - 1]);
                /* freeing this tuple is now the responsibility of EPQ */
                node->lr_curtuples[erm->rti - 1] = NULL;
+#endif
                continue;
            }
 
@@ -314,15 +309,13 @@ lnext:
            Assert(ItemPointerIsValid(&(erm->curCtid)));
 
            /* okay, fetch the tuple */
-           tuple.t_self = erm->curCtid;
-           if (!heap_fetch(erm->relation, SnapshotAny, &tuple, &buffer,
-                           false, NULL))
+           if (!table_fetch_row_version(erm->relation, &erm->curCtid, SnapshotAny, markSlot,
+                              NULL))
                elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
 
            /* successful, copy and store tuple */
-           EvalPlanQualSetTuple(&node->lr_epqstate, erm->rti,
-                                heap_copytuple(&tuple));
-           ReleaseBuffer(buffer);
+           /* nothing further to do: the row already lives in markSlot */
        }
 
        /*
@@ -405,8 +398,8 @@ ExecInitLockRows(LockRows *node, EState *estate, int eflags)
     * Create workspace in which we can remember per-RTE locked tuples
     */
    lrstate->lr_ntables = estate->es_range_table_size;
-   lrstate->lr_curtuples = (HeapTuple *)
-       palloc0(lrstate->lr_ntables * sizeof(HeapTuple));
+   lrstate->lr_curtuples = (TupleTableSlot **)
+       palloc0(lrstate->lr_ntables * sizeof(TupleTableSlot *));
 
    /*
     * Locate the ExecRowMark(s) that this node is responsible for, and
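
Distilled from the nodeLockRows.c changes above, the per-rowmark locking flow
now looks roughly like the sketch below.  The helper name is invented and the
skip/error cases are elided; LockTupleExclusive stands in for the mode that
the real code derives from erm->markType:

    #include "access/tableam.h"
    #include "access/xact.h"
    #include "executor/executor.h"

    static bool
    lock_rowmark_sketch(EState *estate, EPQState *epqstate, ExecRowMark *erm,
                        ItemPointer tid, bool *epq_needed)
    {
        /* the per-relation EPQ slot doubles as storage for the locked tuple */
        TupleTableSlot *markSlot = EvalPlanQualSlot(epqstate, erm->relation,
                                                    erm->rti);
        HeapUpdateFailureData hufd;
        HTSU_Result test;

        ExecClearTuple(markSlot);
        test = table_lock_tuple(erm->relation, tid, estate->es_snapshot,
                                markSlot, estate->es_output_cid,
                                LockTupleExclusive, erm->waitPolicy,
                                (IsolationUsesXactSnapshot() ?
                                 0 : TUPLE_LOCK_FLAG_FIND_LAST_VERSION) |
                                TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS,
                                &hufd);
        if (test != HeapTupleMayBeUpdated)
            return false;       /* caller skips this row */

        /* the lock chased a newer version; plan quals must be re-checked */
        if (hufd.traversed)
            *epq_needed = true;
        return true;
    }
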
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 3aa0d4d0eb748528c506f1e26153cb03cbc80eca..d1ac9fc2e9c99595456ba94751d898be1ac5ce91 100644 (file)
 #include "postgres.h"
 
 #include "access/htup_details.h"
+#include "access/tableam.h"
 #include "access/xact.h"
 #include "catalog/catalog.h"
+#include "catalog/pg_am.h"
 #include "commands/trigger.h"
 #include "executor/execPartition.h"
 #include "executor/executor.h"
@@ -168,15 +170,12 @@ ExecProcessReturning(ResultRelInfo *resultRelInfo,
        econtext->ecxt_scantuple = tupleSlot;
    else
    {
-       HeapTuple   tuple;
-
        /*
         * RETURNING expressions might reference the tableoid column, so
         * initialize t_tableOid before evaluating them.
         */
        Assert(!TupIsNull(econtext->ecxt_scantuple));
-       tuple = ExecFetchSlotHeapTuple(econtext->ecxt_scantuple, true, NULL);
-       tuple->t_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
+       econtext->ecxt_scantuple->tts_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
    }
    econtext->ecxt_outertuple = planSlot;
 
@@ -194,31 +193,33 @@ ExecProcessReturning(ResultRelInfo *resultRelInfo,
  */
 static void
 ExecCheckHeapTupleVisible(EState *estate,
-                         HeapTuple tuple,
-                         Buffer buffer)
+                         Relation rel,
+                         TupleTableSlot *slot)
 {
    if (!IsolationUsesXactSnapshot())
        return;
 
-   /*
-    * We need buffer pin and lock to call HeapTupleSatisfiesVisibility.
-    * Caller should be holding pin, but not lock.
-    */
-   LockBuffer(buffer, BUFFER_LOCK_SHARE);
-   if (!HeapTupleSatisfiesVisibility(tuple, estate->es_snapshot, buffer))
+   if (!table_satisfies_snapshot(rel, slot, estate->es_snapshot))
    {
+       Datum       xminDatum;
+       TransactionId xmin;
+       bool        isnull;
+
+       xminDatum = slot_getsysattr(slot, MinTransactionIdAttributeNumber, &isnull);
+       Assert(!isnull);
+       xmin = DatumGetTransactionId(xminDatum);
+
        /*
         * We should not raise a serialization failure if the conflict is
         * against a tuple inserted by our own transaction, even if it's not
         * visible to our snapshot.  (This would happen, for example, if
         * conflicting keys are proposed for insertion in a single command.)
         */
-       if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple->t_data)))
+       if (!TransactionIdIsCurrentTransactionId(xmin))
            ereport(ERROR,
                    (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
                     errmsg("could not serialize access due to concurrent update")));
    }
-   LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 }
 
 /*
@@ -227,21 +228,19 @@ ExecCheckHeapTupleVisible(EState *estate,
 static void
 ExecCheckTIDVisible(EState *estate,
                    ResultRelInfo *relinfo,
-                   ItemPointer tid)
+                   ItemPointer tid,
+                   TupleTableSlot *tempSlot)
 {
    Relation    rel = relinfo->ri_RelationDesc;
-   Buffer      buffer;
-   HeapTupleData tuple;
 
    /* Redundantly check isolation level */
    if (!IsolationUsesXactSnapshot())
        return;
 
-   tuple.t_self = *tid;
-   if (!heap_fetch(rel, SnapshotAny, &tuple, &buffer, false, NULL))
+   if (!table_fetch_row_version(rel, tid, SnapshotAny, tempSlot, NULL))
        elog(ERROR, "failed to fetch conflicting tuple for ON CONFLICT");
-   ExecCheckHeapTupleVisible(estate, &tuple, buffer);
-   ReleaseBuffer(buffer);
+   ExecCheckHeapTupleVisible(estate, rel, tempSlot);
+   ExecClearTuple(tempSlot);
 }
 
 /* ----------------------------------------------------------------
@@ -260,7 +259,6 @@ ExecInsert(ModifyTableState *mtstate,
           EState *estate,
           bool canSetTag)
 {
-   HeapTuple   tuple;
    ResultRelInfo *resultRelInfo;
    Relation    resultRelationDesc;
    List       *recheckIndexes = NIL;
@@ -269,11 +267,7 @@ ExecInsert(ModifyTableState *mtstate,
    ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
    OnConflictAction onconflict = node->onConflictAction;
 
-   /*
-    * get the heap tuple out of the tuple table slot, making sure we have a
-    * writable copy
-    */
-   tuple = ExecFetchSlotHeapTuple(slot, true, NULL);
+   ExecMaterializeSlot(slot);
 
    /*
     * get information on the (current) result relation
@@ -293,26 +287,16 @@ ExecInsert(ModifyTableState *mtstate,
    if (resultRelInfo->ri_TrigDesc &&
        resultRelInfo->ri_TrigDesc->trig_insert_before_row)
    {
-       slot = ExecBRInsertTriggers(estate, resultRelInfo, slot);
-
-       if (slot == NULL)       /* "do nothing" */
-           return NULL;
-
-       /* trigger might have changed tuple */
-       tuple = ExecFetchSlotHeapTuple(slot, true, NULL);
+       if (!ExecBRInsertTriggers(estate, resultRelInfo, slot))
+           return NULL;        /* "do nothing" */
    }
 
    /* INSTEAD OF ROW INSERT Triggers */
    if (resultRelInfo->ri_TrigDesc &&
        resultRelInfo->ri_TrigDesc->trig_insert_instead_row)
    {
-       slot = ExecIRInsertTriggers(estate, resultRelInfo, slot);
-
-       if (slot == NULL)       /* "do nothing" */
-           return NULL;
-
-       /* trigger might have changed tuple */
-       tuple = ExecFetchSlotHeapTuple(slot, true, NULL);
+       if (!ExecIRInsertTriggers(estate, resultRelInfo, slot))
+           return NULL;        /* "do nothing" */
    }
    else if (resultRelInfo->ri_FdwRoutine)
    {
@@ -327,14 +311,11 @@ ExecInsert(ModifyTableState *mtstate,
        if (slot == NULL)       /* "do nothing" */
            return NULL;
 
-       /* FDW might have changed tuple */
-       tuple = ExecFetchSlotHeapTuple(slot, true, NULL);
-
        /*
         * AFTER ROW Triggers or RETURNING expressions might reference the
         * tableoid column, so initialize t_tableOid before evaluating them.
         */
-       tuple->t_tableOid = RelationGetRelid(resultRelationDesc);
+       slot->tts_tableOid = RelationGetRelid(resultRelationDesc);
    }
    else
    {
@@ -344,7 +325,7 @@ ExecInsert(ModifyTableState *mtstate,
         * Constraints might reference the tableoid column, so initialize
         * t_tableOid before evaluating them.
         */
-       tuple->t_tableOid = RelationGetRelid(resultRelationDesc);
+       slot->tts_tableOid = RelationGetRelid(resultRelationDesc);
 
        /*
         * Check any RLS WITH CHECK policies.
@@ -436,9 +417,11 @@ ExecInsert(ModifyTableState *mtstate,
                     * In case of ON CONFLICT DO NOTHING, do nothing. However,
                     * verify that the tuple is visible to the executor's MVCC
                     * snapshot at higher isolation levels.
+                    *
+                    * XXX: the input slot is reused as scratch space here;
+                    * ExecCheckTIDVisible() clears it again before returning.
                     */
                    Assert(onconflict == ONCONFLICT_NOTHING);
-                   ExecCheckTIDVisible(estate, resultRelInfo, &conflictTid);
+                   ExecCheckTIDVisible(estate, resultRelInfo, &conflictTid, slot);
                    InstrCountTuples2(&mtstate->ps, 1);
                    return NULL;
                }
@@ -451,24 +434,22 @@ ExecInsert(ModifyTableState *mtstate,
             * waiting for the whole transaction to complete.
             */
            specToken = SpeculativeInsertionLockAcquire(GetCurrentTransactionId());
-           HeapTupleHeaderSetSpeculativeToken(tuple->t_data, specToken);
 
            /* insert the tuple, with the speculative token */
-           heap_insert(resultRelationDesc, tuple,
-                       estate->es_output_cid,
-                       HEAP_INSERT_SPECULATIVE,
-                       NULL);
+           table_insert_speculative(resultRelationDesc, slot,
+                                    estate->es_output_cid,
+                                    HEAP_INSERT_SPECULATIVE,
+                                    NULL,
+                                    specToken);
 
            /* insert index entries for tuple */
-           recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
+           recheckIndexes = ExecInsertIndexTuples(slot,
                                                   estate, true, &specConflict,
                                                   arbiterIndexes);
 
            /* adjust the tuple's state accordingly */
-           if (!specConflict)
-               heap_finish_speculative(resultRelationDesc, tuple);
-           else
-               heap_abort_speculative(resultRelationDesc, tuple);
+           table_complete_speculative(resultRelationDesc, slot,
+                                      specToken, specConflict);
 
            /*
             * Wake up anyone waiting for our decision.  They will re-check
@@ -496,26 +477,23 @@ ExecInsert(ModifyTableState *mtstate,
        {
            /*
             * insert the tuple normally.
-            *
-            * Note: heap_insert returns the tid (location) of the new tuple
-            * in the t_self field.
             */
-           heap_insert(resultRelationDesc, tuple,
-                       estate->es_output_cid,
-                       0, NULL);
+           table_insert(resultRelationDesc, slot,
+                        estate->es_output_cid,
+                        0, NULL);
 
            /* insert index entries for tuple */
            if (resultRelInfo->ri_NumIndices > 0)
-               recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
-                                                      estate, false, NULL,
+               recheckIndexes = ExecInsertIndexTuples(slot, estate, false, NULL,
                                                       NIL);
        }
    }
 
    if (canSetTag)
    {
        (estate->es_processed)++;
-       setLastTid(&(tuple->t_self));
+       setLastTid(&(slot->tts_tid));
    }
 
    /*
@@ -530,7 +508,7 @@ ExecInsert(ModifyTableState *mtstate,
    {
        ExecARUpdateTriggers(estate, resultRelInfo, NULL,
                             NULL,
-                            tuple,
+                            slot,
                             NULL,
                             mtstate->mt_transition_capture);
 
@@ -542,7 +520,7 @@ ExecInsert(ModifyTableState *mtstate,
    }
 
    /* AFTER ROW INSERT Triggers */
-   ExecARInsertTriggers(estate, resultRelInfo, tuple, recheckIndexes,
+   ExecARInsertTriggers(estate, resultRelInfo, slot, recheckIndexes,
                         ar_insert_trig_tcs);
 
    list_free(recheckIndexes);
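
For readability, here is the ON CONFLICT speculative-insertion protocol as it
stands after this hunk, extracted into a plain sketch.  The helper name is
invented; the retry loop and the waking of waiters are elided:

    #include "access/heapam.h"
    #include "access/tableam.h"
    #include "access/xact.h"
    #include "storage/lmgr.h"

    static void
    speculative_insert_sketch(Relation rel, TupleTableSlot *slot,
                              EState *estate, List *arbiterIndexes)
    {
        uint32      specToken;
        bool        specConflict = false;
        List       *recheckIndexes;

        /* the token lets concurrent inserters wait for our verdict */
        specToken = SpeculativeInsertionLockAcquire(GetCurrentTransactionId());

        table_insert_speculative(rel, slot, estate->es_output_cid,
                                 HEAP_INSERT_SPECULATIVE, NULL, specToken);

        /* inserting the index entries is what detects a conflict */
        recheckIndexes = ExecInsertIndexTuples(slot, estate, true,
                                               &specConflict, arbiterIndexes);

        /* confirm the insertion, or kill the speculative tuple */
        table_complete_speculative(rel, slot, specToken, specConflict);

        SpeculativeInsertionLockRelease(GetCurrentTransactionId());
        list_free(recheckIndexes);
    }
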
@@ -602,7 +580,7 @@ ExecDelete(ModifyTableState *mtstate,
           bool canSetTag,
           bool changingPart,
           bool *tupleDeleted,
-          TupleTableSlot **epqslot)
+          TupleTableSlot **epqreturnslot)
 {
    ResultRelInfo *resultRelInfo;
    Relation    resultRelationDesc;
@@ -627,7 +605,7 @@ ExecDelete(ModifyTableState *mtstate,
        bool        dodelete;
 
        dodelete = ExecBRDeleteTriggers(estate, epqstate, resultRelInfo,
-                                       tupleid, oldtuple, epqslot);
+                                       tupleid, oldtuple, epqreturnslot);
 
        if (!dodelete)          /* "do nothing" */
            return NULL;
@@ -647,8 +625,6 @@ ExecDelete(ModifyTableState *mtstate,
    }
    else if (resultRelInfo->ri_FdwRoutine)
    {
-       HeapTuple   tuple;
-
        /*
         * delete from foreign table: let the FDW do it
         *
@@ -656,10 +632,7 @@ ExecDelete(ModifyTableState *mtstate,
         * although the FDW can return some other slot if it wants.  Set up
         * the slot's tupdesc so the FDW doesn't need to do that for itself.
         */
-       slot = estate->es_trig_tuple_slot;
-       if (slot->tts_tupleDescriptor != RelationGetDescr(resultRelationDesc))
-           ExecSetSlotDescriptor(slot, RelationGetDescr(resultRelationDesc));
-
+       slot = ExecTriggerGetReturnSlot(estate, resultRelationDesc);
        slot = resultRelInfo->ri_FdwRoutine->ExecForeignDelete(estate,
                                                               resultRelInfo,
                                                               slot,
@@ -674,8 +647,9 @@ ExecDelete(ModifyTableState *mtstate,
         */
        if (TTS_EMPTY(slot))
            ExecStoreAllNullTuple(slot);
-       tuple = ExecFetchSlotHeapTuple(slot, true, NULL);
-       tuple->t_tableOid = RelationGetRelid(resultRelationDesc);
+
+       ExecMaterializeSlot(slot);
+       slot->tts_tableOid = RelationGetRelid(resultRelationDesc);
    }
    else
    {
@@ -689,12 +663,58 @@ ExecDelete(ModifyTableState *mtstate,
         * mode transactions.
         */
 ldelete:;
-       result = heap_delete(resultRelationDesc, tupleid,
+       result = table_delete(resultRelationDesc, tupleid,
                             estate->es_output_cid,
+                            estate->es_snapshot,
                             estate->es_crosscheck_snapshot,
                             true /* wait for commit */ ,
                             &hufd,
                             changingPart);
+
+       if (result == HeapTupleUpdated && !IsolationUsesXactSnapshot())
+       {
+           EvalPlanQualBegin(epqstate, estate);
+           slot = EvalPlanQualSlot(epqstate, resultRelationDesc, resultRelInfo->ri_RangeTableIndex);
+
+           result = table_lock_tuple(resultRelationDesc, tupleid,
+                                     estate->es_snapshot,
+                                     slot, estate->es_output_cid,
+                                     LockTupleExclusive, LockWaitBlock,
+                                     TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
+                                     &hufd);
+           /*
+            * FIXME: should we be able to assert
+            * result != HeapTupleUpdated && hufd.traversed here?
+            */
+           if (result == HeapTupleMayBeUpdated)
+           {
+               TupleTableSlot *epqslot;
+
+               epqslot = EvalPlanQual(estate,
+                                      epqstate,
+                                      resultRelationDesc,
+                                      resultRelInfo->ri_RangeTableIndex,
+                                      slot);
+               if (TupIsNull(epqslot))
+               {
+                   /* Tuple no longer passes the quals; nothing more to do */
+                   return NULL;
+               }
+
+               /* If requested, pass the updated row back instead of deleting it */
+               if (epqreturnslot)
+               {
+                   *epqreturnslot = epqslot;
+                   return NULL;
+               }
+
+               goto ldelete;
+           }
+           else if (result == HeapTupleInvisible)
+           {
+               /* tuple is not visible; nothing to do */
+               return NULL;
+           }
+       }
+
        switch (result)
        {
            case HeapTupleSelfUpdated:
@@ -740,39 +760,16 @@ ldelete:;
                    ereport(ERROR,
                            (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
                             errmsg("could not serialize access due to concurrent update")));
-               if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
+               else
+                   /* shouldn't get here */
+                   elog(ERROR, "wrong table_delete status: %u", result);
+               break;
+
+           case HeapTupleDeleted:
+               if (IsolationUsesXactSnapshot())
                    ereport(ERROR,
                            (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
-                            errmsg("tuple to be deleted was already moved to another partition due to concurrent update")));
-
-               if (!ItemPointerEquals(tupleid, &hufd.ctid))
-               {
-                   TupleTableSlot *my_epqslot;
-
-                   my_epqslot = EvalPlanQual(estate,
-                                             epqstate,
-                                             resultRelationDesc,
-                                             resultRelInfo->ri_RangeTableIndex,
-                                             LockTupleExclusive,
-                                             &hufd.ctid,
-                                             hufd.xmax);
-                   if (!TupIsNull(my_epqslot))
-                   {
-                       *tupleid = hufd.ctid;
-
-                       /*
-                        * If requested, skip delete and pass back the updated
-                        * row.
-                        */
-                       if (epqslot)
-                       {
-                           *epqslot = my_epqslot;
-                           return NULL;
-                       }
-                       else
-                           goto ldelete;
-                   }
-               }
+                            errmsg("could not serialize access due to concurrent delete")));
                /* tuple already deleted; nothing to do */
                return NULL;
 
@@ -834,36 +831,31 @@ ldelete:;
         * gotta fetch it.  We can use the trigger tuple slot.
         */
        TupleTableSlot *rslot;
-       HeapTupleData deltuple;
-       Buffer      delbuffer;
 
        if (resultRelInfo->ri_FdwRoutine)
        {
            /* FDW must have provided a slot containing the deleted row */
            Assert(!TupIsNull(slot));
-           delbuffer = InvalidBuffer;
        }
        else
        {
-           slot = estate->es_trig_tuple_slot;
+           slot = ExecTriggerGetReturnSlot(estate, resultRelationDesc);
            if (oldtuple != NULL)
            {
-               deltuple = *oldtuple;
-               delbuffer = InvalidBuffer;
+               ExecForceStoreHeapTuple(oldtuple, slot);
            }
            else
            {
-               deltuple.t_self = *tupleid;
-               if (!heap_fetch(resultRelationDesc, SnapshotAny,
-                               &deltuple, &delbuffer, false, NULL))
+               if (!table_fetch_row_version(resultRelationDesc, tupleid, SnapshotAny,
+                                slot, NULL))
                    elog(ERROR, "failed to fetch deleted tuple for DELETE RETURNING");
            }
-
-           if (slot->tts_tupleDescriptor != RelationGetDescr(resultRelationDesc))
-               ExecSetSlotDescriptor(slot, RelationGetDescr(resultRelationDesc));
-           ExecStoreHeapTuple(&deltuple, slot, false);
        }
 
+       /* FIXME: centralize setting tts_tableOid instead of doing it at each call site */
+       slot->tts_tableOid = RelationGetRelid(resultRelationDesc);
+       planSlot->tts_tableOid = RelationGetRelid(resultRelationDesc);
+
        rslot = ExecProcessReturning(resultRelInfo, slot, planSlot);
 
        /*
@@ -873,8 +865,6 @@ ldelete:;
        ExecMaterializeSlot(rslot);
 
        ExecClearTuple(slot);
-       if (BufferIsValid(delbuffer))
-           ReleaseBuffer(delbuffer);
 
        return rslot;
    }
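
ExecDelete and ExecUpdate now share the same READ COMMITTED retry shape: when
table_delete()/table_update() reports HeapTupleUpdated, the latest row
version is locked through the AM and EvalPlanQual() re-evaluates the quals
against that slot, replacing the old EvalPlanQualFetch() dance.  Condensed
into a sketch (helper name invented, error cases elided):

    #include "access/tableam.h"
    #include "executor/executor.h"

    static TupleTableSlot *
    epq_recheck_sketch(EState *estate, EPQState *epqstate,
                       Relation rel, Index rti, ItemPointer tid)
    {
        TupleTableSlot *slot;
        HeapUpdateFailureData hufd;

        EvalPlanQualBegin(epqstate, estate);
        slot = EvalPlanQualSlot(epqstate, rel, rti);

        if (table_lock_tuple(rel, tid, estate->es_snapshot, slot,
                             estate->es_output_cid, LockTupleExclusive,
                             LockWaitBlock, TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
                             &hufd) != HeapTupleMayBeUpdated)
            return NULL;        /* row vanished; caller gives up on it */

        /* a NULL result means the new version no longer passes the quals */
        return EvalPlanQual(estate, epqstate, rel, rti, slot);
    }
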
@@ -914,7 +904,6 @@ ExecUpdate(ModifyTableState *mtstate,
           EState *estate,
           bool canSetTag)
 {
-   HeapTuple   tuple;
    ResultRelInfo *resultRelInfo;
    Relation    resultRelationDesc;
    HTSU_Result result;
@@ -928,11 +917,7 @@ ExecUpdate(ModifyTableState *mtstate,
    if (IsBootstrapProcessingMode())
        elog(ERROR, "cannot UPDATE during bootstrap");
 
-   /*
-    * get the heap tuple out of the tuple table slot, making sure we have a
-    * writable copy
-    */
-   tuple = ExecFetchSlotHeapTuple(slot, true, NULL);
+   ExecMaterializeSlot(slot);
 
    /*
     * get information on the (current) result relation
@@ -944,28 +929,18 @@ ExecUpdate(ModifyTableState *mtstate,
    if (resultRelInfo->ri_TrigDesc &&
        resultRelInfo->ri_TrigDesc->trig_update_before_row)
    {
-       slot = ExecBRUpdateTriggers(estate, epqstate, resultRelInfo,
-                                   tupleid, oldtuple, slot);
-
-       if (slot == NULL)       /* "do nothing" */
-           return NULL;
-
-       /* trigger might have changed tuple */
-       tuple = ExecFetchSlotHeapTuple(slot, true, NULL);
+       if (!ExecBRUpdateTriggers(estate, epqstate, resultRelInfo,
+                                 tupleid, oldtuple, slot))
+           return NULL;        /* "do nothing" */
    }
 
    /* INSTEAD OF ROW UPDATE Triggers */
    if (resultRelInfo->ri_TrigDesc &&
        resultRelInfo->ri_TrigDesc->trig_update_instead_row)
    {
-       slot = ExecIRUpdateTriggers(estate, resultRelInfo,
-                                   oldtuple, slot);
-
-       if (slot == NULL)       /* "do nothing" */
-           return NULL;
-
-       /* trigger might have changed tuple */
-       tuple = ExecFetchSlotHeapTuple(slot, true, NULL);
+       if (!ExecIRUpdateTriggers(estate, resultRelInfo,
+                                 oldtuple, slot))
+           return NULL;        /* "do nothing" */
    }
    else if (resultRelInfo->ri_FdwRoutine)
    {
@@ -980,25 +955,23 @@ ExecUpdate(ModifyTableState *mtstate,
        if (slot == NULL)       /* "do nothing" */
            return NULL;
 
-       /* FDW might have changed tuple */
-       tuple = ExecFetchSlotHeapTuple(slot, true, NULL);
-
        /*
         * AFTER ROW Triggers or RETURNING expressions might reference the
         * tableoid column, so initialize t_tableOid before evaluating them.
         */
-       tuple->t_tableOid = RelationGetRelid(resultRelationDesc);
+       slot->tts_tableOid = RelationGetRelid(resultRelationDesc);
    }
    else
    {
        LockTupleMode lockmode;
        bool        partition_constraint_failed;
+       bool        update_indexes;
 
        /*
         * Constraints might reference the tableoid column, so initialize
         * t_tableOid before evaluating them.
         */
-       tuple->t_tableOid = RelationGetRelid(resultRelationDesc);
+       slot->tts_tableOid = RelationGetRelid(resultRelationDesc);
 
        /*
         * Check any RLS UPDATE WITH CHECK policies
@@ -1011,6 +984,9 @@ ExecUpdate(ModifyTableState *mtstate,
         */
 lreplace:;
 
+       /* Ensure the slot is independent of underlying storage, e.g. when retrying after EPQ */
+       ExecMaterializeSlot(slot);
+
        /*
         * If partition constraint fails, this row might get moved to another
         * partition, in which case we should check the RLS CHECK policy just
@@ -1108,7 +1084,6 @@ lreplace:;
                else
                {
                    slot = ExecFilterJunk(resultRelInfo->ri_junkFilter, epqslot);
-                   tuple = ExecFetchSlotHeapTuple(slot, true, NULL);
                    goto lreplace;
                }
            }
@@ -1179,11 +1154,54 @@ lreplace:;
         * needed for referential integrity updates in transaction-snapshot
         * mode transactions.
         */
-       result = heap_update(resultRelationDesc, tupleid, tuple,
-                            estate->es_output_cid,
-                            estate->es_crosscheck_snapshot,
-                            true /* wait for commit */ ,
-                            &hufd, &lockmode);
+       result = table_update(resultRelationDesc, tupleid, slot,
+                             estate->es_output_cid,
+                             estate->es_snapshot,
+                             estate->es_crosscheck_snapshot,
+                             true /* wait for commit */,
+                             &hufd, &lockmode, &update_indexes);
+
+       if (result == HeapTupleUpdated && !IsolationUsesXactSnapshot())
+       {
+           TupleTableSlot *inputslot;
+
+           EvalPlanQualBegin(epqstate, estate);
+
+           inputslot = EvalPlanQualSlot(epqstate, resultRelationDesc, resultRelInfo->ri_RangeTableIndex);
+           ExecCopySlot(inputslot, slot);
+
+           result = table_lock_tuple(resultRelationDesc, tupleid,
+                                     estate->es_snapshot,
+                                     inputslot, estate->es_output_cid,
+                                     lockmode, LockWaitBlock,
+                                     TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
+                                     &hufd);
+           /*
+            * FIXME: should we be able to assert
+            * result != HeapTupleUpdated && hufd.traversed here?
+            */
+           if (result == HeapTupleMayBeUpdated)
+           {
+               TupleTableSlot *epqslot;
+
+               epqslot = EvalPlanQual(estate,
+                                      epqstate,
+                                      resultRelationDesc,
+                                      resultRelInfo->ri_RangeTableIndex,
+                                      inputslot);
+               if (TupIsNull(epqslot))
+               {
+                   /* Tuple no longer passes the quals; nothing more to do */
+                   return NULL;
+               }
+               slot = ExecFilterJunk(resultRelInfo->ri_junkFilter, epqslot);
+               goto lreplace;
+           }
+           else if (result == HeapTupleInvisible)
+           {
+               /* tuple is not visible; nothing to do */
+               return NULL;
+           }
+       }
+
        switch (result)
        {
            case HeapTupleSelfUpdated:
@@ -1224,34 +1242,21 @@ lreplace:;
                break;
 
            case HeapTupleUpdated:
+               /*
+                * Under lower isolation levels the HeapTupleUpdated case was
+                * already handled above, so getting here means we are in
+                * transaction-snapshot mode.
+                */
+               Assert(IsolationUsesXactSnapshot());
+               ereport(ERROR,
+                       (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+                        errmsg("could not serialize access due to concurrent update")));
+               break;
+
+           case HeapTupleDeleted:
                if (IsolationUsesXactSnapshot())
                    ereport(ERROR,
                            (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
-                            errmsg("could not serialize access due to concurrent update")));
-               if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
-                   ereport(ERROR,
-                           (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
-                            errmsg("tuple to be updated was already moved to another partition due to concurrent update")));
-
-               if (!ItemPointerEquals(tupleid, &hufd.ctid))
-               {
-                   TupleTableSlot *epqslot;
-
-                   epqslot = EvalPlanQual(estate,
-                                          epqstate,
-                                          resultRelationDesc,
-                                          resultRelInfo->ri_RangeTableIndex,
-                                          lockmode,
-                                          &hufd.ctid,
-                                          hufd.xmax);
-                   if (!TupIsNull(epqslot))
-                   {
-                       *tupleid = hufd.ctid;
-                       slot = ExecFilterJunk(resultRelInfo->ri_junkFilter, epqslot);
-                       tuple = ExecFetchSlotHeapTuple(slot, true, NULL);
-                       goto lreplace;
-                   }
-               }
+                            errmsg("could not serialize access due to concurrent delete")));
                /* tuple already deleted; nothing to do */
                return NULL;
 
@@ -1260,6 +1265,7 @@ lreplace:;
                return NULL;
        }
 
        /*
         * Note: instead of having to update the old index tuples associated
         * with the heap tuple, all we do is form and insert new index tuples.
@@ -1272,20 +1278,19 @@ lreplace:;
         * insert index entries for tuple
         *
         * Note: heap_update returns the tid (location) of the new tuple in
-        * the t_self field.
+        * the t_self field.  FIXME: stale with table_update, which reports
+        * the new location via the slot's tts_tid.
         *
         * If it's a HOT update, we mustn't insert new index entries.
         */
-       if (resultRelInfo->ri_NumIndices > 0 && !HeapTupleIsHeapOnly(tuple))
-           recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
-                                                  estate, false, NULL, NIL);
+       if (resultRelInfo->ri_NumIndices > 0 && update_indexes)
+           recheckIndexes = ExecInsertIndexTuples(slot, estate, false, NULL, NIL);
    }
 
    if (canSetTag)
        (estate->es_processed)++;
 
    /* AFTER ROW UPDATE Triggers */
-   ExecARUpdateTriggers(estate, resultRelInfo, tupleid, oldtuple, tuple,
+   ExecARUpdateTriggers(estate, resultRelInfo, tupleid, oldtuple, slot,
                         recheckIndexes,
                         mtstate->operation == CMD_INSERT ?
                         mtstate->mt_oc_transition_capture :
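
Note the new update_indexes output parameter: whether fresh index entries are
needed is no longer deduced from heap-specific HOT status in the executor;
table_update() reports it.  Sketched usage (helper name invented, failure
handling elided):

    #include "access/tableam.h"
    #include "executor/executor.h"

    static void
    update_and_reindex_sketch(Relation rel, ItemPointer otid,
                              TupleTableSlot *slot, EState *estate)
    {
        HeapUpdateFailureData hufd;
        LockTupleMode lockmode;
        bool        update_indexes;
        List       *recheckIndexes = NIL;

        if (table_update(rel, otid, slot, estate->es_output_cid,
                         estate->es_snapshot, estate->es_crosscheck_snapshot,
                         true /* wait for commit */ ,
                         &hufd, &lockmode,
                         &update_indexes) == HeapTupleMayBeUpdated &&
            update_indexes)
            recheckIndexes = ExecInsertIndexTuples(slot, estate,
                                                   false, NULL, NIL);

        list_free(recheckIndexes);
    }
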
@@ -1336,11 +1341,12 @@ ExecOnConflictUpdate(ModifyTableState *mtstate,
    ExprContext *econtext = mtstate->ps.ps_ExprContext;
    Relation    relation = resultRelInfo->ri_RelationDesc;
    ExprState  *onConflictSetWhere = resultRelInfo->ri_onConflict->oc_WhereClause;
-   HeapTupleData tuple;
    HeapUpdateFailureData hufd;
    LockTupleMode lockmode;
    HTSU_Result test;
-   Buffer      buffer;
+   Datum       xminDatum;
+   TransactionId xmin;
+   bool        isnull;
 
    /* Determine lock mode to use */
    lockmode = ExecUpdateLockMode(estate, resultRelInfo);
@@ -1351,10 +1357,11 @@ ExecOnConflictUpdate(ModifyTableState *mtstate,
     * previous conclusion that the tuple is conclusively committed is not
     * true anymore.
     */
-   tuple.t_self = *conflictTid;
-   test = heap_lock_tuple(relation, &tuple, estate->es_output_cid,
-                          lockmode, LockWaitBlock, false, &buffer,
-                          &hufd);
+   test = table_lock_tuple(relation, conflictTid,
+                           estate->es_snapshot,
+                           mtstate->mt_existing, estate->es_output_cid,
+                           lockmode, LockWaitBlock, TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS,
+                           &hufd);
    switch (test)
    {
        case HeapTupleMayBeUpdated:
@@ -1379,7 +1386,13 @@ ExecOnConflictUpdate(ModifyTableState *mtstate,
             * that for SQL MERGE, an exception must be raised in the event of
             * an attempt to update the same row twice.
             */
-           if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple.t_data)))
+           xminDatum = slot_getsysattr(mtstate->mt_existing,
+                                       MinTransactionIdAttributeNumber,
+                                       &isnull);
+           Assert(!isnull);
+           xmin = DatumGetTransactionId(xminDatum);
+
+           if (TransactionIdIsCurrentTransactionId(xmin))
                ereport(ERROR,
                        (errcode(ERRCODE_CARDINALITY_VIOLATION),
                         errmsg("ON CONFLICT DO UPDATE command cannot affect row a second time"),
@@ -1420,7 +1433,16 @@ ExecOnConflictUpdate(ModifyTableState *mtstate,
             * loop here, as the new version of the row might not conflict
             * anymore, or the conflicting tuple has actually been deleted.
             */
-           ReleaseBuffer(buffer);
+           ExecClearTuple(mtstate->mt_existing);
+           return false;
+
+       case HeapTupleDeleted:
+           if (IsolationUsesXactSnapshot())
+               ereport(ERROR,
+                       (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+                        errmsg("could not serialize access due to concurrent delete")));
+
+           ExecClearTuple(mtstate->mt_existing);
            return false;
 
        default:
@@ -1442,10 +1464,7 @@ ExecOnConflictUpdate(ModifyTableState *mtstate,
     * snapshot.  This is in line with the way UPDATE deals with newer tuple
     * versions.
     */
-   ExecCheckHeapTupleVisible(estate, &tuple, buffer);
-
-   /* Store target's existing tuple in the state's dedicated slot */
-   ExecStoreBufferHeapTuple(&tuple, mtstate->mt_existing, buffer);
+   ExecCheckHeapTupleVisible(estate, relation, mtstate->mt_existing);
 
    /*
     * Make tuple and any needed join variables available to ExecQual and
@@ -1460,7 +1479,7 @@ ExecOnConflictUpdate(ModifyTableState *mtstate,
 
    if (!ExecQual(onConflictSetWhere, econtext))
    {
-       ReleaseBuffer(buffer);
+       ExecClearTuple(mtstate->mt_existing);
        InstrCountFiltered1(&mtstate->ps, 1);
        return true;            /* done with the tuple */
    }
@@ -1500,12 +1519,11 @@ ExecOnConflictUpdate(ModifyTableState *mtstate,
     */
 
    /* Execute UPDATE with projection */
-   *returning = ExecUpdate(mtstate, &tuple.t_self, NULL,
+   *returning = ExecUpdate(mtstate, conflictTid, NULL,
                            mtstate->mt_conflproj, planSlot,
                            &mtstate->mt_epqstate, mtstate->ps.state,
                            canSetTag);
-
-   ReleaseBuffer(buffer);
+   ExecClearTuple(mtstate->mt_existing);
    return true;
 }
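
Both ExecCheckHeapTupleVisible() and ExecOnConflictUpdate() above now read
xmin through the slot rather than dereferencing a HeapTupleHeader, since
under pluggable storage there may be no heap tuple to poke at.  The recurring
idiom as a sketch (helper name invented):

    #include "access/sysattr.h"
    #include "access/xact.h"
    #include "executor/tuptable.h"

    static bool
    inserted_by_current_xact_sketch(TupleTableSlot *slot)
    {
        Datum       xminDatum;
        TransactionId xmin;
        bool        isnull;

        /* system attributes are fetched via the slot's AM callbacks */
        xminDatum = slot_getsysattr(slot, MinTransactionIdAttributeNumber,
                                    &isnull);
        Assert(!isnull);
        xmin = DatumGetTransactionId(xminDatum);

        return TransactionIdIsCurrentTransactionId(xmin);
    }
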
 
@@ -1688,6 +1706,7 @@ ExecPrepareTupleRouting(ModifyTableState *mtstate,
    estate->es_result_relation_info = partrel;
 
    /* Get the heap tuple out of the given slot. */
+   /* FIXME: this will leak memory for some slot types */
    tuple = ExecFetchSlotHeapTuple(slot, true, NULL);
 
    /*
@@ -1826,7 +1845,7 @@ ExecModifyTable(PlanState *pstate)
    ItemPointer tupleid;
    ItemPointerData tuple_ctid;
    HeapTupleData oldtupdata;
    HeapTuple   oldtuple;
 
    CHECK_FOR_INTERRUPTS();
 
@@ -1839,7 +1858,7 @@ ExecModifyTable(PlanState *pstate)
     * case it is within a CTE subplan.  Hence this test must be here, not in
     * ExecInitModifyTable.)
     */
-   if (estate->es_epqTuple != NULL)
+   if (estate->es_epqTupleSlot != NULL)
        elog(ERROR, "ModifyTable should not be called during EvalPlanQual");
 
    /*
@@ -1982,7 +2001,7 @@ ExecModifyTable(PlanState *pstate)
                                                 &isNull);
                    /* shouldn't ever get a null result... */
                    if (isNull)
                        elog(ERROR, "ctid is NULL");
 
                    tupleid = (ItemPointer) DatumGetPointer(datum);
                    tuple_ctid = *tupleid;  /* be sure we don't free ctid!! */
@@ -2123,6 +2142,8 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
    mtstate->resultRelInfo = estate->es_result_relations + node->resultRelIndex;
    mtstate->mt_scans = (TupleTableSlot **) palloc0(sizeof(TupleTableSlot *) * nplans);
 
    /* If modifying a partitioned table, initialize the root table info */
    if (node->rootResultRelIndex >= 0)
        mtstate->rootResultRelInfo = estate->es_root_result_relations +
@@ -2190,7 +2211,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
        mtstate->mt_plans[i] = ExecInitNode(subplan, estate, eflags);
        mtstate->mt_scans[i] =
            ExecInitExtraTupleSlot(mtstate->ps.state, ExecGetResultType(mtstate->mt_plans[i]),
-                                  &TTSOpsHeapTuple);
+                                  table_slot_callbacks(resultRelInfo->ri_RelationDesc));
 
        /* Also let FDWs init themselves for foreign-table result rels */
        if (!resultRelInfo->ri_usesFdwDirectModify &&
@@ -2250,8 +2271,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
    if (update_tuple_routing_needed)
    {
        ExecSetupChildParentMapForSubplan(mtstate);
-       mtstate->mt_root_tuple_slot = MakeTupleTableSlot(RelationGetDescr(rel),
-                                                        &TTSOpsHeapTuple);
+       mtstate->mt_root_tuple_slot = table_gimmegimmeslot(rel, NULL);
    }
 
    /*
@@ -2344,6 +2364,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
        ExprContext *econtext;
        TupleDesc   relationDesc;
        TupleDesc   tupDesc;
+       const TupleTableSlotOps *tts_cb;
 
        /* insert may only have one plan, inheritance is not expanded */
        Assert(nplans == 1);
@@ -2354,6 +2375,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
 
        econtext = mtstate->ps.ps_ExprContext;
        relationDesc = resultRelInfo->ri_RelationDesc->rd_att;
+       tts_cb = table_slot_callbacks(resultRelInfo->ri_RelationDesc);
 
        /*
         * Initialize slot for the existing tuple.  If we'll be performing
@@ -2364,7 +2386,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
        mtstate->mt_existing =
            ExecInitExtraTupleSlot(mtstate->ps.state,
                                   mtstate->mt_partition_tuple_routing ?
-                                  NULL : relationDesc, &TTSOpsBufferHeapTuple);
+                                  NULL : relationDesc, tts_cb);
 
        /* carried forward solely for the benefit of explain */
        mtstate->mt_excludedtlist = node->exclRelTlist;
@@ -2385,7 +2407,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
        mtstate->mt_conflproj =
            ExecInitExtraTupleSlot(mtstate->ps.state,
                                   mtstate->mt_partition_tuple_routing ?
-                                  NULL : tupDesc, &TTSOpsHeapTuple);
+                                  NULL : tupDesc, tts_cb);
        resultRelInfo->ri_onConflict->oc_ProjTupdesc = tupDesc;
 
        /* build UPDATE SET projection state */
@@ -2488,15 +2510,18 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
            for (i = 0; i < nplans; i++)
            {
                JunkFilter *j;
+               TupleTableSlot *junkresslot;
 
                subplan = mtstate->mt_plans[i]->plan;
                if (operation == CMD_INSERT || operation == CMD_UPDATE)
                    ExecCheckPlanOutput(resultRelInfo->ri_RelationDesc,
                                        subplan->targetlist);
 
+               junkresslot =
+                   ExecInitExtraTupleSlot(estate, NULL,
+                                          table_slot_callbacks(resultRelInfo->ri_RelationDesc));
                j = ExecInitJunkFilter(subplan->targetlist,
-                                      ExecInitExtraTupleSlot(estate, NULL,
-                                                             &TTSOpsHeapTuple));
+                                      junkresslot);
 
                if (operation == CMD_UPDATE || operation == CMD_DELETE)
                {
@@ -2540,16 +2565,6 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
        }
    }
 
-   /*
-    * Set up a tuple table slot for use for trigger output tuples. In a plan
-    * containing multiple ModifyTable nodes, all can share one such slot, so
-    * we keep it in the estate. The tuple being inserted doesn't come from a
-    * buffer.
-    */
-   if (estate->es_trig_tuple_slot == NULL)
-       estate->es_trig_tuple_slot = ExecInitExtraTupleSlot(estate, NULL,
-                                                           &TTSOpsHeapTuple);
-
    /*
     * Lastly, if this is not the primary (canSetTag) ModifyTable node, add it
     * to estate->es_auxmodifytables so that it will be run to completion by
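
One more substitution that recurs through this file: direct heap_fetch() plus
explicit buffer management is replaced by table_fetch_row_version(), which
leaves the row in a slot and keeps any buffer pin inside the AM.  Sketch
(helper name invented; the trailing NULL argument mirrors the patch's call
sites):

    #include "access/tableam.h"
    #include "utils/snapmgr.h"

    static void
    fetch_old_row_sketch(Relation rel, ItemPointer tid, TupleTableSlot *slot)
    {
        if (!table_fetch_row_version(rel, tid, SnapshotAny, slot, NULL))
            elog(ERROR, "failed to fetch tuple");
        /* no ReleaseBuffer() here; the slot owns whatever pin exists */
    }
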
diff --git a/src/backend/executor/nodeSamplescan.c b/src/backend/executor/nodeSamplescan.c
index 78735fa15bc31ddf094390d9f260a85bca723b9e..5351ec347ff6f51bb8194a578bfcf8844fcc71e6 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "access/hash.h"
 #include "access/relscan.h"
+#include "access/tableam.h"
 #include "access/tsmapi.h"
 #include "executor/executor.h"
 #include "executor/nodeSamplescan.h"
@@ -28,9 +29,7 @@
 
 static TupleTableSlot *SampleNext(SampleScanState *node);
 static void tablesample_init(SampleScanState *scanstate);
-static HeapTuple tablesample_getnext(SampleScanState *scanstate);
-static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset,
-                  HeapScanDesc scan);
+static TupleTableSlot *tablesample_getnext(SampleScanState *scanstate);
 
 /* ----------------------------------------------------------------
  *                     Scan Support
@@ -46,9 +45,6 @@ static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset,
 static TupleTableSlot *
 SampleNext(SampleScanState *node)
 {
-   HeapTuple   tuple;
-   TupleTableSlot *slot;
-
    /*
     * if this is first call within a scan, initialize
     */
@@ -58,18 +54,7 @@ SampleNext(SampleScanState *node)
    /*
     * get the next tuple, and store it in our result slot
     */
-   tuple = tablesample_getnext(node);
-
-   slot = node->ss.ss_ScanTupleSlot;
-
-   if (tuple)
-       ExecStoreBufferHeapTuple(tuple, /* tuple to store */
-                                slot,  /* slot to store in */
-                                node->ss.ss_currentScanDesc->rs_cbuf); /* tuple's buffer */
-   else
-       ExecClearTuple(slot);
-
-   return slot;
+   return tablesample_getnext(node);
 }
 
 /*
@@ -147,7 +132,7 @@ ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
    /* and create slot with appropriate rowtype */
    ExecInitScanTupleSlot(estate, &scanstate->ss,
                          RelationGetDescr(scanstate->ss.ss_currentRelation),
-                         &TTSOpsBufferHeapTuple);
+                         table_slot_callbacks(scanstate->ss.ss_currentRelation));
 
    /*
     * Initialize result type and projection.
@@ -219,7 +204,7 @@ ExecEndSampleScan(SampleScanState *node)
     * close heap scan
     */
    if (node->ss.ss_currentScanDesc)
-       heap_endscan(node->ss.ss_currentScanDesc);
+       table_endscan(node->ss.ss_currentScanDesc);
 }
 
 /* ----------------------------------------------------------------
@@ -234,6 +219,9 @@ ExecReScanSampleScan(SampleScanState *node)
 {
    /* Remember we need to do BeginSampleScan again (if we did it at all) */
    node->begun = false;
+   node->done = false;
+   node->haveblock = false;
+   node->donetuples = 0;
 
    ExecScanReScan(&node->ss);
 }
@@ -255,6 +243,7 @@ tablesample_init(SampleScanState *scanstate)
    int         i;
    ListCell   *arg;
 
+   scanstate->donetuples = 0;
    params = (Datum *) palloc(list_length(scanstate->args) * sizeof(Datum));
 
    i = 0;
@@ -319,19 +308,19 @@ tablesample_init(SampleScanState *scanstate)
    if (scanstate->ss.ss_currentScanDesc == NULL)
    {
        scanstate->ss.ss_currentScanDesc =
-           heap_beginscan_sampling(scanstate->ss.ss_currentRelation,
-                                   scanstate->ss.ps.state->es_snapshot,
-                                   0, NULL,
-                                   scanstate->use_bulkread,
-                                   allow_sync,
-                                   scanstate->use_pagemode);
+           table_beginscan_sampling(scanstate->ss.ss_currentRelation,
+                                      scanstate->ss.ps.state->es_snapshot,
+                                      0, NULL,
+                                      scanstate->use_bulkread,
+                                      allow_sync,
+                                      scanstate->use_pagemode);
    }
    else
    {
-       heap_rescan_set_params(scanstate->ss.ss_currentScanDesc, NULL,
-                              scanstate->use_bulkread,
-                              allow_sync,
-                              scanstate->use_pagemode);
+       table_rescan_set_params(scanstate->ss.ss_currentScanDesc, NULL,
+                                 scanstate->use_bulkread,
+                                 allow_sync,
+                                 scanstate->use_pagemode);
    }
 
    pfree(params);
@@ -342,224 +331,49 @@ tablesample_init(SampleScanState *scanstate)
 
 /*
  * Get next tuple from TABLESAMPLE method.
- *
- * Note: an awful lot of this is copied-and-pasted from heapam.c.  It would
- * perhaps be better to refactor to share more code.
  */
-static HeapTuple
+static TupleTableSlot *
 tablesample_getnext(SampleScanState *scanstate)
 {
-   TsmRoutine *tsm = scanstate->tsmroutine;
-   HeapScanDesc scan = scanstate->ss.ss_currentScanDesc;
-   HeapTuple   tuple = &(scan->rs_ctup);
-   Snapshot    snapshot = scan->rs_snapshot;
-   bool        pagemode = scan->rs_pageatatime;
-   BlockNumber blockno;
-   Page        page;
-   bool        all_visible;
-   OffsetNumber maxoffset;
-
-   if (!scan->rs_inited)
-   {
-       /*
-        * return null immediately if relation is empty
-        */
-       if (scan->rs_nblocks == 0)
-       {
-           Assert(!BufferIsValid(scan->rs_cbuf));
-           tuple->t_data = NULL;
-           return NULL;
-       }
-       if (tsm->NextSampleBlock)
-       {
-           blockno = tsm->NextSampleBlock(scanstate);
-           if (!BlockNumberIsValid(blockno))
-           {
-               tuple->t_data = NULL;
-               return NULL;
-           }
-       }
-       else
-           blockno = scan->rs_startblock;
-       Assert(blockno < scan->rs_nblocks);
-       heapgetpage(scan, blockno);
-       scan->rs_inited = true;
-   }
-   else
-   {
-       /* continue from previously returned page/tuple */
-       blockno = scan->rs_cblock;  /* current page */
-   }
+   TableScanDesc scan = scanstate->ss.ss_currentScanDesc;
+   TupleTableSlot *slot = scanstate->ss.ss_ScanTupleSlot;
 
-   /*
-    * When not using pagemode, we must lock the buffer during tuple
-    * visibility checks.
-    */
-   if (!pagemode)
-       LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+   ExecClearTuple(slot);
 
-   page = (Page) BufferGetPage(scan->rs_cbuf);
-   all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
-   maxoffset = PageGetMaxOffsetNumber(page);
+   if (scanstate->done)
+       return NULL;
 
    for (;;)
    {
-       OffsetNumber tupoffset;
-       bool        finished;
-
-       CHECK_FOR_INTERRUPTS();
-
-       /* Ask the tablesample method which tuples to check on this page. */
-       tupoffset = tsm->NextSampleTuple(scanstate,
-                                        blockno,
-                                        maxoffset);
-
-       if (OffsetNumberIsValid(tupoffset))
+       if (!scanstate->haveblock)
        {
-           ItemId      itemid;
-           bool        visible;
-
-           /* Skip invalid tuple pointers. */
-           itemid = PageGetItemId(page, tupoffset);
-           if (!ItemIdIsNormal(itemid))
-               continue;
-
-           tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid);
-           tuple->t_len = ItemIdGetLength(itemid);
-           ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
-
-           if (all_visible)
-               visible = true;
-           else
-               visible = SampleTupleVisible(tuple, tupoffset, scan);
-
-           /* in pagemode, heapgetpage did this for us */
-           if (!pagemode)
-               CheckForSerializableConflictOut(visible, scan->rs_rd, tuple,
-                                               scan->rs_cbuf, snapshot);
-
-           if (visible)
-           {
-               /* Found visible tuple, return it. */
-               if (!pagemode)
-                   LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
-               break;
-           }
-           else
+           if (!table_scan_sample_next_block(scan, scanstate))
            {
-               /* Try next tuple from same page. */
-               continue;
-           }
-       }
+               scanstate->haveblock = false;
+               scanstate->done = true;
 
-       /*
-        * if we get here, it means we've exhausted the items on this page and
-        * it's time to move to the next.
-        */
-       if (!pagemode)
-           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+               /* exhausted relation */
+               return NULL;
+           }
 
-       if (tsm->NextSampleBlock)
-       {
-           blockno = tsm->NextSampleBlock(scanstate);
-           Assert(!scan->rs_syncscan);
-           finished = !BlockNumberIsValid(blockno);
+           scanstate->haveblock = true;
        }
-       else
-       {
-           /* Without NextSampleBlock, just do a plain forward seqscan. */
-           blockno++;
-           if (blockno >= scan->rs_nblocks)
-               blockno = 0;
 
+       if (!table_scan_sample_next_tuple(scan, scanstate, slot))
+       {
            /*
-            * Report our new scan position for synchronization purposes.
-            *
-            * Note: we do this before checking for end of scan so that the
-            * final state of the position hint is back at the start of the
-            * rel.  That's not strictly necessary, but otherwise when you run
-            * the same query multiple times the starting position would shift
-            * a little bit backwards on every invocation, which is confusing.
-            * We don't guarantee any specific ordering in general, though.
+            * If we get here, it means we've exhausted the items on this page
+            * and it's time to move to the next.
             */
-           if (scan->rs_syncscan)
-               ss_report_location(scan->rs_rd, blockno);
-
-           finished = (blockno == scan->rs_startblock);
+           scanstate->haveblock = false;
+           continue;
        }
 
-       /*
-        * Reached end of scan?
-        */
-       if (finished)
-       {
-           if (BufferIsValid(scan->rs_cbuf))
-               ReleaseBuffer(scan->rs_cbuf);
-           scan->rs_cbuf = InvalidBuffer;
-           scan->rs_cblock = InvalidBlockNumber;
-           tuple->t_data = NULL;
-           scan->rs_inited = false;
-           return NULL;
-       }
-
-       Assert(blockno < scan->rs_nblocks);
-       heapgetpage(scan, blockno);
-
-       /* Re-establish state for new page */
-       if (!pagemode)
-           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
-
-       page = (Page) BufferGetPage(scan->rs_cbuf);
-       all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
-       maxoffset = PageGetMaxOffsetNumber(page);
+       /* Found visible tuple, return it. */
+       break;
    }
 
-   /* Count successfully-fetched tuples as heap fetches */
-   pgstat_count_heap_getnext(scan->rs_rd);
-
-   return &(scan->rs_ctup);
-}
+   scanstate->donetuples++;
 
-/*
- * Check visibility of the tuple.
- */
-static bool
-SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan)
-{
-   if (scan->rs_pageatatime)
-   {
-       /*
-        * In pageatatime mode, heapgetpage() already did visibility checks,
-        * so just look at the info it left in rs_vistuples[].
-        *
-        * We use a binary search over the known-sorted array.  Note: we could
-        * save some effort if we insisted that NextSampleTuple select tuples
-        * in increasing order, but it's not clear that there would be enough
-        * gain to justify the restriction.
-        */
-       int         start = 0,
-                   end = scan->rs_ntuples - 1;
-
-       while (start <= end)
-       {
-           int         mid = (start + end) / 2;
-           OffsetNumber curoffset = scan->rs_vistuples[mid];
-
-           if (tupoffset == curoffset)
-               return true;
-           else if (tupoffset < curoffset)
-               end = mid - 1;
-           else
-               start = mid + 1;
-       }
-
-       return false;
-   }
-   else
-   {
-       /* Otherwise, we have to check the tuple individually. */
-       return HeapTupleSatisfiesVisibility(tuple,
-                                           scan->rs_snapshot,
-                                           scan->rs_cbuf);
-   }
+   return slot;
 }
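
(The tablesample fetch loop above now delegates both block selection and
per-tuple visibility checking to the table AM: table_scan_sample_next_block()
consults the tablesample method for the next block to read, and
table_scan_sample_next_tuple() returns the next sampled tuple that is visible
under the scan's snapshot, storing it directly into the slot.  The buffer
locking, serializable-conflict and sync-scan bookkeeping that used to live
here moves behind those two callbacks.)
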
index 55377add6efaad6712d610e3f9af51d1ddaac210..a6e89bf2af6c2c8cf2995608b3a95e19b84745dd 100644 (file)
@@ -28,6 +28,7 @@
 #include "postgres.h"
 
 #include "access/relscan.h"
+#include "access/tableam.h"
 #include "executor/execdebug.h"
 #include "executor/nodeSeqscan.h"
 #include "utils/rel.h"
@@ -48,8 +49,7 @@ static TupleTableSlot *SeqNext(SeqScanState *node);
 static TupleTableSlot *
 SeqNext(SeqScanState *node)
 {
-   HeapTuple   tuple;
-   HeapScanDesc scandesc;
+   TableScanDesc scandesc;
    EState     *estate;
    ScanDirection direction;
    TupleTableSlot *slot;
@@ -68,34 +68,16 @@ SeqNext(SeqScanState *node)
         * We reach here if the scan is not parallel, or if we're serially
         * executing a scan that was planned to be parallel.
         */
-       scandesc = heap_beginscan(node->ss.ss_currentRelation,
-                                 estate->es_snapshot,
-                                 0, NULL);
+       scandesc = table_beginscan(node->ss.ss_currentRelation,
+                                  estate->es_snapshot,
+                                  0, NULL);
        node->ss.ss_currentScanDesc = scandesc;
    }
 
    /*
     * get the next tuple from the table
     */
-   tuple = heap_getnext(scandesc, direction);
-
-   /*
-    * save the tuple and the buffer returned to us by the access methods in
-    * our scan tuple slot and return the slot.  Note: we pass 'false' because
-    * tuples returned by heap_getnext() are pointers onto disk pages and were
-    * not created with palloc() and so should not be pfree()'d.  Note also
-    * that ExecStoreHeapTuple will increment the refcount of the buffer; the
-    * refcount will not be dropped until the tuple table slot is cleared.
-    */
-   if (tuple)
-       ExecStoreBufferHeapTuple(tuple, /* tuple to store */
-                                slot,  /* slot to store in */
-                                scandesc->rs_cbuf);    /* buffer associated
-                                                        * with this tuple */
-   else
-       ExecClearTuple(slot);
-
-   return slot;
+   return table_scan_getnextslot(scandesc, direction, slot);
 }
 
 /*
@@ -173,7 +155,7 @@ ExecInitSeqScan(SeqScan *node, EState *estate, int eflags)
    /* and create slot with the appropriate rowtype */
    ExecInitScanTupleSlot(estate, &scanstate->ss,
                          RelationGetDescr(scanstate->ss.ss_currentRelation),
-                         &TTSOpsBufferHeapTuple);
+                         table_slot_callbacks(scanstate->ss.ss_currentRelation));
 
    /*
     * Initialize result type and projection.
@@ -199,7 +181,7 @@ ExecInitSeqScan(SeqScan *node, EState *estate, int eflags)
 void
 ExecEndSeqScan(SeqScanState *node)
 {
-   HeapScanDesc scanDesc;
+   TableScanDesc scanDesc;
 
    /*
     * get information from node
@@ -222,7 +204,7 @@ ExecEndSeqScan(SeqScanState *node)
     * close heap scan
     */
    if (scanDesc != NULL)
-       heap_endscan(scanDesc);
+       table_endscan(scanDesc);
 }
 
 /* ----------------------------------------------------------------
@@ -239,13 +221,13 @@ ExecEndSeqScan(SeqScanState *node)
 void
 ExecReScanSeqScan(SeqScanState *node)
 {
-   HeapScanDesc scan;
+   TableScanDesc scan;
 
    scan = node->ss.ss_currentScanDesc;
 
    if (scan != NULL)
-       heap_rescan(scan,       /* scan desc */
-                   NULL);      /* new scan keys */
+       table_rescan(scan,      /* scan desc */
+                    NULL);     /* new scan keys */
 
    ExecScanReScan((ScanState *) node);
 }
@@ -268,7 +250,7 @@ ExecSeqScanEstimate(SeqScanState *node,
 {
    EState     *estate = node->ss.ps.state;
 
-   node->pscan_len = heap_parallelscan_estimate(estate->es_snapshot);
+   node->pscan_len = table_parallelscan_estimate(estate->es_snapshot);
    shm_toc_estimate_chunk(&pcxt->estimator, node->pscan_len);
    shm_toc_estimate_keys(&pcxt->estimator, 1);
 }
@@ -284,15 +266,15 @@ ExecSeqScanInitializeDSM(SeqScanState *node,
                         ParallelContext *pcxt)
 {
    EState     *estate = node->ss.ps.state;
-   ParallelHeapScanDesc pscan;
+   ParallelTableScanDesc pscan;
 
    pscan = shm_toc_allocate(pcxt->toc, node->pscan_len);
-   heap_parallelscan_initialize(pscan,
-                                node->ss.ss_currentRelation,
-                                estate->es_snapshot);
+   table_parallelscan_initialize(pscan,
+                                 node->ss.ss_currentRelation,
+                                 estate->es_snapshot);
    shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pscan);
    node->ss.ss_currentScanDesc =
-       heap_beginscan_parallel(node->ss.ss_currentRelation, pscan);
+       table_beginscan_parallel(node->ss.ss_currentRelation, pscan);
 }
 
 /* ----------------------------------------------------------------
@@ -305,9 +287,10 @@ void
 ExecSeqScanReInitializeDSM(SeqScanState *node,
                           ParallelContext *pcxt)
 {
-   HeapScanDesc scan = node->ss.ss_currentScanDesc;
+   ParallelTableScanDesc pscan;
 
-   heap_parallelscan_reinitialize(scan->rs_parallel);
+   pscan = node->ss.ss_currentScanDesc->rs_parallel;
+   table_parallelscan_reinitialize(pscan);
 }
 
 /* ----------------------------------------------------------------
@@ -320,9 +303,9 @@ void
 ExecSeqScanInitializeWorker(SeqScanState *node,
                            ParallelWorkerContext *pwcxt)
 {
-   ParallelHeapScanDesc pscan;
+   ParallelTableScanDesc pscan;
 
    pscan = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
    node->ss.ss_currentScanDesc =
-       heap_beginscan_parallel(node->ss.ss_currentRelation, pscan);
+       table_beginscan_parallel(node->ss.ss_currentRelation, pscan);
 }
index afec097bc844ebf03069ae3dce295a948483d6a7..c8bcf97f1de6a5365c5c2f6772c05683ac1e7296 100644 (file)
@@ -22,6 +22,7 @@
  */
 #include "postgres.h"
 
+#include "access/tableam.h"
 #include "access/sysattr.h"
 #include "catalog/pg_type.h"
 #include "executor/execdebug.h"
@@ -306,9 +307,7 @@ TidNext(TidScanState *node)
    ScanDirection direction;
    Snapshot    snapshot;
    Relation    heapRelation;
-   HeapTuple   tuple;
    TupleTableSlot *slot;
-   Buffer      buffer = InvalidBuffer;
    ItemPointerData *tidList;
    int         numTids;
    bool        bBackward;
@@ -331,12 +330,6 @@ TidNext(TidScanState *node)
    tidList = node->tss_TidList;
    numTids = node->tss_NumTids;
 
-   /*
-    * We use node->tss_htup as the tuple pointer; note this can't just be a
-    * local variable here, as the scan tuple slot will keep a pointer to it.
-    */
-   tuple = &(node->tss_htup);
-
    /*
     * Initialize or advance scan position, depending on direction.
     */
@@ -364,7 +357,7 @@ TidNext(TidScanState *node)
 
    while (node->tss_TidPtr >= 0 && node->tss_TidPtr < numTids)
    {
-       tuple->t_self = tidList[node->tss_TidPtr];
+       ItemPointerData tid = tidList[node->tss_TidPtr];
 
        /*
         * For WHERE CURRENT OF, the tuple retrieved from the cursor might
@@ -372,28 +365,11 @@ TidNext(TidScanState *node)
         * current according to our snapshot.
         */
        if (node->tss_isCurrentOf)
-           heap_get_latest_tid(heapRelation, snapshot, &tuple->t_self);
-
-       if (heap_fetch(heapRelation, snapshot, tuple, &buffer, false, NULL))
-       {
-           /*
-            * Store the scanned tuple in the scan tuple slot of the scan
-            * state.  Eventually we will only do this and not return a tuple.
-            */
-           ExecStoreBufferHeapTuple(tuple, /* tuple to store */
-                                    slot,  /* slot to store in */
-                                    buffer);   /* buffer associated with
-                                                * tuple */
-
-           /*
-            * At this point we have an extra pin on the buffer, because
-            * ExecStoreHeapTuple incremented the pin count. Drop our local
-            * pin.
-            */
-           ReleaseBuffer(buffer);
+           table_get_latest_tid(heapRelation, snapshot, &tid);
 
+       if (table_fetch_row_version(heapRelation, &tid, snapshot, slot, NULL))
            return slot;
-       }
+
        /* Bad TID or failed snapshot qual; try next */
        if (bBackward)
            node->tss_TidPtr--;
@@ -544,7 +520,7 @@ ExecInitTidScan(TidScan *node, EState *estate, int eflags)
     */
    ExecInitScanTupleSlot(estate, &tidstate->ss,
                          RelationGetDescr(currentRelation),
-                         &TTSOpsBufferHeapTuple);
+                         table_slot_callbacks(currentRelation));
 
    /*
     * Initialize result type and projection.
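
For illustration only (not part of the patch): fetching a single row version
by TID through the table AM, as TidNext() now does.  fetch_row_by_tid() is a
hypothetical helper; the final argument of table_fetch_row_version() is
passed as NULL, matching the call above.

    static bool
    fetch_row_by_tid(Relation rel, ItemPointer tid, Snapshot snapshot,
                     TupleTableSlot *slot)
    {
        ItemPointerData ctid = *tid;

        /* chase to the latest version first, as WHERE CURRENT OF requires */
        table_get_latest_tid(rel, snapshot, &ctid);

        /* succeeds and fills the slot if that version is visible */
        return table_fetch_row_version(rel, &ctid, snapshot, slot, NULL);
    }
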
index ad726676d8ccc8dba291db98e29cde74aa26e1e3..34664e76d1b3f8bc4bb93b308be6a71a6c4745bc 100644 (file)
@@ -762,7 +762,7 @@ HeapTuple
 SPI_copytuple(HeapTuple tuple)
 {
    MemoryContext oldcxt;
-   HeapTuple   ctuple;
+   HeapTuple   ctuple;
 
    if (tuple == NULL)
    {
@@ -983,7 +983,7 @@ char *
 SPI_gettype(TupleDesc tupdesc, int fnumber)
 {
    Oid         typoid;
-   HeapTuple   typeTuple;
+   HeapTuple   typeTuple;
    char       *result;
 
    SPI_result = 0;
@@ -1844,7 +1844,7 @@ spi_printtup(TupleTableSlot *slot, DestReceiver *self)
        tuptable->free = tuptable->alloced;
        tuptable->alloced += tuptable->free;
        tuptable->vals = (HeapTuple *) repalloc_huge(tuptable->vals,
-                                                    tuptable->alloced * sizeof(HeapTuple));
+                                                    tuptable->alloced * sizeof(HeapTuple));
    }
 
    tuptable->vals[tuptable->alloced - tuptable->free] =
index e47ef491928ea8c529418dde343c240fed8075a6..e2b596cf74e1750dfcef4508866654904c64e4a7 100644 (file)
@@ -59,6 +59,8 @@ tqueueReceiveSlot(TupleTableSlot *slot, DestReceiver *self)
    bool        should_free;
 
    /* Send the tuple itself. */
+   /*
+    * PBORKED: this shouldn't rely on heap tuples.  If the tuple has to be
+    * in a materialized form here, a minimal tuple would suffice.
+    */
    tuple = ExecFetchSlotHeapTuple(slot, true, &should_free);
    result = shm_mq_send(tqueue->queue, tuple->t_len, tuple->t_data, false);
 
index db49968409688c6212e3f53ab86b36bdc6e13679..2848634e009117b6fbf2e96132ec7c90a77d93f6 100644 (file)
@@ -3332,6 +3332,7 @@ CopyCreateStmtFields(const CreateStmt *from, CreateStmt *newnode)
    COPY_NODE_FIELD(options);
    COPY_SCALAR_FIELD(oncommit);
    COPY_STRING_FIELD(tablespacename);
+   COPY_STRING_FIELD(accessMethod);
    COPY_SCALAR_FIELD(if_not_exists);
 }
 
index a570ac0aabe3223d5c4a04abf82a258fc2fe06ed..58e8c7cc5f3d8f3e043801fa033d0adcdab66239 100644 (file)
@@ -21,6 +21,7 @@
 #include "access/heapam.h"
 #include "access/htup_details.h"
 #include "access/nbtree.h"
+#include "access/tableam.h"
 #include "access/sysattr.h"
 #include "access/transam.h"
 #include "access/xlog.h"
@@ -271,7 +272,8 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
            info->amsearchnulls = amroutine->amsearchnulls;
            info->amcanparallel = amroutine->amcanparallel;
            info->amhasgettuple = (amroutine->amgettuple != NULL);
-           info->amhasgetbitmap = (amroutine->amgetbitmap != NULL);
+           info->amhasgetbitmap = ((amroutine->amgetbitmap != NULL)
+                                   && (relation->rd_tableamroutine->scan_bitmap_pagescan != NULL));
            info->amcostestimate = amroutine->amcostestimate;
            Assert(info->amcostestimate != NULL);
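
(A bitmap index path can only be executed if the table can be read page-wise
from a TID bitmap, so the planner now requires the table AM to provide
scan_bitmap_pagescan in addition to the index AM's amgetbitmap before it
advertises bitmap-scan support for the relation.)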
 
index 2c2208ffb724d7f6864f8ad020bb128c253b3324..71bedf34f3355799531803517be434885dbaecd1 100644 (file)
@@ -48,6 +48,7 @@
 #include <ctype.h>
 #include <limits.h>
 
+#include "access/tableam.h"
 #include "catalog/index.h"
 #include "catalog/namespace.h"
 #include "catalog/pg_am.h"
@@ -322,6 +323,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %type <str>        OptSchemaName
 %type <list>   OptSchemaEltList
 
+%type <chr>        am_type
+
 %type <boolean> TriggerForSpec TriggerForType
 %type <ival>   TriggerActionTime
 %type <list>   TriggerEvents TriggerOneEvent
@@ -337,7 +340,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 
 %type <str>        copy_file_name
                database_name access_method_clause access_method attr_name
-               name cursor_name file_name
+               table_access_method_clause name cursor_name file_name
                index_name opt_index_name cluster_index_specification
 
 %type <list>   func_name handler_name qual_Op qual_all_Op subquery_Op
@@ -3170,7 +3173,8 @@ copy_generic_opt_arg_list_item:
  *****************************************************************************/
 
 CreateStmt:    CREATE OptTemp TABLE qualified_name '(' OptTableElementList ')'
-           OptInherit OptPartitionSpec OptWith OnCommitOption OptTableSpace
+           OptInherit OptPartitionSpec table_access_method_clause OptWith
+           OnCommitOption OptTableSpace
                {
                    CreateStmt *n = makeNode(CreateStmt);
                    $4->relpersistence = $2;
@@ -3180,15 +3184,16 @@ CreateStmt: CREATE OptTemp TABLE qualified_name '(' OptTableElementList ')'
                    n->partspec = $9;
                    n->ofTypename = NULL;
                    n->constraints = NIL;
-                   n->options = $10;
-                   n->oncommit = $11;
-                   n->tablespacename = $12;
+                   n->accessMethod = $10;
+                   n->options = $11;
+                   n->oncommit = $12;
+                   n->tablespacename = $13;
                    n->if_not_exists = false;
                    $$ = (Node *)n;
                }
        | CREATE OptTemp TABLE IF_P NOT EXISTS qualified_name '('
-           OptTableElementList ')' OptInherit OptPartitionSpec OptWith
-           OnCommitOption OptTableSpace
+           OptTableElementList ')' OptInherit OptPartitionSpec table_access_method_clause
+           OptWith OnCommitOption OptTableSpace
                {
                    CreateStmt *n = makeNode(CreateStmt);
                    $7->relpersistence = $2;
@@ -3198,15 +3203,16 @@ CreateStmt: CREATE OptTemp TABLE qualified_name '(' OptTableElementList ')'
                    n->partspec = $12;
                    n->ofTypename = NULL;
                    n->constraints = NIL;
-                   n->options = $13;
-                   n->oncommit = $14;
-                   n->tablespacename = $15;
+                   n->accessMethod = $13;
+                   n->options = $14;
+                   n->oncommit = $15;
+                   n->tablespacename = $16;
                    n->if_not_exists = true;
                    $$ = (Node *)n;
                }
        | CREATE OptTemp TABLE qualified_name OF any_name
-           OptTypedTableElementList OptPartitionSpec OptWith OnCommitOption
-           OptTableSpace
+           OptTypedTableElementList OptPartitionSpec table_access_method_clause
+           OptWith OnCommitOption OptTableSpace
                {
                    CreateStmt *n = makeNode(CreateStmt);
                    $4->relpersistence = $2;
@@ -3217,15 +3223,16 @@ CreateStmt: CREATE OptTemp TABLE qualified_name '(' OptTableElementList ')'
                    n->ofTypename = makeTypeNameFromNameList($6);
                    n->ofTypename->location = @6;
                    n->constraints = NIL;
-                   n->options = $9;
-                   n->oncommit = $10;
-                   n->tablespacename = $11;
+                   n->accessMethod = $9;
+                   n->options = $10;
+                   n->oncommit = $11;
+                   n->tablespacename = $12;
                    n->if_not_exists = false;
                    $$ = (Node *)n;
                }
        | CREATE OptTemp TABLE IF_P NOT EXISTS qualified_name OF any_name
-           OptTypedTableElementList OptPartitionSpec OptWith OnCommitOption
-           OptTableSpace
+           OptTypedTableElementList OptPartitionSpec table_access_method_clause
+           OptWith OnCommitOption OptTableSpace
                {
                    CreateStmt *n = makeNode(CreateStmt);
                    $7->relpersistence = $2;
@@ -3236,15 +3243,16 @@ CreateStmt: CREATE OptTemp TABLE qualified_name '(' OptTableElementList ')'
                    n->ofTypename = makeTypeNameFromNameList($9);
                    n->ofTypename->location = @9;
                    n->constraints = NIL;
-                   n->options = $12;
-                   n->oncommit = $13;
-                   n->tablespacename = $14;
+                   n->accessMethod = $12;
+                   n->options = $13;
+                   n->oncommit = $14;
+                   n->tablespacename = $15;
                    n->if_not_exists = true;
                    $$ = (Node *)n;
                }
        | CREATE OptTemp TABLE qualified_name PARTITION OF qualified_name
-           OptTypedTableElementList PartitionBoundSpec OptPartitionSpec OptWith
-           OnCommitOption OptTableSpace
+           OptTypedTableElementList PartitionBoundSpec OptPartitionSpec
+           table_access_method_clause OptWith OnCommitOption OptTableSpace
                {
                    CreateStmt *n = makeNode(CreateStmt);
                    $4->relpersistence = $2;
@@ -3255,15 +3263,16 @@ CreateStmt: CREATE OptTemp TABLE qualified_name '(' OptTableElementList ')'
                    n->partspec = $10;
                    n->ofTypename = NULL;
                    n->constraints = NIL;
-                   n->options = $11;
-                   n->oncommit = $12;
-                   n->tablespacename = $13;
+                   n->accessMethod = $11;
+                   n->options = $12;
+                   n->oncommit = $13;
+                   n->tablespacename = $14;
                    n->if_not_exists = false;
                    $$ = (Node *)n;
                }
        | CREATE OptTemp TABLE IF_P NOT EXISTS qualified_name PARTITION OF
            qualified_name OptTypedTableElementList PartitionBoundSpec OptPartitionSpec
-           OptWith OnCommitOption OptTableSpace
+           table_access_method_clause OptWith OnCommitOption OptTableSpace
                {
                    CreateStmt *n = makeNode(CreateStmt);
                    $7->relpersistence = $2;
@@ -3274,9 +3283,10 @@ CreateStmt:  CREATE OptTemp TABLE qualified_name '(' OptTableElementList ')'
                    n->partspec = $13;
                    n->ofTypename = NULL;
                    n->constraints = NIL;
-                   n->options = $14;
-                   n->oncommit = $15;
-                   n->tablespacename = $16;
+                   n->accessMethod = $14;
+                   n->options = $15;
+                   n->oncommit = $16;
+                   n->tablespacename = $17;
                    n->if_not_exists = true;
                    $$ = (Node *)n;
                }
@@ -3921,6 +3931,12 @@ part_elem: ColId opt_collate opt_class
                    $$ = n;
                }
        ;
+
+table_access_method_clause:
+           USING access_method                 { $$ = $2; }
+           | /*EMPTY*/                         { $$ = NULL; }
+       ;
+
 /* WITHOUT OIDS is legacy only */
 OptWith:
            WITH reloptions             { $$ = $2; }
@@ -4026,14 +4042,16 @@ CreateAsStmt:
        ;
 
 create_as_target:
-           qualified_name opt_column_list OptWith OnCommitOption OptTableSpace
+           qualified_name opt_column_list table_access_method_clause
+           OptWith OnCommitOption OptTableSpace
                {
                    $$ = makeNode(IntoClause);
                    $$->rel = $1;
                    $$->colNames = $2;
-                   $$->options = $3;
-                   $$->onCommit = $4;
-                   $$->tableSpaceName = $5;
+                   $$->accessMethod = $3;
+                   $$->options = $4;
+                   $$->onCommit = $5;
+                   $$->tableSpaceName = $6;
                    $$->viewQuery = NULL;
                    $$->skipData = false;       /* might get changed later */
                }
@@ -4083,14 +4101,15 @@ CreateMatViewStmt:
        ;
 
 create_mv_target:
-           qualified_name opt_column_list opt_reloptions OptTableSpace
+           qualified_name opt_column_list table_access_method_clause opt_reloptions OptTableSpace
                {
                    $$ = makeNode(IntoClause);
                    $$->rel = $1;
                    $$->colNames = $2;
-                   $$->options = $3;
+                   $$->accessMethod = $3;
+                   $$->options = $4;
                    $$->onCommit = ONCOMMIT_NOOP;
-                   $$->tableSpaceName = $4;
+                   $$->tableSpaceName = $5;
                    $$->viewQuery = NULL;       /* filled at analysis time */
                    $$->skipData = false;       /* might get changed later */
                }
@@ -5298,16 +5317,21 @@ row_security_cmd:
  *
  *****************************************************************************/
 
-CreateAmStmt: CREATE ACCESS METHOD name TYPE_P INDEX HANDLER handler_name
+CreateAmStmt: CREATE ACCESS METHOD name TYPE_P am_type HANDLER handler_name
                {
                    CreateAmStmt *n = makeNode(CreateAmStmt);
                    n->amname = $4;
                    n->handler_name = $8;
-                   n->amtype = AMTYPE_INDEX;
+                   n->amtype = $6;
                    $$ = (Node *) n;
                }
        ;
 
+am_type:
+           INDEX           { $$ = AMTYPE_INDEX; }
+       |   TABLE           { $$ = AMTYPE_TABLE; }
+       ;
+
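
For illustration only (not part of the patch), the statements the grammar
additions above accept; my_tableam_handler stands in for a hypothetical
handler function returning the new table_am_handler pseudotype:

    CREATE ACCESS METHOD myam TYPE TABLE HANDLER my_tableam_handler;

    CREATE TABLE t (a int) USING myam;
    CREATE TABLE t_copy USING myam AS SELECT * FROM t;
    CREATE MATERIALIZED VIEW mv USING myam AS SELECT * FROM t;
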
 /*****************************************************************************
  *
  *     QUERIES :
index eeaab2f4c9b972a89aa8afb817aed8b72016d49f..cb9e252f417ceaa22ffe46cff5cadef0d1bdb6a9 100644 (file)
@@ -13,6 +13,7 @@
 */
 #include "postgres.h"
 
+#include "access/tableam.h"
 #include "catalog/partition.h"
 #include "catalog/pg_inherits.h"
 #include "catalog/pg_type.h"
@@ -1200,12 +1201,10 @@ check_default_partition_contents(Relation parent, Relation default_rel,
        Expr       *constr;
        Expr       *partition_constraint;
        EState     *estate;
-       HeapTuple   tuple;
        ExprState  *partqualstate = NULL;
        Snapshot    snapshot;
-       TupleDesc   tupdesc;
        ExprContext *econtext;
-       HeapScanDesc scan;
+       TableScanDesc scan;
        MemoryContext oldCxt;
        TupleTableSlot *tupslot;
 
@@ -1252,7 +1251,6 @@ check_default_partition_contents(Relation parent, Relation default_rel,
            continue;
        }
 
-       tupdesc = CreateTupleDescCopy(RelationGetDescr(part_rel));
        constr = linitial(def_part_constraints);
        partition_constraint = (Expr *)
            map_partition_varattnos((List *) constr,
@@ -1264,8 +1262,8 @@ check_default_partition_contents(Relation parent, Relation default_rel,
 
        econtext = GetPerTupleExprContext(estate);
        snapshot = RegisterSnapshot(GetLatestSnapshot());
-       scan = heap_beginscan(part_rel, snapshot, 0, NULL);
-       tupslot = MakeSingleTupleTableSlot(tupdesc, &TTSOpsHeapTuple);
+       scan = table_beginscan(part_rel, snapshot, 0, NULL);
+       tupslot = table_gimmegimmeslot(part_rel, &estate->es_tupleTable);
 
        /*
         * Switch to per-tuple memory context and reset it for each tuple
@@ -1273,9 +1271,8 @@ check_default_partition_contents(Relation parent, Relation default_rel,
         */
        oldCxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
 
-       while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+       while (table_scan_getnextslot(scan, ForwardScanDirection, tupslot))
        {
-           ExecStoreHeapTuple(tuple, tupslot, false);
            econtext->ecxt_scantuple = tupslot;
 
            if (!ExecCheck(partqualstate, econtext))
@@ -1289,7 +1286,7 @@ check_default_partition_contents(Relation parent, Relation default_rel,
        }
 
        MemoryContextSwitchTo(oldCxt);
-       heap_endscan(scan);
+       table_endscan(scan);
        UnregisterSnapshot(snapshot);
        ExecDropSingleTupleTableSlot(tupslot);
        FreeExecutorState(estate);
index 2d5086d4062cf3240e54b43ca16e06cc3c9fdde9..cb7ee7d53a7b42225ea659f6c969ff568a2641db 100644 (file)
@@ -69,6 +69,7 @@
 #include "access/htup_details.h"
 #include "access/multixact.h"
 #include "access/reloptions.h"
+#include "access/tableam.h"
 #include "access/transam.h"
 #include "access/xact.h"
 #include "catalog/dependency.h"
@@ -1866,7 +1867,7 @@ get_database_list(void)
 {
    List       *dblist = NIL;
    Relation    rel;
-   HeapScanDesc scan;
+   TableScanDesc scan;
    HeapTuple   tup;
    MemoryContext resultcxt;
 
@@ -1884,9 +1885,9 @@ get_database_list(void)
    (void) GetTransactionSnapshot();
 
    rel = heap_open(DatabaseRelationId, AccessShareLock);
-   scan = heap_beginscan_catalog(rel, 0, NULL);
+   scan = table_beginscan_catalog(rel, 0, NULL);
 
-   while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection)))
+   while (HeapTupleIsValid(tup = heap_scan_getnext(scan, ForwardScanDirection)))
    {
        Form_pg_database pgdatabase = (Form_pg_database) GETSTRUCT(tup);
        avw_dbase  *avdb;
@@ -1913,7 +1914,7 @@ get_database_list(void)
        MemoryContextSwitchTo(oldcxt);
    }
 
-   heap_endscan(scan);
+   table_endscan(scan);
    heap_close(rel, AccessShareLock);
 
    CommitTransactionCommand();
@@ -1932,7 +1933,7 @@ do_autovacuum(void)
 {
    Relation    classRel;
    HeapTuple   tuple;
-   HeapScanDesc relScan;
+   TableScanDesc relScan;
    Form_pg_database dbForm;
    List       *table_oids = NIL;
    List       *orphan_oids = NIL;
@@ -2044,13 +2045,13 @@ do_autovacuum(void)
     * wide tables there might be proportionally much more activity in the
     * TOAST table than in its parent.
     */
-   relScan = heap_beginscan_catalog(classRel, 0, NULL);
+   relScan = table_beginscan_catalog(classRel, 0, NULL);
 
    /*
     * On the first pass, we collect main tables to vacuum, and also the main
     * table relid to TOAST relid mapping.
     */
-   while ((tuple = heap_getnext(relScan, ForwardScanDirection)) != NULL)
+   while ((tuple = heap_scan_getnext(relScan, ForwardScanDirection)) != NULL)
    {
        Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
        PgStat_StatTabEntry *tabentry;
@@ -2133,7 +2134,7 @@ do_autovacuum(void)
        }
    }
 
-   heap_endscan(relScan);
+   table_endscan(relScan);
 
    /* second pass: check TOAST tables */
    ScanKeyInit(&key,
@@ -2141,8 +2142,8 @@ do_autovacuum(void)
                BTEqualStrategyNumber, F_CHAREQ,
                CharGetDatum(RELKIND_TOASTVALUE));
 
-   relScan = heap_beginscan_catalog(classRel, 1, &key);
-   while ((tuple = heap_getnext(relScan, ForwardScanDirection)) != NULL)
+   relScan = table_beginscan_catalog(classRel, 1, &key);
+   while ((tuple = heap_scan_getnext(relScan, ForwardScanDirection)) != NULL)
    {
        Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
        PgStat_StatTabEntry *tabentry;
@@ -2188,7 +2189,7 @@ do_autovacuum(void)
            table_oids = lappend_oid(table_oids, relid);
    }
 
-   heap_endscan(relScan);
+   table_endscan(relScan);
    heap_close(classRel, AccessShareLock);
 
    /*
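
For illustration only (not part of the patch): system catalogs are still
assumed to be heap tables, so catalog scans go through
table_beginscan_catalog() but keep yielding HeapTuples via
heap_scan_getnext(), as above.  scan_pg_class_example() is a hypothetical
helper.

    static void
    scan_pg_class_example(Relation classRel)
    {
        TableScanDesc scan;
        HeapTuple   tuple;

        scan = table_beginscan_catalog(classRel, 0, NULL);

        while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
        {
            Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);

            /* ... inspect classForm here ... */
            (void) classForm;
        }

        table_endscan(scan);
    }
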
index 8676088e57d9f3375578db2de7159f5f58352596..7762dbc44b944474566899fed64ba1a50d9b9102 100644 (file)
@@ -36,6 +36,7 @@
 
 #include "access/heapam.h"
 #include "access/htup_details.h"
+#include "access/tableam.h"
 #include "access/transam.h"
 #include "access/twophase_rmgr.h"
 #include "access/xact.h"
@@ -1206,7 +1207,7 @@ pgstat_collect_oids(Oid catalogid, AttrNumber anum_oid)
    HTAB       *htab;
    HASHCTL     hash_ctl;
    Relation    rel;
-   HeapScanDesc scan;
+   TableScanDesc scan;
    HeapTuple   tup;
    Snapshot    snapshot;
 
@@ -1221,8 +1222,8 @@ pgstat_collect_oids(Oid catalogid, AttrNumber anum_oid)
 
    rel = heap_open(catalogid, AccessShareLock);
    snapshot = RegisterSnapshot(GetLatestSnapshot());
-   scan = heap_beginscan(rel, snapshot, 0, NULL);
-   while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL)
+   scan = table_beginscan(rel, snapshot, 0, NULL);
+   while ((tup = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
    {
        Oid         thisoid;
        bool        isnull;
@@ -1234,7 +1235,7 @@ pgstat_collect_oids(Oid catalogid, AttrNumber anum_oid)
 
        (void) hash_search(htab, (void *) &thisoid, HASH_ENTER, NULL);
    }
-   heap_endscan(scan);
+   table_endscan(scan);
    UnregisterSnapshot(snapshot);
    heap_close(rel, AccessShareLock);
 
index 3a84d8ca86a2843b42ec7c5e458a9410a9681ef9..8d452474c152064c130a66185d32a578cd90481d 100644 (file)
@@ -24,6 +24,7 @@
 #include "access/heapam.h"
 #include "access/htup.h"
 #include "access/htup_details.h"
+#include "access/tableam.h"
 #include "access/xact.h"
 
 #include "catalog/pg_subscription.h"
@@ -118,7 +119,7 @@ get_subscription_list(void)
 {
    List       *res = NIL;
    Relation    rel;
-   HeapScanDesc scan;
+   TableScanDesc scan;
    HeapTuple   tup;
    MemoryContext resultcxt;
 
@@ -136,9 +137,9 @@ get_subscription_list(void)
    (void) GetTransactionSnapshot();
 
    rel = heap_open(SubscriptionRelationId, AccessShareLock);
-   scan = heap_beginscan_catalog(rel, 0, NULL);
+   scan = table_beginscan_catalog(rel, 0, NULL);
 
-   while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection)))
+   while (HeapTupleIsValid(tup = heap_scan_getnext(scan, ForwardScanDirection)))
    {
        Form_pg_subscription subform = (Form_pg_subscription) GETSTRUCT(tup);
        Subscription *sub;
@@ -164,7 +165,7 @@ get_subscription_list(void)
        MemoryContextSwitchTo(oldcxt);
    }
 
-   heap_endscan(scan);
+   table_endscan(scan);
    heap_close(rel, AccessShareLock);
 
    CommitTransactionCommand();
index 363ddf4505ef63ed9466a7eaa2167e2525dc65b3..363b82e1b5f4839755798d1e1a06c2ed572c5b44 100644 (file)
@@ -376,7 +376,7 @@ static void
 SnapBuildFreeSnapshot(Snapshot snap)
 {
    /* make sure we don't get passed an external snapshot */
-   Assert(snap->satisfies == HeapTupleSatisfiesHistoricMVCC);
+   Assert(snap->visibility_type == HISTORIC_MVCC_VISIBILITY);
 
    /* make sure nobody modified our snapshot */
    Assert(snap->curcid == FirstCommandId);
@@ -434,7 +434,7 @@ void
 SnapBuildSnapDecRefcount(Snapshot snap)
 {
    /* make sure we don't get passed an external snapshot */
-   Assert(snap->satisfies == HeapTupleSatisfiesHistoricMVCC);
+   Assert(snap->visibility_type == HISTORIC_MVCC_VISIBILITY);
 
    /* make sure nobody modified our snapshot */
    Assert(snap->curcid == FirstCommandId);
@@ -476,7 +476,7 @@ SnapBuildBuildSnapshot(SnapBuild *builder)
 
    snapshot = MemoryContextAllocZero(builder->context, ssize);
 
-   snapshot->satisfies = HeapTupleSatisfiesHistoricMVCC;
+   snapshot->visibility_type = HISTORIC_MVCC_VISIBILITY;
 
    /*
     * We misuse the original meaning of SnapshotData's xip and subxip fields
index 893f1f008edf248362af7e2c1cd4bc0dcb86c7b9..247e03aec4b668242027851063fb18641ec491be 100644 (file)
@@ -27,6 +27,7 @@
 #include "pgstat.h"
 #include "funcapi.h"
 
+#include "access/tableam.h"
 #include "access/xact.h"
 #include "access/xlog_internal.h"
 
@@ -211,11 +212,6 @@ create_estate_for_relation(LogicalRepRelMapEntry *rel)
 
    estate->es_output_cid = GetCurrentCommandId(true);
 
-   /* Triggers might need a slot */
-   if (resultRelInfo->ri_TrigDesc)
-       estate->es_trig_tuple_slot = ExecInitExtraTupleSlot(estate, NULL,
-                                                           &TTSOpsVirtual);
-
    /* Prepare to catch AFTER triggers. */
    AfterTriggerBeginQuery();
 
@@ -718,10 +714,8 @@ apply_handle_update(StringInfo s)
    estate = create_estate_for_relation(rel);
    remoteslot = ExecInitExtraTupleSlot(estate,
                                        RelationGetDescr(rel->localrel),
-                                       &TTSOpsHeapTuple);
-   localslot = ExecInitExtraTupleSlot(estate,
-                                      RelationGetDescr(rel->localrel),
-                                      &TTSOpsHeapTuple);
+                                       &TTSOpsVirtual);
+   localslot = table_gimmegimmeslot(rel->localrel, &estate->es_tupleTable);
    EvalPlanQualInit(&epqstate, estate, NULL, NIL, -1);
 
    PushActiveSnapshot(GetTransactionSnapshot());
@@ -839,9 +833,7 @@ apply_handle_delete(StringInfo s)
    remoteslot = ExecInitExtraTupleSlot(estate,
                                        RelationGetDescr(rel->localrel),
                                        &TTSOpsVirtual);
-   localslot = ExecInitExtraTupleSlot(estate,
-                                      RelationGetDescr(rel->localrel),
-                                      &TTSOpsHeapTuple);
+   localslot = table_gimmegimmeslot(rel->localrel, &estate->es_tupleTable);
    EvalPlanQualInit(&epqstate, estate, NULL, NIL, -1);
 
    PushActiveSnapshot(GetTransactionSnapshot());
index 106194795081e1626143f4470edb89cd44e4e17a..2001190e27c21a41f243661d2ce261d0b1c49d78 100644 (file)
@@ -17,6 +17,7 @@
 #include "access/heapam.h"
 #include "access/htup_details.h"
 #include "access/multixact.h"
+#include "access/tableam.h"
 #include "access/transam.h"
 #include "access/xact.h"
 #include "catalog/catalog.h"
@@ -424,8 +425,9 @@ DefineQueryRewrite(const char *rulename,
        if (event_relation->rd_rel->relkind != RELKIND_VIEW &&
            event_relation->rd_rel->relkind != RELKIND_MATVIEW)
        {
-           HeapScanDesc scanDesc;
+           TableScanDesc scanDesc;
            Snapshot    snapshot;
+           TupleTableSlot *slot;
 
            if (event_relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
                ereport(ERROR,
@@ -440,13 +442,15 @@ DefineQueryRewrite(const char *rulename,
                                RelationGetRelationName(event_relation))));
 
            snapshot = RegisterSnapshot(GetLatestSnapshot());
-           scanDesc = heap_beginscan(event_relation, snapshot, 0, NULL);
-           if (heap_getnext(scanDesc, ForwardScanDirection) != NULL)
+           scanDesc = table_beginscan(event_relation, snapshot, 0, NULL);
+           slot = table_gimmegimmeslot(event_relation, NULL);
+           if (table_scan_getnextslot(scanDesc, ForwardScanDirection, slot))
                ereport(ERROR,
                        (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                         errmsg("could not convert table \"%s\" to a view because it is not empty",
                                RelationGetRelationName(event_relation))));
-           heap_endscan(scanDesc);
+           ExecDropSingleTupleTableSlot(slot);
+           table_endscan(scanDesc);
            UnregisterSnapshot(snapshot);
 
            if (event_relation->rd_rel->relhastriggers)
index e8390311d03fdd7f4577311fe7f71163fa38bf43..2960e21340a546b847bd9b7ade94e9dbe4a24231 100644 (file)
 #include "access/htup_details.h"
 #include "access/slru.h"
 #include "access/subtrans.h"
+#include "access/tableam.h"
 #include "access/transam.h"
 #include "access/twophase.h"
 #include "access/twophase_rmgr.h"
index dbe67cdb4cb81ccf82cd233e8a6d7fa4cce0128a..89aac13c8024b118b804221ab51ea7a8b46ff6a0 100644 (file)
@@ -418,3 +418,4 @@ PSEUDOTYPE_DUMMY_IO_FUNCS(internal);
 PSEUDOTYPE_DUMMY_IO_FUNCS(opaque);
 PSEUDOTYPE_DUMMY_IO_FUNCS(anyelement);
 PSEUDOTYPE_DUMMY_IO_FUNCS(anynonarray);
+PSEUDOTYPE_DUMMY_IO_FUNCS(table_am_handler);
index cdda860e73a379d7563c8bb6d24cb2e3a95820a1..747602b5716ad2287a111dfe6e84fa2479dcc8b4 100644 (file)
@@ -31,6 +31,7 @@
 #include "postgres.h"
 
 #include "access/htup_details.h"
+#include "access/tableam.h"
 #include "access/sysattr.h"
 #include "access/xact.h"
 #include "catalog/pg_collation.h"
@@ -191,7 +192,7 @@ static int  ri_constraint_cache_valid_count = 0;
  * ----------
  */
 static bool ri_Check_Pk_Match(Relation pk_rel, Relation fk_rel,
-                 HeapTuple old_row,
+                 TupleTableSlot *oldslot,
                  const RI_ConstraintInfo *riinfo);
 static Datum ri_restrict(TriggerData *trigdata, bool is_no_action);
 static Datum ri_setnull(TriggerData *trigdata);
@@ -204,12 +205,12 @@ static void ri_GenerateQual(StringInfo buf,
                Oid opoid,
                const char *rightop, Oid rightoptype);
 static void ri_GenerateQualCollation(StringInfo buf, Oid collation);
-static int ri_NullCheck(TupleDesc tupdesc, HeapTuple tup,
+static int ri_NullCheck(TupleDesc tupdesc, TupleTableSlot *slot,
             const RI_ConstraintInfo *riinfo, bool rel_is_pk);
 static void ri_BuildQueryKey(RI_QueryKey *key,
                 const RI_ConstraintInfo *riinfo,
                 int32 constr_queryno);
-static bool ri_KeysEqual(Relation rel, HeapTuple oldtup, HeapTuple newtup,
+static bool ri_KeysEqual(Relation rel, TupleTableSlot *oldslot, TupleTableSlot *newslot,
             const RI_ConstraintInfo *riinfo, bool rel_is_pk);
 static bool ri_AttributesEqual(Oid eq_opr, Oid typeid,
                   Datum oldvalue, Datum newvalue);
@@ -231,14 +232,14 @@ static SPIPlanPtr ri_PlanCheck(const char *querystr, int nargs, Oid *argtypes,
 static bool ri_PerformCheck(const RI_ConstraintInfo *riinfo,
                RI_QueryKey *qkey, SPIPlanPtr qplan,
                Relation fk_rel, Relation pk_rel,
-               HeapTuple old_tuple, HeapTuple new_tuple,
+               TupleTableSlot *oldslot, TupleTableSlot *newslot,
                bool detectNewRows, int expect_OK);
-static void ri_ExtractValues(Relation rel, HeapTuple tup,
+static void ri_ExtractValues(Relation rel, TupleTableSlot *slot,
                 const RI_ConstraintInfo *riinfo, bool rel_is_pk,
                 Datum *vals, char *nulls);
 static void ri_ReportViolation(const RI_ConstraintInfo *riinfo,
                   Relation pk_rel, Relation fk_rel,
-                  HeapTuple violator, TupleDesc tupdesc,
+                  TupleTableSlot *violator, TupleDesc tupdesc,
                   int queryno) pg_attribute_noreturn();
 
 
@@ -254,8 +255,11 @@ RI_FKey_check(TriggerData *trigdata)
    const RI_ConstraintInfo *riinfo;
    Relation    fk_rel;
    Relation    pk_rel;
+#if 0
    HeapTuple   new_row;
    Buffer      new_row_buf;
+#endif
+   TupleTableSlot *newslot;
    RI_QueryKey qkey;
    SPIPlanPtr  qplan;
    int         i;
@@ -267,15 +271,9 @@ RI_FKey_check(TriggerData *trigdata)
                                    trigdata->tg_relation, false);
 
    if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
-   {
-       new_row = trigdata->tg_newtuple;
-       new_row_buf = trigdata->tg_newtuplebuf;
-   }
+       newslot = trigdata->tg_newslot;
    else
-   {
-       new_row = trigdata->tg_trigtuple;
-       new_row_buf = trigdata->tg_trigtuplebuf;
-   }
+       newslot = trigdata->tg_trigslot;
 
    /*
     * We should not even consider checking the row if it is no longer valid,
@@ -285,13 +283,8 @@ RI_FKey_check(TriggerData *trigdata)
     * and lock on the buffer to call HeapTupleSatisfiesVisibility.  Caller
     * should be holding pin, but not lock.
     */
-   LockBuffer(new_row_buf, BUFFER_LOCK_SHARE);
-   if (!HeapTupleSatisfiesVisibility(new_row, SnapshotSelf, new_row_buf))
-   {
-       LockBuffer(new_row_buf, BUFFER_LOCK_UNLOCK);
+   if (!table_satisfies_snapshot(trigdata->tg_relation, newslot, SnapshotSelf))
        return PointerGetDatum(NULL);
-   }
-   LockBuffer(new_row_buf, BUFFER_LOCK_UNLOCK);
 
    /*
     * Get the relation descriptors of the FK and PK tables.
@@ -307,7 +300,7 @@ RI_FKey_check(TriggerData *trigdata)
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("MATCH PARTIAL not yet implemented")));
 
-   switch (ri_NullCheck(RelationGetDescr(fk_rel), new_row, riinfo, false))
+   switch (ri_NullCheck(RelationGetDescr(fk_rel), newslot, riinfo, false))
    {
        case RI_KEYS_ALL_NULL:
 
@@ -437,7 +430,7 @@ RI_FKey_check(TriggerData *trigdata)
     */
    ri_PerformCheck(riinfo, &qkey, qplan,
                    fk_rel, pk_rel,
-                   NULL, new_row,
+                   NULL, newslot,
                    false,
                    SPI_OK_SELECT);
 
@@ -505,7 +498,7 @@ RI_FKey_check_upd(PG_FUNCTION_ARGS)
  */
 static bool
 ri_Check_Pk_Match(Relation pk_rel, Relation fk_rel,
-                 HeapTuple old_row,
+                 TupleTableSlot *oldslot,
                  const RI_ConstraintInfo *riinfo)
 {
    SPIPlanPtr  qplan;
@@ -514,7 +507,7 @@ ri_Check_Pk_Match(Relation pk_rel, Relation fk_rel,
    bool        result;
 
    /* Only called for non-null rows */
-   Assert(ri_NullCheck(RelationGetDescr(pk_rel), old_row, riinfo, true) == RI_KEYS_NONE_NULL);
+   Assert(ri_NullCheck(RelationGetDescr(pk_rel), oldslot, riinfo, true) == RI_KEYS_NONE_NULL);
 
    if (SPI_connect() != SPI_OK_CONNECT)
        elog(ERROR, "SPI_connect failed");
@@ -572,7 +565,7 @@ ri_Check_Pk_Match(Relation pk_rel, Relation fk_rel,
     */
    result = ri_PerformCheck(riinfo, &qkey, qplan,
                             fk_rel, pk_rel,
-                            old_row, NULL,
+                            oldslot, NULL,
                             true,  /* treat like update */
                             SPI_OK_SELECT);
 
@@ -690,7 +683,7 @@ ri_restrict(TriggerData *trigdata, bool is_no_action)
    const RI_ConstraintInfo *riinfo;
    Relation    fk_rel;
    Relation    pk_rel;
-   HeapTuple   old_row;
+   TupleTableSlot *old_slot;
    RI_QueryKey qkey;
    SPIPlanPtr  qplan;
 
@@ -708,7 +701,7 @@ ri_restrict(TriggerData *trigdata, bool is_no_action)
     */
    fk_rel = heap_open(riinfo->fk_relid, RowShareLock);
    pk_rel = trigdata->tg_relation;
-   old_row = trigdata->tg_trigtuple;
+   old_slot = trigdata->tg_trigslot;
 
    switch (riinfo->confmatchtype)
    {
@@ -732,7 +725,7 @@ ri_restrict(TriggerData *trigdata, bool is_no_action)
             * allow another row to be substituted.
             */
            if (is_no_action &&
-               ri_Check_Pk_Match(pk_rel, fk_rel, old_row, riinfo))
+               ri_Check_Pk_Match(pk_rel, fk_rel, old_slot, riinfo))
            {
                heap_close(fk_rel, RowShareLock);
                return PointerGetDatum(NULL);
@@ -800,7 +793,7 @@ ri_restrict(TriggerData *trigdata, bool is_no_action)
             */
            ri_PerformCheck(riinfo, &qkey, qplan,
                            fk_rel, pk_rel,
-                           old_row, NULL,
+                           old_slot, NULL,
                            true,   /* must detect new rows */
                            SPI_OK_SELECT);
 
@@ -844,7 +837,7 @@ RI_FKey_cascade_del(PG_FUNCTION_ARGS)
    const RI_ConstraintInfo *riinfo;
    Relation    fk_rel;
    Relation    pk_rel;
-   HeapTuple   old_row;
+   TupleTableSlot *old_slot;
    RI_QueryKey qkey;
    SPIPlanPtr  qplan;
    int         i;
@@ -868,7 +861,7 @@ RI_FKey_cascade_del(PG_FUNCTION_ARGS)
     */
    fk_rel = heap_open(riinfo->fk_relid, RowExclusiveLock);
    pk_rel = trigdata->tg_relation;
-   old_row = trigdata->tg_trigtuple;
+   old_slot = trigdata->tg_trigslot;
 
    switch (riinfo->confmatchtype)
    {
@@ -940,7 +933,7 @@ RI_FKey_cascade_del(PG_FUNCTION_ARGS)
             */
            ri_PerformCheck(riinfo, &qkey, qplan,
                            fk_rel, pk_rel,
-                           old_row, NULL,
+                           old_slot, NULL,
                            true,   /* must detect new rows */
                            SPI_OK_DELETE);
 
@@ -984,8 +977,8 @@ RI_FKey_cascade_upd(PG_FUNCTION_ARGS)
    const RI_ConstraintInfo *riinfo;
    Relation    fk_rel;
    Relation    pk_rel;
-   HeapTuple   new_row;
-   HeapTuple   old_row;
+   TupleTableSlot *new_slot;
+   TupleTableSlot *old_slot;
    RI_QueryKey qkey;
    SPIPlanPtr  qplan;
    int         i;
@@ -1011,8 +1004,8 @@ RI_FKey_cascade_upd(PG_FUNCTION_ARGS)
     */
    fk_rel = heap_open(riinfo->fk_relid, RowExclusiveLock);
    pk_rel = trigdata->tg_relation;
-   new_row = trigdata->tg_newtuple;
-   old_row = trigdata->tg_trigtuple;
+   new_slot = trigdata->tg_newslot;
+   old_slot = trigdata->tg_trigslot;
 
    switch (riinfo->confmatchtype)
    {
@@ -1096,7 +1089,7 @@ RI_FKey_cascade_upd(PG_FUNCTION_ARGS)
             */
            ri_PerformCheck(riinfo, &qkey, qplan,
                            fk_rel, pk_rel,
-                           old_row, new_row,
+                           old_slot, new_slot,
                            true,   /* must detect new rows */
                            SPI_OK_UPDATE);
 
@@ -1179,7 +1172,7 @@ ri_setnull(TriggerData *trigdata)
    const RI_ConstraintInfo *riinfo;
    Relation    fk_rel;
    Relation    pk_rel;
-   HeapTuple   old_row;
+   TupleTableSlot *old_slot;
    RI_QueryKey qkey;
    SPIPlanPtr  qplan;
    int         i;
@@ -1198,7 +1191,7 @@ ri_setnull(TriggerData *trigdata)
     */
    fk_rel = heap_open(riinfo->fk_relid, RowExclusiveLock);
    pk_rel = trigdata->tg_relation;
-   old_row = trigdata->tg_trigtuple;
+   old_slot = trigdata->tg_trigslot;
 
    switch (riinfo->confmatchtype)
    {
@@ -1283,7 +1276,7 @@ ri_setnull(TriggerData *trigdata)
             */
            ri_PerformCheck(riinfo, &qkey, qplan,
                            fk_rel, pk_rel,
-                           old_row, NULL,
+                           old_slot, NULL,
                            true,   /* must detect new rows */
                            SPI_OK_UPDATE);
 
@@ -1366,7 +1359,7 @@ ri_setdefault(TriggerData *trigdata)
    const RI_ConstraintInfo *riinfo;
    Relation    fk_rel;
    Relation    pk_rel;
-   HeapTuple   old_row;
+   TupleTableSlot *old_slot;
    RI_QueryKey qkey;
    SPIPlanPtr  qplan;
 
@@ -1384,7 +1377,7 @@ ri_setdefault(TriggerData *trigdata)
     */
    fk_rel = heap_open(riinfo->fk_relid, RowExclusiveLock);
    pk_rel = trigdata->tg_relation;
-   old_row = trigdata->tg_trigtuple;
+   old_slot = trigdata->tg_trigslot;
 
    switch (riinfo->confmatchtype)
    {
@@ -1470,7 +1463,7 @@ ri_setdefault(TriggerData *trigdata)
             */
            ri_PerformCheck(riinfo, &qkey, qplan,
                            fk_rel, pk_rel,
-                           old_row, NULL,
+                           old_slot, NULL,
                            true,   /* must detect new rows */
                            SPI_OK_UPDATE);
 
@@ -1529,7 +1522,7 @@ ri_setdefault(TriggerData *trigdata)
  */
 bool
 RI_FKey_pk_upd_check_required(Trigger *trigger, Relation pk_rel,
-                             HeapTuple old_row, HeapTuple new_row)
+                             TupleTableSlot *old_slot, TupleTableSlot *new_slot)
 {
    const RI_ConstraintInfo *riinfo;
 
@@ -1547,11 +1540,11 @@ RI_FKey_pk_upd_check_required(Trigger *trigger, Relation pk_rel,
             * If any old key value is NULL, the row could not have been
             * referenced by an FK row, so no check is needed.
             */
-           if (ri_NullCheck(RelationGetDescr(pk_rel), old_row, riinfo, true) != RI_KEYS_NONE_NULL)
+           if (ri_NullCheck(RelationGetDescr(pk_rel), old_slot, riinfo, true) != RI_KEYS_NONE_NULL)
                return false;
 
            /* If all old and new key values are equal, no check is needed */
-           if (new_row && ri_KeysEqual(pk_rel, old_row, new_row, riinfo, true))
+           if (new_slot && ri_KeysEqual(pk_rel, old_slot, new_slot, riinfo, true))
                return false;
 
            /* Else we need to fire the trigger. */
@@ -1586,9 +1579,12 @@ RI_FKey_pk_upd_check_required(Trigger *trigger, Relation pk_rel,
  */
 bool
 RI_FKey_fk_upd_check_required(Trigger *trigger, Relation fk_rel,
-                             HeapTuple old_row, HeapTuple new_row)
+                             TupleTableSlot *old_slot, TupleTableSlot *new_slot)
 {
    const RI_ConstraintInfo *riinfo;
+   Datum       xminDatum;
+   TransactionId xmin;
+   bool        isnull;
 
    /*
     * Get arguments.
@@ -1603,7 +1599,7 @@ RI_FKey_fk_upd_check_required(Trigger *trigger, Relation fk_rel,
             * If any new key value is NULL, the row must satisfy the
             * constraint, so no check is needed.
             */
-           if (ri_NullCheck(RelationGetDescr(fk_rel), new_row, riinfo, false) != RI_KEYS_NONE_NULL)
+           if (ri_NullCheck(RelationGetDescr(fk_rel), new_slot, riinfo, false) != RI_KEYS_NONE_NULL)
                return false;
 
            /*
@@ -1614,11 +1610,14 @@ RI_FKey_fk_upd_check_required(Trigger *trigger, Relation fk_rel,
             * UPDATE check.  (We could skip this if we knew the INSERT
             * trigger already fired, but there is no easy way to know that.)
             */
-           if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(old_row->t_data)))
+           xminDatum = slot_getsysattr(old_slot, MinTransactionIdAttributeNumber, &isnull);
+           Assert(!isnull);
+           xmin = DatumGetTransactionId(xminDatum);
+           if (TransactionIdIsCurrentTransactionId(xmin))
                return true;
 
            /* If all old and new key values are equal, no check is needed */
-           if (ri_KeysEqual(fk_rel, old_row, new_row, riinfo, false))
+           if (ri_KeysEqual(fk_rel, old_slot, new_slot, riinfo, false))
                return false;
 
            /* Else we need to fire the trigger. */
@@ -1634,7 +1633,7 @@ RI_FKey_fk_upd_check_required(Trigger *trigger, Relation fk_rel,
             * invalidated before the constraint is to be checked, but we
             * should queue the event to apply the check later.
             */
-           switch (ri_NullCheck(RelationGetDescr(fk_rel), new_row, riinfo, false))
+           switch (ri_NullCheck(RelationGetDescr(fk_rel), new_slot, riinfo, false))
            {
                case RI_KEYS_ALL_NULL:
                    return false;
@@ -1652,11 +1651,14 @@ RI_FKey_fk_upd_check_required(Trigger *trigger, Relation fk_rel,
             * UPDATE check.  (We could skip this if we knew the INSERT
             * trigger already fired, but there is no easy way to know that.)
             */
-           if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(old_row->t_data)))
+           xminDatum = slot_getsysattr(old_slot, MinTransactionIdAttributeNumber, &isnull);
+           Assert(!isnull);
+           xmin = DatumGetTransactionId(xminDatum);
+           if (TransactionIdIsCurrentTransactionId(xmin))
                return true;
 
            /* If all old and new key values are equal, no check is needed */
-           if (ri_KeysEqual(fk_rel, old_row, new_row, riinfo, false))
+           if (ri_KeysEqual(fk_rel, old_slot, new_slot, riinfo, false))
                return false;
 
            /* Else we need to fire the trigger. */
@@ -1910,10 +1912,17 @@ RI_Initial_Check(Trigger *trigger, Relation fk_rel, Relation pk_rel)
    /* Did we find a tuple violating the constraint? */
    if (SPI_processed > 0)
    {
+       TupleTableSlot *slot;
        HeapTuple   tuple = SPI_tuptable->vals[0];
        TupleDesc   tupdesc = SPI_tuptable->tupdesc;
        RI_ConstraintInfo fake_riinfo;
 
+       slot = MakeSingleTupleTableSlot(tupdesc, &TTSOpsVirtual);
+
+       heap_deform_tuple(tuple, tupdesc,
+                         slot->tts_values, slot->tts_isnull);
+       ExecStoreVirtualTuple(slot);
+
        /*
         * The columns to look at in the result tuple are 1..N, not whatever
         * they are in the fk_rel.  Hack up riinfo so that the subroutines
@@ -1933,7 +1942,7 @@ RI_Initial_Check(Trigger *trigger, Relation fk_rel, Relation pk_rel)
         * disallows partially-null FK rows.
         */
        if (fake_riinfo.confmatchtype == FKCONSTR_MATCH_FULL &&
-           ri_NullCheck(tupdesc, tuple, &fake_riinfo, false) != RI_KEYS_NONE_NULL)
+           ri_NullCheck(tupdesc, slot, &fake_riinfo, false) != RI_KEYS_NONE_NULL)
            ereport(ERROR,
                    (errcode(ERRCODE_FOREIGN_KEY_VIOLATION),
                     errmsg("insert or update on table \"%s\" violates foreign key constraint \"%s\"",
@@ -1950,8 +1959,10 @@ RI_Initial_Check(Trigger *trigger, Relation fk_rel, Relation pk_rel)
         */
        ri_ReportViolation(&fake_riinfo,
                           pk_rel, fk_rel,
-                          tuple, tupdesc,
+                          slot, tupdesc,
                           RI_PLAN_CHECK_LOOKUPPK);
+
+       ExecDropSingleTupleTableSlot(slot);
    }
 
    if (SPI_finish() != SPI_OK_FINISH)
@@ -2429,7 +2440,7 @@ static bool
 ri_PerformCheck(const RI_ConstraintInfo *riinfo,
                RI_QueryKey *qkey, SPIPlanPtr qplan,
                Relation fk_rel, Relation pk_rel,
-               HeapTuple old_tuple, HeapTuple new_tuple,
+               TupleTableSlot *old_slot, TupleTableSlot *new_slot,
                bool detectNewRows, int expect_OK)
 {
    Relation    query_rel,
@@ -2472,17 +2483,17 @@ ri_PerformCheck(const RI_ConstraintInfo *riinfo,
    }
 
    /* Extract the parameters to be passed into the query */
-   if (new_tuple)
+   if (new_slot)
    {
-       ri_ExtractValues(source_rel, new_tuple, riinfo, source_is_pk,
+       ri_ExtractValues(source_rel, new_slot, riinfo, source_is_pk,
                         vals, nulls);
-       if (old_tuple)
-           ri_ExtractValues(source_rel, old_tuple, riinfo, source_is_pk,
+       if (old_slot)
+           ri_ExtractValues(source_rel, old_slot, riinfo, source_is_pk,
                             vals + riinfo->nkeys, nulls + riinfo->nkeys);
    }
    else
    {
-       ri_ExtractValues(source_rel, old_tuple, riinfo, source_is_pk,
+       ri_ExtractValues(source_rel, old_slot, riinfo, source_is_pk,
                         vals, nulls);
    }
 
@@ -2552,7 +2563,7 @@ ri_PerformCheck(const RI_ConstraintInfo *riinfo,
        (SPI_processed == 0) == (qkey->constr_queryno == RI_PLAN_CHECK_LOOKUPPK))
        ri_ReportViolation(riinfo,
                           pk_rel, fk_rel,
-                          new_tuple ? new_tuple : old_tuple,
+                          new_slot ? new_slot : old_slot,
                           NULL,
                           qkey->constr_queryno);
 
@@ -2563,11 +2574,10 @@ ri_PerformCheck(const RI_ConstraintInfo *riinfo,
  * Extract fields from a tuple into Datum/nulls arrays
  */
 static void
-ri_ExtractValues(Relation rel, HeapTuple tup,
+ri_ExtractValues(Relation rel, TupleTableSlot *slot,
                 const RI_ConstraintInfo *riinfo, bool rel_is_pk,
                 Datum *vals, char *nulls)
 {
-   TupleDesc   tupdesc = rel->rd_att;
    const int16 *attnums;
    int         i;
    bool        isnull;
@@ -2579,8 +2589,7 @@ ri_ExtractValues(Relation rel, HeapTuple tup,
 
    for (i = 0; i < riinfo->nkeys; i++)
    {
-       vals[i] = heap_getattr(tup, attnums[i], tupdesc,
-                              &isnull);
+       vals[i] = slot_getattr(slot, attnums[i], &isnull);
        nulls[i] = isnull ? 'n' : ' ';
    }
 }
@@ -2597,7 +2606,7 @@ ri_ExtractValues(Relation rel, HeapTuple tup,
 static void
 ri_ReportViolation(const RI_ConstraintInfo *riinfo,
                   Relation pk_rel, Relation fk_rel,
-                  HeapTuple violator, TupleDesc tupdesc,
+                  TupleTableSlot *violatorslot, TupleDesc tupdesc,
                   int queryno)
 {
    StringInfoData key_names;
@@ -2676,7 +2685,8 @@ ri_ReportViolation(const RI_ConstraintInfo *riinfo,
                       *val;
 
            name = SPI_fname(tupdesc, fnum);
-           val = SPI_getvalue(violator, tupdesc, fnum);
+           /* PBORKED: avoid heaptuple conversion */
+           val = SPI_getvalue(ExecFetchSlotHeapTuple(violatorslot, false, NULL), tupdesc, fnum);
            if (!val)
                val = "null";
 
@@ -2730,7 +2740,7 @@ ri_ReportViolation(const RI_ConstraintInfo *riinfo,
  */
 static int
 ri_NullCheck(TupleDesc tupDesc,
-            HeapTuple tup,
+            TupleTableSlot *slot,
             const RI_ConstraintInfo *riinfo, bool rel_is_pk)
 {
    const int16 *attnums;
@@ -2745,7 +2755,7 @@ ri_NullCheck(TupleDesc tupDesc,
 
    for (i = 0; i < riinfo->nkeys; i++)
    {
-       if (heap_attisnull(tup, attnums[i], tupDesc))
+       if (slot_attisnull(slot, attnums[i]))
            nonenull = false;
        else
            allnull = false;
@@ -2896,10 +2906,9 @@ ri_HashPreparedPlan(RI_QueryKey *key, SPIPlanPtr plan)
  * ----------
  */
 static bool
-ri_KeysEqual(Relation rel, HeapTuple oldtup, HeapTuple newtup,
+ri_KeysEqual(Relation rel, TupleTableSlot *oldslot, TupleTableSlot *newslot,
             const RI_ConstraintInfo *riinfo, bool rel_is_pk)
 {
-   TupleDesc   tupdesc = RelationGetDescr(rel);
    const int16 *attnums;
    const Oid  *eq_oprs;
    int         i;
@@ -2915,6 +2924,7 @@ ri_KeysEqual(Relation rel, HeapTuple oldtup, HeapTuple newtup,
        eq_oprs = riinfo->ff_eq_oprs;
    }
 
+   /* XXX: could be worthwhile to fetch all necessary attrs at once */
    for (i = 0; i < riinfo->nkeys; i++)
    {
        Datum       oldvalue;
@@ -2924,14 +2934,14 @@ ri_KeysEqual(Relation rel, HeapTuple oldtup, HeapTuple newtup,
        /*
         * Get one attribute's oldvalue. If it is NULL - they're not equal.
         */
-       oldvalue = heap_getattr(oldtup, attnums[i], tupdesc, &isnull);
+       oldvalue = slot_getattr(oldslot, attnums[i], &isnull);
        if (isnull)
            return false;
 
        /*
         * Get one attribute's newvalue. If it is NULL - they're not equal.
         */
-       newvalue = heap_getattr(newtup, attnums[i], tupdesc, &isnull);
+       newvalue = slot_getattr(newslot, attnums[i], &isnull);
        if (isnull)
            return false;
 
index ffca0fe5bb8e6c5c7befdd13b503e13d97762f2e..270b01909ef5141ee52ecbe51f078519c1aec4cb 100644 (file)
 #include "access/brin.h"
 #include "access/gin.h"
 #include "access/htup_details.h"
+#include "access/tableam.h"
 #include "access/sysattr.h"
 #include "catalog/index.h"
 #include "catalog/pg_am.h"
@@ -5528,7 +5529,6 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
            bool        typByVal;
            ScanKeyData scankeys[1];
            IndexScanDesc index_scan;
-           HeapTuple   tup;
            Datum       values[INDEX_MAX_KEYS];
            bool        isnull[INDEX_MAX_KEYS];
            SnapshotData SnapshotNonVacuumable;
@@ -5551,8 +5551,7 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
            indexInfo = BuildIndexInfo(indexRel);
 
            /* set up a slot and expression context for FormIndexDatum */
-           slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRel),
-                                           &TTSOpsHeapTuple);
+           slot = table_gimmegimmeslot(heapRel, NULL);
            econtext->ecxt_scantuple = slot;
            get_typlenbyval(vardata->atttype, &typLen, &typByVal);
            InitNonVacuumableSnapshot(SnapshotNonVacuumable, RecentGlobalXmin);
@@ -5604,11 +5603,9 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
                index_rescan(index_scan, scankeys, 1, NULL, 0);
 
                /* Fetch first tuple in sortop's direction */
-               if ((tup = index_getnext(index_scan,
-                                        indexscandir)) != NULL)
+               if (index_getnext_slot(index_scan, indexscandir, slot))
                {
-                   /* Extract the index column values from the heap tuple */
-                   ExecStoreHeapTuple(tup, slot, false);
+                   /* Extract the index column values from the slot */
                    FormIndexDatum(indexInfo, slot, estate,
                                   values, isnull);
 
@@ -5637,11 +5634,9 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
                index_rescan(index_scan, scankeys, 1, NULL, 0);
 
                /* Fetch first tuple in reverse direction */
-               if ((tup = index_getnext(index_scan,
-                                        -indexscandir)) != NULL)
+               if (index_getnext_slot(index_scan, -indexscandir, slot))
                {
-                   /* Extract the index column values from the heap tuple */
-                   ExecStoreHeapTuple(tup, slot, false);
+                   /* Extract the index column values from the slot */
                    FormIndexDatum(indexInfo, slot, estate,
                                   values, isnull);
 
index 41d540b46ecded139d59d6290ef53b9df2660f0d..bb8a683b44d26844d0716c4938222daddab681b5 100644 (file)
@@ -22,6 +22,7 @@
 
 #include "access/heapam.h"
 #include "access/sysattr.h"
+#include "access/tableam.h"
 #include "catalog/namespace.h"
 #include "catalog/pg_type.h"
 #include "libpq/pqformat.h"
@@ -352,7 +353,7 @@ currtid_byreloid(PG_FUNCTION_ARGS)
    ItemPointerCopy(tid, result);
 
    snapshot = RegisterSnapshot(GetLatestSnapshot());
-   heap_get_latest_tid(rel, snapshot, result);
+   table_get_latest_tid(rel, snapshot, result);
    UnregisterSnapshot(snapshot);
 
    heap_close(rel, AccessShareLock);
@@ -387,7 +388,7 @@ currtid_byrelname(PG_FUNCTION_ARGS)
    ItemPointerCopy(tid, result);
 
    snapshot = RegisterSnapshot(GetLatestSnapshot());
-   heap_get_latest_tid(rel, snapshot, result);
+   table_get_latest_tid(rel, snapshot, result);
    UnregisterSnapshot(snapshot);
 
    heap_close(rel, AccessShareLock);
index c3071db1cdf90d1229b45f61885875037611d6f5..8b79e5907731ad2a7bcf623386312293b008c926 100644 (file)
@@ -36,6 +36,7 @@
 #include "access/nbtree.h"
 #include "access/reloptions.h"
 #include "access/sysattr.h"
+#include "access/tableam.h"
 #include "access/tupdesc_details.h"
 #include "access/xact.h"
 #include "access/xlog.h"
@@ -1196,10 +1197,29 @@ RelationBuildDesc(Oid targetRelId, bool insertIt)
    }
 
    /*
-    * if it's an index, initialize index-related information
+    * initialize access method information
     */
-   if (OidIsValid(relation->rd_rel->relam))
-       RelationInitIndexAccessInfo(relation);
+   switch (relation->rd_rel->relkind)
+   {
+       case RELKIND_INDEX:
+       case RELKIND_PARTITIONED_INDEX:
+           Assert(relation->rd_rel->relam != InvalidOid);
+           RelationInitIndexAccessInfo(relation);
+           break;
+       case RELKIND_RELATION:
+       case RELKIND_SEQUENCE:
+       case RELKIND_TOASTVALUE:
+       case RELKIND_VIEW:      /* not exactly storage, but the underlying
+                                * tuple access machinery is required */
+       case RELKIND_MATVIEW:
+       case RELKIND_PARTITIONED_TABLE:
+       case RELKIND_FOREIGN_TABLE: /* FIXME: needed to support COPY on foreign tables */
+           RelationInitTableAccessMethod(relation);
+           break;
+       default:
+           /* nothing to do in other cases */
+           break;
+   }
 
    /* extract reloptions if any */
    RelationParseRelOptions(relation, pg_class_tuple);
@@ -1701,6 +1721,52 @@ LookupOpclassInfo(Oid operatorClassOid,
    return opcentry;
 }
 
+/*
+ * Fill in the TableAmRoutine for a relation
+ *
+ * relation's rd_tableamhandler must be valid already.
+ */
+static void
+InitTableAmRoutine(Relation relation)
+{
+   relation->rd_tableamroutine = GetTableAmRoutine(relation->rd_tableamhandler);
+}
+
+/*
+ * Initialize table-access-method support data for a heap relation
+ */
+void
+RelationInitTableAccessMethod(Relation relation)
+{
+   HeapTuple   tuple;
+   Form_pg_am  aform;
+
+   if (IsCatalogRelation(relation) ||
+       !OidIsValid(relation->rd_rel->relam))
+   {
+       relation->rd_tableamhandler = HEAP_TABLE_AM_HANDLER_OID;
+   }
+   else
+   {
+       /*
+        * Look up the table access method, save the OID of its handler
+        * function.
+        */
+       tuple = SearchSysCache1(AMOID,
+                               ObjectIdGetDatum(relation->rd_rel->relam));
+       if (!HeapTupleIsValid(tuple))
+           elog(ERROR, "cache lookup failed for access method %u",
+                relation->rd_rel->relam);
+       aform = (Form_pg_am) GETSTRUCT(tuple);
+       relation->rd_tableamhandler = aform->amhandler;
+       ReleaseSysCache(tuple);
+   }
+
+   /*
+    * Now we can fetch the table AM's API struct
+    */
+   InitTableAmRoutine(relation);
+}
 
 /*
  *     formrdesc
@@ -1787,6 +1853,7 @@ formrdesc(const char *relationName, Oid relationReltype,
    relation->rd_rel->relallvisible = 0;
    relation->rd_rel->relkind = RELKIND_RELATION;
    relation->rd_rel->relnatts = (int16) natts;
+   relation->rd_rel->relam = HEAP_TABLE_AM_OID;
 
    /*
     * initialize attribute tuple form
@@ -1854,6 +1921,12 @@ formrdesc(const char *relationName, Oid relationReltype,
     */
    RelationInitPhysicalAddr(relation);
 
+   /*
+    * initialize the table am handler; relam was already set above
+    */
+   relation->rd_tableamroutine = GetHeapamTableAmRoutine();
+
    /*
     * initialize the rel-has-index flag, using hardwired knowledge
     */
@@ -3089,6 +3162,7 @@ RelationBuildLocalRelation(const char *relname,
                           Oid relnamespace,
                           TupleDesc tupDesc,
                           Oid relid,
+                          Oid accessmtd,
                           Oid relfilenode,
                           Oid reltablespace,
                           bool shared_relation,
@@ -3268,6 +3342,16 @@ RelationBuildLocalRelation(const char *relname,
 
    RelationInitPhysicalAddr(rel);
 
+   rel->rd_rel->relam = accessmtd;
+
+   if (relkind == RELKIND_RELATION ||
+       relkind == RELKIND_MATVIEW ||
+       relkind == RELKIND_VIEW ||  /* not exactly storage, but the underlying
+                                    * tuple access machinery is required */
+       relkind == RELKIND_PARTITIONED_TABLE ||
+       relkind == RELKIND_TOASTVALUE)
+       RelationInitTableAccessMethod(rel);
+
    /*
     * Okay to insert into the relcache hash table.
     *
@@ -3788,6 +3872,19 @@ RelationCacheInitializePhase3(void)
            restart = true;
        }
 
+       if (relation->rd_tableamroutine == NULL &&
+           (relation->rd_rel->relkind == RELKIND_RELATION ||
+            relation->rd_rel->relkind == RELKIND_MATVIEW ||
+            relation->rd_rel->relkind == RELKIND_VIEW ||
+            relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE ||
+            relation->rd_rel->relkind == RELKIND_TOASTVALUE))
+       {
+           RelationInitTableAccessMethod(relation);
+           Assert(relation->rd_tableamroutine != NULL);
+
+           restart = true;
+       }
+
        /* Release hold on the relation */
        RelationDecrementReferenceCount(relation);
 
@@ -5563,6 +5660,9 @@ load_relcache_init_file(bool shared)
            if (rel->rd_isnailed)
                nailed_rels++;
 
+           /* Load table AM stuff */
+           RelationInitTableAccessMethod(rel);
+
            Assert(rel->rd_index == NULL);
            Assert(rel->rd_indextuple == NULL);
            Assert(rel->rd_indexcxt == NULL);
index b636b1e262a40d02e76f69c564f544f14303ba9b..1d57177cb572caaa93d55fe52641d25368de3128 100644 (file)
@@ -22,6 +22,7 @@
 #include "access/heapam.h"
 #include "access/htup_details.h"
 #include "access/session.h"
+#include "access/tableam.h"
 #include "access/sysattr.h"
 #include "access/xact.h"
 #include "access/xlog.h"
@@ -1246,15 +1247,15 @@ static bool
 ThereIsAtLeastOneRole(void)
 {
    Relation    pg_authid_rel;
-   HeapScanDesc scan;
+   TableScanDesc scan;
    bool        result;
 
    pg_authid_rel = heap_open(AuthIdRelationId, AccessShareLock);
 
-   scan = heap_beginscan_catalog(pg_authid_rel, 0, NULL);
-   result = (heap_getnext(scan, ForwardScanDirection) != NULL);
+   scan = table_beginscan_catalog(pg_authid_rel, 0, NULL);
+   result = (heap_scan_getnext(scan, ForwardScanDirection) != NULL);
 
-   heap_endscan(scan);
+   table_endscan(scan);
    heap_close(pg_authid_rel, AccessShareLock);
 
    return result;
index 6fe1939881296e6d8456e92eab0d596ae3c7cda3..11b6df209a76041cdc2b46bcdfa5c66bb7c38869 100644 (file)
@@ -29,6 +29,7 @@
 #include "access/commit_ts.h"
 #include "access/gin.h"
 #include "access/rmgr.h"
+#include "access/tableam.h"
 #include "access/transam.h"
 #include "access/twophase.h"
 #include "access/xact.h"
@@ -3511,6 +3512,17 @@ static struct config_string ConfigureNamesString[] =
        check_datestyle, assign_datestyle, NULL
    },
 
+   {
+       {"default_table_access_method", PGC_USERSET, CLIENT_CONN_STATEMENT,
+           gettext_noop("Sets the default table access method for new tables."),
+           NULL,
+           GUC_IS_NAME
+       },
+       &default_table_access_method,
+       DEFAULT_TABLE_ACCESS_METHOD,
+       check_default_table_access_method, NULL, NULL
+   },
+
    {
        {"default_tablespace", PGC_USERSET, CLIENT_CONN_STATEMENT,
            gettext_noop("Sets the default tablespace to create tables and indexes in."),
index ee7fd83c02c9ff05512bdc19cadcc2dccb679a06..7d2b6facf2cfed47946886a1e2027275216c2aca 100644 (file)
@@ -3818,12 +3818,13 @@ comparetup_cluster(const SortTuple *a, const SortTuple *b,
 static void
 copytup_cluster(Tuplesortstate *state, SortTuple *stup, void *tup)
 {
-   HeapTuple   tuple = (HeapTuple) tup;
    Datum       original;
    MemoryContext oldcontext = MemoryContextSwitchTo(state->tuplecontext);
+   TupleTableSlot *slot = (TupleTableSlot *) tup;
+   HeapTuple   tuple;
 
    /* copy the tuple into sort storage */
-   tuple = heap_copytuple(tuple);
+   tuple = ExecCopySlotHeapTuple(slot);
    stup->tuple = (void *) tuple;
    USEMEM(state, GetMemoryChunkSpace(tuple));
 
index 5a6e6fa4c8e2186f2a5591997839d138707bd062..f17b1c5324968cbb5f58082048f51b32c3397669 100644 (file)
@@ -12,6 +12,6 @@ subdir = src/backend/utils/time
 top_builddir = ../../../..
 include $(top_builddir)/src/Makefile.global
 
-OBJS = combocid.o tqual.o snapmgr.o
+OBJS = combocid.o snapmgr.o
 
 include $(top_srcdir)/src/backend/common.mk
index edf59efc29d8f867e3a15ef9bd8f3ab5b7783d5a..9c595459611ce13dcf090c8bc743fc3c3ce2a351 100644 (file)
@@ -141,9 +141,9 @@ static volatile OldSnapshotControlData *oldSnapshotControl;
  * These SnapshotData structs are static to simplify memory allocation
  * (see the hack in GetSnapshotData to avoid repeated malloc/free).
  */
-static SnapshotData CurrentSnapshotData = {HeapTupleSatisfiesMVCC};
-static SnapshotData SecondarySnapshotData = {HeapTupleSatisfiesMVCC};
-SnapshotData CatalogSnapshotData = {HeapTupleSatisfiesMVCC};
+static SnapshotData CurrentSnapshotData = {MVCC_VISIBILITY};
+static SnapshotData SecondarySnapshotData = {MVCC_VISIBILITY};
+SnapshotData CatalogSnapshotData = {MVCC_VISIBILITY};
 
 /* Pointers to valid snapshots */
 static Snapshot CurrentSnapshot = NULL;
@@ -2046,7 +2046,7 @@ EstimateSnapshotSpace(Snapshot snap)
    Size        size;
 
    Assert(snap != InvalidSnapshot);
-   Assert(snap->satisfies == HeapTupleSatisfiesMVCC);
+   Assert(snap->visibility_type == MVCC_VISIBILITY);
 
    /* We allocate any XID arrays needed in the same palloc block. */
    size = add_size(sizeof(SerializedSnapshotData),
@@ -2143,7 +2143,7 @@ RestoreSnapshot(char *start_address)
 
    /* Copy all required fields */
    snapshot = (Snapshot) MemoryContextAlloc(TopTransactionContext, size);
-   snapshot->satisfies = HeapTupleSatisfiesMVCC;
+   snapshot->visibility_type = MVCC_VISIBILITY;
    snapshot->xmin = serialized_snapshot.xmin;
    snapshot->xmax = serialized_snapshot.xmax;
    snapshot->xip = NULL;
index 534fac7bf2f41b3170d51423d8c66ec077742636..0aa107f4b4bd5ddf233a59d63c4f9e7e1b6dc4f0 100644 (file)
@@ -159,8 +159,10 @@ extern IndexScanDesc index_beginscan_parallel(Relation heaprel,
                         ParallelIndexScanDesc pscan);
 extern ItemPointer index_getnext_tid(IndexScanDesc scan,
                  ScanDirection direction);
-extern HeapTuple index_fetch_heap(IndexScanDesc scan);
-extern HeapTuple index_getnext(IndexScanDesc scan, ScanDirection direction);
+struct TupleTableSlot;
+extern bool index_fetch_heap(IndexScanDesc scan, struct TupleTableSlot *slot);
+extern bool index_getnext_slot(IndexScanDesc scan, ScanDirection direction, struct TupleTableSlot *slot);
 extern int64 index_getbitmap(IndexScanDesc scan, TIDBitmap *bitmap);
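+
+/*
+ * Editor's sketch, not part of this patch: the replacement pattern for the
+ * old index_getnext() loop.  The caller now supplies a slot (obtained via
+ * table_gimmegimmeslot() so it matches the table AM) and iterates with
+ * index_getnext_slot(); see the selfuncs.c changes in this patch for an
+ * in-tree example.
+ *
+ * TupleTableSlot *slot = table_gimmegimmeslot(heapRel, NULL);
+ *
+ * while (index_getnext_slot(scan, ForwardScanDirection, slot))
+ * {
+ *     bool    isnull;
+ *     Datum   val = slot_getattr(slot, 1, &isnull);
+ *     ...
+ * }
+ * ExecDropSingleTupleTableSlot(slot);
+ */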
 
 extern IndexBulkDeleteResult *index_bulk_delete(IndexVacuumInfo *info,
index 108e4f10671d500144cae9ccb470463e81917aaf..a309db1a1c6458eb7fb5ee84d320bc8fdd4f0eab 100644 (file)
@@ -33,6 +33,8 @@
 
 typedef struct BulkInsertStateData *BulkInsertState;
 
+struct TupleTableSlot;
+
 /*
  * Possible lock modes for a tuple.
  */
@@ -71,8 +73,21 @@ typedef struct HeapUpdateFailureData
    ItemPointerData ctid;
    TransactionId xmax;
    CommandId   cmax;
+   bool        traversed;
 } HeapUpdateFailureData;
 
+/* Result codes for HeapTupleSatisfiesVacuum */
+typedef enum
+{
+   HEAPTUPLE_DEAD,             /* tuple is dead and deletable */
+   HEAPTUPLE_LIVE,             /* tuple is live (committed, no deleter) */
+   HEAPTUPLE_RECENTLY_DEAD,    /* tuple is dead, but not deletable yet */
+   HEAPTUPLE_INSERT_IN_PROGRESS,   /* inserting xact is still in progress */
+   HEAPTUPLE_DELETE_IN_PROGRESS    /* deleting xact is still in progress */
+} HTSV_Result;
+
+/* struct definition is private to rewriteheap.c */
+typedef struct RewriteStateData *RewriteState;
 
 /* ----------------
  *     function prototypes for heap access method
@@ -98,8 +113,9 @@ extern Relation heap_openrv_extended(const RangeVar *relation,
 #define heap_close(r,l)  relation_close(r,l)
 
 /* struct definitions appear in relscan.h */
+typedef struct TableScanDescData *TableScanDesc;
 typedef struct HeapScanDescData *HeapScanDesc;
-typedef struct ParallelHeapScanDescData *ParallelHeapScanDesc;
+typedef struct ParallelTableScanDescData *ParallelTableScanDesc;
 
 /*
  * HeapScanIsValid
@@ -107,53 +123,47 @@ typedef struct ParallelHeapScanDescData *ParallelHeapScanDesc;
  */
 #define HeapScanIsValid(scan) PointerIsValid(scan)
 
-extern HeapScanDesc heap_beginscan(Relation relation, Snapshot snapshot,
-              int nkeys, ScanKey key);
-extern HeapScanDesc heap_beginscan_catalog(Relation relation, int nkeys,
-                      ScanKey key);
-extern HeapScanDesc heap_beginscan_strat(Relation relation, Snapshot snapshot,
-                    int nkeys, ScanKey key,
-                    bool allow_strat, bool allow_sync);
-extern HeapScanDesc heap_beginscan_bm(Relation relation, Snapshot snapshot,
-                 int nkeys, ScanKey key);
-extern HeapScanDesc heap_beginscan_sampling(Relation relation,
-                       Snapshot snapshot, int nkeys, ScanKey key,
-                       bool allow_strat, bool allow_sync, bool allow_pagemode);
-extern void heap_setscanlimits(HeapScanDesc scan, BlockNumber startBlk,
+extern TableScanDesc heap_beginscan(Relation relation, Snapshot snapshot,
+              int nkeys, ScanKey key,
+              ParallelTableScanDesc parallel_scan,
+              bool allow_strat,
+              bool allow_sync,
+              bool allow_pagemode,
+              bool is_bitmapscan,
+              bool is_samplescan,
+              bool temp_snap);
+extern void heap_setscanlimits(TableScanDesc scan, BlockNumber startBlk,
                   BlockNumber endBlk);
-extern void heapgetpage(HeapScanDesc scan, BlockNumber page);
-extern void heap_rescan(HeapScanDesc scan, ScanKey key);
-extern void heap_rescan_set_params(HeapScanDesc scan, ScanKey key,
+extern void heapgetpage(TableScanDesc scan, BlockNumber page);
+extern void heap_rescan(TableScanDesc scan, ScanKey key, bool set_params,
+           bool allow_strat, bool allow_sync, bool allow_pagemode);
+extern void heap_rescan_set_params(TableScanDesc scan, ScanKey key,
                       bool allow_strat, bool allow_sync, bool allow_pagemode);
-extern void heap_endscan(HeapScanDesc scan);
-extern HeapTuple heap_getnext(HeapScanDesc scan, ScanDirection direction);
-
-extern Size heap_parallelscan_estimate(Snapshot snapshot);
-extern void heap_parallelscan_initialize(ParallelHeapScanDesc target,
-                            Relation relation, Snapshot snapshot);
-extern void heap_parallelscan_reinitialize(ParallelHeapScanDesc parallel_scan);
-extern HeapScanDesc heap_beginscan_parallel(Relation, ParallelHeapScanDesc);
-
-extern bool heap_fetch(Relation relation, Snapshot snapshot,
-          HeapTuple tuple, Buffer *userbuf, bool keep_buf,
+extern void heap_endscan(TableScanDesc scan);
+extern HeapTuple heap_getnext(TableScanDesc scan, ScanDirection direction);
+extern struct TupleTableSlot *heap_getnextslot(TableScanDesc sscan, ScanDirection direction,
+                struct TupleTableSlot *slot);
+extern HeapTuple heap_scan_getnext(TableScanDesc sscan, ScanDirection direction);
+
+extern bool heap_fetch(Relation relation, ItemPointer tid, Snapshot snapshot,
+          HeapTuple tuple, Buffer *userbuf,
           Relation stats_relation);
 extern bool heap_hot_search_buffer(ItemPointer tid, Relation relation,
                       Buffer buffer, Snapshot snapshot, HeapTuple heapTuple,
                       bool *all_dead, bool first_call);
 extern bool heap_hot_search(ItemPointer tid, Relation relation,
                Snapshot snapshot, bool *all_dead);
-
 extern void heap_get_latest_tid(Relation relation, Snapshot snapshot,
                    ItemPointer tid);
 extern void setLastTid(const ItemPointer tid);
 
 extern BulkInsertState GetBulkInsertState(void);
-extern void FreeBulkInsertState(BulkInsertState);
+extern void FreeBulkInsertState(BulkInsertState bistate);
 extern void ReleaseBulkInsertStatePin(BulkInsertState bistate);
 
 extern void heap_insert(Relation relation, HeapTuple tup, CommandId cid,
            int options, BulkInsertState bistate);
-extern void heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
+extern void heap_multi_insert(Relation relation, struct TupleTableSlot **slots, int ntuples,
                  CommandId cid, int options, BulkInsertState bistate);
 extern HTSU_Result heap_delete(Relation relation, ItemPointer tid,
            CommandId cid, Snapshot crosscheck, bool wait,
@@ -164,10 +174,11 @@ extern HTSU_Result heap_update(Relation relation, ItemPointer otid,
            HeapTuple newtup,
            CommandId cid, Snapshot crosscheck, bool wait,
            HeapUpdateFailureData *hufd, LockTupleMode *lockmode);
-extern HTSU_Result heap_lock_tuple(Relation relation, HeapTuple tuple,
+extern HTSU_Result heap_lock_tuple(Relation relation, ItemPointer tid,
                CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy,
-               bool follow_update,
+               bool follow_update, HeapTuple tuple,
                Buffer *buffer, HeapUpdateFailureData *hufd);
+
 extern void heap_inplace_update(Relation relation, HeapTuple tuple);
 extern bool heap_freeze_tuple(HeapTupleHeader tuple,
                  TransactionId relfrozenxid, TransactionId relminmxid,
@@ -182,7 +193,7 @@ extern void simple_heap_update(Relation relation, ItemPointer otid,
                   HeapTuple tup);
 
 extern void heap_sync(Relation relation);
-extern void heap_update_snapshot(HeapScanDesc scan, Snapshot snapshot);
+extern void heap_update_snapshot(TableScanDesc scan, Snapshot snapshot);
 
 /* in heap/pruneheap.c */
 extern void heap_page_prune_opt(Relation relation, Buffer buffer);
@@ -205,4 +216,26 @@ extern Size SyncScanShmemSize(void);
 struct VacuumParams;
 extern void heap_vacuum_rel(Relation onerel, int options,
                struct VacuumParams *params, BufferAccessStrategy bstrategy);
+
+/* in heap/heapam_visibility.c */
+extern bool HeapTupleSatisfies(HeapTuple stup, Snapshot snapshot, Buffer buffer);
+extern HTSU_Result HeapTupleSatisfiesUpdate(HeapTuple stup, CommandId curcid,
+                        Buffer buffer);
+extern HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple stup, TransactionId OldestXmin,
+                        Buffer buffer);
+extern void HeapTupleSetHintBits(HeapTupleHeader tuple, Buffer buffer,
+                    uint16 infomask, TransactionId xid);
+extern bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple);
+extern bool XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot);
+extern bool HeapTupleIsSurelyDead(HeapTuple htup, TransactionId OldestXmin);
+
+/* in heap/rewriteheap.c */
+extern RewriteState begin_heap_rewrite(Relation OldHeap, Relation NewHeap,
+                  TransactionId OldestXmin, TransactionId FreezeXid,
+                  MultiXactId MultiXactCutoff, bool use_wal);
+extern void end_heap_rewrite(RewriteState state);
+extern void rewrite_heap_tuple(RewriteState state, HeapTuple oldTuple,
+                  HeapTuple newTuple);
+extern bool rewrite_heap_dead_tuple(RewriteState state, HeapTuple oldTuple);
+
 #endif                         /* HEAPAM_H */
index e5289b8aa7d910b1abda75b604cdc6347b2cb20d..51a3ad74fa1ebee7d560061ac3d32a3c3907bf87 100644 (file)
 #include "storage/spin.h"
 
 /*
- * Shared state for parallel heap scan.
+ * Shared state for parallel table scan.
  *
- * Each backend participating in a parallel heap scan has its own
- * HeapScanDesc in backend-private memory, and those objects all contain
- * a pointer to this structure.  The information here must be sufficient
- * to properly initialize each new HeapScanDesc as workers join the scan,
- * and it must act as a font of block numbers for those workers.
+ * Each backend participating in a parallel table scan has its own
+ * TableScanDesc in backend-private memory, and those objects all contain a
+ * pointer to this structure.  The information here must be sufficient to
+ * properly initialize each new TableScanDesc as workers join the scan, and it
+ * must act as a font of block numbers for those workers.
  */
-typedef struct ParallelHeapScanDescData
+typedef struct ParallelTableScanDescData
 {
    Oid         phs_relid;      /* OID of relation to scan */
    bool        phs_syncscan;   /* report location to syncscan logic? */
@@ -41,9 +41,9 @@ typedef struct ParallelHeapScanDescData
                                         * workers so far. */
    bool        phs_snapshot_any;   /* SnapshotAny, not phs_snapshot_data? */
    char        phs_snapshot_data[FLEXIBLE_ARRAY_MEMBER];
-} ParallelHeapScanDescData;
+} ParallelTableScanDescData;
 
-typedef struct HeapScanDescData
+typedef struct TableScanDescData
 {
    /* scan parameters */
    Relation    rs_rd;          /* heap relation descriptor */
@@ -62,16 +62,27 @@ typedef struct HeapScanDescData
    BlockNumber rs_startblock;  /* block # to start at */
    BlockNumber rs_numblocks;   /* max number of blocks to scan */
    /* rs_numblocks is usually InvalidBlockNumber, meaning "scan whole rel" */
-   BufferAccessStrategy rs_strategy;   /* access strategy for reads */
    bool        rs_syncscan;    /* report location to syncscan logic? */
 
+   ParallelTableScanDesc rs_parallel;  /* parallel scan information */
+
+}          TableScanDescData;
+
+typedef struct HeapScanDescData
+{
+   /* AM-independent part of the scan descriptor; must be first */
+   TableScanDescData rs_scan;
+
    /* scan current state */
    bool        rs_inited;      /* false = scan not init'd yet */
-   HeapTupleData rs_ctup;      /* current tuple in scan, if any */
    BlockNumber rs_cblock;      /* current block # in scan, if any */
    Buffer      rs_cbuf;        /* current buffer in scan, if any */
    /* NB: if rs_cbuf is not InvalidBuffer, we hold a pin on that buffer */
-   ParallelHeapScanDesc rs_parallel;   /* parallel scan information */
+
+   BufferAccessStrategy rs_strategy;   /* access strategy for reads */
+
+   HeapTupleData rs_ctup;      /* current tuple in scan, if any */
 
    /* these fields only used in page-at-a-time mode and for bitmap scans */
    int         rs_cindex;      /* current tuple's index in vistuples */
@@ -79,6 +90,21 @@ typedef struct HeapScanDescData
    OffsetNumber rs_vistuples[MaxHeapTuplesPerPage];    /* their offsets */
 }          HeapScanDescData;
 
+
+/*
+ * Base state for an index fetch done through a table AM; AM-specific
+ * implementations embed this as the first field of their fetch state.
+ */
+typedef struct IndexFetchTableData
+{
+   Relation rel;
+} IndexFetchTableData;
+
+
+typedef struct IndexFetchHeapData
+{
+   IndexFetchTableData xs_base;
+
+   Buffer      xs_cbuf;        /* current heap buffer in scan, if any */
+   /* NB: if xs_cbuf is not InvalidBuffer, we hold a pin on that buffer */
+} IndexFetchHeapData;
+
 /*
  * We use the same IndexScanDescData structure for both amgettuple-based
  * and amgetbitmap-based index scans.  Some fields are only relevant in
@@ -117,10 +143,10 @@ typedef struct IndexScanDescData
    HeapTuple   xs_hitup;       /* index data returned by AM, as HeapTuple */
    TupleDesc   xs_hitupdesc;   /* rowtype descriptor of xs_hitup */
 
-   /* xs_ctup/xs_cbuf/xs_recheck are valid after a successful index_getnext */
-   HeapTupleData xs_ctup;      /* current heap tuple, if any */
-   Buffer      xs_cbuf;        /* current heap buffer in scan, if any */
-   /* NB: if xs_cbuf is not InvalidBuffer, we hold a pin on that buffer */
+   ItemPointerData xs_heaptid; /* result */
+   bool        xs_heap_continue;   /* T if must keep walking; there may be
+                                    * further results */
+   IndexFetchTableData *xs_heapfetch;
+
    bool        xs_recheck;     /* T means scan keys must be rechecked */
 
    /*
@@ -134,9 +160,6 @@ typedef struct IndexScanDescData
    bool       *xs_orderbynulls;
    bool        xs_recheckorderby;
 
-   /* state data for traversing HOT chains in index_getnext */
-   bool        xs_continue_hot;    /* T if must keep walking HOT chain */
-
    /* parallel index scan information, in shared memory */
    ParallelIndexScanDesc parallel_scan;
 }          IndexScanDescData;
@@ -150,14 +173,17 @@ typedef struct ParallelIndexScanDescData
    char        ps_snapshot_data[FLEXIBLE_ARRAY_MEMBER];
 }          ParallelIndexScanDescData;
 
-/* Struct for heap-or-index scans of system tables */
+struct TupleTableSlot;
+
+/* Struct for storage-or-index scans of system tables */
 typedef struct SysScanDescData
 {
    Relation    heap_rel;       /* catalog being scanned */
    Relation    irel;           /* NULL if doing heap scan */
-   HeapScanDesc scan;          /* only valid in heap-scan case */
+   TableScanDesc scan;     /* only valid in storage-scan case */
    IndexScanDesc iscan;        /* only valid in index-scan case */
    Snapshot    snapshot;       /* snapshot to unregister at end of scan */
+   struct TupleTableSlot *slot;    /* slot holding the current tuple */
 }          SysScanDescData;
 
 #endif                         /* RELSCAN_H */
index cfdf33b4bd672387286aeb802bd9fbf11b24470f..cc74012f72fd34263aeefc4b3fcaff9c9bddb4c0 100644 (file)
 #include "storage/relfilenode.h"
 #include "utils/relcache.h"
 
-/* struct definition is private to rewriteheap.c */
-typedef struct RewriteStateData *RewriteState;
-
-extern RewriteState begin_heap_rewrite(Relation OldHeap, Relation NewHeap,
-                  TransactionId OldestXmin, TransactionId FreezeXid,
-                  MultiXactId MultiXactCutoff, bool use_wal);
-extern void end_heap_rewrite(RewriteState state);
-extern void rewrite_heap_tuple(RewriteState state, HeapTuple oldTuple,
-                  HeapTuple newTuple);
-extern bool rewrite_heap_dead_tuple(RewriteState state, HeapTuple oldTuple);
-
 /*
  * On-Disk data format for an individual logical rewrite mapping.
  */
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h
new file mode 100644 (file)
index 0000000..7364afa
--- /dev/null
@@ -0,0 +1,750 @@
+/*-------------------------------------------------------------------------
+ *
+ * tableam.h
+ *   POSTGRES table access method definitions.
+ *
+ *
+ * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/access/tableam.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef TABLEAM_H
+#define TABLEAM_H
+
+#include "postgres.h"
+
+#include "access/heapam.h"
+#include "access/relscan.h"
+#include "catalog/index.h"
+#include "executor/tuptable.h"
+#include "nodes/execnodes.h"
+#include "nodes/nodes.h"
+#include "fmgr.h"
+#include "utils/guc.h"
+#include "utils/rel.h"
+#include "utils/snapmgr.h"
+#include "utils/snapshot.h"
+#include "utils/tqual.h"
+
+
+#define DEFAULT_TABLE_ACCESS_METHOD    "heap"
+
+extern char *default_table_access_method;
+extern bool synchronize_seqscans;
+
+/*
+ * Storage routine function hooks
+ */
+typedef bool (*SnapshotSatisfies_function) (Relation rel,
+                                           TupleTableSlot *slot,
+                                           Snapshot snapshot);
+
+typedef void (*TupleInsert_function) (Relation rel, TupleTableSlot *slot, CommandId cid,
+                                    int options, BulkInsertState bistate);
+
+typedef void (*TupleInsertSpeculative_function) (Relation rel,
+                                                TupleTableSlot *slot,
+                                                CommandId cid,
+                                                int options,
+                                                BulkInsertState bistate,
+                                                uint32 specToken);
+
+
+typedef void (*TupleCompleteSpeculative_function) (Relation rel,
+                                                 TupleTableSlot *slot,
+                                                 uint32 specToken,
+                                                 bool succeeded);
+
+typedef HTSU_Result (*TupleDelete_function) (Relation relation,
+                                            ItemPointer tid,
+                                            CommandId cid,
+                                            Snapshot snapshot,
+                                            Snapshot crosscheck,
+                                            bool wait,
+                                            HeapUpdateFailureData *hufd,
+                                            bool changingPart);
+
+typedef HTSU_Result (*TupleUpdate_function) (Relation relation,
+                                            ItemPointer otid,
+                                            TupleTableSlot *slot,
+                                            CommandId cid,
+                                            Snapshot snapshot,
+                                            Snapshot crosscheck,
+                                            bool wait,
+                                            HeapUpdateFailureData *hufd,
+                                            LockTupleMode *lockmode,
+                                            bool *update_indexes);
+
+typedef bool (*TupleFetchRowVersion_function) (Relation relation,
+                                              ItemPointer tid,
+                                              Snapshot snapshot,
+                                              TupleTableSlot *slot,
+                                              Relation stats_relation);
+
+typedef HTSU_Result (*TupleLock_function) (Relation relation,
+                                          ItemPointer tid,
+                                          Snapshot snapshot,
+                                          TupleTableSlot *slot,
+                                          CommandId cid,
+                                          LockTupleMode mode,
+                                          LockWaitPolicy wait_policy,
+                                          uint8 flags,
+                                          HeapUpdateFailureData *hufd);
+
+typedef void (*MultiInsert_function) (Relation relation, TupleTableSlot **slots, int nslots,
+                                     CommandId cid, int options, BulkInsertState bistate);
+
+typedef void (*TupleGetLatestTid_function) (Relation relation,
+                                           Snapshot snapshot,
+                                           ItemPointer tid);
+
+struct VacuumParams;
+typedef void (*RelationVacuum_function)(Relation onerel, int options,
+               struct VacuumParams *params, BufferAccessStrategy bstrategy);
+typedef void (*RelationScanAnalyzeNextBlock_function)(TableScanDesc scan, BlockNumber blockno,
+                                                     BufferAccessStrategy bstrategy);
+typedef bool (*RelationScanAnalyzeNextTuple_function)(TableScanDesc scan, TransactionId OldestXmin,
+                                                     double *liverows, double *deadrows, TupleTableSlot *slot);
+
+typedef void (*RelationCopyForCluster_function)(Relation NewHeap, Relation OldHeap, Relation OldIndex,
+                                      bool use_sort,
+                                      TransactionId OldestXmin, TransactionId FreezeXid, MultiXactId MultiXactCutoff,
+                                      double *num_tuples, double *tups_vacuumed, double *tups_recently_dead);
+
+typedef void (*RelationSync_function) (Relation relation);
+
+typedef const TupleTableSlotOps* (*SlotCallbacks_function) (Relation relation);
+
+typedef TableScanDesc (*ScanBegin_function) (Relation relation,
+                                           Snapshot snapshot,
+                                           int nkeys, ScanKey key,
+                                           ParallelTableScanDesc parallel_scan,
+                                           bool allow_strat,
+                                           bool allow_sync,
+                                           bool allow_pagemode,
+                                           bool is_bitmapscan,
+                                           bool is_samplescan,
+                                           bool temp_snap);
+
+typedef struct IndexFetchTableData* (*BeginIndexFetchTable_function) (Relation relation);
+typedef void (*ResetIndexFetchTable_function) (struct IndexFetchTableData* data);
+typedef void (*EndIndexFetchTable_function) (struct IndexFetchTableData* data);
+
+typedef void (*ScanSetlimits_function) (TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks);
+
+typedef TupleTableSlot *(*ScanGetnextSlot_function) (TableScanDesc scan,
+                                                    ScanDirection direction, TupleTableSlot *slot);
+
+typedef void (*ScanEnd_function) (TableScanDesc scan);
+
+
+typedef void (*ScanRescan_function) (TableScanDesc scan, ScanKey key, bool set_params,
+                                    bool allow_strat, bool allow_sync, bool allow_pagemode);
+typedef void (*ScanUpdateSnapshot_function) (TableScanDesc scan, Snapshot snapshot);
+
+typedef bool (*TupleFetchFollow_function)(struct IndexFetchTableData *scan,
+                                         ItemPointer tid,
+                                         Snapshot snapshot,
+                                         TupleTableSlot *slot,
+                                         bool *call_again, bool *all_dead);
+
+typedef double (*IndexBuildRangeScan_function)(Relation heapRelation,
+                                              Relation indexRelation,
+                                              IndexInfo *indexInfo,
+                                              bool allow_sync,
+                                              bool anyvisible,
+                                              BlockNumber start_blockno,
+                                              BlockNumber end_blockno,
+                                              IndexBuildCallback callback,
+                                              void *callback_state,
+                                              TableScanDesc scan);
+struct ValidateIndexState;
+typedef void (*IndexValidateScan_function)(Relation heapRelation,
+                                          Relation indexRelation,
+                                          IndexInfo *indexInfo,
+                                          Snapshot snapshot,
+                                          struct ValidateIndexState *state);
+
+typedef bool (*BitmapPagescan_function)(TableScanDesc scan,
+                                       TBMIterateResult *tbmres);
+
+typedef bool (*BitmapPagescanNext_function)(TableScanDesc scan,
+                                           TupleTableSlot *slot);
+
+struct SampleScanState;
+typedef bool (*SampleScanNextBlock_function)(TableScanDesc scan, struct SampleScanState *scanstate);
+typedef bool (*SampleScanNextTuple_function)(TableScanDesc scan, struct SampleScanState *scanstate, TupleTableSlot *slot);
+
+/*
+ * API struct for a table AM.  Note this must be allocated in a
+ * server-lifetime manner, typically as a static const struct.
+ */
+typedef struct TableAmRoutine
+{
+   NodeTag     type;
+
+   SlotCallbacks_function slot_callbacks;
+
+   SnapshotSatisfies_function snapshot_satisfies;
+
+   /* Operations on physical tuples */
+   TupleInsert_function tuple_insert;
+   TupleInsertSpeculative_function tuple_insert_speculative;
+   TupleCompleteSpeculative_function tuple_complete_speculative;
+   TupleUpdate_function tuple_update;
+   TupleDelete_function tuple_delete;
+   TupleFetchRowVersion_function tuple_fetch_row_version;
+   TupleLock_function tuple_lock;
+   MultiInsert_function multi_insert;
+   TupleGetLatestTid_function tuple_get_latest_tid;
+   TupleFetchFollow_function tuple_fetch_follow;
+
+   RelationVacuum_function relation_vacuum;
+   RelationScanAnalyzeNextBlock_function scan_analyze_next_block;
+   RelationScanAnalyzeNextTuple_function scan_analyze_next_tuple;
+   RelationCopyForCluster_function relation_copy_for_cluster;
+   RelationSync_function relation_sync;
+
+   /* Operations on relation scans */
+   ScanBegin_function scan_begin;
+   ScanSetlimits_function scansetlimits;
+   ScanGetnextSlot_function scan_getnextslot;
+
+   BitmapPagescan_function scan_bitmap_pagescan;
+   BitmapPagescanNext_function scan_bitmap_pagescan_next;
+
+   SampleScanNextBlock_function scan_sample_next_block;
+   SampleScanNextTuple_function scan_sample_next_tuple;
+
+   ScanEnd_function scan_end;
+   ScanRescan_function scan_rescan;
+   ScanUpdateSnapshot_function scan_update_snapshot;
+
+   BeginIndexFetchTable_function begin_index_fetch;
+   ResetIndexFetchTable_function reset_index_fetch;
+   EndIndexFetchTable_function end_index_fetch;
+
+
+   IndexBuildRangeScan_function index_build_range_scan;
+   IndexValidateScan_function index_validate_scan;
+}          TableAmRoutine;
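+
+/*
+ * Editor's illustration, not part of this patch: a minimal table AM
+ * handler.  Because the TableAmRoutine must outlive any relcache entry,
+ * it is declared as a static const struct and the SQL-callable handler
+ * simply returns a pointer to it.  "myam" and its callbacks are
+ * hypothetical names; a trivial AM could even return
+ * GetHeapamTableAmRoutine() to reuse the heap implementation wholesale.
+ *
+ * static const TableAmRoutine myam_methods = {
+ *     .type = T_TableAmRoutine,
+ *     .slot_callbacks = myam_slot_callbacks,
+ *     .scan_begin = myam_beginscan,
+ *     .scan_getnextslot = myam_getnextslot,
+ *     .scan_end = myam_endscan,
+ *     (remaining callbacks elided)
+ * };
+ *
+ * Datum
+ * myam_tableam_handler(PG_FUNCTION_ARGS)
+ * {
+ *     PG_RETURN_POINTER(&myam_methods);
+ * }
+ */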
+
+static inline const TupleTableSlotOps*
+table_slot_callbacks(Relation relation)
+{
+   const TupleTableSlotOps *tts_cb;
+
+   tts_cb = relation->rd_tableamroutine->slot_callbacks(relation);
+
+   return tts_cb;
+}
+
+extern TupleTableSlot *table_gimmegimmeslot(Relation relation, List **reglist);
+
+/*
+ * Inline wrappers around the table AM callbacks
+ */
+
+/*
+ * table_fetch_row_version     - retrieve tuple with given tid
+ *
+ *  XXX: This shouldn't just take a tid, but tid + additional information
+ */
+static inline bool
+table_fetch_row_version(Relation r,
+                       ItemPointer tid,
+                       Snapshot snapshot,
+                       TupleTableSlot *slot,
+                       Relation stats_relation)
+{
+   return r->rd_tableamroutine->tuple_fetch_row_version(r, tid,
+                                                        snapshot, slot,
+                                                        stats_relation);
+}
+
+
+/*
+ * table_lock_tuple - lock a tuple in shared or exclusive mode
+ *
+ *  XXX: This shouldn't just take a tid, but tid + additional information
+ */
+static inline HTSU_Result
+table_lock_tuple(Relation relation, ItemPointer tid, Snapshot snapshot,
+                TupleTableSlot *slot, CommandId cid, LockTupleMode mode,
+                LockWaitPolicy wait_policy, uint8 flags,
+                HeapUpdateFailureData *hufd)
+{
+   return relation->rd_tableamroutine->tuple_lock(relation, tid, snapshot, slot,
+                                               cid, mode, wait_policy,
+                                               flags, hufd);
+}
+
+/* ----------------
+ *     table_beginscan_parallel - join a parallel scan
+ *
+ *     Caller must hold a suitable lock on the correct relation.
+ * ----------------
+ */
+static inline TableScanDesc
+table_beginscan_parallel(Relation relation, ParallelTableScanDesc parallel_scan)
+{
+   Snapshot    snapshot;
+
+   Assert(RelationGetRelid(relation) == parallel_scan->phs_relid);
+
+   if (!parallel_scan->phs_snapshot_any)
+   {
+       /* Snapshot was serialized -- restore it */
+       snapshot = RestoreSnapshot(parallel_scan->phs_snapshot_data);
+       RegisterSnapshot(snapshot);
+   }
+   else
+   {
+       /* SnapshotAny passed by caller (not serialized) */
+       snapshot = SnapshotAny;
+   }
+
+   return relation->rd_tableamroutine->scan_begin(relation, snapshot, 0, NULL, parallel_scan,
+                                               true, true, true, false, false, !parallel_scan->phs_snapshot_any);
+}
+
+/*
+ * table_setscanlimits - restrict range of a table scan
+ *
+ * startBlk is the page to start at
+ * numBlks is number of pages to scan (InvalidBlockNumber means "all")
+ */
+static inline void
+table_setscanlimits(TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks)
+{
+   sscan->rs_rd->rd_tableamroutine->scansetlimits(sscan, startBlk, numBlks);
+}
+
+
+/* ----------------
+ *     table_beginscan - begin relation scan
+ *
+ * table_beginscan is the "standard" case.
+ *
+ * table_beginscan_catalog differs in setting up its own temporary snapshot.
+ *
+ * table_beginscan_strat offers an extended API that lets the caller control
+ * whether a nondefault buffer access strategy can be used, and whether
+ * syncscan can be chosen (possibly resulting in the scan not starting from
+ * block zero).  Both of these default to true with plain table_beginscan.
+ *
+ * table_beginscan_bm is an alternative entry point for setting up a
+ * TableScanDesc for a bitmap heap scan.  Although that scan technology is
+ * really quite unlike a standard seqscan, there is just enough commonality
+ * to make it worth using the same data structure.
+ *
+ * table_beginscan_sampling is an alternative entry point for setting up a
+ * TableScanDesc for a TABLESAMPLE scan.  As with bitmap scans, it's worth
+ * using the same data structure although the behavior is rather different.
+ * In addition to the options offered by table_beginscan_strat, this call
+ * also allows control of whether page-mode visibility checking is used.
+ * ----------------
+ */
+static inline TableScanDesc
+table_beginscan(Relation relation, Snapshot snapshot,
+                 int nkeys, ScanKey key)
+{
+   return relation->rd_tableamroutine->scan_begin(relation, snapshot, nkeys, key, NULL,
+                                               true, true, true, false, false, false);
+}
+
+static inline TableScanDesc
+table_beginscan_catalog(Relation relation, int nkeys, ScanKey key)
+{
+   Oid         relid = RelationGetRelid(relation);
+   Snapshot    snapshot = RegisterSnapshot(GetCatalogSnapshot(relid));
+
+   return relation->rd_tableamroutine->scan_begin(relation, snapshot, nkeys, key, NULL,
+                                               true, true, true, false, false, true);
+}
+
+static inline TableScanDesc
+table_beginscan_strat(Relation relation, Snapshot snapshot,
+                       int nkeys, ScanKey key,
+                       bool allow_strat, bool allow_sync)
+{
+   return relation->rd_tableamroutine->scan_begin(relation, snapshot, nkeys, key, NULL,
+                                               allow_strat, allow_sync, true,
+                                               false, false, false);
+}
+
+static inline TableScanDesc
+table_beginscan_bm(Relation relation, Snapshot snapshot,
+                    int nkeys, ScanKey key)
+{
+   return relation->rd_tableamroutine->scan_begin(relation, snapshot, nkeys, key, NULL,
+                                               false, false, true, true, false, false);
+}
+
+static inline TableScanDesc
+table_beginscan_sampling(Relation relation, Snapshot snapshot,
+                          int nkeys, ScanKey key,
+                          bool allow_strat, bool allow_sync, bool allow_pagemode)
+{
+   return relation->rd_tableamroutine->scan_begin(relation, snapshot, nkeys, key, NULL,
+                                               allow_strat, allow_sync, allow_pagemode,
+                                               false, true, false);
+}
+
+static inline TableScanDesc
+table_beginscan_analyze(Relation relation)
+{
+   return relation->rd_tableamroutine->scan_begin(relation, NULL, 0, NULL, NULL,
+                                               true, false, true,
+                                               false, true, false);
+}
+
+
+/* ----------------
+ *     table_rescan    - restart a relation scan
+ * ----------------
+ */
+static inline void
+table_rescan(TableScanDesc scan,
+              ScanKey key)
+{
+   scan->rs_rd->rd_tableamroutine->scan_rescan(scan, key, false, false, false, false);
+}
+
+/* ----------------
+ *     table_rescan_set_params - restart a relation scan after changing params
+ *
+ * This call allows changing the buffer strategy, syncscan, and pagemode
+ * options before starting a fresh scan.  Note that although the actual use
+ * of syncscan might change (effectively, enabling or disabling reporting),
+ * the previously selected startblock will be kept.
+ * ----------------
+ */
+static inline void
+table_rescan_set_params(TableScanDesc scan, ScanKey key,
+                         bool allow_strat, bool allow_sync, bool allow_pagemode)
+{
+   scan->rs_rd->rd_tableamroutine->scan_rescan(scan, key, true,
+                                            allow_strat, allow_sync, (allow_pagemode && IsMVCCSnapshot(scan->rs_snapshot)));
+}
+
+/* ----------------
+ *     table_endscan   - end relation scan
+ *
+ *     TODO: see how to integrate with index scans; check handling of
+ *     reldesc caching.
+ * ----------------
+ */
+static inline void
+table_endscan(TableScanDesc scan)
+{
+   scan->rs_rd->rd_tableamroutine->scan_end(scan);
+}
+
+
+/* ----------------
+ *     table_scan_update_snapshot
+ *
+ *     Update snapshot info in table scan descriptor.
+ * ----------------
+ */
+static inline void
+table_scan_update_snapshot(TableScanDesc scan, Snapshot snapshot)
+{
+   scan->rs_rd->rd_tableamroutine->scan_update_snapshot(scan, snapshot);
+}
+
+
+static inline bool
+table_scan_bitmap_pagescan(TableScanDesc scan,
+                          TBMIterateResult *tbmres)
+{
+   return scan->rs_rd->rd_tableamroutine->scan_bitmap_pagescan(scan, tbmres);
+}
+
+static inline bool
+table_scan_bitmap_pagescan_next(TableScanDesc scan, TupleTableSlot *slot)
+{
+   return scan->rs_rd->rd_tableamroutine->scan_bitmap_pagescan_next(scan, slot);
+}
+
+static inline bool
+table_scan_sample_next_block(TableScanDesc scan, struct SampleScanState *scanstate)
+{
+   return scan->rs_rd->rd_tableamroutine->scan_sample_next_block(scan, scanstate);
+}
+
+static inline bool
+table_scan_sample_next_tuple(TableScanDesc scan, struct SampleScanState *scanstate, TupleTableSlot *slot)
+{
+   return scan->rs_rd->rd_tableamroutine->scan_sample_next_tuple(scan, scanstate, slot);
+}
+
+static inline void
+table_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno, BufferAccessStrategy bstrategy)
+{
+   scan->rs_rd->rd_tableamroutine->scan_analyze_next_block(scan, blockno, bstrategy);
+}
+
+static inline bool
+table_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, double *liverows, double *deadrows, TupleTableSlot *slot)
+{
+   return scan->rs_rd->rd_tableamroutine->scan_analyze_next_tuple(scan, OldestXmin, liverows, deadrows, slot);
+}
+
+static inline TupleTableSlot *
+table_scan_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
+{
+   slot->tts_tableOid = RelationGetRelid(sscan->rs_rd);
+   return sscan->rs_rd->rd_tableamroutine->scan_getnextslot(sscan, direction, slot);
+}
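+
+/*
+ * Editor's sketch of a caller, not part of this patch: a whole-table scan
+ * written against the slot-based API.  count_visible_rows() is a
+ * hypothetical function; the slot must come from table_gimmegimmeslot()
+ * so that it matches the AM's slot callbacks.
+ *
+ * static uint64
+ * count_visible_rows(Relation rel, Snapshot snapshot)
+ * {
+ *     TableScanDesc scan = table_beginscan(rel, snapshot, 0, NULL);
+ *     TupleTableSlot *slot = table_gimmegimmeslot(rel, NULL);
+ *     uint64      ntuples = 0;
+ *
+ *     while (!TupIsNull(table_scan_getnextslot(scan, ForwardScanDirection, slot)))
+ *         ntuples++;
+ *
+ *     table_endscan(scan);
+ *     ExecDropSingleTupleTableSlot(slot);
+ *     return ntuples;
+ * }
+ */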
+
+static inline IndexFetchTableData*
+table_begin_index_fetch_table(Relation rel)
+{
+   return rel->rd_tableamroutine->begin_index_fetch(rel);
+}
+
+static inline void
+table_reset_index_fetch_table(struct IndexFetchTableData* scan)
+{
+   scan->rel->rd_tableamroutine->reset_index_fetch(scan);
+}
+
+static inline void
+table_end_index_fetch_table(struct IndexFetchTableData* scan)
+{
+   scan->rel->rd_tableamroutine->end_index_fetch(scan);
+}
+
+/*
+ * Insert a tuple from a slot into table AM routine
+ */
+static inline void
+table_insert(Relation relation, TupleTableSlot *slot, CommandId cid,
+              int options, BulkInsertState bistate)
+{
+   relation->rd_tableamroutine->tuple_insert(relation, slot, cid, options,
+                                             bistate);
+}
+
+static inline void
+table_insert_speculative(Relation relation, TupleTableSlot *slot, CommandId cid,
+                        int options, BulkInsertState bistate, uint32 specToken)
+{
+   relation->rd_tableamroutine->tuple_insert_speculative(relation, slot, cid, options,
+                                                         bistate, specToken);
+}
+
+static inline void
+table_complete_speculative(Relation relation, TupleTableSlot *slot, uint32 specToken,
+                               bool succeeded)
+{
+   relation->rd_tableamroutine->tuple_complete_speculative(relation, slot, specToken, succeeded);
+}
+
+/*
+ * Delete a tuple from tid using table AM routine
+ */
+static inline HTSU_Result
+table_delete(Relation relation, ItemPointer tid, CommandId cid,
+            Snapshot snapshot, Snapshot crosscheck, bool wait,
+            HeapUpdateFailureData *hufd, bool changingPart)
+{
+   return relation->rd_tableamroutine->tuple_delete(relation, tid, cid,
+                                                    snapshot, crosscheck,
+                                                    wait, hufd, changingPart);
+}
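+
+/*
+ * Editor's sketch, not part of this patch: handling table_delete()'s
+ * HTSU_Result in the style of the executor.  "rel", "tid", "cid" and
+ * "snapshot" are assumed caller-provided; a real caller (see ExecDelete)
+ * also distinguishes HeapTupleSelfUpdated and may re-fetch the row via
+ * hufd.ctid instead of erroring out.
+ *
+ * HeapUpdateFailureData hufd;
+ * HTSU_Result result;
+ *
+ * result = table_delete(rel, tid, cid, snapshot, InvalidSnapshot,
+ *                       true, &hufd, false);
+ * if (result != HeapTupleMayBeUpdated)
+ *     ereport(ERROR,
+ *             (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+ *              errmsg("could not serialize access due to concurrent update")));
+ */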
+
+/*
+ * update a tuple from tid using table AM routine
+ */
+static inline HTSU_Result
+table_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
+            CommandId cid, Snapshot snapshot, Snapshot crosscheck, bool wait,
+            HeapUpdateFailureData *hufd, LockTupleMode *lockmode,
+            bool *update_indexes)
+{
+   return relation->rd_tableamroutine->tuple_update(relation, otid, slot,
+                                                    cid, snapshot, crosscheck,
+                                                    wait, hufd,
+                                                    lockmode, update_indexes);
+}
+
+static inline bool
+table_fetch_follow(struct IndexFetchTableData *scan,
+                  ItemPointer tid,
+                  Snapshot snapshot,
+                  TupleTableSlot *slot,
+                  bool *call_again, bool *all_dead)
+{
+
+   return scan->rel->rd_tableamroutine->tuple_fetch_follow(scan, tid, snapshot,
+                                                          slot, call_again,
+                                                          all_dead);
+}
+
+static inline bool
+table_fetch_follow_check(Relation rel,
+                        ItemPointer tid,
+                        Snapshot snapshot,
+                        bool *all_dead)
+{
+   IndexFetchTableData *scan = table_begin_index_fetch_table(rel);
+   TupleTableSlot *slot = table_gimmegimmeslot(rel, NULL);
+   bool call_again = false;
+   bool found;
+
+   found = table_fetch_follow(scan, tid, snapshot, slot, &call_again, all_dead);
+
+   table_end_index_fetch_table(scan);
+   ExecDropSingleTupleTableSlot(slot);
+
+   return found;
+}
+
+/*
+ * table_multi_insert  - insert multiple tuples into a table
+ */
+static inline void
+table_multi_insert(Relation relation, TupleTableSlot **slots, int nslots,
+                    CommandId cid, int options, BulkInsertState bistate)
+{
+   relation->rd_tableamroutine->multi_insert(relation, slots, nslots,
+                                          cid, options, bistate);
+}
+
+static inline void
+table_get_latest_tid(Relation relation,
+                      Snapshot snapshot,
+                      ItemPointer tid)
+{
+   relation->rd_tableamroutine->tuple_get_latest_tid(relation, snapshot, tid);
+}
+
+
+static inline void
+table_vacuum_rel(Relation rel, int options,
+            struct VacuumParams *params, BufferAccessStrategy bstrategy)
+{
+   rel->rd_tableamroutine->relation_vacuum(rel, options, params, bstrategy);
+}
+
+
+/* XXX: Move arguments to struct? */
+static inline void
+table_copy_for_cluster(Relation OldHeap, Relation NewHeap, Relation OldIndex,
+                    bool use_sort,
+                    TransactionId OldestXmin, TransactionId FreezeXid, MultiXactId MultiXactCutoff,
+                    double *num_tuples, double *tups_vacuumed, double *tups_recently_dead)
+{
+   OldHeap->rd_tableamroutine->relation_copy_for_cluster(OldHeap, NewHeap, OldIndex,
+                                                         use_sort,
+                                                         OldestXmin, FreezeXid, MultiXactCutoff,
+                                                     num_tuples, tups_vacuumed, tups_recently_dead);
+}
+
+/*
+ * table_sync      - sync a table to disk, for use when no WAL has been written
+ */
+static inline void
+table_sync(Relation rel)
+{
+   rel->rd_tableamroutine->relation_sync(rel);
+}
+
+static inline double
+table_index_build_scan(Relation heapRelation,
+                      Relation indexRelation,
+                      IndexInfo *indexInfo,
+                      bool allow_sync,
+                      IndexBuildCallback callback,
+                      void *callback_state,
+                      TableScanDesc scan)
+{
+   return heapRelation->rd_tableamroutine->index_build_range_scan(
+       heapRelation,
+       indexRelation,
+       indexInfo,
+       allow_sync,
+       false,
+       0,
+       InvalidBlockNumber,
+       callback,
+       callback_state,
+       scan);
+}
+
+static inline void
+table_index_validate_scan(Relation heapRelation,
+                         Relation indexRelation,
+                         IndexInfo *indexInfo,
+                         Snapshot snapshot,
+                         struct ValidateIndexState *state)
+{
+   heapRelation->rd_tableamroutine->index_validate_scan(
+       heapRelation,
+       indexRelation,
+       indexInfo,
+       snapshot,
+       state);
+}
+
+static inline double
+table_index_build_range_scan(Relation heapRelation,
+                            Relation indexRelation,
+                            IndexInfo *indexInfo,
+                            bool allow_sync,
+                            bool anyvisible,
+                            BlockNumber start_blockno,
+                            BlockNumber numblocks,
+                            IndexBuildCallback callback,
+                            void *callback_state,
+                            TableScanDesc scan)
+{
+   return heapRelation->rd_tableamroutine->index_build_range_scan(
+       heapRelation,
+       indexRelation,
+       indexInfo,
+       allow_sync,
+       anyvisible,
+       start_blockno,
+       numblocks,
+       callback,
+       callback_state,
+       scan);
+}
+
+/*
+ * Return true iff tuple in slot satisfies the snapshot.
+ *
+ * Notes:
+ * Assumes slot's tuple is valid.
+ * Hint bits in the HeapTuple's t_infomask may be updated as a side effect;
+ * if so, the indicated buffer is marked dirty.
+ *
+ * XXX: Add _tuple_ to name?
+ */
+static inline bool
+table_satisfies_snapshot(Relation rel, TupleTableSlot *slot, Snapshot snapshot)
+{
+   return rel->rd_tableamroutine->snapshot_satisfies(rel, slot, snapshot);
+}
+
+extern BlockNumber table_parallelscan_nextpage(TableScanDesc scan);
+extern void table_parallelscan_startblock_init(TableScanDesc scan);
+extern Size table_parallelscan_estimate(Snapshot snapshot);
+extern void table_parallelscan_initialize(ParallelTableScanDesc target,
+                                         Relation relation, Snapshot snapshot);
+extern void table_parallelscan_reinitialize(ParallelTableScanDesc parallel_scan);
+
+extern const TableAmRoutine *GetTableAmRoutine(Oid amhandler);
+extern const TableAmRoutine *GetTableAmRoutineByAmId(Oid amoid);
+extern const TableAmRoutine *GetHeapamTableAmRoutine(void);
+
+extern bool check_default_table_access_method(char **newval, void **extra,
+                                   GucSource source);
+
+#endif     /* TABLEAM_H */
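
To make the shape of this API concrete, here is a minimal caller sketch. It is illustrative only: table_beginscan/table_endscan are assumed to be the scan setup/teardown wrappers from elsewhere in this patch (they do not appear in this hunk), and the sketch assumes a NULL return from table_scan_getnextslot signals end of scan, mirroring the use of table_gimmegimmeslot and ExecDropSingleTupleTableSlot in table_fetch_follow_check above.

    /* Illustrative sketch only, not part of the patch. */
    TableScanDesc scan;
    TupleTableSlot *slot = table_gimmegimmeslot(rel, NULL); /* AM-appropriate slot */

    scan = table_beginscan(rel, snapshot, 0, NULL);     /* assumed wrapper */
    while (table_scan_getnextslot(scan, ForwardScanDirection, slot) != NULL)
    {
        /* process one tuple; tts_tableOid was already set by the wrapper */
    }
    table_endscan(scan);                                /* assumed wrapper */
    ExecDropSingleTupleTableSlot(slot);
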
index 3ecd4737e5decf00ed55d3ad300c68c482508f12..3947d7b915baba7d441f2c580b615c0b59c4831d 100644 (file)
@@ -34,7 +34,7 @@ typedef void (*BeginSampleScan_function) (SampleScanState *node,
                                          int nparams,
                                          uint32 seed);
 
-typedef BlockNumber (*NextSampleBlock_function) (SampleScanState *node);
+typedef BlockNumber (*NextSampleBlock_function) (SampleScanState *node, BlockNumber nblocks);
 
 typedef OffsetNumber (*NextSampleTuple_function) (SampleScanState *node,
                                                  BlockNumber blockno,
index 56a341a62228e183ac724c2468fefe6bef24a8f8..cedc19fbcf5a8e0f46030ae07fb567065d70682d 100644 (file)
@@ -45,6 +45,7 @@ extern Relation heap_create(const char *relname,
            Oid reltablespace,
            Oid relid,
            Oid relfilenode,
+           Oid accessmtd,
            TupleDesc tupDesc,
            char relkind,
            char relpersistence,
@@ -59,6 +60,7 @@ extern Oid heap_create_with_catalog(const char *relname,
                         Oid reltypeid,
                         Oid reloftypeid,
                         Oid ownerid,
+                        Oid accessmtd,
                         TupleDesc tupdesc,
                         List *cooked_constraints,
                         char relkind,
index 35a29f3498f1b423d4ba33e848af5ef4d339d625..5e53001d78cfa1857e5dcac08790ac167aae484f 100644 (file)
@@ -20,7 +20,7 @@
 
 #define DEFAULT_INDEX_TYPE "btree"
 
-/* Typedef for callback function for IndexBuildHeapScan */
+/* Typedef for callback function for table_index_build_scan */
 typedef void (*IndexBuildCallback) (Relation index,
                                    HeapTuple htup,
                                    Datum *values,
@@ -37,6 +37,15 @@ typedef enum
    INDEX_DROP_SET_DEAD
 } IndexStateFlagsAction;
 
+/* state info for validate_index bulkdelete callback */
+typedef struct ValidateIndexState
+{
+   Tuplesortstate *tuplesort;  /* for sorting the index TIDs */
+   /* statistics (for debug purposes only): */
+   double      htups,
+               itups,
+               tups_inserted;
+} ValidateIndexState;
 
 extern void index_check_primary_key(Relation heapRel,
                        IndexInfo *indexInfo,
@@ -111,24 +120,6 @@ extern void index_build(Relation heapRelation,
            bool isreindex,
            bool parallel);
 
-extern double IndexBuildHeapScan(Relation heapRelation,
-                  Relation indexRelation,
-                  IndexInfo *indexInfo,
-                  bool allow_sync,
-                  IndexBuildCallback callback,
-                  void *callback_state,
-                  HeapScanDesc scan);
-extern double IndexBuildHeapRangeScan(Relation heapRelation,
-                       Relation indexRelation,
-                       IndexInfo *indexInfo,
-                       bool allow_sync,
-                       bool anyvisible,
-                       BlockNumber start_blockno,
-                       BlockNumber end_blockno,
-                       IndexBuildCallback callback,
-                       void *callback_state,
-                       HeapScanDesc scan);
-
 extern void validate_index(Oid heapId, Oid indexId, Snapshot snapshot);
 
 extern void index_set_state_flags(Oid indexId, IndexStateFlagsAction action);
@@ -155,4 +146,45 @@ extern void RestoreReindexState(void *reindexstate);
 
 extern void IndexSetParentIndex(Relation idx, Oid parentOid);
 
+
+/*
+ * itemptr_encode - Encode ItemPointer as int64/int8
+ *
+ * This representation must produce values encoded as int64 that sort in the
+ * same order as their corresponding original TID values would (using the
+ * default int8 opclass to produce a result equivalent to the default TID
+ * opclass).
+ *
+ * As noted in validate_index(), this can be significantly faster.
+ */
+static inline int64
+itemptr_encode(ItemPointer itemptr)
+{
+   BlockNumber block = ItemPointerGetBlockNumber(itemptr);
+   OffsetNumber offset = ItemPointerGetOffsetNumber(itemptr);
+   int64       encoded;
+
+   /*
+    * Use the 16 least significant bits for the offset.  32 adjacent bits are
+    * used for the block number.  Since remaining bits are unused, there
+    * cannot be negative encoded values (We assume a two's complement
+    * representation).
+    */
+   encoded = ((uint64) block << 16) | (uint16) offset;
+
+   return encoded;
+}
+
+/*
+ * itemptr_decode - Decode int64/int8 representation back to ItemPointer
+ */
+static inline void
+itemptr_decode(ItemPointer itemptr, int64 encoded)
+{
+   BlockNumber block = (BlockNumber) (encoded >> 16);
+   OffsetNumber offset = (OffsetNumber) (encoded & 0xFFFF);
+
+   ItemPointerSet(itemptr, block, offset);
+}
+
 #endif                         /* INDEX_H */
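
A quick round-trip check of the encoding above (illustrative snippet, not part of the patch): block 3, offset 7 encodes to (3 << 16) | 7 == 196615, and decoding recovers the original TID.

    ItemPointerData tid;
    int64       encoded;

    ItemPointerSet(&tid, 3, 7);     /* block 3, offset 7 */
    encoded = itemptr_encode(&tid); /* (3 << 16) | 7 == 196615 */

    itemptr_decode(&tid, encoded);
    Assert(ItemPointerGetBlockNumber(&tid) == 3);
    Assert(ItemPointerGetOffsetNumber(&tid) == 7);
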
index bef53a319af46a379b93b763bee0998a32ddb99e..0f44c420940c72f35e2ca807087e3e1a56bad11e 100644 (file)
@@ -30,5 +30,8 @@
 { oid => '3580', oid_symbol => 'BRIN_AM_OID',
   descr => 'block range index (BRIN) access method',
   amname => 'brin', amhandler => 'brinhandler', amtype => 'i' },
+{ oid => '4001', oid_symbol => 'HEAP_TABLE_AM_OID',
+  descr => 'heap table access method',
+  amname => 'heap', amhandler => 'heap_tableam_handler', amtype => 't' },
 
 ]
index 57d65f830fc2ce826b73d56b92a570a79e14ebc0..6db7e4b21e94f0284de37fb40660fe66f3363193 100644 (file)
@@ -53,6 +53,7 @@ typedef FormData_pg_am *Form_pg_am;
  * Allowed values for amtype
  */
 #define AMTYPE_INDEX                   'i' /* index access method */
+#define AMTYPE_TABLE                   't' /* table access method */
 
 #endif                         /* EXPOSE_TO_CLIENT_CODE */
 
index 5a884a852b531538a1853e747483afe42c2021ee..b43c37ff14579306d4a4855d9f26853c295133d5 100644 (file)
@@ -22,7 +22,7 @@
 
 { oid => '1247',
   relname => 'pg_type', relnamespace => 'PGNSP', reltype => '71',
-  reloftype => '0', relowner => 'PGUID', relam => '0', relfilenode => '0',
+  reloftype => '0', relowner => 'PGUID', relam => 'PGHEAPAM', relfilenode => '0',
   reltablespace => '0', relpages => '0', reltuples => '0', relallvisible => '0',
   reltoastrelid => '0', relhasindex => 'f', relisshared => 'f',
   relpersistence => 'p', relkind => 'r', relnatts => '31', relchecks => '0',
@@ -33,7 +33,7 @@
   reloptions => '_null_', relpartbound => '_null_' },
 { oid => '1249',
   relname => 'pg_attribute', relnamespace => 'PGNSP', reltype => '75',
-  reloftype => '0', relowner => 'PGUID', relam => '0', relfilenode => '0',
+  reloftype => '0', relowner => 'PGUID', relam => 'PGHEAPAM', relfilenode => '0',
   reltablespace => '0', relpages => '0', reltuples => '0', relallvisible => '0',
   reltoastrelid => '0', relhasindex => 'f', relisshared => 'f',
   relpersistence => 'p', relkind => 'r', relnatts => '24', relchecks => '0',
@@ -44,7 +44,7 @@
   reloptions => '_null_', relpartbound => '_null_' },
 { oid => '1255',
   relname => 'pg_proc', relnamespace => 'PGNSP', reltype => '81',
-  reloftype => '0', relowner => 'PGUID', relam => '0', relfilenode => '0',
+  reloftype => '0', relowner => 'PGUID', relam => 'PGHEAPAM', relfilenode => '0',
   reltablespace => '0', relpages => '0', reltuples => '0', relallvisible => '0',
   reltoastrelid => '0', relhasindex => 'f', relisshared => 'f',
   relpersistence => 'p', relkind => 'r', relnatts => '29', relchecks => '0',
@@ -55,7 +55,7 @@
   reloptions => '_null_', relpartbound => '_null_' },
 { oid => '1259',
   relname => 'pg_class', relnamespace => 'PGNSP', reltype => '83',
-  reloftype => '0', relowner => 'PGUID', relam => '0', relfilenode => '0',
+  reloftype => '0', relowner => 'PGUID', relam => 'PGHEAPAM', relfilenode => '0',
   reltablespace => '0', relpages => '0', reltuples => '0', relallvisible => '0',
   reltoastrelid => '0', relhasindex => 'f', relisshared => 'f',
   relpersistence => 'p', relkind => 'r', relnatts => '33', relchecks => '0',
index 84e63c6d06a2ad0de1d19c68655a82848a3e7e6c..873a5b8d22b2f0a3d5215897bbc8ad0150920492 100644 (file)
@@ -36,7 +36,7 @@ CATALOG(pg_class,1259,RelationRelationId) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83,Relat
    Oid         reloftype;      /* OID of entry in pg_type for underlying
                                 * composite type */
    Oid         relowner;       /* class owner */
-   Oid         relam;          /* index access method; 0 if not an index */
+   Oid         relam;          /* access method; 0 if not a table / index */
    Oid         relfilenode;    /* identifier of physical storage file */
 
    /* relfilenode == 0 means it is a "mapped" relation, see relmapper.c */
index f79fcfe029f1545599759c0f22e89a952bc86ea1..112fe90ada9c53039e4140878f61f1437120b891 100644 (file)
   proname => 'int4', prorettype => 'int4', proargtypes => 'float4',
   prosrc => 'ftoi4' },
 
+# Table access method handlers
+{ oid => '4002', oid_symbol => 'HEAP_TABLE_AM_HANDLER_OID',
+  descr => 'row-oriented heap table access method handler',
+  proname => 'heap_tableam_handler', provolatile => 'v', prorettype => 'table_am_handler',
+  proargtypes => 'internal', prosrc => 'heap_tableam_handler' },
+
 # Index access method handlers
 { oid => '330', descr => 'btree index access method handler',
   proname => 'bthandler', provolatile => 'v', prorettype => 'index_am_handler',
 { oid => '3312', descr => 'I/O',
   proname => 'tsm_handler_out', prorettype => 'cstring',
   proargtypes => 'tsm_handler', prosrc => 'tsm_handler_out' },
+{ oid => '3425', descr => 'I/O',
+  proname => 'table_am_handler_in', proisstrict => 'f',
+  prorettype => 'table_am_handler', proargtypes => 'cstring',
+  prosrc => 'table_am_handler_in' },
+{ oid => '3426', descr => 'I/O',
+  proname => 'table_am_handler_out', prorettype => 'cstring',
+  proargtypes => 'table_am_handler', prosrc => 'table_am_handler_out' },
 
 # tablesample method handlers
 { oid => '3313', descr => 'BERNOULLI tablesample method handler',
index d295eae1b99aa3994034f60b9e7a0cf8c2bdd752..f37856d2b4f52d4a7025a706bddd2c4a22735374 100644 (file)
   typcategory => 'P', typinput => 'tsm_handler_in',
   typoutput => 'tsm_handler_out', typreceive => '-', typsend => '-',
   typalign => 'i' },
+{ oid => '3998',
+  typname => 'table_am_handler', typlen => '4', typbyval => 't', typtype => 'p',
+  typcategory => 'P', typinput => 'table_am_handler_in',
+  typoutput => 'table_am_handler_out', typreceive => '-', typsend => '-',
+  typalign => 'i' },
 { oid => '3831',
   descr => 'pseudo-type representing a polymorphic base type that is a range',
   typname => 'anyrange', typlen => '-1', typbyval => 'f', typtype => 'p',
index 1031448c1451b7d60e466caa4c5539d2985ef144..0f02baee14083009fa509ca15c1781afc5a1359c 100644 (file)
@@ -35,8 +35,8 @@ typedef struct TriggerData
    HeapTuple   tg_trigtuple;
    HeapTuple   tg_newtuple;
    Trigger    *tg_trigger;
-   Buffer      tg_trigtuplebuf;
-   Buffer      tg_newtuplebuf;
+   TupleTableSlot *tg_trigslot;
+   TupleTableSlot *tg_newslot;
    Tuplestorestate *tg_oldtable;
    Tuplestorestate *tg_newtable;
 } TriggerData;
@@ -186,15 +186,15 @@ extern void ExecBSInsertTriggers(EState *estate,
 extern void ExecASInsertTriggers(EState *estate,
                     ResultRelInfo *relinfo,
                     TransitionCaptureState *transition_capture);
-extern TupleTableSlot *ExecBRInsertTriggers(EState *estate,
+extern bool ExecBRInsertTriggers(EState *estate,
                     ResultRelInfo *relinfo,
                     TupleTableSlot *slot);
 extern void ExecARInsertTriggers(EState *estate,
                     ResultRelInfo *relinfo,
-                    HeapTuple trigtuple,
+                    TupleTableSlot *slot,
                     List *recheckIndexes,
                     TransitionCaptureState *transition_capture);
-extern TupleTableSlot *ExecIRInsertTriggers(EState *estate,
+extern bool ExecIRInsertTriggers(EState *estate,
                     ResultRelInfo *relinfo,
                     TupleTableSlot *slot);
 extern void ExecBSDeleteTriggers(EState *estate,
@@ -221,7 +221,7 @@ extern void ExecBSUpdateTriggers(EState *estate,
 extern void ExecASUpdateTriggers(EState *estate,
                     ResultRelInfo *relinfo,
                     TransitionCaptureState *transition_capture);
-extern TupleTableSlot *ExecBRUpdateTriggers(EState *estate,
+extern bool ExecBRUpdateTriggers(EState *estate,
                     EPQState *epqstate,
                     ResultRelInfo *relinfo,
                     ItemPointer tupleid,
@@ -231,10 +231,10 @@ extern void ExecARUpdateTriggers(EState *estate,
                     ResultRelInfo *relinfo,
                     ItemPointer tupleid,
                     HeapTuple fdw_trigtuple,
-                    HeapTuple newtuple,
+                    TupleTableSlot *slot,
                     List *recheckIndexes,
                     TransitionCaptureState *transition_capture);
-extern TupleTableSlot *ExecIRUpdateTriggers(EState *estate,
+extern bool ExecIRUpdateTriggers(EState *estate,
                     ResultRelInfo *relinfo,
                     HeapTuple trigtuple,
                     TupleTableSlot *slot);
@@ -258,9 +258,9 @@ extern bool AfterTriggerPendingOnRel(Oid relid);
  * in utils/adt/ri_triggers.c
  */
 extern bool RI_FKey_pk_upd_check_required(Trigger *trigger, Relation pk_rel,
-                             HeapTuple old_row, HeapTuple new_row);
+                             TupleTableSlot *old_slot, TupleTableSlot *new_slot);
 extern bool RI_FKey_fk_upd_check_required(Trigger *trigger, Relation fk_rel,
-                             HeapTuple old_row, HeapTuple new_row);
+                             TupleTableSlot *old_slot, TupleTableSlot  *new_slot);
 extern bool RI_Initial_Check(Trigger *trigger,
                 Relation fk_rel, Relation pk_rel);
 
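
The BR/IR trigger functions now return bool and leave the (possibly trigger-modified) tuple in the passed-in slot, instead of returning a new slot. A hedged sketch of the adjusted calling convention (the real caller changes live in the executor, e.g. nodeModifyTable.c, and are not shown in this hunk):

    /* Sketch only: a false return means the row operation is skipped. */
    if (resultRelInfo->ri_TrigDesc &&
        resultRelInfo->ri_TrigDesc->trig_insert_before_row)
    {
        if (!ExecBRInsertTriggers(estate, resultRelInfo, slot))
            return NULL;        /* "do nothing" trigger outcome */
    }
    /* otherwise proceed with the tuple now in *slot */
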
index 2feec628c03212fb07105fef73d8ff0e5ff398ab..ded4baf00475db575acb24efb30cd7dd9b6d2427 100644 (file)
@@ -183,19 +183,14 @@ extern void ExecWithCheckOptions(WCOKind kind, ResultRelInfo *resultRelInfo,
 extern LockTupleMode ExecUpdateLockMode(EState *estate, ResultRelInfo *relinfo);
 extern ExecRowMark *ExecFindRowMark(EState *estate, Index rti, bool missing_ok);
 extern ExecAuxRowMark *ExecBuildAuxRowMark(ExecRowMark *erm, List *targetlist);
+extern TupleTableSlot *EvalPlanQualSlot(EPQState *epqstate,
+            Relation relation, Index rti);
 extern TupleTableSlot *EvalPlanQual(EState *estate, EPQState *epqstate,
-            Relation relation, Index rti, int lockmode,
-            ItemPointer tid, TransactionId priorXmax);
-extern HeapTuple EvalPlanQualFetch(EState *estate, Relation relation,
-                 int lockmode, LockWaitPolicy wait_policy, ItemPointer tid,
-                 TransactionId priorXmax);
+            Relation relation, Index rti, TupleTableSlot *slot);
 extern void EvalPlanQualInit(EPQState *epqstate, EState *estate,
                 Plan *subplan, List *auxrowmarks, int epqParam);
 extern void EvalPlanQualSetPlan(EPQState *epqstate,
                    Plan *subplan, List *auxrowmarks);
-extern void EvalPlanQualSetTuple(EPQState *epqstate, Index rti,
-                    HeapTuple tuple);
-extern HeapTuple EvalPlanQualGetTuple(EPQState *epqstate, Index rti);
 
 #define EvalPlanQualSetSlot(epqstate, slot)  ((epqstate)->origslot = (slot))
 extern void EvalPlanQualFetchRowMarks(EPQState *epqstate);
@@ -486,6 +481,10 @@ extern void ReScanExprContext(ExprContext *econtext);
 
 extern ExprContext *MakePerTupleExprContext(EState *estate);
 
+extern TupleTableSlot *ExecTriggerGetOldSlot(EState *estate, Relation rel);
+extern TupleTableSlot *ExecTriggerGetNewSlot(EState *estate, Relation rel);
+extern TupleTableSlot *ExecTriggerGetReturnSlot(EState *estate, Relation rel);
+
 /* Get an EState's per-output-tuple exprcontext, making it if first use */
 #define GetPerTupleExprContext(estate) \
    ((estate)->es_per_tuple_exprcontext ? \
@@ -554,9 +553,8 @@ extern int  ExecCleanTargetListLength(List *targetlist);
  */
 extern void ExecOpenIndices(ResultRelInfo *resultRelInfo, bool speculative);
 extern void ExecCloseIndices(ResultRelInfo *resultRelInfo);
-extern List *ExecInsertIndexTuples(TupleTableSlot *slot, ItemPointer tupleid,
-                     EState *estate, bool noDupErr, bool *specConflict,
-                     List *arbiterIndexes);
+extern List *ExecInsertIndexTuples(TupleTableSlot *slot, EState *estate, bool noDupErr,
+                     bool *specConflict, List *arbiterIndexes);
 extern bool ExecCheckIndexConstraints(TupleTableSlot *slot, EState *estate,
                          ItemPointer conflictTid, List *arbiterIndexes);
 extern void check_exclusion_constraint(Relation heap, Relation index,
index d2616968ac4c21ce6e60ca33aabc9a98cebec687..d17d0de64f032d3b4f255333095b3adb7f8f9b8e 100644 (file)
@@ -25,7 +25,7 @@ typedef struct SPITupleTable
    uint64      alloced;        /* # of alloced vals */
    uint64      free;           /* # of free vals */
    TupleDesc   tupdesc;        /* tuple descriptor */
-   HeapTuple  *vals;           /* tuples */
+   HeapTuple *vals;            /* tuples */
    slist_node  next;           /* link for internal bookkeeping */
    SubTransactionId subid;     /* subxact in which tuptable was created */
 } SPITupleTable;
@@ -122,7 +122,7 @@ extern CachedPlan *SPI_plan_get_cached_plan(SPIPlanPtr plan);
 extern HeapTuple SPI_copytuple(HeapTuple tuple);
 extern HeapTupleHeader SPI_returntuple(HeapTuple tuple, TupleDesc tupdesc);
 extern HeapTuple SPI_modifytuple(Relation rel, HeapTuple tuple, int natts,
-               int *attnum, Datum *Values, const char *Nulls);
+                                   int *attnum, Datum *Values, const char *Nulls);
 extern int SPI_fnumber(TupleDesc tupdesc, const char *fname);
 extern char *SPI_fname(TupleDesc tupdesc, int fnumber);
 extern char *SPI_getvalue(HeapTuple tuple, TupleDesc tupdesc, int fnumber);
index 0fe36392525bae4678654e7605bfac25059b1400..8bcbe2fbff60ae10499828f40a336fd68314bcdd 100644 (file)
@@ -27,6 +27,6 @@ extern DestReceiver *CreateTupleQueueDestReceiver(shm_mq_handle *handle);
 extern TupleQueueReader *CreateTupleQueueReader(shm_mq_handle *handle);
 extern void DestroyTupleQueueReader(TupleQueueReader *reader);
 extern HeapTuple TupleQueueReaderNext(TupleQueueReader *reader,
-                    bool nowait, bool *done);
+                                        bool nowait, bool *done);
 
 #endif                         /* TQUEUE_H */
index 5c390a9669675c4d01f6016c9f3841968e389077..c87689b3dda9aad7ebb9bfde39fe4642a02d8061 100644 (file)
@@ -15,6 +15,7 @@
 #define TUPTABLE_H
 
 #include "access/htup.h"
+#include "access/sysattr.h"
 #include "access/tupdesc.h"
 #include "storage/buf.h"
 
@@ -125,6 +126,10 @@ typedef struct TupleTableSlot
 #define FIELDNO_TUPLETABLESLOT_ISNULL 6
    bool       *tts_isnull;     /* current per-attribute isnull flags */
    MemoryContext tts_mcxt;     /* slot itself is in this context */
+
+   ItemPointerData tts_tid;    /* tuple's TID in its table, if known */
+   Oid         tts_tableOid;   /* OID of the table the tuple came from */
+
 } TupleTableSlot;
 
 /* routines for a TupleTableSlot implementation */
@@ -238,6 +243,7 @@ typedef struct VirtualTupleTableSlot
    char       *data;       /* data for materialized slots */
 } VirtualTupleTableSlot;
 
+#include "access/htup_details.h"
 typedef struct HeapTupleTableSlot
 {
    TupleTableSlot base;
@@ -246,6 +252,7 @@ typedef struct HeapTupleTableSlot
    HeapTuple   tuple;      /* physical tuple */
 #define FIELDNO_HEAPTUPLETABLESLOT_OFF 2
    uint32      off;        /* saved state for slot_deform_heap_tuple */
+   HeapTupleData tupdata;
 } HeapTupleTableSlot;
 
 /* heap tuple residing in a buffer */
@@ -323,6 +330,9 @@ extern void slot_getmissingattrs(TupleTableSlot *slot, int startAttNum,
 extern void slot_getsomeattrs_int(TupleTableSlot *slot, int attnum);
 
 
+/* FIXME: remove */
+extern bool ExecSlotCompare(TupleTableSlot *slot1, TupleTableSlot *slot2);
+
 #ifndef FRONTEND
 
 /*
@@ -395,6 +405,12 @@ slot_getsysattr(TupleTableSlot *slot, int attnum, bool *isnull)
 {
    AssertArg(attnum < 0);      /* caller error */
 
+   if (attnum == TableOidAttributeNumber)
+   {
+       *isnull = false;
+       return slot->tts_tableOid;
+   }
+
    /* Fetch the system attribute from the underlying tuple. */
    return slot->tts_ops->getsysattr(slot, attnum, isnull);
 }
index c14eb546c64136523ee3755dd557302203d3d0a3..508b0eece84dd6a8635eb9f138d57b5593a5b69e 100644 (file)
@@ -121,10 +121,11 @@ typedef void (*EndDirectModify_function) (ForeignScanState *node);
 typedef RowMarkType (*GetForeignRowMarkType_function) (RangeTblEntry *rte,
                                                       LockClauseStrength strength);
 
-typedef HeapTuple (*RefetchForeignRow_function) (EState *estate,
-                                                ExecRowMark *erm,
-                                                Datum rowid,
-                                                bool *updated);
+typedef TupleTableSlot *(*RefetchForeignRow_function) (EState *estate,
+                                                      ExecRowMark *erm,
+                                                      Datum rowid,
+                                                      TupleTableSlot *slot,
+                                                      bool *updated);
 
 typedef void (*ExplainForeignScan_function) (ForeignScanState *node,
                                             struct ExplainState *es);
index 5ed0f40f6970fbfa58ad27669064a204214fd776..359ed0a86cd34487c1230c1f38d67734fc74061d 100644 (file)
@@ -525,7 +525,7 @@ typedef struct EState
 
    /* Stuff used for firing triggers: */
    List       *es_trig_target_relations;   /* trigger-only ResultRelInfos */
-   TupleTableSlot *es_trig_tuple_slot; /* for trigger output tuples */
+   TupleTableSlot *es_trig_return_slot; /* for trigger output tuples */
    TupleTableSlot *es_trig_oldtup_slot;    /* for TriggerEnabled */
    TupleTableSlot *es_trig_newtup_slot;    /* for TriggerEnabled */
 
@@ -568,7 +568,8 @@ typedef struct EState
     * remember if the tuple has been returned already.  Arrays are of size
     * es_range_table_size and are indexed by scan node scanrelid - 1.
     */
-   HeapTuple  *es_epqTuple;    /* array of EPQ substitute tuples */
+   TupleTableSlot **es_epqTupleSlot;   /* array of EPQ substitute slots */
    bool       *es_epqTupleSet; /* true if EPQ tuple is provided */
    bool       *es_epqScanDone; /* true if EPQ tuple has been fetched */
 
@@ -1268,7 +1269,7 @@ typedef struct ScanState
 {
    PlanState   ps;             /* its first field is NodeTag */
    Relation    ss_currentRelation;
-   HeapScanDesc ss_currentScanDesc;
+   TableScanDesc ss_currentScanDesc;
    TupleTableSlot *ss_ScanTupleSlot;
 } ScanState;
 
@@ -1298,6 +1299,9 @@ typedef struct SampleScanState
    bool        use_pagemode;   /* use page-at-a-time visibility checking? */
    bool        begun;          /* false means need to call BeginSampleScan */
    uint32      seed;           /* random seed */
+   int64       donetuples;     /* number of tuples already returned */
+   bool        haveblock;      /* has a block for sampling been determined */
+   bool        done;           /* exhausted all tuples? */
 } SampleScanState;
 
 /*
@@ -1526,6 +1530,7 @@ typedef struct BitmapHeapScanState
    Buffer      pvmbuffer;
    long        exact_pages;
    long        lossy_pages;
+   int         return_empty_tuples;
    TBMIterator *prefetch_iterator;
    int         prefetch_pages;
    int         prefetch_target;
@@ -2256,7 +2261,7 @@ typedef struct LockRowsState
    PlanState   ps;             /* its first field is NodeTag */
    List       *lr_arowMarks;   /* List of ExecAuxRowMarks */
    EPQState    lr_epqstate;    /* for evaluating EvalPlanQual rechecks */
-   HeapTuple  *lr_curtuples;   /* locked tuples (one entry per RT entry) */
+   TupleTableSlot **lr_curtuples; /* locked tuples (one entry per RT entry) */
    int         lr_ntables;     /* length of lr_curtuples[] array */
 } LockRowsState;
 
index 24afd6efd41dc776930da730bb8613d82077c117..5f579781a12a62a8a20d71c1e810594174e7b9c5 100644 (file)
@@ -43,4 +43,9 @@ typedef enum LockWaitPolicy
    LockWaitError
 } LockWaitPolicy;
 
+/* Follow tuples whose update is in progress if lock modes don't conflict */
+#define TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS    (1 << 0)
+/* Follow update chain and lock latest version of tuple */
+#define TUPLE_LOCK_FLAG_FIND_LAST_VERSION      (1 << 1)
+
 #endif                         /* LOCKOPTIONS_H */
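
These flags are meant to be OR'd together by callers of the tuple-locking path. A small sketch of the intended usage (the wrapper that consumes the flags lives elsewhere in the patch, so the surrounding variables here are assumptions):

    int         flags = 0;

    if (follow_updates)
        flags |= TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS;
    if (lock_latest_version)
        flags |= TUPLE_LOCK_FLAG_FIND_LAST_VERSION;
    /* pass 'flags' to the AM's tuple-lock routine */
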
index cac6ff0eda49ffd1cefe1f605d43a140e82218b7..5391c41d9e859d12558c91426f5525754653673b 100644 (file)
@@ -503,6 +503,7 @@ typedef enum NodeTag
    T_InlineCodeBlock,          /* in nodes/parsenodes.h */
    T_FdwRoutine,               /* in foreign/fdwapi.h */
    T_IndexAmRoutine,           /* in access/amapi.h */
+   T_TableAmRoutine,           /* in access/tableam.h */
    T_TsmRoutine,               /* in access/tsmapi.h */
    T_ForeignKeyCacheInfo,      /* in utils/rel.h */
    T_CallContext               /* in nodes/parsenodes.h */
index e5bdc1cec576d052f6ea436f24ccde802605c5fd..bb20fdd488efe395a4f8ae78a1e61fa8b2966e9c 100644 (file)
@@ -2032,6 +2032,7 @@ typedef struct CreateStmt
    List       *options;        /* options from WITH clause */
    OnCommitAction oncommit;    /* what do we do at COMMIT? */
    char       *tablespacename; /* table space to use, or NULL */
+   char       *accessMethod;   /* table access method */
    bool        if_not_exists;  /* just do nothing if it already exists? */
 } CreateStmt;
 
index b886ed35349b93e6292d09afef327d00ff8a2d03..0bdc0bbf506ba3fab2b2b8e1c6e46556c71fd15e 100644 (file)
@@ -111,6 +111,7 @@ typedef struct IntoClause
 
    RangeVar   *rel;            /* target relation name */
    List       *colNames;       /* column names to assign, or NIL */
+   char       *accessMethod;   /* table access method */
    List       *options;        /* options from WITH clause */
    OnCommitAction onCommit;    /* what do we do at COMMIT? */
    char       *tableSpaceName; /* table space to use, or NULL */
index 31532e97690f69b0dd9c38aaa69158958b7392a2..f6b829da2cdc36918bb648a84787e1b7ec49888a 100644 (file)
@@ -37,7 +37,7 @@ typedef struct TBMIterator TBMIterator;
 typedef struct TBMSharedIterator TBMSharedIterator;
 
 /* Result structure for tbm_iterate */
-typedef struct
+typedef struct TBMIterateResult
 {
    BlockNumber blockno;        /* page number containing tuples */
    int         ntuples;        /* -1 indicates lossy result */
index 3cce3906a0e6de9b2b5ed1b9d98c41785a88d9e1..95915bdc92d53b6ce2cf8efd1ce1285e1e10ae4d 100644 (file)
@@ -20,7 +20,6 @@
 #include "storage/relfilenode.h"
 #include "utils/relcache.h"
 #include "utils/snapmgr.h"
-#include "utils/tqual.h"
 
 typedef void *Block;
 
@@ -268,8 +267,8 @@ TestForOldSnapshot(Snapshot snapshot, Relation relation, Page page)
 
    if (old_snapshot_threshold >= 0
        && (snapshot) != NULL
-       && ((snapshot)->satisfies == HeapTupleSatisfiesMVCC
-           || (snapshot)->satisfies == HeapTupleSatisfiesToast)
+       && ((snapshot)->visibility_type == MVCC_VISIBILITY
+           || (snapshot)->visibility_type == TOAST_VISIBILITY)
        && !XLogRecPtrIsInvalid((snapshot)->lsn)
        && PageGetLSN(page) > (snapshot)->lsn)
        TestForOldSnapshot_impl(snapshot, relation);
index 2217081dcc35acf4aa1f39a16c827dbd584fa1de..9187cbbcf3bd29616dd19156ff995613a95ff0eb 100644 (file)
@@ -131,6 +131,12 @@ typedef struct RelationData
    /* use "struct" here to avoid needing to include htup.h: */
    struct HeapTupleData *rd_indextuple;    /* all of pg_index tuple */
 
+   /*
+    * Underlying table access method support
+    */
+   Oid         rd_tableamhandler;  /* OID of table AM handler function */
+   const struct TableAmRoutine *rd_tableamroutine; /* table AM's API struct */
+
    /*
     * index access support info (used only for an index relation)
     *
@@ -432,6 +438,12 @@ typedef struct ViewOptions
  */
 #define RelationGetDescr(relation) ((relation)->rd_att)
 
+/*
+ * RelationGetTableamRoutine
+ *     Returns the table AM routine for a relation.
+ */
+#define RelationGetTableamRoutine(relation) ((relation)->rd_tableamroutine)
+
 /*
  * RelationGetRelationName
  *     Returns the rel's name.
index a99d6b6681dbe9ae420ef5107efad2f26528f720..de5b096bf7d8a8e2fe40ba7993838b49c6bc6c97 100644 (file)
@@ -76,6 +76,8 @@ extern void RelationInitIndexAccessInfo(Relation relation);
 struct PublicationActions;
 extern struct PublicationActions *GetRelationPublicationActions(Relation relation);
 
+extern void RelationInitTableAccessMethod(Relation relation);
+
 /*
  * Routines to support ereport() reports of relation-related errors
  */
@@ -98,6 +100,7 @@ extern Relation RelationBuildLocalRelation(const char *relname,
                           Oid relnamespace,
                           TupleDesc tupDesc,
                           Oid relid,
+                          Oid accessmtd,
                           Oid relfilenode,
                           Oid reltablespace,
                           bool shared_relation,
index a8a5a8f4c076399345be7a24782b098d4567c5be..77e25fb5615b0df8f4856ccf292c8da640a177a8 100644 (file)
 #include "lib/pairingheap.h"
 #include "storage/buf.h"
 
+typedef enum tuple_visibility_type
+{
+   MVCC_VISIBILITY = 0,        /* HeapTupleSatisfiesMVCC */
+   SELF_VISIBILITY,            /* HeapTupleSatisfiesSelf */
+   ANY_VISIBILITY,             /* HeapTupleSatisfiesAny */
+   TOAST_VISIBILITY,           /* HeapTupleSatisfiesToast */
+   DIRTY_VISIBILITY,           /* HeapTupleSatisfiesDirty */
+   HISTORIC_MVCC_VISIBILITY,   /* HeapTupleSatisfiesHistoricMVCC */
+   NON_VACUUMABLE_VISIBILITY   /* HeapTupleSatisfiesNonVacuumable */
+}          tuple_visibility_type;
 
 typedef struct SnapshotData *Snapshot;
 
@@ -52,7 +62,7 @@ typedef bool (*SnapshotSatisfiesFunc) (HeapTuple htup,
  */
 typedef struct SnapshotData
 {
-   SnapshotSatisfiesFunc satisfies;    /* tuple test function */
+   tuple_visibility_type visibility_type;  /* tuple visibility test type */
 
    /*
     * The remaining fields are used only for MVCC snapshots, and are normally
@@ -124,6 +134,7 @@ typedef enum
    HeapTupleInvisible,
    HeapTupleSelfUpdated,
    HeapTupleUpdated,
+   HeapTupleDeleted,
    HeapTupleBeingUpdated,
    HeapTupleWouldBlock         /* can be returned by heap_tuple_lock */
 } HTSU_Result;
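
With snapshot->satisfies gone, the visibility test becomes data (an enum tag) rather than a function pointer, and the heap AM dispatches on it internally. A hedged sketch of that dispatch, using the routines whose declarations are removed from the header below (the real switch lives in heapam_visibility.c):

    /* Sketch of AM-internal dispatch on the snapshot's visibility type. */
    switch (snapshot->visibility_type)
    {
        case MVCC_VISIBILITY:
            return HeapTupleSatisfiesMVCC(htup, snapshot, buffer);
        case SELF_VISIBILITY:
            return HeapTupleSatisfiesSelf(htup, snapshot, buffer);
        case ANY_VISIBILITY:
            return HeapTupleSatisfiesAny(htup, snapshot, buffer);
        /* ... TOAST_VISIBILITY, DIRTY_VISIBILITY, HISTORIC_MVCC_VISIBILITY
         * and NON_VACUUMABLE_VISIBILITY handled likewise ... */
    }
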
index d3b6e99bb4ff0d7ae930a2f8db4b3ce6602eb679..1fe9cc64024617273ff6f6c7aa33b7446c067272 100644 (file)
@@ -18,7 +18,6 @@
 #include "utils/snapshot.h"
 #include "access/xlogdefs.h"
 
-
 /* Static variables representing various special snapshot semantics */
 extern PGDLLIMPORT SnapshotData SnapshotSelfData;
 extern PGDLLIMPORT SnapshotData SnapshotAnyData;
@@ -29,60 +28,8 @@ extern PGDLLIMPORT SnapshotData CatalogSnapshotData;
 
 /* This macro encodes the knowledge of which snapshots are MVCC-safe */
 #define IsMVCCSnapshot(snapshot)  \
-   ((snapshot)->satisfies == HeapTupleSatisfiesMVCC || \
-    (snapshot)->satisfies == HeapTupleSatisfiesHistoricMVCC)
-
-/*
- * HeapTupleSatisfiesVisibility
- *     True iff heap tuple satisfies a time qual.
- *
- * Notes:
- * Assumes heap tuple is valid.
- * Beware of multiple evaluations of snapshot argument.
- * Hint bits in the HeapTuple's t_infomask may be updated as a side effect;
- * if so, the indicated buffer is marked dirty.
- */
-#define HeapTupleSatisfiesVisibility(tuple, snapshot, buffer) \
-   ((*(snapshot)->satisfies) (tuple, snapshot, buffer))
-
-/* Result codes for HeapTupleSatisfiesVacuum */
-typedef enum
-{
-   HEAPTUPLE_DEAD,             /* tuple is dead and deletable */
-   HEAPTUPLE_LIVE,             /* tuple is live (committed, no deleter) */
-   HEAPTUPLE_RECENTLY_DEAD,    /* tuple is dead, but not deletable yet */
-   HEAPTUPLE_INSERT_IN_PROGRESS,   /* inserting xact is still in progress */
-   HEAPTUPLE_DELETE_IN_PROGRESS    /* deleting xact is still in progress */
-} HTSV_Result;
-
-/* These are the "satisfies" test routines for the various snapshot types */
-extern bool HeapTupleSatisfiesMVCC(HeapTuple htup,
-                      Snapshot snapshot, Buffer buffer);
-extern bool HeapTupleSatisfiesSelf(HeapTuple htup,
-                      Snapshot snapshot, Buffer buffer);
-extern bool HeapTupleSatisfiesAny(HeapTuple htup,
-                     Snapshot snapshot, Buffer buffer);
-extern bool HeapTupleSatisfiesToast(HeapTuple htup,
-                       Snapshot snapshot, Buffer buffer);
-extern bool HeapTupleSatisfiesDirty(HeapTuple htup,
-                       Snapshot snapshot, Buffer buffer);
-extern bool HeapTupleSatisfiesNonVacuumable(HeapTuple htup,
-                               Snapshot snapshot, Buffer buffer);
-extern bool HeapTupleSatisfiesHistoricMVCC(HeapTuple htup,
-                              Snapshot snapshot, Buffer buffer);
-
-/* Special "satisfies" routines with different APIs */
-extern HTSU_Result HeapTupleSatisfiesUpdate(HeapTuple htup,
-                        CommandId curcid, Buffer buffer);
-extern HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple htup,
-                        TransactionId OldestXmin, Buffer buffer);
-extern bool HeapTupleIsSurelyDead(HeapTuple htup,
-                     TransactionId OldestXmin);
-extern bool XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot);
-
-extern void HeapTupleSetHintBits(HeapTupleHeader tuple, Buffer buffer,
-                    uint16 infomask, TransactionId xid);
-extern bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple);
+   ((snapshot)->visibility_type == MVCC_VISIBILITY || \
+    (snapshot)->visibility_type == HISTORIC_MVCC_VISIBILITY)
 
 /*
  * To avoid leaking too much knowledge about reorderbuffer implementation
@@ -101,14 +48,14 @@ extern bool ResolveCminCmaxDuringDecoding(struct HTAB *tuplecid_data,
  * local variable of type SnapshotData, and initialize it with this macro.
  */
 #define InitDirtySnapshot(snapshotdata)  \
-   ((snapshotdata).satisfies = HeapTupleSatisfiesDirty)
+   ((snapshotdata).visibility_type = DIRTY_VISIBILITY)
 
 /*
  * Similarly, some initialization is required for a NonVacuumable snapshot.
  * The caller must supply the xmin horizon to use (e.g., RecentGlobalXmin).
  */
 #define InitNonVacuumableSnapshot(snapshotdata, xmin_horizon)  \
-   ((snapshotdata).satisfies = HeapTupleSatisfiesNonVacuumable, \
+   ((snapshotdata).visibility_type = NON_VACUUMABLE_VISIBILITY, \
     (snapshotdata).xmin = (xmin_horizon))
 
 /*
@@ -116,7 +63,7 @@ extern bool ResolveCminCmaxDuringDecoding(struct HTAB *tuplecid_data,
  * to set lsn and whenTaken correctly to support snapshot_too_old.
  */
 #define InitToastSnapshot(snapshotdata, l, w)  \
-   ((snapshotdata).satisfies = HeapTupleSatisfiesToast, \
+   ((snapshotdata).visibility_type = TOAST_VISIBILITY, \
     (snapshotdata).lsn = (l),                  \
     (snapshotdata).whenTaken = (w))
 
index 32908b66251f9e34f6f50babed5eca3ac3194d30..334096e9793bfd6dd39b58bfeaed486322279669 100644 (file)
@@ -105,7 +105,7 @@ typedef struct TuplesortInstrumentation
  *
  * The "cluster" API stores/sorts full HeapTuples including all visibility
  * info. The sort keys are specified by reference to a btree index that is
- * defined on the relation to be sorted.  Note that putheaptuple/getheaptuple
+ * defined on the relation to be sorted.  Note that putheaptupleslot/getheaptuple
  * go with this API, not the "begin_heap" one!
  *
  * The "index_btree" API stores/sorts IndexTuples (preserving all their
index 37fe6a7b277fe00117afa61750dd81857aae1086..a632d7f7bad9e3f2fc88f06e95b368ec7f26a792 100644 (file)
@@ -15,7 +15,7 @@ step s1u: UPDATE foo SET a=2 WHERE a=1;
 step s2d: DELETE FROM foo WHERE a=1; <waiting ...>
 step s1c: COMMIT;
 step s2d: <... completed>
-error in steps s1c s2d: ERROR:  tuple to be deleted was already moved to another partition due to concurrent update
+error in steps s1c s2d: ERROR:  tuple to be locked was already moved to another partition due to concurrent update
 step s2c: COMMIT;
 
 starting permutation: s1b s2b s2d s1u s2c s1c
index 47dd885c4e9b1ff8d1595197bc926c855b7d2fa5..e15ba33a0884758e2636fa6fff3e37a5f291d765 100644 (file)
@@ -99,3 +99,82 @@ HINT:  Use DROP ... CASCADE to drop the dependent objects too.
 -- Drop access method cascade
 DROP ACCESS METHOD gist2 CASCADE;
 NOTICE:  drop cascades to index grect2ind2
+-- Create a heap2 table access method using the heapam handler
+CREATE ACCESS METHOD heap2 TYPE TABLE HANDLER heap_tableam_handler;
+SELECT amname, amhandler, amtype FROM pg_am where amtype = 't' ORDER BY 1, 2;
+ amname |      amhandler       | amtype 
+--------+----------------------+--------
+ heap   | heap_tableam_handler | t
+ heap2  | heap_tableam_handler | t
+(2 rows)
+
+CREATE TABLE tbl_heap2(f1 int, f2 char(100)) using heap2;
+INSERT INTO tbl_heap2 VALUES(generate_series(1,10), 'Test series');
+SELECT count(*) FROM tbl_heap2;
+ count 
+-------
+    10
+(1 row)
+
+SELECT r.relname, r.relkind, a.amname from pg_class as r, pg_am as a
+       where a.oid = r.relam AND r.relname = 'tbl_heap2';
+  relname  | relkind | amname 
+-----------+---------+--------
+ tbl_heap2 | r       | heap2
+(1 row)
+
+-- create table as using heap2
+CREATE TABLE tblas_heap2 using heap2 AS select * from tbl_heap2;
+SELECT r.relname, r.relkind, a.amname from pg_class as r, pg_am as a
+       where a.oid = r.relam AND r.relname = 'tblas_heap2';
+   relname   | relkind | amname 
+-------------+---------+--------
+ tblas_heap2 | r       | heap2
+(1 row)
+
+--
+-- SELECT INTO doesn't support the new USING syntax, so the table
+-- should get the default access method.
+--
+SELECT INTO tblselectinto_heap from tbl_heap2;
+SELECT r.relname, r.relkind, a.amname = current_setting('default_table_access_method')
+from pg_class as r, pg_am as a
+       where a.oid = r.relam AND r.relname = 'tblselectinto_heap';
+      relname       | relkind | ?column? 
+--------------------+---------+----------
+ tblselectinto_heap | r       | t
+(1 row)
+
+DROP TABLE tblselectinto_heap;
+-- create materialized view using heap2
+CREATE MATERIALIZED VIEW mv_heap2 USING heap2 AS
+       SELECT * FROM tbl_heap2;
+SELECT r.relname, r.relkind, a.amname from pg_class as r, pg_am as a
+       where a.oid = r.relam AND r.relname = 'mv_heap2';
+ relname  | relkind | amname 
+----------+---------+--------
+ mv_heap2 | m       | heap2
+(1 row)
+
+-- Try creating unsupported relation kinds with the USING syntax
+CREATE VIEW test_view USING heap2 AS SELECT * FROM tbl_heap2;
+ERROR:  syntax error at or near "USING"
+LINE 1: CREATE VIEW test_view USING heap2 AS SELECT * FROM tbl_heap2...
+                              ^
+CREATE SEQUENCE test_seq USING heap2;
+ERROR:  syntax error at or near "USING"
+LINE 1: CREATE SEQUENCE test_seq USING heap2;
+                                 ^
+-- Drop table access method, which fails as other objects depend on it
+DROP ACCESS METHOD heap2;
+ERROR:  cannot drop access method heap2 because other objects depend on it
+DETAIL:  table tbl_heap2 depends on access method heap2
+table tblas_heap2 depends on access method heap2
+materialized view mv_heap2 depends on access method heap2
+HINT:  Use DROP ... CASCADE to drop the dependent objects too.
+-- Drop table access method with cascade
+DROP ACCESS METHOD heap2 CASCADE;
+NOTICE:  drop cascades to 3 other objects
+DETAIL:  drop cascades to table tbl_heap2
+drop cascades to table tblas_heap2
+drop cascades to materialized view mv_heap2
index 6072f6bdb1fb0998f85ef35d6cfdf72009797325..4cd92c20dd19250478ccec14c382387f64d97211 100644 (file)
@@ -1741,11 +1741,24 @@ WHERE p1.amhandler = 0;
 -----+--------
 (0 rows)
 
--- Check for amhandler functions with the wrong signature
+-- Check for index amhandler functions with the wrong signature
 SELECT p1.oid, p1.amname, p2.oid, p2.proname
 FROM pg_am AS p1, pg_proc AS p2
-WHERE p2.oid = p1.amhandler AND
-    (p2.prorettype != 'index_am_handler'::regtype OR p2.proretset
+WHERE p2.oid = p1.amhandler AND p1.amtype = 'i' AND
+    (p2.prorettype != 'index_am_handler'::regtype
+     OR p2.proretset
+     OR p2.pronargs != 1
+     OR p2.proargtypes[0] != 'internal'::regtype);
+ oid | amname | oid | proname 
+-----+--------+-----+---------
+(0 rows)
+
+-- Check for table amhandler functions with the wrong signature
+SELECT p1.oid, p1.amname, p2.oid, p2.proname
+FROM pg_am AS p1, pg_proc AS p2
+WHERE p2.oid = p1.amhandler AND p1.amtype = 't' AND
+    (p2.prorettype != 'table_am_handler'::regtype
+     OR p2.proretset
      OR p2.pronargs != 1
      OR p2.proargtypes[0] != 'internal'::regtype);
  oid | amname | oid | proname 
index b1419d4bc21dbd5b520a2cac69a740205e9dac63..0e38d571c025bd14427e35eb8b640cbefff66d6a 100644 (file)
@@ -502,11 +502,11 @@ WHERE relkind NOT IN ('r', 'i', 'S', 't', 'v', 'm', 'c', 'f', 'p') OR
 -----+---------
 (0 rows)
 
--- Indexes should have an access method, others not.
+-- All tables and indexes should have an access method.
 SELECT p1.oid, p1.relname
 FROM pg_class as p1
-WHERE (p1.relkind = 'i' AND p1.relam = 0) OR
-    (p1.relkind != 'i' AND p1.relam != 0);
+WHERE p1.relkind NOT IN ('S', 'v', 'f', 'c') and
+    p1.relam = 0;
  oid | relname 
 -----+---------
 (0 rows)
index 3e0ac104f3cca3d59f33f83c808b7d2954bc2fc6..2c7b4813e8b6712d9e1820965e7721c9388fb200 100644 (file)
@@ -66,3 +66,50 @@ DROP ACCESS METHOD gist2;
 
 -- Drop access method cascade
 DROP ACCESS METHOD gist2 CASCADE;
+
+-- Create a heap2 table access method using the heapam handler
+CREATE ACCESS METHOD heap2 TYPE TABLE HANDLER heap_tableam_handler;
+
+SELECT amname, amhandler, amtype FROM pg_am where amtype = 't' ORDER BY 1, 2;
+
+CREATE TABLE tbl_heap2(f1 int, f2 char(100)) using heap2;
+INSERT INTO tbl_heap2 VALUES(generate_series(1,10), 'Test series');
+SELECT count(*) FROM tbl_heap2;
+
+SELECT r.relname, r.relkind, a.amname from pg_class as r, pg_am as a
+       where a.oid = r.relam AND r.relname = 'tbl_heap2';
+
+-- create table as using heap2
+CREATE TABLE tblas_heap2 using heap2 AS select * from tbl_heap2;
+SELECT r.relname, r.relkind, a.amname from pg_class as r, pg_am as a
+       where a.oid = r.relam AND r.relname = 'tblas_heap2';
+
+--
+-- SELECT INTO doesn't support the new USING syntax, so the table
+-- should get the default access method.
+--
+SELECT INTO tblselectinto_heap from tbl_heap2;
+SELECT r.relname, r.relkind, a.amname = current_setting('default_table_access_method')
+from pg_class as r, pg_am as a
+       where a.oid = r.relam AND r.relname = 'tblselectinto_heap';
+
+DROP TABLE tblselectinto_heap;
+
+-- create materialized view using heap2
+CREATE MATERIALIZED VIEW mv_heap2 USING heap2 AS
+       SELECT * FROM tbl_heap2;
+
+SELECT r.relname, r.relkind, a.amname from pg_class as r, pg_am as a
+       where a.oid = r.relam AND r.relname = 'mv_heap2';
+
+-- Try creating unsupported relation kinds with the USING syntax
+CREATE VIEW test_view USING heap2 AS SELECT * FROM tbl_heap2;
+
+CREATE SEQUENCE test_seq USING heap2;
+
+
+-- Drop table access method, which fails as other objects depend on it
+DROP ACCESS METHOD heap2;
+
+-- Drop table access method with cascade
+DROP ACCESS METHOD heap2 CASCADE;
index 91c68f4204e34cef55c2e01e1fa08d130a018127..2664252d9438d31c2c178eba9f5c801a335b4124 100644 (file)
@@ -1169,15 +1169,25 @@ SELECT p1.oid, p1.amname
 FROM pg_am AS p1
 WHERE p1.amhandler = 0;
 
--- Check for amhandler functions with the wrong signature
+-- Check for index amhandler functions with the wrong signature
 
 SELECT p1.oid, p1.amname, p2.oid, p2.proname
 FROM pg_am AS p1, pg_proc AS p2
-WHERE p2.oid = p1.amhandler AND
-    (p2.prorettype != 'index_am_handler'::regtype OR p2.proretset
+WHERE p2.oid = p1.amhandler AND p1.amtype = 'i' AND
+    (p2.prorettype != 'index_am_handler'::regtype
+     OR p2.proretset
      OR p2.pronargs != 1
      OR p2.proargtypes[0] != 'internal'::regtype);
 
+-- Check for table amhandler functions with the wrong signature
+
+SELECT p1.oid, p1.amname, p2.oid, p2.proname
+FROM pg_am AS p1, pg_proc AS p2
+WHERE p2.oid = p1.amhandler AND p1.amtype = 't' AND
+    (p2.prorettype != 'table_am_handler'::regtype
+     OR p2.proretset
+     OR p2.pronargs != 1
+     OR p2.proargtypes[0] != 'internal'::regtype);
 
 -- **************** pg_amop ****************
 
index f9aeea32144f204427830ecf0cf95227a7a1c0fc..2efa229d40a0784d98a6ecc73a2d914d5cf7dcae 100644 (file)
@@ -367,12 +367,11 @@ WHERE relkind NOT IN ('r', 'i', 'S', 't', 'v', 'm', 'c', 'f', 'p') OR
     relpersistence NOT IN ('p', 'u', 't') OR
     relreplident NOT IN ('d', 'n', 'f', 'i');
 
--- Indexes should have an access method, others not.
-
+-- All tables and indexes should have an access method.
 SELECT p1.oid, p1.relname
 FROM pg_class as p1
-WHERE (p1.relkind = 'i' AND p1.relam = 0) OR
-    (p1.relkind != 'i' AND p1.relam != 0);
+WHERE p1.relkind NOT IN ('S', 'v', 'f', 'c') and
+    p1.relam = 0;
 
 -- **************** pg_attribute ****************
 
index 9fe950b29dbb81e4bda9f2529f0b2849149ba43c..e4c9f81fe2792081ef29956e0aacd6957cff1dc4 100644 (file)
@@ -1608,8 +1608,8 @@ ParallelHashGrowth
 ParallelHashJoinBatch
 ParallelHashJoinBatchAccessor
 ParallelHashJoinState
-ParallelHeapScanDesc
-ParallelHeapScanDescData
+ParallelTableScanDesc
+ParallelTableScanDescData
 ParallelIndexScanDesc
 ParallelSlot
 ParallelState