Pluggable Storage.
author     Andres Freund <andres@anarazel.de>
           Tue, 11 Dec 2018 01:36:11 +0000 (17:36 -0800)
committer  Andres Freund <andres@anarazel.de>
           Tue, 11 Dec 2018 01:36:11 +0000 (17:36 -0800)
Author: Haribabu Kommi, Andres Freund and others.
Reviewed-By:
Discussion: https://postgr.es/m/
Backpatch:
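
This commit adds the basic infrastructure for pluggable table access
methods: a new src/backend/access/table/ directory (tableam.c,
tableamapi.c), the access/tableam.h callback interface, a heap
implementation of that interface in heapam_handler.c, and a move of
src/backend/utils/time/tqual.c to access/heap/heapam_visibility.c.
Callers are switched from the heap-specific scan API (heap_beginscan()
and friends, returning HeapScanDesc) to AM-independent table_*
wrappers returning TableScanDesc; code that still depends on heap
internals now checks the relation's access method and downcasts.

[Editor's sketch, not part of the commit: the caller-side migration
pattern used throughout this diff, assuming only functions that appear
in the diff itself; scan_heap_only is a hypothetical caller.]

#include "postgres.h"

#include "access/heapam.h"
#include "access/tableam.h"
#include "catalog/pg_am_d.h"
#include "utils/rel.h"
#include "utils/snapshot.h"

static void
scan_heap_only(Relation rel, Snapshot snapshot)
{
	TableScanDesc scan;
	HeapScanDesc hscan;
	HeapTuple	tuple;

	/* heap-internal code must now verify the relation's access method */
	if (rel->rd_rel->relam != HEAP_TABLE_AM_OID)
		ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						errmsg("only heap AM is supported")));

	scan = table_beginscan(rel, snapshot, 0, NULL); /* was heap_beginscan() */
	hscan = (HeapScanDesc) scan;	/* downcast for heap-only fields */

	while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
	{
		/* hscan->rs_cbuf etc. remain available for heap-specific checks */
		(void) hscan;
	}

	table_endscan(scan);		/* was heap_endscan() */
}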

145 files changed:
contrib/amcheck/verify_nbtree.c
contrib/bloom/blinsert.c
contrib/pg_visibility/pg_visibility.c
contrib/pgrowlocks/pgrowlocks.c
contrib/pgstattuple/pgstatapprox.c
contrib/pgstattuple/pgstattuple.c
contrib/postgres_fdw/postgres_fdw.c
contrib/tsm_system_rows/tsm_system_rows.c
contrib/tsm_system_time/tsm_system_time.c
doc/src/sgml/fdwhandler.sgml
src/backend/access/Makefile
src/backend/access/brin/brin.c
src/backend/access/gin/gininsert.c
src/backend/access/gist/gistbuild.c
src/backend/access/gist/gistget.c
src/backend/access/hash/hash.c
src/backend/access/hash/hashsearch.c
src/backend/access/heap/Makefile
src/backend/access/heap/heapam.c
src/backend/access/heap/heapam_handler.c [new file with mode: 0644]
src/backend/access/heap/heapam_visibility.c [moved from src/backend/utils/time/tqual.c with 96% similarity]
src/backend/access/heap/rewriteheap.c
src/backend/access/heap/tuptoaster.c
src/backend/access/index/genam.c
src/backend/access/index/indexam.c
src/backend/access/nbtree/nbtinsert.c
src/backend/access/nbtree/nbtree.c
src/backend/access/nbtree/nbtsearch.c
src/backend/access/nbtree/nbtsort.c
src/backend/access/spgist/spginsert.c
src/backend/access/spgist/spgscan.c
src/backend/access/table/Makefile [new file with mode: 0644]
src/backend/access/table/tableam.c [new file with mode: 0644]
src/backend/access/table/tableamapi.c [new file with mode: 0644]
src/backend/access/tablesample/system.c
src/backend/bootstrap/bootparse.y
src/backend/bootstrap/bootstrap.c
src/backend/catalog/aclchk.c
src/backend/catalog/genbki.pl
src/backend/catalog/heap.c
src/backend/catalog/index.c
src/backend/catalog/partition.c
src/backend/catalog/pg_conversion.c
src/backend/catalog/pg_db_role_setting.c
src/backend/catalog/pg_publication.c
src/backend/catalog/pg_subscription.c
src/backend/catalog/toasting.c
src/backend/commands/amcmds.c
src/backend/commands/analyze.c
src/backend/commands/cluster.c
src/backend/commands/constraint.c
src/backend/commands/copy.c
src/backend/commands/createas.c
src/backend/commands/dbcommands.c
src/backend/commands/indexcmds.c
src/backend/commands/matview.c
src/backend/commands/tablecmds.c
src/backend/commands/tablespace.c
src/backend/commands/trigger.c
src/backend/commands/typecmds.c
src/backend/commands/vacuum.c
src/backend/executor/execAmi.c
src/backend/executor/execCurrent.c
src/backend/executor/execExprInterp.c
src/backend/executor/execIndexing.c
src/backend/executor/execMain.c
src/backend/executor/execPartition.c
src/backend/executor/execReplication.c
src/backend/executor/execScan.c
src/backend/executor/execTuples.c
src/backend/executor/execUtils.c
src/backend/executor/nodeBitmapHeapscan.c
src/backend/executor/nodeForeignscan.c
src/backend/executor/nodeGather.c
src/backend/executor/nodeGatherMerge.c
src/backend/executor/nodeIndexonlyscan.c
src/backend/executor/nodeIndexscan.c
src/backend/executor/nodeLockRows.c
src/backend/executor/nodeModifyTable.c
src/backend/executor/nodeSamplescan.c
src/backend/executor/nodeSeqscan.c
src/backend/executor/nodeTidscan.c
src/backend/executor/spi.c
src/backend/executor/tqueue.c
src/backend/nodes/copyfuncs.c
src/backend/optimizer/util/plancat.c
src/backend/parser/gram.y
src/backend/partitioning/partbounds.c
src/backend/postmaster/autovacuum.c
src/backend/postmaster/pgstat.c
src/backend/replication/logical/launcher.c
src/backend/replication/logical/snapbuild.c
src/backend/replication/logical/worker.c
src/backend/rewrite/rewriteDefine.c
src/backend/storage/lmgr/predicate.c
src/backend/utils/adt/pseudotypes.c
src/backend/utils/adt/ri_triggers.c
src/backend/utils/adt/selfuncs.c
src/backend/utils/adt/tid.c
src/backend/utils/cache/relcache.c
src/backend/utils/init/postinit.c
src/backend/utils/misc/guc.c
src/backend/utils/sort/tuplesort.c
src/backend/utils/time/Makefile
src/backend/utils/time/snapmgr.c
src/include/access/genam.h
src/include/access/heapam.h
src/include/access/relscan.h
src/include/access/rewriteheap.h
src/include/access/tableam.h [new file with mode: 0644]
src/include/access/tsmapi.h
src/include/catalog/heap.h
src/include/catalog/index.h
src/include/catalog/pg_am.dat
src/include/catalog/pg_am.h
src/include/catalog/pg_class.dat
src/include/catalog/pg_class.h
src/include/catalog/pg_proc.dat
src/include/catalog/pg_type.dat
src/include/commands/trigger.h
src/include/executor/executor.h
src/include/executor/spi.h
src/include/executor/tqueue.h
src/include/executor/tuptable.h
src/include/foreign/fdwapi.h
src/include/nodes/execnodes.h
src/include/nodes/lockoptions.h
src/include/nodes/nodes.h
src/include/nodes/parsenodes.h
src/include/nodes/primnodes.h
src/include/nodes/tidbitmap.h
src/include/storage/bufmgr.h
src/include/utils/rel.h
src/include/utils/relcache.h
src/include/utils/snapshot.h
src/include/utils/tqual.h
src/include/utils/tuplesort.h
src/test/isolation/expected/partition-key-update-1.out
src/test/regress/expected/create_am.out
src/test/regress/expected/opr_sanity.out
src/test/regress/expected/type_sanity.out
src/test/regress/sql/create_am.sql
src/test/regress/sql/opr_sanity.sql
src/test/regress/sql/type_sanity.sql
src/tools/pgindent/typedefs.list

diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c
index 14ed31753fb54646f3dd3c4b18cb89f6cb0dbdfc..aa2ac1b852de8d0a7ab2ce376cc06e096e60731f 100644
--- a/contrib/amcheck/verify_nbtree.c
+++ b/contrib/amcheck/verify_nbtree.c
@@ -25,6 +25,7 @@
 
 #include "access/htup_details.h"
 #include "access/nbtree.h"
+#include "access/tableam.h"
 #include "access/transam.h"
 #include "access/xact.h"
 #include "catalog/index.h"
@@ -35,6 +36,7 @@
 #include "storage/lmgr.h"
 #include "utils/memutils.h"
 #include "utils/snapmgr.h"
+#include "utils/tqual.h"
 
 
 PG_MODULE_MAGIC;
@@ -478,7 +480,7 @@ bt_check_every_level(Relation rel, Relation heaprel, bool readonly,
    if (state->heapallindexed)
    {
        IndexInfo  *indexinfo = BuildIndexInfo(state->rel);
-       HeapScanDesc scan;
+       TableScanDesc scan;
 
        /* Report on extra downlink checks performed in readonly case */
        if (state->readonly)
@@ -497,7 +499,7 @@ bt_check_every_level(Relation rel, Relation heaprel, bool readonly,
         *
-        * Note that IndexBuildHeapScan() calls heap_endscan() for us.
+        * Note that table_index_build_scan() calls heap_endscan() for us.
         */
-       scan = heap_beginscan_strat(state->heaprel, /* relation */
+       scan = table_beginscan_strat(state->heaprel, /* relation */
                                    snapshot,   /* snapshot */
                                    0,  /* number of keys */
                                    NULL,   /* scan key */
@@ -531,8 +533,8 @@ bt_check_every_level(Relation rel, Relation heaprel, bool readonly,
             RelationGetRelationName(state->rel),
             RelationGetRelationName(state->heaprel));
 
-       IndexBuildHeapScan(state->heaprel, state->rel, indexinfo, true,
-                          bt_tuple_present_callback, (void *) state, scan);
+       table_index_build_scan(state->heaprel, state->rel, indexinfo, true,
+                              bt_tuple_present_callback, (void *) state, scan);
 
        ereport(DEBUG1,
                (errmsg_internal("finished verifying presence of " INT64_FORMAT " tuples from table \"%s\" with bitset %.2f%% set",
diff --git a/contrib/bloom/blinsert.c b/contrib/bloom/blinsert.c
index 9f223d3b2a7bd4c4247016b11557850b8c25b9df..413828818b5941062c2e5f5112803b6f4fab2ca2 100644
--- a/contrib/bloom/blinsert.c
+++ b/contrib/bloom/blinsert.c
@@ -14,6 +14,7 @@
 
 #include "access/genam.h"
 #include "access/generic_xlog.h"
+#include "access/tableam.h"
 #include "catalog/index.h"
 #include "miscadmin.h"
 #include "storage/bufmgr.h"
@@ -69,7 +70,7 @@ initCachedPage(BloomBuildState *buildstate)
 }
 
 /*
- * Per-tuple callback from IndexBuildHeapScan.
+ * Per-tuple callback from table_index_build_scan.
  */
 static void
 bloomBuildCallback(Relation index, HeapTuple htup, Datum *values,
@@ -141,7 +142,7 @@ blbuild(Relation heap, Relation index, IndexInfo *indexInfo)
    initCachedPage(&buildstate);
 
    /* Do the heap scan */
-   reltuples = IndexBuildHeapScan(heap, index, indexInfo, true,
+   reltuples = table_index_build_scan(heap, index, indexInfo, true,
                                   bloomBuildCallback, (void *) &buildstate,
                                   NULL);
 
diff --git a/contrib/pg_visibility/pg_visibility.c b/contrib/pg_visibility/pg_visibility.c
index c1aae9d655121a10892c69f29689087173e7bc04..ce9ca704f6a26ae0e579de994c8f0deaf823b6d9 100644
--- a/contrib/pg_visibility/pg_visibility.c
+++ b/contrib/pg_visibility/pg_visibility.c
@@ -13,6 +13,7 @@
 #include "access/htup_details.h"
 #include "access/visibilitymap.h"
 #include "catalog/pg_type.h"
+#include "catalog/pg_am_d.h"
 #include "catalog/storage_xlog.h"
 #include "funcapi.h"
 #include "miscadmin.h"
@@ -565,6 +566,10 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen)
    /* Only some relkinds have a visibility map */
    check_relation_relkind(rel);
 
+   if (rel->rd_rel->relam != HEAP_TABLE_AM_OID)
+       ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                       errmsg("only heap AM is supported")));
+
    nblocks = RelationGetNumberOfBlocks(rel);
 
    /*
diff --git a/contrib/pgrowlocks/pgrowlocks.c b/contrib/pgrowlocks/pgrowlocks.c
index 94e051d642b435b88baee4b0e931715b5c0f2852..852adba35949dcf79f4113cae02447ac9f6fc73e 100644
--- a/contrib/pgrowlocks/pgrowlocks.c
+++ b/contrib/pgrowlocks/pgrowlocks.c
 
 #include "access/multixact.h"
 #include "access/relscan.h"
+#include "access/tableam.h"
 #include "access/xact.h"
 #include "catalog/namespace.h"
+#include "catalog/pg_am_d.h"
 #include "catalog/pg_authid.h"
 #include "funcapi.h"
 #include "miscadmin.h"
@@ -55,7 +57,7 @@ PG_FUNCTION_INFO_V1(pgrowlocks);
 typedef struct
 {
    Relation    rel;
-   HeapScanDesc scan;
+   TableScanDesc scan;
    int         ncolumns;
 } MyData;
 
@@ -70,7 +72,8 @@ Datum
 pgrowlocks(PG_FUNCTION_ARGS)
 {
    FuncCallContext *funcctx;
-   HeapScanDesc scan;
+   TableScanDesc scan;
+   HeapScanDesc hscan;
    HeapTuple   tuple;
    TupleDesc   tupdesc;
    AttInMetadata *attinmeta;
@@ -99,6 +102,10 @@ pgrowlocks(PG_FUNCTION_ARGS)
        relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
        rel = relation_openrv(relrv, AccessShareLock);
 
+       if (rel->rd_rel->relam != HEAP_TABLE_AM_OID)
+           ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                           errmsg("only heap AM is supported")));
+
        if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
            ereport(ERROR,
                    (errcode(ERRCODE_WRONG_OBJECT_TYPE),
@@ -124,7 +131,8 @@ pgrowlocks(PG_FUNCTION_ARGS)
            aclcheck_error(aclresult, get_relkind_objtype(rel->rd_rel->relkind),
                           RelationGetRelationName(rel));
 
-       scan = heap_beginscan(rel, GetActiveSnapshot(), 0, NULL);
+       scan = table_beginscan(rel, GetActiveSnapshot(), 0, NULL);
+       hscan = (HeapScanDesc) scan;
        mydata = palloc(sizeof(*mydata));
        mydata->rel = rel;
        mydata->scan = scan;
@@ -138,20 +146,20 @@ pgrowlocks(PG_FUNCTION_ARGS)
    attinmeta = funcctx->attinmeta;
    mydata = (MyData *) funcctx->user_fctx;
    scan = mydata->scan;
+   hscan = (HeapScanDesc) scan;
 
-   /* scan the relation */
-   while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+   /* scan the relation (will error if not heap) */
+   while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
    {
        HTSU_Result htsu;
        TransactionId xmax;
        uint16      infomask;
 
        /* must hold a buffer lock to call HeapTupleSatisfiesUpdate */
-       LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+       LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
 
-       htsu = HeapTupleSatisfiesUpdate(tuple,
-                                       GetCurrentCommandId(false),
-                                       scan->rs_cbuf);
+       htsu = HeapTupleSatisfiesUpdate(tuple, GetCurrentCommandId(false),
+                                       hscan->rs_cbuf);
        xmax = HeapTupleHeaderGetRawXmax(tuple->t_data);
        infomask = tuple->t_data->t_infomask;
 
@@ -284,7 +292,7 @@ pgrowlocks(PG_FUNCTION_ARGS)
                         BackendXidGetPid(xmax));
            }
 
-           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+           LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
 
            /* build a tuple */
            tuple = BuildTupleFromCStrings(attinmeta, values);
@@ -301,11 +309,11 @@ pgrowlocks(PG_FUNCTION_ARGS)
        }
        else
        {
-           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+           LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
        }
    }
 
-   heap_endscan(scan);
+   table_endscan(scan);
    heap_close(mydata->rel, AccessShareLock);
 
    SRF_RETURN_DONE(funcctx);
diff --git a/contrib/pgstattuple/pgstatapprox.c b/contrib/pgstattuple/pgstatapprox.c
index ef33cacec6af983d92547021dabaeebd566faa60..c59fd10dc17608d3c8c6cf10edfef0ab8222d70f 100644
--- a/contrib/pgstattuple/pgstatapprox.c
+++ b/contrib/pgstattuple/pgstatapprox.c
  */
 #include "postgres.h"
 
-#include "access/visibilitymap.h"
 #include "access/transam.h"
+#include "access/visibilitymap.h"
 #include "access/xact.h"
 #include "access/multixact.h"
 #include "access/htup_details.h"
 #include "catalog/namespace.h"
+#include "catalog/pg_am_d.h"
+#include "commands/vacuum.h"
 #include "funcapi.h"
 #include "miscadmin.h"
 #include "storage/bufmgr.h"
@@ -26,7 +28,7 @@
 #include "storage/lmgr.h"
 #include "utils/builtins.h"
 #include "utils/tqual.h"
-#include "commands/vacuum.h"
+
 
 PG_FUNCTION_INFO_V1(pgstattuple_approx);
 PG_FUNCTION_INFO_V1(pgstattuple_approx_v1_5);
@@ -287,6 +289,10 @@ pgstattuple_approx_internal(Oid relid, FunctionCallInfo fcinfo)
                 errmsg("\"%s\" is not a table or materialized view",
                        RelationGetRelationName(rel))));
 
+   if (rel->rd_rel->relam != HEAP_TABLE_AM_OID)
+       ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                       errmsg("only heap AM is supported")));
+
    statapprox_heap(rel, &stat);
 
    relation_close(rel, AccessShareLock);
diff --git a/contrib/pgstattuple/pgstattuple.c b/contrib/pgstattuple/pgstattuple.c
index 6d67bd8271c63c732360910c865956809593f18e..520438d779e9d073d6dab7a26610782960cb48bf 100644
--- a/contrib/pgstattuple/pgstattuple.c
+++ b/contrib/pgstattuple/pgstattuple.c
@@ -28,6 +28,7 @@
 #include "access/hash.h"
 #include "access/nbtree.h"
 #include "access/relscan.h"
+#include "access/tableam.h"
 #include "catalog/namespace.h"
 #include "catalog/pg_am.h"
 #include "funcapi.h"
@@ -317,7 +318,8 @@ pgstat_relation(Relation rel, FunctionCallInfo fcinfo)
 static Datum
 pgstat_heap(Relation rel, FunctionCallInfo fcinfo)
 {
-   HeapScanDesc scan;
+   TableScanDesc scan;
+   HeapScanDesc hscan;
    HeapTuple   tuple;
    BlockNumber nblocks;
    BlockNumber block = 0;      /* next block to count free space in */
@@ -327,20 +329,22 @@ pgstat_heap(Relation rel, FunctionCallInfo fcinfo)
    SnapshotData SnapshotDirty;
 
    /* Disable syncscan because we assume we scan from block zero upwards */
-   scan = heap_beginscan_strat(rel, SnapshotAny, 0, NULL, true, false);
+   scan = table_beginscan_strat(rel, SnapshotAny, 0, NULL, true, false);
+   hscan = (HeapScanDesc) scan;
+
    InitDirtySnapshot(SnapshotDirty);
 
    nblocks = scan->rs_nblocks; /* # blocks to be scanned */
 
-   /* scan the relation */
-   while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+   /* scan the relation (will error if not heap) */
+   while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
    {
        CHECK_FOR_INTERRUPTS();
 
-       /* must hold a buffer lock to call HeapTupleSatisfiesVisibility */
+       /* must hold a buffer lock to call HeapTupleSatisfies */
-       LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+       LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
 
-       if (HeapTupleSatisfiesVisibility(tuple, &SnapshotDirty, scan->rs_cbuf))
+       if (HeapTupleSatisfies(tuple, &SnapshotDirty, hscan->rs_cbuf))
        {
            stat.tuple_len += tuple->t_len;
            stat.tuple_count++;
@@ -351,7 +355,7 @@ pgstat_heap(Relation rel, FunctionCallInfo fcinfo)
            stat.dead_tuple_count++;
        }
 
-       LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+       LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
 
        /*
         * To avoid physically reading the table twice, try to do the
@@ -366,7 +370,7 @@ pgstat_heap(Relation rel, FunctionCallInfo fcinfo)
            CHECK_FOR_INTERRUPTS();
 
            buffer = ReadBufferExtended(rel, MAIN_FORKNUM, block,
-                                       RBM_NORMAL, scan->rs_strategy);
+                                       RBM_NORMAL, hscan->rs_strategy);
            LockBuffer(buffer, BUFFER_LOCK_SHARE);
            stat.free_space += PageGetHeapFreeSpace((Page) BufferGetPage(buffer));
            UnlockReleaseBuffer(buffer);
@@ -379,14 +383,14 @@ pgstat_heap(Relation rel, FunctionCallInfo fcinfo)
        CHECK_FOR_INTERRUPTS();
 
        buffer = ReadBufferExtended(rel, MAIN_FORKNUM, block,
-                                   RBM_NORMAL, scan->rs_strategy);
+                                   RBM_NORMAL, hscan->rs_strategy);
        LockBuffer(buffer, BUFFER_LOCK_SHARE);
        stat.free_space += PageGetHeapFreeSpace((Page) BufferGetPage(buffer));
        UnlockReleaseBuffer(buffer);
        block++;
    }
 
-   heap_endscan(scan);
+   table_endscan(scan);
    relation_close(rel, AccessShareLock);
 
    stat.table_len = (uint64) nblocks * BLCKSZ;
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 674eb982d06bf70ca91532d0eedd4cfc2d7db971..cc5b928950a5a1a665e5f150b1e0e606a5ebf97d 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -3927,7 +3927,7 @@ apply_returning_filter(PgFdwDirectModifyState *dmstate,
    /*
     * Use the trigger tuple slot as a place to store the result tuple.
     */
-   resultSlot = estate->es_trig_tuple_slot;
+   resultSlot = ExecTriggerGetReturnSlot(estate, dmstate->resultRel);
    if (resultSlot->tts_tupleDescriptor != resultTupType)
        ExecSetSlotDescriptor(resultSlot, resultTupType);
 
diff --git a/contrib/tsm_system_rows/tsm_system_rows.c b/contrib/tsm_system_rows/tsm_system_rows.c
index 83f841f0c2ea056ef41a4a3900dfdadd40a400a8..9f26c76ba6bf1f05b062d9659d7517b5b2ebc023 100644
--- a/contrib/tsm_system_rows/tsm_system_rows.c
+++ b/contrib/tsm_system_rows/tsm_system_rows.c
@@ -46,7 +46,6 @@ typedef struct
 {
    uint32      seed;           /* random seed */
    int64       ntuples;        /* number of tuples to return */
-   int64       donetuples;     /* number of tuples already returned */
    OffsetNumber lt;            /* last tuple returned from current block */
    BlockNumber doneblocks;     /* number of already-scanned blocks */
    BlockNumber lb;             /* last block visited */
@@ -67,11 +66,10 @@ static void system_rows_beginsamplescan(SampleScanState *node,
                            Datum *params,
                            int nparams,
                            uint32 seed);
-static BlockNumber system_rows_nextsampleblock(SampleScanState *node);
+static BlockNumber system_rows_nextsampleblock(SampleScanState *node, BlockNumber nblocks);
 static OffsetNumber system_rows_nextsampletuple(SampleScanState *node,
                            BlockNumber blockno,
                            OffsetNumber maxoffset);
-static bool SampleOffsetVisible(OffsetNumber tupoffset, HeapScanDesc scan);
 static uint32 random_relative_prime(uint32 n, SamplerRandomState randstate);
 
 
@@ -187,7 +185,6 @@ system_rows_beginsamplescan(SampleScanState *node,
 
    sampler->seed = seed;
    sampler->ntuples = ntuples;
-   sampler->donetuples = 0;
    sampler->lt = InvalidOffsetNumber;
    sampler->doneblocks = 0;
    /* lb will be initialized during first NextSampleBlock call */
@@ -206,10 +203,9 @@ system_rows_beginsamplescan(SampleScanState *node,
  * Uses linear probing algorithm for picking next block.
  */
 static BlockNumber
-system_rows_nextsampleblock(SampleScanState *node)
+system_rows_nextsampleblock(SampleScanState *node, BlockNumber nblocks)
 {
    SystemRowsSamplerData *sampler = (SystemRowsSamplerData *) node->tsm_state;
-   HeapScanDesc scan = node->ss.ss_currentScanDesc;
 
    /* First call within scan? */
    if (sampler->doneblocks == 0)
@@ -221,14 +217,14 @@ system_rows_nextsampleblock(SampleScanState *node)
            SamplerRandomState randstate;
 
            /* If relation is empty, there's nothing to scan */
-           if (scan->rs_nblocks == 0)
+           if (nblocks == 0)
                return InvalidBlockNumber;
 
            /* We only need an RNG during this setup step */
            sampler_random_init_state(sampler->seed, randstate);
 
            /* Compute nblocks/firstblock/step only once per query */
-           sampler->nblocks = scan->rs_nblocks;
+           sampler->nblocks = nblocks;
 
            /* Choose random starting block within the relation */
            /* (Actually this is the predecessor of the first block visited) */
@@ -245,7 +241,7 @@ system_rows_nextsampleblock(SampleScanState *node)
 
    /* If we've read all blocks or returned all needed tuples, we're done */
    if (++sampler->doneblocks > sampler->nblocks ||
-       sampler->donetuples >= sampler->ntuples)
+       node->donetuples >= sampler->ntuples)
        return InvalidBlockNumber;
 
    /*
@@ -258,7 +254,7 @@ system_rows_nextsampleblock(SampleScanState *node)
    {
        /* Advance lb, using uint64 arithmetic to forestall overflow */
        sampler->lb = ((uint64) sampler->lb + sampler->step) % sampler->nblocks;
-   } while (sampler->lb >= scan->rs_nblocks);
+   } while (sampler->lb >= nblocks);
 
    return sampler->lb;
 }
@@ -278,76 +274,27 @@ system_rows_nextsampletuple(SampleScanState *node,
                            OffsetNumber maxoffset)
 {
    SystemRowsSamplerData *sampler = (SystemRowsSamplerData *) node->tsm_state;
-   HeapScanDesc scan = node->ss.ss_currentScanDesc;
    OffsetNumber tupoffset = sampler->lt;
 
    /* Quit if we've returned all needed tuples */
-   if (sampler->donetuples >= sampler->ntuples)
+   if (node->donetuples >= sampler->ntuples)
        return InvalidOffsetNumber;
 
-   /*
-    * Because we should only count visible tuples as being returned, we need
-    * to search for a visible tuple rather than just let the core code do it.
-    */
-
-   /* We rely on the data accumulated in pagemode access */
-   Assert(scan->rs_pageatatime);
-   for (;;)
-   {
-       /* Advance to next possible offset on page */
-       if (tupoffset == InvalidOffsetNumber)
-           tupoffset = FirstOffsetNumber;
-       else
-           tupoffset++;
-
-       /* Done? */
-       if (tupoffset > maxoffset)
-       {
-           tupoffset = InvalidOffsetNumber;
-           break;
-       }
+   /* Advance to next possible offset on page */
+   if (tupoffset == InvalidOffsetNumber)
+       tupoffset = FirstOffsetNumber;
+   else
+       tupoffset++;
 
-       /* Found a candidate? */
-       if (SampleOffsetVisible(tupoffset, scan))
-       {
-           sampler->donetuples++;
-           break;
-       }
-   }
+   /* Done? */
+   if (tupoffset > maxoffset)
+       tupoffset = InvalidOffsetNumber;
 
    sampler->lt = tupoffset;
 
    return tupoffset;
 }
 
-/*
- * Check if tuple offset is visible
- *
- * In pageatatime mode, heapgetpage() already did visibility checks,
- * so just look at the info it left in rs_vistuples[].
- */
-static bool
-SampleOffsetVisible(OffsetNumber tupoffset, HeapScanDesc scan)
-{
-   int         start = 0,
-               end = scan->rs_ntuples - 1;
-
-   while (start <= end)
-   {
-       int         mid = (start + end) / 2;
-       OffsetNumber curoffset = scan->rs_vistuples[mid];
-
-       if (tupoffset == curoffset)
-           return true;
-       else if (tupoffset < curoffset)
-           end = mid - 1;
-       else
-           start = mid + 1;
-   }
-
-   return false;
-}
-
 /*
  * Compute greatest common divisor of two uint32's.
  */
diff --git a/contrib/tsm_system_time/tsm_system_time.c b/contrib/tsm_system_time/tsm_system_time.c
index 249d6f4d463181e5fd05bf853b09e5a3543f4ba3..ee6f4b95a2450527f7b220a0b5936604a2278af7 100644
--- a/contrib/tsm_system_time/tsm_system_time.c
+++ b/contrib/tsm_system_time/tsm_system_time.c
@@ -66,7 +66,7 @@ static void system_time_beginsamplescan(SampleScanState *node,
                            Datum *params,
                            int nparams,
                            uint32 seed);
-static BlockNumber system_time_nextsampleblock(SampleScanState *node);
+static BlockNumber system_time_nextsampleblock(SampleScanState *node, BlockNumber nblocks);
 static OffsetNumber system_time_nextsampletuple(SampleScanState *node,
                            BlockNumber blockno,
                            OffsetNumber maxoffset);
@@ -213,10 +213,9 @@ system_time_beginsamplescan(SampleScanState *node,
  * Uses linear probing algorithm for picking next block.
  */
 static BlockNumber
-system_time_nextsampleblock(SampleScanState *node)
+system_time_nextsampleblock(SampleScanState *node, BlockNumber nblocks)
 {
    SystemTimeSamplerData *sampler = (SystemTimeSamplerData *) node->tsm_state;
-   HeapScanDesc scan = node->ss.ss_currentScanDesc;
    instr_time  cur_time;
 
    /* First call within scan? */
@@ -229,14 +228,14 @@ system_time_nextsampleblock(SampleScanState *node)
            SamplerRandomState randstate;
 
            /* If relation is empty, there's nothing to scan */
-           if (scan->rs_nblocks == 0)
+           if (nblocks == 0)
                return InvalidBlockNumber;
 
            /* We only need an RNG during this setup step */
            sampler_random_init_state(sampler->seed, randstate);
 
            /* Compute nblocks/firstblock/step only once per query */
-           sampler->nblocks = scan->rs_nblocks;
+           sampler->nblocks = nblocks;
 
            /* Choose random starting block within the relation */
            /* (Actually this is the predecessor of the first block visited) */
@@ -272,7 +271,7 @@ system_time_nextsampleblock(SampleScanState *node)
    {
        /* Advance lb, using uint64 arithmetic to forestall overflow */
        sampler->lb = ((uint64) sampler->lb + sampler->step) % sampler->nblocks;
-   } while (sampler->lb >= scan->rs_nblocks);
+   } while (sampler->lb >= nblocks);
 
    return sampler->lb;
 }
diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml
index 4ce88dd77c19edefca0121e66d8ff4e584b8c07c..12769f3288d5617ed453e0c3a359af2ea6ea3c87 100644
--- a/doc/src/sgml/fdwhandler.sgml
+++ b/doc/src/sgml/fdwhandler.sgml
@@ -988,23 +988,25 @@ GetForeignRowMarkType(RangeTblEntry *rte,
 
     <para>
 <programlisting>
-HeapTuple
+TupleTableSlot *
 RefetchForeignRow(EState *estate,
                   ExecRowMark *erm,
                   Datum rowid,
+                  TupleTableSlot *slot,
                   bool *updated);
 </programlisting>
 
-     Re-fetch one tuple from the foreign table, after locking it if required.
+     Re-fetch one tuple slot from the foreign table, after locking it if required.
      <literal>estate</literal> is global execution state for the query.
      <literal>erm</literal> is the <structname>ExecRowMark</structname> struct describing
      the target foreign table and the row lock type (if any) to acquire.
      <literal>rowid</literal> identifies the tuple to be fetched.
-     <literal>updated</literal> is an output parameter.
+     <literal>slot</literal> contains nothing useful upon call, but can be used to
+     hold the returned tuple. <literal>updated</literal> is an output parameter.
     </para>
 
     <para>
-     This function should return a palloc'ed copy of the fetched tuple,
+     This function should return a slot containing the fetched tuple
      or <literal>NULL</literal> if the row lock couldn't be obtained.  The row lock
      type to acquire is defined by <literal>erm-&gt;markType</literal>, which is the
      value previously returned by <function>GetForeignRowMarkType</function>.
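
[Editor's sketch, not part of the commit: an FDW callback shaped per the
signature documented above; exampleRefetchForeignRow and
example_fetch_and_lock_row are hypothetical names.]

/* hypothetical helper: fetch and lock the remote row, filling the slot */
extern bool example_fetch_and_lock_row(Relation rel, Datum rowid,
									   RowMarkType markType,
									   TupleTableSlot *slot);

static TupleTableSlot *
exampleRefetchForeignRow(EState *estate, ExecRowMark *erm, Datum rowid,
						 TupleTableSlot *slot, bool *updated)
{
	/*
	 * Re-fetch the row identified by rowid, acquiring the row lock dictated
	 * by erm->markType, and store the result in the caller-supplied slot.
	 */
	if (!example_fetch_and_lock_row(erm->relation, rowid, erm->markType, slot))
		return NULL;			/* row lock couldn't be obtained */

	*updated = false;			/* true only if a newer row version was read */
	return slot;
}
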
diff --git a/src/backend/access/Makefile b/src/backend/access/Makefile
index bd93a6a8d1e606fcb6ce30deeddd5068c97b3f85..0880e0a8bbb63901164aef4a2de577db59c98b25 100644
--- a/src/backend/access/Makefile
+++ b/src/backend/access/Makefile
@@ -9,6 +9,6 @@ top_builddir = ../../..
 include $(top_builddir)/src/Makefile.global
 
 SUBDIRS        = brin common gin gist hash heap index nbtree rmgrdesc spgist \
-             tablesample transam
+             table tablesample transam
 
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c
index e95fbbcea74e3c233318d78e9c0d966cd23e58a5..b70737a7a65e2d4db8116ae8a9cc0073affc8245 100644
--- a/src/backend/access/brin/brin.c
+++ b/src/backend/access/brin/brin.c
@@ -21,6 +21,7 @@
 #include "access/brin_xlog.h"
 #include "access/reloptions.h"
 #include "access/relscan.h"
+#include "access/tableam.h"
 #include "access/xloginsert.h"
 #include "catalog/index.h"
 #include "catalog/pg_am.h"
@@ -585,7 +586,7 @@ brinendscan(IndexScanDesc scan)
 }
 
 /*
- * Per-heap-tuple callback for IndexBuildHeapScan.
+ * Per-heap-tuple callback for table_index_build_scan.
  *
  * Note we don't worry about the page range at the end of the table here; it is
  * present in the build state struct after we're called the last time, but not
@@ -716,8 +717,8 @@ brinbuild(Relation heap, Relation index, IndexInfo *indexInfo)
     * Now scan the relation.  No syncscan allowed here because we want the
     * heap blocks in physical order.
     */
-   reltuples = IndexBuildHeapScan(heap, index, indexInfo, false,
-                                  brinbuildCallback, (void *) state, NULL);
+   reltuples = table_index_build_scan(heap, index, indexInfo, false,
+                                      brinbuildCallback, (void *) state, NULL);
 
    /* process the final batch */
    form_and_insert_tuple(state);
@@ -1228,13 +1229,16 @@ summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel,
     * short of brinbuildCallback creating the new index entry.
     *
     * Note that it is critical we use the "any visible" mode of
-    * IndexBuildHeapRangeScan here: otherwise, we would miss tuples inserted
-    * by transactions that are still in progress, among other corner cases.
+    * table_index_build_range_scan here: otherwise, we would miss tuples
+    * inserted by transactions that are still in progress, among other corner
+    * cases.
+    *
+    * ZBORKED?
     */
    state->bs_currRangeStart = heapBlk;
-   IndexBuildHeapRangeScan(heapRel, state->bs_irel, indexInfo, false, true,
-                           heapBlk, scanNumBlks,
-                           brinbuildCallback, (void *) state, NULL);
+   table_index_build_range_scan(heapRel, state->bs_irel, indexInfo, false, true,
+                                heapBlk, scanNumBlks,
+                                brinbuildCallback, (void *) state, NULL);
 
    /*
     * Now we update the values obtained by the scan with the placeholder
diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c
index 5281eb682382ff170fadb0f91abde8a5842974b4..621bd93ccd5984ef8ee62bea39fb662c4a2f80dc 100644
--- a/src/backend/access/gin/gininsert.c
+++ b/src/backend/access/gin/gininsert.c
@@ -17,6 +17,7 @@
 #include "access/gin_private.h"
 #include "access/ginxlog.h"
 #include "access/xloginsert.h"
+#include "access/tableam.h"
 #include "catalog/index.h"
 #include "miscadmin.h"
 #include "storage/bufmgr.h"
@@ -394,8 +395,8 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
     * Do the heap scan.  We disallow sync scan here because dataPlaceToPage
     * prefers to receive tuples in TID order.
     */
-   reltuples = IndexBuildHeapScan(heap, index, indexInfo, false,
-                                  ginBuildCallback, (void *) &buildstate, NULL);
+   reltuples = table_index_build_scan(heap, index, indexInfo, false,
+                                      ginBuildCallback, (void *) &buildstate, NULL);
 
    /* dump remaining entries to the index */
    oldCtx = MemoryContextSwitchTo(buildstate.tmpCtx);
diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c
index 434f15f0148e0ff90dd131d36783edb405982960..c39ddc910cc694cfaba7d480910fe4cd305299bd 100644
--- a/src/backend/access/gist/gistbuild.c
+++ b/src/backend/access/gist/gistbuild.c
@@ -19,6 +19,7 @@
 #include "access/genam.h"
 #include "access/gist_private.h"
 #include "access/gistxlog.h"
+#include "access/tableam.h"
 #include "access/xloginsert.h"
 #include "catalog/index.h"
 #include "miscadmin.h"
@@ -202,8 +203,8 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
    /*
     * Do the heap scan.
     */
-   reltuples = IndexBuildHeapScan(heap, index, indexInfo, true,
-                                  gistBuildCallback, (void *) &buildstate, NULL);
+   reltuples = table_index_build_scan(heap, index, indexInfo, true,
+                                      gistBuildCallback, (void *) &buildstate, NULL);
 
    /*
     * If buffering was used, flush out all the tuples that are still in the
@@ -452,7 +453,7 @@ calculatePagesPerBuffer(GISTBuildState *buildstate, int levelStep)
 }
 
 /*
- * Per-tuple callback from IndexBuildHeapScan.
+ * Per-tuple callback from table_index_build_scan.
  */
 static void
 gistBuildCallback(Relation index,
diff --git a/src/backend/access/gist/gistget.c b/src/backend/access/gist/gistget.c
index e4a3786be01cfc5d9ecea9441a16435edbc6b339..7c75461dd12096c13c05b9f3a834e17e2af6581e 100644
--- a/src/backend/access/gist/gistget.c
+++ b/src/backend/access/gist/gistget.c
@@ -561,7 +561,7 @@ getNextNearest(IndexScanDesc scan)
        if (GISTSearchItemIsHeap(*item))
        {
            /* found a heap item at currently minimal distance */
-           scan->xs_ctup.t_self = item->data.heap.heapPtr;
+           scan->xs_heaptid = item->data.heap.heapPtr;
            scan->xs_recheck = item->data.heap.recheck;
 
            index_store_float8_orderby_distances(scan, so->orderByTypes,
@@ -650,7 +650,7 @@ gistgettuple(IndexScanDesc scan, ScanDirection dir)
                            so->pageData[so->curPageData - 1].offnum;
                }
                /* continuing to return tuples from a leaf page */
-               scan->xs_ctup.t_self = so->pageData[so->curPageData].heapPtr;
+               scan->xs_heaptid = so->pageData[so->curPageData].heapPtr;
                scan->xs_recheck = so->pageData[so->curPageData].recheck;
 
                /* in an index-only scan, also return the reconstructed tuple */
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index 0002df30c0d67fcb199fe3e574be8b725ad1c0f7..18b26d902663ab9213c817ca1725559a67b7895c 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -21,6 +21,7 @@
 #include "access/hash.h"
 #include "access/hash_xlog.h"
 #include "access/relscan.h"
+#include "access/tableam.h"
 #include "catalog/index.h"
 #include "commands/vacuum.h"
 #include "miscadmin.h"
@@ -159,7 +160,7 @@ hashbuild(Relation heap, Relation index, IndexInfo *indexInfo)
    buildstate.heapRel = heap;
 
    /* do the heap scan */
-   reltuples = IndexBuildHeapScan(heap, index, indexInfo, true,
+   reltuples = table_index_build_scan(heap, index, indexInfo, true,
                                   hashbuildCallback, (void *) &buildstate, NULL);
 
    if (buildstate.spool)
@@ -190,7 +191,7 @@ hashbuildempty(Relation index)
 }
 
 /*
- * Per-tuple callback from IndexBuildHeapScan
+ * Per-tuple callback from table_index_build_scan
  */
 static void
 hashbuildCallback(Relation index,
diff --git a/src/backend/access/hash/hashsearch.c b/src/backend/access/hash/hashsearch.c
index 650041db0a5ca3a0dabb9429326916d07b0ef2c3..edda7991ad0cc920a751eae21ef1e48cffae3997 100644
--- a/src/backend/access/hash/hashsearch.c
+++ b/src/backend/access/hash/hashsearch.c
@@ -119,7 +119,7 @@ _hash_next(IndexScanDesc scan, ScanDirection dir)
 
    /* OK, itemIndex says what to return */
    currItem = &so->currPos.items[so->currPos.itemIndex];
-   scan->xs_ctup.t_self = currItem->heapTid;
+   scan->xs_heaptid = currItem->heapTid;
 
    return true;
 }
@@ -432,7 +432,7 @@ _hash_first(IndexScanDesc scan, ScanDirection dir)
 
    /* OK, itemIndex says what to return */
    currItem = &so->currPos.items[so->currPos.itemIndex];
-   scan->xs_ctup.t_self = currItem->heapTid;
+   scan->xs_heaptid = currItem->heapTid;
 
    /* if we're here, _hash_readpage found a valid tuple */
    return true;
diff --git a/src/backend/access/heap/Makefile b/src/backend/access/heap/Makefile
index 7e7324a9166f330f2d87c44f27d6890f86ad2cdb..aee7bfd8346bd5cac71361896f090f47ff4e1a3c 100644
--- a/src/backend/access/heap/Makefile
+++ b/src/backend/access/heap/Makefile
@@ -12,7 +12,7 @@ subdir = src/backend/access/heap
 top_builddir = ../../../..
 include $(top_builddir)/src/Makefile.global
 
-OBJS = heapam.o hio.o pruneheap.o rewriteheap.o syncscan.o tuptoaster.o \
-   vacuumlazy.o visibilitymap.o
+OBJS = heapam.o heapam_handler.o heapam_visibility.o hio.o pruneheap.o \
+   rewriteheap.o syncscan.o tuptoaster.o vacuumlazy.o visibilitymap.o
 
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 9650145642209bfcc8493dbabc385d2545860be5..f769d828ff75746f6451b5fdea32958bb3e6853d 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -45,6 +45,7 @@
 #include "access/multixact.h"
 #include "access/parallel.h"
 #include "access/relscan.h"
+#include "access/tableam.h"
 #include "access/sysattr.h"
 #include "access/transam.h"
 #include "access/tuptoaster.h"
 #include "nodes/execnodes.h"
 #include "executor/executor.h"
 
-/* GUC variable */
-bool       synchronize_seqscans = true;
-
-
-static HeapScanDesc heap_beginscan_internal(Relation relation,
-                       Snapshot snapshot,
-                       int nkeys, ScanKey key,
-                       ParallelHeapScanDesc parallel_scan,
-                       bool allow_strat,
-                       bool allow_sync,
-                       bool allow_pagemode,
-                       bool is_bitmapscan,
-                       bool is_samplescan,
-                       bool temp_snap);
-static void heap_parallelscan_startblock_init(HeapScanDesc scan);
-static BlockNumber heap_parallelscan_nextpage(HeapScanDesc scan);
 static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup,
                    TransactionId xid, CommandId cid, int options);
 static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf,
@@ -233,10 +218,10 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
     * results for a non-MVCC snapshot, the caller must hold some higher-level
     * lock that ensures the interesting tuple(s) won't change.)
     */
-   if (scan->rs_parallel != NULL)
-       scan->rs_nblocks = scan->rs_parallel->phs_nblocks;
+   if (scan->rs_scan.rs_parallel != NULL)
+       scan->rs_scan.rs_nblocks = scan->rs_scan.rs_parallel->phs_nblocks;
    else
-       scan->rs_nblocks = RelationGetNumberOfBlocks(scan->rs_rd);
+       scan->rs_scan.rs_nblocks = RelationGetNumberOfBlocks(scan->rs_scan.rs_rd);
 
    /*
     * If the table is large relative to NBuffers, use a bulk-read access
@@ -250,11 +235,11 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
     * Note that heap_parallelscan_initialize has a very similar test; if you
     * change this, consider changing that one, too.
     */
-   if (!RelationUsesLocalBuffers(scan->rs_rd) &&
-       scan->rs_nblocks > NBuffers / 4)
+   if (!RelationUsesLocalBuffers(scan->rs_scan.rs_rd) &&
+       scan->rs_scan.rs_nblocks > NBuffers / 4)
    {
-       allow_strat = scan->rs_allow_strat;
-       allow_sync = scan->rs_allow_sync;
+       allow_strat = scan->rs_scan.rs_allow_strat;
+       allow_sync = scan->rs_scan.rs_allow_sync;
    }
    else
        allow_strat = allow_sync = false;
@@ -272,10 +257,10 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
        scan->rs_strategy = NULL;
    }
 
-   if (scan->rs_parallel != NULL)
+   if (scan->rs_scan.rs_parallel != NULL)
    {
-       /* For parallel scan, believe whatever ParallelHeapScanDesc says. */
-       scan->rs_syncscan = scan->rs_parallel->phs_syncscan;
+       /* For parallel scan, believe whatever ParallelTableScanDesc says. */
+       scan->rs_scan.rs_syncscan = scan->rs_scan.rs_parallel->phs_syncscan;
    }
    else if (keep_startblock)
    {
@@ -284,20 +269,20 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
         * so that rewinding a cursor doesn't generate surprising results.
         * Reset the active syncscan setting, though.
         */
-       scan->rs_syncscan = (allow_sync && synchronize_seqscans);
+       scan->rs_scan.rs_syncscan = (allow_sync && synchronize_seqscans);
    }
    else if (allow_sync && synchronize_seqscans)
    {
-       scan->rs_syncscan = true;
-       scan->rs_startblock = ss_get_location(scan->rs_rd, scan->rs_nblocks);
+       scan->rs_scan.rs_syncscan = true;
+       scan->rs_scan.rs_startblock = ss_get_location(scan->rs_scan.rs_rd, scan->rs_scan.rs_nblocks);
    }
    else
    {
-       scan->rs_syncscan = false;
-       scan->rs_startblock = 0;
+       scan->rs_scan.rs_syncscan = false;
+       scan->rs_scan.rs_startblock = 0;
    }
 
-   scan->rs_numblocks = InvalidBlockNumber;
+   scan->rs_scan.rs_numblocks = InvalidBlockNumber;
    scan->rs_inited = false;
    scan->rs_ctup.t_data = NULL;
    ItemPointerSetInvalid(&scan->rs_ctup.t_self);
@@ -310,15 +295,15 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
     * copy the scan key, if appropriate
     */
    if (key != NULL)
-       memcpy(scan->rs_key, key, scan->rs_nkeys * sizeof(ScanKeyData));
+       memcpy(scan->rs_scan.rs_key, key, scan->rs_scan.rs_nkeys * sizeof(ScanKeyData));
 
    /*
     * Currently, we don't have a stats counter for bitmap heap scans (but the
     * underlying bitmap index scans will be counted) or sample scans (we only
     * update stats for tuple fetches there)
     */
-   if (!scan->rs_bitmapscan && !scan->rs_samplescan)
-       pgstat_count_heap_scan(scan->rs_rd);
+   if (!scan->rs_scan.rs_bitmapscan && !scan->rs_scan.rs_samplescan)
+       pgstat_count_heap_scan(scan->rs_scan.rs_rd);
 }
 
 /*
@@ -328,16 +313,19 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
  * numBlks is number of pages to scan (InvalidBlockNumber means "all")
  */
 void
-heap_setscanlimits(HeapScanDesc scan, BlockNumber startBlk, BlockNumber numBlks)
+heap_setscanlimits(TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks)
 {
+   HeapScanDesc scan = (HeapScanDesc) sscan;
+
    Assert(!scan->rs_inited);   /* else too late to change */
-   Assert(!scan->rs_syncscan); /* else rs_startblock is significant */
+   Assert(!scan->rs_scan.rs_syncscan); /* else rs_startblock is
+                                            * significant */
 
    /* Check startBlk is valid (but allow case of zero blocks...) */
-   Assert(startBlk == 0 || startBlk < scan->rs_nblocks);
+   Assert(startBlk == 0 || startBlk < scan->rs_scan.rs_nblocks);
 
-   scan->rs_startblock = startBlk;
-   scan->rs_numblocks = numBlks;
+   scan->rs_scan.rs_startblock = startBlk;
+   scan->rs_scan.rs_numblocks = numBlks;
 }
 
 /*
@@ -348,8 +336,9 @@ heap_setscanlimits(HeapScanDesc scan, BlockNumber startBlk, BlockNumber numBlks)
  * which tuples on the page are visible.
  */
 void
-heapgetpage(HeapScanDesc scan, BlockNumber page)
+heapgetpage(TableScanDesc sscan, BlockNumber page)
 {
+   HeapScanDesc scan = (HeapScanDesc) sscan;
    Buffer      buffer;
    Snapshot    snapshot;
    Page        dp;
@@ -359,7 +348,7 @@ heapgetpage(HeapScanDesc scan, BlockNumber page)
    ItemId      lpp;
    bool        all_visible;
 
-   Assert(page < scan->rs_nblocks);
+   Assert(page < scan->rs_scan.rs_nblocks);
 
    /* release previous scan buffer, if any */
    if (BufferIsValid(scan->rs_cbuf))
@@ -376,20 +365,20 @@ heapgetpage(HeapScanDesc scan, BlockNumber page)
    CHECK_FOR_INTERRUPTS();
 
    /* read page using selected strategy */
-   scan->rs_cbuf = ReadBufferExtended(scan->rs_rd, MAIN_FORKNUM, page,
-                                      RBM_NORMAL, scan->rs_strategy);
+   scan->rs_cbuf = ReadBufferExtended(scan->rs_scan.rs_rd, MAIN_FORKNUM, page,
+                                              RBM_NORMAL, scan->rs_strategy);
    scan->rs_cblock = page;
 
-   if (!scan->rs_pageatatime)
+   if (!scan->rs_scan.rs_pageatatime)
        return;
 
    buffer = scan->rs_cbuf;
-   snapshot = scan->rs_snapshot;
+   snapshot = scan->rs_scan.rs_snapshot;
 
    /*
     * Prune and repair fragmentation for the whole page, if possible.
     */
-   heap_page_prune_opt(scan->rs_rd, buffer);
+   heap_page_prune_opt(scan->rs_scan.rs_rd, buffer);
 
    /*
     * We must hold share lock on the buffer content while examining tuple
@@ -399,7 +388,7 @@ heapgetpage(HeapScanDesc scan, BlockNumber page)
    LockBuffer(buffer, BUFFER_LOCK_SHARE);
 
    dp = BufferGetPage(buffer);
-   TestForOldSnapshot(snapshot, scan->rs_rd, dp);
+   TestForOldSnapshot(snapshot, scan->rs_scan.rs_rd, dp);
    lines = PageGetMaxOffsetNumber(dp);
    ntup = 0;
 
@@ -434,7 +423,7 @@ heapgetpage(HeapScanDesc scan, BlockNumber page)
            HeapTupleData loctup;
            bool        valid;
 
-           loctup.t_tableOid = RelationGetRelid(scan->rs_rd);
+           loctup.t_tableOid = RelationGetRelid(scan->rs_scan.rs_rd);
            loctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp);
            loctup.t_len = ItemIdGetLength(lpp);
            ItemPointerSet(&(loctup.t_self), page, lineoff);
@@ -442,9 +431,9 @@ heapgetpage(HeapScanDesc scan, BlockNumber page)
            if (all_visible)
                valid = true;
            else
-               valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer);
+               valid = HeapTupleSatisfies(&loctup, snapshot, buffer);
 
-           CheckForSerializableConflictOut(valid, scan->rs_rd, &loctup,
+           CheckForSerializableConflictOut(valid, scan->rs_scan.rs_rd, &loctup,
                                            buffer, snapshot);
 
            if (valid)
@@ -488,7 +477,7 @@ heapgettup(HeapScanDesc scan,
           ScanKey key)
 {
    HeapTuple   tuple = &(scan->rs_ctup);
-   Snapshot    snapshot = scan->rs_snapshot;
+   Snapshot    snapshot = scan->rs_scan.rs_snapshot;
    bool        backward = ScanDirectionIsBackward(dir);
    BlockNumber page;
    bool        finished;
@@ -508,17 +497,17 @@ heapgettup(HeapScanDesc scan,
            /*
             * return null immediately if relation is empty
             */
-           if (scan->rs_nblocks == 0 || scan->rs_numblocks == 0)
+           if (scan->rs_scan.rs_nblocks == 0 || scan->rs_scan.rs_numblocks == 0)
            {
                Assert(!BufferIsValid(scan->rs_cbuf));
                tuple->t_data = NULL;
                return;
            }
-           if (scan->rs_parallel != NULL)
+           if (scan->rs_scan.rs_parallel != NULL)
            {
-               heap_parallelscan_startblock_init(scan);
+               table_parallelscan_startblock_init(&scan->rs_scan);
 
-               page = heap_parallelscan_nextpage(scan);
+               page = table_parallelscan_nextpage(&scan->rs_scan);
 
                /* Other processes might have already finished the scan. */
                if (page == InvalidBlockNumber)
@@ -529,8 +518,8 @@ heapgettup(HeapScanDesc scan,
                }
            }
            else
-               page = scan->rs_startblock; /* first page */
-           heapgetpage(scan, page);
+               page = scan->rs_scan.rs_startblock; /* first page */
+           heapgetpage((TableScanDesc) scan, page);
            lineoff = FirstOffsetNumber;    /* first offnum */
            scan->rs_inited = true;
        }
@@ -545,7 +534,7 @@ heapgettup(HeapScanDesc scan,
        LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
 
        dp = BufferGetPage(scan->rs_cbuf);
-       TestForOldSnapshot(snapshot, scan->rs_rd, dp);
+       TestForOldSnapshot(snapshot, scan->rs_scan.rs_rd, dp);
        lines = PageGetMaxOffsetNumber(dp);
        /* page and lineoff now reference the physically next tid */
 
@@ -554,14 +543,14 @@ heapgettup(HeapScanDesc scan,
    else if (backward)
    {
        /* backward parallel scan not supported */
-       Assert(scan->rs_parallel == NULL);
+       Assert(scan->rs_scan.rs_parallel == NULL);
 
        if (!scan->rs_inited)
        {
            /*
             * return null immediately if relation is empty
             */
-           if (scan->rs_nblocks == 0 || scan->rs_numblocks == 0)
+           if (scan->rs_scan.rs_nblocks == 0 || scan->rs_scan.rs_numblocks == 0)
            {
                Assert(!BufferIsValid(scan->rs_cbuf));
                tuple->t_data = NULL;
@@ -574,13 +563,13 @@ heapgettup(HeapScanDesc scan,
             * time, and much more likely that we'll just bollix things for
             * forward scanners.
             */
-           scan->rs_syncscan = false;
+           scan->rs_scan.rs_syncscan = false;
            /* start from last page of the scan */
-           if (scan->rs_startblock > 0)
-               page = scan->rs_startblock - 1;
+           if (scan->rs_scan.rs_startblock > 0)
+               page = scan->rs_scan.rs_startblock - 1;
            else
-               page = scan->rs_nblocks - 1;
-           heapgetpage(scan, page);
+               page = scan->rs_scan.rs_nblocks - 1;
+           heapgetpage((TableScanDesc) scan, page);
        }
        else
        {
@@ -591,7 +580,7 @@ heapgettup(HeapScanDesc scan,
        LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
 
        dp = BufferGetPage(scan->rs_cbuf);
-       TestForOldSnapshot(snapshot, scan->rs_rd, dp);
+       TestForOldSnapshot(snapshot, scan->rs_scan.rs_rd, dp);
        lines = PageGetMaxOffsetNumber(dp);
 
        if (!scan->rs_inited)
@@ -622,11 +611,11 @@ heapgettup(HeapScanDesc scan,
 
        page = ItemPointerGetBlockNumber(&(tuple->t_self));
        if (page != scan->rs_cblock)
-           heapgetpage(scan, page);
+           heapgetpage((TableScanDesc) scan, page);
 
        /* Since the tuple was previously fetched, needn't lock page here */
        dp = BufferGetPage(scan->rs_cbuf);
-       TestForOldSnapshot(snapshot, scan->rs_rd, dp);
+       TestForOldSnapshot(snapshot, scan->rs_scan.rs_rd, dp);
        lineoff = ItemPointerGetOffsetNumber(&(tuple->t_self));
        lpp = PageGetItemId(dp, lineoff);
        Assert(ItemIdIsNormal(lpp));
@@ -657,15 +646,13 @@ heapgettup(HeapScanDesc scan,
                /*
                 * if current tuple qualifies, return it.
                 */
-               valid = HeapTupleSatisfiesVisibility(tuple,
-                                                    snapshot,
-                                                    scan->rs_cbuf);
+               valid = HeapTupleSatisfies(tuple, snapshot, scan->rs_cbuf);
 
-               CheckForSerializableConflictOut(valid, scan->rs_rd, tuple,
+               CheckForSerializableConflictOut(valid, scan->rs_scan.rs_rd, tuple,
                                                scan->rs_cbuf, snapshot);
 
                if (valid && key != NULL)
-                   HeapKeyTest(tuple, RelationGetDescr(scan->rs_rd),
+                   HeapKeyTest(tuple, RelationGetDescr(scan->rs_scan.rs_rd),
                                nkeys, key, valid);
 
                if (valid)
@@ -702,24 +689,24 @@ heapgettup(HeapScanDesc scan,
         */
        if (backward)
        {
-           finished = (page == scan->rs_startblock) ||
-               (scan->rs_numblocks != InvalidBlockNumber ? --scan->rs_numblocks == 0 : false);
+           finished = (page == scan->rs_scan.rs_startblock) ||
+               (scan->rs_scan.rs_numblocks != InvalidBlockNumber ? --scan->rs_scan.rs_numblocks == 0 : false);
            if (page == 0)
-               page = scan->rs_nblocks;
+               page = scan->rs_scan.rs_nblocks;
            page--;
        }
-       else if (scan->rs_parallel != NULL)
+       else if (scan->rs_scan.rs_parallel != NULL)
        {
-           page = heap_parallelscan_nextpage(scan);
+           page = table_parallelscan_nextpage(&scan->rs_scan);
            finished = (page == InvalidBlockNumber);
        }
        else
        {
            page++;
-           if (page >= scan->rs_nblocks)
+           if (page >= scan->rs_scan.rs_nblocks)
                page = 0;
-           finished = (page == scan->rs_startblock) ||
-               (scan->rs_numblocks != InvalidBlockNumber ? --scan->rs_numblocks == 0 : false);
+           finished = (page == scan->rs_scan.rs_startblock) ||
+               (scan->rs_scan.rs_numblocks != InvalidBlockNumber ? --scan->rs_scan.rs_numblocks == 0 : false);
 
            /*
             * Report our new scan position for synchronization purposes. We
@@ -733,8 +720,8 @@ heapgettup(HeapScanDesc scan,
             * a little bit backwards on every invocation, which is confusing.
             * We don't guarantee any specific ordering in general, though.
             */
-           if (scan->rs_syncscan)
-               ss_report_location(scan->rs_rd, page);
+           if (scan->rs_scan.rs_syncscan)
+               ss_report_location(scan->rs_scan.rs_rd, page);
        }
 
        /*
@@ -751,12 +738,12 @@ heapgettup(HeapScanDesc scan,
            return;
        }
 
-       heapgetpage(scan, page);
+       heapgetpage((TableScanDesc) scan, page);
 
        LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
 
        dp = BufferGetPage(scan->rs_cbuf);
-       TestForOldSnapshot(snapshot, scan->rs_rd, dp);
+       TestForOldSnapshot(snapshot, scan->rs_scan.rs_rd, dp);
        lines = PageGetMaxOffsetNumber((Page) dp);
        linesleft = lines;
        if (backward)
@@ -812,17 +799,17 @@ heapgettup_pagemode(HeapScanDesc scan,
            /*
             * return null immediately if relation is empty
             */
-           if (scan->rs_nblocks == 0 || scan->rs_numblocks == 0)
+           if (scan->rs_scan.rs_nblocks == 0 || scan->rs_scan.rs_numblocks == 0)
            {
                Assert(!BufferIsValid(scan->rs_cbuf));
                tuple->t_data = NULL;
                return;
            }
-           if (scan->rs_parallel != NULL)
+           if (scan->rs_scan.rs_parallel != NULL)
            {
-               heap_parallelscan_startblock_init(scan);
+               table_parallelscan_startblock_init(&scan->rs_scan);
 
-               page = heap_parallelscan_nextpage(scan);
+               page = table_parallelscan_nextpage(&scan->rs_scan);
 
                /* Other processes might have already finished the scan. */
                if (page == InvalidBlockNumber)
@@ -833,8 +820,8 @@ heapgettup_pagemode(HeapScanDesc scan,
                }
            }
            else
-               page = scan->rs_startblock; /* first page */
-           heapgetpage(scan, page);
+               page = scan->rs_scan.rs_startblock; /* first page */
+           heapgetpage((TableScanDesc) scan, page);
            lineindex = 0;
            scan->rs_inited = true;
        }
@@ -845,8 +832,9 @@ heapgettup_pagemode(HeapScanDesc scan,
            lineindex = scan->rs_cindex + 1;
        }
 
+       LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
        dp = BufferGetPage(scan->rs_cbuf);
-       TestForOldSnapshot(scan->rs_snapshot, scan->rs_rd, dp);
+       TestForOldSnapshot(scan->rs_scan.rs_snapshot, scan->rs_scan.rs_rd, dp);
        lines = scan->rs_ntuples;
        /* page and lineindex now reference the next visible tid */
 
@@ -855,14 +843,14 @@ heapgettup_pagemode(HeapScanDesc scan,
    else if (backward)
    {
        /* backward parallel scan not supported */
-       Assert(scan->rs_parallel == NULL);
+       Assert(scan->rs_scan.rs_parallel == NULL);
 
        if (!scan->rs_inited)
        {
            /*
             * return null immediately if relation is empty
             */
-           if (scan->rs_nblocks == 0 || scan->rs_numblocks == 0)
+           if (scan->rs_scan.rs_nblocks == 0 || scan->rs_scan.rs_numblocks == 0)
            {
                Assert(!BufferIsValid(scan->rs_cbuf));
                tuple->t_data = NULL;
@@ -875,13 +863,13 @@ heapgettup_pagemode(HeapScanDesc scan,
             * time, and much more likely that we'll just bollix things for
             * forward scanners.
             */
-           scan->rs_syncscan = false;
+           scan->rs_scan.rs_syncscan = false;
            /* start from last page of the scan */
-           if (scan->rs_startblock > 0)
-               page = scan->rs_startblock - 1;
+           if (scan->rs_scan.rs_startblock > 0)
+               page = scan->rs_scan.rs_startblock - 1;
            else
-               page = scan->rs_nblocks - 1;
-           heapgetpage(scan, page);
+               page = scan->rs_scan.rs_nblocks - 1;
+           heapgetpage((TableScanDesc) scan, page);
        }
        else
        {
@@ -889,8 +877,9 @@ heapgettup_pagemode(HeapScanDesc scan,
            page = scan->rs_cblock; /* current page */
        }
 
+       LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
        dp = BufferGetPage(scan->rs_cbuf);
-       TestForOldSnapshot(scan->rs_snapshot, scan->rs_rd, dp);
+       TestForOldSnapshot(scan->rs_scan.rs_snapshot, scan->rs_scan.rs_rd, dp);
        lines = scan->rs_ntuples;
 
        if (!scan->rs_inited)
@@ -920,11 +909,11 @@ heapgettup_pagemode(HeapScanDesc scan,
 
        page = ItemPointerGetBlockNumber(&(tuple->t_self));
        if (page != scan->rs_cblock)
-           heapgetpage(scan, page);
+           heapgetpage((TableScanDesc) scan, page);
 
        /* Since the tuple was previously fetched, needn't lock page here */
        dp = BufferGetPage(scan->rs_cbuf);
-       TestForOldSnapshot(scan->rs_snapshot, scan->rs_rd, dp);
+       TestForOldSnapshot(scan->rs_scan.rs_snapshot, scan->rs_scan.rs_rd, dp);
        lineoff = ItemPointerGetOffsetNumber(&(tuple->t_self));
        lpp = PageGetItemId(dp, lineoff);
        Assert(ItemIdIsNormal(lpp));
@@ -962,17 +951,19 @@ heapgettup_pagemode(HeapScanDesc scan,
            {
                bool        valid;
 
-               HeapKeyTest(tuple, RelationGetDescr(scan->rs_rd),
+               HeapKeyTest(tuple, RelationGetDescr(scan->rs_scan.rs_rd),
                            nkeys, key, valid);
                if (valid)
                {
                    scan->rs_cindex = lineindex;
+                   LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
                    return;
                }
            }
            else
            {
                scan->rs_cindex = lineindex;
+               LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
                return;
            }
 
@@ -986,30 +977,36 @@ heapgettup_pagemode(HeapScanDesc scan,
                ++lineindex;
        }
 
+       /*
+        * We've exhausted the items on this page; release the buffer content
+        * lock taken above before moving to the next page.
+        */
+       LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+
        /*
         * if we get here, it means we've exhausted the items on this page and
         * it's time to move to the next.
         */
        if (backward)
        {
-           finished = (page == scan->rs_startblock) ||
-               (scan->rs_numblocks != InvalidBlockNumber ? --scan->rs_numblocks == 0 : false);
+           finished = (page == scan->rs_scan.rs_startblock) ||
+               (scan->rs_scan.rs_numblocks != InvalidBlockNumber ? --scan->rs_scan.rs_numblocks == 0 : false);
            if (page == 0)
-               page = scan->rs_nblocks;
+               page = scan->rs_scan.rs_nblocks;
            page--;
        }
-       else if (scan->rs_parallel != NULL)
+       else if (scan->rs_scan.rs_parallel != NULL)
        {
-           page = heap_parallelscan_nextpage(scan);
+           page = table_parallelscan_nextpage(&scan->rs_scan);
            finished = (page == InvalidBlockNumber);
        }
        else
        {
            page++;
-           if (page >= scan->rs_nblocks)
+           if (page >= scan->rs_scan.rs_nblocks)
                page = 0;
-           finished = (page == scan->rs_startblock) ||
-               (scan->rs_numblocks != InvalidBlockNumber ? --scan->rs_numblocks == 0 : false);
+           finished = (page == scan->rs_scan.rs_startblock) ||
+               (scan->rs_scan.rs_numblocks != InvalidBlockNumber ? --scan->rs_scan.rs_numblocks == 0 : false);
 
            /*
             * Report our new scan position for synchronization purposes. We
@@ -1023,8 +1020,8 @@ heapgettup_pagemode(HeapScanDesc scan,
             * a little bit backwards on every invocation, which is confusing.
             * We don't guarantee any specific ordering in general, though.
             */
-           if (scan->rs_syncscan)
-               ss_report_location(scan->rs_rd, page);
+           if (scan->rs_scan.rs_syncscan)
+               ss_report_location(scan->rs_scan.rs_rd, page);
        }
 
        /*
@@ -1041,10 +1038,11 @@ heapgettup_pagemode(HeapScanDesc scan,
            return;
        }
 
-       heapgetpage(scan, page);
+       heapgetpage((TableScanDesc) scan, page);
 
+       LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
        dp = BufferGetPage(scan->rs_cbuf);
-       TestForOldSnapshot(scan->rs_snapshot, scan->rs_rd, dp);
+       TestForOldSnapshot(scan->rs_scan.rs_snapshot, scan->rs_scan.rs_rd, dp);
        lines = scan->rs_ntuples;
        linesleft = lines;
        if (backward)
@@ -1387,87 +1385,16 @@ heap_openrv_extended(const RangeVar *relation, LOCKMODE lockmode,
    return r;
 }
 
-
-/* ----------------
- *     heap_beginscan  - begin relation scan
- *
- * heap_beginscan is the "standard" case.
- *
- * heap_beginscan_catalog differs in setting up its own temporary snapshot.
- *
- * heap_beginscan_strat offers an extended API that lets the caller control
- * whether a nondefault buffer access strategy can be used, and whether
- * syncscan can be chosen (possibly resulting in the scan not starting from
- * block zero).  Both of these default to true with plain heap_beginscan.
- *
- * heap_beginscan_bm is an alternative entry point for setting up a
- * HeapScanDesc for a bitmap heap scan.  Although that scan technology is
- * really quite unlike a standard seqscan, there is just enough commonality
- * to make it worth using the same data structure.
- *
- * heap_beginscan_sampling is an alternative entry point for setting up a
- * HeapScanDesc for a TABLESAMPLE scan.  As with bitmap scans, it's worth
- * using the same data structure although the behavior is rather different.
- * In addition to the options offered by heap_beginscan_strat, this call
- * also allows control of whether page-mode visibility checking is used.
- * ----------------
- */
-HeapScanDesc
+TableScanDesc
 heap_beginscan(Relation relation, Snapshot snapshot,
-              int nkeys, ScanKey key)
-{
-   return heap_beginscan_internal(relation, snapshot, nkeys, key, NULL,
-                                  true, true, true, false, false, false);
-}
-
-HeapScanDesc
-heap_beginscan_catalog(Relation relation, int nkeys, ScanKey key)
-{
-   Oid         relid = RelationGetRelid(relation);
-   Snapshot    snapshot = RegisterSnapshot(GetCatalogSnapshot(relid));
-
-   return heap_beginscan_internal(relation, snapshot, nkeys, key, NULL,
-                                  true, true, true, false, false, true);
-}
-
-HeapScanDesc
-heap_beginscan_strat(Relation relation, Snapshot snapshot,
-                    int nkeys, ScanKey key,
-                    bool allow_strat, bool allow_sync)
-{
-   return heap_beginscan_internal(relation, snapshot, nkeys, key, NULL,
-                                  allow_strat, allow_sync, true,
-                                  false, false, false);
-}
-
-HeapScanDesc
-heap_beginscan_bm(Relation relation, Snapshot snapshot,
-                 int nkeys, ScanKey key)
-{
-   return heap_beginscan_internal(relation, snapshot, nkeys, key, NULL,
-                                  false, false, true, true, false, false);
-}
-
-HeapScanDesc
-heap_beginscan_sampling(Relation relation, Snapshot snapshot,
-                       int nkeys, ScanKey key,
-                       bool allow_strat, bool allow_sync, bool allow_pagemode)
-{
-   return heap_beginscan_internal(relation, snapshot, nkeys, key, NULL,
-                                  allow_strat, allow_sync, allow_pagemode,
-                                  false, true, false);
-}
-
-static HeapScanDesc
-heap_beginscan_internal(Relation relation, Snapshot snapshot,
-                       int nkeys, ScanKey key,
-                       ParallelHeapScanDesc parallel_scan,
-                       bool allow_strat,
-                       bool allow_sync,
-                       bool allow_pagemode,
-                       bool is_bitmapscan,
-                       bool is_samplescan,
-                       bool temp_snap)
+              int nkeys, ScanKey key,
+              ParallelTableScanDesc parallel_scan,
+              bool allow_strat,
+              bool allow_sync,
+              bool allow_pagemode,
+              bool is_bitmapscan,
+              bool is_samplescan,
+              bool temp_snap)
 {
    HeapScanDesc scan;
 
@@ -1485,21 +1412,21 @@ heap_beginscan_internal(Relation relation, Snapshot snapshot,
     */
    scan = (HeapScanDesc) palloc(sizeof(HeapScanDescData));
 
-   scan->rs_rd = relation;
-   scan->rs_snapshot = snapshot;
-   scan->rs_nkeys = nkeys;
-   scan->rs_bitmapscan = is_bitmapscan;
-   scan->rs_samplescan = is_samplescan;
+   scan->rs_scan.rs_rd = relation;
+   scan->rs_scan.rs_snapshot = snapshot;
+   scan->rs_scan.rs_nkeys = nkeys;
+   scan->rs_scan.rs_bitmapscan = is_bitmapscan;
+   scan->rs_scan.rs_samplescan = is_samplescan;
    scan->rs_strategy = NULL;   /* set in initscan */
-   scan->rs_allow_strat = allow_strat;
-   scan->rs_allow_sync = allow_sync;
-   scan->rs_temp_snap = temp_snap;
-   scan->rs_parallel = parallel_scan;
+   scan->rs_scan.rs_allow_strat = allow_strat;
+   scan->rs_scan.rs_allow_sync = allow_sync;
+   scan->rs_scan.rs_temp_snap = temp_snap;
+   scan->rs_scan.rs_parallel = parallel_scan;
 
    /*
     * we can use page-at-a-time mode if it's an MVCC-safe snapshot
     */
-   scan->rs_pageatatime = allow_pagemode && IsMVCCSnapshot(snapshot);
+   scan->rs_scan.rs_pageatatime = allow_pagemode && snapshot && IsMVCCSnapshot(snapshot);
 
    /*
     * For a seqscan in a serializable transaction, acquire a predicate lock
@@ -1512,7 +1439,7 @@ heap_beginscan_internal(Relation relation, Snapshot snapshot,
     * covering the predicate. But in that case we still have to lock any
     * matching heap tuples.
     */
-   if (!is_bitmapscan)
+   if (!is_bitmapscan && snapshot)
        PredicateLockRelation(relation, snapshot);
 
    /* we only need to set this up once */
@@ -1523,13 +1450,13 @@ heap_beginscan_internal(Relation relation, Snapshot snapshot,
     * initscan() and we don't want to allocate memory again
     */
    if (nkeys > 0)
-       scan->rs_key = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys);
+       scan->rs_scan.rs_key = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys);
    else
-       scan->rs_key = NULL;
+       scan->rs_scan.rs_key = NULL;
 
    initscan(scan, key, false);
 
-   return scan;
+   return (TableScanDesc) scan;
 }
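+
+/*
+ * Illustrative sketch, not part of this patch (helper name hypothetical):
+ * with the consolidated signature above, the defaults the old plain
+ * heap_beginscan() wrapper supplied are now spelled out by the caller.
+ */
+#ifdef NOT_USED
+static TableScanDesc
+example_beginscan_default(Relation relation, Snapshot snapshot)
+{
+   /*
+    * allow_strat/allow_sync/allow_pagemode are true and the bitmapscan,
+    * samplescan and temp_snap flags false, matching what plain
+    * heap_beginscan() used to default to.
+    */
+   return heap_beginscan(relation, snapshot, 0, NULL, NULL,
+                         true, true, true, false, false, false);
+}
+#endif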
 
 /* ----------------
@@ -1537,9 +1464,18 @@ heap_beginscan_internal(Relation relation, Snapshot snapshot,
  * ----------------
  */
 void
-heap_rescan(HeapScanDesc scan,
-           ScanKey key)
+heap_rescan(TableScanDesc sscan, ScanKey key, bool set_params,
+           bool allow_strat, bool allow_sync, bool allow_pagemode)
 {
+   HeapScanDesc scan = (HeapScanDesc) sscan;
+
+   if (set_params)
+   {
+       scan->rs_scan.rs_allow_strat = allow_strat;
+       scan->rs_scan.rs_allow_sync = allow_sync;
+       scan->rs_scan.rs_pageatatime = allow_pagemode &&
+           scan->rs_scan.rs_snapshot && IsMVCCSnapshot(scan->rs_scan.rs_snapshot);
+   }
+
    /*
     * unpin scan buffers
     */
@@ -1550,27 +1486,21 @@ heap_rescan(HeapScanDesc scan,
     * reinitialize scan descriptor
     */
    initscan(scan, key, true);
-}
 
-/* ----------------
- *     heap_rescan_set_params  - restart a relation scan after changing params
- *
- * This call allows changing the buffer strategy, syncscan, and pagemode
- * options before starting a fresh scan.  Note that although the actual use
- * of syncscan might change (effectively, enabling or disabling reporting),
- * the previously selected startblock will be kept.
- * ----------------
- */
-void
-heap_rescan_set_params(HeapScanDesc scan, ScanKey key,
-                      bool allow_strat, bool allow_sync, bool allow_pagemode)
-{
-   /* adjust parameters */
-   scan->rs_allow_strat = allow_strat;
-   scan->rs_allow_sync = allow_sync;
-   scan->rs_pageatatime = allow_pagemode && IsMVCCSnapshot(scan->rs_snapshot);
-   /* ... and rescan */
-   heap_rescan(scan, key);
+   /*
+    * reset parallel scan, if present
+    */
+   if (scan->rs_scan.rs_parallel != NULL)
+   {
+       ParallelTableScanDesc parallel_scan;
+
+       /*
+        * Caller is responsible for making sure that all workers have
+        * finished the scan before calling this.
+        */
+       parallel_scan = scan->rs_scan.rs_parallel;
+       pg_atomic_write_u64(&parallel_scan->phs_nallocated, 0);
+   }
 }
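+
+/*
+ * Illustrative sketch, not part of this patch (helper name hypothetical):
+ * the removed heap_rescan_set_params() is now a heap_rescan() call with
+ * set_params = true.
+ */
+#ifdef NOT_USED
+static void
+example_rescan(TableScanDesc scan, ScanKey key)
+{
+   /* plain rescan; the previously selected parameters are kept */
+   heap_rescan(scan, key, false, false, false, false);
+
+   /* rescan while adjusting the strategy/syncscan/pagemode options */
+   heap_rescan(scan, key, true, true, false, true);
+}
+#endif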
 
 /* ----------------
@@ -1581,8 +1511,10 @@ heap_rescan_set_params(HeapScanDesc scan, ScanKey key,
  * ----------------
  */
 void
-heap_endscan(HeapScanDesc scan)
+heap_endscan(TableScanDesc sscan)
 {
+   HeapScanDesc scan = (HeapScanDesc) sscan;
+
    /* Note: no locking manipulations needed */
 
    /*
@@ -1594,215 +1526,20 @@ heap_endscan(HeapScanDesc scan)
    /*
     * decrement relation reference count and free scan descriptor storage
     */
-   RelationDecrementReferenceCount(scan->rs_rd);
+   RelationDecrementReferenceCount(scan->rs_scan.rs_rd);
 
-   if (scan->rs_key)
-       pfree(scan->rs_key);
+   if (scan->rs_scan.rs_key)
+       pfree(scan->rs_scan.rs_key);
 
    if (scan->rs_strategy != NULL)
        FreeAccessStrategy(scan->rs_strategy);
 
-   if (scan->rs_temp_snap)
-       UnregisterSnapshot(scan->rs_snapshot);
+   if (scan->rs_scan.rs_temp_snap)
+       UnregisterSnapshot(scan->rs_scan.rs_snapshot);
 
    pfree(scan);
 }
 
-/* ----------------
- *     heap_parallelscan_estimate - estimate storage for ParallelHeapScanDesc
- *
- *     Sadly, this doesn't reduce to a constant, because the size required
- *     to serialize the snapshot can vary.
- * ----------------
- */
-Size
-heap_parallelscan_estimate(Snapshot snapshot)
-{
-   return add_size(offsetof(ParallelHeapScanDescData, phs_snapshot_data),
-                   EstimateSnapshotSpace(snapshot));
-}
-
-/* ----------------
- *     heap_parallelscan_initialize - initialize ParallelHeapScanDesc
- *
- *     Must allow as many bytes of shared memory as returned by
- *     heap_parallelscan_estimate.  Call this just once in the leader
- *     process; then, individual workers attach via heap_beginscan_parallel.
- * ----------------
- */
-void
-heap_parallelscan_initialize(ParallelHeapScanDesc target, Relation relation,
-                            Snapshot snapshot)
-{
-   target->phs_relid = RelationGetRelid(relation);
-   target->phs_nblocks = RelationGetNumberOfBlocks(relation);
-   /* compare phs_syncscan initialization to similar logic in initscan */
-   target->phs_syncscan = synchronize_seqscans &&
-       !RelationUsesLocalBuffers(relation) &&
-       target->phs_nblocks > NBuffers / 4;
-   SpinLockInit(&target->phs_mutex);
-   target->phs_startblock = InvalidBlockNumber;
-   pg_atomic_init_u64(&target->phs_nallocated, 0);
-   if (IsMVCCSnapshot(snapshot))
-   {
-       SerializeSnapshot(snapshot, target->phs_snapshot_data);
-       target->phs_snapshot_any = false;
-   }
-   else
-   {
-       Assert(snapshot == SnapshotAny);
-       target->phs_snapshot_any = true;
-   }
-}
-
-/* ----------------
- *     heap_parallelscan_reinitialize - reset a parallel scan
- *
- *     Call this in the leader process.  Caller is responsible for
- *     making sure that all workers have finished the scan beforehand.
- * ----------------
- */
-void
-heap_parallelscan_reinitialize(ParallelHeapScanDesc parallel_scan)
-{
-   pg_atomic_write_u64(&parallel_scan->phs_nallocated, 0);
-}
-
-/* ----------------
- *     heap_beginscan_parallel - join a parallel scan
- *
- *     Caller must hold a suitable lock on the correct relation.
- * ----------------
- */
-HeapScanDesc
-heap_beginscan_parallel(Relation relation, ParallelHeapScanDesc parallel_scan)
-{
-   Snapshot    snapshot;
-
-   Assert(RelationGetRelid(relation) == parallel_scan->phs_relid);
-
-   if (!parallel_scan->phs_snapshot_any)
-   {
-       /* Snapshot was serialized -- restore it */
-       snapshot = RestoreSnapshot(parallel_scan->phs_snapshot_data);
-       RegisterSnapshot(snapshot);
-   }
-   else
-   {
-       /* SnapshotAny passed by caller (not serialized) */
-       snapshot = SnapshotAny;
-   }
-
-   return heap_beginscan_internal(relation, snapshot, 0, NULL, parallel_scan,
-                                  true, true, true, false, false,
-                                  !parallel_scan->phs_snapshot_any);
-}
-
-/* ----------------
- *     heap_parallelscan_startblock_init - find and set the scan's startblock
- *
- *     Determine where the parallel seq scan should start.  This function may
- *     be called many times, once by each parallel worker.  We must be careful
- *     only to set the startblock once.
- * ----------------
- */
-static void
-heap_parallelscan_startblock_init(HeapScanDesc scan)
-{
-   BlockNumber sync_startpage = InvalidBlockNumber;
-   ParallelHeapScanDesc parallel_scan;
-
-   Assert(scan->rs_parallel);
-   parallel_scan = scan->rs_parallel;
-
-retry:
-   /* Grab the spinlock. */
-   SpinLockAcquire(&parallel_scan->phs_mutex);
-
-   /*
-    * If the scan's startblock has not yet been initialized, we must do so
-    * now.  If this is not a synchronized scan, we just start at block 0, but
-    * if it is a synchronized scan, we must get the starting position from
-    * the synchronized scan machinery.  We can't hold the spinlock while
-    * doing that, though, so release the spinlock, get the information we
-    * need, and retry.  If nobody else has initialized the scan in the
-    * meantime, we'll fill in the value we fetched on the second time
-    * through.
-    */
-   if (parallel_scan->phs_startblock == InvalidBlockNumber)
-   {
-       if (!parallel_scan->phs_syncscan)
-           parallel_scan->phs_startblock = 0;
-       else if (sync_startpage != InvalidBlockNumber)
-           parallel_scan->phs_startblock = sync_startpage;
-       else
-       {
-           SpinLockRelease(&parallel_scan->phs_mutex);
-           sync_startpage = ss_get_location(scan->rs_rd, scan->rs_nblocks);
-           goto retry;
-       }
-   }
-   SpinLockRelease(&parallel_scan->phs_mutex);
-}
-
-/* ----------------
- *     heap_parallelscan_nextpage - get the next page to scan
- *
- *     Get the next page to scan.  Even if there are no pages left to scan,
- *     another backend could have grabbed a page to scan and not yet finished
- *     looking at it, so it doesn't follow that the scan is done when the
- *     first backend gets an InvalidBlockNumber return.
- * ----------------
- */
-static BlockNumber
-heap_parallelscan_nextpage(HeapScanDesc scan)
-{
-   BlockNumber page;
-   ParallelHeapScanDesc parallel_scan;
-   uint64      nallocated;
-
-   Assert(scan->rs_parallel);
-   parallel_scan = scan->rs_parallel;
-
-   /*
-    * phs_nallocated tracks how many pages have been allocated to workers
-    * already.  When phs_nallocated >= rs_nblocks, all blocks have been
-    * allocated.
-    *
-    * Because we use an atomic fetch-and-add to fetch the current value, the
-    * phs_nallocated counter will exceed rs_nblocks, because workers will
-    * still increment the value, when they try to allocate the next block but
-    * all blocks have been allocated already. The counter must be 64 bits
-    * wide because of that, to avoid wrapping around when rs_nblocks is close
-    * to 2^32.
-    *
-    * The actual page to return is calculated by adding the counter to the
-    * starting block number, modulo nblocks.
-    */
-   nallocated = pg_atomic_fetch_add_u64(&parallel_scan->phs_nallocated, 1);
-   if (nallocated >= scan->rs_nblocks)
-       page = InvalidBlockNumber;  /* all blocks have been allocated */
-   else
-       page = (nallocated + parallel_scan->phs_startblock) % scan->rs_nblocks;
-
-   /*
-    * Report scan location.  Normally, we report the current page number.
-    * When we reach the end of the scan, though, we report the starting page,
-    * not the ending page, just so the starting positions for later scans
-    * doesn't slew backwards.  We only report the position at the end of the
-    * scan once, though: subsequent callers will report nothing.
-    */
-   if (scan->rs_syncscan)
-   {
-       if (page != InvalidBlockNumber)
-           ss_report_location(scan->rs_rd, page);
-       else if (nallocated == scan->rs_nblocks)
-           ss_report_location(scan->rs_rd, parallel_scan->phs_startblock);
-   }
-
-   return page;
-}
-
 /* ----------------
  *     heap_update_snapshot
  *
@@ -1810,13 +1547,15 @@ heap_parallelscan_nextpage(HeapScanDesc scan)
  * ----------------
  */
 void
-heap_update_snapshot(HeapScanDesc scan, Snapshot snapshot)
+heap_update_snapshot(TableScanDesc sscan, Snapshot snapshot)
 {
+   HeapScanDesc scan = (HeapScanDesc) sscan;
+
    Assert(IsMVCCSnapshot(snapshot));
 
    RegisterSnapshot(snapshot);
-   scan->rs_snapshot = snapshot;
-   scan->rs_temp_snap = true;
+   scan->rs_scan.rs_snapshot = snapshot;
+   scan->rs_scan.rs_temp_snap = true;
 }
 
 /* ----------------
@@ -1842,19 +1581,20 @@ heap_update_snapshot(HeapScanDesc scan, Snapshot snapshot)
 #define HEAPDEBUG_3
 #endif                         /* !defined(HEAPDEBUGALL) */
 
-
 HeapTuple
-heap_getnext(HeapScanDesc scan, ScanDirection direction)
+heap_getnext(TableScanDesc sscan, ScanDirection direction)
 {
+   HeapScanDesc scan = (HeapScanDesc) sscan;
+
    /* Note: no locking manipulations needed */
 
    HEAPDEBUG_1;                /* heap_getnext( info ) */
 
-   if (scan->rs_pageatatime)
+   if (scan->rs_scan.rs_pageatatime)
        heapgettup_pagemode(scan, direction,
-                           scan->rs_nkeys, scan->rs_key);
+                           scan->rs_scan.rs_nkeys, scan->rs_scan.rs_key);
    else
-       heapgettup(scan, direction, scan->rs_nkeys, scan->rs_key);
+       heapgettup(scan, direction, scan->rs_scan.rs_nkeys, scan->rs_scan.rs_key);
 
    if (scan->rs_ctup.t_data == NULL)
    {
@@ -1868,9 +1608,57 @@ heap_getnext(HeapScanDesc scan, ScanDirection direction)
     */
    HEAPDEBUG_3;                /* heap_getnext returning tuple */
 
-   pgstat_count_heap_getnext(scan->rs_rd);
+   pgstat_count_heap_getnext(scan->rs_scan.rs_rd);
+
+   return &scan->rs_ctup;
+}
+
+#ifdef HEAPAMSLOTDEBUGALL
+#define HEAPAMSLOTDEBUG_1 \
+   elog(DEBUG2, "heapam_getnext([%s,nkeys=%d],dir=%d) called", \
+        RelationGetRelationName(scan->rs_scan.rs_rd), scan->rs_scan.rs_nkeys, (int) direction)
+#define HEAPAMSLOTDEBUG_2 \
+   elog(DEBUG2, "heapam_getnext returning EOS")
+#define HEAPAMSLOTDEBUG_3 \
+   elog(DEBUG2, "heapam_getnext returning tuple")
+#else
+#define HEAPAMSLOTDEBUG_1
+#define HEAPAMSLOTDEBUG_2
+#define HEAPAMSLOTDEBUG_3
+#endif
+
+TupleTableSlot *
+heap_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
+{
+   HeapScanDesc scan = (HeapScanDesc) sscan;
+
+   /* Note: no locking manipulations needed */
+
+   HEAPAMSLOTDEBUG_1;          /* heap_getnextslot( info ) */
+
+   if (scan->rs_scan.rs_pageatatime)
+       heapgettup_pagemode(scan, direction,
+                           scan->rs_scan.rs_nkeys, scan->rs_scan.rs_key);
+   else
+       heapgettup(scan, direction, scan->rs_scan.rs_nkeys, scan->rs_scan.rs_key);
 
-   return &(scan->rs_ctup);
+   if (scan->rs_ctup.t_data == NULL)
+   {
+       HEAPAMSLOTDEBUG_2;      /* heap_getnextslot returning EOS */
+       ExecClearTuple(slot);
+       return NULL;
+   }
+
+   /*
+    * if we get here it means we have a new current scan tuple, so point to
+    * the proper return buffer and return the tuple.
+    */
+   HEAPAMSLOTDEBUG_3;          /* heap_getnextslot returning tuple */
+
+   pgstat_count_heap_getnext(scan->rs_scan.rs_rd);
+
+   return ExecStoreBufferHeapTuple(&scan->rs_ctup, slot,
+                                   scan->rs_cbuf);
 }
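+
+/*
+ * Illustrative sketch, not part of this patch (helper name hypothetical):
+ * a minimal consumer loop for the slot-based scan API above.  The slot is
+ * created with the heap AM's slot type and reused across calls.
+ */
+#ifdef NOT_USED
+static void
+example_scan_with_slots(Relation rel, Snapshot snapshot)
+{
+   TableScanDesc scan = heap_beginscan(rel, snapshot, 0, NULL, NULL,
+                                       true, true, true,
+                                       false, false, false);
+   TupleTableSlot *slot = MakeSingleTupleTableSlot(RelationGetDescr(rel),
+                                                   &TTSOpsBufferHeapTuple);
+
+   while (heap_getnextslot(scan, ForwardScanDirection, slot) != NULL)
+   {
+       /* process slot; its contents remain valid until the next call */
+   }
+
+   ExecDropSingleTupleTableSlot(slot);
+   heap_endscan(scan);
+}
+#endif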
 
 /*
@@ -1890,10 +1678,8 @@ heap_getnext(HeapScanDesc scan, ScanDirection direction)
  * If the tuple is found but fails the time qual check, then false is returned
  * but tuple->t_data is left pointing to the tuple.
  *
- * keep_buf determines what is done with the buffer in the false-result cases.
- * When the caller specifies keep_buf = true, we retain the pin on the buffer
- * and return it in *userbuf (so the caller must eventually unpin it); when
- * keep_buf = false, the pin is released and *userbuf is set to InvalidBuffer.
+ * In the false-result cases the buffer pin is released and *userbuf is set to
+ * InvalidBuffer.
  *
  * stats_relation is the relation to charge the heap_fetch operation against
  * for statistical purposes.  (This could be the heap rel itself, an
@@ -1913,13 +1699,12 @@ heap_getnext(HeapScanDesc scan, ScanDirection direction)
  */
 bool
 heap_fetch(Relation relation,
+          ItemPointer tid,
           Snapshot snapshot,
           HeapTuple tuple,
           Buffer *userbuf,
-          bool keep_buf,
           Relation stats_relation)
 {
-   ItemPointer tid = &(tuple->t_self);
    ItemId      lp;
    Buffer      buffer;
    Page        page;
@@ -1946,13 +1731,8 @@ heap_fetch(Relation relation,
    if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(page))
    {
        LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
-       if (keep_buf)
-           *userbuf = buffer;
-       else
-       {
-           ReleaseBuffer(buffer);
-           *userbuf = InvalidBuffer;
-       }
+       ReleaseBuffer(buffer);
+       *userbuf = InvalidBuffer;
        tuple->t_data = NULL;
        return false;
    }
@@ -1968,20 +1748,16 @@ heap_fetch(Relation relation,
    if (!ItemIdIsNormal(lp))
    {
        LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
-       if (keep_buf)
-           *userbuf = buffer;
-       else
-       {
-           ReleaseBuffer(buffer);
-           *userbuf = InvalidBuffer;
-       }
+       ReleaseBuffer(buffer);
+       *userbuf = InvalidBuffer;
        tuple->t_data = NULL;
        return false;
    }
 
    /*
-    * fill in *tuple fields
+    * fill in *tuple fields, copying the caller-supplied tid into t_self
     */
+   ItemPointerCopy(tid, &(tuple->t_self));
    tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
    tuple->t_len = ItemIdGetLength(lp);
    tuple->t_tableOid = RelationGetRelid(relation);
@@ -1989,7 +1765,7 @@ heap_fetch(Relation relation,
    /*
     * check time qualification of tuple, then release lock
     */
-   valid = HeapTupleSatisfiesVisibility(tuple, snapshot, buffer);
+   valid = HeapTupleSatisfies(tuple, snapshot, buffer);
 
    if (valid)
        PredicateLockTuple(relation, tuple, snapshot);
@@ -2013,14 +1789,9 @@ heap_fetch(Relation relation,
        return true;
    }
 
-   /* Tuple failed time qual, but maybe caller wants to see it anyway. */
-   if (keep_buf)
-       *userbuf = buffer;
-   else
-   {
-       ReleaseBuffer(buffer);
-       *userbuf = InvalidBuffer;
-   }
+   /* Tuple failed time qual */
+   ReleaseBuffer(buffer);
+   *userbuf = InvalidBuffer;
 
    return false;
 }
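+
+/*
+ * Illustrative sketch, not part of this patch (helper name hypothetical):
+ * with the tid now passed explicitly, callers no longer pre-load
+ * tuple->t_self, and there is no keep_buf mode to consider.
+ */
+#ifdef NOT_USED
+static bool
+example_fetch(Relation rel, ItemPointer tid, Snapshot snapshot)
+{
+   HeapTupleData tuple;
+   Buffer      buffer;
+
+   if (heap_fetch(rel, tid, snapshot, &tuple, &buffer, NULL))
+   {
+       /* tuple is valid while we hold the pin returned in buffer */
+       ReleaseBuffer(buffer);
+       return true;
+   }
+   return false;
+}
+#endif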
@@ -2136,7 +1907,7 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
            ItemPointerSet(&(heapTuple->t_self), BufferGetBlockNumber(buffer), offnum);
 
            /* If it's visible per the snapshot, we must return it */
-           valid = HeapTupleSatisfiesVisibility(heapTuple, snapshot, buffer);
+           valid = HeapTupleSatisfies(heapTuple, snapshot, buffer);
            CheckForSerializableConflictOut(valid, relation, heapTuple,
                                            buffer, snapshot);
            /* reset to original, non-redirected, tid */
@@ -2310,7 +2081,7 @@ heap_get_latest_tid(Relation relation,
         * Check time qualification of tuple; if visible, set it as the new
         * result candidate.
         */
-       valid = HeapTupleSatisfiesVisibility(&tp, snapshot, buffer);
+       valid = HeapTupleSatisfies(&tp, snapshot, buffer);
        CheckForSerializableConflictOut(valid, relation, &tp, buffer, snapshot);
        if (valid)
            *tid = ctid;
@@ -2333,7 +2104,6 @@ heap_get_latest_tid(Relation relation,
    }                           /* end of loop */
 }
 
-
 /*
  * UpdateXmaxHintBits - update tuple hint bits after xmax transaction ends
  *
@@ -2380,7 +2150,7 @@ GetBulkInsertState(void)
    bistate = (BulkInsertState) palloc(sizeof(BulkInsertStateData));
    bistate->strategy = GetAccessStrategy(BAS_BULKWRITE);
    bistate->current_buf = InvalidBuffer;
-   return bistate;
+   return (void *) bistate;
 }
 
 /*
@@ -2694,7 +2464,7 @@ heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid,
  * temporary context before calling this, if that's a problem.
  */
 void
-heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
+heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
                  CommandId cid, int options, BulkInsertState bistate)
 {
    TransactionId xid = GetCurrentTransactionId();
@@ -2715,12 +2485,17 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
    saveFreeSpace = RelationGetTargetPageFreeSpace(relation,
                                                   HEAP_DEFAULT_FILLFACTOR);
 
-   /* Toast and set header data in all the tuples */
+   /* Toast and set header data in all the slots */
    heaptuples = palloc(ntuples * sizeof(HeapTuple));
    for (i = 0; i < ntuples; i++)
-       heaptuples[i] = heap_prepare_insert(relation, tuples[i],
+   {
+       heaptuples[i] = heap_prepare_insert(relation, ExecFetchSlotHeapTuple(slots[i], true, NULL),
                                            xid, cid, options);
 
+       if (slots[i]->tts_tableOid != InvalidOid)
+           heaptuples[i]->t_tableOid = slots[i]->tts_tableOid;
+   }
+
    /*
     * We're about to do the actual inserts -- but check for conflict first,
     * to minimize the possibility of having to roll back work we've just
@@ -2955,7 +2730,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
     * probably faster to always copy than check.
     */
    for (i = 0; i < ntuples; i++)
-       tuples[i]->t_self = heaptuples[i]->t_self;
+       slots[i]->tts_tid = heaptuples[i]->t_self;
 
    pgstat_count_heap_insert(relation, ntuples);
 }
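+
+/*
+ * Illustrative sketch, not part of this patch (helper name hypothetical):
+ * callers now hand heap_multi_insert() an array of TupleTableSlots; the
+ * assigned TIDs come back in each slot's tts_tid.
+ */
+#ifdef NOT_USED
+static void
+example_multi_insert(Relation rel, TupleTableSlot **slots, int nslots,
+                    CommandId cid)
+{
+   heap_multi_insert(rel, slots, nslots, cid, 0, NULL);
+   /* slots[i]->tts_tid now holds the TID assigned to slots[i] */
+}
+#endif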
@@ -3225,7 +3000,7 @@ l1:
    if (crosscheck != InvalidSnapshot && result == HeapTupleMayBeUpdated)
    {
        /* Perform additional check for transaction-snapshot mode RI updates */
-       if (!HeapTupleSatisfiesVisibility(&tp, crosscheck, buffer))
+       if (!HeapTupleSatisfies(&tp, crosscheck, buffer))
            result = HeapTupleUpdated;
    }
 
@@ -3233,6 +3008,7 @@ l1:
    {
        Assert(result == HeapTupleSelfUpdated ||
               result == HeapTupleUpdated ||
+              result == HeapTupleDeleted ||
               result == HeapTupleBeingUpdated);
        Assert(!(tp.t_data->t_infomask & HEAP_XMAX_INVALID));
        hufd->ctid = tp.t_data->t_ctid;
@@ -3246,6 +3022,8 @@ l1:
            UnlockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive);
        if (vmbuffer != InvalidBuffer)
            ReleaseBuffer(vmbuffer);
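+       /*
+        * A deleted tuple's t_ctid points at itself, so report a concurrent
+        * delete distinctly from a concurrent update.
+        */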
+       if (result == HeapTupleUpdated && ItemPointerEquals(tid, &hufd->ctid))
+           result = HeapTupleDeleted;
        return result;
    }
 
@@ -3463,6 +3241,10 @@ simple_heap_delete(Relation relation, ItemPointer tid)
            elog(ERROR, "tuple concurrently updated");
            break;
 
+       case HeapTupleDeleted:
+           elog(ERROR, "tuple concurrently deleted");
+           break;
+
        default:
            elog(ERROR, "unrecognized heap_delete status: %u", result);
            break;
@@ -3860,7 +3642,7 @@ l2:
    if (crosscheck != InvalidSnapshot && result == HeapTupleMayBeUpdated)
    {
        /* Perform additional check for transaction-snapshot mode RI updates */
-       if (!HeapTupleSatisfiesVisibility(&oldtup, crosscheck, buffer))
+       if (!HeapTupleSatisfies(&oldtup, crosscheck, buffer))
            result = HeapTupleUpdated;
    }
 
@@ -3868,6 +3650,7 @@ l2:
    {
        Assert(result == HeapTupleSelfUpdated ||
               result == HeapTupleUpdated ||
+              result == HeapTupleDeleted ||
               result == HeapTupleBeingUpdated);
        Assert(!(oldtup.t_data->t_infomask & HEAP_XMAX_INVALID));
        hufd->ctid = oldtup.t_data->t_ctid;
@@ -3887,6 +3670,8 @@ l2:
        bms_free(id_attrs);
        bms_free(modified_attrs);
        bms_free(interesting_attrs);
+       if (result == HeapTupleUpdated && ItemPointerEquals(otid, &hufd->ctid))
+           result = HeapTupleDeleted;
        return result;
    }
 
@@ -4593,6 +4378,10 @@ simple_heap_update(Relation relation, ItemPointer otid, HeapTuple tup)
            elog(ERROR, "tuple concurrently updated");
            break;
 
+       case HeapTupleDeleted:
+           elog(ERROR, "tuple concurrently deleted");
+           break;
+
        default:
            elog(ERROR, "unrecognized heap_update status: %u", result);
            break;
@@ -4627,7 +4416,7 @@ get_mxact_status_for_lock(LockTupleMode mode, bool is_update)
  *
  * Input parameters:
  * relation: relation containing tuple (caller must hold suitable lock)
- * tuple->t_self: TID of tuple to lock (rest of struct need not be valid)
+ * tid: TID of tuple to lock
  * cid: current command ID (used for visibility test, and stored into
  *     tuple's cmax if lock is successful)
  * mode: indicates if shared or exclusive tuple lock is desired
@@ -4645,6 +4434,7 @@ get_mxact_status_for_lock(LockTupleMode mode, bool is_update)
  * HeapTupleInvisible: lock failed because tuple was never visible to us
  * HeapTupleSelfUpdated: lock failed because tuple updated by self
  * HeapTupleUpdated: lock failed because tuple updated by other xact
+ * HeapTupleDeleted: lock failed because tuple deleted by other xact
  * HeapTupleWouldBlock: lock couldn't be acquired and wait_policy is skip
  *
  * In the failure cases other than HeapTupleInvisible, the routine fills
@@ -4657,13 +4447,12 @@ get_mxact_status_for_lock(LockTupleMode mode, bool is_update)
  * See README.tuplock for a thorough explanation of this mechanism.
  */
 HTSU_Result
-heap_lock_tuple(Relation relation, HeapTuple tuple,
+heap_lock_tuple(Relation relation, ItemPointer tid,
                CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy,
                bool follow_updates,
-               Buffer *buffer, HeapUpdateFailureData *hufd)
+               HeapTuple tuple, Buffer *buffer, HeapUpdateFailureData *hufd)
 {
    HTSU_Result result;
-   ItemPointer tid = &(tuple->t_self);
    ItemId      lp;
    Page        page;
    Buffer      vmbuffer = InvalidBuffer;
@@ -4698,6 +4487,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
    tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
    tuple->t_len = ItemIdGetLength(lp);
    tuple->t_tableOid = RelationGetRelid(relation);
+   tuple->t_self = *tid;
 
 l3:
    result = HeapTupleSatisfiesUpdate(tuple, cid, *buffer);
@@ -4713,7 +4503,7 @@ l3:
        result = HeapTupleInvisible;
        goto out_locked;
    }
-   else if (result == HeapTupleBeingUpdated || result == HeapTupleUpdated)
+   else if (result == HeapTupleBeingUpdated || result == HeapTupleUpdated || result == HeapTupleDeleted)
    {
        TransactionId xwait;
        uint16      infomask;
@@ -4993,7 +4783,7 @@ l3:
         * or we must wait for the locking transaction or multixact; so below
         * we ensure that we grab buffer lock after the sleep.
         */
-       if (require_sleep && result == HeapTupleUpdated)
+       if (require_sleep && (result == HeapTupleUpdated || result == HeapTupleDeleted))
        {
            LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
            goto failed;
@@ -5153,6 +4943,8 @@ l3:
            HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask) ||
            HeapTupleHeaderIsOnlyLocked(tuple->t_data))
            result = HeapTupleMayBeUpdated;
+       else if (ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid))
+           result = HeapTupleDeleted;
        else
            result = HeapTupleUpdated;
    }
@@ -5161,7 +4953,7 @@ failed:
    if (result != HeapTupleMayBeUpdated)
    {
        Assert(result == HeapTupleSelfUpdated || result == HeapTupleUpdated ||
-              result == HeapTupleWouldBlock);
+              result == HeapTupleWouldBlock || result == HeapTupleDeleted);
        Assert(!(tuple->t_data->t_infomask & HEAP_XMAX_INVALID));
        hufd->ctid = tuple->t_data->t_ctid;
        hufd->xmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
@@ -5765,9 +5557,8 @@ heap_lock_updated_tuple_rec(Relation rel, ItemPointer tid, TransactionId xid,
        new_infomask = 0;
        new_xmax = InvalidTransactionId;
        block = ItemPointerGetBlockNumber(&tupid);
-       ItemPointerCopy(&tupid, &(mytup.t_self));
 
-       if (!heap_fetch(rel, SnapshotAny, &mytup, &buf, false, NULL))
+       if (!heap_fetch(rel, &tupid, SnapshotAny, &mytup, &buf, NULL))
        {
            /*
             * if we fail to find the updated version of the tuple, it's
@@ -6050,6 +5841,10 @@ next:
    result = HeapTupleMayBeUpdated;
 
 out_locked:
+
+   if (result == HeapTupleUpdated && ItemPointerEquals(&mytup.t_self, &mytup.t_data->t_ctid))
+       result = HeapTupleDeleted;
+
    UnlockReleaseBuffer(buf);
 
 out_unlocked:
@@ -6228,6 +6023,10 @@ heap_abort_speculative(Relation relation, HeapTuple tuple)
    BlockNumber block;
    Buffer      buffer;
 
+   /*
+    * XXX: ideally we would Assert(slot->tts_speculativeToken != 0) here,
+    * but the toast code does not yet set the token.
+    */
    Assert(ItemPointerIsValid(tid));
 
    block = ItemPointerGetBlockNumber(tid);
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
new file mode 100644 (file)
index 0000000..49f8d83
--- /dev/null
@@ -0,0 +1,2122 @@
+/*-------------------------------------------------------------------------
+ *
+ * heapam_handler.c
+ *   heap table access method code
+ *
+ * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *   src/backend/access/heap/heapam_handler.c
+ *
+ *
+ * NOTES
+ *   This file contains the heapam_ routines which implement the
+ *   POSTGRES heap table access method, exposed to the rest of the
+ *   system through the table AM API.
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "miscadmin.h"
+
+#include "access/heapam.h"
+#include "access/relscan.h"
+#include "access/rewriteheap.h"
+#include "access/tableam.h"
+#include "access/tsmapi.h"
+#include "catalog/catalog.h"
+#include "catalog/index.h"
+#include "catalog/pg_am_d.h"
+#include "executor/executor.h"
+#include "pgstat.h"
+#include "storage/lmgr.h"
+#include "utils/builtins.h"
+#include "utils/rel.h"
+#include "utils/tqual.h"
+#include "storage/bufpage.h"
+#include "storage/bufmgr.h"
+#include "storage/predicate.h"
+#include "storage/procarray.h"
+#include "storage/smgr.h"
+#include "access/xact.h"
+
+
+/* ----------------------------------------------------------------
+ *             table AM support routines for heapam
+ * ----------------------------------------------------------------
+ */
+
+static bool
+heapam_fetch_row_version(Relation relation,
+                        ItemPointer tid,
+                        Snapshot snapshot,
+                        TupleTableSlot *slot,
+                        Relation stats_relation)
+{
+   BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
+   Buffer buffer;
+
+   Assert(TTS_IS_BUFFERTUPLE(slot));
+
+   if (heap_fetch(relation, tid, snapshot, &bslot->base.tupdata, &buffer, stats_relation))
+   {
+       ExecStoreBufferHeapTuple(&bslot->base.tupdata, slot, buffer);
+       ReleaseBuffer(buffer);
+
+       slot->tts_tableOid = RelationGetRelid(relation);
+
+       return true;
+   }
+
+   slot->tts_tableOid = RelationGetRelid(relation);
+
+   return false;
+}
+
+/*
+ * Insert a heap tuple from a slot, which may contain an OID and speculative
+ * insertion token.
+ */
+static void
+heapam_heap_insert(Relation relation, TupleTableSlot *slot, CommandId cid,
+                  int options, BulkInsertState bistate)
+{
+   bool        shouldFree = true;
+   HeapTuple   tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
+
+   /* Update the tuple with table oid */
+   slot->tts_tableOid = RelationGetRelid(relation);
+   if (slot->tts_tableOid != InvalidOid)
+       tuple->t_tableOid = slot->tts_tableOid;
+
+   /* Perform the insertion, and copy the resulting ItemPointer */
+   heap_insert(relation, tuple, cid, options, bistate);
+   ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
+
+   if (shouldFree)
+       pfree(tuple);
+}
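+
+/*
+ * Illustrative sketch, not part of this patch: executor code reaches the
+ * callback above through the generic wrapper in tableam.h (assuming the
+ * table_insert() wrapper this patch introduces there), roughly:
+ *
+ *     slot->tts_tableOid = RelationGetRelid(rel);
+ *     table_insert(rel, slot, GetCurrentCommandId(true), 0, NULL);
+ */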
+
+static void
+heapam_heap_insert_speculative(Relation relation, TupleTableSlot *slot, CommandId cid,
+                              int options, BulkInsertState bistate, uint32 specToken)
+{
+   bool        shouldFree = true;
+   HeapTuple   tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
+
+   /* Update the tuple with table oid */
+   slot->tts_tableOid = RelationGetRelid(relation);
+   if (slot->tts_tableOid != InvalidOid)
+       tuple->t_tableOid = slot->tts_tableOid;
+
+   HeapTupleHeaderSetSpeculativeToken(tuple->t_data, specToken);
+
+   /* Perform the insertion, and copy the resulting ItemPointer */
+   heap_insert(relation, tuple, cid, options, bistate);
+   ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
+
+   if (shouldFree)
+       pfree(tuple);
+}
+
+static void
+heapam_heap_complete_speculative(Relation relation, TupleTableSlot *slot, uint32 specToken,
+                                bool succeeded)
+{
+   bool        shouldFree = true;
+   HeapTuple   tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
+
+   /* adjust the tuple's state accordingly */
+   if (succeeded)
+       heap_finish_speculative(relation, tuple);
+   else
+       heap_abort_speculative(relation, tuple);
+
+   if (shouldFree)
+       pfree(tuple);
+}
+
+
+static HTSU_Result
+heapam_heap_delete(Relation relation, ItemPointer tid, CommandId cid,
+                  Snapshot snapshot, Snapshot crosscheck, bool wait,
+                  HeapUpdateFailureData *hufd, bool changingPart)
+{
+   /*
+    * Currently, deleting index tuples is left to VACUUM.  If a table AM
+    * cleans up dead tuples itself, that would also be the time to delete
+    * the corresponding index tuples.
+    */
+   return heap_delete(relation, tid, cid, crosscheck, wait, hufd, changingPart);
+}
+
+
+/*
+ * Locks tuple and fetches its newest version and TID.
+ *
+ * relation - table containing tuple
+ * tid - TID of tuple to lock
+ * snapshot - snapshot identifying required version (used for assert check only)
+ * slot - tuple to be returned
+ * cid - current command ID (used for visibility test, and stored into
+ *       tuple's cmax if lock is successful)
+ * mode - indicates if shared or exclusive tuple lock is desired
+ * wait_policy - what to do if tuple lock is not available
+ * flags - indicates how to handle updated tuples
+ * hufd - output parameter, filled in failure cases
+ *
+ * Function result may be:
+ * HeapTupleMayBeUpdated: lock was successfully acquired
+ * HeapTupleInvisible: lock failed because tuple was never visible to us
+ * HeapTupleSelfUpdated: lock failed because tuple updated by self
+ * HeapTupleUpdated: lock failed because tuple updated by other xact
+ * HeapTupleDeleted: lock failed because tuple deleted by other xact
+ * HeapTupleWouldBlock: lock couldn't be acquired and wait_policy is skip
+ *
+ * In the failure cases other than HeapTupleInvisible, the routine fills
+ * *hufd with the tuple's t_ctid, t_xmax (resolving a possible MultiXact,
+ * if necessary), and t_cmax (the last only for HeapTupleSelfUpdated,
+ * since we cannot obtain cmax from a combocid generated by another
+ * transaction).
+ * See comments for struct HeapUpdateFailureData for additional info.
+ */
+static HTSU_Result
+heapam_lock_tuple(Relation relation, ItemPointer tid, Snapshot snapshot,
+               TupleTableSlot *slot, CommandId cid, LockTupleMode mode,
+               LockWaitPolicy wait_policy, uint8 flags,
+               HeapUpdateFailureData *hufd)
+{
+   BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
+   HTSU_Result     result;
+   Buffer          buffer;
+   HeapTuple       tuple = &bslot->base.tupdata;
+
+   hufd->traversed = false;
+
+   Assert(TTS_IS_BUFFERTUPLE(slot));
+
+retry:
+   result = heap_lock_tuple(relation, tid, cid, mode, wait_policy,
+                            (flags & TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS) != 0,
+                            tuple, &buffer, hufd);
+
+   if (result == HeapTupleUpdated &&
+       (flags & TUPLE_LOCK_FLAG_FIND_LAST_VERSION))
+   {
+       ReleaseBuffer(buffer);
+       /* Should not encounter speculative tuple on recheck */
+       Assert(!HeapTupleHeaderIsSpeculative(tuple->t_data));
+
+       if (!ItemPointerEquals(&hufd->ctid, &tuple->t_self))
+       {
+           SnapshotData    SnapshotDirty;
+           TransactionId   priorXmax;
+
+           /* it was updated, so look at the updated version */
+           *tid = hufd->ctid;
+           /* updated row should have xmin matching this xmax */
+           priorXmax = hufd->xmax;
+
+           /*
+            * fetch target tuple
+            *
+            * Loop here to deal with updated or busy tuples
+            */
+           InitDirtySnapshot(SnapshotDirty);
+           for (;;)
+           {
+               if (ItemPointerIndicatesMovedPartitions(tid))
+                   ereport(ERROR,
+                           (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+                            errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
+
+               if (heap_fetch(relation, tid, &SnapshotDirty, tuple, &buffer, NULL))
+               {
+                   /*
+                    * If xmin isn't what we're expecting, the slot must have been
+                    * recycled and reused for an unrelated tuple.  This implies that
+                    * the latest version of the row was deleted, so we need do
+                    * nothing.  (Should be safe to examine xmin without getting
+                    * buffer's content lock.  We assume reading a TransactionId to be
+                    * atomic, and Xmin never changes in an existing tuple, except to
+                    * invalid or frozen, and neither of those can match priorXmax.)
+                    */
+                   if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data),
+                                            priorXmax))
+                   {
+                       ReleaseBuffer(buffer);
+                       return HeapTupleDeleted;
+                   }
+
+                   /* otherwise xmin should not be dirty... */
+                   if (TransactionIdIsValid(SnapshotDirty.xmin))
+                       elog(ERROR, "t_xmin is uncommitted in tuple to be updated");
+
+                   /*
+                    * If tuple is being updated by other transaction then we have to
+                    * wait for its commit/abort, or die trying.
+                    */
+                   if (TransactionIdIsValid(SnapshotDirty.xmax))
+                   {
+                       ReleaseBuffer(buffer);
+                       switch (wait_policy)
+                       {
+                           case LockWaitBlock:
+                               XactLockTableWait(SnapshotDirty.xmax,
+                                                 relation, &tuple->t_self,
+                                                 XLTW_FetchUpdated);
+                               break;
+                           case LockWaitSkip:
+                               if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
+                                   return result;  /* skip instead of waiting */
+                               break;
+                           case LockWaitError:
+                               if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
+                                   ereport(ERROR,
+                                           (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
+                                            errmsg("could not obtain lock on row in relation \"%s\"",
+                                                   RelationGetRelationName(relation))));
+                               break;
+                       }
+                       continue;       /* loop back to repeat heap_fetch */
+                   }
+
+                   /*
+                    * If tuple was inserted by our own transaction, we have to check
+                    * cmin against es_output_cid: cmin >= current CID means our
+                    * command cannot see the tuple, so we should ignore it. Otherwise
+                    * heap_lock_tuple() will throw an error, and so would any later
+                    * attempt to update or delete the tuple.  (We need not check cmax
+                    * because HeapTupleSatisfiesDirty will consider a tuple deleted
+                    * by our transaction dead, regardless of cmax.) We just checked
+                    * that priorXmax == xmin, so we can test that variable instead of
+                    * doing HeapTupleHeaderGetXmin again.
+                    */
+                   if (TransactionIdIsCurrentTransactionId(priorXmax) &&
+                       HeapTupleHeaderGetCmin(tuple->t_data) >= cid)
+                   {
+                       ReleaseBuffer(buffer);
+                       return result;
+                   }
+
+                   hufd->traversed = true;
+                   *tid = tuple->t_data->t_ctid;
+                   ReleaseBuffer(buffer);
+                   goto retry;
+               }
+
+               /*
+                * If the referenced slot was actually empty, the latest version of
+                * the row must have been deleted, so we need do nothing.
+                */
+               if (tuple->t_data == NULL)
+               {
+                   return HeapTupleDeleted;
+               }
+
+               /*
+                * As above, if xmin isn't what we're expecting, do nothing.
+                */
+               if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data),
+                                        priorXmax))
+               {
+                   if (BufferIsValid(buffer))
+                       ReleaseBuffer(buffer);
+                   return HeapTupleDeleted;
+               }
+
+               /*
+                * If we get here, the tuple was found but failed SnapshotDirty.
+                * Assuming the xmin is either a committed xact or our own xact (as it
+                * certainly should be if we're trying to modify the tuple), this must
+                * mean that the row was updated or deleted by either a committed xact
+                * or our own xact.  If it was deleted, we can ignore it; if it was
+                * updated then chain up to the next version and repeat the whole
+                * process.
+                *
+                * As above, it should be safe to examine xmax and t_ctid without the
+                * buffer content lock, because they can't be changing.
+                */
+               if (ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid))
+               {
+                   /* deleted, so forget about it */
+                   if (BufferIsValid(buffer))
+                       ReleaseBuffer(buffer);
+                   return HeapTupleDeleted;
+               }
+
+               /* updated, so look at the updated row */
+               *tid = tuple->t_data->t_ctid;
+               /* updated row should have xmin matching this xmax */
+               priorXmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
+               if (BufferIsValid(buffer))
+                   ReleaseBuffer(buffer);
+               /* loop back to fetch next in chain */
+           }
+       }
+       else
+       {
+           /* tuple was deleted, so give up */
+           return HeapTupleDeleted;
+       }
+   }
+
+   slot->tts_tableOid = RelationGetRelid(relation);
+   ExecStoreBufferHeapTuple(tuple, slot, buffer);
+   ReleaseBuffer(buffer);      /* FIXME: invent option to just transfer pin? */
+
+   return result;
+}
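+
+/*
+ * Illustrative note: EvalPlanQual-style callers are expected to pass
+ * TUPLE_LOCK_FLAG_FIND_LAST_VERSION, in which case the function above,
+ * rather than failing with HeapTupleUpdated, follows the update chain,
+ * locks the latest visible version, and sets hufd->traversed.
+ */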
+
+
+static HTSU_Result
+heapam_heap_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
+                  CommandId cid, Snapshot snapshot, Snapshot crosscheck,
+                  bool wait, HeapUpdateFailureData *hufd,
+                  LockTupleMode *lockmode, bool *update_indexes)
+{
+   bool        shouldFree = true;
+   HeapTuple   tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
+   HTSU_Result result;
+
+   /* Update the tuple with table oid */
+   if (slot->tts_tableOid != InvalidOid)
+       tuple->t_tableOid = slot->tts_tableOid;
+
+   result = heap_update(relation, otid, tuple, cid, crosscheck, wait,
+                        hufd, lockmode);
+   ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
+
+   slot->tts_tableOid = RelationGetRelid(relation);
+
+   /*
+    * Note: instead of having to update the old index tuples associated with
+    * the heap tuple, all we do is form and insert new index tuples. This is
+    * because UPDATEs are actually DELETEs and INSERTs, and index tuple
+    * deletion is done later by VACUUM (see notes in ExecDelete). All we do
+    * here is insert new index tuples.  -cim 9/27/89
+    */
+
+   /*
+    * Decide whether the caller needs to insert new index entries.
+    *
+    * Note: heap_update returns the tid (location) of the new tuple in the
+    * t_self field.
+    *
+    * If it's a HOT update, no new index entries are needed.
+    */
+   *update_indexes = result == HeapTupleMayBeUpdated &&
+       !HeapTupleIsHeapOnly(tuple);
+
+   if (shouldFree)
+       pfree(tuple);
+
+   return result;
+}
+
+static const TupleTableSlotOps *
+heapam_slot_callbacks(Relation relation)
+{
+   return &TTSOpsBufferHeapTuple;
+}
+
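+/*
+ * Variant of heap_getnext() for callers that must get back a heap tuple
+ * and therefore can only work against the heap AM; errors out for any
+ * other table access method.
+ */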
+HeapTuple
+heap_scan_getnext(TableScanDesc sscan, ScanDirection direction)
+{
+   if (unlikely(sscan->rs_rd->rd_rel->relam != HEAP_TABLE_AM_OID))
+       ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                       errmsg("only heap AM is supported")));
+   return heap_getnext(sscan, direction);
+}
+
+static bool
+heapam_satisfies(Relation rel, TupleTableSlot *slot, Snapshot snapshot)
+{
+   BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
+   bool res;
+
+   Assert(TTS_IS_BUFFERTUPLE(slot));
+   Assert(BufferIsValid(bslot->buffer));
+
+   /*
+    * We need buffer pin and lock to call HeapTupleSatisfies.
+    * Caller should be holding pin, but not lock.
+    */
+   LockBuffer(bslot->buffer, BUFFER_LOCK_SHARE);
+   res = HeapTupleSatisfies(bslot->base.tuple, snapshot, bslot->buffer);
+   LockBuffer(bslot->buffer, BUFFER_LOCK_UNLOCK);
+
+   return res;
+}
+
+static IndexFetchTableData *
+heapam_begin_index_fetch(Relation rel)
+{
+   IndexFetchHeapData *hscan = palloc0(sizeof(IndexFetchHeapData));
+
+   hscan->xs_base.rel = rel;
+   hscan->xs_cbuf = InvalidBuffer;
+   /* hscan->xs_continue_hot = false; */
+
+   return &hscan->xs_base;
+}
+
+
+static void
+heapam_reset_index_fetch(IndexFetchTableData *scan)
+{
+   IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
+
+   if (BufferIsValid(hscan->xs_cbuf))
+   {
+       ReleaseBuffer(hscan->xs_cbuf);
+       hscan->xs_cbuf = InvalidBuffer;
+   }
+
+   /* hscan->xs_continue_hot = false; */
+}
+
+static void
+heapam_end_index_fetch(IndexFetchTableData *scan)
+{
+   IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
+
+   heapam_reset_index_fetch(scan);
+
+   pfree(hscan);
+}
+
+static bool
+heapam_fetch_follow(struct IndexFetchTableData *scan,
+                   ItemPointer tid,
+                   Snapshot snapshot,
+                   TupleTableSlot *slot,
+                   bool *call_again, bool *all_dead)
+{
+   IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
+   BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
+   bool got_heap_tuple;
+
+   Assert(TTS_IS_BUFFERTUPLE(slot));
+
+   /* We can skip the buffer-switching logic if we're in mid-HOT chain. */
+   if (!*call_again)
+   {
+       /* Switch to correct buffer if we don't have it already */
+       Buffer      prev_buf = hscan->xs_cbuf;
+
+       hscan->xs_cbuf = ReleaseAndReadBuffer(hscan->xs_cbuf,
+                                             hscan->xs_base.rel,
+                                             ItemPointerGetBlockNumber(tid));
+
+       /*
+        * Prune page, but only if we weren't already on this page
+        */
+       if (prev_buf != hscan->xs_cbuf)
+           heap_page_prune_opt(hscan->xs_base.rel, hscan->xs_cbuf);
+   }
+
+   /* Obtain share-lock on the buffer so we can examine visibility */
+   LockBuffer(hscan->xs_cbuf, BUFFER_LOCK_SHARE);
+   got_heap_tuple = heap_hot_search_buffer(tid,
+                                           hscan->xs_base.rel,
+                                           hscan->xs_cbuf,
+                                           snapshot,
+                                           &bslot->base.tupdata,
+                                           all_dead,
+                                           !*call_again);
+   bslot->base.tupdata.t_self = *tid;
+   LockBuffer(hscan->xs_cbuf, BUFFER_LOCK_UNLOCK);
+
+   if (got_heap_tuple)
+   {
+       /*
+        * Only in a non-MVCC snapshot can more than one member of the HOT
+        * chain be visible.
+        */
+       *call_again = !IsMVCCSnapshot(snapshot);
+       /* FIXME pgstat_count_heap_fetch(scan->indexRelation); */
+
+       slot->tts_tableOid = RelationGetRelid(scan->rel);
+       ExecStoreBufferHeapTuple(&bslot->base.tupdata, slot, hscan->xs_cbuf);
+   }
+   else
+   {
+       /* We've reached the end of the HOT chain. */
+       *call_again = false;
+   }
+
+   return got_heap_tuple;
+}
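+
+/*
+ * Illustrative sketch, not part of this patch (helper name hypothetical):
+ * the index-fetch callbacks above are used in a begin/fetch/end cycle,
+ * roughly as follows.
+ */
+#ifdef NOT_USED
+static void
+example_index_fetch(Relation heaprel, ItemPointer tid, Snapshot snapshot,
+                   TupleTableSlot *slot)
+{
+   IndexFetchTableData *fetch = heapam_begin_index_fetch(heaprel);
+   bool        call_again = false;
+   bool        all_dead = false;
+
+   if (heapam_fetch_follow(fetch, tid, snapshot, slot,
+                           &call_again, &all_dead))
+   {
+       /*
+        * slot now holds a pinned buffer tuple; call_again is set if
+        * further HOT chain members may still be visible.
+        */
+   }
+
+   heapam_end_index_fetch(fetch);
+}
+#endif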
+
+/*
+ * Like IndexBuildHeapScan(), except that instead of scanning the complete
+ * heap, only the given number of blocks are scanned.  Scan to end-of-rel
+ * can be signalled by
+ * passing InvalidBlockNumber as numblocks.  Note that restricting the range
+ * to scan cannot be done when requesting syncscan.
+ *
+ * When "anyvisible" mode is requested, all tuples visible to any transaction
+ * are indexed and counted as live, including those inserted or deleted by
+ * transactions that are still in progress.
+ */
+static double
+IndexBuildHeapRangeScan(Relation heapRelation,
+                       Relation indexRelation,
+                       IndexInfo *indexInfo,
+                       bool allow_sync,
+                       bool anyvisible,
+                       BlockNumber start_blockno,
+                       BlockNumber numblocks,
+                       IndexBuildCallback callback,
+                       void *callback_state,
+                       TableScanDesc sscan)
+{
+   HeapScanDesc scan = (HeapScanDesc) sscan;
+   bool        is_system_catalog;
+   bool        checking_uniqueness;
+   HeapTuple   heapTuple;
+   Datum       values[INDEX_MAX_KEYS];
+   bool        isnull[INDEX_MAX_KEYS];
+   double      reltuples;
+   ExprState  *predicate;
+   TupleTableSlot *slot;
+   EState     *estate;
+   ExprContext *econtext;
+   Snapshot    snapshot;
+   bool        need_unregister_snapshot = false;
+   TransactionId OldestXmin;
+   BlockNumber root_blkno = InvalidBlockNumber;
+   OffsetNumber root_offsets[MaxHeapTuplesPerPage];
+
+   /*
+    * sanity checks
+    */
+   Assert(OidIsValid(indexRelation->rd_rel->relam));
+
+   /* Remember if it's a system catalog */
+   is_system_catalog = IsSystemRelation(heapRelation);
+
+   /* See whether we're verifying uniqueness/exclusion properties */
+   checking_uniqueness = (indexInfo->ii_Unique ||
+                          indexInfo->ii_ExclusionOps != NULL);
+
+   /*
+    * "Any visible" mode is not compatible with uniqueness checks; make sure
+    * only one of those is requested.
+    */
+   Assert(!(anyvisible && checking_uniqueness));
+
+   /*
+    * Need an EState for evaluation of index expressions and partial-index
+    * predicates.  Also a slot to hold the current tuple.
+    */
+   estate = CreateExecutorState();
+   econtext = GetPerTupleExprContext(estate);
+   slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation),
+                                   &TTSOpsHeapTuple);
+
+   /* Arrange for econtext's scan tuple to be the tuple under test */
+   econtext->ecxt_scantuple = slot;
+
+   /* Set up execution state for predicate, if any. */
+   predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
+
+   /*
+    * Prepare for scan of the base relation.  In a normal index build, we use
+    * SnapshotAny because we must retrieve all tuples and do our own time
+    * qual checks (because we have to index RECENTLY_DEAD tuples). In a
+    * concurrent build, or during bootstrap, we take a regular MVCC snapshot
+    * and index whatever's live according to that.
+    */
+   OldestXmin = InvalidTransactionId;
+
+   /* okay to ignore lazy VACUUMs here */
+   if (!IsBootstrapProcessingMode() && !indexInfo->ii_Concurrent)
+       OldestXmin = GetOldestXmin(heapRelation, PROCARRAY_FLAGS_VACUUM);
+
+   if (!scan)
+   {
+       /*
+        * Serial index build.
+        *
+        * Must begin our own heap scan in this case.  We may also need to
+        * register a snapshot whose lifetime is under our direct control.
+        */
+       if (!TransactionIdIsValid(OldestXmin))
+       {
+           snapshot = RegisterSnapshot(GetTransactionSnapshot());
+           need_unregister_snapshot = true;
+       }
+       else
+           snapshot = SnapshotAny;
+
+       sscan = table_beginscan_strat(heapRelation, /* relation */
+                                     snapshot, /* snapshot */
+                                     0,    /* number of keys */
+                                     NULL, /* scan key */
+                                     true, /* buffer access strategy OK */
+                                     allow_sync);  /* syncscan OK? */
+       scan = (HeapScanDesc) sscan;
+   }
+   else
+   {
+       /*
+        * Parallel index build.
+        *
+        * Parallel case never registers/unregisters own snapshot.  Snapshot
+        * is taken from parallel heap scan, and is SnapshotAny or an MVCC
+        * snapshot, based on same criteria as serial case.
+        */
+       Assert(!IsBootstrapProcessingMode());
+       Assert(allow_sync);
+       snapshot = scan->rs_scan.rs_snapshot;
+   }
+
+   /*
+    * Must call GetOldestXmin() with SnapshotAny.  Should never call
+    * GetOldestXmin() with MVCC snapshot. (It's especially worth checking
+    * this for parallel builds, since ambuild routines that support parallel
+    * builds must work these details out for themselves.)
+    */
+   Assert(snapshot == SnapshotAny || IsMVCCSnapshot(snapshot));
+   Assert(snapshot == SnapshotAny ? TransactionIdIsValid(OldestXmin) :
+          !TransactionIdIsValid(OldestXmin));
+   Assert(snapshot == SnapshotAny || !anyvisible);
+
+   /* set our scan endpoints */
+   if (!allow_sync)
+       table_setscanlimits(sscan, start_blockno, numblocks);
+   else
+   {
+       /* syncscan can only be requested on whole relation */
+       Assert(start_blockno == 0);
+       Assert(numblocks == InvalidBlockNumber);
+   }
+
+   reltuples = 0;
+
+   /*
+    * Scan all tuples in the base relation.
+    */
+   while ((heapTuple = heap_scan_getnext(sscan, ForwardScanDirection)) != NULL)
+   {
+       bool        tupleIsAlive;
+
+       CHECK_FOR_INTERRUPTS();
+
+       /*
+        * When dealing with a HOT-chain of updated tuples, we want to index
+        * the values of the live tuple (if any), but index it under the TID
+        * of the chain's root tuple.  This approach is necessary to preserve
+        * the HOT-chain structure in the heap. So we need to be able to find
+        * the root item offset for every tuple that's in a HOT-chain.  When
+        * first reaching a new page of the relation, call
+        * heap_get_root_tuples() to build a map of root item offsets on the
+        * page.
+        *
+        * It might look unsafe to use this information across buffer
+        * lock/unlock.  However, we hold ShareLock on the table so no
+        * ordinary insert/update/delete should occur; and we hold pin on the
+        * buffer continuously while visiting the page, so no pruning
+        * operation can occur either.
+        *
+        * Also, although our opinions about tuple liveness could change while
+        * we scan the page (due to concurrent transaction commits/aborts),
+        * the chain root locations won't, so this info doesn't need to be
+        * rebuilt after waiting for another transaction.
+        *
+        * Note the implied assumption that there is no more than one live
+        * tuple per HOT-chain --- else we could create more than one index
+        * entry pointing to the same root tuple.
+        */
+       if (scan->rs_cblock != root_blkno)
+       {
+           Page        page = BufferGetPage(scan->rs_cbuf);
+
+           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+           heap_get_root_tuples(page, root_offsets);
+           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+
+           root_blkno = scan->rs_cblock;
+       }
+
+       if (snapshot == SnapshotAny)
+       {
+           /* do our own time qual check */
+           bool        indexIt;
+           TransactionId xwait;
+
+   recheck:
+
+           /*
+            * We could possibly get away with not locking the buffer here,
+            * since caller should hold ShareLock on the relation, but let's
+            * be conservative about it.  (This remark is still correct even
+            * with HOT-pruning: our pin on the buffer prevents pruning.)
+            */
+           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+
+           /*
+            * The criteria for counting a tuple as live in this block need to
+            * match what analyze.c's acquire_sample_rows() does, otherwise
+            * CREATE INDEX and ANALYZE may produce wildly different reltuples
+            * values, e.g. when there are many recently-dead tuples.
+            */
+           switch (HeapTupleSatisfiesVacuum(heapTuple, OldestXmin, scan->rs_cbuf))
+           {
+               case HEAPTUPLE_DEAD:
+                   /* Definitely dead, we can ignore it */
+                   indexIt = false;
+                   tupleIsAlive = false;
+                   break;
+               case HEAPTUPLE_LIVE:
+                   /* Normal case, index and unique-check it */
+                   indexIt = true;
+                   tupleIsAlive = true;
+                   /* Count it as live, too */
+                   reltuples += 1;
+                   break;
+               case HEAPTUPLE_RECENTLY_DEAD:
+
+                   /*
+                    * If tuple is recently deleted then we must index it
+                    * anyway to preserve MVCC semantics.  (Pre-existing
+                    * transactions could try to use the index after we finish
+                    * building it, and may need to see such tuples.)
+                    *
+                    * However, if it was HOT-updated then we must only index
+                    * the live tuple at the end of the HOT-chain.  Since this
+                    * breaks semantics for pre-existing snapshots, mark the
+                    * index as unusable for them.
+                    *
+                    * We don't count recently-dead tuples in reltuples, even
+                    * if we index them; see acquire_sample_rows().
+                    */
+                   if (HeapTupleIsHotUpdated(heapTuple))
+                   {
+                       indexIt = false;
+                       /* mark the index as unsafe for old snapshots */
+                       indexInfo->ii_BrokenHotChain = true;
+                   }
+                   else
+                       indexIt = true;
+                   /* In any case, exclude the tuple from unique-checking */
+                   tupleIsAlive = false;
+                   break;
+               case HEAPTUPLE_INSERT_IN_PROGRESS:
+
+                   /*
+                    * In "anyvisible" mode, this tuple is visible and we
+                    * don't need any further checks.
+                    */
+                   if (anyvisible)
+                   {
+                       indexIt = true;
+                       tupleIsAlive = true;
+                       reltuples += 1;
+                       break;
+                   }
+
+                   /*
+                    * Since caller should hold ShareLock or better, normally
+                    * the only way to see this is if it was inserted earlier
+                    * in our own transaction.  However, it can happen in
+                    * system catalogs, since we tend to release write lock
+                    * before commit there.  Give a warning if neither case
+                    * applies.
+                    */
+                   xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);
+                   if (!TransactionIdIsCurrentTransactionId(xwait))
+                   {
+                       if (!is_system_catalog)
+                           elog(WARNING, "concurrent insert in progress within table \"%s\"",
+                                RelationGetRelationName(heapRelation));
+
+                       /*
+                        * If we are performing uniqueness checks, indexing
+                        * such a tuple could lead to a bogus uniqueness
+                        * failure.  In that case we wait for the inserting
+                        * transaction to finish and check again.
+                        */
+                       if (checking_uniqueness)
+                       {
+                           /*
+                            * Must drop the lock on the buffer before we wait
+                            */
+                           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+                           XactLockTableWait(xwait, heapRelation,
+                                             &heapTuple->t_self,
+                                             XLTW_InsertIndexUnique);
+                           CHECK_FOR_INTERRUPTS();
+                           goto recheck;
+                       }
+                   }
+                   else
+                   {
+                       /*
+                        * For consistency with acquire_sample_rows(), count
+                        * HEAPTUPLE_INSERT_IN_PROGRESS tuples as live only
+                        * when inserted by our own transaction.
+                        */
+                       reltuples += 1;
+                   }
+
+                   /*
+                    * We must index such tuples, since if the index build
+                    * commits then they're good.
+                    */
+                   indexIt = true;
+                   tupleIsAlive = true;
+                   break;
+               case HEAPTUPLE_DELETE_IN_PROGRESS:
+
+                   /*
+                    * As with INSERT_IN_PROGRESS case, this is unexpected
+                    * unless it's our own deletion or a system catalog; but
+                    * in anyvisible mode, this tuple is visible.
+                    */
+                   if (anyvisible)
+                   {
+                       indexIt = true;
+                       tupleIsAlive = false;
+                       reltuples += 1;
+                       break;
+                   }
+
+                   xwait = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
+                   if (!TransactionIdIsCurrentTransactionId(xwait))
+                   {
+                       if (!is_system_catalog)
+                           elog(WARNING, "concurrent delete in progress within table \"%s\"",
+                                RelationGetRelationName(heapRelation));
+
+                       /*
+                        * If we are performing uniqueness checks, assuming
+                        * the tuple is dead could lead to missing a
+                        * uniqueness violation.  In that case we wait for the
+                        * deleting transaction to finish and check again.
+                        *
+                        * Also, if it's a HOT-updated tuple, we should not
+                        * index it but rather the live tuple at the end of
+                        * the HOT-chain.  However, the deleting transaction
+                        * could abort, possibly leaving this tuple as live
+                        * after all, in which case it has to be indexed. The
+                        * only way to know what to do is to wait for the
+                        * deleting transaction to finish and check again.
+                        */
+                       if (checking_uniqueness ||
+                           HeapTupleIsHotUpdated(heapTuple))
+                       {
+                           /*
+                            * Must drop the lock on the buffer before we wait
+                            */
+                           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+                           XactLockTableWait(xwait, heapRelation,
+                                             &heapTuple->t_self,
+                                             XLTW_InsertIndexUnique);
+                           CHECK_FOR_INTERRUPTS();
+                           goto recheck;
+                       }
+
+                       /*
+                        * Otherwise index it but don't check for uniqueness,
+                        * the same as a RECENTLY_DEAD tuple.
+                        */
+                       indexIt = true;
+
+                       /*
+                        * Count HEAPTUPLE_DELETE_IN_PROGRESS tuples as live,
+                        * if they were not deleted by the current
+                        * transaction.  That's what acquire_sample_rows()
+                        * does, and we want the behavior to be consistent.
+                        */
+                       reltuples += 1;
+                   }
+                   else if (HeapTupleIsHotUpdated(heapTuple))
+                   {
+                       /*
+                        * It's a HOT-updated tuple deleted by our own xact.
+                        * We can assume the deletion will commit (else the
+                        * index contents don't matter), so treat the same as
+                        * RECENTLY_DEAD HOT-updated tuples.
+                        */
+                       indexIt = false;
+                       /* mark the index as unsafe for old snapshots */
+                       indexInfo->ii_BrokenHotChain = true;
+                   }
+                   else
+                   {
+                       /*
+                        * It's a regular tuple deleted by our own xact. Index
+                        * it, but don't check for uniqueness nor count in
+                        * reltuples, the same as a RECENTLY_DEAD tuple.
+                        */
+                       indexIt = true;
+                   }
+                   /* In any case, exclude the tuple from unique-checking */
+                   tupleIsAlive = false;
+                   break;
+               default:
+                   elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
+                   indexIt = tupleIsAlive = false; /* keep compiler quiet */
+                   break;
+           }
+
+           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+
+           if (!indexIt)
+               continue;
+       }
+       else
+       {
+           /* heap_getnext did the time qual check */
+           tupleIsAlive = true;
+           reltuples += 1;
+       }
+
+       MemoryContextReset(econtext->ecxt_per_tuple_memory);
+
+       /* Set up for predicate or expression evaluation */
+       ExecStoreHeapTuple(heapTuple, slot, false);
+
+       /*
+        * In a partial index, discard tuples that don't satisfy the
+        * predicate.
+        */
+       if (predicate != NULL)
+       {
+           if (!ExecQual(predicate, econtext))
+               continue;
+       }
+
+       /*
+        * For the current heap tuple, extract all the attributes we use in
+        * this index, and note which are null.  This also performs evaluation
+        * of any expressions needed.
+        */
+       FormIndexDatum(indexInfo,
+                      slot,
+                      estate,
+                      values,
+                      isnull);
+
+       /*
+        * You'd think we should go ahead and build the index tuple here, but
+        * some index AMs want to do further processing on the data first.  So
+        * pass the values[] and isnull[] arrays, instead.
+        */
+
+       if (HeapTupleIsHeapOnly(heapTuple))
+       {
+           /*
+            * For a heap-only tuple, pretend its TID is that of the root. See
+            * src/backend/access/heap/README.HOT for discussion.
+            */
+           HeapTupleData rootTuple;
+           OffsetNumber offnum;
+
+           rootTuple = *heapTuple;
+           offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);
+
+           if (!OffsetNumberIsValid(root_offsets[offnum - 1]))
+               ereport(ERROR,
+                       (errcode(ERRCODE_DATA_CORRUPTED),
+                        errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
+                                        ItemPointerGetBlockNumber(&heapTuple->t_self),
+                                        offnum,
+                                        RelationGetRelationName(heapRelation))));
+
+           ItemPointerSetOffsetNumber(&rootTuple.t_self,
+                                      root_offsets[offnum - 1]);
+
+           /* Call the AM's callback routine to process the tuple */
+           callback(indexRelation, &rootTuple, values, isnull, tupleIsAlive,
+                    callback_state);
+       }
+       else
+       {
+           /* Call the AM's callback routine to process the tuple */
+           callback(indexRelation, heapTuple, values, isnull, tupleIsAlive,
+                    callback_state);
+       }
+   }
+
+   table_endscan(sscan);
+
+   /* we can now forget our snapshot, if set and registered by us */
+   if (need_unregister_snapshot)
+       UnregisterSnapshot(snapshot);
+
+   ExecDropSingleTupleTableSlot(slot);
+
+   FreeExecutorState(estate);
+
+   /* These may have been pointing to the now-gone estate */
+   indexInfo->ii_ExpressionsState = NIL;
+   indexInfo->ii_PredicateState = NULL;
+
+   return reltuples;
+}
+
+/*
+ * validate_index_heapscan - second table scan for concurrent index build
+ *
+ * This has much code in common with IndexBuildHeapRangeScan, but it's enough
+ * different that it seems cleaner to have two routines not one.
+ */
+static void
+validate_index_heapscan(Relation heapRelation,
+                       Relation indexRelation,
+                       IndexInfo *indexInfo,
+                       Snapshot snapshot,
+                       ValidateIndexState *state)
+{
+   TableScanDesc sscan;
+   HeapScanDesc scan;
+   HeapTuple   heapTuple;
+   Datum       values[INDEX_MAX_KEYS];
+   bool        isnull[INDEX_MAX_KEYS];
+   ExprState  *predicate;
+   TupleTableSlot *slot;
+   EState     *estate;
+   ExprContext *econtext;
+   BlockNumber root_blkno = InvalidBlockNumber;
+   OffsetNumber root_offsets[MaxHeapTuplesPerPage];
+   bool        in_index[MaxHeapTuplesPerPage];
+
+   /* state variables for the merge */
+   ItemPointer indexcursor = NULL;
+   ItemPointerData decoded;
+   bool        tuplesort_empty = false;
+
+   /*
+    * sanity checks
+    */
+   Assert(OidIsValid(indexRelation->rd_rel->relam));
+
+   /*
+    * Need an EState for evaluation of index expressions and partial-index
+    * predicates.  Also a slot to hold the current tuple.
+    */
+   estate = CreateExecutorState();
+   econtext = GetPerTupleExprContext(estate);
+   slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation),
+                                   &TTSOpsHeapTuple);
+
+   /* Arrange for econtext's scan tuple to be the tuple under test */
+   econtext->ecxt_scantuple = slot;
+
+   /* Set up execution state for predicate, if any. */
+   predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
+
+   /*
+    * Prepare for scan of the base relation.  We need just those tuples
+    * satisfying the passed-in reference snapshot.  We must disable syncscan
+    * here, because it's critical that we read from block zero forward to
+    * match the sorted TIDs.
+    */
+   sscan = table_beginscan_strat(heapRelation, /* relation */
+                                  snapshot,    /* snapshot */
+                                  0,   /* number of keys */
+                                  NULL,    /* scan key */
+                                  true,    /* buffer access strategy OK */
+                                  false);  /* syncscan not OK */
+   scan = (HeapScanDesc) sscan;
+
+   /*
+    * Scan all tuples matching the snapshot.
+    *
+    * PBORKED: convert this scan to use slots ("slotify")
+    */
+   while ((heapTuple = heap_scan_getnext(sscan, ForwardScanDirection)) != NULL)
+   {
+       ItemPointer heapcursor = &heapTuple->t_self;
+       ItemPointerData rootTuple;
+       OffsetNumber root_offnum;
+
+       CHECK_FOR_INTERRUPTS();
+
+       state->htups += 1;
+
+       /*
+        * As commented in IndexBuildHeapRangeScan, we should index heap-only
+        * tuples under the TIDs of their root tuples; so when we advance onto
+        * a new heap page, build a map of root item offsets on the page.
+        *
+        * This complicates merging against the tuplesort output: we will
+        * visit the live tuples in order by their offsets, but the root
+        * offsets that we need to compare against the index contents might be
+        * ordered differently.  So we might have to "look back" within the
+        * tuplesort output, but only within the current page.  We handle that
+        * by keeping a bool array in_index[] showing all the
+        * already-passed-over tuplesort output TIDs of the current page. We
+        * clear that array here, when advancing onto a new heap page.
+        */
+       if (scan->rs_cblock != root_blkno)
+       {
+           Page        page = BufferGetPage(scan->rs_cbuf);
+
+           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+           heap_get_root_tuples(page, root_offsets);
+           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+
+           memset(in_index, 0, sizeof(in_index));
+
+           root_blkno = scan->rs_cblock;
+       }
+
+       /* Convert actual tuple TID to root TID */
+       rootTuple = *heapcursor;
+       root_offnum = ItemPointerGetOffsetNumber(heapcursor);
+
+       if (HeapTupleIsHeapOnly(heapTuple))
+       {
+           root_offnum = root_offsets[root_offnum - 1];
+           if (!OffsetNumberIsValid(root_offnum))
+               ereport(ERROR,
+                       (errcode(ERRCODE_DATA_CORRUPTED),
+                        errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
+                                        ItemPointerGetBlockNumber(heapcursor),
+                                        ItemPointerGetOffsetNumber(heapcursor),
+                                        RelationGetRelationName(heapRelation))));
+           ItemPointerSetOffsetNumber(&rootTuple, root_offnum);
+       }
+
+       /*
+        * "merge" by skipping through the index tuples until we find or pass
+        * the current root tuple.
+        */
+       while (!tuplesort_empty &&
+              (!indexcursor ||
+               ItemPointerCompare(indexcursor, &rootTuple) < 0))
+       {
+           Datum       ts_val;
+           bool        ts_isnull;
+
+           if (indexcursor)
+           {
+               /*
+                * Remember index items seen earlier on the current heap page
+                */
+               if (ItemPointerGetBlockNumber(indexcursor) == root_blkno)
+                   in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true;
+           }
+
+           tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
+                                                 &ts_val, &ts_isnull, NULL);
+           Assert(tuplesort_empty || !ts_isnull);
+           if (!tuplesort_empty)
+           {
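+               /* TIDs were encoded as int64 datums for the sort; decode. */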
+               itemptr_decode(&decoded, DatumGetInt64(ts_val));
+               indexcursor = &decoded;
+
+               /* If int8 is pass-by-ref, free (encoded) TID Datum memory */
+#ifndef USE_FLOAT8_BYVAL
+               pfree(DatumGetPointer(ts_val));
+#endif
+           }
+           else
+           {
+               /* Be tidy */
+               indexcursor = NULL;
+           }
+       }
+
+       /*
+        * If the tuplesort has overshot *and* we didn't see a match earlier,
+        * then this tuple is missing from the index, so insert it.
+        */
+       if ((tuplesort_empty ||
+            ItemPointerCompare(indexcursor, &rootTuple) > 0) &&
+           !in_index[root_offnum - 1])
+       {
+           MemoryContextReset(econtext->ecxt_per_tuple_memory);
+
+           /* Set up for predicate or expression evaluation */
+           ExecStoreHeapTuple(heapTuple, slot, false);
+
+           /*
+            * In a partial index, discard tuples that don't satisfy the
+            * predicate.
+            */
+           if (predicate != NULL)
+           {
+               if (!ExecQual(predicate, econtext))
+                   continue;
+           }
+
+           /*
+            * For the current heap tuple, extract all the attributes we use
+            * in this index, and note which are null.  This also performs
+            * evaluation of any expressions needed.
+            */
+           FormIndexDatum(indexInfo,
+                          slot,
+                          estate,
+                          values,
+                          isnull);
+
+           /*
+            * You'd think we should go ahead and build the index tuple here,
+            * but some index AMs want to do further processing on the data
+            * first. So pass the values[] and isnull[] arrays, instead.
+            */
+
+           /*
+            * If the tuple is already committed dead, you might think we
+            * could suppress uniqueness checking, but this is no longer true
+            * in the presence of HOT, because the insert is actually a proxy
+            * for a uniqueness check on the whole HOT-chain.  That is, the
+            * tuple we have here could be dead because it was already
+            * HOT-updated, and if so the updating transaction will not have
+            * thought it should insert index entries.  The index AM will
+            * check the whole HOT-chain and correctly detect a conflict if
+            * there is one.
+            */
+
+           index_insert(indexRelation,
+                        values,
+                        isnull,
+                        &rootTuple,
+                        heapRelation,
+                        indexInfo->ii_Unique ?
+                        UNIQUE_CHECK_YES : UNIQUE_CHECK_NO,
+                        indexInfo);
+
+           state->tups_inserted += 1;
+       }
+   }
+
+   table_endscan(sscan);
+
+   ExecDropSingleTupleTableSlot(slot);
+
+   FreeExecutorState(estate);
+
+   /* These may have been pointing to the now-gone estate */
+   indexInfo->ii_ExpressionsState = NIL;
+   indexInfo->ii_PredicateState = NULL;
+}
+
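+/*
+ * Collect the visible tuples on the page of the relation indicated by
+ * tbmres, in preparation for returning them one by one.  Returns true if
+ * at least one visible tuple was found on the page.
+ */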
+static bool
+heapam_scan_bitmap_pagescan(TableScanDesc sscan,
+                           TBMIterateResult *tbmres)
+{
+   HeapScanDesc scan = (HeapScanDesc) sscan;
+   BlockNumber page = tbmres->blockno;
+   Buffer      buffer;
+   Snapshot    snapshot;
+   int         ntup;
+
+   scan->rs_cindex = 0;
+   scan->rs_ntuples = 0;
+
+   /*
+    * Ignore any claimed entries past what we think is the end of the
+    * relation.  (This is probably not necessary given that we got at
+    * least AccessShareLock on the table before performing any of the
+    * indexscans, but let's be safe.)
+    */
+   if (page >= scan->rs_scan.rs_nblocks)
+       return false;
+
+   scan->rs_cbuf = ReleaseAndReadBuffer(scan->rs_cbuf,
+                                        scan->rs_scan.rs_rd,
+                                        page);
+   scan->rs_cblock = page;
+   buffer = scan->rs_cbuf;
+   snapshot = scan->rs_scan.rs_snapshot;
+
+   ntup = 0;
+
+   /*
+    * Prune and repair fragmentation for the whole page, if possible.
+    */
+   heap_page_prune_opt(scan->rs_scan.rs_rd, buffer);
+
+   /*
+    * We must hold share lock on the buffer content while examining tuple
+    * visibility.  Afterwards, however, the tuples we have found to be
+    * visible are guaranteed good as long as we hold the buffer pin.
+    */
+   LockBuffer(buffer, BUFFER_LOCK_SHARE);
+
+   /*
+    * We need two separate strategies for lossy and non-lossy cases.
+    */
+   if (tbmres->ntuples >= 0)
+   {
+       /*
+        * Bitmap is non-lossy, so we just look through the offsets listed in
+        * tbmres; but we have to follow any HOT chain starting at each such
+        * offset.
+        */
+       int         curslot;
+
+       for (curslot = 0; curslot < tbmres->ntuples; curslot++)
+       {
+           OffsetNumber offnum = tbmres->offsets[curslot];
+           ItemPointerData tid;
+           HeapTupleData heapTuple;
+
+           ItemPointerSet(&tid, page, offnum);
+           if (heap_hot_search_buffer(&tid, sscan->rs_rd, buffer, snapshot,
+                                      &heapTuple, NULL, true))
+               scan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid);
+       }
+   }
+   else
+   {
+       /*
+        * Bitmap is lossy, so we must examine each item pointer on the page.
+        * But we can ignore HOT chains, since we'll check each tuple anyway.
+        */
+       Page        dp = (Page) BufferGetPage(buffer);
+       OffsetNumber maxoff = PageGetMaxOffsetNumber(dp);
+       OffsetNumber offnum;
+
+       for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
+       {
+           ItemId      lp;
+           HeapTupleData loctup;
+           bool        valid;
+
+           lp = PageGetItemId(dp, offnum);
+           if (!ItemIdIsNormal(lp))
+               continue;
+           loctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
+           loctup.t_len = ItemIdGetLength(lp);
+           loctup.t_tableOid = scan->rs_scan.rs_rd->rd_id;
+           ItemPointerSet(&loctup.t_self, page, offnum);
+           valid = HeapTupleSatisfies(&loctup, snapshot, buffer);
+           if (valid)
+           {
+               scan->rs_vistuples[ntup++] = offnum;
+               PredicateLockTuple(scan->rs_scan.rs_rd, &loctup, snapshot);
+           }
+           CheckForSerializableConflictOut(valid, scan->rs_scan.rs_rd, &loctup,
+                                           buffer, snapshot);
+       }
+   }
+
+   LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+
+   Assert(ntup <= MaxHeapTuplesPerPage);
+   scan->rs_ntuples = ntup;
+
+   return ntup > 0;
+}
+
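+/*
+ * Return the next tuple collected by heapam_scan_bitmap_pagescan, storing
+ * it into the given slot.
+ */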
+static bool
+heapam_scan_bitmap_pagescan_next(TableScanDesc sscan, TupleTableSlot *slot)
+{
+   HeapScanDesc scan = (HeapScanDesc) sscan;
+   OffsetNumber targoffset;
+   Page        dp;
+   ItemId      lp;
+
+   if (scan->rs_cindex < 0 || scan->rs_cindex >= scan->rs_ntuples)
+       return false;
+
+   targoffset = scan->rs_vistuples[scan->rs_cindex];
+   dp = (Page) BufferGetPage(scan->rs_cbuf);
+   lp = PageGetItemId(dp, targoffset);
+   Assert(ItemIdIsNormal(lp));
+
+   scan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
+   scan->rs_ctup.t_len = ItemIdGetLength(lp);
+   scan->rs_ctup.t_tableOid = scan->rs_scan.rs_rd->rd_id;
+   ItemPointerSet(&scan->rs_ctup.t_self, scan->rs_cblock, targoffset);
+
+   pgstat_count_heap_fetch(scan->rs_scan.rs_rd);
+
+   /*
+    * Set up the result slot to point to this tuple.  Note that the
+    * slot acquires a pin on the buffer.
+    */
+   ExecStoreBufferHeapTuple(&scan->rs_ctup,
+                            slot,
+                            scan->rs_cbuf);
+
+   scan->rs_cindex++;
+
+   return true;
+}
+
+/*
+ * Check visibility of the tuple during a sample scan.
+ */
+static bool
+SampleHeapTupleVisible(HeapScanDesc scan, Buffer buffer,
+                       HeapTuple tuple,
+                       OffsetNumber tupoffset)
+{
+   if (scan->rs_scan.rs_pageatatime)
+   {
+       /*
+        * In pageatatime mode, heapgetpage() already did visibility checks,
+        * so just look at the info it left in rs_vistuples[].
+        *
+        * We use a binary search over the known-sorted array.  Note: we could
+        * save some effort if we insisted that NextSampleTuple select tuples
+        * in increasing order, but it's not clear that there would be enough
+        * gain to justify the restriction.
+        */
+       int         start = 0,
+                   end = scan->rs_ntuples - 1;
+
+       while (start <= end)
+       {
+           int         mid = (start + end) / 2;
+           OffsetNumber curoffset = scan->rs_vistuples[mid];
+
+           if (tupoffset == curoffset)
+               return true;
+           else if (tupoffset < curoffset)
+               end = mid - 1;
+           else
+               start = mid + 1;
+       }
+
+       return false;
+   }
+   else
+   {
+       /* Otherwise, we have to check the tuple individually. */
+       return HeapTupleSatisfies(tuple, scan->rs_scan.rs_snapshot, buffer);
+   }
+}
+
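+/*
+ * Select the next block of the relation to sample, as directed by the
+ * tablesample method; returns false once the scan is complete.
+ */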
+static bool
+heapam_scan_sample_next_block(TableScanDesc sscan,
+                              struct SampleScanState *scanstate)
+{
+   HeapScanDesc scan = (HeapScanDesc) sscan;
+   TsmRoutine *tsm = scanstate->tsmroutine;
+   BlockNumber blockno;
+
+   /* return false immediately if relation is empty */
+   if (scan->rs_scan.rs_nblocks == 0)
+       return false;
+
+   if (tsm->NextSampleBlock)
+   {
+       blockno = tsm->NextSampleBlock(scanstate, scan->rs_scan.rs_nblocks);
+       scan->rs_cblock = blockno;
+   }
+   else
+   {
+       /* scanning table sequentially */
+
+       if (scan->rs_cblock == InvalidBlockNumber)
+       {
+           Assert(!scan->rs_inited);
+           blockno = scan->rs_scan.rs_startblock;
+       }
+       else
+       {
+           Assert(scan->rs_inited);
+
+           blockno = scan->rs_cblock + 1;
+
+           if (blockno >= scan->rs_scan.rs_nblocks)
+           {
+               /* wrap to beginning of rel, might not have started at 0 */
+               blockno = 0;
+           }
+
+           /*
+            * Report our new scan position for synchronization purposes.
+            *
+            * Note: we do this before checking for end of scan so that the
+            * final state of the position hint is back at the start of the
+            * rel.  That's not strictly necessary, but otherwise when you run
+            * the same query multiple times the starting position would shift
+            * a little bit backwards on every invocation, which is confusing.
+            * We don't guarantee any specific ordering in general, though.
+            */
+           if (scan->rs_scan.rs_syncscan)
+               ss_report_location(scan->rs_scan.rs_rd, blockno);
+
+           if (blockno == scan->rs_scan.rs_startblock)
+           {
+               blockno = InvalidBlockNumber;
+           }
+       }
+   }
+
+   if (!BlockNumberIsValid(blockno))
+   {
+       if (BufferIsValid(scan->rs_cbuf))
+           ReleaseBuffer(scan->rs_cbuf);
+       scan->rs_cbuf = InvalidBuffer;
+       scan->rs_cblock = InvalidBlockNumber;
+       scan->rs_inited = false;
+
+       return false;
+   }
+
+   heapgetpage(sscan, blockno);
+   scan->rs_inited = true;
+
+   return true;
+}
+
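+/*
+ * Return the next visible sample tuple from the current block, as selected
+ * by the tablesample method; returns false when the block is exhausted.
+ */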
+static bool
+heapam_scan_sample_next_tuple(TableScanDesc sscan,
+                              struct SampleScanState *scanstate,
+                              TupleTableSlot *slot)
+{
+   HeapScanDesc scan = (HeapScanDesc) sscan;
+   TsmRoutine *tsm = scanstate->tsmroutine;
+   BlockNumber blockno = scan->rs_cblock;
+   bool        pagemode = scan->rs_scan.rs_pageatatime;
+
+   Page        page;
+   bool        all_visible;
+   OffsetNumber maxoffset;
+
+   ExecClearTuple(slot);
+
+   /*
+    * When not using pagemode, we must lock the buffer during tuple
+    * visibility checks.
+    */
+   if (!pagemode)
+       LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+
+   page = (Page) BufferGetPage(scan->rs_cbuf);
+   all_visible = PageIsAllVisible(page) &&
+       !scan->rs_scan.rs_snapshot->takenDuringRecovery;
+   maxoffset = PageGetMaxOffsetNumber(page);
+
+   for (;;)
+   {
+       OffsetNumber tupoffset;
+
+       CHECK_FOR_INTERRUPTS();
+
+       /* Ask the tablesample method which tuples to check on this page. */
+       tupoffset = tsm->NextSampleTuple(scanstate,
+                                        blockno,
+                                        maxoffset);
+
+       if (OffsetNumberIsValid(tupoffset))
+       {
+           ItemId      itemid;
+           bool        visible;
+           HeapTuple   tuple = &(scan->rs_ctup);
+
+           /* Skip invalid tuple pointers. */
+           itemid = PageGetItemId(page, tupoffset);
+           if (!ItemIdIsNormal(itemid))
+               continue;
+
+           tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid);
+           tuple->t_len = ItemIdGetLength(itemid);
+           ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
+
+           if (all_visible)
+               visible = true;
+           else
+               visible = SampleHeapTupleVisible(scan, scan->rs_cbuf, tuple, tupoffset);
+
+           /* in pagemode, heapgetpage did this for us */
+           if (!pagemode)
+               CheckForSerializableConflictOut(visible, scan->rs_scan.rs_rd, tuple,
+                                               scan->rs_cbuf, scan->rs_scan.rs_snapshot);
+
+           /* Try next tuple from same page. */
+           if (!visible)
+               continue;
+
+           ExecStoreBufferHeapTuple(tuple, slot, scan->rs_cbuf);
+
+           /* Found visible tuple, return it. */
+           if (!pagemode)
+               LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+
+           /* Count successfully-fetched tuples as heap fetches */
+           pgstat_count_heap_getnext(scan->rs_scan.rs_rd);
+
+           return true;
+       }
+       else
+       {
+           /*
+            * If we get here, it means we've exhausted the items on this
+            * page and it's time to move to the next.
+            */
+           if (!pagemode)
+               LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+
+           break;
+       }
+   }
+
+   return false;
+}
+
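+/*
+ * Prepare to analyze the given block: read it in and keep it pinned and
+ * share-locked while its tuples are examined.
+ */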
+static void
+heapam_scan_analyze_next_block(TableScanDesc sscan, BlockNumber blockno,
+                               BufferAccessStrategy bstrategy)
+{
+   HeapScanDesc scan = (HeapScanDesc) sscan;
+
+   /*
+    * We must maintain a pin on the target page's buffer to ensure that
+    * the maxoffset value stays good (else concurrent VACUUM might delete
+    * tuples out from under us).  Hence, pin the page until we are done
+    * looking at it.  We also choose to hold sharelock on the buffer
+    * throughout --- we could release and re-acquire sharelock for each
+    * tuple, but since we aren't doing much work per tuple, the extra
+    * lock traffic is probably better avoided.
+    */
+   scan->rs_cblock = blockno;
+   scan->rs_cbuf = ReadBufferExtended(scan->rs_scan.rs_rd, MAIN_FORKNUM, blockno,
+                                      RBM_NORMAL, bstrategy);
+   scan->rs_cindex = FirstOffsetNumber;
+   LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+}
+
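+/*
+ * Return the next tuple to sample from the block prepared by
+ * heapam_scan_analyze_next_block, counting live and dead rows as tuples
+ * are classified.
+ */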
+static bool
+heapam_scan_analyze_next_tuple(TableScanDesc sscan, TransactionId OldestXmin,
+                               double *liverows, double *deadrows,
+                               TupleTableSlot *slot)
+{
+   HeapScanDesc scan = (HeapScanDesc) sscan;
+   Page        targpage;
+   OffsetNumber maxoffset;
+   BufferHeapTupleTableSlot *hslot;
+
+   Assert(TTS_IS_BUFFERTUPLE(slot));
+
+   hslot = (BufferHeapTupleTableSlot *) slot;
+   targpage = BufferGetPage(scan->rs_cbuf);
+   maxoffset = PageGetMaxOffsetNumber(targpage);
+
+   /* Inner loop over all tuples on the selected page */
+   for (; scan->rs_cindex <= maxoffset; scan->rs_cindex++)
+   {
+       ItemId      itemid;
+       HeapTuple   targtuple = &hslot->base.tupdata;
+       bool        sample_it = false;
+
+       itemid = PageGetItemId(targpage, scan->rs_cindex);
+
+       /*
+        * We ignore unused and redirect line pointers.  DEAD line
+        * pointers should be counted as dead, because we need vacuum to
+        * run to get rid of them.  Note that this rule agrees with the
+        * way that heap_page_prune() counts things.
+        */
+       if (!ItemIdIsNormal(itemid))
+       {
+           if (ItemIdIsDead(itemid))
+               *deadrows += 1;
+           continue;
+       }
+
+       ItemPointerSet(&targtuple->t_self, scan->rs_cblock, scan->rs_cindex);
+
+       targtuple->t_tableOid = RelationGetRelid(scan->rs_scan.rs_rd);
+       targtuple->t_data = (HeapTupleHeader) PageGetItem(targpage, itemid);
+       targtuple->t_len = ItemIdGetLength(itemid);
+
+       switch (HeapTupleSatisfiesVacuum(targtuple, OldestXmin, scan->rs_cbuf))
+       {
+           case HEAPTUPLE_LIVE:
+               sample_it = true;
+               *liverows += 1;
+               break;
+
+           case HEAPTUPLE_DEAD:
+           case HEAPTUPLE_RECENTLY_DEAD:
+               /* Count dead and recently-dead rows */
+               *deadrows += 1;
+               break;
+
+           case HEAPTUPLE_INSERT_IN_PROGRESS:
+
+               /*
+                * Insert-in-progress rows are not counted.  We assume
+                * that when the inserting transaction commits or aborts,
+                * it will send a stats message to increment the proper
+                * count.  This works right only if that transaction ends
+                * after we finish analyzing the table; if things happen
+                * in the other order, its stats update will be
+                * overwritten by ours.  However, the error will be large
+                * only if the other transaction runs long enough to
+                * insert many tuples, so assuming it will finish after us
+                * is the safer option.
+                *
+                * A special case is that the inserting transaction might
+                * be our own.  In this case we should count and sample
+                * the row, to accommodate users who load a table and
+                * analyze it in one transaction.  (pgstat_report_analyze
+                * has to adjust the numbers we send to the stats
+                * collector to make this come out right.)
+                */
+               if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(targtuple->t_data)))
+               {
+                   sample_it = true;
+                   *liverows += 1;
+               }
+               break;
+
+           case HEAPTUPLE_DELETE_IN_PROGRESS:
+
+               /*
+                * We count delete-in-progress rows as still live, using
+                * the same reasoning given above; but we don't bother to
+                * include them in the sample.
+                *
+                * If the delete was done by our own transaction, however,
+                * we must count the row as dead to make
+                * pgstat_report_analyze's stats adjustments come out
+                * right.  (Note: this works out properly when the row was
+                * both inserted and deleted in our xact.)
+                */
+               if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(targtuple->t_data)))
+                   *deadrows += 1;
+               else
+                   *liverows += 1;
+               break;
+
+           default:
+               elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
+               break;
+       }
+
+       if (sample_it)
+       {
+           ExecStoreBufferHeapTuple(targtuple, slot, scan->rs_cbuf);
+           scan->rs_cindex++;
+
+           /* note that we leave the buffer locked here! */
+           return true;
+       }
+   }
+
+   /* Now release the lock and pin on the page */
+   UnlockReleaseBuffer(scan->rs_cbuf);
+   scan->rs_cbuf = InvalidBuffer;
+
+   return false;
+}
+
+/*
+ * Reconstruct and rewrite the given tuple
+ *
+ * We cannot simply copy the tuple as-is, for several reasons:
+ *
+ * 1. We'd like to squeeze out the values of any dropped columns, both
+ * to save space and to ensure we have no corner-case failures. (It's
+ * possible for example that the new table hasn't got a TOAST table
+ * and so is unable to store any large values of dropped cols.)
+ *
+ * 2. The tuple might not even be legal for the new table; this is
+ * currently only known to happen as an after-effect of ALTER TABLE
+ * SET WITHOUT OIDS.
+ *
+ * So, we must reconstruct the tuple from component Datums.
+ */
+static void
+reform_and_rewrite_tuple(HeapTuple tuple,
+                        Relation OldHeap, Relation NewHeap,
+                        Datum *values, bool *isnull, RewriteState rwstate)
+{
+   TupleDesc oldTupDesc = RelationGetDescr(OldHeap);
+   TupleDesc newTupDesc = RelationGetDescr(NewHeap);
+   HeapTuple   copiedTuple;
+   int         i;
+
+   heap_deform_tuple(tuple, oldTupDesc, values, isnull);
+
+   /* Be sure to null out any dropped columns */
+   for (i = 0; i < newTupDesc->natts; i++)
+   {
+       if (TupleDescAttr(newTupDesc, i)->attisdropped)
+           isnull[i] = true;
+   }
+
+   copiedTuple = heap_form_tuple(newTupDesc, values, isnull);
+
+   /* The heap rewrite module does the rest */
+   rewrite_heap_tuple(rwstate, tuple, copiedTuple);
+
+   heap_freetuple(copiedTuple);
+}
+
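+/*
+ * Copy the contents of OldHeap into NewHeap for CLUSTER / VACUUM FULL,
+ * scanning either in OldIndex order, via an explicit sort, or sequentially.
+ */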
+static void
+heap_copy_for_cluster(Relation OldHeap, Relation NewHeap, Relation OldIndex,
+                      bool use_sort,
+                      TransactionId OldestXmin, TransactionId FreezeXid,
+                      MultiXactId MultiXactCutoff,
+                      double *num_tuples, double *tups_vacuumed,
+                      double *tups_recently_dead)
+{
+   RewriteState rwstate;
+   IndexScanDesc indexScan;
+   TableScanDesc heapScan;
+   bool        use_wal;
+   bool        is_system_catalog;
+   Tuplesortstate *tuplesort;
+   TupleDesc   oldTupDesc = RelationGetDescr(OldHeap);
+   TupleDesc   newTupDesc = RelationGetDescr(NewHeap);
+   TupleTableSlot *slot;
+   int         natts;
+   Datum      *values;
+   bool       *isnull;
+   BufferHeapTupleTableSlot *hslot;
+
+   /* Remember if it's a system catalog */
+   is_system_catalog = IsSystemRelation(OldHeap);
+
+   /*
+    * We need to log the copied data in WAL iff WAL archiving/streaming is
+    * enabled AND it's a WAL-logged rel.
+    */
+   use_wal = XLogIsNeeded() && RelationNeedsWAL(NewHeap);
+
+   /* use_wal off requires smgr_targblock be initially invalid */
+   Assert(RelationGetTargetBlock(NewHeap) == InvalidBlockNumber);
+
+   /* Preallocate values/isnull arrays */
+   natts = newTupDesc->natts;
+   values = (Datum *) palloc(natts * sizeof(Datum));
+   isnull = (bool *) palloc(natts * sizeof(bool));
+
+   /* Initialize the rewrite operation */
+   rwstate = begin_heap_rewrite(OldHeap, NewHeap, OldestXmin, FreezeXid,
+                                MultiXactCutoff, use_wal);
+
+   /* Set up sorting if wanted */
+   if (use_sort)
+       tuplesort = tuplesort_begin_cluster(oldTupDesc, OldIndex,
+                                           maintenance_work_mem,
+                                           NULL, false);
+   else
+       tuplesort = NULL;
+
+   /*
+    * Prepare to scan the OldHeap.  To ensure we see recently-dead tuples
+    * that still need to be copied, we scan with SnapshotAny and use
+    * HeapTupleSatisfiesVacuum for the visibility test.
+    */
+   if (OldIndex != NULL && !use_sort)
+   {
+       heapScan = NULL;
+       indexScan = index_beginscan(OldHeap, OldIndex, SnapshotAny, 0, 0);
+       index_rescan(indexScan, NULL, 0, NULL, 0);
+   }
+   else
+   {
+       heapScan = table_beginscan(OldHeap, SnapshotAny, 0, (ScanKey) NULL);
+       indexScan = NULL;
+   }
+
+   slot = table_gimmegimmeslot(OldHeap, NULL);
+   hslot = (BufferHeapTupleTableSlot *) slot;
+
+   /*
+    * Scan through the OldHeap, either in OldIndex order or sequentially;
+    * copy each tuple into the NewHeap, or transiently to the tuplesort
+    * module.  Note that we don't bother sorting dead tuples (they won't get
+    * to the new table anyway).
+    */
+   for (;;)
+   {
+       bool        isdead;
+       TransactionId xid;
+
+       CHECK_FOR_INTERRUPTS();
+
+       if (indexScan != NULL)
+       {
+           if (!index_getnext_slot(indexScan, ForwardScanDirection, slot))
+               break;
+
+           /* Since we used no scan keys, should never need to recheck */
+           if (indexScan->xs_recheck)
+               elog(ERROR, "CLUSTER does not support lossy index conditions");
+       }
+       else
+       {
+           if (!table_scan_getnextslot(heapScan, ForwardScanDirection, slot))
+               break;
+       }
+
+       LockBuffer(hslot->buffer, BUFFER_LOCK_SHARE);
+
+       switch (HeapTupleSatisfiesVacuum(hslot->base.tuple, OldestXmin, hslot->buffer))
+       {
+           case HEAPTUPLE_DEAD:
+               /* Definitely dead */
+               isdead = true;
+               break;
+           case HEAPTUPLE_RECENTLY_DEAD:
+               *tups_recently_dead += 1;
+               /* fall through */
+           case HEAPTUPLE_LIVE:
+               /* Live or recently dead, must copy it */
+               isdead = false;
+               break;
+           case HEAPTUPLE_INSERT_IN_PROGRESS:
+
+               /*
+                * Since we hold exclusive lock on the relation, normally the
+                * only way to see this is if it was inserted earlier in our
+                * own transaction.  However, it can happen in system
+                * catalogs, since we tend to release write lock before commit
+                * there.  Give a warning if neither case applies; but in any
+                * case we had better copy it.
+                */
+               xid = HeapTupleHeaderGetXmin(hslot->base.tuple->t_data);
+               if (!is_system_catalog && !TransactionIdIsCurrentTransactionId(xid))
+                   elog(WARNING, "concurrent insert in progress within table \"%s\"",
+                        RelationGetRelationName(OldHeap));
+               /* treat as live */
+               isdead = false;
+               break;
+           case HEAPTUPLE_DELETE_IN_PROGRESS:
+
+               /*
+                * Similar situation to INSERT_IN_PROGRESS case.
+                */
+               xid = HeapTupleHeaderGetUpdateXid(hslot->base.tuple->t_data);
+               if (!is_system_catalog && !TransactionIdIsCurrentTransactionId(xid))
+                   elog(WARNING, "concurrent delete in progress within table \"%s\"",
+                        RelationGetRelationName(OldHeap));
+               /* treat as recently dead */
+               *tups_recently_dead += 1;
+               isdead = false;
+               break;
+           default:
+               elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
+               isdead = false; /* keep compiler quiet */
+               break;
+       }
+
+       LockBuffer(hslot->buffer, BUFFER_LOCK_UNLOCK);
+
+       if (isdead)
+       {
+           *tups_vacuumed += 1;
+           /* heap rewrite module still needs to see it... */
+           if (rewrite_heap_dead_tuple(rwstate, ExecFetchSlotHeapTuple(slot, false, NULL)))
+           {
+               /* A previous recently-dead tuple is now known dead */
+               *tups_vacuumed += 1;
+               *tups_recently_dead -= 1;
+           }
+           continue;
+       }
+
+       *num_tuples += 1;
+       if (tuplesort != NULL)
+           tuplesort_puttupleslot(tuplesort, slot);
+       else
+           reform_and_rewrite_tuple(ExecFetchSlotHeapTuple(slot, false, NULL),
+                                    OldHeap, NewHeap,
+                                    values, isnull, rwstate);
+   }
+
+   if (indexScan != NULL)
+       index_endscan(indexScan);
+   if (heapScan != NULL)
+       table_endscan(heapScan);
+
+   ExecDropSingleTupleTableSlot(slot);
+
+   /*
+    * In scan-and-sort mode, complete the sort, then read out all live tuples
+    * from the tuplesort and write them to the new relation.
+    */
+   if (tuplesort != NULL)
+   {
+       tuplesort_performsort(tuplesort);
+
+       for (;;)
+       {
+           HeapTuple   tuple;
+
+           CHECK_FOR_INTERRUPTS();
+
+           tuple = tuplesort_getheaptuple(tuplesort, true);
+           if (tuple == NULL)
+               break;
+
+           reform_and_rewrite_tuple(tuple,
+                                    OldHeap, NewHeap,
+                                    values, isnull, rwstate);
+       }
+
+       tuplesort_end(tuplesort);
+   }
+
+   /* Write out any remaining tuples, and fsync if needed */
+   end_heap_rewrite(rwstate);
+
+   /* Clean up */
+   pfree(values);
+   pfree(isnull);
+}
+
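+/*
+ * Definition of the heap table access method, wiring the callbacks defined
+ * above into a TableAmRoutine.
+ */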
+static const TableAmRoutine heapam_methods = {
+   .type = T_TableAmRoutine,
+
+   .slot_callbacks = heapam_slot_callbacks,
+
+   .snapshot_satisfies = heapam_satisfies,
+
+   .scan_begin = heap_beginscan,
+   .scansetlimits = heap_setscanlimits,
+   .scan_getnextslot = heap_getnextslot,
+   .scan_end = heap_endscan,
+   .scan_rescan = heap_rescan,
+   .scan_update_snapshot = heap_update_snapshot,
+
+   .scan_bitmap_pagescan = heapam_scan_bitmap_pagescan,
+   .scan_bitmap_pagescan_next = heapam_scan_bitmap_pagescan_next,
+
+   .scan_sample_next_block = heapam_scan_sample_next_block,
+   .scan_sample_next_tuple = heapam_scan_sample_next_tuple,
+
+   .tuple_fetch_row_version = heapam_fetch_row_version,
+   .tuple_fetch_follow = heapam_fetch_follow,
+   .tuple_insert = heapam_heap_insert,
+   .tuple_insert_speculative = heapam_heap_insert_speculative,
+   .tuple_complete_speculative = heapam_heap_complete_speculative,
+   .tuple_delete = heapam_heap_delete,
+   .tuple_update = heapam_heap_update,
+   .tuple_lock = heapam_lock_tuple,
+   .multi_insert = heap_multi_insert,
+
+   .tuple_get_latest_tid = heap_get_latest_tid,
+
+   .relation_vacuum = heap_vacuum_rel,
+   .scan_analyze_next_block = heapam_scan_analyze_next_block,
+   .scan_analyze_next_tuple = heapam_scan_analyze_next_tuple,
+   .relation_copy_for_cluster = heap_copy_for_cluster,
+   .relation_sync = heap_sync,
+
+   .begin_index_fetch = heapam_begin_index_fetch,
+   .reset_index_fetch = heapam_reset_index_fetch,
+   .end_index_fetch = heapam_end_index_fetch,
+
+   .index_build_range_scan = IndexBuildHeapRangeScan,
+
+   .index_validate_scan = validate_index_heapscan
+};
+
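+/*
+ * Return the heap table access method's routine table directly, for callers
+ * that do not want to go through the handler function.
+ */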
+const TableAmRoutine *
+GetHeapamTableAmRoutine(void)
+{
+   return &heapam_methods;
+}
+
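+/*
+ * SQL-callable handler function that returns the heap TableAmRoutine.
+ */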
+Datum
+heap_tableam_handler(PG_FUNCTION_ARGS)
+{
+   PG_RETURN_POINTER(&heapam_methods);
+}
similarity index 96%
rename from src/backend/utils/time/tqual.c
rename to src/backend/access/heap/heapam_visibility.c
index f7c4c9188ce2dd66a67d62cc91c1c18e4fd7518e..1ac1a20c1dc7b0846f7feedb2ed5e2e8d447ece1 100644 (file)
@@ -1,7 +1,6 @@
 /*-------------------------------------------------------------------------
  *
- * tqual.c
- *   POSTGRES "time qualification" code, ie, tuple visibility rules.
+ * POSTGRES "time qualification" code, i.e., tuple visibility rules.
  *
  * NOTE: all the HeapTupleSatisfies routines will update the tuple's
  * "hint" status bits if we see that the inserting or deleting transaction
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *   src/backend/utils/time/tqual.c
+ *   src/backend/access/heap/heapam_visibility.c
  *
  *-------------------------------------------------------------------------
  */
 
 #include "postgres.h"
 
+#include "access/heapam.h"
 #include "access/htup_details.h"
 #include "access/multixact.h"
 #include "access/subtrans.h"
 #include "utils/snapmgr.h"
 #include "utils/tqual.h"
 
-
 /* Static variables representing various special snapshot semantics */
-SnapshotData SnapshotSelfData = {HeapTupleSatisfiesSelf};
-SnapshotData SnapshotAnyData = {HeapTupleSatisfiesAny};
-
+SnapshotData SnapshotSelfData = {SELF_VISIBILITY};
+SnapshotData SnapshotAnyData = {ANY_VISIBILITY};
 
 /*
  * SetHintBits()
@@ -117,6 +115,9 @@ static inline void
 SetHintBits(HeapTupleHeader tuple, Buffer buffer,
            uint16 infomask, TransactionId xid)
 {
+   if (!BufferIsValid(buffer))
+       return;
+
    if (TransactionIdIsValid(xid))
    {
        /* NB: xid must be known committed here! */
@@ -172,7 +173,7 @@ HeapTupleSetHintBits(HeapTupleHeader tuple, Buffer buffer,
  *         (Xmax != my-transaction &&          the row was deleted by another transaction
  *          Xmax is not committed)))           that has not been committed
  */
-bool
+static bool
 HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer)
 {
    HeapTupleHeader tuple = htup->t_data;
@@ -342,7 +343,7 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer)
  * HeapTupleSatisfiesAny
  *     Dummy "satisfies" routine: any tuple satisfies SnapshotAny.
  */
-bool
+static bool
 HeapTupleSatisfiesAny(HeapTuple htup, Snapshot snapshot, Buffer buffer)
 {
    return true;
@@ -362,7 +363,7 @@ HeapTupleSatisfiesAny(HeapTuple htup, Snapshot snapshot, Buffer buffer)
  * Among other things, this means you can't do UPDATEs of rows in a TOAST
  * table.
  */
-bool
+static bool
 HeapTupleSatisfiesToast(HeapTuple htup, Snapshot snapshot,
                        Buffer buffer)
 {
@@ -612,7 +613,11 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid,
    {
        if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
            return HeapTupleMayBeUpdated;
-       return HeapTupleUpdated;    /* updated by other */
+       /* updated by other */
+       if (ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
+           return HeapTupleDeleted;
+       else
+           return HeapTupleUpdated;
    }
 
    if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
@@ -653,7 +658,12 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid,
            return HeapTupleBeingUpdated;
 
        if (TransactionIdDidCommit(xmax))
-           return HeapTupleUpdated;
+       {
+           if (ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
+               return HeapTupleDeleted;
+           else
+               return HeapTupleUpdated;
+       }
 
        /*
         * By here, the update in the Xmax is either aborted or crashed, but
@@ -709,7 +719,12 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid,
 
    SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
                HeapTupleHeaderGetRawXmax(tuple));
-   return HeapTupleUpdated;    /* updated by other */
+
+   /* updated by other */
+   if (ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
+       return HeapTupleDeleted;
+   else
+       return HeapTupleUpdated;
 }
 
 /*
@@ -735,7 +750,7 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid,
  * on the insertion without aborting the whole transaction, the associated
  * token is also returned in snapshot->speculativeToken.
  */
-bool
+static bool
 HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot,
                        Buffer buffer)
 {
@@ -959,7 +974,7 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot,
  * inserting/deleting transaction was still running --- which was more cycles
  * and more contention on the PGXACT array.
  */
-bool
+static bool
 HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
                       Buffer buffer)
 {
@@ -1161,9 +1176,10 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
  * even if we see that the deleting transaction has committed.
  */
 HTSV_Result
-HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin,
+HeapTupleSatisfiesVacuum(HeapTuple stup, TransactionId OldestXmin,
                         Buffer buffer)
 {
+   HeapTuple   htup = (HeapTuple) stup;
    HeapTupleHeader tuple = htup->t_data;
 
    Assert(ItemPointerIsValid(&htup->t_self));
@@ -1383,18 +1399,17 @@ HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin,
    return HEAPTUPLE_DEAD;
 }
 
-
 /*
  * HeapTupleSatisfiesNonVacuumable
  *
- * True if tuple might be visible to some transaction; false if it's
- * surely dead to everyone, ie, vacuumable.
+ *     True if tuple might be visible to some transaction; false if it's
+ *     surely dead to everyone, ie, vacuumable.
  *
- * This is an interface to HeapTupleSatisfiesVacuum that meets the
- * SnapshotSatisfiesFunc API, so it can be used through a Snapshot.
- * snapshot->xmin must have been set up with the xmin horizon to use.
+ *     This is an interface to HeapTupleSatisfiesVacuum that meets the
+ *     SnapshotSatisfiesFunc API, so it can be used through a Snapshot.
+ *     snapshot->xmin must have been set up with the xmin horizon to use.
  */
-bool
+static bool
 HeapTupleSatisfiesNonVacuumable(HeapTuple htup, Snapshot snapshot,
                                Buffer buffer)
 {
@@ -1402,65 +1417,59 @@ HeapTupleSatisfiesNonVacuumable(HeapTuple htup, Snapshot snapshot,
        != HEAPTUPLE_DEAD;
 }
 
-
 /*
- * HeapTupleIsSurelyDead
+ * Is the tuple really only locked?  That is, is it not updated?
  *
- * Cheaply determine whether a tuple is surely dead to all onlookers.
- * We sometimes use this in lieu of HeapTupleSatisfiesVacuum when the
- * tuple has just been tested by another visibility routine (usually
- * HeapTupleSatisfiesMVCC) and, therefore, any hint bits that can be set
- * should already be set.  We assume that if no hint bits are set, the xmin
- * or xmax transaction is still running.  This is therefore faster than
- * HeapTupleSatisfiesVacuum, because we don't consult PGXACT nor CLOG.
- * It's okay to return false when in doubt, but we must return true only
- * if the tuple is removable.
+ * It's easy to check just infomask bits if the locker is not a multi; but
+ * otherwise we need to verify that the updating transaction has not aborted.
+ *
+ * This function is here because it follows the same time qualification rules
+ * laid out at the top of this file.
  */
 bool
-HeapTupleIsSurelyDead(HeapTuple htup, TransactionId OldestXmin)
+HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple)
 {
-   HeapTupleHeader tuple = htup->t_data;
+   TransactionId xmax;
 
-   Assert(ItemPointerIsValid(&htup->t_self));
-   Assert(htup->t_tableOid != InvalidOid);
+   /* if there's no valid Xmax, then there's obviously no update either */
+   if (tuple->t_infomask & HEAP_XMAX_INVALID)
+       return true;
 
-   /*
-    * If the inserting transaction is marked invalid, then it aborted, and
-    * the tuple is definitely dead.  If it's marked neither committed nor
-    * invalid, then we assume it's still alive (since the presumption is that
-    * all relevant hint bits were just set moments ago).
-    */
-   if (!HeapTupleHeaderXminCommitted(tuple))
-       return HeapTupleHeaderXminInvalid(tuple) ? true : false;
+   if (tuple->t_infomask & HEAP_XMAX_LOCK_ONLY)
+       return true;
 
-   /*
-    * If the inserting transaction committed, but any deleting transaction
-    * aborted, the tuple is still alive.
-    */
-   if (tuple->t_infomask & HEAP_XMAX_INVALID)
-       return false;
+   /* invalid xmax means no update */
+   if (!TransactionIdIsValid(HeapTupleHeaderGetRawXmax(tuple)))
+       return true;
 
    /*
-    * If the XMAX is just a lock, the tuple is still alive.
+    * if HEAP_XMAX_LOCK_ONLY is not set and not a multi, then this must
+    * necessarily have been updated
     */
-   if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
+   if (!(tuple->t_infomask & HEAP_XMAX_IS_MULTI))
        return false;
 
-   /*
-    * If the Xmax is a MultiXact, it might be dead or alive, but we cannot
-    * know without checking pg_multixact.
-    */
-   if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
-       return false;
+   /* ... but if it's a multi, then perhaps the updating Xid aborted. */
+   xmax = HeapTupleGetUpdateXid(tuple);
 
-   /* If deleter isn't known to have committed, assume it's still running. */
-   if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
+   /* not LOCKED_ONLY, so it has to have an xmax */
+   Assert(TransactionIdIsValid(xmax));
+
+   if (TransactionIdIsCurrentTransactionId(xmax))
+       return false;
+   if (TransactionIdIsInProgress(xmax))
+       return false;
+   if (TransactionIdDidCommit(xmax))
        return false;
 
-   /* Deleter committed, so tuple is dead if the XID is old enough. */
-   return TransactionIdPrecedes(HeapTupleHeaderGetRawXmax(tuple), OldestXmin);
+   /*
+    * not current, not in progress, not committed -- must have aborted or
+    * crashed
+    */
+   return true;
 }
 
+
 /*
  * XidInMVCCSnapshot
  *     Is the given XID still-in-progress according to the snapshot?
@@ -1584,55 +1593,61 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
 }
 
 /*
- * Is the tuple really only locked?  That is, is it not updated?
- *
- * It's easy to check just infomask bits if the locker is not a multi; but
- * otherwise we need to verify that the updating transaction has not aborted.
+ * HeapTupleIsSurelyDead
  *
- * This function is here because it follows the same time qualification rules
- * laid out at the top of this file.
+ * Cheaply determine whether a tuple is surely dead to all onlookers.
+ * We sometimes use this in lieu of HeapTupleSatisfiesVacuum when the
+ * tuple has just been tested by another visibility routine (usually
+ * HeapTupleSatisfiesMVCC) and, therefore, any hint bits that can be set
+ * should already be set.  We assume that if no hint bits are set, the xmin
+ * or xmax transaction is still running.  This is therefore faster than
+ * HeapTupleSatisfiesVacuum, because we don't consult PGXACT nor CLOG.
+ * It's okay to return false when in doubt, but we must return true only
+ * if the tuple is removable.
  */
 bool
-HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple)
+HeapTupleIsSurelyDead(HeapTuple htup, TransactionId OldestXmin)
 {
-   TransactionId xmax;
-
-   /* if there's no valid Xmax, then there's obviously no update either */
-   if (tuple->t_infomask & HEAP_XMAX_INVALID)
-       return true;
+   HeapTupleHeader tuple = htup->t_data;
 
-   if (tuple->t_infomask & HEAP_XMAX_LOCK_ONLY)
-       return true;
+   Assert(ItemPointerIsValid(&htup->t_self));
+   Assert(htup->t_tableOid != InvalidOid);
 
-   /* invalid xmax means no update */
-   if (!TransactionIdIsValid(HeapTupleHeaderGetRawXmax(tuple)))
-       return true;
+   /*
+    * If the inserting transaction is marked invalid, then it aborted, and
+    * the tuple is definitely dead.  If it's marked neither committed nor
+    * invalid, then we assume it's still alive (since the presumption is that
+    * all relevant hint bits were just set moments ago).
+    */
+   if (!HeapTupleHeaderXminCommitted(tuple))
+       return HeapTupleHeaderXminInvalid(tuple) ? true : false;
 
    /*
-    * if HEAP_XMAX_LOCK_ONLY is not set and not a multi, then this must
-    * necessarily have been updated
+    * If the inserting transaction committed, but any deleting transaction
+    * aborted, the tuple is still alive.
     */
-   if (!(tuple->t_infomask & HEAP_XMAX_IS_MULTI))
+   if (tuple->t_infomask & HEAP_XMAX_INVALID)
        return false;
 
-   /* ... but if it's a multi, then perhaps the updating Xid aborted. */
-   xmax = HeapTupleGetUpdateXid(tuple);
-
-   /* not LOCKED_ONLY, so it has to have an xmax */
-   Assert(TransactionIdIsValid(xmax));
-
-   if (TransactionIdIsCurrentTransactionId(xmax))
-       return false;
-   if (TransactionIdIsInProgress(xmax))
-       return false;
-   if (TransactionIdDidCommit(xmax))
+   /*
+    * If the XMAX is just a lock, the tuple is still alive.
+    */
+   if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
        return false;
 
    /*
-    * not current, not in progress, not committed -- must have aborted or
-    * crashed
+    * If the Xmax is a MultiXact, it might be dead or alive, but we cannot
+    * know without checking pg_multixact.
     */
-   return true;
+   if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
+       return false;
+
+   /* If deleter isn't known to have committed, assume it's still running. */
+   if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
+       return false;
+
+   /* Deleter committed, so tuple is dead if the XID is old enough. */
+   return TransactionIdPrecedes(HeapTupleHeaderGetRawXmax(tuple), OldestXmin);
 }
 
 /*
@@ -1659,7 +1674,7 @@ TransactionIdInArray(TransactionId xid, TransactionId *xip, Size num)
  * dangerous to do so as the semantics of doing so during timetravel are more
  * complicated than when dealing "only" with the present.
  */
-bool
+static bool
 HeapTupleSatisfiesHistoricMVCC(HeapTuple htup, Snapshot snapshot,
                               Buffer buffer)
 {
@@ -1796,3 +1811,34 @@ HeapTupleSatisfiesHistoricMVCC(HeapTuple htup, Snapshot snapshot,
    else
        return true;
 }
+
+bool
+HeapTupleSatisfies(HeapTuple stup, Snapshot snapshot, Buffer buffer)
+{
+   switch (snapshot->visibility_type)
+   {
+       case MVCC_VISIBILITY:
+           return HeapTupleSatisfiesMVCC(stup, snapshot, buffer);
+           break;
+       case SELF_VISIBILITY:
+           return HeapTupleSatisfiesSelf(stup, snapshot, buffer);
+           break;
+       case ANY_VISIBILITY:
+           return HeapTupleSatisfiesAny(stup, snapshot, buffer);
+           break;
+       case TOAST_VISIBILITY:
+           return HeapTupleSatisfiesToast(stup, snapshot, buffer);
+           break;
+       case DIRTY_VISIBILITY:
+           return HeapTupleSatisfiesDirty(stup, snapshot, buffer);
+           break;
+       case HISTORIC_MVCC_VISIBILITY:
+           return HeapTupleSatisfiesHistoricMVCC(stup, snapshot, buffer);
+           break;
+       case NON_VACUUMABLE_VISIBILTY:
+           return HeapTupleSatisfiesNonVacuumable(stup, snapshot, buffer);
+           break;
+   }
+
+   return false; /* keep compiler quiet */
+}
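
With the function pointer gone from SnapshotData, every visibility check funnels through this one dispatch routine, keyed off the snapshot's enum tag. A sketch of what a hypothetical caller looks like, under the same locking rule the HeapTupleSatisfies* routines document (buffer pinned, share lock held across the check):

    /* Sketch: visibility test against SnapshotSelf; buf must be pinned. */
    static bool
    tuple_visible_to_self(HeapTuple tup, Buffer buf)
    {
        bool        visible;

        LockBuffer(buf, BUFFER_LOCK_SHARE);
        visible = HeapTupleSatisfies(tup, SnapshotSelf, buf);
        LockBuffer(buf, BUFFER_LOCK_UNLOCK);

        return visible;
    }
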
index 44caeca336c19a2dcbf0bc26f603a1e3cbca3546..327e277422ef803e55d58b6dd367920b550cff35 100644 (file)
 #include "access/heapam.h"
 #include "access/heapam_xlog.h"
 #include "access/rewriteheap.h"
+#include "access/tableam.h"
 #include "access/transam.h"
 #include "access/tuptoaster.h"
 #include "access/xact.h"
 
 #include "storage/bufmgr.h"
 #include "storage/fd.h"
+#include "storage/procarray.h"
 #include "storage/smgr.h"
 
 #include "utils/memutils.h"
 #include "utils/rel.h"
 #include "utils/tqual.h"
 
-#include "storage/procarray.h"
 
 /*
  * State associated with a rewrite operation. This is opaque to the user
@@ -357,7 +358,7 @@ end_heap_rewrite(RewriteState state)
     * wrote before the checkpoint.
     */
    if (RelationNeedsWAL(state->rs_new_rel))
-       heap_sync(state->rs_new_rel);
+       table_sync(state->rs_new_rel);
 
    logical_end_heap_rewrite(state);
 
index d1dad998d28c9053a262212ca301e30a5ffb544f..486cde4aff8bb045245c08a5a6f70196f79418df 100644 (file)
@@ -32,6 +32,7 @@
 
 #include "access/genam.h"
 #include "access/heapam.h"
+#include "access/tableam.h"
 #include "access/tuptoaster.h"
 #include "access/xact.h"
 #include "catalog/catalog.h"
index 9d08775687985313681bfc8b8bf5d9b5ef8b9a41..5f033c5ee46ddc25f1a8653f750b8c9ababd4476 100644 (file)
@@ -20,6 +20,7 @@
 #include "postgres.h"
 
 #include "access/relscan.h"
+#include "access/tableam.h"
 #include "access/transam.h"
 #include "catalog/index.h"
 #include "lib/stringinfo.h"
@@ -82,6 +83,7 @@ RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
    scan = (IndexScanDesc) palloc(sizeof(IndexScanDescData));
 
    scan->heapRelation = NULL;  /* may be set later */
+   scan->xs_heapfetch = NULL;
    scan->indexRelation = indexRelation;
    scan->xs_snapshot = InvalidSnapshot;    /* caller must initialize this */
    scan->numberOfKeys = nkeys;
@@ -122,11 +124,6 @@ RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
    scan->xs_hitup = NULL;
    scan->xs_hitupdesc = NULL;
 
-   ItemPointerSetInvalid(&scan->xs_ctup.t_self);
-   scan->xs_ctup.t_data = NULL;
-   scan->xs_cbuf = InvalidBuffer;
-   scan->xs_continue_hot = false;
-
    return scan;
 }
 
@@ -334,6 +331,7 @@ systable_beginscan(Relation heapRelation,
 
    sysscan->heap_rel = heapRelation;
    sysscan->irel = irel;
+   sysscan->slot = table_gimmegimmeslot(heapRelation, NULL);
 
    if (snapshot == NULL)
    {
@@ -383,9 +381,9 @@ systable_beginscan(Relation heapRelation,
         * disadvantage; and there are no compensating advantages, because
         * it's unlikely that such scans will occur in parallel.
         */
-       sysscan->scan = heap_beginscan_strat(heapRelation, snapshot,
-                                            nkeys, key,
-                                            true, false);
+       sysscan->scan = table_beginscan_strat(heapRelation, snapshot,
+                                               nkeys, key,
+                                               true, false);
        sysscan->iscan = NULL;
    }
 
@@ -400,15 +398,18 @@ systable_beginscan(Relation heapRelation,
  * Note that returned tuple is a reference to data in a disk buffer;
  * it must not be modified, and should be presumed inaccessible after
  * next getnext() or endscan() call.
+ *
+ * FIXME: Change to be slot based.
  */
 HeapTuple
 systable_getnext(SysScanDesc sysscan)
 {
-   HeapTuple   htup;
+   HeapTuple   htup = NULL;
 
    if (sysscan->irel)
    {
-       htup = index_getnext(sysscan->iscan, ForwardScanDirection);
+       if (index_getnext_slot(sysscan->iscan, ForwardScanDirection, sysscan->slot))
+           htup = ExecFetchSlotHeapTuple(sysscan->slot, false, NULL);
 
        /*
         * We currently don't need to support lossy index operators for any
@@ -421,7 +422,7 @@ systable_getnext(SysScanDesc sysscan)
            elog(ERROR, "system catalog scans with lossy index conditions are not implemented");
    }
    else
-       htup = heap_getnext(sysscan->scan, ForwardScanDirection);
+       htup = heap_scan_getnext(sysscan->scan, ForwardScanDirection);
 
    return htup;
 }
@@ -454,26 +455,26 @@ systable_recheck_tuple(SysScanDesc sysscan, HeapTuple tup)
 
    if (sysscan->irel)
    {
-       IndexScanDesc scan = sysscan->iscan;
+       IndexFetchHeapData *hscan = (IndexFetchHeapData *) sysscan->iscan->xs_heapfetch;
 
-       Assert(IsMVCCSnapshot(scan->xs_snapshot));
-       Assert(tup == &scan->xs_ctup);
-       Assert(BufferIsValid(scan->xs_cbuf));
+       Assert(IsMVCCSnapshot(sysscan->iscan->xs_snapshot));
+       /* Assert(tup == &hscan->xs_ctup); FIXME: replace by peeking into the slot? */
+       Assert(BufferIsValid(hscan->xs_cbuf));
        /* must hold a buffer lock to call HeapTupleSatisfiesVisibility */
-       LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE);
-       result = HeapTupleSatisfiesVisibility(tup, freshsnap, scan->xs_cbuf);
-       LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);
+       LockBuffer(hscan->xs_cbuf, BUFFER_LOCK_SHARE);
+       result = HeapTupleSatisfies(tup, freshsnap, hscan->xs_cbuf);
+       LockBuffer(hscan->xs_cbuf, BUFFER_LOCK_UNLOCK);
    }
    else
    {
-       HeapScanDesc scan = sysscan->scan;
+       HeapScanDesc scan = (HeapScanDesc) sysscan->scan;
 
-       Assert(IsMVCCSnapshot(scan->rs_snapshot));
-       Assert(tup == &scan->rs_ctup);
+       Assert(IsMVCCSnapshot(scan->rs_scan.rs_snapshot));
+       /* FIXME: Assert(tup == &scan->rs_ctup); */
        Assert(BufferIsValid(scan->rs_cbuf));
        /* must hold a buffer lock to call HeapTupleSatisfiesVisibility */
        LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
-       result = HeapTupleSatisfiesVisibility(tup, freshsnap, scan->rs_cbuf);
+       result = HeapTupleSatisfies(tup, freshsnap, scan->rs_cbuf);
        LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
    }
    return result;
@@ -487,13 +488,19 @@ systable_recheck_tuple(SysScanDesc sysscan, HeapTuple tup)
 void
 systable_endscan(SysScanDesc sysscan)
 {
+   if (sysscan->slot)
+   {
+       ExecDropSingleTupleTableSlot(sysscan->slot);
+       sysscan->slot = NULL;
+   }
+
    if (sysscan->irel)
    {
        index_endscan(sysscan->iscan);
        index_close(sysscan->irel, AccessShareLock);
    }
    else
-       heap_endscan(sysscan->scan);
+       table_endscan(sysscan->scan);
 
    if (sysscan->snapshot)
        UnregisterSnapshot(sysscan->snapshot);
@@ -540,6 +547,7 @@ systable_beginscan_ordered(Relation heapRelation,
 
    sysscan->heap_rel = heapRelation;
    sysscan->irel = indexRelation;
+   sysscan->slot = table_gimmegimmeslot(heapRelation, NULL);
 
    if (snapshot == NULL)
    {
@@ -585,10 +593,12 @@ systable_beginscan_ordered(Relation heapRelation,
 HeapTuple
 systable_getnext_ordered(SysScanDesc sysscan, ScanDirection direction)
 {
-   HeapTuple   htup;
+   HeapTuple   htup = NULL;
 
    Assert(sysscan->irel);
-   htup = index_getnext(sysscan->iscan, direction);
+   if (index_getnext_slot(sysscan->iscan, direction, sysscan->slot))
+       htup = ExecFetchSlotHeapTuple(sysscan->slot, false, NULL);
+
    /* See notes in systable_getnext */
    if (htup && sysscan->iscan->xs_recheck)
        elog(ERROR, "system catalog scans with lossy index conditions are not implemented");
@@ -602,6 +612,12 @@ systable_getnext_ordered(SysScanDesc sysscan, ScanDirection direction)
 void
 systable_endscan_ordered(SysScanDesc sysscan)
 {
+   if (sysscan->slot)
+   {
+       ExecDropSingleTupleTableSlot(sysscan->slot);
+       sysscan->slot = NULL;
+   }
+
    Assert(sysscan->irel);
    index_endscan(sysscan->iscan);
    if (sysscan->snapshot)
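
The SysScanDesc now owns a slot of the table's native slot type, created via table_gimmegimmeslot() at beginscan and dropped at endscan, so callers are unaffected. A sketch of the unchanged consumer pattern (amrel is assumed to be an open pg_am relation):

    /* Sketch: catalog scan; the slot shuffling happens inside genam.c now. */
    ScanKeyData key;
    SysScanDesc scan;
    HeapTuple   tup;

    ScanKeyInit(&key, Anum_pg_am_amname,
                BTEqualStrategyNumber, F_NAMEEQ,
                CStringGetDatum("heap"));

    scan = systable_beginscan(amrel, AmNameIndexId, true, NULL, 1, &key);
    while ((tup = systable_getnext(scan)) != NULL)
    {
        /* tup points into the scan's slot; treat as read-only */
    }
    systable_endscan(scan);
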
index eade540ef5da6ec87e742c5194d9e6d86326e4fb..fe5af31f8701d6ae893be78703593cba9f7b60b5 100644 (file)
@@ -71,6 +71,7 @@
 
 #include "access/amapi.h"
 #include "access/relscan.h"
+#include "access/tableam.h"
 #include "access/transam.h"
 #include "access/xlog.h"
 #include "catalog/index.h"
@@ -235,6 +236,8 @@ index_beginscan(Relation heapRelation,
    scan->heapRelation = heapRelation;
    scan->xs_snapshot = snapshot;
 
+   scan->xs_heapfetch = table_begin_index_fetch_table(heapRelation);
+
    return scan;
 }
 
@@ -318,16 +321,12 @@ index_rescan(IndexScanDesc scan,
    Assert(nkeys == scan->numberOfKeys);
    Assert(norderbys == scan->numberOfOrderBys);
 
-   /* Release any held pin on a heap page */
-   if (BufferIsValid(scan->xs_cbuf))
-   {
-       ReleaseBuffer(scan->xs_cbuf);
-       scan->xs_cbuf = InvalidBuffer;
-   }
-
-   scan->xs_continue_hot = false;
+   /* Release resources (like buffer pins) for heap accesses */
+   if (scan->xs_heapfetch)
+       table_reset_index_fetch_table(scan->xs_heapfetch);
 
    scan->kill_prior_tuple = false; /* for safety */
+   scan->xs_heap_continue = false;
 
    scan->indexRelation->rd_amroutine->amrescan(scan, keys, nkeys,
                                                orderbys, norderbys);
@@ -343,11 +342,11 @@ index_endscan(IndexScanDesc scan)
    SCAN_CHECKS;
    CHECK_SCAN_PROCEDURE(amendscan);
 
-   /* Release any held pin on a heap page */
-   if (BufferIsValid(scan->xs_cbuf))
+   /* Release resources (like buffer pins) for heap accesses */
+   if (scan->xs_heapfetch)
    {
-       ReleaseBuffer(scan->xs_cbuf);
-       scan->xs_cbuf = InvalidBuffer;
+       table_end_index_fetch_table(scan->xs_heapfetch);
+       scan->xs_heapfetch = NULL;
    }
 
    /* End the AM's scan */
@@ -380,7 +379,7 @@ index_markpos(IndexScanDesc scan)
  *     index_restrpos  - restore a scan position
  *
  * NOTE: this only restores the internal scan state of the index AM.
- * The current result tuple (scan->xs_ctup) doesn't change.  See comments
+ * The current result tuple (scan->xs_heaptid) doesn't change.  See comments
  * for ExecRestrPos().
  *
  * NOTE: in the presence of HOT chains, mark/restore only works correctly
@@ -400,9 +399,12 @@ index_restrpos(IndexScanDesc scan)
    SCAN_CHECKS;
    CHECK_SCAN_PROCEDURE(amrestrpos);
 
-   scan->xs_continue_hot = false;
+   /* release resources (like buffer pins) for heap accesses */
+   if (scan->xs_heapfetch)
+       table_reset_index_fetch_table(scan->xs_heapfetch);
 
    scan->kill_prior_tuple = false; /* for safety */
+   scan->xs_heap_continue = false;
 
    scan->indexRelation->rd_amroutine->amrestrpos(scan);
 }
@@ -483,6 +485,9 @@ index_parallelrescan(IndexScanDesc scan)
 {
    SCAN_CHECKS;
 
+   if (scan->xs_heapfetch)
+       table_reset_index_fetch_table(scan->xs_heapfetch);
+
    /* amparallelrescan is optional; assume no-op if not provided by AM */
    if (scan->indexRelation->rd_amroutine->amparallelrescan != NULL)
        scan->indexRelation->rd_amroutine->amparallelrescan(scan);
@@ -513,6 +518,8 @@ index_beginscan_parallel(Relation heaprel, Relation indexrel, int nkeys,
    scan->heapRelation = heaprel;
    scan->xs_snapshot = snapshot;
 
+   scan->xs_heapfetch = table_begin_index_fetch_table(heaprel);
+
    return scan;
 }
 
@@ -535,7 +542,7 @@ index_getnext_tid(IndexScanDesc scan, ScanDirection direction)
 
    /*
     * The AM's amgettuple proc finds the next index entry matching the scan
-    * keys, and puts the TID into scan->xs_ctup.t_self.  It should also set
+    * keys, and puts the TID into scan->xs_heaptid.  It should also set
     * scan->xs_recheck and possibly scan->xs_itup/scan->xs_hitup, though we
     * pay no attention to those fields here.
     */
@@ -543,23 +550,23 @@ index_getnext_tid(IndexScanDesc scan, ScanDirection direction)
 
    /* Reset kill flag immediately for safety */
    scan->kill_prior_tuple = false;
+   scan->xs_heap_continue = false;
 
    /* If we're out of index entries, we're done */
    if (!found)
    {
-       /* ... but first, release any held pin on a heap page */
-       if (BufferIsValid(scan->xs_cbuf))
-       {
-           ReleaseBuffer(scan->xs_cbuf);
-           scan->xs_cbuf = InvalidBuffer;
-       }
+       /* release resources (like buffer pins) for heap accesses */
+       if (scan->xs_heapfetch)
+           table_reset_index_fetch_table(scan->xs_heapfetch);
+
        return NULL;
    }
+   Assert(ItemPointerIsValid(&scan->xs_heaptid));
 
    pgstat_count_index_tuples(scan->indexRelation, 1);
 
    /* Return the TID of the tuple we found. */
-   return &scan->xs_ctup.t_self;
+   return &scan->xs_heaptid;
 }
 
 /* ----------------
@@ -580,53 +587,17 @@ index_getnext_tid(IndexScanDesc scan, ScanDirection direction)
  * enough information to do it efficiently in the general case.
  * ----------------
  */
-HeapTuple
-index_fetch_heap(IndexScanDesc scan)
+bool
+index_fetch_heap(IndexScanDesc scan, TupleTableSlot *slot)
 {
-   ItemPointer tid = &scan->xs_ctup.t_self;
    bool        all_dead = false;
-   bool        got_heap_tuple;
-
-   /* We can skip the buffer-switching logic if we're in mid-HOT chain. */
-   if (!scan->xs_continue_hot)
-   {
-       /* Switch to correct buffer if we don't have it already */
-       Buffer      prev_buf = scan->xs_cbuf;
-
-       scan->xs_cbuf = ReleaseAndReadBuffer(scan->xs_cbuf,
-                                            scan->heapRelation,
-                                            ItemPointerGetBlockNumber(tid));
+   bool        found;
 
-       /*
-        * Prune page, but only if we weren't already on this page
-        */
-       if (prev_buf != scan->xs_cbuf)
-           heap_page_prune_opt(scan->heapRelation, scan->xs_cbuf);
-   }
+   found = table_fetch_follow(scan->xs_heapfetch, &scan->xs_heaptid, scan->xs_snapshot,
+                              slot, &scan->xs_heap_continue, &all_dead);
 
-   /* Obtain share-lock on the buffer so we can examine visibility */
-   LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE);
-   got_heap_tuple = heap_hot_search_buffer(tid, scan->heapRelation,
-                                           scan->xs_cbuf,
-                                           scan->xs_snapshot,
-                                           &scan->xs_ctup,
-                                           &all_dead,
-                                           !scan->xs_continue_hot);
-   LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);
-
-   if (got_heap_tuple)
-   {
-       /*
-        * Only in a non-MVCC snapshot can more than one member of the HOT
-        * chain be visible.
-        */
-       scan->xs_continue_hot = !IsMVCCSnapshot(scan->xs_snapshot);
+   if (found)
        pgstat_count_heap_fetch(scan->indexRelation);
-       return &scan->xs_ctup;
-   }
-
-   /* We've reached the end of the HOT chain. */
-   scan->xs_continue_hot = false;
 
    /*
     * If we scanned a whole HOT chain and found only dead tuples, tell index
@@ -638,50 +609,41 @@ index_fetch_heap(IndexScanDesc scan)
    if (!scan->xactStartedInRecovery)
        scan->kill_prior_tuple = all_dead;
 
-   return NULL;
+   return found;
 }
 
 /* ----------------
- *     index_getnext - get the next heap tuple from a scan
+ *     index_getnext_slot - get the next tuple from a scan
  *
- * The result is the next heap tuple satisfying the scan keys and the
- * snapshot, or NULL if no more matching tuples exist.
+ * The result is true if a tuple satisfying the scan keys and the snapshot was
+ * found, false otherwise.  The tuple is stored in the specified slot.
  *
  * On success, the buffer containing the heap tup is pinned (the pin will be
  * dropped in a future index_getnext_tid, index_fetch_heap or index_endscan
- * call).
+ * call).  XXX
  *
  * Note: caller must check scan->xs_recheck, and perform rechecking of the
  * scan keys if required.  We do not do that here because we don't have
  * enough information to do it efficiently in the general case.
  * ----------------
  */
-HeapTuple
-index_getnext(IndexScanDesc scan, ScanDirection direction)
+bool
+index_getnext_slot(IndexScanDesc scan, ScanDirection direction, TupleTableSlot *slot)
 {
-   HeapTuple   heapTuple;
-   ItemPointer tid;
-
    for (;;)
    {
-       if (scan->xs_continue_hot)
-       {
-           /*
-            * We are resuming scan of a HOT chain after having returned an
-            * earlier member.  Must still hold pin on current heap page.
-            */
-           Assert(BufferIsValid(scan->xs_cbuf));
-           Assert(ItemPointerGetBlockNumber(&scan->xs_ctup.t_self) ==
-                  BufferGetBlockNumber(scan->xs_cbuf));
-       }
-       else
+       if (!scan->xs_heap_continue)
        {
+           ItemPointer tid;
+
            /* Time to fetch the next TID from the index */
            tid = index_getnext_tid(scan, direction);
 
            /* If we're out of index entries, we're done */
            if (tid == NULL)
                break;
+
+           Assert(ItemPointerEquals(tid, &scan->xs_heaptid));
        }
 
        /*
@@ -689,14 +651,15 @@ index_getnext(IndexScanDesc scan, ScanDirection direction)
         * If we don't find anything, loop around and grab the next TID from
         * the index.
         */
-       heapTuple = index_fetch_heap(scan);
-       if (heapTuple != NULL)
-           return heapTuple;
+       Assert(ItemPointerIsValid(&scan->xs_heaptid));
+       if (index_fetch_heap(scan, slot))
+           return true;
    }
 
-   return NULL;                /* failure exit */
+   return false;
 }
 
+
 /* ----------------
  *     index_getbitmap - get all tuples at once from an index scan
  *
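
Former index_getnext() callers now supply a slot of the heap relation's preferred type and loop on the boolean interface instead. A sketch of the new pattern (heapRel, indexRel, snapshot, keys, and nkeys are assumptions):

    /* Sketch: slot-based replacement for an old index_getnext() loop. */
    TupleTableSlot *slot = table_gimmegimmeslot(heapRel, NULL);
    IndexScanDesc scan = index_beginscan(heapRel, indexRel, snapshot, nkeys, 0);

    index_rescan(scan, keys, nkeys, NULL, 0);
    while (index_getnext_slot(scan, ForwardScanDirection, slot))
    {
        /* tuple is in 'slot'; recheck quals if scan->xs_recheck is set */
    }
    index_endscan(scan);
    ExecDropSingleTupleTableSlot(slot);
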
index 582e5b0652d40e72272d5f941fb776a79f0ff1f3..b2ad95f9706dfec4bb9d593b5c68b7006b7aeb3e 100644 (file)
@@ -18,6 +18,7 @@
 #include "access/heapam.h"
 #include "access/nbtree.h"
 #include "access/nbtxlog.h"
+#include "access/tableam.h"
 #include "access/transam.h"
 #include "access/xloginsert.h"
 #include "miscadmin.h"
@@ -415,8 +416,8 @@ _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel,
                 * that satisfies SnapshotDirty.  This is necessary because we
                 * have just a single index entry for the entire chain.
                 */
-               else if (heap_hot_search(&htid, heapRel, &SnapshotDirty,
-                                        &all_dead))
+               else if (table_fetch_follow_check(heapRel, &htid, &SnapshotDirty,
+                                                 &all_dead))
                {
                    TransactionId xwait;
 
@@ -469,7 +470,7 @@ _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel,
                     * entry.
                     */
                    htid = itup->t_tid;
-                   if (heap_hot_search(&htid, heapRel, SnapshotSelf, NULL))
+                   if (table_fetch_follow_check(heapRel, &htid, SnapshotSelf, NULL))
                    {
                        /* Normal case --- it's still live */
                    }
index e8725fbbe1eec84fc9b8e03092b54d96b3bbd8cd..935a412fafa4103c0b13a55745c99081185dbed2 100644 (file)
@@ -310,7 +310,7 @@ btgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
        if (_bt_first(scan, ForwardScanDirection))
        {
            /* Save tuple ID, and continue scanning */
-           heapTid = &scan->xs_ctup.t_self;
+           heapTid = &scan->xs_heaptid;
            tbm_add_tuples(tbm, heapTid, 1, false);
            ntids++;
 
index 16223d01ec40d50089fab31ff04e3438d88f22ee..ecbf5482d8829d289fc484d3c293621f1e0ea21a 100644 (file)
@@ -1136,7 +1136,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
 readcomplete:
    /* OK, itemIndex says what to return */
    currItem = &so->currPos.items[so->currPos.itemIndex];
-   scan->xs_ctup.t_self = currItem->heapTid;
+   scan->xs_heaptid = currItem->heapTid;
    if (scan->xs_want_itup)
        scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset);
 
@@ -1186,7 +1186,7 @@ _bt_next(IndexScanDesc scan, ScanDirection dir)
 
    /* OK, itemIndex says what to return */
    currItem = &so->currPos.items[so->currPos.itemIndex];
-   scan->xs_ctup.t_self = currItem->heapTid;
+   scan->xs_heaptid = currItem->heapTid;
    if (scan->xs_want_itup)
        scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset);
 
@@ -1965,7 +1965,7 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir)
 
    /* OK, itemIndex says what to return */
    currItem = &so->currPos.items[so->currPos.itemIndex];
-   scan->xs_ctup.t_self = currItem->heapTid;
+   scan->xs_heaptid = currItem->heapTid;
    if (scan->xs_want_itup)
        scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset);
 
index 16f57557776908519ea376f8ea2a5803acc50131..a4b287aff774f115be69371045167d5ba947b03c 100644 (file)
@@ -60,6 +60,7 @@
 #include "access/nbtree.h"
 #include "access/parallel.h"
 #include "access/relscan.h"
+#include "access/tableam.h"
 #include "access/xact.h"
 #include "access/xlog.h"
 #include "access/xloginsert.h"
@@ -71,7 +72,7 @@
 #include "utils/rel.h"
 #include "utils/sortsupport.h"
 #include "utils/tuplesort.h"
-
+#include "utils/tqual.h"
 
 /* Magic numbers for parallel state sharing */
 #define PARALLEL_KEY_BTREE_SHARED      UINT64CONST(0xA000000000000001)
@@ -159,7 +160,7 @@ typedef struct BTShared
     *
     * See _bt_parallel_estimate_shared().
     */
-   ParallelHeapScanDescData heapdesc;
+   ParallelTableScanDescData paralleldesc;
 } BTShared;
 
 /*
@@ -469,9 +470,9 @@ _bt_spools_heapscan(Relation heap, Relation index, BTBuildState *buildstate,
 
    /* Fill spool using either serial or parallel heap scan */
    if (!buildstate->btleader)
-       reltuples = IndexBuildHeapScan(heap, index, indexInfo, true,
-                                      _bt_build_callback, (void *) buildstate,
-                                      NULL);
+       reltuples = table_index_build_scan(heap, index, indexInfo, true,
+                                          _bt_build_callback, (void *) buildstate,
+                                          NULL);
    else
        reltuples = _bt_parallel_heapscan(buildstate,
                                          &indexInfo->ii_BrokenHotChain);
@@ -546,7 +547,7 @@ _bt_leafbuild(BTSpool *btspool, BTSpool *btspool2)
 }
 
 /*
- * Per-tuple callback from IndexBuildHeapScan
+ * Per-tuple callback from table_index_build_scan
  */
 static void
 _bt_build_callback(Relation index,
@@ -1315,7 +1316,7 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request)
    btshared->havedead = false;
    btshared->indtuples = 0.0;
    btshared->brokenhotchain = false;
-   heap_parallelscan_initialize(&btshared->heapdesc, btspool->heap, snapshot);
+   table_parallelscan_initialize(&btshared->paralleldesc, btspool->heap, snapshot);
 
    /*
     * Store shared tuplesort-private state, for which we reserved space.
@@ -1410,8 +1411,8 @@ _bt_parallel_estimate_shared(Snapshot snapshot)
        return sizeof(BTShared);
    }
 
-   return add_size(offsetof(BTShared, heapdesc) +
-                   offsetof(ParallelHeapScanDescData, phs_snapshot_data),
+   return add_size(offsetof(BTShared, paralleldesc) +
+                   offsetof(ParallelTableScanDescData, phs_snapshot_data),
                    EstimateSnapshotSpace(snapshot));
 }
 
@@ -1623,7 +1624,7 @@ _bt_parallel_scan_and_sort(BTSpool *btspool, BTSpool *btspool2,
 {
    SortCoordinate coordinate;
    BTBuildState buildstate;
-   HeapScanDesc scan;
+   TableScanDesc scan;
    double      reltuples;
    IndexInfo  *indexInfo;
 
@@ -1676,10 +1677,10 @@ _bt_parallel_scan_and_sort(BTSpool *btspool, BTSpool *btspool2,
    /* Join parallel scan */
    indexInfo = BuildIndexInfo(btspool->index);
    indexInfo->ii_Concurrent = btshared->isconcurrent;
-   scan = heap_beginscan_parallel(btspool->heap, &btshared->heapdesc);
-   reltuples = IndexBuildHeapScan(btspool->heap, btspool->index, indexInfo,
-                                  true, _bt_build_callback,
-                                  (void *) &buildstate, scan);
+   scan = table_beginscan_parallel(btspool->heap, &btshared->paralleldesc);
+   reltuples = table_index_build_scan(btspool->heap, btspool->index, indexInfo,
+                                      true, _bt_build_callback,
+                                      (void *) &buildstate, scan);
 
    /*
     * Execute this worker's part of the sort.
index 7dd0d61fbbca6365dae442c4978555574368d712..2a4d0c9a54870f56e4b8357871475d5a6b2d0d37 100644 (file)
@@ -19,6 +19,7 @@
 #include "access/genam.h"
 #include "access/spgist_private.h"
 #include "access/spgxlog.h"
+#include "access/tableam.h"
 #include "access/xlog.h"
 #include "access/xloginsert.h"
 #include "catalog/index.h"
@@ -37,7 +38,7 @@ typedef struct
 } SpGistBuildState;
 
 
-/* Callback to process one heap tuple during IndexBuildHeapScan */
+/* Callback to process one heap tuple during table_index_build_scan */
 static void
 spgistBuildCallback(Relation index, HeapTuple htup, Datum *values,
                    bool *isnull, bool tupleIsAlive, void *state)
@@ -142,9 +143,9 @@ spgbuild(Relation heap, Relation index, IndexInfo *indexInfo)
                                              "SP-GiST build temporary context",
                                              ALLOCSET_DEFAULT_SIZES);
 
-   reltuples = IndexBuildHeapScan(heap, index, indexInfo, true,
-                                  spgistBuildCallback, (void *) &buildstate,
-                                  NULL);
+   reltuples = table_index_build_scan(heap, index, indexInfo, true,
+                                      spgistBuildCallback, (void *) &buildstate,
+                                      NULL);
 
    MemoryContextDelete(buildstate.tmpCtx);
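
All of these index build sites now share the table_index_build_scan() entry point with the same per-tuple callback shape shown in spgistBuildCallback above. A sketch of a trivial callback (hypothetical, counting live tuples only):

    /* Sketch: minimal table_index_build_scan() callback. */
    static void
    count_tuples_callback(Relation index, HeapTuple htup, Datum *values,
                          bool *isnull, bool tupleIsAlive, void *state)
    {
        if (tupleIsAlive)
            (*(double *) state)++;
    }

    /* usage: */
    double      ntuples = 0;

    (void) table_index_build_scan(heap, index, indexInfo, true,
                                  count_tuples_callback, &ntuples, NULL);
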
 
index c883ae95e48c0e9f61b0cc552170cf781da90662..e84be1adead6cb77f7be305e5f0f10203bc28d49 100644 (file)
@@ -927,7 +927,7 @@ spggettuple(IndexScanDesc scan, ScanDirection dir)
        if (so->iPtr < so->nPtrs)
        {
            /* continuing to return reported tuples */
-           scan->xs_ctup.t_self = so->heapPtrs[so->iPtr];
+           scan->xs_heaptid = so->heapPtrs[so->iPtr];
            scan->xs_recheck = so->recheck[so->iPtr];
            scan->xs_hitup = so->reconTups[so->iPtr];
 
diff --git a/src/backend/access/table/Makefile b/src/backend/access/table/Makefile
new file mode 100644 (file)
index 0000000..006ba99
--- /dev/null
@@ -0,0 +1,17 @@
+#-------------------------------------------------------------------------
+#
+# Makefile--
+#    Makefile for access/table
+#
+# IDENTIFICATION
+#    src/backend/access/table/Makefile
+#
+#-------------------------------------------------------------------------
+
+subdir = src/backend/access/table
+top_builddir = ../../../..
+include $(top_builddir)/src/Makefile.global
+
+OBJS = tableam.o tableamapi.o
+
+include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c
new file mode 100644 (file)
index 0000000..af99264
--- /dev/null
@@ -0,0 +1,187 @@
+/*----------------------------------------------------------------------
+ *
+ * tableam.c
+ *     Table access method routines too big to be inline functions.
+ *
+ * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/backend/access/table/tableam.c
+ *----------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/tableam.h"
+#include "storage/bufmgr.h"
+#include "storage/shmem.h"
+
+/* GUC variable */
+bool       synchronize_seqscans = true;
+
+char *default_table_access_method = DEFAULT_TABLE_ACCESS_METHOD;
+
+
+/* ----------------
+ *     table_parallelscan_estimate - estimate storage for ParallelTableScanDesc
+ *
+ *     Sadly, this doesn't reduce to a constant, because the size required
+ *     to serialize the snapshot can vary.
+ * ----------------
+ */
+Size
+table_parallelscan_estimate(Snapshot snapshot)
+{
+   return add_size(offsetof(ParallelTableScanDescData, phs_snapshot_data),
+                   EstimateSnapshotSpace(snapshot));
+}
+
+/* ----------------
+ *     table_parallelscan_initialize - initialize ParallelTableScanDesc
+ *
+ *     Must allow as many bytes of shared memory as returned by
+ *     table_parallelscan_estimate.  Call this just once in the leader
+ *     process; then, individual workers attach via table_beginscan_parallel.
+ * ----------------
+ */
+void
+table_parallelscan_initialize(ParallelTableScanDesc target, Relation relation,
+                            Snapshot snapshot)
+{
+   target->phs_relid = RelationGetRelid(relation);
+   target->phs_nblocks = RelationGetNumberOfBlocks(relation);
+   /* compare phs_syncscan initialization to similar logic in initscan */
+   target->phs_syncscan = synchronize_seqscans &&
+       !RelationUsesLocalBuffers(relation) &&
+       target->phs_nblocks > NBuffers / 4;
+   SpinLockInit(&target->phs_mutex);
+   target->phs_startblock = InvalidBlockNumber;
+   pg_atomic_init_u64(&target->phs_nallocated, 0);
+   if (IsMVCCSnapshot(snapshot))
+   {
+       SerializeSnapshot(snapshot, target->phs_snapshot_data);
+       target->phs_snapshot_any = false;
+   }
+   else
+   {
+       Assert(snapshot == SnapshotAny);
+       target->phs_snapshot_any = true;
+   }
+}
+
+/* ----------------
+ *     table_parallelscan_reinitialize - reset a parallel scan
+ *
+ *     Call this in the leader process.  Caller is responsible for
+ *     making sure that all workers have finished the scan beforehand.
+ * ----------------
+ */
+void
+table_parallelscan_reinitialize(ParallelTableScanDesc parallel_scan)
+{
+   pg_atomic_write_u64(&parallel_scan->phs_nallocated, 0);
+}
+
+/* ----------------
+ *     table_parallelscan_startblock_init - find and set the scan's startblock
+ *
+ *     Determine where the parallel seq scan should start.  This function may
+ *     be called many times, once by each parallel worker.  We must be careful
+ *     only to set the startblock once.
+ * ----------------
+ */
+void
+table_parallelscan_startblock_init(TableScanDesc scan)
+{
+   BlockNumber sync_startpage = InvalidBlockNumber;
+   ParallelTableScanDesc parallel_scan;
+
+   Assert(scan->rs_parallel);
+   parallel_scan = scan->rs_parallel;
+
+retry:
+   /* Grab the spinlock. */
+   SpinLockAcquire(&parallel_scan->phs_mutex);
+
+   /*
+    * If the scan's startblock has not yet been initialized, we must do so
+    * now.  If this is not a synchronized scan, we just start at block 0, but
+    * if it is a synchronized scan, we must get the starting position from
+    * the synchronized scan machinery.  We can't hold the spinlock while
+    * doing that, though, so release the spinlock, get the information we
+    * need, and retry.  If nobody else has initialized the scan in the
+    * meantime, we'll fill in the value we fetched on the second time
+    * through.
+    */
+   if (parallel_scan->phs_startblock == InvalidBlockNumber)
+   {
+       if (!parallel_scan->phs_syncscan)
+           parallel_scan->phs_startblock = 0;
+       else if (sync_startpage != InvalidBlockNumber)
+           parallel_scan->phs_startblock = sync_startpage;
+       else
+       {
+           SpinLockRelease(&parallel_scan->phs_mutex);
+           sync_startpage = ss_get_location(scan->rs_rd, scan->rs_nblocks);
+           goto retry;
+       }
+   }
+   SpinLockRelease(&parallel_scan->phs_mutex);
+}
+
+/* ----------------
+ *     table_parallelscan_nextpage - get the next page to scan
+ *
+ *     Get the next page to scan.  Even if there are no pages left to scan,
+ *     another backend could have grabbed a page to scan and not yet finished
+ *     looking at it, so it doesn't follow that the scan is done when the
+ *     first backend gets an InvalidBlockNumber return.
+ * ----------------
+ */
+BlockNumber
+table_parallelscan_nextpage(TableScanDesc scan)
+{
+   BlockNumber page;
+   ParallelTableScanDesc parallel_scan;
+   uint64      nallocated;
+
+   Assert(scan->rs_parallel);
+   parallel_scan = scan->rs_parallel;
+
+   /*
+    * phs_nallocated tracks how many pages have been allocated to workers
+    * already.  When phs_nallocated >= rs_nblocks, all blocks have been
+    * allocated.
+    *
+    * Because we use an atomic fetch-and-add to fetch the current value, the
+    * phs_nallocated counter will exceed rs_nblocks, because workers will
+    * still increment the value when they try to allocate the next block
+    * after all blocks have been allocated already. The counter must be 64 bits
+    * wide because of that, to avoid wrapping around when rs_nblocks is close
+    * to 2^32.
+    *
+    * The actual page to return is calculated by adding the counter to the
+    * starting block number, modulo nblocks.
+    */
+   nallocated = pg_atomic_fetch_add_u64(&parallel_scan->phs_nallocated, 1);
+   if (nallocated >= scan->rs_nblocks)
+       page = InvalidBlockNumber;  /* all blocks have been allocated */
+   else
+       page = (nallocated + parallel_scan->phs_startblock) % scan->rs_nblocks;
+
+   /*
+    * Report scan location.  Normally, we report the current page number.
+    * When we reach the end of the scan, though, we report the starting page,
+    * not the ending page, just so the starting positions for later scans
+    * doesn't slew backwards.  We only report the position at the end of the
+    * scan once, though: subsequent callers will report nothing.
+    */
+   if (scan->rs_syncscan)
+   {
+       if (page != InvalidBlockNumber)
+           ss_report_location(scan->rs_rd, page);
+       else if (nallocated == scan->rs_nblocks)
+           ss_report_location(scan->rs_rd, parallel_scan->phs_startblock);
+   }
+
+   return page;
+}
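
Taken together, the leader/worker handshake for a parallel table scan looks roughly like this (a sketch; the shm_toc key bookkeeping and error handling are elided, and pcxt is an assumed ParallelContext):

    /* Leader: size, allocate, and fill the shared scan descriptor. */
    Size        sz = table_parallelscan_estimate(snapshot);
    ParallelTableScanDesc pscan = shm_toc_allocate(pcxt->toc, sz);

    table_parallelscan_initialize(pscan, rel, snapshot);

    /* Worker (and leader, if participating): attach and consume blocks. */
    TableScanDesc scan = table_beginscan_parallel(rel, pscan);
    BlockNumber blockno;

    table_parallelscan_startblock_init(scan);
    while ((blockno = table_parallelscan_nextpage(scan)) != InvalidBlockNumber)
    {
        /* read blockno and emit its visible tuples */
    }
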
diff --git a/src/backend/access/table/tableamapi.c b/src/backend/access/table/tableamapi.c
new file mode 100644 (file)
index 0000000..b2d283a
--- /dev/null
@@ -0,0 +1,187 @@
+/*----------------------------------------------------------------------
+ *
+ * tableamapi.c
+ *     Support routines for API for Postgres table access methods
+ *
+ * FIXME: looks like this should be in amapi.c.
+ *
+ * Copyright (c) 2018, PostgreSQL Global Development Group
+ *
+ * src/backend/access/table/tableamapi.c
+ *----------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/htup_details.h"
+#include "access/tableam.h"
+#include "access/xact.h"
+#include "catalog/pg_am.h"
+#include "catalog/pg_proc.h"
+#include "utils/fmgroids.h"
+#include "utils/syscache.h"
+#include "utils/memutils.h"
+
+static Oid get_table_am_oid(const char *tableamname, bool missing_ok);
+
+TupleTableSlot *
+table_gimmegimmeslot(Relation relation, List **reglist)
+{
+   const TupleTableSlotOps *tts_cb;
+   TupleTableSlot *slot;
+
+   tts_cb = table_slot_callbacks(relation);
+   slot = MakeSingleTupleTableSlot(RelationGetDescr(relation), tts_cb);
+
+   if (reglist)
+       *reglist = lappend(*reglist, slot);
+
+   return slot;
+}
+
+
+/*
+ * GetTableAmRoutine
+ *     Call the specified access method handler routine to get its
+ *     TableAmRoutine struct, which will be palloc'd in the caller's
+ *     memory context.
+ */
+const TableAmRoutine *
+GetTableAmRoutine(Oid amhandler)
+{
+   Datum       datum;
+   const TableAmRoutine *routine;
+
+   datum = OidFunctionCall0(amhandler);
+   routine = (TableAmRoutine *) DatumGetPointer(datum);
+
+   if (routine == NULL || !IsA(routine, TableAmRoutine))
+       elog(ERROR, "Table access method handler %u did not return a TableAmRoutine struct",
+            amhandler);
+
+   return routine;
+}
+
+/*
+ * GetTableAmRoutineByAmId - look up the handler of the table access
+ * method with the given OID, and get its TableAmRoutine struct.
+ */
+const TableAmRoutine *
+GetTableAmRoutineByAmId(Oid amoid)
+{
+   regproc     amhandler;
+   HeapTuple   tuple;
+   Form_pg_am  amform;
+
+   /* Get handler function OID for the access method */
+   tuple = SearchSysCache1(AMOID, ObjectIdGetDatum(amoid));
+   if (!HeapTupleIsValid(tuple))
+       elog(ERROR, "cache lookup failed for access method %u",
+            amoid);
+   amform = (Form_pg_am) GETSTRUCT(tuple);
+
+   /* Check that it is a table access method */
+   if (amform->amtype != AMTYPE_TABLE)
+       ereport(ERROR,
+               (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                errmsg("access method \"%s\" is not of type %s",
+                       NameStr(amform->amname), "TABLE")));
+
+   amhandler = amform->amhandler;
+
+   /* Complain if handler OID is invalid */
+   if (!RegProcedureIsValid(amhandler))
+       ereport(ERROR,
+               (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                errmsg("table access method \"%s\" does not have a handler",
+                       NameStr(amform->amname))));
+
+   ReleaseSysCache(tuple);
+
+   /* And finally, call the handler function to get the API struct. */
+   return GetTableAmRoutine(amhandler);
+}
+
+/*
+ * get_table_am_oid - given a table access method name, look up the OID
+ *
+ * If missing_ok is false, throw an error if table access method name not
+ * found. If true, just return InvalidOid.
+ */
+static Oid
+get_table_am_oid(const char *tableamname, bool missing_ok)
+{
+   Oid         result;
+   Relation    rel;
+   TableScanDesc scandesc;
+   HeapTuple   tuple;
+   ScanKeyData entry[1];
+
+   /*
+    * Search pg_am.  We use a heapscan here even though there is an index on
+    * name, on the theory that pg_am will usually have just a few entries and
+    * so an indexed lookup is a waste of effort.
+    */
+   rel = heap_open(AccessMethodRelationId, AccessShareLock);
+
+   ScanKeyInit(&entry[0],
+               Anum_pg_am_amname,
+               BTEqualStrategyNumber, F_NAMEEQ,
+               CStringGetDatum(tableamname));
+   scandesc = table_beginscan_catalog(rel, 1, entry);
+   tuple = heap_scan_getnext(scandesc, ForwardScanDirection);
+
+   /* We assume that there can be at most one matching tuple */
+   if (HeapTupleIsValid(tuple) &&
+       ((Form_pg_am) GETSTRUCT(tuple))->amtype == AMTYPE_TABLE)
+       result = ((Form_pg_am) GETSTRUCT(tuple))->oid;
+   else
+       result = InvalidOid;
+
+   table_endscan(scandesc);
+   heap_close(rel, AccessShareLock);
+
+   if (!OidIsValid(result) && !missing_ok)
+       ereport(ERROR,
+               (errcode(ERRCODE_UNDEFINED_OBJECT),
+                errmsg("table access method \"%s\" does not exist",
+                        tableamname)));
+
+   return result;
+}
+
+/* check_hook: validate new default_table_access_method */
+bool
+check_default_table_access_method(char **newval, void **extra, GucSource source)
+{
+   /*
+    * If we aren't inside a transaction, we cannot do database access so
+    * cannot verify the name.  Must accept the value on faith.
+    */
+   if (IsTransactionState())
+   {
+       if (**newval != '\0' &&
+           !OidIsValid(get_table_am_oid(*newval, true)))
+       {
+           /*
+            * When source == PGC_S_TEST, don't throw a hard error for a
+            * nonexistent table access method, only a NOTICE.
+            * See comments in guc.h.
+            */
+           if (source == PGC_S_TEST)
+           {
+               ereport(NOTICE,
+                       (errcode(ERRCODE_UNDEFINED_OBJECT),
+                        errmsg("Table access method \"%s\" does not exist",
+                               *newval)));
+           }
+           else
+           {
+               GUC_check_errdetail("Table access method \"%s\" does not exist.",
+                                   *newval);
+               return false;
+           }
+       }
+   }
+
+   return true;
+}
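
A sketch of how a relation's AM routine would be resolved from its pg_class entry (an assumption about the relcache side, which is not part of this file):

    /* Sketch: pick the table AM routine for an open relation. */
    const TableAmRoutine *routine;

    if (OidIsValid(relation->rd_rel->relam))
        routine = GetTableAmRoutineByAmId(relation->rd_rel->relam);
    else
        routine = GetHeapamTableAmRoutine();    /* fall back to heap */
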
index 4d937b4258af037e9c335d9b48378768c59cd658..1354e3f54aaef3ac1e4d1e002801a35432ac5cc7 100644 (file)
@@ -56,7 +56,7 @@ static void system_beginsamplescan(SampleScanState *node,
                       Datum *params,
                       int nparams,
                       uint32 seed);
-static BlockNumber system_nextsampleblock(SampleScanState *node);
+static BlockNumber system_nextsampleblock(SampleScanState *node, BlockNumber nblocks);
 static OffsetNumber system_nextsampletuple(SampleScanState *node,
                       BlockNumber blockno,
                       OffsetNumber maxoffset);
@@ -177,10 +177,9 @@ system_beginsamplescan(SampleScanState *node,
  * Select next block to sample.
  */
 static BlockNumber
-system_nextsampleblock(SampleScanState *node)
+system_nextsampleblock(SampleScanState *node, BlockNumber nblocks)
 {
    SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
-   HeapScanDesc scan = node->ss.ss_currentScanDesc;
    BlockNumber nextblock = sampler->nextblock;
    uint32      hashinput[2];
 
@@ -199,7 +198,7 @@ system_nextsampleblock(SampleScanState *node)
     * Loop over block numbers until finding suitable block or reaching end of
     * relation.
     */
-   for (; nextblock < scan->rs_nblocks; nextblock++)
+   for (; nextblock < nblocks; nextblock++)
    {
        uint32      hash;
 
@@ -211,7 +210,7 @@ system_nextsampleblock(SampleScanState *node)
            break;
    }
 
-   if (nextblock < scan->rs_nblocks)
+   if (nextblock < nblocks)
    {
        /* Found a suitable block; remember where we should start next time */
        sampler->nextblock = nextblock + 1;
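
Since sample-scan AMs can no longer peek into a HeapScanDesc, the relation's block count now arrives as a parameter. A sketch of the simplest possible nextsampleblock under the new signature (MyState is a hypothetical per-scan state struct):

    /* Sketch: return every block once under the new callback signature. */
    typedef struct MyState
    {
        BlockNumber nextblock;
    } MyState;

    static BlockNumber
    my_nextsampleblock(SampleScanState *node, BlockNumber nblocks)
    {
        MyState    *s = (MyState *) node->tsm_state;

        if (s->nextblock < nblocks)
            return s->nextblock++;
        return InvalidBlockNumber;      /* no more blocks to sample */
    }
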
index 71c3714c48c9f838af6aa890e271a17fd89c56c6..e4208f406848d6602bf60c9423fd2d3cd7afc142 100644 (file)
@@ -220,6 +220,7 @@ Boot_CreateStmt:
                                                   shared_relation ? GLOBALTABLESPACE_OID : 0,
                                                   $3,
                                                   InvalidOid,
+                                                  HEAP_TABLE_AM_OID,
                                                   tupdesc,
                                                   RELKIND_RELATION,
                                                   RELPERSISTENCE_PERMANENT,
@@ -239,6 +240,7 @@ Boot_CreateStmt:
                                                      $6,
                                                      InvalidOid,
                                                      BOOTSTRAP_SUPERUSERID,
+                                                     HEAP_TABLE_AM_OID,
                                                      tupdesc,
                                                      NIL,
                                                      RELKIND_RELATION,
index 7caab64ce7808be7b18c5a555dbca9930fa49e49..c55060e8f7c98a67df00e3b3959ccd0b174e66c9 100644 (file)
@@ -18,6 +18,7 @@
 #include <signal.h>
 
 #include "access/htup_details.h"
+#include "access/tableam.h"
 #include "access/xact.h"
 #include "access/xlog_internal.h"
 #include "bootstrap/bootstrap.h"
@@ -593,7 +594,7 @@ boot_openrel(char *relname)
    int         i;
    struct typmap **app;
    Relation    rel;
-   HeapScanDesc scan;
+   TableScanDesc scan;
    HeapTuple   tup;
 
    if (strlen(relname) >= NAMEDATALEN)
@@ -603,18 +604,18 @@ boot_openrel(char *relname)
    {
        /* We can now load the pg_type data */
        rel = heap_open(TypeRelationId, NoLock);
-       scan = heap_beginscan_catalog(rel, 0, NULL);
+       scan = table_beginscan_catalog(rel, 0, NULL);
        i = 0;
-       while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL)
+       while ((tup = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
            ++i;
-       heap_endscan(scan);
+       table_endscan(scan);
        app = Typ = ALLOC(struct typmap *, i + 1);
        while (i-- > 0)
            *app++ = ALLOC(struct typmap, 1);
        *app = NULL;
-       scan = heap_beginscan_catalog(rel, 0, NULL);
+       scan = table_beginscan_catalog(rel, 0, NULL);
        app = Typ;
-       while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL)
+       while ((tup = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
        {
            (*app)->am_oid = ((Form_pg_type) GETSTRUCT(tup))->oid;
            memcpy((char *) &(*app)->am_typ,
@@ -622,7 +623,7 @@ boot_openrel(char *relname)
                   sizeof((*app)->am_typ));
            app++;
        }
-       heap_endscan(scan);
+       table_endscan(scan);
        heap_close(rel, NoLock);
    }
 
@@ -905,7 +906,7 @@ gettype(char *type)
 {
    int         i;
    Relation    rel;
-   HeapScanDesc scan;
+   TableScanDesc scan;
    HeapTuple   tup;
    struct typmap **app;
 
@@ -929,25 +930,25 @@ gettype(char *type)
        }
        elog(DEBUG4, "external type: %s", type);
        rel = heap_open(TypeRelationId, NoLock);
-       scan = heap_beginscan_catalog(rel, 0, NULL);
+       scan = table_beginscan_catalog(rel, 0, NULL);
        i = 0;
-       while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL)
+       while ((tup = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
            ++i;
-       heap_endscan(scan);
+       table_endscan(scan);
        app = Typ = ALLOC(struct typmap *, i + 1);
        while (i-- > 0)
            *app++ = ALLOC(struct typmap, 1);
        *app = NULL;
-       scan = heap_beginscan_catalog(rel, 0, NULL);
+       scan = table_beginscan_catalog(rel, 0, NULL);
        app = Typ;
-       while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL)
+       while ((tup = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
        {
            (*app)->am_oid = ((Form_pg_type) GETSTRUCT(tup))->oid;
            memmove((char *) &(*app++)->am_typ,
                    (char *) GETSTRUCT(tup),
                    sizeof((*app)->am_typ));
        }
-       heap_endscan(scan);
+       table_endscan(scan);
        heap_close(rel, NoLock);
        return gettype(type);
    }
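
This is the conversion pattern applied throughout the commit; condensed, it reads as below, with heap_scan_getnext() kept as a HeapTuple-returning compatibility wrapper for callers not yet converted to slots:

    TableScanDesc scan;
    HeapTuple     tup;

    scan = table_beginscan_catalog(rel, 0, NULL);
    while ((tup = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
    {
        /* ... inspect GETSTRUCT(tup) ... */
    }
    table_endscan(scan);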
index 1dd70bb9c693e9a9516ead77020cd09e9f0bbab2..a5068cef5d92eea41e44ec9d623566d6a31f3dfe 100644 (file)
@@ -20,6 +20,7 @@
 #include "access/genam.h"
 #include "access/heapam.h"
 #include "access/htup_details.h"
+#include "access/tableam.h"
 #include "access/sysattr.h"
 #include "access/xact.h"
 #include "catalog/binary_upgrade.h"
@@ -822,7 +823,7 @@ objectsInSchemaToOids(ObjectType objtype, List *nspnames)
                    ScanKeyData key[2];
                    int         keycount;
                    Relation    rel;
-                   HeapScanDesc scan;
+                   TableScanDesc scan;
                    HeapTuple   tuple;
 
                    keycount = 0;
@@ -844,16 +845,16 @@ objectsInSchemaToOids(ObjectType objtype, List *nspnames)
                                    CharGetDatum(PROKIND_PROCEDURE));
 
                    rel = heap_open(ProcedureRelationId, AccessShareLock);
-                   scan = heap_beginscan_catalog(rel, keycount, key);
+                   scan = table_beginscan_catalog(rel, keycount, key);
 
-                   while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+                   while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
                    {
                        Oid     oid = ((Form_pg_proc) GETSTRUCT(tuple))->oid;
 
                        objects = lappend_oid(objects, oid);
                    }
 
-                   heap_endscan(scan);
+                   table_endscan(scan);
                    heap_close(rel, AccessShareLock);
                }
                break;
@@ -878,7 +879,7 @@ getRelationsInNamespace(Oid namespaceId, char relkind)
    List       *relations = NIL;
    ScanKeyData key[2];
    Relation    rel;
-   HeapScanDesc scan;
+   TableScanDesc scan;
    HeapTuple   tuple;
 
    ScanKeyInit(&key[0],
@@ -891,16 +892,16 @@ getRelationsInNamespace(Oid namespaceId, char relkind)
                CharGetDatum(relkind));
 
    rel = heap_open(RelationRelationId, AccessShareLock);
-   scan = heap_beginscan_catalog(rel, 2, key);
+   scan = table_beginscan_catalog(rel, 2, key);
 
-   while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+   while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
    {
        Oid     oid  = ((Form_pg_class) GETSTRUCT(tuple))->oid;
 
        relations = lappend_oid(relations, oid);
    }
 
-   heap_endscan(scan);
+   table_endscan(scan);
    heap_close(rel, AccessShareLock);
 
    return relations;
index edc8ea9f53393166033f8513f66debdf5c6b59fc..552aa46657a60e359288083b32f6de23227b77b1 100644 (file)
@@ -163,7 +163,9 @@ my $BOOTSTRAP_SUPERUSERID =
 my $PG_CATALOG_NAMESPACE =
   Catalog::FindDefinedSymbolFromData($catalog_data{pg_namespace},
    'PG_CATALOG_NAMESPACE');
-
+my $PG_HEAP_AM =
+  Catalog::FindDefinedSymbolFromData($catalog_data{pg_am},
+   'HEAP_TABLE_AM_OID');
 
 # Build lookup tables for OID macro substitutions and for pg_attribute
 # copies of pg_type values.
@@ -426,6 +428,7 @@ EOM
            # (It's intentional that this can apply to parts of a field).
            $bki_values{$attname} =~ s/\bPGUID\b/$BOOTSTRAP_SUPERUSERID/g;
            $bki_values{$attname} =~ s/\bPGNSP\b/$PG_CATALOG_NAMESPACE/g;
+           $bki_values{$attname} =~ s/\bPGHEAPAM\b/$PG_HEAP_AM/g;
 
            # Replace OID synonyms with OIDs per the appropriate lookup rule.
            #
index 11debaa780e7f3c47f90ba19dd003352c49324b6..f85eab1714c2a15210bcc3f07b7ec85631d11d33 100644 (file)
@@ -42,6 +42,7 @@
 #include "catalog/index.h"
 #include "catalog/objectaccess.h"
 #include "catalog/partition.h"
+#include "catalog/pg_am.h"
 #include "catalog/pg_attrdef.h"
 #include "catalog/pg_collation.h"
 #include "catalog/pg_constraint.h"
@@ -292,6 +293,7 @@ heap_create(const char *relname,
            Oid reltablespace,
            Oid relid,
            Oid relfilenode,
+           Oid accessmtd,
            TupleDesc tupDesc,
            char relkind,
            char relpersistence,
@@ -394,6 +396,7 @@ heap_create(const char *relname,
                                     relnamespace,
                                     tupDesc,
                                     relid,
+                                    accessmtd,
                                     relfilenode,
                                     reltablespace,
                                     shared_relation,
@@ -1052,6 +1055,7 @@ heap_create_with_catalog(const char *relname,
                         Oid reltypeid,
                         Oid reloftypeid,
                         Oid ownerid,
+                        Oid accessmtd,
                         TupleDesc tupdesc,
                         List *cooked_constraints,
                         char relkind,
@@ -1193,6 +1197,7 @@ heap_create_with_catalog(const char *relname,
                               reltablespace,
                               relid,
                               InvalidOid,
+                              accessmtd,
                               tupdesc,
                               relkind,
                               relpersistence,
@@ -1349,6 +1354,22 @@ heap_create_with_catalog(const char *relname,
            referenced.objectSubId = 0;
            recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
        }
+
+       /*
+        * Make a dependency link to force the relation to be deleted if its
+        * access method is.  Do this only for plain relations and
+        * materialized views.
+        *
+        * No need to add an explicit dependency for the TOAST table, since
+        * it is dropped along with the main table.
+        */
+       if ((relkind == RELKIND_RELATION) ||
+               (relkind == RELKIND_MATVIEW))
+       {
+           referenced.classId = AccessMethodRelationId;
+           referenced.objectId = accessmtd;
+           referenced.objectSubId = 0;
+           recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
+       }
    }
 
    /* Post creation hook for new relation */
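
Condensed with the ObjectAddressSet() helper, the new dependency amounts to the sketch below; the practical effect is that a table access method cannot be dropped while plain tables or materialized views still use it, short of CASCADE:

    ObjectAddress myself, referenced;

    ObjectAddressSet(myself, RelationRelationId, relid);
    ObjectAddressSet(referenced, AccessMethodRelationId, accessmtd);

    /* table -> access method; severed only by dropping the table */
    recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);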
index 8709e8c22c77edaf3ffbd2c677a5e22cb8231bff..6c541cbe6ea67ac1f36ef6316566e735134206f6 100644 (file)
@@ -27,6 +27,7 @@
 #include "access/multixact.h"
 #include "access/relscan.h"
 #include "access/reloptions.h"
+#include "access/tableam.h"
 #include "access/sysattr.h"
 #include "access/transam.h"
 #include "access/visibilitymap.h"
 /* Potentially set by pg_upgrade_support functions */
 Oid            binary_upgrade_next_index_pg_class_oid = InvalidOid;
 
-/* state info for validate_index bulkdelete callback */
-typedef struct
-{
-   Tuplesortstate *tuplesort;  /* for sorting the index TIDs */
-   /* statistics (for debug purposes only): */
-   double      htups,
-               itups,
-               tups_inserted;
-} v_i_state;
-
 /*
  * Pointer-free representation of variables used when reindexing system
  * catalogs; we use this to propagate those values to parallel workers.
@@ -131,14 +122,7 @@ static void index_update_stats(Relation rel,
 static void IndexCheckExclusion(Relation heapRelation,
                    Relation indexRelation,
                    IndexInfo *indexInfo);
-static inline int64 itemptr_encode(ItemPointer itemptr);
-static inline void itemptr_decode(ItemPointer itemptr, int64 encoded);
 static bool validate_index_callback(ItemPointer itemptr, void *opaque);
-static void validate_index_heapscan(Relation heapRelation,
-                       Relation indexRelation,
-                       IndexInfo *indexInfo,
-                       Snapshot snapshot,
-                       v_i_state *state);
 static bool ReindexIsCurrentlyProcessingIndex(Oid indexOid);
 static void SetReindexProcessing(Oid heapOid, Oid indexOid);
 static void ResetReindexProcessing(void);
@@ -909,6 +893,7 @@ index_create(Relation heapRelation,
                                tableSpaceId,
                                indexRelationId,
                                relFileNode,
+                               accessMethodObjectId,
                                indexTupDesc,
                                relkind,
                                relpersistence,
@@ -2130,7 +2115,7 @@ index_update_stats(Relation rel,
        ReindexIsProcessingHeap(RelationRelationId))
    {
        /* don't assume syscache will work */
-       HeapScanDesc pg_class_scan;
+       TableScanDesc pg_class_scan;
        ScanKeyData key[1];
 
        ScanKeyInit(&key[0],
@@ -2138,10 +2123,10 @@ index_update_stats(Relation rel,
                    BTEqualStrategyNumber, F_OIDEQ,
                    ObjectIdGetDatum(relid));
 
-       pg_class_scan = heap_beginscan_catalog(pg_class, 1, key);
-       tuple = heap_getnext(pg_class_scan, ForwardScanDirection);
+       pg_class_scan = table_beginscan_catalog(pg_class, 1, key);
+       tuple = heap_scan_getnext(pg_class_scan, ForwardScanDirection);
        tuple = heap_copytuple(tuple);
-       heap_endscan(pg_class_scan);
+       table_endscan(pg_class_scan);
    }
    else
    {
@@ -2397,555 +2382,6 @@ index_build(Relation heapRelation,
    SetUserIdAndSecContext(save_userid, save_sec_context);
 }
 
-
-/*
- * IndexBuildHeapScan - scan the heap relation to find tuples to be indexed
- *
- * This is called back from an access-method-specific index build procedure
- * after the AM has done whatever setup it needs.  The parent heap relation
- * is scanned to find tuples that should be entered into the index.  Each
- * such tuple is passed to the AM's callback routine, which does the right
- * things to add it to the new index.  After we return, the AM's index
- * build procedure does whatever cleanup it needs.
- *
- * The total count of live heap tuples is returned.  This is for updating
- * pg_class statistics.  (It's annoying not to be able to do that here, but we
- * want to merge that update with others; see index_update_stats.)  Note that
- * the index AM itself must keep track of the number of index tuples; we don't
- * do so here because the AM might reject some of the tuples for its own
- * reasons, such as being unable to store NULLs.
- *
- * A side effect is to set indexInfo->ii_BrokenHotChain to true if we detect
- * any potentially broken HOT chains.  Currently, we set this if there are
- * any RECENTLY_DEAD or DELETE_IN_PROGRESS entries in a HOT chain, without
- * trying very hard to detect whether they're really incompatible with the
- * chain tip.
- */
-double
-IndexBuildHeapScan(Relation heapRelation,
-                  Relation indexRelation,
-                  IndexInfo *indexInfo,
-                  bool allow_sync,
-                  IndexBuildCallback callback,
-                  void *callback_state,
-                  HeapScanDesc scan)
-{
-   return IndexBuildHeapRangeScan(heapRelation, indexRelation,
-                                  indexInfo, allow_sync,
-                                  false,
-                                  0, InvalidBlockNumber,
-                                  callback, callback_state, scan);
-}
-
-/*
- * As above, except that instead of scanning the complete heap, only the given
- * number of blocks are scanned.  Scan to end-of-rel can be signalled by
- * passing InvalidBlockNumber as numblocks.  Note that restricting the range
- * to scan cannot be done when requesting syncscan.
- *
- * When "anyvisible" mode is requested, all tuples visible to any transaction
- * are indexed and counted as live, including those inserted or deleted by
- * transactions that are still in progress.
- */
-double
-IndexBuildHeapRangeScan(Relation heapRelation,
-                       Relation indexRelation,
-                       IndexInfo *indexInfo,
-                       bool allow_sync,
-                       bool anyvisible,
-                       BlockNumber start_blockno,
-                       BlockNumber numblocks,
-                       IndexBuildCallback callback,
-                       void *callback_state,
-                       HeapScanDesc scan)
-{
-   bool        is_system_catalog;
-   bool        checking_uniqueness;
-   HeapTuple   heapTuple;
-   Datum       values[INDEX_MAX_KEYS];
-   bool        isnull[INDEX_MAX_KEYS];
-   double      reltuples;
-   ExprState  *predicate;
-   TupleTableSlot *slot;
-   EState     *estate;
-   ExprContext *econtext;
-   Snapshot    snapshot;
-   bool        need_unregister_snapshot = false;
-   TransactionId OldestXmin;
-   BlockNumber root_blkno = InvalidBlockNumber;
-   OffsetNumber root_offsets[MaxHeapTuplesPerPage];
-
-   /*
-    * sanity checks
-    */
-   Assert(OidIsValid(indexRelation->rd_rel->relam));
-
-   /* Remember if it's a system catalog */
-   is_system_catalog = IsSystemRelation(heapRelation);
-
-   /* See whether we're verifying uniqueness/exclusion properties */
-   checking_uniqueness = (indexInfo->ii_Unique ||
-                          indexInfo->ii_ExclusionOps != NULL);
-
-   /*
-    * "Any visible" mode is not compatible with uniqueness checks; make sure
-    * only one of those is requested.
-    */
-   Assert(!(anyvisible && checking_uniqueness));
-
-   /*
-    * Need an EState for evaluation of index expressions and partial-index
-    * predicates.  Also a slot to hold the current tuple.
-    */
-   estate = CreateExecutorState();
-   econtext = GetPerTupleExprContext(estate);
-   slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation),
-                                   &TTSOpsHeapTuple);
-
-   /* Arrange for econtext's scan tuple to be the tuple under test */
-   econtext->ecxt_scantuple = slot;
-
-   /* Set up execution state for predicate, if any. */
-   predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
-
-   /*
-    * Prepare for scan of the base relation.  In a normal index build, we use
-    * SnapshotAny because we must retrieve all tuples and do our own time
-    * qual checks (because we have to index RECENTLY_DEAD tuples). In a
-    * concurrent build, or during bootstrap, we take a regular MVCC snapshot
-    * and index whatever's live according to that.
-    */
-   OldestXmin = InvalidTransactionId;
-
-   /* okay to ignore lazy VACUUMs here */
-   if (!IsBootstrapProcessingMode() && !indexInfo->ii_Concurrent)
-       OldestXmin = GetOldestXmin(heapRelation, PROCARRAY_FLAGS_VACUUM);
-
-   if (!scan)
-   {
-       /*
-        * Serial index build.
-        *
-        * Must begin our own heap scan in this case.  We may also need to
-        * register a snapshot whose lifetime is under our direct control.
-        */
-       if (!TransactionIdIsValid(OldestXmin))
-       {
-           snapshot = RegisterSnapshot(GetTransactionSnapshot());
-           need_unregister_snapshot = true;
-       }
-       else
-           snapshot = SnapshotAny;
-
-       scan = heap_beginscan_strat(heapRelation,   /* relation */
-                                   snapshot,   /* snapshot */
-                                   0,  /* number of keys */
-                                   NULL,   /* scan key */
-                                   true,   /* buffer access strategy OK */
-                                   allow_sync);    /* syncscan OK? */
-   }
-   else
-   {
-       /*
-        * Parallel index build.
-        *
-        * Parallel case never registers/unregisters own snapshot.  Snapshot
-        * is taken from parallel heap scan, and is SnapshotAny or an MVCC
-        * snapshot, based on same criteria as serial case.
-        */
-       Assert(!IsBootstrapProcessingMode());
-       Assert(allow_sync);
-       snapshot = scan->rs_snapshot;
-   }
-
-   /*
-    * Must call GetOldestXmin() with SnapshotAny.  Should never call
-    * GetOldestXmin() with MVCC snapshot. (It's especially worth checking
-    * this for parallel builds, since ambuild routines that support parallel
-    * builds must work these details out for themselves.)
-    */
-   Assert(snapshot == SnapshotAny || IsMVCCSnapshot(snapshot));
-   Assert(snapshot == SnapshotAny ? TransactionIdIsValid(OldestXmin) :
-          !TransactionIdIsValid(OldestXmin));
-   Assert(snapshot == SnapshotAny || !anyvisible);
-
-   /* set our scan endpoints */
-   if (!allow_sync)
-       heap_setscanlimits(scan, start_blockno, numblocks);
-   else
-   {
-       /* syncscan can only be requested on whole relation */
-       Assert(start_blockno == 0);
-       Assert(numblocks == InvalidBlockNumber);
-   }
-
-   reltuples = 0;
-
-   /*
-    * Scan all tuples in the base relation.
-    */
-   while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
-   {
-       bool        tupleIsAlive;
-
-       CHECK_FOR_INTERRUPTS();
-
-       /*
-        * When dealing with a HOT-chain of updated tuples, we want to index
-        * the values of the live tuple (if any), but index it under the TID
-        * of the chain's root tuple.  This approach is necessary to preserve
-        * the HOT-chain structure in the heap. So we need to be able to find
-        * the root item offset for every tuple that's in a HOT-chain.  When
-        * first reaching a new page of the relation, call
-        * heap_get_root_tuples() to build a map of root item offsets on the
-        * page.
-        *
-        * It might look unsafe to use this information across buffer
-        * lock/unlock.  However, we hold ShareLock on the table so no
-        * ordinary insert/update/delete should occur; and we hold pin on the
-        * buffer continuously while visiting the page, so no pruning
-        * operation can occur either.
-        *
-        * Also, although our opinions about tuple liveness could change while
-        * we scan the page (due to concurrent transaction commits/aborts),
-        * the chain root locations won't, so this info doesn't need to be
-        * rebuilt after waiting for another transaction.
-        *
-        * Note the implied assumption that there is no more than one live
-        * tuple per HOT-chain --- else we could create more than one index
-        * entry pointing to the same root tuple.
-        */
-       if (scan->rs_cblock != root_blkno)
-       {
-           Page        page = BufferGetPage(scan->rs_cbuf);
-
-           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
-           heap_get_root_tuples(page, root_offsets);
-           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
-
-           root_blkno = scan->rs_cblock;
-       }
-
-       if (snapshot == SnapshotAny)
-       {
-           /* do our own time qual check */
-           bool        indexIt;
-           TransactionId xwait;
-
-   recheck:
-
-           /*
-            * We could possibly get away with not locking the buffer here,
-            * since caller should hold ShareLock on the relation, but let's
-            * be conservative about it.  (This remark is still correct even
-            * with HOT-pruning: our pin on the buffer prevents pruning.)
-            */
-           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
-
-           /*
-            * The criteria for counting a tuple as live in this block need to
-            * match what analyze.c's acquire_sample_rows() does, otherwise
-            * CREATE INDEX and ANALYZE may produce wildly different reltuples
-            * values, e.g. when there are many recently-dead tuples.
-            */
-           switch (HeapTupleSatisfiesVacuum(heapTuple, OldestXmin,
-                                            scan->rs_cbuf))
-           {
-               case HEAPTUPLE_DEAD:
-                   /* Definitely dead, we can ignore it */
-                   indexIt = false;
-                   tupleIsAlive = false;
-                   break;
-               case HEAPTUPLE_LIVE:
-                   /* Normal case, index and unique-check it */
-                   indexIt = true;
-                   tupleIsAlive = true;
-                   /* Count it as live, too */
-                   reltuples += 1;
-                   break;
-               case HEAPTUPLE_RECENTLY_DEAD:
-
-                   /*
-                    * If tuple is recently deleted then we must index it
-                    * anyway to preserve MVCC semantics.  (Pre-existing
-                    * transactions could try to use the index after we finish
-                    * building it, and may need to see such tuples.)
-                    *
-                    * However, if it was HOT-updated then we must only index
-                    * the live tuple at the end of the HOT-chain.  Since this
-                    * breaks semantics for pre-existing snapshots, mark the
-                    * index as unusable for them.
-                    *
-                    * We don't count recently-dead tuples in reltuples, even
-                    * if we index them; see acquire_sample_rows().
-                    */
-                   if (HeapTupleIsHotUpdated(heapTuple))
-                   {
-                       indexIt = false;
-                       /* mark the index as unsafe for old snapshots */
-                       indexInfo->ii_BrokenHotChain = true;
-                   }
-                   else
-                       indexIt = true;
-                   /* In any case, exclude the tuple from unique-checking */
-                   tupleIsAlive = false;
-                   break;
-               case HEAPTUPLE_INSERT_IN_PROGRESS:
-
-                   /*
-                    * In "anyvisible" mode, this tuple is visible and we
-                    * don't need any further checks.
-                    */
-                   if (anyvisible)
-                   {
-                       indexIt = true;
-                       tupleIsAlive = true;
-                       reltuples += 1;
-                       break;
-                   }
-
-                   /*
-                    * Since caller should hold ShareLock or better, normally
-                    * the only way to see this is if it was inserted earlier
-                    * in our own transaction.  However, it can happen in
-                    * system catalogs, since we tend to release write lock
-                    * before commit there.  Give a warning if neither case
-                    * applies.
-                    */
-                   xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);
-                   if (!TransactionIdIsCurrentTransactionId(xwait))
-                   {
-                       if (!is_system_catalog)
-                           elog(WARNING, "concurrent insert in progress within table \"%s\"",
-                                RelationGetRelationName(heapRelation));
-
-                       /*
-                        * If we are performing uniqueness checks, indexing
-                        * such a tuple could lead to a bogus uniqueness
-                        * failure.  In that case we wait for the inserting
-                        * transaction to finish and check again.
-                        */
-                       if (checking_uniqueness)
-                       {
-                           /*
-                            * Must drop the lock on the buffer before we wait
-                            */
-                           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
-                           XactLockTableWait(xwait, heapRelation,
-                                             &heapTuple->t_self,
-                                             XLTW_InsertIndexUnique);
-                           CHECK_FOR_INTERRUPTS();
-                           goto recheck;
-                       }
-                   }
-                   else
-                   {
-                       /*
-                        * For consistency with acquire_sample_rows(), count
-                        * HEAPTUPLE_INSERT_IN_PROGRESS tuples as live only
-                        * when inserted by our own transaction.
-                        */
-                       reltuples += 1;
-                   }
-
-                   /*
-                    * We must index such tuples, since if the index build
-                    * commits then they're good.
-                    */
-                   indexIt = true;
-                   tupleIsAlive = true;
-                   break;
-               case HEAPTUPLE_DELETE_IN_PROGRESS:
-
-                   /*
-                    * As with INSERT_IN_PROGRESS case, this is unexpected
-                    * unless it's our own deletion or a system catalog; but
-                    * in anyvisible mode, this tuple is visible.
-                    */
-                   if (anyvisible)
-                   {
-                       indexIt = true;
-                       tupleIsAlive = false;
-                       reltuples += 1;
-                       break;
-                   }
-
-                   xwait = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
-                   if (!TransactionIdIsCurrentTransactionId(xwait))
-                   {
-                       if (!is_system_catalog)
-                           elog(WARNING, "concurrent delete in progress within table \"%s\"",
-                                RelationGetRelationName(heapRelation));
-
-                       /*
-                        * If we are performing uniqueness checks, assuming
-                        * the tuple is dead could lead to missing a
-                        * uniqueness violation.  In that case we wait for the
-                        * deleting transaction to finish and check again.
-                        *
-                        * Also, if it's a HOT-updated tuple, we should not
-                        * index it but rather the live tuple at the end of
-                        * the HOT-chain.  However, the deleting transaction
-                        * could abort, possibly leaving this tuple as live
-                        * after all, in which case it has to be indexed. The
-                        * only way to know what to do is to wait for the
-                        * deleting transaction to finish and check again.
-                        */
-                       if (checking_uniqueness ||
-                           HeapTupleIsHotUpdated(heapTuple))
-                       {
-                           /*
-                            * Must drop the lock on the buffer before we wait
-                            */
-                           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
-                           XactLockTableWait(xwait, heapRelation,
-                                             &heapTuple->t_self,
-                                             XLTW_InsertIndexUnique);
-                           CHECK_FOR_INTERRUPTS();
-                           goto recheck;
-                       }
-
-                       /*
-                        * Otherwise index it but don't check for uniqueness,
-                        * the same as a RECENTLY_DEAD tuple.
-                        */
-                       indexIt = true;
-
-                       /*
-                        * Count HEAPTUPLE_DELETE_IN_PROGRESS tuples as live,
-                        * if they were not deleted by the current
-                        * transaction.  That's what acquire_sample_rows()
-                        * does, and we want the behavior to be consistent.
-                        */
-                       reltuples += 1;
-                   }
-                   else if (HeapTupleIsHotUpdated(heapTuple))
-                   {
-                       /*
-                        * It's a HOT-updated tuple deleted by our own xact.
-                        * We can assume the deletion will commit (else the
-                        * index contents don't matter), so treat the same as
-                        * RECENTLY_DEAD HOT-updated tuples.
-                        */
-                       indexIt = false;
-                       /* mark the index as unsafe for old snapshots */
-                       indexInfo->ii_BrokenHotChain = true;
-                   }
-                   else
-                   {
-                       /*
-                        * It's a regular tuple deleted by our own xact. Index
-                        * it, but don't check for uniqueness nor count in
-                        * reltuples, the same as a RECENTLY_DEAD tuple.
-                        */
-                       indexIt = true;
-                   }
-                   /* In any case, exclude the tuple from unique-checking */
-                   tupleIsAlive = false;
-                   break;
-               default:
-                   elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
-                   indexIt = tupleIsAlive = false; /* keep compiler quiet */
-                   break;
-           }
-
-           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
-
-           if (!indexIt)
-               continue;
-       }
-       else
-       {
-           /* heap_getnext did the time qual check */
-           tupleIsAlive = true;
-           reltuples += 1;
-       }
-
-       MemoryContextReset(econtext->ecxt_per_tuple_memory);
-
-       /* Set up for predicate or expression evaluation */
-       ExecStoreHeapTuple(heapTuple, slot, false);
-
-       /*
-        * In a partial index, discard tuples that don't satisfy the
-        * predicate.
-        */
-       if (predicate != NULL)
-       {
-           if (!ExecQual(predicate, econtext))
-               continue;
-       }
-
-       /*
-        * For the current heap tuple, extract all the attributes we use in
-        * this index, and note which are null.  This also performs evaluation
-        * of any expressions needed.
-        */
-       FormIndexDatum(indexInfo,
-                      slot,
-                      estate,
-                      values,
-                      isnull);
-
-       /*
-        * You'd think we should go ahead and build the index tuple here, but
-        * some index AMs want to do further processing on the data first.  So
-        * pass the values[] and isnull[] arrays, instead.
-        */
-
-       if (HeapTupleIsHeapOnly(heapTuple))
-       {
-           /*
-            * For a heap-only tuple, pretend its TID is that of the root. See
-            * src/backend/access/heap/README.HOT for discussion.
-            */
-           HeapTupleData rootTuple;
-           OffsetNumber offnum;
-
-           rootTuple = *heapTuple;
-           offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);
-
-           if (!OffsetNumberIsValid(root_offsets[offnum - 1]))
-               ereport(ERROR,
-                       (errcode(ERRCODE_DATA_CORRUPTED),
-                        errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
-                                        ItemPointerGetBlockNumber(&heapTuple->t_self),
-                                        offnum,
-                                        RelationGetRelationName(heapRelation))));
-
-           ItemPointerSetOffsetNumber(&rootTuple.t_self,
-                                      root_offsets[offnum - 1]);
-
-           /* Call the AM's callback routine to process the tuple */
-           callback(indexRelation, &rootTuple, values, isnull, tupleIsAlive,
-                    callback_state);
-       }
-       else
-       {
-           /* Call the AM's callback routine to process the tuple */
-           callback(indexRelation, heapTuple, values, isnull, tupleIsAlive,
-                    callback_state);
-       }
-   }
-
-   heap_endscan(scan);
-
-   /* we can now forget our snapshot, if set and registered by us */
-   if (need_unregister_snapshot)
-       UnregisterSnapshot(snapshot);
-
-   ExecDropSingleTupleTableSlot(slot);
-
-   FreeExecutorState(estate);
-
-   /* These may have been pointing to the now-gone estate */
-   indexInfo->ii_ExpressionsState = NIL;
-   indexInfo->ii_PredicateState = NULL;
-
-   return reltuples;
-}
-
-
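
IndexBuildHeapScan() and IndexBuildHeapRangeScan() move behind the table AM interface together with their HOT-chain handling (see heapam_handler.c in this commit). An ambuild routine now reaches the same logic through a tableam wrapper; a sketch, with the wrapper name and argument list taken from this patch series and therefore subject to change:

    /* serial build: let the AM start its own scan (last argument NULL) */
    reltuples = table_index_build_scan(heapRelation,
                                       indexRelation,
                                       indexInfo,
                                       true,            /* allow_sync */
                                       build_callback,  /* per-tuple callback */
                                       (void *) &buildstate,
                                       NULL);           /* no parallel scan */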
 /*
  * IndexCheckExclusion - verify that a new exclusion constraint is satisfied
  *
@@ -2963,8 +2399,7 @@ IndexCheckExclusion(Relation heapRelation,
                    Relation indexRelation,
                    IndexInfo *indexInfo)
 {
-   HeapScanDesc scan;
-   HeapTuple   heapTuple;
+   TableScanDesc scan;
    Datum       values[INDEX_MAX_KEYS];
    bool        isnull[INDEX_MAX_KEYS];
    ExprState  *predicate;
@@ -2987,8 +2422,7 @@ IndexCheckExclusion(Relation heapRelation,
     */
    estate = CreateExecutorState();
    econtext = GetPerTupleExprContext(estate);
-   slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation),
-                                   &TTSOpsHeapTuple);
+   slot = table_gimmegimmeslot(heapRelation, NULL);
 
    /* Arrange for econtext's scan tuple to be the tuple under test */
    econtext->ecxt_scantuple = slot;
@@ -3000,22 +2434,17 @@ IndexCheckExclusion(Relation heapRelation,
     * Scan all live tuples in the base relation.
     */
    snapshot = RegisterSnapshot(GetLatestSnapshot());
-   scan = heap_beginscan_strat(heapRelation,   /* relation */
-                               snapshot,   /* snapshot */
-                               0,  /* number of keys */
-                               NULL,   /* scan key */
-                               true,   /* buffer access strategy OK */
-                               true);  /* syncscan OK */
-
-   while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+   scan = table_beginscan_strat(heapRelation,  /* relation */
+                                  snapshot,    /* snapshot */
+                                  0,   /* number of keys */
+                                  NULL,    /* scan key */
+                                  true,    /* buffer access strategy OK */
+                                  true);   /* syncscan OK */
+
+   while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
    {
        CHECK_FOR_INTERRUPTS();
 
-       MemoryContextReset(econtext->ecxt_per_tuple_memory);
-
-       /* Set up for predicate or expression evaluation */
-       ExecStoreHeapTuple(heapTuple, slot, false);
-
        /*
         * In a partial index, ignore tuples that don't satisfy the predicate.
         */
@@ -3039,11 +2468,13 @@ IndexCheckExclusion(Relation heapRelation,
         */
        check_exclusion_constraint(heapRelation,
                                   indexRelation, indexInfo,
-                                  &(heapTuple->t_self), values, isnull,
+                                  &(slot->tts_tid), values, isnull,
                                   estate, true);
+
+       MemoryContextReset(econtext->ecxt_per_tuple_memory);
    }
 
-   heap_endscan(scan);
+   table_endscan(scan);
    UnregisterSnapshot(snapshot);
 
    ExecDropSingleTupleTableSlot(slot);
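
IndexCheckExclusion() also shows the slot-based form of the scan idiom: table_gimmegimmeslot() (this series' placeholder name for a helper returning a slot of the AM's preferred class) replaces the hard-coded heap-tuple slot, and table_scan_getnextslot() stores each tuple directly into it, so the explicit ExecStoreHeapTuple() step disappears. Condensed, and assuming the plain table_beginscan() variant:

    TupleTableSlot *slot = table_gimmegimmeslot(rel, NULL);
    TableScanDesc   scan = table_beginscan(rel, snapshot, 0, NULL);

    while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
    {
        /* use the slot; the tuple's TID is available as slot->tts_tid */
    }

    table_endscan(scan);
    ExecDropSingleTupleTableSlot(slot);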
@@ -3126,7 +2557,7 @@ validate_index(Oid heapId, Oid indexId, Snapshot snapshot)
                indexRelation;
    IndexInfo  *indexInfo;
    IndexVacuumInfo ivinfo;
-   v_i_state   state;
+   ValidateIndexState state;
    Oid         save_userid;
    int         save_sec_context;
    int         save_nestlevel;
@@ -3187,11 +2618,11 @@ validate_index(Oid heapId, Oid indexId, Snapshot snapshot)
    /*
     * Now scan the heap and "merge" it with the index
     */
-   validate_index_heapscan(heapRelation,
-                           indexRelation,
-                           indexInfo,
-                           snapshot,
-                           &state);
+   table_index_validate_scan(heapRelation,
+                             indexRelation,
+                             indexInfo,
+                             snapshot,
+                             &state);
 
    /* Done with tuplesort object */
    tuplesort_end(state.tuplesort);
@@ -3211,53 +2642,13 @@ validate_index(Oid heapId, Oid indexId, Snapshot snapshot)
    heap_close(heapRelation, NoLock);
 }
 
-/*
- * itemptr_encode - Encode ItemPointer as int64/int8
- *
- * This representation must produce values encoded as int64 that sort in the
- * same order as their corresponding original TID values would (using the
- * default int8 opclass to produce a result equivalent to the default TID
- * opclass).
- *
- * As noted in validate_index(), this can be significantly faster.
- */
-static inline int64
-itemptr_encode(ItemPointer itemptr)
-{
-   BlockNumber block = ItemPointerGetBlockNumber(itemptr);
-   OffsetNumber offset = ItemPointerGetOffsetNumber(itemptr);
-   int64       encoded;
-
-   /*
-    * Use the 16 least significant bits for the offset.  32 adjacent bits are
-    * used for the block number.  Since remaining bits are unused, there
-    * cannot be negative encoded values (We assume a two's complement
-    * representation).
-    */
-   encoded = ((uint64) block << 16) | (uint16) offset;
-
-   return encoded;
-}
-
-/*
- * itemptr_decode - Decode int64/int8 representation back to ItemPointer
- */
-static inline void
-itemptr_decode(ItemPointer itemptr, int64 encoded)
-{
-   BlockNumber block = (BlockNumber) (encoded >> 16);
-   OffsetNumber offset = (OffsetNumber) (encoded & 0xFFFF);
-
-   ItemPointerSet(itemptr, block, offset);
-}
-
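
itemptr_encode() is still called by validate_index_callback() below, so these helpers move to a shared header rather than disappearing. A worked round trip of the encoding: block 7, offset 5 packs to (7 << 16) | 5 = 458757, and decoding shifts the block number back out of the high bits:

    ItemPointerData tid;
    int64       encoded;

    ItemPointerSet(&tid, 7, 5);         /* block 7, offset 5 */
    encoded = itemptr_encode(&tid);     /* (7 << 16) | 5 == 458757 */
    itemptr_decode(&tid, encoded);

    Assert(ItemPointerGetBlockNumber(&tid) == 7);
    Assert(ItemPointerGetOffsetNumber(&tid) == 5);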
 /*
  * validate_index_callback - bulkdelete callback to collect the index TIDs
  */
 static bool
 validate_index_callback(ItemPointer itemptr, void *opaque)
 {
-   v_i_state  *state = (v_i_state *) opaque;
+   ValidateIndexState *state = (ValidateIndexState *) opaque;
    int64       encoded = itemptr_encode(itemptr);
 
    tuplesort_putdatum(state->tuplesort, Int64GetDatum(encoded), false);
@@ -3265,243 +2656,6 @@ validate_index_callback(ItemPointer itemptr, void *opaque)
    return false;               /* never actually delete anything */
 }
 
-/*
- * validate_index_heapscan - second table scan for concurrent index build
- *
- * This has much code in common with IndexBuildHeapScan, but it's enough
- * different that it seems cleaner to have two routines not one.
- */
-static void
-validate_index_heapscan(Relation heapRelation,
-                       Relation indexRelation,
-                       IndexInfo *indexInfo,
-                       Snapshot snapshot,
-                       v_i_state *state)
-{
-   HeapScanDesc scan;
-   HeapTuple   heapTuple;
-   Datum       values[INDEX_MAX_KEYS];
-   bool        isnull[INDEX_MAX_KEYS];
-   ExprState  *predicate;
-   TupleTableSlot *slot;
-   EState     *estate;
-   ExprContext *econtext;
-   BlockNumber root_blkno = InvalidBlockNumber;
-   OffsetNumber root_offsets[MaxHeapTuplesPerPage];
-   bool        in_index[MaxHeapTuplesPerPage];
-
-   /* state variables for the merge */
-   ItemPointer indexcursor = NULL;
-   ItemPointerData decoded;
-   bool        tuplesort_empty = false;
-
-   /*
-    * sanity checks
-    */
-   Assert(OidIsValid(indexRelation->rd_rel->relam));
-
-   /*
-    * Need an EState for evaluation of index expressions and partial-index
-    * predicates.  Also a slot to hold the current tuple.
-    */
-   estate = CreateExecutorState();
-   econtext = GetPerTupleExprContext(estate);
-   slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation),
-                                   &TTSOpsHeapTuple);
-
-   /* Arrange for econtext's scan tuple to be the tuple under test */
-   econtext->ecxt_scantuple = slot;
-
-   /* Set up execution state for predicate, if any. */
-   predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
-
-   /*
-    * Prepare for scan of the base relation.  We need just those tuples
-    * satisfying the passed-in reference snapshot.  We must disable syncscan
-    * here, because it's critical that we read from block zero forward to
-    * match the sorted TIDs.
-    */
-   scan = heap_beginscan_strat(heapRelation,   /* relation */
-                               snapshot,   /* snapshot */
-                               0,  /* number of keys */
-                               NULL,   /* scan key */
-                               true,   /* buffer access strategy OK */
-                               false); /* syncscan not OK */
-
-   /*
-    * Scan all tuples matching the snapshot.
-    */
-   while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
-   {
-       ItemPointer heapcursor = &heapTuple->t_self;
-       ItemPointerData rootTuple;
-       OffsetNumber root_offnum;
-
-       CHECK_FOR_INTERRUPTS();
-
-       state->htups += 1;
-
-       /*
-        * As commented in IndexBuildHeapScan, we should index heap-only
-        * tuples under the TIDs of their root tuples; so when we advance onto
-        * a new heap page, build a map of root item offsets on the page.
-        *
-        * This complicates merging against the tuplesort output: we will
-        * visit the live tuples in order by their offsets, but the root
-        * offsets that we need to compare against the index contents might be
-        * ordered differently.  So we might have to "look back" within the
-        * tuplesort output, but only within the current page.  We handle that
-        * by keeping a bool array in_index[] showing all the
-        * already-passed-over tuplesort output TIDs of the current page. We
-        * clear that array here, when advancing onto a new heap page.
-        */
-       if (scan->rs_cblock != root_blkno)
-       {
-           Page        page = BufferGetPage(scan->rs_cbuf);
-
-           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
-           heap_get_root_tuples(page, root_offsets);
-           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
-
-           memset(in_index, 0, sizeof(in_index));
-
-           root_blkno = scan->rs_cblock;
-       }
-
-       /* Convert actual tuple TID to root TID */
-       rootTuple = *heapcursor;
-       root_offnum = ItemPointerGetOffsetNumber(heapcursor);
-
-       if (HeapTupleIsHeapOnly(heapTuple))
-       {
-           root_offnum = root_offsets[root_offnum - 1];
-           if (!OffsetNumberIsValid(root_offnum))
-               ereport(ERROR,
-                       (errcode(ERRCODE_DATA_CORRUPTED),
-                        errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
-                                        ItemPointerGetBlockNumber(heapcursor),
-                                        ItemPointerGetOffsetNumber(heapcursor),
-                                        RelationGetRelationName(heapRelation))));
-           ItemPointerSetOffsetNumber(&rootTuple, root_offnum);
-       }
-
-       /*
-        * "merge" by skipping through the index tuples until we find or pass
-        * the current root tuple.
-        */
-       while (!tuplesort_empty &&
-              (!indexcursor ||
-               ItemPointerCompare(indexcursor, &rootTuple) < 0))
-       {
-           Datum       ts_val;
-           bool        ts_isnull;
-
-           if (indexcursor)
-           {
-               /*
-                * Remember index items seen earlier on the current heap page
-                */
-               if (ItemPointerGetBlockNumber(indexcursor) == root_blkno)
-                   in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true;
-           }
-
-           tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
-                                                 &ts_val, &ts_isnull, NULL);
-           Assert(tuplesort_empty || !ts_isnull);
-           if (!tuplesort_empty)
-           {
-               itemptr_decode(&decoded, DatumGetInt64(ts_val));
-               indexcursor = &decoded;
-
-               /* If int8 is pass-by-ref, free (encoded) TID Datum memory */
-#ifndef USE_FLOAT8_BYVAL
-               pfree(DatumGetPointer(ts_val));
-#endif
-           }
-           else
-           {
-               /* Be tidy */
-               indexcursor = NULL;
-           }
-       }
-
-       /*
-        * If the tuplesort has overshot *and* we didn't see a match earlier,
-        * then this tuple is missing from the index, so insert it.
-        */
-       if ((tuplesort_empty ||
-            ItemPointerCompare(indexcursor, &rootTuple) > 0) &&
-           !in_index[root_offnum - 1])
-       {
-           MemoryContextReset(econtext->ecxt_per_tuple_memory);
-
-           /* Set up for predicate or expression evaluation */
-           ExecStoreHeapTuple(heapTuple, slot, false);
-
-           /*
-            * In a partial index, discard tuples that don't satisfy the
-            * predicate.
-            */
-           if (predicate != NULL)
-           {
-               if (!ExecQual(predicate, econtext))
-                   continue;
-           }
-
-           /*
-            * For the current heap tuple, extract all the attributes we use
-            * in this index, and note which are null.  This also performs
-            * evaluation of any expressions needed.
-            */
-           FormIndexDatum(indexInfo,
-                          slot,
-                          estate,
-                          values,
-                          isnull);
-
-           /*
-            * You'd think we should go ahead and build the index tuple here,
-            * but some index AMs want to do further processing on the data
-            * first. So pass the values[] and isnull[] arrays, instead.
-            */
-
-           /*
-            * If the tuple is already committed dead, you might think we
-            * could suppress uniqueness checking, but this is no longer true
-            * in the presence of HOT, because the insert is actually a proxy
-            * for a uniqueness check on the whole HOT-chain.  That is, the
-            * tuple we have here could be dead because it was already
-            * HOT-updated, and if so the updating transaction will not have
-            * thought it should insert index entries.  The index AM will
-            * check the whole HOT-chain and correctly detect a conflict if
-            * there is one.
-            */
-
-           index_insert(indexRelation,
-                        values,
-                        isnull,
-                        &rootTuple,
-                        heapRelation,
-                        indexInfo->ii_Unique ?
-                        UNIQUE_CHECK_YES : UNIQUE_CHECK_NO,
-                        indexInfo);
-
-           state->tups_inserted += 1;
-       }
-   }
-
-   heap_endscan(scan);
-
-   ExecDropSingleTupleTableSlot(slot);
-
-   FreeExecutorState(estate);
-
-   /* These may have been pointing to the now-gone estate */
-   indexInfo->ii_ExpressionsState = NIL;
-   indexInfo->ii_PredicateState = NULL;
-}
-
-
 /*
  * index_set_state_flags - adjust pg_index state flags
  *
index 558022647ccc2af182f549328e48bd250a0444a1..4e6c5df15802d88c377a81e8a215fe5099d017d3 100644 (file)
@@ -17,6 +17,7 @@
 #include "access/genam.h"
 #include "access/heapam.h"
 #include "access/htup_details.h"
+#include "access/tableam.h"
 #include "access/tupconvert.h"
 #include "access/sysattr.h"
 #include "catalog/indexing.h"
index c692958d917188a341281b84661a05e80986f1c1..d668c9b02188373127ad4ab22a5417d8029d505a 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "access/heapam.h"
 #include "access/htup_details.h"
+#include "access/tableam.h"
 #include "access/sysattr.h"
 #include "catalog/catalog.h"
 #include "catalog/dependency.h"
@@ -153,7 +154,7 @@ RemoveConversionById(Oid conversionOid)
 {
    Relation    rel;
    HeapTuple   tuple;
-   HeapScanDesc scan;
+   TableScanDesc scan;
    ScanKeyData scanKeyData;
 
    ScanKeyInit(&scanKeyData,
@@ -164,14 +165,14 @@ RemoveConversionById(Oid conversionOid)
    /* open pg_conversion */
    rel = heap_open(ConversionRelationId, RowExclusiveLock);
 
-   scan = heap_beginscan_catalog(rel, 1, &scanKeyData);
+   scan = table_beginscan_catalog(rel, 1, &scanKeyData);
 
    /* search for the target tuple */
-   if (HeapTupleIsValid(tuple = heap_getnext(scan, ForwardScanDirection)))
+   if (HeapTupleIsValid(tuple = heap_scan_getnext(scan, ForwardScanDirection)))
        CatalogTupleDelete(rel, &tuple->t_self);
    else
        elog(ERROR, "could not find tuple for conversion %u", conversionOid);
-   heap_endscan(scan);
+   table_endscan(scan);
    heap_close(rel, RowExclusiveLock);
 }
 
index e123691923c728be89a774b1359133822319f13d..fec0c9e2ec209c82923b4eac6856bc1df63d6cd8 100644 (file)
@@ -13,6 +13,7 @@
 #include "access/genam.h"
 #include "access/heapam.h"
 #include "access/htup_details.h"
+#include "access/tableam.h"
 #include "catalog/indexing.h"
 #include "catalog/objectaccess.h"
 #include "catalog/pg_db_role_setting.h"
@@ -170,7 +171,7 @@ void
 DropSetting(Oid databaseid, Oid roleid)
 {
    Relation    relsetting;
-   HeapScanDesc scan;
+   TableScanDesc scan;
    ScanKeyData keys[2];
    HeapTuple   tup;
    int         numkeys = 0;
@@ -196,12 +197,12 @@ DropSetting(Oid databaseid, Oid roleid)
        numkeys++;
    }
 
-   scan = heap_beginscan_catalog(relsetting, numkeys, keys);
-   while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection)))
+   scan = table_beginscan_catalog(relsetting, numkeys, keys);
+   while (HeapTupleIsValid(tup = heap_scan_getnext(scan, ForwardScanDirection)))
    {
        CatalogTupleDelete(relsetting, &tup->t_self);
    }
-   heap_endscan(scan);
+   table_endscan(scan);
 
    heap_close(relsetting, RowExclusiveLock);
 }
index a9fbb731654eb069e0e5c3e0a9426991c64d11a4..51183f9a2aea2e27cfa6677109631417ec3cfeda 100644 (file)
@@ -21,6 +21,7 @@
 #include "access/hash.h"
 #include "access/heapam.h"
 #include "access/htup_details.h"
+#include "access/tableam.h"
 #include "access/xact.h"
 
 #include "catalog/catalog.h"
@@ -329,7 +330,7 @@ GetAllTablesPublicationRelations(void)
 {
    Relation    classRel;
    ScanKeyData key[1];
-   HeapScanDesc scan;
+   TableScanDesc scan;
    HeapTuple   tuple;
    List       *result = NIL;
 
@@ -340,9 +341,9 @@ GetAllTablesPublicationRelations(void)
                BTEqualStrategyNumber, F_CHAREQ,
                CharGetDatum(RELKIND_RELATION));
 
-   scan = heap_beginscan_catalog(classRel, 1, key);
+   scan = table_beginscan_catalog(classRel, 1, key);
 
-   while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+   while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
    {
        Form_pg_class relForm = (Form_pg_class) GETSTRUCT(tuple);
        Oid         relid = relForm->oid;
@@ -351,7 +352,7 @@ GetAllTablesPublicationRelations(void)
            result = lappend_oid(result, relid);
    }
 
-   heap_endscan(scan);
+   table_endscan(scan);
    heap_close(classRel, AccessShareLock);
 
    return result;
index e136aa6a0b22de2e4575a63b418f9704406e433c..f1dc998f48907476b6af5e11ab48924e334adf21 100644 (file)
@@ -19,6 +19,7 @@
 #include "access/genam.h"
 #include "access/heapam.h"
 #include "access/htup_details.h"
+#include "access/tableam.h"
 #include "access/xact.h"
 
 #include "catalog/indexing.h"
@@ -390,7 +391,7 @@ void
 RemoveSubscriptionRel(Oid subid, Oid relid)
 {
    Relation    rel;
-   HeapScanDesc scan;
+   TableScanDesc scan;
    ScanKeyData skey[2];
    HeapTuple   tup;
    int         nkeys = 0;
@@ -416,12 +417,12 @@ RemoveSubscriptionRel(Oid subid, Oid relid)
    }
 
    /* Do the search and delete what we found. */
-   scan = heap_beginscan_catalog(rel, nkeys, skey);
-   while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection)))
+   scan = table_beginscan_catalog(rel, nkeys, skey);
+   while (HeapTupleIsValid(tup = heap_scan_getnext(scan, ForwardScanDirection)))
    {
        CatalogTupleDelete(rel, &tup->t_self);
    }
-   heap_endscan(scan);
+   table_endscan(scan);
 
    heap_close(rel, RowExclusiveLock);
 }
index 462969a83843c2021340427a812c58f805774c79..60143f0e44d9ec6b6f889ce857b8fd349f309086 100644 (file)
@@ -266,6 +266,7 @@ create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid,
                                           toast_typid,
                                           InvalidOid,
                                           rel->rd_rel->relowner,
+                                          rel->rd_rel->relam,
                                           tupdesc,
                                           NIL,
                                           RELKIND_TOASTVALUE,
index 4367290a27cab24103f60d3cf643f4c55f7938c4..b55f8e708bbf46e8cf9fbf8e98d21ac31ebc0467 100644 (file)
@@ -30,7 +30,7 @@
 #include "utils/syscache.h"
 
 
-static Oid lookup_index_am_handler_func(List *handler_name, char amtype);
+static Oid lookup_am_handler_func(List *handler_name, char amtype);
 static const char *get_am_type_string(char amtype);
 
 
@@ -74,7 +74,7 @@ CreateAccessMethod(CreateAmStmt *stmt)
    /*
     * Get the handler function oid, verifying the AM type while at it.
     */
-   amhandler = lookup_index_am_handler_func(stmt->handler_name, stmt->amtype);
+   amhandler = lookup_am_handler_func(stmt->handler_name, stmt->amtype);
 
    /*
     * Insert tuple into pg_am.
@@ -229,6 +229,8 @@ get_am_type_string(char amtype)
    {
        case AMTYPE_INDEX:
            return "INDEX";
+       case AMTYPE_TABLE:
+           return "TABLE";
        default:
            /* shouldn't happen */
            elog(ERROR, "invalid access method type '%c'", amtype);
@@ -243,7 +245,7 @@ get_am_type_string(char amtype)
  * This function either returns a valid function Oid or throws an error.
  */
 static Oid
-lookup_index_am_handler_func(List *handler_name, char amtype)
+lookup_am_handler_func(List *handler_name, char amtype)
 {
    Oid         handlerOid;
    static const Oid funcargtypes[1] = {INTERNALOID};
@@ -267,6 +269,15 @@ lookup_index_am_handler_func(List *handler_name, char amtype)
                                NameListToString(handler_name),
                                "index_am_handler")));
            break;
+           /* XXX refactor duplicate error */
+       case AMTYPE_TABLE:
+           if (get_func_rettype(handlerOid) != TABLE_AM_HANDLEROID)
+               ereport(ERROR,
+                       (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                        errmsg("function %s must return type %s",
+                               NameListToString(handler_name),
+                               "table_am_handler")));
+           break;
        default:
            elog(ERROR, "unrecognized access method type \"%c\"", amtype);
    }
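
With AMTYPE_TABLE accepted by lookup_am_handler_func(), a table AM
registers itself through a handler function shaped like an index AM
handler: it takes "internal" and returns the table-AM handler type, as the
validation above requires. A minimal sketch, assuming the handler simply
returns a pointer to the AM's statically allocated callback table (the
routine struct itself lives in the new tableamapi.c, not shown in this
excerpt):

#include "postgres.h"
#include "fmgr.h"

PG_MODULE_MAGIC;

PG_FUNCTION_INFO_V1(my_tableam_handler);

Datum
my_tableam_handler(PG_FUNCTION_ARGS)
{
    /*
     * A real AM would return &its_routine here; NULL is a placeholder to
     * keep this sketch self-contained.
     */
    PG_RETURN_POINTER(NULL);
}
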
index b8445dc372881acb4cd314c19072e0fa41b1a588..29e2377b52008d49f3687dc7838c6d605d53820c 100644 (file)
@@ -18,6 +18,7 @@
 
 #include "access/multixact.h"
 #include "access/sysattr.h"
+#include "access/tableam.h"
 #include "access/transam.h"
 #include "access/tupconvert.h"
 #include "access/tuptoaster.h"
@@ -1000,6 +1001,8 @@ acquire_sample_rows(Relation onerel, int elevel,
    TransactionId OldestXmin;
    BlockSamplerData bs;
    ReservoirStateData rstate;
+   TupleTableSlot *slot;
+   TableScanDesc scan;
 
    Assert(targrows > 0);
 
@@ -1013,178 +1016,72 @@ acquire_sample_rows(Relation onerel, int elevel,
    /* Prepare for sampling rows */
    reservoir_init_selection_state(&rstate, targrows);
 
+   scan = table_beginscan_analyze(onerel);
+   slot = table_gimmegimmeslot(onerel, NULL);
+
    /* Outer loop over blocks to sample */
    while (BlockSampler_HasMore(&bs))
    {
        BlockNumber targblock = BlockSampler_Next(&bs);
-       Buffer      targbuffer;
-       Page        targpage;
-       OffsetNumber targoffset,
-                   maxoffset;
 
        vacuum_delay_point();
 
        /*
-        * We must maintain a pin on the target page's buffer to ensure that
-        * the maxoffset value stays good (else concurrent VACUUM might delete
-        * tuples out from under us).  Hence, pin the page until we are done
-        * looking at it.  We also choose to hold sharelock on the buffer
-        * throughout --- we could release and re-acquire sharelock for each
-        * tuple, but since we aren't doing much work per tuple, the extra
-        * lock traffic is probably better avoided.
+        * XXX: we could have this function return a boolean, instead of
+        * forcing such checks to happen in next_tuple().
         */
-       targbuffer = ReadBufferExtended(onerel, MAIN_FORKNUM, targblock,
-                                       RBM_NORMAL, vac_strategy);
-       LockBuffer(targbuffer, BUFFER_LOCK_SHARE);
-       targpage = BufferGetPage(targbuffer);
-       maxoffset = PageGetMaxOffsetNumber(targpage);
-
-       /* Inner loop over all tuples on the selected page */
-       for (targoffset = FirstOffsetNumber; targoffset <= maxoffset; targoffset++)
-       {
-           ItemId      itemid;
-           HeapTupleData targtuple;
-           bool        sample_it = false;
-
-           itemid = PageGetItemId(targpage, targoffset);
+       table_scan_analyze_next_block(scan, targblock, vac_strategy);
 
+       while (table_scan_analyze_next_tuple(scan, OldestXmin, &liverows, &deadrows, slot))
+       {
            /*
-            * We ignore unused and redirect line pointers.  DEAD line
-            * pointers should be counted as dead, because we need vacuum to
-            * run to get rid of them.  Note that this rule agrees with the
-            * way that heap_page_prune() counts things.
+            * The first targrows sample rows are simply copied into the
+            * reservoir. Then we start replacing tuples in the sample
+            * until we reach the end of the relation.  This algorithm is
+            * from Jeff Vitter's paper (see full citation below). It
+            * works by repeatedly computing the number of tuples to skip
+            * before selecting a tuple, which replaces a randomly chosen
+            * element of the reservoir (current set of tuples).  At all
+            * times the reservoir is a true random sample of the tuples
+            * we've passed over so far, so when we fall off the end of
+            * the relation we're done.
             */
-           if (!ItemIdIsNormal(itemid))
-           {
-               if (ItemIdIsDead(itemid))
-                   deadrows += 1;
-               continue;
-           }
-
-           ItemPointerSet(&targtuple.t_self, targblock, targoffset);
-
-           targtuple.t_tableOid = RelationGetRelid(onerel);
-           targtuple.t_data = (HeapTupleHeader) PageGetItem(targpage, itemid);
-           targtuple.t_len = ItemIdGetLength(itemid);
-
-           switch (HeapTupleSatisfiesVacuum(&targtuple,
-                                            OldestXmin,
-                                            targbuffer))
-           {
-               case HEAPTUPLE_LIVE:
-                   sample_it = true;
-                   liverows += 1;
-                   break;
-
-               case HEAPTUPLE_DEAD:
-               case HEAPTUPLE_RECENTLY_DEAD:
-                   /* Count dead and recently-dead rows */
-                   deadrows += 1;
-                   break;
-
-               case HEAPTUPLE_INSERT_IN_PROGRESS:
-
-                   /*
-                    * Insert-in-progress rows are not counted.  We assume
-                    * that when the inserting transaction commits or aborts,
-                    * it will send a stats message to increment the proper
-                    * count.  This works right only if that transaction ends
-                    * after we finish analyzing the table; if things happen
-                    * in the other order, its stats update will be
-                    * overwritten by ours.  However, the error will be large
-                    * only if the other transaction runs long enough to
-                    * insert many tuples, so assuming it will finish after us
-                    * is the safer option.
-                    *
-                    * A special case is that the inserting transaction might
-                    * be our own.  In this case we should count and sample
-                    * the row, to accommodate users who load a table and
-                    * analyze it in one transaction.  (pgstat_report_analyze
-                    * has to adjust the numbers we send to the stats
-                    * collector to make this come out right.)
-                    */
-                   if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(targtuple.t_data)))
-                   {
-                       sample_it = true;
-                       liverows += 1;
-                   }
-                   break;
-
-               case HEAPTUPLE_DELETE_IN_PROGRESS:
-
-                   /*
-                    * We count delete-in-progress rows as still live, using
-                    * the same reasoning given above; but we don't bother to
-                    * include them in the sample.
-                    *
-                    * If the delete was done by our own transaction, however,
-                    * we must count the row as dead to make
-                    * pgstat_report_analyze's stats adjustments come out
-                    * right.  (Note: this works out properly when the row was
-                    * both inserted and deleted in our xact.)
-                    */
-                   if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(targtuple.t_data)))
-                       deadrows += 1;
-                   else
-                       liverows += 1;
-                   break;
-
-               default:
-                   elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
-                   break;
-           }
-
-           if (sample_it)
+           if (numrows < targrows)
+               rows[numrows++] = ExecCopySlotHeapTuple(slot);
+           else
            {
                /*
-                * The first targrows sample rows are simply copied into the
-                * reservoir. Then we start replacing tuples in the sample
-                * until we reach the end of the relation.  This algorithm is
-                * from Jeff Vitter's paper (see full citation below). It
-                * works by repeatedly computing the number of tuples to skip
-                * before selecting a tuple, which replaces a randomly chosen
-                * element of the reservoir (current set of tuples).  At all
-                * times the reservoir is a true random sample of the tuples
-                * we've passed over so far, so when we fall off the end of
-                * the relation we're done.
+                * t in Vitter's paper is the number of records already
+                * processed.  If we need to compute a new S value, we
+                * must use the not-yet-incremented value of samplerows as
+                * t.
                 */
-               if (numrows < targrows)
-                   rows[numrows++] = heap_copytuple(&targtuple);
-               else
+               if (rowstoskip < 0)
+                   rowstoskip = reservoir_get_next_S(&rstate, samplerows, targrows);
+
+               if (rowstoskip <= 0)
                {
                    /*
-                    * t in Vitter's paper is the number of records already
-                    * processed.  If we need to compute a new S value, we
-                    * must use the not-yet-incremented value of samplerows as
-                    * t.
+                    * Found a suitable tuple, so save it, replacing one
+                    * old tuple at random
                     */
-                   if (rowstoskip < 0)
-                       rowstoskip = reservoir_get_next_S(&rstate, samplerows, targrows);
+                   int         k = (int) (targrows * sampler_random_fract(rstate.randstate));
 
-                   if (rowstoskip <= 0)
-                   {
-                       /*
-                        * Found a suitable tuple, so save it, replacing one
-                        * old tuple at random
-                        */
-                       int         k = (int) (targrows * sampler_random_fract(rstate.randstate));
-
-                       Assert(k >= 0 && k < targrows);
-                       heap_freetuple(rows[k]);
-                       rows[k] = heap_copytuple(&targtuple);
-                   }
-
-                   rowstoskip -= 1;
+                   Assert(k >= 0 && k < targrows);
+                   heap_freetuple(rows[k]);
+                   rows[k] = ExecCopySlotHeapTuple(slot);
                }
 
-               samplerows += 1;
+               rowstoskip -= 1;
            }
-       }
 
-       /* Now release the lock and pin on the page */
-       UnlockReleaseBuffer(targbuffer);
+           samplerows += 1;
+       }
    }
 
+   ExecDropSingleTupleTableSlot(slot);
+   table_endscan(scan);
+
    /*
     * If we didn't find as many tuples as we wanted then we're done. No sort
     * is needed, since they're already in order.
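
The rewritten loop keeps the reservoir bookkeeping in the generic code and
pushes only block reading and tuple visibility down into the AM. The
reservoir technique itself (Vitter's Algorithm R, simplified here with
plain per-row randomness instead of reservoir_get_next_S()'s skip
distances) is easy to see in isolation; a standalone sketch:

#include <stdio.h>
#include <stdlib.h>

#define TARGROWS 5

int
main(void)
{
    int     reservoir[TARGROWS];
    int     seen = 0;

    srandom(42);

    for (int row = 0; row < 1000; row++)
    {
        if (seen < TARGROWS)
        {
            /* fill phase: the first TARGROWS rows are copied verbatim */
            reservoir[seen] = row;
        }
        else
        {
            /* replacement phase: keep row with probability TARGROWS/(seen+1) */
            long    k = random() % (seen + 1);

            if (k < TARGROWS)
                reservoir[k] = row;
        }
        seen++;
    }

    /* at any cutoff, reservoir[] is a uniform sample of the rows seen */
    for (int i = 0; i < TARGROWS; i++)
        printf("%d\n", reservoir[i]);
    return 0;
}
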
index 610e425a566d5621784ff386df024fdc84959f56..1b8d03642c54482c36207f3559e1bac7418a74b3 100644 (file)
@@ -20,7 +20,7 @@
 #include "access/amapi.h"
 #include "access/multixact.h"
 #include "access/relscan.h"
-#include "access/rewriteheap.h"
+#include "access/tableam.h"
 #include "access/transam.h"
 #include "access/tuptoaster.h"
 #include "access/xact.h"
@@ -41,7 +41,6 @@
 #include "storage/bufmgr.h"
 #include "storage/lmgr.h"
 #include "storage/predicate.h"
-#include "storage/smgr.h"
 #include "utils/acl.h"
 #include "utils/fmgroids.h"
 #include "utils/inval.h"
@@ -68,14 +67,10 @@ typedef struct
 
 
 static void rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose);
-static void copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
+static void copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
               bool verbose, bool *pSwapToastByContent,
               TransactionId *pFreezeXid, MultiXactId *pCutoffMulti);
 static List *get_tables_to_cluster(MemoryContext cluster_context);
-static void reform_and_rewrite_tuple(HeapTuple tuple,
-                        TupleDesc oldTupDesc, TupleDesc newTupDesc,
-                        Datum *values, bool *isnull,
-                        RewriteState rwstate);
 
 
 /*---------------------------------------------------------------------------
@@ -597,7 +592,7 @@ rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose)
                               AccessExclusiveLock);
 
    /* Copy the heap data into the new table in the desired order */
-   copy_heap_data(OIDNewHeap, tableOid, indexOid, verbose,
+   copy_table_data(OIDNewHeap, tableOid, indexOid, verbose,
                   &swap_toast_by_content, &frozenXid, &cutoffMulti);
 
    /*
@@ -682,6 +677,7 @@ make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, char relpersistence,
                                          InvalidOid,
                                          InvalidOid,
                                          OldHeap->rd_rel->relowner,
+                                         OldHeap->rd_rel->relam,
                                          OldHeapDesc,
                                          NIL,
                                          RELKIND_RELATION,
@@ -739,7 +735,7 @@ make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, char relpersistence,
 }
 
 /*
- * Do the physical copying of heap data.
+ * Do the physical copying of table data.
  *
  * There are three output parameters:
  * *pSwapToastByContent is set true if toast tables must be swapped by content.
@@ -747,7 +743,7 @@ make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, char relpersistence,
  * *pCutoffMulti receives the MultiXactId used as a cutoff point.
  */
 static void
-copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
+copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
               bool *pSwapToastByContent, TransactionId *pFreezeXid,
               MultiXactId *pCutoffMulti)
 {
@@ -757,21 +753,12 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
    Relation    relRelation;
    HeapTuple   reltup;
    Form_pg_class relform;
-   TupleDesc   oldTupDesc;
-   TupleDesc   newTupDesc;
-   int         natts;
-   Datum      *values;
-   bool       *isnull;
-   IndexScanDesc indexScan;
-   HeapScanDesc heapScan;
-   bool        use_wal;
-   bool        is_system_catalog;
+   TupleDesc   oldTupDesc PG_USED_FOR_ASSERTS_ONLY;
+   TupleDesc   newTupDesc PG_USED_FOR_ASSERTS_ONLY;
    TransactionId OldestXmin;
    TransactionId FreezeXid;
    MultiXactId MultiXactCutoff;
-   RewriteState rwstate;
    bool        use_sort;
-   Tuplesortstate *tuplesort;
    double      num_tuples = 0,
                tups_vacuumed = 0,
                tups_recently_dead = 0;
@@ -799,11 +786,6 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
    newTupDesc = RelationGetDescr(NewHeap);
    Assert(newTupDesc->natts == oldTupDesc->natts);
 
-   /* Preallocate values/isnull arrays */
-   natts = newTupDesc->natts;
-   values = (Datum *) palloc(natts * sizeof(Datum));
-   isnull = (bool *) palloc(natts * sizeof(bool));
-
    /*
     * If the OldHeap has a toast table, get lock on the toast table to keep
     * it from being vacuumed.  This is needed because autovacuum processes
@@ -820,15 +802,6 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
    if (OldHeap->rd_rel->reltoastrelid)
        LockRelationOid(OldHeap->rd_rel->reltoastrelid, AccessExclusiveLock);
 
-   /*
-    * We need to log the copied data in WAL iff WAL archiving/streaming is
-    * enabled AND it's a WAL-logged rel.
-    */
-   use_wal = XLogIsNeeded() && RelationNeedsWAL(NewHeap);
-
-   /* use_wal off requires smgr_targblock be initially invalid */
-   Assert(RelationGetTargetBlock(NewHeap) == InvalidBlockNumber);
-
    /*
     * If both tables have TOAST tables, perform toast swap by content.  It is
     * possible that the old table has a toast table but the new one doesn't,
@@ -889,13 +862,6 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
    *pFreezeXid = FreezeXid;
    *pCutoffMulti = MultiXactCutoff;
 
-   /* Remember if it's a system catalog */
-   is_system_catalog = IsSystemRelation(OldHeap);
-
-   /* Initialize the rewrite operation */
-   rwstate = begin_heap_rewrite(OldHeap, NewHeap, OldestXmin, FreezeXid,
-                                MultiXactCutoff, use_wal);
-
    /*
     * Decide whether to use an indexscan or seqscan-and-optional-sort to scan
     * the OldHeap.  We know how to use a sort to duplicate the ordering of a
@@ -908,39 +874,14 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
    else
        use_sort = false;
 
-   /* Set up sorting if wanted */
-   if (use_sort)
-       tuplesort = tuplesort_begin_cluster(oldTupDesc, OldIndex,
-                                           maintenance_work_mem,
-                                           NULL, false);
-   else
-       tuplesort = NULL;
-
-   /*
-    * Prepare to scan the OldHeap.  To ensure we see recently-dead tuples
-    * that still need to be copied, we scan with SnapshotAny and use
-    * HeapTupleSatisfiesVacuum for the visibility test.
-    */
-   if (OldIndex != NULL && !use_sort)
-   {
-       heapScan = NULL;
-       indexScan = index_beginscan(OldHeap, OldIndex, SnapshotAny, 0, 0);
-       index_rescan(indexScan, NULL, 0, NULL, 0);
-   }
-   else
-   {
-       heapScan = heap_beginscan(OldHeap, SnapshotAny, 0, (ScanKey) NULL);
-       indexScan = NULL;
-   }
-
    /* Log what we're doing */
-   if (indexScan != NULL)
+   if (OldIndex != NULL && !use_sort)
        ereport(elevel,
                (errmsg("clustering \"%s.%s\" using index scan on \"%s\"",
                        get_namespace_name(RelationGetNamespace(OldHeap)),
                        RelationGetRelationName(OldHeap),
                        RelationGetRelationName(OldIndex))));
-   else if (tuplesort != NULL)
+   else if (use_sort)
        ereport(elevel,
                (errmsg("clustering \"%s.%s\" using sequential scan and sort",
                        get_namespace_name(RelationGetNamespace(OldHeap)),
@@ -952,150 +893,12 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
                        RelationGetRelationName(OldHeap))));
 
    /*
-    * Scan through the OldHeap, either in OldIndex order or sequentially;
-    * copy each tuple into the NewHeap, or transiently to the tuplesort
-    * module.  Note that we don't bother sorting dead tuples (they won't get
-    * to the new table anyway).
+    * Hand off the actual copying to the AM-specific function; the generic
+    * code cannot know how to deal with visibility across AMs.
     */
-   for (;;)
-   {
-       HeapTuple   tuple;
-       Buffer      buf;
-       bool        isdead;
-
-       CHECK_FOR_INTERRUPTS();
-
-       if (indexScan != NULL)
-       {
-           tuple = index_getnext(indexScan, ForwardScanDirection);
-           if (tuple == NULL)
-               break;
-
-           /* Since we used no scan keys, should never need to recheck */
-           if (indexScan->xs_recheck)
-               elog(ERROR, "CLUSTER does not support lossy index conditions");
-
-           buf = indexScan->xs_cbuf;
-       }
-       else
-       {
-           tuple = heap_getnext(heapScan, ForwardScanDirection);
-           if (tuple == NULL)
-               break;
-
-           buf = heapScan->rs_cbuf;
-       }
-
-       LockBuffer(buf, BUFFER_LOCK_SHARE);
-
-       switch (HeapTupleSatisfiesVacuum(tuple, OldestXmin, buf))
-       {
-           case HEAPTUPLE_DEAD:
-               /* Definitely dead */
-               isdead = true;
-               break;
-           case HEAPTUPLE_RECENTLY_DEAD:
-               tups_recently_dead += 1;
-               /* fall through */
-           case HEAPTUPLE_LIVE:
-               /* Live or recently dead, must copy it */
-               isdead = false;
-               break;
-           case HEAPTUPLE_INSERT_IN_PROGRESS:
-
-               /*
-                * Since we hold exclusive lock on the relation, normally the
-                * only way to see this is if it was inserted earlier in our
-                * own transaction.  However, it can happen in system
-                * catalogs, since we tend to release write lock before commit
-                * there.  Give a warning if neither case applies; but in any
-                * case we had better copy it.
-                */
-               if (!is_system_catalog &&
-                   !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple->t_data)))
-                   elog(WARNING, "concurrent insert in progress within table \"%s\"",
-                        RelationGetRelationName(OldHeap));
-               /* treat as live */
-               isdead = false;
-               break;
-           case HEAPTUPLE_DELETE_IN_PROGRESS:
-
-               /*
-                * Similar situation to INSERT_IN_PROGRESS case.
-                */
-               if (!is_system_catalog &&
-                   !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tuple->t_data)))
-                   elog(WARNING, "concurrent delete in progress within table \"%s\"",
-                        RelationGetRelationName(OldHeap));
-               /* treat as recently dead */
-               tups_recently_dead += 1;
-               isdead = false;
-               break;
-           default:
-               elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
-               isdead = false; /* keep compiler quiet */
-               break;
-       }
-
-       LockBuffer(buf, BUFFER_LOCK_UNLOCK);
-
-       if (isdead)
-       {
-           tups_vacuumed += 1;
-           /* heap rewrite module still needs to see it... */
-           if (rewrite_heap_dead_tuple(rwstate, tuple))
-           {
-               /* A previous recently-dead tuple is now known dead */
-               tups_vacuumed += 1;
-               tups_recently_dead -= 1;
-           }
-           continue;
-       }
-
-       num_tuples += 1;
-       if (tuplesort != NULL)
-           tuplesort_putheaptuple(tuplesort, tuple);
-       else
-           reform_and_rewrite_tuple(tuple,
-                                    oldTupDesc, newTupDesc,
-                                    values, isnull,
-                                    rwstate);
-   }
-
-   if (indexScan != NULL)
-       index_endscan(indexScan);
-   if (heapScan != NULL)
-       heap_endscan(heapScan);
-
-   /*
-    * In scan-and-sort mode, complete the sort, then read out all live tuples
-    * from the tuplestore and write them to the new relation.
-    */
-   if (tuplesort != NULL)
-   {
-       tuplesort_performsort(tuplesort);
-
-       for (;;)
-       {
-           HeapTuple   tuple;
-
-           CHECK_FOR_INTERRUPTS();
-
-           tuple = tuplesort_getheaptuple(tuplesort, true);
-           if (tuple == NULL)
-               break;
-
-           reform_and_rewrite_tuple(tuple,
-                                    oldTupDesc, newTupDesc,
-                                    values, isnull,
-                                    rwstate);
-       }
-
-       tuplesort_end(tuplesort);
-   }
-
-   /* Write out any remaining tuples, and fsync if needed */
-   end_heap_rewrite(rwstate);
+   table_copy_for_cluster(OldHeap, NewHeap, OldIndex, use_sort,
+                          OldestXmin, FreezeXid, MultiXactCutoff,
+                          &num_tuples, &tups_vacuumed, &tups_recently_dead);
 
    /* Reset rd_toastoid just to be tidy --- it shouldn't be looked at again */
    NewHeap->rd_toastoid = InvalidOid;
@@ -1113,10 +916,6 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
                       tups_recently_dead,
                       pg_rusage_show(&ru0))));
 
-   /* Clean up */
-   pfree(values);
-   pfree(isnull);
-
    if (OldIndex != NULL)
        index_close(OldIndex, NoLock);
    heap_close(OldHeap, NoLock);
@@ -1693,7 +1492,7 @@ static List *
 get_tables_to_cluster(MemoryContext cluster_context)
 {
    Relation    indRelation;
-   HeapScanDesc scan;
+   TableScanDesc scan;
    ScanKeyData entry;
    HeapTuple   indexTuple;
    Form_pg_index index;
@@ -1712,8 +1511,8 @@ get_tables_to_cluster(MemoryContext cluster_context)
                Anum_pg_index_indisclustered,
                BTEqualStrategyNumber, F_BOOLEQ,
                BoolGetDatum(true));
-   scan = heap_beginscan_catalog(indRelation, 1, &entry);
-   while ((indexTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+   scan = table_beginscan_catalog(indRelation, 1, &entry);
+   while ((indexTuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
    {
        index = (Form_pg_index) GETSTRUCT(indexTuple);
 
@@ -1733,52 +1532,9 @@ get_tables_to_cluster(MemoryContext cluster_context)
 
        MemoryContextSwitchTo(old_context);
    }
-   heap_endscan(scan);
+   table_endscan(scan);
 
    relation_close(indRelation, AccessShareLock);
 
    return rvs;
 }
-
-
-/*
- * Reconstruct and rewrite the given tuple
- *
- * We cannot simply copy the tuple as-is, for several reasons:
- *
- * 1. We'd like to squeeze out the values of any dropped columns, both
- * to save space and to ensure we have no corner-case failures. (It's
- * possible for example that the new table hasn't got a TOAST table
- * and so is unable to store any large values of dropped cols.)
- *
- * 2. The tuple might not even be legal for the new table; this is
- * currently only known to happen as an after-effect of ALTER TABLE
- * SET WITHOUT OIDS (in an older version, via pg_upgrade).
- *
- * So, we must reconstruct the tuple from component Datums.
- */
-static void
-reform_and_rewrite_tuple(HeapTuple tuple,
-                        TupleDesc oldTupDesc, TupleDesc newTupDesc,
-                        Datum *values, bool *isnull,
-                        RewriteState rwstate)
-{
-   HeapTuple   copiedTuple;
-   int         i;
-
-   heap_deform_tuple(tuple, oldTupDesc, values, isnull);
-
-   /* Be sure to null out any dropped columns */
-   for (i = 0; i < newTupDesc->natts; i++)
-   {
-       if (TupleDescAttr(newTupDesc, i)->attisdropped)
-           isnull[i] = true;
-   }
-
-   copiedTuple = heap_form_tuple(newTupDesc, values, isnull);
-
-   /* The heap rewrite module does the rest */
-   rewrite_heap_tuple(rwstate, tuple, copiedTuple);
-
-   heap_freetuple(copiedTuple);
-}
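
The deleted scan loop moves wholesale into the heap AM; the generic
CLUSTER code retains only the counters and the use_sort decision. A
comment-level skeleton of what the callback inherits, inferred from the
call site above (the exact name and signature live in access/tableam.h and
are assumptions here):

#include "postgres.h"

#include "access/multixact.h"
#include "utils/rel.h"

static void
my_copy_for_cluster(Relation OldTable, Relation NewTable, Relation OldIndex,
                    bool use_sort,
                    TransactionId OldestXmin, TransactionId FreezeXid,
                    MultiXactId MultiXactCutoff,
                    double *num_tuples, double *tups_vacuumed,
                    double *tups_recently_dead)
{
    /* 1. begin a SnapshotAny scan, via OldIndex when given and !use_sort */
    /* 2. classify each tuple live/dead/recently dead; bump the counters */
    /* 3. feed live tuples to a tuplesort when use_sort, else write directly */
    /* 4. drain the sort, if any, then flush/fsync the new relation */
}
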
index b0b2cb2a1461bb0792b161f36cb6e70074a51138..9fbea0b4d8e162c815e8ccbf822543f6c67b967b 100644 (file)
@@ -13,6 +13,8 @@
  */
 #include "postgres.h"
 
+#include "access/relscan.h"
+#include "access/tableam.h"
 #include "catalog/index.h"
 #include "commands/trigger.h"
 #include "executor/executor.h"
@@ -69,6 +71,9 @@ unique_key_recheck(PG_FUNCTION_ARGS)
 
    /*
     * Get the new data that was inserted/updated.
+    *
+    * PBORKED: should use the slot API, otherwise this won't work correctly
+    * for zheap et al.
     */
    if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
        new_row = trigdata->tg_trigtuple;
@@ -83,6 +88,8 @@ unique_key_recheck(PG_FUNCTION_ARGS)
        new_row = NULL;         /* keep compiler quiet */
    }
 
+   slot = table_gimmegimmeslot(trigdata->tg_relation, NULL);
+
    /*
     * If the new_row is now dead (ie, inserted and then deleted within our
     * transaction), we can skip the check.  However, we have to be careful,
@@ -102,12 +109,20 @@ unique_key_recheck(PG_FUNCTION_ARGS)
     * removed.
     */
    tmptid = new_row->t_self;
-   if (!heap_hot_search(&tmptid, trigdata->tg_relation, SnapshotSelf, NULL))
    {
-       /*
-        * All rows in the HOT chain are dead, so skip the check.
-        */
-       return PointerGetDatum(NULL);
+       IndexFetchTableData *scan = table_begin_index_fetch_table(trigdata->tg_relation);
+       bool call_again = false;
+
+       if (!table_fetch_follow(scan, &tmptid, SnapshotSelf, slot, &call_again, NULL))
+       {
+           /*
+            * All rows referenced by the index are dead, so skip the check.
+            */
+           ExecDropSingleTupleTableSlot(slot);
+           table_end_index_fetch_table(scan);
+           return PointerGetDatum(NULL);
+       }
+       table_end_index_fetch_table(scan);
    }
 
    /*
@@ -119,14 +134,6 @@ unique_key_recheck(PG_FUNCTION_ARGS)
                          RowExclusiveLock);
    indexInfo = BuildIndexInfo(indexRel);
 
-   /*
-    * The heap tuple must be put into a slot for FormIndexDatum.
-    */
-   slot = MakeSingleTupleTableSlot(RelationGetDescr(trigdata->tg_relation),
-                                   &TTSOpsHeapTuple);
-
-   ExecStoreHeapTuple(new_row, slot, false);
-
    /*
     * Typically the index won't have expressions, but if it does we need an
     * EState to evaluate them.  We need it for exclusion constraints too,
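
The heap_hot_search() call is replaced by a begin/fetch/end triple so a
non-heap AM can resolve TID chains its own way. The same idiom reduced to
a skeleton; tid_is_visible() is a hypothetical wrapper, not part of the
patch:

#include "postgres.h"

#include "access/tableam.h"
#include "executor/tuptable.h"
#include "storage/itemptr.h"
#include "utils/rel.h"
#include "utils/snapshot.h"

static bool
tid_is_visible(Relation rel, ItemPointer tid, Snapshot snapshot,
               TupleTableSlot *slot)
{
    IndexFetchTableData *scan = table_begin_index_fetch_table(rel);
    bool        call_again = false;
    bool        found;

    /* fetches the tuple (and any successor the AM chains to) into slot */
    found = table_fetch_follow(scan, tid, snapshot, slot, &call_again, NULL);
    table_end_index_fetch_table(scan);

    return found;
}
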
index 4aa8890fe816b2a50c91f73acb66435f3dfded00..587e166c2a2a8176d052d025c0be22030fa346d9 100644 (file)
@@ -21,6 +21,7 @@
 
 #include "access/heapam.h"
 #include "access/htup_details.h"
+#include "access/tableam.h"
 #include "access/sysattr.h"
 #include "access/xact.h"
 #include "access/xlog.h"
@@ -315,9 +316,9 @@ static void CopyOneRowTo(CopyState cstate,
             Datum *values, bool *nulls);
 static void CopyFromInsertBatch(CopyState cstate, EState *estate,
                    CommandId mycid, int hi_options,
-                   ResultRelInfo *resultRelInfo, TupleTableSlot *myslot,
+                   ResultRelInfo *resultRelInfo,
                    BulkInsertState bistate,
-                   int nBufferedTuples, HeapTuple *bufferedTuples,
+                   int nBufferedTuples, TupleTableSlot **bufferedSlots,
                    uint64 firstBufferedLineNo);
 static bool CopyReadLine(CopyState cstate);
 static bool CopyReadLineText(CopyState cstate);
@@ -2046,33 +2047,27 @@ CopyTo(CopyState cstate)
 
    if (cstate->rel)
    {
-       Datum      *values;
-       bool       *nulls;
-       HeapScanDesc scandesc;
-       HeapTuple   tuple;
-
-       values = (Datum *) palloc(num_phys_attrs * sizeof(Datum));
-       nulls = (bool *) palloc(num_phys_attrs * sizeof(bool));
-
-       scandesc = heap_beginscan(cstate->rel, GetActiveSnapshot(), 0, NULL);
+       TupleTableSlot *slot;
+       TableScanDesc scandesc;
 
+       scandesc = table_beginscan(cstate->rel, GetActiveSnapshot(), 0, NULL);
+       slot = table_gimmegimmeslot(cstate->rel, NULL);
        processed = 0;
-       while ((tuple = heap_getnext(scandesc, ForwardScanDirection)) != NULL)
+
+       while (table_scan_getnextslot(scandesc, ForwardScanDirection, slot))
        {
            CHECK_FOR_INTERRUPTS();
 
-           /* Deconstruct the tuple ... faster than repeated heap_getattr */
-           heap_deform_tuple(tuple, tupDesc, values, nulls);
+           /* Deconstruct the tuple ... */
+           slot_getallattrs(slot);
 
            /* Format and send the data */
-           CopyOneRowTo(cstate, values, nulls);
+           CopyOneRowTo(cstate, slot->tts_values, slot->tts_isnull);
            processed++;
        }
 
-       heap_endscan(scandesc);
-
-       pfree(values);
-       pfree(nulls);
+       ExecDropSingleTupleTableSlot(slot);
+       table_endscan(scandesc);
    }
    else
    {
@@ -2284,17 +2279,13 @@ limit_printout_length(const char *str)
 uint64
 CopyFrom(CopyState cstate)
 {
-   HeapTuple   tuple;
-   TupleDesc   tupDesc;
-   Datum      *values;
-   bool       *nulls;
    ResultRelInfo *resultRelInfo;
    ResultRelInfo *target_resultRelInfo;
    ResultRelInfo *prevResultRelInfo = NULL;
    EState     *estate = CreateExecutorState(); /* for ExecConstraints() */
    ModifyTableState *mtstate;
    ExprContext *econtext;
-   TupleTableSlot *myslot;
+   TupleTableSlot *singleslot;
    MemoryContext oldcontext = CurrentMemoryContext;
 
    PartitionTupleRouting *proute = NULL;
@@ -2302,8 +2293,8 @@ CopyFrom(CopyState cstate)
    ErrorContextCallback errcallback;
    CommandId   mycid = GetCurrentCommandId(true);
    int         hi_options = 0; /* start with default heap_insert options */
-   BulkInsertState bistate;
    CopyInsertMethod insertMethod;
+   BulkInsertState bistate;
    uint64      processed = 0;
    int         nBufferedTuples = 0;
    bool        has_before_insert_row_trig;
@@ -2312,8 +2303,8 @@ CopyFrom(CopyState cstate)
 
 #define MAX_BUFFERED_TUPLES 1000
 #define RECHECK_MULTI_INSERT_THRESHOLD 1000
-   HeapTuple  *bufferedTuples = NULL;  /* initialize to silence warning */
-   Size        bufferedTuplesSize = 0;
+   TupleTableSlot  **bufferedSlots = NULL; /* initialize to silence warning */
+   Size        bufferedSlotsSize = 0;
    uint64      firstBufferedLineNo = 0;
    uint64      lastPartitionSampleLineNo = 0;
    uint64      nPartitionChanges = 0;
@@ -2355,8 +2346,6 @@ CopyFrom(CopyState cstate)
                            RelationGetRelationName(cstate->rel))));
    }
 
-   tupDesc = RelationGetDescr(cstate->rel);
-
    /*----------
     * Check to see if we can avoid writing WAL
     *
@@ -2486,13 +2475,6 @@ CopyFrom(CopyState cstate)
 
    ExecInitRangeTable(estate, cstate->range_table);
 
-   /* Set up a tuple slot too */
-   myslot = ExecInitExtraTupleSlot(estate, tupDesc,
-                                   &TTSOpsHeapTuple);
-   /* Triggers might need a slot as well */
-   estate->es_trig_tuple_slot = ExecInitExtraTupleSlot(estate, NULL,
-                                                       &TTSOpsHeapTuple);
-
    /*
     * Set up a ModifyTableState so we can let FDW(s) init themselves for
     * foreign-table result relation(s).
@@ -2611,7 +2593,17 @@ CopyFrom(CopyState cstate)
        else
            insertMethod = CIM_MULTI;
 
-       bufferedTuples = palloc(MAX_BUFFERED_TUPLES * sizeof(HeapTuple));
+       bufferedSlots = palloc0(MAX_BUFFERED_TUPLES * sizeof(TupleTableSlot *));
+   }
+
+   /*
+    * If not using batch mode (which allocates slots as needed), set up a
+    * tuple slot too.
+    */
+   if (insertMethod == CIM_SINGLE || insertMethod == CIM_MULTI_CONDITIONAL)
+   {
+       singleslot = table_gimmegimmeslot(resultRelInfo->ri_RelationDesc,
+                                         &estate->es_tupleTable);
    }
 
    has_before_insert_row_trig = (resultRelInfo->ri_TrigDesc &&
@@ -2628,9 +2620,6 @@ CopyFrom(CopyState cstate)
     */
    ExecBSInsertTriggers(estate, resultRelInfo);
 
-   values = (Datum *) palloc(tupDesc->natts * sizeof(Datum));
-   nulls = (bool *) palloc(tupDesc->natts * sizeof(bool));
-
    bistate = GetBulkInsertState();
    econtext = GetPerTupleExprContext(estate);
 
@@ -2642,7 +2631,7 @@ CopyFrom(CopyState cstate)
 
    for (;;)
    {
-       TupleTableSlot *slot;
+       TupleTableSlot *myslot;
        bool        skip_tuple;
 
        CHECK_FOR_INTERRUPTS();
@@ -2657,28 +2646,46 @@ CopyFrom(CopyState cstate)
            ResetPerTupleExprContext(estate);
        }
 
+       if (insertMethod == CIM_SINGLE || proute)
+       {
+           myslot = singleslot;
+           Assert(myslot != NULL);
+       }
+       else
+       {
+           if (bufferedSlots[nBufferedTuples] == NULL)
+           {
+               const TupleTableSlotOps *tts_cb;
+
+               tts_cb = table_slot_callbacks(resultRelInfo->ri_RelationDesc);
+
+               bufferedSlots[nBufferedTuples] =
+                   MakeSingleTupleTableSlot(RelationGetDescr(resultRelInfo->ri_RelationDesc),
+                                            tts_cb);
+           }
+           myslot = bufferedSlots[nBufferedTuples];
+       }
+
        /* Switch into its memory context */
        MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
 
-       if (!NextCopyFrom(cstate, econtext, values, nulls))
+       ExecClearTuple(myslot);
+
+       /* Directly store the values/nulls array in the slot */
+       if (!NextCopyFrom(cstate, econtext, myslot->tts_values, myslot->tts_isnull))
            break;
 
-       /* And now we can form the input tuple. */
-       tuple = heap_form_tuple(tupDesc, values, nulls);
+       ExecStoreVirtualTuple(myslot);
 
        /*
         * Constraints might reference the tableoid column, so initialize
         * t_tableOid before evaluating them.
         */
-       tuple->t_tableOid = RelationGetRelid(target_resultRelInfo->ri_RelationDesc);
+       myslot->tts_tableOid = RelationGetRelid(target_resultRelInfo->ri_RelationDesc);
 
        /* Triggers and stuff need to be invoked in query context. */
        MemoryContextSwitchTo(oldcontext);
 
-       /* Place tuple in tuple slot --- but slot shouldn't free it */
-       slot = myslot;
-       ExecStoreHeapTuple(tuple, slot, false);
-
        /* Determine the partition to heap_insert the tuple into */
        if (proute)
        {
@@ -2690,7 +2697,7 @@ CopyFrom(CopyState cstate)
             * if the found partition is not suitable for INSERTs.
             */
            resultRelInfo = ExecFindPartition(mtstate, target_resultRelInfo,
-                                             proute, slot, estate);
+                                             proute, myslot, estate);
 
            if (prevResultRelInfo != resultRelInfo)
            {
@@ -2707,11 +2714,19 @@ CopyFrom(CopyState cstate)
                        ExprContext *swapcontext;
 
                        CopyFromInsertBatch(cstate, estate, mycid, hi_options,
-                                           prevResultRelInfo, myslot, bistate,
-                                           nBufferedTuples, bufferedTuples,
+                                           prevResultRelInfo, bistate,
+                                           nBufferedTuples, bufferedSlots,
                                            firstBufferedLineNo);
                        nBufferedTuples = 0;
-                       bufferedTuplesSize = 0;
+
+                       /* force new slots to be used */
+                       for (int i = 0; i < MAX_BUFFERED_TUPLES; i++)
+                       {
+                           if (bufferedSlots[i] == NULL)
+                               continue;
+                           ExecDropSingleTupleTableSlot(bufferedSlots[i]);
+                           bufferedSlots[i] = NULL;
+                       }
 
                        Assert(secondaryExprContext);
 
@@ -2820,36 +2835,57 @@ CopyFrom(CopyState cstate)
                     * Otherwise, just remember the original unconverted
                     * tuple, to avoid a needless round trip conversion.
                     */
-                   cstate->transition_capture->tcs_original_insert_tuple = tuple;
+                   cstate->transition_capture->tcs_original_insert_tuple =
+                       ExecFetchSlotHeapTuple(myslot, false, NULL);
                    cstate->transition_capture->tcs_map = NULL;
                }
            }
 
+
            /*
             * We might need to convert from the root rowtype to the partition
             * rowtype.
             */
            map = resultRelInfo->ri_PartitionInfo->pi_RootToPartitionMap;
-           if (map != NULL)
+           if (insertMethod == CIM_SINGLE ||
+               (insertMethod == CIM_MULTI_CONDITIONAL && !leafpart_use_multi_insert))
+           {
+               if (map != NULL)
+               {
+                   TupleTableSlot *new_slot;
+
+                   new_slot = resultRelInfo->ri_PartitionInfo->pi_PartitionTupleSlot;
+                   myslot = execute_attr_map_slot(map->attrMap, myslot, new_slot);
+               }
+           }
+           else if (insertMethod == CIM_MULTI_CONDITIONAL)
            {
                TupleTableSlot *new_slot;
-               MemoryContext oldcontext;
 
-               new_slot = resultRelInfo->ri_PartitionInfo->pi_PartitionTupleSlot;
-               Assert(new_slot != NULL);
+               if (bufferedSlots[nBufferedTuples] == NULL)
+               {
+                   const TupleTableSlotOps *tts_cb;
+
+                   tts_cb = table_slot_callbacks(resultRelInfo->ri_RelationDesc);
+                   bufferedSlots[nBufferedTuples] =
+                       MakeSingleTupleTableSlot(RelationGetDescr(resultRelInfo->ri_RelationDesc),
+                                                tts_cb);
+               }
 
-               slot = execute_attr_map_slot(map->attrMap, slot, new_slot);
+               new_slot = bufferedSlots[nBufferedTuples];
 
-               /*
-                * Get the tuple in the per-tuple context, so that it will be
-                * freed after each batch insert.
-                */
-               oldcontext = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
-               tuple = ExecCopySlotHeapTuple(slot);
-               MemoryContextSwitchTo(oldcontext);
+               if (map != NULL)
+                   myslot = execute_attr_map_slot(map->attrMap, myslot, new_slot);
+               else
+               {
+                   ExecCopySlot(new_slot, myslot);
+                   myslot = new_slot;
+               }
+           }
+           else
+           {
+               elog(ERROR, "unexpected COPY insert method");
            }
-
-           tuple->t_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
        }
 
        skip_tuple = false;
@@ -2857,12 +2893,8 @@ CopyFrom(CopyState cstate)
        /* BEFORE ROW INSERT Triggers */
        if (has_before_insert_row_trig)
        {
-           slot = ExecBRInsertTriggers(estate, resultRelInfo, slot);
-
-           if (slot == NULL)   /* "do nothing" */
-               skip_tuple = true;
-           else                /* trigger might have changed tuple */
-               tuple = ExecFetchSlotHeapTuple(slot, true, NULL);
+           if (!ExecBRInsertTriggers(estate, resultRelInfo, myslot))
+               skip_tuple = true;  /* "do nothing" */
        }
 
        if (!skip_tuple)
@@ -2870,7 +2902,7 @@ CopyFrom(CopyState cstate)
            if (has_instead_insert_row_trig)
            {
                /* Pass the data to the INSTEAD ROW INSERT trigger */
-               ExecIRInsertTriggers(estate, resultRelInfo, slot);
+               ExecIRInsertTriggers(estate, resultRelInfo, myslot);
            }
            else
            {
@@ -2880,7 +2912,7 @@ CopyFrom(CopyState cstate)
                 */
                if (resultRelInfo->ri_FdwRoutine == NULL &&
                    resultRelInfo->ri_RelationDesc->rd_att->constr)
-                   ExecConstraints(resultRelInfo, slot, estate);
+                   ExecConstraints(resultRelInfo, myslot, estate);
 
                /*
                 * Also check the tuple against the partition constraint, if
@@ -2890,7 +2922,7 @@ CopyFrom(CopyState cstate)
                 */
                if (resultRelInfo->ri_PartitionCheck &&
                    (proute == NULL || has_before_insert_row_trig))
-                   ExecPartitionCheck(resultRelInfo, slot, estate, true);
+                   ExecPartitionCheck(resultRelInfo, myslot, estate, true);
 
                /*
                 * Perform multi-inserts when enabled, or when loading a
@@ -2902,8 +2934,10 @@ CopyFrom(CopyState cstate)
                    /* Add this tuple to the tuple buffer */
                    if (nBufferedTuples == 0)
                        firstBufferedLineNo = cstate->cur_lineno;
-                   bufferedTuples[nBufferedTuples++] = tuple;
-                   bufferedTuplesSize += tuple->t_len;
+
+                   Assert(bufferedSlots[nBufferedTuples] == myslot);
+                   nBufferedTuples++;
+                   bufferedSlotsSize += cstate->line_buf.len;
 
                    /*
                     * If the buffer filled up, flush it.  Also flush if the
@@ -2912,14 +2946,14 @@ CopyFrom(CopyState cstate)
                     * buffer when the tuples are exceptionally wide.
                     */
                    if (nBufferedTuples == MAX_BUFFERED_TUPLES ||
-                       bufferedTuplesSize > 65535)
+                       bufferedSlotsSize > 65535)
                    {
                        CopyFromInsertBatch(cstate, estate, mycid, hi_options,
-                                           resultRelInfo, myslot, bistate,
-                                           nBufferedTuples, bufferedTuples,
+                                           resultRelInfo, bistate,
+                                           nBufferedTuples, bufferedSlots,
                                            firstBufferedLineNo);
                        nBufferedTuples = 0;
-                       bufferedTuplesSize = 0;
+                       bufferedSlotsSize = 0;
                    }
                }
                else
@@ -2929,39 +2963,38 @@ CopyFrom(CopyState cstate)
                    /* OK, store the tuple */
                    if (resultRelInfo->ri_FdwRoutine != NULL)
                    {
-                       slot = resultRelInfo->ri_FdwRoutine->ExecForeignInsert(estate,
-                                                                              resultRelInfo,
-                                                                              slot,
-                                                                              NULL);
+                       myslot = resultRelInfo->ri_FdwRoutine->ExecForeignInsert(estate,
+                                                                                resultRelInfo,
+                                                                                myslot,
+                                                                                NULL);
 
-                       if (slot == NULL)   /* "do nothing" */
+                       if (myslot == NULL) /* "do nothing" */
                            continue;   /* next tuple please */
 
-                       /* FDW might have changed tuple */
-                       tuple = ExecFetchSlotHeapTuple(slot, true, NULL);
-
                        /*
                         * AFTER ROW Triggers might reference the tableoid
                         * column, so initialize t_tableOid before evaluating
                         * them.
                         */
-                       tuple->t_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
+                       myslot->tts_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
                    }
                    else
-                       heap_insert(resultRelInfo->ri_RelationDesc, tuple,
-                                   mycid, hi_options, bistate);
+                   {
+                       /* OK, store the tuple via the table AM */
+                       table_insert(resultRelInfo->ri_RelationDesc, myslot, mycid, hi_options,
+                                    bistate);
+                   }
 
                    /* And create index entries for it */
                    if (resultRelInfo->ri_NumIndices > 0)
-                       recheckIndexes = ExecInsertIndexTuples(slot,
-                                                              &(tuple->t_self),
+                       recheckIndexes = ExecInsertIndexTuples(myslot,
                                                               estate,
                                                               false,
                                                               NULL,
                                                               NIL);
 
                    /* AFTER ROW INSERT Triggers */
-                   ExecARInsertTriggers(estate, resultRelInfo, tuple,
+                   ExecARInsertTriggers(estate, resultRelInfo, myslot,
                                         recheckIndexes, cstate->transition_capture);
 
                    list_free(recheckIndexes);
@@ -2983,21 +3016,33 @@ CopyFrom(CopyState cstate)
        if (insertMethod == CIM_MULTI_CONDITIONAL)
        {
            CopyFromInsertBatch(cstate, estate, mycid, hi_options,
-                               prevResultRelInfo, myslot, bistate,
-                               nBufferedTuples, bufferedTuples,
+                               prevResultRelInfo, bistate,
+                               nBufferedTuples, bufferedSlots,
                                firstBufferedLineNo);
        }
        else
            CopyFromInsertBatch(cstate, estate, mycid, hi_options,
-                               resultRelInfo, myslot, bistate,
-                               nBufferedTuples, bufferedTuples,
+                               resultRelInfo, bistate,
+                               nBufferedTuples, bufferedSlots,
                                firstBufferedLineNo);
    }
 
+   /* free slots */
+   if (bufferedSlots)
+   {
+       for (int i = 0; i < MAX_BUFFERED_TUPLES; i++)
+       {
+           if (bufferedSlots[i] == NULL)
+               continue;
+           ExecDropSingleTupleTableSlot(bufferedSlots[i]);
+           bufferedSlots[i] = NULL;
+       }
+   }
+
    /* Done, clean up */
    error_context_stack = errcallback.previous;
 
-   FreeBulkInsertState(bistate);
+   ReleaseBulkInsertStatePin(bistate);
 
    MemoryContextSwitchTo(oldcontext);
 
@@ -3014,9 +3059,6 @@ CopyFrom(CopyState cstate)
    /* Handle queued AFTER triggers */
    AfterTriggerEndQuery(estate);
 
-   pfree(values);
-   pfree(nulls);
-
    ExecResetTupleTable(estate->es_tupleTable, false);
 
    /* Allow the FDW to shut down */
@@ -3041,7 +3083,7 @@ CopyFrom(CopyState cstate)
     * indexes since those use WAL anyway)
     */
    if (hi_options & HEAP_INSERT_SKIP_WAL)
-       heap_sync(cstate->rel);
+       table_sync(cstate->rel);
 
    return processed;
 }
@@ -3054,8 +3096,7 @@ CopyFrom(CopyState cstate)
 static void
 CopyFromInsertBatch(CopyState cstate, EState *estate, CommandId mycid,
                    int hi_options, ResultRelInfo *resultRelInfo,
-                   TupleTableSlot *myslot, BulkInsertState bistate,
-                   int nBufferedTuples, HeapTuple *bufferedTuples,
+                   BulkInsertState bistate, int nBufferedTuples, TupleTableSlot **bufferedSlots,
                    uint64 firstBufferedLineNo)
 {
    MemoryContext oldcontext;
@@ -3075,12 +3116,12 @@ CopyFromInsertBatch(CopyState cstate, EState *estate, CommandId mycid,
     * before calling it.
     */
    oldcontext = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
-   heap_multi_insert(resultRelInfo->ri_RelationDesc,
-                     bufferedTuples,
-                     nBufferedTuples,
-                     mycid,
-                     hi_options,
-                     bistate);
+   table_multi_insert(resultRelInfo->ri_RelationDesc,
+                      bufferedSlots,
+                      nBufferedTuples,
+                      mycid,
+                      hi_options,
+                      bistate);
    MemoryContextSwitchTo(oldcontext);
 
    /*
@@ -3094,12 +3135,10 @@ CopyFromInsertBatch(CopyState cstate, EState *estate, CommandId mycid,
            List       *recheckIndexes;
 
            cstate->cur_lineno = firstBufferedLineNo + i;
-           ExecStoreHeapTuple(bufferedTuples[i], myslot, false);
            recheckIndexes =
-               ExecInsertIndexTuples(myslot, &(bufferedTuples[i]->t_self),
-                                     estate, false, NULL, NIL);
+               ExecInsertIndexTuples(bufferedSlots[i], estate, false, NULL, NIL);
            ExecARInsertTriggers(estate, resultRelInfo,
-                                bufferedTuples[i],
+                                bufferedSlots[i],
                                 recheckIndexes, cstate->transition_capture);
            list_free(recheckIndexes);
        }
@@ -3117,7 +3156,7 @@ CopyFromInsertBatch(CopyState cstate, EState *estate, CommandId mycid,
        {
            cstate->cur_lineno = firstBufferedLineNo + i;
            ExecARInsertTriggers(estate, resultRelInfo,
-                                bufferedTuples[i],
+                                bufferedSlots[i],
                                 NIL, cstate->transition_capture);
        }
    }
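
CopyFrom() now buffers whole slots rather than HeapTuples, creating each
slot lazily with the AM's preferred slot callbacks and flushing the batch
through table_multi_insert(). The pattern in miniature; flush_buffered()
and get_buffer_slot() are hypothetical reductions of CopyFromInsertBatch()
and the lazy-allocation branch above:

#include "postgres.h"

#include "access/heapam.h"
#include "access/tableam.h"
#include "executor/tuptable.h"
#include "utils/rel.h"

#define MAX_BUFFERED 1000

static TupleTableSlot *
get_buffer_slot(Relation rel, TupleTableSlot **slots, int n)
{
    /* create slots on demand, using the table AM's slot implementation */
    if (slots[n] == NULL)
        slots[n] = MakeSingleTupleTableSlot(RelationGetDescr(rel),
                                            table_slot_callbacks(rel));
    return slots[n];
}

static void
flush_buffered(Relation rel, TupleTableSlot **slots, int nslots,
               CommandId cid, int options, BulkInsertState bistate)
{
    if (nslots == 0)
        return;
    /* one AM call inserts the whole batch */
    table_multi_insert(rel, slots, nslots, cid, options, bistate);
}
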
index d01b258b65444a47ff23773e07fe73a14ade567b..d346bf0749adefc6d16e8b1ee298d5259499e786 100644 (file)
@@ -26,6 +26,7 @@
 
 #include "access/reloptions.h"
 #include "access/htup_details.h"
+#include "access/tableam.h"
 #include "access/sysattr.h"
 #include "access/xact.h"
 #include "access/xlog.h"
@@ -59,7 +60,8 @@ typedef struct
    ObjectAddress reladdr;      /* address of rel, for ExecCreateTableAs */
    CommandId   output_cid;     /* cmin to insert in output tuples */
    int         hi_options;     /* heap_insert performance options */
-   BulkInsertState bistate;    /* bulk insert state */
+   void       *bistate;        /* bulk insert state */
+   TupleTableSlot *slot;
 } DR_intorel;
 
 /* utility functions for CTAS definition creation */
@@ -107,6 +109,9 @@ create_ctas_internal(List *attrList, IntoClause *into)
    create->oncommit = into->onCommit;
    create->tablespacename = into->tableSpaceName;
    create->if_not_exists = false;
+   create->accessMethod = into->accessMethod;
+
+   /* PBORKED: toast options */
 
    /*
     * Create the relation.  (This will error out if there's an existing view,
@@ -550,6 +555,7 @@ intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
    myState->rel = intoRelationDesc;
    myState->reladdr = intoRelationAddr;
    myState->output_cid = GetCurrentCommandId(true);
+   myState->slot = table_gimmegimmeslot(intoRelationDesc, NULL);
 
    /*
     * We can skip WAL-logging the insertions, unless PITR or streaming
@@ -570,19 +576,21 @@ static bool
 intorel_receive(TupleTableSlot *slot, DestReceiver *self)
 {
    DR_intorel *myState = (DR_intorel *) self;
-   HeapTuple   tuple;
 
    /*
-    * get the heap tuple out of the tuple table slot, making sure we have a
-    * writable copy
+    * Ensure input tuple is the right format for the target relation.
     */
-   tuple = ExecCopySlotHeapTuple(slot);
+   if (slot->tts_ops != myState->slot->tts_ops)
+   {
+       ExecCopySlot(myState->slot, slot);
+       slot = myState->slot;
+   }
 
-   heap_insert(myState->rel,
-               tuple,
-               myState->output_cid,
-               myState->hi_options,
-               myState->bistate);
+   table_insert(myState->rel,
+                slot,
+                myState->output_cid,
+                myState->hi_options,
+                myState->bistate);
 
    /* We know this is a newly created relation, so there are no indexes */
 
@@ -597,11 +605,12 @@ intorel_shutdown(DestReceiver *self)
 {
    DR_intorel *myState = (DR_intorel *) self;
 
+   ExecDropSingleTupleTableSlot(myState->slot);
    FreeBulkInsertState(myState->bistate);
 
    /* If we skipped using WAL, must heap_sync before commit */
    if (myState->hi_options & HEAP_INSERT_SKIP_WAL)
-       heap_sync(myState->rel);
+       table_sync(myState->rel);
 
    /* close rel, but keep lock until commit */
    heap_close(myState->rel, NoLock);
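
intorel_receive() now converts the incoming slot only when its
implementation (tts_ops) differs from the AM-owned slot's, instead of
materializing a HeapTuple for every row. The idiom in isolation;
store_slot() is a hypothetical reduction of the DestReceiver callback:

#include "postgres.h"

#include "access/heapam.h"
#include "access/tableam.h"
#include "executor/tuptable.h"
#include "utils/rel.h"

static void
store_slot(Relation rel, TupleTableSlot *src, TupleTableSlot *amslot,
           CommandId cid, int options, BulkInsertState bistate)
{
    /* copy only when the source slot is not in the AM's native format */
    if (src->tts_ops != amslot->tts_ops)
    {
        ExecCopySlot(amslot, src);
        src = amslot;
    }

    table_insert(rel, src, cid, options, bistate);
}
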
index f640f4697294563bd35890d0f346eaab16a97e44..e198a7319c2e8da0ab90788640175bc9a4302737 100644 (file)
@@ -26,6 +26,7 @@
 #include "access/genam.h"
 #include "access/heapam.h"
 #include "access/htup_details.h"
+#include "access/tableam.h"
 #include "access/xact.h"
 #include "access/xloginsert.h"
 #include "access/xlogutils.h"
@@ -98,7 +99,7 @@ static int    errdetail_busy_db(int notherbackends, int npreparedxacts);
 Oid
 createdb(ParseState *pstate, const CreatedbStmt *stmt)
 {
-   HeapScanDesc scan;
+   TableScanDesc scan;
    Relation    rel;
    Oid         src_dboid;
    Oid         src_owner;
@@ -590,8 +591,8 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
         * each one to the new database.
         */
        rel = heap_open(TableSpaceRelationId, AccessShareLock);
-       scan = heap_beginscan_catalog(rel, 0, NULL);
-       while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+       scan = table_beginscan_catalog(rel, 0, NULL);
+       while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
        {
            Form_pg_tablespace spaceform = (Form_pg_tablespace) GETSTRUCT(tuple);
            Oid         srctablespace = spaceform->oid;
@@ -644,7 +645,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
                                  XLOG_DBASE_CREATE | XLR_SPECIAL_REL_UPDATE);
            }
        }
-       heap_endscan(scan);
+       table_endscan(scan);
        heap_close(rel, AccessShareLock);
 
        /*
@@ -1871,12 +1872,12 @@ static void
 remove_dbtablespaces(Oid db_id)
 {
    Relation    rel;
-   HeapScanDesc scan;
+   TableScanDesc scan;
    HeapTuple   tuple;
 
    rel = heap_open(TableSpaceRelationId, AccessShareLock);
-   scan = heap_beginscan_catalog(rel, 0, NULL);
-   while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+   scan = table_beginscan_catalog(rel, 0, NULL);
+   while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
    {
        Form_pg_tablespace spcform = (Form_pg_tablespace) GETSTRUCT(tuple);
        Oid         dsttablespace = spcform->oid;
@@ -1918,7 +1919,7 @@ remove_dbtablespaces(Oid db_id)
        pfree(dstpath);
    }
 
-   heap_endscan(scan);
+   table_endscan(scan);
    heap_close(rel, AccessShareLock);
 }
 
@@ -1939,12 +1940,12 @@ check_db_file_conflict(Oid db_id)
 {
    bool        result = false;
    Relation    rel;
-   HeapScanDesc scan;
+   TableScanDesc scan;
    HeapTuple   tuple;
 
    rel = heap_open(TableSpaceRelationId, AccessShareLock);
-   scan = heap_beginscan_catalog(rel, 0, NULL);
-   while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+   scan = table_beginscan_catalog(rel, 0, NULL);
+   while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
    {
        Form_pg_tablespace spcform = (Form_pg_tablespace) GETSTRUCT(tuple);
        Oid         dsttablespace = spcform->oid;
@@ -1968,7 +1969,7 @@ check_db_file_conflict(Oid db_id)
        pfree(dstpath);
    }
 
-   heap_endscan(scan);
+   table_endscan(scan);
    heap_close(rel, AccessShareLock);
 
    return result;
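
All three dbcommands.c call sites follow the same shape: begin the scan
through the table-AM entry point, but keep consuming HeapTuples, which stays
safe here because system catalogs are always heap tables.  A minimal sketch
of that shape (example_scan_tablespaces is a hypothetical name):

    static void
    example_scan_tablespaces(void)
    {
        Relation      rel;
        TableScanDesc scan;
        HeapTuple     tuple;

        rel = heap_open(TableSpaceRelationId, AccessShareLock);
        scan = table_beginscan_catalog(rel, 0, NULL);
        while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
        {
            Form_pg_tablespace form = (Form_pg_tablespace) GETSTRUCT(tuple);

            /* ... act on form->oid here ... */
        }
        table_endscan(scan);
        heap_close(rel, AccessShareLock);
    }
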
index 6c06167fb2ae39e81e0cf64e6069cceb9e689dca..1420c12af874be4a6e02044ad9a4cdab65527bb3 100644 (file)
@@ -18,6 +18,7 @@
 #include "access/amapi.h"
 #include "access/htup_details.h"
 #include "access/reloptions.h"
+#include "access/tableam.h"
 #include "access/sysattr.h"
 #include "access/xact.h"
 #include "catalog/catalog.h"
@@ -2338,7 +2339,7 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind,
 {
    Oid         objectOid;
    Relation    relationRelation;
-   HeapScanDesc scan;
+   TableScanDesc scan;
    ScanKeyData scan_keys[1];
    HeapTuple   tuple;
    MemoryContext private_context;
@@ -2412,8 +2413,8 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind,
     * rels will be processed indirectly by reindex_relation).
     */
    relationRelation = heap_open(RelationRelationId, AccessShareLock);
-   scan = heap_beginscan_catalog(relationRelation, num_keys, scan_keys);
-   while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+   scan = table_beginscan_catalog(relationRelation, num_keys, scan_keys);
+   while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
    {
        Form_pg_class classtuple = (Form_pg_class) GETSTRUCT(tuple);
        Oid         relid = classtuple->oid;
@@ -2471,7 +2472,7 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind,
 
        MemoryContextSwitchTo(old);
    }
-   heap_endscan(scan);
+   table_endscan(scan);
    heap_close(relationRelation, AccessShareLock);
 
    /* Now reindex each rel in a separate transaction */
index a171ebabf8fb6d12ad5be4e6ebe77392133b3817..20cc2ae597095b2e447f9b39919a9b59bf7fd941 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "access/htup_details.h"
 #include "access/multixact.h"
+#include "access/tableam.h"
 #include "access/xact.h"
 #include "access/xlog.h"
 #include "catalog/catalog.h"
@@ -52,7 +53,8 @@ typedef struct
    Relation    transientrel;   /* relation to write to */
    CommandId   output_cid;     /* cmin to insert in output tuples */
    int         hi_options;     /* heap_insert performance options */
-   BulkInsertState bistate;    /* bulk insert state */
+   void       *bistate;        /* bulk insert state */
+   TupleTableSlot *slot;
 } DR_transientrel;
 
 static int matview_maintenance_depth = 0;
@@ -454,6 +456,7 @@ transientrel_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
     */
    myState->transientrel = transientrel;
    myState->output_cid = GetCurrentCommandId(true);
+   myState->slot = table_gimmegimmeslot(transientrel, NULL);
 
    /*
     * We can skip WAL-logging the insertions, unless PITR or streaming
@@ -475,25 +478,24 @@ static bool
 transientrel_receive(TupleTableSlot *slot, DestReceiver *self)
 {
    DR_transientrel *myState = (DR_transientrel *) self;
-   HeapTuple   tuple;
 
    /*
-    * get the heap tuple out of the tuple table slot, making sure we have a
-    * writable copy
+    * Ensure input tuple is the right format for the target relation.
     */
-   tuple = ExecCopySlotHeapTuple(slot);
+   if (slot->tts_ops != myState->slot->tts_ops)
+   {
+       ExecCopySlot(myState->slot, slot);
+       slot = myState->slot;
+   }
 
-   heap_insert(myState->transientrel,
-               tuple,
-               myState->output_cid,
-               myState->hi_options,
-               myState->bistate);
+   table_insert(myState->transientrel,
+                slot,
+                myState->output_cid,
+                myState->hi_options,
+                myState->bistate);
 
    /* We know this is a newly created relation, so there are no indexes */
 
-   /* Free the copied tuple. */
-   heap_freetuple(tuple);
-
    return true;
 }
 
@@ -505,11 +507,12 @@ transientrel_shutdown(DestReceiver *self)
 {
    DR_transientrel *myState = (DR_transientrel *) self;
 
+   ExecDropSingleTupleTableSlot(myState->slot);
    FreeBulkInsertState(myState->bistate);
 
    /* If we skipped using WAL, must heap_sync before commit */
    if (myState->hi_options & HEAP_INSERT_SKIP_WAL)
-       heap_sync(myState->transientrel);
+       table_sync(myState->transientrel);
 
    /* close transientrel, but keep lock until commit */
    heap_close(myState->transientrel, NoLock);
index d6d0de1b01b1ea57a8b927237e2e688b6c3a487a..20f1e5f2c618640a449dc5a166be562ad0488a66 100644 (file)
@@ -20,6 +20,7 @@
 #include "access/multixact.h"
 #include "access/reloptions.h"
 #include "access/relscan.h"
+#include "access/tableam.h"
 #include "access/sysattr.h"
 #include "access/tupconvert.h"
 #include "access/xact.h"
@@ -536,6 +537,8 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
    static char *validnsps[] = HEAP_RELOPT_NAMESPACES;
    Oid         ofTypeId;
    ObjectAddress address;
+   const char *accessMethod = NULL;
+   Oid         accessMethodId = InvalidOid;
 
    /*
     * Truncate relname to appropriate length (probably a waste of time, as
@@ -717,6 +720,35 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
            attr->attidentity = colDef->identity;
    }
 
+   /*
+    * If the statement hasn't specified an access method, but we're defining
+    * a type of relation that needs one, use the default.
+    */
+   if (stmt->accessMethod != NULL)
+       accessMethod = stmt->accessMethod;
+   else if (relkind == RELKIND_RELATION ||
+            relkind == RELKIND_TOASTVALUE ||
+            relkind == RELKIND_MATVIEW ||
+            relkind == RELKIND_PARTITIONED_TABLE)
+       accessMethod = default_table_access_method;
+
+   /*
+    * look up the access method, verify it can handle the requested features
+    */
+   if (accessMethod != NULL)
+   {
+       HeapTuple   tuple;
+
+       tuple = SearchSysCache1(AMNAME, PointerGetDatum(accessMethod));
+       if (!HeapTupleIsValid(tuple))
+           ereport(ERROR,
+                   (errcode(ERRCODE_UNDEFINED_OBJECT),
+                    errmsg("table access method \"%s\" does not exist",
+                           accessMethod)));
+       accessMethodId = ((Form_pg_am) GETSTRUCT(tuple))->oid;
+       ReleaseSysCache(tuple);
+   }
+
    /*
     * Create the relation.  Inherited defaults and constraints are passed in
     * for immediate handling --- since they don't need parsing, they can be
@@ -729,6 +761,7 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
                                          InvalidOid,
                                          ofTypeId,
                                          ownerId,
+                                         accessMethodId,
                                          descriptor,
                                          list_concat(cookedDefaults,
                                                      old_constraints),
@@ -1576,6 +1609,7 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
             */
            CheckTableForSerializableConflictIn(rel);
 
+           /* PBORKED: Need to abstract this */
            minmulti = GetOldestMultiXactId();
 
            /*
@@ -1584,6 +1618,8 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
             * Create a new empty storage file for the relation, and assign it
             * as the relfilenode value. The old storage file is scheduled for
             * deletion at commit.
+            *
+            * PBORKED: needs to be a callback
             */
            RelationSetNewRelfilenode(rel, rel->rd_rel->relpersistence,
                                      RecentXmin, minmulti);
@@ -4523,7 +4559,7 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
    ListCell   *l;
    EState     *estate;
    CommandId   mycid;
-   BulkInsertState bistate;
+   void       *bistate;
    int         hi_options;
    ExprState  *partqualstate = NULL;
 
@@ -4627,12 +4663,9 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
    if (newrel || needscan)
    {
        ExprContext *econtext;
-       Datum      *values;
-       bool       *isnull;
        TupleTableSlot *oldslot;
        TupleTableSlot *newslot;
-       HeapScanDesc scan;
-       HeapTuple   tuple;
+       TableScanDesc scan;
        MemoryContext oldCxt;
        List       *dropped_attrs = NIL;
        ListCell   *lc;
@@ -4664,15 +4697,16 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
         * tuples are the same, the tupDescs might not be (consider ADD COLUMN
         * without a default).
         */
-       oldslot = MakeSingleTupleTableSlot(oldTupDesc, &TTSOpsHeapTuple);
-       newslot = MakeSingleTupleTableSlot(newTupDesc, &TTSOpsHeapTuple);
+       /*
+        * When rewriting, the scan returns tuples in the old format, so
+        * oldslot must be built with oldTupDesc; without a rewrite the old
+        * and new descriptors are physically compatible and oldslot doubles
+        * as the slot the constraints are checked against, so use newTupDesc.
+        */
+       oldslot = MakeSingleTupleTableSlot(tab->rewrite > 0 ? oldTupDesc : newTupDesc,
+                                          table_slot_callbacks(oldrel));
+       newslot = MakeSingleTupleTableSlot(newTupDesc,
+                                          table_slot_callbacks(newrel ? newrel : oldrel));
 
-       /* Preallocate values/isnull arrays */
-       i = Max(newTupDesc->natts, oldTupDesc->natts);
-       values = (Datum *) palloc(i * sizeof(Datum));
-       isnull = (bool *) palloc(i * sizeof(bool));
-       memset(values, 0, i * sizeof(Datum));
-       memset(isnull, true, i * sizeof(bool));
+       memset(newslot->tts_values, 0,
+              sizeof(Datum) * newTupDesc->natts);
+       memset(newslot->tts_isnull, 0,
+              sizeof(bool) * newTupDesc->natts);
 
        /*
         * Any attributes that are dropped according to the new tuple
@@ -4690,7 +4724,7 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
         * checking all the constraints.
         */
        snapshot = RegisterSnapshot(GetLatestSnapshot());
-       scan = heap_beginscan(oldrel, snapshot, 0, NULL);
+       scan = table_beginscan(oldrel, snapshot, 0, NULL);
 
        /*
         * Switch to per-tuple memory context and reset it for each tuple
@@ -4698,55 +4732,69 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
         */
        oldCxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
 
-       while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+       while (table_scan_getnextslot(scan, ForwardScanDirection, oldslot))
        {
+           TupleTableSlot *insertslot;
+
            if (tab->rewrite > 0)
            {
                /* Extract data from old tuple */
-               heap_deform_tuple(tuple, oldTupDesc, values, isnull);
+               slot_getallattrs(oldslot);
+               ExecClearTuple(newslot);
+
+               /* copy attributes */
+               memcpy(newslot->tts_values, oldslot->tts_values,
+                      sizeof(Datum) * oldslot->tts_nvalid);
+               memcpy(newslot->tts_isnull, oldslot->tts_isnull,
+                      sizeof(bool) * oldslot->tts_nvalid);
 
                /* Set dropped attributes to null in new tuple */
                foreach(lc, dropped_attrs)
-                   isnull[lfirst_int(lc)] = true;
+                   newslot->tts_isnull[lfirst_int(lc)] = true;
 
                /*
                 * Process supplied expressions to replace selected columns.
                 * Expression inputs come from the old tuple.
                 */
-               ExecStoreHeapTuple(tuple, oldslot, false);
                econtext->ecxt_scantuple = oldslot;
 
                foreach(l, tab->newvals)
                {
                    NewColumnValue *ex = lfirst(l);
 
-                   values[ex->attnum - 1] = ExecEvalExpr(ex->exprstate,
-                                                         econtext,
-                                                         &isnull[ex->attnum - 1]);
+                   newslot->tts_values[ex->attnum - 1]
+                       = ExecEvalExpr(ex->exprstate,
+                                      econtext,
+                                      &newslot->tts_isnull[ex->attnum - 1]);
                }
 
-               /*
-                * Form the new tuple. Note that we don't explicitly pfree it,
-                * since the per-tuple memory context will be reset shortly.
-                */
-               tuple = heap_form_tuple(newTupDesc, values, isnull);
+               ExecStoreVirtualTuple(newslot);
 
                /*
                 * Constraints might reference the tableoid column, so
                 * initialize t_tableOid before evaluating them.
                 */
-               tuple->t_tableOid = RelationGetRelid(oldrel);
+               newslot->tts_tableOid = RelationGetRelid(oldrel);
+               insertslot = newslot;
+           }
+           else
+           {
+               /*
+                * If there's no rewrite, the old and new tables are
+                * guaranteed to have the same AM, so we can just use the
+                * old slot to verify the new constraints etc.
+                */
+               insertslot = oldslot;
            }
 
            /* Now check any constraints on the possibly-changed tuple */
-           ExecStoreHeapTuple(tuple, newslot, false);
-           econtext->ecxt_scantuple = newslot;
+           econtext->ecxt_scantuple = insertslot;
 
            foreach(l, notnull_attrs)
            {
                int         attn = lfirst_int(l);
 
-               if (heap_attisnull(tuple, attn + 1, newTupDesc))
+               if (slot_attisnull(insertslot, attn + 1))
                {
                    Form_pg_attribute attr = TupleDescAttr(newTupDesc, attn);
 
@@ -4795,7 +4843,7 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
 
            /* Write the tuple out to the new relation */
            if (newrel)
-               heap_insert(newrel, tuple, mycid, hi_options, bistate);
+               table_insert(newrel, insertslot, mycid, hi_options, bistate);
 
            ResetExprContext(econtext);
 
@@ -4803,7 +4851,7 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
        }
 
        MemoryContextSwitchTo(oldCxt);
-       heap_endscan(scan);
+       table_endscan(scan);
        UnregisterSnapshot(snapshot);
 
        ExecDropSingleTupleTableSlot(oldslot);
@@ -4819,7 +4867,7 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
 
        /* If we skipped writing WAL, then we need to sync the heap. */
        if (hi_options & HEAP_INSERT_SKIP_WAL)
-           heap_sync(newrel);
+           table_sync(newrel);
 
        heap_close(newrel, NoLock);
    }
@@ -5198,7 +5246,7 @@ find_typed_table_dependencies(Oid typeOid, const char *typeName, DropBehavior be
 {
    Relation    classRel;
    ScanKeyData key[1];
-   HeapScanDesc scan;
+   TableScanDesc scan;
    HeapTuple   tuple;
    List       *result = NIL;
 
@@ -5209,9 +5257,9 @@ find_typed_table_dependencies(Oid typeOid, const char *typeName, DropBehavior be
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(typeOid));
 
-   scan = heap_beginscan_catalog(classRel, 1, key);
+   scan = table_beginscan_catalog(classRel, 1, key);
 
-   while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+   while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
    {
        Form_pg_class classform = (Form_pg_class) GETSTRUCT(tuple);
 
@@ -5225,7 +5273,7 @@ find_typed_table_dependencies(Oid typeOid, const char *typeName, DropBehavior be
            result = lappend_oid(result, classform->oid);
    }
 
-   heap_endscan(scan);
+   table_endscan(scan);
    heap_close(classRel, AccessShareLock);
 
    return result;
@@ -8323,9 +8371,7 @@ validateCheckConstraint(Relation rel, HeapTuple constrtup)
    char       *conbin;
    Expr       *origexpr;
    ExprState  *exprstate;
-   TupleDesc   tupdesc;
-   HeapScanDesc scan;
-   HeapTuple   tuple;
+   TableScanDesc scan;
    ExprContext *econtext;
    MemoryContext oldcxt;
    TupleTableSlot *slot;
@@ -8360,12 +8406,11 @@ validateCheckConstraint(Relation rel, HeapTuple constrtup)
    exprstate = ExecPrepareExpr(origexpr, estate);
 
    econtext = GetPerTupleExprContext(estate);
-   tupdesc = RelationGetDescr(rel);
-   slot = MakeSingleTupleTableSlot(tupdesc, &TTSOpsHeapTuple);
+   slot = table_gimmegimmeslot(rel, NULL);
    econtext->ecxt_scantuple = slot;
 
    snapshot = RegisterSnapshot(GetLatestSnapshot());
-   scan = heap_beginscan(rel, snapshot, 0, NULL);
+   scan = table_beginscan(rel, snapshot, 0, NULL);
 
    /*
     * Switch to per-tuple memory context and reset it for each tuple
@@ -8373,10 +8418,8 @@ validateCheckConstraint(Relation rel, HeapTuple constrtup)
     */
    oldcxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
 
-   while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+   while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
    {
-       ExecStoreHeapTuple(tuple, slot, false);
-
        if (!ExecCheck(exprstate, econtext))
            ereport(ERROR,
                    (errcode(ERRCODE_CHECK_VIOLATION),
@@ -8388,7 +8431,7 @@ validateCheckConstraint(Relation rel, HeapTuple constrtup)
    }
 
    MemoryContextSwitchTo(oldcxt);
-   heap_endscan(scan);
+   table_endscan(scan);
    UnregisterSnapshot(snapshot);
    ExecDropSingleTupleTableSlot(slot);
    FreeExecutorState(estate);
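
The conversion above removes the intermediate HeapTuple entirely: the
expression machinery reads columns straight out of the scan slot.  The
essential shape, condensed from validateCheckConstraint (error message
shortened for the sketch):

    slot = table_gimmegimmeslot(rel, NULL);
    econtext->ecxt_scantuple = slot;

    scan = table_beginscan(rel, snapshot, 0, NULL);
    while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
    {
        if (!ExecCheck(exprstate, econtext))
            ereport(ERROR,
                    (errcode(ERRCODE_CHECK_VIOLATION),
                     errmsg("check constraint is violated by some row")));

        ResetExprContext(econtext);
    }
    table_endscan(scan);
    ExecDropSingleTupleTableSlot(slot);
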
@@ -8407,8 +8450,8 @@ validateForeignKeyConstraint(char *conname,
                             Oid pkindOid,
                             Oid constraintOid)
 {
-   HeapScanDesc scan;
-   HeapTuple   tuple;
+   TableScanDesc scan;
+   TupleTableSlot *slot;
    Trigger     trig;
    Snapshot    snapshot;
 
@@ -8443,9 +8486,10 @@ validateForeignKeyConstraint(char *conname,
     * ereport(ERROR) and that's that.
     */
    snapshot = RegisterSnapshot(GetLatestSnapshot());
-   scan = heap_beginscan(rel, snapshot, 0, NULL);
+   scan = table_beginscan(rel, snapshot, 0, NULL);
+   slot = table_gimmegimmeslot(rel, NULL);
 
-   while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+   while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
    {
        FunctionCallInfoData fcinfo;
        TriggerData trigdata;
@@ -8463,19 +8507,19 @@ validateForeignKeyConstraint(char *conname,
        trigdata.type = T_TriggerData;
        trigdata.tg_event = TRIGGER_EVENT_INSERT | TRIGGER_EVENT_ROW;
        trigdata.tg_relation = rel;
-       trigdata.tg_trigtuple = tuple;
+       trigdata.tg_trigtuple = ExecFetchSlotHeapTuple(slot, true, NULL);
+       trigdata.tg_trigslot = slot;
        trigdata.tg_newtuple = NULL;
        trigdata.tg_trigger = &trig;
-       trigdata.tg_trigtuplebuf = scan->rs_cbuf;
-       trigdata.tg_newtuplebuf = InvalidBuffer;
 
        fcinfo.context = (Node *) &trigdata;
 
        RI_FKey_check_ins(&fcinfo);
    }
 
-   heap_endscan(scan);
+   table_endscan(scan);
    UnregisterSnapshot(snapshot);
+   ExecDropSingleTupleTableSlot(slot);
 }
 
 static void
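
validateForeignKeyConstraint shows the bridge for code that still needs a
HeapTuple: materialize one inside the slot with ExecFetchSlotHeapTuple.  A
minimal sketch of that bridge, assuming (as the code above suggests) that a
NULL shouldFree argument leaves the tuple owned by the slot:

    while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
    {
        /* Materialize a HeapTuple owned by the slot; do not free it. */
        HeapTuple   tuple = ExecFetchSlotHeapTuple(slot, true, NULL);

        /* ... pass both tuple and slot to HeapTuple-based consumers ... */
    }
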
@@ -11001,7 +11045,7 @@ AlterTableMoveAll(AlterTableMoveAllStmt *stmt)
    ListCell   *l;
    ScanKeyData key[1];
    Relation    rel;
-   HeapScanDesc scan;
+   TableScanDesc scan;
    HeapTuple   tuple;
    Oid         orig_tablespaceoid;
    Oid         new_tablespaceoid;
@@ -11066,8 +11110,8 @@ AlterTableMoveAll(AlterTableMoveAllStmt *stmt)
                ObjectIdGetDatum(orig_tablespaceoid));
 
    rel = heap_open(RelationRelationId, AccessShareLock);
-   scan = heap_beginscan_catalog(rel, 1, key);
-   while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+   scan = table_beginscan_catalog(rel, 1, key);
+   while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
    {
        Form_pg_class relForm = (Form_pg_class) GETSTRUCT(tuple);
        Oid         relOid = relForm->oid;
@@ -11125,7 +11169,7 @@ AlterTableMoveAll(AlterTableMoveAllStmt *stmt)
        relations = lappend_oid(relations, relOid);
    }
 
-   heap_endscan(scan);
+   table_endscan(scan);
    heap_close(rel, AccessShareLock);
 
    if (relations == NIL)
index 4a714f6e2be15b2461c502245e684e9e4ed326ef..ca429731d4091f94c1acdd288047a6e383e7de00 100644 (file)
@@ -53,6 +53,7 @@
 #include "access/heapam.h"
 #include "access/reloptions.h"
 #include "access/htup_details.h"
+#include "access/tableam.h"
 #include "access/sysattr.h"
 #include "access/xact.h"
 #include "access/xlog.h"
@@ -406,7 +407,7 @@ DropTableSpace(DropTableSpaceStmt *stmt)
 {
 #ifdef HAVE_SYMLINK
    char       *tablespacename = stmt->tablespacename;
-   HeapScanDesc scandesc;
+   TableScanDesc scandesc;
    Relation    rel;
    HeapTuple   tuple;
    Form_pg_tablespace spcform;
@@ -422,8 +423,8 @@ DropTableSpace(DropTableSpaceStmt *stmt)
                Anum_pg_tablespace_spcname,
                BTEqualStrategyNumber, F_NAMEEQ,
                CStringGetDatum(tablespacename));
-   scandesc = heap_beginscan_catalog(rel, 1, entry);
-   tuple = heap_getnext(scandesc, ForwardScanDirection);
+   scandesc = table_beginscan_catalog(rel, 1, entry);
+   tuple = heap_scan_getnext(scandesc, ForwardScanDirection);
 
    if (!HeapTupleIsValid(tuple))
    {
@@ -440,7 +441,7 @@ DropTableSpace(DropTableSpaceStmt *stmt)
                    (errmsg("tablespace \"%s\" does not exist, skipping",
                            tablespacename)));
            /* XXX I assume I need one or both of these next two calls */
-           heap_endscan(scandesc);
+           table_endscan(scandesc);
            heap_close(rel, NoLock);
        }
        return;
@@ -468,7 +469,7 @@ DropTableSpace(DropTableSpaceStmt *stmt)
     */
    CatalogTupleDelete(rel, &tuple->t_self);
 
-   heap_endscan(scandesc);
+   table_endscan(scandesc);
 
    /*
     * Remove any comments or security labels on this tablespace.
@@ -919,7 +920,7 @@ RenameTableSpace(const char *oldname, const char *newname)
    Oid         tspId;
    Relation    rel;
    ScanKeyData entry[1];
-   HeapScanDesc scan;
+   TableScanDesc scan;
    HeapTuple   tup;
    HeapTuple   newtuple;
    Form_pg_tablespace newform;
@@ -932,8 +933,8 @@ RenameTableSpace(const char *oldname, const char *newname)
                Anum_pg_tablespace_spcname,
                BTEqualStrategyNumber, F_NAMEEQ,
                CStringGetDatum(oldname));
-   scan = heap_beginscan_catalog(rel, 1, entry);
-   tup = heap_getnext(scan, ForwardScanDirection);
+   scan = table_beginscan_catalog(rel, 1, entry);
+   tup = heap_scan_getnext(scan, ForwardScanDirection);
    if (!HeapTupleIsValid(tup))
        ereport(ERROR,
                (errcode(ERRCODE_UNDEFINED_OBJECT),
@@ -944,7 +945,7 @@ RenameTableSpace(const char *oldname, const char *newname)
    newform = (Form_pg_tablespace) GETSTRUCT(newtuple);
    tspId = newform->oid;
 
-   heap_endscan(scan);
+   table_endscan(scan);
 
    /* Must be owner */
    if (!pg_tablespace_ownercheck(tspId, GetUserId()))
@@ -962,15 +963,15 @@ RenameTableSpace(const char *oldname, const char *newname)
                Anum_pg_tablespace_spcname,
                BTEqualStrategyNumber, F_NAMEEQ,
                CStringGetDatum(newname));
-   scan = heap_beginscan_catalog(rel, 1, entry);
-   tup = heap_getnext(scan, ForwardScanDirection);
+   scan = table_beginscan_catalog(rel, 1, entry);
+   tup = heap_scan_getnext(scan, ForwardScanDirection);
    if (HeapTupleIsValid(tup))
        ereport(ERROR,
                (errcode(ERRCODE_DUPLICATE_OBJECT),
                 errmsg("tablespace \"%s\" already exists",
                        newname)));
 
-   heap_endscan(scan);
+   table_endscan(scan);
 
    /* OK, update the entry */
    namestrcpy(&(newform->spcname), newname);
@@ -994,7 +995,7 @@ AlterTableSpaceOptions(AlterTableSpaceOptionsStmt *stmt)
 {
    Relation    rel;
    ScanKeyData entry[1];
-   HeapScanDesc scandesc;
+   TableScanDesc scandesc;
    HeapTuple   tup;
    Oid         tablespaceoid;
    Datum       datum;
@@ -1012,8 +1013,8 @@ AlterTableSpaceOptions(AlterTableSpaceOptionsStmt *stmt)
                Anum_pg_tablespace_spcname,
                BTEqualStrategyNumber, F_NAMEEQ,
                CStringGetDatum(stmt->tablespacename));
-   scandesc = heap_beginscan_catalog(rel, 1, entry);
-   tup = heap_getnext(scandesc, ForwardScanDirection);
+   scandesc = table_beginscan_catalog(rel, 1, entry);
+   tup = heap_scan_getnext(scandesc, ForwardScanDirection);
    if (!HeapTupleIsValid(tup))
        ereport(ERROR,
                (errcode(ERRCODE_UNDEFINED_OBJECT),
@@ -1054,7 +1055,7 @@ AlterTableSpaceOptions(AlterTableSpaceOptionsStmt *stmt)
    heap_freetuple(newtuple);
 
    /* Conclude heap scan. */
-   heap_endscan(scandesc);
+   table_endscan(scandesc);
    heap_close(rel, NoLock);
 
    return tablespaceoid;
@@ -1388,7 +1389,7 @@ get_tablespace_oid(const char *tablespacename, bool missing_ok)
 {
    Oid         result;
    Relation    rel;
-   HeapScanDesc scandesc;
+   TableScanDesc scandesc;
    HeapTuple   tuple;
    ScanKeyData entry[1];
 
@@ -1403,8 +1404,8 @@ get_tablespace_oid(const char *tablespacename, bool missing_ok)
                Anum_pg_tablespace_spcname,
                BTEqualStrategyNumber, F_NAMEEQ,
                CStringGetDatum(tablespacename));
-   scandesc = heap_beginscan_catalog(rel, 1, entry);
-   tuple = heap_getnext(scandesc, ForwardScanDirection);
+   scandesc = table_beginscan_catalog(rel, 1, entry);
+   tuple = heap_scan_getnext(scandesc, ForwardScanDirection);
 
    /* We assume that there can be at most one matching tuple */
    if (HeapTupleIsValid(tuple))
@@ -1412,7 +1413,7 @@ get_tablespace_oid(const char *tablespacename, bool missing_ok)
    else
        result = InvalidOid;
 
-   heap_endscan(scandesc);
+   table_endscan(scandesc);
    heap_close(rel, AccessShareLock);
 
    if (!OidIsValid(result) && !missing_ok)
@@ -1434,7 +1435,7 @@ get_tablespace_name(Oid spc_oid)
 {
    char       *result;
    Relation    rel;
-   HeapScanDesc scandesc;
+   TableScanDesc scandesc;
    HeapTuple   tuple;
    ScanKeyData entry[1];
 
@@ -1449,8 +1450,8 @@ get_tablespace_name(Oid spc_oid)
                Anum_pg_tablespace_oid,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(spc_oid));
-   scandesc = heap_beginscan_catalog(rel, 1, entry);
-   tuple = heap_getnext(scandesc, ForwardScanDirection);
+   scandesc = table_beginscan_catalog(rel, 1, entry);
+   tuple = heap_scan_getnext(scandesc, ForwardScanDirection);
 
    /* We assume that there can be at most one matching tuple */
    if (HeapTupleIsValid(tuple))
@@ -1458,7 +1459,7 @@ get_tablespace_name(Oid spc_oid)
    else
        result = NULL;
 
-   heap_endscan(scandesc);
+   table_endscan(scandesc);
    heap_close(rel, AccessShareLock);
 
    return result;
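
Each tablespace.c routine above performs a single-tuple lookup through the
same sequence; get_tablespace_oid is the clearest instance.  Condensed into a
sketch (example_lookup_tablespace is a hypothetical name):

    static Oid
    example_lookup_tablespace(const char *name)
    {
        Relation      rel;
        TableScanDesc scan;
        ScanKeyData   key[1];
        HeapTuple     tuple;
        Oid           result;

        rel = heap_open(TableSpaceRelationId, AccessShareLock);
        ScanKeyInit(&key[0],
                    Anum_pg_tablespace_spcname,
                    BTEqualStrategyNumber, F_NAMEEQ,
                    CStringGetDatum(name));
        scan = table_beginscan_catalog(rel, 1, key);
        tuple = heap_scan_getnext(scan, ForwardScanDirection);

        /* At most one tuple can match a tablespace name. */
        result = HeapTupleIsValid(tuple) ?
            ((Form_pg_tablespace) GETSTRUCT(tuple))->oid : InvalidOid;

        table_endscan(scan);
        heap_close(rel, AccessShareLock);

        return result;
    }
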
index bcdd86ce92f73519e3fcf7d3758d806db2e6966a..6a00a96f59c7869b2c500ccf9c635cfbebf2757f 100644 (file)
@@ -15,6 +15,7 @@
 
 #include "access/genam.h"
 #include "access/heapam.h"
+#include "access/tableam.h"
 #include "access/sysattr.h"
 #include "access/htup_details.h"
 #include "access/xact.h"
@@ -80,16 +81,18 @@ static int  MyTriggerDepth = 0;
 /* Local function prototypes */
 static void ConvertTriggerToFK(CreateTrigStmt *stmt, Oid funcoid);
 static void SetTriggerFlags(TriggerDesc *trigdesc, Trigger *trigger);
-static HeapTuple GetTupleForTrigger(EState *estate,
+static bool GetTupleForTrigger(EState *estate,
                   EPQState *epqstate,
                   ResultRelInfo *relinfo,
                   ItemPointer tid,
                   LockTupleMode lockmode,
-                  TupleTableSlot **newSlot);
+                  TupleTableSlot *oldslot,
+                  TupleTableSlot *newslot,
+                  bool *is_epqtuple);
 static bool TriggerEnabled(EState *estate, ResultRelInfo *relinfo,
               Trigger *trigger, TriggerEvent event,
               Bitmapset *modifiedCols,
-              HeapTuple oldtup, HeapTuple newtup);
+              TupleTableSlot *oldslot, TupleTableSlot *newslot);
 static HeapTuple ExecCallTriggerFunc(TriggerData *trigdata,
                    int tgindx,
                    FmgrInfo *finfo,
@@ -97,7 +100,7 @@ static HeapTuple ExecCallTriggerFunc(TriggerData *trigdata,
                    MemoryContext per_tuple_context);
 static void AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
                      int event, bool row_trigger,
-                     HeapTuple oldtup, HeapTuple newtup,
+                     TupleTableSlot *oldtup, TupleTableSlot *newtup,
                      List *recheckIndexes, Bitmapset *modifiedCols,
                      TransitionCaptureState *transition_capture);
 static void AfterTriggerEnlargeQueryState(void);
@@ -2470,10 +2473,11 @@ ExecBSInsertTriggers(EState *estate, ResultRelInfo *relinfo)
    LocTriggerData.tg_relation = relinfo->ri_RelationDesc;
    LocTriggerData.tg_trigtuple = NULL;
    LocTriggerData.tg_newtuple = NULL;
+   LocTriggerData.tg_trigslot = NULL;
+   LocTriggerData.tg_newslot = NULL;
    LocTriggerData.tg_oldtable = NULL;
    LocTriggerData.tg_newtable = NULL;
-   LocTriggerData.tg_trigtuplebuf = InvalidBuffer;
-   LocTriggerData.tg_newtuplebuf = InvalidBuffer;
+
    for (i = 0; i < trigdesc->numtriggers; i++)
    {
        Trigger    *trigger = &trigdesc->triggers[i];
@@ -2513,7 +2517,7 @@ ExecASInsertTriggers(EState *estate, ResultRelInfo *relinfo,
                              false, NULL, NULL, NIL, NULL, transition_capture);
 }
 
-TupleTableSlot *
+bool
 ExecBRInsertTriggers(EState *estate, ResultRelInfo *relinfo,
                     TupleTableSlot *slot)
 {
@@ -2530,10 +2534,13 @@ ExecBRInsertTriggers(EState *estate, ResultRelInfo *relinfo,
        TRIGGER_EVENT_ROW |
        TRIGGER_EVENT_BEFORE;
    LocTriggerData.tg_relation = relinfo->ri_RelationDesc;
+   LocTriggerData.tg_trigtuple = NULL;
    LocTriggerData.tg_newtuple = NULL;
+   LocTriggerData.tg_trigslot = NULL;
+   LocTriggerData.tg_newslot = NULL;
    LocTriggerData.tg_oldtable = NULL;
    LocTriggerData.tg_newtable = NULL;
-   LocTriggerData.tg_newtuplebuf = InvalidBuffer;
+
    for (i = 0; i < trigdesc->numtriggers; i++)
    {
        Trigger    *trigger = &trigdesc->triggers[i];
@@ -2544,65 +2551,54 @@ ExecBRInsertTriggers(EState *estate, ResultRelInfo *relinfo,
                                  TRIGGER_TYPE_INSERT))
            continue;
        if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event,
-                           NULL, NULL, newtuple))
+                           NULL, NULL, slot))
            continue;
 
+       LocTriggerData.tg_trigslot = slot;
        LocTriggerData.tg_trigtuple = oldtuple = newtuple;
-       LocTriggerData.tg_trigtuplebuf = InvalidBuffer;
+
        LocTriggerData.tg_trigger = trigger;
        newtuple = ExecCallTriggerFunc(&LocTriggerData,
                                       i,
                                       relinfo->ri_TrigFunctions,
                                       relinfo->ri_TrigInstrument,
                                       GetPerTupleMemoryContext(estate));
-       if (oldtuple != newtuple && oldtuple != slottuple)
+       if (false && oldtuple != newtuple && oldtuple != slottuple)
            heap_freetuple(oldtuple);
        if (newtuple == NULL)
        {
            if (should_free)
                heap_freetuple(slottuple);
-           return NULL;        /* "do nothing" */
+           return false;
+       }
+       if (newtuple != oldtuple)
+       {
+           ExecForceStoreHeapTuple(newtuple, slot);
+           newtuple = ExecFetchSlotHeapTuple(slot, true, NULL);
        }
    }
 
-   if (newtuple != slottuple)
-   {
-       /*
-        * Return the modified tuple using the es_trig_tuple_slot.  We assume
-        * the tuple was allocated in per-tuple memory context, and therefore
-        * will go away by itself. The tuple table slot should not try to
-        * clear it.
-        */
-       TupleTableSlot *newslot = estate->es_trig_tuple_slot;
-       TupleDesc   tupdesc = RelationGetDescr(relinfo->ri_RelationDesc);
-
-       if (newslot->tts_tupleDescriptor != tupdesc)
-           ExecSetSlotDescriptor(newslot, tupdesc);
-       ExecStoreHeapTuple(newtuple, newslot, false);
-       slot = newslot;
-   }
-
-   if (should_free)
-       heap_freetuple(slottuple);
-   return slot;
+   return true;
 }
 
 void
 ExecARInsertTriggers(EState *estate, ResultRelInfo *relinfo,
-                    HeapTuple trigtuple, List *recheckIndexes,
+                    TupleTableSlot *slot, List *recheckIndexes,
                     TransitionCaptureState *transition_capture)
 {
    TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
 
    if ((trigdesc && trigdesc->trig_insert_after_row) ||
        (transition_capture && transition_capture->tcs_insert_new_table))
+   {
        AfterTriggerSaveEvent(estate, relinfo, TRIGGER_EVENT_INSERT,
-                             true, NULL, trigtuple,
+                             true, NULL, slot,
                              recheckIndexes, NULL,
                              transition_capture);
+   }
 }
 
-TupleTableSlot *
+bool
 ExecIRInsertTriggers(EState *estate, ResultRelInfo *relinfo,
                     TupleTableSlot *slot)
 {
@@ -2619,10 +2615,13 @@ ExecIRInsertTriggers(EState *estate, ResultRelInfo *relinfo,
        TRIGGER_EVENT_ROW |
        TRIGGER_EVENT_INSTEAD;
    LocTriggerData.tg_relation = relinfo->ri_RelationDesc;
+   LocTriggerData.tg_trigtuple = NULL;
    LocTriggerData.tg_newtuple = NULL;
+   LocTriggerData.tg_trigslot = NULL;
+   LocTriggerData.tg_newslot = NULL;
    LocTriggerData.tg_oldtable = NULL;
    LocTriggerData.tg_newtable = NULL;
-   LocTriggerData.tg_newtuplebuf = InvalidBuffer;
+
    for (i = 0; i < trigdesc->numtriggers; i++)
    {
        Trigger    *trigger = &trigdesc->triggers[i];
@@ -2633,47 +2632,33 @@ ExecIRInsertTriggers(EState *estate, ResultRelInfo *relinfo,
                                  TRIGGER_TYPE_INSERT))
            continue;
        if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event,
-                           NULL, NULL, newtuple))
+                           NULL, NULL, slot))
            continue;
 
+       LocTriggerData.tg_trigslot = slot;
        LocTriggerData.tg_trigtuple = oldtuple = newtuple;
-       LocTriggerData.tg_trigtuplebuf = InvalidBuffer;
        LocTriggerData.tg_trigger = trigger;
        newtuple = ExecCallTriggerFunc(&LocTriggerData,
                                       i,
                                       relinfo->ri_TrigFunctions,
                                       relinfo->ri_TrigInstrument,
                                       GetPerTupleMemoryContext(estate));
-       if (oldtuple != newtuple && oldtuple != slottuple)
+       if (false && oldtuple != newtuple && oldtuple != slottuple)
            heap_freetuple(oldtuple);
        if (newtuple == NULL)
        {
            if (should_free)
                heap_freetuple(slottuple);
-           return NULL;        /* "do nothing" */
+           return false;       /* "do nothing" */
+       }
+       if (oldtuple != newtuple)
+       {
+           ExecForceStoreHeapTuple(newtuple, LocTriggerData.tg_trigslot);
+           newtuple = ExecFetchSlotHeapTuple(slot, true, NULL);
        }
    }
 
-   if (newtuple != slottuple)
-   {
-       /*
-        * Return the modified tuple using the es_trig_tuple_slot.  We assume
-        * the tuple was allocated in per-tuple memory context, and therefore
-        * will go away by itself. The tuple table slot should not try to
-        * clear it.
-        */
-       TupleTableSlot *newslot = estate->es_trig_tuple_slot;
-       TupleDesc   tupdesc = RelationGetDescr(relinfo->ri_RelationDesc);
-
-       if (newslot->tts_tupleDescriptor != tupdesc)
-           ExecSetSlotDescriptor(newslot, tupdesc);
-       ExecStoreHeapTuple(newtuple, newslot, false);
-       slot = newslot;
-   }
-
-   if (should_free)
-       heap_freetuple(slottuple);
-   return slot;
+   return true;
 }
 
 void
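
The signature change from TupleTableSlot * to bool inverts the caller
contract: rather than handing back a possibly-different slot, the trigger
code now stores any replacement tuple into the slot it was given.  A sketch
of what an ExecInsert-like caller would do under the new contract (condensed;
error paths omitted):

    if (resultRelInfo->ri_TrigDesc &&
        resultRelInfo->ri_TrigDesc->trig_insert_before_row)
    {
        /* On false, a trigger suppressed the insert ("do nothing"). */
        if (!ExecBRInsertTriggers(estate, resultRelInfo, slot))
            return NULL;

        /* slot now holds the (possibly trigger-modified) tuple */
    }

    table_insert(resultRelationDesc, slot, estate->es_output_cid,
                 0, NULL);
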
@@ -2701,10 +2686,11 @@ ExecBSDeleteTriggers(EState *estate, ResultRelInfo *relinfo)
    LocTriggerData.tg_relation = relinfo->ri_RelationDesc;
    LocTriggerData.tg_trigtuple = NULL;
    LocTriggerData.tg_newtuple = NULL;
+   LocTriggerData.tg_trigslot = NULL;
+   LocTriggerData.tg_newslot = NULL;
    LocTriggerData.tg_oldtable = NULL;
    LocTriggerData.tg_newtable = NULL;
-   LocTriggerData.tg_trigtuplebuf = InvalidBuffer;
-   LocTriggerData.tg_newtuplebuf = InvalidBuffer;
+
    for (i = 0; i < trigdesc->numtriggers; i++)
    {
        Trigger    *trigger = &trigdesc->triggers[i];
@@ -2758,20 +2744,20 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate,
                     HeapTuple fdw_trigtuple,
                     TupleTableSlot **epqslot)
 {
+   TupleTableSlot *slot = ExecTriggerGetOldSlot(estate, relinfo->ri_RelationDesc);
    TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
    bool        result = true;
    TriggerData LocTriggerData;
    HeapTuple   trigtuple;
-   HeapTuple   newtuple;
-   TupleTableSlot *newSlot;
    int         i;
 
    Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid));
    if (fdw_trigtuple == NULL)
    {
-       trigtuple = GetTupleForTrigger(estate, epqstate, relinfo, tupleid,
-                                      LockTupleExclusive, &newSlot);
-       if (trigtuple == NULL)
+       bool is_epqtuple;
+
+       if (!GetTupleForTrigger(estate, epqstate, relinfo, tupleid,
+                               LockTupleExclusive, slot, NULL, &is_epqtuple))
            return false;
 
        /*
@@ -2779,27 +2765,36 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate,
         * function requested for the updated tuple, skip the trigger
         * execution.
         */
-       if (newSlot != NULL && epqslot != NULL)
+       if (is_epqtuple && epqslot != NULL)
        {
-           *epqslot = newSlot;
-           heap_freetuple(trigtuple);
+           *epqslot = slot;
            return false;
        }
+
+       trigtuple = ExecFetchSlotHeapTuple(slot, true, NULL);
    }
    else
+   {
        trigtuple = fdw_trigtuple;
+       ExecForceStoreHeapTuple(trigtuple, slot);
+   }
 
    LocTriggerData.type = T_TriggerData;
    LocTriggerData.tg_event = TRIGGER_EVENT_DELETE |
        TRIGGER_EVENT_ROW |
        TRIGGER_EVENT_BEFORE;
    LocTriggerData.tg_relation = relinfo->ri_RelationDesc;
+   LocTriggerData.tg_trigtuple = NULL;
    LocTriggerData.tg_newtuple = NULL;
+   LocTriggerData.tg_trigslot = NULL;
+   LocTriggerData.tg_newslot = NULL;
    LocTriggerData.tg_oldtable = NULL;
    LocTriggerData.tg_newtable = NULL;
-   LocTriggerData.tg_newtuplebuf = InvalidBuffer;
+
    for (i = 0; i < trigdesc->numtriggers; i++)
    {
+       HeapTuple   newtuple;
        Trigger    *trigger = &trigdesc->triggers[i];
 
        if (!TRIGGER_TYPE_MATCHES(trigger->tgtype,
@@ -2808,11 +2803,11 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate,
                                  TRIGGER_TYPE_DELETE))
            continue;
        if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event,
-                           NULL, trigtuple, NULL))
+                           NULL, slot, NULL))
            continue;
 
+       LocTriggerData.tg_trigslot = slot;
        LocTriggerData.tg_trigtuple = trigtuple;
-       LocTriggerData.tg_trigtuplebuf = InvalidBuffer;
        LocTriggerData.tg_trigger = trigger;
        newtuple = ExecCallTriggerFunc(&LocTriggerData,
                                       i,
@@ -2824,10 +2819,10 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate,
            result = false;     /* tell caller to suppress delete */
            break;
        }
-       if (newtuple != trigtuple)
+       if (false && newtuple != trigtuple)
            heap_freetuple(newtuple);
    }
-   if (trigtuple != fdw_trigtuple)
+   if (false && trigtuple != fdw_trigtuple)
        heap_freetuple(trigtuple);
 
    return result;
@@ -2840,28 +2835,31 @@ ExecARDeleteTriggers(EState *estate, ResultRelInfo *relinfo,
                     TransitionCaptureState *transition_capture)
 {
    TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
+   TupleTableSlot *slot = ExecTriggerGetOldSlot(estate, relinfo->ri_RelationDesc);
 
    if ((trigdesc && trigdesc->trig_delete_after_row) ||
        (transition_capture && transition_capture->tcs_delete_old_table))
    {
-       HeapTuple   trigtuple;
-
        Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid));
        if (fdw_trigtuple == NULL)
-           trigtuple = GetTupleForTrigger(estate,
-                                          NULL,
-                                          relinfo,
-                                          tupleid,
-                                          LockTupleExclusive,
-                                          NULL);
+       {
+           GetTupleForTrigger(estate,
+                              NULL,
+                              relinfo,
+                              tupleid,
+                              LockTupleExclusive,
+                              slot,
+                              NULL,
+                              NULL);
+       }
        else
-           trigtuple = fdw_trigtuple;
+       {
+           ExecForceStoreHeapTuple(fdw_trigtuple, slot);
+       }
 
        AfterTriggerSaveEvent(estate, relinfo, TRIGGER_EVENT_DELETE,
-                             true, trigtuple, NULL, NIL, NULL,
+                             true, slot, NULL, NIL, NULL,
                              transition_capture);
-       if (trigtuple != fdw_trigtuple)
-           heap_freetuple(trigtuple);
    }
 }
 
@@ -2870,8 +2868,8 @@ ExecIRDeleteTriggers(EState *estate, ResultRelInfo *relinfo,
                     HeapTuple trigtuple)
 {
    TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
+   TupleTableSlot *slot = ExecTriggerGetOldSlot(estate, relinfo->ri_RelationDesc);
    TriggerData LocTriggerData;
-   HeapTuple   rettuple;
    int         i;
 
    LocTriggerData.type = T_TriggerData;
@@ -2879,12 +2877,18 @@ ExecIRDeleteTriggers(EState *estate, ResultRelInfo *relinfo,
        TRIGGER_EVENT_ROW |
        TRIGGER_EVENT_INSTEAD;
    LocTriggerData.tg_relation = relinfo->ri_RelationDesc;
+   LocTriggerData.tg_trigtuple = NULL;
    LocTriggerData.tg_newtuple = NULL;
+   LocTriggerData.tg_trigslot = NULL;
+   LocTriggerData.tg_newslot = NULL;
    LocTriggerData.tg_oldtable = NULL;
    LocTriggerData.tg_newtable = NULL;
-   LocTriggerData.tg_newtuplebuf = InvalidBuffer;
+
+   ExecForceStoreHeapTuple(trigtuple, slot);
+
    for (i = 0; i < trigdesc->numtriggers; i++)
    {
+       HeapTuple   rettuple;
        Trigger    *trigger = &trigdesc->triggers[i];
 
        if (!TRIGGER_TYPE_MATCHES(trigger->tgtype,
@@ -2893,11 +2897,11 @@ ExecIRDeleteTriggers(EState *estate, ResultRelInfo *relinfo,
                                  TRIGGER_TYPE_DELETE))
            continue;
        if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event,
-                           NULL, trigtuple, NULL))
+                           NULL, slot, NULL))
            continue;
 
+       LocTriggerData.tg_trigslot = slot;
        LocTriggerData.tg_trigtuple = trigtuple;
-       LocTriggerData.tg_trigtuplebuf = InvalidBuffer;
        LocTriggerData.tg_trigger = trigger;
        rettuple = ExecCallTriggerFunc(&LocTriggerData,
                                       i,
@@ -2906,7 +2910,7 @@ ExecIRDeleteTriggers(EState *estate, ResultRelInfo *relinfo,
                                       GetPerTupleMemoryContext(estate));
        if (rettuple == NULL)
            return false;       /* Delete was suppressed */
-       if (rettuple != trigtuple)
+       if (false && rettuple != trigtuple)
            heap_freetuple(rettuple);
    }
    return true;
@@ -2940,10 +2944,11 @@ ExecBSUpdateTriggers(EState *estate, ResultRelInfo *relinfo)
    LocTriggerData.tg_relation = relinfo->ri_RelationDesc;
    LocTriggerData.tg_trigtuple = NULL;
    LocTriggerData.tg_newtuple = NULL;
+   LocTriggerData.tg_trigslot = NULL;
+   LocTriggerData.tg_newslot = NULL;
    LocTriggerData.tg_oldtable = NULL;
    LocTriggerData.tg_newtable = NULL;
-   LocTriggerData.tg_trigtuplebuf = InvalidBuffer;
-   LocTriggerData.tg_newtuplebuf = InvalidBuffer;
+
    for (i = 0; i < trigdesc->numtriggers; i++)
    {
        Trigger    *trigger = &trigdesc->triggers[i];
@@ -2985,20 +2990,19 @@ ExecASUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
                              transition_capture);
 }
 
-TupleTableSlot *
+bool
 ExecBRUpdateTriggers(EState *estate, EPQState *epqstate,
                     ResultRelInfo *relinfo,
                     ItemPointer tupleid,
                     HeapTuple fdw_trigtuple,
-                    TupleTableSlot *slot)
+                    TupleTableSlot *newslot)
 {
    TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
-   HeapTuple   slottuple = ExecFetchSlotHeapTuple(slot, true, NULL);
+   TupleTableSlot *oldslot = ExecTriggerGetOldSlot(estate, relinfo->ri_RelationDesc);
+   HeapTuple   slottuple = ExecFetchSlotHeapTuple(newslot, true, NULL);
    HeapTuple   newtuple = slottuple;
    TriggerData LocTriggerData;
    HeapTuple   trigtuple;
-   HeapTuple   oldtuple;
-   TupleTableSlot *newSlot;
    int         i;
    Bitmapset  *updatedCols;
    LockTupleMode lockmode;
@@ -3009,37 +3013,41 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate,
    Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid));
    if (fdw_trigtuple == NULL)
    {
+       bool        is_epqtuple = false;
+
        /* get a copy of the on-disk tuple we are planning to update */
-       trigtuple = GetTupleForTrigger(estate, epqstate, relinfo, tupleid,
-                                      lockmode, &newSlot);
-       if (trigtuple == NULL)
-           return NULL;        /* cancel the update action */
+       if (!GetTupleForTrigger(estate, epqstate, relinfo, tupleid,
+                               lockmode, oldslot, newslot, &is_epqtuple))
+           return false;       /* cancel the update action */
+
+       /*
+        * In READ COMMITTED isolation level, it's possible that the target
+        * tuple was changed due to a concurrent update.  In that case we
+        * have a raw subplan output tuple in newslot, and need to run it
+        * through the junk filter to produce an insertable tuple.
+        *
+        * Caution: more than likely, the passed-in newslot is the same as
+        * the junkfilter's output slot, so we are clobbering the original
+        * value of slottuple by doing the filtering.  This is OK since
+        * neither we nor our caller have any more interest in the prior
+        * contents of that slot.
+        */
+       if (is_epqtuple)
+       {
+           TupleTableSlot *slot = ExecFilterJunk(relinfo->ri_junkFilter, newslot);
+
+           ExecCopySlot(newslot, slot);
+           slottuple = ExecFetchSlotHeapTuple(newslot, true, NULL);
+           newtuple = slottuple;
+       }
+
+       trigtuple = ExecFetchSlotHeapTuple(oldslot, true, NULL);
    }
    else
    {
+       ExecForceStoreHeapTuple(fdw_trigtuple, oldslot);
        trigtuple = fdw_trigtuple;
-       newSlot = NULL;
-   }
-
-   /*
-    * In READ COMMITTED isolation level it's possible that target tuple was
-    * changed due to concurrent update.  In that case we have a raw subplan
-    * output tuple in newSlot, and need to run it through the junk filter to
-    * produce an insertable tuple.
-    *
-    * Caution: more than likely, the passed-in slot is the same as the
-    * junkfilter's output slot, so we are clobbering the original value of
-    * slottuple by doing the filtering.  This is OK since neither we nor our
-    * caller have any more interest in the prior contents of that slot.
-    */
-   if (newSlot != NULL)
-   {
-       slot = ExecFilterJunk(relinfo->ri_junkFilter, newSlot);
-       slottuple = ExecFetchSlotHeapTuple(slot, true, NULL);
-       newtuple = slottuple;
    }
 
-
    LocTriggerData.type = T_TriggerData;
    LocTriggerData.tg_event = TRIGGER_EVENT_UPDATE |
        TRIGGER_EVENT_ROW |
@@ -3051,6 +3059,7 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate,
    for (i = 0; i < trigdesc->numtriggers; i++)
    {
        Trigger    *trigger = &trigdesc->triggers[i];
+       HeapTuple   oldtuple;
 
        if (!TRIGGER_TYPE_MATCHES(trigger->tgtype,
                                  TRIGGER_TYPE_ROW,
@@ -3058,67 +3067,55 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate,
                                  TRIGGER_TYPE_UPDATE))
            continue;
        if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event,
-                           updatedCols, trigtuple, newtuple))
+                           updatedCols, oldslot, newslot))
            continue;
 
+       LocTriggerData.tg_trigslot = oldslot;
        LocTriggerData.tg_trigtuple = trigtuple;
        LocTriggerData.tg_newtuple = oldtuple = newtuple;
-       LocTriggerData.tg_trigtuplebuf = InvalidBuffer;
-       LocTriggerData.tg_newtuplebuf = InvalidBuffer;
+       LocTriggerData.tg_newslot = newslot;
        LocTriggerData.tg_trigger = trigger;
        newtuple = ExecCallTriggerFunc(&LocTriggerData,
                                       i,
                                       relinfo->ri_TrigFunctions,
                                       relinfo->ri_TrigInstrument,
                                       GetPerTupleMemoryContext(estate));
-       if (oldtuple != newtuple && oldtuple != slottuple)
+       if (false && oldtuple != newtuple && oldtuple != slottuple)
            heap_freetuple(oldtuple);
        if (newtuple == NULL)
        {
-           if (trigtuple != fdw_trigtuple)
+           if (false && trigtuple != fdw_trigtuple)
                heap_freetuple(trigtuple);
-           return NULL;        /* "do nothing" */
+           return false;       /* "do nothing" */
        }
+
+       if (newtuple != oldtuple)
+           ExecForceStoreHeapTuple(newtuple, newslot);
    }
-   if (trigtuple != fdw_trigtuple && trigtuple != newtuple)
+   if (false && trigtuple != fdw_trigtuple && trigtuple != newtuple)
        heap_freetuple(trigtuple);
 
-   if (newtuple != slottuple)
-   {
-       /*
-        * Return the modified tuple using the es_trig_tuple_slot.  We assume
-        * the tuple was allocated in per-tuple memory context, and therefore
-        * will go away by itself. The tuple table slot should not try to
-        * clear it.
-        */
-       TupleTableSlot *newslot = estate->es_trig_tuple_slot;
-       TupleDesc   tupdesc = RelationGetDescr(relinfo->ri_RelationDesc);
-
-       if (newslot->tts_tupleDescriptor != tupdesc)
-           ExecSetSlotDescriptor(newslot, tupdesc);
-       ExecStoreHeapTuple(newtuple, newslot, false);
-       slot = newslot;
-   }
-   return slot;
+   return true;
 }
 
 void
 ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
                     ItemPointer tupleid,
                     HeapTuple fdw_trigtuple,
-                    HeapTuple newtuple,
+                    TupleTableSlot *newslot,
                     List *recheckIndexes,
                     TransitionCaptureState *transition_capture)
 {
    TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
+   TupleTableSlot *oldslot = ExecTriggerGetOldSlot(estate, relinfo->ri_RelationDesc);
+
+   ExecClearTuple(oldslot);
 
    if ((trigdesc && trigdesc->trig_update_after_row) ||
        (transition_capture &&
         (transition_capture->tcs_update_old_table ||
          transition_capture->tcs_update_new_table)))
    {
-       HeapTuple   trigtuple;
-
        /*
         * Note: if the UPDATE is converted into a DELETE+INSERT as part of
         * update-partition-key operation, then this function is also called
@@ -3126,30 +3123,31 @@ ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
         * In such case, either old tuple or new tuple can be NULL.
         */
        if (fdw_trigtuple == NULL && ItemPointerIsValid(tupleid))
-           trigtuple = GetTupleForTrigger(estate,
-                                          NULL,
-                                          relinfo,
-                                          tupleid,
-                                          LockTupleExclusive,
-                                          NULL);
-       else
-           trigtuple = fdw_trigtuple;
+           GetTupleForTrigger(estate,
+                              NULL,
+                              relinfo,
+                              tupleid,
+                              LockTupleExclusive,
+                              oldslot,
+                              NULL,
+                              NULL);
+       else if (fdw_trigtuple != NULL)
+           ExecForceStoreHeapTuple(fdw_trigtuple, oldslot);
 
        AfterTriggerSaveEvent(estate, relinfo, TRIGGER_EVENT_UPDATE,
-                             true, trigtuple, newtuple, recheckIndexes,
+                             true, oldslot, newslot, recheckIndexes,
                              GetUpdatedColumns(relinfo, estate),
                              transition_capture);
-       if (trigtuple != fdw_trigtuple)
-           heap_freetuple(trigtuple);
    }
 }
 
-TupleTableSlot *
+bool
 ExecIRUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
-                    HeapTuple trigtuple, TupleTableSlot *slot)
+                    HeapTuple trigtuple, TupleTableSlot *newslot)
 {
    TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
-   HeapTuple   slottuple = ExecFetchSlotHeapTuple(slot, true, NULL);
+   TupleTableSlot *oldslot = ExecTriggerGetOldSlot(estate, relinfo->ri_RelationDesc);
+   HeapTuple   slottuple = ExecFetchSlotHeapTuple(newslot, true, NULL);
    HeapTuple   newtuple = slottuple;
    TriggerData LocTriggerData;
    HeapTuple   oldtuple;
@@ -3162,6 +3160,9 @@ ExecIRUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
    LocTriggerData.tg_relation = relinfo->ri_RelationDesc;
    LocTriggerData.tg_oldtable = NULL;
    LocTriggerData.tg_newtable = NULL;
+
+   ExecForceStoreHeapTuple(trigtuple, oldslot);
+
    for (i = 0; i < trigdesc->numtriggers; i++)
    {
        Trigger    *trigger = &trigdesc->triggers[i];
@@ -3172,42 +3173,30 @@ ExecIRUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
                                  TRIGGER_TYPE_UPDATE))
            continue;
        if (!TriggerEnabled(estate, relinfo, trigger, LocTriggerData.tg_event,
-                           NULL, trigtuple, newtuple))
+                           NULL, oldslot, newslot))
            continue;
 
+       LocTriggerData.tg_trigslot = oldslot;
        LocTriggerData.tg_trigtuple = trigtuple;
+       LocTriggerData.tg_newslot = newslot;
        LocTriggerData.tg_newtuple = oldtuple = newtuple;
-       LocTriggerData.tg_trigtuplebuf = InvalidBuffer;
-       LocTriggerData.tg_newtuplebuf = InvalidBuffer;
+
        LocTriggerData.tg_trigger = trigger;
        newtuple = ExecCallTriggerFunc(&LocTriggerData,
                                       i,
                                       relinfo->ri_TrigFunctions,
                                       relinfo->ri_TrigInstrument,
                                       GetPerTupleMemoryContext(estate));
-       if (oldtuple != newtuple && oldtuple != slottuple)
+       if (false && oldtuple != newtuple && oldtuple != slottuple)
            heap_freetuple(oldtuple);
        if (newtuple == NULL)
-           return NULL;        /* "do nothing" */
-   }
-
-   if (newtuple != slottuple)
-   {
-       /*
-        * Return the modified tuple using the es_trig_tuple_slot.  We assume
-        * the tuple was allocated in per-tuple memory context, and therefore
-        * will go away by itself. The tuple table slot should not try to
-        * clear it.
-        */
-       TupleTableSlot *newslot = estate->es_trig_tuple_slot;
-       TupleDesc   tupdesc = RelationGetDescr(relinfo->ri_RelationDesc);
+           return false;       /* "do nothing" */
 
-       if (newslot->tts_tupleDescriptor != tupdesc)
-           ExecSetSlotDescriptor(newslot, tupdesc);
-       ExecStoreHeapTuple(newtuple, newslot, false);
-       slot = newslot;
+       if (oldtuple != newtuple)
+           ExecForceStoreHeapTuple(newtuple, newslot);
    }
-   return slot;
+
+   return true;
 }
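+
+/*
+ * Editorial sketch (assumed caller shape, not part of the patch itself):
+ * with the new bool return convention a caller looks roughly like
+ *
+ *     if (!ExecIRUpdateTriggers(estate, relinfo, oldtuple, newslot))
+ *         return NULL;            -- trigger asked to "do nothing"
+ *     -- newslot now holds the (possibly trigger-modified) new row
+ */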
 
 void
@@ -3230,10 +3219,11 @@ ExecBSTruncateTriggers(EState *estate, ResultRelInfo *relinfo)
    LocTriggerData.tg_relation = relinfo->ri_RelationDesc;
    LocTriggerData.tg_trigtuple = NULL;
    LocTriggerData.tg_newtuple = NULL;
+   LocTriggerData.tg_trigslot = NULL;
+   LocTriggerData.tg_newslot = NULL;
    LocTriggerData.tg_oldtable = NULL;
    LocTriggerData.tg_newtable = NULL;
-   LocTriggerData.tg_trigtuplebuf = InvalidBuffer;
-   LocTriggerData.tg_newtuplebuf = InvalidBuffer;
+
    for (i = 0; i < trigdesc->numtriggers; i++)
    {
        Trigger    *trigger = &trigdesc->triggers[i];
@@ -3273,25 +3263,24 @@ ExecASTruncateTriggers(EState *estate, ResultRelInfo *relinfo)
 }
 
 
-static HeapTuple
+static bool
 GetTupleForTrigger(EState *estate,
                   EPQState *epqstate,
                   ResultRelInfo *relinfo,
                   ItemPointer tid,
                   LockTupleMode lockmode,
-                  TupleTableSlot **newSlot)
+                  TupleTableSlot *oldslot,
+                  TupleTableSlot *newslot,
+                  bool *is_epqtuple)
 {
    Relation    relation = relinfo->ri_RelationDesc;
-   HeapTupleData tuple;
-   HeapTuple   result;
-   Buffer      buffer;
 
-   if (newSlot != NULL)
+   if (is_epqtuple)
    {
        HTSU_Result test;
        HeapUpdateFailureData hufd;
 
-       *newSlot = NULL;
+       *is_epqtuple = false;
 
        /* caller must pass an epqstate if EvalPlanQual is possible */
        Assert(epqstate != NULL);
@@ -3299,12 +3288,13 @@ GetTupleForTrigger(EState *estate,
        /*
         * lock tuple for update
         */
-ltrmark:;
-       tuple.t_self = *tid;
-       test = heap_lock_tuple(relation, &tuple,
-                              estate->es_output_cid,
-                              lockmode, LockWaitBlock,
-                              false, &buffer, &hufd);
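+       /*
+        * In READ COMMITTED we ask the AM to follow the update chain to the
+        * latest row version (see the hufd.traversed handling below); under
+        * a transaction snapshot a concurrent update raises a serialization
+        * failure instead, so no chasing is wanted.
+        */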
+       test = table_lock_tuple(relation, tid, estate->es_snapshot, oldslot,
+                               estate->es_output_cid,
+                               lockmode, LockWaitBlock,
+                               IsolationUsesXactSnapshot() ? 0 : TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
+                               &hufd);
+
+       /* FIXME: result = tuple; */
        switch (test)
        {
            case HeapTupleSelfUpdated:
@@ -3324,103 +3314,71 @@ ltrmark:;
                             errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
 
                /* treat it as deleted; do not process */
-               ReleaseBuffer(buffer);
-               return NULL;
+               return false;
 
            case HeapTupleMayBeUpdated:
-               break;
-
-           case HeapTupleUpdated:
-               ReleaseBuffer(buffer);
-               if (IsolationUsesXactSnapshot())
-                   ereport(ERROR,
-                           (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
-                            errmsg("could not serialize access due to concurrent update")));
-               if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
-                   ereport(ERROR,
-                           (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
-                            errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
 
-               if (!ItemPointerEquals(&hufd.ctid, &tuple.t_self))
+               if (hufd.traversed)
                {
-                   /* it was updated, so look at the updated version */
+                   TupleTableSlot *testslot;
                    TupleTableSlot *epqslot;
 
+                   EvalPlanQualBegin(epqstate, estate);
+
+                   testslot = EvalPlanQualSlot(epqstate, relation, relinfo->ri_RangeTableIndex);
+                   ExecCopySlot(testslot, oldslot);
+
                    epqslot = EvalPlanQual(estate,
                                           epqstate,
                                           relation,
                                           relinfo->ri_RangeTableIndex,
-                                          lockmode,
-                                          &hufd.ctid,
-                                          hufd.xmax);
-                   if (!TupIsNull(epqslot))
-                   {
-                       *tid = hufd.ctid;
-                       *newSlot = epqslot;
-
-                       /*
-                        * EvalPlanQual already locked the tuple, but we
-                        * re-call heap_lock_tuple anyway as an easy way of
-                        * re-fetching the correct tuple.  Speed is hardly a
-                        * criterion in this path anyhow.
-                        */
-                       goto ltrmark;
-                   }
+                                          testslot);
+
+                   /*
+                    * If PlanQual failed for the updated tuple, we must not
+                    * process this tuple!
+                    */
+                   if (TupIsNull(epqslot))
+                       return false;
+
+                   if (newslot)
+                       ExecCopySlot(newslot, epqslot);
+                   else
+                       ExecCopySlot(oldslot, epqslot);
+
+                   *is_epqtuple = true;
                }
+               break;
 
-               /*
-                * if tuple was deleted or PlanQual failed for updated tuple -
-                * we must not process this tuple!
-                */
-               return NULL;
+           case HeapTupleUpdated:
+               if (IsolationUsesXactSnapshot())
+                   ereport(ERROR,
+                           (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+                            errmsg("could not serialize access due to concurrent update")));
+               elog(ERROR, "wrong heap_lock_tuple status: %u", test);
+               break;
+
+           case HeapTupleDeleted:
+               if (IsolationUsesXactSnapshot())
+                   ereport(ERROR,
+                           (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+                            errmsg("could not serialize access due to concurrent update")));
+               /* tuple was deleted */
+               return false;
 
            case HeapTupleInvisible:
                elog(ERROR, "attempted to lock invisible tuple");
                break;
 
            default:
-               ReleaseBuffer(buffer);
                elog(ERROR, "unrecognized heap_lock_tuple status: %u", test);
-               return NULL;    /* keep compiler quiet */
+               return false;   /* keep compiler quiet */
        }
    }
    else
    {
-       Page        page;
-       ItemId      lp;
-
-       buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
-
-       /*
-        * Although we already know this tuple is valid, we must lock the
-        * buffer to ensure that no one has a buffer cleanup lock; otherwise
-        * they might move the tuple while we try to copy it.  But we can
-        * release the lock before actually doing the heap_copytuple call,
-        * since holding pin is sufficient to prevent anyone from getting a
-        * cleanup lock they don't already hold.
-        */
-       LockBuffer(buffer, BUFFER_LOCK_SHARE);
-
-       page = BufferGetPage(buffer);
-       lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid));
-
-       Assert(ItemIdIsNormal(lp));
-
-       tuple.t_data = (HeapTupleHeader) PageGetItem(page, lp);
-       tuple.t_len = ItemIdGetLength(lp);
-       tuple.t_self = *tid;
-       tuple.t_tableOid = RelationGetRelid(relation);
-
-       LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+       if (!table_fetch_row_version(relation, tid, SnapshotAny, oldslot, NULL))
+           elog(ERROR, "couldn't fetch tuple");
    }
 
-   if (HeapTupleHeaderGetNatts(tuple.t_data) < relation->rd_att->natts)
-       result = heap_expand_tuple(&tuple, relation->rd_att);
-   else
-       result = heap_copytuple(&tuple);
-   ReleaseBuffer(buffer);
-
-   return result;
+   return true;
 }
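+
+/*
+ * Editorial sketch: a BEFORE ROW UPDATE caller now supplies slots and tests
+ * the boolean result, e.g.
+ *
+ *     if (!GetTupleForTrigger(estate, epqstate, relinfo, tupleid,
+ *                             LockTupleExclusive, oldslot, newslot,
+ *                             &is_epqtuple))
+ *         return false;   -- row is gone, do not fire the trigger
+ */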
 
 /*
@@ -3430,7 +3388,7 @@ static bool
 TriggerEnabled(EState *estate, ResultRelInfo *relinfo,
               Trigger *trigger, TriggerEvent event,
               Bitmapset *modifiedCols,
-              HeapTuple oldtup, HeapTuple newtup)
+              TupleTableSlot *oldslot, TupleTableSlot *newslot)
 {
    /* Check replication-role-dependent enable state */
    if (SessionReplicationRole == SESSION_REPLICATION_ROLE_REPLICA)
@@ -3472,11 +3430,8 @@ TriggerEnabled(EState *estate, ResultRelInfo *relinfo,
    /* Check for WHEN clause */
    if (trigger->tgqual)
    {
-       TupleDesc   tupdesc = RelationGetDescr(relinfo->ri_RelationDesc);
        ExprState **predicate;
        ExprContext *econtext;
-       TupleTableSlot *oldslot = NULL;
-       TupleTableSlot *newslot = NULL;
        MemoryContext oldContext;
        int         i;
 
@@ -3515,40 +3470,6 @@ TriggerEnabled(EState *estate, ResultRelInfo *relinfo,
         */
        econtext = GetPerTupleExprContext(estate);
 
-       /*
-        * Put OLD and NEW tuples into tupleslots for expression evaluation.
-        * These slots can be shared across the whole estate, but be careful
-        * that they have the current resultrel's tupdesc.
-        */
-       if (HeapTupleIsValid(oldtup))
-       {
-           if (estate->es_trig_oldtup_slot == NULL)
-           {
-               oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
-               estate->es_trig_oldtup_slot =
-                   ExecInitExtraTupleSlot(estate, NULL, &TTSOpsHeapTuple);
-               MemoryContextSwitchTo(oldContext);
-           }
-           oldslot = estate->es_trig_oldtup_slot;
-           if (oldslot->tts_tupleDescriptor != tupdesc)
-               ExecSetSlotDescriptor(oldslot, tupdesc);
-           ExecStoreHeapTuple(oldtup, oldslot, false);
-       }
-       if (HeapTupleIsValid(newtup))
-       {
-           if (estate->es_trig_newtup_slot == NULL)
-           {
-               oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
-               estate->es_trig_newtup_slot =
-                   ExecInitExtraTupleSlot(estate, NULL, &TTSOpsHeapTuple);
-               MemoryContextSwitchTo(oldContext);
-           }
-           newslot = estate->es_trig_newtup_slot;
-           if (newslot->tts_tupleDescriptor != tupdesc)
-               ExecSetSlotDescriptor(newslot, tupdesc);
-           ExecStoreHeapTuple(newtup, newslot, false);
-       }
-
        /*
         * Finally evaluate the expression, making the old and/or new tuples
         * available as INNER_VAR/OUTER_VAR respectively.
@@ -3882,7 +3803,8 @@ struct AfterTriggersTableData
 
 static AfterTriggersData afterTriggers;
 
-static void AfterTriggerExecute(AfterTriggerEvent event,
+static void AfterTriggerExecute(EState *estate,
+                   AfterTriggerEvent event,
                    Relation rel, TriggerDesc *trigdesc,
                    FmgrInfo *finfo,
                    Instrumentation *instr,
@@ -4217,7 +4139,8 @@ afterTriggerDeleteHeadEventChunk(AfterTriggersQueryData *qs)
  * ----------
  */
 static void
-AfterTriggerExecute(AfterTriggerEvent event,
+AfterTriggerExecute(EState *estate,
+                   AfterTriggerEvent event,
                    Relation rel, TriggerDesc *trigdesc,
                    FmgrInfo *finfo, Instrumentation *instr,
                    MemoryContext per_tuple_context,
@@ -4227,17 +4150,16 @@ AfterTriggerExecute(AfterTriggerEvent event,
    AfterTriggerShared evtshared = GetTriggerSharedData(event);
    Oid         tgoid = evtshared->ats_tgoid;
    TriggerData LocTriggerData;
-   HeapTupleData tuple1;
-   HeapTupleData tuple2;
    HeapTuple   rettuple;
-   Buffer      buffer1 = InvalidBuffer;
-   Buffer      buffer2 = InvalidBuffer;
    int         tgindx;
 
    /*
     * Locate trigger in trigdesc.
     */
    LocTriggerData.tg_trigger = NULL;
+   LocTriggerData.tg_trigslot = NULL;
+   LocTriggerData.tg_newslot = NULL;
+
    for (tgindx = 0; tgindx < trigdesc->numtriggers; tgindx++)
    {
        if (trigdesc->triggers[tgindx].tgoid == tgoid)
@@ -4287,31 +4209,31 @@ AfterTriggerExecute(AfterTriggerEvent event,
             * that is stored as a heap tuple, constructed in different memory
             * context, in the slot anyway.
             */
-           LocTriggerData.tg_trigtuple = ExecFetchSlotHeapTuple(trig_tuple_slot1,
-                                                                   true, NULL);
-           LocTriggerData.tg_trigtuplebuf = InvalidBuffer;
+           LocTriggerData.tg_trigslot = trig_tuple_slot1;  /* FIXME */
+           LocTriggerData.tg_trigtuple =
+               ExecFetchSlotHeapTuple(trig_tuple_slot1, true, NULL);
 
+           LocTriggerData.tg_newslot = trig_tuple_slot2;   /* FIXME */
            LocTriggerData.tg_newtuple =
                ((evtshared->ats_event & TRIGGER_EVENT_OPMASK) ==
                 TRIGGER_EVENT_UPDATE) ?
                ExecFetchSlotHeapTuple(trig_tuple_slot2, true, NULL) : NULL;
-           LocTriggerData.tg_newtuplebuf = InvalidBuffer;
 
            break;
 
        default:
+
            if (ItemPointerIsValid(&(event->ate_ctid1)))
            {
-               ItemPointerCopy(&(event->ate_ctid1), &(tuple1.t_self));
-               if (!heap_fetch(rel, SnapshotAny, &tuple1, &buffer1, false, NULL))
+               LocTriggerData.tg_trigslot = ExecTriggerGetOldSlot(estate, rel);
+               if (!table_fetch_row_version(rel, &(event->ate_ctid1), SnapshotAny, LocTriggerData.tg_trigslot, NULL))
                    elog(ERROR, "failed to fetch tuple1 for AFTER trigger");
-               LocTriggerData.tg_trigtuple = &tuple1;
-               LocTriggerData.tg_trigtuplebuf = buffer1;
+               LocTriggerData.tg_trigtuple = ExecFetchSlotHeapTuple(LocTriggerData.tg_trigslot, false, NULL);
+
            }
            else
            {
                LocTriggerData.tg_trigtuple = NULL;
-               LocTriggerData.tg_trigtuplebuf = InvalidBuffer;
            }
 
            /* don't touch ctid2 if not there */
@@ -4319,16 +4241,14 @@ AfterTriggerExecute(AfterTriggerEvent event,
                AFTER_TRIGGER_2CTID &&
                ItemPointerIsValid(&(event->ate_ctid2)))
            {
-               ItemPointerCopy(&(event->ate_ctid2), &(tuple2.t_self));
-               if (!heap_fetch(rel, SnapshotAny, &tuple2, &buffer2, false, NULL))
+               LocTriggerData.tg_newslot = ExecTriggerGetNewSlot(estate, rel);
+               if (!table_fetch_row_version(rel, &(event->ate_ctid2), SnapshotAny, LocTriggerData.tg_newslot, NULL))
                    elog(ERROR, "failed to fetch tuple2 for AFTER trigger");
-               LocTriggerData.tg_newtuple = &tuple2;
-               LocTriggerData.tg_newtuplebuf = buffer2;
+               LocTriggerData.tg_newtuple = ExecFetchSlotHeapTuple(LocTriggerData.tg_newslot, false, NULL);
            }
            else
            {
                LocTriggerData.tg_newtuple = NULL;
-               LocTriggerData.tg_newtuplebuf = InvalidBuffer;
            }
    }
 
@@ -4380,12 +4300,12 @@ AfterTriggerExecute(AfterTriggerEvent event,
        heap_freetuple(rettuple);
 
    /*
-    * Release buffers
+    * Release resources
     */
-   if (buffer1 != InvalidBuffer)
-       ReleaseBuffer(buffer1);
-   if (buffer2 != InvalidBuffer)
-       ReleaseBuffer(buffer2);
+   if (LocTriggerData.tg_trigslot)
+       ExecClearTuple(LocTriggerData.tg_trigslot);
+   if (LocTriggerData.tg_newslot)
+       ExecClearTuple(LocTriggerData.tg_newslot);
 
    /*
     * If doing EXPLAIN ANALYZE, stop charging time to this trigger, and count
@@ -4552,6 +4472,7 @@ afterTriggerInvokeEvents(AfterTriggerEventList *events,
                        slot2 = MakeSingleTupleTableSlot(rel->rd_att,
                                                         &TTSOpsMinimalTuple);
                    }
+
                    if (trigdesc == NULL)   /* should not happen */
                        elog(ERROR, "relation %u has no triggers",
                             evtshared->ats_relid);
@@ -4562,7 +4483,7 @@ afterTriggerInvokeEvents(AfterTriggerEventList *events,
                 * still set, so recursive examinations of the event list
                 * won't try to re-fire it.
                 */
-               AfterTriggerExecute(event, rel, trigdesc, finfo, instr,
+               AfterTriggerExecute(estate, event, rel, trigdesc, finfo, instr,
                                    per_tuple_context, slot1, slot2);
 
                /*
@@ -4606,6 +4527,7 @@ afterTriggerInvokeEvents(AfterTriggerEventList *events,
    if (local_estate)
    {
        ExecCleanUpTriggerState(estate);
+       ExecResetTupleTable(estate->es_tupleTable, false);
        FreeExecutorState(estate);
    }
 
@@ -5743,7 +5665,7 @@ AfterTriggerPendingOnRel(Oid relid)
 static void
 AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
                      int event, bool row_trigger,
-                     HeapTuple oldtup, HeapTuple newtup,
+                     TupleTableSlot *oldslot, TupleTableSlot *newslot,
                      List *recheckIndexes, Bitmapset *modifiedCols,
                      TransitionCaptureState *transition_capture)
 {
@@ -5792,11 +5714,11 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
         * deleted.
         */
        Assert(!(event == TRIGGER_EVENT_DELETE && delete_old_table &&
-                oldtup == NULL));
+                TupIsNull(oldslot)));
        Assert(!(event == TRIGGER_EVENT_INSERT && insert_new_table &&
-                newtup == NULL));
+                TupIsNull(newslot)));
 
-       if (oldtup != NULL &&
+       if (!TupIsNull(oldslot) &&
            ((event == TRIGGER_EVENT_DELETE && delete_old_table) ||
             (event == TRIGGER_EVENT_UPDATE && update_old_table)))
        {
@@ -5806,15 +5728,17 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
 
            if (map != NULL)
            {
-               HeapTuple   converted = execute_attr_map_tuple(oldtup, map);
-
+               HeapTuple   converted;
+               /* PBORKED */
+               converted = execute_attr_map_tuple(ExecFetchSlotHeapTuple(oldslot, true, NULL),
+                                                  map);
                tuplestore_puttuple(old_tuplestore, converted);
                pfree(converted);
            }
            else
-               tuplestore_puttuple(old_tuplestore, oldtup);
+               tuplestore_puttupleslot(old_tuplestore, oldslot);
        }
-       if (newtup != NULL &&
+       if (!TupIsNull(newslot) &&
            ((event == TRIGGER_EVENT_INSERT && insert_new_table) ||
             (event == TRIGGER_EVENT_UPDATE && update_new_table)))
        {
@@ -5826,13 +5750,15 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
                tuplestore_puttuple(new_tuplestore, original_insert_tuple);
            else if (map != NULL)
            {
-               HeapTuple   converted = execute_attr_map_tuple(newtup, map);
+               HeapTuple   converted;
 
+               converted = execute_attr_map_tuple(ExecFetchSlotHeapTuple(newslot, true, NULL),
+                                                  map);
                tuplestore_puttuple(new_tuplestore, converted);
                pfree(converted);
            }
            else
-               tuplestore_puttuple(new_tuplestore, newtup);
+               tuplestore_puttupleslot(new_tuplestore, newslot);
        }
 
        /*
@@ -5846,7 +5772,7 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
            (event == TRIGGER_EVENT_DELETE && !trigdesc->trig_delete_after_row) ||
            (event == TRIGGER_EVENT_INSERT && !trigdesc->trig_insert_after_row) ||
            (event == TRIGGER_EVENT_UPDATE && !trigdesc->trig_update_after_row) ||
-           (event == TRIGGER_EVENT_UPDATE && ((oldtup == NULL) ^ (newtup == NULL))))
+           (event == TRIGGER_EVENT_UPDATE && (TupIsNull(oldslot) ^ TupIsNull(newslot))))
            return;
    }
 
@@ -5868,15 +5794,15 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
            tgtype_event = TRIGGER_TYPE_INSERT;
            if (row_trigger)
            {
-               Assert(oldtup == NULL);
-               Assert(newtup != NULL);
-               ItemPointerCopy(&(newtup->t_self), &(new_event.ate_ctid1));
+               Assert(oldslot == NULL);
+               Assert(newslot != NULL);
+               ItemPointerCopy(&(newslot->tts_tid), &(new_event.ate_ctid1));
                ItemPointerSetInvalid(&(new_event.ate_ctid2));
            }
            else
            {
-               Assert(oldtup == NULL);
-               Assert(newtup == NULL);
+               Assert(oldslot == NULL);
+               Assert(newslot == NULL);
                ItemPointerSetInvalid(&(new_event.ate_ctid1));
                ItemPointerSetInvalid(&(new_event.ate_ctid2));
                cancel_prior_stmt_triggers(RelationGetRelid(rel),
@@ -5887,15 +5813,15 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
            tgtype_event = TRIGGER_TYPE_DELETE;
            if (row_trigger)
            {
-               Assert(oldtup != NULL);
-               Assert(newtup == NULL);
-               ItemPointerCopy(&(oldtup->t_self), &(new_event.ate_ctid1));
+               Assert(oldslot != NULL);
+               Assert(newslot == NULL);
+               ItemPointerCopy(&(oldslot->tts_tid), &(new_event.ate_ctid1));
                ItemPointerSetInvalid(&(new_event.ate_ctid2));
            }
            else
            {
-               Assert(oldtup == NULL);
-               Assert(newtup == NULL);
+               Assert(oldslot == NULL);
+               Assert(newslot == NULL);
                ItemPointerSetInvalid(&(new_event.ate_ctid1));
                ItemPointerSetInvalid(&(new_event.ate_ctid2));
                cancel_prior_stmt_triggers(RelationGetRelid(rel),
@@ -5906,15 +5832,15 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
            tgtype_event = TRIGGER_TYPE_UPDATE;
            if (row_trigger)
            {
-               Assert(oldtup != NULL);
-               Assert(newtup != NULL);
-               ItemPointerCopy(&(oldtup->t_self), &(new_event.ate_ctid1));
-               ItemPointerCopy(&(newtup->t_self), &(new_event.ate_ctid2));
+               Assert(oldslot != NULL);
+               Assert(newslot != NULL);
+               ItemPointerCopy(&(oldslot->tts_tid), &(new_event.ate_ctid1));
+               ItemPointerCopy(&(newslot->tts_tid), &(new_event.ate_ctid2));
            }
            else
            {
-               Assert(oldtup == NULL);
-               Assert(newtup == NULL);
+               Assert(oldslot == NULL);
+               Assert(newslot == NULL);
                ItemPointerSetInvalid(&(new_event.ate_ctid1));
                ItemPointerSetInvalid(&(new_event.ate_ctid2));
                cancel_prior_stmt_triggers(RelationGetRelid(rel),
@@ -5923,8 +5849,8 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
            break;
        case TRIGGER_EVENT_TRUNCATE:
            tgtype_event = TRIGGER_TYPE_TRUNCATE;
-           Assert(oldtup == NULL);
-           Assert(newtup == NULL);
+           Assert(oldslot == NULL);
+           Assert(newslot == NULL);
            ItemPointerSetInvalid(&(new_event.ate_ctid1));
            ItemPointerSetInvalid(&(new_event.ate_ctid2));
            break;
@@ -5951,7 +5877,7 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
                                  tgtype_event))
            continue;
        if (!TriggerEnabled(estate, relinfo, trigger, event,
-                           modifiedCols, oldtup, newtup))
+                           modifiedCols, oldslot, newslot))
            continue;
 
        if (relkind == RELKIND_FOREIGN_TABLE && row_trigger)
@@ -5978,7 +5904,7 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
                case RI_TRIGGER_PK:
                    /* Update or delete on trigger's PK table */
                    if (!RI_FKey_pk_upd_check_required(trigger, rel,
-                                                      oldtup, newtup))
+                                                      oldslot, newslot))
                    {
                        /* skip queuing this event */
                        continue;
@@ -5988,7 +5914,7 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
                case RI_TRIGGER_FK:
                    /* Update on trigger's FK table */
                    if (!RI_FKey_fk_upd_check_required(trigger, rel,
-                                                      oldtup, newtup))
+                                                      oldslot, newslot))
                    {
                        /* skip queuing this event */
                        continue;
@@ -6042,10 +5968,10 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
     */
    if (fdw_tuplestore)
    {
-       if (oldtup != NULL)
-           tuplestore_puttuple(fdw_tuplestore, oldtup);
-       if (newtup != NULL)
-           tuplestore_puttuple(fdw_tuplestore, newtup);
+       if (oldslot != NULL)
+           tuplestore_puttupleslot(fdw_tuplestore, oldslot);
+       if (newslot != NULL)
+           tuplestore_puttupleslot(fdw_tuplestore, newslot);
    }
 }
 
index 1ffc8231d467a7df86b223c1a6c02ffc439ad4c0..24e8f0c279c55fa194e8c6dec38baf138ef96090 100644 (file)
@@ -32,6 +32,7 @@
 #include "postgres.h"
 
 #include "access/htup_details.h"
+#include "access/tableam.h"
 #include "access/xact.h"
 #include "catalog/binary_upgrade.h"
 #include "catalog/catalog.h"
@@ -2358,14 +2359,16 @@ AlterDomainNotNull(List *names, bool notNull)
            RelToCheck *rtc = (RelToCheck *) lfirst(rt);
            Relation    testrel = rtc->rel;
            TupleDesc   tupdesc = RelationGetDescr(testrel);
-           HeapScanDesc scan;
-           HeapTuple   tuple;
+           TableScanDesc scan;
+           TupleTableSlot *slot;
            Snapshot    snapshot;
 
            /* Scan all tuples in this relation */
            snapshot = RegisterSnapshot(GetLatestSnapshot());
-           scan = heap_beginscan(testrel, snapshot, 0, NULL);
-           while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+           scan = table_beginscan(testrel, snapshot, 0, NULL);
+           slot = table_gimmegimmeslot(testrel, NULL);
+
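+           /*
+            * The slot comes from the table AM (table_gimmegimmeslot above),
+            * so this loop works unchanged for any storage engine; the slot
+            * must be dropped once the scan is done.
+            */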
+           while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
            {
                int         i;
 
@@ -2375,7 +2378,7 @@ AlterDomainNotNull(List *names, bool notNull)
                    int         attnum = rtc->atts[i];
                    Form_pg_attribute attr = TupleDescAttr(tupdesc, attnum - 1);
 
-                   if (heap_attisnull(tuple, attnum, tupdesc))
+                   if (slot_attisnull(slot, attnum))
                    {
                        /*
                         * In principle the auxiliary information for this
@@ -2394,7 +2397,9 @@ AlterDomainNotNull(List *names, bool notNull)
                    }
                }
            }
-           heap_endscan(scan);
+
+           ExecDropSingleTupleTableSlot(slot);
+           table_endscan(scan);
            UnregisterSnapshot(snapshot);
 
            /* Close each rel after processing, but keep lock */
@@ -2757,14 +2762,16 @@ validateDomainConstraint(Oid domainoid, char *ccbin)
        RelToCheck *rtc = (RelToCheck *) lfirst(rt);
        Relation    testrel = rtc->rel;
        TupleDesc   tupdesc = RelationGetDescr(testrel);
-       HeapScanDesc scan;
-       HeapTuple   tuple;
+       TableScanDesc scan;
+       TupleTableSlot *slot;
        Snapshot    snapshot;
 
        /* Scan all tuples in this relation */
        snapshot = RegisterSnapshot(GetLatestSnapshot());
-       scan = heap_beginscan(testrel, snapshot, 0, NULL);
-       while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+       scan = table_beginscan(testrel, snapshot, 0, NULL);
+       slot = table_gimmegimmeslot(testrel, NULL);
+
+       while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
        {
            int         i;
 
@@ -2777,7 +2784,7 @@ validateDomainConstraint(Oid domainoid, char *ccbin)
                Datum       conResult;
                Form_pg_attribute attr = TupleDescAttr(tupdesc, attnum - 1);
 
-               d = heap_getattr(tuple, attnum, tupdesc, &isNull);
+               d = slot_getattr(slot, attnum, &isNull);
 
                econtext->domainValue_datum = d;
                econtext->domainValue_isNull = isNull;
@@ -2807,7 +2814,9 @@ validateDomainConstraint(Oid domainoid, char *ccbin)
 
            ResetExprContext(econtext);
        }
-       heap_endscan(scan);
+
+       ExecDropSingleTupleTableSlot(slot);
+       table_endscan(scan);
        UnregisterSnapshot(snapshot);
 
        /* Hold relation lock till commit (XXX bad for concurrency) */
index 15eec19418c8e5358155f3317260ff8e0ac0474b..fcae282044b2567500dba943ca17ef76db4727a7 100644 (file)
@@ -28,6 +28,7 @@
 #include "access/heapam.h"
 #include "access/htup_details.h"
 #include "access/multixact.h"
+#include "access/tableam.h"
 #include "access/transam.h"
 #include "access/xact.h"
 #include "catalog/namespace.h"
@@ -746,14 +747,14 @@ get_all_vacuum_rels(int options)
 {
    List       *vacrels = NIL;
    Relation    pgclass;
-   HeapScanDesc scan;
+   TableScanDesc scan;
    HeapTuple   tuple;
 
    pgclass = heap_open(RelationRelationId, AccessShareLock);
 
-   scan = heap_beginscan_catalog(pgclass, 0, NULL);
+   scan = table_beginscan_catalog(pgclass, 0, NULL);
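+
+   /*
+    * Note: catalog scans still hand back HeapTuples (heap_scan_getnext
+    * below) rather than slots, since system catalogs are always heap
+    * tables.
+    */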
 
-   while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+   while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
    {
        Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
        MemoryContext oldcontext;
@@ -785,7 +786,7 @@ get_all_vacuum_rels(int options)
        MemoryContextSwitchTo(oldcontext);
    }
 
-   heap_endscan(scan);
+   table_endscan(scan);
    heap_close(pgclass, AccessShareLock);
 
    return vacrels;
@@ -1382,7 +1383,7 @@ vac_truncate_clog(TransactionId frozenXID,
 {
    TransactionId nextXID = ReadNewTransactionId();
    Relation    relation;
-   HeapScanDesc scan;
+   TableScanDesc scan;
    HeapTuple   tuple;
    Oid         oldestxid_datoid;
    Oid         minmulti_datoid;
@@ -1413,9 +1414,9 @@ vac_truncate_clog(TransactionId frozenXID,
     */
    relation = heap_open(DatabaseRelationId, AccessShareLock);
 
-   scan = heap_beginscan_catalog(relation, 0, NULL);
+   scan = table_beginscan_catalog(relation, 0, NULL);
 
-   while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+   while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
    {
        volatile FormData_pg_database *dbform = (Form_pg_database) GETSTRUCT(tuple);
        TransactionId datfrozenxid = dbform->datfrozenxid;
@@ -1452,7 +1453,7 @@ vac_truncate_clog(TransactionId frozenXID,
        }
    }
 
-   heap_endscan(scan);
+   table_endscan(scan);
 
    heap_close(relation, AccessShareLock);
 
@@ -1711,7 +1712,7 @@ vacuum_rel(Oid relid, RangeVar *relation, int options, VacuumParams *params)
        cluster_rel(relid, InvalidOid, cluster_options);
    }
    else
-       heap_vacuum_rel(onerel, options, params, vac_strategy);
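+       /* dispatch through the relation's table AM instead of assuming heap */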
+       table_vacuum_rel(onerel, options, params, vac_strategy);
 
    /* Roll back any GUC changes executed by index functions */
    AtEOXact_GUC(false, save_nestlevel);
index 39c462a4e59232ed30b86fed96d63f7098713e48..35048400cfcfa998b51c47e10628ae9792179b7f 100644 (file)
@@ -203,7 +203,7 @@ execCurrentOf(CurrentOfExpr *cexpr,
             */
            IndexScanDesc scan = ((IndexOnlyScanState *) scanstate)->ioss_ScanDesc;
 
-           *current_tid = scan->xs_ctup.t_self;
+           *current_tid = scan->xs_heaptid;
        }
        else
        {
index ec4a2506f151e43102c33644c0003b0ec7fcac08..6cac1cf99cd490e5df2c2a4c3f80b6b03ec5c5e1 100644 (file)
@@ -57,6 +57,7 @@
 #include "postgres.h"
 
 #include "access/tuptoaster.h"
+#include "access/sysattr.h"
 #include "catalog/pg_type.h"
 #include "commands/sequence.h"
 #include "executor/execExpr.h"
index 8b35bb458de3992cde113a7dee8561d72f73a84d..66d838dbcef8c11a9090fe3552c01895aa41745a 100644 (file)
 #include "postgres.h"
 
 #include "access/relscan.h"
+#include "access/tableam.h"
 #include "access/xact.h"
 #include "catalog/index.h"
 #include "executor/executor.h"
@@ -269,12 +270,12 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo)
  */
 List *
 ExecInsertIndexTuples(TupleTableSlot *slot,
-                     ItemPointer tupleid,
                      EState *estate,
                      bool noDupErr,
                      bool *specConflict,
                      List *arbiterIndexes)
 {
+   ItemPointer tupleid = &slot->tts_tid;
    List       *result = NIL;
    ResultRelInfo *resultRelInfo;
    int         i;
@@ -286,6 +287,8 @@ ExecInsertIndexTuples(TupleTableSlot *slot,
    Datum       values[INDEX_MAX_KEYS];
    bool        isnull[INDEX_MAX_KEYS];
 
+   Assert(ItemPointerIsValid(tupleid));
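+
+   /*
+    * Editorial note: the TID is taken from the slot itself now, so the
+    * tuple must already have been stored into the table (setting tts_tid)
+    * before index insertion.
+    */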
+
    /*
     * Get information from the result relation info structure.
     */
@@ -650,7 +653,6 @@ check_exclusion_or_unique_constraint(Relation heap, Relation index,
    Oid        *index_collations = index->rd_indcollation;
    int         indnkeyatts = IndexRelationGetNumberOfKeyAttributes(index);
    IndexScanDesc index_scan;
-   HeapTuple   tup;
    ScanKeyData scankeys[INDEX_MAX_KEYS];
    SnapshotData DirtySnapshot;
    int         i;
@@ -706,8 +708,7 @@ check_exclusion_or_unique_constraint(Relation heap, Relation index,
     * to this slot.  Be sure to save and restore caller's value for
     * scantuple.
     */
-   existing_slot = MakeSingleTupleTableSlot(RelationGetDescr(heap),
-                                            &TTSOpsHeapTuple);
+   existing_slot = table_gimmegimmeslot(heap, NULL);
 
    econtext = GetPerTupleExprContext(estate);
    save_scantuple = econtext->ecxt_scantuple;
@@ -723,11 +724,9 @@ retry:
    index_scan = index_beginscan(heap, index, &DirtySnapshot, indnkeyatts, 0);
    index_rescan(index_scan, scankeys, indnkeyatts, NULL, 0);
 
-   while ((tup = index_getnext(index_scan,
-                               ForwardScanDirection)) != NULL)
+   while (index_getnext_slot(index_scan, ForwardScanDirection, existing_slot))
    {
        TransactionId xwait;
-       ItemPointerData ctid_wait;
        XLTW_Oper   reason_wait;
        Datum       existing_values[INDEX_MAX_KEYS];
        bool        existing_isnull[INDEX_MAX_KEYS];
@@ -738,7 +737,7 @@ retry:
         * Ignore the entry for the tuple we're trying to check.
         */
        if (ItemPointerIsValid(tupleid) &&
-           ItemPointerEquals(tupleid, &tup->t_self))
+           ItemPointerEquals(tupleid, &existing_slot->tts_tid))
        {
            if (found_self)     /* should not happen */
                elog(ERROR, "found self tuple multiple times in index \"%s\"",
@@ -751,7 +750,6 @@ retry:
         * Extract the index column values and isnull flags from the existing
         * tuple.
         */
-       ExecStoreHeapTuple(tup, existing_slot, false);
        FormIndexDatum(indexInfo, existing_slot, estate,
                       existing_values, existing_isnull);
 
@@ -786,7 +784,10 @@ retry:
              DirtySnapshot.speculativeToken &&
              TransactionIdPrecedes(GetCurrentTransactionId(), xwait))))
        {
-           ctid_wait = tup->t_data->t_ctid;
+           /*
+            * PBORKED? When waiting, we used to use t_ctid rather than
+            * t_self, but I don't see a need for that?
+            */
            reason_wait = indexInfo->ii_ExclusionOps ?
                XLTW_RecheckExclusionConstr : XLTW_InsertIndex;
            index_endscan(index_scan);
@@ -794,7 +795,9 @@ retry:
                SpeculativeInsertionWait(DirtySnapshot.xmin,
                                         DirtySnapshot.speculativeToken);
            else
-               XactLockTableWait(xwait, heap, &ctid_wait, reason_wait);
+               XactLockTableWait(xwait, heap,
+                                 &existing_slot->tts_tid, reason_wait);
+
            goto retry;
        }
 
@@ -806,7 +809,9 @@ retry:
        {
            conflict = true;
            if (conflictTid)
-               *conflictTid = tup->t_self;
+           {
+               *conflictTid = existing_slot->tts_tid;
+           }
            break;
        }
 
index d83d296d82c283a1c3a92dbb76afb864ea607b01..9fe420bfa87e01d53cdd7018fbe32a30ebf02820 100644 (file)
@@ -38,6 +38,7 @@
 #include "postgres.h"
 
 #include "access/htup_details.h"
+#include "access/tableam.h"
 #include "access/sysattr.h"
 #include "access/transam.h"
 #include "access/xact.h"
@@ -976,12 +977,12 @@ InitPlan(QueryDesc *queryDesc, int eflags)
     * Initialize the executor's tuple table to empty.
     */
    estate->es_tupleTable = NIL;
-   estate->es_trig_tuple_slot = NULL;
+   estate->es_trig_return_slot = NULL;
    estate->es_trig_oldtup_slot = NULL;
    estate->es_trig_newtup_slot = NULL;
 
    /* mark EvalPlanQual not active */
-   estate->es_epqTuple = NULL;
+   estate->es_epqTupleSlot = NULL;
    estate->es_epqTupleSet = NULL;
    estate->es_epqScanDone = NULL;
 
@@ -2403,6 +2404,30 @@ ExecBuildAuxRowMark(ExecRowMark *erm, List *targetlist)
    return aerm;
 }
 
+TupleTableSlot *
+EvalPlanQualSlot(EPQState *epqstate,
+                Relation relation, Index rti)
+{
+   TupleTableSlot **slot = &epqstate->estate->es_epqTupleSlot[rti - 1];
+
+   if (*slot == NULL)
+   {
+       MemoryContext oldcontext;
+
+       oldcontext = MemoryContextSwitchTo(epqstate->estate->es_query_cxt);
+
+       if (relation)
+           *slot = table_gimmegimmeslot(relation, &epqstate->estate->es_tupleTable);
+       else
+           *slot = MakeTupleTableSlot(epqstate->origslot->tts_tupleDescriptor, &TTSOpsVirtual);
+
+       epqstate->estate->es_epqTupleSet[rti - 1] = true;
+       MemoryContextSwitchTo(oldcontext);
+   }
+
+   return *slot;
+}
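+
+/*
+ * Editorial sketch: GetTupleForTrigger, for example, uses this as
+ *
+ *     EvalPlanQualBegin(epqstate, estate);
+ *     testslot = EvalPlanQualSlot(epqstate, relation, rti);
+ *     ExecCopySlot(testslot, oldslot);
+ *     epqslot = EvalPlanQual(estate, epqstate, relation, rti, testslot);
+ */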
+
 
 /*
  * EvalPlanQual logic --- recheck modified tuple(s) to see if we want to
@@ -2420,9 +2445,7 @@ ExecBuildAuxRowMark(ExecRowMark *erm, List *targetlist)
  * epqstate - state for EvalPlanQual rechecking
  * relation - table containing tuple
  * rti - rangetable index of table containing tuple
- * lockmode - requested tuple lock mode
- * *tid - t_ctid from the outdated tuple (ie, next updated version)
- * priorXmax - t_xmax from the outdated tuple
+ * testslot - slot containing the updated row version to recheck
  *
- * *tid is also an output parameter: it's modified to hold the TID of the
- * latest version of the tuple (note this may be changed even on failure)
@@ -2435,39 +2458,25 @@ ExecBuildAuxRowMark(ExecRowMark *erm, List *targetlist)
  */
 TupleTableSlot *
 EvalPlanQual(EState *estate, EPQState *epqstate,
-            Relation relation, Index rti, int lockmode,
-            ItemPointer tid, TransactionId priorXmax)
+            Relation relation, Index rti, TupleTableSlot *testslot)
 {
    TupleTableSlot *slot;
-   HeapTuple   copyTuple;
 
    Assert(rti > 0);
 
-   /*
-    * Get and lock the updated version of the row; if fail, return NULL.
-    */
-   copyTuple = EvalPlanQualFetch(estate, relation, lockmode, LockWaitBlock,
-                                 tid, priorXmax);
-
-   if (copyTuple == NULL)
-       return NULL;
-
-   /*
-    * For UPDATE/DELETE we have to return tid of actual row we're executing
-    * PQ for.
-    */
-   *tid = copyTuple->t_self;
-
    /*
     * Need to run a recheck subquery.  Initialize or reinitialize EPQ state.
     */
    EvalPlanQualBegin(epqstate, estate);
 
+#if FIXME
    /*
     * Free old test tuple, if any, and store new tuple where relation's scan
     * node will see it
     */
-   EvalPlanQualSetTuple(epqstate, rti, copyTuple);
+   EvalPlanQualSetTuple(epqstate, rti, testslot);
+#endif
+   Assert(testslot == epqstate->estate->es_epqTupleSlot[rti - 1]);
 
    /*
     * Fetch any non-locked source rows
@@ -2489,272 +2498,20 @@ EvalPlanQual(EState *estate, EPQState *epqstate,
    if (!TupIsNull(slot))
        ExecMaterializeSlot(slot);
 
+#if FIXME
    /*
     * Clear out the test tuple.  This is needed in case the EPQ query is
     * re-used to test a tuple for a different relation.  (Not clear that can
     * really happen, but let's be safe.)
     */
    EvalPlanQualSetTuple(epqstate, rti, NULL);
+#else
+   ExecClearTuple(epqstate->estate->es_epqTupleSlot[rti - 1]);
+#endif
 
    return slot;
 }
 
-/*
- * Fetch a copy of the newest version of an outdated tuple
- *
- * estate - executor state data
- * relation - table containing tuple
- * lockmode - requested tuple lock mode
- * wait_policy - requested lock wait policy
- * *tid - t_ctid from the outdated tuple (ie, next updated version)
- * priorXmax - t_xmax from the outdated tuple
- *
- * Returns a palloc'd copy of the newest tuple version, or NULL if we find
- * that there is no newest version (ie, the row was deleted not updated).
- * We also return NULL if the tuple is locked and the wait policy is to skip
- * such tuples.
- *
- * If successful, we have locked the newest tuple version, so caller does not
- * need to worry about it changing anymore.
- *
- * Note: properly, lockmode should be declared as enum LockTupleMode,
- * but we use "int" to avoid having to include heapam.h in executor.h.
- */
-HeapTuple
-EvalPlanQualFetch(EState *estate, Relation relation, int lockmode,
-                 LockWaitPolicy wait_policy,
-                 ItemPointer tid, TransactionId priorXmax)
-{
-   HeapTuple   copyTuple = NULL;
-   HeapTupleData tuple;
-   SnapshotData SnapshotDirty;
-
-   /*
-    * fetch target tuple
-    *
-    * Loop here to deal with updated or busy tuples
-    */
-   InitDirtySnapshot(SnapshotDirty);
-   tuple.t_self = *tid;
-   for (;;)
-   {
-       Buffer      buffer;
-
-       if (heap_fetch(relation, &SnapshotDirty, &tuple, &buffer, true, NULL))
-       {
-           HTSU_Result test;
-           HeapUpdateFailureData hufd;
-
-           /*
-            * If xmin isn't what we're expecting, the slot must have been
-            * recycled and reused for an unrelated tuple.  This implies that
-            * the latest version of the row was deleted, so we need do
-            * nothing.  (Should be safe to examine xmin without getting
-            * buffer's content lock.  We assume reading a TransactionId to be
-            * atomic, and Xmin never changes in an existing tuple, except to
-            * invalid or frozen, and neither of those can match priorXmax.)
-            */
-           if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
-                                    priorXmax))
-           {
-               ReleaseBuffer(buffer);
-               return NULL;
-           }
-
-           /* otherwise xmin should not be dirty... */
-           if (TransactionIdIsValid(SnapshotDirty.xmin))
-               elog(ERROR, "t_xmin is uncommitted in tuple to be updated");
-
-           /*
-            * If tuple is being updated by other transaction then we have to
-            * wait for its commit/abort, or die trying.
-            */
-           if (TransactionIdIsValid(SnapshotDirty.xmax))
-           {
-               ReleaseBuffer(buffer);
-               switch (wait_policy)
-               {
-                   case LockWaitBlock:
-                       XactLockTableWait(SnapshotDirty.xmax,
-                                         relation, &tuple.t_self,
-                                         XLTW_FetchUpdated);
-                       break;
-                   case LockWaitSkip:
-                       if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
-                           return NULL;    /* skip instead of waiting */
-                       break;
-                   case LockWaitError:
-                       if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
-                           ereport(ERROR,
-                                   (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
-                                    errmsg("could not obtain lock on row in relation \"%s\"",
-                                           RelationGetRelationName(relation))));
-                       break;
-               }
-               continue;       /* loop back to repeat heap_fetch */
-           }
-
-           /*
-            * If tuple was inserted by our own transaction, we have to check
-            * cmin against es_output_cid: cmin >= current CID means our
-            * command cannot see the tuple, so we should ignore it. Otherwise
-            * heap_lock_tuple() will throw an error, and so would any later
-            * attempt to update or delete the tuple.  (We need not check cmax
-            * because HeapTupleSatisfiesDirty will consider a tuple deleted
-            * by our transaction dead, regardless of cmax.) We just checked
-            * that priorXmax == xmin, so we can test that variable instead of
-            * doing HeapTupleHeaderGetXmin again.
-            */
-           if (TransactionIdIsCurrentTransactionId(priorXmax) &&
-               HeapTupleHeaderGetCmin(tuple.t_data) >= estate->es_output_cid)
-           {
-               ReleaseBuffer(buffer);
-               return NULL;
-           }
-
-           /*
-            * This is a live tuple, so now try to lock it.
-            */
-           test = heap_lock_tuple(relation, &tuple,
-                                  estate->es_output_cid,
-                                  lockmode, wait_policy,
-                                  false, &buffer, &hufd);
-           /* We now have two pins on the buffer, get rid of one */
-           ReleaseBuffer(buffer);
-
-           switch (test)
-           {
-               case HeapTupleSelfUpdated:
-
-                   /*
-                    * The target tuple was already updated or deleted by the
-                    * current command, or by a later command in the current
-                    * transaction.  We *must* ignore the tuple in the former
-                    * case, so as to avoid the "Halloween problem" of
-                    * repeated update attempts.  In the latter case it might
-                    * be sensible to fetch the updated tuple instead, but
-                    * doing so would require changing heap_update and
-                    * heap_delete to not complain about updating "invisible"
-                    * tuples, which seems pretty scary (heap_lock_tuple will
-                    * not complain, but few callers expect
-                    * HeapTupleInvisible, and we're not one of them).  So for
-                    * now, treat the tuple as deleted and do not process.
-                    */
-                   ReleaseBuffer(buffer);
-                   return NULL;
-
-               case HeapTupleMayBeUpdated:
-                   /* successfully locked */
-                   break;
-
-               case HeapTupleUpdated:
-                   ReleaseBuffer(buffer);
-                   if (IsolationUsesXactSnapshot())
-                       ereport(ERROR,
-                               (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
-                                errmsg("could not serialize access due to concurrent update")));
-                   if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
-                       ereport(ERROR,
-                               (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
-                                errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
-
-                   /* Should not encounter speculative tuple on recheck */
-                   Assert(!HeapTupleHeaderIsSpeculative(tuple.t_data));
-                   if (!ItemPointerEquals(&hufd.ctid, &tuple.t_self))
-                   {
-                       /* it was updated, so look at the updated version */
-                       tuple.t_self = hufd.ctid;
-                       /* updated row should have xmin matching this xmax */
-                       priorXmax = hufd.xmax;
-                       continue;
-                   }
-                   /* tuple was deleted, so give up */
-                   return NULL;
-
-               case HeapTupleWouldBlock:
-                   ReleaseBuffer(buffer);
-                   return NULL;
-
-               case HeapTupleInvisible:
-                   elog(ERROR, "attempted to lock invisible tuple");
-                   break;
-
-               default:
-                   ReleaseBuffer(buffer);
-                   elog(ERROR, "unrecognized heap_lock_tuple status: %u",
-                        test);
-                   return NULL;    /* keep compiler quiet */
-           }
-
-           /*
-            * We got tuple - now copy it for use by recheck query.
-            */
-           copyTuple = heap_copytuple(&tuple);
-           ReleaseBuffer(buffer);
-           break;
-       }
-
-       /*
-        * If the referenced slot was actually empty, the latest version of
-        * the row must have been deleted, so we need do nothing.
-        */
-       if (tuple.t_data == NULL)
-       {
-           ReleaseBuffer(buffer);
-           return NULL;
-       }
-
-       /*
-        * As above, if xmin isn't what we're expecting, do nothing.
-        */
-       if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
-                                priorXmax))
-       {
-           ReleaseBuffer(buffer);
-           return NULL;
-       }
-
-       /*
-        * If we get here, the tuple was found but failed SnapshotDirty.
-        * Assuming the xmin is either a committed xact or our own xact (as it
-        * certainly should be if we're trying to modify the tuple), this must
-        * mean that the row was updated or deleted by either a committed xact
-        * or our own xact.  If it was deleted, we can ignore it; if it was
-        * updated then chain up to the next version and repeat the whole
-        * process.
-        *
-        * As above, it should be safe to examine xmax and t_ctid without the
-        * buffer content lock, because they can't be changing.
-        */
-
-       /* check whether next version would be in a different partition */
-       if (HeapTupleHeaderIndicatesMovedPartitions(tuple.t_data))
-           ereport(ERROR,
-                   (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
-                    errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
-
-       /* check whether tuple has been deleted */
-       if (ItemPointerEquals(&tuple.t_self, &tuple.t_data->t_ctid))
-       {
-           /* deleted, so forget about it */
-           ReleaseBuffer(buffer);
-           return NULL;
-       }
-
-       /* updated, so look at the updated row */
-       tuple.t_self = tuple.t_data->t_ctid;
-       /* updated row should have xmin matching this xmax */
-       priorXmax = HeapTupleHeaderGetUpdateXid(tuple.t_data);
-       ReleaseBuffer(buffer);
-       /* loop back to fetch next in chain */
-   }
-
-   /*
-    * Return the copied tuple
-    */
-   return copyTuple;
-}
-
 /*
  * EvalPlanQualInit -- initialize during creation of a plan state node
  * that might need to invoke EPQ processing.
@@ -2792,40 +2549,34 @@ EvalPlanQualSetPlan(EPQState *epqstate, Plan *subplan, List *auxrowmarks)
    epqstate->arowMarks = auxrowmarks;
 }
 
+#if 0
 /*
  * Install one test tuple into EPQ state, or clear test tuple if tuple == NULL
  *
  * NB: passed tuple must be palloc'd; it may get freed later
  */
 void
-EvalPlanQualSetTuple(EPQState *epqstate, Index rti, HeapTuple tuple)
+EvalPlanQualSetTuple(EPQState *epqstate, Index rti, TupleTableSlot *slot)
 {
    EState     *estate = epqstate->estate;
 
    Assert(rti > 0);
 
-   /*
-    * free old test tuple, if any, and store new tuple where relation's scan
-    * node will see it
-    */
-   if (estate->es_epqTuple[rti - 1] != NULL)
-       heap_freetuple(estate->es_epqTuple[rti - 1]);
-   estate->es_epqTuple[rti - 1] = tuple;
+   if (estate->es_epqTupleSlot[rti - 1] != NULL)
+       ExecClearTuple(estate->es_epqTupleSlot[rti - 1]);
+   if (slot)
+   {
+       if (!estate->es_epqTupleSlot[rti - 1])
+       {
+           /* XXX: dead code; no ExecRowMark is at hand here for the relation */
+           estate->es_epqTupleSlot[rti - 1] =
+               table_gimmegimmeslot(erm->relation, NULL);
+       }
+       /* XXX: It'd be better if we could work around needing to copy. */
+       ExecCopySlot(estate->es_epqTupleSlot[rti - 1], slot);
+   }
    estate->es_epqTupleSet[rti - 1] = true;
 }
-
-/*
- * Fetch back the current test tuple (if any) for the specified RTI
- */
-HeapTuple
-EvalPlanQualGetTuple(EPQState *epqstate, Index rti)
-{
-   EState     *estate = epqstate->estate;
-
-   Assert(rti > 0);
-
-   return estate->es_epqTuple[rti - 1];
-}
+#endif
 
 /*
  * Fetch the current row values for any non-locked relations that need
@@ -2845,13 +2596,14 @@ EvalPlanQualFetchRowMarks(EPQState *epqstate)
        ExecRowMark *erm = aerm->rowmark;
        Datum       datum;
        bool        isNull;
-       HeapTupleData tuple;
+       TupleTableSlot *slot;
 
        if (RowMarkRequiresRowShareLock(erm->markType))
            elog(ERROR, "EvalPlanQual doesn't support locking rowmarks");
 
        /* clear any leftover test tuple for this rel */
-       EvalPlanQualSetTuple(epqstate, erm->rti, NULL);
+       slot = EvalPlanQualSlot(epqstate, erm->relation, erm->rti);
+       ExecClearTuple(slot);
 
        /* if child rel, must check whether it produced this row */
        if (erm->rti != erm->prti)
@@ -2876,8 +2628,6 @@ EvalPlanQualFetchRowMarks(EPQState *epqstate)
 
        if (erm->markType == ROW_MARK_REFERENCE)
        {
-           HeapTuple   copyTuple;
-
            Assert(erm->relation != NULL);
 
            /* fetch the tuple's ctid */
@@ -2895,17 +2645,20 @@ EvalPlanQualFetchRowMarks(EPQState *epqstate)
                bool        updated = false;
 
                fdwroutine = GetFdwRoutineForRelation(erm->relation, false);
+
                /* this should have been checked already, but let's be safe */
                if (fdwroutine->RefetchForeignRow == NULL)
                    ereport(ERROR,
                            (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                             errmsg("cannot lock rows in foreign table \"%s\"",
                                    RelationGetRelationName(erm->relation))));
-               copyTuple = fdwroutine->RefetchForeignRow(epqstate->estate,
-                                                         erm,
-                                                         datum,
-                                                         &updated);
-               if (copyTuple == NULL)
+
+               slot = fdwroutine->RefetchForeignRow(epqstate->estate,
+                                                    erm,
+                                                    datum,
+                                                    slot,
+                                                    &updated);
+               if (slot == NULL)
                    elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
 
                /*
@@ -2917,25 +2670,14 @@ EvalPlanQualFetchRowMarks(EPQState *epqstate)
            else
            {
                /* ordinary table, fetch the tuple */
-               Buffer      buffer;
 
-               tuple.t_self = *((ItemPointer) DatumGetPointer(datum));
-               if (!heap_fetch(erm->relation, SnapshotAny, &tuple, &buffer,
-                               false, NULL))
+               if (!table_fetch_row_version(erm->relation, (ItemPointer) DatumGetPointer(datum),
+                                            SnapshotAny, slot, NULL))
                    elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
-
-               /* successful, copy tuple */
-               copyTuple = heap_copytuple(&tuple);
-               ReleaseBuffer(buffer);
            }
-
-           /* store tuple */
-           EvalPlanQualSetTuple(epqstate, erm->rti, copyTuple);
        }
        else
        {
-           HeapTupleHeader td;
-
            Assert(erm->markType == ROW_MARK_COPY);
 
            /* fetch the whole-row Var for the relation */
@@ -2945,19 +2687,8 @@ EvalPlanQualFetchRowMarks(EPQState *epqstate)
            /* non-locked rels could be on the inside of outer joins */
            if (isNull)
                continue;
-           td = DatumGetHeapTupleHeader(datum);
-
-           /* build a temporary HeapTuple control structure */
-           tuple.t_len = HeapTupleHeaderGetDatumLength(td);
-           tuple.t_data = td;
-           /* relation might be a foreign table, if so provide tableoid */
-           tuple.t_tableOid = erm->relid;
-           /* also copy t_ctid in case there's valid data there */
-           tuple.t_self = td->t_ctid;
-
-           /* copy and store tuple */
-           EvalPlanQualSetTuple(epqstate, erm->rti,
-                                heap_copytuple(&tuple));
+
+           ExecForceStoreHeapTupleDatum(datum, slot);
        }
    }
 }
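
/*
 * Editorial sketch, not part of the patch: the slot-based refetch pattern
 * EvalPlanQualFetchRowMarks() now follows for ordinary tables.  Instead of
 * building a HeapTuple copy and storing it via the removed
 * EvalPlanQualSetTuple(), callers obtain the per-RTI slot from
 * EvalPlanQualSlot() and let the table AM fill it.  Call signatures are
 * taken from this diff; the helper name is invented for illustration.
 */
static void
epq_refetch_sketch(EPQState *epqstate, ExecRowMark *erm, Datum ctid_datum)
{
    TupleTableSlot *slot = EvalPlanQualSlot(epqstate, erm->relation, erm->rti);

    /* clear any leftover test tuple, then refetch through the table AM */
    ExecClearTuple(slot);
    if (!table_fetch_row_version(erm->relation,
                                 (ItemPointer) DatumGetPointer(ctid_datum),
                                 SnapshotAny, slot, NULL))
        elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
}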
@@ -3153,15 +2884,15 @@ EvalPlanQualStart(EPQState *epqstate, EState *parentestate, Plan *planTree)
     * sub-rechecks to inherit the values being examined by an outer recheck.
     */
    estate->es_epqScanDone = (bool *) palloc0(rtsize * sizeof(bool));
-   if (parentestate->es_epqTuple != NULL)
+   if (parentestate->es_epqTupleSlot != NULL)
    {
-       estate->es_epqTuple = parentestate->es_epqTuple;
+       estate->es_epqTupleSlot = parentestate->es_epqTupleSlot;
        estate->es_epqTupleSet = parentestate->es_epqTupleSet;
    }
    else
    {
-       estate->es_epqTuple = (HeapTuple *)
-           palloc0(rtsize * sizeof(HeapTuple));
+       estate->es_epqTupleSlot = (TupleTableSlot **)
+           palloc0(rtsize * sizeof(TupleTableSlot *));
        estate->es_epqTupleSet = (bool *)
            palloc0(rtsize * sizeof(bool));
    }
index 31f7288b46071fdeb8424d46472f65ec19d4ddd2..f0a6318a255c7b4836222ccc6dbe908a6beb5b87 100644 (file)
@@ -13,6 +13,7 @@
  */
 #include "postgres.h"
 
+#include "access/tableam.h"
 #include "catalog/partition.h"
 #include "catalog/pg_inherits.h"
 #include "catalog/pg_type.h"
@@ -899,8 +900,7 @@ ExecInitRoutingInfo(ModifyTableState *mtstate,
         * end of the command.
         */
        partrouteinfo->pi_PartitionTupleSlot =
-           ExecInitExtraTupleSlot(estate, RelationGetDescr(partrel),
-                                  &TTSOpsHeapTuple);
+           table_gimmegimmeslot(partrel, &estate->es_tupleTable);
    }
    else
        partrouteinfo->pi_PartitionTupleSlot = NULL;
index 5bd3bbc35e96ba44552504b9364b27f9f300c22f..553159b08e2a3bd9cc57cbc801ae1dc32309195b 100644 (file)
@@ -15,6 +15,7 @@
 #include "postgres.h"
 
 #include "access/relscan.h"
+#include "access/tableam.h"
 #include "access/transam.h"
 #include "access/xact.h"
 #include "commands/trigger.h"
@@ -117,7 +118,6 @@ RelationFindReplTupleByIndex(Relation rel, Oid idxoid,
                             TupleTableSlot *searchslot,
                             TupleTableSlot *outslot)
 {
-   HeapTuple   scantuple;
    ScanKeyData skey[INDEX_MAX_KEYS];
    IndexScanDesc scan;
    SnapshotData snap;
@@ -143,10 +143,9 @@ retry:
    index_rescan(scan, skey, IndexRelationGetNumberOfKeyAttributes(idxrel), NULL, 0);
 
    /* Try to find the tuple */
-   if ((scantuple = index_getnext(scan, ForwardScanDirection)) != NULL)
+   if (index_getnext_slot(scan, ForwardScanDirection, outslot))
    {
        found = true;
-       ExecStoreHeapTuple(scantuple, outslot, false);
        ExecMaterializeSlot(outslot);
 
        xwait = TransactionIdIsValid(snap.xmin) ?
@@ -166,25 +165,18 @@ retry:
    /* Found tuple, try to lock it in the lockmode. */
    if (found)
    {
-       Buffer      buf;
        HeapUpdateFailureData hufd;
        HTSU_Result res;
-       HeapTupleData locktup;
-       HeapTupleTableSlot *hslot = (HeapTupleTableSlot *)outslot;
-
-       /* Only a heap tuple has item pointers. */
-       Assert(TTS_IS_HEAPTUPLE(outslot) || TTS_IS_BUFFERTUPLE(outslot));
-       ItemPointerCopy(&hslot->tuple->t_self, &locktup.t_self);
 
        PushActiveSnapshot(GetLatestSnapshot());
 
-       res = heap_lock_tuple(rel, &locktup, GetCurrentCommandId(false),
-                             lockmode,
-                             LockWaitBlock,
-                             false /* don't follow updates */ ,
-                             &buf, &hufd);
-       /* the tuple slot already has the buffer pinned */
-       ReleaseBuffer(buf);
+       res = table_lock_tuple(rel, &(outslot->tts_tid), GetLatestSnapshot(),
+                                outslot,
+                                GetCurrentCommandId(false),
+                                lockmode,
+                                LockWaitBlock,
+                                0 /* don't follow updates */ ,
+                                &hufd);
 
        PopActiveSnapshot();
 
@@ -203,6 +195,12 @@ retry:
                            (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
                             errmsg("concurrent update, retrying")));
                goto retry;
+           case HeapTupleDeleted:
+               /* XXX: Improve handling here */
+               ereport(LOG,
+                       (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+                        errmsg("concurrent delete, retrying")));
+               goto retry;
            case HeapTupleInvisible:
                elog(ERROR, "attempted to lock invisible tuple");
                break;
@@ -220,59 +218,6 @@ retry:
    return found;
 }
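
/*
 * Editorial sketch of the lock-and-refetch shape both RelationFindReplTuple*
 * variants share after this change: the TID comes from the slot itself
 * (tts_tid, now maintained by the slot store routines, see execTuples.c
 * below), and table_lock_tuple() locks the row while refetching the locked
 * version into the same slot.  The helper name is invented; the call
 * signature matches the diff.
 */
static bool
lock_found_tuple_sketch(Relation rel, TupleTableSlot *outslot,
                        LockTupleMode lockmode)
{
    HeapUpdateFailureData hufd;
    HTSU_Result res;

    PushActiveSnapshot(GetLatestSnapshot());
    res = table_lock_tuple(rel, &(outslot->tts_tid), GetLatestSnapshot(),
                           outslot,
                           GetCurrentCommandId(false),
                           lockmode,
                           LockWaitBlock,
                           0 /* don't follow updates */ ,
                           &hufd);
    PopActiveSnapshot();

    /* callers retry the whole search on concurrent update/delete */
    return res == HeapTupleMayBeUpdated;
}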
 
-/*
- * Compare the tuple and slot and check if they have equal values.
- *
- * We use binary datum comparison which might return false negatives but
- * that's the best we can do here as there may be multiple notions of
- * equality for the data types and table columns don't specify which one
- * to use.
- */
-static bool
-tuple_equals_slot(TupleDesc desc, HeapTuple tup, TupleTableSlot *slot)
-{
-   Datum       values[MaxTupleAttributeNumber];
-   bool        isnull[MaxTupleAttributeNumber];
-   int         attrnum;
-
-   heap_deform_tuple(tup, desc, values, isnull);
-
-   /* Check equality of the attributes. */
-   for (attrnum = 0; attrnum < desc->natts; attrnum++)
-   {
-       Form_pg_attribute att;
-       TypeCacheEntry *typentry;
-
-       /*
-        * If one value is NULL and other is not, then they are certainly not
-        * equal
-        */
-       if (isnull[attrnum] != slot->tts_isnull[attrnum])
-           return false;
-
-       /*
-        * If both are NULL, they can be considered equal.
-        */
-       if (isnull[attrnum])
-           continue;
-
-       att = TupleDescAttr(desc, attrnum);
-
-       typentry = lookup_type_cache(att->atttypid, TYPECACHE_EQ_OPR_FINFO);
-       if (!OidIsValid(typentry->eq_opr_finfo.fn_oid))
-           ereport(ERROR,
-                   (errcode(ERRCODE_UNDEFINED_FUNCTION),
-                    errmsg("could not identify an equality operator for type %s",
-                           format_type_be(att->atttypid))));
-
-       if (!DatumGetBool(FunctionCall2(&typentry->eq_opr_finfo,
-                                       values[attrnum],
-                                       slot->tts_values[attrnum])))
-           return false;
-   }
-
-   return true;
-}
 
 /*
  * Search the relation 'rel' for tuple using the sequential scan.
@@ -288,33 +233,34 @@ bool
 RelationFindReplTupleSeq(Relation rel, LockTupleMode lockmode,
                         TupleTableSlot *searchslot, TupleTableSlot *outslot)
 {
-   HeapTuple   scantuple;
-   HeapScanDesc scan;
+   TupleTableSlot *scanslot;
+   TableScanDesc scan;
    SnapshotData snap;
    TransactionId xwait;
    bool        found;
-   TupleDesc   desc = RelationGetDescr(rel);
+   TupleDesc   desc PG_USED_FOR_ASSERTS_ONLY = RelationGetDescr(rel);
 
    Assert(equalTupleDescs(desc, outslot->tts_tupleDescriptor));
 
    /* Start a heap scan. */
    InitDirtySnapshot(snap);
-   scan = heap_beginscan(rel, &snap, 0, NULL);
+   scan = table_beginscan(rel, &snap, 0, NULL);
+
+   scanslot = table_gimmegimmeslot(rel, NULL);
 
 retry:
    found = false;
 
-   heap_rescan(scan, NULL);
+   table_rescan(scan, NULL);
 
    /* Try to find the tuple */
-   while ((scantuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+   while (table_scan_getnextslot(scan, ForwardScanDirection, scanslot))
    {
-       if (!tuple_equals_slot(desc, scantuple, searchslot))
+       if (!ExecSlotCompare(scanslot, searchslot))
            continue;
 
        found = true;
-       ExecStoreHeapTuple(scantuple, outslot, false);
-       ExecMaterializeSlot(outslot);
+       ExecCopySlot(outslot, scanslot);
 
        xwait = TransactionIdIsValid(snap.xmin) ?
            snap.xmin : snap.xmax;
@@ -333,25 +279,18 @@ retry:
    /* Found tuple, try to lock it in the lockmode. */
    if (found)
    {
-       Buffer      buf;
        HeapUpdateFailureData hufd;
        HTSU_Result res;
-       HeapTupleData locktup;
-       HeapTupleTableSlot *hslot = (HeapTupleTableSlot *)outslot;
-
-       /* Only a heap tuple has item pointers. */
-       Assert(TTS_IS_HEAPTUPLE(outslot) || TTS_IS_BUFFERTUPLE(outslot));
-       ItemPointerCopy(&hslot->tuple->t_self, &locktup.t_self);
 
        PushActiveSnapshot(GetLatestSnapshot());
 
-       res = heap_lock_tuple(rel, &locktup, GetCurrentCommandId(false),
-                             lockmode,
-                             LockWaitBlock,
-                             false /* don't follow updates */ ,
-                             &buf, &hufd);
-       /* the tuple slot already has the buffer pinned */
-       ReleaseBuffer(buf);
+       res = table_lock_tuple(rel, &(outslot->tts_tid), GetLatestSnapshot(),
+                              outslot,
+                              GetCurrentCommandId(false),
+                              lockmode,
+                              LockWaitBlock,
+                              0 /* don't follow updates */ ,
+                              &hufd);
 
        PopActiveSnapshot();
 
@@ -370,6 +309,12 @@ retry:
                            (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
                             errmsg("concurrent update, retrying")));
                goto retry;
+           case HeapTupleDeleted:
+               /* XXX: Improve handling here */
+               ereport(LOG,
+                       (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+                        errmsg("concurrent delete, retrying")));
+               goto retry;
            case HeapTupleInvisible:
                elog(ERROR, "attempted to lock invisible tuple");
                break;
@@ -379,7 +324,8 @@ retry:
        }
    }
 
-   heap_endscan(scan);
+   table_endscan(scan);
+   ExecDropSingleTupleTableSlot(scanslot);
 
    return found;
 }
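
/*
 * Editorial sketch condensing the AM-independent control flow that
 * RelationFindReplTupleSeq() now uses: the scan slot comes from the table
 * AM via table_gimmegimmeslot(), rows arrive through
 * table_scan_getnextslot(), and equality is decided by the new
 * ExecSlotCompare() (added to execTuples.c below).  Locking and retry
 * handling are omitted; names match the diff.
 */
static bool
find_tuple_seq_sketch(Relation rel, Snapshot snap,
                      TupleTableSlot *searchslot, TupleTableSlot *outslot)
{
    TableScanDesc scan = table_beginscan(rel, snap, 0, NULL);
    TupleTableSlot *scanslot = table_gimmegimmeslot(rel, NULL);
    bool        found = false;

    while (table_scan_getnextslot(scan, ForwardScanDirection, scanslot))
    {
        if (!ExecSlotCompare(scanslot, searchslot))
            continue;           /* not the row we are looking for */
        found = true;
        ExecCopySlot(outslot, scanslot);
        break;
    }

    table_endscan(scan);
    ExecDropSingleTupleTableSlot(scanslot);
    return found;
}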
@@ -394,7 +340,6 @@ void
 ExecSimpleRelationInsert(EState *estate, TupleTableSlot *slot)
 {
    bool        skip_tuple = false;
-   HeapTuple   tuple;
    ResultRelInfo *resultRelInfo = estate->es_result_relation_info;
    Relation    rel = resultRelInfo->ri_RelationDesc;
 
@@ -407,10 +352,8 @@ ExecSimpleRelationInsert(EState *estate, TupleTableSlot *slot)
    if (resultRelInfo->ri_TrigDesc &&
        resultRelInfo->ri_TrigDesc->trig_insert_before_row)
    {
-       slot = ExecBRInsertTriggers(estate, resultRelInfo, slot);
-
-       if (slot == NULL)       /* "do nothing" */
-           skip_tuple = true;
+       if (!ExecBRInsertTriggers(estate, resultRelInfo, slot))
+           skip_tuple = true;      /* "do nothing" */
    }
 
    if (!skip_tuple)
@@ -423,19 +366,15 @@ ExecSimpleRelationInsert(EState *estate, TupleTableSlot *slot)
        if (resultRelInfo->ri_PartitionCheck)
            ExecPartitionCheck(resultRelInfo, slot, estate, true);
 
-       /* Materialize slot into a tuple that we can scribble upon. */
-       tuple = ExecFetchSlotHeapTuple(slot, true, NULL);
-
-       /* OK, store the tuple and create index entries for it */
-       simple_heap_insert(rel, tuple);
+       table_insert(resultRelInfo->ri_RelationDesc, slot,
+                      GetCurrentCommandId(true), 0, NULL);
 
        if (resultRelInfo->ri_NumIndices > 0)
-           recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
-                                                  estate, false, NULL,
+           recheckIndexes = ExecInsertIndexTuples(slot, estate, false, NULL,
                                                   NIL);
 
        /* AFTER ROW INSERT Triggers */
-       ExecARInsertTriggers(estate, resultRelInfo, tuple,
+       ExecARInsertTriggers(estate, resultRelInfo, slot,
                             recheckIndexes, NULL);
 
        /*
@@ -459,15 +398,9 @@ ExecSimpleRelationUpdate(EState *estate, EPQState *epqstate,
                         TupleTableSlot *searchslot, TupleTableSlot *slot)
 {
    bool        skip_tuple = false;
-   HeapTuple   tuple;
    ResultRelInfo *resultRelInfo = estate->es_result_relation_info;
    Relation    rel = resultRelInfo->ri_RelationDesc;
-   HeapTupleTableSlot *hsearchslot = (HeapTupleTableSlot *)searchslot;
-   HeapTupleTableSlot *hslot = (HeapTupleTableSlot *)slot;
-
-   /* We expect both searchslot and the slot to contain a heap tuple. */
-   Assert(TTS_IS_HEAPTUPLE(searchslot) || TTS_IS_BUFFERTUPLE(searchslot));
-   Assert(TTS_IS_HEAPTUPLE(slot) || TTS_IS_BUFFERTUPLE(slot));
+   ItemPointer tid = &(searchslot->tts_tid);
 
    /* For now we support only tables. */
    Assert(rel->rd_rel->relkind == RELKIND_RELATION);
@@ -478,16 +411,18 @@ ExecSimpleRelationUpdate(EState *estate, EPQState *epqstate,
    if (resultRelInfo->ri_TrigDesc &&
        resultRelInfo->ri_TrigDesc->trig_update_before_row)
    {
-       slot = ExecBRUpdateTriggers(estate, epqstate, resultRelInfo,
-                                   &hsearchslot->tuple->t_self, NULL, slot);
-
-       if (slot == NULL)       /* "do nothing" */
-           skip_tuple = true;
+       if (!ExecBRUpdateTriggers(estate, epqstate, resultRelInfo,
+                                   tid,
+                                 NULL, slot))
+           skip_tuple = true;      /* "do nothing" */
    }
 
    if (!skip_tuple)
    {
        List       *recheckIndexes = NIL;
+       HeapUpdateFailureData hufd;
+       LockTupleMode lockmode;
+       bool update_indexes;
 
        /* Check the constraints of the tuple */
        if (rel->rd_att->constr)
@@ -495,22 +430,22 @@ ExecSimpleRelationUpdate(EState *estate, EPQState *epqstate,
        if (resultRelInfo->ri_PartitionCheck)
            ExecPartitionCheck(resultRelInfo, slot, estate, true);
 
-       /* Materialize slot into a tuple that we can scribble upon. */
-       tuple = ExecFetchSlotHeapTuple(slot, true, NULL);
+       table_update(rel, tid, slot, GetCurrentCommandId(true), estate->es_snapshot,
+                    InvalidSnapshot, true, &hufd, &lockmode, &update_indexes);
 
-       /* OK, update the tuple and index entries for it */
-       simple_heap_update(rel, &hsearchslot->tuple->t_self, hslot->tuple);
+       /*
+        * FIXME: moving from simple_heap_update to table_update removes the
+        * concurrency handling
+        */
 
-       if (resultRelInfo->ri_NumIndices > 0 &&
-           !HeapTupleIsHeapOnly(hslot->tuple))
-           recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
-                                                  estate, false, NULL,
+       if (resultRelInfo->ri_NumIndices > 0 && update_indexes)
+           recheckIndexes = ExecInsertIndexTuples(slot, estate, false, NULL,
                                                   NIL);
 
        /* AFTER ROW UPDATE Triggers */
        ExecARUpdateTriggers(estate, resultRelInfo,
-                            &hsearchslot->tuple->t_self, NULL, tuple,
-                            recheckIndexes, NULL);
+                            tid,
+                            NULL, slot, recheckIndexes, NULL);
 
        list_free(recheckIndexes);
    }
@@ -529,7 +464,7 @@ ExecSimpleRelationDelete(EState *estate, EPQState *epqstate,
    bool        skip_tuple = false;
    ResultRelInfo *resultRelInfo = estate->es_result_relation_info;
    Relation    rel = resultRelInfo->ri_RelationDesc;
-   HeapTupleTableSlot *hsearchslot = (HeapTupleTableSlot *)searchslot;
+   ItemPointer tid = &(searchslot->tts_tid);
 
    /* For now we support only tables and heap tuples. */
    Assert(rel->rd_rel->relkind == RELKIND_RELATION);
@@ -542,20 +477,24 @@ ExecSimpleRelationDelete(EState *estate, EPQState *epqstate,
        resultRelInfo->ri_TrigDesc->trig_delete_before_row)
    {
        skip_tuple = !ExecBRDeleteTriggers(estate, epqstate, resultRelInfo,
-                                          &hsearchslot->tuple->t_self, NULL,
-                                          NULL);
+                                          tid, NULL, NULL);
+
    }
 
    if (!skip_tuple)
    {
        List       *recheckIndexes = NIL;
+       HeapUpdateFailureData hufd;
 
        /* OK, delete the tuple */
-       simple_heap_delete(rel, &hsearchslot->tuple->t_self);
+       /* FIXME: needs checks for return codes */
+       table_delete(rel, tid, GetCurrentCommandId(true),
+                    estate->es_snapshot, InvalidSnapshot,
+                    true,  &hufd, false);
 
        /* AFTER ROW DELETE Triggers */
        ExecARDeleteTriggers(estate, resultRelInfo,
-                            &hsearchslot->tuple->t_self, NULL, NULL);
+                            tid, NULL, NULL);
 
        list_free(recheckIndexes);
    }
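
/*
 * Editorial sketch of the three table-AM DML call shapes the
 * ExecSimpleRelation* paths above now use, with arguments exactly as they
 * appear in this diff.  The wrapper exists only to display the signatures;
 * per the FIXMEs above, handling of concurrency failures reported through
 * hufd is still an open item in this patch.
 */
static void
simple_dml_sketch(Relation rel, ItemPointer tid, TupleTableSlot *slot,
                  EState *estate)
{
    HeapUpdateFailureData hufd;
    LockTupleMode lockmode;
    bool        update_indexes;

    /* insert: no TID input; the AM records the new row's TID in the slot */
    table_insert(rel, slot, GetCurrentCommandId(true), 0, NULL);

    /* update: replace the row version at tid with the slot's contents */
    table_update(rel, tid, slot, GetCurrentCommandId(true),
                 estate->es_snapshot, InvalidSnapshot, true,
                 &hufd, &lockmode, &update_indexes);

    /* delete: remove the row version at tid */
    table_delete(rel, tid, GetCurrentCommandId(true),
                 estate->es_snapshot, InvalidSnapshot,
                 true, &hufd, false);
}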
index d90bb16b570d14efeb8a53e17e7c455a3e851d53..a3349099b6ee5cec2f618f9531d9319ec419161b 100644 (file)
@@ -40,7 +40,7 @@ ExecScanFetch(ScanState *node,
 
    CHECK_FOR_INTERRUPTS();
 
-   if (estate->es_epqTuple != NULL)
+   if (estate->es_epqTupleSlot != NULL)
    {
        /*
         * We are inside an EvalPlanQual recheck.  Return the test tuple if
@@ -73,17 +73,15 @@ ExecScanFetch(ScanState *node,
            /* Else mark to remember that we shouldn't return more */
            estate->es_epqScanDone[scanrelid - 1] = true;
 
+           slot = estate->es_epqTupleSlot[scanrelid - 1];
+
            /* Return empty slot if we haven't got a test tuple */
-           if (estate->es_epqTuple[scanrelid - 1] == NULL)
+           if (TupIsNull(slot))
                return ExecClearTuple(slot);
 
-           /* Store test tuple in the plan node's scan slot */
-           ExecForceStoreHeapTuple(estate->es_epqTuple[scanrelid - 1],
-                                   slot);
-
            /* Check if it meets the access-method conditions */
            if (!(*recheckMtd) (node, slot))
-               ExecClearTuple(slot);   /* would not be returned by scan */
+               return ExecClearTuple(slot);    /* would not be returned by scan */
 
            return slot;
        }
index 472a5f39cfc682aa5cad0f6d83317856f091db9d..d91a71a7c192a1b8d374f618208e6d96c8babb46 100644 (file)
@@ -453,6 +453,7 @@ tts_heap_store_tuple(TupleTableSlot *slot, HeapTuple tuple, bool shouldFree)
    hslot->tuple = tuple;
    hslot->off = 0;
    slot->tts_flags &= ~TTS_FLAG_EMPTY;
+   slot->tts_tid = tuple->t_self;
 
    if (shouldFree)
        slot->tts_flags |= TTS_FLAG_SHOULDFREE;
@@ -717,20 +718,31 @@ tts_buffer_heap_materialize(TupleTableSlot *slot)
     * associated with it, unless it's materialized (which would've returned
     * above).
     */
+   // PBORKED: restore
+#if 0
    Assert(bslot->base.tuple);
+#endif
 
    oldContext = MemoryContextSwitchTo(slot->tts_mcxt);
-   bslot->base.tuple = heap_copytuple(bslot->base.tuple);
+#if 1
+   if (!bslot->base.tuple)
+   {
+       bslot->base.tuple = heap_form_tuple(slot->tts_tupleDescriptor,
+                                           slot->tts_values,
+                                           slot->tts_isnull);
+   }
+#endif
+   else
+   {
+       bslot->base.tuple = heap_copytuple(bslot->base.tuple);
+   }
    MemoryContextSwitchTo(oldContext);
 
-   /*
-    * A heap tuple stored in a BufferHeapTupleTableSlot should have a buffer
-    * associated with it, unless it's materialized.
-    */
-   Assert(BufferIsValid(bslot->buffer));
-   if (likely(BufferIsValid(bslot->buffer)))
+   if (BufferIsValid(bslot->buffer))
+   {
        ReleaseBuffer(bslot->buffer);
-   bslot->buffer = InvalidBuffer;
+       bslot->buffer = InvalidBuffer;
+   }
 
    /*
     * Have to deform from scratch, otherwise tts_values[] entries could point
@@ -764,6 +776,10 @@ tts_buffer_heap_copyslot(TupleTableSlot *dstslot, TupleTableSlot *srcslot)
    }
    else
    {
+       // PBORKED: shouldn't be required
+       if (!bsrcslot->base.tuple)
+           tts_buffer_heap_materialize(srcslot);
+
        tts_buffer_heap_store_tuple(dstslot, bsrcslot->base.tuple, bsrcslot->buffer);
        /*
         * Need to materialize because the HeapTupleData portion of the tuple
@@ -858,6 +874,7 @@ tts_buffer_heap_store_tuple(TupleTableSlot *slot, HeapTuple tuple, Buffer buffer
    slot->tts_nvalid = 0;
    bslot->base.tuple = tuple;
    bslot->base.off = 0;
+   slot->tts_tid = tuple->t_self;
 
    /*
     * If tuple is on a disk page, keep the page pinned as long as we hold a
@@ -873,7 +890,9 @@ tts_buffer_heap_store_tuple(TupleTableSlot *slot, HeapTuple tuple, Buffer buffer
        if (BufferIsValid(bslot->buffer))
            ReleaseBuffer(bslot->buffer);
        bslot->buffer = buffer;
-       IncrBufferRefCount(buffer);
+       // PBORKED: Should always be valid
+       if (BufferIsValid(buffer))
+           IncrBufferRefCount(buffer);
    }
 }
 
@@ -1211,6 +1230,56 @@ MakeSingleTupleTableSlot(TupleDesc tupdesc,
    return slot;
 }
 
+// FIXME this definitely does not belong here.
+/* --------------------------------
+ *     ExecSlotCompare
+ *
+ *     Compare two slots and report whether they
+ *     contain the same values.
+ * --------------------------------
+ */
+bool
+ExecSlotCompare(TupleTableSlot *slot1, TupleTableSlot *slot2)
+{
+   int         attrnum;
+
+   Assert(slot1->tts_tupleDescriptor->natts == slot2->tts_tupleDescriptor->natts);
+
+   slot_getallattrs(slot1);
+   slot_getallattrs(slot2);
+
+   /* Check equality of the attributes. */
+   for (attrnum = 0; attrnum < slot1->tts_tupleDescriptor->natts; attrnum++)
+   {
+       Form_pg_attribute att;
+       TypeCacheEntry *typentry;
+
+       /*
+        * If one value is NULL and other is not, then they are certainly not
+        * equal
+        */
+       if (slot1->tts_isnull[attrnum] != slot2->tts_isnull[attrnum])
+           return false;
+
+       att = TupleDescAttr(slot1->tts_tupleDescriptor, attrnum);
+
+       typentry = lookup_type_cache(att->atttypid, TYPECACHE_EQ_OPR_FINFO);
+       if (!OidIsValid(typentry->eq_opr_finfo.fn_oid))
+           ereport(ERROR,
+                   (errcode(ERRCODE_UNDEFINED_FUNCTION),
+                    errmsg("could not identify an equality operator for type %s",
+                           format_type_be(att->atttypid))));
+
+       if (!DatumGetBool(FunctionCall2(&typentry->eq_opr_finfo,
+                                       slot1->tts_values[attrnum],
+                                       slot2->tts_values[attrnum])))
+           return false;
+   }
+
+   return true;
+}
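
/*
 * Editorial note: like the tuple_equals_slot() it replaces in
 * execReplication.c, ExecSlotCompare() resolves equality through the
 * equality operator the type cache records for each attribute
 * (TYPECACHE_EQ_OPR_FINFO), erroring out for types without one.  Minimal
 * usage, as in RelationFindReplTupleSeq() above:
 *
 *     if (ExecSlotCompare(scanslot, searchslot))
 *         ... the scanned row matches the search values ...
 */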
+
+
 /* --------------------------------
  *     ExecDropSingleTupleTableSlot
  *
@@ -1328,9 +1397,15 @@ ExecStoreHeapTuple(HeapTuple tuple,
    Assert(slot != NULL);
    Assert(slot->tts_tupleDescriptor != NULL);
 
-   if (unlikely(!TTS_IS_HEAPTUPLE(slot)))
+   // PBORKED: should only need heap tuples here.
+   if (TTS_IS_BUFFERTUPLE(slot))
+       tts_buffer_heap_store_tuple(slot, tuple, InvalidBuffer);
+   else if (TTS_IS_HEAPTUPLE(slot))
+       tts_heap_store_tuple(slot, tuple, shouldFree);
+   else
        elog(ERROR, "trying to store a heap tuple into wrong type of slot");
-   tts_heap_store_tuple(slot, tuple, shouldFree);
+
+   slot->tts_tableOid = tuple->t_tableOid;
 
    return slot;
 }
@@ -1371,6 +1446,8 @@ ExecStoreBufferHeapTuple(HeapTuple tuple,
        elog(ERROR, "trying to store an on-disk heap tuple into wrong type of slot");
    tts_buffer_heap_store_tuple(slot, tuple, buffer);
 
+   slot->tts_tableOid = tuple->t_tableOid;
+
    return slot;
 }
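
/*
 * Editorial note: the tts_tid assignments added to tts_heap_store_tuple()
 * and tts_buffer_heap_store_tuple() in the hunks above are what allow
 * AM-independent callers to lock or re-find a row from the slot alone,
 * e.g. (as in execReplication.c earlier in this diff):
 *
 *     table_lock_tuple(rel, &(slot->tts_tid), snapshot, slot, ...);
 */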
 
index 2a47abc02effbcf1eb5bd9ff5e8e68a2d9f0c512..4031642b8097cad51d15b8323ad6b73c893d32e5 100644 (file)
@@ -47,6 +47,7 @@
 
 #include "access/parallel.h"
 #include "access/relscan.h"
+#include "access/tableam.h"
 #include "access/transam.h"
 #include "executor/executor.h"
 #include "jit/jit.h"
@@ -130,7 +131,7 @@ CreateExecutorState(void)
    estate->es_tuple_routing_result_relations = NIL;
 
    estate->es_trig_target_relations = NIL;
-   estate->es_trig_tuple_slot = NULL;
+   estate->es_trig_return_slot = NULL;
    estate->es_trig_oldtup_slot = NULL;
    estate->es_trig_newtup_slot = NULL;
 
@@ -157,7 +158,7 @@ CreateExecutorState(void)
 
    estate->es_per_tuple_exprcontext = NULL;
 
-   estate->es_epqTuple = NULL;
+   estate->es_epqTupleSlot = NULL;
    estate->es_epqTupleSet = NULL;
    estate->es_epqScanDone = NULL;
    estate->es_sourceText = NULL;
@@ -419,6 +420,63 @@ MakePerTupleExprContext(EState *estate)
    return estate->es_per_tuple_exprcontext;
 }
 
+TupleTableSlot *
+ExecTriggerGetOldSlot(EState *estate, Relation rel)
+{
+   TupleDesc reldesc = RelationGetDescr(rel);
+   MemoryContext oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
+
+   /* PBORKED: This needs to handle switching slot types between partitions */
+   if (estate->es_trig_oldtup_slot == NULL)
+       estate->es_trig_oldtup_slot = ExecInitExtraTupleSlot(estate, NULL,
+                                                            table_slot_callbacks(rel));
+
+   if (estate->es_trig_oldtup_slot->tts_tupleDescriptor != reldesc)
+       ExecSetSlotDescriptor(estate->es_trig_oldtup_slot, reldesc);
+
+   MemoryContextSwitchTo(oldcontext);
+
+   return estate->es_trig_oldtup_slot;
+}
+
+TupleTableSlot *
+ExecTriggerGetNewSlot(EState *estate, Relation rel)
+{
+   TupleDesc reldesc = RelationGetDescr(rel);
+   MemoryContext oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
+
+   /* PBORKED: This needs to handle switching slot types between partitions */
+   if (estate->es_trig_newtup_slot == NULL)
+       estate->es_trig_newtup_slot = ExecInitExtraTupleSlot(estate, NULL,
+                                                            table_slot_callbacks(rel));
+
+   if (estate->es_trig_newtup_slot->tts_tupleDescriptor != reldesc)
+       ExecSetSlotDescriptor(estate->es_trig_newtup_slot, reldesc);
+
+   MemoryContextSwitchTo(oldcontext);
+
+   return estate->es_trig_newtup_slot;
+}
+
+TupleTableSlot *
+ExecTriggerGetReturnSlot(EState *estate, Relation rel)
+{
+   TupleDesc reldesc = RelationGetDescr(rel);
+   MemoryContext oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
+
+   /* PBORKED: This needs to handle switching slot types between partitions */
+   if (estate->es_trig_return_slot == NULL)
+       estate->es_trig_return_slot = ExecInitExtraTupleSlot(estate, NULL,
+                                                            table_slot_callbacks(rel));
+
+   if (estate->es_trig_return_slot->tts_tupleDescriptor != reldesc)
+       ExecSetSlotDescriptor(estate->es_trig_return_slot, reldesc);
+
+   MemoryContextSwitchTo(oldcontext);
+
+   return estate->es_trig_return_slot;
+}
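
/*
 * Editorial note: the three helpers above differ only in which EState field
 * they populate.  Each lazily creates a slot whose callbacks come from the
 * relation's table AM and re-points the descriptor when the target relation
 * changes (see the PBORKED notes about partitions with differing slot
 * types).  A hypothetical common helper, shown only to illustrate the
 * shared shape:
 */
static TupleTableSlot *
trigger_slot_sketch(EState *estate, Relation rel, TupleTableSlot **slotp)
{
    TupleDesc   reldesc = RelationGetDescr(rel);
    MemoryContext oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

    if (*slotp == NULL)
        *slotp = ExecInitExtraTupleSlot(estate, NULL,
                                        table_slot_callbacks(rel));
    if ((*slotp)->tts_tupleDescriptor != reldesc)
        ExecSetSlotDescriptor(*slotp, reldesc);

    MemoryContextSwitchTo(oldcontext);
    return *slotp;
}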
+
 
 /* ----------------------------------------------------------------
  *              miscellaneous node-init support functions
index 00d02fd50f043be89308df6b475cf0b94973f9d9..8c0625eb2e8e690bf33ac3de88071c55f1bbf1b2 100644 (file)
@@ -38,6 +38,7 @@
 #include <math.h>
 
 #include "access/relscan.h"
+#include "access/tableam.h"
 #include "access/transam.h"
 #include "access/visibilitymap.h"
 #include "executor/execdebug.h"
 
 
 static TupleTableSlot *BitmapHeapNext(BitmapHeapScanState *node);
-static void bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres);
 static inline void BitmapDoneInitializingSharedState(
                                  ParallelBitmapHeapState *pstate);
 static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
                             TBMIterateResult *tbmres);
 static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node);
 static inline void BitmapPrefetch(BitmapHeapScanState *node,
-              HeapScanDesc scan);
+              TableScanDesc scan);
 static bool BitmapShouldInitializeSharedState(
                                  ParallelBitmapHeapState *pstate);
 
@@ -76,12 +76,12 @@ static TupleTableSlot *
 BitmapHeapNext(BitmapHeapScanState *node)
 {
    ExprContext *econtext;
-   HeapScanDesc scan;
+   TableScanDesc scan;
+
    TIDBitmap  *tbm;
    TBMIterator *tbmiterator = NULL;
    TBMSharedIterator *shared_tbmiterator = NULL;
    TBMIterateResult *tbmres;
-   OffsetNumber targoffset;
    TupleTableSlot *slot;
    ParallelBitmapHeapState *pstate = node->pstate;
    dsa_area   *dsa = node->ss.ps.state->es_query_dsa;
@@ -191,16 +191,27 @@ BitmapHeapNext(BitmapHeapScanState *node)
 
    for (;;)
    {
-       Page        dp;
-       ItemId      lp;
-
        CHECK_FOR_INTERRUPTS();
 
-       /*
-        * Get next page of results if needed
-        */
-       if (tbmres == NULL)
+       if (node->return_empty_tuples > 0)
+       {
+           ExecStoreAllNullTuple(slot);
+           node->return_empty_tuples--;
+       }
+       else if (tbmres)
+       {
+           if (!table_scan_bitmap_pagescan_next(scan, slot))
+           {
+               node->tbmres = tbmres = NULL;
+               continue;
+           }
+       }
+       else
        {
+           /*
+            * Get next page of results if needed
+            */
+
            if (!pstate)
                node->tbmres = tbmres = tbm_iterate(tbmiterator);
            else
@@ -213,18 +224,6 @@ BitmapHeapNext(BitmapHeapScanState *node)
 
            BitmapAdjustPrefetchIterator(node, tbmres);
 
-           /*
-            * Ignore any claimed entries past what we think is the end of the
-            * relation.  (This is probably not necessary given that we got at
-            * least AccessShareLock on the table before performing any of the
-            * indexscans, but let's be safe.)
-            */
-           if (tbmres->blockno >= scan->rs_nblocks)
-           {
-               node->tbmres = tbmres = NULL;
-               continue;
-           }
-
            /*
             * We can skip fetching the heap page if we don't need any fields
             * from the heap, and the bitmap entries don't need rechecking,
@@ -240,16 +239,21 @@ BitmapHeapNext(BitmapHeapScanState *node)
            {
                /*
                 * The number of tuples on this page is put into
-                * scan->rs_ntuples; note we don't fill scan->rs_vistuples.
+                * node->return_empty_tuples; note we don't fill
+                * scan->rs_vistuples.
                 */
-               scan->rs_ntuples = tbmres->ntuples;
+               node->return_empty_tuples = tbmres->ntuples;
            }
            else
            {
                /*
                 * Fetch the current heap page and identify candidate tuples.
                 */
-               bitgetpage(scan, tbmres);
+               if (!table_scan_bitmap_pagescan(scan, tbmres))
+               {
+                   /* AM doesn't think this block is valid, skip */
+                   continue;
+               }
            }
 
            if (tbmres->ntuples >= 0)
@@ -257,51 +261,37 @@ BitmapHeapNext(BitmapHeapScanState *node)
            else
                node->lossy_pages++;
 
-           /*
-            * Set rs_cindex to first slot to examine
-            */
-           scan->rs_cindex = 0;
-
            /* Adjust the prefetch target */
            BitmapAdjustPrefetchTarget(node);
-       }
-       else
-       {
-           /*
-            * Continuing in previously obtained page; advance rs_cindex
-            */
-           scan->rs_cindex++;
-
-#ifdef USE_PREFETCH
 
            /*
-            * Try to prefetch at least a few pages even before we get to the
-            * second page if we don't stop reading after the first tuple.
+            * XXX: Note we do not prefetch here.
             */
-           if (!pstate)
-           {
-               if (node->prefetch_target < node->prefetch_maximum)
-                   node->prefetch_target++;
-           }
-           else if (pstate->prefetch_target < node->prefetch_maximum)
-           {
-               /* take spinlock while updating shared state */
-               SpinLockAcquire(&pstate->mutex);
-               if (pstate->prefetch_target < node->prefetch_maximum)
-                   pstate->prefetch_target++;
-               SpinLockRelease(&pstate->mutex);
-           }
-#endif                         /* USE_PREFETCH */
+
+           continue;
        }
 
+
+#ifdef USE_PREFETCH
+
        /*
-        * Out of range?  If so, nothing more to look at on this page
+        * Try to prefetch at least a few pages even before we get to the
+        * second page if we don't stop reading after the first tuple.
         */
-       if (scan->rs_cindex < 0 || scan->rs_cindex >= scan->rs_ntuples)
+       if (!pstate)
        {
-           node->tbmres = tbmres = NULL;
-           continue;
+           if (node->prefetch_target < node->prefetch_maximum)
+               node->prefetch_target++;
        }
+       else if (pstate->prefetch_target < node->prefetch_maximum)
+       {
+           /* take spinlock while updating shared state */
+           SpinLockAcquire(&pstate->mutex);
+           if (pstate->prefetch_target < node->prefetch_maximum)
+               pstate->prefetch_target++;
+           SpinLockRelease(&pstate->mutex);
+       }
+#endif                         /* USE_PREFETCH */
 
        /*
         * We issue prefetch requests *after* fetching the current page to try
@@ -312,52 +302,19 @@ BitmapHeapNext(BitmapHeapScanState *node)
         */
        BitmapPrefetch(node, scan);
 
-       if (node->skip_fetch)
-       {
-           /*
-            * If we don't have to fetch the tuple, just return nulls.
-            */
-           ExecStoreAllNullTuple(slot);
-       }
-       else
+       /*
+        * If we are using lossy info, we have to recheck the qual
+        * conditions at every tuple.
+        */
+       if (tbmres->recheck)
        {
-           /*
-            * Okay to fetch the tuple.
-            */
-           targoffset = scan->rs_vistuples[scan->rs_cindex];
-           dp = (Page) BufferGetPage(scan->rs_cbuf);
-           lp = PageGetItemId(dp, targoffset);
-           Assert(ItemIdIsNormal(lp));
-
-           scan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
-           scan->rs_ctup.t_len = ItemIdGetLength(lp);
-           scan->rs_ctup.t_tableOid = scan->rs_rd->rd_id;
-           ItemPointerSet(&scan->rs_ctup.t_self, tbmres->blockno, targoffset);
-
-           pgstat_count_heap_fetch(scan->rs_rd);
-
-           /*
-            * Set up the result slot to point to this tuple.  Note that the
-            * slot acquires a pin on the buffer.
-            */
-           ExecStoreBufferHeapTuple(&scan->rs_ctup,
-                                    slot,
-                                    scan->rs_cbuf);
-
-           /*
-            * If we are using lossy info, we have to recheck the qual
-            * conditions at every tuple.
-            */
-           if (tbmres->recheck)
+           econtext->ecxt_scantuple = slot;
+           if (!ExecQualAndReset(node->bitmapqualorig, econtext))
            {
-               econtext->ecxt_scantuple = slot;
-               if (!ExecQualAndReset(node->bitmapqualorig, econtext))
-               {
-                   /* Fails recheck, so drop it and loop back for another */
-                   InstrCountFiltered2(node, 1);
-                   ExecClearTuple(slot);
-                   continue;
-               }
+               /* Fails recheck, so drop it and loop back for another */
+               InstrCountFiltered2(node, 1);
+               ExecClearTuple(slot);
+               continue;
            }
        }
 
@@ -371,110 +328,6 @@ BitmapHeapNext(BitmapHeapScanState *node)
    return ExecClearTuple(slot);
 }
 
-/*
- * bitgetpage - subroutine for BitmapHeapNext()
- *
- * This routine reads and pins the specified page of the relation, then
- * builds an array indicating which tuples on the page are both potentially
- * interesting according to the bitmap, and visible according to the snapshot.
- */
-static void
-bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres)
-{
-   BlockNumber page = tbmres->blockno;
-   Buffer      buffer;
-   Snapshot    snapshot;
-   int         ntup;
-
-   /*
-    * Acquire pin on the target heap page, trading in any pin we held before.
-    */
-   Assert(page < scan->rs_nblocks);
-
-   scan->rs_cbuf = ReleaseAndReadBuffer(scan->rs_cbuf,
-                                        scan->rs_rd,
-                                        page);
-   buffer = scan->rs_cbuf;
-   snapshot = scan->rs_snapshot;
-
-   ntup = 0;
-
-   /*
-    * Prune and repair fragmentation for the whole page, if possible.
-    */
-   heap_page_prune_opt(scan->rs_rd, buffer);
-
-   /*
-    * We must hold share lock on the buffer content while examining tuple
-    * visibility.  Afterwards, however, the tuples we have found to be
-    * visible are guaranteed good as long as we hold the buffer pin.
-    */
-   LockBuffer(buffer, BUFFER_LOCK_SHARE);
-
-   /*
-    * We need two separate strategies for lossy and non-lossy cases.
-    */
-   if (tbmres->ntuples >= 0)
-   {
-       /*
-        * Bitmap is non-lossy, so we just look through the offsets listed in
-        * tbmres; but we have to follow any HOT chain starting at each such
-        * offset.
-        */
-       int         curslot;
-
-       for (curslot = 0; curslot < tbmres->ntuples; curslot++)
-       {
-           OffsetNumber offnum = tbmres->offsets[curslot];
-           ItemPointerData tid;
-           HeapTupleData heapTuple;
-
-           ItemPointerSet(&tid, page, offnum);
-           if (heap_hot_search_buffer(&tid, scan->rs_rd, buffer, snapshot,
-                                      &heapTuple, NULL, true))
-               scan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid);
-       }
-   }
-   else
-   {
-       /*
-        * Bitmap is lossy, so we must examine each item pointer on the page.
-        * But we can ignore HOT chains, since we'll check each tuple anyway.
-        */
-       Page        dp = (Page) BufferGetPage(buffer);
-       OffsetNumber maxoff = PageGetMaxOffsetNumber(dp);
-       OffsetNumber offnum;
-
-       for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
-       {
-           ItemId      lp;
-           HeapTupleData loctup;
-           bool        valid;
-
-           lp = PageGetItemId(dp, offnum);
-           if (!ItemIdIsNormal(lp))
-               continue;
-           loctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
-           loctup.t_len = ItemIdGetLength(lp);
-           loctup.t_tableOid = scan->rs_rd->rd_id;
-           ItemPointerSet(&loctup.t_self, page, offnum);
-           valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer);
-           if (valid)
-           {
-               scan->rs_vistuples[ntup++] = offnum;
-               PredicateLockTuple(scan->rs_rd, &loctup, snapshot);
-           }
-           CheckForSerializableConflictOut(valid, scan->rs_rd, &loctup,
-                                           buffer, snapshot);
-       }
-   }
-
-   LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
-
-   Assert(ntup <= MaxHeapTuplesPerPage);
-   scan->rs_ntuples = ntup;
-}
-
 /*
  * BitmapDoneInitializingSharedState - Shared state is initialized
  *
@@ -598,7 +451,7 @@ BitmapAdjustPrefetchTarget(BitmapHeapScanState *node)
  * BitmapPrefetch - Prefetch, if prefetch_pages are behind prefetch_target
  */
 static inline void
-BitmapPrefetch(BitmapHeapScanState *node, HeapScanDesc scan)
+BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
 {
 #ifdef USE_PREFETCH
    ParallelBitmapHeapState *pstate = node->pstate;
@@ -741,7 +594,7 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
    PlanState  *outerPlan = outerPlanState(node);
 
    /* rescan to release any page pin */
-   heap_rescan(node->ss.ss_currentScanDesc, NULL);
+   table_rescan(node->ss.ss_currentScanDesc, NULL);
 
    /* release bitmaps and buffers if any */
    if (node->tbmiterator)
@@ -785,7 +638,7 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
 void
 ExecEndBitmapHeapScan(BitmapHeapScanState *node)
 {
-   HeapScanDesc scanDesc;
+   TableScanDesc scanDesc;
 
    /*
     * extract information from the node
@@ -830,7 +683,7 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node)
    /*
     * close heap scan
     */
-   heap_endscan(scanDesc);
+   table_endscan(scanDesc);
 }
 
 /* ----------------------------------------------------------------
@@ -914,8 +767,7 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
     */
    ExecInitScanTupleSlot(estate, &scanstate->ss,
                          RelationGetDescr(currentRelation),
-                         &TTSOpsBufferHeapTuple);
-
+                         table_slot_callbacks(currentRelation));
 
    /*
     * Initialize result type and projection.
@@ -953,10 +805,10 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
     * Even though we aren't going to do a conventional seqscan, it is useful
     * to create a HeapScanDesc --- most of the fields in it are usable.
     */
-   scanstate->ss.ss_currentScanDesc = heap_beginscan_bm(currentRelation,
-                                                        estate->es_snapshot,
-                                                        0,
-                                                        NULL);
+   scanstate->ss.ss_currentScanDesc = table_beginscan_bm(currentRelation,
+                                                         estate->es_snapshot,
+                                                         0,
+                                                         NULL);
 
    /*
     * all done.
@@ -1104,5 +956,5 @@ ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node,
    node->pstate = pstate;
 
    snapshot = RestoreSnapshot(pstate->phs_snapshot_data);
-   heap_update_snapshot(node->ss.ss_currentScanDesc, snapshot);
+   table_scan_update_snapshot(node->ss.ss_currentScanDesc, snapshot);
 }
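
/*
 * Editorial sketch of the page/tuple iteration BitmapHeapNext() now
 * delegates to the table AM: table_scan_bitmap_pagescan() takes over the
 * job of the removed bitgetpage() (pin the page, collect visible
 * candidates), and table_scan_bitmap_pagescan_next() replaces the manual
 * rs_cindex/ExecStoreBufferHeapTuple advance.  Skip-fetch and prefetch
 * logic are omitted; the helper name is invented, the callbacks match the
 * diff.
 */
static bool
bitmap_next_sketch(TableScanDesc scan, TBMIterator *iterator,
                   TBMIterateResult **tbmres, TupleTableSlot *slot)
{
    for (;;)
    {
        if (*tbmres == NULL)
        {
            if ((*tbmres = tbm_iterate(iterator)) == NULL)
                return false;   /* bitmap exhausted */
            if (!table_scan_bitmap_pagescan(scan, *tbmres))
            {
                *tbmres = NULL; /* AM rejected this block, skip it */
                continue;
            }
        }
        if (table_scan_bitmap_pagescan_next(scan, slot))
            return true;        /* slot holds the next candidate tuple */
        *tbmres = NULL;         /* page exhausted, move to the next one */
    }
}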
index fab752058aeed8ec4844a77685f09c5a2f3a9dbc..c96db36e0dfdbf58ce95d49e13575429be7f30e2 100644 (file)
@@ -62,9 +62,12 @@ ForeignNext(ForeignScanState *node)
     */
    if (plan->fsSystemCol && !TupIsNull(slot))
    {
-       HeapTuple   tup = ExecFetchSlotHeapTuple(slot, true, NULL);
-
-       tup->t_tableOid = RelationGetRelid(node->ss.ss_currentRelation);
+       ExecMaterializeSlot(slot);
+#if 0
+       ExecSlotUpdateTupleTableoid(slot,
+                                   RelationGetRelid(node->ss.ss_currentRelation));
+#endif
+       slot->tts_tableOid = RelationGetRelid(node->ss.ss_currentRelation);
    }
 
    return slot;
index e6367ade76d9325ec33a1e0143e1b15ade54a624..1dd8bb3f3a60b7989a5671050cf8cf8a209ee555 100644 (file)
@@ -266,7 +266,7 @@ gather_getnext(GatherState *gatherstate)
    PlanState  *outerPlan = outerPlanState(gatherstate);
    TupleTableSlot *outerTupleSlot;
    TupleTableSlot *fslot = gatherstate->funnel_slot;
-   HeapTuple   tup;
+   HeapTuple tup;
 
    while (gatherstate->nreaders > 0 || gatherstate->need_to_scan_locally)
    {
@@ -316,7 +316,7 @@ gather_readnext(GatherState *gatherstate)
    for (;;)
    {
        TupleQueueReader *reader;
-       HeapTuple   tup;
+       HeapTuple tup;
        bool        readerdone;
 
        /* Check for async events, particularly messages from workers. */
index 51d910bd5ee0854b7d81a5806752c4e6123a9b9f..54ef0ca7b7d68418468e53807d6e5d6f19dc6911 100644 (file)
@@ -45,7 +45,7 @@
  */
 typedef struct GMReaderTupleBuffer
 {
-   HeapTuple  *tuple;          /* array of length MAX_TUPLE_STORE */
+   HeapTuple *tuple;       /* array of length MAX_TUPLE_STORE */
    int         nTuples;        /* number of tuples currently stored */
    int         readCounter;    /* index of next tuple to extract */
    bool        done;           /* true if reader is known exhausted */
@@ -55,7 +55,7 @@ static TupleTableSlot *ExecGatherMerge(PlanState *pstate);
 static int32 heap_compare_slots(Datum a, Datum b, void *arg);
 static TupleTableSlot *gather_merge_getnext(GatherMergeState *gm_state);
 static HeapTuple gm_readnext_tuple(GatherMergeState *gm_state, int nreader,
-                 bool nowait, bool *done);
+                                     bool nowait, bool *done);
 static void ExecShutdownGatherMergeWorkers(GatherMergeState *node);
 static void gather_merge_setup(GatherMergeState *gm_state);
 static void gather_merge_init(GatherMergeState *gm_state);
@@ -637,7 +637,7 @@ static bool
 gather_merge_readnext(GatherMergeState *gm_state, int reader, bool nowait)
 {
    GMReaderTupleBuffer *tuple_buffer;
-   HeapTuple   tup;
+   HeapTuple tup;
 
    /*
     * If we're being asked to generate a tuple from the leader, then we just
@@ -716,7 +716,7 @@ gm_readnext_tuple(GatherMergeState *gm_state, int nreader, bool nowait,
                  bool *done)
 {
    TupleQueueReader *reader;
-   HeapTuple   tup;
+   HeapTuple tup;
 
    /* Check for async events, particularly messages from workers. */
    CHECK_FOR_INTERRUPTS();
index 8498273207b4334b3dac10fbe15f51ab7807b2c4..c39c4f453dbaea1716d492268d551be65bfb9ddf 100644 (file)
@@ -31,6 +31,7 @@
 #include "postgres.h"
 
 #include "access/relscan.h"
+#include "access/tableam.h"
 #include "access/visibilitymap.h"
 #include "executor/execdebug.h"
 #include "executor/nodeIndexonlyscan.h"
@@ -117,7 +118,7 @@ IndexOnlyNext(IndexOnlyScanState *node)
     */
    while ((tid = index_getnext_tid(scandesc, direction)) != NULL)
    {
-       HeapTuple   tuple = NULL;
+       bool    tuple_from_heap = false;
 
        CHECK_FOR_INTERRUPTS();
 
@@ -163,17 +164,18 @@ IndexOnlyNext(IndexOnlyScanState *node)
             * Rats, we have to visit the heap to check visibility.
             */
            InstrCountTuples2(node, 1);
-           tuple = index_fetch_heap(scandesc);
-           if (tuple == NULL)
+           if (!index_fetch_heap(scandesc, slot))
                continue;       /* no visible tuple, try next index entry */
 
+           ExecClearTuple(slot);
+
            /*
             * Only MVCC snapshots are supported here, so there should be no
             * need to keep following the HOT chain once a visible entry has
             * been found.  If we did want to allow that, we'd need to keep
             * more state to remember not to call index_getnext_tid next time.
             */
-           if (scandesc->xs_continue_hot)
+           if (scandesc->xs_heap_continue)
                elog(ERROR, "non-MVCC snapshots are not supported in index-only scans");
 
            /*
@@ -182,13 +184,15 @@ IndexOnlyNext(IndexOnlyScanState *node)
             * but it's not clear whether it's a win to do so.  The next index
             * entry might require a visit to the same heap page.
             */
+
+           tuple_from_heap = true;
        }
 
        /*
         * Fill the scan tuple slot with data from the index.  This might be
-        * provided in either HeapTuple or IndexTuple format.  Conceivably an
-        * index AM might fill both fields, in which case we prefer the heap
-        * format, since it's probably a bit cheaper to fill a slot from.
+        * provided in either HeapTuple or IndexTuple format.  Conceivably
+        * an index AM might fill both fields, in which case we prefer the
+        * heap format, since it's probably a bit cheaper to fill a slot from.
         */
        if (scandesc->xs_hitup)
        {
@@ -200,6 +204,7 @@ IndexOnlyNext(IndexOnlyScanState *node)
            Assert(slot->tts_tupleDescriptor->natts ==
                   scandesc->xs_hitupdesc->natts);
            ExecForceStoreHeapTuple(scandesc->xs_hitup, slot);
+           slot->tts_tableOid = RelationGetRelid(scandesc->heapRelation);
        }
        else if (scandesc->xs_itup)
            StoreIndexTuple(slot, scandesc->xs_itup, scandesc->xs_itupdesc);
@@ -242,7 +247,7 @@ IndexOnlyNext(IndexOnlyScanState *node)
         * anyway, then we already have the tuple-level lock and can skip the
         * page lock.
         */
-       if (tuple == NULL)
+       if (!tuple_from_heap)
            PredicateLockPage(scandesc->heapRelation,
                              ItemPointerGetBlockNumber(tid),
                              estate->es_snapshot);
@@ -424,7 +429,7 @@ ExecIndexOnlyMarkPos(IndexOnlyScanState *node)
 {
    EState     *estate = node->ss.ps.state;
 
-   if (estate->es_epqTuple != NULL)
+   if (estate->es_epqTupleSlot != NULL)
    {
        /*
         * We are inside an EvalPlanQual recheck.  If a test tuple exists for
@@ -459,7 +464,7 @@ ExecIndexOnlyRestrPos(IndexOnlyScanState *node)
 {
    EState     *estate = node->ss.ps.state;
 
-   if (estate->es_epqTuple != NULL)
+   if (estate->es_epqTupleSlot != NULL)
    {
        /* See comments in ExecIndexOnlyMarkPos */
        Index       scanrelid = ((Scan *) node->ss.ps.plan)->scanrelid;
@@ -527,7 +532,8 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
     * suitable data anyway.)
     */
    tupDesc = ExecTypeFromTL(node->indextlist);
-   ExecInitScanTupleSlot(estate, &indexstate->ss, tupDesc, &TTSOpsHeapTuple);
+   ExecInitScanTupleSlot(estate, &indexstate->ss, tupDesc,
+                         table_slot_callbacks(currentRelation));
 
    /*
     * Initialize result type and projection info.  The node's targetlist will
index 6b222a9f64cb863c13b12590d9ebeb1093243d98..b38dadaa9a53e5219981bf59273fa822185e4e92 100644 (file)
@@ -31,6 +31,7 @@
 
 #include "access/nbtree.h"
 #include "access/relscan.h"
+#include "access/tableam.h"
 #include "catalog/pg_am.h"
 #include "executor/execdebug.h"
 #include "executor/nodeIndexscan.h"
@@ -51,7 +52,7 @@
 typedef struct
 {
    pairingheap_node ph_node;
-   HeapTuple   htup;
+   HeapTuple htup;
    Datum      *orderbyvals;
    bool       *orderbynulls;
 } ReorderTuple;
@@ -84,7 +85,6 @@ IndexNext(IndexScanState *node)
    ExprContext *econtext;
    ScanDirection direction;
    IndexScanDesc scandesc;
-   HeapTuple   tuple;
    TupleTableSlot *slot;
 
    /*
@@ -131,20 +131,10 @@ IndexNext(IndexScanState *node)
    /*
     * ok, now that we have what we need, fetch the next tuple.
     */
-   while ((tuple = index_getnext(scandesc, direction)) != NULL)
+   while (index_getnext_slot(scandesc, direction, slot))
    {
        CHECK_FOR_INTERRUPTS();
 
-       /*
-        * Store the scanned tuple in the scan tuple slot of the scan state.
-        * Note: we pass 'false' because tuples returned by amgetnext are
-        * pointers onto disk pages and must not be pfree()'d.
-        */
-       ExecStoreBufferHeapTuple(tuple, /* tuple to store */
-                                slot,  /* slot to store in */
-                                scandesc->xs_cbuf);    /* buffer containing
-                                                        * tuple */
-
        /*
         * If the index was lossy, we have to recheck the index quals using
         * the fetched tuple.
@@ -184,7 +174,6 @@ IndexNextWithReorder(IndexScanState *node)
    EState     *estate;
    ExprContext *econtext;
    IndexScanDesc scandesc;
-   HeapTuple   tuple;
    TupleTableSlot *slot;
    ReorderTuple *topmost = NULL;
    bool        was_exact;
@@ -253,9 +242,12 @@ IndexNextWithReorder(IndexScanState *node)
                                scandesc->xs_orderbynulls,
                                node) <= 0)
            {
+               HeapTuple tuple;
+
                tuple = reorderqueue_pop(node);
 
                /* Pass 'true', as the tuple in the queue is a palloc'd copy */
+               slot->tts_tableOid = RelationGetRelid(scandesc->heapRelation);
                ExecStoreHeapTuple(tuple, slot, true);
                return slot;
            }
@@ -272,8 +264,7 @@ IndexNextWithReorder(IndexScanState *node)
         */
 next_indextuple:
        slot = node->ss.ss_ScanTupleSlot;
-       tuple = index_getnext(scandesc, ForwardScanDirection);
-       if (!tuple)
+       if (!index_getnext_slot(scandesc, ForwardScanDirection, slot))
        {
            /*
             * No more tuples from the index.  But we still need to drain any
@@ -283,14 +274,6 @@ next_indextuple:
            continue;
        }
 
-       /*
-        * Store the scanned tuple in the scan tuple slot of the scan state.
-        */
-       ExecStoreBufferHeapTuple(tuple, /* tuple to store */
-                                slot,  /* slot to store in */
-                                scandesc->xs_cbuf);    /* buffer containing
-                                                        * tuple */
-
        /*
         * If the index was lossy, we have to recheck the index quals and
         * ORDER BY expressions using the fetched tuple.
@@ -358,6 +341,8 @@ next_indextuple:
                                                      topmost->orderbynulls,
                                                      node) > 0))
        {
+           HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, NULL);
+
            /* Put this tuple to the queue */
            reorderqueue_push(node, tuple, lastfetched_vals, lastfetched_nulls);
            continue;
@@ -515,7 +500,7 @@ reorderqueue_push(IndexScanState *node, HeapTuple tuple,
 static HeapTuple
 reorderqueue_pop(IndexScanState *node)
 {
-   HeapTuple   result;
+   HeapTuple result;
    ReorderTuple *topmost;
    int         i;
 
@@ -851,7 +836,7 @@ ExecIndexMarkPos(IndexScanState *node)
 {
    EState     *estate = node->ss.ps.state;
 
-   if (estate->es_epqTuple != NULL)
+   if (estate->es_epqTupleSlot != NULL)
    {
        /*
         * We are inside an EvalPlanQual recheck.  If a test tuple exists for
@@ -886,7 +871,7 @@ ExecIndexRestrPos(IndexScanState *node)
 {
    EState     *estate = node->ss.ps.state;
 
-   if (estate->es_epqTuple != NULL)
+   if (estate->es_epqTupleSlot != NULL)
    {
        /* See comments in ExecIndexMarkPos */
        Index       scanrelid = ((Scan *) node->ss.ps.plan)->scanrelid;
@@ -950,7 +935,7 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
     */
    ExecInitScanTupleSlot(estate, &indexstate->ss,
                          RelationGetDescr(currentRelation),
-                         &TTSOpsBufferHeapTuple);
+                         table_slot_callbacks(currentRelation));
 
    if (node->indexorderby != NIL)
        indexstate->ss.ps.scanopsfixed = false;
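
/*
 * Editorial sketch: the scan-loop shape IndexNext() uses after this change.
 * index_getnext_slot() subsumes the old index_getnext() +
 * ExecStoreBufferHeapTuple() pair, fetching the heap row directly into a
 * slot that was created with the relation's table_slot_callbacks().  Lossy
 * qual rechecking is elided; the helper name is invented.
 */
static TupleTableSlot *
index_next_sketch(IndexScanDesc scandesc, ScanDirection direction,
                  TupleTableSlot *slot)
{
    while (index_getnext_slot(scandesc, direction, slot))
    {
        CHECK_FOR_INTERRUPTS();
        /* recheck lossy-index quals against the slot here, else: */
        return slot;
    }
    return NULL;                /* index exhausted */
}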
index 7887388b9e9fb3bf2356f09904adc57e7b12f641..4451779ff8a744454fcae3671448a34f1836bad7 100644 (file)
@@ -22,6 +22,7 @@
 #include "postgres.h"
 
 #include "access/htup_details.h"
+#include "access/tableam.h"
 #include "access/xact.h"
 #include "executor/executor.h"
 #include "executor/nodeLockRows.h"
@@ -66,6 +67,8 @@ lnext:
    /* We don't need EvalPlanQual unless we get updated tuple version(s) */
    epq_needed = false;
 
+   EvalPlanQualBegin(&node->lr_epqstate, estate);
+
    /*
     * Attempt to lock the source tuple(s).  (Note we only have locking
     * rowmarks in lr_arowMarks.)
@@ -74,21 +77,19 @@ lnext:
    {
        ExecAuxRowMark *aerm = (ExecAuxRowMark *) lfirst(lc);
        ExecRowMark *erm = aerm->rowmark;
-       HeapTuple  *testTuple;
+       TupleTableSlot *markSlot;
        Datum       datum;
        bool        isNull;
-       HeapTupleData tuple;
-       Buffer      buffer;
        HeapUpdateFailureData hufd;
        LockTupleMode lockmode;
        HTSU_Result test;
-       HeapTuple   copyTuple;
+       ItemPointerData tid;
 
        /* clear any leftover test tuple for this rel */
-       testTuple = &(node->lr_curtuples[erm->rti - 1]);
-       if (*testTuple != NULL)
-           heap_freetuple(*testTuple);
-       *testTuple = NULL;
+       // used to be the following - can we skip having lr_curtuples?
+       //testSlot = node->lr_curtuples[erm->rti - 1];
+       markSlot = EvalPlanQualSlot(&node->lr_epqstate, erm->relation, erm->rti);
+       ExecClearTuple(markSlot);
 
        /* if child rel, must check whether it produced this row */
        if (erm->rti != erm->prti)
@@ -129,25 +130,25 @@ lnext:
            bool        updated = false;
 
            fdwroutine = GetFdwRoutineForRelation(erm->relation, false);
+
            /* this should have been checked already, but let's be safe */
            if (fdwroutine->RefetchForeignRow == NULL)
                ereport(ERROR,
                        (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                         errmsg("cannot lock rows in foreign table \"%s\"",
                                RelationGetRelationName(erm->relation))));
-           copyTuple = fdwroutine->RefetchForeignRow(estate,
-                                                     erm,
-                                                     datum,
-                                                     &updated);
-           if (copyTuple == NULL)
+
+           markSlot = fdwroutine->RefetchForeignRow(estate,
+                                                    erm,
+                                                    datum,
+                                                    markSlot,
+                                                    &updated);
+           if (markSlot == NULL)
            {
                /* couldn't get the lock, so skip this row */
                goto lnext;
            }
 
-           /* save locked tuple for possible EvalPlanQual testing below */
-           *testTuple = copyTuple;
-
            /*
             * if FDW says tuple was updated before getting locked, we need to
             * perform EPQ testing to see if quals are still satisfied
@@ -159,7 +160,7 @@ lnext:
        }
 
        /* okay, try to lock the tuple */
-       tuple.t_self = *((ItemPointer) DatumGetPointer(datum));
+       tid = *((ItemPointer) DatumGetPointer(datum));
        switch (erm->markType)
        {
            case ROW_MARK_EXCLUSIVE:
@@ -180,11 +181,13 @@ lnext:
                break;
        }
 
-       test = heap_lock_tuple(erm->relation, &tuple,
-                              estate->es_output_cid,
-                              lockmode, erm->waitPolicy, true,
-                              &buffer, &hufd);
-       ReleaseBuffer(buffer);
+       test = table_lock_tuple(erm->relation, &tid, estate->es_snapshot,
+                               markSlot, estate->es_output_cid,
+                               lockmode, erm->waitPolicy,
+                               (IsolationUsesXactSnapshot() ? 0 : TUPLE_LOCK_FLAG_FIND_LAST_VERSION)
+                               | TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS,
+                               &hufd);
+
        switch (test)
        {
            case HeapTupleWouldBlock:
@@ -211,6 +214,15 @@ lnext:
 
            case HeapTupleMayBeUpdated:
                /* got the lock successfully */
+               if (hufd.traversed)
+               {
+                   /* locked tuple saved in markSlot for EvalPlanQual testing below */
+
+                   /* Remember we need to do EPQ testing */
+                   epq_needed = true;
+
+                   /* Continue loop until we have all target tuples */
+               }
                break;
 
            case HeapTupleUpdated:
@@ -218,41 +230,19 @@ lnext:
                    ereport(ERROR,
                            (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
                             errmsg("could not serialize access due to concurrent update")));
-               if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
+               /* tuple was concurrently updated; skip this row */
+               goto lnext;
+
+           case HeapTupleDeleted:
+               if (IsolationUsesXactSnapshot())
                    ereport(ERROR,
                            (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
-                            errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
-
-               if (ItemPointerEquals(&hufd.ctid, &tuple.t_self))
-               {
-                   /* Tuple was deleted, so don't return it */
-                   goto lnext;
-               }
-
-               /* updated, so fetch and lock the updated version */
-               copyTuple = EvalPlanQualFetch(estate, erm->relation,
-                                             lockmode, erm->waitPolicy,
-                                             &hufd.ctid, hufd.xmax);
-
-               if (copyTuple == NULL)
-               {
-                   /*
-                    * Tuple was deleted; or it's locked and we're under SKIP
-                    * LOCKED policy, so don't return it
-                    */
-                   goto lnext;
-               }
-               /* remember the actually locked tuple's TID */
-               tuple.t_self = copyTuple->t_self;
-
-               /* Save locked tuple for EvalPlanQual testing below */
-               *testTuple = copyTuple;
-
-               /* Remember we need to do EPQ testing */
-               epq_needed = true;
-
-               /* Continue loop until we have all target tuples */
-               break;
+                            errmsg("could not serialize access due to concurrent update")));
+               /*
+                * Tuple was deleted; or it's locked and we're under SKIP
+                * LOCKED policy, so don't return it
+                */
+               goto lnext;
 
            case HeapTupleInvisible:
                elog(ERROR, "attempted to lock invisible tuple");
@@ -264,7 +254,7 @@ lnext:
        }
 
        /* Remember locked tuple's TID for EPQ testing and WHERE CURRENT OF */
-       erm->curCtid = tuple.t_self;
+       erm->curCtid = tid;
    }
 
    /*
@@ -273,7 +263,7 @@ lnext:
    if (epq_needed)
    {
        /* Initialize EPQ machinery */
-       EvalPlanQualBegin(&node->lr_epqstate, estate);
+       /* (EvalPlanQualBegin() already ran before the locking loop above) */
 
        /*
         * Transfer any already-fetched tuples into the EPQ state, and fetch a
@@ -286,26 +276,31 @@ lnext:
        {
            ExecAuxRowMark *aerm = (ExecAuxRowMark *) lfirst(lc);
            ExecRowMark *erm = aerm->rowmark;
-           HeapTupleData tuple;
-           Buffer      buffer;
+           TupleTableSlot *markSlot;
+
+           markSlot = EvalPlanQualSlot(&node->lr_epqstate, erm->relation, erm->rti);
 
            /* skip non-active child tables, but clear their test tuples */
            if (!erm->ermActive)
            {
                Assert(erm->rti != erm->prti);  /* check it's child table */
-               EvalPlanQualSetTuple(&node->lr_epqstate, erm->rti, NULL);
+               ExecClearTuple(markSlot);
                continue;
            }
 
            /* was tuple updated and fetched above? */
-           if (node->lr_curtuples[erm->rti - 1] != NULL)
+           if (!TupIsNull(markSlot))
            {
+#if FIXME
                /* yes, so set it as the EPQ test tuple for this rel */
                EvalPlanQualSetTuple(&node->lr_epqstate,
                                     erm->rti,
                                     node->lr_curtuples[erm->rti - 1]);
                /* freeing this tuple is now the responsibility of EPQ */
                node->lr_curtuples[erm->rti - 1] = NULL;
+#endif
                continue;
            }
 
@@ -314,15 +309,13 @@ lnext:
            Assert(ItemPointerIsValid(&(erm->curCtid)));
 
            /* okay, fetch the tuple */
-           tuple.t_self = erm->curCtid;
-           if (!heap_fetch(erm->relation, SnapshotAny, &tuple, &buffer,
-                           false, NULL))
+           if (!table_fetch_row_version(erm->relation, &erm->curCtid, SnapshotAny, markSlot,
+                              NULL))
                elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck");
 
            /* successful, copy and store tuple */
-           EvalPlanQualSetTuple(&node->lr_epqstate, erm->rti,
-                                heap_copytuple(&tuple));
-           ReleaseBuffer(buffer);
+           /* nothing further to do: the row already lives in markSlot */
        }
 
        /*
@@ -405,8 +398,8 @@ ExecInitLockRows(LockRows *node, EState *estate, int eflags)
     * Create workspace in which we can remember per-RTE locked tuples
     */
    lrstate->lr_ntables = estate->es_range_table_size;
-   lrstate->lr_curtuples = (HeapTuple *)
-       palloc0(lrstate->lr_ntables * sizeof(HeapTuple));
+   lrstate->lr_curtuples = (TupleTableSlot **)
+       palloc0(lrstate->lr_ntables * sizeof(TupleTableSlot *));
 
    /*
     * Locate the ExecRowMark(s) that this node is responsible for, and
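
Distilled from the nodeLockRows.c changes above, the per-rowmark locking flow
now looks roughly like the sketch below.  The helper name is invented and the
skip/error cases are elided; LockTupleExclusive stands in for the mode that
the real code derives from erm->markType:

    #include "access/tableam.h"
    #include "access/xact.h"
    #include "executor/executor.h"

    static bool
    lock_rowmark_sketch(EState *estate, EPQState *epqstate, ExecRowMark *erm,
                        ItemPointer tid, bool *epq_needed)
    {
        /* the per-relation EPQ slot doubles as storage for the locked tuple */
        TupleTableSlot *markSlot = EvalPlanQualSlot(epqstate, erm->relation,
                                                    erm->rti);
        HeapUpdateFailureData hufd;
        HTSU_Result test;

        ExecClearTuple(markSlot);
        test = table_lock_tuple(erm->relation, tid, estate->es_snapshot,
                                markSlot, estate->es_output_cid,
                                LockTupleExclusive, erm->waitPolicy,
                                (IsolationUsesXactSnapshot() ?
                                 0 : TUPLE_LOCK_FLAG_FIND_LAST_VERSION) |
                                TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS,
                                &hufd);
        if (test != HeapTupleMayBeUpdated)
            return false;       /* caller skips this row */

        /* the lock chased a newer version; plan quals must be re-checked */
        if (hufd.traversed)
            *epq_needed = true;
        return true;
    }
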
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 3aa0d4d0eb748528c506f1e26153cb03cbc80eca..d1ac9fc2e9c99595456ba94751d898be1ac5ce91 100644 (file)
 #include "postgres.h"
 
 #include "access/htup_details.h"
+#include "access/tableam.h"
 #include "access/xact.h"
 #include "catalog/catalog.h"
+#include "catalog/pg_am.h"
 #include "commands/trigger.h"
 #include "executor/execPartition.h"
 #include "executor/executor.h"
@@ -168,15 +170,12 @@ ExecProcessReturning(ResultRelInfo *resultRelInfo,
        econtext->ecxt_scantuple = tupleSlot;
    else
    {
-       HeapTuple   tuple;
-
        /*
         * RETURNING expressions might reference the tableoid column, so
         * initialize t_tableOid before evaluating them.
         */
        Assert(!TupIsNull(econtext->ecxt_scantuple));
-       tuple = ExecFetchSlotHeapTuple(econtext->ecxt_scantuple, true, NULL);
-       tuple->t_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
+       econtext->ecxt_scantuple->tts_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
    }
    econtext->ecxt_outertuple = planSlot;
 
@@ -194,31 +193,33 @@ ExecProcessReturning(ResultRelInfo *resultRelInfo,
  */
 static void
 ExecCheckHeapTupleVisible(EState *estate,
-                         HeapTuple tuple,
-                         Buffer buffer)
+                         Relation rel,
+                         TupleTableSlot *slot)
 {
    if (!IsolationUsesXactSnapshot())
        return;
 
-   /*
-    * We need buffer pin and lock to call HeapTupleSatisfiesVisibility.
-    * Caller should be holding pin, but not lock.
-    */
-   LockBuffer(buffer, BUFFER_LOCK_SHARE);
-   if (!HeapTupleSatisfiesVisibility(tuple, estate->es_snapshot, buffer))
+   if (!table_satisfies_snapshot(rel, slot, estate->es_snapshot))
    {
+       Datum       xminDatum;
+       TransactionId xmin;
+       bool        isnull;
+
+       xminDatum = slot_getsysattr(slot, MinTransactionIdAttributeNumber, &isnull);
+       Assert(!isnull);
+       xmin = DatumGetTransactionId(xminDatum);
+
        /*
         * We should not raise a serialization failure if the conflict is
         * against a tuple inserted by our own transaction, even if it's not
         * visible to our snapshot.  (This would happen, for example, if
         * conflicting keys are proposed for insertion in a single command.)
         */
-       if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple->t_data)))
+       if (!TransactionIdIsCurrentTransactionId(xmin))
            ereport(ERROR,
                    (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
                     errmsg("could not serialize access due to concurrent update")));
    }
-   LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 }
 
 /*
@@ -227,21 +228,19 @@ ExecCheckHeapTupleVisible(EState *estate,
 static void
 ExecCheckTIDVisible(EState *estate,
                    ResultRelInfo *relinfo,
-                   ItemPointer tid)
+                   ItemPointer tid,
+                   TupleTableSlot *tempSlot)
 {
    Relation    rel = relinfo->ri_RelationDesc;
-   Buffer      buffer;
-   HeapTupleData tuple;
 
    /* Redundantly check isolation level */
    if (!IsolationUsesXactSnapshot())
        return;
 
-   tuple.t_self = *tid;
-   if (!heap_fetch(rel, SnapshotAny, &tuple, &buffer, false, NULL))
+   if (!table_fetch_row_version(rel, tid, SnapshotAny, tempSlot, NULL))
        elog(ERROR, "failed to fetch conflicting tuple for ON CONFLICT");
-   ExecCheckHeapTupleVisible(estate, &tuple, buffer);
-   ReleaseBuffer(buffer);
+   ExecCheckHeapTupleVisible(estate, rel, tempSlot);
+   ExecClearTuple(tempSlot);
 }
 
 /* ----------------------------------------------------------------
@@ -260,7 +259,6 @@ ExecInsert(ModifyTableState *mtstate,
           EState *estate,
           bool canSetTag)
 {
-   HeapTuple   tuple;
    ResultRelInfo *resultRelInfo;
    Relation    resultRelationDesc;
    List       *recheckIndexes = NIL;
@@ -269,11 +267,7 @@ ExecInsert(ModifyTableState *mtstate,
    ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
    OnConflictAction onconflict = node->onConflictAction;
 
-   /*
-    * get the heap tuple out of the tuple table slot, making sure we have a
-    * writable copy
-    */
-   tuple = ExecFetchSlotHeapTuple(slot, true, NULL);
+   ExecMaterializeSlot(slot);
 
    /*
     * get information on the (current) result relation
@@ -293,26 +287,16 @@ ExecInsert(ModifyTableState *mtstate,
    if (resultRelInfo->ri_TrigDesc &&
        resultRelInfo->ri_TrigDesc->trig_insert_before_row)
    {
-       slot = ExecBRInsertTriggers(estate, resultRelInfo, slot);
-
-       if (slot == NULL)       /* "do nothing" */
-           return NULL;
-
-       /* trigger might have changed tuple */
-       tuple = ExecFetchSlotHeapTuple(slot, true, NULL);
+       if (!ExecBRInsertTriggers(estate, resultRelInfo, slot))
+           return NULL;        /* "do nothing" */
    }
 
    /* INSTEAD OF ROW INSERT Triggers */
    if (resultRelInfo->ri_TrigDesc &&
        resultRelInfo->ri_TrigDesc->trig_insert_instead_row)
    {
-       slot = ExecIRInsertTriggers(estate, resultRelInfo, slot);
-
-       if (slot == NULL)       /* "do nothing" */
-           return NULL;
-
-       /* trigger might have changed tuple */
-       tuple = ExecFetchSlotHeapTuple(slot, true, NULL);
+       if (!ExecIRInsertTriggers(estate, resultRelInfo, slot))
+           return NULL;        /* "do nothing" */
    }
    else if (resultRelInfo->ri_FdwRoutine)
    {
@@ -327,14 +311,11 @@ ExecInsert(ModifyTableState *mtstate,
        if (slot == NULL)       /* "do nothing" */
            return NULL;
 
-       /* FDW might have changed tuple */
-       tuple = ExecFetchSlotHeapTuple(slot, true, NULL);
-
        /*
         * AFTER ROW Triggers or RETURNING expressions might reference the
         * tableoid column, so initialize t_tableOid before evaluating them.
         */
-       tuple->t_tableOid = RelationGetRelid(resultRelationDesc);
+       slot->tts_tableOid = RelationGetRelid(resultRelationDesc);
    }
    else
    {
@@ -344,7 +325,7 @@ ExecInsert(ModifyTableState *mtstate,
         * Constraints might reference the tableoid column, so initialize
         * t_tableOid before evaluating them.
         */
-       tuple->t_tableOid = RelationGetRelid(resultRelationDesc);
+       slot->tts_tableOid = RelationGetRelid(resultRelationDesc);
 
        /*
         * Check any RLS WITH CHECK policies.
@@ -436,9 +417,11 @@ ExecInsert(ModifyTableState *mtstate,
                     * In case of ON CONFLICT DO NOTHING, do nothing. However,
                     * verify that the tuple is visible to the executor's MVCC
                     * snapshot at higher isolation levels.
+                    *
+                    * XXX: the input slot is reused as scratch space here;
+                    * ExecCheckTIDVisible() clears it again before returning.
                     */
                    Assert(onconflict == ONCONFLICT_NOTHING);
-                   ExecCheckTIDVisible(estate, resultRelInfo, &conflictTid);
+                   ExecCheckTIDVisible(estate, resultRelInfo, &conflictTid, slot);
                    InstrCountTuples2(&mtstate->ps, 1);
                    return NULL;
                }
@@ -451,24 +434,22 @@ ExecInsert(ModifyTableState *mtstate,
             * waiting for the whole transaction to complete.
             */
            specToken = SpeculativeInsertionLockAcquire(GetCurrentTransactionId());
-           HeapTupleHeaderSetSpeculativeToken(tuple->t_data, specToken);
 
            /* insert the tuple, with the speculative token */
-           heap_insert(resultRelationDesc, tuple,
-                       estate->es_output_cid,
-                       HEAP_INSERT_SPECULATIVE,
-                       NULL);
+           table_insert_speculative(resultRelationDesc, slot,
+                                    estate->es_output_cid,
+                                    HEAP_INSERT_SPECULATIVE,
+                                    NULL,
+                                    specToken);
 
            /* insert index entries for tuple */
-           recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
+           recheckIndexes = ExecInsertIndexTuples(slot,
                                                   estate, true, &specConflict,
                                                   arbiterIndexes);
 
            /* adjust the tuple's state accordingly */
-           if (!specConflict)
-               heap_finish_speculative(resultRelationDesc, tuple);
-           else
-               heap_abort_speculative(resultRelationDesc, tuple);
+           table_complete_speculative(resultRelationDesc, slot,
+                                      specToken, specConflict);
 
            /*
             * Wake up anyone waiting for our decision.  They will re-check
@@ -496,26 +477,23 @@ ExecInsert(ModifyTableState *mtstate,
        {
            /*
             * insert the tuple normally.
-            *
-            * Note: heap_insert returns the tid (location) of the new tuple
-            * in the t_self field.
             */
-           heap_insert(resultRelationDesc, tuple,
-                       estate->es_output_cid,
-                       0, NULL);
+           table_insert(resultRelationDesc, slot,
+                        estate->es_output_cid,
+                        0, NULL);
 
            /* insert index entries for tuple */
            if (resultRelInfo->ri_NumIndices > 0)
-               recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
-                                                      estate, false, NULL,
+               recheckIndexes = ExecInsertIndexTuples(slot, estate, false, NULL,
                                                       NIL);
        }
    }
 
    if (canSetTag)
    {
        (estate->es_processed)++;
-       setLastTid(&(tuple->t_self));
+       setLastTid(&(slot->tts_tid));
    }
 
    /*
@@ -530,7 +508,7 @@ ExecInsert(ModifyTableState *mtstate,
    {
        ExecARUpdateTriggers(estate, resultRelInfo, NULL,
                             NULL,
-                            tuple,
+                            slot,
                             NULL,
                             mtstate->mt_transition_capture);
 
@@ -542,7 +520,7 @@ ExecInsert(ModifyTableState *mtstate,
    }
 
    /* AFTER ROW INSERT Triggers */
-   ExecARInsertTriggers(estate, resultRelInfo, tuple, recheckIndexes,
+   ExecARInsertTriggers(estate, resultRelInfo, slot, recheckIndexes,
                         ar_insert_trig_tcs);
 
    list_free(recheckIndexes);
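
For readability, here is the ON CONFLICT speculative-insertion protocol as it
stands after this hunk, extracted into a plain sketch.  The helper name is
invented; the retry loop and the waking of waiters are elided:

    #include "access/heapam.h"
    #include "access/tableam.h"
    #include "access/xact.h"
    #include "storage/lmgr.h"

    static void
    speculative_insert_sketch(Relation rel, TupleTableSlot *slot,
                              EState *estate, List *arbiterIndexes)
    {
        uint32      specToken;
        bool        specConflict = false;
        List       *recheckIndexes;

        /* the token lets concurrent inserters wait for our verdict */
        specToken = SpeculativeInsertionLockAcquire(GetCurrentTransactionId());

        table_insert_speculative(rel, slot, estate->es_output_cid,
                                 HEAP_INSERT_SPECULATIVE, NULL, specToken);

        /* inserting the index entries is what detects a conflict */
        recheckIndexes = ExecInsertIndexTuples(slot, estate, true,
                                               &specConflict, arbiterIndexes);

        /* confirm the insertion, or kill the speculative tuple */
        table_complete_speculative(rel, slot, specToken, specConflict);

        SpeculativeInsertionLockRelease(GetCurrentTransactionId());
        list_free(recheckIndexes);
    }
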
@@ -602,7 +580,7 @@ ExecDelete(ModifyTableState *mtstate,
           bool canSetTag,
           bool changingPart,
           bool *tupleDeleted,
-          TupleTableSlot **epqslot)
+          TupleTableSlot **epqreturnslot)
 {
    ResultRelInfo *resultRelInfo;
    Relation    resultRelationDesc;
@@ -627,7 +605,7 @@ ExecDelete(ModifyTableState *mtstate,
        bool        dodelete;
 
        dodelete = ExecBRDeleteTriggers(estate, epqstate, resultRelInfo,
-                                       tupleid, oldtuple, epqslot);
+                                       tupleid, oldtuple, epqreturnslot);
 
        if (!dodelete)          /* "do nothing" */
            return NULL;
@@ -647,8 +625,6 @@ ExecDelete(ModifyTableState *mtstate,
    }
    else if (resultRelInfo->ri_FdwRoutine)
    {
-       HeapTuple   tuple;
-
        /*
         * delete from foreign table: let the FDW do it
         *
@@ -656,10 +632,7 @@ ExecDelete(ModifyTableState *mtstate,
         * although the FDW can return some other slot if it wants.  Set up
         * the slot's tupdesc so the FDW doesn't need to do that for itself.
         */
-       slot = estate->es_trig_tuple_slot;
-       if (slot->tts_tupleDescriptor != RelationGetDescr(resultRelationDesc))
-           ExecSetSlotDescriptor(slot, RelationGetDescr(resultRelationDesc));
-
+       slot = ExecTriggerGetReturnSlot(estate, resultRelationDesc);
        slot = resultRelInfo->ri_FdwRoutine->ExecForeignDelete(estate,
                                                               resultRelInfo,
                                                               slot,
@@ -674,8 +647,9 @@ ExecDelete(ModifyTableState *mtstate,
         */
        if (TTS_EMPTY(slot))
            ExecStoreAllNullTuple(slot);
-       tuple = ExecFetchSlotHeapTuple(slot, true, NULL);
-       tuple->t_tableOid = RelationGetRelid(resultRelationDesc);
+
+       ExecMaterializeSlot(slot);
+       slot->tts_tableOid = RelationGetRelid(resultRelationDesc);
    }
    else
    {
@@ -689,12 +663,58 @@ ExecDelete(ModifyTableState *mtstate,
         * mode transactions.
         */
 ldelete:;
-       result = heap_delete(resultRelationDesc, tupleid,
+       result = table_delete(resultRelationDesc, tupleid,
                             estate->es_output_cid,
+                            estate->es_snapshot,
                             estate->es_crosscheck_snapshot,
                             true /* wait for commit */ ,
                             &hufd,
                             changingPart);
+
+       if (result == HeapTupleUpdated && !IsolationUsesXactSnapshot())
+       {
+           EvalPlanQualBegin(epqstate, estate);
+           slot = EvalPlanQualSlot(epqstate, resultRelationDesc, resultRelInfo->ri_RangeTableIndex);
+
+           result = table_lock_tuple(resultRelationDesc, tupleid,
+                                     estate->es_snapshot,
+                                     slot, estate->es_output_cid,
+                                     LockTupleExclusive, LockWaitBlock,
+                                     TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
+                                     &hufd);
+           /*
+            * FIXME: should we be able to assert
+            * result != HeapTupleUpdated && hufd.traversed here?
+            */
+           if (result == HeapTupleMayBeUpdated)
+           {
+               TupleTableSlot *epqslot;
+
+               epqslot = EvalPlanQual(estate,
+                                      epqstate,
+                                      resultRelationDesc,
+                                      resultRelInfo->ri_RangeTableIndex,
+                                      slot);
+               if (TupIsNull(epqslot))
+               {
+                   /* Tuple no longer passes the quals; nothing more to do */
+                   return NULL;
+               }
+
+               /* If requested, pass the updated row back instead of deleting it */
+               if (epqreturnslot)
+               {
+                   *epqreturnslot = epqslot;
+                   return NULL;
+               }
+
+               goto ldelete;
+           }
+           else if (result == HeapTupleInvisible)
+           {
+               /* tuple is not visible; nothing to do */
+               return NULL;
+           }
+       }
+
        switch (result)
        {
            case HeapTupleSelfUpdated:
@@ -740,39 +760,16 @@ ldelete:;
                    ereport(ERROR,
                            (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
                             errmsg("could not serialize access due to concurrent update")));
-               if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
+               else
+                   /* shouldn't get here */
+                   elog(ERROR, "wrong table_delete status: %u", result);
+               break;
+
+           case HeapTupleDeleted:
+               if (IsolationUsesXactSnapshot())
                    ereport(ERROR,
                            (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
-                            errmsg("tuple to be deleted was already moved to another partition due to concurrent update")));
-
-               if (!ItemPointerEquals(tupleid, &hufd.ctid))
-               {
-                   TupleTableSlot *my_epqslot;
-
-                   my_epqslot = EvalPlanQual(estate,
-                                             epqstate,
-                                             resultRelationDesc,
-                                             resultRelInfo->ri_RangeTableIndex,
-                                             LockTupleExclusive,
-                                             &hufd.ctid,
-                                             hufd.xmax);
-                   if (!TupIsNull(my_epqslot))
-                   {
-                       *tupleid = hufd.ctid;
-
-                       /*
-                        * If requested, skip delete and pass back the updated
-                        * row.
-                        */
-                       if (epqslot)
-                       {
-                           *epqslot = my_epqslot;
-                           return NULL;
-                       }
-                       else
-                           goto ldelete;
-                   }
-               }
+                            errmsg("could not serialize access due to concurrent delete")));
                /* tuple already deleted; nothing to do */
                return NULL;
 
@@ -834,36 +831,31 @@ ldelete:;
         * gotta fetch it.  We can use the trigger tuple slot.
         */
        TupleTableSlot *rslot;
-       HeapTupleData deltuple;
-       Buffer      delbuffer;
 
        if (resultRelInfo->ri_FdwRoutine)
        {
            /* FDW must have provided a slot containing the deleted row */
            Assert(!TupIsNull(slot));
-           delbuffer = InvalidBuffer;
        }
        else
        {
-           slot = estate->es_trig_tuple_slot;
+           slot = ExecTriggerGetReturnSlot(estate, resultRelationDesc);
            if (oldtuple != NULL)
            {
-               deltuple = *oldtuple;
-               delbuffer = InvalidBuffer;
+               ExecForceStoreHeapTuple(oldtuple, slot);
            }
            else
            {
-               deltuple.t_self = *tupleid;
-               if (!heap_fetch(resultRelationDesc, SnapshotAny,
-                               &deltuple, &delbuffer, false, NULL))
+               if (!table_fetch_row_version(resultRelationDesc, tupleid, SnapshotAny,
+                                slot, NULL))
                    elog(ERROR, "failed to fetch deleted tuple for DELETE RETURNING");
            }
-
-           if (slot->tts_tupleDescriptor != RelationGetDescr(resultRelationDesc))
-               ExecSetSlotDescriptor(slot, RelationGetDescr(resultRelationDesc));
-           ExecStoreHeapTuple(&deltuple, slot, false);
        }
 
+       /* FIXME: centralize setting tts_tableOid instead of doing it at each call site */
+       slot->tts_tableOid = RelationGetRelid(resultRelationDesc);
+       planSlot->tts_tableOid = RelationGetRelid(resultRelationDesc);
+
        rslot = ExecProcessReturning(resultRelInfo, slot, planSlot);
 
        /*
@@ -873,8 +865,6 @@ ldelete:;
        ExecMaterializeSlot(rslot);
 
        ExecClearTuple(slot);
-       if (BufferIsValid(delbuffer))
-           ReleaseBuffer(delbuffer);
 
        return rslot;
    }
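
ExecDelete and ExecUpdate now share the same READ COMMITTED retry shape: when
table_delete()/table_update() reports HeapTupleUpdated, the latest row
version is locked through the AM and EvalPlanQual() re-evaluates the quals
against that slot, replacing the old EvalPlanQualFetch() dance.  Condensed
into a sketch (helper name invented, error cases elided):

    #include "access/tableam.h"
    #include "executor/executor.h"

    static TupleTableSlot *
    epq_recheck_sketch(EState *estate, EPQState *epqstate,
                       Relation rel, Index rti, ItemPointer tid)
    {
        TupleTableSlot *slot;
        HeapUpdateFailureData hufd;

        EvalPlanQualBegin(epqstate, estate);
        slot = EvalPlanQualSlot(epqstate, rel, rti);

        if (table_lock_tuple(rel, tid, estate->es_snapshot, slot,
                             estate->es_output_cid, LockTupleExclusive,
                             LockWaitBlock, TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
                             &hufd) != HeapTupleMayBeUpdated)
            return NULL;        /* row vanished; caller gives up on it */

        /* a NULL result means the new version no longer passes the quals */
        return EvalPlanQual(estate, epqstate, rel, rti, slot);
    }
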
@@ -914,7 +904,6 @@ ExecUpdate(ModifyTableState *mtstate,
           EState *estate,
           bool canSetTag)
 {
-   HeapTuple   tuple;
    ResultRelInfo *resultRelInfo;
    Relation    resultRelationDesc;
    HTSU_Result result;
@@ -928,11 +917,7 @@ ExecUpdate(ModifyTableState *mtstate,
    if (IsBootstrapProcessingMode())
        elog(ERROR, "cannot UPDATE during bootstrap");
 
-   /*
-    * get the heap tuple out of the tuple table slot, making sure we have a
-    * writable copy
-    */
-   tuple = ExecFetchSlotHeapTuple(slot, true, NULL);
+   ExecMaterializeSlot(slot);
 
    /*
     * get information on the (current) result relation
@@ -944,28 +929,18 @@ ExecUpdate(ModifyTableState *mtstate,
    if (resultRelInfo->ri_TrigDesc &&
        resultRelInfo->ri_TrigDesc->trig_update_before_row)
    {
-       slot = ExecBRUpdateTriggers(estate, epqstate, resultRelInfo,
-                                   tupleid, oldtuple, slot);
-
-       if (slot == NULL)       /* "do nothing" */
-           return NULL;
-
-       /* trigger might have changed tuple */
-       tuple = ExecFetchSlotHeapTuple(slot, true, NULL);
+       if (!ExecBRUpdateTriggers(estate, epqstate, resultRelInfo,
+                                 tupleid, oldtuple, slot))
+           return NULL;        /* "do nothing" */
    }
 
    /* INSTEAD OF ROW UPDATE Triggers */
    if (resultRelInfo->ri_TrigDesc &&
        resultRelInfo->ri_TrigDesc->trig_update_instead_row)
    {
-       slot = ExecIRUpdateTriggers(estate, resultRelInfo,
-                                   oldtuple, slot);
-
-       if (slot == NULL)       /* "do nothing" */
-           return NULL;
-
-       /* trigger might have changed tuple */
-       tuple = ExecFetchSlotHeapTuple(slot, true, NULL);
+       if (!ExecIRUpdateTriggers(estate, resultRelInfo,
+                                 oldtuple, slot))
+           return NULL;        /* "do nothing" */
    }
    else if (resultRelInfo->ri_FdwRoutine)
    {
@@ -980,25 +955,23 @@ ExecUpdate(ModifyTableState *mtstate,
        if (slot == NULL)       /* "do nothing" */
            return NULL;
 
-       /* FDW might have changed tuple */
-       tuple = ExecFetchSlotHeapTuple(slot, true, NULL);
-
        /*
         * AFTER ROW Triggers or RETURNING expressions might reference the
         * tableoid column, so initialize t_tableOid before evaluating them.
         */
-       tuple->t_tableOid = RelationGetRelid(resultRelationDesc);
+       slot->tts_tableOid = RelationGetRelid(resultRelationDesc);
    }
    else
    {
        LockTupleMode lockmode;
        bool        partition_constraint_failed;
+       bool        update_indexes;
 
        /*
         * Constraints might reference the tableoid column, so initialize
         * t_tableOid before evaluating them.
         */
-       tuple->t_tableOid = RelationGetRelid(resultRelationDesc);
+       slot->tts_tableOid = RelationGetRelid(resultRelationDesc);
 
        /*
         * Check any RLS UPDATE WITH CHECK policies
@@ -1011,6 +984,9 @@ ExecUpdate(ModifyTableState *mtstate,
         */
 lreplace:;
 
+       /* Ensure the slot is independent of underlying storage, e.g. when retrying after EPQ */
+       ExecMaterializeSlot(slot);
+
        /*
         * If partition constraint fails, this row might get moved to another
         * partition, in which case we should check the RLS CHECK policy just
@@ -1108,7 +1084,6 @@ lreplace:;
                else
                {
                    slot = ExecFilterJunk(resultRelInfo->ri_junkFilter, epqslot);
-                   tuple = ExecFetchSlotHeapTuple(slot, true, NULL);
                    goto lreplace;
                }
            }
@@ -1179,11 +1154,54 @@ lreplace:;
         * needed for referential integrity updates in transaction-snapshot
         * mode transactions.
         */
-       result = heap_update(resultRelationDesc, tupleid, tuple,
-                            estate->es_output_cid,
-                            estate->es_crosscheck_snapshot,
-                            true /* wait for commit */ ,
-                            &hufd, &lockmode);
+       result = table_update(resultRelationDesc, tupleid, slot,
+                             estate->es_output_cid,
+                             estate->es_snapshot,
+                             estate->es_crosscheck_snapshot,
+                             true /* wait for commit */,
+                             &hufd, &lockmode, &update_indexes);
+
+       if (result == HeapTupleUpdated && !IsolationUsesXactSnapshot())
+       {
+           TupleTableSlot *inputslot;
+
+           EvalPlanQualBegin(epqstate, estate);
+
+           inputslot = EvalPlanQualSlot(epqstate, resultRelationDesc, resultRelInfo->ri_RangeTableIndex);
+           ExecCopySlot(inputslot, slot);
+
+           result = table_lock_tuple(resultRelationDesc, tupleid,
+                                     estate->es_snapshot,
+                                     inputslot, estate->es_output_cid,
+                                     lockmode, LockWaitBlock,
+                                     TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
+                                     &hufd);
+           /*
+            * FIXME: should we be able to assert
+            * result != HeapTupleUpdated && hufd.traversed here?
+            */
+           if (result == HeapTupleMayBeUpdated)
+           {
+               TupleTableSlot *epqslot;
+
+               epqslot = EvalPlanQual(estate,
+                                      epqstate,
+                                      resultRelationDesc,
+                                      resultRelInfo->ri_RangeTableIndex,
+                                      inputslot);
+               if (TupIsNull(epqslot))
+               {
+                   /* Tuple no longer passes the quals; nothing more to do */
+                   return NULL;
+               }
+               slot = ExecFilterJunk(resultRelInfo->ri_junkFilter, epqslot);
+               goto lreplace;
+           }
+           else if (result == HeapTupleInvisible)
+           {
+               /* tuple is not visible; nothing to do */
+               return NULL;
+           }
+       }
+
        switch (result)
        {
            case HeapTupleSelfUpdated:
@@ -1224,34 +1242,21 @@ lreplace:;
                break;
 
            case HeapTupleUpdated:
+               /*
+                * Under lower isolation levels the HeapTupleUpdated case was
+                * already handled above, so getting here means we are in
+                * transaction-snapshot mode.
+                */
+               Assert(IsolationUsesXactSnapshot());
+               ereport(ERROR,
+                       (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+                        errmsg("could not serialize access due to concurrent update")));
+               break;
+
+           case HeapTupleDeleted:
                if (IsolationUsesXactSnapshot())
                    ereport(ERROR,
                            (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
-                            errmsg("could not serialize access due to concurrent update")));
-               if (ItemPointerIndicatesMovedPartitions(&hufd.ctid))
-                   ereport(ERROR,
-                           (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
-                            errmsg("tuple to be updated was already moved to another partition due to concurrent update")));
-
-               if (!ItemPointerEquals(tupleid, &hufd.ctid))
-               {
-                   TupleTableSlot *epqslot;
-
-                   epqslot = EvalPlanQual(estate,
-                                          epqstate,
-                                          resultRelationDesc,
-                                          resultRelInfo->ri_RangeTableIndex,
-                                          lockmode,
-                                          &hufd.ctid,
-                                          hufd.xmax);
-                   if (!TupIsNull(epqslot))
-                   {
-                       *tupleid = hufd.ctid;
-                       slot = ExecFilterJunk(resultRelInfo->ri_junkFilter, epqslot);
-                       tuple = ExecFetchSlotHeapTuple(slot, true, NULL);
-                       goto lreplace;
-                   }
-               }
+                            errmsg("could not serialize access due to concurrent delete")));
                /* tuple already deleted; nothing to do */
                return NULL;
 
@@ -1260,6 +1265,7 @@ lreplace:;
                return NULL;
        }
 
        /*
         * Note: instead of having to update the old index tuples associated
         * with the heap tuple, all we do is form and insert new index tuples.
@@ -1272,20 +1278,19 @@ lreplace:;
         * insert index entries for tuple
         *
         * Note: heap_update returns the tid (location) of the new tuple in
-        * the t_self field.
+        * the t_self field.  FIXME: stale with table_update, which reports
+        * the new location via the slot's tts_tid.
         *
         * If it's a HOT update, we mustn't insert new index entries.
         */
-       if (resultRelInfo->ri_NumIndices > 0 && !HeapTupleIsHeapOnly(tuple))
-           recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
-                                                  estate, false, NULL, NIL);
+       if (resultRelInfo->ri_NumIndices > 0 && update_indexes)
+           recheckIndexes = ExecInsertIndexTuples(slot, estate, false, NULL, NIL);
    }
 
    if (canSetTag)
        (estate->es_processed)++;
 
    /* AFTER ROW UPDATE Triggers */
-   ExecARUpdateTriggers(estate, resultRelInfo, tupleid, oldtuple, tuple,
+   ExecARUpdateTriggers(estate, resultRelInfo, tupleid, oldtuple, slot,
                         recheckIndexes,
                         mtstate->operation == CMD_INSERT ?
                         mtstate->mt_oc_transition_capture :
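
Note the new update_indexes output parameter: whether fresh index entries are
needed is no longer deduced from heap-specific HOT status in the executor;
table_update() reports it.  Sketched usage (helper name invented, failure
handling elided):

    #include "access/tableam.h"
    #include "executor/executor.h"

    static void
    update_and_reindex_sketch(Relation rel, ItemPointer otid,
                              TupleTableSlot *slot, EState *estate)
    {
        HeapUpdateFailureData hufd;
        LockTupleMode lockmode;
        bool        update_indexes;
        List       *recheckIndexes = NIL;

        if (table_update(rel, otid, slot, estate->es_output_cid,
                         estate->es_snapshot, estate->es_crosscheck_snapshot,
                         true /* wait for commit */ ,
                         &hufd, &lockmode,
                         &update_indexes) == HeapTupleMayBeUpdated &&
            update_indexes)
            recheckIndexes = ExecInsertIndexTuples(slot, estate,
                                                   false, NULL, NIL);

        list_free(recheckIndexes);
    }
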
@@ -1336,11 +1341,12 @@ ExecOnConflictUpdate(ModifyTableState *mtstate,
    ExprContext *econtext = mtstate->ps.ps_ExprContext;
    Relation    relation = resultRelInfo->ri_RelationDesc;
    ExprState  *onConflictSetWhere = resultRelInfo->ri_onConflict->oc_WhereClause;
-   HeapTupleData tuple;
    HeapUpdateFailureData hufd;
    LockTupleMode lockmode;
    HTSU_Result test;
-   Buffer      buffer;
+   Datum       xminDatum;
+   TransactionId xmin;
+   bool        isnull;
 
    /* Determine lock mode to use */
    lockmode = ExecUpdateLockMode(estate, resultRelInfo);
@@ -1351,10 +1357,11 @@ ExecOnConflictUpdate(ModifyTableState *mtstate,
     * previous conclusion that the tuple is conclusively committed is not
     * true anymore.
     */
-   tuple.t_self = *conflictTid;
-   test = heap_lock_tuple(relation, &tuple, estate->es_output_cid,
-                          lockmode, LockWaitBlock, false, &buffer,
-                          &hufd);
+   test = table_lock_tuple(relation, conflictTid,
+                           estate->es_snapshot,
+                           mtstate->mt_existing, estate->es_output_cid,
+                           lockmode, LockWaitBlock, TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS,
+                           &hufd);
    switch (test)
    {
        case HeapTupleMayBeUpdated:
@@ -1379,7 +1386,13 @@ ExecOnConflictUpdate(ModifyTableState *mtstate,
             * that for SQL MERGE, an exception must be raised in the event of
             * an attempt to update the same row twice.
             */
-           if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple.t_data)))
+           xminDatum = slot_getsysattr(mtstate->mt_existing,
+                                       MinTransactionIdAttributeNumber,
+                                       &isnull);
+           Assert(!isnull);
+           xmin = DatumGetTransactionId(xminDatum);
+
+           if (TransactionIdIsCurrentTransactionId(xmin))
                ereport(ERROR,
                        (errcode(ERRCODE_CARDINALITY_VIOLATION),
                         errmsg("ON CONFLICT DO UPDATE command cannot affect row a second time"),
@@ -1420,7 +1433,16 @@ ExecOnConflictUpdate(ModifyTableState *mtstate,
             * loop here, as the new version of the row might not conflict
             * anymore, or the conflicting tuple has actually been deleted.
             */
-           ReleaseBuffer(buffer);
+           ExecClearTuple(mtstate->mt_existing);
+           return false;
+
+       case HeapTupleDeleted:
+           if (IsolationUsesXactSnapshot())
+               ereport(ERROR,
+                       (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+                        errmsg("could not serialize access due to concurrent delete")));
+
+           ExecClearTuple(mtstate->mt_existing);
            return false;
 
        default:
@@ -1442,10 +1464,7 @@ ExecOnConflictUpdate(ModifyTableState *mtstate,
     * snapshot.  This is in line with the way UPDATE deals with newer tuple
     * versions.
     */
-   ExecCheckHeapTupleVisible(estate, &tuple, buffer);
-
-   /* Store target's existing tuple in the state's dedicated slot */
-   ExecStoreBufferHeapTuple(&tuple, mtstate->mt_existing, buffer);
+   ExecCheckHeapTupleVisible(estate, relation, mtstate->mt_existing);
 
    /*
     * Make tuple and any needed join variables available to ExecQual and
@@ -1460,7 +1479,7 @@ ExecOnConflictUpdate(ModifyTableState *mtstate,
 
    if (!ExecQual(onConflictSetWhere, econtext))
    {
-       ReleaseBuffer(buffer);
+       ExecClearTuple(mtstate->mt_existing);
        InstrCountFiltered1(&mtstate->ps, 1);
        return true;            /* done with the tuple */
    }
@@ -1500,12 +1519,11 @@ ExecOnConflictUpdate(ModifyTableState *mtstate,
     */
 
    /* Execute UPDATE with projection */
-   *returning = ExecUpdate(mtstate, &tuple.t_self, NULL,
+   *returning = ExecUpdate(mtstate, conflictTid, NULL,
                            mtstate->mt_conflproj, planSlot,
                            &mtstate->mt_epqstate, mtstate->ps.state,
                            canSetTag);
-
-   ReleaseBuffer(buffer);
+   ExecClearTuple(mtstate->mt_existing);
    return true;
 }
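
Both ExecCheckHeapTupleVisible() and ExecOnConflictUpdate() above now read
xmin through the slot rather than dereferencing a HeapTupleHeader, since
under pluggable storage there may be no heap tuple to poke at.  The recurring
idiom as a sketch (helper name invented):

    #include "access/sysattr.h"
    #include "access/xact.h"
    #include "executor/tuptable.h"

    static bool
    inserted_by_current_xact_sketch(TupleTableSlot *slot)
    {
        Datum       xminDatum;
        TransactionId xmin;
        bool        isnull;

        /* system attributes are fetched via the slot's AM callbacks */
        xminDatum = slot_getsysattr(slot, MinTransactionIdAttributeNumber,
                                    &isnull);
        Assert(!isnull);
        xmin = DatumGetTransactionId(xminDatum);

        return TransactionIdIsCurrentTransactionId(xmin);
    }
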
 
@@ -1688,6 +1706,7 @@ ExecPrepareTupleRouting(ModifyTableState *mtstate,
    estate->es_result_relation_info = partrel;
 
    /* Get the heap tuple out of the given slot. */
+   /* FIXME: this will leak memory for some slot types */
    tuple = ExecFetchSlotHeapTuple(slot, true, NULL);
 
    /*
@@ -1826,7 +1845,7 @@ ExecModifyTable(PlanState *pstate)
    ItemPointer tupleid;
    ItemPointerData tuple_ctid;
    HeapTupleData oldtupdata;
    HeapTuple   oldtuple;
 
    CHECK_FOR_INTERRUPTS();
 
@@ -1839,7 +1858,7 @@ ExecModifyTable(PlanState *pstate)
     * case it is within a CTE subplan.  Hence this test must be here, not in
     * ExecInitModifyTable.)
     */
-   if (estate->es_epqTuple != NULL)
+   if (estate->es_epqTupleSlot != NULL)
        elog(ERROR, "ModifyTable should not be called during EvalPlanQual");
 
    /*
@@ -1982,7 +2001,7 @@ ExecModifyTable(PlanState *pstate)
                                                 &isNull);
                    /* shouldn't ever get a null result... */
                    if (isNull)
                        elog(ERROR, "ctid is NULL");
 
                    tupleid = (ItemPointer) DatumGetPointer(datum);
                    tuple_ctid = *tupleid;  /* be sure we don't free ctid!! */
@@ -2123,6 +2142,8 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
    mtstate->resultRelInfo = estate->es_result_relations + node->resultRelIndex;
    mtstate->mt_scans = (TupleTableSlot **) palloc0(sizeof(TupleTableSlot *) * nplans);
 
    /* If modifying a partitioned table, initialize the root table info */
    if (node->rootResultRelIndex >= 0)
        mtstate->rootResultRelInfo = estate->es_root_result_relations +
@@ -2190,7 +2211,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
        mtstate->mt_plans[i] = ExecInitNode(subplan, estate, eflags);
        mtstate->mt_scans[i] =
            ExecInitExtraTupleSlot(mtstate->ps.state, ExecGetResultType(mtstate->mt_plans[i]),
-                                  &TTSOpsHeapTuple);
+                                  table_slot_callbacks(resultRelInfo->ri_RelationDesc));
 
        /* Also let FDWs init themselves for foreign-table result rels */
        if (!resultRelInfo->ri_usesFdwDirectModify &&
@@ -2250,8 +2271,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
    if (update_tuple_routing_needed)
    {
        ExecSetupChildParentMapForSubplan(mtstate);
-       mtstate->mt_root_tuple_slot = MakeTupleTableSlot(RelationGetDescr(rel),
-                                                        &TTSOpsHeapTuple);
+       mtstate->mt_root_tuple_slot = table_gimmegimmeslot(rel, NULL);
    }
 
    /*
@@ -2344,6 +2364,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
        ExprContext *econtext;
        TupleDesc   relationDesc;
        TupleDesc   tupDesc;
+       const TupleTableSlotOps *tts_cb;
 
        /* insert may only have one plan, inheritance is not expanded */
        Assert(nplans == 1);
@@ -2354,6 +2375,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
 
        econtext = mtstate->ps.ps_ExprContext;
        relationDesc = resultRelInfo->ri_RelationDesc->rd_att;
+       tts_cb = table_slot_callbacks(resultRelInfo->ri_RelationDesc);
 
        /*
         * Initialize slot for the existing tuple.  If we'll be performing
@@ -2364,7 +2386,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
        mtstate->mt_existing =
            ExecInitExtraTupleSlot(mtstate->ps.state,
                                   mtstate->mt_partition_tuple_routing ?
-                                  NULL : relationDesc, &TTSOpsBufferHeapTuple);
+                                  NULL : relationDesc, tts_cb);
 
        /* carried forward solely for the benefit of explain */
        mtstate->mt_excludedtlist = node->exclRelTlist;
@@ -2385,7 +2407,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
        mtstate->mt_conflproj =
            ExecInitExtraTupleSlot(mtstate->ps.state,
                                   mtstate->mt_partition_tuple_routing ?
-                                  NULL : tupDesc, &TTSOpsHeapTuple);
+                                  NULL : tupDesc, tts_cb);
        resultRelInfo->ri_onConflict->oc_ProjTupdesc = tupDesc;
 
        /* build UPDATE SET projection state */
@@ -2488,15 +2510,18 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
            for (i = 0; i < nplans; i++)
            {
                JunkFilter *j;
+               TupleTableSlot *junkresslot;
 
                subplan = mtstate->mt_plans[i]->plan;
                if (operation == CMD_INSERT || operation == CMD_UPDATE)
                    ExecCheckPlanOutput(resultRelInfo->ri_RelationDesc,
                                        subplan->targetlist);
 
+               junkresslot =
+                   ExecInitExtraTupleSlot(estate, NULL,
+                                          table_slot_callbacks(resultRelInfo->ri_RelationDesc));
                j = ExecInitJunkFilter(subplan->targetlist,
-                                      ExecInitExtraTupleSlot(estate, NULL,
-                                                             &TTSOpsHeapTuple));
+                                      junkresslot);
 
                if (operation == CMD_UPDATE || operation == CMD_DELETE)
                {
@@ -2540,16 +2565,6 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
        }
    }
 
-   /*
-    * Set up a tuple table slot for use for trigger output tuples. In a plan
-    * containing multiple ModifyTable nodes, all can share one such slot, so
-    * we keep it in the estate. The tuple being inserted doesn't come from a
-    * buffer.
-    */
-   if (estate->es_trig_tuple_slot == NULL)
-       estate->es_trig_tuple_slot = ExecInitExtraTupleSlot(estate, NULL,
-                                                           &TTSOpsHeapTuple);
-
    /*
     * Lastly, if this is not the primary (canSetTag) ModifyTable node, add it
     * to estate->es_auxmodifytables so that it will be run to completion by
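
One more substitution that recurs through this file: direct heap_fetch() plus
explicit buffer management is replaced by table_fetch_row_version(), which
leaves the row in a slot and keeps any buffer pin inside the AM.  Sketch
(helper name invented; the trailing NULL argument mirrors the patch's call
sites):

    #include "access/tableam.h"
    #include "utils/snapmgr.h"

    static void
    fetch_old_row_sketch(Relation rel, ItemPointer tid, TupleTableSlot *slot)
    {
        if (!table_fetch_row_version(rel, tid, SnapshotAny, slot, NULL))
            elog(ERROR, "failed to fetch tuple");
        /* no ReleaseBuffer() here; the slot owns whatever pin exists */
    }
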
diff --git a/src/backend/executor/nodeSamplescan.c b/src/backend/executor/nodeSamplescan.c
index 78735fa15bc31ddf094390d9f260a85bca723b9e..5351ec347ff6f51bb8194a578bfcf8844fcc71e6 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "access/hash.h"
 #include "access/relscan.h"
+#include "access/tableam.h"
 #include "access/tsmapi.h"
 #include "executor/executor.h"
 #include "executor/nodeSamplescan.h"
@@ -28,9 +29,7 @@
 
 static TupleTableSlot *SampleNext(SampleScanState *node);
 static void tablesample_init(SampleScanState *scanstate);
-static HeapTuple tablesample_getnext(SampleScanState *scanstate);
-static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset,
-                  HeapScanDesc scan);
+static TupleTableSlot *tablesample_getnext(SampleScanState *scanstate);
 
 /* ----------------------------------------------------------------
  *                     Scan Support
@@ -46,9 +45,6 @@ static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset,
 static TupleTableSlot *
 SampleNext(SampleScanState *node)
 {
-   HeapTuple   tuple;
-   TupleTableSlot *slot;
-
    /*
     * if this is first call within a scan, initialize
     */
@@ -58,18 +54,7 @@ SampleNext(SampleScanState *node)
    /*
     * get the next tuple, and store it in our result slot
     */
-   tuple = tablesample_getnext(node);
-
-   slot = node->ss.ss_ScanTupleSlot;
-
-   if (tuple)
-       ExecStoreBufferHeapTuple(tuple, /* tuple to store */
-                                slot,  /* slot to store in */
-                                node->ss.ss_currentScanDesc->rs_cbuf); /* tuple's buffer */
-   else
-       ExecClearTuple(slot);
-
-   return slot;
+   return tablesample_getnext(node);
 }
 
 /*
@@ -147,7 +132,7 @@ ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
    /* and create slot with appropriate rowtype */
    ExecInitScanTupleSlot(estate, &scanstate->ss,
                          RelationGetDescr(scanstate->ss.ss_currentRelation),
-                         &TTSOpsBufferHeapTuple);
+                         table_slot_callbacks(scanstate->ss.ss_currentRelation));
 
    /*
     * Initialize result type and projection.
@@ -219,7 +204,7 @@ ExecEndSampleScan(SampleScanState *node)
     * close heap scan
     */
    if (node->ss.ss_currentScanDesc)
-       heap_endscan(node->ss.ss_currentScanDesc);
+       table_endscan(node->ss.ss_currentScanDesc);
 }
 
 /* ----------------------------------------------------------------
@@ -234,6 +219,9 @@ ExecReScanSampleScan(SampleScanState *node)
 {
    /* Remember we need to do BeginSampleScan again (if we did it at all) */
    node->begun = false;
+   node->done = false;
+   node->haveblock = false;
+   node->donetuples = 0;
 
    ExecScanReScan(&node->ss);
 }
@@ -255,6 +243,7 @@ tablesample_init(SampleScanState *scanstate)
    int         i;
    ListCell   *arg;
 
+   scanstate->donetuples = 0;
    params = (Datum *) palloc(list_length(scanstate->args) * sizeof(Datum));
 
    i = 0;
@@ -319,19 +308,19 @@ tablesample_init(SampleScanState *scanstate)
    if (scanstate->ss.ss_currentScanDesc == NULL)
    {
        scanstate->ss.ss_currentScanDesc =
-           heap_beginscan_sampling(scanstate->ss.ss_currentRelation,
-                                   scanstate->ss.ps.state->es_snapshot,
-                                   0, NULL,
-                                   scanstate->use_bulkread,
-                                   allow_sync,
-                                   scanstate->use_pagemode);
+           table_beginscan_sampling(scanstate->ss.ss_currentRelation,
+                                      scanstate->ss.ps.state->es_snapshot,
+                                      0, NULL,
+                                      scanstate->use_bulkread,
+                                      allow_sync,
+                                      scanstate->use_pagemode);
    }
    else
    {
-       heap_rescan_set_params(scanstate->ss.ss_currentScanDesc, NULL,
-                              scanstate->use_bulkread,
-                              allow_sync,
-                              scanstate->use_pagemode);
+       table_rescan_set_params(scanstate->ss.ss_currentScanDesc, NULL,
+                                 scanstate->use_bulkread,
+                                 allow_sync,
+                                 scanstate->use_pagemode);
    }
 
    pfree(params);
@@ -342,224 +331,49 @@ tablesample_init(SampleScanState *scanstate)
 
 /*
  * Get next tuple from TABLESAMPLE method.
- *
- * Note: an awful lot of this is copied-and-pasted from heapam.c.  It would
- * perhaps be better to refactor to share more code.
  */
-static HeapTuple
+static TupleTableSlot *
 tablesample_getnext(SampleScanState *scanstate)
 {
-   TsmRoutine *tsm = scanstate->tsmroutine;
-   HeapScanDesc scan = scanstate->ss.ss_currentScanDesc;
-   HeapTuple   tuple = &(scan->rs_ctup);
-   Snapshot    snapshot = scan->rs_snapshot;
-   bool        pagemode = scan->rs_pageatatime;
-   BlockNumber blockno;
-   Page        page;
-   bool        all_visible;
-   OffsetNumber maxoffset;
-
-   if (!scan->rs_inited)
-   {
-       /*
-        * return null immediately if relation is empty
-        */
-       if (scan->rs_nblocks == 0)
-       {
-           Assert(!BufferIsValid(scan->rs_cbuf));
-           tuple->t_data = NULL;
-           return NULL;
-       }
-       if (tsm->NextSampleBlock)
-       {
-           blockno = tsm->NextSampleBlock(scanstate);
-           if (!BlockNumberIsValid(blockno))
-           {
-               tuple->t_data = NULL;
-               return NULL;
-           }
-       }
-       else
-           blockno = scan->rs_startblock;
-       Assert(blockno < scan->rs_nblocks);
-       heapgetpage(scan, blockno);
-       scan->rs_inited = true;
-   }
-   else
-   {
-       /* continue from previously returned page/tuple */
-       blockno = scan->rs_cblock;  /* current page */
-   }
+   TableScanDesc scan = scanstate->ss.ss_currentScanDesc;
+   TupleTableSlot *slot = scanstate->ss.ss_ScanTupleSlot;
 
-   /*
-    * When not using pagemode, we must lock the buffer during tuple
-    * visibility checks.
-    */
-   if (!pagemode)
-       LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+   ExecClearTuple(slot);
 
-   page = (Page) BufferGetPage(scan->rs_cbuf);
-   all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
-   maxoffset = PageGetMaxOffsetNumber(page);
+   if (scanstate->done)
+       return NULL;
 
    for (;;)
    {
-       OffsetNumber tupoffset;
-       bool        finished;
-
-       CHECK_FOR_INTERRUPTS();
-
-       /* Ask the tablesample method which tuples to check on this page. */
-       tupoffset = tsm->NextSampleTuple(scanstate,
-                                        blockno,
-                                        maxoffset);
-
-       if (OffsetNumberIsValid(tupoffset))
+       if (!scanstate->haveblock)
        {
-           ItemId      itemid;
-           bool        visible;
-
-           /* Skip invalid tuple pointers. */
-           itemid = PageGetItemId(page, tupoffset);
-           if (!ItemIdIsNormal(itemid))
-               continue;
-
-           tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid);
-           tuple->t_len = ItemIdGetLength(itemid);
-           ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
-
-           if (all_visible)
-               visible = true;
-           else
-               visible = SampleTupleVisible(tuple, tupoffset, scan);
-
-           /* in pagemode, heapgetpage did this for us */
-           if (!pagemode)
-               CheckForSerializableConflictOut(visible, scan->rs_rd, tuple,
-                                               scan->rs_cbuf, snapshot);
-
-           if (visible)
-           {
-               /* Found visible tuple, return it. */
-               if (!pagemode)
-                   LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
-               break;
-           }
-           else
+           if (!table_scan_sample_next_block(scan, scanstate))
            {
-               /* Try next tuple from same page. */
-               continue;
-           }
-       }
+               scanstate->haveblock = false;
+               scanstate->done = true;
 
-       /*
-        * if we get here, it means we've exhausted the items on this page and
-        * it's time to move to the next.
-        */
-       if (!pagemode)
-           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+               /* exhausted relation */
+               return NULL;
+           }
 
-       if (tsm->NextSampleBlock)
-       {
-           blockno = tsm->NextSampleBlock(scanstate);
-           Assert(!scan->rs_syncscan);
-           finished = !BlockNumberIsValid(blockno);
+           scanstate->haveblock = true;
        }
-       else
-       {
-           /* Without NextSampleBlock, just do a plain forward seqscan. */
-           blockno++;
-           if (blockno >= scan->rs_nblocks)
-               blockno = 0;
 
+       if (!table_scan_sample_next_tuple(scan, scanstate, slot))
+       {
            /*
-            * Report our new scan position for synchronization purposes.
-            *
-            * Note: we do this before checking for end of scan so that the
-            * final state of the position hint is back at the start of the
-            * rel.  That's not strictly necessary, but otherwise when you run
-            * the same query multiple times the starting position would shift
-            * a little bit backwards on every invocation, which is confusing.
-            * We don't guarantee any specific ordering in general, though.
+            * If we get here, it means we've exhausted the items on this page
+            * and it's time to move to the next.
             */
-           if (scan->rs_syncscan)
-               ss_report_location(scan->rs_rd, blockno);
-
-           finished = (blockno == scan->rs_startblock);
+           scanstate->haveblock = false;
+           continue;
        }
 
-       /*
-        * Reached end of scan?
-        */
-       if (finished)
-       {
-           if (BufferIsValid(scan->rs_cbuf))
-               ReleaseBuffer(scan->rs_cbuf);
-           scan->rs_cbuf = InvalidBuffer;
-           scan->rs_cblock = InvalidBlockNumber;
-           tuple->t_data = NULL;
-           scan->rs_inited = false;
-           return NULL;
-       }
-
-       Assert(blockno < scan->rs_nblocks);
-       heapgetpage(scan, blockno);
-
-       /* Re-establish state for new page */
-       if (!pagemode)
-           LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
-
-       page = (Page) BufferGetPage(scan->rs_cbuf);
-       all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
-       maxoffset = PageGetMaxOffsetNumber(page);
+       /* Found visible tuple, return it. */
+       break;
    }
 
-   /* Count successfully-fetched tuples as heap fetches */
-   pgstat_count_heap_getnext(scan->rs_rd);
-
-   return &(scan->rs_ctup);
-}
+   scanstate->donetuples++;
 
-/*
- * Check visibility of the tuple.
- */
-static bool
-SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan)
-{
-   if (scan->rs_pageatatime)
-   {
-       /*
-        * In pageatatime mode, heapgetpage() already did visibility checks,
-        * so just look at the info it left in rs_vistuples[].
-        *
-        * We use a binary search over the known-sorted array.  Note: we could
-        * save some effort if we insisted that NextSampleTuple select tuples
-        * in increasing order, but it's not clear that there would be enough
-        * gain to justify the restriction.
-        */
-       int         start = 0,
-                   end = scan->rs_ntuples - 1;
-
-       while (start <= end)
-       {
-           int         mid = (start + end) / 2;
-           OffsetNumber curoffset = scan->rs_vistuples[mid];
-
-           if (tupoffset == curoffset)
-               return true;
-           else if (tupoffset < curoffset)
-               end = mid - 1;
-           else
-               start = mid + 1;
-       }
-
-       return false;
-   }
-   else
-   {
-       /* Otherwise, we have to check the tuple individually. */
-       return HeapTupleSatisfiesVisibility(tuple,
-                                           scan->rs_snapshot,
-                                           scan->rs_cbuf);
-   }
+   return slot;
 }
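
(The tablesample fetch loop above now delegates both block selection and
per-tuple visibility checking to the table AM: table_scan_sample_next_block()
consults the tablesample method for the next block to read, and
table_scan_sample_next_tuple() returns the next sampled tuple that is visible
under the scan's snapshot, storing it directly into the slot.  The buffer
locking, serializable-conflict and sync-scan bookkeeping that used to live
here moves behind those two callbacks.)
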
index 55377add6efaad6712d610e3f9af51d1ddaac210..a6e89bf2af6c2c8cf2995608b3a95e19b84745dd 100644 (file)
@@ -28,6 +28,7 @@
 #include "postgres.h"
 
 #include "access/relscan.h"
+#include "access/tableam.h"
 #include "executor/execdebug.h"
 #include "executor/nodeSeqscan.h"
 #include "utils/rel.h"
@@ -48,8 +49,7 @@ static TupleTableSlot *SeqNext(SeqScanState *node);
 static TupleTableSlot *
 SeqNext(SeqScanState *node)
 {
-   HeapTuple   tuple;
-   HeapScanDesc scandesc;
+   TableScanDesc scandesc;
    EState     *estate;
    ScanDirection direction;
    TupleTableSlot *slot;
@@ -68,34 +68,16 @@ SeqNext(SeqScanState *node)
         * We reach here if the scan is not parallel, or if we're serially
         * executing a scan that was planned to be parallel.
         */
-       scandesc = heap_beginscan(node->ss.ss_currentRelation,
-                                 estate->es_snapshot,
-                                 0, NULL);
+       scandesc = table_beginscan(node->ss.ss_currentRelation,
+                                  estate->es_snapshot,
+                                  0, NULL);
        node->ss.ss_currentScanDesc = scandesc;
    }
 
    /*
     * get the next tuple from the table
     */
-   tuple = heap_getnext(scandesc, direction);
-
-   /*
-    * save the tuple and the buffer returned to us by the access methods in
-    * our scan tuple slot and return the slot.  Note: we pass 'false' because
-    * tuples returned by heap_getnext() are pointers onto disk pages and were
-    * not created with palloc() and so should not be pfree()'d.  Note also
-    * that ExecStoreHeapTuple will increment the refcount of the buffer; the
-    * refcount will not be dropped until the tuple table slot is cleared.
-    */
-   if (tuple)
-       ExecStoreBufferHeapTuple(tuple, /* tuple to store */
-                                slot,  /* slot to store in */
-                                scandesc->rs_cbuf);    /* buffer associated
-                                                        * with this tuple */
-   else
-       ExecClearTuple(slot);
-
-   return slot;
+   return table_scan_getnextslot(scandesc, direction, slot);
 }
 
 /*
@@ -173,7 +155,7 @@ ExecInitSeqScan(SeqScan *node, EState *estate, int eflags)
    /* and create slot with the appropriate rowtype */
    ExecInitScanTupleSlot(estate, &scanstate->ss,
                          RelationGetDescr(scanstate->ss.ss_currentRelation),
-                         &TTSOpsBufferHeapTuple);
+                         table_slot_callbacks(scanstate->ss.ss_currentRelation));
 
    /*
     * Initialize result type and projection.
@@ -199,7 +181,7 @@ ExecInitSeqScan(SeqScan *node, EState *estate, int eflags)
 void
 ExecEndSeqScan(SeqScanState *node)
 {
-   HeapScanDesc scanDesc;
+   TableScanDesc scanDesc;
 
    /*
     * get information from node
@@ -222,7 +204,7 @@ ExecEndSeqScan(SeqScanState *node)
     * close heap scan
     */
    if (scanDesc != NULL)
-       heap_endscan(scanDesc);
+       table_endscan(scanDesc);
 }
 
 /* ----------------------------------------------------------------
@@ -239,13 +221,13 @@ ExecEndSeqScan(SeqScanState *node)
 void
 ExecReScanSeqScan(SeqScanState *node)
 {
-   HeapScanDesc scan;
+   TableScanDesc scan;
 
    scan = node->ss.ss_currentScanDesc;
 
    if (scan != NULL)
-       heap_rescan(scan,       /* scan desc */
-                   NULL);      /* new scan keys */
+       table_rescan(scan,      /* scan desc */
+                    NULL);     /* new scan keys */
 
    ExecScanReScan((ScanState *) node);
 }
@@ -268,7 +250,7 @@ ExecSeqScanEstimate(SeqScanState *node,
 {
    EState     *estate = node->ss.ps.state;
 
-   node->pscan_len = heap_parallelscan_estimate(estate->es_snapshot);
+   node->pscan_len = table_parallelscan_estimate(estate->es_snapshot);
    shm_toc_estimate_chunk(&pcxt->estimator, node->pscan_len);
    shm_toc_estimate_keys(&pcxt->estimator, 1);
 }
@@ -284,15 +266,15 @@ ExecSeqScanInitializeDSM(SeqScanState *node,
                         ParallelContext *pcxt)
 {
    EState     *estate = node->ss.ps.state;
-   ParallelHeapScanDesc pscan;
+   ParallelTableScanDesc pscan;
 
    pscan = shm_toc_allocate(pcxt->toc, node->pscan_len);
-   heap_parallelscan_initialize(pscan,
-                                node->ss.ss_currentRelation,
-                                estate->es_snapshot);
+   table_parallelscan_initialize(pscan,
+                                 node->ss.ss_currentRelation,
+                                 estate->es_snapshot);
    shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pscan);
    node->ss.ss_currentScanDesc =
-       heap_beginscan_parallel(node->ss.ss_currentRelation, pscan);
+       table_beginscan_parallel(node->ss.ss_currentRelation, pscan);
 }
 
 /* ----------------------------------------------------------------
@@ -305,9 +287,10 @@ void
 ExecSeqScanReInitializeDSM(SeqScanState *node,
                           ParallelContext *pcxt)
 {
-   HeapScanDesc scan = node->ss.ss_currentScanDesc;
+   ParallelTableScanDesc pscan;
 
-   heap_parallelscan_reinitialize(scan->rs_parallel);
+   pscan = node->ss.ss_currentScanDesc->rs_parallel;
+   table_parallelscan_reinitialize(pscan);
 }
 
 /* ----------------------------------------------------------------
@@ -320,9 +303,9 @@ void
 ExecSeqScanInitializeWorker(SeqScanState *node,
                            ParallelWorkerContext *pwcxt)
 {
-   ParallelHeapScanDesc pscan;
+   ParallelTableScanDesc pscan;
 
    pscan = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
    node->ss.ss_currentScanDesc =
-       heap_beginscan_parallel(node->ss.ss_currentRelation, pscan);
+       table_beginscan_parallel(node->ss.ss_currentRelation, pscan);
 }
index afec097bc844ebf03069ae3dce295a948483d6a7..c8bcf97f1de6a5365c5c2f6772c05683ac1e7296 100644 (file)
@@ -22,6 +22,7 @@
  */
 #include "postgres.h"
 
+#include "access/tableam.h"
 #include "access/sysattr.h"
 #include "catalog/pg_type.h"
 #include "executor/execdebug.h"
@@ -306,9 +307,7 @@ TidNext(TidScanState *node)
    ScanDirection direction;
    Snapshot    snapshot;
    Relation    heapRelation;
-   HeapTuple   tuple;
    TupleTableSlot *slot;
-   Buffer      buffer = InvalidBuffer;
    ItemPointerData *tidList;
    int         numTids;
    bool        bBackward;
@@ -331,12 +330,6 @@ TidNext(TidScanState *node)
    tidList = node->tss_TidList;
    numTids = node->tss_NumTids;
 
-   /*
-    * We use node->tss_htup as the tuple pointer; note this can't just be a
-    * local variable here, as the scan tuple slot will keep a pointer to it.
-    */
-   tuple = &(node->tss_htup);
-
    /*
     * Initialize or advance scan position, depending on direction.
     */
@@ -364,7 +357,7 @@ TidNext(TidScanState *node)
 
    while (node->tss_TidPtr >= 0 && node->tss_TidPtr < numTids)
    {
-       tuple->t_self = tidList[node->tss_TidPtr];
+       ItemPointerData tid = tidList[node->tss_TidPtr];
 
        /*
         * For WHERE CURRENT OF, the tuple retrieved from the cursor might
@@ -372,28 +365,11 @@ TidNext(TidScanState *node)
         * current according to our snapshot.
         */
        if (node->tss_isCurrentOf)
-           heap_get_latest_tid(heapRelation, snapshot, &tuple->t_self);
-
-       if (heap_fetch(heapRelation, snapshot, tuple, &buffer, false, NULL))
-       {
-           /*
-            * Store the scanned tuple in the scan tuple slot of the scan
-            * state.  Eventually we will only do this and not return a tuple.
-            */
-           ExecStoreBufferHeapTuple(tuple, /* tuple to store */
-                                    slot,  /* slot to store in */
-                                    buffer);   /* buffer associated with
-                                                * tuple */
-
-           /*
-            * At this point we have an extra pin on the buffer, because
-            * ExecStoreHeapTuple incremented the pin count. Drop our local
-            * pin.
-            */
-           ReleaseBuffer(buffer);
+           table_get_latest_tid(heapRelation, snapshot, &tid);
 
+       if (table_fetch_row_version(heapRelation, &tid, snapshot, slot, NULL))
            return slot;
-       }
+
        /* Bad TID or failed snapshot qual; try next */
        if (bBackward)
            node->tss_TidPtr--;
@@ -544,7 +520,7 @@ ExecInitTidScan(TidScan *node, EState *estate, int eflags)
     */
    ExecInitScanTupleSlot(estate, &tidstate->ss,
                          RelationGetDescr(currentRelation),
-                         &TTSOpsBufferHeapTuple);
+                         table_slot_callbacks(currentRelation));
 
    /*
     * Initialize result type and projection.
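
For illustration only (not part of the patch): fetching a single row version
by TID through the table AM, as TidNext() now does.  fetch_row_by_tid() is a
hypothetical helper; the final argument of table_fetch_row_version() is
passed as NULL, matching the call above.

    static bool
    fetch_row_by_tid(Relation rel, ItemPointer tid, Snapshot snapshot,
                     TupleTableSlot *slot)
    {
        ItemPointerData ctid = *tid;

        /* chase to the latest version first, as WHERE CURRENT OF requires */
        table_get_latest_tid(rel, snapshot, &ctid);

        /* succeeds and fills the slot if that version is visible */
        return table_fetch_row_version(rel, &ctid, snapshot, slot, NULL);
    }
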
index ad726676d8ccc8dba291db98e29cde74aa26e1e3..34664e76d1b3f8bc4bb93b308be6a71a6c4745bc 100644 (file)
@@ -762,7 +762,7 @@ HeapTuple
 SPI_copytuple(HeapTuple tuple)
 {
    MemoryContext oldcxt;
-   HeapTuple   ctuple;
+   HeapTuple   ctuple;
 
    if (tuple == NULL)
    {
@@ -983,7 +983,7 @@ char *
 SPI_gettype(TupleDesc tupdesc, int fnumber)
 {
    Oid         typoid;
-   HeapTuple   typeTuple;
+   HeapTuple   typeTuple;
    char       *result;
 
    SPI_result = 0;
@@ -1844,7 +1844,7 @@ spi_printtup(TupleTableSlot *slot, DestReceiver *self)
        tuptable->free = tuptable->alloced;
        tuptable->alloced += tuptable->free;
        tuptable->vals = (HeapTuple *) repalloc_huge(tuptable->vals,
-                                                    tuptable->alloced * sizeof(HeapTuple));
+                                                    tuptable->alloced * sizeof(HeapTuple));
    }
 
    tuptable->vals[tuptable->alloced - tuptable->free] =
index e47ef491928ea8c529418dde343c240fed8075a6..e2b596cf74e1750dfcef4508866654904c64e4a7 100644 (file)
@@ -59,6 +59,8 @@ tqueueReceiveSlot(TupleTableSlot *slot, DestReceiver *self)
    bool        should_free;
 
    /* Send the tuple itself. */
+   /*
+    * PBORKED: this shouldn't rely on heap tuples.  If the tuple has to be
+    * in a materialized form here, a minimal tuple would suffice.
+    */
    tuple = ExecFetchSlotHeapTuple(slot, true, &should_free);
    result = shm_mq_send(tqueue->queue, tuple->t_len, tuple->t_data, false);
 
index db49968409688c6212e3f53ab86b36bdc6e13679..2848634e009117b6fbf2e96132ec7c90a77d93f6 100644 (file)
@@ -3332,6 +3332,7 @@ CopyCreateStmtFields(const CreateStmt *from, CreateStmt *newnode)
    COPY_NODE_FIELD(options);
    COPY_SCALAR_FIELD(oncommit);
    COPY_STRING_FIELD(tablespacename);
+   COPY_STRING_FIELD(accessMethod);
    COPY_SCALAR_FIELD(if_not_exists);
 }
 
index a570ac0aabe3223d5c4a04abf82a258fc2fe06ed..58e8c7cc5f3d8f3e043801fa033d0adcdab66239 100644 (file)
@@ -21,6 +21,7 @@
 #include "access/heapam.h"
 #include "access/htup_details.h"
 #include "access/nbtree.h"
+#include "access/tableam.h"
 #include "access/sysattr.h"
 #include "access/transam.h"
 #include "access/xlog.h"
@@ -271,7 +272,8 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
            info->amsearchnulls = amroutine->amsearchnulls;
            info->amcanparallel = amroutine->amcanparallel;
            info->amhasgettuple = (amroutine->amgettuple != NULL);
-           info->amhasgetbitmap = (amroutine->amgetbitmap != NULL);
+           info->amhasgetbitmap = ((amroutine->amgetbitmap != NULL)
+                                   && (relation->rd_tableamroutine->scan_bitmap_pagescan != NULL));
            info->amcostestimate = amroutine->amcostestimate;
            Assert(info->amcostestimate != NULL);
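
(A bitmap index path can only be executed if the table can be read page-wise
from a TID bitmap, so the planner now requires the table AM to provide
scan_bitmap_pagescan in addition to the index AM's amgetbitmap before it
advertises bitmap-scan support for the relation.)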
 
index 2c2208ffb724d7f6864f8ad020bb128c253b3324..71bedf34f3355799531803517be434885dbaecd1 100644 (file)
@@ -48,6 +48,7 @@
 #include <ctype.h>
 #include <limits.h>
 
+#include "access/tableam.h"
 #include "catalog/index.h"
 #include "catalog/namespace.h"
 #include "catalog/pg_am.h"
@@ -322,6 +323,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %type <str>        OptSchemaName
 %type <list>   OptSchemaEltList
 
+%type <chr>        am_type
+
 %type <boolean> TriggerForSpec TriggerForType
 %type <ival>   TriggerActionTime
 %type <list>   TriggerEvents TriggerOneEvent
@@ -337,7 +340,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 
 %type <str>        copy_file_name
                database_name access_method_clause access_method attr_name
-               name cursor_name file_name
+               table_access_method_clause name cursor_name file_name
                index_name opt_index_name cluster_index_specification
 
 %type <list>   func_name handler_name qual_Op qual_all_Op subquery_Op
@@ -3170,7 +3173,8 @@ copy_generic_opt_arg_list_item:
  *****************************************************************************/
 
 CreateStmt:    CREATE OptTemp TABLE qualified_name '(' OptTableElementList ')'
-           OptInherit OptPartitionSpec OptWith OnCommitOption OptTableSpace
+           OptInherit OptPartitionSpec table_access_method_clause OptWith
+           OnCommitOption OptTableSpace
                {
                    CreateStmt *n = makeNode(CreateStmt);
                    $4->relpersistence = $2;
@@ -3180,15 +3184,16 @@ CreateStmt: CREATE OptTemp TABLE qualified_name '(' OptTableElementList ')'
                    n->partspec = $9;
                    n->ofTypename = NULL;
                    n->constraints = NIL;
-                   n->options = $10;
-                   n->oncommit = $11;
-                   n->tablespacename = $12;
+                   n->accessMethod = $10;
+                   n->options = $11;
+                   n->oncommit = $12;
+                   n->tablespacename = $13;
                    n->if_not_exists = false;
                    $$ = (Node *)n;
                }
        | CREATE OptTemp TABLE IF_P NOT EXISTS qualified_name '('
-           OptTableElementList ')' OptInherit OptPartitionSpec OptWith
-           OnCommitOption OptTableSpace
+           OptTableElementList ')' OptInherit OptPartitionSpec table_access_method_clause
+           OptWith OnCommitOption OptTableSpace
                {
                    CreateStmt *n = makeNode(CreateStmt);
                    $7->relpersistence = $2;
@@ -3198,15 +3203,16 @@ CreateStmt: CREATE OptTemp TABLE qualified_name '(' OptTableElementList ')'
                    n->partspec = $12;
                    n->ofTypename = NULL;
                    n->constraints = NIL;
-                   n->options = $13;
-                   n->oncommit = $14;
-                   n->tablespacename = $15;
+                   n->accessMethod = $13;
+                   n->options = $14;
+                   n->oncommit = $15;
+                   n->tablespacename = $16;
                    n->if_not_exists = true;
                    $$ = (Node *)n;
                }
        | CREATE OptTemp TABLE qualified_name OF any_name
-           OptTypedTableElementList OptPartitionSpec OptWith OnCommitOption
-           OptTableSpace
+           OptTypedTableElementList OptPartitionSpec table_access_method_clause
+           OptWith OnCommitOption OptTableSpace
                {
                    CreateStmt *n = makeNode(CreateStmt);
                    $4->relpersistence = $2;
@@ -3217,15 +3223,16 @@ CreateStmt: CREATE OptTemp TABLE qualified_name '(' OptTableElementList ')'
                    n->ofTypename = makeTypeNameFromNameList($6);
                    n->ofTypename->location = @6;
                    n->constraints = NIL;
-                   n->options = $9;
-                   n->oncommit = $10;
-                   n->tablespacename = $11;
+                   n->accessMethod = $9;
+                   n->options = $10;
+                   n->oncommit = $11;
+                   n->tablespacename = $12;
                    n->if_not_exists = false;
                    $$ = (Node *)n;
                }
        | CREATE OptTemp TABLE IF_P NOT EXISTS qualified_name OF any_name
-           OptTypedTableElementList OptPartitionSpec OptWith OnCommitOption
-           OptTableSpace
+           OptTypedTableElementList OptPartitionSpec table_access_method_clause
+           OptWith OnCommitOption OptTableSpace
                {
                    CreateStmt *n = makeNode(CreateStmt);
                    $7->relpersistence = $2;
@@ -3236,15 +3243,16 @@ CreateStmt: CREATE OptTemp TABLE qualified_name '(' OptTableElementList ')'
                    n->ofTypename = makeTypeNameFromNameList($9);
                    n->ofTypename->location = @9;
                    n->constraints = NIL;
-                   n->options = $12;
-                   n->oncommit = $13;
-                   n->tablespacename = $14;
+                   n->accessMethod = $12;
+                   n->options = $13;
+                   n->oncommit = $14;
+                   n->tablespacename = $15;
                    n->if_not_exists = true;
                    $$ = (Node *)n;
                }
        | CREATE OptTemp TABLE qualified_name PARTITION OF qualified_name
-           OptTypedTableElementList PartitionBoundSpec OptPartitionSpec OptWith
-           OnCommitOption OptTableSpace
+           OptTypedTableElementList PartitionBoundSpec OptPartitionSpec
+           table_access_method_clause OptWith OnCommitOption OptTableSpace
                {
                    CreateStmt *n = makeNode(CreateStmt);
                    $4->relpersistence = $2;
@@ -3255,15 +3263,16 @@ CreateStmt: CREATE OptTemp TABLE qualified_name '(' OptTableElementList ')'
                    n->partspec = $10;
                    n->ofTypename = NULL;
                    n->constraints = NIL;
-                   n->options = $11;
-                   n->oncommit = $12;
-                   n->tablespacename = $13;
+                   n->accessMethod = $11;
+                   n->options = $12;
+                   n->oncommit = $13;
+                   n->tablespacename = $14;
                    n->if_not_exists = false;
                    $$ = (Node *)n;
                }
        | CREATE OptTemp TABLE IF_P NOT EXISTS qualified_name PARTITION OF
            qualified_name OptTypedTableElementList PartitionBoundSpec OptPartitionSpec
-           OptWith OnCommitOption OptTableSpace
+           table_access_method_clause OptWith OnCommitOption OptTableSpace
                {
                    CreateStmt *n = makeNode(CreateStmt);
                    $7->relpersistence = $2;
@@ -3274,9 +3283,10 @@ CreateStmt:  CREATE OptTemp TABLE qualified_name '(' OptTableElementList ')'
                    n->partspec = $13;
                    n->ofTypename = NULL;
                    n->constraints = NIL;
-                   n->options = $14;
-                   n->oncommit = $15;
-                   n->tablespacename = $16;
+                   n->accessMethod = $14;
+                   n->options = $15;
+                   n->oncommit = $16;
+                   n->tablespacename = $17;
                    n->if_not_exists = true;
                    $$ = (Node *)n;
                }
@@ -3921,6 +3931,12 @@ part_elem: ColId opt_collate opt_class
                    $$ = n;
                }
        ;
+
+table_access_method_clause:
+           USING access_method                 { $$ = $2; }
+           | /*EMPTY*/                         { $$ = NULL; }
+       ;
+
 /* WITHOUT OIDS is legacy only */
 OptWith:
            WITH reloptions             { $$ = $2; }
@@ -4026,14 +4042,16 @@ CreateAsStmt:
        ;
 
 create_as_target:
-           qualified_name opt_column_list OptWith OnCommitOption OptTableSpace
+           qualified_name opt_column_list table_access_method_clause
+           OptWith OnCommitOption OptTableSpace
                {
                    $$ = makeNode(IntoClause);
                    $$->rel = $1;
                    $$->colNames = $2;
-                   $$->options = $3;
-                   $$->onCommit = $4;
-                   $$->tableSpaceName = $5;
+                   $$->accessMethod = $3;
+                   $$->options = $4;
+                   $$->onCommit = $5;
+                   $$->tableSpaceName = $6;
                    $$->viewQuery = NULL;
                    $$->skipData = false;       /* might get changed later */
                }
@@ -4083,14 +4101,15 @@ CreateMatViewStmt:
        ;
 
 create_mv_target:
-           qualified_name opt_column_list opt_reloptions OptTableSpace
+           qualified_name opt_column_list table_access_method_clause opt_reloptions OptTableSpace
                {
                    $$ = makeNode(IntoClause);
                    $$->rel = $1;
                    $$->colNames = $2;
-                   $$->options = $3;
+                   $$->accessMethod = $3;
+                   $$->options = $4;
                    $$->onCommit = ONCOMMIT_NOOP;
-                   $$->tableSpaceName = $4;
+                   $$->tableSpaceName = $5;
                    $$->viewQuery = NULL;       /* filled at analysis time */
                    $$->skipData = false;       /* might get changed later */
                }
@@ -5298,16 +5317,21 @@ row_security_cmd:
  *
  *****************************************************************************/
 
-CreateAmStmt: CREATE ACCESS METHOD name TYPE_P INDEX HANDLER handler_name
+CreateAmStmt: CREATE ACCESS METHOD name TYPE_P am_type HANDLER handler_name
                {
                    CreateAmStmt *n = makeNode(CreateAmStmt);
                    n->amname = $4;
                    n->handler_name = $8;
-                   n->amtype = AMTYPE_INDEX;
+                   n->amtype = $6;
                    $$ = (Node *) n;
                }
        ;
 
+am_type:
+           INDEX           { $$ = AMTYPE_INDEX; }
+       |   TABLE           { $$ = AMTYPE_TABLE; }
+       ;
+
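
For illustration only (not part of the patch), the statements the grammar
additions above accept; my_tableam_handler stands in for a hypothetical
handler function returning the new table_am_handler pseudotype:

    CREATE ACCESS METHOD myam TYPE TABLE HANDLER my_tableam_handler;

    CREATE TABLE t (a int) USING myam;
    CREATE TABLE t_copy USING myam AS SELECT * FROM t;
    CREATE MATERIALIZED VIEW mv USING myam AS SELECT * FROM t;
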
 /*****************************************************************************
  *
  *     QUERIES :
index eeaab2f4c9b972a89aa8afb817aed8b72016d49f..cb9e252f417ceaa22ffe46cff5cadef0d1bdb6a9 100644 (file)
@@ -13,6 +13,7 @@
 */
 #include "postgres.h"
 
+#include "access/tableam.h"
 #include "catalog/partition.h"
 #include "catalog/pg_inherits.h"
 #include "catalog/pg_type.h"
@@ -1200,12 +1201,10 @@ check_default_partition_contents(Relation parent, Relation default_rel,
        Expr       *constr;
        Expr       *partition_constraint;
        EState     *estate;
-       HeapTuple   tuple;
        ExprState  *partqualstate = NULL;
        Snapshot    snapshot;
-       TupleDesc   tupdesc;
        ExprContext *econtext;
-       HeapScanDesc scan;
+       TableScanDesc scan;
        MemoryContext oldCxt;
        TupleTableSlot *tupslot;
 
@@ -1252,7 +1251,6 @@ check_default_partition_contents(Relation parent, Relation default_rel,
            continue;
        }
 
-       tupdesc = CreateTupleDescCopy(RelationGetDescr(part_rel));
        constr = linitial(def_part_constraints);
        partition_constraint = (Expr *)
            map_partition_varattnos((List *) constr,
@@ -1264,8 +1262,8 @@ check_default_partition_contents(Relation parent, Relation default_rel,
 
        econtext = GetPerTupleExprContext(estate);
        snapshot = RegisterSnapshot(GetLatestSnapshot());
-       scan = heap_beginscan(part_rel, snapshot, 0, NULL);
-       tupslot = MakeSingleTupleTableSlot(tupdesc, &TTSOpsHeapTuple);
+       scan = table_beginscan(part_rel, snapshot, 0, NULL);
+       tupslot = table_gimmegimmeslot(part_rel, &estate->es_tupleTable);
 
        /*
         * Switch to per-tuple memory context and reset it for each tuple
@@ -1273,9 +1271,8 @@ check_default_partition_contents(Relation parent, Relation default_rel,
         */
        oldCxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
 
-       while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+       while (table_scan_getnextslot(scan, ForwardScanDirection, tupslot))
        {
-           ExecStoreHeapTuple(tuple, tupslot, false);
            econtext->ecxt_scantuple = tupslot;
 
            if (!ExecCheck(partqualstate, econtext))
@@ -1289,7 +1286,7 @@ check_default_partition_contents(Relation parent, Relation default_rel,
        }
 
        MemoryContextSwitchTo(oldCxt);
-       heap_endscan(scan);
+       table_endscan(scan);
        UnregisterSnapshot(snapshot);
        ExecDropSingleTupleTableSlot(tupslot);
        FreeExecutorState(estate);
index 2d5086d4062cf3240e54b43ca16e06cc3c9fdde9..cb7ee7d53a7b42225ea659f6c969ff568a2641db 100644 (file)
@@ -69,6 +69,7 @@
 #include "access/htup_details.h"
 #include "access/multixact.h"
 #include "access/reloptions.h"
+#include "access/tableam.h"
 #include "access/transam.h"
 #include "access/xact.h"
 #include "catalog/dependency.h"
@@ -1866,7 +1867,7 @@ get_database_list(void)
 {
    List       *dblist = NIL;
    Relation    rel;
-   HeapScanDesc scan;
+   TableScanDesc scan;
    HeapTuple   tup;
    MemoryContext resultcxt;
 
@@ -1884,9 +1885,9 @@ get_database_list(void)
    (void) GetTransactionSnapshot();
 
    rel = heap_open(DatabaseRelationId, AccessShareLock);
-   scan = heap_beginscan_catalog(rel, 0, NULL);
+   scan = table_beginscan_catalog(rel, 0, NULL);
 
-   while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection)))
+   while (HeapTupleIsValid(tup = heap_scan_getnext(scan, ForwardScanDirection)))
    {
        Form_pg_database pgdatabase = (Form_pg_database) GETSTRUCT(tup);
        avw_dbase  *avdb;
@@ -1913,7 +1914,7 @@ get_database_list(void)
        MemoryContextSwitchTo(oldcxt);
    }
 
-   heap_endscan(scan);
+   table_endscan(scan);
    heap_close(rel, AccessShareLock);
 
    CommitTransactionCommand();
@@ -1932,7 +1933,7 @@ do_autovacuum(void)
 {
    Relation    classRel;
    HeapTuple   tuple;
-   HeapScanDesc relScan;
+   TableScanDesc relScan;
    Form_pg_database dbForm;
    List       *table_oids = NIL;
    List       *orphan_oids = NIL;
@@ -2044,13 +2045,13 @@ do_autovacuum(void)
     * wide tables there might be proportionally much more activity in the
     * TOAST table than in its parent.
     */
-   relScan = heap_beginscan_catalog(classRel, 0, NULL);
+   relScan = table_beginscan_catalog(classRel, 0, NULL);
 
    /*
     * On the first pass, we collect main tables to vacuum, and also the main
     * table relid to TOAST relid mapping.
     */
-   while ((tuple = heap_getnext(relScan, ForwardScanDirection)) != NULL)
+   while ((tuple = heap_scan_getnext(relScan, ForwardScanDirection)) != NULL)
    {
        Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
        PgStat_StatTabEntry *tabentry;
@@ -2133,7 +2134,7 @@ do_autovacuum(void)
        }
    }
 
-   heap_endscan(relScan);
+   table_endscan(relScan);
 
    /* second pass: check TOAST tables */
    ScanKeyInit(&key,
@@ -2141,8 +2142,8 @@ do_autovacuum(void)
                BTEqualStrategyNumber, F_CHAREQ,
                CharGetDatum(RELKIND_TOASTVALUE));
 
-   relScan = heap_beginscan_catalog(classRel, 1, &key);
-   while ((tuple = heap_getnext(relScan, ForwardScanDirection)) != NULL)
+   relScan = table_beginscan_catalog(classRel, 1, &key);
+   while ((tuple = heap_scan_getnext(relScan, ForwardScanDirection)) != NULL)
    {
        Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
        PgStat_StatTabEntry *tabentry;
@@ -2188,7 +2189,7 @@ do_autovacuum(void)
            table_oids = lappend_oid(table_oids, relid);
    }
 
-   heap_endscan(relScan);
+   table_endscan(relScan);
    heap_close(classRel, AccessShareLock);
 
    /*
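
For illustration only (not part of the patch): system catalogs are still
assumed to be heap tables, so catalog scans go through
table_beginscan_catalog() but keep yielding HeapTuples via
heap_scan_getnext(), as above.  scan_pg_class_example() is a hypothetical
helper.

    static void
    scan_pg_class_example(Relation classRel)
    {
        TableScanDesc scan;
        HeapTuple   tuple;

        scan = table_beginscan_catalog(classRel, 0, NULL);

        while ((tuple = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
        {
            Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);

            /* ... inspect classForm here ... */
            (void) classForm;
        }

        table_endscan(scan);
    }
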
index 8676088e57d9f3375578db2de7159f5f58352596..7762dbc44b944474566899fed64ba1a50d9b9102 100644 (file)
@@ -36,6 +36,7 @@
 
 #include "access/heapam.h"
 #include "access/htup_details.h"
+#include "access/tableam.h"
 #include "access/transam.h"
 #include "access/twophase_rmgr.h"
 #include "access/xact.h"
@@ -1206,7 +1207,7 @@ pgstat_collect_oids(Oid catalogid, AttrNumber anum_oid)
    HTAB       *htab;
    HASHCTL     hash_ctl;
    Relation    rel;
-   HeapScanDesc scan;
+   TableScanDesc scan;
    HeapTuple   tup;
    Snapshot    snapshot;
 
@@ -1221,8 +1222,8 @@ pgstat_collect_oids(Oid catalogid, AttrNumber anum_oid)
 
    rel = heap_open(catalogid, AccessShareLock);
    snapshot = RegisterSnapshot(GetLatestSnapshot());
-   scan = heap_beginscan(rel, snapshot, 0, NULL);
-   while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL)
+   scan = table_beginscan(rel, snapshot, 0, NULL);
+   while ((tup = heap_scan_getnext(scan, ForwardScanDirection)) != NULL)
    {
        Oid         thisoid;
        bool        isnull;
@@ -1234,7 +1235,7 @@ pgstat_collect_oids(Oid catalogid, AttrNumber anum_oid)
 
        (void) hash_search(htab, (void *) &thisoid, HASH_ENTER, NULL);
    }
-   heap_endscan(scan);
+   table_endscan(scan);
    UnregisterSnapshot(snapshot);
    heap_close(rel, AccessShareLock);
 
index 3a84d8ca86a2843b42ec7c5e458a9410a9681ef9..8d452474c152064c130a66185d32a578cd90481d 100644 (file)
@@ -24,6 +24,7 @@
 #include "access/heapam.h"
 #include "access/htup.h"
 #include "access/htup_details.h"
+#include "access/tableam.h"
 #include "access/xact.h"
 
 #include "catalog/pg_subscription.h"
@@ -118,7 +119,7 @@ get_subscription_list(void)
 {
    List       *res = NIL;
    Relation    rel;
-   HeapScanDesc scan;
+   TableScanDesc scan;
    HeapTuple   tup;
    MemoryContext resultcxt;
 
@@ -136,9 +137,9 @@ get_subscription_list(void)
    (void) GetTransactionSnapshot();
 
    rel = heap_open(SubscriptionRelationId, AccessShareLock);
-   scan = heap_beginscan_catalog(rel, 0, NULL);
+   scan = table_beginscan_catalog(rel, 0, NULL);
 
-   while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection)))
+   while (HeapTupleIsValid(tup = heap_scan_getnext(scan, ForwardScanDirection)))
    {
        Form_pg_subscription subform = (Form_pg_subscription) GETSTRUCT(tup);
        Subscription *sub;
@@ -164,7 +165,7 @@ get_subscription_list(void)
        MemoryContextSwitchTo(oldcxt);
    }
 
-   heap_endscan(scan);
+   table_endscan(scan);
    heap_close(rel, AccessShareLock);
 
    CommitTransactionCommand();
index 363ddf4505ef63ed9466a7eaa2167e2525dc65b3..363b82e1b5f4839755798d1e1a06c2ed572c5b44 100644 (file)
@@ -376,7 +376,7 @@ static void
 SnapBuildFreeSnapshot(Snapshot snap)
 {
    /* make sure we don't get passed an external snapshot */
-   Assert(snap->satisfies == HeapTupleSatisfiesHistoricMVCC);
+   Assert(snap->visibility_type == HISTORIC_MVCC_VISIBILITY);
 
    /* make sure nobody modified our snapshot */
    Assert(snap->curcid == FirstCommandId);
@@ -434,7 +434,7 @@ void
 SnapBuildSnapDecRefcount(Snapshot snap)
 {
    /* make sure we don't get passed an external snapshot */
-   Assert(snap->satisfies == HeapTupleSatisfiesHistoricMVCC);
+   Assert(snap->visibility_type == HISTORIC_MVCC_VISIBILITY);
 
    /* make sure nobody modified our snapshot */
    Assert(snap->curcid == FirstCommandId);
@@ -476,7 +476,7 @@ SnapBuildBuildSnapshot(SnapBuild *builder)
 
    snapshot = MemoryContextAllocZero(builder->context, ssize);
 
-   snapshot->satisfies = HeapTupleSatisfiesHistoricMVCC;
+   snapshot->visibility_type = HISTORIC_MVCC_VISIBILITY;
 
    /*
     * We misuse the original meaning of SnapshotData's xip and subxip fields
index 893f1f008edf248362af7e2c1cd4bc0dcb86c7b9..247e03aec4b668242027851063fb18641ec491be 100644 (file)
@@ -27,6 +27,7 @@
 #include "pgstat.h"
 #include "funcapi.h"
 
+#include "access/tableam.h"
 #include "access/xact.h"
 #include "access/xlog_internal.h"
 
@@ -211,11 +212,6 @@ create_estate_for_relation(LogicalRepRelMapEntry *rel)
 
    estate->es_output_cid = GetCurrentCommandId(true);
 
-   /* Triggers might need a slot */
-   if (resultRelInfo->ri_TrigDesc)
-       estate->es_trig_tuple_slot = ExecInitExtraTupleSlot(estate, NULL,
-                                                           &TTSOpsVirtual);
-
    /* Prepare to catch AFTER triggers. */
    AfterTriggerBeginQuery();
 
@@ -718,10 +714,8 @@ apply_handle_update(StringInfo s)
    estate = create_estate_for_relation(rel);
    remoteslot = ExecInitExtraTupleSlot(estate,
                                        RelationGetDescr(rel->localrel),
-                                       &TTSOpsHeapTuple);
-   localslot = ExecInitExtraTupleSlot(estate,
-                                      RelationGetDescr(rel->localrel),
-                                      &TTSOpsHeapTuple);
+                                       &TTSOpsVirtual);
+   localslot = table_gimmegimmeslot(rel->localrel, &estate->es_tupleTable);
    EvalPlanQualInit(&epqstate, estate, NULL, NIL, -1);
 
    PushActiveSnapshot(GetTransactionSnapshot());
@@ -839,9 +833,7 @@ apply_handle_delete(StringInfo s)
    remoteslot = ExecInitExtraTupleSlot(estate,
                                        RelationGetDescr(rel->localrel),
                                        &TTSOpsVirtual);
-   localslot = ExecInitExtraTupleSlot(estate,
-                                      RelationGetDescr(rel->localrel),
-                                      &TTSOpsHeapTuple);
+   localslot = table_gimmegimmeslot(rel->localrel, &estate->es_tupleTable);
    EvalPlanQualInit(&epqstate, estate, NULL, NIL, -1);
 
    PushActiveSnapshot(GetTransactionSnapshot());
index 106194795081e1626143f4470edb89cd44e4e17a..2001190e27c21a41f243661d2ce261d0b1c49d78 100644 (file)
@@ -17,6 +17,7 @@
 #include "access/heapam.h"
 #include "access/htup_details.h"
 #include "access/multixact.h"
+#include "access/tableam.h"
 #include "access/transam.h"
 #include "access/xact.h"
 #include "catalog/catalog.h"
@@ -424,8 +425,9 @@ DefineQueryRewrite(const char *rulename,
        if (event_relation->rd_rel->relkind != RELKIND_VIEW &&
            event_relation->rd_rel->relkind != RELKIND_MATVIEW)
        {
-           HeapScanDesc scanDesc;
+           TableScanDesc scanDesc;
            Snapshot    snapshot;
+           TupleTableSlot *slot;
 
            if (event_relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
                ereport(ERROR,
@@ -440,13 +442,15 @@ DefineQueryRewrite(const char *rulename,
                                RelationGetRelationName(event_relation))));
 
            snapshot = RegisterSnapshot(GetLatestSnapshot());
-           scanDesc = heap_beginscan(event_relation, snapshot, 0, NULL);
-           if (heap_getnext(scanDesc, ForwardScanDirection) != NULL)
+           scanDesc = table_beginscan(event_relation, snapshot, 0, NULL);
+           slot = table_gimmegimmeslot(event_relation, NULL);
+           if (table_scan_getnextslot(scanDesc, ForwardScanDirection, slot))
                ereport(ERROR,
                        (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                         errmsg("could not convert table \"%s\" to a view because it is not empty",
                                RelationGetRelationName(event_relation))));
-           heap_endscan(scanDesc);
+           ExecDropSingleTupleTableSlot(slot);
+           table_endscan(scanDesc);
            UnregisterSnapshot(snapshot);
 
            if (event_relation->rd_rel->relhastriggers)
index e8390311d03fdd7f4577311fe7f71163fa38bf43..2960e21340a546b847bd9b7ade94e9dbe4a24231 100644 (file)
 #include "access/htup_details.h"
 #include "access/slru.h"
 #include "access/subtrans.h"
+#include "access/tableam.h"
 #include "access/transam.h"
 #include "access/twophase.h"
 #include "access/twophase_rmgr.h"
index dbe67cdb4cb81ccf82cd233e8a6d7fa4cce0128a..89aac13c8024b118b804221ab51ea7a8b46ff6a0 100644 (file)
@@ -418,3 +418,4 @@ PSEUDOTYPE_DUMMY_IO_FUNCS(internal);
 PSEUDOTYPE_DUMMY_IO_FUNCS(opaque);
 PSEUDOTYPE_DUMMY_IO_FUNCS(anyelement);
 PSEUDOTYPE_DUMMY_IO_FUNCS(anynonarray);
+PSEUDOTYPE_DUMMY_IO_FUNCS(table_am_handler);
index cdda860e73a379d7563c8bb6d24cb2e3a95820a1..747602b5716ad2287a111dfe6e84fa2479dcc8b4 100644 (file)
@@ -31,6 +31,7 @@
 #include "postgres.h"
 
 #include "access/htup_details.h"
+#include "access/tableam.h"
 #include "access/sysattr.h"
 #include "access/xact.h"
 #include "catalog/pg_collation.h"
@@ -191,7 +192,7 @@ static int  ri_constraint_cache_valid_count = 0;
  * ----------
  */
 static bool ri_Check_Pk_Match(Relation pk_rel, Relation fk_rel,
-                 HeapTuple old_row,
+                 TupleTableSlot *oldslot,
                  const RI_ConstraintInfo *riinfo);
 static Datum ri_restrict(TriggerData *trigdata, bool is_no_action);
 static Datum ri_setnull(TriggerData *trigdata);
@@ -204,12 +205,12 @@ static void ri_GenerateQual(StringInfo buf,
                Oid opoid,
                const char *rightop, Oid rightoptype);
 static void ri_GenerateQualCollation(StringInfo buf, Oid collation);
-static int ri_NullCheck(TupleDesc tupdesc, HeapTuple tup,
+static int ri_NullCheck(TupleDesc tupdesc, TupleTableSlot *slot,
             const RI_ConstraintInfo *riinfo, bool rel_is_pk);
 static void ri_BuildQueryKey(RI_QueryKey *key,
                 const RI_ConstraintInfo *riinfo,
                 int32 constr_queryno);
-static bool ri_KeysEqual(Relation rel, HeapTuple oldtup, HeapTuple newtup,
+static bool ri_KeysEqual(Relation rel, TupleTableSlot *oldslot, TupleTableSlot *newslot,
             const RI_ConstraintInfo *riinfo, bool rel_is_pk);
 static bool ri_AttributesEqual(Oid eq_opr, Oid typeid,
                   Datum oldvalue, Datum newvalue);
@@ -231,14 +232,14 @@ static SPIPlanPtr ri_PlanCheck(const char *querystr, int nargs, Oid *argtypes,
 static bool ri_PerformCheck(const RI_ConstraintInfo *riinfo,
                RI_QueryKey *qkey, SPIPlanPtr qplan,
                Relation fk_rel, Relation pk_rel,
-               HeapTuple old_tuple, HeapTuple new_tuple,
+               TupleTableSlot *oldslot, TupleTableSlot *newslot,
                bool detectNewRows, int expect_OK);
-static void ri_ExtractValues(Relation rel, HeapTuple tup,
+static void ri_ExtractValues(Relation rel, TupleTableSlot *slot,
                 const RI_ConstraintInfo *riinfo, bool rel_is_pk,
                 Datum *vals, char *nulls);
 static void ri_ReportViolation(const RI_ConstraintInfo *riinfo,
                   Relation pk_rel, Relation fk_rel,
-                  HeapTuple violator, TupleDesc tupdesc,
+                  TupleTableSlot *violator, TupleDesc tupdesc,
                   int queryno) pg_attribute_noreturn();
 
 
@@ -254,8 +255,11 @@ RI_FKey_check(TriggerData *trigdata)
    const RI_ConstraintInfo *riinfo;
    Relation    fk_rel;
    Relation    pk_rel;
+#if 0
    HeapTuple   new_row;
    Buffer      new_row_buf;
+#endif
+   TupleTableSlot *newslot;
    RI_QueryKey qkey;
    SPIPlanPtr  qplan;
    int         i;
@@ -267,15 +271,9 @@ RI_FKey_check(TriggerData *trigdata)
                                    trigdata->tg_relation, false);
 
    if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
-   {
-       new_row = trigdata->tg_newtuple;
-       new_row_buf = trigdata->tg_newtuplebuf;
-   }
+       newslot = trigdata->tg_newslot;
    else
-   {
-       new_row = trigdata->tg_trigtuple;
-       new_row_buf = trigdata->tg_trigtuplebuf;
-   }
+       newslot = trigdata->tg_trigslot;
 
    /*
     * We should not even consider checking the row if it is no longer valid,
@@ -285,13 +283,8 @@ RI_FKey_check(TriggerData *trigdata)
     * and lock on the buffer to call HeapTupleSatisfiesVisibility.  Caller
     * should be holding pin, but not lock.
     */
-   LockBuffer(new_row_buf, BUFFER_LOCK_SHARE);
-   if (!HeapTupleSatisfiesVisibility(new_row, SnapshotSelf, new_row_buf))
-   {
-       LockBuffer(new_row_buf, BUFFER_LOCK_UNLOCK);
+   if (!table_satisfies_snapshot(trigdata->tg_relation, newslot, SnapshotSelf))
        return PointerGetDatum(NULL);
-   }
-   LockBuffer(new_row_buf, BUFFER_LOCK_UNLOCK);
 
    /*
     * Get the relation descriptors of the FK and PK tables.
@@ -307,7 +300,7 @@ RI_FKey_check(TriggerData *trigdata)
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("MATCH PARTIAL not yet implemented")));
 
-   switch (ri_NullCheck(RelationGetDescr(fk_rel), new_row, riinfo, false))
+   switch (ri_NullCheck(RelationGetDescr(fk_rel), newslot, riinfo, false))
    {
        case RI_KEYS_ALL_NULL:
 
@@ -437,7 +430,7 @@ RI_FKey_check(TriggerData *trigdata)
     */
    ri_PerformCheck(riinfo, &qkey, qplan,
                    fk_rel, pk_rel,
-                   NULL, new_row,
+                   NULL, newslot,
                    false,
                    SPI_OK_SELECT);
 
@@ -505,7 +498,7 @@ RI_FKey_check_upd(PG_FUNCTION_ARGS)
  */
 static bool
 ri_Check_Pk_Match(Relation pk_rel, Relation fk_rel,
-                 HeapTuple old_row,
+                 TupleTableSlot *oldslot,
                  const RI_ConstraintInfo *riinfo)
 {
    SPIPlanPtr  qplan;
@@ -514,7 +507,7 @@ ri_Check_Pk_Match(Relation pk_rel, Relation fk_rel,
    bool        result;
 
    /* Only called for non-null rows */
-   Assert(ri_NullCheck(RelationGetDescr(pk_rel), old_row, riinfo, true) == RI_KEYS_NONE_NULL);
+   Assert(ri_NullCheck(RelationGetDescr(pk_rel), oldslot, riinfo, true) == RI_KEYS_NONE_NULL);
 
    if (SPI_connect() != SPI_OK_CONNECT)
        elog(ERROR, "SPI_connect failed");
@@ -572,7 +565,7 @@ ri_Check_Pk_Match(Relation pk_rel, Relation fk_rel,
     */
    result = ri_PerformCheck(riinfo, &qkey, qplan,
                             fk_rel, pk_rel,
-                            old_row, NULL,
+                            oldslot, NULL,
                             true,  /* treat like update */
                             SPI_OK_SELECT);
 
@@ -690,7 +683,7 @@ ri_restrict(TriggerData *trigdata, bool is_no_action)
    const RI_ConstraintInfo *riinfo;
    Relation    fk_rel;
    Relation    pk_rel;
-   HeapTuple   old_row;
+   TupleTableSlot *old_slot;
    RI_QueryKey qkey;
    SPIPlanPtr  qplan;
 
@@ -708,7 +701,7 @@ ri_restrict(TriggerData *trigdata, bool is_no_action)
     */
    fk_rel = heap_open(riinfo->fk_relid, RowShareLock);
    pk_rel = trigdata->tg_relation;
-   old_row = trigdata->tg_trigtuple;
+   old_slot = trigdata->tg_trigslot;
 
    switch (riinfo->confmatchtype)
    {
@@ -732,7 +725,7 @@ ri_restrict(TriggerData *trigdata, bool is_no_action)
             * allow another row to be substituted.
             */
            if (is_no_action &&
-               ri_Check_Pk_Match(pk_rel, fk_rel, old_row, riinfo))
+               ri_Check_Pk_Match(pk_rel, fk_rel, old_slot, riinfo))
            {
                heap_close(fk_rel, RowShareLock);
                return PointerGetDatum(NULL);
@@ -800,7 +793,7 @@ ri_restrict(TriggerData *trigdata, bool is_no_action)
             */
            ri_PerformCheck(riinfo, &qkey, qplan,
                            fk_rel, pk_rel,
-                           old_row, NULL,
+                           old_slot, NULL,
                            true,   /* must detect new rows */
                            SPI_OK_SELECT);
 
@@ -844,7 +837,7 @@ RI_FKey_cascade_del(PG_FUNCTION_ARGS)
    const RI_ConstraintInfo *riinfo;
    Relation    fk_rel;
    Relation    pk_rel;
-   HeapTuple   old_row;
+   TupleTableSlot *old_slot;
    RI_QueryKey qkey;
    SPIPlanPtr  qplan;
    int         i;
@@ -868,7 +861,7 @@ RI_FKey_cascade_del(PG_FUNCTION_ARGS)
     */
    fk_rel = heap_open(riinfo->fk_relid, RowExclusiveLock);
    pk_rel = trigdata->tg_relation;
-   old_row = trigdata->tg_trigtuple;
+   old_slot = trigdata->tg_trigslot;
 
    switch (riinfo->confmatchtype)
    {
@@ -940,7 +933,7 @@ RI_FKey_cascade_del(PG_FUNCTION_ARGS)
             */
            ri_PerformCheck(riinfo, &qkey, qplan,
                            fk_rel, pk_rel,
-                           old_row, NULL,
+                           old_slot, NULL,
                            true,   /* must detect new rows */
                            SPI_OK_DELETE);
 
@@ -984,8 +977,8 @@ RI_FKey_cascade_upd(PG_FUNCTION_ARGS)
    const RI_ConstraintInfo *riinfo;
    Relation    fk_rel;
    Relation    pk_rel;
-   HeapTuple   new_row;
-   HeapTuple   old_row;
+   TupleTableSlot *new_slot;
+   TupleTableSlot *old_slot;
    RI_QueryKey qkey;
    SPIPlanPtr  qplan;
    int         i;
@@ -1011,8 +1004,8 @@ RI_FKey_cascade_upd(PG_FUNCTION_ARGS)
     */
    fk_rel = heap_open(riinfo->fk_relid, RowExclusiveLock);
    pk_rel = trigdata->tg_relation;
-   new_row = trigdata->tg_newtuple;
-   old_row = trigdata->tg_trigtuple;
+   new_slot = trigdata->tg_newslot;
+   old_slot = trigdata->tg_trigslot;
 
    switch (riinfo->confmatchtype)
    {
@@ -1096,7 +1089,7 @@ RI_FKey_cascade_upd(PG_FUNCTION_ARGS)
             */
            ri_PerformCheck(riinfo, &qkey, qplan,
                            fk_rel, pk_rel,
-                           old_row, new_row,
+                           old_slot, new_slot,
                            true,   /* must detect new rows */
                            SPI_OK_UPDATE);
 
@@ -1179,7 +1172,7 @@ ri_setnull(TriggerData *trigdata)
    const RI_ConstraintInfo *riinfo;
    Relation    fk_rel;
    Relation    pk_rel;
-   HeapTuple   old_row;
+   TupleTableSlot *old_slot;
    RI_QueryKey qkey;
    SPIPlanPtr  qplan;
    int         i;
@@ -1198,7 +1191,7 @@ ri_setnull(TriggerData *trigdata)
     */
    fk_rel = heap_open(riinfo->fk_relid, RowExclusiveLock);
    pk_rel = trigdata->tg_relation;
-   old_row = trigdata->tg_trigtuple;
+   old_slot = trigdata->tg_trigslot;
 
    switch (riinfo->confmatchtype)
    {
@@ -1283,7 +1276,7 @@ ri_setnull(TriggerData *trigdata)
             */
            ri_PerformCheck(riinfo, &qkey, qplan,
                            fk_rel, pk_rel,
-                           old_row, NULL,
+                           old_slot, NULL,
                            true,   /* must detect new rows */
                            SPI_OK_UPDATE);
 
@@ -1366,7 +1359,7 @@ ri_setdefault(TriggerData *trigdata)
    const RI_ConstraintInfo *riinfo;
    Relation    fk_rel;
    Relation    pk_rel;
-   HeapTuple   old_row;
+   TupleTableSlot *old_slot;
    RI_QueryKey qkey;
    SPIPlanPtr  qplan;
 
@@ -1384,7 +1377,7 @@ ri_setdefault(TriggerData *trigdata)
     */
    fk_rel = heap_open(riinfo->fk_relid, RowExclusiveLock);
    pk_rel = trigdata->tg_relation;
-   old_row = trigdata->tg_trigtuple;
+   old_slot = trigdata->tg_trigslot;
 
    switch (riinfo->confmatchtype)
    {
@@ -1470,7 +1463,7 @@ ri_setdefault(TriggerData *trigdata)
             */
            ri_PerformCheck(riinfo, &qkey, qplan,
                            fk_rel, pk_rel,
-                           old_row, NULL,
+                           old_slot, NULL,
                            true,   /* must detect new rows */
                            SPI_OK_UPDATE);
 
@@ -1529,7 +1522,7 @@ ri_setdefault(TriggerData *trigdata)
  */
 bool
 RI_FKey_pk_upd_check_required(Trigger *trigger, Relation pk_rel,
-                             HeapTuple old_row, HeapTuple new_row)
+                             TupleTableSlot *old_slot, TupleTableSlot *new_slot)
 {
    const RI_ConstraintInfo *riinfo;
 
@@ -1547,11 +1540,11 @@ RI_FKey_pk_upd_check_required(Trigger *trigger, Relation pk_rel,
             * If any old key value is NULL, the row could not have been
             * referenced by an FK row, so no check is needed.
             */
-           if (ri_NullCheck(RelationGetDescr(pk_rel), old_row, riinfo, true) != RI_KEYS_NONE_NULL)
+           if (ri_NullCheck(RelationGetDescr(pk_rel), old_slot, riinfo, true) != RI_KEYS_NONE_NULL)
                return false;
 
            /* If all old and new key values are equal, no check is needed */
-           if (new_row && ri_KeysEqual(pk_rel, old_row, new_row, riinfo, true))
+           if (new_slot && ri_KeysEqual(pk_rel, old_slot, new_slot, riinfo, true))
                return false;
 
            /* Else we need to fire the trigger. */
@@ -1586,9 +1579,12 @@ RI_FKey_pk_upd_check_required(Trigger *trigger, Relation pk_rel,
  */
 bool
 RI_FKey_fk_upd_check_required(Trigger *trigger, Relation fk_rel,
-                             HeapTuple old_row, HeapTuple new_row)
+                             TupleTableSlot *old_slot, TupleTableSlot *new_slot)
 {
    const RI_ConstraintInfo *riinfo;
+   Datum       xminDatum;
+   TransactionId xmin;
+   bool        isnull;
 
    /*
     * Get arguments.
@@ -1603,7 +1599,7 @@ RI_FKey_fk_upd_check_required(Trigger *trigger, Relation fk_rel,
             * If any new key value is NULL, the row must satisfy the
             * constraint, so no check is needed.
             */
-           if (ri_NullCheck(RelationGetDescr(fk_rel), new_row, riinfo, false) != RI_KEYS_NONE_NULL)
+           if (ri_NullCheck(RelationGetDescr(fk_rel), new_slot, riinfo, false) != RI_KEYS_NONE_NULL)
                return false;
 
            /*
@@ -1614,11 +1610,14 @@ RI_FKey_fk_upd_check_required(Trigger *trigger, Relation fk_rel,
             * UPDATE check.  (We could skip this if we knew the INSERT
             * trigger already fired, but there is no easy way to know that.)
             */
-           if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(old_row->t_data)))
+           xminDatum = slot_getsysattr(old_slot, MinTransactionIdAttributeNumber, &isnull);
+           Assert(!isnull);
+           xmin = DatumGetTransactionId(xminDatum);
+           if (TransactionIdIsCurrentTransactionId(xmin))
                return true;
 
            /* If all old and new key values are equal, no check is needed */
-           if (ri_KeysEqual(fk_rel, old_row, new_row, riinfo, false))
+           if (ri_KeysEqual(fk_rel, old_slot, new_slot, riinfo, false))
                return false;
 
            /* Else we need to fire the trigger. */
@@ -1634,7 +1633,7 @@ RI_FKey_fk_upd_check_required(Trigger *trigger, Relation fk_rel,
             * invalidated before the constraint is to be checked, but we
             * should queue the event to apply the check later.
             */
-           switch (ri_NullCheck(RelationGetDescr(fk_rel), new_row, riinfo, false))
+           switch (ri_NullCheck(RelationGetDescr(fk_rel), new_slot, riinfo, false))
            {
                case RI_KEYS_ALL_NULL:
                    return false;
@@ -1652,11 +1651,14 @@ RI_FKey_fk_upd_check_required(Trigger *trigger, Relation fk_rel,
             * UPDATE check.  (We could skip this if we knew the INSERT
             * trigger already fired, but there is no easy way to know that.)
             */
-           if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(old_row->t_data)))
+           xminDatum = slot_getsysattr(old_slot, MinTransactionIdAttributeNumber, &isnull);
+           Assert(!isnull);
+           xmin = DatumGetTransactionId(xminDatum);
+           if (TransactionIdIsCurrentTransactionId(xmin))
                return true;
 
            /* If all old and new key values are equal, no check is needed */
-           if (ri_KeysEqual(fk_rel, old_row, new_row, riinfo, false))
+           if (ri_KeysEqual(fk_rel, old_slot, new_slot, riinfo, false))
                return false;
 
            /* Else we need to fire the trigger. */
@@ -1910,10 +1912,17 @@ RI_Initial_Check(Trigger *trigger, Relation fk_rel, Relation pk_rel)
    /* Did we find a tuple violating the constraint? */
    if (SPI_processed > 0)
    {
+       TupleTableSlot *slot;
        HeapTuple   tuple = SPI_tuptable->vals[0];
        TupleDesc   tupdesc = SPI_tuptable->tupdesc;
        RI_ConstraintInfo fake_riinfo;
 
+       slot = MakeSingleTupleTableSlot(tupdesc, &TTSOpsVirtual);
+
+       heap_deform_tuple(tuple, tupdesc,
+                         slot->tts_values, slot->tts_isnull);
+       ExecStoreVirtualTuple(slot);
+
        /*
         * The columns to look at in the result tuple are 1..N, not whatever
         * they are in the fk_rel.  Hack up riinfo so that the subroutines
@@ -1933,7 +1942,7 @@ RI_Initial_Check(Trigger *trigger, Relation fk_rel, Relation pk_rel)
         * disallows partially-null FK rows.
         */
        if (fake_riinfo.confmatchtype == FKCONSTR_MATCH_FULL &&
-           ri_NullCheck(tupdesc, tuple, &fake_riinfo, false) != RI_KEYS_NONE_NULL)
+           ri_NullCheck(tupdesc, slot, &fake_riinfo, false) != RI_KEYS_NONE_NULL)
            ereport(ERROR,
                    (errcode(ERRCODE_FOREIGN_KEY_VIOLATION),
                     errmsg("insert or update on table \"%s\" violates foreign key constraint \"%s\"",
@@ -1950,8 +1959,10 @@ RI_Initial_Check(Trigger *trigger, Relation fk_rel, Relation pk_rel)
         */
        ri_ReportViolation(&fake_riinfo,
                           pk_rel, fk_rel,
-                          tuple, tupdesc,
+                          slot, tupdesc,
                           RI_PLAN_CHECK_LOOKUPPK);
+
+       ExecDropSingleTupleTableSlot(slot);
    }
 
    if (SPI_finish() != SPI_OK_FINISH)
@@ -2429,7 +2440,7 @@ static bool
 ri_PerformCheck(const RI_ConstraintInfo *riinfo,
                RI_QueryKey *qkey, SPIPlanPtr qplan,
                Relation fk_rel, Relation pk_rel,
-               HeapTuple old_tuple, HeapTuple new_tuple,
+               TupleTableSlot *old_slot, TupleTableSlot *new_slot,
                bool detectNewRows, int expect_OK)
 {
    Relation    query_rel,
@@ -2472,17 +2483,17 @@ ri_PerformCheck(const RI_ConstraintInfo *riinfo,
    }
 
    /* Extract the parameters to be passed into the query */
-   if (new_tuple)
+   if (new_slot)
    {
-       ri_ExtractValues(source_rel, new_tuple, riinfo, source_is_pk,
+       ri_ExtractValues(source_rel, new_slot, riinfo, source_is_pk,
                         vals, nulls);
-       if (old_tuple)
-           ri_ExtractValues(source_rel, old_tuple, riinfo, source_is_pk,
+       if (old_slot)
+           ri_ExtractValues(source_rel, old_slot, riinfo, source_is_pk,
                             vals + riinfo->nkeys, nulls + riinfo->nkeys);
    }
    else
    {
-       ri_ExtractValues(source_rel, old_tuple, riinfo, source_is_pk,
+       ri_ExtractValues(source_rel, old_slot, riinfo, source_is_pk,
                         vals, nulls);
    }
 
@@ -2552,7 +2563,7 @@ ri_PerformCheck(const RI_ConstraintInfo *riinfo,
        (SPI_processed == 0) == (qkey->constr_queryno == RI_PLAN_CHECK_LOOKUPPK))
        ri_ReportViolation(riinfo,
                           pk_rel, fk_rel,
-                          new_tuple ? new_tuple : old_tuple,
+                          new_slot ? new_slot : old_slot,
                           NULL,
                           qkey->constr_queryno);
 
@@ -2563,11 +2574,10 @@ ri_PerformCheck(const RI_ConstraintInfo *riinfo,
  * Extract fields from a tuple into Datum/nulls arrays
  */
 static void
-ri_ExtractValues(Relation rel, HeapTuple tup,
+ri_ExtractValues(Relation rel, TupleTableSlot *slot,
                 const RI_ConstraintInfo *riinfo, bool rel_is_pk,
                 Datum *vals, char *nulls)
 {
-   TupleDesc   tupdesc = rel->rd_att;
    const int16 *attnums;
    int         i;
    bool        isnull;
@@ -2579,8 +2589,7 @@ ri_ExtractValues(Relation rel, HeapTuple tup,
 
    for (i = 0; i < riinfo->nkeys; i++)
    {
-       vals[i] = heap_getattr(tup, attnums[i], tupdesc,
-                              &isnull);
+       vals[i] = slot_getattr(slot, attnums[i], &isnull);
        nulls[i] = isnull ? 'n' : ' ';
    }
 }
@@ -2597,7 +2606,7 @@ ri_ExtractValues(Relation rel, HeapTuple tup,
 static void
 ri_ReportViolation(const RI_ConstraintInfo *riinfo,
                   Relation pk_rel, Relation fk_rel,
-                  HeapTuple violator, TupleDesc tupdesc,
+                  TupleTableSlot *violatorslot, TupleDesc tupdesc,
                   int queryno)
 {
    StringInfoData key_names;
@@ -2676,7 +2685,8 @@ ri_ReportViolation(const RI_ConstraintInfo *riinfo,
                       *val;
 
            name = SPI_fname(tupdesc, fnum);
-           val = SPI_getvalue(violator, tupdesc, fnum);
+           /* PBORKED: avoid heaptuple conversion */
+           val = SPI_getvalue(ExecFetchSlotHeapTuple(violatorslot, false, NULL), tupdesc, fnum);
            if (!val)
                val = "null";
 
@@ -2730,7 +2740,7 @@ ri_ReportViolation(const RI_ConstraintInfo *riinfo,
  */
 static int
 ri_NullCheck(TupleDesc tupDesc,
-            HeapTuple tup,
+            TupleTableSlot *slot,
             const RI_ConstraintInfo *riinfo, bool rel_is_pk)
 {
    const int16 *attnums;
@@ -2745,7 +2755,7 @@ ri_NullCheck(TupleDesc tupDesc,
 
    for (i = 0; i < riinfo->nkeys; i++)
    {
-       if (heap_attisnull(tup, attnums[i], tupDesc))
+       if (slot_attisnull(slot, attnums[i]))
            nonenull = false;
        else
            allnull = false;
@@ -2896,10 +2906,9 @@ ri_HashPreparedPlan(RI_QueryKey *key, SPIPlanPtr plan)
  * ----------
  */
 static bool
-ri_KeysEqual(Relation rel, HeapTuple oldtup, HeapTuple newtup,
+ri_KeysEqual(Relation rel, TupleTableSlot *oldslot, TupleTableSlot *newslot,
             const RI_ConstraintInfo *riinfo, bool rel_is_pk)
 {
-   TupleDesc   tupdesc = RelationGetDescr(rel);
    const int16 *attnums;
    const Oid  *eq_oprs;
    int         i;
@@ -2915,6 +2924,7 @@ ri_KeysEqual(Relation rel, HeapTuple oldtup, HeapTuple newtup,
        eq_oprs = riinfo->ff_eq_oprs;
    }
 
+   /* XXX: could be worthwhile to fetch all necessary attrs at once */
    for (i = 0; i < riinfo->nkeys; i++)
    {
        Datum       oldvalue;
@@ -2924,14 +2934,14 @@ ri_KeysEqual(Relation rel, HeapTuple oldtup, HeapTuple newtup,
        /*
         * Get one attribute's oldvalue. If it is NULL - they're not equal.
         */
-       oldvalue = heap_getattr(oldtup, attnums[i], tupdesc, &isnull);
+       oldvalue = slot_getattr(oldslot, attnums[i], &isnull);
        if (isnull)
            return false;
 
        /*
         * Get one attribute's newvalue. If it is NULL - they're not equal.
         */
-       newvalue = heap_getattr(newtup, attnums[i], tupdesc, &isnull);
+       newvalue = slot_getattr(newslot, attnums[i], &isnull);
        if (isnull)
            return false;
 
index ffca0fe5bb8e6c5c7befdd13b503e13d97762f2e..270b01909ef5141ee52ecbe51f078519c1aec4cb 100644 (file)
 #include "access/brin.h"
 #include "access/gin.h"
 #include "access/htup_details.h"
+#include "access/tableam.h"
 #include "access/sysattr.h"
 #include "catalog/index.h"
 #include "catalog/pg_am.h"
@@ -5528,7 +5529,6 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
            bool        typByVal;
            ScanKeyData scankeys[1];
            IndexScanDesc index_scan;
-           HeapTuple   tup;
            Datum       values[INDEX_MAX_KEYS];
            bool        isnull[INDEX_MAX_KEYS];
            SnapshotData SnapshotNonVacuumable;
@@ -5551,8 +5551,7 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
            indexInfo = BuildIndexInfo(indexRel);
 
            /* set up a slot and expression context for FormIndexDatum */
-           slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRel),
-                                           &TTSOpsHeapTuple);
+           slot = table_gimmegimmeslot(heapRel, NULL);
            econtext->ecxt_scantuple = slot;
            get_typlenbyval(vardata->atttype, &typLen, &typByVal);
            InitNonVacuumableSnapshot(SnapshotNonVacuumable, RecentGlobalXmin);
@@ -5604,11 +5603,9 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
                index_rescan(index_scan, scankeys, 1, NULL, 0);
 
                /* Fetch first tuple in sortop's direction */
-               if ((tup = index_getnext(index_scan,
-                                        indexscandir)) != NULL)
+               if (index_getnext_slot(index_scan, indexscandir, slot))
                {
-                   /* Extract the index column values from the heap tuple */
-                   ExecStoreHeapTuple(tup, slot, false);
+                   /* Extract the index column values from the slot */
                    FormIndexDatum(indexInfo, slot, estate,
                                   values, isnull);
 
@@ -5637,11 +5634,9 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
                index_rescan(index_scan, scankeys, 1, NULL, 0);
 
                /* Fetch first tuple in reverse direction */
-               if ((tup = index_getnext(index_scan,
-                                        -indexscandir)) != NULL)
+               if (index_getnext_slot(index_scan, -indexscandir, slot))
                {
-                   /* Extract the index column values from the heap tuple */
-                   ExecStoreHeapTuple(tup, slot, false);
+                   /* Extract the index column values from the slot */
                    FormIndexDatum(indexInfo, slot, estate,
                                   values, isnull);
 
index 41d540b46ecded139d59d6290ef53b9df2660f0d..bb8a683b44d26844d0716c4938222daddab681b5 100644 (file)
@@ -22,6 +22,7 @@
 
 #include "access/heapam.h"
 #include "access/sysattr.h"
+#include "access/tableam.h"
 #include "catalog/namespace.h"
 #include "catalog/pg_type.h"
 #include "libpq/pqformat.h"
@@ -352,7 +353,7 @@ currtid_byreloid(PG_FUNCTION_ARGS)
    ItemPointerCopy(tid, result);
 
    snapshot = RegisterSnapshot(GetLatestSnapshot());
-   heap_get_latest_tid(rel, snapshot, result);
+   table_get_latest_tid(rel, snapshot, result);
    UnregisterSnapshot(snapshot);
 
    heap_close(rel, AccessShareLock);
@@ -387,7 +388,7 @@ currtid_byrelname(PG_FUNCTION_ARGS)
    ItemPointerCopy(tid, result);
 
    snapshot = RegisterSnapshot(GetLatestSnapshot());
-   heap_get_latest_tid(rel, snapshot, result);
+   table_get_latest_tid(rel, snapshot, result);
    UnregisterSnapshot(snapshot);
 
    heap_close(rel, AccessShareLock);
index c3071db1cdf90d1229b45f61885875037611d6f5..8b79e5907731ad2a7bcf623386312293b008c926 100644 (file)
@@ -36,6 +36,7 @@
 #include "access/nbtree.h"
 #include "access/reloptions.h"
 #include "access/sysattr.h"
+#include "access/tableam.h"
 #include "access/tupdesc_details.h"
 #include "access/xact.h"
 #include "access/xlog.h"
@@ -1196,10 +1197,29 @@ RelationBuildDesc(Oid targetRelId, bool insertIt)
    }
 
    /*
-    * if it's an index, initialize index-related information
+    * initialize access method information
     */
-   if (OidIsValid(relation->rd_rel->relam))
-       RelationInitIndexAccessInfo(relation);
+   switch (relation->rd_rel->relkind)
+   {
+       case RELKIND_INDEX:
+       case RELKIND_PARTITIONED_INDEX:
+           Assert(relation->rd_rel->relam != InvalidOid);
+           RelationInitIndexAccessInfo(relation);
+           break;
+       case RELKIND_RELATION:
+       case RELKIND_SEQUENCE:
+       case RELKIND_TOASTVALUE:
+       case RELKIND_VIEW:      /* not exactly storage, but the underlying
+                                * tuple access machinery is required */
+       case RELKIND_MATVIEW:
+       case RELKIND_PARTITIONED_TABLE:
+       case RELKIND_FOREIGN_TABLE: /* FIXME: needed to support COPY on foreign tables */
+           RelationInitTableAccessMethod(relation);
+           break;
+       default:
+           /* nothing to do in other cases */
+           break;
+   }
 
    /* extract reloptions if any */
    RelationParseRelOptions(relation, pg_class_tuple);
@@ -1701,6 +1721,52 @@ LookupOpclassInfo(Oid operatorClassOid,
    return opcentry;
 }
 
+/*
+ * Fill in the TableAmRoutine for a relation
+ *
+ * relation's rd_tableamhandler must be valid already.
+ */
+static void
+InitTableAmRoutine(Relation relation)
+{
+   relation->rd_tableamroutine = GetTableAmRoutine(relation->rd_tableamhandler);
+}
+
+/*
+ * Initialize table-access-method support data for a heap relation
+ */
+void
+RelationInitTableAccessMethod(Relation relation)
+{
+   HeapTuple   tuple;
+   Form_pg_am  aform;
+
+   if (IsCatalogRelation(relation) ||
+       !OidIsValid(relation->rd_rel->relam))
+   {
+       relation->rd_tableamhandler = HEAP_TABLE_AM_HANDLER_OID;
+   }
+   else
+   {
+       /*
+        * Look up the table access method, save the OID of its handler
+        * function.
+        */
+       tuple = SearchSysCache1(AMOID,
+                               ObjectIdGetDatum(relation->rd_rel->relam));
+       if (!HeapTupleIsValid(tuple))
+           elog(ERROR, "cache lookup failed for access method %u",
+                relation->rd_rel->relam);
+       aform = (Form_pg_am) GETSTRUCT(tuple);
+       relation->rd_tableamhandler = aform->amhandler;
+       ReleaseSysCache(tuple);
+   }
+
+   /*
+    * Now we can fetch the table AM's API struct
+    */
+   InitTableAmRoutine(relation);
+}
 
 /*
  *     formrdesc
@@ -1787,6 +1853,7 @@ formrdesc(const char *relationName, Oid relationReltype,
    relation->rd_rel->relallvisible = 0;
    relation->rd_rel->relkind = RELKIND_RELATION;
    relation->rd_rel->relnatts = (int16) natts;
+   relation->rd_rel->relam = HEAP_TABLE_AM_OID;
 
    /*
     * initialize attribute tuple form
@@ -1854,6 +1921,12 @@ formrdesc(const char *relationName, Oid relationReltype,
     */
    RelationInitPhysicalAddr(relation);
 
+   /*
+    * initialize the table am handler; relam was already set above
+    */
+   relation->rd_tableamroutine = GetHeapamTableAmRoutine();
+
    /*
     * initialize the rel-has-index flag, using hardwired knowledge
     */
@@ -3089,6 +3162,7 @@ RelationBuildLocalRelation(const char *relname,
                           Oid relnamespace,
                           TupleDesc tupDesc,
                           Oid relid,
+                          Oid accessmtd,
                           Oid relfilenode,
                           Oid reltablespace,
                           bool shared_relation,
@@ -3268,6 +3342,16 @@ RelationBuildLocalRelation(const char *relname,
 
    RelationInitPhysicalAddr(rel);
 
+   rel->rd_rel->relam = accessmtd;
+
+   if (relkind == RELKIND_RELATION ||
+       relkind == RELKIND_MATVIEW ||
+       relkind == RELKIND_VIEW ||  /* not exactly storage, but the underlying
+                                    * tuple access machinery is required */
+       relkind == RELKIND_PARTITIONED_TABLE ||
+       relkind == RELKIND_TOASTVALUE)
+       RelationInitTableAccessMethod(rel);
+
    /*
     * Okay to insert into the relcache hash table.
     *
@@ -3788,6 +3872,19 @@ RelationCacheInitializePhase3(void)
            restart = true;
        }
 
+       if (relation->rd_tableamroutine == NULL &&
+           (relation->rd_rel->relkind == RELKIND_RELATION ||
+            relation->rd_rel->relkind == RELKIND_MATVIEW ||
+            relation->rd_rel->relkind == RELKIND_VIEW ||
+            relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE ||
+            relation->rd_rel->relkind == RELKIND_TOASTVALUE))
+       {
+           RelationInitTableAccessMethod(relation);
+           Assert(relation->rd_tableamroutine != NULL);
+
+           restart = true;
+       }
+
        /* Release hold on the relation */
        RelationDecrementReferenceCount(relation);
 
@@ -5563,6 +5660,9 @@ load_relcache_init_file(bool shared)
            if (rel->rd_isnailed)
                nailed_rels++;
 
+           /* Load table AM stuff */
+           RelationInitTableAccessMethod(rel);
+
            Assert(rel->rd_index == NULL);
            Assert(rel->rd_indextuple == NULL);
            Assert(rel->rd_indexcxt == NULL);
index b636b1e262a40d02e76f69c564f544f14303ba9b..1d57177cb572caaa93d55fe52641d25368de3128 100644 (file)
@@ -22,6 +22,7 @@
 #include "access/heapam.h"
 #include "access/htup_details.h"
 #include "access/session.h"
+#include "access/tableam.h"
 #include "access/sysattr.h"
 #include "access/xact.h"
 #include "access/xlog.h"
@@ -1246,15 +1247,15 @@ static bool
 ThereIsAtLeastOneRole(void)
 {
    Relation    pg_authid_rel;
-   HeapScanDesc scan;
+   TableScanDesc scan;
    bool        result;
 
    pg_authid_rel = heap_open(AuthIdRelationId, AccessShareLock);
 
-   scan = heap_beginscan_catalog(pg_authid_rel, 0, NULL);
-   result = (heap_getnext(scan, ForwardScanDirection) != NULL);
+   scan = table_beginscan_catalog(pg_authid_rel, 0, NULL);
+   result = (heap_scan_getnext(scan, ForwardScanDirection) != NULL);
 
-   heap_endscan(scan);
+   table_endscan(scan);
    heap_close(pg_authid_rel, AccessShareLock);
 
    return result;
index 6fe1939881296e6d8456e92eab0d596ae3c7cda3..11b6df209a76041cdc2b46bcdfa5c66bb7c38869 100644 (file)
@@ -29,6 +29,7 @@
 #include "access/commit_ts.h"
 #include "access/gin.h"
 #include "access/rmgr.h"
+#include "access/tableam.h"
 #include "access/transam.h"
 #include "access/twophase.h"
 #include "access/xact.h"
@@ -3511,6 +3512,17 @@ static struct config_string ConfigureNamesString[] =
        check_datestyle, assign_datestyle, NULL
    },
 
+   {
+       {"default_table_access_method", PGC_USERSET, CLIENT_CONN_STATEMENT,
+           gettext_noop("Sets the default table access method for new tables."),
+           NULL,
+           GUC_IS_NAME
+       },
+       &default_table_access_method,
+       DEFAULT_TABLE_ACCESS_METHOD,
+       check_default_table_access_method, NULL, NULL
+   },
+
    {
        {"default_tablespace", PGC_USERSET, CLIENT_CONN_STATEMENT,
            gettext_noop("Sets the default tablespace to create tables and indexes in."),
index ee7fd83c02c9ff05512bdc19cadcc2dccb679a06..7d2b6facf2cfed47946886a1e2027275216c2aca 100644 (file)
@@ -3818,12 +3818,13 @@ comparetup_cluster(const SortTuple *a, const SortTuple *b,
 static void
 copytup_cluster(Tuplesortstate *state, SortTuple *stup, void *tup)
 {
-   HeapTuple   tuple = (HeapTuple) tup;
    Datum       original;
    MemoryContext oldcontext = MemoryContextSwitchTo(state->tuplecontext);
+   TupleTableSlot *slot = (TupleTableSlot *) tup;
+   HeapTuple   tuple;
 
    /* copy the tuple into sort storage */
-   tuple = heap_copytuple(tuple);
+   tuple = ExecCopySlotHeapTuple(slot);
    stup->tuple = (void *) tuple;
    USEMEM(state, GetMemoryChunkSpace(tuple));
 
index 5a6e6fa4c8e2186f2a5591997839d138707bd062..f17b1c5324968cbb5f58082048f51b32c3397669 100644 (file)
@@ -12,6 +12,6 @@ subdir = src/backend/utils/time
 top_builddir = ../../../..
 include $(top_builddir)/src/Makefile.global
 
-OBJS = combocid.o tqual.o snapmgr.o
+OBJS = combocid.o snapmgr.o
 
 include $(top_srcdir)/src/backend/common.mk
index edf59efc29d8f867e3a15ef9bd8f3ab5b7783d5a..9c595459611ce13dcf090c8bc743fc3c3ce2a351 100644 (file)
@@ -141,9 +141,9 @@ static volatile OldSnapshotControlData *oldSnapshotControl;
  * These SnapshotData structs are static to simplify memory allocation
  * (see the hack in GetSnapshotData to avoid repeated malloc/free).
  */
-static SnapshotData CurrentSnapshotData = {HeapTupleSatisfiesMVCC};
-static SnapshotData SecondarySnapshotData = {HeapTupleSatisfiesMVCC};
-SnapshotData CatalogSnapshotData = {HeapTupleSatisfiesMVCC};
+static SnapshotData CurrentSnapshotData = {MVCC_VISIBILITY};
+static SnapshotData SecondarySnapshotData = {MVCC_VISIBILITY};
+SnapshotData CatalogSnapshotData = {MVCC_VISIBILITY};
 
 /* Pointers to valid snapshots */
 static Snapshot CurrentSnapshot = NULL;
@@ -2046,7 +2046,7 @@ EstimateSnapshotSpace(Snapshot snap)
    Size        size;
 
    Assert(snap != InvalidSnapshot);
-   Assert(snap->satisfies == HeapTupleSatisfiesMVCC);
+   Assert(snap->visibility_type == MVCC_VISIBILITY);
 
    /* We allocate any XID arrays needed in the same palloc block. */
    size = add_size(sizeof(SerializedSnapshotData),
@@ -2143,7 +2143,7 @@ RestoreSnapshot(char *start_address)
 
    /* Copy all required fields */
    snapshot = (Snapshot) MemoryContextAlloc(TopTransactionContext, size);
-   snapshot->satisfies = HeapTupleSatisfiesMVCC;
+   snapshot->visibility_type = MVCC_VISIBILITY;
    snapshot->xmin = serialized_snapshot.xmin;
    snapshot->xmax = serialized_snapshot.xmax;
    snapshot->xip = NULL;
index 534fac7bf2f41b3170d51423d8c66ec077742636..0aa107f4b4bd5ddf233a59d63c4f9e7e1b6dc4f0 100644 (file)
@@ -159,8 +159,10 @@ extern IndexScanDesc index_beginscan_parallel(Relation heaprel,
                         ParallelIndexScanDesc pscan);
 extern ItemPointer index_getnext_tid(IndexScanDesc scan,
                  ScanDirection direction);
-extern HeapTuple index_fetch_heap(IndexScanDesc scan);
-extern HeapTuple index_getnext(IndexScanDesc scan, ScanDirection direction);
+struct TupleTableSlot;
+extern bool index_fetch_heap(IndexScanDesc scan, struct TupleTableSlot *slot);
+extern bool index_getnext_slot(IndexScanDesc scan, ScanDirection direction, struct TupleTableSlot *slot);
 extern int64 index_getbitmap(IndexScanDesc scan, TIDBitmap *bitmap);
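+
+/*
+ * Editor's sketch, not part of this patch: the replacement pattern for the
+ * old index_getnext() loop.  The caller now supplies a slot (obtained via
+ * table_gimmegimmeslot() so it matches the table AM) and iterates with
+ * index_getnext_slot(); see the selfuncs.c changes in this patch for an
+ * in-tree example.
+ *
+ * TupleTableSlot *slot = table_gimmegimmeslot(heapRel, NULL);
+ *
+ * while (index_getnext_slot(scan, ForwardScanDirection, slot))
+ * {
+ *     bool    isnull;
+ *     Datum   val = slot_getattr(slot, 1, &isnull);
+ *     ...
+ * }
+ * ExecDropSingleTupleTableSlot(slot);
+ */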
 
 extern IndexBulkDeleteResult *index_bulk_delete(IndexVacuumInfo *info,
index 108e4f10671d500144cae9ccb470463e81917aaf..a309db1a1c6458eb7fb5ee84d320bc8fdd4f0eab 100644 (file)
@@ -33,6 +33,8 @@
 
 typedef struct BulkInsertStateData *BulkInsertState;
 
+struct TupleTableSlot;
+
 /*
  * Possible lock modes for a tuple.
  */
@@ -71,8 +73,21 @@ typedef struct HeapUpdateFailureData
    ItemPointerData ctid;
    TransactionId xmax;
    CommandId   cmax;
+   bool        traversed;
 } HeapUpdateFailureData;
 
+/* Result codes for HeapTupleSatisfiesVacuum */
+typedef enum
+{
+   HEAPTUPLE_DEAD,             /* tuple is dead and deletable */
+   HEAPTUPLE_LIVE,             /* tuple is live (committed, no deleter) */
+   HEAPTUPLE_RECENTLY_DEAD,    /* tuple is dead, but not deletable yet */
+   HEAPTUPLE_INSERT_IN_PROGRESS,   /* inserting xact is still in progress */
+   HEAPTUPLE_DELETE_IN_PROGRESS    /* deleting xact is still in progress */
+} HTSV_Result;
+
+/* struct definition is private to rewriteheap.c */
+typedef struct RewriteStateData *RewriteState;
 
 /* ----------------
  *     function prototypes for heap access method
@@ -98,8 +113,9 @@ extern Relation heap_openrv_extended(const RangeVar *relation,
 #define heap_close(r,l)  relation_close(r,l)
 
 /* struct definitions appear in relscan.h */
+typedef struct TableScanDescData *TableScanDesc;
 typedef struct HeapScanDescData *HeapScanDesc;
-typedef struct ParallelHeapScanDescData *ParallelHeapScanDesc;
+typedef struct ParallelTableScanDescData *ParallelTableScanDesc;
 
 /*
  * HeapScanIsValid
@@ -107,53 +123,47 @@ typedef struct ParallelHeapScanDescData *ParallelHeapScanDesc;
  */
 #define HeapScanIsValid(scan) PointerIsValid(scan)
 
-extern HeapScanDesc heap_beginscan(Relation relation, Snapshot snapshot,
-              int nkeys, ScanKey key);
-extern HeapScanDesc heap_beginscan_catalog(Relation relation, int nkeys,
-                      ScanKey key);
-extern HeapScanDesc heap_beginscan_strat(Relation relation, Snapshot snapshot,
-                    int nkeys, ScanKey key,
-                    bool allow_strat, bool allow_sync);
-extern HeapScanDesc heap_beginscan_bm(Relation relation, Snapshot snapshot,
-                 int nkeys, ScanKey key);
-extern HeapScanDesc heap_beginscan_sampling(Relation relation,
-                       Snapshot snapshot, int nkeys, ScanKey key,
-                       bool allow_strat, bool allow_sync, bool allow_pagemode);
-extern void heap_setscanlimits(HeapScanDesc scan, BlockNumber startBlk,
+extern TableScanDesc heap_beginscan(Relation relation, Snapshot snapshot,
+              int nkeys, ScanKey key,
+              ParallelTableScanDesc parallel_scan,
+              bool allow_strat,
+              bool allow_sync,
+              bool allow_pagemode,
+              bool is_bitmapscan,
+              bool is_samplescan,
+              bool temp_snap);
+extern void heap_setscanlimits(TableScanDesc scan, BlockNumber startBlk,
                   BlockNumber endBlk);
-extern void heapgetpage(HeapScanDesc scan, BlockNumber page);
-extern void heap_rescan(HeapScanDesc scan, ScanKey key);
-extern void heap_rescan_set_params(HeapScanDesc scan, ScanKey key,
+extern void heapgetpage(TableScanDesc scan, BlockNumber page);
+extern void heap_rescan(TableScanDesc scan, ScanKey key, bool set_params,
+           bool allow_strat, bool allow_sync, bool allow_pagemode);
+extern void heap_rescan_set_params(TableScanDesc scan, ScanKey key,
                       bool allow_strat, bool allow_sync, bool allow_pagemode);
-extern void heap_endscan(HeapScanDesc scan);
-extern HeapTuple heap_getnext(HeapScanDesc scan, ScanDirection direction);
-
-extern Size heap_parallelscan_estimate(Snapshot snapshot);
-extern void heap_parallelscan_initialize(ParallelHeapScanDesc target,
-                            Relation relation, Snapshot snapshot);
-extern void heap_parallelscan_reinitialize(ParallelHeapScanDesc parallel_scan);
-extern HeapScanDesc heap_beginscan_parallel(Relation, ParallelHeapScanDesc);
-
-extern bool heap_fetch(Relation relation, Snapshot snapshot,
-          HeapTuple tuple, Buffer *userbuf, bool keep_buf,
+extern void heap_endscan(TableScanDesc scan);
+extern HeapTuple heap_getnext(TableScanDesc scan, ScanDirection direction);
+extern struct TupleTableSlot *heap_getnextslot(TableScanDesc sscan, ScanDirection direction,
+                struct TupleTableSlot *slot);
+extern HeapTuple heap_scan_getnext(TableScanDesc sscan, ScanDirection direction);
+
+extern bool heap_fetch(Relation relation, ItemPointer tid, Snapshot snapshot,
+          HeapTuple tuple, Buffer *userbuf,
           Relation stats_relation);
 extern bool heap_hot_search_buffer(ItemPointer tid, Relation relation,
                       Buffer buffer, Snapshot snapshot, HeapTuple heapTuple,
                       bool *all_dead, bool first_call);
 extern bool heap_hot_search(ItemPointer tid, Relation relation,
                Snapshot snapshot, bool *all_dead);
-
 extern void heap_get_latest_tid(Relation relation, Snapshot snapshot,
                    ItemPointer tid);
 extern void setLastTid(const ItemPointer tid);
 
 extern BulkInsertState GetBulkInsertState(void);
-extern void FreeBulkInsertState(BulkInsertState);
+extern void FreeBulkInsertState(BulkInsertState bistate);
 extern void ReleaseBulkInsertStatePin(BulkInsertState bistate);
 
 extern void heap_insert(Relation relation, HeapTuple tup, CommandId cid,
            int options, BulkInsertState bistate);
-extern void heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
+extern void heap_multi_insert(Relation relation, struct TupleTableSlot **slots, int ntuples,
                  CommandId cid, int options, BulkInsertState bistate);
 extern HTSU_Result heap_delete(Relation relation, ItemPointer tid,
            CommandId cid, Snapshot crosscheck, bool wait,
@@ -164,10 +174,11 @@ extern HTSU_Result heap_update(Relation relation, ItemPointer otid,
            HeapTuple newtup,
            CommandId cid, Snapshot crosscheck, bool wait,
            HeapUpdateFailureData *hufd, LockTupleMode *lockmode);
-extern HTSU_Result heap_lock_tuple(Relation relation, HeapTuple tuple,
+extern HTSU_Result heap_lock_tuple(Relation relation, ItemPointer tid,
                CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy,
-               bool follow_update,
+               bool follow_update, HeapTuple tuple,
                Buffer *buffer, HeapUpdateFailureData *hufd);
+
 extern void heap_inplace_update(Relation relation, HeapTuple tuple);
 extern bool heap_freeze_tuple(HeapTupleHeader tuple,
                  TransactionId relfrozenxid, TransactionId relminmxid,
@@ -182,7 +193,7 @@ extern void simple_heap_update(Relation relation, ItemPointer otid,
                   HeapTuple tup);
 
 extern void heap_sync(Relation relation);
-extern void heap_update_snapshot(HeapScanDesc scan, Snapshot snapshot);
+extern void heap_update_snapshot(TableScanDesc scan, Snapshot snapshot);
 
 /* in heap/pruneheap.c */
 extern void heap_page_prune_opt(Relation relation, Buffer buffer);
@@ -205,4 +216,26 @@ extern Size SyncScanShmemSize(void);
 struct VacuumParams;
 extern void heap_vacuum_rel(Relation onerel, int options,
                struct VacuumParams *params, BufferAccessStrategy bstrategy);
+
+/* in heap/heapam_visibility.c */
+extern bool HeapTupleSatisfies(HeapTuple stup, Snapshot snapshot, Buffer buffer);
+extern HTSU_Result HeapTupleSatisfiesUpdate(HeapTuple stup, CommandId curcid,
+                        Buffer buffer);
+extern HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple stup, TransactionId OldestXmin,
+                        Buffer buffer);
+extern void HeapTupleSetHintBits(HeapTupleHeader tuple, Buffer buffer,
+                    uint16 infomask, TransactionId xid);
+extern bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple);
+extern bool XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot);
+extern bool HeapTupleIsSurelyDead(HeapTuple htup, TransactionId OldestXmin);
+
+/* in heap/rewriteheap.c */
+extern RewriteState begin_heap_rewrite(Relation OldHeap, Relation NewHeap,
+                  TransactionId OldestXmin, TransactionId FreezeXid,
+                  MultiXactId MultiXactCutoff, bool use_wal);
+extern void end_heap_rewrite(RewriteState state);
+extern void rewrite_heap_tuple(RewriteState state, HeapTuple oldTuple,
+                  HeapTuple newTuple);
+extern bool rewrite_heap_dead_tuple(RewriteState state, HeapTuple oldTuple);
+
 #endif                         /* HEAPAM_H */
index e5289b8aa7d910b1abda75b604cdc6347b2cb20d..51a3ad74fa1ebee7d560061ac3d32a3c3907bf87 100644 (file)
 #include "storage/spin.h"
 
 /*
- * Shared state for parallel heap scan.
+ * Shared state for parallel table scan.
  *
- * Each backend participating in a parallel heap scan has its own
- * HeapScanDesc in backend-private memory, and those objects all contain
- * a pointer to this structure.  The information here must be sufficient
- * to properly initialize each new HeapScanDesc as workers join the scan,
- * and it must act as a font of block numbers for those workers.
+ * Each backend participating in a parallel table scan has its own
+ * TableScanDesc in backend-private memory, and those objects all contain a
+ * pointer to this structure.  The information here must be sufficient to
+ * properly initialize each new TableScanDesc as workers join the scan, and it
+ * must act as a font of block numbers for those workers.
  */
-typedef struct ParallelHeapScanDescData
+typedef struct ParallelTableScanDescData
 {
    Oid         phs_relid;      /* OID of relation to scan */
    bool        phs_syncscan;   /* report location to syncscan logic? */
@@ -41,9 +41,9 @@ typedef struct ParallelHeapScanDescData
                                         * workers so far. */
    bool        phs_snapshot_any;   /* SnapshotAny, not phs_snapshot_data? */
    char        phs_snapshot_data[FLEXIBLE_ARRAY_MEMBER];
-} ParallelHeapScanDescData;
+} ParallelTableScanDescData;
 
-typedef struct HeapScanDescData
+typedef struct TableScanDescData
 {
    /* scan parameters */
    Relation    rs_rd;          /* heap relation descriptor */
@@ -62,16 +62,27 @@ typedef struct HeapScanDescData
    BlockNumber rs_startblock;  /* block # to start at */
    BlockNumber rs_numblocks;   /* max number of blocks to scan */
    /* rs_numblocks is usually InvalidBlockNumber, meaning "scan whole rel" */
-   BufferAccessStrategy rs_strategy;   /* access strategy for reads */
    bool        rs_syncscan;    /* report location to syncscan logic? */
 
+   ParallelTableScanDesc rs_parallel;  /* parallel scan information */
+
+}          TableScanDescData;
+
+typedef struct HeapScanDescData
+{
+   /* AM-independent part of the scan descriptor; must be first */
+   TableScanDescData rs_scan;
+
    /* scan current state */
    bool        rs_inited;      /* false = scan not init'd yet */
-   HeapTupleData rs_ctup;      /* current tuple in scan, if any */
    BlockNumber rs_cblock;      /* current block # in scan, if any */
    Buffer      rs_cbuf;        /* current buffer in scan, if any */
    /* NB: if rs_cbuf is not InvalidBuffer, we hold a pin on that buffer */
-   ParallelHeapScanDesc rs_parallel;   /* parallel scan information */
+
+   BufferAccessStrategy rs_strategy;   /* access strategy for reads */
+
+   HeapTupleData rs_ctup;      /* current tuple in scan, if any */
 
    /* these fields only used in page-at-a-time mode and for bitmap scans */
    int         rs_cindex;      /* current tuple's index in vistuples */
@@ -79,6 +90,21 @@ typedef struct HeapScanDescData
    OffsetNumber rs_vistuples[MaxHeapTuplesPerPage];    /* their offsets */
 }          HeapScanDescData;
 
+
+/*
+ * Base state for an index fetch done through a table AM; AM-specific
+ * implementations embed this as the first field of their fetch state.
+ */
+typedef struct IndexFetchTableData
+{
+   Relation rel;
+} IndexFetchTableData;
+
+
+typedef struct IndexFetchHeapData
+{
+   IndexFetchTableData xs_base;
+
+   Buffer      xs_cbuf;        /* current heap buffer in scan, if any */
+   /* NB: if xs_cbuf is not InvalidBuffer, we hold a pin on that buffer */
+} IndexFetchHeapData;
+
 /*
  * We use the same IndexScanDescData structure for both amgettuple-based
  * and amgetbitmap-based index scans.  Some fields are only relevant in
@@ -117,10 +143,10 @@ typedef struct IndexScanDescData
    HeapTuple   xs_hitup;       /* index data returned by AM, as HeapTuple */
    TupleDesc   xs_hitupdesc;   /* rowtype descriptor of xs_hitup */
 
-   /* xs_ctup/xs_cbuf/xs_recheck are valid after a successful index_getnext */
-   HeapTupleData xs_ctup;      /* current heap tuple, if any */
-   Buffer      xs_cbuf;        /* current heap buffer in scan, if any */
-   /* NB: if xs_cbuf is not InvalidBuffer, we hold a pin on that buffer */
+   ItemPointerData xs_heaptid; /* result */
+   bool        xs_heap_continue;   /* T if must keep walking; there may be
+                                    * further results */
+   IndexFetchTableData *xs_heapfetch;
+
    bool        xs_recheck;     /* T means scan keys must be rechecked */
 
    /*
@@ -134,9 +160,6 @@ typedef struct IndexScanDescData
    bool       *xs_orderbynulls;
    bool        xs_recheckorderby;
 
-   /* state data for traversing HOT chains in index_getnext */
-   bool        xs_continue_hot;    /* T if must keep walking HOT chain */
-
    /* parallel index scan information, in shared memory */
    ParallelIndexScanDesc parallel_scan;
 }          IndexScanDescData;
@@ -150,14 +173,17 @@ typedef struct ParallelIndexScanDescData
    char        ps_snapshot_data[FLEXIBLE_ARRAY_MEMBER];
 }          ParallelIndexScanDescData;
 
-/* Struct for heap-or-index scans of system tables */
+struct TupleTableSlot;
+
+/* Struct for storage-or-index scans of system tables */
 typedef struct SysScanDescData
 {
    Relation    heap_rel;       /* catalog being scanned */
    Relation    irel;           /* NULL if doing heap scan */
-   HeapScanDesc scan;          /* only valid in heap-scan case */
+   TableScanDesc scan;     /* only valid in storage-scan case */
    IndexScanDesc iscan;        /* only valid in index-scan case */
    Snapshot    snapshot;       /* snapshot to unregister at end of scan */
+   struct TupleTableSlot *slot;    /* slot holding the current tuple */
 }          SysScanDescData;
 
 #endif                         /* RELSCAN_H */
index cfdf33b4bd672387286aeb802bd9fbf11b24470f..cc74012f72fd34263aeefc4b3fcaff9c9bddb4c0 100644 (file)
 #include "storage/relfilenode.h"
 #include "utils/relcache.h"
 
-/* struct definition is private to rewriteheap.c */
-typedef struct RewriteStateData *RewriteState;
-
-extern RewriteState begin_heap_rewrite(Relation OldHeap, Relation NewHeap,
-                  TransactionId OldestXmin, TransactionId FreezeXid,
-                  MultiXactId MultiXactCutoff, bool use_wal);
-extern void end_heap_rewrite(RewriteState state);
-extern void rewrite_heap_tuple(RewriteState state, HeapTuple oldTuple,
-                  HeapTuple newTuple);
-extern bool rewrite_heap_dead_tuple(RewriteState state, HeapTuple oldTuple);
-
 /*
  * On-Disk data format for an individual logical rewrite mapping.
  */
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h
new file mode 100644 (file)
index 0000000..7364afa
--- /dev/null
@@ -0,0 +1,750 @@
+/*-------------------------------------------------------------------------
+ *
+ * tableam.h
+ *   POSTGRES table access method definitions.
+ *
+ *
+ * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/access/tableam.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef TABLEAM_H
+#define TABLEAM_H
+
+#include "postgres.h"
+
+#include "access/heapam.h"
+#include "access/relscan.h"
+#include "catalog/index.h"
+#include "executor/tuptable.h"
+#include "nodes/execnodes.h"
+#include "nodes/nodes.h"
+#include "fmgr.h"
+#include "utils/guc.h"
+#include "utils/rel.h"
+#include "utils/snapmgr.h"
+#include "utils/snapshot.h"
+#include "utils/tqual.h"
+
+
+#define DEFAULT_TABLE_ACCESS_METHOD    "heap"
+
+extern char *default_table_access_method;
+extern bool synchronize_seqscans;
+
+/*
+ * Storage routine function hooks
+ */
+typedef bool (*SnapshotSatisfies_function) (Relation rel,
+                                           TupleTableSlot *slot,
+                                           Snapshot snapshot);
+
+typedef void (*TupleInsert_function) (Relation rel, TupleTableSlot *slot, CommandId cid,
+                                    int options, BulkInsertState bistate);
+
+typedef void (*TupleInsertSpeculative_function) (Relation rel,
+                                                TupleTableSlot *slot,
+                                                CommandId cid,
+                                                int options,
+                                                BulkInsertState bistate,
+                                                uint32 specToken);
+
+
+typedef void (*TupleCompleteSpeculative_function) (Relation rel,
+                                                 TupleTableSlot *slot,
+                                                 uint32 specToken,
+                                                 bool succeeded);
+
+typedef HTSU_Result (*TupleDelete_function) (Relation relation,
+                                            ItemPointer tid,
+                                            CommandId cid,
+                                            Snapshot snapshot,
+                                            Snapshot crosscheck,
+                                            bool wait,
+                                            HeapUpdateFailureData *hufd,
+                                            bool changingPart);
+
+typedef HTSU_Result (*TupleUpdate_function) (Relation relation,
+                                            ItemPointer otid,
+                                            TupleTableSlot *slot,
+                                            CommandId cid,
+                                            Snapshot snapshot,
+                                            Snapshot crosscheck,
+                                            bool wait,
+                                            HeapUpdateFailureData *hufd,
+                                            LockTupleMode *lockmode,
+                                            bool *update_indexes);
+
+typedef bool (*TupleFetchRowVersion_function) (Relation relation,
+                                              ItemPointer tid,
+                                              Snapshot snapshot,
+                                              TupleTableSlot *slot,
+                                              Relation stats_relation);
+
+typedef HTSU_Result (*TupleLock_function) (Relation relation,
+                                          ItemPointer tid,
+                                          Snapshot snapshot,
+                                          TupleTableSlot *slot,
+                                          CommandId cid,
+                                          LockTupleMode mode,
+                                          LockWaitPolicy wait_policy,
+                                          uint8 flags,
+                                          HeapUpdateFailureData *hufd);
+
+typedef void (*MultiInsert_function) (Relation relation, TupleTableSlot **slots, int nslots,
+                                     CommandId cid, int options, BulkInsertState bistate);
+
+typedef void (*TupleGetLatestTid_function) (Relation relation,
+                                           Snapshot snapshot,
+                                           ItemPointer tid);
+
+struct VacuumParams;
+typedef void (*RelationVacuum_function)(Relation onerel, int options,
+               struct VacuumParams *params, BufferAccessStrategy bstrategy);
+typedef void (*RelationScanAnalyzeNextBlock_function)(TableScanDesc scan, BlockNumber blockno,
+                                                     BufferAccessStrategy bstrategy);
+typedef bool (*RelationScanAnalyzeNextTuple_function)(TableScanDesc scan, TransactionId OldestXmin,
+                                                     double *liverows, double *deadrows, TupleTableSlot *slot);
+
+typedef void (*RelationCopyForCluster_function)(Relation NewHeap, Relation OldHeap, Relation OldIndex,
+                                      bool use_sort,
+                                      TransactionId OldestXmin, TransactionId FreezeXid, MultiXactId MultiXactCutoff,
+                                      double *num_tuples, double *tups_vacuumed, double *tups_recently_dead);
+
+typedef void (*RelationSync_function) (Relation relation);
+
+typedef const TupleTableSlotOps* (*SlotCallbacks_function) (Relation relation);
+
+typedef TableScanDesc (*ScanBegin_function) (Relation relation,
+                                           Snapshot snapshot,
+                                           int nkeys, ScanKey key,
+                                           ParallelTableScanDesc parallel_scan,
+                                           bool allow_strat,
+                                           bool allow_sync,
+                                           bool allow_pagemode,
+                                           bool is_bitmapscan,
+                                           bool is_samplescan,
+                                           bool temp_snap);
+
+typedef struct IndexFetchTableData* (*BeginIndexFetchTable_function) (Relation relation);
+typedef void (*ResetIndexFetchTable_function) (struct IndexFetchTableData* data);
+typedef void (*EndIndexFetchTable_function) (struct IndexFetchTableData* data);
+
+typedef void (*ScanSetlimits_function) (TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks);
+
+typedef TupleTableSlot *(*ScanGetnextSlot_function) (TableScanDesc scan,
+                                                    ScanDirection direction, TupleTableSlot *slot);
+
+typedef void (*ScanEnd_function) (TableScanDesc scan);
+
+
+typedef void (*ScanRescan_function) (TableScanDesc scan, ScanKey key, bool set_params,
+                                    bool allow_strat, bool allow_sync, bool allow_pagemode);
+typedef void (*ScanUpdateSnapshot_function) (TableScanDesc scan, Snapshot snapshot);
+
+typedef bool (*TupleFetchFollow_function)(struct IndexFetchTableData *scan,
+                                         ItemPointer tid,
+                                         Snapshot snapshot,
+                                         TupleTableSlot *slot,
+                                         bool *call_again, bool *all_dead);
+
+typedef double (*IndexBuildRangeScan_function)(Relation heapRelation,
+                                              Relation indexRelation,
+                                              IndexInfo *indexInfo,
+                                              bool allow_sync,
+                                              bool anyvisible,
+                                              BlockNumber start_blockno,
+                                              BlockNumber end_blockno,
+                                              IndexBuildCallback callback,
+                                              void *callback_state,
+                                              TableScanDesc scan);
+struct ValidateIndexState;
+typedef void (*IndexValidateScan_function)(Relation heapRelation,
+                                          Relation indexRelation,
+                                          IndexInfo *indexInfo,
+                                          Snapshot snapshot,
+                                          struct ValidateIndexState *state);
+
+typedef bool (*BitmapPagescan_function)(TableScanDesc scan,
+                                       TBMIterateResult *tbmres);
+
+typedef bool (*BitmapPagescanNext_function)(TableScanDesc scan,
+                                           TupleTableSlot *slot);
+
+struct SampleScanState;
+typedef bool (*SampleScanNextBlock_function)(TableScanDesc scan, struct SampleScanState *scanstate);
+typedef bool (*SampleScanNextTuple_function)(TableScanDesc scan, struct SampleScanState *scanstate, TupleTableSlot *slot);
+
+/*
+ * API struct for a table AM.  Note this must be allocated in a
+ * server-lifetime manner, typically as a static const struct.
+ */
+typedef struct TableAmRoutine
+{
+   NodeTag     type;
+
+   SlotCallbacks_function slot_callbacks;
+
+   SnapshotSatisfies_function snapshot_satisfies;
+
+   /* Operations on physical tuples */
+   TupleInsert_function tuple_insert;
+   TupleInsertSpeculative_function tuple_insert_speculative;
+   TupleCompleteSpeculative_function tuple_complete_speculative;
+   TupleUpdate_function tuple_update;
+   TupleDelete_function tuple_delete;
+   TupleFetchRowVersion_function tuple_fetch_row_version;
+   TupleLock_function tuple_lock;
+   MultiInsert_function multi_insert;
+   TupleGetLatestTid_function tuple_get_latest_tid;
+   TupleFetchFollow_function tuple_fetch_follow;
+
+   RelationVacuum_function relation_vacuum;
+   RelationScanAnalyzeNextBlock_function scan_analyze_next_block;
+   RelationScanAnalyzeNextTuple_function scan_analyze_next_tuple;
+   RelationCopyForCluster_function relation_copy_for_cluster;
+   RelationSync_function relation_sync;
+
+   /* Operations on relation scans */
+   ScanBegin_function scan_begin;
+   ScanSetlimits_function scansetlimits;
+   ScanGetnextSlot_function scan_getnextslot;
+
+   BitmapPagescan_function scan_bitmap_pagescan;
+   BitmapPagescanNext_function scan_bitmap_pagescan_next;
+
+   SampleScanNextBlock_function scan_sample_next_block;
+   SampleScanNextTuple_function scan_sample_next_tuple;
+
+   ScanEnd_function scan_end;
+   ScanRescan_function scan_rescan;
+   ScanUpdateSnapshot_function scan_update_snapshot;
+
+   BeginIndexFetchTable_function begin_index_fetch;
+   ResetIndexFetchTable_function reset_index_fetch;
+   EndIndexFetchTable_function end_index_fetch;
+
+
+   IndexBuildRangeScan_function index_build_range_scan;
+   IndexValidateScan_function index_validate_scan;
+}          TableAmRoutine;
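+
+/*
+ * Editor's illustration, not part of this patch: a minimal table AM
+ * handler.  Because the TableAmRoutine must outlive any relcache entry,
+ * it is declared as a static const struct and the SQL-callable handler
+ * simply returns a pointer to it.  "myam" and its callbacks are
+ * hypothetical names; a trivial AM could even return
+ * GetHeapamTableAmRoutine() to reuse the heap implementation wholesale.
+ *
+ * static const TableAmRoutine myam_methods = {
+ *     .type = T_TableAmRoutine,
+ *     .slot_callbacks = myam_slot_callbacks,
+ *     .scan_begin = myam_beginscan,
+ *     .scan_getnextslot = myam_getnextslot,
+ *     .scan_end = myam_endscan,
+ *     (remaining callbacks elided)
+ * };
+ *
+ * Datum
+ * myam_tableam_handler(PG_FUNCTION_ARGS)
+ * {
+ *     PG_RETURN_POINTER(&myam_methods);
+ * }
+ */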
+
+static inline const TupleTableSlotOps*
+table_slot_callbacks(Relation relation)
+{
+   const TupleTableSlotOps *tts_cb;
+
+   tts_cb = relation->rd_tableamroutine->slot_callbacks(relation);
+
+   return tts_cb;
+}
+
+extern TupleTableSlot *table_gimmegimmeslot(Relation relation, List **reglist);
+
+/*
+ * Inline wrappers around the table AM callbacks
+ */
+
+/*
+ * table_fetch_row_version     - retrieve tuple with given tid
+ *
+ *  XXX: This shouldn't just take a tid, but tid + additional information
+ */
+static inline bool
+table_fetch_row_version(Relation r,
+                       ItemPointer tid,
+                       Snapshot snapshot,
+                       TupleTableSlot *slot,
+                       Relation stats_relation)
+{
+   return r->rd_tableamroutine->tuple_fetch_row_version(r, tid,
+                                                        snapshot, slot,
+                                                        stats_relation);
+}
+
+
+/*
+ * table_lock_tuple - lock a tuple in shared or exclusive mode
+ *
+ *  XXX: This shouldn't just take a tid, but tid + additional information
+ */
+static inline HTSU_Result
+table_lock_tuple(Relation relation, ItemPointer tid, Snapshot snapshot,
+                TupleTableSlot *slot, CommandId cid, LockTupleMode mode,
+                LockWaitPolicy wait_policy, uint8 flags,
+                HeapUpdateFailureData *hufd)
+{
+   return relation->rd_tableamroutine->tuple_lock(relation, tid, snapshot, slot,
+                                               cid, mode, wait_policy,
+                                               flags, hufd);
+}
+
+/* ----------------
+ *     table_beginscan_parallel - join a parallel scan
+ *
+ *     Caller must hold a suitable lock on the correct relation.
+ * ----------------
+ */
+static inline TableScanDesc
+table_beginscan_parallel(Relation relation, ParallelTableScanDesc parallel_scan)
+{
+   Snapshot    snapshot;
+
+   Assert(RelationGetRelid(relation) == parallel_scan->phs_relid);
+
+   if (!parallel_scan->phs_snapshot_any)
+   {
+       /* Snapshot was serialized -- restore it */
+       snapshot = RestoreSnapshot(parallel_scan->phs_snapshot_data);
+       RegisterSnapshot(snapshot);
+   }
+   else
+   {
+       /* SnapshotAny passed by caller (not serialized) */
+       snapshot = SnapshotAny;
+   }
+
+   return relation->rd_tableamroutine->scan_begin(relation, snapshot, 0, NULL, parallel_scan,
+                                               true, true, true, false, false, !parallel_scan->phs_snapshot_any);
+}
+
+/*
+ * table_setscanlimits - restrict range of a table scan
+ *
+ * startBlk is the page to start at
+ * numBlks is number of pages to scan (InvalidBlockNumber means "all")
+ */
+static inline void
+table_setscanlimits(TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks)
+{
+   sscan->rs_rd->rd_tableamroutine->scansetlimits(sscan, startBlk, numBlks);
+}
+
+
+/* ----------------
+ *     table_beginscan - begin relation scan
+ *
+ * table_beginscan is the "standard" case.
+ *
+ * table_beginscan_catalog differs in setting up its own temporary snapshot.
+ *
+ * table_beginscan_strat offers an extended API that lets the caller control
+ * whether a nondefault buffer access strategy can be used, and whether
+ * syncscan can be chosen (possibly resulting in the scan not starting from
+ * block zero).  Both of these default to true with plain table_beginscan.
+ *
+ * table_beginscan_bm is an alternative entry point for setting up a
+ * TableScanDesc for a bitmap heap scan.  Although that scan technology is
+ * really quite unlike a standard seqscan, there is just enough commonality
+ * to make it worth using the same data structure.
+ *
+ * table_beginscan_sampling is an alternative entry point for setting up a
+ * TableScanDesc for a TABLESAMPLE scan.  As with bitmap scans, it's worth
+ * using the same data structure although the behavior is rather different.
+ * In addition to the options offered by table_beginscan_strat, this call
+ * also allows control of whether page-mode visibility checking is used.
+ * ----------------
+ */
+static inline TableScanDesc
+table_beginscan(Relation relation, Snapshot snapshot,
+                 int nkeys, ScanKey key)
+{
+   return relation->rd_tableamroutine->scan_begin(relation, snapshot, nkeys, key, NULL,
+                                               true, true, true, false, false, false);
+}
+
+static inline TableScanDesc
+table_beginscan_catalog(Relation relation, int nkeys, ScanKey key)
+{
+   Oid         relid = RelationGetRelid(relation);
+   Snapshot    snapshot = RegisterSnapshot(GetCatalogSnapshot(relid));
+
+   return relation->rd_tableamroutine->scan_begin(relation, snapshot, nkeys, key, NULL,
+                                               true, true, true, false, false, true);
+}
+
+static inline TableScanDesc
+table_beginscan_strat(Relation relation, Snapshot snapshot,
+                       int nkeys, ScanKey key,
+                       bool allow_strat, bool allow_sync)
+{
+   return relation->rd_tableamroutine->scan_begin(relation, snapshot, nkeys, key, NULL,
+                                               allow_strat, allow_sync, true,
+                                               false, false, false);
+}
+
+static inline TableScanDesc
+table_beginscan_bm(Relation relation, Snapshot snapshot,
+                    int nkeys, ScanKey key)
+{
+   return relation->rd_tableamroutine->scan_begin(relation, snapshot, nkeys, key, NULL,
+                                               false, false, true, true, false, false);
+}
+
+static inline TableScanDesc
+table_beginscan_sampling(Relation relation, Snapshot snapshot,
+                          int nkeys, ScanKey key,
+                          bool allow_strat, bool allow_sync, bool allow_pagemode)
+{
+   return relation->rd_tableamroutine->scan_begin(relation, snapshot, nkeys, key, NULL,
+                                               allow_strat, allow_sync, allow_pagemode,
+                                               false, true, false);
+}
+
+static inline TableScanDesc
+table_beginscan_analyze(Relation relation)
+{
+   return relation->rd_tableamroutine->scan_begin(relation, NULL, 0, NULL, NULL,
+                                               true, false, true,
+                                               false, true, false);
+}
+
+
+/* ----------------
+ *     table_rescan    - restart a relation scan
+ * ----------------
+ */
+static inline void
+table_rescan(TableScanDesc scan,
+              ScanKey key)
+{
+   scan->rs_rd->rd_tableamroutine->scan_rescan(scan, key, false, false, false, false);
+}
+
+/* ----------------
+ *     table_rescan_set_params - restart a relation scan after changing params
+ *
+ * This call allows changing the buffer strategy, syncscan, and pagemode
+ * options before starting a fresh scan.  Note that although the actual use
+ * of syncscan might change (effectively, enabling or disabling reporting),
+ * the previously selected startblock will be kept.
+ * ----------------
+ */
+static inline void
+table_rescan_set_params(TableScanDesc scan, ScanKey key,
+                         bool allow_strat, bool allow_sync, bool allow_pagemode)
+{
+   scan->rs_rd->rd_tableamroutine->scan_rescan(scan, key, true,
+                                            allow_strat, allow_sync, (allow_pagemode && IsMVCCSnapshot(scan->rs_snapshot)));
+}
+
+/* ----------------
+ *     table_endscan   - end relation scan
+ *
+ *     TODO: see how to integrate with index scans; check handling of
+ *     reldesc caching.
+ * ----------------
+ */
+static inline void
+table_endscan(TableScanDesc scan)
+{
+   scan->rs_rd->rd_tableamroutine->scan_end(scan);
+}
+
+
+/* ----------------
+ *     table_scan_update_snapshot
+ *
+ *     Update snapshot info in table scan descriptor.
+ * ----------------
+ */
+static inline void
+table_scan_update_snapshot(TableScanDesc scan, Snapshot snapshot)
+{
+   scan->rs_rd->rd_tableamroutine->scan_update_snapshot(scan, snapshot);
+}
+
+
+static inline bool
+table_scan_bitmap_pagescan(TableScanDesc scan,
+                          TBMIterateResult *tbmres)
+{
+   return scan->rs_rd->rd_tableamroutine->scan_bitmap_pagescan(scan, tbmres);
+}
+
+static inline bool
+table_scan_bitmap_pagescan_next(TableScanDesc scan, TupleTableSlot *slot)
+{
+   return scan->rs_rd->rd_tableamroutine->scan_bitmap_pagescan_next(scan, slot);
+}
+
+static inline bool
+table_scan_sample_next_block(TableScanDesc scan, struct SampleScanState *scanstate)
+{
+   return scan->rs_rd->rd_tableamroutine->scan_sample_next_block(scan, scanstate);
+}
+
+static inline bool
+table_scan_sample_next_tuple(TableScanDesc scan, struct SampleScanState *scanstate, TupleTableSlot *slot)
+{
+   return scan->rs_rd->rd_tableamroutine->scan_sample_next_tuple(scan, scanstate, slot);
+}
+
+static inline void
+table_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno, BufferAccessStrategy bstrategy)
+{
+   scan->rs_rd->rd_tableamroutine->scan_analyze_next_block(scan, blockno, bstrategy);
+}
+
+static inline bool
+table_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, double *liverows, double *deadrows, TupleTableSlot *slot)
+{
+   return scan->rs_rd->rd_tableamroutine->scan_analyze_next_tuple(scan, OldestXmin, liverows, deadrows, slot);
+}
+
+static inline TupleTableSlot *
+table_scan_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
+{
+   slot->tts_tableOid = RelationGetRelid(sscan->rs_rd);
+   return sscan->rs_rd->rd_tableamroutine->scan_getnextslot(sscan, direction, slot);
+}
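+
+/*
+ * Editor's sketch of a caller, not part of this patch: a whole-table scan
+ * written against the slot-based API.  count_visible_rows() is a
+ * hypothetical function; the slot must come from table_gimmegimmeslot()
+ * so that it matches the AM's slot callbacks.
+ *
+ * static uint64
+ * count_visible_rows(Relation rel, Snapshot snapshot)
+ * {
+ *     TableScanDesc scan = table_beginscan(rel, snapshot, 0, NULL);
+ *     TupleTableSlot *slot = table_gimmegimmeslot(rel, NULL);
+ *     uint64      ntuples = 0;
+ *
+ *     while (!TupIsNull(table_scan_getnextslot(scan, ForwardScanDirection, slot)))
+ *         ntuples++;
+ *
+ *     table_endscan(scan);
+ *     ExecDropSingleTupleTableSlot(slot);
+ *     return ntuples;
+ * }
+ */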
+
+static inline IndexFetchTableData*
+table_begin_index_fetch_table(Relation rel)
+{
+   return rel->rd_tableamroutine->begin_index_fetch(rel);
+}
+
+static inline void
+table_reset_index_fetch_table(struct IndexFetchTableData* scan)
+{
+   scan->rel->rd_tableamroutine->reset_index_fetch(scan);
+}
+
+static inline void
+table_end_index_fetch_table(struct IndexFetchTableData* scan)
+{
+   scan->rel->rd_tableamroutine->end_index_fetch(scan);
+}
+
+/*
+ * Insert a tuple from a slot into table AM routine
+ */
+static inline void
+table_insert(Relation relation, TupleTableSlot *slot, CommandId cid,
+              int options, BulkInsertState bistate)
+{
+   relation->rd_tableamroutine->tuple_insert(relation, slot, cid, options,
+                                             bistate);
+}
+
+static inline void
+table_insert_speculative(Relation relation, TupleTableSlot *slot, CommandId cid,
+                        int options, BulkInsertState bistate, uint32 specToken)
+{
+   relation->rd_tableamroutine->tuple_insert_speculative(relation, slot, cid, options,
+                                                         bistate, specToken);
+}
+
+static inline void
+table_complete_speculative(Relation relation, TupleTableSlot *slot, uint32 specToken,
+                               bool succeeded)
+{
+   relation->rd_tableamroutine->tuple_complete_speculative(relation, slot, specToken, succeeded);
+}
+
+/*
+ * Delete a tuple from tid using table AM routine
+ */
+static inline HTSU_Result
+table_delete(Relation relation, ItemPointer tid, CommandId cid,
+            Snapshot snapshot, Snapshot crosscheck, bool wait,
+            HeapUpdateFailureData *hufd, bool changingPart)
+{
+   return relation->rd_tableamroutine->tuple_delete(relation, tid, cid,
+                                                    snapshot, crosscheck,
+                                                    wait, hufd, changingPart);
+}
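+
+/*
+ * Editor's sketch, not part of this patch: handling table_delete()'s
+ * HTSU_Result in the style of the executor.  "rel", "tid", "cid" and
+ * "snapshot" are assumed caller-provided; a real caller (see ExecDelete)
+ * also distinguishes HeapTupleSelfUpdated and may re-fetch the row via
+ * hufd.ctid instead of erroring out.
+ *
+ * HeapUpdateFailureData hufd;
+ * HTSU_Result result;
+ *
+ * result = table_delete(rel, tid, cid, snapshot, InvalidSnapshot,
+ *                       true, &hufd, false);
+ * if (result != HeapTupleMayBeUpdated)
+ *     ereport(ERROR,
+ *             (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+ *              errmsg("could not serialize access due to concurrent update")));
+ */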
+
+/*
+ * update a tuple from tid using table AM routine
+ */
+static inline HTSU_Result
+table_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
+            CommandId cid, Snapshot snapshot, Snapshot crosscheck, bool wait,
+            HeapUpdateFailureData *hufd, LockTupleMode *lockmode,
+            bool *update_indexes)
+{
+   return relation->rd_tableamroutine->tuple_update(relation, otid, slot,
+                                                    cid, snapshot, crosscheck,
+                                                    wait, hufd,
+                                                    lockmode, update_indexes);
+}
+
+static inline bool
+table_fetch_follow(struct IndexFetchTableData *scan,
+                  ItemPointer tid,
+                  Snapshot snapshot,
+                  TupleTableSlot *slot,
+                  bool *call_again, bool *all_dead)
+{
+
+   return scan->rel->rd_tableamroutine->tuple_fetch_follow(scan, tid, snapshot,
+                                                          slot, call_again,
+                                                          all_dead);
+}
+
+static inline bool
+table_fetch_follow_check(Relation rel,
+                        ItemPointer tid,
+                        Snapshot snapshot,
+                        bool *all_dead)
+{
+   IndexFetchTableData *scan = table_begin_index_fetch_table(rel);
+   TupleTableSlot *slot = table_gimmegimmeslot(rel, NULL);
+   bool call_again = false;
+   bool found;
+
+   found = table_fetch_follow(scan, tid, snapshot, slot, &call_again, all_dead);
+
+   table_end_index_fetch_table(scan);
+   ExecDropSingleTupleTableSlot(slot);
+
+   return found;
+}
+
+/*
+ * table_multi_insert  - insert multiple tuples into a table
+ */
+static inline void
+table_multi_insert(Relation relation, TupleTableSlot **slots, int nslots,
+                    CommandId cid, int options, BulkInsertState bistate)
+{
+   relation->rd_tableamroutine->multi_insert(relation, slots, nslots,
+                                          cid, options, bistate);
+}
+
+static inline void
+table_get_latest_tid(Relation relation,
+                      Snapshot snapshot,
+                      ItemPointer tid)
+{
+   relation->rd_tableamroutine->tuple_get_latest_tid(relation, snapshot, tid);
+}
+
+
+static inline void
+table_vacuum_rel(Relation rel, int options,
+            struct VacuumParams *params, BufferAccessStrategy bstrategy)
+{
+   rel->rd_tableamroutine->relation_vacuum(rel, options, params, bstrategy);
+}
+
+
+/* XXX: Move arguments to struct? */
+static inline void
+table_copy_for_cluster(Relation OldHeap, Relation NewHeap, Relation OldIndex,
+                    bool use_sort,
+                    TransactionId OldestXmin, TransactionId FreezeXid, MultiXactId MultiXactCutoff,
+                    double *num_tuples, double *tups_vacuumed, double *tups_recently_dead)
+{
+   OldHeap->rd_tableamroutine->relation_copy_for_cluster(OldHeap, NewHeap, OldIndex,
+                                                         use_sort,
+                                                         OldestXmin, FreezeXid, MultiXactCutoff,
+                                                     num_tuples, tups_vacuumed, tups_recently_dead);
+}
+
+/*
+ * table_sync      - sync a table to disk, for use when no WAL has been written
+ */
+static inline void
+table_sync(Relation rel)
+{
+   rel->rd_tableamroutine->relation_sync(rel);
+}
+
+static inline double
+table_index_build_scan(Relation heapRelation,
+                      Relation indexRelation,
+                      IndexInfo *indexInfo,
+                      bool allow_sync,
+                      IndexBuildCallback callback,
+                      void *callback_state,
+                      TableScanDesc scan)
+{
+   return heapRelation->rd_tableamroutine->index_build_range_scan(
+       heapRelation,
+       indexRelation,
+       indexInfo,
+       allow_sync,
+       false,
+       0,
+       InvalidBlockNumber,
+       callback,
+       callback_state,
+       scan);
+}
+
+static inline void
+table_index_validate_scan(Relation heapRelation,
+                         Relation indexRelation,
+                         IndexInfo *indexInfo,
+                         Snapshot snapshot,
+                         struct ValidateIndexState *state)
+{
+   heapRelation->rd_tableamroutine->index_validate_scan(
+       heapRelation,
+       indexRelation,
+       indexInfo,
+       snapshot,
+       state);
+}
+
+static inline double
+table_index_build_range_scan(Relation heapRelation,
+                            Relation indexRelation,
+                            IndexInfo *indexInfo,
+                            bool allow_sync,
+                            bool anyvisible,
+                            BlockNumber start_blockno,
+                            BlockNumber numblocks,
+                            IndexBuildCallback callback,
+                            void *callback_state,
+                            TableScanDesc scan)
+{
+   return heapRelation->rd_tableamroutine->index_build_range_scan(
+       heapRelation,
+       indexRelation,
+       indexInfo,
+       allow_sync,
+       anyvisible,
+       start_blockno,
+       numblocks,
+       callback,
+       callback_state,
+       scan);
+}
+
+/*
+ * Return true iff tuple in slot satisfies the snapshot.
+ *
+ * Notes:
+ * Assumes slot's tuple is valid.
+ * Hint bits in the HeapTuple's t_infomask may be updated as a side effect;
+ * if so, the indicated buffer is marked dirty.
+ *
+ * XXX: Add _tuple_ to name?
+ */
+static inline bool
+table_satisfies_snapshot(Relation rel, TupleTableSlot *slot, Snapshot snapshot)
+{
+   return rel->rd_tableamroutine->snapshot_satisfies(rel, slot, snapshot);
+}
+
+extern BlockNumber table_parallelscan_nextpage(TableScanDesc scan);
+extern void table_parallelscan_startblock_init(TableScanDesc scan);
+extern Size table_parallelscan_estimate(Snapshot snapshot);
+extern void table_parallelscan_initialize(ParallelTableScanDesc target,
+                                         Relation relation, Snapshot snapshot);
+extern void table_parallelscan_reinitialize(ParallelTableScanDesc parallel_scan);
+
+extern const TableAmRoutine *GetTableAmRoutine(Oid amhandler);
+extern const TableAmRoutine *GetTableAmRoutineByAmId(Oid amoid);
+extern const TableAmRoutine *GetHeapamTableAmRoutine(void);
+
+extern bool check_default_table_access_method(char **newval, void **extra,
+                                   GucSource source);
+
+#endif     /* TABLEAM_H */
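
To make the shape of this API concrete, here is a minimal caller sketch. It is illustrative only: table_beginscan/table_endscan are assumed to be the scan setup/teardown wrappers from elsewhere in this patch (they do not appear in this hunk), and the sketch assumes a NULL return from table_scan_getnextslot signals end of scan, mirroring the use of table_gimmegimmeslot and ExecDropSingleTupleTableSlot in table_fetch_follow_check above.

    /* Illustrative sketch only, not part of the patch. */
    TableScanDesc scan;
    TupleTableSlot *slot = table_gimmegimmeslot(rel, NULL); /* AM-appropriate slot */

    scan = table_beginscan(rel, snapshot, 0, NULL);     /* assumed wrapper */
    while (table_scan_getnextslot(scan, ForwardScanDirection, slot) != NULL)
    {
        /* process one tuple; tts_tableOid was already set by the wrapper */
    }
    table_endscan(scan);                                /* assumed wrapper */
    ExecDropSingleTupleTableSlot(slot);
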
index 3ecd4737e5decf00ed55d3ad300c68c482508f12..3947d7b915baba7d441f2c580b615c0b59c4831d 100644 (file)
@@ -34,7 +34,7 @@ typedef void (*BeginSampleScan_function) (SampleScanState *node,
                                          int nparams,
                                          uint32 seed);
 
-typedef BlockNumber (*NextSampleBlock_function) (SampleScanState *node);
+typedef BlockNumber (*NextSampleBlock_function) (SampleScanState *node, BlockNumber nblocks);
 
 typedef OffsetNumber (*NextSampleTuple_function) (SampleScanState *node,
                                                  BlockNumber blockno,
index 56a341a62228e183ac724c2468fefe6bef24a8f8..cedc19fbcf5a8e0f46030ae07fb567065d70682d 100644 (file)
@@ -45,6 +45,7 @@ extern Relation heap_create(const char *relname,
            Oid reltablespace,
            Oid relid,
            Oid relfilenode,
+           Oid accessmtd,
            TupleDesc tupDesc,
            char relkind,
            char relpersistence,
@@ -59,6 +60,7 @@ extern Oid heap_create_with_catalog(const char *relname,
                         Oid reltypeid,
                         Oid reloftypeid,
                         Oid ownerid,
+                        Oid accessmtd,
                         TupleDesc tupdesc,
                         List *cooked_constraints,
                         char relkind,
index 35a29f3498f1b423d4ba33e848af5ef4d339d625..5e53001d78cfa1857e5dcac08790ac167aae484f 100644 (file)
@@ -20,7 +20,7 @@
 
 #define DEFAULT_INDEX_TYPE "btree"
 
-/* Typedef for callback function for IndexBuildHeapScan */
+/* Typedef for callback function for table_index_build_scan */
 typedef void (*IndexBuildCallback) (Relation index,
                                    HeapTuple htup,
                                    Datum *values,
@@ -37,6 +37,15 @@ typedef enum
    INDEX_DROP_SET_DEAD
 } IndexStateFlagsAction;
 
+/* state info for validate_index bulkdelete callback */
+typedef struct ValidateIndexState
+{
+   Tuplesortstate *tuplesort;  /* for sorting the index TIDs */
+   /* statistics (for debug purposes only): */
+   double      htups,
+               itups,
+               tups_inserted;
+} ValidateIndexState;
 
 extern void index_check_primary_key(Relation heapRel,
                        IndexInfo *indexInfo,
@@ -111,24 +120,6 @@ extern void index_build(Relation heapRelation,
            bool isreindex,
            bool parallel);
 
-extern double IndexBuildHeapScan(Relation heapRelation,
-                  Relation indexRelation,
-                  IndexInfo *indexInfo,
-                  bool allow_sync,
-                  IndexBuildCallback callback,
-                  void *callback_state,
-                  HeapScanDesc scan);
-extern double IndexBuildHeapRangeScan(Relation heapRelation,
-                       Relation indexRelation,
-                       IndexInfo *indexInfo,
-                       bool allow_sync,
-                       bool anyvisible,
-                       BlockNumber start_blockno,
-                       BlockNumber end_blockno,
-                       IndexBuildCallback callback,
-                       void *callback_state,
-                       HeapScanDesc scan);
-
 extern void validate_index(Oid heapId, Oid indexId, Snapshot snapshot);
 
 extern void index_set_state_flags(Oid indexId, IndexStateFlagsAction action);
@@ -155,4 +146,45 @@ extern void RestoreReindexState(void *reindexstate);
 
 extern void IndexSetParentIndex(Relation idx, Oid parentOid);
 
+
+/*
+ * itemptr_encode - Encode ItemPointer as int64/int8
+ *
+ * This representation must produce values encoded as int64 that sort in the
+ * same order as their corresponding original TID values would (using the
+ * default int8 opclass to produce a result equivalent to the default TID
+ * opclass).
+ *
+ * As noted in validate_index(), this can be significantly faster.
+ */
+static inline int64
+itemptr_encode(ItemPointer itemptr)
+{
+   BlockNumber block = ItemPointerGetBlockNumber(itemptr);
+   OffsetNumber offset = ItemPointerGetOffsetNumber(itemptr);
+   int64       encoded;
+
+   /*
+    * Use the 16 least significant bits for the offset.  32 adjacent bits are
+    * used for the block number.  Since remaining bits are unused, there
+    * cannot be negative encoded values (We assume a two's complement
+    * representation).
+    */
+   encoded = ((uint64) block << 16) | (uint16) offset;
+
+   return encoded;
+}
+
+/*
+ * itemptr_decode - Decode int64/int8 representation back to ItemPointer
+ */
+static inline void
+itemptr_decode(ItemPointer itemptr, int64 encoded)
+{
+   BlockNumber block = (BlockNumber) (encoded >> 16);
+   OffsetNumber offset = (OffsetNumber) (encoded & 0xFFFF);
+
+   ItemPointerSet(itemptr, block, offset);
+}
+
 #endif                         /* INDEX_H */
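
A quick round-trip check of the encoding above (illustrative snippet, not part of the patch): block 3, offset 7 encodes to (3 << 16) | 7 == 196615, and decoding recovers the original TID.

    ItemPointerData tid;
    int64       encoded;

    ItemPointerSet(&tid, 3, 7);     /* block 3, offset 7 */
    encoded = itemptr_encode(&tid); /* (3 << 16) | 7 == 196615 */

    itemptr_decode(&tid, encoded);
    Assert(ItemPointerGetBlockNumber(&tid) == 3);
    Assert(ItemPointerGetOffsetNumber(&tid) == 7);
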
index bef53a319af46a379b93b763bee0998a32ddb99e..0f44c420940c72f35e2ca807087e3e1a56bad11e 100644 (file)
@@ -30,5 +30,8 @@
 { oid => '3580', oid_symbol => 'BRIN_AM_OID',
   descr => 'block range index (BRIN) access method',
   amname => 'brin', amhandler => 'brinhandler', amtype => 'i' },
+{ oid => '4001', oid_symbol => 'HEAP_TABLE_AM_OID',
+  descr => 'heap table access method',
+  amname => 'heap', amhandler => 'heap_tableam_handler', amtype => 't' },
 
 ]
index 57d65f830fc2ce826b73d56b92a570a79e14ebc0..6db7e4b21e94f0284de37fb40660fe66f3363193 100644 (file)
@@ -53,6 +53,7 @@ typedef FormData_pg_am *Form_pg_am;
  * Allowed values for amtype
  */
 #define AMTYPE_INDEX                   'i' /* index access method */
+#define AMTYPE_TABLE                   't' /* table access method */
 
 #endif                         /* EXPOSE_TO_CLIENT_CODE */
 
index 5a884a852b531538a1853e747483afe42c2021ee..b43c37ff14579306d4a4855d9f26853c295133d5 100644 (file)
@@ -22,7 +22,7 @@
 
 { oid => '1247',
   relname => 'pg_type', relnamespace => 'PGNSP', reltype => '71',
-  reloftype => '0', relowner => 'PGUID', relam => '0', relfilenode => '0',
+  reloftype => '0', relowner => 'PGUID', relam => 'PGHEAPAM', relfilenode => '0',
   reltablespace => '0', relpages => '0', reltuples => '0', relallvisible => '0',
   reltoastrelid => '0', relhasindex => 'f', relisshared => 'f',
   relpersistence => 'p', relkind => 'r', relnatts => '31', relchecks => '0',
@@ -33,7 +33,7 @@
   reloptions => '_null_', relpartbound => '_null_' },
 { oid => '1249',
   relname => 'pg_attribute', relnamespace => 'PGNSP', reltype => '75',
-  reloftype => '0', relowner => 'PGUID', relam => '0', relfilenode => '0',
+  reloftype => '0', relowner => 'PGUID', relam => 'PGHEAPAM', relfilenode => '0',
   reltablespace => '0', relpages => '0', reltuples => '0', relallvisible => '0',
   reltoastrelid => '0', relhasindex => 'f', relisshared => 'f',
   relpersistence => 'p', relkind => 'r', relnatts => '24', relchecks => '0',
@@ -44,7 +44,7 @@
   reloptions => '_null_', relpartbound => '_null_' },
 { oid => '1255',
   relname => 'pg_proc', relnamespace => 'PGNSP', reltype => '81',
-  reloftype => '0', relowner => 'PGUID', relam => '0', relfilenode => '0',
+  reloftype => '0', relowner => 'PGUID', relam => 'PGHEAPAM', relfilenode => '0',
   reltablespace => '0', relpages => '0', reltuples => '0', relallvisible => '0',
   reltoastrelid => '0', relhasindex => 'f', relisshared => 'f',
   relpersistence => 'p', relkind => 'r', relnatts => '29', relchecks => '0',
@@ -55,7 +55,7 @@
   reloptions => '_null_', relpartbound => '_null_' },
 { oid => '1259',
   relname => 'pg_class', relnamespace => 'PGNSP', reltype => '83',
-  reloftype => '0', relowner => 'PGUID', relam => '0', relfilenode => '0',
+  reloftype => '0', relowner => 'PGUID', relam => 'PGHEAPAM', relfilenode => '0',
   reltablespace => '0', relpages => '0', reltuples => '0', relallvisible => '0',
   reltoastrelid => '0', relhasindex => 'f', relisshared => 'f',
   relpersistence => 'p', relkind => 'r', relnatts => '33', relchecks => '0',
index 84e63c6d06a2ad0de1d19c68655a82848a3e7e6c..873a5b8d22b2f0a3d5215897bbc8ad0150920492 100644 (file)
@@ -36,7 +36,7 @@ CATALOG(pg_class,1259,RelationRelationId) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83,Relat
    Oid         reloftype;      /* OID of entry in pg_type for underlying
                                 * composite type */
    Oid         relowner;       /* class owner */
-   Oid         relam;          /* index access method; 0 if not an index */
+   Oid         relam;          /* access method; 0 if not a table / index */
    Oid         relfilenode;    /* identifier of physical storage file */
 
    /* relfilenode == 0 means it is a "mapped" relation, see relmapper.c */
index f79fcfe029f1545599759c0f22e89a952bc86ea1..112fe90ada9c53039e4140878f61f1437120b891 100644 (file)
   proname => 'int4', prorettype => 'int4', proargtypes => 'float4',
   prosrc => 'ftoi4' },
 
+# Table access method handlers
+{ oid => '4002', oid_symbol => 'HEAP_TABLE_AM_HANDLER_OID',
+  descr => 'row-oriented heap table access method handler',
+  proname => 'heap_tableam_handler', provolatile => 'v', prorettype => 'table_am_handler',
+  proargtypes => 'internal', prosrc => 'heap_tableam_handler' },
+
 # Index access method handlers
 { oid => '330', descr => 'btree index access method handler',
   proname => 'bthandler', provolatile => 'v', prorettype => 'index_am_handler',
 { oid => '3312', descr => 'I/O',
   proname => 'tsm_handler_out', prorettype => 'cstring',
   proargtypes => 'tsm_handler', prosrc => 'tsm_handler_out' },
+{ oid => '3425', descr => 'I/O',
+  proname => 'table_am_handler_in', proisstrict => 'f',
+  prorettype => 'table_am_handler', proargtypes => 'cstring',
+  prosrc => 'table_am_handler_in' },
+{ oid => '3426', descr => 'I/O',
+  proname => 'table_am_handler_out', prorettype => 'cstring',
+  proargtypes => 'table_am_handler', prosrc => 'table_am_handler_out' },
 
 # tablesample method handlers
 { oid => '3313', descr => 'BERNOULLI tablesample method handler',
index d295eae1b99aa3994034f60b9e7a0cf8c2bdd752..f37856d2b4f52d4a7025a706bddd2c4a22735374 100644 (file)
   typcategory => 'P', typinput => 'tsm_handler_in',
   typoutput => 'tsm_handler_out', typreceive => '-', typsend => '-',
   typalign => 'i' },
+{ oid => '3998',
+  typname => 'table_am_handler', typlen => '4', typbyval => 't', typtype => 'p',
+  typcategory => 'P', typinput => 'table_am_handler_in',
+  typoutput => 'table_am_handler_out', typreceive => '-', typsend => '-',
+  typalign => 'i' },
 { oid => '3831',
   descr => 'pseudo-type representing a polymorphic base type that is a range',
   typname => 'anyrange', typlen => '-1', typbyval => 'f', typtype => 'p',
index 1031448c1451b7d60e466caa4c5539d2985ef144..0f02baee14083009fa509ca15c1781afc5a1359c 100644 (file)
@@ -35,8 +35,8 @@ typedef struct TriggerData
    HeapTuple   tg_trigtuple;
    HeapTuple   tg_newtuple;
    Trigger    *tg_trigger;
-   Buffer      tg_trigtuplebuf;
-   Buffer      tg_newtuplebuf;
+   TupleTableSlot *tg_trigslot;
+   TupleTableSlot *tg_newslot;
    Tuplestorestate *tg_oldtable;
    Tuplestorestate *tg_newtable;
 } TriggerData;
@@ -186,15 +186,15 @@ extern void ExecBSInsertTriggers(EState *estate,
 extern void ExecASInsertTriggers(EState *estate,
                     ResultRelInfo *relinfo,
                     TransitionCaptureState *transition_capture);
-extern TupleTableSlot *ExecBRInsertTriggers(EState *estate,
+extern bool ExecBRInsertTriggers(EState *estate,
                     ResultRelInfo *relinfo,
                     TupleTableSlot *slot);
 extern void ExecARInsertTriggers(EState *estate,
                     ResultRelInfo *relinfo,
-                    HeapTuple trigtuple,
+                    TupleTableSlot *slot,
                     List *recheckIndexes,
                     TransitionCaptureState *transition_capture);
-extern TupleTableSlot *ExecIRInsertTriggers(EState *estate,
+extern bool ExecIRInsertTriggers(EState *estate,
                     ResultRelInfo *relinfo,
                     TupleTableSlot *slot);
 extern void ExecBSDeleteTriggers(EState *estate,
@@ -221,7 +221,7 @@ extern void ExecBSUpdateTriggers(EState *estate,
 extern void ExecASUpdateTriggers(EState *estate,
                     ResultRelInfo *relinfo,
                     TransitionCaptureState *transition_capture);
-extern TupleTableSlot *ExecBRUpdateTriggers(EState *estate,
+extern bool ExecBRUpdateTriggers(EState *estate,
                     EPQState *epqstate,
                     ResultRelInfo *relinfo,
                     ItemPointer tupleid,
@@ -231,10 +231,10 @@ extern void ExecARUpdateTriggers(EState *estate,
                     ResultRelInfo *relinfo,
                     ItemPointer tupleid,
                     HeapTuple fdw_trigtuple,
-                    HeapTuple newtuple,
+                    TupleTableSlot *slot,
                     List *recheckIndexes,
                     TransitionCaptureState *transition_capture);
-extern TupleTableSlot *ExecIRUpdateTriggers(EState *estate,
+extern bool ExecIRUpdateTriggers(EState *estate,
                     ResultRelInfo *relinfo,
                     HeapTuple trigtuple,
                     TupleTableSlot *slot);
@@ -258,9 +258,9 @@ extern bool AfterTriggerPendingOnRel(Oid relid);
  * in utils/adt/ri_triggers.c
  */
 extern bool RI_FKey_pk_upd_check_required(Trigger *trigger, Relation pk_rel,
-                             HeapTuple old_row, HeapTuple new_row);
+                             TupleTableSlot *old_slot, TupleTableSlot *new_slot);
 extern bool RI_FKey_fk_upd_check_required(Trigger *trigger, Relation fk_rel,
-                             HeapTuple old_row, HeapTuple new_row);
+                             TupleTableSlot *old_slot, TupleTableSlot  *new_slot);
 extern bool RI_Initial_Check(Trigger *trigger,
                 Relation fk_rel, Relation pk_rel);
 
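
The BR/IR trigger functions now return bool and leave the (possibly trigger-modified) tuple in the passed-in slot, instead of returning a new slot. A hedged sketch of the adjusted calling convention (the real caller changes live in the executor, e.g. nodeModifyTable.c, and are not shown in this hunk):

    /* Sketch only: a false return means the row operation is skipped. */
    if (resultRelInfo->ri_TrigDesc &&
        resultRelInfo->ri_TrigDesc->trig_insert_before_row)
    {
        if (!ExecBRInsertTriggers(estate, resultRelInfo, slot))
            return NULL;        /* "do nothing" trigger outcome */
    }
    /* otherwise proceed with the tuple now in *slot */
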
index 2feec628c03212fb07105fef73d8ff0e5ff398ab..ded4baf00475db575acb24efb30cd7dd9b6d2427 100644 (file)
@@ -183,19 +183,14 @@ extern void ExecWithCheckOptions(WCOKind kind, ResultRelInfo *resultRelInfo,
 extern LockTupleMode ExecUpdateLockMode(EState *estate, ResultRelInfo *relinfo);
 extern ExecRowMark *ExecFindRowMark(EState *estate, Index rti, bool missing_ok);
 extern ExecAuxRowMark *ExecBuildAuxRowMark(ExecRowMark *erm, List *targetlist);
+extern TupleTableSlot *EvalPlanQualSlot(EPQState *epqstate,
+            Relation relation, Index rti);
 extern TupleTableSlot *EvalPlanQual(EState *estate, EPQState *epqstate,
-            Relation relation, Index rti, int lockmode,
-            ItemPointer tid, TransactionId priorXmax);
-extern HeapTuple EvalPlanQualFetch(EState *estate, Relation relation,
-                 int lockmode, LockWaitPolicy wait_policy, ItemPointer tid,
-                 TransactionId priorXmax);
+            Relation relation, Index rti, TupleTableSlot *slot);
 extern void EvalPlanQualInit(EPQState *epqstate, EState *estate,
                 Plan *subplan, List *auxrowmarks, int epqParam);
 extern void EvalPlanQualSetPlan(EPQState *epqstate,
                    Plan *subplan, List *auxrowmarks);
-extern void EvalPlanQualSetTuple(EPQState *epqstate, Index rti,
-                    HeapTuple tuple);
-extern HeapTuple EvalPlanQualGetTuple(EPQState *epqstate, Index rti);
 
 #define EvalPlanQualSetSlot(epqstate, slot)  ((epqstate)->origslot = (slot))
 extern void EvalPlanQualFetchRowMarks(EPQState *epqstate);
@@ -486,6 +481,10 @@ extern void ReScanExprContext(ExprContext *econtext);
 
 extern ExprContext *MakePerTupleExprContext(EState *estate);
 
+extern TupleTableSlot *ExecTriggerGetOldSlot(EState *estate, Relation rel);
+extern TupleTableSlot *ExecTriggerGetNewSlot(EState *estate, Relation rel);
+extern TupleTableSlot *ExecTriggerGetReturnSlot(EState *estate, Relation rel);
+
 /* Get an EState's per-output-tuple exprcontext, making it if first use */
 #define GetPerTupleExprContext(estate) \
    ((estate)->es_per_tuple_exprcontext ? \
@@ -554,9 +553,8 @@ extern int  ExecCleanTargetListLength(List *targetlist);
  */
 extern void ExecOpenIndices(ResultRelInfo *resultRelInfo, bool speculative);
 extern void ExecCloseIndices(ResultRelInfo *resultRelInfo);
-extern List *ExecInsertIndexTuples(TupleTableSlot *slot, ItemPointer tupleid,
-                     EState *estate, bool noDupErr, bool *specConflict,
-                     List *arbiterIndexes);
+extern List *ExecInsertIndexTuples(TupleTableSlot *slot, EState *estate, bool noDupErr,
+                     bool *specConflict, List *arbiterIndexes);
 extern bool ExecCheckIndexConstraints(TupleTableSlot *slot, EState *estate,
                          ItemPointer conflictTid, List *arbiterIndexes);
 extern void check_exclusion_constraint(Relation heap, Relation index,
index d2616968ac4c21ce6e60ca33aabc9a98cebec687..d17d0de64f032d3b4f255333095b3adb7f8f9b8e 100644 (file)
@@ -25,7 +25,7 @@ typedef struct SPITupleTable
    uint64      alloced;        /* # of alloced vals */
    uint64      free;           /* # of free vals */
    TupleDesc   tupdesc;        /* tuple descriptor */
-   HeapTuple  *vals;           /* tuples */
+   HeapTuple *vals;            /* tuples */
    slist_node  next;           /* link for internal bookkeeping */
    SubTransactionId subid;     /* subxact in which tuptable was created */
 } SPITupleTable;
@@ -122,7 +122,7 @@ extern CachedPlan *SPI_plan_get_cached_plan(SPIPlanPtr plan);
 extern HeapTuple SPI_copytuple(HeapTuple tuple);
 extern HeapTupleHeader SPI_returntuple(HeapTuple tuple, TupleDesc tupdesc);
 extern HeapTuple SPI_modifytuple(Relation rel, HeapTuple tuple, int natts,
-               int *attnum, Datum *Values, const char *Nulls);
+                                   int *attnum, Datum *Values, const char *Nulls);
 extern int SPI_fnumber(TupleDesc tupdesc, const char *fname);
 extern char *SPI_fname(TupleDesc tupdesc, int fnumber);
 extern char *SPI_getvalue(HeapTuple tuple, TupleDesc tupdesc, int fnumber);
index 0fe36392525bae4678654e7605bfac25059b1400..8bcbe2fbff60ae10499828f40a336fd68314bcdd 100644 (file)
@@ -27,6 +27,6 @@ extern DestReceiver *CreateTupleQueueDestReceiver(shm_mq_handle *handle);
 extern TupleQueueReader *CreateTupleQueueReader(shm_mq_handle *handle);
 extern void DestroyTupleQueueReader(TupleQueueReader *reader);
 extern HeapTuple TupleQueueReaderNext(TupleQueueReader *reader,
-                    bool nowait, bool *done);
+                                        bool nowait, bool *done);
 
 #endif                         /* TQUEUE_H */
index 5c390a9669675c4d01f6016c9f3841968e389077..c87689b3dda9aad7ebb9bfde39fe4642a02d8061 100644 (file)
@@ -15,6 +15,7 @@
 #define TUPTABLE_H
 
 #include "access/htup.h"
+#include "access/sysattr.h"
 #include "access/tupdesc.h"
 #include "storage/buf.h"
 
@@ -125,6 +126,10 @@ typedef struct TupleTableSlot
 #define FIELDNO_TUPLETABLESLOT_ISNULL 6
    bool       *tts_isnull;     /* current per-attribute isnull flags */
    MemoryContext tts_mcxt;     /* slot itself is in this context */
+
+   ItemPointerData tts_tid;    /* tuple's TID in its table, if known */
+   Oid         tts_tableOid;   /* OID of the table the tuple came from */
+
 } TupleTableSlot;
 
 /* routines for a TupleTableSlot implementation */
@@ -238,6 +243,7 @@ typedef struct VirtualTupleTableSlot
    char       *data;       /* data for materialized slots */
 } VirtualTupleTableSlot;
 
+#include "access/htup_details.h"
 typedef struct HeapTupleTableSlot
 {
    TupleTableSlot base;
@@ -246,6 +252,7 @@ typedef struct HeapTupleTableSlot
    HeapTuple   tuple;      /* physical tuple */
 #define FIELDNO_HEAPTUPLETABLESLOT_OFF 2
    uint32      off;        /* saved state for slot_deform_heap_tuple */
+   HeapTupleData tupdata;
 } HeapTupleTableSlot;
 
 /* heap tuple residing in a buffer */
@@ -323,6 +330,9 @@ extern void slot_getmissingattrs(TupleTableSlot *slot, int startAttNum,
 extern void slot_getsomeattrs_int(TupleTableSlot *slot, int attnum);
 
 
+/* FIXME: remove */
+extern bool ExecSlotCompare(TupleTableSlot *slot1, TupleTableSlot *slot2);
+
 #ifndef FRONTEND
 
 /*
@@ -395,6 +405,12 @@ slot_getsysattr(TupleTableSlot *slot, int attnum, bool *isnull)
 {
    AssertArg(attnum < 0);      /* caller error */
 
+   if (attnum == TableOidAttributeNumber)
+   {
+       *isnull = false;
+       return slot->tts_tableOid;
+   }
+
    /* Fetch the system attribute from the underlying tuple. */
    return slot->tts_ops->getsysattr(slot, attnum, isnull);
 }
index c14eb546c64136523ee3755dd557302203d3d0a3..508b0eece84dd6a8635eb9f138d57b5593a5b69e 100644 (file)
@@ -121,10 +121,11 @@ typedef void (*EndDirectModify_function) (ForeignScanState *node);
 typedef RowMarkType (*GetForeignRowMarkType_function) (RangeTblEntry *rte,
                                                       LockClauseStrength strength);
 
-typedef HeapTuple (*RefetchForeignRow_function) (EState *estate,
-                                                ExecRowMark *erm,
-                                                Datum rowid,
-                                                bool *updated);
+typedef TupleTableSlot *(*RefetchForeignRow_function) (EState *estate,
+                                                      ExecRowMark *erm,
+                                                      Datum rowid,
+                                                      TupleTableSlot *slot,
+                                                      bool *updated);
 
 typedef void (*ExplainForeignScan_function) (ForeignScanState *node,
                                             struct ExplainState *es);
index 5ed0f40f6970fbfa58ad27669064a204214fd776..359ed0a86cd34487c1230c1f38d67734fc74061d 100644 (file)
@@ -525,7 +525,7 @@ typedef struct EState
 
    /* Stuff used for firing triggers: */
    List       *es_trig_target_relations;   /* trigger-only ResultRelInfos */
-   TupleTableSlot *es_trig_tuple_slot; /* for trigger output tuples */
+   TupleTableSlot *es_trig_return_slot; /* for trigger output tuples */
    TupleTableSlot *es_trig_oldtup_slot;    /* for TriggerEnabled */
    TupleTableSlot *es_trig_newtup_slot;    /* for TriggerEnabled */
 
@@ -568,7 +568,8 @@ typedef struct EState
     * remember if the tuple has been returned already.  Arrays are of size
     * es_range_table_size and are indexed by scan node scanrelid - 1.
     */
-   HeapTuple  *es_epqTuple;    /* array of EPQ substitute tuples */
+   TupleTableSlot **es_epqTupleSlot;   /* array of EPQ substitute slots */
    bool       *es_epqTupleSet; /* true if EPQ tuple is provided */
    bool       *es_epqScanDone; /* true if EPQ tuple has been fetched */
 
@@ -1268,7 +1269,7 @@ typedef struct ScanState
 {
    PlanState   ps;             /* its first field is NodeTag */
    Relation    ss_currentRelation;
-   HeapScanDesc ss_currentScanDesc;
+   TableScanDesc ss_currentScanDesc;
    TupleTableSlot *ss_ScanTupleSlot;
 } ScanState;
 
@@ -1298,6 +1299,9 @@ typedef struct SampleScanState
    bool        use_pagemode;   /* use page-at-a-time visibility checking? */
    bool        begun;          /* false means need to call BeginSampleScan */
    uint32      seed;           /* random seed */
+   int64       donetuples;     /* number of tuples already returned */
+   bool        haveblock;      /* has a block for sampling been determined */
+   bool        done;           /* exhausted all tuples? */
 } SampleScanState;
 
 /*
@@ -1526,6 +1530,7 @@ typedef struct BitmapHeapScanState
    Buffer      pvmbuffer;
    long        exact_pages;
    long        lossy_pages;
+   int         return_empty_tuples;
    TBMIterator *prefetch_iterator;
    int         prefetch_pages;
    int         prefetch_target;
@@ -2256,7 +2261,7 @@ typedef struct LockRowsState
    PlanState   ps;             /* its first field is NodeTag */
    List       *lr_arowMarks;   /* List of ExecAuxRowMarks */
    EPQState    lr_epqstate;    /* for evaluating EvalPlanQual rechecks */
-   HeapTuple  *lr_curtuples;   /* locked tuples (one entry per RT entry) */
+   TupleTableSlot **lr_curtuples; /* locked tuples (one entry per RT entry) */
    int         lr_ntables;     /* length of lr_curtuples[] array */
 } LockRowsState;
 
index 24afd6efd41dc776930da730bb8613d82077c117..5f579781a12a62a8a20d71c1e810594174e7b9c5 100644 (file)
@@ -43,4 +43,9 @@ typedef enum LockWaitPolicy
    LockWaitError
 } LockWaitPolicy;
 
+/* Follow tuples whose update is in progress if lock modes don't conflict */
+#define TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS    (1 << 0)
+/* Follow update chain and lock latest version of tuple */
+#define TUPLE_LOCK_FLAG_FIND_LAST_VERSION      (1 << 1)
+
 #endif                         /* LOCKOPTIONS_H */
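
These flags are meant to be OR'd together by callers of the tuple-locking path. A small sketch of the intended usage (the wrapper that consumes the flags lives elsewhere in the patch, so the surrounding variables here are assumptions):

    int         flags = 0;

    if (follow_updates)
        flags |= TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS;
    if (lock_latest_version)
        flags |= TUPLE_LOCK_FLAG_FIND_LAST_VERSION;
    /* pass 'flags' to the AM's tuple-lock routine */
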
index cac6ff0eda49ffd1cefe1f605d43a140e82218b7..5391c41d9e859d12558c91426f5525754653673b 100644 (file)
@@ -503,6 +503,7 @@ typedef enum NodeTag
    T_InlineCodeBlock,          /* in nodes/parsenodes.h */
    T_FdwRoutine,               /* in foreign/fdwapi.h */
    T_IndexAmRoutine,           /* in access/amapi.h */
+   T_TableAmRoutine,           /* in access/tableam.h */
    T_TsmRoutine,               /* in access/tsmapi.h */
    T_ForeignKeyCacheInfo,      /* in utils/rel.h */
    T_CallContext               /* in nodes/parsenodes.h */
index e5bdc1cec576d052f6ea436f24ccde802605c5fd..bb20fdd488efe395a4f8ae78a1e61fa8b2966e9c 100644 (file)
@@ -2032,6 +2032,7 @@ typedef struct CreateStmt
    List       *options;        /* options from WITH clause */
    OnCommitAction oncommit;    /* what do we do at COMMIT? */
    char       *tablespacename; /* table space to use, or NULL */
+   char       *accessMethod;   /* table access method */
    bool        if_not_exists;  /* just do nothing if it already exists? */
 } CreateStmt;
 
index b886ed35349b93e6292d09afef327d00ff8a2d03..0bdc0bbf506ba3fab2b2b8e1c6e46556c71fd15e 100644 (file)
@@ -111,6 +111,7 @@ typedef struct IntoClause
 
    RangeVar   *rel;            /* target relation name */
    List       *colNames;       /* column names to assign, or NIL */
+   char       *accessMethod;   /* table access method */
    List       *options;        /* options from WITH clause */
    OnCommitAction onCommit;    /* what do we do at COMMIT? */
    char       *tableSpaceName; /* table space to use, or NULL */
index 31532e97690f69b0dd9c38aaa69158958b7392a2..f6b829da2cdc36918bb648a84787e1b7ec49888a 100644 (file)
@@ -37,7 +37,7 @@ typedef struct TBMIterator TBMIterator;
 typedef struct TBMSharedIterator TBMSharedIterator;
 
 /* Result structure for tbm_iterate */
-typedef struct
+typedef struct TBMIterateResult
 {
    BlockNumber blockno;        /* page number containing tuples */
    int         ntuples;        /* -1 indicates lossy result */
index 3cce3906a0e6de9b2b5ed1b9d98c41785a88d9e1..95915bdc92d53b6ce2cf8efd1ce1285e1e10ae4d 100644 (file)
@@ -20,7 +20,6 @@
 #include "storage/relfilenode.h"
 #include "utils/relcache.h"
 #include "utils/snapmgr.h"
-#include "utils/tqual.h"
 
 typedef void *Block;
 
@@ -268,8 +267,8 @@ TestForOldSnapshot(Snapshot snapshot, Relation relation, Page page)
 
    if (old_snapshot_threshold >= 0
        && (snapshot) != NULL
-       && ((snapshot)->satisfies == HeapTupleSatisfiesMVCC
-           || (snapshot)->satisfies == HeapTupleSatisfiesToast)
+       && ((snapshot)->visibility_type == MVCC_VISIBILITY
+           || (snapshot)->visibility_type == TOAST_VISIBILITY)
        && !XLogRecPtrIsInvalid((snapshot)->lsn)
        && PageGetLSN(page) > (snapshot)->lsn)
        TestForOldSnapshot_impl(snapshot, relation);
index 2217081dcc35acf4aa1f39a16c827dbd584fa1de..9187cbbcf3bd29616dd19156ff995613a95ff0eb 100644 (file)
@@ -131,6 +131,12 @@ typedef struct RelationData
    /* use "struct" here to avoid needing to include htup.h: */
    struct HeapTupleData *rd_indextuple;    /* all of pg_index tuple */
 
+   /*
+    * Underlying table access method support
+    */
+   Oid         rd_tableamhandler;  /* OID of table AM handler function */
+   const struct TableAmRoutine *rd_tableamroutine; /* table AM's API struct */
+
    /*
     * index access support info (used only for an index relation)
     *
@@ -432,6 +438,12 @@ typedef struct ViewOptions
  */
 #define RelationGetDescr(relation) ((relation)->rd_att)
 
+/*
+ * RelationGetTableamRoutine
+ *     Returns the table AM routine for a relation.
+ */
+#define RelationGetTableamRoutine(relation) ((relation)->rd_tableamroutine)
+
 /*
  * RelationGetRelationName
  *     Returns the rel's name.
index a99d6b6681dbe9ae420ef5107efad2f26528f720..de5b096bf7d8a8e2fe40ba7993838b49c6bc6c97 100644 (file)
@@ -76,6 +76,8 @@ extern void RelationInitIndexAccessInfo(Relation relation);
 struct PublicationActions;
 extern struct PublicationActions *GetRelationPublicationActions(Relation relation);
 
+extern void RelationInitTableAccessMethod(Relation relation);
+
 /*
  * Routines to support ereport() reports of relation-related errors
  */
@@ -98,6 +100,7 @@ extern Relation RelationBuildLocalRelation(const char *relname,
                           Oid relnamespace,
                           TupleDesc tupDesc,
                           Oid relid,
+                          Oid accessmtd,
                           Oid relfilenode,
                           Oid reltablespace,
                           bool shared_relation,
index a8a5a8f4c076399345be7a24782b098d4567c5be..77e25fb5615b0df8f4856ccf292c8da640a177a8 100644 (file)
 #include "lib/pairingheap.h"
 #include "storage/buf.h"
 
+typedef enum tuple_visibility_type
+{
+   MVCC_VISIBILITY = 0,        /* HeapTupleSatisfiesMVCC */
+   SELF_VISIBILITY,            /* HeapTupleSatisfiesSelf */
+   ANY_VISIBILITY,             /* HeapTupleSatisfiesAny */
+   TOAST_VISIBILITY,           /* HeapTupleSatisfiesToast */
+   DIRTY_VISIBILITY,           /* HeapTupleSatisfiesDirty */
+   HISTORIC_MVCC_VISIBILITY,   /* HeapTupleSatisfiesHistoricMVCC */
+   NON_VACUUMABLE_VISIBILITY   /* HeapTupleSatisfiesNonVacuumable */
+}          tuple_visibility_type;
 
 typedef struct SnapshotData *Snapshot;
 
@@ -52,7 +62,7 @@ typedef bool (*SnapshotSatisfiesFunc) (HeapTuple htup,
  */
 typedef struct SnapshotData
 {
-   SnapshotSatisfiesFunc satisfies;    /* tuple test function */
+   tuple_visibility_type visibility_type;  /* tuple visibility test type */
 
    /*
     * The remaining fields are used only for MVCC snapshots, and are normally
@@ -124,6 +134,7 @@ typedef enum
    HeapTupleInvisible,
    HeapTupleSelfUpdated,
    HeapTupleUpdated,
+   HeapTupleDeleted,
    HeapTupleBeingUpdated,
    HeapTupleWouldBlock         /* can be returned by heap_tuple_lock */
 } HTSU_Result;
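
With snapshot->satisfies gone, the visibility test becomes data (an enum tag) rather than a function pointer, and the heap AM dispatches on it internally. A hedged sketch of that dispatch, using the routines whose declarations are removed from the header below (the real switch lives in heapam_visibility.c):

    /* Sketch of AM-internal dispatch on the snapshot's visibility type. */
    switch (snapshot->visibility_type)
    {
        case MVCC_VISIBILITY:
            return HeapTupleSatisfiesMVCC(htup, snapshot, buffer);
        case SELF_VISIBILITY:
            return HeapTupleSatisfiesSelf(htup, snapshot, buffer);
        case ANY_VISIBILITY:
            return HeapTupleSatisfiesAny(htup, snapshot, buffer);
        /* ... TOAST_VISIBILITY, DIRTY_VISIBILITY, HISTORIC_MVCC_VISIBILITY
         * and NON_VACUUMABLE_VISIBILITY handled likewise ... */
    }
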
index d3b6e99bb4ff0d7ae930a2f8db4b3ce6602eb679..1fe9cc64024617273ff6f6c7aa33b7446c067272 100644 (file)
@@ -18,7 +18,6 @@
 #include "utils/snapshot.h"
 #include "access/xlogdefs.h"
 
-
 /* Static variables representing various special snapshot semantics */
 extern PGDLLIMPORT SnapshotData SnapshotSelfData;
 extern PGDLLIMPORT SnapshotData SnapshotAnyData;
@@ -29,60 +28,8 @@ extern PGDLLIMPORT SnapshotData CatalogSnapshotData;
 
 /* This macro encodes the knowledge of which snapshots are MVCC-safe */
 #define IsMVCCSnapshot(snapshot)  \
-   ((snapshot)->satisfies == HeapTupleSatisfiesMVCC || \
-    (snapshot)->satisfies == HeapTupleSatisfiesHistoricMVCC)
-
-/*
- * HeapTupleSatisfiesVisibility
- *     True iff heap tuple satisfies a time qual.
- *
- * Notes:
- * Assumes heap tuple is valid.
- * Beware of multiple evaluations of snapshot argument.
- * Hint bits in the HeapTuple's t_infomask may be updated as a side effect;
- * if so, the indicated buffer is marked dirty.
- */
-#define HeapTupleSatisfiesVisibility(tuple, snapshot, buffer) \
-   ((*(snapshot)->satisfies) (tuple, snapshot, buffer))
-
-/* Result codes for HeapTupleSatisfiesVacuum */
-typedef enum
-{
-   HEAPTUPLE_DEAD,             /* tuple is dead and deletable */
-   HEAPTUPLE_LIVE,             /* tuple is live (committed, no deleter) */
-   HEAPTUPLE_RECENTLY_DEAD,    /* tuple is dead, but not deletable yet */
-   HEAPTUPLE_INSERT_IN_PROGRESS,   /* inserting xact is still in progress */
-   HEAPTUPLE_DELETE_IN_PROGRESS    /* deleting xact is still in progress */
-} HTSV_Result;
-
-/* These are the "satisfies" test routines for the various snapshot types */
-extern bool HeapTupleSatisfiesMVCC(HeapTuple htup,
-                      Snapshot snapshot, Buffer buffer);
-extern bool HeapTupleSatisfiesSelf(HeapTuple htup,
-                      Snapshot snapshot, Buffer buffer);
-extern bool HeapTupleSatisfiesAny(HeapTuple htup,
-                     Snapshot snapshot, Buffer buffer);
-extern bool HeapTupleSatisfiesToast(HeapTuple htup,
-                       Snapshot snapshot, Buffer buffer);
-extern bool HeapTupleSatisfiesDirty(HeapTuple htup,
-                       Snapshot snapshot, Buffer buffer);
-extern bool HeapTupleSatisfiesNonVacuumable(HeapTuple htup,
-                               Snapshot snapshot, Buffer buffer);
-extern bool HeapTupleSatisfiesHistoricMVCC(HeapTuple htup,
-                              Snapshot snapshot, Buffer buffer);
-
-/* Special "satisfies" routines with different APIs */
-extern HTSU_Result HeapTupleSatisfiesUpdate(HeapTuple htup,
-                        CommandId curcid, Buffer buffer);
-extern HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple htup,
-                        TransactionId OldestXmin, Buffer buffer);
-extern bool HeapTupleIsSurelyDead(HeapTuple htup,
-                     TransactionId OldestXmin);
-extern bool XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot);
-
-extern void HeapTupleSetHintBits(HeapTupleHeader tuple, Buffer buffer,
-                    uint16 infomask, TransactionId xid);
-extern bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple);
+   ((snapshot)->visibility_type == MVCC_VISIBILITY || \
+    (snapshot)->visibility_type == HISTORIC_MVCC_VISIBILITY)
 
 /*
  * To avoid leaking too much knowledge about reorderbuffer implementation
@@ -101,14 +48,14 @@ extern bool ResolveCminCmaxDuringDecoding(struct HTAB *tuplecid_data,
  * local variable of type SnapshotData, and initialize it with this macro.
  */
 #define InitDirtySnapshot(snapshotdata)  \
-   ((snapshotdata).satisfies = HeapTupleSatisfiesDirty)
+   ((snapshotdata).visibility_type = DIRTY_VISIBILITY)
 
 /*
  * Similarly, some initialization is required for a NonVacuumable snapshot.
  * The caller must supply the xmin horizon to use (e.g., RecentGlobalXmin).
  */
 #define InitNonVacuumableSnapshot(snapshotdata, xmin_horizon)  \
-   ((snapshotdata).satisfies = HeapTupleSatisfiesNonVacuumable, \
+   ((snapshotdata).visibility_type = NON_VACUUMABLE_VISIBILITY, \
     (snapshotdata).xmin = (xmin_horizon))
 
 /*
@@ -116,7 +63,7 @@ extern bool ResolveCminCmaxDuringDecoding(struct HTAB *tuplecid_data,
  * to set lsn and whenTaken correctly to support snapshot_too_old.
  */
 #define InitToastSnapshot(snapshotdata, l, w)  \
-   ((snapshotdata).satisfies = HeapTupleSatisfiesToast, \
+   ((snapshotdata).visibility_type = TOAST_VISIBILITY, \
     (snapshotdata).lsn = (l),                  \
     (snapshotdata).whenTaken = (w))
 
index 32908b66251f9e34f6f50babed5eca3ac3194d30..334096e9793bfd6dd39b58bfeaed486322279669 100644 (file)
@@ -105,7 +105,7 @@ typedef struct TuplesortInstrumentation
  *
  * The "cluster" API stores/sorts full HeapTuples including all visibility
  * info. The sort keys are specified by reference to a btree index that is
- * defined on the relation to be sorted.  Note that putheaptuple/getheaptuple
+ * defined on the relation to be sorted.  Note that putheaptupleslot/getheaptuple
  * go with this API, not the "begin_heap" one!
  *
  * The "index_btree" API stores/sorts IndexTuples (preserving all their
index 37fe6a7b277fe00117afa61750dd81857aae1086..a632d7f7bad9e3f2fc88f06e95b368ec7f26a792 100644 (file)
@@ -15,7 +15,7 @@ step s1u: UPDATE foo SET a=2 WHERE a=1;
 step s2d: DELETE FROM foo WHERE a=1; <waiting ...>
 step s1c: COMMIT;
 step s2d: <... completed>
-error in steps s1c s2d: ERROR:  tuple to be deleted was already moved to another partition due to concurrent update
+error in steps s1c s2d: ERROR:  tuple to be locked was already moved to another partition due to concurrent update
 step s2c: COMMIT;
 
 starting permutation: s1b s2b s2d s1u s2c s1c
index 47dd885c4e9b1ff8d1595197bc926c855b7d2fa5..e15ba33a0884758e2636fa6fff3e37a5f291d765 100644 (file)
@@ -99,3 +99,82 @@ HINT:  Use DROP ... CASCADE to drop the dependent objects too.
 -- Drop access method cascade
 DROP ACCESS METHOD gist2 CASCADE;
 NOTICE:  drop cascades to index grect2ind2
+-- Create a heap2 table access method using the heapam handler
+CREATE ACCESS METHOD heap2 TYPE TABLE HANDLER heap_tableam_handler;
+SELECT amname, amhandler, amtype FROM pg_am where amtype = 't' ORDER BY 1, 2;
+ amname |      amhandler       | amtype 
+--------+----------------------+--------
+ heap   | heap_tableam_handler | t
+ heap2  | heap_tableam_handler | t
+(2 rows)
+
+CREATE TABLE tbl_heap2(f1 int, f2 char(100)) using heap2;
+INSERT INTO tbl_heap2 VALUES(generate_series(1,10), 'Test series');
+SELECT count(*) FROM tbl_heap2;
+ count 
+-------
+    10
+(1 row)
+
+SELECT r.relname, r.relkind, a.amname from pg_class as r, pg_am as a
+       where a.oid = r.relam AND r.relname = 'tbl_heap2';
+  relname  | relkind | amname 
+-----------+---------+--------
+ tbl_heap2 | r       | heap2
+(1 row)
+
+-- create table as using heap2
+CREATE TABLE tblas_heap2 using heap2 AS select * from tbl_heap2;
+SELECT r.relname, r.relkind, a.amname from pg_class as r, pg_am as a
+       where a.oid = r.relam AND r.relname = 'tblas_heap2';
+   relname   | relkind | amname 
+-------------+---------+--------
+ tblas_heap2 | r       | heap2
+(1 row)
+
+--
+-- SELECT INTO doesn't support the new USING syntax, so the table
+-- should get the default access method.
+--
+SELECT INTO tblselectinto_heap from tbl_heap2;
+SELECT r.relname, r.relkind, a.amname = current_setting('default_table_access_method')
+from pg_class as r, pg_am as a
+       where a.oid = r.relam AND r.relname = 'tblselectinto_heap';
+      relname       | relkind | ?column? 
+--------------------+---------+----------
+ tblselectinto_heap | r       | t
+(1 row)
+
+DROP TABLE tblselectinto_heap;
+-- create materialized view using heap2
+CREATE MATERIALIZED VIEW mv_heap2 USING heap2 AS
+       SELECT * FROM tbl_heap2;
+SELECT r.relname, r.relkind, a.amname from pg_class as r, pg_am as a
+       where a.oid = r.relam AND r.relname = 'mv_heap2';
+ relname  | relkind | amname 
+----------+---------+--------
+ mv_heap2 | m       | heap2
+(1 row)
+
+-- Try creating unsupported relation kinds with the USING syntax
+CREATE VIEW test_view USING heap2 AS SELECT * FROM tbl_heap2;
+ERROR:  syntax error at or near "USING"
+LINE 1: CREATE VIEW test_view USING heap2 AS SELECT * FROM tbl_heap2...
+                              ^
+CREATE SEQUENCE test_seq USING heap2;
+ERROR:  syntax error at or near "USING"
+LINE 1: CREATE SEQUENCE test_seq USING heap2;
+                                 ^
+-- Drop table access method, which fails as other objects depend on it
+DROP ACCESS METHOD heap2;
+ERROR:  cannot drop access method heap2 because other objects depend on it
+DETAIL:  table tbl_heap2 depends on access method heap2
+table tblas_heap2 depends on access method heap2
+materialized view mv_heap2 depends on access method heap2
+HINT:  Use DROP ... CASCADE to drop the dependent objects too.
+-- Drop table access method with cascade
+DROP ACCESS METHOD heap2 CASCADE;
+NOTICE:  drop cascades to 3 other objects
+DETAIL:  drop cascades to table tbl_heap2
+drop cascades to table tblas_heap2
+drop cascades to materialized view mv_heap2
index 6072f6bdb1fb0998f85ef35d6cfdf72009797325..4cd92c20dd19250478ccec14c382387f64d97211 100644 (file)
@@ -1741,11 +1741,24 @@ WHERE p1.amhandler = 0;
 -----+--------
 (0 rows)
 
--- Check for amhandler functions with the wrong signature
+-- Check for index amhandler functions with the wrong signature
 SELECT p1.oid, p1.amname, p2.oid, p2.proname
 FROM pg_am AS p1, pg_proc AS p2
-WHERE p2.oid = p1.amhandler AND
-    (p2.prorettype != 'index_am_handler'::regtype OR p2.proretset
+WHERE p2.oid = p1.amhandler AND p1.amtype = 'i' AND
+    (p2.prorettype != 'index_am_handler'::regtype
+     OR p2.proretset
+     OR p2.pronargs != 1
+     OR p2.proargtypes[0] != 'internal'::regtype);
+ oid | amname | oid | proname 
+-----+--------+-----+---------
+(0 rows)
+
+-- Check for table amhandler functions with the wrong signature
+SELECT p1.oid, p1.amname, p2.oid, p2.proname
+FROM pg_am AS p1, pg_proc AS p2
+WHERE p2.oid = p1.amhandler AND p1.amtype = 't' AND
+    (p2.prorettype != 'table_am_handler'::regtype
+     OR p2.proretset
      OR p2.pronargs != 1
      OR p2.proargtypes[0] != 'internal'::regtype);
  oid | amname | oid | proname 
index b1419d4bc21dbd5b520a2cac69a740205e9dac63..0e38d571c025bd14427e35eb8b640cbefff66d6a 100644 (file)
@@ -502,11 +502,11 @@ WHERE relkind NOT IN ('r', 'i', 'S', 't', 'v', 'm', 'c', 'f', 'p') OR
 -----+---------
 (0 rows)
 
--- Indexes should have an access method, others not.
+-- All tables and indexes should have an access method.
 SELECT p1.oid, p1.relname
 FROM pg_class as p1
-WHERE (p1.relkind = 'i' AND p1.relam = 0) OR
-    (p1.relkind != 'i' AND p1.relam != 0);
+WHERE p1.relkind NOT IN ('S', 'v', 'f', 'c') and
+    p1.relam = 0;
  oid | relname 
 -----+---------
 (0 rows)
index 3e0ac104f3cca3d59f33f83c808b7d2954bc2fc6..2c7b4813e8b6712d9e1820965e7721c9388fb200 100644 (file)
@@ -66,3 +66,50 @@ DROP ACCESS METHOD gist2;
 
 -- Drop access method cascade
 DROP ACCESS METHOD gist2 CASCADE;
+
+-- Create a heap2 table access method using the heapam handler
+CREATE ACCESS METHOD heap2 TYPE TABLE HANDLER heap_tableam_handler;
+
+SELECT amname, amhandler, amtype FROM pg_am where amtype = 't' ORDER BY 1, 2;
+
+CREATE TABLE tbl_heap2(f1 int, f2 char(100)) using heap2;
+INSERT INTO tbl_heap2 VALUES(generate_series(1,10), 'Test series');
+SELECT count(*) FROM tbl_heap2;
+
+SELECT r.relname, r.relkind, a.amname from pg_class as r, pg_am as a
+       where a.oid = r.relam AND r.relname = 'tbl_heap2';
+
+-- create table as using heap2
+CREATE TABLE tblas_heap2 using heap2 AS select * from tbl_heap2;
+SELECT r.relname, r.relkind, a.amname from pg_class as r, pg_am as a
+       where a.oid = r.relam AND r.relname = 'tblas_heap2';
+
+--
+-- SELECT INTO doesn't support the new USING syntax, so the table
+-- should get the default access method.
+--
+SELECT INTO tblselectinto_heap from tbl_heap2;
+SELECT r.relname, r.relkind, a.amname = current_setting('default_table_access_method')
+from pg_class as r, pg_am as a
+       where a.oid = r.relam AND r.relname = 'tblselectinto_heap';
+
+DROP TABLE tblselectinto_heap;
+
+-- create materialized view using heap2
+CREATE MATERIALIZED VIEW mv_heap2 USING heap2 AS
+       SELECT * FROM tbl_heap2;
+
+SELECT r.relname, r.relkind, a.amname from pg_class as r, pg_am as a
+       where a.oid = r.relam AND r.relname = 'mv_heap2';
+
+-- Try creating unsupported relation kinds with the USING syntax
+CREATE VIEW test_view USING heap2 AS SELECT * FROM tbl_heap2;
+
+CREATE SEQUENCE test_seq USING heap2;
+
+
+-- Drop table access method, which fails as other objects depend on it
+DROP ACCESS METHOD heap2;
+
+-- Drop table access method with cascade
+DROP ACCESS METHOD heap2 CASCADE;
index 91c68f4204e34cef55c2e01e1fa08d130a018127..2664252d9438d31c2c178eba9f5c801a335b4124 100644 (file)
@@ -1169,15 +1169,25 @@ SELECT p1.oid, p1.amname
 FROM pg_am AS p1
 WHERE p1.amhandler = 0;
 
--- Check for amhandler functions with the wrong signature
+-- Check for index amhandler functions with the wrong signature
 
 SELECT p1.oid, p1.amname, p2.oid, p2.proname
 FROM pg_am AS p1, pg_proc AS p2
-WHERE p2.oid = p1.amhandler AND
-    (p2.prorettype != 'index_am_handler'::regtype OR p2.proretset
+WHERE p2.oid = p1.amhandler AND p1.amtype = 'i' AND
+    (p2.prorettype != 'index_am_handler'::regtype
+     OR p2.proretset
      OR p2.pronargs != 1
      OR p2.proargtypes[0] != 'internal'::regtype);
 
+-- Check for table amhandler functions with the wrong signature
+
+SELECT p1.oid, p1.amname, p2.oid, p2.proname
+FROM pg_am AS p1, pg_proc AS p2
+WHERE p2.oid = p1.amhandler AND p1.amtype = 't' AND
+    (p2.prorettype != 'table_am_handler'::regtype
+     OR p2.proretset
+     OR p2.pronargs != 1
+     OR p2.proargtypes[0] != 'internal'::regtype);
 
 -- **************** pg_amop ****************
 
index f9aeea32144f204427830ecf0cf95227a7a1c0fc..2efa229d40a0784d98a6ecc73a2d914d5cf7dcae 100644 (file)
@@ -367,12 +367,11 @@ WHERE relkind NOT IN ('r', 'i', 'S', 't', 'v', 'm', 'c', 'f', 'p') OR
     relpersistence NOT IN ('p', 'u', 't') OR
     relreplident NOT IN ('d', 'n', 'f', 'i');
 
--- Indexes should have an access method, others not.
-
+-- All tables and indexes should have an access method.
 SELECT p1.oid, p1.relname
 FROM pg_class as p1
-WHERE (p1.relkind = 'i' AND p1.relam = 0) OR
-    (p1.relkind != 'i' AND p1.relam != 0);
+WHERE p1.relkind NOT IN ('S', 'v', 'f', 'c') and
+    p1.relam = 0;
 
 -- **************** pg_attribute ****************
 
index 9fe950b29dbb81e4bda9f2529f0b2849149ba43c..e4c9f81fe2792081ef29956e0aacd6957cff1dc4 100644 (file)
@@ -1608,8 +1608,8 @@ ParallelHashGrowth
 ParallelHashJoinBatch
 ParallelHashJoinBatchAccessor
 ParallelHashJoinState
-ParallelHeapScanDesc
-ParallelHeapScanDescData
+ParallelTableScanDesc
+ParallelTableScanDescData
 ParallelIndexScanDesc
 ParallelSlot
 ParallelState