Change the way WAL records are constructed.
author: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Thu, 14 Aug 2014 10:23:54 +0000 (13:23 +0300)
committer: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Mon, 10 Nov 2014 19:24:36 +0000 (21:24 +0200)
60 files changed:
contrib/pg_xlogdump/pg_xlogdump.c
src/backend/access/brin/brin.c
src/backend/access/brin/brin_pageops.c
src/backend/access/brin/brin_revmap.c
src/backend/access/brin/brin_xlog.c
src/backend/access/gin/ginbtree.c
src/backend/access/gin/gindatapage.c
src/backend/access/gin/ginentrypage.c
src/backend/access/gin/ginfast.c
src/backend/access/gin/gininsert.c
src/backend/access/gin/ginutil.c
src/backend/access/gin/ginvacuum.c
src/backend/access/gin/ginxlog.c
src/backend/access/gist/gist.c
src/backend/access/gist/gistbuild.c
src/backend/access/gist/gistxlog.c
src/backend/access/heap/heapam.c
src/backend/access/heap/rewriteheap.c
src/backend/access/nbtree/nbtinsert.c
src/backend/access/nbtree/nbtpage.c
src/backend/access/nbtree/nbtxlog.c
src/backend/access/rmgrdesc/brindesc.c
src/backend/access/rmgrdesc/gindesc.c
src/backend/access/rmgrdesc/gistdesc.c
src/backend/access/rmgrdesc/heapdesc.c
src/backend/access/rmgrdesc/nbtdesc.c
src/backend/access/rmgrdesc/spgdesc.c
src/backend/access/rmgrdesc/xlogdesc.c
src/backend/access/spgist/spgdoinsert.c
src/backend/access/spgist/spginsert.c
src/backend/access/spgist/spgvacuum.c
src/backend/access/spgist/spgxlog.c
src/backend/access/transam/README
src/backend/access/transam/clog.c
src/backend/access/transam/multixact.c
src/backend/access/transam/twophase.c
src/backend/access/transam/xact.c
src/backend/access/transam/xlog.c
src/backend/access/transam/xloginsert.c
src/backend/access/transam/xlogreader.c
src/backend/access/transam/xlogutils.c
src/backend/catalog/storage.c
src/backend/commands/dbcommands.c
src/backend/commands/sequence.c
src/backend/commands/tablespace.c
src/backend/replication/logical/decode.c
src/backend/replication/logical/snapbuild.c
src/backend/storage/ipc/standby.c
src/backend/utils/cache/relmapper.c
src/include/access/brin_xlog.h
src/include/access/gin_private.h
src/include/access/gist_private.h
src/include/access/heapam_xlog.h
src/include/access/nbtree.h
src/include/access/spgist_private.h
src/include/access/xlog.h
src/include/access/xlog_internal.h
src/include/access/xloginsert.h
src/include/access/xlogrecord.h
src/include/access/xlogutils.h

index 7f151f961c87b799ac1aa63c7518d5e191ef5ac4..6d867da9f35c1608eb4edda160015bdf332ddf07 100644 (file)
@@ -384,51 +384,87 @@ XLogDumpDisplayRecord(XLogDumpConfig *config, XLogRecPtr ReadRecPtr, XLogRecord
 {
        const char         *id;
        const RmgrDescData *desc = &RmgrDescTable[record->xl_rmid];
+       int                     nblockrefs;
+       uint8      *blockrefids;
+       int                     i;
+       RelFileNode rnode;
+       ForkNumber      forknum;
+       BlockNumber blk;
+       XLogRecordBlockData *bkpb;
 
        id = desc->rm_identify(record->xl_info);
        if (id == NULL)
                id = psprintf("UNKNOWN (%x)", record->xl_info & ~XLR_INFO_MASK);
 
-       printf("rmgr: %-11s len (rec/tot): %6u/%6u, tx: %10u, lsn: %X/%08X, prev %X/%08X, bkp: %u%u%u%u, desc: %s ",
+       config->already_displayed_records++;
+
+       printf("rmgr: %-11s len (rec/tot): %6u/%6u, tx: %10u, lsn: %X/%08X, prev %X/%08X, ",
                   desc->rm_name,
                   record->xl_len, record->xl_tot_len,
                   record->xl_xid,
                   (uint32) (ReadRecPtr >> 32), (uint32) ReadRecPtr,
-                  (uint32) (record->xl_prev >> 32), (uint32) record->xl_prev,
-                  !!(XLR_BKP_BLOCK(0) & record->xl_info),
-                  !!(XLR_BKP_BLOCK(1) & record->xl_info),
-                  !!(XLR_BKP_BLOCK(2) & record->xl_info),
-                  !!(XLR_BKP_BLOCK(3) & record->xl_info),
-                  id);
+                  (uint32) (record->xl_prev >> 32), (uint32) record->xl_prev);
+
+       /* print block references (short format) */
+       blockrefids = XLogRecGetBlockRefIds(record, &nblockrefs);
+       if (!config->bkp_details)
+       {
+               for (i = 0; i < nblockrefs; i++)
+               {
+                       uint8           id = blockrefids[i];
+
+                       bkpb = XLogRecGetBlockRef(record, id, NULL);
+                       XLogRecGetBlockTag(record, id, &rnode, &forknum, &blk);
+                       if (forknum != MAIN_FORKNUM)
+                               printf("blkref #%u: rel %u/%u/%u fork %s blk %u",
+                                          id,
+                                          rnode.spcNode, rnode.dbNode, rnode.relNode,
+                                          forkNames[forknum],
+                                          blk);
+                       else
+                               printf("blkref #%u: rel %u/%u/%u blk %u",
+                                          id,
+                                          rnode.spcNode, rnode.dbNode, rnode.relNode,
+                                          blk);
+                       if (bkpb->fork_flags & BKPBLOCK_HAS_IMAGE)
+                               printf(" FPW");
+                       printf(", ");
+               }
+       }
+       printf("desc: %s ", id);
 
        /* the desc routine will printf the description directly to stdout */
        desc->rm_desc(NULL, record);
 
        putchar('\n');
 
+       /* print block references (detailed format) */
        if (config->bkp_details)
        {
-               int                     bkpnum;
-               char       *blk = (char *) XLogRecGetData(record) + record->xl_len;
-
-               for (bkpnum = 0; bkpnum < XLR_MAX_BKP_BLOCKS; bkpnum++)
+               for (i = 0; i < nblockrefs; i++)
                {
-                       BkpBlock        bkpb;
-
-                       if (!(XLR_BKP_BLOCK(bkpnum) & record->xl_info))
-                               continue;
-
-                       memcpy(&bkpb, blk, sizeof(BkpBlock));
-                       blk += sizeof(BkpBlock);
-                       blk += BLCKSZ - bkpb.hole_length;
-
-                       printf("\tbackup bkp #%u; rel %u/%u/%u; fork: %s; block: %u; hole: offset: %u, length: %u\n",
-                                  bkpnum,
-                                  bkpb.node.spcNode, bkpb.node.dbNode, bkpb.node.relNode,
-                                  forkNames[bkpb.fork],
-                                  bkpb.block, bkpb.hole_offset, bkpb.hole_length);
+                       uint8           id = blockrefids[i];
+                       char       *blkdata;
+
+                       bkpb = XLogRecGetBlockRef(record, id, &blkdata);
+
+                       XLogRecGetBlockTag(record, id, &rnode, &forknum, &blk);
+                       printf("\tblkref #%u: rel %u/%u/%u fork %s blk %u",
+                                  id,
+                                  rnode.spcNode, rnode.dbNode, rnode.relNode,
+                                  forkNames[forknum],
+                                  blk);
+                       if (bkpb->fork_flags & BKPBLOCK_HAS_IMAGE)
+                       {
+                               XLogRecordBlockImage *blkimg = (XLogRecordBlockImage *) blkdata;
+                               printf(" (FPW); hole: offset: %u, length: %u\n",
+                                          blkimg->hole_offset, blkimg->hole_length);
+                       }
+                       printf("\n");
                }
        }
+
+       pfree(blockrefids);
 }
 
 /*
index bd35cf6696acac2f207e6fb77d3076865d387d80..cb645e3d4596a77038b587233ca7aab2071421de 100644 (file)
@@ -666,19 +666,16 @@ brinbuild(PG_FUNCTION_ARGS)
        {
                xl_brin_createidx xlrec;
                XLogRecPtr      recptr;
-               XLogRecData rdata;
                Page            page;
 
-               xlrec.node = index->rd_node;
                xlrec.version = BRIN_CURRENT_VERSION;
                xlrec.pagesPerRange = BrinGetPagesPerRange(index);
 
-               rdata.buffer = InvalidBuffer;
-               rdata.data = (char *) &xlrec;
-               rdata.len = SizeOfBrinCreateIdx;
-               rdata.next = NULL;
+               XLogBeginInsert();
+               XLogRegisterData((char *) &xlrec, SizeOfBrinCreateIdx);
+               XLogRegisterBuffer(0, meta, REGBUF_WILL_INIT);
 
-               recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_CREATE_INDEX, &rdata);
+               recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_CREATE_INDEX);
 
                page = BufferGetPage(meta);
                PageSetLSN(page, recptr);
index 50f1dec1631a376461fc7f4d4d62a2c5b56f450b..0b6fbeb603cd49117ee58832709dc0ff4c614a72 100644 (file)
@@ -140,27 +140,19 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
                /* XLOG stuff */
                if (RelationNeedsWAL(idxrel))
                {
-                       BlockNumber blk = BufferGetBlockNumber(oldbuf);
                        xl_brin_samepage_update xlrec;
                        XLogRecPtr      recptr;
-                       XLogRecData rdata[2];
                        uint8           info = XLOG_BRIN_SAMEPAGE_UPDATE;
 
-                       xlrec.node = idxrel->rd_node;
-                       ItemPointerSetBlockNumber(&xlrec.tid, blk);
-                       ItemPointerSetOffsetNumber(&xlrec.tid, oldoff);
-                       rdata[0].data = (char *) &xlrec;
-                       rdata[0].len = SizeOfBrinSamepageUpdate;
-                       rdata[0].buffer = InvalidBuffer;
-                       rdata[0].next = &(rdata[1]);
+                       xlrec.offnum = oldoff;
 
-                       rdata[1].data = (char *) newtup;
-                       rdata[1].len = newsz;
-                       rdata[1].buffer = oldbuf;
-                       rdata[1].buffer_std = true;
-                       rdata[1].next = NULL;
+                       XLogBeginInsert();
+                       XLogRegisterData((char *) &xlrec, SizeOfBrinSamepageUpdate);
 
-                       recptr = XLogInsert(RM_BRIN_ID, info, rdata);
+                       XLogRegisterBuffer(0, oldbuf, REGBUF_STANDARD);
+                       XLogRegisterBufData(0, (char *) newtup, newsz);
+
+                       recptr = XLogInsert(RM_BRIN_ID, info);
 
                        PageSetLSN(oldpage, recptr);
                }
@@ -211,43 +203,30 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
                {
                        xl_brin_update xlrec;
                        XLogRecPtr      recptr;
-                       XLogRecData rdata[4];
                        uint8           info;
 
                        info = XLOG_BRIN_UPDATE | (extended ? XLOG_BRIN_INIT_PAGE : 0);
 
-                       xlrec.insert.node = idxrel->rd_node;
-                       ItemPointerSet(&xlrec.insert.tid, BufferGetBlockNumber(newbuf), newoff);
+                       xlrec.insert.offnum = newoff;
                        xlrec.insert.heapBlk = heapBlk;
-                       xlrec.insert.tuplen = newsz;
-                       xlrec.insert.revmapBlk = BufferGetBlockNumber(revmapbuf);
                        xlrec.insert.pagesPerRange = pagesPerRange;
-                       ItemPointerSet(&xlrec.oldtid, BufferGetBlockNumber(oldbuf), oldoff);
+                       xlrec.oldOffnum = oldoff;
+
+                       XLogBeginInsert();
 
-                       rdata[0].data = (char *) &xlrec;
-                       rdata[0].len = SizeOfBrinUpdate;
-                       rdata[0].buffer = InvalidBuffer;
-                       rdata[0].next = &(rdata[1]);
+                       /* new page */
+                       XLogRegisterData((char *) &xlrec, SizeOfBrinUpdate);
 
-                       rdata[1].data = (char *) newtup;
-                       rdata[1].len = newsz;
-                       rdata[1].buffer = extended ? InvalidBuffer : newbuf;
-                       rdata[1].buffer_std = true;
-                       rdata[1].next = &(rdata[2]);
+                       XLogRegisterBuffer(0, newbuf, REGBUF_STANDARD | (extended ? REGBUF_WILL_INIT : 0));
+                       XLogRegisterBufData(0, (char *) newtup, newsz);
 
-                       rdata[2].data = (char *) NULL;
-                       rdata[2].len = 0;
-                       rdata[2].buffer = revmapbuf;
-                       rdata[2].buffer_std = true;
-                       rdata[2].next = &(rdata[3]);
+                       /* revmap page */
+                       XLogRegisterBuffer(1, revmapbuf, REGBUF_STANDARD);
 
-                       rdata[3].data = (char *) NULL;
-                       rdata[3].len = 0;
-                       rdata[3].buffer = oldbuf;
-                       rdata[3].buffer_std = true;
-                       rdata[3].next = NULL;
+                       /* old page */
+                       XLogRegisterBuffer(2, oldbuf, REGBUF_STANDARD);
 
-                       recptr = XLogInsert(RM_BRIN_ID, info, rdata);
+                       recptr = XLogInsert(RM_BRIN_ID, info);
 
                        PageSetLSN(oldpage, recptr);
                        PageSetLSN(newpage, recptr);
@@ -354,36 +333,22 @@ brin_doinsert(Relation idxrel, BlockNumber pagesPerRange,
        {
                xl_brin_insert xlrec;
                XLogRecPtr      recptr;
-               XLogRecData rdata[3];
                uint8           info;
 
                info = XLOG_BRIN_INSERT | (extended ? XLOG_BRIN_INIT_PAGE : 0);
-               xlrec.node = idxrel->rd_node;
                xlrec.heapBlk = heapBlk;
                xlrec.pagesPerRange = pagesPerRange;
-               xlrec.revmapBlk = BufferGetBlockNumber(revmapbuf);
-               xlrec.tuplen = itemsz;
-               ItemPointerSet(&xlrec.tid, blk, off);
-
-               rdata[0].data = (char *) &xlrec;
-               rdata[0].len = SizeOfBrinInsert;
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].buffer_std = false;
-               rdata[0].next = &(rdata[1]);
-
-               rdata[1].data = (char *) tup;
-               rdata[1].len = itemsz;
-               rdata[1].buffer = extended ? InvalidBuffer : *buffer;
-               rdata[1].buffer_std = true;
-               rdata[1].next = &(rdata[2]);
-
-               rdata[2].data = (char *) NULL;
-               rdata[2].len = 0;
-               rdata[2].buffer = revmapbuf;
-               rdata[2].buffer_std = false;
-               rdata[2].next = NULL;
-
-               recptr = XLogInsert(RM_BRIN_ID, info, rdata);
+               xlrec.offnum = off;
+
+               XLogBeginInsert();
+               XLogRegisterData((char *) &xlrec, SizeOfBrinInsert);
+
+               XLogRegisterBuffer(0, *buffer, REGBUF_STANDARD | (extended ? REGBUF_WILL_INIT : 0));
+               XLogRegisterBufData(0, (char *) tup, itemsz);
+
+               XLogRegisterBuffer(1, revmapbuf, 0);
+
+               recptr = XLogInsert(RM_BRIN_ID, info);
 
                PageSetLSN(page, recptr);
                PageSetLSN(BufferGetPage(revmapbuf), recptr);
index 272c74e6b6e10cc885a2d684c58503411e32bd44..adc7d0b8473bc2b8f129ef152c4e94229ba6508a 100644 (file)
@@ -477,23 +477,16 @@ revmap_physical_extend(BrinRevmap *revmap)
        {
                xl_brin_revmap_extend xlrec;
                XLogRecPtr      recptr;
-               XLogRecData rdata[2];
 
-               xlrec.node = revmap->rm_irel->rd_node;
                xlrec.targetBlk = mapBlk;
-               rdata[0].data = (char *) &xlrec;
-               rdata[0].len = SizeOfBrinRevmapExtend;
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].buffer_std = false;
-               rdata[0].next = &(rdata[1]);
-
-               rdata[1].data = (char *) NULL;
-               rdata[1].len = 0;
-               rdata[1].buffer = revmap->rm_metaBuf;
-               rdata[1].buffer_std = false;
-               rdata[1].next = NULL;
-
-               recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_REVMAP_EXTEND, rdata);
+
+               XLogBeginInsert();
+               XLogRegisterData((char *) &xlrec, SizeOfBrinRevmapExtend);
+               XLogRegisterBuffer(0, revmap->rm_metaBuf, 0);
+
+               XLogRegisterBuffer(1, buf, REGBUF_WILL_INIT);
+
+               recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_REVMAP_EXTEND);
                PageSetLSN(metapage, recptr);
                PageSetLSN(page, recptr);
        }
index ebef984e7f161e7870fa3d31d841f631da12abb9..19701a1ad214d433d75699f36f7356a33f7c04e2 100644 (file)
@@ -26,11 +26,8 @@ brin_xlog_createidx(XLogRecPtr lsn, XLogRecord *record)
        Buffer          buf;
        Page            page;
 
-       /* Backup blocks are not used in create_index records */
-       Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
-
        /* create the index' metapage */
-       buf = XLogReadBuffer(xlrec->node, BRIN_METAPAGE_BLKNO, true);
+       XLogReadBufferForRedoExtended(lsn, record, 0, RBM_ZERO, false, &buf);
        Assert(BufferIsValid(buf));
        page = (Page) BufferGetPage(buf);
        brin_metapage_init(page, xlrec->pagesPerRange, xlrec->version);
@@ -45,48 +42,45 @@ brin_xlog_createidx(XLogRecPtr lsn, XLogRecord *record)
  */
 static void
 brin_xlog_insert_update(XLogRecPtr lsn, XLogRecord *record,
-                                               xl_brin_insert *xlrec, BrinTuple *tuple)
+                                               xl_brin_insert *xlrec)
 {
-       BlockNumber blkno;
        Buffer          buffer;
        Page            page;
        XLogRedoAction action;
 
-       blkno = ItemPointerGetBlockNumber(&xlrec->tid);
-
        /*
         * If we inserted the first and only tuple on the page, re-initialize the
         * page from scratch.
         */
        if (record->xl_info & XLOG_BRIN_INIT_PAGE)
        {
-               XLogReadBufferForRedoExtended(lsn, record, 0,
-                                                                         xlrec->node, MAIN_FORKNUM, blkno,
-                                                                         RBM_ZERO, false, &buffer);
+               XLogReadBufferForRedoExtended(lsn, record, 0, RBM_ZERO, false, &buffer);
                page = BufferGetPage(buffer);
                brin_page_init(page, BRIN_PAGETYPE_REGULAR);
                action = BLK_NEEDS_REDO;
        }
        else
        {
-               action = XLogReadBufferForRedo(lsn, record, 0,
-                                                                          xlrec->node, blkno, &buffer);
+               action = XLogReadBufferForRedo(lsn, record, 0, &buffer);
        }
 
        /* insert the index item into the page */
        if (action == BLK_NEEDS_REDO)
        {
                OffsetNumber offnum;
+               BrinTuple  *tuple;
+               Size            tuplen;
+
+               tuple = (BrinTuple *) XLogRecGetBlockData(record, 0, &tuplen);
 
                Assert(tuple->bt_blkno == xlrec->heapBlk);
 
                page = (Page) BufferGetPage(buffer);
-               offnum = ItemPointerGetOffsetNumber(&(xlrec->tid));
+               offnum = xlrec->offnum;
                if (PageGetMaxOffsetNumber(page) + 1 < offnum)
                        elog(PANIC, "brin_xlog_insert_update: invalid max offset number");
 
-               offnum = PageAddItem(page, (Item) tuple, xlrec->tuplen, offnum, true,
-                                                        false);
+               offnum = PageAddItem(page, (Item) tuple, tuplen, offnum, true, false);
                if (offnum == InvalidOffsetNumber)
                        elog(PANIC, "brin_xlog_insert_update: failed to add tuple");
 
@@ -97,14 +91,17 @@ brin_xlog_insert_update(XLogRecPtr lsn, XLogRecord *record,
                UnlockReleaseBuffer(buffer);
 
        /* update the revmap */
-       action = XLogReadBufferForRedo(lsn, record, 1, xlrec->node,
-                                                                  xlrec->revmapBlk, &buffer);
+       action = XLogReadBufferForRedo(lsn, record, 1, &buffer);
        if (action == BLK_NEEDS_REDO)
        {
+               ItemPointerData tid;
+               BlockNumber blkno = BufferGetBlockNumber(buffer);
+
+               ItemPointerSet(&tid, blkno, xlrec->offnum);
                page = (Page) BufferGetPage(buffer);
 
                brinSetHeapBlockItemptr(buffer, xlrec->pagesPerRange, xlrec->heapBlk,
-                                                               xlrec->tid);
+                                                               tid);
                PageSetLSN(page, lsn);
                MarkBufferDirty(buffer);
        }
@@ -121,11 +118,8 @@ static void
 brin_xlog_insert(XLogRecPtr lsn, XLogRecord *record)
 {
        xl_brin_insert *xlrec = (xl_brin_insert *) XLogRecGetData(record);
-       BrinTuple  *newtup;
 
-       newtup = (BrinTuple *) ((char *) xlrec + SizeOfBrinInsert);
-
-       brin_xlog_insert_update(lsn, record, xlrec, newtup);
+       brin_xlog_insert_update(lsn, record, xlrec);
 }
 
 /*
@@ -135,17 +129,11 @@ static void
 brin_xlog_update(XLogRecPtr lsn, XLogRecord *record)
 {
        xl_brin_update *xlrec = (xl_brin_update *) XLogRecGetData(record);
-       BlockNumber blkno;
        Buffer          buffer;
-       BrinTuple  *newtup;
        XLogRedoAction action;
 
-       newtup = (BrinTuple *) ((char *) xlrec + SizeOfBrinUpdate);
-
        /* First remove the old tuple */
-       blkno = ItemPointerGetBlockNumber(&(xlrec->oldtid));
-       action = XLogReadBufferForRedo(lsn, record, 2, xlrec->insert.node,
-                                                                  blkno, &buffer);
+       action = XLogReadBufferForRedo(lsn, record, 2, &buffer);
        if (action == BLK_NEEDS_REDO)
        {
                Page            page;
@@ -153,7 +141,7 @@ brin_xlog_update(XLogRecPtr lsn, XLogRecord *record)
 
                page = (Page) BufferGetPage(buffer);
 
-               offnum = ItemPointerGetOffsetNumber(&(xlrec->oldtid));
+               offnum = xlrec->oldOffnum;
                if (PageGetMaxOffsetNumber(page) + 1 < offnum)
                        elog(PANIC, "brin_xlog_update: invalid max offset number");
 
@@ -164,7 +152,7 @@ brin_xlog_update(XLogRecPtr lsn, XLogRecord *record)
        }
 
        /* Then insert the new tuple and update revmap, like in an insertion. */
-       brin_xlog_insert_update(lsn, record, &xlrec->insert, newtup);
+       brin_xlog_insert_update(lsn, record, &xlrec->insert);
 
        if (BufferIsValid(buffer))
                UnlockReleaseBuffer(buffer);
@@ -177,27 +165,23 @@ static void
 brin_xlog_samepage_update(XLogRecPtr lsn, XLogRecord *record)
 {
        xl_brin_samepage_update *xlrec;
-       BlockNumber blkno;
        Buffer          buffer;
        XLogRedoAction action;
 
        xlrec = (xl_brin_samepage_update *) XLogRecGetData(record);
-       blkno = ItemPointerGetBlockNumber(&(xlrec->tid));
-       action = XLogReadBufferForRedo(lsn, record, 0, xlrec->node, blkno,
-                                                                  &buffer);
+       action = XLogReadBufferForRedo(lsn, record, 0, &buffer);
        if (action == BLK_NEEDS_REDO)
        {
-               int                     tuplen;
+               Size            tuplen;
                BrinTuple  *mmtuple;
                Page            page;
                OffsetNumber offnum;
 
-               tuplen = record->xl_len - SizeOfBrinSamepageUpdate;
-               mmtuple = (BrinTuple *) ((char *) xlrec + SizeOfBrinSamepageUpdate);
+               mmtuple = (BrinTuple *) XLogRecGetBlockData(record, 0, &tuplen);
 
                page = (Page) BufferGetPage(buffer);
 
-               offnum = ItemPointerGetOffsetNumber(&(xlrec->tid));
+               offnum = xlrec->offnum;
                if (PageGetMaxOffsetNumber(page) + 1 < offnum)
                        elog(PANIC, "brin_xlog_samepage_update: invalid max offset number");
 
@@ -225,12 +209,16 @@ brin_xlog_revmap_extend(XLogRecPtr lsn, XLogRecord *record)
        Buffer          metabuf;
        Buffer          buf;
        Page            page;
+       BlockNumber targetBlk;
        XLogRedoAction action;
 
        xlrec = (xl_brin_revmap_extend *) XLogRecGetData(record);
+
+       XLogRecGetBlockTag(record, 1, NULL, NULL, &targetBlk);
+       Assert(xlrec->targetBlk == targetBlk);
+
        /* Update the metapage */
-       action = XLogReadBufferForRedo(lsn, record, 0, xlrec->node,
-                                                                  BRIN_METAPAGE_BLKNO, &metabuf);
+       action = XLogReadBufferForRedo(lsn, record, 0, &metabuf);
        if (action == BLK_NEEDS_REDO)
        {
                Page            metapg;
@@ -251,7 +239,7 @@ brin_xlog_revmap_extend(XLogRecPtr lsn, XLogRecord *record)
         * image here.
         */
 
-       buf = XLogReadBuffer(xlrec->node, xlrec->targetBlk, true);
+       XLogReadBufferForRedoExtended(lsn, record, 1, RBM_ZERO, false, &buf);
        page = (Page) BufferGetPage(buf);
        brin_page_init(page, BRIN_PAGETYPE_REVMAP);
 
index 5365477000adc378d865b31f1095957369d0c09f..a241894e726c97b88a0d33b49970e192a0c3757e 100644 (file)
@@ -326,7 +326,6 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
                           Buffer childbuf, GinStatsData *buildStats)
 {
        Page            page = BufferGetPage(stack->buffer);
-       XLogRecData *payloadrdata;
        GinPlaceToPageRC rc;
        uint16          xlflags = 0;
        Page            childpage = NULL;
@@ -351,12 +350,36 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
        /*
         * Try to put the incoming tuple on the page. placeToPage will decide if
         * the page needs to be split.
+        *
+        * WAL-logging this operation is a bit funny:
+        *
+        * We're responsible for calling XLogBeginInsert() and XLogInsert().
+        * XLogBeginInsert() must be called before placeToPage, because placeToPage
+        * registers some data to the WAL record.
+        *
+        * If placeToPage returns INSERTED, placeToPage has already called
+        * START_CRIT_SECTION(), and we're responsible for calling
+        * END_CRIT_SECTION(). When it returns INSERTED, it is also responsible for
+        * registering any data required to replay the operation with
+        * XLogRegisterBufData(0, ...). It may only add data to block index 0; the
+        * main data of the WAL record is reserved for the caller (this function).
+        *
+        * If placeToPage returns SPLIT, we're wholly responsible for WAL logging.
+        * Splits happen infrequently, so we just make a full-page image of all
+        * the pages involved.
         */
+
+       if (RelationNeedsWAL(btree->index))
+               XLogBeginInsert();
+
        rc = btree->placeToPage(btree, stack->buffer, stack,
                                                        insertdata, updateblkno,
-                                                       &payloadrdata, &newlpage, &newrpage);
+                                                       &newlpage, &newrpage);
        if (rc == UNMODIFIED)
+       {
+               XLogResetInsertion();
                return true;
+       }
        else if (rc == INSERTED)
        {
                /* placeToPage did START_CRIT_SECTION() */
@@ -372,17 +395,18 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
                if (RelationNeedsWAL(btree->index))
                {
                        XLogRecPtr      recptr;
-                       XLogRecData rdata[3];
                        ginxlogInsert xlrec;
                        BlockIdData childblknos[2];
 
-                       xlrec.node = btree->index->rd_node;
-                       xlrec.blkno = BufferGetBlockNumber(stack->buffer);
+                       /*
+                        * placeToPage already registered stack->buffer as block 0.
+                        */
                        xlrec.flags = xlflags;
 
-                       rdata[0].buffer = InvalidBuffer;
-                       rdata[0].data = (char *) &xlrec;
-                       rdata[0].len = sizeof(ginxlogInsert);
+                       if (childbuf != InvalidBuffer)
+                               XLogRegisterBuffer(1, childbuf, REGBUF_STANDARD);
+
+                       XLogRegisterData((char *) &xlrec, sizeof(ginxlogInsert));
 
                        /*
                         * Log information about child if this was an insertion of a
@@ -390,26 +414,13 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
                         */
                        if (childbuf != InvalidBuffer)
                        {
-                               rdata[0].next = &rdata[1];
-
                                BlockIdSet(&childblknos[0], BufferGetBlockNumber(childbuf));
                                BlockIdSet(&childblknos[1], GinPageGetOpaque(childpage)->rightlink);
-
-                               rdata[1].buffer = InvalidBuffer;
-                               rdata[1].data = (char *) childblknos;
-                               rdata[1].len = sizeof(BlockIdData) * 2;
-                               rdata[1].next = &rdata[2];
-
-                               rdata[2].buffer = childbuf;
-                               rdata[2].buffer_std = false;
-                               rdata[2].data = NULL;
-                               rdata[2].len = 0;
-                               rdata[2].next = payloadrdata;
+                               XLogRegisterData((char *) childblknos,
+                                                                        sizeof(BlockIdData) * 2);
                        }
-                       else
-                               rdata[0].next = payloadrdata;
 
-                       recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT, rdata);
+                       recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT);
                        PageSetLSN(page, recptr);
                        if (childbuf != InvalidBuffer)
                                PageSetLSN(childpage, recptr);
@@ -421,10 +432,9 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
        }
        else if (rc == SPLIT)
        {
-               /* Didn't fit, have to split */
+               /* Didn't fit, had to split */
                Buffer          rbuffer;
                BlockNumber savedRightLink;
-               XLogRecData rdata[2];
                ginxlogSplit data;
                Buffer          lbuffer = InvalidBuffer;
                Page            newrootpg = NULL;
@@ -448,7 +458,6 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
                 */
 
                data.node = btree->index->rd_node;
-               data.rblkno = BufferGetBlockNumber(rbuffer);
                data.flags = xlflags;
                if (childbuf != InvalidBuffer)
                {
@@ -462,23 +471,6 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
                else
                        data.leftChildBlkno = data.rightChildBlkno = InvalidBlockNumber;
 
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].data = (char *) &data;
-               rdata[0].len = sizeof(ginxlogSplit);
-
-               if (childbuf != InvalidBuffer)
-               {
-                       rdata[0].next = &rdata[1];
-
-                       rdata[1].buffer = childbuf;
-                       rdata[1].buffer_std = false;
-                       rdata[1].data = NULL;
-                       rdata[1].len = 0;
-                       rdata[1].next = payloadrdata;
-               }
-               else
-                       rdata[0].next = payloadrdata;
-
                if (stack->parent == NULL)
                {
                        /*
@@ -496,12 +488,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
                                        buildStats->nEntryPages++;
                        }
 
-                       /*
-                        * root never has a right-link, so we borrow the rrlink field to
-                        * store the root block number.
-                        */
-                       data.rrlink = BufferGetBlockNumber(stack->buffer);
-                       data.lblkno = BufferGetBlockNumber(lbuffer);
+                       data.rrlink = InvalidBlockNumber;
                        data.flags |= GIN_SPLIT_ROOT;
 
                        GinPageGetOpaque(newrpage)->rightlink = InvalidBlockNumber;
@@ -524,7 +511,6 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
                {
                        /* split non-root page */
                        data.rrlink = savedRightLink;
-                       data.lblkno = BufferGetBlockNumber(stack->buffer);
 
                        GinPageGetOpaque(newrpage)->rightlink = savedRightLink;
                        GinPageGetOpaque(newlpage)->flags |= GIN_INCOMPLETE_SPLIT;
@@ -572,7 +558,28 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
                {
                        XLogRecPtr      recptr;
 
-                       recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT, rdata);
+                       /*
+                        * We just take full page images of all the split pages. Splits
+                        * are uncommon enough that it's not worth complicating the code
+                        * to be more efficient.
+                        */
+                       if (stack->parent == NULL)
+                       {
+                               XLogRegisterBuffer(0, lbuffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
+                               XLogRegisterBuffer(1, rbuffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
+                               XLogRegisterBuffer(2, stack->buffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
+                       }
+                       else
+                       {
+                               XLogRegisterBuffer(0, stack->buffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
+                               XLogRegisterBuffer(1, rbuffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
+                       }
+                       if (BufferIsValid(childbuf))
+                               XLogRegisterBuffer(3, childbuf, 0);
+
+                       XLogRegisterData((char *) &data, sizeof(ginxlogSplit));
+
+                       recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT);
                        PageSetLSN(BufferGetPage(stack->buffer), recptr);
                        PageSetLSN(BufferGetPage(rbuffer), recptr);
                        if (stack->parent == NULL)
index 97cd706c08e58b91c9481c52f30b8af4a1045b6f..685c6fcb3a664204c62c732ad364670c078863e5 100644 (file)
@@ -98,20 +98,19 @@ static ItemPointer dataLeafPageGetUncompressed(Page page, int *nitems);
 static void dataSplitPageInternal(GinBtree btree, Buffer origbuf,
                                          GinBtreeStack *stack,
                                          void *insertdata, BlockNumber updateblkno,
-                                         XLogRecData **prdata, Page *newlpage, Page *newrpage);
+                                         Page *newlpage, Page *newrpage);
 
 static disassembledLeaf *disassembleLeaf(Page page);
 static bool leafRepackItems(disassembledLeaf *leaf, ItemPointer remaining);
 static bool addItemsToLeaf(disassembledLeaf *leaf, ItemPointer newItems,
                           int nNewItems);
 
-static XLogRecData *constructLeafRecompressWALData(Buffer buf,
-                                                          disassembledLeaf *leaf);
+static void registerLeafRecompressWALData(Buffer buf, disassembledLeaf *leaf);
 static void dataPlaceToPageLeafRecompress(Buffer buf, disassembledLeaf *leaf);
 static void dataPlaceToPageLeafSplit(Buffer buf,
                                                 disassembledLeaf *leaf,
                                                 ItemPointerData lbound, ItemPointerData rbound,
-                                                XLogRecData **prdata, Page lpage, Page rpage);
+                                                Page lpage, Page rpage);
 
 /*
  * Read TIDs from leaf data page to single uncompressed array. The TIDs are
@@ -428,8 +427,7 @@ GinPageDeletePostingItem(Page page, OffsetNumber offset)
  */
 static GinPlaceToPageRC
 dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
-                                       void *insertdata, XLogRecData **prdata,
-                                       Page *newlpage, Page *newrpage)
+                                       void *insertdata, Page *newlpage, Page *newrpage)
 {
        GinBtreeDataLeafInsertData *items = insertdata;
        ItemPointer newItems = &items->items[items->curitem];
@@ -602,9 +600,7 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
                 */
                MemoryContextSwitchTo(oldCxt);
                if (RelationNeedsWAL(btree->index))
-                       *prdata = constructLeafRecompressWALData(buf, leaf);
-               else
-                       *prdata = NULL;
+                       registerLeafRecompressWALData(buf, leaf);
                START_CRIT_SECTION();
                dataPlaceToPageLeafRecompress(buf, leaf);
 
@@ -685,7 +681,7 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
                *newrpage = MemoryContextAlloc(oldCxt, BLCKSZ);
 
                dataPlaceToPageLeafSplit(buf, leaf, lbound, rbound,
-                                                                prdata, *newlpage, *newrpage);
+                                                                *newlpage, *newrpage);
 
                Assert(GinPageRightMost(page) ||
                           ginCompareItemPointers(GinDataPageGetRightBound(*newlpage),
@@ -791,7 +787,6 @@ ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
         */
        if (removedsomething)
        {
-               XLogRecData *payloadrdata = NULL;
                bool            modified;
 
                /*
@@ -818,7 +813,10 @@ ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
                }
 
                if (RelationNeedsWAL(indexrel))
-                       payloadrdata = constructLeafRecompressWALData(buffer, leaf);
+               {
+                       XLogBeginInsert();
+                       registerLeafRecompressWALData(buffer, leaf);
+               }
                START_CRIT_SECTION();
                dataPlaceToPageLeafRecompress(buffer, leaf);
 
@@ -827,18 +825,8 @@ ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
                if (RelationNeedsWAL(indexrel))
                {
                        XLogRecPtr      recptr;
-                       XLogRecData rdata;
-                       ginxlogVacuumDataLeafPage xlrec;
 
-                       xlrec.node = indexrel->rd_node;
-                       xlrec.blkno = BufferGetBlockNumber(buffer);
-
-                       rdata.buffer = InvalidBuffer;
-                       rdata.data = (char *) &xlrec;
-                       rdata.len = offsetof(ginxlogVacuumDataLeafPage, data);
-                       rdata.next = payloadrdata;
-
-                       recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_DATA_LEAF_PAGE, &rdata);
+                       recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_DATA_LEAF_PAGE);
                        PageSetLSN(page, recptr);
                }
 
@@ -850,13 +838,12 @@ ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
  * Construct a ginxlogRecompressDataLeaf record representing the changes
  * in *leaf.
  */
-static XLogRecData *
-constructLeafRecompressWALData(Buffer buf, disassembledLeaf *leaf)
+static void
+registerLeafRecompressWALData(Buffer buf, disassembledLeaf *leaf)
 {
        int                     nmodified = 0;
        char       *walbufbegin;
        char       *walbufend;
-       XLogRecData *rdata;
        dlist_iter      iter;
        int                     segno;
        ginxlogRecompressDataLeaf *recompress_xlog;
@@ -872,11 +859,10 @@ constructLeafRecompressWALData(Buffer buf, disassembledLeaf *leaf)
        }
 
        walbufbegin = palloc(
-                                                sizeof(ginxlogRecompressDataLeaf) +
-                                                BLCKSZ +               /* max size needed to hold the segment
-                                                                                * data */
-                                                nmodified * 2 +                /* (segno + action) per action */
-                                                sizeof(XLogRecData));
+               sizeof(ginxlogRecompressDataLeaf) +
+               BLCKSZ  +               /* max size needed to hold the segment data */
+               nmodified *2            /* (segno + action) per action */
+               );
        walbufend = walbufbegin;
 
        recompress_xlog = (ginxlogRecompressDataLeaf *) walbufend;
@@ -944,14 +930,10 @@ constructLeafRecompressWALData(Buffer buf, disassembledLeaf *leaf)
                        segno++;
        }
 
-       rdata = (XLogRecData *) MAXALIGN(walbufend);
-       rdata->buffer = buf;
-       rdata->buffer_std = TRUE;
-       rdata->data = walbufbegin;
-       rdata->len = walbufend - walbufbegin;
-       rdata->next = NULL;
 
-       return rdata;
+       XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
+       XLogRegisterBufData(0, walbufbegin, walbufend - walbufbegin);
+
 }
 
 /*
@@ -1024,7 +1006,7 @@ dataPlaceToPageLeafRecompress(Buffer buf, disassembledLeaf *leaf)
 static void
 dataPlaceToPageLeafSplit(Buffer buf, disassembledLeaf *leaf,
                                                 ItemPointerData lbound, ItemPointerData rbound,
-                                                XLogRecData **prdata, Page lpage, Page rpage)
+                                                Page lpage, Page rpage)
 {
        char       *ptr;
        int                     segsize;
@@ -1034,10 +1016,6 @@ dataPlaceToPageLeafSplit(Buffer buf, disassembledLeaf *leaf,
        dlist_node *firstright;
        leafSegmentInfo *seginfo;
 
-       /* these must be static so they can be returned to caller */
-       static ginxlogSplitDataLeaf split_xlog;
-       static XLogRecData rdata[3];
-
        /* Initialize temporary pages to hold the new left and right pages */
        GinInitPage(lpage, GIN_DATA | GIN_LEAF | GIN_COMPRESSED, BLCKSZ);
        GinInitPage(rpage, GIN_DATA | GIN_LEAF | GIN_COMPRESSED, BLCKSZ);
@@ -1092,29 +1070,6 @@ dataPlaceToPageLeafSplit(Buffer buf, disassembledLeaf *leaf,
        Assert(rsize == leaf->rsize);
        GinDataPageSetDataSize(rpage, rsize);
        *GinDataPageGetRightBound(rpage) = rbound;
-
-       /* Create WAL record */
-       split_xlog.lsize = lsize;
-       split_xlog.rsize = rsize;
-       split_xlog.lrightbound = lbound;
-       split_xlog.rrightbound = rbound;
-
-       rdata[0].buffer = InvalidBuffer;
-       rdata[0].data = (char *) &split_xlog;
-       rdata[0].len = sizeof(ginxlogSplitDataLeaf);
-       rdata[0].next = &rdata[1];
-
-       rdata[1].buffer = InvalidBuffer;
-       rdata[1].data = (char *) GinDataLeafPageGetPostingList(lpage);
-       rdata[1].len = lsize;
-       rdata[1].next = &rdata[2];
-
-       rdata[2].buffer = InvalidBuffer;
-       rdata[2].data = (char *) GinDataLeafPageGetPostingList(rpage);
-       rdata[2].len = rsize;
-       rdata[2].next = NULL;
-
-       *prdata = rdata;
 }
 
 /*
@@ -1124,29 +1079,30 @@ dataPlaceToPageLeafSplit(Buffer buf, disassembledLeaf *leaf,
  *
  * In addition to inserting the given item, the downlink of the existing item
  * at 'off' is updated to point to 'updateblkno'.
+ *
+ * On INSERTED, registers the buffer as buffer ID 0, with data.
+ * On SPLIT, returns the new left and right pages in *newlpage and *newrpage.
  */
 static GinPlaceToPageRC
 dataPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
                                                void *insertdata, BlockNumber updateblkno,
-                                               XLogRecData **prdata, Page *newlpage, Page *newrpage)
+                                               Page *newlpage, Page *newrpage)
 {
        Page            page = BufferGetPage(buf);
        OffsetNumber off = stack->off;
        PostingItem *pitem;
 
-       /* these must be static so they can be returned to caller */
-       static XLogRecData rdata;
+       /* this must be static so it stays valid until the caller's XLogInsert() */
        static ginxlogInsertDataInternal data;
 
        /* split if we have to */
        if (GinNonLeafDataPageGetFreeSpace(page) < sizeof(PostingItem))
        {
                dataSplitPageInternal(btree, buf, stack, insertdata, updateblkno,
-                                                         prdata, newlpage, newrpage);
+                                                         newlpage, newrpage);
                return SPLIT;
        }
 
-       *prdata = &rdata;
        Assert(GinPageIsData(page));
 
        START_CRIT_SECTION();
@@ -1159,14 +1115,15 @@ dataPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
        pitem = (PostingItem *) insertdata;
        GinDataPageAddPostingItem(page, pitem, off);
 
-       data.offset = off;
-       data.newitem = *pitem;
+       if (RelationNeedsWAL(btree->index))
+       {
+               data.offset = off;
+               data.newitem = *pitem;
 
-       rdata.buffer = buf;
-       rdata.buffer_std = TRUE;
-       rdata.data = (char *) &data;
-       rdata.len = sizeof(ginxlogInsertDataInternal);
-       rdata.next = NULL;
+               XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
+               XLogRegisterBufData(0, (char *) &data,
+                                                       sizeof(ginxlogInsertDataInternal));
+       }
 
        return INSERTED;
 }
@@ -1178,7 +1135,6 @@ dataPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
 static GinPlaceToPageRC
 dataPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
                                void *insertdata, BlockNumber updateblkno,
-                               XLogRecData **prdata,
                                Page *newlpage, Page *newrpage)
 {
        Page            page = BufferGetPage(buf);
@@ -1187,11 +1143,11 @@ dataPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
 
        if (GinPageIsLeaf(page))
                return dataPlaceToPageLeaf(btree, buf, stack, insertdata,
-                                                                  prdata, newlpage, newrpage);
+                                                                  newlpage, newrpage);
        else
                return dataPlaceToPageInternal(btree, buf, stack,
                                                                           insertdata, updateblkno,
-                                                                          prdata, newlpage, newrpage);
+                                                                          newlpage, newrpage);
 }
 
 /*
@@ -1202,7 +1158,7 @@ static void
 dataSplitPageInternal(GinBtree btree, Buffer origbuf,
                                          GinBtreeStack *stack,
                                          void *insertdata, BlockNumber updateblkno,
-                                         XLogRecData **prdata, Page *newlpage, Page *newrpage)
+                                         Page *newlpage, Page *newrpage)
 {
        Page            oldpage = BufferGetPage(origbuf);
        OffsetNumber off = stack->off;
@@ -1215,19 +1171,13 @@ dataSplitPageInternal(GinBtree btree, Buffer origbuf,
        Page            lpage;
        Page            rpage;
        OffsetNumber separator;
-
-       /* these must be static so they can be returned to caller */
-       static ginxlogSplitDataInternal data;
-       static XLogRecData rdata[4];
-       static PostingItem allitems[(BLCKSZ / sizeof(PostingItem)) + 1];
+       PostingItem allitems[(BLCKSZ / sizeof(PostingItem)) + 1];
 
        lpage = PageGetTempPage(oldpage);
        rpage = PageGetTempPage(oldpage);
        GinInitPage(lpage, GinPageGetOpaque(oldpage)->flags, pageSize);
        GinInitPage(rpage, GinPageGetOpaque(oldpage)->flags, pageSize);
 
-       *prdata = rdata;
-
        /*
         * First construct a new list of PostingItems, which includes all the old
         * items, and the new item.
@@ -1277,20 +1227,6 @@ dataSplitPageInternal(GinBtree btree, Buffer origbuf,
        /* set up right bound for right page */
        *GinDataPageGetRightBound(rpage) = oldbound;
 
-       data.separator = separator;
-       data.nitem = nitems;
-       data.rightbound = oldbound;
-
-       rdata[0].buffer = InvalidBuffer;
-       rdata[0].data = (char *) &data;
-       rdata[0].len = sizeof(ginxlogSplitDataInternal);
-       rdata[0].next = &rdata[1];
-
-       rdata[1].buffer = InvalidBuffer;
-       rdata[1].data = (char *) allitems;
-       rdata[1].len = nitems * sizeof(PostingItem);
-       rdata[1].next = NULL;
-
        *newlpage = lpage;
        *newrpage = rpage;
 }
@@ -1797,24 +1733,18 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
        if (RelationNeedsWAL(index))
        {
                XLogRecPtr      recptr;
-               XLogRecData rdata[2];
                ginxlogCreatePostingTree data;
 
-               data.node = index->rd_node;
-               data.blkno = blkno;
                data.size = rootsize;
 
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].data = (char *) &data;
-               rdata[0].len = sizeof(ginxlogCreatePostingTree);
-               rdata[0].next = &rdata[1];
+               XLogBeginInsert();
+               XLogRegisterData((char *) &data, sizeof(ginxlogCreatePostingTree));
 
-               rdata[1].buffer = InvalidBuffer;
-               rdata[1].data = (char *) GinDataLeafPageGetPostingList(page);
-               rdata[1].len = rootsize;
-               rdata[1].next = NULL;
+               XLogRegisterData((char *) GinDataLeafPageGetPostingList(page),
+                                                rootsize);
+               XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT);
 
-               recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_PTREE, rdata);
+               recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_PTREE);
                PageSetLSN(page, recptr);
        }
 
index 84dc1e228c1436e85b3b6c8589e594c7ce11616c..c73b15fc8c585df442d50a5c78bbc4c6f6513840 100644 (file)
@@ -22,7 +22,7 @@
 static void entrySplitPage(GinBtree btree, Buffer origbuf,
                           GinBtreeStack *stack,
                           void *insertPayload,
-                          BlockNumber updateblkno, XLogRecData **prdata,
+                          BlockNumber updateblkno,
                           Page *newlpage, Page *newrpage);
 
 /*
@@ -515,33 +515,36 @@ entryPreparePage(GinBtree btree, Page page, OffsetNumber off,
  * On insertion to an internal node, in addition to inserting the given item,
  * the downlink of the existing item at 'off' is updated to point to
  * 'updateblkno'.
+ *
+ * On INSERTED, registers the buffer as buffer ID 0, with data.
+ * On SPLIT, returns the new left and right pages in *newlpage and *newrpage.
  */
 static GinPlaceToPageRC
 entryPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
                                 void *insertPayload, BlockNumber updateblkno,
-                                XLogRecData **prdata, Page *newlpage, Page *newrpage)
+                                Page *newlpage, Page *newrpage)
 {
        GinBtreeEntryInsertData *insertData = insertPayload;
        Page            page = BufferGetPage(buf);
        OffsetNumber off = stack->off;
        OffsetNumber placed;
-       int                     cnt = 0;
 
-       /* these must be static so they can be returned to caller */
-       static XLogRecData rdata[3];
+       /*
+        * this must be static so it outlives this function. XXX: If we
+        * can rely on XLogRegisterData to copy the data, this isn't necessary
+        */
        static ginxlogInsertEntry data;
 
        /* quick exit if it doesn't fit */
        if (!entryIsEnoughSpace(btree, buf, off, insertData))
        {
                entrySplitPage(btree, buf, stack, insertPayload, updateblkno,
-                                          prdata, newlpage, newrpage);
+                                          newlpage, newrpage);
                return SPLIT;
        }
 
        START_CRIT_SECTION();
 
-       *prdata = rdata;
        entryPreparePage(btree, page, off, insertData, updateblkno);
 
        placed = PageAddItem(page,
@@ -552,21 +555,17 @@ entryPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
                elog(ERROR, "failed to add item to index page in \"%s\"",
                         RelationGetRelationName(btree->index));
 
-       data.isDelete = insertData->isDelete;
-       data.offset = off;
-
-       rdata[cnt].buffer = buf;
-       rdata[cnt].buffer_std = true;
-       rdata[cnt].data = (char *) &data;
-       rdata[cnt].len = offsetof(ginxlogInsertEntry, tuple);
-       rdata[cnt].next = &rdata[cnt + 1];
-       cnt++;
-
-       rdata[cnt].buffer = buf;
-       rdata[cnt].buffer_std = true;
-       rdata[cnt].data = (char *) insertData->entry;
-       rdata[cnt].len = IndexTupleSize(insertData->entry);
-       rdata[cnt].next = NULL;
+       if (RelationNeedsWAL(btree->index))
+       {
+               data.isDelete = insertData->isDelete;
+               data.offset = off;
+
+               XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
+               XLogRegisterBufData(0, (char *) &data,
+                                                       offsetof(ginxlogInsertEntry, tuple));
+               XLogRegisterBufData(0, (char *) insertData->entry,
+                                                       IndexTupleSize(insertData->entry));
+       }
 
        return INSERTED;
 }
@@ -581,7 +580,7 @@ static void
 entrySplitPage(GinBtree btree, Buffer origbuf,
                           GinBtreeStack *stack,
                           void *insertPayload,
-                          BlockNumber updateblkno, XLogRecData **prdata,
+                          BlockNumber updateblkno,
                           Page *newlpage, Page *newrpage)
 {
        GinBtreeEntryInsertData *insertData = insertPayload;
@@ -590,7 +589,6 @@ entrySplitPage(GinBtree btree, Buffer origbuf,
                                maxoff,
                                separator = InvalidOffsetNumber;
        Size            totalsize = 0;
-       Size            tupstoresize;
        Size            lsize = 0,
                                size;
        char       *ptr;
@@ -599,13 +597,8 @@ entrySplitPage(GinBtree btree, Buffer origbuf,
        Page            lpage = PageGetTempPageCopy(BufferGetPage(origbuf));
        Page            rpage = PageGetTempPageCopy(BufferGetPage(origbuf));
        Size            pageSize = PageGetPageSize(lpage);
+       char            tupstore[2 * BLCKSZ];
 
-       /* these must be static so they can be returned to caller */
-       static XLogRecData rdata[2];
-       static ginxlogSplitEntry data;
-       static char tupstore[2 * BLCKSZ];
-
-       *prdata = rdata;
        entryPreparePage(btree, lpage, off, insertData, updateblkno);
 
        /*
@@ -638,7 +631,6 @@ entrySplitPage(GinBtree btree, Buffer origbuf,
                ptr += size;
                totalsize += size + sizeof(ItemIdData);
        }
-       tupstoresize = ptr - tupstore;
 
        /*
         * Initialize the left and right pages, and copy all the tuples back to
@@ -673,19 +665,6 @@ entrySplitPage(GinBtree btree, Buffer origbuf,
                ptr += MAXALIGN(IndexTupleSize(itup));
        }
 
-       data.separator = separator;
-       data.nitem = maxoff;
-
-       rdata[0].buffer = InvalidBuffer;
-       rdata[0].data = (char *) &data;
-       rdata[0].len = sizeof(ginxlogSplitEntry);
-       rdata[0].next = &rdata[1];
-
-       rdata[1].buffer = InvalidBuffer;
-       rdata[1].data = tupstore;
-       rdata[1].len = tupstoresize;
-       rdata[1].next = NULL;
-
        *newlpage = lpage;
        *newrpage = rpage;
 }
index ed581977f54079e19e85ad51a7765ac09f073c6c..32c8cb1ae4288068ecdce97649097c1fc9346343 100644 (file)
@@ -106,26 +106,19 @@ writeListPage(Relation index, Buffer buffer,
 
        if (RelationNeedsWAL(index))
        {
-               XLogRecData rdata[2];
                ginxlogInsertListPage data;
                XLogRecPtr      recptr;
 
-               data.node = index->rd_node;
-               data.blkno = BufferGetBlockNumber(buffer);
                data.rightlink = rightlink;
                data.ntuples = ntuples;
 
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].data = (char *) &data;
-               rdata[0].len = sizeof(ginxlogInsertListPage);
-               rdata[0].next = rdata + 1;
+               XLogBeginInsert();
+               XLogRegisterData((char *) &data, sizeof(ginxlogInsertListPage));
 
-               rdata[1].buffer = InvalidBuffer;
-               rdata[1].data = workspace;
-               rdata[1].len = size;
-               rdata[1].next = NULL;
+               XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT);
+               XLogRegisterBufData(0, workspace, size);
 
-               recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT_LISTPAGE, rdata);
+               recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT_LISTPAGE);
                PageSetLSN(page, recptr);
        }
 
@@ -222,25 +215,22 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
        Buffer          metabuffer;
        Page            metapage;
        GinMetaPageData *metadata = NULL;
-       XLogRecData rdata[2];
        Buffer          buffer = InvalidBuffer;
        Page            page = NULL;
        ginxlogUpdateMeta data;
        bool            separateList = false;
        bool            needCleanup = false;
+       bool            needWal;
 
        if (collector->ntuples == 0)
                return;
 
+       needWal = RelationNeedsWAL(index);
+
        data.node = index->rd_node;
        data.ntuples = 0;
        data.newRightlink = data.prevTail = InvalidBlockNumber;
 
-       rdata[0].buffer = InvalidBuffer;
-       rdata[0].data = (char *) &data;
-       rdata[0].len = sizeof(ginxlogUpdateMeta);
-       rdata[0].next = NULL;
-
        metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO);
        metapage = BufferGetPage(metabuffer);
 
@@ -280,6 +270,9 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
                memset(&sublist, 0, sizeof(GinMetaPageData));
                makeSublist(index, collector->tuples, collector->ntuples, &sublist);
 
+               if (needWal)
+                       XLogBeginInsert();
+
                /*
                 * metapage was unlocked, see above
                 */
@@ -312,14 +305,6 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
                        LockBuffer(buffer, GIN_EXCLUSIVE);
                        page = BufferGetPage(buffer);
 
-                       rdata[0].next = rdata + 1;
-
-                       rdata[1].buffer = buffer;
-                       rdata[1].buffer_std = true;
-                       rdata[1].data = NULL;
-                       rdata[1].len = 0;
-                       rdata[1].next = NULL;
-
                        Assert(GinPageGetOpaque(page)->rightlink == InvalidBlockNumber);
 
                        START_CRIT_SECTION();
@@ -333,6 +318,9 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
 
                        metadata->nPendingPages += sublist.nPendingPages;
                        metadata->nPendingHeapTuples += sublist.nPendingHeapTuples;
+
+                       if (needWal)
+                               XLogRegisterBuffer(1, buffer, REGBUF_STANDARD);
                }
        }
        else
@@ -345,6 +333,7 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
                int                     i,
                                        tupsize;
                char       *ptr;
+               char       *collectordata;
 
                buffer = ReadBuffer(index, metadata->tail);
                LockBuffer(buffer, GIN_EXCLUSIVE);
@@ -353,16 +342,13 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
                off = (PageIsEmpty(page)) ? FirstOffsetNumber :
                        OffsetNumberNext(PageGetMaxOffsetNumber(page));
 
-               rdata[0].next = rdata + 1;
-
-               rdata[1].buffer = buffer;
-               rdata[1].buffer_std = true;
-               ptr = rdata[1].data = (char *) palloc(collector->sumsize);
-               rdata[1].len = collector->sumsize;
-               rdata[1].next = NULL;
+               collectordata = ptr = (char *) palloc(collector->sumsize);
 
                data.ntuples = collector->ntuples;
 
+               if (needWal)
+                       XLogBeginInsert();
+
                START_CRIT_SECTION();
 
                /*
@@ -387,7 +373,12 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
                        off++;
                }
 
-               Assert((ptr - rdata[1].data) <= collector->sumsize);
+               Assert((ptr - collectordata) <= collector->sumsize);
+               if (needWal)
+               {
+                       XLogRegisterBuffer(1, buffer, REGBUF_STANDARD);
+                       XLogRegisterBufData(1, collectordata, collector->sumsize);
+               }
 
                metadata->tailFreeSize = PageGetExactFreeSpace(page);
 
@@ -399,13 +390,16 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
         */
        MarkBufferDirty(metabuffer);
 
-       if (RelationNeedsWAL(index))
+       if (needWal)
        {
                XLogRecPtr      recptr;
 
                memcpy(&data.metadata, metadata, sizeof(GinMetaPageData));
 
-               recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE, rdata);
+               XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT);
+               XLogRegisterData((char *) &data, sizeof(ginxlogUpdateMeta));
+
+               recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE);
                PageSetLSN(metapage, recptr);
 
                if (buffer != InvalidBuffer)
@@ -521,20 +515,11 @@ shiftList(Relation index, Buffer metabuffer, BlockNumber newHead,
                int                     i;
                int64           nDeletedHeapTuples = 0;
                ginxlogDeleteListPages data;
-               XLogRecData rdata[1];
                Buffer          buffers[GIN_NDELETE_AT_ONCE];
 
-               data.node = index->rd_node;
-
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].data = (char *) &data;
-               rdata[0].len = sizeof(ginxlogDeleteListPages);
-               rdata[0].next = NULL;
-
                data.ndeleted = 0;
                while (data.ndeleted < GIN_NDELETE_AT_ONCE && blknoToDelete != newHead)
                {
-                       data.toDelete[data.ndeleted] = blknoToDelete;
                        buffers[data.ndeleted] = ReadBuffer(index, blknoToDelete);
                        LockBuffer(buffers[data.ndeleted], GIN_EXCLUSIVE);
                        page = BufferGetPage(buffers[data.ndeleted]);
@@ -557,6 +542,13 @@ shiftList(Relation index, Buffer metabuffer, BlockNumber newHead,
                if (stats)
                        stats->pages_deleted += data.ndeleted;
 
+               /*
+                * This operation touches an unusually large number of pages, so
+                * prepare the XLogInsert machinery for that before entering the
+                * critical section.
+                */
+               XLogEnsureRecordSpace(data.ndeleted + 1, 0);
+
                START_CRIT_SECTION();
 
                metadata->head = blknoToDelete;
@@ -587,9 +579,17 @@ shiftList(Relation index, Buffer metabuffer, BlockNumber newHead,
                {
                        XLogRecPtr      recptr;
 
+                       XLogBeginInsert();
+                       XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT);
+                       for (i = 0; i < data.ndeleted; i++)
+                               XLogRegisterBuffer(i + 1, buffers[i], REGBUF_WILL_INIT);
+
                        memcpy(&data.metadata, metadata, sizeof(GinMetaPageData));
 
-                       recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_LISTPAGE, rdata);
+                       XLogRegisterData((char *) &data,
+                                                        sizeof(ginxlogDeleteListPages));
+
+                       recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_LISTPAGE);
                        PageSetLSN(metapage, recptr);
 
                        for (i = 0; i < data.ndeleted; i++)
index 370884ed17fee19a8bfda824ce27e5b18f405137..c1ad0fd8c4db24b244d71679c9418d22a3c01f83 100644 (file)
@@ -347,15 +347,13 @@ ginbuild(PG_FUNCTION_ARGS)
        if (RelationNeedsWAL(index))
        {
                XLogRecPtr      recptr;
-               XLogRecData rdata;
                Page            page;
 
-               rdata.buffer = InvalidBuffer;
-               rdata.data = (char *) &(index->rd_node);
-               rdata.len = sizeof(RelFileNode);
-               rdata.next = NULL;
+               XLogBeginInsert();
+               XLogRegisterBuffer(0, MetaBuffer, REGBUF_WILL_INIT);
+               XLogRegisterBuffer(1, RootBuffer, REGBUF_WILL_INIT);
 
-               recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX, &rdata);
+               recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX);
 
                page = BufferGetPage(RootBuffer);
                PageSetLSN(page, recptr);
index 1f8db9de6d94bb7cfaf0de4d5faa4da010d5468c..1f4bd64cd4d85f6939146741efb59fb36a729934 100644 (file)
@@ -603,19 +603,17 @@ ginUpdateStats(Relation index, const GinStatsData *stats)
        {
                XLogRecPtr      recptr;
                ginxlogUpdateMeta data;
-               XLogRecData rdata;
 
                data.node = index->rd_node;
                data.ntuples = 0;
                data.newRightlink = data.prevTail = InvalidBlockNumber;
                memcpy(&data.metadata, metadata, sizeof(GinMetaPageData));
 
-               rdata.buffer = InvalidBuffer;
-               rdata.data = (char *) &data;
-               rdata.len = sizeof(ginxlogUpdateMeta);
-               rdata.next = NULL;
+               XLogBeginInsert();
+               XLogRegisterData((char *) &data, sizeof(ginxlogUpdateMeta));
+               XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT);
 
-               recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE, &rdata);
+               recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE);
                PageSetLSN(metapage, recptr);
        }
 
index 3a61321a835d984a7cc9365bfadc63d9a884918a..6a00383622b7ce6978f391f3c4b9ebb487f09b2c 100644 (file)
@@ -89,10 +89,6 @@ xlogVacuumPage(Relation index, Buffer buffer)
 {
        Page            page = BufferGetPage(buffer);
        XLogRecPtr      recptr;
-       XLogRecData rdata[3];
-       ginxlogVacuumPage xlrec;
-       uint16          lower;
-       uint16          upper;
 
        /* This is only used for entry tree leaf pages. */
        Assert(!GinPageIsData(page));
@@ -101,57 +97,14 @@ xlogVacuumPage(Relation index, Buffer buffer)
        if (!RelationNeedsWAL(index))
                return;
 
-       xlrec.node = index->rd_node;
-       xlrec.blkno = BufferGetBlockNumber(buffer);
-
-       /* Assume we can omit data between pd_lower and pd_upper */
-       lower = ((PageHeader) page)->pd_lower;
-       upper = ((PageHeader) page)->pd_upper;
-
-       Assert(lower < BLCKSZ);
-       Assert(upper < BLCKSZ);
-
-       if (lower >= SizeOfPageHeaderData &&
-               upper > lower &&
-               upper <= BLCKSZ)
-       {
-               xlrec.hole_offset = lower;
-               xlrec.hole_length = upper - lower;
-       }
-       else
-       {
-               /* No "hole" to compress out */
-               xlrec.hole_offset = 0;
-               xlrec.hole_length = 0;
-       }
-
-       rdata[0].data = (char *) &xlrec;
-       rdata[0].len = sizeof(ginxlogVacuumPage);
-       rdata[0].buffer = InvalidBuffer;
-       rdata[0].next = &rdata[1];
-
-       if (xlrec.hole_length == 0)
-       {
-               rdata[1].data = (char *) page;
-               rdata[1].len = BLCKSZ;
-               rdata[1].buffer = InvalidBuffer;
-               rdata[1].next = NULL;
-       }
-       else
-       {
-               /* must skip the hole */
-               rdata[1].data = (char *) page;
-               rdata[1].len = xlrec.hole_offset;
-               rdata[1].buffer = InvalidBuffer;
-               rdata[1].next = &rdata[2];
-
-               rdata[2].data = (char *) page + (xlrec.hole_offset + xlrec.hole_length);
-               rdata[2].len = BLCKSZ - (xlrec.hole_offset + xlrec.hole_length);
-               rdata[2].buffer = InvalidBuffer;
-               rdata[2].next = NULL;
-       }
+       /*
+        * Always create a full-page image; we don't track changes on the page
+        * at any more fine-grained level. This could obviously be improved...
+        */
+       XLogBeginInsert();
+       XLogRegisterBuffer(0, buffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
 
-       recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_PAGE, rdata);
+       recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_PAGE);
        PageSetLSN(page, recptr);
 }
 
@@ -292,48 +245,26 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
        if (RelationNeedsWAL(gvs->index))
        {
                XLogRecPtr      recptr;
-               XLogRecData rdata[4];
                ginxlogDeletePage data;
 
-               data.node = gvs->index->rd_node;
-               data.blkno = deleteBlkno;
-               data.parentBlkno = parentBlkno;
+               /*
+                * We can't pass REGBUF_STANDARD for the deleted page, because we
+                * didn't set pd_lower on pre-9.4 versions. The page might've been
+                * binary-upgraded from an older version, and hence not have pd_lower
+                * set correctly. Ditto for the left page, but removing the item from
+                * the parent updated its pd_lower, so we know that's OK at this point.
+                */
+               XLogBeginInsert();
+               XLogRegisterBuffer(0, dBuffer, 0);
+               XLogRegisterBuffer(1, pBuffer, REGBUF_STANDARD);
+               XLogRegisterBuffer(2, lBuffer, 0);
+
                data.parentOffset = myoff;
-               data.leftBlkno = leftBlkno;
                data.rightLink = GinPageGetOpaque(page)->rightlink;
 
-               /*
-                * We can't pass buffer_std = TRUE, because we didn't set pd_lower on
-                * pre-9.4 versions. The page might've been binary-upgraded from an
-                * older version, and hence not have pd_lower set correctly. Ditto for
-                * the left page, but removing the item from the parent updated its
-                * pd_lower, so we know that's OK at this point.
-                */
-               rdata[0].buffer = dBuffer;
-               rdata[0].buffer_std = FALSE;
-               rdata[0].data = NULL;
-               rdata[0].len = 0;
-               rdata[0].next = rdata + 1;
-
-               rdata[1].buffer = pBuffer;
-               rdata[1].buffer_std = TRUE;
-               rdata[1].data = NULL;
-               rdata[1].len = 0;
-               rdata[1].next = rdata + 2;
-
-               rdata[2].buffer = lBuffer;
-               rdata[2].buffer_std = FALSE;
-               rdata[2].data = NULL;
-               rdata[2].len = 0;
-               rdata[2].next = rdata + 3;
-
-               rdata[3].buffer = InvalidBuffer;
-               rdata[3].buffer_std = FALSE;
-               rdata[3].len = sizeof(ginxlogDeletePage);
-               rdata[3].data = (char *) &data;
-               rdata[3].next = NULL;
-
-               recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_PAGE, rdata);
+               XLogRegisterData((char *) &data, sizeof(ginxlogDeletePage));
+
+               recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_PAGE);
                PageSetLSN(page, recptr);
                PageSetLSN(parentPage, recptr);
                PageSetLSN(BufferGetPage(lBuffer), recptr);
index d0553bb8f729753bf39099a3841105a107274559..a7a66d885992590b32793b0c71b751e69299f7e7 100644 (file)
 static MemoryContext opCtx;            /* working memory for operations */
 
 static void
-ginRedoClearIncompleteSplit(XLogRecPtr lsn, XLogRecord *record,
-                                                       int block_index,
-                                                       RelFileNode node, BlockNumber blkno)
+ginRedoClearIncompleteSplit(XLogRecPtr lsn, XLogRecord *record, uint8 block_id)
 {
        Buffer          buffer;
        Page            page;
 
-       if (XLogReadBufferForRedo(lsn, record, block_index, node, blkno, &buffer)
-               == BLK_NEEDS_REDO)
+       if (XLogReadBufferForRedo(lsn, record, block_id, &buffer) == BLK_NEEDS_REDO)
        {
                page = (Page) BufferGetPage(buffer);
-
                GinPageGetOpaque(page)->flags &= ~GIN_INCOMPLETE_SPLIT;
 
                PageSetLSN(page, lsn);
@@ -44,16 +40,12 @@ ginRedoClearIncompleteSplit(XLogRecPtr lsn, XLogRecord *record,
 static void
 ginRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
 {
-       RelFileNode *node = (RelFileNode *) XLogRecGetData(record);
        Buffer          RootBuffer,
                                MetaBuffer;
        Page            page;
 
-       /* Backup blocks are not used in create_index records */
-       Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
-
-       MetaBuffer = XLogReadBuffer(*node, GIN_METAPAGE_BLKNO, true);
-       Assert(BufferIsValid(MetaBuffer));
+       XLogReadBufferForRedoExtended(lsn, record, 0, RBM_ZERO, false, &MetaBuffer);
+       Assert(BufferGetBlockNumber(MetaBuffer) == GIN_METAPAGE_BLKNO);
        page = (Page) BufferGetPage(MetaBuffer);
 
        GinInitMetabuffer(MetaBuffer);
@@ -61,8 +53,8 @@ ginRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
        PageSetLSN(page, lsn);
        MarkBufferDirty(MetaBuffer);
 
-       RootBuffer = XLogReadBuffer(*node, GIN_ROOT_BLKNO, true);
-       Assert(BufferIsValid(RootBuffer));
+       XLogReadBufferForRedoExtended(lsn, record, 1, RBM_ZERO, false, &RootBuffer);
+       Assert(BufferGetBlockNumber(RootBuffer) == GIN_ROOT_BLKNO);
        page = (Page) BufferGetPage(RootBuffer);
 
        GinInitBuffer(RootBuffer, GIN_LEAF);
@@ -82,11 +74,7 @@ ginRedoCreatePTree(XLogRecPtr lsn, XLogRecord *record)
        Buffer          buffer;
        Page            page;
 
-       /* Backup blocks are not used in create_ptree records */
-       Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
-
-       buffer = XLogReadBuffer(data->node, data->blkno, true);
-       Assert(BufferIsValid(buffer));
+       XLogReadBufferForRedoExtended(lsn, record, 0, RBM_ZERO, false, &buffer);
        page = (Page) BufferGetPage(buffer);
 
        GinInitBuffer(buffer, GIN_DATA | GIN_LEAF | GIN_COMPRESSED);
@@ -332,31 +320,35 @@ ginRedoInsert(XLogRecPtr lsn, XLogRecord *record)
 {
        ginxlogInsert *data = (ginxlogInsert *) XLogRecGetData(record);
        Buffer          buffer;
-       char       *payload;
+#ifdef NOT_USED
        BlockNumber leftChildBlkno = InvalidBlockNumber;
+#endif
        BlockNumber rightChildBlkno = InvalidBlockNumber;
        bool            isLeaf = (data->flags & GIN_INSERT_ISLEAF) != 0;
 
-       payload = XLogRecGetData(record) + sizeof(ginxlogInsert);
-
        /*
         * First clear incomplete-split flag on child page if this finishes a
         * split.
         */
        if (!isLeaf)
        {
+               char       *payload = XLogRecGetData(record) + sizeof(ginxlogInsert);
+
+#ifdef NOT_USED
                leftChildBlkno = BlockIdGetBlockNumber((BlockId) payload);
+#endif
                payload += sizeof(BlockIdData);
                rightChildBlkno = BlockIdGetBlockNumber((BlockId) payload);
                payload += sizeof(BlockIdData);
 
-               ginRedoClearIncompleteSplit(lsn, record, 0, data->node, leftChildBlkno);
+               ginRedoClearIncompleteSplit(lsn, record, 1);
        }
 
-       if (XLogReadBufferForRedo(lsn, record, isLeaf ? 0 : 1, data->node,
-                                                         data->blkno, &buffer) == BLK_NEEDS_REDO)
+       if (XLogReadBufferForRedo(lsn, record, 0, &buffer) == BLK_NEEDS_REDO)
        {
                Page            page = BufferGetPage(buffer);
+               Size            len;
+               char       *payload = XLogRecGetBlockData(record, 0, &len);
 
                /* How to insert the payload is tree-type specific */
                if (data->flags & GIN_INSERT_ISDATA)
@@ -377,162 +369,34 @@ ginRedoInsert(XLogRecPtr lsn, XLogRecord *record)
                UnlockReleaseBuffer(buffer);
 }
 
-static void
-ginRedoSplitEntry(Page lpage, Page rpage, void *rdata)
-{
-       ginxlogSplitEntry *data = (ginxlogSplitEntry *) rdata;
-       IndexTuple      itup = (IndexTuple) ((char *) rdata + sizeof(ginxlogSplitEntry));
-       OffsetNumber i;
-
-       for (i = 0; i < data->separator; i++)
-       {
-               if (PageAddItem(lpage, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
-                       elog(ERROR, "failed to add item to gin index page");
-               itup = (IndexTuple) (((char *) itup) + MAXALIGN(IndexTupleSize(itup)));
-       }
-
-       for (i = data->separator; i < data->nitem; i++)
-       {
-               if (PageAddItem(rpage, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
-                       elog(ERROR, "failed to add item to gin index page");
-               itup = (IndexTuple) (((char *) itup) + MAXALIGN(IndexTupleSize(itup)));
-       }
-}
-
-static void
-ginRedoSplitData(Page lpage, Page rpage, void *rdata)
-{
-       bool            isleaf = GinPageIsLeaf(lpage);
-
-       if (isleaf)
-       {
-               ginxlogSplitDataLeaf *data = (ginxlogSplitDataLeaf *) rdata;
-               Pointer         lptr = (Pointer) rdata + sizeof(ginxlogSplitDataLeaf);
-               Pointer         rptr = lptr + data->lsize;
-
-               Assert(data->lsize > 0 && data->lsize <= GinDataPageMaxDataSize);
-               Assert(data->rsize > 0 && data->rsize <= GinDataPageMaxDataSize);
-
-               memcpy(GinDataLeafPageGetPostingList(lpage), lptr, data->lsize);
-               memcpy(GinDataLeafPageGetPostingList(rpage), rptr, data->rsize);
-
-               GinDataPageSetDataSize(lpage, data->lsize);
-               GinDataPageSetDataSize(rpage, data->rsize);
-               *GinDataPageGetRightBound(lpage) = data->lrightbound;
-               *GinDataPageGetRightBound(rpage) = data->rrightbound;
-       }
-       else
-       {
-               ginxlogSplitDataInternal *data = (ginxlogSplitDataInternal *) rdata;
-               PostingItem *items = (PostingItem *) ((char *) rdata + sizeof(ginxlogSplitDataInternal));
-               OffsetNumber i;
-               OffsetNumber maxoff;
-
-               for (i = 0; i < data->separator; i++)
-                       GinDataPageAddPostingItem(lpage, &items[i], InvalidOffsetNumber);
-               for (i = data->separator; i < data->nitem; i++)
-                       GinDataPageAddPostingItem(rpage, &items[i], InvalidOffsetNumber);
-
-               /* set up right key */
-               maxoff = GinPageGetOpaque(lpage)->maxoff;
-               *GinDataPageGetRightBound(lpage) = GinDataPageGetPostingItem(lpage, maxoff)->key;
-               *GinDataPageGetRightBound(rpage) = data->rightbound;
-       }
-}
-
 static void
 ginRedoSplit(XLogRecPtr lsn, XLogRecord *record)
 {
        ginxlogSplit *data = (ginxlogSplit *) XLogRecGetData(record);
        Buffer          lbuffer,
-                               rbuffer;
-       Page            lpage,
-                               rpage;
-       uint32          flags;
-       uint32          lflags,
-                               rflags;
-       char       *payload;
+                               rbuffer,
+                               rootbuf;
        bool            isLeaf = (data->flags & GIN_INSERT_ISLEAF) != 0;
-       bool            isData = (data->flags & GIN_INSERT_ISDATA) != 0;
        bool            isRoot = (data->flags & GIN_SPLIT_ROOT) != 0;
 
-       payload = XLogRecGetData(record) + sizeof(ginxlogSplit);
-
        /*
         * First clear incomplete-split flag on child page if this finishes a
         * split
         */
        if (!isLeaf)
-               ginRedoClearIncompleteSplit(lsn, record, 0, data->node, data->leftChildBlkno);
-
-       flags = 0;
-       if (isLeaf)
-               flags |= GIN_LEAF;
-       if (isData)
-               flags |= GIN_DATA;
-       if (isLeaf && isData)
-               flags |= GIN_COMPRESSED;
-
-       lflags = rflags = flags;
-       if (!isRoot)
-               lflags |= GIN_INCOMPLETE_SPLIT;
-
-       lbuffer = XLogReadBuffer(data->node, data->lblkno, true);
-       Assert(BufferIsValid(lbuffer));
-       lpage = (Page) BufferGetPage(lbuffer);
-       GinInitBuffer(lbuffer, lflags);
-
-       rbuffer = XLogReadBuffer(data->node, data->rblkno, true);
-       Assert(BufferIsValid(rbuffer));
-       rpage = (Page) BufferGetPage(rbuffer);
-       GinInitBuffer(rbuffer, rflags);
-
-       GinPageGetOpaque(lpage)->rightlink = BufferGetBlockNumber(rbuffer);
-       GinPageGetOpaque(rpage)->rightlink = isRoot ? InvalidBlockNumber : data->rrlink;
-
-       /* Do the tree-type specific portion to restore the page contents */
-       if (isData)
-               ginRedoSplitData(lpage, rpage, payload);
-       else
-               ginRedoSplitEntry(lpage, rpage, payload);
+               ginRedoClearIncompleteSplit(lsn, record, 3);
 
-       PageSetLSN(rpage, lsn);
-       MarkBufferDirty(rbuffer);
+       if (XLogReadBufferForRedo(lsn, record, 0, &lbuffer) != BLK_RESTORED)
+               elog(ERROR, "GIN split record did not contain a full-page image of left page");
 
-       PageSetLSN(lpage, lsn);
-       MarkBufferDirty(lbuffer);
+       if (XLogReadBufferForRedo(lsn, record, 1, &rbuffer) != BLK_RESTORED)
+               elog(ERROR, "GIN split record did not contain a full-page image of right page");
 
        if (isRoot)
        {
-               BlockNumber rootBlkno = data->rrlink;
-               Buffer          rootBuf = XLogReadBuffer(data->node, rootBlkno, true);
-               Page            rootPage = BufferGetPage(rootBuf);
-
-               GinInitBuffer(rootBuf, flags & ~GIN_LEAF & ~GIN_COMPRESSED);
-
-               if (isData)
-               {
-                       Assert(rootBlkno != GIN_ROOT_BLKNO);
-                       ginDataFillRoot(NULL, BufferGetPage(rootBuf),
-                                                       BufferGetBlockNumber(lbuffer),
-                                                       BufferGetPage(lbuffer),
-                                                       BufferGetBlockNumber(rbuffer),
-                                                       BufferGetPage(rbuffer));
-               }
-               else
-               {
-                       Assert(rootBlkno == GIN_ROOT_BLKNO);
-                       ginEntryFillRoot(NULL, BufferGetPage(rootBuf),
-                                                        BufferGetBlockNumber(lbuffer),
-                                                        BufferGetPage(lbuffer),
-                                                        BufferGetBlockNumber(rbuffer),
-                                                        BufferGetPage(rbuffer));
-               }
-
-               PageSetLSN(rootPage, lsn);
-
-               MarkBufferDirty(rootBuf);
-               UnlockReleaseBuffer(rootBuf);
+               if (XLogReadBufferForRedo(lsn, record, 2, &rootbuf) != BLK_RESTORED)
+                       elog(ERROR, "GIN split record did not contain a full-page image of root page");
+               UnlockReleaseBuffer(rootbuf);
        }
 
        UnlockReleaseBuffer(rbuffer);
@@ -546,52 +410,27 @@ ginRedoSplit(XLogRecPtr lsn, XLogRecord *record)
 static void
 ginRedoVacuumPage(XLogRecPtr lsn, XLogRecord *record)
 {
-       ginxlogVacuumPage *xlrec = (ginxlogVacuumPage *) XLogRecGetData(record);
-       char       *blk = ((char *) xlrec) + sizeof(ginxlogVacuumPage);
        Buffer          buffer;
-       Page            page;
-
-       Assert(xlrec->hole_offset < BLCKSZ);
-       Assert(xlrec->hole_length < BLCKSZ);
-
-       /* Backup blocks are not used, we'll re-initialize the page always. */
-       Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
 
-       buffer = XLogReadBuffer(xlrec->node, xlrec->blkno, true);
-       if (!BufferIsValid(buffer))
-               return;
-       page = (Page) BufferGetPage(buffer);
-
-       if (xlrec->hole_length == 0)
+       if (XLogReadBufferForRedo(lsn, record, 0, &buffer) != BLK_RESTORED)
        {
-               memcpy((char *) page, blk, BLCKSZ);
+               elog(ERROR, "replay of gin entry tree page vacuum did not restore the page");
        }
-       else
-       {
-               memcpy((char *) page, blk, xlrec->hole_offset);
-               /* must zero-fill the hole */
-               MemSet((char *) page + xlrec->hole_offset, 0, xlrec->hole_length);
-               memcpy((char *) page + (xlrec->hole_offset + xlrec->hole_length),
-                          blk + xlrec->hole_offset,
-                          BLCKSZ - (xlrec->hole_offset + xlrec->hole_length));
-       }
-
-       PageSetLSN(page, lsn);
-
-       MarkBufferDirty(buffer);
        UnlockReleaseBuffer(buffer);
 }
 
 static void
 ginRedoVacuumDataLeafPage(XLogRecPtr lsn, XLogRecord *record)
 {
-       ginxlogVacuumDataLeafPage *xlrec = (ginxlogVacuumDataLeafPage *) XLogRecGetData(record);
        Buffer          buffer;
 
-       if (XLogReadBufferForRedo(lsn, record, 0, xlrec->node, xlrec->blkno,
-                                                         &buffer) == BLK_NEEDS_REDO)
+       if (XLogReadBufferForRedo(lsn, record, 0, &buffer) == BLK_NEEDS_REDO)
        {
                Page            page = BufferGetPage(buffer);
+               Size            len;
+               ginxlogVacuumDataLeafPage *xlrec;
+
+               xlrec = (ginxlogVacuumDataLeafPage *) XLogRecGetBlockData(record, 0, &len);
 
                Assert(GinPageIsLeaf(page));
                Assert(GinPageIsData(page));
@@ -613,22 +452,18 @@ ginRedoDeletePage(XLogRecPtr lsn, XLogRecord *record)
        Buffer          lbuffer;
        Page            page;
 
-       if (XLogReadBufferForRedo(lsn, record, 0, data->node, data->blkno, &dbuffer)
-               == BLK_NEEDS_REDO)
+       if (XLogReadBufferForRedo(lsn, record, 0, &dbuffer) == BLK_NEEDS_REDO)
        {
                page = BufferGetPage(dbuffer);
-
                Assert(GinPageIsData(page));
                GinPageGetOpaque(page)->flags = GIN_DELETED;
                PageSetLSN(page, lsn);
                MarkBufferDirty(dbuffer);
        }
 
-       if (XLogReadBufferForRedo(lsn, record, 1, data->node, data->parentBlkno,
-                                                         &pbuffer) == BLK_NEEDS_REDO)
+       if (XLogReadBufferForRedo(lsn, record, 1, &pbuffer) == BLK_NEEDS_REDO)
        {
                page = BufferGetPage(pbuffer);
-
                Assert(GinPageIsData(page));
                Assert(!GinPageIsLeaf(page));
                GinPageDeletePostingItem(page, data->parentOffset);
@@ -636,11 +471,9 @@ ginRedoDeletePage(XLogRecPtr lsn, XLogRecord *record)
                MarkBufferDirty(pbuffer);
        }
 
-       if (XLogReadBufferForRedo(lsn, record, 2, data->node, data->leftBlkno,
-                                                         &lbuffer) == BLK_NEEDS_REDO)
+       if (XLogReadBufferForRedo(lsn, record, 2, &lbuffer) == BLK_NEEDS_REDO)
        {
                page = BufferGetPage(lbuffer);
-
                Assert(GinPageIsData(page));
                GinPageGetOpaque(page)->rightlink = data->rightLink;
                PageSetLSN(page, lsn);
@@ -668,9 +501,10 @@ ginRedoUpdateMetapage(XLogRecPtr lsn, XLogRecord *record)
         * image, so restore the metapage unconditionally without looking at the
         * LSN, to avoid torn page hazards.
         */
-       metabuffer = XLogReadBuffer(data->node, GIN_METAPAGE_BLKNO, false);
-       if (!BufferIsValid(metabuffer))
-               return;                                 /* assume index was deleted, nothing to do */
+       XLogReadBufferForRedoExtended(lsn, record, 0, RBM_ZERO, false, &metabuffer);
+       Assert(BufferGetBlockNumber(metabuffer) == GIN_METAPAGE_BLKNO);
        metapage = BufferGetPage(metabuffer);
 
+       /* RBM_ZERO returned a zero-filled page, so rebuild the page header. */
+       GinInitPage(metapage, GIN_META, BufferGetPageSize(metabuffer));
        memcpy(GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData));
@@ -682,17 +514,18 @@ ginRedoUpdateMetapage(XLogRecPtr lsn, XLogRecord *record)
                /*
                 * insert into tail page
                 */
-               if (XLogReadBufferForRedo(lsn, record, 0, data->node,
-                                                                 data->metadata.tail, &buffer)
-                       == BLK_NEEDS_REDO)
+               if (XLogReadBufferForRedo(lsn, record, 1, &buffer) == BLK_NEEDS_REDO)
                {
                        Page            page = BufferGetPage(buffer);
                        OffsetNumber off;
                        int                     i;
                        Size            tupsize;
+                       char       *payload;
                        IndexTuple      tuples;
+                       Size            totaltupsize;
 
-                       tuples = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogUpdateMeta));
+                       payload = XLogRecGetBlockData(record, 1, &totaltupsize);
+                       tuples = (IndexTuple) payload;
 
                        if (PageIsEmpty(page))
                                off = FirstOffsetNumber;
@@ -711,6 +544,7 @@ ginRedoUpdateMetapage(XLogRecPtr lsn, XLogRecord *record)
 
                                off++;
                        }
+                       Assert(payload + totaltupsize == (char *) tuples);
 
                        /*
                         * Increase counter of heap tuples
@@ -728,8 +562,7 @@ ginRedoUpdateMetapage(XLogRecPtr lsn, XLogRecord *record)
                /*
                 * New tail
                 */
-               if (XLogReadBufferForRedo(lsn, record, 0, data->node, data->prevTail,
-                                                                 &buffer) == BLK_NEEDS_REDO)
+               if (XLogReadBufferForRedo(lsn, record, 1, &buffer) == BLK_NEEDS_REDO)
                {
                        Page            page = BufferGetPage(buffer);
 
@@ -755,15 +588,12 @@ ginRedoInsertListPage(XLogRecPtr lsn, XLogRecord *record)
                                off = FirstOffsetNumber;
        int                     i,
                                tupsize;
-       IndexTuple      tuples = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogInsertListPage));
-
-       /*
-        * Backup blocks are not used, we always re-initialize the page.
-        */
-       Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+       char       *payload;
+       IndexTuple      tuples;
+       Size            totaltupsize;
 
-       buffer = XLogReadBuffer(data->node, data->blkno, true);
-       Assert(BufferIsValid(buffer));
+       /* We always re-initialize the page. */
+       XLogReadBufferForRedoExtended(lsn, record, 0, RBM_ZERO, false, &buffer);
        page = BufferGetPage(buffer);
 
        GinInitBuffer(buffer, GIN_LIST);
@@ -779,6 +609,9 @@ ginRedoInsertListPage(XLogRecPtr lsn, XLogRecord *record)
                GinPageGetOpaque(page)->maxoff = 0;
        }
 
+       payload = XLogRecGetBlockData(record, 0, &totaltupsize);
+
+       tuples = (IndexTuple) payload;
        for (i = 0; i < data->ntuples; i++)
        {
                tupsize = IndexTupleSize(tuples);
@@ -791,6 +624,7 @@ ginRedoInsertListPage(XLogRecPtr lsn, XLogRecord *record)
                tuples = (IndexTuple) (((char *) tuples) + tupsize);
                off++;
        }
+       Assert((char *) tuples == payload + totaltupsize);
 
        PageSetLSN(page, lsn);
        MarkBufferDirty(buffer);
@@ -806,14 +640,12 @@ ginRedoDeleteListPages(XLogRecPtr lsn, XLogRecord *record)
        Page            metapage;
        int                     i;
 
-       /* Backup blocks are not used in delete_listpage records */
-       Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
-
-       metabuffer = XLogReadBuffer(data->node, GIN_METAPAGE_BLKNO, false);
-       if (!BufferIsValid(metabuffer))
-               return;                                 /* assume index was deleted, nothing to do */
+       XLogReadBufferForRedoExtended(lsn, record, 0, RBM_ZERO, false, &metabuffer);
+       Assert(BufferGetBlockNumber(metabuffer) == GIN_METAPAGE_BLKNO);
        metapage = BufferGetPage(metabuffer);
 
+       GinInitPage(metapage, GIN_META, BufferGetPageSize(metabuffer));
+
        memcpy(GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData));
        PageSetLSN(metapage, lsn);
        MarkBufferDirty(metabuffer);
@@ -838,7 +670,8 @@ ginRedoDeleteListPages(XLogRecPtr lsn, XLogRecord *record)
                Buffer          buffer;
                Page            page;
 
-               buffer = XLogReadBuffer(data->node, data->toDelete[i], true);
+               XLogReadBufferForRedoExtended(lsn, record, i + 1, RBM_ZERO, false,
+                                                                         &buffer);
                page = BufferGetPage(buffer);
                GinInitBuffer(buffer, GIN_DELETED);
 
index 644b882b7d4ba44abaeeafa5744b79fb4cf0ec5e..2141045f994be5ff8089071aad1c254fa21adfa6 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "access/genam.h"
 #include "access/gist_private.h"
+#include "access/xloginsert.h"
 #include "catalog/index.h"
 #include "catalog/pg_collation.h"
 #include "miscadmin.h"
@@ -394,6 +395,14 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
                        GistPageSetNSN(ptr->page, oldnsn);
                }
 
+               /*
+                * gistXLogSplit() needs to WAL log a lot of pages, prepare WAL
+                * insertion for that. NB: The number of pages and data segments
+                * specified here must match the calculations in gistXLogSplit()!
+                */
+               if (RelationNeedsWAL(rel))
+                       XLogEnsureRecordSpace(npage, 1 + npage * 2);
+
                START_CRIT_SECTION();
 
                /*
index 2143096c66b50d16e604e7aff8c80dc20e06bbb2..5acc986585a8e2d6aeac800f867fdf8899f4c5a9 100644 (file)
@@ -183,14 +183,11 @@ gistbuild(PG_FUNCTION_ARGS)
        if (RelationNeedsWAL(index))
        {
                XLogRecPtr      recptr;
-               XLogRecData rdata;
 
-               rdata.data = (char *) &(index->rd_node);
-               rdata.len = sizeof(RelFileNode);
-               rdata.buffer = InvalidBuffer;
-               rdata.next = NULL;
+               XLogBeginInsert();
+               XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT);
 
-               recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_CREATE_INDEX, &rdata);
+               recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_CREATE_INDEX);
                PageSetLSN(page, recptr);
        }
        else
index 2999d211916861cbbaeff9f3928e1403ecd1ae1a..ed95ea2b2d437676f5bdf757fcc7deb5c6b5ae5b 100644 (file)
 #include "access/xlogutils.h"
 #include "utils/memutils.h"
 
-typedef struct
-{
-       gistxlogPage *header;
-       IndexTuple *itup;
-} NewPage;
-
-typedef struct
-{
-       gistxlogPageSplit *data;
-       NewPage    *page;
-} PageSplitRecord;
-
 static MemoryContext opCtx;            /* working memory for operations */
 
 /*
@@ -44,8 +32,7 @@ static MemoryContext opCtx;           /* working memory for operations */
  * action.)
  */
 static void
-gistRedoClearFollowRight(XLogRecPtr lsn, XLogRecord *record, int block_index,
-                                                RelFileNode node, BlockNumber childblkno)
+gistRedoClearFollowRight(XLogRecPtr lsn, XLogRecord *record, uint8 block_id)
 {
        Buffer          buffer;
        Page            page;
@@ -55,8 +42,7 @@ gistRedoClearFollowRight(XLogRecPtr lsn, XLogRecord *record, int block_index,
         * Note that we still update the page even if it was restored from a full
         * page image, because the updated NSN is not included in the image.
         */
-       action = XLogReadBufferForRedo(lsn, record, block_index, node, childblkno,
-                                                                  &buffer);
+       action = XLogReadBufferForRedo(lsn, record, block_id, &buffer);
        if (action == BLK_NEEDS_REDO || action == BLK_RESTORED)
        {
                page = BufferGetPage(buffer);
@@ -77,18 +63,20 @@ gistRedoClearFollowRight(XLogRecPtr lsn, XLogRecord *record, int block_index,
 static void
 gistRedoPageUpdateRecord(XLogRecPtr lsn, XLogRecord *record)
 {
-       char       *begin = XLogRecGetData(record);
-       gistxlogPageUpdate *xldata = (gistxlogPageUpdate *) begin;
+       gistxlogPageUpdate *xldata = (gistxlogPageUpdate *) XLogRecGetData(record);
        Buffer          buffer;
        Page            page;
-       char       *data;
 
-       if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno,
-                                                         &buffer) == BLK_NEEDS_REDO)
+       if (XLogReadBufferForRedo(lsn, record, 0, &buffer) == BLK_NEEDS_REDO)
        {
-               page = (Page) BufferGetPage(buffer);
+               char       *begin;
+               char       *data;
+               Size            datalen;
+               int                     ninserted = 0;
 
-               data = begin + sizeof(gistxlogPageUpdate);
+               data = begin = XLogRecGetBlockData(record, 0, &datalen);
+
+               page = (Page) BufferGetPage(buffer);
 
                /* Delete old tuples */
                if (xldata->ntodelete > 0)
@@ -105,12 +93,12 @@ gistRedoPageUpdateRecord(XLogRecPtr lsn, XLogRecord *record)
                }
 
                /* add tuples */
-               if (data - begin < record->xl_len)
+               if (data - begin < datalen)
                {
                        OffsetNumber off = (PageIsEmpty(page)) ? FirstOffsetNumber :
                        OffsetNumberNext(PageGetMaxOffsetNumber(page));
 
-                       while (data - begin < record->xl_len)
+                       while (data - begin < datalen)
                        {
                                IndexTuple      itup = (IndexTuple) data;
                                Size            sz = IndexTupleSize(itup);
@@ -123,9 +111,12 @@ gistRedoPageUpdateRecord(XLogRecPtr lsn, XLogRecord *record)
                                        elog(ERROR, "failed to add item to GiST index page, size %d bytes",
                                                 (int) sz);
                                off++;
+                               ninserted++;
                        }
                }
 
+               Assert(ninserted == xldata->ntoinsert);
+
                PageSetLSN(page, lsn);
                MarkBufferDirty(buffer);
        }
@@ -137,58 +128,50 @@ gistRedoPageUpdateRecord(XLogRecPtr lsn, XLogRecord *record)
         * that even if the target page no longer exists, we still attempt to
         * replay the change on the child page.
         */
-       if (BlockNumberIsValid(xldata->leftchild))
-               gistRedoClearFollowRight(lsn, record, 1,
-                                                                xldata->node, xldata->leftchild);
+       if (XLogRecHasBlockRef(record, 1))
+               gistRedoClearFollowRight(lsn, record, 1);
 
        if (BufferIsValid(buffer))
                UnlockReleaseBuffer(buffer);
 }
 
-static void
-decodePageSplitRecord(PageSplitRecord *decoded, XLogRecord *record)
+/*
+ * Returns a palloc'd array of pointers to the index tuples in 'begin'; sets *n.
+ */
+static IndexTuple *
+decodePageSplitRecord(char *begin, int len, int *n)
 {
-       char       *begin = XLogRecGetData(record),
-                          *ptr;
-       int                     j,
-                               i = 0;
+       char       *ptr;
+       int                     i = 0;
+       IndexTuple *tuples;
+
+       /* extract the number of tuples */
+       memcpy(n, begin, sizeof(int));
+       ptr = begin + sizeof(int);
 
-       decoded->data = (gistxlogPageSplit *) begin;
-       decoded->page = (NewPage *) palloc(sizeof(NewPage) * decoded->data->npage);
+       tuples = palloc(*n * sizeof(IndexTuple));
 
-       ptr = begin + sizeof(gistxlogPageSplit);
-       for (i = 0; i < decoded->data->npage; i++)
+       for (i = 0; i < *n; i++)
        {
-               Assert(ptr - begin < record->xl_len);
-               decoded->page[i].header = (gistxlogPage *) ptr;
-               ptr += sizeof(gistxlogPage);
-
-               decoded->page[i].itup = (IndexTuple *)
-                       palloc(sizeof(IndexTuple) * decoded->page[i].header->num);
-               j = 0;
-               while (j < decoded->page[i].header->num)
-               {
-                       Assert(ptr - begin < record->xl_len);
-                       decoded->page[i].itup[j] = (IndexTuple) ptr;
-                       ptr += IndexTupleSize((IndexTuple) ptr);
-                       j++;
-               }
+               Assert(ptr - begin < len);
+               tuples[i] = (IndexTuple) ptr;
+               ptr += IndexTupleSize((IndexTuple) ptr);
        }
+       Assert(ptr - begin == len);
+
+       return tuples;
 }
 
 static void
 gistRedoPageSplitRecord(XLogRecPtr lsn, XLogRecord *record)
 {
        gistxlogPageSplit *xldata = (gistxlogPageSplit *) XLogRecGetData(record);
-       PageSplitRecord xlrec;
        Buffer          firstbuffer = InvalidBuffer;
        Buffer          buffer;
        Page            page;
        int                     i;
        bool            isrootsplit = false;
 
-       decodePageSplitRecord(&xlrec, record);
-
        /*
         * We must hold lock on the first-listed page throughout the action,
         * including while updating the left child page (if any).  We can unlock
@@ -198,32 +181,40 @@ gistRedoPageSplitRecord(XLogRecPtr lsn, XLogRecord *record)
         */
 
        /* loop around all pages */
-       for (i = 0; i < xlrec.data->npage; i++)
+       for (i = 0; i < xldata->npage; i++)
        {
-               NewPage    *newpage = xlrec.page + i;
                int                     flags;
-
-               if (newpage->header->blkno == GIST_ROOT_BLKNO)
+               char       *data;
+               Size            datalen;
+               int                     num;
+               BlockNumber     blkno;
+               IndexTuple *tuples;
+
+               XLogRecGetBlockTag(record, i + 1, NULL, NULL, &blkno);
+               if (blkno == GIST_ROOT_BLKNO)
                {
                        Assert(i == 0);
                        isrootsplit = true;
                }
 
-               buffer = XLogReadBuffer(xlrec.data->node, newpage->header->blkno, true);
-               Assert(BufferIsValid(buffer));
+               XLogReadBufferForRedoExtended(lsn, record, i + 1, RBM_ZERO, false,
+                                                                         &buffer);
                page = (Page) BufferGetPage(buffer);
+               data = XLogRecGetBlockData(record, i + 1, &datalen);
+
+               tuples = decodePageSplitRecord(data, datalen, &num);
 
                /* ok, clear buffer */
-               if (xlrec.data->origleaf && newpage->header->blkno != GIST_ROOT_BLKNO)
+               if (xldata->origleaf && blkno != GIST_ROOT_BLKNO)
                        flags = F_LEAF;
                else
                        flags = 0;
                GISTInitBuffer(buffer, flags);
 
                /* and fill it */
-               gistfillbuffer(page, newpage->itup, newpage->header->num, FirstOffsetNumber);
+               gistfillbuffer(page, tuples, num, FirstOffsetNumber);
 
-               if (newpage->header->blkno == GIST_ROOT_BLKNO)
+               if (blkno == GIST_ROOT_BLKNO)
                {
                        GistPageGetOpaque(page)->rightlink = InvalidBlockNumber;
                        GistPageSetNSN(page, xldata->orignsn);
@@ -231,12 +222,17 @@ gistRedoPageSplitRecord(XLogRecPtr lsn, XLogRecord *record)
                }
                else
                {
-                       if (i < xlrec.data->npage - 1)
-                               GistPageGetOpaque(page)->rightlink = xlrec.page[i + 1].header->blkno;
+                       if (i < xldata->npage - 1)
+                       {
+                               BlockNumber nextblkno;
+
+                               XLogRecGetBlockTag(record, i + 2, NULL, NULL, &nextblkno);
+                               GistPageGetOpaque(page)->rightlink = nextblkno;
+                       }
                        else
                                GistPageGetOpaque(page)->rightlink = xldata->origrlink;
                        GistPageSetNSN(page, xldata->orignsn);
-                       if (i < xlrec.data->npage - 1 && !isrootsplit &&
+                       if (i < xldata->npage - 1 && !isrootsplit &&
                                xldata->markfollowright)
                                GistMarkFollowRight(page);
                        else
@@ -253,9 +249,8 @@ gistRedoPageSplitRecord(XLogRecPtr lsn, XLogRecord *record)
        }
 
        /* Fix follow-right data on left child page, if any */
-       if (BlockNumberIsValid(xldata->leftchild))
-               gistRedoClearFollowRight(lsn, record, 0,
-                                                                xldata->node, xldata->leftchild);
+       if (XLogRecHasBlockRef(record, 0))
+               gistRedoClearFollowRight(lsn, record, 0);
 
        /* Finally, release lock on the first page */
        UnlockReleaseBuffer(firstbuffer);
@@ -264,15 +259,11 @@ gistRedoPageSplitRecord(XLogRecPtr lsn, XLogRecord *record)
 static void
 gistRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
 {
-       RelFileNode *node = (RelFileNode *) XLogRecGetData(record);
        Buffer          buffer;
        Page            page;
 
-       /* Backup blocks are not used in create_index records */
-       Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
-
-       buffer = XLogReadBuffer(*node, GIST_ROOT_BLKNO, true);
-       Assert(BufferIsValid(buffer));
+       XLogReadBufferForRedoExtended(lsn, record, 0, RBM_ZERO, false, &buffer);
+       Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO);
        page = (Page) BufferGetPage(buffer);
 
        GISTInitBuffer(buffer, F_LEAF);
@@ -336,70 +327,49 @@ gistXLogSplit(RelFileNode node, BlockNumber blkno, bool page_is_leaf,
                          BlockNumber origrlink, GistNSN orignsn,
                          Buffer leftchildbuf, bool markfollowright)
 {
-       XLogRecData rdata[GIST_MAX_SPLIT_PAGES * 2 + 2];
        gistxlogPageSplit xlrec;
        SplitedPageLayout *ptr;
-       int                     npage = 0,
-                               cur;
+       int                     npage = 0;
        XLogRecPtr      recptr;
+       int                     i;
 
        for (ptr = dist; ptr; ptr = ptr->next)
                npage++;
 
-       /*
-        * the caller should've checked this already, but doesn't hurt to check
-        * again.
-        */
-       if (npage > GIST_MAX_SPLIT_PAGES)
-               elog(ERROR, "GiST page split into too many halves");
-
-       xlrec.node = node;
-       xlrec.origblkno = blkno;
        xlrec.origrlink = origrlink;
        xlrec.orignsn = orignsn;
        xlrec.origleaf = page_is_leaf;
        xlrec.npage = (uint16) npage;
-       xlrec.leftchild =
-               BufferIsValid(leftchildbuf) ? BufferGetBlockNumber(leftchildbuf) : InvalidBlockNumber;
        xlrec.markfollowright = markfollowright;
 
-       rdata[0].data = (char *) &xlrec;
-       rdata[0].len = sizeof(gistxlogPageSplit);
-       rdata[0].buffer = InvalidBuffer;
-
-       cur = 1;
+       XLogBeginInsert();
 
        /*
         * Include a full page image of the child buf. (only necessary if a
         * checkpoint happened since the child page was split)
         */
        if (BufferIsValid(leftchildbuf))
-       {
-               rdata[cur - 1].next = &(rdata[cur]);
-               rdata[cur].data = NULL;
-               rdata[cur].len = 0;
-               rdata[cur].buffer = leftchildbuf;
-               rdata[cur].buffer_std = true;
-               cur++;
-       }
+               XLogRegisterBuffer(0, leftchildbuf, REGBUF_STANDARD);
 
+       /*
+        * NOTE: We register a lot of data. The caller must've called
+        * XLogEnsureRecordSpace() to prepare for that. We cannot do it here,
+        * because we're already in a critical section. If you change the number
+        * of buffer or data registrations here, make sure you modify the
+        * XLogEnsureRecordSpace() calls accordingly!
+        */
+       XLogRegisterData((char *) &xlrec, sizeof(gistxlogPageSplit));
+
+       i = 1;
        for (ptr = dist; ptr; ptr = ptr->next)
        {
-               rdata[cur - 1].next = &(rdata[cur]);
-               rdata[cur].buffer = InvalidBuffer;
-               rdata[cur].data = (char *) &(ptr->block);
-               rdata[cur].len = sizeof(gistxlogPage);
-               cur++;
-
-               rdata[cur - 1].next = &(rdata[cur]);
-               rdata[cur].buffer = InvalidBuffer;
-               rdata[cur].data = (char *) (ptr->list);
-               rdata[cur].len = ptr->lenlist;
-               cur++;
+               XLogRegisterBuffer(i, ptr->buffer, REGBUF_WILL_INIT);
+               XLogRegisterBufData(i, (char *) &(ptr->block.num), sizeof(int));
+               XLogRegisterBufData(i, (char *) ptr->list, ptr->lenlist);
+               i++;
        }
-       rdata[cur - 1].next = NULL;
 
-       recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_SPLIT, rdata);
+       recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_SPLIT);
 
        return recptr;
 }
@@ -413,9 +383,7 @@ gistXLogSplit(RelFileNode node, BlockNumber blkno, bool page_is_leaf,
  *
  * Note that both the todelete array and the tuples are marked as belonging
  * to the target buffer; they need not be stored in XLOG if XLogInsert decides
- * to log the whole buffer contents instead.  Also, we take care that there's
- * at least one rdata item referencing the buffer, even when ntodelete and
- * ituplen are both zero; this ensures that XLogInsert knows about the buffer.
+ * to log the whole buffer contents instead.
  */
 XLogRecPtr
 gistXLogUpdate(RelFileNode node, Buffer buffer,
@@ -423,57 +391,31 @@ gistXLogUpdate(RelFileNode node, Buffer buffer,
                           IndexTuple *itup, int ituplen,
                           Buffer leftchildbuf)
 {
-       XLogRecData rdata[MaxIndexTuplesPerPage + 3];
        gistxlogPageUpdate xlrec;
-       int                     cur,
-                               i;
+       int                     i;
        XLogRecPtr      recptr;
 
-       xlrec.node = node;
-       xlrec.blkno = BufferGetBlockNumber(buffer);
        xlrec.ntodelete = ntodelete;
-       xlrec.leftchild =
-               BufferIsValid(leftchildbuf) ? BufferGetBlockNumber(leftchildbuf) : InvalidBlockNumber;
-
-       rdata[0].data = (char *) &xlrec;
-       rdata[0].len = sizeof(gistxlogPageUpdate);
-       rdata[0].buffer = InvalidBuffer;
-       rdata[0].next = &(rdata[1]);
+       xlrec.ntoinsert = ituplen;
 
-       rdata[1].data = (char *) todelete;
-       rdata[1].len = sizeof(OffsetNumber) * ntodelete;
-       rdata[1].buffer = buffer;
-       rdata[1].buffer_std = true;
+       XLogBeginInsert();
+       XLogRegisterData((char *) &xlrec, sizeof(gistxlogPageUpdate));
 
-       cur = 2;
+       XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
+       XLogRegisterBufData(0, (char *) todelete, sizeof(OffsetNumber) * ntodelete);
 
        /* new tuples */
        for (i = 0; i < ituplen; i++)
-       {
-               rdata[cur - 1].next = &(rdata[cur]);
-               rdata[cur].data = (char *) (itup[i]);
-               rdata[cur].len = IndexTupleSize(itup[i]);
-               rdata[cur].buffer = buffer;
-               rdata[cur].buffer_std = true;
-               cur++;
-       }
+               XLogRegisterBufData(0, (char *) (itup[i]), IndexTupleSize(itup[i]));
 
        /*
         * Include a full page image of the child buf. (only necessary if a
         * checkpoint happened since the child page was split)
         */
        if (BufferIsValid(leftchildbuf))
-       {
-               rdata[cur - 1].next = &(rdata[cur]);
-               rdata[cur].data = NULL;
-               rdata[cur].len = 0;
-               rdata[cur].buffer = leftchildbuf;
-               rdata[cur].buffer_std = true;
-               cur++;
-       }
-       rdata[cur - 1].next = NULL;
+               XLogRegisterBuffer(1, leftchildbuf, REGBUF_STANDARD);
 
-       recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_UPDATE, rdata);
+       recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_UPDATE);
 
        return recptr;
 }
index 43098f444224a087d72543db9dce8da43fe28ef2..3382d61ebb614db0e3c431909f3a2f2ca627c3ed 100644 (file)
@@ -2132,84 +2132,64 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
                xl_heap_insert xlrec;
                xl_heap_header xlhdr;
                XLogRecPtr      recptr;
-               XLogRecData rdata[4];
                Page            page = BufferGetPage(buffer);
                uint8           info = XLOG_HEAP_INSERT;
-               bool            need_tuple_data;
+               int                     bufflags = 0;
 
                /*
-                * For logical decoding, we need the tuple even if we're doing a full
-                * page write, so make sure to log it separately. (XXX We could
-                * alternatively store a pointer into the FPW).
-                *
-                * Also, if this is a catalog, we need to transmit combocids to
-                * properly decode, so log that as well.
+                * If this is a catalog, we need to transmit combocids to properly
+                * decode, so log that as well.
                 */
-               need_tuple_data = RelationIsLogicallyLogged(relation);
                if (RelationIsAccessibleInLogicalDecoding(relation))
                        log_heap_new_cid(relation, heaptup);
 
-               xlrec.flags = all_visible_cleared ? XLOG_HEAP_ALL_VISIBLE_CLEARED : 0;
-               xlrec.target.node = relation->rd_node;
-               xlrec.target.tid = heaptup->t_self;
-               rdata[0].data = (char *) &xlrec;
-               rdata[0].len = SizeOfHeapInsert;
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].next = &(rdata[1]);
-
-               xlhdr.t_infomask2 = heaptup->t_data->t_infomask2;
-               xlhdr.t_infomask = heaptup->t_data->t_infomask;
-               xlhdr.t_hoff = heaptup->t_data->t_hoff;
-
                /*
-                * note we mark rdata[1] as belonging to buffer; if XLogInsert decides
-                * to write the whole page to the xlog, we don't need to store
-                * xl_heap_header in the xlog.
+                * If this is the single and first tuple on page, we can reinit the
+                * page instead of restoring the whole thing.  Set flag, and tell
+                * XLogInsert that the page will be re-initialized (REGBUF_WILL_INIT).
                 */
-               rdata[1].data = (char *) &xlhdr;
-               rdata[1].len = SizeOfHeapHeader;
-               rdata[1].buffer = need_tuple_data ? InvalidBuffer : buffer;
-               rdata[1].buffer_std = true;
-               rdata[1].next = &(rdata[2]);
-
-               /* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
-               rdata[2].data = (char *) heaptup->t_data + offsetof(HeapTupleHeaderData, t_bits);
-               rdata[2].len = heaptup->t_len - offsetof(HeapTupleHeaderData, t_bits);
-               rdata[2].buffer = need_tuple_data ? InvalidBuffer : buffer;
-               rdata[2].buffer_std = true;
-               rdata[2].next = NULL;
+               if (ItemPointerGetOffsetNumber(&(heaptup->t_self)) == FirstOffsetNumber &&
+                       PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
+               {
+                       info |= XLOG_HEAP_INIT_PAGE;
+                       bufflags |= REGBUF_WILL_INIT;
+               }
 
+               xlrec.offnum = ItemPointerGetOffsetNumber(&heaptup->t_self);
+               xlrec.flags = all_visible_cleared ? XLOG_HEAP_ALL_VISIBLE_CLEARED : 0;
+               Assert(ItemPointerGetBlockNumber(&heaptup->t_self) == BufferGetBlockNumber(buffer));
+
                /*
-                * Make a separate rdata entry for the tuple's buffer if we're doing
-                * logical decoding, so that an eventual FPW doesn't remove the
-                * tuple's data.
+                * For logical decoding, we need the tuple even if we're doing a full
+                * page write, so make sure it's included even if we take a full-page
+                * image. (XXX We could alternatively store a pointer into the FPW).
                 */
-               if (need_tuple_data)
+               if (RelationIsLogicallyLogged(relation))
                {
-                       rdata[2].next = &(rdata[3]);
-
-                       rdata[3].data = NULL;
-                       rdata[3].len = 0;
-                       rdata[3].buffer = buffer;
-                       rdata[3].buffer_std = true;
-                       rdata[3].next = NULL;
-
                        xlrec.flags |= XLOG_HEAP_CONTAINS_NEW_TUPLE;
+                       bufflags |= REGBUF_KEEP_DATA;
                }
 
+               XLogBeginInsert();
+               XLogRegisterData((char *) &xlrec, SizeOfHeapInsert);
+
+               xlhdr.t_infomask2 = heaptup->t_data->t_infomask2;
+               xlhdr.t_infomask = heaptup->t_data->t_infomask;
+               xlhdr.t_hoff = heaptup->t_data->t_hoff;
+
                /*
-                * If this is the single and first tuple on page, we can reinit the
-                * page instead of restoring the whole thing.  Set flag, and hide
-                * buffer references from XLogInsert.
+                * note we mark xlhdr as belonging to buffer; if XLogInsert decides
+                * to write the whole page to the xlog, we don't need to store
+                * xl_heap_header in the xlog.
                 */
-               if (ItemPointerGetOffsetNumber(&(heaptup->t_self)) == FirstOffsetNumber &&
-                       PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
-               {
-                       info |= XLOG_HEAP_INIT_PAGE;
-                       rdata[1].buffer = rdata[2].buffer = rdata[3].buffer = InvalidBuffer;
-               }
+               XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
+               XLogRegisterBufData(0, (char *) &xlhdr, SizeOfHeapHeader);
+               /* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
+               XLogRegisterBufData(0,
+                        (char *) heaptup->t_data + offsetof(HeapTupleHeaderData, t_bits),
+                        heaptup->t_len - offsetof(HeapTupleHeaderData, t_bits));
 
-               recptr = XLogInsert(RM_HEAP_ID, info, rdata);
+               recptr = XLogInsert(RM_HEAP_ID, info);
 
                PageSetLSN(page, recptr);
        }
@@ -2397,6 +2377,13 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
                                break;
 
                        RelationPutHeapTuple(relation, buffer, heaptup);
+
+                       /*
+                        * We don't use heap_multi_insert for catalog tuples yet, but
+                        * better be prepared...
+                        */
+                       if (needwal && need_cids)
+                               log_heap_new_cid(relation, heaptup);
                }
 
                if (PageIsAllVisible(page))
@@ -2419,12 +2406,12 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
                {
                        XLogRecPtr      recptr;
                        xl_heap_multi_insert *xlrec;
-                       XLogRecData rdata[3];
                        uint8           info = XLOG_HEAP2_MULTI_INSERT;
                        char       *tupledata;
                        int                     totaldatalen;
                        char       *scratchptr = scratch;
                        bool            init;
+                       int                     bufflags = 0;
 
                        /*
                         * If the page was previously empty, we can reinit the page
@@ -2450,8 +2437,6 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
                        tupledata = scratchptr;
 
                        xlrec->flags = all_visible_cleared ? XLOG_HEAP_ALL_VISIBLE_CLEARED : 0;
-                       xlrec->node = relation->rd_node;
-                       xlrec->blkno = BufferGetBlockNumber(buffer);
                        xlrec->ntuples = nthispage;
 
                        /*
@@ -2481,64 +2466,39 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
                                           datalen);
                                tuphdr->datalen = datalen;
                                scratchptr += datalen;
-
-                               /*
-                                * We don't use heap_multi_insert for catalog tuples yet, but
-                                * better be prepared...
-                                */
-                               if (need_cids)
-                                       log_heap_new_cid(relation, heaptup);
                        }
                        totaldatalen = scratchptr - tupledata;
                        Assert((scratchptr - scratch) < BLCKSZ);
 
-                       rdata[0].data = (char *) xlrec;
-                       rdata[0].len = tupledata - scratch;
-                       rdata[0].buffer = InvalidBuffer;
-                       rdata[0].next = &rdata[1];
-
-                       rdata[1].data = tupledata;
-                       rdata[1].len = totaldatalen;
-                       rdata[1].buffer = need_tuple_data ? InvalidBuffer : buffer;
-                       rdata[1].buffer_std = true;
-                       rdata[1].next = NULL;
-
-                       /*
-                        * Make a separate rdata entry for the tuple's buffer if we're
-                        * doing logical decoding, so that an eventual FPW doesn't remove
-                        * the tuple's data.
-                        */
                        if (need_tuple_data)
-                       {
-                               rdata[1].next = &(rdata[2]);
-
-                               rdata[2].data = NULL;
-                               rdata[2].len = 0;
-                               rdata[2].buffer = buffer;
-                               rdata[2].buffer_std = true;
-                               rdata[2].next = NULL;
                                xlrec->flags |= XLOG_HEAP_CONTAINS_NEW_TUPLE;
-                       }
 
                        /*
-                        * If we're going to reinitialize the whole page using the WAL
-                        * record, hide buffer reference from XLogInsert.
+                        * Signal that this is the last xl_heap_multi_insert record
+                        * emitted by this call to heap_multi_insert(). Needed for logical
+                        * decoding so it knows when to cleanup temporary data.
                         */
+                       if (ndone + nthispage == ntuples)
+                               xlrec->flags |= XLOG_HEAP_LAST_MULTI_INSERT;
+
                        if (init)
                        {
-                               rdata[1].buffer = rdata[2].buffer = InvalidBuffer;
                                info |= XLOG_HEAP_INIT_PAGE;
+                               bufflags |= REGBUF_WILL_INIT;
                        }
-
                        /*
-                        * Signal that this is the last xl_heap_multi_insert record
-                        * emitted by this call to heap_multi_insert(). Needed for logical
-                        * decoding so it knows when to cleanup temporary data.
+                        * If we're doing logical decoding, include the new tuple data
+                        * even if we take a full-page image of the page.
                         */
-                       if (ndone + nthispage == ntuples)
-                               xlrec->flags |= XLOG_HEAP_LAST_MULTI_INSERT;
+                       if (need_tuple_data)
+                               bufflags |= REGBUF_KEEP_DATA;
+
+                       XLogBeginInsert();
+                       XLogRegisterData((char *) xlrec, tupledata - scratch);
+                       XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
 
-                       recptr = XLogInsert(RM_HEAP2_ID, info, rdata);
+                       XLogRegisterBufData(0, tupledata, totaldatalen);
+                       recptr = XLogInsert(RM_HEAP2_ID, info);
 
                        PageSetLSN(page, recptr);
                }
@@ -2909,7 +2869,6 @@ l1:
        {
                xl_heap_delete xlrec;
                XLogRecPtr      recptr;
-               XLogRecData rdata[4];
 
                /* For logical decode we need combocids to properly decode the catalog */
                if (RelationIsAccessibleInLogicalDecoding(relation))
@@ -2918,19 +2877,21 @@ l1:
                xlrec.flags = all_visible_cleared ? XLOG_HEAP_ALL_VISIBLE_CLEARED : 0;
                xlrec.infobits_set = compute_infobits(tp.t_data->t_infomask,
                                                                                          tp.t_data->t_infomask2);
-               xlrec.target.node = relation->rd_node;
-               xlrec.target.tid = tp.t_self;
+               xlrec.offnum = ItemPointerGetOffsetNumber(&tp.t_self);
                xlrec.xmax = new_xmax;
-               rdata[0].data = (char *) &xlrec;
-               rdata[0].len = SizeOfHeapDelete;
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].next = &(rdata[1]);
 
-               rdata[1].data = NULL;
-               rdata[1].len = 0;
-               rdata[1].buffer = buffer;
-               rdata[1].buffer_std = true;
-               rdata[1].next = NULL;
+               if (old_key_tuple != NULL)
+               {
+                       if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
+                               xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_TUPLE;
+                       else
+                               xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_KEY;
+               }
+
+               XLogBeginInsert();
+               XLogRegisterData((char *) &xlrec, SizeOfHeapDelete);
+
+               XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
 
                /*
                 * Log replica identity of the deleted tuple if there is one
@@ -2943,27 +2904,14 @@ l1:
                        xlhdr.t_infomask = old_key_tuple->t_data->t_infomask;
                        xlhdr.t_hoff = old_key_tuple->t_data->t_hoff;
 
-                       rdata[1].next = &(rdata[2]);
-                       rdata[2].data = (char *) &xlhdr;
-                       rdata[2].len = SizeOfHeapHeader;
-                       rdata[2].buffer = InvalidBuffer;
-                       rdata[2].next = NULL;
-
-                       rdata[2].next = &(rdata[3]);
-                       rdata[3].data = (char *) old_key_tuple->t_data
-                               + offsetof(HeapTupleHeaderData, t_bits);
-                       rdata[3].len = old_key_tuple->t_len
-                               - offsetof(HeapTupleHeaderData, t_bits);
-                       rdata[3].buffer = InvalidBuffer;
-                       rdata[3].next = NULL;
-
-                       if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
-                               xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_TUPLE;
-                       else
-                               xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_KEY;
+                       XLogRegisterData((char *) &xlhdr, SizeOfHeapHeader);
+                       XLogRegisterData((char *) old_key_tuple->t_data
+                                                                + offsetof(HeapTupleHeaderData, t_bits),
+                                                                old_key_tuple->t_len
+                                                                - offsetof(HeapTupleHeaderData, t_bits));
                }
 
-               recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE, rdata);
+               recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE);
 
                PageSetLSN(page, recptr);
        }
@@ -4735,25 +4683,17 @@ failed:
        {
                xl_heap_lock xlrec;
                XLogRecPtr      recptr;
-               XLogRecData rdata[2];
 
-               xlrec.target.node = relation->rd_node;
-               xlrec.target.tid = tuple->t_self;
+               XLogBeginInsert();
+               XLogRegisterBuffer(0, *buffer, REGBUF_STANDARD);
+
+               xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
                xlrec.locking_xid = xid;
                xlrec.infobits_set = compute_infobits(new_infomask,
                                                                                          tuple->t_data->t_infomask2);
-               rdata[0].data = (char *) &xlrec;
-               rdata[0].len = SizeOfHeapLock;
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].next = &(rdata[1]);
-
-               rdata[1].data = NULL;
-               rdata[1].len = 0;
-               rdata[1].buffer = *buffer;
-               rdata[1].buffer_std = true;
-               rdata[1].next = NULL;
+               XLogRegisterData((char *) &xlrec, SizeOfHeapLock);
 
-               recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK, rdata);
+               recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK);
 
                PageSetLSN(page, recptr);
        }
@@ -5342,26 +5282,18 @@ l4:
                {
                        xl_heap_lock_updated xlrec;
                        XLogRecPtr      recptr;
-                       XLogRecData rdata[2];
                        Page            page = BufferGetPage(buf);
 
-                       xlrec.target.node = rel->rd_node;
-                       xlrec.target.tid = mytup.t_self;
+                       XLogBeginInsert();
+                       XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
+
+                       xlrec.offnum = ItemPointerGetOffsetNumber(&mytup.t_self);
                        xlrec.xmax = new_xmax;
                        xlrec.infobits_set = compute_infobits(new_infomask, new_infomask2);
 
-                       rdata[0].data = (char *) &xlrec;
-                       rdata[0].len = SizeOfHeapLockUpdated;
-                       rdata[0].buffer = InvalidBuffer;
-                       rdata[0].next = &(rdata[1]);
-
-                       rdata[1].data = NULL;
-                       rdata[1].len = 0;
-                       rdata[1].buffer = buf;
-                       rdata[1].buffer_std = true;
-                       rdata[1].next = NULL;
+                       XLogRegisterData((char *) &xlrec, SizeOfHeapLockUpdated);
 
-                       recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_LOCK_UPDATED, rdata);
+                       recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_LOCK_UPDATED);
 
                        PageSetLSN(page, recptr);
                }
@@ -5489,23 +5421,16 @@ heap_inplace_update(Relation relation, HeapTuple tuple)
        {
                xl_heap_inplace xlrec;
                XLogRecPtr      recptr;
-               XLogRecData rdata[2];
 
-               xlrec.target.node = relation->rd_node;
-               xlrec.target.tid = tuple->t_self;
+               xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
 
-               rdata[0].data = (char *) &xlrec;
-               rdata[0].len = SizeOfHeapInplace;
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].next = &(rdata[1]);
+               XLogBeginInsert();
+               XLogRegisterData((char *) &xlrec, SizeOfHeapInplace);
 
-               rdata[1].data = (char *) htup + htup->t_hoff;
-               rdata[1].len = newlen;
-               rdata[1].buffer = buffer;
-               rdata[1].buffer_std = true;
-               rdata[1].next = NULL;
+               XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
+               XLogRegisterBufData(0, (char *) htup + htup->t_hoff, newlen);
 
-               recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE, rdata);
+               recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE);
 
                PageSetLSN(page, recptr);
        }
@@ -6507,17 +6432,14 @@ log_heap_cleanup_info(RelFileNode rnode, TransactionId latestRemovedXid)
 {
        xl_heap_cleanup_info xlrec;
        XLogRecPtr      recptr;
-       XLogRecData rdata;
 
        xlrec.node = rnode;
        xlrec.latestRemovedXid = latestRemovedXid;
 
-       rdata.data = (char *) &xlrec;
-       rdata.len = SizeOfHeapCleanupInfo;
-       rdata.buffer = InvalidBuffer;
-       rdata.next = NULL;
+       XLogBeginInsert();
+       XLogRegisterData((char *) &xlrec, SizeOfHeapCleanupInfo);
 
-       recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_CLEANUP_INFO, &rdata);
+       recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_CLEANUP_INFO);
 
        return recptr;
 }
@@ -6542,23 +6464,19 @@ log_heap_clean(Relation reln, Buffer buffer,
                           TransactionId latestRemovedXid)
 {
        xl_heap_clean xlrec;
-       uint8           info;
        XLogRecPtr      recptr;
-       XLogRecData rdata[4];
 
        /* Caller should not call me on a non-WAL-logged relation */
        Assert(RelationNeedsWAL(reln));
 
-       xlrec.node = reln->rd_node;
-       xlrec.block = BufferGetBlockNumber(buffer);
        xlrec.latestRemovedXid = latestRemovedXid;
        xlrec.nredirected = nredirected;
        xlrec.ndead = ndead;
 
-       rdata[0].data = (char *) &xlrec;
-       rdata[0].len = SizeOfHeapClean;
-       rdata[0].buffer = InvalidBuffer;
-       rdata[0].next = &(rdata[1]);
+       XLogBeginInsert();
+       XLogRegisterData((char *) &xlrec, SizeOfHeapClean);
+
+       XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
 
        /*
         * The OffsetNumber arrays are not actually in the buffer, but we pretend
@@ -6569,49 +6487,18 @@ log_heap_clean(Relation reln, Buffer buffer,
         * even if no item pointers changed state.
         */
        if (nredirected > 0)
-       {
-               rdata[1].data = (char *) redirected;
-               rdata[1].len = nredirected * sizeof(OffsetNumber) * 2;
-       }
-       else
-       {
-               rdata[1].data = NULL;
-               rdata[1].len = 0;
-       }
-       rdata[1].buffer = buffer;
-       rdata[1].buffer_std = true;
-       rdata[1].next = &(rdata[2]);
+               XLogRegisterBufData(0, (char *) redirected,
+                                                nredirected * sizeof(OffsetNumber) * 2);
 
        if (ndead > 0)
-       {
-               rdata[2].data = (char *) nowdead;
-               rdata[2].len = ndead * sizeof(OffsetNumber);
-       }
-       else
-       {
-               rdata[2].data = NULL;
-               rdata[2].len = 0;
-       }
-       rdata[2].buffer = buffer;
-       rdata[2].buffer_std = true;
-       rdata[2].next = &(rdata[3]);
+               XLogRegisterBufData(0, (char *) nowdead,
+                                                ndead * sizeof(OffsetNumber));
 
        if (nunused > 0)
-       {
-               rdata[3].data = (char *) nowunused;
-               rdata[3].len = nunused * sizeof(OffsetNumber);
-       }
-       else
-       {
-               rdata[3].data = NULL;
-               rdata[3].len = 0;
-       }
-       rdata[3].buffer = buffer;
-       rdata[3].buffer_std = true;
-       rdata[3].next = NULL;
+               XLogRegisterBufData(0, (char *) nowunused,
+                                                nunused * sizeof(OffsetNumber));
 
-       info = XLOG_HEAP2_CLEAN;
-       recptr = XLogInsert(RM_HEAP2_ID, info, rdata);
+       recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_CLEAN);
 
        return recptr;
 }
@@ -6626,35 +6513,28 @@ log_heap_freeze(Relation reln, Buffer buffer, TransactionId cutoff_xid,
 {
        xl_heap_freeze_page xlrec;
        XLogRecPtr      recptr;
-       XLogRecData rdata[2];
 
        /* Caller should not call me on a non-WAL-logged relation */
        Assert(RelationNeedsWAL(reln));
        /* nor when there are no tuples to freeze */
        Assert(ntuples > 0);
 
-       xlrec.node = reln->rd_node;
-       xlrec.block = BufferGetBlockNumber(buffer);
        xlrec.cutoff_xid = cutoff_xid;
        xlrec.ntuples = ntuples;
 
-       rdata[0].data = (char *) &xlrec;
-       rdata[0].len = SizeOfHeapFreezePage;
-       rdata[0].buffer = InvalidBuffer;
-       rdata[0].next = &(rdata[1]);
+       XLogBeginInsert();
+       XLogRegisterData((char *) &xlrec, SizeOfHeapFreezePage);
 
        /*
         * The freeze plan array is not actually in the buffer, but pretend that
         * it is.  When XLogInsert stores the whole buffer, the freeze plan need
         * not be stored too.
         */
-       rdata[1].data = (char *) tuples;
-       rdata[1].len = ntuples * sizeof(xl_heap_freeze_tuple);
-       rdata[1].buffer = buffer;
-       rdata[1].buffer_std = true;
-       rdata[1].next = NULL;
+       XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
+       XLogRegisterBufData(0, (char *) tuples,
+                                        ntuples * sizeof(xl_heap_freeze_tuple));
 
-       recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_FREEZE_PAGE, rdata);
+       recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_FREEZE_PAGE);
 
        return recptr;
 }
@@ -6674,38 +6554,17 @@ log_heap_visible(RelFileNode rnode, Buffer heap_buffer, Buffer vm_buffer,
 {
        xl_heap_visible xlrec;
        XLogRecPtr      recptr;
-       XLogRecData rdata[3];
 
        Assert(BufferIsValid(heap_buffer));
        Assert(BufferIsValid(vm_buffer));
 
-       xlrec.node = rnode;
-       xlrec.block = BufferGetBlockNumber(heap_buffer);
        xlrec.cutoff_xid = cutoff_xid;
+       XLogBeginInsert();
+       XLogRegisterData((char *) &xlrec, SizeOfHeapVisible);
+       XLogRegisterBuffer(1, heap_buffer, XLogHintBitIsNeeded() ? REGBUF_STANDARD : (REGBUF_STANDARD | REGBUF_NO_IMAGE));
+       XLogRegisterBuffer(0, vm_buffer, 0);
 
-       rdata[0].data = (char *) &xlrec;
-       rdata[0].len = SizeOfHeapVisible;
-       rdata[0].buffer = InvalidBuffer;
-       rdata[0].next = &(rdata[1]);
-
-       rdata[1].data = NULL;
-       rdata[1].len = 0;
-       rdata[1].buffer = vm_buffer;
-       rdata[1].buffer_std = false;
-       rdata[1].next = NULL;
-
-       if (XLogHintBitIsNeeded())
-       {
-               rdata[1].next = &(rdata[2]);
-
-               rdata[2].data = NULL;
-               rdata[2].len = 0;
-               rdata[2].buffer = heap_buffer;
-               rdata[2].buffer_std = true;
-               rdata[2].next = NULL;
-       }
-
-       recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_VISIBLE, rdata);
+       recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_VISIBLE);
 
        return recptr;
 }
@@ -6721,22 +6580,23 @@ log_heap_update(Relation reln, Buffer oldbuf,
                                bool all_visible_cleared, bool new_all_visible_cleared)
 {
        xl_heap_update xlrec;
-       xl_heap_header_len xlhdr;
-       xl_heap_header_len xlhdr_idx;
+       xl_heap_header xlhdr;
+       xl_heap_header xlhdr_idx;
        uint8           info;
        uint16          prefix_suffix[2];
        uint16          prefixlen = 0,
                                suffixlen = 0;
        XLogRecPtr      recptr;
-       XLogRecData rdata[9];
        Page            page = BufferGetPage(newbuf);
        bool            need_tuple_data = RelationIsLogicallyLogged(reln);
-       int                     nr;
-       Buffer          newbufref;
+       bool            init;
+       int                     bufflags;
 
        /* Caller should not call me on a non-WAL-logged relation */
        Assert(RelationNeedsWAL(reln));
 
+       XLogBeginInsert();
+
        if (HeapTupleIsHeapOnly(newtup))
                info = XLOG_HEAP_HOT_UPDATE;
        else
@@ -6794,103 +6654,97 @@ log_heap_update(Relation reln, Buffer oldbuf,
                        suffixlen = 0;
        }
 
-       xlrec.target.node = reln->rd_node;
-       xlrec.target.tid = oldtup->t_self;
-       xlrec.old_xmax = HeapTupleHeaderGetRawXmax(oldtup->t_data);
-       xlrec.old_infobits_set = compute_infobits(oldtup->t_data->t_infomask,
-                                                                                         oldtup->t_data->t_infomask2);
-       xlrec.new_xmax = HeapTupleHeaderGetRawXmax(newtup->t_data);
+       /* Prepare main WAL data chain */
        xlrec.flags = 0;
        if (all_visible_cleared)
                xlrec.flags |= XLOG_HEAP_ALL_VISIBLE_CLEARED;
-       xlrec.newtid = newtup->t_self;
        if (new_all_visible_cleared)
                xlrec.flags |= XLOG_HEAP_NEW_ALL_VISIBLE_CLEARED;
        if (prefixlen > 0)
                xlrec.flags |= XLOG_HEAP_PREFIX_FROM_OLD;
        if (suffixlen > 0)
                xlrec.flags |= XLOG_HEAP_SUFFIX_FROM_OLD;
+       if (need_tuple_data)
+       {
+               xlrec.flags |= XLOG_HEAP_CONTAINS_NEW_TUPLE;
+               if (old_key_tuple)
+               {
+                       if (reln->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
+                               xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_TUPLE;
+                       else
+                               xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_KEY;
+               }
+       }
 
        /* If new tuple is the single and first tuple on page... */
        if (ItemPointerGetOffsetNumber(&(newtup->t_self)) == FirstOffsetNumber &&
                PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
        {
                info |= XLOG_HEAP_INIT_PAGE;
-               newbufref = InvalidBuffer;
+               init = true;
        }
        else
-               newbufref = newbuf;
+               init = false;
 
-       rdata[0].data = NULL;
-       rdata[0].len = 0;
-       rdata[0].buffer = oldbuf;
-       rdata[0].buffer_std = true;
-       rdata[0].next = &(rdata[1]);
+       /* Prepare WAL data for the old page */
+       xlrec.old_offnum = ItemPointerGetOffsetNumber(&oldtup->t_self);
+       xlrec.old_xmax = HeapTupleHeaderGetRawXmax(oldtup->t_data);
+       xlrec.old_infobits_set = compute_infobits(oldtup->t_data->t_infomask,
+                                                                                         oldtup->t_data->t_infomask2);
+
+       /* Prepare WAL data for the new page */
+       xlrec.new_offnum = ItemPointerGetOffsetNumber(&newtup->t_self);
+       xlrec.new_xmax = HeapTupleHeaderGetRawXmax(newtup->t_data);
+
+       bufflags = REGBUF_STANDARD;
+       if (init)
+               bufflags |= REGBUF_WILL_INIT;
+       if (need_tuple_data)
+               bufflags |= REGBUF_KEEP_DATA;
 
-       rdata[1].data = (char *) &xlrec;
-       rdata[1].len = SizeOfHeapUpdate;
-       rdata[1].buffer = InvalidBuffer;
-       rdata[1].next = &(rdata[2]);
+       XLogRegisterBuffer(0, newbuf, bufflags);
+       if (oldbuf != newbuf)
+               XLogRegisterBuffer(1, oldbuf, REGBUF_STANDARD);
 
-       /* prefix and/or suffix length fields */
+       XLogRegisterData((char *) &xlrec, SizeOfHeapUpdate);
+
+       /*
+        * Prepare WAL data for the new tuple.
+        */
        if (prefixlen > 0 || suffixlen > 0)
        {
                if (prefixlen > 0 && suffixlen > 0)
                {
                        prefix_suffix[0] = prefixlen;
                        prefix_suffix[1] = suffixlen;
-                       rdata[2].data = (char *) &prefix_suffix;
-                       rdata[2].len = 2 * sizeof(uint16);
+                       XLogRegisterBufData(0, (char *) &prefix_suffix, sizeof(uint16) * 2);
                }
                else if (prefixlen > 0)
                {
-                       rdata[2].data = (char *) &prefixlen;
-                       rdata[2].len = sizeof(uint16);
+                       XLogRegisterBufData(0, (char *) &prefixlen, sizeof(uint16));
                }
                else
                {
-                       rdata[2].data = (char *) &suffixlen;
-                       rdata[2].len = sizeof(uint16);
+                       XLogRegisterBufData(0, (char *) &suffixlen, sizeof(uint16));
                }
-               rdata[2].buffer = newbufref;
-               rdata[2].buffer_std = true;
-               rdata[2].next = &(rdata[3]);
-               nr = 3;
        }
-       else
-               nr = 2;
 
-       xlhdr.header.t_infomask2 = newtup->t_data->t_infomask2;
-       xlhdr.header.t_infomask = newtup->t_data->t_infomask;
-       xlhdr.header.t_hoff = newtup->t_data->t_hoff;
-       Assert(offsetof(HeapTupleHeaderData, t_bits) +prefixlen + suffixlen <= newtup->t_len);
-       xlhdr.t_len = newtup->t_len - offsetof(HeapTupleHeaderData, t_bits) -prefixlen - suffixlen;
-
-       /*
-        * As with insert records, we need not store this rdata segment if we
-        * decide to store the whole buffer instead, unless we're doing logical
-        * decoding.
-        */
-       rdata[nr].data = (char *) &xlhdr;
-       rdata[nr].len = SizeOfHeapHeaderLen;
-       rdata[nr].buffer = need_tuple_data ? InvalidBuffer : newbufref;
-       rdata[nr].buffer_std = true;
-       rdata[nr].next = &(rdata[nr + 1]);
-       nr++;
+       xlhdr.t_infomask2 = newtup->t_data->t_infomask2;
+       xlhdr.t_infomask = newtup->t_data->t_infomask;
+       xlhdr.t_hoff = newtup->t_data->t_hoff;
+       Assert(offsetof(HeapTupleHeaderData, t_bits) + prefixlen + suffixlen <= newtup->t_len);
 
        /*
         * PG73FORMAT: write bitmap [+ padding] [+ oid] + data
         *
         * The 'data' doesn't include the common prefix or suffix.
         */
+       XLogRegisterBufData(0, (char *) &xlhdr, SizeOfHeapHeader);
        if (prefixlen == 0)
        {
-               rdata[nr].data = ((char *) newtup->t_data) + offsetof(HeapTupleHeaderData, t_bits);
-               rdata[nr].len = newtup->t_len - offsetof(HeapTupleHeaderData, t_bits) -suffixlen;
-               rdata[nr].buffer = need_tuple_data ? InvalidBuffer : newbufref;
-               rdata[nr].buffer_std = true;
-               rdata[nr].next = NULL;
-               nr++;
+               XLogRegisterBufData(0,
+                        ((char *) newtup->t_data) + offsetof(HeapTupleHeaderData, t_bits),
+                       newtup->t_len - offsetof(HeapTupleHeaderData, t_bits) - suffixlen);
        }
        else
        {
@@ -6901,75 +6755,33 @@ log_heap_update(Relation reln, Buffer oldbuf,
                /* bitmap [+ padding] [+ oid] */
                if (newtup->t_data->t_hoff - offsetof(HeapTupleHeaderData, t_bits) >0)
                {
-                       rdata[nr - 1].next = &(rdata[nr]);
-                       rdata[nr].data = ((char *) newtup->t_data) + offsetof(HeapTupleHeaderData, t_bits);
-                       rdata[nr].len = newtup->t_data->t_hoff - offsetof(HeapTupleHeaderData, t_bits);
-                       rdata[nr].buffer = need_tuple_data ? InvalidBuffer : newbufref;
-                       rdata[nr].buffer_std = true;
-                       rdata[nr].next = NULL;
-                       nr++;
+                       XLogRegisterBufData(0,
+                         ((char *) newtup->t_data) + offsetof(HeapTupleHeaderData, t_bits),
+                               newtup->t_data->t_hoff - offsetof(HeapTupleHeaderData, t_bits));
                }
 
                /* data after common prefix */
-               rdata[nr - 1].next = &(rdata[nr]);
-               rdata[nr].data = ((char *) newtup->t_data) + newtup->t_data->t_hoff + prefixlen;
-               rdata[nr].len = newtup->t_len - newtup->t_data->t_hoff - prefixlen - suffixlen;
-               rdata[nr].buffer = need_tuple_data ? InvalidBuffer : newbufref;
-               rdata[nr].buffer_std = true;
-               rdata[nr].next = NULL;
-               nr++;
+               XLogRegisterBufData(0,
+                                ((char *) newtup->t_data) + newtup->t_data->t_hoff + prefixlen,
+                               newtup->t_len - newtup->t_data->t_hoff - prefixlen - suffixlen);
        }
 
-       /*
-        * Separate storage for the FPW buffer reference of the new page in the
-        * wal_level >= logical case.
-        */
-       if (need_tuple_data)
+       /* We need to log a tuple identity */
+       if (need_tuple_data && old_key_tuple)
        {
-               rdata[nr - 1].next = &(rdata[nr]);
-
-               rdata[nr].data = NULL,
-                       rdata[nr].len = 0;
-               rdata[nr].buffer = newbufref;
-               rdata[nr].buffer_std = true;
-               rdata[nr].next = NULL;
-               nr++;
+               /* don't really need this, but it's more comfy to decode */
+               xlhdr_idx.t_infomask2 = old_key_tuple->t_data->t_infomask2;
+               xlhdr_idx.t_infomask = old_key_tuple->t_data->t_infomask;
+               xlhdr_idx.t_hoff = old_key_tuple->t_data->t_hoff;
 
-               xlrec.flags |= XLOG_HEAP_CONTAINS_NEW_TUPLE;
-
-               /* We need to log a tuple identity */
-               if (old_key_tuple)
-               {
-                       /* don't really need this, but its more comfy to decode */
-                       xlhdr_idx.header.t_infomask2 = old_key_tuple->t_data->t_infomask2;
-                       xlhdr_idx.header.t_infomask = old_key_tuple->t_data->t_infomask;
-                       xlhdr_idx.header.t_hoff = old_key_tuple->t_data->t_hoff;
-                       xlhdr_idx.t_len = old_key_tuple->t_len;
-
-                       rdata[nr - 1].next = &(rdata[nr]);
-                       rdata[nr].data = (char *) &xlhdr_idx;
-                       rdata[nr].len = SizeOfHeapHeaderLen;
-                       rdata[nr].buffer = InvalidBuffer;
-                       rdata[nr].next = &(rdata[nr + 1]);
-                       nr++;
-
-                       /* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
-                       rdata[nr].data = (char *) old_key_tuple->t_data
-                               + offsetof(HeapTupleHeaderData, t_bits);
-                       rdata[nr].len = old_key_tuple->t_len
-                               - offsetof(HeapTupleHeaderData, t_bits);
-                       rdata[nr].buffer = InvalidBuffer;
-                       rdata[nr].next = NULL;
-                       nr++;
+               XLogRegisterData((char *) &xlhdr_idx, SizeOfHeapHeader);
 
-                       if (reln->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
-                               xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_TUPLE;
-                       else
-                               xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_KEY;
-               }
+               /* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
+               XLogRegisterData((char *) old_key_tuple->t_data + offsetof(HeapTupleHeaderData, t_bits),
+                                                old_key_tuple->t_len - offsetof(HeapTupleHeaderData, t_bits));
        }
 
-       recptr = XLogInsert(RM_HEAP_ID, info, rdata);
+       recptr = XLogInsert(RM_HEAP_ID, info);
 
        return recptr;
 }
@@ -6986,15 +6798,14 @@ log_heap_new_cid(Relation relation, HeapTuple tup)
        xl_heap_new_cid xlrec;
 
        XLogRecPtr      recptr;
-       XLogRecData rdata[1];
        HeapTupleHeader hdr = tup->t_data;
 
        Assert(ItemPointerIsValid(&tup->t_self));
        Assert(tup->t_tableOid != InvalidOid);
 
        xlrec.top_xid = GetTopTransactionId();
-       xlrec.target.node = relation->rd_node;
-       xlrec.target.tid = tup->t_self;
+       xlrec.target_node = relation->rd_node;
+       xlrec.target_tid = tup->t_self;
 
        /*
         * If the tuple got inserted & deleted in the same TX we definitely have a
@@ -7035,12 +6846,15 @@ log_heap_new_cid(Relation relation, HeapTuple tup)
                xlrec.combocid = InvalidCommandId;
        }
 
-       rdata[0].data = (char *) &xlrec;
-       rdata[0].len = SizeOfHeapNewCid;
-       rdata[0].buffer = InvalidBuffer;
-       rdata[0].next = NULL;
+       /*
+        * Note that we don't need to register the buffer here, because this
+        * operation does not modify the page. The insert/update/delete that
+        * called us certainly did, but that's WAL-logged separately.
+        */
+       XLogBeginInsert();
+       XLogRegisterData((char *) &xlrec, SizeOfHeapNewCid);
 
-       recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_NEW_CID, rdata);
+       recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_NEW_CID);
 
        return recptr;
 }
@@ -7179,7 +6993,7 @@ heap_xlog_cleanup_info(XLogRecPtr lsn, XLogRecord *record)
         */
 
        /* Backup blocks are not used in cleanup_info records */
-       Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+       Assert(!XLogRecHasAnyBlockRefs(record));
 }
 
 /*
@@ -7195,8 +7009,7 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record)
        BlockNumber blkno;
        XLogRedoAction action;
 
-       rnode = xlrec->node;
-       blkno = xlrec->block;
+       XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
 
        /*
         * We're about to remove tuples. In Hot Standby mode, ensure that there's
@@ -7213,9 +7026,8 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record)
         * If we have a full-page image, restore it (using a cleanup lock) and
         * we're done.
         */
-       action = XLogReadBufferForRedoExtended(lsn, record, 0,
-                                                                                  rnode, MAIN_FORKNUM, blkno,
-                                                                                  RBM_NORMAL, true, &buffer);
+       action = XLogReadBufferForRedoExtended(lsn, record, 0, RBM_NORMAL, true,
+                                                                                  &buffer);
        if (action == BLK_NEEDS_REDO)
        {
                Page            page = (Page) BufferGetPage(buffer);
@@ -7226,11 +7038,13 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record)
                int                     nredirected;
                int                     ndead;
                int                     nunused;
+               Size            datalen;
+
+               redirected = (OffsetNumber *) XLogRecGetBlockData(record, 0, &datalen);
 
                nredirected = xlrec->nredirected;
                ndead = xlrec->ndead;
-               end = (OffsetNumber *) ((char *) xlrec + record->xl_len);
-               redirected = (OffsetNumber *) ((char *) xlrec + SizeOfHeapClean);
+               end = (OffsetNumber *) ((char *) redirected + datalen);
                nowdead = redirected + (nredirected * 2);
                nowunused = nowdead + ndead;
                nunused = (end - nowunused);
@@ -7263,7 +7077,7 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record)
         * totally accurate anyway.
         */
        if (action == BLK_NEEDS_REDO)
-               XLogRecordPageWithFreeSpace(xlrec->node, xlrec->block, freespace);
+               XLogRecordPageWithFreeSpace(rnode, blkno, freespace);
 }
 
 /*
@@ -7278,14 +7092,14 @@ static void
 heap_xlog_visible(XLogRecPtr lsn, XLogRecord *record)
 {
        xl_heap_visible *xlrec = (xl_heap_visible *) XLogRecGetData(record);
+       Buffer          vmbuffer = InvalidBuffer;
        Buffer          buffer;
        Page            page;
        RelFileNode rnode;
        BlockNumber blkno;
        XLogRedoAction action;
 
-       rnode = xlrec->node;
-       blkno = xlrec->block;
+       XLogRecGetBlockTag(record, 1, &rnode, NULL, &blkno);
 
        /*
         * If there are any Hot Standby transactions running that have an xmin
@@ -7304,7 +7118,7 @@ heap_xlog_visible(XLogRecPtr lsn, XLogRecord *record)
         * truncated later in recovery, we don't need to update the page, but we'd
         * better still update the visibility map.
         */
-       action = XLogReadBufferForRedo(lsn, record, 1, rnode, blkno, &buffer);
+       action = XLogReadBufferForRedo(lsn, record, 1, &buffer);
        if (action == BLK_NEEDS_REDO)
        {
                /*
@@ -7320,9 +7134,9 @@ heap_xlog_visible(XLogRecPtr lsn, XLogRecord *record)
                 * XLOG record's LSN, we mustn't mark the page all-visible, because
                 * the subsequent update won't be replayed to clear the flag.
                 */
-               page = BufferGetPage(buffer);
-               PageSetAllVisible(page);
-               MarkBufferDirty(buffer);
+                       page = BufferGetPage(buffer);
+                       PageSetAllVisible(page);
+                       MarkBufferDirty(buffer);
        }
        else if (action == BLK_RESTORED)
        {
@@ -7341,12 +7155,16 @@ heap_xlog_visible(XLogRecPtr lsn, XLogRecord *record)
         * the visibility map bit does so before checking the page LSN, so any
         * bits that need to be cleared will still be cleared.
         */
-       if (record->xl_info & XLR_BKP_BLOCK(0))
-               (void) RestoreBackupBlock(lsn, record, 0, false, false);
-       else
+       if (XLogReadBufferForRedoExtended(lsn, record, 0, RBM_ZERO_ON_ERROR, false,
+                                                                         &vmbuffer) == BLK_NEEDS_REDO)
        {
                Relation        reln;
-               Buffer          vmbuffer = InvalidBuffer;
+
+               /*
+                * XLogReadBufferForRedoExtended locked the buffer. But
+                * visibilitymap_set will handle locking itself.
+                */
+               LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
 
                reln = CreateFakeRelcacheEntry(rnode);
                visibilitymap_pin(reln, blkno, &vmbuffer);
@@ -7369,6 +7187,8 @@ heap_xlog_visible(XLogRecPtr lsn, XLogRecord *record)
                ReleaseBuffer(vmbuffer);
                FreeFakeRelcacheEntry(reln);
        }
+       else if (BufferIsValid(vmbuffer))
+               UnlockReleaseBuffer(vmbuffer);
 }
 
 /*
@@ -7380,7 +7200,6 @@ heap_xlog_freeze_page(XLogRecPtr lsn, XLogRecord *record)
        xl_heap_freeze_page *xlrec = (xl_heap_freeze_page *) XLogRecGetData(record);
        TransactionId cutoff_xid = xlrec->cutoff_xid;
        Buffer          buffer;
-       Page            page;
        int                     ntup;
 
        /*
@@ -7388,12 +7207,19 @@ heap_xlog_freeze_page(XLogRecPtr lsn, XLogRecord *record)
         * consider the frozen xids as running.
         */
        if (InHotStandby)
-               ResolveRecoveryConflictWithSnapshot(cutoff_xid, xlrec->node);
+       {
+               RelFileNode rnode;
 
-       if (XLogReadBufferForRedo(lsn, record, 0, xlrec->node, xlrec->block,
-                                                         &buffer) == BLK_NEEDS_REDO)
+               XLogRecGetBlockTag(record, 0, &rnode, NULL, NULL);
+               ResolveRecoveryConflictWithSnapshot(cutoff_xid, rnode);
+       }
+
+       if (XLogReadBufferForRedo(lsn, record, 0, &buffer) == BLK_NEEDS_REDO)
        {
-               page = BufferGetPage(buffer);
+               Page            page = BufferGetPage(buffer);
+               xl_heap_freeze_tuple *tuples;
+
+               tuples = (xl_heap_freeze_tuple *) XLogRecGetBlockData(record, 0, NULL);
 
                /* now execute freeze plan for each frozen tuple */
                for (ntup = 0; ntup < xlrec->ntuples; ntup++)
@@ -7402,7 +7228,7 @@ heap_xlog_freeze_page(XLogRecPtr lsn, XLogRecord *record)
                        ItemId          lp;
                        HeapTupleHeader tuple;
 
-                       xlrec_tp = &xlrec->tuples[ntup];
+                       xlrec_tp = &tuples[ntup];
                        lp = PageGetItemId(page, xlrec_tp->offset); /* offsets are one-based */
                        tuple = (HeapTupleHeader) PageGetItem(page, lp);
 
@@ -7449,14 +7275,15 @@ heap_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
        xl_heap_delete *xlrec = (xl_heap_delete *) XLogRecGetData(record);
        Buffer          buffer;
        Page            page;
-       OffsetNumber offnum;
        ItemId          lp = NULL;
        HeapTupleHeader htup;
        BlockNumber blkno;
        RelFileNode target_node;
+       ItemPointerData target_tid;
 
-       blkno = ItemPointerGetBlockNumber(&(xlrec->target.tid));
-       target_node = xlrec->target.node;
+       XLogRecGetBlockTag(record, 0, &target_node, NULL, &blkno);
+       ItemPointerSetBlockNumber(&target_tid, blkno);
+       ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);
 
        /*
         * The visibility map may need to be fixed even if the heap page is
@@ -7473,16 +7300,14 @@ heap_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
                FreeFakeRelcacheEntry(reln);
        }
 
-       if (XLogReadBufferForRedo(lsn, record, 0, target_node, blkno, &buffer)
-               == BLK_NEEDS_REDO)
+       if (XLogReadBufferForRedo(lsn, record, 0, &buffer) == BLK_NEEDS_REDO)
        {
-               page = (Page) BufferGetPage(buffer);
+               page = BufferGetPage(buffer);
 
-               offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
-               if (PageGetMaxOffsetNumber(page) >= offnum)
-                       lp = PageGetItemId(page, offnum);
+               if (PageGetMaxOffsetNumber(page) >= xlrec->offnum)
+                       lp = PageGetItemId(page, xlrec->offnum);
 
-               if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
+               if (PageGetMaxOffsetNumber(page) < xlrec->offnum || !ItemIdIsNormal(lp))
                        elog(PANIC, "heap_delete_redo: invalid lp");
 
                htup = (HeapTupleHeader) PageGetItem(page, lp);
@@ -7502,7 +7327,7 @@ heap_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
                        PageClearAllVisible(page);
 
                /* Make sure there is no forward chain link in t_ctid */
-               htup->t_ctid = xlrec->target.tid;
+               htup->t_ctid = target_tid;
                PageSetLSN(page, lsn);
                MarkBufferDirty(buffer);
        }
@@ -7516,7 +7341,6 @@ heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record)
        xl_heap_insert *xlrec = (xl_heap_insert *) XLogRecGetData(record);
        Buffer          buffer;
        Page            page;
-       OffsetNumber offnum;
        struct
        {
                HeapTupleHeaderData hdr;
@@ -7528,10 +7352,12 @@ heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record)
        Size            freespace = 0;
        RelFileNode target_node;
        BlockNumber blkno;
+       ItemPointerData target_tid;
        XLogRedoAction action;
 
-       target_node = xlrec->target.node;
-       blkno = ItemPointerGetBlockNumber(&(xlrec->target.tid));
+       XLogRecGetBlockTag(record, 0, &target_node, NULL, &blkno);
+       ItemPointerSetBlockNumber(&target_tid, blkno);
+       ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);
 
        /*
         * The visibility map may need to be fixed even if the heap page is
@@ -7554,35 +7380,46 @@ heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record)
         */
        if (record->xl_info & XLOG_HEAP_INIT_PAGE)
        {
-               XLogReadBufferForRedoExtended(lsn, record, 0,
-                                                                         target_node, MAIN_FORKNUM, blkno,
-                                                                         RBM_ZERO, false, &buffer);
+               XLogReadBufferForRedoExtended(lsn, record, 0, RBM_ZERO, false, &buffer);
                page = BufferGetPage(buffer);
                PageInit(page, BufferGetPageSize(buffer), 0);
                action = BLK_NEEDS_REDO;
        }
        else
-               action = XLogReadBufferForRedo(lsn, record, 0, target_node, blkno,
-                                                                          &buffer);
-
+               action = XLogReadBufferForRedo(lsn, record, 0, &buffer);
        if (action == BLK_NEEDS_REDO)
        {
+               Size            datalen;
+               char       *data;
+
+               /*
+                * The new tuple is normally stored as buffer 0's data. But if
+                * XLOG_HEAP_CONTAINS_NEW_TUPLE flag is set, it's part of the main
+                * data, after the xl_heap_insert struct.
+                */
+               if (xlrec->flags & XLOG_HEAP_CONTAINS_NEW_TUPLE)
+               {
+                       data = XLogRecGetData(record) + SizeOfHeapInsert;
+                       datalen = record->xl_len - SizeOfHeapInsert;
+               }
+               else
+                       data = XLogRecGetBlockData(record, 0, &datalen);
+
                page = BufferGetPage(buffer);
 
-               offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
-               if (PageGetMaxOffsetNumber(page) + 1 < offnum)
+               if (PageGetMaxOffsetNumber(page) + 1 < xlrec->offnum)
                        elog(PANIC, "heap_insert_redo: invalid max offset number");
 
-               newlen = record->xl_len - SizeOfHeapInsert - SizeOfHeapHeader;
-               Assert(newlen <= MaxHeapTupleSize);
-               memcpy((char *) &xlhdr,
-                          (char *) xlrec + SizeOfHeapInsert,
-                          SizeOfHeapHeader);
+               newlen = datalen - SizeOfHeapHeader;
+               Assert(datalen > SizeOfHeapHeader && newlen <= MaxHeapTupleSize);
+               memcpy((char *) &xlhdr, data, SizeOfHeapHeader);
+               data += SizeOfHeapHeader;
+
                htup = &tbuf.hdr;
                MemSet((char *) htup, 0, sizeof(HeapTupleHeaderData));
                /* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
                memcpy((char *) htup + offsetof(HeapTupleHeaderData, t_bits),
-                          (char *) xlrec + SizeOfHeapInsert + SizeOfHeapHeader,
+                          data,
                           newlen);
                newlen += offsetof(HeapTupleHeaderData, t_bits);
                htup->t_infomask2 = xlhdr.t_infomask2;
@@ -7590,10 +7427,10 @@ heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record)
                htup->t_hoff = xlhdr.t_hoff;
                HeapTupleHeaderSetXmin(htup, record->xl_xid);
                HeapTupleHeaderSetCmin(htup, FirstCommandId);
-               htup->t_ctid = xlrec->target.tid;
+               htup->t_ctid = target_tid;
 
-               offnum = PageAddItem(page, (Item) htup, newlen, offnum, true, true);
-               if (offnum == InvalidOffsetNumber)
+               if (PageAddItem(page, (Item) htup, newlen, xlrec->offnum,
+                                               true, true) == InvalidOffsetNumber)
                        elog(PANIC, "heap_insert_redo: failed to add tuple");
 
                freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
@@ -7618,7 +7455,7 @@ heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record)
         * totally accurate anyway.
         */
        if (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)
-               XLogRecordPageWithFreeSpace(xlrec->target.node, blkno, freespace);
+               XLogRecordPageWithFreeSpace(target_node, blkno, freespace);
 }
 
 /*
@@ -7627,7 +7464,6 @@ heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record)
 static void
 heap_xlog_multi_insert(XLogRecPtr lsn, XLogRecord *record)
 {
-       char       *recdata = XLogRecGetData(record);
        xl_heap_multi_insert *xlrec;
        RelFileNode rnode;
        BlockNumber blkno;
@@ -7649,20 +7485,9 @@ heap_xlog_multi_insert(XLogRecPtr lsn, XLogRecord *record)
         * Insertion doesn't overwrite MVCC data, so no conflict processing is
         * required.
         */
+       xlrec = (xl_heap_multi_insert *) XLogRecGetData(record);
 
-       xlrec = (xl_heap_multi_insert *) recdata;
-       recdata += SizeOfHeapMultiInsert;
-
-       rnode = xlrec->node;
-       blkno = xlrec->blkno;
-
-       /*
-        * If we're reinitializing the page, the tuples are stored in order from
-        * FirstOffsetNumber. Otherwise there's an array of offsets in the WAL
-        * record.
-        */
-       if (!isinit)
-               recdata += sizeof(OffsetNumber) * xlrec->ntuples;
+       XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
 
        /*
         * The visibility map may need to be fixed even if the heap page is
@@ -7681,24 +7506,35 @@ heap_xlog_multi_insert(XLogRecPtr lsn, XLogRecord *record)
 
        if (isinit)
        {
-               XLogReadBufferForRedoExtended(lsn, record, 0,
-                                                                         rnode, MAIN_FORKNUM, blkno,
-                                                                         RBM_ZERO, false, &buffer);
+               XLogReadBufferForRedoExtended(lsn, record, 0, RBM_ZERO, false, &buffer);
                page = BufferGetPage(buffer);
                PageInit(page, BufferGetPageSize(buffer), 0);
                action = BLK_NEEDS_REDO;
        }
        else
-               action = XLogReadBufferForRedo(lsn, record, 0, rnode, blkno, &buffer);
-
+               action = XLogReadBufferForRedo(lsn, record, 0, &buffer);
        if (action == BLK_NEEDS_REDO)
        {
-               page = BufferGetPage(buffer);
+               char       *tupdata;
+               char       *endptr;
+               Size            len;
+
+               /* Tuples are stored as block data */
+               tupdata = XLogRecGetBlockData(record, 0, &len);
+               endptr = tupdata + len;
+
+               page = (Page) BufferGetPage(buffer);
+
                for (i = 0; i < xlrec->ntuples; i++)
                {
                        OffsetNumber offnum;
                        xl_multi_insert_tuple *xlhdr;
 
+                       /*
+                        * If we're reinitializing the page, the tuples are stored in order
+                        * from FirstOffsetNumber. Otherwise there's an array of offsets in
+                        * the WAL record, and the tuples come after that.
+                        */
                        if (isinit)
                                offnum = FirstOffsetNumber + i;
                        else
@@ -7706,8 +7542,8 @@ heap_xlog_multi_insert(XLogRecPtr lsn, XLogRecord *record)
                        if (PageGetMaxOffsetNumber(page) + 1 < offnum)
                                elog(PANIC, "heap_multi_insert_redo: invalid max offset number");
 
-                       xlhdr = (xl_multi_insert_tuple *) SHORTALIGN(recdata);
-                       recdata = ((char *) xlhdr) + SizeOfMultiInsertTuple;
+                       xlhdr = (xl_multi_insert_tuple *) SHORTALIGN(tupdata);
+                       tupdata = ((char *) xlhdr) + SizeOfMultiInsertTuple;
 
                        newlen = xlhdr->datalen;
                        Assert(newlen <= MaxHeapTupleSize);
@@ -7715,9 +7551,9 @@ heap_xlog_multi_insert(XLogRecPtr lsn, XLogRecord *record)
                        MemSet((char *) htup, 0, sizeof(HeapTupleHeaderData));
                        /* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
                        memcpy((char *) htup + offsetof(HeapTupleHeaderData, t_bits),
-                                  (char *) recdata,
+                                  (char *) tupdata,
                                   newlen);
-                       recdata += newlen;
+                       tupdata += newlen;
 
                        newlen += offsetof(HeapTupleHeaderData, t_bits);
                        htup->t_infomask2 = xlhdr->t_infomask2;
@@ -7732,6 +7568,8 @@ heap_xlog_multi_insert(XLogRecPtr lsn, XLogRecord *record)
                        if (offnum == InvalidOffsetNumber)
                                elog(PANIC, "heap_multi_insert_redo: failed to add tuple");
                }
+               if (tupdata != endptr)
+                       elog(PANIC, "heap_multi_insert_redo: total tuple length mismatch");
 
                freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
 
@@ -7755,7 +7593,7 @@ heap_xlog_multi_insert(XLogRecPtr lsn, XLogRecord *record)
         * totally accurate anyway.
         */
        if (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)
-               XLogRecordPageWithFreeSpace(xlrec->node, blkno, freespace);
+               XLogRecordPageWithFreeSpace(rnode, blkno, freespace);
 }
 
 /*
@@ -7768,6 +7606,7 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
        RelFileNode rnode;
        BlockNumber oldblk;
        BlockNumber newblk;
+       ItemPointerData newtid;
        Buffer          obuffer,
                                nbuffer;
        Page            page;
@@ -7775,7 +7614,6 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
        ItemId          lp = NULL;
        HeapTupleData oldtup;
        HeapTupleHeader htup;
-       char       *recdata;
        uint16          prefixlen = 0,
                                suffixlen = 0;
        char       *newp;
@@ -7784,7 +7622,7 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
                HeapTupleHeaderData hdr;
                char            data[MaxHeapTupleSize];
        }                       tbuf;
-       xl_heap_header_len xlhdr;
+       xl_heap_header xlhdr;
        uint32          newlen;
        Size            freespace = 0;
        XLogRedoAction oldaction;
@@ -7794,9 +7632,17 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
        oldtup.t_data = NULL;
        oldtup.t_len = 0;
 
-       rnode = xlrec->target.node;
-       newblk = ItemPointerGetBlockNumber(&xlrec->newtid);
-       oldblk = ItemPointerGetBlockNumber(&xlrec->target.tid);
+       XLogRecGetBlockTag(record, 0, &rnode, NULL, &newblk);
+       if (XLogRecHasBlockRef(record, 1))
+       {
+               /* HOT updates are never done across pages */
+               Assert(!hot_update);
+               XLogRecGetBlockTag(record, 1, NULL, NULL, &oldblk);
+       }
+       else
+               oldblk = newblk;
+
+       ItemPointerSet(&newtid, newblk, xlrec->new_offnum);
 
        /*
         * The visibility map may need to be fixed even if the heap page is
@@ -7824,12 +7670,12 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
         */
 
        /* Deal with old tuple version */
-       oldaction = XLogReadBufferForRedo(lsn, record, 0, rnode, oldblk, &obuffer);
+       oldaction = XLogReadBufferForRedo(lsn, record, (oldblk == newblk) ? 0 : 1,
+                                                                         &obuffer);
        if (oldaction == BLK_NEEDS_REDO)
        {
-               page = (Page) BufferGetPage(obuffer);
-
-               offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
+               page = BufferGetPage(obuffer);
+               offnum = xlrec->old_offnum;
                if (PageGetMaxOffsetNumber(page) >= offnum)
                        lp = PageGetItemId(page, offnum);
 
@@ -7852,7 +7698,7 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
                HeapTupleHeaderSetXmax(htup, xlrec->old_xmax);
                HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
                /* Set forward chain link in t_ctid */
-               htup->t_ctid = xlrec->newtid;
+               htup->t_ctid = newtid;
 
                /* Mark the page as a candidate for pruning */
                PageSetPrunable(page, record->xl_xid);
@@ -7874,16 +7720,14 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
        }
        else if (record->xl_info & XLOG_HEAP_INIT_PAGE)
        {
-               XLogReadBufferForRedoExtended(lsn, record, 1,
-                                                                         rnode, MAIN_FORKNUM, newblk,
-                                                                         RBM_ZERO, false, &nbuffer);
+               XLogReadBufferForRedoExtended(lsn, record, 0, RBM_ZERO, false,
+                                                                         &nbuffer);
                page = (Page) BufferGetPage(nbuffer);
                PageInit(page, BufferGetPageSize(nbuffer), 0);
                newaction = BLK_NEEDS_REDO;
        }
        else
-               newaction = XLogReadBufferForRedo(lsn, record, 1, rnode, newblk,
-                                                                                 &nbuffer);
+               newaction = XLogReadBufferForRedo(lsn, record, 0, &nbuffer);
 
        /*
         * The visibility map may need to be fixed even if the heap page is
@@ -7891,7 +7735,7 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
         */
        if (xlrec->flags & XLOG_HEAP_NEW_ALL_VISIBLE_CLEARED)
        {
-               Relation        reln = CreateFakeRelcacheEntry(xlrec->target.node);
+               Relation        reln = CreateFakeRelcacheEntry(rnode);
                Buffer          vmbuffer = InvalidBuffer;
 
                visibilitymap_pin(reln, newblk, &vmbuffer);
@@ -7903,14 +7747,20 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
        /* Deal with new tuple */
        if (newaction == BLK_NEEDS_REDO)
        {
-               page = (Page) BufferGetPage(nbuffer);
+               char       *recdata;
+               char       *recdataend;
+               Size            datalen;
+               Size            tuplen;
+
+               recdata = XLogRecGetBlockData(record, 0, &datalen);
+               recdataend = recdata + datalen;
 
-               offnum = ItemPointerGetOffsetNumber(&(xlrec->newtid));
+               page = BufferGetPage(nbuffer);
+
+               offnum = xlrec->new_offnum;
                if (PageGetMaxOffsetNumber(page) + 1 < offnum)
                        elog(PANIC, "heap_update_redo: invalid max offset number");
 
-               recdata = (char *) xlrec + SizeOfHeapUpdate;
-
                if (xlrec->flags & XLOG_HEAP_PREFIX_FROM_OLD)
                {
                        Assert(newblk == oldblk);
@@ -7924,10 +7774,12 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
                        recdata += sizeof(uint16);
                }
 
-               memcpy((char *) &xlhdr, recdata, SizeOfHeapHeaderLen);
-               recdata += SizeOfHeapHeaderLen;
+               memcpy((char *) &xlhdr, recdata, SizeOfHeapHeader);
+               recdata += SizeOfHeapHeader;
+
+               tuplen = recdataend - recdata;
+               Assert(tuplen <= MaxHeapTupleSize);
 
-               Assert(xlhdr.t_len + prefixlen + suffixlen <= MaxHeapTupleSize);
                htup = &tbuf.hdr;
                MemSet((char *) htup, 0, sizeof(HeapTupleHeaderData));
 
@@ -7941,7 +7793,7 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
                        int                     len;
 
                        /* copy bitmap [+ padding] [+ oid] from WAL record */
-                       len = xlhdr.header.t_hoff - offsetof(HeapTupleHeaderData, t_bits);
+                       len = xlhdr.t_hoff - offsetof(HeapTupleHeaderData, t_bits);
                        memcpy(newp, recdata, len);
                        recdata += len;
                        newp += len;
@@ -7951,7 +7803,7 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
                        newp += prefixlen;
 
                        /* copy new tuple data from WAL record */
-                       len = xlhdr.t_len - (xlhdr.header.t_hoff - offsetof(HeapTupleHeaderData, t_bits));
+                       len = tuplen - (xlhdr.t_hoff - offsetof(HeapTupleHeaderData, t_bits));
                        memcpy(newp, recdata, len);
                        recdata += len;
                        newp += len;
@@ -7962,24 +7814,26 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
                         * copy bitmap [+ padding] [+ oid] + data from record, all in one
                         * go
                         */
-                       memcpy(newp, recdata, xlhdr.t_len);
-                       recdata += xlhdr.t_len;
-                       newp += xlhdr.t_len;
+                       memcpy(newp, recdata, tuplen);
+                       recdata += tuplen;
+                       newp += tuplen;
                }
+               Assert(recdata == recdataend);
+
                /* copy suffix from old tuple */
                if (suffixlen > 0)
                        memcpy(newp, (char *) oldtup.t_data + oldtup.t_len - suffixlen, suffixlen);
 
-               newlen = offsetof(HeapTupleHeaderData, t_bits) + xlhdr.t_len + prefixlen + suffixlen;
-               htup->t_infomask2 = xlhdr.header.t_infomask2;
-               htup->t_infomask = xlhdr.header.t_infomask;
-               htup->t_hoff = xlhdr.header.t_hoff;
+               newlen = offsetof(HeapTupleHeaderData, t_bits) + tuplen + prefixlen + suffixlen;
+               htup->t_infomask2 = xlhdr.t_infomask2;
+               htup->t_infomask = xlhdr.t_infomask;
+               htup->t_hoff = xlhdr.t_hoff;
 
                HeapTupleHeaderSetXmin(htup, record->xl_xid);
                HeapTupleHeaderSetCmin(htup, FirstCommandId);
                HeapTupleHeaderSetXmax(htup, xlrec->new_xmax);
                /* Make sure there is no forward chain link in t_ctid */
-               htup->t_ctid = xlrec->newtid;
+               htup->t_ctid = newtid;
 
                offnum = PageAddItem(page, (Item) htup, newlen, offnum, true, true);
                if (offnum == InvalidOffsetNumber)
@@ -7993,6 +7847,7 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
                PageSetLSN(page, lsn);
                MarkBufferDirty(nbuffer);
        }
+
        if (BufferIsValid(nbuffer) && nbuffer != obuffer)
                UnlockReleaseBuffer(nbuffer);
        if (BufferIsValid(obuffer))
@@ -8014,9 +7869,7 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
         * totally accurate anyway.
         */
        if (newaction == BLK_NEEDS_REDO && !hot_update && freespace < BLCKSZ / 5)
-               XLogRecordPageWithFreeSpace(xlrec->target.node,
-                                                                ItemPointerGetBlockNumber(&(xlrec->newtid)),
-                                                                       freespace);
+               XLogRecordPageWithFreeSpace(rnode, newblk, freespace);
 }
 
 static void
@@ -8029,13 +7882,11 @@ heap_xlog_lock(XLogRecPtr lsn, XLogRecord *record)
        ItemId          lp = NULL;
        HeapTupleHeader htup;
 
-       if (XLogReadBufferForRedo(lsn, record, 0, xlrec->target.node,
-                                                         ItemPointerGetBlockNumber(&xlrec->target.tid),
-                                                         &buffer) == BLK_NEEDS_REDO)
+       if (XLogReadBufferForRedo(lsn, record, 0, &buffer) == BLK_NEEDS_REDO)
        {
                page = (Page) BufferGetPage(buffer);
 
-               offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
+               offnum = xlrec->offnum;
                if (PageGetMaxOffsetNumber(page) >= offnum)
                        lp = PageGetItemId(page, offnum);
 
@@ -8055,7 +7906,9 @@ heap_xlog_lock(XLogRecPtr lsn, XLogRecord *record)
                {
                        HeapTupleHeaderClearHotUpdated(htup);
                        /* Make sure there is no forward chain link in t_ctid */
-                       htup->t_ctid = xlrec->target.tid;
+                       ItemPointerSet(&htup->t_ctid,
+                                                  BufferGetBlockNumber(buffer),
+                                                  offnum);
                }
                HeapTupleHeaderSetXmax(htup, xlrec->locking_xid);
                HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
@@ -8077,12 +7930,11 @@ heap_xlog_lock_updated(XLogRecPtr lsn, XLogRecord *record)
        ItemId          lp = NULL;
        HeapTupleHeader htup;
 
-       if (XLogReadBufferForRedo(lsn, record, 0, xlrec->target.node,
-                                                         ItemPointerGetBlockNumber(&(xlrec->target.tid)),
-                                                         &buffer) == BLK_NEEDS_REDO)
+       if (XLogReadBufferForRedo(lsn, record, 0, &buffer) == BLK_NEEDS_REDO)
        {
                page = BufferGetPage(buffer);
-               offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
+
+               offnum = xlrec->offnum;
                if (PageGetMaxOffsetNumber(page) >= offnum)
                        lp = PageGetItemId(page, offnum);
 
@@ -8112,15 +7964,15 @@ heap_xlog_inplace(XLogRecPtr lsn, XLogRecord *record)
        ItemId          lp = NULL;
        HeapTupleHeader htup;
        uint32          oldlen;
-       uint32          newlen;
+       Size            newlen;
 
-       if (XLogReadBufferForRedo(lsn, record, 0, xlrec->target.node,
-                                                         ItemPointerGetBlockNumber(&(xlrec->target.tid)),
-                                                         &buffer) == BLK_NEEDS_REDO)
+       if (XLogReadBufferForRedo(lsn, record, 0, &buffer) == BLK_NEEDS_REDO)
        {
+               char       *newtup = XLogRecGetBlockData(record, 0, &newlen);
+
                page = BufferGetPage(buffer);
 
-               offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
+               offnum = xlrec->offnum;
                if (PageGetMaxOffsetNumber(page) >= offnum)
                        lp = PageGetItemId(page, offnum);
 
@@ -8130,13 +7982,10 @@ heap_xlog_inplace(XLogRecPtr lsn, XLogRecord *record)
                htup = (HeapTupleHeader) PageGetItem(page, lp);
 
                oldlen = ItemIdGetLength(lp) - htup->t_hoff;
-               newlen = record->xl_len - SizeOfHeapInplace;
                if (oldlen != newlen)
                        elog(PANIC, "heap_inplace_redo: wrong tuple length");
 
-               memcpy((char *) htup + htup->t_hoff,
-                          (char *) xlrec + SizeOfHeapInplace,
-                          newlen);
+               memcpy((char *) htup + htup->t_hoff, newtup, newlen);
 
                PageSetLSN(page, lsn);
                MarkBufferDirty(buffer);
index bea52460a086cde3f3743df42aebedf7d88b5197..344de0ba2ed501a4be73b5e499c16fa811058bfe 100644 (file)
@@ -865,7 +865,6 @@ logical_heap_rewrite_flush_mappings(RewriteState state)
        hash_seq_init(&seq_status, state->rs_logical_mappings);
        while ((src = (RewriteMappingFile *) hash_seq_search(&seq_status)) != NULL)
        {
-               XLogRecData rdata[2];
                char       *waldata;
                char       *waldata_start;
                xl_heap_rewrite_mapping xlrec;
@@ -889,11 +888,6 @@ logical_heap_rewrite_flush_mappings(RewriteState state)
                xlrec.offset = src->off;
                xlrec.start_lsn = state->rs_begin_lsn;
 
-               rdata[0].data = (char *) (&xlrec);
-               rdata[0].len = sizeof(xlrec);
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].next = &(rdata[1]);
-
                /* write all mappings consecutively */
                len = src->num_mappings * sizeof(LogicalRewriteMappingData);
                waldata_start = waldata = palloc(len);
@@ -934,13 +928,12 @@ logical_heap_rewrite_flush_mappings(RewriteState state)
                                                        written, len)));
                src->off += len;
 
-               rdata[1].data = waldata_start;
-               rdata[1].len = len;
-               rdata[1].buffer = InvalidBuffer;
-               rdata[1].next = NULL;
+               XLogBeginInsert();
+               XLogRegisterData((char *) (&xlrec), sizeof(xlrec));
+               XLogRegisterData(waldata_start, len);
 
                /* write xlog record */
-               XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_REWRITE, rdata);
+               XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_REWRITE);
 
                pfree(waldata_start);
        }
index bcaba7e5e8400f1b144131043d5818b2ffa50c33..de3557efabb28760bc347cbe59c7a641fdf4d596 100644 (file)
@@ -837,37 +837,25 @@ _bt_insertonpg(Relation rel,
                if (RelationNeedsWAL(rel))
                {
                        xl_btree_insert xlrec;
-                       BlockNumber xlleftchild;
                        xl_btree_metadata xlmeta;
                        uint8           xlinfo;
                        XLogRecPtr      recptr;
-                       XLogRecData rdata[4];
-                       XLogRecData *nextrdata;
                        IndexTupleData trunctuple;
 
-                       xlrec.target.node = rel->rd_node;
-                       ItemPointerSet(&(xlrec.target.tid), itup_blkno, itup_off);
+                       xlrec.offnum = itup_off;
 
-                       rdata[0].data = (char *) &xlrec;
-                       rdata[0].len = SizeOfBtreeInsert;
-                       rdata[0].buffer = InvalidBuffer;
-                       rdata[0].next = nextrdata = &(rdata[1]);
+                       XLogBeginInsert();
+                       XLogRegisterData((char *) &xlrec, SizeOfBtreeInsert);
 
                        if (P_ISLEAF(lpageop))
                                xlinfo = XLOG_BTREE_INSERT_LEAF;
                        else
                        {
                                /*
-                                * Include the block number of the left child, whose
-                                * INCOMPLETE_SPLIT flag was cleared.
+                                * Register the left child whose INCOMPLETE_SPLIT flag was
+                                * cleared.
                                 */
-                               xlleftchild = BufferGetBlockNumber(cbuf);
-                               nextrdata->data = (char *) &xlleftchild;
-                               nextrdata->len = sizeof(BlockNumber);
-                               nextrdata->buffer = cbuf;
-                               nextrdata->buffer_std = true;
-                               nextrdata->next = nextrdata + 1;
-                               nextrdata++;
+                               XLogRegisterBuffer(1, cbuf, REGBUF_STANDARD);
 
                                xlinfo = XLOG_BTREE_INSERT_UPPER;
                        }
@@ -879,33 +867,25 @@ _bt_insertonpg(Relation rel,
                                xlmeta.fastroot = metad->btm_fastroot;
                                xlmeta.fastlevel = metad->btm_fastlevel;
 
-                               nextrdata->data = (char *) &xlmeta;
-                               nextrdata->len = sizeof(xl_btree_metadata);
-                               nextrdata->buffer = InvalidBuffer;
-                               nextrdata->next = nextrdata + 1;
-                               nextrdata++;
+                               XLogRegisterBuffer(2, metabuf, REGBUF_WILL_INIT);
+                               XLogRegisterBufData(2, (char *) &xlmeta, sizeof(xl_btree_metadata));
 
                                xlinfo = XLOG_BTREE_INSERT_META;
                        }
 
                        /* Read comments in _bt_pgaddtup */
+                       XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
                        if (!P_ISLEAF(lpageop) && newitemoff == P_FIRSTDATAKEY(lpageop))
                        {
                                trunctuple = *itup;
                                trunctuple.t_info = sizeof(IndexTupleData);
-                               nextrdata->data = (char *) &trunctuple;
-                               nextrdata->len = sizeof(IndexTupleData);
+                               XLogRegisterBufData(0, (char *) &trunctuple,
+                                                                sizeof(IndexTupleData));
                        }
                        else
-                       {
-                               nextrdata->data = (char *) itup;
-                               nextrdata->len = IndexTupleDSize(*itup);
-                       }
-                       nextrdata->buffer = buf;
-                       nextrdata->buffer_std = true;
-                       nextrdata->next = NULL;
+                               XLogRegisterBufData(0, (char *) itup, IndexTupleDSize(*itup));
 
-                       recptr = XLogInsert(RM_BTREE_ID, xlinfo, rdata);
+                       recptr = XLogInsert(RM_BTREE_ID, xlinfo);
 
                        if (BufferIsValid(metabuf))
                        {
@@ -1260,25 +1240,24 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright,
                xl_btree_split xlrec;
                uint8           xlinfo;
                XLogRecPtr      recptr;
-               XLogRecData rdata[7];
-               XLogRecData *lastrdata;
-               BlockNumber cblkno;
-
-               xlrec.node = rel->rd_node;
-               xlrec.leftsib = origpagenumber;
-               xlrec.rightsib = rightpagenumber;
-               xlrec.rnext = ropaque->btpo_next;
+
                xlrec.level = ropaque->btpo.level;
                xlrec.firstright = firstright;
+               xlrec.newitemoff = newitemoff;
 
-               rdata[0].data = (char *) &xlrec;
-               rdata[0].len = SizeOfBtreeSplit;
-               rdata[0].buffer = InvalidBuffer;
+               XLogBeginInsert();
+               XLogRegisterData((char *) &xlrec, SizeOfBtreeSplit);
 
-               lastrdata = &rdata[0];
+               XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
+               XLogRegisterBuffer(1, rbuf, REGBUF_WILL_INIT);
+               /* Log the right sibling, because we've changed its prev-pointer. */
+               if (!P_RIGHTMOST(ropaque))
+                       XLogRegisterBuffer(2, sbuf, REGBUF_STANDARD);
+               if (BufferIsValid(cbuf))
+                       XLogRegisterBuffer(3, cbuf, REGBUF_STANDARD);
 
                /*
-                * Log the new item and its offset, if it was inserted on the left
+                * Log the new item, if it was inserted on the left
                 * page. (If it was put on the right page, we don't need to explicitly
                 * WAL log it because it's included with all the other items on the
                 * right page.) Show the new item as belonging to the left page
@@ -1287,29 +1266,11 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright,
                 * though, to support archive compression of these records.
                 */
                if (newitemonleft)
-               {
-                       lastrdata->next = lastrdata + 1;
-                       lastrdata++;
-
-                       lastrdata->data = (char *) &newitemoff;
-                       lastrdata->len = sizeof(OffsetNumber);
-                       lastrdata->buffer = InvalidBuffer;
-
-                       lastrdata->next = lastrdata + 1;
-                       lastrdata++;
-
-                       lastrdata->data = (char *) newitem;
-                       lastrdata->len = MAXALIGN(newitemsz);
-                       lastrdata->buffer = buf;        /* backup block 0 */
-                       lastrdata->buffer_std = true;
-               }
+                       XLogRegisterBufData(0, (char *) newitem, MAXALIGN(newitemsz));
 
                /* Log left page */
                if (!isleaf)
                {
-                       lastrdata->next = lastrdata + 1;
-                       lastrdata++;
-
                        /*
                         * We must also log the left page's high key, because the right
                         * page's leftmost key is suppressed on non-leaf levels.  Show it
@@ -1319,43 +1280,7 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright,
                         */
                        itemid = PageGetItemId(origpage, P_HIKEY);
                        item = (IndexTuple) PageGetItem(origpage, itemid);
-                       lastrdata->data = (char *) item;
-                       lastrdata->len = MAXALIGN(IndexTupleSize(item));
-                       lastrdata->buffer = buf;        /* backup block 0 */
-                       lastrdata->buffer_std = true;
-               }
-
-               if (isleaf && !newitemonleft)
-               {
-                       lastrdata->next = lastrdata + 1;
-                       lastrdata++;
-
-                       /*
-                        * Although we don't need to WAL-log anything on the left page, we
-                        * still need XLogInsert to consider storing a full-page image of
-                        * the left page, so make an empty entry referencing that buffer.
-                        * This also ensures that the left page is always backup block 0.
-                        */
-                       lastrdata->data = NULL;
-                       lastrdata->len = 0;
-                       lastrdata->buffer = buf;        /* backup block 0 */
-                       lastrdata->buffer_std = true;
-               }
-
-               /*
-                * Log block number of left child, whose INCOMPLETE_SPLIT flag this
-                * insertion clears.
-                */
-               if (!isleaf)
-               {
-                       lastrdata->next = lastrdata + 1;
-                       lastrdata++;
-
-                       cblkno = BufferGetBlockNumber(cbuf);
-                       lastrdata->data = (char *) &cblkno;
-                       lastrdata->len = sizeof(BlockNumber);
-                       lastrdata->buffer = cbuf;       /* backup block 1 */
-                       lastrdata->buffer_std = true;
+                       XLogRegisterBufData(0, (char *) item, MAXALIGN(IndexTupleSize(item)));
                }
 
                /*
@@ -1370,35 +1295,16 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright,
                 * and so the item pointers can be reconstructed.  See comments for
                 * _bt_restore_page().
                 */
-               lastrdata->next = lastrdata + 1;
-               lastrdata++;
-
-               lastrdata->data = (char *) rightpage +
-                       ((PageHeader) rightpage)->pd_upper;
-               lastrdata->len = ((PageHeader) rightpage)->pd_special -
-                       ((PageHeader) rightpage)->pd_upper;
-               lastrdata->buffer = InvalidBuffer;
-
-               /* Log the right sibling, because we've changed its' prev-pointer. */
-               if (!P_RIGHTMOST(ropaque))
-               {
-                       lastrdata->next = lastrdata + 1;
-                       lastrdata++;
-
-                       lastrdata->data = NULL;
-                       lastrdata->len = 0;
-                       lastrdata->buffer = sbuf;       /* bkp block 1 (leaf) or 2 (non-leaf) */
-                       lastrdata->buffer_std = true;
-               }
-
-               lastrdata->next = NULL;
+               XLogRegisterBufData(1,
+                                                (char *) rightpage + ((PageHeader) rightpage)->pd_upper,
+                                                ((PageHeader) rightpage)->pd_special - ((PageHeader) rightpage)->pd_upper);
 
                if (isroot)
                        xlinfo = newitemonleft ? XLOG_BTREE_SPLIT_L_ROOT : XLOG_BTREE_SPLIT_R_ROOT;
                else
                        xlinfo = newitemonleft ? XLOG_BTREE_SPLIT_L : XLOG_BTREE_SPLIT_R;
 
-               recptr = XLogInsert(RM_BTREE_ID, xlinfo, rdata);
+               recptr = XLogInsert(RM_BTREE_ID, xlinfo);
 
                PageSetLSN(origpage, recptr);
                PageSetLSN(rightpage, recptr);
@@ -2090,34 +1996,35 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
        {
                xl_btree_newroot xlrec;
                XLogRecPtr      recptr;
-               XLogRecData rdata[3];
+               xl_btree_metadata md;
 
-               xlrec.node = rel->rd_node;
                xlrec.rootblk = rootblknum;
                xlrec.level = metad->btm_level;
 
-               rdata[0].data = (char *) &xlrec;
-               rdata[0].len = SizeOfBtreeNewroot;
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].next = &(rdata[1]);
+               XLogBeginInsert();
+               XLogRegisterData((char *) &xlrec, SizeOfBtreeNewroot);
+
+               XLogRegisterBuffer(0, rootbuf, REGBUF_WILL_INIT);
+               XLogRegisterBuffer(1, lbuf, REGBUF_STANDARD);
+               XLogRegisterBuffer(2, metabuf, REGBUF_WILL_INIT);
+
+               md.root = rootblknum;
+               md.level = metad->btm_level;
+               md.fastroot = rootblknum;
+               md.fastlevel = metad->btm_level;
+
+               XLogRegisterBufData(2, (char *) &md, sizeof(xl_btree_metadata));
 
                /*
                 * Direct access to page is not good but faster - we should implement
                 * some new func in page API.
                 */
-               rdata[1].data = (char *) rootpage + ((PageHeader) rootpage)->pd_upper;
-               rdata[1].len = ((PageHeader) rootpage)->pd_special -
-                       ((PageHeader) rootpage)->pd_upper;
-               rdata[1].buffer = InvalidBuffer;
-               rdata[1].next = &(rdata[2]);
-
-               /* Make a full-page image of the left child if needed */
-               rdata[2].data = NULL;
-               rdata[2].len = 0;
-               rdata[2].buffer = lbuf;
-               rdata[2].next = NULL;
-
-               recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT, rdata);
+               XLogRegisterBufData(0,
+                                                (char *) rootpage + ((PageHeader) rootpage)->pd_upper,
+                                                ((PageHeader) rootpage)->pd_special -
+                                                        ((PageHeader) rootpage)->pd_upper);
+
+               recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT);
 
                PageSetLSN(lpage, recptr);
                PageSetLSN(rootpage, recptr);
index 6093215c43df363718155ba3b8fbe0a587ebbaaa..cbcf8f7956006255d44a9e31189f1ada0be18cba 100644 (file)
@@ -38,8 +38,7 @@ static bool _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf,
 static bool _bt_lock_branch_parent(Relation rel, BlockNumber child,
                                           BTStack stack, Buffer *topparent, OffsetNumber *topoff,
                                           BlockNumber *target, BlockNumber *rightsib);
-static void _bt_log_reuse_page(Relation rel, BlockNumber blkno,
-                                  TransactionId latestRemovedXid);
+static void _bt_log_reuse_page(Relation rel, BlockNumber blkno, TransactionId latestRemovedXid);
 
 /*
  *     _bt_initmetapage() -- Fill a page buffer with a correct metapage image
@@ -236,18 +235,25 @@ _bt_getroot(Relation rel, int access)
                {
                        xl_btree_newroot xlrec;
                        XLogRecPtr      recptr;
-                       XLogRecData rdata;
+                       xl_btree_metadata md;
+
+                       XLogBeginInsert();
+                       XLogRegisterBuffer(0, rootbuf, REGBUF_WILL_INIT);
+                       XLogRegisterBuffer(2, metabuf, REGBUF_WILL_INIT);
+
+                       md.root = rootblkno;
+                       md.level = 0;
+                       md.fastroot = rootblkno;
+                       md.fastlevel = 0;
+
+                       XLogRegisterBufData(2, (char *) &md, sizeof(xl_btree_metadata));
 
-                       xlrec.node = rel->rd_node;
                        xlrec.rootblk = rootblkno;
                        xlrec.level = 0;
 
-                       rdata.data = (char *) &xlrec;
-                       rdata.len = SizeOfBtreeNewroot;
-                       rdata.buffer = InvalidBuffer;
-                       rdata.next = NULL;
+                       XLogRegisterData((char *) &xlrec, SizeOfBtreeNewroot);
 
-                       recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT, &rdata);
+                       recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT);
 
                        PageSetLSN(rootpage, recptr);
                        PageSetLSN(metapg, recptr);
@@ -528,39 +534,23 @@ _bt_checkpage(Relation rel, Buffer buf)
 static void
 _bt_log_reuse_page(Relation rel, BlockNumber blkno, TransactionId latestRemovedXid)
 {
-       if (!RelationNeedsWAL(rel))
-               return;
-
-       /* No ereport(ERROR) until changes are logged */
-       START_CRIT_SECTION();
+       xl_btree_reuse_page xlrec_reuse;
 
        /*
-        * We don't do MarkBufferDirty here because we're about to initialise the
-        * page, and nobody else can see it yet.
+        * Note that we don't register the buffer with the record, because this
+        * operation doesn't modify the page. This record only exists to provide
+        * a conflict point for Hot Standby.
         */
 
        /* XLOG stuff */
-       {
-               XLogRecData rdata[1];
-               xl_btree_reuse_page xlrec_reuse;
-
-               xlrec_reuse.node = rel->rd_node;
-               xlrec_reuse.block = blkno;
-               xlrec_reuse.latestRemovedXid = latestRemovedXid;
-               rdata[0].data = (char *) &xlrec_reuse;
-               rdata[0].len = SizeOfBtreeReusePage;
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].next = NULL;
-
-               XLogInsert(RM_BTREE_ID, XLOG_BTREE_REUSE_PAGE, rdata);
+       xlrec_reuse.node = rel->rd_node;
+       xlrec_reuse.block = blkno;
+       xlrec_reuse.latestRemovedXid = latestRemovedXid;
 
-               /*
-                * We don't do PageSetLSN here because we're about to initialise the
-                * page, so no need.
-                */
-       }
+       XLogBeginInsert();
+       XLogRegisterData((char *) &xlrec_reuse, SizeOfBtreeReusePage);
 
-       END_CRIT_SECTION();
+       XLogInsert(RM_BTREE_ID, XLOG_BTREE_REUSE_PAGE);
 }
 
 /*
@@ -633,7 +623,7 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
                                         * WAL record that will allow us to conflict with queries
                                         * running on standby.
                                         */
-                                       if (XLogStandbyInfoActive())
+                                       if (XLogStandbyInfoActive() && RelationNeedsWAL(rel))
                                        {
                                                BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
 
@@ -830,17 +820,13 @@ _bt_delitems_vacuum(Relation rel, Buffer buf,
        if (RelationNeedsWAL(rel))
        {
                XLogRecPtr      recptr;
-               XLogRecData rdata[2];
                xl_btree_vacuum xlrec_vacuum;
 
-               xlrec_vacuum.node = rel->rd_node;
-               xlrec_vacuum.block = BufferGetBlockNumber(buf);
-
                xlrec_vacuum.lastBlockVacuumed = lastBlockVacuumed;
-               rdata[0].data = (char *) &xlrec_vacuum;
-               rdata[0].len = SizeOfBtreeVacuum;
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].next = &(rdata[1]);
+
+               XLogBeginInsert();
+               XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
+               XLogRegisterData((char *) &xlrec_vacuum, SizeOfBtreeVacuum);
 
                /*
                 * The target-offsets array is not in the buffer, but pretend that it
@@ -848,20 +834,9 @@ _bt_delitems_vacuum(Relation rel, Buffer buf,
                 * need not be stored too.
                 */
                if (nitems > 0)
-               {
-                       rdata[1].data = (char *) itemnos;
-                       rdata[1].len = nitems * sizeof(OffsetNumber);
-               }
-               else
-               {
-                       rdata[1].data = NULL;
-                       rdata[1].len = 0;
-               }
-               rdata[1].buffer = buf;
-               rdata[1].buffer_std = true;
-               rdata[1].next = NULL;
+                       XLogRegisterBufData(0, (char *) itemnos, nitems * sizeof(OffsetNumber));
 
-               recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_VACUUM, rdata);
+               recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_VACUUM);
 
                PageSetLSN(page, recptr);
        }
@@ -919,36 +894,23 @@ _bt_delitems_delete(Relation rel, Buffer buf,
        if (RelationNeedsWAL(rel))
        {
                XLogRecPtr      recptr;
-               XLogRecData rdata[3];
                xl_btree_delete xlrec_delete;
 
-               xlrec_delete.node = rel->rd_node;
                xlrec_delete.hnode = heapRel->rd_node;
-               xlrec_delete.block = BufferGetBlockNumber(buf);
                xlrec_delete.nitems = nitems;
 
-               rdata[0].data = (char *) &xlrec_delete;
-               rdata[0].len = SizeOfBtreeDelete;
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].next = &(rdata[1]);
+               XLogBeginInsert();
+               XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
+               XLogRegisterData((char *) &xlrec_delete, SizeOfBtreeDelete);
 
                /*
                 * We need the target-offsets array whether or not we store the whole
                 * buffer, to allow us to find the latestRemovedXid on a standby
                 * server.
                 */
-               rdata[1].data = (char *) itemnos;
-               rdata[1].len = nitems * sizeof(OffsetNumber);
-               rdata[1].buffer = InvalidBuffer;
-               rdata[1].next = &(rdata[2]);
-
-               rdata[2].data = NULL;
-               rdata[2].len = 0;
-               rdata[2].buffer = buf;
-               rdata[2].buffer_std = true;
-               rdata[2].next = NULL;
+               XLogRegisterData((char *) itemnos, nitems * sizeof(OffsetNumber));
 
-               recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DELETE, rdata);
+               recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DELETE);
 
                PageSetLSN(page, recptr);
        }
@@ -1493,33 +1455,26 @@ _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack)
        {
                xl_btree_mark_page_halfdead xlrec;
                XLogRecPtr      recptr;
-               XLogRecData rdata[2];
 
-               xlrec.target.node = rel->rd_node;
-               ItemPointerSet(&(xlrec.target.tid), BufferGetBlockNumber(topparent), topoff);
+               xlrec.poffset = topoff;
                xlrec.leafblk = leafblkno;
                if (target != leafblkno)
                        xlrec.topparent = target;
                else
                        xlrec.topparent = InvalidBlockNumber;
 
+               XLogBeginInsert();
+               XLogRegisterBuffer(0, leafbuf, REGBUF_WILL_INIT);
+               XLogRegisterBuffer(1, topparent, REGBUF_STANDARD);
+
                page = BufferGetPage(leafbuf);
                opaque = (BTPageOpaque) PageGetSpecialPointer(page);
                xlrec.leftblk = opaque->btpo_prev;
                xlrec.rightblk = opaque->btpo_next;
 
-               rdata[0].data = (char *) &xlrec;
-               rdata[0].len = SizeOfBtreeMarkPageHalfDead;
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].next = &(rdata[1]);
-
-               rdata[1].data = NULL;
-               rdata[1].len = 0;
-               rdata[1].buffer = topparent;
-               rdata[1].buffer_std = true;
-               rdata[1].next = NULL;
+               XLogRegisterData((char *) &xlrec, SizeOfBtreeMarkPageHalfDead);
 
-               recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_MARK_PAGE_HALFDEAD, rdata);
+               recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_MARK_PAGE_HALFDEAD);
 
                page = BufferGetPage(topparent);
                PageSetLSN(page, recptr);
@@ -1755,6 +1710,12 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty)
         * Here we begin doing the deletion.
         */
 
+       /*
+        * The WAL record needs at most 5 buffer references, which is more than
+        * the default allowance.
+        */
+       XLogEnsureRecordSpace(5, 0);
+
        /* No ereport(ERROR) until changes are logged */
        START_CRIT_SECTION();
 
@@ -1827,63 +1788,44 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty)
                xl_btree_metadata xlmeta;
                uint8           xlinfo;
                XLogRecPtr      recptr;
-               XLogRecData rdata[4];
-               XLogRecData *nextrdata;
 
-               xlrec.node = rel->rd_node;
+               XLogBeginInsert();
+
+               XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);
+               if (BufferIsValid(lbuf))
+                       XLogRegisterBuffer(1, lbuf, REGBUF_STANDARD);
+               XLogRegisterBuffer(2, rbuf, REGBUF_STANDARD);
+               if (target != leafblkno)
+                       XLogRegisterBuffer(3, leafbuf, REGBUF_WILL_INIT);
 
                /* information on the unlinked block */
-               xlrec.deadblk = target;
                xlrec.leftsib = leftsib;
                xlrec.rightsib = rightsib;
                xlrec.btpo_xact = opaque->btpo.xact;
 
                /* information needed to recreate the leaf block (if not the target) */
-               xlrec.leafblk = leafblkno;
                xlrec.leafleftsib = leafleftsib;
                xlrec.leafrightsib = leafrightsib;
                xlrec.topparent = nextchild;
 
-               rdata[0].data = (char *) &xlrec;
-               rdata[0].len = SizeOfBtreeUnlinkPage;
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].next = nextrdata = &(rdata[1]);
+               XLogRegisterData((char *) &xlrec, SizeOfBtreeUnlinkPage);
 
                if (BufferIsValid(metabuf))
                {
+                       XLogRegisterBuffer(4, metabuf, REGBUF_WILL_INIT);
+
                        xlmeta.root = metad->btm_root;
                        xlmeta.level = metad->btm_level;
                        xlmeta.fastroot = metad->btm_fastroot;
                        xlmeta.fastlevel = metad->btm_fastlevel;
 
-                       nextrdata->data = (char *) &xlmeta;
-                       nextrdata->len = sizeof(xl_btree_metadata);
-                       nextrdata->buffer = InvalidBuffer;
-                       nextrdata->next = nextrdata + 1;
-                       nextrdata++;
+                       XLogRegisterBufData(4, (char *) &xlmeta, sizeof(xl_btree_metadata));
                        xlinfo = XLOG_BTREE_UNLINK_PAGE_META;
                }
                else
                        xlinfo = XLOG_BTREE_UNLINK_PAGE;
 
-               nextrdata->data = NULL;
-               nextrdata->len = 0;
-               nextrdata->buffer = rbuf;
-               nextrdata->buffer_std = true;
-               nextrdata->next = NULL;
-
-               if (BufferIsValid(lbuf))
-               {
-                       nextrdata->next = nextrdata + 1;
-                       nextrdata++;
-                       nextrdata->data = NULL;
-                       nextrdata->len = 0;
-                       nextrdata->buffer = lbuf;
-                       nextrdata->buffer_std = true;
-                       nextrdata->next = NULL;
-               }
-
-               recptr = XLogInsert(RM_BTREE_ID, xlinfo, rdata);
+               recptr = XLogInsert(RM_BTREE_ID, xlinfo);
 
                if (BufferIsValid(metabuf))
                {
index 13951be62af2e376ee1f669ba81782ec1e93055b..1dd909b2fa21a97827f086ce1ccc44ca2ed17ff9 100644 (file)
@@ -72,17 +72,23 @@ _bt_restore_page(Page page, char *from, int len)
 }
 
 static void
-_bt_restore_meta(RelFileNode rnode, XLogRecPtr lsn,
-                                BlockNumber root, uint32 level,
-                                BlockNumber fastroot, uint32 fastlevel)
+_bt_restore_meta(XLogRecPtr lsn, XLogRecord *record, uint8 block_id)
 {
        Buffer          metabuf;
        Page            metapg;
        BTMetaPageData *md;
        BTPageOpaque pageop;
+       xl_btree_metadata *xlrec;
+       char       *ptr;
+       Size            len;
 
-       metabuf = XLogReadBuffer(rnode, BTREE_METAPAGE, true);
-       Assert(BufferIsValid(metabuf));
+       XLogReadBufferForRedoExtended(lsn, record, block_id, RBM_ZERO, false,
+                                                                 &metabuf);
+       ptr = XLogRecGetBlockData(record, block_id, &len);
+
+       Assert(len == sizeof(xl_btree_metadata));
+       Assert(BufferGetBlockNumber(metabuf) == BTREE_METAPAGE);
+       xlrec = (xl_btree_metadata *) ptr;
        metapg = BufferGetPage(metabuf);
 
        _bt_pageinit(metapg, BufferGetPageSize(metabuf));
@@ -90,10 +96,10 @@ _bt_restore_meta(RelFileNode rnode, XLogRecPtr lsn,
        md = BTPageGetMeta(metapg);
        md->btm_magic = BTREE_MAGIC;
        md->btm_version = BTREE_VERSION;
-       md->btm_root = root;
-       md->btm_level = level;
-       md->btm_fastroot = fastroot;
-       md->btm_fastlevel = fastlevel;
+       md->btm_root = xlrec->root;
+       md->btm_level = xlrec->level;
+       md->btm_fastroot = xlrec->fastroot;
+       md->btm_fastlevel = xlrec->fastlevel;
 
        pageop = (BTPageOpaque) PageGetSpecialPointer(metapg);
        pageop->btpo_flags = BTP_META;
@@ -117,18 +123,14 @@ _bt_restore_meta(RelFileNode rnode, XLogRecPtr lsn,
  * types that can insert a downlink: insert, split, and newroot.
  */
 static void
-_bt_clear_incomplete_split(XLogRecPtr lsn, XLogRecord *record,
-                                                  int block_index,
-                                                  RelFileNode rnode, BlockNumber cblock)
+_bt_clear_incomplete_split(XLogRecPtr lsn, XLogRecord *record, uint8 block_id)
 {
        Buffer          buf;
 
-       if (XLogReadBufferForRedo(lsn, record, block_index, rnode, cblock, &buf)
-               == BLK_NEEDS_REDO)
+       if (XLogReadBufferForRedo(lsn, record, block_id, &buf) == BLK_NEEDS_REDO)
        {
                Page            page = (Page) BufferGetPage(buf);
                BTPageOpaque pageop = (BTPageOpaque) PageGetSpecialPointer(page);
-
                Assert((pageop->btpo_flags & BTP_INCOMPLETE_SPLIT) != 0);
                pageop->btpo_flags &= ~BTP_INCOMPLETE_SPLIT;
 
@@ -146,32 +148,6 @@ btree_xlog_insert(bool isleaf, bool ismeta,
        xl_btree_insert *xlrec = (xl_btree_insert *) XLogRecGetData(record);
        Buffer          buffer;
        Page            page;
-       char       *datapos;
-       int                     datalen;
-       xl_btree_metadata md;
-       BlockNumber cblkno = 0;
-       int                     main_blk_index;
-
-       datapos = (char *) xlrec + SizeOfBtreeInsert;
-       datalen = record->xl_len - SizeOfBtreeInsert;
-
-       /*
-        * if this insert finishes a split at lower level, extract the block
-        * number of the (left) child.
-        */
-       if (!isleaf && (record->xl_info & XLR_BKP_BLOCK(0)) == 0)
-       {
-               memcpy(&cblkno, datapos, sizeof(BlockNumber));
-               Assert(cblkno != 0);
-               datapos += sizeof(BlockNumber);
-               datalen -= sizeof(BlockNumber);
-       }
-       if (ismeta)
-       {
-               memcpy(&md, datapos, sizeof(xl_btree_metadata));
-               datapos += sizeof(xl_btree_metadata);
-               datalen -= sizeof(xl_btree_metadata);
-       }
 
        /*
         * Insertion to an internal page finishes an incomplete split at the child
@@ -183,21 +159,15 @@ btree_xlog_insert(bool isleaf, bool ismeta,
         * cannot be updates happening.
         */
        if (!isleaf)
+               _bt_clear_incomplete_split(lsn, record, 1);
+       if (XLogReadBufferForRedo(lsn, record, 0, &buffer) == BLK_NEEDS_REDO)
        {
-               _bt_clear_incomplete_split(lsn, record, 0, xlrec->target.node, cblkno);
-               main_blk_index = 1;
-       }
-       else
-               main_blk_index = 0;
+               Size            datalen;
+               char       *datapos = XLogRecGetBlockData(record, 0, &datalen);
 
-       if (XLogReadBufferForRedo(lsn, record, main_blk_index, xlrec->target.node,
-                                                         ItemPointerGetBlockNumber(&(xlrec->target.tid)),
-                                                         &buffer) == BLK_NEEDS_REDO)
-       {
                page = BufferGetPage(buffer);
 
-               if (PageAddItem(page, (Item) datapos, datalen,
-                                               ItemPointerGetOffsetNumber(&(xlrec->target.tid)),
+               if (PageAddItem(page, (Item) datapos, datalen, xlrec->offnum,
                                                false, false) == InvalidOffsetNumber)
                        elog(PANIC, "btree_insert_redo: failed to add item");
 
@@ -215,9 +185,7 @@ btree_xlog_insert(bool isleaf, bool ismeta,
         * obsolete link from the metapage.
         */
        if (ismeta)
-               _bt_restore_meta(xlrec->target.node, lsn,
-                                                md.root, md.level,
-                                                md.fastroot, md.fastlevel);
+               _bt_restore_meta(lsn, record, 2);
 }
 
 static void
@@ -231,56 +199,19 @@ btree_xlog_split(bool onleft, bool isroot,
        Page            rpage;
        BTPageOpaque ropaque;
        char       *datapos;
-       int                     datalen;
-       OffsetNumber newitemoff = 0;
-       Item            newitem = NULL;
-       Size            newitemsz = 0;
+       Size            datalen;
        Item            left_hikey = NULL;
        Size            left_hikeysz = 0;
-       BlockNumber cblkno = InvalidBlockNumber;
-
-       datapos = (char *) xlrec + SizeOfBtreeSplit;
-       datalen = record->xl_len - SizeOfBtreeSplit;
-
-       /* Extract newitemoff and newitem, if present */
-       if (onleft)
-       {
-               memcpy(&newitemoff, datapos, sizeof(OffsetNumber));
-               datapos += sizeof(OffsetNumber);
-               datalen -= sizeof(OffsetNumber);
-       }
-       if (onleft && !(record->xl_info & XLR_BKP_BLOCK(0)))
-       {
-               /*
-                * We assume that 16-bit alignment is enough to apply IndexTupleSize
-                * (since it's fetching from a uint16 field) and also enough for
-                * PageAddItem to insert the tuple.
-                */
-               newitem = (Item) datapos;
-               newitemsz = MAXALIGN(IndexTupleSize(newitem));
-               datapos += newitemsz;
-               datalen -= newitemsz;
-       }
-
-       /* Extract left hikey and its size (still assuming 16-bit alignment) */
-       if (!isleaf && !(record->xl_info & XLR_BKP_BLOCK(0)))
-       {
-               left_hikey = (Item) datapos;
-               left_hikeysz = MAXALIGN(IndexTupleSize(left_hikey));
-               datapos += left_hikeysz;
-               datalen -= left_hikeysz;
-       }
-
-       /*
-        * If this insertion finishes an incomplete split, get the block number of
-        * the child.
-        */
-       if (!isleaf && !(record->xl_info & XLR_BKP_BLOCK(1)))
-       {
-               memcpy(&cblkno, datapos, sizeof(BlockNumber));
-               datapos += sizeof(BlockNumber);
-               datalen -= sizeof(BlockNumber);
-       }
+       BlockNumber     leftsib;
+       BlockNumber     rightsib;
+       BlockNumber     rnext;
+
+       XLogRecGetBlockTag(record, 0, NULL, NULL, &leftsib);
+       XLogRecGetBlockTag(record, 1, NULL, NULL, &rightsib);
+       if (XLogRecHasBlockRef(record, 2))
+               XLogRecGetBlockTag(record, 2, NULL, NULL, &rnext);
+       else
+               rnext = P_NONE;
 
        /*
         * Clear the incomplete split flag on the left sibling of the child page
@@ -288,18 +219,18 @@ btree_xlog_split(bool onleft, bool isroot,
         * before locking the other pages)
         */
        if (!isleaf)
-               _bt_clear_incomplete_split(lsn, record, 1, xlrec->node, cblkno);
+               _bt_clear_incomplete_split(lsn, record, 3);
 
        /* Reconstruct right (new) sibling page from scratch */
-       rbuf = XLogReadBuffer(xlrec->node, xlrec->rightsib, true);
-       Assert(BufferIsValid(rbuf));
+       XLogReadBufferForRedoExtended(lsn, record, 1, RBM_ZERO, false, &rbuf);
+       datapos = XLogRecGetBlockData(record, 1, &datalen);
        rpage = (Page) BufferGetPage(rbuf);
 
        _bt_pageinit(rpage, BufferGetPageSize(rbuf));
        ropaque = (BTPageOpaque) PageGetSpecialPointer(rpage);
 
-       ropaque->btpo_prev = xlrec->leftsib;
-       ropaque->btpo_next = xlrec->rnext;
+       ropaque->btpo_prev = leftsib;
+       ropaque->btpo_next = rnext;
        ropaque->btpo.level = xlrec->level;
        ropaque->btpo_flags = isleaf ? BTP_LEAF : 0;
        ropaque->btpo_cycleid = 0;
@@ -324,8 +255,7 @@ btree_xlog_split(bool onleft, bool isroot,
        /* don't release the buffer yet; we touch right page's first item below */
 
        /* Now reconstruct left (original) sibling page */
-       if (XLogReadBufferForRedo(lsn, record, 0, xlrec->node, xlrec->leftsib,
-                                                         &lbuf) == BLK_NEEDS_REDO)
+       if (XLogReadBufferForRedo(lsn, record, 0, &lbuf) == BLK_NEEDS_REDO)
        {
                /*
                 * To retain the same physical order of the tuples that they had, we
@@ -339,9 +269,31 @@ btree_xlog_split(bool onleft, bool isroot,
                Page            lpage = (Page) BufferGetPage(lbuf);
                BTPageOpaque lopaque = (BTPageOpaque) PageGetSpecialPointer(lpage);
                OffsetNumber off;
+               Item            newitem = NULL; /* set below only if onleft */
+               Size            newitemsz = 0;
                Page            newlpage;
                OffsetNumber leftoff;
 
+               datapos = XLogRecGetBlockData(record, 0, &datalen);
+
+               if (onleft)
+               {
+                       newitem = (Item) datapos;
+                       newitemsz = MAXALIGN(IndexTupleSize(newitem));
+                       datapos += newitemsz;
+                       datalen -= newitemsz;
+               }
+
+               /* Extract left hikey and its size (assuming 16-bit alignment) */
+               if (!isleaf)
+               {
+                       left_hikey = (Item) datapos;
+                       left_hikeysz = MAXALIGN(IndexTupleSize(left_hikey));
+                       datapos += left_hikeysz;
+                       datalen -= left_hikeysz;
+               }
+               Assert(datalen == 0);
+
                newlpage = PageGetTempPageCopySpecial(lpage);
 
                /* Set high key */
@@ -358,7 +310,7 @@ btree_xlog_split(bool onleft, bool isroot,
                        Item            item;
 
                        /* add the new item if it was inserted on left page */
-                       if (onleft && off == newitemoff)
+                       if (onleft && off == xlrec->newitemoff)
                        {
                                if (PageAddItem(newlpage, newitem, newitemsz, leftoff,
                                                                false, false) == InvalidOffsetNumber)
@@ -376,7 +328,7 @@ btree_xlog_split(bool onleft, bool isroot,
                }
 
                /* cope with possibility that newitem goes at the end */
-               if (onleft && off == newitemoff)
+               if (onleft && off == xlrec->newitemoff)
                {
                        if (PageAddItem(newlpage, newitem, newitemsz, leftoff,
                                                        false, false) == InvalidOffsetNumber)
@@ -390,7 +342,7 @@ btree_xlog_split(bool onleft, bool isroot,
                lopaque->btpo_flags = BTP_INCOMPLETE_SPLIT;
                if (isleaf)
                        lopaque->btpo_flags |= BTP_LEAF;
-               lopaque->btpo_next = xlrec->rightsib;
+               lopaque->btpo_next = rightsib;
                lopaque->btpo_cycleid = 0;
 
                PageSetLSN(lpage, lsn);
@@ -410,22 +362,16 @@ btree_xlog_split(bool onleft, bool isroot,
         * replay, because no other index update can be in progress, and readers
         * will cope properly when following an obsolete left-link.
         */
-       if (xlrec->rnext != P_NONE)
+       if (rnext != P_NONE)
        {
-               /*
-                * the backup block containing right sibling is 1 or 2, depending
-                * whether this was a leaf or internal page.
-                */
-               int                     rnext_index = isleaf ? 1 : 2;
                Buffer          buffer;
 
-               if (XLogReadBufferForRedo(lsn, record, rnext_index, xlrec->node,
-                                                                 xlrec->rnext, &buffer) == BLK_NEEDS_REDO)
+               if (XLogReadBufferForRedo(lsn, record, 2, &buffer) == BLK_NEEDS_REDO)
                {
                        Page            page = (Page) BufferGetPage(buffer);
                        BTPageOpaque pageop = (BTPageOpaque) PageGetSpecialPointer(page);
 
-                       pageop->btpo_prev = xlrec->rightsib;
+                       pageop->btpo_prev = rightsib;
 
                        PageSetLSN(page, lsn);
                        MarkBufferDirty(buffer);
@@ -466,9 +412,13 @@ btree_xlog_vacuum(XLogRecPtr lsn, XLogRecord *record)
         */
        if (HotStandbyActiveInReplay())
        {
+               RelFileNode thisrnode;
+               BlockNumber thisblkno;
                BlockNumber blkno;
 
-               for (blkno = xlrec->lastBlockVacuumed + 1; blkno < xlrec->block; blkno++)
+               XLogRecGetBlockTag(record, 0, &thisrnode, NULL, &thisblkno);
+
+               for (blkno = xlrec->lastBlockVacuumed + 1; blkno < thisblkno; blkno++)
                {
                        /*
                         * We use RBM_NORMAL_NO_LOG mode because it's not an error
@@ -483,7 +433,7 @@ btree_xlog_vacuum(XLogRecPtr lsn, XLogRecord *record)
                         * buffer manager we could optimise this so that if the block is
                         * not in shared_buffers we confirm it as unpinned.
                         */
-                       buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, blkno,
+                       buffer = XLogReadBufferExtended(thisrnode, MAIN_FORKNUM, blkno,
                                                                                        RBM_NORMAL_NO_LOG);
                        if (BufferIsValid(buffer))
                        {
@@ -497,20 +447,23 @@ btree_xlog_vacuum(XLogRecPtr lsn, XLogRecord *record)
         * Like in btvacuumpage(), we need to take a cleanup lock on every leaf
         * page. See nbtree/README for details.
         */
-       if (XLogReadBufferForRedoExtended(lsn, record, 0,
-                                                                         xlrec->node, MAIN_FORKNUM, xlrec->block,
-                                                                         RBM_NORMAL, true, &buffer)
+       if (XLogReadBufferForRedoExtended(lsn, record, 0, RBM_NORMAL, true, &buffer)
                == BLK_NEEDS_REDO)
        {
+               char       *ptr;
+               Size            len;
+
+               ptr = XLogRecGetBlockData(record, 0, &len);
+
                page = (Page) BufferGetPage(buffer);
 
-               if (record->xl_len > SizeOfBtreeVacuum)
+               if (len > 0)
                {
                        OffsetNumber *unused;
                        OffsetNumber *unend;
 
-                       unused = (OffsetNumber *) ((char *) xlrec + SizeOfBtreeVacuum);
-                       unend = (OffsetNumber *) ((char *) xlrec + record->xl_len);
+                       unused = (OffsetNumber *) ptr;
+                       unend = (OffsetNumber *) ((char *) ptr + len);
 
                        if ((unend - unused) > 0)
                                PageIndexMultiDelete(page, unused, unend - unused);
@@ -542,13 +495,16 @@ btree_xlog_vacuum(XLogRecPtr lsn, XLogRecord *record)
  * XXX optimise later with something like XLogPrefetchBuffer()
  */
 static TransactionId
-btree_xlog_delete_get_latestRemovedXid(xl_btree_delete *xlrec)
+btree_xlog_delete_get_latestRemovedXid(XLogRecord *record)
 {
+       xl_btree_delete *xlrec = (xl_btree_delete *) XLogRecGetData(record);
        OffsetNumber *unused;
        Buffer          ibuffer,
                                hbuffer;
        Page            ipage,
                                hpage;
+       RelFileNode rnode;
+       BlockNumber blkno;
        ItemId          iitemid,
                                hitemid;
        IndexTuple      itup;
@@ -588,9 +544,11 @@ btree_xlog_delete_get_latestRemovedXid(xl_btree_delete *xlrec)
         * InvalidTransactionId to cancel all HS transactions.  That's probably
         * overkill, but it's safe, and certainly better than panicking here.
         */
-       ibuffer = XLogReadBuffer(xlrec->node, xlrec->block, false);
+       XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+       ibuffer = XLogReadBufferExtended(rnode, MAIN_FORKNUM, blkno, RBM_NORMAL);
        if (!BufferIsValid(ibuffer))
                return InvalidTransactionId;
+       LockBuffer(ibuffer, BT_READ);
        ipage = (Page) BufferGetPage(ibuffer);
 
        /*
@@ -611,12 +569,13 @@ btree_xlog_delete_get_latestRemovedXid(xl_btree_delete *xlrec)
                 * Locate the heap page that the index tuple points at
                 */
                hblkno = ItemPointerGetBlockNumber(&(itup->t_tid));
-               hbuffer = XLogReadBuffer(xlrec->hnode, hblkno, false);
+               hbuffer = XLogReadBufferExtended(xlrec->hnode, MAIN_FORKNUM, hblkno, RBM_NORMAL);
                if (!BufferIsValid(hbuffer))
                {
                        UnlockReleaseBuffer(ibuffer);
                        return InvalidTransactionId;
                }
+               LockBuffer(hbuffer, BUFFER_LOCK_SHARE);
                hpage = (Page) BufferGetPage(hbuffer);
 
                /*
@@ -698,17 +657,19 @@ btree_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
         */
        if (InHotStandby)
        {
-               TransactionId latestRemovedXid = btree_xlog_delete_get_latestRemovedXid(xlrec);
+               TransactionId latestRemovedXid = btree_xlog_delete_get_latestRemovedXid(record);
+               RelFileNode rnode;
 
-               ResolveRecoveryConflictWithSnapshot(latestRemovedXid, xlrec->node);
+               XLogRecGetBlockTag(record, 0, &rnode, NULL, NULL);
+
+               ResolveRecoveryConflictWithSnapshot(latestRemovedXid, rnode);
        }
 
        /*
         * We don't need to take a cleanup lock to apply these changes. See
         * nbtree/README for details.
         */
-       if (XLogReadBufferForRedo(lsn, record, 0, xlrec->node, xlrec->block,
-                                                         &buffer) == BLK_NEEDS_REDO)
+       if (XLogReadBufferForRedo(lsn, record, 0, &buffer) == BLK_NEEDS_REDO)
        {
                page = (Page) BufferGetPage(buffer);
 
@@ -739,14 +700,11 @@ static void
 btree_xlog_mark_page_halfdead(uint8 info, XLogRecPtr lsn, XLogRecord *record)
 {
        xl_btree_mark_page_halfdead *xlrec = (xl_btree_mark_page_halfdead *) XLogRecGetData(record);
-       BlockNumber parent;
        Buffer          buffer;
        Page            page;
        BTPageOpaque pageop;
        IndexTupleData trunctuple;
 
-       parent = ItemPointerGetBlockNumber(&(xlrec->target.tid));
-
        /*
         * In normal operation, we would lock all the pages this WAL record
         * touches before changing any of them.  In WAL replay, it should be okay
@@ -756,8 +714,7 @@ btree_xlog_mark_page_halfdead(uint8 info, XLogRecPtr lsn, XLogRecord *record)
         */
 
        /* parent page */
-       if (XLogReadBufferForRedo(lsn, record, 0, xlrec->target.node, parent,
-                                                         &buffer) == BLK_NEEDS_REDO)
+       if (XLogReadBufferForRedo(lsn, record, 1, &buffer) == BLK_NEEDS_REDO)
        {
                OffsetNumber poffset;
                ItemId          itemid;
@@ -768,7 +725,7 @@ btree_xlog_mark_page_halfdead(uint8 info, XLogRecPtr lsn, XLogRecord *record)
                page = (Page) BufferGetPage(buffer);
                pageop = (BTPageOpaque) PageGetSpecialPointer(page);
 
-               poffset = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
+               poffset = xlrec->poffset;
 
                nextoffset = OffsetNumberNext(poffset);
                itemid = PageGetItemId(page, nextoffset);
@@ -788,8 +745,7 @@ btree_xlog_mark_page_halfdead(uint8 info, XLogRecPtr lsn, XLogRecord *record)
                UnlockReleaseBuffer(buffer);
 
        /* Rewrite the leaf page as a halfdead page */
-       buffer = XLogReadBuffer(xlrec->target.node, xlrec->leafblk, true);
-       Assert(BufferIsValid(buffer));
+       XLogReadBufferForRedoExtended(lsn, record, 0, RBM_ZERO, false, &buffer);
        page = (Page) BufferGetPage(buffer);
 
        _bt_pageinit(page, BufferGetPageSize(buffer));
@@ -825,14 +781,12 @@ static void
 btree_xlog_unlink_page(uint8 info, XLogRecPtr lsn, XLogRecord *record)
 {
        xl_btree_unlink_page *xlrec = (xl_btree_unlink_page *) XLogRecGetData(record);
-       BlockNumber target;
        BlockNumber leftsib;
        BlockNumber rightsib;
        Buffer          buffer;
        Page            page;
        BTPageOpaque pageop;
 
-       target = xlrec->deadblk;
        leftsib = xlrec->leftsib;
        rightsib = xlrec->rightsib;
 
@@ -845,8 +799,7 @@ btree_xlog_unlink_page(uint8 info, XLogRecPtr lsn, XLogRecord *record)
         */
 
        /* Fix left-link of right sibling */
-       if (XLogReadBufferForRedo(lsn, record, 0, xlrec->node, rightsib, &buffer)
-               == BLK_NEEDS_REDO)
+       if (XLogReadBufferForRedo(lsn, record, 2, &buffer) == BLK_NEEDS_REDO)
        {
                page = (Page) BufferGetPage(buffer);
                pageop = (BTPageOpaque) PageGetSpecialPointer(page);
@@ -861,8 +814,7 @@ btree_xlog_unlink_page(uint8 info, XLogRecPtr lsn, XLogRecord *record)
        /* Fix right-link of left sibling, if any */
        if (leftsib != P_NONE)
        {
-               if (XLogReadBufferForRedo(lsn, record, 1, xlrec->node, leftsib, &buffer)
-                       == BLK_NEEDS_REDO)
+               if (XLogReadBufferForRedo(lsn, record, 1, &buffer) == BLK_NEEDS_REDO)
                {
                        page = (Page) BufferGetPage(buffer);
                        pageop = (BTPageOpaque) PageGetSpecialPointer(page);
@@ -876,8 +828,7 @@ btree_xlog_unlink_page(uint8 info, XLogRecPtr lsn, XLogRecord *record)
        }
 
        /* Rewrite target page as empty deleted page */
-       buffer = XLogReadBuffer(xlrec->node, target, true);
-       Assert(BufferIsValid(buffer));
+       XLogReadBufferForRedoExtended(lsn, record, 0, RBM_ZERO, false, &buffer);
        page = (Page) BufferGetPage(buffer);
 
        _bt_pageinit(page, BufferGetPageSize(buffer));
@@ -898,7 +849,7 @@ btree_xlog_unlink_page(uint8 info, XLogRecPtr lsn, XLogRecord *record)
         * itself, update the leaf to point to the next remaining child in the
         * branch.
         */
-       if (target != xlrec->leafblk)
+       if (XLogRecHasBlockRef(record, 3))
        {
                /*
                 * There is no real data on the page, so we just re-create it from
@@ -906,8 +857,7 @@ btree_xlog_unlink_page(uint8 info, XLogRecPtr lsn, XLogRecord *record)
                 */
                IndexTupleData trunctuple;
 
-               buffer = XLogReadBuffer(xlrec->node, xlrec->leafblk, true);
-               Assert(BufferIsValid(buffer));
+               XLogReadBufferForRedoExtended(lsn, record, 3, RBM_ZERO, false, &buffer);
                page = (Page) BufferGetPage(buffer);
                pageop = (BTPageOpaque) PageGetSpecialPointer(page);
 
@@ -936,15 +886,7 @@ btree_xlog_unlink_page(uint8 info, XLogRecPtr lsn, XLogRecord *record)
 
        /* Update metapage if needed */
        if (info == XLOG_BTREE_UNLINK_PAGE_META)
-       {
-               xl_btree_metadata md;
-
-               memcpy(&md, (char *) xlrec + SizeOfBtreeUnlinkPage,
-                          sizeof(xl_btree_metadata));
-               _bt_restore_meta(xlrec->node, lsn,
-                                                md.root, md.level,
-                                                md.fastroot, md.fastlevel);
-       }
+               _bt_restore_meta(lsn, record, 4);
 }
 
 static void
@@ -954,9 +896,10 @@ btree_xlog_newroot(XLogRecPtr lsn, XLogRecord *record)
        Buffer          buffer;
        Page            page;
        BTPageOpaque pageop;
+       char       *ptr;
+       Size            len;
 
-       buffer = XLogReadBuffer(xlrec->node, xlrec->rootblk, true);
-       Assert(BufferIsValid(buffer));
+       XLogReadBufferForRedoExtended(lsn, record, 0, RBM_ZERO, false, &buffer);
        page = (Page) BufferGetPage(buffer);
 
        _bt_pageinit(page, BufferGetPageSize(buffer));
@@ -969,30 +912,20 @@ btree_xlog_newroot(XLogRecPtr lsn, XLogRecord *record)
                pageop->btpo_flags |= BTP_LEAF;
        pageop->btpo_cycleid = 0;
 
-       if (record->xl_len > SizeOfBtreeNewroot)
+       if (xlrec->level > 0)
        {
-               IndexTuple      itup;
-               BlockNumber cblkno;
-
-               _bt_restore_page(page,
-                                                (char *) xlrec + SizeOfBtreeNewroot,
-                                                record->xl_len - SizeOfBtreeNewroot);
-               /* extract block number of the left-hand split page */
-               itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, P_HIKEY));
-               cblkno = ItemPointerGetBlockNumber(&(itup->t_tid));
-               Assert(ItemPointerGetOffsetNumber(&(itup->t_tid)) == P_HIKEY);
+               ptr = XLogRecGetBlockData(record, 0, &len);
+               _bt_restore_page(page, ptr, len);
 
                /* Clear the incomplete-split flag in left child */
-               _bt_clear_incomplete_split(lsn, record, 0, xlrec->node, cblkno);
+               _bt_clear_incomplete_split(lsn, record, 1);
        }
 
        PageSetLSN(page, lsn);
        MarkBufferDirty(buffer);
        UnlockReleaseBuffer(buffer);
 
-       _bt_restore_meta(xlrec->node, lsn,
-                                        xlrec->rootblk, xlrec->level,
-                                        xlrec->rootblk, xlrec->level);
+       _bt_restore_meta(lsn, record, 2);
 }
 
 static void
@@ -1015,9 +948,6 @@ btree_xlog_reuse_page(XLogRecPtr lsn, XLogRecord *record)
                ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid,
                                                                                        xlrec->node);
        }
-
-       /* Backup blocks are not used in reuse_page records */
-       Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
 }
 
 
index 97dc3c0fa9115290db63ae412fd047d2601c6596..ffc70aea3dd8471ca68f2449338a42e410e81cf0 100644 (file)
@@ -27,54 +27,39 @@ brin_desc(StringInfo buf, XLogRecord *record)
        {
                xl_brin_createidx *xlrec = (xl_brin_createidx *) rec;
 
-               appendStringInfo(buf, "v%d pagesPerRange %u rel %u/%u/%u",
-                                                xlrec->version, xlrec->pagesPerRange,
-                                                xlrec->node.spcNode, xlrec->node.dbNode,
-                                                xlrec->node.relNode);
+               appendStringInfo(buf, "v%d pagesPerRange %u",
+                                                xlrec->version, xlrec->pagesPerRange);
        }
        else if (info == XLOG_BRIN_INSERT)
        {
                xl_brin_insert *xlrec = (xl_brin_insert *) rec;
 
-               appendStringInfo(buf, "rel %u/%u/%u heapBlk %u revmapBlk %u pagesPerRange %u TID (%u,%u)",
-                                                xlrec->node.spcNode, xlrec->node.dbNode,
-                                                xlrec->node.relNode,
-                                                xlrec->heapBlk, xlrec->revmapBlk,
+               appendStringInfo(buf, "heapBlk %u pagesPerRange %u offnum %u",
+                                                xlrec->heapBlk,
                                                 xlrec->pagesPerRange,
-                                                ItemPointerGetBlockNumber(&xlrec->tid),
-                                                ItemPointerGetOffsetNumber(&xlrec->tid));
+                                                xlrec->offnum);
        }
        else if (info == XLOG_BRIN_UPDATE)
        {
                xl_brin_update *xlrec = (xl_brin_update *) rec;
 
-               appendStringInfo(buf, "rel %u/%u/%u heapBlk %u revmapBlk %u pagesPerRange %u old TID (%u,%u) TID (%u,%u)",
-                                                xlrec->insert.node.spcNode, xlrec->insert.node.dbNode,
-                                                xlrec->insert.node.relNode,
-                                                xlrec->insert.heapBlk, xlrec->insert.revmapBlk,
+               appendStringInfo(buf, "heapBlk %u pagesPerRange %u old offnum %u, new offnum %u",
+                                                xlrec->insert.heapBlk,
                                                 xlrec->insert.pagesPerRange,
-                                                ItemPointerGetBlockNumber(&xlrec->oldtid),
-                                                ItemPointerGetOffsetNumber(&xlrec->oldtid),
-                                                ItemPointerGetBlockNumber(&xlrec->insert.tid),
-                                                ItemPointerGetOffsetNumber(&xlrec->insert.tid));
+                                                xlrec->oldOffnum,
+                                                xlrec->insert.offnum);
        }
        else if (info == XLOG_BRIN_SAMEPAGE_UPDATE)
        {
                xl_brin_samepage_update *xlrec = (xl_brin_samepage_update *) rec;
 
-               appendStringInfo(buf, "rel %u/%u/%u TID (%u,%u)",
-                                                xlrec->node.spcNode, xlrec->node.dbNode,
-                                                xlrec->node.relNode,
-                                                ItemPointerGetBlockNumber(&xlrec->tid),
-                                                ItemPointerGetOffsetNumber(&xlrec->tid));
+               appendStringInfo(buf, "offnum %u", xlrec->offnum);
        }
        else if (info == XLOG_BRIN_REVMAP_EXTEND)
        {
                xl_brin_revmap_extend *xlrec = (xl_brin_revmap_extend *) rec;
 
-               appendStringInfo(buf, "rel %u/%u/%u targetBlk %u",
-                                                xlrec->node.spcNode, xlrec->node.dbNode,
-                                                xlrec->node.relNode, xlrec->targetBlk);
+               appendStringInfo(buf, "targetBlk %u", xlrec->targetBlk);
        }
 }
 
index 2f783cee2bbd77477f274d91103d9d1a0dd418af..a4dc755bca4b0956fe8f08fa864f594426af7912 100644 (file)
 #include "postgres.h"
 
 #include "access/gin_private.h"
+#include "access/xlogutils.h"
 #include "lib/stringinfo.h"
 #include "storage/relfilenode.h"
 
-static void
-desc_node(StringInfo buf, RelFileNode node, BlockNumber blkno)
-{
-       appendStringInfo(buf, "node: %u/%u/%u blkno: %u",
-                                        node.spcNode, node.dbNode, node.relNode, blkno);
-}
-
 static void
 desc_recompress_leaf(StringInfo buf, ginxlogRecompressDataLeaf *insertData)
 {
@@ -85,18 +79,17 @@ gin_desc(StringInfo buf, XLogRecord *record)
        switch (info)
        {
                case XLOG_GIN_CREATE_INDEX:
-                       desc_node(buf, *(RelFileNode *) rec, GIN_ROOT_BLKNO);
+                       /* no further information */
                        break;
                case XLOG_GIN_CREATE_PTREE:
-                       desc_node(buf, ((ginxlogCreatePostingTree *) rec)->node, ((ginxlogCreatePostingTree *) rec)->blkno);
+                       /* no further information */
                        break;
                case XLOG_GIN_INSERT:
                        {
                                ginxlogInsert *xlrec = (ginxlogInsert *) rec;
                                char       *payload = rec + sizeof(ginxlogInsert);
 
-                               desc_node(buf, xlrec->node, xlrec->blkno);
-                               appendStringInfo(buf, " isdata: %c isleaf: %c",
+                               appendStringInfo(buf, "isdata: %c isleaf: %c",
                                                          (xlrec->flags & GIN_INSERT_ISDATA) ? 'T' : 'F',
                                                         (xlrec->flags & GIN_INSERT_ISLEAF) ? 'T' : 'F');
                                if (!(xlrec->flags & GIN_INSERT_ISLEAF))
@@ -119,7 +112,7 @@ gin_desc(StringInfo buf, XLogRecord *record)
                                        ginxlogRecompressDataLeaf *insertData =
                                        (ginxlogRecompressDataLeaf *) payload;
 
-                                       if (record->xl_info & XLR_BKP_BLOCK(0))
+                                       if (XLogRecHasBlockImage(record, 0))
                                                appendStringInfo(buf, " (full page image)");
                                        else
                                                desc_recompress_leaf(buf, insertData);
@@ -139,39 +132,38 @@ gin_desc(StringInfo buf, XLogRecord *record)
                        {
                                ginxlogSplit *xlrec = (ginxlogSplit *) rec;
 
-                               desc_node(buf, ((ginxlogSplit *) rec)->node, ((ginxlogSplit *) rec)->lblkno);
-                               appendStringInfo(buf, " isrootsplit: %c", (((ginxlogSplit *) rec)->flags & GIN_SPLIT_ROOT) ? 'T' : 'F');
+                               appendStringInfo(buf, "isrootsplit: %c",
+                                                                (((ginxlogSplit *) rec)->flags & GIN_SPLIT_ROOT) ? 'T' : 'F');
                                appendStringInfo(buf, " isdata: %c isleaf: %c",
                                                          (xlrec->flags & GIN_INSERT_ISDATA) ? 'T' : 'F',
                                                         (xlrec->flags & GIN_INSERT_ISLEAF) ? 'T' : 'F');
                        }
                        break;
                case XLOG_GIN_VACUUM_PAGE:
-                       desc_node(buf, ((ginxlogVacuumPage *) rec)->node, ((ginxlogVacuumPage *) rec)->blkno);
+                       /* no further information */
                        break;
                case XLOG_GIN_VACUUM_DATA_LEAF_PAGE:
                        {
                                ginxlogVacuumDataLeafPage *xlrec = (ginxlogVacuumDataLeafPage *) rec;
 
-                               desc_node(buf, xlrec->node, xlrec->blkno);
-                               if (record->xl_info & XLR_BKP_BLOCK(0))
+                               if (XLogRecHasBlockImage(record, 0))
                                        appendStringInfo(buf, " (full page image)");
                                else
                                        desc_recompress_leaf(buf, &xlrec->data);
                        }
                        break;
                case XLOG_GIN_DELETE_PAGE:
-                       desc_node(buf, ((ginxlogDeletePage *) rec)->node, ((ginxlogDeletePage *) rec)->blkno);
+                       /* no further information */
                        break;
                case XLOG_GIN_UPDATE_META_PAGE:
-                       desc_node(buf, ((ginxlogUpdateMeta *) rec)->node, GIN_METAPAGE_BLKNO);
+                       /* no further information */
                        break;
                case XLOG_GIN_INSERT_LISTPAGE:
-                       desc_node(buf, ((ginxlogInsertListPage *) rec)->node, ((ginxlogInsertListPage *) rec)->blkno);
+                       /* no further information */
                        break;
                case XLOG_GIN_DELETE_LISTPAGE:
-                       appendStringInfo(buf, "%d pages, ", ((ginxlogDeleteListPages *) rec)->ndeleted);
-                       desc_node(buf, ((ginxlogDeleteListPages *) rec)->node, GIN_METAPAGE_BLKNO);
+                       appendStringInfo(buf, "ndeleted: %d",
+                                                        ((ginxlogDeleteListPages *) rec)->ndeleted);
                        break;
        }
 }
index db3ba13ccdd0bcf0f37ee29a83e597ae126d4720..931f15a3e026d46534e40dbf5a4629441d21f6ae 100644 (file)
 #include "lib/stringinfo.h"
 #include "storage/relfilenode.h"
 
-static void
-out_target(StringInfo buf, RelFileNode node)
-{
-       appendStringInfo(buf, "rel %u/%u/%u",
-                                        node.spcNode, node.dbNode, node.relNode);
-}
-
 static void
 out_gistxlogPageUpdate(StringInfo buf, gistxlogPageUpdate *xlrec)
 {
-       out_target(buf, xlrec->node);
-       appendStringInfo(buf, "; block number %u", xlrec->blkno);
 }
 
 static void
 out_gistxlogPageSplit(StringInfo buf, gistxlogPageSplit *xlrec)
 {
-       appendStringInfoString(buf, "page_split: ");
-       out_target(buf, xlrec->node);
-       appendStringInfo(buf, "; block number %u splits to %d pages",
-                                        xlrec->origblkno, xlrec->npage);
+       appendStringInfo(buf, "page_split: splits to %d pages",
+                                        xlrec->npage);
 }
 
 void
index ee2c073f71f73564f113f7da8166063cba2db48d..76ff2a14a439c27cc478804375479ab134e6efba 100644 (file)
 
 #include "access/heapam_xlog.h"
 
-static void
-out_target(StringInfo buf, xl_heaptid *target)
-{
-       appendStringInfo(buf, "rel %u/%u/%u; tid %u/%u",
-                        target->node.spcNode, target->node.dbNode, target->node.relNode,
-                                        ItemPointerGetBlockNumber(&(target->tid)),
-                                        ItemPointerGetOffsetNumber(&(target->tid)));
-}
-
 static void
 out_infobits(StringInfo buf, uint8 infobits)
 {
@@ -51,13 +42,13 @@ heap_desc(StringInfo buf, XLogRecord *record)
        {
                xl_heap_insert *xlrec = (xl_heap_insert *) rec;
 
-               out_target(buf, &(xlrec->target));
+               appendStringInfo(buf, "off %u", xlrec->offnum);
        }
        else if (info == XLOG_HEAP_DELETE)
        {
                xl_heap_delete *xlrec = (xl_heap_delete *) rec;
 
-               out_target(buf, &(xlrec->target));
+               appendStringInfo(buf, "off %u", xlrec->offnum);
                appendStringInfoChar(buf, ' ');
                out_infobits(buf, xlrec->infobits_set);
        }
@@ -65,24 +56,24 @@ heap_desc(StringInfo buf, XLogRecord *record)
        {
                xl_heap_update *xlrec = (xl_heap_update *) rec;
 
-               out_target(buf, &(xlrec->target));
-               appendStringInfo(buf, " xmax %u ", xlrec->old_xmax);
+               appendStringInfo(buf, "off %u xmax %u ",
+                                                xlrec->old_offnum,
+                                                xlrec->old_xmax);
                out_infobits(buf, xlrec->old_infobits_set);
-               appendStringInfo(buf, "; new tid %u/%u xmax %u",
-                                                ItemPointerGetBlockNumber(&(xlrec->newtid)),
-                                                ItemPointerGetOffsetNumber(&(xlrec->newtid)),
+               appendStringInfo(buf, "; new off %u xmax %u",
+                                                xlrec->new_offnum,
                                                 xlrec->new_xmax);
        }
        else if (info == XLOG_HEAP_HOT_UPDATE)
        {
                xl_heap_update *xlrec = (xl_heap_update *) rec;
 
-               out_target(buf, &(xlrec->target));
-               appendStringInfo(buf, " xmax %u ", xlrec->old_xmax);
+               appendStringInfo(buf, "off %u xmax %u ",
+                                                xlrec->old_offnum,
+                                                xlrec->old_xmax);
                out_infobits(buf, xlrec->old_infobits_set);
-               appendStringInfo(buf, "; new tid %u/%u xmax %u",
-                                                ItemPointerGetBlockNumber(&(xlrec->newtid)),
-                                                ItemPointerGetOffsetNumber(&(xlrec->newtid)),
+               appendStringInfo(buf, "; new off %u xmax %u",
+                                                xlrec->new_offnum,
                                                 xlrec->new_xmax);
        }
        else if (info == XLOG_HEAP_LOCK)
@@ -90,15 +81,14 @@ heap_desc(StringInfo buf, XLogRecord *record)
                xl_heap_lock *xlrec = (xl_heap_lock *) rec;
 
                appendStringInfo(buf, "xid %u: ", xlrec->locking_xid);
-               out_target(buf, &(xlrec->target));
-               appendStringInfoChar(buf, ' ');
+               appendStringInfo(buf, "off %u ", xlrec->offnum);
                out_infobits(buf, xlrec->infobits_set);
        }
        else if (info == XLOG_HEAP_INPLACE)
        {
                xl_heap_inplace *xlrec = (xl_heap_inplace *) rec;
 
-               out_target(buf, &(xlrec->target));
+               appendStringInfo(buf, "off %u", xlrec->offnum);
        }
 }
 void
@@ -112,18 +102,13 @@ heap2_desc(StringInfo buf, XLogRecord *record)
        {
                xl_heap_clean *xlrec = (xl_heap_clean *) rec;
 
-               appendStringInfo(buf, "rel %u/%u/%u; blk %u remxid %u",
-                                                xlrec->node.spcNode, xlrec->node.dbNode,
-                                                xlrec->node.relNode, xlrec->block,
-                                                xlrec->latestRemovedXid);
+               appendStringInfo(buf, "remxid %u", xlrec->latestRemovedXid);
        }
        else if (info == XLOG_HEAP2_FREEZE_PAGE)
        {
                xl_heap_freeze_page *xlrec = (xl_heap_freeze_page *) rec;
 
-               appendStringInfo(buf, "rel %u/%u/%u; blk %u; cutoff xid %u ntuples %u",
-                                                xlrec->node.spcNode, xlrec->node.dbNode,
-                                                xlrec->node.relNode, xlrec->block,
+               appendStringInfo(buf, "cutoff xid %u ntuples %u",
                                                 xlrec->cutoff_xid, xlrec->ntuples);
        }
        else if (info == XLOG_HEAP2_CLEANUP_INFO)
@@ -136,17 +121,13 @@ heap2_desc(StringInfo buf, XLogRecord *record)
        {
                xl_heap_visible *xlrec = (xl_heap_visible *) rec;
 
-               appendStringInfo(buf, "rel %u/%u/%u; blk %u",
-                                                xlrec->node.spcNode, xlrec->node.dbNode,
-                                                xlrec->node.relNode, xlrec->block);
+               appendStringInfo(buf, "cutoff xid %u", xlrec->cutoff_xid);
        }
        else if (info == XLOG_HEAP2_MULTI_INSERT)
        {
                xl_heap_multi_insert *xlrec = (xl_heap_multi_insert *) rec;
 
-               appendStringInfo(buf, "rel %u/%u/%u; blk %u; %d tuples",
-                               xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode,
-                                                xlrec->blkno, xlrec->ntuples);
+               appendStringInfo(buf, "%d tuples", xlrec->ntuples);
        }
        else if (info == XLOG_HEAP2_LOCK_UPDATED)
        {
@@ -154,13 +135,18 @@ heap2_desc(StringInfo buf, XLogRecord *record)
 
                appendStringInfo(buf, "xmax %u msk %04x; ", xlrec->xmax,
                                                 xlrec->infobits_set);
-               out_target(buf, &(xlrec->target));
+               appendStringInfo(buf, "off %u", xlrec->offnum);
        }
        else if (info == XLOG_HEAP2_NEW_CID)
        {
                xl_heap_new_cid *xlrec = (xl_heap_new_cid *) rec;
 
-               out_target(buf, &(xlrec->target));
+               appendStringInfo(buf, "rel %u/%u/%u; tid %u/%u",
+                                                xlrec->target_node.spcNode,
+                                                xlrec->target_node.dbNode,
+                                                xlrec->target_node.relNode,
+                                                ItemPointerGetBlockNumber(&(xlrec->target_tid)),
+                                                ItemPointerGetOffsetNumber(&(xlrec->target_tid)));
                appendStringInfo(buf, "; cmin: %u, cmax: %u, combo: %u",
                                                 xlrec->cmin, xlrec->cmax, xlrec->combocid);
        }
index 8b63f2b6ba9fbc2570456dfc860648fb1acb8d79..9b89cc2a4ccfccf8c6c6f3146b66201260d95e0c 100644 (file)
 
 #include "access/nbtree.h"
 
-static void
-out_target(StringInfo buf, xl_btreetid *target)
-{
-       appendStringInfo(buf, "rel %u/%u/%u; tid %u/%u",
-                        target->node.spcNode, target->node.dbNode, target->node.relNode,
-                                        ItemPointerGetBlockNumber(&(target->tid)),
-                                        ItemPointerGetOffsetNumber(&(target->tid)));
-}
-
 void
 btree_desc(StringInfo buf, XLogRecord *record)
 {
@@ -39,7 +30,7 @@ btree_desc(StringInfo buf, XLogRecord *record)
                        {
                                xl_btree_insert *xlrec = (xl_btree_insert *) rec;
 
-                               out_target(buf, &(xlrec->target));
+                               appendStringInfo(buf, "off %u", xlrec->offnum);
                                break;
                        }
                case XLOG_BTREE_SPLIT_L:
@@ -49,11 +40,7 @@ btree_desc(StringInfo buf, XLogRecord *record)
                        {
                                xl_btree_split *xlrec = (xl_btree_split *) rec;
 
-                               appendStringInfo(buf, "rel %u/%u/%u ",
-                                                                xlrec->node.spcNode, xlrec->node.dbNode,
-                                                                xlrec->node.relNode);
-                               appendStringInfo(buf, "left %u, right %u, next %u, level %u, firstright %d",
-                                                                xlrec->leftsib, xlrec->rightsib, xlrec->rnext,
+                               appendStringInfo(buf, "level %u, firstright %d",
                                                                 xlrec->level, xlrec->firstright);
                                break;
                        }
@@ -61,9 +48,7 @@ btree_desc(StringInfo buf, XLogRecord *record)
                        {
                                xl_btree_vacuum *xlrec = (xl_btree_vacuum *) rec;
 
-                               appendStringInfo(buf, "rel %u/%u/%u; blk %u, lastBlockVacuumed %u",
-                                                                xlrec->node.spcNode, xlrec->node.dbNode,
-                                                                xlrec->node.relNode, xlrec->block,
+                               appendStringInfo(buf, "lastBlockVacuumed %u",
                                                                 xlrec->lastBlockVacuumed);
                                break;
                        }
@@ -71,18 +56,14 @@ btree_desc(StringInfo buf, XLogRecord *record)
                        {
                                xl_btree_delete *xlrec = (xl_btree_delete *) rec;
 
-                               appendStringInfo(buf, "index %u/%u/%u; iblk %u, heap %u/%u/%u;",
-                                                                xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode,
-                                                                xlrec->block,
-                                                                xlrec->hnode.spcNode, xlrec->hnode.dbNode, xlrec->hnode.relNode);
+                               appendStringInfo(buf, "%d items", xlrec->nitems);
                                break;
                        }
                case XLOG_BTREE_MARK_PAGE_HALFDEAD:
                        {
                                xl_btree_mark_page_halfdead *xlrec = (xl_btree_mark_page_halfdead *) rec;
 
-                               out_target(buf, &(xlrec->target));
-                               appendStringInfo(buf, "; topparent %u; leaf %u; left %u; right %u",
+                               appendStringInfo(buf, "topparent %u; leaf %u; left %u; right %u",
                                                                 xlrec->topparent, xlrec->leafblk, xlrec->leftblk, xlrec->rightblk);
                                break;
                        }
@@ -91,22 +72,17 @@ btree_desc(StringInfo buf, XLogRecord *record)
                        {
                                xl_btree_unlink_page *xlrec = (xl_btree_unlink_page *) rec;
 
-                               appendStringInfo(buf, "rel %u/%u/%u; ",
-                                                                xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode);
-                               appendStringInfo(buf, "dead %u; left %u; right %u; btpo_xact %u; ",
-                                                                xlrec->deadblk, xlrec->leftsib, xlrec->rightsib, xlrec->btpo_xact);
-                               appendStringInfo(buf, "leaf %u; leafleft %u; leafright %u; topparent %u",
-                                                                xlrec->leafblk, xlrec->leafleftsib, xlrec->leafrightsib, xlrec->topparent);
+                               appendStringInfo(buf, "left %u; right %u; btpo_xact %u; ",
+                                                                xlrec->leftsib, xlrec->rightsib, xlrec->btpo_xact);
+                               appendStringInfo(buf, "leafleft %u; leafright %u; topparent %u",
+                                                                xlrec->leafleftsib, xlrec->leafrightsib, xlrec->topparent);
                                break;
                        }
                case XLOG_BTREE_NEWROOT:
                        {
                                xl_btree_newroot *xlrec = (xl_btree_newroot *) rec;
 
-                               appendStringInfo(buf, "rel %u/%u/%u; root %u lev %u",
-                                                                xlrec->node.spcNode, xlrec->node.dbNode,
-                                                                xlrec->node.relNode,
-                                                                xlrec->rootblk, xlrec->level);
+                               appendStringInfo(buf, "lev %u", xlrec->level);
                                break;
                        }
                case XLOG_BTREE_REUSE_PAGE:
index 3ee0427dcb6a798a237ae411ac1230a59fa0ce69..6c16776d3ebe39661189f43600225275df1db975 100644 (file)
 
 #include "access/spgist_private.h"
 
-static void
-out_target(StringInfo buf, RelFileNode node)
-{
-       appendStringInfo(buf, "rel %u/%u/%u ",
-                                        node.spcNode, node.dbNode, node.relNode);
-}
-
 void
 spg_desc(StringInfo buf, XLogRecord *record)
 {
@@ -38,48 +31,55 @@ spg_desc(StringInfo buf, XLogRecord *record)
                                                         ((RelFileNode *) rec)->relNode);
                        break;
                case XLOG_SPGIST_ADD_LEAF:
-                       out_target(buf, ((spgxlogAddLeaf *) rec)->node);
-                       appendStringInfo(buf, "%u",
-                                                        ((spgxlogAddLeaf *) rec)->blknoLeaf);
+                       {
+                               spgxlogAddLeaf *xlrec = (spgxlogAddLeaf *) rec;
+
+                               appendStringInfo(buf, "add leaf to page");
+                               appendStringInfo(buf, "; off %u; headoff %u; parentoff %u",
+                                                                xlrec->offnumLeaf, xlrec->offnumHeadLeaf,
+                                                                xlrec->offnumParent);
+                               if (xlrec->newPage)
+                                       appendStringInfo(buf, " (newpage)");
+                               if (xlrec->storesNulls)
+                                       appendStringInfo(buf, " (nulls)");
+                       }
                        break;
                case XLOG_SPGIST_MOVE_LEAFS:
-                       out_target(buf, ((spgxlogMoveLeafs *) rec)->node);
-                       appendStringInfo(buf, "%u leafs from page %u to page %u",
-                                                        ((spgxlogMoveLeafs *) rec)->nMoves,
-                                                        ((spgxlogMoveLeafs *) rec)->blknoSrc,
-                                                        ((spgxlogMoveLeafs *) rec)->blknoDst);
+                       appendStringInfo(buf, "%u leafs",
+                                                        ((spgxlogMoveLeafs *) rec)->nMoves);
                        break;
                case XLOG_SPGIST_ADD_NODE:
-                       out_target(buf, ((spgxlogAddNode *) rec)->node);
-                       appendStringInfo(buf, "%u:%u",
-                                                        ((spgxlogAddNode *) rec)->blkno,
+                       appendStringInfo(buf, "off %u",
                                                         ((spgxlogAddNode *) rec)->offnum);
                        break;
                case XLOG_SPGIST_SPLIT_TUPLE:
-                       out_target(buf, ((spgxlogSplitTuple *) rec)->node);
-                       appendStringInfo(buf, "%u:%u to %u:%u",
-                                                        ((spgxlogSplitTuple *) rec)->blknoPrefix,
+                       appendStringInfo(buf, "prefix off: %u, postfix off: %u (same %d, new %d)",
                                                         ((spgxlogSplitTuple *) rec)->offnumPrefix,
-                                                        ((spgxlogSplitTuple *) rec)->blknoPostfix,
-                                                        ((spgxlogSplitTuple *) rec)->offnumPostfix);
+                                                        ((spgxlogSplitTuple *) rec)->offnumPostfix,
+                                                        ((spgxlogSplitTuple *) rec)->postfixBlkSame,
+                                                        ((spgxlogSplitTuple *) rec)->newPage
+                               );
                        break;
                case XLOG_SPGIST_PICKSPLIT:
-                       out_target(buf, ((spgxlogPickSplit *) rec)->node);
+                       {
+                               spgxlogPickSplit *xlrec = (spgxlogPickSplit *) rec;
+
+                               appendStringInfo(buf, "ndel %u; nins %u",
+                                                                xlrec->nDelete, xlrec->nInsert);
+                               if (xlrec->innerIsParent)
+                                       appendStringInfo(buf, " (innerIsParent)");
+                               if (xlrec->isRootSplit)
+                                       appendStringInfo(buf, " (isRootSplit)");
+                       }
                        break;
                case XLOG_SPGIST_VACUUM_LEAF:
-                       out_target(buf, ((spgxlogVacuumLeaf *) rec)->node);
-                       appendStringInfo(buf, "page %u",
-                                                        ((spgxlogVacuumLeaf *) rec)->blkno);
+                       /* no further information */
                        break;
                case XLOG_SPGIST_VACUUM_ROOT:
-                       out_target(buf, ((spgxlogVacuumRoot *) rec)->node);
-                       appendStringInfo(buf, "page %u",
-                                                        ((spgxlogVacuumRoot *) rec)->blkno);
+                       /* no further information */
                        break;
                case XLOG_SPGIST_VACUUM_REDIRECT:
-                       out_target(buf, ((spgxlogVacuumRedirect *) rec)->node);
-                       appendStringInfo(buf, "page %u, newest XID %u",
-                                                        ((spgxlogVacuumRedirect *) rec)->blkno,
+                       appendStringInfo(buf, "newest XID %u",
                                                 ((spgxlogVacuumRedirect *) rec)->newestRedirectXid);
                        break;
        }
index e0957ff3a8ce433c091772e313dffffe51191c94..06edb0f45cf19fca5ba00ebd0eec04c441c4007c 100644 (file)
@@ -76,11 +76,7 @@ xlog_desc(StringInfo buf, XLogRecord *record)
        }
        else if (info == XLOG_FPI)
        {
-               BkpBlock   *bkp = (BkpBlock *) rec;
-
-               appendStringInfo(buf, "%s block %u",
-                                                relpathperm(bkp->node, bkp->fork),
-                                                bkp->block);
+               /* no further information to print */
        }
        else if (info == XLOG_BACKUP_END)
        {
index 21a071ab19932179758fb6b0e19009aa625043d6..59944b223d044076d2cbcbd938ad4a5bc3d4098b 100644 (file)
@@ -16,8 +16,8 @@
 #include "postgres.h"
 
 #include "access/genam.h"
-#include "access/xloginsert.h"
 #include "access/spgist_private.h"
+#include "access/xloginsert.h"
 #include "miscadmin.h"
 #include "storage/bufmgr.h"
 #include "utils/rel.h"
@@ -202,25 +202,17 @@ static void
 addLeafTuple(Relation index, SpGistState *state, SpGistLeafTuple leafTuple,
                   SPPageDesc *current, SPPageDesc *parent, bool isNulls, bool isNew)
 {
-       XLogRecData rdata[4];
        spgxlogAddLeaf xlrec;
 
-       xlrec.node = index->rd_node;
-       xlrec.blknoLeaf = current->blkno;
        xlrec.newPage = isNew;
        xlrec.storesNulls = isNulls;
 
        /* these will be filled below as needed */
        xlrec.offnumLeaf = InvalidOffsetNumber;
        xlrec.offnumHeadLeaf = InvalidOffsetNumber;
-       xlrec.blknoParent = InvalidBlockNumber;
        xlrec.offnumParent = InvalidOffsetNumber;
        xlrec.nodeI = 0;
 
-       ACCEPT_RDATA_DATA(&xlrec, sizeof(xlrec), 0);
-       ACCEPT_RDATA_DATA(leafTuple, leafTuple->size, 1);
-       ACCEPT_RDATA_BUFFER(current->buffer, 2);
-
        START_CRIT_SECTION();
 
        if (current->offnum == InvalidOffsetNumber ||
@@ -237,13 +229,10 @@ addLeafTuple(Relation index, SpGistState *state, SpGistLeafTuple leafTuple,
                /* Must update parent's downlink if any */
                if (parent->buffer != InvalidBuffer)
                {
-                       xlrec.blknoParent = parent->blkno;
                        xlrec.offnumParent = parent->offnum;
                        xlrec.nodeI = parent->node;
 
                        saveNodeLink(index, parent, current->blkno, current->offnum);
-
-                       ACCEPT_RDATA_BUFFER(parent->buffer, 3);
                }
        }
        else
@@ -303,12 +292,20 @@ addLeafTuple(Relation index, SpGistState *state, SpGistLeafTuple leafTuple,
        {
                XLogRecPtr      recptr;
 
-               recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_LEAF, rdata);
+               XLogBeginInsert();
+               XLogRegisterData((char *) &xlrec, sizeof(xlrec));
+               XLogRegisterData((char *) leafTuple, leafTuple->size);
+
+               XLogRegisterBuffer(0, current->buffer, REGBUF_STANDARD);
+               if (xlrec.offnumParent != InvalidOffsetNumber)
+                       XLogRegisterBuffer(1, parent->buffer, REGBUF_STANDARD);
+
+               recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_LEAF);
 
                PageSetLSN(current->page, recptr);
 
                /* update parent only if we actually changed it */
-               if (xlrec.blknoParent != InvalidBlockNumber)
+               if (xlrec.offnumParent != InvalidOffsetNumber)
                {
                        PageSetLSN(parent->page, recptr);
                }
@@ -399,7 +396,6 @@ moveLeafs(Relation index, SpGistState *state,
        OffsetNumber *toDelete;
        OffsetNumber *toInsert;
        BlockNumber nblkno;
-       XLogRecData rdata[7];
        spgxlogMoveLeafs xlrec;
        char       *leafdata,
                           *leafptr;
@@ -455,20 +451,6 @@ moveLeafs(Relation index, SpGistState *state,
        nblkno = BufferGetBlockNumber(nbuf);
        Assert(nblkno != current->blkno);
 
-       /* prepare WAL info */
-       xlrec.node = index->rd_node;
-       STORE_STATE(state, xlrec.stateSrc);
-
-       xlrec.blknoSrc = current->blkno;
-       xlrec.blknoDst = nblkno;
-       xlrec.nMoves = nDelete;
-       xlrec.replaceDead = replaceDead;
-       xlrec.storesNulls = isNulls;
-
-       xlrec.blknoParent = parent->blkno;
-       xlrec.offnumParent = parent->offnum;
-       xlrec.nodeI = parent->node;
-
        leafdata = leafptr = palloc(size);
 
        START_CRIT_SECTION();
@@ -533,15 +515,29 @@ moveLeafs(Relation index, SpGistState *state,
        {
                XLogRecPtr      recptr;
 
-               ACCEPT_RDATA_DATA(&xlrec, SizeOfSpgxlogMoveLeafs, 0);
-               ACCEPT_RDATA_DATA(toDelete, sizeof(OffsetNumber) * nDelete, 1);
-               ACCEPT_RDATA_DATA(toInsert, sizeof(OffsetNumber) * nInsert, 2);
-               ACCEPT_RDATA_DATA(leafdata, leafptr - leafdata, 3);
-               ACCEPT_RDATA_BUFFER(current->buffer, 4);
-               ACCEPT_RDATA_BUFFER(nbuf, 5);
-               ACCEPT_RDATA_BUFFER(parent->buffer, 6);
+               /* prepare WAL info */
+               STORE_STATE(state, xlrec.stateSrc);
 
-               recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_MOVE_LEAFS, rdata);
+               xlrec.nMoves = nDelete;
+               xlrec.replaceDead = replaceDead;
+               xlrec.storesNulls = isNulls;
+
+               xlrec.offnumParent = parent->offnum;
+               xlrec.nodeI = parent->node;
+
+               XLogBeginInsert();
+               XLogRegisterData((char *) &xlrec, SizeOfSpgxlogMoveLeafs);
+               XLogRegisterData((char *) toDelete,
+                                                sizeof(OffsetNumber) * nDelete);
+               XLogRegisterData((char *) toInsert,
+                                                sizeof(OffsetNumber) * nInsert);
+               XLogRegisterData((char *) leafdata, leafptr - leafdata);
+
+               XLogRegisterBuffer(0, current->buffer, REGBUF_STANDARD);
+               XLogRegisterBuffer(1, nbuf, REGBUF_STANDARD | (xlrec.newPage ? REGBUF_WILL_INIT : 0));
+               XLogRegisterBuffer(2, parent->buffer, REGBUF_STANDARD);
+
+               recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_MOVE_LEAFS);
 
                PageSetLSN(current->page, recptr);
                PageSetLSN(npage, recptr);
@@ -701,8 +697,6 @@ doPickSplit(Relation index, SpGistState *state,
        int                     currentFreeSpace;
        int                     totalLeafSizes;
        bool            allTheSame;
-       XLogRecData rdata[10];
-       int                     nRdata;
        spgxlogPickSplit xlrec;
        char       *leafdata,
                           *leafptr;
@@ -725,7 +719,6 @@ doPickSplit(Relation index, SpGistState *state,
        newLeafs = (SpGistLeafTuple *) palloc(sizeof(SpGistLeafTuple) * n);
        leafPageSelect = (uint8 *) palloc(sizeof(uint8) * n);
 
-       xlrec.node = index->rd_node;
        STORE_STATE(state, xlrec.stateSrc);
 
        /*
@@ -971,10 +964,6 @@ doPickSplit(Relation index, SpGistState *state,
        }
 
        /*
-        * Because a WAL record can't involve more than four buffers, we can only
-        * afford to deal with two leaf pages in each picksplit action, ie the
-        * current page and at most one other.
-        *
         * The new leaf tuples converted from the existing ones should require the
         * same or less space, and therefore should all fit onto one page
         * (although that's not necessarily the current page, since we can't
@@ -1108,17 +1097,13 @@ doPickSplit(Relation index, SpGistState *state,
        }
 
        /* Start preparing WAL record */
-       xlrec.blknoSrc = current->blkno;
-       xlrec.blknoDest = InvalidBlockNumber;
        xlrec.nDelete = 0;
        xlrec.initSrc = isNew;
        xlrec.storesNulls = isNulls;
+       xlrec.isRootSplit = SpGistBlockIsRoot(current->blkno);
 
        leafdata = leafptr = (char *) palloc(totalLeafSizes);
 
-       ACCEPT_RDATA_DATA(&xlrec, SizeOfSpgxlogPickSplit, 0);
-       nRdata = 1;
-
        /* Here we begin making the changes to the target pages */
        START_CRIT_SECTION();
 
@@ -1150,12 +1135,6 @@ doPickSplit(Relation index, SpGistState *state,
                else
                {
                        xlrec.nDelete = nToDelete;
-                       ACCEPT_RDATA_DATA(toDelete,
-                                                         sizeof(OffsetNumber) * nToDelete,
-                                                         nRdata);
-                       nRdata++;
-                       ACCEPT_RDATA_BUFFER(current->buffer, nRdata);
-                       nRdata++;
 
                        if (!state->isBuild)
                        {
@@ -1240,25 +1219,8 @@ doPickSplit(Relation index, SpGistState *state,
        if (newLeafBuffer != InvalidBuffer)
        {
                MarkBufferDirty(newLeafBuffer);
-               /* also save block number for WAL */
-               xlrec.blknoDest = BufferGetBlockNumber(newLeafBuffer);
-               if (!xlrec.initDest)
-               {
-                       ACCEPT_RDATA_BUFFER(newLeafBuffer, nRdata);
-                       nRdata++;
-               }
        }
 
-       xlrec.nInsert = nToInsert;
-       ACCEPT_RDATA_DATA(toInsert, sizeof(OffsetNumber) * nToInsert, nRdata);
-       nRdata++;
-       ACCEPT_RDATA_DATA(leafPageSelect, sizeof(uint8) * nToInsert, nRdata);
-       nRdata++;
-       ACCEPT_RDATA_DATA(innerTuple, innerTuple->size, nRdata);
-       nRdata++;
-       ACCEPT_RDATA_DATA(leafdata, leafptr - leafdata, nRdata);
-       nRdata++;
-
        /* Remember current buffer, since we're about to change "current" */
        saveCurrent = *current;
 
@@ -1276,7 +1238,6 @@ doPickSplit(Relation index, SpGistState *state,
                current->blkno = parent->blkno;
                current->buffer = parent->buffer;
                current->page = parent->page;
-               xlrec.blknoInner = current->blkno;
                xlrec.offnumInner = current->offnum =
                        SpGistPageAddNewItem(state, current->page,
                                                                 (Item) innerTuple, innerTuple->size,
@@ -1285,14 +1246,11 @@ doPickSplit(Relation index, SpGistState *state,
                /*
                 * Update parent node link and mark parent page dirty
                 */
-               xlrec.blknoParent = parent->blkno;
+               xlrec.innerIsParent = true;
                xlrec.offnumParent = parent->offnum;
                xlrec.nodeI = parent->node;
                saveNodeLink(index, parent, current->blkno, current->offnum);
 
-               ACCEPT_RDATA_BUFFER(parent->buffer, nRdata);
-               nRdata++;
-
                /*
                 * Update redirection link (in old current buffer)
                 */
@@ -1314,7 +1272,6 @@ doPickSplit(Relation index, SpGistState *state,
                current->buffer = newInnerBuffer;
                current->blkno = BufferGetBlockNumber(current->buffer);
                current->page = BufferGetPage(current->buffer);
-               xlrec.blknoInner = current->blkno;
                xlrec.offnumInner = current->offnum =
                        SpGistPageAddNewItem(state, current->page,
                                                                 (Item) innerTuple, innerTuple->size,
@@ -1326,16 +1283,11 @@ doPickSplit(Relation index, SpGistState *state,
                /*
                 * Update parent node link and mark parent page dirty
                 */
-               xlrec.blknoParent = parent->blkno;
+               xlrec.innerIsParent = (parent->buffer == current->buffer);
                xlrec.offnumParent = parent->offnum;
                xlrec.nodeI = parent->node;
                saveNodeLink(index, parent, current->blkno, current->offnum);
 
-               ACCEPT_RDATA_BUFFER(current->buffer, nRdata);
-               nRdata++;
-               ACCEPT_RDATA_BUFFER(parent->buffer, nRdata);
-               nRdata++;
-
                /*
                 * Update redirection link (in old current buffer)
                 */
@@ -1357,8 +1309,8 @@ doPickSplit(Relation index, SpGistState *state,
 
                SpGistInitBuffer(current->buffer, (isNulls ? SPGIST_NULLS : 0));
                xlrec.initInner = true;
+               xlrec.innerIsParent = false;
 
-               xlrec.blknoInner = current->blkno;
                xlrec.offnumInner = current->offnum =
                        PageAddItem(current->page, (Item) innerTuple, innerTuple->size,
                                                InvalidOffsetNumber, false, false);
@@ -1367,7 +1319,6 @@ doPickSplit(Relation index, SpGistState *state,
                                 innerTuple->size);
 
                /* No parent link to update, nor redirection to do */
-               xlrec.blknoParent = InvalidBlockNumber;
                xlrec.offnumParent = InvalidOffsetNumber;
                xlrec.nodeI = 0;
 
@@ -1381,9 +1332,46 @@ doPickSplit(Relation index, SpGistState *state,
        if (RelationNeedsWAL(index))
        {
                XLogRecPtr      recptr;
+               int                     flags;
+
+               XLogBeginInsert();
+
+               xlrec.nInsert = nToInsert;
+               XLogRegisterData((char *) &xlrec, SizeOfSpgxlogPickSplit);
+
+               XLogRegisterData((char *) toDelete,
+                                                sizeof(OffsetNumber) * xlrec.nDelete);
+               XLogRegisterData((char *) toInsert,
+                                                sizeof(OffsetNumber) * xlrec.nInsert);
+               XLogRegisterData((char *) leafPageSelect,
+                                                sizeof(uint8) * xlrec.nInsert);
+               XLogRegisterData((char *) innerTuple, innerTuple->size);
+               XLogRegisterData(leafdata, leafptr - leafdata);
+
+               flags = REGBUF_STANDARD;
+               if (xlrec.initSrc)
+                       flags |= REGBUF_WILL_INIT;
+               if (BufferIsValid(saveCurrent.buffer))
+                       XLogRegisterBuffer(0, saveCurrent.buffer, flags);
+
+               if (BufferIsValid(newLeafBuffer))
+               {
+                       flags = REGBUF_STANDARD;
+                       if (xlrec.initDest)
+                               flags |= REGBUF_WILL_INIT;
+                       XLogRegisterBuffer(1, newLeafBuffer, flags);
+               }
+               XLogRegisterBuffer(2, current->buffer, REGBUF_STANDARD);
+               if (parent->buffer != InvalidBuffer)
+               {
+                       if (parent->buffer != current->buffer)
+                               XLogRegisterBuffer(3, parent->buffer, REGBUF_STANDARD);
+                       else
+                               Assert(xlrec.innerIsParent);
+               }
 
                /* Issue the WAL record */
-               recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_PICKSPLIT, rdata);
+               recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_PICKSPLIT);
 
                /* Update page LSNs on all affected pages */
                if (newLeafBuffer != InvalidBuffer)
@@ -1489,7 +1477,6 @@ spgAddNodeAction(Relation index, SpGistState *state,
                                 int nodeN, Datum nodeLabel)
 {
        SpGistInnerTuple newInnerTuple;
-       XLogRecData rdata[5];
        spgxlogAddNode xlrec;
 
        /* Should not be applied to nulls */
@@ -1499,25 +1486,18 @@ spgAddNodeAction(Relation index, SpGistState *state,
        newInnerTuple = addNode(state, innerTuple, nodeLabel, nodeN);
 
        /* Prepare WAL record */
-       xlrec.node = index->rd_node;
        STORE_STATE(state, xlrec.stateSrc);
-       xlrec.blkno = current->blkno;
        xlrec.offnum = current->offnum;
 
        /* we don't fill these unless we need to change the parent downlink */
-       xlrec.blknoParent = InvalidBlockNumber;
+       xlrec.parentBlk = -1;
        xlrec.offnumParent = InvalidOffsetNumber;
        xlrec.nodeI = 0;
 
        /* we don't fill these unless tuple has to be moved */
-       xlrec.blknoNew = InvalidBlockNumber;
        xlrec.offnumNew = InvalidOffsetNumber;
        xlrec.newPage = false;
 
-       ACCEPT_RDATA_DATA(&xlrec, sizeof(xlrec), 0);
-       ACCEPT_RDATA_DATA(newInnerTuple, newInnerTuple->size, 1);
-       ACCEPT_RDATA_BUFFER(current->buffer, 2);
-
        if (PageGetExactFreeSpace(current->page) >=
                newInnerTuple->size - innerTuple->size)
        {
@@ -1539,7 +1519,13 @@ spgAddNodeAction(Relation index, SpGistState *state,
                {
                        XLogRecPtr      recptr;
 
-                       recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_NODE, rdata);
+                       XLogBeginInsert();
+                       XLogRegisterData((char *) &xlrec, sizeof(xlrec));
+                       XLogRegisterData((char *) newInnerTuple, newInnerTuple->size);
+
+                       XLogRegisterBuffer(0, current->buffer, REGBUF_STANDARD);
+
+                       recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_NODE);
 
                        PageSetLSN(current->page, recptr);
                }
@@ -1565,7 +1551,6 @@ spgAddNodeAction(Relation index, SpGistState *state,
 
                saveCurrent = *current;
 
-               xlrec.blknoParent = parent->blkno;
                xlrec.offnumParent = parent->offnum;
                xlrec.nodeI = parent->node;
 
@@ -1580,8 +1565,6 @@ spgAddNodeAction(Relation index, SpGistState *state,
                current->blkno = BufferGetBlockNumber(current->buffer);
                current->page = BufferGetPage(current->buffer);
 
-               xlrec.blknoNew = current->blkno;
-
                /*
                 * Let's just make real sure new current isn't same as old.  Right now
                 * that's impossible, but if SpGistGetBuffer ever got smart enough to
@@ -1590,17 +1573,19 @@ spgAddNodeAction(Relation index, SpGistState *state,
                 * replay would be subtly wrong, so I think a mere assert isn't enough
                 * here.
                 */
-               if (xlrec.blknoNew == xlrec.blkno)
+               if (current->blkno == saveCurrent.blkno)
                        elog(ERROR, "SPGiST new buffer shouldn't be same as old buffer");
 
                /*
                 * New current and parent buffer will both be modified; but note that
                 * parent buffer could be same as either new or old current.
                 */
-               ACCEPT_RDATA_BUFFER(current->buffer, 3);
-               if (parent->buffer != current->buffer &&
-                       parent->buffer != saveCurrent.buffer)
-                       ACCEPT_RDATA_BUFFER(parent->buffer, 4);
+               if (parent->buffer == saveCurrent.buffer)
+                       xlrec.parentBlk = 0;
+               else if (parent->buffer == current->buffer)
+                       xlrec.parentBlk = 1;
+               else
+                       xlrec.parentBlk = 2;
 
                START_CRIT_SECTION();
 
@@ -1647,7 +1632,20 @@ spgAddNodeAction(Relation index, SpGistState *state,
                {
                        XLogRecPtr      recptr;
 
-                       recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_NODE, rdata);
+                       XLogBeginInsert();
+
+                       /* orig page */
+                       XLogRegisterBuffer(0, saveCurrent.buffer, REGBUF_STANDARD);
+                       /* new page */
+                       XLogRegisterBuffer(1, current->buffer, REGBUF_STANDARD);
+                       /* parent page (if different from orig and new) */
+                       if (xlrec.parentBlk == 2)
+                               XLogRegisterBuffer(2, parent->buffer, REGBUF_STANDARD);
+
+                       XLogRegisterData((char *) &xlrec, sizeof(xlrec));
+                       XLogRegisterData((char *) newInnerTuple, newInnerTuple->size);
+
+                       recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_NODE);
 
                        /* we don't bother to check if any of these are redundant */
                        PageSetLSN(current->page, recptr);
@@ -1682,7 +1680,6 @@ spgSplitNodeAction(Relation index, SpGistState *state,
        BlockNumber postfixBlkno;
        OffsetNumber postfixOffset;
        int                     i;
-       XLogRecData rdata[5];
        spgxlogSplitTuple xlrec;
        Buffer          newBuffer = InvalidBuffer;
 
@@ -1725,14 +1722,8 @@ spgSplitNodeAction(Relation index, SpGistState *state,
        postfixTuple->allTheSame = innerTuple->allTheSame;
 
        /* prep data for WAL record */
-       xlrec.node = index->rd_node;
        xlrec.newPage = false;
 
-       ACCEPT_RDATA_DATA(&xlrec, sizeof(xlrec), 0);
-       ACCEPT_RDATA_DATA(prefixTuple, prefixTuple->size, 1);
-       ACCEPT_RDATA_DATA(postfixTuple, postfixTuple->size, 2);
-       ACCEPT_RDATA_BUFFER(current->buffer, 3);
-
        /*
         * If we can't fit both tuples on the current page, get a new page for the
         * postfix tuple.  In particular, can't split to the root page.
@@ -1752,7 +1743,6 @@ spgSplitNodeAction(Relation index, SpGistState *state,
                                                                        GBUF_INNER_PARITY(current->blkno + 1),
                                                                        postfixTuple->size + sizeof(ItemIdData),
                                                                        &xlrec.newPage);
-               ACCEPT_RDATA_BUFFER(newBuffer, 4);
        }
 
        START_CRIT_SECTION();
@@ -1767,27 +1757,28 @@ spgSplitNodeAction(Relation index, SpGistState *state,
        if (xlrec.offnumPrefix != current->offnum)
                elog(ERROR, "failed to add item of size %u to SPGiST index page",
                         prefixTuple->size);
-       xlrec.blknoPrefix = current->blkno;
 
        /*
         * put postfix tuple into appropriate page
         */
        if (newBuffer == InvalidBuffer)
        {
-               xlrec.blknoPostfix = postfixBlkno = current->blkno;
+               postfixBlkno = current->blkno;
                xlrec.offnumPostfix = postfixOffset =
                        SpGistPageAddNewItem(state, current->page,
                                                                 (Item) postfixTuple, postfixTuple->size,
                                                                 NULL, false);
+               xlrec.postfixBlkSame = true;
        }
        else
        {
-               xlrec.blknoPostfix = postfixBlkno = BufferGetBlockNumber(newBuffer);
+               postfixBlkno = BufferGetBlockNumber(newBuffer);
                xlrec.offnumPostfix = postfixOffset =
                        SpGistPageAddNewItem(state, BufferGetPage(newBuffer),
                                                                 (Item) postfixTuple, postfixTuple->size,
                                                                 NULL, false);
                MarkBufferDirty(newBuffer);
+               xlrec.postfixBlkSame = false;
        }
 
        /*
@@ -1808,7 +1799,23 @@ spgSplitNodeAction(Relation index, SpGistState *state,
        {
                XLogRecPtr      recptr;
 
-               recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_SPLIT_TUPLE, rdata);
+               XLogBeginInsert();
+               XLogRegisterData((char *) &xlrec, sizeof(xlrec));
+               XLogRegisterData((char *) prefixTuple, prefixTuple->size);
+               XLogRegisterData((char *) postfixTuple, postfixTuple->size);
+
+               XLogRegisterBuffer(0, current->buffer, REGBUF_STANDARD);
+               if (newBuffer != InvalidBuffer)
+               {
+                       int             flags;
+
+                       flags = REGBUF_STANDARD;
+                       if (xlrec.newPage)
+                               flags |= REGBUF_WILL_INIT;
+                       XLogRegisterBuffer(1, newBuffer, flags);
+               }
+
+               recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_SPLIT_TUPLE);
 
                PageSetLSN(current->page, recptr);
 
index e1dfc8e358076086109c4f79c8fa3ff78bc41f7f..f168ac5c5cf7e3cd7d05ffae88897c2608c97a7a 100644 (file)
@@ -105,15 +105,18 @@ spgbuild(PG_FUNCTION_ARGS)
        if (RelationNeedsWAL(index))
        {
                XLogRecPtr      recptr;
-               XLogRecData rdata;
 
-               /* WAL data is just the relfilenode */
-               rdata.data = (char *) &(index->rd_node);
-               rdata.len = sizeof(RelFileNode);
-               rdata.buffer = InvalidBuffer;
-               rdata.next = NULL;
+               XLogBeginInsert();
 
-               recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_CREATE_INDEX, &rdata);
+               /*
+                * Replay will re-initialize the pages, so don't take full-page
+                * images.  No other data to log.
+                */
+               XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT);
+               XLogRegisterBuffer(1, rootbuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
+               XLogRegisterBuffer(2, nullbuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
+
+               recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_CREATE_INDEX);
 
                PageSetLSN(BufferGetPage(metabuffer), recptr);
                PageSetLSN(BufferGetPage(rootbuffer), recptr);
index 2e05d22b74967ffa4b48c3c84c8d1780a48f076f..c0cab7ee1f186329ea5721e3310a01c098ada860 100644 (file)
@@ -127,7 +127,6 @@ vacuumLeafPage(spgBulkDeleteState *bds, Relation index, Buffer buffer,
 {
        Page            page = BufferGetPage(buffer);
        spgxlogVacuumLeaf xlrec;
-       XLogRecData rdata[8];
        OffsetNumber toDead[MaxIndexTuplesPerPage];
        OffsetNumber toPlaceholder[MaxIndexTuplesPerPage];
        OffsetNumber moveSrc[MaxIndexTuplesPerPage];
@@ -323,23 +322,26 @@ vacuumLeafPage(spgBulkDeleteState *bds, Relation index, Buffer buffer,
        if (nDeletable != xlrec.nDead + xlrec.nPlaceholder + xlrec.nMove)
                elog(ERROR, "inconsistent counts of deletable tuples");
 
-       /* Prepare WAL record */
-       xlrec.node = index->rd_node;
-       xlrec.blkno = BufferGetBlockNumber(buffer);
-       STORE_STATE(&bds->spgstate, xlrec.stateSrc);
-
-       ACCEPT_RDATA_DATA(&xlrec, SizeOfSpgxlogVacuumLeaf, 0);
-       ACCEPT_RDATA_DATA(toDead, sizeof(OffsetNumber) * xlrec.nDead, 1);
-       ACCEPT_RDATA_DATA(toPlaceholder, sizeof(OffsetNumber) * xlrec.nPlaceholder, 2);
-       ACCEPT_RDATA_DATA(moveSrc, sizeof(OffsetNumber) * xlrec.nMove, 3);
-       ACCEPT_RDATA_DATA(moveDest, sizeof(OffsetNumber) * xlrec.nMove, 4);
-       ACCEPT_RDATA_DATA(chainSrc, sizeof(OffsetNumber) * xlrec.nChain, 5);
-       ACCEPT_RDATA_DATA(chainDest, sizeof(OffsetNumber) * xlrec.nChain, 6);
-       ACCEPT_RDATA_BUFFER(buffer, 7);
-
        /* Do the updates */
        START_CRIT_SECTION();
 
+       /* Prepare WAL record */
+       if (RelationNeedsWAL(index))
+       {
+               XLogBeginInsert();
+
+               STORE_STATE(&bds->spgstate, xlrec.stateSrc);
+
+               XLogRegisterData((char *) &xlrec, SizeOfSpgxlogVacuumLeaf);
+               /* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
+               XLogRegisterData((char *) toDead, sizeof(OffsetNumber) * xlrec.nDead);
+               XLogRegisterData((char *) toPlaceholder, sizeof(OffsetNumber) * xlrec.nPlaceholder);
+               XLogRegisterData((char *) moveSrc, sizeof(OffsetNumber) * xlrec.nMove);
+               XLogRegisterData((char *) moveDest, sizeof(OffsetNumber) * xlrec.nMove);
+               XLogRegisterData((char *) chainSrc, sizeof(OffsetNumber) * xlrec.nChain);
+               XLogRegisterData((char *) chainDest, sizeof(OffsetNumber) * xlrec.nChain);
+       }
+
        spgPageIndexMultiDelete(&bds->spgstate, page,
                                                        toDead, xlrec.nDead,
                                                        SPGIST_DEAD, SPGIST_DEAD,
@@ -389,7 +391,9 @@ vacuumLeafPage(spgBulkDeleteState *bds, Relation index, Buffer buffer,
        {
                XLogRecPtr      recptr;
 
-               recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_LEAF, rdata);
+               XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | REGBUF_FORCE_IMAGE);
+
+               recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_LEAF);
 
                PageSetLSN(page, recptr);
        }
@@ -407,12 +411,10 @@ vacuumLeafRoot(spgBulkDeleteState *bds, Relation index, Buffer buffer)
 {
        Page            page = BufferGetPage(buffer);
        spgxlogVacuumRoot xlrec;
-       XLogRecData rdata[3];
        OffsetNumber toDelete[MaxIndexTuplesPerPage];
        OffsetNumber i,
                                max = PageGetMaxOffsetNumber(page);
 
-       xlrec.blkno = BufferGetBlockNumber(buffer);
        xlrec.nDelete = 0;
 
        /* Scan page, identify tuples to delete, accumulate stats */
@@ -448,15 +450,6 @@ vacuumLeafRoot(spgBulkDeleteState *bds, Relation index, Buffer buffer)
        if (xlrec.nDelete == 0)
                return;                                 /* nothing more to do */
 
-       /* Prepare WAL record */
-       xlrec.node = index->rd_node;
-       STORE_STATE(&bds->spgstate, xlrec.stateSrc);
-
-       ACCEPT_RDATA_DATA(&xlrec, SizeOfSpgxlogVacuumRoot, 0);
-       /* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
-       ACCEPT_RDATA_DATA(toDelete, sizeof(OffsetNumber) * xlrec.nDelete, 1);
-       ACCEPT_RDATA_BUFFER(buffer, 2);
-
        /* Do the update */
        START_CRIT_SECTION();
 
@@ -469,7 +462,19 @@ vacuumLeafRoot(spgBulkDeleteState *bds, Relation index, Buffer buffer)
        {
                XLogRecPtr      recptr;
 
-               recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_ROOT, rdata);
+               XLogBeginInsert();
+
+               /* Prepare WAL record */
+               STORE_STATE(&bds->spgstate, xlrec.stateSrc);
+
+               XLogRegisterData((char *) &xlrec, SizeOfSpgxlogVacuumRoot);
+               /* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
+               XLogRegisterData((char *) toDelete,
+                                                sizeof(OffsetNumber) * xlrec.nDelete);
+
+               XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
+
+               recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_ROOT);
 
                PageSetLSN(page, recptr);
        }
@@ -499,10 +504,7 @@ vacuumRedirectAndPlaceholder(Relation index, Buffer buffer)
        OffsetNumber itemToPlaceholder[MaxIndexTuplesPerPage];
        OffsetNumber itemnos[MaxIndexTuplesPerPage];
        spgxlogVacuumRedirect xlrec;
-       XLogRecData rdata[3];
 
-       xlrec.node = index->rd_node;
-       xlrec.blkno = BufferGetBlockNumber(buffer);
        xlrec.nToPlaceholder = 0;
        xlrec.newestRedirectXid = InvalidTransactionId;
 
@@ -585,11 +587,15 @@ vacuumRedirectAndPlaceholder(Relation index, Buffer buffer)
        {
                XLogRecPtr      recptr;
 
-               ACCEPT_RDATA_DATA(&xlrec, SizeOfSpgxlogVacuumRedirect, 0);
-               ACCEPT_RDATA_DATA(itemToPlaceholder, sizeof(OffsetNumber) * xlrec.nToPlaceholder, 1);
-               ACCEPT_RDATA_BUFFER(buffer, 2);
+               XLogBeginInsert();
+
+               XLogRegisterData((char *) &xlrec, SizeOfSpgxlogVacuumRedirect);
+               XLogRegisterData((char *) itemToPlaceholder,
+                                                sizeof(OffsetNumber) * xlrec.nToPlaceholder);
+
+               XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
 
-               recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_REDIRECT, rdata);
+               recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_REDIRECT);
 
                PageSetLSN(page, recptr);
        }
index 920739436ac37c2a75b06150b00e816119ea7cd1..24990aa24ab14923e8ce6c9f93a731c94ec5d1f2 100644 (file)
@@ -73,31 +73,27 @@ addOrReplaceTuple(Page page, Item tuple, int size, OffsetNumber offset)
 static void
 spgRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
 {
-       RelFileNode *node = (RelFileNode *) XLogRecGetData(record);
        Buffer          buffer;
        Page            page;
 
-       /* Backup blocks are not used in create_index records */
-       Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
-
-       buffer = XLogReadBuffer(*node, SPGIST_METAPAGE_BLKNO, true);
-       Assert(BufferIsValid(buffer));
+       XLogReadBufferForRedoExtended(lsn, record, 0, RBM_ZERO, false, &buffer);
+       Assert(BufferGetBlockNumber(buffer) == SPGIST_METAPAGE_BLKNO);
        page = (Page) BufferGetPage(buffer);
        SpGistInitMetapage(page);
        PageSetLSN(page, lsn);
        MarkBufferDirty(buffer);
        UnlockReleaseBuffer(buffer);
 
-       buffer = XLogReadBuffer(*node, SPGIST_ROOT_BLKNO, true);
-       Assert(BufferIsValid(buffer));
+       XLogReadBufferForRedoExtended(lsn, record, 1, RBM_ZERO, false, &buffer);
+       Assert(BufferGetBlockNumber(buffer) == SPGIST_ROOT_BLKNO);
        SpGistInitBuffer(buffer, SPGIST_LEAF);
        page = (Page) BufferGetPage(buffer);
        PageSetLSN(page, lsn);
        MarkBufferDirty(buffer);
        UnlockReleaseBuffer(buffer);
 
-       buffer = XLogReadBuffer(*node, SPGIST_NULL_BLKNO, true);
-       Assert(BufferIsValid(buffer));
+       XLogReadBufferForRedoExtended(lsn, record, 2, RBM_ZERO, false, &buffer);
+       Assert(BufferGetBlockNumber(buffer) == SPGIST_NULL_BLKNO);
        SpGistInitBuffer(buffer, SPGIST_LEAF | SPGIST_NULLS);
        page = (Page) BufferGetPage(buffer);
        PageSetLSN(page, lsn);
@@ -128,15 +124,13 @@ spgRedoAddLeaf(XLogRecPtr lsn, XLogRecord *record)
         */
        if (xldata->newPage)
        {
-               buffer = XLogReadBuffer(xldata->node, xldata->blknoLeaf, true);
+               XLogReadBufferForRedoExtended(lsn, record, 0, RBM_ZERO, false, &buffer);
                SpGistInitBuffer(buffer,
                                         SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0));
                action = BLK_NEEDS_REDO;
        }
        else
-               action = XLogReadBufferForRedo(lsn, record, 0,
-                                                                          xldata->node, xldata->blknoLeaf,
-                                                                          &buffer);
+               action = XLogReadBufferForRedo(lsn, record, 0, &buffer);
 
        if (action == BLK_NEEDS_REDO)
        {
@@ -164,7 +158,8 @@ spgRedoAddLeaf(XLogRecPtr lsn, XLogRecord *record)
                {
                        /* replacing a DEAD tuple */
                        PageIndexTupleDelete(page, xldata->offnumLeaf);
-                       if (PageAddItem(page, (Item) leafTuple, leafTupleHdr.size,
+                       if (PageAddItem(page,
+                                                       (Item) leafTuple, leafTupleHdr.size,
                                         xldata->offnumLeaf, false, false) != xldata->offnumLeaf)
                                elog(ERROR, "failed to add item of size %u to SPGiST index page",
                                         leafTupleHdr.size);
@@ -177,13 +172,14 @@ spgRedoAddLeaf(XLogRecPtr lsn, XLogRecord *record)
                UnlockReleaseBuffer(buffer);
 
        /* update parent downlink if necessary */
-       if (xldata->blknoParent != InvalidBlockNumber)
+       if (xldata->offnumParent != InvalidOffsetNumber)
        {
-               if (XLogReadBufferForRedo(lsn, record, 1,
-                                                                 xldata->node, xldata->blknoParent,
-                                                                 &buffer) == BLK_NEEDS_REDO)
+               if (XLogReadBufferForRedo(lsn, record, 1, &buffer) == BLK_NEEDS_REDO)
                {
                        SpGistInnerTuple tuple;
+                       BlockNumber             blknoLeaf;
+
+                       XLogRecGetBlockTag(record, 0, NULL, NULL, &blknoLeaf);
 
                        page = BufferGetPage(buffer);
 
@@ -191,7 +187,7 @@ spgRedoAddLeaf(XLogRecPtr lsn, XLogRecord *record)
                                                                  PageGetItemId(page, xldata->offnumParent));
 
                        spgUpdateNodeLink(tuple, xldata->nodeI,
-                                                         xldata->blknoLeaf, xldata->offnumLeaf);
+                                                         blknoLeaf, xldata->offnumLeaf);
 
                        PageSetLSN(page, lsn);
                        MarkBufferDirty(buffer);
@@ -213,6 +209,9 @@ spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
        Buffer          buffer;
        Page            page;
        XLogRedoAction action;
+       BlockNumber blknoDst;
+
+       XLogRecGetBlockTag(record, 1, NULL, NULL, &blknoDst);
 
        fillFakeState(&state, xldata->stateSrc);
 
@@ -235,15 +234,14 @@ spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
        /* Insert tuples on the dest page (do first, so redirect is valid) */
        if (xldata->newPage)
        {
-               buffer = XLogReadBuffer(xldata->node, xldata->blknoDst, true);
+               XLogReadBufferForRedoExtended(lsn, record, 1, RBM_ZERO, false, &buffer);
                SpGistInitBuffer(buffer,
                                         SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0));
                action = BLK_NEEDS_REDO;
        }
        else
-               action = XLogReadBufferForRedo(lsn, record, 1,
-                                                                          xldata->node, xldata->blknoDst,
-                                                                          &buffer);
+               action = XLogReadBufferForRedo(lsn, record, 1, &buffer);
+
        if (action == BLK_NEEDS_REDO)
        {
                int                     i;
@@ -260,7 +258,8 @@ spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
                         * field.
                         */
                        leafTuple = ptr;
-                       memcpy(&leafTupleHdr, leafTuple, sizeof(SpGistLeafTupleData));
+                       memcpy(&leafTupleHdr, leafTuple,
+                                  sizeof(SpGistLeafTupleData));
 
                        addOrReplaceTuple(page, (Item) leafTuple,
                                                          leafTupleHdr.size, toInsert[i]);
@@ -274,14 +273,14 @@ spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
                UnlockReleaseBuffer(buffer);
 
        /* Delete tuples from the source page, inserting a redirection pointer */
-       if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blknoSrc,
-                                                         &buffer) == BLK_NEEDS_REDO)
+       if (XLogReadBufferForRedo(lsn, record, 0, &buffer) == BLK_NEEDS_REDO)
        {
                page = BufferGetPage(buffer);
+
                spgPageIndexMultiDelete(&state, page, toDelete, xldata->nMoves,
                                                state.isBuild ? SPGIST_PLACEHOLDER : SPGIST_REDIRECT,
                                                                SPGIST_PLACEHOLDER,
-                                                               xldata->blknoDst,
+                                                               blknoDst,
                                                                toInsert[nInsert - 1]);
 
                PageSetLSN(page, lsn);
@@ -291,8 +290,7 @@ spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
                UnlockReleaseBuffer(buffer);
 
        /* And update the parent downlink */
-       if (XLogReadBufferForRedo(lsn, record, 2, xldata->node, xldata->blknoParent,
-                                                         &buffer) == BLK_NEEDS_REDO)
+       if (XLogReadBufferForRedo(lsn, record, 2, &buffer) == BLK_NEEDS_REDO)
        {
                SpGistInnerTuple tuple;
 
@@ -302,7 +300,7 @@ spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
                                                                  PageGetItemId(page, xldata->offnumParent));
 
                spgUpdateNodeLink(tuple, xldata->nodeI,
-                                                 xldata->blknoDst, toInsert[nInsert - 1]);
+                                                 blknoDst, toInsert[nInsert - 1]);
 
                PageSetLSN(page, lsn);
                MarkBufferDirty(buffer);
@@ -321,7 +319,6 @@ spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record)
        SpGistState state;
        Buffer          buffer;
        Page            page;
-       int                     bbi;
        XLogRedoAction action;
 
        ptr += sizeof(spgxlogAddNode);
@@ -331,17 +328,18 @@ spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record)
 
        fillFakeState(&state, xldata->stateSrc);
 
-       if (xldata->blknoNew == InvalidBlockNumber)
+       if (!XLogRecHasBlockRef(record, 1))
        {
                /* update in place */
-               Assert(xldata->blknoParent == InvalidBlockNumber);
-               if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno,
-                                                                 &buffer) == BLK_NEEDS_REDO)
+               Assert(xldata->parentBlk == -1);
+               if (XLogReadBufferForRedo(lsn, record, 0, &buffer) == BLK_NEEDS_REDO)
                {
                        page = BufferGetPage(buffer);
+
                        PageIndexTupleDelete(page, xldata->offnum);
                        if (PageAddItem(page, (Item) innerTuple, innerTupleHdr.size,
-                                                       xldata->offnum, false, false) != xldata->offnum)
+                                                       xldata->offnum,
+                                                       false, false) != xldata->offnum)
                                elog(ERROR, "failed to add item of size %u to SPGiST index page",
                                         innerTupleHdr.size);
 
@@ -353,30 +351,36 @@ spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record)
        }
        else
        {
+               BlockNumber blkno;
+               BlockNumber blknoNew;
+
+               XLogRecGetBlockTag(record, 0, NULL, NULL, &blkno);
+               XLogRecGetBlockTag(record, 1, NULL, NULL, &blknoNew);
+
                /*
                 * In normal operation we would have all three pages (source, dest,
                 * and parent) locked simultaneously; but in WAL replay it should be
                 * safe to update them one at a time, as long as we do it in the right
-                * order.
+                * order. We must insert the new tuple before replacing the old tuple
+                * with the redirect tuple.
                 *
                 * The logic here depends on the assumption that blkno != blknoNew,
                 * else we can't tell which BKP bit goes with which page, and the LSN
-                * checks could go wrong too.
+                * checks could go wrong too. XXX does this comment still make sense?
                 */
-               Assert(xldata->blkno != xldata->blknoNew);
+               Assert(blkno != blknoNew);
 
                /* Install new tuple first so redirect is valid */
                if (xldata->newPage)
                {
-                       buffer = XLogReadBuffer(xldata->node, xldata->blknoNew, true);
                        /* AddNode is not used for nulls pages */
+                       XLogReadBufferForRedoExtended(lsn, record, 1, RBM_ZERO, false,
+                                                                                 &buffer);
                        SpGistInitBuffer(buffer, 0);
                        action = BLK_NEEDS_REDO;
                }
                else
-                       action = XLogReadBufferForRedo(lsn, record, 1,
-                                                                                  xldata->node, xldata->blknoNew,
-                                                                                  &buffer);
+                       action = XLogReadBufferForRedo(lsn, record, 1, &buffer);
                if (action == BLK_NEEDS_REDO)
                {
                        page = BufferGetPage(buffer);
@@ -385,25 +389,28 @@ spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record)
                                                          innerTupleHdr.size, xldata->offnumNew);
 
                        /*
-                        * If parent is in this same page, don't advance LSN; doing so
-                        * would fool us into not applying the parent downlink update
-                        * below.  We'll update the LSN when we fix the parent downlink.
+                        * If parent is in this same page, update it now.
                         */
-                       if (xldata->blknoParent != xldata->blknoNew)
+                       if (xldata->parentBlk == 1)
                        {
-                               PageSetLSN(page, lsn);
+                               SpGistInnerTuple parentTuple;
+
+                               parentTuple = (SpGistInnerTuple) PageGetItem(page,
+                                                                 PageGetItemId(page, xldata->offnumParent));
+
+                               spgUpdateNodeLink(parentTuple, xldata->nodeI,
+                                                                 blknoNew, xldata->offnumNew);
                        }
+                       PageSetLSN(page, lsn);
                        MarkBufferDirty(buffer);
                }
                if (BufferIsValid(buffer))
                        UnlockReleaseBuffer(buffer);
 
                /* Delete old tuple, replacing it with redirect or placeholder tuple */
-               if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno,
-                                                                 &buffer) == BLK_NEEDS_REDO)
+               if (XLogReadBufferForRedo(lsn, record, 0, &buffer) == BLK_NEEDS_REDO)
                {
                        SpGistDeadTuple dt;
-
                        page = BufferGetPage(buffer);
 
                        if (state.isBuild)
@@ -412,11 +419,12 @@ spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record)
                                                                          InvalidOffsetNumber);
                        else
                                dt = spgFormDeadTuple(&state, SPGIST_REDIRECT,
-                                                                         xldata->blknoNew,
+                                                                         blknoNew,
                                                                          xldata->offnumNew);
 
                        PageIndexTupleDelete(page, xldata->offnum);
-                       if (PageAddItem(page, (Item) dt, dt->size, xldata->offnum,
+                       if (PageAddItem(page, (Item) dt, dt->size,
+                                                       xldata->offnum,
                                                        false, false) != xldata->offnum)
                                elog(ERROR, "failed to add item of size %u to SPGiST index page",
                                         dt->size);
@@ -427,61 +435,49 @@ spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record)
                                SpGistPageGetOpaque(page)->nRedirection++;
 
                        /*
-                        * If parent is in this same page, don't advance LSN; doing so
-                        * would fool us into not applying the parent downlink update
-                        * below.  We'll update the LSN when we fix the parent downlink.
+                        * If parent is in this same page, update its downlink now; the
+                        * page LSN is advanced unconditionally just below.
                         */
-                       if (xldata->blknoParent != xldata->blkno)
+                       if (xldata->parentBlk == 0)
                        {
-                               PageSetLSN(page, lsn);
+                               SpGistInnerTuple parentTuple;
+
+                               parentTuple = (SpGistInnerTuple) PageGetItem(page,
+                                                                 PageGetItemId(page, xldata->offnumParent));
+
+                               spgUpdateNodeLink(parentTuple, xldata->nodeI,
+                                                                 blknoNew, xldata->offnumNew);
                        }
+                       PageSetLSN(page, lsn);
                        MarkBufferDirty(buffer);
                }
                if (BufferIsValid(buffer))
                        UnlockReleaseBuffer(buffer);
 
                /*
-                * Update parent downlink.  Since parent could be in either of the
-                * previous two buffers, it's a bit tricky to determine which BKP bit
-                * applies.
+                * Update parent downlink (if we didn't do it as part of the source
+                * or destination page update already).
                 */
-               if (xldata->blknoParent == xldata->blkno)
-                       bbi = 0;
-               else if (xldata->blknoParent == xldata->blknoNew)
-                       bbi = 1;
-               else
-                       bbi = 2;
-
-               if (record->xl_info & XLR_BKP_BLOCK(bbi))
+               if (xldata->parentBlk == 2)
                {
-                       if (bbi == 2)           /* else we already did it */
-                               (void) RestoreBackupBlock(lsn, record, bbi, false, false);
-                       action = BLK_RESTORED;
-                       buffer = InvalidBuffer;
-               }
-               else
-               {
-                       action = XLogReadBufferForRedo(lsn, record, bbi, xldata->node,
-                                                                                  xldata->blknoParent, &buffer);
-                       Assert(action != BLK_RESTORED);
-               }
-               if (action == BLK_NEEDS_REDO)
-               {
-                       SpGistInnerTuple innerTuple;
+                       if (XLogReadBufferForRedo(lsn, record, 2, &buffer) == BLK_NEEDS_REDO)
+                       {
+                               SpGistInnerTuple parentTuple;
 
-                       page = BufferGetPage(buffer);
+                               page = BufferGetPage(buffer);
 
-                       innerTuple = (SpGistInnerTuple) PageGetItem(page,
+                               parentTuple = (SpGistInnerTuple) PageGetItem(page,
                                                                  PageGetItemId(page, xldata->offnumParent));
 
-                       spgUpdateNodeLink(innerTuple, xldata->nodeI,
-                                                         xldata->blknoNew, xldata->offnumNew);
+                               spgUpdateNodeLink(parentTuple, xldata->nodeI,
+                                                                 blknoNew, xldata->offnumNew);
 
-                       PageSetLSN(page, lsn);
-                       MarkBufferDirty(buffer);
+                               PageSetLSN(page, lsn);
+                               MarkBufferDirty(buffer);
+                       }
+                       if (BufferIsValid(buffer))
+                               UnlockReleaseBuffer(buffer);
                }
-               if (BufferIsValid(buffer))
-                       UnlockReleaseBuffer(buffer);
        }
 }
 
@@ -496,6 +492,7 @@ spgRedoSplitTuple(XLogRecPtr lsn, XLogRecord *record)
        SpGistInnerTupleData postfixTupleHdr;
        Buffer          buffer;
        Page            page;
+       XLogRedoAction action;
 
        ptr += sizeof(spgxlogSplitTuple);
        prefixTuple = ptr;
@@ -513,22 +510,18 @@ spgRedoSplitTuple(XLogRecPtr lsn, XLogRecord *record)
         */
 
        /* insert postfix tuple first to avoid dangling link */
-       if (xldata->blknoPostfix != xldata->blknoPrefix)
+       if (!xldata->postfixBlkSame)
        {
-               XLogRedoAction action;
-
                if (xldata->newPage)
                {
-                       buffer = XLogReadBuffer(xldata->node, xldata->blknoPostfix, true);
+                       XLogReadBufferForRedoExtended(lsn, record, 1, RBM_ZERO, false,
+                                                                                 &buffer);
                        /* SplitTuple is not used for nulls pages */
                        SpGistInitBuffer(buffer, 0);
                        action = BLK_NEEDS_REDO;
                }
                else
-                       action = XLogReadBufferForRedo(lsn, record, 1,
-                                                                                  xldata->node, xldata->blknoPostfix,
-                                                                                  &buffer);
-
+                       action = XLogReadBufferForRedo(lsn, record, 1, &buffer);
                if (action == BLK_NEEDS_REDO)
                {
                        page = BufferGetPage(buffer);
@@ -544,18 +537,19 @@ spgRedoSplitTuple(XLogRecPtr lsn, XLogRecord *record)
        }
 
        /* now handle the original page */
-       if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blknoPrefix,
-                                                         &buffer) == BLK_NEEDS_REDO)
+       if (XLogReadBufferForRedo(lsn, record, 0, &buffer) == BLK_NEEDS_REDO)
        {
                page = BufferGetPage(buffer);
+
                PageIndexTupleDelete(page, xldata->offnumPrefix);
                if (PageAddItem(page, (Item) prefixTuple, prefixTupleHdr.size,
                                 xldata->offnumPrefix, false, false) != xldata->offnumPrefix)
                        elog(ERROR, "failed to add item of size %u to SPGiST index page",
                                 prefixTupleHdr.size);
 
-               if (xldata->blknoPostfix == xldata->blknoPrefix)
-                       addOrReplaceTuple(page, (Item) postfixTuple, postfixTupleHdr.size,
+               if (xldata->postfixBlkSame)
+                       addOrReplaceTuple(page, (Item) postfixTuple,
+                                                         postfixTupleHdr.size,
                                                          xldata->offnumPostfix);
 
                PageSetLSN(page, lsn);
@@ -578,14 +572,16 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
        uint8      *leafPageSelect;
        Buffer          srcBuffer;
        Buffer          destBuffer;
+       Buffer          innerBuffer;
        Page            srcPage;
        Page            destPage;
-       Buffer          innerBuffer;
        Page            page;
-       int                     bbi;
        int                     i;
+       BlockNumber     blknoInner;
        XLogRedoAction action;
 
+       XLogRecGetBlockTag(record, 2, NULL, NULL, &blknoInner);
+
        fillFakeState(&state, xldata->stateSrc);
 
        ptr += SizeOfSpgxlogPickSplit;
@@ -603,13 +599,7 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
 
        /* now ptr points to the list of leaf tuples */
 
-       /*
-        * It's a bit tricky to identify which pages have been handled as
-        * full-page images, so we explicitly count each referenced buffer.
-        */
-       bbi = 0;
-
-       if (SpGistBlockIsRoot(xldata->blknoSrc))
+       if (xldata->isRootSplit)
        {
                /* when splitting root, we touch it only in the guise of new inner */
                srcBuffer = InvalidBuffer;
@@ -618,8 +608,8 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
        else if (xldata->initSrc)
        {
                /* just re-init the source page */
-               srcBuffer = XLogReadBuffer(xldata->node, xldata->blknoSrc, true);
-               Assert(BufferIsValid(srcBuffer));
+               XLogReadBufferForRedoExtended(lsn, record, 0, RBM_ZERO, false,
+                                                                         &srcBuffer);
                srcPage = (Page) BufferGetPage(srcBuffer);
 
                SpGistInitBuffer(srcBuffer,
@@ -634,23 +624,22 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
                 * inserting leaf tuples and the new inner tuple, else the added
                 * redirect tuple will be a dangling link.)
                 */
-               if (XLogReadBufferForRedo(lsn, record, bbi,
-                                                                 xldata->node, xldata->blknoSrc,
-                                                                 &srcBuffer) == BLK_NEEDS_REDO)
+               srcPage = NULL;
+               if (XLogReadBufferForRedo(lsn, record, 0, &srcBuffer) == BLK_NEEDS_REDO)
                {
                        srcPage = BufferGetPage(srcBuffer);
 
                        /*
-                        * We have it a bit easier here than in doPickSplit(), because we
-                        * know the inner tuple's location already, so we can inject the
-                        * correct redirection tuple now.
+                        * We have it a bit easier here than in doPickSplit(),
+                        * because we know the inner tuple's location already, so
+                        * we can inject the correct redirection tuple now.
                         */
                        if (!state.isBuild)
                                spgPageIndexMultiDelete(&state, srcPage,
                                                                                toDelete, xldata->nDelete,
                                                                                SPGIST_REDIRECT,
                                                                                SPGIST_PLACEHOLDER,
-                                                                               xldata->blknoInner,
+                                                                               blknoInner,
                                                                                xldata->offnumInner);
                        else
                                spgPageIndexMultiDelete(&state, srcPage,
@@ -662,15 +651,10 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
 
                        /* don't update LSN etc till we're done with it */
                }
-               else
-               {
-                       srcPage = NULL;         /* don't do any page updates */
-               }
-               bbi++;
        }
 
        /* try to access dest page if any */
-       if (xldata->blknoDest == InvalidBlockNumber)
+       if (!XLogRecHasBlockRef(record, 1))
        {
                destBuffer = InvalidBuffer;
                destPage = NULL;
@@ -678,8 +662,8 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
        else if (xldata->initDest)
        {
                /* just re-init the dest page */
-               destBuffer = XLogReadBuffer(xldata->node, xldata->blknoDest, true);
-               Assert(BufferIsValid(destBuffer));
+               XLogReadBufferForRedoExtended(lsn, record, 1, RBM_ZERO, false,
+                                                                         &destBuffer);
                destPage = (Page) BufferGetPage(destBuffer);
 
                SpGistInitBuffer(destBuffer,
@@ -692,17 +676,9 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
                 * We could probably release the page lock immediately in the
                 * full-page-image case, but for safety let's hold it till later.
                 */
-               if (XLogReadBufferForRedo(lsn, record, bbi,
-                                                                 xldata->node, xldata->blknoDest,
-                                                                 &destBuffer) == BLK_NEEDS_REDO)
-               {
+               destPage = NULL;
+               if (XLogReadBufferForRedo(lsn, record, 1, &destBuffer) == BLK_NEEDS_REDO)
                        destPage = (Page) BufferGetPage(destBuffer);
-               }
-               else
-               {
-                       destPage = NULL;        /* don't do any page updates */
-               }
-               bbi++;
        }
 
        /* restore leaf tuples to src and/or dest page */
@@ -739,14 +715,13 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
        /* restore new inner tuple */
        if (xldata->initInner)
        {
-               innerBuffer = XLogReadBuffer(xldata->node, xldata->blknoInner, true);
-               SpGistInitBuffer(innerBuffer,
-                                                (xldata->storesNulls ? SPGIST_NULLS : 0));
+               XLogReadBufferForRedoExtended(lsn, record, 2, RBM_ZERO, false,
+                                                                         &innerBuffer);
+               SpGistInitBuffer(innerBuffer, (xldata->storesNulls ? SPGIST_NULLS : 0));
                action = BLK_NEEDS_REDO;
        }
        else
-               action = XLogReadBufferForRedo(lsn, record, bbi, xldata->node,
-                                                                          xldata->blknoInner, &innerBuffer);
+               action = XLogReadBufferForRedo(lsn, record, 2, &innerBuffer);
 
        if (action == BLK_NEEDS_REDO)
        {
@@ -756,14 +731,14 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
                                                  xldata->offnumInner);
 
                /* if inner is also parent, update link while we're here */
-               if (xldata->blknoInner == xldata->blknoParent)
+               if (xldata->innerIsParent)
                {
                        SpGistInnerTuple parent;
 
                        parent = (SpGistInnerTuple) PageGetItem(page,
                                                                  PageGetItemId(page, xldata->offnumParent));
                        spgUpdateNodeLink(parent, xldata->nodeI,
-                                                         xldata->blknoInner, xldata->offnumInner);
+                                                         blknoInner, xldata->offnumInner);
                }
 
                PageSetLSN(page, lsn);
@@ -771,7 +746,6 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
        }
        if (BufferIsValid(innerBuffer))
                UnlockReleaseBuffer(innerBuffer);
-       bbi++;
 
        /*
         * Now we can release the leaf-page locks.  It's okay to do this before
@@ -783,18 +757,11 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
                UnlockReleaseBuffer(destBuffer);
 
        /* update parent downlink, unless we did it above */
-       if (xldata->blknoParent == InvalidBlockNumber)
-       {
-               /* no parent cause we split the root */
-               Assert(SpGistBlockIsRoot(xldata->blknoInner));
-       }
-       else if (xldata->blknoInner != xldata->blknoParent)
+       if (XLogRecHasBlockRef(record, 3))
        {
                Buffer          parentBuffer;
 
-               if (XLogReadBufferForRedo(lsn, record, bbi,
-                                                                 xldata->node, xldata->blknoParent,
-                                                                 &parentBuffer) == BLK_NEEDS_REDO)
+               if (XLogReadBufferForRedo(lsn, record, 3, &parentBuffer) == BLK_NEEDS_REDO)
                {
                        SpGistInnerTuple parent;
 
@@ -803,7 +770,7 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
                        parent = (SpGistInnerTuple) PageGetItem(page,
                                                                  PageGetItemId(page, xldata->offnumParent));
                        spgUpdateNodeLink(parent, xldata->nodeI,
-                                                         xldata->blknoInner, xldata->offnumInner);
+                                                         blknoInner, xldata->offnumInner);
 
                        PageSetLSN(page, lsn);
                        MarkBufferDirty(parentBuffer);
@@ -811,6 +778,8 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
                if (BufferIsValid(parentBuffer))
                        UnlockReleaseBuffer(parentBuffer);
        }
+       else
+               Assert(xldata->innerIsParent || xldata->isRootSplit);
 }
 
 static void
@@ -844,8 +813,7 @@ spgRedoVacuumLeaf(XLogRecPtr lsn, XLogRecord *record)
        ptr += sizeof(OffsetNumber) * xldata->nChain;
        chainDest = (OffsetNumber *) ptr;
 
-       if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno,
-                                                         &buffer) == BLK_NEEDS_REDO)
+       if (XLogReadBufferForRedo(lsn, record, 0, &buffer) == BLK_NEEDS_REDO)
        {
                page = BufferGetPage(buffer);
 
@@ -907,11 +875,9 @@ spgRedoVacuumRoot(XLogRecPtr lsn, XLogRecord *record)
 
        toDelete = xldata->offsets;
 
-       if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno,
-                                                         &buffer) == BLK_NEEDS_REDO)
+       if (XLogReadBufferForRedo(lsn, record, 0, &buffer) == BLK_NEEDS_REDO)
        {
                page = BufferGetPage(buffer);
-
                /* The tuple numbers are in order */
                PageIndexMultiDelete(page, toDelete, xldata->nDelete);
 
@@ -929,6 +895,7 @@ spgRedoVacuumRedirect(XLogRecPtr lsn, XLogRecord *record)
        spgxlogVacuumRedirect *xldata = (spgxlogVacuumRedirect *) ptr;
        OffsetNumber *itemToPlaceholder;
        Buffer          buffer;
+       Page            page;
 
        itemToPlaceholder = xldata->offsets;
 
@@ -939,17 +906,23 @@ spgRedoVacuumRedirect(XLogRecPtr lsn, XLogRecord *record)
        if (InHotStandby)
        {
                if (TransactionIdIsValid(xldata->newestRedirectXid))
+               {
+                       RelFileNode node;
+
+                       XLogRecGetBlockTag(record, 0, &node, NULL, NULL);
                        ResolveRecoveryConflictWithSnapshot(xldata->newestRedirectXid,
-                                                                                               xldata->node);
+                                                                                               node);
+               }
        }
 
-       if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno,
-                                                         &buffer) == BLK_NEEDS_REDO)
+       if (XLogReadBufferForRedo(lsn, record, 0, &buffer) == BLK_NEEDS_REDO)
        {
-               Page            page = BufferGetPage(buffer);
-               SpGistPageOpaque opaque = SpGistPageGetOpaque(page);
+               SpGistPageOpaque opaque;
                int                     i;
 
+               page = BufferGetPage(buffer);
+               opaque = SpGistPageGetOpaque(page);
+
                /* Convert redirect pointers to plain placeholders */
                for (i = 0; i < xldata->nToPlaceholder; i++)
                {
index 92b12fbb6c2af4b2f86f28fe8fb76f8372ab1549..959e093e920711eb50e815de40c1a4db3bda3d9a 100644 (file)
@@ -440,102 +440,234 @@ happen before the WAL record is inserted; see notes in SyncOneBuffer().)
 Note that marking a buffer dirty with MarkBufferDirty() should only
 happen iff you write a WAL record; see Writing Hints below.
 
-5. If the relation requires WAL-logging, build a WAL log record and pass it
-to XLogInsert(); then update the page's LSN using the returned XLOG
-location.  For instance,
+5. If the relation requires WAL-logging, build a WAL record using
+XLogBeginInsert and XLogRegister* functions, and insert it.
+(See "Constructing a WAL record" below).  Then update the page's LSN using the
+returned XLOG location.  For instance,
 
-               recptr = XLogInsert(rmgr_id, info, rdata);
+               XLogBeginInsert();
+               XLogRegisterBuffer(...)
+               XLogRegisterData(...)
+               recptr = XLogInsert(rmgr_id, info);
 
                PageSetLSN(dp, recptr);
-               // Note that we no longer do PageSetTLI() from 9.3 onwards
-               // since that field on a page has now changed its meaning.
 
 6. END_CRIT_SECTION()
 
 7. Unlock and unpin the buffer(s).
 
-XLogInsert's "rdata" argument is an array of pointer/size items identifying
-chunks of data to be written in the XLOG record, plus optional shared-buffer
-IDs for chunks that are in shared buffers rather than temporary variables.
-The "rdata" array must mention (at least once) each of the shared buffers
-being modified, unless the action is such that the WAL replay routine can
-reconstruct the entire page contents.  XLogInsert includes the logic that
-tests to see whether a shared buffer has been modified since the last
-checkpoint.  If not, the entire page contents are logged rather than just the
-portion(s) pointed to by "rdata".
-
-Because XLogInsert drops the rdata components associated with buffers it
-chooses to log in full, the WAL replay routines normally need to test to see
-which buffers were handled that way --- otherwise they may be misled about
-what the XLOG record actually contains.  XLOG records that describe multi-page
-changes therefore require some care to design: you must be certain that you
-know what data is indicated by each "BKP" bit.  An example of the trickiness
-is that in a HEAP_UPDATE record, BKP(0) normally is associated with the source
-page and BKP(1) is associated with the destination page --- but if these are
-the same page, only BKP(0) would have been set.
-
-For this reason as well as the risk of deadlocking on buffer locks, it's best
-to design WAL records so that they reflect small atomic actions involving just
-one or a few pages.  The current XLOG infrastructure cannot handle WAL records
-involving references to more than four shared buffers, anyway.
-
-In the case where the WAL record contains enough information to re-generate
-the entire contents of a page, do *not* show that page's buffer ID in the
-rdata array, even if some of the rdata items point into the buffer.  This is
-because you don't want XLogInsert to log the whole page contents.  The
-standard replay-routine pattern for this case is
-
-       buffer = XLogReadBuffer(rnode, blkno, true);
-       Assert(BufferIsValid(buffer));
-       page = (Page) BufferGetPage(buffer);
-
-       ... initialize the page ...
-
-       PageSetLSN(page, lsn);
-       MarkBufferDirty(buffer);
-       UnlockReleaseBuffer(buffer);
-
-In the case where the WAL record provides only enough information to
-incrementally update the page, the rdata array *must* mention the buffer
-ID at least once; otherwise there is no defense against torn-page problems.
-The standard replay-routine pattern for this case is
-
-       if (XLogReadBufferForRedo(lsn, record, N, rnode, blkno, &buffer) == BLK_NEEDS_REDO)
-       {
-               page = (Page) BufferGetPage(buffer);
-
-               ... apply the change ...
-
-               PageSetLSN(page, lsn);
-               MarkBufferDirty(buffer);
-       }
-       if (BufferIsValid(buffer))
-               UnlockReleaseBuffer(buffer);
-
-XLogReadBufferForRedo reads the page from disk, and checks what action needs to
-be taken to the page.  If the XLR_BKP_BLOCK(N) flag is set, it restores the
-full page image and returns BLK_RESTORED.  If there is no full page image, but
-page cannot be found or if the change has already been replayed (i.e. the
-page's LSN >= the record we're replaying), it returns BLK_NOTFOUND or BLK_DONE,
-respectively.  Usually, the redo routine only needs to pay attention to the
-BLK_NEEDS_REDO return code, which means that the routine should apply the
-incremental change.  In any case, the caller is responsible for unlocking and
-releasing the buffer.  Note that XLogReadBufferForRedo returns the buffer
-locked even if no redo is required, unless the page does not exist.
-
-As noted above, for a multi-page update you need to be able to determine
-which XLR_BKP_BLOCK(N) flag applies to each page.  If a WAL record reflects
-a combination of fully-rewritable and incremental updates, then the rewritable
-pages don't count for the XLR_BKP_BLOCK(N) numbering.  (XLR_BKP_BLOCK(N) is
-associated with the N'th distinct buffer ID seen in the "rdata" array, and
-per the above discussion, fully-rewritable buffers shouldn't be mentioned in
-"rdata".)
+Complex changes (such as a multilevel index insertion) normally need to be
+described by a series of atomic-action WAL records.  The intermediate states
+must be self-consistent, so that if the replay is interrupted between any
+two actions, the system is fully functional.  In btree indexes, for example,
+a page split requires a new page to be allocated, and an insertion of a new
+key in the parent btree level, but for locking reasons this has to be
+reflected by two separate WAL records.  Replaying the first record, to
+allocate the new page and move tuples to it, sets a flag on the page to
+indicate that the key has not been inserted to the parent yet.  Replaying the
+second record clears the flag.  This intermediate state is never seen by
+other backends during normal operation, because the lock on the child page
+is held across the two actions, but will be seen if the operation is
+interrupted before writing the second WAL record.  The search algorithm works
+with the intermediate state as normal, but if an insertion encounters a page
+with the incomplete-split flag set, it will finish the interrupted split by
+inserting the key to the parent, before proceeding.
+
+
+Constructing a WAL record
+-------------------------
+
+A WAL record consists of a header common to all WAL record types,
+record-specific data, and information about the data blocks modified.  Each
+modified data block is identified by an ID number, and can optionally have
+more record-specific data associated with the block.  If XLogInsert decides
+that a full-page image of a block needs to be taken, the data associated
+with that block is not included.
+
+The API for constructing a WAL record consists of five functions:
+XLogBeginInsert, XLogRegisterBuffer, XLogRegisterData, XLogRegisterBufData,
+and XLogInsert.  First, call XLogBeginInsert().  Then register all the buffers
+modified, and data needed to replay the changes, using XLogRegister*
+functions.  Finally, insert the constructed record to the WAL by calling
+XLogInsert().
+
+       XLogBeginInsert();
+
+       /* register buffers modified as part of this WAL-logged action */
+       XLogRegisterBuffer(0, lbuffer, REGBUF_STANDARD);
+       XLogRegisterBuffer(1, rbuffer, REGBUF_STANDARD);
+
+       /* register data that is always included in the WAL record */
+       XLogRegisterData(&xlrec, SizeOfFictionalAction);
+
+       /*
+        * register data associated with a buffer. This will not be included
+        * in the record if a full-page image is taken.
+        */
+       XLogRegisterBufData(0, tuple->data, tuple->len);
+
+       /* more data associated with the buffer */
+       XLogRegisterBufData(0, data2, len2);
+
+       /*
+        * Ok, all the data and buffers to include in the WAL record have
+        * been registered. Insert the record.
+        */
+       recptr = XLogInsert(RM_FOO_ID, XLOG_FOOBAR_DO_STUFF);
+
+Details of the API functions:
+
+void XLogBeginInsert(void)
+
+    Must be called before XLogRegisterBuffer and XLogRegisterData.
+
+void XLogResetInsertion(void)
+
+    Clear any currently registered data and buffers from the WAL record
+    construction workspace.  This is only needed if you have already called
+    XLogBeginInsert(), but decide to not insert the record after all.
+
+void XLogEnsureRecordSpace(int nbuffers, int nrdatas)
+
+    Normally, the WAL record construction buffers have the following limits:
+
+        * Max 4 registered buffers
+        * Max 20 chunks of registered data
+
+    These default limits are enough for most record types that change some
+    on-disk structures.  For the odd case that requires more data, or needs to
+    modify more buffers, these limits can be raised by calling
+    XLogEnsureRecordSpace().  XLogEnsureRecordSpace() must be called before
+    XLogBeginInsert(), and outside a critical section.
+
+void XLogRegisterBuffer(uint8 block_id, Buffer buf, int flags);
+
+    XLogRegisterBuffer adds information about a data block to the WAL record.
+    block_id is an arbitrary number used to identify this page reference in
+    the redo routine.  The information needed to re-find the page at redo -
+    relfilenode, fork, and block number - is included in the WAL record.
+
+    XLogInsert will automatically include a full copy of the page contents, if
+    this is the first modification of the buffer since the last checkpoint.
+    It is important to register every buffer modified by the action with
+    XLogRegisterBuffer, to avoid torn-page hazards.
+
+    The flags control when and how the buffer contents are included in the
+    WAL record.  Normally, a full-page image is taken only if the page has not
+    been modified since the last checkpoint, and only if full_page_writes=on
+    or an online backup is in progress.  The REGBUF_FORCE_IMAGE flag can be
+    used to force a full-page image to always be included; that is useful
+    e.g. for an operation that rewrites most of the page, so that tracking the
+    details is not worth it.  For the rare case where it is not necessary to
+    protect from torn pages, REGBUF_NO_IMAGE flag can be used to suppress
+    full page image from being taken.  REGBUF_WILL_INIT also suppresses a full
+    page image, but the redo routine must re-generate the page from scratch,
+    without looking at the old page contents.  Re-initializing the page
+    protects from torn page hazards like a full page image does.  If the
+    REGBUF_KEEP_DATA flag is given, the per-buffer data registered with
+    XLogRegisterBufData() is included in the WAL record even if a full-page
+    image is taken.
+
+    The REGBUF_STANDARD flag can be specified together with the other flags to
+    indicate that the page follows the standard page layout.  It causes the
+    area between pd_lower and pd_upper to be left out from the image, reducing
+    WAL volume.
+
+void XLogRegisterData(char *data, int len);
+
+    XLogRegisterData is used to include arbitrary data in the WAL record.  If
+    XLogRegisterData() is called multiple times, the data are appended, and
+    will be made available to the redo routine as one contiguous chunk.
+
+void XLogRegisterBufData(uint8 block_id, char *data, int len);
+
+    XLogRegisterBufData is used to include data associated with a particular
+    buffer that was registered earlier with XLogRegisterBuffer().  If
+    XLogRegisterBufData() is called multiple times with the same 'block_id', the
+    data are appended, and will be made available to the redo routine as one
+    contiguous chunk.
+
+    If a full-page image of the buffer is taken at insertion, the data is not
+    included in the WAL record, unless the REGBUF_KEEP_DATA flag is used.
+
+
+Writing a REDO routine
+----------------------
+
+A REDO routine uses the data and page references included in the WAL record
+to reconstruct the new state of the page.  To access the data and pages
+included in the WAL record, the following functions are available:
+
+char *
+XLogRecGetData(XLogRecord *record)
+
+    Returns the "main" chunk of data included in the WAL record.  That is, the
+    data included in the record with XLogRegisterData(...).
+
+XLogRedoAction
+XLogReadBufferForRedo(XLogRecPtr lsn, XLogRecord *record, uint8 block_id,
+                      Buffer *buf)
+
+    Reads a block associated with the WAL record currently being replayed, with
+    the given block id.  The block is read into a shared buffer and locked in
+    exclusive mode.  The shared buffer is returned in *buf, or InvalidBuffer if
+    the page could not be found.  Returns one of the following result codes:
+
+         BLK_NEEDS_REDO    - block needs to be replayed
+         BLK_DONE          - block was already replayed
+         BLK_RESTORED      - block was restored from a full-page image
+         BLK_NOTFOUND      - block was not found (and hence does not need to be
+                             replayed)
+
+    The REDO routine must redo the actions to the page if XLogReadBufferForRedo
+    returns BLK_NEEDS_REDO.  In other cases, no further action is required,
+    although the result code can be used to distinguish the reason.
+
+    After modifying the page (if it was necessary), the REDO routine must
+    unlock and release the buffer.  Note that XLogReadBufferForRedo locks the
+    page even if no action is required.
+
+XLogRedoAction
+XLogReadBufferForRedoExtended(XLogRecPtr lsn, XLogRecord *record,
+                              uint8 block_id, ReadBufferMode mode,
+                              bool get_cleanup_lock, Buffer *buf)
+
+    Like XLogReadBufferForRedo(), but with a few extra options.
+
+    'mode' can be passed to e.g. to force the page to be zeroed, instead of reading
+    it from disk.  This RBM_ZERO mode should be used to re-initialize pages
+    registered in the REGBUF_WILL_INIT mode in XLogRegisterBuffer().
+
+    If 'get_cleanup_lock' is TRUE, a stronger "cleanup" lock on the page is
+    acquired, instead of a regular exclusive-lock.
+
+char *
+XLogGetBlockData(XLogRecord *record, uint8 block_id, Size *len)
+
+    Returns a chunk of data for block with given id, included in the WAL record
+    (with XLogRegisterBufData(id, ...)).  The length of the data is returned in
+    *len.  This is typically used after XLogReadBufferForRedo() returned
+    BLK_NEEDS_REDO, to get the information required to redo the actions on the
+    page.  If no data with the given id is included, perhaps because a
+    full-page image of the associated buffer was taken instead, an error is
+    thrown.
+
+bool
+XLogRecHasBlockRef(XLogRecord *record, uint8 block_id);
+
+    Returns true if the record has a block reference with given ID.
+
+void
+XLogRecGetBlockTag(XLogRecord *record, uint8 block_id, RelFileNode *rnode,
+                   ForkNumber *forknum, BlockNumber *blknum);
+
+    Returns the relfilenode, fork number, and block number of the block
+    registered with the given ID.
+
 
 When replaying a WAL record that describes changes on multiple pages, you
 must be careful to lock the pages properly to prevent concurrent Hot Standby
-queries from seeing an inconsistent state.  If this requires that two
-or more buffer locks be held concurrently, you must lock the pages in
-appropriate order, and not release the locks until all the changes are done.
+queries from seeing an inconsistent state.
 
 Note that we must only use PageSetLSN/PageGetLSN() when we know the action
 is serialised. Only Startup process may modify data blocks during recovery,
@@ -545,23 +677,6 @@ either an exclusive buffer lock or a shared lock plus buffer header lock,
 or be writing the data block directly rather than through shared buffers
 while holding AccessExclusiveLock on the relation.
 
-Due to all these constraints, complex changes (such as a multilevel index
-insertion) normally need to be described by a series of atomic-action WAL
-records. The intermediate states must be self-consistent, so that if the
-replay is interrupted between any two actions, the system is fully
-functional. In btree indexes, for example, a page split requires a new page
-to be allocated, and an insertion of a new key in the parent btree level,
-but for locking reasons this has to be reflected by two separate WAL
-records. Replaying the first record, to allocate the new page and move
-tuples to it, sets a flag on the page to indicate that the key has not been
-inserted to the parent yet. Replaying the second record clears the flag.
-This intermediate state is never seen by other backends during normal
-operation, because the lock on the child page is held across the two
-actions, but will be seen if the operation is interrupted before writing
-the second WAL record. The search algorithm works with the intermediate
-state as normal, but if an insertion encounters a page with the
-incomplete-split flag set, it will finish the interrupted split by
-inserting the key to the parent, before proceeding.
 
 Writing Hints
 -------------
index 5ee070bd0a980aa15716e452ae48a6f03c0b8730..37e0a284a3dad7eb2706f735cb828c0d3846811b 100644 (file)
@@ -699,13 +699,9 @@ CLOGPagePrecedes(int page1, int page2)
 static void
 WriteZeroPageXlogRec(int pageno)
 {
-       XLogRecData rdata;
-
-       rdata.data = (char *) (&pageno);
-       rdata.len = sizeof(int);
-       rdata.buffer = InvalidBuffer;
-       rdata.next = NULL;
-       (void) XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE, &rdata);
+       XLogBeginInsert();
+       XLogRegisterData((char *) (&pageno), sizeof(int));
+       (void) XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE);
 }
 
 /*
@@ -717,14 +713,11 @@ WriteZeroPageXlogRec(int pageno)
 static void
 WriteTruncateXlogRec(int pageno)
 {
-       XLogRecData rdata;
        XLogRecPtr      recptr;
 
-       rdata.data = (char *) (&pageno);
-       rdata.len = sizeof(int);
-       rdata.buffer = InvalidBuffer;
-       rdata.next = NULL;
-       recptr = XLogInsert(RM_CLOG_ID, CLOG_TRUNCATE, &rdata);
+       XLogBeginInsert();
+       XLogRegisterData((char *) (&pageno), sizeof(int));
+       recptr = XLogInsert(RM_CLOG_ID, CLOG_TRUNCATE);
        XLogFlush(recptr);
 }
 
@@ -737,7 +730,7 @@ clog_redo(XLogRecPtr lsn, XLogRecord *record)
        uint8           info = record->xl_info & ~XLR_INFO_MASK;
 
        /* Backup blocks are not used in clog records */
-       Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+       Assert(!XLogRecHasAnyBlockRefs(record));
 
        if (info == CLOG_ZEROPAGE)
        {
index bfbe738530e7bfd4b5d8c2e967ad48717d08ea4c..33aed44d907fc631ad8f52eb02bec3234dc5fc28 100644 (file)
@@ -720,7 +720,6 @@ MultiXactIdCreateFromMembers(int nmembers, MultiXactMember *members)
 {
        MultiXactId multi;
        MultiXactOffset offset;
-       XLogRecData rdata[2];
        xl_multixact_create xlrec;
 
        debug_elog3(DEBUG2, "Create: %s",
@@ -796,17 +795,11 @@ MultiXactIdCreateFromMembers(int nmembers, MultiXactMember *members)
         * the status flags in one XLogRecData, then all the xids in another one?
         * Not clear that it's worth the trouble though.
         */
-       rdata[0].data = (char *) (&xlrec);
-       rdata[0].len = SizeOfMultiXactCreate;
-       rdata[0].buffer = InvalidBuffer;
-       rdata[0].next = &(rdata[1]);
+       XLogBeginInsert();
+       XLogRegisterData((char *) (&xlrec), SizeOfMultiXactCreate);
+       XLogRegisterData((char *) members, nmembers * sizeof(MultiXactMember));
 
-       rdata[1].data = (char *) members;
-       rdata[1].len = nmembers * sizeof(MultiXactMember);
-       rdata[1].buffer = InvalidBuffer;
-       rdata[1].next = NULL;
-
-       (void) XLogInsert(RM_MULTIXACT_ID, XLOG_MULTIXACT_CREATE_ID, rdata);
+       (void) XLogInsert(RM_MULTIXACT_ID, XLOG_MULTIXACT_CREATE_ID);
 
        /* Now enter the information into the OFFSETs and MEMBERs logs */
        RecordNewMultiXact(multi, offset, nmembers, members);
@@ -2704,13 +2697,9 @@ MultiXactOffsetPrecedes(MultiXactOffset offset1, MultiXactOffset offset2)
 static void
 WriteMZeroPageXlogRec(int pageno, uint8 info)
 {
-       XLogRecData rdata;
-
-       rdata.data = (char *) (&pageno);
-       rdata.len = sizeof(int);
-       rdata.buffer = InvalidBuffer;
-       rdata.next = NULL;
-       (void) XLogInsert(RM_MULTIXACT_ID, info, &rdata);
+       XLogBeginInsert();
+       XLogRegisterData((char *) (&pageno), sizeof(int));
+       (void) XLogInsert(RM_MULTIXACT_ID, info);
 }
 
 /*
@@ -2722,7 +2711,7 @@ multixact_redo(XLogRecPtr lsn, XLogRecord *record)
        uint8           info = record->xl_info & ~XLR_INFO_MASK;
 
        /* Backup blocks are not used in multixact records */
-       Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+       Assert(!XLogRecHasAnyBlockRefs(record));
 
        if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE)
        {
index d23c292edcdc3a398cd43a5fcac5fbd68a3edaf2..633af63b7c7a4bf4d8a34de0f403a4e1bec8eb63 100644 (file)
@@ -889,14 +889,20 @@ typedef struct TwoPhaseRecordOnDisk
 
 /*
  * During prepare, the state file is assembled in memory before writing it
- * to WAL and the actual state file.  We use a chain of XLogRecData blocks
- * so that we will be able to pass the state file contents directly to
- * XLogInsert.
+ * to WAL and the actual state file.  We use a chain of StateFileChunk blocks.
  */
+typedef struct StateFileChunk
+{
+       char       *data;
+       uint32          len;
+       struct StateFileChunk *next;
+} StateFileChunk;
+
 static struct xllist
 {
-       XLogRecData *head;                      /* first data block in the chain */
-       XLogRecData *tail;                      /* last block in chain */
+       StateFileChunk *head;           /* first data block in the chain */
+       StateFileChunk *tail;           /* last block in chain */
+       uint32          num_chunks;
        uint32          bytes_free;             /* free bytes left in tail block */
        uint32          total_len;              /* total data bytes in chain */
 }      records;
@@ -917,11 +923,11 @@ save_state_data(const void *data, uint32 len)
 
        if (padlen > records.bytes_free)
        {
-               records.tail->next = palloc0(sizeof(XLogRecData));
+               records.tail->next = palloc0(sizeof(StateFileChunk));
                records.tail = records.tail->next;
-               records.tail->buffer = InvalidBuffer;
                records.tail->len = 0;
                records.tail->next = NULL;
+               records.num_chunks++;
 
                records.bytes_free = Max(padlen, 512);
                records.tail->data = palloc(records.bytes_free);
@@ -951,8 +957,7 @@ StartPrepare(GlobalTransaction gxact)
        SharedInvalidationMessage *invalmsgs;
 
        /* Initialize linked list */
-       records.head = palloc0(sizeof(XLogRecData));
-       records.head->buffer = InvalidBuffer;
+       records.head = palloc0(sizeof(StateFileChunk));
        records.head->len = 0;
        records.head->next = NULL;
 
@@ -960,6 +965,7 @@ StartPrepare(GlobalTransaction gxact)
        records.head->data = palloc(records.bytes_free);
 
        records.tail = records.head;
+       records.num_chunks = 1;
 
        records.total_len = 0;
 
@@ -1019,7 +1025,7 @@ EndPrepare(GlobalTransaction gxact)
        TransactionId xid = pgxact->xid;
        TwoPhaseFileHeader *hdr;
        char            path[MAXPGPATH];
-       XLogRecData *record;
+       StateFileChunk *record;
        pg_crc32        statefile_crc;
        pg_crc32        bogus_crc;
        int                     fd;
@@ -1117,12 +1123,18 @@ EndPrepare(GlobalTransaction gxact)
         * We save the PREPARE record's location in the gxact for later use by
         * CheckPointTwoPhase.
         */
+       XLogEnsureRecordSpace(0, records.num_chunks);
+
        START_CRIT_SECTION();
 
        MyPgXact->delayChkpt = true;
 
-       gxact->prepare_lsn = XLogInsert(RM_XACT_ID, XLOG_XACT_PREPARE,
-                                                                       records.head);
+       XLogBeginInsert();
+       for (record = records.head; record != NULL; record = record->next)
+       {
+               XLogRegisterData(record->data, record->len);
+       }
+       gxact->prepare_lsn = XLogInsert(RM_XACT_ID, XLOG_XACT_PREPARE);
        XLogFlush(gxact->prepare_lsn);
 
        /* If we crash now, we have prepared: WAL replay will fix things */
@@ -1180,6 +1192,7 @@ EndPrepare(GlobalTransaction gxact)
        SyncRepWaitForLSN(gxact->prepare_lsn);
 
        records.tail = records.head = NULL;
+       records.num_chunks = 0;
 }
 
 /*
@@ -2071,8 +2084,6 @@ RecordTransactionCommitPrepared(TransactionId xid,
                                                                SharedInvalidationMessage *invalmsgs,
                                                                bool initfileinval)
 {
-       XLogRecData rdata[4];
-       int                     lastrdata = 0;
        xl_xact_commit_prepared xlrec;
        XLogRecPtr      recptr;
 
@@ -2094,39 +2105,24 @@ RecordTransactionCommitPrepared(TransactionId xid,
        xlrec.crec.nsubxacts = nchildren;
        xlrec.crec.nmsgs = ninvalmsgs;
 
-       rdata[0].data = (char *) (&xlrec);
-       rdata[0].len = MinSizeOfXactCommitPrepared;
-       rdata[0].buffer = InvalidBuffer;
+       XLogBeginInsert();
+       XLogRegisterData((char *) (&xlrec), MinSizeOfXactCommitPrepared);
+
        /* dump rels to delete */
        if (nrels > 0)
-       {
-               rdata[0].next = &(rdata[1]);
-               rdata[1].data = (char *) rels;
-               rdata[1].len = nrels * sizeof(RelFileNode);
-               rdata[1].buffer = InvalidBuffer;
-               lastrdata = 1;
-       }
+               XLogRegisterData((char *) rels, nrels * sizeof(RelFileNode));
+
        /* dump committed child Xids */
        if (nchildren > 0)
-       {
-               rdata[lastrdata].next = &(rdata[2]);
-               rdata[2].data = (char *) children;
-               rdata[2].len = nchildren * sizeof(TransactionId);
-               rdata[2].buffer = InvalidBuffer;
-               lastrdata = 2;
-       }
+               XLogRegisterData((char *) children,
+                                                        nchildren * sizeof(TransactionId));
+
        /* dump cache invalidation messages */
        if (ninvalmsgs > 0)
-       {
-               rdata[lastrdata].next = &(rdata[3]);
-               rdata[3].data = (char *) invalmsgs;
-               rdata[3].len = ninvalmsgs * sizeof(SharedInvalidationMessage);
-               rdata[3].buffer = InvalidBuffer;
-               lastrdata = 3;
-       }
-       rdata[lastrdata].next = NULL;
+               XLogRegisterData((char *) invalmsgs,
+                                                        ninvalmsgs * sizeof(SharedInvalidationMessage));
 
-       recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT_PREPARED, rdata);
+       recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT_PREPARED);
 
        /*
         * We don't currently try to sleep before flush here ... nor is there any
@@ -2169,8 +2165,6 @@ RecordTransactionAbortPrepared(TransactionId xid,
                                                           int nrels,
                                                           RelFileNode *rels)
 {
-       XLogRecData rdata[3];
-       int                     lastrdata = 0;
        xl_xact_abort_prepared xlrec;
        XLogRecPtr      recptr;
 
@@ -2189,30 +2183,20 @@ RecordTransactionAbortPrepared(TransactionId xid,
        xlrec.arec.xact_time = GetCurrentTimestamp();
        xlrec.arec.nrels = nrels;
        xlrec.arec.nsubxacts = nchildren;
-       rdata[0].data = (char *) (&xlrec);
-       rdata[0].len = MinSizeOfXactAbortPrepared;
-       rdata[0].buffer = InvalidBuffer;
+
+       XLogBeginInsert();
+       XLogRegisterData((char *) (&xlrec), MinSizeOfXactAbortPrepared);
+
        /* dump rels to delete */
        if (nrels > 0)
-       {
-               rdata[0].next = &(rdata[1]);
-               rdata[1].data = (char *) rels;
-               rdata[1].len = nrels * sizeof(RelFileNode);
-               rdata[1].buffer = InvalidBuffer;
-               lastrdata = 1;
-       }
+               XLogRegisterData((char *) rels, nrels * sizeof(RelFileNode));
+
        /* dump committed child Xids */
        if (nchildren > 0)
-       {
-               rdata[lastrdata].next = &(rdata[2]);
-               rdata[2].data = (char *) children;
-               rdata[2].len = nchildren * sizeof(TransactionId);
-               rdata[2].buffer = InvalidBuffer;
-               lastrdata = 2;
-       }
-       rdata[lastrdata].next = NULL;
+               XLogRegisterData((char *) children,
+                                                        nchildren * sizeof(TransactionId));
 
-       recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT_PREPARED, rdata);
+       recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT_PREPARED);
 
        /* Always flush, since we're about to remove the 2PC state file */
        XLogFlush(recptr);
index 6f92bad07ca2698568946f0f5fc998794db7fd6e..0e60f178b4197dcfd5add22608c81d0ebcc09927 100644 (file)
@@ -571,7 +571,6 @@ AssignTransactionId(TransactionState s)
                if (nUnreportedXids >= PGPROC_MAX_CACHED_SUBXIDS ||
                        log_unknown_top)
                {
-                       XLogRecData rdata[2];
                        xl_xact_assignment xlrec;
 
                        /*
@@ -582,17 +581,12 @@ AssignTransactionId(TransactionState s)
                        Assert(TransactionIdIsValid(xlrec.xtop));
                        xlrec.nsubxacts = nUnreportedXids;
 
-                       rdata[0].data = (char *) &xlrec;
-                       rdata[0].len = MinSizeOfXactAssignment;
-                       rdata[0].buffer = InvalidBuffer;
-                       rdata[0].next = &rdata[1];
+                       XLogBeginInsert();
+                       XLogRegisterData((char *) &xlrec, MinSizeOfXactAssignment);
+                       XLogRegisterData((char *) unreportedXids,
+                                                                nUnreportedXids * sizeof(TransactionId));
 
-                       rdata[1].data = (char *) unreportedXids;
-                       rdata[1].len = nUnreportedXids * sizeof(TransactionId);
-                       rdata[1].buffer = InvalidBuffer;
-                       rdata[1].next = NULL;
-
-                       (void) XLogInsert(RM_XACT_ID, XLOG_XACT_ASSIGNMENT, rdata);
+                       (void) XLogInsert(RM_XACT_ID, XLOG_XACT_ASSIGNMENT);
 
                        nUnreportedXids = 0;
                        /* mark top, not current xact as having been logged */
@@ -1087,8 +1081,6 @@ RecordTransactionCommit(void)
                if (nrels > 0 || nmsgs > 0 || RelcacheInitFileInval || forceSyncCommit ||
                        XLogLogicalInfoActive())
                {
-                       XLogRecData rdata[4];
-                       int                     lastrdata = 0;
                        xl_xact_commit xlrec;
 
                        /*
@@ -1107,63 +1099,38 @@ RecordTransactionCommit(void)
                        xlrec.nrels = nrels;
                        xlrec.nsubxacts = nchildren;
                        xlrec.nmsgs = nmsgs;
-                       rdata[0].data = (char *) (&xlrec);
-                       rdata[0].len = MinSizeOfXactCommit;
-                       rdata[0].buffer = InvalidBuffer;
+
+                       XLogBeginInsert();
+                       XLogRegisterData((char *) (&xlrec), MinSizeOfXactCommit);
                        /* dump rels to delete */
                        if (nrels > 0)
-                       {
-                               rdata[0].next = &(rdata[1]);
-                               rdata[1].data = (char *) rels;
-                               rdata[1].len = nrels * sizeof(RelFileNode);
-                               rdata[1].buffer = InvalidBuffer;
-                               lastrdata = 1;
-                       }
+                               XLogRegisterData((char *) rels,
+                                                                        nrels * sizeof(RelFileNode));
                        /* dump committed child Xids */
                        if (nchildren > 0)
-                       {
-                               rdata[lastrdata].next = &(rdata[2]);
-                               rdata[2].data = (char *) children;
-                               rdata[2].len = nchildren * sizeof(TransactionId);
-                               rdata[2].buffer = InvalidBuffer;
-                               lastrdata = 2;
-                       }
+                               XLogRegisterData((char *) children,
+                                                                        nchildren * sizeof(TransactionId));
                        /* dump shared cache invalidation messages */
                        if (nmsgs > 0)
-                       {
-                               rdata[lastrdata].next = &(rdata[3]);
-                               rdata[3].data = (char *) invalMessages;
-                               rdata[3].len = nmsgs * sizeof(SharedInvalidationMessage);
-                               rdata[3].buffer = InvalidBuffer;
-                               lastrdata = 3;
-                       }
-                       rdata[lastrdata].next = NULL;
-
-                       (void) XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT, rdata);
+                               XLogRegisterData((char *) invalMessages,
+                                                                        nmsgs * sizeof(SharedInvalidationMessage));
+                       (void) XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT);
                }
                else
                {
-                       XLogRecData rdata[2];
-                       int                     lastrdata = 0;
                        xl_xact_commit_compact xlrec;
 
                        xlrec.xact_time = xactStopTimestamp;
                        xlrec.nsubxacts = nchildren;
-                       rdata[0].data = (char *) (&xlrec);
-                       rdata[0].len = MinSizeOfXactCommitCompact;
-                       rdata[0].buffer = InvalidBuffer;
+
+                       XLogBeginInsert();
+                       XLogRegisterData((char *) (&xlrec), MinSizeOfXactCommitCompact);
                        /* dump committed child Xids */
                        if (nchildren > 0)
-                       {
-                               rdata[0].next = &(rdata[1]);
-                               rdata[1].data = (char *) children;
-                               rdata[1].len = nchildren * sizeof(TransactionId);
-                               rdata[1].buffer = InvalidBuffer;
-                               lastrdata = 1;
-                       }
-                       rdata[lastrdata].next = NULL;
+                               XLogRegisterData((char *) children,
+                                                                        nchildren * sizeof(TransactionId));
 
-                       (void) XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT_COMPACT, rdata);
+                       (void) XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT_COMPACT);
                }
        }
 
@@ -1436,8 +1403,6 @@ RecordTransactionAbort(bool isSubXact)
        RelFileNode *rels;
        int                     nchildren;
        TransactionId *children;
-       XLogRecData rdata[3];
-       int                     lastrdata = 0;
        xl_xact_abort xlrec;
 
        /*
@@ -1486,30 +1451,20 @@ RecordTransactionAbort(bool isSubXact)
        }
        xlrec.nrels = nrels;
        xlrec.nsubxacts = nchildren;
-       rdata[0].data = (char *) (&xlrec);
-       rdata[0].len = MinSizeOfXactAbort;
-       rdata[0].buffer = InvalidBuffer;
+
+       XLogBeginInsert();
+       XLogRegisterData((char *) (&xlrec), MinSizeOfXactAbort);
+
        /* dump rels to delete */
        if (nrels > 0)
-       {
-               rdata[0].next = &(rdata[1]);
-               rdata[1].data = (char *) rels;
-               rdata[1].len = nrels * sizeof(RelFileNode);
-               rdata[1].buffer = InvalidBuffer;
-               lastrdata = 1;
-       }
+               XLogRegisterData((char *) rels, nrels * sizeof(RelFileNode));
+
        /* dump committed child Xids */
        if (nchildren > 0)
-       {
-               rdata[lastrdata].next = &(rdata[2]);
-               rdata[2].data = (char *) children;
-               rdata[2].len = nchildren * sizeof(TransactionId);
-               rdata[2].buffer = InvalidBuffer;
-               lastrdata = 2;
-       }
-       rdata[lastrdata].next = NULL;
+               XLogRegisterData((char *) children,
+                                                nchildren * sizeof(TransactionId));
 
-       (void) XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT, rdata);
+       (void) XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT);
 
        /*
         * Report the latest async abort LSN, so that the WAL writer knows to
@@ -2351,6 +2306,9 @@ AbortTransaction(void)
        AbortBufferIO();
        UnlockBuffers();
 
+       /* Reset WAL record construction buffers */
+       XLogResetInsertion();
+
        /*
         * Also clean up any open wait for lock, since the lock manager will choke
         * if we try to wait for another lock before doing this.
@@ -4299,6 +4257,9 @@ AbortSubTransaction(void)
        AbortBufferIO();
        UnlockBuffers();
 
+       /* Reset WAL record construction buffers */
+       XLogResetInsertion();
+
        /*
         * Also clean up any open wait for lock, since the lock manager will choke
         * if we try to wait for another lock before doing this.
@@ -4943,7 +4904,7 @@ xact_redo(XLogRecPtr lsn, XLogRecord *record)
        uint8           info = record->xl_info & ~XLR_INFO_MASK;
 
        /* Backup blocks are not used in xact records */
-       Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+       Assert(!XLogRecHasAnyBlockRefs(record));
 
        if (info == XLOG_XACT_COMMIT_COMPACT)
        {
index 99f702cb804ece6fad877daf445d3cc2697635e1..27edbc69f37f8588032e9b71e9cd12c372045080 100644 (file)
@@ -866,16 +866,19 @@ XLogInsertRecord(XLogRecData *rdata, XLogRecPtr fpw_lsn)
        bool            inserted;
        XLogRecord *rechdr = (XLogRecord *) rdata->data;
        bool            isLogSwitch = (rechdr->xl_rmid == RM_XLOG_ID &&
-                                                          rechdr->xl_info == XLOG_SWITCH);
+                                          (rechdr->xl_info & XLR_RMGR_INFO_MASK) == XLOG_SWITCH);
        XLogRecPtr      StartPos;
        XLogRecPtr      EndPos;
 
+       /* The first chunk should contain just the record header */
+       Assert(rdata->len == SizeOfXLogRecord);
+
        /* cross-check on whether we should be here or not */
        if (!XLogInsertAllowed())
                elog(ERROR, "cannot make new WAL entries during recovery");
 
        /*
-        * Calculate CRC of the data, including all the backup blocks
+        * Calculate CRC of the data
         *
         * Note that the record header isn't added into the CRC initially since we
         * don't know the prev-link yet.  Thus, the CRC will represent the CRC of
@@ -1054,29 +1057,24 @@ XLogInsertRecord(XLogRecData *rdata, XLogRecPtr fpw_lsn)
        if (XLOG_DEBUG)
        {
                StringInfoData buf;
+               StringInfoData recordbuf;
                MemoryContext oldCxt = MemoryContextSwitchTo(walDebugCxt);
 
+               /*
+                * We have to piece together the WAL record data from the
+                * XLogRecData entries, so that we can pass it to the rm_desc
+                * function as one contiguous chunk.
+                */
+               initStringInfo(&recordbuf);
+               for (rdt = rdata; rdt != NULL; rdt = rdt->next)
+                       appendBinaryStringInfo(&recordbuf, rdt->data, rdt->len);
+
                initStringInfo(&buf);
                appendStringInfo(&buf, "INSERT @ %X/%X: ",
                                                 (uint32) (EndPos >> 32), (uint32) EndPos);
-               xlog_outrec(&buf, rechdr);
-               if (rdata->data != NULL)
-               {
-                       StringInfoData recordbuf;
-
-                       /*
-                        * We have to piece together the WAL record data from the
-                        * XLogRecData entries, so that we can pass it to the rm_desc
-                        * function as one contiguous chunk.
-                        */
-                       initStringInfo(&recordbuf);
-                       appendBinaryStringInfo(&recordbuf, (char *) rechdr, sizeof(XLogRecord));
-                       for (; rdata != NULL; rdata = rdata->next)
-                               appendBinaryStringInfo(&recordbuf, rdata->data, rdata->len);
-
-                       appendStringInfoString(&buf, " - ");
-                       xlog_outdesc(&buf, rechdr->xl_rmid, (XLogRecord *) recordbuf.data);
-               }
+               xlog_outrec(&buf, (XLogRecord *) recordbuf.data);
+               appendStringInfoString(&buf, " - ");
+               xlog_outdesc(&buf, rechdr->xl_rmid, (XLogRecord *) recordbuf.data);
                elog(LOG, "%s", buf.data);
 
                MemoryContextSwitchTo(oldCxt);
@@ -1234,9 +1232,6 @@ CopyXLogRecordToWAL(int write_len, bool isLogSwitch, XLogRecData *rdata,
        XLogRecPtr      CurrPos;
        XLogPageHeader pagehdr;
 
-       /* The first chunk is the record header */
-       Assert(rdata->len == SizeOfXLogRecord);
-
        /*
         * Get a pointer to the right place in the right WAL buffer to start
         * inserting to.
@@ -7192,6 +7187,9 @@ InitXLOGAccess(void)
        (void) GetRedoRecPtr();
        /* Also update our copy of doPageWrites. */
        doPageWrites = (Insert->fullPageWrites || Insert->forcePageWrites);
+
+       /* Also initialize the working areas to construct WAL records */
+       InitXLogInsert();
 }
 
 /*
@@ -7488,7 +7486,6 @@ CreateCheckPoint(int flags)
        CheckPoint      checkPoint;
        XLogRecPtr      recptr;
        XLogCtlInsert *Insert = &XLogCtl->Insert;
-       XLogRecData rdata;
        uint32          freespace;
        XLogSegNo       _logSegNo;
        XLogRecPtr      curInsert;
@@ -7758,15 +7755,11 @@ CreateCheckPoint(int flags)
        /*
         * Now insert the checkpoint record into XLOG.
         */
-       rdata.data = (char *) (&checkPoint);
-       rdata.len = sizeof(checkPoint);
-       rdata.buffer = InvalidBuffer;
-       rdata.next = NULL;
-
+       XLogBeginInsert();
+       XLogRegisterData((char *) (&checkPoint), sizeof(checkPoint));
        recptr = XLogInsert(RM_XLOG_ID,
                                                shutdown ? XLOG_CHECKPOINT_SHUTDOWN :
-                                               XLOG_CHECKPOINT_ONLINE,
-                                               &rdata);
+                                               XLOG_CHECKPOINT_ONLINE);
 
        XLogFlush(recptr);
 
@@ -7906,7 +7899,6 @@ static void
 CreateEndOfRecoveryRecord(void)
 {
        xl_end_of_recovery xlrec;
-       XLogRecData rdata;
        XLogRecPtr      recptr;
 
        /* sanity check */
@@ -7924,12 +7916,9 @@ CreateEndOfRecoveryRecord(void)
 
        START_CRIT_SECTION();
 
-       rdata.data = (char *) &xlrec;
-       rdata.len = sizeof(xl_end_of_recovery);
-       rdata.buffer = InvalidBuffer;
-       rdata.next = NULL;
-
-       recptr = XLogInsert(RM_XLOG_ID, XLOG_END_OF_RECOVERY, &rdata);
+       XLogBeginInsert();
+       XLogRegisterData((char *) &xlrec, sizeof(xl_end_of_recovery));
+       recptr = XLogInsert(RM_XLOG_ID, XLOG_END_OF_RECOVERY);
 
        XLogFlush(recptr);
 
@@ -8305,13 +8294,9 @@ KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo)
 void
 XLogPutNextOid(Oid nextOid)
 {
-       XLogRecData rdata;
-
-       rdata.data = (char *) (&nextOid);
-       rdata.len = sizeof(Oid);
-       rdata.buffer = InvalidBuffer;
-       rdata.next = NULL;
-       (void) XLogInsert(RM_XLOG_ID, XLOG_NEXTOID, &rdata);
+       XLogBeginInsert();
+       XLogRegisterData((char *) (&nextOid), sizeof(Oid));
+       (void) XLogInsert(RM_XLOG_ID, XLOG_NEXTOID);
 
        /*
         * We need not flush the NEXTOID record immediately, because any of the
@@ -8347,15 +8332,10 @@ XLogRecPtr
 RequestXLogSwitch(void)
 {
        XLogRecPtr      RecPtr;
-       XLogRecData rdata;
 
-       /* XLOG SWITCH, alone among xlog record types, has no data */
-       rdata.buffer = InvalidBuffer;
-       rdata.data = NULL;
-       rdata.len = 0;
-       rdata.next = NULL;
-
-       RecPtr = XLogInsert(RM_XLOG_ID, XLOG_SWITCH, &rdata);
+       /* XLOG SWITCH has no data */
+       XLogBeginInsert();
+       RecPtr = XLogInsert(RM_XLOG_ID, XLOG_SWITCH);
 
        return RecPtr;
 }
@@ -8367,18 +8347,15 @@ XLogRecPtr
 XLogRestorePoint(const char *rpName)
 {
        XLogRecPtr      RecPtr;
-       XLogRecData rdata;
        xl_restore_point xlrec;
 
        xlrec.rp_time = GetCurrentTimestamp();
        strlcpy(xlrec.rp_name, rpName, MAXFNAMELEN);
 
-       rdata.buffer = InvalidBuffer;
-       rdata.data = (char *) &xlrec;
-       rdata.len = sizeof(xl_restore_point);
-       rdata.next = NULL;
+       XLogBeginInsert();
+       XLogRegisterData((char *) &xlrec, sizeof(xl_restore_point));
 
-       RecPtr = XLogInsert(RM_XLOG_ID, XLOG_RESTORE_POINT, &rdata);
+       RecPtr = XLogInsert(RM_XLOG_ID, XLOG_RESTORE_POINT);
 
        ereport(LOG,
                        (errmsg("restore point \"%s\" created at %X/%X",
@@ -8410,7 +8387,6 @@ XLogReportParameters(void)
                 */
                if (wal_level != ControlFile->wal_level || XLogIsNeeded())
                {
-                       XLogRecData rdata;
                        xl_parameter_change xlrec;
                        XLogRecPtr      recptr;
 
@@ -8421,12 +8397,10 @@ XLogReportParameters(void)
                        xlrec.wal_level = wal_level;
                        xlrec.wal_log_hints = wal_log_hints;
 
-                       rdata.buffer = InvalidBuffer;
-                       rdata.data = (char *) &xlrec;
-                       rdata.len = sizeof(xlrec);
-                       rdata.next = NULL;
+                       XLogBeginInsert();
+                       XLogRegisterData((char *) &xlrec, sizeof(xlrec));
 
-                       recptr = XLogInsert(RM_XLOG_ID, XLOG_PARAMETER_CHANGE, &rdata);
+                       recptr = XLogInsert(RM_XLOG_ID, XLOG_PARAMETER_CHANGE);
                        XLogFlush(recptr);
                }
 
@@ -8484,14 +8458,10 @@ UpdateFullPageWrites(void)
         */
        if (XLogStandbyInfoActive() && !RecoveryInProgress())
        {
-               XLogRecData rdata;
-
-               rdata.data = (char *) (&fullPageWrites);
-               rdata.len = sizeof(bool);
-               rdata.buffer = InvalidBuffer;
-               rdata.next = NULL;
+               XLogBeginInsert();
+               XLogRegisterData((char *) (&fullPageWrites), sizeof(bool));
 
-               XLogInsert(RM_XLOG_ID, XLOG_FPW_CHANGE, &rdata);
+               XLogInsert(RM_XLOG_ID, XLOG_FPW_CHANGE);
        }
 
        if (!fullPageWrites)
@@ -8560,8 +8530,8 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
 {
        uint8           info = record->xl_info & ~XLR_INFO_MASK;
 
-       /* Backup blocks are not used by XLOG rmgr */
-       Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+       /* in XLOG rmgr, backup blocks are only used by XLOG_FPI records */
+       Assert(!XLogRecHasAnyBlockRefs(record) || info == XLOG_FPI);
 
        if (info == XLOG_NEXTOID)
        {
@@ -8748,14 +8718,12 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
        }
        else if (info == XLOG_FPI)
        {
-               char       *data;
-               BkpBlock        bkpb;
+               Buffer          buffer;
 
                /*
-                * Full-page image (FPI) records contain a backup block stored
-                * "inline" in the normal data since the locking when writing hint
-                * records isn't sufficient to use the normal backup block mechanism,
-                * which assumes exclusive lock on the buffer supplied.
+                * Full-page image (FPI) records contain nothing else but a backup
+                * block. The block reference must include a full-page image -
+                * otherwise there would be no point in this record.
                 *
                 * Since the only change in these backup block are hint bits, there
                 * are no recovery conflicts generated.
@@ -8764,11 +8732,9 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
                 * smgr implementation has no need to implement anything. Which means
                 * nothing is needed in md.c etc
                 */
-               data = XLogRecGetData(record);
-               memcpy(&bkpb, data, sizeof(BkpBlock));
-               data += sizeof(BkpBlock);
-
-               RestoreBackupBlockContents(lsn, bkpb, data, false, false);
+               if (XLogReadBufferForRedo(lsn, record, 0, &buffer) != BLK_RESTORED)
+                       elog(ERROR, "unexpected XLogOpenBuffer result when restoring backup block");
+               UnlockReleaseBuffer(buffer);
        }
        else if (info == XLOG_BACKUP_END)
        {
@@ -8868,6 +8834,8 @@ static void
 xlog_outrec(StringInfo buf, XLogRecord *record)
 {
        int                     i;
+       uint8      *blockrefids;
+       int                     nblockrefs;
 
        appendStringInfo(buf, "prev %X/%X; xid %u",
                                         (uint32) (record->xl_prev >> 32),
@@ -8877,10 +8845,30 @@ xlog_outrec(StringInfo buf, XLogRecord *record)
        appendStringInfo(buf, "; len %u",
                                         record->xl_len);
 
-       for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
-       {
-               if (record->xl_info & XLR_BKP_BLOCK(i))
-                       appendStringInfo(buf, "; bkpb%d", i);
+       /* decode block references */
+       blockrefids = XLogRecGetBlockRefIds(record, &nblockrefs);
+       for (i = 0; i < nblockrefs; i++)
+       {
+               uint8           id = blockrefids[i];
+               XLogRecordBlockData *bkpb = XLogRecGetBlockRef(record, id, NULL);
+               RelFileNode rnode;
+               ForkNumber forknum;
+               BlockNumber blk   ;
+
+               XLogRecGetBlockTag(record, id, &rnode, &forknum, &blk);
+               if (forknum != MAIN_FORKNUM)
+                       appendStringInfo(buf, "; blkref #%u: rel %u/%u/%u, fork %u, blk %u",
+                                                        id,
+                                                        rnode.spcNode, rnode.dbNode, rnode.relNode,
+                                                        forknum,
+                                                        blk);
+               else
+                       appendStringInfo(buf, "; blkref #%u: rel %u/%u/%u, blk %u",
+                                                        id,
+                                                        rnode.spcNode, rnode.dbNode, rnode.relNode,
+                                                        blk);
+               if (bkpb->fork_flags & BKPBLOCK_HAS_IMAGE)
+                       appendStringInfo(buf, " FPW");
        }
 }
 #endif   /* WAL_DEBUG */
@@ -9409,7 +9397,6 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
        XLogRecPtr      startpoint;
        XLogRecPtr      stoppoint;
        TimeLineID      stoptli;
-       XLogRecData rdata;
        pg_time_t       stamp_time;
        char            strfbuf[128];
        char            histfilepath[MAXPGPATH];
@@ -9616,11 +9603,9 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
        /*
         * Write the backup-end xlog record
         */
-       rdata.data = (char *) (&startpoint);
-       rdata.len = sizeof(startpoint);
-       rdata.buffer = InvalidBuffer;
-       rdata.next = NULL;
-       stoppoint = XLogInsert(RM_XLOG_ID, XLOG_BACKUP_END, &rdata);
+       XLogBeginInsert();
+       XLogRegisterData((char *) (&startpoint), sizeof(startpoint));
+       stoppoint = XLogInsert(RM_XLOG_ID, XLOG_BACKUP_END);
        stoptli = ThisTimeLineID;
 
        /*
index b83343bf5bdd26f94845de25c69a775e41f66bca..eebbc1e00832126f780b16406e14f03c30748bcd 100644 (file)
@@ -3,6 +3,12 @@
  * xloginsert.c
  *             Functions for constructing WAL records
  *
+ * Constructing a WAL record begins with a call to XLogBeginInsert,
+ * followed by a number of XLogRegister* calls. The registered data is
+ * collected in private working memory, and finally assembled into a chain
+ * of XLogRecData structs by a call to XLogRecordAssemble(). See
+ * access/transam/README for details.
+ *
  * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
 #include "utils/memutils.h"
 #include "pg_trace.h"
 
+/*
+ * For each block reference registered with XLogRegisterBuffer, we fill in
+ * a registered_buffer struct.
+ *
+ * The structs live in the registered_buffers array.  XLogEnsureRecordSpace()
+ * zeroes newly added array entries, because padding bytes inside these
+ * structs are included in WAL data.
+ */
+typedef struct
+{
+       RelFileNode rnode;                      /* identifies the relation and block */
+       ForkNumber      forkno;
+       BlockNumber block;
+       Page            page;                   /* page content */
+       uint8           block_id;               /* ID the buffer was registered with */
+       uint8           flags;                  /* REGBUF_* flags */
+       uint32          rdata_sz;               /* total length of data in rdata chain */
+       XLogRecData *rdata_head;        /* head of the chain of data registered with
+                                                                * this block */
+       XLogRecData *rdata_tail;        /* last entry in the chain, or &rdata_head if
+                                                                * empty */
+
+       /*
+        * temporary areas used by XLogRecordAssemble()
+        *
+        * NOTE(review): 'padding' presumably backs the alignment padding between
+        * a full-page image and block data mentioned at EXTRA_RDATAS_PER_BUFFER;
+        * confirm against XLogRecordAssemble().
+        */
+       XLogRecordBlockData dtbuf_xlg;
+       char            padding[MAXIMUM_ALIGNOF];
+       XLogRecordBlockImage dtbuf_blkimg;
+}      registered_buffer;
+
+static registered_buffer *registered_buffers;
+static int     num_registered_buffers = 0; /* # of structs in use */
+static int     max_registered_buffers;         /* allocated size */
+
+/*
+ * A chain of XLogRecDatas to hold the "main data" of a WAL record, registered
+ * with XLogRegisterData(...).
+ */
+static XLogRecData *mainrdata_head;
+static XLogRecData *mainrdata_last = (XLogRecData *) &mainrdata_head;
+static int     mainrdata_len;          /* total # of bytes in chain */
+
+/*
+ * These are used to hold the record header while constructing a record.
+ * 'rechdr' is not a plain variable, but is palloc'd at initialization,
+ * because we want it to be MAXALIGNed and padding bytes zeroed.
+ */
+static XLogRecData hdr_rdt;
+static XLogRecord *rechdr;
+
+/*
+ * An array of XLogRecData structs, for assembling a WAL record. num_rdatas
+ * is the number of entries currently used, and allocated_rdatas is the
+ * allocated size of the array. max_rdatas is the number of entries that can
+ * be used for WAL data, the rest are reserved for use in XLogRecordAssemble
+ * for buffer information.
+ */
+static XLogRecData *rdatas;
+static int     num_rdatas;
+static int     max_rdatas;
+static int     allocated_rdatas;
+
+/*
+ * XLogRecordAssemble() needs at most 5 XLogRecData entries for each
+ * registered buffer: XLogRecordBlockData, XLogRecordBlockImage, block
+ * content before "hole", block content after "hole", and padding between
+ * full-page image and block data if both are included.
+ */
+#define EXTRA_RDATAS_PER_BUFFER                5
+
+static bool begininsert_called = false;
+
+/* Memory context to hold the registered buffer and data references. */
+static MemoryContext xloginsert_cxt;
+
 static XLogRecData *XLogRecordAssemble(RmgrId rmid, uint8 info,
-                                  XLogRecData *rdata,
                                   XLogRecPtr RedoRecPtr, bool doPageWrites,
-                                  XLogRecPtr *fpw_lsn, XLogRecData **rdt_lastnormal);
-static void XLogFillBkpBlock(Buffer buffer, bool buffer_std, BkpBlock *bkpb);
+                                  XLogRecPtr *fpw_lsn);
+
+/*
+ * Start building a new WAL record.
+ *
+ * Every XLogRegister* call, and the final XLogInsert(), must be preceded by
+ * a call to this function.  It raises an ERROR if WAL insertion is not
+ * currently allowed.
+ */
+void
+XLogBeginInsert(void)
+{
+       /* The working area must be in its pristine, reset state. */
+       Assert(!begininsert_called);
+       Assert(mainrdata_len == 0);
+       Assert(mainrdata_last == (XLogRecData *) &mainrdata_head);
+       Assert(num_registered_buffers == 0);
+
+       /* cross-check on whether we should be here or not */
+       if (!XLogInsertAllowed())
+               elog(ERROR, "cannot make new WAL entries during recovery");
+
+       /* From here on, XLogRegister* calls are legal. */
+       begininsert_called = true;
+}
+
+/*
+ * Make sure the working area can hold at least 'nbuffers' registered
+ * buffers and 'ndatas' data chunks, for subsequent XLogRegisterBuffer,
+ * XLogRegisterData and XLogRegisterBufData calls.
+ *
+ * A small number of buffers and data chunks is always available, which is
+ * enough for most record types; requests below those minimums are rounded
+ * up to them.  This function exists for the exceptional cases that need
+ * more.  Asking for more than XLR_MAX_BKP_BLOCKS buffers raises an ERROR.
+ */
+void
+XLogEnsureRecordSpace(int nbuffers, int ndatas)
+{
+       int                     reserved;
+       int                     wanted;
+
+       /*
+        * Memory allocation can fail, so this may not run inside a critical
+        * section.  repalloc() would check that too, but asserting here makes
+        * the failure consistent even when the arrays are already big enough.
+        */
+       Assert(CritSectionCount == 0);
+
+       /* never go below the built-in minimums */
+       if (XLR_NORMAL_BKP_BLOCKS > nbuffers)
+               nbuffers = XLR_NORMAL_BKP_BLOCKS;
+       if (XLR_NORMAL_RDATAS > ndatas)
+               ndatas = XLR_NORMAL_RDATAS;
+
+       if (nbuffers > XLR_MAX_BKP_BLOCKS)
+               elog(ERROR, "maximum number of WAL record block references exceeded");
+
+       if (max_registered_buffers < nbuffers)
+       {
+               registered_buffers = (registered_buffer *)
+                       repalloc(registered_buffers, sizeof(registered_buffer) * nbuffers);
+
+               /*
+                * Zero the newly added tail of the array.  Strictly only the
+                * padding bytes need it, since they are included in WAL data, but
+                * clearing everything is tidier.
+                */
+               MemSet(&registered_buffers[max_registered_buffers], 0,
+                       (nbuffers - max_registered_buffers) * sizeof(registered_buffer));
+               max_registered_buffers = nbuffers;
+       }
+
+       /* Entries set aside for XLogRecordAssemble's per-buffer bookkeeping. */
+       reserved = EXTRA_RDATAS_PER_BUFFER * max_registered_buffers;
+       wanted = ndatas + reserved;
+       if (allocated_rdatas < wanted)
+       {
+               rdatas = (XLogRecData *) repalloc(rdatas, sizeof(XLogRecData) * wanted);
+               allocated_rdatas = wanted;
+       }
+
+       /* Whatever is not reserved is available for caller-registered data. */
+       max_rdatas = allocated_rdatas - reserved;
+}
+
+/*
+ * Discard any partially constructed WAL record, returning the working
+ * area to the state XLogBeginInsert() expects to find.
+ */
+void
+XLogResetInsertion(void)
+{
+       /* allow a fresh XLogBeginInsert() */
+       begininsert_called = false;
+
+       /* empty the main data chain */
+       mainrdata_last = (XLogRecData *) &mainrdata_head;
+       mainrdata_len = 0;
+
+       /* release all buffer registrations and XLogRecData slots */
+       num_registered_buffers = 0;
+       num_rdatas = 0;
+}
+
+/*
+ * Register a reference to a buffer with the WAL record being constructed.
+ * This must be called for every page that the WAL-logged operation modifies.
+ *
+ * 'block_id' is the ID recorded with the registration; XLogRegisterBufData()
+ * looks registrations up by it.  The block tag and page are taken from
+ * 'buffer', and 'flags' is a set of REGBUF_* flags.
+ *
+ * Raises an ERROR if every allocated registered_buffers slot is already in
+ * use.
+ */
+void
+XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
+{
+       registered_buffer *regbuf;
+
+       /* WILL_INIT and NO_IMAGE don't make sense with FORCE_IMAGE */
+       Assert(!((flags & REGBUF_FORCE_IMAGE) &&
+                        (flags & (REGBUF_WILL_INIT | REGBUF_NO_IMAGE))));
+       Assert(begininsert_called);
+
+       /*
+        * max_registered_buffers is the allocated size of the array, so valid
+        * slots are 0 .. max_registered_buffers - 1.  The array is full as soon
+        * as num_registered_buffers reaches it; testing with '>' would let us
+        * write one element past the end.
+        */
+       if (num_registered_buffers >= max_registered_buffers)
+               elog(ERROR, "too many registered buffers");
+
+       regbuf = &registered_buffers[num_registered_buffers];
+
+       regbuf->block_id = block_id;
+       BufferGetTag(buffer, &regbuf->rnode, &regbuf->forkno, &regbuf->block);
+       regbuf->page = BufferGetPage(buffer);
+       regbuf->flags = flags;
+       /* start with an empty per-block data chain */
+       regbuf->rdata_tail = (XLogRecData *) &regbuf->rdata_head;
+       regbuf->rdata_sz = 0;
+
+       num_registered_buffers++;
+}
+
+/*
+ * Register a block reference for a block that's not in the shared buffer pool.
+ *
+ * The block is identified explicitly by 'rnode', 'forknum' and 'blknum', and
+ * its contents are supplied in 'page'.  'block_id' is the ID recorded with
+ * the registration, and 'flags' is a set of REGBUF_* flags, which must
+ * include REGBUF_FORCE_IMAGE.
+ *
+ * Raises an ERROR if every allocated registered_buffers slot is already in
+ * use.
+ */
+void
+XLogRegisterBlock(uint8 block_id, RelFileNode *rnode, ForkNumber forknum,
+                                 BlockNumber blknum, Page page, uint8 flags)
+{
+       registered_buffer *regbuf;
+
+       Assert(begininsert_called);
+
+       /* This is currently only used to WAL-log a full-page image of a page */
+       Assert(flags & REGBUF_FORCE_IMAGE);
+
+       /*
+        * max_registered_buffers is the allocated size of the array, so valid
+        * slots are 0 .. max_registered_buffers - 1.  The array is full as soon
+        * as num_registered_buffers reaches it; testing with '>' would let us
+        * write one element past the end.
+        */
+       if (num_registered_buffers >= max_registered_buffers)
+               elog(ERROR, "too many registered buffers");
+
+       regbuf = &registered_buffers[num_registered_buffers];
+
+       regbuf->block_id = block_id;
+       regbuf->rnode = *rnode;
+       regbuf->forkno = forknum;
+       regbuf->block = blknum;
+       regbuf->page = page;
+       regbuf->flags = flags;
+       /* start with an empty per-block data chain */
+       regbuf->rdata_tail = (XLogRecData *) &regbuf->rdata_head;
+       regbuf->rdata_sz = 0;
+
+       num_registered_buffers++;
+}
+
+/*
+ * Add data to the WAL record that's being constructed.
+ *
+ * The data is appended to the "main chunk", available at replay with
+ * XLogRecGetData().
+ *
+ * Only the 'data' pointer and 'len' are recorded here; the bytes themselves
+ * are not copied by this function.  Raises an ERROR if no free XLogRecData
+ * slot is left (see XLogEnsureRecordSpace).
+ */
+void
+XLogRegisterData(char *data, int len)
+{
+       XLogRecData *rdata;
+
+       Assert(begininsert_called);
+
+       /* claim the next free slot in the rdatas array */
+       if (num_rdatas >= max_rdatas)
+               elog(ERROR, "too much WAL data");
+       rdata = &rdatas[num_rdatas++];
+
+       rdata->data = data;
+       rdata->len = len;
+       /*
+        * we use the mainrdata_last pointer to track the end of the chain,
+        * so no need to clear 'next' here.
+        */
+
+       /* link the new entry at the tail of the main data chain */
+       mainrdata_last->next = rdata;
+       mainrdata_last = rdata;
+
+       mainrdata_len += len;
+}
 
 /*
- * Insert an XLOG record having the specified RMID and info bytes,
- * with the body of the record being the data chunk(s) described by
- * the rdata chain (see xloginsert.h for notes about rdata).
+ * Add buffer-specific data to the WAL record that's being constructed.
+ *
+ * Block_id must reference a block previously registered with
+ * XLogRegisterBuffer(), and the data will be omitted from the WAL record if
+ * a full image of the page is included.
+ */
+void
+XLogRegisterBufData(uint8 block_id, char *data, int len)
+{
+       registered_buffer *owner;
+       XLogRecData *chunk;
+       int                     slot;
+
+       Assert(begininsert_called);
+
+       /* Scan for the first registration carrying this block ID. */
+       slot = 0;
+       while (slot < num_registered_buffers &&
+                  registered_buffers[slot].block_id != block_id)
+               slot++;
+
+       if (slot >= num_registered_buffers)
+               elog(ERROR, "no block with id %d registered with WAL insertion",
+                        block_id);
+       owner = &registered_buffers[slot];
+
+       /* Claim the next free XLogRecData slot. */
+       if (num_rdatas >= max_rdatas)
+               elog(ERROR, "too much WAL data");
+       chunk = &rdatas[num_rdatas];
+       num_rdatas++;
+
+       chunk->len = len;
+       chunk->data = data;
+
+       /* Append to the block's own chain and account for the added bytes. */
+       owner->rdata_tail->next = chunk;
+       owner->rdata_tail = chunk;
+       owner->rdata_sz += len;
+}
+
+/*
+ * Insert an XLOG record having the specified RMID and info bytes, with the
+ * body of the record being the data and buffer references registered earlier
+ * with XLogRegister* calls.
  *
  * Returns XLOG pointer to end of record (beginning of next record).
  * This can be used as LSN for data pages affected by the logged action.
  * (LSN is the XLOG point up to which the XLOG must be flushed to disk
  * before the data page can be written out.  This implements the basic
  * WAL rule "write the log before the data".)
- *
- * NB: this routine feels free to scribble on the XLogRecData structs,
- * though not on the data they reference.  This is OK since the XLogRecData
- * structs are always just temporaries in the calling code.
  */
 XLogRecPtr
-XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
+XLogInsert(RmgrId rmid, uint8 info)
 {
-       XLogRecPtr      RedoRecPtr;
-       bool            doPageWrites;
        XLogRecPtr      EndPos;
-       XLogRecPtr      fpw_lsn;
-       XLogRecData *rdt;
-       XLogRecData *rdt_lastnormal;
+
+       /* XLogBeginInsert() must have been called. */
+       if (!begininsert_called)
+               elog(ERROR, "XLogBeginInsert was not called");
 
        /* info's high bits are reserved for use by me */
        if (info & XLR_INFO_MASK)
@@ -67,280 +352,288 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
         */
        if (IsBootstrapProcessingMode() && rmid != RM_XLOG_ID)
        {
+               XLogResetInsertion();
                EndPos = SizeOfXLogLongPHD;             /* start of 1st chkpt record */
                return EndPos;
        }
 
-       /*
-        * Get values needed to decide whether to do full-page writes. Since we
-        * don't yet have an insertion lock, these could change under us, but
-        * XLogInsertRecord will recheck them once it has a lock.
-        */
-       GetFullPageWriteInfo(&RedoRecPtr, &doPageWrites);
-
-       /*
-        * Assemble an XLogRecData chain representing the WAL record, including
-        * any backup blocks needed.
-        *
-        * We may have to loop back to here if a race condition is detected in
-        * XLogInsertRecord.  We could prevent the race by doing all this work
-        * while holding an insertion lock, but it seems better to avoid doing CRC
-        * calculations while holding one.
-        */
-retry:
-       rdt = XLogRecordAssemble(rmid, info, rdata, RedoRecPtr, doPageWrites,
-                                                        &fpw_lsn, &rdt_lastnormal);
-
-       EndPos = XLogInsertRecord(rdt, fpw_lsn);
-
-       if (EndPos == InvalidXLogRecPtr)
+       do
        {
+               XLogRecPtr      RedoRecPtr;
+               bool            doPageWrites;
+               XLogRecPtr      fpw_lsn;
+               XLogRecData *rdt;
+
                /*
-                * Undo the changes we made to the rdata chain, and retry.
-                *
-                * XXX: This doesn't undo *all* the changes; the XLogRecData
-                * entries for buffers that we had already decided to back up have
-                * had their data-pointers cleared. That's OK, as long as we
-                * decide to back them up on the next iteration as well. Hence,
-                * don't allow "doPageWrites" value to go from true to false after
-                * we've modified the rdata chain.
+                * Get values needed to decide whether to do full-page writes. Since
+                * we don't yet have an insertion lock, these could change under us,
+                * but XLogInsertRecord will recheck them once it has a lock.
                 */
-               bool            newDoPageWrites;
+               GetFullPageWriteInfo(&RedoRecPtr, &doPageWrites);
 
-               GetFullPageWriteInfo(&RedoRecPtr, &newDoPageWrites);
-               doPageWrites = doPageWrites || newDoPageWrites;
-               rdt_lastnormal->next = NULL;
+               rdt = XLogRecordAssemble(rmid, info, RedoRecPtr, doPageWrites,
+                                                                &fpw_lsn);
 
-               goto retry;
-       }
+               EndPos = XLogInsertRecord(rdt, fpw_lsn);
+       } while (EndPos == InvalidXLogRecPtr);
+
+       XLogResetInsertion();
 
        return EndPos;
 }
 
 /*
- * Assemble a full WAL record, including backup blocks, from an XLogRecData
- * chain, ready for insertion with XLogInsertRecord(). The record header
- * fields are filled in, except for the xl_prev field and CRC.
+ * Assemble a WAL record from the registered data and buffers into an
+ * XLogRecData chain, ready for insertion with XLogInsertRecord().
  *
- * The rdata chain is modified, adding entries for full-page images.
- * *rdt_lastnormal is set to point to the last normal (ie. not added by
- * this function) entry. It can be used to reset the chain to its original
- * state.
+ * The record header fields are filled in, except for the xl_prev field and
+ * CRC.
  *
- * If the rdata chain contains any buffer references, and a full-page image
- * was not taken of all the buffers, *fpw_lsn is set to the lowest LSN among
- * such pages. This signals that the assembled record is only good for
- * insertion on the assumption that the RedoRecPtr and doPageWrites values
- * were up-to-date.
+ * If a full-page image was omitted for any registered buffer because its
+ * page LSN is newer than RedoRecPtr, *fpw_lsn is set to the lowest such LSN.
+ * This signals that the assembled record is only good for insertion on the
+ * assumption that the RedoRecPtr and doPageWrites values were up-to-date.
  */
 static XLogRecData *
-XLogRecordAssemble(RmgrId rmid, uint8 info, XLogRecData *rdata,
+XLogRecordAssemble(RmgrId rmid, uint8 info,
                                   XLogRecPtr RedoRecPtr, bool doPageWrites,
-                                  XLogRecPtr *fpw_lsn, XLogRecData **rdt_lastnormal)
+                                  XLogRecPtr *fpw_lsn)
 {
-       bool            isLogSwitch = (rmid == RM_XLOG_ID && info == XLOG_SWITCH);
        XLogRecData *rdt;
-       Buffer          dtbuf[XLR_MAX_BKP_BLOCKS];
-       bool            dtbuf_bkp[XLR_MAX_BKP_BLOCKS];
-       uint32          len,
-                               total_len;
-       unsigned        i;
+       uint32          total_len;
+       int                     i;
+       int                     used_rdatas = num_rdatas;
 
        /*
-        * These need to be static because they are returned to the caller as part
-        * of the XLogRecData chain.
+        * Note: this function can be called multiple times for the same record.
+        * All the modifications we do to the rdata chains below must handle that.
         */
-       static BkpBlock dtbuf_xlg[XLR_MAX_BKP_BLOCKS];
-       static XLogRecData dtbuf_rdt1[XLR_MAX_BKP_BLOCKS];
-       static XLogRecData dtbuf_rdt2[XLR_MAX_BKP_BLOCKS];
-       static XLogRecData dtbuf_rdt3[XLR_MAX_BKP_BLOCKS];
-       static XLogRecData hdr_rdt;
-       static XLogRecord *rechdr;
-
-       if (rechdr == NULL)
-       {
-               static char rechdrbuf[SizeOfXLogRecord + MAXIMUM_ALIGNOF];
-
-               rechdr = (XLogRecord *) MAXALIGN(&rechdrbuf);
-               MemSet(rechdr, 0, SizeOfXLogRecord);
-       }
 
        /* The record begins with the header */
        hdr_rdt.data = (char *) rechdr;
        hdr_rdt.len = SizeOfXLogRecord;
-       hdr_rdt.next = rdata;
+       rdt = &hdr_rdt;
        total_len = SizeOfXLogRecord;
 
-       /*
-        * Here we scan the rdata chain, to determine which buffers must be backed
-        * up.
-        *
-        * We add entries for backup blocks to the chain, so that they don't need
-        * any special treatment in the critical section where the chunks are
-        * copied into the WAL buffers. Those entries have to be unlinked from the
-        * chain if we have to loop back here.
-        */
-       for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
+       /* followed by main data, if any */
+       if (mainrdata_len > 0)
        {
-               dtbuf[i] = InvalidBuffer;
-               dtbuf_bkp[i] = false;
+               rdt->next = mainrdata_head;
+               rdt = mainrdata_last;
+               total_len += mainrdata_len;
+       }
+       else
+       {
+               /*
+                * If there's no main data, also set the XLR_NO_RMGR_DATA flag.  This
+                * provides a useful bit of extra error checking in ReadRecord.
+                */
+               info |= XLR_NO_RMGR_DATA;
        }
 
+       /*
+        * Make additional rdata chain entries for the backup blocks.
+        */
        *fpw_lsn = InvalidXLogRecPtr;
-       len = 0;
-       for (rdt = rdata;;)
+       for (i = 0; i < num_registered_buffers; i++)
        {
-               if (rdt->buffer == InvalidBuffer)
-               {
-                       /* Simple data, just include it */
-                       len += rdt->len;
-               }
+               registered_buffer *regbuf = &registered_buffers[i];
+               XLogRecordBlockData *bkpb = &regbuf->dtbuf_xlg;
+               bool            needs_backup;
+               bool            needs_data;
+               int                     padlen;
+
+               /* Determine if this block needs to be backed up */
+               if (regbuf->flags & REGBUF_FORCE_IMAGE)
+                       needs_backup = true;
+               else if (regbuf->flags & (REGBUF_WILL_INIT | REGBUF_NO_IMAGE))
+                       needs_backup = false;
+               else if (!doPageWrites)
+                       needs_backup = false;
                else
                {
-                       /* Find info for buffer */
-                       for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
+                       /*
+                        * We assume page LSN is first data on *every* page that can be
+                        * passed to XLogInsert, whether it has the standard page layout
+                        * or not.
+                        */
+                       XLogRecPtr page_lsn = PageGetLSN(regbuf->page);
+
+                       needs_backup = (page_lsn <= RedoRecPtr);
+                       if (!needs_backup)
                        {
-                               if (rdt->buffer == dtbuf[i])
-                               {
-                                       /* Buffer already referenced by earlier chain item */
-                                       if (dtbuf_bkp[i])
-                                       {
-                                               rdt->data = NULL;
-                                               rdt->len = 0;
-                                       }
-                                       else if (rdt->data)
-                                               len += rdt->len;
-                                       break;
-                               }
-                               if (dtbuf[i] == InvalidBuffer)
-                               {
-                                       /* OK, put it in this slot */
-                                       XLogRecPtr      page_lsn;
-                                       bool            needs_backup;
-
-                                       dtbuf[i] = rdt->buffer;
-
-                                       /*
-                                        * Determine whether the buffer has to be backed up.
-                                        *
-                                        * We assume page LSN is first data on *every* page that
-                                        * can be passed to XLogInsert, whether it has the
-                                        * standard page layout or not. We don't need to take the
-                                        * buffer header lock for PageGetLSN because we hold an
-                                        * exclusive lock on the page and/or the relation.
-                                        */
-                                       page_lsn = PageGetLSN(BufferGetPage(rdt->buffer));
-                                       if (!doPageWrites)
-                                               needs_backup = false;
-                                       else if (page_lsn <= RedoRecPtr)
-                                               needs_backup = true;
-                                       else
-                                               needs_backup = false;
-
-                                       if (needs_backup)
-                                       {
-                                               /*
-                                                * The page needs to be backed up, so set up BkpBlock
-                                                */
-                                               XLogFillBkpBlock(rdt->buffer, rdt->buffer_std,
-                                                                                &(dtbuf_xlg[i]));
-                                               dtbuf_bkp[i] = true;
-                                               rdt->data = NULL;
-                                               rdt->len = 0;
-                                       }
-                                       else
-                                       {
-                                               if (rdt->data)
-                                                       len += rdt->len;
-                                               if (*fpw_lsn == InvalidXLogRecPtr ||
-                                                       page_lsn < *fpw_lsn)
-                                               {
-                                                       *fpw_lsn = page_lsn;
-                                               }
-                                       }
-                                       break;
-                               }
+                               if (*fpw_lsn == InvalidXLogRecPtr || page_lsn < *fpw_lsn)
+                                       *fpw_lsn = page_lsn;
                        }
-                       if (i >= XLR_MAX_BKP_BLOCKS)
-                               elog(PANIC, "can backup at most %d blocks per xlog record",
-                                        XLR_MAX_BKP_BLOCKS);
                }
-               /* Break out of loop when rdt points to last chain item */
-               if (rdt->next == NULL)
-                       break;
+
+               /* Determine if the buffer data needs to be included */
+               if (regbuf->rdata_sz == 0)
+                       needs_data = false;
+               else if ((regbuf->flags & REGBUF_KEEP_DATA) != 0)
+                       needs_data = true;
+               else
+                       needs_data = !needs_backup;
+
+               /*
+                * Construct an XLogRecordBlockData struct for this block reference,
+                * and an XLogRecData to point to it.
+                */
+               rdt->next = &rdatas[used_rdatas++];
                rdt = rdt->next;
-       }
-       total_len += len;
 
-       /*
-        * Make additional rdata chain entries for the backup blocks, so that we
-        * don't need to special-case them in the write loop.  This modifies the
-        * original rdata chain, but we keep a pointer to the last regular entry,
-        * rdt_lastnormal, so that we can undo this if we have to start over.
-        *
-        * At the exit of this loop, total_len includes the backup block data.
-        *
-        * Also set the appropriate info bits to show which buffers were backed
-        * up. The XLR_BKP_BLOCK(N) bit corresponds to the N'th distinct buffer
-        * value (ignoring InvalidBuffer) appearing in the rdata chain.
-        */
-       *rdt_lastnormal = rdt;
-       for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
-       {
-               BkpBlock   *bkpb;
-               char       *page;
+               bkpb->id = regbuf->block_id;
+               bkpb->fork_flags = (uint8) regbuf->forkno;
+               bkpb->block = regbuf->block;
 
-               if (!dtbuf_bkp[i])
-                       continue;
+               if (i > 0 && RelFileNodeEquals(regbuf->rnode, registered_buffers[i - 1].rnode))
+               {
+                       bkpb->fork_flags |= BKPBLOCK_SAME_REL;
 
-               info |= XLR_BKP_BLOCK(i);
+                       /*
+                        * Zero the relation information. It's not nominally included, but
+                        * might end up in the record as padding bytes anyway.
+                        */
+                       bkpb->node.spcNode = 0;
+                       bkpb->node.dbNode = 0;
+                       bkpb->node.relNode = 0;
 
-               bkpb = &(dtbuf_xlg[i]);
-               page = (char *) BufferGetBlock(dtbuf[i]);
+                       rdt->len = SizeOfXLogRecordBlockDataSameRel;
+               }
+               else
+               {
+                       bkpb->node = regbuf->rnode;
 
-               rdt->next = &(dtbuf_rdt1[i]);
-               rdt = rdt->next;
+                       rdt->len = SizeOfXLogRecordBlockData;
+               }
 
-               rdt->data = (char *) bkpb;
-               rdt->len = sizeof(BkpBlock);
-               total_len += sizeof(BkpBlock);
+               if (regbuf->flags & REGBUF_WILL_INIT)
+                       bkpb->fork_flags |= BKPBLOCK_WILL_INIT;
 
-               rdt->next = &(dtbuf_rdt2[i]);
-               rdt = rdt->next;
+               rdt->data = (char *) bkpb;
+               total_len += rdt->len;
 
-               if (bkpb->hole_length == 0)
+               /*
+                * Add padding, so that the actual block data is MAXALIGNed. (We take
+                * advantage of the fact that we have reserved some padding bytes
+                * immediately after the XLogRecordBlockData struct, so we can just
+                * make this XLogRecData entry larger, instead of adding a new entry.
+                * This saves a few cycles when assembling the final record)
+                */
+               if ((needs_backup || needs_data) && (total_len % MAXIMUM_ALIGNOF) != 0)
                {
-                       rdt->data = page;
-                       rdt->len = BLCKSZ;
-                       total_len += BLCKSZ;
-                       rdt->next = NULL;
+                       padlen = MAXIMUM_ALIGNOF - (total_len % MAXIMUM_ALIGNOF);
+                       rdt->len += padlen;
+                       total_len += padlen;
                }
-               else
+
+               bkpb->data_length = 0;
+
+               if (needs_backup)
                {
-                       /* must skip the hole */
-                       rdt->data = page;
-                       rdt->len = bkpb->hole_offset;
-                       total_len += bkpb->hole_offset;
+                       Page            page = regbuf->page;
+                       XLogRecordBlockImage *bimg = &regbuf->dtbuf_blkimg;
 
-                       rdt->next = &(dtbuf_rdt3[i]);
+                       /*
+                        * The page needs to be backed up, so set up *bimg
+                        */
+                       if (regbuf->flags & REGBUF_STANDARD)
+                       {
+                               /* Assume we can omit data between pd_lower and pd_upper */
+                               uint16          lower = ((PageHeader) page)->pd_lower;
+                               uint16          upper = ((PageHeader) page)->pd_upper;
+
+                               if (lower >= SizeOfPageHeaderData &&
+                                       upper > lower &&
+                                       upper <= BLCKSZ)
+                               {
+                                       bimg->hole_offset = lower;
+                                       bimg->hole_length = upper - lower;
+                               }
+                               else
+                               {
+                                       /* No "hole" to compress out */
+                                       bimg->hole_offset = 0;
+                                       bimg->hole_length = 0;
+                               }
+                       }
+                       else
+                       {
+                               /* Not a standard page header, don't try to eliminate "hole" */
+                               bimg->hole_offset = 0;
+                               bimg->hole_length = 0;
+                       }
+
+                       /* Fill in the remaining fields in the XLogRecordBlockData struct */
+                       bkpb->fork_flags |= BKPBLOCK_HAS_IMAGE;
+
+                       bkpb->data_length += sizeof(XLogRecordBlockImage) + BLCKSZ - bimg->hole_length;
+
+                       /*
+                        * Construct XLogRecData entries for the XLogRecordBlockImage
+                        * struct and the page content.
+                        */
+                       rdt->next = &rdatas[used_rdatas++];
+                       rdt = rdt->next;
+                       rdt->data = (char *) bimg;
+                       rdt->len = sizeof(XLogRecordBlockImage);
+
+                       rdt->next = &rdatas[used_rdatas++];
                        rdt = rdt->next;
+                       if (bimg->hole_length == 0)
+                       {
+                               rdt->data = page;
+                               rdt->len = BLCKSZ;
+                       }
+                       else
+                       {
+                               /* must skip the hole */
+                               rdt->data = page;
+                               rdt->len = bimg->hole_offset;
+
+                               rdt->next = &rdatas[used_rdatas++];
+                               rdt = rdt->next;
 
-                       rdt->data = page + (bkpb->hole_offset + bkpb->hole_length);
-                       rdt->len = BLCKSZ - (bkpb->hole_offset + bkpb->hole_length);
-                       total_len += rdt->len;
-                       rdt->next = NULL;
+                               rdt->data = page + (bimg->hole_offset + bimg->hole_length);
+                               rdt->len = BLCKSZ - (bimg->hole_offset + bimg->hole_length);
+                       }
+
+                       /*
+                        * If we have per-block data in addition to the full-page image,
+                        * add padding to make it aligned.
+                        */
+                       if (needs_data && (bkpb->data_length % MAXIMUM_ALIGNOF) != 0)
+                       {
+                               padlen = MAXIMUM_ALIGNOF - (bkpb->data_length % MAXIMUM_ALIGNOF);
+
+                               rdt->next = &rdatas[used_rdatas++];
+                               rdt = rdt->next;
+
+                               rdt->data = regbuf->padding;
+                               rdt->len = padlen;
+
+                               bkpb->data_length += padlen;
+                       }
                }
+
+               if (needs_data)
+               {
+                       /*
+                        * Link the caller-supplied rdata chain for this buffer to the
+                        * overall list.
+                        */
+                       bkpb->fork_flags |= BKPBLOCK_HAS_DATA;
+                       bkpb->data_length += regbuf->rdata_sz;
+
+                       rdt->next = regbuf->rdata_head;
+                       rdt = regbuf->rdata_tail;
+               }
+
+               total_len += bkpb->data_length;
        }
+       Assert(used_rdatas <= allocated_rdatas);
 
-       /*
-        * We disallow len == 0 because it provides a useful bit of extra error
-        * checking in ReadRecord.  This means that all callers of XLogInsert
-        * must supply at least some not-in-a-buffer data.  However, we make an
-        * exception for XLOG SWITCH records because we don't want them to ever
-        * cross a segment boundary.
-        */
-       if (len == 0 && !isLogSwitch)
-               elog(PANIC, "invalid xlog record length %u", rechdr->xl_len);
+       rdt->next = NULL;
 
        /*
         * Fill in the fields in the record header. Prev-link is filled in later,
@@ -349,7 +642,7 @@ XLogRecordAssemble(RmgrId rmid, uint8 info, XLogRecData *rdata,
         */
        rechdr->xl_xid = GetCurrentTransactionIdIfAny();
        rechdr->xl_tot_len = total_len;
-       rechdr->xl_len = len;           /* doesn't include backup blocks */
+       rechdr->xl_len = mainrdata_len;         /* doesn't include backup blocks */
        rechdr->xl_info = info;
        rechdr->xl_rmid = rmid;
        rechdr->xl_prev = InvalidXLogRecPtr;
@@ -419,55 +712,52 @@ XLogSaveBufferForHint(Buffer buffer, bool buffer_std)
         */
        RedoRecPtr = GetRedoRecPtr();
 
-       /*
-        * We assume page LSN is first data on *every* page that can be passed to
-        * XLogInsert, whether it has the standard page layout or not. Since we're
-        * only holding a share-lock on the page, we must take the buffer header
-        * lock when we look at the LSN.
-        */
+   /*
+       * We assume page LSN is first data on *every* page that can be passed to
+       * XLogInsert, whether it has the standard page layout or not. Since we're
+       * only holding a share-lock on the page, we must take the buffer header
+       * lock when we look at the LSN.
+       */
        lsn = BufferGetLSNAtomic(buffer);
 
        if (lsn <= RedoRecPtr)
        {
-               XLogRecData rdata[2];
-               BkpBlock        bkpb;
+               int                     flags;
                char            copied_buffer[BLCKSZ];
                char       *origdata = (char *) BufferGetBlock(buffer);
-
-               /* Make a BkpBlock struct representing the buffer */
-               XLogFillBkpBlock(buffer, buffer_std, &bkpb);
+               RelFileNode rnode;
+               ForkNumber      forkno;
+               BlockNumber blkno;
 
                /*
                 * Copy buffer so we don't have to worry about concurrent hint bit or
                 * lsn updates. We assume pd_lower/upper cannot be changed without an
                 * exclusive lock, so the contents bkp are not racy.
-                *
-                * With buffer_std set to false, XLogFillBkpBlock() sets hole_length
-                * and hole_offset to 0; so the following code is safe for either
-                * case.
                 */
-               memcpy(copied_buffer, origdata, bkpb.hole_offset);
-               memcpy(copied_buffer + bkpb.hole_offset,
-                          origdata + bkpb.hole_offset + bkpb.hole_length,
-                          BLCKSZ - bkpb.hole_offset - bkpb.hole_length);
+               if (buffer_std)
+               {
+                       /* Assume we can omit data between pd_lower and pd_upper */
+                       Page            page = BufferGetPage(buffer);
+                       uint16          lower = ((PageHeader) page)->pd_lower;
+                       uint16          upper = ((PageHeader) page)->pd_upper;
 
-               /*
-                * Header for backup block.
-                */
-               rdata[0].data = (char *) &bkpb;
-               rdata[0].len = sizeof(BkpBlock);
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].next = &(rdata[1]);
+                       memcpy(copied_buffer, origdata, lower);
+                       memcpy(copied_buffer + upper, origdata + upper, BLCKSZ - upper);
+               }
+               else
+                       memcpy(copied_buffer, origdata, BLCKSZ);
 
-               /*
-                * Save copy of the buffer.
-                */
-               rdata[1].data = copied_buffer;
-               rdata[1].len = BLCKSZ - bkpb.hole_length;
-               rdata[1].buffer = InvalidBuffer;
-               rdata[1].next = NULL;
+               XLogBeginInsert();
 
-               recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI, rdata);
+               flags = REGBUF_FORCE_IMAGE;
+               if (buffer_std)
+                       flags |= REGBUF_STANDARD;
+
+               BufferGetTag(buffer, &rnode, &forkno, &blkno);
+
+               XLogRegisterBlock(0, &rnode, forkno, blkno, copied_buffer, flags);
+
+               recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI);
        }
 
        return recptr;
@@ -489,71 +779,16 @@ XLogRecPtr
 log_newpage(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno,
                        Page page, bool page_std)
 {
-       BkpBlock        bkpb;
+       int             flags;
        XLogRecPtr      recptr;
-       XLogRecData rdata[3];
-
-       /* NO ELOG(ERROR) from here till newpage op is logged */
-       START_CRIT_SECTION();
-
-       bkpb.node = *rnode;
-       bkpb.fork = forkNum;
-       bkpb.block = blkno;
 
+       flags = REGBUF_FORCE_IMAGE;
        if (page_std)
-       {
-               /* Assume we can omit data between pd_lower and pd_upper */
-               uint16          lower = ((PageHeader) page)->pd_lower;
-               uint16          upper = ((PageHeader) page)->pd_upper;
+               flags |= REGBUF_STANDARD;
 
-               if (lower >= SizeOfPageHeaderData &&
-                       upper > lower &&
-                       upper <= BLCKSZ)
-               {
-                       bkpb.hole_offset = lower;
-                       bkpb.hole_length = upper - lower;
-               }
-               else
-               {
-                       /* No "hole" to compress out */
-                       bkpb.hole_offset = 0;
-                       bkpb.hole_length = 0;
-               }
-       }
-       else
-       {
-               /* Not a standard page header, don't try to eliminate "hole" */
-               bkpb.hole_offset = 0;
-               bkpb.hole_length = 0;
-       }
-
-       rdata[0].data = (char *) &bkpb;
-       rdata[0].len = sizeof(BkpBlock);
-       rdata[0].buffer = InvalidBuffer;
-       rdata[0].next = &(rdata[1]);
-
-       if (bkpb.hole_length == 0)
-       {
-               rdata[1].data = (char *) page;
-               rdata[1].len = BLCKSZ;
-               rdata[1].buffer = InvalidBuffer;
-               rdata[1].next = NULL;
-       }
-       else
-       {
-               /* must skip the hole */
-               rdata[1].data = (char *) page;
-               rdata[1].len = bkpb.hole_offset;
-               rdata[1].buffer = InvalidBuffer;
-               rdata[1].next = &rdata[2];
-
-               rdata[2].data = (char *) page + (bkpb.hole_offset + bkpb.hole_length);
-               rdata[2].len = BLCKSZ - (bkpb.hole_offset + bkpb.hole_length);
-               rdata[2].buffer = InvalidBuffer;
-               rdata[2].next = NULL;
-       }
-
-       recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI, rdata);
+       XLogBeginInsert();
+       XLogRegisterBlock(0, rnode, forkNum, blkno, page, flags);
+       recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI);
 
        /*
         * The page may be uninitialized. If so, we can't set the LSN because that
@@ -564,8 +799,6 @@ log_newpage(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno,
                PageSetLSN(page, recptr);
        }
 
-       END_CRIT_SECTION();
-
        return recptr;
 }
 
@@ -595,39 +828,35 @@ log_newpage_buffer(Buffer buffer, bool page_std)
        return log_newpage(&rnode, forkNum, blkno, page, page_std);
 }
 
-/*
- * Fill a BkpBlock for a buffer.
- */
-static void
-XLogFillBkpBlock(Buffer buffer, bool buffer_std, BkpBlock *bkpb)
+void
+InitXLogInsert(void)
 {
-       BufferGetTag(buffer, &bkpb->node, &bkpb->fork, &bkpb->block);
+       /* Initialize the working areas */
+       if (xloginsert_cxt == NULL)
+       {
+               xloginsert_cxt = AllocSetContextCreate(TopMemoryContext,
+                                                                                          "WAL record construction",
+                                                                                          ALLOCSET_DEFAULT_MINSIZE,
+                                                                                          ALLOCSET_DEFAULT_INITSIZE,
+                                                                                          ALLOCSET_DEFAULT_MAXSIZE);
+       }
 
-       if (buffer_std)
+       if (registered_buffers == NULL)
        {
-               /* Assume we can omit data between pd_lower and pd_upper */
-               Page            page = BufferGetPage(buffer);
-               uint16          lower = ((PageHeader) page)->pd_lower;
-               uint16          upper = ((PageHeader) page)->pd_upper;
-
-               if (lower >= SizeOfPageHeaderData &&
-                       upper > lower &&
-                       upper <= BLCKSZ)
-               {
-                       bkpb->hole_offset = lower;
-                       bkpb->hole_length = upper - lower;
-               }
-               else
-               {
-                       /* No "hole" to compress out */
-                       bkpb->hole_offset = 0;
-                       bkpb->hole_length = 0;
-               }
+               registered_buffers = (registered_buffer *)
+                       MemoryContextAllocZero(xloginsert_cxt,
+                                                 sizeof(registered_buffer) * XLR_NORMAL_BKP_BLOCKS);
+               max_registered_buffers = XLR_NORMAL_BKP_BLOCKS;
        }
-       else
+       if (rdatas == NULL)
        {
-               /* Not a standard page header, don't try to eliminate "hole" */
-               bkpb->hole_offset = 0;
-               bkpb->hole_length = 0;
+               allocated_rdatas = XLR_NORMAL_RDATAS +
+                       EXTRA_RDATAS_PER_BUFFER * XLR_NORMAL_BKP_BLOCKS;
+               rdatas = MemoryContextAlloc(xloginsert_cxt,
+                                                                       sizeof(XLogRecData) * allocated_rdatas);
+               max_rdatas = XLR_NORMAL_RDATAS;
        }
+
+       if (rechdr == NULL)
+               rechdr = MemoryContextAllocZero(xloginsert_cxt, SizeOfXLogRecord);
 }
index 7d573cc585d3cc6e13421ed108e328852788d253..64876f79983395adef3309344c4e161c1c620369 100644 (file)
@@ -433,7 +433,7 @@ XLogReadRecord(XLogReaderState *state, XLogRecPtr RecPtr, char **errormsg)
        /*
         * Special processing if it's an XLOG SWITCH record
         */
-       if (record->xl_rmid == RM_XLOG_ID && record->xl_info == XLOG_SWITCH)
+       if (record->xl_rmid == RM_XLOG_ID && record->xl_info == (XLOG_SWITCH | XLR_NO_RMGR_DATA))
        {
                /* Pretend it extends to end of segment */
                state->EndRecPtr += XLogSegSize - 1;
@@ -580,15 +580,14 @@ ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr,
                                          bool randAccess)
 {
        /*
-        * xl_len == 0 is bad data for everything except XLOG SWITCH, where it is
-        * required.
+        * xl_len == 0 is only allowed if the XLR_NO_RMGR_DATA flag is also set.
         */
-       if (record->xl_rmid == RM_XLOG_ID && record->xl_info == XLOG_SWITCH)
+       if (record->xl_info & XLR_NO_RMGR_DATA)
        {
                if (record->xl_len != 0)
                {
                        report_invalid_record(state,
-                                                                 "invalid xlog switch record at %X/%X",
+                                                                 "record with invalid length at %X/%X",
                                                                  (uint32) (RecPtr >> 32), (uint32) RecPtr);
                        return false;
                }
@@ -600,9 +599,7 @@ ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr,
                                                          (uint32) (RecPtr >> 32), (uint32) RecPtr);
                return false;
        }
-       if (record->xl_tot_len < SizeOfXLogRecord + record->xl_len ||
-               record->xl_tot_len > SizeOfXLogRecord + record->xl_len +
-               XLR_MAX_BKP_BLOCKS * (sizeof(BkpBlock) + BLCKSZ))
+       if (record->xl_tot_len < SizeOfXLogRecord + record->xl_len)
        {
                report_invalid_record(state,
                                                          "invalid record length at %X/%X",
@@ -669,13 +666,12 @@ static bool
 ValidXLogRecord(XLogReaderState *state, XLogRecord *record, XLogRecPtr recptr)
 {
        pg_crc32        crc;
-       int                     i;
        uint32          len = record->xl_len;
-       BkpBlock        bkpb;
+       XLogRecordBlockData bkpb;
        char       *blk;
        size_t          remaining = record->xl_tot_len;
 
-       /* First the rmgr data */
+       /* Check that the rmgr data length is sane */
        if (remaining < SizeOfXLogRecord + len)
        {
                /* ValidXLogRecordHeader() should've caught this already... */
@@ -684,46 +680,103 @@ ValidXLogRecord(XLogReaderState *state, XLogRecord *record, XLogRecPtr recptr)
                return false;
        }
        remaining -= SizeOfXLogRecord + len;
-       INIT_CRC32C(crc);
-       COMP_CRC32C(crc, XLogRecGetData(record), len);
 
-       /* Add in the backup blocks, if any */
-       blk = (char *) XLogRecGetData(record) + len;
-       for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
+       /* Validate block data, if any */
+       if (remaining > 0)
        {
-               uint32          blen;
-
-               if (!(record->xl_info & XLR_BKP_BLOCK(i)))
-                       continue;
-
-               if (remaining < sizeof(BkpBlock))
+               blk = (char *) XLogRecGetData(record) + len;
+               do
                {
-                       report_invalid_record(state,
-                                                         "invalid backup block size in record at %X/%X",
-                                                                 (uint32) (recptr >> 32), (uint32) recptr);
-                       return false;
-               }
-               memcpy(&bkpb, blk, sizeof(BkpBlock));
+                       if (remaining < SizeOfXLogRecordBlockDataSameRel)
+                       {
+                               report_invalid_record(state,
+                                                                         "invalid total length in record at %X/%X",
+                                                                         (uint32) (recptr >> 32), (uint32) recptr);
+                               return false;
+                       }
+                       memcpy(&bkpb, blk, SizeOfXLogRecordBlockDataSameRel);
+                       if (bkpb.fork_flags & BKPBLOCK_SAME_REL)
+                       {
+                               remaining -= SizeOfXLogRecordBlockDataSameRel;
+                               blk += SizeOfXLogRecordBlockDataSameRel;
+                       }
+                       else
+                       {
+                               if (remaining < SizeOfXLogRecordBlockData)
+                               {
+                                       report_invalid_record(state,
+                                                                                 "invalid total length in record at %X/%X",
+                                                                                 (uint32) (recptr >> 32), (uint32) recptr);
+                                       return false;
+                               }
+
+                               remaining -= SizeOfXLogRecordBlockData;
+                               blk += SizeOfXLogRecordBlockData;
+                       }
 
-               if (bkpb.hole_offset + bkpb.hole_length > BLCKSZ)
-               {
-                       report_invalid_record(state,
-                                                                 "incorrect hole size in record at %X/%X",
-                                                                 (uint32) (recptr >> 32), (uint32) recptr);
-                       return false;
-               }
-               blen = sizeof(BkpBlock) + BLCKSZ - bkpb.hole_length;
+                       /* The block data begins at next MAXALIGN boundary */
+                       if (bkpb.data_length > 0)
+                       {
+                               char *blkdata = (char *) MAXALIGN(blk);
+
+                               if (remaining < blkdata - blk)
+                               {
+                                       report_invalid_record(state,
+                                                                                 "invalid block data length in record at %X/%X",
+                                                                                 (uint32) (recptr >> 32), (uint32) recptr);
+                                       return false;
+                               }
+                               remaining -= (blkdata - blk);
+                               blk = blkdata;
+                       }
 
-               if (remaining < blen)
-               {
-                       report_invalid_record(state,
-                                                         "invalid backup block size in record at %X/%X",
-                                                                 (uint32) (recptr >> 32), (uint32) recptr);
-                       return false;
-               }
-               remaining -= blen;
-               COMP_CRC32C(crc, blk, blen);
-               blk += blen;
+                       if (remaining < bkpb.data_length)
+                       {
+                               report_invalid_record(state,
+                                                                         "invalid block data length in record at %X/%X",
+                                                                         (uint32) (recptr >> 32), (uint32) recptr);
+                               return false;
+                       }
+
+                       /* If it's a full-page image, check the XLogRecordBlockImage struct */
+                       if (bkpb.fork_flags & BKPBLOCK_HAS_IMAGE)
+                       {
+                               XLogRecordBlockImage *blkimg;
+
+                               if (bkpb.data_length < sizeof(XLogRecordBlockImage))
+                               {
+                                       report_invalid_record(state,
+                                                                                 "invalid block image size in record at %X/%X",
+                                                                                 (uint32) (recptr >> 32), (uint32) recptr);
+                                       return false;
+                               }
+                               blkimg = (XLogRecordBlockImage *) blk;
+
+                               /* Check the length and offset of the "hole" */
+                               if (blkimg->hole_offset + blkimg->hole_length > BLCKSZ)
+                               {
+                                       report_invalid_record(state,
+                                                                                 "incorrect hole size in record at %X/%X",
+                                                                                 (uint32) (recptr >> 32), (uint32) recptr);
+                                       return false;
+                               }
+
+                               /*
+                                * Check that the data length equals image header + block size
+                                * - hole (unless more data follows).
+                                */
+                               if (!(bkpb.fork_flags & BKPBLOCK_HAS_DATA) &&
+                                       bkpb.data_length != sizeof(XLogRecordBlockImage) + BLCKSZ - blkimg->hole_length)
+                               {
+                                       report_invalid_record(state,
+                                                                                 "incorrect hole size in record at %X/%X",
+                                                                                 (uint32) (recptr >> 32), (uint32) recptr);
+                                       return false;
+                               }
+                       }
+                       remaining -= bkpb.data_length;
+                       blk += bkpb.data_length;
+               } while (remaining > 0);
        }
 
        /* Check that xl_tot_len agrees with our calculation */
@@ -735,7 +788,10 @@ ValidXLogRecord(XLogReaderState *state, XLogRecord *record, XLogRecPtr recptr)
                return false;
        }
 
-       /* Finally include the record header */
+       /* Calculate the CRC */
+       INIT_CRC32C(crc);
+       COMP_CRC32C(crc, XLogRecGetData(record), record->xl_tot_len - SizeOfXLogRecord);
+       /* include the record header last */
        COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc));
        FIN_CRC32C(crc);
 
@@ -985,3 +1041,206 @@ out:
 }
 
 #endif   /* FRONTEND */
+
+
+/*
+ * Functions for decoding the data and block references in a record.
+ */
+
+/*
+ * Does the record carry a block reference registered under 'block_id'?
+ *
+ * Reports true when a lookup of the ID in the record succeeds, false
+ * otherwise.
+ */
+bool
+XLogRecHasBlockRef(XLogRecord *record, uint8 block_id)
+{
+       return XLogRecGetBlockRef(record, block_id, NULL) != NULL;
+}
+
+/*
+ * Does the block reference with the given ID include a full-page image?
+ *
+ * A record that has no block reference with that ID yields false.
+ */
+bool
+XLogRecHasBlockImage(XLogRecord *record, uint8 block_id)
+{
+       XLogRecordBlockData *ref = XLogRecGetBlockRef(record, block_id, NULL);
+
+       return ref != NULL && (ref->fork_flags & BKPBLOCK_HAS_IMAGE) != 0;
+}
+
+/*
+ * Look up the relation, fork and block number that a block reference
+ * points at.
+ *
+ * Each of 'rnode', 'forknum' and 'blknum' may be NULL if the caller is not
+ * interested in that piece.  If the record has no block reference with the
+ * given ID, frontend builds hand back invalid values, while the backend
+ * raises an error.
+ */
+void
+XLogRecGetBlockTag(XLogRecord *record, uint8 block_id,
+                                  RelFileNode *rnode, ForkNumber *forknum, BlockNumber *blknum)
+{
+       XLogRecordBlockData *ref = XLogRecGetBlockRef(record, block_id, NULL);
+
+       if (ref != NULL)
+       {
+               /* Copy out only the fields the caller asked for */
+               if (rnode != NULL)
+                       *rnode = ref->node;
+               if (forknum != NULL)
+                       *forknum = ref->fork_flags & BKPBLOCK_FORK_MASK;
+               if (blknum != NULL)
+                       *blknum = ref->block;
+               return;
+       }
+
+       /* No such block reference in this record */
+#ifdef FRONTEND
+       if (rnode != NULL)
+               rnode->spcNode = rnode->dbNode = rnode->relNode = InvalidOid;
+       if (forknum != NULL)
+               *forknum = InvalidForkNumber;
+       if (blknum != NULL)
+               *blknum = InvalidBlockNumber;
+#else
+       elog(ERROR, "failed to locate backup block with ID %d", block_id);
+#endif
+}
+
+/*
+ * Returns the data associated with a block reference, or NULL if there is
+ * no data (e.g. because a full-page image was taken instead).
+ *
+ * If 'len' is not NULL, the length of the returned data is stored in *len
+ * (0 when NULL is returned).
+ *
+ * If the record has no block reference with the given ID, frontend builds
+ * return NULL, while the backend raises an error; this mirrors
+ * XLogRecGetBlockTag().
+ */
+char *
+XLogRecGetBlockData(XLogRecord *record, uint8 block_id, Size *len)
+{
+       XLogRecordBlockData *bkpb;
+       char       *payload;
+
+       bkpb = XLogRecGetBlockRef(record, block_id, &payload);
+       if (bkpb == NULL)
+       {
+               /*
+                * XLogRecGetBlockRef() returns NULL for an unknown block_id; it must
+                * not be dereferenced below.
+                */
+#ifdef FRONTEND
+               if (len)
+                       *len = 0;
+               return NULL;
+#else
+               elog(ERROR, "failed to locate backup block with ID %d", block_id);
+#endif
+       }
+
+       if (!(bkpb->fork_flags & BKPBLOCK_HAS_DATA))
+       {
+               /* no data */
+               if (len)
+                       *len = 0;
+               return NULL;
+       }
+       else if (bkpb->fork_flags & BKPBLOCK_HAS_IMAGE)
+       {
+               /* both a full-page image and data */
+               XLogRecordBlockImage *img = (XLogRecordBlockImage *) payload;
+
+               /* Skip the image header and the page image (minus its hole) */
+               payload += sizeof(XLogRecordBlockImage);
+               payload += BLCKSZ - img->hole_length;
+
+               /* The data follows at the next MAXALIGN boundary */
+               payload = (char *) MAXALIGN(payload);
+
+               /* Whatever remains of data_length after that point is the data */
+               if (len)
+                       *len = ((char *) img) + bkpb->data_length - payload;
+               return payload;
+       }
+       else
+       {
+               /* just data */
+               if (len)
+                       *len = bkpb->data_length;
+               return payload;
+       }
+}
+
+/*
+ * Return all the block reference IDs in a WAL record.
+ *
+ * The returned array is palloc'd, and its length is stored in *num_refs.
+ *
+ * At most XLR_MAX_BKP_BLOCKS IDs are returned; the scan stops there so that
+ * a record with more block references than that cannot overrun the local
+ * collection array.
+ */
+uint8 *
+XLogRecGetBlockRefIds(XLogRecord *record, int *num_refs)
+{
+       XLogRecordBlockData bkpb;
+       char       *blk;
+       char       *end;
+       uint8      *out;
+       int                     n;
+       uint8           ids[XLR_MAX_BKP_BLOCKS];
+
+       /* Block references start right after the rmgr data */
+       blk = (char *) XLogRecGetData(record) + record->xl_len;
+       end = ((char *) record) + record->xl_tot_len;
+
+       /* Extract the entries into a local array first */
+       for (n = 0; blk < end && n < XLR_MAX_BKP_BLOCKS; n++)
+       {
+               /* Only the leading (same-rel sized) part of the header is needed */
+               memcpy(&bkpb, blk, SizeOfXLogRecordBlockDataSameRel);
+               if (bkpb.fork_flags & BKPBLOCK_SAME_REL)
+                       blk += SizeOfXLogRecordBlockDataSameRel;
+               else
+                       blk += SizeOfXLogRecordBlockData;
+
+               /* Payload, if any, begins at the next MAXALIGN boundary */
+               if (bkpb.data_length > 0)
+                       blk = (char *) MAXALIGN(blk);
+               blk += bkpb.data_length;
+
+               ids[n] = bkpb.id;
+       }
+
+       *num_refs = n;
+
+       /* Allocate a return array and copy the data to it */
+       out = palloc(n * sizeof(uint8));
+       memcpy(out, ids, n * sizeof(uint8));
+
+       return out;
+}
+
+
+/*
+ * Returns the XLogRecordBlockData struct and payload of a block reference,
+ * or NULL if the record has no block reference with the given ID.
+ *
+ * If 'content' is not NULL, *content is set to point to the block
+ * reference's payload within the record.
+ *
+ * The returned pointer refers to a static buffer, which is overwritten by
+ * the next call.
+ *
+ * NOTE: This is an internal function, used by the backend code that deals
+ * with WAL records.  redo/desc routines and other higher-level functions
+ * should use the XLogRecGetBlockTag() and XLogRecGetBlockData() functions
+ * instead.
+ */
+XLogRecordBlockData *
+XLogRecGetBlockRef(XLogRecord *record, uint8 block_id, char **content)
+{
+       static XLogRecordBlockData bkpb;
+       char       *blk;
+       char       *end;
+
+       end = ((char *) record) + record->xl_tot_len;
+
+       /* Locate requested XLogRecordBlockData in the record */
+       blk = (char *) XLogRecGetData(record) + record->xl_len;
+       while (blk < end)
+       {
+               uint8           flags;
+               Assert(blk < ((char *) record) + record->xl_tot_len);
+
+               /* Peek at the flags to learn which header size this entry uses */
+               flags = *((uint8 *) (blk + offsetof(XLogRecordBlockData, fork_flags)));
+               if (flags & BKPBLOCK_SAME_REL)
+               {
+                       /*
+                        * Only the leading part of the struct is overwritten; the rest
+                        * of the static struct keeps what an earlier copy left there.
+                        */
+                       memcpy(&bkpb, blk, SizeOfXLogRecordBlockDataSameRel);
+                       /* The relfilenode field stays unchanged */
+                       blk += SizeOfXLogRecordBlockDataSameRel;
+               }
+               else
+               {
+                       memcpy(&bkpb, blk, SizeOfXLogRecordBlockData);
+                       blk += SizeOfXLogRecordBlockData;
+               }
+
+               /* Payload, if any, begins at the next MAXALIGN boundary */
+               if (bkpb.data_length > 0)
+                       blk = (char *) MAXALIGN(blk);
+
+               if (bkpb.id == block_id)
+               {
+                       /* Found it */
+                       if (content)
+                               *content = blk;
+                       return &bkpb;
+               }
+
+               /* Not the one we want: skip its payload and try the next entry */
+               blk += bkpb.data_length;
+       }
+
+       /* Caller specified a bogus block_id */
+       return NULL;
+}
index 1a21dac85382ed991a11603e62bccab9745e5ae1..95afcb49be8e3aab7121316c27261cb8e7a9b4cf 100644 (file)
 #include "utils/rel.h"
 
 
+static Buffer RestoreBackupBlockContents(XLogRecPtr lsn, XLogRecordBlockData *bkpb,
+                                                XLogRecordBlockImage *blk,
+                                                bool get_cleanup_lock, bool keep_buffer);
+static Buffer RestoreBackupBlock(XLogRecPtr lsn, XLogRecord *record,
+                                  uint8 block_id, bool get_cleanup_lock, bool keep_buffer);
+
 /*
  * During XLOG replay, we may see XLOG records for incremental updates of
  * pages that no longer exist, because their relation was later dropped or
@@ -253,9 +259,8 @@ XLogCheckInvalidPages(void)
  *
  * 'lsn' is the LSN of the record being replayed.  It is compared with the
  * page's LSN to determine if the record has already been replayed.
- * 'rnode' and 'blkno' point to the block being replayed (main fork number
- * is implied, use XLogReadBufferForRedoExtended for other forks).
- * 'block_index' identifies the backup block in the record for the page.
+ * 'block_id' is the ID number the block was registered with, when the WAL
+ * record was created.
  *
  * Returns one of the following:
  *
@@ -274,13 +279,11 @@ XLogCheckInvalidPages(void)
  * definitely necessary.)
  */
 XLogRedoAction
-XLogReadBufferForRedo(XLogRecPtr lsn, XLogRecord *record, int block_index,
-                                         RelFileNode rnode, BlockNumber blkno,
+XLogReadBufferForRedo(XLogRecPtr lsn, XLogRecord *record, uint8 block_id,
                                          Buffer *buf)
 {
-       return XLogReadBufferForRedoExtended(lsn, record, block_index,
-                                                                                rnode, MAIN_FORKNUM, blkno,
-                                                                                RBM_NORMAL, false, buf);
+       /* Shorthand for the extended variant: RBM_NORMAL mode, no cleanup lock */
+       return XLogReadBufferForRedoExtended(lsn, record, block_id, RBM_NORMAL,
+                                                                                false, buf);
 }
 
 /*
@@ -296,20 +299,28 @@ XLogReadBufferForRedo(XLogRecPtr lsn, XLogRecord *record, int block_index,
  */
 XLogRedoAction
 XLogReadBufferForRedoExtended(XLogRecPtr lsn, XLogRecord *record,
-                                                         int block_index, RelFileNode rnode,
-                                                         ForkNumber forkno, BlockNumber blkno,
+                                                         uint8 block_id,
                                                          ReadBufferMode mode, bool get_cleanup_lock,
                                                          Buffer *buf)
 {
-       if (record->xl_info & XLR_BKP_BLOCK(block_index))
-       {
-               *buf = RestoreBackupBlock(lsn, record, block_index,
-                                                                 get_cleanup_lock, true);
+       struct XLogRecordBlockData *bkpb;
+
+       *buf = RestoreBackupBlock(lsn, record, block_id, get_cleanup_lock, true);
+       if (*buf != InvalidBuffer)
                return BLK_RESTORED;
-       }
        else
        {
-               *buf = XLogReadBufferExtended(rnode, forkno, blkno, mode);
+               bkpb = XLogRecGetBlockRef(record, block_id, NULL);
+               if (bkpb == NULL)
+                       elog(ERROR, "failed to locate backup block with ID %d", block_id);
+
+               if ((bkpb->fork_flags & BKPBLOCK_WILL_INIT) != 0 && mode != RBM_ZERO)
+                       elog(PANIC, "block with WILL_INIT flag in WAL record must be zeroed by redo routine");
+
+               *buf = XLogReadBufferExtended(bkpb->node,
+                                                                         bkpb->fork_flags & BKPBLOCK_FORK_MASK,
+                                                                         bkpb->block,
+                                                                         mode);
                if (BufferIsValid(*buf))
                {
                        LockBuffer(*buf, BUFFER_LOCK_EXCLUSIVE);
@@ -323,36 +334,6 @@ XLogReadBufferForRedoExtended(XLogRecPtr lsn, XLogRecord *record,
        }
 }
 
-/*
- * XLogReadBuffer
- *             Read a page during XLOG replay.
- *
- * This is a shorthand of XLogReadBufferExtended() followed by
- * LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE), for reading from the main
- * fork.
- *
- * (Getting the buffer lock is not really necessary during single-process
- * crash recovery, but some subroutines such as MarkBufferDirty will complain
- * if we don't have the lock.  In hot standby mode it's definitely necessary.)
- *
- * The returned buffer is exclusively-locked.
- *
- * For historical reasons, instead of a ReadBufferMode argument, this only
- * supports RBM_ZERO (init == true) and RBM_NORMAL (init == false) modes.
- */
-Buffer
-XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
-{
-       Buffer          buf;
-
-       buf = XLogReadBufferExtended(rnode, MAIN_FORKNUM, blkno,
-                                                                init ? RBM_ZERO : RBM_NORMAL);
-       if (BufferIsValid(buf))
-               LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
-
-       return buf;
-}
-
 /*
  * XLogReadBufferExtended
  *             Read a page during XLOG replay
@@ -372,6 +353,11 @@ XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
  * In RBM_NORMAL_NO_LOG mode, we return InvalidBuffer if the page doesn't
  * exist, and we don't check for all-zeroes.  Thus, no log entry is made
  * to imply that the page should be dropped or truncated later.
+ *
+ * NB: A redo function should normally not call this directly. To get a page
+ * to modify, use XLogReadBufferForRedo instead. It is important that all
+ * pages modified by a WAL record are registered in the WAL record, or they
+ * will be invisible to tools that need to know which pages are modified.
  */
 Buffer
 XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
@@ -461,7 +447,7 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
  *
  * lsn: LSN of the XLOG record being replayed
  * record: the complete XLOG record
- * block_index: which backup block to restore (0 .. XLR_MAX_BKP_BLOCKS - 1)
+ * block_id: which backup block to restore
  * get_cleanup_lock: TRUE to get a cleanup rather than plain exclusive lock
  * keep_buffer: TRUE to return the buffer still locked and pinned
  *
@@ -488,37 +474,28 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
  * pages, to prevent inconsistent states from being visible to other backends.
  * (Again, that's only important in hot standby mode.)
  */
-Buffer
-RestoreBackupBlock(XLogRecPtr lsn, XLogRecord *record, int block_index,
+static Buffer
+RestoreBackupBlock(XLogRecPtr lsn, XLogRecord *record, uint8 block_id,
                                   bool get_cleanup_lock, bool keep_buffer)
 {
-       BkpBlock        bkpb;
+       XLogRecordBlockData   *bkpb;
        char       *blk;
-       int                     i;
 
-       /* Locate requested BkpBlock in the record */
-       blk = (char *) XLogRecGetData(record) + record->xl_len;
-       for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
+       /* Look up the block reference; 'blk' is set to point at its payload */
+       bkpb = XLogRecGetBlockRef(record, block_id, &blk);
+       if (!bkpb)
        {
-               if (!(record->xl_info & XLR_BKP_BLOCK(i)))
-                       continue;
-
-               memcpy(&bkpb, blk, sizeof(BkpBlock));
-               blk += sizeof(BkpBlock);
-
-               if (i == block_index)
-               {
-                       /* Found it, apply the update */
-                       return RestoreBackupBlockContents(lsn, bkpb, blk, get_cleanup_lock,
-                                                                                         keep_buffer);
-               }
-
-               blk += BLCKSZ - bkpb.hole_length;
+               /* Caller specified a bogus block_id */
+               elog(PANIC, "failed to restore backup block with ID %d", block_id);
        }
 
-       /* Caller specified a bogus block_index */
-       elog(ERROR, "failed to restore block_index %d", block_index);
-       return InvalidBuffer;           /* keep compiler quiet */
+       /* Found it, apply the update */
+       /* Without a full-page image there is nothing to restore here */
+       if (!(bkpb->fork_flags & BKPBLOCK_HAS_IMAGE))
+               return InvalidBuffer;
+
+       /* The payload starts with the XLogRecordBlockImage header */
+       return RestoreBackupBlockContents(lsn, bkpb,
+                                                                         (XLogRecordBlockImage *) blk,
+                                                                         get_cleanup_lock,
+                                                                         keep_buffer);
 }
 
 /*
@@ -526,14 +503,18 @@ RestoreBackupBlock(XLogRecPtr lsn, XLogRecord *record, int block_index,
  *
  * Restores a full-page image from BkpBlock and a data pointer.
  */
-Buffer
-RestoreBackupBlockContents(XLogRecPtr lsn, BkpBlock bkpb, char *blk,
+static Buffer
+RestoreBackupBlockContents(XLogRecPtr lsn, XLogRecordBlockData *bkpb,
+                                                  XLogRecordBlockImage *blkimg,
                                                   bool get_cleanup_lock, bool keep_buffer)
 {
        Buffer          buffer;
        Page            page;
+       char       *blk;
 
-       buffer = XLogReadBufferExtended(bkpb.node, bkpb.fork, bkpb.block,
+       buffer = XLogReadBufferExtended(bkpb->node,
+                                                                       bkpb->fork_flags & BKPBLOCK_FORK_MASK,
+                                                                       bkpb->block,
                                                                        RBM_ZERO);
        Assert(BufferIsValid(buffer));
        if (get_cleanup_lock)
@@ -543,18 +524,20 @@ RestoreBackupBlockContents(XLogRecPtr lsn, BkpBlock bkpb, char *blk,
 
        page = (Page) BufferGetPage(buffer);
 
-       if (bkpb.hole_length == 0)
+       blk = ((char *) blkimg) + sizeof(XLogRecordBlockImage);
+
+       if (blkimg->hole_length == 0)
        {
                memcpy((char *) page, blk, BLCKSZ);
        }
        else
        {
-               memcpy((char *) page, blk, bkpb.hole_offset);
+               memcpy((char *) page, blk, blkimg->hole_offset);
                /* must zero-fill the hole */
-               MemSet((char *) page + bkpb.hole_offset, 0, bkpb.hole_length);
-               memcpy((char *) page + (bkpb.hole_offset + bkpb.hole_length),
-                          blk + bkpb.hole_offset,
-                          BLCKSZ - (bkpb.hole_offset + bkpb.hole_length));
+               MemSet((char *) page + blkimg->hole_offset, 0, blkimg->hole_length);
+               memcpy((char *) page + (blkimg->hole_offset + blkimg->hole_length),
+                          blk + blkimg->hole_offset,
+                          BLCKSZ - (blkimg->hole_offset + blkimg->hole_length));
        }
 
        /*
index 46780e71d69c50378ab217547a89d3ac63a14f52..2c6b5f275871aa843fc5748e04c902cc30487d41 100644 (file)
@@ -125,7 +125,6 @@ void
 log_smgrcreate(RelFileNode *rnode, ForkNumber forkNum)
 {
        xl_smgr_create xlrec;
-       XLogRecData rdata;
 
        /*
         * Make an XLOG entry reporting the file creation.
@@ -133,12 +132,9 @@ log_smgrcreate(RelFileNode *rnode, ForkNumber forkNum)
        xlrec.rnode = *rnode;
        xlrec.forkNum = forkNum;
 
-       rdata.data = (char *) &xlrec;
-       rdata.len = sizeof(xlrec);
-       rdata.buffer = InvalidBuffer;
-       rdata.next = NULL;
-
-       XLogInsert(RM_SMGR_ID, XLOG_SMGR_CREATE, &rdata);
+       XLogBeginInsert();
+       XLogRegisterData((char *) &xlrec, sizeof(xlrec));
+       XLogInsert(RM_SMGR_ID, XLOG_SMGR_CREATE);
 }
 
 /*
@@ -268,18 +264,15 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
                 * Make an XLOG entry reporting the file truncation.
                 */
                XLogRecPtr      lsn;
-               XLogRecData rdata;
                xl_smgr_truncate xlrec;
 
                xlrec.blkno = nblocks;
                xlrec.rnode = rel->rd_node;
 
-               rdata.data = (char *) &xlrec;
-               rdata.len = sizeof(xlrec);
-               rdata.buffer = InvalidBuffer;
-               rdata.next = NULL;
+               XLogBeginInsert();
+               XLogRegisterData((char *) &xlrec, sizeof(xlrec));
 
-               lsn = XLogInsert(RM_SMGR_ID, XLOG_SMGR_TRUNCATE, &rdata);
+               lsn = XLogInsert(RM_SMGR_ID, XLOG_SMGR_TRUNCATE);
 
                /*
                 * Flush, because otherwise the truncation of the main relation might
@@ -484,7 +477,7 @@ smgr_redo(XLogRecPtr lsn, XLogRecord *record)
        uint8           info = record->xl_info & ~XLR_INFO_MASK;
 
        /* Backup blocks are not used in smgr records */
-       Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+       Assert(!XLogRecHasAnyBlockRefs(record));
 
        if (info == XLOG_SMGR_CREATE)
        {
@@ -505,7 +498,7 @@ smgr_redo(XLogRecPtr lsn, XLogRecord *record)
                /*
                 * Forcibly create relation if it doesn't exist (which suggests that
                 * it was dropped somewhere later in the WAL sequence).  As in
-                * XLogReadBuffer, we prefer to recreate the rel and replay the log as
+                * XLogReadBufferForRedo, we prefer to recreate the rel and replay the log as
                 * best we can until the drop is seen.
                 */
                smgrcreate(reln, MAIN_FORKNUM, true);
index 94c82d37410c59fb1aa6b9e791760758416582b6..4546bebf28d874c5ee1905044774e0bdca94a285 100644 (file)
@@ -619,19 +619,17 @@ createdb(const CreatedbStmt *stmt)
                        /* Record the filesystem change in XLOG */
                        {
                                xl_dbase_create_rec xlrec;
-                               XLogRecData rdata[1];
 
                                xlrec.db_id = dboid;
                                xlrec.tablespace_id = dsttablespace;
                                xlrec.src_db_id = src_dboid;
                                xlrec.src_tablespace_id = srctablespace;
 
-                               rdata[0].data = (char *) &xlrec;
-                               rdata[0].len = sizeof(xl_dbase_create_rec);
-                               rdata[0].buffer = InvalidBuffer;
-                               rdata[0].next = NULL;
+                               XLogBeginInsert();
+                               XLogRegisterData((char *) &xlrec,
+                                                                        sizeof(xl_dbase_create_rec));
 
-                               (void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE, rdata);
+                               (void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE);
                        }
                }
                heap_endscan(scan);
@@ -1226,19 +1224,16 @@ movedb(const char *dbname, const char *tblspcname)
                 */
                {
                        xl_dbase_create_rec xlrec;
-                       XLogRecData rdata[1];
 
                        xlrec.db_id = db_id;
                        xlrec.tablespace_id = dst_tblspcoid;
                        xlrec.src_db_id = db_id;
                        xlrec.src_tablespace_id = src_tblspcoid;
 
-                       rdata[0].data = (char *) &xlrec;
-                       rdata[0].len = sizeof(xl_dbase_create_rec);
-                       rdata[0].buffer = InvalidBuffer;
-                       rdata[0].next = NULL;
+                       XLogBeginInsert();
+                       XLogRegisterData((char *) &xlrec, sizeof(xl_dbase_create_rec));
 
-                       (void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE, rdata);
+                       (void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE);
                }
 
                /*
@@ -1330,17 +1325,14 @@ movedb(const char *dbname, const char *tblspcname)
         */
        {
                xl_dbase_drop_rec xlrec;
-               XLogRecData rdata[1];
 
                xlrec.db_id = db_id;
                xlrec.tablespace_id = src_tblspcoid;
 
-               rdata[0].data = (char *) &xlrec;
-               rdata[0].len = sizeof(xl_dbase_drop_rec);
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].next = NULL;
+               XLogBeginInsert();
+               XLogRegisterData((char *) &xlrec, sizeof(xl_dbase_drop_rec));
 
-               (void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_DROP, rdata);
+               (void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_DROP);
        }
 
        /* Now it's safe to release the database lock */
@@ -1870,17 +1862,14 @@ remove_dbtablespaces(Oid db_id)
                /* Record the filesystem change in XLOG */
                {
                        xl_dbase_drop_rec xlrec;
-                       XLogRecData rdata[1];
 
                        xlrec.db_id = db_id;
                        xlrec.tablespace_id = dsttablespace;
 
-                       rdata[0].data = (char *) &xlrec;
-                       rdata[0].len = sizeof(xl_dbase_drop_rec);
-                       rdata[0].buffer = InvalidBuffer;
-                       rdata[0].next = NULL;
+                       XLogBeginInsert();
+                       XLogRegisterData((char *) &xlrec, sizeof(xl_dbase_drop_rec));
 
-                       (void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_DROP, rdata);
+                       (void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_DROP);
                }
 
                pfree(dstpath);
@@ -2048,7 +2037,7 @@ dbase_redo(XLogRecPtr lsn, XLogRecord *record)
        uint8           info = record->xl_info & ~XLR_INFO_MASK;
 
        /* Backup blocks are not used in dbase records */
-       Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+       Assert(!XLogRecHasAnyBlockRefs(record));
 
        if (info == XLOG_DBASE_CREATE)
        {
index e5f7765d556065bc9e293a8aca7530c64095b065..7825b815d866bae2be9eed9b295f7c53a15062e2 100644 (file)
@@ -372,20 +372,16 @@ fill_seq_with_data(Relation rel, HeapTuple tuple)
        {
                xl_seq_rec      xlrec;
                XLogRecPtr      recptr;
-               XLogRecData rdata[2];
+
+               XLogBeginInsert();
+               XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);
 
                xlrec.node = rel->rd_node;
-               rdata[0].data = (char *) &xlrec;
-               rdata[0].len = sizeof(xl_seq_rec);
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].next = &(rdata[1]);
 
-               rdata[1].data = (char *) tuple->t_data;
-               rdata[1].len = tuple->t_len;
-               rdata[1].buffer = InvalidBuffer;
-               rdata[1].next = NULL;
+               XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec));
+               XLogRegisterData((char *) tuple->t_data, tuple->t_len);
 
-               recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, rdata);
+               recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);
 
                PageSetLSN(page, recptr);
        }
@@ -454,21 +450,17 @@ AlterSequence(AlterSeqStmt *stmt)
        {
                xl_seq_rec      xlrec;
                XLogRecPtr      recptr;
-               XLogRecData rdata[2];
                Page            page = BufferGetPage(buf);
 
+               XLogBeginInsert();
+               XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);
+
                xlrec.node = seqrel->rd_node;
-               rdata[0].data = (char *) &xlrec;
-               rdata[0].len = sizeof(xl_seq_rec);
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].next = &(rdata[1]);
+               XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec));
 
-               rdata[1].data = (char *) seqtuple.t_data;
-               rdata[1].len = seqtuple.t_len;
-               rdata[1].buffer = InvalidBuffer;
-               rdata[1].next = NULL;
+               XLogRegisterData((char *) seqtuple.t_data, seqtuple.t_len);
 
-               recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, rdata);
+               recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);
 
                PageSetLSN(page, recptr);
        }
@@ -706,7 +698,6 @@ nextval_internal(Oid relid)
        {
                xl_seq_rec      xlrec;
                XLogRecPtr      recptr;
-               XLogRecData rdata[2];
 
                /*
                 * We don't log the current state of the tuple, but rather the state
@@ -714,6 +705,8 @@ nextval_internal(Oid relid)
                 * that many future WAL records, at the cost that we lose those
                 * sequence values if we crash.
                 */
+               XLogBeginInsert();
+               XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);
 
                /* set values that will be saved in xlog */
                seq->last_value = next;
@@ -721,17 +714,11 @@ nextval_internal(Oid relid)
                seq->log_cnt = 0;
 
                xlrec.node = seqrel->rd_node;
-               rdata[0].data = (char *) &xlrec;
-               rdata[0].len = sizeof(xl_seq_rec);
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].next = &(rdata[1]);
 
-               rdata[1].data = (char *) seqtuple.t_data;
-               rdata[1].len = seqtuple.t_len;
-               rdata[1].buffer = InvalidBuffer;
-               rdata[1].next = NULL;
+               XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec));
+               XLogRegisterData((char *) seqtuple.t_data, seqtuple.t_len);
 
-               recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, rdata);
+               recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);
 
                PageSetLSN(page, recptr);
        }
@@ -894,21 +881,16 @@ do_setval(Oid relid, int64 next, bool iscalled)
        {
                xl_seq_rec      xlrec;
                XLogRecPtr      recptr;
-               XLogRecData rdata[2];
                Page            page = BufferGetPage(buf);
 
-               xlrec.node = seqrel->rd_node;
-               rdata[0].data = (char *) &xlrec;
-               rdata[0].len = sizeof(xl_seq_rec);
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].next = &(rdata[1]);
+               XLogBeginInsert();
+               XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);
 
-               rdata[1].data = (char *) seqtuple.t_data;
-               rdata[1].len = seqtuple.t_len;
-               rdata[1].buffer = InvalidBuffer;
-               rdata[1].next = NULL;
+               xlrec.node = seqrel->rd_node;
+               XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec));
+               XLogRegisterData((char *) seqtuple.t_data, seqtuple.t_len);
 
-               recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, rdata);
+               recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);
 
                PageSetLSN(page, recptr);
        }
@@ -1563,14 +1545,10 @@ seq_redo(XLogRecPtr lsn, XLogRecord *record)
        xl_seq_rec *xlrec = (xl_seq_rec *) XLogRecGetData(record);
        sequence_magic *sm;
 
-       /* Backup blocks are not used in seq records */
-       Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
-
        if (info != XLOG_SEQ_LOG)
                elog(PANIC, "seq_redo: unknown op code %u", info);
 
-       buffer = XLogReadBuffer(xlrec->node, 0, true);
-       Assert(BufferIsValid(buffer));
+       XLogReadBufferForRedoExtended(lsn, record, 0, RBM_ZERO, false, &buffer);
        page = (Page) BufferGetPage(buffer);
 
        /*
index 378e355adcc8abb9158dcb532f304d5d476af742..f8e19b97fd2d18143a93de3192ea53aeac7756cc 100644 (file)
@@ -354,20 +354,15 @@ CreateTableSpace(CreateTableSpaceStmt *stmt)
        /* Record the filesystem change in XLOG */
        {
                xl_tblspc_create_rec xlrec;
-               XLogRecData rdata[2];
 
                xlrec.ts_id = tablespaceoid;
-               rdata[0].data = (char *) &xlrec;
-               rdata[0].len = offsetof(xl_tblspc_create_rec, ts_path);
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].next = &(rdata[1]);
 
-               rdata[1].data = (char *) location;
-               rdata[1].len = strlen(location) + 1;
-               rdata[1].buffer = InvalidBuffer;
-               rdata[1].next = NULL;
+               XLogBeginInsert();
+               XLogRegisterData((char *) &xlrec,
+                                                        offsetof(xl_tblspc_create_rec, ts_path));
+               XLogRegisterData((char *) location, strlen(location) + 1);
 
-               (void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_CREATE, rdata);
+               (void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_CREATE);
        }
 
        /*
@@ -508,15 +503,13 @@ DropTableSpace(DropTableSpaceStmt *stmt)
        /* Record the filesystem change in XLOG */
        {
                xl_tblspc_drop_rec xlrec;
-               XLogRecData rdata[1];
 
                xlrec.ts_id = tablespaceoid;
-               rdata[0].data = (char *) &xlrec;
-               rdata[0].len = sizeof(xl_tblspc_drop_rec);
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].next = NULL;
 
-               (void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_DROP, rdata);
+               XLogBeginInsert();
+               XLogRegisterData((char *) &xlrec, sizeof(xl_tblspc_drop_rec));
+
+               (void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_DROP);
        }
 
        /*
@@ -1406,7 +1399,7 @@ tblspc_redo(XLogRecPtr lsn, XLogRecord *record)
        uint8           info = record->xl_info & ~XLR_INFO_MASK;
 
        /* Backup blocks are not used in tblspc records */
-       Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+       Assert(!XLogRecHasAnyBlockRefs(record));
 
        if (info == XLOG_TBLSPC_CREATE)
        {
index 8e78aafda7cbf01914df86fd03a5657eda93fea3..2313eb7d3edb916cc87d12771d6beafe15112975 100644 (file)
@@ -31,6 +31,8 @@
 #include "access/transam.h"
 #include "access/xact.h"
 #include "access/xlog_internal.h"
+#include "access/xlogutils.h"
+#include "access/xlogrecord.h"
 #include "access/xlogreader.h"
 
 #include "catalog/pg_control.h"
@@ -46,8 +48,7 @@ typedef struct XLogRecordBuffer
 {
        XLogRecPtr      origptr;
        XLogRecPtr      endptr;
-       XLogRecord      record;
-       char       *record_data;
+       XLogRecord *record;
 } XLogRecordBuffer;
 
 /* RMGR Handlers */
@@ -85,11 +86,10 @@ LogicalDecodingProcessRecord(LogicalDecodingContext *ctx, XLogRecord *record)
 
        buf.origptr = ctx->reader->ReadRecPtr;
        buf.endptr = ctx->reader->EndRecPtr;
-       buf.record = *record;
-       buf.record_data = XLogRecGetData(record);
+       buf.record = record;
 
        /* cast so we get a warning when new rmgrs are added */
-       switch ((RmgrIds) buf.record.xl_rmid)
+       switch ((RmgrIds) buf.record->xl_rmid)
        {
                        /*
                         * Rmgrs we care about for logical decoding. Add new rmgrs in
@@ -135,7 +135,7 @@ LogicalDecodingProcessRecord(LogicalDecodingContext *ctx, XLogRecord *record)
                case RM_BRIN_ID:
                        break;
                case RM_NEXT_ID:
-                       elog(ERROR, "unexpected RM_NEXT_ID rmgr_id: %u", (RmgrIds) buf.record.xl_rmid);
+                       elog(ERROR, "unexpected RM_NEXT_ID rmgr_id: %u", (RmgrIds) buf.record->xl_rmid);
        }
 }
 
@@ -146,7 +146,7 @@ static void
 DecodeXLogOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 {
        SnapBuild  *builder = ctx->snapshot_builder;
-       uint8           info = buf->record.xl_info & ~XLR_INFO_MASK;
+       uint8           info = buf->record->xl_info & ~XLR_INFO_MASK;
 
        switch (info)
        {
@@ -185,7 +185,7 @@ DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 {
        SnapBuild  *builder = ctx->snapshot_builder;
        ReorderBuffer *reorder = ctx->reorder;
-       XLogRecord *r = &buf->record;
+       XLogRecord *r = buf->record;
        uint8           info = r->xl_info & ~XLR_INFO_MASK;
 
        /* no point in doing anything yet, data could not be decoded anyway */
@@ -200,7 +200,7 @@ DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
                                TransactionId *subxacts = NULL;
                                SharedInvalidationMessage *invals = NULL;
 
-                               xlrec = (xl_xact_commit *) buf->record_data;
+                               xlrec = (xl_xact_commit *) XLogRecGetData(r);
 
                                subxacts = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
                                invals = (SharedInvalidationMessage *) &(subxacts[xlrec->nsubxacts]);
@@ -220,7 +220,7 @@ DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
                                SharedInvalidationMessage *invals = NULL;
 
                                /* Prepared commits contain a normal commit record... */
-                               prec = (xl_xact_commit_prepared *) buf->record_data;
+                               prec = (xl_xact_commit_prepared *) XLogRecGetData(r);
                                xlrec = &prec->crec;
 
                                subxacts = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
@@ -237,7 +237,7 @@ DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
                        {
                                xl_xact_commit_compact *xlrec;
 
-                               xlrec = (xl_xact_commit_compact *) buf->record_data;
+                               xlrec = (xl_xact_commit_compact *) XLogRecGetData(r);
 
                                DecodeCommit(ctx, buf, r->xl_xid, InvalidOid,
                                                         xlrec->xact_time,
@@ -250,7 +250,7 @@ DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
                                xl_xact_abort *xlrec;
                                TransactionId *sub_xids;
 
-                               xlrec = (xl_xact_abort *) buf->record_data;
+                               xlrec = (xl_xact_abort *) XLogRecGetData(r);
 
                                sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
 
@@ -265,7 +265,7 @@ DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
                                TransactionId *sub_xids;
 
                                /* prepared abort contain a normal commit abort... */
-                               prec = (xl_xact_abort_prepared *) buf->record_data;
+                               prec = (xl_xact_abort_prepared *) XLogRecGetData(r);
                                xlrec = &prec->arec;
 
                                sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
@@ -282,7 +282,7 @@ DecodeXactOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
                                int                     i;
                                TransactionId *sub_xid;
 
-                               xlrec = (xl_xact_assignment *) buf->record_data;
+                               xlrec = (xl_xact_assignment *) XLogRecGetData(r);
 
                                sub_xid = &xlrec->xsub[0];
 
@@ -316,14 +316,14 @@ static void
 DecodeStandbyOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 {
        SnapBuild  *builder = ctx->snapshot_builder;
-       XLogRecord *r = &buf->record;
+       XLogRecord *r = buf->record;
        uint8           info = r->xl_info & ~XLR_INFO_MASK;
 
        switch (info)
        {
                case XLOG_RUNNING_XACTS:
                        {
-                               xl_running_xacts *running = (xl_running_xacts *) buf->record_data;
+                               xl_running_xacts *running = (xl_running_xacts *) XLogRecGetData(r);
 
                                SnapBuildProcessRunningXacts(builder, buf->origptr, running);
 
@@ -352,8 +352,8 @@ DecodeStandbyOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 static void
 DecodeHeap2Op(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 {
-       uint8           info = buf->record.xl_info & XLOG_HEAP_OPMASK;
-       TransactionId xid = buf->record.xl_xid;
+       uint8           info = buf->record->xl_info & XLOG_HEAP_OPMASK;
+       TransactionId xid = buf->record->xl_xid;
        SnapBuild  *builder = ctx->snapshot_builder;
 
        /* no point in doing anything yet */
@@ -370,7 +370,7 @@ DecodeHeap2Op(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
                        {
                                xl_heap_new_cid *xlrec;
 
-                               xlrec = (xl_heap_new_cid *) buf->record_data;
+                               xlrec = (xl_heap_new_cid *) XLogRecGetData(buf->record);
                                SnapBuildProcessNewCid(builder, xid, buf->origptr, xlrec);
 
                                break;
@@ -405,8 +405,8 @@ DecodeHeap2Op(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 static void
 DecodeHeapOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 {
-       uint8           info = buf->record.xl_info & XLOG_HEAP_OPMASK;
-       TransactionId xid = buf->record.xl_xid;
+       uint8           info = buf->record->xl_info & XLOG_HEAP_OPMASK;
+       TransactionId xid = buf->record->xl_xid;
        SnapBuild  *builder = ctx->snapshot_builder;
 
        /* no point in doing anything yet */
@@ -576,19 +576,21 @@ DecodeAbort(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid,
 static void
 DecodeInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 {
-       XLogRecord *r = &buf->record;
+       XLogRecord *r = buf->record;
        xl_heap_insert *xlrec;
        ReorderBufferChange *change;
+       RelFileNode target_node;
 
-       xlrec = (xl_heap_insert *) buf->record_data;
+       xlrec = (xl_heap_insert *) XLogRecGetData(r);
 
        /* only interested in our database */
-       if (xlrec->target.node.dbNode != ctx->slot->data.database)
+       XLogRecGetBlockTag(r, 0, &target_node, NULL, NULL);
+       if (target_node.dbNode != ctx->slot->data.database)
                return;
 
        change = ReorderBufferGetChange(ctx->reorder);
        change->action = REORDER_BUFFER_CHANGE_INSERT;
-       memcpy(&change->data.tp.relnode, &xlrec->target.node, sizeof(RelFileNode));
+       memcpy(&change->data.tp.relnode, &target_node, sizeof(RelFileNode));
 
        if (xlrec->flags & XLOG_HEAP_CONTAINS_NEW_TUPLE)
        {
@@ -615,57 +617,43 @@ DecodeInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 static void
 DecodeUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 {
-       XLogRecord *r = &buf->record;
+       XLogRecord *r = buf->record;
        xl_heap_update *xlrec;
-       xl_heap_header_len xlhdr;
        ReorderBufferChange *change;
        char       *data;
+       Size            datalen;
+       RelFileNode target_node;
 
-       xlrec = (xl_heap_update *) buf->record_data;
+       xlrec = (xl_heap_update *) XLogRecGetData(r);
 
        /* only interested in our database */
-       if (xlrec->target.node.dbNode != ctx->slot->data.database)
+       XLogRecGetBlockTag(r, 0, &target_node, NULL, NULL);
+       if (target_node.dbNode != ctx->slot->data.database)
                return;
 
        change = ReorderBufferGetChange(ctx->reorder);
        change->action = REORDER_BUFFER_CHANGE_UPDATE;
-       memcpy(&change->data.tp.relnode, &xlrec->target.node, sizeof(RelFileNode));
-
-       /* caution, remaining data in record is not aligned */
-       data = buf->record_data + SizeOfHeapUpdate;
+       memcpy(&change->data.tp.relnode, &target_node, sizeof(RelFileNode));
 
        if (xlrec->flags & XLOG_HEAP_CONTAINS_NEW_TUPLE)
        {
-               Assert(r->xl_len > (SizeOfHeapUpdate + SizeOfHeapHeaderLen));
-
-               memcpy(&xlhdr, data, sizeof(xlhdr));
-               data += offsetof(xl_heap_header_len, header);
+               data = XLogRecGetBlockData(r, 0, &datalen);
+               Assert(datalen > (SizeOfHeapUpdate + SizeOfHeapHeader));
 
                change->data.tp.newtuple = ReorderBufferGetTupleBuf(ctx->reorder);
 
-               DecodeXLogTuple(data,
-                                               xlhdr.t_len + SizeOfHeapHeader,
-                                               change->data.tp.newtuple);
-               /* skip over the rest of the tuple header */
-               data += SizeOfHeapHeader;
-               /* skip over the tuple data */
-               data += xlhdr.t_len;
+               DecodeXLogTuple(data, datalen, change->data.tp.newtuple);
        }
 
        if (xlrec->flags & XLOG_HEAP_CONTAINS_OLD)
        {
-               memcpy(&xlhdr, data, sizeof(xlhdr));
-               data += offsetof(xl_heap_header_len, header);
+               /* caution, remaining data in record is not aligned */
+               data = XLogRecGetData(r) + SizeOfHeapUpdate;
+               datalen = r->xl_len - SizeOfHeapUpdate;
 
                change->data.tp.oldtuple = ReorderBufferGetTupleBuf(ctx->reorder);
 
-               DecodeXLogTuple(data,
-                                               xlhdr.t_len + SizeOfHeapHeader,
-                                               change->data.tp.oldtuple);
-#ifdef NOT_USED
-               data += SizeOfHeapHeader;
-               data += xlhdr.t_len;
-#endif
+               DecodeXLogTuple(data, datalen, change->data.tp.oldtuple);
        }
 
        change->data.tp.clear_toast_afterwards = true;
@@ -681,20 +669,22 @@ DecodeUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 static void
 DecodeDelete(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 {
-       XLogRecord *r = &buf->record;
+       XLogRecord *r = buf->record;
        xl_heap_delete *xlrec;
        ReorderBufferChange *change;
+       RelFileNode target_node;
 
-       xlrec = (xl_heap_delete *) buf->record_data;
+       xlrec = (xl_heap_delete *) XLogRecGetData(r);
 
        /* only interested in our database */
-       if (xlrec->target.node.dbNode != ctx->slot->data.database)
+       XLogRecGetBlockTag(r, 0, &target_node, NULL, NULL);
+       if (target_node.dbNode != ctx->slot->data.database)
                return;
 
        change = ReorderBufferGetChange(ctx->reorder);
        change->action = REORDER_BUFFER_CHANGE_DELETE;
 
-       memcpy(&change->data.tp.relnode, &xlrec->target.node, sizeof(RelFileNode));
+       memcpy(&change->data.tp.relnode, &target_node, sizeof(RelFileNode));
 
        /* old primary key stored */
        if (xlrec->flags & XLOG_HEAP_CONTAINS_OLD)
@@ -721,19 +711,21 @@ DecodeDelete(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 static void
 DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 {
-       XLogRecord *r = &buf->record;
+       XLogRecord *r = buf->record;
        xl_heap_multi_insert *xlrec;
        int                     i;
        char       *data;
        bool            isinit = (r->xl_info & XLOG_HEAP_INIT_PAGE) != 0;
+       RelFileNode     rnode;
 
-       xlrec = (xl_heap_multi_insert *) buf->record_data;
+       xlrec = (xl_heap_multi_insert *) XLogRecGetData(r);
 
        /* only interested in our database */
-       if (xlrec->node.dbNode != ctx->slot->data.database)
+       XLogRecGetBlockTag(r, 0, &rnode, NULL, NULL);
+       if (rnode.dbNode != ctx->slot->data.database)
                return;
 
-       data = buf->record_data + SizeOfHeapMultiInsert;
+       data = XLogRecGetData(r) + SizeOfHeapMultiInsert;
 
        /*
         * OffsetNumbers (which are not of interest to us) are stored when
@@ -751,7 +743,7 @@ DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
 
                change = ReorderBufferGetChange(ctx->reorder);
                change->action = REORDER_BUFFER_CHANGE_INSERT;
-               memcpy(&change->data.tp.relnode, &xlrec->node, sizeof(RelFileNode));
+               memcpy(&change->data.tp.relnode, &rnode, sizeof(RelFileNode));
 
                /*
                 * CONTAINS_NEW_TUPLE will always be set currently as multi_insert
index 71c5fe2490a187c2cddf1b91e6fddeafc804a8cf..4b4536c4f40fed9ae0f6d6b933cb446be36b1ff1 100644 (file)
@@ -697,7 +697,7 @@ SnapBuildProcessNewCid(SnapBuild *builder, TransactionId xid,
        ReorderBufferXidSetCatalogChanges(builder->reorder, xid, lsn);
 
        ReorderBufferAddNewTupleCids(builder->reorder, xlrec->top_xid, lsn,
-                                                                xlrec->target.node, xlrec->target.tid,
+                                                                xlrec->target_node, xlrec->target_tid,
                                                                 xlrec->cmin, xlrec->cmax,
                                                                 xlrec->combocid);
 
index 8c3720bc7370b5e907e0bfe471be8829e66647a7..fa6afe6a6d878d20497bb9f6f15f898b4d8c8afc 100644 (file)
@@ -764,7 +764,7 @@ standby_redo(XLogRecPtr lsn, XLogRecord *record)
        uint8           info = record->xl_info & ~XLR_INFO_MASK;
 
        /* Backup blocks are not used in standby records */
-       Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+       Assert(!XLogRecHasAnyBlockRefs(record));
 
        /* Do nothing if we're not in hot standby mode */
        if (standbyState == STANDBY_DISABLED)
@@ -928,8 +928,6 @@ static XLogRecPtr
 LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
 {
        xl_running_xacts xlrec;
-       XLogRecData rdata[2];
-       int                     lastrdata = 0;
        XLogRecPtr      recptr;
 
        xlrec.xcnt = CurrRunningXacts->xcnt;
@@ -940,23 +938,15 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
        xlrec.latestCompletedXid = CurrRunningXacts->latestCompletedXid;
 
        /* Header */
-       rdata[0].data = (char *) (&xlrec);
-       rdata[0].len = MinSizeOfXactRunningXacts;
-       rdata[0].buffer = InvalidBuffer;
+       XLogBeginInsert();
+       XLogRegisterData((char *) (&xlrec), MinSizeOfXactRunningXacts);
 
        /* array of TransactionIds */
        if (xlrec.xcnt > 0)
-       {
-               rdata[0].next = &(rdata[1]);
-               rdata[1].data = (char *) CurrRunningXacts->xids;
-               rdata[1].len = (xlrec.xcnt + xlrec.subxcnt) * sizeof(TransactionId);
-               rdata[1].buffer = InvalidBuffer;
-               lastrdata = 1;
-       }
+               XLogRegisterData((char *) CurrRunningXacts->xids,
+                                                (xlrec.xcnt + xlrec.subxcnt) * sizeof(TransactionId));
 
-       rdata[lastrdata].next = NULL;
-
-       recptr = XLogInsert(RM_STANDBY_ID, XLOG_RUNNING_XACTS, rdata);
+       recptr = XLogInsert(RM_STANDBY_ID, XLOG_RUNNING_XACTS);
 
        if (CurrRunningXacts->subxid_overflow)
                elog(trace_recovery(DEBUG2),
@@ -996,22 +986,15 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
 static void
 LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks)
 {
-       XLogRecData rdata[2];
        xl_standby_locks xlrec;
 
        xlrec.nlocks = nlocks;
 
-       rdata[0].data = (char *) &xlrec;
-       rdata[0].len = offsetof(xl_standby_locks, locks);
-       rdata[0].buffer = InvalidBuffer;
-       rdata[0].next = &rdata[1];
-
-       rdata[1].data = (char *) locks;
-       rdata[1].len = nlocks * sizeof(xl_standby_lock);
-       rdata[1].buffer = InvalidBuffer;
-       rdata[1].next = NULL;
+       XLogBeginInsert();
+       XLogRegisterData((char *) &xlrec, offsetof(xl_standby_locks, locks));
+       XLogRegisterData((char *) locks, nlocks * sizeof(xl_standby_lock));
 
-       (void) XLogInsert(RM_STANDBY_ID, XLOG_STANDBY_LOCK, rdata);
+       (void) XLogInsert(RM_STANDBY_ID, XLOG_STANDBY_LOCK);
 }
 
 /*
index d1f64e58c8c9a3fd7c1f63d1ac0d45a2ef9b2df4..7a60ccf5ee319148c5e814e9f937800f09dc6ab8 100644 (file)
@@ -754,7 +754,6 @@ write_relmap_file(bool shared, RelMapFile *newmap,
        if (write_wal)
        {
                xl_relmap_update xlrec;
-               XLogRecData rdata[2];
                XLogRecPtr      lsn;
 
                /* now errors are fatal ... */
@@ -764,16 +763,11 @@ write_relmap_file(bool shared, RelMapFile *newmap,
                xlrec.tsid = tsid;
                xlrec.nbytes = sizeof(RelMapFile);
 
-               rdata[0].data = (char *) (&xlrec);
-               rdata[0].len = MinSizeOfRelmapUpdate;
-               rdata[0].buffer = InvalidBuffer;
-               rdata[0].next = &(rdata[1]);
-               rdata[1].data = (char *) newmap;
-               rdata[1].len = sizeof(RelMapFile);
-               rdata[1].buffer = InvalidBuffer;
-               rdata[1].next = NULL;
+               XLogBeginInsert();
+               XLogRegisterData((char *) (&xlrec), MinSizeOfRelmapUpdate);
+               XLogRegisterData((char *) newmap, sizeof(RelMapFile));
 
-               lsn = XLogInsert(RM_RELMAP_ID, XLOG_RELMAP_UPDATE, rdata);
+               lsn = XLogInsert(RM_RELMAP_ID, XLOG_RELMAP_UPDATE);
 
                /* As always, WAL must hit the disk before the data update does */
                XLogFlush(lsn);
@@ -912,7 +906,7 @@ relmap_redo(XLogRecPtr lsn, XLogRecord *record)
        uint8           info = record->xl_info & ~XLR_INFO_MASK;
 
        /* Backup blocks are not used in relmap records */
-       Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+       Assert(!XLogRecHasAnyBlockRefs(record));
 
        if (info == XLOG_RELMAP_UPDATE)
        {
index d748db4d0c6b0b0fe98ef2979c1e053870c90267..77acd95551deb2e33c65e0c2706ab5936c686ba2 100644 (file)
  */
 #define XLOG_BRIN_INIT_PAGE            0x80
 
-/* This is what we need to know about a BRIN index create */
+/*
+ * This is what we need to know about a BRIN index create.
+ *
+ * Backup block 0: metapage
+ */
 typedef struct xl_brin_createidx
 {
        BlockNumber pagesPerRange;
-       RelFileNode node;
        uint16          version;
 } xl_brin_createidx;
 #define SizeOfBrinCreateIdx (offsetof(xl_brin_createidx, version) + sizeof(uint16))
 
 /*
  * This is what we need to know about a BRIN tuple insert
+ *
+ * Backup block 0: main page, block data is the new BrinTuple.
+ * Backup block 1: revmap page
  */
 typedef struct xl_brin_insert
 {
-       RelFileNode node;
        BlockNumber heapBlk;
 
        /* extra information needed to update the revmap */
-       BlockNumber revmapBlk;
        BlockNumber pagesPerRange;
 
-       uint16          tuplen;
-       ItemPointerData tid;
-       /* tuple data follows at end of struct */
+       /* offset number in the main page to insert the tuple to. */
+       OffsetNumber offnum;
 } xl_brin_insert;
 
-#define SizeOfBrinInsert       (offsetof(xl_brin_insert, tid) + sizeof(ItemPointerData))
+#define SizeOfBrinInsert       (offsetof(xl_brin_insert, offnum) + sizeof(OffsetNumber))
 
 /*
- * A cross-page update is the same as an insert, but also store the old tid.
+ * A cross-page update is the same as an insert, but also stores information
+ * about the old tuple.
+ *
+ * Like in xl_brin_insert:
+ * Backup block 0: new page, block data includes the new BrinTuple.
+ * Backup block 1: revmap page
+ *
+ * And in addition:
+ * Backup block 2: old page
  */
 typedef struct xl_brin_update
 {
-       ItemPointerData oldtid;
+       /* offset number of old tuple on old page */
+       OffsetNumber oldOffnum;
+
        xl_brin_insert insert;
 } xl_brin_update;
 
 #define SizeOfBrinUpdate       (offsetof(xl_brin_update, insert) + SizeOfBrinInsert)
 
-/* This is what we need to know about a BRIN tuple samepage update */
+/*
+ * This is what we need to know about a BRIN tuple samepage update
+ *
+ * Backup block 0: updated page, with new BrinTuple as block data
+ */
 typedef struct xl_brin_samepage_update
 {
-       RelFileNode node;
-       ItemPointerData tid;
-       /* tuple data follows at end of struct */
+       OffsetNumber offnum;
 } xl_brin_samepage_update;
 
-#define SizeOfBrinSamepageUpdate               (offsetof(xl_brin_samepage_update, tid) + sizeof(ItemPointerData))
+#define SizeOfBrinSamepageUpdate               (sizeof(OffsetNumber))
 
-/* This is what we need to know about a revmap extension */
+/*
+ * This is what we need to know about a revmap extension
+ *
+ * Backup block 0: metapage
+ * Backup block 1: new revmap page
+ */
 typedef struct xl_brin_revmap_extend
 {
-       RelFileNode node;
+       /*
+        * This is actually redundant - the block number is stored as part of
+        * backup block 1.
+        */
        BlockNumber targetBlk;
 } xl_brin_revmap_extend;
 
index 6a09dc990e4338eb2e2f8a88d4c07ff3bd546cce..2768155ec43cd8ae50cabaf1407fcfab8a9bb5c0 100644 (file)
@@ -13,7 +13,6 @@
 #include "access/genam.h"
 #include "access/gin.h"
 #include "access/itup.h"
-#include "access/xloginsert.h"
 #include "fmgr.h"
 #include "storage/bufmgr.h"
 #include "utils/rbtree.h"
@@ -391,22 +390,22 @@ typedef struct
 
 typedef struct ginxlogCreatePostingTree
 {
-       RelFileNode node;
-       BlockNumber blkno;
        uint32          size;
        /* A compressed posting list follows */
 } ginxlogCreatePostingTree;
 
-#define XLOG_GIN_INSERT  0x20
-
 /*
  * The format of the insertion record varies depending on the page type.
  * ginxlogInsert is the common part between all variants.
+ *
+ * Backup Blk 0: target page
+ * Backup Blk 1: left child, if this insertion finishes an incomplete split
  */
+
+#define XLOG_GIN_INSERT  0x20
+
 typedef struct
 {
-       RelFileNode node;
-       BlockNumber blkno;
        uint16          flags;                  /* GIN_SPLIT_ISLEAF and/or GIN_SPLIT_ISDATA */
 
        /*
@@ -471,14 +470,17 @@ typedef struct
        PostingItem newitem;
 } ginxlogInsertDataInternal;
 
-
+/*
+ * Backup Blk 0: new left page (= original page, if not root split)
+ * Backup Blk 1: new right page
+ * Backup Blk 2: original page / new root page, if root split
+ * Backup Blk 3: left child, if this insertion completes an earlier split
+ */
 #define XLOG_GIN_SPLIT 0x30
 
 typedef struct ginxlogSplit
 {
        RelFileNode node;
-       BlockNumber lblkno;
-       BlockNumber rblkno;
        BlockNumber rrlink;                     /* right link, or root's blocknumber if root
                                                                 * split */
        BlockNumber leftChildBlkno; /* valid on a non-leaf split */
@@ -532,15 +534,6 @@ typedef struct
  */
 #define XLOG_GIN_VACUUM_PAGE   0x40
 
-typedef struct ginxlogVacuumPage
-{
-       RelFileNode node;
-       BlockNumber blkno;
-       uint16          hole_offset;    /* number of bytes before "hole" */
-       uint16          hole_length;    /* number of bytes in "hole" */
-       /* entire page contents (minus the hole) follow at end of record */
-} ginxlogVacuumPage;
-
 /*
  * Vacuuming posting tree leaf page is WAL-logged like recompression caused
  * by insertion.
@@ -549,26 +542,28 @@ typedef struct ginxlogVacuumPage
 
 typedef struct ginxlogVacuumDataLeafPage
 {
-       RelFileNode node;
-       BlockNumber blkno;
-
        ginxlogRecompressDataLeaf data;
 } ginxlogVacuumDataLeafPage;
 
+/*
+ * Backup Blk 0: deleted page
+ * Backup Blk 1: parent
+ * Backup Blk 2: left sibling
+ */
 #define XLOG_GIN_DELETE_PAGE   0x50
 
 typedef struct ginxlogDeletePage
 {
-       RelFileNode node;
-       BlockNumber blkno;
-       BlockNumber parentBlkno;
        OffsetNumber parentOffset;
-       BlockNumber leftBlkno;
        BlockNumber rightLink;
 } ginxlogDeletePage;
 
 #define XLOG_GIN_UPDATE_META_PAGE 0x60
 
+/*
+ * Backup Blk 0: metapage
+ * Backup Blk 1: tail page
+ */
 typedef struct ginxlogUpdateMeta
 {
        RelFileNode node;
@@ -585,22 +580,23 @@ typedef struct ginxlogUpdateMeta
 
 typedef struct ginxlogInsertListPage
 {
-       RelFileNode node;
-       BlockNumber blkno;
        BlockNumber rightlink;
        int32           ntuples;
        /* array of inserted tuples follows */
 } ginxlogInsertListPage;
 
+/*
+ * Backup Blk 0: metapage
+ * Backup Blk 1 to ndeleted: deleted pages
+ */
+
 #define XLOG_GIN_DELETE_LISTPAGE  0x80
 
 #define GIN_NDELETE_AT_ONCE 16
 typedef struct ginxlogDeleteListPages
 {
-       RelFileNode node;
        GinMetaPageData metadata;
        int32           ndeleted;
-       BlockNumber toDelete[GIN_NDELETE_AT_ONCE];
 } ginxlogDeleteListPages;
 
 
@@ -667,7 +663,7 @@ typedef struct GinBtreeData
 
        /* insert methods */
        OffsetNumber (*findChildPtr) (GinBtree, Page, BlockNumber, OffsetNumber);
-       GinPlaceToPageRC (*placeToPage) (GinBtree, Buffer, GinBtreeStack *, void *, BlockNumber, XLogRecData **, Page *, Page *);
+       GinPlaceToPageRC (*placeToPage) (GinBtree, Buffer, GinBtreeStack *, void *, BlockNumber, Page *, Page *);
        void       *(*prepareDownlink) (GinBtree, Buffer);
        void            (*fillRoot) (GinBtree, Page, BlockNumber, Page, BlockNumber, Page);
 
index 21daf3b2b6adf2588b3d1fa749b8ecd4f434db8b..af5a42335944d643ae5878040fc2ce71e83f405f 100644 (file)
@@ -185,34 +185,33 @@ typedef GISTScanOpaqueData *GISTScanOpaque;
 #define XLOG_GIST_CREATE_INDEX         0x50
  /* #define XLOG_GIST_PAGE_DELETE               0x60 */        /* not used anymore */
 
+/*
+ * Backup Blk 0: updated page.
+ * Backup Blk 1: If this operation completes a page split, by inserting a
+ *                              downlink for the split page, the left half of the split
+ */
 typedef struct gistxlogPageUpdate
 {
-       RelFileNode node;
-       BlockNumber blkno;
-
-       /*
-        * If this operation completes a page split, by inserting a downlink for
-        * the split page, leftchild points to the left half of the split.
-        */
-       BlockNumber leftchild;
-
        /* number of deleted offsets */
        uint16          ntodelete;
+       uint16          ntoinsert;
 
        /*
-        * follow: 1. todelete OffsetNumbers 2. tuples to insert
+        * In payload of blk 0 : 1. todelete OffsetNumbers 2. tuples to insert
         */
 } gistxlogPageUpdate;
 
+/*
+ * Backup Blk 0: If this operation completes a page split, by inserting a
+ *                              downlink for the split page, the left half of the split
+ * Backup Blk 1 - npage: split pages (1 is the original page)
+ */
 typedef struct gistxlogPageSplit
 {
-       RelFileNode node;
-       BlockNumber origblkno;          /* splitted page */
        BlockNumber origrlink;          /* rightlink of the page before split */
        GistNSN         orignsn;                /* NSN of the page before split */
        bool            origleaf;               /* was splitted page a leaf page? */
 
-       BlockNumber leftchild;          /* like in gistxlogPageUpdate */
        uint16          npage;                  /* # of pages in the split */
        bool            markfollowright;        /* set F_FOLLOW_RIGHT flags */
 
index 1d64264b010c53338d73802ae81bd03c9202678c..d4cba2e54c6d4eb69316ffe9dbd107d81b1faac4 100644 (file)
 #define XLOG_HEAP_CONTAINS_OLD                                         \
        (XLOG_HEAP_CONTAINS_OLD_TUPLE | XLOG_HEAP_CONTAINS_OLD_KEY)
 
-/*
- * All what we need to find changed tuple
- *
- * NB: on most machines, sizeof(xl_heaptid) will include some trailing pad
- * bytes for alignment.  We don't want to store the pad space in the XLOG,
- * so use SizeOfHeapTid for space calculations.  Similar comments apply for
- * the other xl_FOO structs.
- */
-typedef struct xl_heaptid
-{
-       RelFileNode node;
-       ItemPointerData tid;            /* changed tuple id */
-} xl_heaptid;
-
-#define SizeOfHeapTid          (offsetof(xl_heaptid, tid) + SizeOfIptrData)
-
 /* This is what we need to know about delete */
 typedef struct xl_heap_delete
 {
-       xl_heaptid      target;                 /* deleted tuple id */
        TransactionId xmax;                     /* xmax of the deleted tuple */
+       OffsetNumber offnum;            /* deleted tuple's offset */
        uint8           infobits_set;   /* infomask bits */
        uint8           flags;
 } xl_heap_delete;
@@ -122,45 +106,32 @@ typedef struct xl_heap_header
 
 #define SizeOfHeapHeader       (offsetof(xl_heap_header, t_hoff) + sizeof(uint8))
 
-/*
- * Variant of xl_heap_header that contains the length of the tuple, which is
- * useful if the length of the tuple cannot be computed using the overall
- * record length. E.g. because there are several tuples inside a single
- * record.
- */
-typedef struct xl_heap_header_len
-{
-       uint16          t_len;
-       xl_heap_header header;
-} xl_heap_header_len;
-
-#define SizeOfHeapHeaderLen (offsetof(xl_heap_header_len, header) + SizeOfHeapHeader)
-
 /* This is what we need to know about insert */
 typedef struct xl_heap_insert
 {
-       xl_heaptid      target;                 /* inserted tuple id */
+       OffsetNumber offnum;            /* inserted tuple's offset */
        uint8           flags;
-       /* xl_heap_header & TUPLE DATA FOLLOWS AT END OF STRUCT */
+
+       /* xl_heap_header & TUPLE DATA in backup block 0 */
 } xl_heap_insert;
 
 #define SizeOfHeapInsert       (offsetof(xl_heap_insert, flags) + sizeof(uint8))
 
 /*
- * This is what we need to know about a multi-insert. The record consists of
- * xl_heap_multi_insert header, followed by a xl_multi_insert_tuple and tuple
- * data for each tuple. 'offsets' array is omitted if the whole page is
- * reinitialized (XLOG_HEAP_INIT_PAGE)
+ * This is what we need to know about a multi-insert.
+ *
+ * The main data of the record consists of this xl_heap_multi_insert header.
+ * 'offsets' array is omitted if the whole page is reinitialized
+ * (XLOG_HEAP_INIT_PAGE).
+ *
+ * Block data 0 contains, for each tuple, an xl_multi_insert_tuple struct
+ * followed by the tuple data.
  */
 typedef struct xl_heap_multi_insert
 {
-       RelFileNode node;
-       BlockNumber blkno;
        uint8           flags;
        uint16          ntuples;
        OffsetNumber offsets[1];
-
-       /* TUPLE DATA (xl_multi_insert_tuples) FOLLOW AT END OF STRUCT */
 } xl_heap_multi_insert;
 
 #define SizeOfHeapMultiInsert  offsetof(xl_heap_multi_insert, offsets)
@@ -176,34 +147,40 @@ typedef struct xl_multi_insert_tuple
 
 #define SizeOfMultiInsertTuple (offsetof(xl_multi_insert_tuple, t_hoff) + sizeof(uint8))
 
-/* This is what we need to know about update|hot_update */
+/*
+ * This is what we need to know about update|hot_update
+ *
+ * Backup blk 0: new page
+ * Backup blk 1: old page, if different. (no data, just a reference to the blk)
+ */
 typedef struct xl_heap_update
 {
-       xl_heaptid      target;                 /* deleted tuple id */
        TransactionId old_xmax;         /* xmax of the old tuple */
-       TransactionId new_xmax;         /* xmax of the new tuple */
-       ItemPointerData newtid;         /* new inserted tuple id */
+       OffsetNumber old_offnum;        /* old tuple's offset */
        uint8           old_infobits_set;               /* infomask bits to set on old tuple */
        uint8           flags;
+       TransactionId new_xmax;         /* xmax of the new tuple */
+       OffsetNumber new_offnum;        /* new tuple's offset */
+       /*
+        * If XLOG_HEAP_CONTAINS_OLD_TUPLE or XLOG_HEAP_CONTAINS_OLD_KEY flags are
+        * set, a xl_heap_header struct and tuple data for the old tuple follows.
+        */
 
        /*
+        * Block 0:
         * If XLOG_HEAP_PREFIX_FROM_OLD or XLOG_HEAP_SUFFIX_FROM_OLD flags are
-        * set, the prefix and/or suffix come next, as one or two uint16s.
+        * set, the prefix and/or suffix come first, as one or two uint16s.
         *
-        * After that, xl_heap_header_len and new tuple data follow.  The new
-        * tuple data and length don't include the prefix and suffix, which are
-        * copied from the old tuple on replay.  The new tuple data is omitted if
-        * a full-page image of the page was taken (unless the
-        * XLOG_HEAP_CONTAINS_NEW_TUPLE flag is set, in which case it's included
-        * anyway).
+        * After that, xl_heap_header and new tuple data follow.  The new tuple
+        * data doesn't include the prefix and suffix, which are copied from the
+        * old tuple on replay.
         *
-        * If XLOG_HEAP_CONTAINS_OLD_TUPLE or XLOG_HEAP_CONTAINS_OLD_KEY flags are
-        * set, another xl_heap_header_len struct and tuple data for the old tuple
-        * follows.
+        * If the XLOG_HEAP_CONTAINS_NEW_TUPLE flag is given, the tuple data is
+        * included even if a full-page image was taken.
         */
 } xl_heap_update;
 
-#define SizeOfHeapUpdate       (offsetof(xl_heap_update, flags) + sizeof(uint8))
+#define SizeOfHeapUpdate       (offsetof(xl_heap_update, new_offnum) + sizeof(OffsetNumber))
 
 /*
  * This is what we need to know about vacuum page cleanup/redirect
@@ -218,12 +195,10 @@ typedef struct xl_heap_update
  */
 typedef struct xl_heap_clean
 {
-       RelFileNode node;
-       BlockNumber block;
        TransactionId latestRemovedXid;
        uint16          nredirected;
        uint16          ndead;
-       /* OFFSET NUMBERS FOLLOW */
+       /* OFFSET NUMBERS are in the block reference 0 */
 } xl_heap_clean;
 
 #define SizeOfHeapClean (offsetof(xl_heap_clean, ndead) + sizeof(uint16))
@@ -251,8 +226,8 @@ typedef struct xl_heap_cleanup_info
 /* This is what we need to know about lock */
 typedef struct xl_heap_lock
 {
-       xl_heaptid      target;                 /* locked tuple id */
        TransactionId locking_xid;      /* might be a MultiXactId not xid */
+       OffsetNumber offnum;            /* locked tuple's offset on page */
        int8            infobits_set;   /* infomask and infomask2 bits to set */
 } xl_heap_lock;
 
@@ -261,8 +236,8 @@ typedef struct xl_heap_lock
 /* This is what we need to know about locking an updated version of a row */
 typedef struct xl_heap_lock_updated
 {
-       xl_heaptid      target;
        TransactionId xmax;
+       OffsetNumber offnum;
        uint8           infobits_set;
 } xl_heap_lock_updated;
 
@@ -271,11 +246,11 @@ typedef struct xl_heap_lock_updated
 /* This is what we need to know about in-place update */
 typedef struct xl_heap_inplace
 {
-       xl_heaptid      target;                 /* updated tuple id */
+       OffsetNumber offnum;            /* updated tuple's offset on page */
        /* TUPLE DATA FOLLOWS AT END OF STRUCT */
 } xl_heap_inplace;
 
-#define SizeOfHeapInplace      (offsetof(xl_heap_inplace, target) + SizeOfHeapTid)
+#define SizeOfHeapInplace      (offsetof(xl_heap_inplace, offnum) + sizeof(OffsetNumber))
 
 /*
  * This struct represents a 'freeze plan', which is what we need to know about
@@ -299,20 +274,21 @@ typedef struct xl_heap_freeze_tuple
  */
 typedef struct xl_heap_freeze_page
 {
-       RelFileNode node;
-       BlockNumber block;
        TransactionId cutoff_xid;
        uint16          ntuples;
-       xl_heap_freeze_tuple tuples[FLEXIBLE_ARRAY_MEMBER];
+       xl_heap_freeze_tuple tuples[FLEXIBLE_ARRAY_MEMBER]; /* stored in backup block 0 */
 } xl_heap_freeze_page;
 
 #define SizeOfHeapFreezePage offsetof(xl_heap_freeze_page, tuples)
 
-/* This is what we need to know about setting a visibility map bit */
+/*
+ * This is what we need to know about setting a visibility map bit
+ *
+ * Backup blk 0: visibility map buffer
+ * Backup blk 1: heap buffer
+ */
 typedef struct xl_heap_visible
 {
-       RelFileNode node;
-       BlockNumber block;
        TransactionId cutoff_xid;
 } xl_heap_visible;
 
@@ -338,10 +314,11 @@ typedef struct xl_heap_new_cid
        /*
         * Store the relfilenode/ctid pair to facilitate lookups.
         */
-       xl_heaptid      target;
+       RelFileNode target_node;
+       ItemPointerData target_tid;
 } xl_heap_new_cid;
 
-#define SizeOfHeapNewCid (offsetof(xl_heap_new_cid, target) + SizeOfHeapTid)
+#define SizeOfHeapNewCid (offsetof(xl_heap_new_cid, target_tid) + sizeof(ItemPointerData))
 
 /* logical rewrite xlog record header */
 typedef struct xl_heap_rewrite_mapping
index c8bb3f5d668dbd22d94a0866b3baa40eedb58ae2..7161a761b4e1881561b9b958ae94ee656c12e8de 100644 (file)
@@ -227,15 +227,6 @@ typedef struct BTMetaPageData
 #define XLOG_BTREE_REUSE_PAGE  0xD0    /* old page is about to be reused from
                                                                                 * FSM */
 
-/*
- * All that we need to find changed index tuple
- */
-typedef struct xl_btreetid
-{
-       RelFileNode node;
-       ItemPointerData tid;            /* changed tuple id */
-} xl_btreetid;
-
 /*
  * All that we need to regenerate the meta-data page
  */
@@ -252,16 +243,17 @@ typedef struct xl_btree_metadata
  *
  * This data record is used for INSERT_LEAF, INSERT_UPPER, INSERT_META.
  * Note that INSERT_META implies it's not a leaf page.
+ *
+ * Backup Blk 0: original page (data contains the inserted tuple)
+ * Backup Blk 1: child's left sibling, if INSERT_UPPER or INSERT_META
+ * Backup Blk 2: xl_btree_metadata, if INSERT_META
  */
 typedef struct xl_btree_insert
 {
-       xl_btreetid target;                     /* inserted tuple id */
-       /* BlockNumber finishes_split field FOLLOWS IF NOT XLOG_BTREE_INSERT_LEAF */
-       /* xl_btree_metadata FOLLOWS IF XLOG_BTREE_INSERT_META */
-       /* INDEX TUPLE FOLLOWS AT END OF STRUCT */
+       OffsetNumber offnum;
 } xl_btree_insert;
 
-#define SizeOfBtreeInsert      (offsetof(xl_btreetid, tid) + SizeOfIptrData)
+#define SizeOfBtreeInsert      (offsetof(xl_btree_insert, offnum) + sizeof(OffsetNumber))
 
 /*
  * On insert with split, we save all the items going into the right sibling
@@ -278,45 +270,49 @@ typedef struct xl_btree_insert
  * the root page, and thus that a newroot record rather than an insert or
  * split record should follow.  Note that a split record never carries a
  * metapage update --- we'll do that in the parent-level update.
+ *
+ * Backup Blk 0: original page / new left page
+ * Backup Blk 1: new right page
+ * Backup Blk 2: next block (orig page's rightlink), if any
+ * Backup Blk 3: child's left sibling, if non-leaf split
  */
 typedef struct xl_btree_split
 {
-       RelFileNode node;
-       BlockNumber leftsib;            /* orig page / new left page */
-       BlockNumber rightsib;           /* new right page */
-       BlockNumber rnext;                      /* next block (orig page's rightlink) */
        uint32          level;                  /* tree level of page being split */
        OffsetNumber firstright;        /* first item moved to right page */
+       OffsetNumber newitemoff;        /* new item's offset (if placed on left page) */
+} xl_btree_split;
 
+typedef struct xl_btree_split_left
+{
        /*
-        * In the _L variants, next are OffsetNumber newitemoff and the new item.
+        * In the _L variants, next is the new item.
         * (In the _R variants, the new item is one of the right page's tuples.)
-        * The new item, but not newitemoff, is suppressed if XLogInsert chooses
-        * to store the left page's whole page image.
         *
         * If level > 0, an IndexTuple representing the HIKEY of the left page
         * follows.  We don't need this on leaf pages, because it's the same as
-        * the leftmost key in the new right page.  Also, it's suppressed if
-        * XLogInsert chooses to store the left page's whole page image.
-        *
-        * If level > 0, BlockNumber of the page whose incomplete-split flag this
-        * insertion clears. (not aligned)
-        *
+        * the leftmost key in the new right page.
+        */
+} xl_btree_split_left;
+
+typedef struct xl_btree_split_right
+{
+       /*
         * Last are the right page's tuples in the form used by _bt_restore_page.
         */
-} xl_btree_split;
+} xl_btree_split_right;
 
-#define SizeOfBtreeSplit       (offsetof(xl_btree_split, firstright) + sizeof(OffsetNumber))
+#define SizeOfBtreeSplit       (offsetof(xl_btree_split, newitemoff) + sizeof(OffsetNumber))
 
 /*
  * This is what we need to know about delete of individual leaf index tuples.
  * The WAL record can represent deletion of any number of index tuples on a
  * single index page when *not* executed by VACUUM.
+ *
+ * Backup Blk 0: index page
  */
 typedef struct xl_btree_delete
 {
-       RelFileNode node;                       /* RelFileNode of the index */
-       BlockNumber block;
        RelFileNode hnode;                      /* RelFileNode of the heap the index currently
                                                                 * points at */
        int                     nitems;
@@ -361,8 +357,6 @@ typedef struct xl_btree_reuse_page
  */
 typedef struct xl_btree_vacuum
 {
-       RelFileNode node;
-       BlockNumber block;
        BlockNumber lastBlockVacuumed;
 
        /* TARGET OFFSET NUMBERS FOLLOW */
@@ -376,10 +370,13 @@ typedef struct xl_btree_vacuum
  * remove this tuple's downlink and the *following* tuple's key).  Note that
  * the leaf page is empty, so we don't need to store its content --- it is
  * just reinitialized during recovery using the rest of the fields.
+ *
+ * Backup Blk 0: leaf block
+ * Backup Blk 1: top parent
  */
 typedef struct xl_btree_mark_page_halfdead
 {
-       xl_btreetid target;                     /* deleted tuple id in parent page */
+       OffsetNumber poffset;           /* deleted tuple's offset in parent page */
 
        /* information needed to recreate the leaf page: */
        BlockNumber leafblk;            /* leaf block ultimately being deleted */
@@ -394,11 +391,15 @@ typedef struct xl_btree_mark_page_halfdead
  * This is what we need to know about deletion of a btree page.  Note we do
  * not store any content for the deleted page --- it is just rewritten as empty
  * during recovery, apart from resetting the btpo.xact.
+ *
+ * Backup Blk 0: target block being deleted
+ * Backup Blk 1: target block's left sibling, if any
+ * Backup Blk 2: target block's right sibling
+ * Backup Blk 3: leaf block (if different from target)
+ * Backup Blk 4: metapage
  */
 typedef struct xl_btree_unlink_page
 {
-       RelFileNode node;
-       BlockNumber deadblk;            /* target block being deleted */
        BlockNumber leftsib;            /* target block's left sibling, if any */
        BlockNumber rightsib;           /* target block's right sibling */
 
@@ -406,7 +407,6 @@ typedef struct xl_btree_unlink_page
         * Information needed to recreate the leaf page, when target is an
         * internal page.
         */
-       BlockNumber leafblk;
        BlockNumber leafleftsib;
        BlockNumber leafrightsib;
        BlockNumber topparent;          /* next child down in the branch */
@@ -423,13 +423,15 @@ typedef struct xl_btree_unlink_page
  *
  * Note that although this implies rewriting the metadata page, we don't need
  * an xl_btree_metadata record --- the rootblk and level are sufficient.
+ *
+ * Backup Blk 0: new root page (2 tuples as payload, if splitting old root)
+ * Backup Blk 1: left child (if splitting an old root)
+ * Backup Blk 2: metapage
  */
 typedef struct xl_btree_newroot
 {
-       RelFileNode node;
-       BlockNumber rootblk;            /* location of new root */
+       BlockNumber rootblk;            /* location of new root (redundant with blk 0) */
        uint32          level;                  /* its tree level */
-       /* 0 or 2 INDEX TUPLES FOLLOW AT END OF STRUCT */
 } xl_btree_newroot;
 
 #define SizeOfBtreeNewroot     (offsetof(xl_btree_newroot, level) + sizeof(uint32))
index 3330644651c316339709e461900f73b9252b09a3..18bc9bf1b0cf69d6ded7cd7c5acf64bcff606e16 100644 (file)
@@ -18,7 +18,6 @@
 #include "access/spgist.h"
 #include "nodes/tidbitmap.h"
 #include "storage/buf.h"
-#include "storage/relfilenode.h"
 #include "utils/relcache.h"
 
 
@@ -351,35 +350,8 @@ typedef SpGistDeadTupleData *SpGistDeadTuple;
 
 /*
  * XLOG stuff
- *
- * ACCEPT_RDATA_* can only use fixed-length rdata arrays, because of lengthof
  */
 
-#define ACCEPT_RDATA_DATA(p, s, i)     \
-       do { \
-               Assert((i) < lengthof(rdata)); \
-               rdata[i].data = (char *) (p); \
-               rdata[i].len = (s); \
-               rdata[i].buffer = InvalidBuffer; \
-               rdata[i].buffer_std = true; \
-               rdata[i].next = NULL; \
-               if ((i) > 0) \
-                       rdata[(i) - 1].next = rdata + (i); \
-       } while(0)
-
-#define ACCEPT_RDATA_BUFFER(b, i)  \
-       do { \
-               Assert((i) < lengthof(rdata)); \
-               rdata[i].data = NULL; \
-               rdata[i].len = 0; \
-               rdata[i].buffer = (b); \
-               rdata[i].buffer_std = true; \
-               rdata[i].next = NULL; \
-               if ((i) > 0) \
-                       rdata[(i) - 1].next = rdata + (i); \
-       } while(0)
-
-
 /* XLOG record types for SPGiST */
 #define XLOG_SPGIST_CREATE_INDEX       0x00
 #define XLOG_SPGIST_ADD_LEAF           0x10
@@ -408,36 +380,36 @@ typedef struct spgxlogState
                (d).isBuild = (s)->isBuild; \
        } while(0)
 
-
+/*
+ * Backup Blk 0: destination page for leaf tuple
+ * Backup Blk 1: parent page (if any)
+ */
 typedef struct spgxlogAddLeaf
 {
-       RelFileNode node;
-
-       BlockNumber blknoLeaf;          /* destination page for leaf tuple */
        bool            newPage;                /* init dest page? */
        bool            storesNulls;    /* page is in the nulls tree? */
        OffsetNumber offnumLeaf;        /* offset where leaf tuple gets placed */
        OffsetNumber offnumHeadLeaf;    /* offset of head tuple in chain, if any */
 
-       BlockNumber blknoParent;        /* where the parent downlink is, if any */
-       OffsetNumber offnumParent;
+       OffsetNumber offnumParent;      /* where the parent downlink is, if any */
        uint16          nodeI;
 
        /* new leaf tuple follows (unaligned!) */
 } spgxlogAddLeaf;
 
+/*
+ * Backup Blk 0: source leaf page
+ * Backup Blk 1: destination leaf page
+ * Backup Blk 2: parent page
+ */
 typedef struct spgxlogMoveLeafs
 {
-       RelFileNode node;
-
-       BlockNumber blknoSrc;           /* source leaf page */
-       BlockNumber blknoDst;           /* destination leaf page */
        uint16          nMoves;                 /* number of tuples moved from source page */
        bool            newPage;                /* init dest page? */
        bool            replaceDead;    /* are we replacing a DEAD source tuple? */
        bool            storesNulls;    /* pages are in the nulls tree? */
 
-       BlockNumber blknoParent;        /* where the parent downlink is */
+       /* where the parent downlink is */
        OffsetNumber offnumParent;
        uint16          nodeI;
 
@@ -452,11 +424,6 @@ typedef struct spgxlogMoveLeafs
         * Note: if replaceDead is true then there is only one inserted tuple
         * number and only one leaf tuple in the data, because we are not copying
         * the dead tuple from the source
-        *
-        * Buffer references in the rdata array are:
-        *              Src page
-        *              Dest page
-        *              Parent page
         *----------
         */
        OffsetNumber offsets[1];
@@ -464,21 +431,29 @@ typedef struct spgxlogMoveLeafs
 
 #define SizeOfSpgxlogMoveLeafs offsetof(spgxlogMoveLeafs, offsets)
 
+/*
+ * Backup Blk 0: original page
+ * Backup Blk 1: where new tuple goes, if not same place
+ * Backup Blk 2: where parent downlink is, if updated and different from
+ *               the old and new
+ */
 typedef struct spgxlogAddNode
 {
-       RelFileNode node;
-
-       BlockNumber blkno;                      /* block number of original inner tuple */
+       /* Backup blk 0, page containing original inner tuple */
        OffsetNumber offnum;            /* offset of original inner tuple */
 
-       BlockNumber blknoParent;        /* where parent downlink is, if updated */
-       OffsetNumber offnumParent;
-       uint16          nodeI;
-
-       BlockNumber blknoNew;           /* where new tuple goes, if not same place */
+       /* Backup blk 1, where new tuple goes, if not same place */
        OffsetNumber offnumNew;
        bool            newPage;                /* init new page? */
 
+       /* Backup Blk 2, where parent downlink is, if updated */
+       char            parentBlk;      /* 0: parent == original page,
+                                                        * 1: parent == new page,
+                                                        * 2: parent == different page (blk ref 2)
+                                                        * -1: parent not updated; NOTE(review): -1 needs a signed type, plain char may be unsigned */
+       OffsetNumber offnumParent;
+       uint16          nodeI;
+
        spgxlogState stateSrc;
 
        /*
@@ -486,16 +461,20 @@ typedef struct spgxlogAddNode
         */
 } spgxlogAddNode;
 
+/*
+ * Backup Blk 0: where the prefix tuple goes
+ * Backup Blk 1: where the postfix tuple goes (if different page)
+ */
 typedef struct spgxlogSplitTuple
 {
-       RelFileNode node;
-
-       BlockNumber blknoPrefix;        /* where the prefix tuple goes */
+       /* where the prefix tuple goes */
        OffsetNumber offnumPrefix;
 
-       BlockNumber blknoPostfix;       /* where the postfix tuple goes */
+       /* where the postfix tuple goes */
        OffsetNumber offnumPostfix;
-       bool            newPage;                /* need to init that page? */
+       bool            newPage;                        /* need to init that page? */
+       bool            postfixBlkSame;         /* was postfix tuple put on same page as
+                                                                        * prefix? */
 
        /*
         * new prefix inner tuple follows, then new postfix inner tuple
@@ -503,24 +482,30 @@ typedef struct spgxlogSplitTuple
         */
 } spgxlogSplitTuple;
 
+/*
+ * Block references registered for this record are:
+ * Backup Blk 0: Src page (only if not root)
+ * Backup Blk 1: Dest page (if used)
+ * Backup Blk 2: Inner page
+ * Backup Blk 3: Parent page (if any, and different from Inner)
+ */
 typedef struct spgxlogPickSplit
 {
-       RelFileNode node;
+       bool            isRootSplit;
 
-       BlockNumber blknoSrc;           /* original leaf page */
-       BlockNumber blknoDest;          /* other leaf page, if any */
        uint16          nDelete;                /* n to delete from Src */
        uint16          nInsert;                /* n to insert on Src and/or Dest */
        bool            initSrc;                /* re-init the Src page? */
        bool            initDest;               /* re-init the Dest page? */
 
-       BlockNumber blknoInner;         /* where to put new inner tuple */
+       /* where to put new inner tuple */
        OffsetNumber offnumInner;
        bool            initInner;              /* re-init the Inner page? */
 
        bool            storesNulls;    /* pages are in the nulls tree? */
 
-       BlockNumber blknoParent;        /* where the parent downlink is, if any */
+       /* where the parent downlink is, if any */
+       bool            innerIsParent;  /* is parent the same as inner page? */
        OffsetNumber offnumParent;
        uint16          nodeI;
 
@@ -532,13 +517,10 @@ typedef struct spgxlogPickSplit
         *              array of inserted tuple numbers, length nInsert
         *              array of page selector bytes for inserted tuples, length nInsert
         *              new inner tuple (unaligned!)
-        *              list of leaf tuples, length nInsert (unaligned!)
+        *              list of leaf tuples, length nInsert (must be maxaligned)
+        * the tuple number and page selector arrays are padded to maxalign
+        * boundaries so that the leaf tuples will be suitably aligned
         *
-        * Buffer references in the rdata array are:
-        *              Src page (only if not root and not being init'd)
-        *              Dest page (if used and not being init'd)
-        *              Inner page (only if not being init'd)
-        *              Parent page (if any; could be same as Inner)
         *----------
         */
        OffsetNumber    offsets[1];
@@ -548,9 +530,6 @@ typedef struct spgxlogPickSplit
 
 typedef struct spgxlogVacuumLeaf
 {
-       RelFileNode node;
-
-       BlockNumber blkno;                      /* block number to clean */
        uint16          nDead;                  /* number of tuples to become DEAD */
        uint16          nPlaceholder;   /* number of tuples to become PLACEHOLDER */
        uint16          nMove;                  /* number of tuples to move */
@@ -576,9 +555,6 @@ typedef struct spgxlogVacuumLeaf
 typedef struct spgxlogVacuumRoot
 {
        /* vacuum a root page when it is also a leaf */
-       RelFileNode node;
-
-       BlockNumber blkno;                      /* block number to clean */
        uint16          nDelete;                /* number of tuples to delete */
 
        spgxlogState stateSrc;
@@ -591,9 +567,6 @@ typedef struct spgxlogVacuumRoot
 
 typedef struct spgxlogVacuumRedirect
 {
-       RelFileNode node;
-
-       BlockNumber blkno;                      /* block number to clean */
        uint16          nToPlaceholder; /* number of redirects to make placeholders */
        OffsetNumber firstPlaceholder;          /* first placeholder tuple to remove */
        TransactionId newestRedirectXid;        /* newest XID of removed redirects */
index 6f8b5f46e100d2fe8d76ec0c450744c039f43929..0a70aaa370cdfa6d86afff0067ae9041d3c7cd5d 100644 (file)
@@ -186,7 +186,9 @@ typedef struct CheckpointStatsData
 
 extern CheckpointStatsData CheckpointStats;
 
-extern XLogRecPtr XLogInsertRecord(XLogRecData *rdata, XLogRecPtr fpw_lsn);
+struct XLogRecData;
+
+extern XLogRecPtr XLogInsertRecord(struct XLogRecData *rdata, XLogRecPtr fpw_lsn);
 extern void XLogFlush(XLogRecPtr RecPtr);
 extern bool XLogBackgroundFlush(void);
 extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
index 19b2ef8d90d862951c9fb3493c31ffaa63dd3b5b..8714a2a19b865fb40828b0e361f82b88522f7a33 100644 (file)
@@ -203,6 +203,17 @@ typedef struct xl_end_of_recovery
        TimeLineID      PrevTimeLineID; /* previous TLI we forked off from */
 } xl_end_of_recovery;
 
+/*
+ * The functions in xloginsert.c construct a chain of XLogRecData structs
+ * to represent the final WAL record.
+ */
+typedef struct XLogRecData
+{
+       struct XLogRecData *next;       /* next struct in chain, or NULL */
+       char       *data;                       /* start of rmgr data to include */
+       uint32          len;                    /* length of rmgr data to include */
+} XLogRecData;
+
 /*
  * Method table for resource managers.
  *
index 30c2e84cbc9a076d980660ce13dbf61fa1378742..4fb5b4a9ad718a028e0e3cee3f3d6a0c483e2359 100644 (file)
 #include "storage/relfilenode.h"
 
 /*
- * The rmgr data to be written by XLogInsert() is defined by a chain of
- * one or more XLogRecData structs.  (Multiple structs would be used when
- * parts of the source data aren't physically adjacent in memory, or when
- * multiple associated buffers need to be specified.)
- *
- * If buffer is valid then XLOG will check if buffer must be backed up
- * (ie, whether this is first change of that page since last checkpoint).
- * If so, the whole page contents are attached to the XLOG record, and XLOG
- * sets XLR_BKP_BLOCK(N) bit in xl_info.  Note that the buffer must be pinned
- * and exclusive-locked by the caller, so that it won't change under us.
- * NB: when the buffer is backed up, we DO NOT insert the data pointed to by
- * this XLogRecData struct into the XLOG record, since we assume it's present
- * in the buffer.  Therefore, rmgr redo routines MUST pay attention to
- * XLR_BKP_BLOCK(N) to know what is actually stored in the XLOG record.
- * The N'th XLR_BKP_BLOCK bit corresponds to the N'th distinct buffer
- * value (ignoring InvalidBuffer) appearing in the rdata chain.
- *
- * When buffer is valid, caller must set buffer_std to indicate whether the
- * page uses standard pd_lower/pd_upper header fields.  If this is true, then
- * XLOG is allowed to omit the free space between pd_lower and pd_upper from
- * the backed-up page image.  Note that even when buffer_std is false, the
- * page MUST have an LSN field as its first eight bytes!
- *
- * Note: data can be NULL to indicate no rmgr data associated with this chain
- * entry.  This can be sensible (ie, not a wasted entry) if buffer is valid.
- * The implication is that the buffer has been changed by the operation being
- * logged, and so may need to be backed up, but the change can be redone using
- * only information already present elsewhere in the XLOG entry.
+ * The minimum size of the WAL construction working area. If you need to
+ * register more than XLR_NORMAL_BKP_BLOCKS block references or have more
+ * than XLR_NORMAL_RDATAS data chunks in a single WAL record, you must call
+ * XLogEnsureRecordSpace() first to allocate more working memory.
  */
-typedef struct XLogRecData
-{
-       char       *data;                       /* start of rmgr data to include */
-       uint32          len;                    /* length of rmgr data to include */
-       Buffer          buffer;                 /* buffer associated with data, if any */
-       bool            buffer_std;             /* buffer has standard pd_lower/pd_upper */
-       struct XLogRecData *next;       /* next struct in chain, or NULL */
-} XLogRecData;
+#define XLR_NORMAL_BKP_BLOCKS          4
+#define XLR_NORMAL_RDATAS                      20
+
+/* flags for XLogRegisterBuffer */
+#define REGBUF_FORCE_IMAGE     (1<<0)  /* force a full-page image */
+#define REGBUF_WILL_INIT       (1<<1)  /* page will be re-initialized at replay
+                                                                        * (implies NO_IMAGE) */
+#define REGBUF_NO_IMAGE                (1<<2)  /* don't take a full-page image */
+#define REGBUF_STANDARD                (1<<3)  /* page follows "standard" page layout,
+                                                                        * (data between pd_lower and pd_upper
+                                                                        * will be skipped) */
+#define REGBUF_KEEP_DATA       (1<<4)  /* don't omit data if a full-page image is
+                                                                        * taken */
+
+/* prototypes for public functions in xloginsert.c: */
+extern void XLogBeginInsert(void);
+extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info);
+extern void XLogEnsureRecordSpace(int nbuffers, int ndatas);
+extern void XLogRegisterData(char *data, int len);
+extern void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags);
+extern void XLogRegisterBlock(uint8 block_id, RelFileNode *rnode,
+                                 ForkNumber forknum, BlockNumber blknum, char *page,
+                                 uint8 flags);
+extern void XLogRegisterBufData(uint8 block_id, char *data, int len);
+extern void XLogResetInsertion(void);
+extern bool XLogCheckBufferNeedsBackup(Buffer buffer);
 
-extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata);
 extern XLogRecPtr log_newpage(RelFileNode *rnode, ForkNumber forkNum,
                        BlockNumber blk, char *page, bool page_std);
 extern XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std);
 extern XLogRecPtr XLogSaveBufferForHint(Buffer buffer, bool buffer_std);
-extern bool XLogCheckBufferNeedsBackup(Buffer buffer);
+
+extern void InitXLogInsert(void);
 
 #endif   /* XLOGINSERT_H */
index ab0fb1c50046e77a92e90ef5d4c99d9056cb1db2..b1e04214196403f207b3eb324b84b68e8b9a8112 100644 (file)
  * The overall layout of an XLOG record is:
  *             Fixed-size header (XLogRecord struct)
  *             rmgr-specific data
- *             BkpBlock
- *             backup block data
- *             BkpBlock
- *             backup block data
+ *             XLogRecordBlockData struct
+ *             block data
+ *             XLogRecordBlockData struct
+ *             block data
  *             ...
  *
- * where there can be zero to four backup blocks (as signaled by xl_info flag
- * bits).  XLogRecord structs always start on MAXALIGN boundaries in the WAL
- * files, and we round up SizeOfXLogRecord so that the rmgr data is also
- * guaranteed to begin on a MAXALIGN boundary.  However, no padding is added
- * to align BkpBlock structs or backup block data.
+ * There can be zero or more XLogRecordBlockData structs, and zero or more
+ * bytes of rmgr-specific data not associated with a block.  XLogRecord
+ * structs always start on MAXALIGN boundaries in the WAL files, and we
+ * round up SizeOfXLogRecord so that the rmgr data is also guaranteed to
+ * begin on a MAXALIGN boundary.  The XLogRecordBlockData structs are not
+ * aligned, but the block data after them is.
  *
  * NOTE: xl_len counts only the rmgr data, not the XLogRecord header,
- * and also not any backup blocks.  xl_tot_len counts everything.  Neither
+ * and also not any block data.  xl_tot_len counts everything.  Neither
  * length field is rounded up to an alignment boundary.
  */
 typedef struct XLogRecord
@@ -56,24 +57,62 @@ typedef struct XLogRecord
 
 #define SizeOfXLogRecord       MAXALIGN(sizeof(XLogRecord))
 
-#define XLogRecGetData(record) ((char*) (record) + SizeOfXLogRecord)
-
 /*
  * XLOG uses only low 4 bits of xl_info.  High 4 bits may be used by rmgr.
  */
 #define XLR_INFO_MASK                  0x0F
+#define XLR_RMGR_INFO_MASK             0xF0
 
 /*
- * If we backed up any disk blocks with the XLOG record, we use flag bits in
- * xl_info to signal it.  We support backup of up to 4 disk blocks per XLOG
- * record.
+ * xl_len == 0 is only allowed if this flag is set. This provides an extra
+ * cross-check when reading records.
  */
-#define XLR_BKP_BLOCK_MASK             0x0F    /* all info bits used for bkp blocks */
-#define XLR_MAX_BKP_BLOCKS             4
-#define XLR_BKP_BLOCK(iblk)            (0x08 >> (iblk))                /* iblk in 0..3 */
+#define XLR_NO_RMGR_DATA               0x02
 
 /*
- * Header info for a backup block appended to an XLOG record.
+ * Header info for block data appended to an XLOG record.
+ *
+ * Note that we don't attempt to align the XLogRecordBlockData struct!
+ * So, the struct must be copied to aligned local storage before use. The
+ * block data itself is aligned at a MAXALIGN boundary, for the convenience
+ * of redo routines. 'data_length' is the length of the payload data, not
+ * including the XLogRecordBlockData struct or padding.
+ */
+typedef struct XLogRecordBlockData
+{
+       uint8           id;                             /* block reference ID */
+       uint8           fork_flags;             /* fork within the relation, and flags */
+       uint16          data_length;    /* number of payload bytes */
+       BlockNumber block;                      /* block number */
+
+       /*
+        * Relation containing the block. This is omitted if the
+        * BKPBLOCK_SAME_REL flag is set!
+        */
+       RelFileNode node;
+
+       /*
+        * BLOCK DATA OR XLogRecordBlockImage struct FOLLOWS AT NEXT MAXALIGN
+        * BOUNDARY
+        */
+} XLogRecordBlockData;
+
+#define SizeOfXLogRecordBlockData (offsetof(XLogRecordBlockData, node) + sizeof(RelFileNode))
+#define SizeOfXLogRecordBlockDataSameRel (offsetof(XLogRecordBlockData, block) + sizeof(BlockNumber))
+
+/*
+ * The fork number fits in the lower 4 bits in the fork_flags field. The upper
+ * bits are used for flags.
+ */
+#define BKPBLOCK_FORK_MASK     0x0F
+#define BKPBLOCK_FLAG_MASK     0xF0
+#define BKPBLOCK_HAS_IMAGE     0x10    /* block data is an XLogRecordBlockImage */
+#define BKPBLOCK_HAS_DATA      0x20
+#define BKPBLOCK_WILL_INIT     0x40    /* redo will re-init the page */
+#define BKPBLOCK_SAME_REL      0x80    /* RelFileNode omitted, same as previous */
+
+/*
+ * Full-page image data appended to an XLOG record.
  *
  * As a trivial form of data compression, the XLOG code is aware that
  * PG data pages usually contain an unused "hole" in the middle, which
@@ -82,19 +121,36 @@ typedef struct XLogRecord
  * XLOG record's CRC, either).  Hence, the amount of block data actually
  * present following the BkpBlock struct is BLCKSZ - hole_length bytes.
  *
- * Note that we don't attempt to align either the BkpBlock struct or the
- * block's data.  So, the struct must be copied to aligned local storage
- * before use.
+ * Note that we don't attempt to align the block content.
  */
-typedef struct BkpBlock
+typedef struct XLogRecordBlockImage
 {
-       RelFileNode node;                       /* relation containing block */
-       ForkNumber      fork;                   /* fork within the relation */
-       BlockNumber block;                      /* block number */
        uint16          hole_offset;    /* number of bytes before "hole" */
        uint16          hole_length;    /* number of bytes in "hole" */
 
-       /* ACTUAL BLOCK DATA FOLLOWS AT END OF STRUCT */
-} BkpBlock;
+       /* ACTUAL BLOCK CONTENT FOLLOWS AT END OF STRUCT */
+} XLogRecordBlockImage;
+
+/*
+ * The number of block references in a WAL record is currently limited by
+ * the fact that we store the block ID as an 8-bit integer.
+ */
+#define XLR_MAX_BKP_BLOCKS             256
+
+
+/* prototypes for XLogRecord decoding functions, in xlogreader.c */
+#define XLogRecGetData(record) ((char *) (record) + SizeOfXLogRecord)
+#define XLogRecHasAnyBlockRefs(record) ((record)->xl_tot_len > SizeOfXLogRecord + (record)->xl_len)
+
+extern bool XLogRecHasBlockRef(XLogRecord *record, uint8 block_id);
+extern bool XLogRecHasBlockImage(XLogRecord *record, uint8 block_id);
+extern uint8 *XLogRecGetBlockRefIds(XLogRecord *record, int *num_refs);
+extern char *XLogRecGetBlockData(XLogRecord *record, uint8 block_id, Size *len);
+extern void XLogRecGetBlockTag(XLogRecord *record, uint8 block_id,
+                                RelFileNode *rnode, ForkNumber *forknum,
+                                BlockNumber *blknum);
+
+extern XLogRecordBlockData *XLogRecGetBlockRef(XLogRecord *record,
+                                  uint8 block_id, char **content);
 
 #endif   /* XLOGRECORD_H */
index 8d906967232c8a27412fcdb0156965e57aa46bed..490cea3c770aaecefc1995f934bcddfb2738c55d 100644 (file)
@@ -34,25 +34,15 @@ typedef enum
 } XLogRedoAction;
 
 extern XLogRedoAction XLogReadBufferForRedo(XLogRecPtr lsn, XLogRecord *record,
-                                         int block_index, RelFileNode rnode, BlockNumber blkno,
-                                         Buffer *buf);
+                                         uint8 buffer_id, Buffer *buf);
 extern XLogRedoAction XLogReadBufferForRedoExtended(XLogRecPtr lsn,
-                                                         XLogRecord *record, int block_index,
-                                                         RelFileNode rnode, ForkNumber forkno,
-                                                         BlockNumber blkno,
+                                                         XLogRecord *record, uint8 buffer_id,
                                                          ReadBufferMode mode, bool get_cleanup_lock,
                                                          Buffer *buf);
 
-extern Buffer XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init);
 extern Buffer XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
                                           BlockNumber blkno, ReadBufferMode mode);
 
-extern Buffer RestoreBackupBlock(XLogRecPtr lsn, XLogRecord *record,
-                                  int block_index,
-                                  bool get_cleanup_lock, bool keep_buffer);
-extern Buffer RestoreBackupBlockContents(XLogRecPtr lsn, BkpBlock bkpb,
-                                                char *blk, bool get_cleanup_lock, bool keep_buffer);
-
 extern Relation CreateFakeRelcacheEntry(RelFileNode rnode);
 extern void FreeFakeRelcacheEntry(Relation fakerel);