{
const char *id;
const RmgrDescData *desc = &RmgrDescTable[record->xl_rmid];
+ int nblockrefs;
+ uint8 *blockrefids;
+ int i;
+ RelFileNode rnode;
+ ForkNumber forknum;
+ BlockNumber blk;
+ XLogRecordBlockData *bkpb;
id = desc->rm_identify(record->xl_info);
if (id == NULL)
id = psprintf("UNKNOWN (%x)", record->xl_info & ~XLR_INFO_MASK);
- printf("rmgr: %-11s len (rec/tot): %6u/%6u, tx: %10u, lsn: %X/%08X, prev %X/%08X, bkp: %u%u%u%u, desc: %s ",
+ config->already_displayed_records++;
+
+ printf("rmgr: %-11s len (rec/tot): %6u/%6u, tx: %10u, lsn: %X/%08X, prev %X/%08X, ",
desc->rm_name,
record->xl_len, record->xl_tot_len,
record->xl_xid,
(uint32) (ReadRecPtr >> 32), (uint32) ReadRecPtr,
- (uint32) (record->xl_prev >> 32), (uint32) record->xl_prev,
- !!(XLR_BKP_BLOCK(0) & record->xl_info),
- !!(XLR_BKP_BLOCK(1) & record->xl_info),
- !!(XLR_BKP_BLOCK(2) & record->xl_info),
- !!(XLR_BKP_BLOCK(3) & record->xl_info),
- id);
+ (uint32) (record->xl_prev >> 32), (uint32) record->xl_prev);
+
+ /* print block references (short format) */
+ blockrefids = XLogRecGetBlockRefIds(record, &nblockrefs);
+ if (!config->bkp_details)
+ {
+ for (i = 0; i < nblockrefs; i++)
+ {
+ uint8 id = blockrefids[i];
+
+ bkpb = XLogRecGetBlockRef(record, id, NULL);
+ XLogRecGetBlockTag(record, id, &rnode, &forknum, &blk);
+ if (forknum != MAIN_FORKNUM)
+ printf("blkref #%u: rel %u/%u/%u fork %s blk %u",
+ id,
+ rnode.spcNode, rnode.dbNode, rnode.relNode,
+ forkNames[forknum],
+ blk);
+ else
+ printf("blkref #%u: rel %u/%u/%u blk %u",
+ id,
+ rnode.spcNode, rnode.dbNode, rnode.relNode,
+ blk);
+ if (bkpb->fork_flags & BKPBLOCK_HAS_IMAGE)
+ printf(" FPW");
+ printf(", ");
+ }
+ }
+ printf("desc: %s ", id);
/* the desc routine will printf the description directly to stdout */
desc->rm_desc(NULL, record);
putchar('\n');
+ /* print block references (detailed format) */
if (config->bkp_details)
{
- int bkpnum;
- char *blk = (char *) XLogRecGetData(record) + record->xl_len;
-
- for (bkpnum = 0; bkpnum < XLR_MAX_BKP_BLOCKS; bkpnum++)
+ for (i = 0; i < nblockrefs; i++)
{
- BkpBlock bkpb;
-
- if (!(XLR_BKP_BLOCK(bkpnum) & record->xl_info))
- continue;
-
- memcpy(&bkpb, blk, sizeof(BkpBlock));
- blk += sizeof(BkpBlock);
- blk += BLCKSZ - bkpb.hole_length;
-
- printf("\tbackup bkp #%u; rel %u/%u/%u; fork: %s; block: %u; hole: offset: %u, length: %u\n",
- bkpnum,
- bkpb.node.spcNode, bkpb.node.dbNode, bkpb.node.relNode,
- forkNames[bkpb.fork],
- bkpb.block, bkpb.hole_offset, bkpb.hole_length);
+ uint8 id = blockrefids[i];
+ char *blkdata;
+
+ bkpb = XLogRecGetBlockRef(record, id, &blkdata);
+
+ XLogRecGetBlockTag(record, id, &rnode, &forknum, &blk);
+ printf("\tblkref #%u: rel %u/%u/%u fork %s blk %u",
+ id,
+ rnode.spcNode, rnode.dbNode, rnode.relNode,
+ forkNames[forknum],
+ blk);
+ if (bkpb->fork_flags & BKPBLOCK_HAS_IMAGE)
+ {
+ XLogRecordBlockImage *blkimg = (XLogRecordBlockImage *) blkdata;
+ printf(" (FPW); hole: offset: %u, length: %u\n",
+ blkimg->hole_offset, blkimg->hole_length);
+ }
+ printf("\n");
}
}
+
+ pfree(blockrefids);
}
/*
{
xl_brin_createidx xlrec;
XLogRecPtr recptr;
- XLogRecData rdata;
Page page;
- xlrec.node = index->rd_node;
xlrec.version = BRIN_CURRENT_VERSION;
xlrec.pagesPerRange = BrinGetPagesPerRange(index);
- rdata.buffer = InvalidBuffer;
- rdata.data = (char *) &xlrec;
- rdata.len = SizeOfBrinCreateIdx;
- rdata.next = NULL;
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, SizeOfBrinCreateIdx);
+ XLogRegisterBuffer(0, meta, REGBUF_WILL_INIT);
- recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_CREATE_INDEX, &rdata);
+ recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_CREATE_INDEX);
page = BufferGetPage(meta);
PageSetLSN(page, recptr);
/* XLOG stuff */
if (RelationNeedsWAL(idxrel))
{
- BlockNumber blk = BufferGetBlockNumber(oldbuf);
xl_brin_samepage_update xlrec;
XLogRecPtr recptr;
- XLogRecData rdata[2];
uint8 info = XLOG_BRIN_SAMEPAGE_UPDATE;
- xlrec.node = idxrel->rd_node;
- ItemPointerSetBlockNumber(&xlrec.tid, blk);
- ItemPointerSetOffsetNumber(&xlrec.tid, oldoff);
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = SizeOfBrinSamepageUpdate;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
+ xlrec.offnum = oldoff;
- rdata[1].data = (char *) newtup;
- rdata[1].len = newsz;
- rdata[1].buffer = oldbuf;
- rdata[1].buffer_std = true;
- rdata[1].next = NULL;
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, SizeOfBrinSamepageUpdate);
- recptr = XLogInsert(RM_BRIN_ID, info, rdata);
+ XLogRegisterBuffer(0, oldbuf, REGBUF_STANDARD);
+ XLogRegisterBufData(0, (char *) newtup, newsz);
+
+ recptr = XLogInsert(RM_BRIN_ID, info);
PageSetLSN(oldpage, recptr);
}
{
xl_brin_update xlrec;
XLogRecPtr recptr;
- XLogRecData rdata[4];
uint8 info;
info = XLOG_BRIN_UPDATE | (extended ? XLOG_BRIN_INIT_PAGE : 0);
- xlrec.insert.node = idxrel->rd_node;
- ItemPointerSet(&xlrec.insert.tid, BufferGetBlockNumber(newbuf), newoff);
+ xlrec.insert.offnum = newoff;
xlrec.insert.heapBlk = heapBlk;
- xlrec.insert.tuplen = newsz;
- xlrec.insert.revmapBlk = BufferGetBlockNumber(revmapbuf);
xlrec.insert.pagesPerRange = pagesPerRange;
- ItemPointerSet(&xlrec.oldtid, BufferGetBlockNumber(oldbuf), oldoff);
+ xlrec.oldOffnum = oldoff;
+
+ XLogBeginInsert();
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = SizeOfBrinUpdate;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
+ /* new page */
+ XLogRegisterData((char *) &xlrec, SizeOfBrinUpdate);
- rdata[1].data = (char *) newtup;
- rdata[1].len = newsz;
- rdata[1].buffer = extended ? InvalidBuffer : newbuf;
- rdata[1].buffer_std = true;
- rdata[1].next = &(rdata[2]);
+ XLogRegisterBuffer(0, newbuf, REGBUF_STANDARD | (extended ? REGBUF_WILL_INIT : 0));
+ XLogRegisterBufData(0, (char *) newtup, newsz);
- rdata[2].data = (char *) NULL;
- rdata[2].len = 0;
- rdata[2].buffer = revmapbuf;
- rdata[2].buffer_std = true;
- rdata[2].next = &(rdata[3]);
+ /* revmap page */
+ XLogRegisterBuffer(1, revmapbuf, REGBUF_STANDARD);
- rdata[3].data = (char *) NULL;
- rdata[3].len = 0;
- rdata[3].buffer = oldbuf;
- rdata[3].buffer_std = true;
- rdata[3].next = NULL;
+ /* old page */
+ XLogRegisterBuffer(2, oldbuf, REGBUF_STANDARD);
- recptr = XLogInsert(RM_BRIN_ID, info, rdata);
+ recptr = XLogInsert(RM_BRIN_ID, info);
PageSetLSN(oldpage, recptr);
PageSetLSN(newpage, recptr);
{
xl_brin_insert xlrec;
XLogRecPtr recptr;
- XLogRecData rdata[3];
uint8 info;
info = XLOG_BRIN_INSERT | (extended ? XLOG_BRIN_INIT_PAGE : 0);
- xlrec.node = idxrel->rd_node;
xlrec.heapBlk = heapBlk;
xlrec.pagesPerRange = pagesPerRange;
- xlrec.revmapBlk = BufferGetBlockNumber(revmapbuf);
- xlrec.tuplen = itemsz;
- ItemPointerSet(&xlrec.tid, blk, off);
-
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = SizeOfBrinInsert;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].buffer_std = false;
- rdata[0].next = &(rdata[1]);
-
- rdata[1].data = (char *) tup;
- rdata[1].len = itemsz;
- rdata[1].buffer = extended ? InvalidBuffer : *buffer;
- rdata[1].buffer_std = true;
- rdata[1].next = &(rdata[2]);
-
- rdata[2].data = (char *) NULL;
- rdata[2].len = 0;
- rdata[2].buffer = revmapbuf;
- rdata[2].buffer_std = false;
- rdata[2].next = NULL;
-
- recptr = XLogInsert(RM_BRIN_ID, info, rdata);
+ xlrec.offnum = off;
+
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, SizeOfBrinInsert);
+
+ XLogRegisterBuffer(0, *buffer, REGBUF_STANDARD | (extended ? REGBUF_WILL_INIT : 0));
+ XLogRegisterBufData(0, (char *) tup, itemsz);
+
+ XLogRegisterBuffer(1, revmapbuf, 0);
+
+ recptr = XLogInsert(RM_BRIN_ID, info);
PageSetLSN(page, recptr);
PageSetLSN(BufferGetPage(revmapbuf), recptr);
{
xl_brin_revmap_extend xlrec;
XLogRecPtr recptr;
- XLogRecData rdata[2];
- xlrec.node = revmap->rm_irel->rd_node;
xlrec.targetBlk = mapBlk;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = SizeOfBrinRevmapExtend;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].buffer_std = false;
- rdata[0].next = &(rdata[1]);
-
- rdata[1].data = (char *) NULL;
- rdata[1].len = 0;
- rdata[1].buffer = revmap->rm_metaBuf;
- rdata[1].buffer_std = false;
- rdata[1].next = NULL;
-
- recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_REVMAP_EXTEND, rdata);
+
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, SizeOfBrinRevmapExtend);
+ XLogRegisterBuffer(0, revmap->rm_metaBuf, 0);
+
+ XLogRegisterBuffer(1, buf, REGBUF_WILL_INIT);
+
+ recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_REVMAP_EXTEND);
PageSetLSN(metapage, recptr);
PageSetLSN(page, recptr);
}
Buffer buf;
Page page;
- /* Backup blocks are not used in create_index records */
- Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
-
/* create the index' metapage */
- buf = XLogReadBuffer(xlrec->node, BRIN_METAPAGE_BLKNO, true);
+ XLogReadBufferForRedoExtended(lsn, record, 0, RBM_ZERO, false, &buf);
Assert(BufferIsValid(buf));
page = (Page) BufferGetPage(buf);
brin_metapage_init(page, xlrec->pagesPerRange, xlrec->version);
*/
static void
brin_xlog_insert_update(XLogRecPtr lsn, XLogRecord *record,
- xl_brin_insert *xlrec, BrinTuple *tuple)
+ xl_brin_insert *xlrec)
{
- BlockNumber blkno;
Buffer buffer;
Page page;
XLogRedoAction action;
- blkno = ItemPointerGetBlockNumber(&xlrec->tid);
-
/*
* If we inserted the first and only tuple on the page, re-initialize the
* page from scratch.
*/
if (record->xl_info & XLOG_BRIN_INIT_PAGE)
{
- XLogReadBufferForRedoExtended(lsn, record, 0,
- xlrec->node, MAIN_FORKNUM, blkno,
- RBM_ZERO, false, &buffer);
+ XLogReadBufferForRedoExtended(lsn, record, 0, RBM_ZERO, false, &buffer);
page = BufferGetPage(buffer);
brin_page_init(page, BRIN_PAGETYPE_REGULAR);
action = BLK_NEEDS_REDO;
}
else
{
- action = XLogReadBufferForRedo(lsn, record, 0,
- xlrec->node, blkno, &buffer);
+ action = XLogReadBufferForRedo(lsn, record, 0, &buffer);
}
/* insert the index item into the page */
if (action == BLK_NEEDS_REDO)
{
OffsetNumber offnum;
+ BrinTuple *tuple;
+ Size tuplen;
+
+ tuple = (BrinTuple *) XLogRecGetBlockData(record, 0, &tuplen);
Assert(tuple->bt_blkno == xlrec->heapBlk);
page = (Page) BufferGetPage(buffer);
- offnum = ItemPointerGetOffsetNumber(&(xlrec->tid));
+ offnum = xlrec->offnum;
if (PageGetMaxOffsetNumber(page) + 1 < offnum)
elog(PANIC, "brin_xlog_insert_update: invalid max offset number");
- offnum = PageAddItem(page, (Item) tuple, xlrec->tuplen, offnum, true,
- false);
+ offnum = PageAddItem(page, (Item) tuple, tuplen, offnum, true, false);
if (offnum == InvalidOffsetNumber)
elog(PANIC, "brin_xlog_insert_update: failed to add tuple");
UnlockReleaseBuffer(buffer);
/* update the revmap */
- action = XLogReadBufferForRedo(lsn, record, 1, xlrec->node,
- xlrec->revmapBlk, &buffer);
+ action = XLogReadBufferForRedo(lsn, record, 1, &buffer);
if (action == BLK_NEEDS_REDO)
{
+ ItemPointerData tid;
+ BlockNumber blkno = BufferGetBlockNumber(buffer);
+
+ ItemPointerSet(&tid, blkno, xlrec->offnum);
page = (Page) BufferGetPage(buffer);
brinSetHeapBlockItemptr(buffer, xlrec->pagesPerRange, xlrec->heapBlk,
- xlrec->tid);
+ tid);
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
}
brin_xlog_insert(XLogRecPtr lsn, XLogRecord *record)
{
xl_brin_insert *xlrec = (xl_brin_insert *) XLogRecGetData(record);
- BrinTuple *newtup;
- newtup = (BrinTuple *) ((char *) xlrec + SizeOfBrinInsert);
-
- brin_xlog_insert_update(lsn, record, xlrec, newtup);
+ brin_xlog_insert_update(lsn, record, xlrec);
}
/*
brin_xlog_update(XLogRecPtr lsn, XLogRecord *record)
{
xl_brin_update *xlrec = (xl_brin_update *) XLogRecGetData(record);
- BlockNumber blkno;
Buffer buffer;
- BrinTuple *newtup;
XLogRedoAction action;
- newtup = (BrinTuple *) ((char *) xlrec + SizeOfBrinUpdate);
-
/* First remove the old tuple */
- blkno = ItemPointerGetBlockNumber(&(xlrec->oldtid));
- action = XLogReadBufferForRedo(lsn, record, 2, xlrec->insert.node,
- blkno, &buffer);
+ action = XLogReadBufferForRedo(lsn, record, 2, &buffer);
if (action == BLK_NEEDS_REDO)
{
Page page;
page = (Page) BufferGetPage(buffer);
- offnum = ItemPointerGetOffsetNumber(&(xlrec->oldtid));
+ offnum = xlrec->oldOffnum;
if (PageGetMaxOffsetNumber(page) + 1 < offnum)
elog(PANIC, "brin_xlog_update: invalid max offset number");
}
/* Then insert the new tuple and update revmap, like in an insertion. */
- brin_xlog_insert_update(lsn, record, &xlrec->insert, newtup);
+ brin_xlog_insert_update(lsn, record, &xlrec->insert);
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
brin_xlog_samepage_update(XLogRecPtr lsn, XLogRecord *record)
{
xl_brin_samepage_update *xlrec;
- BlockNumber blkno;
Buffer buffer;
XLogRedoAction action;
xlrec = (xl_brin_samepage_update *) XLogRecGetData(record);
- blkno = ItemPointerGetBlockNumber(&(xlrec->tid));
- action = XLogReadBufferForRedo(lsn, record, 0, xlrec->node, blkno,
- &buffer);
+ action = XLogReadBufferForRedo(lsn, record, 0, &buffer);
if (action == BLK_NEEDS_REDO)
{
- int tuplen;
+ Size tuplen;
BrinTuple *mmtuple;
Page page;
OffsetNumber offnum;
- tuplen = record->xl_len - SizeOfBrinSamepageUpdate;
- mmtuple = (BrinTuple *) ((char *) xlrec + SizeOfBrinSamepageUpdate);
+ mmtuple = (BrinTuple *) XLogRecGetBlockData(record, 0, &tuplen);
page = (Page) BufferGetPage(buffer);
- offnum = ItemPointerGetOffsetNumber(&(xlrec->tid));
+ offnum = xlrec->offnum;
if (PageGetMaxOffsetNumber(page) + 1 < offnum)
elog(PANIC, "brin_xlog_samepage_update: invalid max offset number");
Buffer metabuf;
Buffer buf;
Page page;
+ BlockNumber targetBlk;
XLogRedoAction action;
xlrec = (xl_brin_revmap_extend *) XLogRecGetData(record);
+
+ XLogRecGetBlockTag(record, 1, NULL, NULL, &targetBlk);
+ Assert(xlrec->targetBlk == targetBlk);
+
/* Update the metapage */
- action = XLogReadBufferForRedo(lsn, record, 0, xlrec->node,
- BRIN_METAPAGE_BLKNO, &metabuf);
+ action = XLogReadBufferForRedo(lsn, record, 0, &metabuf);
if (action == BLK_NEEDS_REDO)
{
Page metapg;
* image here.
*/
- buf = XLogReadBuffer(xlrec->node, xlrec->targetBlk, true);
+ XLogReadBufferForRedoExtended(lsn, record, 1, RBM_ZERO, false, &buf);
page = (Page) BufferGetPage(buf);
brin_page_init(page, BRIN_PAGETYPE_REVMAP);
Buffer childbuf, GinStatsData *buildStats)
{
Page page = BufferGetPage(stack->buffer);
- XLogRecData *payloadrdata;
GinPlaceToPageRC rc;
uint16 xlflags = 0;
Page childpage = NULL;
/*
* Try to put the incoming tuple on the page. placeToPage will decide if
* the page needs to be split.
+ *
+ * WAL-logging this operation is a bit funny:
+ *
+ * We're responsible for calling XLogBeginInsert() and XLogInsert().
+ * XLogBeginInsert() must be called before placeToPage, because placeToPage
+ * register some data to the WAL record.
+ *
+ * If placeToPage returns INSERTED, placeToPage has already called
+ * START_CRIT_SECTION(), and we're responsible for calling
+ * END_CRIT_SECTION. When it returns INSERTED, it is also responsible for
+ * registering any data required to replay the operation with
+ * XLogRegisterData(0, ...). It may only add data to block index 0; the
+ * main data of the WAL record is reserved for this function.
+ *
+ * If placeToPage returns SPLIT, we're wholly responsible for WAL logging.
+ * Splits happen infrequently, so we just make a full-page image of all
+ * the pages involved.
*/
+
+ if (RelationNeedsWAL(btree->index))
+ XLogBeginInsert();
+
rc = btree->placeToPage(btree, stack->buffer, stack,
insertdata, updateblkno,
- &payloadrdata, &newlpage, &newrpage);
+ &newlpage, &newrpage);
if (rc == UNMODIFIED)
+ {
+ XLogResetInsertion();
return true;
+ }
else if (rc == INSERTED)
{
/* placeToPage did START_CRIT_SECTION() */
if (RelationNeedsWAL(btree->index))
{
XLogRecPtr recptr;
- XLogRecData rdata[3];
ginxlogInsert xlrec;
BlockIdData childblknos[2];
- xlrec.node = btree->index->rd_node;
- xlrec.blkno = BufferGetBlockNumber(stack->buffer);
+ /*
+ * placetopage already registered stack->buffer as block 0.
+ */
xlrec.flags = xlflags;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = sizeof(ginxlogInsert);
+ if (childbuf != InvalidBuffer)
+ XLogRegisterBuffer(1, childbuf, REGBUF_STANDARD);
+
+ XLogRegisterData((char *) &xlrec, sizeof(ginxlogInsert));
/*
* Log information about child if this was an insertion of a
*/
if (childbuf != InvalidBuffer)
{
- rdata[0].next = &rdata[1];
-
BlockIdSet(&childblknos[0], BufferGetBlockNumber(childbuf));
BlockIdSet(&childblknos[1], GinPageGetOpaque(childpage)->rightlink);
-
- rdata[1].buffer = InvalidBuffer;
- rdata[1].data = (char *) childblknos;
- rdata[1].len = sizeof(BlockIdData) * 2;
- rdata[1].next = &rdata[2];
-
- rdata[2].buffer = childbuf;
- rdata[2].buffer_std = false;
- rdata[2].data = NULL;
- rdata[2].len = 0;
- rdata[2].next = payloadrdata;
+ XLogRegisterData((char *) childblknos,
+ sizeof(BlockIdData) * 2);
}
- else
- rdata[0].next = payloadrdata;
- recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT, rdata);
+ recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT);
PageSetLSN(page, recptr);
if (childbuf != InvalidBuffer)
PageSetLSN(childpage, recptr);
}
else if (rc == SPLIT)
{
- /* Didn't fit, have to split */
+ /* Didn't fit, had to split */
Buffer rbuffer;
BlockNumber savedRightLink;
- XLogRecData rdata[2];
ginxlogSplit data;
Buffer lbuffer = InvalidBuffer;
Page newrootpg = NULL;
*/
data.node = btree->index->rd_node;
- data.rblkno = BufferGetBlockNumber(rbuffer);
data.flags = xlflags;
if (childbuf != InvalidBuffer)
{
else
data.leftChildBlkno = data.rightChildBlkno = InvalidBlockNumber;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].data = (char *) &data;
- rdata[0].len = sizeof(ginxlogSplit);
-
- if (childbuf != InvalidBuffer)
- {
- rdata[0].next = &rdata[1];
-
- rdata[1].buffer = childbuf;
- rdata[1].buffer_std = false;
- rdata[1].data = NULL;
- rdata[1].len = 0;
- rdata[1].next = payloadrdata;
- }
- else
- rdata[0].next = payloadrdata;
-
if (stack->parent == NULL)
{
/*
buildStats->nEntryPages++;
}
- /*
- * root never has a right-link, so we borrow the rrlink field to
- * store the root block number.
- */
- data.rrlink = BufferGetBlockNumber(stack->buffer);
- data.lblkno = BufferGetBlockNumber(lbuffer);
+ data.rrlink = InvalidBlockNumber;
data.flags |= GIN_SPLIT_ROOT;
GinPageGetOpaque(newrpage)->rightlink = InvalidBlockNumber;
{
/* split non-root page */
data.rrlink = savedRightLink;
- data.lblkno = BufferGetBlockNumber(stack->buffer);
GinPageGetOpaque(newrpage)->rightlink = savedRightLink;
GinPageGetOpaque(newlpage)->flags |= GIN_INCOMPLETE_SPLIT;
{
XLogRecPtr recptr;
- recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT, rdata);
+ /*
+ * We just take full page images of all the split pages. Splits
+ * are uncommon enough that it's not worth complicating the code
+ * to be more efficient.
+ */
+ if (stack->parent == NULL)
+ {
+ XLogRegisterBuffer(0, lbuffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
+ XLogRegisterBuffer(1, rbuffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
+ XLogRegisterBuffer(2, stack->buffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
+ }
+ else
+ {
+ XLogRegisterBuffer(0, stack->buffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
+ XLogRegisterBuffer(1, rbuffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
+ }
+ if (BufferIsValid(childbuf))
+ XLogRegisterBuffer(3, childbuf, 0);
+
+ XLogRegisterData((char *) &data, sizeof(ginxlogSplit));
+
+ recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT);
PageSetLSN(BufferGetPage(stack->buffer), recptr);
PageSetLSN(BufferGetPage(rbuffer), recptr);
if (stack->parent == NULL)
static void dataSplitPageInternal(GinBtree btree, Buffer origbuf,
GinBtreeStack *stack,
void *insertdata, BlockNumber updateblkno,
- XLogRecData **prdata, Page *newlpage, Page *newrpage);
+ Page *newlpage, Page *newrpage);
static disassembledLeaf *disassembleLeaf(Page page);
static bool leafRepackItems(disassembledLeaf *leaf, ItemPointer remaining);
static bool addItemsToLeaf(disassembledLeaf *leaf, ItemPointer newItems,
int nNewItems);
-static XLogRecData *constructLeafRecompressWALData(Buffer buf,
- disassembledLeaf *leaf);
+static void registerLeafRecompressWALData(Buffer buf, disassembledLeaf *leaf);
static void dataPlaceToPageLeafRecompress(Buffer buf, disassembledLeaf *leaf);
static void dataPlaceToPageLeafSplit(Buffer buf,
disassembledLeaf *leaf,
ItemPointerData lbound, ItemPointerData rbound,
- XLogRecData **prdata, Page lpage, Page rpage);
+ Page lpage, Page rpage);
/*
* Read TIDs from leaf data page to single uncompressed array. The TIDs are
*/
static GinPlaceToPageRC
dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
- void *insertdata, XLogRecData **prdata,
- Page *newlpage, Page *newrpage)
+ void *insertdata, Page *newlpage, Page *newrpage)
{
GinBtreeDataLeafInsertData *items = insertdata;
ItemPointer newItems = &items->items[items->curitem];
*/
MemoryContextSwitchTo(oldCxt);
if (RelationNeedsWAL(btree->index))
- *prdata = constructLeafRecompressWALData(buf, leaf);
- else
- *prdata = NULL;
+ registerLeafRecompressWALData(buf, leaf);
START_CRIT_SECTION();
dataPlaceToPageLeafRecompress(buf, leaf);
*newrpage = MemoryContextAlloc(oldCxt, BLCKSZ);
dataPlaceToPageLeafSplit(buf, leaf, lbound, rbound,
- prdata, *newlpage, *newrpage);
+ *newlpage, *newrpage);
Assert(GinPageRightMost(page) ||
ginCompareItemPointers(GinDataPageGetRightBound(*newlpage),
*/
if (removedsomething)
{
- XLogRecData *payloadrdata = NULL;
bool modified;
/*
}
if (RelationNeedsWAL(indexrel))
- payloadrdata = constructLeafRecompressWALData(buffer, leaf);
+ {
+ XLogBeginInsert();
+ registerLeafRecompressWALData(buffer, leaf);
+ }
START_CRIT_SECTION();
dataPlaceToPageLeafRecompress(buffer, leaf);
if (RelationNeedsWAL(indexrel))
{
XLogRecPtr recptr;
- XLogRecData rdata;
- ginxlogVacuumDataLeafPage xlrec;
- xlrec.node = indexrel->rd_node;
- xlrec.blkno = BufferGetBlockNumber(buffer);
-
- rdata.buffer = InvalidBuffer;
- rdata.data = (char *) &xlrec;
- rdata.len = offsetof(ginxlogVacuumDataLeafPage, data);
- rdata.next = payloadrdata;
-
- recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_DATA_LEAF_PAGE, &rdata);
+ recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_DATA_LEAF_PAGE);
PageSetLSN(page, recptr);
}
* Construct a ginxlogRecompressDataLeaf record representing the changes
* in *leaf.
*/
-static XLogRecData *
-constructLeafRecompressWALData(Buffer buf, disassembledLeaf *leaf)
+static void
+registerLeafRecompressWALData(Buffer buf, disassembledLeaf *leaf)
{
int nmodified = 0;
char *walbufbegin;
char *walbufend;
- XLogRecData *rdata;
dlist_iter iter;
int segno;
ginxlogRecompressDataLeaf *recompress_xlog;
}
walbufbegin = palloc(
- sizeof(ginxlogRecompressDataLeaf) +
- BLCKSZ + /* max size needed to hold the segment
- * data */
- nmodified * 2 + /* (segno + action) per action */
- sizeof(XLogRecData));
+ sizeof(ginxlogRecompressDataLeaf) +
+ BLCKSZ + /* max size needed to hold the segment data */
+ nmodified *2 /* (segno + action) per action */
+ );
walbufend = walbufbegin;
recompress_xlog = (ginxlogRecompressDataLeaf *) walbufend;
segno++;
}
- rdata = (XLogRecData *) MAXALIGN(walbufend);
- rdata->buffer = buf;
- rdata->buffer_std = TRUE;
- rdata->data = walbufbegin;
- rdata->len = walbufend - walbufbegin;
- rdata->next = NULL;
- return rdata;
+ XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
+ XLogRegisterBufData(0, walbufbegin, walbufend - walbufbegin);
+
}
/*
static void
dataPlaceToPageLeafSplit(Buffer buf, disassembledLeaf *leaf,
ItemPointerData lbound, ItemPointerData rbound,
- XLogRecData **prdata, Page lpage, Page rpage)
+ Page lpage, Page rpage)
{
char *ptr;
int segsize;
dlist_node *firstright;
leafSegmentInfo *seginfo;
- /* these must be static so they can be returned to caller */
- static ginxlogSplitDataLeaf split_xlog;
- static XLogRecData rdata[3];
-
/* Initialize temporary pages to hold the new left and right pages */
GinInitPage(lpage, GIN_DATA | GIN_LEAF | GIN_COMPRESSED, BLCKSZ);
GinInitPage(rpage, GIN_DATA | GIN_LEAF | GIN_COMPRESSED, BLCKSZ);
Assert(rsize == leaf->rsize);
GinDataPageSetDataSize(rpage, rsize);
*GinDataPageGetRightBound(rpage) = rbound;
-
- /* Create WAL record */
- split_xlog.lsize = lsize;
- split_xlog.rsize = rsize;
- split_xlog.lrightbound = lbound;
- split_xlog.rrightbound = rbound;
-
- rdata[0].buffer = InvalidBuffer;
- rdata[0].data = (char *) &split_xlog;
- rdata[0].len = sizeof(ginxlogSplitDataLeaf);
- rdata[0].next = &rdata[1];
-
- rdata[1].buffer = InvalidBuffer;
- rdata[1].data = (char *) GinDataLeafPageGetPostingList(lpage);
- rdata[1].len = lsize;
- rdata[1].next = &rdata[2];
-
- rdata[2].buffer = InvalidBuffer;
- rdata[2].data = (char *) GinDataLeafPageGetPostingList(rpage);
- rdata[2].len = rsize;
- rdata[2].next = NULL;
-
- *prdata = rdata;
}
/*
*
* In addition to inserting the given item, the downlink of the existing item
* at 'off' is updated to point to 'updateblkno'.
+ *
+ * On INSERTED, registers the buffer as buffer ID 0, with data.
+ * On SPLIT, returns rdata that represents the split pages in *prdata.
*/
static GinPlaceToPageRC
dataPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
void *insertdata, BlockNumber updateblkno,
- XLogRecData **prdata, Page *newlpage, Page *newrpage)
+ Page *newlpage, Page *newrpage)
{
Page page = BufferGetPage(buf);
OffsetNumber off = stack->off;
PostingItem *pitem;
- /* these must be static so they can be returned to caller */
- static XLogRecData rdata;
+ /* this must be static so it can be returned to caller */
static ginxlogInsertDataInternal data;
/* split if we have to */
if (GinNonLeafDataPageGetFreeSpace(page) < sizeof(PostingItem))
{
dataSplitPageInternal(btree, buf, stack, insertdata, updateblkno,
- prdata, newlpage, newrpage);
+ newlpage, newrpage);
return SPLIT;
}
- *prdata = &rdata;
Assert(GinPageIsData(page));
START_CRIT_SECTION();
pitem = (PostingItem *) insertdata;
GinDataPageAddPostingItem(page, pitem, off);
- data.offset = off;
- data.newitem = *pitem;
+ if (RelationNeedsWAL(btree->index))
+ {
+ data.offset = off;
+ data.newitem = *pitem;
- rdata.buffer = buf;
- rdata.buffer_std = TRUE;
- rdata.data = (char *) &data;
- rdata.len = sizeof(ginxlogInsertDataInternal);
- rdata.next = NULL;
+ XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
+ XLogRegisterBufData(0, (char *) &data,
+ sizeof(ginxlogInsertDataInternal));
+ }
return INSERTED;
}
static GinPlaceToPageRC
dataPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
void *insertdata, BlockNumber updateblkno,
- XLogRecData **prdata,
Page *newlpage, Page *newrpage)
{
Page page = BufferGetPage(buf);
if (GinPageIsLeaf(page))
return dataPlaceToPageLeaf(btree, buf, stack, insertdata,
- prdata, newlpage, newrpage);
+ newlpage, newrpage);
else
return dataPlaceToPageInternal(btree, buf, stack,
insertdata, updateblkno,
- prdata, newlpage, newrpage);
+ newlpage, newrpage);
}
/*
dataSplitPageInternal(GinBtree btree, Buffer origbuf,
GinBtreeStack *stack,
void *insertdata, BlockNumber updateblkno,
- XLogRecData **prdata, Page *newlpage, Page *newrpage)
+ Page *newlpage, Page *newrpage)
{
Page oldpage = BufferGetPage(origbuf);
OffsetNumber off = stack->off;
Page lpage;
Page rpage;
OffsetNumber separator;
-
- /* these must be static so they can be returned to caller */
- static ginxlogSplitDataInternal data;
- static XLogRecData rdata[4];
- static PostingItem allitems[(BLCKSZ / sizeof(PostingItem)) + 1];
+ PostingItem allitems[(BLCKSZ / sizeof(PostingItem)) + 1];
lpage = PageGetTempPage(oldpage);
rpage = PageGetTempPage(oldpage);
GinInitPage(lpage, GinPageGetOpaque(oldpage)->flags, pageSize);
GinInitPage(rpage, GinPageGetOpaque(oldpage)->flags, pageSize);
- *prdata = rdata;
-
/*
* First construct a new list of PostingItems, which includes all the old
* items, and the new item.
/* set up right bound for right page */
*GinDataPageGetRightBound(rpage) = oldbound;
- data.separator = separator;
- data.nitem = nitems;
- data.rightbound = oldbound;
-
- rdata[0].buffer = InvalidBuffer;
- rdata[0].data = (char *) &data;
- rdata[0].len = sizeof(ginxlogSplitDataInternal);
- rdata[0].next = &rdata[1];
-
- rdata[1].buffer = InvalidBuffer;
- rdata[1].data = (char *) allitems;
- rdata[1].len = nitems * sizeof(PostingItem);
- rdata[1].next = NULL;
-
*newlpage = lpage;
*newrpage = rpage;
}
if (RelationNeedsWAL(index))
{
XLogRecPtr recptr;
- XLogRecData rdata[2];
ginxlogCreatePostingTree data;
- data.node = index->rd_node;
- data.blkno = blkno;
data.size = rootsize;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].data = (char *) &data;
- rdata[0].len = sizeof(ginxlogCreatePostingTree);
- rdata[0].next = &rdata[1];
+ XLogBeginInsert();
+ XLogRegisterData((char *) &data, sizeof(ginxlogCreatePostingTree));
- rdata[1].buffer = InvalidBuffer;
- rdata[1].data = (char *) GinDataLeafPageGetPostingList(page);
- rdata[1].len = rootsize;
- rdata[1].next = NULL;
+ XLogRegisterData((char *) GinDataLeafPageGetPostingList(page),
+ rootsize);
+ XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT);
- recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_PTREE, rdata);
+ recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_PTREE);
PageSetLSN(page, recptr);
}
static void entrySplitPage(GinBtree btree, Buffer origbuf,
GinBtreeStack *stack,
void *insertPayload,
- BlockNumber updateblkno, XLogRecData **prdata,
+ BlockNumber updateblkno,
Page *newlpage, Page *newrpage);
/*
* On insertion to an internal node, in addition to inserting the given item,
* the downlink of the existing item at 'off' is updated to point to
* 'updateblkno'.
+ *
+ * On INSERTED, registers the buffer as buffer ID 0, with data.
+ * On SPLIT, returns rdata that represents the split pages in *prdata.
*/
static GinPlaceToPageRC
entryPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
void *insertPayload, BlockNumber updateblkno,
- XLogRecData **prdata, Page *newlpage, Page *newrpage)
+ Page *newlpage, Page *newrpage)
{
GinBtreeEntryInsertData *insertData = insertPayload;
Page page = BufferGetPage(buf);
OffsetNumber off = stack->off;
OffsetNumber placed;
- int cnt = 0;
- /* these must be static so they can be returned to caller */
- static XLogRecData rdata[3];
+ /*
+ * this must be static so it can be returned to caller. XXX: If we
+ * can rely on XLogRegisterData to copy the data, this isn't necessary
+ */
static ginxlogInsertEntry data;
/* quick exit if it doesn't fit */
if (!entryIsEnoughSpace(btree, buf, off, insertData))
{
entrySplitPage(btree, buf, stack, insertPayload, updateblkno,
- prdata, newlpage, newrpage);
+ newlpage, newrpage);
return SPLIT;
}
START_CRIT_SECTION();
- *prdata = rdata;
entryPreparePage(btree, page, off, insertData, updateblkno);
placed = PageAddItem(page,
elog(ERROR, "failed to add item to index page in \"%s\"",
RelationGetRelationName(btree->index));
- data.isDelete = insertData->isDelete;
- data.offset = off;
-
- rdata[cnt].buffer = buf;
- rdata[cnt].buffer_std = true;
- rdata[cnt].data = (char *) &data;
- rdata[cnt].len = offsetof(ginxlogInsertEntry, tuple);
- rdata[cnt].next = &rdata[cnt + 1];
- cnt++;
-
- rdata[cnt].buffer = buf;
- rdata[cnt].buffer_std = true;
- rdata[cnt].data = (char *) insertData->entry;
- rdata[cnt].len = IndexTupleSize(insertData->entry);
- rdata[cnt].next = NULL;
+ if (RelationNeedsWAL(btree->index))
+ {
+ data.isDelete = insertData->isDelete;
+ data.offset = off;
+
+ XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
+ XLogRegisterBufData(0, (char *) &data,
+ offsetof(ginxlogInsertEntry, tuple));
+ XLogRegisterBufData(0, (char *) insertData->entry,
+ IndexTupleSize(insertData->entry));
+ }
return INSERTED;
}
entrySplitPage(GinBtree btree, Buffer origbuf,
GinBtreeStack *stack,
void *insertPayload,
- BlockNumber updateblkno, XLogRecData **prdata,
+ BlockNumber updateblkno,
Page *newlpage, Page *newrpage)
{
GinBtreeEntryInsertData *insertData = insertPayload;
maxoff,
separator = InvalidOffsetNumber;
Size totalsize = 0;
- Size tupstoresize;
Size lsize = 0,
size;
char *ptr;
Page lpage = PageGetTempPageCopy(BufferGetPage(origbuf));
Page rpage = PageGetTempPageCopy(BufferGetPage(origbuf));
Size pageSize = PageGetPageSize(lpage);
+ char tupstore[2 * BLCKSZ];
- /* these must be static so they can be returned to caller */
- static XLogRecData rdata[2];
- static ginxlogSplitEntry data;
- static char tupstore[2 * BLCKSZ];
-
- *prdata = rdata;
entryPreparePage(btree, lpage, off, insertData, updateblkno);
/*
ptr += size;
totalsize += size + sizeof(ItemIdData);
}
- tupstoresize = ptr - tupstore;
/*
* Initialize the left and right pages, and copy all the tuples back to
ptr += MAXALIGN(IndexTupleSize(itup));
}
- data.separator = separator;
- data.nitem = maxoff;
-
- rdata[0].buffer = InvalidBuffer;
- rdata[0].data = (char *) &data;
- rdata[0].len = sizeof(ginxlogSplitEntry);
- rdata[0].next = &rdata[1];
-
- rdata[1].buffer = InvalidBuffer;
- rdata[1].data = tupstore;
- rdata[1].len = tupstoresize;
- rdata[1].next = NULL;
-
*newlpage = lpage;
*newrpage = rpage;
}
if (RelationNeedsWAL(index))
{
- XLogRecData rdata[2];
ginxlogInsertListPage data;
XLogRecPtr recptr;
- data.node = index->rd_node;
- data.blkno = BufferGetBlockNumber(buffer);
data.rightlink = rightlink;
data.ntuples = ntuples;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].data = (char *) &data;
- rdata[0].len = sizeof(ginxlogInsertListPage);
- rdata[0].next = rdata + 1;
+ XLogBeginInsert();
+ XLogRegisterData((char *) &data, sizeof(ginxlogInsertListPage));
- rdata[1].buffer = InvalidBuffer;
- rdata[1].data = workspace;
- rdata[1].len = size;
- rdata[1].next = NULL;
+ XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT);
+ XLogRegisterBufData(0, workspace, size);
- recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT_LISTPAGE, rdata);
+ recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT_LISTPAGE);
PageSetLSN(page, recptr);
}
Buffer metabuffer;
Page metapage;
GinMetaPageData *metadata = NULL;
- XLogRecData rdata[2];
Buffer buffer = InvalidBuffer;
Page page = NULL;
ginxlogUpdateMeta data;
bool separateList = false;
bool needCleanup = false;
+ bool needWal;
if (collector->ntuples == 0)
return;
+ needWal = RelationNeedsWAL(index);
+
data.node = index->rd_node;
data.ntuples = 0;
data.newRightlink = data.prevTail = InvalidBlockNumber;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].data = (char *) &data;
- rdata[0].len = sizeof(ginxlogUpdateMeta);
- rdata[0].next = NULL;
-
metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO);
metapage = BufferGetPage(metabuffer);
memset(&sublist, 0, sizeof(GinMetaPageData));
makeSublist(index, collector->tuples, collector->ntuples, &sublist);
+ if (needWal)
+ XLogBeginInsert();
+
/*
* metapage was unlocked, see above
*/
LockBuffer(buffer, GIN_EXCLUSIVE);
page = BufferGetPage(buffer);
- rdata[0].next = rdata + 1;
-
- rdata[1].buffer = buffer;
- rdata[1].buffer_std = true;
- rdata[1].data = NULL;
- rdata[1].len = 0;
- rdata[1].next = NULL;
-
Assert(GinPageGetOpaque(page)->rightlink == InvalidBlockNumber);
START_CRIT_SECTION();
metadata->nPendingPages += sublist.nPendingPages;
metadata->nPendingHeapTuples += sublist.nPendingHeapTuples;
+
+ if (needWal)
+ XLogRegisterBuffer(1, buffer, REGBUF_STANDARD);
}
}
else
int i,
tupsize;
char *ptr;
+ char *collectordata;
buffer = ReadBuffer(index, metadata->tail);
LockBuffer(buffer, GIN_EXCLUSIVE);
off = (PageIsEmpty(page)) ? FirstOffsetNumber :
OffsetNumberNext(PageGetMaxOffsetNumber(page));
- rdata[0].next = rdata + 1;
-
- rdata[1].buffer = buffer;
- rdata[1].buffer_std = true;
- ptr = rdata[1].data = (char *) palloc(collector->sumsize);
- rdata[1].len = collector->sumsize;
- rdata[1].next = NULL;
+ collectordata = ptr = (char *) palloc(collector->sumsize);
data.ntuples = collector->ntuples;
+ if (needWal)
+ XLogBeginInsert();
+
START_CRIT_SECTION();
/*
off++;
}
- Assert((ptr - rdata[1].data) <= collector->sumsize);
+ Assert((ptr - collectordata) <= collector->sumsize);
+ if (needWal)
+ {
+ XLogRegisterBuffer(1, buffer, REGBUF_STANDARD);
+ XLogRegisterBufData(1, collectordata, collector->sumsize);
+ }
metadata->tailFreeSize = PageGetExactFreeSpace(page);
*/
MarkBufferDirty(metabuffer);
- if (RelationNeedsWAL(index))
+ if (needWal)
{
XLogRecPtr recptr;
memcpy(&data.metadata, metadata, sizeof(GinMetaPageData));
- recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE, rdata);
+ XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT);
+ XLogRegisterData((char *) &data, sizeof(ginxlogUpdateMeta));
+
+ recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE);
PageSetLSN(metapage, recptr);
if (buffer != InvalidBuffer)
int i;
int64 nDeletedHeapTuples = 0;
ginxlogDeleteListPages data;
- XLogRecData rdata[1];
Buffer buffers[GIN_NDELETE_AT_ONCE];
- data.node = index->rd_node;
-
- rdata[0].buffer = InvalidBuffer;
- rdata[0].data = (char *) &data;
- rdata[0].len = sizeof(ginxlogDeleteListPages);
- rdata[0].next = NULL;
-
data.ndeleted = 0;
while (data.ndeleted < GIN_NDELETE_AT_ONCE && blknoToDelete != newHead)
{
- data.toDelete[data.ndeleted] = blknoToDelete;
buffers[data.ndeleted] = ReadBuffer(index, blknoToDelete);
LockBuffer(buffers[data.ndeleted], GIN_EXCLUSIVE);
page = BufferGetPage(buffers[data.ndeleted]);
if (stats)
stats->pages_deleted += data.ndeleted;
+ /*
+ * This operation touches an unusually large number of pages, so
+ * prepare the XLogInsert machinery for that before entering the
+ * critical section.
+ */
+ XLogEnsureRecordSpace(data.ndeleted + 1, 0);
+
START_CRIT_SECTION();
metadata->head = blknoToDelete;
{
XLogRecPtr recptr;
+ XLogBeginInsert();
+ XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT);
+ for (i = 0; i < data.ndeleted; i++)
+ XLogRegisterBuffer(i + 1, buffers[i], REGBUF_WILL_INIT);
+
memcpy(&data.metadata, metadata, sizeof(GinMetaPageData));
- recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_LISTPAGE, rdata);
+ XLogRegisterData((char *) &data,
+ sizeof(ginxlogDeleteListPages));
+
+ recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_LISTPAGE);
PageSetLSN(metapage, recptr);
for (i = 0; i < data.ndeleted; i++)
if (RelationNeedsWAL(index))
{
XLogRecPtr recptr;
- XLogRecData rdata;
Page page;
- rdata.buffer = InvalidBuffer;
- rdata.data = (char *) &(index->rd_node);
- rdata.len = sizeof(RelFileNode);
- rdata.next = NULL;
+ XLogBeginInsert();
+ XLogRegisterBuffer(0, MetaBuffer, REGBUF_WILL_INIT);
+ XLogRegisterBuffer(1, RootBuffer, REGBUF_WILL_INIT);
- recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX, &rdata);
+ recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX);
page = BufferGetPage(RootBuffer);
PageSetLSN(page, recptr);
{
XLogRecPtr recptr;
ginxlogUpdateMeta data;
- XLogRecData rdata;
data.node = index->rd_node;
data.ntuples = 0;
data.newRightlink = data.prevTail = InvalidBlockNumber;
memcpy(&data.metadata, metadata, sizeof(GinMetaPageData));
- rdata.buffer = InvalidBuffer;
- rdata.data = (char *) &data;
- rdata.len = sizeof(ginxlogUpdateMeta);
- rdata.next = NULL;
+ XLogBeginInsert();
+ XLogRegisterData((char *) &data, sizeof(ginxlogUpdateMeta));
+ XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT);
- recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE, &rdata);
+ recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE);
PageSetLSN(metapage, recptr);
}
{
Page page = BufferGetPage(buffer);
XLogRecPtr recptr;
- XLogRecData rdata[3];
- ginxlogVacuumPage xlrec;
- uint16 lower;
- uint16 upper;
/* This is only used for entry tree leaf pages. */
Assert(!GinPageIsData(page));
if (!RelationNeedsWAL(index))
return;
- xlrec.node = index->rd_node;
- xlrec.blkno = BufferGetBlockNumber(buffer);
-
- /* Assume we can omit data between pd_lower and pd_upper */
- lower = ((PageHeader) page)->pd_lower;
- upper = ((PageHeader) page)->pd_upper;
-
- Assert(lower < BLCKSZ);
- Assert(upper < BLCKSZ);
-
- if (lower >= SizeOfPageHeaderData &&
- upper > lower &&
- upper <= BLCKSZ)
- {
- xlrec.hole_offset = lower;
- xlrec.hole_length = upper - lower;
- }
- else
- {
- /* No "hole" to compress out */
- xlrec.hole_offset = 0;
- xlrec.hole_length = 0;
- }
-
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = sizeof(ginxlogVacuumPage);
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &rdata[1];
-
- if (xlrec.hole_length == 0)
- {
- rdata[1].data = (char *) page;
- rdata[1].len = BLCKSZ;
- rdata[1].buffer = InvalidBuffer;
- rdata[1].next = NULL;
- }
- else
- {
- /* must skip the hole */
- rdata[1].data = (char *) page;
- rdata[1].len = xlrec.hole_offset;
- rdata[1].buffer = InvalidBuffer;
- rdata[1].next = &rdata[2];
-
- rdata[2].data = (char *) page + (xlrec.hole_offset + xlrec.hole_length);
- rdata[2].len = BLCKSZ - (xlrec.hole_offset + xlrec.hole_length);
- rdata[2].buffer = InvalidBuffer;
- rdata[2].next = NULL;
- }
+ /*
+ * Always create a full-page, we don't track the changes on the page
+ * at any more fine-grained level. This could obviously be improved...
+ */
+ XLogBeginInsert();
+ XLogRegisterBuffer(0, buffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
- recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_PAGE, rdata);
+ recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_PAGE);
PageSetLSN(page, recptr);
}
if (RelationNeedsWAL(gvs->index))
{
XLogRecPtr recptr;
- XLogRecData rdata[4];
ginxlogDeletePage data;
- data.node = gvs->index->rd_node;
- data.blkno = deleteBlkno;
- data.parentBlkno = parentBlkno;
+ /*
+ * We can't pass REGBUF_STANDARD for the deleted page, because we
+ * didn't set pd_lower on pre-9.4 versions. The page might've been
+ * binary-upgraded from an older version, and hence not have pd_lower
+ * set correctly. Ditto for the left page, but removing the item from
+ * the parent updated its pd_lower, so we know that's OK at this point.
+ */
+ XLogBeginInsert();
+ XLogRegisterBuffer(0, dBuffer, 0);
+ XLogRegisterBuffer(1, pBuffer, REGBUF_STANDARD);
+ XLogRegisterBuffer(2, lBuffer, 0);
+
data.parentOffset = myoff;
- data.leftBlkno = leftBlkno;
data.rightLink = GinPageGetOpaque(page)->rightlink;
- /*
- * We can't pass buffer_std = TRUE, because we didn't set pd_lower on
- * pre-9.4 versions. The page might've been binary-upgraded from an
- * older version, and hence not have pd_lower set correctly. Ditto for
- * the left page, but removing the item from the parent updated its
- * pd_lower, so we know that's OK at this point.
- */
- rdata[0].buffer = dBuffer;
- rdata[0].buffer_std = FALSE;
- rdata[0].data = NULL;
- rdata[0].len = 0;
- rdata[0].next = rdata + 1;
-
- rdata[1].buffer = pBuffer;
- rdata[1].buffer_std = TRUE;
- rdata[1].data = NULL;
- rdata[1].len = 0;
- rdata[1].next = rdata + 2;
-
- rdata[2].buffer = lBuffer;
- rdata[2].buffer_std = FALSE;
- rdata[2].data = NULL;
- rdata[2].len = 0;
- rdata[2].next = rdata + 3;
-
- rdata[3].buffer = InvalidBuffer;
- rdata[3].buffer_std = FALSE;
- rdata[3].len = sizeof(ginxlogDeletePage);
- rdata[3].data = (char *) &data;
- rdata[3].next = NULL;
-
- recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_PAGE, rdata);
+ XLogRegisterData((char *) &data, sizeof(ginxlogDeletePage));
+
+ recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_PAGE);
PageSetLSN(page, recptr);
PageSetLSN(parentPage, recptr);
PageSetLSN(BufferGetPage(lBuffer), recptr);
static MemoryContext opCtx; /* working memory for operations */
static void
-ginRedoClearIncompleteSplit(XLogRecPtr lsn, XLogRecord *record,
- int block_index,
- RelFileNode node, BlockNumber blkno)
+ginRedoClearIncompleteSplit(XLogRecPtr lsn, XLogRecord *record, uint8 block_id)
{
Buffer buffer;
Page page;
- if (XLogReadBufferForRedo(lsn, record, block_index, node, blkno, &buffer)
- == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(lsn, record, block_id, &buffer) == BLK_NEEDS_REDO)
{
page = (Page) BufferGetPage(buffer);
-
GinPageGetOpaque(page)->flags &= ~GIN_INCOMPLETE_SPLIT;
PageSetLSN(page, lsn);
static void
ginRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
{
- RelFileNode *node = (RelFileNode *) XLogRecGetData(record);
Buffer RootBuffer,
MetaBuffer;
Page page;
- /* Backup blocks are not used in create_index records */
- Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
-
- MetaBuffer = XLogReadBuffer(*node, GIN_METAPAGE_BLKNO, true);
- Assert(BufferIsValid(MetaBuffer));
+ XLogReadBufferForRedoExtended(lsn, record, 0, RBM_ZERO, false, &MetaBuffer);
+ Assert(BufferGetBlockNumber(MetaBuffer) == GIN_METAPAGE_BLKNO);
page = (Page) BufferGetPage(MetaBuffer);
GinInitMetabuffer(MetaBuffer);
PageSetLSN(page, lsn);
MarkBufferDirty(MetaBuffer);
- RootBuffer = XLogReadBuffer(*node, GIN_ROOT_BLKNO, true);
- Assert(BufferIsValid(RootBuffer));
+ XLogReadBufferForRedoExtended(lsn, record, 1, RBM_ZERO, false, &RootBuffer);
+ Assert(BufferGetBlockNumber(RootBuffer) == GIN_ROOT_BLKNO);
page = (Page) BufferGetPage(RootBuffer);
GinInitBuffer(RootBuffer, GIN_LEAF);
Buffer buffer;
Page page;
- /* Backup blocks are not used in create_ptree records */
- Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
-
- buffer = XLogReadBuffer(data->node, data->blkno, true);
- Assert(BufferIsValid(buffer));
+ XLogReadBufferForRedoExtended(lsn, record, 0, RBM_ZERO, false, &buffer);
page = (Page) BufferGetPage(buffer);
GinInitBuffer(buffer, GIN_DATA | GIN_LEAF | GIN_COMPRESSED);
{
ginxlogInsert *data = (ginxlogInsert *) XLogRecGetData(record);
Buffer buffer;
- char *payload;
+#ifdef NOT_USED
BlockNumber leftChildBlkno = InvalidBlockNumber;
+#endif
BlockNumber rightChildBlkno = InvalidBlockNumber;
bool isLeaf = (data->flags & GIN_INSERT_ISLEAF) != 0;
- payload = XLogRecGetData(record) + sizeof(ginxlogInsert);
-
/*
* First clear incomplete-split flag on child page if this finishes a
* split.
*/
if (!isLeaf)
{
+ char *payload = XLogRecGetData(record) + sizeof(ginxlogInsert);
+
+#ifdef NOT_USED
leftChildBlkno = BlockIdGetBlockNumber((BlockId) payload);
+#endif
payload += sizeof(BlockIdData);
rightChildBlkno = BlockIdGetBlockNumber((BlockId) payload);
payload += sizeof(BlockIdData);
- ginRedoClearIncompleteSplit(lsn, record, 0, data->node, leftChildBlkno);
+ ginRedoClearIncompleteSplit(lsn, record, 1);
}
- if (XLogReadBufferForRedo(lsn, record, isLeaf ? 0 : 1, data->node,
- data->blkno, &buffer) == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(lsn, record, 0, &buffer) == BLK_NEEDS_REDO)
{
Page page = BufferGetPage(buffer);
+ Size len;
+ char *payload = XLogRecGetBlockData(record, 0, &len);
/* How to insert the payload is tree-type specific */
if (data->flags & GIN_INSERT_ISDATA)
UnlockReleaseBuffer(buffer);
}
-static void
-ginRedoSplitEntry(Page lpage, Page rpage, void *rdata)
-{
- ginxlogSplitEntry *data = (ginxlogSplitEntry *) rdata;
- IndexTuple itup = (IndexTuple) ((char *) rdata + sizeof(ginxlogSplitEntry));
- OffsetNumber i;
-
- for (i = 0; i < data->separator; i++)
- {
- if (PageAddItem(lpage, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
- elog(ERROR, "failed to add item to gin index page");
- itup = (IndexTuple) (((char *) itup) + MAXALIGN(IndexTupleSize(itup)));
- }
-
- for (i = data->separator; i < data->nitem; i++)
- {
- if (PageAddItem(rpage, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
- elog(ERROR, "failed to add item to gin index page");
- itup = (IndexTuple) (((char *) itup) + MAXALIGN(IndexTupleSize(itup)));
- }
-}
-
-static void
-ginRedoSplitData(Page lpage, Page rpage, void *rdata)
-{
- bool isleaf = GinPageIsLeaf(lpage);
-
- if (isleaf)
- {
- ginxlogSplitDataLeaf *data = (ginxlogSplitDataLeaf *) rdata;
- Pointer lptr = (Pointer) rdata + sizeof(ginxlogSplitDataLeaf);
- Pointer rptr = lptr + data->lsize;
-
- Assert(data->lsize > 0 && data->lsize <= GinDataPageMaxDataSize);
- Assert(data->rsize > 0 && data->rsize <= GinDataPageMaxDataSize);
-
- memcpy(GinDataLeafPageGetPostingList(lpage), lptr, data->lsize);
- memcpy(GinDataLeafPageGetPostingList(rpage), rptr, data->rsize);
-
- GinDataPageSetDataSize(lpage, data->lsize);
- GinDataPageSetDataSize(rpage, data->rsize);
- *GinDataPageGetRightBound(lpage) = data->lrightbound;
- *GinDataPageGetRightBound(rpage) = data->rrightbound;
- }
- else
- {
- ginxlogSplitDataInternal *data = (ginxlogSplitDataInternal *) rdata;
- PostingItem *items = (PostingItem *) ((char *) rdata + sizeof(ginxlogSplitDataInternal));
- OffsetNumber i;
- OffsetNumber maxoff;
-
- for (i = 0; i < data->separator; i++)
- GinDataPageAddPostingItem(lpage, &items[i], InvalidOffsetNumber);
- for (i = data->separator; i < data->nitem; i++)
- GinDataPageAddPostingItem(rpage, &items[i], InvalidOffsetNumber);
-
- /* set up right key */
- maxoff = GinPageGetOpaque(lpage)->maxoff;
- *GinDataPageGetRightBound(lpage) = GinDataPageGetPostingItem(lpage, maxoff)->key;
- *GinDataPageGetRightBound(rpage) = data->rightbound;
- }
-}
-
static void
ginRedoSplit(XLogRecPtr lsn, XLogRecord *record)
{
ginxlogSplit *data = (ginxlogSplit *) XLogRecGetData(record);
Buffer lbuffer,
- rbuffer;
- Page lpage,
- rpage;
- uint32 flags;
- uint32 lflags,
- rflags;
- char *payload;
+ rbuffer,
+ rootbuf;
bool isLeaf = (data->flags & GIN_INSERT_ISLEAF) != 0;
- bool isData = (data->flags & GIN_INSERT_ISDATA) != 0;
bool isRoot = (data->flags & GIN_SPLIT_ROOT) != 0;
- payload = XLogRecGetData(record) + sizeof(ginxlogSplit);
-
/*
* First clear incomplete-split flag on child page if this finishes a
* split
*/
if (!isLeaf)
- ginRedoClearIncompleteSplit(lsn, record, 0, data->node, data->leftChildBlkno);
-
- flags = 0;
- if (isLeaf)
- flags |= GIN_LEAF;
- if (isData)
- flags |= GIN_DATA;
- if (isLeaf && isData)
- flags |= GIN_COMPRESSED;
-
- lflags = rflags = flags;
- if (!isRoot)
- lflags |= GIN_INCOMPLETE_SPLIT;
-
- lbuffer = XLogReadBuffer(data->node, data->lblkno, true);
- Assert(BufferIsValid(lbuffer));
- lpage = (Page) BufferGetPage(lbuffer);
- GinInitBuffer(lbuffer, lflags);
-
- rbuffer = XLogReadBuffer(data->node, data->rblkno, true);
- Assert(BufferIsValid(rbuffer));
- rpage = (Page) BufferGetPage(rbuffer);
- GinInitBuffer(rbuffer, rflags);
-
- GinPageGetOpaque(lpage)->rightlink = BufferGetBlockNumber(rbuffer);
- GinPageGetOpaque(rpage)->rightlink = isRoot ? InvalidBlockNumber : data->rrlink;
-
- /* Do the tree-type specific portion to restore the page contents */
- if (isData)
- ginRedoSplitData(lpage, rpage, payload);
- else
- ginRedoSplitEntry(lpage, rpage, payload);
+ ginRedoClearIncompleteSplit(lsn, record, 3);
- PageSetLSN(rpage, lsn);
- MarkBufferDirty(rbuffer);
+ if (XLogReadBufferForRedo(lsn, record, 0, &lbuffer) != BLK_RESTORED)
+ elog(ERROR, "GIN split record did not contain a full-page image of left page");
- PageSetLSN(lpage, lsn);
- MarkBufferDirty(lbuffer);
+ if (XLogReadBufferForRedo(lsn, record, 1, &rbuffer) != BLK_RESTORED)
+ elog(ERROR, "GIN split record did not contain a full-page image of right page");
if (isRoot)
{
- BlockNumber rootBlkno = data->rrlink;
- Buffer rootBuf = XLogReadBuffer(data->node, rootBlkno, true);
- Page rootPage = BufferGetPage(rootBuf);
-
- GinInitBuffer(rootBuf, flags & ~GIN_LEAF & ~GIN_COMPRESSED);
-
- if (isData)
- {
- Assert(rootBlkno != GIN_ROOT_BLKNO);
- ginDataFillRoot(NULL, BufferGetPage(rootBuf),
- BufferGetBlockNumber(lbuffer),
- BufferGetPage(lbuffer),
- BufferGetBlockNumber(rbuffer),
- BufferGetPage(rbuffer));
- }
- else
- {
- Assert(rootBlkno == GIN_ROOT_BLKNO);
- ginEntryFillRoot(NULL, BufferGetPage(rootBuf),
- BufferGetBlockNumber(lbuffer),
- BufferGetPage(lbuffer),
- BufferGetBlockNumber(rbuffer),
- BufferGetPage(rbuffer));
- }
-
- PageSetLSN(rootPage, lsn);
-
- MarkBufferDirty(rootBuf);
- UnlockReleaseBuffer(rootBuf);
+ if (XLogReadBufferForRedo(lsn, record, 2, &rootbuf) != BLK_RESTORED)
+ elog(ERROR, "GIN split record did not contain a full-page image of root page");
+ UnlockReleaseBuffer(rootbuf);
}
UnlockReleaseBuffer(rbuffer);
static void
ginRedoVacuumPage(XLogRecPtr lsn, XLogRecord *record)
{
- ginxlogVacuumPage *xlrec = (ginxlogVacuumPage *) XLogRecGetData(record);
- char *blk = ((char *) xlrec) + sizeof(ginxlogVacuumPage);
Buffer buffer;
- Page page;
-
- Assert(xlrec->hole_offset < BLCKSZ);
- Assert(xlrec->hole_length < BLCKSZ);
-
- /* Backup blocks are not used, we'll re-initialize the page always. */
- Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
- buffer = XLogReadBuffer(xlrec->node, xlrec->blkno, true);
- if (!BufferIsValid(buffer))
- return;
- page = (Page) BufferGetPage(buffer);
-
- if (xlrec->hole_length == 0)
+ if (XLogReadBufferForRedo(lsn, record, 0, &buffer) != BLK_RESTORED)
{
- memcpy((char *) page, blk, BLCKSZ);
+ elog(ERROR, "replay of gin entry tree page vacuum did not restore the page");
}
- else
- {
- memcpy((char *) page, blk, xlrec->hole_offset);
- /* must zero-fill the hole */
- MemSet((char *) page + xlrec->hole_offset, 0, xlrec->hole_length);
- memcpy((char *) page + (xlrec->hole_offset + xlrec->hole_length),
- blk + xlrec->hole_offset,
- BLCKSZ - (xlrec->hole_offset + xlrec->hole_length));
- }
-
- PageSetLSN(page, lsn);
-
- MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
}
static void
ginRedoVacuumDataLeafPage(XLogRecPtr lsn, XLogRecord *record)
{
- ginxlogVacuumDataLeafPage *xlrec = (ginxlogVacuumDataLeafPage *) XLogRecGetData(record);
Buffer buffer;
- if (XLogReadBufferForRedo(lsn, record, 0, xlrec->node, xlrec->blkno,
- &buffer) == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(lsn, record, 0, &buffer) == BLK_NEEDS_REDO)
{
Page page = BufferGetPage(buffer);
+ Size len;
+ ginxlogVacuumDataLeafPage *xlrec;
+
+ xlrec = (ginxlogVacuumDataLeafPage *) XLogRecGetBlockData(record, 0, &len);
Assert(GinPageIsLeaf(page));
Assert(GinPageIsData(page));
Buffer lbuffer;
Page page;
- if (XLogReadBufferForRedo(lsn, record, 0, data->node, data->blkno, &dbuffer)
- == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(lsn, record, 0, &dbuffer) == BLK_NEEDS_REDO)
{
page = BufferGetPage(dbuffer);
-
Assert(GinPageIsData(page));
GinPageGetOpaque(page)->flags = GIN_DELETED;
PageSetLSN(page, lsn);
MarkBufferDirty(dbuffer);
}
- if (XLogReadBufferForRedo(lsn, record, 1, data->node, data->parentBlkno,
- &pbuffer) == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(lsn, record, 1, &pbuffer) == BLK_NEEDS_REDO)
{
page = BufferGetPage(pbuffer);
-
Assert(GinPageIsData(page));
Assert(!GinPageIsLeaf(page));
GinPageDeletePostingItem(page, data->parentOffset);
MarkBufferDirty(pbuffer);
}
- if (XLogReadBufferForRedo(lsn, record, 2, data->node, data->leftBlkno,
- &lbuffer) == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(lsn, record, 2, &lbuffer) == BLK_NEEDS_REDO)
{
page = BufferGetPage(lbuffer);
-
Assert(GinPageIsData(page));
GinPageGetOpaque(page)->rightlink = data->rightLink;
PageSetLSN(page, lsn);
* image, so restore the metapage unconditionally without looking at the
* LSN, to avoid torn page hazards.
*/
- metabuffer = XLogReadBuffer(data->node, GIN_METAPAGE_BLKNO, false);
- if (!BufferIsValid(metabuffer))
- return; /* assume index was deleted, nothing to do */
+ XLogReadBufferForRedoExtended(lsn, record, 0, RBM_ZERO, false, &metabuffer);
+ Assert(BufferGetBlockNumber(metabuffer) == GIN_METAPAGE_BLKNO);
metapage = BufferGetPage(metabuffer);
memcpy(GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData));
/*
* insert into tail page
*/
- if (XLogReadBufferForRedo(lsn, record, 0, data->node,
- data->metadata.tail, &buffer)
- == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(lsn, record, 1, &buffer) == BLK_NEEDS_REDO)
{
Page page = BufferGetPage(buffer);
OffsetNumber off;
int i;
Size tupsize;
+ char *payload;
IndexTuple tuples;
+ Size totaltupsize;
- tuples = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogUpdateMeta));
+ payload = XLogRecGetBlockData(record, 1, &totaltupsize);
+ tuples = (IndexTuple) payload;
if (PageIsEmpty(page))
off = FirstOffsetNumber;
off++;
}
+ Assert(payload + totaltupsize == (char *) tuples);
/*
* Increase counter of heap tuples
/*
* New tail
*/
- if (XLogReadBufferForRedo(lsn, record, 0, data->node, data->prevTail,
- &buffer) == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(lsn, record, 1, &buffer) == BLK_NEEDS_REDO)
{
Page page = BufferGetPage(buffer);
off = FirstOffsetNumber;
int i,
tupsize;
- IndexTuple tuples = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogInsertListPage));
-
- /*
- * Backup blocks are not used, we always re-initialize the page.
- */
- Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+ char *payload;
+ IndexTuple tuples;
+ Size totaltupsize;
- buffer = XLogReadBuffer(data->node, data->blkno, true);
- Assert(BufferIsValid(buffer));
+ /* We always re-initialize the page. */
+ XLogReadBufferForRedoExtended(lsn, record, 0, RBM_ZERO, false, &buffer);
page = BufferGetPage(buffer);
GinInitBuffer(buffer, GIN_LIST);
GinPageGetOpaque(page)->maxoff = 0;
}
+ payload = XLogRecGetBlockData(record, 0, &totaltupsize);
+
+ tuples = (IndexTuple) payload;
for (i = 0; i < data->ntuples; i++)
{
tupsize = IndexTupleSize(tuples);
tuples = (IndexTuple) (((char *) tuples) + tupsize);
off++;
}
+ Assert((char *) tuples == payload + totaltupsize);
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
Page metapage;
int i;
- /* Backup blocks are not used in delete_listpage records */
- Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
-
- metabuffer = XLogReadBuffer(data->node, GIN_METAPAGE_BLKNO, false);
- if (!BufferIsValid(metabuffer))
- return; /* assume index was deleted, nothing to do */
+ XLogReadBufferForRedoExtended(lsn, record, 0, RBM_ZERO, false, &metabuffer);
+ Assert(BufferGetBlockNumber(metabuffer) == GIN_METAPAGE_BLKNO);
metapage = BufferGetPage(metabuffer);
+ GinInitPage(metapage, GIN_META, BufferGetPageSize(metabuffer));
+
memcpy(GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData));
PageSetLSN(metapage, lsn);
MarkBufferDirty(metabuffer);
Buffer buffer;
Page page;
- buffer = XLogReadBuffer(data->node, data->toDelete[i], true);
+ XLogReadBufferForRedoExtended(lsn, record, i + 1, RBM_ZERO, false,
+ &buffer);
page = BufferGetPage(buffer);
GinInitBuffer(buffer, GIN_DELETED);
#include "access/genam.h"
#include "access/gist_private.h"
+#include "access/xloginsert.h"
#include "catalog/index.h"
#include "catalog/pg_collation.h"
#include "miscadmin.h"
GistPageSetNSN(ptr->page, oldnsn);
}
+ /*
+ * gistXLogSplit() needs to WAL log a lot of pages, prepare WAL
+ * insertion for that. NB: The number of pages and data segments
+ * specified here must match the calculations in gistXLogSplit()!
+ */
+ if (RelationNeedsWAL(rel))
+ XLogEnsureRecordSpace(npage, 1 + npage * 2);
+
START_CRIT_SECTION();
/*
if (RelationNeedsWAL(index))
{
XLogRecPtr recptr;
- XLogRecData rdata;
- rdata.data = (char *) &(index->rd_node);
- rdata.len = sizeof(RelFileNode);
- rdata.buffer = InvalidBuffer;
- rdata.next = NULL;
+ XLogBeginInsert();
+ XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT);
- recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_CREATE_INDEX, &rdata);
+ recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_CREATE_INDEX);
PageSetLSN(page, recptr);
}
else
#include "access/xlogutils.h"
#include "utils/memutils.h"
-typedef struct
-{
- gistxlogPage *header;
- IndexTuple *itup;
-} NewPage;
-
-typedef struct
-{
- gistxlogPageSplit *data;
- NewPage *page;
-} PageSplitRecord;
-
static MemoryContext opCtx; /* working memory for operations */
/*
* action.)
*/
static void
-gistRedoClearFollowRight(XLogRecPtr lsn, XLogRecord *record, int block_index,
- RelFileNode node, BlockNumber childblkno)
+gistRedoClearFollowRight(XLogRecPtr lsn, XLogRecord *record, uint8 block_id)
{
Buffer buffer;
Page page;
* Note that we still update the page even if it was restored from a full
* page image, because the updated NSN is not included in the image.
*/
- action = XLogReadBufferForRedo(lsn, record, block_index, node, childblkno,
- &buffer);
+ action = XLogReadBufferForRedo(lsn, record, block_id, &buffer);
if (action == BLK_NEEDS_REDO || action == BLK_RESTORED)
{
page = BufferGetPage(buffer);
static void
gistRedoPageUpdateRecord(XLogRecPtr lsn, XLogRecord *record)
{
- char *begin = XLogRecGetData(record);
- gistxlogPageUpdate *xldata = (gistxlogPageUpdate *) begin;
+ gistxlogPageUpdate *xldata = (gistxlogPageUpdate *) XLogRecGetData(record);
Buffer buffer;
Page page;
- char *data;
- if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno,
- &buffer) == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(lsn, record, 0, &buffer) == BLK_NEEDS_REDO)
{
- page = (Page) BufferGetPage(buffer);
+ char *begin;
+ char *data;
+ Size datalen;
+ int ninserted = 0;
- data = begin + sizeof(gistxlogPageUpdate);
+ data = begin = XLogRecGetBlockData(record, 0, &datalen);
+
+ page = (Page) BufferGetPage(buffer);
/* Delete old tuples */
if (xldata->ntodelete > 0)
}
/* add tuples */
- if (data - begin < record->xl_len)
+ if (data - begin < datalen)
{
OffsetNumber off = (PageIsEmpty(page)) ? FirstOffsetNumber :
OffsetNumberNext(PageGetMaxOffsetNumber(page));
- while (data - begin < record->xl_len)
+ while (data - begin < datalen)
{
IndexTuple itup = (IndexTuple) data;
Size sz = IndexTupleSize(itup);
elog(ERROR, "failed to add item to GiST index page, size %d bytes",
(int) sz);
off++;
+ ninserted++;
}
}
+ Assert(ninserted == xldata->ntoinsert);
+
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
}
* that even if the target page no longer exists, we still attempt to
* replay the change on the child page.
*/
- if (BlockNumberIsValid(xldata->leftchild))
- gistRedoClearFollowRight(lsn, record, 1,
- xldata->node, xldata->leftchild);
+ if (XLogRecHasBlockRef(record, 1))
+ gistRedoClearFollowRight(lsn, record, 1);
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
}
-static void
-decodePageSplitRecord(PageSplitRecord *decoded, XLogRecord *record)
+/*
+ * Returns an array of index pointers.
+ */
+static IndexTuple *
+decodePageSplitRecord(char *begin, int len, int *n)
{
- char *begin = XLogRecGetData(record),
- *ptr;
- int j,
- i = 0;
+ char *ptr;
+ int i = 0;
+ IndexTuple *tuples;
+
+ /* extract the number of tuples */
+ memcpy(n, begin, sizeof(int));
+ ptr = begin + sizeof(int);
- decoded->data = (gistxlogPageSplit *) begin;
- decoded->page = (NewPage *) palloc(sizeof(NewPage) * decoded->data->npage);
+ tuples = palloc(*n * sizeof(IndexTuple));
- ptr = begin + sizeof(gistxlogPageSplit);
- for (i = 0; i < decoded->data->npage; i++)
+ for (i = 0; i < *n; i++)
{
- Assert(ptr - begin < record->xl_len);
- decoded->page[i].header = (gistxlogPage *) ptr;
- ptr += sizeof(gistxlogPage);
-
- decoded->page[i].itup = (IndexTuple *)
- palloc(sizeof(IndexTuple) * decoded->page[i].header->num);
- j = 0;
- while (j < decoded->page[i].header->num)
- {
- Assert(ptr - begin < record->xl_len);
- decoded->page[i].itup[j] = (IndexTuple) ptr;
- ptr += IndexTupleSize((IndexTuple) ptr);
- j++;
- }
+ Assert(ptr - begin < len);
+ tuples[i] = (IndexTuple) ptr;
+ ptr += IndexTupleSize((IndexTuple) ptr);
}
+ Assert(ptr - begin == len);
+
+ return tuples;
}
static void
gistRedoPageSplitRecord(XLogRecPtr lsn, XLogRecord *record)
{
gistxlogPageSplit *xldata = (gistxlogPageSplit *) XLogRecGetData(record);
- PageSplitRecord xlrec;
Buffer firstbuffer = InvalidBuffer;
Buffer buffer;
Page page;
int i;
bool isrootsplit = false;
- decodePageSplitRecord(&xlrec, record);
-
/*
* We must hold lock on the first-listed page throughout the action,
* including while updating the left child page (if any). We can unlock
*/
/* loop around all pages */
- for (i = 0; i < xlrec.data->npage; i++)
+ for (i = 0; i < xldata->npage; i++)
{
- NewPage *newpage = xlrec.page + i;
int flags;
-
- if (newpage->header->blkno == GIST_ROOT_BLKNO)
+ char *data;
+ Size datalen;
+ int num;
+ BlockNumber blkno;
+ IndexTuple *tuples;
+
+ XLogRecGetBlockTag(record, i + 1, NULL, NULL, &blkno);
+ if (blkno == GIST_ROOT_BLKNO)
{
Assert(i == 0);
isrootsplit = true;
}
- buffer = XLogReadBuffer(xlrec.data->node, newpage->header->blkno, true);
- Assert(BufferIsValid(buffer));
+ XLogReadBufferForRedoExtended(lsn, record, i + 1, RBM_ZERO, false,
+ &buffer);
page = (Page) BufferGetPage(buffer);
+ data = XLogRecGetBlockData(record, i + 1, &datalen);
+
+ tuples = decodePageSplitRecord(data, datalen, &num);
/* ok, clear buffer */
- if (xlrec.data->origleaf && newpage->header->blkno != GIST_ROOT_BLKNO)
+ if (xldata->origleaf && blkno != GIST_ROOT_BLKNO)
flags = F_LEAF;
else
flags = 0;
GISTInitBuffer(buffer, flags);
/* and fill it */
- gistfillbuffer(page, newpage->itup, newpage->header->num, FirstOffsetNumber);
+ gistfillbuffer(page, tuples, num, FirstOffsetNumber);
- if (newpage->header->blkno == GIST_ROOT_BLKNO)
+ if (blkno == GIST_ROOT_BLKNO)
{
GistPageGetOpaque(page)->rightlink = InvalidBlockNumber;
GistPageSetNSN(page, xldata->orignsn);
}
else
{
- if (i < xlrec.data->npage - 1)
- GistPageGetOpaque(page)->rightlink = xlrec.page[i + 1].header->blkno;
+ if (i < xldata->npage - 1)
+ {
+ BlockNumber nextblkno;
+
+ XLogRecGetBlockTag(record, i + 2, NULL, NULL, &nextblkno);
+ GistPageGetOpaque(page)->rightlink = nextblkno;
+ }
else
GistPageGetOpaque(page)->rightlink = xldata->origrlink;
GistPageSetNSN(page, xldata->orignsn);
- if (i < xlrec.data->npage - 1 && !isrootsplit &&
+ if (i < xldata->npage - 1 && !isrootsplit &&
xldata->markfollowright)
GistMarkFollowRight(page);
else
}
/* Fix follow-right data on left child page, if any */
- if (BlockNumberIsValid(xldata->leftchild))
- gistRedoClearFollowRight(lsn, record, 0,
- xldata->node, xldata->leftchild);
+ if (XLogRecHasBlockRef(record, 0))
+ gistRedoClearFollowRight(lsn, record, 0);
/* Finally, release lock on the first page */
UnlockReleaseBuffer(firstbuffer);
static void
gistRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
{
- RelFileNode *node = (RelFileNode *) XLogRecGetData(record);
Buffer buffer;
Page page;
- /* Backup blocks are not used in create_index records */
- Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
-
- buffer = XLogReadBuffer(*node, GIST_ROOT_BLKNO, true);
- Assert(BufferIsValid(buffer));
+ XLogReadBufferForRedoExtended(lsn, record, 0, RBM_ZERO, false, &buffer);
+ Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO);
page = (Page) BufferGetPage(buffer);
GISTInitBuffer(buffer, F_LEAF);
BlockNumber origrlink, GistNSN orignsn,
Buffer leftchildbuf, bool markfollowright)
{
- XLogRecData rdata[GIST_MAX_SPLIT_PAGES * 2 + 2];
gistxlogPageSplit xlrec;
SplitedPageLayout *ptr;
- int npage = 0,
- cur;
+ int npage = 0;
XLogRecPtr recptr;
+ int i;
for (ptr = dist; ptr; ptr = ptr->next)
npage++;
- /*
- * the caller should've checked this already, but doesn't hurt to check
- * again.
- */
- if (npage > GIST_MAX_SPLIT_PAGES)
- elog(ERROR, "GiST page split into too many halves");
-
- xlrec.node = node;
- xlrec.origblkno = blkno;
xlrec.origrlink = origrlink;
xlrec.orignsn = orignsn;
xlrec.origleaf = page_is_leaf;
xlrec.npage = (uint16) npage;
- xlrec.leftchild =
- BufferIsValid(leftchildbuf) ? BufferGetBlockNumber(leftchildbuf) : InvalidBlockNumber;
xlrec.markfollowright = markfollowright;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = sizeof(gistxlogPageSplit);
- rdata[0].buffer = InvalidBuffer;
-
- cur = 1;
+ XLogBeginInsert();
/*
* Include a full page image of the child buf. (only necessary if a
* checkpoint happened since the child page was split)
*/
if (BufferIsValid(leftchildbuf))
- {
- rdata[cur - 1].next = &(rdata[cur]);
- rdata[cur].data = NULL;
- rdata[cur].len = 0;
- rdata[cur].buffer = leftchildbuf;
- rdata[cur].buffer_std = true;
- cur++;
- }
+ XLogRegisterBuffer(0, leftchildbuf, REGBUF_STANDARD);
+ /*
+ * NOTE: We register a lot of data. The caller must've called
+ * XLogEnsureRecordSpace() to prepare for that. We cannot do it here,
+ * because we're already in a critical section. If you change the number
+ * of buffer or data registrations here, make sure you modify the
+ * XLogEnsureRecordSpace() calls accordingly!
+ */
+ XLogRegisterData((char *) &xlrec, sizeof(gistxlogPageSplit));
+
+ i = 1;
for (ptr = dist; ptr; ptr = ptr->next)
{
- rdata[cur - 1].next = &(rdata[cur]);
- rdata[cur].buffer = InvalidBuffer;
- rdata[cur].data = (char *) &(ptr->block);
- rdata[cur].len = sizeof(gistxlogPage);
- cur++;
-
- rdata[cur - 1].next = &(rdata[cur]);
- rdata[cur].buffer = InvalidBuffer;
- rdata[cur].data = (char *) (ptr->list);
- rdata[cur].len = ptr->lenlist;
- cur++;
+ XLogRegisterBuffer(i, ptr->buffer, REGBUF_WILL_INIT);
+ XLogRegisterBufData(i, (char *) &(ptr->block.num), sizeof(int));
+ XLogRegisterBufData(i, (char *) ptr->list, ptr->lenlist);
+ i++;
}
- rdata[cur - 1].next = NULL;
- recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_SPLIT, rdata);
+ recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_SPLIT);
return recptr;
}
*
* Note that both the todelete array and the tuples are marked as belonging
* to the target buffer; they need not be stored in XLOG if XLogInsert decides
- * to log the whole buffer contents instead. Also, we take care that there's
- * at least one rdata item referencing the buffer, even when ntodelete and
- * ituplen are both zero; this ensures that XLogInsert knows about the buffer.
+ * to log the whole buffer contents instead.
*/
XLogRecPtr
gistXLogUpdate(RelFileNode node, Buffer buffer,
IndexTuple *itup, int ituplen,
Buffer leftchildbuf)
{
- XLogRecData rdata[MaxIndexTuplesPerPage + 3];
gistxlogPageUpdate xlrec;
- int cur,
- i;
+ int i;
XLogRecPtr recptr;
- xlrec.node = node;
- xlrec.blkno = BufferGetBlockNumber(buffer);
xlrec.ntodelete = ntodelete;
- xlrec.leftchild =
- BufferIsValid(leftchildbuf) ? BufferGetBlockNumber(leftchildbuf) : InvalidBlockNumber;
-
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = sizeof(gistxlogPageUpdate);
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
+ xlrec.ntoinsert = ituplen;
- rdata[1].data = (char *) todelete;
- rdata[1].len = sizeof(OffsetNumber) * ntodelete;
- rdata[1].buffer = buffer;
- rdata[1].buffer_std = true;
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, sizeof(gistxlogPageUpdate));
- cur = 2;
+ XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
+ XLogRegisterBufData(0, (char *) todelete, sizeof(OffsetNumber) * ntodelete);
/* new tuples */
for (i = 0; i < ituplen; i++)
- {
- rdata[cur - 1].next = &(rdata[cur]);
- rdata[cur].data = (char *) (itup[i]);
- rdata[cur].len = IndexTupleSize(itup[i]);
- rdata[cur].buffer = buffer;
- rdata[cur].buffer_std = true;
- cur++;
- }
+ XLogRegisterBufData(0, (char *) (itup[i]), IndexTupleSize(itup[i]));
/*
* Include a full page image of the child buf. (only necessary if a
* checkpoint happened since the child page was split)
*/
if (BufferIsValid(leftchildbuf))
- {
- rdata[cur - 1].next = &(rdata[cur]);
- rdata[cur].data = NULL;
- rdata[cur].len = 0;
- rdata[cur].buffer = leftchildbuf;
- rdata[cur].buffer_std = true;
- cur++;
- }
- rdata[cur - 1].next = NULL;
+ XLogRegisterBuffer(1, leftchildbuf, REGBUF_STANDARD);
- recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_UPDATE, rdata);
+ recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_UPDATE);
return recptr;
}
xl_heap_insert xlrec;
xl_heap_header xlhdr;
XLogRecPtr recptr;
- XLogRecData rdata[4];
Page page = BufferGetPage(buffer);
uint8 info = XLOG_HEAP_INSERT;
- bool need_tuple_data;
+ int bufflags = 0;
/*
- * For logical decoding, we need the tuple even if we're doing a full
- * page write, so make sure to log it separately. (XXX We could
- * alternatively store a pointer into the FPW).
- *
- * Also, if this is a catalog, we need to transmit combocids to
- * properly decode, so log that as well.
+ * If this is a catalog, we need to transmit combocids to properly
+ * decode, so log that as well.
*/
- need_tuple_data = RelationIsLogicallyLogged(relation);
if (RelationIsAccessibleInLogicalDecoding(relation))
log_heap_new_cid(relation, heaptup);
- xlrec.flags = all_visible_cleared ? XLOG_HEAP_ALL_VISIBLE_CLEARED : 0;
- xlrec.target.node = relation->rd_node;
- xlrec.target.tid = heaptup->t_self;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = SizeOfHeapInsert;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
-
- xlhdr.t_infomask2 = heaptup->t_data->t_infomask2;
- xlhdr.t_infomask = heaptup->t_data->t_infomask;
- xlhdr.t_hoff = heaptup->t_data->t_hoff;
-
/*
- * note we mark rdata[1] as belonging to buffer; if XLogInsert decides
- * to write the whole page to the xlog, we don't need to store
- * xl_heap_header in the xlog.
+ * If this is the single and first tuple on page, we can reinit the
+ * page instead of restoring the whole thing. Set flag, and hide
+ * buffer references from XLogInsert.
*/
- rdata[1].data = (char *) &xlhdr;
- rdata[1].len = SizeOfHeapHeader;
- rdata[1].buffer = need_tuple_data ? InvalidBuffer : buffer;
- rdata[1].buffer_std = true;
- rdata[1].next = &(rdata[2]);
-
- /* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
- rdata[2].data = (char *) heaptup->t_data + offsetof(HeapTupleHeaderData, t_bits);
- rdata[2].len = heaptup->t_len - offsetof(HeapTupleHeaderData, t_bits);
- rdata[2].buffer = need_tuple_data ? InvalidBuffer : buffer;
- rdata[2].buffer_std = true;
- rdata[2].next = NULL;
+ if (ItemPointerGetOffsetNumber(&(heaptup->t_self)) == FirstOffsetNumber &&
+ PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
+ {
+ info |= XLOG_HEAP_INIT_PAGE;
+ bufflags |= REGBUF_WILL_INIT;
+ }
/*
- * Make a separate rdata entry for the tuple's buffer if we're doing
- * logical decoding, so that an eventual FPW doesn't remove the
- * tuple's data.
+ * For logical decoding, we need the tuple even if we're doing a full
+ * page write, so make sure it's included even if we take a full-page
+ * image. (XXX We could alternatively store a pointer into the FPW).
*/
- if (need_tuple_data)
+ if (RelationIsLogicallyLogged(relation))
{
- rdata[2].next = &(rdata[3]);
-
- rdata[3].data = NULL;
- rdata[3].len = 0;
- rdata[3].buffer = buffer;
- rdata[3].buffer_std = true;
- rdata[3].next = NULL;
-
xlrec.flags |= XLOG_HEAP_CONTAINS_NEW_TUPLE;
+ bufflags |= REGBUF_KEEP_DATA;
}
+ xlrec.offnum = ItemPointerGetOffsetNumber(&heaptup->t_self);
+ xlrec.flags = all_visible_cleared ? XLOG_HEAP_ALL_VISIBLE_CLEARED : 0;
+ Assert(ItemPointerGetBlockNumber(&heaptup->t_self) == BufferGetBlockNumber(buffer));
+
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, SizeOfHeapInsert);
+
+ xlhdr.t_infomask2 = heaptup->t_data->t_infomask2;
+ xlhdr.t_infomask = heaptup->t_data->t_infomask;
+ xlhdr.t_hoff = heaptup->t_data->t_hoff;
+
/*
- * If this is the single and first tuple on page, we can reinit the
- * page instead of restoring the whole thing. Set flag, and hide
- * buffer references from XLogInsert.
+ * note we mark xlhdr as belonging to buffer; if XLogInsert decides
+ * to write the whole page to the xlog, we don't need to store
+ * xl_heap_header in the xlog.
*/
- if (ItemPointerGetOffsetNumber(&(heaptup->t_self)) == FirstOffsetNumber &&
- PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
- {
- info |= XLOG_HEAP_INIT_PAGE;
- rdata[1].buffer = rdata[2].buffer = rdata[3].buffer = InvalidBuffer;
- }
+ XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
+ XLogRegisterBufData(0, (char *) &xlhdr, SizeOfHeapHeader);
+ /* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
+ XLogRegisterBufData(0,
+ (char *) heaptup->t_data + offsetof(HeapTupleHeaderData, t_bits),
+ heaptup->t_len - offsetof(HeapTupleHeaderData, t_bits));
- recptr = XLogInsert(RM_HEAP_ID, info, rdata);
+ recptr = XLogInsert(RM_HEAP_ID, info);
PageSetLSN(page, recptr);
}
break;
RelationPutHeapTuple(relation, buffer, heaptup);
+
+ /*
+ * We don't use heap_multi_insert for catalog tuples yet, but
+ * better be prepared...
+ */
+ if (needwal && need_cids)
+ log_heap_new_cid(relation, heaptup);
}
if (PageIsAllVisible(page))
{
XLogRecPtr recptr;
xl_heap_multi_insert *xlrec;
- XLogRecData rdata[3];
uint8 info = XLOG_HEAP2_MULTI_INSERT;
char *tupledata;
int totaldatalen;
char *scratchptr = scratch;
bool init;
+ int bufflags = 0;
/*
* If the page was previously empty, we can reinit the page
tupledata = scratchptr;
xlrec->flags = all_visible_cleared ? XLOG_HEAP_ALL_VISIBLE_CLEARED : 0;
- xlrec->node = relation->rd_node;
- xlrec->blkno = BufferGetBlockNumber(buffer);
xlrec->ntuples = nthispage;
/*
datalen);
tuphdr->datalen = datalen;
scratchptr += datalen;
-
- /*
- * We don't use heap_multi_insert for catalog tuples yet, but
- * better be prepared...
- */
- if (need_cids)
- log_heap_new_cid(relation, heaptup);
}
totaldatalen = scratchptr - tupledata;
Assert((scratchptr - scratch) < BLCKSZ);
- rdata[0].data = (char *) xlrec;
- rdata[0].len = tupledata - scratch;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &rdata[1];
-
- rdata[1].data = tupledata;
- rdata[1].len = totaldatalen;
- rdata[1].buffer = need_tuple_data ? InvalidBuffer : buffer;
- rdata[1].buffer_std = true;
- rdata[1].next = NULL;
-
- /*
- * Make a separate rdata entry for the tuple's buffer if we're
- * doing logical decoding, so that an eventual FPW doesn't remove
- * the tuple's data.
- */
if (need_tuple_data)
- {
- rdata[1].next = &(rdata[2]);
-
- rdata[2].data = NULL;
- rdata[2].len = 0;
- rdata[2].buffer = buffer;
- rdata[2].buffer_std = true;
- rdata[2].next = NULL;
xlrec->flags |= XLOG_HEAP_CONTAINS_NEW_TUPLE;
- }
/*
- * If we're going to reinitialize the whole page using the WAL
- * record, hide buffer reference from XLogInsert.
+ * Signal that this is the last xl_heap_multi_insert record
+ * emitted by this call to heap_multi_insert(). Needed for logical
+ * decoding so it knows when to cleanup temporary data.
*/
+ if (ndone + nthispage == ntuples)
+ xlrec->flags |= XLOG_HEAP_LAST_MULTI_INSERT;
+
if (init)
{
- rdata[1].buffer = rdata[2].buffer = InvalidBuffer;
info |= XLOG_HEAP_INIT_PAGE;
+ bufflags |= REGBUF_WILL_INIT;
}
-
/*
- * Signal that this is the last xl_heap_multi_insert record
- * emitted by this call to heap_multi_insert(). Needed for logical
- * decoding so it knows when to cleanup temporary data.
+ * If we're doing logical decoding, include the new tuple data
+ * even if we take a full-page image of the page.
*/
- if (ndone + nthispage == ntuples)
- xlrec->flags |= XLOG_HEAP_LAST_MULTI_INSERT;
+ if (need_tuple_data)
+ bufflags |= REGBUF_KEEP_DATA;
+
+ XLogBeginInsert();
+ XLogRegisterData((char *) xlrec, tupledata - scratch);
+ XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
- recptr = XLogInsert(RM_HEAP2_ID, info, rdata);
+ XLogRegisterBufData(0, tupledata, totaldatalen);
+ recptr = XLogInsert(RM_HEAP2_ID, info);
PageSetLSN(page, recptr);
}
{
xl_heap_delete xlrec;
XLogRecPtr recptr;
- XLogRecData rdata[4];
/* For logical decode we need combocids to properly decode the catalog */
if (RelationIsAccessibleInLogicalDecoding(relation))
xlrec.flags = all_visible_cleared ? XLOG_HEAP_ALL_VISIBLE_CLEARED : 0;
xlrec.infobits_set = compute_infobits(tp.t_data->t_infomask,
tp.t_data->t_infomask2);
- xlrec.target.node = relation->rd_node;
- xlrec.target.tid = tp.t_self;
+ xlrec.offnum = ItemPointerGetOffsetNumber(&tp.t_self);
xlrec.xmax = new_xmax;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = SizeOfHeapDelete;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
- rdata[1].data = NULL;
- rdata[1].len = 0;
- rdata[1].buffer = buffer;
- rdata[1].buffer_std = true;
- rdata[1].next = NULL;
+ if (old_key_tuple != NULL)
+ {
+ if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
+ xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_TUPLE;
+ else
+ xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_KEY;
+ }
+
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, SizeOfHeapDelete);
+
+ XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
/*
* Log replica identity of the deleted tuple if there is one
xlhdr.t_infomask = old_key_tuple->t_data->t_infomask;
xlhdr.t_hoff = old_key_tuple->t_data->t_hoff;
- rdata[1].next = &(rdata[2]);
- rdata[2].data = (char *) &xlhdr;
- rdata[2].len = SizeOfHeapHeader;
- rdata[2].buffer = InvalidBuffer;
- rdata[2].next = NULL;
-
- rdata[2].next = &(rdata[3]);
- rdata[3].data = (char *) old_key_tuple->t_data
- + offsetof(HeapTupleHeaderData, t_bits);
- rdata[3].len = old_key_tuple->t_len
- - offsetof(HeapTupleHeaderData, t_bits);
- rdata[3].buffer = InvalidBuffer;
- rdata[3].next = NULL;
-
- if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
- xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_TUPLE;
- else
- xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_KEY;
+ XLogRegisterData((char *) &xlhdr, SizeOfHeapHeader);
+ XLogRegisterData((char *) old_key_tuple->t_data
+ + offsetof(HeapTupleHeaderData, t_bits),
+ old_key_tuple->t_len
+ - offsetof(HeapTupleHeaderData, t_bits));
}
- recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE, rdata);
+ recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE);
PageSetLSN(page, recptr);
}
{
xl_heap_lock xlrec;
XLogRecPtr recptr;
- XLogRecData rdata[2];
- xlrec.target.node = relation->rd_node;
- xlrec.target.tid = tuple->t_self;
+ XLogBeginInsert();
+ XLogRegisterBuffer(0, *buffer, REGBUF_STANDARD);
+
+ xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
xlrec.locking_xid = xid;
xlrec.infobits_set = compute_infobits(new_infomask,
tuple->t_data->t_infomask2);
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = SizeOfHeapLock;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
-
- rdata[1].data = NULL;
- rdata[1].len = 0;
- rdata[1].buffer = *buffer;
- rdata[1].buffer_std = true;
- rdata[1].next = NULL;
+ XLogRegisterData((char *) &xlrec, SizeOfHeapLock);
- recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK, rdata);
+ recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK);
PageSetLSN(page, recptr);
}
{
xl_heap_lock_updated xlrec;
XLogRecPtr recptr;
- XLogRecData rdata[2];
Page page = BufferGetPage(buf);
- xlrec.target.node = rel->rd_node;
- xlrec.target.tid = mytup.t_self;
+ XLogBeginInsert();
+ XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
+
+ xlrec.offnum = ItemPointerGetOffsetNumber(&mytup.t_self);
xlrec.xmax = new_xmax;
xlrec.infobits_set = compute_infobits(new_infomask, new_infomask2);
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = SizeOfHeapLockUpdated;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
-
- rdata[1].data = NULL;
- rdata[1].len = 0;
- rdata[1].buffer = buf;
- rdata[1].buffer_std = true;
- rdata[1].next = NULL;
+ XLogRegisterData((char *) &xlrec, SizeOfHeapLockUpdated);
- recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_LOCK_UPDATED, rdata);
+ recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_LOCK_UPDATED);
PageSetLSN(page, recptr);
}
{
xl_heap_inplace xlrec;
XLogRecPtr recptr;
- XLogRecData rdata[2];
- xlrec.target.node = relation->rd_node;
- xlrec.target.tid = tuple->t_self;
+ xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = SizeOfHeapInplace;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, SizeOfHeapInplace);
- rdata[1].data = (char *) htup + htup->t_hoff;
- rdata[1].len = newlen;
- rdata[1].buffer = buffer;
- rdata[1].buffer_std = true;
- rdata[1].next = NULL;
+ XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
+ XLogRegisterBufData(0, (char *) htup + htup->t_hoff, newlen);
- recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE, rdata);
+ recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE);
PageSetLSN(page, recptr);
}
{
xl_heap_cleanup_info xlrec;
XLogRecPtr recptr;
- XLogRecData rdata;
xlrec.node = rnode;
xlrec.latestRemovedXid = latestRemovedXid;
- rdata.data = (char *) &xlrec;
- rdata.len = SizeOfHeapCleanupInfo;
- rdata.buffer = InvalidBuffer;
- rdata.next = NULL;
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, SizeOfHeapCleanupInfo);
- recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_CLEANUP_INFO, &rdata);
+ recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_CLEANUP_INFO);
return recptr;
}
TransactionId latestRemovedXid)
{
xl_heap_clean xlrec;
- uint8 info;
XLogRecPtr recptr;
- XLogRecData rdata[4];
/* Caller should not call me on a non-WAL-logged relation */
Assert(RelationNeedsWAL(reln));
- xlrec.node = reln->rd_node;
- xlrec.block = BufferGetBlockNumber(buffer);
xlrec.latestRemovedXid = latestRemovedXid;
xlrec.nredirected = nredirected;
xlrec.ndead = ndead;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = SizeOfHeapClean;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, SizeOfHeapClean);
+
+ XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
/*
* The OffsetNumber arrays are not actually in the buffer, but we pretend
* even if no item pointers changed state.
*/
if (nredirected > 0)
- {
- rdata[1].data = (char *) redirected;
- rdata[1].len = nredirected * sizeof(OffsetNumber) * 2;
- }
- else
- {
- rdata[1].data = NULL;
- rdata[1].len = 0;
- }
- rdata[1].buffer = buffer;
- rdata[1].buffer_std = true;
- rdata[1].next = &(rdata[2]);
+ XLogRegisterBufData(0, (char *) redirected,
+ nredirected * sizeof(OffsetNumber) * 2);
if (ndead > 0)
- {
- rdata[2].data = (char *) nowdead;
- rdata[2].len = ndead * sizeof(OffsetNumber);
- }
- else
- {
- rdata[2].data = NULL;
- rdata[2].len = 0;
- }
- rdata[2].buffer = buffer;
- rdata[2].buffer_std = true;
- rdata[2].next = &(rdata[3]);
+ XLogRegisterBufData(0, (char *) nowdead,
+ ndead * sizeof(OffsetNumber));
if (nunused > 0)
- {
- rdata[3].data = (char *) nowunused;
- rdata[3].len = nunused * sizeof(OffsetNumber);
- }
- else
- {
- rdata[3].data = NULL;
- rdata[3].len = 0;
- }
- rdata[3].buffer = buffer;
- rdata[3].buffer_std = true;
- rdata[3].next = NULL;
+ XLogRegisterBufData(0, (char *) nowunused,
+ nunused * sizeof(OffsetNumber));
- info = XLOG_HEAP2_CLEAN;
- recptr = XLogInsert(RM_HEAP2_ID, info, rdata);
+ recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_CLEAN);
return recptr;
}
{
xl_heap_freeze_page xlrec;
XLogRecPtr recptr;
- XLogRecData rdata[2];
/* Caller should not call me on a non-WAL-logged relation */
Assert(RelationNeedsWAL(reln));
/* nor when there are no tuples to freeze */
Assert(ntuples > 0);
- xlrec.node = reln->rd_node;
- xlrec.block = BufferGetBlockNumber(buffer);
xlrec.cutoff_xid = cutoff_xid;
xlrec.ntuples = ntuples;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = SizeOfHeapFreezePage;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, SizeOfHeapFreezePage);
/*
* The freeze plan array is not actually in the buffer, but pretend that
* it is. When XLogInsert stores the whole buffer, the freeze plan need
* not be stored too.
*/
- rdata[1].data = (char *) tuples;
- rdata[1].len = ntuples * sizeof(xl_heap_freeze_tuple);
- rdata[1].buffer = buffer;
- rdata[1].buffer_std = true;
- rdata[1].next = NULL;
+ XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
+ XLogRegisterBufData(0, (char *) tuples,
+ ntuples * sizeof(xl_heap_freeze_tuple));
- recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_FREEZE_PAGE, rdata);
+ recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_FREEZE_PAGE);
return recptr;
}
{
xl_heap_visible xlrec;
XLogRecPtr recptr;
- XLogRecData rdata[3];
Assert(BufferIsValid(heap_buffer));
Assert(BufferIsValid(vm_buffer));
- xlrec.node = rnode;
- xlrec.block = BufferGetBlockNumber(heap_buffer);
xlrec.cutoff_xid = cutoff_xid;
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, SizeOfHeapVisible);
+ XLogRegisterBuffer(1, heap_buffer, XLogHintBitIsNeeded() ? REGBUF_STANDARD : (REGBUF_STANDARD | REGBUF_NO_IMAGE));
+ XLogRegisterBuffer(0, vm_buffer, 0);
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = SizeOfHeapVisible;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
-
- rdata[1].data = NULL;
- rdata[1].len = 0;
- rdata[1].buffer = vm_buffer;
- rdata[1].buffer_std = false;
- rdata[1].next = NULL;
-
- if (XLogHintBitIsNeeded())
- {
- rdata[1].next = &(rdata[2]);
-
- rdata[2].data = NULL;
- rdata[2].len = 0;
- rdata[2].buffer = heap_buffer;
- rdata[2].buffer_std = true;
- rdata[2].next = NULL;
- }
-
- recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_VISIBLE, rdata);
+ recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_VISIBLE);
return recptr;
}
bool all_visible_cleared, bool new_all_visible_cleared)
{
xl_heap_update xlrec;
- xl_heap_header_len xlhdr;
- xl_heap_header_len xlhdr_idx;
+ xl_heap_header xlhdr;
+ xl_heap_header xlhdr_idx;
uint8 info;
uint16 prefix_suffix[2];
uint16 prefixlen = 0,
suffixlen = 0;
XLogRecPtr recptr;
- XLogRecData rdata[9];
Page page = BufferGetPage(newbuf);
bool need_tuple_data = RelationIsLogicallyLogged(reln);
- int nr;
- Buffer newbufref;
+ bool init;
+ int bufflags;
/* Caller should not call me on a non-WAL-logged relation */
Assert(RelationNeedsWAL(reln));
+ XLogBeginInsert();
+
if (HeapTupleIsHeapOnly(newtup))
info = XLOG_HEAP_HOT_UPDATE;
else
suffixlen = 0;
}
- xlrec.target.node = reln->rd_node;
- xlrec.target.tid = oldtup->t_self;
- xlrec.old_xmax = HeapTupleHeaderGetRawXmax(oldtup->t_data);
- xlrec.old_infobits_set = compute_infobits(oldtup->t_data->t_infomask,
- oldtup->t_data->t_infomask2);
- xlrec.new_xmax = HeapTupleHeaderGetRawXmax(newtup->t_data);
+ /* Prepare main WAL data chain */
xlrec.flags = 0;
if (all_visible_cleared)
xlrec.flags |= XLOG_HEAP_ALL_VISIBLE_CLEARED;
- xlrec.newtid = newtup->t_self;
if (new_all_visible_cleared)
xlrec.flags |= XLOG_HEAP_NEW_ALL_VISIBLE_CLEARED;
if (prefixlen > 0)
xlrec.flags |= XLOG_HEAP_PREFIX_FROM_OLD;
if (suffixlen > 0)
xlrec.flags |= XLOG_HEAP_SUFFIX_FROM_OLD;
+ if (need_tuple_data)
+ {
+ xlrec.flags |= XLOG_HEAP_CONTAINS_NEW_TUPLE;
+ if (old_key_tuple)
+ {
+ if (reln->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
+ xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_TUPLE;
+ else
+ xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_KEY;
+ }
+ }
/* If new tuple is the single and first tuple on page... */
if (ItemPointerGetOffsetNumber(&(newtup->t_self)) == FirstOffsetNumber &&
PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
{
info |= XLOG_HEAP_INIT_PAGE;
- newbufref = InvalidBuffer;
+ init = true;
}
else
- newbufref = newbuf;
+ init = false;
- rdata[0].data = NULL;
- rdata[0].len = 0;
- rdata[0].buffer = oldbuf;
- rdata[0].buffer_std = true;
- rdata[0].next = &(rdata[1]);
+ /* Prepare WAL data for the old page */
+ xlrec.old_offnum = ItemPointerGetOffsetNumber(&oldtup->t_self);
+ xlrec.old_xmax = HeapTupleHeaderGetRawXmax(oldtup->t_data);
+ xlrec.old_infobits_set = compute_infobits(oldtup->t_data->t_infomask,
+ oldtup->t_data->t_infomask2);
+
+ /* Prepare WAL data for the new page */
+ xlrec.new_offnum = ItemPointerGetOffsetNumber(&newtup->t_self);
+ xlrec.new_xmax = HeapTupleHeaderGetRawXmax(newtup->t_data);
+
+ bufflags = REGBUF_STANDARD;
+ if (init)
+ bufflags |= REGBUF_WILL_INIT;
+ if (need_tuple_data)
+ bufflags |= REGBUF_KEEP_DATA;
- rdata[1].data = (char *) &xlrec;
- rdata[1].len = SizeOfHeapUpdate;
- rdata[1].buffer = InvalidBuffer;
- rdata[1].next = &(rdata[2]);
+ XLogRegisterBuffer(0, newbuf, bufflags);
+ if (oldbuf != newbuf)
+ XLogRegisterBuffer(1, oldbuf, REGBUF_STANDARD);
- /* prefix and/or suffix length fields */
+ XLogRegisterData((char *) &xlrec, SizeOfHeapUpdate);
+
+ /*
+ * Prepare WAL data for the new tuple.
+ */
if (prefixlen > 0 || suffixlen > 0)
{
if (prefixlen > 0 && suffixlen > 0)
{
prefix_suffix[0] = prefixlen;
prefix_suffix[1] = suffixlen;
- rdata[2].data = (char *) &prefix_suffix;
- rdata[2].len = 2 * sizeof(uint16);
+ XLogRegisterBufData(0, (char *) &prefix_suffix, sizeof(uint16) * 2);
}
else if (prefixlen > 0)
{
- rdata[2].data = (char *) &prefixlen;
- rdata[2].len = sizeof(uint16);
+ XLogRegisterBufData(0, (char *) &prefixlen, sizeof(uint16));
}
else
{
- rdata[2].data = (char *) &suffixlen;
- rdata[2].len = sizeof(uint16);
+ XLogRegisterBufData(0, (char *) &suffixlen, sizeof(uint16));
}
- rdata[2].buffer = newbufref;
- rdata[2].buffer_std = true;
- rdata[2].next = &(rdata[3]);
- nr = 3;
}
- else
- nr = 2;
- xlhdr.header.t_infomask2 = newtup->t_data->t_infomask2;
- xlhdr.header.t_infomask = newtup->t_data->t_infomask;
- xlhdr.header.t_hoff = newtup->t_data->t_hoff;
- Assert(offsetof(HeapTupleHeaderData, t_bits) +prefixlen + suffixlen <= newtup->t_len);
- xlhdr.t_len = newtup->t_len - offsetof(HeapTupleHeaderData, t_bits) -prefixlen - suffixlen;
-
- /*
- * As with insert records, we need not store this rdata segment if we
- * decide to store the whole buffer instead, unless we're doing logical
- * decoding.
- */
- rdata[nr].data = (char *) &xlhdr;
- rdata[nr].len = SizeOfHeapHeaderLen;
- rdata[nr].buffer = need_tuple_data ? InvalidBuffer : newbufref;
- rdata[nr].buffer_std = true;
- rdata[nr].next = &(rdata[nr + 1]);
- nr++;
+ xlhdr.t_infomask2 = newtup->t_data->t_infomask2;
+ xlhdr.t_infomask = newtup->t_data->t_infomask;
+ xlhdr.t_hoff = newtup->t_data->t_hoff;
+ Assert(offsetof(HeapTupleHeaderData, t_bits) + prefixlen + suffixlen <= newtup->t_len);
/*
* PG73FORMAT: write bitmap [+ padding] [+ oid] + data
*
* The 'data' doesn't include the common prefix or suffix.
*/
+ XLogRegisterBufData(0, (char *) &xlhdr, SizeOfHeapHeader);
if (prefixlen == 0)
{
- rdata[nr].data = ((char *) newtup->t_data) + offsetof(HeapTupleHeaderData, t_bits);
- rdata[nr].len = newtup->t_len - offsetof(HeapTupleHeaderData, t_bits) -suffixlen;
- rdata[nr].buffer = need_tuple_data ? InvalidBuffer : newbufref;
- rdata[nr].buffer_std = true;
- rdata[nr].next = NULL;
- nr++;
+ XLogRegisterBufData(0,
+ ((char *) newtup->t_data) + offsetof(HeapTupleHeaderData, t_bits),
+ newtup->t_len - offsetof(HeapTupleHeaderData, t_bits) - suffixlen);
}
else
{
/* bitmap [+ padding] [+ oid] */
if (newtup->t_data->t_hoff - offsetof(HeapTupleHeaderData, t_bits) >0)
{
- rdata[nr - 1].next = &(rdata[nr]);
- rdata[nr].data = ((char *) newtup->t_data) + offsetof(HeapTupleHeaderData, t_bits);
- rdata[nr].len = newtup->t_data->t_hoff - offsetof(HeapTupleHeaderData, t_bits);
- rdata[nr].buffer = need_tuple_data ? InvalidBuffer : newbufref;
- rdata[nr].buffer_std = true;
- rdata[nr].next = NULL;
- nr++;
+ XLogRegisterBufData(0,
+ ((char *) newtup->t_data) + offsetof(HeapTupleHeaderData, t_bits),
+ newtup->t_data->t_hoff - offsetof(HeapTupleHeaderData, t_bits));
}
/* data after common prefix */
- rdata[nr - 1].next = &(rdata[nr]);
- rdata[nr].data = ((char *) newtup->t_data) + newtup->t_data->t_hoff + prefixlen;
- rdata[nr].len = newtup->t_len - newtup->t_data->t_hoff - prefixlen - suffixlen;
- rdata[nr].buffer = need_tuple_data ? InvalidBuffer : newbufref;
- rdata[nr].buffer_std = true;
- rdata[nr].next = NULL;
- nr++;
+ XLogRegisterBufData(0,
+ ((char *) newtup->t_data) + newtup->t_data->t_hoff + prefixlen,
+ newtup->t_len - newtup->t_data->t_hoff - prefixlen - suffixlen);
}
- /*
- * Separate storage for the FPW buffer reference of the new page in the
- * wal_level >= logical case.
- */
- if (need_tuple_data)
+ /* We need to log a tuple identity */
+ if (need_tuple_data && old_key_tuple)
{
- rdata[nr - 1].next = &(rdata[nr]);
-
- rdata[nr].data = NULL,
- rdata[nr].len = 0;
- rdata[nr].buffer = newbufref;
- rdata[nr].buffer_std = true;
- rdata[nr].next = NULL;
- nr++;
+ /* don't really need this, but its more comfy to decode */
+ xlhdr_idx.t_infomask2 = old_key_tuple->t_data->t_infomask2;
+ xlhdr_idx.t_infomask = old_key_tuple->t_data->t_infomask;
+ xlhdr_idx.t_hoff = old_key_tuple->t_data->t_hoff;
- xlrec.flags |= XLOG_HEAP_CONTAINS_NEW_TUPLE;
-
- /* We need to log a tuple identity */
- if (old_key_tuple)
- {
- /* don't really need this, but its more comfy to decode */
- xlhdr_idx.header.t_infomask2 = old_key_tuple->t_data->t_infomask2;
- xlhdr_idx.header.t_infomask = old_key_tuple->t_data->t_infomask;
- xlhdr_idx.header.t_hoff = old_key_tuple->t_data->t_hoff;
- xlhdr_idx.t_len = old_key_tuple->t_len;
-
- rdata[nr - 1].next = &(rdata[nr]);
- rdata[nr].data = (char *) &xlhdr_idx;
- rdata[nr].len = SizeOfHeapHeaderLen;
- rdata[nr].buffer = InvalidBuffer;
- rdata[nr].next = &(rdata[nr + 1]);
- nr++;
-
- /* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
- rdata[nr].data = (char *) old_key_tuple->t_data
- + offsetof(HeapTupleHeaderData, t_bits);
- rdata[nr].len = old_key_tuple->t_len
- - offsetof(HeapTupleHeaderData, t_bits);
- rdata[nr].buffer = InvalidBuffer;
- rdata[nr].next = NULL;
- nr++;
+ XLogRegisterData((char *) &xlhdr_idx, SizeOfHeapHeader);
- if (reln->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
- xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_TUPLE;
- else
- xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_KEY;
- }
+ /* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
+ XLogRegisterData((char *) old_key_tuple->t_data + offsetof(HeapTupleHeaderData, t_bits),
+ old_key_tuple->t_len - offsetof(HeapTupleHeaderData, t_bits));
}
- recptr = XLogInsert(RM_HEAP_ID, info, rdata);
+ recptr = XLogInsert(RM_HEAP_ID, info);
return recptr;
}
xl_heap_new_cid xlrec;
XLogRecPtr recptr;
- XLogRecData rdata[1];
HeapTupleHeader hdr = tup->t_data;
Assert(ItemPointerIsValid(&tup->t_self));
Assert(tup->t_tableOid != InvalidOid);
xlrec.top_xid = GetTopTransactionId();
- xlrec.target.node = relation->rd_node;
- xlrec.target.tid = tup->t_self;
+ xlrec.target_node = relation->rd_node;
+ xlrec.target_tid = tup->t_self;
/*
* If the tuple got inserted & deleted in the same TX we definitely have a
xlrec.combocid = InvalidCommandId;
}
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = SizeOfHeapNewCid;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = NULL;
+ /*
+ * Note that we don't need to register the buffer here, because this
+ * operation does not modify the page. The insert/update/delete that
+ * called us certainly did, but that's WAL-logged separately.
+ */
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, SizeOfHeapNewCid);
- recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_NEW_CID, rdata);
+ recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_NEW_CID);
return recptr;
}
*/
/* Backup blocks are not used in cleanup_info records */
- Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+ Assert(!XLogRecHasAnyBlockRefs(record));
}
/*
BlockNumber blkno;
XLogRedoAction action;
- rnode = xlrec->node;
- blkno = xlrec->block;
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
/*
* We're about to remove tuples. In Hot Standby mode, ensure that there's
* If we have a full-page image, restore it (using a cleanup lock) and
* we're done.
*/
- action = XLogReadBufferForRedoExtended(lsn, record, 0,
- rnode, MAIN_FORKNUM, blkno,
- RBM_NORMAL, true, &buffer);
+ action = XLogReadBufferForRedoExtended(lsn, record, 0, RBM_NORMAL, true,
+ &buffer);
if (action == BLK_NEEDS_REDO)
{
Page page = (Page) BufferGetPage(buffer);
int nredirected;
int ndead;
int nunused;
+ Size datalen;
+
+ redirected = (OffsetNumber *) XLogRecGetBlockData(record, 0, &datalen);
nredirected = xlrec->nredirected;
ndead = xlrec->ndead;
- end = (OffsetNumber *) ((char *) xlrec + record->xl_len);
- redirected = (OffsetNumber *) ((char *) xlrec + SizeOfHeapClean);
+ end = (OffsetNumber *) ((char *) redirected + datalen);
nowdead = redirected + (nredirected * 2);
nowunused = nowdead + ndead;
nunused = (end - nowunused);
* totally accurate anyway.
*/
if (action == BLK_NEEDS_REDO)
- XLogRecordPageWithFreeSpace(xlrec->node, xlrec->block, freespace);
+ XLogRecordPageWithFreeSpace(rnode, blkno, freespace);
}
/*
heap_xlog_visible(XLogRecPtr lsn, XLogRecord *record)
{
xl_heap_visible *xlrec = (xl_heap_visible *) XLogRecGetData(record);
+ Buffer vmbuffer = InvalidBuffer;
Buffer buffer;
Page page;
RelFileNode rnode;
BlockNumber blkno;
XLogRedoAction action;
- rnode = xlrec->node;
- blkno = xlrec->block;
+ XLogRecGetBlockTag(record, 1, &rnode, NULL, &blkno);
/*
* If there are any Hot Standby transactions running that have an xmin
* truncated later in recovery, we don't need to update the page, but we'd
* better still update the visibility map.
*/
- action = XLogReadBufferForRedo(lsn, record, 1, rnode, blkno, &buffer);
+ action = XLogReadBufferForRedo(lsn, record, 1, &buffer);
if (action == BLK_NEEDS_REDO)
{
/*
* XLOG record's LSN, we mustn't mark the page all-visible, because
* the subsequent update won't be replayed to clear the flag.
*/
- page = BufferGetPage(buffer);
- PageSetAllVisible(page);
- MarkBufferDirty(buffer);
+ page = BufferGetPage(buffer);
+ PageSetAllVisible(page);
+ MarkBufferDirty(buffer);
}
else if (action == BLK_RESTORED)
{
* the visibility map bit does so before checking the page LSN, so any
* bits that need to be cleared will still be cleared.
*/
- if (record->xl_info & XLR_BKP_BLOCK(0))
- (void) RestoreBackupBlock(lsn, record, 0, false, false);
- else
+ if (XLogReadBufferForRedoExtended(lsn, record, 0, RBM_ZERO_ON_ERROR, false,
+ &vmbuffer) == BLK_NEEDS_REDO)
{
Relation reln;
- Buffer vmbuffer = InvalidBuffer;
+
+ /*
+ * XLogReplayBufferExtended locked the buffer. But visibilitymap_set
+ * will handle locking itself.
+ */
+ LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
reln = CreateFakeRelcacheEntry(rnode);
visibilitymap_pin(reln, blkno, &vmbuffer);
ReleaseBuffer(vmbuffer);
FreeFakeRelcacheEntry(reln);
}
+ else if (BufferIsValid(vmbuffer))
+ UnlockReleaseBuffer(vmbuffer);
}
/*
xl_heap_freeze_page *xlrec = (xl_heap_freeze_page *) XLogRecGetData(record);
TransactionId cutoff_xid = xlrec->cutoff_xid;
Buffer buffer;
- Page page;
int ntup;
/*
* consider the frozen xids as running.
*/
if (InHotStandby)
- ResolveRecoveryConflictWithSnapshot(cutoff_xid, xlrec->node);
+ {
+ RelFileNode rnode;
- if (XLogReadBufferForRedo(lsn, record, 0, xlrec->node, xlrec->block,
- &buffer) == BLK_NEEDS_REDO)
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, NULL);
+ ResolveRecoveryConflictWithSnapshot(cutoff_xid, rnode);
+ }
+
+ if (XLogReadBufferForRedo(lsn, record, 0, &buffer) == BLK_NEEDS_REDO)
{
- page = BufferGetPage(buffer);
+ Page page = BufferGetPage(buffer);
+ xl_heap_freeze_tuple *tuples;
+
+ tuples = (xl_heap_freeze_tuple *) XLogRecGetBlockData(record, 0, NULL);
/* now execute freeze plan for each frozen tuple */
for (ntup = 0; ntup < xlrec->ntuples; ntup++)
ItemId lp;
HeapTupleHeader tuple;
- xlrec_tp = &xlrec->tuples[ntup];
+ xlrec_tp = &tuples[ntup];
lp = PageGetItemId(page, xlrec_tp->offset); /* offsets are one-based */
tuple = (HeapTupleHeader) PageGetItem(page, lp);
xl_heap_delete *xlrec = (xl_heap_delete *) XLogRecGetData(record);
Buffer buffer;
Page page;
- OffsetNumber offnum;
ItemId lp = NULL;
HeapTupleHeader htup;
BlockNumber blkno;
RelFileNode target_node;
+ ItemPointerData target_tid;
- blkno = ItemPointerGetBlockNumber(&(xlrec->target.tid));
- target_node = xlrec->target.node;
+ XLogRecGetBlockTag(record, 0, &target_node, NULL, &blkno);
+ ItemPointerSetBlockNumber(&target_tid, blkno);
+ ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);
/*
* The visibility map may need to be fixed even if the heap page is
FreeFakeRelcacheEntry(reln);
}
- if (XLogReadBufferForRedo(lsn, record, 0, target_node, blkno, &buffer)
- == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(lsn, record, 0, &buffer) == BLK_NEEDS_REDO)
{
- page = (Page) BufferGetPage(buffer);
+ page = BufferGetPage(buffer);
- offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
- if (PageGetMaxOffsetNumber(page) >= offnum)
- lp = PageGetItemId(page, offnum);
+ if (PageGetMaxOffsetNumber(page) >= xlrec->offnum)
+ lp = PageGetItemId(page, xlrec->offnum);
- if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
+ if (PageGetMaxOffsetNumber(page) < xlrec->offnum || !ItemIdIsNormal(lp))
elog(PANIC, "heap_delete_redo: invalid lp");
htup = (HeapTupleHeader) PageGetItem(page, lp);
PageClearAllVisible(page);
/* Make sure there is no forward chain link in t_ctid */
- htup->t_ctid = xlrec->target.tid;
+ htup->t_ctid = target_tid;
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
}
xl_heap_insert *xlrec = (xl_heap_insert *) XLogRecGetData(record);
Buffer buffer;
Page page;
- OffsetNumber offnum;
struct
{
HeapTupleHeaderData hdr;
Size freespace = 0;
RelFileNode target_node;
BlockNumber blkno;
+ ItemPointerData target_tid;
XLogRedoAction action;
- target_node = xlrec->target.node;
- blkno = ItemPointerGetBlockNumber(&(xlrec->target.tid));
+ XLogRecGetBlockTag(record, 0, &target_node, NULL, &blkno);
+ ItemPointerSetBlockNumber(&target_tid, blkno);
+ ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);
/*
* The visibility map may need to be fixed even if the heap page is
*/
if (record->xl_info & XLOG_HEAP_INIT_PAGE)
{
- XLogReadBufferForRedoExtended(lsn, record, 0,
- target_node, MAIN_FORKNUM, blkno,
- RBM_ZERO, false, &buffer);
+ XLogReadBufferForRedoExtended(lsn, record, 0, RBM_ZERO, false, &buffer);
page = BufferGetPage(buffer);
PageInit(page, BufferGetPageSize(buffer), 0);
action = BLK_NEEDS_REDO;
}
else
- action = XLogReadBufferForRedo(lsn, record, 0, target_node, blkno,
- &buffer);
-
+ action = XLogReadBufferForRedo(lsn, record, 0, &buffer);
if (action == BLK_NEEDS_REDO)
{
+ Size datalen;
+ char *data;
+
+ /*
+ * The new tuple is normally stored as buffer 0's data. But if
+ * XLOG_HEAP_CONTAINS_NEW_TUPLE flag is set, it's part of the main
+ * data, after the xl_heap_insert struct.
+ */
+ if (xlrec->flags & XLOG_HEAP_CONTAINS_NEW_TUPLE)
+ {
+ data = XLogRecGetData(record) + SizeOfHeapInsert;
+ datalen = record->xl_len - SizeOfHeapInsert;
+ }
+ else
+ data = XLogRecGetBlockData(record, 0, &datalen);
+
page = BufferGetPage(buffer);
- offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
- if (PageGetMaxOffsetNumber(page) + 1 < offnum)
+ if (PageGetMaxOffsetNumber(page) + 1 < xlrec->offnum)
elog(PANIC, "heap_insert_redo: invalid max offset number");
- newlen = record->xl_len - SizeOfHeapInsert - SizeOfHeapHeader;
- Assert(newlen <= MaxHeapTupleSize);
- memcpy((char *) &xlhdr,
- (char *) xlrec + SizeOfHeapInsert,
- SizeOfHeapHeader);
+ newlen = datalen - SizeOfHeapHeader;
+ Assert(datalen > SizeOfHeapHeader && newlen <= MaxHeapTupleSize);
+ memcpy((char *) &xlhdr, data, SizeOfHeapHeader);
+ data += SizeOfHeapHeader;
+
htup = &tbuf.hdr;
MemSet((char *) htup, 0, sizeof(HeapTupleHeaderData));
/* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
memcpy((char *) htup + offsetof(HeapTupleHeaderData, t_bits),
- (char *) xlrec + SizeOfHeapInsert + SizeOfHeapHeader,
+ data,
newlen);
newlen += offsetof(HeapTupleHeaderData, t_bits);
htup->t_infomask2 = xlhdr.t_infomask2;
htup->t_hoff = xlhdr.t_hoff;
HeapTupleHeaderSetXmin(htup, record->xl_xid);
HeapTupleHeaderSetCmin(htup, FirstCommandId);
- htup->t_ctid = xlrec->target.tid;
+ htup->t_ctid = target_tid;
- offnum = PageAddItem(page, (Item) htup, newlen, offnum, true, true);
- if (offnum == InvalidOffsetNumber)
+ if (PageAddItem(page, (Item) htup, newlen, xlrec->offnum,
+ true, true) == InvalidOffsetNumber)
elog(PANIC, "heap_insert_redo: failed to add tuple");
freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
* totally accurate anyway.
*/
if (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)
- XLogRecordPageWithFreeSpace(xlrec->target.node, blkno, freespace);
+ XLogRecordPageWithFreeSpace(target_node, blkno, freespace);
}
/*
static void
heap_xlog_multi_insert(XLogRecPtr lsn, XLogRecord *record)
{
- char *recdata = XLogRecGetData(record);
xl_heap_multi_insert *xlrec;
RelFileNode rnode;
BlockNumber blkno;
* Insertion doesn't overwrite MVCC data, so no conflict processing is
* required.
*/
+ xlrec = (xl_heap_multi_insert *) XLogRecGetData(record);
- xlrec = (xl_heap_multi_insert *) recdata;
- recdata += SizeOfHeapMultiInsert;
-
- rnode = xlrec->node;
- blkno = xlrec->blkno;
-
- /*
- * If we're reinitializing the page, the tuples are stored in order from
- * FirstOffsetNumber. Otherwise there's an array of offsets in the WAL
- * record.
- */
- if (!isinit)
- recdata += sizeof(OffsetNumber) * xlrec->ntuples;
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
/*
* The visibility map may need to be fixed even if the heap page is
if (isinit)
{
- XLogReadBufferForRedoExtended(lsn, record, 0,
- rnode, MAIN_FORKNUM, blkno,
- RBM_ZERO, false, &buffer);
+ XLogReadBufferForRedoExtended(lsn, record, 0, RBM_ZERO, false, &buffer);
page = BufferGetPage(buffer);
PageInit(page, BufferGetPageSize(buffer), 0);
action = BLK_NEEDS_REDO;
}
else
- action = XLogReadBufferForRedo(lsn, record, 0, rnode, blkno, &buffer);
-
+ action = XLogReadBufferForRedo(lsn, record, 0, &buffer);
if (action == BLK_NEEDS_REDO)
{
- page = BufferGetPage(buffer);
+ char *tupdata;
+ char *endptr;
+ Size len;
+
+ /* Tuples are stored as block data */
+ tupdata = XLogRecGetBlockData(record, 0, &len);
+ endptr = tupdata + len;
+
+ page = (Page) BufferGetPage(buffer);
+
for (i = 0; i < xlrec->ntuples; i++)
{
OffsetNumber offnum;
xl_multi_insert_tuple *xlhdr;
+ /*
+ * If we're reinitializing the page, the tuples are stored in order
+ * from FirstOffsetNumber. Otherwise there's an array of offsets in
+ * the WAL record, and the tuples come after that.
+ */
if (isinit)
offnum = FirstOffsetNumber + i;
else
if (PageGetMaxOffsetNumber(page) + 1 < offnum)
elog(PANIC, "heap_multi_insert_redo: invalid max offset number");
- xlhdr = (xl_multi_insert_tuple *) SHORTALIGN(recdata);
- recdata = ((char *) xlhdr) + SizeOfMultiInsertTuple;
+ xlhdr = (xl_multi_insert_tuple *) SHORTALIGN(tupdata);
+ tupdata = ((char *) xlhdr) + SizeOfMultiInsertTuple;
newlen = xlhdr->datalen;
Assert(newlen <= MaxHeapTupleSize);
MemSet((char *) htup, 0, sizeof(HeapTupleHeaderData));
/* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
memcpy((char *) htup + offsetof(HeapTupleHeaderData, t_bits),
- (char *) recdata,
+ (char *) tupdata,
newlen);
- recdata += newlen;
+ tupdata += newlen;
newlen += offsetof(HeapTupleHeaderData, t_bits);
htup->t_infomask2 = xlhdr->t_infomask2;
if (offnum == InvalidOffsetNumber)
elog(PANIC, "heap_multi_insert_redo: failed to add tuple");
}
+ if (tupdata != endptr)
+ elog(PANIC, "heap_multi_insert_redo: total tuple length mismatch");
freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
* totally accurate anyway.
*/
if (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)
- XLogRecordPageWithFreeSpace(xlrec->node, blkno, freespace);
+ XLogRecordPageWithFreeSpace(rnode, blkno, freespace);
}
/*
RelFileNode rnode;
BlockNumber oldblk;
BlockNumber newblk;
+ ItemPointerData newtid;
Buffer obuffer,
nbuffer;
Page page;
ItemId lp = NULL;
HeapTupleData oldtup;
HeapTupleHeader htup;
- char *recdata;
uint16 prefixlen = 0,
suffixlen = 0;
char *newp;
HeapTupleHeaderData hdr;
char data[MaxHeapTupleSize];
} tbuf;
- xl_heap_header_len xlhdr;
+ xl_heap_header xlhdr;
uint32 newlen;
Size freespace = 0;
XLogRedoAction oldaction;
oldtup.t_data = NULL;
oldtup.t_len = 0;
- rnode = xlrec->target.node;
- newblk = ItemPointerGetBlockNumber(&xlrec->newtid);
- oldblk = ItemPointerGetBlockNumber(&xlrec->target.tid);
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &newblk);
+ if (XLogRecHasBlockRef(record, 1))
+ {
+ /* HOT updates are never done across pages */
+ Assert(!hot_update);
+ XLogRecGetBlockTag(record, 1, NULL, NULL, &oldblk);
+ }
+ else
+ oldblk = newblk;
+
+ ItemPointerSet(&newtid, newblk, xlrec->new_offnum);
/*
* The visibility map may need to be fixed even if the heap page is
*/
/* Deal with old tuple version */
- oldaction = XLogReadBufferForRedo(lsn, record, 0, rnode, oldblk, &obuffer);
+ oldaction = XLogReadBufferForRedo(lsn, record, (oldblk == newblk) ? 0 : 1,
+ &obuffer);
if (oldaction == BLK_NEEDS_REDO)
{
- page = (Page) BufferGetPage(obuffer);
-
- offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
+ page = BufferGetPage(obuffer);
+ offnum = xlrec->old_offnum;
if (PageGetMaxOffsetNumber(page) >= offnum)
lp = PageGetItemId(page, offnum);
HeapTupleHeaderSetXmax(htup, xlrec->old_xmax);
HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
/* Set forward chain link in t_ctid */
- htup->t_ctid = xlrec->newtid;
+ htup->t_ctid = newtid;
/* Mark the page as a candidate for pruning */
PageSetPrunable(page, record->xl_xid);
}
else if (record->xl_info & XLOG_HEAP_INIT_PAGE)
{
- XLogReadBufferForRedoExtended(lsn, record, 1,
- rnode, MAIN_FORKNUM, newblk,
- RBM_ZERO, false, &nbuffer);
+ XLogReadBufferForRedoExtended(lsn, record, 0, RBM_ZERO, false,
+ &nbuffer);
page = (Page) BufferGetPage(nbuffer);
PageInit(page, BufferGetPageSize(nbuffer), 0);
newaction = BLK_NEEDS_REDO;
}
else
- newaction = XLogReadBufferForRedo(lsn, record, 1, rnode, newblk,
- &nbuffer);
+ newaction = XLogReadBufferForRedo(lsn, record, 0, &nbuffer);
/*
* The visibility map may need to be fixed even if the heap page is
*/
if (xlrec->flags & XLOG_HEAP_NEW_ALL_VISIBLE_CLEARED)
{
- Relation reln = CreateFakeRelcacheEntry(xlrec->target.node);
+ Relation reln = CreateFakeRelcacheEntry(rnode);
Buffer vmbuffer = InvalidBuffer;
visibilitymap_pin(reln, newblk, &vmbuffer);
/* Deal with new tuple */
if (newaction == BLK_NEEDS_REDO)
{
- page = (Page) BufferGetPage(nbuffer);
+ char *recdata;
+ char *recdataend;
+ Size datalen;
+ Size tuplen;
+
+ recdata = XLogRecGetBlockData(record, 0, &datalen);
+ recdataend = recdata + datalen;
- offnum = ItemPointerGetOffsetNumber(&(xlrec->newtid));
+ page = BufferGetPage(nbuffer);
+
+ offnum = xlrec->new_offnum;
if (PageGetMaxOffsetNumber(page) + 1 < offnum)
elog(PANIC, "heap_update_redo: invalid max offset number");
- recdata = (char *) xlrec + SizeOfHeapUpdate;
-
if (xlrec->flags & XLOG_HEAP_PREFIX_FROM_OLD)
{
Assert(newblk == oldblk);
recdata += sizeof(uint16);
}
- memcpy((char *) &xlhdr, recdata, SizeOfHeapHeaderLen);
- recdata += SizeOfHeapHeaderLen;
+ memcpy((char *) &xlhdr, recdata, SizeOfHeapHeader);
+ recdata += SizeOfHeapHeader;
+
+ tuplen = recdataend - recdata;
+ Assert(tuplen <= MaxHeapTupleSize);
- Assert(xlhdr.t_len + prefixlen + suffixlen <= MaxHeapTupleSize);
htup = &tbuf.hdr;
MemSet((char *) htup, 0, sizeof(HeapTupleHeaderData));
int len;
/* copy bitmap [+ padding] [+ oid] from WAL record */
- len = xlhdr.header.t_hoff - offsetof(HeapTupleHeaderData, t_bits);
+ len = xlhdr.t_hoff - offsetof(HeapTupleHeaderData, t_bits);
memcpy(newp, recdata, len);
recdata += len;
newp += len;
newp += prefixlen;
/* copy new tuple data from WAL record */
- len = xlhdr.t_len - (xlhdr.header.t_hoff - offsetof(HeapTupleHeaderData, t_bits));
+ len = tuplen - (xlhdr.t_hoff - offsetof(HeapTupleHeaderData, t_bits));
memcpy(newp, recdata, len);
recdata += len;
newp += len;
* copy bitmap [+ padding] [+ oid] + data from record, all in one
* go
*/
- memcpy(newp, recdata, xlhdr.t_len);
- recdata += xlhdr.t_len;
- newp += xlhdr.t_len;
+ memcpy(newp, recdata, tuplen);
+ recdata += tuplen;
+ newp += tuplen;
}
+ Assert(recdata == recdataend);
+
/* copy suffix from old tuple */
if (suffixlen > 0)
memcpy(newp, (char *) oldtup.t_data + oldtup.t_len - suffixlen, suffixlen);
- newlen = offsetof(HeapTupleHeaderData, t_bits) + xlhdr.t_len + prefixlen + suffixlen;
- htup->t_infomask2 = xlhdr.header.t_infomask2;
- htup->t_infomask = xlhdr.header.t_infomask;
- htup->t_hoff = xlhdr.header.t_hoff;
+ newlen = offsetof(HeapTupleHeaderData, t_bits) + tuplen + prefixlen + suffixlen;
+ htup->t_infomask2 = xlhdr.t_infomask2;
+ htup->t_infomask = xlhdr.t_infomask;
+ htup->t_hoff = xlhdr.t_hoff;
HeapTupleHeaderSetXmin(htup, record->xl_xid);
HeapTupleHeaderSetCmin(htup, FirstCommandId);
HeapTupleHeaderSetXmax(htup, xlrec->new_xmax);
/* Make sure there is no forward chain link in t_ctid */
- htup->t_ctid = xlrec->newtid;
+ htup->t_ctid = newtid;
offnum = PageAddItem(page, (Item) htup, newlen, offnum, true, true);
if (offnum == InvalidOffsetNumber)
PageSetLSN(page, lsn);
MarkBufferDirty(nbuffer);
}
+
if (BufferIsValid(nbuffer) && nbuffer != obuffer)
UnlockReleaseBuffer(nbuffer);
if (BufferIsValid(obuffer))
* totally accurate anyway.
*/
if (newaction == BLK_NEEDS_REDO && !hot_update && freespace < BLCKSZ / 5)
- XLogRecordPageWithFreeSpace(xlrec->target.node,
- ItemPointerGetBlockNumber(&(xlrec->newtid)),
- freespace);
+ XLogRecordPageWithFreeSpace(rnode, newblk, freespace);
}
static void
ItemId lp = NULL;
HeapTupleHeader htup;
- if (XLogReadBufferForRedo(lsn, record, 0, xlrec->target.node,
- ItemPointerGetBlockNumber(&xlrec->target.tid),
- &buffer) == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(lsn, record, 0, &buffer) == BLK_NEEDS_REDO)
{
page = (Page) BufferGetPage(buffer);
- offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
+ offnum = xlrec->offnum;
if (PageGetMaxOffsetNumber(page) >= offnum)
lp = PageGetItemId(page, offnum);
{
HeapTupleHeaderClearHotUpdated(htup);
/* Make sure there is no forward chain link in t_ctid */
- htup->t_ctid = xlrec->target.tid;
+ ItemPointerSet(&htup->t_ctid,
+ BufferGetBlockNumber(buffer),
+ offnum);
}
HeapTupleHeaderSetXmax(htup, xlrec->locking_xid);
HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
ItemId lp = NULL;
HeapTupleHeader htup;
- if (XLogReadBufferForRedo(lsn, record, 0, xlrec->target.node,
- ItemPointerGetBlockNumber(&(xlrec->target.tid)),
- &buffer) == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(lsn, record, 0, &buffer) == BLK_NEEDS_REDO)
{
page = BufferGetPage(buffer);
- offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
+
+ offnum = xlrec->offnum;
if (PageGetMaxOffsetNumber(page) >= offnum)
lp = PageGetItemId(page, offnum);
ItemId lp = NULL;
HeapTupleHeader htup;
uint32 oldlen;
- uint32 newlen;
+ Size newlen;
- if (XLogReadBufferForRedo(lsn, record, 0, xlrec->target.node,
- ItemPointerGetBlockNumber(&(xlrec->target.tid)),
- &buffer) == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(lsn, record, 0, &buffer) == BLK_NEEDS_REDO)
{
+ char *newtup = XLogRecGetBlockData(record, 0, &newlen);
+
page = BufferGetPage(buffer);
- offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
+ offnum = xlrec->offnum;
if (PageGetMaxOffsetNumber(page) >= offnum)
lp = PageGetItemId(page, offnum);
htup = (HeapTupleHeader) PageGetItem(page, lp);
oldlen = ItemIdGetLength(lp) - htup->t_hoff;
- newlen = record->xl_len - SizeOfHeapInplace;
if (oldlen != newlen)
elog(PANIC, "heap_inplace_redo: wrong tuple length");
- memcpy((char *) htup + htup->t_hoff,
- (char *) xlrec + SizeOfHeapInplace,
- newlen);
+ memcpy((char *) htup + htup->t_hoff, newtup, newlen);
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
hash_seq_init(&seq_status, state->rs_logical_mappings);
while ((src = (RewriteMappingFile *) hash_seq_search(&seq_status)) != NULL)
{
- XLogRecData rdata[2];
char *waldata;
char *waldata_start;
xl_heap_rewrite_mapping xlrec;
xlrec.offset = src->off;
xlrec.start_lsn = state->rs_begin_lsn;
- rdata[0].data = (char *) (&xlrec);
- rdata[0].len = sizeof(xlrec);
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
-
/* write all mappings consecutively */
len = src->num_mappings * sizeof(LogicalRewriteMappingData);
waldata_start = waldata = palloc(len);
written, len)));
src->off += len;
- rdata[1].data = waldata_start;
- rdata[1].len = len;
- rdata[1].buffer = InvalidBuffer;
- rdata[1].next = NULL;
+ XLogBeginInsert();
+ XLogRegisterData((char *) (&xlrec), sizeof(xlrec));
+ XLogRegisterData(waldata_start, len);
/* write xlog record */
- XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_REWRITE, rdata);
+ XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_REWRITE);
pfree(waldata_start);
}
if (RelationNeedsWAL(rel))
{
xl_btree_insert xlrec;
- BlockNumber xlleftchild;
xl_btree_metadata xlmeta;
uint8 xlinfo;
XLogRecPtr recptr;
- XLogRecData rdata[4];
- XLogRecData *nextrdata;
IndexTupleData trunctuple;
- xlrec.target.node = rel->rd_node;
- ItemPointerSet(&(xlrec.target.tid), itup_blkno, itup_off);
+ xlrec.offnum = itup_off;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = SizeOfBtreeInsert;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = nextrdata = &(rdata[1]);
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, SizeOfBtreeInsert);
if (P_ISLEAF(lpageop))
xlinfo = XLOG_BTREE_INSERT_LEAF;
else
{
/*
- * Include the block number of the left child, whose
- * INCOMPLETE_SPLIT flag was cleared.
+ * Register the left child whose INCOMPLETE_SPLIT flag was
+ * cleared.
*/
- xlleftchild = BufferGetBlockNumber(cbuf);
- nextrdata->data = (char *) &xlleftchild;
- nextrdata->len = sizeof(BlockNumber);
- nextrdata->buffer = cbuf;
- nextrdata->buffer_std = true;
- nextrdata->next = nextrdata + 1;
- nextrdata++;
+ XLogRegisterBuffer(1, cbuf, REGBUF_STANDARD);
xlinfo = XLOG_BTREE_INSERT_UPPER;
}
xlmeta.fastroot = metad->btm_fastroot;
xlmeta.fastlevel = metad->btm_fastlevel;
- nextrdata->data = (char *) &xlmeta;
- nextrdata->len = sizeof(xl_btree_metadata);
- nextrdata->buffer = InvalidBuffer;
- nextrdata->next = nextrdata + 1;
- nextrdata++;
+ XLogRegisterBuffer(2, metabuf, REGBUF_WILL_INIT);
+ XLogRegisterBufData(2, (char *) &xlmeta, sizeof(xl_btree_metadata));
xlinfo = XLOG_BTREE_INSERT_META;
}
/* Read comments in _bt_pgaddtup */
+ XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
if (!P_ISLEAF(lpageop) && newitemoff == P_FIRSTDATAKEY(lpageop))
{
trunctuple = *itup;
trunctuple.t_info = sizeof(IndexTupleData);
- nextrdata->data = (char *) &trunctuple;
- nextrdata->len = sizeof(IndexTupleData);
+ XLogRegisterBufData(0, (char *) &trunctuple,
+ sizeof(IndexTupleData));
}
else
- {
- nextrdata->data = (char *) itup;
- nextrdata->len = IndexTupleDSize(*itup);
- }
- nextrdata->buffer = buf;
- nextrdata->buffer_std = true;
- nextrdata->next = NULL;
+ XLogRegisterBufData(0, (char *) itup, IndexTupleDSize(*itup));
- recptr = XLogInsert(RM_BTREE_ID, xlinfo, rdata);
+ recptr = XLogInsert(RM_BTREE_ID, xlinfo);
if (BufferIsValid(metabuf))
{
xl_btree_split xlrec;
uint8 xlinfo;
XLogRecPtr recptr;
- XLogRecData rdata[7];
- XLogRecData *lastrdata;
- BlockNumber cblkno;
-
- xlrec.node = rel->rd_node;
- xlrec.leftsib = origpagenumber;
- xlrec.rightsib = rightpagenumber;
- xlrec.rnext = ropaque->btpo_next;
+
xlrec.level = ropaque->btpo.level;
xlrec.firstright = firstright;
+ xlrec.newitemoff = newitemoff;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = SizeOfBtreeSplit;
- rdata[0].buffer = InvalidBuffer;
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, SizeOfBtreeSplit);
- lastrdata = &rdata[0];
+ XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
+ XLogRegisterBuffer(1, rbuf, REGBUF_WILL_INIT);
+ /* Log the right sibling, because we've changed its prev-pointer. */
+ if (!P_RIGHTMOST(ropaque))
+ XLogRegisterBuffer(2, sbuf, REGBUF_STANDARD);
+ if (BufferIsValid(cbuf))
+ XLogRegisterBuffer(3, cbuf, REGBUF_STANDARD);
/*
- * Log the new item and its offset, if it was inserted on the left
+ * Log the new item, if it was inserted on the left
* page. (If it was put on the right page, we don't need to explicitly
* WAL log it because it's included with all the other items on the
* right page.) Show the new item as belonging to the left page
* though, to support archive compression of these records.
*/
if (newitemonleft)
- {
- lastrdata->next = lastrdata + 1;
- lastrdata++;
-
- lastrdata->data = (char *) &newitemoff;
- lastrdata->len = sizeof(OffsetNumber);
- lastrdata->buffer = InvalidBuffer;
-
- lastrdata->next = lastrdata + 1;
- lastrdata++;
-
- lastrdata->data = (char *) newitem;
- lastrdata->len = MAXALIGN(newitemsz);
- lastrdata->buffer = buf; /* backup block 0 */
- lastrdata->buffer_std = true;
- }
+ XLogRegisterBufData(0, (char *) newitem, MAXALIGN(newitemsz));
/* Log left page */
if (!isleaf)
{
- lastrdata->next = lastrdata + 1;
- lastrdata++;
-
/*
* We must also log the left page's high key, because the right
* page's leftmost key is suppressed on non-leaf levels. Show it
*/
itemid = PageGetItemId(origpage, P_HIKEY);
item = (IndexTuple) PageGetItem(origpage, itemid);
- lastrdata->data = (char *) item;
- lastrdata->len = MAXALIGN(IndexTupleSize(item));
- lastrdata->buffer = buf; /* backup block 0 */
- lastrdata->buffer_std = true;
- }
-
- if (isleaf && !newitemonleft)
- {
- lastrdata->next = lastrdata + 1;
- lastrdata++;
-
- /*
- * Although we don't need to WAL-log anything on the left page, we
- * still need XLogInsert to consider storing a full-page image of
- * the left page, so make an empty entry referencing that buffer.
- * This also ensures that the left page is always backup block 0.
- */
- lastrdata->data = NULL;
- lastrdata->len = 0;
- lastrdata->buffer = buf; /* backup block 0 */
- lastrdata->buffer_std = true;
- }
-
- /*
- * Log block number of left child, whose INCOMPLETE_SPLIT flag this
- * insertion clears.
- */
- if (!isleaf)
- {
- lastrdata->next = lastrdata + 1;
- lastrdata++;
-
- cblkno = BufferGetBlockNumber(cbuf);
- lastrdata->data = (char *) &cblkno;
- lastrdata->len = sizeof(BlockNumber);
- lastrdata->buffer = cbuf; /* backup block 1 */
- lastrdata->buffer_std = true;
+ XLogRegisterBufData(0, (char *) item, MAXALIGN(IndexTupleSize(item)));
}
/*
* and so the item pointers can be reconstructed. See comments for
* _bt_restore_page().
*/
- lastrdata->next = lastrdata + 1;
- lastrdata++;
-
- lastrdata->data = (char *) rightpage +
- ((PageHeader) rightpage)->pd_upper;
- lastrdata->len = ((PageHeader) rightpage)->pd_special -
- ((PageHeader) rightpage)->pd_upper;
- lastrdata->buffer = InvalidBuffer;
-
- /* Log the right sibling, because we've changed its' prev-pointer. */
- if (!P_RIGHTMOST(ropaque))
- {
- lastrdata->next = lastrdata + 1;
- lastrdata++;
-
- lastrdata->data = NULL;
- lastrdata->len = 0;
- lastrdata->buffer = sbuf; /* bkp block 1 (leaf) or 2 (non-leaf) */
- lastrdata->buffer_std = true;
- }
-
- lastrdata->next = NULL;
+ XLogRegisterBufData(1,
+ (char *) rightpage + ((PageHeader) rightpage)->pd_upper,
+ ((PageHeader) rightpage)->pd_special - ((PageHeader) rightpage)->pd_upper);
if (isroot)
xlinfo = newitemonleft ? XLOG_BTREE_SPLIT_L_ROOT : XLOG_BTREE_SPLIT_R_ROOT;
else
xlinfo = newitemonleft ? XLOG_BTREE_SPLIT_L : XLOG_BTREE_SPLIT_R;
- recptr = XLogInsert(RM_BTREE_ID, xlinfo, rdata);
+ recptr = XLogInsert(RM_BTREE_ID, xlinfo);
PageSetLSN(origpage, recptr);
PageSetLSN(rightpage, recptr);
{
xl_btree_newroot xlrec;
XLogRecPtr recptr;
- XLogRecData rdata[3];
+ xl_btree_metadata md;
- xlrec.node = rel->rd_node;
xlrec.rootblk = rootblknum;
xlrec.level = metad->btm_level;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = SizeOfBtreeNewroot;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, SizeOfBtreeNewroot);
+
+ XLogRegisterBuffer(0, rootbuf, REGBUF_WILL_INIT);
+ XLogRegisterBuffer(1, lbuf, REGBUF_STANDARD);
+ XLogRegisterBuffer(2, metabuf, REGBUF_WILL_INIT);
+
+ md.root = rootblknum;
+ md.level = metad->btm_level;
+ md.fastroot = rootblknum;
+ md.fastlevel = metad->btm_level;
+
+ XLogRegisterBufData(2, (char *) &md, sizeof(xl_btree_metadata));
/*
* Direct access to page is not good but faster - we should implement
* some new func in page API.
*/
- rdata[1].data = (char *) rootpage + ((PageHeader) rootpage)->pd_upper;
- rdata[1].len = ((PageHeader) rootpage)->pd_special -
- ((PageHeader) rootpage)->pd_upper;
- rdata[1].buffer = InvalidBuffer;
- rdata[1].next = &(rdata[2]);
-
- /* Make a full-page image of the left child if needed */
- rdata[2].data = NULL;
- rdata[2].len = 0;
- rdata[2].buffer = lbuf;
- rdata[2].next = NULL;
-
- recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT, rdata);
+ XLogRegisterBufData(0,
+ (char *) rootpage + ((PageHeader) rootpage)->pd_upper,
+ ((PageHeader) rootpage)->pd_special -
+ ((PageHeader) rootpage)->pd_upper);
+
+ recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT);
PageSetLSN(lpage, recptr);
PageSetLSN(rootpage, recptr);
static bool _bt_lock_branch_parent(Relation rel, BlockNumber child,
BTStack stack, Buffer *topparent, OffsetNumber *topoff,
BlockNumber *target, BlockNumber *rightsib);
-static void _bt_log_reuse_page(Relation rel, BlockNumber blkno,
- TransactionId latestRemovedXid);
+static void _bt_log_reuse_page(Relation rel, BlockNumber blkno, TransactionId latestRemovedXid);
/*
* _bt_initmetapage() -- Fill a page buffer with a correct metapage image
{
xl_btree_newroot xlrec;
XLogRecPtr recptr;
- XLogRecData rdata;
+ xl_btree_metadata md;
+
+ XLogBeginInsert();
+ XLogRegisterBuffer(0, rootbuf, REGBUF_WILL_INIT);
+ XLogRegisterBuffer(2, metabuf, REGBUF_WILL_INIT);
+
+ md.root = rootblkno;
+ md.level = 0;
+ md.fastroot = rootblkno;
+ md.fastlevel = 0;
+
+ XLogRegisterBufData(2, (char *) &md, sizeof(xl_btree_metadata));
- xlrec.node = rel->rd_node;
xlrec.rootblk = rootblkno;
xlrec.level = 0;
- rdata.data = (char *) &xlrec;
- rdata.len = SizeOfBtreeNewroot;
- rdata.buffer = InvalidBuffer;
- rdata.next = NULL;
+ XLogRegisterData((char *) &xlrec, SizeOfBtreeNewroot);
- recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT, &rdata);
+ recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT);
PageSetLSN(rootpage, recptr);
PageSetLSN(metapg, recptr);
static void
_bt_log_reuse_page(Relation rel, BlockNumber blkno, TransactionId latestRemovedXid)
{
- if (!RelationNeedsWAL(rel))
- return;
-
- /* No ereport(ERROR) until changes are logged */
- START_CRIT_SECTION();
+ xl_btree_reuse_page xlrec_reuse;
/*
- * We don't do MarkBufferDirty here because we're about to initialise the
- * page, and nobody else can see it yet.
+ * Note that we don't register the buffer with the record, because this
+ * operation doesn't modify the page. This record only exists to provide
+ * a conflict point for Hot Standby.
*/
/* XLOG stuff */
- {
- XLogRecData rdata[1];
- xl_btree_reuse_page xlrec_reuse;
-
- xlrec_reuse.node = rel->rd_node;
- xlrec_reuse.block = blkno;
- xlrec_reuse.latestRemovedXid = latestRemovedXid;
- rdata[0].data = (char *) &xlrec_reuse;
- rdata[0].len = SizeOfBtreeReusePage;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = NULL;
-
- XLogInsert(RM_BTREE_ID, XLOG_BTREE_REUSE_PAGE, rdata);
+ xlrec_reuse.node = rel->rd_node;
+ xlrec_reuse.block = blkno;
+ xlrec_reuse.latestRemovedXid = latestRemovedXid;
- /*
- * We don't do PageSetLSN here because we're about to initialise the
- * page, so no need.
- */
- }
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec_reuse, SizeOfBtreeReusePage);
- END_CRIT_SECTION();
+ XLogInsert(RM_BTREE_ID, XLOG_BTREE_REUSE_PAGE);
}
/*
* WAL record that will allow us to conflict with queries
* running on standby.
*/
- if (XLogStandbyInfoActive())
+ if (XLogStandbyInfoActive() && RelationNeedsWAL(rel))
{
BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
if (RelationNeedsWAL(rel))
{
XLogRecPtr recptr;
- XLogRecData rdata[2];
xl_btree_vacuum xlrec_vacuum;
- xlrec_vacuum.node = rel->rd_node;
- xlrec_vacuum.block = BufferGetBlockNumber(buf);
-
xlrec_vacuum.lastBlockVacuumed = lastBlockVacuumed;
- rdata[0].data = (char *) &xlrec_vacuum;
- rdata[0].len = SizeOfBtreeVacuum;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
+
+ XLogBeginInsert();
+ XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
+ XLogRegisterData((char *) &xlrec_vacuum, SizeOfBtreeVacuum);
/*
* The target-offsets array is not in the buffer, but pretend that it
* need not be stored too.
*/
if (nitems > 0)
- {
- rdata[1].data = (char *) itemnos;
- rdata[1].len = nitems * sizeof(OffsetNumber);
- }
- else
- {
- rdata[1].data = NULL;
- rdata[1].len = 0;
- }
- rdata[1].buffer = buf;
- rdata[1].buffer_std = true;
- rdata[1].next = NULL;
+ XLogRegisterBufData(0, (char *) itemnos, nitems * sizeof(OffsetNumber));
- recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_VACUUM, rdata);
+ recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_VACUUM);
PageSetLSN(page, recptr);
}
if (RelationNeedsWAL(rel))
{
XLogRecPtr recptr;
- XLogRecData rdata[3];
xl_btree_delete xlrec_delete;
- xlrec_delete.node = rel->rd_node;
xlrec_delete.hnode = heapRel->rd_node;
- xlrec_delete.block = BufferGetBlockNumber(buf);
xlrec_delete.nitems = nitems;
- rdata[0].data = (char *) &xlrec_delete;
- rdata[0].len = SizeOfBtreeDelete;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
+ XLogBeginInsert();
+ XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
+ XLogRegisterData((char *) &xlrec_delete, SizeOfBtreeDelete);
/*
* We need the target-offsets array whether or not we store the whole
* buffer, to allow us to find the latestRemovedXid on a standby
* server.
*/
- rdata[1].data = (char *) itemnos;
- rdata[1].len = nitems * sizeof(OffsetNumber);
- rdata[1].buffer = InvalidBuffer;
- rdata[1].next = &(rdata[2]);
-
- rdata[2].data = NULL;
- rdata[2].len = 0;
- rdata[2].buffer = buf;
- rdata[2].buffer_std = true;
- rdata[2].next = NULL;
+ XLogRegisterData((char *) itemnos, nitems * sizeof(OffsetNumber));
- recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DELETE, rdata);
+ recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DELETE);
PageSetLSN(page, recptr);
}
{
xl_btree_mark_page_halfdead xlrec;
XLogRecPtr recptr;
- XLogRecData rdata[2];
- xlrec.target.node = rel->rd_node;
- ItemPointerSet(&(xlrec.target.tid), BufferGetBlockNumber(topparent), topoff);
+ xlrec.poffset = topoff;
xlrec.leafblk = leafblkno;
if (target != leafblkno)
xlrec.topparent = target;
else
xlrec.topparent = InvalidBlockNumber;
+ XLogBeginInsert();
+ XLogRegisterBuffer(0, leafbuf, REGBUF_WILL_INIT);
+ XLogRegisterBuffer(1, topparent, REGBUF_STANDARD);
+
page = BufferGetPage(leafbuf);
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
xlrec.leftblk = opaque->btpo_prev;
xlrec.rightblk = opaque->btpo_next;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = SizeOfBtreeMarkPageHalfDead;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
-
- rdata[1].data = NULL;
- rdata[1].len = 0;
- rdata[1].buffer = topparent;
- rdata[1].buffer_std = true;
- rdata[1].next = NULL;
+ XLogRegisterData((char *) &xlrec, SizeOfBtreeMarkPageHalfDead);
- recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_MARK_PAGE_HALFDEAD, rdata);
+ recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_MARK_PAGE_HALFDEAD);
page = BufferGetPage(topparent);
PageSetLSN(page, recptr);
* Here we begin doing the deletion.
*/
+ /*
+ * The WAL record needs at most 5 buffer references, which is more than
+ * the default allowance.
+ */
+ XLogEnsureRecordSpace(5, 0);
+
/* No ereport(ERROR) until changes are logged */
START_CRIT_SECTION();
xl_btree_metadata xlmeta;
uint8 xlinfo;
XLogRecPtr recptr;
- XLogRecData rdata[4];
- XLogRecData *nextrdata;
- xlrec.node = rel->rd_node;
+ XLogBeginInsert();
+
+ XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);
+ if (BufferIsValid(lbuf))
+ XLogRegisterBuffer(1, lbuf, REGBUF_STANDARD);
+ XLogRegisterBuffer(2, rbuf, REGBUF_STANDARD);
+ if (target != leafblkno)
+ XLogRegisterBuffer(3, leafbuf, REGBUF_WILL_INIT);
/* information on the unlinked block */
- xlrec.deadblk = target;
xlrec.leftsib = leftsib;
xlrec.rightsib = rightsib;
xlrec.btpo_xact = opaque->btpo.xact;
/* information needed to recreate the leaf block (if not the target) */
- xlrec.leafblk = leafblkno;
xlrec.leafleftsib = leafleftsib;
xlrec.leafrightsib = leafrightsib;
xlrec.topparent = nextchild;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = SizeOfBtreeUnlinkPage;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = nextrdata = &(rdata[1]);
+ XLogRegisterData((char *) &xlrec, SizeOfBtreeUnlinkPage);
if (BufferIsValid(metabuf))
{
+ XLogRegisterBuffer(4, metabuf, REGBUF_WILL_INIT);
+
xlmeta.root = metad->btm_root;
xlmeta.level = metad->btm_level;
xlmeta.fastroot = metad->btm_fastroot;
xlmeta.fastlevel = metad->btm_fastlevel;
- nextrdata->data = (char *) &xlmeta;
- nextrdata->len = sizeof(xl_btree_metadata);
- nextrdata->buffer = InvalidBuffer;
- nextrdata->next = nextrdata + 1;
- nextrdata++;
+ XLogRegisterBufData(4, (char *) &xlmeta, sizeof(xl_btree_metadata));
xlinfo = XLOG_BTREE_UNLINK_PAGE_META;
}
else
xlinfo = XLOG_BTREE_UNLINK_PAGE;
- nextrdata->data = NULL;
- nextrdata->len = 0;
- nextrdata->buffer = rbuf;
- nextrdata->buffer_std = true;
- nextrdata->next = NULL;
-
- if (BufferIsValid(lbuf))
- {
- nextrdata->next = nextrdata + 1;
- nextrdata++;
- nextrdata->data = NULL;
- nextrdata->len = 0;
- nextrdata->buffer = lbuf;
- nextrdata->buffer_std = true;
- nextrdata->next = NULL;
- }
-
- recptr = XLogInsert(RM_BTREE_ID, xlinfo, rdata);
+ recptr = XLogInsert(RM_BTREE_ID, xlinfo);
if (BufferIsValid(metabuf))
{
}
static void
-_bt_restore_meta(RelFileNode rnode, XLogRecPtr lsn,
- BlockNumber root, uint32 level,
- BlockNumber fastroot, uint32 fastlevel)
+_bt_restore_meta(XLogRecPtr lsn, XLogRecord *record, uint8 block_id)
{
Buffer metabuf;
Page metapg;
BTMetaPageData *md;
BTPageOpaque pageop;
+ xl_btree_metadata *xlrec;
+ char *ptr;
+ Size len;
- metabuf = XLogReadBuffer(rnode, BTREE_METAPAGE, true);
- Assert(BufferIsValid(metabuf));
+ XLogReadBufferForRedoExtended(lsn, record, block_id, RBM_ZERO, false,
+ &metabuf);
+ ptr = XLogRecGetBlockData(record, block_id, &len);
+
+ Assert(len == sizeof(xl_btree_metadata));
+ Assert(BufferGetBlockNumber(metabuf) == BTREE_METAPAGE);
+ xlrec = (xl_btree_metadata *) ptr;
metapg = BufferGetPage(metabuf);
_bt_pageinit(metapg, BufferGetPageSize(metabuf));
md = BTPageGetMeta(metapg);
md->btm_magic = BTREE_MAGIC;
md->btm_version = BTREE_VERSION;
- md->btm_root = root;
- md->btm_level = level;
- md->btm_fastroot = fastroot;
- md->btm_fastlevel = fastlevel;
+ md->btm_root = xlrec->root;
+ md->btm_level = xlrec->level;
+ md->btm_fastroot = xlrec->fastroot;
+ md->btm_fastlevel = xlrec->fastlevel;
pageop = (BTPageOpaque) PageGetSpecialPointer(metapg);
pageop->btpo_flags = BTP_META;
* types that can insert a downlink: insert, split, and newroot.
*/
static void
-_bt_clear_incomplete_split(XLogRecPtr lsn, XLogRecord *record,
- int block_index,
- RelFileNode rnode, BlockNumber cblock)
+_bt_clear_incomplete_split(XLogRecPtr lsn, XLogRecord *record, uint8 block_id)
{
Buffer buf;
- if (XLogReadBufferForRedo(lsn, record, block_index, rnode, cblock, &buf)
- == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(lsn, record, block_id, &buf) == BLK_NEEDS_REDO)
{
Page page = (Page) BufferGetPage(buf);
BTPageOpaque pageop = (BTPageOpaque) PageGetSpecialPointer(page);
-
Assert((pageop->btpo_flags & BTP_INCOMPLETE_SPLIT) != 0);
pageop->btpo_flags &= ~BTP_INCOMPLETE_SPLIT;
xl_btree_insert *xlrec = (xl_btree_insert *) XLogRecGetData(record);
Buffer buffer;
Page page;
- char *datapos;
- int datalen;
- xl_btree_metadata md;
- BlockNumber cblkno = 0;
- int main_blk_index;
-
- datapos = (char *) xlrec + SizeOfBtreeInsert;
- datalen = record->xl_len - SizeOfBtreeInsert;
-
- /*
- * if this insert finishes a split at lower level, extract the block
- * number of the (left) child.
- */
- if (!isleaf && (record->xl_info & XLR_BKP_BLOCK(0)) == 0)
- {
- memcpy(&cblkno, datapos, sizeof(BlockNumber));
- Assert(cblkno != 0);
- datapos += sizeof(BlockNumber);
- datalen -= sizeof(BlockNumber);
- }
- if (ismeta)
- {
- memcpy(&md, datapos, sizeof(xl_btree_metadata));
- datapos += sizeof(xl_btree_metadata);
- datalen -= sizeof(xl_btree_metadata);
- }
/*
* Insertion to an internal page finishes an incomplete split at the child
* cannot be updates happening.
*/
if (!isleaf)
+ _bt_clear_incomplete_split(lsn, record, 1);
+ if (XLogReadBufferForRedo(lsn, record, 0, &buffer) == BLK_NEEDS_REDO)
{
- _bt_clear_incomplete_split(lsn, record, 0, xlrec->target.node, cblkno);
- main_blk_index = 1;
- }
- else
- main_blk_index = 0;
+ Size datalen;
+ char *datapos = XLogRecGetBlockData(record, 0, &datalen);
- if (XLogReadBufferForRedo(lsn, record, main_blk_index, xlrec->target.node,
- ItemPointerGetBlockNumber(&(xlrec->target.tid)),
- &buffer) == BLK_NEEDS_REDO)
- {
page = BufferGetPage(buffer);
- if (PageAddItem(page, (Item) datapos, datalen,
- ItemPointerGetOffsetNumber(&(xlrec->target.tid)),
+ if (PageAddItem(page, (Item) datapos, datalen, xlrec->offnum,
false, false) == InvalidOffsetNumber)
elog(PANIC, "btree_insert_redo: failed to add item");
* obsolete link from the metapage.
*/
if (ismeta)
- _bt_restore_meta(xlrec->target.node, lsn,
- md.root, md.level,
- md.fastroot, md.fastlevel);
+ _bt_restore_meta(lsn, record, 2);
}
static void
Page rpage;
BTPageOpaque ropaque;
char *datapos;
- int datalen;
- OffsetNumber newitemoff = 0;
- Item newitem = NULL;
- Size newitemsz = 0;
+ Size datalen;
Item left_hikey = NULL;
Size left_hikeysz = 0;
- BlockNumber cblkno = InvalidBlockNumber;
-
- datapos = (char *) xlrec + SizeOfBtreeSplit;
- datalen = record->xl_len - SizeOfBtreeSplit;
-
- /* Extract newitemoff and newitem, if present */
- if (onleft)
- {
- memcpy(&newitemoff, datapos, sizeof(OffsetNumber));
- datapos += sizeof(OffsetNumber);
- datalen -= sizeof(OffsetNumber);
- }
- if (onleft && !(record->xl_info & XLR_BKP_BLOCK(0)))
- {
- /*
- * We assume that 16-bit alignment is enough to apply IndexTupleSize
- * (since it's fetching from a uint16 field) and also enough for
- * PageAddItem to insert the tuple.
- */
- newitem = (Item) datapos;
- newitemsz = MAXALIGN(IndexTupleSize(newitem));
- datapos += newitemsz;
- datalen -= newitemsz;
- }
-
- /* Extract left hikey and its size (still assuming 16-bit alignment) */
- if (!isleaf && !(record->xl_info & XLR_BKP_BLOCK(0)))
- {
- left_hikey = (Item) datapos;
- left_hikeysz = MAXALIGN(IndexTupleSize(left_hikey));
- datapos += left_hikeysz;
- datalen -= left_hikeysz;
- }
-
- /*
- * If this insertion finishes an incomplete split, get the block number of
- * the child.
- */
- if (!isleaf && !(record->xl_info & XLR_BKP_BLOCK(1)))
- {
- memcpy(&cblkno, datapos, sizeof(BlockNumber));
- datapos += sizeof(BlockNumber);
- datalen -= sizeof(BlockNumber);
- }
+ BlockNumber leftsib;
+ BlockNumber rightsib;
+ BlockNumber rnext;
+
+ XLogRecGetBlockTag(record, 0, NULL, NULL, &leftsib);
+ XLogRecGetBlockTag(record, 1, NULL, NULL, &rightsib);
+ if (XLogRecHasBlockRef(record, 2))
+ XLogRecGetBlockTag(record, 2, NULL, NULL, &rnext);
+ else
+ rnext = P_NONE;
/*
* Clear the incomplete split flag on the left sibling of the child page
* before locking the other pages)
*/
if (!isleaf)
- _bt_clear_incomplete_split(lsn, record, 1, xlrec->node, cblkno);
+ _bt_clear_incomplete_split(lsn, record, 3);
/* Reconstruct right (new) sibling page from scratch */
- rbuf = XLogReadBuffer(xlrec->node, xlrec->rightsib, true);
- Assert(BufferIsValid(rbuf));
+ XLogReadBufferForRedoExtended(lsn, record, 1, RBM_ZERO, false, &rbuf);
+ datapos = XLogRecGetBlockData(record, 1, &datalen);
rpage = (Page) BufferGetPage(rbuf);
_bt_pageinit(rpage, BufferGetPageSize(rbuf));
ropaque = (BTPageOpaque) PageGetSpecialPointer(rpage);
- ropaque->btpo_prev = xlrec->leftsib;
- ropaque->btpo_next = xlrec->rnext;
+ ropaque->btpo_prev = leftsib;
+ ropaque->btpo_next = rnext;
ropaque->btpo.level = xlrec->level;
ropaque->btpo_flags = isleaf ? BTP_LEAF : 0;
ropaque->btpo_cycleid = 0;
/* don't release the buffer yet; we touch right page's first item below */
/* Now reconstruct left (original) sibling page */
- if (XLogReadBufferForRedo(lsn, record, 0, xlrec->node, xlrec->leftsib,
- &lbuf) == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(lsn, record, 0, &lbuf) == BLK_NEEDS_REDO)
{
/*
* To retain the same physical order of the tuples that they had, we
Page lpage = (Page) BufferGetPage(lbuf);
BTPageOpaque lopaque = (BTPageOpaque) PageGetSpecialPointer(lpage);
OffsetNumber off;
+ Item newitem;
+ Size newitemsz = 0;
Page newlpage;
OffsetNumber leftoff;
+ datapos = XLogRecGetBlockData(record, 0, &datalen);
+
+ if (onleft)
+ {
+ newitem = (Item) datapos;
+ newitemsz = MAXALIGN(IndexTupleSize(newitem));
+ datapos += newitemsz;
+ datalen -= newitemsz;
+ }
+
+ /* Extract left hikey and its size (assuming 16-bit alignment) */
+ if (!isleaf)
+ {
+ left_hikey = (Item) datapos;
+ left_hikeysz = MAXALIGN(IndexTupleSize(left_hikey));
+ datapos += left_hikeysz;
+ datalen -= left_hikeysz;
+ }
+ Assert(datalen == 0);
+
newlpage = PageGetTempPageCopySpecial(lpage);
/* Set high key */
Item item;
/* add the new item if it was inserted on left page */
- if (onleft && off == newitemoff)
+ if (onleft && off == xlrec->newitemoff)
{
if (PageAddItem(newlpage, newitem, newitemsz, leftoff,
false, false) == InvalidOffsetNumber)
}
/* cope with possibility that newitem goes at the end */
- if (onleft && off == newitemoff)
+ if (onleft && off == xlrec->newitemoff)
{
if (PageAddItem(newlpage, newitem, newitemsz, leftoff,
false, false) == InvalidOffsetNumber)
lopaque->btpo_flags = BTP_INCOMPLETE_SPLIT;
if (isleaf)
lopaque->btpo_flags |= BTP_LEAF;
- lopaque->btpo_next = xlrec->rightsib;
+ lopaque->btpo_next = rightsib;
lopaque->btpo_cycleid = 0;
PageSetLSN(lpage, lsn);
* replay, because no other index update can be in progress, and readers
* will cope properly when following an obsolete left-link.
*/
- if (xlrec->rnext != P_NONE)
+ if (rnext != P_NONE)
{
- /*
- * the backup block containing right sibling is 1 or 2, depending
- * whether this was a leaf or internal page.
- */
- int rnext_index = isleaf ? 1 : 2;
Buffer buffer;
- if (XLogReadBufferForRedo(lsn, record, rnext_index, xlrec->node,
- xlrec->rnext, &buffer) == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(lsn, record, 2, &buffer) == BLK_NEEDS_REDO)
{
Page page = (Page) BufferGetPage(buffer);
BTPageOpaque pageop = (BTPageOpaque) PageGetSpecialPointer(page);
- pageop->btpo_prev = xlrec->rightsib;
+ pageop->btpo_prev = rightsib;
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
*/
if (HotStandbyActiveInReplay())
{
+ RelFileNode thisrnode;
+ BlockNumber thisblkno;
BlockNumber blkno;
- for (blkno = xlrec->lastBlockVacuumed + 1; blkno < xlrec->block; blkno++)
+ XLogRecGetBlockTag(record, 0, &thisrnode, NULL, &thisblkno);
+
+ for (blkno = xlrec->lastBlockVacuumed + 1; blkno < thisblkno; blkno++)
{
/*
* We use RBM_NORMAL_NO_LOG mode because it's not an error
* buffer manager we could optimise this so that if the block is
* not in shared_buffers we confirm it as unpinned.
*/
- buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, blkno,
+ buffer = XLogReadBufferExtended(thisrnode, MAIN_FORKNUM, blkno,
RBM_NORMAL_NO_LOG);
if (BufferIsValid(buffer))
{
* Like in btvacuumpage(), we need to take a cleanup lock on every leaf
* page. See nbtree/README for details.
*/
- if (XLogReadBufferForRedoExtended(lsn, record, 0,
- xlrec->node, MAIN_FORKNUM, xlrec->block,
- RBM_NORMAL, true, &buffer)
+ if (XLogReadBufferForRedoExtended(lsn, record, 0, RBM_NORMAL, true, &buffer)
== BLK_NEEDS_REDO)
{
+ char *ptr;
+ Size len;
+
+ ptr = XLogRecGetBlockData(record, 0, &len);
+
page = (Page) BufferGetPage(buffer);
- if (record->xl_len > SizeOfBtreeVacuum)
+ if (len > 0)
{
OffsetNumber *unused;
OffsetNumber *unend;
- unused = (OffsetNumber *) ((char *) xlrec + SizeOfBtreeVacuum);
- unend = (OffsetNumber *) ((char *) xlrec + record->xl_len);
+ unused = (OffsetNumber *) ptr;
+ unend = (OffsetNumber *) ((char *) ptr + len);
if ((unend - unused) > 0)
PageIndexMultiDelete(page, unused, unend - unused);
* XXX optimise later with something like XLogPrefetchBuffer()
*/
static TransactionId
-btree_xlog_delete_get_latestRemovedXid(xl_btree_delete *xlrec)
+btree_xlog_delete_get_latestRemovedXid(XLogRecord *record)
{
+ xl_btree_delete *xlrec = (xl_btree_delete *) XLogRecGetData(record);
OffsetNumber *unused;
Buffer ibuffer,
hbuffer;
Page ipage,
hpage;
+ RelFileNode rnode;
+ BlockNumber blkno;
ItemId iitemid,
hitemid;
IndexTuple itup;
* InvalidTransactionId to cancel all HS transactions. That's probably
* overkill, but it's safe, and certainly better than panicking here.
*/
- ibuffer = XLogReadBuffer(xlrec->node, xlrec->block, false);
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+ ibuffer = XLogReadBufferExtended(rnode, MAIN_FORKNUM, blkno, RBM_NORMAL);
if (!BufferIsValid(ibuffer))
return InvalidTransactionId;
+ LockBuffer(ibuffer, BT_READ);
ipage = (Page) BufferGetPage(ibuffer);
/*
* Locate the heap page that the index tuple points at
*/
hblkno = ItemPointerGetBlockNumber(&(itup->t_tid));
- hbuffer = XLogReadBuffer(xlrec->hnode, hblkno, false);
+ hbuffer = XLogReadBufferExtended(xlrec->hnode, MAIN_FORKNUM, hblkno, RBM_NORMAL);
if (!BufferIsValid(hbuffer))
{
UnlockReleaseBuffer(ibuffer);
return InvalidTransactionId;
}
+ LockBuffer(hbuffer, BUFFER_LOCK_SHARE);
hpage = (Page) BufferGetPage(hbuffer);
/*
*/
if (InHotStandby)
{
- TransactionId latestRemovedXid = btree_xlog_delete_get_latestRemovedXid(xlrec);
+ TransactionId latestRemovedXid = btree_xlog_delete_get_latestRemovedXid(record);
+ RelFileNode rnode;
- ResolveRecoveryConflictWithSnapshot(latestRemovedXid, xlrec->node);
+ XLogRecGetBlockTag(record, 0, &rnode, NULL, NULL);
+
+ ResolveRecoveryConflictWithSnapshot(latestRemovedXid, rnode);
}
/*
* We don't need to take a cleanup lock to apply these changes. See
* nbtree/README for details.
*/
- if (XLogReadBufferForRedo(lsn, record, 0, xlrec->node, xlrec->block,
- &buffer) == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(lsn, record, 0, &buffer) == BLK_NEEDS_REDO)
{
page = (Page) BufferGetPage(buffer);
btree_xlog_mark_page_halfdead(uint8 info, XLogRecPtr lsn, XLogRecord *record)
{
xl_btree_mark_page_halfdead *xlrec = (xl_btree_mark_page_halfdead *) XLogRecGetData(record);
- BlockNumber parent;
Buffer buffer;
Page page;
BTPageOpaque pageop;
IndexTupleData trunctuple;
- parent = ItemPointerGetBlockNumber(&(xlrec->target.tid));
-
/*
* In normal operation, we would lock all the pages this WAL record
* touches before changing any of them. In WAL replay, it should be okay
*/
/* parent page */
- if (XLogReadBufferForRedo(lsn, record, 0, xlrec->target.node, parent,
- &buffer) == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(lsn, record, 1, &buffer) == BLK_NEEDS_REDO)
{
OffsetNumber poffset;
ItemId itemid;
page = (Page) BufferGetPage(buffer);
pageop = (BTPageOpaque) PageGetSpecialPointer(page);
- poffset = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
+ poffset = xlrec->poffset;
nextoffset = OffsetNumberNext(poffset);
itemid = PageGetItemId(page, nextoffset);
UnlockReleaseBuffer(buffer);
/* Rewrite the leaf page as a halfdead page */
- buffer = XLogReadBuffer(xlrec->target.node, xlrec->leafblk, true);
- Assert(BufferIsValid(buffer));
+ XLogReadBufferForRedoExtended(lsn, record, 0, RBM_ZERO, false, &buffer);
page = (Page) BufferGetPage(buffer);
_bt_pageinit(page, BufferGetPageSize(buffer));
btree_xlog_unlink_page(uint8 info, XLogRecPtr lsn, XLogRecord *record)
{
xl_btree_unlink_page *xlrec = (xl_btree_unlink_page *) XLogRecGetData(record);
- BlockNumber target;
BlockNumber leftsib;
BlockNumber rightsib;
Buffer buffer;
Page page;
BTPageOpaque pageop;
- target = xlrec->deadblk;
leftsib = xlrec->leftsib;
rightsib = xlrec->rightsib;
*/
/* Fix left-link of right sibling */
- if (XLogReadBufferForRedo(lsn, record, 0, xlrec->node, rightsib, &buffer)
- == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(lsn, record, 2, &buffer) == BLK_NEEDS_REDO)
{
page = (Page) BufferGetPage(buffer);
pageop = (BTPageOpaque) PageGetSpecialPointer(page);
/* Fix right-link of left sibling, if any */
if (leftsib != P_NONE)
{
- if (XLogReadBufferForRedo(lsn, record, 1, xlrec->node, leftsib, &buffer)
- == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(lsn, record, 1, &buffer) == BLK_NEEDS_REDO)
{
page = (Page) BufferGetPage(buffer);
pageop = (BTPageOpaque) PageGetSpecialPointer(page);
}
/* Rewrite target page as empty deleted page */
- buffer = XLogReadBuffer(xlrec->node, target, true);
- Assert(BufferIsValid(buffer));
+ XLogReadBufferForRedoExtended(lsn, record, 0, RBM_ZERO, false, &buffer);
page = (Page) BufferGetPage(buffer);
_bt_pageinit(page, BufferGetPageSize(buffer));
* itself, update the leaf to point to the next remaining child in the
* branch.
*/
- if (target != xlrec->leafblk)
+ if (XLogRecHasBlockRef(record, 3))
{
/*
* There is no real data on the page, so we just re-create it from
*/
IndexTupleData trunctuple;
- buffer = XLogReadBuffer(xlrec->node, xlrec->leafblk, true);
- Assert(BufferIsValid(buffer));
+ XLogReadBufferForRedoExtended(lsn, record, 3, RBM_ZERO, false, &buffer);
page = (Page) BufferGetPage(buffer);
pageop = (BTPageOpaque) PageGetSpecialPointer(page);
/* Update metapage if needed */
if (info == XLOG_BTREE_UNLINK_PAGE_META)
- {
- xl_btree_metadata md;
-
- memcpy(&md, (char *) xlrec + SizeOfBtreeUnlinkPage,
- sizeof(xl_btree_metadata));
- _bt_restore_meta(xlrec->node, lsn,
- md.root, md.level,
- md.fastroot, md.fastlevel);
- }
+ _bt_restore_meta(lsn, record, 4);
}
static void
Buffer buffer;
Page page;
BTPageOpaque pageop;
+ char *ptr;
+ Size len;
- buffer = XLogReadBuffer(xlrec->node, xlrec->rootblk, true);
- Assert(BufferIsValid(buffer));
+ XLogReadBufferForRedoExtended(lsn, record, 0, RBM_ZERO, false, &buffer);
page = (Page) BufferGetPage(buffer);
_bt_pageinit(page, BufferGetPageSize(buffer));
pageop->btpo_flags |= BTP_LEAF;
pageop->btpo_cycleid = 0;
- if (record->xl_len > SizeOfBtreeNewroot)
+ if (xlrec->level > 0)
{
- IndexTuple itup;
- BlockNumber cblkno;
-
- _bt_restore_page(page,
- (char *) xlrec + SizeOfBtreeNewroot,
- record->xl_len - SizeOfBtreeNewroot);
- /* extract block number of the left-hand split page */
- itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, P_HIKEY));
- cblkno = ItemPointerGetBlockNumber(&(itup->t_tid));
- Assert(ItemPointerGetOffsetNumber(&(itup->t_tid)) == P_HIKEY);
+ ptr = XLogRecGetBlockData(record, 0, &len);
+ _bt_restore_page(page, ptr, len);
/* Clear the incomplete-split flag in left child */
- _bt_clear_incomplete_split(lsn, record, 0, xlrec->node, cblkno);
+ _bt_clear_incomplete_split(lsn, record, 1);
}
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
- _bt_restore_meta(xlrec->node, lsn,
- xlrec->rootblk, xlrec->level,
- xlrec->rootblk, xlrec->level);
+ _bt_restore_meta(lsn, record, 2);
}
static void
ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid,
xlrec->node);
}
-
- /* Backup blocks are not used in reuse_page records */
- Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
}
{
xl_brin_createidx *xlrec = (xl_brin_createidx *) rec;
- appendStringInfo(buf, "v%d pagesPerRange %u rel %u/%u/%u",
- xlrec->version, xlrec->pagesPerRange,
- xlrec->node.spcNode, xlrec->node.dbNode,
- xlrec->node.relNode);
+ appendStringInfo(buf, "v%d pagesPerRange %u",
+ xlrec->version, xlrec->pagesPerRange);
}
else if (info == XLOG_BRIN_INSERT)
{
xl_brin_insert *xlrec = (xl_brin_insert *) rec;
- appendStringInfo(buf, "rel %u/%u/%u heapBlk %u revmapBlk %u pagesPerRange %u TID (%u,%u)",
- xlrec->node.spcNode, xlrec->node.dbNode,
- xlrec->node.relNode,
- xlrec->heapBlk, xlrec->revmapBlk,
+ appendStringInfo(buf, "heapBlk %u pagesPerRange %u offnum %u",
+ xlrec->heapBlk,
xlrec->pagesPerRange,
- ItemPointerGetBlockNumber(&xlrec->tid),
- ItemPointerGetOffsetNumber(&xlrec->tid));
+ xlrec->offnum);
}
else if (info == XLOG_BRIN_UPDATE)
{
xl_brin_update *xlrec = (xl_brin_update *) rec;
- appendStringInfo(buf, "rel %u/%u/%u heapBlk %u revmapBlk %u pagesPerRange %u old TID (%u,%u) TID (%u,%u)",
- xlrec->insert.node.spcNode, xlrec->insert.node.dbNode,
- xlrec->insert.node.relNode,
- xlrec->insert.heapBlk, xlrec->insert.revmapBlk,
+ appendStringInfo(buf, "heapBlk %u pagesPerRange %u old offnum %u, new offnum %u",
+ xlrec->insert.heapBlk,
xlrec->insert.pagesPerRange,
- ItemPointerGetBlockNumber(&xlrec->oldtid),
- ItemPointerGetOffsetNumber(&xlrec->oldtid),
- ItemPointerGetBlockNumber(&xlrec->insert.tid),
- ItemPointerGetOffsetNumber(&xlrec->insert.tid));
+ xlrec->oldOffnum,
+ xlrec->insert.offnum);
}
else if (info == XLOG_BRIN_SAMEPAGE_UPDATE)
{
xl_brin_samepage_update *xlrec = (xl_brin_samepage_update *) rec;
- appendStringInfo(buf, "rel %u/%u/%u TID (%u,%u)",
- xlrec->node.spcNode, xlrec->node.dbNode,
- xlrec->node.relNode,
- ItemPointerGetBlockNumber(&xlrec->tid),
- ItemPointerGetOffsetNumber(&xlrec->tid));
+ appendStringInfo(buf, "offnum %u", xlrec->offnum);
}
else if (info == XLOG_BRIN_REVMAP_EXTEND)
{
xl_brin_revmap_extend *xlrec = (xl_brin_revmap_extend *) rec;
- appendStringInfo(buf, "rel %u/%u/%u targetBlk %u",
- xlrec->node.spcNode, xlrec->node.dbNode,
- xlrec->node.relNode, xlrec->targetBlk);
+ appendStringInfo(buf, "targetBlk %u", xlrec->targetBlk);
}
}
#include "postgres.h"
#include "access/gin_private.h"
+#include "access/xlogutils.h"
#include "lib/stringinfo.h"
#include "storage/relfilenode.h"
-static void
-desc_node(StringInfo buf, RelFileNode node, BlockNumber blkno)
-{
- appendStringInfo(buf, "node: %u/%u/%u blkno: %u",
- node.spcNode, node.dbNode, node.relNode, blkno);
-}
-
static void
desc_recompress_leaf(StringInfo buf, ginxlogRecompressDataLeaf *insertData)
{
switch (info)
{
case XLOG_GIN_CREATE_INDEX:
- desc_node(buf, *(RelFileNode *) rec, GIN_ROOT_BLKNO);
+ /* no further information */
break;
case XLOG_GIN_CREATE_PTREE:
- desc_node(buf, ((ginxlogCreatePostingTree *) rec)->node, ((ginxlogCreatePostingTree *) rec)->blkno);
+ /* no further information */
break;
case XLOG_GIN_INSERT:
{
ginxlogInsert *xlrec = (ginxlogInsert *) rec;
char *payload = rec + sizeof(ginxlogInsert);
- desc_node(buf, xlrec->node, xlrec->blkno);
- appendStringInfo(buf, " isdata: %c isleaf: %c",
+ appendStringInfo(buf, "isdata: %c isleaf: %c",
(xlrec->flags & GIN_INSERT_ISDATA) ? 'T' : 'F',
(xlrec->flags & GIN_INSERT_ISLEAF) ? 'T' : 'F');
if (!(xlrec->flags & GIN_INSERT_ISLEAF))
ginxlogRecompressDataLeaf *insertData =
(ginxlogRecompressDataLeaf *) payload;
- if (record->xl_info & XLR_BKP_BLOCK(0))
+ if (XLogRecHasBlockImage(record, 0))
appendStringInfo(buf, " (full page image)");
else
desc_recompress_leaf(buf, insertData);
{
ginxlogSplit *xlrec = (ginxlogSplit *) rec;
- desc_node(buf, ((ginxlogSplit *) rec)->node, ((ginxlogSplit *) rec)->lblkno);
- appendStringInfo(buf, " isrootsplit: %c", (((ginxlogSplit *) rec)->flags & GIN_SPLIT_ROOT) ? 'T' : 'F');
+ appendStringInfo(buf, "isrootsplit: %c",
+ (((ginxlogSplit *) rec)->flags & GIN_SPLIT_ROOT) ? 'T' : 'F');
appendStringInfo(buf, " isdata: %c isleaf: %c",
(xlrec->flags & GIN_INSERT_ISDATA) ? 'T' : 'F',
(xlrec->flags & GIN_INSERT_ISLEAF) ? 'T' : 'F');
}
break;
case XLOG_GIN_VACUUM_PAGE:
- desc_node(buf, ((ginxlogVacuumPage *) rec)->node, ((ginxlogVacuumPage *) rec)->blkno);
+ /* no further information */
break;
case XLOG_GIN_VACUUM_DATA_LEAF_PAGE:
{
ginxlogVacuumDataLeafPage *xlrec = (ginxlogVacuumDataLeafPage *) rec;
- desc_node(buf, xlrec->node, xlrec->blkno);
- if (record->xl_info & XLR_BKP_BLOCK(0))
+ if (XLogRecHasBlockImage(record, 0))
appendStringInfo(buf, " (full page image)");
else
desc_recompress_leaf(buf, &xlrec->data);
}
break;
case XLOG_GIN_DELETE_PAGE:
- desc_node(buf, ((ginxlogDeletePage *) rec)->node, ((ginxlogDeletePage *) rec)->blkno);
+ /* no further information */
break;
case XLOG_GIN_UPDATE_META_PAGE:
- desc_node(buf, ((ginxlogUpdateMeta *) rec)->node, GIN_METAPAGE_BLKNO);
+ /* no further information */
break;
case XLOG_GIN_INSERT_LISTPAGE:
- desc_node(buf, ((ginxlogInsertListPage *) rec)->node, ((ginxlogInsertListPage *) rec)->blkno);
+ /* no further information */
break;
case XLOG_GIN_DELETE_LISTPAGE:
- appendStringInfo(buf, "%d pages, ", ((ginxlogDeleteListPages *) rec)->ndeleted);
- desc_node(buf, ((ginxlogDeleteListPages *) rec)->node, GIN_METAPAGE_BLKNO);
+ appendStringInfo(buf, "ndeleted: %d",
+ ((ginxlogDeleteListPages *) rec)->ndeleted);
break;
}
}
#include "lib/stringinfo.h"
#include "storage/relfilenode.h"
-static void
-out_target(StringInfo buf, RelFileNode node)
-{
- appendStringInfo(buf, "rel %u/%u/%u",
- node.spcNode, node.dbNode, node.relNode);
-}
-
static void
out_gistxlogPageUpdate(StringInfo buf, gistxlogPageUpdate *xlrec)
{
- out_target(buf, xlrec->node);
- appendStringInfo(buf, "; block number %u", xlrec->blkno);
}
static void
out_gistxlogPageSplit(StringInfo buf, gistxlogPageSplit *xlrec)
{
- appendStringInfoString(buf, "page_split: ");
- out_target(buf, xlrec->node);
- appendStringInfo(buf, "; block number %u splits to %d pages",
- xlrec->origblkno, xlrec->npage);
+ appendStringInfo(buf, "page_split: splits to %d pages",
+ xlrec->npage);
}
void
#include "access/heapam_xlog.h"
-static void
-out_target(StringInfo buf, xl_heaptid *target)
-{
- appendStringInfo(buf, "rel %u/%u/%u; tid %u/%u",
- target->node.spcNode, target->node.dbNode, target->node.relNode,
- ItemPointerGetBlockNumber(&(target->tid)),
- ItemPointerGetOffsetNumber(&(target->tid)));
-}
-
static void
out_infobits(StringInfo buf, uint8 infobits)
{
{
xl_heap_insert *xlrec = (xl_heap_insert *) rec;
- out_target(buf, &(xlrec->target));
+ appendStringInfo(buf, "off %u", xlrec->offnum);
}
else if (info == XLOG_HEAP_DELETE)
{
xl_heap_delete *xlrec = (xl_heap_delete *) rec;
- out_target(buf, &(xlrec->target));
+ appendStringInfo(buf, "off %u", xlrec->offnum);
appendStringInfoChar(buf, ' ');
out_infobits(buf, xlrec->infobits_set);
}
{
xl_heap_update *xlrec = (xl_heap_update *) rec;
- out_target(buf, &(xlrec->target));
- appendStringInfo(buf, " xmax %u ", xlrec->old_xmax);
+ appendStringInfo(buf, "off %u xmax %u",
+ xlrec->old_offnum,
+ xlrec->old_xmax);
out_infobits(buf, xlrec->old_infobits_set);
- appendStringInfo(buf, "; new tid %u/%u xmax %u",
- ItemPointerGetBlockNumber(&(xlrec->newtid)),
- ItemPointerGetOffsetNumber(&(xlrec->newtid)),
+ appendStringInfo(buf, "; new off %u xmax %u",
+ xlrec->new_offnum,
xlrec->new_xmax);
}
else if (info == XLOG_HEAP_HOT_UPDATE)
{
xl_heap_update *xlrec = (xl_heap_update *) rec;
- out_target(buf, &(xlrec->target));
- appendStringInfo(buf, " xmax %u ", xlrec->old_xmax);
+ appendStringInfo(buf, "off %u xmax %u",
+ xlrec->old_offnum,
+ xlrec->old_xmax);
out_infobits(buf, xlrec->old_infobits_set);
- appendStringInfo(buf, "; new tid %u/%u xmax %u",
- ItemPointerGetBlockNumber(&(xlrec->newtid)),
- ItemPointerGetOffsetNumber(&(xlrec->newtid)),
+ appendStringInfo(buf, "; new off %u xmax %u",
+ xlrec->new_offnum,
xlrec->new_xmax);
}
else if (info == XLOG_HEAP_LOCK)
xl_heap_lock *xlrec = (xl_heap_lock *) rec;
appendStringInfo(buf, "xid %u: ", xlrec->locking_xid);
- out_target(buf, &(xlrec->target));
- appendStringInfoChar(buf, ' ');
+ appendStringInfo(buf, "off %u ", xlrec->offnum);
out_infobits(buf, xlrec->infobits_set);
}
else if (info == XLOG_HEAP_INPLACE)
{
xl_heap_inplace *xlrec = (xl_heap_inplace *) rec;
- out_target(buf, &(xlrec->target));
+ appendStringInfo(buf, "off %u", xlrec->offnum);
}
}
void
{
xl_heap_clean *xlrec = (xl_heap_clean *) rec;
- appendStringInfo(buf, "rel %u/%u/%u; blk %u remxid %u",
- xlrec->node.spcNode, xlrec->node.dbNode,
- xlrec->node.relNode, xlrec->block,
- xlrec->latestRemovedXid);
+ appendStringInfo(buf, "remxid %u", xlrec->latestRemovedXid);
}
else if (info == XLOG_HEAP2_FREEZE_PAGE)
{
xl_heap_freeze_page *xlrec = (xl_heap_freeze_page *) rec;
- appendStringInfo(buf, "rel %u/%u/%u; blk %u; cutoff xid %u ntuples %u",
- xlrec->node.spcNode, xlrec->node.dbNode,
- xlrec->node.relNode, xlrec->block,
+ appendStringInfo(buf, "cutoff xid %u ntuples %u",
xlrec->cutoff_xid, xlrec->ntuples);
}
else if (info == XLOG_HEAP2_CLEANUP_INFO)
{
xl_heap_visible *xlrec = (xl_heap_visible *) rec;
- appendStringInfo(buf, "rel %u/%u/%u; blk %u",
- xlrec->node.spcNode, xlrec->node.dbNode,
- xlrec->node.relNode, xlrec->block);
+ appendStringInfo(buf, "cutoff xid %u", xlrec->cutoff_xid);
}
else if (info == XLOG_HEAP2_MULTI_INSERT)
{
xl_heap_multi_insert *xlrec = (xl_heap_multi_insert *) rec;
- appendStringInfo(buf, "rel %u/%u/%u; blk %u; %d tuples",
- xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode,
- xlrec->blkno, xlrec->ntuples);
+ appendStringInfo(buf, "%d tuples", xlrec->ntuples);
}
else if (info == XLOG_HEAP2_LOCK_UPDATED)
{
appendStringInfo(buf, "xmax %u msk %04x; ", xlrec->xmax,
xlrec->infobits_set);
- out_target(buf, &(xlrec->target));
+ appendStringInfo(buf, "off %u", xlrec->offnum);
}
else if (info == XLOG_HEAP2_NEW_CID)
{
xl_heap_new_cid *xlrec = (xl_heap_new_cid *) rec;
- out_target(buf, &(xlrec->target));
+ appendStringInfo(buf, "rel %u/%u/%u; tid %u/%u",
+ xlrec->target_node.spcNode,
+ xlrec->target_node.dbNode,
+ xlrec->target_node.relNode,
+ ItemPointerGetBlockNumber(&(xlrec->target_tid)),
+ ItemPointerGetOffsetNumber(&(xlrec->target_tid)));
appendStringInfo(buf, "; cmin: %u, cmax: %u, combo: %u",
xlrec->cmin, xlrec->cmax, xlrec->combocid);
}
#include "access/nbtree.h"
-static void
-out_target(StringInfo buf, xl_btreetid *target)
-{
- appendStringInfo(buf, "rel %u/%u/%u; tid %u/%u",
- target->node.spcNode, target->node.dbNode, target->node.relNode,
- ItemPointerGetBlockNumber(&(target->tid)),
- ItemPointerGetOffsetNumber(&(target->tid)));
-}
-
void
btree_desc(StringInfo buf, XLogRecord *record)
{
{
xl_btree_insert *xlrec = (xl_btree_insert *) rec;
- out_target(buf, &(xlrec->target));
+ appendStringInfo(buf, "off %u", xlrec->offnum);
break;
}
case XLOG_BTREE_SPLIT_L:
{
xl_btree_split *xlrec = (xl_btree_split *) rec;
- appendStringInfo(buf, "rel %u/%u/%u ",
- xlrec->node.spcNode, xlrec->node.dbNode,
- xlrec->node.relNode);
- appendStringInfo(buf, "left %u, right %u, next %u, level %u, firstright %d",
- xlrec->leftsib, xlrec->rightsib, xlrec->rnext,
+ appendStringInfo(buf, "level %u, firstright %d",
xlrec->level, xlrec->firstright);
break;
}
{
xl_btree_vacuum *xlrec = (xl_btree_vacuum *) rec;
- appendStringInfo(buf, "rel %u/%u/%u; blk %u, lastBlockVacuumed %u",
- xlrec->node.spcNode, xlrec->node.dbNode,
- xlrec->node.relNode, xlrec->block,
+ appendStringInfo(buf, "lastBlockVacuumed %u",
xlrec->lastBlockVacuumed);
break;
}
{
xl_btree_delete *xlrec = (xl_btree_delete *) rec;
- appendStringInfo(buf, "index %u/%u/%u; iblk %u, heap %u/%u/%u;",
- xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode,
- xlrec->block,
- xlrec->hnode.spcNode, xlrec->hnode.dbNode, xlrec->hnode.relNode);
+ appendStringInfo(buf, "%d items", xlrec->nitems);
break;
}
case XLOG_BTREE_MARK_PAGE_HALFDEAD:
{
xl_btree_mark_page_halfdead *xlrec = (xl_btree_mark_page_halfdead *) rec;
- out_target(buf, &(xlrec->target));
- appendStringInfo(buf, "; topparent %u; leaf %u; left %u; right %u",
+ appendStringInfo(buf, "topparent %u; leaf %u; left %u; right %u",
xlrec->topparent, xlrec->leafblk, xlrec->leftblk, xlrec->rightblk);
break;
}
{
xl_btree_unlink_page *xlrec = (xl_btree_unlink_page *) rec;
- appendStringInfo(buf, "rel %u/%u/%u; ",
- xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode);
- appendStringInfo(buf, "dead %u; left %u; right %u; btpo_xact %u; ",
- xlrec->deadblk, xlrec->leftsib, xlrec->rightsib, xlrec->btpo_xact);
- appendStringInfo(buf, "leaf %u; leafleft %u; leafright %u; topparent %u",
- xlrec->leafblk, xlrec->leafleftsib, xlrec->leafrightsib, xlrec->topparent);
+ appendStringInfo(buf, "left %u; right %u; btpo_xact %u; ",
+ xlrec->leftsib, xlrec->rightsib, xlrec->btpo_xact);
+ appendStringInfo(buf, "leafleft %u; leafright %u; topparent %u",
+ xlrec->leafleftsib, xlrec->leafrightsib, xlrec->topparent);
break;
}
case XLOG_BTREE_NEWROOT:
{
xl_btree_newroot *xlrec = (xl_btree_newroot *) rec;
- appendStringInfo(buf, "rel %u/%u/%u; root %u lev %u",
- xlrec->node.spcNode, xlrec->node.dbNode,
- xlrec->node.relNode,
- xlrec->rootblk, xlrec->level);
+ appendStringInfo(buf, "lev %u", xlrec->level);
break;
}
case XLOG_BTREE_REUSE_PAGE:
#include "access/spgist_private.h"
-static void
-out_target(StringInfo buf, RelFileNode node)
-{
- appendStringInfo(buf, "rel %u/%u/%u ",
- node.spcNode, node.dbNode, node.relNode);
-}
-
void
spg_desc(StringInfo buf, XLogRecord *record)
{
((RelFileNode *) rec)->relNode);
break;
case XLOG_SPGIST_ADD_LEAF:
- out_target(buf, ((spgxlogAddLeaf *) rec)->node);
- appendStringInfo(buf, "%u",
- ((spgxlogAddLeaf *) rec)->blknoLeaf);
+ {
+ spgxlogAddLeaf *xlrec = (spgxlogAddLeaf *) rec;
+
+ appendStringInfo(buf, "add leaf to page");
+ appendStringInfo(buf, "; off %u; headoff %u; parentoff %u",
+ xlrec->offnumLeaf, xlrec->offnumHeadLeaf,
+ xlrec->offnumParent);
+ if (xlrec->newPage)
+ appendStringInfo(buf, " (newpage)");
+ if (xlrec->storesNulls)
+ appendStringInfo(buf, " (nulls)");
+ }
break;
case XLOG_SPGIST_MOVE_LEAFS:
- out_target(buf, ((spgxlogMoveLeafs *) rec)->node);
- appendStringInfo(buf, "%u leafs from page %u to page %u",
- ((spgxlogMoveLeafs *) rec)->nMoves,
- ((spgxlogMoveLeafs *) rec)->blknoSrc,
- ((spgxlogMoveLeafs *) rec)->blknoDst);
+ appendStringInfo(buf, "%u leafs",
+ ((spgxlogMoveLeafs *) rec)->nMoves);
break;
case XLOG_SPGIST_ADD_NODE:
- out_target(buf, ((spgxlogAddNode *) rec)->node);
- appendStringInfo(buf, "%u:%u",
- ((spgxlogAddNode *) rec)->blkno,
+ appendStringInfo(buf, "off %u",
((spgxlogAddNode *) rec)->offnum);
break;
case XLOG_SPGIST_SPLIT_TUPLE:
- out_target(buf, ((spgxlogSplitTuple *) rec)->node);
- appendStringInfo(buf, "%u:%u to %u:%u",
- ((spgxlogSplitTuple *) rec)->blknoPrefix,
+ appendStringInfo(buf, "prefix off: %u, postfix off: %u (same %d, new %d)",
((spgxlogSplitTuple *) rec)->offnumPrefix,
- ((spgxlogSplitTuple *) rec)->blknoPostfix,
- ((spgxlogSplitTuple *) rec)->offnumPostfix);
+ ((spgxlogSplitTuple *) rec)->offnumPostfix,
+ ((spgxlogSplitTuple *) rec)->postfixBlkSame,
+ ((spgxlogSplitTuple *) rec)->newPage
+ );
break;
case XLOG_SPGIST_PICKSPLIT:
- out_target(buf, ((spgxlogPickSplit *) rec)->node);
+ {
+ spgxlogPickSplit *xlrec = (spgxlogPickSplit *) rec;
+
+ appendStringInfo(buf, "ndel %u; nins %u",
+ xlrec->nDelete, xlrec->nInsert);
+ if (xlrec->innerIsParent)
+ appendStringInfo(buf, " (innerIsParent)");
+ if (xlrec->isRootSplit)
+ appendStringInfo(buf, " (isRootSplit)");
+ }
break;
case XLOG_SPGIST_VACUUM_LEAF:
- out_target(buf, ((spgxlogVacuumLeaf *) rec)->node);
- appendStringInfo(buf, "page %u",
- ((spgxlogVacuumLeaf *) rec)->blkno);
+ /* no further information */
break;
case XLOG_SPGIST_VACUUM_ROOT:
- out_target(buf, ((spgxlogVacuumRoot *) rec)->node);
- appendStringInfo(buf, "page %u",
- ((spgxlogVacuumRoot *) rec)->blkno);
+ /* no further information */
break;
case XLOG_SPGIST_VACUUM_REDIRECT:
- out_target(buf, ((spgxlogVacuumRedirect *) rec)->node);
- appendStringInfo(buf, "page %u, newest XID %u",
- ((spgxlogVacuumRedirect *) rec)->blkno,
+ appendStringInfo(buf, "newest XID %u",
((spgxlogVacuumRedirect *) rec)->newestRedirectXid);
break;
}
}
else if (info == XLOG_FPI)
{
- BkpBlock *bkp = (BkpBlock *) rec;
-
- appendStringInfo(buf, "%s block %u",
- relpathperm(bkp->node, bkp->fork),
- bkp->block);
+ /* no further information to print */
}
else if (info == XLOG_BACKUP_END)
{
#include "postgres.h"
#include "access/genam.h"
-#include "access/xloginsert.h"
#include "access/spgist_private.h"
+#include "access/xloginsert.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"
addLeafTuple(Relation index, SpGistState *state, SpGistLeafTuple leafTuple,
SPPageDesc *current, SPPageDesc *parent, bool isNulls, bool isNew)
{
- XLogRecData rdata[4];
spgxlogAddLeaf xlrec;
- xlrec.node = index->rd_node;
- xlrec.blknoLeaf = current->blkno;
xlrec.newPage = isNew;
xlrec.storesNulls = isNulls;
/* these will be filled below as needed */
xlrec.offnumLeaf = InvalidOffsetNumber;
xlrec.offnumHeadLeaf = InvalidOffsetNumber;
- xlrec.blknoParent = InvalidBlockNumber;
xlrec.offnumParent = InvalidOffsetNumber;
xlrec.nodeI = 0;
- ACCEPT_RDATA_DATA(&xlrec, sizeof(xlrec), 0);
- ACCEPT_RDATA_DATA(leafTuple, leafTuple->size, 1);
- ACCEPT_RDATA_BUFFER(current->buffer, 2);
-
START_CRIT_SECTION();
if (current->offnum == InvalidOffsetNumber ||
/* Must update parent's downlink if any */
if (parent->buffer != InvalidBuffer)
{
- xlrec.blknoParent = parent->blkno;
xlrec.offnumParent = parent->offnum;
xlrec.nodeI = parent->node;
saveNodeLink(index, parent, current->blkno, current->offnum);
-
- ACCEPT_RDATA_BUFFER(parent->buffer, 3);
}
}
else
{
XLogRecPtr recptr;
- recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_LEAF, rdata);
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, sizeof(xlrec));
+ XLogRegisterData((char *) leafTuple, leafTuple->size);
+
+ XLogRegisterBuffer(0, current->buffer, REGBUF_STANDARD);
+ if (xlrec.offnumParent != InvalidOffsetNumber)
+ XLogRegisterBuffer(1, parent->buffer, REGBUF_STANDARD);
+
+ recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_LEAF);
PageSetLSN(current->page, recptr);
/* update parent only if we actually changed it */
- if (xlrec.blknoParent != InvalidBlockNumber)
+ if (xlrec.offnumParent != InvalidOffsetNumber)
{
PageSetLSN(parent->page, recptr);
}
OffsetNumber *toDelete;
OffsetNumber *toInsert;
BlockNumber nblkno;
- XLogRecData rdata[7];
spgxlogMoveLeafs xlrec;
char *leafdata,
*leafptr;
nblkno = BufferGetBlockNumber(nbuf);
Assert(nblkno != current->blkno);
- /* prepare WAL info */
- xlrec.node = index->rd_node;
- STORE_STATE(state, xlrec.stateSrc);
-
- xlrec.blknoSrc = current->blkno;
- xlrec.blknoDst = nblkno;
- xlrec.nMoves = nDelete;
- xlrec.replaceDead = replaceDead;
- xlrec.storesNulls = isNulls;
-
- xlrec.blknoParent = parent->blkno;
- xlrec.offnumParent = parent->offnum;
- xlrec.nodeI = parent->node;
-
leafdata = leafptr = palloc(size);
START_CRIT_SECTION();
{
XLogRecPtr recptr;
- ACCEPT_RDATA_DATA(&xlrec, SizeOfSpgxlogMoveLeafs, 0);
- ACCEPT_RDATA_DATA(toDelete, sizeof(OffsetNumber) * nDelete, 1);
- ACCEPT_RDATA_DATA(toInsert, sizeof(OffsetNumber) * nInsert, 2);
- ACCEPT_RDATA_DATA(leafdata, leafptr - leafdata, 3);
- ACCEPT_RDATA_BUFFER(current->buffer, 4);
- ACCEPT_RDATA_BUFFER(nbuf, 5);
- ACCEPT_RDATA_BUFFER(parent->buffer, 6);
+ /* prepare WAL info */
+ STORE_STATE(state, xlrec.stateSrc);
- recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_MOVE_LEAFS, rdata);
+ xlrec.nMoves = nDelete;
+ xlrec.replaceDead = replaceDead;
+ xlrec.storesNulls = isNulls;
+
+ xlrec.offnumParent = parent->offnum;
+ xlrec.nodeI = parent->node;
+
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, SizeOfSpgxlogMoveLeafs);
+ XLogRegisterData((char *) toDelete,
+ sizeof(OffsetNumber) * nDelete);
+ XLogRegisterData((char *) toInsert,
+ sizeof(OffsetNumber) * nInsert);
+ XLogRegisterData((char *) leafdata, leafptr - leafdata);
+
+ XLogRegisterBuffer(0, current->buffer, REGBUF_STANDARD);
+ XLogRegisterBuffer(1, nbuf, REGBUF_STANDARD | (xlrec.newPage ? REGBUF_WILL_INIT : 0));
+ XLogRegisterBuffer(2, parent->buffer, REGBUF_STANDARD);
+
+ recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_MOVE_LEAFS);
PageSetLSN(current->page, recptr);
PageSetLSN(npage, recptr);
int currentFreeSpace;
int totalLeafSizes;
bool allTheSame;
- XLogRecData rdata[10];
- int nRdata;
spgxlogPickSplit xlrec;
char *leafdata,
*leafptr;
newLeafs = (SpGistLeafTuple *) palloc(sizeof(SpGistLeafTuple) * n);
leafPageSelect = (uint8 *) palloc(sizeof(uint8) * n);
- xlrec.node = index->rd_node;
STORE_STATE(state, xlrec.stateSrc);
/*
}
/*
- * Because a WAL record can't involve more than four buffers, we can only
- * afford to deal with two leaf pages in each picksplit action, ie the
- * current page and at most one other.
- *
* The new leaf tuples converted from the existing ones should require the
* same or less space, and therefore should all fit onto one page
* (although that's not necessarily the current page, since we can't
}
/* Start preparing WAL record */
- xlrec.blknoSrc = current->blkno;
- xlrec.blknoDest = InvalidBlockNumber;
xlrec.nDelete = 0;
xlrec.initSrc = isNew;
xlrec.storesNulls = isNulls;
+ xlrec.isRootSplit = SpGistBlockIsRoot(current->blkno);
leafdata = leafptr = (char *) palloc(totalLeafSizes);
- ACCEPT_RDATA_DATA(&xlrec, SizeOfSpgxlogPickSplit, 0);
- nRdata = 1;
-
/* Here we begin making the changes to the target pages */
START_CRIT_SECTION();
else
{
xlrec.nDelete = nToDelete;
- ACCEPT_RDATA_DATA(toDelete,
- sizeof(OffsetNumber) * nToDelete,
- nRdata);
- nRdata++;
- ACCEPT_RDATA_BUFFER(current->buffer, nRdata);
- nRdata++;
if (!state->isBuild)
{
if (newLeafBuffer != InvalidBuffer)
{
MarkBufferDirty(newLeafBuffer);
- /* also save block number for WAL */
- xlrec.blknoDest = BufferGetBlockNumber(newLeafBuffer);
- if (!xlrec.initDest)
- {
- ACCEPT_RDATA_BUFFER(newLeafBuffer, nRdata);
- nRdata++;
- }
}
- xlrec.nInsert = nToInsert;
- ACCEPT_RDATA_DATA(toInsert, sizeof(OffsetNumber) * nToInsert, nRdata);
- nRdata++;
- ACCEPT_RDATA_DATA(leafPageSelect, sizeof(uint8) * nToInsert, nRdata);
- nRdata++;
- ACCEPT_RDATA_DATA(innerTuple, innerTuple->size, nRdata);
- nRdata++;
- ACCEPT_RDATA_DATA(leafdata, leafptr - leafdata, nRdata);
- nRdata++;
-
/* Remember current buffer, since we're about to change "current" */
saveCurrent = *current;
current->blkno = parent->blkno;
current->buffer = parent->buffer;
current->page = parent->page;
- xlrec.blknoInner = current->blkno;
xlrec.offnumInner = current->offnum =
SpGistPageAddNewItem(state, current->page,
(Item) innerTuple, innerTuple->size,
/*
* Update parent node link and mark parent page dirty
*/
- xlrec.blknoParent = parent->blkno;
+ xlrec.innerIsParent = true;
xlrec.offnumParent = parent->offnum;
xlrec.nodeI = parent->node;
saveNodeLink(index, parent, current->blkno, current->offnum);
- ACCEPT_RDATA_BUFFER(parent->buffer, nRdata);
- nRdata++;
-
/*
* Update redirection link (in old current buffer)
*/
current->buffer = newInnerBuffer;
current->blkno = BufferGetBlockNumber(current->buffer);
current->page = BufferGetPage(current->buffer);
- xlrec.blknoInner = current->blkno;
xlrec.offnumInner = current->offnum =
SpGistPageAddNewItem(state, current->page,
(Item) innerTuple, innerTuple->size,
/*
* Update parent node link and mark parent page dirty
*/
- xlrec.blknoParent = parent->blkno;
+ xlrec.innerIsParent = (parent->buffer == current->buffer);
xlrec.offnumParent = parent->offnum;
xlrec.nodeI = parent->node;
saveNodeLink(index, parent, current->blkno, current->offnum);
- ACCEPT_RDATA_BUFFER(current->buffer, nRdata);
- nRdata++;
- ACCEPT_RDATA_BUFFER(parent->buffer, nRdata);
- nRdata++;
-
/*
* Update redirection link (in old current buffer)
*/
SpGistInitBuffer(current->buffer, (isNulls ? SPGIST_NULLS : 0));
xlrec.initInner = true;
+ xlrec.innerIsParent = false;
- xlrec.blknoInner = current->blkno;
xlrec.offnumInner = current->offnum =
PageAddItem(current->page, (Item) innerTuple, innerTuple->size,
InvalidOffsetNumber, false, false);
innerTuple->size);
/* No parent link to update, nor redirection to do */
- xlrec.blknoParent = InvalidBlockNumber;
xlrec.offnumParent = InvalidOffsetNumber;
xlrec.nodeI = 0;
if (RelationNeedsWAL(index))
{
XLogRecPtr recptr;
+ int flags;
+
+ XLogBeginInsert();
+
+ xlrec.nInsert = nToInsert;
+ XLogRegisterData((char *) &xlrec, SizeOfSpgxlogPickSplit);
+
+ XLogRegisterData((char *) toDelete,
+ sizeof(OffsetNumber) * xlrec.nDelete);
+ XLogRegisterData((char *) toInsert,
+ sizeof(OffsetNumber) * xlrec.nInsert);
+ XLogRegisterData((char *) leafPageSelect,
+ sizeof(uint8) * xlrec.nInsert);
+ XLogRegisterData((char *) innerTuple, innerTuple->size);
+ XLogRegisterData(leafdata, leafptr - leafdata);
+
+ flags = REGBUF_STANDARD;
+ if (xlrec.initSrc)
+ flags |= REGBUF_WILL_INIT;
+ if (BufferIsValid(saveCurrent.buffer))
+ XLogRegisterBuffer(0, saveCurrent.buffer, flags);
+
+ if (BufferIsValid(newLeafBuffer))
+ {
+ flags = REGBUF_STANDARD;
+ if (xlrec.initDest)
+ flags |= REGBUF_WILL_INIT;
+ XLogRegisterBuffer(1, newLeafBuffer, flags);
+ }
+ XLogRegisterBuffer(2, current->buffer, REGBUF_STANDARD);
+ if (parent->buffer != InvalidBuffer)
+ {
+ if (parent->buffer != current->buffer)
+ XLogRegisterBuffer(3, parent->buffer, REGBUF_STANDARD);
+ else
+ Assert(xlrec.innerIsParent);
+ }
/* Issue the WAL record */
- recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_PICKSPLIT, rdata);
+ recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_PICKSPLIT);
/* Update page LSNs on all affected pages */
if (newLeafBuffer != InvalidBuffer)
int nodeN, Datum nodeLabel)
{
SpGistInnerTuple newInnerTuple;
- XLogRecData rdata[5];
spgxlogAddNode xlrec;
/* Should not be applied to nulls */
newInnerTuple = addNode(state, innerTuple, nodeLabel, nodeN);
/* Prepare WAL record */
- xlrec.node = index->rd_node;
STORE_STATE(state, xlrec.stateSrc);
- xlrec.blkno = current->blkno;
xlrec.offnum = current->offnum;
/* we don't fill these unless we need to change the parent downlink */
- xlrec.blknoParent = InvalidBlockNumber;
+ xlrec.parentBlk = -1;
xlrec.offnumParent = InvalidOffsetNumber;
xlrec.nodeI = 0;
/* we don't fill these unless tuple has to be moved */
- xlrec.blknoNew = InvalidBlockNumber;
xlrec.offnumNew = InvalidOffsetNumber;
xlrec.newPage = false;
- ACCEPT_RDATA_DATA(&xlrec, sizeof(xlrec), 0);
- ACCEPT_RDATA_DATA(newInnerTuple, newInnerTuple->size, 1);
- ACCEPT_RDATA_BUFFER(current->buffer, 2);
-
if (PageGetExactFreeSpace(current->page) >=
newInnerTuple->size - innerTuple->size)
{
{
XLogRecPtr recptr;
- recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_NODE, rdata);
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, sizeof(xlrec));
+ XLogRegisterData((char *) newInnerTuple, newInnerTuple->size);
+
+ XLogRegisterBuffer(0, current->buffer, REGBUF_STANDARD);
+
+ recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_NODE);
PageSetLSN(current->page, recptr);
}
saveCurrent = *current;
- xlrec.blknoParent = parent->blkno;
xlrec.offnumParent = parent->offnum;
xlrec.nodeI = parent->node;
current->blkno = BufferGetBlockNumber(current->buffer);
current->page = BufferGetPage(current->buffer);
- xlrec.blknoNew = current->blkno;
-
/*
* Let's just make real sure new current isn't same as old. Right now
* that's impossible, but if SpGistGetBuffer ever got smart enough to
* replay would be subtly wrong, so I think a mere assert isn't enough
* here.
*/
- if (xlrec.blknoNew == xlrec.blkno)
+ if (current->blkno == saveCurrent.blkno)
elog(ERROR, "SPGiST new buffer shouldn't be same as old buffer");
/*
* New current and parent buffer will both be modified; but note that
* parent buffer could be same as either new or old current.
*/
- ACCEPT_RDATA_BUFFER(current->buffer, 3);
- if (parent->buffer != current->buffer &&
- parent->buffer != saveCurrent.buffer)
- ACCEPT_RDATA_BUFFER(parent->buffer, 4);
+ if (parent->buffer == saveCurrent.buffer)
+ xlrec.parentBlk = 0;
+ else if (parent->buffer == current->buffer)
+ xlrec.parentBlk = 1;
+ else
+ xlrec.parentBlk = 2;
START_CRIT_SECTION();
{
XLogRecPtr recptr;
- recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_NODE, rdata);
+ XLogBeginInsert();
+
+ /* orig page */
+ XLogRegisterBuffer(0, saveCurrent.buffer, REGBUF_STANDARD);
+ /* new page */
+ XLogRegisterBuffer(1, current->buffer, REGBUF_STANDARD);
+ /* parent page (if different from orig and new) */
+ if (xlrec.parentBlk == 2)
+ XLogRegisterBuffer(2, parent->buffer, REGBUF_STANDARD);
+
+ XLogRegisterData((char *) &xlrec, sizeof(xlrec));
+ XLogRegisterData((char *) newInnerTuple, newInnerTuple->size);
+
+ recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_NODE);
/* we don't bother to check if any of these are redundant */
PageSetLSN(current->page, recptr);
BlockNumber postfixBlkno;
OffsetNumber postfixOffset;
int i;
- XLogRecData rdata[5];
spgxlogSplitTuple xlrec;
Buffer newBuffer = InvalidBuffer;
postfixTuple->allTheSame = innerTuple->allTheSame;
/* prep data for WAL record */
- xlrec.node = index->rd_node;
xlrec.newPage = false;
- ACCEPT_RDATA_DATA(&xlrec, sizeof(xlrec), 0);
- ACCEPT_RDATA_DATA(prefixTuple, prefixTuple->size, 1);
- ACCEPT_RDATA_DATA(postfixTuple, postfixTuple->size, 2);
- ACCEPT_RDATA_BUFFER(current->buffer, 3);
-
/*
* If we can't fit both tuples on the current page, get a new page for the
* postfix tuple. In particular, can't split to the root page.
GBUF_INNER_PARITY(current->blkno + 1),
postfixTuple->size + sizeof(ItemIdData),
&xlrec.newPage);
- ACCEPT_RDATA_BUFFER(newBuffer, 4);
}
START_CRIT_SECTION();
if (xlrec.offnumPrefix != current->offnum)
elog(ERROR, "failed to add item of size %u to SPGiST index page",
prefixTuple->size);
- xlrec.blknoPrefix = current->blkno;
/*
* put postfix tuple into appropriate page
*/
if (newBuffer == InvalidBuffer)
{
- xlrec.blknoPostfix = postfixBlkno = current->blkno;
+ postfixBlkno = current->blkno;
xlrec.offnumPostfix = postfixOffset =
SpGistPageAddNewItem(state, current->page,
(Item) postfixTuple, postfixTuple->size,
NULL, false);
+ xlrec.postfixBlkSame = true;
}
else
{
- xlrec.blknoPostfix = postfixBlkno = BufferGetBlockNumber(newBuffer);
+ postfixBlkno = BufferGetBlockNumber(newBuffer);
xlrec.offnumPostfix = postfixOffset =
SpGistPageAddNewItem(state, BufferGetPage(newBuffer),
(Item) postfixTuple, postfixTuple->size,
NULL, false);
MarkBufferDirty(newBuffer);
+ xlrec.postfixBlkSame = false;
}
/*
{
XLogRecPtr recptr;
- recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_SPLIT_TUPLE, rdata);
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, sizeof(xlrec));
+ XLogRegisterData((char *) prefixTuple, prefixTuple->size);
+ XLogRegisterData((char *) postfixTuple, postfixTuple->size);
+
+ XLogRegisterBuffer(0, current->buffer, REGBUF_STANDARD);
+ if (newBuffer != InvalidBuffer)
+ {
+ int flags;
+
+ flags = REGBUF_STANDARD;
+ if (xlrec.newPage)
+ flags |= REGBUF_WILL_INIT;
+ XLogRegisterBuffer(1, newBuffer, flags);
+ }
+
+ recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_SPLIT_TUPLE);
PageSetLSN(current->page, recptr);
if (RelationNeedsWAL(index))
{
XLogRecPtr recptr;
- XLogRecData rdata;
- /* WAL data is just the relfilenode */
- rdata.data = (char *) &(index->rd_node);
- rdata.len = sizeof(RelFileNode);
- rdata.buffer = InvalidBuffer;
- rdata.next = NULL;
+ XLogBeginInsert();
- recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_CREATE_INDEX, &rdata);
+ /*
+ * Replay will re-initialize the pages, so don't take full pages
+ * images. No other data to log.
+ */
+ XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT);
+ XLogRegisterBuffer(1, rootbuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
+ XLogRegisterBuffer(2, nullbuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
+
+ recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_CREATE_INDEX);
PageSetLSN(BufferGetPage(metabuffer), recptr);
PageSetLSN(BufferGetPage(rootbuffer), recptr);
{
Page page = BufferGetPage(buffer);
spgxlogVacuumLeaf xlrec;
- XLogRecData rdata[8];
OffsetNumber toDead[MaxIndexTuplesPerPage];
OffsetNumber toPlaceholder[MaxIndexTuplesPerPage];
OffsetNumber moveSrc[MaxIndexTuplesPerPage];
if (nDeletable != xlrec.nDead + xlrec.nPlaceholder + xlrec.nMove)
elog(ERROR, "inconsistent counts of deletable tuples");
- /* Prepare WAL record */
- xlrec.node = index->rd_node;
- xlrec.blkno = BufferGetBlockNumber(buffer);
- STORE_STATE(&bds->spgstate, xlrec.stateSrc);
-
- ACCEPT_RDATA_DATA(&xlrec, SizeOfSpgxlogVacuumLeaf, 0);
- ACCEPT_RDATA_DATA(toDead, sizeof(OffsetNumber) * xlrec.nDead, 1);
- ACCEPT_RDATA_DATA(toPlaceholder, sizeof(OffsetNumber) * xlrec.nPlaceholder, 2);
- ACCEPT_RDATA_DATA(moveSrc, sizeof(OffsetNumber) * xlrec.nMove, 3);
- ACCEPT_RDATA_DATA(moveDest, sizeof(OffsetNumber) * xlrec.nMove, 4);
- ACCEPT_RDATA_DATA(chainSrc, sizeof(OffsetNumber) * xlrec.nChain, 5);
- ACCEPT_RDATA_DATA(chainDest, sizeof(OffsetNumber) * xlrec.nChain, 6);
- ACCEPT_RDATA_BUFFER(buffer, 7);
-
/* Do the updates */
START_CRIT_SECTION();
+ /* Prepare WAL record */
+ if (RelationNeedsWAL(index))
+ {
+ XLogBeginInsert();
+
+ STORE_STATE(&bds->spgstate, xlrec.stateSrc);
+
+ XLogRegisterData((char *) &xlrec, SizeOfSpgxlogVacuumLeaf);
+ /* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
+ XLogRegisterData((char *) toDead, sizeof(OffsetNumber) * xlrec.nDead);
+ XLogRegisterData((char *) toPlaceholder, sizeof(OffsetNumber) * xlrec.nPlaceholder);
+ XLogRegisterData((char *) moveSrc, sizeof(OffsetNumber) * xlrec.nMove);
+ XLogRegisterData((char *) moveDest, sizeof(OffsetNumber) * xlrec.nMove);
+ XLogRegisterData((char *) chainSrc, sizeof(OffsetNumber) * xlrec.nChain);
+ XLogRegisterData((char *) chainDest, sizeof(OffsetNumber) * xlrec.nChain);
+ }
+
spgPageIndexMultiDelete(&bds->spgstate, page,
toDead, xlrec.nDead,
SPGIST_DEAD, SPGIST_DEAD,
{
XLogRecPtr recptr;
- recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_LEAF, rdata);
+ XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | REGBUF_FORCE_IMAGE);
+
+ recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_LEAF);
PageSetLSN(page, recptr);
}
{
Page page = BufferGetPage(buffer);
spgxlogVacuumRoot xlrec;
- XLogRecData rdata[3];
OffsetNumber toDelete[MaxIndexTuplesPerPage];
OffsetNumber i,
max = PageGetMaxOffsetNumber(page);
- xlrec.blkno = BufferGetBlockNumber(buffer);
xlrec.nDelete = 0;
/* Scan page, identify tuples to delete, accumulate stats */
if (xlrec.nDelete == 0)
return; /* nothing more to do */
- /* Prepare WAL record */
- xlrec.node = index->rd_node;
- STORE_STATE(&bds->spgstate, xlrec.stateSrc);
-
- ACCEPT_RDATA_DATA(&xlrec, SizeOfSpgxlogVacuumRoot, 0);
- /* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
- ACCEPT_RDATA_DATA(toDelete, sizeof(OffsetNumber) * xlrec.nDelete, 1);
- ACCEPT_RDATA_BUFFER(buffer, 2);
-
/* Do the update */
START_CRIT_SECTION();
{
XLogRecPtr recptr;
- recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_ROOT, rdata);
+ XLogBeginInsert();
+
+ /* Prepare WAL record */
+ STORE_STATE(&bds->spgstate, xlrec.stateSrc);
+
+ XLogRegisterData((char *) &xlrec, SizeOfSpgxlogVacuumRoot);
+ /* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
+ XLogRegisterData((char *) toDelete,
+ sizeof(OffsetNumber) * xlrec.nDelete);
+
+ XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
+
+ recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_ROOT);
PageSetLSN(page, recptr);
}
OffsetNumber itemToPlaceholder[MaxIndexTuplesPerPage];
OffsetNumber itemnos[MaxIndexTuplesPerPage];
spgxlogVacuumRedirect xlrec;
- XLogRecData rdata[3];
- xlrec.node = index->rd_node;
- xlrec.blkno = BufferGetBlockNumber(buffer);
xlrec.nToPlaceholder = 0;
xlrec.newestRedirectXid = InvalidTransactionId;
{
XLogRecPtr recptr;
- ACCEPT_RDATA_DATA(&xlrec, SizeOfSpgxlogVacuumRedirect, 0);
- ACCEPT_RDATA_DATA(itemToPlaceholder, sizeof(OffsetNumber) * xlrec.nToPlaceholder, 1);
- ACCEPT_RDATA_BUFFER(buffer, 2);
+ XLogBeginInsert();
+
+ XLogRegisterData((char *) &xlrec, SizeOfSpgxlogVacuumRedirect);
+ XLogRegisterData((char *) itemToPlaceholder,
+ sizeof(OffsetNumber) * xlrec.nToPlaceholder);
+
+ XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
- recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_REDIRECT, rdata);
+ recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_REDIRECT);
PageSetLSN(page, recptr);
}
static void
spgRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
{
- RelFileNode *node = (RelFileNode *) XLogRecGetData(record);
Buffer buffer;
Page page;
- /* Backup blocks are not used in create_index records */
- Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
-
- buffer = XLogReadBuffer(*node, SPGIST_METAPAGE_BLKNO, true);
- Assert(BufferIsValid(buffer));
+ XLogReadBufferForRedoExtended(lsn, record, 0, RBM_ZERO, false, &buffer);
+ Assert(BufferGetBlockNumber(buffer) == SPGIST_METAPAGE_BLKNO);
page = (Page) BufferGetPage(buffer);
SpGistInitMetapage(page);
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
- buffer = XLogReadBuffer(*node, SPGIST_ROOT_BLKNO, true);
- Assert(BufferIsValid(buffer));
+ XLogReadBufferForRedoExtended(lsn, record, 1, RBM_ZERO, false, &buffer);
+ Assert(BufferGetBlockNumber(buffer) == SPGIST_ROOT_BLKNO);
SpGistInitBuffer(buffer, SPGIST_LEAF);
page = (Page) BufferGetPage(buffer);
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
UnlockReleaseBuffer(buffer);
- buffer = XLogReadBuffer(*node, SPGIST_NULL_BLKNO, true);
- Assert(BufferIsValid(buffer));
+ XLogReadBufferForRedoExtended(lsn, record, 2, RBM_ZERO, false, &buffer);
+ Assert(BufferGetBlockNumber(buffer) == SPGIST_NULL_BLKNO);
SpGistInitBuffer(buffer, SPGIST_LEAF | SPGIST_NULLS);
page = (Page) BufferGetPage(buffer);
PageSetLSN(page, lsn);
*/
if (xldata->newPage)
{
- buffer = XLogReadBuffer(xldata->node, xldata->blknoLeaf, true);
+ XLogReadBufferForRedoExtended(lsn, record, 0, RBM_ZERO, false, &buffer);
SpGistInitBuffer(buffer,
SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0));
action = BLK_NEEDS_REDO;
}
else
- action = XLogReadBufferForRedo(lsn, record, 0,
- xldata->node, xldata->blknoLeaf,
- &buffer);
+ action = XLogReadBufferForRedo(lsn, record, 0, &buffer);
if (action == BLK_NEEDS_REDO)
{
{
/* replacing a DEAD tuple */
PageIndexTupleDelete(page, xldata->offnumLeaf);
- if (PageAddItem(page, (Item) leafTuple, leafTupleHdr.size,
+ if (PageAddItem(page,
+ (Item) leafTuple, leafTupleHdr.size,
xldata->offnumLeaf, false, false) != xldata->offnumLeaf)
elog(ERROR, "failed to add item of size %u to SPGiST index page",
leafTupleHdr.size);
UnlockReleaseBuffer(buffer);
/* update parent downlink if necessary */
- if (xldata->blknoParent != InvalidBlockNumber)
+ if (xldata->offnumParent != InvalidOffsetNumber)
{
- if (XLogReadBufferForRedo(lsn, record, 1,
- xldata->node, xldata->blknoParent,
- &buffer) == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(lsn, record, 1, &buffer) == BLK_NEEDS_REDO)
{
SpGistInnerTuple tuple;
+ BlockNumber blknoLeaf;
+
+ XLogRecGetBlockTag(record, 0, NULL, NULL, &blknoLeaf);
page = BufferGetPage(buffer);
PageGetItemId(page, xldata->offnumParent));
spgUpdateNodeLink(tuple, xldata->nodeI,
- xldata->blknoLeaf, xldata->offnumLeaf);
+ blknoLeaf, xldata->offnumLeaf);
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
Buffer buffer;
Page page;
XLogRedoAction action;
+ BlockNumber blknoDst;
+
+ XLogRecGetBlockTag(record, 1, NULL, NULL, &blknoDst);
fillFakeState(&state, xldata->stateSrc);
/* Insert tuples on the dest page (do first, so redirect is valid) */
if (xldata->newPage)
{
- buffer = XLogReadBuffer(xldata->node, xldata->blknoDst, true);
+ XLogReadBufferForRedoExtended(lsn, record, 1, RBM_ZERO, false, &buffer);
SpGistInitBuffer(buffer,
SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0));
action = BLK_NEEDS_REDO;
}
else
- action = XLogReadBufferForRedo(lsn, record, 1,
- xldata->node, xldata->blknoDst,
- &buffer);
+ action = XLogReadBufferForRedo(lsn, record, 1, &buffer);
+
if (action == BLK_NEEDS_REDO)
{
int i;
* field.
*/
leafTuple = ptr;
- memcpy(&leafTupleHdr, leafTuple, sizeof(SpGistLeafTupleData));
+ memcpy(&leafTupleHdr, leafTuple,
+ sizeof(SpGistLeafTupleData));
addOrReplaceTuple(page, (Item) leafTuple,
leafTupleHdr.size, toInsert[i]);
UnlockReleaseBuffer(buffer);
/* Delete tuples from the source page, inserting a redirection pointer */
- if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blknoSrc,
- &buffer) == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(lsn, record, 0, &buffer) == BLK_NEEDS_REDO)
{
page = BufferGetPage(buffer);
+
spgPageIndexMultiDelete(&state, page, toDelete, xldata->nMoves,
state.isBuild ? SPGIST_PLACEHOLDER : SPGIST_REDIRECT,
SPGIST_PLACEHOLDER,
- xldata->blknoDst,
+ blknoDst,
toInsert[nInsert - 1]);
PageSetLSN(page, lsn);
UnlockReleaseBuffer(buffer);
/* And update the parent downlink */
- if (XLogReadBufferForRedo(lsn, record, 2, xldata->node, xldata->blknoParent,
- &buffer) == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(lsn, record, 2, &buffer) == BLK_NEEDS_REDO)
{
SpGistInnerTuple tuple;
PageGetItemId(page, xldata->offnumParent));
spgUpdateNodeLink(tuple, xldata->nodeI,
- xldata->blknoDst, toInsert[nInsert - 1]);
+ blknoDst, toInsert[nInsert - 1]);
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
SpGistState state;
Buffer buffer;
Page page;
- int bbi;
XLogRedoAction action;
ptr += sizeof(spgxlogAddNode);
fillFakeState(&state, xldata->stateSrc);
- if (xldata->blknoNew == InvalidBlockNumber)
+ if (!XLogRecHasBlockRef(record, 1))
{
/* update in place */
- Assert(xldata->blknoParent == InvalidBlockNumber);
- if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno,
- &buffer) == BLK_NEEDS_REDO)
+ Assert(xldata->parentBlk == -1);
+ if (XLogReadBufferForRedo(lsn, record, 0, &buffer) == BLK_NEEDS_REDO)
{
page = BufferGetPage(buffer);
+
PageIndexTupleDelete(page, xldata->offnum);
if (PageAddItem(page, (Item) innerTuple, innerTupleHdr.size,
- xldata->offnum, false, false) != xldata->offnum)
+ xldata->offnum,
+ false, false) != xldata->offnum)
elog(ERROR, "failed to add item of size %u to SPGiST index page",
innerTupleHdr.size);
}
else
{
+ BlockNumber blkno;
+ BlockNumber blknoNew;
+
+ XLogRecGetBlockTag(record, 0, NULL, NULL, &blkno);
+ XLogRecGetBlockTag(record, 1, NULL, NULL, &blknoNew);
+
/*
* In normal operation we would have all three pages (source, dest,
* and parent) locked simultaneously; but in WAL replay it should be
* safe to update them one at a time, as long as we do it in the right
- * order.
+ * order. We must insert the new tuple before replacing the old tuple
+ * with the redirect tuple.
*
* The logic here depends on the assumption that blkno != blknoNew,
* else we can't tell which BKP bit goes with which page, and the LSN
- * checks could go wrong too.
+ * checks could go wrong too. XXX does this comment still make sense?
*/
- Assert(xldata->blkno != xldata->blknoNew);
+ Assert(blkno != blknoNew);
/* Install new tuple first so redirect is valid */
if (xldata->newPage)
{
- buffer = XLogReadBuffer(xldata->node, xldata->blknoNew, true);
/* AddNode is not used for nulls pages */
+ XLogReadBufferForRedoExtended(lsn, record, 1, RBM_ZERO, false,
+ &buffer);
SpGistInitBuffer(buffer, 0);
action = BLK_NEEDS_REDO;
}
else
- action = XLogReadBufferForRedo(lsn, record, 1,
- xldata->node, xldata->blknoNew,
- &buffer);
+ action = XLogReadBufferForRedo(lsn, record, 1, &buffer);
if (action == BLK_NEEDS_REDO)
{
page = BufferGetPage(buffer);
innerTupleHdr.size, xldata->offnumNew);
/*
- * If parent is in this same page, don't advance LSN; doing so
- * would fool us into not applying the parent downlink update
- * below. We'll update the LSN when we fix the parent downlink.
+ * If parent is in this same page, update it now.
*/
- if (xldata->blknoParent != xldata->blknoNew)
+ if (xldata->parentBlk == 1)
{
- PageSetLSN(page, lsn);
+ SpGistInnerTuple parentTuple;
+
+ parentTuple = (SpGistInnerTuple) PageGetItem(page,
+ PageGetItemId(page, xldata->offnumParent));
+
+ spgUpdateNodeLink(parentTuple, xldata->nodeI,
+ blknoNew, xldata->offnumNew);
}
+ PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
}
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
/* Delete old tuple, replacing it with redirect or placeholder tuple */
- if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno,
- &buffer) == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(lsn, record, 0, &buffer) == BLK_NEEDS_REDO)
{
SpGistDeadTuple dt;
-
page = BufferGetPage(buffer);
if (state.isBuild)
InvalidOffsetNumber);
else
dt = spgFormDeadTuple(&state, SPGIST_REDIRECT,
- xldata->blknoNew,
+ blknoNew,
xldata->offnumNew);
PageIndexTupleDelete(page, xldata->offnum);
- if (PageAddItem(page, (Item) dt, dt->size, xldata->offnum,
+ if (PageAddItem(page, (Item) dt, dt->size,
+ xldata->offnum,
false, false) != xldata->offnum)
elog(ERROR, "failed to add item of size %u to SPGiST index page",
dt->size);
SpGistPageGetOpaque(page)->nRedirection++;
/*
- * If parent is in this same page, don't advance LSN; doing so
- * would fool us into not applying the parent downlink update
- * below. We'll update the LSN when we fix the parent downlink.
+ * If parent is in this same page, don't advance LSN. We'll
+ * update it when we fix the parent downlink.
*/
- if (xldata->blknoParent != xldata->blkno)
+ if (xldata->parentBlk == 0)
{
- PageSetLSN(page, lsn);
+ SpGistInnerTuple parentTuple;
+
+ parentTuple = (SpGistInnerTuple) PageGetItem(page,
+ PageGetItemId(page, xldata->offnumParent));
+
+ spgUpdateNodeLink(parentTuple, xldata->nodeI,
+ blknoNew, xldata->offnumNew);
}
+ PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
}
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
/*
- * Update parent downlink. Since parent could be in either of the
- * previous two buffers, it's a bit tricky to determine which BKP bit
- * applies.
+ * Update parent downlink (if we didn't do it as part of the source
+ * or destination page update already).
*/
- if (xldata->blknoParent == xldata->blkno)
- bbi = 0;
- else if (xldata->blknoParent == xldata->blknoNew)
- bbi = 1;
- else
- bbi = 2;
-
- if (record->xl_info & XLR_BKP_BLOCK(bbi))
+ if (xldata->parentBlk == 2)
{
- if (bbi == 2) /* else we already did it */
- (void) RestoreBackupBlock(lsn, record, bbi, false, false);
- action = BLK_RESTORED;
- buffer = InvalidBuffer;
- }
- else
- {
- action = XLogReadBufferForRedo(lsn, record, bbi, xldata->node,
- xldata->blknoParent, &buffer);
- Assert(action != BLK_RESTORED);
- }
- if (action == BLK_NEEDS_REDO)
- {
- SpGistInnerTuple innerTuple;
+ if (XLogReadBufferForRedo(lsn, record, 2, &buffer) == BLK_NEEDS_REDO)
+ {
+ SpGistInnerTuple parentTuple;
- page = BufferGetPage(buffer);
+ page = BufferGetPage(buffer);
- innerTuple = (SpGistInnerTuple) PageGetItem(page,
+ parentTuple = (SpGistInnerTuple) PageGetItem(page,
PageGetItemId(page, xldata->offnumParent));
- spgUpdateNodeLink(innerTuple, xldata->nodeI,
- xldata->blknoNew, xldata->offnumNew);
+ spgUpdateNodeLink(parentTuple, xldata->nodeI,
+ blknoNew, xldata->offnumNew);
- PageSetLSN(page, lsn);
- MarkBufferDirty(buffer);
+ PageSetLSN(page, lsn);
+ MarkBufferDirty(buffer);
+ }
+ if (BufferIsValid(buffer))
+ UnlockReleaseBuffer(buffer);
}
- if (BufferIsValid(buffer))
- UnlockReleaseBuffer(buffer);
}
}
SpGistInnerTupleData postfixTupleHdr;
Buffer buffer;
Page page;
+ XLogRedoAction action;
ptr += sizeof(spgxlogSplitTuple);
prefixTuple = ptr;
*/
/* insert postfix tuple first to avoid dangling link */
- if (xldata->blknoPostfix != xldata->blknoPrefix)
+ if (!xldata->postfixBlkSame)
{
- XLogRedoAction action;
-
if (xldata->newPage)
{
- buffer = XLogReadBuffer(xldata->node, xldata->blknoPostfix, true);
+ XLogReadBufferForRedoExtended(lsn, record, 1, RBM_ZERO, false,
+ &buffer);
/* SplitTuple is not used for nulls pages */
SpGistInitBuffer(buffer, 0);
action = BLK_NEEDS_REDO;
}
else
- action = XLogReadBufferForRedo(lsn, record, 1,
- xldata->node, xldata->blknoPostfix,
- &buffer);
-
+ action = XLogReadBufferForRedo(lsn, record, 1, &buffer);
if (action == BLK_NEEDS_REDO)
{
page = BufferGetPage(buffer);
}
/* now handle the original page */
- if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blknoPrefix,
- &buffer) == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(lsn, record, 0, &buffer) == BLK_NEEDS_REDO)
{
page = BufferGetPage(buffer);
+
PageIndexTupleDelete(page, xldata->offnumPrefix);
if (PageAddItem(page, (Item) prefixTuple, prefixTupleHdr.size,
xldata->offnumPrefix, false, false) != xldata->offnumPrefix)
elog(ERROR, "failed to add item of size %u to SPGiST index page",
prefixTupleHdr.size);
- if (xldata->blknoPostfix == xldata->blknoPrefix)
- addOrReplaceTuple(page, (Item) postfixTuple, postfixTupleHdr.size,
+ if (xldata->postfixBlkSame)
+ addOrReplaceTuple(page, (Item) postfixTuple,
+ postfixTupleHdr.size,
xldata->offnumPostfix);
PageSetLSN(page, lsn);
uint8 *leafPageSelect;
Buffer srcBuffer;
Buffer destBuffer;
+ Buffer innerBuffer;
Page srcPage;
Page destPage;
- Buffer innerBuffer;
Page page;
- int bbi;
int i;
+ BlockNumber blknoInner;
XLogRedoAction action;
+ XLogRecGetBlockTag(record, 2, NULL, NULL, &blknoInner);
+
fillFakeState(&state, xldata->stateSrc);
ptr += SizeOfSpgxlogPickSplit;
/* now ptr points to the list of leaf tuples */
- /*
- * It's a bit tricky to identify which pages have been handled as
- * full-page images, so we explicitly count each referenced buffer.
- */
- bbi = 0;
-
- if (SpGistBlockIsRoot(xldata->blknoSrc))
+ if (xldata->isRootSplit)
{
/* when splitting root, we touch it only in the guise of new inner */
srcBuffer = InvalidBuffer;
else if (xldata->initSrc)
{
/* just re-init the source page */
- srcBuffer = XLogReadBuffer(xldata->node, xldata->blknoSrc, true);
- Assert(BufferIsValid(srcBuffer));
+ XLogReadBufferForRedoExtended(lsn, record, 0, RBM_ZERO, false,
+ &srcBuffer);
srcPage = (Page) BufferGetPage(srcBuffer);
SpGistInitBuffer(srcBuffer,
* inserting leaf tuples and the new inner tuple, else the added
* redirect tuple will be a dangling link.)
*/
- if (XLogReadBufferForRedo(lsn, record, bbi,
- xldata->node, xldata->blknoSrc,
- &srcBuffer) == BLK_NEEDS_REDO)
+ srcPage = NULL;
+ if (XLogReadBufferForRedo(lsn, record, 0, &srcBuffer) == BLK_NEEDS_REDO)
{
srcPage = BufferGetPage(srcBuffer);
/*
- * We have it a bit easier here than in doPickSplit(), because we
- * know the inner tuple's location already, so we can inject the
- * correct redirection tuple now.
+ * We have it a bit easier here than in doPickSplit(),
+ * because we know the inner tuple's location already, so
+ * we can inject the correct redirection tuple now.
*/
if (!state.isBuild)
spgPageIndexMultiDelete(&state, srcPage,
toDelete, xldata->nDelete,
SPGIST_REDIRECT,
SPGIST_PLACEHOLDER,
- xldata->blknoInner,
+ blknoInner,
xldata->offnumInner);
else
spgPageIndexMultiDelete(&state, srcPage,
/* don't update LSN etc till we're done with it */
}
- else
- {
- srcPage = NULL; /* don't do any page updates */
- }
- bbi++;
}
/* try to access dest page if any */
- if (xldata->blknoDest == InvalidBlockNumber)
+ if (!XLogRecHasBlockRef(record, 1))
{
destBuffer = InvalidBuffer;
destPage = NULL;
else if (xldata->initDest)
{
/* just re-init the dest page */
- destBuffer = XLogReadBuffer(xldata->node, xldata->blknoDest, true);
- Assert(BufferIsValid(destBuffer));
+ XLogReadBufferForRedoExtended(lsn, record, 1, RBM_ZERO, false,
+ &destBuffer);
destPage = (Page) BufferGetPage(destBuffer);
SpGistInitBuffer(destBuffer,
* We could probably release the page lock immediately in the
* full-page-image case, but for safety let's hold it till later.
*/
- if (XLogReadBufferForRedo(lsn, record, bbi,
- xldata->node, xldata->blknoDest,
- &destBuffer) == BLK_NEEDS_REDO)
- {
+ destPage = NULL;
+ if (XLogReadBufferForRedo(lsn, record, 1, &destBuffer) == BLK_NEEDS_REDO)
destPage = (Page) BufferGetPage(destBuffer);
- }
- else
- {
- destPage = NULL; /* don't do any page updates */
- }
- bbi++;
}
/* restore leaf tuples to src and/or dest page */
/* restore new inner tuple */
if (xldata->initInner)
{
- innerBuffer = XLogReadBuffer(xldata->node, xldata->blknoInner, true);
- SpGistInitBuffer(innerBuffer,
- (xldata->storesNulls ? SPGIST_NULLS : 0));
+ XLogReadBufferForRedoExtended(lsn, record, 2, RBM_ZERO, false,
+ &innerBuffer);
+ SpGistInitBuffer(innerBuffer, (xldata->storesNulls ? SPGIST_NULLS : 0));
action = BLK_NEEDS_REDO;
}
else
- action = XLogReadBufferForRedo(lsn, record, bbi, xldata->node,
- xldata->blknoInner, &innerBuffer);
+ action = XLogReadBufferForRedo(lsn, record, 2, &innerBuffer);
if (action == BLK_NEEDS_REDO)
{
xldata->offnumInner);
/* if inner is also parent, update link while we're here */
- if (xldata->blknoInner == xldata->blknoParent)
+ if (xldata->innerIsParent)
{
SpGistInnerTuple parent;
parent = (SpGistInnerTuple) PageGetItem(page,
PageGetItemId(page, xldata->offnumParent));
spgUpdateNodeLink(parent, xldata->nodeI,
- xldata->blknoInner, xldata->offnumInner);
+ blknoInner, xldata->offnumInner);
}
PageSetLSN(page, lsn);
}
if (BufferIsValid(innerBuffer))
UnlockReleaseBuffer(innerBuffer);
- bbi++;
/*
* Now we can release the leaf-page locks. It's okay to do this before
UnlockReleaseBuffer(destBuffer);
/* update parent downlink, unless we did it above */
- if (xldata->blknoParent == InvalidBlockNumber)
- {
- /* no parent cause we split the root */
- Assert(SpGistBlockIsRoot(xldata->blknoInner));
- }
- else if (xldata->blknoInner != xldata->blknoParent)
+ if (XLogRecHasBlockRef(record, 3))
{
Buffer parentBuffer;
- if (XLogReadBufferForRedo(lsn, record, bbi,
- xldata->node, xldata->blknoParent,
- &parentBuffer) == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(lsn, record, 3, &parentBuffer) == BLK_NEEDS_REDO)
{
SpGistInnerTuple parent;
parent = (SpGistInnerTuple) PageGetItem(page,
PageGetItemId(page, xldata->offnumParent));
spgUpdateNodeLink(parent, xldata->nodeI,
- xldata->blknoInner, xldata->offnumInner);
+ blknoInner, xldata->offnumInner);
PageSetLSN(page, lsn);
MarkBufferDirty(parentBuffer);
if (BufferIsValid(parentBuffer))
UnlockReleaseBuffer(parentBuffer);
}
+ else
+ Assert(xldata->innerIsParent || xldata->isRootSplit);
}
static void
ptr += sizeof(OffsetNumber) * xldata->nChain;
chainDest = (OffsetNumber *) ptr;
- if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno,
- &buffer) == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(lsn, record, 0, &buffer) == BLK_NEEDS_REDO)
{
page = BufferGetPage(buffer);
toDelete = xldata->offsets;
- if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno,
- &buffer) == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(lsn, record, 0, &buffer) == BLK_NEEDS_REDO)
{
page = BufferGetPage(buffer);
-
/* The tuple numbers are in order */
PageIndexMultiDelete(page, toDelete, xldata->nDelete);
spgxlogVacuumRedirect *xldata = (spgxlogVacuumRedirect *) ptr;
OffsetNumber *itemToPlaceholder;
Buffer buffer;
+ Page page;
itemToPlaceholder = xldata->offsets;
if (InHotStandby)
{
if (TransactionIdIsValid(xldata->newestRedirectXid))
+ {
+ RelFileNode node;
+
+ XLogRecGetBlockTag(record, 0, &node, NULL, NULL);
ResolveRecoveryConflictWithSnapshot(xldata->newestRedirectXid,
- xldata->node);
+ node);
+ }
}
- if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno,
- &buffer) == BLK_NEEDS_REDO)
+ if (XLogReadBufferForRedo(lsn, record, 0, &buffer) == BLK_NEEDS_REDO)
{
- Page page = BufferGetPage(buffer);
- SpGistPageOpaque opaque = SpGistPageGetOpaque(page);
+ SpGistPageOpaque opaque;
int i;
+ page = BufferGetPage(buffer);
+ opaque = SpGistPageGetOpaque(page);
+
/* Convert redirect pointers to plain placeholders */
for (i = 0; i < xldata->nToPlaceholder; i++)
{
Note that marking a buffer dirty with MarkBufferDirty() should only
happen iff you write a WAL record; see Writing Hints below.
-5. If the relation requires WAL-logging, build a WAL log record and pass it
-to XLogInsert(); then update the page's LSN using the returned XLOG
-location. For instance,
+5. If the relation requires WAL-logging, build a WAL record using
+XLogBeginInsert and XLogRegister* functions, and insert it.
+(See "Constructing a WAL record" below). Then update the page's LSN using the
+returned XLOG location. For instance,
- recptr = XLogInsert(rmgr_id, info, rdata);
+ XLogBeginInsert();
+ XLogRegisterBuffer(...)
+ XLogRegisterData(...)
+ recptr = XLogInsert(rmgr_id, info);
PageSetLSN(dp, recptr);
- // Note that we no longer do PageSetTLI() from 9.3 onwards
- // since that field on a page has now changed its meaning.
6. END_CRIT_SECTION()
7. Unlock and unpin the buffer(s).
-XLogInsert's "rdata" argument is an array of pointer/size items identifying
-chunks of data to be written in the XLOG record, plus optional shared-buffer
-IDs for chunks that are in shared buffers rather than temporary variables.
-The "rdata" array must mention (at least once) each of the shared buffers
-being modified, unless the action is such that the WAL replay routine can
-reconstruct the entire page contents. XLogInsert includes the logic that
-tests to see whether a shared buffer has been modified since the last
-checkpoint. If not, the entire page contents are logged rather than just the
-portion(s) pointed to by "rdata".
-
-Because XLogInsert drops the rdata components associated with buffers it
-chooses to log in full, the WAL replay routines normally need to test to see
-which buffers were handled that way --- otherwise they may be misled about
-what the XLOG record actually contains. XLOG records that describe multi-page
-changes therefore require some care to design: you must be certain that you
-know what data is indicated by each "BKP" bit. An example of the trickiness
-is that in a HEAP_UPDATE record, BKP(0) normally is associated with the source
-page and BKP(1) is associated with the destination page --- but if these are
-the same page, only BKP(0) would have been set.
-
-For this reason as well as the risk of deadlocking on buffer locks, it's best
-to design WAL records so that they reflect small atomic actions involving just
-one or a few pages. The current XLOG infrastructure cannot handle WAL records
-involving references to more than four shared buffers, anyway.
-
-In the case where the WAL record contains enough information to re-generate
-the entire contents of a page, do *not* show that page's buffer ID in the
-rdata array, even if some of the rdata items point into the buffer. This is
-because you don't want XLogInsert to log the whole page contents. The
-standard replay-routine pattern for this case is
-
- buffer = XLogReadBuffer(rnode, blkno, true);
- Assert(BufferIsValid(buffer));
- page = (Page) BufferGetPage(buffer);
-
- ... initialize the page ...
-
- PageSetLSN(page, lsn);
- MarkBufferDirty(buffer);
- UnlockReleaseBuffer(buffer);
-
-In the case where the WAL record provides only enough information to
-incrementally update the page, the rdata array *must* mention the buffer
-ID at least once; otherwise there is no defense against torn-page problems.
-The standard replay-routine pattern for this case is
-
- if (XLogReadBufferForRedo(lsn, record, N, rnode, blkno, &buffer) == BLK_NEEDS_REDO)
- {
- page = (Page) BufferGetPage(buffer);
-
- ... apply the change ...
-
- PageSetLSN(page, lsn);
- MarkBufferDirty(buffer);
- }
- if (BufferIsValid(buffer))
- UnlockReleaseBuffer(buffer);
-
-XLogReadBufferForRedo reads the page from disk, and checks what action needs to
-be taken to the page. If the XLR_BKP_BLOCK(N) flag is set, it restores the
-full page image and returns BLK_RESTORED. If there is no full page image, but
-page cannot be found or if the change has already been replayed (i.e. the
-page's LSN >= the record we're replaying), it returns BLK_NOTFOUND or BLK_DONE,
-respectively. Usually, the redo routine only needs to pay attention to the
-BLK_NEEDS_REDO return code, which means that the routine should apply the
-incremental change. In any case, the caller is responsible for unlocking and
-releasing the buffer. Note that XLogReadBufferForRedo returns the buffer
-locked even if no redo is required, unless the page does not exist.
-
-As noted above, for a multi-page update you need to be able to determine
-which XLR_BKP_BLOCK(N) flag applies to each page. If a WAL record reflects
-a combination of fully-rewritable and incremental updates, then the rewritable
-pages don't count for the XLR_BKP_BLOCK(N) numbering. (XLR_BKP_BLOCK(N) is
-associated with the N'th distinct buffer ID seen in the "rdata" array, and
-per the above discussion, fully-rewritable buffers shouldn't be mentioned in
-"rdata".)
+Complex changes (such as a multilevel index insertion) normally need to be
+described by a series of atomic-action WAL records. The intermediate states
+must be self-consistent, so that if the replay is interrupted between any
+two actions, the system is fully functional. In btree indexes, for example,
+a page split requires a new page to be allocated, and an insertion of a new
+key in the parent btree level, but for locking reasons this has to be
+reflected by two separate WAL records. Replaying the first record, to
+allocate the new page and move tuples to it, sets a flag on the page to
+indicate that the key has not been inserted to the parent yet. Replaying the
+second record clears the flag. This intermediate state is never seen by
+other backends during normal operation, because the lock on the child page
+is held across the two actions, but will be seen if the operation is
+interrupted before writing the second WAL record. The search algorithm works
+with the intermediate state as normal, but if an insertion encounters a page
+with the incomplete-split flag set, it will finish the interrupted split by
+inserting the key to the parent, before proceeding.
+
+
+Constructing a WAL record
+-------------------------
+
+A WAL record consists of a header common to all WAL record types,
+record-specific data, and information about the data blocks modified. Each
+modified data block is identified by an ID number, and can optionally have
+more record-specific data associated with the block. If XLogInsert decides
+that a full-page image of a block needs to be taken, the data associated
+with that block is not included.
+
+The API for constructing a WAL record consists of five functions:
+XLogBeginInsert, XLogRegisterBuffer, XLogRegisterData, XLogRegisterBufData,
+and XLogInsert. First, call XLogBeginInsert(). Then register all the buffers
+modified, and data needed to replay the changes, using XLogRegister*
+functions. Finally, insert the constructed record to the WAL by calling
+XLogInsert().
+
+ XLogBeginInsert();
+
+ /* register buffers modified as part of this WAL-logged action */
+ XLogRegisterBuffer(0, lbuffer, REGBUF_STANDARD);
+ XLogRegisterBuffer(1, rbuffer, REGBUF_STANDARD);
+
+ /* register data that is always included in the WAL record */
+ XLogRegisterData(&xlrec, SizeOfFictionalAction);
+
+ /*
+ * register data associated with a buffer. This will not be included
+ * in the record if a full-page image is taken.
+ */
+ XLogRegisterBufData(0, tuple->data, tuple->len);
+
+ /* more data associated with the buffer */
+ XLogRegisterBufData(0, data2, len2);
+
+ /*
+ * Ok, all the data and buffers to include in the WAL record have
+ * been registered. Insert the record.
+ */
+ recptr = XLogInsert(RM_FOO_ID, XLOG_FOOBAR_DO_STUFF);
+
+Details of the API functions:
+
+void XLogBeginInsert(void)
+
+ Must be called before XLogRegisterBuffer and XLogRegisterData.
+
+void XLogResetInsertion(void)
+
+ Clear any currently registered data and buffers from the WAL record
+ construction workspace. This is only needed if you have already called
+ XLogBeginInsert(), but decide to not insert the record after all.
+
+void XLogEnsureRecordSpace(int nbuffers, int nrdatas)
+
+ Normally, the WAL record construction buffers have the following limits:
+
+ * Max 4 registered buffers
+ * Max 20 chunks of registered data
+
+ These default limits are enough for most record types that change some
+ on-disk structures. For the odd case that requires more data, or needs to
+ modify more buffers, these limits can be raised by calling
+ XLogEnsureRecordSpace(). XLogEnsureRecordSpace() must be called before
+ XLogBeginInsert(), and outside a critical section.
+
+void XLogRegisterBuffer(uint8 block_id, Buffer buf, int flags);
+
+ XLogRegisterBuffer adds information about a data block to the WAL record.
+ block_id is an arbitrary number used to identify this page reference in
+ the redo routine. The information needed to re-find the page at redo -
+ relfilenode, fork, and block number - are included in the WAL record.
+
+ XLogInsert will automatically include a full copy of the page contents, if
+ this is the first modification of the buffer since the last checkpoint.
+ It is important to register every buffer modified by the action with
+ XLogRegisterBuffer, to avoid torn-page hazards.
+
+ The flags control when and how the buffer contents are included in the
+ WAL record. Normally, a full-page image is taken only if the page has not
+ been modified since the last checkpoint, and only if full_page_writes=on
+ or an online backup is in progress. The REGBUF_FORCE_IMAGE flag can be
+ used to force a full-page image to always be included; that is useful
+ e.g. for an operation that rewrites most of the page, so that tracking the
+ details is not worth it. For the rare case where it is not necessary to
+ protect from torn pages, REGBUF_NO_IMAGE flag can be used to suppress
+ full page image from being taken. REGBUF_WILL_INIT also suppresses a full
+ page image, but the redo routine must re-generate the page from scratch,
+ without looking at the old page contents. Re-initializing the page
+ protects from torn page hazards like a full page image does. If the
+ REGBUF_KEEP_DATA flag is given, the per-buffer data registered with
+ XLogRegisterBufData() is included in the WAL record even if a full-page
+ image is taken.
+
+ The REGBUF_STANDARD flag can be specified together with the other flags to
+ indicate that the page follows the standard page layout. It causes the
+ area between pd_lower and pd_upper to be left out from the image, reducing
+ WAL volume.
+
+void XLogRegisterData(char *data, int len);
+
+ XLogRegisterData is used to include arbitrary data in the WAL record. If
+ XLogRegisterData() is called multiple times, the data are appended, and
+ will be made available to the redo routine as one contiguous chunk.
+
+void XLogRegisterBufData(uint8 block_id, char *data, int len);
+
+ XLogRegisterBufData is used to include data associated with a particular
+ buffer that was registered earlier with XLogRegisterBuffer(). If
+ XLogRegisterBufData() is called multiple times with the same 'id', the
+ data are appended, and will be made available to the redo routine as one
+ contiguous chunk.
+
+ If a full-page image of the buffer is taken at insertion, the data is not
+ included in the WAL record, unless the REGBUF_KEEP_DATA flag is used.
+
+
+Writing a REDO routine
+----------------------
+
+A REDO routine uses the data and page references included in the WAL record
+to reconstruct the new state of the page. To access the data and pages
+included in the WAL record, the following functions are available:
+
+char *
+XLogRecGetData(XLogRecord *record)
+
+ Returns the "main" chunk of data included in the WAL record. That is, the
+ data included in the record with XLogRegisterData(...).
+
+XLogRedoAction
+XLogReadBufferForRedo(XLogRecPtr lsn, XLogRecord *record, uint8 block_id,
+ Buffer *buf)
+
+ Reads a block associated with the WAL record currently being replayed, with
+ the given block id. The block is read into a shared buffer and locked in
+ exclusive mode. The shared buffer is returned in *buf, or InvalidBuffer if
+ the page could not be found. Returns one of the following result codes:
+
+ BLK_NEEDS_REDO - block needs to be replayed
+ BLK_DONE - block was already replayed
+ BLK_RESTORED - block was restored from a full-page image
+ BLK_NOTFOUND - block was not found (and hence does not need to be
+ replayed
+
+ The REDO routine must redo the actions to the page if XLogReplayBuffer
+ returns BLK_NEEDS_REDO. In other cases, no further action is required,
+ although the result code can be used to distinguish the reason.
+
+ After modifying the page (if it was necessary), the REDO routine must
+ unlock and release the buffer. Note that XLogReplayBuffer locks the page
+ even if no action is required.
+
+XLogRedoAction
+XLogReadBufferForRedoExtended(XLogRecPtr lsn, XLogRecord *record,
+ uint8 block_id, ReadBufferMode mode,
+ bool get_cleanup_lock, Buffer *buf)
+
+ Like XLogReadBufferForRedo(), but with a few extra options.
+
+ 'mode' can be passed to e.g force the page to be zeroed, instead of reading
+ it from disk. This RBM_ZERO mode should be used to re-initialize pages
+ registered in the REGBUF_WILL_INIT mode in XLogRegisterBuffer().
+
+ if 'get_cleanup_lock' is TRUE, a stronger "cleanup" lock on the page is
+ acquired, instead of a reguler exclusive-lock.
+
+char *
+XLogGetBlockData(XLogRecord *record, uint8 block_id, Size *len)
+
+ Returns a chunk of data for block with given id, included in the WAL record
+ (with XLogRegisterBufData(id, ...)). The length of the data is returned in
+ *len. This is typically used after XLogReplayBuffer() returned
+ BLK_NEEDS_REDO, to get the information required to redo the actions on the
+ page. If no data with the given id is included, perhaps because a
+ full-page image of the associated buffer was taken instead, an error is
+ thrown.
+
+bool
+XLogRecHasBlockRef(XLogRecord *record, uint8 block_id);
+
+ Returns true if the record has a block reference with given ID.
+
+void
+XLogRecGetBlockTag(XLogRecord *record, uint8 block_id, RelFileNode *rnode,
+ ForkNumber *forknum, BlockNumber *blknum);
+
+ Returns the relfilenode, fork number, and block number of the block
+ registered with the given ID.
+
When replaying a WAL record that describes changes on multiple pages, you
must be careful to lock the pages properly to prevent concurrent Hot Standby
-queries from seeing an inconsistent state. If this requires that two
-or more buffer locks be held concurrently, you must lock the pages in
-appropriate order, and not release the locks until all the changes are done.
+queries from seeing an inconsistent state.
Note that we must only use PageSetLSN/PageGetLSN() when we know the action
is serialised. Only Startup process may modify data blocks during recovery,
or be writing the data block directly rather than through shared buffers
while holding AccessExclusiveLock on the relation.
-Due to all these constraints, complex changes (such as a multilevel index
-insertion) normally need to be described by a series of atomic-action WAL
-records. The intermediate states must be self-consistent, so that if the
-replay is interrupted between any two actions, the system is fully
-functional. In btree indexes, for example, a page split requires a new page
-to be allocated, and an insertion of a new key in the parent btree level,
-but for locking reasons this has to be reflected by two separate WAL
-records. Replaying the first record, to allocate the new page and move
-tuples to it, sets a flag on the page to indicate that the key has not been
-inserted to the parent yet. Replaying the second record clears the flag.
-This intermediate state is never seen by other backends during normal
-operation, because the lock on the child page is held across the two
-actions, but will be seen if the operation is interrupted before writing
-the second WAL record. The search algorithm works with the intermediate
-state as normal, but if an insertion encounters a page with the
-incomplete-split flag set, it will finish the interrupted split by
-inserting the key to the parent, before proceeding.
Writing Hints
-------------
static void
WriteZeroPageXlogRec(int pageno)
{
- XLogRecData rdata;
-
- rdata.data = (char *) (&pageno);
- rdata.len = sizeof(int);
- rdata.buffer = InvalidBuffer;
- rdata.next = NULL;
- (void) XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE, &rdata);
+ XLogBeginInsert();
+ XLogRegisterData((char *) (&pageno), sizeof(int));
+ (void) XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE);
}
/*
static void
WriteTruncateXlogRec(int pageno)
{
- XLogRecData rdata;
XLogRecPtr recptr;
- rdata.data = (char *) (&pageno);
- rdata.len = sizeof(int);
- rdata.buffer = InvalidBuffer;
- rdata.next = NULL;
- recptr = XLogInsert(RM_CLOG_ID, CLOG_TRUNCATE, &rdata);
+ XLogBeginInsert();
+ XLogRegisterData((char *) (&pageno), sizeof(int));
+ recptr = XLogInsert(RM_CLOG_ID, CLOG_TRUNCATE);
XLogFlush(recptr);
}
uint8 info = record->xl_info & ~XLR_INFO_MASK;
/* Backup blocks are not used in clog records */
- Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+ Assert(!XLogRecHasAnyBlockRefs(record));
if (info == CLOG_ZEROPAGE)
{
{
MultiXactId multi;
MultiXactOffset offset;
- XLogRecData rdata[2];
xl_multixact_create xlrec;
debug_elog3(DEBUG2, "Create: %s",
* the status flags in one XLogRecData, then all the xids in another one?
* Not clear that it's worth the trouble though.
*/
- rdata[0].data = (char *) (&xlrec);
- rdata[0].len = SizeOfMultiXactCreate;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
+ XLogBeginInsert();
+ XLogRegisterData((char *) (&xlrec), SizeOfMultiXactCreate);
+ XLogRegisterData((char *) members, nmembers * sizeof(MultiXactMember));
- rdata[1].data = (char *) members;
- rdata[1].len = nmembers * sizeof(MultiXactMember);
- rdata[1].buffer = InvalidBuffer;
- rdata[1].next = NULL;
-
- (void) XLogInsert(RM_MULTIXACT_ID, XLOG_MULTIXACT_CREATE_ID, rdata);
+ (void) XLogInsert(RM_MULTIXACT_ID, XLOG_MULTIXACT_CREATE_ID);
/* Now enter the information into the OFFSETs and MEMBERs logs */
RecordNewMultiXact(multi, offset, nmembers, members);
static void
WriteMZeroPageXlogRec(int pageno, uint8 info)
{
- XLogRecData rdata;
-
- rdata.data = (char *) (&pageno);
- rdata.len = sizeof(int);
- rdata.buffer = InvalidBuffer;
- rdata.next = NULL;
- (void) XLogInsert(RM_MULTIXACT_ID, info, &rdata);
+ XLogBeginInsert();
+ XLogRegisterData((char *) (&pageno), sizeof(int));
+ (void) XLogInsert(RM_MULTIXACT_ID, info);
}
/*
uint8 info = record->xl_info & ~XLR_INFO_MASK;
/* Backup blocks are not used in multixact records */
- Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+ Assert(!XLogRecHasAnyBlockRefs(record));
if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE)
{
/*
* During prepare, the state file is assembled in memory before writing it
- * to WAL and the actual state file. We use a chain of XLogRecData blocks
- * so that we will be able to pass the state file contents directly to
- * XLogInsert.
+ * to WAL and the actual state file. We use a chain of StateFileChunk blocks.
*/
+typedef struct StateFileChunk
+{
+ char *data;
+ uint32 len;
+ struct StateFileChunk *next;
+} StateFileChunk;
+
static struct xllist
{
- XLogRecData *head; /* first data block in the chain */
- XLogRecData *tail; /* last block in chain */
+ StateFileChunk *head; /* first data block in the chain */
+ StateFileChunk *tail; /* last block in chain */
+ uint32 num_chunks;
uint32 bytes_free; /* free bytes left in tail block */
uint32 total_len; /* total data bytes in chain */
} records;
if (padlen > records.bytes_free)
{
- records.tail->next = palloc0(sizeof(XLogRecData));
+ records.tail->next = palloc0(sizeof(StateFileChunk));
records.tail = records.tail->next;
- records.tail->buffer = InvalidBuffer;
records.tail->len = 0;
records.tail->next = NULL;
+ records.num_chunks++;
records.bytes_free = Max(padlen, 512);
records.tail->data = palloc(records.bytes_free);
SharedInvalidationMessage *invalmsgs;
/* Initialize linked list */
- records.head = palloc0(sizeof(XLogRecData));
- records.head->buffer = InvalidBuffer;
+ records.head = palloc0(sizeof(StateFileChunk));
records.head->len = 0;
records.head->next = NULL;
records.head->data = palloc(records.bytes_free);
records.tail = records.head;
+ records.num_chunks = 1;
records.total_len = 0;
TransactionId xid = pgxact->xid;
TwoPhaseFileHeader *hdr;
char path[MAXPGPATH];
- XLogRecData *record;
+ StateFileChunk *record;
pg_crc32 statefile_crc;
pg_crc32 bogus_crc;
int fd;
* We save the PREPARE record's location in the gxact for later use by
* CheckPointTwoPhase.
*/
+ XLogEnsureRecordSpace(0, records.num_chunks);
+
START_CRIT_SECTION();
MyPgXact->delayChkpt = true;
- gxact->prepare_lsn = XLogInsert(RM_XACT_ID, XLOG_XACT_PREPARE,
- records.head);
+ XLogBeginInsert();
+ for (record = records.head; record != NULL; record = record->next)
+ {
+ XLogRegisterData(record->data, record->len);
+ }
+ gxact->prepare_lsn = XLogInsert(RM_XACT_ID, XLOG_XACT_PREPARE);
XLogFlush(gxact->prepare_lsn);
/* If we crash now, we have prepared: WAL replay will fix things */
SyncRepWaitForLSN(gxact->prepare_lsn);
records.tail = records.head = NULL;
+ records.num_chunks = 0;
}
/*
SharedInvalidationMessage *invalmsgs,
bool initfileinval)
{
- XLogRecData rdata[4];
- int lastrdata = 0;
xl_xact_commit_prepared xlrec;
XLogRecPtr recptr;
xlrec.crec.nsubxacts = nchildren;
xlrec.crec.nmsgs = ninvalmsgs;
- rdata[0].data = (char *) (&xlrec);
- rdata[0].len = MinSizeOfXactCommitPrepared;
- rdata[0].buffer = InvalidBuffer;
+ XLogBeginInsert();
+ XLogRegisterData((char *) (&xlrec), MinSizeOfXactCommitPrepared);
+
/* dump rels to delete */
if (nrels > 0)
- {
- rdata[0].next = &(rdata[1]);
- rdata[1].data = (char *) rels;
- rdata[1].len = nrels * sizeof(RelFileNode);
- rdata[1].buffer = InvalidBuffer;
- lastrdata = 1;
- }
+ XLogRegisterData((char *) rels, nrels * sizeof(RelFileNode));
+
/* dump committed child Xids */
if (nchildren > 0)
- {
- rdata[lastrdata].next = &(rdata[2]);
- rdata[2].data = (char *) children;
- rdata[2].len = nchildren * sizeof(TransactionId);
- rdata[2].buffer = InvalidBuffer;
- lastrdata = 2;
- }
+ XLogRegisterData((char *) children,
+ nchildren * sizeof(TransactionId));
+
/* dump cache invalidation messages */
if (ninvalmsgs > 0)
- {
- rdata[lastrdata].next = &(rdata[3]);
- rdata[3].data = (char *) invalmsgs;
- rdata[3].len = ninvalmsgs * sizeof(SharedInvalidationMessage);
- rdata[3].buffer = InvalidBuffer;
- lastrdata = 3;
- }
- rdata[lastrdata].next = NULL;
+ XLogRegisterData((char *) invalmsgs,
+ ninvalmsgs * sizeof(SharedInvalidationMessage));
- recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT_PREPARED, rdata);
+ recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT_PREPARED);
/*
* We don't currently try to sleep before flush here ... nor is there any
int nrels,
RelFileNode *rels)
{
- XLogRecData rdata[3];
- int lastrdata = 0;
xl_xact_abort_prepared xlrec;
XLogRecPtr recptr;
xlrec.arec.xact_time = GetCurrentTimestamp();
xlrec.arec.nrels = nrels;
xlrec.arec.nsubxacts = nchildren;
- rdata[0].data = (char *) (&xlrec);
- rdata[0].len = MinSizeOfXactAbortPrepared;
- rdata[0].buffer = InvalidBuffer;
+
+ XLogBeginInsert();
+ XLogRegisterData((char *) (&xlrec), MinSizeOfXactAbortPrepared);
+
/* dump rels to delete */
if (nrels > 0)
- {
- rdata[0].next = &(rdata[1]);
- rdata[1].data = (char *) rels;
- rdata[1].len = nrels * sizeof(RelFileNode);
- rdata[1].buffer = InvalidBuffer;
- lastrdata = 1;
- }
+ XLogRegisterData((char *) rels, nrels * sizeof(RelFileNode));
+
/* dump committed child Xids */
if (nchildren > 0)
- {
- rdata[lastrdata].next = &(rdata[2]);
- rdata[2].data = (char *) children;
- rdata[2].len = nchildren * sizeof(TransactionId);
- rdata[2].buffer = InvalidBuffer;
- lastrdata = 2;
- }
- rdata[lastrdata].next = NULL;
+ XLogRegisterData((char *) children,
+ nchildren * sizeof(TransactionId));
- recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT_PREPARED, rdata);
+ recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT_PREPARED);
/* Always flush, since we're about to remove the 2PC state file */
XLogFlush(recptr);
if (nUnreportedXids >= PGPROC_MAX_CACHED_SUBXIDS ||
log_unknown_top)
{
- XLogRecData rdata[2];
xl_xact_assignment xlrec;
/*
Assert(TransactionIdIsValid(xlrec.xtop));
xlrec.nsubxacts = nUnreportedXids;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = MinSizeOfXactAssignment;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &rdata[1];
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, MinSizeOfXactAssignment);
+ XLogRegisterData((char *) unreportedXids,
+ nUnreportedXids * sizeof(TransactionId));
- rdata[1].data = (char *) unreportedXids;
- rdata[1].len = nUnreportedXids * sizeof(TransactionId);
- rdata[1].buffer = InvalidBuffer;
- rdata[1].next = NULL;
-
- (void) XLogInsert(RM_XACT_ID, XLOG_XACT_ASSIGNMENT, rdata);
+ (void) XLogInsert(RM_XACT_ID, XLOG_XACT_ASSIGNMENT);
nUnreportedXids = 0;
/* mark top, not current xact as having been logged */
if (nrels > 0 || nmsgs > 0 || RelcacheInitFileInval || forceSyncCommit ||
XLogLogicalInfoActive())
{
- XLogRecData rdata[4];
- int lastrdata = 0;
xl_xact_commit xlrec;
/*
xlrec.nrels = nrels;
xlrec.nsubxacts = nchildren;
xlrec.nmsgs = nmsgs;
- rdata[0].data = (char *) (&xlrec);
- rdata[0].len = MinSizeOfXactCommit;
- rdata[0].buffer = InvalidBuffer;
+
+ XLogBeginInsert();
+ XLogRegisterData((char *) (&xlrec), MinSizeOfXactCommit);
/* dump rels to delete */
if (nrels > 0)
- {
- rdata[0].next = &(rdata[1]);
- rdata[1].data = (char *) rels;
- rdata[1].len = nrels * sizeof(RelFileNode);
- rdata[1].buffer = InvalidBuffer;
- lastrdata = 1;
- }
+ XLogRegisterData((char *) rels,
+ nrels * sizeof(RelFileNode));
/* dump committed child Xids */
if (nchildren > 0)
- {
- rdata[lastrdata].next = &(rdata[2]);
- rdata[2].data = (char *) children;
- rdata[2].len = nchildren * sizeof(TransactionId);
- rdata[2].buffer = InvalidBuffer;
- lastrdata = 2;
- }
+ XLogRegisterData((char *) children,
+ nchildren * sizeof(TransactionId));
/* dump shared cache invalidation messages */
if (nmsgs > 0)
- {
- rdata[lastrdata].next = &(rdata[3]);
- rdata[3].data = (char *) invalMessages;
- rdata[3].len = nmsgs * sizeof(SharedInvalidationMessage);
- rdata[3].buffer = InvalidBuffer;
- lastrdata = 3;
- }
- rdata[lastrdata].next = NULL;
-
- (void) XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT, rdata);
+ XLogRegisterData((char *) invalMessages,
+ nmsgs * sizeof(SharedInvalidationMessage));
+ (void) XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT);
}
else
{
- XLogRecData rdata[2];
- int lastrdata = 0;
xl_xact_commit_compact xlrec;
xlrec.xact_time = xactStopTimestamp;
xlrec.nsubxacts = nchildren;
- rdata[0].data = (char *) (&xlrec);
- rdata[0].len = MinSizeOfXactCommitCompact;
- rdata[0].buffer = InvalidBuffer;
+
+ XLogBeginInsert();
+ XLogRegisterData((char *) (&xlrec), MinSizeOfXactCommitCompact);
/* dump committed child Xids */
if (nchildren > 0)
- {
- rdata[0].next = &(rdata[1]);
- rdata[1].data = (char *) children;
- rdata[1].len = nchildren * sizeof(TransactionId);
- rdata[1].buffer = InvalidBuffer;
- lastrdata = 1;
- }
- rdata[lastrdata].next = NULL;
+ XLogRegisterData((char *) children,
+ nchildren * sizeof(TransactionId));
- (void) XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT_COMPACT, rdata);
+ (void) XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT_COMPACT);
}
}
RelFileNode *rels;
int nchildren;
TransactionId *children;
- XLogRecData rdata[3];
- int lastrdata = 0;
xl_xact_abort xlrec;
/*
}
xlrec.nrels = nrels;
xlrec.nsubxacts = nchildren;
- rdata[0].data = (char *) (&xlrec);
- rdata[0].len = MinSizeOfXactAbort;
- rdata[0].buffer = InvalidBuffer;
+
+ XLogBeginInsert();
+ XLogRegisterData((char *) (&xlrec), MinSizeOfXactAbort);
+
/* dump rels to delete */
if (nrels > 0)
- {
- rdata[0].next = &(rdata[1]);
- rdata[1].data = (char *) rels;
- rdata[1].len = nrels * sizeof(RelFileNode);
- rdata[1].buffer = InvalidBuffer;
- lastrdata = 1;
- }
+ XLogRegisterData((char *) rels, nrels * sizeof(RelFileNode));
+
/* dump committed child Xids */
if (nchildren > 0)
- {
- rdata[lastrdata].next = &(rdata[2]);
- rdata[2].data = (char *) children;
- rdata[2].len = nchildren * sizeof(TransactionId);
- rdata[2].buffer = InvalidBuffer;
- lastrdata = 2;
- }
- rdata[lastrdata].next = NULL;
+ XLogRegisterData((char *) children,
+ nchildren * sizeof(TransactionId));
- (void) XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT, rdata);
+ (void) XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT);
/*
* Report the latest async abort LSN, so that the WAL writer knows to
AbortBufferIO();
UnlockBuffers();
+ /* Reset WAL record construction buffers */
+ XLogResetInsertion();
+
/*
* Also clean up any open wait for lock, since the lock manager will choke
* if we try to wait for another lock before doing this.
AbortBufferIO();
UnlockBuffers();
+ /* Reset WAL record construction buffers */
+ XLogResetInsertion();
+
/*
* Also clean up any open wait for lock, since the lock manager will choke
* if we try to wait for another lock before doing this.
uint8 info = record->xl_info & ~XLR_INFO_MASK;
/* Backup blocks are not used in xact records */
- Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+ Assert(!XLogRecHasAnyBlockRefs(record));
if (info == XLOG_XACT_COMMIT_COMPACT)
{
bool inserted;
XLogRecord *rechdr = (XLogRecord *) rdata->data;
bool isLogSwitch = (rechdr->xl_rmid == RM_XLOG_ID &&
- rechdr->xl_info == XLOG_SWITCH);
+ (rechdr->xl_info & XLR_RMGR_INFO_MASK) == XLOG_SWITCH);
XLogRecPtr StartPos;
XLogRecPtr EndPos;
+ /* The first chunk should contain just the record header */
+ Assert(rdata->len == SizeOfXLogRecord);
+
/* cross-check on whether we should be here or not */
if (!XLogInsertAllowed())
elog(ERROR, "cannot make new WAL entries during recovery");
/*
- * Calculate CRC of the data, including all the backup blocks
+ * Calculate CRC of the data
*
* Note that the record header isn't added into the CRC initially since we
* don't know the prev-link yet. Thus, the CRC will represent the CRC of
if (XLOG_DEBUG)
{
StringInfoData buf;
+ StringInfoData recordbuf;
MemoryContext oldCxt = MemoryContextSwitchTo(walDebugCxt);
+ /*
+ * We have to piece together the WAL record data from the
+ * XLogRecData entries, so that we can pass it to the rm_desc
+ * function as one contiguous chunk.
+ */
+ initStringInfo(&recordbuf);
+ for (rdt = rdata; rdt != NULL; rdt = rdt->next)
+ appendBinaryStringInfo(&recordbuf, rdt->data, rdt->len);
+
initStringInfo(&buf);
appendStringInfo(&buf, "INSERT @ %X/%X: ",
(uint32) (EndPos >> 32), (uint32) EndPos);
- xlog_outrec(&buf, rechdr);
- if (rdata->data != NULL)
- {
- StringInfoData recordbuf;
-
- /*
- * We have to piece together the WAL record data from the
- * XLogRecData entries, so that we can pass it to the rm_desc
- * function as one contiguous chunk.
- */
- initStringInfo(&recordbuf);
- appendBinaryStringInfo(&recordbuf, (char *) rechdr, sizeof(XLogRecord));
- for (; rdata != NULL; rdata = rdata->next)
- appendBinaryStringInfo(&recordbuf, rdata->data, rdata->len);
-
- appendStringInfoString(&buf, " - ");
- xlog_outdesc(&buf, rechdr->xl_rmid, (XLogRecord *) recordbuf.data);
- }
+ xlog_outrec(&buf, (XLogRecord *) recordbuf.data);
+ appendStringInfoString(&buf, " - ");
+ xlog_outdesc(&buf, rechdr->xl_rmid, (XLogRecord *) recordbuf.data);
elog(LOG, "%s", buf.data);
MemoryContextSwitchTo(oldCxt);
XLogRecPtr CurrPos;
XLogPageHeader pagehdr;
- /* The first chunk is the record header */
- Assert(rdata->len == SizeOfXLogRecord);
-
/*
* Get a pointer to the right place in the right WAL buffer to start
* inserting to.
(void) GetRedoRecPtr();
/* Also update our copy of doPageWrites. */
doPageWrites = (Insert->fullPageWrites || Insert->forcePageWrites);
+
+ /* Also initialize the working areas to construct WAL records */
+ InitXLogInsert();
}
/*
CheckPoint checkPoint;
XLogRecPtr recptr;
XLogCtlInsert *Insert = &XLogCtl->Insert;
- XLogRecData rdata;
uint32 freespace;
XLogSegNo _logSegNo;
XLogRecPtr curInsert;
/*
* Now insert the checkpoint record into XLOG.
*/
- rdata.data = (char *) (&checkPoint);
- rdata.len = sizeof(checkPoint);
- rdata.buffer = InvalidBuffer;
- rdata.next = NULL;
-
+ XLogBeginInsert();
+ XLogRegisterData((char *) (&checkPoint), sizeof(checkPoint));
recptr = XLogInsert(RM_XLOG_ID,
shutdown ? XLOG_CHECKPOINT_SHUTDOWN :
- XLOG_CHECKPOINT_ONLINE,
- &rdata);
+ XLOG_CHECKPOINT_ONLINE);
XLogFlush(recptr);
CreateEndOfRecoveryRecord(void)
{
xl_end_of_recovery xlrec;
- XLogRecData rdata;
XLogRecPtr recptr;
/* sanity check */
START_CRIT_SECTION();
- rdata.data = (char *) &xlrec;
- rdata.len = sizeof(xl_end_of_recovery);
- rdata.buffer = InvalidBuffer;
- rdata.next = NULL;
-
- recptr = XLogInsert(RM_XLOG_ID, XLOG_END_OF_RECOVERY, &rdata);
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, sizeof(xl_end_of_recovery));
+ recptr = XLogInsert(RM_XLOG_ID, XLOG_END_OF_RECOVERY);
XLogFlush(recptr);
void
XLogPutNextOid(Oid nextOid)
{
- XLogRecData rdata;
-
- rdata.data = (char *) (&nextOid);
- rdata.len = sizeof(Oid);
- rdata.buffer = InvalidBuffer;
- rdata.next = NULL;
- (void) XLogInsert(RM_XLOG_ID, XLOG_NEXTOID, &rdata);
+ XLogBeginInsert();
+ XLogRegisterData((char *) (&nextOid), sizeof(Oid));
+ (void) XLogInsert(RM_XLOG_ID, XLOG_NEXTOID);
/*
* We need not flush the NEXTOID record immediately, because any of the
RequestXLogSwitch(void)
{
XLogRecPtr RecPtr;
- XLogRecData rdata;
- /* XLOG SWITCH, alone among xlog record types, has no data */
- rdata.buffer = InvalidBuffer;
- rdata.data = NULL;
- rdata.len = 0;
- rdata.next = NULL;
-
- RecPtr = XLogInsert(RM_XLOG_ID, XLOG_SWITCH, &rdata);
+ /* XLOG SWITCH has no data */
+ XLogBeginInsert();
+ RecPtr = XLogInsert(RM_XLOG_ID, XLOG_SWITCH);
return RecPtr;
}
XLogRestorePoint(const char *rpName)
{
XLogRecPtr RecPtr;
- XLogRecData rdata;
xl_restore_point xlrec;
xlrec.rp_time = GetCurrentTimestamp();
strlcpy(xlrec.rp_name, rpName, MAXFNAMELEN);
- rdata.buffer = InvalidBuffer;
- rdata.data = (char *) &xlrec;
- rdata.len = sizeof(xl_restore_point);
- rdata.next = NULL;
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, sizeof(xl_restore_point));
- RecPtr = XLogInsert(RM_XLOG_ID, XLOG_RESTORE_POINT, &rdata);
+ RecPtr = XLogInsert(RM_XLOG_ID, XLOG_RESTORE_POINT);
ereport(LOG,
(errmsg("restore point \"%s\" created at %X/%X",
*/
if (wal_level != ControlFile->wal_level || XLogIsNeeded())
{
- XLogRecData rdata;
xl_parameter_change xlrec;
XLogRecPtr recptr;
xlrec.wal_level = wal_level;
xlrec.wal_log_hints = wal_log_hints;
- rdata.buffer = InvalidBuffer;
- rdata.data = (char *) &xlrec;
- rdata.len = sizeof(xlrec);
- rdata.next = NULL;
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, sizeof(xlrec));
- recptr = XLogInsert(RM_XLOG_ID, XLOG_PARAMETER_CHANGE, &rdata);
+ recptr = XLogInsert(RM_XLOG_ID, XLOG_PARAMETER_CHANGE);
XLogFlush(recptr);
}
*/
if (XLogStandbyInfoActive() && !RecoveryInProgress())
{
- XLogRecData rdata;
-
- rdata.data = (char *) (&fullPageWrites);
- rdata.len = sizeof(bool);
- rdata.buffer = InvalidBuffer;
- rdata.next = NULL;
+ XLogBeginInsert();
+ XLogRegisterData((char *) (&fullPageWrites), sizeof(bool));
- XLogInsert(RM_XLOG_ID, XLOG_FPW_CHANGE, &rdata);
+ XLogInsert(RM_XLOG_ID, XLOG_FPW_CHANGE);
}
if (!fullPageWrites)
{
uint8 info = record->xl_info & ~XLR_INFO_MASK;
- /* Backup blocks are not used by XLOG rmgr */
- Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+ /* in XLOG rmgr, backup blocks are only used by XLOG_FPI records */
+ Assert(!XLogRecHasAnyBlockRefs(record) || info == XLOG_FPI);
if (info == XLOG_NEXTOID)
{
}
else if (info == XLOG_FPI)
{
- char *data;
- BkpBlock bkpb;
+ Buffer buffer;
/*
- * Full-page image (FPI) records contain a backup block stored
- * "inline" in the normal data since the locking when writing hint
- * records isn't sufficient to use the normal backup block mechanism,
- * which assumes exclusive lock on the buffer supplied.
+ * Full-page image (FPI) records contain nothing else but a backup
+ * block. The block reference must include a full-page image -
+ * otherwise there would be no point in this record.
*
* Since the only change in these backup block are hint bits, there
* are no recovery conflicts generated.
* smgr implementation has no need to implement anything. Which means
* nothing is needed in md.c etc
*/
- data = XLogRecGetData(record);
- memcpy(&bkpb, data, sizeof(BkpBlock));
- data += sizeof(BkpBlock);
-
- RestoreBackupBlockContents(lsn, bkpb, data, false, false);
+ if (XLogReadBufferForRedo(lsn, record, 0, &buffer) != BLK_RESTORED)
+ elog(ERROR, "unexpected XLogOpenBuffer result when restoring backup block");
+ UnlockReleaseBuffer(buffer);
}
else if (info == XLOG_BACKUP_END)
{
xlog_outrec(StringInfo buf, XLogRecord *record)
{
int i;
+ uint8 *blockrefids;
+ int nblockrefs;
appendStringInfo(buf, "prev %X/%X; xid %u",
(uint32) (record->xl_prev >> 32),
appendStringInfo(buf, "; len %u",
record->xl_len);
- for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
- {
- if (record->xl_info & XLR_BKP_BLOCK(i))
- appendStringInfo(buf, "; bkpb%d", i);
+ /* decode block references */
+ blockrefids = XLogRecGetBlockRefIds(record, &nblockrefs);
+ for (i = 0; i < nblockrefs; i++)
+ {
+ uint8 id = blockrefids[i];
+ XLogRecordBlockData *bkpb = XLogRecGetBlockRef(record, id, NULL);
+ RelFileNode rnode;
+ ForkNumber forknum;
+ BlockNumber blk ;
+
+ XLogRecGetBlockTag(record, id, &rnode, &forknum, &blk);
+ if (forknum != MAIN_FORKNUM)
+ appendStringInfo(buf, "; blkref #%u: rel %u/%u/%u, fork %u, blk %u",
+ id,
+ rnode.spcNode, rnode.dbNode, rnode.relNode,
+ forknum,
+ blk);
+ else
+ appendStringInfo(buf, "; blkref #%u: rel %u/%u/%u, blk %u",
+ id,
+ rnode.spcNode, rnode.dbNode, rnode.relNode,
+ blk);
+ if (bkpb->fork_flags & BKPBLOCK_HAS_IMAGE)
+ appendStringInfo(buf, " FPW");
}
}
#endif /* WAL_DEBUG */
XLogRecPtr startpoint;
XLogRecPtr stoppoint;
TimeLineID stoptli;
- XLogRecData rdata;
pg_time_t stamp_time;
char strfbuf[128];
char histfilepath[MAXPGPATH];
/*
* Write the backup-end xlog record
*/
- rdata.data = (char *) (&startpoint);
- rdata.len = sizeof(startpoint);
- rdata.buffer = InvalidBuffer;
- rdata.next = NULL;
- stoppoint = XLogInsert(RM_XLOG_ID, XLOG_BACKUP_END, &rdata);
+ XLogBeginInsert();
+ XLogRegisterData((char *) (&startpoint), sizeof(startpoint));
+ stoppoint = XLogInsert(RM_XLOG_ID, XLOG_BACKUP_END);
stoptli = ThisTimeLineID;
/*
* xloginsert.c
* Functions for constructing WAL records
*
+ * Constructing a WAL record begins with a call to XLogBeginInsert,
+ * followed by a number of XLogRegister* calls. The registered data is
+ * collected in private working memory, and finally assembled into a chain
+ * of XLogRecData structs by a call to XLogRecordAssemble(). See
+ * access/transam/README for details.
+ *
* Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
#include "utils/memutils.h"
#include "pg_trace.h"
+/*
+ * For each block reference registered with XLogRegisterBuffer, we fill in
+ * a registered_buffer struct.
+ */
+typedef struct
+{
+ RelFileNode rnode; /* identifies the relation and block */
+ ForkNumber forkno;
+ BlockNumber block;
+ Page page; /* page content */
+ uint8 block_id; /* ID the buffer was registered with */
+ uint8 flags; /* REGBUF_* flags */
+ uint32 rdata_sz; /* total length of data in rdata chain */
+ XLogRecData *rdata_head; /* head of the chain of data registered with
+ * this block */
+ XLogRecData *rdata_tail; /* last entry in the chain, or &rdata_head if
+ * empty */
+
+ /* temporary areas used by XLogRecordAssemble() */
+ XLogRecordBlockData dtbuf_xlg;
+ char padding[MAXIMUM_ALIGNOF];
+ XLogRecordBlockImage dtbuf_blkimg;
+} registered_buffer;
+
+static registered_buffer *registered_buffers;
+static int num_registered_buffers = 0; /* # of structs in use */
+static int max_registered_buffers; /* allocated size */
+
+/*
+ * A chain of XLogRecDatas to hold the "main data" of a WAL record, registered
+ * with XLogRegisterData(...).
+ */
+static XLogRecData *mainrdata_head;
+static XLogRecData *mainrdata_last = (XLogRecData *) &mainrdata_head;
+static int mainrdata_len; /* total # of bytes in chain */
+
+/*
+ * These are used to hold the record header while constructing a record.
+ * 'rechdr' is not a plain variable, but is palloc at initialization,
+ * because we want it to be MAXALIGNed and padding bytes zeroed.
+ */
+static XLogRecData hdr_rdt;
+static XLogRecord *rechdr;
+
+/*
+ * An array of XLogRecData structs, for assembling a WAL record. num_rdatas
+ * is the number of entries currently used, and allocated_rdatas is the
+ * allocated size of the array. max_rdatas is the number of entries that can
+ * be used for WAL data, the rest are reserved for use in XLogRecordAssemble
+ * for buffer information.
+ */
+static XLogRecData *rdatas;
+static int num_rdatas;
+static int max_rdatas;
+static int allocated_rdatas;
+
+/*
+ * XLogRecordAssemble() needs at most 5 XLogRecData entries for each
+ * registered buffer: XLogRecordBlockData, XLogRecordBlockImage, block
+ * content before "hole", block content after "hole", and padding between
+ * full-page image and block data if both are included.
+ */
+#define EXTRA_RDATAS_PER_BUFFER 5
+
+static bool begininsert_called = false;
+
+/* Memory context to hold the registered buffer and data references. */
+static MemoryContext xloginsert_cxt;
+
static XLogRecData *XLogRecordAssemble(RmgrId rmid, uint8 info,
- XLogRecData *rdata,
XLogRecPtr RedoRecPtr, bool doPageWrites,
- XLogRecPtr *fpw_lsn, XLogRecData **rdt_lastnormal);
-static void XLogFillBkpBlock(Buffer buffer, bool buffer_std, BkpBlock *bkpb);
+ XLogRecPtr *fpw_lsn);
+
+/*
+ * Begin constructing a WAL record. This must be called before the
+ * XLogRegister* functions and XLogInsert().
+ */
+void
+XLogBeginInsert(void)
+{
+ Assert(num_registered_buffers == 0);
+ Assert(mainrdata_last == (XLogRecData *) &mainrdata_head);
+ Assert(mainrdata_len == 0);
+ Assert(!begininsert_called);
+
+ /* cross-check on whether we should be here or not */
+ if (!XLogInsertAllowed())
+ elog(ERROR, "cannot make new WAL entries during recovery");
+
+ begininsert_called = true;
+}
+
+/*
+ * Ensure that there are enough buffer and data slots in the working area,
+ * for subsequent XLogRegisterBuffer, XLogRegisterData and XLogRegisterBufData
+ * calls.
+ *
+ * There is always space for a small number of buffers and data chunks, enough
+ * for most record types. This function is for the exceptional cases that need
+ * more.
+ */
+void
+XLogEnsureRecordSpace(int nbuffers, int ndatas)
+{
+ int extrardatas;
+ int needrdatas;
+
+ /*
+ * This must be called before entering a critical section, because
+ * allocating memory inside a critical section can fail. repalloc() will
+ * check the same, but better to check it here too so that we fail
+ * consistently even if the arrays happen to be large enough already.
+ */
+ Assert(CritSectionCount == 0);
+
+ /* the minimum values can't be decreased */
+ if (nbuffers < XLR_NORMAL_BKP_BLOCKS)
+ nbuffers = XLR_NORMAL_BKP_BLOCKS;
+ if (ndatas < XLR_NORMAL_RDATAS)
+ ndatas = XLR_NORMAL_RDATAS;
+
+ if (nbuffers > XLR_MAX_BKP_BLOCKS)
+ elog(ERROR, "maximum number of WAL record block references exceeded");
+
+ if (nbuffers > max_registered_buffers)
+ {
+ registered_buffers = (registered_buffer *)
+ repalloc(registered_buffers, sizeof(registered_buffer) * nbuffers);
+
+ /*
+ * At least the padding bytes in the structs must be zeroed, because
+ * they are included in WAL data, but initialize it all for tidiness.
+ */
+ MemSet(®istered_buffers[max_registered_buffers], 0,
+ (nbuffers - max_registered_buffers) * sizeof(registered_buffer));
+ max_registered_buffers = nbuffers;
+ }
+
+ extrardatas = EXTRA_RDATAS_PER_BUFFER * max_registered_buffers;
+ needrdatas = ndatas + extrardatas;
+ if (needrdatas > allocated_rdatas)
+ {
+ rdatas = (XLogRecData *) repalloc(rdatas, sizeof(XLogRecData) * needrdatas);
+ allocated_rdatas = needrdatas;
+ }
+ max_rdatas = allocated_rdatas - extrardatas;
+}
+
+/*
+ * Reset WAL record construction buffers.
+ */
+void
+XLogResetInsertion(void)
+{
+ num_rdatas = 0;
+ num_registered_buffers = 0;
+ mainrdata_len = 0;
+ mainrdata_last = (XLogRecData *) &mainrdata_head;
+ begininsert_called = false;
+}
+
+/*
+ * Register a reference to a buffer with the WAL record being constructed.
+ * This must be called for every page that the WAL-logged operation modifies.
+ */
+void
+XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
+{
+ registered_buffer *regbuf;
+
+ /* WILL_INIT and NO_IMAGE don't make sense with FORCE_IMAGE */
+ Assert(!((flags & REGBUF_FORCE_IMAGE) &&
+ (flags & (REGBUF_WILL_INIT | REGBUF_NO_IMAGE))));
+ Assert(begininsert_called);
+
+ if (num_registered_buffers > max_registered_buffers)
+ elog(ERROR, "too many registered buffers");
+
+ regbuf = ®istered_buffers[num_registered_buffers];
+
+ regbuf->block_id = block_id;
+ BufferGetTag(buffer, ®buf->rnode, ®buf->forkno, ®buf->block);
+ regbuf->page = BufferGetPage(buffer);
+ regbuf->flags = flags;
+ regbuf->rdata_tail = (XLogRecData *) ®buf->rdata_head;
+ regbuf->rdata_sz = 0;
+
+ num_registered_buffers++;
+}
+
+/*
+ * Register a block reference for a block that's not in the shared buffer pool.
+ */
+void
+XLogRegisterBlock(uint8 block_id, RelFileNode *rnode, ForkNumber forknum,
+ BlockNumber blknum, Page page, uint8 flags)
+{
+ registered_buffer *regbuf;
+
+ if (num_registered_buffers > max_registered_buffers)
+ elog(ERROR, "too many registered buffers");
+
+ /* This is currently only used to WAL-log a full-page image of a page */
+ Assert(flags & REGBUF_FORCE_IMAGE);
+
+ regbuf = ®istered_buffers[num_registered_buffers];
+
+ Assert(begininsert_called);
+
+ regbuf->block_id = block_id;
+ regbuf->rnode = *rnode;
+ regbuf->forkno = forknum;
+ regbuf->block = blknum;
+ regbuf->page = page;
+ regbuf->flags = flags;
+ regbuf->rdata_tail = (XLogRecData *) ®buf->rdata_head;
+ regbuf->rdata_sz = 0;
+
+ num_registered_buffers++;
+}
+
+/*
+ * Add data to the WAL record that's being constructed.
+ *
+ * The data is appended to the "main chunk", available at replay with
+ * XLogGetRecdata().
+ */
+void
+XLogRegisterData(char *data, int len)
+{
+ XLogRecData *rdata;
+
+ Assert(begininsert_called);
+
+ if (num_rdatas >= max_rdatas)
+ elog(ERROR, "too much WAL data");
+ rdata = &rdatas[num_rdatas++];
+
+ rdata->data = data;
+ rdata->len = len;
+ /*
+ * we use the mainrdata_last pointer to track the end of the chain,
+ * so no need to clear 'next' here.
+ */
+
+ mainrdata_last->next = rdata;
+ mainrdata_last = rdata;
+
+ mainrdata_len += len;
+}
/*
- * Insert an XLOG record having the specified RMID and info bytes,
- * with the body of the record being the data chunk(s) described by
- * the rdata chain (see xloginsert.h for notes about rdata).
+ * Add buffer-specific data to the WAL record that's being constructed.
+ *
+ * Block_id must reference a block previously registered with
+ * XLogRegisterBuffer(), and the data will be omitted from the WAL record if
+ * a full image of the page is included.
+ */
+void
+XLogRegisterBufData(uint8 block_id, char *data, int len)
+{
+ registered_buffer *regbuf;
+ XLogRecData *rdata;
+ int i;
+
+ Assert(begininsert_called);
+
+ /* find the registered buffer struct */
+ for (i = 0; i < num_registered_buffers; i++)
+ {
+ if (registered_buffers[i].block_id == block_id)
+ break;
+ }
+ if (i >= num_registered_buffers)
+ elog(ERROR, "no block with id %d registered with WAL insertion",
+ block_id);
+ regbuf = ®istered_buffers[i];
+
+ if (num_rdatas >= max_rdatas)
+ elog(ERROR, "too much WAL data");
+ rdata = &rdatas[num_rdatas++];
+
+ rdata->data = data;
+ rdata->len = len;
+
+ regbuf->rdata_tail->next = rdata;
+ regbuf->rdata_tail = rdata;
+ regbuf->rdata_sz += len;
+}
+
+/*
+ * Insert an XLOG record having the specified RMID and info bytes, with the
+ * body of the record being the data and buffer references registered earlier
+ * with XLogRegister* calls.
*
* Returns XLOG pointer to end of record (beginning of next record).
* This can be used as LSN for data pages affected by the logged action.
* (LSN is the XLOG point up to which the XLOG must be flushed to disk
* before the data page can be written out. This implements the basic
* WAL rule "write the log before the data".)
- *
- * NB: this routine feels free to scribble on the XLogRecData structs,
- * though not on the data they reference. This is OK since the XLogRecData
- * structs are always just temporaries in the calling code.
*/
XLogRecPtr
-XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
+XLogInsert(RmgrId rmid, uint8 info)
{
- XLogRecPtr RedoRecPtr;
- bool doPageWrites;
XLogRecPtr EndPos;
- XLogRecPtr fpw_lsn;
- XLogRecData *rdt;
- XLogRecData *rdt_lastnormal;
+
+ /* XLogBeginInsert() must have been called. */
+ if (!begininsert_called)
+ elog(ERROR, "XLogBeginInsert was not called");
/* info's high bits are reserved for use by me */
if (info & XLR_INFO_MASK)
*/
if (IsBootstrapProcessingMode() && rmid != RM_XLOG_ID)
{
+ XLogResetInsertion();
EndPos = SizeOfXLogLongPHD; /* start of 1st chkpt record */
return EndPos;
}
- /*
- * Get values needed to decide whether to do full-page writes. Since we
- * don't yet have an insertion lock, these could change under us, but
- * XLogInsertRecord will recheck them once it has a lock.
- */
- GetFullPageWriteInfo(&RedoRecPtr, &doPageWrites);
-
- /*
- * Assemble an XLogRecData chain representing the WAL record, including
- * any backup blocks needed.
- *
- * We may have to loop back to here if a race condition is detected in
- * XLogInsertRecord. We could prevent the race by doing all this work
- * while holding an insertion lock, but it seems better to avoid doing CRC
- * calculations while holding one.
- */
-retry:
- rdt = XLogRecordAssemble(rmid, info, rdata, RedoRecPtr, doPageWrites,
- &fpw_lsn, &rdt_lastnormal);
-
- EndPos = XLogInsertRecord(rdt, fpw_lsn);
-
- if (EndPos == InvalidXLogRecPtr)
+ do
{
+ XLogRecPtr RedoRecPtr;
+ bool doPageWrites;
+ XLogRecPtr fpw_lsn;
+ XLogRecData *rdt;
+
/*
- * Undo the changes we made to the rdata chain, and retry.
- *
- * XXX: This doesn't undo *all* the changes; the XLogRecData
- * entries for buffers that we had already decided to back up have
- * had their data-pointers cleared. That's OK, as long as we
- * decide to back them up on the next iteration as well. Hence,
- * don't allow "doPageWrites" value to go from true to false after
- * we've modified the rdata chain.
+ * Get values needed to decide whether to do full-page writes. Since
+ * we don't yet have an insertion lock, these could change under us,
+ * but XLogInsertRecData will recheck them once it has a lock.
*/
- bool newDoPageWrites;
+ GetFullPageWriteInfo(&RedoRecPtr, &doPageWrites);
- GetFullPageWriteInfo(&RedoRecPtr, &newDoPageWrites);
- doPageWrites = doPageWrites || newDoPageWrites;
- rdt_lastnormal->next = NULL;
+ rdt = XLogRecordAssemble(rmid, info, RedoRecPtr, doPageWrites,
+ &fpw_lsn);
- goto retry;
- }
+ EndPos = XLogInsertRecord(rdt, fpw_lsn);
+ } while (EndPos == InvalidXLogRecPtr);
+
+ XLogResetInsertion();
return EndPos;
}
/*
- * Assemble a full WAL record, including backup blocks, from an XLogRecData
- * chain, ready for insertion with XLogInsertRecord(). The record header
- * fields are filled in, except for the xl_prev field and CRC.
+ * Assemble a WAL record from the registered data and buffers into an
+ * XLogRecData chain, ready for insertion with XLogInsertRecord().
*
- * The rdata chain is modified, adding entries for full-page images.
- * *rdt_lastnormal is set to point to the last normal (ie. not added by
- * this function) entry. It can be used to reset the chain to its original
- * state.
+ * The record header fields are filled in, except for the xl_prev field and
+ * CRC.
*
- * If the rdata chain contains any buffer references, and a full-page image
- * was not taken of all the buffers, *fpw_lsn is set to the lowest LSN among
- * such pages. This signals that the assembled record is only good for
- * insertion on the assumption that the RedoRecPtr and doPageWrites values
- * were up-to-date.
+ * If there are any registered buffers, and a full-page image was not taken
+ * of all them, *page_writes_omitted is set to true. This signals that the
+ * assembled record is only good for insertion on the assumption that the
+ * RedoRecPtr and doPageWrites values were up-to-date.
*/
static XLogRecData *
-XLogRecordAssemble(RmgrId rmid, uint8 info, XLogRecData *rdata,
+XLogRecordAssemble(RmgrId rmid, uint8 info,
XLogRecPtr RedoRecPtr, bool doPageWrites,
- XLogRecPtr *fpw_lsn, XLogRecData **rdt_lastnormal)
+ XLogRecPtr *fpw_lsn)
{
- bool isLogSwitch = (rmid == RM_XLOG_ID && info == XLOG_SWITCH);
XLogRecData *rdt;
- Buffer dtbuf[XLR_MAX_BKP_BLOCKS];
- bool dtbuf_bkp[XLR_MAX_BKP_BLOCKS];
- uint32 len,
- total_len;
- unsigned i;
+ uint32 total_len;
+ int i;
+ int used_rdatas = num_rdatas;
/*
- * These need to be static because they are returned to the caller as part
- * of the XLogRecData chain.
+ * Note: this function can be called multiple times for the same record.
+ * All the modifications we do to the rdata chains below must handle that.
*/
- static BkpBlock dtbuf_xlg[XLR_MAX_BKP_BLOCKS];
- static XLogRecData dtbuf_rdt1[XLR_MAX_BKP_BLOCKS];
- static XLogRecData dtbuf_rdt2[XLR_MAX_BKP_BLOCKS];
- static XLogRecData dtbuf_rdt3[XLR_MAX_BKP_BLOCKS];
- static XLogRecData hdr_rdt;
- static XLogRecord *rechdr;
-
- if (rechdr == NULL)
- {
- static char rechdrbuf[SizeOfXLogRecord + MAXIMUM_ALIGNOF];
-
- rechdr = (XLogRecord *) MAXALIGN(&rechdrbuf);
- MemSet(rechdr, 0, SizeOfXLogRecord);
- }
/* The record begins with the header */
hdr_rdt.data = (char *) rechdr;
hdr_rdt.len = SizeOfXLogRecord;
- hdr_rdt.next = rdata;
+ rdt = &hdr_rdt;
total_len = SizeOfXLogRecord;
- /*
- * Here we scan the rdata chain, to determine which buffers must be backed
- * up.
- *
- * We add entries for backup blocks to the chain, so that they don't need
- * any special treatment in the critical section where the chunks are
- * copied into the WAL buffers. Those entries have to be unlinked from the
- * chain if we have to loop back here.
- */
- for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
+ /* followed by main data, if any */
+ if (mainrdata_len > 0)
{
- dtbuf[i] = InvalidBuffer;
- dtbuf_bkp[i] = false;
+ rdt->next = mainrdata_head;
+ rdt = mainrdata_last;
+ total_len += mainrdata_len;
+ }
+ else
+ {
+ /*
+ * If there's no main data, also set the XLR_NO_RMGR_DATA flag. This
+ * provides a useful bit of extra error checking in ReadRecord.
+ */
+ info |= XLR_NO_RMGR_DATA;
}
+ /*
+ * Make additional rdata chain entries for the backup blocks.
+ */
*fpw_lsn = InvalidXLogRecPtr;
- len = 0;
- for (rdt = rdata;;)
+ for (i = 0; i < num_registered_buffers; i++)
{
- if (rdt->buffer == InvalidBuffer)
- {
- /* Simple data, just include it */
- len += rdt->len;
- }
+ registered_buffer *regbuf = ®istered_buffers[i];
+ XLogRecordBlockData *bkpb = ®buf->dtbuf_xlg;
+ bool needs_backup;
+ bool needs_data;
+ int padlen;
+
+ /* Determine if this block needs to be backed up */
+ if (regbuf->flags & REGBUF_FORCE_IMAGE)
+ needs_backup = true;
+ else if (regbuf->flags & (REGBUF_WILL_INIT | REGBUF_NO_IMAGE))
+ needs_backup = false;
+ else if (!doPageWrites)
+ needs_backup = false;
else
{
- /* Find info for buffer */
- for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
+ /*
+ * We assume page LSN is first data on *every* page that can be
+ * passed to XLogInsert, whether it has the standard page layout
+ * or not.
+ */
+ XLogRecPtr page_lsn = PageGetLSN(regbuf->page);
+
+ needs_backup = (page_lsn <= RedoRecPtr);
+ if (!needs_backup)
{
- if (rdt->buffer == dtbuf[i])
- {
- /* Buffer already referenced by earlier chain item */
- if (dtbuf_bkp[i])
- {
- rdt->data = NULL;
- rdt->len = 0;
- }
- else if (rdt->data)
- len += rdt->len;
- break;
- }
- if (dtbuf[i] == InvalidBuffer)
- {
- /* OK, put it in this slot */
- XLogRecPtr page_lsn;
- bool needs_backup;
-
- dtbuf[i] = rdt->buffer;
-
- /*
- * Determine whether the buffer has to be backed up.
- *
- * We assume page LSN is first data on *every* page that
- * can be passed to XLogInsert, whether it has the
- * standard page layout or not. We don't need to take the
- * buffer header lock for PageGetLSN because we hold an
- * exclusive lock on the page and/or the relation.
- */
- page_lsn = PageGetLSN(BufferGetPage(rdt->buffer));
- if (!doPageWrites)
- needs_backup = false;
- else if (page_lsn <= RedoRecPtr)
- needs_backup = true;
- else
- needs_backup = false;
-
- if (needs_backup)
- {
- /*
- * The page needs to be backed up, so set up BkpBlock
- */
- XLogFillBkpBlock(rdt->buffer, rdt->buffer_std,
- &(dtbuf_xlg[i]));
- dtbuf_bkp[i] = true;
- rdt->data = NULL;
- rdt->len = 0;
- }
- else
- {
- if (rdt->data)
- len += rdt->len;
- if (*fpw_lsn == InvalidXLogRecPtr ||
- page_lsn < *fpw_lsn)
- {
- *fpw_lsn = page_lsn;
- }
- }
- break;
- }
+ if (*fpw_lsn == InvalidXLogRecPtr || page_lsn < *fpw_lsn)
+ *fpw_lsn = page_lsn;
}
- if (i >= XLR_MAX_BKP_BLOCKS)
- elog(PANIC, "can backup at most %d blocks per xlog record",
- XLR_MAX_BKP_BLOCKS);
}
- /* Break out of loop when rdt points to last chain item */
- if (rdt->next == NULL)
- break;
+
+ /* Determine if the buffer data needs to included */
+ if (regbuf->rdata_sz == 0)
+ needs_data = false;
+ else if ((regbuf->flags & REGBUF_KEEP_DATA) != 0)
+ needs_data = true;
+ else
+ needs_data = !needs_backup;
+
+ /*
+ * Construct an XLogRecordBlockData struct for this block reference,
+ * and an XLogRecData to point to it.
+ */
+ rdt->next = &rdatas[used_rdatas++];
rdt = rdt->next;
- }
- total_len += len;
- /*
- * Make additional rdata chain entries for the backup blocks, so that we
- * don't need to special-case them in the write loop. This modifies the
- * original rdata chain, but we keep a pointer to the last regular entry,
- * rdt_lastnormal, so that we can undo this if we have to start over.
- *
- * At the exit of this loop, total_len includes the backup block data.
- *
- * Also set the appropriate info bits to show which buffers were backed
- * up. The XLR_BKP_BLOCK(N) bit corresponds to the N'th distinct buffer
- * value (ignoring InvalidBuffer) appearing in the rdata chain.
- */
- *rdt_lastnormal = rdt;
- for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
- {
- BkpBlock *bkpb;
- char *page;
+ bkpb->id = regbuf->block_id;
+ bkpb->fork_flags = (uint8) regbuf->forkno;
+ bkpb->block = regbuf->block;
- if (!dtbuf_bkp[i])
- continue;
+ if (i > 0 && RelFileNodeEquals(bkpb->node, registered_buffers[i - 1].rnode))
+ {
+ bkpb->fork_flags |= BKPBLOCK_SAME_REL;
- info |= XLR_BKP_BLOCK(i);
+ /*
+ * Zero the relation information. It's not nominally included, but
+ * might end up in the record as padding bytes anyway.
+ */
+ bkpb->node.spcNode = 0;
+ bkpb->node.dbNode = 0;
+ bkpb->node.relNode = 0;
- bkpb = &(dtbuf_xlg[i]);
- page = (char *) BufferGetBlock(dtbuf[i]);
+ rdt->len = SizeOfXLogRecordBlockDataSameRel;
+ }
+ else
+ {
+ bkpb->node = regbuf->rnode;
- rdt->next = &(dtbuf_rdt1[i]);
- rdt = rdt->next;
+ rdt->len = SizeOfXLogRecordBlockData;
+ }
- rdt->data = (char *) bkpb;
- rdt->len = sizeof(BkpBlock);
- total_len += sizeof(BkpBlock);
+ if (regbuf->flags & REGBUF_WILL_INIT)
+ bkpb->fork_flags |= BKPBLOCK_WILL_INIT;
- rdt->next = &(dtbuf_rdt2[i]);
- rdt = rdt->next;
+ rdt->data = (char *) bkpb;
+ total_len += rdt->len;
- if (bkpb->hole_length == 0)
+ /*
+ * Add padding, so that the actual block data is MAXALIGNed. (We take
+ * advantage of the fact that we have reserved some padding bytes
+ * immediately after the XLogRecordBlockData struct, so we can just
+ * make this XLogRecData entry larger, instead of adding a new entry.
+ * This saves a few cycles when assembling the final record)
+ */
+ if ((needs_backup || needs_data) && (total_len % MAXIMUM_ALIGNOF) != 0)
{
- rdt->data = page;
- rdt->len = BLCKSZ;
- total_len += BLCKSZ;
- rdt->next = NULL;
+ padlen = MAXIMUM_ALIGNOF - (total_len % MAXIMUM_ALIGNOF);
+ rdt->len += padlen;
+ total_len += padlen;
}
- else
+
+ bkpb->data_length = 0;
+
+ if (needs_backup)
{
- /* must skip the hole */
- rdt->data = page;
- rdt->len = bkpb->hole_offset;
- total_len += bkpb->hole_offset;
+ Page page = regbuf->page;
+ XLogRecordBlockImage *bimg = ®buf->dtbuf_blkimg;
- rdt->next = &(dtbuf_rdt3[i]);
+ /*
+ * The page needs to be backed up, so set up *bimg
+ */
+ if (regbuf->flags & REGBUF_STANDARD)
+ {
+ /* Assume we can omit data between pd_lower and pd_upper */
+ uint16 lower = ((PageHeader) page)->pd_lower;
+ uint16 upper = ((PageHeader) page)->pd_upper;
+
+ if (lower >= SizeOfPageHeaderData &&
+ upper > lower &&
+ upper <= BLCKSZ)
+ {
+ bimg->hole_offset = lower;
+ bimg->hole_length = upper - lower;
+ }
+ else
+ {
+ /* No "hole" to compress out */
+ bimg->hole_offset = 0;
+ bimg->hole_length = 0;
+ }
+ }
+ else
+ {
+ /* Not a standard page header, don't try to eliminate "hole" */
+ bimg->hole_offset = 0;
+ bimg->hole_length = 0;
+ }
+
+ /* Fill in the remaining fields in the XLogRecordBlockData struct */
+ bkpb->fork_flags |= BKPBLOCK_HAS_IMAGE;
+
+ bkpb->data_length += sizeof(XLogRecordBlockImage) + BLCKSZ - bimg->hole_length;
+
+ /*
+ * Construct XLogRecData entries for the XLogRecordBlockImage
+ * struct and the page content.
+ */
+ rdt->next = &rdatas[used_rdatas++];
+ rdt = rdt->next;
+ rdt->data = (char *) bimg;
+ rdt->len = sizeof(XLogRecordBlockImage);
+
+ rdt->next = &rdatas[used_rdatas++];
rdt = rdt->next;
+ if (bimg->hole_length == 0)
+ {
+ rdt->data = page;
+ rdt->len = BLCKSZ;
+ }
+ else
+ {
+ /* must skip the hole */
+ rdt->data = page;
+ rdt->len = bimg->hole_offset;
+
+ rdt->next = &rdatas[used_rdatas++];
+ rdt = rdt->next;
- rdt->data = page + (bkpb->hole_offset + bkpb->hole_length);
- rdt->len = BLCKSZ - (bkpb->hole_offset + bkpb->hole_length);
- total_len += rdt->len;
- rdt->next = NULL;
+ rdt->data = page + (bimg->hole_offset + bimg->hole_length);
+ rdt->len = BLCKSZ - (bimg->hole_offset + bimg->hole_length);
+ }
+
+ /*
+ * If we have per-block data in addition to the full-page iamge,
+ * add padding to make it aligned.
+ */
+ if (needs_data && (bkpb->data_length % MAXIMUM_ALIGNOF) != 0)
+ {
+ padlen = MAXIMUM_ALIGNOF - (bkpb->data_length % MAXIMUM_ALIGNOF);
+
+ rdt->next = &rdatas[used_rdatas++];
+ rdt = rdt->next;
+
+ rdt->data = regbuf->padding;
+ rdt->len = padlen;
+
+ bkpb->data_length += padlen;
+ }
}
+
+ if (needs_data)
+ {
+ /*
+ * Link the caller-supplied rdata chain for this buffer to the
+ * overall list.
+ */
+ bkpb->fork_flags |= BKPBLOCK_HAS_DATA;
+ bkpb->data_length += regbuf->rdata_sz;
+
+ rdt->next = regbuf->rdata_head;
+ rdt = regbuf->rdata_tail;
+ }
+
+ total_len += bkpb->data_length;
}
+ Assert(used_rdatas <= allocated_rdatas);
- /*
- * We disallow len == 0 because it provides a useful bit of extra error
- * checking in ReadRecord. This means that all callers of XLogInsert
- * must supply at least some not-in-a-buffer data. However, we make an
- * exception for XLOG SWITCH records because we don't want them to ever
- * cross a segment boundary.
- */
- if (len == 0 && !isLogSwitch)
- elog(PANIC, "invalid xlog record length %u", rechdr->xl_len);
+ rdt->next = NULL;
/*
* Fill in the fields in the record header. Prev-link is filled in later,
*/
rechdr->xl_xid = GetCurrentTransactionIdIfAny();
rechdr->xl_tot_len = total_len;
- rechdr->xl_len = len; /* doesn't include backup blocks */
+ rechdr->xl_len = mainrdata_len; /* doesn't include backup blocks */
rechdr->xl_info = info;
rechdr->xl_rmid = rmid;
rechdr->xl_prev = InvalidXLogRecPtr;
*/
RedoRecPtr = GetRedoRecPtr();
- /*
- * We assume page LSN is first data on *every* page that can be passed to
- * XLogInsert, whether it has the standard page layout or not. Since we're
- * only holding a share-lock on the page, we must take the buffer header
- * lock when we look at the LSN.
- */
+ /*
+ * We assume page LSN is first data on *every* page that can be passed to
+ * XLogInsert, whether it has the standard page layout or not. Since we're
+ * only holding a share-lock on the page, we must take the buffer header
+ * lock when we look at the LSN.
+ */
lsn = BufferGetLSNAtomic(buffer);
if (lsn <= RedoRecPtr)
{
- XLogRecData rdata[2];
- BkpBlock bkpb;
+ int flags;
char copied_buffer[BLCKSZ];
char *origdata = (char *) BufferGetBlock(buffer);
-
- /* Make a BkpBlock struct representing the buffer */
- XLogFillBkpBlock(buffer, buffer_std, &bkpb);
+ RelFileNode rnode;
+ ForkNumber forkno;
+ BlockNumber blkno;
/*
* Copy buffer so we don't have to worry about concurrent hint bit or
* lsn updates. We assume pd_lower/upper cannot be changed without an
* exclusive lock, so the contents bkp are not racy.
- *
- * With buffer_std set to false, XLogFillBkpBlock() sets hole_length
- * and hole_offset to 0; so the following code is safe for either
- * case.
*/
- memcpy(copied_buffer, origdata, bkpb.hole_offset);
- memcpy(copied_buffer + bkpb.hole_offset,
- origdata + bkpb.hole_offset + bkpb.hole_length,
- BLCKSZ - bkpb.hole_offset - bkpb.hole_length);
+ if (buffer_std)
+ {
+ /* Assume we can omit data between pd_lower and pd_upper */
+ Page page = BufferGetPage(buffer);
+ uint16 lower = ((PageHeader) page)->pd_lower;
+ uint16 upper = ((PageHeader) page)->pd_upper;
- /*
- * Header for backup block.
- */
- rdata[0].data = (char *) &bkpb;
- rdata[0].len = sizeof(BkpBlock);
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
+ memcpy(copied_buffer, origdata, lower);
+ memcpy(copied_buffer + upper, origdata + upper, BLCKSZ - upper);
+ }
+ else
+ memcpy(copied_buffer, origdata, BLCKSZ);
- /*
- * Save copy of the buffer.
- */
- rdata[1].data = copied_buffer;
- rdata[1].len = BLCKSZ - bkpb.hole_length;
- rdata[1].buffer = InvalidBuffer;
- rdata[1].next = NULL;
+ XLogBeginInsert();
- recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI, rdata);
+ flags = REGBUF_FORCE_IMAGE;
+ if (buffer_std)
+ flags |= REGBUF_STANDARD;
+
+ BufferGetTag(buffer, &rnode, &forkno, &blkno);
+
+ XLogRegisterBlock(0, &rnode, forkno, blkno, copied_buffer, flags);
+
+ recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI);
}
return recptr;
log_newpage(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno,
Page page, bool page_std)
{
- BkpBlock bkpb;
+ int flags;
XLogRecPtr recptr;
- XLogRecData rdata[3];
-
- /* NO ELOG(ERROR) from here till newpage op is logged */
- START_CRIT_SECTION();
-
- bkpb.node = *rnode;
- bkpb.fork = forkNum;
- bkpb.block = blkno;
+ flags = REGBUF_FORCE_IMAGE;
if (page_std)
- {
- /* Assume we can omit data between pd_lower and pd_upper */
- uint16 lower = ((PageHeader) page)->pd_lower;
- uint16 upper = ((PageHeader) page)->pd_upper;
+ flags |= REGBUF_STANDARD;
- if (lower >= SizeOfPageHeaderData &&
- upper > lower &&
- upper <= BLCKSZ)
- {
- bkpb.hole_offset = lower;
- bkpb.hole_length = upper - lower;
- }
- else
- {
- /* No "hole" to compress out */
- bkpb.hole_offset = 0;
- bkpb.hole_length = 0;
- }
- }
- else
- {
- /* Not a standard page header, don't try to eliminate "hole" */
- bkpb.hole_offset = 0;
- bkpb.hole_length = 0;
- }
-
- rdata[0].data = (char *) &bkpb;
- rdata[0].len = sizeof(BkpBlock);
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
-
- if (bkpb.hole_length == 0)
- {
- rdata[1].data = (char *) page;
- rdata[1].len = BLCKSZ;
- rdata[1].buffer = InvalidBuffer;
- rdata[1].next = NULL;
- }
- else
- {
- /* must skip the hole */
- rdata[1].data = (char *) page;
- rdata[1].len = bkpb.hole_offset;
- rdata[1].buffer = InvalidBuffer;
- rdata[1].next = &rdata[2];
-
- rdata[2].data = (char *) page + (bkpb.hole_offset + bkpb.hole_length);
- rdata[2].len = BLCKSZ - (bkpb.hole_offset + bkpb.hole_length);
- rdata[2].buffer = InvalidBuffer;
- rdata[2].next = NULL;
- }
-
- recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI, rdata);
+ XLogBeginInsert();
+ XLogRegisterBlock(0, rnode, forkNum, blkno, page, flags);
+ recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI);
/*
* The page may be uninitialized. If so, we can't set the LSN because that
PageSetLSN(page, recptr);
}
- END_CRIT_SECTION();
-
return recptr;
}
return log_newpage(&rnode, forkNum, blkno, page, page_std);
}
-/*
- * Fill a BkpBlock for a buffer.
- */
-static void
-XLogFillBkpBlock(Buffer buffer, bool buffer_std, BkpBlock *bkpb)
+void
+InitXLogInsert(void)
{
- BufferGetTag(buffer, &bkpb->node, &bkpb->fork, &bkpb->block);
+ /* Initialize the working areas */
+ if (xloginsert_cxt == NULL)
+ {
+ xloginsert_cxt = AllocSetContextCreate(TopMemoryContext,
+ "WAL record construction",
+ ALLOCSET_DEFAULT_MINSIZE,
+ ALLOCSET_DEFAULT_INITSIZE,
+ ALLOCSET_DEFAULT_MAXSIZE);
+ }
- if (buffer_std)
+ if (registered_buffers == NULL)
{
- /* Assume we can omit data between pd_lower and pd_upper */
- Page page = BufferGetPage(buffer);
- uint16 lower = ((PageHeader) page)->pd_lower;
- uint16 upper = ((PageHeader) page)->pd_upper;
-
- if (lower >= SizeOfPageHeaderData &&
- upper > lower &&
- upper <= BLCKSZ)
- {
- bkpb->hole_offset = lower;
- bkpb->hole_length = upper - lower;
- }
- else
- {
- /* No "hole" to compress out */
- bkpb->hole_offset = 0;
- bkpb->hole_length = 0;
- }
+ registered_buffers = (registered_buffer *)
+ MemoryContextAllocZero(xloginsert_cxt,
+ sizeof(registered_buffer) * XLR_NORMAL_BKP_BLOCKS);
+ max_registered_buffers = XLR_NORMAL_BKP_BLOCKS;
}
- else
+ if (rdatas == NULL)
{
- /* Not a standard page header, don't try to eliminate "hole" */
- bkpb->hole_offset = 0;
- bkpb->hole_length = 0;
+ allocated_rdatas = XLR_NORMAL_RDATAS +
+ EXTRA_RDATAS_PER_BUFFER * XLR_NORMAL_BKP_BLOCKS;
+ rdatas = MemoryContextAlloc(xloginsert_cxt,
+ sizeof(XLogRecData) * allocated_rdatas);
+ max_rdatas = XLR_NORMAL_RDATAS;
}
+
+ if (rechdr == NULL)
+ rechdr = palloc0(SizeOfXLogRecord);
}
/*
* Special processing if it's an XLOG SWITCH record
*/
- if (record->xl_rmid == RM_XLOG_ID && record->xl_info == XLOG_SWITCH)
+ if (record->xl_rmid == RM_XLOG_ID && record->xl_info == (XLOG_SWITCH | XLR_NO_RMGR_DATA))
{
/* Pretend it extends to end of segment */
state->EndRecPtr += XLogSegSize - 1;
bool randAccess)
{
/*
- * xl_len == 0 is bad data for everything except XLOG SWITCH, where it is
- * required.
+ * xl_len == 0 is only allowed if the XLR_NO_RMGR_DATA flag is also set.
*/
- if (record->xl_rmid == RM_XLOG_ID && record->xl_info == XLOG_SWITCH)
+ if (record->xl_info & XLR_NO_RMGR_DATA)
{
if (record->xl_len != 0)
{
report_invalid_record(state,
- "invalid xlog switch record at %X/%X",
+ "record with invalid length at %X/%X",
(uint32) (RecPtr >> 32), (uint32) RecPtr);
return false;
}
(uint32) (RecPtr >> 32), (uint32) RecPtr);
return false;
}
- if (record->xl_tot_len < SizeOfXLogRecord + record->xl_len ||
- record->xl_tot_len > SizeOfXLogRecord + record->xl_len +
- XLR_MAX_BKP_BLOCKS * (sizeof(BkpBlock) + BLCKSZ))
+ if (record->xl_tot_len < SizeOfXLogRecord + record->xl_len)
{
report_invalid_record(state,
"invalid record length at %X/%X",
ValidXLogRecord(XLogReaderState *state, XLogRecord *record, XLogRecPtr recptr)
{
pg_crc32 crc;
- int i;
uint32 len = record->xl_len;
- BkpBlock bkpb;
+ XLogRecordBlockData bkpb;
char *blk;
size_t remaining = record->xl_tot_len;
- /* First the rmgr data */
+ /* Check that the rmgr data length is sane */
if (remaining < SizeOfXLogRecord + len)
{
/* ValidXLogRecordHeader() should've caught this already... */
return false;
}
remaining -= SizeOfXLogRecord + len;
- INIT_CRC32C(crc);
- COMP_CRC32C(crc, XLogRecGetData(record), len);
- /* Add in the backup blocks, if any */
- blk = (char *) XLogRecGetData(record) + len;
- for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
+ /* Validate block data, if any */
+ if (remaining > 0)
{
- uint32 blen;
-
- if (!(record->xl_info & XLR_BKP_BLOCK(i)))
- continue;
-
- if (remaining < sizeof(BkpBlock))
+ blk = (char *) XLogRecGetData(record) + len;
+ do
{
- report_invalid_record(state,
- "invalid backup block size in record at %X/%X",
- (uint32) (recptr >> 32), (uint32) recptr);
- return false;
- }
- memcpy(&bkpb, blk, sizeof(BkpBlock));
+ if (remaining < SizeOfXLogRecordBlockDataSameRel)
+ {
+ report_invalid_record(state,
+ "invalid total length in record at %X/%X",
+ (uint32) (recptr >> 32), (uint32) recptr);
+ return false;
+ }
+ memcpy(&bkpb, blk, SizeOfXLogRecordBlockDataSameRel);
+ if (bkpb.fork_flags & BKPBLOCK_SAME_REL)
+ {
+ remaining -= SizeOfXLogRecordBlockDataSameRel;
+ blk += SizeOfXLogRecordBlockDataSameRel;
+ }
+ else
+ {
+ if (remaining < SizeOfXLogRecordBlockData)
+ {
+ report_invalid_record(state,
+ "invalid total length in record at %X/%X",
+ (uint32) (recptr >> 32), (uint32) recptr);
+ return false;
+ }
+
+ remaining -= SizeOfXLogRecordBlockData;
+ blk += SizeOfXLogRecordBlockData;
+ }
- if (bkpb.hole_offset + bkpb.hole_length > BLCKSZ)
- {
- report_invalid_record(state,
- "incorrect hole size in record at %X/%X",
- (uint32) (recptr >> 32), (uint32) recptr);
- return false;
- }
- blen = sizeof(BkpBlock) + BLCKSZ - bkpb.hole_length;
+ /* The block data begins at next MAXALIGN boundary */
+ if (bkpb.data_length > 0)
+ {
+ char *blkdata = (char *) MAXALIGN(blk);
+
+ if (remaining < blkdata - blk)
+ {
+ report_invalid_record(state,
+ "invalid block data length in record at %X/%X",
+ (uint32) (recptr >> 32), (uint32) recptr);
+ return false;
+ }
+ remaining -= (blkdata - blk);
+ blk = blkdata;
+ }
- if (remaining < blen)
- {
- report_invalid_record(state,
- "invalid backup block size in record at %X/%X",
- (uint32) (recptr >> 32), (uint32) recptr);
- return false;
- }
- remaining -= blen;
- COMP_CRC32C(crc, blk, blen);
- blk += blen;
+ if (remaining < bkpb.data_length)
+ {
+ report_invalid_record(state,
+ "invalid block data length in record at %X/%X",
+ (uint32) (recptr >> 32), (uint32) recptr);
+ return false;
+ }
+
+ /* If it's a full-page image, check the XLogRecordBlockImage struct */
+ if (bkpb.fork_flags & BKPBLOCK_HAS_IMAGE)
+ {
+ XLogRecordBlockImage *blkimg;
+
+ if (bkpb.data_length < sizeof(XLogRecordBlockImage))
+ {
+ report_invalid_record(state,
+ "invalid block image size in record at %X/%X",
+ (uint32) (recptr >> 32), (uint32) recptr);
+ return false;
+ }
+ blkimg = (XLogRecordBlockImage *) blk;
+
+ /* Check the length and offset of the "hole" */
+ if (blkimg->hole_offset + blkimg->hole_length > BLCKSZ)
+ {
+ report_invalid_record(state,
+ "incorrect hole size in record at %X/%X",
+ (uint32) (recptr >> 32), (uint32) recptr);
+ return false;
+ }
+
+ /*
+ * Check the data length equals block size - hole (unless more
+ * data follows.
+ */
+ if (!(bkpb.fork_flags & BKPBLOCK_HAS_DATA) &&
+ bkpb.data_length != sizeof(XLogRecordBlockImage) + BLCKSZ - blkimg->hole_length)
+ {
+ report_invalid_record(state,
+ "incorrect hole size in record at %X/%X",
+ (uint32) (recptr >> 32), (uint32) recptr);
+ return false;
+ }
+ }
+ remaining -= bkpb.data_length;
+ blk += bkpb.data_length;
+ } while (remaining > 0);
}
/* Check that xl_tot_len agrees with our calculation */
return false;
}
- /* Finally include the record header */
+ /* Calculate the CRC */
+ INIT_CRC32C(crc);
+ COMP_CRC32C(crc, XLogRecGetData(record), record->xl_tot_len - SizeOfXLogRecord);
+ /* include the record header last */
COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc));
FIN_CRC32C(crc);
}
#endif /* FRONTEND */
+
+
+/*
+ * Functions for decoding the data and block references in a record.
+ */
+
+/*
+ * Returns true if the record contains a block reference with given ID.
+ */
+bool
+XLogRecHasBlockRef(XLogRecord *record, uint8 block_id)
+{
+ XLogRecordBlockData *bkpb;
+
+ bkpb = XLogRecGetBlockRef(record, block_id, NULL);
+ return bkpb != NULL;
+}
+
+/*
+ * Returns true if the record contains a full-page image for the given ID.
+ */
+bool
+XLogRecHasBlockImage(XLogRecord *record, uint8 block_id)
+{
+ XLogRecordBlockData *bkpb;
+
+ bkpb = XLogRecGetBlockRef(record, block_id, NULL);
+ if (bkpb == NULL)
+ return false;
+
+ return (bkpb->fork_flags & BKPBLOCK_HAS_IMAGE) != 0;
+}
+
+/*
+ * Returns information about the block that a block reference refers to.
+ */
+void
+XLogRecGetBlockTag(XLogRecord *record, uint8 block_id,
+ RelFileNode *rnode, ForkNumber *forknum, BlockNumber *blknum)
+{
+ XLogRecordBlockData *bkpb;
+
+ bkpb = XLogRecGetBlockRef(record, block_id, NULL);
+ if (bkpb == NULL)
+ {
+#ifdef FRONTEND
+ if (rnode)
+ rnode->spcNode = rnode->dbNode = rnode->relNode = InvalidOid;
+ if (forknum)
+ *forknum = InvalidForkNumber;
+ if (blknum)
+ *blknum = InvalidBlockNumber;
+#else
+ elog(ERROR, "failed to locate backup block with ID %d", block_id);
+#endif
+ }
+ else
+ {
+ if (rnode)
+ *rnode = bkpb->node;
+ if (forknum)
+ *forknum = bkpb->fork_flags & BKPBLOCK_FORK_MASK;
+ if (blknum)
+ *blknum = bkpb->block;
+ }
+}
+
+/*
+ * Returns the data associated with a block reference, or NULL if there is
+ * no data (e.g. because a full-page image was taken instead).
+ */
+char *
+XLogRecGetBlockData(XLogRecord *record, uint8 block_id, Size *len)
+{
+ XLogRecordBlockData *bkpb;
+ char *payload;
+
+ bkpb = XLogRecGetBlockRef(record, block_id, &payload);
+ if (!(bkpb->fork_flags & BKPBLOCK_HAS_DATA))
+ {
+ /* no data */
+ if (len)
+ *len = 0;
+ return NULL;
+ }
+ else if (bkpb->fork_flags & BKPBLOCK_HAS_IMAGE)
+ {
+ /* both a full-page image and data */
+ XLogRecordBlockImage *img = (XLogRecordBlockImage *) payload;
+ payload += sizeof(XLogRecordBlockImage);
+
+ payload += BLCKSZ - img->hole_length;
+ payload = (char *) MAXALIGN(payload);
+
+ if (len)
+ *len = ((char *) img) + bkpb->data_length - payload;
+ return payload;
+ }
+ else
+ {
+ /* just data */
+ if (len)
+ *len = bkpb->data_length;
+ return payload;
+ }
+}
+
+/*
+ * Return all the block reference IDs in a WAL record.
+ *
+ * The returned array is palloc'd, and its length is stored in *num_refs.
+ */
+uint8 *
+XLogRecGetBlockRefIds(XLogRecord *record, int *num_refs)
+{
+ XLogRecordBlockData bkpb;
+ char *blk;
+ char *end;
+ uint8 *out;
+ int n;
+ uint8 ids[XLR_MAX_BKP_BLOCKS];
+
+ blk = (char *) XLogRecGetData(record) + record->xl_len;
+ end = ((char *) record) + record->xl_tot_len;
+
+ /* Extract the entries into a local array first */
+ for (n = 0; blk < end; n++)
+ {
+ memcpy(&bkpb, blk, SizeOfXLogRecordBlockDataSameRel);
+ if (bkpb.fork_flags & BKPBLOCK_SAME_REL)
+ blk += SizeOfXLogRecordBlockDataSameRel;
+ else
+ blk += SizeOfXLogRecordBlockData;
+ if (bkpb.data_length > 0)
+ blk = (char *) MAXALIGN(blk);
+ blk += bkpb.data_length;
+
+ ids[n] = bkpb.id;
+ }
+
+ *num_refs = n;
+
+ /* Allocate a return array and copy the data to it */
+ out = palloc(n * sizeof(uint8));
+ memcpy(out, ids, n * sizeof(uint8));
+
+ return out;
+}
+
+
+/*
+ * Returns a BkpBlock struct and payload of a block reference.
+ *
+ * NOTE: This is a internal function, used by the backend code that deals
+ * with WAL records. redo/desc routines and other higher-level functions
+ * should use the XLogBlockRefGetTag() and XLogGetPayload() functions instead.
+ */
+XLogRecordBlockData *
+XLogRecGetBlockRef(XLogRecord *record, uint8 block_id, char **content)
+{
+ static XLogRecordBlockData bkpb;
+ char *blk;
+ char *end;
+
+ end = ((char *) record) + record->xl_tot_len;
+
+ /* Locate requested XLogRecordBlockData in the record */
+ blk = (char *) XLogRecGetData(record) + record->xl_len;
+ while (blk < end)
+ {
+ uint8 flags;
+ Assert(blk < ((char *) record) + record->xl_tot_len);
+
+ flags = *((uint8 *) (blk + offsetof(XLogRecordBlockData, fork_flags)));
+ if (flags & BKPBLOCK_SAME_REL)
+ {
+ memcpy(&bkpb, blk, SizeOfXLogRecordBlockDataSameRel);
+ /* The relfilenode field stays unchanged */
+ blk += SizeOfXLogRecordBlockDataSameRel;
+ }
+ else
+ {
+ memcpy(&bkpb, blk, SizeOfXLogRecordBlockData);
+ blk += SizeOfXLogRecordBlockData;
+ }
+
+ if (bkpb.data_length > 0)
+ blk = (char *) MAXALIGN(blk);
+
+ if (bkpb.id == block_id)
+ {
+ /* Found it */
+ if (content)
+ *content = blk;
+ return &bkpb;
+ }
+
+ blk += bkpb.data_length;
+ }
+
+ /* Caller specified a bogus block_id */
+ return NULL;
+}
#include "utils/rel.h"
+static Buffer RestoreBackupBlockContents(XLogRecPtr lsn, XLogRecordBlockData *bkpb,
+ XLogRecordBlockImage *blk,
+ bool get_cleanup_lock, bool keep_buffer);
+static Buffer RestoreBackupBlock(XLogRecPtr lsn, XLogRecord *record,
+ uint8 block_id, bool get_cleanup_lock, bool keep_buffer);
+
/*
* During XLOG replay, we may see XLOG records for incremental updates of
* pages that no longer exist, because their relation was later dropped or
*
* 'lsn' is the LSN of the record being replayed. It is compared with the
* page's LSN to determine if the record has already been replayed.
- * 'rnode' and 'blkno' point to the block being replayed (main fork number
- * is implied, use XLogReadBufferForRedoExtended for other forks).
- * 'block_index' identifies the backup block in the record for the page.
+ * 'block_id' is the ID number the block was registered with, when the WAL
+ * record was created.
*
* Returns one of the following:
*
* definitely necessary.)
*/
XLogRedoAction
-XLogReadBufferForRedo(XLogRecPtr lsn, XLogRecord *record, int block_index,
- RelFileNode rnode, BlockNumber blkno,
+XLogReadBufferForRedo(XLogRecPtr lsn, XLogRecord *record, uint8 block_id,
Buffer *buf)
{
- return XLogReadBufferForRedoExtended(lsn, record, block_index,
- rnode, MAIN_FORKNUM, blkno,
- RBM_NORMAL, false, buf);
+ return XLogReadBufferForRedoExtended(lsn, record, block_id, RBM_NORMAL,
+ false, buf);
}
/*
*/
XLogRedoAction
XLogReadBufferForRedoExtended(XLogRecPtr lsn, XLogRecord *record,
- int block_index, RelFileNode rnode,
- ForkNumber forkno, BlockNumber blkno,
+ uint8 block_id,
ReadBufferMode mode, bool get_cleanup_lock,
Buffer *buf)
{
- if (record->xl_info & XLR_BKP_BLOCK(block_index))
- {
- *buf = RestoreBackupBlock(lsn, record, block_index,
- get_cleanup_lock, true);
+ struct XLogRecordBlockData *bkpb;
+
+ *buf = RestoreBackupBlock(lsn, record, block_id, get_cleanup_lock, true);
+ if (*buf != InvalidBuffer)
return BLK_RESTORED;
- }
else
{
- *buf = XLogReadBufferExtended(rnode, forkno, blkno, mode);
+ bkpb = XLogRecGetBlockRef(record, block_id, NULL);
+ if (bkpb == NULL)
+ elog(ERROR, "failed to locate backup block with ID %d", block_id);
+
+ if ((bkpb->fork_flags & BKPBLOCK_WILL_INIT) != 0 && mode != RBM_ZERO)
+ elog(PANIC, "block with WILL_INIT flag in WAL record must be zeroed by redo routine");
+
+ *buf = XLogReadBufferExtended(bkpb->node,
+ bkpb->fork_flags & BKPBLOCK_FORK_MASK,
+ bkpb->block,
+ mode);
if (BufferIsValid(*buf))
{
LockBuffer(*buf, BUFFER_LOCK_EXCLUSIVE);
}
}
-/*
- * XLogReadBuffer
- * Read a page during XLOG replay.
- *
- * This is a shorthand of XLogReadBufferExtended() followed by
- * LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE), for reading from the main
- * fork.
- *
- * (Getting the buffer lock is not really necessary during single-process
- * crash recovery, but some subroutines such as MarkBufferDirty will complain
- * if we don't have the lock. In hot standby mode it's definitely necessary.)
- *
- * The returned buffer is exclusively-locked.
- *
- * For historical reasons, instead of a ReadBufferMode argument, this only
- * supports RBM_ZERO (init == true) and RBM_NORMAL (init == false) modes.
- */
-Buffer
-XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
-{
- Buffer buf;
-
- buf = XLogReadBufferExtended(rnode, MAIN_FORKNUM, blkno,
- init ? RBM_ZERO : RBM_NORMAL);
- if (BufferIsValid(buf))
- LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
-
- return buf;
-}
-
/*
* XLogReadBufferExtended
* Read a page during XLOG replay
* In RBM_NORMAL_NO_LOG mode, we return InvalidBuffer if the page doesn't
* exist, and we don't check for all-zeroes. Thus, no log entry is made
* to imply that the page should be dropped or truncated later.
+ *
+ * NB: A redo function should normally not call this directly. To get a page
+ * to modify, use XLogReplayBuffer instead. It is important that all pages
+ * modified by a WAL record are registered in the WAL records, or they will be
+ * invisible to tools that that need to know which pages are modified.
*/
Buffer
XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
*
* lsn: LSN of the XLOG record being replayed
* record: the complete XLOG record
- * block_index: which backup block to restore (0 .. XLR_MAX_BKP_BLOCKS - 1)
+ * block_id: which backup block to restore
* get_cleanup_lock: TRUE to get a cleanup rather than plain exclusive lock
* keep_buffer: TRUE to return the buffer still locked and pinned
*
* pages, to prevent inconsistent states from being visible to other backends.
* (Again, that's only important in hot standby mode.)
*/
-Buffer
-RestoreBackupBlock(XLogRecPtr lsn, XLogRecord *record, int block_index,
+static Buffer
+RestoreBackupBlock(XLogRecPtr lsn, XLogRecord *record, uint8 block_id,
bool get_cleanup_lock, bool keep_buffer)
{
- BkpBlock bkpb;
+ XLogRecordBlockData *bkpb;
char *blk;
- int i;
- /* Locate requested BkpBlock in the record */
- blk = (char *) XLogRecGetData(record) + record->xl_len;
- for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
+ bkpb = XLogRecGetBlockRef(record, block_id, &blk);
+ if (!bkpb)
{
- if (!(record->xl_info & XLR_BKP_BLOCK(i)))
- continue;
-
- memcpy(&bkpb, blk, sizeof(BkpBlock));
- blk += sizeof(BkpBlock);
-
- if (i == block_index)
- {
- /* Found it, apply the update */
- return RestoreBackupBlockContents(lsn, bkpb, blk, get_cleanup_lock,
- keep_buffer);
- }
-
- blk += BLCKSZ - bkpb.hole_length;
+ /* Caller specified a bogus block_id */
+ elog(PANIC, "failed to restore backup block with ID %d", block_id);
}
- /* Caller specified a bogus block_index */
- elog(ERROR, "failed to restore block_index %d", block_index);
- return InvalidBuffer; /* keep compiler quiet */
+ /* Found it, apply the update */
+ if (!(bkpb->fork_flags & BKPBLOCK_HAS_IMAGE))
+ return InvalidBuffer;
+
+ return RestoreBackupBlockContents(lsn, bkpb,
+ (XLogRecordBlockImage *) blk,
+ get_cleanup_lock,
+ keep_buffer);
}
/*
*
* Restores a full-page image from BkpBlock and a data pointer.
*/
-Buffer
-RestoreBackupBlockContents(XLogRecPtr lsn, BkpBlock bkpb, char *blk,
+static Buffer
+RestoreBackupBlockContents(XLogRecPtr lsn, XLogRecordBlockData *bkpb,
+ XLogRecordBlockImage *blkimg,
bool get_cleanup_lock, bool keep_buffer)
{
Buffer buffer;
Page page;
+ char *blk;
- buffer = XLogReadBufferExtended(bkpb.node, bkpb.fork, bkpb.block,
+ buffer = XLogReadBufferExtended(bkpb->node,
+ bkpb->fork_flags & BKPBLOCK_FORK_MASK,
+ bkpb->block,
RBM_ZERO);
Assert(BufferIsValid(buffer));
if (get_cleanup_lock)
page = (Page) BufferGetPage(buffer);
- if (bkpb.hole_length == 0)
+ blk = ((char *) blkimg) + sizeof(XLogRecordBlockImage);
+
+ if (blkimg->hole_length == 0)
{
memcpy((char *) page, blk, BLCKSZ);
}
else
{
- memcpy((char *) page, blk, bkpb.hole_offset);
+ memcpy((char *) page, blk, blkimg->hole_offset);
/* must zero-fill the hole */
- MemSet((char *) page + bkpb.hole_offset, 0, bkpb.hole_length);
- memcpy((char *) page + (bkpb.hole_offset + bkpb.hole_length),
- blk + bkpb.hole_offset,
- BLCKSZ - (bkpb.hole_offset + bkpb.hole_length));
+ MemSet((char *) page + blkimg->hole_offset, 0, blkimg->hole_length);
+ memcpy((char *) page + (blkimg->hole_offset + blkimg->hole_length),
+ blk + blkimg->hole_offset,
+ BLCKSZ - (blkimg->hole_offset + blkimg->hole_length));
}
/*
log_smgrcreate(RelFileNode *rnode, ForkNumber forkNum)
{
xl_smgr_create xlrec;
- XLogRecData rdata;
/*
* Make an XLOG entry reporting the file creation.
xlrec.rnode = *rnode;
xlrec.forkNum = forkNum;
- rdata.data = (char *) &xlrec;
- rdata.len = sizeof(xlrec);
- rdata.buffer = InvalidBuffer;
- rdata.next = NULL;
-
- XLogInsert(RM_SMGR_ID, XLOG_SMGR_CREATE, &rdata);
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, sizeof(xlrec));
+ XLogInsert(RM_SMGR_ID, XLOG_SMGR_CREATE);
}
/*
* Make an XLOG entry reporting the file truncation.
*/
XLogRecPtr lsn;
- XLogRecData rdata;
xl_smgr_truncate xlrec;
xlrec.blkno = nblocks;
xlrec.rnode = rel->rd_node;
- rdata.data = (char *) &xlrec;
- rdata.len = sizeof(xlrec);
- rdata.buffer = InvalidBuffer;
- rdata.next = NULL;
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, sizeof(xlrec));
- lsn = XLogInsert(RM_SMGR_ID, XLOG_SMGR_TRUNCATE, &rdata);
+ lsn = XLogInsert(RM_SMGR_ID, XLOG_SMGR_TRUNCATE);
/*
* Flush, because otherwise the truncation of the main relation might
uint8 info = record->xl_info & ~XLR_INFO_MASK;
/* Backup blocks are not used in smgr records */
- Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+ Assert(!XLogRecHasAnyBlockRefs(record));
if (info == XLOG_SMGR_CREATE)
{
/*
* Forcibly create relation if it doesn't exist (which suggests that
* it was dropped somewhere later in the WAL sequence). As in
- * XLogReadBuffer, we prefer to recreate the rel and replay the log as
+ * XLogReplayBuffer, we prefer to recreate the rel and replay the log as
* best we can until the drop is seen.
*/
smgrcreate(reln, MAIN_FORKNUM, true);
/* Record the filesystem change in XLOG */
{
xl_dbase_create_rec xlrec;
- XLogRecData rdata[1];
xlrec.db_id = dboid;
xlrec.tablespace_id = dsttablespace;
xlrec.src_db_id = src_dboid;
xlrec.src_tablespace_id = srctablespace;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = sizeof(xl_dbase_create_rec);
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = NULL;
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec,
+ sizeof(xl_dbase_create_rec));
- (void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE, rdata);
+ (void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE);
}
}
heap_endscan(scan);
*/
{
xl_dbase_create_rec xlrec;
- XLogRecData rdata[1];
xlrec.db_id = db_id;
xlrec.tablespace_id = dst_tblspcoid;
xlrec.src_db_id = db_id;
xlrec.src_tablespace_id = src_tblspcoid;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = sizeof(xl_dbase_create_rec);
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = NULL;
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, sizeof(xl_dbase_create_rec));
- (void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE, rdata);
+ (void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE);
}
/*
*/
{
xl_dbase_drop_rec xlrec;
- XLogRecData rdata[1];
xlrec.db_id = db_id;
xlrec.tablespace_id = src_tblspcoid;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = sizeof(xl_dbase_drop_rec);
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = NULL;
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, sizeof(xl_dbase_drop_rec));
- (void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_DROP, rdata);
+ (void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_DROP);
}
/* Now it's safe to release the database lock */
/* Record the filesystem change in XLOG */
{
xl_dbase_drop_rec xlrec;
- XLogRecData rdata[1];
xlrec.db_id = db_id;
xlrec.tablespace_id = dsttablespace;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = sizeof(xl_dbase_drop_rec);
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = NULL;
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, sizeof(xl_dbase_drop_rec));
- (void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_DROP, rdata);
+ (void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_DROP);
}
pfree(dstpath);
uint8 info = record->xl_info & ~XLR_INFO_MASK;
/* Backup blocks are not used in dbase records */
- Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+ Assert(!XLogRecHasAnyBlockRefs(record));
if (info == XLOG_DBASE_CREATE)
{
{
xl_seq_rec xlrec;
XLogRecPtr recptr;
- XLogRecData rdata[2];
+
+ XLogBeginInsert();
+ XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);
xlrec.node = rel->rd_node;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = sizeof(xl_seq_rec);
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
- rdata[1].data = (char *) tuple->t_data;
- rdata[1].len = tuple->t_len;
- rdata[1].buffer = InvalidBuffer;
- rdata[1].next = NULL;
+ XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec));
+ XLogRegisterData((char *) tuple->t_data, tuple->t_len);
- recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, rdata);
+ recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);
PageSetLSN(page, recptr);
}
{
xl_seq_rec xlrec;
XLogRecPtr recptr;
- XLogRecData rdata[2];
Page page = BufferGetPage(buf);
+ XLogBeginInsert();
+ XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);
+
xlrec.node = seqrel->rd_node;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = sizeof(xl_seq_rec);
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
+ XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec));
- rdata[1].data = (char *) seqtuple.t_data;
- rdata[1].len = seqtuple.t_len;
- rdata[1].buffer = InvalidBuffer;
- rdata[1].next = NULL;
+ XLogRegisterData((char *) seqtuple.t_data, seqtuple.t_len);
- recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, rdata);
+ recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);
PageSetLSN(page, recptr);
}
{
xl_seq_rec xlrec;
XLogRecPtr recptr;
- XLogRecData rdata[2];
/*
* We don't log the current state of the tuple, but rather the state
* that many future WAL records, at the cost that we lose those
* sequence values if we crash.
*/
+ XLogBeginInsert();
+ XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);
/* set values that will be saved in xlog */
seq->last_value = next;
seq->log_cnt = 0;
xlrec.node = seqrel->rd_node;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = sizeof(xl_seq_rec);
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
- rdata[1].data = (char *) seqtuple.t_data;
- rdata[1].len = seqtuple.t_len;
- rdata[1].buffer = InvalidBuffer;
- rdata[1].next = NULL;
+ XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec));
+ XLogRegisterData((char *) seqtuple.t_data, seqtuple.t_len);
- recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, rdata);
+ recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);
PageSetLSN(page, recptr);
}
{
xl_seq_rec xlrec;
XLogRecPtr recptr;
- XLogRecData rdata[2];
Page page = BufferGetPage(buf);
- xlrec.node = seqrel->rd_node;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = sizeof(xl_seq_rec);
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
+ XLogBeginInsert();
+ XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);
- rdata[1].data = (char *) seqtuple.t_data;
- rdata[1].len = seqtuple.t_len;
- rdata[1].buffer = InvalidBuffer;
- rdata[1].next = NULL;
+ xlrec.node = seqrel->rd_node;
+ XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec));
+ XLogRegisterData((char *) seqtuple.t_data, seqtuple.t_len);
- recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, rdata);
+ recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);
PageSetLSN(page, recptr);
}
xl_seq_rec *xlrec = (xl_seq_rec *) XLogRecGetData(record);
sequence_magic *sm;
- /* Backup blocks are not used in seq records */
- Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
-
if (info != XLOG_SEQ_LOG)
elog(PANIC, "seq_redo: unknown op code %u", info);
- buffer = XLogReadBuffer(xlrec->node, 0, true);
- Assert(BufferIsValid(buffer));
+ XLogReadBufferForRedoExtended(lsn, record, 0, RBM_ZERO, false, &buffer);
page = (Page) BufferGetPage(buffer);
/*
/* Record the filesystem change in XLOG */
{
xl_tblspc_create_rec xlrec;
- XLogRecData rdata[2];
xlrec.ts_id = tablespaceoid;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = offsetof(xl_tblspc_create_rec, ts_path);
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
- rdata[1].data = (char *) location;
- rdata[1].len = strlen(location) + 1;
- rdata[1].buffer = InvalidBuffer;
- rdata[1].next = NULL;
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec,
+ offsetof(xl_tblspc_create_rec, ts_path));
+ XLogRegisterData((char *) location, strlen(location) + 1);
- (void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_CREATE, rdata);
+ (void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_CREATE);
}
/*
/* Record the filesystem change in XLOG */
{
xl_tblspc_drop_rec xlrec;
- XLogRecData rdata[1];
xlrec.ts_id = tablespaceoid;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = sizeof(xl_tblspc_drop_rec);
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = NULL;
- (void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_DROP, rdata);
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, sizeof(xl_tblspc_drop_rec));
+
+ (void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_DROP);
}
/*
uint8 info = record->xl_info & ~XLR_INFO_MASK;
/* Backup blocks are not used in tblspc records */
- Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+ Assert(!XLogRecHasAnyBlockRefs(record));
if (info == XLOG_TBLSPC_CREATE)
{
#include "access/transam.h"
#include "access/xact.h"
#include "access/xlog_internal.h"
+#include "access/xlogutils.h"
+#include "access/xlogrecord.h"
#include "access/xlogreader.h"
#include "catalog/pg_control.h"
{
XLogRecPtr origptr;
XLogRecPtr endptr;
- XLogRecord record;
- char *record_data;
+ XLogRecord *record;
} XLogRecordBuffer;
/* RMGR Handlers */
buf.origptr = ctx->reader->ReadRecPtr;
buf.endptr = ctx->reader->EndRecPtr;
- buf.record = *record;
- buf.record_data = XLogRecGetData(record);
+ buf.record = record;
/* cast so we get a warning when new rmgrs are added */
- switch ((RmgrIds) buf.record.xl_rmid)
+ switch ((RmgrIds) buf.record->xl_rmid)
{
/*
* Rmgrs we care about for logical decoding. Add new rmgrs in
case RM_BRIN_ID:
break;
case RM_NEXT_ID:
- elog(ERROR, "unexpected RM_NEXT_ID rmgr_id: %u", (RmgrIds) buf.record.xl_rmid);
+ elog(ERROR, "unexpected RM_NEXT_ID rmgr_id: %u", (RmgrIds) buf.record->xl_rmid);
}
}
DecodeXLogOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
SnapBuild *builder = ctx->snapshot_builder;
- uint8 info = buf->record.xl_info & ~XLR_INFO_MASK;
+ uint8 info = buf->record->xl_info & ~XLR_INFO_MASK;
switch (info)
{
{
SnapBuild *builder = ctx->snapshot_builder;
ReorderBuffer *reorder = ctx->reorder;
- XLogRecord *r = &buf->record;
+ XLogRecord *r = buf->record;
uint8 info = r->xl_info & ~XLR_INFO_MASK;
/* no point in doing anything yet, data could not be decoded anyway */
TransactionId *subxacts = NULL;
SharedInvalidationMessage *invals = NULL;
- xlrec = (xl_xact_commit *) buf->record_data;
+ xlrec = (xl_xact_commit *) XLogRecGetData(r);
subxacts = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
invals = (SharedInvalidationMessage *) &(subxacts[xlrec->nsubxacts]);
SharedInvalidationMessage *invals = NULL;
/* Prepared commits contain a normal commit record... */
- prec = (xl_xact_commit_prepared *) buf->record_data;
+ prec = (xl_xact_commit_prepared *) XLogRecGetData(r);
xlrec = &prec->crec;
subxacts = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
{
xl_xact_commit_compact *xlrec;
- xlrec = (xl_xact_commit_compact *) buf->record_data;
+ xlrec = (xl_xact_commit_compact *) XLogRecGetData(r);
DecodeCommit(ctx, buf, r->xl_xid, InvalidOid,
xlrec->xact_time,
xl_xact_abort *xlrec;
TransactionId *sub_xids;
- xlrec = (xl_xact_abort *) buf->record_data;
+ xlrec = (xl_xact_abort *) XLogRecGetData(r);
sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
TransactionId *sub_xids;
/* prepared abort contain a normal commit abort... */
- prec = (xl_xact_abort_prepared *) buf->record_data;
+ prec = (xl_xact_abort_prepared *) XLogRecGetData(r);
xlrec = &prec->arec;
sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
int i;
TransactionId *sub_xid;
- xlrec = (xl_xact_assignment *) buf->record_data;
+ xlrec = (xl_xact_assignment *) XLogRecGetData(r);
sub_xid = &xlrec->xsub[0];
DecodeStandbyOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
SnapBuild *builder = ctx->snapshot_builder;
- XLogRecord *r = &buf->record;
+ XLogRecord *r = buf->record;
uint8 info = r->xl_info & ~XLR_INFO_MASK;
switch (info)
{
case XLOG_RUNNING_XACTS:
{
- xl_running_xacts *running = (xl_running_xacts *) buf->record_data;
+ xl_running_xacts *running = (xl_running_xacts *) XLogRecGetData(r);
SnapBuildProcessRunningXacts(builder, buf->origptr, running);
static void
DecodeHeap2Op(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
- uint8 info = buf->record.xl_info & XLOG_HEAP_OPMASK;
- TransactionId xid = buf->record.xl_xid;
+ uint8 info = buf->record->xl_info & XLOG_HEAP_OPMASK;
+ TransactionId xid = buf->record->xl_xid;
SnapBuild *builder = ctx->snapshot_builder;
/* no point in doing anything yet */
{
xl_heap_new_cid *xlrec;
- xlrec = (xl_heap_new_cid *) buf->record_data;
+ xlrec = (xl_heap_new_cid *) XLogRecGetData(buf->record);
SnapBuildProcessNewCid(builder, xid, buf->origptr, xlrec);
break;
static void
DecodeHeapOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
- uint8 info = buf->record.xl_info & XLOG_HEAP_OPMASK;
- TransactionId xid = buf->record.xl_xid;
+ uint8 info = buf->record->xl_info & XLOG_HEAP_OPMASK;
+ TransactionId xid = buf->record->xl_xid;
SnapBuild *builder = ctx->snapshot_builder;
/* no point in doing anything yet */
static void
DecodeInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
- XLogRecord *r = &buf->record;
+ XLogRecord *r = buf->record;
xl_heap_insert *xlrec;
ReorderBufferChange *change;
+ RelFileNode target_node;
- xlrec = (xl_heap_insert *) buf->record_data;
+ xlrec = (xl_heap_insert *) XLogRecGetData(r);
/* only interested in our database */
- if (xlrec->target.node.dbNode != ctx->slot->data.database)
+ XLogRecGetBlockTag(r, 0, &target_node, NULL, NULL);
+ if (target_node.dbNode != ctx->slot->data.database)
return;
change = ReorderBufferGetChange(ctx->reorder);
change->action = REORDER_BUFFER_CHANGE_INSERT;
- memcpy(&change->data.tp.relnode, &xlrec->target.node, sizeof(RelFileNode));
+ memcpy(&change->data.tp.relnode, &target_node, sizeof(RelFileNode));
if (xlrec->flags & XLOG_HEAP_CONTAINS_NEW_TUPLE)
{
static void
DecodeUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
- XLogRecord *r = &buf->record;
+ XLogRecord *r = buf->record;
xl_heap_update *xlrec;
- xl_heap_header_len xlhdr;
ReorderBufferChange *change;
char *data;
+ Size datalen;
+ RelFileNode target_node;
- xlrec = (xl_heap_update *) buf->record_data;
+ xlrec = (xl_heap_update *) XLogRecGetData(r);
/* only interested in our database */
- if (xlrec->target.node.dbNode != ctx->slot->data.database)
+ XLogRecGetBlockTag(r, 0, &target_node, NULL, NULL);
+ if (target_node.dbNode != ctx->slot->data.database)
return;
change = ReorderBufferGetChange(ctx->reorder);
change->action = REORDER_BUFFER_CHANGE_UPDATE;
- memcpy(&change->data.tp.relnode, &xlrec->target.node, sizeof(RelFileNode));
-
- /* caution, remaining data in record is not aligned */
- data = buf->record_data + SizeOfHeapUpdate;
+ memcpy(&change->data.tp.relnode, &target_node, sizeof(RelFileNode));
if (xlrec->flags & XLOG_HEAP_CONTAINS_NEW_TUPLE)
{
- Assert(r->xl_len > (SizeOfHeapUpdate + SizeOfHeapHeaderLen));
-
- memcpy(&xlhdr, data, sizeof(xlhdr));
- data += offsetof(xl_heap_header_len, header);
+ data = XLogRecGetBlockData(r, 0, &datalen);
+ Assert(datalen > (SizeOfHeapUpdate + SizeOfHeapHeader));
change->data.tp.newtuple = ReorderBufferGetTupleBuf(ctx->reorder);
- DecodeXLogTuple(data,
- xlhdr.t_len + SizeOfHeapHeader,
- change->data.tp.newtuple);
- /* skip over the rest of the tuple header */
- data += SizeOfHeapHeader;
- /* skip over the tuple data */
- data += xlhdr.t_len;
+ DecodeXLogTuple(data, datalen, change->data.tp.newtuple);
}
if (xlrec->flags & XLOG_HEAP_CONTAINS_OLD)
{
- memcpy(&xlhdr, data, sizeof(xlhdr));
- data += offsetof(xl_heap_header_len, header);
+ /* caution, remaining data in record is not aligned */
+ data = XLogRecGetData(r) + SizeOfHeapUpdate;
+ datalen = r->xl_len - SizeOfHeapUpdate;
change->data.tp.oldtuple = ReorderBufferGetTupleBuf(ctx->reorder);
- DecodeXLogTuple(data,
- xlhdr.t_len + SizeOfHeapHeader,
- change->data.tp.oldtuple);
-#ifdef NOT_USED
- data += SizeOfHeapHeader;
- data += xlhdr.t_len;
-#endif
+ DecodeXLogTuple(data, datalen, change->data.tp.oldtuple);
}
change->data.tp.clear_toast_afterwards = true;
static void
DecodeDelete(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
- XLogRecord *r = &buf->record;
+ XLogRecord *r = buf->record;
xl_heap_delete *xlrec;
ReorderBufferChange *change;
+ RelFileNode target_node;
- xlrec = (xl_heap_delete *) buf->record_data;
+ xlrec = (xl_heap_delete *) XLogRecGetData(r);
/* only interested in our database */
- if (xlrec->target.node.dbNode != ctx->slot->data.database)
+ XLogRecGetBlockTag(r, 0, &target_node, NULL, NULL);
+ if (target_node.dbNode != ctx->slot->data.database)
return;
change = ReorderBufferGetChange(ctx->reorder);
change->action = REORDER_BUFFER_CHANGE_DELETE;
- memcpy(&change->data.tp.relnode, &xlrec->target.node, sizeof(RelFileNode));
+ memcpy(&change->data.tp.relnode, &target_node, sizeof(RelFileNode));
/* old primary key stored */
if (xlrec->flags & XLOG_HEAP_CONTAINS_OLD)
static void
DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
{
- XLogRecord *r = &buf->record;
+ XLogRecord *r = buf->record;
xl_heap_multi_insert *xlrec;
int i;
char *data;
bool isinit = (r->xl_info & XLOG_HEAP_INIT_PAGE) != 0;
+ RelFileNode rnode;
- xlrec = (xl_heap_multi_insert *) buf->record_data;
+ xlrec = (xl_heap_multi_insert *) XLogRecGetData(r);
/* only interested in our database */
- if (xlrec->node.dbNode != ctx->slot->data.database)
+ XLogRecGetBlockTag(r, 0, &rnode, NULL, NULL);
+ if (rnode.dbNode != ctx->slot->data.database)
return;
- data = buf->record_data + SizeOfHeapMultiInsert;
+ data = XLogRecGetData(r) + SizeOfHeapMultiInsert;
/*
* OffsetNumbers (which are not of interest to us) are stored when
change = ReorderBufferGetChange(ctx->reorder);
change->action = REORDER_BUFFER_CHANGE_INSERT;
- memcpy(&change->data.tp.relnode, &xlrec->node, sizeof(RelFileNode));
+ memcpy(&change->data.tp.relnode, &rnode, sizeof(RelFileNode));
/*
* CONTAINS_NEW_TUPLE will always be set currently as multi_insert
ReorderBufferXidSetCatalogChanges(builder->reorder, xid, lsn);
ReorderBufferAddNewTupleCids(builder->reorder, xlrec->top_xid, lsn,
- xlrec->target.node, xlrec->target.tid,
+ xlrec->target_node, xlrec->target_tid,
xlrec->cmin, xlrec->cmax,
xlrec->combocid);
uint8 info = record->xl_info & ~XLR_INFO_MASK;
/* Backup blocks are not used in standby records */
- Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+ Assert(!XLogRecHasAnyBlockRefs(record));
/* Do nothing if we're not in hot standby mode */
if (standbyState == STANDBY_DISABLED)
LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
{
xl_running_xacts xlrec;
- XLogRecData rdata[2];
- int lastrdata = 0;
XLogRecPtr recptr;
xlrec.xcnt = CurrRunningXacts->xcnt;
xlrec.latestCompletedXid = CurrRunningXacts->latestCompletedXid;
/* Header */
- rdata[0].data = (char *) (&xlrec);
- rdata[0].len = MinSizeOfXactRunningXacts;
- rdata[0].buffer = InvalidBuffer;
+ XLogBeginInsert();
+ XLogRegisterData((char *) (&xlrec), MinSizeOfXactRunningXacts);
/* array of TransactionIds */
if (xlrec.xcnt > 0)
- {
- rdata[0].next = &(rdata[1]);
- rdata[1].data = (char *) CurrRunningXacts->xids;
- rdata[1].len = (xlrec.xcnt + xlrec.subxcnt) * sizeof(TransactionId);
- rdata[1].buffer = InvalidBuffer;
- lastrdata = 1;
- }
+ XLogRegisterData((char *) CurrRunningXacts->xids,
+ (xlrec.xcnt + xlrec.subxcnt) * sizeof(TransactionId));
- rdata[lastrdata].next = NULL;
-
- recptr = XLogInsert(RM_STANDBY_ID, XLOG_RUNNING_XACTS, rdata);
+ recptr = XLogInsert(RM_STANDBY_ID, XLOG_RUNNING_XACTS);
if (CurrRunningXacts->subxid_overflow)
elog(trace_recovery(DEBUG2),
static void
LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks)
{
- XLogRecData rdata[2];
xl_standby_locks xlrec;
xlrec.nlocks = nlocks;
- rdata[0].data = (char *) &xlrec;
- rdata[0].len = offsetof(xl_standby_locks, locks);
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &rdata[1];
-
- rdata[1].data = (char *) locks;
- rdata[1].len = nlocks * sizeof(xl_standby_lock);
- rdata[1].buffer = InvalidBuffer;
- rdata[1].next = NULL;
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, offsetof(xl_standby_locks, locks));
+ XLogRegisterData((char *) locks, nlocks * sizeof(xl_standby_lock));
- (void) XLogInsert(RM_STANDBY_ID, XLOG_STANDBY_LOCK, rdata);
+ (void) XLogInsert(RM_STANDBY_ID, XLOG_STANDBY_LOCK);
}
/*
if (write_wal)
{
xl_relmap_update xlrec;
- XLogRecData rdata[2];
XLogRecPtr lsn;
/* now errors are fatal ... */
xlrec.tsid = tsid;
xlrec.nbytes = sizeof(RelMapFile);
- rdata[0].data = (char *) (&xlrec);
- rdata[0].len = MinSizeOfRelmapUpdate;
- rdata[0].buffer = InvalidBuffer;
- rdata[0].next = &(rdata[1]);
- rdata[1].data = (char *) newmap;
- rdata[1].len = sizeof(RelMapFile);
- rdata[1].buffer = InvalidBuffer;
- rdata[1].next = NULL;
+ XLogBeginInsert();
+ XLogRegisterData((char *) (&xlrec), MinSizeOfRelmapUpdate);
+ XLogRegisterData((char *) newmap, sizeof(RelMapFile));
- lsn = XLogInsert(RM_RELMAP_ID, XLOG_RELMAP_UPDATE, rdata);
+ lsn = XLogInsert(RM_RELMAP_ID, XLOG_RELMAP_UPDATE);
/* As always, WAL must hit the disk before the data update does */
XLogFlush(lsn);
uint8 info = record->xl_info & ~XLR_INFO_MASK;
/* Backup blocks are not used in relmap records */
- Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+ Assert(!XLogRecHasAnyBlockRefs(record));
if (info == XLOG_RELMAP_UPDATE)
{
*/
#define XLOG_BRIN_INIT_PAGE 0x80
-/* This is what we need to know about a BRIN index create */
+/*
+ * This is what we need to know about a BRIN index create.
+ *
+ * Backup block 0: metapage
+ */
typedef struct xl_brin_createidx
{
BlockNumber pagesPerRange;
- RelFileNode node;
uint16 version;
} xl_brin_createidx;
#define SizeOfBrinCreateIdx (offsetof(xl_brin_createidx, version) + sizeof(uint16))
/*
* This is what we need to know about a BRIN tuple insert
+ *
+ * Backup block 0: main page, block data is the new BrinTuple.
+ * Backup block 1: revmap page
*/
typedef struct xl_brin_insert
{
- RelFileNode node;
BlockNumber heapBlk;
/* extra information needed to update the revmap */
- BlockNumber revmapBlk;
BlockNumber pagesPerRange;
- uint16 tuplen;
- ItemPointerData tid;
- /* tuple data follows at end of struct */
+ /* offset number in the main page to insert the tuple to. */
+ OffsetNumber offnum;
} xl_brin_insert;
-#define SizeOfBrinInsert (offsetof(xl_brin_insert, tid) + sizeof(ItemPointerData))
+#define SizeOfBrinInsert (offsetof(xl_brin_insert, offnum) + sizeof(OffsetNumber))
/*
- * A cross-page update is the same as an insert, but also store the old tid.
+ * A cross-page update is the same as an insert, but also stores information
+ * about the old tuple.
+ *
+ * Like in xlog_brin_update:
+ * Backup block 0: new page, block data includes the new BrinTuple.
+ * Backup block 1: revmap page
+ *
+ * And in addition:
+ * Backup block 2: old page
*/
typedef struct xl_brin_update
{
- ItemPointerData oldtid;
+ /* offset number of old tuple on old page */
+ OffsetNumber oldOffnum;
+
xl_brin_insert insert;
} xl_brin_update;
#define SizeOfBrinUpdate (offsetof(xl_brin_update, insert) + SizeOfBrinInsert)
-/* This is what we need to know about a BRIN tuple samepage update */
+/*
+ * This is what we need to know about a BRIN tuple samepage update
+ *
+ * Backup block 0: updated page, with new BrinTuple as block data
+ */
typedef struct xl_brin_samepage_update
{
- RelFileNode node;
- ItemPointerData tid;
- /* tuple data follows at end of struct */
+ OffsetNumber offnum;
} xl_brin_samepage_update;
-#define SizeOfBrinSamepageUpdate (offsetof(xl_brin_samepage_update, tid) + sizeof(ItemPointerData))
+#define SizeOfBrinSamepageUpdate (sizeof(OffsetNumber))
-/* This is what we need to know about a revmap extension */
+/*
+ * This is what we need to know about a revmap extension
+ *
+ * Backup block 0: metapage
+ * Backup block 1: new revmap page
+ */
typedef struct xl_brin_revmap_extend
{
- RelFileNode node;
+ /*
+ * This is actually redundant - the block number is stored as part of
+ * backup block 1.
+ */
BlockNumber targetBlk;
} xl_brin_revmap_extend;
#include "access/genam.h"
#include "access/gin.h"
#include "access/itup.h"
-#include "access/xloginsert.h"
#include "fmgr.h"
#include "storage/bufmgr.h"
#include "utils/rbtree.h"
typedef struct ginxlogCreatePostingTree
{
- RelFileNode node;
- BlockNumber blkno;
uint32 size;
/* A compressed posting list follows */
} ginxlogCreatePostingTree;
-#define XLOG_GIN_INSERT 0x20
-
/*
* The format of the insertion record varies depending on the page type.
* ginxlogInsert is the common part between all variants.
+ *
+ * Backup Blk 0: target page
+ * Backup Blk 1: left child, if this insertion finishes an incomplete split
*/
+
+#define XLOG_GIN_INSERT 0x20
+
typedef struct
{
- RelFileNode node;
- BlockNumber blkno;
uint16 flags; /* GIN_SPLIT_ISLEAF and/or GIN_SPLIT_ISDATA */
/*
PostingItem newitem;
} ginxlogInsertDataInternal;
-
+/*
+ * Backup Blk 0: new left page (= original page, if not root split)
+ * Backup Blk 1: new right page
+ * Backup Blk 2: original page / new root page, if root split
+ * Backup Blk 3: left child, if this insertion completes an earlier split
+ */
#define XLOG_GIN_SPLIT 0x30
typedef struct ginxlogSplit
{
RelFileNode node;
- BlockNumber lblkno;
- BlockNumber rblkno;
BlockNumber rrlink; /* right link, or root's blocknumber if root
* split */
BlockNumber leftChildBlkno; /* valid on a non-leaf split */
*/
#define XLOG_GIN_VACUUM_PAGE 0x40
-typedef struct ginxlogVacuumPage
-{
- RelFileNode node;
- BlockNumber blkno;
- uint16 hole_offset; /* number of bytes before "hole" */
- uint16 hole_length; /* number of bytes in "hole" */
- /* entire page contents (minus the hole) follow at end of record */
-} ginxlogVacuumPage;
-
/*
* Vacuuming posting tree leaf page is WAL-logged like recompression caused
* by insertion.
typedef struct ginxlogVacuumDataLeafPage
{
- RelFileNode node;
- BlockNumber blkno;
-
ginxlogRecompressDataLeaf data;
} ginxlogVacuumDataLeafPage;
+/*
+ * Backup Blk 0: deleted page
+ * Backup Blk 1: parent
+ * Backup Blk 2: left sibling
+ */
#define XLOG_GIN_DELETE_PAGE 0x50
typedef struct ginxlogDeletePage
{
- RelFileNode node;
- BlockNumber blkno;
- BlockNumber parentBlkno;
OffsetNumber parentOffset;
- BlockNumber leftBlkno;
BlockNumber rightLink;
} ginxlogDeletePage;
#define XLOG_GIN_UPDATE_META_PAGE 0x60
+/*
+ * Backup Blk 0: metapage
+ * Backup Blk 1: tail page
+ */
typedef struct ginxlogUpdateMeta
{
RelFileNode node;
typedef struct ginxlogInsertListPage
{
- RelFileNode node;
- BlockNumber blkno;
BlockNumber rightlink;
int32 ntuples;
/* array of inserted tuples follows */
} ginxlogInsertListPage;
+/*
+ * Backup Blk 0: metapage
+ * Backup Blk 1 to (ndeleted + 1): deleted pages
+ */
+
#define XLOG_GIN_DELETE_LISTPAGE 0x80
#define GIN_NDELETE_AT_ONCE 16
typedef struct ginxlogDeleteListPages
{
- RelFileNode node;
GinMetaPageData metadata;
int32 ndeleted;
- BlockNumber toDelete[GIN_NDELETE_AT_ONCE];
} ginxlogDeleteListPages;
/* insert methods */
OffsetNumber (*findChildPtr) (GinBtree, Page, BlockNumber, OffsetNumber);
- GinPlaceToPageRC (*placeToPage) (GinBtree, Buffer, GinBtreeStack *, void *, BlockNumber, XLogRecData **, Page *, Page *);
+ GinPlaceToPageRC (*placeToPage) (GinBtree, Buffer, GinBtreeStack *, void *, BlockNumber, Page *, Page *);
void *(*prepareDownlink) (GinBtree, Buffer);
void (*fillRoot) (GinBtree, Page, BlockNumber, Page, BlockNumber, Page);
#define XLOG_GIST_CREATE_INDEX 0x50
/* #define XLOG_GIST_PAGE_DELETE 0x60 */ /* not used anymore */
+/*
+ * Backup Blk 0: updated page.
+ * Backup Blk 1: If this operation completes a page split, by inserting a
+ * downlink for the split page, the left half of the split
+ */
typedef struct gistxlogPageUpdate
{
- RelFileNode node;
- BlockNumber blkno;
-
- /*
- * If this operation completes a page split, by inserting a downlink for
- * the split page, leftchild points to the left half of the split.
- */
- BlockNumber leftchild;
-
/* number of deleted offsets */
uint16 ntodelete;
+ uint16 ntoinsert;
/*
- * follow: 1. todelete OffsetNumbers 2. tuples to insert
+ * In payload of blk 0 : 1. todelete OffsetNumbers 2. tuples to insert
*/
} gistxlogPageUpdate;
+/*
+ * Backup Blk 0: If this operation completes a page split, by inserting a
+ * downlink for the split page, the left half of the split
+ * Backup Blk 1 - npage: split pages (1 is the original page)
+ */
typedef struct gistxlogPageSplit
{
- RelFileNode node;
- BlockNumber origblkno; /* splitted page */
BlockNumber origrlink; /* rightlink of the page before split */
GistNSN orignsn; /* NSN of the page before split */
bool origleaf; /* was splitted page a leaf page? */
- BlockNumber leftchild; /* like in gistxlogPageUpdate */
uint16 npage; /* # of pages in the split */
bool markfollowright; /* set F_FOLLOW_RIGHT flags */
#define XLOG_HEAP_CONTAINS_OLD \
(XLOG_HEAP_CONTAINS_OLD_TUPLE | XLOG_HEAP_CONTAINS_OLD_KEY)
-/*
- * All what we need to find changed tuple
- *
- * NB: on most machines, sizeof(xl_heaptid) will include some trailing pad
- * bytes for alignment. We don't want to store the pad space in the XLOG,
- * so use SizeOfHeapTid for space calculations. Similar comments apply for
- * the other xl_FOO structs.
- */
-typedef struct xl_heaptid
-{
- RelFileNode node;
- ItemPointerData tid; /* changed tuple id */
-} xl_heaptid;
-
-#define SizeOfHeapTid (offsetof(xl_heaptid, tid) + SizeOfIptrData)
-
/* This is what we need to know about delete */
typedef struct xl_heap_delete
{
- xl_heaptid target; /* deleted tuple id */
TransactionId xmax; /* xmax of the deleted tuple */
+ OffsetNumber offnum; /* deleted tuple's offset */
uint8 infobits_set; /* infomask bits */
uint8 flags;
} xl_heap_delete;
#define SizeOfHeapHeader (offsetof(xl_heap_header, t_hoff) + sizeof(uint8))
-/*
- * Variant of xl_heap_header that contains the length of the tuple, which is
- * useful if the length of the tuple cannot be computed using the overall
- * record length. E.g. because there are several tuples inside a single
- * record.
- */
-typedef struct xl_heap_header_len
-{
- uint16 t_len;
- xl_heap_header header;
-} xl_heap_header_len;
-
-#define SizeOfHeapHeaderLen (offsetof(xl_heap_header_len, header) + SizeOfHeapHeader)
-
/* This is what we need to know about insert */
typedef struct xl_heap_insert
{
- xl_heaptid target; /* inserted tuple id */
+ OffsetNumber offnum; /* inserted tuple's offset */
uint8 flags;
- /* xl_heap_header & TUPLE DATA FOLLOWS AT END OF STRUCT */
+
+ /* xl_heap_header & TUPLE DATA in backup block 0 */
} xl_heap_insert;
#define SizeOfHeapInsert (offsetof(xl_heap_insert, flags) + sizeof(uint8))
/*
- * This is what we need to know about a multi-insert. The record consists of
- * xl_heap_multi_insert header, followed by a xl_multi_insert_tuple and tuple
- * data for each tuple. 'offsets' array is omitted if the whole page is
- * reinitialized (XLOG_HEAP_INIT_PAGE)
+ * This is what we need to know about a multi-insert.
+ *
+ * The main data of the record consists of this xl_heap_multi_insert header.
+ * 'offsets' array is omitted if the whole page is reinitialized
+ * (XLOG_HEAP_INIT_PAGE).
+ *
+ * Block data 0 consists of an array of an xl_multi_insert_tuple struct and
+ * tuple data for each tuple.
*/
typedef struct xl_heap_multi_insert
{
- RelFileNode node;
- BlockNumber blkno;
uint8 flags;
uint16 ntuples;
OffsetNumber offsets[1];
-
- /* TUPLE DATA (xl_multi_insert_tuples) FOLLOW AT END OF STRUCT */
} xl_heap_multi_insert;
#define SizeOfHeapMultiInsert offsetof(xl_heap_multi_insert, offsets)
#define SizeOfMultiInsertTuple (offsetof(xl_multi_insert_tuple, t_hoff) + sizeof(uint8))
-/* This is what we need to know about update|hot_update */
+/*
+ * This is what we need to know about update|hot_update
+ *
+ * Backup blk 0: new page
+ * Backup blk 1: old page, if different. (no data, just a reference to the blk)
+ */
typedef struct xl_heap_update
{
- xl_heaptid target; /* deleted tuple id */
TransactionId old_xmax; /* xmax of the old tuple */
- TransactionId new_xmax; /* xmax of the new tuple */
- ItemPointerData newtid; /* new inserted tuple id */
+ OffsetNumber old_offnum; /* old tuple's offset */
uint8 old_infobits_set; /* infomask bits to set on old tuple */
uint8 flags;
+ TransactionId new_xmax; /* xmax of the new tuple */
+ OffsetNumber new_offnum; /* new tuple's offset */
+ /*
+ * If XLOG_HEAP_CONTAINS_OLD_TUPLE or XLOG_HEAP_CONTAINS_OLD_KEY flags are
+ * set, a xl_heap_header struct and tuple data for the old tuple follows.
+ */
/*
+ * Block 0:
* If XLOG_HEAP_PREFIX_FROM_OLD or XLOG_HEAP_SUFFIX_FROM_OLD flags are
- * set, the prefix and/or suffix come next, as one or two uint16s.
+ * set, the prefix and/or suffix come first, as one or two uint16s.
*
- * After that, xl_heap_header_len and new tuple data follow. The new
- * tuple data and length don't include the prefix and suffix, which are
- * copied from the old tuple on replay. The new tuple data is omitted if
- * a full-page image of the page was taken (unless the
- * XLOG_HEAP_CONTAINS_NEW_TUPLE flag is set, in which case it's included
- * anyway).
+ * After that, xl_heap_header and new tuple data follow. The new tuple
+ * data doesn't include the prefix and suffix, which are copied from the
+ * old tuple on replay.
*
- * If XLOG_HEAP_CONTAINS_OLD_TUPLE or XLOG_HEAP_CONTAINS_OLD_KEY flags are
- * set, another xl_heap_header_len struct and tuple data for the old tuple
- * follows.
+ * If HEAP_CONTAINS_NEW_TUPLE_DATA flag is given, the tuple data is
+ * included even if a full-page image was taken.
*/
} xl_heap_update;
-#define SizeOfHeapUpdate (offsetof(xl_heap_update, flags) + sizeof(uint8))
+#define SizeOfHeapUpdate (offsetof(xl_heap_update, new_offnum) + sizeof(OffsetNumber))
/*
* This is what we need to know about vacuum page cleanup/redirect
*/
typedef struct xl_heap_clean
{
- RelFileNode node;
- BlockNumber block;
TransactionId latestRemovedXid;
uint16 nredirected;
uint16 ndead;
- /* OFFSET NUMBERS FOLLOW */
+ /* OFFSET NUMBERS are in the block reference 0 */
} xl_heap_clean;
#define SizeOfHeapClean (offsetof(xl_heap_clean, ndead) + sizeof(uint16))
/* This is what we need to know about lock */
typedef struct xl_heap_lock
{
- xl_heaptid target; /* locked tuple id */
TransactionId locking_xid; /* might be a MultiXactId not xid */
+ OffsetNumber offnum; /* locked tuple's offset on page */
int8 infobits_set; /* infomask and infomask2 bits to set */
} xl_heap_lock;
/* This is what we need to know about locking an updated version of a row */
typedef struct xl_heap_lock_updated
{
- xl_heaptid target;
TransactionId xmax;
+ OffsetNumber offnum;
uint8 infobits_set;
} xl_heap_lock_updated;
/* This is what we need to know about in-place update */
typedef struct xl_heap_inplace
{
- xl_heaptid target; /* updated tuple id */
+ OffsetNumber offnum; /* updated tuple's offset on page */
/* TUPLE DATA FOLLOWS AT END OF STRUCT */
} xl_heap_inplace;
-#define SizeOfHeapInplace (offsetof(xl_heap_inplace, target) + SizeOfHeapTid)
+#define SizeOfHeapInplace (offsetof(xl_heap_inplace, offnum) + sizeof(OffsetNumber))
/*
* This struct represents a 'freeze plan', which is what we need to know about
*/
typedef struct xl_heap_freeze_page
{
- RelFileNode node;
- BlockNumber block;
TransactionId cutoff_xid;
uint16 ntuples;
- xl_heap_freeze_tuple tuples[FLEXIBLE_ARRAY_MEMBER];
+ xl_heap_freeze_tuple tuples[FLEXIBLE_ARRAY_MEMBER]; /* stored in backup block 0 */
} xl_heap_freeze_page;
#define SizeOfHeapFreezePage offsetof(xl_heap_freeze_page, tuples)
-/* This is what we need to know about setting a visibility map bit */
+/*
+ * This is what we need to know about setting a visibility map bit
+ *
+ * Backup blk 0: visibility map buffer
+ * Backup blk 1: heap buffer
+ */
typedef struct xl_heap_visible
{
- RelFileNode node;
- BlockNumber block;
TransactionId cutoff_xid;
} xl_heap_visible;
/*
* Store the relfilenode/ctid pair to facilitate lookups.
*/
- xl_heaptid target;
+ RelFileNode target_node;
+ ItemPointerData target_tid;
} xl_heap_new_cid;
-#define SizeOfHeapNewCid (offsetof(xl_heap_new_cid, target) + SizeOfHeapTid)
+#define SizeOfHeapNewCid (offsetof(xl_heap_new_cid, target_tid) + sizeof(ItemPointerData))
/* logical rewrite xlog record header */
typedef struct xl_heap_rewrite_mapping
#define XLOG_BTREE_REUSE_PAGE 0xD0 /* old page is about to be reused from
* FSM */
-/*
- * All that we need to find changed index tuple
- */
-typedef struct xl_btreetid
-{
- RelFileNode node;
- ItemPointerData tid; /* changed tuple id */
-} xl_btreetid;
-
/*
* All that we need to regenerate the meta-data page
*/
*
* This data record is used for INSERT_LEAF, INSERT_UPPER, INSERT_META.
* Note that INSERT_META implies it's not a leaf page.
+ *
+ * Backup Blk 0: original page (data contains the inserted tuple)
+ * Backup Blk 1: child's left sibling, if INSERT_UPPER or INSERT_META
+ * Backup Blk 2: xl_btree_metadata, if INSERT_META
*/
typedef struct xl_btree_insert
{
- xl_btreetid target; /* inserted tuple id */
- /* BlockNumber finishes_split field FOLLOWS IF NOT XLOG_BTREE_INSERT_LEAF */
- /* xl_btree_metadata FOLLOWS IF XLOG_BTREE_INSERT_META */
- /* INDEX TUPLE FOLLOWS AT END OF STRUCT */
+ OffsetNumber offnum;
} xl_btree_insert;
-#define SizeOfBtreeInsert (offsetof(xl_btreetid, tid) + SizeOfIptrData)
+#define SizeOfBtreeInsert (offsetof(xl_btree_insert, offnum) + sizeof(OffsetNumber))
/*
* On insert with split, we save all the items going into the right sibling
* the root page, and thus that a newroot record rather than an insert or
* split record should follow. Note that a split record never carries a
* metapage update --- we'll do that in the parent-level update.
+ *
+ * Backup Blk 0: original page / new left page
+ * Backup Blk 1: new right page
+ * Backup Blk 2: next block (orig page's rightlink), if any
+ * Backup Blk 3: child's left sibling, if non-leaf split
*/
typedef struct xl_btree_split
{
- RelFileNode node;
- BlockNumber leftsib; /* orig page / new left page */
- BlockNumber rightsib; /* new right page */
- BlockNumber rnext; /* next block (orig page's rightlink) */
uint32 level; /* tree level of page being split */
OffsetNumber firstright; /* first item moved to right page */
+ OffsetNumber newitemoff; /* new item's offset (if placed on left page) */
+} xl_btree_split;
+typedef struct xl_btree_split_left
+{
/*
- * In the _L variants, next are OffsetNumber newitemoff and the new item.
+ * In the _L variants, next is the new item.
* (In the _R variants, the new item is one of the right page's tuples.)
- * The new item, but not newitemoff, is suppressed if XLogInsert chooses
- * to store the left page's whole page image.
*
* If level > 0, an IndexTuple representing the HIKEY of the left page
* follows. We don't need this on leaf pages, because it's the same as
- * the leftmost key in the new right page. Also, it's suppressed if
- * XLogInsert chooses to store the left page's whole page image.
- *
- * If level > 0, BlockNumber of the page whose incomplete-split flag this
- * insertion clears. (not aligned)
- *
+ * the leftmost key in the new right page.
+ */
+} xl_btree_split_left;
+
+typedef struct xl_btree_split_right
+{
+ /*
* Last are the right page's tuples in the form used by _bt_restore_page.
*/
-} xl_btree_split;
+} xl_btree_split_right;
-#define SizeOfBtreeSplit (offsetof(xl_btree_split, firstright) + sizeof(OffsetNumber))
+#define SizeOfBtreeSplit (offsetof(xl_btree_split, newitemoff) + sizeof(OffsetNumber))
/*
* This is what we need to know about delete of individual leaf index tuples.
* The WAL record can represent deletion of any number of index tuples on a
* single index page when *not* executed by VACUUM.
+ *
+ * Backup Blk 0: index page
*/
typedef struct xl_btree_delete
{
- RelFileNode node; /* RelFileNode of the index */
- BlockNumber block;
RelFileNode hnode; /* RelFileNode of the heap the index currently
* points at */
int nitems;
*/
typedef struct xl_btree_vacuum
{
- RelFileNode node;
- BlockNumber block;
BlockNumber lastBlockVacuumed;
/* TARGET OFFSET NUMBERS FOLLOW */
* remove this tuple's downlink and the *following* tuple's key). Note that
* the leaf page is empty, so we don't need to store its content --- it is
* just reinitialized during recovery using the rest of the fields.
+ *
+ * Backup Blk 0: leaf block
+ * Backup Blk 1: top parent
*/
typedef struct xl_btree_mark_page_halfdead
{
- xl_btreetid target; /* deleted tuple id in parent page */
+ OffsetNumber poffset; /* deleted tuple id in parent page */
/* information needed to recreate the leaf page: */
BlockNumber leafblk; /* leaf block ultimately being deleted */
* This is what we need to know about deletion of a btree page. Note we do
* not store any content for the deleted page --- it is just rewritten as empty
* during recovery, apart from resetting the btpo.xact.
+ *
+ * Backup Blk 0: target block being deleted
+ * Backup Blk 1: target block's left sibling, if any
+ * Backup Blk 2: target block's right sibling
+ * Backup Blk 3: leaf block (if different from target)
+ * Backup Blk 4: metapage
*/
typedef struct xl_btree_unlink_page
{
- RelFileNode node;
- BlockNumber deadblk; /* target block being deleted */
BlockNumber leftsib; /* target block's left sibling, if any */
BlockNumber rightsib; /* target block's right sibling */
* Information needed to recreate the leaf page, when target is an
* internal page.
*/
- BlockNumber leafblk;
BlockNumber leafleftsib;
BlockNumber leafrightsib;
BlockNumber topparent; /* next child down in the branch */
*
* Note that although this implies rewriting the metadata page, we don't need
* an xl_btree_metadata record --- the rootblk and level are sufficient.
+ *
+ * Backup Blk 0: new root page (2 tuples as payload, if splitting old root)
+ * Backup Blk 1: left child (if splitting an old root)
+ * Backup Blk 2: metapage
*/
typedef struct xl_btree_newroot
{
- RelFileNode node;
- BlockNumber rootblk; /* location of new root */
+ BlockNumber rootblk; /* location of new root (redundant with blk 0) */
uint32 level; /* its tree level */
- /* 0 or 2 INDEX TUPLES FOLLOW AT END OF STRUCT */
} xl_btree_newroot;
#define SizeOfBtreeNewroot (offsetof(xl_btree_newroot, level) + sizeof(uint32))
#include "access/spgist.h"
#include "nodes/tidbitmap.h"
#include "storage/buf.h"
-#include "storage/relfilenode.h"
#include "utils/relcache.h"
/*
* XLOG stuff
- *
- * ACCEPT_RDATA_* can only use fixed-length rdata arrays, because of lengthof
*/
-#define ACCEPT_RDATA_DATA(p, s, i) \
- do { \
- Assert((i) < lengthof(rdata)); \
- rdata[i].data = (char *) (p); \
- rdata[i].len = (s); \
- rdata[i].buffer = InvalidBuffer; \
- rdata[i].buffer_std = true; \
- rdata[i].next = NULL; \
- if ((i) > 0) \
- rdata[(i) - 1].next = rdata + (i); \
- } while(0)
-
-#define ACCEPT_RDATA_BUFFER(b, i) \
- do { \
- Assert((i) < lengthof(rdata)); \
- rdata[i].data = NULL; \
- rdata[i].len = 0; \
- rdata[i].buffer = (b); \
- rdata[i].buffer_std = true; \
- rdata[i].next = NULL; \
- if ((i) > 0) \
- rdata[(i) - 1].next = rdata + (i); \
- } while(0)
-
-
/* XLOG record types for SPGiST */
#define XLOG_SPGIST_CREATE_INDEX 0x00
#define XLOG_SPGIST_ADD_LEAF 0x10
(d).isBuild = (s)->isBuild; \
} while(0)
-
+/*
+ * Backup Blk 0: destination page for leaf tuple
+ * Backup Blk 1: parent page (if any)
+ */
typedef struct spgxlogAddLeaf
{
- RelFileNode node;
-
- BlockNumber blknoLeaf; /* destination page for leaf tuple */
bool newPage; /* init dest page? */
bool storesNulls; /* page is in the nulls tree? */
OffsetNumber offnumLeaf; /* offset where leaf tuple gets placed */
OffsetNumber offnumHeadLeaf; /* offset of head tuple in chain, if any */
- BlockNumber blknoParent; /* where the parent downlink is, if any */
- OffsetNumber offnumParent;
+ OffsetNumber offnumParent; /* where the parent downlink is, if any */
uint16 nodeI;
/* new leaf tuple follows (unaligned!) */
} spgxlogAddLeaf;
+/*
+ * Backup Blk 0: source leaf page
+ * Backup Blk 1: destination leaf page
+ * Backup Blk 2: parent page
+ */
typedef struct spgxlogMoveLeafs
{
- RelFileNode node;
-
- BlockNumber blknoSrc; /* source leaf page */
- BlockNumber blknoDst; /* destination leaf page */
uint16 nMoves; /* number of tuples moved from source page */
bool newPage; /* init dest page? */
bool replaceDead; /* are we replacing a DEAD source tuple? */
bool storesNulls; /* pages are in the nulls tree? */
- BlockNumber blknoParent; /* where the parent downlink is */
+ /* where the parent downlink is */
OffsetNumber offnumParent;
uint16 nodeI;
* Note: if replaceDead is true then there is only one inserted tuple
* number and only one leaf tuple in the data, because we are not copying
* the dead tuple from the source
- *
- * Buffer references in the rdata array are:
- * Src page
- * Dest page
- * Parent page
*----------
*/
OffsetNumber offsets[1];
#define SizeOfSpgxlogMoveLeafs offsetof(spgxlogMoveLeafs, offsets)
+/*
+ * Backup Blk 0: original page
+ * Backup Blk 1: where new tuple goes, if not same place
+ * Backup Blk 2: where parent downlink is, if updated and different from
+ * the old and new
+ */
typedef struct spgxlogAddNode
{
- RelFileNode node;
-
- BlockNumber blkno; /* block number of original inner tuple */
+ /* Backup blk 0, page containing original inner tuple */
OffsetNumber offnum; /* offset of original inner tuple */
- BlockNumber blknoParent; /* where parent downlink is, if updated */
- OffsetNumber offnumParent;
- uint16 nodeI;
-
- BlockNumber blknoNew; /* where new tuple goes, if not same place */
+ /* Backup blk 1, where new tuple goes, if not same place */
OffsetNumber offnumNew;
bool newPage; /* init new page? */
+ /* Backup Blk 2, where parent downlink is, if updated */
+ char parentBlk; /* 0: parent == original page,
+ * 1: parent == new page,
+ * 2: parent == different page (blk ref 2)
+ * -1: parent not updated*/
+ OffsetNumber offnumParent;
+ uint16 nodeI;
+
spgxlogState stateSrc;
/*
*/
} spgxlogAddNode;
+/*
+ * Backup Blk 0: where the prefix tuple goes
+ * Backup Blk 1: where the postfix tuple goes (if different page)
+ */
typedef struct spgxlogSplitTuple
{
- RelFileNode node;
-
- BlockNumber blknoPrefix; /* where the prefix tuple goes */
+ /* where the prefix tuple goes */
OffsetNumber offnumPrefix;
- BlockNumber blknoPostfix; /* where the postfix tuple goes */
+ /* where the postfix tuple goes */
OffsetNumber offnumPostfix;
- bool newPage; /* need to init that page? */
+ bool newPage; /* need to init that page? */
+ bool postfixBlkSame; /* was postfix tuple put on same page as
+ * prefix? */
/*
* new prefix inner tuple follows, then new postfix inner tuple
*/
} spgxlogSplitTuple;
+/*
+ * Buffer references in the rdata array are:
+ * Backup Blk 0: Src page (only if not root)
+ * Backup Blk 1: Dest page (if used)
+ * Backup Blk 2: Inner page
+ * Backup Blk 3: Parent page (if any, and different from Inner)
+ */
typedef struct spgxlogPickSplit
{
- RelFileNode node;
+ bool isRootSplit;
- BlockNumber blknoSrc; /* original leaf page */
- BlockNumber blknoDest; /* other leaf page, if any */
uint16 nDelete; /* n to delete from Src */
uint16 nInsert; /* n to insert on Src and/or Dest */
bool initSrc; /* re-init the Src page? */
bool initDest; /* re-init the Dest page? */
- BlockNumber blknoInner; /* where to put new inner tuple */
+ /* where to put new inner tuple */
OffsetNumber offnumInner;
bool initInner; /* re-init the Inner page? */
bool storesNulls; /* pages are in the nulls tree? */
- BlockNumber blknoParent; /* where the parent downlink is, if any */
+ /* where the parent downlink is, if any */
+ bool innerIsParent; /* is parent the same as inner page? */
OffsetNumber offnumParent;
uint16 nodeI;
* array of inserted tuple numbers, length nInsert
* array of page selector bytes for inserted tuples, length nInsert
* new inner tuple (unaligned!)
- * list of leaf tuples, length nInsert (unaligned!)
+ * list of leaf tuples, length nInsert (must be maxaligned)
+ * the tuple number and page selector arrays are padded to maxalign
+ * boundaries so that the leaf tuples will be suitably aligned
*
- * Buffer references in the rdata array are:
- * Src page (only if not root and not being init'd)
- * Dest page (if used and not being init'd)
- * Inner page (only if not being init'd)
- * Parent page (if any; could be same as Inner)
*----------
*/
OffsetNumber offsets[1];
typedef struct spgxlogVacuumLeaf
{
- RelFileNode node;
-
- BlockNumber blkno; /* block number to clean */
uint16 nDead; /* number of tuples to become DEAD */
uint16 nPlaceholder; /* number of tuples to become PLACEHOLDER */
uint16 nMove; /* number of tuples to move */
typedef struct spgxlogVacuumRoot
{
/* vacuum a root page when it is also a leaf */
- RelFileNode node;
-
- BlockNumber blkno; /* block number to clean */
uint16 nDelete; /* number of tuples to delete */
spgxlogState stateSrc;
typedef struct spgxlogVacuumRedirect
{
- RelFileNode node;
-
- BlockNumber blkno; /* block number to clean */
uint16 nToPlaceholder; /* number of redirects to make placeholders */
OffsetNumber firstPlaceholder; /* first placeholder tuple to remove */
TransactionId newestRedirectXid; /* newest XID of removed redirects */
extern CheckpointStatsData CheckpointStats;
-extern XLogRecPtr XLogInsertRecord(XLogRecData *rdata, XLogRecPtr fpw_lsn);
+struct XLogRecData;
+
+extern XLogRecPtr XLogInsertRecord(struct XLogRecData *rdata, XLogRecPtr fpw_lsn);
extern void XLogFlush(XLogRecPtr RecPtr);
extern bool XLogBackgroundFlush(void);
extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
TimeLineID PrevTimeLineID; /* previous TLI we forked off from */
} xl_end_of_recovery;
+/*
+ * The functions in xloginsert.c construct a chain of XLogRecData structs
+ * to represent the final WAL record.
+ */
+typedef struct XLogRecData
+{
+ struct XLogRecData *next; /* next struct in chain, or NULL */
+ char *data; /* start of rmgr data to include */
+ uint32 len; /* length of rmgr data to include */
+} XLogRecData;
+
/*
* Method table for resource managers.
*
#include "storage/relfilenode.h"
/*
- * The rmgr data to be written by XLogInsert() is defined by a chain of
- * one or more XLogRecData structs. (Multiple structs would be used when
- * parts of the source data aren't physically adjacent in memory, or when
- * multiple associated buffers need to be specified.)
- *
- * If buffer is valid then XLOG will check if buffer must be backed up
- * (ie, whether this is first change of that page since last checkpoint).
- * If so, the whole page contents are attached to the XLOG record, and XLOG
- * sets XLR_BKP_BLOCK(N) bit in xl_info. Note that the buffer must be pinned
- * and exclusive-locked by the caller, so that it won't change under us.
- * NB: when the buffer is backed up, we DO NOT insert the data pointed to by
- * this XLogRecData struct into the XLOG record, since we assume it's present
- * in the buffer. Therefore, rmgr redo routines MUST pay attention to
- * XLR_BKP_BLOCK(N) to know what is actually stored in the XLOG record.
- * The N'th XLR_BKP_BLOCK bit corresponds to the N'th distinct buffer
- * value (ignoring InvalidBuffer) appearing in the rdata chain.
- *
- * When buffer is valid, caller must set buffer_std to indicate whether the
- * page uses standard pd_lower/pd_upper header fields. If this is true, then
- * XLOG is allowed to omit the free space between pd_lower and pd_upper from
- * the backed-up page image. Note that even when buffer_std is false, the
- * page MUST have an LSN field as its first eight bytes!
- *
- * Note: data can be NULL to indicate no rmgr data associated with this chain
- * entry. This can be sensible (ie, not a wasted entry) if buffer is valid.
- * The implication is that the buffer has been changed by the operation being
- * logged, and so may need to be backed up, but the change can be redone using
- * only information already present elsewhere in the XLOG entry.
+ * The minimum size of the WAL construction working area. If you need to
+ * register more than XLR_NORMAL_BKP_BLOCKS block references or have more
+ * than XLR_NORMAL_RDATAS data chunks in a single WAL record, you must call
+ * XLogEnsureRecordSpace() first to allocate more working memory.
*/
-typedef struct XLogRecData
-{
- char *data; /* start of rmgr data to include */
- uint32 len; /* length of rmgr data to include */
- Buffer buffer; /* buffer associated with data, if any */
- bool buffer_std; /* buffer has standard pd_lower/pd_upper */
- struct XLogRecData *next; /* next struct in chain, or NULL */
-} XLogRecData;
+#define XLR_NORMAL_BKP_BLOCKS 4
+#define XLR_NORMAL_RDATAS 20
+
+/* flags for XLogRegisterBuffer */
+#define REGBUF_FORCE_IMAGE (1<<0) /* force a full-page image */
+#define REGBUF_WILL_INIT (1<<1) /* page will be re-initialized at replay
+ * (implies NO_IMAGE) */
+#define REGBUF_NO_IMAGE (1<<2) /* don't take a full-page image */
+#define REGBUF_STANDARD (1<<3) /* page follows "standard" page layout,
+ * (data between pd_lower and pd_upper
+ * will be skipped) */
+#define REGBUF_KEEP_DATA (1<<4) /* don't omit data if a full-page image is
+ * taken */
+
+/* prototypes for public functions in xloginsert.c: */
+extern void XLogBeginInsert(void);
+extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info);
+extern void XLogEnsureRecordSpace(int nbuffers, int ndatas);
+extern void XLogRegisterData(char *data, int len);
+extern void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags);
+extern void XLogRegisterBlock(uint8 block_id, RelFileNode *rnode,
+ ForkNumber forknum, BlockNumber blknum, char *page,
+ uint8 flags);
+extern void XLogRegisterBufData(uint8 block_id, char *data, int len);
+extern void XLogResetInsertion(void);
+extern bool XLogCheckBufferNeedsBackup(Buffer buffer);
-extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata);
extern XLogRecPtr log_newpage(RelFileNode *rnode, ForkNumber forkNum,
BlockNumber blk, char *page, bool page_std);
extern XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std);
extern XLogRecPtr XLogSaveBufferForHint(Buffer buffer, bool buffer_std);
-extern bool XLogCheckBufferNeedsBackup(Buffer buffer);
+
+extern void InitXLogInsert(void);
#endif /* XLOGINSERT_H */
* The overall layout of an XLOG record is:
* Fixed-size header (XLogRecord struct)
* rmgr-specific data
- * BkpBlock
- * backup block data
- * BkpBlock
- * backup block data
+ * XLogRecordBlockData struct
+ * block data
+ * XLogRecordBlockData struct
+ * block data
* ...
*
- * where there can be zero to four backup blocks (as signaled by xl_info flag
- * bits). XLogRecord structs always start on MAXALIGN boundaries in the WAL
- * files, and we round up SizeOfXLogRecord so that the rmgr data is also
- * guaranteed to begin on a MAXALIGN boundary. However, no padding is added
- * to align BkpBlock structs or backup block data.
+ * There can be zero or more XLogRecordBlockDatas, and 0 or more bytes of
+ * rmgr-specific data not associated with a block. XLogRecord structs
+ * always start on MAXALIGN boundaries in the WAL files, and we round up
+ * SizeOfXLogRecord so that the rmgr data is also guaranteed to begin on a
+ * MAXALIGN boundary. The XLogRecordBlockData structs are not aligned, but
+ * the block data after them are.
*
* NOTE: xl_len counts only the rmgr data, not the XLogRecord header,
- * and also not any backup blocks. xl_tot_len counts everything. Neither
+ * and also not any block datas. xl_tot_len counts everything. Neither
* length field is rounded up to an alignment boundary.
*/
typedef struct XLogRecord
#define SizeOfXLogRecord MAXALIGN(sizeof(XLogRecord))
-#define XLogRecGetData(record) ((char*) (record) + SizeOfXLogRecord)
-
/*
* XLOG uses only low 4 bits of xl_info. High 4 bits may be used by rmgr.
*/
#define XLR_INFO_MASK 0x0F
+#define XLR_RMGR_INFO_MASK 0xF0
/*
- * If we backed up any disk blocks with the XLOG record, we use flag bits in
- * xl_info to signal it. We support backup of up to 4 disk blocks per XLOG
- * record.
+ * xl_len == 0 is only allowed if this flag is set. This provides an extra
+ * cross-check when reading records.
*/
-#define XLR_BKP_BLOCK_MASK 0x0F /* all info bits used for bkp blocks */
-#define XLR_MAX_BKP_BLOCKS 4
-#define XLR_BKP_BLOCK(iblk) (0x08 >> (iblk)) /* iblk in 0..3 */
+#define XLR_NO_RMGR_DATA 0x02
/*
- * Header info for a backup block appended to an XLOG record.
+ * Header info for block data appended to an XLOG record.
+ *
+ * Note that we don't attempt to align the XLogRecordBlockData struct!
+ * So, the struct must be copied to aligned local storage before use. The
+ * block data itself is aligned at a MAXALIGN boundary, for the convenience
+ * of redo routines. 'data_length' is the length of the payload data, not
+ * including the XLogRecordBlockData struct or padding.
+ */
+typedef struct XLogRecordBlockData
+{
+ uint8 id; /* block reference ID */
+ uint8 fork_flags; /* fork within the relation, and flags */
+ uint16 data_length; /* number of payload bytes */
+ BlockNumber block; /* block number */
+
+ /*
+ * Relation containing the block. This is omitted if BKPBLOCK_SAME_REL
+ * flag is set!
+ */
+ RelFileNode node;
+
+ /*
+ * BLOCK DATA OR XLogRecordBlockImage struct FOLLOWS AT NEXT MAXALIGN
+ * BOUNDARY
+ */
+} XLogRecordBlockData;
+
+#define SizeOfXLogRecordBlockData (offsetof(XLogRecordBlockData, node) + sizeof(RelFileNode))
+#define SizeOfXLogRecordBlockDataSameRel (offsetof(XLogRecordBlockData, block) + sizeof(BlockNumber))
+
+/*
+ * The fork number fits in the lower 4 bits in the fork_flags field. The upper
+ * bits are used for flags.
+ */
+#define BKPBLOCK_FORK_MASK 0x0F
+#define BKPBLOCK_FLAG_MASK 0xF0
+#define BKPBLOCK_HAS_IMAGE 0x10 /* block data is an XLogRecordBlockImage */
+#define BKPBLOCK_HAS_DATA 0x20
+#define BKPBLOCK_WILL_INIT 0x40 /* redo will re-init the page */
+#define BKPBLOCK_SAME_REL 0x80 /* RelFileNode omitted, same as previous */
+
+/*
+ * Full-page image data appended to an XLOG record.
*
* As a trivial form of data compression, the XLOG code is aware that
* PG data pages usually contain an unused "hole" in the middle, which
* XLOG record's CRC, either). Hence, the amount of block data actually
* present following the BkpBlock struct is BLCKSZ - hole_length bytes.
*
- * Note that we don't attempt to align either the BkpBlock struct or the
- * block's data. So, the struct must be copied to aligned local storage
- * before use.
+ * Note that we don't attempt to align the block content.
*/
-typedef struct BkpBlock
+typedef struct XLogRecordBlockImage
{
- RelFileNode node; /* relation containing block */
- ForkNumber fork; /* fork within the relation */
- BlockNumber block; /* block number */
uint16 hole_offset; /* number of bytes before "hole" */
uint16 hole_length; /* number of bytes in "hole" */
- /* ACTUAL BLOCK DATA FOLLOWS AT END OF STRUCT */
-} BkpBlock;
+ /* ACTUAL BLOCK CONTENT FOLLOWS AT END OF STRUCT */
+} XLogRecordBlockImage;
+
+/*
+ * The number of block references in a WAL record is currently limited by
+ * the fact that we store the block ID as an 8-bit integer.
+ */
+#define XLR_MAX_BKP_BLOCKS 256
+
+
+/* prototypes for XLogRecord decoding functions, in xlogreader.c */
+#define XLogRecGetData(record) ((char *) (record) + SizeOfXLogRecord)
+#define XLogRecHasAnyBlockRefs(record) ((record)->xl_tot_len > SizeOfXLogRecord + (record)->xl_len)
+
+extern bool XLogRecHasBlockRef(XLogRecord *record, uint8 block_id);
+extern bool XLogRecHasBlockImage(XLogRecord *record, uint8 block_id);
+extern uint8 *XLogRecGetBlockRefIds(XLogRecord *record, int *num_refs);
+extern char *XLogRecGetBlockData(XLogRecord *record, uint8 block_id, Size *len);
+extern void XLogRecGetBlockTag(XLogRecord *record, uint8 block_id,
+ RelFileNode *rnode, ForkNumber *forknum,
+ BlockNumber *blknum);
+
+extern XLogRecordBlockData *XLogRecGetBlockRef(XLogRecord *record,
+ uint8 block_id, char **content);
#endif /* XLOGRECORD_H */
} XLogRedoAction;
extern XLogRedoAction XLogReadBufferForRedo(XLogRecPtr lsn, XLogRecord *record,
- int block_index, RelFileNode rnode, BlockNumber blkno,
- Buffer *buf);
+ uint8 buffer_id, Buffer *buf);
extern XLogRedoAction XLogReadBufferForRedoExtended(XLogRecPtr lsn,
- XLogRecord *record, int block_index,
- RelFileNode rnode, ForkNumber forkno,
- BlockNumber blkno,
+ XLogRecord *record, uint8 buffer_id,
ReadBufferMode mode, bool get_cleanup_lock,
Buffer *buf);
-extern Buffer XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init);
extern Buffer XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
BlockNumber blkno, ReadBufferMode mode);
-extern Buffer RestoreBackupBlock(XLogRecPtr lsn, XLogRecord *record,
- int block_index,
- bool get_cleanup_lock, bool keep_buffer);
-extern Buffer RestoreBackupBlockContents(XLogRecPtr lsn, BkpBlock bkpb,
- char *blk, bool get_cleanup_lock, bool keep_buffer);
-
extern Relation CreateFakeRelcacheEntry(RelFileNode rnode);
extern void FreeFakeRelcacheEntry(Relation fakerel);