--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * bghinter.c
+ *
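+ * The background hinter (bghinter) is new as of Postgres 9.2. It is intended
+ * to run periodic cycles of dirty-buffer hinting; that work is not implemented
+ * yet, so at present the process only services signals and naps.
+ * (TODO: finish this description once the hinting code exists.)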
+ *
+ * The bghinter is started by the postmaster as soon as the startup subprocess
+ * finishes, or as soon as recovery begins if we are doing archive recovery.
+ * (TODO: Is this the right thing for the hinter to do?)
+ * It remains alive until the postmaster commands it to terminate.
+ * Normal termination is by SIGTERM, which instructs the bghinter to exit(0).
+ * Emergency termination is by SIGQUIT; like any
+ * backend, the bghinter will simply abort and exit on SIGQUIT.
+ *
+ * If the bghinter exits unexpectedly, the postmaster treats that the same
+ * as a backend crash: shared memory may be corrupted, so remaining backends
+ * should be killed by SIGQUIT and then a recovery cycle started. (Even if
+ * shared memory isn't corrupted, we have lost information about which
+ * files need to be fsync'd for the next checkpoint, and so a system
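+ * restart needs to be forced.)
+ * NOTE(review): nothing in this file tracks fsync requests, so the
+ * parenthetical above may not apply to the bghinter -- confirm or remove.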
+ *
+ *
+ * Portions Copyright (c) 2011, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/postmaster/bghinter.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <signal.h>
+#include <sys/time.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "access/xlog_internal.h"
+#include "libpq/pqsignal.h"
+#include "miscadmin.h"
+#include "pgstat.h"
+#include "postmaster/bghinter.h"
+#include "storage/bufmgr.h"
+#include "storage/ipc.h"
+#include "storage/lwlock.h"
+#include "storage/pmsignal.h"
+#include "storage/smgr.h"
+#include "utils/guc.h"
+#include "utils/memutils.h"
+#include "utils/resowner.h"
+
+
+/*
+ * GUC parameters
+ *
+ * BgHinterDelay is the nap time between cycles of the main loop, in
+ * milliseconds: BgHinterNap() multiplies it by 1000 to get the microsecond
+ * count it sleeps for.
+ */
+int BgHinterDelay = 200;
+
+/*
+ * Flags set by interrupt handlers for later service in the main loop.
+ *
+ * The handlers only ever set these to true. bghinter_got_SIGHUP is cleared
+ * by the main loop before it re-reads the configuration file;
+ * bghinter_shutdown_requested is never cleared, since the process exits once
+ * the main loop sees it.
+ */
+static volatile sig_atomic_t bghinter_got_SIGHUP = false;
+static volatile sig_atomic_t bghinter_shutdown_requested = false;
+
+/* Prototypes for private functions */
+
+static void BgHinterNap(void);
+
+/* Signal handlers */
+
+static void bghinter_quickdie(SIGNAL_ARGS);
+static void BgHinterSigHupHandler(SIGNAL_ARGS);
+static void BgHinterShutdownHandler(SIGNAL_ARGS);
+
+/*
+ * Main entry point for bghinter process
+ *
+ * This is invoked from BootstrapMain, which has already created the basic
+ * execution environment, but not enabled signals yet.
+ *
+ * The function sets up signal handling, a resource owner, a private memory
+ * context and an error-recovery point, then enters an endless loop. The
+ * loop has no break: the only ways out are exit(1) when the postmaster is
+ * found to be dead and proc_exit(0) once a shutdown has been requested.
+ */
+void
+BackgroundHinterMain(void)
+{
+ sigjmp_buf local_sigjmp_buf;
+ MemoryContext bghinter_context;
+
+ /*
+ * If possible, make this process a group leader, so that the postmaster
+ * can signal any child processes too. (bghinter probably never has any
+ * child processes, but for consistency we make all postmaster child
+ * processes do this.)
+ */
+#ifdef HAVE_SETSID
+ if (setsid() < 0)
+ elog(FATAL, "setsid() failed: %m");
+#endif
+
+ /*
+ * Properly accept or ignore signals the postmaster might send us
+ *
+ * SIGUSR1 is presently unused; keep it spare in case someday we want this
+ * process to participate in ProcSignal signalling.
+ */
+ pqsignal(SIGHUP, BgHinterSigHupHandler); /* set flag to read config file */
+ pqsignal(SIGINT, SIG_IGN); /* ignored by the bghinter */
+ pqsignal(SIGTERM, BgHinterShutdownHandler); /* shutdown */
+ pqsignal(SIGQUIT, bghinter_quickdie); /* hard crash time */
+ pqsignal(SIGALRM, SIG_IGN);
+ pqsignal(SIGPIPE, SIG_IGN);
+ pqsignal(SIGUSR1, SIG_IGN); /* reserve for ProcSignal */
+ pqsignal(SIGUSR2, SIG_IGN); /* not used; shutdown is requested via SIGTERM */
+
+ /*
+ * Reset some signals that are accepted by postmaster but not here
+ */
+ pqsignal(SIGCHLD, SIG_DFL);
+ pqsignal(SIGTTIN, SIG_DFL);
+ pqsignal(SIGTTOU, SIG_DFL);
+ pqsignal(SIGCONT, SIG_DFL);
+ pqsignal(SIGWINCH, SIG_DFL);
+
+ /* We allow SIGQUIT (quickdie) at all times */
+ sigdelset(&BlockSig, SIGQUIT);
+
+ /*
+ * Create a resource owner to keep track of our resources (currently only
+ * buffer pins).
+ */
+ CurrentResourceOwner = ResourceOwnerCreate(NULL, "Background Hinter");
+
+ /*
+ * Create a memory context that we will do all our work in. We do this so
+ * that we can reset the context during error recovery and thereby avoid
+ * possible memory leaks. We must not work directly in TopMemoryContext,
+ * because resetting that would be a really bad idea.
+ */
+ bghinter_context = AllocSetContextCreate(TopMemoryContext,
+ "Background Hinter",
+ ALLOCSET_DEFAULT_MINSIZE,
+ ALLOCSET_DEFAULT_INITSIZE,
+ ALLOCSET_DEFAULT_MAXSIZE);
+ MemoryContextSwitchTo(bghinter_context);
+
+ /*
+ * If an exception is encountered, processing resumes here.
+ *
+ * See notes in postgres.c about the design of this coding.
+ */
+ if (sigsetjmp(local_sigjmp_buf, 1) != 0)
+ {
+ /* Since not using PG_TRY, must reset error stack by hand */
+ error_context_stack = NULL;
+
+ /* Prevent interrupts while cleaning up */
+ HOLD_INTERRUPTS();
+
+ /* Report the error to the server log */
+ EmitErrorReport();
+
+ /*
+ * These operations are really just a minimal subset of
+ * AbortTransaction(). We don't have very many resources to worry
+ * about in bghinter, but we do have LWLocks, buffers, and temp files.
+ */
+ LWLockReleaseAll();
+ AbortBufferIO();
+ UnlockBuffers();
+ /* buffer pins are released here: */
+ ResourceOwnerRelease(CurrentResourceOwner,
+ RESOURCE_RELEASE_BEFORE_LOCKS,
+ false, true);
+ /* we needn't bother with the other ResourceOwnerRelease phases */
+ AtEOXact_Buffers(false);
+ AtEOXact_Files();
+ AtEOXact_HashTables(false);
+
+ /*
+ * Now return to normal top-level context and clear ErrorContext for
+ * next time.
+ */
+ MemoryContextSwitchTo(bghinter_context);
+ FlushErrorState();
+
+ /* Flush any leaked data in the top-level context */
+ MemoryContextResetAndDeleteChildren(bghinter_context);
+
+ /* Now we can allow interrupts again */
+ RESUME_INTERRUPTS();
+
+ /*
+ * Sleep at least 1 second after any error. An error is likely to be
+ * repeated on the next cycle, and we don't want to be filling the
+ * error logs as fast as we can.
+ */
+ pg_usleep(1000000L);
+
+ /*
+ * Close all open files after any error. This is helpful on Windows,
+ * where holding deleted files open causes various strange errors.
+ * It's not clear we need it elsewhere, but shouldn't hurt.
+ */
+ smgrcloseall();
+ }
+
+ /* We can now handle ereport(ERROR) */
+ PG_exception_stack = &local_sigjmp_buf;
+
+ /*
+ * Unblock signals (they were blocked when the postmaster forked us)
+ */
+ PG_SETMASK(&UnBlockSig);
+
+ /*
+ * Use the recovery target timeline ID during recovery
+ */
+ if (RecoveryInProgress())
+ ThisTimeLineID = GetRecoveryTargetTLI();
+
+ /*
+ * Loop forever
+ */
+ for (;;)
+ {
+ /*
+ * Emergency bailout if postmaster has died. This is to avoid the
+ * necessity for manual cleanup of all postmaster children.
+ */
+ if (!PostmasterIsAlive())
+ exit(1);
+
+ if (bghinter_got_SIGHUP)
+ {
+ /* clear the flag first so a SIGHUP arriving during the reload is not lost */
+ bghinter_got_SIGHUP = false;
+ ProcessConfigFile(PGC_SIGHUP);
+ /* nothing else needs refreshing after a reload at present */
+ }
+ if (bghinter_shutdown_requested)
+ {
+ /*
+ * From here on, elog(ERROR) should end with exit(1), not send
+ * control back to the sigsetjmp block above
+ */
+ ExitOnAnyError = true;
+ /* Normal exit from the bghinter is here */
+ proc_exit(0); /* done */
+ }
+
+ /*
+ * Do one cycle of dirty-buffer hinting. (Not implemented yet, so for
+ * now each iteration only services the flags above and naps.)
+ */
+ /* TODO: BgHintBuffers(); */
+
+ /* Nap for the configured time. */
+ BgHinterNap();
+ }
+}
+
+/*
+ * BgHinterNap -- sleep for BgHinterDelay milliseconds, cutting the nap short
+ * once a SIGHUP or shutdown request has been flagged.
+ *
+ * The flags are only polled between sleep slices, so a pending request is
+ * noticed at the next slice boundary rather than immediately.
+ */
+static void
+BgHinterNap(void)
+{
+    /* BgHinterDelay is in milliseconds; pg_usleep wants microseconds */
+    long remaining_us = BgHinterDelay * 1000L;
+
+    /*
+     * Signals don't interrupt the sleep on every platform. To stay
+     * reasonably responsive, take the nap in slices of one second and look
+     * at the request flags before each slice.
+     */
+    for (; remaining_us >= 1000000L; remaining_us -= 1000000L)
+    {
+        if (bghinter_got_SIGHUP || bghinter_shutdown_requested)
+            break;
+        pg_usleep(1000000L);
+    }
+
+    /* Sleep off whatever is left, unless a request is waiting for service */
+    if (!bghinter_got_SIGHUP && !bghinter_shutdown_requested)
+        pg_usleep(remaining_us);
+}
+
+/* --------------------------------
+ * signal handler routines
+ * --------------------------------
+ */
+
+/*
+ * bghinter_quickdie() occurs when signalled SIGQUIT by the postmaster.
+ *
+ * Some backend has bought the farm,
+ * so we need to stop what we're doing and exit.
+ *
+ * This handler never returns: it terminates the process with exit status 2.
+ */
+static void
+bghinter_quickdie(SIGNAL_ARGS)
+{
+ /* Apply the BlockSig mask for the rest of this process's (short) life */
+ PG_SETMASK(&BlockSig);
+
+ /*
+ * We DO NOT want to run proc_exit() callbacks -- we're here because
+ * shared memory may be corrupted, so we don't want to try to clean up
+ * any of our state. Just nail the windows shut and get out of town. Now
+ * that there's an atexit callback to prevent third-party code from
+ * breaking things by calling exit() directly, we have to reset the
+ * callbacks explicitly to make this work as intended.
+ */
+ on_exit_reset();
+
+ /*
+ * Note we do exit(2) not exit(0). This is to force the postmaster into a
+ * system reset cycle if some idiot DBA sends a manual SIGQUIT to a random
+ * backend. This is necessary precisely because we don't clean up our
+ * shared memory state. (The "dead man switch" mechanism in pmsignal.c
+ * should ensure the postmaster sees this as a crash, too, but no harm in
+ * being doubly sure.)
+ */
+ exit(2);
+}
+
+/*
+ * SIGHUP: set flag to re-read config file at next convenient time
+ *
+ * The handler itself does no work beyond setting the flag; the main loop in
+ * BackgroundHinterMain() clears it and calls ProcessConfigFile(PGC_SIGHUP).
+ */
+static void
+BgHinterSigHupHandler(SIGNAL_ARGS)
+{
+ bghinter_got_SIGHUP = true;
+}
+
+/*
+ * SIGTERM: set flag requesting shutdown
+ *
+ * The handler does not exit by itself; the main loop in
+ * BackgroundHinterMain() notices the flag and calls proc_exit(0).
+ */
+static void
+BgHinterShutdownHandler(SIGNAL_ARGS)
+{
+ bghinter_shutdown_requested = true;
+}
/* PIDs of special child processes; 0 when not running */
static pid_t StartupPID = 0,
BgWriterPID = 0,
+ BgHinterPID = 0,
CheckpointerPID = 0,
WalWriterPID = 0,
WalReceiverPID = 0,
#define StartupDataBase() StartChildProcess(StartupProcess)
#define StartBackgroundWriter() StartChildProcess(BgWriterProcess)
+#define StartBackgroundHinter() StartChildProcess(BgHinterProcess)
#define StartCheckpointer() StartChildProcess(CheckpointerProcess)
#define StartWalWriter() StartChildProcess(WalWriterProcess)
#define StartWalReceiver() StartChildProcess(WalReceiverProcess)
*
* CAUTION: when changing this list, check for side-effects on the signal
* handling setup of child processes. See tcop/postgres.c,
- * bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/walwriter.c,
+ * bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/bghinter.c, postmaster/walwriter.c,
* postmaster/autovacuum.c, postmaster/pgarch.c, postmaster/pgstat.c,
* postmaster/syslogger.c and postmaster/checkpointer.c
*/
{
if (BgWriterPID == 0)
BgWriterPID = StartBackgroundWriter();
+ if (BgHinterPID == 0)
+ BgHinterPID = StartBackgroundHinter();
if (CheckpointerPID == 0)
CheckpointerPID = StartCheckpointer();
}
signal_child(StartupPID, SIGHUP);
if (BgWriterPID != 0)
signal_child(BgWriterPID, SIGHUP);
+ if (BgHinterPID != 0)
+ signal_child(BgHinterPID, SIGHUP);
if (CheckpointerPID != 0)
signal_child(CheckpointerPID, SIGHUP);
if (WalWriterPID != 0)
signal_child(WalWriterPID, SIGTERM);
if (BgWriterPID != 0)
signal_child(BgWriterPID, SIGTERM);
+ if (BgHinterPID != 0)
+ signal_child(BgHinterPID, SIGTERM);
/*
* If we're in recovery, we can't kill the startup process
signal_child(WalReceiverPID, SIGTERM);
if (BgWriterPID != 0)
signal_child(BgWriterPID, SIGTERM);
+ if (BgHinterPID != 0)
+ signal_child(BgHinterPID, SIGTERM);
if (pmState == PM_RECOVERY)
{
/* only checkpointer is active in this state */
signal_child(StartupPID, SIGQUIT);
if (BgWriterPID != 0)
signal_child(BgWriterPID, SIGQUIT);
+ if (BgHinterPID != 0)
+ signal_child(BgHinterPID, SIGQUIT);
if (CheckpointerPID != 0)
signal_child(CheckpointerPID, SIGQUIT);
if (WalWriterPID != 0)
*/
if (BgWriterPID == 0)
BgWriterPID = StartBackgroundWriter();
+ if (BgHinterPID == 0)
+ BgHinterPID = StartBackgroundHinter();
if (CheckpointerPID == 0)
CheckpointerPID = StartCheckpointer();
continue;
}
+ /*
+ * Was it the bghinter?
+ */
+ if (pid == BgHinterPID)
+ {
+ BgHinterPID = 0;
+ if (!EXIT_STATUS_0(exitstatus))
+ HandleChildCrash(pid, exitstatus,
+ _("background hinter process"));
+ continue;
+ }
+
/*
* Was it the checkpointer?
*/
}
/*
- * HandleChildCrash -- cleanup after failed backend, bgwriter, checkpointer,
+ * HandleChildCrash -- cleanup after failed backend, bgwriter, bghinter, checkpointer,
* walwriter or autovacuum.
*
* The objectives here are to clean up our local state about the child
signal_child(BgWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
}
+ /* Take care of the bghinter too */
+ if (pid == BgHinterPID)
+ BgHinterPID = 0;
+ else if (BgHinterPID != 0 && !FatalError)
+ {
+ ereport(DEBUG2,
+ (errmsg_internal("sending %s to process %d",
+ (SendStop ? "SIGSTOP" : "SIGQUIT"),
+ (int) BgHinterPID)));
+ signal_child(BgHinterPID, (SendStop ? SIGSTOP : SIGQUIT));
+ }
+
/* Take care of the checkpointer too */
if (pid == CheckpointerPID)
CheckpointerPID = 0;
{
/*
* PM_WAIT_BACKENDS state ends when we have no regular backends
- * (including autovac workers) and no walwriter, autovac launcher
- * or bgwriter. If we are doing crash recovery then we expect the
+ * (including autovac workers) and no walwriter, autovac launcher,
+ * bgwriter or bghinter. If we are doing crash recovery then we expect the
* checkpointer to exit as well, otherwise not.
* The archiver, stats, and syslogger processes
* are disregarded since they are not connected to shared memory; we
StartupPID == 0 &&
WalReceiverPID == 0 &&
BgWriterPID == 0 &&
+ BgHinterPID == 0 &&
(CheckpointerPID == 0 || !FatalError) &&
WalWriterPID == 0 &&
AutoVacPID == 0)
Assert(StartupPID == 0);
Assert(WalReceiverPID == 0);
Assert(BgWriterPID == 0);
+ Assert(BgHinterPID == 0);
Assert(CheckpointerPID == 0);
Assert(WalWriterPID == 0);
Assert(AutoVacPID == 0);
*/
Assert(BgWriterPID == 0);
BgWriterPID = StartBackgroundWriter();
+ Assert(BgHinterPID == 0);
+ BgHinterPID = StartBackgroundHinter();
Assert(CheckpointerPID == 0);
CheckpointerPID = StartCheckpointer();
ereport(LOG,
(errmsg("could not fork background writer process: %m")));
break;
+ case BgHinterProcess:
+ ereport(LOG,
+ (errmsg("could not fork background hinter process: %m")));
+ break;
case CheckpointerProcess:
ereport(LOG,
(errmsg("could not fork checkpointer process: %m")));