Fix: 823: Watchdog dies and kills pgpool2 when network gets briefly interrupted.
author Muhammad Usama <m.usama@gmail.com>
Tue, 11 Jun 2024 06:52:00 +0000 (11:52 +0500)
committer Muhammad Usama <m.usama@gmail.com>
Tue, 11 Jun 2024 06:58:32 +0000 (11:58 +0500)
With network monitoring enabled, a Pgpool node would shut down immediately if it
lost all network interfaces or assigned IP addresses, providing extra protection
by quickly removing a non-communicative node from the cluster.

The issue was that Pgpool responded to network blackout events even when network
monitoring was disabled. This fix ensures that the network monitoring socket is
not opened when network monitoring is not enabled, preventing unnecessary shutdowns.

src/watchdog/watchdog.c

index d9200002303b86a5c3a666089cb43d618642bbeb..e4444742616a763e78c37e80fe8516954cee81e9 100644 (file)
@@ -801,6 +801,7 @@ wd_cluster_initialize(void)
        g_cluster.de_escalation_pid = 0;
        g_cluster.unidentified_socks = NULL;
        g_cluster.command_server_sock = 0;
+       g_cluster.network_monitor_sock = 0;
        g_cluster.notify_clients = NULL;
        g_cluster.ipc_command_socks = NULL;
        g_cluster.wd_timer_commands = NULL;
@@ -1184,8 +1185,8 @@ watchdog_main(void)
        /* open the command server */
        g_cluster.command_server_sock = wd_create_command_server_socket();
 
-       /* try connecting to all watchdog nodes */
-       g_cluster.network_monitor_sock = create_monitoring_socket();
+       if (g_cluster.wdInterfaceToMonitor)
+               g_cluster.network_monitor_sock = create_monitoring_socket();
 
        if (any_interface_available() == false)
        {
@@ -1196,6 +1197,7 @@ watchdog_main(void)
                                 errhint("you can disable interface checking by setting wd_monitoring_interfaces_list = '' in pgpool config")));
        }
 
+       /* try connecting to all watchdog nodes */
        connect_with_all_configured_nodes();
 
        /* set the initial state of local node */
@@ -1403,9 +1405,12 @@ prepare_fds(fd_set *rmask, fd_set *wmask, fd_set *emask)
        if (fd_max < g_cluster.command_server_sock)
                fd_max = g_cluster.command_server_sock;
 
-       FD_SET(g_cluster.network_monitor_sock, rmask);
-       if (fd_max < g_cluster.network_monitor_sock)
-               fd_max = g_cluster.network_monitor_sock;
+       if (g_cluster.network_monitor_sock > 0)
+       {
+               FD_SET(g_cluster.network_monitor_sock, rmask);
+               if (fd_max < g_cluster.network_monitor_sock)
+                       fd_max = g_cluster.network_monitor_sock;
+       }
 
        /*
         * set write fdset for all waiting for connection sockets, while already