bug 310 - only issue the notify Restart if we actually change configuration
author Steve Singer <ssinger@ca.afilias.info>
Wed, 14 Aug 2013 21:30:13 +0000 (17:30 -0400)
committer Steve Singer <ssinger@ca.afilias.info>
Wed, 14 Aug 2013 21:30:13 +0000 (17:30 -0400)
The FAILOVER_NODE event is processed in two parts by slon.
First failedNode(..) is called which reconfigures sl_subscribe,
sl_path and then the listen network. Once this is done notify
is used to signal the slon to restart.

Then the slon commits this transaction and waits for any
pending events from the failed origin to arrive from a third
provider. Slon then continues with the failover (failoverSet_int).

The problem was that when slon restarts it repeats the processing
of the FAILOVER_NODE event. It was possible for slon to keep
restarting before it had caught up and recorded the FAILOVER_NODE
event as processed.

In this patch we only restart slon if the configuration is actually
changed. After the first restart the configuration shouldn't
need changing so there should not be any more restarts.

src/backend/slony1_funcs.sql

index 1519c6f16700071b1f8b0713574fbe1d891c0fb2..6e66c817a060c903a32d3f55a79b0b1f858da0af 100644 (file)
@@ -1208,8 +1208,10 @@ declare
        v_row                           record;
        v_row2                          record;
        v_failed                                        boolean;
+    v_restart_required          boolean;
 begin
        
+       v_restart_required:=false;
        --
        -- any nodes other than the backup receiving
        -- ANY subscription from a failed node
@@ -1219,7 +1221,9 @@ begin
                   where sub_provider=p_failed_node
                   and sub_receiver<>p_backup_node
                   and sub_receiver <> ALL (p_failed_nodes);
-
+       if found then
+          v_restart_required:=true;
+       end if;
        -- ----
        -- Terminate all connections of the failed node the hard way
        -- ----
@@ -1231,22 +1235,32 @@ begin
 
        update @NAMESPACE@.sl_path set pa_conninfo='<event pending>' WHERE
                          pa_server=p_failed_node;
-       
+
+       if found then
+          v_restart_required:=true;
+       end if;
+
        v_failed := exists (select 1 from @NAMESPACE@.sl_node 
                   where no_failed=true and no_id=p_failed_node);
 
-        if not v_failed then
+    if not v_failed then
                
                update @NAMESPACE@.sl_node set no_failed=true where no_id = ANY (p_failed_nodes)
-               and no_failed=false;
+                          and no_failed=false;
+               if found then
+                  v_restart_required:=true;
+               end if;
        end if; 
-       -- Rewrite sl_listen table
-       perform @NAMESPACE@.RebuildListenEntries();        
 
-       -- ----
-       -- Make sure the node daemon will restart
-       -- ----
-       notify "_@CLUSTERNAME@_Restart";
+       if v_restart_required then
+         -- Rewrite sl_listen table
+         perform @NAMESPACE@.RebuildListenEntries();      
+       
+         -- ----
+         -- Make sure the node daemon will restart
+         -- ----
+         notify "_@CLUSTERNAME@_Restart";
+    end if;
 
 
        -- ----