From: Muhammad Usama Date: Wed, 7 Aug 2019 15:22:01 +0000 (+0500) Subject: Fix for no primary on standby pgpool when primary is quarantined on master X-Git-Tag: V4_1_0_BETA1~46 X-Git-Url: http://git.postgresql.org/gitweb/?a=commitdiff_plain;h=3922c12c1f8efbc1b5f2e7def1e0ff921aafb989;p=pgpool2.git Fix for no primary on standby pgpool when primary is quarantined on master Master watchdog Pgpool sends primary_node_id = -1 in the backend status sync message if the primary node is quarantined on it. So standby watchdog Pgpool must not update its primary_node_id if the primary backend node id in sync message is invalid_node_id (-1) while the same sync message reports the backend status of the current primary node as "NOT DOWN". The issue was reported by "Tatsuo Ishii " and fixed by me --- diff --git a/src/main/pgpool_main.c b/src/main/pgpool_main.c index 371aa69cf..1a03927bc 100644 --- a/src/main/pgpool_main.c +++ b/src/main/pgpool_main.c @@ -4151,17 +4151,6 @@ sync_backend_from_watchdog(void) ereport(DEBUG1, (errmsg("primary node on master watchdog node \"%s\" is %d", backendStatus->nodeName, backendStatus->primary_node_id))); - if (Req_info->primary_node_id != backendStatus->primary_node_id) - { - /* Do not produce this log message if we are starting up the Pgpool-II */ - if (processState != INITIALIZING) - ereport(LOG, - (errmsg("primary node:%d on master watchdog node \"%s\" is different from local primary node:%d", - backendStatus->primary_node_id, backendStatus->nodeName, Req_info->primary_node_id))); - - Req_info->primary_node_id = backendStatus->primary_node_id; - primary_changed = true; - } /* * update the local backend status Also remove quarantine flags @@ -4204,6 +4193,34 @@ sync_backend_from_watchdog(void) } } } + + if (Req_info->primary_node_id != backendStatus->primary_node_id) + { + /* Do not produce this log message if we are starting up the Pgpool-II */ + if (processState != INITIALIZING) + ereport(LOG, + (errmsg("primary node:%d on master watchdog node \"%s\" is different from local primary node:%d", + backendStatus->primary_node_id, backendStatus->nodeName, Req_info->primary_node_id))); + /* + * master node returns primary_node_id = -1 when the node primary + * node is in quarantine state on the master. + * So we will not update our primary node id when the status of current primary node + * is not CON_DOWN while primary_node_id sent by master watchdong node is -1 + */ + if (backendStatus->primary_node_id == -1 && BACKEND_INFO(Req_info->primary_node_id).backend_status != CON_DOWN) + { + ereport(LOG, + (errmsg("primary node:%d on master watchdog node \"%s\" seems to be quarantined", + Req_info->primary_node_id, backendStatus->nodeName), + errdetail("keeping the current primary"))); + } + else + { + Req_info->primary_node_id = backendStatus->primary_node_id; + primary_changed = true; + } + } + pfree(backendStatus); if (reload_maste_node_id) diff --git a/src/watchdog/wd_json_data.c b/src/watchdog/wd_json_data.c index 7f3cb6625..4fa8dd77a 100644 --- a/src/watchdog/wd_json_data.c +++ b/src/watchdog/wd_json_data.c @@ -324,7 +324,7 @@ get_backend_node_status_json(WatchdogNode * wdNode) if (backend_status == CON_DOWN && pool_config->backend_desc->backend_info[i].quarantine) { /* - * since quarantine nodes are not cluster wide so send CON_WATI + * since quarantine nodes are not cluster wide so send CON_WAIT * status for quarantine nodes */ backend_status = CON_CONNECT_WAIT;