pacemaker  1.1.24-3850484742
Scalable High-Availability cluster resource manager
membership.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with this library; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 #include <crm_internal.h>
19 
20 #ifndef _GNU_SOURCE
21 # define _GNU_SOURCE
22 #endif
23 
24 #include <sys/param.h>
25 #include <sys/types.h>
26 #include <stdio.h>
27 #include <unistd.h>
28 #include <string.h>
29 #include <glib.h>
30 #include <crm/common/ipc.h>
31 #include <crm/cluster/internal.h>
32 #include <crm/msg_xml.h>
33 #include <crm/stonith-ng.h>
34 
35 #define s_if_plural(i) (((i) == 1)? "" : "s")
36 
37 /* The peer cache remembers cluster nodes that have been seen.
38  * This is managed mostly automatically by libcluster, based on
39  * cluster membership events.
40  *
41  * Because cluster nodes can have conflicting names or UUIDs,
42  * the hash table key is a uniquely generated ID.
43  */
44 GHashTable *crm_peer_cache = NULL;
45 
46 /*
47  * The remote peer cache tracks pacemaker_remote nodes. While the
48  * value has the same type as the peer cache's, it is tracked separately for
49  * three reasons: pacemaker_remote nodes can't have conflicting names or UUIDs,
50  * so the name (which is also the UUID) is used as the hash table key; there
51  * is no equivalent of membership events, so management is not automatic; and
52  * most users of the peer cache need to exclude pacemaker_remote nodes.
53  *
54  * That said, using a single cache would be more logical and less error-prone,
55  * so it would be a good idea to merge them one day.
56  *
57  * libcluster provides two avenues for populating the cache:
58  * crm_remote_peer_get(), crm_remote_peer_cache_add() and
59  * crm_remote_peer_cache_remove() directly manage it,
60  * while crm_remote_peer_cache_refresh() populates it via the CIB.
61  */
62 GHashTable *crm_remote_peer_cache = NULL;
63 
64 GHashTable *crm_known_peer_cache = NULL;
65 
66 unsigned long long crm_peer_seq = 0;
67 gboolean crm_have_quorum = FALSE;
68 static gboolean crm_autoreap = TRUE;
69 
70 int
72 {
73  if (crm_remote_peer_cache == NULL) {
74  return 0;
75  }
76  return g_hash_table_size(crm_remote_peer_cache);
77 }
78 
90 crm_node_t *
91 crm_remote_peer_get(const char *node_name)
92 {
93  crm_node_t *node;
94 
95  if (node_name == NULL) {
96  errno = -EINVAL;
97  return NULL;
98  }
99 
100  /* Return existing cache entry if one exists */
101  node = g_hash_table_lookup(crm_remote_peer_cache, node_name);
102  if (node) {
103  return node;
104  }
105 
106  /* Allocate a new entry */
107  node = calloc(1, sizeof(crm_node_t));
108  if (node == NULL) {
109  return NULL;
110  }
111 
112  /* Populate the essential information */
113  node->flags = crm_remote_node;
114  node->uuid = strdup(node_name);
115  if (node->uuid == NULL) {
116  free(node);
117  errno = -ENOMEM;
118  return NULL;
119  }
120 
121  /* Add the new entry to the cache */
122  g_hash_table_replace(crm_remote_peer_cache, node->uuid, node);
123  crm_trace("added %s to remote cache", node_name);
124 
125  /* Update the entry's uname, ensuring peer status callbacks are called */
126  crm_update_peer_uname(node, node_name);
127  return node;
128 }
129 
138 void
139 crm_remote_peer_cache_add(const char *node_name)
140 {
141  CRM_ASSERT(crm_remote_peer_get(node_name) != NULL);
142 }
143 
144 void
145 crm_remote_peer_cache_remove(const char *node_name)
146 {
147  if (g_hash_table_remove(crm_remote_peer_cache, node_name)) {
148  crm_trace("removed %s from remote peer cache", node_name);
149  }
150 }
151 
163 static const char *
164 remote_state_from_cib(xmlNode *node_state)
165 {
166  const char *status;
167 
168  status = crm_element_value(node_state, XML_NODE_IN_CLUSTER);
169  if (status && !crm_is_true(status)) {
170  status = CRM_NODE_LOST;
171  } else {
172  status = CRM_NODE_MEMBER;
173  }
174  return status;
175 }
176 
177 /* user data for looping through remote node xpath searches */
178 struct refresh_data {
179  const char *field; /* XML attribute to check for node name */
180  gboolean has_state; /* whether to update node state based on XML */
181 };
182 
190 static void
191 remote_cache_refresh_helper(xmlNode *result, void *user_data)
192 {
193  struct refresh_data *data = user_data;
194  const char *remote = crm_element_value(result, data->field);
195  const char *state = NULL;
196  crm_node_t *node;
197 
198  CRM_CHECK(remote != NULL, return);
199 
200  /* Determine node's state, if the result has it */
201  if (data->has_state) {
202  state = remote_state_from_cib(result);
203  }
204 
205  /* Check whether cache already has entry for node */
206  node = g_hash_table_lookup(crm_remote_peer_cache, remote);
207 
208  if (node == NULL) {
209  /* Node is not in cache, so add a new entry for it */
210  node = crm_remote_peer_get(remote);
211  CRM_ASSERT(node);
212  if (state) {
213  crm_update_peer_state(__FUNCTION__, node, state, 0);
214  }
215 
216  } else if (is_set(node->flags, crm_node_dirty)) {
217  /* Node is in cache and hasn't been updated already, so mark it clean */
219  if (state) {
220  crm_update_peer_state(__FUNCTION__, node, state, 0);
221  }
222  }
223 }
224 
225 static void
226 mark_dirty(gpointer key, gpointer value, gpointer user_data)
227 {
228  set_bit(((crm_node_t*)value)->flags, crm_node_dirty);
229 }
230 
231 static gboolean
232 is_dirty(gpointer key, gpointer value, gpointer user_data)
233 {
234  return is_set(((crm_node_t*)value)->flags, crm_node_dirty);
235 }
236 
237 /* search string to find CIB resources entries for guest nodes */
238 #define XPATH_GUEST_NODE_CONFIG \
239  "//" XML_TAG_CIB "//" XML_CIB_TAG_CONFIGURATION "//" XML_CIB_TAG_RESOURCE \
240  "//" XML_TAG_META_SETS "//" XML_CIB_TAG_NVPAIR \
241  "[@name='" XML_RSC_ATTR_REMOTE_NODE "']"
242 
243 /* search string to find CIB resources entries for remote nodes */
244 #define XPATH_REMOTE_NODE_CONFIG \
245  "//" XML_TAG_CIB "//" XML_CIB_TAG_CONFIGURATION "//" XML_CIB_TAG_RESOURCE \
246  "[@type='remote'][@provider='pacemaker']"
247 
248 /* search string to find CIB node status entries for pacemaker_remote nodes */
249 #define XPATH_REMOTE_NODE_STATUS \
250  "//" XML_TAG_CIB "//" XML_CIB_TAG_STATUS "//" XML_CIB_TAG_STATE \
251  "[@" XML_NODE_IS_REMOTE "='true']"
252 
258 void
260 {
261  struct refresh_data data;
262 
263  crm_peer_init();
264 
265  /* First, we mark all existing cache entries as dirty,
266  * so that later we can remove any that weren't in the CIB.
267  * We don't empty the cache, because we need to detect changes in state.
268  */
269  g_hash_table_foreach(crm_remote_peer_cache, mark_dirty, NULL);
270 
271  /* Look for guest nodes and remote nodes in the status section */
272  data.field = "id";
273  data.has_state = TRUE;
275  remote_cache_refresh_helper, &data);
276 
277  /* Look for guest nodes and remote nodes in the configuration section,
278  * because they may have just been added and not have a status entry yet.
279  * In that case, the cached node state will be left NULL, so that the
280  * peer status callback isn't called until we're sure the node started
281  * successfully.
282  */
283  data.field = "value";
284  data.has_state = FALSE;
286  remote_cache_refresh_helper, &data);
287  data.field = "id";
288  data.has_state = FALSE;
290  remote_cache_refresh_helper, &data);
291 
292  /* Remove all old cache entries that weren't seen in the CIB */
293  g_hash_table_foreach_remove(crm_remote_peer_cache, is_dirty, NULL);
294 }
295 
296 gboolean
298 {
299  if(node == NULL) {
300  return FALSE;
301  }
302 
303  if (is_set(node->flags, crm_remote_node)) {
304  /* remote nodes are never considered active members. This
305  * guarantees they will never be considered for DC membership.*/
306  return FALSE;
307  }
308 #if SUPPORT_COROSYNC
309  if (is_openais_cluster()) {
310  return crm_is_corosync_peer_active(node);
311  }
312 #endif
313 #if SUPPORT_HEARTBEAT
314  if (is_heartbeat_cluster()) {
315  return crm_is_heartbeat_peer_active(node);
316  }
317 #endif
318  crm_err("Unhandled cluster type: %s", name_for_cluster_type(get_cluster_type()));
319  return FALSE;
320 }
321 
322 static gboolean
323 crm_reap_dead_member(gpointer key, gpointer value, gpointer user_data)
324 {
325  crm_node_t *node = value;
326  crm_node_t *search = user_data;
327 
328  if (search == NULL) {
329  return FALSE;
330 
331  } else if (search->id && node->id != search->id) {
332  return FALSE;
333 
334  } else if (search->id == 0 && safe_str_neq(node->uname, search->uname)) {
335  return FALSE;
336 
337  } else if (crm_is_peer_active(value) == FALSE) {
338  crm_info("Removing node with name %s and id %u from membership cache",
339  (node->uname? node->uname : "unknown"), node->id);
340  return TRUE;
341  }
342  return FALSE;
343 }
344 
353 guint
354 reap_crm_member(uint32_t id, const char *name)
355 {
356  int matches = 0;
357  crm_node_t search;
358 
359  if (crm_peer_cache == NULL) {
360  crm_trace("Membership cache not initialized, ignoring purge request");
361  return 0;
362  }
363 
364  search.id = id;
365  search.uname = name ? strdup(name) : NULL;
366  matches = g_hash_table_foreach_remove(crm_peer_cache, crm_reap_dead_member, &search);
367  if(matches) {
368  crm_notice("Purged %d peer%s with id=%u%s%s from the membership cache",
369  matches, s_if_plural(matches), search.id,
370  (search.uname? " and/or uname=" : ""),
371  (search.uname? search.uname : ""));
372 
373  } else {
374  crm_info("No peers with id=%u%s%s to purge from the membership cache",
375  search.id, (search.uname? " and/or uname=" : ""),
376  (search.uname? search.uname : ""));
377  }
378 
379  free(search.uname);
380  return matches;
381 }
382 
383 static void
384 crm_count_peer(gpointer key, gpointer value, gpointer user_data)
385 {
386  guint *count = user_data;
387  crm_node_t *node = value;
388 
389  if (crm_is_peer_active(node)) {
390  *count = *count + 1;
391  }
392 }
393 
394 guint
396 {
397  guint count = 0;
398 
399  if (crm_peer_cache) {
400  g_hash_table_foreach(crm_peer_cache, crm_count_peer, &count);
401  }
402  return count;
403 }
404 
405 static void
406 destroy_crm_node(gpointer data)
407 {
408  crm_node_t *node = data;
409 
410  crm_trace("Destroying entry for node %u: %s", node->id, node->uname);
411 
412  free(node->addr);
413  free(node->uname);
414  free(node->state);
415  free(node->uuid);
416  free(node->expected);
417  free(node);
418 }
419 
420 void
422 {
423  if (crm_peer_cache == NULL) {
424  crm_peer_cache = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, free, destroy_crm_node);
425  }
426 
427  if (crm_remote_peer_cache == NULL) {
428  crm_remote_peer_cache = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, NULL, destroy_crm_node);
429  }
430 
431  if (crm_known_peer_cache == NULL) {
432  crm_known_peer_cache = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, free, destroy_crm_node);
433  }
434 }
435 
436 void
438 {
439  if (crm_peer_cache != NULL) {
440  crm_trace("Destroying peer cache with %d members", g_hash_table_size(crm_peer_cache));
441  g_hash_table_destroy(crm_peer_cache);
442  crm_peer_cache = NULL;
443  }
444 
445  if (crm_remote_peer_cache != NULL) {
446  crm_trace("Destroying remote peer cache with %d members", g_hash_table_size(crm_remote_peer_cache));
447  g_hash_table_destroy(crm_remote_peer_cache);
448  crm_remote_peer_cache = NULL;
449  }
450 
451  if (crm_known_peer_cache != NULL) {
452  crm_trace("Destroying known peer cache with %d members", g_hash_table_size(crm_known_peer_cache));
453  g_hash_table_destroy(crm_known_peer_cache);
454  crm_known_peer_cache = NULL;
455  }
456 
457 }
458 
459 void (*crm_status_callback) (enum crm_status_type, crm_node_t *, const void *) = NULL;
460 
471 void
472 crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *))
473 {
474  crm_status_callback = dispatch;
475 }
476 
488 void
489 crm_set_autoreap(gboolean autoreap)
490 {
491  crm_autoreap = autoreap;
492 }
493 
494 static void crm_dump_peer_hash(int level, const char *caller)
495 {
496  GHashTableIter iter;
497  const char *id = NULL;
498  crm_node_t *node = NULL;
499 
500  g_hash_table_iter_init(&iter, crm_peer_cache);
501  while (g_hash_table_iter_next(&iter, (gpointer *) &id, (gpointer *) &node)) {
502  do_crm_log(level, "%s: Node %u/%s = %p - %s", caller, node->id, node->uname, node, id);
503  }
504 }
505 
506 static gboolean crm_hash_find_by_data(gpointer key, gpointer value, gpointer user_data)
507 {
508  if(value == user_data) {
509  return TRUE;
510  }
511  return FALSE;
512 }
513 
514 crm_node_t *
515 crm_find_peer_full(unsigned int id, const char *uname, int flags)
516 {
517  crm_node_t *node = NULL;
518 
519  CRM_ASSERT(id > 0 || uname != NULL);
520 
521  crm_peer_init();
522 
523  if ((uname != NULL) && (flags & CRM_GET_PEER_REMOTE)) {
524  node = g_hash_table_lookup(crm_remote_peer_cache, uname);
525  }
526 
527  if (node == NULL && (flags & CRM_GET_PEER_CLUSTER)) {
528  node = crm_find_peer(id, uname);
529  }
530  return node;
531 }
532 
533 crm_node_t *
534 crm_get_peer_full(unsigned int id, const char *uname, int flags)
535 {
536  crm_node_t *node = NULL;
537 
538  CRM_ASSERT(id > 0 || uname != NULL);
539 
540  crm_peer_init();
541 
542  if (flags & CRM_GET_PEER_REMOTE) {
543  node = g_hash_table_lookup(crm_remote_peer_cache, uname);
544  }
545 
546  if (node == NULL && (flags & CRM_GET_PEER_CLUSTER)) {
547  node = crm_get_peer(id, uname);
548  }
549  return node;
550 }
551 
552 crm_node_t *
553 crm_find_peer(unsigned int id, const char *uname)
554 {
555  GHashTableIter iter;
556  crm_node_t *node = NULL;
557  crm_node_t *by_id = NULL;
558  crm_node_t *by_name = NULL;
559 
560  CRM_ASSERT(id > 0 || uname != NULL);
561 
562  crm_peer_init();
563 
564  if (uname != NULL) {
565  g_hash_table_iter_init(&iter, crm_peer_cache);
566  while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
567  if(node->uname && strcasecmp(node->uname, uname) == 0) {
568  crm_trace("Name match: %s = %p", node->uname, node);
569  by_name = node;
570  break;
571  }
572  }
573  }
574 
575  if (id > 0) {
576  g_hash_table_iter_init(&iter, crm_peer_cache);
577  while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
578  if(node->id == id) {
579  crm_trace("ID match: %u = %p", node->id, node);
580  by_id = node;
581  break;
582  }
583  }
584  }
585 
586  node = by_id; /* Good default */
587  if(by_id == by_name) {
588  /* Nothing to do if they match (both NULL counts) */
589  crm_trace("Consistent: %p for %u/%s", by_id, id, uname);
590 
591  } else if(by_id == NULL && by_name) {
592  crm_trace("Only one: %p for %u/%s", by_name, id, uname);
593 
594  if(id && by_name->id) {
595  crm_dump_peer_hash(LOG_WARNING, __FUNCTION__);
596  crm_crit("Node %u and %u share the same name '%s'",
597  id, by_name->id, uname);
598  node = NULL; /* Create a new one */
599 
600  } else {
601  node = by_name;
602  }
603 
604  } else if(by_name == NULL && by_id) {
605  crm_trace("Only one: %p for %u/%s", by_id, id, uname);
606 
607  if(uname && by_id->uname) {
608  crm_dump_peer_hash(LOG_WARNING, __FUNCTION__);
609  crm_crit("Node '%s' and '%s' share the same cluster nodeid %u: assuming '%s' is correct",
610  uname, by_id->uname, id, uname);
611  }
612 
613  } else if(uname && by_id->uname) {
614  if(safe_str_eq(uname, by_id->uname)) {
615  crm_notice("Node '%s' has changed its ID from %u to %u", by_id->uname, by_name->id, by_id->id);
616  g_hash_table_foreach_remove(crm_peer_cache, crm_hash_find_by_data, by_name);
617 
618  } else {
619  crm_warn("Node '%s' and '%s' share the same cluster nodeid: %u %s", by_id->uname, by_name->uname, id, uname);
620  crm_dump_peer_hash(LOG_INFO, __FUNCTION__);
621  crm_abort(__FILE__, __FUNCTION__, __LINE__, "member weirdness", TRUE, TRUE);
622  }
623 
624  } else if(id && by_name->id) {
625  crm_warn("Node %u and %u share the same name: '%s'", by_id->id, by_name->id, uname);
626 
627  } else {
628  /* Simple merge */
629 
630  /* Only corosync based clusters use nodeid's
631  *
632  * The functions that call crm_update_peer_state() only know nodeid
633  * so 'by_id' is authorative when merging
634  *
635  * Same for crm_update_peer_proc()
636  */
637  crm_dump_peer_hash(LOG_DEBUG, __FUNCTION__);
638 
639  crm_info("Merging %p into %p", by_name, by_id);
640  g_hash_table_foreach_remove(crm_peer_cache, crm_hash_find_by_data, by_name);
641  }
642 
643  return node;
644 }
645 
#if SUPPORT_COROSYNC
/*!
 * \internal
 * \brief Drop inactive cluster peer cache entries that share a node's name
 *
 * An entry conflicts when it has a nonzero ID different from the given
 * node's and a name equal to it (ignoring case). Active peers are kept.
 *
 * \param[in] node  Node whose name should be unique in the cache
 *
 * \return Number of cache entries removed
 */
static guint
crm_remove_conflicting_peer(crm_node_t *node)
{
    guint removed = 0;
    GHashTableIter iter;
    gpointer value = NULL;

    if (node->id == 0 || node->uname == NULL) {
        return 0;
    }

# if !SUPPORT_PLUGIN
    if (corosync_cmap_has_config("nodelist") != 0) {
        return 0;
    }
# endif

    g_hash_table_iter_init(&iter, crm_peer_cache);
    while (g_hash_table_iter_next(&iter, NULL, &value)) {
        crm_node_t *other = value;

        if ((other->id == 0) || (other->id == node->id)) {
            continue;
        }
        if ((other->uname == NULL)
            || (strcasecmp(other->uname, node->uname) != 0)) {
            continue;
        }
        if (crm_is_peer_active(other)) {
            continue;
        }

        crm_warn("Removing cached offline node %u/%s which has conflicting uname with %u",
                 other->id, other->uname, node->id);

        g_hash_table_iter_remove(&iter);
        removed++;
    }

    return removed;
}
#endif
686 
687 /* coverity[-alloc] Memory is referenced in one or both hashtables */
688 crm_node_t *
689 crm_get_peer(unsigned int id, const char *uname)
690 {
691  crm_node_t *node = NULL;
692  char *uname_lookup = NULL;
693 
694  CRM_ASSERT(id > 0 || uname != NULL);
695 
696  crm_peer_init();
697 
698  node = crm_find_peer(id, uname);
699 
700  /* if uname wasn't provided, and find_peer did not turn up a uname based on id.
701  * we need to do a lookup of the node name using the id in the cluster membership. */
702  if ((node == NULL || node->uname == NULL) && (uname == NULL)) {
703  uname_lookup = get_node_name(id);
704  }
705 
706  if (uname_lookup) {
707  uname = uname_lookup;
708  crm_trace("Inferred a name of '%s' for node %u", uname, id);
709 
710  /* try to turn up the node one more time now that we know the uname. */
711  if (node == NULL) {
712  node = crm_find_peer(id, uname);
713  }
714  }
715 
716 
717  if (node == NULL) {
718  char *uniqueid = crm_generate_uuid();
719 
720  node = calloc(1, sizeof(crm_node_t));
721  CRM_ASSERT(node);
722 
723  crm_info("Created entry %s/%p for node %s/%u (%d total)",
724  uniqueid, node, uname, id, 1 + g_hash_table_size(crm_peer_cache));
725  g_hash_table_replace(crm_peer_cache, uniqueid, node);
726  }
727 
728  if(id > 0 && uname && (node->id == 0 || node->uname == NULL)) {
729  crm_info("Node %u is now known as %s", id, uname);
730  }
731 
732  if(id > 0 && node->id == 0) {
733  node->id = id;
734  }
735 
736  if (uname && (node->uname == NULL)) {
737  crm_update_peer_uname(node, uname);
738  }
739 
740  if(node->uuid == NULL) {
741  const char *uuid = crm_peer_uuid(node);
742 
743  if (uuid) {
744  crm_info("Node %u has uuid %s", id, uuid);
745 
746  } else {
747  crm_info("Cannot obtain a UUID for node %u/%s", id, node->uname);
748  }
749  }
750 
751  free(uname_lookup);
752 
753  return node;
754 }
755 
767 crm_node_t *
768 crm_update_peer(const char *source, unsigned int id, uint64_t born, uint64_t seen, int32_t votes,
769  uint32_t children, const char *uuid, const char *uname, const char *addr,
770  const char *state)
771 {
772 #if SUPPORT_PLUGIN
773  gboolean addr_changed = FALSE;
774  gboolean votes_changed = FALSE;
775 #endif
776  crm_node_t *node = NULL;
777 
778  id = get_corosync_id(id, uuid);
779  node = crm_get_peer(id, uname);
780 
781  CRM_ASSERT(node != NULL);
782 
783  if (node->uuid == NULL) {
784  if (is_openais_cluster()) {
785  /* Yes, overrule whatever was passed in */
786  crm_peer_uuid(node);
787 
788  } else if (uuid != NULL) {
789  node->uuid = strdup(uuid);
790  }
791  }
792 
793  if (children > 0) {
794  if (crm_update_peer_proc(source, node, children, state) == NULL) {
795  return NULL;
796  }
797  }
798 
799  if (state != NULL) {
800  if (crm_update_peer_state(source, node, state, seen) == NULL) {
801  return NULL;
802  }
803  }
804 #if SUPPORT_HEARTBEAT
805  if (born != 0) {
806  node->born = born;
807  }
808 #endif
809 
810 #if SUPPORT_PLUGIN
811  /* These were only used by the plugin */
812  if (born != 0) {
813  node->born = born;
814  }
815 
816  if (votes > 0 && node->votes != votes) {
817  votes_changed = TRUE;
818  node->votes = votes;
819  }
820 
821  if (addr != NULL) {
822  if (node->addr == NULL || crm_str_eq(node->addr, addr, FALSE) == FALSE) {
823  addr_changed = TRUE;
824  free(node->addr);
825  node->addr = strdup(addr);
826  }
827  }
828  if (addr_changed || votes_changed) {
829  crm_info("%s: Node %s: id=%u state=%s addr=%s%s votes=%d%s born=" U64T " seen=" U64T
830  " proc=%.32x", source, node->uname, node->id, node->state,
831  node->addr, addr_changed ? " (new)" : "", node->votes,
832  votes_changed ? " (new)" : "", node->born, node->last_seen, node->processes);
833  }
834 #endif
835 
836  return node;
837 }
838 
850 void
852 {
853  CRM_CHECK(uname != NULL,
854  crm_err("Bug: can't update node name without name"); return);
855  CRM_CHECK(node != NULL,
856  crm_err("Bug: can't update node name to %s without node", uname);
857  return);
858 
859  if (safe_str_eq(uname, node->uname)) {
860  crm_debug("Node uname '%s' did not change", uname);
861  return;
862  }
863 
864  for (const char *c = uname; *c; ++c) {
865  if ((*c >= 'A') && (*c <= 'Z')) {
866  crm_warn("Node names with capitals are discouraged, consider changing '%s'",
867  uname);
868  break;
869  }
870  }
871 
872  free(node->uname);
873  node->uname = strdup(uname);
874  CRM_ASSERT(node->uname != NULL);
875 
876  if (crm_status_callback) {
878  }
879 
880 #if SUPPORT_COROSYNC
881  if (is_openais_cluster() && !is_set(node->flags, crm_remote_node)) {
882  crm_remove_conflicting_peer(node);
883  }
884 #endif
885 }
886 
903 crm_node_t *
904 crm_update_peer_proc(const char *source, crm_node_t * node, uint32_t flag, const char *status)
905 {
906  uint32_t last = 0;
907  gboolean changed = FALSE;
908 
909  CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL",
910  source, peer2text(flag), status); return NULL);
911 
912  /* Pacemaker doesn't spawn processes on remote nodes */
913  if (is_set(node->flags, crm_remote_node)) {
914  return node;
915  }
916 
917  last = node->processes;
918  if (status == NULL) {
919  node->processes = flag;
920  if (node->processes != last) {
921  changed = TRUE;
922  }
923 
924  } else if (safe_str_eq(status, ONLINESTATUS)) {
925  if ((node->processes & flag) != flag) {
926  set_bit(node->processes, flag);
927  changed = TRUE;
928  }
929 #if SUPPORT_PLUGIN
930  } else if (safe_str_eq(status, CRM_NODE_MEMBER)) {
931  if (flag > 0 && node->processes != flag) {
932  node->processes = flag;
933  changed = TRUE;
934  }
935 #endif
936 
937  } else if (node->processes & flag) {
938  clear_bit(node->processes, flag);
939  changed = TRUE;
940  }
941 
942  if (changed) {
943  if (status == NULL && flag <= crm_proc_none) {
944  crm_info("%s: Node %s[%u] - all processes are now offline", source, node->uname,
945  node->id);
946  } else {
947  crm_info("%s: Node %s[%u] - %s is now %s", source, node->uname, node->id,
948  peer2text(flag), status);
949  }
950 
951  /* Call the client callback first, then update the peer state,
952  * in case the node will be reaped
953  */
954  if (crm_status_callback) {
956  }
957 
958  /* The client callback shouldn't touch the peer caches,
959  * but as a safety net, bail if the peer cache was destroyed.
960  */
961  if (crm_peer_cache == NULL) {
962  return NULL;
963  }
964 
965  if (crm_autoreap) {
966  node = crm_update_peer_state(__FUNCTION__, node,
967  is_set(node->processes, crm_get_cluster_proc())?
969  }
970  } else {
971  crm_trace("%s: Node %s[%u] - %s is unchanged (%s)", source, node->uname, node->id,
972  peer2text(flag), status);
973  }
974  return node;
975 }
976 
977 void
978 crm_update_peer_expected(const char *source, crm_node_t * node, const char *expected)
979 {
980  char *last = NULL;
981  gboolean changed = FALSE;
982 
983  CRM_CHECK(node != NULL, crm_err("%s: Could not set 'expected' to %s", source, expected);
984  return);
985 
986  /* Remote nodes don't participate in joins */
987  if (is_set(node->flags, crm_remote_node)) {
988  return;
989  }
990 
991  last = node->expected;
992  if (expected != NULL && safe_str_neq(node->expected, expected)) {
993  node->expected = strdup(expected);
994  changed = TRUE;
995  }
996 
997  if (changed) {
998  crm_info("%s: Node %s[%u] - expected state is now %s (was %s)", source, node->uname, node->id,
999  expected, last);
1000  free(last);
1001  } else {
1002  crm_trace("%s: Node %s[%u] - expected state is unchanged (%s)", source, node->uname,
1003  node->id, expected);
1004  }
1005 }
1006 
1023 static crm_node_t *
1024 crm_update_peer_state_iter(const char *source, crm_node_t * node, const char *state, uint64_t membership, GHashTableIter *iter)
1025 {
1026  gboolean is_member;
1027 
1028  CRM_CHECK(node != NULL,
1029  crm_err("Could not set state for unknown host to %s"
1030  CRM_XS " source=%s", state, source);
1031  return NULL);
1032 
1033  is_member = safe_str_eq(state, CRM_NODE_MEMBER);
1034  if (membership && is_member) {
1035  node->last_seen = membership;
1036  }
1037 
1038  if (state && safe_str_neq(node->state, state)) {
1039  char *last = node->state;
1040  enum crm_status_type status_type = is_set(node->flags, crm_remote_node)?
1042 
1043  node->state = strdup(state);
1044  crm_notice("Node %s state is now %s " CRM_XS
1045  " nodeid=%u previous=%s source=%s", node->uname, state,
1046  node->id, (last? last : "unknown"), source);
1047  if (crm_status_callback) {
1048  crm_status_callback(status_type, node, last);
1049  }
1050  free(last);
1051 
1052  if (crm_autoreap && !is_member && !is_set(node->flags, crm_remote_node)) {
1053  /* We only autoreap from the peer cache, not the remote peer cache,
1054  * because the latter should be managed only by
1055  * crm_remote_peer_cache_refresh().
1056  */
1057  if(iter) {
1058  crm_notice("Purged 1 peer with id=%u and/or uname=%s from the membership cache", node->id, node->uname);
1059  g_hash_table_iter_remove(iter);
1060 
1061  } else {
1062  reap_crm_member(node->id, node->uname);
1063  }
1064  node = NULL;
1065  }
1066 
1067  } else {
1068  crm_trace("Node %s state is unchanged (%s) " CRM_XS
1069  " nodeid=%u source=%s", node->uname, state, node->id, source);
1070  }
1071  return node;
1072 }
1073 
1089 crm_node_t *
1090 crm_update_peer_state(const char *source, crm_node_t * node, const char *state, uint64_t membership)
1091 {
1092  return crm_update_peer_state_iter(source, node, state, membership, NULL);
1093 }
1094 
1101 void
1102 crm_reap_unseen_nodes(uint64_t membership)
1103 {
1104  GHashTableIter iter;
1105  crm_node_t *node = NULL;
1106 
1107  crm_trace("Reaping unseen nodes...");
1108  g_hash_table_iter_init(&iter, crm_peer_cache);
1109  while (g_hash_table_iter_next(&iter, NULL, (gpointer *)&node)) {
1110  if (node->last_seen != membership) {
1111  if (node->state) {
1112  /*
1113  * Calling crm_update_peer_state_iter() allows us to
1114  * remove the node from crm_peer_cache without
1115  * invalidating our iterator
1116  */
1117  crm_update_peer_state_iter(__FUNCTION__, node, CRM_NODE_LOST, membership, &iter);
1118 
1119  } else {
1120  crm_info("State of node %s[%u] is still unknown",
1121  node->uname, node->id);
1122  }
1123  }
1124  }
1125 }
1126 
1127 int
1128 crm_terminate_member(int nodeid, const char *uname, void *unused)
1129 {
1130  /* Always use the synchronous, non-mainloop version */
1131  return stonith_api_kick(nodeid, uname, 120, TRUE);
1132 }
1133 
1134 int
1135 crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection)
1136 {
1137  return stonith_api_kick(nodeid, uname, 120, TRUE);
1138 }
1139 
1140 static crm_node_t *
1141 crm_find_known_peer(const char *id, const char *uname)
1142 {
1143  GHashTableIter iter;
1144  crm_node_t *node = NULL;
1145  crm_node_t *by_id = NULL;
1146  crm_node_t *by_name = NULL;
1147 
1148  if (uname) {
1149  g_hash_table_iter_init(&iter, crm_known_peer_cache);
1150  while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
1151  if (node->uname && strcasecmp(node->uname, uname) == 0) {
1152  crm_trace("Name match: %s = %p", node->uname, node);
1153  by_name = node;
1154  break;
1155  }
1156  }
1157  }
1158 
1159  if (id) {
1160  g_hash_table_iter_init(&iter, crm_known_peer_cache);
1161  while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
1162  if(strcasecmp(node->uuid, id) == 0) {
1163  crm_trace("ID match: %s= %p", id, node);
1164  by_id = node;
1165  break;
1166  }
1167  }
1168  }
1169 
1170  node = by_id; /* Good default */
1171  if (by_id == by_name) {
1172  /* Nothing to do if they match (both NULL counts) */
1173  crm_trace("Consistent: %p for %s/%s", by_id, id, uname);
1174 
1175  } else if (by_id == NULL && by_name) {
1176  crm_trace("Only one: %p for %s/%s", by_name, id, uname);
1177 
1178  if (id) {
1179  node = NULL;
1180 
1181  } else {
1182  node = by_name;
1183  }
1184 
1185  } else if (by_name == NULL && by_id) {
1186  crm_trace("Only one: %p for %s/%s", by_id, id, uname);
1187 
1188  if (uname) {
1189  node = NULL;
1190  }
1191 
1192  } else if (uname && by_id->uname
1193  && safe_str_eq(uname, by_id->uname)) {
1194  /* Multiple nodes have the same uname in the CIB.
1195  * Return by_id. */
1196 
1197  } else if (id && by_name->uuid
1198  && safe_str_eq(id, by_name->uuid)) {
1199  /* Multiple nodes have the same id in the CIB.
1200  * Return by_name. */
1201  node = by_name;
1202 
1203  } else {
1204  node = NULL;
1205  }
1206 
1207  if (node == NULL) {
1208  crm_debug("Couldn't find node%s%s%s%s",
1209  id? " " : "",
1210  id? id : "",
1211  uname? " with name " : "",
1212  uname? uname : "");
1213  }
1214 
1215  return node;
1216 }
1217 
1218 static void
1219 known_peer_cache_refresh_helper(xmlNode *xml_node, void *user_data)
1220 {
1221  const char *id = crm_element_value(xml_node, XML_ATTR_ID);
1222  const char *uname = crm_element_value(xml_node, XML_ATTR_UNAME);
1223  crm_node_t * node = NULL;
1224 
1225  CRM_CHECK(id != NULL && uname !=NULL, return);
1226  node = crm_find_known_peer(id, uname);
1227 
1228  if (node == NULL) {
1229  char *uniqueid = crm_generate_uuid();
1230 
1231  node = calloc(1, sizeof(crm_node_t));
1232  CRM_ASSERT(node != NULL);
1233 
1234  node->uname = strdup(uname);
1235  CRM_ASSERT(node->uname != NULL);
1236 
1237  node->uuid = strdup(id);
1238  CRM_ASSERT(node->uuid != NULL);
1239 
1240  g_hash_table_replace(crm_known_peer_cache, uniqueid, node);
1241 
1242  } else if (is_set(node->flags, crm_node_dirty)) {
1243  if (safe_str_neq(uname, node->uname)) {
1244  free(node->uname);
1245  node->uname = strdup(uname);
1246  CRM_ASSERT(node->uname != NULL);
1247  }
1248 
1249  /* Node is in cache and hasn't been updated already, so mark it clean */
1250  clear_bit(node->flags, crm_node_dirty);
1251  }
1252 
1253 }
1254 
1255 #define XPATH_MEMBER_NODE_CONFIG \
1256  "//" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_NODES \
1257  "/" XML_CIB_TAG_NODE "[not(@type) or @type='member']"
1258 
1259 static void
1260 crm_known_peer_cache_refresh(xmlNode *cib)
1261 {
1262  crm_peer_init();
1263 
1264  g_hash_table_foreach(crm_known_peer_cache, mark_dirty, NULL);
1265 
1267  known_peer_cache_refresh_helper, NULL);
1268 
1269  /* Remove all old cache entries that weren't seen in the CIB */
1270  g_hash_table_foreach_remove(crm_known_peer_cache, is_dirty, NULL);
1271 }
1272 
1273 void
1275 {
1277  crm_known_peer_cache_refresh(cib);
1278 }
1279 
1280 crm_node_t *
1281 crm_find_known_peer_full(unsigned int id, const char *uname, int flags)
1282 {
1283  crm_node_t *node = NULL;
1284  char *id_str = NULL;
1285 
1286  CRM_ASSERT(id > 0 || uname != NULL);
1287 
1288  node = crm_find_peer_full(id, uname, flags);
1289 
1290  if (node || !(flags & CRM_GET_PEER_CLUSTER)) {
1291  return node;
1292  }
1293 
1294  if (id > 0) {
1295  id_str = crm_strdup_printf("%u", id);
1296  }
1297 
1298  node = crm_find_known_peer(id_str, uname);
1299 
1300  free(id_str);
1301  return node;
1302 }
uint32_t votes
Definition: internal.h:78
#define CRM_CHECK(expr, failure_action)
Definition: logging.h:190
void crm_remote_peer_cache_add(const char *node_name)
Add a node to the remote peer cache.
Definition: membership.c:139
void crm_reap_unseen_nodes(uint64_t membership)
Definition: membership.c:1102
#define crm_notice(fmt, args...)
Definition: logging.h:276
#define CRM_NODE_LOST
Definition: cluster.h:43
#define XPATH_REMOTE_NODE_STATUS
Definition: membership.c:249
GHashTable * crm_peer_cache
Definition: membership.c:44
gboolean is_openais_cluster(void)
Definition: cluster.c:630
#define crm_crit(fmt, args...)
Definition: logging.h:273
gboolean safe_str_neq(const char *a, const char *b)
Definition: strings.c:182
char * crm_generate_uuid(void)
Definition: utils.c:1279
uint64_t flags
Definition: cluster.h:76
void crm_peer_destroy(void)
Definition: membership.c:437
uint32_t id
Definition: cluster.h:73
gboolean is_heartbeat_cluster(void)
Definition: cluster.c:645
uint64_t born
Definition: cluster.h:74
char * uuid
Definition: cluster.h:83
int stonith_api_kick(uint32_t nodeid, const char *uname, int timeout, bool off)
Definition: st_client.c:2313
crm_node_t * crm_find_peer(unsigned int id, const char *uname)
Definition: membership.c:553
int get_corosync_id(int id, const char *uuid)
Definition: cluster.c:96
crm_node_t * crm_find_known_peer_full(unsigned int id, const char *uname, int flags)
Definition: membership.c:1281
gboolean crm_have_quorum
Definition: membership.c:67
crm_node_t * crm_find_peer_full(unsigned int id, const char *uname, int flags)
Definition: membership.c:515
GHashTable * crm_remote_peer_cache
Definition: membership.c:62
char * addr
Definition: cluster.h:87
#define clear_bit(word, bit)
Definition: crm_internal.h:211
unsigned long long crm_peer_seq
Definition: membership.c:66
char * get_node_name(uint32_t nodeid)
Definition: cluster.c:301
void crm_set_autoreap(gboolean autoreap)
Tell the library whether to automatically reap lost nodes.
Definition: membership.c:489
void crm_peer_caches_refresh(xmlNode *cib)
Definition: membership.c:1274
void crm_peer_init(void)
Definition: membership.c:421
void crm_remote_peer_cache_remove(const char *node_name)
Definition: membership.c:145
gboolean crm_is_corosync_peer_active(const crm_node_t *node)
Definition: corosync.c:510
crm_node_t * crm_update_peer_state(const char *source, crm_node_t *node, const char *state, uint64_t membership)
Update a node's state and membership information.
Definition: membership.c:1090
char uname[MAX_NAME]
Definition: internal.h:81
int crm_remote_peer_cache_size(void)
Definition: membership.c:71
#define crm_warn(fmt, args...)
Definition: logging.h:275
#define set_bit(word, bit)
Definition: crm_internal.h:210
uint32_t processes
Definition: cluster.h:79
crm_node_t * crm_get_peer_full(unsigned int id, const char *uname, int flags)
Definition: membership.c:534
crm_node_t * crm_update_peer(const char *source, unsigned int id, uint64_t born, uint64_t seen, int32_t votes, uint32_t children, const char *uuid, const char *uname, const char *addr, const char *state)
Definition: membership.c:768
guint reap_crm_member(uint32_t id, const char *name)
Remove all peer cache entries matching a node ID and/or uname.
Definition: membership.c:354
gboolean crm_is_peer_active(const crm_node_t *node)
Definition: membership.c:297
uint32_t id
Definition: internal.h:76
#define crm_debug(fmt, args...)
Definition: logging.h:279
#define XML_ATTR_ID
Definition: msg_xml.h:102
const char * crm_element_value(const xmlNode *data, const char *name)
Retrieve the value of an XML attribute.
Definition: nvpair.c:393
#define XPATH_GUEST_NODE_CONFIG
Definition: membership.c:238
crm_status_type
Definition: cluster.h:198
void crm_update_peer_expected(const char *source, crm_node_t *node, const char *expected)
Definition: membership.c:978
#define crm_trace(fmt, args...)
Definition: logging.h:280
#define do_crm_log(level, fmt, args...)
Log a message.
Definition: logging.h:129
guint crm_strcase_hash(gconstpointer v)
Definition: strings.c:394
#define XML_ATTR_UNAME
Definition: msg_xml.h:130
int corosync_cmap_has_config(const char *prefix)
Definition: corosync.c:690
#define CRM_NODE_MEMBER
Definition: cluster.h:44
void crm_update_peer_uname(crm_node_t *node, const char *uname)
Definition: membership.c:851
gboolean crm_str_eq(const char *a, const char *b, gboolean use_case)
Definition: strings.c:245
void crm_set_status_callback(void(*dispatch)(enum crm_status_type, crm_node_t *, const void *))
Set a client function that will be called after peer status changes.
Definition: membership.c:472
const char * name_for_cluster_type(enum cluster_type_e type)
Definition: cluster.c:468
int crm_terminate_member(int nodeid, const char *uname, void *unused)
Definition: membership.c:1128
char * expected
Definition: cluster.h:85
void(* crm_status_callback)(enum crm_status_type, crm_node_t *, const void *)
Definition: membership.c:459
#define CRM_XS
Definition: logging.h:42
void crm_remote_peer_cache_refresh(xmlNode *cib)
Repopulate the remote peer cache based on CIB XML.
Definition: membership.c:259
guint crm_active_peers(void)
Definition: membership.c:395
crm_node_t * crm_remote_peer_get(const char *node_name)
Get a remote node peer cache entry, creating it if necessary.
Definition: membership.c:91
#define crm_err(fmt, args...)
Definition: logging.h:274
Fencing aka. STONITH.
#define uint32_t
Definition: stdint.in.h:158
int crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection)
Definition: membership.c:1135
#define CRM_ASSERT(expr)
Definition: error.h:20
char data[0]
Definition: internal.h:86
char * state
Definition: cluster.h:84
void crm_foreach_xpath_result(xmlNode *xml, const char *xpath, void(*helper)(xmlNode *, void *), void *user_data)
Run a supplied function for each result of an xpath search.
Definition: xpath.c:179
#define U64T
Definition: config.h:765
Wrappers for and extensions to libqb IPC.
GHashTable * crm_known_peer_cache
Definition: membership.c:64
crm_node_t * crm_update_peer_proc(const char *source, crm_node_t *node, uint32_t flag, const char *status)
Definition: membership.c:904
int32_t votes
Definition: cluster.h:78
char * uname
Definition: cluster.h:82
uint64_t last_seen
Definition: cluster.h:75
#define XML_NODE_IN_CLUSTER
Definition: msg_xml.h:274
gboolean crm_is_true(const char *s)
Definition: strings.c:197
#define safe_str_eq(a, b)
Definition: util.h:74
#define ONLINESTATUS
Definition: util.h:53
char * crm_strdup_printf(char const *format,...) __attribute__((__format__(__printf__
void crm_abort(const char *file, const char *function, int line, const char *condition, gboolean do_core, gboolean do_fork)
Definition: utils.c:647
crm_node_t * crm_get_peer(unsigned int id, const char *uname)
Definition: membership.c:689
#define XPATH_MEMBER_NODE_CONFIG
Definition: membership.c:1255
#define XPATH_REMOTE_NODE_CONFIG
Definition: membership.c:244
#define crm_info(fmt, args...)
Definition: logging.h:277
const char * crm_peer_uuid(crm_node_t *node)
Definition: cluster.c:135
uint64_t flags
Definition: remote.c:156
#define s_if_plural(i)
Definition: membership.c:35
#define int32_t
Definition: stdint.in.h:157
enum cluster_type_e get_cluster_type(void)
Definition: cluster.c:513