46 #include <sys/types.h> 47 #include <sys/socket.h> 52 #include <qb/qbipcc.h> 66 #define SAM_CMAP_S_FAILED "failed" 67 #define SAM_CMAP_S_REGISTERED "stopped" 68 #define SAM_CMAP_S_STARTED "running" 69 #define SAM_CMAP_S_Q_WAIT "waiting for quorum" 71 #define SAM_RP_MASK_Q(pol) (pol & (~SAM_RECOVERY_POLICY_QUORUM)) 72 #define SAM_RP_MASK_C(pol) (pol & (~SAM_RECOVERY_POLICY_CMAP)) 73 #define SAM_RP_MASK(pol) (pol & (~(SAM_RECOVERY_POLICY_QUORUM | SAM_RECOVERY_POLICY_CMAP))) 147 uint64_t hc_period, last_hc;
153 svalue = ssvalue[
SAM_RP_MASK (sam_internal_data.recovery_policy)];
162 hc_period = sam_internal_data.time_interval;
171 last_hc = cs_timestamp_get();
195 static cs_error_t sam_cmap_destroy_pid_path (
void)
201 err =
cmap_iter_init(sam_internal_data.cmap_handle, sam_internal_data.cmap_pid_path, &iter);
206 while ((err =
cmap_iter_next(sam_internal_data.cmap_handle, iter, key_name, NULL, NULL)) ==
CS_OK) {
207 cmap_delete(sam_internal_data.cmap_handle, key_name);
225 snprintf(sam_internal_data.cmap_pid_path,
CMAP_KEYNAME_MAXLEN,
"resources.process.%d.", getpid());
230 goto destroy_finalize_error;
234 goto destroy_finalize_error;
239 destroy_finalize_error:
240 sam_cmap_destroy_pid_path ();
245 static void quorum_notification_fn (
249 uint32_t view_list_entries,
252 sam_internal_data.quorate =
quorate;
260 uint32_t quorum_type;
277 if ((err =
quorum_initialize (&sam_internal_data.quorum_handle, &quorum_callbacks, &quorum_type)) !=
CS_OK) {
282 goto exit_error_quorum;
285 if ((err =
quorum_fd_get (sam_internal_data.quorum_handle, &sam_internal_data.quorum_fd)) !=
CS_OK) {
286 goto exit_error_quorum;
293 goto exit_error_quorum;
302 sam_internal_data.warn_signal = SIGTERM;
304 sam_internal_data.am_i_child = 0;
306 sam_internal_data.user_data = NULL;
307 sam_internal_data.user_data_size = 0;
308 sam_internal_data.user_data_allocated = 0;
310 pthread_mutex_init (&sam_internal_data.lock, NULL);
323 static size_t sam_safe_write (
329 ssize_t tmp_bytes_write;
334 tmp_bytes_write = write (d, (
const char *)buf + bytes_write,
335 (nbyte - bytes_write > SSIZE_MAX) ? SSIZE_MAX : nbyte - bytes_write);
337 if (tmp_bytes_write == -1) {
338 if (!(errno == EAGAIN || errno == EINTR))
341 bytes_write += tmp_bytes_write;
343 }
while (bytes_write != nbyte);
345 return (bytes_write);
351 static size_t sam_safe_read (
357 ssize_t tmp_bytes_read;
362 tmp_bytes_read = read (d, (
char *)buf + bytes_read,
363 (nbyte - bytes_read > SSIZE_MAX) ? SSIZE_MAX : nbyte - bytes_read);
365 if (tmp_bytes_read == -1) {
366 if (!(errno == EAGAIN || errno == EINTR))
369 bytes_read += tmp_bytes_read;
372 }
while (bytes_read != nbyte && tmp_bytes_read != 0);
383 if (sam_safe_read (sam_internal_data.child_fd_in, &reply, sizeof (reply)) !=
sizeof (reply)) {
392 if (sam_safe_read (sam_internal_data.child_fd_in, &err, sizeof (err)) !=
sizeof (err)) {
424 pthread_mutex_lock (&sam_internal_data.lock);
426 *size = sam_internal_data.user_data_size;
428 pthread_mutex_unlock (&sam_internal_data.lock);
452 pthread_mutex_lock (&sam_internal_data.lock);
454 if (sam_internal_data.user_data_size == 0) {
460 if (size < sam_internal_data.user_data_size) {
466 memcpy (data, sam_internal_data.user_data, sam_internal_data.user_data_size);
468 pthread_mutex_unlock (&sam_internal_data.lock);
473 pthread_mutex_unlock (&sam_internal_data.lock);
498 pthread_mutex_lock (&sam_internal_data.lock);
500 if (sam_internal_data.am_i_child) {
505 if (sam_safe_write (sam_internal_data.child_fd_out, &command, sizeof (command)) !=
sizeof (command)) {
511 if (sam_safe_write (sam_internal_data.child_fd_out, &size, sizeof (size)) !=
sizeof (size)) {
517 if (data != NULL && sam_safe_write (sam_internal_data.child_fd_out, data, size) != size) {
526 if ((err = sam_read_reply (sam_internal_data.child_fd_in)) !=
CS_OK) {
535 free (sam_internal_data.user_data);
536 sam_internal_data.user_data = NULL;
537 sam_internal_data.user_data_allocated = 0;
538 sam_internal_data.user_data_size = 0;
540 if (sam_internal_data.user_data_allocated < size) {
541 if ((new_data = realloc (sam_internal_data.user_data, size)) == NULL) {
547 sam_internal_data.user_data_allocated = size;
549 new_data = sam_internal_data.user_data;
551 sam_internal_data.user_data = new_data;
552 sam_internal_data.user_data_size = size;
554 memcpy (sam_internal_data.user_data, data, size);
557 pthread_mutex_unlock (&sam_internal_data.lock);
562 pthread_mutex_unlock (&sam_internal_data.lock);
577 recpol = sam_internal_data.recovery_policy;
580 pthread_mutex_lock (&sam_internal_data.lock);
585 if (sam_safe_write (sam_internal_data.child_fd_out, &command, sizeof (command)) !=
sizeof (command)) {
587 pthread_mutex_unlock (&sam_internal_data.lock);
597 if ((err = sam_read_reply (sam_internal_data.child_fd_in)) !=
CS_OK) {
598 pthread_mutex_unlock (&sam_internal_data.lock);
603 pthread_mutex_unlock (&sam_internal_data.lock);
606 if (sam_internal_data.hc_callback)
607 if (sam_safe_write (sam_internal_data.cb_wpipe_fd, &command, sizeof (command)) !=
sizeof (command))
627 pthread_mutex_lock (&sam_internal_data.lock);
630 if (sam_safe_write (sam_internal_data.child_fd_out, &command, sizeof (command)) !=
sizeof (command)) {
632 pthread_mutex_unlock (&sam_internal_data.lock);
642 if ((err = sam_read_reply (sam_internal_data.child_fd_in)) !=
CS_OK) {
643 pthread_mutex_unlock (&sam_internal_data.lock);
648 pthread_mutex_unlock (&sam_internal_data.lock);
651 if (sam_internal_data.hc_callback)
652 if (sam_safe_write (sam_internal_data.cb_wpipe_fd, &command, sizeof (command)) !=
sizeof (command))
670 if (sam_safe_write (sam_internal_data.child_fd_out, &command, sizeof (command)) !=
sizeof (command))
694 free (sam_internal_data.user_data);
715 if (sam_safe_write (sam_internal_data.child_fd_out, &command, sizeof (command)) !=
sizeof (command))
732 pthread_mutex_lock (&sam_internal_data.lock);
734 if (sam_internal_data.am_i_child) {
739 if (sam_safe_write (sam_internal_data.child_fd_out, &command, sizeof (command)) !=
sizeof (command)) {
745 if (sam_safe_write (sam_internal_data.child_fd_out, &warn_signal, sizeof (warn_signal)) !=
746 sizeof (warn_signal)) {
755 if ((err = sam_read_reply (sam_internal_data.child_fd_in)) !=
CS_OK) {
765 pthread_mutex_unlock (&sam_internal_data.lock);
770 pthread_mutex_unlock (&sam_internal_data.lock);
785 if (sam_safe_write (parent_fd_out, &reply,
sizeof (reply)) !=
sizeof (reply)) {
795 if (sam_safe_write (parent_fd_out, &reply,
sizeof (reply)) !=
sizeof (reply)) {
798 if (sam_safe_write (parent_fd_out, &err,
sizeof (err)) !=
sizeof (err)) {
806 static cs_error_t sam_parent_warn_signal_set (
815 if (sam_safe_read (parent_fd_in, &warn_signal,
sizeof (warn_signal)) !=
sizeof (warn_signal)) {
826 return (sam_parent_reply_send (
CS_OK, parent_fd_in, parent_fd_out));
829 return (sam_parent_reply_send (err, parent_fd_in, parent_fd_out));
832 static cs_error_t sam_parent_wait_for_quorum (
837 struct pollfd pfds[2];
856 while (!sam_internal_data.quorate) {
857 pfds[0].fd = parent_fd_in;
861 pfds[1].fd = sam_internal_data.quorum_fd;
862 pfds[1].events = POLLIN;
865 poll_err = poll (pfds, 2, -1);
867 if (poll_err == -1) {
872 if (errno != EINTR) {
878 if (pfds[0].revents != 0) {
879 if (pfds[0].revents == POLLERR || pfds[0].revents == POLLHUP ||pfds[0].revents == POLLNVAL) {
887 if (pfds[1].revents != 0) {
900 return (sam_parent_reply_send (
CS_OK, parent_fd_in, parent_fd_out));
907 return (sam_parent_reply_send (err, parent_fd_in, parent_fd_out));
928 return (sam_parent_reply_send (
CS_OK, parent_fd_in, parent_fd_out));
931 return (sam_parent_reply_send (err, parent_fd_in, parent_fd_out));
941 if (!sam_internal_data.term_send) {
945 kill (child_pid, sam_internal_data.warn_signal);
947 sam_internal_data.term_send = 1;
952 kill (child_pid, SIGKILL);
959 static cs_error_t sam_parent_mark_child_failed (
965 recpol = sam_internal_data.recovery_policy;
967 sam_internal_data.term_send = 1;
972 return (sam_parent_kill_child (action, child_pid));
986 if (sam_safe_read (parent_fd_in, &size,
sizeof (size)) !=
sizeof (size)) {
992 user_data = malloc (size);
993 if (user_data == NULL) {
998 if (sam_safe_read (parent_fd_in, user_data, size) != size) {
1000 goto free_error_reply;
1006 goto free_error_reply;
1011 return (sam_parent_reply_send (
CS_OK, parent_fd_in, parent_fd_out));
1016 return (sam_parent_reply_send (err, parent_fd_in, parent_fd_out));
1030 struct pollfd pfds[2];
1038 recpol = sam_internal_data.recovery_policy;
1041 pfds[0].fd = parent_fd_in;
1042 pfds[0].events = POLLIN;
1043 pfds[0].revents = 0;
1046 if (status == 1 && sam_internal_data.time_interval != 0) {
1047 time_interval = sam_internal_data.time_interval;
1053 pfds[nfds].fd = sam_internal_data.quorum_fd;
1054 pfds[nfds].events = POLLIN;
1055 pfds[nfds].revents = 0;
1059 poll_error = poll (pfds, nfds, time_interval);
1061 if (poll_error == -1) {
1066 if (errno != EINTR) {
1071 if (poll_error == 0) {
1078 sam_parent_kill_child (&action, child_pid);
1082 if (poll_error > 0) {
1083 if (pfds[0].revents != 0) {
1087 bytes_read = sam_safe_read (parent_fd_in, &command, 1);
1089 if (bytes_read == 0) {
1101 if (bytes_read == -1) {
1119 if (recpol & SAM_RECOVERY_POLICY_QUORUM) {
1120 if (sam_parent_wait_for_quorum (parent_fd_in,
1121 parent_fd_out) !=
CS_OK) {
1126 if (recpol & SAM_RECOVERY_POLICY_CMAP) {
1127 if (sam_parent_cmap_state_set (parent_fd_in,
1128 parent_fd_out, 1) !=
CS_OK) {
1141 if (recpol & SAM_RECOVERY_POLICY_CMAP) {
1142 if (sam_parent_cmap_state_set (parent_fd_in,
1143 parent_fd_out, 0) !=
CS_OK) {
1152 sam_parent_data_store (parent_fd_in, parent_fd_out);
1155 sam_parent_warn_signal_set (parent_fd_in, parent_fd_out);
1159 sam_parent_mark_child_failed (&action, child_pid);
1164 if ((sam_internal_data.recovery_policy & SAM_RECOVERY_POLICY_QUORUM) &&
1165 pfds[1].revents != 0) {
1173 sam_parent_kill_child (&action, child_pid);
1189 int pipe_fd_out[2], pipe_fd_in[2];
1198 recpol = sam_internal_data.recovery_policy;
1204 if ((error = sam_cmap_register ()) !=
CS_OK) {
1212 if ((pipe_error = pipe (pipe_fd_out)) != 0) {
1217 if ((pipe_error = pipe (pipe_fd_in)) != 0) {
1218 close (pipe_fd_out[0]);
1219 close (pipe_fd_out[1]);
1225 if (recpol & SAM_RECOVERY_POLICY_CMAP) {
1231 sam_internal_data.instance_id++;
1233 sam_internal_data.term_send = 0;
1241 sam_internal_data.instance_id--;
1251 close (pipe_fd_out[0]);
1252 close (pipe_fd_in[1]);
1254 sam_internal_data.child_fd_out = pipe_fd_out[1];
1255 sam_internal_data.child_fd_in = pipe_fd_in[0];
1258 *instance_id = sam_internal_data.instance_id;
1260 sam_internal_data.am_i_child = 1;
1263 pthread_mutex_init (&sam_internal_data.lock, NULL);
1270 close (pipe_fd_out[1]);
1271 close (pipe_fd_in[0]);
1273 action = sam_parent_handler (pipe_fd_out[0], pipe_fd_in[1], pid);
1275 close (pipe_fd_out[0]);
1276 close (pipe_fd_in[1]);
1286 while (waitpid (pid, &child_status, 0) == -1 && errno == EINTR)
1289 old_action = action;
1302 if (recpol & SAM_RECOVERY_POLICY_CMAP) {
1309 sam_cmap_destroy_pid_path ();
1313 exit (WEXITSTATUS (child_status));
1324 static void *hc_callback_thread (
void *unused_param)
1328 ssize_t bytes_readed;
1337 time_interval = sam_internal_data.time_interval >> 2;
1340 pfds.fd = sam_internal_data.cb_rpipe_fd;
1341 pfds.events = POLLIN;
1347 tmp_time_interval = -1;
1350 poll_error = poll (&pfds, 1, tmp_time_interval);
1352 if (poll_error == 0) {
1358 if (sam_internal_data.hc_callback () != 0) {
1366 if (poll_error > 0) {
1367 bytes_readed = sam_safe_read (sam_internal_data.cb_rpipe_fd, &command, 1);
1369 if (bytes_readed > 0) {
1383 return (unused_param);
1389 pthread_attr_t thread_attr;
1397 if (sam_internal_data.time_interval == 0) {
1401 if (sam_internal_data.cb_registered) {
1402 sam_internal_data.hc_callback = cb;
1415 pipe_error = pipe (pipe_fd);
1417 if (pipe_error != 0) {
1425 sam_internal_data.cb_rpipe_fd = pipe_fd[0];
1426 sam_internal_data.cb_wpipe_fd = pipe_fd[1];
1431 error = pthread_attr_init (&thread_attr);
1434 goto error_close_fd_exit;
1438 pthread_attr_setdetachstate (&thread_attr, PTHREAD_CREATE_DETACHED);
1439 pthread_attr_setstacksize (&thread_attr, 32768);
1444 error = pthread_create (&sam_internal_data.cb_thread, &thread_attr, hc_callback_thread, NULL);
1448 goto error_attr_destroy_exit;
1454 pthread_attr_destroy(&thread_attr);
1456 sam_internal_data.cb_registered = 1;
1457 sam_internal_data.hc_callback = cb;
1461 error_attr_destroy_exit:
1462 pthread_attr_destroy(&thread_attr);
1463 error_close_fd_exit:
1464 sam_internal_data.cb_rpipe_fd = sam_internal_data.cb_wpipe_fd = 0;
cs_error_t cmap_set_uint64(cmap_handle_t handle, const char *key_name, uint64_t value)
enum sam_internal_status_t internal_status
#define SAM_RP_MASK_Q(pol)
cs_error_t sam_hc_callback_register(sam_hc_callback_t cb)
Register healthcheck callback.
sam_hc_callback_t hc_callback
#define SAM_CMAP_S_STARTED
cs_error_t quorum_dispatch(quorum_handle_t handle, cs_dispatch_flags_t dispatch_types)
Dispatch messages and configuration changes.
int(* sam_hc_callback_t)(void)
Callback definition for event driven checking.
cs_error_t cmap_iter_next(cmap_handle_t handle, cmap_iter_handle_t iter_handle, char key_name[], size_t *value_len, cmap_value_types_t *type)
Return next item in iterator iter.
cmap_handle_t cmap_handle
cs_error_t cmap_initialize(cmap_handle_t *handle)
Create a new cmap connection.
The quorum_callbacks_t struct.
cs_error_t sam_initialize(int time_interval, sam_recovery_policy_t recovery_policy)
Create a new SAM connection.
quorum_handle_t quorum_handle
#define CMAP_KEYNAME_MAXLEN
cs_error_t cmap_iter_init(cmap_handle_t handle, const char *prefix, cmap_iter_handle_t *cmap_iter_handle)
Initialize iterator with given prefix.
#define SAM_CMAP_S_REGISTERED
cs_error_t sam_finalize(void)
Close the SAM handle.
sam_recovery_policy_t recovery_policy
cs_error_t cmap_iter_finalize(cmap_handle_t handle, cmap_iter_handle_t iter_handle)
Finalize iterator.
cs_error_t sam_register(unsigned int *instance_id)
Register application.
cs_error_t sam_hc_send(void)
Send healthcheck confirmation.
#define SAM_CMAP_S_Q_WAIT
size_t user_data_allocated
cs_error_t sam_mark_failed(void)
Marks child as failed.
char cmap_pid_path[CMAP_KEYNAME_MAXLEN]
cs_error_t
The cs_error_t enum.
cs_error_t sam_data_getsize(size_t *size)
Return size of stored data.
cs_error_t cmap_set_string(cmap_handle_t handle, const char *key_name, const char *value)
cs_error_t sam_stop(void)
Stop healthchecking.
uint64_t quorum_handle_t
quorum_handle_t
cs_error_t sam_data_store(const void *data, size_t size)
Store user data.
cs_error_t sam_warn_signal_set(int warn_signal)
Set warning signal to be sent.
cs_error_t cmap_delete(cmap_handle_t handle, const char *key_name)
Deletes key from cmap database.
cs_error_t sam_data_restore(void *data, size_t size)
Return stored data.
uint64_t cmap_iter_handle_t
cs_error_t quorum_fd_get(quorum_handle_t handle, int *fd)
Get a file descriptor on which to poll.
quorum_notification_fn_t quorum_notify_fn
sam_recovery_policy_t
sam_recovery_policy_t enum
cs_error_t quorum_trackstart(quorum_handle_t handle, unsigned int flags)
Track node and quorum changes.
cs_error_t cmap_finalize(cmap_handle_t handle)
Close the cmap handle.
struct memb_ring_id ring_id
cs_error_t quorum_finalize(quorum_handle_t handle)
Close the quorum handle.
#define SAM_CMAP_S_FAILED
cs_error_t sam_start(void)
Start healthchecking.
#define SAM_RP_MASK_C(pol)
cs_error_t quorum_initialize(quorum_handle_t *handle, quorum_callbacks_t *callbacks, uint32_t *quorum_type)
Create a new quorum connection.