ctdb_mutex_ceph_rados_helper: revert strtoull_err() usage
[gd/samba-autobuild/.git] / ctdb / utils / ceph / ctdb_mutex_ceph_rados_helper.c
1 /*
2    CTDB mutex helper using Ceph librados locks
3
4    Copyright (C) David Disseldorp 2016-2018
5
6    Based on ctdb_mutex_fcntl_helper.c, which is:
7    Copyright (C) Martin Schwenke 2015
8
9    This program is free software; you can redistribute it and/or modify
10    it under the terms of the GNU General Public License as published by
11    the Free Software Foundation; either version 3 of the License, or
12    (at your option) any later version.
13
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License for more details.
18
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, see <http://www.gnu.org/licenses/>.
21 */
22
23 #include "replace.h"
24
25 #include "tevent.h"
26 #include "talloc.h"
27 #include "rados/librados.h"
28
/*
 * Identifiers handed to librados when taking/dropping the exclusive lock.
 * The cookie deliberately reuses the lock name.
 */
#define CTDB_MUTEX_CEPH_LOCK_NAME       "ctdb_reclock_mutex"
#define CTDB_MUTEX_CEPH_LOCK_COOKIE     CTDB_MUTEX_CEPH_LOCK_NAME
#define CTDB_MUTEX_CEPH_LOCK_DESC       "CTDB recovery lock"

/*
 * Default lock expiry used when no duration is given on the command line.
 * During failover it may take up to <lock duration> seconds before the
 * newly elected recovery master can obtain the lock.
 */
#define CTDB_MUTEX_CEPH_LOCK_DURATION_SECS_DEFAULT      10

/* Single-character status codes written to stdout by this helper */
#define CTDB_MUTEX_STATUS_HOLDING "0"
#define CTDB_MUTEX_STATUS_CONTENDED "1"
#define CTDB_MUTEX_STATUS_TIMEOUT "2"
#define CTDB_MUTEX_STATUS_ERROR "3"

/* argv[0], set in main() and used as a prefix for diagnostics */
static char *progname = NULL;
44
45 static int ctdb_mutex_rados_ctx_create(const char *ceph_cluster_name,
46                                        const char *ceph_auth_name,
47                                        const char *pool_name,
48                                        rados_t *_ceph_cluster,
49                                        rados_ioctx_t *_ioctx)
50 {
51         rados_t ceph_cluster = NULL;
52         rados_ioctx_t ioctx = NULL;
53         int ret;
54
55         ret = rados_create2(&ceph_cluster, ceph_cluster_name, ceph_auth_name, 0);
56         if (ret < 0) {
57                 fprintf(stderr, "%s: failed to initialise Ceph cluster %s as %s"
58                         " - (%s)\n", progname, ceph_cluster_name, ceph_auth_name,
59                         strerror(-ret));
60                 return ret;
61         }
62
63         /* path=NULL tells librados to use default locations */
64         ret = rados_conf_read_file(ceph_cluster, NULL);
65         if (ret < 0) {
66                 fprintf(stderr, "%s: failed to parse Ceph cluster config"
67                         " - (%s)\n", progname, strerror(-ret));
68                 rados_shutdown(ceph_cluster);
69                 return ret;
70         }
71
72         ret = rados_connect(ceph_cluster);
73         if (ret < 0) {
74                 fprintf(stderr, "%s: failed to connect to Ceph cluster %s as %s"
75                         " - (%s)\n", progname, ceph_cluster_name, ceph_auth_name,
76                         strerror(-ret));
77                 rados_shutdown(ceph_cluster);
78                 return ret;
79         }
80
81
82         ret = rados_ioctx_create(ceph_cluster, pool_name, &ioctx);
83         if (ret < 0) {
84                 fprintf(stderr, "%s: failed to create Ceph ioctx for pool %s"
85                         " - (%s)\n", progname, pool_name, strerror(-ret));
86                 rados_shutdown(ceph_cluster);
87                 return ret;
88         }
89
90         *_ceph_cluster = ceph_cluster;
91         *_ioctx = ioctx;
92
93         return 0;
94 }
95
96 static int ctdb_mutex_rados_lock(rados_ioctx_t *ioctx,
97                                  const char *oid,
98                                  uint64_t lock_duration_s,
99                                  uint8_t flags)
100 {
101         int ret;
102         struct timeval tv = { lock_duration_s, 0 };
103
104         ret = rados_lock_exclusive(ioctx, oid,
105                                    CTDB_MUTEX_CEPH_LOCK_NAME,
106                                    CTDB_MUTEX_CEPH_LOCK_COOKIE,
107                                    CTDB_MUTEX_CEPH_LOCK_DESC,
108                                    lock_duration_s == 0 ? NULL : &tv,
109                                    flags);
110         if ((ret == -EEXIST) || (ret == -EBUSY)) {
111                 /* lock contention */
112                 return ret;
113         } else if (ret < 0) {
114                 /* unexpected failure */
115                 fprintf(stderr,
116                         "%s: Failed to get lock on RADOS object '%s' - (%s)\n",
117                         progname, oid, strerror(-ret));
118                 return ret;
119         }
120
121         /* lock obtained */
122         return 0;
123 }
124
125 static int ctdb_mutex_rados_unlock(rados_ioctx_t *ioctx,
126                                    const char *oid)
127 {
128         int ret;
129
130         ret = rados_unlock(ioctx, oid,
131                            CTDB_MUTEX_CEPH_LOCK_NAME,
132                            CTDB_MUTEX_CEPH_LOCK_COOKIE);
133         if (ret < 0) {
134                 fprintf(stderr,
135                         "%s: Failed to drop lock on RADOS object '%s' - (%s)\n",
136                         progname, oid, strerror(-ret));
137                 return ret;
138         }
139
140         return 0;
141 }
142
143 struct ctdb_mutex_rados_state {
144         bool holding_mutex;
145         const char *ceph_cluster_name;
146         const char *ceph_auth_name;
147         const char *pool_name;
148         const char *object;
149         uint64_t lock_duration_s;
150         int ppid;
151         struct tevent_context *ev;
152         struct tevent_signal *sigterm_ev;
153         struct tevent_signal *sigint_ev;
154         struct tevent_timer *ppid_timer_ev;
155         struct tevent_timer *renew_timer_ev;
156         rados_t ceph_cluster;
157         rados_ioctx_t ioctx;
158 };
159
160 static void ctdb_mutex_rados_sigterm_cb(struct tevent_context *ev,
161                                         struct tevent_signal *se,
162                                         int signum,
163                                         int count,
164                                         void *siginfo,
165                                         void *private_data)
166 {
167         struct ctdb_mutex_rados_state *cmr_state = private_data;
168         int ret = 0;
169
170         if (!cmr_state->holding_mutex) {
171                 fprintf(stderr, "Sigterm callback invoked without mutex!\n");
172                 ret = -EINVAL;
173         }
174
175         talloc_free(cmr_state);
176         exit(ret ? 1 : 0);
177 }
178
179 static void ctdb_mutex_rados_ppid_timer_cb(struct tevent_context *ev,
180                                            struct tevent_timer *te,
181                                            struct timeval current_time,
182                                            void *private_data)
183 {
184         struct ctdb_mutex_rados_state *cmr_state = private_data;
185         int ret = 0;
186
187         if (!cmr_state->holding_mutex) {
188                 fprintf(stderr, "Timer callback invoked without mutex!\n");
189                 ret = -EINVAL;
190                 goto err_ctx_cleanup;
191         }
192
193         if ((kill(cmr_state->ppid, 0) == 0) || (errno != ESRCH)) {
194                 /* parent still around, keep waiting */
195                 cmr_state->ppid_timer_ev = tevent_add_timer(cmr_state->ev,
196                                                             cmr_state,
197                                                tevent_timeval_current_ofs(5, 0),
198                                                 ctdb_mutex_rados_ppid_timer_cb,
199                                                             cmr_state);
200                 if (cmr_state->ppid_timer_ev == NULL) {
201                         fprintf(stderr, "Failed to create timer event\n");
202                         /* rely on signal cb */
203                 }
204                 return;
205         }
206
207         /* parent ended, drop lock (via destructor) and exit */
208 err_ctx_cleanup:
209         talloc_free(cmr_state);
210         exit(ret ? 1 : 0);
211 }
212
213 #define USECS_IN_SEC 1000000
214
215 static void ctdb_mutex_rados_lock_renew_timer_cb(struct tevent_context *ev,
216                                                  struct tevent_timer *te,
217                                                  struct timeval current_time,
218                                                  void *private_data)
219 {
220         struct ctdb_mutex_rados_state *cmr_state = private_data;
221         struct timeval tv;
222         int ret;
223
224         ret = ctdb_mutex_rados_lock(cmr_state->ioctx, cmr_state->object,
225                                     cmr_state->lock_duration_s,
226                                     LIBRADOS_LOCK_FLAG_RENEW);
227         if (ret == -EBUSY) {
228                 /* should never get -EEXIST on renewal */
229                 fprintf(stderr, "Lock contention during renew: %d\n", ret);
230                 goto err_ctx_cleanup;
231         } else if (ret < 0) {
232                 fprintf(stderr, "Lock renew failed\n");
233                 goto err_ctx_cleanup;
234         }
235
236         tv = tevent_timeval_current_ofs(0,
237                             cmr_state->lock_duration_s * (USECS_IN_SEC / 2));
238         cmr_state->renew_timer_ev = tevent_add_timer(cmr_state->ev,
239                                                        cmr_state,
240                                                        tv,
241                                         ctdb_mutex_rados_lock_renew_timer_cb,
242                                                        cmr_state);
243         if (cmr_state->renew_timer_ev == NULL) {
244                 fprintf(stderr, "Failed to create timer event\n");
245                 goto err_ctx_cleanup;
246         }
247
248         return;
249
250 err_ctx_cleanup:
251         /* drop lock (via destructor) and exit */
252         talloc_free(cmr_state);
253         exit(1);
254 }
255
256 static int ctdb_mutex_rados_state_destroy(struct ctdb_mutex_rados_state *cmr_state)
257 {
258         if (cmr_state->holding_mutex) {
259                 ctdb_mutex_rados_unlock(cmr_state->ioctx, cmr_state->object);
260         }
261         if (cmr_state->ioctx != NULL) {
262                 rados_ioctx_destroy(cmr_state->ioctx);
263         }
264         if (cmr_state->ceph_cluster != NULL) {
265                 rados_shutdown(cmr_state->ceph_cluster);
266         }
267         return 0;
268 }
269
270 int main(int argc, char *argv[])
271 {
272         int ret;
273         struct ctdb_mutex_rados_state *cmr_state;
274
275         progname = argv[0];
276
277         if ((argc != 5) && (argc != 6)) {
278                 fprintf(stderr, "Usage: %s <Ceph Cluster> <Ceph user> "
279                                 "<RADOS pool> <RADOS object> "
280                                 "[lock duration secs]\n",
281                         progname);
282                 ret = -EINVAL;
283                 goto err_out;
284         }
285
286         ret = setvbuf(stdout, NULL, _IONBF, 0);
287         if (ret != 0) {
288                 fprintf(stderr, "Failed to configure unbuffered stdout I/O\n");
289         }
290
291         cmr_state = talloc_zero(NULL, struct ctdb_mutex_rados_state);
292         if (cmr_state == NULL) {
293                 fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
294                 ret = -ENOMEM;
295                 goto err_out;
296         }
297
298         talloc_set_destructor(cmr_state, ctdb_mutex_rados_state_destroy);
299         cmr_state->ceph_cluster_name = argv[1];
300         cmr_state->ceph_auth_name = argv[2];
301         cmr_state->pool_name = argv[3];
302         cmr_state->object = argv[4];
303         if (argc == 6) {
304                 /* optional lock duration provided */
305                 char *endptr = NULL;
306                 cmr_state->lock_duration_s = strtoull(argv[5], &endptr, 0);
307                 if ((endptr == argv[5]) || (*endptr != '\0')) {
308                         fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
309                         ret = -EINVAL;
310                         goto err_ctx_cleanup;
311                 }
312         } else {
313                 cmr_state->lock_duration_s
314                         = CTDB_MUTEX_CEPH_LOCK_DURATION_SECS_DEFAULT;
315         }
316
317         cmr_state->ppid = getppid();
318         if (cmr_state->ppid == 1) {
319                 /*
320                  * The original parent is gone and the process has
321                  * been reparented to init.  This can happen if the
322                  * helper is started just as the parent is killed
323                  * during shutdown.  The error message doesn't need to
324                  * be stellar, since there won't be anything around to
325                  * capture and log it...
326                  */
327                 fprintf(stderr, "%s: PPID == 1\n", progname);
328                 ret = -EPIPE;
329                 goto err_ctx_cleanup;
330         }
331
332         cmr_state->ev = tevent_context_init(cmr_state);
333         if (cmr_state->ev == NULL) {
334                 fprintf(stderr, "tevent_context_init failed\n");
335                 fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
336                 ret = -ENOMEM;
337                 goto err_ctx_cleanup;
338         }
339
340         /* wait for sigterm */
341         cmr_state->sigterm_ev = tevent_add_signal(cmr_state->ev, cmr_state, SIGTERM, 0,
342                                               ctdb_mutex_rados_sigterm_cb,
343                                               cmr_state);
344         if (cmr_state->sigterm_ev == NULL) {
345                 fprintf(stderr, "Failed to create term signal event\n");
346                 fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
347                 ret = -ENOMEM;
348                 goto err_ctx_cleanup;
349         }
350
351         cmr_state->sigint_ev = tevent_add_signal(cmr_state->ev, cmr_state, SIGINT, 0,
352                                               ctdb_mutex_rados_sigterm_cb,
353                                               cmr_state);
354         if (cmr_state->sigint_ev == NULL) {
355                 fprintf(stderr, "Failed to create int signal event\n");
356                 fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
357                 ret = -ENOMEM;
358                 goto err_ctx_cleanup;
359         }
360
361         /* periodically check parent */
362         cmr_state->ppid_timer_ev = tevent_add_timer(cmr_state->ev, cmr_state,
363                                                tevent_timeval_current_ofs(5, 0),
364                                                ctdb_mutex_rados_ppid_timer_cb,
365                                                cmr_state);
366         if (cmr_state->ppid_timer_ev == NULL) {
367                 fprintf(stderr, "Failed to create timer event\n");
368                 fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
369                 ret = -ENOMEM;
370                 goto err_ctx_cleanup;
371         }
372
373         ret = ctdb_mutex_rados_ctx_create(cmr_state->ceph_cluster_name,
374                                           cmr_state->ceph_auth_name,
375                                           cmr_state->pool_name,
376                                           &cmr_state->ceph_cluster,
377                                           &cmr_state->ioctx);
378         if (ret < 0) {
379                 fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
380                 goto err_ctx_cleanup;
381         }
382
383         ret = ctdb_mutex_rados_lock(cmr_state->ioctx, cmr_state->object,
384                                     cmr_state->lock_duration_s,
385                                     0);
386         if ((ret == -EEXIST) || (ret == -EBUSY)) {
387                 fprintf(stdout, CTDB_MUTEX_STATUS_CONTENDED);
388                 goto err_ctx_cleanup;
389         } else if (ret < 0) {
390                 fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
391                 goto err_ctx_cleanup;
392         }
393         cmr_state->holding_mutex = true;
394
395         if (cmr_state->lock_duration_s != 0) {
396                 /*
397                  * renew (reobtain) the lock, using a period of half the lock
398                  * duration. Convert to usecs to avoid rounding.
399                  */
400                 struct timeval tv = tevent_timeval_current_ofs(0,
401                                cmr_state->lock_duration_s * (USECS_IN_SEC / 2));
402                 cmr_state->renew_timer_ev = tevent_add_timer(cmr_state->ev,
403                                                                cmr_state,
404                                                                tv,
405                                         ctdb_mutex_rados_lock_renew_timer_cb,
406                                                                cmr_state);
407                 if (cmr_state->renew_timer_ev == NULL) {
408                         fprintf(stderr, "Failed to create timer event\n");
409                         fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
410                         ret = -ENOMEM;
411                         goto err_ctx_cleanup;
412                 }
413         }
414
415         fprintf(stdout, CTDB_MUTEX_STATUS_HOLDING);
416
417         /* wait for the signal / timer events to do their work */
418         ret = tevent_loop_wait(cmr_state->ev);
419         if (ret < 0) {
420                 goto err_ctx_cleanup;
421         }
422 err_ctx_cleanup:
423         talloc_free(cmr_state);
424 err_out:
425         return ret ? 1 : 0;
426 }