326a0b025196b036904cc8f6629777e934edb60d
[amitay/samba.git] / ctdb / utils / ceph / ctdb_mutex_ceph_rados_helper.c
1 /*
2    CTDB mutex helper using Ceph librados locks
3
4    Copyright (C) David Disseldorp 2016
5
6    Based on ctdb_mutex_fcntl_helper.c, which is:
7    Copyright (C) Martin Schwenke 2015
8
9    This program is free software; you can redistribute it and/or modify
10    it under the terms of the GNU General Public License as published by
11    the Free Software Foundation; either version 3 of the License, or
12    (at your option) any later version.
13
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License for more details.
18
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, see <http://www.gnu.org/licenses/>.
21 */
22
23 #include "replace.h"
24
25 #include "tevent.h"
26 #include "talloc.h"
27 #include "rados/librados.h"
28
/*
 * Lock name, cookie and description handed to librados when the lock is
 * taken and dropped. The cookie is deliberately the same string as the name.
 */
29 #define CTDB_MUTEX_CEPH_LOCK_NAME       "ctdb_reclock_mutex"
30 #define CTDB_MUTEX_CEPH_LOCK_COOKIE     CTDB_MUTEX_CEPH_LOCK_NAME
31 #define CTDB_MUTEX_CEPH_LOCK_DESC       "CTDB recovery lock"
32
/*
 * Status strings written to stdout by main():
 * HOLDING once the lock has been obtained, CONTENDED when the lock attempt
 * fails with -EEXIST or -EBUSY, ERROR for the other failure paths that
 * report a status.
 * NOTE(review): TIMEOUT is not referenced by the code visible in this file.
 */
33 #define CTDB_MUTEX_STATUS_HOLDING "0"
34 #define CTDB_MUTEX_STATUS_CONTENDED "1"
35 #define CTDB_MUTEX_STATUS_TIMEOUT "2"
36 #define CTDB_MUTEX_STATUS_ERROR "3"
37
/* Set to argv[0] at the start of main(); used as a prefix in stderr messages */
38 static char *progname = NULL;
39
40 static int ctdb_mutex_rados_ctx_create(const char *ceph_cluster_name,
41                                        const char *ceph_auth_name,
42                                        const char *pool_name,
43                                        rados_t *_ceph_cluster,
44                                        rados_ioctx_t *_ioctx)
45 {
46         rados_t ceph_cluster = NULL;
47         rados_ioctx_t ioctx = NULL;
48         int ret;
49
50         ret = rados_create2(&ceph_cluster, ceph_cluster_name, ceph_auth_name, 0);
51         if (ret < 0) {
52                 fprintf(stderr, "%s: failed to initialise Ceph cluster %s as %s"
53                         " - (%s)\n", progname, ceph_cluster_name, ceph_auth_name,
54                         strerror(-ret));
55                 return ret;
56         }
57
58         /* path=NULL tells librados to use default locations */
59         ret = rados_conf_read_file(ceph_cluster, NULL);
60         if (ret < 0) {
61                 fprintf(stderr, "%s: failed to parse Ceph cluster config"
62                         " - (%s)\n", progname, strerror(-ret));
63                 rados_shutdown(ceph_cluster);
64                 return ret;
65         }
66
67         ret = rados_connect(ceph_cluster);
68         if (ret < 0) {
69                 fprintf(stderr, "%s: failed to connect to Ceph cluster %s as %s"
70                         " - (%s)\n", progname, ceph_cluster_name, ceph_auth_name,
71                         strerror(-ret));
72                 rados_shutdown(ceph_cluster);
73                 return ret;
74         }
75
76
77         ret = rados_ioctx_create(ceph_cluster, pool_name, &ioctx);
78         if (ret < 0) {
79                 fprintf(stderr, "%s: failed to create Ceph ioctx for pool %s"
80                         " - (%s)\n", progname, pool_name, strerror(-ret));
81                 rados_shutdown(ceph_cluster);
82                 return ret;
83         }
84
85         *_ceph_cluster = ceph_cluster;
86         *_ioctx = ioctx;
87
88         return 0;
89 }
90
91 static void ctdb_mutex_rados_ctx_destroy(rados_t ceph_cluster,
92                                          rados_ioctx_t ioctx)
93 {
94         rados_ioctx_destroy(ioctx);
95         rados_shutdown(ceph_cluster);
96 }
97
98 static int ctdb_mutex_rados_lock(rados_ioctx_t *ioctx,
99                                  const char *oid)
100 {
101         int ret;
102
103         ret = rados_lock_exclusive(ioctx, oid,
104                                    CTDB_MUTEX_CEPH_LOCK_NAME,
105                                    CTDB_MUTEX_CEPH_LOCK_COOKIE,
106                                    CTDB_MUTEX_CEPH_LOCK_DESC,
107                                    NULL, /* infinite duration */
108                                    0);
109         if ((ret == -EEXIST) || (ret == -EBUSY)) {
110                 /* lock contention */
111                 return ret;
112         } else if (ret < 0) {
113                 /* unexpected failure */
114                 fprintf(stderr,
115                         "%s: Failed to get lock on RADOS object '%s' - (%s)\n",
116                         progname, oid, strerror(-ret));
117                 return ret;
118         }
119
120         /* lock obtained */
121         return 0;
122 }
123
124 static int ctdb_mutex_rados_unlock(rados_ioctx_t *ioctx,
125                                    const char *oid)
126 {
127         int ret;
128
129         ret = rados_unlock(ioctx, oid,
130                            CTDB_MUTEX_CEPH_LOCK_NAME,
131                            CTDB_MUTEX_CEPH_LOCK_COOKIE);
132         if (ret < 0) {
133                 fprintf(stderr,
134                         "%s: Failed to drop lock on RADOS object '%s' - (%s)\n",
135                         progname, oid, strerror(-ret));
136                 return ret;
137         }
138
139         return 0;
140 }
141
/*
 * Per-process helper state. Allocated zeroed in main() and handed to the
 * SIGTERM and timer callbacks as their private data.
 */
142 struct ctdb_mutex_rados_state {
143         bool holding_mutex; /* set to true in main() once the lock is obtained */
144         const char *ceph_cluster_name; /* argv[1] */
145         const char *ceph_auth_name; /* argv[2] */
146         const char *pool_name; /* argv[3] */
147         const char *object; /* argv[4]: RADOS object the lock is taken on */
148         int ppid; /* parent PID from getppid(), probed by the timer callback */
149         struct tevent_context *ev; /* event context driving both callbacks */
150         struct tevent_signal *sig_ev; /* SIGTERM handler registration */
151         struct tevent_timer *timer_ev; /* pending 5 second parent check */
152         rados_t ceph_cluster; /* filled in by ctdb_mutex_rados_ctx_create() */
153         rados_ioctx_t ioctx; /* filled in by ctdb_mutex_rados_ctx_create() */
154 };
155
156 static void ctdb_mutex_rados_sigterm_cb(struct tevent_context *ev,
157                                         struct tevent_signal *se,
158                                         int signum,
159                                         int count,
160                                         void *siginfo,
161                                         void *private_data)
162 {
163         struct ctdb_mutex_rados_state *cmr_state = private_data;
164         int ret;
165
166         if (!cmr_state->holding_mutex) {
167                 fprintf(stderr, "Sigterm callback invoked without mutex!\n");
168                 ret = -EINVAL;
169                 goto err_ctx_cleanup;
170         }
171
172         ret = ctdb_mutex_rados_unlock(cmr_state->ioctx, cmr_state->object);
173 err_ctx_cleanup:
174         ctdb_mutex_rados_ctx_destroy(cmr_state->ceph_cluster,
175                                      cmr_state->ioctx);
176         talloc_free(cmr_state);
177         exit(ret ? 1 : 0);
178 }
179
180 static void ctdb_mutex_rados_timer_cb(struct tevent_context *ev,
181                                       struct tevent_timer *te,
182                                       struct timeval current_time,
183                                       void *private_data)
184 {
185         struct ctdb_mutex_rados_state *cmr_state = private_data;
186         int ret;
187
188         if (!cmr_state->holding_mutex) {
189                 fprintf(stderr, "Timer callback invoked without mutex!\n");
190                 ret = -EINVAL;
191                 goto err_ctx_cleanup;
192         }
193
194         if ((kill(cmr_state->ppid, 0) == 0) || (errno != ESRCH)) {
195                 /* parent still around, keep waiting */
196                 cmr_state->timer_ev = tevent_add_timer(cmr_state->ev, cmr_state,
197                                                tevent_timeval_current_ofs(5, 0),
198                                                       ctdb_mutex_rados_timer_cb,
199                                                        cmr_state);
200                 if (cmr_state->timer_ev == NULL) {
201                         fprintf(stderr, "Failed to create timer event\n");
202                         /* rely on signal cb */
203                 }
204                 return;
205         }
206
207         /* parent ended, drop lock and exit */
208         ret = ctdb_mutex_rados_unlock(cmr_state->ioctx, cmr_state->object);
209 err_ctx_cleanup:
210         ctdb_mutex_rados_ctx_destroy(cmr_state->ceph_cluster,
211                                      cmr_state->ioctx);
212         talloc_free(cmr_state);
213         exit(ret ? 1 : 0);
214 }
215
216 int main(int argc, char *argv[])
217 {
218         int ret;
219         struct ctdb_mutex_rados_state *cmr_state;
220
221         progname = argv[0];
222
223         if (argc != 5) {
224                 fprintf(stderr, "Usage: %s <Ceph Cluster> <Ceph user> "
225                                 "<RADOS pool> <RADOS object>\n",
226                         progname);
227                 ret = -EINVAL;
228                 goto err_out;
229         }
230
231         ret = setvbuf(stdout, NULL, _IONBF, 0);
232         if (ret != 0) {
233                 fprintf(stderr, "Failed to configure unbuffered stdout I/O\n");
234         }
235
236         cmr_state = talloc_zero(NULL, struct ctdb_mutex_rados_state);
237         if (cmr_state == NULL) {
238                 fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
239                 ret = -ENOMEM;
240                 goto err_out;
241         }
242
243         cmr_state->ceph_cluster_name = argv[1];
244         cmr_state->ceph_auth_name = argv[2];
245         cmr_state->pool_name = argv[3];
246         cmr_state->object = argv[4];
247
248         cmr_state->ppid = getppid();
249         if (cmr_state->ppid == 1) {
250                 /*
251                  * The original parent is gone and the process has
252                  * been reparented to init.  This can happen if the
253                  * helper is started just as the parent is killed
254                  * during shutdown.  The error message doesn't need to
255                  * be stellar, since there won't be anything around to
256                  * capture and log it...
257                  */
258                 fprintf(stderr, "%s: PPID == 1\n", progname);
259                 ret = -EPIPE;
260                 goto err_state_free;
261         }
262
263         cmr_state->ev = tevent_context_init(cmr_state);
264         if (cmr_state->ev == NULL) {
265                 fprintf(stderr, "tevent_context_init failed\n");
266                 fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
267                 ret = -ENOMEM;
268                 goto err_state_free;
269         }
270
271         /* wait for sigterm */
272         cmr_state->sig_ev = tevent_add_signal(cmr_state->ev, cmr_state, SIGTERM, 0,
273                                               ctdb_mutex_rados_sigterm_cb,
274                                               cmr_state);
275         if (cmr_state->sig_ev == NULL) {
276                 fprintf(stderr, "Failed to create signal event\n");
277                 fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
278                 ret = -ENOMEM;
279                 goto err_state_free;
280         }
281
282         /* periodically check parent */
283         cmr_state->timer_ev = tevent_add_timer(cmr_state->ev, cmr_state,
284                                                tevent_timeval_current_ofs(5, 0),
285                                                ctdb_mutex_rados_timer_cb,
286                                                cmr_state);
287         if (cmr_state->timer_ev == NULL) {
288                 fprintf(stderr, "Failed to create timer event\n");
289                 fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
290                 ret = -ENOMEM;
291                 goto err_state_free;
292         }
293
294         ret = ctdb_mutex_rados_ctx_create(cmr_state->ceph_cluster_name,
295                                           cmr_state->ceph_auth_name,
296                                           cmr_state->pool_name,
297                                           &cmr_state->ceph_cluster,
298                                           &cmr_state->ioctx);
299         if (ret < 0) {
300                 fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
301                 goto err_state_free;
302         }
303
304         ret = ctdb_mutex_rados_lock(cmr_state->ioctx, cmr_state->object);
305         if ((ret == -EEXIST) || (ret == -EBUSY)) {
306                 fprintf(stdout, CTDB_MUTEX_STATUS_CONTENDED);
307                 goto err_ctx_cleanup;
308         } else if (ret < 0) {
309                 fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
310                 goto err_ctx_cleanup;
311         }
312
313         cmr_state->holding_mutex = true;
314         fprintf(stdout, CTDB_MUTEX_STATUS_HOLDING);
315
316         /* wait for the signal / timer events to do their work */
317         ret = tevent_loop_wait(cmr_state->ev);
318         if (ret < 0) {
319                 goto err_ctx_cleanup;
320         }
321 err_ctx_cleanup:
322         ctdb_mutex_rados_ctx_destroy(cmr_state->ceph_cluster,
323                                      cmr_state->ioctx);
324 err_state_free:
325         talloc_free(cmr_state);
326 err_out:
327         return ret ? 1 : 0;
328 }