/*
 * CTDB mutex helper using Ceph librados locks
 *
 * Copyright (C) David Disseldorp 2016
 *
 * Based on ctdb_mutex_fcntl_helper.c, which is:
 * Copyright (C) Martin Schwenke 2015
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
27 #include "rados/librados.h"
/* name, cookie and description used for the librados exclusive lock */
#define CTDB_MUTEX_CEPH_LOCK_NAME	"ctdb_reclock_mutex"
#define CTDB_MUTEX_CEPH_LOCK_COOKIE	CTDB_MUTEX_CEPH_LOCK_NAME
#define CTDB_MUTEX_CEPH_LOCK_DESC	"CTDB recovery lock"

/*
 * Single-character status codes written to stdout for the parent
 * process to consume (see main()): holding / contended / timeout /
 * error.
 */
#define CTDB_MUTEX_STATUS_HOLDING	"0"
#define CTDB_MUTEX_STATUS_CONTENDED	"1"
#define CTDB_MUTEX_STATUS_TIMEOUT	"2"
#define CTDB_MUTEX_STATUS_ERROR		"3"

/* argv[0], used as a prefix in error messages */
static char *progname = NULL;
40 static int ctdb_mutex_rados_ctx_create(const char *ceph_cluster_name,
41 const char *ceph_auth_name,
42 const char *pool_name,
43 rados_t *_ceph_cluster,
44 rados_ioctx_t *_ioctx)
46 rados_t ceph_cluster = NULL;
47 rados_ioctx_t ioctx = NULL;
50 ret = rados_create2(&ceph_cluster, ceph_cluster_name, ceph_auth_name, 0);
52 fprintf(stderr, "%s: failed to initialise Ceph cluster %s as %s"
53 " - (%s)\n", progname, ceph_cluster_name, ceph_auth_name,
58 /* path=NULL tells librados to use default locations */
59 ret = rados_conf_read_file(ceph_cluster, NULL);
61 fprintf(stderr, "%s: failed to parse Ceph cluster config"
62 " - (%s)\n", progname, strerror(-ret));
63 rados_shutdown(ceph_cluster);
67 ret = rados_connect(ceph_cluster);
69 fprintf(stderr, "%s: failed to connect to Ceph cluster %s as %s"
70 " - (%s)\n", progname, ceph_cluster_name, ceph_auth_name,
72 rados_shutdown(ceph_cluster);
77 ret = rados_ioctx_create(ceph_cluster, pool_name, &ioctx);
79 fprintf(stderr, "%s: failed to create Ceph ioctx for pool %s"
80 " - (%s)\n", progname, pool_name, strerror(-ret));
81 rados_shutdown(ceph_cluster);
85 *_ceph_cluster = ceph_cluster;
91 static void ctdb_mutex_rados_ctx_destroy(rados_t ceph_cluster,
94 rados_ioctx_destroy(ioctx);
95 rados_shutdown(ceph_cluster);
98 static int ctdb_mutex_rados_lock(rados_ioctx_t *ioctx,
103 ret = rados_lock_exclusive(ioctx, oid,
104 CTDB_MUTEX_CEPH_LOCK_NAME,
105 CTDB_MUTEX_CEPH_LOCK_COOKIE,
106 CTDB_MUTEX_CEPH_LOCK_DESC,
107 NULL, /* infinite duration */
109 if ((ret == -EEXIST) || (ret == -EBUSY)) {
110 /* lock contention */
112 } else if (ret < 0) {
113 /* unexpected failure */
115 "%s: Failed to get lock on RADOS object '%s' - (%s)\n",
116 progname, oid, strerror(-ret));
124 static int ctdb_mutex_rados_unlock(rados_ioctx_t *ioctx,
129 ret = rados_unlock(ioctx, oid,
130 CTDB_MUTEX_CEPH_LOCK_NAME,
131 CTDB_MUTEX_CEPH_LOCK_COOKIE);
134 "%s: Failed to drop lock on RADOS object '%s' - (%s)\n",
135 progname, oid, strerror(-ret));
142 struct ctdb_mutex_rados_state {
144 const char *ceph_cluster_name;
145 const char *ceph_auth_name;
146 const char *pool_name;
149 struct tevent_context *ev;
150 struct tevent_signal *sig_ev;
151 struct tevent_timer *timer_ev;
152 rados_t ceph_cluster;
156 static void ctdb_mutex_rados_sigterm_cb(struct tevent_context *ev,
157 struct tevent_signal *se,
163 struct ctdb_mutex_rados_state *cmr_state = private_data;
166 if (!cmr_state->holding_mutex) {
167 fprintf(stderr, "Sigterm callback invoked without mutex!\n");
169 goto err_ctx_cleanup;
172 ret = ctdb_mutex_rados_unlock(cmr_state->ioctx, cmr_state->object);
174 ctdb_mutex_rados_ctx_destroy(cmr_state->ceph_cluster,
176 talloc_free(cmr_state);
180 static void ctdb_mutex_rados_timer_cb(struct tevent_context *ev,
181 struct tevent_timer *te,
182 struct timeval current_time,
185 struct ctdb_mutex_rados_state *cmr_state = private_data;
188 if (!cmr_state->holding_mutex) {
189 fprintf(stderr, "Timer callback invoked without mutex!\n");
191 goto err_ctx_cleanup;
194 if ((kill(cmr_state->ppid, 0) == 0) || (errno != ESRCH)) {
195 /* parent still around, keep waiting */
196 cmr_state->timer_ev = tevent_add_timer(cmr_state->ev, cmr_state,
197 tevent_timeval_current_ofs(5, 0),
198 ctdb_mutex_rados_timer_cb,
200 if (cmr_state->timer_ev == NULL) {
201 fprintf(stderr, "Failed to create timer event\n");
202 /* rely on signal cb */
207 /* parent ended, drop lock and exit */
208 ret = ctdb_mutex_rados_unlock(cmr_state->ioctx, cmr_state->object);
210 ctdb_mutex_rados_ctx_destroy(cmr_state->ceph_cluster,
212 talloc_free(cmr_state);
216 int main(int argc, char *argv[])
219 struct ctdb_mutex_rados_state *cmr_state;
224 fprintf(stderr, "Usage: %s <Ceph Cluster> <Ceph user> "
225 "<RADOS pool> <RADOS object>\n",
231 ret = setvbuf(stdout, NULL, _IONBF, 0);
233 fprintf(stderr, "Failed to configure unbuffered stdout I/O\n");
236 cmr_state = talloc_zero(NULL, struct ctdb_mutex_rados_state);
237 if (cmr_state == NULL) {
238 fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
243 cmr_state->ceph_cluster_name = argv[1];
244 cmr_state->ceph_auth_name = argv[2];
245 cmr_state->pool_name = argv[3];
246 cmr_state->object = argv[4];
248 cmr_state->ppid = getppid();
249 if (cmr_state->ppid == 1) {
251 * The original parent is gone and the process has
252 * been reparented to init. This can happen if the
253 * helper is started just as the parent is killed
254 * during shutdown. The error message doesn't need to
255 * be stellar, since there won't be anything around to
256 * capture and log it...
258 fprintf(stderr, "%s: PPID == 1\n", progname);
263 cmr_state->ev = tevent_context_init(cmr_state);
264 if (cmr_state->ev == NULL) {
265 fprintf(stderr, "tevent_context_init failed\n");
266 fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
271 /* wait for sigterm */
272 cmr_state->sig_ev = tevent_add_signal(cmr_state->ev, cmr_state, SIGTERM, 0,
273 ctdb_mutex_rados_sigterm_cb,
275 if (cmr_state->sig_ev == NULL) {
276 fprintf(stderr, "Failed to create signal event\n");
277 fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
282 /* periodically check parent */
283 cmr_state->timer_ev = tevent_add_timer(cmr_state->ev, cmr_state,
284 tevent_timeval_current_ofs(5, 0),
285 ctdb_mutex_rados_timer_cb,
287 if (cmr_state->timer_ev == NULL) {
288 fprintf(stderr, "Failed to create timer event\n");
289 fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
294 ret = ctdb_mutex_rados_ctx_create(cmr_state->ceph_cluster_name,
295 cmr_state->ceph_auth_name,
296 cmr_state->pool_name,
297 &cmr_state->ceph_cluster,
300 fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
304 ret = ctdb_mutex_rados_lock(cmr_state->ioctx, cmr_state->object);
305 if ((ret == -EEXIST) || (ret == -EBUSY)) {
306 fprintf(stdout, CTDB_MUTEX_STATUS_CONTENDED);
307 goto err_ctx_cleanup;
308 } else if (ret < 0) {
309 fprintf(stdout, CTDB_MUTEX_STATUS_ERROR);
310 goto err_ctx_cleanup;
313 cmr_state->holding_mutex = true;
314 fprintf(stdout, CTDB_MUTEX_STATUS_HOLDING);
316 /* wait for the signal / timer events to do their work */
317 ret = tevent_loop_wait(cmr_state->ev);
319 goto err_ctx_cleanup;
322 ctdb_mutex_rados_ctx_destroy(cmr_state->ceph_cluster,
325 talloc_free(cmr_state);