s3:ctdb_conn: add ctdbd_conn_get_fd() to get the fd out of the ctdb connection
[amitay/samba.git] / source3 / lib / g_lock.c
1 /*
2    Unix SMB/CIFS implementation.
3    global locks based on dbwrap and messaging
4    Copyright (C) 2009 by Volker Lendecke
5
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15
16    You should have received a copy of the GNU General Public License
17    along with this program.  If not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "includes.h"
21 #include "g_lock.h"
22
23 static NTSTATUS g_lock_force_unlock(struct g_lock_ctx *ctx, const char *name,
24                                     struct server_id pid);
25
/* Handle for the global lock subsystem, created by g_lock_ctx_init(). */
struct g_lock_ctx {
	/* the "g_lock.tdb" database holding the lock records */
	struct db_context *db;

	/* messaging context handed in by the caller of g_lock_ctx_init() */
	struct messaging_context *msg;
};
30
31 /*
32  * The "g_lock.tdb" file contains records, indexed by the 0-terminated
33  * lockname. The record contains an array of "struct g_lock_rec"
34  * structures. Waiters have the lock_type with G_LOCK_PENDING or'ed.
35  */
36
37 struct g_lock_rec {
38         enum g_lock_type lock_type;
39         struct server_id pid;
40 };
41
42 struct g_lock_ctx *g_lock_ctx_init(TALLOC_CTX *mem_ctx,
43                                    struct messaging_context *msg)
44 {
45         struct g_lock_ctx *result;
46
47         result = talloc(mem_ctx, struct g_lock_ctx);
48         if (result == NULL) {
49                 return NULL;
50         }
51         result->msg = msg;
52
53         result->db = db_open(result, lock_path("g_lock.tdb"), 0,
54                              TDB_CLEAR_IF_FIRST, O_RDWR|O_CREAT, 0700);
55         if (result->db == NULL) {
56                 DEBUG(1, ("g_lock_init: Could not open g_lock.tdb"));
57                 TALLOC_FREE(result);
58                 return NULL;
59         }
60         return result;
61 }
62
63 static bool g_lock_conflicts(enum g_lock_type lock_type,
64                              const struct g_lock_rec *rec)
65 {
66         enum g_lock_type rec_lock = rec->lock_type;
67
68         if ((rec_lock & G_LOCK_PENDING) != 0) {
69                 return false;
70         }
71
72         /*
73          * Only tested write locks so far. Very likely this routine
74          * needs to be fixed for read locks....
75          */
76         if ((lock_type == G_LOCK_READ) && (rec_lock == G_LOCK_READ)) {
77                 return false;
78         }
79         return true;
80 }
81
82 static bool g_lock_parse(TALLOC_CTX *mem_ctx, TDB_DATA data,
83                          int *pnum_locks, struct g_lock_rec **plocks)
84 {
85         int i, num_locks;
86         struct g_lock_rec *locks;
87
88         if ((data.dsize % sizeof(struct g_lock_rec)) != 0) {
89                 DEBUG(1, ("invalid lock record length %d\n", (int)data.dsize));
90                 return false;
91         }
92
93         num_locks = data.dsize / sizeof(struct g_lock_rec);
94         locks = talloc_array(mem_ctx, struct g_lock_rec, num_locks);
95         if (locks == NULL) {
96                 DEBUG(1, ("talloc failed\n"));
97                 return false;
98         }
99
100         memcpy(locks, data.dptr, data.dsize);
101
102         DEBUG(10, ("locks:\n"));
103         for (i=0; i<num_locks; i++) {
104                 DEBUGADD(10, ("%s: %s %s\n",
105                               procid_str(talloc_tos(), &locks[i].pid),
106                               ((locks[i].lock_type & 1) == G_LOCK_READ) ?
107                               "read" : "write",
108                               (locks[i].lock_type & G_LOCK_PENDING) ?
109                               "(pending)" : "(owner)"));
110
111                 if (process_exists(locks[i].pid)) {
112                         continue;
113                 }
114                 DEBUGADD(10, ("%s does not exist -- discarding\n",
115                               procid_str(talloc_tos(), &locks[i].pid)));
116
117                 if (i < (num_locks-1)) {
118                         locks[i] = locks[num_locks-1];
119                 }
120                 num_locks -= 1;
121         }
122
123         *plocks = locks;
124         *pnum_locks = num_locks;
125         return true;
126 }
127
128 static struct g_lock_rec *g_lock_addrec(TALLOC_CTX *mem_ctx,
129                                         struct g_lock_rec *locks,
130                                         int num_locks,
131                                         const struct server_id pid,
132                                         enum g_lock_type lock_type)
133 {
134         struct g_lock_rec *result;
135
136         result = talloc_realloc(mem_ctx, locks, struct g_lock_rec,
137                                 num_locks+1);
138         if (result == NULL) {
139                 return NULL;
140         }
141
142         result[num_locks].pid = pid;
143         result[num_locks].lock_type = lock_type;
144         return result;
145 }
146
147 static void g_lock_got_retry(struct messaging_context *msg,
148                              void *private_data,
149                              uint32_t msg_type,
150                              struct server_id server_id,
151                              DATA_BLOB *data);
152 static void g_lock_timedout(struct tevent_context *ev,
153                             struct tevent_timer *te,
154                             struct timeval current_time,
155                             void *private_data);
156
157 static NTSTATUS g_lock_trylock(struct g_lock_ctx *ctx, const char *name,
158                                enum g_lock_type lock_type)
159 {
160         struct db_record *rec = NULL;
161         struct g_lock_rec *locks = NULL;
162         int i, num_locks;
163         struct server_id self;
164         int our_index;
165         TDB_DATA data;
166         NTSTATUS status = NT_STATUS_OK;
167         NTSTATUS store_status;
168
169 again:
170         rec = ctx->db->fetch_locked(ctx->db, talloc_tos(),
171                                     string_term_tdb_data(name));
172         if (rec == NULL) {
173                 DEBUG(10, ("fetch_locked(\"%s\") failed\n", name));
174                 status = NT_STATUS_LOCK_NOT_GRANTED;
175                 goto done;
176         }
177
178         if (!g_lock_parse(talloc_tos(), rec->value, &num_locks, &locks)) {
179                 DEBUG(10, ("g_lock_parse for %s failed\n", name));
180                 status = NT_STATUS_INTERNAL_ERROR;
181                 goto done;
182         }
183
184         self = procid_self();
185         our_index = -1;
186
187         for (i=0; i<num_locks; i++) {
188                 if (procid_equal(&self, &locks[i].pid)) {
189                         if (our_index != -1) {
190                                 DEBUG(1, ("g_lock_trylock: Added ourself "
191                                           "twice!\n"));
192                                 status = NT_STATUS_INTERNAL_ERROR;
193                                 goto done;
194                         }
195                         if ((locks[i].lock_type & G_LOCK_PENDING) == 0) {
196                                 DEBUG(1, ("g_lock_trylock: Found ourself not "
197                                           "pending!\n"));
198                                 status = NT_STATUS_INTERNAL_ERROR;
199                                 goto done;
200                         }
201
202                         our_index = i;
203
204                         /* never conflict with ourself */
205                         continue;
206                 }
207                 if (g_lock_conflicts(lock_type, &locks[i])) {
208                         struct server_id pid = locks[i].pid;
209
210                         if (!process_exists(pid)) {
211                                 TALLOC_FREE(locks);
212                                 TALLOC_FREE(rec);
213                                 status = g_lock_force_unlock(ctx, name, pid);
214                                 if (!NT_STATUS_IS_OK(status)) {
215                                         DEBUG(1, ("Could not unlock dead lock "
216                                                   "holder!\n"));
217                                         goto done;
218                                 }
219                                 goto again;
220                         }
221                         lock_type |= G_LOCK_PENDING;
222                 }
223         }
224
225         if (our_index == -1) {
226                 /* First round, add ourself */
227
228                 locks = g_lock_addrec(talloc_tos(), locks, num_locks,
229                                       self, lock_type);
230                 if (locks == NULL) {
231                         DEBUG(10, ("g_lock_addrec failed\n"));
232                         status = NT_STATUS_NO_MEMORY;
233                         goto done;
234                 }
235         } else {
236                 /*
237                  * Retry. We were pending last time. Overwrite the
238                  * stored lock_type with what we calculated, we might
239                  * have acquired the lock this time.
240                  */
241                 locks[our_index].lock_type = lock_type;
242         }
243
244         data = make_tdb_data((uint8_t *)locks, talloc_get_size(locks));
245         store_status = rec->store(rec, data, 0);
246         if (!NT_STATUS_IS_OK(store_status)) {
247                 DEBUG(1, ("rec->store failed: %s\n",
248                           nt_errstr(store_status)));
249                 status = store_status;
250         }
251
252 done:
253         TALLOC_FREE(locks);
254         TALLOC_FREE(rec);
255
256         if (NT_STATUS_IS_OK(status) && (lock_type & G_LOCK_PENDING) != 0) {
257                 return STATUS_PENDING;
258         }
259
260         return NT_STATUS_OK;
261 }
262
263 NTSTATUS g_lock_lock(struct g_lock_ctx *ctx, const char *name,
264                      enum g_lock_type lock_type, struct timeval timeout)
265 {
266         struct tevent_timer *te = NULL;
267         NTSTATUS status;
268         bool retry = false;
269         bool timedout = false;
270
271         DEBUG(10, ("Trying to acquire lock %d for %s\n", (int)lock_type,
272                    name));
273
274         if (lock_type & ~1) {
275                 DEBUG(1, ("Got invalid lock type %d for %s\n",
276                           (int)lock_type, name));
277                 return NT_STATUS_INVALID_PARAMETER;
278         }
279
280 #ifdef CLUSTER_SUPPORT
281         if (lp_clustering()) {
282                 status = ctdb_watch_us(messaging_ctdbd_connection());
283                 if (!NT_STATUS_IS_OK(status)) {
284                         DEBUG(10, ("could not register retry with ctdb: %s\n",
285                                    nt_errstr(status)));
286                         goto done;
287                 }
288         }
289 #endif
290
291         status = messaging_register(ctx->msg, &retry, MSG_DBWRAP_G_LOCK_RETRY,
292                                     g_lock_got_retry);
293         if (!NT_STATUS_IS_OK(status)) {
294                 DEBUG(10, ("messaging_register failed: %s\n",
295                            nt_errstr(status)));
296                 return status;
297         }
298 again:
299         retry = false;
300
301         status = g_lock_trylock(ctx, name, lock_type);
302         if (NT_STATUS_IS_OK(status)) {
303                 DEBUG(10, ("Got lock %s\n", name));
304                 goto done;
305         }
306         if (!NT_STATUS_EQUAL(status, STATUS_PENDING)) {
307                 DEBUG(10, ("g_lock_trylock failed: %s\n",
308                            nt_errstr(status)));
309                 goto done;
310         }
311
312         DEBUG(10, ("g_lock_trylock: Did not get lock, waiting...\n"));
313
314         if (te == NULL) {
315                 te = tevent_add_timer(
316                         ctx->msg->event_ctx, talloc_tos(),
317                         timeval_current_ofs(timeout.tv_sec, timeout.tv_usec),
318                         g_lock_timedout, &timedout);
319                 if (te == NULL) {
320                         DEBUG(10, ("tevent_add_timer failed\n"));
321                         status = NT_STATUS_NO_MEMORY;
322                         goto done;
323                 }
324         }
325
326         while (true) {
327                 if (tevent_loop_once(ctx->msg->event_ctx) == -1) {
328                         DEBUG(1, ("tevent_loop_once failed\n"));
329                         status = NT_STATUS_INTERNAL_ERROR;
330                         goto done;
331                 }
332                 if (retry) {
333                         goto again;
334                 }
335                 if (timedout) {
336                         DEBUG(10, ("g_lock_lock timed out\n"));
337
338                         te = NULL;
339
340                         status = NT_STATUS_LOCK_NOT_GRANTED;
341                         goto done;
342                 }
343         }
344 done:
345
346         if (!NT_STATUS_IS_OK(status)) {
347                 NTSTATUS unlock_status;
348
349                 unlock_status = g_lock_unlock(ctx, name);
350
351                 if (!NT_STATUS_IS_OK(unlock_status)) {
352                         DEBUG(1, ("Could not remove ourself from the locking "
353                                   "db: %s\n", nt_errstr(status)));
354                 }
355         }
356
357         messaging_deregister(ctx->msg, MSG_DBWRAP_G_LOCK_RETRY, &retry);
358         TALLOC_FREE(te);
359
360         return status;
361 }
362
363 static void g_lock_got_retry(struct messaging_context *msg,
364                              void *private_data,
365                              uint32_t msg_type,
366                              struct server_id server_id,
367                              DATA_BLOB *data)
368 {
369         bool *pretry = (bool *)private_data;
370
371         DEBUG(10, ("Got retry message from pid %s\n",
372                    procid_str(talloc_tos(), &server_id)));
373
374         *pretry = true;
375 }
376
377 static void g_lock_timedout(struct tevent_context *ev,
378                             struct tevent_timer *te,
379                             struct timeval current_time,
380                             void *private_data)
381 {
382         bool *ptimedout = (bool *)private_data;
383         *ptimedout = true;
384         TALLOC_FREE(te);
385 }
386
387 static NTSTATUS g_lock_force_unlock(struct g_lock_ctx *ctx, const char *name,
388                                     struct server_id pid)
389 {
390         struct db_record *rec = NULL;
391         struct g_lock_rec *locks = NULL;
392         int i, num_locks;
393         enum g_lock_type lock_type;
394         NTSTATUS status;
395
396         rec = ctx->db->fetch_locked(ctx->db, talloc_tos(),
397                                     string_term_tdb_data(name));
398         if (rec == NULL) {
399                 DEBUG(10, ("fetch_locked(\"%s\") failed\n", name));
400                 status = NT_STATUS_INTERNAL_ERROR;
401                 goto done;
402         }
403
404         if (!g_lock_parse(talloc_tos(), rec->value, &num_locks, &locks)) {
405                 DEBUG(10, ("g_lock_parse for %s failed\n", name));
406                 status = NT_STATUS_INTERNAL_ERROR;
407                 goto done;
408         }
409
410         for (i=0; i<num_locks; i++) {
411                 if (procid_equal(&pid, &locks[i].pid)) {
412                         break;
413                 }
414         }
415
416         if (i == num_locks) {
417                 DEBUG(10, ("g_lock_force_unlock: Lock not found\n"));
418                 status = NT_STATUS_INTERNAL_ERROR;
419                 goto done;
420         }
421
422         lock_type = locks[i].lock_type;
423
424         if (i < (num_locks-1)) {
425                 locks[i] = locks[num_locks-1];
426         }
427         num_locks -= 1;
428
429         if (num_locks == 0) {
430                 status = rec->delete_rec(rec);
431         } else {
432                 TDB_DATA data;
433                 data = make_tdb_data((uint8_t *)locks,
434                                      sizeof(struct g_lock_rec) * num_locks);
435                 status = rec->store(rec, data, 0);
436         }
437
438         if (!NT_STATUS_IS_OK(status)) {
439                 DEBUG(1, ("g_lock_force_unlock: Could not store record: %s\n",
440                           nt_errstr(status)));
441                 goto done;
442         }
443
444         if ((lock_type & G_LOCK_PENDING) == 0) {
445                 /*
446                  * We've been the lock holder. Tell all others to retry.
447                  */
448                 for (i=0; i<num_locks; i++) {
449                         if ((locks[i].lock_type & G_LOCK_PENDING) == 0) {
450                                 continue;
451                         }
452
453                         /*
454                          * Ping all waiters to retry
455                          */
456                         status = messaging_send(ctx->msg, locks[i].pid,
457                                                 MSG_DBWRAP_G_LOCK_RETRY,
458                                                 &data_blob_null);
459                         if (!NT_STATUS_IS_OK(status)) {
460                                 DEBUG(1, ("sending retry to %s failed: %s\n",
461                                           procid_str(talloc_tos(),
462                                                      &locks[i].pid),
463                                           nt_errstr(status)));
464                         }
465                 }
466         }
467 done:
468
469         TALLOC_FREE(locks);
470         TALLOC_FREE(rec);
471         return status;
472 }
473
474 NTSTATUS g_lock_unlock(struct g_lock_ctx *ctx, const char *name)
475 {
476         NTSTATUS status;
477
478         status = g_lock_force_unlock(ctx, name, procid_self());
479
480 #ifdef CLUSTER_SUPPORT
481         if (lp_clustering()) {
482                 ctdb_unwatch(messaging_ctdbd_connection());
483         }
484 #endif
485         return status;
486 }
487
/* Carries the caller's callback through the traverse in g_lock_locks(). */
struct g_lock_locks_state {
	/* invoked once per lock name */
	int (*fn)(const char *name, void *private_data);

	/* passed through to fn unchanged */
	void *private_data;
};
492
493 static int g_lock_locks_fn(struct db_record *rec, void *priv)
494 {
495         struct g_lock_locks_state *state = (struct g_lock_locks_state *)priv;
496
497         if ((rec->key.dsize == 0) || (rec->key.dptr[rec->key.dsize-1] != 0)) {
498                 DEBUG(1, ("invalid key in g_lock.tdb, ignoring\n"));
499                 return 0;
500         }
501         return state->fn((char *)rec->key.dptr, state->private_data);
502 }
503
504 int g_lock_locks(struct g_lock_ctx *ctx,
505                  int (*fn)(const char *name, void *private_data),
506                  void *private_data)
507 {
508         struct g_lock_locks_state state;
509
510         state.fn = fn;
511         state.private_data = private_data;
512
513         return ctx->db->traverse_read(ctx->db, g_lock_locks_fn, &state);
514 }
515
516 NTSTATUS g_lock_dump(struct g_lock_ctx *ctx, const char *name,
517                      int (*fn)(struct server_id pid,
518                                enum g_lock_type lock_type,
519                                void *private_data),
520                      void *private_data)
521 {
522         TDB_DATA data;
523         int i, num_locks;
524         struct g_lock_rec *locks = NULL;
525         bool ret;
526
527         if (ctx->db->fetch(ctx->db, talloc_tos(), string_term_tdb_data(name),
528                            &data) != 0) {
529                 return NT_STATUS_NOT_FOUND;
530         }
531
532         if ((data.dsize == 0) || (data.dptr == NULL)) {
533                 return NT_STATUS_OK;
534         }
535
536         ret = g_lock_parse(talloc_tos(), data, &num_locks, &locks);
537
538         TALLOC_FREE(data.dptr);
539
540         if (!ret) {
541                 DEBUG(10, ("g_lock_parse for %s failed\n", name));
542                 return NT_STATUS_INTERNAL_ERROR;
543         }
544
545         for (i=0; i<num_locks; i++) {
546                 if (fn(locks[i].pid, locks[i].lock_type, private_data) != 0) {
547                         break;
548                 }
549         }
550         TALLOC_FREE(locks);
551         return NT_STATUS_OK;
552 }
553
/* Result carrier for g_lock_get(), filled in by g_lock_get_fn(). */
struct g_lock_get_state {
	/* set once a non-pending entry has been seen */
	bool found;

	/* where the pid of that entry is written */
	struct server_id *pid;
};
558
559 static int g_lock_get_fn(struct server_id pid, enum g_lock_type lock_type,
560                          void *priv)
561 {
562         struct g_lock_get_state *state = (struct g_lock_get_state *)priv;
563
564         if ((lock_type & G_LOCK_PENDING) != 0) {
565                 return 0;
566         }
567
568         state->found = true;
569         *state->pid = pid;
570         return 1;
571 }
572
573 NTSTATUS g_lock_get(struct g_lock_ctx *ctx, const char *name,
574                     struct server_id *pid)
575 {
576         struct g_lock_get_state state;
577         NTSTATUS status;
578
579         state.found = false;
580         state.pid = pid;
581
582         status = g_lock_dump(ctx, name, g_lock_get_fn, &state);
583         if (!NT_STATUS_IS_OK(status)) {
584                 return status;
585         }
586         if (!state.found) {
587                 return NT_STATUS_NOT_FOUND;
588         }
589         return NT_STATUS_OK;
590 }