fix conflict in merge from metze
[obnox/samba/samba-obnox.git] / ctdb / server / ctdb_freeze.c
1 /* 
2    ctdb freeze handling
3
4    Copyright (C) Andrew Tridgell  2007
5
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10    
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15    
16    You should have received a copy of the GNU General Public License
17    along with this program; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "includes.h"
20 #include "lib/events/events.h"
21 #include "lib/tdb/include/tdb.h"
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/wait.h"
25 #include "../include/ctdb_private.h"
26 #include "lib/util/dlinklist.h"
27 #include "db_wrap.h"
28
29
30 /*
31   lock all databases
32  */
33 static int ctdb_lock_all_databases(struct ctdb_context *ctdb, uint32_t priority)
34 {
35         struct ctdb_db_context *ctdb_db;
36         /* REMOVE later */
37         /* This double loop is for backward compatibility and deadlock
38            avoidance for old samba versions that not yet support
39            the set prio call.
40            This code shall be removed later
41         */
42         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
43                 if (ctdb_db->priority != priority) {
44                         continue;
45                 }
46                 if (strstr(ctdb_db->db_name, "notify") != NULL) {
47                         continue;
48                 }
49                 DEBUG(DEBUG_INFO,("locking database 0x%08x priority:%u %s\n", ctdb_db->db_id, ctdb_db->priority, ctdb_db->db_name));
50                 if (tdb_lockall(ctdb_db->ltdb->tdb) != 0) {
51                         return -1;
52                 }
53         }
54         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
55                 if (ctdb_db->priority != priority) {
56                         continue;
57                 }
58                 if (strstr(ctdb_db->db_name, "notify") == NULL) {
59                         continue;
60                 }
61                 DEBUG(DEBUG_INFO,("locking database 0x%08x priority:%u %s\n", ctdb_db->db_id, ctdb_db->priority, ctdb_db->db_name));
62                 if (tdb_lockall(ctdb_db->ltdb->tdb) != 0) {
63                         return -1;
64                 }
65         }
66         return 0;
67 }
68
/*
  One control request that is waiting for a freeze lock child to get
  the database locks.  Waiters are kept in a doubly linked list hanging
  off the freeze handle.
 */
struct ctdb_freeze_waiter {
        /* list linkage used by the DLIST_* macros */
        struct ctdb_freeze_waiter *next;
        struct ctdb_freeze_waiter *prev;
        /* daemon context used when the reply is sent */
        struct ctdb_context *ctdb;
        /* the control request that will be answered */
        struct ctdb_req_control *c;
        /* database priority this request asked to freeze */
        uint32_t priority;
        /* status sent back in the control reply */
        int32_t status;
};
80
/*
  a handle to a freeze lock child process

  One handle exists per database priority while a freeze is pending or
  active; freeing the handle kills the child.
 */
struct ctdb_freeze_handle {
        /* owning daemon context */
        struct ctdb_context *ctdb;
        /* database priority this handle freezes */
        uint32_t priority;
        /* pid of the forked lock child */
        pid_t child;
        /* read end of the pipe the child writes its status to */
        int fd;
        /* control requests waiting for the child to get the locks */
        struct ctdb_freeze_waiter *waiters;
};
89
90 /*
91   destroy a freeze handle
92  */     
93 static int ctdb_freeze_handle_destructor(struct ctdb_freeze_handle *h)
94 {
95         struct ctdb_context *ctdb = h->ctdb;
96         struct ctdb_db_context *ctdb_db;
97
98         DEBUG(DEBUG_ERR,("Release freeze handler for prio %u\n", h->priority));
99
100         /* cancel any pending transactions */
101         if (ctdb->freeze_transaction_started) {
102                 for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
103                         if (ctdb_db->priority != h->priority) {
104                                 continue;
105                         }
106                         tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
107                         if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
108                                 DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
109                                          ctdb_db->db_name));
110                         }
111                         tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
112                 }
113                 ctdb->freeze_transaction_started = false;
114         }
115
116         ctdb->freeze_mode[h->priority]    = CTDB_FREEZE_NONE;
117         ctdb->freeze_handles[h->priority] = NULL;
118
119         kill(h->child, SIGKILL);
120         return 0;
121 }
122
123 /*
124   called when the child writes its status to us
125  */
126 static void ctdb_freeze_lock_handler(struct event_context *ev, struct fd_event *fde, 
127                                        uint16_t flags, void *private_data)
128 {
129         struct ctdb_freeze_handle *h = talloc_get_type(private_data, struct ctdb_freeze_handle);
130         int32_t status;
131         struct ctdb_freeze_waiter *w;
132
133         if (h->ctdb->freeze_mode[h->priority] == CTDB_FREEZE_FROZEN) {
134                 DEBUG(DEBUG_INFO,("freeze child died - unfreezing\n"));
135                 talloc_free(h);
136                 return;
137         }
138
139         if (read(h->fd, &status, sizeof(status)) != sizeof(status)) {
140                 DEBUG(DEBUG_ERR,("read error from freeze lock child\n"));
141                 status = -1;
142         }
143
144         if (status == -1) {
145                 DEBUG(DEBUG_ERR,("Failed to get locks in ctdb_freeze_child\n"));
146                 /* we didn't get the locks - destroy the handle */
147                 talloc_free(h);
148                 return;
149         }
150
151         h->ctdb->freeze_mode[h->priority] = CTDB_FREEZE_FROZEN;
152
153         /* notify the waiters */
154         if (h != h->ctdb->freeze_handles[h->priority]) {
155                 DEBUG(DEBUG_ERR,("lockwait finished but h is not linked\n"));
156         }
157         while ((w = h->waiters)) {
158                 w->status = status;
159                 DLIST_REMOVE(h->waiters, w);
160                 talloc_free(w);
161         }
162 }
163
164 /*
165   create a child which gets locks on all the open databases, then calls the callback telling the parent
166   that it is done
167  */
168 static struct ctdb_freeze_handle *ctdb_freeze_lock(struct ctdb_context *ctdb, uint32_t priority)
169 {
170         struct ctdb_freeze_handle *h;
171         int fd[2];
172         struct fd_event *fde;
173
174         h = talloc_zero(ctdb, struct ctdb_freeze_handle);
175         CTDB_NO_MEMORY_NULL(ctdb, h);
176
177         h->ctdb     = ctdb;
178         h->priority = priority;
179
180         if (pipe(fd) == -1) {
181                 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
182                 talloc_free(h);
183                 return NULL;
184         }
185         
186         h->child = fork();
187         if (h->child == -1) {
188                 DEBUG(DEBUG_ERR,("Failed to fork child for ctdb_freeze_lock\n"));
189                 talloc_free(h);
190                 return NULL;
191         }
192
193         if (h->child == 0) {
194                 int ret;
195
196                 /* in the child */
197                 close(fd[0]);
198
199                 ret = ctdb_lock_all_databases(ctdb, priority);
200                 if (ret != 0) {
201                         _exit(0);
202                 }
203
204                 ret = write(fd[1], &ret, sizeof(ret));
205                 if (ret != sizeof(ret)) {
206                         DEBUG(DEBUG_ERR, (__location__ " Failed to write to socket from freeze child. ret:%d errno:%u\n", ret, errno));
207                         _exit(1);
208                 }
209
210                 while (1) {
211                         sleep(1);
212                         if (kill(ctdb->ctdbd_pid, 0) != 0) {
213                                 DEBUG(DEBUG_ERR,("Parent died. Exiting lock wait child\n"));
214
215                                 _exit(0);
216                         }
217                 }
218         }
219
220         talloc_set_destructor(h, ctdb_freeze_handle_destructor);
221
222         close(fd[1]);
223         set_close_on_exec(fd[0]);
224
225         h->fd = fd[0];
226
227
228         fde = event_add_fd(ctdb->ev, h, h->fd, EVENT_FD_READ|EVENT_FD_AUTOCLOSE, 
229                            ctdb_freeze_lock_handler, h);
230         if (fde == NULL) {
231                 DEBUG(DEBUG_ERR,("Failed to setup fd event for ctdb_freeze_lock\n"));
232                 close(fd[0]);
233                 talloc_free(h);
234                 return NULL;
235         }
236
237         return h;
238 }
239
240 /*
241   destroy a waiter for a freeze mode change
242  */
243 static int ctdb_freeze_waiter_destructor(struct ctdb_freeze_waiter *w)
244 {
245         ctdb_request_control_reply(w->ctdb, w->c, NULL, w->status, NULL);
246         return 0;
247 }
248
249 /*
250   start the freeze process for a certain priority
251  */
252 int ctdb_start_freeze(struct ctdb_context *ctdb, uint32_t priority)
253 {
254         if (priority == 0) {
255                 DEBUG(DEBUG_ERR,("Freeze priority 0 requested, remapping to priority 1\n"));
256                 priority = 1;
257         }
258
259         if ((priority < 1) || (priority > NUM_DB_PRIORITIES)) {
260                 DEBUG(DEBUG_ERR,(__location__ " Invalid db priority : %u\n", priority));
261                 return -1;
262         }
263
264         if (ctdb->freeze_mode[priority] == CTDB_FREEZE_FROZEN) {
265                 /* we're already frozen */
266                 return 0;
267         }
268
269         /* if there isn't a freeze lock child then create one */
270         if (ctdb->freeze_handles[priority] == NULL) {
271                 ctdb->freeze_handles[priority] = ctdb_freeze_lock(ctdb, priority);
272                 CTDB_NO_MEMORY(ctdb, ctdb->freeze_handles[priority]);
273                 ctdb->freeze_mode[priority] = CTDB_FREEZE_PENDING;
274         }
275
276         return 0;
277 }
278
279 /*
280   freeze the databases
281  */
282 int32_t ctdb_control_freeze(struct ctdb_context *ctdb, struct ctdb_req_control *c, bool *async_reply)
283 {
284         struct ctdb_freeze_waiter *w;
285         uint32_t priority;
286
287         priority = (uint32_t)c->srvid;
288
289         DEBUG(DEBUG_ERR, ("Freeze priority %u\n", priority));
290
291         if (priority == 0) {
292                 DEBUG(DEBUG_ERR,("Freeze priority 0 requested, remapping to priority 1\n"));
293                 priority = 1;
294         }
295
296         if ((priority < 1) || (priority > NUM_DB_PRIORITIES)) {
297                 DEBUG(DEBUG_ERR,(__location__ " Invalid db priority : %u\n", priority));
298                 return -1;
299         }
300
301         if (ctdb->freeze_mode[priority] == CTDB_FREEZE_FROZEN) {
302                 /* we're already frozen */
303                 return 0;
304         }
305
306         if (ctdb_start_freeze(ctdb, priority) != 0) {
307                 DEBUG(DEBUG_ERR,(__location__ " Failed to start freezing databases with priority %u\n", priority));
308                 return -1;
309         }
310
311         /* add ourselves to list of waiters */
312         if (ctdb->freeze_handles[priority] == NULL) {
313                 DEBUG(DEBUG_ERR,("No freeze lock handle when adding a waiter\n"));
314                 return -1;
315         }
316
317         w = talloc(ctdb->freeze_handles[priority], struct ctdb_freeze_waiter);
318         CTDB_NO_MEMORY(ctdb, w);
319         w->ctdb     = ctdb;
320         w->c        = talloc_steal(w, c);
321         w->priority = priority;
322         w->status   = -1;
323         talloc_set_destructor(w, ctdb_freeze_waiter_destructor);
324         DLIST_ADD(ctdb->freeze_handles[priority]->waiters, w);
325
326         /* we won't reply till later */
327         *async_reply = True;
328         return 0;
329 }
330
331
332 /*
333   block until we are frozen, used during daemon startup
334  */
335 bool ctdb_blocking_freeze(struct ctdb_context *ctdb)
336 {
337         int i;
338
339         for (i=1; i<=NUM_DB_PRIORITIES; i++) {
340                 if (ctdb_start_freeze(ctdb, i)) {
341                         DEBUG(DEBUG_ERR,(__location__ " Failed to freeze databases of prio %u\n", i));
342                         continue;
343                 }
344
345                 /* block until frozen */
346                 while (ctdb->freeze_mode[i] == CTDB_FREEZE_PENDING) {
347                         event_loop_once(ctdb->ev);
348                 }
349         }
350
351         return 0;
352 }
353
354
355 static void thaw_priority(struct ctdb_context *ctdb, uint32_t priority)
356 {
357         DEBUG(DEBUG_ERR,("Thawing priority %u\n", priority));
358
359         /* cancel any pending transactions */
360         if (ctdb->freeze_transaction_started) {
361                 struct ctdb_db_context *ctdb_db;
362
363                 for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
364                         tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
365                         if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
366                                 DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
367                                          ctdb_db->db_name));
368                         }
369                         tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
370                 }
371         }
372         ctdb->freeze_transaction_started = false;
373
374 #if 0
375         /* this hack can be used to get a copy of the databases at the end of a recovery */
376         system("mkdir -p /var/ctdb.saved; /usr/bin/rsync --delete -a /var/ctdb/ /var/ctdb.saved/$$ 2>&1 > /dev/null");
377 #endif
378
379 #if 0
380         /* and this one for local testing */
381         system("mkdir -p test.db.saved; /usr/bin/rsync --delete -a test.db/ test.db.saved/$$ 2>&1 > /dev/null");
382 #endif
383
384         if (ctdb->freeze_handles[priority] != NULL) {
385                 talloc_free(ctdb->freeze_handles[priority]);
386                 ctdb->freeze_handles[priority] = NULL;
387         }
388 }
389
390 /*
391   thaw the databases
392  */
393 int32_t ctdb_control_thaw(struct ctdb_context *ctdb, uint32_t priority)
394 {
395
396         if (priority > NUM_DB_PRIORITIES) {
397                 DEBUG(DEBUG_ERR,(__location__ " Invalid db priority : %u\n", priority));
398                 return -1;
399         }
400
401         if (priority == 0) {
402                 int i;
403                 for (i=1;i<=NUM_DB_PRIORITIES; i++) {
404                         thaw_priority(ctdb, i);
405                 }
406         } else {
407                 thaw_priority(ctdb, priority);
408         }
409
410         ctdb_call_resend_all(ctdb);
411         return 0;
412 }
413
414
415 /*
416   start a transaction on all databases - used for recovery
417  */
418 int32_t ctdb_control_transaction_start(struct ctdb_context *ctdb, uint32_t id)
419 {
420         struct ctdb_db_context *ctdb_db;
421         int i;
422
423         for (i=1;i<=NUM_DB_PRIORITIES; i++) {
424                 if (ctdb->freeze_mode[i] != CTDB_FREEZE_FROZEN) {
425                         DEBUG(DEBUG_ERR,(__location__ " Failed transaction_start while not frozen\n"));
426                         return -1;
427                 }
428         }
429
430         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
431                 int ret;
432
433                 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
434
435                 if (ctdb->freeze_transaction_started) {
436                         if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
437                                 DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
438                                          ctdb_db->db_name));
439                                 /* not a fatal error */
440                         }
441                 }
442
443                 ret = tdb_transaction_start(ctdb_db->ltdb->tdb);
444
445                 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
446
447                 if (ret != 0) {
448                         DEBUG(DEBUG_ERR,(__location__ " Failed to start transaction for db '%s'\n",
449                                  ctdb_db->db_name));
450                         return -1;
451                 }
452         }
453
454         ctdb->freeze_transaction_started = true;
455         ctdb->freeze_transaction_id = id;
456
457         return 0;
458 }
459
460 /*
461   cancel a transaction for all databases - used for recovery
462  */
463 int32_t ctdb_control_transaction_cancel(struct ctdb_context *ctdb)
464 {
465         struct ctdb_db_context *ctdb_db;
466
467         DEBUG(DEBUG_ERR,(__location__ " recovery transaction cancelled called\n"));
468
469         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
470                 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
471
472                 if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
473                         DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",  ctdb_db->db_name));
474                         /* not a fatal error */
475                 }
476
477                 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
478         }
479
480         ctdb->freeze_transaction_started = false;
481
482         return 0;
483 }
484
485 /*
486   commit transactions on all databases
487  */
488 int32_t ctdb_control_transaction_commit(struct ctdb_context *ctdb, uint32_t id)
489 {
490         struct ctdb_db_context *ctdb_db;
491         int i;
492         int healthy_nodes = 0;
493
494         for (i=1;i<=NUM_DB_PRIORITIES; i++) {
495                 if (ctdb->freeze_mode[i] != CTDB_FREEZE_FROZEN) {
496                         DEBUG(DEBUG_ERR,(__location__ " Failed transaction_start while not frozen\n"));
497                         return -1;
498                 }
499         }
500
501         if (!ctdb->freeze_transaction_started) {
502                 DEBUG(DEBUG_ERR,(__location__ " transaction not started\n"));
503                 return -1;
504         }
505
506         if (id != ctdb->freeze_transaction_id) {
507                 DEBUG(DEBUG_ERR,(__location__ " incorrect transaction id 0x%x in commit\n", id));
508                 return -1;
509         }
510
511         DEBUG(DEBUG_DEBUG,(__location__ " num_nodes[%d]\n", ctdb->num_nodes));
512         for (i=0; i < ctdb->num_nodes; i++) {
513                 DEBUG(DEBUG_DEBUG,(__location__ " node[%d].flags[0x%X]\n",
514                                    i, ctdb->nodes[i]->flags));
515                 if (ctdb->nodes[i]->flags == 0) {
516                         healthy_nodes++;
517                 }
518         }
519         DEBUG(DEBUG_INFO,(__location__ " healthy_nodes[%d]\n", healthy_nodes));
520
521         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
522                 int ret;
523
524                 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
525                 ret = tdb_transaction_commit(ctdb_db->ltdb->tdb);
526                 if (ret != 0) {
527                         DEBUG(DEBUG_ERR,(__location__ " Failed to commit transaction for db '%s'. Cancel all transactions and resetting transaction_started to false.\n",
528                                  ctdb_db->db_name));
529                         goto fail;
530                 }
531                 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
532
533                 ret = ctdb_update_persistent_health(ctdb, ctdb_db, NULL, healthy_nodes);
534                 if (ret != 0) {
535                         DEBUG(DEBUG_CRIT,(__location__ " Failed to update persistent health for db '%s'. "
536                                          "Cancel all remaining transactions and resetting transaction_started to false.\n",
537                                          ctdb_db->db_name));
538                         goto fail;
539                 }
540         }
541
542         ctdb->freeze_transaction_started = false;
543         ctdb->freeze_transaction_id = 0;
544
545         return 0;
546
547 fail:
548         /* cancel any pending transactions */
549         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
550                 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
551                 if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
552                         DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
553                                  ctdb_db->db_name));
554                 }
555                 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
556         }
557         ctdb->freeze_transaction_started = false;
558
559         return -1;
560 }
561
562 /*
563   wipe a database - only possible when in a frozen transaction
564  */
565 int32_t ctdb_control_wipe_database(struct ctdb_context *ctdb, TDB_DATA indata)
566 {
567         struct ctdb_control_wipe_database w = *(struct ctdb_control_wipe_database *)indata.dptr;
568         struct ctdb_db_context *ctdb_db;
569
570         ctdb_db = find_ctdb_db(ctdb, w.db_id);
571         if (!ctdb_db) {
572                 DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%x\n", w.db_id));
573                 return -1;
574         }
575
576         if (ctdb->freeze_mode[ctdb_db->priority] != CTDB_FREEZE_FROZEN) {
577                 DEBUG(DEBUG_ERR,(__location__ " Failed transaction_start while not frozen\n"));
578                 return -1;
579         }
580
581         if (!ctdb->freeze_transaction_started) {
582                 DEBUG(DEBUG_ERR,(__location__ " transaction not started\n"));
583                 return -1;
584         }
585
586         if (w.transaction_id != ctdb->freeze_transaction_id) {
587                 DEBUG(DEBUG_ERR,(__location__ " incorrect transaction id 0x%x in commit\n", w.transaction_id));
588                 return -1;
589         }
590
591         if (tdb_wipe_all(ctdb_db->ltdb->tdb) != 0) {
592                 DEBUG(DEBUG_ERR,(__location__ " Failed to wipe database for db '%s'\n",
593                          ctdb_db->db_name));
594                 return -1;
595         }
596
597         return 0;
598 }