ctdb-daemon: Avoid the use of ctdb->freeze_mode variable
[obnox/samba/samba-obnox.git] / ctdb / server / ctdb_freeze.c
1 /* 
2    ctdb freeze handling
3
4    Copyright (C) Andrew Tridgell  2007
5
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10    
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15    
16    You should have received a copy of the GNU General Public License
17    along with this program; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "includes.h"
20 #include "tdb.h"
21 #include "system/network.h"
22 #include "system/filesys.h"
23 #include "system/wait.h"
24 #include "../include/ctdb_private.h"
25 #include "lib/util/dlinklist.h"
26 #include "lib/tdb_wrap/tdb_wrap.h"
27 #include "../common/rb_tree.h"
28
29
30 /**
31  * Cancel a transaction on database
32  */
33 static int db_transaction_cancel_handler(struct ctdb_db_context *ctdb_db,
34                                          void *private_data)
35 {
36         int ret;
37
38         tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
39         ret = tdb_transaction_cancel(ctdb_db->ltdb->tdb);
40         if (ret != 0) {
41                 DEBUG(DEBUG_ERR, ("Failed to cancel transaction for db %s\n",
42                                   ctdb_db->db_name));
43         }
44         tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
45         return 0;
46 }
47
48 /**
49  * Start a transaction on database
50  */
51 static int db_transaction_start_handler(struct ctdb_db_context *ctdb_db,
52                                         void *private_data)
53 {
54         bool freeze_transaction_started = *(bool *)private_data;
55         int ret;
56
57         tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
58         if (freeze_transaction_started) {
59                 ret = tdb_transaction_cancel(ctdb_db->ltdb->tdb);
60                 if (ret != 0) {
61                         DEBUG(DEBUG_ERR,
62                               ("Failed to cancel transaction for db %s\n",
63                                ctdb_db->db_name));
64                 }
65         }
66         ret = tdb_transaction_start(ctdb_db->ltdb->tdb);
67         tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
68         if (ret != 0) {
69                 DEBUG(DEBUG_ERR, ("Failed to start transaction for db %s\n",
70                                   ctdb_db->db_name));
71                 return -1;
72         }
73         return 0;
74 }
75
76 /**
77  * Commit a transaction on database
78  */
79 static int db_transaction_commit_handler(struct ctdb_db_context *ctdb_db,
80                                          void *private_data)
81 {
82         int healthy_nodes = *(int *)private_data;
83         int ret;
84
85         tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
86         ret = tdb_transaction_commit(ctdb_db->ltdb->tdb);
87         tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
88         if (ret != 0) {
89                 DEBUG(DEBUG_ERR, ("Failed to commit transaction for db %s\n",
90                                   ctdb_db->db_name));
91                 return -1;
92         }
93
94         ret = ctdb_update_persistent_health(ctdb_db->ctdb, ctdb_db, NULL,
95                                             healthy_nodes);
96         if (ret != 0) {
97                 DEBUG(DEBUG_ERR, ("Failed to update persistent health for db %s\n",
98                                   ctdb_db->db_name));
99         }
100         return ret;
101 }
102
103
/*
  One control request waiting for a freeze to complete.  Waiters are
  kept in a doubly linked list on the freeze handle; the reply to the
  control is sent from the waiter's talloc destructor.
 */
struct ctdb_freeze_waiter {
        /* list linkage (used with DLIST_ADD / DLIST_REMOVE) */
        struct ctdb_freeze_waiter *next;
        struct ctdb_freeze_waiter *prev;
        /* daemon context the reply is sent through */
        struct ctdb_context *ctdb;
        /* the control request being answered */
        struct ctdb_req_control *c;
        /* database priority this waiter is waiting on */
        uint32_t priority;
        /* status sent in the reply: starts at -1, set to 0 once frozen */
        int32_t status;
};
115
/*
  State of one freeze attempt for a single database priority.
 */
struct ctdb_freeze_handle {
        /* owning daemon context */
        struct ctdb_context *ctdb;
        /* database priority being frozen */
        uint32_t priority;
        /* lock request returned by ctdb_lock_alldb_prio() */
        struct lock_request *lreq;
        /* control requests to answer once the freeze completes */
        struct ctdb_freeze_waiter *waiters;
};
123
124 /*
125   destroy a freeze handle
126  */
127 static int ctdb_freeze_handle_destructor(struct ctdb_freeze_handle *h)
128 {
129         struct ctdb_context *ctdb = h->ctdb;
130
131         DEBUG(DEBUG_ERR,("Release freeze handler for prio %u\n", h->priority));
132
133         /* cancel any pending transactions */
134         if (ctdb->freeze_transaction_started) {
135                 ctdb_db_prio_iterator(ctdb, h->priority,
136                                       db_transaction_cancel_handler, NULL);
137                 ctdb->freeze_transaction_started = false;
138         }
139
140         ctdb->freeze_mode[h->priority]    = CTDB_FREEZE_NONE;
141         ctdb->freeze_handles[h->priority] = NULL;
142
143         return 0;
144 }
145
146 /*
147   called when the child writes its status to us
148  */
149 static void ctdb_freeze_lock_handler(void *private_data, bool locked)
150 {
151         struct ctdb_freeze_handle *h = talloc_get_type_abort(private_data,
152                                                              struct ctdb_freeze_handle);
153         struct ctdb_freeze_waiter *w;
154
155         if (h->ctdb->freeze_mode[h->priority] == CTDB_FREEZE_FROZEN) {
156                 DEBUG(DEBUG_INFO,("freeze child died - unfreezing\n"));
157                 talloc_free(h);
158                 return;
159         }
160
161         if (!locked) {
162                 DEBUG(DEBUG_ERR,("Failed to get locks in ctdb_freeze_child\n"));
163                 /* we didn't get the locks - destroy the handle */
164                 talloc_free(h);
165                 return;
166         }
167
168         h->ctdb->freeze_mode[h->priority] = CTDB_FREEZE_FROZEN;
169
170         /* notify the waiters */
171         if (h != h->ctdb->freeze_handles[h->priority]) {
172                 DEBUG(DEBUG_ERR,("lockwait finished but h is not linked\n"));
173         }
174         while ((w = h->waiters)) {
175                 w->status = 0;
176                 DLIST_REMOVE(h->waiters, w);
177                 talloc_free(w);
178         }
179 }
180
181 /*
182   destroy a waiter for a freeze mode change
183  */
184 static int ctdb_freeze_waiter_destructor(struct ctdb_freeze_waiter *w)
185 {
186         ctdb_request_control_reply(w->ctdb, w->c, NULL, w->status, NULL);
187         return 0;
188 }
189
190 /*
191   start the freeze process for a certain priority
192  */
193 void ctdb_start_freeze(struct ctdb_context *ctdb, uint32_t priority)
194 {
195         struct ctdb_freeze_handle *h;
196
197         if ((priority < 1) || (priority > NUM_DB_PRIORITIES)) {
198                 DEBUG(DEBUG_ERR,(__location__ " Invalid db priority : %u\n", priority));
199                 ctdb_fatal(ctdb, "Internal error");
200         }
201
202         if (ctdb->freeze_mode[priority] == CTDB_FREEZE_FROZEN) {
203                 /* we're already frozen */
204                 return;
205         }
206
207         if (ctdb->freeze_handles[priority] != NULL) {
208                 /* already trying to freeze */
209                 return;
210         }
211
212         DEBUG(DEBUG_ERR, ("Freeze priority %u\n", priority));
213
214         /* Stop any vacuuming going on: we don't want to wait. */
215         ctdb_stop_vacuuming(ctdb);
216
217         /* create freeze lock child */
218         h = talloc_zero(ctdb, struct ctdb_freeze_handle);
219         CTDB_NO_MEMORY_FATAL(ctdb, h);
220         h->ctdb = ctdb;
221         h->priority = priority;
222         talloc_set_destructor(h, ctdb_freeze_handle_destructor);
223
224         h->lreq = ctdb_lock_alldb_prio(h, ctdb, priority, false,
225                                        ctdb_freeze_lock_handler, h);
226         CTDB_NO_MEMORY_FATAL(ctdb, h->lreq);
227         ctdb->freeze_handles[priority] = h;
228         ctdb->freeze_mode[priority] = CTDB_FREEZE_PENDING;
229 }
230
231 /*
232   freeze the databases
233  */
234 int32_t ctdb_control_freeze(struct ctdb_context *ctdb, struct ctdb_req_control *c, bool *async_reply)
235 {
236         struct ctdb_freeze_waiter *w;
237         uint32_t priority;
238
239         priority = (uint32_t)c->srvid;
240
241         if (priority == 0) {
242                 DEBUG(DEBUG_ERR,("Freeze priority 0 requested, remapping to priority 1\n"));
243                 priority = 1;
244         }
245
246         if ((priority < 1) || (priority > NUM_DB_PRIORITIES)) {
247                 DEBUG(DEBUG_ERR,(__location__ " Invalid db priority : %u\n", priority));
248                 return -1;
249         }
250
251         if (ctdb->freeze_mode[priority] == CTDB_FREEZE_FROZEN) {
252                 DEBUG(DEBUG_ERR, ("Freeze priority %u\n", priority));
253                 /* we're already frozen */
254                 return 0;
255         }
256
257         ctdb_start_freeze(ctdb, priority);
258
259         /* add ourselves to list of waiters */
260         if (ctdb->freeze_handles[priority] == NULL) {
261                 DEBUG(DEBUG_ERR,("No freeze lock handle when adding a waiter\n"));
262                 return -1;
263         }
264
265         w = talloc(ctdb->freeze_handles[priority], struct ctdb_freeze_waiter);
266         CTDB_NO_MEMORY(ctdb, w);
267         w->ctdb     = ctdb;
268         w->c        = talloc_steal(w, c);
269         w->priority = priority;
270         w->status   = -1;
271         talloc_set_destructor(w, ctdb_freeze_waiter_destructor);
272         DLIST_ADD(ctdb->freeze_handles[priority]->waiters, w);
273
274         /* we won't reply till later */
275         *async_reply = true;
276         return 0;
277 }
278
279
280 /*
281   block until we are frozen, used during daemon startup
282  */
283 bool ctdb_blocking_freeze(struct ctdb_context *ctdb)
284 {
285         int i;
286
287         for (i=1; i<=NUM_DB_PRIORITIES; i++) {
288                 ctdb_start_freeze(ctdb, i);
289
290                 /* block until frozen */
291                 while (ctdb->freeze_mode[i] == CTDB_FREEZE_PENDING) {
292                         event_loop_once(ctdb->ev);
293                 }
294         }
295
296         return true;
297 }
298
299
300 static void thaw_priority(struct ctdb_context *ctdb, uint32_t priority)
301 {
302         DEBUG(DEBUG_ERR,("Thawing priority %u\n", priority));
303
304         /* cancel any pending transactions */
305         if (ctdb->freeze_transaction_started) {
306                 ctdb_db_prio_iterator(ctdb, priority,
307                                       db_transaction_cancel_handler, NULL);
308                 ctdb->freeze_transaction_started = false;
309         }
310
311         if (ctdb->freeze_handles[priority] != NULL) {
312                 talloc_free(ctdb->freeze_handles[priority]);
313                 ctdb->freeze_handles[priority] = NULL;
314         }
315 }
316
317 /*
318   thaw the databases
319  */
320 int32_t ctdb_control_thaw(struct ctdb_context *ctdb, uint32_t priority,
321                           bool check_recmode)
322 {
323         if (priority > NUM_DB_PRIORITIES) {
324                 DEBUG(DEBUG_ERR,(__location__ " Invalid db priority : %u\n",
325                                  priority));
326                 return -1;
327         }
328
329         if (check_recmode && ctdb->recovery_mode == CTDB_RECOVERY_ACTIVE) {
330                 DEBUG(DEBUG_ERR, ("Failing to thaw databases while "
331                                   "recovery is active\n"));
332                 return -1;
333         }
334
335         if (priority == 0) {
336                 int i;
337                 for (i=1;i<=NUM_DB_PRIORITIES; i++) {
338                         thaw_priority(ctdb, i);
339                 }
340         } else {
341                 thaw_priority(ctdb, priority);
342         }
343
344         ctdb_call_resend_all(ctdb);
345         return 0;
346 }
347
348
349 /*
350   start a transaction on all databases - used for recovery
351  */
352 int32_t ctdb_control_transaction_start(struct ctdb_context *ctdb, uint32_t id)
353 {
354         int ret;
355
356         if (!ctdb_db_all_frozen(ctdb)) {
357                 DEBUG(DEBUG_ERR, (__location__
358                       " failing transaction start while not frozen\n"));
359                 return -1;
360         }
361
362         ret = ctdb_db_iterator(ctdb, db_transaction_start_handler,
363                                &ctdb->freeze_transaction_started);
364         if (ret != 0) {
365                 return -1;
366         }
367
368         ctdb->freeze_transaction_started = true;
369         ctdb->freeze_transaction_id = id;
370
371         return 0;
372 }
373
374 /*
375   cancel a transaction for all databases - used for recovery
376  */
377 int32_t ctdb_control_transaction_cancel(struct ctdb_context *ctdb)
378 {
379         DEBUG(DEBUG_ERR,(__location__ " recovery transaction cancelled called\n"));
380
381         ctdb_db_iterator(ctdb, db_transaction_cancel_handler, NULL);
382         ctdb->freeze_transaction_started = false;
383
384         return 0;
385 }
386
387 /*
388   commit transactions on all databases
389  */
390 int32_t ctdb_control_transaction_commit(struct ctdb_context *ctdb, uint32_t id)
391 {
392         int i;
393         int healthy_nodes = 0;
394         int ret;
395
396         if (!ctdb_db_all_frozen(ctdb)) {
397                 DEBUG(DEBUG_ERR, (__location__
398                       " failing transaction commit while not frozen\n"));
399                 return -1;
400         }
401
402         if (!ctdb->freeze_transaction_started) {
403                 DEBUG(DEBUG_ERR,(__location__ " transaction not started\n"));
404                 return -1;
405         }
406
407         if (id != ctdb->freeze_transaction_id) {
408                 DEBUG(DEBUG_ERR,(__location__ " incorrect transaction id 0x%x in commit\n", id));
409                 return -1;
410         }
411
412         DEBUG(DEBUG_DEBUG,(__location__ " num_nodes[%d]\n", ctdb->num_nodes));
413         for (i=0; i < ctdb->num_nodes; i++) {
414                 DEBUG(DEBUG_DEBUG,(__location__ " node[%d].flags[0x%X]\n",
415                                    i, ctdb->nodes[i]->flags));
416                 if (ctdb->nodes[i]->flags == 0) {
417                         healthy_nodes++;
418                 }
419         }
420         DEBUG(DEBUG_INFO,(__location__ " healthy_nodes[%d]\n", healthy_nodes));
421
422         ret = ctdb_db_iterator(ctdb, db_transaction_commit_handler,
423                                &healthy_nodes);
424         if (ret != 0) {
425                 DEBUG(DEBUG_ERR, ("Cancel all transactions\n"));
426                 goto fail;
427         }
428
429         ctdb->freeze_transaction_started = false;
430         ctdb->freeze_transaction_id = 0;
431
432         return 0;
433
434 fail:
435         /* cancel any pending transactions */
436         ctdb_db_iterator(ctdb, db_transaction_cancel_handler, NULL);
437         ctdb->freeze_transaction_started = false;
438
439         return -1;
440 }
441
442 /*
443   wipe a database - only possible when in a frozen transaction
444  */
445 int32_t ctdb_control_wipe_database(struct ctdb_context *ctdb, TDB_DATA indata)
446 {
447         struct ctdb_control_wipe_database w = *(struct ctdb_control_wipe_database *)indata.dptr;
448         struct ctdb_db_context *ctdb_db;
449
450         ctdb_db = find_ctdb_db(ctdb, w.db_id);
451         if (!ctdb_db) {
452                 DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%x\n", w.db_id));
453                 return -1;
454         }
455
456         if (ctdb->freeze_mode[ctdb_db->priority] != CTDB_FREEZE_FROZEN) {
457                 DEBUG(DEBUG_ERR,(__location__ " Failed transaction_start while not frozen\n"));
458                 return -1;
459         }
460
461         if (!ctdb->freeze_transaction_started) {
462                 DEBUG(DEBUG_ERR,(__location__ " transaction not started\n"));
463                 return -1;
464         }
465
466         if (w.transaction_id != ctdb->freeze_transaction_id) {
467                 DEBUG(DEBUG_ERR,(__location__ " incorrect transaction id 0x%x in commit\n", w.transaction_id));
468                 return -1;
469         }
470
471         if (tdb_wipe_all(ctdb_db->ltdb->tdb) != 0) {
472                 DEBUG(DEBUG_ERR,(__location__ " Failed to wipe database for db '%s'\n",
473                          ctdb_db->db_name));
474                 return -1;
475         }
476
477         if (!ctdb_db->persistent) {
478                 talloc_free(ctdb_db->delete_queue);
479                 ctdb_db->delete_queue = trbt_create(ctdb_db, 0);
480                 if (ctdb_db->delete_queue == NULL) {
481                         DEBUG(DEBUG_ERR, (__location__ " Failed to re-create "
482                                           "the vacuum tree.\n"));
483                         return -1;
484                 }
485         }
486
487         return 0;
488 }
489
490 bool ctdb_db_prio_frozen(struct ctdb_context *ctdb, uint32_t priority)
491 {
492         if (priority == 0) {
493                 priority = 1;
494         }
495         if (priority > NUM_DB_PRIORITIES) {
496                 DEBUG(DEBUG_ERR, ("Invalid DB priority specified\n"));
497                 return false;
498         }
499
500         if (ctdb->freeze_mode[priority] != CTDB_FREEZE_FROZEN) {
501                 return false;
502         }
503
504         return true;
505 }
506
507 bool ctdb_db_all_frozen(struct ctdb_context *ctdb)
508 {
509         int i;
510
511         for (i=1; i<=NUM_DB_PRIORITIES; i++) {
512                 if (ctdb->freeze_mode[i] != CTDB_FREEZE_FROZEN) {
513                         return false;
514                 }
515         }
516         return true;
517 }