4 Copyright (C) Andrew Tridgell 2007
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>.
20 #include "lib/events/events.h"
21 #include "lib/tdb/include/tdb.h"
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/wait.h"
25 #include "../include/ctdb_private.h"
26 #include "lib/util/dlinklist.h"
/*
  Walk ctdb->db_list and call tdb_lockall() on the local tdb of each entry.
  NOTE(review): the statements that handle a non-zero tdb_lockall() result,
  the closing braces and the return statements are not present in this
  listing, so the exact return values cannot be documented from here.
 */
33 static int ctdb_lock_all_databases(struct ctdb_context *ctdb)
35 struct ctdb_db_context *ctdb_db;
/* visit every entry on the context's database list */
36 for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
/* a non-zero result from tdb_lockall() takes the failure branch */
37 if (tdb_lockall(ctdb_db->ltdb->tdb) != 0) {
45 a list of control requests waiting for a freeze lock child to get the database locks
/*
  One control request that is waiting for a freeze to complete.  Waiters
  are linked onto ctdb_freeze_handle.waiters with DLIST_ADD/DLIST_REMOVE.
  NOTE(review): ctdb_freeze_waiter_destructor() reads w->status, but that
  member's declaration and the closing brace of this struct are not
  present in this listing.
 */
48 struct ctdb_freeze_waiter {
/* list linkage for the waiters list */
49 struct ctdb_freeze_waiter *next, *prev;
/* daemon context the request arrived on */
50 struct ctdb_context *ctdb;
/* the control request; the reply is sent from the waiter's destructor */
51 struct ctdb_req_control *c;
/*
  State for one freeze attempt.  ctdb_freeze_lock() allocates it and
  ctdb_start_freeze() stores the result in ctdb->freeze_handle.
  NOTE(review): h->child and h->fd are used elsewhere in this file, but
  their declarations and the closing brace of this struct are not present
  in this listing.
 */
55 /* a handle to a freeze lock child process */
56 struct ctdb_freeze_handle {
/* daemon context this handle belongs to */
57 struct ctdb_context *ctdb;
/* head of the list of control requests waiting for the freeze */
60 struct ctdb_freeze_waiter *waiters;
/* set by transaction_start, cleared by commit and by the cancel paths */
61 bool transaction_started;
/* id passed to transaction_start; commit and wipe compare against it */
62 uint32_t transaction_id;
66 destroy a freeze handle
/*
  talloc destructor for a freeze handle (installed by ctdb_freeze_lock()).
  Cancels the transaction on every database if one is marked as started,
  resets the daemon's freeze mode and handle pointer, and sends SIGKILL to
  the child recorded in the handle.
  NOTE(review): closing braces, the remaining DEBUG arguments and the
  return statement are not present in this listing.
 */
68 static int ctdb_freeze_handle_destructor(struct ctdb_freeze_handle *h)
70 struct ctdb_context *ctdb = h->ctdb;
71 struct ctdb_db_context *ctdb_db;
73 /* cancel any pending transactions */
/* NOTE(review): this tests ctdb->freeze_handle, not h.  The check
   "h->ctdb->freeze_handle == h" in ctdb_freeze_lock_handler() suggests the
   two can differ - confirm that acting on the current handle is intended */
74 if (ctdb->freeze_handle && ctdb->freeze_handle->transaction_started) {
75 for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
/* TDB_NOLOCK is set only around the cancel and removed again below;
   presumably because the locks are held by the child rather than by this
   process - TODO confirm */
76 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
/* a failed cancel is logged */
77 if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
78 DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
81 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
83 ctdb->freeze_handle->transaction_started = false;
/* the daemon is no longer frozen and no longer has a freeze handle.
   NOTE(review): done unconditionally, even if h is not the handle
   currently stored in ctdb->freeze_handle - verify */
86 ctdb->freeze_mode = CTDB_FREEZE_NONE;
87 ctdb->freeze_handle = NULL;
/* terminate the child process recorded in this handle */
89 kill(h->child, SIGKILL);
94 called when the child writes its status to us
/*
  fd event callback registered by ctdb_freeze_lock() with EVENT_FD_READ on
  h->fd; private_data is the struct ctdb_freeze_handle.
  If the daemon is already frozen, the event is logged as the child having
  died.  Otherwise a status value is read from the child; on the success
  path the freeze mode becomes CTDB_FREEZE_FROZEN and the waiter list is
  drained.
  NOTE(review): the declaration of status, the test that precedes the
  "Failed to get locks" message, and the statements that free h and w are
  not present in this listing.
 */
96 static void ctdb_freeze_lock_handler(struct event_context *ev, struct fd_event *fde,
97 uint16_t flags, void *private_data)
99 struct ctdb_freeze_handle *h = talloc_get_type(private_data, struct ctdb_freeze_handle);
101 struct ctdb_freeze_waiter *w;
/* already frozen: activity on the fd now is treated as the child dying */
103 if (h->ctdb->freeze_mode == CTDB_FREEZE_FROZEN) {
104 DEBUG(DEBUG_INFO,("freeze child died - unfreezing\n"));
/* only clear the daemon's pointer if it still refers to this handle */
105 if (h->ctdb->freeze_handle == h) {
106 h->ctdb->freeze_handle = NULL;
/* anything other than a full-sized read is reported as an error */
112 if (read(h->fd, &status, sizeof(status)) != sizeof(status)) {
113 DEBUG(DEBUG_ERR,("read error from freeze lock child\n"));
118 DEBUG(DEBUG_ERR,("Failed to get locks in ctdb_freeze_child\n"));
119 /* we didn't get the locks - destroy the handle */
/* the child reported success: the daemon is now frozen */
124 h->ctdb->freeze_mode = CTDB_FREEZE_FROZEN;
126 /* notify the waiters */
/* NOTE(review): the list is reached through h->ctdb->freeze_handle rather
   than h - confirm they are always the same handle here.
   NOTE(review): ctdb_freeze_waiter_destructor() also calls DLIST_REMOVE on
   the waiter; if w is freed inside this loop it would be unlinked twice -
   confirm that is harmless */
127 while ((w = h->ctdb->freeze_handle->waiters)) {
129 DLIST_REMOVE(h->ctdb->freeze_handle->waiters, w);
135 create a child which gets locks on all the open databases, then calls the callback telling the parent that it is done
/*
  Allocate a freeze handle as a talloc child of ctdb, create a socketpair
  and a child process, and register ctdb_freeze_lock_handler() on the
  parent's end of the socketpair.  In the child, ctdb_lock_all_databases()
  is called and a status value is written to fd[1], retrying up to 30
  times; the child then blocks in read() on fd[1].
  Returns the new handle; the failure paths log an error.
  NOTE(review): the declarations of fd/ret/count, the fork() call, the
  assignments to h->ctdb and h->fd, the close()/_exit() calls and every
  return statement are not present in this listing.
 */
138 static struct ctdb_freeze_handle *ctdb_freeze_lock(struct ctdb_context *ctdb)
140 struct ctdb_freeze_handle *h;
142 struct fd_event *fde;
/* zero-initialised handle, parented to the daemon context */
144 h = talloc_zero(ctdb, struct ctdb_freeze_handle);
145 CTDB_NO_MEMORY_NULL(ctdb, h);
149 /* use socketpair() instead of pipe() so we have bi-directional fds */
150 if (socketpair(AF_UNIX, SOCK_STREAM, 0, fd) != 0) {
151 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
/* NOTE(review): on this fork-failure path the listing does not show
   fd[0]/fd[1] being closed - verify they are not leaked */
157 if (h->child == -1) {
158 DEBUG(DEBUG_ERR,("Failed to fork child for ctdb_freeze_lock\n"));
/* take the locks on every database */
168 ret = ctdb_lock_all_databases(ctdb);
/* report the result to the parent, retrying up to 30 times */
175 while (count++ < 30) {
/* NOTE(review): ret is both the buffer being sent and the variable that
   receives write()'s return value.  After a failed or short write, the
   next iteration sends that return value instead of the lock status */
176 ret = write(fd[1], &ret, sizeof(ret));
177 if (ret == sizeof(ret)) {
180 DEBUG(DEBUG_ERR, (__location__ " Failed to write to socket from freeze child. ret:%d errno:%u\n", ret, errno));
184 DEBUG(DEBUG_ERR, (__location__ " Failed to write to socket from freeze child. Aborting freeze child\n"));
188 /* the read here means we will die if the parent exits */
/* the result of this read is deliberately not examined */
189 read(fd[1], &ret, sizeof(ret));
/* from here on, freeing h runs ctdb_freeze_handle_destructor() */
193 talloc_set_destructor(h, ctdb_freeze_handle_destructor);
/* the fd event is parented to h and uses EVENT_FD_AUTOCLOSE; h is also
   passed as the handler's private data */
199 fde = event_add_fd(ctdb->ev, h, h->fd, EVENT_FD_READ|EVENT_FD_AUTOCLOSE,
200 ctdb_freeze_lock_handler, h);
202 DEBUG(DEBUG_ERR,("Failed to setup fd event for ctdb_freeze_lock\n"));
212 destroy a waiter for a freeze mode change
/*
  talloc destructor for a waiter (installed by ctdb_control_freeze()).
  Unlinks the waiter from the current freeze handle's waiter list and sends
  the reply for the stored control request, passing w->status as the status.
  NOTE(review): this dereferences w->ctdb->freeze_handle, which
  ctdb_freeze_handle_destructor() sets to NULL.  Waiters are allocated as
  talloc children of the handle, so if talloc runs a child's destructor
  after its parent's, this would dereference NULL while the handle is
  being freed - confirm against talloc's destructor ordering.
  NOTE(review): the return statement is not present in this listing.
 */
214 static int ctdb_freeze_waiter_destructor(struct ctdb_freeze_waiter *w)
216 DLIST_REMOVE(w->ctdb->freeze_handle->waiters, w);
217 ctdb_request_control_reply(w->ctdb, w->c, NULL, w->status, NULL);
222 start the freeze process
/*
  Begin freezing the daemon unless it is already frozen.  If no freeze
  handle exists, one is created with ctdb_freeze_lock() and the freeze mode
  is set to CTDB_FREEZE_PENDING; completion is reported later through
  ctdb_freeze_lock_handler().
  NOTE(review): the early return for the already-frozen case and the
  closing braces are not present in this listing.
 */
224 void ctdb_start_freeze(struct ctdb_context *ctdb)
226 if (ctdb->freeze_mode == CTDB_FREEZE_FROZEN) {
227 /* we're already frozen */
231 /* if there isn't a freeze lock child then create one */
232 if (!ctdb->freeze_handle) {
233 ctdb->freeze_handle = ctdb_freeze_lock(ctdb);
/* presumably returns early when the handle could not be created, leaving
   freeze_mode unchanged - verify against the macro definition */
234 CTDB_NO_MEMORY_VOID(ctdb, ctdb->freeze_handle);
235 ctdb->freeze_mode = CTDB_FREEZE_PENDING;
/*
  Handle a freeze control request.  If the daemon is not yet frozen, the
  freeze is started and the request is queued as a waiter on the freeze
  handle; the reply is sent later from the waiter's destructor.
  The waiter is allocated as a talloc child of the freeze handle and takes
  ownership of the request c via talloc_steal().
  NOTE(review): the return statements, the assignments to w->ctdb and
  w->status, and any use of async_reply are not present in this listing.
 */
242 int32_t ctdb_control_freeze(struct ctdb_context *ctdb, struct ctdb_req_control *c, bool *async_reply)
244 struct ctdb_freeze_waiter *w;
246 if (ctdb->freeze_mode == CTDB_FREEZE_FROZEN) {
247 /* we're already frozen */
251 ctdb_start_freeze(ctdb);
253 /* add ourselves to list of waiters */
/* NOTE(review): if ctdb_start_freeze() could not create a handle,
   ctdb->freeze_handle is presumably still NULL here and the DLIST_ADD
   below would dereference it - verify */
254 w = talloc(ctdb->freeze_handle, struct ctdb_freeze_waiter);
255 CTDB_NO_MEMORY(ctdb, w);
/* the request now lives as long as the waiter does */
257 w->c = talloc_steal(w, c);
259 talloc_set_destructor(w, ctdb_freeze_waiter_destructor);
260 DLIST_ADD(ctdb->freeze_handle->waiters, w);
262 /* we won't reply till later */
269 block until we are frozen, used during daemon startup
/*
  Start a freeze and run the event loop until the freeze mode is no longer
  CTDB_FREEZE_PENDING.
  Returns true if the daemon ended up in CTDB_FREEZE_FROZEN, false
  otherwise.
 */
271 bool ctdb_blocking_freeze(struct ctdb_context *ctdb)
273 ctdb_start_freeze(ctdb);
275 /* block until frozen */
/* each iteration runs the event loop once */
276 while (ctdb->freeze_mode == CTDB_FREEZE_PENDING) {
277 event_loop_once(ctdb->ev);
280 return ctdb->freeze_mode == CTDB_FREEZE_FROZEN;
/*
  Thaw the daemon: cancel the transaction on every database if one is
  marked as started, free the freeze handle (which runs
  ctdb_freeze_handle_destructor()), and call ctdb_call_resend_all().
  NOTE(review): closing braces, the remaining DEBUG arguments and the
  return statement are not present in this listing.
 */
288 int32_t ctdb_control_thaw(struct ctdb_context *ctdb)
290 /* cancel any pending transactions */
291 if (ctdb->freeze_handle && ctdb->freeze_handle->transaction_started) {
292 struct ctdb_db_context *ctdb_db;
294 for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
/* TDB_NOLOCK is set only around the cancel and removed again below */
295 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
296 if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
297 DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
300 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
/* NOTE(review): unlike the handle destructor, no reset of
   transaction_started is visible after this loop - verify */
/* NOTE(review): the two system() calls below look unconditional in this
   listing, but short lines (which would include any preprocessor guards)
   are missing - verify whether they are actually compiled in */
305 /* this hack can be used to get a copy of the databases at the end of a recovery */
306 system("mkdir -p /var/ctdb.saved; /usr/bin/rsync --delete -a /var/ctdb/ /var/ctdb.saved/$$ 2>&1 > /dev/null");
310 /* and this one for local testing */
311 system("mkdir -p test.db.saved; /usr/bin/rsync --delete -a test.db/ test.db.saved/$$ 2>&1 > /dev/null");
/* freeing the handle runs its destructor, which resets freeze_mode */
315 talloc_free(ctdb->freeze_handle);
316 ctdb->freeze_handle = NULL;
317 ctdb_call_resend_all(ctdb);
323 start a transaction on all databases - used for recovery
/*
  Start a transaction on every database; only permitted while the daemon is
  frozen.  If a transaction is already marked as started, it is cancelled
  on each database first.  On completion the handle records
  transaction_started = true and the supplied id.
  NOTE(review): the declaration of ret, the test of ret before the "Failed
  to start transaction" message, closing braces and the return statements
  are not present in this listing.
 */
325 int32_t ctdb_control_transaction_start(struct ctdb_context *ctdb, uint32_t id)
327 struct ctdb_db_context *ctdb_db;
/* refuse unless the freeze has completed */
329 if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) {
330 DEBUG(DEBUG_ERR,(__location__ " Failed transaction_start while not frozen\n"));
335 for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
/* TDB_NOLOCK stays set across the cancel and start, and is removed below */
338 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
/* drop any earlier transaction before starting a new one */
340 if (ctdb->freeze_handle->transaction_started) {
341 if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
342 DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
344 /* not a fatal error */
/* NOTE(review): TDB_NO_NESTING is added here and no matching
   tdb_remove_flags() for it is visible in this listing */
348 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NO_NESTING);
349 ret = tdb_transaction_start(ctdb_db->ltdb->tdb);
351 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
354 DEBUG(DEBUG_ERR,(__location__ " Failed to start transaction for db '%s'\n",
/* remember that a transaction is open, and which id opened it */
360 ctdb->freeze_handle->transaction_started = true;
361 ctdb->freeze_handle->transaction_id = id;
367 commit transactions on all databases
/*
  Commit the open transaction on every database.  Requires the daemon to be
  frozen, a transaction to be marked as started, and id to match the id
  recorded at transaction start.  If a commit fails, the transaction is
  cancelled on every database and transaction_started is cleared.  After
  all commits, transaction_started and transaction_id are reset.
  NOTE(review): closing braces, the remaining DEBUG arguments and the
  return statements are not present in this listing.
 */
369 int32_t ctdb_control_transaction_commit(struct ctdb_context *ctdb, uint32_t id)
371 struct ctdb_db_context *ctdb_db;
373 if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) {
/* NOTE(review): the message says "transaction_start" although this is the
   commit path */
374 DEBUG(DEBUG_ERR,(__location__ " Failed transaction_start while not frozen\n"));
378 if (!ctdb->freeze_handle->transaction_started) {
379 DEBUG(DEBUG_ERR,(__location__ " transaction not started\n"));
/* the caller must quote the id recorded at transaction start */
383 if (id != ctdb->freeze_handle->transaction_id) {
384 DEBUG(DEBUG_ERR,(__location__ " incorrect transaction id 0x%x in commit\n", id));
388 for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
/* TDB_NOLOCK is set only around the commit and removed again below */
389 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
390 if (tdb_transaction_commit(ctdb_db->ltdb->tdb) != 0) {
391 DEBUG(DEBUG_ERR,(__location__ " Failed to commit transaction for db '%s'. Cancel all transactions and resetting transaction_started to false.\n",
394 /* cancel any pending transactions */
/* NOTE(review): this inner loop reuses ctdb_db, the outer loop's iterator,
   and leaves it NULL when it finishes; the outer loop must not continue
   afterwards - the statement that leaves the function is not visible
   here, verify */
395 for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
396 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
397 if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
398 DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
401 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
403 ctdb->freeze_handle->transaction_started = false;
407 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
/* every commit went through: no transaction is open any more */
410 ctdb->freeze_handle->transaction_started = false;
411 ctdb->freeze_handle->transaction_id = 0;
417 wipe a database - only possible when in a frozen transaction
419 int32_t ctdb_control_wipe_database(struct ctdb_context *ctdb, TDB_DATA indata)
421 struct ctdb_control_wipe_database w = *(struct ctdb_control_wipe_database *)indata.dptr;
422 struct ctdb_db_context *ctdb_db;
424 if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) {
425 DEBUG(DEBUG_ERR,(__location__ " Failed transaction_start while not frozen\n"));
429 if (!ctdb->freeze_handle->transaction_started) {
430 DEBUG(DEBUG_ERR,(__location__ " transaction not started\n"));
434 if (w.transaction_id != ctdb->freeze_handle->transaction_id) {
435 DEBUG(DEBUG_ERR,(__location__ " incorrect transaction id 0x%x in commit\n", w.transaction_id));
439 ctdb_db = find_ctdb_db(ctdb, w.db_id);
441 DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%x\n", w.db_id));
445 if (tdb_wipe_all(ctdb_db->ltdb->tdb) != 0) {
446 DEBUG(DEBUG_ERR,(__location__ " Failed to wipe database for db '%s'\n",