This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
+ the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include "lib/events/events.h"
pid_t child;
int fd;
struct ctdb_freeze_waiter *waiters;
+ bool transaction_started;
+ uint32_t transaction_id;
};
/*
+ destructor for a freeze handle
+
+ Cancels the transactions on every database in ctdb->db_list when the
+ context's current freeze_handle has transaction_started set, resets
+ freeze_mode to CTDB_FREEZE_NONE, clears ctdb->freeze_handle and sends
+ SIGKILL to the freeze child. The child is killed but not waited for
+ here. Always returns 0.
*/
static int ctdb_freeze_handle_destructor(struct ctdb_freeze_handle *h)
{
- h->ctdb->freeze_mode = CTDB_FREEZE_NONE;
+ struct ctdb_context *ctdb = h->ctdb;
+ struct ctdb_db_context *ctdb_db;
+
+ /* cancel any pending transactions
+  *
+  * The check looks at ctdb->freeze_handle rather than at h, so nothing
+  * is cancelled when the context no longer points at a handle.
+  * TDB_NOLOCK is set around each cancel and removed right after; a
+  * failed cancel is only logged and the loop carries on with the next
+  * database. */
+ if (ctdb->freeze_handle && ctdb->freeze_handle->transaction_started) {
+ for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
+ tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
+ if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
+ DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
+ ctdb_db->db_name));
+ }
+ tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
+ }
+ ctdb->freeze_handle->transaction_started = false;
+ }
+
+ ctdb->freeze_mode = CTDB_FREEZE_NONE;
+ ctdb->freeze_handle = NULL;
+
kill(h->child, SIGKILL);
- waitpid(h->child, NULL, 0);
return 0;
}
struct ctdb_freeze_waiter *w;
if (h->ctdb->freeze_mode == CTDB_FREEZE_FROZEN) {
- DEBUG(0,("freeze child died - unfreezing\n"));
+ DEBUG(DEBUG_INFO,("freeze child died - unfreezing\n"));
+ if (h->ctdb->freeze_handle == h) {
+ h->ctdb->freeze_handle = NULL;
+ }
talloc_free(h);
return;
}
if (read(h->fd, &status, sizeof(status)) != sizeof(status)) {
- DEBUG(0,("read error from freeze lock child\n"));
+ DEBUG(DEBUG_ERR,("read error from freeze lock child\n"));
status = -1;
}
if (status == -1) {
- DEBUG(0,("Failed to get locks in ctdb_freeze_child\n"));
+ DEBUG(DEBUG_ERR,("Failed to get locks in ctdb_freeze_child\n"));
/* we didn't get the locks - destroy the handle */
talloc_free(h);
return;
struct fd_event *fde;
h = talloc_zero(ctdb, struct ctdb_freeze_handle);
- CTDB_NO_MEMORY_VOID(ctdb, h);
+ CTDB_NO_MEMORY_NULL(ctdb, h);
h->ctdb = ctdb;
/* use socketpair() instead of pipe() so we have bi-directional fds */
if (socketpair(AF_UNIX, SOCK_STREAM, 0, fd) != 0) {
- DEBUG(0,("Failed to create pipe for ctdb_freeze_lock\n"));
+ DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
talloc_free(h);
return NULL;
}
h->child = fork();
if (h->child == -1) {
- DEBUG(0,("Failed to fork child for ctdb_freeze_lock\n"));
+ DEBUG(DEBUG_ERR,("Failed to fork child for ctdb_freeze_lock\n"));
talloc_free(h);
return NULL;
}
if (h->child == 0) {
int ret;
+ int count = 0;
/* in the child */
close(fd[0]);
ret = ctdb_lock_all_databases(ctdb);
if (ret != 0) {
_exit(0);
}
- write(fd[1], &ret, sizeof(ret));
+
+ alarm(30);
+
+ while (count++ < 30) {
+ ret = write(fd[1], &ret, sizeof(ret));
+ if (ret == sizeof(ret)) {
+ break;
+ }
+ DEBUG(DEBUG_ERR, (__location__ " Failed to write to socket from freeze child. ret:%d errno:%u\n", ret, errno));
+ sleep (1);
+ }
+ if (count >= 30) {
+ DEBUG(DEBUG_ERR, (__location__ " Failed to write to socket from freeze child. Aborting freeze child\n"));
+ _exit(0);
+ }
+
/* the read here means we will die if the parent exits */
read(fd[1], &ret, sizeof(ret));
_exit(0);
fde = event_add_fd(ctdb->ev, h, h->fd, EVENT_FD_READ|EVENT_FD_AUTOCLOSE,
ctdb_freeze_lock_handler, h);
if (fde == NULL) {
- DEBUG(0,("Failed to setup fd event for ctdb_freeze_lock\n"));
+ DEBUG(DEBUG_ERR,("Failed to setup fd event for ctdb_freeze_lock\n"));
close(fd[0]);
talloc_free(h);
return NULL;
}
/*
- freeze the databases
+ start the freeze process
+
+ Does nothing when freeze_mode is already CTDB_FREEZE_FROZEN. Otherwise,
+ if the context has no freeze_handle yet, one is created with
+ ctdb_freeze_lock() and freeze_mode is set to CTDB_FREEZE_PENDING.
+ This function only kicks the freeze off; it does not wait for it to
+ complete and reports nothing back to the caller.
*/
-int32_t ctdb_control_freeze(struct ctdb_context *ctdb, struct ctdb_req_control *c, bool *async_reply)
+void ctdb_start_freeze(struct ctdb_context *ctdb)
{
- struct ctdb_freeze_waiter *w;
-
if (ctdb->freeze_mode == CTDB_FREEZE_FROZEN) {
/* we're already frozen */
- return 0;
+ return;
}
/* if there isn't a freeze lock child then create one */
if (!ctdb->freeze_handle) {
ctdb->freeze_handle = ctdb_freeze_lock(ctdb);
- CTDB_NO_MEMORY(ctdb, ctdb->freeze_handle);
+ CTDB_NO_MEMORY_VOID(ctdb, ctdb->freeze_handle); /* NOTE(review): presumably returns early when ctdb_freeze_lock() gave NULL, leaving freeze_mode unchanged - confirm against the macro */
ctdb->freeze_mode = CTDB_FREEZE_PENDING;
}
+}
+
+/*
+ freeze the databases
+ */
+int32_t ctdb_control_freeze(struct ctdb_context *ctdb, struct ctdb_req_control *c, bool *async_reply)
+{
+ struct ctdb_freeze_waiter *w;
+
+ if (ctdb->freeze_mode == CTDB_FREEZE_FROZEN) {
+ /* we're already frozen */
+ return 0;
+ }
+
+ ctdb_start_freeze(ctdb);
/* add ourselves to list of waiters */
w = talloc(ctdb->freeze_handle, struct ctdb_freeze_waiter);
*/
bool ctdb_blocking_freeze(struct ctdb_context *ctdb)
{
- if (ctdb->freeze_mode == CTDB_FREEZE_FROZEN) {
- /* we're already frozen */
- return true;
- }
-
- /* if there isn't a freeze lock child then create one */
- if (!ctdb->freeze_handle) {
- ctdb->freeze_handle = ctdb_freeze_lock(ctdb);
- CTDB_NO_MEMORY(ctdb, ctdb->freeze_handle);
- ctdb->freeze_mode = CTDB_FREEZE_PENDING;
- }
+ ctdb_start_freeze(ctdb);
/* block until frozen */
while (ctdb->freeze_mode == CTDB_FREEZE_PENDING) {
*/
int32_t ctdb_control_thaw(struct ctdb_context *ctdb)
{
+ /* Thaw the databases: cancel any transactions started while frozen,
+  * free the freeze handle and then call ctdb_call_resend_all().
+  * Always returns 0. */
+
+ /* cancel any pending transactions
+  *
+  * TDB_NOLOCK is set around each cancel and removed right after; a
+  * failed cancel is only logged and the remaining databases are still
+  * processed. */
+ if (ctdb->freeze_handle && ctdb->freeze_handle->transaction_started) {
+ struct ctdb_db_context *ctdb_db;
+
+ for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
+ tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
+ if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
+ DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
+ ctdb_db->db_name));
+ }
+ tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
+ }
+
+ /* Nothing is left to cancel. Without clearing the flag, freeing
+  * the handle below (while ctdb->freeze_handle still points at it)
+  * can run the freeze handle destructor's identical cancel loop a
+  * second time and log a cancel failure for every database. */
+ ctdb->freeze_handle->transaction_started = false;
+ }
+
+#if 0
+ /* this hack can be used to get a copy of the databases at the end of a recovery */
+ system("mkdir -p /var/ctdb.saved; /usr/bin/rsync --delete -a /var/ctdb/ /var/ctdb.saved/$$ 2>&1 > /dev/null");
+#endif
+
+#if 0
+ /* and this one for local testing */
+ system("mkdir -p test.db.saved; /usr/bin/rsync --delete -a test.db/ test.db.saved/$$ 2>&1 > /dev/null");
+#endif
+
+
talloc_free(ctdb->freeze_handle);
ctdb->freeze_handle = NULL;
ctdb_call_resend_all(ctdb);
return 0;
}
+
+
+/*
+  start a transaction on all databases - used for recovery
+
+  Only allowed while freeze_mode is CTDB_FREEZE_FROZEN. If the freeze
+  handle already has a transaction marked as started, the old transaction
+  on each database is cancelled before a new, non-nested one is started.
+  On success the handle records transaction_started and 'id'.
+
+  If a database fails to start its transaction, the transactions that
+  are still open on the other databases are cancelled and
+  transaction_started is cleared, so that no open transaction is left
+  behind that thaw or the handle destructor would not know about.
+
+  returns 0 on success, -1 on failure
+ */
+int32_t ctdb_control_transaction_start(struct ctdb_context *ctdb, uint32_t id)
+{
+	struct ctdb_db_context *ctdb_db;
+	bool had_transaction;
+
+	if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) {
+		DEBUG(DEBUG_ERR,(__location__ " Failed transaction_start while not frozen\n"));
+		return -1;
+	}
+
+	/* remember the state on entry; the rollback below depends on it */
+	had_transaction = ctdb->freeze_handle->transaction_started;
+
+	for (ctdb_db = ctdb->db_list; ctdb_db != NULL; ctdb_db = ctdb_db->next) {
+		struct ctdb_db_context *undo;
+		int ret;
+
+		tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
+
+		if (had_transaction) {
+			if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
+				DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
+					 ctdb_db->db_name));
+				/* not a fatal error */
+			}
+		}
+
+		tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NO_NESTING);
+		ret = tdb_transaction_start(ctdb_db->ltdb->tdb);
+		tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NO_NESTING);
+
+		tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
+
+		if (ret == 0) {
+			continue;
+		}
+
+		DEBUG(DEBUG_ERR,(__location__ " Failed to start transaction for db '%s'\n",
+			 ctdb_db->db_name));
+
+		/*
+		  roll back: databases before the failed one hold the
+		  transaction we just started; databases after it still hold
+		  the old transaction, but only if one was started on entry.
+		  The failed database itself has nothing open.
+		 */
+		for (undo = ctdb->db_list; undo != NULL; undo = undo->next) {
+			if (undo == ctdb_db) {
+				if (!had_transaction) {
+					break;
+				}
+				continue;
+			}
+			tdb_add_flags(undo->ltdb->tdb, TDB_NOLOCK);
+			if (tdb_transaction_cancel(undo->ltdb->tdb) != 0) {
+				DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
+					 undo->db_name));
+			}
+			tdb_remove_flags(undo->ltdb->tdb, TDB_NOLOCK);
+		}
+		ctdb->freeze_handle->transaction_started = false;
+
+		return -1;
+	}
+
+	ctdb->freeze_handle->transaction_started = true;
+	ctdb->freeze_handle->transaction_id = id;
+
+	return 0;
+}
+
+/*
+  commit transactions on all databases
+
+  Only allowed while freeze_mode is CTDB_FREEZE_FROZEN, with a
+  transaction marked as started on the freeze handle and 'id' equal to
+  the transaction id recorded there.
+
+  If any database fails to commit, a cancel is attempted on every
+  database and transaction_started is cleared. On success
+  transaction_started and transaction_id are both reset.
+
+  returns 0 on success, -1 on failure
+ */
+int32_t ctdb_control_transaction_commit(struct ctdb_context *ctdb, uint32_t id)
+{
+	struct ctdb_db_context *ctdb_db;
+
+	if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) {
+		DEBUG(DEBUG_ERR,(__location__ " Failed transaction_commit while not frozen\n"));
+		return -1;
+	}
+
+	if (!ctdb->freeze_handle->transaction_started) {
+		DEBUG(DEBUG_ERR,(__location__ " transaction not started\n"));
+		return -1;
+	}
+
+	if (id != ctdb->freeze_handle->transaction_id) {
+		DEBUG(DEBUG_ERR,(__location__ " incorrect transaction id 0x%x in commit\n", id));
+		return -1;
+	}
+
+	for (ctdb_db = ctdb->db_list; ctdb_db != NULL; ctdb_db = ctdb_db->next) {
+		struct ctdb_db_context *undo;
+		int ret;
+
+		tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
+		ret = tdb_transaction_commit(ctdb_db->ltdb->tdb);
+		if (ret == 0) {
+			tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
+			continue;
+		}
+
+		DEBUG(DEBUG_ERR,(__location__ " Failed to commit transaction for db '%s'. Cancel all transactions and resetting transaction_started to false.\n",
+			 ctdb_db->db_name));
+
+		/*
+		  cancel any pending transactions; this pass also removes
+		  TDB_NOLOCK again from the database whose commit failed
+		 */
+		for (undo = ctdb->db_list; undo != NULL; undo = undo->next) {
+			tdb_add_flags(undo->ltdb->tdb, TDB_NOLOCK);
+			if (tdb_transaction_cancel(undo->ltdb->tdb) != 0) {
+				DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
+					 undo->db_name));
+			}
+			tdb_remove_flags(undo->ltdb->tdb, TDB_NOLOCK);
+		}
+		ctdb->freeze_handle->transaction_started = false;
+
+		return -1;
+	}
+
+	ctdb->freeze_handle->transaction_started = false;
+	ctdb->freeze_handle->transaction_id = 0;
+
+	return 0;
+}
+
+/*
+  wipe a database - only possible when in a frozen transaction
+
+  indata carries a struct ctdb_control_wipe_database naming the database
+  (db_id) and the transaction it belongs to (transaction_id). The request
+  is refused unless freeze_mode is CTDB_FREEZE_FROZEN, a transaction is
+  marked as started on the freeze handle, the transaction id matches and
+  the database is known.
+
+  returns 0 on success, -1 on failure
+ */
+int32_t ctdb_control_wipe_database(struct ctdb_context *ctdb, TDB_DATA indata)
+{
+	/* NOTE(review): indata.dsize is not checked here - presumably the
+	   control dispatcher validates it before calling; confirm */
+	struct ctdb_control_wipe_database w = *(struct ctdb_control_wipe_database *)indata.dptr;
+	struct ctdb_db_context *ctdb_db;
+
+	if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) {
+		DEBUG(DEBUG_ERR,(__location__ " Failed wipe_database while not frozen\n"));
+		return -1;
+	}
+
+	if (!ctdb->freeze_handle->transaction_started) {
+		DEBUG(DEBUG_ERR,(__location__ " transaction not started\n"));
+		return -1;
+	}
+
+	if (w.transaction_id != ctdb->freeze_handle->transaction_id) {
+		DEBUG(DEBUG_ERR,(__location__ " incorrect transaction id 0x%x in wipe_database\n", w.transaction_id));
+		return -1;
+	}
+
+	ctdb_db = find_ctdb_db(ctdb, w.db_id);
+	if (ctdb_db == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%x\n", w.db_id));
+		return -1;
+	}
+
+	if (tdb_wipe_all(ctdb_db->ltdb->tdb) != 0) {
+		DEBUG(DEBUG_ERR,(__location__ " Failed to wipe database for db '%s'\n",
+			 ctdb_db->db_name));
+		return -1;
+	}
+
+	return 0;
+}