*/
#include "includes.h"
-#include "lib/tevent/tevent.h"
#include "lib/tdb/include/tdb.h"
#include "system/network.h"
#include "system/filesys.h"
#include "../include/ctdb_private.h"
#include "db_wrap.h"
#include "lib/util/dlinklist.h"
-#include "lib/tevent/tevent.h"
#include "../include/ctdb_private.h"
#include "../common/rb_tree.h"
struct ctdb_ltdb_header *header;
TDB_DATA tdb_data;
uint32_t lmaster;
+ uint32_t hash = ctdb_hash(&(dd->key));
vdata->fast_total++;
res = tdb_chainlock(ctdb_db->ltdb->tdb, dd->key);
if (res != 0) {
- DEBUG(DEBUG_ERR, (__location__ " Error getting chainlock.\n"));
+ DEBUG(DEBUG_ERR,
+ (__location__ " Error getting chainlock on record with "
+ "key hash [0x%08x] on database db[%s].\n",
+ hash, ctdb_db->db_name));
vdata->fast_error++;
return 0;
}
goto skipped;
}
-
if (header->rsn != dd->hdr.rsn) {
/*
* The record has been migrated off the node and back again.
if (res != 0) {
DEBUG(DEBUG_ERR,
- (__location__ " Error deleting record from local "
- "data base.\n"));
+ (__location__ " Error deleting record with key "
+ "hash [0x%08x] from local data base db[%s].\n",
+ hash, ctdb_db->db_name));
vdata->fast_error++;
} else {
+ DEBUG(DEBUG_DEBUG,
+ (__location__ " Deleted record with key hash "
+ "[0x%08x] from local data base db[%s].\n",
+ hash, ctdb_db->db_name));
vdata->fast_deleted++;
}
}
TDB_DATA tdb_data;
uint32_t lmaster;
bool deleted = false;
+ uint32_t hash = ctdb_hash(&(dd->key));
res = tdb_chainlock(ctdb_db->ltdb->tdb, dd->key);
if (res != 0) {
- DEBUG(DEBUG_ERR, (__location__ " Error getting chainlock.\n"));
+ DEBUG(DEBUG_ERR,
+ (__location__ " Error getting chainlock on record with "
+ "key hash [0x%08x] on database db[%s].\n",
+ hash, ctdb_db->db_name));
vdata->delete_local_error++;
return 0;
}
header = (struct ctdb_ltdb_header *)tdb_data.dptr;
+ if (header->flags & (CTDB_REC_RO_HAVE_DELEGATIONS|CTDB_REC_RO_HAVE_READONLY|CTDB_REC_RO_REVOKING_READONLY|CTDB_REC_RO_REVOKE_COMPLETE)) {
+		/* The record has readonly flags set. Skip deleting. */
+ vdata->delete_skipped++;
+ goto done;
+ }
+
if (header->dmaster != ctdb->pnn) {
/* The record has been migrated off the node. Skip. */
vdata->delete_skipped++;
if (res != 0) {
DEBUG(DEBUG_ERR,
- (__location__ " Error deleting record from local "
- "data base.\n"));
+ (__location__ " Error deleting record with key hash "
+ "[0x%08x] from local data base db[%s].\n",
+ hash, ctdb_db->db_name));
vdata->delete_local_error++;
goto done;
}
deleted = true;
+ DEBUG(DEBUG_DEBUG,
+ (__location__ " Deleted record with key hash [0x%08x] from "
+ "local data base db[%s].\n", hash, ctdb_db->db_name));
+
done:
if (tdb_data.dptr != NULL) {
free(tdb_data.dptr);
}
/**
- * Vacuum a DB:
- * - Always do the fast vacuuming run, which traverses
- * the in-memory delete queue: these records have been
- * scheduled for deletion.
- * - Only if explicitly requested, the database is traversed
- * in order to use the traditional heuristics on empty records
- * to trigger deletion.
- * This is done only every VacuumFastPathCount'th vacuuming run.
- *
- * The traverse runs fill two lists:
- *
- * - The delete_list:
- * This is the list of empty records the current
- * node is lmaster and dmaster for. These records are later
- * deleted first on other nodes and then locally.
- *
- * The fast vacuuming run has a short cut for those records
- * that have never been migrated with data: these records
- * are immediately deleted locally, since they have left
- * no trace on other nodes.
- *
- * - The vacuum_fetch lists
- * (one for each other lmaster node):
- * The records in this list are sent for deletion to
- * their lmaster in a bulk VACUUM_FETCH message.
- *
- * The lmaster then migrates all these records to itelf
- * so that they can be vacuumed there.
- *
- * This executes in the child context.
+ * Process the delete list:
+ * Send the records to delete to all other nodes with the
+ * try_delete_records control.
*/
-static int ctdb_vacuum_db(struct ctdb_db_context *ctdb_db,
- struct vacuum_data *vdata,
- bool full_vacuum_run)
+static int ctdb_process_delete_list(struct ctdb_db_context *ctdb_db,
+ struct vacuum_data *vdata)
{
+ int ret, i;
struct ctdb_context *ctdb = ctdb_db->ctdb;
- int ret, i, pnn;
-
- DEBUG(DEBUG_INFO, (__location__ " Entering %s vacuum run for db "
- "%s db_id[0x%08x]\n",
- full_vacuum_run ? "full" : "fast",
- ctdb_db->db_name, ctdb_db->db_id));
-
- ret = ctdb_ctrl_getvnnmap(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE, ctdb, &ctdb->vnn_map);
- if (ret != 0) {
- DEBUG(DEBUG_ERR, ("Unable to get vnnmap from local node\n"));
- return ret;
- }
-
- pnn = ctdb_ctrl_getpnn(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE);
- if (pnn == -1) {
- DEBUG(DEBUG_ERR, ("Unable to get pnn from local node\n"));
- return -1;
- }
- ctdb->pnn = pnn;
+ vdata->delete_left = vdata->delete_count;
- vdata->fast_added_to_delete_list = 0;
- vdata->fast_added_to_vacuum_fetch_list = 0;
- vdata->fast_deleted = 0;
- vdata->fast_skipped = 0;
- vdata->fast_error = 0;
- vdata->fast_total = 0;
- vdata->full_added_to_delete_list = 0;
- vdata->full_added_to_vacuum_fetch_list = 0;
- vdata->full_skipped = 0;
- vdata->full_error = 0;
- vdata->full_total = 0;
- vdata->delete_count = 0;
- vdata->delete_left = 0;
- vdata->delete_remote_error = 0;
- vdata->delete_local_error = 0;
- vdata->delete_skipped = 0;
- vdata->delete_deleted = 0;
-
- /* the list needs to be of length num_nodes */
- vdata->vacuum_fetch_list = talloc_array(vdata,
- struct ctdb_marshall_buffer *,
- ctdb->num_nodes);
- if (vdata->vacuum_fetch_list == NULL) {
- DEBUG(DEBUG_ERR,(__location__ " Out of memory\n"));
- return -1;
- }
- for (i = 0; i < ctdb->num_nodes; i++) {
- vdata->vacuum_fetch_list[i] = (struct ctdb_marshall_buffer *)
- talloc_zero_size(vdata->vacuum_fetch_list,
- offsetof(struct ctdb_marshall_buffer, data));
- if (vdata->vacuum_fetch_list[i] == NULL) {
- DEBUG(DEBUG_ERR,(__location__ " Out of memory\n"));
- return -1;
- }
- vdata->vacuum_fetch_list[i]->db_id = ctdb_db->db_id;
- }
-
- ctdb_vacuum_db_fast(ctdb_db, vdata);
-
- ret = ctdb_vacuum_db_full(ctdb_db, vdata, full_vacuum_run);
- if (ret != 0) {
- return ret;
- }
-
- ret = ctdb_process_vacuum_fetch_lists(ctdb_db, vdata);
- if (ret != 0) {
- return ret;
- }
-
- /* Process all records we can delete (if any) */
if (vdata->delete_count > 0) {
struct delete_records_list *recs;
TDB_DATA indata, outdata;
uint32_t *active_nodes;
int num_active_nodes;
- vdata->delete_left = vdata->delete_count;
-
recs = talloc_zero(vdata, struct delete_records_list);
if (recs == NULL) {
DEBUG(DEBUG_ERR,(__location__ " Out of memory\n"));
* outdata contains the list of records coming back
* from the node: These are the records that the
* remote node could not delete.
+ *
+ * NOTE: There is a problem here:
+ *
+ * When a node failed to delete the record, but
+ * others succeeded, we may have created gaps in the
+			 * history of the record. Hence when a node dies, a
+ * closed file handle might be resurrected or an open
+ * file handle might be lost, leading to blocked access
+ * or data corruption.
+ *
+ * TODO: This needs to be fixed!
*/
records = (struct ctdb_marshall_buffer *)outdata.dptr;
rec = (struct ctdb_rec_data *)&records->data[0];
delete_record_traverse, vdata);
}
+ if (vdata->delete_count > 0) {
+ DEBUG(DEBUG_INFO,
+ (__location__
+ " vacuum delete list statistics: "
+ "db[%s] "
+ "coll[%u] "
+ "rem.err[%u] "
+ "loc.err[%u] "
+ "skip[%u] "
+ "del[%u] "
+ "left[%u]\n",
+ ctdb_db->db_name,
+ (unsigned)vdata->delete_count,
+ (unsigned)vdata->delete_remote_error,
+ (unsigned)vdata->delete_local_error,
+ (unsigned)vdata->delete_skipped,
+ (unsigned)vdata->delete_deleted,
+ (unsigned)vdata->delete_left));
+ }
+
+ return 0;
+}
+
+/**
+ * initialize the vacuum_data
+ */
+static int ctdb_vacuum_init_vacuum_data(struct ctdb_db_context *ctdb_db,
+ struct vacuum_data *vdata)
+{
+ int i;
+ struct ctdb_context *ctdb = ctdb_db->ctdb;
+
+ vdata->fast_added_to_delete_list = 0;
+ vdata->fast_added_to_vacuum_fetch_list = 0;
+ vdata->fast_deleted = 0;
+ vdata->fast_skipped = 0;
+ vdata->fast_error = 0;
+ vdata->fast_total = 0;
+ vdata->full_added_to_delete_list = 0;
+ vdata->full_added_to_vacuum_fetch_list = 0;
+ vdata->full_skipped = 0;
+ vdata->full_error = 0;
+ vdata->full_total = 0;
+ vdata->delete_count = 0;
+ vdata->delete_left = 0;
+ vdata->delete_remote_error = 0;
+ vdata->delete_local_error = 0;
+ vdata->delete_skipped = 0;
+ vdata->delete_deleted = 0;
+
+ /* the list needs to be of length num_nodes */
+ vdata->vacuum_fetch_list = talloc_zero_array(vdata,
+ struct ctdb_marshall_buffer *,
+ ctdb->num_nodes);
+ if (vdata->vacuum_fetch_list == NULL) {
+ DEBUG(DEBUG_ERR,(__location__ " Out of memory\n"));
+ return -1;
+ }
+ for (i = 0; i < ctdb->num_nodes; i++) {
+ vdata->vacuum_fetch_list[i] = (struct ctdb_marshall_buffer *)
+ talloc_zero_size(vdata->vacuum_fetch_list,
+ offsetof(struct ctdb_marshall_buffer, data));
+ if (vdata->vacuum_fetch_list[i] == NULL) {
+ DEBUG(DEBUG_ERR,(__location__ " Out of memory\n"));
+ return -1;
+ }
+ vdata->vacuum_fetch_list[i]->db_id = ctdb_db->db_id;
+ }
+
+ return 0;
+}
+
+/**
+ * Vacuum a DB:
+ * - Always do the fast vacuuming run, which traverses
+ * the in-memory delete queue: these records have been
+ * scheduled for deletion.
+ * - Only if explicitly requested, the database is traversed
+ * in order to use the traditional heuristics on empty records
+ * to trigger deletion.
+ * This is done only every VacuumFastPathCount'th vacuuming run.
+ *
+ * The traverse runs fill two lists:
+ *
+ * - The delete_list:
+ * This is the list of empty records the current
+ * node is lmaster and dmaster for. These records are later
+ * deleted first on other nodes and then locally.
+ *
+ * The fast vacuuming run has a short cut for those records
+ * that have never been migrated with data: these records
+ * are immediately deleted locally, since they have left
+ * no trace on other nodes.
+ *
+ * - The vacuum_fetch lists
+ * (one for each other lmaster node):
+ * The records in this list are sent for deletion to
+ * their lmaster in a bulk VACUUM_FETCH message.
+ *
+ * The lmaster then migrates all these records to itself
+ * so that they can be vacuumed there.
+ *
+ * This executes in the child context.
+ */
+static int ctdb_vacuum_db(struct ctdb_db_context *ctdb_db,
+ struct vacuum_data *vdata,
+ bool full_vacuum_run)
+{
+ struct ctdb_context *ctdb = ctdb_db->ctdb;
+ int ret, pnn;
+
+ DEBUG(DEBUG_INFO, (__location__ " Entering %s vacuum run for db "
+ "%s db_id[0x%08x]\n",
+ full_vacuum_run ? "full" : "fast",
+ ctdb_db->db_name, ctdb_db->db_id));
+
+ ret = ctdb_ctrl_getvnnmap(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE, ctdb, &ctdb->vnn_map);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR, ("Unable to get vnnmap from local node\n"));
+ return ret;
+ }
+
+ pnn = ctdb_ctrl_getpnn(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE);
+ if (pnn == -1) {
+ DEBUG(DEBUG_ERR, ("Unable to get pnn from local node\n"));
+ return -1;
+ }
+
+ ctdb->pnn = pnn;
+
+ ret = ctdb_vacuum_init_vacuum_data(ctdb_db, vdata);
+ if (ret != 0) {
+ return ret;
+ }
+
+ ctdb_vacuum_db_fast(ctdb_db, vdata);
+
+ ret = ctdb_vacuum_db_full(ctdb_db, vdata, full_vacuum_run);
+ if (ret != 0) {
+ return ret;
+ }
+
+ ret = ctdb_process_vacuum_fetch_lists(ctdb_db, vdata);
+ if (ret != 0) {
+ return ret;
+ }
+
+ ret = ctdb_process_delete_list(ctdb_db, vdata);
+ if (ret != 0) {
+ return ret;
+ }
+
/* this ensures we run our event queue */
ctdb_ctrl_getpnn(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE);
hash = (uint32_t)ctdb_hash(&key);
DEBUG(DEBUG_DEBUG, (__location__
- " remove_record_from_delete_queue: db[%s] "
+ " remove_record_from_delete_queue: "
+ "db[%s] "
"db_id[0x%08x] "
"key_hash[0x%08x] "
"lmaster[%u] "
kd = (struct delete_record_data *)trbt_lookup32(ctdb_db->delete_queue, hash);
if (kd == NULL) {
+ DEBUG(DEBUG_DEBUG, (__location__
+ " remove_record_from_delete_queue: "
+ "record not in queue (hash[0x%08x])\n.",
+ hash));
return;
}
- if (kd->key.dsize != key.dsize) {
- return;
- }
- if (memcmp(kd->key.dptr, key.dptr, key.dsize) != 0) {
+
+ if ((kd->key.dsize != key.dsize) ||
+ (memcmp(kd->key.dptr, key.dptr, key.dsize) != 0))
+ {
+ DEBUG(DEBUG_DEBUG, (__location__
+ " remove_record_from_delete_queue: "
+ "hash collision for key with hash[0x%08x] "
+ "in db[%s] - skipping\n",
+ hash, ctdb_db->db_name));
return;
}
+ DEBUG(DEBUG_DEBUG, (__location__
+ " remove_record_from_delete_queue: "
+ "removing key with hash[0x%08x]\n",
+ hash));
+
talloc_free(kd);
return;
hash = (uint32_t)ctdb_hash(&key);
- DEBUG(DEBUG_INFO, (__location__ " Schedule for deletion: db[%s] "
+ DEBUG(DEBUG_INFO, (__location__ " schedule for deletion: db[%s] "
"db_id[0x%08x] "
"key_hash[0x%08x] "
"lmaster[%u] "
(memcmp(kd->key.dptr, key.dptr, key.dsize) != 0))
{
DEBUG(DEBUG_INFO,
- ("schedule for deletion: Hash collision (0x%08x)."
- " Skipping the record.\n", hash));
+ (__location__ " schedule for deletion: "
+ "hash collision for key hash [0x%08x]. "
+ "Skipping the record.\n", hash));
return 0;
} else {
DEBUG(DEBUG_DEBUG,
- ("schedule for deletion: Overwriting entry for "
- "key with hash 0x%08x.\n", hash));
+ (__location__ " schedule for deletion: "
+ "updating entry for key with hash [0x%08x].\n",
+ hash));
}
}
ctdb_db->delete_queue,
hdr, key);
if (ret != 0) {
+ DEBUG(DEBUG_INFO,
+ (__location__ " schedule for deletion: error "
+ "inserting key with hash [0x%08x] into delete queue\n",
+ hash));
return -1;
}
return ret;
}
- /* child process: send the main daemon a control */
+	/* If we don't have a connection to the daemon, we cannot send
+	 * a control. This happens, for example, in the child process
+	 * spawned by the update_record control.
+	 */
+ if (!ctdb_db->ctdb->can_send_controls) {
+ return -1;
+ }
+
+ /* child process: send the main daemon a control */
indata.dsize = offsetof(struct ctdb_control_schedule_for_deletion, key) + key.dsize;
indata.dptr = talloc_zero_array(ctdb_db, uint8_t, indata.dsize);
if (indata.dptr == NULL) {