+#include <string.h>
#include "lib/util/dlinklist.h"
#include "lib/util/debug.h"
#include "lib/util/samba_util.h"
+#include "lib/util/sys_rw.h"
#include "ctdb_private.h"
-#include "common/system.h"
#include "common/common.h"
#include "common/logging.h"
lock_ctx->request->lctx = NULL;
}
if (lock_ctx->child > 0) {
- ctdb_kill(lock_ctx->ctdb, lock_ctx->child, SIGKILL);
+ ctdb_kill(lock_ctx->ctdb, lock_ctx->child, SIGTERM);
if (lock_ctx->type == LOCK_RECORD) {
DLIST_REMOVE(lock_ctx->ctdb_db->lock_current, lock_ctx);
} else {
process_callbacks(lock_ctx, locked);
}
+struct lock_log_entry { /* per-key record consulted by lock_log_skip() before a record-lock warning is logged */
+ struct db_hash_context *lock_log; /* hash the entry is stored in; lock_log_cleanup() deletes the key from it */
+ TDB_DATA key; /* private talloc copy of the record key, owned by this entry */
+ unsigned long log_sec; /* elapsed seconds most recently recorded for this key by lock_log_skip() */
+ struct tevent_timer *timer; /* pending 30 second lock_log_cleanup() timer; set to NULL when it fires */
+};
+
+/*
+ * db_hash_fetch() parser callback: recover the lock_log_entry pointer that
+ * lock_log_skip() stored as the record value.
+ *
+ * keybuf and keylen are not used.  private_data must point to a
+ * struct lock_log_entry * which receives the recovered pointer.
+ *
+ * Returns 0 on success, or EINVAL if the stored value is not exactly the
+ * size of one pointer.
+ */
+static int lock_log_fetch_parser(uint8_t *keybuf, size_t keylen,
+				 uint8_t *databuf, size_t datalen,
+				 void *private_data)
+{
+	struct lock_log_entry **entry =
+		(struct lock_log_entry **)private_data;
+	struct lock_log_entry *stored = NULL;
+
+	if (datalen != sizeof(stored)) {
+		return EINVAL;
+	}
+
+	/*
+	 * databuf is a plain byte buffer with no alignment guarantee, so
+	 * copy the pointer value out instead of dereferencing the buffer
+	 * through a cast to a pointer type.
+	 */
+	memcpy(&stored, databuf, sizeof(stored));
+
+	*entry = talloc_get_type_abort(stored, struct lock_log_entry);
+	return 0;
+}
+
+/*
+ * Timer handler armed by lock_log_skip(): remove an entry from its lock
+ * log.
+ *
+ * private_data is the struct lock_log_entry the timer was created for.
+ * The entry is freed only when its key was successfully deleted from the
+ * hash; if the delete fails the entry is left allocated.
+ */
+static void lock_log_cleanup(struct tevent_context *ev,
+			     struct tevent_timer *ttimer,
+			     struct timeval current_time,
+			     void *private_data)
+{
+	struct lock_log_entry *expired;
+
+	expired = talloc_get_type_abort(private_data, struct lock_log_entry);
+
+	/* This handler is running, so the entry no longer has a pending timer */
+	expired->timer = NULL;
+
+	if (db_hash_delete(expired->lock_log, expired->key.dptr,
+			   expired->key.dsize) == 0) {
+		talloc_free(expired);
+	}
+}
+
+/*
+ * Decide whether the lock-wait warning for "key" should be suppressed.
+ *
+ * Returns true only when lock_log already holds an entry for the key and
+ * elapsed_sec is not greater than the value recorded in that entry.
+ *
+ * In every other case the result is false:
+ *  - no entry yet: one is created, a 30 second cleanup timer is armed and
+ *    the entry is added to lock_log;
+ *  - the stored value has an unexpected size: the key is deleted;
+ *  - an entry exists with a smaller recorded value: the value is updated
+ *    and the cleanup timer is re-armed;
+ *  - any allocation, timer or hash failure.
+ */
+static bool lock_log_skip(struct tevent_context *ev,
+			  struct db_hash_context *lock_log,
+			  TDB_DATA key, unsigned long elapsed_sec)
+{
+	struct lock_log_entry *rec = NULL;
+	int rc;
+
+	rc = db_hash_fetch(lock_log, key.dptr, key.dsize,
+			   lock_log_fetch_parser, &rec);
+
+	switch (rc) {
+	case ENOENT:
+		/* First time this key is seen: start tracking it */
+		rec = talloc_zero(lock_log, struct lock_log_entry);
+		if (rec == NULL) {
+			break;
+		}
+		rec->lock_log = lock_log;
+
+		rec->key.dptr = talloc_memdup(rec, key.dptr, key.dsize);
+		if (rec->key.dptr == NULL) {
+			talloc_free(rec);
+			break;
+		}
+		rec->key.dsize = key.dsize;
+		rec->log_sec = elapsed_sec;
+
+		rec->timer = tevent_add_timer(ev, rec,
+					      timeval_current_ofs(30, 0),
+					      lock_log_cleanup, rec);
+		if (rec->timer == NULL) {
+			talloc_free(rec);
+			break;
+		}
+
+		/* The hash stores the pointer value itself, not the struct */
+		rc = db_hash_add(lock_log, key.dptr, key.dsize,
+				 (uint8_t *)&rec,
+				 sizeof(struct lock_log_entry *));
+		if (rc != 0) {
+			talloc_free(rec);
+		}
+		break;
+
+	case EINVAL:
+		/*
+		 * The stored value did not have the expected size: drop the
+		 * key.  The outcome is the same whether or not the delete
+		 * succeeds.
+		 */
+		rc = db_hash_delete(lock_log, key.dptr, key.dsize);
+		break;
+
+	case 0:
+		if (elapsed_sec <= rec->log_sec) {
+			return true;
+		}
+		rec->log_sec = elapsed_sec;
+
+		/* Restart the 30 second expiry for this key */
+		TALLOC_FREE(rec->timer);
+		rec->timer = tevent_add_timer(ev, rec,
+					      timeval_current_ofs(30, 0),
+					      lock_log_cleanup, rec);
+		/*
+		 * Without a timer the entry would never expire, so remove
+		 * it; free it only once it is out of the hash.
+		 */
+		if (rec->timer == NULL &&
+		    db_hash_delete(lock_log, key.dptr, key.dsize) == 0) {
+			talloc_free(rec);
+		}
+		break;
+
+	default:
+		/* Any other fetch result is not handled here */
+		break;
+	}
+
+	return false;
+}
/*
* Callback routine when required locks are not obtained within timeout
void *private_data)
{
static char debug_locks[PATH_MAX+1] = "";
- static struct timeval last_debug_time;
struct lock_context *lock_ctx;
struct ctdb_context *ctdb;
- struct timeval now;
pid_t pid;
double elapsed_time;
- int new_timer;
+ bool skip;
+ char *keystr;
lock_ctx = talloc_get_type_abort(private_data, struct lock_context);
ctdb = lock_ctx->ctdb;
elapsed_time = timeval_elapsed(&lock_ctx->start_time);
- if (lock_ctx->ctdb_db) {
+
+ /* For database locks, always log */
+ if (lock_ctx->type == LOCK_DB) {
DEBUG(DEBUG_WARNING,
- ("Unable to get %s lock on database %s for %.0lf seconds\n",
- (lock_ctx->type == LOCK_RECORD ? "RECORD" : "DB"),
+ ("Unable to get DB lock on database %s for "
+ "%.0lf seconds\n",
lock_ctx->ctdb_db->db_name, elapsed_time));
- } else {
- DEBUG(DEBUG_WARNING,
- ("Unable to get ALLDB locks for %.0lf seconds\n",
- elapsed_time));
+ goto lock_debug;
}
- /* If a node stopped/banned, don't spam the logs */
- if (ctdb->nodes[ctdb->pnn]->flags & NODE_FLAGS_INACTIVE) {
+ /* For record locks, check if we have already logged */
+ skip = lock_log_skip(ev, lock_ctx->ctdb_db->lock_log,
+ lock_ctx->key, (unsigned long)elapsed_time);
+ if (skip) {
goto skip_lock_debug;
}
- /* Restrict log debugging to once per second */
- now = timeval_current();
- if (last_debug_time.tv_sec == now.tv_sec) {
+ keystr = hex_encode_talloc(lock_ctx, lock_ctx->key.dptr,
+ lock_ctx->key.dsize);
+ DEBUG(DEBUG_WARNING,
+ ("Unable to get RECORD lock on database %s for %.0lf seconds"
+ " (key %s)\n",
+ lock_ctx->ctdb_db->db_name, elapsed_time,
+ keystr ? keystr : ""));
+ TALLOC_FREE(keystr);
+
+ /* If a node stopped/banned, don't spam the logs */
+ if (ctdb->nodes[ctdb->pnn]->flags & NODE_FLAGS_INACTIVE) {
goto skip_lock_debug;
}
- last_debug_time.tv_sec = now.tv_sec;
+lock_debug:
if (ctdb_set_helper("lock debugging helper",
debug_locks, sizeof(debug_locks),
skip_lock_debug:
- /* Back-off logging if lock is not obtained for a long time */
- if (elapsed_time < 100.0) {
- new_timer = 10;
- } else if (elapsed_time < 1000.0) {
- new_timer = 100;
- } else {
- new_timer = 1000;
- }
-
/* reset the timeout timer */
// talloc_free(lock_ctx->ttimer);
lock_ctx->ttimer = tevent_add_timer(ctdb->ev,
lock_ctx,
- timeval_current_ofs(new_timer, 0),
+ timeval_current_ofs(10, 0),
ctdb_lock_timeout_handler,
(void *)lock_ctx);
}
-static int db_flags(struct ctdb_db_context *ctdb_db)
-{
- int tdb_flags = TDB_DEFAULT;
-
-#ifdef TDB_MUTEX_LOCKING
- if (!ctdb_db->persistent && ctdb_db->ctdb->tunable.mutex_enabled) {
- tdb_flags = (TDB_MUTEX_LOCKING | TDB_CLEAR_IF_FIRST);
- }
-#endif
- return tdb_flags;
-}
-
static bool lock_helper_args(TALLOC_CTX *mem_ctx,
struct lock_context *lock_ctx, int fd,
int *argc, const char ***argv)
args[2] = talloc_strdup(args, "RECORD");
args[3] = talloc_strdup(args, lock_ctx->ctdb_db->db_path);
args[4] = talloc_asprintf(args, "0x%x",
- db_flags(lock_ctx->ctdb_db));
+ tdb_get_flags(lock_ctx->ctdb_db->ltdb->tdb));
if (lock_ctx->key.dsize == 0) {
args[5] = talloc_strdup(args, "NULL");
} else {
args[2] = talloc_strdup(args, "DB");
args[3] = talloc_strdup(args, lock_ctx->ctdb_db->db_path);
args[4] = talloc_asprintf(args, "0x%x",
- db_flags(lock_ctx->ctdb_db));
+ tdb_get_flags(lock_ctx->ctdb_db->ltdb->tdb));
break;
}
return;
}
- if (!ctdb_vfork_with_logging(lock_ctx, ctdb, "lock_helper",
- prog, argc, (const char **)args,
- NULL, NULL, &lock_ctx->child)) {
+ lock_ctx->child = ctdb_vfork_exec(lock_ctx, ctdb, prog, argc,
+ (const char **)args);
+ if (lock_ctx->child == -1) {
DEBUG(DEBUG_ERR, ("Failed to create a child in ctdb_lock_schedule\n"));
close(lock_ctx->fd[0]);
close(lock_ctx->fd[1]);
ctdb_lock_timeout_handler,
(void *)lock_ctx);
if (lock_ctx->ttimer == NULL) {
- ctdb_kill(ctdb, lock_ctx->child, SIGKILL);
+ ctdb_kill(ctdb, lock_ctx->child, SIGTERM);
lock_ctx->child = -1;
close(lock_ctx->fd[0]);
return;
(void *)lock_ctx);
if (lock_ctx->tfd == NULL) {
TALLOC_FREE(lock_ctx->ttimer);
- ctdb_kill(ctdb, lock_ctx->child, SIGKILL);
+ ctdb_kill(ctdb, lock_ctx->child, SIGTERM);
lock_ctx->child = -1;
close(lock_ctx->fd[0]);
return;