s3: Add the "ctdb locktime warn threshold" parameter
authorVolker Lendecke <vl@samba.org>
Fri, 5 Mar 2010 15:46:36 +0000 (16:46 +0100)
committerVolker Lendecke <vl@samba.org>
Mon, 22 Mar 2010 16:32:18 +0000 (17:32 +0100)
This is mainly a debugging aid for post-mortem analysis in case a cluster file
system is slow.

docs-xml/smbdotconf/misc/ctdblocktimewarnthreshold.xml [new file with mode: 0644]
source3/include/proto.h
source3/lib/dbwrap_ctdb.c
source3/param/loadparm.c

diff --git a/docs-xml/smbdotconf/misc/ctdblocktimewarnthreshold.xml b/docs-xml/smbdotconf/misc/ctdblocktimewarnthreshold.xml
new file mode 100644 (file)
index 0000000..149d8d6
--- /dev/null
@@ -0,0 +1,16 @@
+<samba:parameter name="ctdb locktime warn threshold"
+                 context="G"
+                type="integer"
+                 advanced="1"
+                 xmlns:samba="http://www.samba.org/samba/DTD/samba-doc">
+<description>
+       <para>In a cluster, ctdb is very unhappy if tdb database locks
+       are held for extended periods of time. This parameter adds a
+       warning threshold in milliseconds. If Samba holds a lock for
+       longer than ctdb locktime warn threshold milliseconds, a debug
+       level 0 message is printed when the lock is released. This is
+       mainly a debugging aid for post-mortem analysis.</para>
+       <para>If this parameter is set to 0, no message is printed.</para>
+</description>
+<value type="default">0</value>
+</samba:parameter>
index 06d324eea432daac3bddeac0829967d93518a246..346da39749a835a5ebae31d3e4a3c185196e9786 100644 (file)
@@ -4145,6 +4145,7 @@ const char *lp_ctdbd_socket(void);
 const char **lp_cluster_addresses(void);
 bool lp_clustering(void);
 int lp_ctdb_timeout(void);
+int lp_ctdb_locktime_warn_threshold(void);
 char *lp_printcommand(int );
 char *lp_lpqcommand(int );
 char *lp_lprmcommand(int );
index 938a3126579da5a84b9e2bc14381640edae31d1f..bef91cb1ec6e5d7f33d6b2ffd556d6a5b0f99df3 100644 (file)
@@ -50,6 +50,7 @@ struct db_ctdb_ctx {
 struct db_ctdb_rec {
        struct db_ctdb_ctx *ctdb_ctx;
        struct ctdb_ltdb_header header;
+       struct timeval lock_time;
 };
 
 static NTSTATUS tdb_error_to_ntstatus(struct tdb_context *tdb)
@@ -898,6 +899,7 @@ static int db_ctdb_record_destr(struct db_record* data)
 {
        struct db_ctdb_rec *crec = talloc_get_type_abort(
                data->private_data, struct db_ctdb_rec);
+       int threshold;
 
        DEBUG(10, (DEBUGLEVEL > 10
                   ? "Unlocking db %u key %s\n"
@@ -911,6 +913,14 @@ static int db_ctdb_record_destr(struct db_record* data)
                return -1;
        }
 
+       threshold = lp_ctdb_locktime_warn_threshold();
+       if (threshold != 0) {
+               double timediff = timeval_elapsed(&crec->lock_time);
+               if ((timediff * 1000) > threshold) {
+                       DEBUG(0, ("Held tdb lock %f seconds\n", timediff));
+               }
+       }
+
        return 0;
 }
 
@@ -1011,6 +1021,8 @@ again:
                          migrate_attempts));
        }
 
+       GetTimeOfDay(&crec->lock_time);
+
        memcpy(&crec->header, ctdb_data.dptr, sizeof(crec->header));
 
        result->value.dsize = ctdb_data.dsize - sizeof(crec->header);
index 76b0d350662829870160585edb019b03fa48e6e3..a6e096e09f9d377e520a57611b4fd16931cfa712 100644 (file)
@@ -274,6 +274,7 @@ struct global {
        char **szClusterAddresses;
        bool clustering;
        int ctdb_timeout;
+       int ctdb_locktime_warn_threshold;
        int ldap_passwd_sync;
        int ldap_replication_sleep;
        int ldap_timeout; /* This is initialised in init_globals */
@@ -2577,6 +2578,15 @@ static struct parm_struct parm_table[] = {
                .enum_list      = NULL,
                .flags          = FLAG_ADVANCED | FLAG_GLOBAL,
        },
+       {
+               .label          = "ctdb locktime warn threshold",
+               .type           = P_INTEGER,
+               .p_class        = P_GLOBAL,
+               .ptr            = &Globals.ctdb_locktime_warn_threshold,
+               .special        = NULL,
+               .enum_list      = NULL,
+               .flags          = FLAG_ADVANCED | FLAG_GLOBAL,
+       },
 
        {N_("Printing Options"), P_SEP, P_SEPARATOR},
 
@@ -5185,6 +5195,7 @@ static void init_globals(bool first_time_only)
        Globals.szClusterAddresses = NULL;
        Globals.clustering = False;
        Globals.ctdb_timeout = 0;
+       Globals.ctdb_locktime_warn_threshold = 0;
 
        Globals.winbind_cache_time = 300;       /* 5 minutes */
        Globals.winbind_reconnect_delay = 30;   /* 30 seconds */
@@ -5640,6 +5651,7 @@ FN_GLOBAL_CONST_STRING(lp_ctdbd_socket, &Globals.ctdbdSocket)
 FN_GLOBAL_LIST(lp_cluster_addresses, &Globals.szClusterAddresses)
 FN_GLOBAL_BOOL(lp_clustering, &Globals.clustering)
 FN_GLOBAL_INTEGER(lp_ctdb_timeout, &Globals.ctdb_timeout)
+FN_GLOBAL_INTEGER(lp_ctdb_locktime_warn_threshold, &Globals.ctdb_locktime_warn_threshold)
 FN_LOCAL_STRING(lp_printcommand, szPrintcommand)
 FN_LOCAL_STRING(lp_lpqcommand, szLpqcommand)
 FN_LOCAL_STRING(lp_lprmcommand, szLprmcommand)