dsdb: Log the transaction duraton.
authorGary Lockyer <gary@catalyst.net.nz>
Wed, 6 Jun 2018 13:30:44 +0000 (15:30 +0200)
committerAndrew Bartlett <abartlet@samba.org>
Mon, 25 Jun 2018 09:16:18 +0000 (11:16 +0200)
This is not a general purpose profiling solution, but these JSON
logs are already being generated and stored, so this is worth adding.

This will allow administrators to identify long running
transactions, and identify potential performance bottlenecks.

This complements a similar patch set to log authentication duration.

Signed-off-by: Gary Lockyer <gary@catalyst.net.nz>
Reviewed-by: Andrew Bartlett <abartlet@samba.org>
Autobuild-User(master): Andrew Bartlett <abartlet@samba.org>
Autobuild-Date(master): Mon Jun 25 11:16:18 CEST 2018 on sn-devel-144

python/samba/tests/audit_log_base.py
python/samba/tests/audit_log_dsdb.py
source4/dsdb/samdb/ldb_modules/audit_log.c
source4/dsdb/samdb/ldb_modules/tests/test_audit_log.c

index fa51911e7650962a0aa3338e99d27f006cfb622c..e91c4142f1cafbe36a9bfdd6e85e83aebd816fce 100644 (file)
@@ -27,6 +27,7 @@ import json
 import os
 import re
 
+
 def getAudit(message):
     if "type" not in message:
         return None
@@ -35,8 +36,8 @@ def getAudit(message):
     audit = message[type]
     return audit
 
-class AuditLogTestBase(samba.tests.TestCase):
 
+class AuditLogTestBase(samba.tests.TestCase):
 
     def setUp(self):
         super(AuditLogTestBase, self).setUp()
@@ -51,7 +52,7 @@ class AuditLogTestBase(samba.tests.TestCase):
         def isRemote(message):
             audit = getAudit(message)
             if audit is None:
-                return false
+                return False
 
             remote = audit["remoteAddress"]
             if remote is None:
@@ -76,7 +77,7 @@ class AuditLogTestBase(samba.tests.TestCase):
             elif jsonMsg["type"] == "dsdbTransaction":
                 context["txnMessage"] = jsonMsg
 
-        self.context = {"messages": [], "txnMessage": ""}
+        self.context = {"messages": [], "txnMessage": None}
         self.msg_handler_and_context = (messageHandler, self.context)
         self.msg_ctx.register(self.msg_handler_and_context,
                               msg_type=self.message_type)
@@ -127,7 +128,6 @@ class AuditLogTestBase(samba.tests.TestCase):
                         return True
             return False
 
-
     def waitForMessages(self, number, connection=None, dn=None):
         """Wait for all the expected messages to arrive
         The connection is passed through to keep the connection alive
@@ -158,8 +158,11 @@ class AuditLogTestBase(samba.tests.TestCase):
     # Discard any previously queued messages.
     def discardMessages(self):
         self.msg_ctx.loop_once(0.001)
-        while len(self.context["messages"]):
+        while (len(self.context["messages"]) or
+               self.context["txnMessage"] is not None):
+
             self.context["messages"] = []
+            self.context["txnMessage"] = None
             self.msg_ctx.loop_once(0.001)
 
     GUID_RE = "[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"
index 5329af45e69b7c378615c356ad4eada8751279a8..53d457374248f5faf76768fb15a0467de2ecd699 100644 (file)
@@ -21,6 +21,7 @@ from __future__ import print_function
 
 import samba.tests
 from samba.dcerpc.messaging import MSG_DSDB_LOG, DSDB_EVENT_NAME
+from ldb import ERR_NO_SUCH_OBJECT
 from samba.samdb import SamDB
 from samba.auth import system_session
 import os
@@ -33,8 +34,6 @@ from samba.dcerpc import security, lsa
 
 USER_NAME = "auditlogtestuser"
 USER_PASS = samba.generate_random_password(32, 32)
-SECOND_USER_NAME = "auditlogtestuser02"
-SECOND_USER_PASS = samba.generate_random_password(32, 32)
 
 
 class AuditLogDsdbTests(AuditLogTestBase):
@@ -79,9 +78,6 @@ class AuditLogDsdbTests(AuditLogTestBase):
 
         # (Re)adds the test user USER_NAME with password USER_PASS
         delete_force(self.ldb, "cn=" + USER_NAME + ",cn=users," + self.base_dn)
-        delete_force(
-            self.ldb,
-            "cn=" + SECOND_USER_NAME + ",cn=users," + self.base_dn)
         self.ldb.add({
             "dn": "cn=" + USER_NAME + ",cn=users," + self.base_dn,
             "objectclass": "user",
@@ -93,15 +89,21 @@ class AuditLogDsdbTests(AuditLogTestBase):
     # Discard the messages from the setup code
     #
     def discardSetupMessages(self, dn):
-        messages = self.waitForMessages(2, dn=dn)
+        self.waitForMessages(2, dn=dn)
         self.discardMessages()
 
-
     def tearDown(self):
         self.discardMessages()
         super(AuditLogDsdbTests, self).tearDown()
 
-    def waitForTransaction(self, connection=None):
+    def haveExpectedTxn(self, expected):
+        if self.context["txnMessage"] is not None:
+            txn = self.context["txnMessage"]["dsdbTransaction"]
+            if txn["transactionId"] == expected:
+                return True
+        return False
+
+    def waitForTransaction(self, expected, connection=None):
         """Wait for a transaction message to arrive
         The connection is passed through to keep the connection alive
         until all the logging messages have been received.
@@ -110,7 +112,7 @@ class AuditLogDsdbTests(AuditLogTestBase):
         self.connection = connection
 
         start_time = time.time()
-        while self.context["txnMessage"] == "":
+        while not self.haveExpectedTxn(expected):
             self.msg_ctx.loop_once(0.1)
             if time.time() - start_time > 1:
                 self.connection = None
@@ -329,55 +331,72 @@ class AuditLogDsdbTests(AuditLogTestBase):
 
         self.ldb.deleteuser(USER_NAME)
 
-        messages = self.waitForMessages(2, dn=dn)
+        messages = self.waitForMessages(1, dn=dn)
         print("Received %d messages" % len(messages))
-        self.assertEquals(2,
+        self.assertEquals(1,
                           len(messages),
                           "Did not receive the expected number of messages")
 
-        audit = messages[1]["dsdbChange"]
+        audit = messages[0]["dsdbChange"]
         self.assertEquals("Delete", audit["operation"])
         self.assertFalse(audit["performedAsSystem"])
         self.assertTrue(dn.lower(), audit["dn"].lower())
         self.assertRegexpMatches(audit["remoteAddress"],
                                  self.remoteAddress)
         self.assertTrue(self.is_guid(audit["sessionId"]))
+        self.assertEquals(0, audit["statusCode"])
+        self.assertEquals("Success", audit["status"])
         session_id = self.get_session()
         self.assertEquals(session_id, audit["sessionId"])
         service_description = self.get_service_description()
         self.assertEquals(service_description, "LDAP")
 
-    def test_net_set_password_user_without_permission(self):
-
-        self.ldb.newuser(SECOND_USER_NAME, SECOND_USER_PASS)
+        transactionId = audit["transactionId"]
+        message = self.waitForTransaction(transactionId)
+        audit = message["dsdbTransaction"]
+        self.assertEquals("commit", audit["action"])
+        self.assertTrue(self.is_guid(audit["transactionId"]))
+        self.assertTrue(audit["duration"] > 0)
 
-        creds = self.insta_creds(
-            template=self.get_credentials(),
-            username=SECOND_USER_NAME,
-            userpass=SECOND_USER_PASS,
-            kerberos_state=None)
+    def test_samdb_delete_non_existent_dn(self):
 
-        lp = self.get_loadparm()
-        net = Net(creds, lp, server=self.server)
-        password = "newPassword!!42"
-        domain = lp.get("workgroup")
+        DOES_NOT_EXIST = "doesNotExist"
+        dn = "cn=" + USER_NAME + ",cn=users," + self.base_dn
+        self.discardSetupMessages(dn)
 
-        #
-        # This operation should fail and trigger a transaction roll back.
-        #
+        dn = "cn=" + DOES_NOT_EXIST + ",cn=users," + self.base_dn
         try:
-            net.set_password(newpassword=password.encode('utf-8'),
-                             account_name=USER_NAME,
-                             domain_name=domain)
-            self.fail("Expected exception not thrown")
+            self.ldb.delete(dn)
+            self.fail("Exception not thrown")
         except Exception:
             pass
 
-        message = self.waitForTransaction(net)
+        messages = self.waitForMessages(1)
+        print("Received %d messages" % len(messages))
+        self.assertEquals(1,
+                          len(messages),
+                          "Did not receive the expected number of messages")
+
+        audit = messages[0]["dsdbChange"]
+        self.assertEquals("Delete", audit["operation"])
+        self.assertFalse(audit["performedAsSystem"])
+        self.assertTrue(dn.lower(), audit["dn"].lower())
+        self.assertRegexpMatches(audit["remoteAddress"],
+                                 self.remoteAddress)
+        self.assertEquals(ERR_NO_SUCH_OBJECT, audit["statusCode"])
+        self.assertEquals("No such object", audit["status"])
+        self.assertTrue(self.is_guid(audit["sessionId"]))
+        session_id = self.get_session()
+        self.assertEquals(session_id, audit["sessionId"])
+        service_description = self.get_service_description()
+        self.assertEquals(service_description, "LDAP")
 
+        transactionId = audit["transactionId"]
+        message = self.waitForTransaction(transactionId)
         audit = message["dsdbTransaction"]
         self.assertEquals("rollback", audit["action"])
         self.assertTrue(self.is_guid(audit["transactionId"]))
+        self.assertTrue(audit["duration"] > 0)
 
     def test_create_and_delete_secret_over_lsa(self):
 
index 23b4651b32d05940ff736aa30d6132bece529985..80914cb8350b58821ec24710d4645ced0156147b 100644 (file)
 #define TRANSACTION_HR_TAG "DSDB Transaction"
 #define TRANSACTION_MAJOR 1
 #define TRANSACTION_MINOR 0
-/*
- * Currently we only log roll backs and prepare commit failures
- */
-#define TRANSACTION_LOG_LVL 5
+#define TRANSACTION_LOG_FAILURE_LVL 5
+#define TRANSACTION_LOG_COMPLETION_LVL 10
 
 #define REPLICATION_JSON_TYPE "replicatedUpdate"
 #define REPLICATION_HR_TAG "Replicated Update"
@@ -91,6 +89,11 @@ struct audit_context {
         * Unique transaction id for the current transaction
         */
        struct GUID transaction_guid;
+       /*
+        * Transaction start time, used to calculate the transaction
+        * duration.
+        */
+       struct timeval transaction_start;
 };
 
 /*
@@ -360,12 +363,15 @@ static struct json_object password_change_json(
  *
  * @param action a one word description of the event/action
  * @param transaction_id the GUID identifying the current transaction.
+ * @param status the status code returned by the operation
+ * @param duration the duration of the operation.
  *
  * @return a JSON object detailing the event
  */
 static struct json_object transaction_json(
        const char *action,
-       struct GUID *transaction_id)
+       struct GUID *transaction_id,
+       const int64_t duration)
 {
        struct json_object wrapper;
        struct json_object audit;
@@ -374,6 +380,8 @@ static struct json_object transaction_json(
        json_add_version(&audit, TRANSACTION_MAJOR, TRANSACTION_MINOR);
        json_add_string(&audit, "action", action);
        json_add_guid(&audit, "transactionId", transaction_id);
+       json_add_int(&audit, "duration", duration);
+
 
        wrapper = json_new_object();
        json_add_timestamp(&wrapper);
@@ -396,6 +404,7 @@ static struct json_object transaction_json(
  */
 static struct json_object commit_failure_json(
        const char *action,
+       const int64_t duration,
        int status,
        const char *reason,
        struct GUID *transaction_id)
@@ -407,6 +416,7 @@ static struct json_object commit_failure_json(
        json_add_version(&audit, TRANSACTION_MAJOR, TRANSACTION_MINOR);
        json_add_string(&audit, "action", action);
        json_add_guid(&audit, "transactionId", transaction_id);
+       json_add_int(&audit, "duration", duration);
        json_add_int(&audit, "statusCode", status);
        json_add_string(&audit, "status", ldb_strerror(status));
        json_add_string(&audit, "reason", reason);
@@ -719,13 +729,14 @@ static char *replicated_update_human_readable(
  *
  * @param mem_ctx The talloc context owning the returned string.
  * @param action a one word description of the event/action
- * @param transaction_id the GUID identifying the current transaction.
+ * @param duration the duration of the transaction.
  *
  * @return the log entry
  */
 static char *transaction_human_readable(
        TALLOC_CTX *mem_ctx,
-       const char* action)
+       const char* action,
+       const int64_t duration)
 {
        const char *timestamp = NULL;
        char *log_entry = NULL;
@@ -736,9 +747,10 @@ static char *transaction_human_readable(
 
        log_entry = talloc_asprintf(
                mem_ctx,
-               "[%s] at [%s]",
+               "[%s] at [%s] duration [%ld]",
                action,
-               timestamp);
+               timestamp,
+               duration);
 
        TALLOC_FREE(ctx);
        return log_entry;
@@ -759,6 +771,7 @@ static char *transaction_human_readable(
 static char *commit_failure_human_readable(
        TALLOC_CTX *mem_ctx,
        const char *action,
+       const int64_t duration,
        int status,
        const char *reason)
 {
@@ -771,9 +784,10 @@ static char *commit_failure_human_readable(
 
        log_entry = talloc_asprintf(
                mem_ctx,
-               "[%s] at [%s] status [%d] reason [%s]",
+               "[%s] at [%s] duration [%ld] status [%d] reason [%s]",
                action,
                timestamp,
+               duration,
                status,
                reason);
 
@@ -976,40 +990,47 @@ static void log_operation(
  * and send over the message bus.
  *
  * @param module the ldb_module
- * @param  action the transaction event i.e. begin, commit, roll back.
+ * @param action the transaction event i.e. begin, commit, roll back.
+ * @param log_level the logging level
  *
  */
 static void log_transaction(
        struct ldb_module *module,
-       const char *action)
+       const char *action,
+       int log_level)
 {
 
        struct audit_context *ac =
                talloc_get_type(ldb_module_get_private(module),
                                struct audit_context);
+       const struct timeval now = timeval_current();
+       const int64_t duration = usec_time_diff(&now, &ac->transaction_start);
 
        TALLOC_CTX *ctx = talloc_new(NULL);
 
-       if (CHECK_DEBUGLVLC(DBGC_DSDB_TXN_AUDIT, TRANSACTION_LOG_LVL)) {
+       if (CHECK_DEBUGLVLC(DBGC_DSDB_TXN_AUDIT, log_level)) {
                char* entry = NULL;
-               entry = transaction_human_readable(ctx, action);
+               entry = transaction_human_readable(ctx, action, duration);
                audit_log_human_text(
                        TRANSACTION_HR_TAG,
                        entry,
                        DBGC_DSDB_TXN_AUDIT,
-                       TRANSACTION_LOG_LVL);
+                       log_level);
                TALLOC_FREE(entry);
        }
 #ifdef HAVE_JANSSON
-       if (CHECK_DEBUGLVLC(DBGC_DSDB_TXN_AUDIT_JSON, TRANSACTION_LOG_LVL) ||
+       if (CHECK_DEBUGLVLC(DBGC_DSDB_TXN_AUDIT_JSON, log_level) ||
                (ac->msg_ctx && ac->send_samdb_events)) {
                struct json_object json;
-               json = transaction_json(action, &ac->transaction_guid);
+               json = transaction_json(
+                       action,
+                       &ac->transaction_guid,
+                       duration);
                audit_log_json(
                        TRANSACTION_JSON_TYPE,
                        &json,
                        DBGC_DSDB_TXN_AUDIT_JSON,
-                       TRANSACTION_LOG_LVL);
+                       log_level);
                if (ac->send_samdb_events) {
                        audit_message_send(
                                ac->msg_ctx,
@@ -1044,29 +1065,35 @@ static void log_commit_failure(
                talloc_get_type(ldb_module_get_private(module),
                                struct audit_context);
        const char* reason = dsdb_audit_get_ldb_error_string(module, status);
+       const int log_level = TRANSACTION_LOG_FAILURE_LVL;
+       const struct timeval now = timeval_current();
+       const int64_t duration = usec_time_diff(&now, &ac->transaction_start);
 
        TALLOC_CTX *ctx = talloc_new(NULL);
 
-       if (CHECK_DEBUGLVLC(DBGC_DSDB_TXN_AUDIT, TRANSACTION_LOG_LVL)) {
+       if (CHECK_DEBUGLVLC(DBGC_DSDB_TXN_AUDIT, log_level)) {
+
                char* entry = NULL;
                entry = commit_failure_human_readable(
                        ctx,
                        action,
+                       duration,
                        status,
                        reason);
                audit_log_human_text(
                        TRANSACTION_HR_TAG,
                        entry,
                        DBGC_DSDB_TXN_AUDIT,
-                       TRANSACTION_LOG_LVL);
+                       TRANSACTION_LOG_FAILURE_LVL);
                TALLOC_FREE(entry);
        }
 #ifdef HAVE_JANSSON
-       if (CHECK_DEBUGLVLC(DBGC_DSDB_TXN_AUDIT_JSON, TRANSACTION_LOG_LVL) ||
+       if (CHECK_DEBUGLVLC(DBGC_DSDB_TXN_AUDIT_JSON, log_level) ||
                (ac->msg_ctx && ac->send_samdb_events)) {
                struct json_object json;
                json = commit_failure_json(
                        action,
+                       duration,
                        status,
                        reason,
                        &ac->transaction_guid);
@@ -1074,7 +1101,7 @@ static void log_commit_failure(
                        TRANSACTION_JSON_TYPE,
                        &json,
                        DBGC_DSDB_TXN_AUDIT_JSON,
-                       TRANSACTION_LOG_LVL);
+                       log_level);
                if (ac->send_samdb_events) {
                        audit_message_send(ac->msg_ctx,
                                           DSDB_EVENT_NAME,
@@ -1361,10 +1388,12 @@ static int log_start_transaction(struct ldb_module *module)
 
        /*
         * We do not log transaction begins
-        * however we do generate a new transaction_id
+        * however we do generate a new transaction_id and record the start
+        * time so that we can log the transaction duration.
         *
         */
        ac->transaction_guid = GUID_random();
+       ac->transaction_start = timeval_current();
        return ldb_next_start_trans(module);
 }
 
@@ -1410,18 +1439,20 @@ static int log_end_transaction(struct ldb_module *module)
                                struct audit_context);
        int ret = 0;
 
-       /*
-        * Clear the transaction id inserted by log_start_transaction
-        */
-       memset(&ac->transaction_guid, 0, sizeof(struct GUID));
 
        ret = ldb_next_end_trans(module);
-       if (ret != LDB_SUCCESS) {
-               /*
-                * We currently only log commit failures
-                */
+       if (ret == LDB_SUCCESS) {
+               log_transaction(
+                       module,
+                       "commit",
+                       TRANSACTION_LOG_COMPLETION_LVL);
+       } else {
                log_commit_failure(module, "commit", ret);
        }
+       /*
+        * Clear the transaction id inserted by log_start_transaction
+        */
+       memset(&ac->transaction_guid, 0, sizeof(struct GUID));
        return ret;
 }
 
@@ -1441,7 +1472,7 @@ static int log_del_transaction(struct ldb_module *module)
                talloc_get_type(ldb_module_get_private(module),
                                struct audit_context);
 
-       log_transaction(module, "rollback");
+       log_transaction(module, "rollback", TRANSACTION_LOG_FAILURE_LVL);
        memset(&ac->transaction_guid, 0, sizeof(struct GUID));
        return ldb_next_del_trans(module);
 }
index 567c5b345d87907fd7d73b24930ceec5964ae8ed..6ceea80f155a5469aebadc8e31e8a2a96fd69f4c 100644 (file)
@@ -1016,7 +1016,7 @@ static void test_transaction_json(void **state)
        GUID_from_string(GUID, &guid);
 
        before = time(NULL);
-       json = transaction_json("delete", &guid);
+       json = transaction_json("delete", &guid, 10000099);
 
        assert_int_equal(3, json_object_size(json.root));
 
@@ -1033,7 +1033,7 @@ static void test_transaction_json(void **state)
        audit = json_object_get(json.root, "dsdbTransaction");
        assert_non_null(audit);
        assert_true(json_is_object(audit));
-       assert_int_equal(3, json_object_size(audit));
+       assert_int_equal(4, json_object_size(audit));
 
        o = json_object_get(audit, "version");
        assert_non_null(o);
@@ -1049,6 +1049,11 @@ static void test_transaction_json(void **state)
        assert_true(json_is_string(v));
        assert_string_equal("delete", json_string_value(v));
 
+       v = json_object_get(audit, "duration");
+       assert_non_null(v);
+       assert_true(json_is_integer(v));
+       assert_int_equal(10000099, json_integer_value(v));
+
        json_free(&json);
 
 }
@@ -1074,6 +1079,7 @@ static void test_commit_failure_json(void **state)
        before = time(NULL);
        json = commit_failure_json(
                "prepare",
+               987876,
                LDB_ERR_OPERATIONS_ERROR,
                "because",
                &guid);
@@ -1093,7 +1099,7 @@ static void test_commit_failure_json(void **state)
        audit = json_object_get(json.root, "dsdbTransaction");
        assert_non_null(audit);
        assert_true(json_is_object(audit));
-       assert_int_equal(6, json_object_size(audit));
+       assert_int_equal(7, json_object_size(audit));
 
        o = json_object_get(audit, "version");
        assert_non_null(o);
@@ -1126,6 +1132,11 @@ static void test_commit_failure_json(void **state)
        assert_true(json_is_string(v));
        assert_string_equal("because", json_string_value(v));
 
+       v = json_object_get(audit, "duration");
+       assert_non_null(v);
+       assert_true(json_is_integer(v));
+       assert_int_equal(987876, json_integer_value(v));
+
        json_free(&json);
 
 }
@@ -1829,14 +1840,14 @@ static void test_transaction_hr(void **state)
 
        GUID_from_string(GUID, &guid);
 
-       line = transaction_human_readable(ctx, "delete");
+       line = transaction_human_readable(ctx, "delete", 23);
        assert_non_null(line);
 
        /*
         * We ignore the timestamp to make this test a little easier
         * to write.
         */
-       rs = "\\[delete] at \\[[^[]*\\]";
+       rs = "\\[delete] at \\[[^[]*\\] duration \\[23\\]";
 
        ret = regcomp(&regex, rs, 0);
        assert_int_equal(0, ret);
@@ -1871,6 +1882,7 @@ static void test_commit_failure_hr(void **state)
        line = commit_failure_human_readable(
                ctx,
                "commit",
+               789345,
                LDB_ERR_OPERATIONS_ERROR,
                "because");
 
@@ -1880,7 +1892,8 @@ static void test_commit_failure_hr(void **state)
         * We ignore the timestamp to make this test a little easier
         * to write.
         */
-       rs = "\\[commit\\] at \\[[^[]*\\] status \\[1\\] reason \\[because\\]";
+       rs = "\\[commit\\] at \\[[^[]*\\] duration \\[789345\\] "
+            "status \\[1\\] reason \\[because\\]";
 
        ret = regcomp(&regex, rs, 0);
        assert_int_equal(0, ret);