netcmd: Add 'samba-tool group stats' command
authorTim Beale <timbeale@catalyst.net.nz>
Thu, 18 Oct 2018 04:08:32 +0000 (17:08 +1300)
committerDouglas Bagnall <dbagnall@samba.org>
Wed, 31 Oct 2018 02:40:41 +0000 (03:40 +0100)
With large domains it's hard to get an idea of how many groups there
are, and how many users are in each group, on average. However, this
could have a big impact on whether a problem can be reproduced or not.

This patch dumps out some summary information so that you can get a
quick idea of how big the groups are.

Signed-off-by: Tim Beale <timbeale@catalyst.net.nz>
Reviewed-by: Douglas Bagnall <douglas.bagnall@catalyst.net.nz>
Autobuild-User(master): Douglas Bagnall <dbagnall@samba.org>
Autobuild-Date(master): Wed Oct 31 03:40:41 CET 2018 on sn-devel-144

docs-xml/manpages/samba-tool.8.xml
python/samba/netcmd/group.py
python/samba/tests/samba_tool/group.py

index 2c043b90fcacf084b1c4400c8d7b392822bdb8a1..01f5313abf8301988c5e7e75b584fb2a7a66ed57 100644 (file)
        <para>Show group object and it's attributes.</para>
 </refsect3>
 
+<refsect3>
+       <title>group stats [options]</title>
+       <para>Show summary statistics about groups and group memberships.</para>
+</refsect3>
+
 <refsect2>
        <title>ldapcmp <replaceable>URL1</replaceable> <replaceable>URL2</replaceable> <replaceable>domain|configuration|schema|dnsdomain|dnsforest</replaceable> [options] </title>
        <para>Compare two LDAP databases.</para>
index 7c7dfd8a699d3931de374d6883fa83e060005bcb..121161cda3dbeeb14acb00300dcad7d0226c9d41 100644 (file)
@@ -34,6 +34,7 @@ from samba.dsdb import (
     GTYPE_DISTRIBUTION_GLOBAL_GROUP,
     GTYPE_DISTRIBUTION_UNIVERSAL_GROUP,
 )
+from collections import defaultdict
 
 security_group = dict({"Builtin": GTYPE_SECURITY_BUILTIN_LOCAL_GROUP,
                        "Domain": GTYPE_SECURITY_DOMAIN_LOCAL_GROUP,
@@ -587,6 +588,102 @@ Example3 shows how to display a users objectGUID and member attributes.
             self.outf.write(user_ldif)
 
 
+class cmd_group_stats(Command):
+    """Summary statistics about group memberships."""
+
+    synopsis = "%prog [options]"
+
+    takes_options = [
+        Option("-H", "--URL", help="LDB URL for database or target server", type=str,
+               metavar="URL", dest="H"),
+    ]
+
+    takes_optiongroups = {
+        "sambaopts": options.SambaOptions,
+        "credopts": options.CredentialsOptions,
+        "versionopts": options.VersionOptions,
+    }
+
+    def num_in_range(self, range_min, range_max, group_freqs):
+        """Count the groups whose member count lies in an inclusive range.
+
+        :param range_min: lowest member count included in the range
+        :param range_max: highest member count included in the range
+        :param group_freqs: mapping of member count -> number of groups
+            that have exactly that many members
+        :return: total number of groups with range_min..range_max members
+        """
+        return sum(count for members, count in group_freqs.items()
+                   if range_min <= members <= range_max)
+
+    def run(self, sambaopts=None, credopts=None, versionopts=None, H=None):
+        """Search the domain for groups and print membership statistics.
+
+        Writes the totals, the average group size, the smallest and largest
+        group, and a banded histogram of group sizes to self.outf. Only the
+        values of each group's own 'member' attribute are counted.
+        """
+        lp = sambaopts.get_loadparm()
+        creds = credopts.get_credentials(lp, fallback_machine=True)
+
+        samdb = SamDB(url=H, session_info=system_session(),
+                      credentials=creds, lp=lp)
+
+        domain_dn = samdb.domain_dn()
+        res = samdb.search(domain_dn, scope=ldb.SCOPE_SUBTREE,
+                           expression=("(objectClass=group)"),
+                           attrs=["samaccountname", "member"])
+
+        # first count up how many members each group has
+        group_assignments = {}
+        total_memberships = 0
+
+        for msg in res:
+            name = str(msg.get("samaccountname"))
+            memberships = len(msg.get("member", default=[]))
+            group_assignments[name] = memberships
+            total_memberships += memberships
+
+        total_groups = res.count
+
+        self.outf.write("Group membership statistics*\n")
+        self.outf.write("-------------------------------------------------\n")
+        self.outf.write("Total groups: {0}\n".format(total_groups))
+        self.outf.write("Total memberships: {0}\n".format(total_memberships))
+
+        if not group_assignments:
+            # nothing matched the search: there is no average, minimum or
+            # maximum to report, and computing them would raise
+            self.outf.write("\n* Note this does not include nested group memberships\n")
+            return
+
+        # convert to float *before* dividing, otherwise integer division
+        # (Python 2) throws the fractional part away
+        average = total_memberships / float(total_groups)
+        self.outf.write("Average members per group: %.2f\n" % average)
+
+        # note that some builtin groups have no members, so this doesn't tell us much
+        smallest = min(group_assignments, key=group_assignments.get)
+        self.outf.write("Min members: {0} ({1})\n".format(group_assignments[smallest],
+                                                          smallest))
+        largest = max(group_assignments, key=group_assignments.get)
+        max_members = group_assignments[largest]
+        self.outf.write("Max members: {0} ({1})\n\n".format(max_members,
+                                                            largest))
+
+        # convert this to the frequency of group membership, i.e. how many
+        # groups have 5 members, how many have 6 members, etc
+        group_freqs = defaultdict(int)
+        for count in group_assignments.values():
+            group_freqs[count] += 1
+
+        # now squash this down even further, so that we just display the number
+        # of groups that fall into one of the following membership bands
+        bands = [(0, 1), (2, 4), (5, 9), (10, 14), (15, 19), (20, 24), (25, 29),
+                 (30, 39), (40, 49), (50, 59), (60, 69), (70, 79), (80, 89),
+                 (90, 99), (100, 149), (150, 199), (200, 249), (250, 299),
+                 (300, 399), (400, 499), (500, 999), (1000, 1999),
+                 (2000, 2999), (3000, 3999), (4000, 4999), (5000, 9999),
+                 (10000, max_members)]
+
+        self.outf.write("Members        Number of Groups\n")
+        self.outf.write("-------------------------------------------------\n")
+
+        for band_start, band_end in bands:
+            # bands are in ascending order, so once one starts beyond the
+            # largest group every later band is empty too
+            if band_start > max_members:
+                break
+
+            num_groups = self.num_in_range(band_start, band_end, group_freqs)
+
+            # only show the bands that actually contain groups
+            if num_groups != 0:
+                band_str = "{0}-{1}".format(band_start, band_end)
+                self.outf.write("%13s  %u\n" % (band_str, num_groups))
+
+        self.outf.write("\n* Note this does not include nested group memberships\n")
+
+
 class cmd_group(SuperCommand):
     """Group management."""
 
@@ -599,3 +696,4 @@ class cmd_group(SuperCommand):
     subcommands["listmembers"] = cmd_group_list_members()
     subcommands["move"] = cmd_group_move()
     subcommands["show"] = cmd_group_show()
+    subcommands["stats"] = cmd_group_stats()
index 7a5fd96a077d5d8aa95151b810d79e93c8ead3ab..bb701e91262d2b966446bae01a8acf0462d56f8f 100644 (file)
@@ -208,3 +208,21 @@ class GroupCmdTestCase(SambaToolCmdTest):
             return grouplist[0]
         else:
             return None
+
+    def test_stats(self):
+        """Run 'group stats' against the DC and check the reported total."""
+        (result, out, err) = self.runsubcmd("group", "stats",
+                                            "-H", "ldap://%s" % os.environ["DC_SERVER"],
+                                            "-U%s%%%s" % (os.environ["DC_USERNAME"],
+                                                          os.environ["DC_PASSWORD"]))
+        self.assertCmdSuccess(result, out, err, "Error running stats")
+
+        # sanity-check the command reports 'total groups' correctly
+        search_filter = "(objectClass=group)"
+        grouplist = self.samdb.search(base=self.samdb.domain_dn(),
+                                      scope=ldb.SCOPE_SUBTREE,
+                                      expression=search_filter,
+                                      attrs=[])
+
+        total_groups = len(grouplist)
+        # include the trailing newline so that e.g. an expected total of 4
+        # cannot be satisfied by a reported total of 42
+        self.assertIn("Total groups: {0}\n".format(total_groups), out,
+                      "Total groups not reported correctly")