traffic_replay: Add a max-members option to cap group size
author Tim Beale <timbeale@catalyst.net.nz>
Tue, 27 Nov 2018 00:50:32 +0000 (13:50 +1300)
committer Andrew Bartlett <abartlet@samba.org>
Tue, 4 Dec 2018 11:22:50 +0000 (12:22 +0100)
traffic_replay tries to distribute the users among the groups in a
realistic manner - some groups will have almost all users in them.
However, this becomes a problem when testing a really large database,
e.g. we may want 100K users, but no more than 5K users in each group.

This patch adds a max-members option so we can limit how big the groups
actually get.

If we detect that a group has reached max-members, we reset the group's
probability (of getting selected) to zero, and then recalculate the
cumulative distribution. This means that the group should no longer get
selected by generate_random_membership(). (Note we can't completely
remove the group from the list because that changes the
list-index-to-group-ID mapping).

Signed-off-by: Tim Beale <timbeale@catalyst.net.nz>
Reviewed-by: Andrew Bartlett <abartlet@samba.org>
Autobuild-User(master): Andrew Bartlett <abartlet@samba.org>
Autobuild-Date(master): Tue Dec  4 12:22:50 CET 2018 on sn-devel-144

python/samba/emulate/traffic.py
script/traffic_replay

index fd886e3865e6a5b392db4e3ad5e1e37aeea07a9b..291162f279ac066757f1d4459df360759604ef9c 100644 (file)
@@ -1764,8 +1764,8 @@ def clean_up_accounts(ldb, instance_id):
 
 def generate_users_and_groups(ldb, instance_id, password,
                               number_of_users, number_of_groups,
-                              group_memberships, machine_accounts,
-                              traffic_accounts=True):
+                              group_memberships, max_members,
+                              machine_accounts, traffic_accounts=True):
     """Generate the required users and groups, allocating the users to
        those groups."""
     memberships_added = 0
@@ -1792,7 +1792,8 @@ def generate_users_and_groups(ldb, instance_id, password,
                                        groups_added,
                                        number_of_users,
                                        users_added,
-                                       group_memberships)
+                                       group_memberships,
+                                       max_members)
         LOGGER.info("Adding users to groups")
         add_users_to_groups(ldb, instance_id, assignments)
         memberships_added = assignments.total()
@@ -1808,11 +1809,12 @@ def generate_users_and_groups(ldb, instance_id, password,
 
 class GroupAssignments(object):
     def __init__(self, number_of_groups, groups_added, number_of_users,
-                 users_added, group_memberships):
+                 users_added, group_memberships, max_members):
 
         self.count = 0
         self.generate_group_distribution(number_of_groups)
         self.generate_user_distribution(number_of_users, group_memberships)
+        self.max_members = max_members
         self.assignments = defaultdict(list)
         self.assign_groups(number_of_groups, groups_added, number_of_users,
                            users_added, group_memberships)
@@ -1825,6 +1827,9 @@ class GroupAssignments(object):
         # value, so we can use random.random() as a simple index into the list
         dist = []
         total = sum(weights)
+        if total == 0:
+            return None
+
         cumulative = 0.0
         for probability in weights:
             cumulative += probability
@@ -1868,6 +1873,7 @@ class GroupAssignments(object):
             weights.append(p)
 
         # convert the weights to a cumulative distribution between 0.0 and 1.0
+        self.group_weights = weights
         self.group_dist = self.cumulative_distribution(weights)
 
     def generate_random_membership(self):
@@ -1888,6 +1894,18 @@ class GroupAssignments(object):
     def get_groups(self):
         return self.assignments.keys()
 
+    def cap_group_membership(self, group, max_members):
+        """Prevent the group's membership from exceeding the max specified"""
+        num_members = len(self.assignments[group])
+        if num_members >= max_members:
+            LOGGER.info("Group {0} has {1} members".format(group, num_members))
+
+            # remove this group and then recalculate the cumulative
+            # distribution, so this group is no longer selected
+            self.group_weights[group - 1] = 0
+            new_dist = self.cumulative_distribution(self.group_weights)
+            self.group_dist = new_dist
+
     def add_assignment(self, user, group):
         # the assignments are stored in a dictionary where key=group,
         # value=list-of-users-in-group (indexing by group-ID allows us to
@@ -1896,6 +1914,10 @@ class GroupAssignments(object):
             self.assignments[group].append(user)
             self.count += 1
 
+        # check if there's a cap on how big the groups can grow
+        if self.max_members:
+            self.cap_group_membership(group, self.max_members)
+
     def assign_groups(self, number_of_groups, groups_added,
                       number_of_users, users_added, group_memberships):
         """Allocate users to groups.
@@ -1915,6 +1937,10 @@ class GroupAssignments(object):
             float(group_memberships) *
             (float(users_added) / float(number_of_users)))
 
+        if self.max_members:
+            group_memberships = min(group_memberships,
+                                    self.max_members * number_of_groups)
+
         existing_users  = number_of_users  - users_added  - 1
         existing_groups = number_of_groups - groups_added - 1
         while self.total() < group_memberships:
index 991c9a9eb030324c5d16cd6df9234d285ca0ac8f..0ee0f9b65752291a38ecb256acb28e2217fda71a 100755 (executable)
@@ -112,6 +112,8 @@ def main():
     user_gen_group.add_option('--group-memberships', type='int', default=0,
                               help='Total memberships to assign across all '
                               'test users and all groups')
+    user_gen_group.add_option('--max-members', type='int', default=None,
+                              help='Max users to add to any one group')
     parser.add_option_group(user_gen_group)
 
     sambaopts = options.SambaOptions(parser)
@@ -333,6 +335,7 @@ def main():
                                           opts.number_of_users,
                                           opts.number_of_groups,
                                           opts.group_memberships,
+                                          opts.max_members,
                                           machine_accounts=computer_accounts,
                                           traffic_accounts=False)
         sys.exit()
@@ -346,6 +349,7 @@ def main():
                                       number_of_users,
                                       opts.number_of_groups,
                                       opts.group_memberships,
+                                      opts.max_members,
                                       machine_accounts=len(conversations),
                                       traffic_accounts=True)