bpf/task_iter: In task_file_seq_get_next use task_lookup_next_fd_rcu
authorEric W. Biederman <ebiederm@xmission.com>
Fri, 20 Nov 2020 23:14:33 +0000 (17:14 -0600)
committerEric W. Biederman <ebiederm@xmission.com>
Thu, 10 Dec 2020 18:42:58 +0000 (12:42 -0600)
When discussing[1] exec and posix file locks it was realized that none
of the callers of get_files_struct fundamentally needed to call
get_files_struct, and that by switching them to helper functions
instead it will both simplify their code and remove unnecessary
increments of files_struct.count.  Those unnecessary increments can
result in exec unnecessarily unsharing files_struct which breaking
posix locks, and it can result in fget_light having to fallback to
fget reducing system performance.

Using task_lookup_next_fd_rcu simplifies task_file_seq_get_next, by
moving the checking for the maximum file descritor into the generic
code, and by remvoing the need for capturing and releasing a reference
on files_struct.  As the reference count of files_struct no longer
needs to be maintained bpf_iter_seq_task_file_info can have it's files
member removed and task_file_seq_get_next no longer needs it's fstruct
argument.

The curr_fd local variable does need to become unsigned to be used
with fnext_task.  As curr_fd is assigned from and assigned a u32
making curr_fd an unsigned int won't cause problems and might prevent
them.

[1] https://lkml.kernel.org/r/20180915160423.GA31461@redhat.com
Suggested-by: Oleg Nesterov <oleg@redhat.com>
v1: https://lkml.kernel.org/r/20200817220425.9389-11-ebiederm@xmission.com
Link: https://lkml.kernel.org/r/20201120231441.29911-16-ebiederm@xmission.com
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
kernel/bpf/task_iter.c

index 5ab2ccfb96cbec139b75ecac37ccc6ceed317f2e..4ec63170c741771c2d61e68626836de95f12d113 100644 (file)
@@ -130,45 +130,33 @@ struct bpf_iter_seq_task_file_info {
         */
        struct bpf_iter_seq_task_common common;
        struct task_struct *task;
-       struct files_struct *files;
        u32 tid;
        u32 fd;
 };
 
 static struct file *
 task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info,
-                      struct task_struct **task, struct files_struct **fstruct)
+                      struct task_struct **task)
 {
        struct pid_namespace *ns = info->common.ns;
-       u32 curr_tid = info->tid, max_fds;
-       struct files_struct *curr_files;
+       u32 curr_tid = info->tid;
        struct task_struct *curr_task;
-       int curr_fd = info->fd;
+       unsigned int curr_fd = info->fd;
 
        /* If this function returns a non-NULL file object,
-        * it held a reference to the task/files_struct/file.
+        * it held a reference to the task/file.
         * Otherwise, it does not hold any reference.
         */
 again:
        if (*task) {
                curr_task = *task;
-               curr_files = *fstruct;
                curr_fd = info->fd;
        } else {
                curr_task = task_seq_get_next(ns, &curr_tid, true);
                if (!curr_task)
                        return NULL;
 
-               curr_files = get_files_struct(curr_task);
-               if (!curr_files) {
-                       put_task_struct(curr_task);
-                       curr_tid = ++(info->tid);
-                       info->fd = 0;
-                       goto again;
-               }
-
-               /* set *fstruct, *task and info->tid */
-               *fstruct = curr_files;
+               /* set *task and info->tid */
                *task = curr_task;
                if (curr_tid == info->tid) {
                        curr_fd = info->fd;
@@ -179,13 +167,11 @@ again:
        }
 
        rcu_read_lock();
-       max_fds = files_fdtable(curr_files)->max_fds;
-       for (; curr_fd < max_fds; curr_fd++) {
+       for (;; curr_fd++) {
                struct file *f;
-
-               f = files_lookup_fd_rcu(curr_files, curr_fd);
+               f = task_lookup_next_fd_rcu(curr_task, &curr_fd);
                if (!f)
-                       continue;
+                       break;
                if (!get_file_rcu(f))
                        continue;
 
@@ -197,10 +183,8 @@ again:
 
        /* the current task is done, go to the next task */
        rcu_read_unlock();
-       put_files_struct(curr_files);
        put_task_struct(curr_task);
        *task = NULL;
-       *fstruct = NULL;
        info->fd = 0;
        curr_tid = ++(info->tid);
        goto again;
@@ -209,13 +193,11 @@ again:
 static void *task_file_seq_start(struct seq_file *seq, loff_t *pos)
 {
        struct bpf_iter_seq_task_file_info *info = seq->private;
-       struct files_struct *files = NULL;
        struct task_struct *task = NULL;
        struct file *file;
 
-       file = task_file_seq_get_next(info, &task, &files);
+       file = task_file_seq_get_next(info, &task);
        if (!file) {
-               info->files = NULL;
                info->task = NULL;
                return NULL;
        }
@@ -223,7 +205,6 @@ static void *task_file_seq_start(struct seq_file *seq, loff_t *pos)
        if (*pos == 0)
                ++*pos;
        info->task = task;
-       info->files = files;
 
        return file;
 }
@@ -231,22 +212,19 @@ static void *task_file_seq_start(struct seq_file *seq, loff_t *pos)
 static void *task_file_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
        struct bpf_iter_seq_task_file_info *info = seq->private;
-       struct files_struct *files = info->files;
        struct task_struct *task = info->task;
        struct file *file;
 
        ++*pos;
        ++info->fd;
        fput((struct file *)v);
-       file = task_file_seq_get_next(info, &task, &files);
+       file = task_file_seq_get_next(info, &task);
        if (!file) {
-               info->files = NULL;
                info->task = NULL;
                return NULL;
        }
 
        info->task = task;
-       info->files = files;
 
        return file;
 }
@@ -295,9 +273,7 @@ static void task_file_seq_stop(struct seq_file *seq, void *v)
                (void)__task_file_seq_show(seq, v, true);
        } else {
                fput((struct file *)v);
-               put_files_struct(info->files);
                put_task_struct(info->task);
-               info->files = NULL;
                info->task = NULL;
        }
 }