rseq: Introduce extensible rseq ABI
authorMathieu Desnoyers <mathieu.desnoyers@efficios.com>
Tue, 22 Nov 2022 20:39:05 +0000 (15:39 -0500)
committerPeter Zijlstra <peterz@infradead.org>
Tue, 27 Dec 2022 11:52:10 +0000 (12:52 +0100)
Introduce the extensible rseq ABI, where the feature size supported by
the kernel and the required alignment are communicated to user-space
through ELF auxiliary vectors.

This allows user-space to call rseq registration with a rseq_len of
either 32 bytes for the original struct rseq size (which includes
padding), or larger.

If rseq_len is larger than 32 bytes, then it must be large enough to
contain the feature size communicated to user-space through ELF
auxiliary vectors.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20221122203932.231377-4-mathieu.desnoyers@efficios.com
include/linux/sched.h
kernel/ptrace.c
kernel/rseq.c

index 853d08f7562bdaae9119ff2b9b78522ca561ee48..e0bc020a63a9ac59543cfc07b7210a9aab68a036 100644 (file)
@@ -1302,6 +1302,7 @@ struct task_struct {
 
 #ifdef CONFIG_RSEQ
        struct rseq __user *rseq;
+       u32 rseq_len;
        u32 rseq_sig;
        /*
         * RmW on rseq_event_mask must be performed atomically
@@ -2352,10 +2353,12 @@ static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
 {
        if (clone_flags & CLONE_VM) {
                t->rseq = NULL;
+               t->rseq_len = 0;
                t->rseq_sig = 0;
                t->rseq_event_mask = 0;
        } else {
                t->rseq = current->rseq;
+               t->rseq_len = current->rseq_len;
                t->rseq_sig = current->rseq_sig;
                t->rseq_event_mask = current->rseq_event_mask;
        }
@@ -2364,6 +2367,7 @@ static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
 static inline void rseq_execve(struct task_struct *t)
 {
        t->rseq = NULL;
+       t->rseq_len = 0;
        t->rseq_sig = 0;
        t->rseq_event_mask = 0;
 }
index 54482193e1edc2c665a8f32f08c4d81300757dbc..0786450074c13b787fec9a58072c49ec972e3aad 100644 (file)
@@ -813,7 +813,7 @@ static long ptrace_get_rseq_configuration(struct task_struct *task,
 {
        struct ptrace_rseq_configuration conf = {
                .rseq_abi_pointer = (u64)(uintptr_t)task->rseq,
-               .rseq_abi_size = sizeof(*task->rseq),
+               .rseq_abi_size = task->rseq_len,
                .signature = task->rseq_sig,
                .flags = 0,
        };
index d38ab944105d7ff919c8dce46f31499f393f0a51..7962738455c9a94584c080d131ec6b7615daa13f 100644 (file)
@@ -18,6 +18,9 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/rseq.h>
 
+/* The original rseq structure size (including padding) is 32 bytes. */
+#define ORIG_RSEQ_SIZE         32
+
 #define RSEQ_CS_NO_RESTART_FLAGS (RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT | \
                                  RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL | \
                                  RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE)
@@ -87,10 +90,15 @@ static int rseq_update_cpu_id(struct task_struct *t)
        u32 cpu_id = raw_smp_processor_id();
        struct rseq __user *rseq = t->rseq;
 
-       if (!user_write_access_begin(rseq, sizeof(*rseq)))
+       if (!user_write_access_begin(rseq, t->rseq_len))
                goto efault;
        unsafe_put_user(cpu_id, &rseq->cpu_id_start, efault_end);
        unsafe_put_user(cpu_id, &rseq->cpu_id, efault_end);
+       /*
+        * Additional feature fields added after ORIG_RSEQ_SIZE
+        * need to be conditionally updated only if
+        * t->rseq_len != ORIG_RSEQ_SIZE.
+        */
        user_write_access_end();
        trace_rseq_update(t);
        return 0;
@@ -117,6 +125,11 @@ static int rseq_reset_rseq_cpu_id(struct task_struct *t)
         */
        if (put_user(cpu_id, &t->rseq->cpu_id))
                return -EFAULT;
+       /*
+        * Additional feature fields added after ORIG_RSEQ_SIZE
+        * need to be conditionally reset only if
+        * t->rseq_len != ORIG_RSEQ_SIZE.
+        */
        return 0;
 }
 
@@ -344,7 +357,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len,
                /* Unregister rseq for current thread. */
                if (current->rseq != rseq || !current->rseq)
                        return -EINVAL;
-               if (rseq_len != sizeof(*rseq))
+               if (rseq_len != current->rseq_len)
                        return -EINVAL;
                if (current->rseq_sig != sig)
                        return -EPERM;
@@ -353,6 +366,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len,
                        return ret;
                current->rseq = NULL;
                current->rseq_sig = 0;
+               current->rseq_len = 0;
                return 0;
        }
 
@@ -365,7 +379,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len,
                 * the provided address differs from the prior
                 * one.
                 */
-               if (current->rseq != rseq || rseq_len != sizeof(*rseq))
+               if (current->rseq != rseq || rseq_len != current->rseq_len)
                        return -EINVAL;
                if (current->rseq_sig != sig)
                        return -EPERM;
@@ -374,15 +388,24 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len,
        }
 
        /*
-        * If there was no rseq previously registered,
-        * ensure the provided rseq is properly aligned and valid.
+        * If there was no rseq previously registered, ensure the provided rseq
+        * is properly aligned, as communcated to user-space through the ELF
+        * auxiliary vector AT_RSEQ_ALIGN. If rseq_len is the original rseq
+        * size, the required alignment is the original struct rseq alignment.
+        *
+        * In order to be valid, rseq_len is either the original rseq size, or
+        * large enough to contain all supported fields, as communicated to
+        * user-space through the ELF auxiliary vector AT_RSEQ_FEATURE_SIZE.
         */
-       if (!IS_ALIGNED((unsigned long)rseq, __alignof__(*rseq)) ||
-           rseq_len != sizeof(*rseq))
+       if (rseq_len < ORIG_RSEQ_SIZE ||
+           (rseq_len == ORIG_RSEQ_SIZE && !IS_ALIGNED((unsigned long)rseq, ORIG_RSEQ_SIZE)) ||
+           (rseq_len != ORIG_RSEQ_SIZE && (!IS_ALIGNED((unsigned long)rseq, __alignof__(*rseq)) ||
+                                           rseq_len < offsetof(struct rseq, end))))
                return -EINVAL;
        if (!access_ok(rseq, rseq_len))
                return -EFAULT;
        current->rseq = rseq;
+       current->rseq_len = rseq_len;
        current->rseq_sig = sig;
        /*
         * If rseq was previously inactive, and has just been