Merge tag 'nfsd-4.14-1' of git://linux-nfs.org/~bfields/linux
[sfrench/cifs-2.6.git] / tools / testing / selftests / seccomp / seccomp_bpf.c
1 /*
2  * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
3  * Use of this source code is governed by the GPLv2 license.
4  *
5  * Test code for seccomp bpf.
6  */
7
8 #include <sys/types.h>
9
/*
 * glibc 2.26 and later have SIGSYS in siginfo_t. Before that,
 * we need to use the kernel's siginfo.h file and trick glibc
 * into accepting it.
 *
 * __GLIBC_PREREQ is only evaluated when it is defined: using an
 * undefined function-like macro inside #if is a preprocessing error,
 * which would break the build on C libraries that do not provide it.
 */
#if defined(__GLIBC_PREREQ)
# if !__GLIBC_PREREQ(2, 26)
#  include <asm/siginfo.h>
#  define __have_siginfo_t 1
#  define __have_sigval_t 1
#  define __have_sigevent_t 1
# endif
#endif
21
22 #include <errno.h>
23 #include <linux/filter.h>
24 #include <sys/prctl.h>
25 #include <sys/ptrace.h>
26 #include <sys/user.h>
27 #include <linux/prctl.h>
28 #include <linux/ptrace.h>
29 #include <linux/seccomp.h>
30 #include <pthread.h>
31 #include <semaphore.h>
32 #include <signal.h>
33 #include <stddef.h>
34 #include <stdbool.h>
35 #include <string.h>
36 #include <time.h>
37 #include <linux/elf.h>
38 #include <sys/uio.h>
39 #include <sys/utsname.h>
40 #include <sys/fcntl.h>
41 #include <sys/mman.h>
42 #include <sys/times.h>
43
44 #define _GNU_SOURCE
45 #include <unistd.h>
46 #include <sys/syscall.h>
47
48 #include "../kselftest_harness.h"
49
/*
 * Fallback values for prctl()/seccomp constants. Each one is defined here
 * only when the included headers did not already provide it.
 */
#if !defined(PR_SET_PTRACER)
# define PR_SET_PTRACER 0x59616d61
#endif

#if !defined(PR_SET_NO_NEW_PRIVS)
# define PR_SET_NO_NEW_PRIVS 38
# define PR_GET_NO_NEW_PRIVS 39
#endif

#if !defined(PR_SECCOMP_EXT)
# define PR_SECCOMP_EXT 43
#endif

#if !defined(SECCOMP_EXT_ACT)
# define SECCOMP_EXT_ACT 1
#endif

#if !defined(SECCOMP_EXT_ACT_TSYNC)
# define SECCOMP_EXT_ACT_TSYNC 1
#endif

#if !defined(SECCOMP_MODE_STRICT)
# define SECCOMP_MODE_STRICT 1
#endif

#if !defined(SECCOMP_MODE_FILTER)
# define SECCOMP_MODE_FILTER 2
#endif
78
/*
 * Fallback definition of the data a seccomp filter inspects, used only
 * when the headers did not supply SECCOMP_RET_ALLOW.
 */
#if !defined(SECCOMP_RET_ALLOW)
struct seccomp_data {
	int nr;
	__u32 arch;
	__u64 instruction_pointer;
	__u64 args[6];
};
#endif

/* Fallback filter return actions, each group guarded independently. */
#if !defined(SECCOMP_RET_KILL_PROCESS)
# define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */
# define SECCOMP_RET_KILL_THREAD  0x00000000U /* kill the thread */
#endif
#if !defined(SECCOMP_RET_KILL)
# define SECCOMP_RET_KILL         SECCOMP_RET_KILL_THREAD
# define SECCOMP_RET_TRAP         0x00030000U /* disallow and force a SIGSYS */
# define SECCOMP_RET_ERRNO        0x00050000U /* returns an errno */
# define SECCOMP_RET_TRACE        0x7ff00000U /* pass to a tracer or disallow */
# define SECCOMP_RET_ALLOW        0x7fff0000U /* allow */
#endif
#if !defined(SECCOMP_RET_LOG)
# define SECCOMP_RET_LOG          0x7ffc0000U /* allow after logging */
#endif
102
/*
 * Per-architecture seccomp syscall number for headers that lack it.
 * Unknown architectures get a build warning and the placeholder 0xffff.
 */
#if !defined(__NR_seccomp)
# if defined(__i386__)
#  define __NR_seccomp 354
# elif defined(__x86_64__)
#  define __NR_seccomp 317
# elif defined(__arm__)
#  define __NR_seccomp 383
# elif defined(__aarch64__)
#  define __NR_seccomp 277
# elif defined(__hppa__)
#  define __NR_seccomp 338
# elif defined(__powerpc__)
#  define __NR_seccomp 358
# elif defined(__s390__)
#  define __NR_seccomp 348
# else
#  warning "seccomp syscall number unknown for this architecture"
#  define __NR_seccomp 0xffff
# endif
#endif
123
/* Fallback operation numbers for the seccomp() syscall. */
#if !defined(SECCOMP_SET_MODE_STRICT)
# define SECCOMP_SET_MODE_STRICT 0
#endif

#if !defined(SECCOMP_SET_MODE_FILTER)
# define SECCOMP_SET_MODE_FILTER 1
#endif

#if !defined(SECCOMP_GET_ACTION_AVAIL)
# define SECCOMP_GET_ACTION_AVAIL 2
#endif

/* Fallback flag bits for SECCOMP_SET_MODE_FILTER. */
#if !defined(SECCOMP_FILTER_FLAG_TSYNC)
# define SECCOMP_FILTER_FLAG_TSYNC 1
#endif

#if !defined(SECCOMP_FILTER_FLAG_LOG)
# define SECCOMP_FILTER_FLAG_LOG 2
#endif
143
#ifndef seccomp
/*
 * Invoke the raw seccomp syscall.
 *
 * errno is cleared before the call so that callers can inspect it
 * afterwards. The syscall's result is returned narrowed to int.
 */
int seccomp(unsigned int op, unsigned int flags, void *args)
{
	long rc;

	errno = 0;
	rc = syscall(__NR_seccomp, op, flags, args);
	return rc;
}
#endif
151
/*
 * syscall_arg(_n): byte offset inside struct seccomp_data at which a
 * 32-bit BPF load reads from args[_n]. On big-endian builds the offset
 * is advanced by sizeof(__u32) past the start of the 64-bit slot.
 */
#if __BYTE_ORDER != __LITTLE_ENDIAN && __BYTE_ORDER != __BIG_ENDIAN
# error "wut? Unknown __BYTE_ORDER?!"
#elif __BYTE_ORDER == __LITTLE_ENDIAN
# define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]))
#else
# define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32))
#endif
159
/* Sentinel values returned through a thread's exit status. */
#define SIBLING_EXIT_UNKILLED 0xbadbeef /* thread returned normally */
#define SIBLING_EXIT_FAILURE  0xbadface /* thread survived a call meant to kill it */
#define SIBLING_EXIT_NEWPRIVS 0xbadfeed
163
164 TEST(mode_strict_support)
165 {
166         long ret;
167
168         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
169         ASSERT_EQ(0, ret) {
170                 TH_LOG("Kernel does not support CONFIG_SECCOMP");
171         }
172         syscall(__NR_exit, 0);
173 }
174
175 TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL)
176 {
177         long ret;
178
179         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
180         ASSERT_EQ(0, ret) {
181                 TH_LOG("Kernel does not support CONFIG_SECCOMP");
182         }
183         syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
184                 NULL, NULL, NULL);
185         EXPECT_FALSE(true) {
186                 TH_LOG("Unreachable!");
187         }
188 }
189
190 /* Note! This doesn't test no new privs behavior */
191 TEST(no_new_privs_support)
192 {
193         long ret;
194
195         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
196         EXPECT_EQ(0, ret) {
197                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
198         }
199 }
200
201 /* Tests kernel support by checking for a copy_from_user() fault on NULL. */
202 TEST(mode_filter_support)
203 {
204         long ret;
205
206         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
207         ASSERT_EQ(0, ret) {
208                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
209         }
210         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL);
211         EXPECT_EQ(-1, ret);
212         EXPECT_EQ(EFAULT, errno) {
213                 TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!");
214         }
215 }
216
217 TEST(mode_filter_without_nnp)
218 {
219         struct sock_filter filter[] = {
220                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
221         };
222         struct sock_fprog prog = {
223                 .len = (unsigned short)ARRAY_SIZE(filter),
224                 .filter = filter,
225         };
226         long ret;
227
228         ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0);
229         ASSERT_LE(0, ret) {
230                 TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS");
231         }
232         errno = 0;
233         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
234         /* Succeeds with CAP_SYS_ADMIN, fails without */
235         /* TODO(wad) check caps not euid */
236         if (geteuid()) {
237                 EXPECT_EQ(-1, ret);
238                 EXPECT_EQ(EACCES, errno);
239         } else {
240                 EXPECT_EQ(0, ret);
241         }
242 }
243
244 #define MAX_INSNS_PER_PATH 32768
245
246 TEST(filter_size_limits)
247 {
248         int i;
249         int count = BPF_MAXINSNS + 1;
250         struct sock_filter allow[] = {
251                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
252         };
253         struct sock_filter *filter;
254         struct sock_fprog prog = { };
255         long ret;
256
257         filter = calloc(count, sizeof(*filter));
258         ASSERT_NE(NULL, filter);
259
260         for (i = 0; i < count; i++)
261                 filter[i] = allow[0];
262
263         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
264         ASSERT_EQ(0, ret);
265
266         prog.filter = filter;
267         prog.len = count;
268
269         /* Too many filter instructions in a single filter. */
270         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
271         ASSERT_NE(0, ret) {
272                 TH_LOG("Installing %d insn filter was allowed", prog.len);
273         }
274
275         /* One less is okay, though. */
276         prog.len -= 1;
277         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
278         ASSERT_EQ(0, ret) {
279                 TH_LOG("Installing %d insn filter wasn't allowed", prog.len);
280         }
281 }
282
283 TEST(filter_chain_limits)
284 {
285         int i;
286         int count = BPF_MAXINSNS;
287         struct sock_filter allow[] = {
288                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
289         };
290         struct sock_filter *filter;
291         struct sock_fprog prog = { };
292         long ret;
293
294         filter = calloc(count, sizeof(*filter));
295         ASSERT_NE(NULL, filter);
296
297         for (i = 0; i < count; i++)
298                 filter[i] = allow[0];
299
300         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
301         ASSERT_EQ(0, ret);
302
303         prog.filter = filter;
304         prog.len = 1;
305
306         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
307         ASSERT_EQ(0, ret);
308
309         prog.len = count;
310
311         /* Too many total filter instructions. */
312         for (i = 0; i < MAX_INSNS_PER_PATH; i++) {
313                 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
314                 if (ret != 0)
315                         break;
316         }
317         ASSERT_NE(0, ret) {
318                 TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)",
319                        i, count, i * (count + 4));
320         }
321 }
322
323 TEST(mode_filter_cannot_move_to_strict)
324 {
325         struct sock_filter filter[] = {
326                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
327         };
328         struct sock_fprog prog = {
329                 .len = (unsigned short)ARRAY_SIZE(filter),
330                 .filter = filter,
331         };
332         long ret;
333
334         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
335         ASSERT_EQ(0, ret);
336
337         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
338         ASSERT_EQ(0, ret);
339
340         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0);
341         EXPECT_EQ(-1, ret);
342         EXPECT_EQ(EINVAL, errno);
343 }
344
345
346 TEST(mode_filter_get_seccomp)
347 {
348         struct sock_filter filter[] = {
349                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
350         };
351         struct sock_fprog prog = {
352                 .len = (unsigned short)ARRAY_SIZE(filter),
353                 .filter = filter,
354         };
355         long ret;
356
357         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
358         ASSERT_EQ(0, ret);
359
360         ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
361         EXPECT_EQ(0, ret);
362
363         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
364         ASSERT_EQ(0, ret);
365
366         ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
367         EXPECT_EQ(2, ret);
368 }
369
370
371 TEST(ALLOW_all)
372 {
373         struct sock_filter filter[] = {
374                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
375         };
376         struct sock_fprog prog = {
377                 .len = (unsigned short)ARRAY_SIZE(filter),
378                 .filter = filter,
379         };
380         long ret;
381
382         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
383         ASSERT_EQ(0, ret);
384
385         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
386         ASSERT_EQ(0, ret);
387 }
388
389 TEST(empty_prog)
390 {
391         struct sock_filter filter[] = {
392         };
393         struct sock_fprog prog = {
394                 .len = (unsigned short)ARRAY_SIZE(filter),
395                 .filter = filter,
396         };
397         long ret;
398
399         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
400         ASSERT_EQ(0, ret);
401
402         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
403         EXPECT_EQ(-1, ret);
404         EXPECT_EQ(EINVAL, errno);
405 }
406
407 TEST(log_all)
408 {
409         struct sock_filter filter[] = {
410                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
411         };
412         struct sock_fprog prog = {
413                 .len = (unsigned short)ARRAY_SIZE(filter),
414                 .filter = filter,
415         };
416         long ret;
417         pid_t parent = getppid();
418
419         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
420         ASSERT_EQ(0, ret);
421
422         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
423         ASSERT_EQ(0, ret);
424
425         /* getppid() should succeed and be logged (no check for logging) */
426         EXPECT_EQ(parent, syscall(__NR_getppid));
427 }
428
429 TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS)
430 {
431         struct sock_filter filter[] = {
432                 BPF_STMT(BPF_RET|BPF_K, 0x10000000U),
433         };
434         struct sock_fprog prog = {
435                 .len = (unsigned short)ARRAY_SIZE(filter),
436                 .filter = filter,
437         };
438         long ret;
439
440         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
441         ASSERT_EQ(0, ret);
442
443         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
444         ASSERT_EQ(0, ret);
445         EXPECT_EQ(0, syscall(__NR_getpid)) {
446                 TH_LOG("getpid() shouldn't ever return");
447         }
448 }
449
450 /* return code >= 0x80000000 is unused. */
451 TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS)
452 {
453         struct sock_filter filter[] = {
454                 BPF_STMT(BPF_RET|BPF_K, 0x90000000U),
455         };
456         struct sock_fprog prog = {
457                 .len = (unsigned short)ARRAY_SIZE(filter),
458                 .filter = filter,
459         };
460         long ret;
461
462         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
463         ASSERT_EQ(0, ret);
464
465         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
466         ASSERT_EQ(0, ret);
467         EXPECT_EQ(0, syscall(__NR_getpid)) {
468                 TH_LOG("getpid() shouldn't ever return");
469         }
470 }
471
472 TEST_SIGNAL(KILL_all, SIGSYS)
473 {
474         struct sock_filter filter[] = {
475                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
476         };
477         struct sock_fprog prog = {
478                 .len = (unsigned short)ARRAY_SIZE(filter),
479                 .filter = filter,
480         };
481         long ret;
482
483         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
484         ASSERT_EQ(0, ret);
485
486         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
487         ASSERT_EQ(0, ret);
488 }
489
490 TEST_SIGNAL(KILL_one, SIGSYS)
491 {
492         struct sock_filter filter[] = {
493                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
494                         offsetof(struct seccomp_data, nr)),
495                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
496                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
497                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
498         };
499         struct sock_fprog prog = {
500                 .len = (unsigned short)ARRAY_SIZE(filter),
501                 .filter = filter,
502         };
503         long ret;
504         pid_t parent = getppid();
505
506         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
507         ASSERT_EQ(0, ret);
508
509         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
510         ASSERT_EQ(0, ret);
511
512         EXPECT_EQ(parent, syscall(__NR_getppid));
513         /* getpid() should never return. */
514         EXPECT_EQ(0, syscall(__NR_getpid));
515 }
516
517 TEST_SIGNAL(KILL_one_arg_one, SIGSYS)
518 {
519         void *fatal_address;
520         struct sock_filter filter[] = {
521                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
522                         offsetof(struct seccomp_data, nr)),
523                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_times, 1, 0),
524                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
525                 /* Only both with lower 32-bit for now. */
526                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)),
527                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K,
528                         (unsigned long)&fatal_address, 0, 1),
529                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
530                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
531         };
532         struct sock_fprog prog = {
533                 .len = (unsigned short)ARRAY_SIZE(filter),
534                 .filter = filter,
535         };
536         long ret;
537         pid_t parent = getppid();
538         struct tms timebuf;
539         clock_t clock = times(&timebuf);
540
541         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
542         ASSERT_EQ(0, ret);
543
544         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
545         ASSERT_EQ(0, ret);
546
547         EXPECT_EQ(parent, syscall(__NR_getppid));
548         EXPECT_LE(clock, syscall(__NR_times, &timebuf));
549         /* times() should never return. */
550         EXPECT_EQ(0, syscall(__NR_times, &fatal_address));
551 }
552
553 TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
554 {
555 #ifndef __NR_mmap2
556         int sysno = __NR_mmap;
557 #else
558         int sysno = __NR_mmap2;
559 #endif
560         struct sock_filter filter[] = {
561                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
562                         offsetof(struct seccomp_data, nr)),
563                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, sysno, 1, 0),
564                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
565                 /* Only both with lower 32-bit for now. */
566                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)),
567                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1),
568                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
569                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
570         };
571         struct sock_fprog prog = {
572                 .len = (unsigned short)ARRAY_SIZE(filter),
573                 .filter = filter,
574         };
575         long ret;
576         pid_t parent = getppid();
577         int fd;
578         void *map1, *map2;
579         int page_size = sysconf(_SC_PAGESIZE);
580
581         ASSERT_LT(0, page_size);
582
583         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
584         ASSERT_EQ(0, ret);
585
586         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
587         ASSERT_EQ(0, ret);
588
589         fd = open("/dev/zero", O_RDONLY);
590         ASSERT_NE(-1, fd);
591
592         EXPECT_EQ(parent, syscall(__NR_getppid));
593         map1 = (void *)syscall(sysno,
594                 NULL, page_size, PROT_READ, MAP_PRIVATE, fd, page_size);
595         EXPECT_NE(MAP_FAILED, map1);
596         /* mmap2() should never return. */
597         map2 = (void *)syscall(sysno,
598                  NULL, page_size, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE);
599         EXPECT_EQ(MAP_FAILED, map2);
600
601         /* The test failed, so clean up the resources. */
602         munmap(map1, page_size);
603         munmap(map2, page_size);
604         close(fd);
605 }
606
607 /* This is a thread task to die via seccomp filter violation. */
608 void *kill_thread(void *data)
609 {
610         bool die = (bool)data;
611
612         if (die) {
613                 prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
614                 return (void *)SIBLING_EXIT_FAILURE;
615         }
616
617         return (void *)SIBLING_EXIT_UNKILLED;
618 }
619
620 /* Prepare a thread that will kill itself or both of us. */
621 void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
622 {
623         pthread_t thread;
624         void *status;
625         /* Kill only when calling __NR_prctl. */
626         struct sock_filter filter_thread[] = {
627                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
628                         offsetof(struct seccomp_data, nr)),
629                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
630                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD),
631                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
632         };
633         struct sock_fprog prog_thread = {
634                 .len = (unsigned short)ARRAY_SIZE(filter_thread),
635                 .filter = filter_thread,
636         };
637         struct sock_filter filter_process[] = {
638                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
639                         offsetof(struct seccomp_data, nr)),
640                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
641                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS),
642                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
643         };
644         struct sock_fprog prog_process = {
645                 .len = (unsigned short)ARRAY_SIZE(filter_process),
646                 .filter = filter_process,
647         };
648
649         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
650                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
651         }
652
653         ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0,
654                              kill_process ? &prog_process : &prog_thread));
655
656         /*
657          * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS
658          * flag cannot be downgraded by a new filter.
659          */
660         ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
661
662         /* Start a thread that will exit immediately. */
663         ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false));
664         ASSERT_EQ(0, pthread_join(thread, &status));
665         ASSERT_EQ(SIBLING_EXIT_UNKILLED, (unsigned long)status);
666
667         /* Start a thread that will die immediately. */
668         ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)true));
669         ASSERT_EQ(0, pthread_join(thread, &status));
670         ASSERT_NE(SIBLING_EXIT_FAILURE, (unsigned long)status);
671
672         /*
673          * If we get here, only the spawned thread died. Let the parent know
674          * the whole process didn't die (i.e. this thread, the spawner,
675          * stayed running).
676          */
677         exit(42);
678 }
679
680 TEST(KILL_thread)
681 {
682         int status;
683         pid_t child_pid;
684
685         child_pid = fork();
686         ASSERT_LE(0, child_pid);
687         if (child_pid == 0) {
688                 kill_thread_or_group(_metadata, false);
689                 _exit(38);
690         }
691
692         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
693
694         /* If only the thread was killed, we'll see exit 42. */
695         ASSERT_TRUE(WIFEXITED(status));
696         ASSERT_EQ(42, WEXITSTATUS(status));
697 }
698
699 TEST(KILL_process)
700 {
701         int status;
702         pid_t child_pid;
703
704         child_pid = fork();
705         ASSERT_LE(0, child_pid);
706         if (child_pid == 0) {
707                 kill_thread_or_group(_metadata, true);
708                 _exit(38);
709         }
710
711         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
712
713         /* If the entire process was killed, we'll see SIGSYS. */
714         ASSERT_TRUE(WIFSIGNALED(status));
715         ASSERT_EQ(SIGSYS, WTERMSIG(status));
716 }
717
718 /* TODO(wad) add 64-bit versus 32-bit arg tests. */
719 TEST(arg_out_of_range)
720 {
721         struct sock_filter filter[] = {
722                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)),
723                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
724         };
725         struct sock_fprog prog = {
726                 .len = (unsigned short)ARRAY_SIZE(filter),
727                 .filter = filter,
728         };
729         long ret;
730
731         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
732         ASSERT_EQ(0, ret);
733
734         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
735         EXPECT_EQ(-1, ret);
736         EXPECT_EQ(EINVAL, errno);
737 }
738
/*
 * ERRNO_FILTER(name, err) - declare a filter that fails read() with err.
 *
 * Declares two locals in the enclosing scope:
 *   _read_filter_<name>  instructions returning SECCOMP_RET_ERRNO | err
 *                        for __NR_read and SECCOMP_RET_ALLOW otherwise
 *   prog_<name>          the struct sock_fprog wrapping those instructions
 *
 * The value parameter is not called "errno" because errno is itself a
 * libc macro, and it is parenthesized so that an expression argument
 * cannot rebind against the '|'.
 */
#define ERRNO_FILTER(name, err)						\
	struct sock_filter _read_filter_##name[] = {			\
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,				\
			offsetof(struct seccomp_data, nr)),		\
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),	\
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | (err)),	\
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),		\
	};								\
	struct sock_fprog prog_##name = {				\
		.len = (unsigned short)ARRAY_SIZE(_read_filter_##name),	\
		.filter = _read_filter_##name,				\
	}
751
752 /* Make sure basic errno values are correctly passed through a filter. */
753 TEST(ERRNO_valid)
754 {
755         ERRNO_FILTER(valid, E2BIG);
756         long ret;
757         pid_t parent = getppid();
758
759         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
760         ASSERT_EQ(0, ret);
761
762         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_valid);
763         ASSERT_EQ(0, ret);
764
765         EXPECT_EQ(parent, syscall(__NR_getppid));
766         EXPECT_EQ(-1, read(0, NULL, 0));
767         EXPECT_EQ(E2BIG, errno);
768 }
769
770 /* Make sure an errno of zero is correctly handled by the arch code. */
771 TEST(ERRNO_zero)
772 {
773         ERRNO_FILTER(zero, 0);
774         long ret;
775         pid_t parent = getppid();
776
777         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
778         ASSERT_EQ(0, ret);
779
780         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_zero);
781         ASSERT_EQ(0, ret);
782
783         EXPECT_EQ(parent, syscall(__NR_getppid));
784         /* "errno" of 0 is ok. */
785         EXPECT_EQ(0, read(0, NULL, 0));
786 }
787
788 /*
789  * The SECCOMP_RET_DATA mask is 16 bits wide, but errno is smaller.
790  * This tests that the errno value gets capped correctly, fixed by
791  * 580c57f10768 ("seccomp: cap SECCOMP_RET_ERRNO data to MAX_ERRNO").
792  */
793 TEST(ERRNO_capped)
794 {
795         ERRNO_FILTER(capped, 4096);
796         long ret;
797         pid_t parent = getppid();
798
799         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
800         ASSERT_EQ(0, ret);
801
802         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_capped);
803         ASSERT_EQ(0, ret);
804
805         EXPECT_EQ(parent, syscall(__NR_getppid));
806         EXPECT_EQ(-1, read(0, NULL, 0));
807         EXPECT_EQ(4095, errno);
808 }
809
810 /*
811  * Filters are processed in reverse order: last applied is executed first.
812  * Since only the SECCOMP_RET_ACTION mask is tested for return values, the
813  * SECCOMP_RET_DATA mask results will follow the most recently applied
814  * matching filter return (and not the lowest or highest value).
815  */
816 TEST(ERRNO_order)
817 {
818         ERRNO_FILTER(first,  11);
819         ERRNO_FILTER(second, 13);
820         ERRNO_FILTER(third,  12);
821         long ret;
822         pid_t parent = getppid();
823
824         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
825         ASSERT_EQ(0, ret);
826
827         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_first);
828         ASSERT_EQ(0, ret);
829
830         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_second);
831         ASSERT_EQ(0, ret);
832
833         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_third);
834         ASSERT_EQ(0, ret);
835
836         EXPECT_EQ(parent, syscall(__NR_getppid));
837         EXPECT_EQ(-1, read(0, NULL, 0));
838         EXPECT_EQ(12, errno);
839 }
840
841 FIXTURE_DATA(TRAP) {
842         struct sock_fprog prog;
843 };
844
845 FIXTURE_SETUP(TRAP)
846 {
847         struct sock_filter filter[] = {
848                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
849                         offsetof(struct seccomp_data, nr)),
850                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
851                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
852                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
853         };
854
855         memset(&self->prog, 0, sizeof(self->prog));
856         self->prog.filter = malloc(sizeof(filter));
857         ASSERT_NE(NULL, self->prog.filter);
858         memcpy(self->prog.filter, filter, sizeof(filter));
859         self->prog.len = (unsigned short)ARRAY_SIZE(filter);
860 }
861
862 FIXTURE_TEARDOWN(TRAP)
863 {
864         if (self->prog.filter)
865                 free(self->prog.filter);
866 }
867
868 TEST_F_SIGNAL(TRAP, dfl, SIGSYS)
869 {
870         long ret;
871
872         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
873         ASSERT_EQ(0, ret);
874
875         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
876         ASSERT_EQ(0, ret);
877         syscall(__NR_getpid);
878 }
879
880 /* Ensure that SIGSYS overrides SIG_IGN */
881 TEST_F_SIGNAL(TRAP, ign, SIGSYS)
882 {
883         long ret;
884
885         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
886         ASSERT_EQ(0, ret);
887
888         signal(SIGSYS, SIG_IGN);
889
890         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
891         ASSERT_EQ(0, ret);
892         syscall(__NR_getpid);
893 }
894
895 static siginfo_t TRAP_info;
896 static volatile int TRAP_nr;
897 static void TRAP_action(int nr, siginfo_t *info, void *void_context)
898 {
899         memcpy(&TRAP_info, info, sizeof(TRAP_info));
900         TRAP_nr = nr;
901 }
902
903 TEST_F(TRAP, handler)
904 {
905         int ret, test;
906         struct sigaction act;
907         sigset_t mask;
908
909         memset(&act, 0, sizeof(act));
910         sigemptyset(&mask);
911         sigaddset(&mask, SIGSYS);
912
913         act.sa_sigaction = &TRAP_action;
914         act.sa_flags = SA_SIGINFO;
915         ret = sigaction(SIGSYS, &act, NULL);
916         ASSERT_EQ(0, ret) {
917                 TH_LOG("sigaction failed");
918         }
919         ret = sigprocmask(SIG_UNBLOCK, &mask, NULL);
920         ASSERT_EQ(0, ret) {
921                 TH_LOG("sigprocmask failed");
922         }
923
924         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
925         ASSERT_EQ(0, ret);
926         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
927         ASSERT_EQ(0, ret);
928         TRAP_nr = 0;
929         memset(&TRAP_info, 0, sizeof(TRAP_info));
930         /* Expect the registers to be rolled back. (nr = error) may vary
931          * based on arch. */
932         ret = syscall(__NR_getpid);
933         /* Silence gcc warning about volatile. */
934         test = TRAP_nr;
935         EXPECT_EQ(SIGSYS, test);
936         struct local_sigsys {
937                 void *_call_addr;       /* calling user insn */
938                 int _syscall;           /* triggering system call number */
939                 unsigned int _arch;     /* AUDIT_ARCH_* of syscall */
940         } *sigsys = (struct local_sigsys *)
941 #ifdef si_syscall
942                 &(TRAP_info.si_call_addr);
943 #else
944                 &TRAP_info.si_pid;
945 #endif
946         EXPECT_EQ(__NR_getpid, sigsys->_syscall);
947         /* Make sure arch is non-zero. */
948         EXPECT_NE(0, sigsys->_arch);
949         EXPECT_NE(0, (unsigned long)sigsys->_call_addr);
950 }
951
952 FIXTURE_DATA(precedence) {
953         struct sock_fprog allow;
954         struct sock_fprog log;
955         struct sock_fprog trace;
956         struct sock_fprog error;
957         struct sock_fprog trap;
958         struct sock_fprog kill;
959 };
960
961 FIXTURE_SETUP(precedence)
962 {
963         struct sock_filter allow_insns[] = {
964                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
965         };
966         struct sock_filter log_insns[] = {
967                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
968                         offsetof(struct seccomp_data, nr)),
969                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
970                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
971                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
972         };
973         struct sock_filter trace_insns[] = {
974                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
975                         offsetof(struct seccomp_data, nr)),
976                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
977                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
978                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE),
979         };
980         struct sock_filter error_insns[] = {
981                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
982                         offsetof(struct seccomp_data, nr)),
983                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
984                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
985                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO),
986         };
987         struct sock_filter trap_insns[] = {
988                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
989                         offsetof(struct seccomp_data, nr)),
990                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
991                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
992                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
993         };
994         struct sock_filter kill_insns[] = {
995                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
996                         offsetof(struct seccomp_data, nr)),
997                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
998                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
999                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1000         };
1001
1002         memset(self, 0, sizeof(*self));
1003 #define FILTER_ALLOC(_x) \
1004         self->_x.filter = malloc(sizeof(_x##_insns)); \
1005         ASSERT_NE(NULL, self->_x.filter); \
1006         memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \
1007         self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns)
1008         FILTER_ALLOC(allow);
1009         FILTER_ALLOC(log);
1010         FILTER_ALLOC(trace);
1011         FILTER_ALLOC(error);
1012         FILTER_ALLOC(trap);
1013         FILTER_ALLOC(kill);
1014 }
1015
1016 FIXTURE_TEARDOWN(precedence)
1017 {
1018 #define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter)
1019         FILTER_FREE(allow);
1020         FILTER_FREE(log);
1021         FILTER_FREE(trace);
1022         FILTER_FREE(error);
1023         FILTER_FREE(trap);
1024         FILTER_FREE(kill);
1025 }
1026
1027 TEST_F(precedence, allow_ok)
1028 {
1029         pid_t parent, res = 0;
1030         long ret;
1031
1032         parent = getppid();
1033         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1034         ASSERT_EQ(0, ret);
1035
1036         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1037         ASSERT_EQ(0, ret);
1038         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1039         ASSERT_EQ(0, ret);
1040         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1041         ASSERT_EQ(0, ret);
1042         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1043         ASSERT_EQ(0, ret);
1044         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1045         ASSERT_EQ(0, ret);
1046         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1047         ASSERT_EQ(0, ret);
1048         /* Should work just fine. */
1049         res = syscall(__NR_getppid);
1050         EXPECT_EQ(parent, res);
1051 }
1052
1053 TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS)
1054 {
1055         pid_t parent, res = 0;
1056         long ret;
1057
1058         parent = getppid();
1059         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1060         ASSERT_EQ(0, ret);
1061
1062         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1063         ASSERT_EQ(0, ret);
1064         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1065         ASSERT_EQ(0, ret);
1066         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1067         ASSERT_EQ(0, ret);
1068         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1069         ASSERT_EQ(0, ret);
1070         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1071         ASSERT_EQ(0, ret);
1072         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1073         ASSERT_EQ(0, ret);
1074         /* Should work just fine. */
1075         res = syscall(__NR_getppid);
1076         EXPECT_EQ(parent, res);
1077         /* getpid() should never return. */
1078         res = syscall(__NR_getpid);
1079         EXPECT_EQ(0, res);
1080 }
1081
1082 TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS)
1083 {
1084         pid_t parent;
1085         long ret;
1086
1087         parent = getppid();
1088         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1089         ASSERT_EQ(0, ret);
1090
1091         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1092         ASSERT_EQ(0, ret);
1093         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1094         ASSERT_EQ(0, ret);
1095         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1096         ASSERT_EQ(0, ret);
1097         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1098         ASSERT_EQ(0, ret);
1099         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1100         ASSERT_EQ(0, ret);
1101         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1102         ASSERT_EQ(0, ret);
1103         /* Should work just fine. */
1104         EXPECT_EQ(parent, syscall(__NR_getppid));
1105         /* getpid() should never return. */
1106         EXPECT_EQ(0, syscall(__NR_getpid));
1107 }
1108
1109 TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS)
1110 {
1111         pid_t parent;
1112         long ret;
1113
1114         parent = getppid();
1115         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1116         ASSERT_EQ(0, ret);
1117
1118         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1119         ASSERT_EQ(0, ret);
1120         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1121         ASSERT_EQ(0, ret);
1122         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1123         ASSERT_EQ(0, ret);
1124         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1125         ASSERT_EQ(0, ret);
1126         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1127         ASSERT_EQ(0, ret);
1128         /* Should work just fine. */
1129         EXPECT_EQ(parent, syscall(__NR_getppid));
1130         /* getpid() should never return. */
1131         EXPECT_EQ(0, syscall(__NR_getpid));
1132 }
1133
1134 TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS)
1135 {
1136         pid_t parent;
1137         long ret;
1138
1139         parent = getppid();
1140         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1141         ASSERT_EQ(0, ret);
1142
1143         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1144         ASSERT_EQ(0, ret);
1145         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1146         ASSERT_EQ(0, ret);
1147         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1148         ASSERT_EQ(0, ret);
1149         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1150         ASSERT_EQ(0, ret);
1151         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1152         ASSERT_EQ(0, ret);
1153         /* Should work just fine. */
1154         EXPECT_EQ(parent, syscall(__NR_getppid));
1155         /* getpid() should never return. */
1156         EXPECT_EQ(0, syscall(__NR_getpid));
1157 }
1158
1159 TEST_F(precedence, errno_is_third)
1160 {
1161         pid_t parent;
1162         long ret;
1163
1164         parent = getppid();
1165         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1166         ASSERT_EQ(0, ret);
1167
1168         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1169         ASSERT_EQ(0, ret);
1170         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1171         ASSERT_EQ(0, ret);
1172         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1173         ASSERT_EQ(0, ret);
1174         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1175         ASSERT_EQ(0, ret);
1176         /* Should work just fine. */
1177         EXPECT_EQ(parent, syscall(__NR_getppid));
1178         EXPECT_EQ(0, syscall(__NR_getpid));
1179 }
1180
1181 TEST_F(precedence, errno_is_third_in_any_order)
1182 {
1183         pid_t parent;
1184         long ret;
1185
1186         parent = getppid();
1187         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1188         ASSERT_EQ(0, ret);
1189
1190         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1191         ASSERT_EQ(0, ret);
1192         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1193         ASSERT_EQ(0, ret);
1194         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1195         ASSERT_EQ(0, ret);
1196         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1197         ASSERT_EQ(0, ret);
1198         /* Should work just fine. */
1199         EXPECT_EQ(parent, syscall(__NR_getppid));
1200         EXPECT_EQ(0, syscall(__NR_getpid));
1201 }
1202
1203 TEST_F(precedence, trace_is_fourth)
1204 {
1205         pid_t parent;
1206         long ret;
1207
1208         parent = getppid();
1209         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1210         ASSERT_EQ(0, ret);
1211
1212         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1213         ASSERT_EQ(0, ret);
1214         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1215         ASSERT_EQ(0, ret);
1216         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1217         ASSERT_EQ(0, ret);
1218         /* Should work just fine. */
1219         EXPECT_EQ(parent, syscall(__NR_getppid));
1220         /* No ptracer */
1221         EXPECT_EQ(-1, syscall(__NR_getpid));
1222 }
1223
1224 TEST_F(precedence, trace_is_fourth_in_any_order)
1225 {
1226         pid_t parent;
1227         long ret;
1228
1229         parent = getppid();
1230         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1231         ASSERT_EQ(0, ret);
1232
1233         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1234         ASSERT_EQ(0, ret);
1235         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1236         ASSERT_EQ(0, ret);
1237         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1238         ASSERT_EQ(0, ret);
1239         /* Should work just fine. */
1240         EXPECT_EQ(parent, syscall(__NR_getppid));
1241         /* No ptracer */
1242         EXPECT_EQ(-1, syscall(__NR_getpid));
1243 }
1244
1245 TEST_F(precedence, log_is_fifth)
1246 {
1247         pid_t mypid, parent;
1248         long ret;
1249
1250         mypid = getpid();
1251         parent = getppid();
1252         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1253         ASSERT_EQ(0, ret);
1254
1255         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1256         ASSERT_EQ(0, ret);
1257         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1258         ASSERT_EQ(0, ret);
1259         /* Should work just fine. */
1260         EXPECT_EQ(parent, syscall(__NR_getppid));
1261         /* Should also work just fine */
1262         EXPECT_EQ(mypid, syscall(__NR_getpid));
1263 }
1264
1265 TEST_F(precedence, log_is_fifth_in_any_order)
1266 {
1267         pid_t mypid, parent;
1268         long ret;
1269
1270         mypid = getpid();
1271         parent = getppid();
1272         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1273         ASSERT_EQ(0, ret);
1274
1275         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1276         ASSERT_EQ(0, ret);
1277         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1278         ASSERT_EQ(0, ret);
1279         /* Should work just fine. */
1280         EXPECT_EQ(parent, syscall(__NR_getppid));
1281         /* Should also work just fine */
1282         EXPECT_EQ(mypid, syscall(__NR_getpid));
1283 }
1284
/* Fallback for headers that do not provide the seccomp ptrace option. */
#ifndef PTRACE_O_TRACESECCOMP
#define PTRACE_O_TRACESECCOMP   0x00000080
#endif

/*
 * Catch the Ubuntu 12.04 value error: if the headers supply any value other
 * than 7, discard it so the definition below takes over.
 */
#if PTRACE_EVENT_SECCOMP != 7
#undef PTRACE_EVENT_SECCOMP
#endif

#ifndef PTRACE_EVENT_SECCOMP
#define PTRACE_EVENT_SECCOMP 7
#endif

/*
 * True when the bits above bit 15 of a wait status equal
 * PTRACE_EVENT_SECCOMP. The argument is parenthesized so that compound
 * expressions such as "a | b" are shifted as a whole.
 */
#define IS_SECCOMP_EVENT(status) (((status) >> 16) == PTRACE_EVENT_SECCOMP)
/*
 * Run flag for the tracer loop in start_tracer(). It is cleared from a
 * signal handler, so it is a volatile sig_atomic_t: that is the object type
 * the C standard allows a handler to write and the main flow to read back.
 */
volatile sig_atomic_t tracer_running;

/* Signal handler: ask the tracer loop to stop. @sig is not examined. */
void tracer_stop(int sig)
{
        (void)sig;
        tracer_running = false;
}
1304
/*
 * Callback invoked by start_tracer() each time the tracee stops.
 * @_metadata: harness state, needed by the EXPECT_/ASSERT_ macros
 * @tracee:    pid of the traced process
 * @status:    wait status that reported the stop
 * @args:      opaque pointer handed through from setup_trace_fixture()
 */
typedef void tracer_func_t(struct __test_metadata *_metadata,
                           pid_t tracee, int status, void *args);
1307
1308 void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
1309             tracer_func_t tracer_func, void *args, bool ptrace_syscall)
1310 {
1311         int ret = -1;
1312         struct sigaction action = {
1313                 .sa_handler = tracer_stop,
1314         };
1315
1316         /* Allow external shutdown. */
1317         tracer_running = true;
1318         ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL));
1319
1320         errno = 0;
1321         while (ret == -1 && errno != EINVAL)
1322                 ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0);
1323         ASSERT_EQ(0, ret) {
1324                 kill(tracee, SIGKILL);
1325         }
1326         /* Wait for attach stop */
1327         wait(NULL);
1328
1329         ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, ptrace_syscall ?
1330                                                       PTRACE_O_TRACESYSGOOD :
1331                                                       PTRACE_O_TRACESECCOMP);
1332         ASSERT_EQ(0, ret) {
1333                 TH_LOG("Failed to set PTRACE_O_TRACESECCOMP");
1334                 kill(tracee, SIGKILL);
1335         }
1336         ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
1337                      tracee, NULL, 0);
1338         ASSERT_EQ(0, ret);
1339
1340         /* Unblock the tracee */
1341         ASSERT_EQ(1, write(fd, "A", 1));
1342         ASSERT_EQ(0, close(fd));
1343
1344         /* Run until we're shut down. Must assert to stop execution. */
1345         while (tracer_running) {
1346                 int status;
1347
1348                 if (wait(&status) != tracee)
1349                         continue;
1350                 if (WIFSIGNALED(status) || WIFEXITED(status))
1351                         /* Child is dead. Time to go. */
1352                         return;
1353
1354                 /* Check if this is a seccomp event. */
1355                 ASSERT_EQ(!ptrace_syscall, IS_SECCOMP_EVENT(status));
1356
1357                 tracer_func(_metadata, tracee, status, args);
1358
1359                 ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
1360                              tracee, NULL, 0);
1361                 ASSERT_EQ(0, ret);
1362         }
1363         /* Directly report the status of our test harness results. */
1364         syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
1365 }
1366
1367 /* Common tracer setup/teardown functions. */
1368 void cont_handler(int num)
1369 { }
1370 pid_t setup_trace_fixture(struct __test_metadata *_metadata,
1371                           tracer_func_t func, void *args, bool ptrace_syscall)
1372 {
1373         char sync;
1374         int pipefd[2];
1375         pid_t tracer_pid;
1376         pid_t tracee = getpid();
1377
1378         /* Setup a pipe for clean synchronization. */
1379         ASSERT_EQ(0, pipe(pipefd));
1380
1381         /* Fork a child which we'll promote to tracer */
1382         tracer_pid = fork();
1383         ASSERT_LE(0, tracer_pid);
1384         signal(SIGALRM, cont_handler);
1385         if (tracer_pid == 0) {
1386                 close(pipefd[0]);
1387                 start_tracer(_metadata, pipefd[1], tracee, func, args,
1388                              ptrace_syscall);
1389                 syscall(__NR_exit, 0);
1390         }
1391         close(pipefd[1]);
1392         prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0);
1393         read(pipefd[0], &sync, 1);
1394         close(pipefd[0]);
1395
1396         return tracer_pid;
1397 }
1398 void teardown_trace_fixture(struct __test_metadata *_metadata,
1399                             pid_t tracer)
1400 {
1401         if (tracer) {
1402                 int status;
1403                 /*
1404                  * Extract the exit code from the other process and
1405                  * adopt it for ourselves in case its asserts failed.
1406                  */
1407                 ASSERT_EQ(0, kill(tracer, SIGUSR1));
1408                 ASSERT_EQ(tracer, waitpid(tracer, &status, 0));
1409                 if (WEXITSTATUS(status))
1410                         _metadata->passed = 0;
1411         }
1412 }
1413
/* "poke" tracer arguments and function. */
/* Argument block handed to tracer_poke() through its void *args. */
struct tracer_args_poke_t {
        unsigned long poke_addr;        /* address in the tracee that tracer_poke() writes to */
};
1418
1419 void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status,
1420                  void *args)
1421 {
1422         int ret;
1423         unsigned long msg;
1424         struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args;
1425
1426         ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1427         EXPECT_EQ(0, ret);
1428         /* If this fails, don't try to recover. */
1429         ASSERT_EQ(0x1001, msg) {
1430                 kill(tracee, SIGKILL);
1431         }
1432         /*
1433          * Poke in the message.
1434          * Registers are not touched to try to keep this relatively arch
1435          * agnostic.
1436          */
1437         ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001);
1438         EXPECT_EQ(0, ret);
1439 }
1440
/* State for the TRACE_poke tests; filled in by FIXTURE_SETUP(TRACE_poke). */
FIXTURE_DATA(TRACE_poke) {
        struct sock_fprog prog;         /* SECCOMP_RET_TRACE | 0x1001 on read, allow otherwise */
        pid_t tracer;                   /* pid returned by setup_trace_fixture() */
        long poked;                     /* word whose address is given to the tracer */
        struct tracer_args_poke_t tracer_args;  /* holds the address of poked */
};
1447
1448 FIXTURE_SETUP(TRACE_poke)
1449 {
1450         struct sock_filter filter[] = {
1451                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1452                         offsetof(struct seccomp_data, nr)),
1453                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
1454                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001),
1455                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1456         };
1457
1458         self->poked = 0;
1459         memset(&self->prog, 0, sizeof(self->prog));
1460         self->prog.filter = malloc(sizeof(filter));
1461         ASSERT_NE(NULL, self->prog.filter);
1462         memcpy(self->prog.filter, filter, sizeof(filter));
1463         self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1464
1465         /* Set up tracer args. */
1466         self->tracer_args.poke_addr = (unsigned long)&self->poked;
1467
1468         /* Launch tracer. */
1469         self->tracer = setup_trace_fixture(_metadata, tracer_poke,
1470                                            &self->tracer_args, false);
1471 }
1472
1473 FIXTURE_TEARDOWN(TRACE_poke)
1474 {
1475         teardown_trace_fixture(_metadata, self->tracer);
1476         if (self->prog.filter)
1477                 free(self->prog.filter);
1478 }
1479
1480 TEST_F(TRACE_poke, read_has_side_effects)
1481 {
1482         ssize_t ret;
1483
1484         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1485         ASSERT_EQ(0, ret);
1486
1487         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1488         ASSERT_EQ(0, ret);
1489
1490         EXPECT_EQ(0, self->poked);
1491         ret = read(-1, NULL, 0);
1492         EXPECT_EQ(-1, ret);
1493         EXPECT_EQ(0x1001, self->poked);
1494 }
1495
1496 TEST_F(TRACE_poke, getpid_runs_normally)
1497 {
1498         long ret;
1499
1500         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1501         ASSERT_EQ(0, ret);
1502
1503         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1504         ASSERT_EQ(0, ret);
1505
1506         EXPECT_EQ(0, self->poked);
1507         EXPECT_NE(0, syscall(__NR_getpid));
1508         EXPECT_EQ(0, self->poked);
1509 }
1510
/*
 * Per-architecture view of the traced registers:
 *   ARCH_REGS    register structure filled in by ptrace
 *   SYSCALL_NUM  member holding the syscall number
 *   SYSCALL_RET  member holding the syscall return value
 * Architectures where both live in the same register also define
 * SYSCALL_NUM_RET_SHARE_REG, because the return value cannot be rewritten
 * there without clobbering the syscall number.
 */
#if defined(__x86_64__)
# define ARCH_REGS      struct user_regs_struct
# define SYSCALL_NUM    orig_rax
# define SYSCALL_RET    rax
#elif defined(__i386__)
# define ARCH_REGS      struct user_regs_struct
# define SYSCALL_NUM    orig_eax
# define SYSCALL_RET    eax
#elif defined(__arm__)
# define ARCH_REGS      struct pt_regs
# define SYSCALL_NUM    ARM_r7
# define SYSCALL_RET    ARM_r0
#elif defined(__aarch64__)
# define ARCH_REGS      struct user_pt_regs
# define SYSCALL_NUM    regs[8]
# define SYSCALL_RET    regs[0]
#elif defined(__hppa__)
# define ARCH_REGS      struct user_regs_struct
# define SYSCALL_NUM    gr[20]
# define SYSCALL_RET    gr[28]
#elif defined(__powerpc__)
# define ARCH_REGS      struct pt_regs
# define SYSCALL_NUM    gpr[0]
# define SYSCALL_RET    gpr[3]
#elif defined(__s390__)
# define ARCH_REGS     s390_regs
# define SYSCALL_NUM   gprs[2]
# define SYSCALL_RET   gprs[2]
/* Number and return value are both gprs[2]; see the note above. */
# define SYSCALL_NUM_RET_SHARE_REG
#elif defined(__mips__)
# define ARCH_REGS      struct pt_regs
# define SYSCALL_NUM    regs[2]
# define SYSCALL_SYSCALL_NUM regs[4]
# define SYSCALL_RET    regs[2]
# define SYSCALL_NUM_RET_SHARE_REG
#else
# error "Do not know how to find your architecture's registers and syscalls"
#endif

/* When the syscall return can't be changed, stub out the tests for it. */
#ifdef SYSCALL_NUM_RET_SHARE_REG
# define EXPECT_SYSCALL_RETURN(val, action)     EXPECT_EQ(-1, action)
#else
# define EXPECT_SYSCALL_RETURN(val, action)     EXPECT_EQ(val, action)
#endif

/* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
 * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux).
 */
#if defined(__x86_64__) || defined(__i386__) || defined(__mips__)
#define HAVE_GETREGS
#endif
1562
/*
 * Architecture-specific syscall fetching routine.
 *
 * Reads the tracee's registers (PTRACE_GETREGS when HAVE_GETREGS is
 * defined, PTRACE_GETREGSET with NT_PRSTATUS otherwise) and returns the
 * value found in the SYSCALL_NUM member. If the register read fails, the
 * failure is recorded through EXPECT_EQ and -1 is returned.
 */
int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
{
        ARCH_REGS regs;
#ifdef HAVE_GETREGS
        EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, &regs)) {
                TH_LOG("PTRACE_GETREGS failed");
                return -1;
        }
#else
        struct iovec iov;

        iov.iov_base = &regs;
        iov.iov_len = sizeof(regs);
        EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) {
                TH_LOG("PTRACE_GETREGSET failed");
                return -1;
        }
#endif

#if defined(__mips__)
        /*
         * When the number register holds __NR_O32_Linux the real syscall
         * number is taken from SYSCALL_SYSCALL_NUM instead (presumably the
         * indirect syscall entry -- confirm against the MIPS ABI).
         */
        if (regs.SYSCALL_NUM == __NR_O32_Linux)
                return regs.SYSCALL_SYSCALL_NUM;
#endif
        return regs.SYSCALL_NUM;
}
1589
/*
 * Architecture-specific syscall changing routine.
 *
 * Rewrites the syscall the stopped @tracee is about to run to @syscall.
 * Passing -1 skips the syscall; where the architecture allows it, the
 * return-value register is then set to EPERM. The sequence is: read the
 * registers, update the syscall number in the architecture's way, adjust
 * the return value if needed, write the registers back.
 */
void change_syscall(struct __test_metadata *_metadata,
                    pid_t tracee, int syscall)
{
        int ret;
        ARCH_REGS regs;
#ifdef HAVE_GETREGS
        ret = ptrace(PTRACE_GETREGS, tracee, 0, &regs);
#else
        struct iovec iov;
        iov.iov_base = &regs;
        iov.iov_len = sizeof(regs);
        ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov);
#endif
        /*
         * NOTE(review): a failed read is only recorded here; the function
         * carries on with whatever regs contains.
         */
        EXPECT_EQ(0, ret) {}

#if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \
    defined(__s390__) || defined(__hppa__)
        {
                /* The number lives in the register set itself. */
                regs.SYSCALL_NUM = syscall;
        }
#elif defined(__mips__)
        {
                /* Same __NR_O32_Linux special case as in get_syscall(). */
                if (regs.SYSCALL_NUM == __NR_O32_Linux)
                        regs.SYSCALL_SYSCALL_NUM = syscall;
                else
                        regs.SYSCALL_NUM = syscall;
        }

#elif defined(__arm__)
# ifndef PTRACE_SET_SYSCALL
#  define PTRACE_SET_SYSCALL   23
# endif
        {
                /* arm changes the number through a dedicated request. */
                ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall);
                EXPECT_EQ(0, ret);
        }

#elif defined(__aarch64__)
# ifndef NT_ARM_SYSTEM_CALL
#  define NT_ARM_SYSTEM_CALL 0x404
# endif
        {
                /* aarch64 changes the number through its own regset. */
                iov.iov_base = &syscall;
                iov.iov_len = sizeof(syscall);
                ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL,
                             &iov);
                EXPECT_EQ(0, ret);
        }

#else
        ASSERT_EQ(1, 0) {
                TH_LOG("How is the syscall changed on this architecture?");
        }
#endif

        /* If syscall is skipped, change return value. */
        if (syscall == -1)
#ifdef SYSCALL_NUM_RET_SHARE_REG
                TH_LOG("Can't modify syscall return on this architecture");
#else
                regs.SYSCALL_RET = EPERM;
#endif

        /* Write the (possibly modified) register set back to the tracee. */
#ifdef HAVE_GETREGS
        ret = ptrace(PTRACE_SETREGS, tracee, 0, &regs);
#else
        /* iov may have been repointed at &syscall above; aim it at regs again. */
        iov.iov_base = &regs;
        iov.iov_len = sizeof(regs);
        ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov);
#endif
        EXPECT_EQ(0, ret);
}
1663
1664 void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee,
1665                     int status, void *args)
1666 {
1667         int ret;
1668         unsigned long msg;
1669
1670         /* Make sure we got the right message. */
1671         ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1672         EXPECT_EQ(0, ret);
1673
1674         /* Validate and take action on expected syscalls. */
1675         switch (msg) {
1676         case 0x1002:
1677                 /* change getpid to getppid. */
1678                 EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee));
1679                 change_syscall(_metadata, tracee, __NR_getppid);
1680                 break;
1681         case 0x1003:
1682                 /* skip gettid. */
1683                 EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee));
1684                 change_syscall(_metadata, tracee, -1);
1685                 break;
1686         case 0x1004:
1687                 /* do nothing (allow getppid) */
1688                 EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee));
1689                 break;
1690         default:
1691                 EXPECT_EQ(0, msg) {
1692                         TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg);
1693                         kill(tracee, SIGKILL);
1694                 }
1695         }
1696
1697 }
1698
/*
 * Tracer callback for the PTRACE_SYSCALL tests. Called on every syscall
 * stop; acts only on every other call (treated as syscall entry), where it
 * rewrites getpid to getppid and skips open.
 *
 * @status and @args are not examined.
 */
void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
                   int status, void *args)
{
        int ret, nr;
        /* Pre-set so a failed PTRACE_GETEVENTMSG cannot leave it indeterminate. */
        unsigned long msg = 0;
        /* Toggles on each call; true while handling a syscall-entry stop. */
        static bool entry;

        /* Make sure we got an empty message. */
        ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
        EXPECT_EQ(0, ret);
        EXPECT_EQ(0, msg);

        /* The only way to tell PTRACE_SYSCALL entry/exit is by counting. */
        entry = !entry;
        if (!entry)
                return;

        nr = get_syscall(_metadata, tracee);

        if (nr == __NR_getpid)
                change_syscall(_metadata, tracee, __NR_getppid);
        /*
         * NOTE(review): __NR_open may not exist on architectures that only
         * provide openat -- confirm this builds on every ARCH_REGS target.
         */
        if (nr == __NR_open)
                change_syscall(_metadata, tracee, -1);
}
1723
/* State for the TRACE_syscall tests; filled in by FIXTURE_SETUP(TRACE_syscall). */
FIXTURE_DATA(TRACE_syscall) {
        struct sock_fprog prog;         /* traces getpid/gettid/getppid, allows the rest */
        /* tracer pid, plus gettid/getpid/getppid results recorded during setup */
        pid_t tracer, mytid, mypid, parent;
};
1728
1729 FIXTURE_SETUP(TRACE_syscall)
1730 {
1731         struct sock_filter filter[] = {
1732                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1733                         offsetof(struct seccomp_data, nr)),
1734                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
1735                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002),
1736                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1),
1737                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003),
1738                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1739                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004),
1740                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1741         };
1742
1743         memset(&self->prog, 0, sizeof(self->prog));
1744         self->prog.filter = malloc(sizeof(filter));
1745         ASSERT_NE(NULL, self->prog.filter);
1746         memcpy(self->prog.filter, filter, sizeof(filter));
1747         self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1748
1749         /* Prepare some testable syscall results. */
1750         self->mytid = syscall(__NR_gettid);
1751         ASSERT_GT(self->mytid, 0);
1752         ASSERT_NE(self->mytid, 1) {
1753                 TH_LOG("Running this test as init is not supported. :)");
1754         }
1755
1756         self->mypid = getpid();
1757         ASSERT_GT(self->mypid, 0);
1758         ASSERT_EQ(self->mytid, self->mypid);
1759
1760         self->parent = getppid();
1761         ASSERT_GT(self->parent, 0);
1762         ASSERT_NE(self->parent, self->mypid);
1763
1764         /* Launch tracer. */
1765         self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL,
1766                                            false);
1767 }
1768
1769 FIXTURE_TEARDOWN(TRACE_syscall)
1770 {
1771         teardown_trace_fixture(_metadata, self->tracer);
1772         if (self->prog.filter)
1773                 free(self->prog.filter);
1774 }
1775
1776 TEST_F(TRACE_syscall, ptrace_syscall_redirected)
1777 {
1778         /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1779         teardown_trace_fixture(_metadata, self->tracer);
1780         self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1781                                            true);
1782
1783         /* Tracer will redirect getpid to getppid. */
1784         EXPECT_NE(self->mypid, syscall(__NR_getpid));
1785 }
1786
1787 TEST_F(TRACE_syscall, ptrace_syscall_dropped)
1788 {
1789         /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1790         teardown_trace_fixture(_metadata, self->tracer);
1791         self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1792                                            true);
1793
1794         /* Tracer should skip the open syscall, resulting in EPERM. */
1795         EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_open));
1796 }
1797
1798 TEST_F(TRACE_syscall, syscall_allowed)
1799 {
1800         long ret;
1801
1802         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1803         ASSERT_EQ(0, ret);
1804
1805         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1806         ASSERT_EQ(0, ret);
1807
1808         /* getppid works as expected (no changes). */
1809         EXPECT_EQ(self->parent, syscall(__NR_getppid));
1810         EXPECT_NE(self->mypid, syscall(__NR_getppid));
1811 }
1812
1813 TEST_F(TRACE_syscall, syscall_redirected)
1814 {
1815         long ret;
1816
1817         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1818         ASSERT_EQ(0, ret);
1819
1820         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1821         ASSERT_EQ(0, ret);
1822
1823         /* getpid has been redirected to getppid as expected. */
1824         EXPECT_EQ(self->parent, syscall(__NR_getpid));
1825         EXPECT_NE(self->mypid, syscall(__NR_getpid));
1826 }
1827
1828 TEST_F(TRACE_syscall, syscall_dropped)
1829 {
1830         long ret;
1831
1832         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1833         ASSERT_EQ(0, ret);
1834
1835         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1836         ASSERT_EQ(0, ret);
1837
1838         /* gettid has been skipped and an altered return value stored. */
1839         EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_gettid));
1840         EXPECT_NE(self->mytid, syscall(__NR_gettid));
1841 }
1842
1843 TEST_F(TRACE_syscall, skip_after_RET_TRACE)
1844 {
1845         struct sock_filter filter[] = {
1846                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1847                         offsetof(struct seccomp_data, nr)),
1848                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1849                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
1850                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1851         };
1852         struct sock_fprog prog = {
1853                 .len = (unsigned short)ARRAY_SIZE(filter),
1854                 .filter = filter,
1855         };
1856         long ret;
1857
1858         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1859         ASSERT_EQ(0, ret);
1860
1861         /* Install fixture filter. */
1862         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1863         ASSERT_EQ(0, ret);
1864
1865         /* Install "errno on getppid" filter. */
1866         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
1867         ASSERT_EQ(0, ret);
1868
1869         /* Tracer will redirect getpid to getppid, and we should see EPERM. */
1870         errno = 0;
1871         EXPECT_EQ(-1, syscall(__NR_getpid));
1872         EXPECT_EQ(EPERM, errno);
1873 }
1874
1875 TEST_F_SIGNAL(TRACE_syscall, kill_after_RET_TRACE, SIGSYS)
1876 {
1877         struct sock_filter filter[] = {
1878                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1879                         offsetof(struct seccomp_data, nr)),
1880                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1881                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1882                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1883         };
1884         struct sock_fprog prog = {
1885                 .len = (unsigned short)ARRAY_SIZE(filter),
1886                 .filter = filter,
1887         };
1888         long ret;
1889
1890         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1891         ASSERT_EQ(0, ret);
1892
1893         /* Install fixture filter. */
1894         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1895         ASSERT_EQ(0, ret);
1896
1897         /* Install "death on getppid" filter. */
1898         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
1899         ASSERT_EQ(0, ret);
1900
1901         /* Tracer will redirect getpid to getppid, and we should die. */
1902         EXPECT_NE(self->mypid, syscall(__NR_getpid));
1903 }
1904
1905 TEST_F(TRACE_syscall, skip_after_ptrace)
1906 {
1907         struct sock_filter filter[] = {
1908                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1909                         offsetof(struct seccomp_data, nr)),
1910                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1911                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
1912                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1913         };
1914         struct sock_fprog prog = {
1915                 .len = (unsigned short)ARRAY_SIZE(filter),
1916                 .filter = filter,
1917         };
1918         long ret;
1919
1920         /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1921         teardown_trace_fixture(_metadata, self->tracer);
1922         self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1923                                            true);
1924
1925         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1926         ASSERT_EQ(0, ret);
1927
1928         /* Install "errno on getppid" filter. */
1929         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
1930         ASSERT_EQ(0, ret);
1931
1932         /* Tracer will redirect getpid to getppid, and we should see EPERM. */
1933         EXPECT_EQ(-1, syscall(__NR_getpid));
1934         EXPECT_EQ(EPERM, errno);
1935 }
1936
1937 TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS)
1938 {
1939         struct sock_filter filter[] = {
1940                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1941                         offsetof(struct seccomp_data, nr)),
1942                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1943                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1944                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1945         };
1946         struct sock_fprog prog = {
1947                 .len = (unsigned short)ARRAY_SIZE(filter),
1948                 .filter = filter,
1949         };
1950         long ret;
1951
1952         /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1953         teardown_trace_fixture(_metadata, self->tracer);
1954         self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1955                                            true);
1956
1957         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1958         ASSERT_EQ(0, ret);
1959
1960         /* Install "death on getppid" filter. */
1961         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
1962         ASSERT_EQ(0, ret);
1963
1964         /* Tracer will redirect getpid to getppid, and we should die. */
1965         EXPECT_NE(self->mypid, syscall(__NR_getpid));
1966 }
1967
1968 TEST(seccomp_syscall)
1969 {
1970         struct sock_filter filter[] = {
1971                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1972         };
1973         struct sock_fprog prog = {
1974                 .len = (unsigned short)ARRAY_SIZE(filter),
1975                 .filter = filter,
1976         };
1977         long ret;
1978
1979         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1980         ASSERT_EQ(0, ret) {
1981                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1982         }
1983
1984         /* Reject insane operation. */
1985         ret = seccomp(-1, 0, &prog);
1986         ASSERT_NE(ENOSYS, errno) {
1987                 TH_LOG("Kernel does not support seccomp syscall!");
1988         }
1989         EXPECT_EQ(EINVAL, errno) {
1990                 TH_LOG("Did not reject crazy op value!");
1991         }
1992
1993         /* Reject strict with flags or pointer. */
1994         ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL);
1995         EXPECT_EQ(EINVAL, errno) {
1996                 TH_LOG("Did not reject mode strict with flags!");
1997         }
1998         ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog);
1999         EXPECT_EQ(EINVAL, errno) {
2000                 TH_LOG("Did not reject mode strict with uargs!");
2001         }
2002
2003         /* Reject insane args for filter. */
2004         ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog);
2005         EXPECT_EQ(EINVAL, errno) {
2006                 TH_LOG("Did not reject crazy filter flags!");
2007         }
2008         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL);
2009         EXPECT_EQ(EFAULT, errno) {
2010                 TH_LOG("Did not reject NULL filter!");
2011         }
2012
2013         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2014         EXPECT_EQ(0, errno) {
2015                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s",
2016                         strerror(errno));
2017         }
2018 }
2019
2020 TEST(seccomp_syscall_mode_lock)
2021 {
2022         struct sock_filter filter[] = {
2023                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2024         };
2025         struct sock_fprog prog = {
2026                 .len = (unsigned short)ARRAY_SIZE(filter),
2027                 .filter = filter,
2028         };
2029         long ret;
2030
2031         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
2032         ASSERT_EQ(0, ret) {
2033                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2034         }
2035
2036         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2037         ASSERT_NE(ENOSYS, errno) {
2038                 TH_LOG("Kernel does not support seccomp syscall!");
2039         }
2040         EXPECT_EQ(0, ret) {
2041                 TH_LOG("Could not install filter!");
2042         }
2043
2044         /* Make sure neither entry point will switch to strict. */
2045         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0);
2046         EXPECT_EQ(EINVAL, errno) {
2047                 TH_LOG("Switched to mode strict!");
2048         }
2049
2050         ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL);
2051         EXPECT_EQ(EINVAL, errno) {
2052                 TH_LOG("Switched to mode strict!");
2053         }
2054 }
2055
2056 /*
2057  * Test detection of known and unknown filter flags. Userspace needs to be able
2058  * to check if a filter flag is supported by the current kernel and a good way
2059  * of doing that is by attempting to enter filter mode, with the flag bit in
2060  * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates
2061  * that the flag is valid and EINVAL indicates that the flag is invalid.
2062  */
2063 TEST(detect_seccomp_filter_flags)
2064 {
2065         unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC,
2066                                  SECCOMP_FILTER_FLAG_LOG };
2067         unsigned int flag, all_flags;
2068         int i;
2069         long ret;
2070
2071         /* Test detection of known-good filter flags */
2072         for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) {
2073                 flag = flags[i];
2074                 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2075                 ASSERT_NE(ENOSYS, errno) {
2076                         TH_LOG("Kernel does not support seccomp syscall!");
2077                 }
2078                 EXPECT_EQ(-1, ret);
2079                 EXPECT_EQ(EFAULT, errno) {
2080                         TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!",
2081                                flag);
2082                 }
2083
2084                 all_flags |= flag;
2085         }
2086
2087         /* Test detection of all known-good filter flags */
2088         ret = seccomp(SECCOMP_SET_MODE_FILTER, all_flags, NULL);
2089         EXPECT_EQ(-1, ret);
2090         EXPECT_EQ(EFAULT, errno) {
2091                 TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!",
2092                        all_flags);
2093         }
2094
2095         /* Test detection of an unknown filter flag */
2096         flag = -1;
2097         ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2098         EXPECT_EQ(-1, ret);
2099         EXPECT_EQ(EINVAL, errno) {
2100                 TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!",
2101                        flag);
2102         }
2103
2104         /*
2105          * Test detection of an unknown filter flag that may simply need to be
2106          * added to this test
2107          */
2108         flag = flags[ARRAY_SIZE(flags) - 1] << 1;
2109         ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2110         EXPECT_EQ(-1, ret);
2111         EXPECT_EQ(EINVAL, errno) {
2112                 TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?",
2113                        flag);
2114         }
2115 }
2116
2117 TEST(TSYNC_first)
2118 {
2119         struct sock_filter filter[] = {
2120                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2121         };
2122         struct sock_fprog prog = {
2123                 .len = (unsigned short)ARRAY_SIZE(filter),
2124                 .filter = filter,
2125         };
2126         long ret;
2127
2128         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
2129         ASSERT_EQ(0, ret) {
2130                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2131         }
2132
2133         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2134                       &prog);
2135         ASSERT_NE(ENOSYS, errno) {
2136                 TH_LOG("Kernel does not support seccomp syscall!");
2137         }
2138         EXPECT_EQ(0, ret) {
2139                 TH_LOG("Could not install initial filter with TSYNC!");
2140         }
2141 }
2142
/* Number of sibling threads the TSYNC fixture manages. */
#define TSYNC_SIBLINGS 2

/* Per-thread state handed to each TSYNC sibling thread. */
struct tsync_sibling {
	/* pthread handle; zeroed again by PTHREAD_JOIN after a join. */
	pthread_t tid;
	/* Kernel thread id, recorded by the thread itself via gettid. */
	pid_t system_tid;
	/* Posted by the thread once it has started up. */
	sem_t *started;
	/* Condition variable (and its mutex) the thread waits on. */
	pthread_cond_t *cond;
	pthread_mutex_t *mutex;
	/* Non-zero: the thread installs 'prog' on itself before waiting. */
	int diverge;
	/* Number of condition wake-ups to consume before proceeding. */
	int num_waits;
	/* Filter program applied when 'diverge' is set. */
	struct sock_fprog *prog;
	/* Harness metadata of the test that owns this sibling. */
	struct __test_metadata *metadata;
};
2155
/*
 * Join a thread and, on success, zero its tid so that fixture teardown
 * will not touch it again (Bionic does not allow joining an already
 * joined thread). A failed join is only logged and leaves the tid in
 * place; such threads are killed directly during teardown.
 */
#define PTHREAD_JOIN(tid, status)					\
	do {								\
		int _join_rc = pthread_join(tid, status);		\
		if (_join_rc == 0) {					\
			(tid) = 0;					\
		} else {						\
			TH_LOG("pthread_join of tid %u failed: %d\n",	\
				(unsigned int)(tid), _join_rc);		\
		}							\
	} while (0)
2172
2173 FIXTURE_DATA(TSYNC) {
2174         struct sock_fprog root_prog, apply_prog;
2175         struct tsync_sibling sibling[TSYNC_SIBLINGS];
2176         sem_t started;
2177         pthread_cond_t cond;
2178         pthread_mutex_t mutex;
2179         int sibling_count;
2180 };
2181
2182 FIXTURE_SETUP(TSYNC)
2183 {
2184         struct sock_filter root_filter[] = {
2185                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2186         };
2187         struct sock_filter apply_filter[] = {
2188                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2189                         offsetof(struct seccomp_data, nr)),
2190                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
2191                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2192                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2193         };
2194
2195         memset(&self->root_prog, 0, sizeof(self->root_prog));
2196         memset(&self->apply_prog, 0, sizeof(self->apply_prog));
2197         memset(&self->sibling, 0, sizeof(self->sibling));
2198         self->root_prog.filter = malloc(sizeof(root_filter));
2199         ASSERT_NE(NULL, self->root_prog.filter);
2200         memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter));
2201         self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter);
2202
2203         self->apply_prog.filter = malloc(sizeof(apply_filter));
2204         ASSERT_NE(NULL, self->apply_prog.filter);
2205         memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter));
2206         self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter);
2207
2208         self->sibling_count = 0;
2209         pthread_mutex_init(&self->mutex, NULL);
2210         pthread_cond_init(&self->cond, NULL);
2211         sem_init(&self->started, 0, 0);
2212         self->sibling[0].tid = 0;
2213         self->sibling[0].cond = &self->cond;
2214         self->sibling[0].started = &self->started;
2215         self->sibling[0].mutex = &self->mutex;
2216         self->sibling[0].diverge = 0;
2217         self->sibling[0].num_waits = 1;
2218         self->sibling[0].prog = &self->root_prog;
2219         self->sibling[0].metadata = _metadata;
2220         self->sibling[1].tid = 0;
2221         self->sibling[1].cond = &self->cond;
2222         self->sibling[1].started = &self->started;
2223         self->sibling[1].mutex = &self->mutex;
2224         self->sibling[1].diverge = 0;
2225         self->sibling[1].prog = &self->root_prog;
2226         self->sibling[1].num_waits = 1;
2227         self->sibling[1].metadata = _metadata;
2228 }
2229
2230 FIXTURE_TEARDOWN(TSYNC)
2231 {
2232         int sib = 0;
2233
2234         if (self->root_prog.filter)
2235                 free(self->root_prog.filter);
2236         if (self->apply_prog.filter)
2237                 free(self->apply_prog.filter);
2238
2239         for ( ; sib < self->sibling_count; ++sib) {
2240                 struct tsync_sibling *s = &self->sibling[sib];
2241
2242                 if (!s->tid)
2243                         continue;
2244                 /*
2245                  * If a thread is still running, it may be stuck, so hit
2246                  * it over the head really hard.
2247                  */
2248                 pthread_kill(s->tid, 9);
2249         }
2250         pthread_mutex_destroy(&self->mutex);
2251         pthread_cond_destroy(&self->cond);
2252         sem_destroy(&self->started);
2253 }
2254
2255 void *tsync_sibling(void *data)
2256 {
2257         long ret = 0;
2258         struct tsync_sibling *me = data;
2259
2260         me->system_tid = syscall(__NR_gettid);
2261
2262         pthread_mutex_lock(me->mutex);
2263         if (me->diverge) {
2264                 /* Just re-apply the root prog to fork the tree */
2265                 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
2266                                 me->prog, 0, 0);
2267         }
2268         sem_post(me->started);
2269         /* Return outside of started so parent notices failures. */
2270         if (ret) {
2271                 pthread_mutex_unlock(me->mutex);
2272                 return (void *)SIBLING_EXIT_FAILURE;
2273         }
2274         do {
2275                 pthread_cond_wait(me->cond, me->mutex);
2276                 me->num_waits = me->num_waits - 1;
2277         } while (me->num_waits);
2278         pthread_mutex_unlock(me->mutex);
2279
2280         ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
2281         if (!ret)
2282                 return (void *)SIBLING_EXIT_NEWPRIVS;
2283         read(0, NULL, 0);
2284         return (void *)SIBLING_EXIT_UNKILLED;
2285 }
2286
2287 void tsync_start_sibling(struct tsync_sibling *sibling)
2288 {
2289         pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling);
2290 }
2291
2292 TEST_F(TSYNC, siblings_fail_prctl)
2293 {
2294         long ret;
2295         void *status;
2296         struct sock_filter filter[] = {
2297                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2298                         offsetof(struct seccomp_data, nr)),
2299                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
2300                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL),
2301                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2302         };
2303         struct sock_fprog prog = {
2304                 .len = (unsigned short)ARRAY_SIZE(filter),
2305                 .filter = filter,
2306         };
2307
2308         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2309                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2310         }
2311
2312         /* Check prctl failure detection by requesting sib 0 diverge. */
2313         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2314         ASSERT_NE(ENOSYS, errno) {
2315                 TH_LOG("Kernel does not support seccomp syscall!");
2316         }
2317         ASSERT_EQ(0, ret) {
2318                 TH_LOG("setting filter failed");
2319         }
2320
2321         self->sibling[0].diverge = 1;
2322         tsync_start_sibling(&self->sibling[0]);
2323         tsync_start_sibling(&self->sibling[1]);
2324
2325         while (self->sibling_count < TSYNC_SIBLINGS) {
2326                 sem_wait(&self->started);
2327                 self->sibling_count++;
2328         }
2329
2330         /* Signal the threads to clean up*/
2331         pthread_mutex_lock(&self->mutex);
2332         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2333                 TH_LOG("cond broadcast non-zero");
2334         }
2335         pthread_mutex_unlock(&self->mutex);
2336
2337         /* Ensure diverging sibling failed to call prctl. */
2338         PTHREAD_JOIN(self->sibling[0].tid, &status);
2339         EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status);
2340         PTHREAD_JOIN(self->sibling[1].tid, &status);
2341         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2342 }
2343
2344 TEST_F(TSYNC, two_siblings_with_ancestor)
2345 {
2346         long ret;
2347         void *status;
2348
2349         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2350                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2351         }
2352
2353         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2354         ASSERT_NE(ENOSYS, errno) {
2355                 TH_LOG("Kernel does not support seccomp syscall!");
2356         }
2357         ASSERT_EQ(0, ret) {
2358                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2359         }
2360         tsync_start_sibling(&self->sibling[0]);
2361         tsync_start_sibling(&self->sibling[1]);
2362
2363         while (self->sibling_count < TSYNC_SIBLINGS) {
2364                 sem_wait(&self->started);
2365                 self->sibling_count++;
2366         }
2367
2368         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2369                       &self->apply_prog);
2370         ASSERT_EQ(0, ret) {
2371                 TH_LOG("Could install filter on all threads!");
2372         }
2373         /* Tell the siblings to test the policy */
2374         pthread_mutex_lock(&self->mutex);
2375         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2376                 TH_LOG("cond broadcast non-zero");
2377         }
2378         pthread_mutex_unlock(&self->mutex);
2379         /* Ensure they are both killed and don't exit cleanly. */
2380         PTHREAD_JOIN(self->sibling[0].tid, &status);
2381         EXPECT_EQ(0x0, (long)status);
2382         PTHREAD_JOIN(self->sibling[1].tid, &status);
2383         EXPECT_EQ(0x0, (long)status);
2384 }
2385
2386 TEST_F(TSYNC, two_sibling_want_nnp)
2387 {
2388         void *status;
2389
2390         /* start siblings before any prctl() operations */
2391         tsync_start_sibling(&self->sibling[0]);
2392         tsync_start_sibling(&self->sibling[1]);
2393         while (self->sibling_count < TSYNC_SIBLINGS) {
2394                 sem_wait(&self->started);
2395                 self->sibling_count++;
2396         }
2397
2398         /* Tell the siblings to test no policy */
2399         pthread_mutex_lock(&self->mutex);
2400         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2401                 TH_LOG("cond broadcast non-zero");
2402         }
2403         pthread_mutex_unlock(&self->mutex);
2404
2405         /* Ensure they are both upset about lacking nnp. */
2406         PTHREAD_JOIN(self->sibling[0].tid, &status);
2407         EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
2408         PTHREAD_JOIN(self->sibling[1].tid, &status);
2409         EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
2410 }
2411
2412 TEST_F(TSYNC, two_siblings_with_no_filter)
2413 {
2414         long ret;
2415         void *status;
2416
2417         /* start siblings before any prctl() operations */
2418         tsync_start_sibling(&self->sibling[0]);
2419         tsync_start_sibling(&self->sibling[1]);
2420         while (self->sibling_count < TSYNC_SIBLINGS) {
2421                 sem_wait(&self->started);
2422                 self->sibling_count++;
2423         }
2424
2425         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2426                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2427         }
2428
2429         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2430                       &self->apply_prog);
2431         ASSERT_NE(ENOSYS, errno) {
2432                 TH_LOG("Kernel does not support seccomp syscall!");
2433         }
2434         ASSERT_EQ(0, ret) {
2435                 TH_LOG("Could install filter on all threads!");
2436         }
2437
2438         /* Tell the siblings to test the policy */
2439         pthread_mutex_lock(&self->mutex);
2440         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2441                 TH_LOG("cond broadcast non-zero");
2442         }
2443         pthread_mutex_unlock(&self->mutex);
2444
2445         /* Ensure they are both killed and don't exit cleanly. */
2446         PTHREAD_JOIN(self->sibling[0].tid, &status);
2447         EXPECT_EQ(0x0, (long)status);
2448         PTHREAD_JOIN(self->sibling[1].tid, &status);
2449         EXPECT_EQ(0x0, (long)status);
2450 }
2451
2452 TEST_F(TSYNC, two_siblings_with_one_divergence)
2453 {
2454         long ret;
2455         void *status;
2456
2457         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2458                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2459         }
2460
2461         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2462         ASSERT_NE(ENOSYS, errno) {
2463                 TH_LOG("Kernel does not support seccomp syscall!");
2464         }
2465         ASSERT_EQ(0, ret) {
2466                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2467         }
2468         self->sibling[0].diverge = 1;
2469         tsync_start_sibling(&self->sibling[0]);
2470         tsync_start_sibling(&self->sibling[1]);
2471
2472         while (self->sibling_count < TSYNC_SIBLINGS) {
2473                 sem_wait(&self->started);
2474                 self->sibling_count++;
2475         }
2476
2477         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2478                       &self->apply_prog);
2479         ASSERT_EQ(self->sibling[0].system_tid, ret) {
2480                 TH_LOG("Did not fail on diverged sibling.");
2481         }
2482
2483         /* Wake the threads */
2484         pthread_mutex_lock(&self->mutex);
2485         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2486                 TH_LOG("cond broadcast non-zero");
2487         }
2488         pthread_mutex_unlock(&self->mutex);
2489
2490         /* Ensure they are both unkilled. */
2491         PTHREAD_JOIN(self->sibling[0].tid, &status);
2492         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2493         PTHREAD_JOIN(self->sibling[1].tid, &status);
2494         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2495 }
2496
2497 TEST_F(TSYNC, two_siblings_not_under_filter)
2498 {
2499         long ret, sib;
2500         void *status;
2501
2502         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2503                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2504         }
2505
2506         /*
2507          * Sibling 0 will have its own seccomp policy
2508          * and Sibling 1 will not be under seccomp at
2509          * all. Sibling 1 will enter seccomp and 0
2510          * will cause failure.
2511          */
2512         self->sibling[0].diverge = 1;
2513         tsync_start_sibling(&self->sibling[0]);
2514         tsync_start_sibling(&self->sibling[1]);
2515
2516         while (self->sibling_count < TSYNC_SIBLINGS) {
2517                 sem_wait(&self->started);
2518                 self->sibling_count++;
2519         }
2520
2521         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2522         ASSERT_NE(ENOSYS, errno) {
2523                 TH_LOG("Kernel does not support seccomp syscall!");
2524         }
2525         ASSERT_EQ(0, ret) {
2526                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2527         }
2528
2529         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2530                       &self->apply_prog);
2531         ASSERT_EQ(ret, self->sibling[0].system_tid) {
2532                 TH_LOG("Did not fail on diverged sibling.");
2533         }
2534         sib = 1;
2535         if (ret == self->sibling[0].system_tid)
2536                 sib = 0;
2537
2538         pthread_mutex_lock(&self->mutex);
2539
2540         /* Increment the other siblings num_waits so we can clean up
2541          * the one we just saw.
2542          */
2543         self->sibling[!sib].num_waits += 1;
2544
2545         /* Signal the thread to clean up*/
2546         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2547                 TH_LOG("cond broadcast non-zero");
2548         }
2549         pthread_mutex_unlock(&self->mutex);
2550         PTHREAD_JOIN(self->sibling[sib].tid, &status);
2551         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2552         /* Poll for actual task death. pthread_join doesn't guarantee it. */
2553         while (!kill(self->sibling[sib].system_tid, 0))
2554                 sleep(0.1);
2555         /* Switch to the remaining sibling */
2556         sib = !sib;
2557
2558         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2559                       &self->apply_prog);
2560         ASSERT_EQ(0, ret) {
2561                 TH_LOG("Expected the remaining sibling to sync");
2562         };
2563
2564         pthread_mutex_lock(&self->mutex);
2565
2566         /* If remaining sibling didn't have a chance to wake up during
2567          * the first broadcast, manually reduce the num_waits now.
2568          */
2569         if (self->sibling[sib].num_waits > 1)
2570                 self->sibling[sib].num_waits = 1;
2571         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2572                 TH_LOG("cond broadcast non-zero");
2573         }
2574         pthread_mutex_unlock(&self->mutex);
2575         PTHREAD_JOIN(self->sibling[sib].tid, &status);
2576         EXPECT_EQ(0, (long)status);
2577         /* Poll for actual task death. pthread_join doesn't guarantee it. */
2578         while (!kill(self->sibling[sib].system_tid, 0))
2579                 sleep(0.1);
2580
2581         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2582                       &self->apply_prog);
2583         ASSERT_EQ(0, ret);  /* just us chickens */
2584 }
2585
2586 /* Make sure restarted syscalls are seen directly as "restart_syscall". */
     /*
      * A forked child installs the filter below and sleeps in nanosleep().
      * The parent, acting as ptrace tracer, interrupts that sleep with
      * SIGSTOP, resumes it with SIGCONT, and then checks which syscall
      * number is reported by the second seccomp trace event.
      */
2587 TEST(syscall_restart)
2588 {
2589         long ret;
2590         unsigned long msg;
2591         pid_t child_pid;
2592         int pipefd[2];
2593         int status;
2594         siginfo_t info = { };
             /*
              * Filter outcome by syscall number. Jump offsets count the
              * instructions to skip, so they depend on the exact layout
              * below and must be adjusted if entries are added or removed:
              *   sigreturn (where defined), read, exit, rt_sigreturn,
              *   write           -> RET_ALLOW
              *   nanosleep       -> RET_TRACE with event data 0x100
              *   restart_syscall -> RET_TRACE with event data 0x200
              *   anything else   -> RET_KILL
              */
2595         struct sock_filter filter[] = {
2596                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2597                          offsetof(struct seccomp_data, nr)),
2598
2599 #ifdef __NR_sigreturn
2600                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 6, 0),
2601 #endif
2602                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 5, 0),
2603                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 4, 0),
2604                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 3, 0),
2605                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 4, 0),
2606                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0),
2607
2608                 /* Allow __NR_write for easy logging. */
2609                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1),
2610                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2611                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2612                 /* The nanosleep jump target. */
2613                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100),
2614                 /* The restart_syscall jump target. */
2615                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200),
2616         };
2617         struct sock_fprog prog = {
2618                 .len = (unsigned short)ARRAY_SIZE(filter),
2619                 .filter = filter,
2620         };
             /* Only used by the uname() check in the ARM-specific path below. */
2621 #if defined(__arm__)
2622         struct utsname utsbuf;
2623 #endif
2624
             /* The pipe carries one-byte sync messages from parent to child. */
2625         ASSERT_EQ(0, pipe(pipefd));
2626
2627         child_pid = fork();
2628         ASSERT_LE(0, child_pid);
2629         if (child_pid == 0) {
2630                 /* Child uses EXPECT not ASSERT to deliver status correctly. */
2631                 char buf = ' ';
2632                 struct timespec timeout = { };
2633
2634                 /* Attach parent as tracer and stop. */
2635                 EXPECT_EQ(0, ptrace(PTRACE_TRACEME));
2636                 EXPECT_EQ(0, raise(SIGSTOP));
2637
                     /* The child only reads; drop its copy of the write end. */
2638                 EXPECT_EQ(0, close(pipefd[1]));
2639
2640                 EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2641                         TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2642                 }
2643
2644                 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2645                 EXPECT_EQ(0, ret) {
2646                         TH_LOG("Failed to install filter!");
2647                 }
2648
                     /* Block until the parent sends '.' before starting the sleep. */
2649                 EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
2650                         TH_LOG("Failed to read() sync from parent");
2651                 }
2652                 EXPECT_EQ('.', buf) {
2653                         TH_LOG("Failed to get sync data from read()");
2654                 }
2655
2656                 /* Start nanosleep to be interrupted. */
2657                 timeout.tv_sec = 1;
2658                 errno = 0;
2659                 EXPECT_EQ(0, nanosleep(&timeout, NULL)) {
2660                         TH_LOG("Call to nanosleep() failed (errno %d)", errno);
2661                 }
2662
2663                 /* Read final sync from parent. */
2664                 EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
2665                         TH_LOG("Failed final read() from parent");
2666                 }
2667                 EXPECT_EQ('!', buf) {
2668                         TH_LOG("Failed to get final data from read()");
2669                 }
2670
2671                 /* Directly report the status of our test harness results. */
                     /*
                      * Raw __NR_exit is on the filter's allow list; any syscall
                      * not listed there would hit RET_KILL.
                      */
2672                 syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS
2673                                                      : EXIT_FAILURE);
2674         }
             /* The parent only writes; drop its copy of the read end. */
2675         EXPECT_EQ(0, close(pipefd[0]));
2676
2677         /* Attach to child, setup options, and release. */
             /* This first stop is expected to be the child's raise(SIGSTOP). */
2678         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2679         ASSERT_EQ(true, WIFSTOPPED(status));
2680         ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL,
2681                             PTRACE_O_TRACESECCOMP));
2682         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2683         ASSERT_EQ(1, write(pipefd[1], ".", 1));
2684
2685         /* Wait for nanosleep() to start. */
             /*
              * The seccomp stop is checked as SIGTRAP with the event code in
              * status >> 16; msg must be the 0x100 tag of the nanosleep rule.
              * get_syscall() is defined elsewhere in this file; presumably it
              * returns the tracee's current syscall number -- confirm.
              */
2686         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2687         ASSERT_EQ(true, WIFSTOPPED(status));
2688         ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
2689         ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
2690         ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
2691         ASSERT_EQ(0x100, msg);
2692         EXPECT_EQ(__NR_nanosleep, get_syscall(_metadata, child_pid));
2693
2694         /* Might as well check siginfo for sanity while we're here. */
2695         ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
2696         ASSERT_EQ(SIGTRAP, info.si_signo);
2697         ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code);
2698         EXPECT_EQ(0, info.si_errno);
2699         EXPECT_EQ(getuid(), info.si_uid);
2700         /* Verify signal delivery came from child (seccomp-triggered). */
2701         EXPECT_EQ(child_pid, info.si_pid);
2702
2703         /* Interrupt nanosleep with SIGSTOP (which we'll need to handle). */
2704         ASSERT_EQ(0, kill(child_pid, SIGSTOP));
2705         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2706         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2707         ASSERT_EQ(true, WIFSTOPPED(status));
2708         ASSERT_EQ(SIGSTOP, WSTOPSIG(status));
2709         /* Verify signal delivery came from parent now. */
2710         ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
2711         EXPECT_EQ(getpid(), info.si_pid);
2712
2713         /* Restart nanosleep with SIGCONT, which triggers restart_syscall. */
2714         ASSERT_EQ(0, kill(child_pid, SIGCONT));
2715         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2716         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2717         ASSERT_EQ(true, WIFSTOPPED(status));
2718         ASSERT_EQ(SIGCONT, WSTOPSIG(status));
2719         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2720
2721         /* Wait for restart_syscall() to start. */
2722         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2723         ASSERT_EQ(true, WIFSTOPPED(status));
2724         ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
2725         ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
2726         ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
2727
             /* 0x200 is the tag attached to the restart_syscall filter rule. */
2728         ASSERT_EQ(0x200, msg);
2729         ret = get_syscall(_metadata, child_pid);
2730 #if defined(__arm__)
2731         /*
2732          * FIXME:
2733          * - native ARM registers do NOT expose true syscall.
2734          * - compat ARM registers on ARM64 DO expose true syscall.
2735          */
2736         ASSERT_EQ(0, uname(&utsbuf));
2737         if (strncmp(utsbuf.machine, "arm", 3) == 0) {
2738                 EXPECT_EQ(__NR_nanosleep, ret);
2739         } else
2740 #endif
             /*
              * With __arm__ defined this block is the else branch of the
              * check above; otherwise it runs unconditionally.
              */
2741         {
2742                 EXPECT_EQ(__NR_restart_syscall, ret);
2743         }
2744
2745         /* Write again to end test. */
2746         ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2747         ASSERT_EQ(1, write(pipefd[1], "!", 1));
2748         EXPECT_EQ(0, close(pipefd[1]));
2749
             /* A child killed by a signal or exiting non-zero fails the test. */
2750         ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2751         if (WIFSIGNALED(status) || WEXITSTATUS(status))
2752                 _metadata->passed = 0;
2753 }
2754
2755 TEST_SIGNAL(filter_flag_log, SIGSYS)
2756 {
2757         struct sock_filter allow_filter[] = {
2758                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2759         };
2760         struct sock_filter kill_filter[] = {
2761                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2762                         offsetof(struct seccomp_data, nr)),
2763                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
2764                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2765                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2766         };
2767         struct sock_fprog allow_prog = {
2768                 .len = (unsigned short)ARRAY_SIZE(allow_filter),
2769                 .filter = allow_filter,
2770         };
2771         struct sock_fprog kill_prog = {
2772                 .len = (unsigned short)ARRAY_SIZE(kill_filter),
2773                 .filter = kill_filter,
2774         };
2775         long ret;
2776         pid_t parent = getppid();
2777
2778         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2779         ASSERT_EQ(0, ret);
2780
2781         /* Verify that the FILTER_FLAG_LOG flag isn't accepted in strict mode */
2782         ret = seccomp(SECCOMP_SET_MODE_STRICT, SECCOMP_FILTER_FLAG_LOG,
2783                       &allow_prog);
2784         ASSERT_NE(ENOSYS, errno) {
2785                 TH_LOG("Kernel does not support seccomp syscall!");
2786         }
2787         EXPECT_NE(0, ret) {
2788                 TH_LOG("Kernel accepted FILTER_FLAG_LOG flag in strict mode!");
2789         }
2790         EXPECT_EQ(EINVAL, errno) {
2791                 TH_LOG("Kernel returned unexpected errno for FILTER_FLAG_LOG flag in strict mode!");
2792         }
2793
2794         /* Verify that a simple, permissive filter can be added with no flags */
2795         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog);
2796         EXPECT_EQ(0, ret);
2797
2798         /* See if the same filter can be added with the FILTER_FLAG_LOG flag */
2799         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
2800                       &allow_prog);
2801         ASSERT_NE(EINVAL, errno) {
2802                 TH_LOG("Kernel does not support the FILTER_FLAG_LOG flag!");
2803         }
2804         EXPECT_EQ(0, ret);
2805
2806         /* Ensure that the kill filter works with the FILTER_FLAG_LOG flag */
2807         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
2808                       &kill_prog);
2809         EXPECT_EQ(0, ret);
2810
2811         EXPECT_EQ(parent, syscall(__NR_getppid));
2812         /* getpid() should never return. */
2813         EXPECT_EQ(0, syscall(__NR_getpid));
2814 }
2815
2816 TEST(get_action_avail)
2817 {
2818         __u32 actions[] = { SECCOMP_RET_KILL_THREAD, SECCOMP_RET_TRAP,
2819                             SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE,
2820                             SECCOMP_RET_LOG,   SECCOMP_RET_ALLOW };
2821         __u32 unknown_action = 0x10000000U;
2822         int i;
2823         long ret;
2824
2825         ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[0]);
2826         ASSERT_NE(ENOSYS, errno) {
2827                 TH_LOG("Kernel does not support seccomp syscall!");
2828         }
2829         ASSERT_NE(EINVAL, errno) {
2830                 TH_LOG("Kernel does not support SECCOMP_GET_ACTION_AVAIL operation!");
2831         }
2832         EXPECT_EQ(ret, 0);
2833
2834         for (i = 0; i < ARRAY_SIZE(actions); i++) {
2835                 ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[i]);
2836                 EXPECT_EQ(ret, 0) {
2837                         TH_LOG("Expected action (0x%X) not available!",
2838                                actions[i]);
2839                 }
2840         }
2841
2842         /* Check that an unknown action is handled properly (EOPNOTSUPP) */
2843         ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &unknown_action);
2844         EXPECT_EQ(ret, -1);
2845         EXPECT_EQ(errno, EOPNOTSUPP);
2846 }
2847
2848 /*
2849  * TODO:
2850  * - add microbenchmarks
2851  * - expand NNP testing
2852  * - better arch-specific TRACE and TRAP handlers.
2853  * - endianness checking when appropriate
2854  * - 64-bit arg prodding
2855  * - arch value testing (x86 modes especially)
2856  * - verify that FILTER_FLAG_LOG filters generate log messages
2857  * - verify that RET_LOG generates log messages
2858  * - ...
2859  */
2860
/*
 * Harness macro from ../kselftest_harness.h; presumably expands to the
 * program entry point that runs the tests declared above -- confirm
 * against the harness header.
 */
2861 TEST_HARNESS_MAIN