// SPDX-License-Identifier: LGPL-2.1
#define _GNU_SOURCE
#include <assert.h>
#include <pthread.h>
#include <sched.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syscall.h>
#include <unistd.h>
#include <poll.h>
#include <sys/types.h>
#include <signal.h>
#include <errno.h>
#include <stddef.h>
static inline pid_t gettid(void)
{
	return syscall(__NR_gettid);
}
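/*
 * Delay-injection state: loop_cnt[1..NR_INJECT] hold per-injection-point
 * loop counts, settable from the command line (-1 .. -9), and are used by
 * the RSEQ_INJECT_ASM()/RSEQ_INJECT_C() hooks below to perturb the rseq
 * critical sections.
 */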
#define NR_INJECT	9
static int loop_cnt[NR_INJECT + 1];
static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));
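/*
 * The asm-named aliases above exist so the x86 RSEQ_INJECT_ASM() variants
 * can reference the counters directly by symbol name (asm_loop_cnt_N)
 * from inline assembly; main() copies loop_cnt[1..6] into them after
 * option parsing.
 */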
static int opt_modulo, verbose;

static int opt_yield, opt_signal, opt_sleep,
		opt_disable_rseq, opt_threads = 200,
		opt_disable_mod = 0, opt_test = 's', opt_mb = 0;
#ifndef RSEQ_SKIP_FASTPATH
static long long opt_reps = 5000;
#else
static long long opt_reps = 100;
#endif
static __thread __attribute__((tls_model("initial-exec")))
unsigned int signals_delivered;

#ifndef BENCHMARK

static __thread __attribute__((tls_model("initial-exec"), unused))
unsigned int yield_mod_cnt, nr_abort;
#define printf_verbose(fmt, ...) \
	do { \
		if (verbose) \
			printf(fmt, ## __VA_ARGS__); \
	} while (0)
#if defined(__x86_64__) || defined(__i386__)

#define INJECT_ASM_REG	"eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#ifdef __i386__

#define RSEQ_INJECT_ASM(n) \
	"mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__x86_64__)

/*
 * Note: the RIP-relative lea into a 32-bit register relies on the
 * default small code model, where symbol addresses fit in 32 bits.
 */
#define RSEQ_INJECT_ASM(n) \
	"lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG "\n\t" \
	"mov (%%" INJECT_ASM_REG "), %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#else
#error "Unsupported architecture"
#endif
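/*
 * For illustration only, RSEQ_INJECT_ASM(1) on x86-64 expands (roughly) to:
 *
 *	lea asm_loop_cnt_1(%rip), %eax
 *	mov (%eax), %eax
 *	test %eax, %eax
 *	jz 333f
 * 222:
 *	dec %eax
 *	jnz 222b
 * 333:
 *
 * i.e. a busy-wait of asm_loop_cnt_1 iterations inside the critical section.
 */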
#elif defined(__ARMEL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r4"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmp " INJECT_ASM_REG ", #0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subs " INJECT_ASM_REG ", #1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"
#elif defined(__PPC__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r18"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"

#else
#error unsupported target
#endif
#define RSEQ_INJECT_FAILED \
	nr_abort++;

#define RSEQ_INJECT_C(n) \
{ \
	int loc_i, loc_nr_loops = loop_cnt[n]; \
	\
	for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
		rseq_barrier(); \
	} \
	if (loc_nr_loops == -1 && opt_modulo) { \
		if (yield_mod_cnt == opt_modulo - 1) { \
			if (opt_sleep > 0) \
				poll(NULL, 0, opt_sleep); \
			if (opt_yield) \
				sched_yield(); \
			if (opt_signal) \
				raise(SIGUSR1); \
			yield_mod_cnt = 0; \
		} else \
			yield_mod_cnt++; \
	} \
}
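/*
 * Summary of the injection knobs: a positive loop_cnt[n] busy-loops that
 * many iterations at injection point n; loop_cnt[n] == -1 (set with
 * "-n -1" on the command line) instead triggers the -m modulo behaviour
 * above: every opt_modulo-th hit yields, sleeps and/or raises SIGUSR1
 * depending on -y, -s and -k.
 */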
#else

#define printf_verbose(fmt, ...)

#endif /* BENCHMARK */

#include "rseq.h"
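/*
 * Note: the RSEQ_INJECT_* macros above are consumed by rseq.h (included
 * above). Because they are defined before the include, rseq.h expands
 * them at its numbered injection points inside the restartable-sequence
 * fast paths, so delays, yields, sleeps and signals can be injected
 * while a critical section is executing.
 */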
struct percpu_lock_entry {
	intptr_t v;
} __attribute__((aligned(128)));

struct percpu_lock {
	struct percpu_lock_entry c[CPU_SETSIZE];
};
struct test_data_entry {
	intptr_t count;
} __attribute__((aligned(128)));

struct spinlock_test_data {
	struct percpu_lock lock;
	struct test_data_entry c[CPU_SETSIZE];
};

struct spinlock_thread_test_data {
	struct spinlock_test_data *data;
	long long reps;
	int reg;
};

struct inc_test_data {
	struct test_data_entry c[CPU_SETSIZE];
};

struct inc_thread_test_data {
	struct inc_test_data *data;
	long long reps;
	int reg;
};
struct percpu_list_node {
	intptr_t data;
	struct percpu_list_node *next;
};

struct percpu_list_entry {
	struct percpu_list_node *head;
} __attribute__((aligned(128)));

struct percpu_list {
	struct percpu_list_entry c[CPU_SETSIZE];
};

#define BUFFER_ITEM_PER_CPU	100
struct percpu_buffer_node {
	intptr_t data;
};

struct percpu_buffer_entry {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_buffer_node **array;
} __attribute__((aligned(128)));

struct percpu_buffer {
	struct percpu_buffer_entry c[CPU_SETSIZE];
};

#define MEMCPY_BUFFER_ITEM_PER_CPU	100
struct percpu_memcpy_buffer_node {
	intptr_t data1;
	uint64_t data2;
};

struct percpu_memcpy_buffer_entry {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_memcpy_buffer_node *array;
} __attribute__((aligned(128)));

struct percpu_memcpy_buffer {
	struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
};
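/*
 * Note: each *_entry structure above is aligned(128) so that every CPU's
 * element starts on its own cache lines, avoiding false sharing between
 * adjacent CPUs' data.
 */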
/* A simple percpu spinlock. Grabs lock on current cpu. */
static int rseq_this_cpu_lock(struct percpu_lock *lock)
{
	int cpu;

	for (;;) {
		int ret;

		cpu = rseq_cpu_start();
		ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
					 0, 1, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	/*
	 * Acquire semantic when taking lock after control dependency.
	 * Matches rseq_smp_store_release().
	 */
	rseq_smp_acquire__after_ctrl_dep();
	return cpu;
}
static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
{
	assert(lock->c[cpu].v == 1);
	/*
	 * Release lock, with release semantic. Matches
	 * rseq_smp_acquire__after_ctrl_dep().
	 */
	rseq_smp_store_release(&lock->c[cpu].v, 0);
}
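/*
 * Usage sketch for the pair above: the cpu number returned by
 * rseq_this_cpu_lock() identifies which per-cpu lock was taken and must
 * be passed back to rseq_percpu_unlock(), e.g.:
 *
 *	cpu = rseq_this_cpu_lock(&data->lock);
 *	data->c[cpu].count++;
 *	rseq_percpu_unlock(&data->lock, cpu);
 */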
void *test_percpu_spinlock_thread(void *arg)
{
	struct spinlock_thread_test_data *thread_data = arg;
	struct spinlock_test_data *data = thread_data->data;
	long long i, reps;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
		abort();
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
		int cpu = rseq_cpu_start();

		cpu = rseq_this_cpu_lock(&data->lock);
		data->c[cpu].count++;
		rseq_percpu_unlock(&data->lock, cpu);
#ifndef BENCHMARK
		if (i != 0 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n", (int) gettid(), i);
#endif
	}
	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
		abort();
	return NULL;
}
/*
 * A simple test which implements a sharded counter using a per-cpu
 * lock. Obviously real applications might prefer to simply use a
 * per-cpu increment; however, this is reasonable for a test and the
 * lock can be extended to synchronize more complicated operations.
 */
void test_percpu_spinlock(void)
{
	const int num_threads = opt_threads;
	int i, ret;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct spinlock_test_data data;
	struct spinlock_thread_test_data thread_data[num_threads];

	memset(&data, 0, sizeof(data));
	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = &data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_spinlock_thread,
				     &thread_data[i]);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)opt_reps * num_threads);
}
void *test_percpu_inc_thread(void *arg)
{
	struct inc_thread_test_data *thread_data = arg;
	struct inc_test_data *data = thread_data->data;
	long long i, reps;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
		abort();
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
		int ret;

		do {
			int cpu;

			cpu = rseq_cpu_start();
			ret = rseq_addv(&data->c[cpu].count, 1, cpu);
		} while (rseq_unlikely(ret));
#ifndef BENCHMARK
		if (i != 0 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n", (int) gettid(), i);
#endif
	}
	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
		abort();
	return NULL;
}
void test_percpu_inc(void)
{
	const int num_threads = opt_threads;
	int i, ret;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct inc_test_data data;
	struct inc_thread_test_data thread_data[num_threads];

	memset(&data, 0, sizeof(data));
	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = &data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_inc_thread,
				     &thread_data[i]);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)opt_reps * num_threads);
}
void this_cpu_list_push(struct percpu_list *list,
			struct percpu_list_node *node,
			int *_cpu)
{
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval, expect;
		int ret;

		cpu = rseq_cpu_start();
		/* Load list->c[cpu].head with single-copy atomicity. */
		expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
		newval = (intptr_t)node;
		targetptr = (intptr_t *)&list->c[cpu].head;
		node->next = (struct percpu_list_node *)expect;
		ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
}
/*
 * Unlike a traditional lock-less linked list, the availability of an
 * rseq primitive allows us to implement pop without concerns over
 * ABA-type races.
 */
struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
					   int *_cpu)
{
	struct percpu_list_node *node = NULL;
	int cpu;

	for (;;) {
		struct percpu_list_node *head;
		intptr_t *targetptr, expectnot, *load;
		off_t offset;
		int ret;

		cpu = rseq_cpu_start();
		targetptr = (intptr_t *)&list->c[cpu].head;
		expectnot = (intptr_t)NULL;
		offset = offsetof(struct percpu_list_node, next);
		load = (intptr_t *)&head;
		ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
						 offset, load, cpu);
		if (rseq_likely(!ret)) {
			node = head;
			break;
		}
		if (ret > 0)
			break;
		/* Retry if rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return node;
}
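/*
 * Sketch of why this pop is ABA-safe: the comparison of the head pointer
 * and the dereference of head->next both happen within a single rseq
 * critical section on the owning cpu; any preemption, migration or
 * signal aborts the sequence before the commit store, so the head cannot
 * be popped, freed and reinserted behind our back.
 */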
/*
 * __percpu_list_pop is not safe against concurrent accesses. Should
 * only be used on lists that are not concurrently modified.
 */
struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
{
	struct percpu_list_node *node;

	node = list->c[cpu].head;
	if (!node)
		return NULL;
	list->c[cpu].head = node->next;
	return node;
}
void *test_percpu_list_thread(void *arg)
{
	long long i, reps;
	struct percpu_list *list = (struct percpu_list *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_list_node *node;

		node = this_cpu_list_pop(list, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (node)
			this_cpu_list_push(list, node, NULL);
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}
/* Simultaneous modification to a per-cpu linked list from many threads. */
void test_percpu_list(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_list list;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&list, 0, sizeof(list));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		for (j = 1; j <= 100; j++) {
			struct percpu_list_node *node;

			expected_sum += j;

			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			node->next = list.c[i].head;
			list.c[i].head = node;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_list_thread, &list);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_list_pop(&list, i))) {
			sum += node->data;
			free(node);
		}
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}
bool this_cpu_buffer_push(struct percpu_buffer *buffer,
			  struct percpu_buffer_node *node,
			  int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_spec, newval_spec;
		intptr_t *targetptr_final, newval_final;
		intptr_t offset;
		int ret;

		cpu = rseq_cpu_start();
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == buffer->c[cpu].buflen)
			break;
		newval_spec = (intptr_t)node;
		targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
		newval_final = offset + 1;
		targetptr_final = &buffer->c[cpu].offset;
		if (opt_mb)
			ret = rseq_cmpeqv_trystorev_storev_release(
				targetptr_final, offset, targetptr_spec,
				newval_spec, newval_final, cpu);
		else
			ret = rseq_cmpeqv_trystorev_storev(targetptr_final,
				offset, targetptr_spec, newval_spec,
				newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}
struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
					       int *_cpu)
{
	struct percpu_buffer_node *head;
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval;
		intptr_t offset;
		int ret;

		cpu = rseq_cpu_start();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == 0) {
			head = NULL;
			break;
		}
		head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
		newval = offset - 1;
		targetptr = (intptr_t *)&buffer->c[cpu].offset;
		ret = rseq_cmpeqv_cmpeqv_storev(targetptr, offset,
			(intptr_t *)&buffer->c[cpu].array[offset - 1],
			(intptr_t)head, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return head;
}
/*
 * __percpu_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
					       int cpu)
{
	struct percpu_buffer_node *head;
	intptr_t offset;

	offset = buffer->c[cpu].offset;
	if (offset == 0)
		return NULL;
	head = buffer->c[cpu].array[offset - 1];
	buffer->c[cpu].offset = offset - 1;
	return head;
}
void *test_percpu_buffer_thread(void *arg)
{
	long long i, reps;
	struct percpu_buffer *buffer = (struct percpu_buffer *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_buffer_node *node;

		node = this_cpu_buffer_pop(buffer, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (node) {
			if (!this_cpu_buffer_push(buffer, node, NULL)) {
				/* Should increase buffer size. */
				abort();
			}
		}
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}
/* Simultaneous modification to a per-cpu buffer from many threads. */
void test_percpu_buffer(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_buffer buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&buffer, 0, sizeof(buffer));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		/* Worst case is every item landing in the same CPU's buffer. */
		buffer.c[i].array =
			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
			       BUFFER_ITEM_PER_CPU);
		assert(buffer.c[i].array);
		buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
			struct percpu_buffer_node *node;

			expected_sum += j;

			/*
			 * We could theoretically put the word-sized
			 * "data" directly in the buffer. However, we
			 * want to model objects that would not fit
			 * within a single word, so allocate an object
			 * for each node.
			 */
			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			buffer.c[i].array[j - 1] = node;
			buffer.c[i].offset++;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_buffer_thread, &buffer);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_buffer_node *node;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_buffer_pop(&buffer, i))) {
			sum += node->data;
			free(node);
		}
		free(buffer.c[i].array);
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}
bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
				 struct percpu_memcpy_buffer_node item,
				 int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_final, newval_final, offset;
		char *destptr, *srcptr;
		size_t copylen;
		int ret;

		cpu = rseq_cpu_start();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == buffer->c[cpu].buflen)
			break;
		destptr = (char *)&buffer->c[cpu].array[offset];
		srcptr = (char *)&item;
		/* copylen must be <= 4kB. */
		copylen = sizeof(item);
		newval_final = offset + 1;
		targetptr_final = &buffer->c[cpu].offset;
		if (opt_mb)
			ret = rseq_cmpeqv_trymemcpy_storev_release(
				targetptr_final, offset,
				destptr, srcptr, copylen,
				newval_final, cpu);
		else
			ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
				offset, destptr, srcptr, copylen,
				newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}
bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
				struct percpu_memcpy_buffer_node *item,
				int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_final, newval_final, offset;
		char *destptr, *srcptr;
		size_t copylen;
		int ret;

		cpu = rseq_cpu_start();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == 0)
			break;
		destptr = (char *)item;
		srcptr = (char *)&buffer->c[cpu].array[offset - 1];
		/* copylen must be <= 4kB. */
		copylen = sizeof(*item);
		newval_final = offset - 1;
		targetptr_final = &buffer->c[cpu].offset;
		ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
			offset, destptr, srcptr, copylen,
			newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}
/*
 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
				struct percpu_memcpy_buffer_node *item,
				int cpu)
{
	intptr_t offset;

	offset = buffer->c[cpu].offset;
	if (offset == 0)
		return false;
	memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
	buffer->c[cpu].offset = offset - 1;
	return true;
}
void *test_percpu_memcpy_buffer_thread(void *arg)
{
	long long i, reps;
	struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_memcpy_buffer_node item;
		bool result;

		result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (result) {
			if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
				/* Should increase buffer size. */
				abort();
			}
		}
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}
/* Simultaneous modification to a per-cpu buffer from many threads. */
void test_percpu_memcpy_buffer(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_memcpy_buffer buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&buffer, 0, sizeof(buffer));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		/* Worst case is every item landing in the same CPU's buffer. */
		buffer.c[i].array =
			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
			       MEMCPY_BUFFER_ITEM_PER_CPU);
		assert(buffer.c[i].array);
		buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
			expected_sum += 2 * j + 1;

			/*
			 * Each node carries two fields, modelling
			 * objects that would not fit within a single
			 * word; here they are copied into the buffer
			 * by value rather than allocated per node.
			 */
			buffer.c[i].array[j - 1].data1 = j;
			buffer.c[i].array[j - 1].data2 = j + 1;
			buffer.c[i].offset++;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_memcpy_buffer_thread,
				     &buffer);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_memcpy_buffer_node item;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
			sum += item.data1;
			sum += item.data2;
		}
		free(buffer.c[i].array);
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}
static void test_signal_interrupt_handler(int signo)
{
	signals_delivered++;
}
static int set_signal_handler(void)
{
	int ret = 0;
	struct sigaction sa;
	sigset_t sigset;

	ret = sigemptyset(&sigset);
	if (ret < 0) {
		perror("sigemptyset");
		return ret;
	}

	sa.sa_handler = test_signal_interrupt_handler;
	sa.sa_mask = sigset;
	sa.sa_flags = 0;
	ret = sigaction(SIGUSR1, &sa, NULL);
	if (ret < 0) {
		perror("sigaction");
		return ret;
	}

	printf_verbose("Signal handler set for SIGUSR1\n");

	return ret;
}
static void show_usage(int argc, char **argv)
{
	printf("Usage: %s <OPTIONS>\n",
	       argv[0]);
	printf("OPTIONS:\n");
	printf("	[-1 loops] Number of loops for delay injection 1\n");
	printf("	[-2 loops] Number of loops for delay injection 2\n");
	printf("	[-3 loops] Number of loops for delay injection 3\n");
	printf("	[-4 loops] Number of loops for delay injection 4\n");
	printf("	[-5 loops] Number of loops for delay injection 5\n");
	printf("	[-6 loops] Number of loops for delay injection 6\n");
	printf("	[-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
	printf("	[-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
	printf("	[-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
	printf("	[-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
	printf("	[-y] Yield\n");
	printf("	[-k] Kill thread with signal\n");
	printf("	[-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
	printf("	[-t N] Number of threads (default 200)\n");
	printf("	[-r N] Number of repetitions per thread (default 5000)\n");
	printf("	[-d] Disable rseq system call (no initialization)\n");
	printf("	[-D M] Disable rseq for each M threads\n");
	printf("	[-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement\n");
	printf("	[-M] Push into buffer and memcpy buffer with memory barriers.\n");
	printf("	[-v] Verbose output.\n");
	printf("	[-h] Show this help.\n");
	printf("\n");
}
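/*
 * Example invocation (assuming the built binary is named param_test):
 *
 *	./param_test -T l -t 16 -r 10000 -y -v
 *
 * runs the per-cpu list test with 16 threads, 10000 repetitions per
 * thread, yield injection, and verbose output.
 */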
int main(int argc, char **argv)
{
	int i;

	for (i = 1; i < argc; i++) {
		if (argv[i][0] != '-')
			continue;
		switch (argv[i][1]) {
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
		case '8':
		case '9':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
			i++;
			break;
		case 'm':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_modulo = atol(argv[i + 1]);
			if (opt_modulo < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 's':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_sleep = atol(argv[i + 1]);
			if (opt_sleep < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'y':
			opt_yield = 1;
			break;
		case 'k':
			opt_signal = 1;
			break;
		case 'd':
			opt_disable_rseq = 1;
			break;
		case 'D':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_disable_mod = atol(argv[i + 1]);
			if (opt_disable_mod < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 't':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_threads = atol(argv[i + 1]);
			if (opt_threads < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'r':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_reps = atoll(argv[i + 1]);
			if (opt_reps < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'h':
			show_usage(argc, argv);
			goto end;
		case 'T':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_test = *argv[i + 1];
			switch (opt_test) {
			case 's':
			case 'l':
			case 'i':
			case 'b':
			case 'm':
				break;
			default:
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'v':
			verbose = 1;
			break;
		case 'M':
			opt_mb = 1;
			break;
		default:
			show_usage(argc, argv);
			goto error;
		}
	}
	loop_cnt_1 = loop_cnt[1];
	loop_cnt_2 = loop_cnt[2];
	loop_cnt_3 = loop_cnt[3];
	loop_cnt_4 = loop_cnt[4];
	loop_cnt_5 = loop_cnt[5];
	loop_cnt_6 = loop_cnt[6];
	if (set_signal_handler())
		goto error;

	if (!opt_disable_rseq && rseq_register_current_thread())
		goto error;
	switch (opt_test) {
	case 's':
		printf_verbose("spinlock\n");
		test_percpu_spinlock();
		break;
	case 'l':
		printf_verbose("linked list\n");
		test_percpu_list();
		break;
	case 'b':
		printf_verbose("buffer\n");
		test_percpu_buffer();
		break;
	case 'm':
		printf_verbose("memcpy buffer\n");
		test_percpu_memcpy_buffer();
		break;
	case 'i':
		printf_verbose("counter increment\n");
		test_percpu_inc();
		break;
	}
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();
end:
	return 0;

error:
	return -1;
}