tools/testing/selftests/rseq/param_test.c
// SPDX-License-Identifier: LGPL-2.1
#define _GNU_SOURCE
#include <assert.h>
#include <linux/membarrier.h>
#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syscall.h>
#include <unistd.h>
#include <poll.h>
#include <sys/types.h>
#include <signal.h>
#include <errno.h>
#include <stddef.h>
#include <stdbool.h>

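/*
 * Use the raw gettid syscall rather than a libc wrapper: glibc only
 * gained a gettid() wrapper in 2.30, and this test is expected to
 * build against older toolchains as well.
 */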
static inline pid_t rseq_gettid(void)
{
	return syscall(__NR_gettid);
}

#define NR_INJECT	9
static int loop_cnt[NR_INJECT + 1];

static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));

static int opt_modulo, verbose;

static int opt_yield, opt_signal, opt_sleep,
		opt_disable_rseq, opt_threads = 200,
		opt_disable_mod = 0, opt_test = 's';

static long long opt_reps = 5000;

static __thread __attribute__((tls_model("initial-exec")))
unsigned int signals_delivered;

#ifndef BENCHMARK

static __thread __attribute__((tls_model("initial-exec"), unused))
unsigned int yield_mod_cnt, nr_abort;

#define printf_verbose(fmt, ...)			\
	do {						\
		if (verbose)				\
			printf(fmt, ## __VA_ARGS__);	\
	} while (0)

#ifdef __i386__

#define INJECT_ASM_REG	"eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__x86_64__)

#define INJECT_ASM_REG_P	"rax"
#define INJECT_ASM_REG		"eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG_P \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
	"mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__s390__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r12"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
	"je 333f\n\t" \
	"222:\n\t" \
	"ahi %%" INJECT_ASM_REG ", -1\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__ARMEL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r4"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmp " INJECT_ASM_REG ", #0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subs " INJECT_ASM_REG ", #1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"

#elif defined(__AARCH64EL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1] "Qo" (loop_cnt[1]) \
	, [loop_cnt_2] "Qo" (loop_cnt[2]) \
	, [loop_cnt_3] "Qo" (loop_cnt[3]) \
	, [loop_cnt_4] "Qo" (loop_cnt[4]) \
	, [loop_cnt_5] "Qo" (loop_cnt[5]) \
	, [loop_cnt_6] "Qo" (loop_cnt[6])

#define INJECT_ASM_REG	RSEQ_ASM_TMP_REG32

#define RSEQ_INJECT_ASM(n) \
	"	ldr	" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n"	\
	"	cbz	" INJECT_ASM_REG ", 333f\n"			\
	"222:\n"							\
	"	sub	" INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n"	\
	"	cbnz	" INJECT_ASM_REG ", 222b\n"			\
	"333:\n"

#elif defined(__PPC__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r18"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"

#elif defined(__mips__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"$5"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"beqz " INJECT_ASM_REG ", 333f\n\t" \
	"222:\n\t" \
	"addiu " INJECT_ASM_REG ", -1\n\t" \
	"bnez " INJECT_ASM_REG ", 222b\n\t" \
	"333:\n\t"
#elif defined(__riscv)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"t1"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n)					\
	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t"		\
	"beqz " INJECT_ASM_REG ", 333f\n\t"			\
	"222:\n\t"						\
	"addi  " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t"	\
	"bnez " INJECT_ASM_REG ", 222b\n\t"			\
	"333:\n\t"

#else
#error unsupported target
#endif

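/*
 * The RSEQ_INJECT_* hooks defined above are consumed by rseq.h,
 * included below: with BENCHMARK undefined, the rseq critical sections
 * expand RSEQ_INJECT_ASM(n) at their preparation and commit points,
 * spinning for loop_cnt[n] iterations to widen the race windows, and
 * RSEQ_INJECT_FAILED runs on every abort so the test can count them.
 */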
#define RSEQ_INJECT_FAILED \
	nr_abort++;

#define RSEQ_INJECT_C(n) \
{ \
	int loc_i, loc_nr_loops = loop_cnt[n]; \
	\
	for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
		rseq_barrier(); \
	} \
	if (loc_nr_loops == -1 && opt_modulo) { \
		if (yield_mod_cnt == opt_modulo - 1) { \
			if (opt_sleep > 0) \
				poll(NULL, 0, opt_sleep); \
			if (opt_yield) \
				sched_yield(); \
			if (opt_signal) \
				raise(SIGUSR1); \
			yield_mod_cnt = 0; \
		} else { \
			yield_mod_cnt++; \
		} \
	} \
}

#else

#define printf_verbose(fmt, ...)

#endif /* BENCHMARK */

#include "rseq.h"

static enum rseq_mo opt_mo = RSEQ_MO_RELAXED;

#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
#define TEST_MEMBARRIER

static int sys_membarrier(int cmd, int flags, int cpu_id)
{
	return syscall(__NR_membarrier, cmd, flags, cpu_id);
}
#endif

#ifdef BUILDOPT_RSEQ_PERCPU_MM_CID
# define RSEQ_PERCPU	RSEQ_PERCPU_MM_CID
static
int get_current_cpu_id(void)
{
	return rseq_current_mm_cid();
}
static
bool rseq_validate_cpu_id(void)
{
	return rseq_mm_cid_available();
}
static
bool rseq_use_cpu_index(void)
{
	return false;	/* Use mm_cid */
}
# ifdef TEST_MEMBARRIER
/*
 * Membarrier does not currently support targeting a mm_cid, so
 * issue the barrier on all cpus.
 */
static
int rseq_membarrier_expedited(int cpu)
{
	return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
			      0, 0);
}
# endif /* TEST_MEMBARRIER */
#else
# define RSEQ_PERCPU	RSEQ_PERCPU_CPU_ID
static
int get_current_cpu_id(void)
{
	return rseq_cpu_start();
}
static
bool rseq_validate_cpu_id(void)
{
	return rseq_current_cpu_raw() >= 0;
}
static
bool rseq_use_cpu_index(void)
{
	return true;	/* Use cpu_id as index. */
}
# ifdef TEST_MEMBARRIER
static
int rseq_membarrier_expedited(int cpu)
{
	return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
			      MEMBARRIER_CMD_FLAG_CPU, cpu);
}
# endif /* TEST_MEMBARRIER */
#endif
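
/*
 * Note: mm_cid (the per-memory-map concurrency ID) is a compact index
 * assigned by the kernel, bounded by the number of concurrently running
 * threads rather than by the cpu number. That is why rseq_use_cpu_index()
 * returns false in the mm_cid configuration: the per-cpu arrays below are
 * still sized with CPU_SETSIZE, but their entries can no longer be matched
 * against the cpu affinity mask.
 */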

struct percpu_lock_entry {
	intptr_t v;
} __attribute__((aligned(128)));

struct percpu_lock {
	struct percpu_lock_entry c[CPU_SETSIZE];
};

struct test_data_entry {
	intptr_t count;
} __attribute__((aligned(128)));

struct spinlock_test_data {
	struct percpu_lock lock;
	struct test_data_entry c[CPU_SETSIZE];
};

struct spinlock_thread_test_data {
	struct spinlock_test_data *data;
	long long reps;
	int reg;
};

struct inc_test_data {
	struct test_data_entry c[CPU_SETSIZE];
};

struct inc_thread_test_data {
	struct inc_test_data *data;
	long long reps;
	int reg;
};

struct percpu_list_node {
	intptr_t data;
	struct percpu_list_node *next;
};

struct percpu_list_entry {
	struct percpu_list_node *head;
} __attribute__((aligned(128)));

struct percpu_list {
	struct percpu_list_entry c[CPU_SETSIZE];
};

#define BUFFER_ITEM_PER_CPU	100

struct percpu_buffer_node {
	intptr_t data;
};

struct percpu_buffer_entry {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_buffer_node **array;
} __attribute__((aligned(128)));

struct percpu_buffer {
	struct percpu_buffer_entry c[CPU_SETSIZE];
};

#define MEMCPY_BUFFER_ITEM_PER_CPU	100

struct percpu_memcpy_buffer_node {
	intptr_t data1;
	uint64_t data2;
};

struct percpu_memcpy_buffer_entry {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_memcpy_buffer_node *array;
} __attribute__((aligned(128)));

struct percpu_memcpy_buffer {
	struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
};

/* A simple percpu spinlock. Grabs lock on current cpu. */
static int rseq_this_cpu_lock(struct percpu_lock *lock)
{
	int cpu;

	for (;;) {
		int ret;

		cpu = get_current_cpu_id();
		if (cpu < 0) {
			fprintf(stderr, "pid: %d: tid: %d, cpu: %d: cid: %d\n",
					getpid(), (int) rseq_gettid(), rseq_current_cpu_raw(), cpu);
			abort();
		}
		ret = rseq_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
					 &lock->c[cpu].v,
					 0, 1, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	/*
	 * Acquire semantic when taking lock after control dependency.
	 * Matches rseq_smp_store_release().
	 */
	rseq_smp_acquire__after_ctrl_dep();
	return cpu;
}

static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
{
	assert(lock->c[cpu].v == 1);
	/*
	 * Release lock, with release semantic. Matches
	 * rseq_smp_acquire__after_ctrl_dep().
	 */
	rseq_smp_store_release(&lock->c[cpu].v, 0);
}
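
/*
 * Typical usage of the lock pair above (a minimal sketch; the spinlock
 * test threads below do exactly this around their per-cpu counter):
 *
 *	int cpu = rseq_this_cpu_lock(&data->lock);
 *	data->c[cpu].count++;
 *	rseq_percpu_unlock(&data->lock, cpu);
 *
 * The returned cpu pins which per-cpu slot was locked, so the unlock
 * must use the same index even if the thread has since migrated.
 */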

void *test_percpu_spinlock_thread(void *arg)
{
	struct spinlock_thread_test_data *thread_data = arg;
	struct spinlock_test_data *data = thread_data->data;
	long long i, reps;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
		abort();
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
		int cpu = rseq_this_cpu_lock(&data->lock);
		data->c[cpu].count++;
		rseq_percpu_unlock(&data->lock, cpu);
#ifndef BENCHMARK
		if (i != 0 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n",
				       (int) rseq_gettid(), i);
#endif
	}
	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
		abort();
	return NULL;
}

/*
 * A simple test which implements a sharded counter using a per-cpu
 * lock.  Obviously real applications might prefer to simply use a
 * per-cpu increment; however, this is reasonable for a test and the
 * lock can be extended to synchronize more complicated operations.
 */
void test_percpu_spinlock(void)
{
	const int num_threads = opt_threads;
	int i, ret;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct spinlock_test_data data;
	struct spinlock_thread_test_data thread_data[num_threads];

	memset(&data, 0, sizeof(data));
	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = &data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_spinlock_thread,
				     &thread_data[i]);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)opt_reps * num_threads);
}

void *test_percpu_inc_thread(void *arg)
{
	struct inc_thread_test_data *thread_data = arg;
	struct inc_test_data *data = thread_data->data;
	long long i, reps;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
		abort();
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
		int ret;

		do {
			int cpu;

			cpu = get_current_cpu_id();
			ret = rseq_addv(RSEQ_MO_RELAXED, RSEQ_PERCPU,
					&data->c[cpu].count, 1, cpu);
		} while (rseq_unlikely(ret));
#ifndef BENCHMARK
		if (i != 0 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n",
				       (int) rseq_gettid(), i);
#endif
	}
	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
		abort();
	return NULL;
}

void test_percpu_inc(void)
{
	const int num_threads = opt_threads;
	int i, ret;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct inc_test_data data;
	struct inc_thread_test_data thread_data[num_threads];

	memset(&data, 0, sizeof(data));
	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = &data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_inc_thread,
				     &thread_data[i]);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)opt_reps * num_threads);
}

void this_cpu_list_push(struct percpu_list *list,
			struct percpu_list_node *node,
			int *_cpu)
{
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval, expect;
		int ret;

		cpu = get_current_cpu_id();
		/* Load list->c[cpu].head with single-copy atomicity. */
		expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
		newval = (intptr_t)node;
		targetptr = (intptr_t *)&list->c[cpu].head;
		node->next = (struct percpu_list_node *)expect;
		ret = rseq_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
					 targetptr, expect, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
}

/*
 * Unlike a traditional lock-less linked list, the rseq primitive
 * allows us to implement pop without concerns over ABA-type races.
 */
struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
					   int *_cpu)
{
	struct percpu_list_node *node = NULL;
	int cpu;

	for (;;) {
		struct percpu_list_node *head;
		intptr_t *targetptr, expectnot, *load;
		long offset;
		int ret;

		cpu = get_current_cpu_id();
		targetptr = (intptr_t *)&list->c[cpu].head;
		expectnot = (intptr_t)NULL;
		offset = offsetof(struct percpu_list_node, next);
		load = (intptr_t *)&head;
		ret = rseq_cmpnev_storeoffp_load(RSEQ_MO_RELAXED, RSEQ_PERCPU,
						 targetptr, expectnot,
						 offset, load, cpu);
		if (rseq_likely(!ret)) {
			node = head;
			break;
		}
		if (ret > 0)
			break;
		/* Retry if rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return node;
}
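
/*
 * Why no ABA: the load of head, the dereference of head->next and the
 * store back to list->c[cpu].head all happen inside a single rseq
 * critical section executing on the owning cpu. Any preemption,
 * migration or signal delivery between those steps aborts and restarts
 * the sequence, so head cannot be popped, freed and reinserted behind
 * our back the way it could with a plain compare-and-swap loop.
 */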

/*
 * __percpu_list_pop is not safe against concurrent accesses. Should
 * only be used on lists that are not concurrently modified.
 */
struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
{
	struct percpu_list_node *node;

	node = list->c[cpu].head;
	if (!node)
		return NULL;
	list->c[cpu].head = node->next;
	return node;
}

void *test_percpu_list_thread(void *arg)
{
	long long i, reps;
	struct percpu_list *list = (struct percpu_list *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_list_node *node;

		node = this_cpu_list_pop(list, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (node)
			this_cpu_list_push(list, node, NULL);
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu linked list from many threads. */
void test_percpu_list(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_list list;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&list, 0, sizeof(list));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;
		for (j = 1; j <= 100; j++) {
			struct percpu_list_node *node;

			expected_sum += j;

			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			node->next = list.c[i].head;
			list.c[i].head = node;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_list_thread, &list);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_list_pop(&list, i))) {
			sum += node->data;
			free(node);
		}
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

bool this_cpu_buffer_push(struct percpu_buffer *buffer,
			  struct percpu_buffer_node *node,
			  int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_spec, newval_spec;
		intptr_t *targetptr_final, newval_final;
		intptr_t offset;
		int ret;

		cpu = get_current_cpu_id();
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == buffer->c[cpu].buflen)
			break;
		newval_spec = (intptr_t)node;
		targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
		newval_final = offset + 1;
		targetptr_final = &buffer->c[cpu].offset;
		ret = rseq_cmpeqv_trystorev_storev(opt_mo, RSEQ_PERCPU,
			targetptr_final, offset, targetptr_spec,
			newval_spec, newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}

struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
					       int *_cpu)
{
	struct percpu_buffer_node *head;
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval;
		intptr_t offset;
		int ret;

		cpu = get_current_cpu_id();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == 0) {
			head = NULL;
			break;
		}
		head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
		newval = offset - 1;
		targetptr = (intptr_t *)&buffer->c[cpu].offset;
		ret = rseq_cmpeqv_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
			targetptr, offset,
			(intptr_t *)&buffer->c[cpu].array[offset - 1],
			(intptr_t)head, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return head;
}
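
/*
 * The pop above uses a double comparison: before committing the new
 * offset it re-checks that array[offset - 1] still holds the head
 * observed earlier. The offset and the slot are loaded in two separate
 * steps outside the commit, so without the second comparison a
 * concurrent pop/push on this cpu could restore the same offset with a
 * different node in the slot and the commit would silently drop it.
 */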

/*
 * __percpu_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
					       int cpu)
{
	struct percpu_buffer_node *head;
	intptr_t offset;

	offset = buffer->c[cpu].offset;
	if (offset == 0)
		return NULL;
	head = buffer->c[cpu].array[offset - 1];
	buffer->c[cpu].offset = offset - 1;
	return head;
}

void *test_percpu_buffer_thread(void *arg)
{
	long long i, reps;
	struct percpu_buffer *buffer = (struct percpu_buffer *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_buffer_node *node;

		node = this_cpu_buffer_pop(buffer, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (node) {
			if (!this_cpu_buffer_push(buffer, node, NULL)) {
				/* Should increase buffer size. */
				abort();
			}
		}
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu buffer from many threads. */
void test_percpu_buffer(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_buffer buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&buffer, 0, sizeof(buffer));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;
		/* Worst case is every item in the same CPU. */
		buffer.c[i].array =
			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
			       BUFFER_ITEM_PER_CPU);
		assert(buffer.c[i].array);
		buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
			struct percpu_buffer_node *node;

			expected_sum += j;

			/*
			 * We could theoretically put the word-sized
			 * "data" directly in the buffer. However, we
			 * want to model objects that would not fit
			 * within a single word, so allocate an object
			 * for each node.
			 */
			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			buffer.c[i].array[j - 1] = node;
			buffer.c[i].offset++;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_buffer_thread, &buffer);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_buffer_node *node;

		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_buffer_pop(&buffer, i))) {
			sum += node->data;
			free(node);
		}
		free(buffer.c[i].array);
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
				 struct percpu_memcpy_buffer_node item,
				 int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_final, newval_final, offset;
		char *destptr, *srcptr;
		size_t copylen;
		int ret;

		cpu = get_current_cpu_id();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == buffer->c[cpu].buflen)
			break;
		destptr = (char *)&buffer->c[cpu].array[offset];
		srcptr = (char *)&item;
		/* copylen must be <= 4kB. */
		copylen = sizeof(item);
		newval_final = offset + 1;
		targetptr_final = &buffer->c[cpu].offset;
		ret = rseq_cmpeqv_trymemcpy_storev(
			opt_mo, RSEQ_PERCPU,
			targetptr_final, offset,
			destptr, srcptr, copylen,
			newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}

bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
				struct percpu_memcpy_buffer_node *item,
				int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_final, newval_final, offset;
		char *destptr, *srcptr;
		size_t copylen;
		int ret;

		cpu = get_current_cpu_id();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == 0)
			break;
		destptr = (char *)item;
		srcptr = (char *)&buffer->c[cpu].array[offset - 1];
		/* copylen must be <= 4kB. */
		copylen = sizeof(*item);
		newval_final = offset - 1;
		targetptr_final = &buffer->c[cpu].offset;
		ret = rseq_cmpeqv_trymemcpy_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
			targetptr_final, offset, destptr, srcptr, copylen,
			newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}

/*
 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
				struct percpu_memcpy_buffer_node *item,
				int cpu)
{
	intptr_t offset;

	offset = buffer->c[cpu].offset;
	if (offset == 0)
		return false;
	memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
	buffer->c[cpu].offset = offset - 1;
	return true;
}

void *test_percpu_memcpy_buffer_thread(void *arg)
{
	long long i, reps;
	struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_memcpy_buffer_node item;
		bool result;

		result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (result) {
			if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
				/* Should increase buffer size. */
				abort();
			}
		}
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu buffer from many threads. */
void test_percpu_memcpy_buffer(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_memcpy_buffer buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&buffer, 0, sizeof(buffer));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;
		/* Worst case is every item in the same CPU. */
		buffer.c[i].array =
			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
			       MEMCPY_BUFFER_ITEM_PER_CPU);
		assert(buffer.c[i].array);
		buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
			expected_sum += 2 * j + 1;

			/*
			 * We could theoretically put the word-sized
			 * "data" directly in the buffer. However, we
			 * want to model objects that would not fit
			 * within a single word, so allocate an object
			 * for each node.
			 */
			buffer.c[i].array[j - 1].data1 = j;
			buffer.c[i].array[j - 1].data2 = j + 1;
			buffer.c[i].offset++;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_memcpy_buffer_thread,
				     &buffer);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_memcpy_buffer_node item;

		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;

		while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
			sum += item.data1;
			sum += item.data2;
		}
		free(buffer.c[i].array);
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

static void test_signal_interrupt_handler(int signo)
{
	signals_delivered++;
}

static int set_signal_handler(void)
{
	int ret = 0;
	struct sigaction sa;
	sigset_t sigset;

	ret = sigemptyset(&sigset);
	if (ret < 0) {
		perror("sigemptyset");
		return ret;
	}

	sa.sa_handler = test_signal_interrupt_handler;
	sa.sa_mask = sigset;
	sa.sa_flags = 0;
	ret = sigaction(SIGUSR1, &sa, NULL);
	if (ret < 0) {
		perror("sigaction");
		return ret;
	}

	printf_verbose("Signal handler set for SIGUSR1\n");

	return ret;
}

/* Test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU membarrier command. */
#ifdef TEST_MEMBARRIER
struct test_membarrier_thread_args {
	int stop;
	intptr_t percpu_list_ptr;
};

/* Worker threads modify data in their "active" percpu lists. */
void *test_membarrier_worker_thread(void *arg)
{
	struct test_membarrier_thread_args *args =
		(struct test_membarrier_thread_args *)arg;
	const int iters = opt_reps;
	int i;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	/* Wait for initialization. */
	while (!__atomic_load_n(&args->percpu_list_ptr, __ATOMIC_ACQUIRE)) {}

	for (i = 0; i < iters; ++i) {
		int ret;

		do {
			int cpu = get_current_cpu_id();

			ret = rseq_offset_deref_addv(RSEQ_MO_RELAXED, RSEQ_PERCPU,
				&args->percpu_list_ptr,
				sizeof(struct percpu_list_entry) * cpu, 1, cpu);
		} while (rseq_unlikely(ret));
	}

	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}
	return NULL;
}

void test_membarrier_init_percpu_list(struct percpu_list *list)
{
	int i;

	memset(list, 0, sizeof(*list));
	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		node = malloc(sizeof(*node));
		assert(node);
		node->data = 0;
		node->next = NULL;
		list->c[i].head = node;
	}
}

void test_membarrier_free_percpu_list(struct percpu_list *list)
{
	int i;

	for (i = 0; i < CPU_SETSIZE; i++)
		free(list->c[i].head);
}

/*
 * The manager thread swaps per-cpu lists that worker threads see,
 * and validates that there are no unexpected modifications.
 */
void *test_membarrier_manager_thread(void *arg)
{
	struct test_membarrier_thread_args *args =
		(struct test_membarrier_thread_args *)arg;
	struct percpu_list list_a, list_b;
	intptr_t expect_a = 0, expect_b = 0;
	int cpu_a = 0, cpu_b = 0;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	/* Init lists. */
	test_membarrier_init_percpu_list(&list_a);
	test_membarrier_init_percpu_list(&list_b);

	__atomic_store_n(&args->percpu_list_ptr, (intptr_t)&list_a, __ATOMIC_RELEASE);

	while (!__atomic_load_n(&args->stop, __ATOMIC_ACQUIRE)) {
		/* list_a is "active". */
		cpu_a = rand() % CPU_SETSIZE;
		/*
		 * As list_b is "inactive", we should never see changes
		 * to list_b.
		 */
		if (expect_b != __atomic_load_n(&list_b.c[cpu_b].head->data, __ATOMIC_ACQUIRE)) {
			fprintf(stderr, "Membarrier test failed\n");
			abort();
		}

		/* Make list_b "active". */
		__atomic_store_n(&args->percpu_list_ptr, (intptr_t)&list_b, __ATOMIC_RELEASE);
		if (rseq_membarrier_expedited(cpu_a) &&
				errno != ENXIO /* missing CPU */) {
			perror("sys_membarrier");
			abort();
		}
		/*
		 * Cpu A should now only modify list_b, so the values
		 * in list_a should be stable.
		 */
		expect_a = __atomic_load_n(&list_a.c[cpu_a].head->data, __ATOMIC_ACQUIRE);

		cpu_b = rand() % CPU_SETSIZE;
		/*
		 * As list_a is "inactive", we should never see changes
		 * to list_a.
		 */
		if (expect_a != __atomic_load_n(&list_a.c[cpu_a].head->data, __ATOMIC_ACQUIRE)) {
			fprintf(stderr, "Membarrier test failed\n");
			abort();
		}

		/* Make list_a "active". */
		__atomic_store_n(&args->percpu_list_ptr, (intptr_t)&list_a, __ATOMIC_RELEASE);
		if (rseq_membarrier_expedited(cpu_b) &&
				errno != ENXIO /* missing CPU */) {
			perror("sys_membarrier");
			abort();
		}
		/* Remember a value from list_b. */
		expect_b = __atomic_load_n(&list_b.c[cpu_b].head->data, __ATOMIC_ACQUIRE);
	}

	test_membarrier_free_percpu_list(&list_a);
	test_membarrier_free_percpu_list(&list_b);

	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}
	return NULL;
}
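
/*
 * The fencing protocol above relies on
 * MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ aborting any rseq critical
 * section in flight on the targeted cpu: once the manager has published
 * the new list pointer and the membarrier call returns, no worker can
 * still be committing an increment through the old pointer, so the
 * "inactive" list must stay stable until it is swapped back in.
 */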

void test_membarrier(void)
{
	const int num_threads = opt_threads;
	struct test_membarrier_thread_args thread_args;
	pthread_t worker_threads[num_threads];
	pthread_t manager_thread;
	int i, ret;

	if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
		perror("sys_membarrier");
		abort();
	}

	thread_args.stop = 0;
	thread_args.percpu_list_ptr = 0;
	ret = pthread_create(&manager_thread, NULL,
			test_membarrier_manager_thread, &thread_args);
	if (ret) {
		errno = ret;
		perror("pthread_create");
		abort();
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&worker_threads[i], NULL,
				test_membarrier_worker_thread, &thread_args);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(worker_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	__atomic_store_n(&thread_args.stop, 1, __ATOMIC_RELEASE);
	ret = pthread_join(manager_thread, NULL);
	if (ret) {
		errno = ret;
		perror("pthread_join");
		abort();
	}
}
#else /* TEST_MEMBARRIER */
void test_membarrier(void)
{
	fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. "
			"Skipping membarrier test.\n");
}
#endif

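/*
 * Example invocations (a sketch; the binary name follows the usual
 * selftest build, and see show_usage() below for the full option list):
 *
 *	./param_test -T s -t 16 -r 100000	# spinlock, 16 threads
 *	./param_test -T l -y			# list test with yields
 *	./param_test -T m -M			# memcpy test, release MO
 */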
static void show_usage(int argc, char **argv)
{
	printf("Usage: %s <OPTIONS>\n", argv[0]);
	printf("OPTIONS:\n");
	printf("        [-1 loops] Number of loops for delay injection 1\n");
	printf("        [-2 loops] Number of loops for delay injection 2\n");
	printf("        [-3 loops] Number of loops for delay injection 3\n");
	printf("        [-4 loops] Number of loops for delay injection 4\n");
	printf("        [-5 loops] Number of loops for delay injection 5\n");
	printf("        [-6 loops] Number of loops for delay injection 6\n");
	printf("        [-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
	printf("        [-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
	printf("        [-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
	printf("        [-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
	printf("        [-y] Yield\n");
	printf("        [-k] Kill thread with signal\n");
	printf("        [-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
	printf("        [-t N] Number of threads (default 200)\n");
	printf("        [-r N] Number of repetitions per thread (default 5000)\n");
	printf("        [-d] Disable rseq system call (no initialization)\n");
	printf("        [-D M] Disable rseq for each M threads\n");
	printf("        [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
	printf("        [-M] Push into buffer and memcpy buffer with memory barriers.\n");
	printf("        [-v] Verbose output.\n");
	printf("        [-h] Show this help.\n");
	printf("\n");
}

int main(int argc, char **argv)
{
	int i;

	for (i = 1; i < argc; i++) {
		if (argv[i][0] != '-')
			continue;
		switch (argv[i][1]) {
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
		case '8':
		case '9':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
			i++;
			break;
		case 'm':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_modulo = atol(argv[i + 1]);
			if (opt_modulo < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 's':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_sleep = atol(argv[i + 1]);
			if (opt_sleep < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'y':
			opt_yield = 1;
			break;
		case 'k':
			opt_signal = 1;
			break;
		case 'd':
			opt_disable_rseq = 1;
			break;
		case 'D':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_disable_mod = atol(argv[i + 1]);
			if (opt_disable_mod < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 't':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_threads = atol(argv[i + 1]);
			if (opt_threads < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'r':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_reps = atoll(argv[i + 1]);
			if (opt_reps < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'h':
			show_usage(argc, argv);
			goto end;
		case 'T':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_test = *argv[i + 1];
			switch (opt_test) {
			case 's':
			case 'l':
			case 'i':
			case 'b':
			case 'm':
			case 'r':
				break;
			default:
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'v':
			verbose = 1;
			break;
		case 'M':
			opt_mo = RSEQ_MO_RELEASE;
			break;
		default:
			show_usage(argc, argv);
			goto error;
		}
	}

	loop_cnt_1 = loop_cnt[1];
	loop_cnt_2 = loop_cnt[2];
	loop_cnt_3 = loop_cnt[3];
	loop_cnt_4 = loop_cnt[4];
	loop_cnt_5 = loop_cnt[5];
	loop_cnt_6 = loop_cnt[6];

	if (set_signal_handler())
		goto error;

	if (!opt_disable_rseq && rseq_register_current_thread())
		goto error;
	if (!opt_disable_rseq && !rseq_validate_cpu_id()) {
		fprintf(stderr, "Error: cpu id getter unavailable\n");
		goto error;
	}
	switch (opt_test) {
	case 's':
		printf_verbose("spinlock\n");
		test_percpu_spinlock();
		break;
	case 'l':
		printf_verbose("linked list\n");
		test_percpu_list();
		break;
	case 'b':
		printf_verbose("buffer\n");
		test_percpu_buffer();
		break;
	case 'm':
		printf_verbose("memcpy buffer\n");
		test_percpu_memcpy_buffer();
		break;
	case 'i':
		printf_verbose("counter increment\n");
		test_percpu_inc();
		break;
	case 'r':
		printf_verbose("membarrier\n");
		test_membarrier();
		break;
	}
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();
end:
	return 0;

error:
	return -1;
}