arch/loongarch/kernel/perf_event.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Linux performance counter support for LoongArch.
 *
 * Copyright (C) 2022 Loongson Technology Corporation Limited
 *
 * Derived from MIPS:
 * Copyright (C) 2010 MIPS Technologies, Inc.
 * Copyright (C) 2011 Cavium Networks, Inc.
 * Author: Deng-Cheng Zhu
 */

#include <linux/cpumask.h>
#include <linux/interrupt.h>
#include <linux/smp.h>
#include <linux/kernel.h>
#include <linux/perf_event.h>
#include <linux/uaccess.h>
#include <linux/sched/task_stack.h>

#include <asm/irq.h>
#include <asm/irq_regs.h>
#include <asm/stacktrace.h>
#include <asm/unwind.h>

/*
 * Get the return address for a single stackframe and return a pointer to the
 * next frame tail.
 */
static unsigned long
user_backtrace(struct perf_callchain_entry_ctx *entry, unsigned long fp)
{
        unsigned long err;
        unsigned long __user *user_frame_tail;
        struct stack_frame buftail;

        user_frame_tail = (unsigned long __user *)(fp - sizeof(struct stack_frame));

        /* Also check accessibility of one struct frame_tail beyond */
        if (!access_ok(user_frame_tail, sizeof(buftail)))
                return 0;

        pagefault_disable();
        err = __copy_from_user_inatomic(&buftail, user_frame_tail, sizeof(buftail));
        pagefault_enable();

        if (err || (unsigned long)user_frame_tail >= buftail.fp)
                return 0;

        perf_callchain_store(entry, buftail.ra);

        return buftail.fp;
}

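/*
 * Record the user-space call chain: store the faulting PC (csr_era), then
 * walk saved frame pointers starting from $fp (r22), stopping when the
 * frame pointer is NULL, not 16-byte aligned, or the entry is full.
 */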
void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
                         struct pt_regs *regs)
{
        unsigned long fp;

        if (perf_guest_state()) {
                /* We don't support guest os callchain now */
                return;
        }

        perf_callchain_store(entry, regs->csr_era);

        fp = regs->regs[22];

        while (entry->nr < entry->max_stack && fp && !((unsigned long)fp & 0xf))
                fp = user_backtrace(entry, fp);
}

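/*
 * Record the kernel call chain by walking frames with the arch unwinder
 * until unwinding finishes or perf_callchain_store() reports that the
 * entry is full.
 */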
void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
                           struct pt_regs *regs)
{
        struct unwind_state state;
        unsigned long addr;

        for (unwind_start(&state, current, regs);
              !unwind_done(&state); unwind_next_frame(&state)) {
                addr = unwind_get_return_address(&state);
                if (!addr || perf_callchain_store(entry, addr))
                        return;
        }
}

#define LOONGARCH_MAX_HWEVENTS 32

struct cpu_hw_events {
        /* Array of events on this cpu. */
        struct perf_event       *events[LOONGARCH_MAX_HWEVENTS];

        /*
         * Set the bit (indexed by the counter number) when the counter
         * is used for an event.
         */
        unsigned long           used_mask[BITS_TO_LONGS(LOONGARCH_MAX_HWEVENTS)];

        /*
         * Software copy of the control register for each performance counter.
         */
        unsigned int            saved_ctrl[LOONGARCH_MAX_HWEVENTS];
};
static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
        .saved_ctrl = {0},
};

/* The description of LoongArch performance events. */
struct loongarch_perf_event {
        unsigned int event_id;
};

static struct loongarch_perf_event raw_event;
static DEFINE_MUTEX(raw_event_mutex);

#define C(x) PERF_COUNT_HW_CACHE_##x
#define HW_OP_UNSUPPORTED               0xffffffff
#define CACHE_OP_UNSUPPORTED            0xffffffff

#define PERF_MAP_ALL_UNSUPPORTED                                        \
        [0 ... PERF_COUNT_HW_MAX - 1] = {HW_OP_UNSUPPORTED}

#define PERF_CACHE_MAP_ALL_UNSUPPORTED                                  \
[0 ... C(MAX) - 1] = {                                                  \
        [0 ... C(OP_MAX) - 1] = {                                       \
                [0 ... C(RESULT_MAX) - 1] = {CACHE_OP_UNSUPPORTED},     \
        },                                                              \
}

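/*
 * Per-PMU description: counter geometry (number of counters, max period,
 * overflow bit) plus the hooks and event maps used to program the hardware.
 */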
struct loongarch_pmu {
        u64             max_period;
        u64             valid_count;
        u64             overflow;
        const char      *name;
        unsigned int    num_counters;
        u64             (*read_counter)(unsigned int idx);
        void            (*write_counter)(unsigned int idx, u64 val);
        const struct loongarch_perf_event *(*map_raw_event)(u64 config);
        const struct loongarch_perf_event (*general_event_map)[PERF_COUNT_HW_MAX];
        const struct loongarch_perf_event (*cache_event_map)
                                [PERF_COUNT_HW_CACHE_MAX]
                                [PERF_COUNT_HW_CACHE_OP_MAX]
                                [PERF_COUNT_HW_CACHE_RESULT_MAX];
};

static struct loongarch_pmu loongarch_pmu;

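/*
 * M_PERFCTL_EVENT() masks the event-select field of a PERFCTRL CSR value.
 * M_PERFCTL_COUNT_EVENT_WHENEVER is the union of all privilege-level enable
 * bits plus the interrupt enable; clearing these bits is how a counter is
 * paused. M_PERFCTL_CONFIG_MASK selects the bits of hwc->config_base that
 * are written into the control register when an event is programmed.
 */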
#define M_PERFCTL_EVENT(event)  (event & CSR_PERFCTRL_EVENT)

#define M_PERFCTL_COUNT_EVENT_WHENEVER  (CSR_PERFCTRL_PLV0 |    \
                                        CSR_PERFCTRL_PLV1 |     \
                                        CSR_PERFCTRL_PLV2 |     \
                                        CSR_PERFCTRL_PLV3 |     \
                                        CSR_PERFCTRL_IE)

#define M_PERFCTL_CONFIG_MASK           0x1f0000

static void pause_local_counters(void);
static void resume_local_counters(void);

static u64 loongarch_pmu_read_counter(unsigned int idx)
{
        u64 val = -1;

        switch (idx) {
        case 0:
                val = read_csr_perfcntr0();
                break;
        case 1:
                val = read_csr_perfcntr1();
                break;
        case 2:
                val = read_csr_perfcntr2();
                break;
        case 3:
                val = read_csr_perfcntr3();
                break;
        default:
                WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx);
                return 0;
        }

        return val;
}

static void loongarch_pmu_write_counter(unsigned int idx, u64 val)
{
        switch (idx) {
        case 0:
                write_csr_perfcntr0(val);
                return;
        case 1:
                write_csr_perfcntr1(val);
                return;
        case 2:
                write_csr_perfcntr2(val);
                return;
        case 3:
                write_csr_perfcntr3(val);
                return;
        default:
                WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx);
                return;
        }
}

static unsigned int loongarch_pmu_read_control(unsigned int idx)
{
        unsigned int val = -1;

        switch (idx) {
        case 0:
                val = read_csr_perfctrl0();
                break;
        case 1:
                val = read_csr_perfctrl1();
                break;
        case 2:
                val = read_csr_perfctrl2();
                break;
        case 3:
                val = read_csr_perfctrl3();
                break;
        default:
                WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx);
                return 0;
        }

        return val;
}

static void loongarch_pmu_write_control(unsigned int idx, unsigned int val)
{
        switch (idx) {
        case 0:
                write_csr_perfctrl0(val);
                return;
        case 1:
                write_csr_perfctrl1(val);
                return;
        case 2:
                write_csr_perfctrl2(val);
                return;
        case 3:
                write_csr_perfctrl3(val);
                return;
        default:
                WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx);
                return;
        }
}

static int loongarch_pmu_alloc_counter(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
{
        int i;

        for (i = 0; i < loongarch_pmu.num_counters; i++) {
                if (!test_and_set_bit(i, cpuc->used_mask))
                        return i;
        }

        return -EAGAIN;
}

static void loongarch_pmu_enable_event(struct hw_perf_event *evt, int idx)
{
        unsigned int cpu;
        struct perf_event *event = container_of(evt, struct perf_event, hw);
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters);

        /* Make sure the interrupt enable bit is set. */
        cpuc->saved_ctrl[idx] = M_PERFCTL_EVENT(evt->event_base & 0xff) |
                (evt->config_base & M_PERFCTL_CONFIG_MASK) | CSR_PERFCTRL_IE;

        cpu = (event->cpu >= 0) ? event->cpu : smp_processor_id();

        /*
         * We do not actually start the counter here; the saved control value
         * is written to the hardware when the PMU is re-enabled, see
         * resume_local_counters().
         */
        pr_debug("Enabling perf counter for CPU%d\n", cpu);
}

static void loongarch_pmu_disable_event(int idx)
{
        unsigned long flags;
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters);

        local_irq_save(flags);
        cpuc->saved_ctrl[idx] = loongarch_pmu_read_control(idx) &
                ~M_PERFCTL_COUNT_EVENT_WHENEVER;
        loongarch_pmu_write_control(idx, cpuc->saved_ctrl[idx]);
        local_irq_restore(flags);
}

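/*
 * Program the counter so that it overflows after "left" more events: the
 * reload value is (overflow - left), with bit 63 acting as the overflow
 * flag that the interrupt handler checks.
 */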
static int loongarch_pmu_event_set_period(struct perf_event *event,
                                    struct hw_perf_event *hwc,
                                    int idx)
{
        int ret = 0;
        u64 left = local64_read(&hwc->period_left);
        u64 period = hwc->sample_period;

        if (unlikely((left + period) & (1ULL << 63))) {
                /* left underflowed by more than period. */
                left = period;
                local64_set(&hwc->period_left, left);
                hwc->last_period = period;
                ret = 1;
        } else if (unlikely((left + period) <= period)) {
                /* left underflowed by less than period. */
                left += period;
                local64_set(&hwc->period_left, left);
                hwc->last_period = period;
                ret = 1;
        }

        if (left > loongarch_pmu.max_period) {
                left = loongarch_pmu.max_period;
                local64_set(&hwc->period_left, left);
        }

        local64_set(&hwc->prev_count, loongarch_pmu.overflow - left);

        loongarch_pmu.write_counter(idx, loongarch_pmu.overflow - left);

        perf_event_update_userpage(event);

        return ret;
}

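/*
 * Fold the hardware counter delta into the perf event count. The cmpxchg
 * loop guards against a racing update of prev_count (e.g. from the overflow
 * interrupt) between reading the counter and recording the new value.
 */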
static void loongarch_pmu_event_update(struct perf_event *event,
                                 struct hw_perf_event *hwc,
                                 int idx)
{
        u64 delta;
        u64 prev_raw_count, new_raw_count;

again:
        prev_raw_count = local64_read(&hwc->prev_count);
        new_raw_count = loongarch_pmu.read_counter(idx);

        if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
                                new_raw_count) != prev_raw_count)
                goto again;

        delta = new_raw_count - prev_raw_count;

        local64_add(delta, &event->count);
        local64_sub(delta, &hwc->period_left);
}

static void loongarch_pmu_start(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;

        if (flags & PERF_EF_RELOAD)
                WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));

        hwc->state = 0;

        /* Set the period for the event. */
        loongarch_pmu_event_set_period(event, hwc, hwc->idx);

        /* Enable the event. */
        loongarch_pmu_enable_event(hwc, hwc->idx);
}

static void loongarch_pmu_stop(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;

        if (!(hwc->state & PERF_HES_STOPPED)) {
                /* We are working on a local event. */
                loongarch_pmu_disable_event(hwc->idx);
                barrier();
                loongarch_pmu_event_update(event, hwc, hwc->idx);
                hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
        }
}

static int loongarch_pmu_add(struct perf_event *event, int flags)
{
        int idx, err = 0;
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;

        perf_pmu_disable(event->pmu);

        /* Look for a free counter for this event. */
        idx = loongarch_pmu_alloc_counter(cpuc, hwc);
        if (idx < 0) {
                err = idx;
                goto out;
        }

        /*
         * If there is an event in the counter we are going to use then
         * make sure it is disabled.
         */
        event->hw.idx = idx;
        loongarch_pmu_disable_event(idx);
        cpuc->events[idx] = event;

        hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
        if (flags & PERF_EF_START)
                loongarch_pmu_start(event, PERF_EF_RELOAD);

        /* Propagate our changes to the userspace mapping. */
        perf_event_update_userpage(event);

out:
        perf_pmu_enable(event->pmu);
        return err;
}

static void loongarch_pmu_del(struct perf_event *event, int flags)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;
        int idx = hwc->idx;

        WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters);

        loongarch_pmu_stop(event, PERF_EF_UPDATE);
        cpuc->events[idx] = NULL;
        clear_bit(idx, cpuc->used_mask);

        perf_event_update_userpage(event);
}

static void loongarch_pmu_read(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;

        /* Don't read disabled counters! */
        if (hwc->idx < 0)
                return;

        loongarch_pmu_event_update(event, hwc, hwc->idx);
}

static void loongarch_pmu_enable(struct pmu *pmu)
{
        resume_local_counters();
}

static void loongarch_pmu_disable(struct pmu *pmu)
{
        pause_local_counters();
}

static DEFINE_MUTEX(pmu_reserve_mutex);
static atomic_t active_events = ATOMIC_INIT(0);

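/*
 * Map the PMC (performance counter overflow) exception to a Linux IRQ via
 * the CPU interrupt controller domain; a single shared handler services all
 * counters.
 */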
static int get_pmc_irq(void)
{
        struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);

        if (d)
                return irq_create_mapping(d, EXCCODE_PMC - EXCCODE_INT_START);

        return -EINVAL;
}

static void reset_counters(void *arg);
static int __hw_perf_event_init(struct perf_event *event);

static void hw_perf_event_destroy(struct perf_event *event)
{
        if (atomic_dec_and_mutex_lock(&active_events, &pmu_reserve_mutex)) {
                on_each_cpu(reset_counters, NULL, 1);
                free_irq(get_pmc_irq(), &loongarch_pmu);
                mutex_unlock(&pmu_reserve_mutex);
        }
}

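/*
 * Handle one overflowed counter: fold in the delta, reprogram the sampling
 * period, and hand the sample to the perf core; if the core asks for the
 * event to be stopped (e.g. throttling), disable the counter.
 */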
static void handle_associated_event(struct cpu_hw_events *cpuc, int idx,
                        struct perf_sample_data *data, struct pt_regs *regs)
{
        struct perf_event *event = cpuc->events[idx];
        struct hw_perf_event *hwc = &event->hw;

        loongarch_pmu_event_update(event, hwc, idx);
        data->period = event->hw.last_period;
        if (!loongarch_pmu_event_set_period(event, hwc, idx))
                return;

        if (perf_event_overflow(event, data, regs))
                loongarch_pmu_disable_event(idx);
}

static irqreturn_t pmu_handle_irq(int irq, void *dev)
{
        int n;
        int handled = IRQ_NONE;
        uint64_t counter;
        struct pt_regs *regs;
        struct perf_sample_data data;
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        /*
         * First we pause the local counters, so that when we are locked
         * here, the counters are all paused. When it gets locked due to
         * perf_disable(), the timer interrupt handler will be delayed.
         *
         * See also loongarch_pmu_start().
         */
        pause_local_counters();

        regs = get_irq_regs();

        perf_sample_data_init(&data, 0, 0);

        for (n = 0; n < loongarch_pmu.num_counters; n++) {
                if (test_bit(n, cpuc->used_mask)) {
                        counter = loongarch_pmu.read_counter(n);
                        if (counter & loongarch_pmu.overflow) {
                                handle_associated_event(cpuc, n, &data, regs);
                                handled = IRQ_HANDLED;
                        }
                }
        }

        resume_local_counters();

        /*
         * Do all the work for the pending perf events. We can do this
         * in here because the performance counter interrupt is a regular
         * interrupt, not NMI.
         */
        if (handled == IRQ_HANDLED)
                irq_work_run();

        return handled;
}

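/*
 * Validate the event type and, on the first active event, request the PMC
 * IRQ. The active_events count and pmu_reserve_mutex pair with
 * hw_perf_event_destroy(), which releases the IRQ when the last event goes
 * away.
 */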
static int loongarch_pmu_event_init(struct perf_event *event)
{
        int r, irq;
        unsigned long flags;

        /* does not support taken branch sampling */
        if (has_branch_stack(event))
                return -EOPNOTSUPP;

        switch (event->attr.type) {
        case PERF_TYPE_RAW:
        case PERF_TYPE_HARDWARE:
        case PERF_TYPE_HW_CACHE:
                break;

        default:
                /* Initialize it to avoid a false validate_group() result */
                event->hw.event_base = 0xffffffff;
                return -ENOENT;
        }

        if (event->cpu >= 0 && !cpu_online(event->cpu))
                return -ENODEV;

        irq = get_pmc_irq();
        flags = IRQF_PERCPU | IRQF_NOBALANCING | IRQF_NO_THREAD | IRQF_NO_SUSPEND | IRQF_SHARED;
        if (!atomic_inc_not_zero(&active_events)) {
                mutex_lock(&pmu_reserve_mutex);
                if (atomic_read(&active_events) == 0) {
                        r = request_irq(irq, pmu_handle_irq, flags, "Perf_PMU", &loongarch_pmu);
                        if (r < 0) {
                                mutex_unlock(&pmu_reserve_mutex);
                                pr_warn("PMU IRQ request failed\n");
                                return -ENODEV;
                        }
                }
                atomic_inc(&active_events);
                mutex_unlock(&pmu_reserve_mutex);
        }

        return __hw_perf_event_init(event);
}

static struct pmu pmu = {
        .pmu_enable     = loongarch_pmu_enable,
        .pmu_disable    = loongarch_pmu_disable,
        .event_init     = loongarch_pmu_event_init,
        .add            = loongarch_pmu_add,
        .del            = loongarch_pmu_del,
        .start          = loongarch_pmu_start,
        .stop           = loongarch_pmu_stop,
        .read           = loongarch_pmu_read,
};

static unsigned int loongarch_pmu_perf_event_encode(const struct loongarch_perf_event *pev)
{
        return (pev->event_id & 0xff);
}

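/*
 * Event code lookup: the helpers below pick an entry from the generic and
 * cache event maps and return an ERR_PTR for unsupported combinations;
 * loongarch_pmu_perf_event_encode() then reduces the entry to its 8-bit
 * hardware event code.
 */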
static const struct loongarch_perf_event *loongarch_pmu_map_general_event(int idx)
{
        const struct loongarch_perf_event *pev;

        pev = &(*loongarch_pmu.general_event_map)[idx];

        if (pev->event_id == HW_OP_UNSUPPORTED)
                return ERR_PTR(-ENOENT);

        return pev;
}

static const struct loongarch_perf_event *loongarch_pmu_map_cache_event(u64 config)
{
        unsigned int cache_type, cache_op, cache_result;
        const struct loongarch_perf_event *pev;

        cache_type = (config >> 0) & 0xff;
        if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
                return ERR_PTR(-EINVAL);

        cache_op = (config >> 8) & 0xff;
        if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
                return ERR_PTR(-EINVAL);

        cache_result = (config >> 16) & 0xff;
        if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
                return ERR_PTR(-EINVAL);

        pev = &((*loongarch_pmu.cache_event_map)
                                        [cache_type]
                                        [cache_op]
                                        [cache_result]);

        if (pev->event_id == CACHE_OP_UNSUPPORTED)
                return ERR_PTR(-ENOENT);

        return pev;
}

static int validate_group(struct perf_event *event)
{
        struct cpu_hw_events fake_cpuc;
        struct perf_event *sibling, *leader = event->group_leader;

        memset(&fake_cpuc, 0, sizeof(fake_cpuc));

        if (loongarch_pmu_alloc_counter(&fake_cpuc, &leader->hw) < 0)
                return -EINVAL;

        for_each_sibling_event(sibling, leader) {
                if (loongarch_pmu_alloc_counter(&fake_cpuc, &sibling->hw) < 0)
                        return -EINVAL;
        }

        if (loongarch_pmu_alloc_counter(&fake_cpuc, &event->hw) < 0)
                return -EINVAL;

        return 0;
}

static void reset_counters(void *arg)
{
        int n;
        int counters = loongarch_pmu.num_counters;

        for (n = 0; n < counters; n++) {
                loongarch_pmu_write_control(n, 0);
                loongarch_pmu.write_counter(n, 0);
        }
}

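/*
 * Event code tables for the "loongarch/loongson64" PMU. With these mappings
 * a command such as "perf stat -e cycles,instructions,branch-misses"
 * (assuming a stock perf tool) resolves to the hardware codes below, while
 * cache events go through loongson_cache_map.
 */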
static const struct loongarch_perf_event loongson_event_map[PERF_COUNT_HW_MAX] = {
        PERF_MAP_ALL_UNSUPPORTED,
        [PERF_COUNT_HW_CPU_CYCLES] = { 0x00 },
        [PERF_COUNT_HW_INSTRUCTIONS] = { 0x01 },
        [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x08 },
        [PERF_COUNT_HW_CACHE_MISSES] = { 0x09 },
        [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x02 },
        [PERF_COUNT_HW_BRANCH_MISSES] = { 0x03 },
};

static const struct loongarch_perf_event loongson_cache_map
                                [PERF_COUNT_HW_CACHE_MAX]
                                [PERF_COUNT_HW_CACHE_OP_MAX]
                                [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
PERF_CACHE_MAP_ALL_UNSUPPORTED,
[C(L1D)] = {
        /*
         * Like some other architectures (e.g. ARM), the performance
         * counters don't differentiate between read and write
         * accesses/misses, so this isn't strictly correct, but it's the
         * best we can do. Writes and reads get combined.
         */
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)]      = { 0x8 },
                [C(RESULT_MISS)]        = { 0x9 },
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)]      = { 0x8 },
                [C(RESULT_MISS)]        = { 0x9 },
        },
        [C(OP_PREFETCH)] = {
                [C(RESULT_ACCESS)]      = { 0xaa },
                [C(RESULT_MISS)]        = { 0xa9 },
        },
},
[C(L1I)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)]      = { 0x6 },
                [C(RESULT_MISS)]        = { 0x7 },
        },
},
[C(LL)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)]      = { 0xc },
                [C(RESULT_MISS)]        = { 0xd },
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)]      = { 0xc },
                [C(RESULT_MISS)]        = { 0xd },
        },
},
[C(ITLB)] = {
        [C(OP_READ)] = {
                [C(RESULT_MISS)]    = { 0x3b },
        },
},
[C(DTLB)] = {
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)]      = { 0x4 },
                [C(RESULT_MISS)]        = { 0x3c },
        },
        [C(OP_WRITE)] = {
                [C(RESULT_ACCESS)]      = { 0x4 },
                [C(RESULT_MISS)]        = { 0x3c },
        },
},
[C(BPU)] = {
        /* Using the same code for *HW_BRANCH* */
        [C(OP_READ)] = {
                [C(RESULT_ACCESS)]  = { 0x02 },
                [C(RESULT_MISS)]    = { 0x03 },
        },
},
};

static int __hw_perf_event_init(struct perf_event *event)
{
        int err;
        struct hw_perf_event *hwc = &event->hw;
        struct perf_event_attr *attr = &event->attr;
        const struct loongarch_perf_event *pev;

        /* Return the LoongArch event descriptor for a generic perf event. */
        if (PERF_TYPE_HARDWARE == event->attr.type) {
                if (event->attr.config >= PERF_COUNT_HW_MAX)
                        return -EINVAL;
                pev = loongarch_pmu_map_general_event(event->attr.config);
        } else if (PERF_TYPE_HW_CACHE == event->attr.type) {
                pev = loongarch_pmu_map_cache_event(event->attr.config);
        } else if (PERF_TYPE_RAW == event->attr.type) {
                /* We are working on the global raw event. */
                mutex_lock(&raw_event_mutex);
                pev = loongarch_pmu.map_raw_event(event->attr.config);
        } else {
                /* The event type is not (yet) supported. */
                return -EOPNOTSUPP;
        }

        if (IS_ERR(pev)) {
                if (PERF_TYPE_RAW == event->attr.type)
                        mutex_unlock(&raw_event_mutex);
                return PTR_ERR(pev);
        }

        /*
         * We allow maximum flexibility in how each individual counter
         * shared by a single CPU operates (the mode exclusion and the
         * range).
         */
        hwc->config_base = CSR_PERFCTRL_IE;

        hwc->event_base = loongarch_pmu_perf_event_encode(pev);
        if (PERF_TYPE_RAW == event->attr.type)
                mutex_unlock(&raw_event_mutex);

        if (!attr->exclude_user) {
                hwc->config_base |= CSR_PERFCTRL_PLV3;
                hwc->config_base |= CSR_PERFCTRL_PLV2;
        }
        if (!attr->exclude_kernel) {
                hwc->config_base |= CSR_PERFCTRL_PLV0;
        }
        if (!attr->exclude_hv) {
                hwc->config_base |= CSR_PERFCTRL_PLV1;
        }

        hwc->config_base &= M_PERFCTL_CONFIG_MASK;
        /*
         * The event can belong to another cpu. We do not assign a local
         * counter for it for now.
         */
        hwc->idx = -1;
        hwc->config = 0;

        if (!hwc->sample_period) {
                hwc->sample_period  = loongarch_pmu.max_period;
                hwc->last_period    = hwc->sample_period;
                local64_set(&hwc->period_left, hwc->sample_period);
        }

        err = 0;
        if (event->group_leader != event)
                err = validate_group(event);

        event->destroy = hw_perf_event_destroy;

        if (err)
                event->destroy(event);

        return err;
}

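/*
 * pause/resume_local_counters() stop and restart all counters on the local
 * CPU by clearing/restoring the privilege-level and interrupt-enable bits;
 * they back the pmu_disable()/pmu_enable() callbacks and the IRQ handler.
 */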
static void pause_local_counters(void)
{
        unsigned long flags;
        int ctr = loongarch_pmu.num_counters;
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        local_irq_save(flags);
        do {
                ctr--;
                cpuc->saved_ctrl[ctr] = loongarch_pmu_read_control(ctr);
                loongarch_pmu_write_control(ctr, cpuc->saved_ctrl[ctr] &
                                         ~M_PERFCTL_COUNT_EVENT_WHENEVER);
        } while (ctr > 0);
        local_irq_restore(flags);
}

static void resume_local_counters(void)
{
        int ctr = loongarch_pmu.num_counters;
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

        do {
                ctr--;
                loongarch_pmu_write_control(ctr, cpuc->saved_ctrl[ctr]);
        } while (ctr > 0);
}

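/*
 * Raw events select a hardware event code directly: only the low 8 bits of
 * attr.config are used (e.g. "perf stat -e rNN" with a hex code NN). The
 * single shared raw_event object is protected by raw_event_mutex, taken in
 * __hw_perf_event_init().
 */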
static const struct loongarch_perf_event *loongarch_pmu_map_raw_event(u64 config)
{
        raw_event.event_id = config & 0xff;

        return &raw_event;
}

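/*
 * Probe the PMU at early init: the number of counters comes from the PMNUM
 * field of CPUCFG6, and each counter is 64 bits wide with bit 63 used as
 * the overflow flag.
 */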
static int __init init_hw_perf_events(void)
{
        int counters;

        if (!cpu_has_pmp)
                return -ENODEV;

        pr_info("Performance counters: ");
        counters = ((read_cpucfg(LOONGARCH_CPUCFG6) & CPUCFG6_PMNUM) >> 4) + 1;

        loongarch_pmu.num_counters = counters;
        loongarch_pmu.max_period = (1ULL << 63) - 1;
        loongarch_pmu.valid_count = (1ULL << 63) - 1;
        loongarch_pmu.overflow = 1ULL << 63;
        loongarch_pmu.name = "loongarch/loongson64";
        loongarch_pmu.read_counter = loongarch_pmu_read_counter;
        loongarch_pmu.write_counter = loongarch_pmu_write_counter;
        loongarch_pmu.map_raw_event = loongarch_pmu_map_raw_event;
        loongarch_pmu.general_event_map = &loongson_event_map;
        loongarch_pmu.cache_event_map = &loongson_cache_map;

        on_each_cpu(reset_counters, NULL, 1);

        pr_cont("%s PMU enabled, %d %d-bit counters available to each CPU.\n",
                        loongarch_pmu.name, counters, 64);

        perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);

        return 0;
}
early_initcall(init_hw_perf_events);