8c6e227cb29df47b8d143dbf9766229a785813b9
[sfrench/cifs-2.6.git] / arch / loongarch / kernel / smp.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
4  *
5  * Derived from MIPS:
6  * Copyright (C) 2000, 2001 Kanoj Sarcar
7  * Copyright (C) 2000, 2001 Ralf Baechle
8  * Copyright (C) 2000, 2001 Silicon Graphics, Inc.
9  * Copyright (C) 2000, 2001, 2003 Broadcom Corporation
10  */
11 #include <linux/cpu.h>
12 #include <linux/cpumask.h>
13 #include <linux/init.h>
14 #include <linux/interrupt.h>
15 #include <linux/seq_file.h>
16 #include <linux/smp.h>
17 #include <linux/threads.h>
18 #include <linux/export.h>
19 #include <linux/syscore_ops.h>
20 #include <linux/time.h>
21 #include <linux/tracepoint.h>
22 #include <linux/sched/hotplug.h>
23 #include <linux/sched/task_stack.h>
24
25 #include <asm/cpu.h>
26 #include <asm/idle.h>
27 #include <asm/loongson.h>
28 #include <asm/mmu_context.h>
29 #include <asm/numa.h>
30 #include <asm/processor.h>
31 #include <asm/setup.h>
32 #include <asm/time.h>
33
34 int __cpu_number_map[NR_CPUS];   /* Map physical to logical */
35 EXPORT_SYMBOL(__cpu_number_map);
36
37 int __cpu_logical_map[NR_CPUS];         /* Map logical to physical */
38 EXPORT_SYMBOL(__cpu_logical_map);
39
40 /* Number of threads (siblings) per CPU core */
41 int smp_num_siblings = 1;
42 EXPORT_SYMBOL(smp_num_siblings);
43
44 /* Representing the threads (siblings) of each logical CPU */
45 cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
46 EXPORT_SYMBOL(cpu_sibling_map);
47
48 /* Representing the core map of multi-core chips of each logical CPU */
49 cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
50 EXPORT_SYMBOL(cpu_core_map);
51
52 static DECLARE_COMPLETION(cpu_starting);
53 static DECLARE_COMPLETION(cpu_running);
54
55 /*
56  * A logcal cpu mask containing only one VPE per core to
57  * reduce the number of IPIs on large MT systems.
58  */
59 cpumask_t cpu_foreign_map[NR_CPUS] __read_mostly;
60 EXPORT_SYMBOL(cpu_foreign_map);
61
62 /* representing cpus for which sibling maps can be computed */
63 static cpumask_t cpu_sibling_setup_map;
64
65 /* representing cpus for which core maps can be computed */
66 static cpumask_t cpu_core_setup_map;
67
68 struct secondary_data cpuboot_data;
69 static DEFINE_PER_CPU(int, cpu_state);
70
/* Indices into the per-CPU ipi_irqs[] counters and into ipi_types[] */
enum ipi_msg_type {
	IPI_RESCHEDULE,		/* counted when SMP_RESCHEDULE is handled */
	IPI_CALL_FUNCTION,	/* counted when SMP_CALL_FUNCTION is handled */
};
75
76 static const char *ipi_types[NR_IPI] __tracepoint_string = {
77         [IPI_RESCHEDULE] = "Rescheduling interrupts",
78         [IPI_CALL_FUNCTION] = "Function call interrupts",
79 };
80
81 void show_ipi_list(struct seq_file *p, int prec)
82 {
83         unsigned int cpu, i;
84
85         for (i = 0; i < NR_IPI; i++) {
86                 seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i, prec >= 4 ? " " : "");
87                 for_each_online_cpu(cpu)
88                         seq_printf(p, "%10u ", per_cpu(irq_stat, cpu).ipi_irqs[i]);
89                 seq_printf(p, " LoongArch  %d  %s\n", i + 1, ipi_types[i]);
90         }
91 }
92
93 /* Send mailbox buffer via Mail_Send */
94 static void csr_mail_send(uint64_t data, int cpu, int mailbox)
95 {
96         uint64_t val;
97
98         /* Send high 32 bits */
99         val = IOCSR_MBUF_SEND_BLOCKING;
100         val |= (IOCSR_MBUF_SEND_BOX_HI(mailbox) << IOCSR_MBUF_SEND_BOX_SHIFT);
101         val |= (cpu << IOCSR_MBUF_SEND_CPU_SHIFT);
102         val |= (data & IOCSR_MBUF_SEND_H32_MASK);
103         iocsr_write64(val, LOONGARCH_IOCSR_MBUF_SEND);
104
105         /* Send low 32 bits */
106         val = IOCSR_MBUF_SEND_BLOCKING;
107         val |= (IOCSR_MBUF_SEND_BOX_LO(mailbox) << IOCSR_MBUF_SEND_BOX_SHIFT);
108         val |= (cpu << IOCSR_MBUF_SEND_CPU_SHIFT);
109         val |= (data << IOCSR_MBUF_SEND_BUF_SHIFT);
110         iocsr_write64(val, LOONGARCH_IOCSR_MBUF_SEND);
111 };
112
113 static u32 ipi_read_clear(int cpu)
114 {
115         u32 action;
116
117         /* Load the ipi register to figure out what we're supposed to do */
118         action = iocsr_read32(LOONGARCH_IOCSR_IPI_STATUS);
119         /* Clear the ipi register to clear the interrupt */
120         iocsr_write32(action, LOONGARCH_IOCSR_IPI_CLEAR);
121         smp_mb();
122
123         return action;
124 }
125
126 static void ipi_write_action(int cpu, u32 action)
127 {
128         unsigned int irq = 0;
129
130         while ((irq = ffs(action))) {
131                 uint32_t val = IOCSR_IPI_SEND_BLOCKING;
132
133                 val |= (irq - 1);
134                 val |= (cpu << IOCSR_IPI_SEND_CPU_SHIFT);
135                 iocsr_write32(val, LOONGARCH_IOCSR_IPI_SEND);
136                 action &= ~BIT(irq - 1);
137         }
138 }
139
140 void loongson_send_ipi_single(int cpu, unsigned int action)
141 {
142         ipi_write_action(cpu_logical_map(cpu), (u32)action);
143 }
144
145 void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action)
146 {
147         unsigned int i;
148
149         for_each_cpu(i, mask)
150                 ipi_write_action(cpu_logical_map(i), (u32)action);
151 }
152
153 /*
154  * This function sends a 'reschedule' IPI to another CPU.
155  * it goes straight through and wastes no time serializing
156  * anything. Worst case is that we lose a reschedule ...
157  */
158 void smp_send_reschedule(int cpu)
159 {
160         loongson_send_ipi_single(cpu, SMP_RESCHEDULE);
161 }
162 EXPORT_SYMBOL_GPL(smp_send_reschedule);
163
164 irqreturn_t loongson_ipi_interrupt(int irq, void *dev)
165 {
166         unsigned int action;
167         unsigned int cpu = smp_processor_id();
168
169         action = ipi_read_clear(cpu_logical_map(cpu));
170
171         if (action & SMP_RESCHEDULE) {
172                 scheduler_ipi();
173                 per_cpu(irq_stat, cpu).ipi_irqs[IPI_RESCHEDULE]++;
174         }
175
176         if (action & SMP_CALL_FUNCTION) {
177                 generic_smp_call_function_interrupt();
178                 per_cpu(irq_stat, cpu).ipi_irqs[IPI_CALL_FUNCTION]++;
179         }
180
181         return IRQ_HANDLED;
182 }
183
184 static void __init fdt_smp_setup(void)
185 {
186 #ifdef CONFIG_OF
187         unsigned int cpu, cpuid;
188         struct device_node *node = NULL;
189
190         for_each_of_cpu_node(node) {
191                 if (!of_device_is_available(node))
192                         continue;
193
194                 cpuid = of_get_cpu_hwid(node, 0);
195                 if (cpuid >= nr_cpu_ids)
196                         continue;
197
198                 if (cpuid == loongson_sysconf.boot_cpu_id) {
199                         cpu = 0;
200                         numa_add_cpu(cpu);
201                 } else {
202                         cpu = cpumask_next_zero(-1, cpu_present_mask);
203                 }
204
205                 num_processors++;
206                 set_cpu_possible(cpu, true);
207                 set_cpu_present(cpu, true);
208                 __cpu_number_map[cpuid] = cpu;
209                 __cpu_logical_map[cpu] = cpuid;
210         }
211
212         loongson_sysconf.nr_cpus = num_processors;
213 #endif
214 }
215
216 void __init loongson_smp_setup(void)
217 {
218         fdt_smp_setup();
219
220         cpu_data[0].core = cpu_logical_map(0) % loongson_sysconf.cores_per_package;
221         cpu_data[0].package = cpu_logical_map(0) / loongson_sysconf.cores_per_package;
222
223         iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_EN);
224         pr_info("Detected %i available CPU(s)\n", loongson_sysconf.nr_cpus);
225 }
226
227 void __init loongson_prepare_cpus(unsigned int max_cpus)
228 {
229         int i = 0;
230
231         for (i = 0; i < loongson_sysconf.nr_cpus; i++) {
232                 set_cpu_present(i, true);
233                 csr_mail_send(0, __cpu_logical_map[i], 0);
234         }
235
236         per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
237 }
238
239 /*
240  * Setup the PC, SP, and TP of a secondary processor and start it running!
241  */
242 void loongson_boot_secondary(int cpu, struct task_struct *idle)
243 {
244         unsigned long entry;
245
246         pr_info("Booting CPU#%d...\n", cpu);
247
248         entry = __pa_symbol((unsigned long)&smpboot_entry);
249         cpuboot_data.stack = (unsigned long)__KSTK_TOS(idle);
250         cpuboot_data.thread_info = (unsigned long)task_thread_info(idle);
251
252         csr_mail_send(entry, cpu_logical_map(cpu), 0);
253
254         loongson_send_ipi_single(cpu, SMP_BOOT_CPU);
255 }
256
257 /*
258  * SMP init and finish on secondary CPUs
259  */
260 void loongson_init_secondary(void)
261 {
262         unsigned int cpu = smp_processor_id();
263         unsigned int imask = ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 |
264                              ECFGF_IPI | ECFGF_PMC | ECFGF_TIMER;
265
266         change_csr_ecfg(ECFG0_IM, imask);
267
268         iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_EN);
269
270 #ifdef CONFIG_NUMA
271         numa_add_cpu(cpu);
272 #endif
273         per_cpu(cpu_state, cpu) = CPU_ONLINE;
274         cpu_data[cpu].core =
275                      cpu_logical_map(cpu) % loongson_sysconf.cores_per_package;
276         cpu_data[cpu].package =
277                      cpu_logical_map(cpu) / loongson_sysconf.cores_per_package;
278 }
279
280 void loongson_smp_finish(void)
281 {
282         local_irq_enable();
283         iocsr_write64(0, LOONGARCH_IOCSR_MBUF0);
284         pr_info("CPU#%d finished\n", smp_processor_id());
285 }
286
287 #ifdef CONFIG_HOTPLUG_CPU
288
289 int loongson_cpu_disable(void)
290 {
291         unsigned long flags;
292         unsigned int cpu = smp_processor_id();
293
294         if (io_master(cpu))
295                 return -EBUSY;
296
297 #ifdef CONFIG_NUMA
298         numa_remove_cpu(cpu);
299 #endif
300         set_cpu_online(cpu, false);
301         calculate_cpu_foreign_map();
302         local_irq_save(flags);
303         irq_migrate_all_off_this_cpu();
304         clear_csr_ecfg(ECFG0_IM);
305         local_irq_restore(flags);
306         local_flush_tlb_all();
307
308         return 0;
309 }
310
311 void loongson_cpu_die(unsigned int cpu)
312 {
313         while (per_cpu(cpu_state, cpu) != CPU_DEAD)
314                 cpu_relax();
315
316         mb();
317 }
318
319 void play_dead(void)
320 {
321         register uint64_t addr;
322         register void (*init_fn)(void);
323
324         idle_task_exit();
325         local_irq_enable();
326         set_csr_ecfg(ECFGF_IPI);
327         __this_cpu_write(cpu_state, CPU_DEAD);
328
329         __smp_mb();
330         do {
331                 __asm__ __volatile__("idle 0\n\t");
332                 addr = iocsr_read64(LOONGARCH_IOCSR_MBUF0);
333         } while (addr == 0);
334
335         init_fn = (void *)TO_CACHE(addr);
336         iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_CLEAR);
337
338         init_fn();
339         unreachable();
340 }
341
342 #endif
343
344 /*
345  * Power management
346  */
347 #ifdef CONFIG_PM
348
/* Syscore suspend hook: there is nothing to do, so always report success (0). */
static int loongson_ipi_suspend(void)
{
	return 0;
}
353
354 static void loongson_ipi_resume(void)
355 {
356         iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_EN);
357 }
358
359 static struct syscore_ops loongson_ipi_syscore_ops = {
360         .resume         = loongson_ipi_resume,
361         .suspend        = loongson_ipi_suspend,
362 };
363
364 /*
365  * Enable boot cpu ipi before enabling nonboot cpus
366  * during syscore_resume.
367  */
368 static int __init ipi_pm_init(void)
369 {
370         register_syscore_ops(&loongson_ipi_syscore_ops);
371         return 0;
372 }
373
374 core_initcall(ipi_pm_init);
375 #endif
376
377 static inline void set_cpu_sibling_map(int cpu)
378 {
379         int i;
380
381         cpumask_set_cpu(cpu, &cpu_sibling_setup_map);
382
383         if (smp_num_siblings <= 1)
384                 cpumask_set_cpu(cpu, &cpu_sibling_map[cpu]);
385         else {
386                 for_each_cpu(i, &cpu_sibling_setup_map) {
387                         if (cpus_are_siblings(cpu, i)) {
388                                 cpumask_set_cpu(i, &cpu_sibling_map[cpu]);
389                                 cpumask_set_cpu(cpu, &cpu_sibling_map[i]);
390                         }
391                 }
392         }
393 }
394
395 static inline void set_cpu_core_map(int cpu)
396 {
397         int i;
398
399         cpumask_set_cpu(cpu, &cpu_core_setup_map);
400
401         for_each_cpu(i, &cpu_core_setup_map) {
402                 if (cpu_data[cpu].package == cpu_data[i].package) {
403                         cpumask_set_cpu(i, &cpu_core_map[cpu]);
404                         cpumask_set_cpu(cpu, &cpu_core_map[i]);
405                 }
406         }
407 }
408
409 /*
410  * Calculate a new cpu_foreign_map mask whenever a
411  * new cpu appears or disappears.
412  */
413 void calculate_cpu_foreign_map(void)
414 {
415         int i, k, core_present;
416         cpumask_t temp_foreign_map;
417
418         /* Re-calculate the mask */
419         cpumask_clear(&temp_foreign_map);
420         for_each_online_cpu(i) {
421                 core_present = 0;
422                 for_each_cpu(k, &temp_foreign_map)
423                         if (cpus_are_siblings(i, k))
424                                 core_present = 1;
425                 if (!core_present)
426                         cpumask_set_cpu(i, &temp_foreign_map);
427         }
428
429         for_each_online_cpu(i)
430                 cpumask_andnot(&cpu_foreign_map[i],
431                                &temp_foreign_map, &cpu_sibling_map[i]);
432 }
433
434 /* Preload SMP state for boot cpu */
435 void smp_prepare_boot_cpu(void)
436 {
437         unsigned int cpu, node, rr_node;
438
439         set_cpu_possible(0, true);
440         set_cpu_online(0, true);
441         set_my_cpu_offset(per_cpu_offset(0));
442
443         rr_node = first_node(node_online_map);
444         for_each_possible_cpu(cpu) {
445                 node = early_cpu_to_node(cpu);
446
447                 /*
448                  * The mapping between present cpus and nodes has been
449                  * built during MADT and SRAT parsing.
450                  *
451                  * If possible cpus = present cpus here, early_cpu_to_node
452                  * will return valid node.
453                  *
454                  * If possible cpus > present cpus here (e.g. some possible
455                  * cpus will be added by cpu-hotplug later), for possible but
456                  * not present cpus, early_cpu_to_node will return NUMA_NO_NODE,
457                  * and we just map them to online nodes in round-robin way.
458                  * Once hotplugged, new correct mapping will be built for them.
459                  */
460                 if (node != NUMA_NO_NODE)
461                         set_cpu_numa_node(cpu, node);
462                 else {
463                         set_cpu_numa_node(cpu, rr_node);
464                         rr_node = next_node_in(rr_node, node_online_map);
465                 }
466         }
467 }
468
469 /* called from main before smp_init() */
470 void __init smp_prepare_cpus(unsigned int max_cpus)
471 {
472         init_new_context(current, &init_mm);
473         current_thread_info()->cpu = 0;
474         loongson_prepare_cpus(max_cpus);
475         set_cpu_sibling_map(0);
476         set_cpu_core_map(0);
477         calculate_cpu_foreign_map();
478 #ifndef CONFIG_HOTPLUG_CPU
479         init_cpu_present(cpu_possible_mask);
480 #endif
481 }
482
483 int __cpu_up(unsigned int cpu, struct task_struct *tidle)
484 {
485         loongson_boot_secondary(cpu, tidle);
486
487         /* Wait for CPU to start and be ready to sync counters */
488         if (!wait_for_completion_timeout(&cpu_starting,
489                                          msecs_to_jiffies(5000))) {
490                 pr_crit("CPU%u: failed to start\n", cpu);
491                 return -EIO;
492         }
493
494         /* Wait for CPU to finish startup & mark itself online before return */
495         wait_for_completion(&cpu_running);
496
497         return 0;
498 }
499
500 /*
501  * First C code run on the secondary CPUs after being started up by
502  * the master.
503  */
504 asmlinkage void start_secondary(void)
505 {
506         unsigned int cpu;
507
508         sync_counter();
509         cpu = smp_processor_id();
510         set_my_cpu_offset(per_cpu_offset(cpu));
511
512         cpu_probe();
513         constant_clockevent_init();
514         loongson_init_secondary();
515
516         set_cpu_sibling_map(cpu);
517         set_cpu_core_map(cpu);
518
519         notify_cpu_starting(cpu);
520
521         /* Notify boot CPU that we're starting */
522         complete(&cpu_starting);
523
524         /* The CPU is running, now mark it online */
525         set_cpu_online(cpu, true);
526
527         calculate_cpu_foreign_map();
528
529         /*
530          * Notify boot CPU that we're up & online and it can safely return
531          * from __cpu_up()
532          */
533         complete(&cpu_running);
534
535         /*
536          * irq will be enabled in loongson_smp_finish(), enabling it too
537          * early is dangerous.
538          */
539         WARN_ON_ONCE(!irqs_disabled());
540         loongson_smp_finish();
541
542         cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
543 }
544
545 void __init smp_cpus_done(unsigned int max_cpus)
546 {
547 }
548
549 static void stop_this_cpu(void *dummy)
550 {
551         set_cpu_online(smp_processor_id(), false);
552         calculate_cpu_foreign_map();
553         local_irq_disable();
554         while (true);
555 }
556
557 void smp_send_stop(void)
558 {
559         smp_call_function(stop_this_cpu, NULL, 0);
560 }
561
/*
 * No-op: the multiplier is ignored.
 *
 * @multiplier: not used
 *
 * Always returns 0.
 */
int setup_profiling_timer(unsigned int multiplier)
{
	return 0;
}
566
/* Cross-call handler for flush_tlb_all(): flush the calling CPU's whole TLB. */
static void flush_tlb_all_ipi(void *info)
{
	local_flush_tlb_all();
}
571
572 void flush_tlb_all(void)
573 {
574         on_each_cpu(flush_tlb_all_ipi, NULL, 1);
575 }
576
/*
 * Cross-call handler for flush_tlb_mm().
 *
 * @mm: the struct mm_struct to flush, passed as an untyped pointer
 */
static void flush_tlb_mm_ipi(void *mm)
{
	struct mm_struct *target = mm;

	local_flush_tlb_mm(target);
}
581
582 void flush_tlb_mm(struct mm_struct *mm)
583 {
584         if (atomic_read(&mm->mm_users) == 0)
585                 return;         /* happens as a result of exit_mmap() */
586
587         preempt_disable();
588
589         if ((atomic_read(&mm->mm_users) != 1) || (current->mm != mm)) {
590                 on_each_cpu_mask(mm_cpumask(mm), flush_tlb_mm_ipi, mm, 1);
591         } else {
592                 unsigned int cpu;
593
594                 for_each_online_cpu(cpu) {
595                         if (cpu != smp_processor_id() && cpu_context(cpu, mm))
596                                 cpu_context(cpu, mm) = 0;
597                 }
598                 local_flush_tlb_mm(mm);
599         }
600
601         preempt_enable();
602 }
603
/* Argument bundle handed to the TLB-flush cross-call handlers */
struct flush_tlb_data {
	struct vm_area_struct *vma;	/* set by the range and page flushes */
	unsigned long addr1;		/* range start, or the page address */
	unsigned long addr2;		/* range end */
};
609
610 static void flush_tlb_range_ipi(void *info)
611 {
612         struct flush_tlb_data *fd = info;
613
614         local_flush_tlb_range(fd->vma, fd->addr1, fd->addr2);
615 }
616
617 void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
618 {
619         struct mm_struct *mm = vma->vm_mm;
620
621         preempt_disable();
622         if ((atomic_read(&mm->mm_users) != 1) || (current->mm != mm)) {
623                 struct flush_tlb_data fd = {
624                         .vma = vma,
625                         .addr1 = start,
626                         .addr2 = end,
627                 };
628
629                 on_each_cpu_mask(mm_cpumask(mm), flush_tlb_range_ipi, &fd, 1);
630         } else {
631                 unsigned int cpu;
632
633                 for_each_online_cpu(cpu) {
634                         if (cpu != smp_processor_id() && cpu_context(cpu, mm))
635                                 cpu_context(cpu, mm) = 0;
636                 }
637                 local_flush_tlb_range(vma, start, end);
638         }
639         preempt_enable();
640 }
641
642 static void flush_tlb_kernel_range_ipi(void *info)
643 {
644         struct flush_tlb_data *fd = info;
645
646         local_flush_tlb_kernel_range(fd->addr1, fd->addr2);
647 }
648
649 void flush_tlb_kernel_range(unsigned long start, unsigned long end)
650 {
651         struct flush_tlb_data fd = {
652                 .addr1 = start,
653                 .addr2 = end,
654         };
655
656         on_each_cpu(flush_tlb_kernel_range_ipi, &fd, 1);
657 }
658
659 static void flush_tlb_page_ipi(void *info)
660 {
661         struct flush_tlb_data *fd = info;
662
663         local_flush_tlb_page(fd->vma, fd->addr1);
664 }
665
666 void flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
667 {
668         preempt_disable();
669         if ((atomic_read(&vma->vm_mm->mm_users) != 1) || (current->mm != vma->vm_mm)) {
670                 struct flush_tlb_data fd = {
671                         .vma = vma,
672                         .addr1 = page,
673                 };
674
675                 on_each_cpu_mask(mm_cpumask(vma->vm_mm), flush_tlb_page_ipi, &fd, 1);
676         } else {
677                 unsigned int cpu;
678
679                 for_each_online_cpu(cpu) {
680                         if (cpu != smp_processor_id() && cpu_context(cpu, vma->vm_mm))
681                                 cpu_context(cpu, vma->vm_mm) = 0;
682                 }
683                 local_flush_tlb_page(vma, page);
684         }
685         preempt_enable();
686 }
687 EXPORT_SYMBOL(flush_tlb_page);
688
/*
 * Cross-call handler for flush_tlb_one().
 *
 * @info: the virtual address itself, carried in the pointer value
 */
static void flush_tlb_one_ipi(void *info)
{
	local_flush_tlb_one((unsigned long)info);
}
695
696 void flush_tlb_one(unsigned long vaddr)
697 {
698         on_each_cpu(flush_tlb_one_ipi, (void *)vaddr, 1);
699 }
700 EXPORT_SYMBOL(flush_tlb_one);