arch/powerpc/kvm/book3s_xive_native.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2017-2019, IBM Corporation.
4  */
5
6 #define pr_fmt(fmt) "xive-kvm: " fmt
7
8 #include <linux/kernel.h>
9 #include <linux/kvm_host.h>
10 #include <linux/err.h>
11 #include <linux/gfp.h>
12 #include <linux/spinlock.h>
13 #include <linux/delay.h>
14 #include <linux/file.h>
15 #include <linux/uaccess.h>
16 #include <asm/kvm_book3s.h>
17 #include <asm/kvm_ppc.h>
18 #include <asm/hvcall.h>
19 #include <asm/xive.h>
20 #include <asm/xive-regs.h>
21 #include <asm/debug.h>
22 #include <asm/debugfs.h>
23 #include <asm/opal.h>
24
25 #include <linux/debugfs.h>
26 #include <linux/seq_file.h>
27
28 #include "book3s_xive.h"
29
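/*
 * Helper for the ESB "special" MMIO loads used below: the load
 * returns the current PQ state bits of the source and, depending on
 * the offset used (e.g. XIVE_ESB_SET_PQ_01), may also update them.
 * The XIVE_IRQ_FLAG_SHIFT_BUG adjustment replicates the offset one
 * nibble up for sources flagged with the ESB shift bug.
 */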
30 static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset)
31 {
32         u64 val;
33
34         if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG)
35                 offset |= offset << 4;
36
37         val = in_be64(xd->eoi_mmio + offset);
38         return (u8)val;
39 }
40
41 static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio)
42 {
43         struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
44         struct xive_q *q = &xc->queues[prio];
45
46         xive_native_disable_queue(xc->vp_id, q, prio);
47         if (q->qpage) {
48                 put_page(virt_to_page(q->qpage));
49                 q->qpage = NULL;
50         }
51 }
52
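/*
 * Wrapper around xive_native_configure_queue() that also releases
 * the page backing the previous queue, but only once the new
 * configuration has been accepted. On error, the previous queue
 * page is left untouched.
 */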
53 static int kvmppc_xive_native_configure_queue(u32 vp_id, struct xive_q *q,
54                                               u8 prio, __be32 *qpage,
55                                               u32 order, bool can_escalate)
56 {
57         int rc;
58         __be32 *qpage_prev = q->qpage;
59
60         rc = xive_native_configure_queue(vp_id, q, prio, qpage, order,
61                                          can_escalate);
62         if (rc)
63                 return rc;
64
65         if (qpage_prev)
66                 put_page(virt_to_page(qpage_prev));
67
68         return rc;
69 }
70
71 void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
72 {
73         struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
74         int i;
75
76         if (!kvmppc_xive_enabled(vcpu))
77                 return;
78
79         if (!xc)
80                 return;
81
82         pr_devel("native_cleanup_vcpu(cpu=%d)\n", xc->server_num);
83
84         /* Ensure no interrupt is still routed to that VP */
85         xc->valid = false;
86         kvmppc_xive_disable_vcpu_interrupts(vcpu);
87
88         /* Free escalations */
89         for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
90                 /* Free the escalation irq */
91                 if (xc->esc_virq[i]) {
92                         if (xc->xive->single_escalation)
93                                 xive_cleanup_single_escalation(vcpu, xc,
94                                                         xc->esc_virq[i]);
95                         free_irq(xc->esc_virq[i], vcpu);
96                         irq_dispose_mapping(xc->esc_virq[i]);
97                         kfree(xc->esc_virq_names[i]);
98                         xc->esc_virq[i] = 0;
99                 }
100         }
101
102         /* Disable the VP */
103         xive_native_disable_vp(xc->vp_id);
104
105         /* Clear the cam word so guest entry won't try to push context */
106         vcpu->arch.xive_cam_word = 0;
107
108         /* Free the queues */
109         for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
110                 kvmppc_xive_native_cleanup_queue(vcpu, i);
111         }
112
113         /* Free the VP */
114         kfree(xc);
115
116         /* Cleanup the vcpu */
117         vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
118         vcpu->arch.xive_vcpu = NULL;
119 }
120
121 int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
122                                     struct kvm_vcpu *vcpu, u32 server_num)
123 {
124         struct kvmppc_xive *xive = dev->private;
125         struct kvmppc_xive_vcpu *xc = NULL;
126         int rc;
127         u32 vp_id;
128
129         pr_devel("native_connect_vcpu(server=%d)\n", server_num);
130
131         if (dev->ops != &kvm_xive_native_ops) {
132                 pr_devel("Wrong ops !\n");
133                 return -EPERM;
134         }
135         if (xive->kvm != vcpu->kvm)
136                 return -EPERM;
137         if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
138                 return -EBUSY;
139
140         mutex_lock(&xive->lock);
141
142         rc = kvmppc_xive_compute_vp_id(xive, server_num, &vp_id);
143         if (rc)
144                 goto bail;
145
146         xc = kzalloc(sizeof(*xc), GFP_KERNEL);
147         if (!xc) {
148                 rc = -ENOMEM;
149                 goto bail;
150         }
151
152         vcpu->arch.xive_vcpu = xc;
153         xc->xive = xive;
154         xc->vcpu = vcpu;
155         xc->server_num = server_num;
156
157         xc->vp_id = vp_id;
158         xc->valid = true;
159         vcpu->arch.irq_type = KVMPPC_IRQ_XIVE;
160
161         rc = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id);
162         if (rc) {
163                 pr_err("Failed to get VP info from OPAL: %d\n", rc);
164                 goto bail;
165         }
166
167         /*
168          * Enable the VP first as the single escalation mode will
 169          * affect the escalation interrupt numbering
170          */
171         rc = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
172         if (rc) {
173                 pr_err("Failed to enable VP in OPAL: %d\n", rc);
174                 goto bail;
175         }
176
177         /* Configure VCPU fields for use by assembly push/pull */
178         vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000);
179         vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO);
180
181         /* TODO: reset all queues to a clean state ? */
182 bail:
183         mutex_unlock(&xive->lock);
184         if (rc)
185                 kvmppc_xive_native_cleanup_vcpu(vcpu);
186
187         return rc;
188 }
189
190 /*
191  * Device passthrough support
192  */
193 static int kvmppc_xive_native_reset_mapped(struct kvm *kvm, unsigned long irq)
194 {
195         struct kvmppc_xive *xive = kvm->arch.xive;
196         pgoff_t esb_pgoff = KVM_XIVE_ESB_PAGE_OFFSET + irq * 2;
197
198         if (irq >= KVMPPC_XIVE_NR_IRQS)
199                 return -EINVAL;
200
201         /*
202          * Clear the ESB pages of the IRQ number being mapped (or
 203          * unmapped) into the guest and let the VM fault handler
204          * repopulate with the appropriate ESB pages (device or IC)
205          */
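        /*
         * Each guest IRQ owns two consecutive pages in the ESB
         * mapping (trigger page, then EOI/management page), so the
         * range to clear starts at page offset
         * KVM_XIVE_ESB_PAGE_OFFSET + irq * 2 and spans two pages.
         */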
206         pr_debug("clearing esb pages for girq 0x%lx\n", irq);
207         mutex_lock(&xive->mapping_lock);
208         if (xive->mapping)
209                 unmap_mapping_range(xive->mapping,
210                                     esb_pgoff << PAGE_SHIFT,
211                                     2ull << PAGE_SHIFT, 1);
212         mutex_unlock(&xive->mapping_lock);
213         return 0;
214 }
215
216 static struct kvmppc_xive_ops kvmppc_xive_native_ops =  {
217         .reset_mapped = kvmppc_xive_native_reset_mapped,
218 };
219
220 static vm_fault_t xive_native_esb_fault(struct vm_fault *vmf)
221 {
222         struct vm_area_struct *vma = vmf->vma;
223         struct kvm_device *dev = vma->vm_file->private_data;
224         struct kvmppc_xive *xive = dev->private;
225         struct kvmppc_xive_src_block *sb;
226         struct kvmppc_xive_irq_state *state;
227         struct xive_irq_data *xd;
228         u32 hw_num;
229         u16 src;
230         u64 page;
231         unsigned long irq;
232         u64 page_offset;
233
234         /*
 235          * Linux/KVM uses a two-page ESB setting, one for trigger and
236          * one for EOI
237          */
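        /*
         * Hence, a fault at ESB page offset 2 * N (or 2 * N + 1)
         * belongs to guest IRQ N: even offsets resolve to the
         * trigger page, odd offsets to the EOI/management page.
         */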
238         page_offset = vmf->pgoff - vma->vm_pgoff;
239         irq = page_offset / 2;
240
241         sb = kvmppc_xive_find_source(xive, irq, &src);
242         if (!sb) {
243                 pr_devel("%s: source %lx not found !\n", __func__, irq);
244                 return VM_FAULT_SIGBUS;
245         }
246
247         state = &sb->irq_state[src];
248         kvmppc_xive_select_irq(state, &hw_num, &xd);
249
250         arch_spin_lock(&sb->lock);
251
252         /*
253          * first/even page is for trigger
254          * second/odd page is for EOI and management.
255          */
256         page = page_offset % 2 ? xd->eoi_page : xd->trig_page;
257         arch_spin_unlock(&sb->lock);
258
259         if (WARN_ON(!page)) {
260                 pr_err("%s: accessing invalid ESB page for source %lx !\n",
261                        __func__, irq);
262                 return VM_FAULT_SIGBUS;
263         }
264
265         vmf_insert_pfn(vma, vmf->address, page >> PAGE_SHIFT);
266         return VM_FAULT_NOPAGE;
267 }
268
269 static const struct vm_operations_struct xive_native_esb_vmops = {
270         .fault = xive_native_esb_fault,
271 };
272
273 static vm_fault_t xive_native_tima_fault(struct vm_fault *vmf)
274 {
275         struct vm_area_struct *vma = vmf->vma;
276
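        /*
         * The TIMA is presented as four pages, one per ring:
         * 0 = HW, 1 = HV, 2 = OS, 3 = User. Only the OS ring page
         * is ever mapped into the guest; the others fault.
         */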
277         switch (vmf->pgoff - vma->vm_pgoff) {
278         case 0: /* HW - forbid access */
279         case 1: /* HV - forbid access */
280                 return VM_FAULT_SIGBUS;
281         case 2: /* OS */
282                 vmf_insert_pfn(vma, vmf->address, xive_tima_os >> PAGE_SHIFT);
283                 return VM_FAULT_NOPAGE;
284         case 3: /* USER - TODO */
285         default:
286                 return VM_FAULT_SIGBUS;
287         }
288 }
289
290 static const struct vm_operations_struct xive_native_tima_vmops = {
291         .fault = xive_native_tima_fault,
292 };
293
294 static int kvmppc_xive_native_mmap(struct kvm_device *dev,
295                                    struct vm_area_struct *vma)
296 {
297         struct kvmppc_xive *xive = dev->private;
298
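        /*
         * Two fixed regions are accepted: the TIMA at
         * KVM_XIVE_TIMA_PAGE_OFFSET (at most 4 pages) and the ESB
         * space at KVM_XIVE_ESB_PAGE_OFFSET (sized for at most two
         * pages per interrupt source).
         */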
299         /* We only allow mappings at fixed offset for now */
300         if (vma->vm_pgoff == KVM_XIVE_TIMA_PAGE_OFFSET) {
301                 if (vma_pages(vma) > 4)
302                         return -EINVAL;
303                 vma->vm_ops = &xive_native_tima_vmops;
304         } else if (vma->vm_pgoff == KVM_XIVE_ESB_PAGE_OFFSET) {
305                 if (vma_pages(vma) > KVMPPC_XIVE_NR_IRQS * 2)
306                         return -EINVAL;
307                 vma->vm_ops = &xive_native_esb_vmops;
308         } else {
309                 return -EINVAL;
310         }
311
312         vma->vm_flags |= VM_IO | VM_PFNMAP;
313         vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);
314
315         /*
316          * Grab the KVM device file address_space to be able to clear
317          * the ESB pages mapping when a device is passed-through into
318          * the guest.
319          */
320         xive->mapping = vma->vm_file->f_mapping;
321         return 0;
322 }
323
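/*
 * KVM_DEV_XIVE_GRP_SOURCE: create (or restore after migration) the
 * guest interrupt source @irq. The u64 at @addr carries the
 * KVM_XIVE_LEVEL_SENSITIVE and KVM_XIVE_LEVEL_ASSERTED bits used to
 * restore LSI state. The source is left masked (PQ set to 01).
 * Roughly, from userspace (sketch, illustrative names):
 *
 *	__u64 state = KVM_XIVE_LEVEL_SENSITIVE;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_DEV_XIVE_GRP_SOURCE,
 *		.attr  = girq,
 *		.addr  = (__u64)&state,
 *	};
 *	ioctl(xive_fd, KVM_SET_DEVICE_ATTR, &attr);
 */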
324 static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq,
325                                          u64 addr)
326 {
327         struct kvmppc_xive_src_block *sb;
328         struct kvmppc_xive_irq_state *state;
329         u64 __user *ubufp = (u64 __user *) addr;
330         u64 val;
331         u16 idx;
332         int rc;
333
334         pr_devel("%s irq=0x%lx\n", __func__, irq);
335
336         if (irq < KVMPPC_XIVE_FIRST_IRQ || irq >= KVMPPC_XIVE_NR_IRQS)
337                 return -E2BIG;
338
339         sb = kvmppc_xive_find_source(xive, irq, &idx);
340         if (!sb) {
341                 pr_debug("No source, creating source block...\n");
342                 sb = kvmppc_xive_create_src_block(xive, irq);
343                 if (!sb) {
344                         pr_err("Failed to create block...\n");
345                         return -ENOMEM;
346                 }
347         }
348         state = &sb->irq_state[idx];
349
350         if (get_user(val, ubufp)) {
351                 pr_err("fault getting user info !\n");
352                 return -EFAULT;
353         }
354
355         arch_spin_lock(&sb->lock);
356
357         /*
358          * If the source doesn't already have an IPI, allocate
359          * one and get the corresponding data
360          */
361         if (!state->ipi_number) {
362                 state->ipi_number = xive_native_alloc_irq();
363                 if (state->ipi_number == 0) {
364                         pr_err("Failed to allocate IRQ !\n");
365                         rc = -ENXIO;
366                         goto unlock;
367                 }
368                 xive_native_populate_irq_data(state->ipi_number,
369                                               &state->ipi_data);
370                 pr_debug("%s allocated hw_irq=0x%x for irq=0x%lx\n", __func__,
371                          state->ipi_number, irq);
372         }
373
374         /* Restore LSI state */
375         if (val & KVM_XIVE_LEVEL_SENSITIVE) {
376                 state->lsi = true;
377                 if (val & KVM_XIVE_LEVEL_ASSERTED)
378                         state->asserted = true;
379                 pr_devel("  LSI ! Asserted=%d\n", state->asserted);
380         }
381
382         /* Mask IRQ to start with */
383         state->act_server = 0;
384         state->act_priority = MASKED;
385         xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
386         xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
387
388         /* Increment the number of valid sources and mark this one valid */
389         if (!state->valid)
390                 xive->src_count++;
391         state->valid = true;
392
393         rc = 0;
394
395 unlock:
396         arch_spin_unlock(&sb->lock);
397
398         return rc;
399 }
400
401 static int kvmppc_xive_native_update_source_config(struct kvmppc_xive *xive,
402                                         struct kvmppc_xive_src_block *sb,
403                                         struct kvmppc_xive_irq_state *state,
404                                         u32 server, u8 priority, bool masked,
405                                         u32 eisn)
406 {
407         struct kvm *kvm = xive->kvm;
408         u32 hw_num;
409         int rc = 0;
410
411         arch_spin_lock(&sb->lock);
412
413         if (state->act_server == server && state->act_priority == priority &&
414             state->eisn == eisn)
415                 goto unlock;
416
417         pr_devel("new_act_prio=%d new_act_server=%d mask=%d act_server=%d act_prio=%d\n",
418                  priority, server, masked, state->act_server,
419                  state->act_priority);
420
421         kvmppc_xive_select_irq(state, &hw_num, NULL);
422
423         if (priority != MASKED && !masked) {
424                 rc = kvmppc_xive_select_target(kvm, &server, priority);
425                 if (rc)
426                         goto unlock;
427
428                 state->act_priority = priority;
429                 state->act_server = server;
430                 state->eisn = eisn;
431
432                 rc = xive_native_configure_irq(hw_num,
433                                                kvmppc_xive_vp(xive, server),
434                                                priority, eisn);
435         } else {
436                 state->act_priority = MASKED;
437                 state->act_server = 0;
438                 state->eisn = 0;
439
440                 rc = xive_native_configure_irq(hw_num, 0, MASKED, 0);
441         }
442
443 unlock:
444         arch_spin_unlock(&sb->lock);
445         return rc;
446 }
447
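/*
 * KVM_DEV_XIVE_GRP_SOURCE_CONFIG: route guest interrupt @irq. The
 * u64 at @addr packs the target server, priority, masked state and
 * EISN, unpacked below with the KVM_XIVE_SOURCE_* masks and shifts.
 */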
448 static int kvmppc_xive_native_set_source_config(struct kvmppc_xive *xive,
449                                                 long irq, u64 addr)
450 {
451         struct kvmppc_xive_src_block *sb;
452         struct kvmppc_xive_irq_state *state;
453         u64 __user *ubufp = (u64 __user *) addr;
454         u16 src;
455         u64 kvm_cfg;
456         u32 server;
457         u8 priority;
458         bool masked;
459         u32 eisn;
460
461         sb = kvmppc_xive_find_source(xive, irq, &src);
462         if (!sb)
463                 return -ENOENT;
464
465         state = &sb->irq_state[src];
466
467         if (!state->valid)
468                 return -EINVAL;
469
470         if (get_user(kvm_cfg, ubufp))
471                 return -EFAULT;
472
473         pr_devel("%s irq=0x%lx cfg=%016llx\n", __func__, irq, kvm_cfg);
474
475         priority = (kvm_cfg & KVM_XIVE_SOURCE_PRIORITY_MASK) >>
476                 KVM_XIVE_SOURCE_PRIORITY_SHIFT;
477         server = (kvm_cfg & KVM_XIVE_SOURCE_SERVER_MASK) >>
478                 KVM_XIVE_SOURCE_SERVER_SHIFT;
479         masked = (kvm_cfg & KVM_XIVE_SOURCE_MASKED_MASK) >>
480                 KVM_XIVE_SOURCE_MASKED_SHIFT;
481         eisn = (kvm_cfg & KVM_XIVE_SOURCE_EISN_MASK) >>
482                 KVM_XIVE_SOURCE_EISN_SHIFT;
483
484         if (priority != xive_prio_from_guest(priority)) {
485                 pr_err("invalid priority for queue %d for VCPU %d\n",
486                        priority, server);
487                 return -EINVAL;
488         }
489
490         return kvmppc_xive_native_update_source_config(xive, sb, state, server,
491                                                        priority, masked, eisn);
492 }
493
494 static int kvmppc_xive_native_sync_source(struct kvmppc_xive *xive,
495                                           long irq, u64 addr)
496 {
497         struct kvmppc_xive_src_block *sb;
498         struct kvmppc_xive_irq_state *state;
499         struct xive_irq_data *xd;
500         u32 hw_num;
501         u16 src;
502         int rc = 0;
503
504         pr_devel("%s irq=0x%lx", __func__, irq);
505
506         sb = kvmppc_xive_find_source(xive, irq, &src);
507         if (!sb)
508                 return -ENOENT;
509
510         state = &sb->irq_state[src];
511
512         rc = -EINVAL;
513
514         arch_spin_lock(&sb->lock);
515
516         if (state->valid) {
517                 kvmppc_xive_select_irq(state, &hw_num, &xd);
518                 xive_native_sync_source(hw_num);
519                 rc = 0;
520         }
521
522         arch_spin_unlock(&sb->lock);
523         return rc;
524 }
525
526 static int xive_native_validate_queue_size(u32 qshift)
527 {
528         /*
529          * We only support 64K pages for the moment. This is also
530          * advertised in the DT property "ibm,xive-eq-sizes"
531          */
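        /*
         * A qshift of 0 means "reset the EQ"; 16 (64K) is the only
         * real size accepted. The other shifts listed below (4K,
         * 2M, 16M) are rejected.
         */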
532         switch (qshift) {
533         case 0: /* EQ reset */
534         case 16:
535                 return 0;
536         case 12:
537         case 21:
538         case 24:
539         default:
540                 return -EINVAL;
541         }
542 }
543
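/*
 * KVM_DEV_XIVE_GRP_EQ_CONFIG (set): configure or reset one event
 * queue. @eq_idx packs the target server and priority, and @addr
 * points to a struct kvm_ppc_xive_eq describing the guest EQ page.
 * Roughly, from userspace (sketch, illustrative names, error
 * handling omitted):
 *
 *	struct kvm_ppc_xive_eq eq = {
 *		.flags  = KVM_XIVE_EQ_ALWAYS_NOTIFY,
 *		.qshift = 16,
 *		.qaddr  = guest_eq_gpa,
 *	};
 *	struct kvm_device_attr attr = {
 *		.group = KVM_DEV_XIVE_GRP_EQ_CONFIG,
 *		.attr  = (server << KVM_XIVE_EQ_SERVER_SHIFT) |
 *			 (priority << KVM_XIVE_EQ_PRIORITY_SHIFT),
 *		.addr  = (__u64)&eq,
 *	};
 *	ioctl(xive_fd, KVM_SET_DEVICE_ATTR, &attr);
 */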
544 static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive,
545                                                long eq_idx, u64 addr)
546 {
547         struct kvm *kvm = xive->kvm;
548         struct kvm_vcpu *vcpu;
549         struct kvmppc_xive_vcpu *xc;
550         void __user *ubufp = (void __user *) addr;
551         u32 server;
552         u8 priority;
553         struct kvm_ppc_xive_eq kvm_eq;
554         int rc;
 555         __be32 *qaddr = NULL;
556         struct page *page;
557         struct xive_q *q;
558         gfn_t gfn;
559         unsigned long page_size;
560         int srcu_idx;
561
562         /*
563          * Demangle priority/server tuple from the EQ identifier
564          */
565         priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
566                 KVM_XIVE_EQ_PRIORITY_SHIFT;
567         server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
568                 KVM_XIVE_EQ_SERVER_SHIFT;
569
570         if (copy_from_user(&kvm_eq, ubufp, sizeof(kvm_eq)))
571                 return -EFAULT;
572
573         vcpu = kvmppc_xive_find_server(kvm, server);
574         if (!vcpu) {
575                 pr_err("Can't find server %d\n", server);
576                 return -ENOENT;
577         }
578         xc = vcpu->arch.xive_vcpu;
579
580         if (priority != xive_prio_from_guest(priority)) {
581                 pr_err("Trying to restore invalid queue %d for VCPU %d\n",
582                        priority, server);
583                 return -EINVAL;
584         }
585         q = &xc->queues[priority];
586
587         pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
588                  __func__, server, priority, kvm_eq.flags,
589                  kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);
590
591         /* reset queue and disable queueing */
592         if (!kvm_eq.qshift) {
593                 q->guest_qaddr  = 0;
594                 q->guest_qshift = 0;
595
596                 rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority,
597                                                         NULL, 0, true);
598                 if (rc) {
599                         pr_err("Failed to reset queue %d for VCPU %d: %d\n",
600                                priority, xc->server_num, rc);
601                         return rc;
602                 }
603
604                 return 0;
605         }
606
607         /*
 608          * sPAPR specifies an "Unconditional Notify (n) flag" for the
609          * H_INT_SET_QUEUE_CONFIG hcall which forces notification
610          * without using the coalescing mechanisms provided by the
611          * XIVE END ESBs. This is required on KVM as notification
612          * using the END ESBs is not supported.
613          */
614         if (kvm_eq.flags != KVM_XIVE_EQ_ALWAYS_NOTIFY) {
615                 pr_err("invalid flags %d\n", kvm_eq.flags);
616                 return -EINVAL;
617         }
618
619         rc = xive_native_validate_queue_size(kvm_eq.qshift);
620         if (rc) {
621                 pr_err("invalid queue size %d\n", kvm_eq.qshift);
622                 return rc;
623         }
624
625         if (kvm_eq.qaddr & ((1ull << kvm_eq.qshift) - 1)) {
626                 pr_err("queue page is not aligned %llx/%llx\n", kvm_eq.qaddr,
627                        1ull << kvm_eq.qshift);
628                 return -EINVAL;
629         }
630
631         srcu_idx = srcu_read_lock(&kvm->srcu);
632         gfn = gpa_to_gfn(kvm_eq.qaddr);
633
634         page_size = kvm_host_page_size(kvm, gfn);
635         if (1ull << kvm_eq.qshift > page_size) {
636                 srcu_read_unlock(&kvm->srcu, srcu_idx);
637                 pr_warn("Incompatible host page size %lx!\n", page_size);
638                 return -EINVAL;
639         }
640
641         page = gfn_to_page(kvm, gfn);
642         if (is_error_page(page)) {
643                 srcu_read_unlock(&kvm->srcu, srcu_idx);
644                 pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr);
645                 return -EINVAL;
646         }
647
648         qaddr = page_to_virt(page) + (kvm_eq.qaddr & ~PAGE_MASK);
649         srcu_read_unlock(&kvm->srcu, srcu_idx);
650
651         /*
 652          * Back up the queue page guest address so that the EQ page
 653          * can be marked dirty for migration.
654          */
655         q->guest_qaddr  = kvm_eq.qaddr;
656         q->guest_qshift = kvm_eq.qshift;
657
658          /*
659           * Unconditional Notification is forced by default at the
660           * OPAL level because the use of END ESBs is not supported by
661           * Linux.
662           */
663         rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority,
664                                         (__be32 *) qaddr, kvm_eq.qshift, true);
665         if (rc) {
666                 pr_err("Failed to configure queue %d for VCPU %d: %d\n",
667                        priority, xc->server_num, rc);
668                 put_page(page);
669                 return rc;
670         }
671
672         /*
673          * Only restore the queue state when needed. When doing the
674          * H_INT_SET_SOURCE_CONFIG hcall, it should not.
675          */
676         if (kvm_eq.qtoggle != 1 || kvm_eq.qindex != 0) {
677                 rc = xive_native_set_queue_state(xc->vp_id, priority,
678                                                  kvm_eq.qtoggle,
679                                                  kvm_eq.qindex);
680                 if (rc)
681                         goto error;
682         }
683
684         rc = kvmppc_xive_attach_escalation(vcpu, priority,
685                                            xive->single_escalation);
686 error:
687         if (rc)
688                 kvmppc_xive_native_cleanup_queue(vcpu, priority);
689         return rc;
690 }
691
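/*
 * KVM_DEV_XIVE_GRP_EQ_CONFIG (get): return the current configuration
 * and state of the event queue identified by @eq_idx in the
 * struct kvm_ppc_xive_eq at @addr.
 */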
692 static int kvmppc_xive_native_get_queue_config(struct kvmppc_xive *xive,
693                                                long eq_idx, u64 addr)
694 {
695         struct kvm *kvm = xive->kvm;
696         struct kvm_vcpu *vcpu;
697         struct kvmppc_xive_vcpu *xc;
698         struct xive_q *q;
 699         void __user *ubufp = (void __user *) addr;
700         u32 server;
701         u8 priority;
702         struct kvm_ppc_xive_eq kvm_eq;
703         u64 qaddr;
704         u64 qshift;
705         u64 qeoi_page;
706         u32 escalate_irq;
707         u64 qflags;
708         int rc;
709
710         /*
711          * Demangle priority/server tuple from the EQ identifier
712          */
713         priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
714                 KVM_XIVE_EQ_PRIORITY_SHIFT;
715         server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
716                 KVM_XIVE_EQ_SERVER_SHIFT;
717
718         vcpu = kvmppc_xive_find_server(kvm, server);
719         if (!vcpu) {
720                 pr_err("Can't find server %d\n", server);
721                 return -ENOENT;
722         }
723         xc = vcpu->arch.xive_vcpu;
724
725         if (priority != xive_prio_from_guest(priority)) {
726                 pr_err("invalid priority for queue %d for VCPU %d\n",
727                        priority, server);
728                 return -EINVAL;
729         }
730         q = &xc->queues[priority];
731
732         memset(&kvm_eq, 0, sizeof(kvm_eq));
733
734         if (!q->qpage)
735                 return 0;
736
737         rc = xive_native_get_queue_info(xc->vp_id, priority, &qaddr, &qshift,
738                                         &qeoi_page, &escalate_irq, &qflags);
739         if (rc)
740                 return rc;
741
742         kvm_eq.flags = 0;
743         if (qflags & OPAL_XIVE_EQ_ALWAYS_NOTIFY)
744                 kvm_eq.flags |= KVM_XIVE_EQ_ALWAYS_NOTIFY;
745
746         kvm_eq.qshift = q->guest_qshift;
747         kvm_eq.qaddr  = q->guest_qaddr;
748
749         rc = xive_native_get_queue_state(xc->vp_id, priority, &kvm_eq.qtoggle,
750                                          &kvm_eq.qindex);
751         if (rc)
752                 return rc;
753
754         pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
755                  __func__, server, priority, kvm_eq.flags,
756                  kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);
757
758         if (copy_to_user(ubufp, &kvm_eq, sizeof(kvm_eq)))
759                 return -EFAULT;
760
761         return 0;
762 }
763
764 static void kvmppc_xive_reset_sources(struct kvmppc_xive_src_block *sb)
765 {
766         int i;
767
768         for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
769                 struct kvmppc_xive_irq_state *state = &sb->irq_state[i];
770
771                 if (!state->valid)
772                         continue;
773
774                 if (state->act_priority == MASKED)
775                         continue;
776
777                 state->eisn = 0;
778                 state->act_server = 0;
779                 state->act_priority = MASKED;
780                 xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
781                 xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
782                 if (state->pt_number) {
783                         xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_01);
784                         xive_native_configure_irq(state->pt_number,
785                                                   0, MASKED, 0);
786                 }
787         }
788 }
789
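/*
 * KVM_DEV_XIVE_RESET: mask all configured sources and, for each
 * vCPU, free the escalation interrupts and release the queue pages.
 */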
790 static int kvmppc_xive_reset(struct kvmppc_xive *xive)
791 {
792         struct kvm *kvm = xive->kvm;
793         struct kvm_vcpu *vcpu;
794         unsigned int i;
795
796         pr_devel("%s\n", __func__);
797
798         mutex_lock(&xive->lock);
799
800         kvm_for_each_vcpu(i, vcpu, kvm) {
801                 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
802                 unsigned int prio;
803
804                 if (!xc)
805                         continue;
806
807                 kvmppc_xive_disable_vcpu_interrupts(vcpu);
808
809                 for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
810
811                         /* Single escalation, no queue 7 */
812                         if (prio == 7 && xive->single_escalation)
813                                 break;
814
815                         if (xc->esc_virq[prio]) {
816                                 free_irq(xc->esc_virq[prio], vcpu);
817                                 irq_dispose_mapping(xc->esc_virq[prio]);
818                                 kfree(xc->esc_virq_names[prio]);
819                                 xc->esc_virq[prio] = 0;
820                         }
821
822                         kvmppc_xive_native_cleanup_queue(vcpu, prio);
823                 }
824         }
825
826         for (i = 0; i <= xive->max_sbid; i++) {
827                 struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
828
829                 if (sb) {
830                         arch_spin_lock(&sb->lock);
831                         kvmppc_xive_reset_sources(sb);
832                         arch_spin_unlock(&sb->lock);
833                 }
834         }
835
836         mutex_unlock(&xive->lock);
837
838         return 0;
839 }
840
841 static void kvmppc_xive_native_sync_sources(struct kvmppc_xive_src_block *sb)
842 {
843         int j;
844
845         for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++) {
846                 struct kvmppc_xive_irq_state *state = &sb->irq_state[j];
847                 struct xive_irq_data *xd;
848                 u32 hw_num;
849
850                 if (!state->valid)
851                         continue;
852
853                 /*
854                  * The struct kvmppc_xive_irq_state reflects the state
855                  * of the EAS configuration and not the state of the
 856                  * source. The source is masked by setting the PQ bits to
857                  * '-Q', which is what is being done before calling
858                  * the KVM_DEV_XIVE_EQ_SYNC control.
859                  *
860                  * If a source EAS is configured, OPAL syncs the XIVE
861                  * IC of the source and the XIVE IC of the previous
862                  * target if any.
863                  *
864                  * So it should be fine ignoring MASKED sources as
865                  * they have been synced already.
866                  */
867                 if (state->act_priority == MASKED)
868                         continue;
869
870                 kvmppc_xive_select_irq(state, &hw_num, &xd);
871                 xive_native_sync_source(hw_num);
872                 xive_native_sync_queue(hw_num);
873         }
874 }
875
876 static int kvmppc_xive_native_vcpu_eq_sync(struct kvm_vcpu *vcpu)
877 {
878         struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
879         unsigned int prio;
880         int srcu_idx;
881
882         if (!xc)
883                 return -ENOENT;
884
885         for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
886                 struct xive_q *q = &xc->queues[prio];
887
888                 if (!q->qpage)
889                         continue;
890
891                 /* Mark EQ page dirty for migration */
892                 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
893                 mark_page_dirty(vcpu->kvm, gpa_to_gfn(q->guest_qaddr));
894                 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
895         }
896         return 0;
897 }
898
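/*
 * KVM_DEV_XIVE_EQ_SYNC: sync all non-masked sources and their
 * queues at the hardware level, then mark every in-use EQ page
 * dirty so it is transferred during migration.
 */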
899 static int kvmppc_xive_native_eq_sync(struct kvmppc_xive *xive)
900 {
901         struct kvm *kvm = xive->kvm;
902         struct kvm_vcpu *vcpu;
903         unsigned int i;
904
905         pr_devel("%s\n", __func__);
906
907         mutex_lock(&xive->lock);
908         for (i = 0; i <= xive->max_sbid; i++) {
909                 struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
910
911                 if (sb) {
912                         arch_spin_lock(&sb->lock);
913                         kvmppc_xive_native_sync_sources(sb);
914                         arch_spin_unlock(&sb->lock);
915                 }
916         }
917
918         kvm_for_each_vcpu(i, vcpu, kvm) {
919                 kvmppc_xive_native_vcpu_eq_sync(vcpu);
920         }
921         mutex_unlock(&xive->lock);
922
923         return 0;
924 }
925
926 static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
927                                        struct kvm_device_attr *attr)
928 {
929         struct kvmppc_xive *xive = dev->private;
930
931         switch (attr->group) {
932         case KVM_DEV_XIVE_GRP_CTRL:
933                 switch (attr->attr) {
934                 case KVM_DEV_XIVE_RESET:
935                         return kvmppc_xive_reset(xive);
936                 case KVM_DEV_XIVE_EQ_SYNC:
937                         return kvmppc_xive_native_eq_sync(xive);
938                 case KVM_DEV_XIVE_NR_SERVERS:
939                         return kvmppc_xive_set_nr_servers(xive, attr->addr);
940                 }
941                 break;
942         case KVM_DEV_XIVE_GRP_SOURCE:
943                 return kvmppc_xive_native_set_source(xive, attr->attr,
944                                                      attr->addr);
945         case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
946                 return kvmppc_xive_native_set_source_config(xive, attr->attr,
947                                                             attr->addr);
948         case KVM_DEV_XIVE_GRP_EQ_CONFIG:
949                 return kvmppc_xive_native_set_queue_config(xive, attr->attr,
950                                                            attr->addr);
951         case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
952                 return kvmppc_xive_native_sync_source(xive, attr->attr,
953                                                       attr->addr);
954         }
955         return -ENXIO;
956 }
957
958 static int kvmppc_xive_native_get_attr(struct kvm_device *dev,
959                                        struct kvm_device_attr *attr)
960 {
961         struct kvmppc_xive *xive = dev->private;
962
963         switch (attr->group) {
964         case KVM_DEV_XIVE_GRP_EQ_CONFIG:
965                 return kvmppc_xive_native_get_queue_config(xive, attr->attr,
966                                                            attr->addr);
967         }
968         return -ENXIO;
969 }
970
971 static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
972                                        struct kvm_device_attr *attr)
973 {
974         switch (attr->group) {
975         case KVM_DEV_XIVE_GRP_CTRL:
976                 switch (attr->attr) {
977                 case KVM_DEV_XIVE_RESET:
978                 case KVM_DEV_XIVE_EQ_SYNC:
979                 case KVM_DEV_XIVE_NR_SERVERS:
980                         return 0;
981                 }
982                 break;
983         case KVM_DEV_XIVE_GRP_SOURCE:
984         case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
985         case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
986                 if (attr->attr >= KVMPPC_XIVE_FIRST_IRQ &&
987                     attr->attr < KVMPPC_XIVE_NR_IRQS)
988                         return 0;
989                 break;
990         case KVM_DEV_XIVE_GRP_EQ_CONFIG:
991                 return 0;
992         }
993         return -ENXIO;
994 }
995
996 /*
997  * Called when device fd is closed.  kvm->lock is held.
998  */
999 static void kvmppc_xive_native_release(struct kvm_device *dev)
1000 {
1001         struct kvmppc_xive *xive = dev->private;
1002         struct kvm *kvm = xive->kvm;
1003         struct kvm_vcpu *vcpu;
1004         int i;
1005
1006         pr_devel("Releasing xive native device\n");
1007
1008         /*
1009          * Clear the KVM device file address_space which is used to
1010          * unmap the ESB pages when a device is passed-through.
1011          */
1012         mutex_lock(&xive->mapping_lock);
1013         xive->mapping = NULL;
1014         mutex_unlock(&xive->mapping_lock);
1015
1016         /*
1017          * Since this is the device release function, we know that
1018          * userspace does not have any open fd or mmap referring to
 1019          * the device.  Therefore none of the device attribute
 1020          * set/get, mmap, or page fault functions can be running
 1021          * concurrently, and similarly, the connect_vcpu and
 1022          * set/clr_mapped functions cannot be in progress
 1023          * either.
1024          */
1025
1026         debugfs_remove(xive->dentry);
1027
1028         /*
1029          * We should clean up the vCPU interrupt presenters first.
1030          */
1031         kvm_for_each_vcpu(i, vcpu, kvm) {
1032                 /*
1033                  * Take vcpu->mutex to ensure that no one_reg get/set ioctl
1034                  * (i.e. kvmppc_xive_native_[gs]et_vp) can be being done.
1035                  * Holding the vcpu->mutex also means that the vcpu cannot
1036                  * be executing the KVM_RUN ioctl, and therefore it cannot
1037                  * be executing the XIVE push or pull code or accessing
1038                  * the XIVE MMIO regions.
1039                  */
1040                 mutex_lock(&vcpu->mutex);
1041                 kvmppc_xive_native_cleanup_vcpu(vcpu);
1042                 mutex_unlock(&vcpu->mutex);
1043         }
1044
1045         /*
1046          * Now that we have cleared vcpu->arch.xive_vcpu, vcpu->arch.irq_type
1047          * and vcpu->arch.xive_esc_[vr]addr on each vcpu, we are safe
1048          * against xive code getting called during vcpu execution or
1049          * set/get one_reg operations.
1050          */
1051         kvm->arch.xive = NULL;
1052
1053         for (i = 0; i <= xive->max_sbid; i++) {
1054                 if (xive->src_blocks[i])
1055                         kvmppc_xive_free_sources(xive->src_blocks[i]);
1056                 kfree(xive->src_blocks[i]);
1057                 xive->src_blocks[i] = NULL;
1058         }
1059
1060         if (xive->vp_base != XIVE_INVALID_VP)
1061                 xive_native_free_vp_block(xive->vp_base);
1062
1063         /*
1064          * A reference of the kvmppc_xive pointer is now kept under
1065          * the xive_devices struct of the machine for reuse. It is
1066          * freed when the VM is destroyed for now until we fix all the
1067          * execution paths.
1068          */
1069
1070         kfree(dev);
1071 }
1072
1073 /*
1074  * Create a XIVE device.  kvm->lock is held.
1075  */
1076 static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type)
1077 {
1078         struct kvmppc_xive *xive;
1079         struct kvm *kvm = dev->kvm;
1080
1081         pr_devel("Creating xive native device\n");
1082
1083         if (kvm->arch.xive)
1084                 return -EEXIST;
1085
1086         xive = kvmppc_xive_get_device(kvm, type);
1087         if (!xive)
1088                 return -ENOMEM;
1089
1090         dev->private = xive;
1091         xive->dev = dev;
1092         xive->kvm = kvm;
1093         mutex_init(&xive->mapping_lock);
1094         mutex_init(&xive->lock);
1095
1096         /* VP allocation is delayed to the first call to connect_vcpu */
1097         xive->vp_base = XIVE_INVALID_VP;
 1098         /* KVM_MAX_VCPUS limits the number of VMs to roughly 64 per socket
1099          * on a POWER9 system.
1100          */
1101         xive->nr_servers = KVM_MAX_VCPUS;
1102
1103         xive->single_escalation = xive_native_has_single_escalation();
1104         xive->ops = &kvmppc_xive_native_ops;
1105
1106         kvm->arch.xive = xive;
1107         return 0;
1108 }
1109
1110 /*
1111  * Interrupt Pending Buffer (IPB) offset
1112  */
1113 #define TM_IPB_SHIFT 40
1114 #define TM_IPB_MASK  (((u64) 0xFF) << TM_IPB_SHIFT)
1115
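/*
 * get/set of the VP state exposed to userspace through a one_reg
 * register: word 0 carries the thread context (w01), with the IPB
 * value saved in the NVT merged in on the get side, so the set
 * side only needs to restore w01.
 */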
1116 int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
1117 {
1118         struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
1119         u64 opal_state;
1120         int rc;
1121
1122         if (!kvmppc_xive_enabled(vcpu))
1123                 return -EPERM;
1124
1125         if (!xc)
1126                 return -ENOENT;
1127
1128         /* Thread context registers. We only care about IPB and CPPR */
1129         val->xive_timaval[0] = vcpu->arch.xive_saved_state.w01;
1130
1131         /* Get the VP state from OPAL */
1132         rc = xive_native_get_vp_state(xc->vp_id, &opal_state);
1133         if (rc)
1134                 return rc;
1135
1136         /*
 1137          * Capture the backup of the IPB register in the NVT structure
 1138          * and merge it into our KVM VP state.
1139          */
1140         val->xive_timaval[0] |= cpu_to_be64(opal_state & TM_IPB_MASK);
1141
 1142         pr_devel("%s NSR=%02x CPPR=%02x IPB=%02x PIPR=%02x w01=%016llx w2=%08x opal=%016llx\n",
1143                  __func__,
1144                  vcpu->arch.xive_saved_state.nsr,
1145                  vcpu->arch.xive_saved_state.cppr,
1146                  vcpu->arch.xive_saved_state.ipb,
1147                  vcpu->arch.xive_saved_state.pipr,
1148                  vcpu->arch.xive_saved_state.w01,
1149                  (u32) vcpu->arch.xive_cam_word, opal_state);
1150
1151         return 0;
1152 }
1153
1154 int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
1155 {
1156         struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
1157         struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
1158
1159         pr_devel("%s w01=%016llx vp=%016llx\n", __func__,
1160                  val->xive_timaval[0], val->xive_timaval[1]);
1161
1162         if (!kvmppc_xive_enabled(vcpu))
1163                 return -EPERM;
1164
1165         if (!xc || !xive)
1166                 return -ENOENT;
1167
1168         /* We can't update the state of a "pushed" VCPU  */
1169         if (WARN_ON(vcpu->arch.xive_pushed))
1170                 return -EBUSY;
1171
1172         /*
1173          * Restore the thread context registers. IPB and CPPR should
1174          * be the only ones that matter.
1175          */
1176         vcpu->arch.xive_saved_state.w01 = val->xive_timaval[0];
1177
1178         /*
1179          * There is no need to restore the XIVE internal state (IPB
1180          * stored in the NVT) as the IPB register was merged in KVM VP
1181          * state when captured.
1182          */
1183         return 0;
1184 }
1185
1186 bool kvmppc_xive_native_supported(void)
1187 {
1188         return xive_native_has_queue_state_support();
1189 }
1190
1191 static int xive_native_debug_show(struct seq_file *m, void *private)
1192 {
1193         struct kvmppc_xive *xive = m->private;
1194         struct kvm *kvm = xive->kvm;
1195         struct kvm_vcpu *vcpu;
1196         unsigned int i;
1197
1198         if (!kvm)
1199                 return 0;
1200
1201         seq_puts(m, "=========\nVCPU state\n=========\n");
1202
1203         kvm_for_each_vcpu(i, vcpu, kvm) {
1204                 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
1205
1206                 if (!xc)
1207                         continue;
1208
 1209                 seq_printf(m, "cpu server %#x VP=%#x NSR=%02x CPPR=%02x IPB=%02x PIPR=%02x w01=%016llx w2=%08x\n",
1210                            xc->server_num, xc->vp_id,
1211                            vcpu->arch.xive_saved_state.nsr,
1212                            vcpu->arch.xive_saved_state.cppr,
1213                            vcpu->arch.xive_saved_state.ipb,
1214                            vcpu->arch.xive_saved_state.pipr,
1215                            vcpu->arch.xive_saved_state.w01,
1216                            (u32) vcpu->arch.xive_cam_word);
1217
1218                 kvmppc_xive_debug_show_queues(m, vcpu);
1219         }
1220
1221         return 0;
1222 }
1223
1224 static int xive_native_debug_open(struct inode *inode, struct file *file)
1225 {
1226         return single_open(file, xive_native_debug_show, inode->i_private);
1227 }
1228
1229 static const struct file_operations xive_native_debug_fops = {
1230         .open = xive_native_debug_open,
1231         .read = seq_read,
1232         .llseek = seq_lseek,
1233         .release = single_release,
1234 };
1235
1236 static void xive_native_debugfs_init(struct kvmppc_xive *xive)
1237 {
1238         char *name;
1239
1240         name = kasprintf(GFP_KERNEL, "kvm-xive-%p", xive);
1241         if (!name) {
1242                 pr_err("%s: no memory for name\n", __func__);
1243                 return;
1244         }
1245
1246         xive->dentry = debugfs_create_file(name, 0444, powerpc_debugfs_root,
1247                                            xive, &xive_native_debug_fops);
1248
1249         pr_debug("%s: created %s\n", __func__, name);
1250         kfree(name);
1251 }
1252
1253 static void kvmppc_xive_native_init(struct kvm_device *dev)
1254 {
1255         struct kvmppc_xive *xive = (struct kvmppc_xive *)dev->private;
1256
1257         /* Register some debug interfaces */
1258         xive_native_debugfs_init(xive);
1259 }
1260
1261 struct kvm_device_ops kvm_xive_native_ops = {
1262         .name = "kvm-xive-native",
1263         .create = kvmppc_xive_native_create,
1264         .init = kvmppc_xive_native_init,
1265         .release = kvmppc_xive_native_release,
1266         .set_attr = kvmppc_xive_native_set_attr,
1267         .get_attr = kvmppc_xive_native_get_attr,
1268         .has_attr = kvmppc_xive_native_has_attr,
1269         .mmap = kvmppc_xive_native_mmap,
1270 };
1271
1272 void kvmppc_xive_native_init_module(void)
1273 {
1274         ;
1275 }
1276
1277 void kvmppc_xive_native_exit_module(void)
1278 {
1279         ;
1280 }