1 /*
2  * KVMGT - the implementation of Intel mediated pass-through framework for KVM
3  *
4  * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23  * SOFTWARE.
24  *
25  * Authors:
26  *    Kevin Tian <kevin.tian@intel.com>
27  *    Jike Song <jike.song@intel.com>
28  *    Xiaoguang Chen <xiaoguang.chen@intel.com>
29  *    Eddie Dong <eddie.dong@intel.com>
30  *
31  * Contributors:
32  *    Niu Bing <bing.niu@intel.com>
33  *    Zhi Wang <zhi.a.wang@intel.com>
34  */
35
36 #include <linux/init.h>
37 #include <linux/mm.h>
38 #include <linux/kthread.h>
39 #include <linux/sched/mm.h>
40 #include <linux/types.h>
41 #include <linux/list.h>
42 #include <linux/rbtree.h>
43 #include <linux/spinlock.h>
44 #include <linux/eventfd.h>
45 #include <linux/mdev.h>
46 #include <linux/debugfs.h>
47
48 #include <linux/nospec.h>
49
50 #include <drm/drm_edid.h>
51
52 #include "i915_drv.h"
53 #include "intel_gvt.h"
54 #include "gvt.h"
55
56 MODULE_IMPORT_NS(DMA_BUF);
57 MODULE_IMPORT_NS(I915_GVT);
58
59 /* helper macros copied from vfio-pci */
60 #define VFIO_PCI_OFFSET_SHIFT   40
61 #define VFIO_PCI_OFFSET_TO_INDEX(off)   (off >> VFIO_PCI_OFFSET_SHIFT)
62 #define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
63 #define VFIO_PCI_OFFSET_MASK    (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
64
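/*
 * Worked example of the offset encoding above: for BAR2
 * (VFIO_PCI_BAR2_REGION_INDEX == 2) the region starts at file offset
 * VFIO_PCI_INDEX_TO_OFFSET(2) == 2ULL << 40, and the low 40 bits
 * (masked by VFIO_PCI_OFFSET_MASK) give the offset within that region.
 */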
65 #define EDID_BLOB_OFFSET (PAGE_SIZE/2)
66
67 #define OPREGION_SIGNATURE "IntelGraphicsMem"
68
69 struct vfio_region;
70 struct intel_vgpu_regops {
71         size_t (*rw)(struct intel_vgpu *vgpu, char *buf,
72                         size_t count, loff_t *ppos, bool iswrite);
73         void (*release)(struct intel_vgpu *vgpu,
74                         struct vfio_region *region);
75 };
76
77 struct vfio_region {
78         u32                             type;
79         u32                             subtype;
80         size_t                          size;
81         u32                             flags;
82         const struct intel_vgpu_regops  *ops;
83         void                            *data;
84 };
85
86 struct vfio_edid_region {
87         struct vfio_region_gfx_edid vfio_edid_regs;
88         void *edid_blob;
89 };
90
91 struct kvmgt_pgfn {
92         gfn_t gfn;
93         struct hlist_node hnode;
94 };
95
96 struct gvt_dma {
97         struct intel_vgpu *vgpu;
98         struct rb_node gfn_node;
99         struct rb_node dma_addr_node;
100         gfn_t gfn;
101         dma_addr_t dma_addr;
102         unsigned long size;
103         struct kref ref;
104 };
105
106 #define vfio_dev_to_vgpu(vfio_dev) \
107         container_of((vfio_dev), struct intel_vgpu, vfio_device)
108
109 static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
110                 const u8 *val, int len,
111                 struct kvm_page_track_notifier_node *node);
112 static void kvmgt_page_track_flush_slot(struct kvm *kvm,
113                 struct kvm_memory_slot *slot,
114                 struct kvm_page_track_notifier_node *node);
115
116 static ssize_t intel_vgpu_show_description(struct mdev_type *mtype, char *buf)
117 {
118         struct intel_vgpu_type *type =
119                 container_of(mtype, struct intel_vgpu_type, type);
120
121         return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n"
122                        "fence: %d\nresolution: %s\n"
123                        "weight: %d\n",
124                        BYTES_TO_MB(type->conf->low_mm),
125                        BYTES_TO_MB(type->conf->high_mm),
126                        type->conf->fence, vgpu_edid_str(type->conf->edid),
127                        type->conf->weight);
128 }
129
130 static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
131                 unsigned long size)
132 {
133         vfio_unpin_pages(&vgpu->vfio_device, gfn << PAGE_SHIFT,
134                          DIV_ROUND_UP(size, PAGE_SIZE));
135 }
136
137 /* Pin a normal or compound guest page for dma. */
138 static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
139                 unsigned long size, struct page **page)
140 {
141         int total_pages = DIV_ROUND_UP(size, PAGE_SIZE);
142         struct page *base_page = NULL;
143         int npage;
144         int ret;
145
146         /*
147          * We pin the pages one-by-one to avoid allocating a big array
148          * on the stack to hold pfns.
149          */
150         for (npage = 0; npage < total_pages; npage++) {
151                 dma_addr_t cur_iova = (gfn + npage) << PAGE_SHIFT;
152                 struct page *cur_page;
153
154                 ret = vfio_pin_pages(&vgpu->vfio_device, cur_iova, 1,
155                                      IOMMU_READ | IOMMU_WRITE, &cur_page);
156                 if (ret != 1) {
157                         gvt_vgpu_err("vfio_pin_pages failed for iova %pad, ret %d\n",
158                                      &cur_iova, ret);
159                         goto err;
160                 }
161
162                 if (npage == 0)
163                         base_page = cur_page;
164                 else if (base_page + npage != cur_page) {
165                         gvt_vgpu_err("The pages are not contiguous\n");
166                         ret = -EINVAL;
167                         npage++;
168                         goto err;
169                 }
170         }
171
172         *page = base_page;
173         return 0;
174 err:
175         gvt_unpin_guest_page(vgpu, gfn, npage * PAGE_SIZE);
176         return ret;
177 }
178
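/*
 * Pin the guest pages backing @gfn and create a bidirectional DMA
 * mapping of @size bytes; the pages are unpinned again if the mapping
 * fails.
 */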
179 static int gvt_dma_map_page(struct intel_vgpu *vgpu, unsigned long gfn,
180                 dma_addr_t *dma_addr, unsigned long size)
181 {
182         struct device *dev = vgpu->gvt->gt->i915->drm.dev;
183         struct page *page = NULL;
184         int ret;
185
186         ret = gvt_pin_guest_page(vgpu, gfn, size, &page);
187         if (ret)
188                 return ret;
189
190         /* Setup DMA mapping. */
191         *dma_addr = dma_map_page(dev, page, 0, size, DMA_BIDIRECTIONAL);
192         if (dma_mapping_error(dev, *dma_addr)) {
193                 gvt_vgpu_err("DMA mapping failed for pfn 0x%lx, ret %d\n",
194                              page_to_pfn(page), ret);
195                 gvt_unpin_guest_page(vgpu, gfn, size);
196                 return -ENOMEM;
197         }
198
199         return 0;
200 }
201
202 static void gvt_dma_unmap_page(struct intel_vgpu *vgpu, unsigned long gfn,
203                 dma_addr_t dma_addr, unsigned long size)
204 {
205         struct device *dev = vgpu->gvt->gt->i915->drm.dev;
206
207         dma_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL);
208         gvt_unpin_guest_page(vgpu, gfn, size);
209 }
210
211 static struct gvt_dma *__gvt_cache_find_dma_addr(struct intel_vgpu *vgpu,
212                 dma_addr_t dma_addr)
213 {
214         struct rb_node *node = vgpu->dma_addr_cache.rb_node;
215         struct gvt_dma *itr;
216
217         while (node) {
218                 itr = rb_entry(node, struct gvt_dma, dma_addr_node);
219
220                 if (dma_addr < itr->dma_addr)
221                         node = node->rb_left;
222                 else if (dma_addr > itr->dma_addr)
223                         node = node->rb_right;
224                 else
225                         return itr;
226         }
227         return NULL;
228 }
229
230 static struct gvt_dma *__gvt_cache_find_gfn(struct intel_vgpu *vgpu, gfn_t gfn)
231 {
232         struct rb_node *node = vgpu->gfn_cache.rb_node;
233         struct gvt_dma *itr;
234
235         while (node) {
236                 itr = rb_entry(node, struct gvt_dma, gfn_node);
237
238                 if (gfn < itr->gfn)
239                         node = node->rb_left;
240                 else if (gfn > itr->gfn)
241                         node = node->rb_right;
242                 else
243                         return itr;
244         }
245         return NULL;
246 }
247
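/*
 * Add a pinned gfn -> dma_addr mapping to both per-vGPU rbtrees so it
 * can later be looked up by either key.
 */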
248 static int __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn,
249                 dma_addr_t dma_addr, unsigned long size)
250 {
251         struct gvt_dma *new, *itr;
252         struct rb_node **link, *parent = NULL;
253
254         new = kzalloc(sizeof(struct gvt_dma), GFP_KERNEL);
255         if (!new)
256                 return -ENOMEM;
257
258         new->vgpu = vgpu;
259         new->gfn = gfn;
260         new->dma_addr = dma_addr;
261         new->size = size;
262         kref_init(&new->ref);
263
264         /* gfn_cache maps gfn to struct gvt_dma. */
265         link = &vgpu->gfn_cache.rb_node;
266         while (*link) {
267                 parent = *link;
268                 itr = rb_entry(parent, struct gvt_dma, gfn_node);
269
270                 if (gfn < itr->gfn)
271                         link = &parent->rb_left;
272                 else
273                         link = &parent->rb_right;
274         }
275         rb_link_node(&new->gfn_node, parent, link);
276         rb_insert_color(&new->gfn_node, &vgpu->gfn_cache);
277
278         /* dma_addr_cache maps dma addr to struct gvt_dma. */
279         parent = NULL;
280         link = &vgpu->dma_addr_cache.rb_node;
281         while (*link) {
282                 parent = *link;
283                 itr = rb_entry(parent, struct gvt_dma, dma_addr_node);
284
285                 if (dma_addr < itr->dma_addr)
286                         link = &parent->rb_left;
287                 else
288                         link = &parent->rb_right;
289         }
290         rb_link_node(&new->dma_addr_node, parent, link);
291         rb_insert_color(&new->dma_addr_node, &vgpu->dma_addr_cache);
292
293         vgpu->nr_cache_entries++;
294         return 0;
295 }
296
297 static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu,
298                                 struct gvt_dma *entry)
299 {
300         rb_erase(&entry->gfn_node, &vgpu->gfn_cache);
301         rb_erase(&entry->dma_addr_node, &vgpu->dma_addr_cache);
302         kfree(entry);
303         vgpu->nr_cache_entries--;
304 }
305
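/*
 * Unmap and free every cached entry; cache_lock is re-acquired for each
 * entry rather than held across the whole teardown.
 */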
306 static void gvt_cache_destroy(struct intel_vgpu *vgpu)
307 {
308         struct gvt_dma *dma;
309         struct rb_node *node = NULL;
310
311         for (;;) {
312                 mutex_lock(&vgpu->cache_lock);
313                 node = rb_first(&vgpu->gfn_cache);
314                 if (!node) {
315                         mutex_unlock(&vgpu->cache_lock);
316                         break;
317                 }
318                 dma = rb_entry(node, struct gvt_dma, gfn_node);
319                 gvt_dma_unmap_page(vgpu, dma->gfn, dma->dma_addr, dma->size);
320                 __gvt_cache_remove_entry(vgpu, dma);
321                 mutex_unlock(&vgpu->cache_lock);
322         }
323 }
324
325 static void gvt_cache_init(struct intel_vgpu *vgpu)
326 {
327         vgpu->gfn_cache = RB_ROOT;
328         vgpu->dma_addr_cache = RB_ROOT;
329         vgpu->nr_cache_entries = 0;
330         mutex_init(&vgpu->cache_lock);
331 }
332
333 static void kvmgt_protect_table_init(struct intel_vgpu *info)
334 {
335         hash_init(info->ptable);
336 }
337
338 static void kvmgt_protect_table_destroy(struct intel_vgpu *info)
339 {
340         struct kvmgt_pgfn *p;
341         struct hlist_node *tmp;
342         int i;
343
344         hash_for_each_safe(info->ptable, i, tmp, p, hnode) {
345                 hash_del(&p->hnode);
346                 kfree(p);
347         }
348 }
349
350 static struct kvmgt_pgfn *
351 __kvmgt_protect_table_find(struct intel_vgpu *info, gfn_t gfn)
352 {
353         struct kvmgt_pgfn *p, *res = NULL;
354
355         hash_for_each_possible(info->ptable, p, hnode, gfn) {
356                 if (gfn == p->gfn) {
357                         res = p;
358                         break;
359                 }
360         }
361
362         return res;
363 }
364
365 static bool kvmgt_gfn_is_write_protected(struct intel_vgpu *info, gfn_t gfn)
366 {
367         struct kvmgt_pgfn *p;
368
369         p = __kvmgt_protect_table_find(info, gfn);
370         return !!p;
371 }
372
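/* Record @gfn as write-protected; adding an already-tracked gfn is a no-op. */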
373 static void kvmgt_protect_table_add(struct intel_vgpu *info, gfn_t gfn)
374 {
375         struct kvmgt_pgfn *p;
376
377         if (kvmgt_gfn_is_write_protected(info, gfn))
378                 return;
379
380         p = kzalloc(sizeof(struct kvmgt_pgfn), GFP_ATOMIC);
381         if (WARN(!p, "gfn: 0x%llx\n", gfn))
382                 return;
383
384         p->gfn = gfn;
385         hash_add(info->ptable, &p->hnode, gfn);
386 }
387
388 static void kvmgt_protect_table_del(struct intel_vgpu *info, gfn_t gfn)
389 {
390         struct kvmgt_pgfn *p;
391
392         p = __kvmgt_protect_table_find(info, gfn);
393         if (p) {
394                 hash_del(&p->hnode);
395                 kfree(p);
396         }
397 }
398
399 static size_t intel_vgpu_reg_rw_opregion(struct intel_vgpu *vgpu, char *buf,
400                 size_t count, loff_t *ppos, bool iswrite)
401 {
402         unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) -
403                         VFIO_PCI_NUM_REGIONS;
404         void *base = vgpu->region[i].data;
405         loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
406
407
408         if (pos >= vgpu->region[i].size || iswrite) {
409                 gvt_vgpu_err("invalid op or offset for Intel vgpu OpRegion\n");
410                 return -EINVAL;
411         }
412         count = min(count, (size_t)(vgpu->region[i].size - pos));
413         memcpy(buf, base + pos, count);
414
415         return count;
416 }
417
418 static void intel_vgpu_reg_release_opregion(struct intel_vgpu *vgpu,
419                 struct vfio_region *region)
420 {
421 }
422
423 static const struct intel_vgpu_regops intel_vgpu_regops_opregion = {
424         .rw = intel_vgpu_reg_rw_opregion,
425         .release = intel_vgpu_reg_release_opregion,
426 };
427
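/*
 * Emulate 4-byte accesses to the vfio_region_gfx_edid control registers.
 * Writes to link_state trigger a virtual hotplug event, writes to
 * edid_size are checked against edid_max_size, and all other registers
 * are read-only.
 */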
428 static int handle_edid_regs(struct intel_vgpu *vgpu,
429                         struct vfio_edid_region *region, char *buf,
430                         size_t count, u16 offset, bool is_write)
431 {
432         struct vfio_region_gfx_edid *regs = &region->vfio_edid_regs;
433         unsigned int data;
434
435         if (offset + count > sizeof(*regs))
436                 return -EINVAL;
437
438         if (count != 4)
439                 return -EINVAL;
440
441         if (is_write) {
442                 data = *((unsigned int *)buf);
443                 switch (offset) {
444                 case offsetof(struct vfio_region_gfx_edid, link_state):
445                         if (data == VFIO_DEVICE_GFX_LINK_STATE_UP) {
446                                 if (!drm_edid_block_valid(
447                                         (u8 *)region->edid_blob,
448                                         0,
449                                         true,
450                                         NULL)) {
451                                         gvt_vgpu_err("invalid EDID blob\n");
452                                         return -EINVAL;
453                                 }
454                                 intel_vgpu_emulate_hotplug(vgpu, true);
455                         } else if (data == VFIO_DEVICE_GFX_LINK_STATE_DOWN)
456                                 intel_vgpu_emulate_hotplug(vgpu, false);
457                         else {
458                                 gvt_vgpu_err("invalid EDID link state %d\n",
459                                         regs->link_state);
460                                 return -EINVAL;
461                         }
462                         regs->link_state = data;
463                         break;
464                 case offsetof(struct vfio_region_gfx_edid, edid_size):
465                         if (data > regs->edid_max_size) {
466                                 gvt_vgpu_err("EDID size is bigger than %d!\n",
467                                         regs->edid_max_size);
468                                 return -EINVAL;
469                         }
470                         regs->edid_size = data;
471                         break;
472                 default:
473                         /* read-only regs */
474                         gvt_vgpu_err("write read-only EDID region at offset %d\n",
475                                 offset);
476                         return -EPERM;
477                 }
478         } else {
479                 memcpy(buf, (char *)regs + offset, count);
480         }
481
482         return count;
483 }
484
485 static int handle_edid_blob(struct vfio_edid_region *region, char *buf,
486                         size_t count, u16 offset, bool is_write)
487 {
488         if (offset + count > region->vfio_edid_regs.edid_size)
489                 return -EINVAL;
490
491         if (is_write)
492                 memcpy(region->edid_blob + offset, buf, count);
493         else
494                 memcpy(buf, region->edid_blob + offset, count);
495
496         return count;
497 }
498
499 static size_t intel_vgpu_reg_rw_edid(struct intel_vgpu *vgpu, char *buf,
500                 size_t count, loff_t *ppos, bool iswrite)
501 {
502         int ret;
503         unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) -
504                         VFIO_PCI_NUM_REGIONS;
505         struct vfio_edid_region *region = vgpu->region[i].data;
506         loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
507
508         if (pos < region->vfio_edid_regs.edid_offset) {
509                 ret = handle_edid_regs(vgpu, region, buf, count, pos, iswrite);
510         } else {
511                 pos -= EDID_BLOB_OFFSET;
512                 ret = handle_edid_blob(region, buf, count, pos, iswrite);
513         }
514
515         if (ret < 0)
516                 gvt_vgpu_err("failed to access EDID region\n");
517
518         return ret;
519 }
520
521 static void intel_vgpu_reg_release_edid(struct intel_vgpu *vgpu,
522                                         struct vfio_region *region)
523 {
524         kfree(region->data);
525 }
526
527 static const struct intel_vgpu_regops intel_vgpu_regops_edid = {
528         .rw = intel_vgpu_reg_rw_edid,
529         .release = intel_vgpu_reg_release_edid,
530 };
531
532 static int intel_vgpu_register_reg(struct intel_vgpu *vgpu,
533                 unsigned int type, unsigned int subtype,
534                 const struct intel_vgpu_regops *ops,
535                 size_t size, u32 flags, void *data)
536 {
537         struct vfio_region *region;
538
539         region = krealloc(vgpu->region,
540                         (vgpu->num_regions + 1) * sizeof(*region),
541                         GFP_KERNEL);
542         if (!region)
543                 return -ENOMEM;
544
545         vgpu->region = region;
546         vgpu->region[vgpu->num_regions].type = type;
547         vgpu->region[vgpu->num_regions].subtype = subtype;
548         vgpu->region[vgpu->num_regions].ops = ops;
549         vgpu->region[vgpu->num_regions].size = size;
550         vgpu->region[vgpu->num_regions].flags = flags;
551         vgpu->region[vgpu->num_regions].data = data;
552         vgpu->num_regions++;
553         return 0;
554 }
555
556 int intel_gvt_set_opregion(struct intel_vgpu *vgpu)
557 {
558         void *base;
559         int ret;
560
561         /* Each vgpu has its own opregion, although VFIO would create another
562          * one later. This one is used to expose the opregion to VFIO, while
563          * the one created later by VFIO is the one the guest actually uses.
564          */
565         base = vgpu_opregion(vgpu)->va;
566         if (!base)
567                 return -ENOMEM;
568
569         if (memcmp(base, OPREGION_SIGNATURE, 16)) {
570                 memunmap(base);
571                 return -EINVAL;
572         }
573
574         ret = intel_vgpu_register_reg(vgpu,
575                         PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
576                         VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION,
577                         &intel_vgpu_regops_opregion, OPREGION_SIZE,
578                         VFIO_REGION_INFO_FLAG_READ, base);
579
580         return ret;
581 }
582
583 int intel_gvt_set_edid(struct intel_vgpu *vgpu, int port_num)
584 {
585         struct intel_vgpu_port *port = intel_vgpu_port(vgpu, port_num);
586         struct vfio_edid_region *base;
587         int ret;
588
589         base = kzalloc(sizeof(*base), GFP_KERNEL);
590         if (!base)
591                 return -ENOMEM;
592
593         /* TODO: Add multi-port and EDID extension block support */
594         base->vfio_edid_regs.edid_offset = EDID_BLOB_OFFSET;
595         base->vfio_edid_regs.edid_max_size = EDID_SIZE;
596         base->vfio_edid_regs.edid_size = EDID_SIZE;
597         base->vfio_edid_regs.max_xres = vgpu_edid_xres(port->id);
598         base->vfio_edid_regs.max_yres = vgpu_edid_yres(port->id);
599         base->edid_blob = port->edid->edid_block;
600
601         ret = intel_vgpu_register_reg(vgpu,
602                         VFIO_REGION_TYPE_GFX,
603                         VFIO_REGION_SUBTYPE_GFX_EDID,
604                         &intel_vgpu_regops_edid, EDID_SIZE,
605                         VFIO_REGION_INFO_FLAG_READ |
606                         VFIO_REGION_INFO_FLAG_WRITE |
607                         VFIO_REGION_INFO_FLAG_CAPS, base);
608
609         return ret;
610 }
611
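/*
 * VFIO tells us an IOVA range is going away: drop every cached
 * pin/DMA mapping that falls inside it.
 */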
612 static void intel_vgpu_dma_unmap(struct vfio_device *vfio_dev, u64 iova,
613                                  u64 length)
614 {
615         struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
616         struct gvt_dma *entry;
617         u64 iov_pfn = iova >> PAGE_SHIFT;
618         u64 end_iov_pfn = iov_pfn + length / PAGE_SIZE;
619
620         mutex_lock(&vgpu->cache_lock);
621         for (; iov_pfn < end_iov_pfn; iov_pfn++) {
622                 entry = __gvt_cache_find_gfn(vgpu, iov_pfn);
623                 if (!entry)
624                         continue;
625
626                 gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr,
627                                    entry->size);
628                 __gvt_cache_remove_entry(vgpu, entry);
629         }
630         mutex_unlock(&vgpu->cache_lock);
631 }
632
633 static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu)
634 {
635         struct intel_vgpu *itr;
636         int id;
637         bool ret = false;
638
639         mutex_lock(&vgpu->gvt->lock);
640         for_each_active_vgpu(vgpu->gvt, itr, id) {
641                 if (!itr->attached)
642                         continue;
643
644                 if (vgpu->vfio_device.kvm == itr->vfio_device.kvm) {
645                         ret = true;
646                         goto out;
647                 }
648         }
649 out:
650         mutex_unlock(&vgpu->gvt->lock);
651         return ret;
652 }
653
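/*
 * First open of the vfio device: require a KVM instance owned by the
 * caller, register the page-track notifier and activate the vGPU.
 */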
654 static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
655 {
656         struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
657
658         if (vgpu->attached)
659                 return -EEXIST;
660
661         if (!vgpu->vfio_device.kvm ||
662             vgpu->vfio_device.kvm->mm != current->mm) {
663                 gvt_vgpu_err("KVM is required to use Intel vGPU\n");
664                 return -ESRCH;
665         }
666
667         kvm_get_kvm(vgpu->vfio_device.kvm);
668
669         if (__kvmgt_vgpu_exist(vgpu))
670                 return -EEXIST;
671
672         vgpu->attached = true;
673
674         kvmgt_protect_table_init(vgpu);
675         gvt_cache_init(vgpu);
676
677         vgpu->track_node.track_write = kvmgt_page_track_write;
678         vgpu->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
679         kvm_page_track_register_notifier(vgpu->vfio_device.kvm,
680                                          &vgpu->track_node);
681
682         debugfs_create_ulong(KVMGT_DEBUGFS_FILENAME, 0444, vgpu->debugfs,
683                              &vgpu->nr_cache_entries);
684
685         intel_gvt_activate_vgpu(vgpu);
686
687         atomic_set(&vgpu->released, 0);
688         return 0;
689 }
690
691 static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu)
692 {
693         struct eventfd_ctx *trigger;
694
695         trigger = vgpu->msi_trigger;
696         if (trigger) {
697                 eventfd_ctx_put(trigger);
698                 vgpu->msi_trigger = NULL;
699         }
700 }
701
702 static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
703 {
704         struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
705
706         if (!vgpu->attached)
707                 return;
708
709         if (atomic_cmpxchg(&vgpu->released, 0, 1))
710                 return;
711
712         intel_gvt_release_vgpu(vgpu);
713
714         debugfs_remove(debugfs_lookup(KVMGT_DEBUGFS_FILENAME, vgpu->debugfs));
715
716         kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm,
717                                            &vgpu->track_node);
718         kvmgt_protect_table_destroy(vgpu);
719         gvt_cache_destroy(vgpu);
720
721         intel_vgpu_release_msi_eventfd_ctx(vgpu);
722
723         vgpu->attached = false;
724
725         if (vgpu->vfio_device.kvm)
726                 kvm_put_kvm(vgpu->vfio_device.kvm);
727 }
728
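/*
 * Decode the guest-programmed base address of a BAR from the vGPU's
 * virtual config space, handling both 32-bit and 64-bit memory BARs.
 */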
729 static u64 intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar)
730 {
731         u32 start_lo, start_hi;
732         u32 mem_type;
733
734         start_lo = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + bar)) &
735                         PCI_BASE_ADDRESS_MEM_MASK;
736         mem_type = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + bar)) &
737                         PCI_BASE_ADDRESS_MEM_TYPE_MASK;
738
739         switch (mem_type) {
740         case PCI_BASE_ADDRESS_MEM_TYPE_64:
741                 start_hi = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space
742                                                 + bar + 4));
743                 break;
744         case PCI_BASE_ADDRESS_MEM_TYPE_32:
745         case PCI_BASE_ADDRESS_MEM_TYPE_1M:
746                 /* 1M mem BAR treated as 32-bit BAR */
747         default:
748                 /* unknown mem type treated as 32-bit BAR */
749                 start_hi = 0;
750                 break;
751         }
752
753         return ((u64)start_hi << 32) | start_lo;
754 }
755
756 static int intel_vgpu_bar_rw(struct intel_vgpu *vgpu, int bar, u64 off,
757                              void *buf, unsigned int count, bool is_write)
758 {
759         u64 bar_start = intel_vgpu_get_bar_addr(vgpu, bar);
760         int ret;
761
762         if (is_write)
763                 ret = intel_vgpu_emulate_mmio_write(vgpu,
764                                         bar_start + off, buf, count);
765         else
766                 ret = intel_vgpu_emulate_mmio_read(vgpu,
767                                         bar_start + off, buf, count);
768         return ret;
769 }
770
771 static inline bool intel_vgpu_in_aperture(struct intel_vgpu *vgpu, u64 off)
772 {
773         return off >= vgpu_aperture_offset(vgpu) &&
774                off < vgpu_aperture_offset(vgpu) + vgpu_aperture_sz(vgpu);
775 }
776
777 static int intel_vgpu_aperture_rw(struct intel_vgpu *vgpu, u64 off,
778                 void *buf, unsigned long count, bool is_write)
779 {
780         void __iomem *aperture_va;
781
782         if (!intel_vgpu_in_aperture(vgpu, off) ||
783             !intel_vgpu_in_aperture(vgpu, off + count)) {
784                 gvt_vgpu_err("Invalid aperture offset %llu\n", off);
785                 return -EINVAL;
786         }
787
788         aperture_va = io_mapping_map_wc(&vgpu->gvt->gt->ggtt->iomap,
789                                         ALIGN_DOWN(off, PAGE_SIZE),
790                                         count + offset_in_page(off));
791         if (!aperture_va)
792                 return -EIO;
793
794         if (is_write)
795                 memcpy_toio(aperture_va + offset_in_page(off), buf, count);
796         else
797                 memcpy_fromio(buf, aperture_va + offset_in_page(off), count);
798
799         io_mapping_unmap(aperture_va);
800
801         return 0;
802 }
803
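/*
 * Dispatch a read or write to the emulation path that owns the VFIO
 * region index encoded in *ppos: config space, BAR0 MMIO, the BAR2
 * aperture, or one of the device-specific regions.
 */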
804 static ssize_t intel_vgpu_rw(struct intel_vgpu *vgpu, char *buf,
805                         size_t count, loff_t *ppos, bool is_write)
806 {
807         unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
808         u64 pos = *ppos & VFIO_PCI_OFFSET_MASK;
809         int ret = -EINVAL;
810
811
812         if (index >= VFIO_PCI_NUM_REGIONS + vgpu->num_regions) {
813                 gvt_vgpu_err("invalid index: %u\n", index);
814                 return -EINVAL;
815         }
816
817         switch (index) {
818         case VFIO_PCI_CONFIG_REGION_INDEX:
819                 if (is_write)
820                         ret = intel_vgpu_emulate_cfg_write(vgpu, pos,
821                                                 buf, count);
822                 else
823                         ret = intel_vgpu_emulate_cfg_read(vgpu, pos,
824                                                 buf, count);
825                 break;
826         case VFIO_PCI_BAR0_REGION_INDEX:
827                 ret = intel_vgpu_bar_rw(vgpu, PCI_BASE_ADDRESS_0, pos,
828                                         buf, count, is_write);
829                 break;
830         case VFIO_PCI_BAR2_REGION_INDEX:
831                 ret = intel_vgpu_aperture_rw(vgpu, pos, buf, count, is_write);
832                 break;
833         case VFIO_PCI_BAR1_REGION_INDEX:
834         case VFIO_PCI_BAR3_REGION_INDEX:
835         case VFIO_PCI_BAR4_REGION_INDEX:
836         case VFIO_PCI_BAR5_REGION_INDEX:
837         case VFIO_PCI_VGA_REGION_INDEX:
838         case VFIO_PCI_ROM_REGION_INDEX:
839                 break;
840         default:
841                 if (index >= VFIO_PCI_NUM_REGIONS + vgpu->num_regions)
842                         return -EINVAL;
843
844                 index -= VFIO_PCI_NUM_REGIONS;
845                 return vgpu->region[index].ops->rw(vgpu, buf, count,
846                                 ppos, is_write);
847         }
848
849         return ret == 0 ? count : ret;
850 }
851
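/* Return true when the MMIO access targets the GGTT PTE range of BAR0. */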
852 static bool gtt_entry(struct intel_vgpu *vgpu, loff_t *ppos)
853 {
854         unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
855         struct intel_gvt *gvt = vgpu->gvt;
856         int offset;
857
858         /* Only allow MMIO GGTT entry access */
859         if (index != PCI_BASE_ADDRESS_0)
860                 return false;
861
862         offset = (u64)(*ppos & VFIO_PCI_OFFSET_MASK) -
863                 intel_vgpu_get_bar_gpa(vgpu, PCI_BASE_ADDRESS_0);
864
865         return (offset >= gvt->device_info.gtt_start_offset &&
866                 offset < gvt->device_info.gtt_start_offset + gvt_ggtt_sz(gvt)) ?
867                         true : false;
868 }
869
870 static ssize_t intel_vgpu_read(struct vfio_device *vfio_dev, char __user *buf,
871                         size_t count, loff_t *ppos)
872 {
873         struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
874         unsigned int done = 0;
875         int ret;
876
877         while (count) {
878                 size_t filled;
879
880                 /* Only support 8-byte GGTT entry reads */
881                 if (count >= 8 && !(*ppos % 8) &&
882                         gtt_entry(vgpu, ppos)) {
883                         u64 val;
884
885                         ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val),
886                                         ppos, false);
887                         if (ret <= 0)
888                                 goto read_err;
889
890                         if (copy_to_user(buf, &val, sizeof(val)))
891                                 goto read_err;
892
893                         filled = 8;
894                 } else if (count >= 4 && !(*ppos % 4)) {
895                         u32 val;
896
897                         ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val),
898                                         ppos, false);
899                         if (ret <= 0)
900                                 goto read_err;
901
902                         if (copy_to_user(buf, &val, sizeof(val)))
903                                 goto read_err;
904
905                         filled = 4;
906                 } else if (count >= 2 && !(*ppos % 2)) {
907                         u16 val;
908
909                         ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val),
910                                         ppos, false);
911                         if (ret <= 0)
912                                 goto read_err;
913
914                         if (copy_to_user(buf, &val, sizeof(val)))
915                                 goto read_err;
916
917                         filled = 2;
918                 } else {
919                         u8 val;
920
921                         ret = intel_vgpu_rw(vgpu, &val, sizeof(val), ppos,
922                                         false);
923                         if (ret <= 0)
924                                 goto read_err;
925
926                         if (copy_to_user(buf, &val, sizeof(val)))
927                                 goto read_err;
928
929                         filled = 1;
930                 }
931
932                 count -= filled;
933                 done += filled;
934                 *ppos += filled;
935                 buf += filled;
936         }
937
938         return done;
939
940 read_err:
941         return -EFAULT;
942 }
943
944 static ssize_t intel_vgpu_write(struct vfio_device *vfio_dev,
945                                 const char __user *buf,
946                                 size_t count, loff_t *ppos)
947 {
948         struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
949         unsigned int done = 0;
950         int ret;
951
952         while (count) {
953                 size_t filled;
954
955                 /* Only support 8-byte GGTT entry writes */
956                 if (count >= 8 && !(*ppos % 8) &&
957                         gtt_entry(vgpu, ppos)) {
958                         u64 val;
959
960                         if (copy_from_user(&val, buf, sizeof(val)))
961                                 goto write_err;
962
963                         ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val),
964                                         ppos, true);
965                         if (ret <= 0)
966                                 goto write_err;
967
968                         filled = 8;
969                 } else if (count >= 4 && !(*ppos % 4)) {
970                         u32 val;
971
972                         if (copy_from_user(&val, buf, sizeof(val)))
973                                 goto write_err;
974
975                         ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val),
976                                         ppos, true);
977                         if (ret <= 0)
978                                 goto write_err;
979
980                         filled = 4;
981                 } else if (count >= 2 && !(*ppos % 2)) {
982                         u16 val;
983
984                         if (copy_from_user(&val, buf, sizeof(val)))
985                                 goto write_err;
986
987                         ret = intel_vgpu_rw(vgpu, (char *)&val,
988                                         sizeof(val), ppos, true);
989                         if (ret <= 0)
990                                 goto write_err;
991
992                         filled = 2;
993                 } else {
994                         u8 val;
995
996                         if (copy_from_user(&val, buf, sizeof(val)))
997                                 goto write_err;
998
999                         ret = intel_vgpu_rw(vgpu, &val, sizeof(val),
1000                                         ppos, true);
1001                         if (ret <= 0)
1002                                 goto write_err;
1003
1004                         filled = 1;
1005                 }
1006
1007                 count -= filled;
1008                 done += filled;
1009                 *ppos += filled;
1010                 buf += filled;
1011         }
1012
1013         return done;
1014 write_err:
1015         return -EFAULT;
1016 }
1017
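/*
 * Only the aperture part of BAR2 may be mmap'ed; validate the request
 * and remap the corresponding host aperture pages into the caller.
 */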
1018 static int intel_vgpu_mmap(struct vfio_device *vfio_dev,
1019                 struct vm_area_struct *vma)
1020 {
1021         struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
1022         unsigned int index;
1023         u64 virtaddr;
1024         unsigned long req_size, pgoff, req_start;
1025         pgprot_t pg_prot;
1026
1027         index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
1028         if (index >= VFIO_PCI_ROM_REGION_INDEX)
1029                 return -EINVAL;
1030
1031         if (vma->vm_end < vma->vm_start)
1032                 return -EINVAL;
1033         if ((vma->vm_flags & VM_SHARED) == 0)
1034                 return -EINVAL;
1035         if (index != VFIO_PCI_BAR2_REGION_INDEX)
1036                 return -EINVAL;
1037
1038         pg_prot = vma->vm_page_prot;
1039         virtaddr = vma->vm_start;
1040         req_size = vma->vm_end - vma->vm_start;
1041         pgoff = vma->vm_pgoff &
1042                 ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
1043         req_start = pgoff << PAGE_SHIFT;
1044
1045         if (!intel_vgpu_in_aperture(vgpu, req_start))
1046                 return -EINVAL;
1047         if (req_start + req_size >
1048             vgpu_aperture_offset(vgpu) + vgpu_aperture_sz(vgpu))
1049                 return -EINVAL;
1050
1051         pgoff = (gvt_aperture_pa_base(vgpu->gvt) >> PAGE_SHIFT) + pgoff;
1052
1053         return remap_pfn_range(vma, virtaddr, pgoff, req_size, pg_prot);
1054 }
1055
1056 static int intel_vgpu_get_irq_count(struct intel_vgpu *vgpu, int type)
1057 {
1058         if (type == VFIO_PCI_INTX_IRQ_INDEX || type == VFIO_PCI_MSI_IRQ_INDEX)
1059                 return 1;
1060
1061         return 0;
1062 }
1063
1064 static int intel_vgpu_set_intx_mask(struct intel_vgpu *vgpu,
1065                         unsigned int index, unsigned int start,
1066                         unsigned int count, u32 flags,
1067                         void *data)
1068 {
1069         return 0;
1070 }
1071
1072 static int intel_vgpu_set_intx_unmask(struct intel_vgpu *vgpu,
1073                         unsigned int index, unsigned int start,
1074                         unsigned int count, u32 flags, void *data)
1075 {
1076         return 0;
1077 }
1078
1079 static int intel_vgpu_set_intx_trigger(struct intel_vgpu *vgpu,
1080                 unsigned int index, unsigned int start, unsigned int count,
1081                 u32 flags, void *data)
1082 {
1083         return 0;
1084 }
1085
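/*
 * Bind the virtual MSI to an eventfd supplied by userspace, or drop the
 * binding again when DATA_NONE is set with a zero count.
 */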
1086 static int intel_vgpu_set_msi_trigger(struct intel_vgpu *vgpu,
1087                 unsigned int index, unsigned int start, unsigned int count,
1088                 u32 flags, void *data)
1089 {
1090         struct eventfd_ctx *trigger;
1091
1092         if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
1093                 int fd = *(int *)data;
1094
1095                 trigger = eventfd_ctx_fdget(fd);
1096                 if (IS_ERR(trigger)) {
1097                         gvt_vgpu_err("eventfd_ctx_fdget failed\n");
1098                         return PTR_ERR(trigger);
1099                 }
1100                 vgpu->msi_trigger = trigger;
1101         } else if ((flags & VFIO_IRQ_SET_DATA_NONE) && !count)
1102                 intel_vgpu_release_msi_eventfd_ctx(vgpu);
1103
1104         return 0;
1105 }
1106
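/* Route a VFIO_DEVICE_SET_IRQS request to the matching INTx/MSI handler. */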
1107 static int intel_vgpu_set_irqs(struct intel_vgpu *vgpu, u32 flags,
1108                 unsigned int index, unsigned int start, unsigned int count,
1109                 void *data)
1110 {
1111         int (*func)(struct intel_vgpu *vgpu, unsigned int index,
1112                         unsigned int start, unsigned int count, u32 flags,
1113                         void *data) = NULL;
1114
1115         switch (index) {
1116         case VFIO_PCI_INTX_IRQ_INDEX:
1117                 switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
1118                 case VFIO_IRQ_SET_ACTION_MASK:
1119                         func = intel_vgpu_set_intx_mask;
1120                         break;
1121                 case VFIO_IRQ_SET_ACTION_UNMASK:
1122                         func = intel_vgpu_set_intx_unmask;
1123                         break;
1124                 case VFIO_IRQ_SET_ACTION_TRIGGER:
1125                         func = intel_vgpu_set_intx_trigger;
1126                         break;
1127                 }
1128                 break;
1129         case VFIO_PCI_MSI_IRQ_INDEX:
1130                 switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
1131                 case VFIO_IRQ_SET_ACTION_MASK:
1132                 case VFIO_IRQ_SET_ACTION_UNMASK:
1133                         /* XXX Need masking support exported */
1134                         break;
1135                 case VFIO_IRQ_SET_ACTION_TRIGGER:
1136                         func = intel_vgpu_set_msi_trigger;
1137                         break;
1138                 }
1139                 break;
1140         }
1141
1142         if (!func)
1143                 return -ENOTTY;
1144
1145         return func(vgpu, index, start, count, flags, data);
1146 }
1147
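/*
 * Emulate the VFIO device ioctls a GVT-g vGPU supports: device, region
 * and IRQ info, SET_IRQS, RESET, and the dma-buf plane queries.
 */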
1148 static long intel_vgpu_ioctl(struct vfio_device *vfio_dev, unsigned int cmd,
1149                              unsigned long arg)
1150 {
1151         struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
1152         unsigned long minsz;
1153
1154         gvt_dbg_core("vgpu%d ioctl, cmd: %d\n", vgpu->id, cmd);
1155
1156         if (cmd == VFIO_DEVICE_GET_INFO) {
1157                 struct vfio_device_info info;
1158
1159                 minsz = offsetofend(struct vfio_device_info, num_irqs);
1160
1161                 if (copy_from_user(&info, (void __user *)arg, minsz))
1162                         return -EFAULT;
1163
1164                 if (info.argsz < minsz)
1165                         return -EINVAL;
1166
1167                 info.flags = VFIO_DEVICE_FLAGS_PCI;
1168                 info.flags |= VFIO_DEVICE_FLAGS_RESET;
1169                 info.num_regions = VFIO_PCI_NUM_REGIONS +
1170                                 vgpu->num_regions;
1171                 info.num_irqs = VFIO_PCI_NUM_IRQS;
1172
1173                 return copy_to_user((void __user *)arg, &info, minsz) ?
1174                         -EFAULT : 0;
1175
1176         } else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
1177                 struct vfio_region_info info;
1178                 struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
1179                 unsigned int i;
1180                 int ret;
1181                 struct vfio_region_info_cap_sparse_mmap *sparse = NULL;
1182                 int nr_areas = 1;
1183                 int cap_type_id;
1184
1185                 minsz = offsetofend(struct vfio_region_info, offset);
1186
1187                 if (copy_from_user(&info, (void __user *)arg, minsz))
1188                         return -EFAULT;
1189
1190                 if (info.argsz < minsz)
1191                         return -EINVAL;
1192
1193                 switch (info.index) {
1194                 case VFIO_PCI_CONFIG_REGION_INDEX:
1195                         info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1196                         info.size = vgpu->gvt->device_info.cfg_space_size;
1197                         info.flags = VFIO_REGION_INFO_FLAG_READ |
1198                                      VFIO_REGION_INFO_FLAG_WRITE;
1199                         break;
1200                 case VFIO_PCI_BAR0_REGION_INDEX:
1201                         info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1202                         info.size = vgpu->cfg_space.bar[info.index].size;
1203                         if (!info.size) {
1204                                 info.flags = 0;
1205                                 break;
1206                         }
1207
1208                         info.flags = VFIO_REGION_INFO_FLAG_READ |
1209                                      VFIO_REGION_INFO_FLAG_WRITE;
1210                         break;
1211                 case VFIO_PCI_BAR1_REGION_INDEX:
1212                         info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1213                         info.size = 0;
1214                         info.flags = 0;
1215                         break;
1216                 case VFIO_PCI_BAR2_REGION_INDEX:
1217                         info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1218                         info.flags = VFIO_REGION_INFO_FLAG_CAPS |
1219                                         VFIO_REGION_INFO_FLAG_MMAP |
1220                                         VFIO_REGION_INFO_FLAG_READ |
1221                                         VFIO_REGION_INFO_FLAG_WRITE;
1222                         info.size = gvt_aperture_sz(vgpu->gvt);
1223
1224                         sparse = kzalloc(struct_size(sparse, areas, nr_areas),
1225                                          GFP_KERNEL);
1226                         if (!sparse)
1227                                 return -ENOMEM;
1228
1229                         sparse->header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
1230                         sparse->header.version = 1;
1231                         sparse->nr_areas = nr_areas;
1232                         cap_type_id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
1233                         sparse->areas[0].offset =
1234                                         PAGE_ALIGN(vgpu_aperture_offset(vgpu));
1235                         sparse->areas[0].size = vgpu_aperture_sz(vgpu);
1236                         break;
1237
1238                 case VFIO_PCI_BAR3_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
1239                         info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1240                         info.size = 0;
1241                         info.flags = 0;
1242
1243                         gvt_dbg_core("get region info bar:%d\n", info.index);
1244                         break;
1245
1246                 case VFIO_PCI_ROM_REGION_INDEX:
1247                 case VFIO_PCI_VGA_REGION_INDEX:
1248                         info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
1249                         info.size = 0;
1250                         info.flags = 0;
1251
1252                         gvt_dbg_core("get region info index:%d\n", info.index);
1253                         break;
1254                 default:
1255                         {
1256                                 struct vfio_region_info_cap_type cap_type = {
1257                                         .header.id = VFIO_REGION_INFO_CAP_TYPE,
1258                                         .header.version = 1 };
1259
1260                                 if (info.index >= VFIO_PCI_NUM_REGIONS +
1261                                                 vgpu->num_regions)
1262                                         return -EINVAL;
1263                                 info.index =
1264                                         array_index_nospec(info.index,
1265                                                         VFIO_PCI_NUM_REGIONS +
1266                                                         vgpu->num_regions);
1267
1268                                 i = info.index - VFIO_PCI_NUM_REGIONS;
1269
1270                                 info.offset =
1271                                         VFIO_PCI_INDEX_TO_OFFSET(info.index);
1272                                 info.size = vgpu->region[i].size;
1273                                 info.flags = vgpu->region[i].flags;
1274
1275                                 cap_type.type = vgpu->region[i].type;
1276                                 cap_type.subtype = vgpu->region[i].subtype;
1277
1278                                 ret = vfio_info_add_capability(&caps,
1279                                                         &cap_type.header,
1280                                                         sizeof(cap_type));
1281                                 if (ret)
1282                                         return ret;
1283                         }
1284                 }
1285
1286                 if ((info.flags & VFIO_REGION_INFO_FLAG_CAPS) && sparse) {
1287                         switch (cap_type_id) {
1288                         case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
1289                                 ret = vfio_info_add_capability(&caps,
1290                                         &sparse->header,
1291                                         struct_size(sparse, areas,
1292                                                     sparse->nr_areas));
1293                                 if (ret) {
1294                                         kfree(sparse);
1295                                         return ret;
1296                                 }
1297                                 break;
1298                         default:
1299                                 kfree(sparse);
1300                                 return -EINVAL;
1301                         }
1302                 }
1303
1304                 if (caps.size) {
1305                         info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
1306                         if (info.argsz < sizeof(info) + caps.size) {
1307                                 info.argsz = sizeof(info) + caps.size;
1308                                 info.cap_offset = 0;
1309                         } else {
1310                                 vfio_info_cap_shift(&caps, sizeof(info));
1311                                 if (copy_to_user((void __user *)arg +
1312                                                   sizeof(info), caps.buf,
1313                                                   caps.size)) {
1314                                         kfree(caps.buf);
1315                                         kfree(sparse);
1316                                         return -EFAULT;
1317                                 }
1318                                 info.cap_offset = sizeof(info);
1319                         }
1320
1321                         kfree(caps.buf);
1322                 }
1323
1324                 kfree(sparse);
1325                 return copy_to_user((void __user *)arg, &info, minsz) ?
1326                         -EFAULT : 0;
1327         } else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) {
1328                 struct vfio_irq_info info;
1329
1330                 minsz = offsetofend(struct vfio_irq_info, count);
1331
1332                 if (copy_from_user(&info, (void __user *)arg, minsz))
1333                         return -EFAULT;
1334
1335                 if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
1336                         return -EINVAL;
1337
1338                 switch (info.index) {
1339                 case VFIO_PCI_INTX_IRQ_INDEX:
1340                 case VFIO_PCI_MSI_IRQ_INDEX:
1341                         break;
1342                 default:
1343                         return -EINVAL;
1344                 }
1345
1346                 info.flags = VFIO_IRQ_INFO_EVENTFD;
1347
1348                 info.count = intel_vgpu_get_irq_count(vgpu, info.index);
1349
1350                 if (info.index == VFIO_PCI_INTX_IRQ_INDEX)
1351                         info.flags |= (VFIO_IRQ_INFO_MASKABLE |
1352                                        VFIO_IRQ_INFO_AUTOMASKED);
1353                 else
1354                         info.flags |= VFIO_IRQ_INFO_NORESIZE;
1355
1356                 return copy_to_user((void __user *)arg, &info, minsz) ?
1357                         -EFAULT : 0;
1358         } else if (cmd == VFIO_DEVICE_SET_IRQS) {
1359                 struct vfio_irq_set hdr;
1360                 u8 *data = NULL;
1361                 int ret = 0;
1362                 size_t data_size = 0;
1363
1364                 minsz = offsetofend(struct vfio_irq_set, count);
1365
1366                 if (copy_from_user(&hdr, (void __user *)arg, minsz))
1367                         return -EFAULT;
1368
1369                 if (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE)) {
1370                         int max = intel_vgpu_get_irq_count(vgpu, hdr.index);
1371
1372                         ret = vfio_set_irqs_validate_and_prepare(&hdr, max,
1373                                                 VFIO_PCI_NUM_IRQS, &data_size);
1374                         if (ret) {
1375                                 gvt_vgpu_err("intel:vfio_set_irqs_validate_and_prepare failed\n");
1376                                 return -EINVAL;
1377                         }
1378                         if (data_size) {
1379                                 data = memdup_user((void __user *)(arg + minsz),
1380                                                    data_size);
1381                                 if (IS_ERR(data))
1382                                         return PTR_ERR(data);
1383                         }
1384                 }
1385
1386                 ret = intel_vgpu_set_irqs(vgpu, hdr.flags, hdr.index,
1387                                         hdr.start, hdr.count, data);
1388                 kfree(data);
1389
1390                 return ret;
1391         } else if (cmd == VFIO_DEVICE_RESET) {
1392                 intel_gvt_reset_vgpu(vgpu);
1393                 return 0;
1394         } else if (cmd == VFIO_DEVICE_QUERY_GFX_PLANE) {
1395                 struct vfio_device_gfx_plane_info dmabuf;
1396                 int ret = 0;
1397
1398                 minsz = offsetofend(struct vfio_device_gfx_plane_info,
1399                                     dmabuf_id);
1400                 if (copy_from_user(&dmabuf, (void __user *)arg, minsz))
1401                         return -EFAULT;
1402                 if (dmabuf.argsz < minsz)
1403                         return -EINVAL;
1404
1405                 ret = intel_vgpu_query_plane(vgpu, &dmabuf);
1406                 if (ret != 0)
1407                         return ret;
1408
1409                 return copy_to_user((void __user *)arg, &dmabuf, minsz) ?
1410                                                                 -EFAULT : 0;
1411         } else if (cmd == VFIO_DEVICE_GET_GFX_DMABUF) {
1412                 __u32 dmabuf_id;
1413
1414                 if (get_user(dmabuf_id, (__u32 __user *)arg))
1415                         return -EFAULT;
1416                 return intel_vgpu_get_dmabuf(vgpu, dmabuf_id);
1417         }
1418
1419         return -ENOTTY;
1420 }
1421
1422 static ssize_t
1423 vgpu_id_show(struct device *dev, struct device_attribute *attr,
1424              char *buf)
1425 {
1426         struct intel_vgpu *vgpu = dev_get_drvdata(dev);
1427
1428         return sprintf(buf, "%d\n", vgpu->id);
1429 }
1430
1431 static DEVICE_ATTR_RO(vgpu_id);
1432
1433 static struct attribute *intel_vgpu_attrs[] = {
1434         &dev_attr_vgpu_id.attr,
1435         NULL
1436 };
1437
1438 static const struct attribute_group intel_vgpu_group = {
1439         .name = "intel_vgpu",
1440         .attrs = intel_vgpu_attrs,
1441 };
1442
1443 static const struct attribute_group *intel_vgpu_groups[] = {
1444         &intel_vgpu_group,
1445         NULL,
1446 };
1447
1448 static int intel_vgpu_init_dev(struct vfio_device *vfio_dev)
1449 {
1450         struct mdev_device *mdev = to_mdev_device(vfio_dev->dev);
1451         struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
1452         struct intel_vgpu_type *type =
1453                 container_of(mdev->type, struct intel_vgpu_type, type);
1454
1455         vgpu->gvt = kdev_to_i915(mdev->type->parent->dev)->gvt;
1456         return intel_gvt_create_vgpu(vgpu, type->conf);
1457 }
1458
1459 static void intel_vgpu_release_dev(struct vfio_device *vfio_dev)
1460 {
1461         struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
1462
1463         intel_gvt_destroy_vgpu(vgpu);
1464         vfio_free_device(vfio_dev);
1465 }
1466
1467 static const struct vfio_device_ops intel_vgpu_dev_ops = {
1468         .init           = intel_vgpu_init_dev,
1469         .release        = intel_vgpu_release_dev,
1470         .open_device    = intel_vgpu_open_device,
1471         .close_device   = intel_vgpu_close_device,
1472         .read           = intel_vgpu_read,
1473         .write          = intel_vgpu_write,
1474         .mmap           = intel_vgpu_mmap,
1475         .ioctl          = intel_vgpu_ioctl,
1476         .dma_unmap      = intel_vgpu_dma_unmap,
1477 };
1478
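     /*
      * mdev probe: allocate the intel_vgpu (which embeds the vfio_device),
      * stash it in drvdata and register it as an emulated IOMMU device.
      */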
1479 static int intel_vgpu_probe(struct mdev_device *mdev)
1480 {
1481         struct intel_vgpu *vgpu;
1482         int ret;
1483
1484         vgpu = vfio_alloc_device(intel_vgpu, vfio_device, &mdev->dev,
1485                                  &intel_vgpu_dev_ops);
1486         if (IS_ERR(vgpu)) {
1487                 gvt_err("failed to create intel vgpu: %ld\n", PTR_ERR(vgpu));
1488                 return PTR_ERR(vgpu);
1489         }
1490
1491         dev_set_drvdata(&mdev->dev, vgpu);
1492         ret = vfio_register_emulated_iommu_dev(&vgpu->vfio_device);
1493         if (ret)
1494                 goto out_put_vdev;
1495
1496         gvt_dbg_core("intel_vgpu_create succeeded for mdev: %s\n",
1497                      dev_name(mdev_dev(mdev)));
1498         return 0;
1499
1500 out_put_vdev:
1501         vfio_put_device(&vgpu->vfio_device);
1502         return ret;
1503 }
1504
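     /*
      * mdev remove: a vGPU that is still attached to a guest must not be
      * torn down; otherwise unregister the device and drop our reference.
      */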
1505 static void intel_vgpu_remove(struct mdev_device *mdev)
1506 {
1507         struct intel_vgpu *vgpu = dev_get_drvdata(&mdev->dev);
1508
1509         if (WARN_ON_ONCE(vgpu->attached))
1510                 return;
1511
1512         vfio_unregister_group_dev(&vgpu->vfio_device);
1513         vfio_put_device(&vgpu->vfio_device);
1514 }
1515
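     /*
      * Report how many more instances of this vGPU type can be created,
      * bounded by whichever of low GM, high GM or fence registers is the
      * scarcest resource.
      */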
1516 static unsigned int intel_vgpu_get_available(struct mdev_type *mtype)
1517 {
1518         struct intel_vgpu_type *type =
1519                 container_of(mtype, struct intel_vgpu_type, type);
1520         struct intel_gvt *gvt = kdev_to_i915(mtype->parent->dev)->gvt;
1521         unsigned int low_gm_avail, high_gm_avail, fence_avail;
1522
1523         mutex_lock(&gvt->lock);
1524         low_gm_avail = gvt_aperture_sz(gvt) - HOST_LOW_GM_SIZE -
1525                 gvt->gm.vgpu_allocated_low_gm_size;
1526         high_gm_avail = gvt_hidden_sz(gvt) - HOST_HIGH_GM_SIZE -
1527                 gvt->gm.vgpu_allocated_high_gm_size;
1528         fence_avail = gvt_fence_sz(gvt) - HOST_FENCE -
1529                 gvt->fence.vgpu_allocated_fence_num;
1530         mutex_unlock(&gvt->lock);
1531
1532         return min3(low_gm_avail / type->conf->low_mm,
1533                     high_gm_avail / type->conf->high_mm,
1534                     fence_avail / type->conf->fence);
1535 }
1536
1537 static struct mdev_driver intel_vgpu_mdev_driver = {
1538         .device_api     = VFIO_DEVICE_API_PCI_STRING,
1539         .driver = {
1540                 .name           = "intel_vgpu_mdev",
1541                 .owner          = THIS_MODULE,
1542                 .dev_groups     = intel_vgpu_groups,
1543         },
1544         .probe                  = intel_vgpu_probe,
1545         .remove                 = intel_vgpu_remove,
1546         .get_available          = intel_vgpu_get_available,
1547         .show_description       = intel_vgpu_show_description,
1548 };
1549
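     /**
      * intel_gvt_page_track_add - write-protect a guest page
      * @info: the vGPU that owns the tracked page
      * @gfn: the guest page frame number to protect
      *
      * Returns:
      * Zero on success (or if the gfn is already write-protected), -ESRCH if
      * the vGPU is not attached, -EINVAL if the gfn has no memslot.
      */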
1550 int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn)
1551 {
1552         struct kvm *kvm = info->vfio_device.kvm;
1553         struct kvm_memory_slot *slot;
1554         int idx;
1555
1556         if (!info->attached)
1557                 return -ESRCH;
1558
1559         idx = srcu_read_lock(&kvm->srcu);
1560         slot = gfn_to_memslot(kvm, gfn);
1561         if (!slot) {
1562                 srcu_read_unlock(&kvm->srcu, idx);
1563                 return -EINVAL;
1564         }
1565
1566         write_lock(&kvm->mmu_lock);
1567
1568         if (kvmgt_gfn_is_write_protected(info, gfn))
1569                 goto out;
1570
1571         kvm_slot_page_track_add_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
1572         kvmgt_protect_table_add(info, gfn);
1573
1574 out:
1575         write_unlock(&kvm->mmu_lock);
1576         srcu_read_unlock(&kvm->srcu, idx);
1577         return 0;
1578 }
1579
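     /**
      * intel_gvt_page_track_remove - stop write-protecting a guest page
      * @info: the vGPU that owns the tracked page
      * @gfn: the guest page frame number to release
      *
      * Returns:
      * Zero on success or when the vGPU is not attached, -EINVAL if the gfn
      * has no memslot.
      */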
1580 int intel_gvt_page_track_remove(struct intel_vgpu *info, u64 gfn)
1581 {
1582         struct kvm *kvm = info->vfio_device.kvm;
1583         struct kvm_memory_slot *slot;
1584         int idx;
1585
1586         if (!info->attached)
1587                 return 0;
1588
1589         idx = srcu_read_lock(&kvm->srcu);
1590         slot = gfn_to_memslot(kvm, gfn);
1591         if (!slot) {
1592                 srcu_read_unlock(&kvm->srcu, idx);
1593                 return -EINVAL;
1594         }
1595
1596         write_lock(&kvm->mmu_lock);
1597
1598         if (!kvmgt_gfn_is_write_protected(info, gfn))
1599                 goto out;
1600
1601         kvm_slot_page_track_remove_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
1602         kvmgt_protect_table_del(info, gfn);
1603
1604 out:
1605         write_unlock(&kvm->mmu_lock);
1606         srcu_read_unlock(&kvm->srcu, idx);
1607         return 0;
1608 }
1609
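     /*
      * KVM page-track notifier: called on a write to a tracked guest page;
      * forward the write to the GVT page-track handler.
      */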
1610 static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
1611                 const u8 *val, int len,
1612                 struct kvm_page_track_notifier_node *node)
1613 {
1614         struct intel_vgpu *info =
1615                 container_of(node, struct intel_vgpu, track_node);
1616
1617         if (kvmgt_gfn_is_write_protected(info, gpa_to_gfn(gpa)))
1618                 intel_vgpu_page_track_handler(info, gpa,
1619                                                      (void *)val, len);
1620 }
1621
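     /*
      * KVM page-track notifier: a memslot is going away, so drop write
      * protection and tracking state for every protected gfn in the slot.
      */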
1622 static void kvmgt_page_track_flush_slot(struct kvm *kvm,
1623                 struct kvm_memory_slot *slot,
1624                 struct kvm_page_track_notifier_node *node)
1625 {
1626         int i;
1627         gfn_t gfn;
1628         struct intel_vgpu *info =
1629                 container_of(node, struct intel_vgpu, track_node);
1630
1631         write_lock(&kvm->mmu_lock);
1632         for (i = 0; i < slot->npages; i++) {
1633                 gfn = slot->base_gfn + i;
1634                 if (kvmgt_gfn_is_write_protected(info, gfn)) {
1635                         kvm_slot_page_track_remove_page(kvm, slot, gfn,
1636                                                 KVM_PAGE_TRACK_WRITE);
1637                         kvmgt_protect_table_del(info, gfn);
1638                 }
1639         }
1640         write_unlock(&kvm->mmu_lock);
1641 }
1642
1643 void intel_vgpu_detach_regions(struct intel_vgpu *vgpu)
1644 {
1645         int i;
1646
1647         if (!vgpu->region)
1648                 return;
1649
1650         for (i = 0; i < vgpu->num_regions; i++)
1651                 if (vgpu->region[i].ops->release)
1652                         vgpu->region[i].ops->release(vgpu,
1653                                         &vgpu->region[i]);
1654         vgpu->num_regions = 0;
1655         kfree(vgpu->region);
1656         vgpu->region = NULL;
1657 }
1658
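     /**
      * intel_gvt_dma_map_guest_page - map a guest page for DMA
      * @vgpu: the target vGPU
      * @gfn: the guest page frame number
      * @size: the mapping size in bytes
      * @dma_addr: where to return the DMA address
      *
      * A cache hit with a matching size only takes an extra reference; a hit
      * with a different size is unmapped and remapped.  Returns zero on
      * success or a negative error code.
      */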
1659 int intel_gvt_dma_map_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
1660                 unsigned long size, dma_addr_t *dma_addr)
1661 {
1662         struct gvt_dma *entry;
1663         int ret;
1664
1665         if (!vgpu->attached)
1666                 return -EINVAL;
1667
1668         mutex_lock(&vgpu->cache_lock);
1669
1670         entry = __gvt_cache_find_gfn(vgpu, gfn);
1671         if (!entry) {
1672                 ret = gvt_dma_map_page(vgpu, gfn, dma_addr, size);
1673                 if (ret)
1674                         goto err_unlock;
1675
1676                 ret = __gvt_cache_add(vgpu, gfn, *dma_addr, size);
1677                 if (ret)
1678                         goto err_unmap;
1679         } else if (entry->size != size) {
1680                 /* the same gfn with different size: unmap and re-map */
1681                 gvt_dma_unmap_page(vgpu, gfn, entry->dma_addr, entry->size);
1682                 __gvt_cache_remove_entry(vgpu, entry);
1683
1684                 ret = gvt_dma_map_page(vgpu, gfn, dma_addr, size);
1685                 if (ret)
1686                         goto err_unlock;
1687
1688                 ret = __gvt_cache_add(vgpu, gfn, *dma_addr, size);
1689                 if (ret)
1690                         goto err_unmap;
1691         } else {
1692                 kref_get(&entry->ref);
1693                 *dma_addr = entry->dma_addr;
1694         }
1695
1696         mutex_unlock(&vgpu->cache_lock);
1697         return 0;
1698
1699 err_unmap:
1700         gvt_dma_unmap_page(vgpu, gfn, *dma_addr, size);
1701 err_unlock:
1702         mutex_unlock(&vgpu->cache_lock);
1703         return ret;
1704 }
1705
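     /**
      * intel_gvt_dma_pin_guest_page - take a reference on an existing mapping
      * @vgpu: the target vGPU
      * @dma_addr: the DMA address previously returned by the map call
      *
      * Returns zero on success, -ENODEV if the vGPU is not attached and
      * -ENOMEM if the address is not in the DMA cache.
      */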
1706 int intel_gvt_dma_pin_guest_page(struct intel_vgpu *vgpu, dma_addr_t dma_addr)
1707 {
1708         struct gvt_dma *entry;
1709         int ret = 0;
1710
1711         if (!vgpu->attached)
1712                 return -ENODEV;
1713
1714         mutex_lock(&vgpu->cache_lock);
1715         entry = __gvt_cache_find_dma_addr(vgpu, dma_addr);
1716         if (entry)
1717                 kref_get(&entry->ref);
1718         else
1719                 ret = -ENOMEM;
1720         mutex_unlock(&vgpu->cache_lock);
1721
1722         return ret;
1723 }
1724
1725 static void __gvt_dma_release(struct kref *ref)
1726 {
1727         struct gvt_dma *entry = container_of(ref, typeof(*entry), ref);
1728
1729         gvt_dma_unmap_page(entry->vgpu, entry->gfn, entry->dma_addr,
1730                            entry->size);
1731         __gvt_cache_remove_entry(entry->vgpu, entry);
1732 }
1733
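     /**
      * intel_gvt_dma_unmap_guest_page - drop a reference on a DMA mapping
      * @vgpu: the target vGPU
      * @dma_addr: the DMA address to release
      *
      * The page is actually unmapped and removed from the cache only when
      * the last reference is dropped (see __gvt_dma_release()).
      */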
1734 void intel_gvt_dma_unmap_guest_page(struct intel_vgpu *vgpu,
1735                 dma_addr_t dma_addr)
1736 {
1737         struct gvt_dma *entry;
1738
1739         if (!vgpu->attached)
1740                 return;
1741
1742         mutex_lock(&vgpu->cache_lock);
1743         entry = __gvt_cache_find_dma_addr(vgpu, dma_addr);
1744         if (entry)
1745                 kref_put(&entry->ref, __gvt_dma_release);
1746         mutex_unlock(&vgpu->cache_lock);
1747 }
1748
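     /*
      * Fixed device parameters used for vGPU emulation: config space and
      * MMIO sizes, GTT layout and the MSI capability offset of the parent
      * device.
      */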
1749 static void init_device_info(struct intel_gvt *gvt)
1750 {
1751         struct intel_gvt_device_info *info = &gvt->device_info;
1752         struct pci_dev *pdev = to_pci_dev(gvt->gt->i915->drm.dev);
1753
1754         info->max_support_vgpus = 8;
1755         info->cfg_space_size = PCI_CFG_SPACE_EXP_SIZE;
1756         info->mmio_size = 2 * 1024 * 1024;
1757         info->mmio_bar = 0;
1758         info->gtt_start_offset = 8 * 1024 * 1024;
1759         info->gtt_entry_size = 8;
1760         info->gtt_entry_size_shift = 3;
1761         info->gmadr_bytes_in_cmd = 8;
1762         info->max_surface_size = 36 * 1024 * 1024;
1763         info->msi_cap_offset = pdev->msi_cap;
1764 }
1765
1766 static void intel_gvt_test_and_emulate_vblank(struct intel_gvt *gvt)
1767 {
1768         struct intel_vgpu *vgpu;
1769         int id;
1770
1771         mutex_lock(&gvt->lock);
1772         idr_for_each_entry(&gvt->vgpu_idr, vgpu, id) {
1773                 if (test_and_clear_bit(INTEL_GVT_REQUEST_EMULATE_VBLANK + id,
1774                                        (void *)&gvt->service_request)) {
1775                         if (vgpu->active)
1776                                 intel_vgpu_emulate_vblank(vgpu);
1777                 }
1778         }
1779         mutex_unlock(&gvt->lock);
1780 }
1781
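     /*
      * Service kthread: sleep until a service request bit is set, then
      * emulate pending vblanks and, if requested, run the scheduler.
      */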
1782 static int gvt_service_thread(void *data)
1783 {
1784         struct intel_gvt *gvt = (struct intel_gvt *)data;
1785         int ret;
1786
1787         gvt_dbg_core("service thread start\n");
1788
1789         while (!kthread_should_stop()) {
1790                 ret = wait_event_interruptible(gvt->service_thread_wq,
1791                                 kthread_should_stop() || gvt->service_request);
1792
1793                 if (kthread_should_stop())
1794                         break;
1795
1796                 if (WARN_ONCE(ret, "service thread was woken up by a signal.\n"))
1797                         continue;
1798
1799                 intel_gvt_test_and_emulate_vblank(gvt);
1800
1801                 if (test_bit(INTEL_GVT_REQUEST_SCHED,
1802                                 (void *)&gvt->service_request) ||
1803                         test_bit(INTEL_GVT_REQUEST_EVENT_SCHED,
1804                                         (void *)&gvt->service_request)) {
1805                         intel_gvt_schedule(gvt);
1806                 }
1807         }
1808
1809         return 0;
1810 }
1811
1812 static void clean_service_thread(struct intel_gvt *gvt)
1813 {
1814         kthread_stop(gvt->service_thread);
1815 }
1816
1817 static int init_service_thread(struct intel_gvt *gvt)
1818 {
1819         init_waitqueue_head(&gvt->service_thread_wq);
1820
1821         gvt->service_thread = kthread_run(gvt_service_thread,
1822                         gvt, "gvt_service_thread");
1823         if (IS_ERR(gvt->service_thread)) {
1824                 gvt_err("failed to start service thread\n");
1825                 return PTR_ERR(gvt->service_thread);
1826         }
1827         return 0;
1828 }
1829
1830 /**
1831  * intel_gvt_clean_device - clean a GVT device
1832  * @i915: i915 private data
1833  *
1834  * This function is called at the driver unloading stage to free the
1835  * resources owned by a GVT device.
1836  *
1837  */
1838 static void intel_gvt_clean_device(struct drm_i915_private *i915)
1839 {
1840         struct intel_gvt *gvt = fetch_and_zero(&i915->gvt);
1841
1842         if (drm_WARN_ON(&i915->drm, !gvt))
1843                 return;
1844
1845         mdev_unregister_parent(&gvt->parent);
1846         intel_gvt_destroy_idle_vgpu(gvt->idle_vgpu);
1847         intel_gvt_clean_vgpu_types(gvt);
1848
1849         intel_gvt_debugfs_clean(gvt);
1850         clean_service_thread(gvt);
1851         intel_gvt_clean_cmd_parser(gvt);
1852         intel_gvt_clean_sched_policy(gvt);
1853         intel_gvt_clean_workload_scheduler(gvt);
1854         intel_gvt_clean_gtt(gvt);
1855         intel_gvt_free_firmware(gvt);
1856         intel_gvt_clean_mmio_info(gvt);
1857         idr_destroy(&gvt->vgpu_idr);
1858
1859         kfree(i915->gvt);
1860 }
1861
1862 /**
1863  * intel_gvt_init_device - initialize a GVT device
1864  * @i915: drm i915 private data
1865  *
1866  * This function is called at the driver initialization stage to set up the
1867  * necessary GVT components.
1868  *
1869  * Returns:
1870  * Zero on success, negative error code on failure.
1871  *
1872  */
1873 static int intel_gvt_init_device(struct drm_i915_private *i915)
1874 {
1875         struct intel_gvt *gvt;
1876         struct intel_vgpu *vgpu;
1877         int ret;
1878
1879         if (drm_WARN_ON(&i915->drm, i915->gvt))
1880                 return -EEXIST;
1881
1882         gvt = kzalloc(sizeof(*gvt), GFP_KERNEL);
1883         if (!gvt)
1884                 return -ENOMEM;
1885
1886         gvt_dbg_core("init gvt device\n");
1887
1888         idr_init_base(&gvt->vgpu_idr, 1);
1889         spin_lock_init(&gvt->scheduler.mmio_context_lock);
1890         mutex_init(&gvt->lock);
1891         mutex_init(&gvt->sched_lock);
1892         gvt->gt = to_gt(i915);
1893         i915->gvt = gvt;
1894
1895         init_device_info(gvt);
1896
1897         ret = intel_gvt_setup_mmio_info(gvt);
1898         if (ret)
1899                 goto out_clean_idr;
1900
1901         intel_gvt_init_engine_mmio_context(gvt);
1902
1903         ret = intel_gvt_load_firmware(gvt);
1904         if (ret)
1905                 goto out_clean_mmio_info;
1906
1907         ret = intel_gvt_init_irq(gvt);
1908         if (ret)
1909                 goto out_free_firmware;
1910
1911         ret = intel_gvt_init_gtt(gvt);
1912         if (ret)
1913                 goto out_free_firmware;
1914
1915         ret = intel_gvt_init_workload_scheduler(gvt);
1916         if (ret)
1917                 goto out_clean_gtt;
1918
1919         ret = intel_gvt_init_sched_policy(gvt);
1920         if (ret)
1921                 goto out_clean_workload_scheduler;
1922
1923         ret = intel_gvt_init_cmd_parser(gvt);
1924         if (ret)
1925                 goto out_clean_sched_policy;
1926
1927         ret = init_service_thread(gvt);
1928         if (ret)
1929                 goto out_clean_cmd_parser;
1930
1931         ret = intel_gvt_init_vgpu_types(gvt);
1932         if (ret)
1933                 goto out_clean_thread;
1934
1935         vgpu = intel_gvt_create_idle_vgpu(gvt);
1936         if (IS_ERR(vgpu)) {
1937                 ret = PTR_ERR(vgpu);
1938                 gvt_err("failed to create idle vgpu\n");
1939                 goto out_clean_types;
1940         }
1941         gvt->idle_vgpu = vgpu;
1942
1943         intel_gvt_debugfs_init(gvt);
1944
1945         ret = mdev_register_parent(&gvt->parent, i915->drm.dev,
1946                                    &intel_vgpu_mdev_driver,
1947                                    gvt->mdev_types, gvt->num_types);
1948         if (ret)
1949                 goto out_destroy_idle_vgpu;
1950
1951         gvt_dbg_core("gvt device initialization is done\n");
1952         return 0;
1953
1954 out_destroy_idle_vgpu:
1955         intel_gvt_destroy_idle_vgpu(gvt->idle_vgpu);
1956         intel_gvt_debugfs_clean(gvt);
1957 out_clean_types:
1958         intel_gvt_clean_vgpu_types(gvt);
1959 out_clean_thread:
1960         clean_service_thread(gvt);
1961 out_clean_cmd_parser:
1962         intel_gvt_clean_cmd_parser(gvt);
1963 out_clean_sched_policy:
1964         intel_gvt_clean_sched_policy(gvt);
1965 out_clean_workload_scheduler:
1966         intel_gvt_clean_workload_scheduler(gvt);
1967 out_clean_gtt:
1968         intel_gvt_clean_gtt(gvt);
1969 out_free_firmware:
1970         intel_gvt_free_firmware(gvt);
1971 out_clean_mmio_info:
1972         intel_gvt_clean_mmio_info(gvt);
1973 out_clean_idr:
1974         idr_destroy(&gvt->vgpu_idr);
1975         kfree(gvt);
1976         i915->gvt = NULL;
1977         return ret;
1978 }
1979
1980 static void intel_gvt_pm_resume(struct drm_i915_private *i915)
1981 {
1982         struct intel_gvt *gvt = i915->gvt;
1983
1984         intel_gvt_restore_fence(gvt);
1985         intel_gvt_restore_mmio(gvt);
1986         intel_gvt_restore_ggtt(gvt);
1987 }
1988
1989 static const struct intel_vgpu_ops intel_gvt_vgpu_ops = {
1990         .init_device    = intel_gvt_init_device,
1991         .clean_device   = intel_gvt_clean_device,
1992         .pm_resume      = intel_gvt_pm_resume,
1993 };
1994
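     /* Module init: hook the GVT vGPU ops into i915, then register the mdev driver. */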
1995 static int __init kvmgt_init(void)
1996 {
1997         int ret;
1998
1999         ret = intel_gvt_set_ops(&intel_gvt_vgpu_ops);
2000         if (ret)
2001                 return ret;
2002
2003         ret = mdev_register_driver(&intel_vgpu_mdev_driver);
2004         if (ret)
2005                 intel_gvt_clear_ops(&intel_gvt_vgpu_ops);
2006         return ret;
2007 }
2008
2009 static void __exit kvmgt_exit(void)
2010 {
2011         mdev_unregister_driver(&intel_vgpu_mdev_driver);
2012         intel_gvt_clear_ops(&intel_gvt_vgpu_ops);
2013 }
2014
2015 module_init(kvmgt_init);
2016 module_exit(kvmgt_exit);
2017
2018 MODULE_LICENSE("GPL and additional rights");
2019 MODULE_AUTHOR("Intel Corporation");