Merge tag 'gvt-next-2018-09-04' of https://github.com/intel/gvt-linux into drm-intel...
drivers/gpu/drm/i915/gvt/gtt.c
index d34dc9ab66e10e37024d7ab922b95077126bdbfb..bbfb168a966565698c7fd9827ce4ad051edb2f7d 100644
@@ -351,6 +351,8 @@ static inline int gtt_set_entry64(void *pt,
 #define GTT_SPTE_FLAG_MASK GENMASK_ULL(62, 52)
 #define GTT_SPTE_FLAG_64K_SPLITED BIT(52) /* splited 64K gtt entry */
 
+#define GTT_64K_PTE_STRIDE 16
+
 static unsigned long gen8_gtt_get_pfn(struct intel_gvt_gtt_entry *e)
 {
        unsigned long pfn;
@@ -735,10 +737,12 @@ static void ppgtt_free_spt(struct intel_vgpu_ppgtt_spt *spt)
 
        radix_tree_delete(&spt->vgpu->gtt.spt_tree, spt->shadow_page.mfn);
 
-       if (spt->guest_page.oos_page)
-               detach_oos_page(spt->vgpu, spt->guest_page.oos_page);
+       if (spt->guest_page.gfn) {
+               if (spt->guest_page.oos_page)
+                       detach_oos_page(spt->vgpu, spt->guest_page.oos_page);
 
-       intel_vgpu_unregister_page_track(spt->vgpu, spt->guest_page.gfn);
+               intel_vgpu_unregister_page_track(spt->vgpu, spt->guest_page.gfn);
+       }
 
        list_del_init(&spt->post_shadow_list);
        free_spt(spt);
@@ -799,9 +803,9 @@ static inline struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_mfn(
 
 static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt);
 
+/* Allocate a shadow page table without a guest page. */
 static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt(
-               struct intel_vgpu *vgpu, int type, unsigned long gfn,
-               bool guest_pde_ips)
+               struct intel_vgpu *vgpu, intel_gvt_gtt_type_t type)
 {
        struct device *kdev = &vgpu->gvt->dev_priv->drm.pdev->dev;
        struct intel_vgpu_ppgtt_spt *spt = NULL;
@@ -836,27 +840,12 @@ retry:
        spt->shadow_page.vaddr = page_address(spt->shadow_page.page);
        spt->shadow_page.mfn = daddr >> I915_GTT_PAGE_SHIFT;
 
-       /*
-        * Init guest_page.
-        */
-       spt->guest_page.type = type;
-       spt->guest_page.gfn = gfn;
-       spt->guest_page.pde_ips = guest_pde_ips;
-
-       ret = intel_vgpu_register_page_track(vgpu, spt->guest_page.gfn,
-                                       ppgtt_write_protection_handler, spt);
-       if (ret)
-               goto err_unmap_dma;
-
        ret = radix_tree_insert(&vgpu->gtt.spt_tree, spt->shadow_page.mfn, spt);
        if (ret)
-               goto err_unreg_page_track;
+               goto err_unmap_dma;
 
-       trace_spt_alloc(vgpu->id, spt, type, spt->shadow_page.mfn, gfn);
        return spt;
 
-err_unreg_page_track:
-       intel_vgpu_unregister_page_track(vgpu, spt->guest_page.gfn);
 err_unmap_dma:
        dma_unmap_page(kdev, daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
 err_free_spt:
@@ -864,6 +853,37 @@ err_free_spt:
        return ERR_PTR(ret);
 }
 
+/* Allocate a shadow page table associated with a specific gfn. */
+static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt_gfn(
+               struct intel_vgpu *vgpu, intel_gvt_gtt_type_t type,
+               unsigned long gfn, bool guest_pde_ips)
+{
+       struct intel_vgpu_ppgtt_spt *spt;
+       int ret;
+
+       spt = ppgtt_alloc_spt(vgpu, type);
+       if (IS_ERR(spt))
+               return spt;
+
+       /*
+        * Init guest_page.
+        */
+       ret = intel_vgpu_register_page_track(vgpu, gfn,
+                       ppgtt_write_protection_handler, spt);
+       if (ret) {
+               ppgtt_free_spt(spt);
+               return ERR_PTR(ret);
+       }
+
+       spt->guest_page.type = type;
+       spt->guest_page.gfn = gfn;
+       spt->guest_page.pde_ips = guest_pde_ips;
+
+       trace_spt_alloc(vgpu->id, spt, type, spt->shadow_page.mfn, gfn);
+
+       return spt;
+}
+
 #define pt_entry_size_shift(spt) \
        ((spt)->vgpu->gvt->device_info.gtt_entry_size_shift)
 
@@ -871,24 +891,38 @@ err_free_spt:
        (I915_GTT_PAGE_SIZE >> pt_entry_size_shift(spt))
 
 #define for_each_present_guest_entry(spt, e, i) \
-       for (i = 0; i < pt_entries(spt); i++) \
+       for (i = 0; i < pt_entries(spt); \
+            i += spt->guest_page.pde_ips ? GTT_64K_PTE_STRIDE : 1) \
                if (!ppgtt_get_guest_entry(spt, e, i) && \
                    spt->vgpu->gvt->gtt.pte_ops->test_present(e))
 
 #define for_each_present_shadow_entry(spt, e, i) \
-       for (i = 0; i < pt_entries(spt); i++) \
+       for (i = 0; i < pt_entries(spt); \
+            i += spt->shadow_page.pde_ips ? GTT_64K_PTE_STRIDE : 1) \
                if (!ppgtt_get_shadow_entry(spt, e, i) && \
                    spt->vgpu->gvt->gtt.pte_ops->test_present(e))
 
-static void ppgtt_get_spt(struct intel_vgpu_ppgtt_spt *spt)
+#define for_each_shadow_entry(spt, e, i) \
+       for (i = 0; i < pt_entries(spt); \
+            i += (spt->shadow_page.pde_ips ? GTT_64K_PTE_STRIDE : 1)) \
+               if (!ppgtt_get_shadow_entry(spt, e, i))
+
+static inline void ppgtt_get_spt(struct intel_vgpu_ppgtt_spt *spt)
 {
        int v = atomic_read(&spt->refcount);
 
        trace_spt_refcount(spt->vgpu->id, "inc", spt, v, (v + 1));
-
        atomic_inc(&spt->refcount);
 }
 
+static inline int ppgtt_put_spt(struct intel_vgpu_ppgtt_spt *spt)
+{
+       int v = atomic_read(&spt->refcount);
+
+       trace_spt_refcount(spt->vgpu->id, "dec", spt, v, (v - 1));
+       return atomic_dec_return(&spt->refcount);
+}
+
 static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt);
 
 static int ppgtt_invalidate_spt_by_shadow_entry(struct intel_vgpu *vgpu,
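The stride used by the walkers above follows from 64K / 4K = 16: a 64K page spans 16 contiguous 4K PTE slots, so an IPS page table only carries a valid entry in every 16th of its 512 slots (PTE#0, PTE#16, ... PTE#496). A minimal standalone C sketch of that arithmetic, separate from the driver code:

    #include <stdio.h>

    #define GTT_64K_PTE_STRIDE 16   /* 64K / 4K */
    #define PT_ENTRIES 512          /* 4K page table / 8-byte entry */

    int main(void)
    {
            /* With pde_ips set, only every 16th slot holds a real 64K entry. */
            for (unsigned int i = 0; i < PT_ENTRIES; i += GTT_64K_PTE_STRIDE)
                    printf("PTE#%3u -> offset 0x%06x in the 2M range\n",
                           i, i * 0x1000);
            return 0;
    }
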
@@ -927,7 +961,8 @@ static inline void ppgtt_invalidate_pte(struct intel_vgpu_ppgtt_spt *spt,
        pfn = ops->get_pfn(entry);
        type = spt->shadow_page.type;
 
-       if (pfn == vgpu->gtt.scratch_pt[type].page_mfn)
+       /* Uninitialized spte or unshadowed spte. */
+       if (!pfn || pfn == vgpu->gtt.scratch_pt[type].page_mfn)
                return;
 
        intel_gvt_hypervisor_dma_unmap_guest_page(vgpu, pfn << PAGE_SHIFT);
@@ -939,14 +974,11 @@ static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt)
        struct intel_gvt_gtt_entry e;
        unsigned long index;
        int ret;
-       int v = atomic_read(&spt->refcount);
 
        trace_spt_change(spt->vgpu->id, "die", spt,
                        spt->guest_page.gfn, spt->shadow_page.type);
 
-       trace_spt_refcount(spt->vgpu->id, "dec", spt, v, (v - 1));
-
-       if (atomic_dec_return(&spt->refcount) > 0)
+       if (ppgtt_put_spt(spt) > 0)
                return 0;
 
        for_each_present_shadow_entry(spt, &e, index) {
@@ -956,9 +988,14 @@ static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt)
                        ppgtt_invalidate_pte(spt, &e);
                        break;
                case GTT_TYPE_PPGTT_PTE_64K_ENTRY:
+                       /* We don't set up 64K shadow entries so far. */
+                       WARN(1, "suspicious 64K gtt entry\n");
+                       continue;
                case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
+                       gvt_vdbg_mm("invalidate 2M entry\n");
+                       continue;
                case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
-                       WARN(1, "GVT doesn't support 64K/2M/1GB page\n");
+                       WARN(1, "GVT doesn't support 1GB page\n");
                        continue;
                case GTT_TYPE_PPGTT_PML4_ENTRY:
                case GTT_TYPE_PPGTT_PDP_ENTRY:
@@ -1012,34 +1049,49 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_populate_spt_by_guest_entry(
 
        GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(we->type)));
 
+       if (we->type == GTT_TYPE_PPGTT_PDE_ENTRY)
+               ips = vgpu_ips_enabled(vgpu) && ops->test_ips(we);
+
        spt = intel_vgpu_find_spt_by_gfn(vgpu, ops->get_pfn(we));
-       if (spt)
+       if (spt) {
                ppgtt_get_spt(spt);
-       else {
-               int type = get_next_pt_type(we->type);
 
-               if (we->type == GTT_TYPE_PPGTT_PDE_ENTRY)
-                       ips = vgpu_ips_enabled(vgpu) && ops->test_ips(we);
+               if (ips != spt->guest_page.pde_ips) {
+                       spt->guest_page.pde_ips = ips;
 
-               spt = ppgtt_alloc_spt(vgpu, type, ops->get_pfn(we), ips);
+                       gvt_dbg_mm("reshadow PDE since ips changed\n");
+                       clear_page(spt->shadow_page.vaddr);
+                       ret = ppgtt_populate_spt(spt);
+                       if (ret) {
+                               ppgtt_put_spt(spt);
+                               goto err;
+                       }
+               }
+       } else {
+               int type = get_next_pt_type(we->type);
+
+               spt = ppgtt_alloc_spt_gfn(vgpu, type, ops->get_pfn(we), ips);
                if (IS_ERR(spt)) {
                        ret = PTR_ERR(spt);
-                       goto fail;
+                       goto err;
                }
 
                ret = intel_vgpu_enable_page_track(vgpu, spt->guest_page.gfn);
                if (ret)
-                       goto fail;
+                       goto err_free_spt;
 
                ret = ppgtt_populate_spt(spt);
                if (ret)
-                       goto fail;
+                       goto err_free_spt;
 
                trace_spt_change(vgpu->id, "new", spt, spt->guest_page.gfn,
                                 spt->shadow_page.type);
        }
        return spt;
-fail:
+
+err_free_spt:
+       ppgtt_free_spt(spt);
+err:
        gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
                     spt, we->val64, we->type);
        return ERR_PTR(ret);
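The IPS handling above re-shadows an existing SPT when the guest flips the PDE's IPS (intermediate page size) flag, since that flag decides whether the page table below it holds 4K or 64K entries. A hedged sketch of such a test, assuming the gen8 PDE layout with IPS at bit 11 (the bit position and names here are assumptions, not taken from this file):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Assumed gen8 PDE layout: IPS flag at bit 11. */
    #define PDE_IPS_64K (UINT64_C(1) << 11)

    /* True when the PDE marks its page table as holding 64K entries. */
    static bool pde_has_ips(uint64_t pde)
    {
            return (pde & PDE_IPS_64K) != 0;
    }

    int main(void)
    {
            printf("%d %d\n", pde_has_ips(0x0), pde_has_ips(UINT64_C(1) << 11));
            return 0;
    }
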
@@ -1053,16 +1105,122 @@ static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se,
        se->type = ge->type;
        se->val64 = ge->val64;
 
+       /* We always split 64KB pages, so clear IPS in the shadow PDE. */
+       if (se->type == GTT_TYPE_PPGTT_PDE_ENTRY)
+               ops->clear_ips(se);
+
        ops->set_pfn(se, s->shadow_page.mfn);
 }
 
+/**
+ * Check whether 2MB huge gtt shadowing is possible
+ * @vgpu: target vgpu
+ * @entry: target pfn's gtt entry
+ *
+ * Return 1 if 2MB huge gtt shadowing is possible, 0 if the conditions
+ * are not met, or a negative error code on failure.
+ */
+static int is_2MB_gtt_possible(struct intel_vgpu *vgpu,
+       struct intel_gvt_gtt_entry *entry)
+{
+       struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
+       unsigned long pfn;
+
+       if (!HAS_PAGE_SIZES(vgpu->gvt->dev_priv, I915_GTT_PAGE_SIZE_2M))
+               return 0;
+
+       pfn = intel_gvt_hypervisor_gfn_to_mfn(vgpu, ops->get_pfn(entry));
+       if (pfn == INTEL_GVT_INVALID_ADDR)
+               return -EINVAL;
+
+       return PageTransHuge(pfn_to_page(pfn));
+}
+
+static int split_2MB_gtt_entry(struct intel_vgpu *vgpu,
+       struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
+       struct intel_gvt_gtt_entry *se)
+{
+       struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
+       struct intel_vgpu_ppgtt_spt *sub_spt;
+       struct intel_gvt_gtt_entry sub_se;
+       unsigned long start_gfn;
+       dma_addr_t dma_addr;
+       unsigned long sub_index;
+       int ret;
+
+       gvt_dbg_mm("Split 2M gtt entry, index %lu\n", index);
+
+       start_gfn = ops->get_pfn(se);
+
+       sub_spt = ppgtt_alloc_spt(vgpu, GTT_TYPE_PPGTT_PTE_PT);
+       if (IS_ERR(sub_spt))
+               return PTR_ERR(sub_spt);
+
+       for_each_shadow_entry(sub_spt, &sub_se, sub_index) {
+               ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu,
+                               start_gfn + sub_index, PAGE_SIZE, &dma_addr);
+               if (ret) {
+                       ppgtt_invalidate_spt(spt);
+                       return ret;
+               }
+               sub_se.val64 = se->val64;
+
+               /* Copy the PAT field from PDE. */
+               sub_se.val64 &= ~_PAGE_PAT;
+               sub_se.val64 |= (se->val64 & _PAGE_PAT_LARGE) >> 5;
+
+               ops->set_pfn(&sub_se, dma_addr >> PAGE_SHIFT);
+               ppgtt_set_shadow_entry(sub_spt, &sub_se, sub_index);
+       }
+
+       /* Clear dirty field. */
+       se->val64 &= ~_PAGE_DIRTY;
+
+       ops->clear_pse(se);
+       ops->clear_ips(se);
+       ops->set_pfn(se, sub_spt->shadow_page.mfn);
+       ppgtt_set_shadow_entry(spt, se, index);
+       return 0;
+}
+
+static int split_64KB_gtt_entry(struct intel_vgpu *vgpu,
+       struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
+       struct intel_gvt_gtt_entry *se)
+{
+       struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
+       struct intel_gvt_gtt_entry entry = *se;
+       unsigned long start_gfn;
+       dma_addr_t dma_addr;
+       int i, ret;
+
+       gvt_vdbg_mm("Split 64K gtt entry, index %lu\n", index);
+
+       GEM_BUG_ON(index % GTT_64K_PTE_STRIDE);
+
+       start_gfn = ops->get_pfn(se);
+
+       entry.type = GTT_TYPE_PPGTT_PTE_4K_ENTRY;
+       ops->set_64k_splited(&entry);
+
+       for (i = 0; i < GTT_64K_PTE_STRIDE; i++) {
+               ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu,
+                                       start_gfn + i, PAGE_SIZE, &dma_addr);
+               if (ret)
+                       return ret;
+
+               ops->set_pfn(&entry, dma_addr >> PAGE_SHIFT);
+               ppgtt_set_shadow_entry(spt, &entry, index + i);
+       }
+       return 0;
+}
+
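The PAT handling in split_2MB_gtt_entry() above explains the otherwise opaque '>> 5': in a 2M PDE the PAT selector sits at bit 12 (_PAGE_PAT_LARGE on x86) while a 4K PTE keeps it at bit 7 (_PAGE_PAT), so shifting right by 5 moves the bit between the two positions. A standalone sketch of that bit move, with the bit positions assumed from the x86 definitions rather than taken from this file:

    #include <stdint.h>
    #include <stdio.h>

    #define PAT_4K    (UINT64_C(1) << 7)    /* PAT bit in a 4K PTE */
    #define PAT_LARGE (UINT64_C(1) << 12)   /* PAT bit in a 2M/1G entry */

    /* Copy the PAT selector from a large-page PDE into a 4K PTE. */
    static uint64_t copy_pat(uint64_t pde, uint64_t pte)
    {
            pte &= ~PAT_4K;
            pte |= (pde & PAT_LARGE) >> 5;  /* bit 12 -> bit 7 */
            return pte;
    }

    int main(void)
    {
            printf("0x%llx\n",
                   (unsigned long long)copy_pat(UINT64_C(1) << 12, 0));
            return 0;
    }
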
 static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu,
        struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
        struct intel_gvt_gtt_entry *ge)
 {
        struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
        struct intel_gvt_gtt_entry se = *ge;
-       unsigned long gfn;
+       unsigned long gfn, page_size = PAGE_SIZE;
        dma_addr_t dma_addr;
        int ret;
 
@@ -1076,16 +1234,32 @@ static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu,
                gvt_vdbg_mm("shadow 4K gtt entry\n");
                break;
        case GTT_TYPE_PPGTT_PTE_64K_ENTRY:
+               gvt_vdbg_mm("shadow 64K gtt entry\n");
+               /*
+                * The layout of a 64K page is special: its page size is
+                * controlled by the upper-level PDE. To keep things simple,
+                * we always split a 64K page into 4K pages in the shadow PT.
+                */
+               return split_64KB_gtt_entry(vgpu, spt, index, &se);
        case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
+               gvt_vdbg_mm("shadow 2M gtt entry\n");
+               ret = is_2MB_gtt_possible(vgpu, ge);
+               if (ret == 0)
+                       return split_2MB_gtt_entry(vgpu, spt, index, &se);
+               else if (ret < 0)
+                       return ret;
+               page_size = I915_GTT_PAGE_SIZE_2M;
+               break;
        case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
-               gvt_vgpu_err("GVT doesn't support 64K/2M/1GB entry\n");
+               gvt_vgpu_err("GVT doesn't support 1GB entry\n");
                return -EINVAL;
        default:
                GEM_BUG_ON(1);
        };
 
        /* direct shadow */
-       ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, &dma_addr);
+       ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, page_size,
+                                                     &dma_addr);
        if (ret)
                return -ENXIO;
 
@@ -1168,8 +1342,12 @@ static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_ppgtt_spt *spt,
                ret = ppgtt_invalidate_spt(s);
                if (ret)
                        goto fail;
-       } else
+       } else {
+               /* We don't set up 64K shadow entries so far. */
+               WARN(se->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY,
+                    "suspicious 64K entry\n");
                ppgtt_invalidate_pte(spt, se);
+       }
 
        return 0;
 fail:
@@ -1392,7 +1570,7 @@ static int ppgtt_handle_guest_write_page_table(
        struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
        struct intel_gvt_gtt_entry old_se;
        int new_present;
-       int ret;
+       int i, ret;
 
        new_present = ops->test_present(we);
 
@@ -1414,8 +1592,27 @@ static int ppgtt_handle_guest_write_page_table(
                goto fail;
 
        if (!new_present) {
-               ops->set_pfn(&old_se, vgpu->gtt.scratch_pt[type].page_mfn);
-               ppgtt_set_shadow_entry(spt, &old_se, index);
+               /* For split 64KB entries, we need to clear all of them. */
+               if (ops->test_64k_splited(&old_se) &&
+                   !(index % GTT_64K_PTE_STRIDE)) {
+                       gvt_vdbg_mm("remove splited 64K shadow entries\n");
+                       for (i = 0; i < GTT_64K_PTE_STRIDE; i++) {
+                               ops->clear_64k_splited(&old_se);
+                               ops->set_pfn(&old_se,
+                                       vgpu->gtt.scratch_pt[type].page_mfn);
+                               ppgtt_set_shadow_entry(spt, &old_se, index + i);
+                       }
+               } else if (old_se.type == GTT_TYPE_PPGTT_PTE_2M_ENTRY ||
+                          old_se.type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) {
+                       ops->clear_pse(&old_se);
+                       ops->set_pfn(&old_se,
+                                    vgpu->gtt.scratch_pt[type].page_mfn);
+                       ppgtt_set_shadow_entry(spt, &old_se, index);
+               } else {
+                       ops->set_pfn(&old_se,
+                                    vgpu->gtt.scratch_pt[type].page_mfn);
+                       ppgtt_set_shadow_entry(spt, &old_se, index);
+               }
        }
 
        return 0;
@@ -1497,6 +1694,18 @@ static int ppgtt_handle_guest_write_page_table_bytes(
 
        ppgtt_get_guest_entry(spt, &we, index);
 
+       /*
+        * For a page table which has 64K gtt entries, only PTE#0, PTE#16,
+        * PTE#32, ... PTE#496 are used. Writes to the unused PTEs should be
+        * ignored.
+        */
+       if (we.type == GTT_TYPE_PPGTT_PTE_64K_ENTRY &&
+           (index % GTT_64K_PTE_STRIDE)) {
+               gvt_vdbg_mm("Ignore write to unused PTE entry, index %lu\n",
+                           index);
+               return 0;
+       }
+
        if (bytes == info->gtt_entry_size) {
                ret = ppgtt_handle_guest_write_page_table(spt, &we, index);
                if (ret)
@@ -1689,8 +1898,9 @@ static struct intel_vgpu_mm *intel_vgpu_create_ggtt_mm(struct intel_vgpu *vgpu)
        mm->type = INTEL_GVT_MM_GGTT;
 
        nr_entries = gvt_ggtt_gm_sz(vgpu->gvt) >> I915_GTT_PAGE_SHIFT;
-       mm->ggtt_mm.virtual_ggtt = vzalloc(nr_entries *
-                                       vgpu->gvt->device_info.gtt_entry_size);
+       mm->ggtt_mm.virtual_ggtt =
+               vzalloc(array_size(nr_entries,
+                                  vgpu->gvt->device_info.gtt_entry_size));
        if (!mm->ggtt_mm.virtual_ggtt) {
                vgpu_free_mm(mm);
                return ERR_PTR(-ENOMEM);
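The array_size() conversion above guards the multiplication: the kernel helper saturates to SIZE_MAX on overflow instead of wrapping, so an oversized entry count cannot silently produce a short allocation that vzalloc() would then satisfy. A small userspace sketch of the same saturating idea (illustrative, not the kernel implementation):

    #include <stdint.h>
    #include <stdio.h>

    /* Saturating multiply: returns SIZE_MAX on overflow, like array_size(). */
    static size_t sat_array_size(size_t n, size_t elem)
    {
            if (elem && n > SIZE_MAX / elem)
                    return SIZE_MAX;
            return n * elem;
    }

    int main(void)
    {
            printf("%zu\n", sat_array_size(1024, 8));          /* 8192 */
            printf("%zu\n", sat_array_size(SIZE_MAX / 2, 4));  /* saturates */
            return 0;
    }
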
@@ -1737,7 +1947,7 @@ void intel_vgpu_unpin_mm(struct intel_vgpu_mm *mm)
 
 /**
  * intel_vgpu_pin_mm - increase the pin count of a vGPU mm object
- * @vgpu: a vGPU
+ * @mm: target vgpu mm
  *
  * This function is called when user wants to use a vGPU mm object. If this
  * mm object hasn't been shadowed yet, the shadow will be populated at this
@@ -1984,7 +2194,7 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
                }
 
                ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn,
-                                                             &dma_addr);
+                                                       PAGE_SIZE, &dma_addr);
                if (ret) {
                        gvt_vgpu_err("fail to populate guest ggtt entry\n");
                        /* guest driver may read/write the entry when partial
@@ -2257,8 +2467,7 @@ fail:
 /**
  * intel_vgpu_find_ppgtt_mm - find a PPGTT mm object
  * @vgpu: a vGPU
- * @page_table_level: PPGTT page table level
- * @root_entry: PPGTT page table root pointers
+ * @pdps: pdp root array
  *
  * This function is used to find a PPGTT mm object from mm object pool
  *