drivers/gpu/drm/i915/gvt/gtt.c
1 /*
2  * GTT virtualization
3  *
4  * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23  * SOFTWARE.
24  *
25  * Authors:
26  *    Zhi Wang <zhi.a.wang@intel.com>
27  *    Zhenyu Wang <zhenyuw@linux.intel.com>
28  *    Xiao Zheng <xiao.zheng@intel.com>
29  *
30  * Contributors:
31  *    Min He <min.he@intel.com>
32  *    Bing Niu <bing.niu@intel.com>
33  *
34  */
35
36 #include "i915_drv.h"
37 #include "gvt.h"
38 #include "i915_pvinfo.h"
39 #include "trace.h"
40
41 #if defined(VERBOSE_DEBUG)
42 #define gvt_vdbg_mm(fmt, args...) gvt_dbg_mm(fmt, ##args)
43 #else
44 #define gvt_vdbg_mm(fmt, args...)
45 #endif
46
47 static bool enable_out_of_sync = false;
48 static int preallocated_oos_pages = 8192;
49
50 /*
51  * Validate a gm address and its related range size before
52  * translating it to a host gm address.
53  */
54 bool intel_gvt_ggtt_validate_range(struct intel_vgpu *vgpu, u64 addr, u32 size)
55 {
56         if (size == 0)
57                 return vgpu_gmadr_is_valid(vgpu, addr);
58
59         if (vgpu_gmadr_is_aperture(vgpu, addr) &&
60             vgpu_gmadr_is_aperture(vgpu, addr + size - 1))
61                 return true;
62         else if (vgpu_gmadr_is_hidden(vgpu, addr) &&
63                  vgpu_gmadr_is_hidden(vgpu, addr + size - 1))
64                 return true;
65
66         gvt_dbg_mm("Invalid ggtt range at 0x%llx, size: 0x%x\n",
67                      addr, size);
68         return false;
69 }
70
71 /* translate a guest gmadr to host gmadr */
72 int intel_gvt_ggtt_gmadr_g2h(struct intel_vgpu *vgpu, u64 g_addr, u64 *h_addr)
73 {
74         if (WARN(!vgpu_gmadr_is_valid(vgpu, g_addr),
75                  "invalid guest gmadr %llx\n", g_addr))
76                 return -EACCES;
77
78         if (vgpu_gmadr_is_aperture(vgpu, g_addr))
79                 *h_addr = vgpu_aperture_gmadr_base(vgpu)
80                           + (g_addr - vgpu_aperture_offset(vgpu));
81         else
82                 *h_addr = vgpu_hidden_gmadr_base(vgpu)
83                           + (g_addr - vgpu_hidden_offset(vgpu));
84         return 0;
85 }
86
87 /* translate a host gmadr to guest gmadr */
88 int intel_gvt_ggtt_gmadr_h2g(struct intel_vgpu *vgpu, u64 h_addr, u64 *g_addr)
89 {
90         if (WARN(!gvt_gmadr_is_valid(vgpu->gvt, h_addr),
91                  "invalid host gmadr %llx\n", h_addr))
92                 return -EACCES;
93
94         if (gvt_gmadr_is_aperture(vgpu->gvt, h_addr))
95                 *g_addr = vgpu_aperture_gmadr_base(vgpu)
96                         + (h_addr - gvt_aperture_gmadr_base(vgpu->gvt));
97         else
98                 *g_addr = vgpu_hidden_gmadr_base(vgpu)
99                         + (h_addr - gvt_hidden_gmadr_base(vgpu->gvt));
100         return 0;
101 }
102
103 int intel_gvt_ggtt_index_g2h(struct intel_vgpu *vgpu, unsigned long g_index,
104                              unsigned long *h_index)
105 {
106         u64 h_addr;
107         int ret;
108
109         ret = intel_gvt_ggtt_gmadr_g2h(vgpu, g_index << I915_GTT_PAGE_SHIFT,
110                                        &h_addr);
111         if (ret)
112                 return ret;
113
114         *h_index = h_addr >> I915_GTT_PAGE_SHIFT;
115         return 0;
116 }
117
118 int intel_gvt_ggtt_h2g_index(struct intel_vgpu *vgpu, unsigned long h_index,
119                              unsigned long *g_index)
120 {
121         u64 g_addr;
122         int ret;
123
124         ret = intel_gvt_ggtt_gmadr_h2g(vgpu, h_index << I915_GTT_PAGE_SHIFT,
125                                        &g_addr);
126         if (ret)
127                 return ret;
128
129         *g_index = g_addr >> I915_GTT_PAGE_SHIFT;
130         return 0;
131 }
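
/*
 * An illustrative usage sketch (hypothetical caller, not taken from the
 * driver): validate a guest GGTT range first, then translate the page
 * index.  "vgpu", "gma" and "h_index" are assumed to exist in the caller.
 *
 *      if (!intel_gvt_ggtt_validate_range(vgpu, gma, I915_GTT_PAGE_SIZE))
 *              return -EINVAL;
 *
 *      ret = intel_gvt_ggtt_index_g2h(vgpu, gma >> I915_GTT_PAGE_SHIFT,
 *                                     &h_index);
 *      if (ret)
 *              return ret;
 */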
132
133 #define gtt_type_is_entry(type) \
134         (type > GTT_TYPE_INVALID && type < GTT_TYPE_PPGTT_ENTRY \
135          && type != GTT_TYPE_PPGTT_PTE_ENTRY \
136          && type != GTT_TYPE_PPGTT_ROOT_ENTRY)
137
138 #define gtt_type_is_pt(type) \
139         (type >= GTT_TYPE_PPGTT_PTE_PT && type < GTT_TYPE_MAX)
140
141 #define gtt_type_is_pte_pt(type) \
142         (type == GTT_TYPE_PPGTT_PTE_PT)
143
144 #define gtt_type_is_root_pointer(type) \
145         (gtt_type_is_entry(type) && type > GTT_TYPE_PPGTT_ROOT_ENTRY)
146
147 #define gtt_init_entry(e, t, p, v) do { \
148         (e)->type = t; \
149         (e)->pdev = p; \
150         memcpy(&(e)->val64, &v, sizeof(v)); \
151 } while (0)
152
153 /*
154  * Mappings between GTT_TYPE* enumerations.
155  * The following information can be looked up for a given type:
156  * - type of the next level page table
157  * - type of an entry inside this level of page table
158  * - type of the entry with PSE set
159  *
160  * If the given type doesn't carry the requested information,
161  * GTT_TYPE_INVALID is returned instead. For example, asking for the
162  * PSE type of an L4 root entry, or for the next level page table
163  * type of a PTE page table, yields GTT_TYPE_INVALID, since an L4
164  * root entry has no PSE bit and a PTE page table has no next level
165  * page table.
166  * This is useful when traversing a page table.
167  */
168
169 struct gtt_type_table_entry {
170         int entry_type;
171         int pt_type;
172         int next_pt_type;
173         int pse_entry_type;
174 };
175
176 #define GTT_TYPE_TABLE_ENTRY(type, e_type, cpt_type, npt_type, pse_type) \
177         [type] = { \
178                 .entry_type = e_type, \
179                 .pt_type = cpt_type, \
180                 .next_pt_type = npt_type, \
181                 .pse_entry_type = pse_type, \
182         }
183
184 static struct gtt_type_table_entry gtt_type_table[] = {
185         GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
186                         GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
187                         GTT_TYPE_INVALID,
188                         GTT_TYPE_PPGTT_PML4_PT,
189                         GTT_TYPE_INVALID),
190         GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_PT,
191                         GTT_TYPE_PPGTT_PML4_ENTRY,
192                         GTT_TYPE_PPGTT_PML4_PT,
193                         GTT_TYPE_PPGTT_PDP_PT,
194                         GTT_TYPE_INVALID),
195         GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_ENTRY,
196                         GTT_TYPE_PPGTT_PML4_ENTRY,
197                         GTT_TYPE_PPGTT_PML4_PT,
198                         GTT_TYPE_PPGTT_PDP_PT,
199                         GTT_TYPE_INVALID),
200         GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_PT,
201                         GTT_TYPE_PPGTT_PDP_ENTRY,
202                         GTT_TYPE_PPGTT_PDP_PT,
203                         GTT_TYPE_PPGTT_PDE_PT,
204                         GTT_TYPE_PPGTT_PTE_1G_ENTRY),
205         GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
206                         GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
207                         GTT_TYPE_INVALID,
208                         GTT_TYPE_PPGTT_PDE_PT,
209                         GTT_TYPE_PPGTT_PTE_1G_ENTRY),
210         GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_ENTRY,
211                         GTT_TYPE_PPGTT_PDP_ENTRY,
212                         GTT_TYPE_PPGTT_PDP_PT,
213                         GTT_TYPE_PPGTT_PDE_PT,
214                         GTT_TYPE_PPGTT_PTE_1G_ENTRY),
215         GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_PT,
216                         GTT_TYPE_PPGTT_PDE_ENTRY,
217                         GTT_TYPE_PPGTT_PDE_PT,
218                         GTT_TYPE_PPGTT_PTE_PT,
219                         GTT_TYPE_PPGTT_PTE_2M_ENTRY),
220         GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_ENTRY,
221                         GTT_TYPE_PPGTT_PDE_ENTRY,
222                         GTT_TYPE_PPGTT_PDE_PT,
223                         GTT_TYPE_PPGTT_PTE_PT,
224                         GTT_TYPE_PPGTT_PTE_2M_ENTRY),
225         /* We treat the IPS bit as 'PSE' at the PTE level. */
226         GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_PT,
227                         GTT_TYPE_PPGTT_PTE_4K_ENTRY,
228                         GTT_TYPE_PPGTT_PTE_PT,
229                         GTT_TYPE_INVALID,
230                         GTT_TYPE_PPGTT_PTE_64K_ENTRY),
231         GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_4K_ENTRY,
232                         GTT_TYPE_PPGTT_PTE_4K_ENTRY,
233                         GTT_TYPE_PPGTT_PTE_PT,
234                         GTT_TYPE_INVALID,
235                         GTT_TYPE_PPGTT_PTE_64K_ENTRY),
236         GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_64K_ENTRY,
237                         GTT_TYPE_PPGTT_PTE_4K_ENTRY,
238                         GTT_TYPE_PPGTT_PTE_PT,
239                         GTT_TYPE_INVALID,
240                         GTT_TYPE_PPGTT_PTE_64K_ENTRY),
241         GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_2M_ENTRY,
242                         GTT_TYPE_PPGTT_PDE_ENTRY,
243                         GTT_TYPE_PPGTT_PDE_PT,
244                         GTT_TYPE_INVALID,
245                         GTT_TYPE_PPGTT_PTE_2M_ENTRY),
246         GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_1G_ENTRY,
247                         GTT_TYPE_PPGTT_PDP_ENTRY,
248                         GTT_TYPE_PPGTT_PDP_PT,
249                         GTT_TYPE_INVALID,
250                         GTT_TYPE_PPGTT_PTE_1G_ENTRY),
251         GTT_TYPE_TABLE_ENTRY(GTT_TYPE_GGTT_PTE,
252                         GTT_TYPE_GGTT_PTE,
253                         GTT_TYPE_INVALID,
254                         GTT_TYPE_INVALID,
255                         GTT_TYPE_INVALID),
256 };
257
258 static inline int get_next_pt_type(int type)
259 {
260         return gtt_type_table[type].next_pt_type;
261 }
262
263 static inline int get_pt_type(int type)
264 {
265         return gtt_type_table[type].pt_type;
266 }
267
268 static inline int get_entry_type(int type)
269 {
270         return gtt_type_table[type].entry_type;
271 }
272
273 static inline int get_pse_type(int type)
274 {
275         return gtt_type_table[type].pse_entry_type;
276 }
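
/*
 * Illustrative lookups (derived from the table above): while walking a
 * page table, the helpers answer "what comes next" for a given type, e.g.
 *
 *      get_next_pt_type(GTT_TYPE_PPGTT_PML4_PT) == GTT_TYPE_PPGTT_PDP_PT
 *      get_entry_type(GTT_TYPE_PPGTT_PDE_PT)    == GTT_TYPE_PPGTT_PDE_ENTRY
 *      get_pse_type(GTT_TYPE_PPGTT_PDE_ENTRY)   == GTT_TYPE_PPGTT_PTE_2M_ENTRY
 *      get_next_pt_type(GTT_TYPE_PPGTT_PTE_PT)  == GTT_TYPE_INVALID
 */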
277
278 static u64 read_pte64(struct drm_i915_private *dev_priv, unsigned long index)
279 {
280         void __iomem *addr = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + index;
281
282         return readq(addr);
283 }
284
285 static void ggtt_invalidate(struct drm_i915_private *dev_priv)
286 {
287         mmio_hw_access_pre(dev_priv);
288         I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
289         mmio_hw_access_post(dev_priv);
290 }
291
292 static void write_pte64(struct drm_i915_private *dev_priv,
293                 unsigned long index, u64 pte)
294 {
295         void __iomem *addr = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + index;
296
297         writeq(pte, addr);
298 }
299
300 static inline int gtt_get_entry64(void *pt,
301                 struct intel_gvt_gtt_entry *e,
302                 unsigned long index, bool hypervisor_access, unsigned long gpa,
303                 struct intel_vgpu *vgpu)
304 {
305         const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
306         int ret;
307
308         if (WARN_ON(info->gtt_entry_size != 8))
309                 return -EINVAL;
310
311         if (hypervisor_access) {
312                 ret = intel_gvt_hypervisor_read_gpa(vgpu, gpa +
313                                 (index << info->gtt_entry_size_shift),
314                                 &e->val64, 8);
315                 if (WARN_ON(ret))
316                         return ret;
317         } else if (!pt) {
318                 e->val64 = read_pte64(vgpu->gvt->dev_priv, index);
319         } else {
320                 e->val64 = *((u64 *)pt + index);
321         }
322         return 0;
323 }
324
325 static inline int gtt_set_entry64(void *pt,
326                 struct intel_gvt_gtt_entry *e,
327                 unsigned long index, bool hypervisor_access, unsigned long gpa,
328                 struct intel_vgpu *vgpu)
329 {
330         const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
331         int ret;
332
333         if (WARN_ON(info->gtt_entry_size != 8))
334                 return -EINVAL;
335
336         if (hypervisor_access) {
337                 ret = intel_gvt_hypervisor_write_gpa(vgpu, gpa +
338                                 (index << info->gtt_entry_size_shift),
339                                 &e->val64, 8);
340                 if (WARN_ON(ret))
341                         return ret;
342         } else if (!pt) {
343                 write_pte64(vgpu->gvt->dev_priv, index, e->val64);
344         } else {
345                 *((u64 *)pt + index) = e->val64;
346         }
347         return 0;
348 }
349
350 #define GTT_HAW 46
351
352 #define ADDR_1G_MASK    GENMASK_ULL(GTT_HAW - 1, 30)
353 #define ADDR_2M_MASK    GENMASK_ULL(GTT_HAW - 1, 21)
354 #define ADDR_64K_MASK   GENMASK_ULL(GTT_HAW - 1, 16)
355 #define ADDR_4K_MASK    GENMASK_ULL(GTT_HAW - 1, 12)
356
357 #define GTT_SPTE_FLAG_MASK GENMASK_ULL(62, 52)
358 #define GTT_SPTE_FLAG_64K_SPLITED BIT(52) /* splited 64K gtt entry */
359
360 #define GTT_64K_PTE_STRIDE 16
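
/*
 * A worked example with a hypothetical value: GTT_HAW is 46, so
 * ADDR_2M_MASK covers address bits 45..21.  For a 2M entry with
 *
 *      e->val64 = 0x0000000080000003ULL;    (bit 31 plus present/RW bits)
 *
 * (e->val64 & ADDR_2M_MASK) >> PAGE_SHIFT yields pfn 0x80000, i.e. the
 * first 4K page frame of the 2M region at guest address 0x80000000.
 */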
361
362 static unsigned long gen8_gtt_get_pfn(struct intel_gvt_gtt_entry *e)
363 {
364         unsigned long pfn;
365
366         if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY)
367                 pfn = (e->val64 & ADDR_1G_MASK) >> PAGE_SHIFT;
368         else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY)
369                 pfn = (e->val64 & ADDR_2M_MASK) >> PAGE_SHIFT;
370         else if (e->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY)
371                 pfn = (e->val64 & ADDR_64K_MASK) >> PAGE_SHIFT;
372         else
373                 pfn = (e->val64 & ADDR_4K_MASK) >> PAGE_SHIFT;
374         return pfn;
375 }
376
377 static void gen8_gtt_set_pfn(struct intel_gvt_gtt_entry *e, unsigned long pfn)
378 {
379         if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) {
380                 e->val64 &= ~ADDR_1G_MASK;
381                 pfn &= (ADDR_1G_MASK >> PAGE_SHIFT);
382         } else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY) {
383                 e->val64 &= ~ADDR_2M_MASK;
384                 pfn &= (ADDR_2M_MASK >> PAGE_SHIFT);
385         } else if (e->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY) {
386                 e->val64 &= ~ADDR_64K_MASK;
387                 pfn &= (ADDR_64K_MASK >> PAGE_SHIFT);
388         } else {
389                 e->val64 &= ~ADDR_4K_MASK;
390                 pfn &= (ADDR_4K_MASK >> PAGE_SHIFT);
391         }
392
393         e->val64 |= (pfn << PAGE_SHIFT);
394 }
395
396 static bool gen8_gtt_test_pse(struct intel_gvt_gtt_entry *e)
397 {
398         return !!(e->val64 & _PAGE_PSE);
399 }
400
401 static void gen8_gtt_clear_pse(struct intel_gvt_gtt_entry *e)
402 {
403         if (gen8_gtt_test_pse(e)) {
404                 switch (e->type) {
405                 case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
406                         e->val64 &= ~_PAGE_PSE;
407                         e->type = GTT_TYPE_PPGTT_PDE_ENTRY;
408                         break;
409                 case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
410                         e->type = GTT_TYPE_PPGTT_PDP_ENTRY;
411                         e->val64 &= ~_PAGE_PSE;
412                         break;
413                 default:
414                         WARN_ON(1);
415                 }
416         }
417 }
418
419 static bool gen8_gtt_test_ips(struct intel_gvt_gtt_entry *e)
420 {
421         if (GEM_WARN_ON(e->type != GTT_TYPE_PPGTT_PDE_ENTRY))
422                 return false;
423
424         return !!(e->val64 & GEN8_PDE_IPS_64K);
425 }
426
427 static void gen8_gtt_clear_ips(struct intel_gvt_gtt_entry *e)
428 {
429         if (GEM_WARN_ON(e->type != GTT_TYPE_PPGTT_PDE_ENTRY))
430                 return;
431
432         e->val64 &= ~GEN8_PDE_IPS_64K;
433 }
434
435 static bool gen8_gtt_test_present(struct intel_gvt_gtt_entry *e)
436 {
437         /*
438          * i915 writes the PDP root pointer registers without the present
439          * bit set, and that still works, so root pointer entries need to
440          * be treated specially.
441          */
442         if (e->type == GTT_TYPE_PPGTT_ROOT_L3_ENTRY
443                         || e->type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY)
444                 return (e->val64 != 0);
445         else
446                 return (e->val64 & _PAGE_PRESENT);
447 }
448
449 static void gtt_entry_clear_present(struct intel_gvt_gtt_entry *e)
450 {
451         e->val64 &= ~_PAGE_PRESENT;
452 }
453
454 static void gtt_entry_set_present(struct intel_gvt_gtt_entry *e)
455 {
456         e->val64 |= _PAGE_PRESENT;
457 }
458
459 static bool gen8_gtt_test_64k_splited(struct intel_gvt_gtt_entry *e)
460 {
461         return !!(e->val64 & GTT_SPTE_FLAG_64K_SPLITED);
462 }
463
464 static void gen8_gtt_set_64k_splited(struct intel_gvt_gtt_entry *e)
465 {
466         e->val64 |= GTT_SPTE_FLAG_64K_SPLITED;
467 }
468
469 static void gen8_gtt_clear_64k_splited(struct intel_gvt_gtt_entry *e)
470 {
471         e->val64 &= ~GTT_SPTE_FLAG_64K_SPLITED;
472 }
473
474 /*
475  * Per-platform GMA routines.
476  */
477 static unsigned long gma_to_ggtt_pte_index(unsigned long gma)
478 {
479         unsigned long x = (gma >> I915_GTT_PAGE_SHIFT);
480
481         trace_gma_index(__func__, gma, x);
482         return x;
483 }
484
485 #define DEFINE_PPGTT_GMA_TO_INDEX(prefix, ename, exp) \
486 static unsigned long prefix##_gma_to_##ename##_index(unsigned long gma) \
487 { \
488         unsigned long x = (exp); \
489         trace_gma_index(__func__, gma, x); \
490         return x; \
491 }
492
493 DEFINE_PPGTT_GMA_TO_INDEX(gen8, pte, (gma >> 12 & 0x1ff));
494 DEFINE_PPGTT_GMA_TO_INDEX(gen8, pde, (gma >> 21 & 0x1ff));
495 DEFINE_PPGTT_GMA_TO_INDEX(gen8, l3_pdp, (gma >> 30 & 0x3));
496 DEFINE_PPGTT_GMA_TO_INDEX(gen8, l4_pdp, (gma >> 30 & 0x1ff));
497 DEFINE_PPGTT_GMA_TO_INDEX(gen8, pml4, (gma >> 39 & 0x1ff));
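
/*
 * A worked example with a made-up address: gma = 0x0000008080604000
 * decomposes into
 *
 *      gen8_gma_to_pml4_index(gma)   == 1   (bits 47:39)
 *      gen8_gma_to_l4_pdp_index(gma) == 2   (bits 38:30)
 *      gen8_gma_to_pde_index(gma)    == 3   (bits 29:21)
 *      gen8_gma_to_pte_index(gma)    == 4   (bits 20:12)
 */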
498
499 static struct intel_gvt_gtt_pte_ops gen8_gtt_pte_ops = {
500         .get_entry = gtt_get_entry64,
501         .set_entry = gtt_set_entry64,
502         .clear_present = gtt_entry_clear_present,
503         .set_present = gtt_entry_set_present,
504         .test_present = gen8_gtt_test_present,
505         .test_pse = gen8_gtt_test_pse,
506         .clear_pse = gen8_gtt_clear_pse,
507         .clear_ips = gen8_gtt_clear_ips,
508         .test_ips = gen8_gtt_test_ips,
509         .clear_64k_splited = gen8_gtt_clear_64k_splited,
510         .set_64k_splited = gen8_gtt_set_64k_splited,
511         .test_64k_splited = gen8_gtt_test_64k_splited,
512         .get_pfn = gen8_gtt_get_pfn,
513         .set_pfn = gen8_gtt_set_pfn,
514 };
515
516 static struct intel_gvt_gtt_gma_ops gen8_gtt_gma_ops = {
517         .gma_to_ggtt_pte_index = gma_to_ggtt_pte_index,
518         .gma_to_pte_index = gen8_gma_to_pte_index,
519         .gma_to_pde_index = gen8_gma_to_pde_index,
520         .gma_to_l3_pdp_index = gen8_gma_to_l3_pdp_index,
521         .gma_to_l4_pdp_index = gen8_gma_to_l4_pdp_index,
522         .gma_to_pml4_index = gen8_gma_to_pml4_index,
523 };
524
525 /* Update the entry type according to the PSE and IPS bits. */
526 static void update_entry_type_for_real(struct intel_gvt_gtt_pte_ops *pte_ops,
527         struct intel_gvt_gtt_entry *entry, bool ips)
528 {
529         switch (entry->type) {
530         case GTT_TYPE_PPGTT_PDE_ENTRY:
531         case GTT_TYPE_PPGTT_PDP_ENTRY:
532                 if (pte_ops->test_pse(entry))
533                         entry->type = get_pse_type(entry->type);
534                 break;
535         case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
536                 if (ips)
537                         entry->type = get_pse_type(entry->type);
538                 break;
539         default:
540                 GEM_BUG_ON(!gtt_type_is_entry(entry->type));
541         }
542
543         GEM_BUG_ON(entry->type == GTT_TYPE_INVALID);
544 }
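
/*
 * Examples derived from the type table: a PDE entry with PSE set is
 * re-typed as a 2M page entry, and a 4K PTE entry under a PDE with IPS
 * set is re-typed as a 64K entry:
 *
 *      GTT_TYPE_PPGTT_PDE_ENTRY    + PSE       -> GTT_TYPE_PPGTT_PTE_2M_ENTRY
 *      GTT_TYPE_PPGTT_PTE_4K_ENTRY + ips==true -> GTT_TYPE_PPGTT_PTE_64K_ENTRY
 */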
545
546 /*
547  * MM helpers.
548  */
549 static void _ppgtt_get_root_entry(struct intel_vgpu_mm *mm,
550                 struct intel_gvt_gtt_entry *entry, unsigned long index,
551                 bool guest)
552 {
553         struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
554
555         GEM_BUG_ON(mm->type != INTEL_GVT_MM_PPGTT);
556
557         entry->type = mm->ppgtt_mm.root_entry_type;
558         pte_ops->get_entry(guest ? mm->ppgtt_mm.guest_pdps :
559                            mm->ppgtt_mm.shadow_pdps,
560                            entry, index, false, 0, mm->vgpu);
561         update_entry_type_for_real(pte_ops, entry, false);
562 }
563
564 static inline void ppgtt_get_guest_root_entry(struct intel_vgpu_mm *mm,
565                 struct intel_gvt_gtt_entry *entry, unsigned long index)
566 {
567         _ppgtt_get_root_entry(mm, entry, index, true);
568 }
569
570 static inline void ppgtt_get_shadow_root_entry(struct intel_vgpu_mm *mm,
571                 struct intel_gvt_gtt_entry *entry, unsigned long index)
572 {
573         _ppgtt_get_root_entry(mm, entry, index, false);
574 }
575
576 static void _ppgtt_set_root_entry(struct intel_vgpu_mm *mm,
577                 struct intel_gvt_gtt_entry *entry, unsigned long index,
578                 bool guest)
579 {
580         struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
581
582         pte_ops->set_entry(guest ? mm->ppgtt_mm.guest_pdps :
583                            mm->ppgtt_mm.shadow_pdps,
584                            entry, index, false, 0, mm->vgpu);
585 }
586
587 static inline void ppgtt_set_guest_root_entry(struct intel_vgpu_mm *mm,
588                 struct intel_gvt_gtt_entry *entry, unsigned long index)
589 {
590         _ppgtt_set_root_entry(mm, entry, index, true);
591 }
592
593 static inline void ppgtt_set_shadow_root_entry(struct intel_vgpu_mm *mm,
594                 struct intel_gvt_gtt_entry *entry, unsigned long index)
595 {
596         _ppgtt_set_root_entry(mm, entry, index, false);
597 }
598
599 static void ggtt_get_guest_entry(struct intel_vgpu_mm *mm,
600                 struct intel_gvt_gtt_entry *entry, unsigned long index)
601 {
602         struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
603
604         GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);
605
606         entry->type = GTT_TYPE_GGTT_PTE;
607         pte_ops->get_entry(mm->ggtt_mm.virtual_ggtt, entry, index,
608                            false, 0, mm->vgpu);
609 }
610
611 static void ggtt_set_guest_entry(struct intel_vgpu_mm *mm,
612                 struct intel_gvt_gtt_entry *entry, unsigned long index)
613 {
614         struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
615
616         GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);
617
618         pte_ops->set_entry(mm->ggtt_mm.virtual_ggtt, entry, index,
619                            false, 0, mm->vgpu);
620 }
621
622 static void ggtt_get_host_entry(struct intel_vgpu_mm *mm,
623                 struct intel_gvt_gtt_entry *entry, unsigned long index)
624 {
625         struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
626
627         GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);
628
629         pte_ops->get_entry(NULL, entry, index, false, 0, mm->vgpu);
630 }
631
632 static void ggtt_set_host_entry(struct intel_vgpu_mm *mm,
633                 struct intel_gvt_gtt_entry *entry, unsigned long index)
634 {
635         struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
636
637         GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);
638
639         pte_ops->set_entry(NULL, entry, index, false, 0, mm->vgpu);
640 }
641
642 /*
643  * PPGTT shadow page table helpers.
644  */
645 static inline int ppgtt_spt_get_entry(
646                 struct intel_vgpu_ppgtt_spt *spt,
647                 void *page_table, int type,
648                 struct intel_gvt_gtt_entry *e, unsigned long index,
649                 bool guest)
650 {
651         struct intel_gvt *gvt = spt->vgpu->gvt;
652         struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
653         int ret;
654
655         e->type = get_entry_type(type);
656
657         if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
658                 return -EINVAL;
659
660         ret = ops->get_entry(page_table, e, index, guest,
661                         spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
662                         spt->vgpu);
663         if (ret)
664                 return ret;
665
666         update_entry_type_for_real(ops, e, guest ?
667                                    spt->guest_page.pde_ips : false);
668
669         gvt_vdbg_mm("read ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n",
670                     type, e->type, index, e->val64);
671         return 0;
672 }
673
674 static inline int ppgtt_spt_set_entry(
675                 struct intel_vgpu_ppgtt_spt *spt,
676                 void *page_table, int type,
677                 struct intel_gvt_gtt_entry *e, unsigned long index,
678                 bool guest)
679 {
680         struct intel_gvt *gvt = spt->vgpu->gvt;
681         struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
682
683         if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
684                 return -EINVAL;
685
686         gvt_vdbg_mm("set ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n",
687                     type, e->type, index, e->val64);
688
689         return ops->set_entry(page_table, e, index, guest,
690                         spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
691                         spt->vgpu);
692 }
693
694 #define ppgtt_get_guest_entry(spt, e, index) \
695         ppgtt_spt_get_entry(spt, NULL, \
696                 spt->guest_page.type, e, index, true)
697
698 #define ppgtt_set_guest_entry(spt, e, index) \
699         ppgtt_spt_set_entry(spt, NULL, \
700                 spt->guest_page.type, e, index, true)
701
702 #define ppgtt_get_shadow_entry(spt, e, index) \
703         ppgtt_spt_get_entry(spt, spt->shadow_page.vaddr, \
704                 spt->shadow_page.type, e, index, false)
705
706 #define ppgtt_set_shadow_entry(spt, e, index) \
707         ppgtt_spt_set_entry(spt, spt->shadow_page.vaddr, \
708                 spt->shadow_page.type, e, index, false)
709
710 static void *alloc_spt(gfp_t gfp_mask)
711 {
712         struct intel_vgpu_ppgtt_spt *spt;
713
714         spt = kzalloc(sizeof(*spt), gfp_mask);
715         if (!spt)
716                 return NULL;
717
718         spt->shadow_page.page = alloc_page(gfp_mask);
719         if (!spt->shadow_page.page) {
720                 kfree(spt);
721                 return NULL;
722         }
723         return spt;
724 }
725
726 static void free_spt(struct intel_vgpu_ppgtt_spt *spt)
727 {
728         __free_page(spt->shadow_page.page);
729         kfree(spt);
730 }
731
732 static int detach_oos_page(struct intel_vgpu *vgpu,
733                 struct intel_vgpu_oos_page *oos_page);
734
735 static void ppgtt_free_spt(struct intel_vgpu_ppgtt_spt *spt)
736 {
737         struct device *kdev = &spt->vgpu->gvt->dev_priv->drm.pdev->dev;
738
739         trace_spt_free(spt->vgpu->id, spt, spt->guest_page.type);
740
741         dma_unmap_page(kdev, spt->shadow_page.mfn << I915_GTT_PAGE_SHIFT, 4096,
742                        PCI_DMA_BIDIRECTIONAL);
743
744         radix_tree_delete(&spt->vgpu->gtt.spt_tree, spt->shadow_page.mfn);
745
746         if (spt->guest_page.gfn) {
747                 if (spt->guest_page.oos_page)
748                         detach_oos_page(spt->vgpu, spt->guest_page.oos_page);
749
750                 intel_vgpu_unregister_page_track(spt->vgpu, spt->guest_page.gfn);
751         }
752
753         list_del_init(&spt->post_shadow_list);
754         free_spt(spt);
755 }
756
757 static void ppgtt_free_all_spt(struct intel_vgpu *vgpu)
758 {
759         struct intel_vgpu_ppgtt_spt *spt, *spn;
760         struct radix_tree_iter iter;
761         LIST_HEAD(all_spt);
762         void __rcu **slot;
763
764         rcu_read_lock();
765         radix_tree_for_each_slot(slot, &vgpu->gtt.spt_tree, &iter, 0) {
766                 spt = radix_tree_deref_slot(slot);
767                 list_move(&spt->post_shadow_list, &all_spt);
768         }
769         rcu_read_unlock();
770
771         list_for_each_entry_safe(spt, spn, &all_spt, post_shadow_list)
772                 ppgtt_free_spt(spt);
773 }
774
775 static int ppgtt_handle_guest_write_page_table_bytes(
776                 struct intel_vgpu_ppgtt_spt *spt,
777                 u64 pa, void *p_data, int bytes);
778
779 static int ppgtt_write_protection_handler(
780                 struct intel_vgpu_page_track *page_track,
781                 u64 gpa, void *data, int bytes)
782 {
783         struct intel_vgpu_ppgtt_spt *spt = page_track->priv_data;
784
785         int ret;
786
787         if (bytes != 4 && bytes != 8)
788                 return -EINVAL;
789
790         ret = ppgtt_handle_guest_write_page_table_bytes(spt, gpa, data, bytes);
791         if (ret)
792                 return ret;
793         return 0;
794 }
795
796 /* Find a spt by guest gfn. */
797 static struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_gfn(
798                 struct intel_vgpu *vgpu, unsigned long gfn)
799 {
800         struct intel_vgpu_page_track *track;
801
802         track = intel_vgpu_find_page_track(vgpu, gfn);
803         if (track && track->handler == ppgtt_write_protection_handler)
804                 return track->priv_data;
805
806         return NULL;
807 }
808
809 /* Find the spt by shadow page mfn. */
810 static inline struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_mfn(
811                 struct intel_vgpu *vgpu, unsigned long mfn)
812 {
813         return radix_tree_lookup(&vgpu->gtt.spt_tree, mfn);
814 }
815
816 static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt);
817
818 /* Allocate shadow page table without guest page. */
819 static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt(
820                 struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type)
821 {
822         struct device *kdev = &vgpu->gvt->dev_priv->drm.pdev->dev;
823         struct intel_vgpu_ppgtt_spt *spt = NULL;
824         dma_addr_t daddr;
825         int ret;
826
827 retry:
828         spt = alloc_spt(GFP_KERNEL | __GFP_ZERO);
829         if (!spt) {
830                 if (reclaim_one_ppgtt_mm(vgpu->gvt))
831                         goto retry;
832
833                 gvt_vgpu_err("fail to allocate ppgtt shadow page\n");
834                 return ERR_PTR(-ENOMEM);
835         }
836
837         spt->vgpu = vgpu;
838         atomic_set(&spt->refcount, 1);
839         INIT_LIST_HEAD(&spt->post_shadow_list);
840
841         /*
842          * Init shadow_page.
843          */
844         spt->shadow_page.type = type;
845         daddr = dma_map_page(kdev, spt->shadow_page.page,
846                              0, 4096, PCI_DMA_BIDIRECTIONAL);
847         if (dma_mapping_error(kdev, daddr)) {
848                 gvt_vgpu_err("fail to map dma addr\n");
849                 ret = -EINVAL;
850                 goto err_free_spt;
851         }
852         spt->shadow_page.vaddr = page_address(spt->shadow_page.page);
853         spt->shadow_page.mfn = daddr >> I915_GTT_PAGE_SHIFT;
854
855         ret = radix_tree_insert(&vgpu->gtt.spt_tree, spt->shadow_page.mfn, spt);
856         if (ret)
857                 goto err_unmap_dma;
858
859         return spt;
860
861 err_unmap_dma:
862         dma_unmap_page(kdev, daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
863 err_free_spt:
864         free_spt(spt);
865         return ERR_PTR(ret);
866 }
867
868 /* Allocate shadow page table associated with specific gfn. */
869 static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt_gfn(
870                 struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type,
871                 unsigned long gfn, bool guest_pde_ips)
872 {
873         struct intel_vgpu_ppgtt_spt *spt;
874         int ret;
875
876         spt = ppgtt_alloc_spt(vgpu, type);
877         if (IS_ERR(spt))
878                 return spt;
879
880         /*
881          * Init guest_page.
882          */
883         ret = intel_vgpu_register_page_track(vgpu, gfn,
884                         ppgtt_write_protection_handler, spt);
885         if (ret) {
886                 ppgtt_free_spt(spt);
887                 return ERR_PTR(ret);
888         }
889
890         spt->guest_page.type = type;
891         spt->guest_page.gfn = gfn;
892         spt->guest_page.pde_ips = guest_pde_ips;
893
894         trace_spt_alloc(vgpu->id, spt, type, spt->shadow_page.mfn, gfn);
895
896         return spt;
897 }
898
899 #define pt_entry_size_shift(spt) \
900         ((spt)->vgpu->gvt->device_info.gtt_entry_size_shift)
901
902 #define pt_entries(spt) \
903         (I915_GTT_PAGE_SIZE >> pt_entry_size_shift(spt))
904
905 #define for_each_present_guest_entry(spt, e, i) \
906         for (i = 0; i < pt_entries(spt); \
907              i += spt->guest_page.pde_ips ? GTT_64K_PTE_STRIDE : 1) \
908                 if (!ppgtt_get_guest_entry(spt, e, i) && \
909                     spt->vgpu->gvt->gtt.pte_ops->test_present(e))
910
911 #define for_each_present_shadow_entry(spt, e, i) \
912         for (i = 0; i < pt_entries(spt); \
913              i += spt->shadow_page.pde_ips ? GTT_64K_PTE_STRIDE : 1) \
914                 if (!ppgtt_get_shadow_entry(spt, e, i) && \
915                     spt->vgpu->gvt->gtt.pte_ops->test_present(e))
916
917 #define for_each_shadow_entry(spt, e, i) \
918         for (i = 0; i < pt_entries(spt); \
919              i += (spt->shadow_page.pde_ips ? GTT_64K_PTE_STRIDE : 1)) \
920                 if (!ppgtt_get_shadow_entry(spt, e, i))
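
/*
 * Note: with 8-byte entries, pt_entries(spt) is I915_GTT_PAGE_SIZE >> 3,
 * i.e. 512 entries per page table.  When 64K/IPS mode is active, the
 * iterators above step by GTT_64K_PTE_STRIDE (16), visiting one entry
 * per 64K page.
 */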
921
922 static inline void ppgtt_get_spt(struct intel_vgpu_ppgtt_spt *spt)
923 {
924         int v = atomic_read(&spt->refcount);
925
926         trace_spt_refcount(spt->vgpu->id, "inc", spt, v, (v + 1));
927         atomic_inc(&spt->refcount);
928 }
929
930 static inline int ppgtt_put_spt(struct intel_vgpu_ppgtt_spt *spt)
931 {
932         int v = atomic_read(&spt->refcount);
933
934         trace_spt_refcount(spt->vgpu->id, "dec", spt, v, (v - 1));
935         return atomic_dec_return(&spt->refcount);
936 }
937
938 static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt);
939
940 static int ppgtt_invalidate_spt_by_shadow_entry(struct intel_vgpu *vgpu,
941                 struct intel_gvt_gtt_entry *e)
942 {
943         struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
944         struct intel_vgpu_ppgtt_spt *s;
945         enum intel_gvt_gtt_type cur_pt_type;
946
947         GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(e->type)));
948
949         if (e->type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY
950                 && e->type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
951                 cur_pt_type = get_next_pt_type(e->type);
952
953                 if (!gtt_type_is_pt(cur_pt_type) ||
954                                 !gtt_type_is_pt(cur_pt_type + 1)) {
955                         WARN(1, "Invalid page table type, cur_pt_type is: %d\n", cur_pt_type);
956                         return -EINVAL;
957                 }
958
959                 cur_pt_type += 1;
960
961                 if (ops->get_pfn(e) ==
962                         vgpu->gtt.scratch_pt[cur_pt_type].page_mfn)
963                         return 0;
964         }
965         s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e));
966         if (!s) {
967                 gvt_vgpu_err("fail to find shadow page: mfn: 0x%lx\n",
968                                 ops->get_pfn(e));
969                 return -ENXIO;
970         }
971         return ppgtt_invalidate_spt(s);
972 }
973
974 static inline void ppgtt_invalidate_pte(struct intel_vgpu_ppgtt_spt *spt,
975                 struct intel_gvt_gtt_entry *entry)
976 {
977         struct intel_vgpu *vgpu = spt->vgpu;
978         struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
979         unsigned long pfn;
980         int type;
981
982         pfn = ops->get_pfn(entry);
983         type = spt->shadow_page.type;
984
985         /* Uninitialized spte or unshadowed spte. */
986         if (!pfn || pfn == vgpu->gtt.scratch_pt[type].page_mfn)
987                 return;
988
989         intel_gvt_hypervisor_dma_unmap_guest_page(vgpu, pfn << PAGE_SHIFT);
990 }
991
992 static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt)
993 {
994         struct intel_vgpu *vgpu = spt->vgpu;
995         struct intel_gvt_gtt_entry e;
996         unsigned long index;
997         int ret;
998
999         trace_spt_change(spt->vgpu->id, "die", spt,
1000                         spt->guest_page.gfn, spt->shadow_page.type);
1001
1002         if (ppgtt_put_spt(spt) > 0)
1003                 return 0;
1004
1005         for_each_present_shadow_entry(spt, &e, index) {
1006                 switch (e.type) {
1007                 case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
1008                         gvt_vdbg_mm("invalidate 4K entry\n");
1009                         ppgtt_invalidate_pte(spt, &e);
1010                         break;
1011                 case GTT_TYPE_PPGTT_PTE_64K_ENTRY:
1012                         /* We don't set up 64K shadow entries so far. */
1013                         WARN(1, "suspicious 64K gtt entry\n");
1014                         continue;
1015                 case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
1016                         gvt_vdbg_mm("invalidate 2M entry\n");
1017                         continue;
1018                 case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
1019                         WARN(1, "GVT doesn't support 1GB page\n");
1020                         continue;
1021                 case GTT_TYPE_PPGTT_PML4_ENTRY:
1022                 case GTT_TYPE_PPGTT_PDP_ENTRY:
1023                 case GTT_TYPE_PPGTT_PDE_ENTRY:
1024                         gvt_vdbg_mm("invalidate PML4/PDP/PDE entry\n");
1025                         ret = ppgtt_invalidate_spt_by_shadow_entry(
1026                                         spt->vgpu, &e);
1027                         if (ret)
1028                                 goto fail;
1029                         break;
1030                 default:
1031                         GEM_BUG_ON(1);
1032                 }
1033         }
1034
1035         trace_spt_change(spt->vgpu->id, "release", spt,
1036                          spt->guest_page.gfn, spt->shadow_page.type);
1037         ppgtt_free_spt(spt);
1038         return 0;
1039 fail:
1040         gvt_vgpu_err("fail: shadow page %p shadow entry 0x%llx type %d\n",
1041                         spt, e.val64, e.type);
1042         return ret;
1043 }
1044
1045 static bool vgpu_ips_enabled(struct intel_vgpu *vgpu)
1046 {
1047         struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
1048
1049         if (INTEL_GEN(dev_priv) == 9 || INTEL_GEN(dev_priv) == 10) {
1050                 u32 ips = vgpu_vreg_t(vgpu, GEN8_GAMW_ECO_DEV_RW_IA) &
1051                         GAMW_ECO_ENABLE_64K_IPS_FIELD;
1052
1053                 return ips == GAMW_ECO_ENABLE_64K_IPS_FIELD;
1054         } else if (INTEL_GEN(dev_priv) >= 11) {
1055                 /* 64K paging is only controlled by the IPS bit in the PTE now. */
1056                 return true;
1057         } else
1058                 return false;
1059 }
1060
1061 static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt);
1062
1063 static struct intel_vgpu_ppgtt_spt *ppgtt_populate_spt_by_guest_entry(
1064                 struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *we)
1065 {
1066         struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1067         struct intel_vgpu_ppgtt_spt *spt = NULL;
1068         bool ips = false;
1069         int ret;
1070
1071         GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(we->type)));
1072
1073         if (we->type == GTT_TYPE_PPGTT_PDE_ENTRY)
1074                 ips = vgpu_ips_enabled(vgpu) && ops->test_ips(we);
1075
1076         spt = intel_vgpu_find_spt_by_gfn(vgpu, ops->get_pfn(we));
1077         if (spt) {
1078                 ppgtt_get_spt(spt);
1079
1080                 if (ips != spt->guest_page.pde_ips) {
1081                         spt->guest_page.pde_ips = ips;
1082
1083                         gvt_dbg_mm("reshadow PDE since ips changed\n");
1084                         clear_page(spt->shadow_page.vaddr);
1085                         ret = ppgtt_populate_spt(spt);
1086                         if (ret) {
1087                                 ppgtt_put_spt(spt);
1088                                 goto err;
1089                         }
1090                 }
1091         } else {
1092                 int type = get_next_pt_type(we->type);
1093
1094                 if (!gtt_type_is_pt(type)) {
1095                         ret = -EINVAL;
1096                         goto err;
1097                 }
1098
1099                 spt = ppgtt_alloc_spt_gfn(vgpu, type, ops->get_pfn(we), ips);
1100                 if (IS_ERR(spt)) {
1101                         ret = PTR_ERR(spt);
1102                         goto err;
1103                 }
1104
1105                 ret = intel_vgpu_enable_page_track(vgpu, spt->guest_page.gfn);
1106                 if (ret)
1107                         goto err_free_spt;
1108
1109                 ret = ppgtt_populate_spt(spt);
1110                 if (ret)
1111                         goto err_free_spt;
1112
1113                 trace_spt_change(vgpu->id, "new", spt, spt->guest_page.gfn,
1114                                  spt->shadow_page.type);
1115         }
1116         return spt;
1117
1118 err_free_spt:
1119         ppgtt_free_spt(spt);
1120         spt = NULL;
1121 err:
1122         gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
1123                      spt, we->val64, we->type);
1124         return ERR_PTR(ret);
1125 }
1126
1127 static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se,
1128                 struct intel_vgpu_ppgtt_spt *s, struct intel_gvt_gtt_entry *ge)
1129 {
1130         struct intel_gvt_gtt_pte_ops *ops = s->vgpu->gvt->gtt.pte_ops;
1131
1132         se->type = ge->type;
1133         se->val64 = ge->val64;
1134
1135         /* Because we always split 64K pages, clear the IPS bit in the shadow PDE. */
1136         if (se->type == GTT_TYPE_PPGTT_PDE_ENTRY)
1137                 ops->clear_ips(se);
1138
1139         ops->set_pfn(se, s->shadow_page.mfn);
1140 }
1141
1142 /**
1143  * is_2MB_gtt_possible - check if a 2MB huge GTT page can be used
1144  * @vgpu: target vgpu
1145  * @entry: target pfn's gtt entry
1146  *
1147  * Return 1 if 2MB huge gtt shadowing is possible, 0 if the conditions
1148  * are not met, negative on error.
1149  */
1150 static int is_2MB_gtt_possible(struct intel_vgpu *vgpu,
1151         struct intel_gvt_gtt_entry *entry)
1152 {
1153         struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1154         unsigned long pfn;
1155
1156         if (!HAS_PAGE_SIZES(vgpu->gvt->dev_priv, I915_GTT_PAGE_SIZE_2M))
1157                 return 0;
1158
1159         pfn = intel_gvt_hypervisor_gfn_to_mfn(vgpu, ops->get_pfn(entry));
1160         if (pfn == INTEL_GVT_INVALID_ADDR)
1161                 return -EINVAL;
1162
1163         return PageTransHuge(pfn_to_page(pfn));
1164 }
1165
1166 static int split_2MB_gtt_entry(struct intel_vgpu *vgpu,
1167         struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
1168         struct intel_gvt_gtt_entry *se)
1169 {
1170         struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1171         struct intel_vgpu_ppgtt_spt *sub_spt;
1172         struct intel_gvt_gtt_entry sub_se;
1173         unsigned long start_gfn;
1174         dma_addr_t dma_addr;
1175         unsigned long sub_index;
1176         int ret;
1177
1178         gvt_dbg_mm("Split 2M gtt entry, index %lu\n", index);
1179
1180         start_gfn = ops->get_pfn(se);
1181
1182         sub_spt = ppgtt_alloc_spt(vgpu, GTT_TYPE_PPGTT_PTE_PT);
1183         if (IS_ERR(sub_spt))
1184                 return PTR_ERR(sub_spt);
1185
1186         for_each_shadow_entry(sub_spt, &sub_se, sub_index) {
1187                 ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu,
1188                                 start_gfn + sub_index, PAGE_SIZE, &dma_addr);
1189                 if (ret) {
1190                         ppgtt_invalidate_spt(spt);
1191                         return ret;
1192                 }
1193                 sub_se.val64 = se->val64;
1194
1195                 /* Copy the PAT field from PDE. */
1196                 sub_se.val64 &= ~_PAGE_PAT;
1197                 sub_se.val64 |= (se->val64 & _PAGE_PAT_LARGE) >> 5;
1198
1199                 ops->set_pfn(&sub_se, dma_addr >> PAGE_SHIFT);
1200                 ppgtt_set_shadow_entry(sub_spt, &sub_se, sub_index);
1201         }
1202
1203         /* Clear dirty field. */
1204         se->val64 &= ~_PAGE_DIRTY;
1205
1206         ops->clear_pse(se);
1207         ops->clear_ips(se);
1208         ops->set_pfn(se, sub_spt->shadow_page.mfn);
1209         ppgtt_set_shadow_entry(spt, se, index);
1210         return 0;
1211 }
1212
1213 static int split_64KB_gtt_entry(struct intel_vgpu *vgpu,
1214         struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
1215         struct intel_gvt_gtt_entry *se)
1216 {
1217         struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1218         struct intel_gvt_gtt_entry entry = *se;
1219         unsigned long start_gfn;
1220         dma_addr_t dma_addr;
1221         int i, ret;
1222
1223         gvt_vdbg_mm("Split 64K gtt entry, index %lu\n", index);
1224
1225         GEM_BUG_ON(index % GTT_64K_PTE_STRIDE);
1226
1227         start_gfn = ops->get_pfn(se);
1228
1229         entry.type = GTT_TYPE_PPGTT_PTE_4K_ENTRY;
1230         ops->set_64k_splited(&entry);
1231
1232         for (i = 0; i < GTT_64K_PTE_STRIDE; i++) {
1233                 ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu,
1234                                         start_gfn + i, PAGE_SIZE, &dma_addr);
1235                 if (ret)
1236                         return ret;
1237
1238                 ops->set_pfn(&entry, dma_addr >> PAGE_SHIFT);
1239                 ppgtt_set_shadow_entry(spt, &entry, index + i);
1240         }
1241         return 0;
1242 }
1243
1244 static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu,
1245         struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
1246         struct intel_gvt_gtt_entry *ge)
1247 {
1248         struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
1249         struct intel_gvt_gtt_entry se = *ge;
1250         unsigned long gfn, page_size = PAGE_SIZE;
1251         dma_addr_t dma_addr;
1252         int ret;
1253
1254         if (!pte_ops->test_present(ge))
1255                 return 0;
1256
1257         gfn = pte_ops->get_pfn(ge);
1258
1259         switch (ge->type) {
1260         case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
1261                 gvt_vdbg_mm("shadow 4K gtt entry\n");
1262                 break;
1263         case GTT_TYPE_PPGTT_PTE_64K_ENTRY:
1264                 gvt_vdbg_mm("shadow 64K gtt entry\n");
1265                 /*
1266                  * The layout of a 64K page is special: the page size is
1267                  * controlled by the upper PDE. To keep it simple, we always
1268                  * split a 64K page into smaller 4K pages in the shadow PT.
1269                  */
1270                 return split_64KB_gtt_entry(vgpu, spt, index, &se);
1271         case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
1272                 gvt_vdbg_mm("shadow 2M gtt entry\n");
1273                 ret = is_2MB_gtt_possible(vgpu, ge);
1274                 if (ret == 0)
1275                         return split_2MB_gtt_entry(vgpu, spt, index, &se);
1276                 else if (ret < 0)
1277                         return ret;
1278                 page_size = I915_GTT_PAGE_SIZE_2M;
1279                 break;
1280         case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
1281                 gvt_vgpu_err("GVT doesn't support 1GB entry\n");
1282                 return -EINVAL;
1283         default:
1284                 GEM_BUG_ON(1);
1285         }
1286
1287         /* direct shadow */
1288         ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, page_size,
1289                                                       &dma_addr);
1290         if (ret)
1291                 return -ENXIO;
1292
1293         pte_ops->set_pfn(&se, dma_addr >> PAGE_SHIFT);
1294         ppgtt_set_shadow_entry(spt, &se, index);
1295         return 0;
1296 }
1297
1298 static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt)
1299 {
1300         struct intel_vgpu *vgpu = spt->vgpu;
1301         struct intel_gvt *gvt = vgpu->gvt;
1302         struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
1303         struct intel_vgpu_ppgtt_spt *s;
1304         struct intel_gvt_gtt_entry se, ge;
1305         unsigned long gfn, i;
1306         int ret;
1307
1308         trace_spt_change(spt->vgpu->id, "born", spt,
1309                          spt->guest_page.gfn, spt->shadow_page.type);
1310
1311         for_each_present_guest_entry(spt, &ge, i) {
1312                 if (gtt_type_is_pt(get_next_pt_type(ge.type))) {
1313                         s = ppgtt_populate_spt_by_guest_entry(vgpu, &ge);
1314                         if (IS_ERR(s)) {
1315                                 ret = PTR_ERR(s);
1316                                 goto fail;
1317                         }
1318                         ppgtt_get_shadow_entry(spt, &se, i);
1319                         ppgtt_generate_shadow_entry(&se, s, &ge);
1320                         ppgtt_set_shadow_entry(spt, &se, i);
1321                 } else {
1322                         gfn = ops->get_pfn(&ge);
1323                         if (!intel_gvt_hypervisor_is_valid_gfn(vgpu, gfn)) {
1324                                 ops->set_pfn(&se, gvt->gtt.scratch_mfn);
1325                                 ppgtt_set_shadow_entry(spt, &se, i);
1326                                 continue;
1327                         }
1328
1329                         ret = ppgtt_populate_shadow_entry(vgpu, spt, i, &ge);
1330                         if (ret)
1331                                 goto fail;
1332                 }
1333         }
1334         return 0;
1335 fail:
1336         gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
1337                         spt, ge.val64, ge.type);
1338         return ret;
1339 }
1340
1341 static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_ppgtt_spt *spt,
1342                 struct intel_gvt_gtt_entry *se, unsigned long index)
1343 {
1344         struct intel_vgpu *vgpu = spt->vgpu;
1345         struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1346         int ret;
1347
1348         trace_spt_guest_change(spt->vgpu->id, "remove", spt,
1349                                spt->shadow_page.type, se->val64, index);
1350
1351         gvt_vdbg_mm("destroy old shadow entry, type %d, index %lu, value %llx\n",
1352                     se->type, index, se->val64);
1353
1354         if (!ops->test_present(se))
1355                 return 0;
1356
1357         if (ops->get_pfn(se) ==
1358             vgpu->gtt.scratch_pt[spt->shadow_page.type].page_mfn)
1359                 return 0;
1360
1361         if (gtt_type_is_pt(get_next_pt_type(se->type))) {
1362                 struct intel_vgpu_ppgtt_spt *s =
1363                         intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(se));
1364                 if (!s) {
1365                         gvt_vgpu_err("fail to find guest page\n");
1366                         ret = -ENXIO;
1367                         goto fail;
1368                 }
1369                 ret = ppgtt_invalidate_spt(s);
1370                 if (ret)
1371                         goto fail;
1372         } else {
1373                 /* We don't set up 64K shadow entries so far. */
1374                 WARN(se->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY,
1375                      "suspicious 64K entry\n");
1376                 ppgtt_invalidate_pte(spt, se);
1377         }
1378
1379         return 0;
1380 fail:
1381         gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
1382                         spt, se->val64, se->type);
1383         return ret;
1384 }
1385
1386 static int ppgtt_handle_guest_entry_add(struct intel_vgpu_ppgtt_spt *spt,
1387                 struct intel_gvt_gtt_entry *we, unsigned long index)
1388 {
1389         struct intel_vgpu *vgpu = spt->vgpu;
1390         struct intel_gvt_gtt_entry m;
1391         struct intel_vgpu_ppgtt_spt *s;
1392         int ret;
1393
1394         trace_spt_guest_change(spt->vgpu->id, "add", spt, spt->shadow_page.type,
1395                                we->val64, index);
1396
1397         gvt_vdbg_mm("add shadow entry: type %d, index %lu, value %llx\n",
1398                     we->type, index, we->val64);
1399
1400         if (gtt_type_is_pt(get_next_pt_type(we->type))) {
1401                 s = ppgtt_populate_spt_by_guest_entry(vgpu, we);
1402                 if (IS_ERR(s)) {
1403                         ret = PTR_ERR(s);
1404                         goto fail;
1405                 }
1406                 ppgtt_get_shadow_entry(spt, &m, index);
1407                 ppgtt_generate_shadow_entry(&m, s, we);
1408                 ppgtt_set_shadow_entry(spt, &m, index);
1409         } else {
1410                 ret = ppgtt_populate_shadow_entry(vgpu, spt, index, we);
1411                 if (ret)
1412                         goto fail;
1413         }
1414         return 0;
1415 fail:
1416         gvt_vgpu_err("fail: spt %p guest entry 0x%llx type %d\n",
1417                 spt, we->val64, we->type);
1418         return ret;
1419 }
1420
1421 static int sync_oos_page(struct intel_vgpu *vgpu,
1422                 struct intel_vgpu_oos_page *oos_page)
1423 {
1424         const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
1425         struct intel_gvt *gvt = vgpu->gvt;
1426         struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
1427         struct intel_vgpu_ppgtt_spt *spt = oos_page->spt;
1428         struct intel_gvt_gtt_entry old, new;
1429         int index;
1430         int ret;
1431
1432         trace_oos_change(vgpu->id, "sync", oos_page->id,
1433                          spt, spt->guest_page.type);
1434
1435         old.type = new.type = get_entry_type(spt->guest_page.type);
1436         old.val64 = new.val64 = 0;
1437
1438         for (index = 0; index < (I915_GTT_PAGE_SIZE >>
1439                                 info->gtt_entry_size_shift); index++) {
1440                 ops->get_entry(oos_page->mem, &old, index, false, 0, vgpu);
1441                 ops->get_entry(NULL, &new, index, true,
1442                                spt->guest_page.gfn << PAGE_SHIFT, vgpu);
1443
1444                 if (old.val64 == new.val64
1445                         && !test_and_clear_bit(index, spt->post_shadow_bitmap))
1446                         continue;
1447
1448                 trace_oos_sync(vgpu->id, oos_page->id,
1449                                 spt, spt->guest_page.type,
1450                                 new.val64, index);
1451
1452                 ret = ppgtt_populate_shadow_entry(vgpu, spt, index, &new);
1453                 if (ret)
1454                         return ret;
1455
1456                 ops->set_entry(oos_page->mem, &new, index, false, 0, vgpu);
1457         }
1458
1459         spt->guest_page.write_cnt = 0;
1460         list_del_init(&spt->post_shadow_list);
1461         return 0;
1462 }
1463
1464 static int detach_oos_page(struct intel_vgpu *vgpu,
1465                 struct intel_vgpu_oos_page *oos_page)
1466 {
1467         struct intel_gvt *gvt = vgpu->gvt;
1468         struct intel_vgpu_ppgtt_spt *spt = oos_page->spt;
1469
1470         trace_oos_change(vgpu->id, "detach", oos_page->id,
1471                          spt, spt->guest_page.type);
1472
1473         spt->guest_page.write_cnt = 0;
1474         spt->guest_page.oos_page = NULL;
1475         oos_page->spt = NULL;
1476
1477         list_del_init(&oos_page->vm_list);
1478         list_move_tail(&oos_page->list, &gvt->gtt.oos_page_free_list_head);
1479
1480         return 0;
1481 }
1482
1483 static int attach_oos_page(struct intel_vgpu_oos_page *oos_page,
1484                 struct intel_vgpu_ppgtt_spt *spt)
1485 {
1486         struct intel_gvt *gvt = spt->vgpu->gvt;
1487         int ret;
1488
1489         ret = intel_gvt_hypervisor_read_gpa(spt->vgpu,
1490                         spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
1491                         oos_page->mem, I915_GTT_PAGE_SIZE);
1492         if (ret)
1493                 return ret;
1494
1495         oos_page->spt = spt;
1496         spt->guest_page.oos_page = oos_page;
1497
1498         list_move_tail(&oos_page->list, &gvt->gtt.oos_page_use_list_head);
1499
1500         trace_oos_change(spt->vgpu->id, "attach", oos_page->id,
1501                          spt, spt->guest_page.type);
1502         return 0;
1503 }
1504
1505 static int ppgtt_set_guest_page_sync(struct intel_vgpu_ppgtt_spt *spt)
1506 {
1507         struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
1508         int ret;
1509
1510         ret = intel_vgpu_enable_page_track(spt->vgpu, spt->guest_page.gfn);
1511         if (ret)
1512                 return ret;
1513
1514         trace_oos_change(spt->vgpu->id, "set page sync", oos_page->id,
1515                          spt, spt->guest_page.type);
1516
1517         list_del_init(&oos_page->vm_list);
1518         return sync_oos_page(spt->vgpu, oos_page);
1519 }
1520
1521 static int ppgtt_allocate_oos_page(struct intel_vgpu_ppgtt_spt *spt)
1522 {
1523         struct intel_gvt *gvt = spt->vgpu->gvt;
1524         struct intel_gvt_gtt *gtt = &gvt->gtt;
1525         struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
1526         int ret;
1527
1528         WARN(oos_page, "shadow PPGTT page already has an oos page\n");
1529
1530         if (list_empty(&gtt->oos_page_free_list_head)) {
1531                 oos_page = container_of(gtt->oos_page_use_list_head.next,
1532                         struct intel_vgpu_oos_page, list);
1533                 ret = ppgtt_set_guest_page_sync(oos_page->spt);
1534                 if (ret)
1535                         return ret;
1536                 ret = detach_oos_page(spt->vgpu, oos_page);
1537                 if (ret)
1538                         return ret;
1539         } else
1540                 oos_page = container_of(gtt->oos_page_free_list_head.next,
1541                         struct intel_vgpu_oos_page, list);
1542         return attach_oos_page(oos_page, spt);
1543 }
1544
1545 static int ppgtt_set_guest_page_oos(struct intel_vgpu_ppgtt_spt *spt)
1546 {
1547         struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
1548
1549         if (WARN(!oos_page, "shadow PPGTT page should have an oos page\n"))
1550                 return -EINVAL;
1551
1552         trace_oos_change(spt->vgpu->id, "set page out of sync", oos_page->id,
1553                          spt, spt->guest_page.type);
1554
1555         list_add_tail(&oos_page->vm_list, &spt->vgpu->gtt.oos_page_list_head);
1556         return intel_vgpu_disable_page_track(spt->vgpu, spt->guest_page.gfn);
1557 }
1558
1559 /**
1560  * intel_vgpu_sync_oos_pages - sync all the out-of-sync shadow pages for a vGPU
1561  * @vgpu: a vGPU
1562  *
1563  * This function is called before submitting a guest workload to the host,
1564  * to sync all the out-of-sync shadow pages of the vGPU.
1565  *
1566  * Returns:
1567  * Zero on success, negative error code if failed.
1568  */
1569 int intel_vgpu_sync_oos_pages(struct intel_vgpu *vgpu)
1570 {
1571         struct list_head *pos, *n;
1572         struct intel_vgpu_oos_page *oos_page;
1573         int ret;
1574
1575         if (!enable_out_of_sync)
1576                 return 0;
1577
1578         list_for_each_safe(pos, n, &vgpu->gtt.oos_page_list_head) {
1579                 oos_page = container_of(pos,
1580                                 struct intel_vgpu_oos_page, vm_list);
1581                 ret = ppgtt_set_guest_page_sync(oos_page->spt);
1582                 if (ret)
1583                         return ret;
1584         }
1585         return 0;
1586 }
1587
1588 /*
1589  * The heart of PPGTT shadow page table.
1590  */
1591 static int ppgtt_handle_guest_write_page_table(
1592                 struct intel_vgpu_ppgtt_spt *spt,
1593                 struct intel_gvt_gtt_entry *we, unsigned long index)
1594 {
1595         struct intel_vgpu *vgpu = spt->vgpu;
1596         int type = spt->shadow_page.type;
1597         struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1598         struct intel_gvt_gtt_entry old_se;
1599         int new_present;
1600         int i, ret;
1601
1602         new_present = ops->test_present(we);
1603
1604         /*
1605          * Add the new entry first and then remove the old one, which keeps
1606          * the ppgtt table valid during the window between addition and
1607          * removal (an illustrative sequence follows this function).
1608          */
1609         ppgtt_get_shadow_entry(spt, &old_se, index);
1610
1611         if (new_present) {
1612                 ret = ppgtt_handle_guest_entry_add(spt, we, index);
1613                 if (ret)
1614                         goto fail;
1615         }
1616
1617         ret = ppgtt_handle_guest_entry_removal(spt, &old_se, index);
1618         if (ret)
1619                 goto fail;
1620
1621         if (!new_present) {
1622                 /* For split 64KB entries, we need to clear them all. */
1623                 if (ops->test_64k_splited(&old_se) &&
1624                     !(index % GTT_64K_PTE_STRIDE)) {
1625                         gvt_vdbg_mm("remove split 64K shadow entries\n");
1626                         for (i = 0; i < GTT_64K_PTE_STRIDE; i++) {
1627                                 ops->clear_64k_splited(&old_se);
1628                                 ops->set_pfn(&old_se,
1629                                         vgpu->gtt.scratch_pt[type].page_mfn);
1630                                 ppgtt_set_shadow_entry(spt, &old_se, index + i);
1631                         }
1632                 } else if (old_se.type == GTT_TYPE_PPGTT_PTE_2M_ENTRY ||
1633                            old_se.type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) {
1634                         ops->clear_pse(&old_se);
1635                         ops->set_pfn(&old_se,
1636                                      vgpu->gtt.scratch_pt[type].page_mfn);
1637                         ppgtt_set_shadow_entry(spt, &old_se, index);
1638                 } else {
1639                         ops->set_pfn(&old_se,
1640                                      vgpu->gtt.scratch_pt[type].page_mfn);
1641                         ppgtt_set_shadow_entry(spt, &old_se, index);
1642                 }
1643         }
1644
1645         return 0;
1646 fail:
1647         gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d.\n",
1648                         spt, we->val64, we->type);
1649         return ret;
1650 }
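
/*
 * Illustrative sequence for the add-then-remove ordering above (a sketch
 * of the existing flow, not an additional code path). Suppose the guest
 * rewrites the PTE at @index from old page A to new page B:
 *
 *   ppgtt_get_shadow_entry(spt, &old_se, index);           // still maps A
 *   ppgtt_handle_guest_entry_add(spt, we, index);          // now maps B
 *   ppgtt_handle_guest_entry_removal(spt, &old_se, index); // drop A
 *
 * Because B is installed before A is torn down, a walk through this table
 * never observes a hole at @index while the update is in flight.
 */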
1651
1652
1653
1654 static inline bool can_do_out_of_sync(struct intel_vgpu_ppgtt_spt *spt)
1655 {
1656         return enable_out_of_sync
1657                 && gtt_type_is_pte_pt(spt->guest_page.type)
1658                 && spt->guest_page.write_cnt >= 2;
1659 }
1660
1661 static void ppgtt_set_post_shadow(struct intel_vgpu_ppgtt_spt *spt,
1662                 unsigned long index)
1663 {
1664         set_bit(index, spt->post_shadow_bitmap);
1665         if (!list_empty(&spt->post_shadow_list))
1666                 return;
1667
1668         list_add_tail(&spt->post_shadow_list,
1669                         &spt->vgpu->gtt.post_shadow_list_head);
1670 }
1671
1672 /**
1673  * intel_vgpu_flush_post_shadow - flush the post shadow transactions
1674  * @vgpu: a vGPU
1675  *
1676  * This function is called before submitting a guest workload to host,
1677  * to flush all the post shadows for a vGPU.
1678  *
1679  * Returns:
1680  * Zero on success, negative error code if failed.
1681  */
1682 int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu)
1683 {
1684         struct list_head *pos, *n;
1685         struct intel_vgpu_ppgtt_spt *spt;
1686         struct intel_gvt_gtt_entry ge;
1687         unsigned long index;
1688         int ret;
1689
1690         list_for_each_safe(pos, n, &vgpu->gtt.post_shadow_list_head) {
1691                 spt = container_of(pos, struct intel_vgpu_ppgtt_spt,
1692                                 post_shadow_list);
1693
1694                 for_each_set_bit(index, spt->post_shadow_bitmap,
1695                                 GTT_ENTRY_NUM_IN_ONE_PAGE) {
1696                         ppgtt_get_guest_entry(spt, &ge, index);
1697
1698                         ret = ppgtt_handle_guest_write_page_table(spt,
1699                                                         &ge, index);
1700                         if (ret)
1701                                 return ret;
1702                         clear_bit(index, spt->post_shadow_bitmap);
1703                 }
1704                 list_del_init(&spt->post_shadow_list);
1705         }
1706         return 0;
1707 }
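
/*
 * A minimal call sketch for the pre-submission path (the helper name is
 * hypothetical; the real call sites live in the workload handling code
 * outside this file):
 *
 *   static int example_prepare_vgpu_mm(struct intel_vgpu *vgpu)
 *   {
 *           int ret;
 *
 *           ret = intel_vgpu_sync_oos_pages(vgpu);     // re-shadow OOS PTE pages
 *           if (ret)
 *                   return ret;
 *           return intel_vgpu_flush_post_shadow(vgpu); // replay deferred writes
 *   }
 */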
1708
1709 static int ppgtt_handle_guest_write_page_table_bytes(
1710                 struct intel_vgpu_ppgtt_spt *spt,
1711                 u64 pa, void *p_data, int bytes)
1712 {
1713         struct intel_vgpu *vgpu = spt->vgpu;
1714         struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1715         const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
1716         struct intel_gvt_gtt_entry we, se;
1717         unsigned long index;
1718         int ret;
1719
1720         index = (pa & (PAGE_SIZE - 1)) >> info->gtt_entry_size_shift;
1721
1722         ppgtt_get_guest_entry(spt, &we, index);
1723
1724         /*
1725          * For a page table that holds 64K gtt entries, only PTE#0, PTE#16,
1726          * PTE#32, ... PTE#496 are used; updates to the unused PTEs should be
1727          * ignored (see the worked example after this function).
1728          */
1729         if (we.type == GTT_TYPE_PPGTT_PTE_64K_ENTRY &&
1730             (index % GTT_64K_PTE_STRIDE)) {
1731                 gvt_vdbg_mm("Ignore write to unused PTE entry, index %lu\n",
1732                             index);
1733                 return 0;
1734         }
1735
1736         if (bytes == info->gtt_entry_size) {
1737                 ret = ppgtt_handle_guest_write_page_table(spt, &we, index);
1738                 if (ret)
1739                         return ret;
1740         } else {
1741                 if (!test_bit(index, spt->post_shadow_bitmap)) {
1742                         int type = spt->shadow_page.type;
1743
1744                         ppgtt_get_shadow_entry(spt, &se, index);
1745                         ret = ppgtt_handle_guest_entry_removal(spt, &se, index);
1746                         if (ret)
1747                                 return ret;
1748                         ops->set_pfn(&se, vgpu->gtt.scratch_pt[type].page_mfn);
1749                         ppgtt_set_shadow_entry(spt, &se, index);
1750                 }
1751                 ppgtt_set_post_shadow(spt, index);
1752         }
1753
1754         if (!enable_out_of_sync)
1755                 return 0;
1756
1757         spt->guest_page.write_cnt++;
1758
1759         if (spt->guest_page.oos_page)
1760                 ops->set_entry(spt->guest_page.oos_page->mem, &we, index,
1761                                 false, 0, vgpu);
1762
1763         if (can_do_out_of_sync(spt)) {
1764                 if (!spt->guest_page.oos_page)
1765                         ppgtt_allocate_oos_page(spt);
1766
1767                 ret = ppgtt_set_guest_page_oos(spt);
1768                 if (ret < 0)
1769                         return ret;
1770         }
1771         return 0;
1772 }
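
/*
 * Worked example for the index math above, assuming the usual 8-byte gen8
 * GTT entry (gtt_entry_size_shift == 3) and a hypothetical write address:
 *
 *   pa with page offset 0x088  ->  index = 0x088 >> 3 = 17
 *
 * For a page table holding 64K PTEs only indices 0, 16, 32, ..., 496 are
 * used (one slot per GTT_64K_PTE_STRIDE), so the write above is ignored.
 * A full 8-byte write to a used slot is shadowed immediately, while a
 * partial write only parks the shadow slot on the scratch page and marks
 * it in post_shadow_bitmap to be replayed later by
 * intel_vgpu_flush_post_shadow().
 */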
1773
1774 static void invalidate_ppgtt_mm(struct intel_vgpu_mm *mm)
1775 {
1776         struct intel_vgpu *vgpu = mm->vgpu;
1777         struct intel_gvt *gvt = vgpu->gvt;
1778         struct intel_gvt_gtt *gtt = &gvt->gtt;
1779         struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
1780         struct intel_gvt_gtt_entry se;
1781         int index;
1782
1783         if (!mm->ppgtt_mm.shadowed)
1784                 return;
1785
1786         for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.shadow_pdps); index++) {
1787                 ppgtt_get_shadow_root_entry(mm, &se, index);
1788
1789                 if (!ops->test_present(&se))
1790                         continue;
1791
1792                 ppgtt_invalidate_spt_by_shadow_entry(vgpu, &se);
1793                 se.val64 = 0;
1794                 ppgtt_set_shadow_root_entry(mm, &se, index);
1795
1796                 trace_spt_guest_change(vgpu->id, "destroy root pointer",
1797                                        NULL, se.type, se.val64, index);
1798         }
1799
1800         mm->ppgtt_mm.shadowed = false;
1801 }
1802
1803
1804 static int shadow_ppgtt_mm(struct intel_vgpu_mm *mm)
1805 {
1806         struct intel_vgpu *vgpu = mm->vgpu;
1807         struct intel_gvt *gvt = vgpu->gvt;
1808         struct intel_gvt_gtt *gtt = &gvt->gtt;
1809         struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
1810         struct intel_vgpu_ppgtt_spt *spt;
1811         struct intel_gvt_gtt_entry ge, se;
1812         int index, ret;
1813
1814         if (mm->ppgtt_mm.shadowed)
1815                 return 0;
1816
1817         mm->ppgtt_mm.shadowed = true;
1818
1819         for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.guest_pdps); index++) {
1820                 ppgtt_get_guest_root_entry(mm, &ge, index);
1821
1822                 if (!ops->test_present(&ge))
1823                         continue;
1824
1825                 trace_spt_guest_change(vgpu->id, __func__, NULL,
1826                                        ge.type, ge.val64, index);
1827
1828                 spt = ppgtt_populate_spt_by_guest_entry(vgpu, &ge);
1829                 if (IS_ERR(spt)) {
1830                         gvt_vgpu_err("fail to populate guest root pointer\n");
1831                         ret = PTR_ERR(spt);
1832                         goto fail;
1833                 }
1834                 ppgtt_generate_shadow_entry(&se, spt, &ge);
1835                 ppgtt_set_shadow_root_entry(mm, &se, index);
1836
1837                 trace_spt_guest_change(vgpu->id, "populate root pointer",
1838                                        NULL, se.type, se.val64, index);
1839         }
1840
1841         return 0;
1842 fail:
1843         invalidate_ppgtt_mm(mm);
1844         return ret;
1845 }
1846
1847 static struct intel_vgpu_mm *vgpu_alloc_mm(struct intel_vgpu *vgpu)
1848 {
1849         struct intel_vgpu_mm *mm;
1850
1851         mm = kzalloc(sizeof(*mm), GFP_KERNEL);
1852         if (!mm)
1853                 return NULL;
1854
1855         mm->vgpu = vgpu;
1856         kref_init(&mm->ref);
1857         atomic_set(&mm->pincount, 0);
1858
1859         return mm;
1860 }
1861
1862 static void vgpu_free_mm(struct intel_vgpu_mm *mm)
1863 {
1864         kfree(mm);
1865 }
1866
1867 /**
1868  * intel_vgpu_create_ppgtt_mm - create a ppgtt mm object for a vGPU
1869  * @vgpu: a vGPU
1870  * @root_entry_type: ppgtt root entry type
1871  * @pdps: guest pdps.
1872  *
1873  * This function is used to create a ppgtt mm object for a vGPU.
1874  *
1875  * Returns:
1876  * Pointer to the new mm object on success, ERR_PTR-encoded error code if failed.
1877  */
1878 struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu,
1879                 enum intel_gvt_gtt_type root_entry_type, u64 pdps[])
1880 {
1881         struct intel_gvt *gvt = vgpu->gvt;
1882         struct intel_vgpu_mm *mm;
1883         int ret;
1884
1885         mm = vgpu_alloc_mm(vgpu);
1886         if (!mm)
1887                 return ERR_PTR(-ENOMEM);
1888
1889         mm->type = INTEL_GVT_MM_PPGTT;
1890
1891         GEM_BUG_ON(root_entry_type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY &&
1892                    root_entry_type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY);
1893         mm->ppgtt_mm.root_entry_type = root_entry_type;
1894
1895         INIT_LIST_HEAD(&mm->ppgtt_mm.list);
1896         INIT_LIST_HEAD(&mm->ppgtt_mm.lru_list);
1897
1898         if (root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY)
1899                 mm->ppgtt_mm.guest_pdps[0] = pdps[0];
1900         else
1901                 memcpy(mm->ppgtt_mm.guest_pdps, pdps,
1902                        sizeof(mm->ppgtt_mm.guest_pdps));
1903
1904         ret = shadow_ppgtt_mm(mm);
1905         if (ret) {
1906                 gvt_vgpu_err("failed to shadow ppgtt mm\n");
1907                 vgpu_free_mm(mm);
1908                 return ERR_PTR(ret);
1909         }
1910
1911         list_add_tail(&mm->ppgtt_mm.list, &vgpu->gtt.ppgtt_mm_list_head);
1912
1913         mutex_lock(&gvt->gtt.ppgtt_mm_lock);
1914         list_add_tail(&mm->ppgtt_mm.lru_list, &gvt->gtt.ppgtt_mm_lru_list_head);
1915         mutex_unlock(&gvt->gtt.ppgtt_mm_lock);
1916
1917         return mm;
1918 }
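
/*
 * Illustration of the pdps handling above (values are hypothetical and the
 * array size assumes the usual four PDP slots): a 4-level (L4) guest table
 * has a single root, so only pdps[0], the guest PML4 address, is recorded:
 *
 *   u64 pdps[4] = { 0x12340000 };            // pdps[1..3] unused for L4
 *
 * whereas a 3-level (L3) guest exposes four PDP root entries, so the whole
 * array is copied into guest_pdps and later matched with memcmp() by
 * intel_vgpu_find_ppgtt_mm().
 */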
1919
1920 static struct intel_vgpu_mm *intel_vgpu_create_ggtt_mm(struct intel_vgpu *vgpu)
1921 {
1922         struct intel_vgpu_mm *mm;
1923         unsigned long nr_entries;
1924
1925         mm = vgpu_alloc_mm(vgpu);
1926         if (!mm)
1927                 return ERR_PTR(-ENOMEM);
1928
1929         mm->type = INTEL_GVT_MM_GGTT;
1930
1931         nr_entries = gvt_ggtt_gm_sz(vgpu->gvt) >> I915_GTT_PAGE_SHIFT;
1932         mm->ggtt_mm.virtual_ggtt =
1933                 vzalloc(array_size(nr_entries,
1934                                    vgpu->gvt->device_info.gtt_entry_size));
1935         if (!mm->ggtt_mm.virtual_ggtt) {
1936                 vgpu_free_mm(mm);
1937                 return ERR_PTR(-ENOMEM);
1938         }
1939
1940         return mm;
1941 }
1942
1943 /**
1944  * _intel_vgpu_mm_release - destroy a mm object
1945  * @mm_ref: a kref object
1946  *
1947  * This function is used to destroy a mm object for vGPU
1948  *
1949  */
1950 void _intel_vgpu_mm_release(struct kref *mm_ref)
1951 {
1952         struct intel_vgpu_mm *mm = container_of(mm_ref, typeof(*mm), ref);
1953
1954         if (GEM_WARN_ON(atomic_read(&mm->pincount)))
1955                 gvt_err("vgpu mm pin count bug detected\n");
1956
1957         if (mm->type == INTEL_GVT_MM_PPGTT) {
1958                 list_del(&mm->ppgtt_mm.list);
1959                 list_del(&mm->ppgtt_mm.lru_list);
1960                 invalidate_ppgtt_mm(mm);
1961         } else {
1962                 vfree(mm->ggtt_mm.virtual_ggtt);
1963         }
1964
1965         vgpu_free_mm(mm);
1966 }
1967
1968 /**
1969  * intel_vgpu_unpin_mm - decrease the pin count of a vGPU mm object
1970  * @mm: a vGPU mm object
1971  *
1972  * This function is called when user doesn't want to use a vGPU mm object
1973  */
1974 void intel_vgpu_unpin_mm(struct intel_vgpu_mm *mm)
1975 {
1976         atomic_dec_if_positive(&mm->pincount);
1977 }
1978
1979 /**
1980  * intel_vgpu_pin_mm - increase the pin count of a vGPU mm object
1981  * @mm: target vgpu mm
1982  *
1983  * This function is called when user wants to use a vGPU mm object. If this
1984  * mm object hasn't been shadowed yet, the shadow will be populated at this
1985  * time.
1986  *
1987  * Returns:
1988  * Zero on success, negative error code if failed.
1989  */
1990 int intel_vgpu_pin_mm(struct intel_vgpu_mm *mm)
1991 {
1992         int ret;
1993
1994         atomic_inc(&mm->pincount);
1995
1996         if (mm->type == INTEL_GVT_MM_PPGTT) {
1997                 ret = shadow_ppgtt_mm(mm);
1998                 if (ret)
1999                         return ret;
2000
2001                 mutex_lock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
2002                 list_move_tail(&mm->ppgtt_mm.lru_list,
2003                                &mm->vgpu->gvt->gtt.ppgtt_mm_lru_list_head);
2004                 mutex_unlock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
2005         }
2006
2007         return 0;
2008 }
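
/*
 * Typical pin/unpin pattern around use of a shadowed PPGTT (a sketch; the
 * real pin/unpin callers sit in the workload submission path):
 *
 *   ret = intel_vgpu_pin_mm(workload->shadow_mm);
 *   if (ret)
 *           return ret;
 *   // ... dispatch the workload that runs on the shadowed tables ...
 *   intel_vgpu_unpin_mm(workload->shadow_mm);
 *
 * While pinned, the mm is skipped by reclaim_one_ppgtt_mm() below, so its
 * shadow tables cannot be reclaimed under a running workload.
 */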
2009
2010 static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt)
2011 {
2012         struct intel_vgpu_mm *mm;
2013         struct list_head *pos, *n;
2014
2015         mutex_lock(&gvt->gtt.ppgtt_mm_lock);
2016
2017         list_for_each_safe(pos, n, &gvt->gtt.ppgtt_mm_lru_list_head) {
2018                 mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.lru_list);
2019
2020                 if (atomic_read(&mm->pincount))
2021                         continue;
2022
2023                 list_del_init(&mm->ppgtt_mm.lru_list);
2024                 mutex_unlock(&gvt->gtt.ppgtt_mm_lock);
2025                 invalidate_ppgtt_mm(mm);
2026                 return 1;
2027         }
2028         mutex_unlock(&gvt->gtt.ppgtt_mm_lock);
2029         return 0;
2030 }
2031
2032 /*
2033  * GMA translation APIs.
2034  */
2035 static inline int ppgtt_get_next_level_entry(struct intel_vgpu_mm *mm,
2036                 struct intel_gvt_gtt_entry *e, unsigned long index, bool guest)
2037 {
2038         struct intel_vgpu *vgpu = mm->vgpu;
2039         struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
2040         struct intel_vgpu_ppgtt_spt *s;
2041
2042         s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e));
2043         if (!s)
2044                 return -ENXIO;
2045
2046         if (!guest)
2047                 ppgtt_get_shadow_entry(s, e, index);
2048         else
2049                 ppgtt_get_guest_entry(s, e, index);
2050         return 0;
2051 }
2052
2053 /**
2054  * intel_vgpu_gma_to_gpa - translate a gma to GPA
2055  * @mm: mm object. could be a PPGTT or GGTT mm object
2056  * @gma: graphics memory address in this mm object
2057  *
2058  * This function is used to translate a graphics memory address in specific
2059  * graphics memory space to guest physical address.
2060  *
2061  * Returns:
2062  * Guest physical address on success, INTEL_GVT_INVALID_ADDR if failed.
2063  */
2064 unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma)
2065 {
2066         struct intel_vgpu *vgpu = mm->vgpu;
2067         struct intel_gvt *gvt = vgpu->gvt;
2068         struct intel_gvt_gtt_pte_ops *pte_ops = gvt->gtt.pte_ops;
2069         struct intel_gvt_gtt_gma_ops *gma_ops = gvt->gtt.gma_ops;
2070         unsigned long gpa = INTEL_GVT_INVALID_ADDR;
2071         unsigned long gma_index[4];
2072         struct intel_gvt_gtt_entry e;
2073         int i, levels = 0;
2074         int ret;
2075
2076         GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT &&
2077                    mm->type != INTEL_GVT_MM_PPGTT);
2078
2079         if (mm->type == INTEL_GVT_MM_GGTT) {
2080                 if (!vgpu_gmadr_is_valid(vgpu, gma))
2081                         goto err;
2082
2083                 ggtt_get_guest_entry(mm, &e,
2084                         gma_ops->gma_to_ggtt_pte_index(gma));
2085
2086                 gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT)
2087                         + (gma & ~I915_GTT_PAGE_MASK);
2088
2089                 trace_gma_translate(vgpu->id, "ggtt", 0, 0, gma, gpa);
2090         } else {
2091                 switch (mm->ppgtt_mm.root_entry_type) {
2092                 case GTT_TYPE_PPGTT_ROOT_L4_ENTRY:
2093                         ppgtt_get_shadow_root_entry(mm, &e, 0);
2094
2095                         gma_index[0] = gma_ops->gma_to_pml4_index(gma);
2096                         gma_index[1] = gma_ops->gma_to_l4_pdp_index(gma);
2097                         gma_index[2] = gma_ops->gma_to_pde_index(gma);
2098                         gma_index[3] = gma_ops->gma_to_pte_index(gma);
2099                         levels = 4;
2100                         break;
2101                 case GTT_TYPE_PPGTT_ROOT_L3_ENTRY:
2102                         ppgtt_get_shadow_root_entry(mm, &e,
2103                                         gma_ops->gma_to_l3_pdp_index(gma));
2104
2105                         gma_index[0] = gma_ops->gma_to_pde_index(gma);
2106                         gma_index[1] = gma_ops->gma_to_pte_index(gma);
2107                         levels = 2;
2108                         break;
2109                 default:
2110                         GEM_BUG_ON(1);
2111                 }
2112
2113                 /* walk the shadow page table and get gpa from guest entry */
2114                 for (i = 0; i < levels; i++) {
2115                         ret = ppgtt_get_next_level_entry(mm, &e, gma_index[i],
2116                                 (i == levels - 1));
2117                         if (ret)
2118                                 goto err;
2119
2120                         if (!pte_ops->test_present(&e)) {
2121                                 gvt_dbg_core("GMA 0x%lx is not present\n", gma);
2122                                 goto err;
2123                         }
2124                 }
2125
2126                 gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT) +
2127                                         (gma & ~I915_GTT_PAGE_MASK);
2128                 trace_gma_translate(vgpu->id, "ppgtt", 0,
2129                                     mm->ppgtt_mm.root_entry_type, gma, gpa);
2130         }
2131
2132         return gpa;
2133 err:
2134         gvt_vgpu_err("invalid mm type: %d gma %lx\n", mm->type, gma);
2135         return INTEL_GVT_INVALID_ADDR;
2136 }
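
/*
 * Worked example of the 4-level walk above (the gma value is hypothetical,
 * bit ranges follow the standard gen8 48-bit layout):
 *
 *   gma = 0x8080604abc
 *     gma_to_pml4_index   -> gma[47:39] = 1
 *     gma_to_l4_pdp_index -> gma[38:30] = 2
 *     gma_to_pde_index    -> gma[29:21] = 3
 *     gma_to_pte_index    -> gma[20:12] = 4
 *     page offset         -> gma[11:0]  = 0xabc
 *
 * The returned gpa is the pfn taken from the last-level guest entry,
 * shifted left by I915_GTT_PAGE_SHIFT, plus the page offset.
 */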
2137
2138 static int emulate_ggtt_mmio_read(struct intel_vgpu *vgpu,
2139         unsigned int off, void *p_data, unsigned int bytes)
2140 {
2141         struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm;
2142         const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
2143         unsigned long index = off >> info->gtt_entry_size_shift;
2144         unsigned long gma;
2145         struct intel_gvt_gtt_entry e;
2146
2147         if (bytes != 4 && bytes != 8)
2148                 return -EINVAL;
2149
2150         gma = index << I915_GTT_PAGE_SHIFT;
2151         if (!intel_gvt_ggtt_validate_range(vgpu,
2152                                            gma, 1 << I915_GTT_PAGE_SHIFT)) {
2153                 gvt_dbg_mm("read invalid ggtt at 0x%lx\n", gma);
2154                 memset(p_data, 0, bytes);
2155                 return 0;
2156         }
2157
2158         ggtt_get_guest_entry(ggtt_mm, &e, index);
2159         memcpy(p_data, (void *)&e.val64 + (off & (info->gtt_entry_size - 1)),
2160                         bytes);
2161         return 0;
2162 }
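
/*
 * Worked example for the offset -> index math above, assuming the 8-byte
 * gen8 GGTT entry (gtt_entry_size_shift == 3):
 *
 *   off = 0x10  ->  index = 0x10 >> 3 = 2
 *                   gma   = 2 << I915_GTT_PAGE_SHIFT = 0x2000
 *
 * A 4-byte read of the upper half of the same entry (off = 0x14) resolves
 * to index 2 as well; the (off & (gtt_entry_size - 1)) term then selects
 * the upper dword of e.val64 to copy back to the guest.
 */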
2163
2164 /**
2165  * intel_vgpu_emulate_ggtt_mmio_read - emulate GGTT MMIO register read
2166  * @vgpu: a vGPU
2167  * @off: register offset
2168  * @p_data: data will be returned to guest
2169  * @bytes: data length
2170  *
2171  * This function is used to emulate the GTT MMIO register read
2172  *
2173  * Returns:
2174  * Zero on success, error code if failed.
2175  */
2176 int intel_vgpu_emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, unsigned int off,
2177         void *p_data, unsigned int bytes)
2178 {
2179         const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
2180         int ret;
2181
2182         if (bytes != 4 && bytes != 8)
2183                 return -EINVAL;
2184
2185         off -= info->gtt_start_offset;
2186         ret = emulate_ggtt_mmio_read(vgpu, off, p_data, bytes);
2187         return ret;
2188 }
2189
2190 static void ggtt_invalidate_pte(struct intel_vgpu *vgpu,
2191                 struct intel_gvt_gtt_entry *entry)
2192 {
2193         struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
2194         unsigned long pfn;
2195
2196         pfn = pte_ops->get_pfn(entry);
2197         if (pfn != vgpu->gvt->gtt.scratch_mfn)
2198                 intel_gvt_hypervisor_dma_unmap_guest_page(vgpu,
2199                                                 pfn << PAGE_SHIFT);
2200 }
2201
2202 static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
2203         void *p_data, unsigned int bytes)
2204 {
2205         struct intel_gvt *gvt = vgpu->gvt;
2206         const struct intel_gvt_device_info *info = &gvt->device_info;
2207         struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm;
2208         struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
2209         unsigned long g_gtt_index = off >> info->gtt_entry_size_shift;
2210         unsigned long gma, gfn;
2211         struct intel_gvt_gtt_entry e = {.val64 = 0, .type = GTT_TYPE_GGTT_PTE};
2212         struct intel_gvt_gtt_entry m = {.val64 = 0, .type = GTT_TYPE_GGTT_PTE};
2213         dma_addr_t dma_addr;
2214         int ret;
2215         struct intel_gvt_partial_pte *partial_pte, *pos, *n;
2216         bool partial_update = false;
2217
2218         if (bytes != 4 && bytes != 8)
2219                 return -EINVAL;
2220
2221         gma = g_gtt_index << I915_GTT_PAGE_SHIFT;
2222
2223         /* the VM may configure the whole GM space when ballooning is used */
2224         if (!vgpu_gmadr_is_valid(vgpu, gma))
2225                 return 0;
2226
2227         e.type = GTT_TYPE_GGTT_PTE;
2228         memcpy((void *)&e.val64 + (off & (info->gtt_entry_size - 1)), p_data,
2229                         bytes);
2230
2231         /* If the ggtt entry size is 8 bytes and it is split into two 4-byte
2232          * writes, save the first 4 bytes in a list and update the virtual PTE;
2233          * only update the shadow PTE on the second write (example after this function).
2234          */
2235         if (bytes < info->gtt_entry_size) {
2236                 bool found = false;
2237
2238                 list_for_each_entry_safe(pos, n,
2239                                 &ggtt_mm->ggtt_mm.partial_pte_list, list) {
2240                         if (g_gtt_index == pos->offset >>
2241                                         info->gtt_entry_size_shift) {
2242                                 if (off != pos->offset) {
2243                                         /* the second partial part */
2244                                         int last_off = pos->offset &
2245                                                 (info->gtt_entry_size - 1);
2246
2247                                         memcpy((void *)&e.val64 + last_off,
2248                                                 (void *)&pos->data + last_off,
2249                                                 bytes);
2250
2251                                         list_del(&pos->list);
2252                                         kfree(pos);
2253                                         found = true;
2254                                         break;
2255                                 }
2256
2257                                 /* update of the first partial part */
2258                                 pos->data = e.val64;
2259                                 ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
2260                                 return 0;
2261                         }
2262                 }
2263
2264                 if (!found) {
2265                         /* the first partial part */
2266                         partial_pte = kzalloc(sizeof(*partial_pte), GFP_KERNEL);
2267                         if (!partial_pte)
2268                                 return -ENOMEM;
2269                         partial_pte->offset = off;
2270                         partial_pte->data = e.val64;
2271                         list_add_tail(&partial_pte->list,
2272                                 &ggtt_mm->ggtt_mm.partial_pte_list);
2273                         partial_update = true;
2274                 }
2275         }
2276
2277         if (!partial_update && (ops->test_present(&e))) {
2278                 gfn = ops->get_pfn(&e);
2279                 m.val64 = e.val64;
2280                 m.type = e.type;
2281
2282                 /* one PTE update may be issued in multiple writes and the
2283                  * first write may not construct a valid gfn
2284                  */
2285                 if (!intel_gvt_hypervisor_is_valid_gfn(vgpu, gfn)) {
2286                         ops->set_pfn(&m, gvt->gtt.scratch_mfn);
2287                         goto out;
2288                 }
2289
2290                 ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn,
2291                                                         PAGE_SIZE, &dma_addr);
2292                 if (ret) {
2293                         gvt_vgpu_err("fail to populate guest ggtt entry\n");
2294                         /* The guest driver may read/write the entry while it is
2295                          * partially updated; in this situation p2m will fail, so
2296                          * set the shadow entry to point to a scratch page.
2297                          */
2298                         ops->set_pfn(&m, gvt->gtt.scratch_mfn);
2299                 } else
2300                         ops->set_pfn(&m, dma_addr >> PAGE_SHIFT);
2301         } else {
2302                 ops->set_pfn(&m, gvt->gtt.scratch_mfn);
2303                 ops->clear_present(&m);
2304         }
2305
2306 out:
2307         ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
2308
2309         ggtt_get_host_entry(ggtt_mm, &e, g_gtt_index);
2310         ggtt_invalidate_pte(vgpu, &e);
2311
2312         ggtt_set_host_entry(ggtt_mm, &m, g_gtt_index);
2313         ggtt_invalidate(gvt->dev_priv);
2314         return 0;
2315 }
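
/*
 * Illustration of the partial-PTE handling above, assuming 8-byte entries
 * and a guest that updates one entry with two 4-byte writes (offsets are
 * hypothetical):
 *
 *   1) 4-byte write at off 0x100 (low dword of entry index 0x20): nothing
 *      is pending for this entry, so the data is stashed on
 *      partial_pte_list, the virtual PTE is updated, and the shadow PTE is
 *      parked on the scratch page.
 *   2) 4-byte write at off 0x104 (high dword of the same entry): the
 *      stashed low dword is merged into e.val64, the list node is freed,
 *      and the now-complete entry is DMA-mapped and written into the
 *      shadow GGTT.
 */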
2316
2317 /**
2318  * intel_vgpu_emulate_ggtt_mmio_write - emulate GTT MMIO register write
2319  * @vgpu: a vGPU
2320  * @off: register offset
2321  * @p_data: data from guest write
2322  * @bytes: data length
2323  *
2324  * This function is used to emulate the GTT MMIO register write
2325  *
2326  * Returns:
2327  * Zero on success, error code if failed.
2328  */
2329 int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu,
2330                 unsigned int off, void *p_data, unsigned int bytes)
2331 {
2332         const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
2333         int ret;
2334
2335         if (bytes != 4 && bytes != 8)
2336                 return -EINVAL;
2337
2338         off -= info->gtt_start_offset;
2339         ret = emulate_ggtt_mmio_write(vgpu, off, p_data, bytes);
2340         return ret;
2341 }
2342
2343 static int alloc_scratch_pages(struct intel_vgpu *vgpu,
2344                 enum intel_gvt_gtt_type type)
2345 {
2346         struct intel_vgpu_gtt *gtt = &vgpu->gtt;
2347         struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
2348         int page_entry_num = I915_GTT_PAGE_SIZE >>
2349                                 vgpu->gvt->device_info.gtt_entry_size_shift;
2350         void *scratch_pt;
2351         int i;
2352         struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
2353         dma_addr_t daddr;
2354
2355         if (WARN_ON(type < GTT_TYPE_PPGTT_PTE_PT || type >= GTT_TYPE_MAX))
2356                 return -EINVAL;
2357
2358         scratch_pt = (void *)get_zeroed_page(GFP_KERNEL);
2359         if (!scratch_pt) {
2360                 gvt_vgpu_err("fail to allocate scratch page\n");
2361                 return -ENOMEM;
2362         }
2363
2364         daddr = dma_map_page(dev, virt_to_page(scratch_pt), 0,
2365                         4096, PCI_DMA_BIDIRECTIONAL);
2366         if (dma_mapping_error(dev, daddr)) {
2367                 gvt_vgpu_err("fail to dmamap scratch_pt\n");
2368                 __free_page(virt_to_page(scratch_pt));
2369                 return -ENOMEM;
2370         }
2371         gtt->scratch_pt[type].page_mfn =
2372                 (unsigned long)(daddr >> I915_GTT_PAGE_SHIFT);
2373         gtt->scratch_pt[type].page = virt_to_page(scratch_pt);
2374         gvt_dbg_mm("vgpu%d create scratch_pt: type %d mfn=0x%lx\n",
2375                         vgpu->id, type, gtt->scratch_pt[type].page_mfn);
2376
2377         /* Build the tree by completely filling the scratch pt with entries
2378          * that point to the next-level scratch pt or scratch page. Here
2379          * scratch_pt[type] indicates the scratch pt/scratch page used by page
2380          * tables of level 'type'.
2381          * e.g. scratch_pt[GTT_TYPE_PPGTT_PDE_PT] is used by a
2382          * GTT_TYPE_PPGTT_PDE_PT level pt, which means this scratch_pt itself is
2383          * of type GTT_TYPE_PPGTT_PTE_PT and is fully filled with the scratch page mfn.
2384          */
2385         if (type > GTT_TYPE_PPGTT_PTE_PT) {
2386                 struct intel_gvt_gtt_entry se;
2387
2388                 memset(&se, 0, sizeof(struct intel_gvt_gtt_entry));
2389                 se.type = get_entry_type(type - 1);
2390                 ops->set_pfn(&se, gtt->scratch_pt[type - 1].page_mfn);
2391
2392                 /* The entry parameters like present/writeable/cache type
2393                  * are set to the same values as in i915's scratch page tree.
2394                  */
2395                 se.val64 |= _PAGE_PRESENT | _PAGE_RW;
2396                 if (type == GTT_TYPE_PPGTT_PDE_PT)
2397                         se.val64 |= PPAT_CACHED;
2398
2399                 for (i = 0; i < page_entry_num; i++)
2400                         ops->set_entry(scratch_pt, &se, i, false, 0, vgpu);
2401         }
2402
2403         return 0;
2404 }
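
/*
 * Rough shape of the resulting scratch hierarchy: each scratch page table
 * of level N is fully filled with present entries pointing at the scratch
 * page table of level N - 1, while the lowest (PTE) level scratch table is
 * left as a zeroed page:
 *
 *   scratch_pt[PML4_PT] -> scratch_pt[PDP_PT] -> scratch_pt[PDE_PT]
 *                       -> scratch_pt[PTE_PT] (zero filled)
 *
 * Shadow entries that are removed or not present are redirected to the
 * scratch table of the matching level instead of dangling at freed guest
 * pages.
 */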
2405
2406 static int release_scratch_page_tree(struct intel_vgpu *vgpu)
2407 {
2408         int i;
2409         struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
2410         dma_addr_t daddr;
2411
2412         for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
2413                 if (vgpu->gtt.scratch_pt[i].page != NULL) {
2414                         daddr = (dma_addr_t)(vgpu->gtt.scratch_pt[i].page_mfn <<
2415                                         I915_GTT_PAGE_SHIFT);
2416                         dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL);
2417                         __free_page(vgpu->gtt.scratch_pt[i].page);
2418                         vgpu->gtt.scratch_pt[i].page = NULL;
2419                         vgpu->gtt.scratch_pt[i].page_mfn = 0;
2420                 }
2421         }
2422
2423         return 0;
2424 }
2425
2426 static int create_scratch_page_tree(struct intel_vgpu *vgpu)
2427 {
2428         int i, ret;
2429
2430         for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
2431                 ret = alloc_scratch_pages(vgpu, i);
2432                 if (ret)
2433                         goto err;
2434         }
2435
2436         return 0;
2437
2438 err:
2439         release_scratch_page_tree(vgpu);
2440         return ret;
2441 }
2442
2443 /**
2444  * intel_vgpu_init_gtt - initialize per-vGPU graphics memory virtualization
2445  * @vgpu: a vGPU
2446  *
2447  * This function is used to initialize per-vGPU graphics memory virtualization
2448  * components.
2449  *
2450  * Returns:
2451  * Zero on success, error code if failed.
2452  */
2453 int intel_vgpu_init_gtt(struct intel_vgpu *vgpu)
2454 {
2455         struct intel_vgpu_gtt *gtt = &vgpu->gtt;
2456
2457         INIT_RADIX_TREE(&gtt->spt_tree, GFP_KERNEL);
2458
2459         INIT_LIST_HEAD(&gtt->ppgtt_mm_list_head);
2460         INIT_LIST_HEAD(&gtt->oos_page_list_head);
2461         INIT_LIST_HEAD(&gtt->post_shadow_list_head);
2462
2463         gtt->ggtt_mm = intel_vgpu_create_ggtt_mm(vgpu);
2464         if (IS_ERR(gtt->ggtt_mm)) {
2465                 gvt_vgpu_err("fail to create mm for ggtt.\n");
2466                 return PTR_ERR(gtt->ggtt_mm);
2467         }
2468
2469         intel_vgpu_reset_ggtt(vgpu, false);
2470
2471         INIT_LIST_HEAD(&gtt->ggtt_mm->ggtt_mm.partial_pte_list);
2472
2473         return create_scratch_page_tree(vgpu);
2474 }
2475
2476 static void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu)
2477 {
2478         struct list_head *pos, *n;
2479         struct intel_vgpu_mm *mm;
2480
2481         list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) {
2482                 mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
2483                 intel_vgpu_destroy_mm(mm);
2484         }
2485
2486         if (GEM_WARN_ON(!list_empty(&vgpu->gtt.ppgtt_mm_list_head)))
2487                 gvt_err("vgpu ppgtt mm is not fully destroyed\n");
2488
2489         if (GEM_WARN_ON(!radix_tree_empty(&vgpu->gtt.spt_tree))) {
2490                 gvt_err("Why we still has spt not freed?\n");
2491                 ppgtt_free_all_spt(vgpu);
2492         }
2493 }
2494
2495 static void intel_vgpu_destroy_ggtt_mm(struct intel_vgpu *vgpu)
2496 {
2497         struct intel_gvt_partial_pte *pos, *next;
2498
2499         list_for_each_entry_safe(pos, next,
2500                                  &vgpu->gtt.ggtt_mm->ggtt_mm.partial_pte_list,
2501                                  list) {
2502                 gvt_dbg_mm("partial PTE update on hold 0x%lx : 0x%llx\n",
2503                         pos->offset, pos->data);
2504                 kfree(pos);
2505         }
2506         intel_vgpu_destroy_mm(vgpu->gtt.ggtt_mm);
2507         vgpu->gtt.ggtt_mm = NULL;
2508 }
2509
2510 /**
2511  * intel_vgpu_clean_gtt - clean up per-vGPU graphics memory virtualization
2512  * @vgpu: a vGPU
2513  *
2514  * This function is used to clean up per-vGPU graphics memory virtualization
2515  * components.
2519  */
2520 void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu)
2521 {
2522         intel_vgpu_destroy_all_ppgtt_mm(vgpu);
2523         intel_vgpu_destroy_ggtt_mm(vgpu);
2524         release_scratch_page_tree(vgpu);
2525 }
2526
2527 static void clean_spt_oos(struct intel_gvt *gvt)
2528 {
2529         struct intel_gvt_gtt *gtt = &gvt->gtt;
2530         struct list_head *pos, *n;
2531         struct intel_vgpu_oos_page *oos_page;
2532
2533         WARN(!list_empty(&gtt->oos_page_use_list_head),
2534                 "someone is still using oos page\n");
2535
2536         list_for_each_safe(pos, n, &gtt->oos_page_free_list_head) {
2537                 oos_page = container_of(pos, struct intel_vgpu_oos_page, list);
2538                 list_del(&oos_page->list);
2539                 free_page((unsigned long)oos_page->mem);
2540                 kfree(oos_page);
2541         }
2542 }
2543
2544 static int setup_spt_oos(struct intel_gvt *gvt)
2545 {
2546         struct intel_gvt_gtt *gtt = &gvt->gtt;
2547         struct intel_vgpu_oos_page *oos_page;
2548         int i;
2549         int ret;
2550
2551         INIT_LIST_HEAD(&gtt->oos_page_free_list_head);
2552         INIT_LIST_HEAD(&gtt->oos_page_use_list_head);
2553
2554         for (i = 0; i < preallocated_oos_pages; i++) {
2555                 oos_page = kzalloc(sizeof(*oos_page), GFP_KERNEL);
2556                 if (!oos_page) {
2557                         ret = -ENOMEM;
2558                         goto fail;
2559                 }
2560                 oos_page->mem = (void *)__get_free_pages(GFP_KERNEL, 0);
2561                 if (!oos_page->mem) {
2562                         ret = -ENOMEM;
2563                         kfree(oos_page);
2564                         goto fail;
2565                 }
2566
2567                 INIT_LIST_HEAD(&oos_page->list);
2568                 INIT_LIST_HEAD(&oos_page->vm_list);
2569                 oos_page->id = i;
2570                 list_add_tail(&oos_page->list, &gtt->oos_page_free_list_head);
2571         }
2572
2573         gvt_dbg_mm("%d oos pages preallocated\n", i);
2574
2575         return 0;
2576 fail:
2577         clean_spt_oos(gvt);
2578         return ret;
2579 }
2580
2581 /**
2582  * intel_vgpu_find_ppgtt_mm - find a PPGTT mm object
2583  * @vgpu: a vGPU
2584  * @pdps: pdp root array
2585  *
2586  * This function is used to find a PPGTT mm object from mm object pool
2587  *
2588  * Returns:
2589  * pointer to mm object on success, NULL if failed.
2590  */
2591 struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu,
2592                 u64 pdps[])
2593 {
2594         struct intel_vgpu_mm *mm;
2595         struct list_head *pos;
2596
2597         list_for_each(pos, &vgpu->gtt.ppgtt_mm_list_head) {
2598                 mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
2599
2600                 switch (mm->ppgtt_mm.root_entry_type) {
2601                 case GTT_TYPE_PPGTT_ROOT_L4_ENTRY:
2602                         if (pdps[0] == mm->ppgtt_mm.guest_pdps[0])
2603                                 return mm;
2604                         break;
2605                 case GTT_TYPE_PPGTT_ROOT_L3_ENTRY:
2606                         if (!memcmp(pdps, mm->ppgtt_mm.guest_pdps,
2607                                     sizeof(mm->ppgtt_mm.guest_pdps)))
2608                                 return mm;
2609                         break;
2610                 default:
2611                         GEM_BUG_ON(1);
2612                 }
2613         }
2614         return NULL;
2615 }
2616
2617 /**
2618  * intel_vgpu_get_ppgtt_mm - get or create a PPGTT mm object.
2619  * @vgpu: a vGPU
2620  * @root_entry_type: ppgtt root entry type
2621  * @pdps: guest pdps
2622  *
2623  * This function is used to find or create a PPGTT mm object from a guest.
2624  *
2625  * Returns:
2626  * Pointer to the mm object on success, ERR_PTR-encoded error code if failed.
2627  */
2628 struct intel_vgpu_mm *intel_vgpu_get_ppgtt_mm(struct intel_vgpu *vgpu,
2629                 enum intel_gvt_gtt_type root_entry_type, u64 pdps[])
2630 {
2631         struct intel_vgpu_mm *mm;
2632
2633         mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
2634         if (mm) {
2635                 intel_vgpu_mm_get(mm);
2636         } else {
2637                 mm = intel_vgpu_create_ppgtt_mm(vgpu, root_entry_type, pdps);
2638                 if (IS_ERR(mm))
2639                         gvt_vgpu_err("fail to create mm\n");
2640         }
2641         return mm;
2642 }
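
/*
 * A minimal usage sketch (values are hypothetical and the array size
 * assumes the usual four PDP slots; the real callers handle guest
 * notifications outside this file):
 *
 *   u64 pdps[4] = { 0x12340000 };   // guest PML4 address for an L4 table
 *   struct intel_vgpu_mm *mm;
 *
 *   mm = intel_vgpu_get_ppgtt_mm(vgpu, GTT_TYPE_PPGTT_ROOT_L4_ENTRY, pdps);
 *   if (IS_ERR(mm))
 *           return PTR_ERR(mm);
 *   // ... use the shadowed tables ...
 *   intel_vgpu_put_ppgtt_mm(vgpu, pdps);  // drop the reference when done
 */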
2643
2644 /**
2645  * intel_vgpu_put_ppgtt_mm - find and put a PPGTT mm object.
2646  * @vgpu: a vGPU
2647  * @pdps: guest pdps
2648  *
2649  * This function is used to find a PPGTT mm object from a guest and destroy it.
2650  *
2651  * Returns:
2652  * Zero on success, negative error code if failed.
2653  */
2654 int intel_vgpu_put_ppgtt_mm(struct intel_vgpu *vgpu, u64 pdps[])
2655 {
2656         struct intel_vgpu_mm *mm;
2657
2658         mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
2659         if (!mm) {
2660                 gvt_vgpu_err("fail to find ppgtt instance.\n");
2661                 return -EINVAL;
2662         }
2663         intel_vgpu_mm_put(mm);
2664         return 0;
2665 }
2666
2667 /**
2668  * intel_gvt_init_gtt - initialize mm components of a GVT device
2669  * @gvt: GVT device
2670  *
2671  * This function is called at the initialization stage, to initialize
2672  * the mm components of a GVT device.
2673  *
2674  * Returns:
2675  * zero on success, negative error code if failed.
2676  */
2677 int intel_gvt_init_gtt(struct intel_gvt *gvt)
2678 {
2679         int ret;
2680         void *page;
2681         struct device *dev = &gvt->dev_priv->drm.pdev->dev;
2682         dma_addr_t daddr;
2683
2684         gvt_dbg_core("init gtt\n");
2685
2686         gvt->gtt.pte_ops = &gen8_gtt_pte_ops;
2687         gvt->gtt.gma_ops = &gen8_gtt_gma_ops;
2688
2689         page = (void *)get_zeroed_page(GFP_KERNEL);
2690         if (!page) {
2691                 gvt_err("fail to allocate scratch ggtt page\n");
2692                 return -ENOMEM;
2693         }
2694
2695         daddr = dma_map_page(dev, virt_to_page(page), 0,
2696                         4096, PCI_DMA_BIDIRECTIONAL);
2697         if (dma_mapping_error(dev, daddr)) {
2698                 gvt_err("fail to dmamap scratch ggtt page\n");
2699                 __free_page(virt_to_page(page));
2700                 return -ENOMEM;
2701         }
2702
2703         gvt->gtt.scratch_page = virt_to_page(page);
2704         gvt->gtt.scratch_mfn = (unsigned long)(daddr >> I915_GTT_PAGE_SHIFT);
2705
2706         if (enable_out_of_sync) {
2707                 ret = setup_spt_oos(gvt);
2708                 if (ret) {
2709                         gvt_err("fail to initialize SPT oos\n");
2710                         dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL);
2711                         __free_page(gvt->gtt.scratch_page);
2712                         return ret;
2713                 }
2714         }
2715         INIT_LIST_HEAD(&gvt->gtt.ppgtt_mm_lru_list_head);
2716         mutex_init(&gvt->gtt.ppgtt_mm_lock);
2717         return 0;
2718 }
2719
2720 /**
2721  * intel_gvt_clean_gtt - clean up mm components of a GVT device
2722  * @gvt: GVT device
2723  *
2724  * This function is called at the driver unloading stage, to clean up the
2725  * mm components of a GVT device.
2726  *
2727  */
2728 void intel_gvt_clean_gtt(struct intel_gvt *gvt)
2729 {
2730         struct device *dev = &gvt->dev_priv->drm.pdev->dev;
2731         dma_addr_t daddr = (dma_addr_t)(gvt->gtt.scratch_mfn <<
2732                                         I915_GTT_PAGE_SHIFT);
2733
2734         dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL);
2735
2736         __free_page(gvt->gtt.scratch_page);
2737
2738         if (enable_out_of_sync)
2739                 clean_spt_oos(gvt);
2740 }
2741
2742 /**
2743  * intel_vgpu_invalidate_ppgtt - invalidate PPGTT instances
2744  * @vgpu: a vGPU
2745  *
2746  * This function is called to invalidate all PPGTT instances of a vGPU.
2747  *
2748  */
2749 void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu)
2750 {
2751         struct list_head *pos, *n;
2752         struct intel_vgpu_mm *mm;
2753
2754         list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) {
2755                 mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
2756                 if (mm->type == INTEL_GVT_MM_PPGTT) {
2757                         mutex_lock(&vgpu->gvt->gtt.ppgtt_mm_lock);
2758                         list_del_init(&mm->ppgtt_mm.lru_list);
2759                         mutex_unlock(&vgpu->gvt->gtt.ppgtt_mm_lock);
2760                         if (mm->ppgtt_mm.shadowed)
2761                                 invalidate_ppgtt_mm(mm);
2762                 }
2763         }
2764 }
2765
2766 /**
2767  * intel_vgpu_reset_ggtt - reset the GGTT entry
2768  * @vgpu: a vGPU
2769  * @invalidate_old: invalidate old entries
2770  *
2771  * This function is called at the vGPU create stage
2772  * to reset all the GGTT entries.
2773  *
2774  */
2775 void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu, bool invalidate_old)
2776 {
2777         struct intel_gvt *gvt = vgpu->gvt;
2778         struct drm_i915_private *dev_priv = gvt->dev_priv;
2779         struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
2780         struct intel_gvt_gtt_entry entry = {.type = GTT_TYPE_GGTT_PTE};
2781         struct intel_gvt_gtt_entry old_entry;
2782         u32 index;
2783         u32 num_entries;
2784
2785         pte_ops->set_pfn(&entry, gvt->gtt.scratch_mfn);
2786         pte_ops->set_present(&entry);
2787
2788         index = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT;
2789         num_entries = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT;
2790         while (num_entries--) {
2791                 if (invalidate_old) {
2792                         ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index);
2793                         ggtt_invalidate_pte(vgpu, &old_entry);
2794                 }
2795                 ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++);
2796         }
2797
2798         index = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT;
2799         num_entries = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT;
2800         while (num_entries--) {
2801                 if (invalidate_old) {
2802                         ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index);
2803                         ggtt_invalidate_pte(vgpu, &old_entry);
2804                 }
2805                 ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++);
2806         }
2807
2808         ggtt_invalidate(dev_priv);
2809 }
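
/*
 * Sizing example for the loops above (illustrative only; the actual
 * aperture and hidden sizes depend on the vGPU type): a 256MB mappable
 * aperture gives
 *
 *   num_entries = (256 << 20) >> PAGE_SHIFT = 65536
 *
 * GGTT PTEs, each rewritten to point at the shared scratch page so that a
 * freshly created or reset vGPU never sees stale host mappings.
 */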
2810
2811 /**
2812  * intel_vgpu_reset_gtt - reset the all GTT related status
2813  * @vgpu: a vGPU
2814  *
2815  * This function is called from the vfio core to reset all
2816  * GTT related status, including GGTT, PPGTT, scratch page.
2817  *
2818  */
2819 void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu)
2820 {
2821         /* Shadow pages are only created when there is no page
2822          * table tracking data, so remove page tracking data after
2823          * removing the shadow pages.
2824          */
2825         intel_vgpu_destroy_all_ppgtt_mm(vgpu);
2826         intel_vgpu_reset_ggtt(vgpu, true);
2827 }