Merge tag 'drm-intel-next-2017-11-17-1' of git://anongit.freedesktop.org/drm/drm...
[sfrench/cifs-2.6.git] / drivers / gpu / drm / i915 / gvt / scheduler.c
1 /*
2  * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *    Zhi Wang <zhi.a.wang@intel.com>
25  *
26  * Contributors:
27  *    Ping Gao <ping.a.gao@intel.com>
28  *    Tina Zhang <tina.zhang@intel.com>
29  *    Chanbin Du <changbin.du@intel.com>
30  *    Min He <min.he@intel.com>
31  *    Bing Niu <bing.niu@intel.com>
32  *    Zhenyu Wang <zhenyuw@linux.intel.com>
33  *
34  */
35
36 #include <linux/kthread.h>
37
38 #include "i915_drv.h"
39 #include "gvt.h"
40
/* Byte offset of member @x inside struct execlist_ring_context. */
#define RING_CTX_OFF(x) offsetof(struct execlist_ring_context, x)
43
44 static void set_context_pdp_root_pointer(
45                 struct execlist_ring_context *ring_context,
46                 u32 pdp[8])
47 {
48         struct execlist_mmio_pair *pdp_pair = &ring_context->pdp3_UDW;
49         int i;
50
51         for (i = 0; i < 8; i++)
52                 pdp_pair[i].val = pdp[7 - i];
53 }
54
55 static int populate_shadow_context(struct intel_vgpu_workload *workload)
56 {
57         struct intel_vgpu *vgpu = workload->vgpu;
58         struct intel_gvt *gvt = vgpu->gvt;
59         int ring_id = workload->ring_id;
60         struct i915_gem_context *shadow_ctx = vgpu->submission.shadow_ctx;
61         struct drm_i915_gem_object *ctx_obj =
62                 shadow_ctx->engine[ring_id].state->obj;
63         struct execlist_ring_context *shadow_ring_context;
64         struct page *page;
65         void *dst;
66         unsigned long context_gpa, context_page_num;
67         int i;
68
69         gvt_dbg_sched("ring id %d workload lrca %x", ring_id,
70                         workload->ctx_desc.lrca);
71
72         context_page_num = gvt->dev_priv->engine[ring_id]->context_size;
73
74         context_page_num = context_page_num >> PAGE_SHIFT;
75
76         if (IS_BROADWELL(gvt->dev_priv) && ring_id == RCS)
77                 context_page_num = 19;
78
79         i = 2;
80
81         while (i < context_page_num) {
82                 context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
83                                 (u32)((workload->ctx_desc.lrca + i) <<
84                                 I915_GTT_PAGE_SHIFT));
85                 if (context_gpa == INTEL_GVT_INVALID_ADDR) {
86                         gvt_vgpu_err("Invalid guest context descriptor\n");
87                         return -EFAULT;
88                 }
89
90                 page = i915_gem_object_get_page(ctx_obj, LRC_HEADER_PAGES + i);
91                 dst = kmap(page);
92                 intel_gvt_hypervisor_read_gpa(vgpu, context_gpa, dst,
93                                 I915_GTT_PAGE_SIZE);
94                 kunmap(page);
95                 i++;
96         }
97
98         page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
99         shadow_ring_context = kmap(page);
100
101 #define COPY_REG(name) \
102         intel_gvt_hypervisor_read_gpa(vgpu, workload->ring_context_gpa \
103                 + RING_CTX_OFF(name.val), &shadow_ring_context->name.val, 4)
104
105         COPY_REG(ctx_ctrl);
106         COPY_REG(ctx_timestamp);
107
108         if (ring_id == RCS) {
109                 COPY_REG(bb_per_ctx_ptr);
110                 COPY_REG(rcs_indirect_ctx);
111                 COPY_REG(rcs_indirect_ctx_offset);
112         }
113 #undef COPY_REG
114
115         set_context_pdp_root_pointer(shadow_ring_context,
116                                      workload->shadow_mm->shadow_page_table);
117
118         intel_gvt_hypervisor_read_gpa(vgpu,
119                         workload->ring_context_gpa +
120                         sizeof(*shadow_ring_context),
121                         (void *)shadow_ring_context +
122                         sizeof(*shadow_ring_context),
123                         I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context));
124
125         kunmap(page);
126         return 0;
127 }
128
129 static inline bool is_gvt_request(struct drm_i915_gem_request *req)
130 {
131         return i915_gem_context_force_single_submission(req->ctx);
132 }
133
134 static void save_ring_hw_state(struct intel_vgpu *vgpu, int ring_id)
135 {
136         struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
137         u32 ring_base = dev_priv->engine[ring_id]->mmio_base;
138         i915_reg_t reg;
139
140         reg = RING_INSTDONE(ring_base);
141         vgpu_vreg(vgpu, i915_mmio_reg_offset(reg)) = I915_READ_FW(reg);
142         reg = RING_ACTHD(ring_base);
143         vgpu_vreg(vgpu, i915_mmio_reg_offset(reg)) = I915_READ_FW(reg);
144         reg = RING_ACTHD_UDW(ring_base);
145         vgpu_vreg(vgpu, i915_mmio_reg_offset(reg)) = I915_READ_FW(reg);
146 }
147
148 static int shadow_context_status_change(struct notifier_block *nb,
149                 unsigned long action, void *data)
150 {
151         struct drm_i915_gem_request *req = (struct drm_i915_gem_request *)data;
152         struct intel_gvt *gvt = container_of(nb, struct intel_gvt,
153                                 shadow_ctx_notifier_block[req->engine->id]);
154         struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
155         enum intel_engine_id ring_id = req->engine->id;
156         struct intel_vgpu_workload *workload;
157         unsigned long flags;
158
159         if (!is_gvt_request(req)) {
160                 spin_lock_irqsave(&scheduler->mmio_context_lock, flags);
161                 if (action == INTEL_CONTEXT_SCHEDULE_IN &&
162                     scheduler->engine_owner[ring_id]) {
163                         /* Switch ring from vGPU to host. */
164                         intel_gvt_switch_mmio(scheduler->engine_owner[ring_id],
165                                               NULL, ring_id);
166                         scheduler->engine_owner[ring_id] = NULL;
167                 }
168                 spin_unlock_irqrestore(&scheduler->mmio_context_lock, flags);
169
170                 return NOTIFY_OK;
171         }
172
173         workload = scheduler->current_workload[ring_id];
174         if (unlikely(!workload))
175                 return NOTIFY_OK;
176
177         switch (action) {
178         case INTEL_CONTEXT_SCHEDULE_IN:
179                 spin_lock_irqsave(&scheduler->mmio_context_lock, flags);
180                 if (workload->vgpu != scheduler->engine_owner[ring_id]) {
181                         /* Switch ring from host to vGPU or vGPU to vGPU. */
182                         intel_gvt_switch_mmio(scheduler->engine_owner[ring_id],
183                                               workload->vgpu, ring_id);
184                         scheduler->engine_owner[ring_id] = workload->vgpu;
185                 } else
186                         gvt_dbg_sched("skip ring %d mmio switch for vgpu%d\n",
187                                       ring_id, workload->vgpu->id);
188                 spin_unlock_irqrestore(&scheduler->mmio_context_lock, flags);
189                 atomic_set(&workload->shadow_ctx_active, 1);
190                 break;
191         case INTEL_CONTEXT_SCHEDULE_OUT:
192         case INTEL_CONTEXT_SCHEDULE_PREEMPTED:
193                 save_ring_hw_state(workload->vgpu, ring_id);
194                 atomic_set(&workload->shadow_ctx_active, 0);
195                 break;
196         default:
197                 WARN_ON(1);
198                 return NOTIFY_OK;
199         }
200         wake_up(&workload->shadow_ctx_status_wq);
201         return NOTIFY_OK;
202 }
203
204 static void shadow_context_descriptor_update(struct i915_gem_context *ctx,
205                 struct intel_engine_cs *engine)
206 {
207         struct intel_context *ce = &ctx->engine[engine->id];
208         u64 desc = 0;
209
210         desc = ce->lrc_desc;
211
212         /* Update bits 0-11 of the context descriptor which includes flags
213          * like GEN8_CTX_* cached in desc_template
214          */
215         desc &= U64_MAX << 12;
216         desc |= ctx->desc_template & ((1ULL << 12) - 1);
217
218         ce->lrc_desc = desc;
219 }
220
221 static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload)
222 {
223         struct intel_vgpu *vgpu = workload->vgpu;
224         void *shadow_ring_buffer_va;
225         u32 *cs;
226
227         /* allocate shadow ring buffer */
228         cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32));
229         if (IS_ERR(cs)) {
230                 gvt_vgpu_err("fail to alloc size =%ld shadow  ring buffer\n",
231                         workload->rb_len);
232                 return PTR_ERR(cs);
233         }
234
235         shadow_ring_buffer_va = workload->shadow_ring_buffer_va;
236
237         /* get shadow ring buffer va */
238         workload->shadow_ring_buffer_va = cs;
239
240         memcpy(cs, shadow_ring_buffer_va,
241                         workload->rb_len);
242
243         cs += workload->rb_len / sizeof(u32);
244         intel_ring_advance(workload->req, cs);
245
246         return 0;
247 }
248
249 void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
250 {
251         if (!wa_ctx->indirect_ctx.obj)
252                 return;
253
254         i915_gem_object_unpin_map(wa_ctx->indirect_ctx.obj);
255         i915_gem_object_put(wa_ctx->indirect_ctx.obj);
256 }
257
258 /**
259  * intel_gvt_scan_and_shadow_workload - audit the workload by scanning and
260  * shadow it as well, include ringbuffer,wa_ctx and ctx.
261  * @workload: an abstract entity for each execlist submission.
262  *
263  * This function is called before the workload submitting to i915, to make
264  * sure the content of the workload is valid.
265  */
266 int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
267 {
268         struct intel_vgpu *vgpu = workload->vgpu;
269         struct intel_vgpu_submission *s = &vgpu->submission;
270         struct i915_gem_context *shadow_ctx = s->shadow_ctx;
271         struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
272         int ring_id = workload->ring_id;
273         struct intel_engine_cs *engine = dev_priv->engine[ring_id];
274         struct intel_ring *ring;
275         int ret;
276
277         lockdep_assert_held(&dev_priv->drm.struct_mutex);
278
279         if (workload->shadowed)
280                 return 0;
281
282         shadow_ctx->desc_template &= ~(0x3 << GEN8_CTX_ADDRESSING_MODE_SHIFT);
283         shadow_ctx->desc_template |= workload->ctx_desc.addressing_mode <<
284                                     GEN8_CTX_ADDRESSING_MODE_SHIFT;
285
286         if (!test_and_set_bit(ring_id, s->shadow_ctx_desc_updated))
287                 shadow_context_descriptor_update(shadow_ctx,
288                                         dev_priv->engine[ring_id]);
289
290         ret = intel_gvt_scan_and_shadow_ringbuffer(workload);
291         if (ret)
292                 goto err_scan;
293
294         if ((workload->ring_id == RCS) &&
295             (workload->wa_ctx.indirect_ctx.size != 0)) {
296                 ret = intel_gvt_scan_and_shadow_wa_ctx(&workload->wa_ctx);
297                 if (ret)
298                         goto err_scan;
299         }
300
301         /* pin shadow context by gvt even the shadow context will be pinned
302          * when i915 alloc request. That is because gvt will update the guest
303          * context from shadow context when workload is completed, and at that
304          * moment, i915 may already unpined the shadow context to make the
305          * shadow_ctx pages invalid. So gvt need to pin itself. After update
306          * the guest context, gvt can unpin the shadow_ctx safely.
307          */
308         ring = engine->context_pin(engine, shadow_ctx);
309         if (IS_ERR(ring)) {
310                 ret = PTR_ERR(ring);
311                 gvt_vgpu_err("fail to pin shadow context\n");
312                 goto err_shadow;
313         }
314
315         ret = populate_shadow_context(workload);
316         if (ret)
317                 goto err_unpin;
318         workload->shadowed = true;
319         return 0;
320
321 err_unpin:
322         engine->context_unpin(engine, shadow_ctx);
323 err_shadow:
324         release_shadow_wa_ctx(&workload->wa_ctx);
325 err_scan:
326         return ret;
327 }
328
329 static int intel_gvt_generate_request(struct intel_vgpu_workload *workload)
330 {
331         int ring_id = workload->ring_id;
332         struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv;
333         struct intel_engine_cs *engine = dev_priv->engine[ring_id];
334         struct drm_i915_gem_request *rq;
335         struct intel_vgpu *vgpu = workload->vgpu;
336         struct intel_vgpu_submission *s = &vgpu->submission;
337         struct i915_gem_context *shadow_ctx = s->shadow_ctx;
338         int ret;
339
340         rq = i915_gem_request_alloc(dev_priv->engine[ring_id], shadow_ctx);
341         if (IS_ERR(rq)) {
342                 gvt_vgpu_err("fail to allocate gem request\n");
343                 ret = PTR_ERR(rq);
344                 goto err_unpin;
345         }
346
347         gvt_dbg_sched("ring id %d get i915 gem request %p\n", ring_id, rq);
348
349         workload->req = i915_gem_request_get(rq);
350         ret = copy_workload_to_ring_buffer(workload);
351         if (ret)
352                 goto err_unpin;
353         return 0;
354
355 err_unpin:
356         engine->context_unpin(engine, shadow_ctx);
357         release_shadow_wa_ctx(&workload->wa_ctx);
358         return ret;
359 }
360
361 static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload);
362
363 static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
364 {
365         struct intel_gvt *gvt = workload->vgpu->gvt;
366         const int gmadr_bytes = gvt->device_info.gmadr_bytes_in_cmd;
367         struct intel_vgpu_shadow_bb *bb;
368         int ret;
369
370         list_for_each_entry(bb, &workload->shadow_bb, list) {
371                 bb->vma = i915_gem_object_ggtt_pin(bb->obj, NULL, 0, 0, 0);
372                 if (IS_ERR(bb->vma)) {
373                         ret = PTR_ERR(bb->vma);
374                         goto err;
375                 }
376
377                 /* relocate shadow batch buffer */
378                 bb->bb_start_cmd_va[1] = i915_ggtt_offset(bb->vma);
379                 if (gmadr_bytes == 8)
380                         bb->bb_start_cmd_va[2] = 0;
381
382                 /* No one is going to touch shadow bb from now on. */
383                 if (bb->clflush & CLFLUSH_AFTER) {
384                         drm_clflush_virt_range(bb->va, bb->obj->base.size);
385                         bb->clflush &= ~CLFLUSH_AFTER;
386                 }
387
388                 ret = i915_gem_object_set_to_gtt_domain(bb->obj, false);
389                 if (ret)
390                         goto err;
391
392                 i915_gem_obj_finish_shmem_access(bb->obj);
393                 bb->accessing = false;
394
395                 i915_vma_move_to_active(bb->vma, workload->req, 0);
396         }
397         return 0;
398 err:
399         release_shadow_batch_buffer(workload);
400         return ret;
401 }
402
403 static int update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx)
404 {
405         struct intel_vgpu_workload *workload = container_of(wa_ctx,
406                                         struct intel_vgpu_workload,
407                                         wa_ctx);
408         int ring_id = workload->ring_id;
409         struct intel_vgpu_submission *s = &workload->vgpu->submission;
410         struct i915_gem_context *shadow_ctx = s->shadow_ctx;
411         struct drm_i915_gem_object *ctx_obj =
412                 shadow_ctx->engine[ring_id].state->obj;
413         struct execlist_ring_context *shadow_ring_context;
414         struct page *page;
415
416         page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
417         shadow_ring_context = kmap_atomic(page);
418
419         shadow_ring_context->bb_per_ctx_ptr.val =
420                 (shadow_ring_context->bb_per_ctx_ptr.val &
421                 (~PER_CTX_ADDR_MASK)) | wa_ctx->per_ctx.shadow_gma;
422         shadow_ring_context->rcs_indirect_ctx.val =
423                 (shadow_ring_context->rcs_indirect_ctx.val &
424                 (~INDIRECT_CTX_ADDR_MASK)) | wa_ctx->indirect_ctx.shadow_gma;
425
426         kunmap_atomic(shadow_ring_context);
427         return 0;
428 }
429
430 static int prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
431 {
432         struct i915_vma *vma;
433         unsigned char *per_ctx_va =
434                 (unsigned char *)wa_ctx->indirect_ctx.shadow_va +
435                 wa_ctx->indirect_ctx.size;
436
437         if (wa_ctx->indirect_ctx.size == 0)
438                 return 0;
439
440         vma = i915_gem_object_ggtt_pin(wa_ctx->indirect_ctx.obj, NULL,
441                                        0, CACHELINE_BYTES, 0);
442         if (IS_ERR(vma))
443                 return PTR_ERR(vma);
444
445         /* FIXME: we are not tracking our pinned VMA leaving it
446          * up to the core to fix up the stray pin_count upon
447          * free.
448          */
449
450         wa_ctx->indirect_ctx.shadow_gma = i915_ggtt_offset(vma);
451
452         wa_ctx->per_ctx.shadow_gma = *((unsigned int *)per_ctx_va + 1);
453         memset(per_ctx_va, 0, CACHELINE_BYTES);
454
455         update_wa_ctx_2_shadow_ctx(wa_ctx);
456         return 0;
457 }
458
459 static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload)
460 {
461         struct intel_vgpu *vgpu = workload->vgpu;
462         struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
463         struct intel_vgpu_shadow_bb *bb, *pos;
464
465         if (list_empty(&workload->shadow_bb))
466                 return;
467
468         bb = list_first_entry(&workload->shadow_bb,
469                         struct intel_vgpu_shadow_bb, list);
470
471         mutex_lock(&dev_priv->drm.struct_mutex);
472
473         list_for_each_entry_safe(bb, pos, &workload->shadow_bb, list) {
474                 if (bb->obj) {
475                         if (bb->accessing)
476                                 i915_gem_obj_finish_shmem_access(bb->obj);
477
478                         if (bb->va && !IS_ERR(bb->va))
479                                 i915_gem_object_unpin_map(bb->obj);
480
481                         if (bb->vma && !IS_ERR(bb->vma)) {
482                                 i915_vma_unpin(bb->vma);
483                                 i915_vma_close(bb->vma);
484                         }
485                         __i915_gem_object_release_unless_active(bb->obj);
486                 }
487                 list_del(&bb->list);
488                 kfree(bb);
489         }
490
491         mutex_unlock(&dev_priv->drm.struct_mutex);
492 }
493
494 static int prepare_workload(struct intel_vgpu_workload *workload)
495 {
496         struct intel_vgpu *vgpu = workload->vgpu;
497         int ret = 0;
498
499         ret = intel_vgpu_pin_mm(workload->shadow_mm);
500         if (ret) {
501                 gvt_vgpu_err("fail to vgpu pin mm\n");
502                 return ret;
503         }
504
505         ret = intel_vgpu_sync_oos_pages(workload->vgpu);
506         if (ret) {
507                 gvt_vgpu_err("fail to vgpu sync oos pages\n");
508                 goto err_unpin_mm;
509         }
510
511         ret = intel_vgpu_flush_post_shadow(workload->vgpu);
512         if (ret) {
513                 gvt_vgpu_err("fail to flush post shadow\n");
514                 goto err_unpin_mm;
515         }
516
517         ret = intel_gvt_generate_request(workload);
518         if (ret) {
519                 gvt_vgpu_err("fail to generate request\n");
520                 goto err_unpin_mm;
521         }
522
523         ret = prepare_shadow_batch_buffer(workload);
524         if (ret) {
525                 gvt_vgpu_err("fail to prepare_shadow_batch_buffer\n");
526                 goto err_unpin_mm;
527         }
528
529         ret = prepare_shadow_wa_ctx(&workload->wa_ctx);
530         if (ret) {
531                 gvt_vgpu_err("fail to prepare_shadow_wa_ctx\n");
532                 goto err_shadow_batch;
533         }
534
535         if (workload->prepare) {
536                 ret = workload->prepare(workload);
537                 if (ret)
538                         goto err_shadow_wa_ctx;
539         }
540
541         return 0;
542 err_shadow_wa_ctx:
543         release_shadow_wa_ctx(&workload->wa_ctx);
544 err_shadow_batch:
545         release_shadow_batch_buffer(workload);
546 err_unpin_mm:
547         intel_vgpu_unpin_mm(workload->shadow_mm);
548         return ret;
549 }
550
551 static int dispatch_workload(struct intel_vgpu_workload *workload)
552 {
553         struct intel_vgpu *vgpu = workload->vgpu;
554         struct intel_vgpu_submission *s = &vgpu->submission;
555         struct i915_gem_context *shadow_ctx = s->shadow_ctx;
556         struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
557         int ring_id = workload->ring_id;
558         struct intel_engine_cs *engine = dev_priv->engine[ring_id];
559         int ret = 0;
560
561         gvt_dbg_sched("ring id %d prepare to dispatch workload %p\n",
562                 ring_id, workload);
563
564         mutex_lock(&dev_priv->drm.struct_mutex);
565
566         ret = intel_gvt_scan_and_shadow_workload(workload);
567         if (ret)
568                 goto out;
569
570         ret = prepare_workload(workload);
571         if (ret) {
572                 engine->context_unpin(engine, shadow_ctx);
573                 goto out;
574         }
575
576 out:
577         if (ret)
578                 workload->status = ret;
579
580         if (!IS_ERR_OR_NULL(workload->req)) {
581                 gvt_dbg_sched("ring id %d submit workload to i915 %p\n",
582                                 ring_id, workload->req);
583                 i915_add_request(workload->req);
584                 workload->dispatched = true;
585         }
586
587         mutex_unlock(&dev_priv->drm.struct_mutex);
588         return ret;
589 }
590
591 static struct intel_vgpu_workload *pick_next_workload(
592                 struct intel_gvt *gvt, int ring_id)
593 {
594         struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
595         struct intel_vgpu_workload *workload = NULL;
596
597         mutex_lock(&gvt->lock);
598
599         /*
600          * no current vgpu / will be scheduled out / no workload
601          * bail out
602          */
603         if (!scheduler->current_vgpu) {
604                 gvt_dbg_sched("ring id %d stop - no current vgpu\n", ring_id);
605                 goto out;
606         }
607
608         if (scheduler->need_reschedule) {
609                 gvt_dbg_sched("ring id %d stop - will reschedule\n", ring_id);
610                 goto out;
611         }
612
613         if (list_empty(workload_q_head(scheduler->current_vgpu, ring_id)))
614                 goto out;
615
616         /*
617          * still have current workload, maybe the workload disptacher
618          * fail to submit it for some reason, resubmit it.
619          */
620         if (scheduler->current_workload[ring_id]) {
621                 workload = scheduler->current_workload[ring_id];
622                 gvt_dbg_sched("ring id %d still have current workload %p\n",
623                                 ring_id, workload);
624                 goto out;
625         }
626
627         /*
628          * pick a workload as current workload
629          * once current workload is set, schedule policy routines
630          * will wait the current workload is finished when trying to
631          * schedule out a vgpu.
632          */
633         scheduler->current_workload[ring_id] = container_of(
634                         workload_q_head(scheduler->current_vgpu, ring_id)->next,
635                         struct intel_vgpu_workload, list);
636
637         workload = scheduler->current_workload[ring_id];
638
639         gvt_dbg_sched("ring id %d pick new workload %p\n", ring_id, workload);
640
641         atomic_inc(&workload->vgpu->submission.running_workload_num);
642 out:
643         mutex_unlock(&gvt->lock);
644         return workload;
645 }
646
647 static void update_guest_context(struct intel_vgpu_workload *workload)
648 {
649         struct intel_vgpu *vgpu = workload->vgpu;
650         struct intel_gvt *gvt = vgpu->gvt;
651         struct intel_vgpu_submission *s = &vgpu->submission;
652         struct i915_gem_context *shadow_ctx = s->shadow_ctx;
653         int ring_id = workload->ring_id;
654         struct drm_i915_gem_object *ctx_obj =
655                 shadow_ctx->engine[ring_id].state->obj;
656         struct execlist_ring_context *shadow_ring_context;
657         struct page *page;
658         void *src;
659         unsigned long context_gpa, context_page_num;
660         int i;
661
662         gvt_dbg_sched("ring id %d workload lrca %x\n", ring_id,
663                         workload->ctx_desc.lrca);
664
665         context_page_num = gvt->dev_priv->engine[ring_id]->context_size;
666
667         context_page_num = context_page_num >> PAGE_SHIFT;
668
669         if (IS_BROADWELL(gvt->dev_priv) && ring_id == RCS)
670                 context_page_num = 19;
671
672         i = 2;
673
674         while (i < context_page_num) {
675                 context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
676                                 (u32)((workload->ctx_desc.lrca + i) <<
677                                         I915_GTT_PAGE_SHIFT));
678                 if (context_gpa == INTEL_GVT_INVALID_ADDR) {
679                         gvt_vgpu_err("invalid guest context descriptor\n");
680                         return;
681                 }
682
683                 page = i915_gem_object_get_page(ctx_obj, LRC_HEADER_PAGES + i);
684                 src = kmap(page);
685                 intel_gvt_hypervisor_write_gpa(vgpu, context_gpa, src,
686                                 I915_GTT_PAGE_SIZE);
687                 kunmap(page);
688                 i++;
689         }
690
691         intel_gvt_hypervisor_write_gpa(vgpu, workload->ring_context_gpa +
692                 RING_CTX_OFF(ring_header.val), &workload->rb_tail, 4);
693
694         page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
695         shadow_ring_context = kmap(page);
696
697 #define COPY_REG(name) \
698         intel_gvt_hypervisor_write_gpa(vgpu, workload->ring_context_gpa + \
699                 RING_CTX_OFF(name.val), &shadow_ring_context->name.val, 4)
700
701         COPY_REG(ctx_ctrl);
702         COPY_REG(ctx_timestamp);
703
704 #undef COPY_REG
705
706         intel_gvt_hypervisor_write_gpa(vgpu,
707                         workload->ring_context_gpa +
708                         sizeof(*shadow_ring_context),
709                         (void *)shadow_ring_context +
710                         sizeof(*shadow_ring_context),
711                         I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context));
712
713         kunmap(page);
714 }
715
716 static void clean_workloads(struct intel_vgpu *vgpu, unsigned long engine_mask)
717 {
718         struct intel_vgpu_submission *s = &vgpu->submission;
719         struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
720         struct intel_engine_cs *engine;
721         struct intel_vgpu_workload *pos, *n;
722         unsigned int tmp;
723
724         /* free the unsubmited workloads in the queues. */
725         for_each_engine_masked(engine, dev_priv, engine_mask, tmp) {
726                 list_for_each_entry_safe(pos, n,
727                         &s->workload_q_head[engine->id], list) {
728                         list_del_init(&pos->list);
729                         intel_vgpu_destroy_workload(pos);
730                 }
731                 clear_bit(engine->id, s->shadow_ctx_desc_updated);
732         }
733 }
734
735 static void complete_current_workload(struct intel_gvt *gvt, int ring_id)
736 {
737         struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
738         struct intel_vgpu_workload *workload =
739                 scheduler->current_workload[ring_id];
740         struct intel_vgpu *vgpu = workload->vgpu;
741         struct intel_vgpu_submission *s = &vgpu->submission;
742         int event;
743
744         mutex_lock(&gvt->lock);
745
746         /* For the workload w/ request, needs to wait for the context
747          * switch to make sure request is completed.
748          * For the workload w/o request, directly complete the workload.
749          */
750         if (workload->req) {
751                 struct drm_i915_private *dev_priv =
752                         workload->vgpu->gvt->dev_priv;
753                 struct intel_engine_cs *engine =
754                         dev_priv->engine[workload->ring_id];
755                 wait_event(workload->shadow_ctx_status_wq,
756                            !atomic_read(&workload->shadow_ctx_active));
757
758                 /* If this request caused GPU hang, req->fence.error will
759                  * be set to -EIO. Use -EIO to set workload status so
760                  * that when this request caused GPU hang, didn't trigger
761                  * context switch interrupt to guest.
762                  */
763                 if (likely(workload->status == -EINPROGRESS)) {
764                         if (workload->req->fence.error == -EIO)
765                                 workload->status = -EIO;
766                         else
767                                 workload->status = 0;
768                 }
769
770                 i915_gem_request_put(fetch_and_zero(&workload->req));
771
772                 if (!workload->status && !(vgpu->resetting_eng &
773                                            ENGINE_MASK(ring_id))) {
774                         update_guest_context(workload);
775
776                         for_each_set_bit(event, workload->pending_events,
777                                          INTEL_GVT_EVENT_MAX)
778                                 intel_vgpu_trigger_virtual_event(vgpu, event);
779                 }
780                 mutex_lock(&dev_priv->drm.struct_mutex);
781                 /* unpin shadow ctx as the shadow_ctx update is done */
782                 engine->context_unpin(engine, s->shadow_ctx);
783                 mutex_unlock(&dev_priv->drm.struct_mutex);
784         }
785
786         gvt_dbg_sched("ring id %d complete workload %p status %d\n",
787                         ring_id, workload, workload->status);
788
789         scheduler->current_workload[ring_id] = NULL;
790
791         list_del_init(&workload->list);
792
793         if (!workload->status) {
794                 release_shadow_batch_buffer(workload);
795                 release_shadow_wa_ctx(&workload->wa_ctx);
796         }
797
798         if (workload->status || (vgpu->resetting_eng & ENGINE_MASK(ring_id))) {
799                 /* if workload->status is not successful means HW GPU
800                  * has occurred GPU hang or something wrong with i915/GVT,
801                  * and GVT won't inject context switch interrupt to guest.
802                  * So this error is a vGPU hang actually to the guest.
803                  * According to this we should emunlate a vGPU hang. If
804                  * there are pending workloads which are already submitted
805                  * from guest, we should clean them up like HW GPU does.
806                  *
807                  * if it is in middle of engine resetting, the pending
808                  * workloads won't be submitted to HW GPU and will be
809                  * cleaned up during the resetting process later, so doing
810                  * the workload clean up here doesn't have any impact.
811                  **/
812                 clean_workloads(vgpu, ENGINE_MASK(ring_id));
813         }
814
815         workload->complete(workload);
816
817         atomic_dec(&s->running_workload_num);
818         wake_up(&scheduler->workload_complete_wq);
819
820         if (gvt->scheduler.need_reschedule)
821                 intel_gvt_request_service(gvt, INTEL_GVT_REQUEST_EVENT_SCHED);
822
823         mutex_unlock(&gvt->lock);
824 }
825
/*
 * Start-up arguments handed to a per-ring workload thread.  The block is
 * heap-allocated by the code that spawns the thread and is freed by
 * workload_thread() once both fields have been copied out.
 */
struct workload_thread_param {
	struct intel_gvt *gvt;	/* GVT device the thread serves */
	int ring_id;		/* index of the ring handled by the thread */
};
830
831 static int workload_thread(void *priv)
832 {
833         struct workload_thread_param *p = (struct workload_thread_param *)priv;
834         struct intel_gvt *gvt = p->gvt;
835         int ring_id = p->ring_id;
836         struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
837         struct intel_vgpu_workload *workload = NULL;
838         struct intel_vgpu *vgpu = NULL;
839         int ret;
840         bool need_force_wake = IS_SKYLAKE(gvt->dev_priv)
841                         || IS_KABYLAKE(gvt->dev_priv);
842         DEFINE_WAIT_FUNC(wait, woken_wake_function);
843
844         kfree(p);
845
846         gvt_dbg_core("workload thread for ring %d started\n", ring_id);
847
848         while (!kthread_should_stop()) {
849                 add_wait_queue(&scheduler->waitq[ring_id], &wait);
850                 do {
851                         workload = pick_next_workload(gvt, ring_id);
852                         if (workload)
853                                 break;
854                         wait_woken(&wait, TASK_INTERRUPTIBLE,
855                                    MAX_SCHEDULE_TIMEOUT);
856                 } while (!kthread_should_stop());
857                 remove_wait_queue(&scheduler->waitq[ring_id], &wait);
858
859                 if (!workload)
860                         break;
861
862                 gvt_dbg_sched("ring id %d next workload %p vgpu %d\n",
863                                 workload->ring_id, workload,
864                                 workload->vgpu->id);
865
866                 intel_runtime_pm_get(gvt->dev_priv);
867
868                 gvt_dbg_sched("ring id %d will dispatch workload %p\n",
869                                 workload->ring_id, workload);
870
871                 if (need_force_wake)
872                         intel_uncore_forcewake_get(gvt->dev_priv,
873                                         FORCEWAKE_ALL);
874
875                 mutex_lock(&gvt->lock);
876                 ret = dispatch_workload(workload);
877                 mutex_unlock(&gvt->lock);
878
879                 if (ret) {
880                         vgpu = workload->vgpu;
881                         gvt_vgpu_err("fail to dispatch workload, skip\n");
882                         goto complete;
883                 }
884
885                 gvt_dbg_sched("ring id %d wait workload %p\n",
886                                 workload->ring_id, workload);
887                 i915_wait_request(workload->req, 0, MAX_SCHEDULE_TIMEOUT);
888
889 complete:
890                 gvt_dbg_sched("will complete workload %p, status: %d\n",
891                                 workload, workload->status);
892
893                 complete_current_workload(gvt, ring_id);
894
895                 if (need_force_wake)
896                         intel_uncore_forcewake_put(gvt->dev_priv,
897                                         FORCEWAKE_ALL);
898
899                 intel_runtime_pm_put(gvt->dev_priv);
900                 if (ret && (vgpu_is_vm_unhealthy(ret)))
901                         enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR);
902         }
903         return 0;
904 }
905
906 void intel_gvt_wait_vgpu_idle(struct intel_vgpu *vgpu)
907 {
908         struct intel_vgpu_submission *s = &vgpu->submission;
909         struct intel_gvt *gvt = vgpu->gvt;
910         struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
911
912         if (atomic_read(&s->running_workload_num)) {
913                 gvt_dbg_sched("wait vgpu idle\n");
914
915                 wait_event(scheduler->workload_complete_wq,
916                                 !atomic_read(&s->running_workload_num));
917         }
918 }
919
920 void intel_gvt_clean_workload_scheduler(struct intel_gvt *gvt)
921 {
922         struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
923         struct intel_engine_cs *engine;
924         enum intel_engine_id i;
925
926         gvt_dbg_core("clean workload scheduler\n");
927
928         for_each_engine(engine, gvt->dev_priv, i) {
929                 atomic_notifier_chain_unregister(
930                                         &engine->context_status_notifier,
931                                         &gvt->shadow_ctx_notifier_block[i]);
932                 kthread_stop(scheduler->thread[i]);
933         }
934 }
935
936 int intel_gvt_init_workload_scheduler(struct intel_gvt *gvt)
937 {
938         struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
939         struct workload_thread_param *param = NULL;
940         struct intel_engine_cs *engine;
941         enum intel_engine_id i;
942         int ret;
943
944         gvt_dbg_core("init workload scheduler\n");
945
946         init_waitqueue_head(&scheduler->workload_complete_wq);
947
948         for_each_engine(engine, gvt->dev_priv, i) {
949                 init_waitqueue_head(&scheduler->waitq[i]);
950
951                 param = kzalloc(sizeof(*param), GFP_KERNEL);
952                 if (!param) {
953                         ret = -ENOMEM;
954                         goto err;
955                 }
956
957                 param->gvt = gvt;
958                 param->ring_id = i;
959
960                 scheduler->thread[i] = kthread_run(workload_thread, param,
961                         "gvt workload %d", i);
962                 if (IS_ERR(scheduler->thread[i])) {
963                         gvt_err("fail to create workload thread\n");
964                         ret = PTR_ERR(scheduler->thread[i]);
965                         goto err;
966                 }
967
968                 gvt->shadow_ctx_notifier_block[i].notifier_call =
969                                         shadow_context_status_change;
970                 atomic_notifier_chain_register(&engine->context_status_notifier,
971                                         &gvt->shadow_ctx_notifier_block[i]);
972         }
973         return 0;
974 err:
975         intel_gvt_clean_workload_scheduler(gvt);
976         kfree(param);
977         param = NULL;
978         return ret;
979 }
980
981 /**
982  * intel_vgpu_clean_submission - free submission-related resource for vGPU
983  * @vgpu: a vGPU
984  *
985  * This function is called when a vGPU is being destroyed.
986  *
987  */
988 void intel_vgpu_clean_submission(struct intel_vgpu *vgpu)
989 {
990         struct intel_vgpu_submission *s = &vgpu->submission;
991
992         intel_vgpu_select_submission_ops(vgpu, 0);
993         i915_gem_context_put(s->shadow_ctx);
994         kmem_cache_destroy(s->workloads);
995 }
996
997
998 /**
999  * intel_vgpu_reset_submission - reset submission-related resource for vGPU
1000  * @vgpu: a vGPU
1001  * @engine_mask: engines expected to be reset
1002  *
1003  * This function is called when a vGPU is being destroyed.
1004  *
1005  */
1006 void intel_vgpu_reset_submission(struct intel_vgpu *vgpu,
1007                 unsigned long engine_mask)
1008 {
1009         struct intel_vgpu_submission *s = &vgpu->submission;
1010
1011         if (!s->active)
1012                 return;
1013
1014         clean_workloads(vgpu, engine_mask);
1015         s->ops->reset(vgpu, engine_mask);
1016 }
1017
1018 /**
1019  * intel_vgpu_setup_submission - setup submission-related resource for vGPU
1020  * @vgpu: a vGPU
1021  *
1022  * This function is called when a vGPU is being created.
1023  *
1024  * Returns:
1025  * Zero on success, negative error code if failed.
1026  *
1027  */
1028 int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
1029 {
1030         struct intel_vgpu_submission *s = &vgpu->submission;
1031         enum intel_engine_id i;
1032         struct intel_engine_cs *engine;
1033         int ret;
1034
1035         s->shadow_ctx = i915_gem_context_create_gvt(
1036                         &vgpu->gvt->dev_priv->drm);
1037         if (IS_ERR(s->shadow_ctx))
1038                 return PTR_ERR(s->shadow_ctx);
1039
1040         bitmap_zero(s->shadow_ctx_desc_updated, I915_NUM_ENGINES);
1041
1042         s->workloads = kmem_cache_create("gvt-g_vgpu_workload",
1043                         sizeof(struct intel_vgpu_workload), 0,
1044                         SLAB_HWCACHE_ALIGN,
1045                         NULL);
1046
1047         if (!s->workloads) {
1048                 ret = -ENOMEM;
1049                 goto out_shadow_ctx;
1050         }
1051
1052         for_each_engine(engine, vgpu->gvt->dev_priv, i)
1053                 INIT_LIST_HEAD(&s->workload_q_head[i]);
1054
1055         atomic_set(&s->running_workload_num, 0);
1056         bitmap_zero(s->tlb_handle_pending, I915_NUM_ENGINES);
1057
1058         return 0;
1059
1060 out_shadow_ctx:
1061         i915_gem_context_put(s->shadow_ctx);
1062         return ret;
1063 }
1064
1065 /**
1066  * intel_vgpu_select_submission_ops - select virtual submission interface
1067  * @vgpu: a vGPU
1068  * @interface: expected vGPU virtual submission interface
1069  *
1070  * This function is called when guest configures submission interface.
1071  *
1072  * Returns:
1073  * Zero on success, negative error code if failed.
1074  *
1075  */
1076 int intel_vgpu_select_submission_ops(struct intel_vgpu *vgpu,
1077                                      unsigned int interface)
1078 {
1079         struct intel_vgpu_submission *s = &vgpu->submission;
1080         const struct intel_vgpu_submission_ops *ops[] = {
1081                 [INTEL_VGPU_EXECLIST_SUBMISSION] =
1082                         &intel_vgpu_execlist_submission_ops,
1083         };
1084         int ret;
1085
1086         if (WARN_ON(interface >= ARRAY_SIZE(ops)))
1087                 return -EINVAL;
1088
1089         if (s->active) {
1090                 s->ops->clean(vgpu);
1091                 s->active = false;
1092                 gvt_dbg_core("vgpu%d: de-select ops [ %s ] \n",
1093                                 vgpu->id, s->ops->name);
1094         }
1095
1096         if (interface == 0) {
1097                 s->ops = NULL;
1098                 s->virtual_submission_interface = 0;
1099                 gvt_dbg_core("vgpu%d: no submission ops\n", vgpu->id);
1100                 return 0;
1101         }
1102
1103         ret = ops[interface]->init(vgpu);
1104         if (ret)
1105                 return ret;
1106
1107         s->ops = ops[interface];
1108         s->virtual_submission_interface = interface;
1109         s->active = true;
1110
1111         gvt_dbg_core("vgpu%d: activate ops [ %s ]\n",
1112                         vgpu->id, s->ops->name);
1113
1114         return 0;
1115 }
1116
1117 /**
1118  * intel_vgpu_destroy_workload - destroy a vGPU workload
1119  * @vgpu: a vGPU
1120  *
1121  * This function is called when destroy a vGPU workload.
1122  *
1123  */
1124 void intel_vgpu_destroy_workload(struct intel_vgpu_workload *workload)
1125 {
1126         struct intel_vgpu_submission *s = &workload->vgpu->submission;
1127
1128         if (workload->shadow_mm)
1129                 intel_gvt_mm_unreference(workload->shadow_mm);
1130
1131         kmem_cache_free(s->workloads, workload);
1132 }
1133
1134 static struct intel_vgpu_workload *
1135 alloc_workload(struct intel_vgpu *vgpu)
1136 {
1137         struct intel_vgpu_submission *s = &vgpu->submission;
1138         struct intel_vgpu_workload *workload;
1139
1140         workload = kmem_cache_zalloc(s->workloads, GFP_KERNEL);
1141         if (!workload)
1142                 return ERR_PTR(-ENOMEM);
1143
1144         INIT_LIST_HEAD(&workload->list);
1145         INIT_LIST_HEAD(&workload->shadow_bb);
1146
1147         init_waitqueue_head(&workload->shadow_ctx_status_wq);
1148         atomic_set(&workload->shadow_ctx_active, 0);
1149
1150         workload->status = -EINPROGRESS;
1151         workload->shadowed = false;
1152         workload->vgpu = vgpu;
1153
1154         return workload;
1155 }
1156
/* Byte offset of member @x inside struct execlist_ring_context. */
#define RING_CTX_OFF(x) offsetof(struct execlist_ring_context, x)
1159
1160 static void read_guest_pdps(struct intel_vgpu *vgpu,
1161                 u64 ring_context_gpa, u32 pdp[8])
1162 {
1163         u64 gpa;
1164         int i;
1165
1166         gpa = ring_context_gpa + RING_CTX_OFF(pdp3_UDW.val);
1167
1168         for (i = 0; i < 8; i++)
1169                 intel_gvt_hypervisor_read_gpa(vgpu,
1170                                 gpa + i * 8, &pdp[7 - i], 4);
1171 }
1172
1173 static int prepare_mm(struct intel_vgpu_workload *workload)
1174 {
1175         struct execlist_ctx_descriptor_format *desc = &workload->ctx_desc;
1176         struct intel_vgpu_mm *mm;
1177         struct intel_vgpu *vgpu = workload->vgpu;
1178         int page_table_level;
1179         u32 pdp[8];
1180
1181         if (desc->addressing_mode == 1) { /* legacy 32-bit */
1182                 page_table_level = 3;
1183         } else if (desc->addressing_mode == 3) { /* legacy 64 bit */
1184                 page_table_level = 4;
1185         } else {
1186                 gvt_vgpu_err("Advanced Context mode(SVM) is not supported!\n");
1187                 return -EINVAL;
1188         }
1189
1190         read_guest_pdps(workload->vgpu, workload->ring_context_gpa, pdp);
1191
1192         mm = intel_vgpu_find_ppgtt_mm(workload->vgpu, page_table_level, pdp);
1193         if (mm) {
1194                 intel_gvt_mm_reference(mm);
1195         } else {
1196
1197                 mm = intel_vgpu_create_mm(workload->vgpu, INTEL_GVT_MM_PPGTT,
1198                                 pdp, page_table_level, 0);
1199                 if (IS_ERR(mm)) {
1200                         gvt_vgpu_err("fail to create mm object.\n");
1201                         return PTR_ERR(mm);
1202                 }
1203         }
1204         workload->shadow_mm = mm;
1205         return 0;
1206 }
1207
/* True when context descriptors @a and @b have the same context id and LRCA. */
#define same_context(a, b) (((a)->context_id == (b)->context_id) && \
		((a)->lrca == (b)->lrca))

/*
 * Workload at the tail of list head @q, or NULL when the list is empty.
 * @q is evaluated more than once, so it must be free of side effects; it is
 * parenthesised so that any pointer expression can be passed.
 */
#define get_last_workload(q) \
	(list_empty(q) ? NULL : container_of((q)->prev, \
	struct intel_vgpu_workload, list))
1214 /**
1215  * intel_vgpu_create_workload - create a vGPU workload
1216  * @vgpu: a vGPU
1217  * @desc: a guest context descriptor
1218  *
1219  * This function is called when creating a vGPU workload.
1220  *
1221  * Returns:
1222  * struct intel_vgpu_workload * on success, negative error code in
1223  * pointer if failed.
1224  *
1225  */
1226 struct intel_vgpu_workload *
1227 intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id,
1228                            struct execlist_ctx_descriptor_format *desc)
1229 {
1230         struct intel_vgpu_submission *s = &vgpu->submission;
1231         struct list_head *q = workload_q_head(vgpu, ring_id);
1232         struct intel_vgpu_workload *last_workload = get_last_workload(q);
1233         struct intel_vgpu_workload *workload = NULL;
1234         struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
1235         u64 ring_context_gpa;
1236         u32 head, tail, start, ctl, ctx_ctl, per_ctx, indirect_ctx;
1237         int ret;
1238
1239         ring_context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
1240                         (u32)((desc->lrca + 1) << I915_GTT_PAGE_SHIFT));
1241         if (ring_context_gpa == INTEL_GVT_INVALID_ADDR) {
1242                 gvt_vgpu_err("invalid guest context LRCA: %x\n", desc->lrca);
1243                 return ERR_PTR(-EINVAL);
1244         }
1245
1246         intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
1247                         RING_CTX_OFF(ring_header.val), &head, 4);
1248
1249         intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
1250                         RING_CTX_OFF(ring_tail.val), &tail, 4);
1251
1252         head &= RB_HEAD_OFF_MASK;
1253         tail &= RB_TAIL_OFF_MASK;
1254
1255         if (last_workload && same_context(&last_workload->ctx_desc, desc)) {
1256                 gvt_dbg_el("ring id %d cur workload == last\n", ring_id);
1257                 gvt_dbg_el("ctx head %x real head %lx\n", head,
1258                                 last_workload->rb_tail);
1259                 /*
1260                  * cannot use guest context head pointer here,
1261                  * as it might not be updated at this time
1262                  */
1263                 head = last_workload->rb_tail;
1264         }
1265
1266         gvt_dbg_el("ring id %d begin a new workload\n", ring_id);
1267
1268         /* record some ring buffer register values for scan and shadow */
1269         intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
1270                         RING_CTX_OFF(rb_start.val), &start, 4);
1271         intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
1272                         RING_CTX_OFF(rb_ctrl.val), &ctl, 4);
1273         intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
1274                         RING_CTX_OFF(ctx_ctrl.val), &ctx_ctl, 4);
1275
1276         workload = alloc_workload(vgpu);
1277         if (IS_ERR(workload))
1278                 return workload;
1279
1280         workload->ring_id = ring_id;
1281         workload->ctx_desc = *desc;
1282         workload->ring_context_gpa = ring_context_gpa;
1283         workload->rb_head = head;
1284         workload->rb_tail = tail;
1285         workload->rb_start = start;
1286         workload->rb_ctl = ctl;
1287
1288         if (ring_id == RCS) {
1289                 intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
1290                         RING_CTX_OFF(bb_per_ctx_ptr.val), &per_ctx, 4);
1291                 intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
1292                         RING_CTX_OFF(rcs_indirect_ctx.val), &indirect_ctx, 4);
1293
1294                 workload->wa_ctx.indirect_ctx.guest_gma =
1295                         indirect_ctx & INDIRECT_CTX_ADDR_MASK;
1296                 workload->wa_ctx.indirect_ctx.size =
1297                         (indirect_ctx & INDIRECT_CTX_SIZE_MASK) *
1298                         CACHELINE_BYTES;
1299                 workload->wa_ctx.per_ctx.guest_gma =
1300                         per_ctx & PER_CTX_ADDR_MASK;
1301                 workload->wa_ctx.per_ctx.valid = per_ctx & 1;
1302         }
1303
1304         gvt_dbg_el("workload %p ring id %d head %x tail %x start %x ctl %x\n",
1305                         workload, ring_id, head, tail, start, ctl);
1306
1307         ret = prepare_mm(workload);
1308         if (ret) {
1309                 kmem_cache_free(s->workloads, workload);
1310                 return ERR_PTR(ret);
1311         }
1312
1313         /* Only scan and shadow the first workload in the queue
1314          * as there is only one pre-allocated buf-obj for shadow.
1315          */
1316         if (list_empty(workload_q_head(vgpu, ring_id))) {
1317                 intel_runtime_pm_get(dev_priv);
1318                 mutex_lock(&dev_priv->drm.struct_mutex);
1319                 ret = intel_gvt_scan_and_shadow_workload(workload);
1320                 mutex_unlock(&dev_priv->drm.struct_mutex);
1321                 intel_runtime_pm_put(dev_priv);
1322         }
1323
1324         if (ret && (vgpu_is_vm_unhealthy(ret))) {
1325                 enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR);
1326                 intel_vgpu_destroy_workload(workload);
1327                 return ERR_PTR(ret);
1328         }
1329
1330         return workload;
1331 }