drm/i915/gvt: implement per-vm mmio switching optimization
authorChangbin Du <changbin.du@intel.com>
Thu, 4 May 2017 02:52:38 +0000 (10:52 +0800)
committerZhenyu Wang <zhenyuw@linux.intel.com>
Thu, 8 Jun 2017 05:59:15 +0000 (13:59 +0800)
Commit ab9da627906a ("drm/i915: make context status notifier head be
per engine") gives us a chance to inspect every single request. Then
we can eliminate unnecessary mmio switching for the same vGPU. We
only need mmio switching between different VMs (including the host).

This patch introduces a new general API intel_gvt_switch_mmio() to
replace the old intel_gvt_load/restore_render_mmio(). This function
can be further optimized for vGPU to vGPU switching.

To support individual ring switching, we track the owner who occupies
each ring. When another VM or the host requests a ring, we do the
mmio context switch. Otherwise there is no need to switch the ring.

This optimization is very useful if only one guest has plenty of
workloads and the host is mostly idle. In the best case, no mmio
switching will happen at all.

v2:
  o fix missing ring switch issue. (chuanxiao)
  o support individual ring switch.

Signed-off-by: Changbin Du <changbin.du@intel.com>
Reviewed-by: Chuanxiao Dong <chuanxiao.dong@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
drivers/gpu/drm/i915/gvt/gvt.c
drivers/gpu/drm/i915/gvt/render.c
drivers/gpu/drm/i915/gvt/render.h
drivers/gpu/drm/i915/gvt/sched_policy.c
drivers/gpu/drm/i915/gvt/scheduler.c
drivers/gpu/drm/i915/gvt/scheduler.h

index 7dea5e5d556793c4cbf0fbb974819752ae211a69..20329171e4ab5a51a5e42e2eca749c207e70389e 100644 (file)
@@ -244,7 +244,7 @@ int intel_gvt_init_device(struct drm_i915_private *dev_priv)
        gvt_dbg_core("init gvt device\n");
 
        idr_init(&gvt->vgpu_idr);
-
+       spin_lock_init(&gvt->scheduler.mmio_context_lock);
        mutex_init(&gvt->lock);
        gvt->dev_priv = dev_priv;
 
index c6e7972ac21da8eda7143619a401554e2575a160..19d98c9036722c96876d9311f36ed94245e2fe2e 100644 (file)
@@ -260,7 +260,8 @@ static void restore_mocs(struct intel_vgpu *vgpu, int ring_id)
 
 #define CTX_CONTEXT_CONTROL_VAL        0x03
 
-void intel_gvt_load_render_mmio(struct intel_vgpu *vgpu, int ring_id)
+/* Switch ring mmio values (context) from host to a vgpu. */
+static void switch_mmio_to_vgpu(struct intel_vgpu *vgpu, int ring_id)
 {
        struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
        struct render_mmio *mmio;
@@ -312,7 +313,8 @@ void intel_gvt_load_render_mmio(struct intel_vgpu *vgpu, int ring_id)
        handle_tlb_pending_event(vgpu, ring_id);
 }
 
-void intel_gvt_restore_render_mmio(struct intel_vgpu *vgpu, int ring_id)
+/* Switch ring mmio values (context) from vgpu to host. */
+static void switch_mmio_to_host(struct intel_vgpu *vgpu, int ring_id)
 {
        struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
        struct render_mmio *mmio;
@@ -348,3 +350,32 @@ void intel_gvt_restore_render_mmio(struct intel_vgpu *vgpu, int ring_id)
                                mmio->value, v);
        }
 }
+
+/**
+ * intel_gvt_switch_mmio - switch mmio context of specific engine
+ * @pre: the last vGPU that owned the engine
+ * @next: the vGPU to switch to
+ * @ring_id: specify the engine
+ *
+ * If pre is null, it indicates that the host owns the engine. If
+ * next is null, it indicates that we are switching to the host
+ * workload.
+ */
+void intel_gvt_switch_mmio(struct intel_vgpu *pre,
+                          struct intel_vgpu *next, int ring_id)
+{
+       if (WARN_ON(!pre && !next))
+               return;
+
+       gvt_dbg_render("switch ring %d from %s to %s\n", ring_id,
+                      pre ? "vGPU" : "host", next ? "vGPU" : "HOST");
+
+       /**
+        * TODO: Optimize for vGPU to vGPU switch by merging
+        * switch_mmio_to_host() and switch_mmio_to_vgpu().
+        */
+       if (pre)
+               switch_mmio_to_host(pre, ring_id);
+
+       if (next)
+               switch_mmio_to_vgpu(next, ring_id);
+}
index dac1a3cc458b0e312e94136f7e52759b058c9007..91db1d39d28f65e7ee42d17cbcc9e09cabe0ddf0 100644 (file)
@@ -36,8 +36,8 @@
 #ifndef __GVT_RENDER_H__
 #define __GVT_RENDER_H__
 
-void intel_gvt_load_render_mmio(struct intel_vgpu *vgpu, int ring_id);
+void intel_gvt_switch_mmio(struct intel_vgpu *pre,
+                          struct intel_vgpu *next, int ring_id);
 
-void intel_gvt_restore_render_mmio(struct intel_vgpu *vgpu, int ring_id);
 
 #endif
index 79ba4b3440aafd9537f287028d1e23a6186109a1..f642a3f0cfa02fe4304f7d2c05238eaad7a1b7ba 100644 (file)
@@ -299,8 +299,20 @@ static int tbs_sched_init_vgpu(struct intel_vgpu *vgpu)
 
 static void tbs_sched_clean_vgpu(struct intel_vgpu *vgpu)
 {
+       struct intel_gvt_workload_scheduler *scheduler = &vgpu->gvt->scheduler;
+       int ring_id;
+
        kfree(vgpu->sched_data);
        vgpu->sched_data = NULL;
+
+       spin_lock_bh(&scheduler->mmio_context_lock);
+       for (ring_id = 0; ring_id < I915_NUM_ENGINES; ring_id++) {
+               if (scheduler->engine_owner[ring_id] == vgpu) {
+                       intel_gvt_switch_mmio(vgpu, NULL, ring_id);
+                       scheduler->engine_owner[ring_id] = NULL;
+               }
+       }
+       spin_unlock_bh(&scheduler->mmio_context_lock);
 }
 
 static void tbs_sched_start_schedule(struct intel_vgpu *vgpu)
index 6ae286cb5804aee4342b30dfeda64159d54acf7d..aa7e06df88b699f8a0f3c23b8169ffee4796041e 100644 (file)
@@ -138,21 +138,42 @@ static int shadow_context_status_change(struct notifier_block *nb,
        struct intel_gvt *gvt = container_of(nb, struct intel_gvt,
                                shadow_ctx_notifier_block[req->engine->id]);
        struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
-       struct intel_vgpu_workload *workload =
-               scheduler->current_workload[req->engine->id];
+       enum intel_engine_id ring_id = req->engine->id;
+       struct intel_vgpu_workload *workload;
+
+       if (!is_gvt_request(req)) {
+               spin_lock_bh(&scheduler->mmio_context_lock);
+               if (action == INTEL_CONTEXT_SCHEDULE_IN &&
+                   scheduler->engine_owner[ring_id]) {
+                       /* Switch ring from vGPU to host. */
+                       intel_gvt_switch_mmio(scheduler->engine_owner[ring_id],
+                                             NULL, ring_id);
+                       scheduler->engine_owner[ring_id] = NULL;
+               }
+               spin_unlock_bh(&scheduler->mmio_context_lock);
 
-       if (!is_gvt_request(req) || unlikely(!workload))
+               return NOTIFY_OK;
+       }
+
+       workload = scheduler->current_workload[ring_id];
+       if (unlikely(!workload))
                return NOTIFY_OK;
 
        switch (action) {
        case INTEL_CONTEXT_SCHEDULE_IN:
-               intel_gvt_load_render_mmio(workload->vgpu,
-                                          workload->ring_id);
+               spin_lock_bh(&scheduler->mmio_context_lock);
+               if (workload->vgpu != scheduler->engine_owner[ring_id]) {
+                       /* Switch ring from host to vGPU or vGPU to vGPU. */
+                       intel_gvt_switch_mmio(scheduler->engine_owner[ring_id],
+                                             workload->vgpu, ring_id);
+                       scheduler->engine_owner[ring_id] = workload->vgpu;
+               } else
+                       gvt_dbg_sched("skip ring %d mmio switch for vgpu%d\n",
+                                     ring_id, workload->vgpu->id);
+               spin_unlock_bh(&scheduler->mmio_context_lock);
                atomic_set(&workload->shadow_ctx_active, 1);
                break;
        case INTEL_CONTEXT_SCHEDULE_OUT:
-               intel_gvt_restore_render_mmio(workload->vgpu,
-                                             workload->ring_id);
                /* If the status is -EINPROGRESS means this workload
                 * doesn't meet any issue during dispatching so when
                 * get the SCHEDULE_OUT set the status to be zero for
index 2cd725c0573e75924b3afc49603b7d9b6ba0df16..9b6bf51e9b9b0b1f20ec609e50131261c1f4dec0 100644 (file)
@@ -42,6 +42,10 @@ struct intel_gvt_workload_scheduler {
        struct intel_vgpu_workload *current_workload[I915_NUM_ENGINES];
        bool need_reschedule;
 
+       spinlock_t mmio_context_lock;
+       /* can be null when owner is host */
+       struct intel_vgpu *engine_owner[I915_NUM_ENGINES];
+
        wait_queue_head_t workload_complete_wq;
        struct task_struct *thread[I915_NUM_ENGINES];
        wait_queue_head_t waitq[I915_NUM_ENGINES];