2 * SPDX-License-Identifier: MIT
4 * Copyright © 2018 Intel Corporation
7 #include <linux/prime_numbers.h>
9 #include "gem/i915_gem_pm.h"
10 #include "gt/intel_engine_heartbeat.h"
11 #include "gt/intel_reset.h"
13 #include "i915_selftest.h"
14 #include "selftests/i915_random.h"
15 #include "selftests/igt_flush_test.h"
16 #include "selftests/igt_live_test.h"
17 #include "selftests/igt_spinner.h"
18 #include "selftests/lib_sw_fence.h"
20 #include "gem/selftests/igt_gem_utils.h"
21 #include "gem/selftests/mock_context.h"
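/*
 * CS_GPR() maps the n'th dword of an engine's command streamer general
 * purpose registers; NUM_GPR_DW covers all 16 registers of 2 dwords each.
 */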
23 #define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
24 #define NUM_GPR_DW (16 * 2) /* each GPR is 2 dwords */
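/*
 * Allocate a CPU-cached scratch page and pin it into the GGTT so that
 * both the CS and the CPU can exchange values through it.
 */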
26 static struct i915_vma *create_scratch(struct intel_gt *gt)
28 struct drm_i915_gem_object *obj;
32 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
36 i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
38 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
40 i915_gem_object_put(obj);
44 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
46 i915_gem_object_put(obj);
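/*
 * Heartbeat pulses would interfere with the carefully staged spinners
 * below, so park the heartbeat (and hold an engine-pm wakeref) for the
 * duration of a test and restore the interval afterwards.
 */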
53 static void engine_heartbeat_disable(struct intel_engine_cs *engine,
56 *saved = engine->props.heartbeat_interval_ms;
57 engine->props.heartbeat_interval_ms = 0;
59 intel_engine_pm_get(engine);
60 intel_engine_park_heartbeat(engine);
63 static void engine_heartbeat_enable(struct intel_engine_cs *engine,
66 intel_engine_pm_put(engine);
68 engine->props.heartbeat_interval_ms = saved;
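/*
 * Poll, for up to @timeout jiffies, until @rq has been submitted to the
 * HW (or has already begun execution), flushing the submission tasklet
 * on each iteration.
 */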
71 static int wait_for_submit(struct intel_engine_cs *engine,
72 struct i915_request *rq,
73 unsigned long timeout)
78 intel_engine_flush_submission(engine);
80 if (READ_ONCE(engine->execlists.pending[0]))
83 if (i915_request_is_active(rq))
86 if (i915_request_started(rq)) /* that was quick! */
88 } while (time_before(jiffies, timeout));
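/*
 * Smoke test: submit a spinning request on every engine and check that
 * it starts and then retires cleanly once ended.
 */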
93 static int live_sanitycheck(void *arg)
95 struct intel_gt *gt = arg;
96 struct intel_engine_cs *engine;
97 enum intel_engine_id id;
98 struct igt_spinner spin;
101 if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
104 if (igt_spinner_init(&spin, gt))
107 for_each_engine(engine, gt, id) {
108 struct intel_context *ce;
109 struct i915_request *rq;
111 ce = intel_context_create(engine);
117 rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
123 i915_request_add(rq);
124 if (!igt_wait_for_spinner(&spin, rq)) {
125 GEM_TRACE("spinner failed to start\n");
127 intel_gt_set_wedged(gt);
132 igt_spinner_end(&spin);
133 if (igt_flush_test(gt->i915)) {
139 intel_context_put(ce);
144 igt_spinner_fini(&spin);
148 static int live_unlite_restore(struct intel_gt *gt, int prio)
150 struct intel_engine_cs *engine;
151 enum intel_engine_id id;
152 struct igt_spinner spin;
156 * Check that we can correctly context switch between 2 instances
157 * on the same engine from the same parent context.
160 if (igt_spinner_init(&spin, gt))
164 for_each_engine(engine, gt, id) {
165 struct intel_context *ce[2] = {};
166 struct i915_request *rq[2];
167 struct igt_live_test t;
171 if (prio && !intel_engine_has_preemption(engine))
174 if (!intel_engine_can_store_dword(engine))
177 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
181 engine_heartbeat_disable(engine, &saved);
183 for (n = 0; n < ARRAY_SIZE(ce); n++) {
184 struct intel_context *tmp;
186 tmp = intel_context_create(engine);
192 err = intel_context_pin(tmp);
194 intel_context_put(tmp);
199 * Set up the pair of contexts such that if we
200 * lite-restore using the RING_TAIL from ce[1] it
201 * will execute garbage from ce[0]->ring.
203 memset(tmp->ring->vaddr,
204 POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
205 tmp->ring->vma->size);
209 GEM_BUG_ON(!ce[1]->ring->size);
210 intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
211 __execlists_update_reg_state(ce[1], engine, ce[1]->ring->head);
213 rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
215 err = PTR_ERR(rq[0]);
219 i915_request_get(rq[0]);
220 i915_request_add(rq[0]);
221 GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
223 if (!igt_wait_for_spinner(&spin, rq[0])) {
224 i915_request_put(rq[0]);
228 rq[1] = i915_request_create(ce[1]);
230 err = PTR_ERR(rq[1]);
231 i915_request_put(rq[0]);
237 * Ensure we do the switch to ce[1] on completion.
239 * rq[0] is already submitted, so this should reduce
240 * to a no-op (a wait on a request on the same engine
241 * uses the submit fence, not the completion fence),
242 * but it will install a dependency on rq[1] for rq[0]
243 * that will prevent the pair being reordered by
246 i915_request_await_dma_fence(rq[1], &rq[0]->fence);
249 i915_request_get(rq[1]);
250 i915_request_add(rq[1]);
251 GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
252 i915_request_put(rq[0]);
255 struct i915_sched_attr attr = {
259 /* Alternatively preempt the spinner with ce[1] */
260 engine->schedule(rq[1], &attr);
263 /* And switch back to ce[0] for good measure */
264 rq[0] = i915_request_create(ce[0]);
266 err = PTR_ERR(rq[0]);
267 i915_request_put(rq[1]);
271 i915_request_await_dma_fence(rq[0], &rq[1]->fence);
272 i915_request_get(rq[0]);
273 i915_request_add(rq[0]);
274 GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
275 i915_request_put(rq[1]);
276 i915_request_put(rq[0]);
279 tasklet_kill(&engine->execlists.tasklet); /* flush submission */
280 igt_spinner_end(&spin);
281 for (n = 0; n < ARRAY_SIZE(ce); n++) {
282 if (IS_ERR_OR_NULL(ce[n]))
285 intel_context_unpin(ce[n]);
286 intel_context_put(ce[n]);
289 engine_heartbeat_enable(engine, saved);
290 if (igt_live_test_end(&t))
296 igt_spinner_fini(&spin);
300 static int live_unlite_switch(void *arg)
302 return live_unlite_restore(arg, 0);
305 static int live_unlite_preempt(void *arg)
307 return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
310 static int live_pin_rewind(void *arg)
312 struct intel_gt *gt = arg;
313 struct intel_engine_cs *engine;
314 enum intel_engine_id id;
318 * We have to be careful not to trust intel_ring too much, for example
319 * ring->head is updated upon retire, which is out of sync with pinning
320 * the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
321 * or else we risk writing an older, stale value.
323 * To simulate this, let's apply a bit of deliberate sabotage.
326 for_each_engine(engine, gt, id) {
327 struct intel_context *ce;
328 struct i915_request *rq;
329 struct intel_ring *ring;
330 struct igt_live_test t;
332 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
337 ce = intel_context_create(engine);
343 err = intel_context_pin(ce);
345 intel_context_put(ce);
349 /* Keep the context awake while we play games */
350 err = i915_active_acquire(&ce->active);
352 intel_context_unpin(ce);
353 intel_context_put(ce);
358 /* Poison the ring, and offset the next request from HEAD */
359 memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
360 ring->emit = ring->size / 2;
361 ring->tail = ring->emit;
362 GEM_BUG_ON(ring->head);
364 intel_context_unpin(ce);
366 /* Submit a simple nop request */
367 GEM_BUG_ON(intel_context_is_pinned(ce));
368 rq = intel_context_create_request(ce);
369 i915_active_release(&ce->active); /* e.g. async retire */
370 intel_context_put(ce);
375 GEM_BUG_ON(!rq->head);
376 i915_request_add(rq);
378 /* Expect not to hang! */
379 if (igt_live_test_end(&t)) {
388 static int live_hold_reset(void *arg)
390 struct intel_gt *gt = arg;
391 struct intel_engine_cs *engine;
392 enum intel_engine_id id;
393 struct igt_spinner spin;
397 * In order to support offline error capture for fast preempt reset,
398 * we need to decouple the guilty request and ensure that it and its
399 * descendants are not executed while the capture is in progress.
402 if (!intel_has_reset_engine(gt))
405 if (igt_spinner_init(&spin, gt))
408 for_each_engine(engine, gt, id) {
409 struct intel_context *ce;
410 unsigned long heartbeat;
411 struct i915_request *rq;
413 ce = intel_context_create(engine);
419 engine_heartbeat_disable(engine, &heartbeat);
421 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
426 i915_request_add(rq);
428 if (!igt_wait_for_spinner(&spin, rq)) {
429 intel_gt_set_wedged(gt);
434 /* We have our request executing, now remove it and reset */
436 if (test_and_set_bit(I915_RESET_ENGINE + id,
438 intel_gt_set_wedged(gt);
442 tasklet_disable(&engine->execlists.tasklet);
444 engine->execlists.tasklet.func(engine->execlists.tasklet.data);
445 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
447 i915_request_get(rq);
448 execlists_hold(engine, rq);
449 GEM_BUG_ON(!i915_request_on_hold(rq));
451 intel_engine_reset(engine, NULL);
452 GEM_BUG_ON(rq->fence.error != -EIO);
454 tasklet_enable(&engine->execlists.tasklet);
455 clear_and_wake_up_bit(I915_RESET_ENGINE + id,
458 /* Check that we do not resubmit the held request */
459 if (!i915_request_wait(rq, 0, HZ / 5)) {
460 pr_err("%s: on hold request completed!\n",
462 i915_request_put(rq);
466 GEM_BUG_ON(!i915_request_on_hold(rq));
468 /* But is resubmitted on release */
469 execlists_unhold(engine, rq);
470 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
471 pr_err("%s: held request did not complete!\n",
473 intel_gt_set_wedged(gt);
476 i915_request_put(rq);
479 engine_heartbeat_enable(engine, heartbeat);
480 intel_context_put(ce);
485 igt_spinner_fini(&spin);
489 static const char *error_repr(int err)
491 return err ? "bad" : "good";
494 static int live_error_interrupt(void *arg)
496 static const struct error_phase {
497 enum { GOOD = 0, BAD = -EIO } error[2];
502 { { GOOD, GOOD } }, /* sentinel */
504 struct intel_gt *gt = arg;
505 struct intel_engine_cs *engine;
506 enum intel_engine_id id;
509 * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning
510 * of invalid commands in user batches that will cause a GPU hang.
511 * This is a faster mechanism than using hangcheck/heartbeats, but
512 * only detects problems the HW knows about -- it will not warn when
515 * To verify our detection and reset, we throw some invalid commands
516 * at the HW and wait for the interrupt.
519 if (!intel_has_reset_engine(gt))
522 for_each_engine(engine, gt, id) {
523 const struct error_phase *p;
524 unsigned long heartbeat;
527 engine_heartbeat_disable(engine, &heartbeat);
529 for (p = phases; p->error[0] != GOOD; p++) {
530 struct i915_request *client[ARRAY_SIZE(phases->error)];
534 memset(client, 0, sizeof(client));
535 for (i = 0; i < ARRAY_SIZE(client); i++) {
536 struct intel_context *ce;
537 struct i915_request *rq;
539 ce = intel_context_create(engine);
545 rq = intel_context_create_request(ce);
546 intel_context_put(ce);
552 if (rq->engine->emit_init_breadcrumb) {
553 err = rq->engine->emit_init_breadcrumb(rq);
555 i915_request_add(rq);
560 cs = intel_ring_begin(rq, 2);
562 i915_request_add(rq);
575 client[i] = i915_request_get(rq);
576 i915_request_add(rq);
579 err = wait_for_submit(engine, client[0], HZ / 2);
581 pr_err("%s: first request did not start within time!\n",
587 for (i = 0; i < ARRAY_SIZE(client); i++) {
588 if (i915_request_wait(client[i], 0, HZ / 5) < 0)
589 pr_debug("%s: %s request incomplete!\n",
591 error_repr(p->error[i]));
593 if (!i915_request_started(client[i])) {
594 pr_debug("%s: %s request not started!\n",
596 error_repr(p->error[i]));
601 /* Kick the tasklet to process the error */
602 intel_engine_flush_submission(engine);
603 if (client[i]->fence.error != p->error[i]) {
604 pr_err("%s: %s request completed with wrong error code: %d\n",
606 error_repr(p->error[i]),
607 client[i]->fence.error);
614 for (i = 0; i < ARRAY_SIZE(client); i++)
616 i915_request_put(client[i]);
618 pr_err("%s: failed at phase[%zd] { %d, %d }\n",
619 engine->name, p - phases,
620 p->error[0], p->error[1]);
625 engine_heartbeat_enable(engine, heartbeat);
627 intel_gt_set_wedged(gt);
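/*
 * Emit one link of a semaphore chain: spin (with arbitration enabled)
 * until slot @idx of @vma is signalled, then release the link below by
 * writing into slot @idx - 1.
 */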
636 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
640 cs = intel_ring_begin(rq, 10);
644 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
646 *cs++ = MI_SEMAPHORE_WAIT |
647 MI_SEMAPHORE_GLOBAL_GTT |
649 MI_SEMAPHORE_SAD_NEQ_SDD;
651 *cs++ = i915_ggtt_offset(vma) + 4 * idx;
655 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
656 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
666 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
668 intel_ring_advance(rq, cs);
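/* Submit a single chain link at @idx using a fresh context on @engine. */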
672 static struct i915_request *
673 semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
675 struct intel_context *ce;
676 struct i915_request *rq;
679 ce = intel_context_create(engine);
683 rq = intel_context_create_request(ce);
688 if (rq->engine->emit_init_breadcrumb)
689 err = rq->engine->emit_init_breadcrumb(rq);
691 err = emit_semaphore_chain(rq, vma, idx);
693 i915_request_get(rq);
694 i915_request_add(rq);
699 intel_context_put(ce);
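/*
 * Release the chain: submit a kernel request at the given priority that
 * writes into slot @idx - 1, and kick the tasklet so the release is
 * submitted promptly.
 */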
704 release_queue(struct intel_engine_cs *engine,
705 struct i915_vma *vma,
708 struct i915_sched_attr attr = {
711 struct i915_request *rq;
714 rq = intel_engine_create_kernel_request(engine);
718 cs = intel_ring_begin(rq, 4);
720 i915_request_add(rq);
724 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
725 *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
729 intel_ring_advance(rq, cs);
731 i915_request_get(rq);
732 i915_request_add(rq);
735 engine->schedule(rq, &attr);
736 local_bh_enable(); /* kick tasklet */
738 i915_request_put(rq);
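/*
 * Build a semaphore chain with its head on @outer and @count links on
 * every engine, then release the tail. The head can only complete if
 * @outer timeslices between the links stacked up on it.
 */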
744 slice_semaphore_queue(struct intel_engine_cs *outer,
745 struct i915_vma *vma,
748 struct intel_engine_cs *engine;
749 struct i915_request *head;
750 enum intel_engine_id id;
753 head = semaphore_queue(outer, vma, n++);
755 return PTR_ERR(head);
757 for_each_engine(engine, outer->gt, id) {
758 for (i = 0; i < count; i++) {
759 struct i915_request *rq;
761 rq = semaphore_queue(engine, vma, n++);
767 i915_request_put(rq);
771 err = release_queue(outer, vma, n, INT_MAX);
775 if (i915_request_wait(head, 0,
776 2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) {
777 pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n",
780 intel_gt_set_wedged(outer->gt);
785 i915_request_put(head);
789 static int live_timeslice_preempt(void *arg)
791 struct intel_gt *gt = arg;
792 struct drm_i915_gem_object *obj;
793 struct i915_vma *vma;
799 * If a request takes too long, we would like to give other users
800 * a fair go on the GPU. In particular, users may create batches
801 * that wait upon external input, where that input may even be
802 * supplied by another GPU job. To avoid blocking forever, we
803 * need to preempt the current task and replace it with another
806 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
809 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
813 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
819 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
821 err = PTR_ERR(vaddr);
825 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
829 err = i915_vma_sync(vma);
833 for_each_prime_number_from(count, 1, 16) {
834 struct intel_engine_cs *engine;
835 enum intel_engine_id id;
837 for_each_engine(engine, gt, id) {
840 if (!intel_engine_has_preemption(engine))
843 memset(vaddr, 0, PAGE_SIZE);
845 engine_heartbeat_disable(engine, &saved);
846 err = slice_semaphore_queue(engine, vma, count);
847 engine_heartbeat_enable(engine, saved);
851 if (igt_flush_test(gt->i915)) {
861 i915_gem_object_unpin_map(obj);
863 i915_gem_object_put(obj);
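/*
 * Build a request that (optionally) awaits @wait, spins on a semaphore
 * in the engine's status page and then stores RING_TIMESTAMP into
 * slot @idx, so the test can reconstruct the order of execution.
 */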
867 static struct i915_request *
868 create_rewinder(struct intel_context *ce,
869 struct i915_request *wait,
873 i915_ggtt_offset(ce->engine->status_page.vma) +
874 offset_in_page(slot);
875 struct i915_request *rq;
879 rq = intel_context_create_request(ce);
884 err = i915_request_await_dma_fence(rq, &wait->fence);
889 cs = intel_ring_begin(rq, 10);
895 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
898 *cs++ = MI_SEMAPHORE_WAIT |
899 MI_SEMAPHORE_GLOBAL_GTT |
901 MI_SEMAPHORE_SAD_NEQ_SDD;
906 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
907 *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
908 *cs++ = offset + idx * sizeof(u32);
911 intel_ring_advance(rq, cs);
913 rq->sched.attr.priority = I915_PRIORITY_MASK;
916 i915_request_get(rq);
917 i915_request_add(rq);
919 i915_request_put(rq);
926 static int live_timeslice_rewind(void *arg)
928 struct intel_gt *gt = arg;
929 struct intel_engine_cs *engine;
930 enum intel_engine_id id;
933 * The usual presumption on timeslice expiration is that we replace
934 * the active context with another. However, given a chain of
935 * dependencies we may end up replacing the context with itself,
936 * but with only a few of those requests, forcing us to rewind the
937 * RING_TAIL of the original request.
939 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
942 for_each_engine(engine, gt, id) {
944 enum { X = 1, Y, Z };
945 struct i915_request *rq[3] = {};
946 struct intel_context *ce;
947 unsigned long heartbeat;
948 unsigned long timeslice;
952 if (!intel_engine_has_timeslices(engine))
956 * A:rq1 -- semaphore wait, timestamp X
957 * A:rq2 -- write timestamp Y
959 * B:rq1 [await A:rq1] -- write timestamp Z
961 * Force timeslice, release semaphore.
963 * Expect execution/evaluation order XZY
966 engine_heartbeat_disable(engine, &heartbeat);
967 timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
969 slot = memset32(engine->status_page.addr + 1000, 0, 4);
971 ce = intel_context_create(engine);
977 rq[0] = create_rewinder(ce, NULL, slot, 1);
979 intel_context_put(ce);
983 rq[1] = create_rewinder(ce, NULL, slot, 2);
984 intel_context_put(ce);
988 err = wait_for_submit(engine, rq[1], HZ / 2);
990 pr_err("%s: failed to submit first context\n",
995 ce = intel_context_create(engine);
1001 rq[2] = create_rewinder(ce, rq[0], slot, 3);
1002 intel_context_put(ce);
1006 err = wait_for_submit(engine, rq[2], HZ / 2);
1008 pr_err("%s: failed to submit second context\n",
1012 GEM_BUG_ON(!timer_pending(&engine->execlists.timer));
1014 /* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
1015 GEM_BUG_ON(!i915_request_is_active(rq[A1]));
1016 GEM_BUG_ON(!i915_request_is_active(rq[A2]));
1017 GEM_BUG_ON(!i915_request_is_active(rq[B1]));
1019 /* Wait for the timeslice to kick in */
1020 del_timer(&engine->execlists.timer);
1021 tasklet_hi_schedule(&engine->execlists.tasklet);
1022 intel_engine_flush_submission(engine);
1024 /* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
1025 GEM_BUG_ON(!i915_request_is_active(rq[A1]));
1026 GEM_BUG_ON(!i915_request_is_active(rq[B1]));
1027 GEM_BUG_ON(i915_request_is_active(rq[A2]));
1029 /* Release the hounds! */
1031 wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */
1033 for (i = 1; i <= 3; i++) {
1034 unsigned long timeout = jiffies + HZ / 2;
1036 while (!READ_ONCE(slot[i]) &&
1037 time_before(jiffies, timeout))
1040 if (!time_before(jiffies, timeout)) {
1041 pr_err("%s: rq[%d] timed out\n",
1042 engine->name, i - 1);
1047 pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]);
1051 if (slot[Z] - slot[X] >= slot[Y] - slot[X]) {
1052 pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n",
1060 memset32(&slot[0], -1, 4);
1063 engine->props.timeslice_duration_ms = timeslice;
1064 engine_heartbeat_enable(engine, heartbeat);
1065 for (i = 0; i < 3; i++)
1066 i915_request_put(rq[i]);
1067 if (igt_flush_test(gt->i915))
1076 static struct i915_request *nop_request(struct intel_engine_cs *engine)
1078 struct i915_request *rq;
1080 rq = intel_engine_create_kernel_request(engine);
1084 i915_request_get(rq);
1085 i915_request_add(rq);
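/*
 * Allow two full timeslices, plus a jiffy of slack, for the expected
 * context switch to take place.
 */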
1090 static long timeslice_threshold(const struct intel_engine_cs *engine)
1092 return 2 * msecs_to_jiffies_timeout(timeslice(engine)) + 1;
1095 static int live_timeslice_queue(void *arg)
1097 struct intel_gt *gt = arg;
1098 struct drm_i915_gem_object *obj;
1099 struct intel_engine_cs *engine;
1100 enum intel_engine_id id;
1101 struct i915_vma *vma;
1106 * Make sure that even if ELSP[0] and ELSP[1] are filled, with
1107 * timeslicing between them disabled, we *do* enable timeslicing
1108 * if the queue demands it. (Normally, we do not submit if
1109 * ELSP[1] is already occupied, so we must rely on timeslicing to
1110 * eject ELSP[0] in favour of the queue.)
1112 if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
1115 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1117 return PTR_ERR(obj);
1119 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1125 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
1126 if (IS_ERR(vaddr)) {
1127 err = PTR_ERR(vaddr);
1131 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1135 err = i915_vma_sync(vma);
1139 for_each_engine(engine, gt, id) {
1140 struct i915_sched_attr attr = {
1141 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
1143 struct i915_request *rq, *nop;
1144 unsigned long saved;
1146 if (!intel_engine_has_preemption(engine))
1149 engine_heartbeat_disable(engine, &saved);
1150 memset(vaddr, 0, PAGE_SIZE);
1152 /* ELSP[0]: semaphore wait */
1153 rq = semaphore_queue(engine, vma, 0);
1158 engine->schedule(rq, &attr);
1159 err = wait_for_submit(engine, rq, HZ / 2);
1161 pr_err("%s: Timed out trying to submit semaphores\n",
1166 /* ELSP[1]: nop request */
1167 nop = nop_request(engine);
1172 err = wait_for_submit(engine, nop, HZ / 2);
1173 i915_request_put(nop);
1175 pr_err("%s: Timed out trying to submit nop\n",
1180 GEM_BUG_ON(i915_request_completed(rq));
1181 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
1183 /* Queue: semaphore signal, matching priority as semaphore */
1184 err = release_queue(engine, vma, 1, effective_prio(rq));
1188 intel_engine_flush_submission(engine);
1189 if (!READ_ONCE(engine->execlists.timer.expires) &&
1190 !i915_request_completed(rq)) {
1191 struct drm_printer p =
1192 drm_info_printer(gt->i915->drm.dev);
1194 GEM_TRACE_ERR("%s: Failed to enable timeslicing!\n",
1196 intel_engine_dump(engine, &p,
1197 "%s\n", engine->name);
1200 memset(vaddr, 0xff, PAGE_SIZE);
1204 /* Timeslice every jiffy, so within 2 we should signal */
1205 if (i915_request_wait(rq, 0, timeslice_threshold(engine)) < 0) {
1206 struct drm_printer p =
1207 drm_info_printer(gt->i915->drm.dev);
1209 pr_err("%s: Failed to timeslice into queue\n",
1211 intel_engine_dump(engine, &p,
1212 "%s\n", engine->name);
1214 memset(vaddr, 0xff, PAGE_SIZE);
1218 i915_request_put(rq);
1220 engine_heartbeat_enable(engine, saved);
1226 i915_vma_unpin(vma);
1228 i915_gem_object_unpin_map(obj);
1230 i915_gem_object_put(obj);
1234 static int live_busywait_preempt(void *arg)
1236 struct intel_gt *gt = arg;
1237 struct i915_gem_context *ctx_hi, *ctx_lo;
1238 struct intel_engine_cs *engine;
1239 struct drm_i915_gem_object *obj;
1240 struct i915_vma *vma;
1241 enum intel_engine_id id;
1246 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
1247 * preempt the busywaits used to synchronise between rings.
1250 ctx_hi = kernel_context(gt->i915);
1253 ctx_hi->sched.priority =
1254 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
1256 ctx_lo = kernel_context(gt->i915);
1259 ctx_lo->sched.priority =
1260 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
1262 obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
1268 map = i915_gem_object_pin_map(obj, I915_MAP_WC);
1274 vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
1280 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1284 err = i915_vma_sync(vma);
1288 for_each_engine(engine, gt, id) {
1289 struct i915_request *lo, *hi;
1290 struct igt_live_test t;
1293 if (!intel_engine_has_preemption(engine))
1296 if (!intel_engine_can_store_dword(engine))
1299 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1305 * We create two requests. The low priority request
1306 * busywaits on a semaphore (inside the ringbuffer where
1307 * it should be preemptible) and the high priority request
1308 * uses a MI_STORE_DWORD_IMM to update the semaphore value,
1309 * allowing the first request to complete. If preemption
1310 * fails, we hang instead.
1313 lo = igt_request_alloc(ctx_lo, engine);
1319 cs = intel_ring_begin(lo, 8);
1322 i915_request_add(lo);
1326 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1327 *cs++ = i915_ggtt_offset(vma);
1331 /* XXX Do we need a flush + invalidate here? */
1333 *cs++ = MI_SEMAPHORE_WAIT |
1334 MI_SEMAPHORE_GLOBAL_GTT |
1336 MI_SEMAPHORE_SAD_EQ_SDD;
1338 *cs++ = i915_ggtt_offset(vma);
1341 intel_ring_advance(lo, cs);
1343 i915_request_get(lo);
1344 i915_request_add(lo);
1346 if (wait_for(READ_ONCE(*map), 10)) {
1347 i915_request_put(lo);
1352 /* Low priority request should be busywaiting now */
1353 if (i915_request_wait(lo, 0, 1) != -ETIME) {
1354 i915_request_put(lo);
1355 pr_err("%s: Busywaiting request did not!\n",
1361 hi = igt_request_alloc(ctx_hi, engine);
1364 i915_request_put(lo);
1368 cs = intel_ring_begin(hi, 4);
1371 i915_request_add(hi);
1372 i915_request_put(lo);
1376 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1377 *cs++ = i915_ggtt_offset(vma);
1381 intel_ring_advance(hi, cs);
1382 i915_request_add(hi);
1384 if (i915_request_wait(lo, 0, HZ / 5) < 0) {
1385 struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
1387 pr_err("%s: Failed to preempt semaphore busywait!\n",
1390 intel_engine_dump(engine, &p, "%s\n", engine->name);
1393 i915_request_put(lo);
1394 intel_gt_set_wedged(gt);
1398 GEM_BUG_ON(READ_ONCE(*map));
1399 i915_request_put(lo);
1401 if (igt_live_test_end(&t)) {
1409 i915_vma_unpin(vma);
1411 i915_gem_object_unpin_map(obj);
1413 i915_gem_object_put(obj);
1415 kernel_context_close(ctx_lo);
1417 kernel_context_close(ctx_hi);
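/*
 * Wrapper around igt_spinner_create_request() that resolves the
 * intel_context for @engine from the GEM context @ctx.
 */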
1421 static struct i915_request *
1422 spinner_create_request(struct igt_spinner *spin,
1423 struct i915_gem_context *ctx,
1424 struct intel_engine_cs *engine,
1427 struct intel_context *ce;
1428 struct i915_request *rq;
1430 ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
1432 return ERR_CAST(ce);
1434 rq = igt_spinner_create_request(spin, ce, arb);
1435 intel_context_put(ce);
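/*
 * Check that a spinner from a maximum priority context preempts a
 * spinner already running from a minimum priority context.
 */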
1439 static int live_preempt(void *arg)
1441 struct intel_gt *gt = arg;
1442 struct i915_gem_context *ctx_hi, *ctx_lo;
1443 struct igt_spinner spin_hi, spin_lo;
1444 struct intel_engine_cs *engine;
1445 enum intel_engine_id id;
1448 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1451 if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
1452 pr_err("Logical preemption supported, but not exposed\n");
1454 if (igt_spinner_init(&spin_hi, gt))
1457 if (igt_spinner_init(&spin_lo, gt))
1460 ctx_hi = kernel_context(gt->i915);
1463 ctx_hi->sched.priority =
1464 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
1466 ctx_lo = kernel_context(gt->i915);
1469 ctx_lo->sched.priority =
1470 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
1472 for_each_engine(engine, gt, id) {
1473 struct igt_live_test t;
1474 struct i915_request *rq;
1476 if (!intel_engine_has_preemption(engine))
1479 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1484 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1491 i915_request_add(rq);
1492 if (!igt_wait_for_spinner(&spin_lo, rq)) {
1493 GEM_TRACE("lo spinner failed to start\n");
1495 intel_gt_set_wedged(gt);
1500 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1503 igt_spinner_end(&spin_lo);
1508 i915_request_add(rq);
1509 if (!igt_wait_for_spinner(&spin_hi, rq)) {
1510 GEM_TRACE("hi spinner failed to start\n");
1512 intel_gt_set_wedged(gt);
1517 igt_spinner_end(&spin_hi);
1518 igt_spinner_end(&spin_lo);
1520 if (igt_live_test_end(&t)) {
1528 kernel_context_close(ctx_lo);
1530 kernel_context_close(ctx_hi);
1532 igt_spinner_fini(&spin_lo);
1534 igt_spinner_fini(&spin_hi);
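/*
 * As live_preempt(), but only raise the second context's priority after
 * both spinners have been submitted, checking that a late priority bump
 * is still acted upon.
 */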
1538 static int live_late_preempt(void *arg)
1540 struct intel_gt *gt = arg;
1541 struct i915_gem_context *ctx_hi, *ctx_lo;
1542 struct igt_spinner spin_hi, spin_lo;
1543 struct intel_engine_cs *engine;
1544 struct i915_sched_attr attr = {};
1545 enum intel_engine_id id;
1548 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1551 if (igt_spinner_init(&spin_hi, gt))
1554 if (igt_spinner_init(&spin_lo, gt))
1557 ctx_hi = kernel_context(gt->i915);
1561 ctx_lo = kernel_context(gt->i915);
1565 /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
1566 ctx_lo->sched.priority = I915_USER_PRIORITY(1);
1568 for_each_engine(engine, gt, id) {
1569 struct igt_live_test t;
1570 struct i915_request *rq;
1572 if (!intel_engine_has_preemption(engine))
1575 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
1580 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
1587 i915_request_add(rq);
1588 if (!igt_wait_for_spinner(&spin_lo, rq)) {
1589 pr_err("First context failed to start\n");
1593 rq = spinner_create_request(&spin_hi, ctx_hi, engine,
1596 igt_spinner_end(&spin_lo);
1601 i915_request_add(rq);
1602 if (igt_wait_for_spinner(&spin_hi, rq)) {
1603 pr_err("Second context overtook first?\n");
1607 attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1608 engine->schedule(rq, &attr);
1610 if (!igt_wait_for_spinner(&spin_hi, rq)) {
1611 pr_err("High priority context failed to preempt the low priority context\n");
1616 igt_spinner_end(&spin_hi);
1617 igt_spinner_end(&spin_lo);
1619 if (igt_live_test_end(&t)) {
1627 kernel_context_close(ctx_lo);
1629 kernel_context_close(ctx_hi);
1631 igt_spinner_fini(&spin_lo);
1633 igt_spinner_fini(&spin_hi);
1637 igt_spinner_end(&spin_hi);
1638 igt_spinner_end(&spin_lo);
1639 intel_gt_set_wedged(gt);
1644 struct preempt_client {
1645 struct igt_spinner spin;
1646 struct i915_gem_context *ctx;
1649 static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
1651 c->ctx = kernel_context(gt->i915);
1655 if (igt_spinner_init(&c->spin, gt))
1661 kernel_context_close(c->ctx);
1665 static void preempt_client_fini(struct preempt_client *c)
1667 igt_spinner_fini(&c->spin);
1668 kernel_context_close(c->ctx);
1671 static int live_nopreempt(void *arg)
1673 struct intel_gt *gt = arg;
1674 struct intel_engine_cs *engine;
1675 struct preempt_client a, b;
1676 enum intel_engine_id id;
1680 * Verify that we can disable preemption for an individual request
1681 * that may be being observed and does not want to be interrupted.
1684 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
1687 if (preempt_client_init(gt, &a))
1689 if (preempt_client_init(gt, &b))
1691 b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
1693 for_each_engine(engine, gt, id) {
1694 struct i915_request *rq_a, *rq_b;
1696 if (!intel_engine_has_preemption(engine))
1699 engine->execlists.preempt_hang.count = 0;
1701 rq_a = spinner_create_request(&a.spin,
1705 err = PTR_ERR(rq_a);
1709 /* Low priority client, but unpreemptable! */
1710 __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);
1712 i915_request_add(rq_a);
1713 if (!igt_wait_for_spinner(&a.spin, rq_a)) {
1714 pr_err("First client failed to start\n");
1718 rq_b = spinner_create_request(&b.spin,
1722 err = PTR_ERR(rq_b);
1726 i915_request_add(rq_b);
1728 /* B is much more important than A! (But A is unpreemptable.) */
1729 GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));
1731 /* Wait long enough for preemption and timeslicing */
1732 if (igt_wait_for_spinner(&b.spin, rq_b)) {
1733 pr_err("Second client started too early!\n");
1737 igt_spinner_end(&a.spin);
1739 if (!igt_wait_for_spinner(&b.spin, rq_b)) {
1740 pr_err("Second client failed to start\n");
1744 igt_spinner_end(&b.spin);
1746 if (engine->execlists.preempt_hang.count) {
1747 pr_err("Preemption recorded x%d; should have been suppressed!\n",
1748 engine->execlists.preempt_hang.count);
1753 if (igt_flush_test(gt->i915))
1759 preempt_client_fini(&b);
1761 preempt_client_fini(&a);
1765 igt_spinner_end(&b.spin);
1766 igt_spinner_end(&a.spin);
1767 intel_gt_set_wedged(gt);
1772 struct live_preempt_cancel {
1773 struct intel_engine_cs *engine;
1774 struct preempt_client a, b;
1777 static int __cancel_active0(struct live_preempt_cancel *arg)
1779 struct i915_request *rq;
1780 struct igt_live_test t;
1783 /* Preempt cancel of ELSP0 */
1784 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1785 if (igt_live_test_begin(&t, arg->engine->i915,
1786 __func__, arg->engine->name))
1789 rq = spinner_create_request(&arg->a.spin,
1790 arg->a.ctx, arg->engine,
1795 clear_bit(CONTEXT_BANNED, &rq->context->flags);
1796 i915_request_get(rq);
1797 i915_request_add(rq);
1798 if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
1803 intel_context_set_banned(rq->context);
1804 err = intel_engine_pulse(arg->engine);
1808 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
1813 if (rq->fence.error != -EIO) {
1814 pr_err("Cancelled inflight0 request did not report -EIO\n");
1820 i915_request_put(rq);
1821 if (igt_live_test_end(&t))
1826 static int __cancel_active1(struct live_preempt_cancel *arg)
1828 struct i915_request *rq[2] = {};
1829 struct igt_live_test t;
1832 /* Preempt cancel of ELSP1 */
1833 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1834 if (igt_live_test_begin(&t, arg->engine->i915,
1835 __func__, arg->engine->name))
1838 rq[0] = spinner_create_request(&arg->a.spin,
1839 arg->a.ctx, arg->engine,
1840 MI_NOOP); /* no preemption */
1842 return PTR_ERR(rq[0]);
1844 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
1845 i915_request_get(rq[0]);
1846 i915_request_add(rq[0]);
1847 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
1852 rq[1] = spinner_create_request(&arg->b.spin,
1853 arg->b.ctx, arg->engine,
1855 if (IS_ERR(rq[1])) {
1856 err = PTR_ERR(rq[1]);
1860 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
1861 i915_request_get(rq[1]);
1862 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
1863 i915_request_add(rq[1]);
1867 intel_context_set_banned(rq[1]->context);
1868 err = intel_engine_pulse(arg->engine);
1872 igt_spinner_end(&arg->a.spin);
1873 if (i915_request_wait(rq[1], 0, HZ / 5) < 0) {
1878 if (rq[0]->fence.error != 0) {
1879 pr_err("Normal inflight0 request did not complete\n");
1884 if (rq[1]->fence.error != -EIO) {
1885 pr_err("Cancelled inflight1 request did not report -EIO\n");
1891 i915_request_put(rq[1]);
1892 i915_request_put(rq[0]);
1893 if (igt_live_test_end(&t))
1898 static int __cancel_queued(struct live_preempt_cancel *arg)
1900 struct i915_request *rq[3] = {};
1901 struct igt_live_test t;
1904 /* Full ELSP and one in the wings */
1905 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1906 if (igt_live_test_begin(&t, arg->engine->i915,
1907 __func__, arg->engine->name))
1910 rq[0] = spinner_create_request(&arg->a.spin,
1911 arg->a.ctx, arg->engine,
1914 return PTR_ERR(rq[0]);
1916 clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
1917 i915_request_get(rq[0]);
1918 i915_request_add(rq[0]);
1919 if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
1924 rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
1925 if (IS_ERR(rq[1])) {
1926 err = PTR_ERR(rq[1]);
1930 clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
1931 i915_request_get(rq[1]);
1932 err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
1933 i915_request_add(rq[1]);
1937 rq[2] = spinner_create_request(&arg->b.spin,
1938 arg->a.ctx, arg->engine,
1940 if (IS_ERR(rq[2])) {
1941 err = PTR_ERR(rq[2]);
1945 i915_request_get(rq[2]);
1946 err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
1947 i915_request_add(rq[2]);
1951 intel_context_set_banned(rq[2]->context);
1952 err = intel_engine_pulse(arg->engine);
1956 if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
1961 if (rq[0]->fence.error != -EIO) {
1962 pr_err("Cancelled inflight0 request did not report -EIO\n");
1967 if (rq[1]->fence.error != 0) {
1968 pr_err("Normal inflight1 request did not complete\n");
1973 if (rq[2]->fence.error != -EIO) {
1974 pr_err("Cancelled queued request did not report -EIO\n");
1980 i915_request_put(rq[2]);
1981 i915_request_put(rq[1]);
1982 i915_request_put(rq[0]);
1983 if (igt_live_test_end(&t))
1988 static int __cancel_hostile(struct live_preempt_cancel *arg)
1990 struct i915_request *rq;
1993 /* Preempt cancel non-preemptible spinner in ELSP0 */
1994 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
1997 GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
1998 rq = spinner_create_request(&arg->a.spin,
1999 arg->a.ctx, arg->engine,
2000 MI_NOOP); /* preemption disabled */
2004 clear_bit(CONTEXT_BANNED, &rq->context->flags);
2005 i915_request_get(rq);
2006 i915_request_add(rq);
2007 if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
2012 intel_context_set_banned(rq->context);
2013 err = intel_engine_pulse(arg->engine); /* force reset */
2017 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2022 if (rq->fence.error != -EIO) {
2023 pr_err("Cancelled inflight0 request did not report -EIO\n");
2029 i915_request_put(rq);
2030 if (igt_flush_test(arg->engine->i915))
2035 static int live_preempt_cancel(void *arg)
2037 struct intel_gt *gt = arg;
2038 struct live_preempt_cancel data;
2039 enum intel_engine_id id;
2043 * To cancel an inflight context, we need to first remove it from the
2044 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
2047 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2050 if (preempt_client_init(gt, &data.a))
2052 if (preempt_client_init(gt, &data.b))
2055 for_each_engine(data.engine, gt, id) {
2056 if (!intel_engine_has_preemption(data.engine))
2059 err = __cancel_active0(&data);
2063 err = __cancel_active1(&data);
2067 err = __cancel_queued(&data);
2071 err = __cancel_hostile(&data);
2078 preempt_client_fini(&data.b);
2080 preempt_client_fini(&data.a);
2085 igt_spinner_end(&data.b.spin);
2086 igt_spinner_end(&data.a.spin);
2087 intel_gt_set_wedged(gt);
2091 static int live_suppress_self_preempt(void *arg)
2093 struct intel_gt *gt = arg;
2094 struct intel_engine_cs *engine;
2095 struct i915_sched_attr attr = {
2096 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX)
2098 struct preempt_client a, b;
2099 enum intel_engine_id id;
2103 * Verify that if a preemption request does not cause a change in
2104 * the current execution order, the preempt-to-idle injection is
2105 * skipped and that we do not accidentally apply it after the CS
2109 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2112 if (intel_uc_uses_guc_submission(&gt->uc))
2113 return 0; /* presume black box */
2115 if (intel_vgpu_active(gt->i915))
2116 return 0; /* GVT forces single port & request submission */
2118 if (preempt_client_init(gt, &a))
2120 if (preempt_client_init(gt, &b))
2123 for_each_engine(engine, gt, id) {
2124 struct i915_request *rq_a, *rq_b;
2127 if (!intel_engine_has_preemption(engine))
2130 if (igt_flush_test(gt->i915))
2133 intel_engine_pm_get(engine);
2134 engine->execlists.preempt_hang.count = 0;
2136 rq_a = spinner_create_request(&a.spin,
2140 err = PTR_ERR(rq_a);
2141 intel_engine_pm_put(engine);
2145 i915_request_add(rq_a);
2146 if (!igt_wait_for_spinner(&a.spin, rq_a)) {
2147 pr_err("First client failed to start\n");
2148 intel_engine_pm_put(engine);
2152 /* Keep postponing the timer to avoid premature slicing */
2153 mod_timer(&engine->execlists.timer, jiffies + HZ);
2154 for (depth = 0; depth < 8; depth++) {
2155 rq_b = spinner_create_request(&b.spin,
2159 err = PTR_ERR(rq_b);
2160 intel_engine_pm_put(engine);
2163 i915_request_add(rq_b);
2165 GEM_BUG_ON(i915_request_completed(rq_a));
2166 engine->schedule(rq_a, &attr);
2167 igt_spinner_end(&a.spin);
2169 if (!igt_wait_for_spinner(&b.spin, rq_b)) {
2170 pr_err("Second client failed to start\n");
2171 intel_engine_pm_put(engine);
2178 igt_spinner_end(&a.spin);
2180 if (engine->execlists.preempt_hang.count) {
2181 pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
2183 engine->execlists.preempt_hang.count,
2185 intel_engine_pm_put(engine);
2190 intel_engine_pm_put(engine);
2191 if (igt_flush_test(gt->i915))
2197 preempt_client_fini(&b);
2199 preempt_client_fini(&a);
2203 igt_spinner_end(&b.spin);
2204 igt_spinner_end(&a.spin);
2205 intel_gt_set_wedged(gt);
2210 static int __i915_sw_fence_call
2211 dummy_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
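/*
 * Fabricate a minimal request that is marked as permanently incomplete
 * and never reaches the HW, for exercising the scheduler around real
 * requests.
 */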
2216 static struct i915_request *dummy_request(struct intel_engine_cs *engine)
2218 struct i915_request *rq;
2220 rq = kzalloc(sizeof(*rq), GFP_KERNEL);
2224 rq->engine = engine;
2226 spin_lock_init(&rq->lock);
2227 INIT_LIST_HEAD(&rq->fence.cb_list);
2228 rq->fence.lock = &rq->lock;
2229 rq->fence.ops = &i915_fence_ops;
2231 i915_sched_node_init(&rq->sched);
2233 /* mark this request as permanently incomplete */
2234 rq->fence.seqno = 1;
2235 BUILD_BUG_ON(sizeof(rq->fence.seqno) != 8); /* upper 32b == 0 */
2236 rq->hwsp_seqno = (u32 *)&rq->fence.seqno + 1;
2237 GEM_BUG_ON(i915_request_completed(rq));
2239 i915_sw_fence_init(&rq->submit, dummy_notify);
2240 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
2242 spin_lock_init(&rq->lock);
2243 rq->fence.lock = &rq->lock;
2244 INIT_LIST_HEAD(&rq->fence.cb_list);
2249 static void dummy_request_free(struct i915_request *dummy)
2251 /* We have to fake the CS interrupt to kick the next request */
2252 i915_sw_fence_commit(&dummy->submit);
2254 i915_request_mark_complete(dummy);
2255 dma_fence_signal(&dummy->fence);
2257 i915_sched_node_fini(&dummy->sched);
2258 i915_sw_fence_fini(&dummy->submit);
2260 dma_fence_free(&dummy->fence);
2263 static int live_suppress_wait_preempt(void *arg)
2265 struct intel_gt *gt = arg;
2266 struct preempt_client client[4];
2267 struct i915_request *rq[ARRAY_SIZE(client)] = {};
2268 struct intel_engine_cs *engine;
2269 enum intel_engine_id id;
2274 * Waiters are given a little priority nudge, but not enough
2275 * to actually cause any preemption. Double check that we do
2276 * not needlessly generate preempt-to-idle cycles.
2279 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2282 if (preempt_client_init(gt, &client[0])) /* ELSP[0] */
2284 if (preempt_client_init(gt, &client[1])) /* ELSP[1] */
2286 if (preempt_client_init(gt, &client[2])) /* head of queue */
2288 if (preempt_client_init(gt, &client[3])) /* bystander */
2291 for_each_engine(engine, gt, id) {
2294 if (!intel_engine_has_preemption(engine))
2297 if (!engine->emit_init_breadcrumb)
2300 for (depth = 0; depth < ARRAY_SIZE(client); depth++) {
2301 struct i915_request *dummy;
2303 engine->execlists.preempt_hang.count = 0;
2305 dummy = dummy_request(engine);
2309 for (i = 0; i < ARRAY_SIZE(client); i++) {
2310 struct i915_request *this;
2312 this = spinner_create_request(&client[i].spin,
2313 client[i].ctx, engine,
2316 err = PTR_ERR(this);
2320 /* Disable NEWCLIENT promotion */
2321 __i915_active_fence_set(&i915_request_timeline(this)->last_request,
2324 rq[i] = i915_request_get(this);
2325 i915_request_add(this);
2328 dummy_request_free(dummy);
2330 GEM_BUG_ON(i915_request_completed(rq[0]));
2331 if (!igt_wait_for_spinner(&client[0].spin, rq[0])) {
2332 pr_err("%s: First client failed to start\n",
2336 GEM_BUG_ON(!i915_request_started(rq[0]));
2338 if (i915_request_wait(rq[depth],
2341 pr_err("%s: Waiter depth:%d completed!\n",
2342 engine->name, depth);
2346 for (i = 0; i < ARRAY_SIZE(client); i++) {
2347 igt_spinner_end(&client[i].spin);
2348 i915_request_put(rq[i]);
2352 if (igt_flush_test(gt->i915))
2355 if (engine->execlists.preempt_hang.count) {
2356 pr_err("%s: Preemption recorded x%d, depth %d; should have been suppressed!\n",
2358 engine->execlists.preempt_hang.count,
2368 preempt_client_fini(&client[3]);
2370 preempt_client_fini(&client[2]);
2372 preempt_client_fini(&client[1]);
2374 preempt_client_fini(&client[0]);
2378 for (i = 0; i < ARRAY_SIZE(client); i++) {
2379 igt_spinner_end(&client[i].spin);
2380 i915_request_put(rq[i]);
2382 intel_gt_set_wedged(gt);
2387 static int live_chain_preempt(void *arg)
2389 struct intel_gt *gt = arg;
2390 struct intel_engine_cs *engine;
2391 struct preempt_client hi, lo;
2392 enum intel_engine_id id;
2396 * Build a chain AB...BA between two contexts (A, B) and request
2397 * preemption of the last request. It should then complete before
2398 * the previously submitted spinner in B.
2401 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2404 if (preempt_client_init(gt, &hi))
2407 if (preempt_client_init(gt, &lo))
2410 for_each_engine(engine, gt, id) {
2411 struct i915_sched_attr attr = {
2412 .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
2414 struct igt_live_test t;
2415 struct i915_request *rq;
2416 int ring_size, count, i;
2418 if (!intel_engine_has_preemption(engine))
2421 rq = spinner_create_request(&lo.spin,
2427 i915_request_get(rq);
2428 i915_request_add(rq);
2430 ring_size = rq->wa_tail - rq->head;
2432 ring_size += rq->ring->size;
2433 ring_size = rq->ring->size / ring_size;
2434 pr_debug("%s(%s): Using maximum of %d requests\n",
2435 __func__, engine->name, ring_size);
2437 igt_spinner_end(&lo.spin);
2438 if (i915_request_wait(rq, 0, HZ / 2) < 0) {
2439 pr_err("Timed out waiting to flush %s\n", engine->name);
2440 i915_request_put(rq);
2443 i915_request_put(rq);
2445 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
2450 for_each_prime_number_from(count, 1, ring_size) {
2451 rq = spinner_create_request(&hi.spin,
2456 i915_request_add(rq);
2457 if (!igt_wait_for_spinner(&hi.spin, rq))
2460 rq = spinner_create_request(&lo.spin,
2465 i915_request_add(rq);
2467 for (i = 0; i < count; i++) {
2468 rq = igt_request_alloc(lo.ctx, engine);
2471 i915_request_add(rq);
2474 rq = igt_request_alloc(hi.ctx, engine);
2478 i915_request_get(rq);
2479 i915_request_add(rq);
2480 engine->schedule(rq, &attr);
2482 igt_spinner_end(&hi.spin);
2483 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2484 struct drm_printer p =
2485 drm_info_printer(gt->i915->drm.dev);
2487 pr_err("Failed to preempt over chain of %d\n",
2489 intel_engine_dump(engine, &p,
2490 "%s\n", engine->name);
2491 i915_request_put(rq);
2494 igt_spinner_end(&lo.spin);
2495 i915_request_put(rq);
2497 rq = igt_request_alloc(lo.ctx, engine);
2501 i915_request_get(rq);
2502 i915_request_add(rq);
2504 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2505 struct drm_printer p =
2506 drm_info_printer(gt->i915->drm.dev);
2508 pr_err("Failed to flush low priority chain of %d requests\n",
2510 intel_engine_dump(engine, &p,
2511 "%s\n", engine->name);
2513 i915_request_put(rq);
2516 i915_request_put(rq);
2519 if (igt_live_test_end(&t)) {
2527 preempt_client_fini(&lo);
2529 preempt_client_fini(&hi);
2533 igt_spinner_end(&hi.spin);
2534 igt_spinner_end(&lo.spin);
2535 intel_gt_set_wedged(gt);
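/*
 * Add one member to the gang: a batch that busy-waits on the first
 * dword of its own batch buffer and, once released, terminates the
 * spinner of the previous (lower priority) member.
 */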
2540 static int create_gang(struct intel_engine_cs *engine,
2541 struct i915_request **prev)
2543 struct drm_i915_gem_object *obj;
2544 struct intel_context *ce;
2545 struct i915_request *rq;
2546 struct i915_vma *vma;
2550 ce = intel_context_create(engine);
2554 obj = i915_gem_object_create_internal(engine->i915, 4096);
2560 vma = i915_vma_instance(obj, ce->vm, NULL);
2566 err = i915_vma_pin(vma, 0, 0, PIN_USER);
2570 cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
2574 /* Semaphore target: spin until zero */
2575 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
2577 *cs++ = MI_SEMAPHORE_WAIT |
2579 MI_SEMAPHORE_SAD_EQ_SDD;
2581 *cs++ = lower_32_bits(vma->node.start);
2582 *cs++ = upper_32_bits(vma->node.start);
2585 u64 offset = (*prev)->batch->node.start;
2587 /* Terminate the spinner in the next lower priority batch. */
2588 *cs++ = MI_STORE_DWORD_IMM_GEN4;
2589 *cs++ = lower_32_bits(offset);
2590 *cs++ = upper_32_bits(offset);
2594 *cs++ = MI_BATCH_BUFFER_END;
2595 i915_gem_object_flush_map(obj);
2596 i915_gem_object_unpin_map(obj);
2598 rq = intel_context_create_request(ce);
2603 i915_request_get(rq);
2606 err = i915_request_await_object(rq, vma->obj, false);
2608 err = i915_vma_move_to_active(vma, rq, 0);
2610 err = rq->engine->emit_bb_start(rq,
2613 i915_vma_unlock(vma);
2614 i915_request_add(rq);
2618 i915_gem_object_put(obj);
2619 intel_context_put(ce);
2621 rq->client_link.next = &(*prev)->client_link;
2626 i915_request_put(rq);
2628 i915_gem_object_put(obj);
2630 intel_context_put(ce);
2634 static int live_preempt_gang(void *arg)
2636 struct intel_gt *gt = arg;
2637 struct intel_engine_cs *engine;
2638 enum intel_engine_id id;
2640 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2644 * Build as long a chain of preempters as we can, with each
2645 * request higher priority than the last. Once we are ready, we release
2646 * the last batch, which then percolates down the chain, each releasing
2647 * the next oldest in turn. The intent is to simply push as hard as we
2648 * can with the number of preemptions, trying to exceed narrow HW
2649 * limits. At a minimum, we insist that we can sort all the user
2650 * high priority levels into execution order.
2653 for_each_engine(engine, gt, id) {
2654 struct i915_request *rq = NULL;
2655 struct igt_live_test t;
2656 IGT_TIMEOUT(end_time);
2661 if (!intel_engine_has_preemption(engine))
2664 if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
2668 struct i915_sched_attr attr = {
2669 .priority = I915_USER_PRIORITY(prio++),
2672 err = create_gang(engine, &rq);
2676 /* Submit each spinner at increasing priority */
2677 engine->schedule(rq, &attr);
2679 if (prio <= I915_PRIORITY_MAX)
2682 if (prio > (INT_MAX >> I915_USER_PRIORITY_SHIFT))
2685 if (__igt_timeout(end_time, NULL))
2688 pr_debug("%s: Preempt chain of %d requests\n",
2689 engine->name, prio);
2692 * Such that the last spinner is the highest priority and
2693 * should execute first. When that spinner completes,
2694 * it will terminate the next lowest spinner until there
2695 * are no more spinners and the gang is complete.
2697 cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC);
2700 i915_gem_object_unpin_map(rq->batch->obj);
2703 intel_gt_set_wedged(gt);
2706 while (rq) { /* wait for each rq from highest to lowest prio */
2707 struct i915_request *n =
2708 list_next_entry(rq, client_link);
2710 if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
2711 struct drm_printer p =
2712 drm_info_printer(engine->i915->drm.dev);
2714 pr_err("Failed to flush chain of %d requests, at %d\n",
2715 prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT);
2716 intel_engine_dump(engine, &p,
2717 "%s\n", engine->name);
2722 i915_request_put(rq);
2726 if (igt_live_test_end(&t))
2735 static int live_preempt_timeout(void *arg)
2737 struct intel_gt *gt = arg;
2738 struct i915_gem_context *ctx_hi, *ctx_lo;
2739 struct igt_spinner spin_lo;
2740 struct intel_engine_cs *engine;
2741 enum intel_engine_id id;
2745 * Check that we force preemption to occur by cancelling the previous
2746 * context if it refuses to yield the GPU.
2748 if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
2751 if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
2754 if (!intel_has_reset_engine(gt))
2757 if (igt_spinner_init(&spin_lo, gt))
2760 ctx_hi = kernel_context(gt->i915);
2763 ctx_hi->sched.priority =
2764 I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
2766 ctx_lo = kernel_context(gt->i915);
2769 ctx_lo->sched.priority =
2770 I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
2772 for_each_engine(engine, gt, id) {
2773 unsigned long saved_timeout;
2774 struct i915_request *rq;
2776 if (!intel_engine_has_preemption(engine))
2779 rq = spinner_create_request(&spin_lo, ctx_lo, engine,
2780 MI_NOOP); /* preemption disabled */
2786 i915_request_add(rq);
2787 if (!igt_wait_for_spinner(&spin_lo, rq)) {
2788 intel_gt_set_wedged(gt);
2793 rq = igt_request_alloc(ctx_hi, engine);
2795 igt_spinner_end(&spin_lo);
2800 /* Flush the previous CS ack before changing timeouts */
2801 while (READ_ONCE(engine->execlists.pending[0]))
2804 saved_timeout = engine->props.preempt_timeout_ms;
2805 engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */
2807 i915_request_get(rq);
2808 i915_request_add(rq);
2810 intel_engine_flush_submission(engine);
2811 engine->props.preempt_timeout_ms = saved_timeout;
2813 if (i915_request_wait(rq, 0, HZ / 10) < 0) {
2814 intel_gt_set_wedged(gt);
2815 i915_request_put(rq);
2820 igt_spinner_end(&spin_lo);
2821 i915_request_put(rq);
2826 kernel_context_close(ctx_lo);
2828 kernel_context_close(ctx_hi);
2830 igt_spinner_fini(&spin_lo);
2834 static int random_range(struct rnd_state *rnd, int min, int max)
2836 return i915_prandom_u32_max_state(max - min, rnd) + min;
2839 static int random_priority(struct rnd_state *rnd)
2841 return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX);
2844 struct preempt_smoke {
2845 struct intel_gt *gt;
2846 struct i915_gem_context **contexts;
2847 struct intel_engine_cs *engine;
2848 struct drm_i915_gem_object *batch;
2849 unsigned int ncontext;
2850 struct rnd_state prng;
2851 unsigned long count;
2854 static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
2856 return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext,
2860 static int smoke_submit(struct preempt_smoke *smoke,
2861 struct i915_gem_context *ctx, int prio,
2862 struct drm_i915_gem_object *batch)
2864 struct i915_request *rq;
2865 struct i915_vma *vma = NULL;
2869 struct i915_address_space *vm;
2871 vm = i915_gem_context_get_vm_rcu(ctx);
2872 vma = i915_vma_instance(batch, vm, NULL);
2875 return PTR_ERR(vma);
2877 err = i915_vma_pin(vma, 0, 0, PIN_USER);
2882 ctx->sched.priority = prio;
2884 rq = igt_request_alloc(ctx, smoke->engine);
2892 err = i915_request_await_object(rq, vma->obj, false);
2894 err = i915_vma_move_to_active(vma, rq, 0);
2896 err = rq->engine->emit_bb_start(rq,
2899 i915_vma_unlock(vma);
2902 i915_request_add(rq);
2906 i915_vma_unpin(vma);
2911 static int smoke_crescendo_thread(void *arg)
2913 struct preempt_smoke *smoke = arg;
2914 IGT_TIMEOUT(end_time);
2915 unsigned long count;
2919 struct i915_gem_context *ctx = smoke_context(smoke);
2922 err = smoke_submit(smoke,
2923 ctx, count % I915_PRIORITY_MAX,
2929 } while (!__igt_timeout(end_time, NULL));
2931 smoke->count = count;
2935 static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
2936 #define BATCH BIT(0)
2938 struct task_struct *tsk[I915_NUM_ENGINES] = {};
2939 struct preempt_smoke arg[I915_NUM_ENGINES];
2940 struct intel_engine_cs *engine;
2941 enum intel_engine_id id;
2942 unsigned long count;
2945 for_each_engine(engine, smoke->gt, id) {
2947 arg[id].engine = engine;
2948 if (!(flags & BATCH))
2949 arg[id].batch = NULL;
2952 tsk[id] = kthread_run(smoke_crescendo_thread, &arg[id],
2953 "igt/smoke:%d", id);
2954 if (IS_ERR(tsk[id])) {
2955 err = PTR_ERR(tsk[id]);
2958 get_task_struct(tsk[id]);
2961 yield(); /* start all threads before we kthread_stop() */
2964 for_each_engine(engine, smoke->gt, id) {
2967 if (IS_ERR_OR_NULL(tsk[id]))
2970 status = kthread_stop(tsk[id]);
2974 count += arg[id].count;
2976 put_task_struct(tsk[id]);
2979 pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
2981 RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
2985 static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
2987 enum intel_engine_id id;
2988 IGT_TIMEOUT(end_time);
2989 unsigned long count;
2993 for_each_engine(smoke->engine, smoke->gt, id) {
2994 struct i915_gem_context *ctx = smoke_context(smoke);
2997 err = smoke_submit(smoke,
2998 ctx, random_priority(&smoke->prng),
2999 flags & BATCH ? smoke->batch : NULL);
3005 } while (!__igt_timeout(end_time, NULL));
3007 pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
3009 RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
3013 static int live_preempt_smoke(void *arg)
3015 struct preempt_smoke smoke = {
3017 .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
3020 const unsigned int phase[] = { 0, BATCH };
3021 struct igt_live_test t;
3026 if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915))
3029 smoke.contexts = kmalloc_array(smoke.ncontext,
3030 sizeof(*smoke.contexts),
3032 if (!smoke.contexts)
3036 i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
3037 if (IS_ERR(smoke.batch)) {
3038 err = PTR_ERR(smoke.batch);
3042 cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB);
3047 for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++)
3048 cs[n] = MI_ARB_CHECK;
3049 cs[n] = MI_BATCH_BUFFER_END;
3050 i915_gem_object_flush_map(smoke.batch);
3051 i915_gem_object_unpin_map(smoke.batch);
3053 if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
3058 for (n = 0; n < smoke.ncontext; n++) {
3059 smoke.contexts[n] = kernel_context(smoke.gt->i915);
3060 if (!smoke.contexts[n])
3064 for (n = 0; n < ARRAY_SIZE(phase); n++) {
3065 err = smoke_crescendo(&smoke, phase[n]);
3069 err = smoke_random(&smoke, phase[n]);
3075 if (igt_live_test_end(&t))
3078 for (n = 0; n < smoke.ncontext; n++) {
3079 if (!smoke.contexts[n])
3081 kernel_context_close(smoke.contexts[n]);
3085 i915_gem_object_put(smoke.batch);
3087 kfree(smoke.contexts);
3092 static int nop_virtual_engine(struct intel_gt *gt,
3093 struct intel_engine_cs **siblings,
3094 unsigned int nsibling,
3097 #define CHAIN BIT(0)
3099 IGT_TIMEOUT(end_time);
3100 struct i915_request *request[16] = {};
3101 struct intel_context *ve[16];
3102 unsigned long n, prime, nc;
3103 struct igt_live_test t;
3104 ktime_t times[2] = {};
3107 GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
3109 for (n = 0; n < nctx; n++) {
3110 ve[n] = intel_execlists_create_virtual(siblings, nsibling);
3111 if (IS_ERR(ve[n])) {
3112 err = PTR_ERR(ve[n]);
3117 err = intel_context_pin(ve[n]);
3119 intel_context_put(ve[n]);
3125 err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
3129 for_each_prime_number_from(prime, 1, 8192) {
3130 times[1] = ktime_get_raw();
3132 if (flags & CHAIN) {
3133 for (nc = 0; nc < nctx; nc++) {
3134 for (n = 0; n < prime; n++) {
3135 struct i915_request *rq;
3137 rq = i915_request_create(ve[nc]);
3144 i915_request_put(request[nc]);
3145 request[nc] = i915_request_get(rq);
3146 i915_request_add(rq);
3150 for (n = 0; n < prime; n++) {
3151 for (nc = 0; nc < nctx; nc++) {
3152 struct i915_request *rq;
3154 rq = i915_request_create(ve[nc]);
3161 i915_request_put(request[nc]);
3162 request[nc] = i915_request_get(rq);
3163 i915_request_add(rq);
3168 for (nc = 0; nc < nctx; nc++) {
3169 if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
3170 pr_err("%s(%s): wait for %llx:%lld timed out\n",
3171 __func__, ve[0]->engine->name,
3172 request[nc]->fence.context,
3173 request[nc]->fence.seqno);
3175 GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3176 __func__, ve[0]->engine->name,
3177 request[nc]->fence.context,
3178 request[nc]->fence.seqno);
3180 intel_gt_set_wedged(gt);
3185 times[1] = ktime_sub(ktime_get_raw(), times[1]);
3187 times[0] = times[1];
3189 for (nc = 0; nc < nctx; nc++) {
3190 i915_request_put(request[nc]);
3194 if (__igt_timeout(end_time, NULL))
3198 err = igt_live_test_end(&t);
3202 pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
3203 nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
3204 prime, div64_u64(ktime_to_ns(times[1]), prime));
3207 if (igt_flush_test(gt->i915))
3210 for (nc = 0; nc < nctx; nc++) {
3211 i915_request_put(request[nc]);
3212 intel_context_unpin(ve[nc]);
3213 intel_context_put(ve[nc]);
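/*
 * live_virtual_engine: first wrap each physical engine in a single-sibling
 * virtual engine, then for every engine class build a virtual engine from
 * all of its instances and run nop_virtual_engine with 1..nsibling+1
 * contexts, both interleaved and chained.
 */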
3218 static int live_virtual_engine(void *arg)
3220 struct intel_gt *gt = arg;
3221 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3222 struct intel_engine_cs *engine;
3223 enum intel_engine_id id;
3224 unsigned int class, inst;
3227 if (intel_uc_uses_guc_submission(&gt->uc))
3230 for_each_engine(engine, gt, id) {
3231 err = nop_virtual_engine(gt, &engine, 1, 1, 0);
3233 pr_err("Failed to wrap engine %s: err=%d\n",
3239 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3243 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3244 if (!gt->engine_class[class][inst])
3247 siblings[nsibling++] = gt->engine_class[class][inst];
3252 for (n = 1; n <= nsibling + 1; n++) {
3253 err = nop_virtual_engine(gt, siblings, nsibling,
3259 err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
3267 static int mask_virtual_engine(struct intel_gt *gt,
3268 struct intel_engine_cs **siblings,
3269 unsigned int nsibling)
3271 struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
3272 struct intel_context *ve;
3273 struct igt_live_test t;
3278 * Check that by setting the execution mask on a request, we can
3279 * restrict it to our desired engine within the virtual engine.
3282 ve = intel_execlists_create_virtual(siblings, nsibling);
3288 err = intel_context_pin(ve);
3292 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
3296 for (n = 0; n < nsibling; n++) {
3297 request[n] = i915_request_create(ve);
3298 if (IS_ERR(request[n])) {
3299 err = PTR_ERR(request[n]);
3304 /* Reverse order as it's more likely to be unnatural */
3305 request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
3307 i915_request_get(request[n]);
3308 i915_request_add(request[n]);
3311 for (n = 0; n < nsibling; n++) {
3312 if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
3313 pr_err("%s(%s): wait for %llx:%lld timed out\n",
3314 __func__, ve->engine->name,
3315 request[n]->fence.context,
3316 request[n]->fence.seqno);
3318 GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
3319 __func__, ve->engine->name,
3320 request[n]->fence.context,
3321 request[n]->fence.seqno);
3323 intel_gt_set_wedged(gt);
3328 if (request[n]->engine != siblings[nsibling - n - 1]) {
3329 pr_err("Executed on wrong sibling '%s', expected '%s'\n",
3330 request[n]->engine->name,
3331 siblings[nsibling - n - 1]->name);
3337 err = igt_live_test_end(&t);
3339 if (igt_flush_test(gt->i915))
3342 for (n = 0; n < nsibling; n++)
3343 i915_request_put(request[n]);
3346 intel_context_unpin(ve);
3348 intel_context_put(ve);
3353 static int live_virtual_mask(void *arg)
3355 struct intel_gt *gt = arg;
3356 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3357 unsigned int class, inst;
3360 if (intel_uc_uses_guc_submission(&gt->uc))
3363 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3364 unsigned int nsibling;
3367 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3368 if (!gt->engine_class[class][inst])
3371 siblings[nsibling++] = gt->engine_class[class][inst];
3376 err = mask_virtual_engine(gt, siblings, nsibling);
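/*
 * preserved_virtual_engine: submit a chain of NUM_GPR_DW requests on the
 * virtual engine, each pinned to a particular sibling via its
 * execution_mask. Request n stores CS_GPR(n) into the scratch page and then
 * loads a new value into the next GPR, so the following request, running on
 * a different sibling, only stores the expected value if the user GPR state
 * was carried across the engine switch.
 */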
3384 static int preserved_virtual_engine(struct intel_gt *gt,
3385 struct intel_engine_cs **siblings,
3386 unsigned int nsibling)
3388 struct i915_request *last = NULL;
3389 struct intel_context *ve;
3390 struct i915_vma *scratch;
3391 struct igt_live_test t;
3396 scratch = create_scratch(siblings[0]->gt);
3397 if (IS_ERR(scratch))
3398 return PTR_ERR(scratch);
3400 err = i915_vma_sync(scratch);
3404 ve = intel_execlists_create_virtual(siblings, nsibling);
3410 err = intel_context_pin(ve);
3414 err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
3418 for (n = 0; n < NUM_GPR_DW; n++) {
3419 struct intel_engine_cs *engine = siblings[n % nsibling];
3420 struct i915_request *rq;
3422 rq = i915_request_create(ve);
3428 i915_request_put(last);
3429 last = i915_request_get(rq);
3431 cs = intel_ring_begin(rq, 8);
3433 i915_request_add(rq);
3438 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
3439 *cs++ = CS_GPR(engine, n);
3440 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
3443 *cs++ = MI_LOAD_REGISTER_IMM(1);
3444 *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
3448 intel_ring_advance(rq, cs);
3450 /* Restrict this request to run on a particular engine */
3451 rq->execution_mask = engine->mask;
3452 i915_request_add(rq);
3455 if (i915_request_wait(last, 0, HZ / 5) < 0) {
3460 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
3466 for (n = 0; n < NUM_GPR_DW; n++) {
3468 pr_err("Incorrect value[%d] found for GPR[%d]\n",
3475 i915_gem_object_unpin_map(scratch->obj);
3478 if (igt_live_test_end(&t))
3480 i915_request_put(last);
3482 intel_context_unpin(ve);
3484 intel_context_put(ve);
3486 i915_vma_unpin_and_release(&scratch, 0);
3490 static int live_virtual_preserved(void *arg)
3492 struct intel_gt *gt = arg;
3493 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3494 unsigned int class, inst;
3497 * Check that the context image retains non-privileged (user) registers
3498 * from one engine to the next. For this we check that the CS_GPR registers are preserved.
3502 if (intel_uc_uses_guc_submission(&gt->uc))
3505 /* As we use CS_GPR we cannot run before they existed on all engines. */
3506 if (INTEL_GEN(gt->i915) < 9)
3509 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3513 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3514 if (!gt->engine_class[class][inst])
3517 siblings[nsibling++] = gt->engine_class[class][inst];
3522 err = preserved_virtual_engine(gt, siblings, nsibling);
3530 static int bond_virtual_engine(struct intel_gt *gt,
3532 struct intel_engine_cs **siblings,
3533 unsigned int nsibling,
3535 #define BOND_SCHEDULE BIT(0)
3537 struct intel_engine_cs *master;
3538 struct i915_request *rq[16];
3539 enum intel_engine_id id;
3540 struct igt_spinner spin;
3545 * A set of bonded requests is intended to be run concurrently
3546 * across a number of engines. We use one request per-engine
3547 * and a magic fence to schedule each of the bonded requests
3548 * at the same time. A consequence of our current scheduler is that
3549 * we only move requests to the HW ready queue when the request
3550 * becomes ready, that is when all of its prerequisite fences have
3551 * been signaled. As one of those fences is the master submit fence,
3552 * there is a delay on all secondary fences as the HW may be
3553 * currently busy. Equally, as all the requests are independent,
3554 * they may have other fences that delay individual request
3555 * submission to HW. Ergo, we do not guarantee that all requests are
3556 * immediately submitted to HW at the same time, just that if the
3557 * rules are abided by, they are ready at the same time as the
3558 * first is submitted. Userspace can embed semaphores in its batch
3559 * to ensure parallel execution of its phases as it requires.
3560 * Though naturally it gets requested that perhaps the scheduler should
3561 * take care of parallel execution, even across preemption events on
3562 * different HW. (The proper answer is of course "lalalala".)
3564 * With the submit-fence, we have identified three possible phases
3565 * of synchronisation depending on the master fence: queued (not
3566 * ready), executing, and signaled. The first two are quite simple
3567 * and checked below. However, the signaled master fence handling is
3568 * contentious. Currently we do not distinguish between a signaled
3569 * fence and an expired fence, as once signaled it does not convey
3570 * any information about the previous execution. It may even be freed
3571 * and hence checking later it may not exist at all. Ergo we currently
3572 * do not apply the bonding constraint for an already signaled fence,
3573 * as our expectation is that it should not constrain the secondaries
3574 * and is outside of the scope of the bonded request API (i.e. all
3575 * userspace requests are meant to be running in parallel). As
3576 * it imposes no constraint, and is effectively a no-op, we do not
3577 * check below as normal execution flows are checked extensively above.
3579 * XXX Is the degenerate handling of signaled submit fences the
3580 * expected behaviour for userspace?
3583 GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
3585 if (igt_spinner_init(&spin, gt))
3589 rq[0] = ERR_PTR(-ENOMEM);
3590 for_each_engine(master, gt, id) {
3591 struct i915_sw_fence fence = {};
3592 struct intel_context *ce;
3594 if (master->class == class)
3597 ce = intel_context_create(master);
3603 memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
3605 rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP);
3606 intel_context_put(ce);
3607 if (IS_ERR(rq[0])) {
3608 err = PTR_ERR(rq[0]);
3611 i915_request_get(rq[0]);
3613 if (flags & BOND_SCHEDULE) {
3614 onstack_fence_init(&fence);
3615 err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
3620 i915_request_add(rq[0]);
3624 if (!(flags & BOND_SCHEDULE) &&
3625 !igt_wait_for_spinner(&spin, rq[0])) {
3630 for (n = 0; n < nsibling; n++) {
3631 struct intel_context *ve;
3633 ve = intel_execlists_create_virtual(siblings, nsibling);
3636 onstack_fence_fini(&fence);
3640 err = intel_virtual_engine_attach_bond(ve->engine,
3644 intel_context_put(ve);
3645 onstack_fence_fini(&fence);
3649 err = intel_context_pin(ve);
3650 intel_context_put(ve);
3652 onstack_fence_fini(&fence);
3656 rq[n + 1] = i915_request_create(ve);
3657 intel_context_unpin(ve);
3658 if (IS_ERR(rq[n + 1])) {
3659 err = PTR_ERR(rq[n + 1]);
3660 onstack_fence_fini(&fence);
3663 i915_request_get(rq[n + 1]);
3665 err = i915_request_await_execution(rq[n + 1],
3667 ve->engine->bond_execute);
3668 i915_request_add(rq[n + 1]);
3670 onstack_fence_fini(&fence);
3674 onstack_fence_fini(&fence);
3675 intel_engine_flush_submission(master);
3676 igt_spinner_end(&spin);
3678 if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
3679 pr_err("Master request did not execute (on %s)!\n",
3680 rq[0]->engine->name);
3685 for (n = 0; n < nsibling; n++) {
3686 if (i915_request_wait(rq[n + 1], 0,
3687 MAX_SCHEDULE_TIMEOUT) < 0) {
3692 if (rq[n + 1]->engine != siblings[n]) {
3693 pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
3695 rq[n + 1]->engine->name,
3696 rq[0]->engine->name);
3702 for (n = 0; !IS_ERR(rq[n]); n++)
3703 i915_request_put(rq[n]);
3704 rq[0] = ERR_PTR(-ENOMEM);
3708 for (n = 0; !IS_ERR(rq[n]); n++)
3709 i915_request_put(rq[n]);
3710 if (igt_flush_test(gt->i915))
3713 igt_spinner_fini(&spin);
3717 static int live_virtual_bond(void *arg)
3719 static const struct phase {
3724 { "schedule", BOND_SCHEDULE },
3727 struct intel_gt *gt = arg;
3728 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3729 unsigned int class, inst;
3732 if (intel_uc_uses_guc_submission(&gt->uc))
3735 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3736 const struct phase *p;
3740 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3741 if (!gt->engine_class[class][inst])
3744 GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings));
3745 siblings[nsibling++] = gt->engine_class[class][inst];
3750 for (p = phases; p->name; p++) {
3751 err = bond_virtual_engine(gt,
3752 class, siblings, nsibling,
3755 pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
3756 __func__, p->name, class, nsibling, err);
3765 static int reset_virtual_engine(struct intel_gt *gt,
3766 struct intel_engine_cs **siblings,
3767 unsigned int nsibling)
3769 struct intel_engine_cs *engine;
3770 struct intel_context *ve;
3771 unsigned long *heartbeat;
3772 struct igt_spinner spin;
3773 struct i915_request *rq;
3778 * In order to support offline error capture for fast preempt reset,
3779 * we need to decouple the guilty request and ensure that it and its
3780 * descendants are not executed while the capture is in progress.
3783 heartbeat = kmalloc_array(nsibling, sizeof(*heartbeat), GFP_KERNEL);
3787 if (igt_spinner_init(&spin, gt)) {
3792 ve = intel_execlists_create_virtual(siblings, nsibling);
3798 for (n = 0; n < nsibling; n++)
3799 engine_heartbeat_disable(siblings[n], &heartbeat[n]);
3801 rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
3806 i915_request_add(rq);
3808 if (!igt_wait_for_spinner(&spin, rq)) {
3809 intel_gt_set_wedged(gt);
3814 engine = rq->engine;
3815 GEM_BUG_ON(engine == ve->engine);
3817 /* Take ownership of the reset and tasklet */
3818 if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
3819 &gt->reset.flags)) {
3820 intel_gt_set_wedged(gt);
3824 tasklet_disable(&engine->execlists.tasklet);
3826 engine->execlists.tasklet.func(engine->execlists.tasklet.data);
3827 GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
3829 /* Fake a preemption event; failed of course */
3830 spin_lock_irq(&engine->active.lock);
3831 __unwind_incomplete_requests(engine);
3832 spin_unlock_irq(&engine->active.lock);
3833 GEM_BUG_ON(rq->engine != ve->engine);
3835 /* Reset the engine while keeping our active request on hold */
3836 execlists_hold(engine, rq);
3837 GEM_BUG_ON(!i915_request_on_hold(rq));
3839 intel_engine_reset(engine, NULL);
3840 GEM_BUG_ON(rq->fence.error != -EIO);
3842 /* Release our grasp on the engine, letting CS flow again */
3843 tasklet_enable(&engine->execlists.tasklet);
3844 clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags);
3846 /* Check that we do not resubmit the held request */
3847 i915_request_get(rq);
3848 if (!i915_request_wait(rq, 0, HZ / 5)) {
3849 pr_err("%s: on hold request completed!\n",
3851 intel_gt_set_wedged(gt);
3855 GEM_BUG_ON(!i915_request_on_hold(rq));
3857 /* But is resubmitted on release */
3858 execlists_unhold(engine, rq);
3859 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
3860 pr_err("%s: held request did not complete!\n",
3862 intel_gt_set_wedged(gt);
3867 i915_request_put(rq);
3869 for (n = 0; n < nsibling; n++)
3870 engine_heartbeat_enable(siblings[n], heartbeat[n]);
3872 intel_context_put(ve);
3874 igt_spinner_fini(&spin);
3880 static int live_virtual_reset(void *arg)
3882 struct intel_gt *gt = arg;
3883 struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
3884 unsigned int class, inst;
3887 * Check that we handle a reset event within a virtual engine.
3888 * Only the physical engine is reset, but we have to check the flow
3889 * of the virtual requests around the reset, and make sure it is not lost.
3893 if (intel_uc_uses_guc_submission(&gt->uc))
3896 if (!intel_has_reset_engine(gt))
3899 for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
3903 for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
3904 if (!gt->engine_class[class][inst])
3907 siblings[nsibling++] = gt->engine_class[class][inst];
3912 err = reset_virtual_engine(gt, siblings, nsibling);
3920 int intel_execlists_live_selftests(struct drm_i915_private *i915)
3922 static const struct i915_subtest tests[] = {
3923 SUBTEST(live_sanitycheck),
3924 SUBTEST(live_unlite_switch),
3925 SUBTEST(live_unlite_preempt),
3926 SUBTEST(live_pin_rewind),
3927 SUBTEST(live_hold_reset),
3928 SUBTEST(live_error_interrupt),
3929 SUBTEST(live_timeslice_preempt),
3930 SUBTEST(live_timeslice_rewind),
3931 SUBTEST(live_timeslice_queue),
3932 SUBTEST(live_busywait_preempt),
3933 SUBTEST(live_preempt),
3934 SUBTEST(live_late_preempt),
3935 SUBTEST(live_nopreempt),
3936 SUBTEST(live_preempt_cancel),
3937 SUBTEST(live_suppress_self_preempt),
3938 SUBTEST(live_suppress_wait_preempt),
3939 SUBTEST(live_chain_preempt),
3940 SUBTEST(live_preempt_gang),
3941 SUBTEST(live_preempt_timeout),
3942 SUBTEST(live_preempt_smoke),
3943 SUBTEST(live_virtual_engine),
3944 SUBTEST(live_virtual_mask),
3945 SUBTEST(live_virtual_preserved),
3946 SUBTEST(live_virtual_bond),
3947 SUBTEST(live_virtual_reset),
3950 if (!HAS_EXECLISTS(i915))
3953 if (intel_gt_is_wedged(&i915->gt))
3956 return intel_gt_live_subtests(tests, &i915->gt);
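/*
 * hexdump: dump a context image when live_lrc_layout finds a mismatch,
 * skipping rows that are identical to the previous one so only the
 * interesting dwords of the page are printed.
 */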
3959 static void hexdump(const void *buf, size_t len)
3961 const size_t rowsize = 8 * sizeof(u32);
3962 const void *prev = NULL;
3966 for (pos = 0; pos < len; pos += rowsize) {
3969 if (prev && !memcmp(prev, buf + pos, rowsize)) {
3977 WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
3978 rowsize, sizeof(u32),
3980 false) >= sizeof(line));
3981 pr_info("[%04zx] %s\n", pos, line);
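/*
 * emit_semaphore_signal: from the given context, write an immediate dword
 * into the chosen slot of the engine's status page. The request runs at
 * barrier priority so that it executes promptly and releases any request
 * polling that slot with MI_SEMAPHORE_WAIT / SAD_NEQ_SDD.
 */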
3988 static int emit_semaphore_signal(struct intel_context *ce, void *slot)
3991 i915_ggtt_offset(ce->engine->status_page.vma) +
3992 offset_in_page(slot);
3993 struct i915_request *rq;
3996 rq = intel_context_create_request(ce);
4000 cs = intel_ring_begin(rq, 4);
4002 i915_request_add(rq);
4006 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
4011 intel_ring_advance(rq, cs);
4013 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
4014 i915_request_add(rq);
4018 static int live_lrc_layout(void *arg)
4020 struct intel_gt *gt = arg;
4021 struct intel_engine_cs *engine;
4022 enum intel_engine_id id;
4027 * Check the register offsets we use to create the initial reg state
4028 * match the layout saved by HW.
4031 lrc = kmalloc(PAGE_SIZE, GFP_KERNEL);
4036 for_each_engine(engine, gt, id) {
4040 if (!engine->default_state)
4043 hw = i915_gem_object_pin_map(engine->default_state,
4049 hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
4051 execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE),
4052 engine->kernel_context,
4054 engine->kernel_context->ring,
4067 pr_debug("%s: skipped instruction %x at dword %d\n",
4068 engine->name, lri, dw);
4073 if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
4074 pr_err("%s: Expected LRI command at dword %d, found %08x\n",
4075 engine->name, dw, lri);
4080 if (lrc[dw] != lri) {
4081 pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
4082 engine->name, dw, lri, lrc[dw]);
4092 if (hw[dw] != lrc[dw]) {
4093 pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
4094 engine->name, dw, hw[dw], lrc[dw]);
4100 * Skip over the actual register value as we
4101 * expect that to differ.
4106 } while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
4109 pr_info("%s: HW register image:\n", engine->name);
4110 hexdump(hw, PAGE_SIZE);
4112 pr_info("%s: SW register image:\n", engine->name);
4113 hexdump(lrc, PAGE_SIZE);
4116 i915_gem_object_unpin_map(engine->default_state);
4125 static int find_offset(const u32 *lri, u32 offset)
4129 for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
4130 if (lri[i] == offset)
4136 static int live_lrc_fixed(void *arg)
4138 struct intel_gt *gt = arg;
4139 struct intel_engine_cs *engine;
4140 enum intel_engine_id id;
4144 * Check the assumed register offsets match the actual locations in
4145 * the context image.
4148 for_each_engine(engine, gt, id) {
4155 i915_mmio_reg_offset(RING_START(engine->mmio_base)),
4160 i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
4165 i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
4170 i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
4175 i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
4176 lrc_ring_mi_mode(engine),
4180 i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
4185 i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)),
4187 "RING_CTX_TIMESTAMP"
4193 if (!engine->default_state)
4196 hw = i915_gem_object_pin_map(engine->default_state,
4202 hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
4204 for (t = tbl; t->name; t++) {
4205 int dw = find_offset(hw, t->reg);
4207 if (dw != t->offset) {
4208 pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
4218 i915_gem_object_unpin_map(engine->default_state);
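/*
 * __live_lrc_state: from a fresh context, use MI_STORE_REGISTER_MEM to copy
 * the live RING_START and RING_TAIL registers into the scratch page, then
 * compare them against the values we expect from the context's ring vma and
 * software tail. Any mismatch means the context image and the HW disagree
 * about where the ring lives.
 */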
4224 static int __live_lrc_state(struct intel_engine_cs *engine,
4225 struct i915_vma *scratch)
4227 struct intel_context *ce;
4228 struct i915_request *rq;
4234 u32 expected[MAX_IDX];
4239 ce = intel_context_create(engine);
4243 err = intel_context_pin(ce);
4247 rq = i915_request_create(ce);
4253 cs = intel_ring_begin(rq, 4 * MAX_IDX);
4256 i915_request_add(rq);
4260 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4261 *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
4262 *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
4265 expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);
4267 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4268 *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
4269 *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
4272 i915_vma_lock(scratch);
4273 err = i915_request_await_object(rq, scratch->obj, true);
4275 err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
4276 i915_vma_unlock(scratch);
4278 i915_request_get(rq);
4279 i915_request_add(rq);
4283 intel_engine_flush_submission(engine);
4284 expected[RING_TAIL_IDX] = ce->ring->tail;
4286 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4291 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
4297 for (n = 0; n < MAX_IDX; n++) {
4298 if (cs[n] != expected[n]) {
4299 pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
4300 engine->name, n, cs[n], expected[n]);
4306 i915_gem_object_unpin_map(scratch->obj);
4309 i915_request_put(rq);
4311 intel_context_unpin(ce);
4313 intel_context_put(ce);
4317 static int live_lrc_state(void *arg)
4319 struct intel_gt *gt = arg;
4320 struct intel_engine_cs *engine;
4321 struct i915_vma *scratch;
4322 enum intel_engine_id id;
4326 * Check the live register state matches what we expect for this intel_context.
4330 scratch = create_scratch(gt);
4331 if (IS_ERR(scratch))
4332 return PTR_ERR(scratch);
4334 for_each_engine(engine, gt, id) {
4335 err = __live_lrc_state(engine, scratch);
4340 if (igt_flush_test(gt->i915))
4343 i915_vma_unpin_and_release(&scratch, 0);
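/*
 * gpr_make_dirty: use a single MI_LOAD_REGISTER_IMM to load STACK_MAGIC
 * into every CS_GPR of the context, at barrier priority. live_lrc_gpr runs
 * this on the kernel context to plant values that must not leak into a
 * freshly created user context.
 */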
4347 static int gpr_make_dirty(struct intel_context *ce)
4349 struct i915_request *rq;
4353 rq = intel_context_create_request(ce);
4357 cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
4359 i915_request_add(rq);
4363 *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
4364 for (n = 0; n < NUM_GPR_DW; n++) {
4365 *cs++ = CS_GPR(ce->engine, n);
4366 *cs++ = STACK_MAGIC;
4370 intel_ring_advance(rq, cs);
4372 rq->sched.attr.priority = I915_PRIORITY_BARRIER;
4373 i915_request_add(rq);
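/*
 * __gpr_read: build a request on the new context that waits on a
 * status-page semaphore (so the caller controls when it runs), then stores
 * every CS_GPR into the scratch buffer with MI_STORE_REGISTER_MEM. The
 * caller checks those dwords are all zero, i.e. that the new context did
 * not inherit the STACK_MAGIC planted in the kernel context.
 */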
4378 static struct i915_request *
4379 __gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot)
4382 i915_ggtt_offset(ce->engine->status_page.vma) +
4383 offset_in_page(slot);
4384 struct i915_request *rq;
4389 rq = intel_context_create_request(ce);
4393 cs = intel_ring_begin(rq, 6 + 4 * NUM_GPR_DW);
4395 i915_request_add(rq);
4396 return ERR_CAST(cs);
4399 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4402 *cs++ = MI_SEMAPHORE_WAIT |
4403 MI_SEMAPHORE_GLOBAL_GTT |
4405 MI_SEMAPHORE_SAD_NEQ_SDD;
4410 for (n = 0; n < NUM_GPR_DW; n++) {
4411 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4412 *cs++ = CS_GPR(ce->engine, n);
4413 *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
4417 i915_vma_lock(scratch);
4418 err = i915_request_await_object(rq, scratch->obj, true);
4420 err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
4421 i915_vma_unlock(scratch);
4423 i915_request_get(rq);
4424 i915_request_add(rq);
4426 i915_request_put(rq);
4433 static int __live_lrc_gpr(struct intel_engine_cs *engine,
4434 struct i915_vma *scratch,
4437 u32 *slot = memset32(engine->status_page.addr + 1000, 0, 4);
4438 struct intel_context *ce;
4439 struct i915_request *rq;
4444 if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS)
4445 return 0; /* GPR only on rcs0 for gen8 */
4447 err = gpr_make_dirty(engine->kernel_context);
4451 ce = intel_context_create(engine);
4455 rq = __gpr_read(ce, scratch, slot);
4461 err = wait_for_submit(engine, rq, HZ / 2);
4466 err = gpr_make_dirty(engine->kernel_context);
4470 err = emit_semaphore_signal(engine->kernel_context, slot);
4478 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
4483 cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
4489 for (n = 0; n < NUM_GPR_DW; n++) {
4491 pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
4493 n / 2, n & 1 ? "udw" : "ldw",
4500 i915_gem_object_unpin_map(scratch->obj);
4503 memset32(&slot[0], -1, 4);
4505 i915_request_put(rq);
4507 intel_context_put(ce);
4511 static int live_lrc_gpr(void *arg)
4513 struct intel_gt *gt = arg;
4514 struct intel_engine_cs *engine;
4515 struct i915_vma *scratch;
4516 enum intel_engine_id id;
4520 * Check that GPR registers are cleared in new contexts as we need
4521 * to avoid leaking any information from previous contexts.
4524 scratch = create_scratch(gt);
4525 if (IS_ERR(scratch))
4526 return PTR_ERR(scratch);
4528 for_each_engine(engine, gt, id) {
4529 unsigned long heartbeat;
4531 engine_heartbeat_disable(engine, &heartbeat);
4533 err = __live_lrc_gpr(engine, scratch, false);
4537 err = __live_lrc_gpr(engine, scratch, true);
4542 engine_heartbeat_enable(engine, heartbeat);
4543 if (igt_flush_test(gt->i915))
4549 i915_vma_unpin_and_release(&scratch, 0);
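/*
 * create_timestamp: queue a request that waits on a status-page semaphore
 * and, once released, copies the engine's RING_CTX_TIMESTAMP into
 * slot[idx]. __lrc_timestamp uses that sample to check that the poisoned
 * CTX_TIMESTAMP in the context image is replaced by a value that advances
 * monotonically across the context save/restore.
 */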
4553 static struct i915_request *
4554 create_timestamp(struct intel_context *ce, void *slot, int idx)
4557 i915_ggtt_offset(ce->engine->status_page.vma) +
4558 offset_in_page(slot);
4559 struct i915_request *rq;
4563 rq = intel_context_create_request(ce);
4567 cs = intel_ring_begin(rq, 10);
4573 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
4576 *cs++ = MI_SEMAPHORE_WAIT |
4577 MI_SEMAPHORE_GLOBAL_GTT |
4579 MI_SEMAPHORE_SAD_NEQ_SDD;
4584 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
4585 *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base));
4586 *cs++ = offset + idx * sizeof(u32);
4589 intel_ring_advance(rq, cs);
4591 rq->sched.attr.priority = I915_PRIORITY_MASK;
4594 i915_request_get(rq);
4595 i915_request_add(rq);
4597 i915_request_put(rq);
4598 return ERR_PTR(err);
4604 struct lrc_timestamp {
4605 struct intel_engine_cs *engine;
4606 struct intel_context *ce[2];
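/*
 * The timestamps are 32bit counters and may wrap during the test, so
 * compare two readings by their signed difference rather than by magnitude.
 */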
4610 static bool timestamp_advanced(u32 start, u32 end)
4612 return (s32)(end - start) > 0;
4615 static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt)
4617 u32 *slot = memset32(arg->engine->status_page.addr + 1000, 0, 4);
4618 struct i915_request *rq;
4622 arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison;
4623 rq = create_timestamp(arg->ce[0], slot, 1);
4627 err = wait_for_submit(rq->engine, rq, HZ / 2);
4632 arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef;
4633 err = emit_semaphore_signal(arg->ce[1], slot);
4641 if (i915_request_wait(rq, 0, HZ / 2) < 0) {
4646 /* and wait for switch to kernel */
4647 if (igt_flush_test(arg->engine->i915)) {
4654 if (!timestamp_advanced(arg->poison, slot[1])) {
4655 pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n",
4656 arg->engine->name, preempt ? "preempt" : "simple",
4657 arg->poison, slot[1]);
4661 timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]);
4662 if (!timestamp_advanced(slot[1], timestamp)) {
4663 pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n",
4664 arg->engine->name, preempt ? "preempt" : "simple",
4665 slot[1], timestamp);
4670 memset32(slot, -1, 4);
4671 i915_request_put(rq);
4675 static int live_lrc_timestamp(void *arg)
4677 struct intel_gt *gt = arg;
4678 enum intel_engine_id id;
4679 struct lrc_timestamp data;
4680 const u32 poison[] = {
4688 * We want to verify that the timestamp is saved and restored across
4689 * context switches and is monotonic.
4691 * So we do this with a little bit of LRC poisoning to check various
4692 * boundary conditions, and see what happens if we preempt the context
4693 * with a second request (carrying more poison into the timestamp).
4696 for_each_engine(data.engine, gt, id) {
4697 unsigned long heartbeat;
4700 engine_heartbeat_disable(data.engine, &heartbeat);
4702 for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
4703 struct intel_context *tmp;
4705 tmp = intel_context_create(data.engine);
4711 err = intel_context_pin(tmp);
4713 intel_context_put(tmp);
4720 for (i = 0; i < ARRAY_SIZE(poison); i++) {
4721 data.poison = poison[i];
4723 err = __lrc_timestamp(&data, false);
4727 err = __lrc_timestamp(&data, true);
4733 engine_heartbeat_enable(data.engine, heartbeat);
4734 for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
4738 intel_context_unpin(data.ce[i]);
4739 intel_context_put(data.ce[i]);
4742 if (igt_flush_test(gt->i915))
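/*
 * __live_pphwsp_runtime: hammer the context with batches of empty requests
 * until the selftest timeout expires, then report the total and average
 * runtime accumulated in the PPHWSP and fail if the runtime counter was
 * ever seen to run backwards (ce->runtime.num_underflow).
 */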
4751 static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
4753 struct intel_context *ce;
4754 struct i915_request *rq;
4755 IGT_TIMEOUT(end_time);
4758 ce = intel_context_create(engine);
4762 ce->runtime.num_underflow = 0;
4763 ce->runtime.max_underflow = 0;
4766 unsigned int loop = 1024;
4769 rq = intel_context_create_request(ce);
4776 i915_request_get(rq);
4778 i915_request_add(rq);
4781 if (__igt_timeout(end_time, NULL))
4784 i915_request_put(rq);
4787 err = i915_request_wait(rq, 0, HZ / 5);
4789 pr_err("%s: request not completed!\n", engine->name);
4793 igt_flush_test(engine->i915);
4795 pr_info("%s: pphwsp runtime %lluns, average %lluns\n",
4797 intel_context_get_total_runtime_ns(ce),
4798 intel_context_get_avg_runtime_ns(ce));
4801 if (ce->runtime.num_underflow) {
4802 pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
4804 ce->runtime.num_underflow,
4805 ce->runtime.max_underflow);
4811 i915_request_put(rq);
4813 intel_context_put(ce);
4817 static int live_pphwsp_runtime(void *arg)
4819 struct intel_gt *gt = arg;
4820 struct intel_engine_cs *engine;
4821 enum intel_engine_id id;
4825 * Check that cumulative context runtime as stored in the pphwsp[16] is monotonic.
4829 for_each_engine(engine, gt, id) {
4830 err = __live_pphwsp_runtime(engine);
4835 if (igt_flush_test(gt->i915))
4841 int intel_lrc_live_selftests(struct drm_i915_private *i915)
4843 static const struct i915_subtest tests[] = {
4844 SUBTEST(live_lrc_layout),
4845 SUBTEST(live_lrc_fixed),
4846 SUBTEST(live_lrc_state),
4847 SUBTEST(live_lrc_gpr),
4848 SUBTEST(live_lrc_timestamp),
4849 SUBTEST(live_pphwsp_runtime),
4852 if (!HAS_LOGICAL_RING_CONTEXTS(i915))
4855 return intel_gt_live_subtests(tests, &i915->gt);