/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/kthread.h>

#include "../i915_selftest.h"

#include "mock_context.h"
#include "mock_drm.h"

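/*
 * Fixture shared by the hangcheck selftests: a batch that spins forever
 * (the "hang") plus a page used as a makeshift hardware status page (HWS)
 * into which each hanging request writes its seqno.
 */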
struct hang {
        struct drm_i915_private *i915;
        struct drm_i915_gem_object *hws;
        struct drm_i915_gem_object *obj;
        u32 *seqno;
        u32 *batch;
};

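/*
 * Allocate and pin the two internal objects backing struct hang: the HWS
 * page (filled with ~0 so that no seqno appears to have been written yet)
 * and the page that will hold the spinning batch.
 */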
static int hang_init(struct hang *h, struct drm_i915_private *i915)
{
        void *vaddr;
        int err;

        memset(h, 0, sizeof(*h));
        h->i915 = i915;

        h->hws = i915_gem_object_create_internal(i915, PAGE_SIZE);
        if (IS_ERR(h->hws))
                return PTR_ERR(h->hws);

        h->obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
        if (IS_ERR(h->obj)) {
                err = PTR_ERR(h->obj);
                goto err_hws;
        }

        i915_gem_object_set_cache_level(h->hws, I915_CACHE_LLC);
        vaddr = i915_gem_object_pin_map(h->hws, I915_MAP_WB);
        if (IS_ERR(vaddr)) {
                err = PTR_ERR(vaddr);
                goto err_obj;
        }
        h->seqno = memset(vaddr, 0xff, PAGE_SIZE);

        vaddr = i915_gem_object_pin_map(h->obj,
                                        HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC);
        if (IS_ERR(vaddr)) {
                err = PTR_ERR(vaddr);
                goto err_unpin_hws;
        }
        h->batch = vaddr;

        return 0;

err_unpin_hws:
        i915_gem_object_unpin_map(h->hws);
err_obj:
        i915_gem_object_put(h->obj);
err_hws:
        i915_gem_object_put(h->hws);
        return err;
}

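/* Each context gets its own dword slot within the shared HWS page. */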
static u64 hws_address(const struct i915_vma *hws,
                       const struct drm_i915_gem_request *rq)
{
        return hws->node.start + offset_in_page(sizeof(u32)*rq->fence.context);
}

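/*
 * Pin the batch and HWS objects into the request's address space and emit
 * the self-looping ("recursive") batch on the request's engine.
 */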
static int emit_recurse_batch(struct hang *h,
                              struct drm_i915_gem_request *rq)
{
        struct drm_i915_private *i915 = h->i915;
        struct i915_address_space *vm = rq->ctx->ppgtt ? &rq->ctx->ppgtt->base : &i915->ggtt.base;
        struct i915_vma *hws, *vma;
        unsigned int flags;
        u32 *batch;
        int err;

        vma = i915_vma_instance(h->obj, vm, NULL);
        if (IS_ERR(vma))
                return PTR_ERR(vma);

        hws = i915_vma_instance(h->hws, vm, NULL);
        if (IS_ERR(hws))
                return PTR_ERR(hws);

        err = i915_vma_pin(vma, 0, 0, PIN_USER);
        if (err)
                return err;

        err = i915_vma_pin(hws, 0, 0, PIN_USER);
        if (err)
                goto unpin_vma;

        err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
        if (err)
                goto unpin_hws;

        err = i915_switch_context(rq);
        if (err)
                goto unpin_hws;

        i915_vma_move_to_active(vma, rq, 0);
        if (!i915_gem_object_has_active_reference(vma->obj)) {
                i915_gem_object_get(vma->obj);
                i915_gem_object_set_active_reference(vma->obj);
        }

        i915_vma_move_to_active(hws, rq, 0);
        if (!i915_gem_object_has_active_reference(hws->obj)) {
                i915_gem_object_get(hws->obj);
                i915_gem_object_set_active_reference(hws->obj);
        }

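        /*
         * Per-gen batch: store this request's seqno into its HWS slot,
         * then MI_BATCH_BUFFER_START back to the start of the batch so it
         * loops forever; the trailing MI_BATCH_BUFFER_END is only reached
         * once the selftest rewrites the batch.
         */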
        batch = h->batch;
        if (INTEL_GEN(i915) >= 8) {
                *batch++ = MI_STORE_DWORD_IMM_GEN4;
                *batch++ = lower_32_bits(hws_address(hws, rq));
                *batch++ = upper_32_bits(hws_address(hws, rq));
                *batch++ = rq->fence.seqno;
                *batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
                *batch++ = lower_32_bits(vma->node.start);
                *batch++ = upper_32_bits(vma->node.start);
        } else if (INTEL_GEN(i915) >= 6) {
                *batch++ = MI_STORE_DWORD_IMM_GEN4;
                *batch++ = 0;
                *batch++ = lower_32_bits(hws_address(hws, rq));
                *batch++ = rq->fence.seqno;
                *batch++ = MI_BATCH_BUFFER_START | 1 << 8;
                *batch++ = lower_32_bits(vma->node.start);
        } else if (INTEL_GEN(i915) >= 4) {
                *batch++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22;
                *batch++ = 0;
                *batch++ = lower_32_bits(hws_address(hws, rq));
                *batch++ = rq->fence.seqno;
                *batch++ = MI_BATCH_BUFFER_START | 2 << 6;
                *batch++ = lower_32_bits(vma->node.start);
        } else {
                *batch++ = MI_STORE_DWORD_IMM;
                *batch++ = lower_32_bits(hws_address(hws, rq));
                *batch++ = rq->fence.seqno;
                *batch++ = MI_BATCH_BUFFER_START | 2 << 6 | 1;
                *batch++ = lower_32_bits(vma->node.start);
        }
        *batch++ = MI_BATCH_BUFFER_END; /* not reached */

        flags = 0;
        if (INTEL_GEN(vm->i915) <= 5)
                flags |= I915_DISPATCH_SECURE;

        err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags);

unpin_hws:
        i915_vma_unpin(hws);
unpin_vma:
        i915_vma_unpin(vma);
        return err;
}

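/*
 * Build a hanging request on the chosen engine. If the previous hanging
 * batch is still active (e.g. it survived a reset), swap in a fresh batch
 * object so that we never rewrite a batch the GPU may still be executing.
 */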
static struct drm_i915_gem_request *
hang_create_request(struct hang *h,
                    struct intel_engine_cs *engine,
                    struct i915_gem_context *ctx)
{
        struct drm_i915_gem_request *rq;
        int err;

        if (i915_gem_object_is_active(h->obj)) {
                struct drm_i915_gem_object *obj;
                void *vaddr;

                obj = i915_gem_object_create_internal(h->i915, PAGE_SIZE);
                if (IS_ERR(obj))
                        return ERR_CAST(obj);

                vaddr = i915_gem_object_pin_map(obj,
                                                HAS_LLC(h->i915) ? I915_MAP_WB : I915_MAP_WC);
                if (IS_ERR(vaddr)) {
                        i915_gem_object_put(obj);
                        return ERR_CAST(vaddr);
                }

                i915_gem_object_unpin_map(h->obj);
                i915_gem_object_put(h->obj);

                h->obj = obj;
                h->batch = vaddr;
        }

        rq = i915_gem_request_alloc(engine, ctx);
        if (IS_ERR(rq))
                return rq;

        err = emit_recurse_batch(h, rq);
        if (err) {
                __i915_add_request(rq, false);
                return ERR_PTR(err);
        }

        return rq;
}

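/* Read back the seqno that the hanging batch wrote into its HWS slot. */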
static u32 hws_seqno(const struct hang *h,
                     const struct drm_i915_gem_request *rq)
{
        return READ_ONCE(h->seqno[rq->fence.context % (PAGE_SIZE/sizeof(u32))]);
}

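/*
 * Terminate any still-spinning batch by overwriting it with
 * MI_BATCH_BUFFER_END, then release the fixture and wait for the GPU to
 * idle again.
 */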
static void hang_fini(struct hang *h)
{
        *h->batch = MI_BATCH_BUFFER_END;
        wmb();

        i915_gem_object_unpin_map(h->obj);
        i915_gem_object_put(h->obj);

        i915_gem_object_unpin_map(h->hws);
        i915_gem_object_put(h->hws);

        i915_gem_wait_for_idle(h->i915, I915_WAIT_LOCKED);
}

static int igt_hang_sanitycheck(void *arg)
{
        struct drm_i915_private *i915 = arg;
        struct drm_i915_gem_request *rq;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        struct hang h;
        int err;

        /* Basic check that we can execute our hanging batch */

        mutex_lock(&i915->drm.struct_mutex);
        err = hang_init(&h, i915);
        if (err)
                goto unlock;

        for_each_engine(engine, i915, id) {
                long timeout;

                if (!intel_engine_can_store_dword(engine))
                        continue;

                rq = hang_create_request(&h, engine, i915->kernel_context);
                if (IS_ERR(rq)) {
                        err = PTR_ERR(rq);
                        pr_err("Failed to create request for %s, err=%d\n",
                               engine->name, err);
                        goto fini;
                }

                i915_gem_request_get(rq);

                *h.batch = MI_BATCH_BUFFER_END;
                __i915_add_request(rq, true);

                timeout = i915_wait_request(rq,
                                            I915_WAIT_LOCKED,
                                            MAX_SCHEDULE_TIMEOUT);
                i915_gem_request_put(rq);

                if (timeout < 0) {
                        err = timeout;
                        pr_err("Wait for request failed on %s, err=%d\n",
                               engine->name, err);
                        goto fini;
                }
        }

fini:
        hang_fini(&h);
unlock:
        mutex_unlock(&i915->drm.struct_mutex);
        return err;
}

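/*
 * Serialise against any other reset in progress: claim the global
 * I915_RESET_BACKOFF bit and every per-engine reset bit, waiting for each
 * to become free before proceeding.
 */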
static void global_reset_lock(struct drm_i915_private *i915)
{
        struct intel_engine_cs *engine;
        enum intel_engine_id id;

        while (test_and_set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags))
                wait_event(i915->gpu_error.reset_queue,
                           !test_bit(I915_RESET_BACKOFF,
                                     &i915->gpu_error.flags));

        for_each_engine(engine, i915, id) {
                while (test_and_set_bit(I915_RESET_ENGINE + id,
                                        &i915->gpu_error.flags))
                        wait_on_bit(&i915->gpu_error.flags,
                                    I915_RESET_ENGINE + id,
                                    TASK_UNINTERRUPTIBLE);
        }
}

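/* Drop the per-engine and global reset bits and wake any waiters. */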
static void global_reset_unlock(struct drm_i915_private *i915)
{
        struct intel_engine_cs *engine;
        enum intel_engine_id id;

        for_each_engine(engine, i915, id)
                clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);

        clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
        wake_up_all(&i915->gpu_error.reset_queue);
}

static int igt_global_reset(void *arg)
{
        struct drm_i915_private *i915 = arg;
        unsigned int reset_count;
        int err = 0;

        /* Check that we can issue a global GPU reset */

        global_reset_lock(i915);
        set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags);

        mutex_lock(&i915->drm.struct_mutex);
        reset_count = i915_reset_count(&i915->gpu_error);

        i915_reset(i915, I915_RESET_QUIET);

        if (i915_reset_count(&i915->gpu_error) == reset_count) {
                pr_err("No GPU reset recorded!\n");
                err = -EINVAL;
        }
        mutex_unlock(&i915->drm.struct_mutex);

        GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
        global_reset_unlock(i915);

        if (i915_terminally_wedged(&i915->gpu_error))
                err = -EIO;

        return err;
}

static int igt_reset_engine(void *arg)
{
        struct drm_i915_private *i915 = arg;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        unsigned int reset_count, reset_engine_count;
        int err = 0;

        /* Check that we can issue a global GPU and engine reset */

        if (!intel_has_reset_engine(i915))
                return 0;

        for_each_engine(engine, i915, id) {
                set_bit(I915_RESET_ENGINE + engine->id, &i915->gpu_error.flags);
                reset_count = i915_reset_count(&i915->gpu_error);
                reset_engine_count = i915_reset_engine_count(&i915->gpu_error,
                                                             engine);

                err = i915_reset_engine(engine, I915_RESET_QUIET);
                if (err) {
                        pr_err("i915_reset_engine failed\n");
                        break;
                }

                if (i915_reset_count(&i915->gpu_error) != reset_count) {
                        pr_err("Full GPU reset recorded! (engine reset expected)\n");
                        err = -EINVAL;
                        break;
                }

                if (i915_reset_engine_count(&i915->gpu_error, engine) ==
                    reset_engine_count) {
                        pr_err("No %s engine reset recorded!\n", engine->name);
                        err = -EINVAL;
                        break;
                }

                clear_bit(I915_RESET_ENGINE + engine->id,
                          &i915->gpu_error.flags);
        }

        if (i915_terminally_wedged(&i915->gpu_error))
                err = -EIO;

        return err;
}

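/*
 * kthread body used to keep an engine busy in the background: ping-pong
 * requests between two contexts, always waiting for the older of the two
 * outstanding requests before queueing the next, until told to stop.
 */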
static int active_engine(void *data)
{
        struct intel_engine_cs *engine = data;
        struct drm_i915_gem_request *rq[2] = {};
        struct i915_gem_context *ctx[2];
        struct drm_file *file;
        unsigned long count = 0;
        int err = 0;

        file = mock_file(engine->i915);
        if (IS_ERR(file))
                return PTR_ERR(file);

        mutex_lock(&engine->i915->drm.struct_mutex);
        ctx[0] = live_context(engine->i915, file);
        mutex_unlock(&engine->i915->drm.struct_mutex);
        if (IS_ERR(ctx[0])) {
                err = PTR_ERR(ctx[0]);
                goto err_file;
        }

        mutex_lock(&engine->i915->drm.struct_mutex);
        ctx[1] = live_context(engine->i915, file);
        mutex_unlock(&engine->i915->drm.struct_mutex);
        if (IS_ERR(ctx[1])) {
                err = PTR_ERR(ctx[1]);
                i915_gem_context_put(ctx[0]);
                goto err_file;
        }

        while (!kthread_should_stop()) {
                unsigned int idx = count++ & 1;
                struct drm_i915_gem_request *old = rq[idx];
                struct drm_i915_gem_request *new;

                mutex_lock(&engine->i915->drm.struct_mutex);
                new = i915_gem_request_alloc(engine, ctx[idx]);
                if (IS_ERR(new)) {
                        mutex_unlock(&engine->i915->drm.struct_mutex);
                        err = PTR_ERR(new);
                        break;
                }

                rq[idx] = i915_gem_request_get(new);
                i915_add_request(new);
                mutex_unlock(&engine->i915->drm.struct_mutex);

                if (old) {
                        i915_wait_request(old, 0, MAX_SCHEDULE_TIMEOUT);
                        i915_gem_request_put(old);
                }
        }

        for (count = 0; count < ARRAY_SIZE(rq); count++)
                i915_gem_request_put(rq[count]);

err_file:
        mock_file_free(engine->i915, file);
        return err;
}

static int igt_reset_active_engines(void *arg)
{
        struct drm_i915_private *i915 = arg;
        struct intel_engine_cs *engine, *active;
        enum intel_engine_id id, tmp;
        int err = 0;

        /* Check that issuing a reset on one engine does not interfere
         * with any other engine.
         */

        if (!intel_has_reset_engine(i915))
                return 0;

        for_each_engine(engine, i915, id) {
                struct task_struct *threads[I915_NUM_ENGINES];
                unsigned long resets[I915_NUM_ENGINES];
                unsigned long global = i915_reset_count(&i915->gpu_error);
                IGT_TIMEOUT(end_time);

                memset(threads, 0, sizeof(threads));
                for_each_engine(active, i915, tmp) {
                        struct task_struct *tsk;

                        if (active == engine)
                                continue;

                        resets[tmp] = i915_reset_engine_count(&i915->gpu_error,
                                                              active);

                        tsk = kthread_run(active_engine, active,
                                          "igt/%s", active->name);
                        if (IS_ERR(tsk)) {
                                err = PTR_ERR(tsk);
                                goto unwind;
                        }

                        threads[tmp] = tsk;
                        get_task_struct(tsk);
                }

                set_bit(I915_RESET_ENGINE + engine->id, &i915->gpu_error.flags);
                do {
                        err = i915_reset_engine(engine, I915_RESET_QUIET);
                        if (err) {
                                pr_err("i915_reset_engine(%s) failed, err=%d\n",
                                       engine->name, err);
                                break;
                        }
                } while (time_before(jiffies, end_time));
                clear_bit(I915_RESET_ENGINE + engine->id,
                          &i915->gpu_error.flags);

unwind:
                for_each_engine(active, i915, tmp) {
                        int ret;

                        if (!threads[tmp])
                                continue;

                        ret = kthread_stop(threads[tmp]);
                        if (ret) {
                                pr_err("kthread for active engine %s failed, err=%d\n",
                                       active->name, ret);
                                if (!err)
                                        err = ret;
                        }
                        put_task_struct(threads[tmp]);

                        if (resets[tmp] != i915_reset_engine_count(&i915->gpu_error,
                                                                   active)) {
                                pr_err("Innocent engine %s was reset (count=%ld)\n",
                                       active->name,
                                       i915_reset_engine_count(&i915->gpu_error,
                                                               active) - resets[tmp]);
                                err = -EIO;
                        }
                }

                if (global != i915_reset_count(&i915->gpu_error)) {
                        pr_err("Global reset (count=%ld)!\n",
                               i915_reset_count(&i915->gpu_error) - global);
                        err = -EIO;
                }

                if (err)
                        break;

                cond_resched();
        }

        if (i915_terminally_wedged(&i915->gpu_error))
                err = -EIO;

        return err;
}

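/*
 * Pretend hangcheck has declared the engine stuck on this request and hand
 * over to the reset handler by setting I915_RESET_HANDOFF and waking its
 * waiters.
 */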
static u32 fake_hangcheck(struct drm_i915_gem_request *rq)
{
        u32 reset_count;

        rq->engine->hangcheck.stalled = true;
        rq->engine->hangcheck.seqno = intel_engine_get_seqno(rq->engine);

        reset_count = i915_reset_count(&rq->i915->gpu_error);

        set_bit(I915_RESET_HANDOFF, &rq->i915->gpu_error.flags);
        wake_up_all(&rq->i915->gpu_error.wait_queue);

        return reset_count;
}

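/*
 * Wait (first a short busy-wait, then a sleeping wait of up to a second)
 * until the hanging batch has written its seqno to the HWS, i.e. until it
 * is actually executing on the GPU.
 */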
static bool wait_for_hang(struct hang *h, struct drm_i915_gem_request *rq)
{
        return !(wait_for_us(i915_seqno_passed(hws_seqno(h, rq),
                                               rq->fence.seqno),
                             10) &&
                 wait_for(i915_seqno_passed(hws_seqno(h, rq),
                                            rq->fence.seqno),
                          1000));
}

static int igt_wait_reset(void *arg)
{
        struct drm_i915_private *i915 = arg;
        struct drm_i915_gem_request *rq;
        unsigned int reset_count;
        struct hang h;
        long timeout;
        int err;

        if (!intel_engine_can_store_dword(i915->engine[RCS]))
                return 0;

        /* Check that we detect a stuck waiter and issue a reset */

        global_reset_lock(i915);

        mutex_lock(&i915->drm.struct_mutex);
        err = hang_init(&h, i915);
        if (err)
                goto unlock;

        rq = hang_create_request(&h, i915->engine[RCS], i915->kernel_context);
        if (IS_ERR(rq)) {
                err = PTR_ERR(rq);
                goto fini;
        }

        i915_gem_request_get(rq);
        __i915_add_request(rq, true);

        if (!wait_for_hang(&h, rq)) {
                pr_err("Failed to start request %x\n", rq->fence.seqno);
                err = -EIO;
                goto out_rq;
        }

        reset_count = fake_hangcheck(rq);

        timeout = i915_wait_request(rq, I915_WAIT_LOCKED, 10);
        if (timeout < 0) {
                pr_err("i915_wait_request failed on a stuck request: err=%ld\n",
                       timeout);
                err = timeout;
                goto out_rq;
        }

        GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
        if (i915_reset_count(&i915->gpu_error) == reset_count) {
                pr_err("No GPU reset recorded!\n");
                err = -EINVAL;
                goto out_rq;
        }

out_rq:
        i915_gem_request_put(rq);
fini:
        hang_fini(&h);
unlock:
        mutex_unlock(&i915->drm.struct_mutex);
        global_reset_unlock(i915);

        if (i915_terminally_wedged(&i915->gpu_error))
                return -EIO;

        return err;
}

static int igt_reset_queue(void *arg)
{
        struct drm_i915_private *i915 = arg;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        struct hang h;
        int err;

        /* Check that we replay pending requests following a hang */

        global_reset_lock(i915);

        mutex_lock(&i915->drm.struct_mutex);
        err = hang_init(&h, i915);
        if (err)
                goto unlock;

        for_each_engine(engine, i915, id) {
                struct drm_i915_gem_request *prev;
                IGT_TIMEOUT(end_time);
                unsigned int count;

                if (!intel_engine_can_store_dword(engine))
                        continue;

                prev = hang_create_request(&h, engine, i915->kernel_context);
                if (IS_ERR(prev)) {
                        err = PTR_ERR(prev);
                        goto fini;
                }

                i915_gem_request_get(prev);
                __i915_add_request(prev, true);

                count = 0;
                do {
                        struct drm_i915_gem_request *rq;
                        unsigned int reset_count;

                        rq = hang_create_request(&h,
                                                 engine,
                                                 i915->kernel_context);
                        if (IS_ERR(rq)) {
                                err = PTR_ERR(rq);
                                goto fini;
                        }

                        i915_gem_request_get(rq);
                        __i915_add_request(rq, true);

                        if (!wait_for_hang(&h, prev)) {
                                pr_err("Failed to start request %x\n",
                                       prev->fence.seqno);
                                i915_gem_request_put(rq);
                                i915_gem_request_put(prev);
                                err = -EIO;
                                goto fini;
                        }

                        reset_count = fake_hangcheck(prev);

                        i915_reset(i915, I915_RESET_QUIET);

                        GEM_BUG_ON(test_bit(I915_RESET_HANDOFF,
                                            &i915->gpu_error.flags));

                        if (prev->fence.error != -EIO) {
                                pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n",
                                       prev->fence.error);
                                i915_gem_request_put(rq);
                                i915_gem_request_put(prev);
                                err = -EINVAL;
                                goto fini;
                        }

                        if (rq->fence.error) {
                                pr_err("Fence error status not zero [%d] after unrelated reset\n",
                                       rq->fence.error);
                                i915_gem_request_put(rq);
                                i915_gem_request_put(prev);
                                err = -EINVAL;
                                goto fini;
                        }

                        if (i915_reset_count(&i915->gpu_error) == reset_count) {
                                pr_err("No GPU reset recorded!\n");
                                i915_gem_request_put(rq);
                                i915_gem_request_put(prev);
                                err = -EINVAL;
                                goto fini;
                        }

                        i915_gem_request_put(prev);
                        prev = rq;
                        count++;
                } while (time_before(jiffies, end_time));
                pr_info("%s: Completed %d resets\n", engine->name, count);

                *h.batch = MI_BATCH_BUFFER_END;
                wmb();

                i915_gem_request_put(prev);
        }

fini:
        hang_fini(&h);
unlock:
        mutex_unlock(&i915->drm.struct_mutex);
        global_reset_unlock(i915);

        if (i915_terminally_wedged(&i915->gpu_error))
                return -EIO;

        return err;
}

static int igt_handle_error(void *arg)
{
        struct drm_i915_private *i915 = arg;
        struct intel_engine_cs *engine = i915->engine[RCS];
        struct hang h;
        struct drm_i915_gem_request *rq;
        struct i915_gpu_state *error;
        int err;

        /* Check that we can issue a global GPU and engine reset */

        if (!intel_has_reset_engine(i915))
                return 0;

        if (!intel_engine_can_store_dword(i915->engine[RCS]))
                return 0;

        mutex_lock(&i915->drm.struct_mutex);

        err = hang_init(&h, i915);
        if (err)
                goto err_unlock;

        rq = hang_create_request(&h, engine, i915->kernel_context);
        if (IS_ERR(rq)) {
                err = PTR_ERR(rq);
                goto err_fini;
        }

        i915_gem_request_get(rq);
        __i915_add_request(rq, true);

        if (!wait_for_hang(&h, rq)) {
                pr_err("Failed to start request %x\n", rq->fence.seqno);
                err = -EIO;
                goto err_request;
        }

        mutex_unlock(&i915->drm.struct_mutex);

        /* Temporarily disable error capture */
        error = xchg(&i915->gpu_error.first_error, (void *)-1);

        engine->hangcheck.stalled = true;
        engine->hangcheck.seqno = intel_engine_get_seqno(engine);

        i915_handle_error(i915, intel_engine_flag(engine), "%s", __func__);

        xchg(&i915->gpu_error.first_error, error);

        mutex_lock(&i915->drm.struct_mutex);

        if (rq->fence.error != -EIO) {
                pr_err("Guilty request not identified!\n");
                err = -EINVAL;
                goto err_request;
        }

err_request:
        i915_gem_request_put(rq);
err_fini:
        hang_fini(&h);
err_unlock:
        mutex_unlock(&i915->drm.struct_mutex);
        return err;
}

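/*
 * Live selftest entry point; skipped entirely on hardware that does not
 * support GPU reset.
 */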
int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
{
        static const struct i915_subtest tests[] = {
                SUBTEST(igt_hang_sanitycheck),
                SUBTEST(igt_global_reset),
                SUBTEST(igt_reset_engine),
                SUBTEST(igt_reset_active_engines),
                SUBTEST(igt_wait_reset),
                SUBTEST(igt_reset_queue),
                SUBTEST(igt_handle_error),
        };

        if (!intel_has_gpu_reset(i915))
                return 0;

        return i915_subtests(tests, i915);
}