1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2008-2018 Intel Corporation
5  */
6
7 #include <linux/sched/mm.h>
8 #include <linux/stop_machine.h>
9
10 #include "i915_drv.h"
11 #include "i915_gpu_error.h"
12 #include "i915_reset.h"
13
14 #include "intel_guc.h"
15
16 #define RESET_MAX_RETRIES 3
17
18 /* XXX How to handle concurrent GGTT updates using tiling registers? */
19 #define RESET_UNDER_STOP_MACHINE 0
20
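/*
 * Cancel the remaining requests of a hung context: skip any of its
 * requests still queued on the engine timeline, then everything still
 * pending on the hung request's own timeline.
 */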
21 static void engine_skip_context(struct i915_request *rq)
22 {
23         struct intel_engine_cs *engine = rq->engine;
24         struct i915_gem_context *hung_ctx = rq->gem_context;
25         struct i915_timeline *timeline = rq->timeline;
26
27         lockdep_assert_held(&engine->timeline.lock);
28         GEM_BUG_ON(timeline == &engine->timeline);
29
30         spin_lock(&timeline->lock);
31
32         if (i915_request_is_active(rq)) {
33                 list_for_each_entry_continue(rq,
34                                              &engine->timeline.requests, link)
35                         if (rq->gem_context == hung_ctx)
36                                 i915_request_skip(rq, -EIO);
37         }
38
39         list_for_each_entry(rq, &timeline->requests, link)
40                 i915_request_skip(rq, -EIO);
41
42         spin_unlock(&timeline->lock);
43 }
44
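/* Propagate a hang onto the ban score of the owning client (file). */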
45 static void client_mark_guilty(struct drm_i915_file_private *file_priv,
46                                const struct i915_gem_context *ctx)
47 {
48         unsigned int score;
49         unsigned long prev_hang;
50
51         if (i915_gem_context_is_banned(ctx))
52                 score = I915_CLIENT_SCORE_CONTEXT_BAN;
53         else
54                 score = 0;
55
56         prev_hang = xchg(&file_priv->hang_timestamp, jiffies);
57         if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES))
58                 score += I915_CLIENT_SCORE_HANG_FAST;
59
60         if (score) {
61                 atomic_add(score, &file_priv->ban_score);
62
63                 DRM_DEBUG_DRIVER("client %s: gained %u ban score, now %u\n",
64                                  ctx->name, score,
65                                  atomic_read(&file_priv->ban_score));
66         }
67 }
68
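/*
 * Record a hang against the context: bump its guilty count and ban score,
 * banning it once the score reaches CONTEXT_SCORE_BAN_THRESHOLD. Returns
 * true if the context is now banned.
 */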
69 static bool context_mark_guilty(struct i915_gem_context *ctx)
70 {
71         unsigned int score;
72         bool banned, bannable;
73
74         atomic_inc(&ctx->guilty_count);
75
76         bannable = i915_gem_context_is_bannable(ctx);
77         score = atomic_add_return(CONTEXT_SCORE_GUILTY, &ctx->ban_score);
78         banned = score >= CONTEXT_SCORE_BAN_THRESHOLD;
79
80         /* Cool contexts don't accumulate client ban score */
81         if (!bannable)
82                 return false;
83
84         if (banned) {
85                 DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, banned\n",
86                                  ctx->name, atomic_read(&ctx->guilty_count),
87                                  score);
88                 i915_gem_context_set_banned(ctx);
89         }
90
91         if (!IS_ERR_OR_NULL(ctx->file_priv))
92                 client_mark_guilty(ctx->file_priv, ctx);
93
94         return banned;
95 }
96
97 static void context_mark_innocent(struct i915_gem_context *ctx)
98 {
99         atomic_inc(&ctx->active_count);
100 }
101
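/*
 * Handle the request caught up in the reset: a guilty request is skipped
 * (cancelling the rest of its context if that context is now banned),
 * while an innocent one merely has -EAGAIN set on its fence.
 */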
102 void i915_reset_request(struct i915_request *rq, bool guilty)
103 {
104         lockdep_assert_held(&rq->engine->timeline.lock);
105         GEM_BUG_ON(i915_request_completed(rq));
106
107         if (guilty) {
108                 i915_request_skip(rq, -EIO);
109                 if (context_mark_guilty(rq->gem_context))
110                         engine_skip_context(rq);
111         } else {
112                 dma_fence_set_error(&rq->fence, -EAGAIN);
113                 context_mark_innocent(rq->gem_context);
114         }
115 }
116
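/*
 * Quiesce the engine before reset: stop the command streamer and force
 * the ring registers into an empty, disabled state (HEAD, TAIL and CTL
 * cleared).
 */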
117 static void gen3_stop_engine(struct intel_engine_cs *engine)
118 {
119         struct drm_i915_private *dev_priv = engine->i915;
120         const u32 base = engine->mmio_base;
121
122         if (intel_engine_stop_cs(engine))
123                 DRM_DEBUG_DRIVER("%s: timed out on STOP_RING\n", engine->name);
124
125         I915_WRITE_FW(RING_HEAD(base), I915_READ_FW(RING_TAIL(base)));
126         POSTING_READ_FW(RING_HEAD(base)); /* paranoia */
127
128         I915_WRITE_FW(RING_HEAD(base), 0);
129         I915_WRITE_FW(RING_TAIL(base), 0);
130         POSTING_READ_FW(RING_TAIL(base));
131
132         /* The ring must be empty before it is disabled */
133         I915_WRITE_FW(RING_CTL(base), 0);
134
135         /* Check acts as a post */
136         if (I915_READ_FW(RING_HEAD(base)) != 0)
137                 DRM_DEBUG_DRIVER("%s: ring head not parked\n",
138                                  engine->name);
139 }
140
141 static void i915_stop_engines(struct drm_i915_private *i915,
142                               unsigned int engine_mask)
143 {
144         struct intel_engine_cs *engine;
145         enum intel_engine_id id;
146
147         if (INTEL_GEN(i915) < 3)
148                 return;
149
150         for_each_engine_masked(engine, i915, engine_mask, id)
151                 gen3_stop_engine(engine);
152 }
153
154 static bool i915_in_reset(struct pci_dev *pdev)
155 {
156         u8 gdrst;
157
158         pci_read_config_byte(pdev, I915_GDRST, &gdrst);
159         return gdrst & GRDOM_RESET_STATUS;
160 }
161
162 static int i915_do_reset(struct drm_i915_private *i915,
163                          unsigned int engine_mask,
164                          unsigned int retry)
165 {
166         struct pci_dev *pdev = i915->drm.pdev;
167         int err;
168
169         /* Assert reset for at least 20 usec, and wait for acknowledgement. */
170         pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
171         udelay(50);
172         err = wait_for_atomic(i915_in_reset(pdev), 50);
173
174         /* Clear the reset request. */
175         pci_write_config_byte(pdev, I915_GDRST, 0);
176         udelay(50);
177         if (!err)
178                 err = wait_for_atomic(!i915_in_reset(pdev), 50);
179
180         return err;
181 }
182
183 static bool g4x_reset_complete(struct pci_dev *pdev)
184 {
185         u8 gdrst;
186
187         pci_read_config_byte(pdev, I915_GDRST, &gdrst);
188         return (gdrst & GRDOM_RESET_ENABLE) == 0;
189 }
190
191 static int g33_do_reset(struct drm_i915_private *i915,
192                         unsigned int engine_mask,
193                         unsigned int retry)
194 {
195         struct pci_dev *pdev = i915->drm.pdev;
196
197         pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
198         return wait_for_atomic(g4x_reset_complete(pdev), 50);
199 }
200
201 static int g4x_do_reset(struct drm_i915_private *dev_priv,
202                         unsigned int engine_mask,
203                         unsigned int retry)
204 {
205         struct pci_dev *pdev = dev_priv->drm.pdev;
206         int ret;
207
208         /* WaVcpClkGateDisableForMediaReset:ctg,elk */
209         I915_WRITE_FW(VDECCLK_GATE_D,
210                       I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE);
211         POSTING_READ_FW(VDECCLK_GATE_D);
212
213         pci_write_config_byte(pdev, I915_GDRST,
214                               GRDOM_MEDIA | GRDOM_RESET_ENABLE);
 215         ret = wait_for_atomic(g4x_reset_complete(pdev), 50);
216         if (ret) {
217                 DRM_DEBUG_DRIVER("Wait for media reset failed\n");
218                 goto out;
219         }
220
221         pci_write_config_byte(pdev, I915_GDRST,
222                               GRDOM_RENDER | GRDOM_RESET_ENABLE);
 223         ret = wait_for_atomic(g4x_reset_complete(pdev), 50);
224         if (ret) {
225                 DRM_DEBUG_DRIVER("Wait for render reset failed\n");
226                 goto out;
227         }
228
229 out:
230         pci_write_config_byte(pdev, I915_GDRST, 0);
231
232         I915_WRITE_FW(VDECCLK_GATE_D,
233                       I915_READ(VDECCLK_GATE_D) & ~VCP_UNIT_CLOCK_GATE_DISABLE);
234         POSTING_READ_FW(VDECCLK_GATE_D);
235
236         return ret;
237 }
238
239 static int ironlake_do_reset(struct drm_i915_private *dev_priv,
240                              unsigned int engine_mask,
241                              unsigned int retry)
242 {
243         int ret;
244
245         I915_WRITE_FW(ILK_GDSR, ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE);
246         ret = __intel_wait_for_register_fw(dev_priv, ILK_GDSR,
247                                            ILK_GRDOM_RESET_ENABLE, 0,
248                                            5000, 0,
249                                            NULL);
250         if (ret) {
251                 DRM_DEBUG_DRIVER("Wait for render reset failed\n");
252                 goto out;
253         }
254
255         I915_WRITE_FW(ILK_GDSR, ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE);
256         ret = __intel_wait_for_register_fw(dev_priv, ILK_GDSR,
257                                            ILK_GRDOM_RESET_ENABLE, 0,
258                                            5000, 0,
259                                            NULL);
260         if (ret) {
261                 DRM_DEBUG_DRIVER("Wait for media reset failed\n");
262                 goto out;
263         }
264
265 out:
266         I915_WRITE_FW(ILK_GDSR, 0);
267         POSTING_READ_FW(ILK_GDSR);
268         return ret;
269 }
270
271 /* Reset the hardware domains (GENX_GRDOM_*) specified by mask */
272 static int gen6_hw_domain_reset(struct drm_i915_private *dev_priv,
273                                 u32 hw_domain_mask)
274 {
275         int err;
276
277         /*
278          * GEN6_GDRST is not in the gt power well, no need to check
279          * for fifo space for the write or forcewake the chip for
280          * the read
281          */
282         I915_WRITE_FW(GEN6_GDRST, hw_domain_mask);
283
284         /* Wait for the device to ack the reset requests */
285         err = __intel_wait_for_register_fw(dev_priv,
286                                            GEN6_GDRST, hw_domain_mask, 0,
287                                            500, 0,
288                                            NULL);
289         if (err)
290                 DRM_DEBUG_DRIVER("Wait for 0x%08x engines reset failed\n",
291                                  hw_domain_mask);
292
293         return err;
294 }
295
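/* Translate the engine mask into GDRST domain bits and trigger the reset. */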
296 static int gen6_reset_engines(struct drm_i915_private *i915,
297                               unsigned int engine_mask,
298                               unsigned int retry)
299 {
300         struct intel_engine_cs *engine;
301         const u32 hw_engine_mask[I915_NUM_ENGINES] = {
302                 [RCS] = GEN6_GRDOM_RENDER,
303                 [BCS] = GEN6_GRDOM_BLT,
304                 [VCS] = GEN6_GRDOM_MEDIA,
305                 [VCS2] = GEN8_GRDOM_MEDIA2,
306                 [VECS] = GEN6_GRDOM_VECS,
307         };
308         u32 hw_mask;
309
310         if (engine_mask == ALL_ENGINES) {
311                 hw_mask = GEN6_GRDOM_FULL;
312         } else {
313                 unsigned int tmp;
314
315                 hw_mask = 0;
316                 for_each_engine_masked(engine, i915, engine_mask, tmp)
317                         hw_mask |= hw_engine_mask[engine->id];
318         }
319
320         return gen6_hw_domain_reset(i915, hw_mask);
321 }
322
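/*
 * Before resetting a VD/VE engine on gen11, force-lock the shared SFC unit
 * it may be using so that it cannot change hands mid-reset. Returns the
 * extra GDRST bit needed to reset the SFC alongside the engine if the SFC
 * is currently in use by this engine, or 0 otherwise.
 */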
323 static u32 gen11_lock_sfc(struct drm_i915_private *dev_priv,
324                           struct intel_engine_cs *engine)
325 {
326         u8 vdbox_sfc_access = RUNTIME_INFO(dev_priv)->vdbox_sfc_access;
327         i915_reg_t sfc_forced_lock, sfc_forced_lock_ack;
328         u32 sfc_forced_lock_bit, sfc_forced_lock_ack_bit;
329         i915_reg_t sfc_usage;
330         u32 sfc_usage_bit;
331         u32 sfc_reset_bit;
332
333         switch (engine->class) {
334         case VIDEO_DECODE_CLASS:
335                 if ((BIT(engine->instance) & vdbox_sfc_access) == 0)
336                         return 0;
337
338                 sfc_forced_lock = GEN11_VCS_SFC_FORCED_LOCK(engine);
339                 sfc_forced_lock_bit = GEN11_VCS_SFC_FORCED_LOCK_BIT;
340
341                 sfc_forced_lock_ack = GEN11_VCS_SFC_LOCK_STATUS(engine);
 342                 sfc_forced_lock_ack_bit = GEN11_VCS_SFC_LOCK_ACK_BIT;
343
344                 sfc_usage = GEN11_VCS_SFC_LOCK_STATUS(engine);
345                 sfc_usage_bit = GEN11_VCS_SFC_USAGE_BIT;
346                 sfc_reset_bit = GEN11_VCS_SFC_RESET_BIT(engine->instance);
347                 break;
348
349         case VIDEO_ENHANCEMENT_CLASS:
350                 sfc_forced_lock = GEN11_VECS_SFC_FORCED_LOCK(engine);
351                 sfc_forced_lock_bit = GEN11_VECS_SFC_FORCED_LOCK_BIT;
352
353                 sfc_forced_lock_ack = GEN11_VECS_SFC_LOCK_ACK(engine);
 354                 sfc_forced_lock_ack_bit = GEN11_VECS_SFC_LOCK_ACK_BIT;
355
356                 sfc_usage = GEN11_VECS_SFC_USAGE(engine);
357                 sfc_usage_bit = GEN11_VECS_SFC_USAGE_BIT;
358                 sfc_reset_bit = GEN11_VECS_SFC_RESET_BIT(engine->instance);
359                 break;
360
361         default:
362                 return 0;
363         }
364
365         /*
366          * Tell the engine that a software reset is going to happen. The engine
367          * will then try to force lock the SFC (if currently locked, it will
368          * remain so until we tell the engine it is safe to unlock; if currently
369          * unlocked, it will ignore this and all new lock requests). If SFC
370          * ends up being locked to the engine we want to reset, we have to reset
371          * it as well (we will unlock it once the reset sequence is completed).
372          */
373         I915_WRITE_FW(sfc_forced_lock,
374                       I915_READ_FW(sfc_forced_lock) | sfc_forced_lock_bit);
375
376         if (__intel_wait_for_register_fw(dev_priv,
377                                          sfc_forced_lock_ack,
378                                          sfc_forced_lock_ack_bit,
379                                          sfc_forced_lock_ack_bit,
380                                          1000, 0, NULL)) {
381                 DRM_DEBUG_DRIVER("Wait for SFC forced lock ack failed\n");
382                 return 0;
383         }
384
385         if (I915_READ_FW(sfc_usage) & sfc_usage_bit)
386                 return sfc_reset_bit;
387
388         return 0;
389 }
390
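/* Drop the forced SFC lock taken by gen11_lock_sfc(). */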
391 static void gen11_unlock_sfc(struct drm_i915_private *dev_priv,
392                              struct intel_engine_cs *engine)
393 {
394         u8 vdbox_sfc_access = RUNTIME_INFO(dev_priv)->vdbox_sfc_access;
395         i915_reg_t sfc_forced_lock;
396         u32 sfc_forced_lock_bit;
397
398         switch (engine->class) {
399         case VIDEO_DECODE_CLASS:
400                 if ((BIT(engine->instance) & vdbox_sfc_access) == 0)
401                         return;
402
403                 sfc_forced_lock = GEN11_VCS_SFC_FORCED_LOCK(engine);
404                 sfc_forced_lock_bit = GEN11_VCS_SFC_FORCED_LOCK_BIT;
405                 break;
406
407         case VIDEO_ENHANCEMENT_CLASS:
408                 sfc_forced_lock = GEN11_VECS_SFC_FORCED_LOCK(engine);
409                 sfc_forced_lock_bit = GEN11_VECS_SFC_FORCED_LOCK_BIT;
410                 break;
411
412         default:
413                 return;
414         }
415
416         I915_WRITE_FW(sfc_forced_lock,
417                       I915_READ_FW(sfc_forced_lock) & ~sfc_forced_lock_bit);
418 }
419
420 static int gen11_reset_engines(struct drm_i915_private *i915,
421                                unsigned int engine_mask,
422                                unsigned int retry)
423 {
424         const u32 hw_engine_mask[I915_NUM_ENGINES] = {
425                 [RCS] = GEN11_GRDOM_RENDER,
426                 [BCS] = GEN11_GRDOM_BLT,
427                 [VCS] = GEN11_GRDOM_MEDIA,
428                 [VCS2] = GEN11_GRDOM_MEDIA2,
429                 [VCS3] = GEN11_GRDOM_MEDIA3,
430                 [VCS4] = GEN11_GRDOM_MEDIA4,
431                 [VECS] = GEN11_GRDOM_VECS,
432                 [VECS2] = GEN11_GRDOM_VECS2,
433         };
434         struct intel_engine_cs *engine;
435         unsigned int tmp;
436         u32 hw_mask;
437         int ret;
438
439         BUILD_BUG_ON(VECS2 + 1 != I915_NUM_ENGINES);
440
441         if (engine_mask == ALL_ENGINES) {
442                 hw_mask = GEN11_GRDOM_FULL;
443         } else {
444                 hw_mask = 0;
445                 for_each_engine_masked(engine, i915, engine_mask, tmp) {
446                         hw_mask |= hw_engine_mask[engine->id];
447                         hw_mask |= gen11_lock_sfc(i915, engine);
448                 }
449         }
450
451         ret = gen6_hw_domain_reset(i915, hw_mask);
452
453         if (engine_mask != ALL_ENGINES)
454                 for_each_engine_masked(engine, i915, engine_mask, tmp)
455                         gen11_unlock_sfc(i915, engine);
456
457         return ret;
458 }
459
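/*
 * Request a per-engine reset and wait for the engine to signal that it is
 * ready (RESET_CTL_READY_TO_RESET) before the reset is actually asserted.
 */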
460 static int gen8_engine_reset_prepare(struct intel_engine_cs *engine)
461 {
462         struct drm_i915_private *dev_priv = engine->i915;
463         int ret;
464
465         I915_WRITE_FW(RING_RESET_CTL(engine->mmio_base),
466                       _MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET));
467
468         ret = __intel_wait_for_register_fw(dev_priv,
469                                            RING_RESET_CTL(engine->mmio_base),
470                                            RESET_CTL_READY_TO_RESET,
471                                            RESET_CTL_READY_TO_RESET,
472                                            700, 0,
473                                            NULL);
474         if (ret)
475                 DRM_ERROR("%s: reset request timeout\n", engine->name);
476
477         return ret;
478 }
479
480 static void gen8_engine_reset_cancel(struct intel_engine_cs *engine)
481 {
482         struct drm_i915_private *dev_priv = engine->i915;
483
484         I915_WRITE_FW(RING_RESET_CTL(engine->mmio_base),
485                       _MASKED_BIT_DISABLE(RESET_CTL_REQUEST_RESET));
486 }
487
488 static int gen8_reset_engines(struct drm_i915_private *i915,
489                               unsigned int engine_mask,
490                               unsigned int retry)
491 {
492         struct intel_engine_cs *engine;
493         const bool reset_non_ready = retry >= 1;
494         unsigned int tmp;
495         int ret;
496
497         for_each_engine_masked(engine, i915, engine_mask, tmp) {
498                 ret = gen8_engine_reset_prepare(engine);
499                 if (ret && !reset_non_ready)
500                         goto skip_reset;
501
502                 /*
503                  * If this is not the first failed attempt to prepare,
504                  * we decide to proceed anyway.
505                  *
506                  * By doing so we risk context corruption and with
507                  * some gens (kbl), possible system hang if reset
508                  * happens during active bb execution.
509                  *
 510                  * We would rather take context corruption than a
 511                  * failed reset with a wedged driver/gpu. The active
 512                  * bb execution case should be covered by the
 513                  * i915_stop_engines() we call before the reset.
514                  */
515         }
516
517         if (INTEL_GEN(i915) >= 11)
518                 ret = gen11_reset_engines(i915, engine_mask, retry);
519         else
520                 ret = gen6_reset_engines(i915, engine_mask, retry);
521
522 skip_reset:
523         for_each_engine_masked(engine, i915, engine_mask, tmp)
524                 gen8_engine_reset_cancel(engine);
525
526         return ret;
527 }
528
529 typedef int (*reset_func)(struct drm_i915_private *,
530                           unsigned int engine_mask,
531                           unsigned int retry);
532
533 static reset_func intel_get_gpu_reset(struct drm_i915_private *i915)
534 {
535         if (!i915_modparams.reset)
536                 return NULL;
537
538         if (INTEL_GEN(i915) >= 8)
539                 return gen8_reset_engines;
540         else if (INTEL_GEN(i915) >= 6)
541                 return gen6_reset_engines;
542         else if (INTEL_GEN(i915) >= 5)
543                 return ironlake_do_reset;
544         else if (IS_G4X(i915))
545                 return g4x_do_reset;
546         else if (IS_G33(i915) || IS_PINEVIEW(i915))
547                 return g33_do_reset;
548         else if (INTEL_GEN(i915) >= 3)
549                 return i915_do_reset;
550         else
551                 return NULL;
552 }
553
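/*
 * Perform the platform-specific hardware reset of the engines in
 * @engine_mask, retrying a full-GPU reset up to RESET_MAX_RETRIES times.
 * Returns 0 on success, -ENODEV if no reset method is available, or a
 * negative error code from the reset itself.
 */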
554 int intel_gpu_reset(struct drm_i915_private *i915, unsigned int engine_mask)
555 {
556         const int retries = engine_mask == ALL_ENGINES ? RESET_MAX_RETRIES : 1;
557         reset_func reset;
558         int ret = -ETIMEDOUT;
559         int retry;
560
561         reset = intel_get_gpu_reset(i915);
562         if (!reset)
563                 return -ENODEV;
564
565         /*
566          * If the power well sleeps during the reset, the reset
567          * request may be dropped and never completes (causing -EIO).
568          */
569         intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
570         for (retry = 0; ret == -ETIMEDOUT && retry < retries; retry++) {
571                 /*
 572                  * We stop the engines, otherwise we might get a failed reset
 573                  * and a dead gpu (on elk). Even a modern gpu such as kbl can
 574                  * suffer a system hang if a batchbuffer is in progress when
 575                  * the reset is issued, regardless of the READY_TO_RESET ack.
576                  * Thus assume it is best to stop engines on all gens
577                  * where we have a gpu reset.
578                  *
579                  * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
580                  *
581                  * WaMediaResetMainRingCleanup:ctg,elk (presumably)
582                  *
583                  * FIXME: Wa for more modern gens needs to be validated
584                  */
585                 i915_stop_engines(i915, engine_mask);
586
587                 GEM_TRACE("engine_mask=%x\n", engine_mask);
588                 preempt_disable();
589                 ret = reset(i915, engine_mask, retry);
590                 preempt_enable();
591         }
592         intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
593
594         return ret;
595 }
596
597 bool intel_has_gpu_reset(struct drm_i915_private *i915)
598 {
599         if (USES_GUC(i915))
600                 return false;
601
602         return intel_get_gpu_reset(i915);
603 }
604
605 bool intel_has_reset_engine(struct drm_i915_private *i915)
606 {
607         return INTEL_INFO(i915)->has_reset_engine && i915_modparams.reset >= 2;
608 }
609
610 int intel_reset_guc(struct drm_i915_private *i915)
611 {
612         u32 guc_domain =
613                 INTEL_GEN(i915) >= 11 ? GEN11_GRDOM_GUC : GEN9_GRDOM_GUC;
614         int ret;
615
616         GEM_BUG_ON(!HAS_GUC(i915));
617
618         intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
619         ret = gen6_hw_domain_reset(i915, guc_domain);
620         intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
621
622         return ret;
623 }
624
625 /*
 626  * Ensure the irq handler finishes, and is not run again.
627  * Also return the active request so that we only search for it once.
628  */
629 static void reset_prepare_engine(struct intel_engine_cs *engine)
630 {
631         /*
632          * During the reset sequence, we must prevent the engine from
633          * entering RC6. As the context state is undefined until we restart
634          * the engine, if it does enter RC6 during the reset, the state
635          * written to the powercontext is undefined and so we may lose
636          * GPU state upon resume, i.e. fail to restart after a reset.
637          */
638         intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL);
639         engine->reset.prepare(engine);
640 }
641
642 static void reset_prepare(struct drm_i915_private *i915)
643 {
644         struct intel_engine_cs *engine;
645         enum intel_engine_id id;
646
647         for_each_engine(engine, i915, id)
648                 reset_prepare_engine(engine);
649
650         intel_uc_sanitize(i915);
651 }
652
653 static int gt_reset(struct drm_i915_private *i915, unsigned int stalled_mask)
654 {
655         struct intel_engine_cs *engine;
656         enum intel_engine_id id;
657         int err;
658
659         /*
660          * Everything depends on having the GTT running, so we need to start
661          * there.
662          */
663         err = i915_ggtt_enable_hw(i915);
664         if (err)
665                 return err;
666
667         for_each_engine(engine, i915, id)
668                 intel_engine_reset(engine, stalled_mask & ENGINE_MASK(id));
669
670         i915_gem_restore_fences(i915);
671
672         return err;
673 }
674
675 static void reset_finish_engine(struct intel_engine_cs *engine)
676 {
677         engine->reset.finish(engine);
678         intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL);
679 }
680
681 struct i915_gpu_restart {
682         struct work_struct work;
683         struct drm_i915_private *i915;
684 };
685
686 static void restart_work(struct work_struct *work)
687 {
688         struct i915_gpu_restart *arg = container_of(work, typeof(*arg), work);
689         struct drm_i915_private *i915 = arg->i915;
690         struct intel_engine_cs *engine;
691         enum intel_engine_id id;
692         intel_wakeref_t wakeref;
693
694         wakeref = intel_runtime_pm_get(i915);
695         mutex_lock(&i915->drm.struct_mutex);
696         WRITE_ONCE(i915->gpu_error.restart, NULL);
697
698         for_each_engine(engine, i915, id) {
699                 struct i915_request *rq;
700
701                 /*
 702                  * Ostensibly, we always want a context loaded for powersaving,
703                  * so if the engine is idle after the reset, send a request
704                  * to load our scratch kernel_context.
705                  */
706                 if (!intel_engine_is_idle(engine))
707                         continue;
708
709                 rq = i915_request_alloc(engine, i915->kernel_context);
710                 if (!IS_ERR(rq))
711                         i915_request_add(rq);
712         }
713
714         mutex_unlock(&i915->drm.struct_mutex);
715         intel_runtime_pm_put(i915, wakeref);
716
717         kfree(arg);
718 }
719
720 static void reset_finish(struct drm_i915_private *i915)
721 {
722         struct intel_engine_cs *engine;
723         enum intel_engine_id id;
724
725         for_each_engine(engine, i915, id)
726                 reset_finish_engine(engine);
727 }
728
729 static void reset_restart(struct drm_i915_private *i915)
730 {
731         struct i915_gpu_restart *arg;
732
733         /*
 734          * Following the reset, ensure that we always reload a context for
 735          * powersaving, and correct engine->last_retired_context. Since
736          * this requires us to submit a request, queue a worker to do that
737          * task for us to evade any locking here.
738          */
739         if (READ_ONCE(i915->gpu_error.restart))
740                 return;
741
742         arg = kmalloc(sizeof(*arg), GFP_KERNEL);
743         if (arg) {
744                 arg->i915 = i915;
745                 INIT_WORK(&arg->work, restart_work);
746
747                 WRITE_ONCE(i915->gpu_error.restart, arg);
748                 queue_work(i915->wq, &arg->work);
749         }
750 }
751
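/*
 * Once wedged, all submission is redirected here: each request is
 * immediately completed with -EIO so that waiters are released without
 * touching the hardware.
 */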
752 static void nop_submit_request(struct i915_request *request)
753 {
754         struct intel_engine_cs *engine = request->engine;
755         unsigned long flags;
756
757         GEM_TRACE("%s fence %llx:%lld -> -EIO\n",
758                   engine->name, request->fence.context, request->fence.seqno);
759         dma_fence_set_error(&request->fence, -EIO);
760
761         spin_lock_irqsave(&engine->timeline.lock, flags);
762         __i915_request_submit(request);
763         i915_request_mark_complete(request);
764         intel_engine_write_global_seqno(engine, request->global_seqno);
765         spin_unlock_irqrestore(&engine->timeline.lock, flags);
766
767         intel_engine_queue_breadcrumbs(engine);
768 }
769
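/*
 * Declare the GPU wedged: stop all further submission and complete every
 * in-flight request with -EIO. The device stays unusable until
 * i915_gem_unset_wedged() succeeds.
 */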
770 void i915_gem_set_wedged(struct drm_i915_private *i915)
771 {
772         struct i915_gpu_error *error = &i915->gpu_error;
773         struct intel_engine_cs *engine;
774         enum intel_engine_id id;
775
776         mutex_lock(&error->wedge_mutex);
777         if (test_bit(I915_WEDGED, &error->flags)) {
778                 mutex_unlock(&error->wedge_mutex);
779                 return;
780         }
781
782         if (GEM_SHOW_DEBUG() && !intel_engines_are_idle(i915)) {
783                 struct drm_printer p = drm_debug_printer(__func__);
784
785                 for_each_engine(engine, i915, id)
786                         intel_engine_dump(engine, &p, "%s\n", engine->name);
787         }
788
789         GEM_TRACE("start\n");
790
791         /*
792          * First, stop submission to hw, but do not yet complete requests by
793          * rolling the global seqno forward (since this would complete requests
794          * for which we haven't set the fence error to EIO yet).
795          */
796         for_each_engine(engine, i915, id)
797                 reset_prepare_engine(engine);
798
799         /* Even if the GPU reset fails, it should still stop the engines */
800         if (INTEL_GEN(i915) >= 5)
801                 intel_gpu_reset(i915, ALL_ENGINES);
802
803         for_each_engine(engine, i915, id) {
804                 engine->submit_request = nop_submit_request;
805                 engine->schedule = NULL;
806         }
807         i915->caps.scheduler = 0;
808
809         /*
810          * Make sure no request can slip through without getting completed by
811          * either this call here to intel_engine_write_global_seqno, or the one
812          * in nop_submit_request.
813          */
814         synchronize_rcu();
815
816         /* Mark all executing requests as skipped */
817         for_each_engine(engine, i915, id)
818                 engine->cancel_requests(engine);
819
820         for_each_engine(engine, i915, id) {
821                 reset_finish_engine(engine);
822                 intel_engine_signal_breadcrumbs(engine);
823         }
824
825         smp_mb__before_atomic();
826         set_bit(I915_WEDGED, &error->flags);
827
828         GEM_TRACE("end\n");
829         mutex_unlock(&error->wedge_mutex);
830
831         wake_up_all(&error->reset_queue);
832 }
833
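/*
 * Attempt to recover from a previous wedging: wait for every stuck request
 * to be completed with -EIO, sanitize the engines and restore the normal
 * submission backend. Returns true if the GPU may be used again.
 */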
834 bool i915_gem_unset_wedged(struct drm_i915_private *i915)
835 {
836         struct i915_gpu_error *error = &i915->gpu_error;
837         struct i915_timeline *tl;
838         bool ret = false;
839
840         if (!test_bit(I915_WEDGED, &error->flags))
841                 return true;
842
 843         if (!i915->gt.scratch) /* Never fully initialised, recovery impossible */
844                 return false;
845
846         mutex_lock(&error->wedge_mutex);
847
848         GEM_TRACE("start\n");
849
850         /*
851          * Before unwedging, make sure that all pending operations
852          * are flushed and errored out - we may have requests waiting upon
853          * third party fences. We marked all inflight requests as EIO, and
854          * every execbuf since returned EIO, for consistency we want all
855          * the currently pending requests to also be marked as EIO, which
856          * is done inside our nop_submit_request - and so we must wait.
857          *
858          * No more can be submitted until we reset the wedged bit.
859          */
860         mutex_lock(&i915->gt.timelines.mutex);
861         list_for_each_entry(tl, &i915->gt.timelines.active_list, link) {
862                 struct i915_request *rq;
863                 long timeout;
864
865                 rq = i915_active_request_get_unlocked(&tl->last_request);
866                 if (!rq)
867                         continue;
868
869                 /*
870                  * We can't use our normal waiter as we want to
871                  * avoid recursively trying to handle the current
872                  * reset. The basic dma_fence_default_wait() installs
873                  * a callback for dma_fence_signal(), which is
874                  * triggered by our nop handler (indirectly, the
875                  * callback enables the signaler thread which is
876                  * woken by the nop_submit_request() advancing the seqno
877                  * and when the seqno passes the fence, the signaler
878                  * then signals the fence waking us up).
879                  */
880                 timeout = dma_fence_default_wait(&rq->fence, true,
881                                                  MAX_SCHEDULE_TIMEOUT);
882                 i915_request_put(rq);
883                 if (timeout < 0) {
884                         mutex_unlock(&i915->gt.timelines.mutex);
885                         goto unlock;
886                 }
887         }
888         mutex_unlock(&i915->gt.timelines.mutex);
889
890         intel_engines_sanitize(i915, false);
891
892         /*
893          * Undo nop_submit_request. We prevent all new i915 requests from
894          * being queued (by disallowing execbuf whilst wedged) so having
895          * waited for all active requests above, we know the system is idle
896          * and do not have to worry about a thread being inside
897          * engine->submit_request() as we swap over. So unlike installing
898          * the nop_submit_request on reset, we can do this from normal
899          * context and do not require stop_machine().
900          */
901         intel_engines_reset_default_submission(i915);
902
903         GEM_TRACE("end\n");
904
905         smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
906         clear_bit(I915_WEDGED, &i915->gpu_error.flags);
907         ret = true;
908 unlock:
909         mutex_unlock(&i915->gpu_error.wedge_mutex);
910
911         return ret;
912 }
913
914 struct __i915_reset {
915         struct drm_i915_private *i915;
916         unsigned int stalled_mask;
917 };
918
919 static int __i915_reset__BKL(void *data)
920 {
921         struct __i915_reset *arg = data;
922         int err;
923
924         err = intel_gpu_reset(arg->i915, ALL_ENGINES);
925         if (err)
926                 return err;
927
928         return gt_reset(arg->i915, arg->stalled_mask);
929 }
930
931 #if RESET_UNDER_STOP_MACHINE
932 /*
933  * XXX An alternative to using stop_machine would be to park only the
934  * processes that have a GGTT mmap. By remote parking the threads (SIGSTOP)
 935  * we should be able to prevent their memory accesses via the lost fence
 936  * registers over the course of the reset without the potential recursive
 937  * locking of mutexes between the pagefault handler and reset.
938  *
939  * See igt/gem_mmap_gtt/hang
940  */
941 #define __do_reset(fn, arg) stop_machine(fn, arg, NULL)
942 #else
943 #define __do_reset(fn, arg) fn(arg)
944 #endif
945
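/*
 * Reset the whole GPU (optionally under stop_machine) and restore the
 * GTT and engine state, retrying a few times on failure.
 */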
946 static int do_reset(struct drm_i915_private *i915, unsigned int stalled_mask)
947 {
948         struct __i915_reset arg = { i915, stalled_mask };
949         int err, i;
950
951         err = __do_reset(__i915_reset__BKL, &arg);
952         for (i = 0; err && i < RESET_MAX_RETRIES; i++) {
953                 msleep(100);
954                 err = __do_reset(__i915_reset__BKL, &arg);
955         }
956
957         return err;
958 }
959
960 /**
961  * i915_reset - reset chip after a hang
962  * @i915: #drm_i915_private to reset
963  * @stalled_mask: mask of the stalled engines with the guilty requests
964  * @reason: user error message for why we are resetting
965  *
966  * Reset the chip.  Useful if a hang is detected. Marks the device as wedged
967  * on failure.
968  *
969  * Caller must hold the struct_mutex.
970  *
971  * Procedure is fairly simple:
972  *   - reset the chip using the reset reg
973  *   - re-init context state
974  *   - re-init hardware status page
975  *   - re-init ring buffer
976  *   - re-init interrupt state
977  *   - re-init display
978  */
979 void i915_reset(struct drm_i915_private *i915,
980                 unsigned int stalled_mask,
981                 const char *reason)
982 {
983         struct i915_gpu_error *error = &i915->gpu_error;
984         int ret;
985
986         GEM_TRACE("flags=%lx\n", error->flags);
987
988         might_sleep();
989         assert_rpm_wakelock_held(i915);
990         GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags));
991
992         /* Clear any previous failed attempts at recovery. Time to try again. */
993         if (!i915_gem_unset_wedged(i915))
994                 return;
995
996         if (reason)
997                 dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason);
998         error->reset_count++;
999
1000         reset_prepare(i915);
1001
1002         if (!intel_has_gpu_reset(i915)) {
1003                 if (i915_modparams.reset)
1004                         dev_err(i915->drm.dev, "GPU reset not supported\n");
1005                 else
1006                         DRM_DEBUG_DRIVER("GPU reset disabled\n");
1007                 goto error;
1008         }
1009
1010         if (do_reset(i915, stalled_mask)) {
1011                 dev_err(i915->drm.dev, "Failed to reset chip\n");
1012                 goto taint;
1013         }
1014
1015         intel_overlay_reset(i915);
1016
1017         /*
1018          * Next we need to restore the context, but we don't use those
1019          * yet either...
1020          *
1021          * Ring buffer needs to be re-initialized in the KMS case, or if X
1022          * was running at the time of the reset (i.e. we weren't VT
1023          * switched away).
1024          */
1025         ret = i915_gem_init_hw(i915);
1026         if (ret) {
1027                 DRM_ERROR("Failed to initialise HW following reset (%d)\n",
1028                           ret);
1029                 goto error;
1030         }
1031
1032         i915_queue_hangcheck(i915);
1033
1034 finish:
1035         reset_finish(i915);
1036         if (!i915_terminally_wedged(error))
1037                 reset_restart(i915);
1038         return;
1039
1040 taint:
1041         /*
1042          * History tells us that if we cannot reset the GPU now, we
1043          * never will. This then impacts everything that is run
1044          * subsequently. On failing the reset, we mark the driver
1045          * as wedged, preventing further execution on the GPU.
1046          * We also want to go one step further and add a taint to the
1047          * kernel so that any subsequent faults can be traced back to
1048          * this failure. This is important for CI, where if the
1049          * GPU/driver fails we would like to reboot and restart testing
1050          * rather than continue on into oblivion. For everyone else,
1051          * the system should still plod along, but they have been warned!
1052          */
1053         add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
1054 error:
1055         i915_gem_set_wedged(i915);
1056         goto finish;
1057 }
1058
1059 static inline int intel_gt_reset_engine(struct drm_i915_private *i915,
1060                                         struct intel_engine_cs *engine)
1061 {
1062         return intel_gpu_reset(i915, intel_engine_flag(engine));
1063 }
1064
1065 /**
1066  * i915_reset_engine - reset GPU engine to recover from a hang
1067  * @engine: engine to reset
1068  * @msg: reason for GPU reset; or NULL for no dev_notice()
1069  *
1070  * Reset a specific GPU engine. Useful if a hang is detected.
1071  * Returns zero on successful reset or otherwise an error code.
1072  *
1073  * Procedure is:
1074  *  - identify the request that caused the hang and drop it
1075  *  - reset engine (which will force the engine to idle)
1076  *  - re-init/configure engine
1077  */
1078 int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
1079 {
1080         struct i915_gpu_error *error = &engine->i915->gpu_error;
1081         int ret;
1082
1083         GEM_TRACE("%s flags=%lx\n", engine->name, error->flags);
1084         GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags));
1085
1086         reset_prepare_engine(engine);
1087
1088         if (msg)
1089                 dev_notice(engine->i915->drm.dev,
1090                            "Resetting %s for %s\n", engine->name, msg);
1091         error->reset_engine_count[engine->id]++;
1092
1093         if (!engine->i915->guc.execbuf_client)
1094                 ret = intel_gt_reset_engine(engine->i915, engine);
1095         else
1096                 ret = intel_guc_reset_engine(&engine->i915->guc, engine);
1097         if (ret) {
1098                 /* If we fail here, we expect to fallback to a global reset */
1099                 DRM_DEBUG_DRIVER("%sFailed to reset %s, ret=%d\n",
1100                                  engine->i915->guc.execbuf_client ? "GuC " : "",
1101                                  engine->name, ret);
1102                 goto out;
1103         }
1104
1105         /*
1106          * The request that caused the hang is stuck on elsp; we know the
1107          * active request and can drop it, then adjust head to skip the
1108          * offending request and resume the remaining requests in the queue.
1109          */
1110         intel_engine_reset(engine, true);
1111
1112         /*
1113          * The engine and its registers (and workarounds in case of render)
1114          * have been reset to their default values. Follow the init_ring
1115          * process to program RING_MODE, HWSP and re-enable submission.
1116          */
1117         ret = engine->init_hw(engine);
1118         if (ret)
1119                 goto out;
1120
1121 out:
1122         intel_engine_cancel_stop_cs(engine);
1123         reset_finish_engine(engine);
1124         return ret;
1125 }
1126
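/*
 * Full device reset path: notify userspace via uevents, then run the reset
 * under a wedge-on-timeout watchdog so that a stuck reset wedges the GPU
 * rather than hanging forever.
 */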
1127 static void i915_reset_device(struct drm_i915_private *i915,
1128                               u32 engine_mask,
1129                               const char *reason)
1130 {
1131         struct i915_gpu_error *error = &i915->gpu_error;
1132         struct kobject *kobj = &i915->drm.primary->kdev->kobj;
1133         char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
1134         char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };
1135         char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL };
1136         struct i915_wedge_me w;
1137
1138         kobject_uevent_env(kobj, KOBJ_CHANGE, error_event);
1139
1140         DRM_DEBUG_DRIVER("resetting chip\n");
1141         kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
1142
1143         /* Use a watchdog to ensure that our reset completes */
1144         i915_wedge_on_timeout(&w, i915, 5 * HZ) {
1145                 intel_prepare_reset(i915);
1146
1147                 i915_reset(i915, engine_mask, reason);
1148
1149                 intel_finish_reset(i915);
1150         }
1151
1152         if (!test_bit(I915_WEDGED, &error->flags))
1153                 kobject_uevent_env(kobj, KOBJ_CHANGE, reset_done_event);
1154 }
1155
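/*
 * Clear any latched error state: PGTBL_ER, IPEIR, EIR and the per-engine
 * fault registers.
 */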
1156 void i915_clear_error_registers(struct drm_i915_private *dev_priv)
1157 {
1158         u32 eir;
1159
1160         if (!IS_GEN(dev_priv, 2))
1161                 I915_WRITE(PGTBL_ER, I915_READ(PGTBL_ER));
1162
1163         if (INTEL_GEN(dev_priv) < 4)
1164                 I915_WRITE(IPEIR, I915_READ(IPEIR));
1165         else
1166                 I915_WRITE(IPEIR_I965, I915_READ(IPEIR_I965));
1167
1168         I915_WRITE(EIR, I915_READ(EIR));
1169         eir = I915_READ(EIR);
1170         if (eir) {
1171                 /*
1172                  * some errors might have become stuck,
1173                  * mask them.
1174                  */
1175                 DRM_DEBUG_DRIVER("EIR stuck: 0x%08x, masking\n", eir);
1176                 I915_WRITE(EMR, I915_READ(EMR) | eir);
1177                 I915_WRITE(IIR, I915_MASTER_ERROR_INTERRUPT);
1178         }
1179
1180         if (INTEL_GEN(dev_priv) >= 8) {
1181                 I915_WRITE(GEN8_RING_FAULT_REG,
1182                            I915_READ(GEN8_RING_FAULT_REG) & ~RING_FAULT_VALID);
1183                 POSTING_READ(GEN8_RING_FAULT_REG);
1184         } else if (INTEL_GEN(dev_priv) >= 6) {
1185                 struct intel_engine_cs *engine;
1186                 enum intel_engine_id id;
1187
1188                 for_each_engine(engine, dev_priv, id) {
1189                         I915_WRITE(RING_FAULT_REG(engine),
1190                                    I915_READ(RING_FAULT_REG(engine)) &
1191                                    ~RING_FAULT_VALID);
1192                 }
1193                 POSTING_READ(RING_FAULT_REG(dev_priv->engine[RCS]));
1194         }
1195 }
1196
1197 /**
1198  * i915_handle_error - handle a gpu error
1199  * @i915: i915 device private
1200  * @engine_mask: mask representing engines that are hung
1201  * @flags: control flags
1202  * @fmt: Error message format string
1203  *
1204  * Do some basic checking of register state at error time and
1205  * dump it to the syslog.  Also call i915_capture_error_state() to make
1206  * sure we get a record and make it available in debugfs.  Fire a uevent
1207  * so userspace knows something bad happened (should trigger collection
1208  * of a ring dump etc.).
1209  */
1210 void i915_handle_error(struct drm_i915_private *i915,
1211                        u32 engine_mask,
1212                        unsigned long flags,
1213                        const char *fmt, ...)
1214 {
1215         struct intel_engine_cs *engine;
1216         intel_wakeref_t wakeref;
1217         unsigned int tmp;
1218         char error_msg[80];
1219         char *msg = NULL;
1220
1221         if (fmt) {
1222                 va_list args;
1223
1224                 va_start(args, fmt);
1225                 vscnprintf(error_msg, sizeof(error_msg), fmt, args);
1226                 va_end(args);
1227
1228                 msg = error_msg;
1229         }
1230
1231         /*
1232          * In most cases it's guaranteed that we get here with an RPM
1233          * reference held, for example because there is a pending GPU
1234          * request that won't finish until the reset is done. This
1235          * isn't the case at least when we get here by doing a
1236          * simulated reset via debugfs, so get an RPM reference.
1237          */
1238         wakeref = intel_runtime_pm_get(i915);
1239
1240         engine_mask &= INTEL_INFO(i915)->ring_mask;
1241
1242         if (flags & I915_ERROR_CAPTURE) {
1243                 i915_capture_error_state(i915, engine_mask, msg);
1244                 i915_clear_error_registers(i915);
1245         }
1246
1247         /*
1248          * Try engine reset when available. We fall back to full reset if
1249          * single reset fails.
1250          */
1251         if (intel_has_reset_engine(i915) &&
1252             !i915_terminally_wedged(&i915->gpu_error)) {
1253                 for_each_engine_masked(engine, i915, engine_mask, tmp) {
1254                         BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE);
1255                         if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
1256                                              &i915->gpu_error.flags))
1257                                 continue;
1258
1259                         if (i915_reset_engine(engine, msg) == 0)
1260                                 engine_mask &= ~intel_engine_flag(engine);
1261
1262                         clear_bit(I915_RESET_ENGINE + engine->id,
1263                                   &i915->gpu_error.flags);
1264                         wake_up_bit(&i915->gpu_error.flags,
1265                                     I915_RESET_ENGINE + engine->id);
1266                 }
1267         }
1268
1269         if (!engine_mask)
1270                 goto out;
1271
1272         /* Full reset needs the mutex, stop any other user trying to do so. */
1273         if (test_and_set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags)) {
1274                 wait_event(i915->gpu_error.reset_queue,
1275                            !test_bit(I915_RESET_BACKOFF,
1276                                      &i915->gpu_error.flags));
1277                 goto out;
1278         }
1279
1280         /* Prevent any other reset-engine attempt. */
1281         for_each_engine(engine, i915, tmp) {
1282                 while (test_and_set_bit(I915_RESET_ENGINE + engine->id,
1283                                         &i915->gpu_error.flags))
1284                         wait_on_bit(&i915->gpu_error.flags,
1285                                     I915_RESET_ENGINE + engine->id,
1286                                     TASK_UNINTERRUPTIBLE);
1287         }
1288
1289         i915_reset_device(i915, engine_mask, msg);
1290
1291         for_each_engine(engine, i915, tmp) {
1292                 clear_bit(I915_RESET_ENGINE + engine->id,
1293                           &i915->gpu_error.flags);
1294         }
1295
1296         clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
1297         wake_up_all(&i915->gpu_error.reset_queue);
1298
1299 out:
1300         intel_runtime_pm_put(i915, wakeref);
1301 }
1302
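/*
 * Flush the outstanding hangcheck/restart work and wait for the GPU to
 * idle again; returns true if the device settled without error.
 */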
1303 bool i915_reset_flush(struct drm_i915_private *i915)
1304 {
1305         int err;
1306
1307         cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
1308
1309         flush_workqueue(i915->wq);
1310         GEM_BUG_ON(READ_ONCE(i915->gpu_error.restart));
1311
1312         mutex_lock(&i915->drm.struct_mutex);
1313         err = i915_gem_wait_for_idle(i915,
1314                                      I915_WAIT_LOCKED |
1315                                      I915_WAIT_FOR_IDLE_BOOST,
1316                                      MAX_SCHEDULE_TIMEOUT);
1317         mutex_unlock(&i915->drm.struct_mutex);
1318
1319         return !err;
1320 }
1321
1322 static void i915_wedge_me(struct work_struct *work)
1323 {
1324         struct i915_wedge_me *w = container_of(work, typeof(*w), work.work);
1325
1326         dev_err(w->i915->drm.dev,
1327                 "%s timed out, cancelling all in-flight rendering.\n",
1328                 w->name);
1329         i915_gem_set_wedged(w->i915);
1330 }
1331
1332 void __i915_init_wedge(struct i915_wedge_me *w,
1333                        struct drm_i915_private *i915,
1334                        long timeout,
1335                        const char *name)
1336 {
1337         w->i915 = i915;
1338         w->name = name;
1339
1340         INIT_DELAYED_WORK_ONSTACK(&w->work, i915_wedge_me);
1341         schedule_delayed_work(&w->work, timeout);
1342 }
1343
1344 void __i915_fini_wedge(struct i915_wedge_me *w)
1345 {
1346         cancel_delayed_work_sync(&w->work);
1347         destroy_delayed_work_on_stack(&w->work);
1348         w->i915 = NULL;
1349 }