drivers/gpu/drm/i915/selftests/i915_request.c
1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24
25 #include <linux/prime_numbers.h>
26
27 #include "../i915_selftest.h"
28 #include "i915_random.h"
29 #include "igt_live_test.h"
30 #include "lib_sw_fence.h"
31
32 #include "mock_context.h"
33 #include "mock_drm.h"
34 #include "mock_gem_device.h"
35
36 static int igt_add_request(void *arg)
37 {
38         struct drm_i915_private *i915 = arg;
39         struct i915_request *request;
40         int err = -ENOMEM;
41
42         /* Basic preliminary test to create a request and let it loose! */
43
44         mutex_lock(&i915->drm.struct_mutex);
45         request = mock_request(i915->engine[RCS],
46                                i915->kernel_context,
47                                HZ / 10);
48         if (!request)
49                 goto out_unlock;
50
51         i915_request_add(request);
52
53         err = 0;
54 out_unlock:
55         mutex_unlock(&i915->drm.struct_mutex);
56         return err;
57 }
58
59 static int igt_wait_request(void *arg)
60 {
61         const long T = HZ / 4;
62         struct drm_i915_private *i915 = arg;
63         struct i915_request *request;
64         int err = -EINVAL;
65
66         /* Submit a request, then wait upon it */
67
68         mutex_lock(&i915->drm.struct_mutex);
69         request = mock_request(i915->engine[RCS], i915->kernel_context, T);
70         if (!request) {
71                 err = -ENOMEM;
72                 goto out_unlock;
73         }
74
75         if (i915_request_wait(request, I915_WAIT_LOCKED, 0) != -ETIME) {
76                 pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n");
77                 goto out_unlock;
78         }
79
80         if (i915_request_wait(request, I915_WAIT_LOCKED, T) != -ETIME) {
81                 pr_err("request wait succeeded (expected timeout before submit!)\n");
82                 goto out_unlock;
83         }
84
85         if (i915_request_completed(request)) {
86                 pr_err("request completed before submit!!\n");
87                 goto out_unlock;
88         }
89
90         i915_request_add(request);
91
92         if (i915_request_wait(request, I915_WAIT_LOCKED, 0) != -ETIME) {
93                 pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n");
94                 goto out_unlock;
95         }
96
97         if (i915_request_completed(request)) {
98                 pr_err("request completed immediately!\n");
99                 goto out_unlock;
100         }
101
102         if (i915_request_wait(request, I915_WAIT_LOCKED, T / 2) != -ETIME) {
103                 pr_err("request wait succeeded (expected timeout!)\n");
104                 goto out_unlock;
105         }
106
107         if (i915_request_wait(request, I915_WAIT_LOCKED, T) == -ETIME) {
108                 pr_err("request wait timed out!\n");
109                 goto out_unlock;
110         }
111
112         if (!i915_request_completed(request)) {
113                 pr_err("request not complete after waiting!\n");
114                 goto out_unlock;
115         }
116
117         if (i915_request_wait(request, I915_WAIT_LOCKED, T) == -ETIME) {
118                 pr_err("request wait timed out when already complete!\n");
119                 goto out_unlock;
120         }
121
122         err = 0;
123 out_unlock:
124         mock_device_flush(i915);
125         mutex_unlock(&i915->drm.struct_mutex);
126         return err;
127 }
128
129 static int igt_fence_wait(void *arg)
130 {
131         const long T = HZ / 4;
132         struct drm_i915_private *i915 = arg;
133         struct i915_request *request;
134         int err = -EINVAL;
135
136         /* Submit a request, treat it as a fence and wait upon it */
137
138         mutex_lock(&i915->drm.struct_mutex);
139         request = mock_request(i915->engine[RCS], i915->kernel_context, T);
140         if (!request) {
141                 err = -ENOMEM;
142                 goto out_locked;
143         }
144         mutex_unlock(&i915->drm.struct_mutex); /* safe as we are single user */
145
146         if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) {
147                 pr_err("fence wait success before submit (expected timeout)!\n");
148                 goto out_device;
149         }
150
151         mutex_lock(&i915->drm.struct_mutex);
152         i915_request_add(request);
153         mutex_unlock(&i915->drm.struct_mutex);
154
155         if (dma_fence_is_signaled(&request->fence)) {
156                 pr_err("fence signaled immediately!\n");
157                 goto out_device;
158         }
159
160         if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) {
161                 pr_err("fence wait success after submit (expected timeout)!\n");
162                 goto out_device;
163         }
164
165         if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
166                 pr_err("fence wait timed out (expected success)!\n");
167                 goto out_device;
168         }
169
170         if (!dma_fence_is_signaled(&request->fence)) {
171                 pr_err("fence unsignaled after waiting!\n");
172                 goto out_device;
173         }
174
175         if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
176                 pr_err("fence wait timed out when complete (expected success)!\n");
177                 goto out_device;
178         }
179
180         err = 0;
181 out_device:
182         mutex_lock(&i915->drm.struct_mutex);
183 out_locked:
184         mock_device_flush(i915);
185         mutex_unlock(&i915->drm.struct_mutex);
186         return err;
187 }
188
189 static int igt_request_rewind(void *arg)
190 {
191         struct drm_i915_private *i915 = arg;
192         struct i915_request *request, *vip;
193         struct i915_gem_context *ctx[2];
194         int err = -EINVAL;
195
196         mutex_lock(&i915->drm.struct_mutex);
197         ctx[0] = mock_context(i915, "A");
198         request = mock_request(i915->engine[RCS], ctx[0], 2 * HZ);
199         if (!request) {
200                 err = -ENOMEM;
201                 goto err_context_0;
202         }
203
204         i915_request_get(request);
205         i915_request_add(request);
206
207         ctx[1] = mock_context(i915, "B");
208         vip = mock_request(i915->engine[RCS], ctx[1], 0);
209         if (!vip) {
210                 err = -ENOMEM;
211                 goto err_context_1;
212         }
213
214         /* Simulate preemption by manual reordering */
215         if (!mock_cancel_request(request)) {
216                 pr_err("failed to cancel request (already executed)!\n");
217                 i915_request_add(vip);
218                 goto err_context_1;
219         }
220         i915_request_get(vip);
221         i915_request_add(vip);
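        /* Resubmit the cancelled request so that it now queues behind vip */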
222         rcu_read_lock();
223         request->engine->submit_request(request);
224         rcu_read_unlock();
225
226         mutex_unlock(&i915->drm.struct_mutex);
227
228         if (i915_request_wait(vip, 0, HZ) == -ETIME) {
229                 pr_err("timed out waiting for high priority request, vip.seqno=%d, current seqno=%d\n",
230                        vip->global_seqno, intel_engine_get_seqno(i915->engine[RCS]));
231                 goto err;
232         }
233
234         if (i915_request_completed(request)) {
235                 pr_err("low priority request already completed\n");
236                 goto err;
237         }
238
239         err = 0;
240 err:
241         i915_request_put(vip);
242         mutex_lock(&i915->drm.struct_mutex);
243 err_context_1:
244         mock_context_close(ctx[1]);
245         i915_request_put(request);
246 err_context_0:
247         mock_context_close(ctx[0]);
248         mock_device_flush(i915);
249         mutex_unlock(&i915->drm.struct_mutex);
250         return err;
251 }
252
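/*
 * Parameters shared by the breadcrumb smoketest threads: the engine under
 * test, a pool of contexts to build requests from, the maximum number of
 * requests per iteration, counters for the completed waits and fences, and
 * the request allocator (mock or live) to use.
 */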
253 struct smoketest {
254         struct intel_engine_cs *engine;
255         struct i915_gem_context **contexts;
256         atomic_long_t num_waits, num_fences;
257         int ncontexts, max_batch;
258         struct i915_request *(*request_alloc)(struct i915_gem_context *,
259                                               struct intel_engine_cs *);
260 };
261
262 static struct i915_request *
263 __mock_request_alloc(struct i915_gem_context *ctx,
264                      struct intel_engine_cs *engine)
265 {
266         return mock_request(engine, ctx, 0);
267 }
268
269 static struct i915_request *
270 __live_request_alloc(struct i915_gem_context *ctx,
271                      struct intel_engine_cs *engine)
272 {
273         return i915_request_alloc(engine, ctx);
274 }
275
276 static int __igt_breadcrumbs_smoketest(void *arg)
277 {
278         struct smoketest *t = arg;
279         struct mutex * const BKL = &t->engine->i915->drm.struct_mutex;
280         const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1;
281         const unsigned int total = 4 * t->ncontexts + 1;
282         unsigned int num_waits = 0, num_fences = 0;
283         struct i915_request **requests;
284         I915_RND_STATE(prng);
285         unsigned int *order;
286         int err = 0;
287
288         /*
289          * A very simple test to catch the most egregious of list handling bugs.
290          *
291          * At its heart, we simply create oodles of requests running across
292          * multiple kthreads and enable signaling on them, for the sole purpose
293          * of stressing our breadcrumb handling. The only inspection we do is
294          * that the fences were marked as signaled.
295          */
296
297         requests = kmalloc_array(total, sizeof(*requests), GFP_KERNEL);
298         if (!requests)
299                 return -ENOMEM;
300
301         order = i915_random_order(total, &prng);
302         if (!order) {
303                 err = -ENOMEM;
304                 goto out_requests;
305         }
306
307         while (!kthread_should_stop()) {
308                 struct i915_sw_fence *submit, *wait;
309                 unsigned int n, count;
310
311                 submit = heap_fence_create(GFP_KERNEL);
312                 if (!submit) {
313                         err = -ENOMEM;
314                         break;
315                 }
316
317                 wait = heap_fence_create(GFP_KERNEL);
318                 if (!wait) {
319                         i915_sw_fence_commit(submit);
320                         heap_fence_put(submit);
321                         err = -ENOMEM;
322                         break;
323                 }
324
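                /*
                 * Build a randomly sized, randomly ordered set of requests,
                 * each gated on the 'submit' fence and collected into the
                 * aggregate 'wait' fence, so the whole batch can be released
                 * and waited upon as a single unit.
                 */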
325                 i915_random_reorder(order, total, &prng);
326                 count = 1 + i915_prandom_u32_max_state(max_batch, &prng);
327
328                 for (n = 0; n < count; n++) {
329                         struct i915_gem_context *ctx =
330                                 t->contexts[order[n] % t->ncontexts];
331                         struct i915_request *rq;
332
333                         mutex_lock(BKL);
334
335                         rq = t->request_alloc(ctx, t->engine);
336                         if (IS_ERR(rq)) {
337                                 mutex_unlock(BKL);
338                                 err = PTR_ERR(rq);
339                                 count = n;
340                                 break;
341                         }
342
343                         err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
344                                                                submit,
345                                                                GFP_KERNEL);
346
347                         requests[n] = i915_request_get(rq);
348                         i915_request_add(rq);
349
350                         mutex_unlock(BKL);
351
352                         if (err >= 0)
353                                 err = i915_sw_fence_await_dma_fence(wait,
354                                                                     &rq->fence,
355                                                                     0,
356                                                                     GFP_KERNEL);
357
358                         if (err < 0) {
359                                 i915_request_put(rq);
360                                 count = n;
361                                 break;
362                         }
363                 }
364
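                /*
                 * Release all the queued requests at once by committing
                 * 'submit', then expect the aggregate 'wait' fence to signal
                 * within the timeout; if it does not, wedge the GPU and fail.
                 */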
365                 i915_sw_fence_commit(submit);
366                 i915_sw_fence_commit(wait);
367
368                 if (!wait_event_timeout(wait->wait,
369                                         i915_sw_fence_done(wait),
370                                         HZ / 2)) {
371                         struct i915_request *rq = requests[count - 1];
372
373                         pr_err("waiting for %d fences (last %llx:%lld) on %s timed out!\n",
374                                count,
375                                rq->fence.context, rq->fence.seqno,
376                                t->engine->name);
377                         i915_gem_set_wedged(t->engine->i915);
378                         GEM_BUG_ON(!i915_request_completed(rq));
379                         i915_sw_fence_wait(wait);
380                         err = -EIO;
381                 }
382
383                 for (n = 0; n < count; n++) {
384                         struct i915_request *rq = requests[n];
385
386                         if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
387                                       &rq->fence.flags)) {
388                                 pr_err("%llu:%llu was not signaled!\n",
389                                        rq->fence.context, rq->fence.seqno);
390                                 err = -EINVAL;
391                         }
392
393                         i915_request_put(rq);
394                 }
395
396                 heap_fence_put(wait);
397                 heap_fence_put(submit);
398
399                 if (err < 0)
400                         break;
401
402                 num_fences += count;
403                 num_waits++;
404
405                 cond_resched();
406         }
407
408         atomic_long_add(num_fences, &t->num_fences);
409         atomic_long_add(num_waits, &t->num_waits);
410
411         kfree(order);
412 out_requests:
413         kfree(requests);
414         return err;
415 }
416
417 static int mock_breadcrumbs_smoketest(void *arg)
418 {
419         struct drm_i915_private *i915 = arg;
420         struct smoketest t = {
421                 .engine = i915->engine[RCS],
422                 .ncontexts = 1024,
423                 .max_batch = 1024,
424                 .request_alloc = __mock_request_alloc
425         };
426         unsigned int ncpus = num_online_cpus();
427         struct task_struct **threads;
428         unsigned int n;
429         int ret = 0;
430
431         /*
432          * Smoketest our breadcrumb/signal handling for requests across multiple
433          * threads. A very simple test to only catch the most egregious of bugs.
434          * See __igt_breadcrumbs_smoketest();
435          */
436
437         threads = kmalloc_array(ncpus, sizeof(*threads), GFP_KERNEL);
438         if (!threads)
439                 return -ENOMEM;
440
441         t.contexts =
442                 kmalloc_array(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL);
443         if (!t.contexts) {
444                 ret = -ENOMEM;
445                 goto out_threads;
446         }
447
448         mutex_lock(&t.engine->i915->drm.struct_mutex);
449         for (n = 0; n < t.ncontexts; n++) {
450                 t.contexts[n] = mock_context(t.engine->i915, "mock");
451                 if (!t.contexts[n]) {
452                         ret = -ENOMEM;
453                         goto out_contexts;
454                 }
455         }
456         mutex_unlock(&t.engine->i915->drm.struct_mutex);
457
458         for (n = 0; n < ncpus; n++) {
459                 threads[n] = kthread_run(__igt_breadcrumbs_smoketest,
460                                          &t, "igt/%d", n);
461                 if (IS_ERR(threads[n])) {
462                         ret = PTR_ERR(threads[n]);
463                         ncpus = n;
464                         break;
465                 }
466
467                 get_task_struct(threads[n]);
468         }
469
470         msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
471
472         for (n = 0; n < ncpus; n++) {
473                 int err;
474
475                 err = kthread_stop(threads[n]);
476                 if (err < 0 && !ret)
477                         ret = err;
478
479                 put_task_struct(threads[n]);
480         }
481         pr_info("Completed %lu waits for %lu fences across %d cpus\n",
482                 atomic_long_read(&t.num_waits),
483                 atomic_long_read(&t.num_fences),
484                 ncpus);
485
486         mutex_lock(&t.engine->i915->drm.struct_mutex);
487 out_contexts:
488         for (n = 0; n < t.ncontexts; n++) {
489                 if (!t.contexts[n])
490                         break;
491                 mock_context_close(t.contexts[n]);
492         }
493         mutex_unlock(&t.engine->i915->drm.struct_mutex);
494         kfree(t.contexts);
495 out_threads:
496         kfree(threads);
497
498         return ret;
499 }
500
501 int i915_request_mock_selftests(void)
502 {
503         static const struct i915_subtest tests[] = {
504                 SUBTEST(igt_add_request),
505                 SUBTEST(igt_wait_request),
506                 SUBTEST(igt_fence_wait),
507                 SUBTEST(igt_request_rewind),
508                 SUBTEST(mock_breadcrumbs_smoketest),
509         };
510         struct drm_i915_private *i915;
511         intel_wakeref_t wakeref;
512         int err = 0;
513
514         i915 = mock_gem_device();
515         if (!i915)
516                 return -ENOMEM;
517
518         with_intel_runtime_pm(i915, wakeref)
519                 err = i915_subtests(tests, i915);
520
521         drm_dev_put(&i915->drm);
522
523         return err;
524 }
525
526 static int live_nop_request(void *arg)
527 {
528         struct drm_i915_private *i915 = arg;
529         struct intel_engine_cs *engine;
530         intel_wakeref_t wakeref;
531         struct igt_live_test t;
532         unsigned int id;
533         int err = -ENODEV;
534
535         /* Submit various sized batches of empty requests, to each engine
536          * (individually), and wait for the batch to complete. We can check
537          * the overhead of submitting requests to the hardware.
538          */
539
540         mutex_lock(&i915->drm.struct_mutex);
541         wakeref = intel_runtime_pm_get(i915);
542
543         for_each_engine(engine, i915, id) {
544                 struct i915_request *request = NULL;
545                 unsigned long n, prime;
546                 IGT_TIMEOUT(end_time);
547                 ktime_t times[2] = {};
548
549                 err = igt_live_test_begin(&t, i915, __func__, engine->name);
550                 if (err)
551                         goto out_unlock;
552
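                /*
                 * Time batches of 1, 2, 3, 5, ... nop requests so that the
                 * fixed cost of a single request can be compared against the
                 * amortised cost per request reported below.
                 */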
553                 for_each_prime_number_from(prime, 1, 8192) {
554                         times[1] = ktime_get_raw();
555
556                         for (n = 0; n < prime; n++) {
557                                 request = i915_request_alloc(engine,
558                                                              i915->kernel_context);
559                                 if (IS_ERR(request)) {
560                                         err = PTR_ERR(request);
561                                         goto out_unlock;
562                                 }
563
564                                 /* This space is left intentionally blank.
565                                  *
566                                  * We do not actually want to perform any
567                                  * action with this request, we just want
568                                  * to measure the latency in allocation
569                                  * and submission of our breadcrumbs -
570                                  * ensuring that the bare request is sufficient
571                                  * for the system to work (i.e. proper HEAD
572                                  * tracking of the rings, interrupt handling,
573                                  * etc). It also gives us the lowest bounds
574                                  * for latency.
575                                  */
576
577                                 i915_request_add(request);
578                         }
579                         i915_request_wait(request,
580                                           I915_WAIT_LOCKED,
581                                           MAX_SCHEDULE_TIMEOUT);
582
583                         times[1] = ktime_sub(ktime_get_raw(), times[1]);
584                         if (prime == 1)
585                                 times[0] = times[1];
586
587                         if (__igt_timeout(end_time, NULL))
588                                 break;
589                 }
590
591                 err = igt_live_test_end(&t);
592                 if (err)
593                         goto out_unlock;
594
595                 pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n",
596                         engine->name,
597                         ktime_to_ns(times[0]),
598                         prime, div64_u64(ktime_to_ns(times[1]), prime));
599         }
600
601 out_unlock:
602         intel_runtime_pm_put(i915, wakeref);
603         mutex_unlock(&i915->drm.struct_mutex);
604         return err;
605 }
606
607 static struct i915_vma *empty_batch(struct drm_i915_private *i915)
608 {
609         struct drm_i915_gem_object *obj;
610         struct i915_vma *vma;
611         u32 *cmd;
612         int err;
613
614         obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
615         if (IS_ERR(obj))
616                 return ERR_CAST(obj);
617
618         cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
619         if (IS_ERR(cmd)) {
620                 err = PTR_ERR(cmd);
621                 goto err;
622         }
623
624         *cmd = MI_BATCH_BUFFER_END;
625         i915_gem_chipset_flush(i915);
626
627         i915_gem_object_unpin_map(obj);
628
629         err = i915_gem_object_set_to_gtt_domain(obj, false);
630         if (err)
631                 goto err;
632
633         vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
634         if (IS_ERR(vma)) {
635                 err = PTR_ERR(vma);
636                 goto err;
637         }
638
639         err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_GLOBAL);
640         if (err)
641                 goto err;
642
643         return vma;
644
645 err:
646         i915_gem_object_put(obj);
647         return ERR_PTR(err);
648 }
649
650 static struct i915_request *
651 empty_request(struct intel_engine_cs *engine,
652               struct i915_vma *batch)
653 {
654         struct i915_request *request;
655         int err;
656
657         request = i915_request_alloc(engine, engine->i915->kernel_context);
658         if (IS_ERR(request))
659                 return request;
660
661         err = engine->emit_bb_start(request,
662                                     batch->node.start,
663                                     batch->node.size,
664                                     I915_DISPATCH_SECURE);
665         if (err)
666                 goto out_request;
667
668 out_request:
669         i915_request_add(request);
670         return err ? ERR_PTR(err) : request;
671 }
672
673 static int live_empty_request(void *arg)
674 {
675         struct drm_i915_private *i915 = arg;
676         struct intel_engine_cs *engine;
677         intel_wakeref_t wakeref;
678         struct igt_live_test t;
679         struct i915_vma *batch;
680         unsigned int id;
681         int err = 0;
682
683         /* Submit various sized batches of empty requests, to each engine
684          * (individually), and wait for the batch to complete. We can check
685          * the overhead of submitting requests to the hardware.
686          */
687
688         mutex_lock(&i915->drm.struct_mutex);
689         wakeref = intel_runtime_pm_get(i915);
690
691         batch = empty_batch(i915);
692         if (IS_ERR(batch)) {
693                 err = PTR_ERR(batch);
694                 goto out_unlock;
695         }
696
697         for_each_engine(engine, i915, id) {
698                 IGT_TIMEOUT(end_time);
699                 struct i915_request *request;
700                 unsigned long n, prime;
701                 ktime_t times[2] = {};
702
703                 err = igt_live_test_begin(&t, i915, __func__, engine->name);
704                 if (err)
705                         goto out_batch;
706
707                 /* Warmup / preload */
708                 request = empty_request(engine, batch);
709                 if (IS_ERR(request)) {
710                         err = PTR_ERR(request);
711                         goto out_batch;
712                 }
713                 i915_request_wait(request,
714                                   I915_WAIT_LOCKED,
715                                   MAX_SCHEDULE_TIMEOUT);
716
717                 for_each_prime_number_from(prime, 1, 8192) {
718                         times[1] = ktime_get_raw();
719
720                         for (n = 0; n < prime; n++) {
721                                 request = empty_request(engine, batch);
722                                 if (IS_ERR(request)) {
723                                         err = PTR_ERR(request);
724                                         goto out_batch;
725                                 }
726                         }
727                         i915_request_wait(request,
728                                           I915_WAIT_LOCKED,
729                                           MAX_SCHEDULE_TIMEOUT);
730
731                         times[1] = ktime_sub(ktime_get_raw(), times[1]);
732                         if (prime == 1)
733                                 times[0] = times[1];
734
735                         if (__igt_timeout(end_time, NULL))
736                                 break;
737                 }
738
739                 err = igt_live_test_end(&t);
740                 if (err)
741                         goto out_batch;
742
743                 pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n",
744                         engine->name,
745                         ktime_to_ns(times[0]),
746                         prime, div64_u64(ktime_to_ns(times[1]), prime));
747         }
748
749 out_batch:
750         i915_vma_unpin(batch);
751         i915_vma_put(batch);
752 out_unlock:
753         intel_runtime_pm_put(i915, wakeref);
754         mutex_unlock(&i915->drm.struct_mutex);
755         return err;
756 }
757
758 static struct i915_vma *recursive_batch(struct drm_i915_private *i915)
759 {
760         struct i915_gem_context *ctx = i915->kernel_context;
761         struct i915_address_space *vm =
762                 ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm;
763         struct drm_i915_gem_object *obj;
764         const int gen = INTEL_GEN(i915);
765         struct i915_vma *vma;
766         u32 *cmd;
767         int err;
768
769         obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
770         if (IS_ERR(obj))
771                 return ERR_CAST(obj);
772
773         vma = i915_vma_instance(obj, vm, NULL);
774         if (IS_ERR(vma)) {
775                 err = PTR_ERR(vma);
776                 goto err;
777         }
778
779         err = i915_vma_pin(vma, 0, 0, PIN_USER);
780         if (err)
781                 goto err;
782
783         err = i915_gem_object_set_to_wc_domain(obj, true);
784         if (err)
785                 goto err;
786
787         cmd = i915_gem_object_pin_map(obj, I915_MAP_WC);
788         if (IS_ERR(cmd)) {
789                 err = PTR_ERR(cmd);
790                 goto err;
791         }
792
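        /*
         * Emit a batch that jumps back to its own start, keeping the engine
         * busy until recursive_batch_resolve() (or the error path below)
         * overwrites the first dword with MI_BATCH_BUFFER_END.
         */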
793         if (gen >= 8) {
794                 *cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
795                 *cmd++ = lower_32_bits(vma->node.start);
796                 *cmd++ = upper_32_bits(vma->node.start);
797         } else if (gen >= 6) {
798                 *cmd++ = MI_BATCH_BUFFER_START | 1 << 8;
799                 *cmd++ = lower_32_bits(vma->node.start);
800         } else {
801                 *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
802                 *cmd++ = lower_32_bits(vma->node.start);
803         }
804         *cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */
805         i915_gem_chipset_flush(i915);
806
807         i915_gem_object_unpin_map(obj);
808
809         return vma;
810
811 err:
812         i915_gem_object_put(obj);
813         return ERR_PTR(err);
814 }
815
816 static int recursive_batch_resolve(struct i915_vma *batch)
817 {
818         u32 *cmd;
819
820         cmd = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
821         if (IS_ERR(cmd))
822                 return PTR_ERR(cmd);
823
824         *cmd = MI_BATCH_BUFFER_END;
825         i915_gem_chipset_flush(batch->vm->i915);
826
827         i915_gem_object_unpin_map(batch->obj);
828
829         return 0;
830 }
831
832 static int live_all_engines(void *arg)
833 {
834         struct drm_i915_private *i915 = arg;
835         struct intel_engine_cs *engine;
836         struct i915_request *request[I915_NUM_ENGINES];
837         intel_wakeref_t wakeref;
838         struct igt_live_test t;
839         struct i915_vma *batch;
840         unsigned int id;
841         int err;
842
843         /* Check we can submit requests to all engines simultaneously. We
844          * send a recursive batch to each engine - checking that we don't
845          * block doing so, and that they don't complete too soon.
846          */
847
848         mutex_lock(&i915->drm.struct_mutex);
849         wakeref = intel_runtime_pm_get(i915);
850
851         err = igt_live_test_begin(&t, i915, __func__, "");
852         if (err)
853                 goto out_unlock;
854
855         batch = recursive_batch(i915);
856         if (IS_ERR(batch)) {
857                 err = PTR_ERR(batch);
858                 pr_err("%s: Unable to create batch, err=%d\n", __func__, err);
859                 goto out_unlock;
860         }
861
862         for_each_engine(engine, i915, id) {
863                 request[id] = i915_request_alloc(engine, i915->kernel_context);
864                 if (IS_ERR(request[id])) {
865                         err = PTR_ERR(request[id]);
866                         pr_err("%s: Request allocation failed with err=%d\n",
867                                __func__, err);
868                         goto out_request;
869                 }
870
871                 err = engine->emit_bb_start(request[id],
872                                             batch->node.start,
873                                             batch->node.size,
874                                             0);
875                 GEM_BUG_ON(err);
876                 request[id]->batch = batch;
877
878                 if (!i915_gem_object_has_active_reference(batch->obj)) {
879                         i915_gem_object_get(batch->obj);
880                         i915_gem_object_set_active_reference(batch->obj);
881                 }
882
883                 err = i915_vma_move_to_active(batch, request[id], 0);
884                 GEM_BUG_ON(err);
885
886                 i915_request_get(request[id]);
887                 i915_request_add(request[id]);
888         }
889
890         for_each_engine(engine, i915, id) {
891                 if (i915_request_completed(request[id])) {
892                         pr_err("%s(%s): request completed too early!\n",
893                                __func__, engine->name);
894                         err = -EINVAL;
895                         goto out_request;
896                 }
897         }
898
899         err = recursive_batch_resolve(batch);
900         if (err) {
901                 pr_err("%s: failed to resolve batch, err=%d\n", __func__, err);
902                 goto out_request;
903         }
904
905         for_each_engine(engine, i915, id) {
906                 long timeout;
907
908                 timeout = i915_request_wait(request[id],
909                                             I915_WAIT_LOCKED,
910                                             MAX_SCHEDULE_TIMEOUT);
911                 if (timeout < 0) {
912                         err = timeout;
913                         pr_err("%s: error waiting for request on %s, err=%d\n",
914                                __func__, engine->name, err);
915                         goto out_request;
916                 }
917
918                 GEM_BUG_ON(!i915_request_completed(request[id]));
919                 i915_request_put(request[id]);
920                 request[id] = NULL;
921         }
922
923         err = igt_live_test_end(&t);
924
925 out_request:
926         for_each_engine(engine, i915, id)
927                 if (request[id])
928                         i915_request_put(request[id]);
929         i915_vma_unpin(batch);
930         i915_vma_put(batch);
931 out_unlock:
932         intel_runtime_pm_put(i915, wakeref);
933         mutex_unlock(&i915->drm.struct_mutex);
934         return err;
935 }
936
937 static int live_sequential_engines(void *arg)
938 {
939         struct drm_i915_private *i915 = arg;
940         struct i915_request *request[I915_NUM_ENGINES] = {};
941         struct i915_request *prev = NULL;
942         struct intel_engine_cs *engine;
943         intel_wakeref_t wakeref;
944         struct igt_live_test t;
945         unsigned int id;
946         int err;
947
948         /* Check we can submit requests to all engines sequentially, such
949          * that each successive request waits for the earlier ones. This
950          * tests that we don't execute requests out of order, even though
951          * they are running on independent engines.
952          */
953
954         mutex_lock(&i915->drm.struct_mutex);
955         wakeref = intel_runtime_pm_get(i915);
956
957         err = igt_live_test_begin(&t, i915, __func__, "");
958         if (err)
959                 goto out_unlock;
960
961         for_each_engine(engine, i915, id) {
962                 struct i915_vma *batch;
963
964                 batch = recursive_batch(i915);
965                 if (IS_ERR(batch)) {
966                         err = PTR_ERR(batch);
967                         pr_err("%s: Unable to create batch for %s, err=%d\n",
968                                __func__, engine->name, err);
969                         goto out_unlock;
970                 }
971
972                 request[id] = i915_request_alloc(engine, i915->kernel_context);
973                 if (IS_ERR(request[id])) {
974                         err = PTR_ERR(request[id]);
975                         pr_err("%s: Request allocation failed for %s with err=%d\n",
976                                __func__, engine->name, err);
977                         goto out_request;
978                 }
979
980                 if (prev) {
981                         err = i915_request_await_dma_fence(request[id],
982                                                            &prev->fence);
983                         if (err) {
984                                 i915_request_add(request[id]);
985                                 pr_err("%s: Request await failed for %s with err=%d\n",
986                                        __func__, engine->name, err);
987                                 goto out_request;
988                         }
989                 }
990
991                 err = engine->emit_bb_start(request[id],
992                                             batch->node.start,
993                                             batch->node.size,
994                                             0);
995                 GEM_BUG_ON(err);
996                 request[id]->batch = batch;
997
998                 err = i915_vma_move_to_active(batch, request[id], 0);
999                 GEM_BUG_ON(err);
1000
1001                 i915_gem_object_set_active_reference(batch->obj);
1002                 i915_vma_get(batch);
1003
1004                 i915_request_get(request[id]);
1005                 i915_request_add(request[id]);
1006
1007                 prev = request[id];
1008         }
1009
1010         for_each_engine(engine, i915, id) {
1011                 long timeout;
1012
1013                 if (i915_request_completed(request[id])) {
1014                         pr_err("%s(%s): request completed too early!\n",
1015                                __func__, engine->name);
1016                         err = -EINVAL;
1017                         goto out_request;
1018                 }
1019
1020                 err = recursive_batch_resolve(request[id]->batch);
1021                 if (err) {
1022                         pr_err("%s: failed to resolve batch, err=%d\n",
1023                                __func__, err);
1024                         goto out_request;
1025                 }
1026
1027                 timeout = i915_request_wait(request[id],
1028                                             I915_WAIT_LOCKED,
1029                                             MAX_SCHEDULE_TIMEOUT);
1030                 if (timeout < 0) {
1031                         err = timeout;
1032                         pr_err("%s: error waiting for request on %s, err=%d\n",
1033                                __func__, engine->name, err);
1034                         goto out_request;
1035                 }
1036
1037                 GEM_BUG_ON(!i915_request_completed(request[id]));
1038         }
1039
1040         err = igt_live_test_end(&t);
1041
1042 out_request:
1043         for_each_engine(engine, i915, id) {
1044                 u32 *cmd;
1045
1046                 if (!request[id])
1047                         break;
1048
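                /*
                 * Terminate the spinning batch, which may still be unresolved
                 * if we bailed out early, before dropping our references.
                 */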
1049                 cmd = i915_gem_object_pin_map(request[id]->batch->obj,
1050                                               I915_MAP_WC);
1051                 if (!IS_ERR(cmd)) {
1052                         *cmd = MI_BATCH_BUFFER_END;
1053                         i915_gem_chipset_flush(i915);
1054
1055                         i915_gem_object_unpin_map(request[id]->batch->obj);
1056                 }
1057
1058                 i915_vma_put(request[id]->batch);
1059                 i915_request_put(request[id]);
1060         }
1061 out_unlock:
1062         intel_runtime_pm_put(i915, wakeref);
1063         mutex_unlock(&i915->drm.struct_mutex);
1064         return err;
1065 }
1066
1067 static int
1068 max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
1069 {
1070         struct i915_request *rq;
1071         int ret;
1072
1073         /*
1074          * Before execlists, all contexts share the same ringbuffer. With
1075          * execlists, each context/engine has a separate ringbuffer and
1076          * for the purposes of this test, inexhaustible.
1077          *
1078          * For the global ringbuffer though, we have to be very careful
1079          * that we do not wrap while preventing the execution of requests
1080          * with an unsignaled fence.
1081          */
1082         if (HAS_EXECLISTS(ctx->i915))
1083                 return INT_MAX;
1084
1085         rq = i915_request_alloc(engine, ctx);
1086         if (IS_ERR(rq)) {
1087                 ret = PTR_ERR(rq);
1088         } else {
1089                 int sz;
1090
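                /*
                 * Estimate how many requests fit in the legacy ringbuffer:
                 * take the usable ring size (total minus reserved space),
                 * divide by the space this request consumed, and keep only
                 * half of that as headroom.
                 */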
1091                 ret = rq->ring->size - rq->reserved_space;
1092                 i915_request_add(rq);
1093
1094                 sz = rq->ring->emit - rq->head;
1095                 if (sz < 0)
1096                         sz += rq->ring->size;
1097                 ret /= sz;
1098                 ret /= 2; /* leave half spare, in case of emergency! */
1099         }
1100
1101         return ret;
1102 }
1103
1104 static int live_breadcrumbs_smoketest(void *arg)
1105 {
1106         struct drm_i915_private *i915 = arg;
1107         struct smoketest t[I915_NUM_ENGINES];
1108         unsigned int ncpus = num_online_cpus();
1109         unsigned long num_waits, num_fences;
1110         struct intel_engine_cs *engine;
1111         struct task_struct **threads;
1112         struct igt_live_test live;
1113         enum intel_engine_id id;
1114         intel_wakeref_t wakeref;
1115         struct drm_file *file;
1116         unsigned int n;
1117         int ret = 0;
1118
1119         /*
1120          * Smoketest our breadcrumb/signal handling for requests across multiple
1121          * threads. A very simple test to only catch the most egregious of bugs.
1122          * See __igt_breadcrumbs_smoketest();
1123          *
1124          * On real hardware this time.
1125          */
1126
1127         wakeref = intel_runtime_pm_get(i915);
1128
1129         file = mock_file(i915);
1130         if (IS_ERR(file)) {
1131                 ret = PTR_ERR(file);
1132                 goto out_rpm;
1133         }
1134
1135         threads = kcalloc(ncpus * I915_NUM_ENGINES,
1136                           sizeof(*threads),
1137                           GFP_KERNEL);
1138         if (!threads) {
1139                 ret = -ENOMEM;
1140                 goto out_file;
1141         }
1142
1143         memset(&t[0], 0, sizeof(t[0]));
1144         t[0].request_alloc = __live_request_alloc;
1145         t[0].ncontexts = 64;
1146         t[0].contexts = kmalloc_array(t[0].ncontexts,
1147                                       sizeof(*t[0].contexts),
1148                                       GFP_KERNEL);
1149         if (!t[0].contexts) {
1150                 ret = -ENOMEM;
1151                 goto out_threads;
1152         }
1153
1154         mutex_lock(&i915->drm.struct_mutex);
1155         for (n = 0; n < t[0].ncontexts; n++) {
1156                 t[0].contexts[n] = live_context(i915, file);
1157                 if (!t[0].contexts[n]) {
1158                         ret = -ENOMEM;
1159                         goto out_contexts;
1160                 }
1161         }
1162
1163         ret = igt_live_test_begin(&live, i915, __func__, "");
1164         if (ret)
1165                 goto out_contexts;
1166
1167         for_each_engine(engine, i915, id) {
1168                 t[id] = t[0];
1169                 t[id].engine = engine;
1170                 t[id].max_batch = max_batches(t[0].contexts[0], engine);
1171                 if (t[id].max_batch < 0) {
1172                         ret = t[id].max_batch;
1173                         mutex_unlock(&i915->drm.struct_mutex);
1174                         goto out_flush;
1175                 }
1176                 /* One ring interleaved between requests from all cpus */
1177                 t[id].max_batch /= num_online_cpus() + 1;
1178                 pr_debug("Limiting batches to %d requests on %s\n",
1179                          t[id].max_batch, engine->name);
1180
1181                 for (n = 0; n < ncpus; n++) {
1182                         struct task_struct *tsk;
1183
1184                         tsk = kthread_run(__igt_breadcrumbs_smoketest,
1185                                           &t[id], "igt/%d.%d", id, n);
1186                         if (IS_ERR(tsk)) {
1187                                 ret = PTR_ERR(tsk);
1188                                 mutex_unlock(&i915->drm.struct_mutex);
1189                                 goto out_flush;
1190                         }
1191
1192                         get_task_struct(tsk);
1193                         threads[id * ncpus + n] = tsk;
1194                 }
1195         }
1196         mutex_unlock(&i915->drm.struct_mutex);
1197
1198         msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
1199
1200 out_flush:
1201         num_waits = 0;
1202         num_fences = 0;
1203         for_each_engine(engine, i915, id) {
1204                 for (n = 0; n < ncpus; n++) {
1205                         struct task_struct *tsk = threads[id * ncpus + n];
1206                         int err;
1207
1208                         if (!tsk)
1209                                 continue;
1210
1211                         err = kthread_stop(tsk);
1212                         if (err < 0 && !ret)
1213                                 ret = err;
1214
1215                         put_task_struct(tsk);
1216                 }
1217
1218                 num_waits += atomic_long_read(&t[id].num_waits);
1219                 num_fences += atomic_long_read(&t[id].num_fences);
1220         }
1221         pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n",
1222                 num_waits, num_fences, RUNTIME_INFO(i915)->num_rings, ncpus);
1223
1224         mutex_lock(&i915->drm.struct_mutex);
1225         ret = igt_live_test_end(&live) ?: ret;
1226 out_contexts:
1227         mutex_unlock(&i915->drm.struct_mutex);
1228         kfree(t[0].contexts);
1229 out_threads:
1230         kfree(threads);
1231 out_file:
1232         mock_file_free(i915, file);
1233 out_rpm:
1234         intel_runtime_pm_put(i915, wakeref);
1235
1236         return ret;
1237 }
1238
1239 int i915_request_live_selftests(struct drm_i915_private *i915)
1240 {
1241         static const struct i915_subtest tests[] = {
1242                 SUBTEST(live_nop_request),
1243                 SUBTEST(live_all_engines),
1244                 SUBTEST(live_sequential_engines),
1245                 SUBTEST(live_empty_request),
1246                 SUBTEST(live_breadcrumbs_smoketest),
1247         };
1248
1249         if (i915_terminally_wedged(&i915->gpu_error))
1250                 return 0;
1251
1252         return i915_subtests(tests, i915);
1253 }