Merge tag 'usb-6.5-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb
[sfrench/cifs-2.6.git] / drivers / gpu / drm / i915 / gt / selftest_engine_cs.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright © 2018 Intel Corporation
4  */
5
6 #include <linux/sort.h>
7
8 #include "intel_gpu_commands.h"
9 #include "intel_gt_pm.h"
10 #include "intel_rps.h"
11
12 #include "i915_selftest.h"
13 #include "selftests/igt_flush_test.h"
14
15 #define COUNT 5
16
/*
 * Three-way comparator for sort() over u32 samples.
 *
 * Note: the classic "return *a - *b" idiom is wrong for u32 — the
 * unsigned difference is implicitly converted to int, so values that
 * differ by more than INT_MAX (e.g. 0x80000000 vs 0) wrap and compare
 * in the wrong order. Use explicit relational tests instead.
 */
static int cmp_u32(const void *A, const void *B)
{
	const u32 *a = A, *b = B;

	if (*a < *b)
		return -1;
	else if (*a > *b)
		return 1;
	else
		return 0;
}
23
/*
 * Take a GT wakeref and pin the GPU frequency at its waitboost maximum
 * so cycle measurements are comparable across iterations. Must be
 * paired with perf_end().
 */
static void perf_begin(struct intel_gt *gt)
{
	intel_gt_pm_get(gt);

	/* Boost gpufreq to max [waitboost] and keep it fixed */
	atomic_inc(&gt->rps.num_waiters);
	queue_work(gt->i915->unordered_wq, &gt->rps.work);
	flush_work(&gt->rps.work); /* wait for the boost to take effect */
}
33
/*
 * Undo perf_begin(): drop the waitboost reference and the GT wakeref,
 * then flush outstanding test state.
 *
 * Returns the result of igt_flush_test() (non-zero on failure).
 */
static int perf_end(struct intel_gt *gt)
{
	atomic_dec(&gt->rps.num_waiters);
	intel_gt_pm_put(gt);

	return igt_flush_test(gt->i915);
}
41
42 static i915_reg_t timestamp_reg(struct intel_engine_cs *engine)
43 {
44         struct drm_i915_private *i915 = engine->i915;
45
46         if (GRAPHICS_VER(i915) == 5 || IS_G4X(i915))
47                 return RING_TIMESTAMP_UDW(engine->mmio_base);
48         else
49                 return RING_TIMESTAMP(engine->mmio_base);
50 }
51
/*
 * Emit an MI_STORE_REGISTER_MEM into @rq that snapshots the engine's
 * CS timestamp register into the request's timeline HWSP at 32-bit
 * index @slot, for later readback via rq->hwsp_seqno[slot].
 *
 * Returns 0 on success, or the error from intel_ring_begin().
 */
static int write_timestamp(struct i915_request *rq, int slot)
{
	/* Safe to deref directly while the request is still unsignaled */
	struct intel_timeline *tl =
		rcu_dereference_protected(rq->timeline,
					  !i915_request_signaled(rq));
	u32 cmd;
	u32 *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT;
	if (GRAPHICS_VER(rq->engine->i915) >= 8)
		cmd++; /* gen8+ SRM takes one extra dword (wider address) */
	*cs++ = cmd;
	*cs++ = i915_mmio_reg_offset(timestamp_reg(rq->engine));
	*cs++ = tl->hwsp_offset + slot * sizeof(u32);
	*cs++ = 0;

	intel_ring_advance(rq, cs);

	return 0;
}
76
77 static struct i915_vma *create_empty_batch(struct intel_context *ce)
78 {
79         struct drm_i915_gem_object *obj;
80         struct i915_vma *vma;
81         u32 *cs;
82         int err;
83
84         obj = i915_gem_object_create_internal(ce->engine->i915, PAGE_SIZE);
85         if (IS_ERR(obj))
86                 return ERR_CAST(obj);
87
88         cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
89         if (IS_ERR(cs)) {
90                 err = PTR_ERR(cs);
91                 goto err_put;
92         }
93
94         cs[0] = MI_BATCH_BUFFER_END;
95
96         i915_gem_object_flush_map(obj);
97
98         vma = i915_vma_instance(obj, ce->vm, NULL);
99         if (IS_ERR(vma)) {
100                 err = PTR_ERR(vma);
101                 goto err_unpin;
102         }
103
104         err = i915_vma_pin(vma, 0, 0, PIN_USER);
105         if (err)
106                 goto err_unpin;
107
108         i915_gem_object_unpin_map(obj);
109         return vma;
110
111 err_unpin:
112         i915_gem_object_unpin_map(obj);
113 err_put:
114         i915_gem_object_put(obj);
115         return ERR_PTR(err);
116 }
117
118 static u32 trifilter(u32 *a)
119 {
120         u64 sum;
121
122         sort(a, COUNT, sizeof(*a), cmp_u32, NULL);
123
124         sum = mul_u32_u32(a[2], 2);
125         sum += a[1];
126         sum += a[3];
127
128         return sum >> 2;
129 }
130
/*
 * Measure the cost of MI_BB_START on each engine: bracket an empty
 * batch with two CS timestamp writes and report the filtered delta.
 * Skipped on platforms without a usable CS_TIMESTAMP (gen < 4), and on
 * non-render engines before gen7.
 */
static int perf_mi_bb_start(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */
		return 0;

	perf_begin(gt);
	for_each_engine(engine, gt, id) {
		struct intel_context *ce = engine->kernel_context;
		struct i915_vma *batch;
		u32 cycles[COUNT];
		int i;

		/* Before gen7 only the render ring is exercised here */
		if (GRAPHICS_VER(engine->i915) < 7 && engine->id != RCS0)
			continue;

		intel_engine_pm_get(engine);

		batch = create_empty_batch(ce);
		if (IS_ERR(batch)) {
			err = PTR_ERR(batch);
			intel_engine_pm_put(engine);
			break;
		}

		err = i915_vma_sync(batch);
		if (err) {
			intel_engine_pm_put(engine);
			i915_vma_put(batch);
			break;
		}

		for (i = 0; i < ARRAY_SIZE(cycles); i++) {
			struct i915_request *rq;

			rq = i915_request_create(ce);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				break;
			}

			/* timestamp[2] .. MI_BB_START(empty) .. timestamp[3] */
			err = write_timestamp(rq, 2);
			if (err)
				goto out;

			err = rq->engine->emit_bb_start(rq,
							i915_vma_offset(batch), 8,
							0);
			if (err)
				goto out;

			err = write_timestamp(rq, 3);
			if (err)
				goto out;

			/*
			 * Deliberate fall-through: the request must be
			 * submitted (and its error propagated) even when
			 * emission failed part way.
			 */
out:
			i915_request_get(rq);
			i915_request_add(rq);

			if (i915_request_wait(rq, 0, HZ / 5) < 0)
				err = -EIO;
			i915_request_put(rq);
			if (err)
				break;

			/*
			 * NOTE(review): rq->hwsp_seqno is read after the
			 * final i915_request_put(); presumably the HWSP
			 * backing store outlives the request here — verify.
			 */
			cycles[i] = rq->hwsp_seqno[3] - rq->hwsp_seqno[2];
		}
		i915_vma_put(batch);
		intel_engine_pm_put(engine);
		if (err)
			break;

		pr_info("%s: MI_BB_START cycles: %u\n",
			engine->name, trifilter(cycles));
	}
	if (perf_end(gt))
		err = -EIO;

	return err;
}
215
216 static struct i915_vma *create_nop_batch(struct intel_context *ce)
217 {
218         struct drm_i915_gem_object *obj;
219         struct i915_vma *vma;
220         u32 *cs;
221         int err;
222
223         obj = i915_gem_object_create_internal(ce->engine->i915, SZ_64K);
224         if (IS_ERR(obj))
225                 return ERR_CAST(obj);
226
227         cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
228         if (IS_ERR(cs)) {
229                 err = PTR_ERR(cs);
230                 goto err_put;
231         }
232
233         memset(cs, 0, SZ_64K);
234         cs[SZ_64K / sizeof(*cs) - 1] = MI_BATCH_BUFFER_END;
235
236         i915_gem_object_flush_map(obj);
237
238         vma = i915_vma_instance(obj, ce->vm, NULL);
239         if (IS_ERR(vma)) {
240                 err = PTR_ERR(vma);
241                 goto err_unpin;
242         }
243
244         err = i915_vma_pin(vma, 0, 0, PIN_USER);
245         if (err)
246                 goto err_unpin;
247
248         i915_gem_object_unpin_map(obj);
249         return vma;
250
251 err_unpin:
252         i915_gem_object_unpin_map(obj);
253 err_put:
254         i915_gem_object_put(obj);
255         return ERR_PTR(err);
256 }
257
/*
 * Measure the per-dword cost of MI_NOOP: time an empty batch and a 64K
 * no-op batch back-to-back, and report the difference so the batch
 * setup/teardown overhead cancels out. Skipped on platforms without a
 * usable CS_TIMESTAMP (gen < 4), and on non-render engines before gen7.
 */
static int perf_mi_noop(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */
		return 0;

	perf_begin(gt);
	for_each_engine(engine, gt, id) {
		struct intel_context *ce = engine->kernel_context;
		struct i915_vma *base, *nop;
		u32 cycles[COUNT];
		int i;

		/* Before gen7 only the render ring is exercised here */
		if (GRAPHICS_VER(engine->i915) < 7 && engine->id != RCS0)
			continue;

		intel_engine_pm_get(engine);

		base = create_empty_batch(ce);
		if (IS_ERR(base)) {
			err = PTR_ERR(base);
			intel_engine_pm_put(engine);
			break;
		}

		err = i915_vma_sync(base);
		if (err) {
			i915_vma_put(base);
			intel_engine_pm_put(engine);
			break;
		}

		nop = create_nop_batch(ce);
		if (IS_ERR(nop)) {
			err = PTR_ERR(nop);
			i915_vma_put(base);
			intel_engine_pm_put(engine);
			break;
		}

		err = i915_vma_sync(nop);
		if (err) {
			i915_vma_put(nop);
			i915_vma_put(base);
			intel_engine_pm_put(engine);
			break;
		}

		for (i = 0; i < ARRAY_SIZE(cycles); i++) {
			struct i915_request *rq;

			rq = i915_request_create(ce);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				break;
			}

			/*
			 * timestamp[2] .. empty batch .. timestamp[3]
			 * .. 64K nop batch .. timestamp[4]
			 */
			err = write_timestamp(rq, 2);
			if (err)
				goto out;

			err = rq->engine->emit_bb_start(rq,
							i915_vma_offset(base), 8,
							0);
			if (err)
				goto out;

			err = write_timestamp(rq, 3);
			if (err)
				goto out;

			err = rq->engine->emit_bb_start(rq,
							i915_vma_offset(nop),
							i915_vma_size(nop),
							0);
			if (err)
				goto out;

			err = write_timestamp(rq, 4);
			if (err)
				goto out;

			/*
			 * Deliberate fall-through: the request must be
			 * submitted (and its error propagated) even when
			 * emission failed part way.
			 */
out:
			i915_request_get(rq);
			i915_request_add(rq);

			if (i915_request_wait(rq, 0, HZ / 5) < 0)
				err = -EIO;
			i915_request_put(rq);
			if (err)
				break;

			/*
			 * Nop-batch time minus empty-batch time, removing
			 * the shared MI_BB_START overhead.
			 *
			 * NOTE(review): hwsp_seqno is read after the final
			 * i915_request_put(); presumably the HWSP backing
			 * store outlives the request here — verify.
			 */
			cycles[i] =
				(rq->hwsp_seqno[4] - rq->hwsp_seqno[3]) -
				(rq->hwsp_seqno[3] - rq->hwsp_seqno[2]);
		}
		i915_vma_put(nop);
		i915_vma_put(base);
		intel_engine_pm_put(engine);
		if (err)
			break;

		pr_info("%s: 16K MI_NOOP cycles: %u\n",
			engine->name, trifilter(cycles));
	}
	if (perf_end(gt))
		err = -EIO;

	return err;
}
372
373 int intel_engine_cs_perf_selftests(struct drm_i915_private *i915)
374 {
375         static const struct i915_subtest tests[] = {
376                 SUBTEST(perf_mi_bb_start),
377                 SUBTEST(perf_mi_noop),
378         };
379
380         if (intel_gt_is_wedged(to_gt(i915)))
381                 return 0;
382
383         return intel_gt_live_subtests(tests, to_gt(i915));
384 }
385
386 static int intel_mmio_bases_check(void *arg)
387 {
388         int i, j;
389
390         for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
391                 const struct engine_info *info = &intel_engines[i];
392                 u8 prev = U8_MAX;
393
394                 for (j = 0; j < MAX_MMIO_BASES; j++) {
395                         u8 ver = info->mmio_bases[j].graphics_ver;
396                         u32 base = info->mmio_bases[j].base;
397
398                         if (ver >= prev) {
399                                 pr_err("%s(%s, class:%d, instance:%d): mmio base for graphics ver %u is before the one for ver %u\n",
400                                        __func__,
401                                        intel_engine_class_repr(info->class),
402                                        info->class, info->instance,
403                                        prev, ver);
404                                 return -EINVAL;
405                         }
406
407                         if (ver == 0)
408                                 break;
409
410                         if (!base) {
411                                 pr_err("%s(%s, class:%d, instance:%d): invalid mmio base (%x) for graphics ver %u at entry %u\n",
412                                        __func__,
413                                        intel_engine_class_repr(info->class),
414                                        info->class, info->instance,
415                                        base, ver, j);
416                                 return -EINVAL;
417                         }
418
419                         prev = ver;
420                 }
421
422                 pr_debug("%s: min graphics version supported for %s%d is %u\n",
423                          __func__,
424                          intel_engine_class_repr(info->class),
425                          info->instance,
426                          prev);
427         }
428
429         return 0;
430 }
431
/*
 * Entry point for the mock (no hardware required) engine-cs selftests.
 */
int intel_engine_cs_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(intel_mmio_bases_check),
	};

	return i915_subtests(tests, NULL);
}