drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c

   1 // SPDX-License-Identifier: MIT
   2 /*
   3  * Copyright © 2019 Intel Corporation
   4  */
   5
   6 #include <linux/sort.h>
   7
   8 #include "gt/intel_gt.h"
   9 #include "gt/intel_engine_user.h"
  10
  11 #include "i915_selftest.h"
  12
  13 #include "gem/i915_gem_context.h"
  14 #include "selftests/igt_flush_test.h"
  15 #include "selftests/i915_random.h"
  16 #include "selftests/mock_drm.h"
  17 #include "huge_gem_object.h"
  18 #include "mock_context.h"
  19
  20 static int wrap_ktime_compare(const void *A, const void *B)
  21 {
  22         const ktime_t *a = A, *b = B;
  23
  24         return ktime_compare(*a, *b);
  25 }
  26
  27 static int __perf_fill_blt(struct drm_i915_gem_object *obj)
  28 {
  29         struct drm_i915_private *i915 = to_i915(obj->base.dev);
  30         int inst = 0;
  31
  32         do {
  33                 struct intel_engine_cs *engine;
  34                 ktime_t t[5];
  35                 int pass;
  36                 int err;
  37
  38                 engine = intel_engine_lookup_user(i915,
  39                                                   I915_ENGINE_CLASS_COPY,
  40                                                   inst++);
  41                 if (!engine)
  42                         return 0;
  43
  44                 intel_engine_pm_get(engine);
  45                 for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
  46                         struct intel_context *ce = engine->kernel_context;
  47                         ktime_t t0, t1;
  48
  49                         t0 = ktime_get();
  50
  51                         err = i915_gem_object_fill_blt(obj, ce, 0);
  52                         if (err)
  53                                 break;
  54
  55                         err = i915_gem_object_wait(obj,
  56                                                    I915_WAIT_ALL,
  57                                                    MAX_SCHEDULE_TIMEOUT);
  58                         if (err)
  59                                 break;
  60
  61                         t1 = ktime_get();
  62                         t[pass] = ktime_sub(t1, t0);
  63                 }
  64                 intel_engine_pm_put(engine);
  65                 if (err)
  66                         return err;
  67
  68                 sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
  69                 pr_info("%s: blt %zd KiB fill: %lld MiB/s\n",
  70                         engine->name,
  71                         obj->base.size >> 10,
  72                         div64_u64(mul_u32_u32(4 * obj->base.size,
  73                                               1000 * 1000 * 1000),
  74                                   t[1] + 2 * t[2] + t[3]) >> 20);
  75         } while (1);
  76 }
  77
  78 static int perf_fill_blt(void *arg)
  79 {
  80         struct drm_i915_private *i915 = arg;
  81         static const unsigned long sizes[] = {
  82                 SZ_4K,
  83                 SZ_64K,
  84                 SZ_2M,
  85                 SZ_64M
  86         };
  87         int i;
  88
  89         for (i = 0; i < ARRAY_SIZE(sizes); i++) {
  90                 struct drm_i915_gem_object *obj;
  91                 int err;
  92
  93                 obj = i915_gem_object_create_internal(i915, sizes[i]);
  94                 if (IS_ERR(obj))
  95                         return PTR_ERR(obj);
  96
  97                 err = __perf_fill_blt(obj);
  98                 i915_gem_object_put(obj);
  99                 if (err)
 100                         return err;
 101         }
 102
 103         return 0;
 104 }
 105
 106 static int __perf_copy_blt(struct drm_i915_gem_object *src,
 107                            struct drm_i915_gem_object *dst)
 108 {
 109         struct drm_i915_private *i915 = to_i915(src->base.dev);
 110         int inst = 0;
 111
 112         do {
 113                 struct intel_engine_cs *engine;
 114                 ktime_t t[5];
 115                 int pass;
 116                 int err = 0;
 117
 118                 engine = intel_engine_lookup_user(i915,
 119                                                   I915_ENGINE_CLASS_COPY,
 120                                                   inst++);
 121                 if (!engine)
 122                         return 0;
 123
 124                 intel_engine_pm_get(engine);
 125                 for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
 126                         struct intel_context *ce = engine->kernel_context;
 127                         ktime_t t0, t1;
 128
 129                         t0 = ktime_get();
 130
 131                         err = i915_gem_object_copy_blt(src, dst, ce);
 132                         if (err)
 133                                 break;
 134
 135                         err = i915_gem_object_wait(dst,
 136                                                    I915_WAIT_ALL,
 137                                                    MAX_SCHEDULE_TIMEOUT);
 138                         if (err)
 139                                 break;
 140
 141                         t1 = ktime_get();
 142                         t[pass] = ktime_sub(t1, t0);
 143                 }
 144                 intel_engine_pm_put(engine);
 145                 if (err)
 146                         return err;
 147
 148                 sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
 149                 pr_info("%s: blt %zd KiB copy: %lld MiB/s\n",
 150                         engine->name,
 151                         src->base.size >> 10,
 152                         div64_u64(mul_u32_u32(4 * src->base.size,
 153                                               1000 * 1000 * 1000),
 154                                   t[1] + 2 * t[2] + t[3]) >> 20);
 155         } while (1);
 156 }
 157
 158 static int perf_copy_blt(void *arg)
 159 {
 160         struct drm_i915_private *i915 = arg;
 161         static const unsigned long sizes[] = {
 162                 SZ_4K,
 163                 SZ_64K,
 164                 SZ_2M,
 165                 SZ_64M
 166         };
 167         int i;
 168
 169         for (i = 0; i < ARRAY_SIZE(sizes); i++) {
 170                 struct drm_i915_gem_object *src, *dst;
 171                 int err;
 172
 173                 src = i915_gem_object_create_internal(i915, sizes[i]);
 174                 if (IS_ERR(src))
 175                         return PTR_ERR(src);
 176
 177                 dst = i915_gem_object_create_internal(i915, sizes[i]);
 178                 if (IS_ERR(dst)) {
 179                         err = PTR_ERR(dst);
 180                         goto err_src;
 181                 }
 182
 183                 err = __perf_copy_blt(src, dst);
 184
 185                 i915_gem_object_put(dst);
 186 err_src:
 187                 i915_gem_object_put(src);
 188                 if (err)
 189                         return err;
 190         }
 191
 192         return 0;
 193 }
 194
 195 struct igt_thread_arg {
 196         struct drm_i915_private *i915;
 197         struct i915_gem_context *ctx;
 198         struct file *file;
 199         struct rnd_state prng;
 200         unsigned int n_cpus;
 201 };
 202
 203 static int igt_fill_blt_thread(void *arg)
 204 {
 205         struct igt_thread_arg *thread = arg;
 206         struct drm_i915_private *i915 = thread->i915;
 207         struct rnd_state *prng = &thread->prng;
 208         struct drm_i915_gem_object *obj;
 209         struct i915_gem_context *ctx;
 210         struct intel_context *ce;
 211         unsigned int prio;
 212         IGT_TIMEOUT(end);
 213         int err;
 214
 215         ctx = thread->ctx;
 216         if (!ctx) {
 217                 ctx = live_context(i915, thread->file);
 218                 if (IS_ERR(ctx))
 219                         return PTR_ERR(ctx);
 220
 221                 prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng);
 222                 ctx->sched.priority = I915_USER_PRIORITY(prio);
 223         }
 224
 225         ce = i915_gem_context_get_engine(ctx, BCS0);
 226         GEM_BUG_ON(IS_ERR(ce));
 227
 228         do {
 229                 const u32 max_block_size = S16_MAX * PAGE_SIZE;
 230                 u32 val = prandom_u32_state(prng);
 231                 u64 total = ce->vm->total;
 232                 u32 phys_sz;
 233                 u32 sz;
 234                 u32 *vaddr;
 235                 u32 i;
 236
 237                 /*
 238                  * If we have a tiny shared address space, like for the GGTT
 239                  * then we can't be too greedy.
 240                  */
 241                 if (i915_is_ggtt(ce->vm))
 242                         total = div64_u64(total, thread->n_cpus);
 243
 244                 sz = min_t(u64, total >> 4, prandom_u32_state(prng));
 245                 phys_sz = sz % (max_block_size + 1);
 246
 247                 sz = round_up(sz, PAGE_SIZE);
 248                 phys_sz = round_up(phys_sz, PAGE_SIZE);
 249
 250                 pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__,
 251                          phys_sz, sz, val);
 252
 253                 obj = huge_gem_object(i915, phys_sz, sz);
 254                 if (IS_ERR(obj)) {
 255                         err = PTR_ERR(obj);
 256                         goto err_flush;
 257                 }
 258
 259                 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
 260                 if (IS_ERR(vaddr)) {
 261                         err = PTR_ERR(vaddr);
 262                         goto err_put;
 263                 }
 264
 265                 /*
 266                  * Make sure the potentially async clflush does its job, if
 267                  * required.
 268                  */
 269                 memset32(vaddr, val ^ 0xdeadbeaf,
 270                          huge_gem_object_phys_size(obj) / sizeof(u32));
 271
 272                 if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
 273                         obj->cache_dirty = true;
 274
 275                 err = i915_gem_object_fill_blt(obj, ce, val);
 276                 if (err)
 277                         goto err_unpin;
 278
 279                 i915_gem_object_lock(obj);
 280                 err = i915_gem_object_set_to_cpu_domain(obj, false);
 281                 i915_gem_object_unlock(obj);
 282                 if (err)
 283                         goto err_unpin;
 284
 285                 for (i = 0; i < huge_gem_object_phys_size(obj) / sizeof(u32); ++i) {
 286                         if (vaddr[i] != val) {
 287                                 pr_err("vaddr[%u]=%x, expected=%x\n", i,
 288                                        vaddr[i], val);
 289                                 err = -EINVAL;
 290                                 goto err_unpin;
 291                         }
 292                 }
 293
 294                 i915_gem_object_unpin_map(obj);
 295                 i915_gem_object_put(obj);
 296         } while (!time_after(jiffies, end));
 297
 298         goto err_flush;
 299
 300 err_unpin:
 301         i915_gem_object_unpin_map(obj);
 302 err_put:
 303         i915_gem_object_put(obj);
 304 err_flush:
 305         if (err == -ENOMEM)
 306                 err = 0;
 307
 308         intel_context_put(ce);
 309         return err;
 310 }
 311
 312 static int igt_copy_blt_thread(void *arg)
 313 {
 314         struct igt_thread_arg *thread = arg;
 315         struct drm_i915_private *i915 = thread->i915;
 316         struct rnd_state *prng = &thread->prng;
 317         struct drm_i915_gem_object *src, *dst;
 318         struct i915_gem_context *ctx;
 319         struct intel_context *ce;
 320         unsigned int prio;
 321         IGT_TIMEOUT(end);
 322         int err;
 323
 324         ctx = thread->ctx;
 325         if (!ctx) {
 326                 ctx = live_context(i915, thread->file);
 327                 if (IS_ERR(ctx))
 328                         return PTR_ERR(ctx);
 329
 330                 prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng);
 331                 ctx->sched.priority = I915_USER_PRIORITY(prio);
 332         }
 333
 334         ce = i915_gem_context_get_engine(ctx, BCS0);
 335         GEM_BUG_ON(IS_ERR(ce));
 336
 337         do {
 338                 const u32 max_block_size = S16_MAX * PAGE_SIZE;
 339                 u32 val = prandom_u32_state(prng);
 340                 u64 total = ce->vm->total;
 341                 u32 phys_sz;
 342                 u32 sz;
 343                 u32 *vaddr;
 344                 u32 i;
 345
 346                 if (i915_is_ggtt(ce->vm))
 347                         total = div64_u64(total, thread->n_cpus);
 348
 349                 sz = min_t(u64, total >> 4, prandom_u32_state(prng));
 350                 phys_sz = sz % (max_block_size + 1);
 351
 352                 sz = round_up(sz, PAGE_SIZE);
 353                 phys_sz = round_up(phys_sz, PAGE_SIZE);
 354
 355                 pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__,
 356                          phys_sz, sz, val);
 357
 358                 src = huge_gem_object(i915, phys_sz, sz);
 359                 if (IS_ERR(src)) {
 360                         err = PTR_ERR(src);
 361                         goto err_flush;
 362                 }
 363
 364                 vaddr = i915_gem_object_pin_map(src, I915_MAP_WB);
 365                 if (IS_ERR(vaddr)) {
 366                         err = PTR_ERR(vaddr);
 367                         goto err_put_src;
 368                 }
 369
 370                 memset32(vaddr, val,
 371                          huge_gem_object_phys_size(src) / sizeof(u32));
 372
 373                 i915_gem_object_unpin_map(src);
 374
 375                 if (!(src->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
 376                         src->cache_dirty = true;
 377
 378                 dst = huge_gem_object(i915, phys_sz, sz);
 379                 if (IS_ERR(dst)) {
 380                         err = PTR_ERR(dst);
 381                         goto err_put_src;
 382                 }
 383
 384                 vaddr = i915_gem_object_pin_map(dst, I915_MAP_WB);
 385                 if (IS_ERR(vaddr)) {
 386                         err = PTR_ERR(vaddr);
 387                         goto err_put_dst;
 388                 }
 389
 390                 memset32(vaddr, val ^ 0xdeadbeaf,
 391                          huge_gem_object_phys_size(dst) / sizeof(u32));
 392
 393                 if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
 394                         dst->cache_dirty = true;
 395
 396                 err = i915_gem_object_copy_blt(src, dst, ce);
 397                 if (err)
 398                         goto err_unpin;
 399
 400                 i915_gem_object_lock(dst);
 401                 err = i915_gem_object_set_to_cpu_domain(dst, false);
 402                 i915_gem_object_unlock(dst);
 403                 if (err)
 404                         goto err_unpin;
 405
 406                 for (i = 0; i < huge_gem_object_phys_size(dst) / sizeof(u32); ++i) {
 407                         if (vaddr[i] != val) {
 408                                 pr_err("vaddr[%u]=%x, expected=%x\n", i,
 409                                        vaddr[i], val);
 410                                 err = -EINVAL;
 411                                 goto err_unpin;
 412                         }
 413                 }
 414
 415                 i915_gem_object_unpin_map(dst);
 416
 417                 i915_gem_object_put(src);
 418                 i915_gem_object_put(dst);
 419         } while (!time_after(jiffies, end));
 420
 421         goto err_flush;
 422
 423 err_unpin:
 424         i915_gem_object_unpin_map(dst);
 425 err_put_dst:
 426         i915_gem_object_put(dst);
 427 err_put_src:
 428         i915_gem_object_put(src);
 429 err_flush:
 430         if (err == -ENOMEM)
 431                 err = 0;
 432
 433         intel_context_put(ce);
 434         return err;
 435 }
 436
 437 static int igt_threaded_blt(struct drm_i915_private *i915,
 438                             int (*blt_fn)(void *arg),
 439                             unsigned int flags)
 440 #define SINGLE_CTX BIT(0)
 441 {
 442         struct igt_thread_arg *thread;
 443         struct task_struct **tsk;
 444         unsigned int n_cpus, i;
 445         I915_RND_STATE(prng);
 446         int err = 0;
 447
 448         n_cpus = num_online_cpus() + 1;
 449
 450         tsk = kcalloc(n_cpus, sizeof(struct task_struct *), GFP_KERNEL);
 451         if (!tsk)
 452                 return 0;
 453
 454         thread = kcalloc(n_cpus, sizeof(struct igt_thread_arg), GFP_KERNEL);
 455         if (!thread)
 456                 goto out_tsk;
 457
 458         thread[0].file = mock_file(i915);
 459         if (IS_ERR(thread[0].file)) {
 460                 err = PTR_ERR(thread[0].file);
 461                 goto out_thread;
 462         }
 463
 464         if (flags & SINGLE_CTX) {
 465                 thread[0].ctx = live_context(i915, thread[0].file);
 466                 if (IS_ERR(thread[0].ctx)) {
 467                         err = PTR_ERR(thread[0].ctx);
 468                         goto out_file;
 469                 }
 470         }
 471
 472         for (i = 0; i < n_cpus; ++i) {
 473                 thread[i].i915 = i915;
 474                 thread[i].file = thread[0].file;
 475                 thread[i].ctx = thread[0].ctx;
 476                 thread[i].n_cpus = n_cpus;
 477                 thread[i].prng =
 478                         I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng));
 479
 480                 tsk[i] = kthread_run(blt_fn, &thread[i], "igt/blt-%d", i);
 481                 if (IS_ERR(tsk[i])) {
 482                         err = PTR_ERR(tsk[i]);
 483                         break;
 484                 }
 485
 486                 get_task_struct(tsk[i]);
 487         }
 488
 489         yield(); /* start all threads before we kthread_stop() */
 490
 491         for (i = 0; i < n_cpus; ++i) {
 492                 int status;
 493
 494                 if (IS_ERR_OR_NULL(tsk[i]))
 495                         continue;
 496
 497                 status = kthread_stop(tsk[i]);
 498                 if (status && !err)
 499                         err = status;
 500
 501                 put_task_struct(tsk[i]);
 502         }
 503
 504 out_file:
 505         fput(thread[0].file);
 506 out_thread:
 507         kfree(thread);
 508 out_tsk:
 509         kfree(tsk);
 510         return err;
 511 }
 512
 513 static int igt_fill_blt(void *arg)
 514 {
 515         return igt_threaded_blt(arg, igt_fill_blt_thread, 0);
 516 }
 517
 518 static int igt_fill_blt_ctx0(void *arg)
 519 {
 520         return igt_threaded_blt(arg, igt_fill_blt_thread, SINGLE_CTX);
 521 }
 522
 523 static int igt_copy_blt(void *arg)
 524 {
 525         return igt_threaded_blt(arg, igt_copy_blt_thread, 0);
 526 }
 527
 528 static int igt_copy_blt_ctx0(void *arg)
 529 {
 530         return igt_threaded_blt(arg, igt_copy_blt_thread, SINGLE_CTX);
 531 }
 532
 533 int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915)
 534 {
 535         static const struct i915_subtest tests[] = {
 536                 SUBTEST(igt_fill_blt),
 537                 SUBTEST(igt_fill_blt_ctx0),
 538                 SUBTEST(igt_copy_blt),
 539                 SUBTEST(igt_copy_blt_ctx0),
 540         };
 541
 542         if (intel_gt_is_wedged(&i915->gt))
 543                 return 0;
 544
 545         if (!HAS_ENGINE(i915, BCS0))
 546                 return 0;
 547
 548         return i915_live_subtests(tests, i915);
 549 }
 550
 551 int i915_gem_object_blt_perf_selftests(struct drm_i915_private *i915)
 552 {
 553         static const struct i915_subtest tests[] = {
 554                 SUBTEST(perf_fill_blt),
 555                 SUBTEST(perf_copy_blt),
 556         };
 557
 558         if (intel_gt_is_wedged(&i915->gt))
 559                 return 0;
 560
 561         return i915_live_subtests(tests, i915);
 562 }