/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2018 Intel Corporation
 */

#include <linux/mutex.h>

#include "i915_drv.h"
#include "i915_globals.h"
#include "i915_request.h"
#include "i915_scheduler.h"

static struct i915_global_scheduler {
        struct i915_global base;
        struct kmem_cache *slab_dependencies;
        struct kmem_cache *slab_priorities;
} global;

static DEFINE_SPINLOCK(schedule_lock);

static const struct i915_request *
node_to_request(const struct i915_sched_node *node)
{
        return container_of(node, const struct i915_request, sched);
}

static inline bool node_started(const struct i915_sched_node *node)
{
        return i915_request_started(node_to_request(node));
}

static inline bool node_signaled(const struct i915_sched_node *node)
{
        return i915_request_completed(node_to_request(node));
}
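
/*
 * These helpers rely on every i915_sched_node being embedded in an
 * i915_request (as the container_of() above requires); the scheduler
 * never sees a free-standing node.
 */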

void i915_sched_node_init(struct i915_sched_node *node)
{
        INIT_LIST_HEAD(&node->signalers_list);
        INIT_LIST_HEAD(&node->waiters_list);
        INIT_LIST_HEAD(&node->link);
        node->attr.priority = I915_PRIORITY_INVALID;
        node->flags = 0;
}
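
/*
 * Rough lifecycle of a node (a sketch, not an exhaustive list of callers):
 * i915_sched_node_init() when its request is constructed,
 * i915_sched_node_add_dependency() for each fence the request must wait
 * upon, and i915_sched_node_fini() upon retirement to sever both lists.
 */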

static struct i915_dependency *
i915_dependency_alloc(void)
{
        return kmem_cache_alloc(global.slab_dependencies, GFP_KERNEL);
}

static void
i915_dependency_free(struct i915_dependency *dep)
{
        kmem_cache_free(global.slab_dependencies, dep);
}

bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
                                      struct i915_sched_node *signal,
                                      struct i915_dependency *dep,
                                      unsigned long flags)
{
        bool ret = false;

        spin_lock_irq(&schedule_lock);

        if (!node_signaled(signal)) {
                INIT_LIST_HEAD(&dep->dfs_link);
                list_add(&dep->wait_link, &signal->waiters_list);
                list_add(&dep->signal_link, &node->signalers_list);
                dep->signaler = signal;
                dep->flags = flags;

                /* Keep track of whether anyone on this chain has a semaphore */
                if (signal->flags & I915_SCHED_HAS_SEMAPHORE_CHAIN &&
                    !node_started(signal))
                        node->flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;

                ret = true;
        }

        spin_unlock_irq(&schedule_lock);

        return ret;
}
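
/*
 * Illustrative sketch (hypothetical requests A, B and C): if A busy-waits
 * on a semaphore and B depends on A, adding the B -> A edge above marks B
 * with I915_SCHED_HAS_SEMAPHORE_CHAIN; a later C -> B edge then inherits
 * the flag in turn, so every node along the chain knows a semaphore is
 * involved.
 */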

int i915_sched_node_add_dependency(struct i915_sched_node *node,
                                   struct i915_sched_node *signal)
{
        struct i915_dependency *dep;

        dep = i915_dependency_alloc();
        if (!dep)
                return -ENOMEM;

        if (!__i915_sched_node_add_dependency(node, signal, dep,
                                              I915_DEPENDENCY_ALLOC))
                i915_dependency_free(dep);

        return 0;
}
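
/*
 * Note that failing to add the edge because @signal has already completed
 * is not an error: the dependency is simply unnecessary, so the
 * preallocated i915_dependency is returned to the slab and 0 is reported.
 */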

void i915_sched_node_fini(struct i915_sched_node *node)
{
        struct i915_dependency *dep, *tmp;

        GEM_BUG_ON(!list_empty(&node->link));

        spin_lock_irq(&schedule_lock);

        /*
         * Everyone we depended upon (the fences we wait to be signaled)
         * should retire before us and remove themselves from our list.
         * However, retirement is run independently on each timeline and
         * so we may be called out-of-order.
         */
        list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) {
                GEM_BUG_ON(!node_signaled(dep->signaler));
                GEM_BUG_ON(!list_empty(&dep->dfs_link));

                list_del(&dep->wait_link);
                if (dep->flags & I915_DEPENDENCY_ALLOC)
                        i915_dependency_free(dep);
        }

        /* Remove ourselves from everyone who depends upon us */
        list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) {
                GEM_BUG_ON(dep->signaler != node);
                GEM_BUG_ON(!list_empty(&dep->dfs_link));

                list_del(&dep->signal_link);
                if (dep->flags & I915_DEPENDENCY_ALLOC)
                        i915_dependency_free(dep);
        }

        spin_unlock_irq(&schedule_lock);
}

static inline struct i915_priolist *to_priolist(struct rb_node *rb)
{
        return rb_entry(rb, struct i915_priolist, node);
}

static void assert_priolists(struct intel_engine_execlists * const execlists)
{
        struct rb_node *rb;
        long last_prio, i;

        if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
                return;

        GEM_BUG_ON(rb_first_cached(&execlists->queue) !=
                   rb_first(&execlists->queue.rb_root));

        last_prio = (INT_MAX >> I915_USER_PRIORITY_SHIFT) + 1;
        for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
                const struct i915_priolist *p = to_priolist(rb);

                GEM_BUG_ON(p->priority >= last_prio);
                last_prio = p->priority;

                GEM_BUG_ON(!p->used);
                for (i = 0; i < ARRAY_SIZE(p->requests); i++) {
                        if (list_empty(&p->requests[i]))
                                continue;

                        GEM_BUG_ON(!(p->used & BIT(i)));
                }
        }
}

struct list_head *
i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio)
{
        struct intel_engine_execlists * const execlists = &engine->execlists;
        struct i915_priolist *p;
        struct rb_node **parent, *rb;
        bool first = true;
        int idx, i;

        lockdep_assert_held(&engine->timeline.lock);
        assert_priolists(execlists);

        /* buckets sorted from highest [in slot 0] to lowest priority */
        idx = I915_PRIORITY_COUNT - (prio & I915_PRIORITY_MASK) - 1;
        prio >>= I915_USER_PRIORITY_SHIFT;
        if (unlikely(execlists->no_priolist))
                prio = I915_PRIORITY_NORMAL;

find_priolist:
        /* most positive priority is scheduled first, equal priorities fifo */
        rb = NULL;
        parent = &execlists->queue.rb_root.rb_node;
        while (*parent) {
                rb = *parent;
                p = to_priolist(rb);
                if (prio > p->priority) {
                        parent = &rb->rb_left;
                } else if (prio < p->priority) {
                        parent = &rb->rb_right;
                        first = false;
                } else {
                        goto out;
                }
        }

        if (prio == I915_PRIORITY_NORMAL) {
                p = &execlists->default_priolist;
        } else {
                p = kmem_cache_alloc(global.slab_priorities, GFP_ATOMIC);
                /* Convert an allocation failure to a priority bump */
                if (unlikely(!p)) {
                        prio = I915_PRIORITY_NORMAL; /* recurses just once */

                        /* To maintain ordering with all rendering, after an
                         * allocation failure we have to disable all scheduling.
                         * Requests will then be executed in fifo, and schedule
                         * will ensure that dependencies are emitted in fifo.
                         * There will still be some reordering with existing
                         * requests, so if userspace lied about their
                         * dependencies that reordering may be visible.
                         */
                        execlists->no_priolist = true;
                        goto find_priolist;
                }
        }

        p->priority = prio;
        for (i = 0; i < ARRAY_SIZE(p->requests); i++)
                INIT_LIST_HEAD(&p->requests[i]);
        rb_link_node(&p->node, rb, parent);
        rb_insert_color_cached(&p->node, &execlists->queue, first);
        p->used = 0;

out:
        p->used |= BIT(idx);
        return &p->requests[idx];
}
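
/*
 * Worked example (assuming I915_USER_PRIORITY_SHIFT == 2, so that
 * I915_PRIORITY_COUNT == 4 and I915_PRIORITY_MASK == 3): the rbtree is
 * keyed on the user priority (prio >> 2), while the internal-bump bits
 * select the bucket, idx = 4 - (prio & 3) - 1. Maximum bumps (0b11) give
 * idx == 0, dequeued first; no bumps give idx == 3, dequeued last among
 * requests of the same user priority.
 */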

struct sched_cache {
        struct list_head *priolist;
};

static struct intel_engine_cs *
sched_lock_engine(const struct i915_sched_node *node,
                  struct intel_engine_cs *locked,
                  struct sched_cache *cache)
{
        struct intel_engine_cs *engine = node_to_request(node)->engine;

        GEM_BUG_ON(!locked);

        if (engine != locked) {
                spin_unlock(&locked->timeline.lock);
                memset(cache, 0, sizeof(*cache));
                spin_lock(&engine->timeline.lock);
        }

        return engine;
}
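
/*
 * The priority chase in __i915_schedule() may hop between engines; rather
 * than holding every engine->timeline.lock at once, the walker drops the
 * old lock and takes the new one here, invalidating the priolist cache
 * (which is only valid for a single engine) whenever the engine changes.
 */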

static bool inflight(const struct i915_request *rq,
                     const struct intel_engine_cs *engine)
{
        const struct i915_request *active;

        if (!i915_request_is_active(rq))
                return false;

        active = port_request(engine->execlists.port);
        return active->hw_context == rq->hw_context;
}

static void __i915_schedule(struct i915_request *rq,
                            const struct i915_sched_attr *attr)
{
        struct intel_engine_cs *engine;
        struct i915_dependency *dep, *p;
        struct i915_dependency stack;
        const int prio = attr->priority;
        struct sched_cache cache;
        LIST_HEAD(dfs);

        /* Needed in order to use the temporary link inside i915_dependency */
        lockdep_assert_held(&schedule_lock);
        GEM_BUG_ON(prio == I915_PRIORITY_INVALID);

        if (i915_request_completed(rq))
                return;

        if (prio <= READ_ONCE(rq->sched.attr.priority))
                return;

        stack.signaler = &rq->sched;
        list_add(&stack.dfs_link, &dfs);

        /*
         * Recursively bump all dependent priorities to match the new request.
         *
         * A naive approach would be to use recursion:
         * static void update_priorities(struct i915_sched_node *node, prio) {
         *      list_for_each_entry(dep, &node->signalers_list, signal_link)
         *              update_priorities(dep->signaler, prio)
         *      queue_request(node);
         * }
         * but that may have unlimited recursion depth and so runs a very
         * real risk of overrunning the kernel stack. Instead, we build
         * a flat list of all dependencies starting with the current request.
         * As we walk the list of dependencies, we add all of its dependencies
         * to the end of the list (this may include an already visited
         * request) and continue to walk onwards onto the new dependencies. The
         * end result is a topological list of requests in reverse order, the
         * last element in the list is the request we must execute first.
         */
        list_for_each_entry(dep, &dfs, dfs_link) {
                struct i915_sched_node *node = dep->signaler;

                /* If we are already flying, we know we have no signalers */
                if (node_started(node))
                        continue;

                /*
                 * Within an engine, there can be no cycle, but we may
                 * refer to the same dependency chain multiple times
                 * (redundant dependencies are not eliminated) and across
                 * engines.
                 */
                list_for_each_entry(p, &node->signalers_list, signal_link) {
                        GEM_BUG_ON(p == dep); /* no cycles! */

                        if (node_signaled(p->signaler))
                                continue;

                        if (prio > READ_ONCE(p->signaler->attr.priority))
                                list_move_tail(&p->dfs_link, &dfs);
                }
        }

        /*
         * If we didn't need to bump any existing priorities, and we haven't
         * yet submitted this request (i.e. there is no potential race with
         * execlists_submit_request()), we can set our own priority and skip
         * acquiring the engine locks.
         */
        if (rq->sched.attr.priority == I915_PRIORITY_INVALID) {
                GEM_BUG_ON(!list_empty(&rq->sched.link));
                rq->sched.attr = *attr;

                if (stack.dfs_link.next == stack.dfs_link.prev)
                        return;

                __list_del_entry(&stack.dfs_link);
        }

        memset(&cache, 0, sizeof(cache));
        engine = rq->engine;
        spin_lock(&engine->timeline.lock);

        /* Fifo and depth-first replacement ensure our deps execute before us */
        list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
                struct i915_sched_node *node = dep->signaler;

                INIT_LIST_HEAD(&dep->dfs_link);

                engine = sched_lock_engine(node, engine, &cache);
                lockdep_assert_held(&engine->timeline.lock);

                /* Recheck after acquiring the engine->timeline.lock */
                if (prio <= node->attr.priority || node_signaled(node))
                        continue;

                node->attr.priority = prio;
                if (!list_empty(&node->link)) {
                        if (!cache.priolist)
                                cache.priolist =
                                        i915_sched_lookup_priolist(engine,
                                                                   prio);
                        list_move_tail(&node->link, cache.priolist);
                } else {
                        /*
                         * If the request is not in the priolist queue because
                         * it is not yet runnable, then it doesn't contribute
                         * to our preemption decisions. On the other hand,
                         * if the request is on the HW, it too is not in the
                         * queue; but in that case we may still need to reorder
                         * the inflight requests.
                         */
                        if (!i915_sw_fence_done(&node_to_request(node)->submit))
                                continue;
                }

                if (prio <= engine->execlists.queue_priority_hint)
                        continue;

                engine->execlists.queue_priority_hint = prio;

                /*
                 * If we are already the currently executing context, don't
                 * bother evaluating if we should preempt ourselves.
                 */
                if (inflight(node_to_request(node), engine))
                        continue;

                /* Defer (tasklet) submission until after all of our updates. */
                tasklet_hi_schedule(&engine->execlists.tasklet);
        }

        spin_unlock(&engine->timeline.lock);
}
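
/*
 * Illustrative sketch (hypothetical requests): if C at priority 0 depends
 * on B, which depends on A, then scheduling C with priority 2 walks
 * C -> B -> A and bumps all three to 2, moving any that already sit in a
 * priolist into the higher-priority bucket so that A and B cannot be
 * starved behind unrelated priority-1 work.
 */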

void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
{
        spin_lock_irq(&schedule_lock);
        __i915_schedule(rq, attr);
        spin_unlock_irq(&schedule_lock);
}
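
/*
 * A minimal usage sketch (hypothetical caller): to boost a request that,
 * say, a display update is waiting upon, one might do
 *
 *      struct i915_sched_attr attr = { .priority = I915_PRIORITY_DISPLAY };
 *      i915_schedule(rq, &attr);
 *
 * which raises rq and, transitively, everything rq is waiting upon.
 */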

void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
{
        struct i915_sched_attr attr;
        unsigned long flags;

        GEM_BUG_ON(bump & ~I915_PRIORITY_MASK);

        if (READ_ONCE(rq->sched.attr.priority) == I915_PRIORITY_INVALID)
                return;

        spin_lock_irqsave(&schedule_lock, flags);

        attr = rq->sched.attr;
        attr.priority |= bump;
        __i915_schedule(rq, &attr);

        spin_unlock_irqrestore(&schedule_lock, flags);
}
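
/*
 * Unlike i915_schedule(), the bump is OR'ed into the low (internal)
 * priority bits rather than replacing the user level: e.g. bumping by
 * I915_PRIORITY_WAIT only reorders the request among the buckets of its
 * existing user priority.
 */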

void __i915_priolist_free(struct i915_priolist *p)
{
        kmem_cache_free(global.slab_priorities, p);
}

static void i915_global_scheduler_shrink(void)
{
        kmem_cache_shrink(global.slab_dependencies);
        kmem_cache_shrink(global.slab_priorities);
}

static void i915_global_scheduler_exit(void)
{
        kmem_cache_destroy(global.slab_dependencies);
        kmem_cache_destroy(global.slab_priorities);
}

static struct i915_global_scheduler global = { {
        .shrink = i915_global_scheduler_shrink,
        .exit = i915_global_scheduler_exit,
} };

int __init i915_global_scheduler_init(void)
{
        global.slab_dependencies = KMEM_CACHE(i915_dependency,
                                              SLAB_HWCACHE_ALIGN);
        if (!global.slab_dependencies)
                return -ENOMEM;

        global.slab_priorities = KMEM_CACHE(i915_priolist,
                                            SLAB_HWCACHE_ALIGN);
        if (!global.slab_priorities)
                goto err_priorities;

        i915_global_register(&global.base);
        return 0;

err_priorities:
        /* Unwind the dependency cache; slab_priorities was never created */
        kmem_cache_destroy(global.slab_dependencies);
        return -ENOMEM;
}