drivers/gpu/drm/i915/i915_gem.c

   1 /*
   2  * Copyright © 2008-2015 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  *
  23  * Authors:
  24  *    Eric Anholt <eric@anholt.net>
  25  *
  26  */
  27
  28 #include <drm/drmP.h>
  29 #include <drm/drm_vma_manager.h>
  30 #include <drm/i915_drm.h>
  31 #include "i915_drv.h"
  32 #include "i915_vgpu.h"
  33 #include "i915_trace.h"
  34 #include "intel_drv.h"
  35 #include "intel_frontbuffer.h"
  36 #include "intel_mocs.h"
  37 #include <linux/dma-fence-array.h>
  38 #include <linux/reservation.h>
  39 #include <linux/shmem_fs.h>
  40 #include <linux/slab.h>
  41 #include <linux/swap.h>
  42 #include <linux/pci.h>
  43 #include <linux/dma-buf.h>
  44
  45 static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
  46 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
  47 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
  48
  49 static bool cpu_cache_is_coherent(struct drm_device *dev,
  50                                   enum i915_cache_level level)
  51 {
  52         return HAS_LLC(to_i915(dev)) || level != I915_CACHE_NONE;
  53 }
  54
  55 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
  56 {
  57         if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
  58                 return false;
  59
  60         if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
  61                 return true;
  62
  63         return obj->pin_display;
  64 }
  65
  66 static int
  67 insert_mappable_node(struct i915_ggtt *ggtt,
  68                      struct drm_mm_node *node, u32 size)
  69 {
  70         memset(node, 0, sizeof(*node));
  71         return drm_mm_insert_node_in_range_generic(&ggtt->base.mm, node,
  72                                                    size, 0, -1,
  73                                                    0, ggtt->mappable_end,
  74                                                    DRM_MM_SEARCH_DEFAULT,
  75                                                    DRM_MM_CREATE_DEFAULT);
  76 }
  77
  78 static void
  79 remove_mappable_node(struct drm_mm_node *node)
  80 {
  81         drm_mm_remove_node(node);
  82 }
  83
  84 /* some bookkeeping */
  85 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
  86                                   u64 size)
  87 {
  88         spin_lock(&dev_priv->mm.object_stat_lock);
  89         dev_priv->mm.object_count++;
  90         dev_priv->mm.object_memory += size;
  91         spin_unlock(&dev_priv->mm.object_stat_lock);
  92 }
  93
  94 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
  95                                      u64 size)
  96 {
  97         spin_lock(&dev_priv->mm.object_stat_lock);
  98         dev_priv->mm.object_count--;
  99         dev_priv->mm.object_memory -= size;
 100         spin_unlock(&dev_priv->mm.object_stat_lock);
 101 }
 102
 103 static int
 104 i915_gem_wait_for_error(struct i915_gpu_error *error)
 105 {
 106         int ret;
 107
 108         might_sleep();
 109
 110         if (!i915_reset_in_progress(error))
 111                 return 0;
 112
 113         /*
 114          * Only wait 10 seconds for the gpu reset to complete to avoid hanging
 115          * userspace. If it takes that long something really bad is going on and
 116          * we should simply try to bail out and fail as gracefully as possible.
 117          */
 118         ret = wait_event_interruptible_timeout(error->reset_queue,
 119                                                !i915_reset_in_progress(error),
 120                                                I915_RESET_TIMEOUT);
 121         if (ret == 0) {
 122                 DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
 123                 return -EIO;
 124         } else if (ret < 0) {
 125                 return ret;
 126         } else {
 127                 return 0;
 128         }
 129 }
 130
 131 int i915_mutex_lock_interruptible(struct drm_device *dev)
 132 {
 133         struct drm_i915_private *dev_priv = to_i915(dev);
 134         int ret;
 135
 136         ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
 137         if (ret)
 138                 return ret;
 139
 140         ret = mutex_lock_interruptible(&dev->struct_mutex);
 141         if (ret)
 142                 return ret;
 143
 144         return 0;
 145 }
 146
 147 int
 148 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 149                             struct drm_file *file)
 150 {
 151         struct drm_i915_private *dev_priv = to_i915(dev);
 152         struct i915_ggtt *ggtt = &dev_priv->ggtt;
 153         struct drm_i915_gem_get_aperture *args = data;
 154         struct i915_vma *vma;
 155         size_t pinned;
 156
 157         pinned = 0;
 158         mutex_lock(&dev->struct_mutex);
 159         list_for_each_entry(vma, &ggtt->base.active_list, vm_link)
 160                 if (i915_vma_is_pinned(vma))
 161                         pinned += vma->node.size;
 162         list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link)
 163                 if (i915_vma_is_pinned(vma))
 164                         pinned += vma->node.size;
 165         mutex_unlock(&dev->struct_mutex);
 166
 167         args->aper_size = ggtt->base.total;
 168         args->aper_available_size = args->aper_size - pinned;
 169
 170         return 0;
 171 }
 172
 173 static struct sg_table *
 174 i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
 175 {
 176         struct address_space *mapping = obj->base.filp->f_mapping;
 177         char *vaddr = obj->phys_handle->vaddr;
 178         struct sg_table *st;
 179         struct scatterlist *sg;
 180         int i;
 181
 182         if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
 183                 return ERR_PTR(-EINVAL);
 184
 185         for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
 186                 struct page *page;
 187                 char *src;
 188
 189                 page = shmem_read_mapping_page(mapping, i);
 190                 if (IS_ERR(page))
 191                         return ERR_CAST(page);
 192
 193                 src = kmap_atomic(page);
 194                 memcpy(vaddr, src, PAGE_SIZE);
 195                 drm_clflush_virt_range(vaddr, PAGE_SIZE);
 196                 kunmap_atomic(src);
 197
 198                 put_page(page);
 199                 vaddr += PAGE_SIZE;
 200         }
 201
 202         i915_gem_chipset_flush(to_i915(obj->base.dev));
 203
 204         st = kmalloc(sizeof(*st), GFP_KERNEL);
 205         if (st == NULL)
 206                 return ERR_PTR(-ENOMEM);
 207
 208         if (sg_alloc_table(st, 1, GFP_KERNEL)) {
 209                 kfree(st);
 210                 return ERR_PTR(-ENOMEM);
 211         }
 212
 213         sg = st->sgl;
 214         sg->offset = 0;
 215         sg->length = obj->base.size;
 216
 217         sg_dma_address(sg) = obj->phys_handle->busaddr;
 218         sg_dma_len(sg) = obj->base.size;
 219
 220         return st;
 221 }
 222
 223 static void
 224 __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
 225                                 struct sg_table *pages)
 226 {
 227         GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);
 228
 229         if (obj->mm.madv == I915_MADV_DONTNEED)
 230                 obj->mm.dirty = false;
 231
 232         if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
 233             !cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
 234                 drm_clflush_sg(pages);
 235
 236         obj->base.read_domains = I915_GEM_DOMAIN_CPU;
 237         obj->base.write_domain = I915_GEM_DOMAIN_CPU;
 238 }
 239
 240 static void
 241 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
 242                                struct sg_table *pages)
 243 {
 244         __i915_gem_object_release_shmem(obj, pages);
 245
 246         if (obj->mm.dirty) {
 247                 struct address_space *mapping = obj->base.filp->f_mapping;
 248                 char *vaddr = obj->phys_handle->vaddr;
 249                 int i;
 250
 251                 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
 252                         struct page *page;
 253                         char *dst;
 254
 255                         page = shmem_read_mapping_page(mapping, i);
 256                         if (IS_ERR(page))
 257                                 continue;
 258
 259                         dst = kmap_atomic(page);
 260                         drm_clflush_virt_range(vaddr, PAGE_SIZE);
 261                         memcpy(dst, vaddr, PAGE_SIZE);
 262                         kunmap_atomic(dst);
 263
 264                         set_page_dirty(page);
 265                         if (obj->mm.madv == I915_MADV_WILLNEED)
 266                                 mark_page_accessed(page);
 267                         put_page(page);
 268                         vaddr += PAGE_SIZE;
 269                 }
 270                 obj->mm.dirty = false;
 271         }
 272
 273         sg_free_table(pages);
 274         kfree(pages);
 275 }
 276
 277 static void
 278 i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
 279 {
 280         drm_pci_free(obj->base.dev, obj->phys_handle);
 281         i915_gem_object_unpin_pages(obj);
 282 }
 283
 284 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
 285         .get_pages = i915_gem_object_get_pages_phys,
 286         .put_pages = i915_gem_object_put_pages_phys,
 287         .release = i915_gem_object_release_phys,
 288 };
 289
 290 int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 291 {
 292         struct i915_vma *vma;
 293         LIST_HEAD(still_in_list);
 294         int ret;
 295
 296         lockdep_assert_held(&obj->base.dev->struct_mutex);
 297
 298         /* Closed vma are removed from the obj->vma_list - but they may
 299          * still have an active binding on the object. To remove those we
 300          * must wait for all rendering to complete to the object (as unbinding
 301          * must anyway), and retire the requests.
 302          */
 303         ret = i915_gem_object_wait(obj,
 304                                    I915_WAIT_INTERRUPTIBLE |
 305                                    I915_WAIT_LOCKED |
 306                                    I915_WAIT_ALL,
 307                                    MAX_SCHEDULE_TIMEOUT,
 308                                    NULL);
 309         if (ret)
 310                 return ret;
 311
 312         i915_gem_retire_requests(to_i915(obj->base.dev));
 313
 314         while ((vma = list_first_entry_or_null(&obj->vma_list,
 315                                                struct i915_vma,
 316                                                obj_link))) {
 317                 list_move_tail(&vma->obj_link, &still_in_list);
 318                 ret = i915_vma_unbind(vma);
 319                 if (ret)
 320                         break;
 321         }
 322         list_splice(&still_in_list, &obj->vma_list);
 323
 324         return ret;
 325 }
 326
 327 static long
 328 i915_gem_object_wait_fence(struct dma_fence *fence,
 329                            unsigned int flags,
 330                            long timeout,
 331                            struct intel_rps_client *rps)
 332 {
 333         struct drm_i915_gem_request *rq;
 334
 335         BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1);
 336
 337         if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
 338                 return timeout;
 339
 340         if (!dma_fence_is_i915(fence))
 341                 return dma_fence_wait_timeout(fence,
 342                                               flags & I915_WAIT_INTERRUPTIBLE,
 343                                               timeout);
 344
 345         rq = to_request(fence);
 346         if (i915_gem_request_completed(rq))
 347                 goto out;
 348
 349         /* This client is about to stall waiting for the GPU. In many cases
 350          * this is undesirable and limits the throughput of the system, as
 351          * many clients cannot continue processing user input/output whilst
 352          * blocked. RPS autotuning may take tens of milliseconds to respond
 353          * to the GPU load and thus incurs additional latency for the client.
 354          * We can circumvent that by promoting the GPU frequency to maximum
 355          * before we wait. This makes the GPU throttle up much more quickly
 356          * (good for benchmarks and user experience, e.g. window animations),
 357          * but at a cost of spending more power processing the workload
 358          * (bad for battery). Not all clients even want their results
 359          * immediately and for them we should just let the GPU select its own
 360          * frequency to maximise efficiency. To prevent a single client from
 361          * forcing the clocks too high for the whole system, we only allow
 362          * each client to waitboost once in a busy period.
 363          */
 364         if (rps) {
 365                 if (INTEL_GEN(rq->i915) >= 6)
 366                         gen6_rps_boost(rq->i915, rps, rq->emitted_jiffies);
 367                 else
 368                         rps = NULL;
 369         }
 370
 371         timeout = i915_wait_request(rq, flags, timeout);
 372
 373 out:
 374         if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq))
 375                 i915_gem_request_retire_upto(rq);
 376
 377         if (rps && rq->global_seqno == intel_engine_last_submit(rq->engine)) {
 378                 /* The GPU is now idle and this client has stalled.
 379                  * Since no other client has submitted a request in the
 380                  * meantime, assume that this client is the only one
 381                  * supplying work to the GPU but is unable to keep that
 382                  * work supplied because it is waiting. Since the GPU is
 383                  * then never kept fully busy, RPS autoclocking will
 384                  * keep the clocks relatively low, causing further delays.
 385                  * Compensate by giving the synchronous client credit for
 386                  * a waitboost next time.
 387                  */
 388                 spin_lock(&rq->i915->rps.client_lock);
 389                 list_del_init(&rps->link);
 390                 spin_unlock(&rq->i915->rps.client_lock);
 391         }
 392
 393         return timeout;
 394 }
 395
 396 static long
 397 i915_gem_object_wait_reservation(struct reservation_object *resv,
 398                                  unsigned int flags,
 399                                  long timeout,
 400                                  struct intel_rps_client *rps)
 401 {
 402         struct dma_fence *excl;
 403
 404         if (flags & I915_WAIT_ALL) {
 405                 struct dma_fence **shared;
 406                 unsigned int count, i;
 407                 int ret;
 408
 409                 ret = reservation_object_get_fences_rcu(resv,
 410                                                         &excl, &count, &shared);
 411                 if (ret)
 412                         return ret;
 413
 414                 for (i = 0; i < count; i++) {
 415                         timeout = i915_gem_object_wait_fence(shared[i],
 416                                                              flags, timeout,
 417                                                              rps);
 418                         if (timeout <= 0)
 419                                 break;
 420
 421                         dma_fence_put(shared[i]);
 422                 }
 423
 424                 for (; i < count; i++)
 425                         dma_fence_put(shared[i]);
 426                 kfree(shared);
 427         } else {
 428                 excl = reservation_object_get_excl_rcu(resv);
 429         }
 430
 431         if (excl && timeout > 0)
 432                 timeout = i915_gem_object_wait_fence(excl, flags, timeout, rps);
 433
 434         dma_fence_put(excl);
 435
 436         return timeout;
 437 }
 438
 439 static void __fence_set_priority(struct dma_fence *fence, int prio)
 440 {
 441         struct drm_i915_gem_request *rq;
 442         struct intel_engine_cs *engine;
 443
 444         if (!dma_fence_is_i915(fence))
 445                 return;
 446
 447         rq = to_request(fence);
 448         engine = rq->engine;
 449         if (!engine->schedule)
 450                 return;
 451
 452         engine->schedule(rq, prio);
 453 }
 454
 455 static void fence_set_priority(struct dma_fence *fence, int prio)
 456 {
 457         /* Recurse once into a fence-array */
 458         if (dma_fence_is_array(fence)) {
 459                 struct dma_fence_array *array = to_dma_fence_array(fence);
 460                 int i;
 461
 462                 for (i = 0; i < array->num_fences; i++)
 463                         __fence_set_priority(array->fences[i], prio);
 464         } else {
 465                 __fence_set_priority(fence, prio);
 466         }
 467 }
 468
 469 int
 470 i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
 471                               unsigned int flags,
 472                               int prio)
 473 {
 474         struct dma_fence *excl;
 475
 476         if (flags & I915_WAIT_ALL) {
 477                 struct dma_fence **shared;
 478                 unsigned int count, i;
 479                 int ret;
 480
 481                 ret = reservation_object_get_fences_rcu(obj->resv,
 482                                                         &excl, &count, &shared);
 483                 if (ret)
 484                         return ret;
 485
 486                 for (i = 0; i < count; i++) {
 487                         fence_set_priority(shared[i], prio);
 488                         dma_fence_put(shared[i]);
 489                 }
 490
 491                 kfree(shared);
 492         } else {
 493                 excl = reservation_object_get_excl_rcu(obj->resv);
 494         }
 495
 496         if (excl) {
 497                 fence_set_priority(excl, prio);
 498                 dma_fence_put(excl);
 499         }
 500         return 0;
 501 }
 502
 503 /**
 504  * Waits for rendering to the object to be completed
 505  * @obj: i915 gem object
 506  * @flags: how to wait (under a lock, for all rendering or just for writes etc)
 507  * @timeout: how long to wait
 508  * @rps: client (user process) to charge for any waitboosting
 509  */
 510 int
 511 i915_gem_object_wait(struct drm_i915_gem_object *obj,
 512                      unsigned int flags,
 513                      long timeout,
 514                      struct intel_rps_client *rps)
 515 {
 516         might_sleep();
 517 #if IS_ENABLED(CONFIG_LOCKDEP)
 518         GEM_BUG_ON(debug_locks &&
 519                    !!lockdep_is_held(&obj->base.dev->struct_mutex) !=
 520                    !!(flags & I915_WAIT_LOCKED));
 521 #endif
 522         GEM_BUG_ON(timeout < 0);
 523
 524         timeout = i915_gem_object_wait_reservation(obj->resv,
 525                                                    flags, timeout,
 526                                                    rps);
 527         return timeout < 0 ? timeout : 0;
 528 }
 529
 530 static struct intel_rps_client *to_rps_client(struct drm_file *file)
 531 {
 532         struct drm_i915_file_private *fpriv = file->driver_priv;
 533
 534         return &fpriv->rps;
 535 }
 536
 537 int
 538 i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
 539                             int align)
 540 {
 541         drm_dma_handle_t *phys;
 542         int ret;
 543
 544         if (obj->phys_handle) {
 545                 if ((unsigned long)obj->phys_handle->vaddr & (align -1))
 546                         return -EBUSY;
 547
 548                 return 0;
 549         }
 550
 551         if (obj->mm.madv != I915_MADV_WILLNEED)
 552                 return -EFAULT;
 553
 554         if (obj->base.filp == NULL)
 555                 return -EINVAL;
 556
 557         ret = i915_gem_object_unbind(obj);
 558         if (ret)
 559                 return ret;
 560
 561         __i915_gem_object_put_pages(obj, I915_MM_NORMAL);
 562         if (obj->mm.pages)
 563                 return -EBUSY;
 564
 565         /* create a new object */
 566         phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
 567         if (!phys)
 568                 return -ENOMEM;
 569
 570         obj->phys_handle = phys;
 571         obj->ops = &i915_gem_phys_ops;
 572
 573         return i915_gem_object_pin_pages(obj);
 574 }
 575
 576 static int
 577 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
 578                      struct drm_i915_gem_pwrite *args,
 579                      struct drm_file *file)
 580 {
 581         struct drm_device *dev = obj->base.dev;
 582         void *vaddr = obj->phys_handle->vaddr + args->offset;
 583         char __user *user_data = u64_to_user_ptr(args->data_ptr);
 584         int ret;
 585
 586         /* We manually control the domain here and pretend that it
 587          * remains coherent i.e. in the GTT domain, like shmem_pwrite.
 588          */
 589         lockdep_assert_held(&obj->base.dev->struct_mutex);
 590         ret = i915_gem_object_wait(obj,
 591                                    I915_WAIT_INTERRUPTIBLE |
 592                                    I915_WAIT_LOCKED |
 593                                    I915_WAIT_ALL,
 594                                    MAX_SCHEDULE_TIMEOUT,
 595                                    to_rps_client(file));
 596         if (ret)
 597                 return ret;
 598
 599         intel_fb_obj_invalidate(obj, ORIGIN_CPU);
 600         if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
 601                 unsigned long unwritten;
 602
 603                 /* The physical object once assigned is fixed for the lifetime
 604                  * of the obj, so we can safely drop the lock and continue
 605                  * to access vaddr.
 606                  */
 607                 mutex_unlock(&dev->struct_mutex);
 608                 unwritten = copy_from_user(vaddr, user_data, args->size);
 609                 mutex_lock(&dev->struct_mutex);
 610                 if (unwritten) {
 611                         ret = -EFAULT;
 612                         goto out;
 613                 }
 614         }
 615
 616         drm_clflush_virt_range(vaddr, args->size);
 617         i915_gem_chipset_flush(to_i915(dev));
 618
 619 out:
 620         intel_fb_obj_flush(obj, false, ORIGIN_CPU);
 621         return ret;
 622 }
 623
 624 void *i915_gem_object_alloc(struct drm_device *dev)
 625 {
 626         struct drm_i915_private *dev_priv = to_i915(dev);
 627         return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL);
 628 }
 629
 630 void i915_gem_object_free(struct drm_i915_gem_object *obj)
 631 {
 632         struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 633         kmem_cache_free(dev_priv->objects, obj);
 634 }
 635
 636 static int
 637 i915_gem_create(struct drm_file *file,
 638                 struct drm_device *dev,
 639                 uint64_t size,
 640                 uint32_t *handle_p)
 641 {
 642         struct drm_i915_gem_object *obj;
 643         int ret;
 644         u32 handle;
 645
 646         size = roundup(size, PAGE_SIZE);
 647         if (size == 0)
 648                 return -EINVAL;
 649
 650         /* Allocate the new object */
 651         obj = i915_gem_object_create(dev, size);
 652         if (IS_ERR(obj))
 653                 return PTR_ERR(obj);
 654
 655         ret = drm_gem_handle_create(file, &obj->base, &handle);
 656         /* drop reference from allocate - handle holds it now */
 657         i915_gem_object_put(obj);
 658         if (ret)
 659                 return ret;
 660
 661         *handle_p = handle;
 662         return 0;
 663 }
 664
 665 int
 666 i915_gem_dumb_create(struct drm_file *file,
 667                      struct drm_device *dev,
 668                      struct drm_mode_create_dumb *args)
 669 {
 670         /* have to work out size/pitch and return them */
 671         args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
 672         args->size = args->pitch * args->height;
 673         return i915_gem_create(file, dev,
 674                                args->size, &args->handle);
 675 }
 676
 677 /**
 678  * Creates a new mm object and returns a handle to it.
 679  * @dev: drm device pointer
 680  * @data: ioctl data blob
 681  * @file: drm file pointer
 682  */
 683 int
 684 i915_gem_create_ioctl(struct drm_device *dev, void *data,
 685                       struct drm_file *file)
 686 {
 687         struct drm_i915_gem_create *args = data;
 688
 689         i915_gem_flush_free_objects(to_i915(dev));
 690
 691         return i915_gem_create(file, dev,
 692                                args->size, &args->handle);
 693 }
 694
 695 static inline int
 696 __copy_to_user_swizzled(char __user *cpu_vaddr,
 697                         const char *gpu_vaddr, int gpu_offset,
 698                         int length)
 699 {
 700         int ret, cpu_offset = 0;
 701
 702         while (length > 0) {
 703                 int cacheline_end = ALIGN(gpu_offset + 1, 64);
 704                 int this_length = min(cacheline_end - gpu_offset, length);
 705                 int swizzled_gpu_offset = gpu_offset ^ 64;
 706
 707                 ret = __copy_to_user(cpu_vaddr + cpu_offset,
 708                                      gpu_vaddr + swizzled_gpu_offset,
 709                                      this_length);
 710                 if (ret)
 711                         return ret + length;
 712
 713                 cpu_offset += this_length;
 714                 gpu_offset += this_length;
 715                 length -= this_length;
 716         }
 717
 718         return 0;
 719 }
 720
 721 static inline int
 722 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
 723                           const char __user *cpu_vaddr,
 724                           int length)
 725 {
 726         int ret, cpu_offset = 0;
 727
 728         while (length > 0) {
 729                 int cacheline_end = ALIGN(gpu_offset + 1, 64);
 730                 int this_length = min(cacheline_end - gpu_offset, length);
 731                 int swizzled_gpu_offset = gpu_offset ^ 64;
 732
 733                 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
 734                                        cpu_vaddr + cpu_offset,
 735                                        this_length);
 736                 if (ret)
 737                         return ret + length;
 738
 739                 cpu_offset += this_length;
 740                 gpu_offset += this_length;
 741                 length -= this_length;
 742         }
 743
 744         return 0;
 745 }
 746
 747 /*
 748  * Pins the specified object's pages and synchronizes the object with
 749  * GPU accesses. Sets needs_clflush to non-zero if the caller should
 750  * flush the object from the CPU cache.
 751  */
 752 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 753                                     unsigned int *needs_clflush)
 754 {
 755         int ret;
 756
 757         lockdep_assert_held(&obj->base.dev->struct_mutex);
 758
 759         *needs_clflush = 0;
 760         if (!i915_gem_object_has_struct_page(obj))
 761                 return -ENODEV;
 762
 763         ret = i915_gem_object_wait(obj,
 764                                    I915_WAIT_INTERRUPTIBLE |
 765                                    I915_WAIT_LOCKED,
 766                                    MAX_SCHEDULE_TIMEOUT,
 767                                    NULL);
 768         if (ret)
 769                 return ret;
 770
 771         ret = i915_gem_object_pin_pages(obj);
 772         if (ret)
 773                 return ret;
 774
 775         i915_gem_object_flush_gtt_write_domain(obj);
 776
 777         /* If we're not in the cpu read domain, set ourself into the gtt
 778          * read domain and manually flush cachelines (if required). This
 779          * optimizes for the case when the gpu will dirty the data
 780          * anyway again before the next pread happens.
 781          */
 782         if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
 783                 *needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
 784                                                         obj->cache_level);
 785
 786         if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
 787                 ret = i915_gem_object_set_to_cpu_domain(obj, false);
 788                 if (ret)
 789                         goto err_unpin;
 790
 791                 *needs_clflush = 0;
 792         }
 793
 794         /* return with the pages pinned */
 795         return 0;
 796
 797 err_unpin:
 798         i915_gem_object_unpin_pages(obj);
 799         return ret;
 800 }
 801
 802 int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
 803                                      unsigned int *needs_clflush)
 804 {
 805         int ret;
 806
 807         lockdep_assert_held(&obj->base.dev->struct_mutex);
 808
 809         *needs_clflush = 0;
 810         if (!i915_gem_object_has_struct_page(obj))
 811                 return -ENODEV;
 812
 813         ret = i915_gem_object_wait(obj,
 814                                    I915_WAIT_INTERRUPTIBLE |
 815                                    I915_WAIT_LOCKED |
 816                                    I915_WAIT_ALL,
 817                                    MAX_SCHEDULE_TIMEOUT,
 818                                    NULL);
 819         if (ret)
 820                 return ret;
 821
 822         ret = i915_gem_object_pin_pages(obj);
 823         if (ret)
 824                 return ret;
 825
 826         i915_gem_object_flush_gtt_write_domain(obj);
 827
 828         /* If we're not in the cpu write domain, set ourself into the
 829          * gtt write domain and manually flush cachelines (as required).
 830          * This optimizes for the case when the gpu will use the data
 831          * right away and we therefore have to clflush anyway.
 832          */
 833         if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
 834                 *needs_clflush |= cpu_write_needs_clflush(obj) << 1;
 835
 836         /* Same trick applies to invalidate partially written cachelines read
 837          * before writing.
 838          */
 839         if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
 840                 *needs_clflush |= !cpu_cache_is_coherent(obj->base.dev,
 841                                                          obj->cache_level);
 842
 843         if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
 844                 ret = i915_gem_object_set_to_cpu_domain(obj, true);
 845                 if (ret)
 846                         goto err_unpin;
 847
 848                 *needs_clflush = 0;
 849         }
 850
 851         if ((*needs_clflush & CLFLUSH_AFTER) == 0)
 852                 obj->cache_dirty = true;
 853
 854         intel_fb_obj_invalidate(obj, ORIGIN_CPU);
 855         obj->mm.dirty = true;
 856         /* return with the pages pinned */
 857         return 0;
 858
 859 err_unpin:
 860         i915_gem_object_unpin_pages(obj);
 861         return ret;
 862 }
 863
 864 static void
 865 shmem_clflush_swizzled_range(char *addr, unsigned long length,
 866                              bool swizzled)
 867 {
 868         if (unlikely(swizzled)) {
 869                 unsigned long start = (unsigned long) addr;
 870                 unsigned long end = (unsigned long) addr + length;
 871
 872                 /* For swizzling simply ensure that we always flush both
 873                  * channels. Lame, but simple and it works. Swizzled
 874                  * pwrite/pread is far from a hotpath - current userspace
 875                  * doesn't use it at all. */
 876                 start = round_down(start, 128);
 877                 end = round_up(end, 128);
 878
 879                 drm_clflush_virt_range((void *)start, end - start);
 880         } else {
 881                 drm_clflush_virt_range(addr, length);
 882         }
 883
 884 }
 885
 886 /* Only difference to the fast-path function is that this can handle bit17
 887  * and uses non-atomic copy and kmap functions. */
 888 static int
 889 shmem_pread_slow(struct page *page, int offset, int length,
 890                  char __user *user_data,
 891                  bool page_do_bit17_swizzling, bool needs_clflush)
 892 {
 893         char *vaddr;
 894         int ret;
 895
 896         vaddr = kmap(page);
 897         if (needs_clflush)
 898                 shmem_clflush_swizzled_range(vaddr + offset, length,
 899                                              page_do_bit17_swizzling);
 900
 901         if (page_do_bit17_swizzling)
 902                 ret = __copy_to_user_swizzled(user_data, vaddr, offset, length);
 903         else
 904                 ret = __copy_to_user(user_data, vaddr + offset, length);
 905         kunmap(page);
 906
 907         return ret ? - EFAULT : 0;
 908 }
 909
 910 static int
 911 shmem_pread(struct page *page, int offset, int length, char __user *user_data,
 912             bool page_do_bit17_swizzling, bool needs_clflush)
 913 {
 914         int ret;
 915
 916         ret = -ENODEV;
 917         if (!page_do_bit17_swizzling) {
 918                 char *vaddr = kmap_atomic(page);
 919
 920                 if (needs_clflush)
 921                         drm_clflush_virt_range(vaddr + offset, length);
 922                 ret = __copy_to_user_inatomic(user_data, vaddr + offset, length);
 923                 kunmap_atomic(vaddr);
 924         }
 925         if (ret == 0)
 926                 return 0;
 927
 928         return shmem_pread_slow(page, offset, length, user_data,
 929                                 page_do_bit17_swizzling, needs_clflush);
 930 }
 931
 932 static int
 933 i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
 934                      struct drm_i915_gem_pread *args)
 935 {
 936         char __user *user_data;
 937         u64 remain;
 938         unsigned int obj_do_bit17_swizzling;
 939         unsigned int needs_clflush;
 940         unsigned int idx, offset;
 941         int ret;
 942
 943         obj_do_bit17_swizzling = 0;
 944         if (i915_gem_object_needs_bit17_swizzle(obj))
 945                 obj_do_bit17_swizzling = BIT(17);
 946
 947         ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
 948         if (ret)
 949                 return ret;
 950
 951         ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
 952         mutex_unlock(&obj->base.dev->struct_mutex);
 953         if (ret)
 954                 return ret;
 955
 956         remain = args->size;
 957         user_data = u64_to_user_ptr(args->data_ptr);
 958         offset = offset_in_page(args->offset);
 959         for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
 960                 struct page *page = i915_gem_object_get_page(obj, idx);
 961                 int length;
 962
 963                 length = remain;
 964                 if (offset + length > PAGE_SIZE)
 965                         length = PAGE_SIZE - offset;
 966
 967                 ret = shmem_pread(page, offset, length, user_data,
 968                                   page_to_phys(page) & obj_do_bit17_swizzling,
 969                                   needs_clflush);
 970                 if (ret)
 971                         break;
 972
 973                 remain -= length;
 974                 user_data += length;
 975                 offset = 0;
 976         }
 977
 978         i915_gem_obj_finish_shmem_access(obj);
 979         return ret;
 980 }
 981
 982 static inline bool
 983 gtt_user_read(struct io_mapping *mapping,
 984               loff_t base, int offset,
 985               char __user *user_data, int length)
 986 {
 987         void *vaddr;
 988         unsigned long unwritten;
 989
 990         /* We can use the cpu mem copy function because this is X86. */
 991         vaddr = (void __force *)io_mapping_map_atomic_wc(mapping, base);
 992         unwritten = __copy_to_user_inatomic(user_data, vaddr + offset, length);
 993         io_mapping_unmap_atomic(vaddr);
 994         if (unwritten) {
 995                 vaddr = (void __force *)
 996                         io_mapping_map_wc(mapping, base, PAGE_SIZE);
 997                 unwritten = copy_to_user(user_data, vaddr + offset, length);
 998                 io_mapping_unmap(vaddr);
 999         }
1000         return unwritten;
1001 }
1002
1003 static int
1004 i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
1005                    const struct drm_i915_gem_pread *args)
1006 {
1007         struct drm_i915_private *i915 = to_i915(obj->base.dev);
1008         struct i915_ggtt *ggtt = &i915->ggtt;
1009         struct drm_mm_node node;
1010         struct i915_vma *vma;
1011         void __user *user_data;
1012         u64 remain, offset;
1013         int ret;
1014
1015         ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
1016         if (ret)
1017                 return ret;
1018
1019         intel_runtime_pm_get(i915);
1020         vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
1021                                        PIN_MAPPABLE | PIN_NONBLOCK);
1022         if (!IS_ERR(vma)) {
1023                 node.start = i915_ggtt_offset(vma);
1024                 node.allocated = false;
1025                 ret = i915_vma_put_fence(vma);
1026                 if (ret) {
1027                         i915_vma_unpin(vma);
1028                         vma = ERR_PTR(ret);
1029                 }
1030         }
1031         if (IS_ERR(vma)) {
1032                 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
1033                 if (ret)
1034                         goto out_unlock;
1035                 GEM_BUG_ON(!node.allocated);
1036         }
1037
1038         ret = i915_gem_object_set_to_gtt_domain(obj, false);
1039         if (ret)
1040                 goto out_unpin;
1041
1042         mutex_unlock(&i915->drm.struct_mutex);
1043
1044         user_data = u64_to_user_ptr(args->data_ptr);
1045         remain = args->size;
1046         offset = args->offset;
1047
1048         while (remain > 0) {
1049                 /* Operation in this page
1050                  *
1051                  * page_base = page offset within aperture
1052                  * page_offset = offset within page
1053                  * page_length = bytes to copy for this page
1054                  */
1055                 u32 page_base = node.start;
1056                 unsigned page_offset = offset_in_page(offset);
1057                 unsigned page_length = PAGE_SIZE - page_offset;
1058                 page_length = remain < page_length ? remain : page_length;
1059                 if (node.allocated) {
1060                         wmb();
1061                         ggtt->base.insert_page(&ggtt->base,
1062                                                i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
1063                                                node.start, I915_CACHE_NONE, 0);
1064                         wmb();
1065                 } else {
1066                         page_base += offset & PAGE_MASK;
1067                 }
1068
1069                 if (gtt_user_read(&ggtt->mappable, page_base, page_offset,
1070                                   user_data, page_length)) {
1071                         ret = -EFAULT;
1072                         break;
1073                 }
1074
1075                 remain -= page_length;
1076                 user_data += page_length;
1077                 offset += page_length;
1078         }
1079
1080         mutex_lock(&i915->drm.struct_mutex);
1081 out_unpin:
1082         if (node.allocated) {
1083                 wmb();
1084                 ggtt->base.clear_range(&ggtt->base,
1085                                        node.start, node.size);
1086                 remove_mappable_node(&node);
1087         } else {
1088                 i915_vma_unpin(vma);
1089         }
1090 out_unlock:
1091         intel_runtime_pm_put(i915);
1092         mutex_unlock(&i915->drm.struct_mutex);
1093
1094         return ret;
1095 }
1096
1097 /**
1098  * Reads data from the object referenced by handle.
1099  * @dev: drm device pointer
1100  * @data: ioctl data blob
1101  * @file: drm file pointer
1102  *
1103  * On error, the contents of *data are undefined.
1104  */
1105 int
1106 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
1107                      struct drm_file *file)
1108 {
1109         struct drm_i915_gem_pread *args = data;
1110         struct drm_i915_gem_object *obj;
1111         int ret;
1112
1113         if (args->size == 0)
1114                 return 0;
1115
1116         if (!access_ok(VERIFY_WRITE,
1117                        u64_to_user_ptr(args->data_ptr),
1118                        args->size))
1119                 return -EFAULT;
1120
1121         obj = i915_gem_object_lookup(file, args->handle);
1122         if (!obj)
1123                 return -ENOENT;
1124
1125         /* Bounds check source.  */
1126         if (args->offset > obj->base.size ||
1127             args->size > obj->base.size - args->offset) {
1128                 ret = -EINVAL;
1129                 goto out;
1130         }
1131
1132         trace_i915_gem_object_pread(obj, args->offset, args->size);
1133
1134         ret = i915_gem_object_wait(obj,
1135                                    I915_WAIT_INTERRUPTIBLE,
1136                                    MAX_SCHEDULE_TIMEOUT,
1137                                    to_rps_client(file));
1138         if (ret)
1139                 goto out;
1140
1141         ret = i915_gem_object_pin_pages(obj);
1142         if (ret)
1143                 goto out;
1144
1145         ret = i915_gem_shmem_pread(obj, args);
1146         if (ret == -EFAULT || ret == -ENODEV)
1147                 ret = i915_gem_gtt_pread(obj, args);
1148
1149         i915_gem_object_unpin_pages(obj);
1150 out:
1151         i915_gem_object_put(obj);
1152         return ret;
1153 }
1154
1155 /* This is the fast write path which cannot handle
1156  * page faults in the source data
1157  */
1158
1159 static inline bool
1160 ggtt_write(struct io_mapping *mapping,
1161            loff_t base, int offset,
1162            char __user *user_data, int length)
1163 {
1164         void *vaddr;
1165         unsigned long unwritten;
1166
1167         /* We can use the cpu mem copy function because this is X86. */
1168         vaddr = (void __force *)io_mapping_map_atomic_wc(mapping, base);
1169         unwritten = __copy_from_user_inatomic_nocache(vaddr + offset,
1170                                                       user_data, length);
1171         io_mapping_unmap_atomic(vaddr);
1172         if (unwritten) {
1173                 vaddr = (void __force *)
1174                         io_mapping_map_wc(mapping, base, PAGE_SIZE);
1175                 unwritten = copy_from_user(vaddr + offset, user_data, length);
1176                 io_mapping_unmap(vaddr);
1177         }
1178
1179         return unwritten;
1180 }
1181
1182 /**
1183  * This is the fast pwrite path, where we copy the data directly from the
1184  * user into the GTT, uncached.
1185  * @obj: i915 GEM object
1186  * @args: pwrite arguments structure
1187  */
1188 static int
1189 i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
1190                          const struct drm_i915_gem_pwrite *args)
1191 {
1192         struct drm_i915_private *i915 = to_i915(obj->base.dev);
1193         struct i915_ggtt *ggtt = &i915->ggtt;
1194         struct drm_mm_node node;
1195         struct i915_vma *vma;
1196         u64 remain, offset;
1197         void __user *user_data;
1198         int ret;
1199
1200         ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
1201         if (ret)
1202                 return ret;
1203
1204         intel_runtime_pm_get(i915);
1205         vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
1206                                        PIN_MAPPABLE | PIN_NONBLOCK);
1207         if (!IS_ERR(vma)) {
1208                 node.start = i915_ggtt_offset(vma);
1209                 node.allocated = false;
1210                 ret = i915_vma_put_fence(vma);
1211                 if (ret) {
1212                         i915_vma_unpin(vma);
1213                         vma = ERR_PTR(ret);
1214                 }
1215         }
1216         if (IS_ERR(vma)) {
1217                 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
1218                 if (ret)
1219                         goto out_unlock;
1220                 GEM_BUG_ON(!node.allocated);
1221         }
1222
1223         ret = i915_gem_object_set_to_gtt_domain(obj, true);
1224         if (ret)
1225                 goto out_unpin;
1226
1227         mutex_unlock(&i915->drm.struct_mutex);
1228
1229         intel_fb_obj_invalidate(obj, ORIGIN_CPU);
1230
1231         user_data = u64_to_user_ptr(args->data_ptr);
1232         offset = args->offset;
1233         remain = args->size;
1234         while (remain) {
1235                 /* Operation in this page
1236                  *
1237                  * page_base = page offset within aperture
1238                  * page_offset = offset within page
1239                  * page_length = bytes to copy for this page
1240                  */
1241                 u32 page_base = node.start;
1242                 unsigned int page_offset = offset_in_page(offset);
1243                 unsigned int page_length = PAGE_SIZE - page_offset;
1244                 page_length = remain < page_length ? remain : page_length;
1245                 if (node.allocated) {
1246                         wmb(); /* flush the write before we modify the GGTT */
1247                         ggtt->base.insert_page(&ggtt->base,
1248                                                i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
1249                                                node.start, I915_CACHE_NONE, 0);
1250                         wmb(); /* flush modifications to the GGTT (insert_page) */
1251                 } else {
1252                         page_base += offset & PAGE_MASK;
1253                 }
1254                 /* If we get a fault while copying data, then (presumably) our
1255                  * source page isn't available.  Return the error and we'll
1256                  * retry in the slow path.
1257                  * If the object is non-shmem backed, we retry again with the
1258                  * path that handles page fault.
1259                  */
1260                 if (ggtt_write(&ggtt->mappable, page_base, page_offset,
1261                                user_data, page_length)) {
1262                         ret = -EFAULT;
1263                         break;
1264                 }
1265
1266                 remain -= page_length;
1267                 user_data += page_length;
1268                 offset += page_length;
1269         }
1270         intel_fb_obj_flush(obj, false, ORIGIN_CPU);
1271
1272         mutex_lock(&i915->drm.struct_mutex);
1273 out_unpin:
1274         if (node.allocated) {
1275                 wmb();
1276                 ggtt->base.clear_range(&ggtt->base,
1277                                        node.start, node.size);
1278                 remove_mappable_node(&node);
1279         } else {
1280                 i915_vma_unpin(vma);
1281         }
1282 out_unlock:
1283         intel_runtime_pm_put(i915);
1284         mutex_unlock(&i915->drm.struct_mutex);
1285         return ret;
1286 }
1287
1288 static int
1289 shmem_pwrite_slow(struct page *page, int offset, int length,
1290                   char __user *user_data,
1291                   bool page_do_bit17_swizzling,
1292                   bool needs_clflush_before,
1293                   bool needs_clflush_after)
1294 {
1295         char *vaddr;
1296         int ret;
1297
1298         vaddr = kmap(page);
1299         if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
1300                 shmem_clflush_swizzled_range(vaddr + offset, length,
1301                                              page_do_bit17_swizzling);
1302         if (page_do_bit17_swizzling)
1303                 ret = __copy_from_user_swizzled(vaddr, offset, user_data,
1304                                                 length);
1305         else
1306                 ret = __copy_from_user(vaddr + offset, user_data, length);
1307         if (needs_clflush_after)
1308                 shmem_clflush_swizzled_range(vaddr + offset, length,
1309                                              page_do_bit17_swizzling);
1310         kunmap(page);
1311
1312         return ret ? -EFAULT : 0;
1313 }
1314
1315 /* Per-page copy function for the shmem pwrite fastpath.
1316  * Flushes invalid cachelines before writing to the target if
1317  * needs_clflush_before is set and flushes out any written cachelines after
1318  * writing if needs_clflush is set.
1319  */
1320 static int
1321 shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
1322              bool page_do_bit17_swizzling,
1323              bool needs_clflush_before,
1324              bool needs_clflush_after)
1325 {
1326         int ret;
1327
1328         ret = -ENODEV;
1329         if (!page_do_bit17_swizzling) {
1330                 char *vaddr = kmap_atomic(page);
1331
1332                 if (needs_clflush_before)
1333                         drm_clflush_virt_range(vaddr + offset, len);
1334                 ret = __copy_from_user_inatomic(vaddr + offset, user_data, len);
1335                 if (needs_clflush_after)
1336                         drm_clflush_virt_range(vaddr + offset, len);
1337
1338                 kunmap_atomic(vaddr);
1339         }
1340         if (ret == 0)
1341                 return ret;
1342
1343         return shmem_pwrite_slow(page, offset, len, user_data,
1344                                  page_do_bit17_swizzling,
1345                                  needs_clflush_before,
1346                                  needs_clflush_after);
1347 }
1348
1349 static int
1350 i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
1351                       const struct drm_i915_gem_pwrite *args)
1352 {
1353         struct drm_i915_private *i915 = to_i915(obj->base.dev);
1354         void __user *user_data;
1355         u64 remain;
1356         unsigned int obj_do_bit17_swizzling;
1357         unsigned int partial_cacheline_write;
1358         unsigned int needs_clflush;
1359         unsigned int offset, idx;
1360         int ret;
1361
1362         ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
1363         if (ret)
1364                 return ret;
1365
1366         ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
1367         mutex_unlock(&i915->drm.struct_mutex);
1368         if (ret)
1369                 return ret;
1370
1371         obj_do_bit17_swizzling = 0;
1372         if (i915_gem_object_needs_bit17_swizzle(obj))
1373                 obj_do_bit17_swizzling = BIT(17);
1374
1375         /* If we don't overwrite a cacheline completely we need to be
1376          * careful to have up-to-date data by first clflushing. Don't
1377          * overcomplicate things and flush the entire patch.
1378          */
1379         partial_cacheline_write = 0;
1380         if (needs_clflush & CLFLUSH_BEFORE)
1381                 partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1;
1382
1383         user_data = u64_to_user_ptr(args->data_ptr);
1384         remain = args->size;
1385         offset = offset_in_page(args->offset);
1386         for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
1387                 struct page *page = i915_gem_object_get_page(obj, idx);
1388                 int length;
1389
1390                 length = remain;
1391                 if (offset + length > PAGE_SIZE)
1392                         length = PAGE_SIZE - offset;
1393
1394                 ret = shmem_pwrite(page, offset, length, user_data,
1395                                    page_to_phys(page) & obj_do_bit17_swizzling,
1396                                    (offset | length) & partial_cacheline_write,
1397                                    needs_clflush & CLFLUSH_AFTER);
1398                 if (ret)
1399                         break;
1400
1401                 remain -= length;
1402                 user_data += length;
1403                 offset = 0;
1404         }
1405
1406         intel_fb_obj_flush(obj, false, ORIGIN_CPU);
1407         i915_gem_obj_finish_shmem_access(obj);
1408         return ret;
1409 }
1410
1411 /**
1412  * Writes data to the object referenced by handle.
1413  * @dev: drm device
1414  * @data: ioctl data blob
1415  * @file: drm file
1416  *
1417  * On error, the contents of the buffer that were to be modified are undefined.
1418  */
1419 int
1420 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
1421                       struct drm_file *file)
1422 {
1423         struct drm_i915_gem_pwrite *args = data;
1424         struct drm_i915_gem_object *obj;
1425         int ret;
1426
1427         if (args->size == 0)
1428                 return 0;
1429
1430         if (!access_ok(VERIFY_READ,
1431                        u64_to_user_ptr(args->data_ptr),
1432                        args->size))
1433                 return -EFAULT;
1434
1435         obj = i915_gem_object_lookup(file, args->handle);
1436         if (!obj)
1437                 return -ENOENT;
1438
1439         /* Bounds check destination. */
1440         if (args->offset > obj->base.size ||
1441             args->size > obj->base.size - args->offset) {
1442                 ret = -EINVAL;
1443                 goto err;
1444         }
1445
1446         trace_i915_gem_object_pwrite(obj, args->offset, args->size);
1447
1448         ret = i915_gem_object_wait(obj,
1449                                    I915_WAIT_INTERRUPTIBLE |
1450                                    I915_WAIT_ALL,
1451                                    MAX_SCHEDULE_TIMEOUT,
1452                                    to_rps_client(file));
1453         if (ret)
1454                 goto err;
1455
1456         ret = i915_gem_object_pin_pages(obj);
1457         if (ret)
1458                 goto err;
1459
1460         ret = -EFAULT;
1461         /* We can only do the GTT pwrite on untiled buffers, as otherwise
1462          * it would end up going through the fenced access, and we'll get
1463          * different detiling behavior between reading and writing.
1464          * pread/pwrite currently are reading and writing from the CPU
1465          * perspective, requiring manual detiling by the client.
1466          */
1467         if (!i915_gem_object_has_struct_page(obj) ||
1468             cpu_write_needs_clflush(obj))
1469                 /* Note that the gtt paths might fail with non-page-backed user
1470                  * pointers (e.g. gtt mappings when moving data between
1471                  * textures). Fallback to the shmem path in that case.
1472                  */
1473                 ret = i915_gem_gtt_pwrite_fast(obj, args);
1474
1475         if (ret == -EFAULT || ret == -ENOSPC) {
1476                 if (obj->phys_handle)
1477                         ret = i915_gem_phys_pwrite(obj, args, file);
1478                 else
1479                         ret = i915_gem_shmem_pwrite(obj, args);
1480         }
1481
1482         i915_gem_object_unpin_pages(obj);
1483 err:
1484         i915_gem_object_put(obj);
1485         return ret;
1486 }
1487
1488 static inline enum fb_op_origin
1489 write_origin(struct drm_i915_gem_object *obj, unsigned domain)
1490 {
1491         return (domain == I915_GEM_DOMAIN_GTT ?
1492                 obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
1493 }
1494
1495 static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
1496 {
1497         struct drm_i915_private *i915;
1498         struct list_head *list;
1499         struct i915_vma *vma;
1500
1501         list_for_each_entry(vma, &obj->vma_list, obj_link) {
1502                 if (!i915_vma_is_ggtt(vma))
1503                         continue;
1504
1505                 if (i915_vma_is_active(vma))
1506                         continue;
1507
1508                 if (!drm_mm_node_allocated(&vma->node))
1509                         continue;
1510
1511                 list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
1512         }
1513
1514         i915 = to_i915(obj->base.dev);
1515         list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
1516         list_move_tail(&obj->global_link, list);
1517 }
1518
1519 /**
1520  * Called when user space prepares to use an object with the CPU, either
1521  * through the mmap ioctl's mapping or a GTT mapping.
1522  * @dev: drm device
1523  * @data: ioctl data blob
1524  * @file: drm file
1525  */
1526 int
1527 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1528                           struct drm_file *file)
1529 {
1530         struct drm_i915_gem_set_domain *args = data;
1531         struct drm_i915_gem_object *obj;
1532         uint32_t read_domains = args->read_domains;
1533         uint32_t write_domain = args->write_domain;
1534         int err;
1535
1536         /* Only handle setting domains to types used by the CPU. */
1537         if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
1538                 return -EINVAL;
1539
1540         /* Having something in the write domain implies it's in the read
1541          * domain, and only that read domain.  Enforce that in the request.
1542          */
1543         if (write_domain != 0 && read_domains != write_domain)
1544                 return -EINVAL;
1545
1546         obj = i915_gem_object_lookup(file, args->handle);
1547         if (!obj)
1548                 return -ENOENT;
1549
1550         /* Try to flush the object off the GPU without holding the lock.
1551          * We will repeat the flush holding the lock in the normal manner
1552          * to catch cases where we are gazumped.
1553          */
1554         err = i915_gem_object_wait(obj,
1555                                    I915_WAIT_INTERRUPTIBLE |
1556                                    (write_domain ? I915_WAIT_ALL : 0),
1557                                    MAX_SCHEDULE_TIMEOUT,
1558                                    to_rps_client(file));
1559         if (err)
1560                 goto out;
1561
1562         /* Flush and acquire obj->pages so that we are coherent through
1563          * direct access in memory with previous cached writes through
1564          * shmemfs and that our cache domain tracking remains valid.
1565          * For example, if the obj->filp was moved to swap without us
1566          * being notified and releasing the pages, we would mistakenly
1567          * continue to assume that the obj remained out of the CPU cached
1568          * domain.
1569          */
1570         err = i915_gem_object_pin_pages(obj);
1571         if (err)
1572                 goto out;
1573
1574         err = i915_mutex_lock_interruptible(dev);
1575         if (err)
1576                 goto out_unpin;
1577
1578         if (read_domains & I915_GEM_DOMAIN_GTT)
1579                 err = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
1580         else
1581                 err = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
1582
1583         /* And bump the LRU for this access */
1584         i915_gem_object_bump_inactive_ggtt(obj);
1585
1586         mutex_unlock(&dev->struct_mutex);
1587
1588         if (write_domain != 0)
1589                 intel_fb_obj_invalidate(obj, write_origin(obj, write_domain));
1590
1591 out_unpin:
1592         i915_gem_object_unpin_pages(obj);
1593 out:
1594         i915_gem_object_put(obj);
1595         return err;
1596 }
1597
1598 /**
1599  * Called when user space has done writes to this buffer
1600  * @dev: drm device
1601  * @data: ioctl data blob
1602  * @file: drm file
1603  */
1604 int
1605 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1606                          struct drm_file *file)
1607 {
1608         struct drm_i915_gem_sw_finish *args = data;
1609         struct drm_i915_gem_object *obj;
1610         int err = 0;
1611
1612         obj = i915_gem_object_lookup(file, args->handle);
1613         if (!obj)
1614                 return -ENOENT;
1615
1616         /* Pinned buffers may be scanout, so flush the cache */
1617         if (READ_ONCE(obj->pin_display)) {
1618                 err = i915_mutex_lock_interruptible(dev);
1619                 if (!err) {
1620                         i915_gem_object_flush_cpu_write_domain(obj);
1621                         mutex_unlock(&dev->struct_mutex);
1622                 }
1623         }
1624
1625         i915_gem_object_put(obj);
1626         return err;
1627 }
1628
1629 /**
1630  * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
1631  *                       it is mapped to.
1632  * @dev: drm device
1633  * @data: ioctl data blob
1634  * @file: drm file
1635  *
1636  * While the mapping holds a reference on the contents of the object, it doesn't
1637  * imply a ref on the object itself.
1638  *
1639  * IMPORTANT:
1640  *
1641  * DRM driver writers who look a this function as an example for how to do GEM
1642  * mmap support, please don't implement mmap support like here. The modern way
1643  * to implement DRM mmap support is with an mmap offset ioctl (like
1644  * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
1645  * That way debug tooling like valgrind will understand what's going on, hiding
1646  * the mmap call in a driver private ioctl will break that. The i915 driver only
1647  * does cpu mmaps this way because we didn't know better.
1648  */
1649 int
1650 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1651                     struct drm_file *file)
1652 {
1653         struct drm_i915_gem_mmap *args = data;
1654         struct drm_i915_gem_object *obj;
1655         unsigned long addr;
1656
1657         if (args->flags & ~(I915_MMAP_WC))
1658                 return -EINVAL;
1659
1660         if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
1661                 return -ENODEV;
1662
1663         obj = i915_gem_object_lookup(file, args->handle);
1664         if (!obj)
1665                 return -ENOENT;
1666
1667         /* prime objects have no backing filp to GEM mmap
1668          * pages from.
1669          */
1670         if (!obj->base.filp) {
1671                 i915_gem_object_put(obj);
1672                 return -EINVAL;
1673         }
1674
1675         addr = vm_mmap(obj->base.filp, 0, args->size,
1676                        PROT_READ | PROT_WRITE, MAP_SHARED,
1677                        args->offset);
1678         if (args->flags & I915_MMAP_WC) {
1679                 struct mm_struct *mm = current->mm;
1680                 struct vm_area_struct *vma;
1681
1682                 if (down_write_killable(&mm->mmap_sem)) {
1683                         i915_gem_object_put(obj);
1684                         return -EINTR;
1685                 }
1686                 vma = find_vma(mm, addr);
1687                 if (vma)
1688                         vma->vm_page_prot =
1689                                 pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
1690                 else
1691                         addr = -ENOMEM;
1692                 up_write(&mm->mmap_sem);
1693
1694                 /* This may race, but that's ok, it only gets set */
1695                 WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
1696         }
1697         i915_gem_object_put(obj);
1698         if (IS_ERR((void *)addr))
1699                 return addr;
1700
1701         args->addr_ptr = (uint64_t) addr;
1702
1703         return 0;
1704 }
1705
1706 static unsigned int tile_row_pages(struct drm_i915_gem_object *obj)
1707 {
1708         u64 size;
1709
1710         size = i915_gem_object_get_stride(obj);
1711         size *= i915_gem_object_get_tiling(obj) == I915_TILING_Y ? 32 : 8;
1712
1713         return size >> PAGE_SHIFT;
1714 }
1715
1716 /**
1717  * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
1718  *
1719  * A history of the GTT mmap interface:
1720  *
1721  * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
1722  *     aligned and suitable for fencing, and still fit into the available
1723  *     mappable space left by the pinned display objects. A classic problem
1724  *     we called the page-fault-of-doom where we would ping-pong between
1725  *     two objects that could not fit inside the GTT and so the memcpy
1726  *     would page one object in at the expense of the other between every
1727  *     single byte.
1728  *
1729  * 1 - Objects can be any size, and have any compatible fencing (X Y, or none
1730  *     as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
1731  *     object is too large for the available space (or simply too large
1732  *     for the mappable aperture!), a view is created instead and faulted
1733  *     into userspace. (This view is aligned and sized appropriately for
1734  *     fenced access.)
1735  *
1736  * Restrictions:
1737  *
1738  *  * snoopable objects cannot be accessed via the GTT. It can cause machine
1739  *    hangs on some architectures, corruption on others. An attempt to service
1740  *    a GTT page fault from a snoopable object will generate a SIGBUS.
1741  *
1742  *  * the object must be able to fit into RAM (physical memory, though no
1743  *    limited to the mappable aperture).
1744  *
1745  *
1746  * Caveats:
1747  *
1748  *  * a new GTT page fault will synchronize rendering from the GPU and flush
1749  *    all data to system memory. Subsequent access will not be synchronized.
1750  *
1751  *  * all mappings are revoked on runtime device suspend.
1752  *
1753  *  * there are only 8, 16 or 32 fence registers to share between all users
1754  *    (older machines require fence register for display and blitter access
1755  *    as well). Contention of the fence registers will cause the previous users
1756  *    to be unmapped and any new access will generate new page faults.
1757  *
1758  *  * running out of memory while servicing a fault may generate a SIGBUS,
1759  *    rather than the expected SIGSEGV.
1760  */
1761 int i915_gem_mmap_gtt_version(void)
1762 {
1763         return 1;
1764 }
1765
1766 /**
1767  * i915_gem_fault - fault a page into the GTT
1768  * @area: CPU VMA in question
1769  * @vmf: fault info
1770  *
1771  * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
1772  * from userspace.  The fault handler takes care of binding the object to
1773  * the GTT (if needed), allocating and programming a fence register (again,
1774  * only if needed based on whether the old reg is still valid or the object
1775  * is tiled) and inserting a new PTE into the faulting process.
1776  *
1777  * Note that the faulting process may involve evicting existing objects
1778  * from the GTT and/or fence registers to make room.  So performance may
1779  * suffer if the GTT working set is large or there are few fence registers
1780  * left.
1781  *
1782  * The current feature set supported by i915_gem_fault() and thus GTT mmaps
1783  * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
1784  */
1785 int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
1786 {
1787 #define MIN_CHUNK_PAGES ((1 << 20) >> PAGE_SHIFT) /* 1 MiB */
1788         struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
1789         struct drm_device *dev = obj->base.dev;
1790         struct drm_i915_private *dev_priv = to_i915(dev);
1791         struct i915_ggtt *ggtt = &dev_priv->ggtt;
1792         bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
1793         struct i915_vma *vma;
1794         pgoff_t page_offset;
1795         unsigned int flags;
1796         int ret;
1797
1798         /* We don't use vmf->pgoff since that has the fake offset */
1799         page_offset = ((unsigned long)vmf->virtual_address - area->vm_start) >>
1800                 PAGE_SHIFT;
1801
1802         trace_i915_gem_object_fault(obj, page_offset, true, write);
1803
1804         /* Try to flush the object off the GPU first without holding the lock.
1805          * Upon acquiring the lock, we will perform our sanity checks and then
1806          * repeat the flush holding the lock in the normal manner to catch cases
1807          * where we are gazumped.
1808          */
1809         ret = i915_gem_object_wait(obj,
1810                                    I915_WAIT_INTERRUPTIBLE,
1811                                    MAX_SCHEDULE_TIMEOUT,
1812                                    NULL);
1813         if (ret)
1814                 goto err;
1815
1816         ret = i915_gem_object_pin_pages(obj);
1817         if (ret)
1818                 goto err;
1819
1820         intel_runtime_pm_get(dev_priv);
1821
1822         ret = i915_mutex_lock_interruptible(dev);
1823         if (ret)
1824                 goto err_rpm;
1825
1826         /* Access to snoopable pages through the GTT is incoherent. */
1827         if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) {
1828                 ret = -EFAULT;
1829                 goto err_unlock;
1830         }
1831
1832         /* If the object is smaller than a couple of partial vma, it is
1833          * not worth only creating a single partial vma - we may as well
1834          * clear enough space for the full object.
1835          */
1836         flags = PIN_MAPPABLE;
1837         if (obj->base.size > 2 * MIN_CHUNK_PAGES << PAGE_SHIFT)
1838                 flags |= PIN_NONBLOCK | PIN_NONFAULT;
1839
1840         /* Now pin it into the GTT as needed */
1841         vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, flags);
1842         if (IS_ERR(vma)) {
1843                 struct i915_ggtt_view view;
1844                 unsigned int chunk_size;
1845
1846                 /* Use a partial view if it is bigger than available space */
1847                 chunk_size = MIN_CHUNK_PAGES;
1848                 if (i915_gem_object_is_tiled(obj))
1849                         chunk_size = roundup(chunk_size, tile_row_pages(obj));
1850
1851                 memset(&view, 0, sizeof(view));
1852                 view.type = I915_GGTT_VIEW_PARTIAL;
1853                 view.params.partial.offset = rounddown(page_offset, chunk_size);
1854                 view.params.partial.size =
1855                         min_t(unsigned int, chunk_size,
1856                               vma_pages(area) - view.params.partial.offset);
1857
1858                 /* If the partial covers the entire object, just create a
1859                  * normal VMA.
1860                  */
1861                 if (chunk_size >= obj->base.size >> PAGE_SHIFT)
1862                         view.type = I915_GGTT_VIEW_NORMAL;
1863
1864                 /* Userspace is now writing through an untracked VMA, abandon
1865                  * all hope that the hardware is able to track future writes.
1866                  */
1867                 obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
1868
1869                 vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
1870         }
1871         if (IS_ERR(vma)) {
1872                 ret = PTR_ERR(vma);
1873                 goto err_unlock;
1874         }
1875
1876         ret = i915_gem_object_set_to_gtt_domain(obj, write);
1877         if (ret)
1878                 goto err_unpin;
1879
1880         ret = i915_vma_get_fence(vma);
1881         if (ret)
1882                 goto err_unpin;
1883
1884         /* Mark as being mmapped into userspace for later revocation */
1885         assert_rpm_wakelock_held(dev_priv);
1886         if (list_empty(&obj->userfault_link))
1887                 list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
1888
1889         /* Finally, remap it using the new GTT offset */
1890         ret = remap_io_mapping(area,
1891                                area->vm_start + (vma->ggtt_view.params.partial.offset << PAGE_SHIFT),
1892                                (ggtt->mappable_base + vma->node.start) >> PAGE_SHIFT,
1893                                min_t(u64, vma->size, area->vm_end - area->vm_start),
1894                                &ggtt->mappable);
1895
1896 err_unpin:
1897         __i915_vma_unpin(vma);
1898 err_unlock:
1899         mutex_unlock(&dev->struct_mutex);
1900 err_rpm:
1901         intel_runtime_pm_put(dev_priv);
1902         i915_gem_object_unpin_pages(obj);
1903 err:
1904         switch (ret) {
1905         case -EIO:
1906                 /*
1907                  * We eat errors when the gpu is terminally wedged to avoid
1908                  * userspace unduly crashing (gl has no provisions for mmaps to
1909                  * fail). But any other -EIO isn't ours (e.g. swap in failure)
1910                  * and so needs to be reported.
1911                  */
1912                 if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
1913                         ret = VM_FAULT_SIGBUS;
1914                         break;
1915                 }
1916         case -EAGAIN:
1917                 /*
1918                  * EAGAIN means the gpu is hung and we'll wait for the error
1919                  * handler to reset everything when re-faulting in
1920                  * i915_mutex_lock_interruptible.
1921                  */
1922         case 0:
1923         case -ERESTARTSYS:
1924         case -EINTR:
1925         case -EBUSY:
1926                 /*
1927                  * EBUSY is ok: this just means that another thread
1928                  * already did the job.
1929                  */
1930                 ret = VM_FAULT_NOPAGE;
1931                 break;
1932         case -ENOMEM:
1933                 ret = VM_FAULT_OOM;
1934                 break;
1935         case -ENOSPC:
1936         case -EFAULT:
1937                 ret = VM_FAULT_SIGBUS;
1938                 break;
1939         default:
1940                 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
1941                 ret = VM_FAULT_SIGBUS;
1942                 break;
1943         }
1944         return ret;
1945 }
1946
1947 /**
1948  * i915_gem_release_mmap - remove physical page mappings
1949  * @obj: obj in question
1950  *
1951  * Preserve the reservation of the mmapping with the DRM core code, but
1952  * relinquish ownership of the pages back to the system.
1953  *
1954  * It is vital that we remove the page mapping if we have mapped a tiled
1955  * object through the GTT and then lose the fence register due to
1956  * resource pressure. Similarly if the object has been moved out of the
1957  * aperture, than pages mapped into userspace must be revoked. Removing the
1958  * mapping will then trigger a page fault on the next user access, allowing
1959  * fixup by i915_gem_fault().
1960  */
1961 void
1962 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
1963 {
1964         struct drm_i915_private *i915 = to_i915(obj->base.dev);
1965
1966         /* Serialisation between user GTT access and our code depends upon
1967          * revoking the CPU's PTE whilst the mutex is held. The next user
1968          * pagefault then has to wait until we release the mutex.
1969          *
1970          * Note that RPM complicates somewhat by adding an additional
1971          * requirement that operations to the GGTT be made holding the RPM
1972          * wakeref.
1973          */
1974         lockdep_assert_held(&i915->drm.struct_mutex);
1975         intel_runtime_pm_get(i915);
1976
1977         if (list_empty(&obj->userfault_link))
1978                 goto out;
1979
1980         list_del_init(&obj->userfault_link);
1981         drm_vma_node_unmap(&obj->base.vma_node,
1982                            obj->base.dev->anon_inode->i_mapping);
1983
1984         /* Ensure that the CPU's PTE are revoked and there are not outstanding
1985          * memory transactions from userspace before we return. The TLB
1986          * flushing implied above by changing the PTE above *should* be
1987          * sufficient, an extra barrier here just provides us with a bit
1988          * of paranoid documentation about our requirement to serialise
1989          * memory writes before touching registers / GSM.
1990          */
1991         wmb();
1992
1993 out:
1994         intel_runtime_pm_put(i915);
1995 }
1996
1997 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
1998 {
1999         struct drm_i915_gem_object *obj, *on;
2000         int i;
2001
2002         /*
2003          * Only called during RPM suspend. All users of the userfault_list
2004          * must be holding an RPM wakeref to ensure that this can not
2005          * run concurrently with themselves (and use the struct_mutex for
2006          * protection between themselves).
2007          */
2008
2009         list_for_each_entry_safe(obj, on,
2010                                  &dev_priv->mm.userfault_list, userfault_link) {
2011                 list_del_init(&obj->userfault_link);
2012                 drm_vma_node_unmap(&obj->base.vma_node,
2013                                    obj->base.dev->anon_inode->i_mapping);
2014         }
2015
2016         /* The fence will be lost when the device powers down. If any were
2017          * in use by hardware (i.e. they are pinned), we should not be powering
2018          * down! All other fences will be reacquired by the user upon waking.
2019          */
2020         for (i = 0; i < dev_priv->num_fence_regs; i++) {
2021                 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
2022
2023                 if (WARN_ON(reg->pin_count))
2024                         continue;
2025
2026                 if (!reg->vma)
2027                         continue;
2028
2029                 GEM_BUG_ON(!list_empty(&reg->vma->obj->userfault_link));
2030                 reg->dirty = true;
2031         }
2032 }
2033
2034 /**
2035  * i915_gem_get_ggtt_size - return required global GTT size for an object
2036  * @dev_priv: i915 device
2037  * @size: object size
2038  * @tiling_mode: tiling mode
2039  *
2040  * Return the required global GTT size for an object, taking into account
2041  * potential fence register mapping.
2042  */
2043 u64 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv,
2044                            u64 size, int tiling_mode)
2045 {
2046         u64 ggtt_size;
2047
2048         GEM_BUG_ON(size == 0);
2049
2050         if (INTEL_GEN(dev_priv) >= 4 ||
2051             tiling_mode == I915_TILING_NONE)
2052                 return size;
2053
2054         /* Previous chips need a power-of-two fence region when tiling */
2055         if (IS_GEN3(dev_priv))
2056                 ggtt_size = 1024*1024;
2057         else
2058                 ggtt_size = 512*1024;
2059
2060         while (ggtt_size < size)
2061                 ggtt_size <<= 1;
2062
2063         return ggtt_size;
2064 }
2065
2066 /**
2067  * i915_gem_get_ggtt_alignment - return required global GTT alignment
2068  * @dev_priv: i915 device
2069  * @size: object size
2070  * @tiling_mode: tiling mode
2071  * @fenced: is fenced alignment required or not
2072  *
2073  * Return the required global GTT alignment for an object, taking into account
2074  * potential fence register mapping.
2075  */
2076 u64 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u64 size,
2077                                 int tiling_mode, bool fenced)
2078 {
2079         GEM_BUG_ON(size == 0);
2080
2081         /*
2082          * Minimum alignment is 4k (GTT page size), but might be greater
2083          * if a fence register is needed for the object.
2084          */
2085         if (INTEL_GEN(dev_priv) >= 4 || (!fenced && IS_G33(dev_priv)) ||
2086             tiling_mode == I915_TILING_NONE)
2087                 return 4096;
2088
2089         /*
2090          * Previous chips need to be aligned to the size of the smallest
2091          * fence register that can contain the object.
2092          */
2093         return i915_gem_get_ggtt_size(dev_priv, size, tiling_mode);
2094 }
2095
2096 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
2097 {
2098         struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2099         int err;
2100
2101         err = drm_gem_create_mmap_offset(&obj->base);
2102         if (!err)
2103                 return 0;
2104
2105         /* We can idle the GPU locklessly to flush stale objects, but in order
2106          * to claim that space for ourselves, we need to take the big
2107          * struct_mutex to free the requests+objects and allocate our slot.
2108          */
2109         err = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
2110         if (err)
2111                 return err;
2112
2113         err = i915_mutex_lock_interruptible(&dev_priv->drm);
2114         if (!err) {
2115                 i915_gem_retire_requests(dev_priv);
2116                 err = drm_gem_create_mmap_offset(&obj->base);
2117                 mutex_unlock(&dev_priv->drm.struct_mutex);
2118         }
2119
2120         return err;
2121 }
2122
2123 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
2124 {
2125         drm_gem_free_mmap_offset(&obj->base);
2126 }
2127
2128 int
2129 i915_gem_mmap_gtt(struct drm_file *file,
2130                   struct drm_device *dev,
2131                   uint32_t handle,
2132                   uint64_t *offset)
2133 {
2134         struct drm_i915_gem_object *obj;
2135         int ret;
2136
2137         obj = i915_gem_object_lookup(file, handle);
2138         if (!obj)
2139                 return -ENOENT;
2140
2141         ret = i915_gem_object_create_mmap_offset(obj);
2142         if (ret == 0)
2143                 *offset = drm_vma_node_offset_addr(&obj->base.vma_node);
2144
2145         i915_gem_object_put(obj);
2146         return ret;
2147 }
2148
2149 /**
2150  * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
2151  * @dev: DRM device
2152  * @data: GTT mapping ioctl data
2153  * @file: GEM object info
2154  *
2155  * Simply returns the fake offset to userspace so it can mmap it.
2156  * The mmap call will end up in drm_gem_mmap(), which will set things
2157  * up so we can get faults in the handler above.
2158  *
2159  * The fault handler will take care of binding the object into the GTT
2160  * (since it may have been evicted to make room for something), allocating
2161  * a fence register, and mapping the appropriate aperture address into
2162  * userspace.
2163  */
2164 int
2165 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
2166                         struct drm_file *file)
2167 {
2168         struct drm_i915_gem_mmap_gtt *args = data;
2169
2170         return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
2171 }
2172
2173 /* Immediately discard the backing storage */
2174 static void
2175 i915_gem_object_truncate(struct drm_i915_gem_object *obj)
2176 {
2177         i915_gem_object_free_mmap_offset(obj);
2178
2179         if (obj->base.filp == NULL)
2180                 return;
2181
2182         /* Our goal here is to return as much of the memory as
2183          * is possible back to the system as we are called from OOM.
2184          * To do this we must instruct the shmfs to drop all of its
2185          * backing pages, *now*.
2186          */
2187         shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
2188         obj->mm.madv = __I915_MADV_PURGED;
2189 }
2190
2191 /* Try to discard unwanted pages */
2192 void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
2193 {
2194         struct address_space *mapping;
2195
2196         lockdep_assert_held(&obj->mm.lock);
2197         GEM_BUG_ON(obj->mm.pages);
2198
2199         switch (obj->mm.madv) {
2200         case I915_MADV_DONTNEED:
2201                 i915_gem_object_truncate(obj);
2202         case __I915_MADV_PURGED:
2203                 return;
2204         }
2205
2206         if (obj->base.filp == NULL)
2207                 return;
2208
2209         mapping = obj->base.filp->f_mapping,
2210         invalidate_mapping_pages(mapping, 0, (loff_t)-1);
2211 }
2212
2213 static void
2214 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
2215                               struct sg_table *pages)
2216 {
2217         struct sgt_iter sgt_iter;
2218         struct page *page;
2219
2220         __i915_gem_object_release_shmem(obj, pages);
2221
2222         i915_gem_gtt_finish_pages(obj, pages);
2223
2224         if (i915_gem_object_needs_bit17_swizzle(obj))
2225                 i915_gem_object_save_bit_17_swizzle(obj, pages);
2226
2227         for_each_sgt_page(page, sgt_iter, pages) {
2228                 if (obj->mm.dirty)
2229                         set_page_dirty(page);
2230
2231                 if (obj->mm.madv == I915_MADV_WILLNEED)
2232                         mark_page_accessed(page);
2233
2234                 put_page(page);
2235         }
2236         obj->mm.dirty = false;
2237
2238         sg_free_table(pages);
2239         kfree(pages);
2240 }
2241
2242 static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
2243 {
2244         struct radix_tree_iter iter;
2245         void **slot;
2246
2247         radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0)
2248                 radix_tree_delete(&obj->mm.get_page.radix, iter.index);
2249 }
2250
2251 void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
2252                                  enum i915_mm_subclass subclass)
2253 {
2254         struct sg_table *pages;
2255
2256         if (i915_gem_object_has_pinned_pages(obj))
2257                 return;
2258
2259         GEM_BUG_ON(obj->bind_count);
2260         if (!READ_ONCE(obj->mm.pages))
2261                 return;
2262
2263         /* May be called by shrinker from within get_pages() (on another bo) */
2264         mutex_lock_nested(&obj->mm.lock, subclass);
2265         if (unlikely(atomic_read(&obj->mm.pages_pin_count)))
2266                 goto unlock;
2267
2268         /* ->put_pages might need to allocate memory for the bit17 swizzle
2269          * array, hence protect them from being reaped by removing them from gtt
2270          * lists early. */
2271         pages = fetch_and_zero(&obj->mm.pages);
2272         GEM_BUG_ON(!pages);
2273
2274         if (obj->mm.mapping) {
2275                 void *ptr;
2276
2277                 ptr = ptr_mask_bits(obj->mm.mapping);
2278                 if (is_vmalloc_addr(ptr))
2279                         vunmap(ptr);
2280                 else
2281                         kunmap(kmap_to_page(ptr));
2282
2283                 obj->mm.mapping = NULL;
2284         }
2285
2286         __i915_gem_object_reset_page_iter(obj);
2287
2288         obj->ops->put_pages(obj, pages);
2289 unlock:
2290         mutex_unlock(&obj->mm.lock);
2291 }
2292
2293 static unsigned int swiotlb_max_size(void)
2294 {
2295 #if IS_ENABLED(CONFIG_SWIOTLB)
2296         return rounddown(swiotlb_nr_tbl() << IO_TLB_SHIFT, PAGE_SIZE);
2297 #else
2298         return 0;
2299 #endif
2300 }
2301
2302 static void i915_sg_trim(struct sg_table *orig_st)
2303 {
2304         struct sg_table new_st;
2305         struct scatterlist *sg, *new_sg;
2306         unsigned int i;
2307
2308         if (orig_st->nents == orig_st->orig_nents)
2309                 return;
2310
2311         if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL))
2312                 return;
2313
2314         new_sg = new_st.sgl;
2315         for_each_sg(orig_st->sgl, sg, orig_st->nents, i) {
2316                 sg_set_page(new_sg, sg_page(sg), sg->length, 0);
2317                 /* called before being DMA mapped, no need to copy sg->dma_* */
2318                 new_sg = sg_next(new_sg);
2319         }
2320
2321         sg_free_table(orig_st);
2322
2323         *orig_st = new_st;
2324 }
2325
2326 static struct sg_table *
2327 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2328 {
2329         struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2330         int page_count, i;
2331         struct address_space *mapping;
2332         struct sg_table *st;
2333         struct scatterlist *sg;
2334         struct sgt_iter sgt_iter;
2335         struct page *page;
2336         unsigned long last_pfn = 0;     /* suppress gcc warning */
2337         unsigned int max_segment;
2338         int ret;
2339         gfp_t gfp;
2340
2341         /* Assert that the object is not currently in any GPU domain. As it
2342          * wasn't in the GTT, there shouldn't be any way it could have been in
2343          * a GPU cache
2344          */
2345         GEM_BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
2346         GEM_BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
2347
2348         max_segment = swiotlb_max_size();
2349         if (!max_segment)
2350                 max_segment = rounddown(UINT_MAX, PAGE_SIZE);
2351
2352         st = kmalloc(sizeof(*st), GFP_KERNEL);
2353         if (st == NULL)
2354                 return ERR_PTR(-ENOMEM);
2355
2356         page_count = obj->base.size / PAGE_SIZE;
2357         if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
2358                 kfree(st);
2359                 return ERR_PTR(-ENOMEM);
2360         }
2361
2362         /* Get the list of pages out of our struct file.  They'll be pinned
2363          * at this point until we release them.
2364          *
2365          * Fail silently without starting the shrinker
2366          */
2367         mapping = obj->base.filp->f_mapping;
2368         gfp = mapping_gfp_constraint(mapping, ~(__GFP_IO | __GFP_RECLAIM));
2369         gfp |= __GFP_NORETRY | __GFP_NOWARN;
2370         sg = st->sgl;
2371         st->nents = 0;
2372         for (i = 0; i < page_count; i++) {
2373                 page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2374                 if (IS_ERR(page)) {
2375                         i915_gem_shrink(dev_priv,
2376                                         page_count,
2377                                         I915_SHRINK_BOUND |
2378                                         I915_SHRINK_UNBOUND |
2379                                         I915_SHRINK_PURGEABLE);
2380                         page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2381                 }
2382                 if (IS_ERR(page)) {
2383                         /* We've tried hard to allocate the memory by reaping
2384                          * our own buffer, now let the real VM do its job and
2385                          * go down in flames if truly OOM.
2386                          */
2387                         page = shmem_read_mapping_page(mapping, i);
2388                         if (IS_ERR(page)) {
2389                                 ret = PTR_ERR(page);
2390                                 goto err_sg;
2391                         }
2392                 }
2393                 if (!i ||
2394                     sg->length >= max_segment ||
2395                     page_to_pfn(page) != last_pfn + 1) {
2396                         if (i)
2397                                 sg = sg_next(sg);
2398                         st->nents++;
2399                         sg_set_page(sg, page, PAGE_SIZE, 0);
2400                 } else {
2401                         sg->length += PAGE_SIZE;
2402                 }
2403                 last_pfn = page_to_pfn(page);
2404
2405                 /* Check that the i965g/gm workaround works. */
2406                 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
2407         }
2408         if (sg) /* loop terminated early; short sg table */
2409                 sg_mark_end(sg);
2410
2411         /* Trim unused sg entries to avoid wasting memory. */
2412         i915_sg_trim(st);
2413
2414         ret = i915_gem_gtt_prepare_pages(obj, st);
2415         if (ret)
2416                 goto err_pages;
2417
2418         if (i915_gem_object_needs_bit17_swizzle(obj))
2419                 i915_gem_object_do_bit_17_swizzle(obj, st);
2420
2421         return st;
2422
2423 err_sg:
2424         sg_mark_end(sg);
2425 err_pages:
2426         for_each_sgt_page(page, sgt_iter, st)
2427                 put_page(page);
2428         sg_free_table(st);
2429         kfree(st);
2430
2431         /* shmemfs first checks if there is enough memory to allocate the page
2432          * and reports ENOSPC should there be insufficient, along with the usual
2433          * ENOMEM for a genuine allocation failure.
2434          *
2435          * We use ENOSPC in our driver to mean that we have run out of aperture
2436          * space and so want to translate the error from shmemfs back to our
2437          * usual understanding of ENOMEM.
2438          */
2439         if (ret == -ENOSPC)
2440                 ret = -ENOMEM;
2441
2442         return ERR_PTR(ret);
2443 }
2444
2445 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
2446                                  struct sg_table *pages)
2447 {
2448         lockdep_assert_held(&obj->mm.lock);
2449
2450         obj->mm.get_page.sg_pos = pages->sgl;
2451         obj->mm.get_page.sg_idx = 0;
2452
2453         obj->mm.pages = pages;
2454
2455         if (i915_gem_object_is_tiled(obj) &&
2456             to_i915(obj->base.dev)->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
2457                 GEM_BUG_ON(obj->mm.quirked);
2458                 __i915_gem_object_pin_pages(obj);
2459                 obj->mm.quirked = true;
2460         }
2461 }
2462
2463 static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2464 {
2465         struct sg_table *pages;
2466
2467         GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
2468
2469         if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) {
2470                 DRM_DEBUG("Attempting to obtain a purgeable object\n");
2471                 return -EFAULT;
2472         }
2473
2474         pages = obj->ops->get_pages(obj);
2475         if (unlikely(IS_ERR(pages)))
2476                 return PTR_ERR(pages);
2477
2478         __i915_gem_object_set_pages(obj, pages);
2479         return 0;
2480 }
2481
2482 /* Ensure that the associated pages are gathered from the backing storage
2483  * and pinned into our object. i915_gem_object_pin_pages() may be called
2484  * multiple times before they are released by a single call to
2485  * i915_gem_object_unpin_pages() - once the pages are no longer referenced
2486  * either as a result of memory pressure (reaping pages under the shrinker)
2487  * or as the object is itself released.
2488  */
2489 int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2490 {
2491         int err;
2492
2493         err = mutex_lock_interruptible(&obj->mm.lock);
2494         if (err)
2495                 return err;
2496
2497         if (unlikely(!obj->mm.pages)) {
2498                 err = ____i915_gem_object_get_pages(obj);
2499                 if (err)
2500                         goto unlock;
2501
2502                 smp_mb__before_atomic();
2503         }
2504         atomic_inc(&obj->mm.pages_pin_count);
2505
2506 unlock:
2507         mutex_unlock(&obj->mm.lock);
2508         return err;
2509 }
2510
2511 /* The 'mapping' part of i915_gem_object_pin_map() below */
2512 static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
2513                                  enum i915_map_type type)
2514 {
2515         unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
2516         struct sg_table *sgt = obj->mm.pages;
2517         struct sgt_iter sgt_iter;
2518         struct page *page;
2519         struct page *stack_pages[32];
2520         struct page **pages = stack_pages;
2521         unsigned long i = 0;
2522         pgprot_t pgprot;
2523         void *addr;
2524
2525         /* A single page can always be kmapped */
2526         if (n_pages == 1 && type == I915_MAP_WB)
2527                 return kmap(sg_page(sgt->sgl));
2528
2529         if (n_pages > ARRAY_SIZE(stack_pages)) {
2530                 /* Too big for stack -- allocate temporary array instead */
2531                 pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY);
2532                 if (!pages)
2533                         return NULL;
2534         }
2535
2536         for_each_sgt_page(page, sgt_iter, sgt)
2537                 pages[i++] = page;
2538
2539         /* Check that we have the expected number of pages */
2540         GEM_BUG_ON(i != n_pages);
2541
2542         switch (type) {
2543         case I915_MAP_WB:
2544                 pgprot = PAGE_KERNEL;
2545                 break;
2546         case I915_MAP_WC:
2547                 pgprot = pgprot_writecombine(PAGE_KERNEL_IO);
2548                 break;
2549         }
2550         addr = vmap(pages, n_pages, 0, pgprot);
2551
2552         if (pages != stack_pages)
2553                 drm_free_large(pages);
2554
2555         return addr;
2556 }
2557
2558 /* get, pin, and map the pages of the object into kernel space */
2559 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
2560                               enum i915_map_type type)
2561 {
2562         enum i915_map_type has_type;
2563         bool pinned;
2564         void *ptr;
2565         int ret;
2566
2567         GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
2568
2569         ret = mutex_lock_interruptible(&obj->mm.lock);
2570         if (ret)
2571                 return ERR_PTR(ret);
2572
2573         pinned = true;
2574         if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) {
2575                 if (unlikely(!obj->mm.pages)) {
2576                         ret = ____i915_gem_object_get_pages(obj);
2577                         if (ret)
2578                                 goto err_unlock;
2579
2580                         smp_mb__before_atomic();
2581                 }
2582                 atomic_inc(&obj->mm.pages_pin_count);
2583                 pinned = false;
2584         }
2585         GEM_BUG_ON(!obj->mm.pages);
2586
2587         ptr = ptr_unpack_bits(obj->mm.mapping, has_type);
2588         if (ptr && has_type != type) {
2589                 if (pinned) {
2590                         ret = -EBUSY;
2591                         goto err_unpin;
2592                 }
2593
2594                 if (is_vmalloc_addr(ptr))
2595                         vunmap(ptr);
2596                 else
2597                         kunmap(kmap_to_page(ptr));
2598
2599                 ptr = obj->mm.mapping = NULL;
2600         }
2601
2602         if (!ptr) {
2603                 ptr = i915_gem_object_map(obj, type);
2604                 if (!ptr) {
2605                         ret = -ENOMEM;
2606                         goto err_unpin;
2607                 }
2608
2609                 obj->mm.mapping = ptr_pack_bits(ptr, type);
2610         }
2611
2612 out_unlock:
2613         mutex_unlock(&obj->mm.lock);
2614         return ptr;
2615
2616 err_unpin:
2617         atomic_dec(&obj->mm.pages_pin_count);
2618 err_unlock:
2619         ptr = ERR_PTR(ret);
2620         goto out_unlock;
2621 }
2622
2623 static bool i915_context_is_banned(const struct i915_gem_context *ctx)
2624 {
2625         unsigned long elapsed;
2626
2627         if (ctx->hang_stats.banned)
2628                 return true;
2629
2630         elapsed = get_seconds() - ctx->hang_stats.guilty_ts;
2631         if (ctx->hang_stats.ban_period_seconds &&
2632             elapsed <= ctx->hang_stats.ban_period_seconds) {
2633                 DRM_DEBUG("context hanging too fast, banning!\n");
2634                 return true;
2635         }
2636
2637         return false;
2638 }
2639
2640 static void i915_set_reset_status(struct i915_gem_context *ctx,
2641                                   const bool guilty)
2642 {
2643         struct i915_ctx_hang_stats *hs = &ctx->hang_stats;
2644
2645         if (guilty) {
2646                 hs->banned = i915_context_is_banned(ctx);
2647                 hs->batch_active++;
2648                 hs->guilty_ts = get_seconds();
2649         } else {
2650                 hs->batch_pending++;
2651         }
2652 }
2653
2654 struct drm_i915_gem_request *
2655 i915_gem_find_active_request(struct intel_engine_cs *engine)
2656 {
2657         struct drm_i915_gem_request *request;
2658
2659         /* We are called by the error capture and reset at a random
2660          * point in time. In particular, note that neither is crucially
2661          * ordered with an interrupt. After a hang, the GPU is dead and we
2662          * assume that no more writes can happen (we waited long enough for
2663          * all writes that were in transaction to be flushed) - adding an
2664          * extra delay for a recent interrupt is pointless. Hence, we do
2665          * not need an engine->irq_seqno_barrier() before the seqno reads.
2666          */
2667         list_for_each_entry(request, &engine->timeline->requests, link) {
2668                 if (__i915_gem_request_completed(request))
2669                         continue;
2670
2671                 return request;
2672         }
2673
2674         return NULL;
2675 }
2676
2677 static void reset_request(struct drm_i915_gem_request *request)
2678 {
2679         void *vaddr = request->ring->vaddr;
2680         u32 head;
2681
2682         /* As this request likely depends on state from the lost
2683          * context, clear out all the user operations leaving the
2684          * breadcrumb at the end (so we get the fence notifications).
2685          */
2686         head = request->head;
2687         if (request->postfix < head) {
2688                 memset(vaddr + head, 0, request->ring->size - head);
2689                 head = 0;
2690         }
2691         memset(vaddr + head, 0, request->postfix - head);
2692 }
2693
2694 static void i915_gem_reset_engine(struct intel_engine_cs *engine)
2695 {
2696         struct drm_i915_gem_request *request;
2697         struct i915_gem_context *incomplete_ctx;
2698         struct intel_timeline *timeline;
2699         bool ring_hung;
2700
2701         if (engine->irq_seqno_barrier)
2702                 engine->irq_seqno_barrier(engine);
2703
2704         request = i915_gem_find_active_request(engine);
2705         if (!request)
2706                 return;
2707
2708         ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
2709         if (engine->hangcheck.seqno != intel_engine_get_seqno(engine))
2710                 ring_hung = false;
2711
2712         i915_set_reset_status(request->ctx, ring_hung);
2713         if (!ring_hung)
2714                 return;
2715
2716         DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n",
2717                          engine->name, request->global_seqno);
2718
2719         /* Setup the CS to resume from the breadcrumb of the hung request */
2720         engine->reset_hw(engine, request);
2721
2722         /* Users of the default context do not rely on logical state
2723          * preserved between batches. They have to emit full state on
2724          * every batch and so it is safe to execute queued requests following
2725          * the hang.
2726          *
2727          * Other contexts preserve state, now corrupt. We want to skip all
2728          * queued requests that reference the corrupt context.
2729          */
2730         incomplete_ctx = request->ctx;
2731         if (i915_gem_context_is_default(incomplete_ctx))
2732                 return;
2733
2734         list_for_each_entry_continue(request, &engine->timeline->requests, link)
2735                 if (request->ctx == incomplete_ctx)
2736                         reset_request(request);
2737
2738         timeline = i915_gem_context_lookup_timeline(incomplete_ctx, engine);
2739         list_for_each_entry(request, &timeline->requests, link)
2740                 reset_request(request);
2741 }
2742
2743 void i915_gem_reset(struct drm_i915_private *dev_priv)
2744 {
2745         struct intel_engine_cs *engine;
2746         enum intel_engine_id id;
2747
2748         lockdep_assert_held(&dev_priv->drm.struct_mutex);
2749
2750         i915_gem_retire_requests(dev_priv);
2751
2752         for_each_engine(engine, dev_priv, id)
2753                 i915_gem_reset_engine(engine);
2754
2755         i915_gem_restore_fences(dev_priv);
2756
2757         if (dev_priv->gt.awake) {
2758                 intel_sanitize_gt_powersave(dev_priv);
2759                 intel_enable_gt_powersave(dev_priv);
2760                 if (INTEL_GEN(dev_priv) >= 6)
2761                         gen6_rps_busy(dev_priv);
2762         }
2763 }
2764
2765 static void nop_submit_request(struct drm_i915_gem_request *request)
2766 {
2767         i915_gem_request_submit(request);
2768         intel_engine_init_global_seqno(request->engine, request->global_seqno);
2769 }
2770
2771 static void i915_gem_cleanup_engine(struct intel_engine_cs *engine)
2772 {
2773         engine->submit_request = nop_submit_request;
2774
2775         /* Mark all pending requests as complete so that any concurrent
2776          * (lockless) lookup doesn't try and wait upon the request as we
2777          * reset it.
2778          */
2779         intel_engine_init_global_seqno(engine,
2780                                        intel_engine_last_submit(engine));
2781
2782         /*
2783          * Clear the execlists queue up before freeing the requests, as those
2784          * are the ones that keep the context and ringbuffer backing objects
2785          * pinned in place.
2786          */
2787
2788         if (i915.enable_execlists) {
2789                 unsigned long flags;
2790
2791                 spin_lock_irqsave(&engine->timeline->lock, flags);
2792
2793                 i915_gem_request_put(engine->execlist_port[0].request);
2794                 i915_gem_request_put(engine->execlist_port[1].request);
2795                 memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
2796                 engine->execlist_queue = RB_ROOT;
2797                 engine->execlist_first = NULL;
2798
2799                 spin_unlock_irqrestore(&engine->timeline->lock, flags);
2800         }
2801 }
2802
2803 void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
2804 {
2805         struct intel_engine_cs *engine;
2806         enum intel_engine_id id;
2807
2808         lockdep_assert_held(&dev_priv->drm.struct_mutex);
2809         set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
2810
2811         i915_gem_context_lost(dev_priv);
2812         for_each_engine(engine, dev_priv, id)
2813                 i915_gem_cleanup_engine(engine);
2814         mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
2815
2816         i915_gem_retire_requests(dev_priv);
2817 }
2818
2819 static void
2820 i915_gem_retire_work_handler(struct work_struct *work)
2821 {
2822         struct drm_i915_private *dev_priv =
2823                 container_of(work, typeof(*dev_priv), gt.retire_work.work);
2824         struct drm_device *dev = &dev_priv->drm;
2825
2826         /* Come back later if the device is busy... */
2827         if (mutex_trylock(&dev->struct_mutex)) {
2828                 i915_gem_retire_requests(dev_priv);
2829                 mutex_unlock(&dev->struct_mutex);
2830         }
2831
2832         /* Keep the retire handler running until we are finally idle.
2833          * We do not need to do this test under locking as in the worst-case
2834          * we queue the retire worker once too often.
2835          */
2836         if (READ_ONCE(dev_priv->gt.awake)) {
2837                 i915_queue_hangcheck(dev_priv);
2838                 queue_delayed_work(dev_priv->wq,
2839                                    &dev_priv->gt.retire_work,
2840                                    round_jiffies_up_relative(HZ));
2841         }
2842 }
2843
2844 static void
2845 i915_gem_idle_work_handler(struct work_struct *work)
2846 {
2847         struct drm_i915_private *dev_priv =
2848                 container_of(work, typeof(*dev_priv), gt.idle_work.work);
2849         struct drm_device *dev = &dev_priv->drm;
2850         struct intel_engine_cs *engine;
2851         enum intel_engine_id id;
2852         bool rearm_hangcheck;
2853
2854         if (!READ_ONCE(dev_priv->gt.awake))
2855                 return;
2856
2857         /*
2858          * Wait for last execlists context complete, but bail out in case a
2859          * new request is submitted.
2860          */
2861         wait_for(READ_ONCE(dev_priv->gt.active_requests) ||
2862                  intel_execlists_idle(dev_priv), 10);
2863
2864         if (READ_ONCE(dev_priv->gt.active_requests))
2865                 return;
2866
2867         rearm_hangcheck =
2868                 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
2869
2870         if (!mutex_trylock(&dev->struct_mutex)) {
2871                 /* Currently busy, come back later */
2872                 mod_delayed_work(dev_priv->wq,
2873                                  &dev_priv->gt.idle_work,
2874                                  msecs_to_jiffies(50));
2875                 goto out_rearm;
2876         }
2877
2878         /*
2879          * New request retired after this work handler started, extend active
2880          * period until next instance of the work.
2881          */
2882         if (work_pending(work))
2883                 goto out_unlock;
2884
2885         if (dev_priv->gt.active_requests)
2886                 goto out_unlock;
2887
2888         if (wait_for(intel_execlists_idle(dev_priv), 10))
2889                 DRM_ERROR("Timeout waiting for engines to idle\n");
2890
2891         for_each_engine(engine, dev_priv, id)
2892                 i915_gem_batch_pool_fini(&engine->batch_pool);
2893
2894         GEM_BUG_ON(!dev_priv->gt.awake);
2895         dev_priv->gt.awake = false;
2896         rearm_hangcheck = false;
2897
2898         if (INTEL_GEN(dev_priv) >= 6)
2899                 gen6_rps_idle(dev_priv);
2900         intel_runtime_pm_put(dev_priv);
2901 out_unlock:
2902         mutex_unlock(&dev->struct_mutex);
2903
2904 out_rearm:
2905         if (rearm_hangcheck) {
2906                 GEM_BUG_ON(!dev_priv->gt.awake);
2907                 i915_queue_hangcheck(dev_priv);
2908         }
2909 }
2910
2911 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
2912 {
2913         struct drm_i915_gem_object *obj = to_intel_bo(gem);
2914         struct drm_i915_file_private *fpriv = file->driver_priv;
2915         struct i915_vma *vma, *vn;
2916
2917         mutex_lock(&obj->base.dev->struct_mutex);
2918         list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link)
2919                 if (vma->vm->file == fpriv)
2920                         i915_vma_close(vma);
2921
2922         if (i915_gem_object_is_active(obj) &&
2923             !i915_gem_object_has_active_reference(obj)) {
2924                 i915_gem_object_set_active_reference(obj);
2925                 i915_gem_object_get(obj);
2926         }
2927         mutex_unlock(&obj->base.dev->struct_mutex);
2928 }
2929
2930 static unsigned long to_wait_timeout(s64 timeout_ns)
2931 {
2932         if (timeout_ns < 0)
2933                 return MAX_SCHEDULE_TIMEOUT;
2934
2935         if (timeout_ns == 0)
2936                 return 0;
2937
2938         return nsecs_to_jiffies_timeout(timeout_ns);
2939 }
2940
2941 /**
2942  * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
2943  * @dev: drm device pointer
2944  * @data: ioctl data blob
2945  * @file: drm file pointer
2946  *
2947  * Returns 0 if successful, else an error is returned with the remaining time in
2948  * the timeout parameter.
2949  *  -ETIME: object is still busy after timeout
2950  *  -ERESTARTSYS: signal interrupted the wait
2951  *  -ENONENT: object doesn't exist
2952  * Also possible, but rare:
2953  *  -EAGAIN: GPU wedged
2954  *  -ENOMEM: damn
2955  *  -ENODEV: Internal IRQ fail
2956  *  -E?: The add request failed
2957  *
2958  * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
2959  * non-zero timeout parameter the wait ioctl will wait for the given number of
2960  * nanoseconds on an object becoming unbusy. Since the wait itself does so
2961  * without holding struct_mutex the object may become re-busied before this
2962  * function completes. A similar but shorter * race condition exists in the busy
2963  * ioctl
2964  */
2965 int
2966 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2967 {
2968         struct drm_i915_gem_wait *args = data;
2969         struct drm_i915_gem_object *obj;
2970         ktime_t start;
2971         long ret;
2972
2973         if (args->flags != 0)
2974                 return -EINVAL;
2975
2976         obj = i915_gem_object_lookup(file, args->bo_handle);
2977         if (!obj)
2978                 return -ENOENT;
2979
2980         start = ktime_get();
2981
2982         ret = i915_gem_object_wait(obj,
2983                                    I915_WAIT_INTERRUPTIBLE | I915_WAIT_ALL,
2984                                    to_wait_timeout(args->timeout_ns),
2985                                    to_rps_client(file));
2986
2987         if (args->timeout_ns > 0) {
2988                 args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
2989                 if (args->timeout_ns < 0)
2990                         args->timeout_ns = 0;
2991         }
2992
2993         i915_gem_object_put(obj);
2994         return ret;
2995 }
2996
2997 static int wait_for_timeline(struct i915_gem_timeline *tl, unsigned int flags)
2998 {
2999         int ret, i;
3000
3001         for (i = 0; i < ARRAY_SIZE(tl->engine); i++) {
3002                 ret = i915_gem_active_wait(&tl->engine[i].last_request, flags);
3003                 if (ret)
3004                         return ret;
3005         }
3006
3007         return 0;
3008 }
3009
3010 int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
3011 {
3012         int ret;
3013
3014         if (flags & I915_WAIT_LOCKED) {
3015                 struct i915_gem_timeline *tl;
3016
3017                 lockdep_assert_held(&i915->drm.struct_mutex);
3018
3019                 list_for_each_entry(tl, &i915->gt.timelines, link) {
3020                         ret = wait_for_timeline(tl, flags);
3021                         if (ret)
3022                                 return ret;
3023                 }
3024         } else {
3025                 ret = wait_for_timeline(&i915->gt.global_timeline, flags);
3026                 if (ret)
3027                         return ret;
3028         }
3029
3030         return 0;
3031 }
3032
3033 void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
3034                              bool force)
3035 {
3036         /* If we don't have a page list set up, then we're not pinned
3037          * to GPU, and we can ignore the cache flush because it'll happen
3038          * again at bind time.
3039          */
3040         if (!obj->mm.pages)
3041                 return;
3042
3043         /*
3044          * Stolen memory is always coherent with the GPU as it is explicitly
3045          * marked as wc by the system, or the system is cache-coherent.
3046          */
3047         if (obj->stolen || obj->phys_handle)
3048                 return;
3049
3050         /* If the GPU is snooping the contents of the CPU cache,
3051          * we do not need to manually clear the CPU cache lines.  However,
3052          * the caches are only snooped when the render cache is
3053          * flushed/invalidated.  As we always have to emit invalidations
3054          * and flushes when moving into and out of the RENDER domain, correct
3055          * snooping behaviour occurs naturally as the result of our domain
3056          * tracking.
3057          */
3058         if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) {
3059                 obj->cache_dirty = true;
3060                 return;
3061         }
3062
3063         trace_i915_gem_object_clflush(obj);
3064         drm_clflush_sg(obj->mm.pages);
3065         obj->cache_dirty = false;
3066 }
3067
3068 /** Flushes the GTT write domain for the object if it's dirty. */
3069 static void
3070 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
3071 {
3072         struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
3073
3074         if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
3075                 return;
3076
3077         /* No actual flushing is required for the GTT write domain.  Writes
3078          * to it "immediately" go to main memory as far as we know, so there's
3079          * no chipset flush.  It also doesn't land in render cache.
3080          *
3081          * However, we do have to enforce the order so that all writes through
3082          * the GTT land before any writes to the device, such as updates to
3083          * the GATT itself.
3084          *
3085          * We also have to wait a bit for the writes to land from the GTT.
3086          * An uncached read (i.e. mmio) seems to be ideal for the round-trip
3087          * timing. This issue has only been observed when switching quickly
3088          * between GTT writes and CPU reads from inside the kernel on recent hw,
3089          * and it appears to only affect discrete GTT blocks (i.e. on LLC
3090          * system agents we cannot reproduce this behaviour).
3091          */
3092         wmb();
3093         if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv))
3094                 POSTING_READ(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
3095
3096         intel_fb_obj_flush(obj, false, write_origin(obj, I915_GEM_DOMAIN_GTT));
3097
3098         obj->base.write_domain = 0;
3099         trace_i915_gem_object_change_domain(obj,
3100                                             obj->base.read_domains,
3101                                             I915_GEM_DOMAIN_GTT);
3102 }
3103
3104 /** Flushes the CPU write domain for the object if it's dirty. */
3105 static void
3106 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
3107 {
3108         if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
3109                 return;
3110
3111         i915_gem_clflush_object(obj, obj->pin_display);
3112         intel_fb_obj_flush(obj, false, ORIGIN_CPU);
3113
3114         obj->base.write_domain = 0;
3115         trace_i915_gem_object_change_domain(obj,
3116                                             obj->base.read_domains,
3117                                             I915_GEM_DOMAIN_CPU);
3118 }
3119
3120 /**
3121  * Moves a single object to the GTT read, and possibly write domain.
3122  * @obj: object to act on
3123  * @write: ask for write access or read only
3124  *
3125  * This function returns when the move is complete, including waiting on
3126  * flushes to occur.
3127  */
3128 int
3129 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
3130 {
3131         uint32_t old_write_domain, old_read_domains;
3132         int ret;
3133
3134         lockdep_assert_held(&obj->base.dev->struct_mutex);
3135
3136         ret = i915_gem_object_wait(obj,
3137                                    I915_WAIT_INTERRUPTIBLE |
3138                                    I915_WAIT_LOCKED |
3139                                    (write ? I915_WAIT_ALL : 0),
3140                                    MAX_SCHEDULE_TIMEOUT,
3141                                    NULL);
3142         if (ret)
3143                 return ret;
3144
3145         if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
3146                 return 0;
3147
3148         /* Flush and acquire obj->pages so that we are coherent through
3149          * direct access in memory with previous cached writes through
3150          * shmemfs and that our cache domain tracking remains valid.
3151          * For example, if the obj->filp was moved to swap without us
3152          * being notified and releasing the pages, we would mistakenly
3153          * continue to assume that the obj remained out of the CPU cached
3154          * domain.
3155          */
3156         ret = i915_gem_object_pin_pages(obj);
3157         if (ret)
3158                 return ret;
3159
3160         i915_gem_object_flush_cpu_write_domain(obj);
3161
3162         /* Serialise direct access to this object with the barriers for
3163          * coherent writes from the GPU, by effectively invalidating the
3164          * GTT domain upon first access.
3165          */
3166         if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
3167                 mb();
3168
3169         old_write_domain = obj->base.write_domain;
3170         old_read_domains = obj->base.read_domains;
3171
3172         /* It should now be out of any other write domains, and we can update
3173          * the domain values for our changes.
3174          */
3175         GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
3176         obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
3177         if (write) {
3178                 obj->base.read_domains = I915_GEM_DOMAIN_GTT;
3179                 obj->base.write_domain = I915_GEM_DOMAIN_GTT;
3180                 obj->mm.dirty = true;
3181         }
3182
3183         trace_i915_gem_object_change_domain(obj,
3184                                             old_read_domains,
3185                                             old_write_domain);
3186
3187         i915_gem_object_unpin_pages(obj);
3188         return 0;
3189 }
3190
3191 /**
3192  * Changes the cache-level of an object across all VMA.
3193  * @obj: object to act on
3194  * @cache_level: new cache level to set for the object
3195  *
3196  * After this function returns, the object will be in the new cache-level
3197  * across all GTT and the contents of the backing storage will be coherent,
3198  * with respect to the new cache-level. In order to keep the backing storage
3199  * coherent for all users, we only allow a single cache level to be set
3200  * globally on the object and prevent it from being changed whilst the
3201  * hardware is reading from the object. That is if the object is currently
3202  * on the scanout it will be set to uncached (or equivalent display
3203  * cache coherency) and all non-MOCS GPU access will also be uncached so
3204  * that all direct access to the scanout remains coherent.
3205  */
3206 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
3207                                     enum i915_cache_level cache_level)
3208 {
3209         struct i915_vma *vma;
3210         int ret;
3211
3212         lockdep_assert_held(&obj->base.dev->struct_mutex);
3213
3214         if (obj->cache_level == cache_level)
3215                 return 0;
3216
3217         /* Inspect the list of currently bound VMA and unbind any that would
3218          * be invalid given the new cache-level. This is principally to
3219          * catch the issue of the CS prefetch crossing page boundaries and
3220          * reading an invalid PTE on older architectures.
3221          */
3222 restart:
3223         list_for_each_entry(vma, &obj->vma_list, obj_link) {
3224                 if (!drm_mm_node_allocated(&vma->node))
3225                         continue;
3226
3227                 if (i915_vma_is_pinned(vma)) {
3228                         DRM_DEBUG("can not change the cache level of pinned objects\n");
3229                         return -EBUSY;
3230                 }
3231
3232                 if (i915_gem_valid_gtt_space(vma, cache_level))
3233                         continue;
3234
3235                 ret = i915_vma_unbind(vma);
3236                 if (ret)
3237                         return ret;
3238
3239                 /* As unbinding may affect other elements in the
3240                  * obj->vma_list (due to side-effects from retiring
3241                  * an active vma), play safe and restart the iterator.
3242                  */
3243                 goto restart;
3244         }
3245
3246         /* We can reuse the existing drm_mm nodes but need to change the
3247          * cache-level on the PTE. We could simply unbind them all and
3248          * rebind with the correct cache-level on next use. However since
3249          * we already have a valid slot, dma mapping, pages etc, we may as
3250          * rewrite the PTE in the belief that doing so tramples upon less
3251          * state and so involves less work.
3252          */
3253         if (obj->bind_count) {
3254                 /* Before we change the PTE, the GPU must not be accessing it.
3255                  * If we wait upon the object, we know that all the bound
3256                  * VMA are no longer active.
3257                  */
3258                 ret = i915_gem_object_wait(obj,
3259                                            I915_WAIT_INTERRUPTIBLE |
3260                                            I915_WAIT_LOCKED |
3261                                            I915_WAIT_ALL,
3262                                            MAX_SCHEDULE_TIMEOUT,
3263                                            NULL);
3264                 if (ret)
3265                         return ret;
3266
3267                 if (!HAS_LLC(to_i915(obj->base.dev)) &&
3268                     cache_level != I915_CACHE_NONE) {
3269                         /* Access to snoopable pages through the GTT is
3270                          * incoherent and on some machines causes a hard
3271                          * lockup. Relinquish the CPU mmaping to force
3272                          * userspace to refault in the pages and we can
3273                          * then double check if the GTT mapping is still
3274                          * valid for that pointer access.
3275                          */
3276                         i915_gem_release_mmap(obj);
3277
3278                         /* As we no longer need a fence for GTT access,
3279                          * we can relinquish it now (and so prevent having
3280                          * to steal a fence from someone else on the next
3281                          * fence request). Note GPU activity would have
3282                          * dropped the fence as all snoopable access is
3283                          * supposed to be linear.
3284                          */
3285                         list_for_each_entry(vma, &obj->vma_list, obj_link) {
3286                                 ret = i915_vma_put_fence(vma);
3287                                 if (ret)
3288                                         return ret;
3289                         }
3290                 } else {
3291                         /* We either have incoherent backing store and
3292                          * so no GTT access or the architecture is fully
3293                          * coherent. In such cases, existing GTT mmaps
3294                          * ignore the cache bit in the PTE and we can
3295                          * rewrite it without confusing the GPU or having
3296                          * to force userspace to fault back in its mmaps.
3297                          */
3298                 }
3299
3300                 list_for_each_entry(vma, &obj->vma_list, obj_link) {
3301                         if (!drm_mm_node_allocated(&vma->node))
3302                                 continue;
3303
3304                         ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
3305                         if (ret)
3306                                 return ret;
3307                 }
3308         }
3309
3310         if (obj->base.write_domain == I915_GEM_DOMAIN_CPU &&
3311             cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
3312                 obj->cache_dirty = true;
3313
3314         list_for_each_entry(vma, &obj->vma_list, obj_link)
3315                 vma->node.color = cache_level;
3316         obj->cache_level = cache_level;
3317
3318         return 0;
3319 }
3320
3321 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
3322                                struct drm_file *file)
3323 {
3324         struct drm_i915_gem_caching *args = data;
3325         struct drm_i915_gem_object *obj;
3326         int err = 0;
3327
3328         rcu_read_lock();
3329         obj = i915_gem_object_lookup_rcu(file, args->handle);
3330         if (!obj) {
3331                 err = -ENOENT;
3332                 goto out;
3333         }
3334
3335         switch (obj->cache_level) {
3336         case I915_CACHE_LLC:
3337         case I915_CACHE_L3_LLC:
3338                 args->caching = I915_CACHING_CACHED;
3339                 break;
3340
3341         case I915_CACHE_WT:
3342                 args->caching = I915_CACHING_DISPLAY;
3343                 break;
3344
3345         default:
3346                 args->caching = I915_CACHING_NONE;
3347                 break;
3348         }
3349 out:
3350         rcu_read_unlock();
3351         return err;
3352 }
3353
3354 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
3355                                struct drm_file *file)
3356 {
3357         struct drm_i915_private *i915 = to_i915(dev);
3358         struct drm_i915_gem_caching *args = data;
3359         struct drm_i915_gem_object *obj;
3360         enum i915_cache_level level;
3361         int ret;
3362
3363         switch (args->caching) {
3364         case I915_CACHING_NONE:
3365                 level = I915_CACHE_NONE;
3366                 break;
3367         case I915_CACHING_CACHED:
3368                 /*
3369                  * Due to a HW issue on BXT A stepping, GPU stores via a
3370                  * snooped mapping may leave stale data in a corresponding CPU
3371                  * cacheline, whereas normally such cachelines would get
3372                  * invalidated.
3373                  */
3374                 if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
3375                         return -ENODEV;
3376
3377                 level = I915_CACHE_LLC;
3378                 break;
3379         case I915_CACHING_DISPLAY:
3380                 level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
3381                 break;
3382         default:
3383                 return -EINVAL;
3384         }
3385
3386         ret = i915_mutex_lock_interruptible(dev);
3387         if (ret)
3388                 return ret;
3389
3390         obj = i915_gem_object_lookup(file, args->handle);
3391         if (!obj) {
3392                 ret = -ENOENT;
3393                 goto unlock;
3394         }
3395
3396         ret = i915_gem_object_set_cache_level(obj, level);
3397         i915_gem_object_put(obj);
3398 unlock:
3399         mutex_unlock(&dev->struct_mutex);
3400         return ret;
3401 }
3402
3403 /*
3404  * Prepare buffer for display plane (scanout, cursors, etc).
3405  * Can be called from an uninterruptible phase (modesetting) and allows
3406  * any flushes to be pipelined (for pageflips).
3407  */
3408 struct i915_vma *
3409 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
3410                                      u32 alignment,
3411                                      const struct i915_ggtt_view *view)
3412 {
3413         struct i915_vma *vma;
3414         u32 old_read_domains, old_write_domain;
3415         int ret;
3416
3417         lockdep_assert_held(&obj->base.dev->struct_mutex);
3418
3419         /* Mark the pin_display early so that we account for the
3420          * display coherency whilst setting up the cache domains.
3421          */
3422         obj->pin_display++;
3423
3424         /* The display engine is not coherent with the LLC cache on gen6.  As
3425          * a result, we make sure that the pinning that is about to occur is
3426          * done with uncached PTEs. This is lowest common denominator for all
3427          * chipsets.
3428          *
3429          * However for gen6+, we could do better by using the GFDT bit instead
3430          * of uncaching, which would allow us to flush all the LLC-cached data
3431          * with that bit in the PTE to main memory with just one PIPE_CONTROL.
3432          */
3433         ret = i915_gem_object_set_cache_level(obj,
3434                                               HAS_WT(to_i915(obj->base.dev)) ?
3435                                               I915_CACHE_WT : I915_CACHE_NONE);
3436         if (ret) {
3437                 vma = ERR_PTR(ret);
3438                 goto err_unpin_display;
3439         }
3440
3441         /* As the user may map the buffer once pinned in the display plane
3442          * (e.g. libkms for the bootup splash), we have to ensure that we
3443          * always use map_and_fenceable for all scanout buffers. However,
3444          * it may simply be too big to fit into mappable, in which case
3445          * put it anyway and hope that userspace can cope (but always first
3446          * try to preserve the existing ABI).
3447          */
3448         vma = ERR_PTR(-ENOSPC);
3449         if (view->type == I915_GGTT_VIEW_NORMAL)
3450                 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
3451                                                PIN_MAPPABLE | PIN_NONBLOCK);
3452         if (IS_ERR(vma)) {
3453                 struct drm_i915_private *i915 = to_i915(obj->base.dev);
3454                 unsigned int flags;
3455
3456                 /* Valleyview is definitely limited to scanning out the first
3457                  * 512MiB. Lets presume this behaviour was inherited from the
3458                  * g4x display engine and that all earlier gen are similarly
3459                  * limited. Testing suggests that it is a little more
3460                  * complicated than this. For example, Cherryview appears quite
3461                  * happy to scanout from anywhere within its global aperture.
3462                  */
3463                 flags = 0;
3464                 if (HAS_GMCH_DISPLAY(i915))
3465                         flags = PIN_MAPPABLE;
3466                 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
3467         }
3468         if (IS_ERR(vma))
3469                 goto err_unpin_display;
3470
3471         vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
3472
3473         /* Treat this as an end-of-frame, like intel_user_framebuffer_dirty() */
3474         if (obj->cache_dirty) {
3475                 i915_gem_clflush_object(obj, true);
3476                 intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
3477         }
3478
3479         old_write_domain = obj->base.write_domain;
3480         old_read_domains = obj->base.read_domains;
3481
3482         /* It should now be out of any other write domains, and we can update
3483          * the domain values for our changes.
3484          */
3485         obj->base.write_domain = 0;
3486         obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
3487
3488         trace_i915_gem_object_change_domain(obj,
3489                                             old_read_domains,
3490                                             old_write_domain);
3491
3492         return vma;
3493
3494 err_unpin_display:
3495         obj->pin_display--;
3496         return vma;
3497 }
3498
3499 void
3500 i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
3501 {
3502         lockdep_assert_held(&vma->vm->dev->struct_mutex);
3503
3504         if (WARN_ON(vma->obj->pin_display == 0))
3505                 return;
3506
3507         if (--vma->obj->pin_display == 0)
3508                 vma->display_alignment = 0;
3509
3510         /* Bump the LRU to try and avoid premature eviction whilst flipping  */
3511         if (!i915_vma_is_active(vma))
3512                 list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
3513
3514         i915_vma_unpin(vma);
3515 }
3516
3517 /**
3518  * Moves a single object to the CPU read, and possibly write domain.
3519  * @obj: object to act on
3520  * @write: requesting write or read-only access
3521  *
3522  * This function returns when the move is complete, including waiting on
3523  * flushes to occur.
3524  */
3525 int
3526 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
3527 {
3528         uint32_t old_write_domain, old_read_domains;
3529         int ret;
3530
3531         lockdep_assert_held(&obj->base.dev->struct_mutex);
3532
3533         ret = i915_gem_object_wait(obj,
3534                                    I915_WAIT_INTERRUPTIBLE |
3535                                    I915_WAIT_LOCKED |
3536                                    (write ? I915_WAIT_ALL : 0),
3537                                    MAX_SCHEDULE_TIMEOUT,
3538                                    NULL);
3539         if (ret)
3540                 return ret;
3541
3542         if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
3543                 return 0;
3544
3545         i915_gem_object_flush_gtt_write_domain(obj);
3546
3547         old_write_domain = obj->base.write_domain;
3548         old_read_domains = obj->base.read_domains;
3549
3550         /* Flush the CPU cache if it's still invalid. */
3551         if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
3552                 i915_gem_clflush_object(obj, false);
3553
3554                 obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
3555         }
3556
3557         /* It should now be out of any other write domains, and we can update
3558          * the domain values for our changes.
3559          */
3560         GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
3561
3562         /* If we're writing through the CPU, then the GPU read domains will
3563          * need to be invalidated at next use.
3564          */
3565         if (write) {
3566                 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3567                 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3568         }
3569
3570         trace_i915_gem_object_change_domain(obj,
3571                                             old_read_domains,
3572                                             old_write_domain);
3573
3574         return 0;
3575 }
3576
3577 /* Throttle our rendering by waiting until the ring has completed our requests
3578  * emitted over 20 msec ago.
3579  *
3580  * Note that if we were to use the current jiffies each time around the loop,
3581  * we wouldn't escape the function with any frames outstanding if the time to
3582  * render a frame was over 20ms.
3583  *
3584  * This should get us reasonable parallelism between CPU and GPU but also
3585  * relatively low latency when blocking on a particular request to finish.
3586  */
3587 static int
3588 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
3589 {
3590         struct drm_i915_private *dev_priv = to_i915(dev);
3591         struct drm_i915_file_private *file_priv = file->driver_priv;
3592         unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
3593         struct drm_i915_gem_request *request, *target = NULL;
3594         long ret;
3595
3596         /* ABI: return -EIO if already wedged */
3597         if (i915_terminally_wedged(&dev_priv->gpu_error))
3598                 return -EIO;
3599
3600         spin_lock(&file_priv->mm.lock);
3601         list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
3602                 if (time_after_eq(request->emitted_jiffies, recent_enough))
3603                         break;
3604
3605                 /*
3606                  * Note that the request might not have been submitted yet.
3607                  * In which case emitted_jiffies will be zero.
3608                  */
3609                 if (!request->emitted_jiffies)
3610                         continue;
3611
3612                 target = request;
3613         }
3614         if (target)
3615                 i915_gem_request_get(target);
3616         spin_unlock(&file_priv->mm.lock);
3617
3618         if (target == NULL)
3619                 return 0;
3620
3621         ret = i915_wait_request(target,
3622                                 I915_WAIT_INTERRUPTIBLE,
3623                                 MAX_SCHEDULE_TIMEOUT);
3624         i915_gem_request_put(target);
3625
3626         return ret < 0 ? ret : 0;
3627 }
3628
3629 struct i915_vma *
3630 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
3631                          const struct i915_ggtt_view *view,
3632                          u64 size,
3633                          u64 alignment,
3634                          u64 flags)
3635 {
3636         struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
3637         struct i915_address_space *vm = &dev_priv->ggtt.base;
3638         struct i915_vma *vma;
3639         int ret;
3640
3641         lockdep_assert_held(&obj->base.dev->struct_mutex);
3642
3643         vma = i915_gem_obj_lookup_or_create_vma(obj, vm, view);
3644         if (IS_ERR(vma))
3645                 return vma;
3646
3647         if (i915_vma_misplaced(vma, size, alignment, flags)) {
3648                 if (flags & PIN_NONBLOCK &&
3649                     (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)))
3650                         return ERR_PTR(-ENOSPC);
3651
3652                 if (flags & PIN_MAPPABLE) {
3653                         u32 fence_size;
3654
3655                         fence_size = i915_gem_get_ggtt_size(dev_priv, vma->size,
3656                                                             i915_gem_object_get_tiling(obj));
3657                         /* If the required space is larger than the available
3658                          * aperture, we will not able to find a slot for the
3659                          * object and unbinding the object now will be in
3660                          * vain. Worse, doing so may cause us to ping-pong
3661                          * the object in and out of the Global GTT and
3662                          * waste a lot of cycles under the mutex.
3663                          */
3664                         if (fence_size > dev_priv->ggtt.mappable_end)
3665                                 return ERR_PTR(-E2BIG);
3666
3667                         /* If NONBLOCK is set the caller is optimistically
3668                          * trying to cache the full object within the mappable
3669                          * aperture, and *must* have a fallback in place for
3670                          * situations where we cannot bind the object. We
3671                          * can be a little more lax here and use the fallback
3672                          * more often to avoid costly migrations of ourselves
3673                          * and other objects within the aperture.
3674                          *
3675                          * Half-the-aperture is used as a simple heuristic.
3676                          * More interesting would to do search for a free
3677                          * block prior to making the commitment to unbind.
3678                          * That caters for the self-harm case, and with a
3679                          * little more heuristics (e.g. NOFAULT, NOEVICT)
3680                          * we could try to minimise harm to others.
3681                          */
3682                         if (flags & PIN_NONBLOCK &&
3683                             fence_size > dev_priv->ggtt.mappable_end / 2)
3684                                 return ERR_PTR(-ENOSPC);
3685                 }
3686
3687                 WARN(i915_vma_is_pinned(vma),
3688                      "bo is already pinned in ggtt with incorrect alignment:"
3689                      " offset=%08x, req.alignment=%llx,"
3690                      " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n",
3691                      i915_ggtt_offset(vma), alignment,
3692                      !!(flags & PIN_MAPPABLE),
3693                      i915_vma_is_map_and_fenceable(vma));
3694                 ret = i915_vma_unbind(vma);
3695                 if (ret)
3696                         return ERR_PTR(ret);
3697         }
3698
3699         ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
3700         if (ret)
3701                 return ERR_PTR(ret);
3702
3703         return vma;
3704 }
3705
3706 static __always_inline unsigned int __busy_read_flag(unsigned int id)
3707 {
3708         /* Note that we could alias engines in the execbuf API, but
3709          * that would be very unwise as it prevents userspace from
3710          * fine control over engine selection. Ahem.
3711          *
3712          * This should be something like EXEC_MAX_ENGINE instead of
3713          * I915_NUM_ENGINES.
3714          */
3715         BUILD_BUG_ON(I915_NUM_ENGINES > 16);
3716         return 0x10000 << id;
3717 }
3718
3719 static __always_inline unsigned int __busy_write_id(unsigned int id)
3720 {
3721         /* The uABI guarantees an active writer is also amongst the read
3722          * engines. This would be true if we accessed the activity tracking
3723          * under the lock, but as we perform the lookup of the object and
3724          * its activity locklessly we can not guarantee that the last_write
3725          * being active implies that we have set the same engine flag from
3726          * last_read - hence we always set both read and write busy for
3727          * last_write.
3728          */
3729         return id | __busy_read_flag(id);
3730 }
3731
3732 static __always_inline unsigned int
3733 __busy_set_if_active(const struct dma_fence *fence,
3734                      unsigned int (*flag)(unsigned int id))
3735 {
3736         struct drm_i915_gem_request *rq;
3737
3738         /* We have to check the current hw status of the fence as the uABI
3739          * guarantees forward progress. We could rely on the idle worker
3740          * to eventually flush us, but to minimise latency just ask the
3741          * hardware.
3742          *
3743          * Note we only report on the status of native fences.
3744          */
3745         if (!dma_fence_is_i915(fence))
3746                 return 0;
3747
3748         /* opencode to_request() in order to avoid const warnings */
3749         rq = container_of(fence, struct drm_i915_gem_request, fence);
3750         if (i915_gem_request_completed(rq))
3751                 return 0;
3752
3753         return flag(rq->engine->exec_id);
3754 }
3755
3756 static __always_inline unsigned int
3757 busy_check_reader(const struct dma_fence *fence)
3758 {
3759         return __busy_set_if_active(fence, __busy_read_flag);
3760 }
3761
3762 static __always_inline unsigned int
3763 busy_check_writer(const struct dma_fence *fence)
3764 {
3765         if (!fence)
3766                 return 0;
3767
3768         return __busy_set_if_active(fence, __busy_write_id);
3769 }
3770
3771 int
3772 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
3773                     struct drm_file *file)
3774 {
3775         struct drm_i915_gem_busy *args = data;
3776         struct drm_i915_gem_object *obj;
3777         struct reservation_object_list *list;
3778         unsigned int seq;
3779         int err;
3780
3781         err = -ENOENT;
3782         rcu_read_lock();
3783         obj = i915_gem_object_lookup_rcu(file, args->handle);
3784         if (!obj)
3785                 goto out;
3786
3787         /* A discrepancy here is that we do not report the status of
3788          * non-i915 fences, i.e. even though we may report the object as idle,
3789          * a call to set-domain may still stall waiting for foreign rendering.
3790          * This also means that wait-ioctl may report an object as busy,
3791          * where busy-ioctl considers it idle.
3792          *
3793          * We trade the ability to warn of foreign fences to report on which
3794          * i915 engines are active for the object.
3795          *
3796          * Alternatively, we can trade that extra information on read/write
3797          * activity with
3798          *      args->busy =
3799          *              !reservation_object_test_signaled_rcu(obj->resv, true);
3800          * to report the overall busyness. This is what the wait-ioctl does.
3801          *
3802          */
3803 retry:
3804         seq = raw_read_seqcount(&obj->resv->seq);
3805
3806         /* Translate the exclusive fence to the READ *and* WRITE engine */
3807         args->busy = busy_check_writer(rcu_dereference(obj->resv->fence_excl));
3808
3809         /* Translate shared fences to READ set of engines */
3810         list = rcu_dereference(obj->resv->fence);
3811         if (list) {
3812                 unsigned int shared_count = list->shared_count, i;
3813
3814                 for (i = 0; i < shared_count; ++i) {
3815                         struct dma_fence *fence =
3816                                 rcu_dereference(list->shared[i]);
3817
3818                         args->busy |= busy_check_reader(fence);
3819                 }
3820         }
3821
3822         if (args->busy && read_seqcount_retry(&obj->resv->seq, seq))
3823                 goto retry;
3824
3825         err = 0;
3826 out:
3827         rcu_read_unlock();
3828         return err;
3829 }
3830
3831 int
3832 i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
3833                         struct drm_file *file_priv)
3834 {
3835         return i915_gem_ring_throttle(dev, file_priv);
3836 }
3837
3838 int
3839 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
3840                        struct drm_file *file_priv)
3841 {
3842         struct drm_i915_private *dev_priv = to_i915(dev);
3843         struct drm_i915_gem_madvise *args = data;
3844         struct drm_i915_gem_object *obj;
3845         int err;
3846
3847         switch (args->madv) {
3848         case I915_MADV_DONTNEED:
3849         case I915_MADV_WILLNEED:
3850             break;
3851         default:
3852             return -EINVAL;
3853         }
3854
3855         obj = i915_gem_object_lookup(file_priv, args->handle);
3856         if (!obj)
3857                 return -ENOENT;
3858
3859         err = mutex_lock_interruptible(&obj->mm.lock);
3860         if (err)
3861                 goto out;
3862
3863         if (obj->mm.pages &&
3864             i915_gem_object_is_tiled(obj) &&
3865             dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
3866                 if (obj->mm.madv == I915_MADV_WILLNEED) {
3867                         GEM_BUG_ON(!obj->mm.quirked);
3868                         __i915_gem_object_unpin_pages(obj);
3869                         obj->mm.quirked = false;
3870                 }
3871                 if (args->madv == I915_MADV_WILLNEED) {
3872                         GEM_BUG_ON(obj->mm.quirked);
3873                         __i915_gem_object_pin_pages(obj);
3874                         obj->mm.quirked = true;
3875                 }
3876         }
3877
3878         if (obj->mm.madv != __I915_MADV_PURGED)
3879                 obj->mm.madv = args->madv;
3880
3881         /* if the object is no longer attached, discard its backing storage */
3882         if (obj->mm.madv == I915_MADV_DONTNEED && !obj->mm.pages)
3883                 i915_gem_object_truncate(obj);
3884
3885         args->retained = obj->mm.madv != __I915_MADV_PURGED;
3886         mutex_unlock(&obj->mm.lock);
3887
3888 out:
3889         i915_gem_object_put(obj);
3890         return err;
3891 }
3892
3893 static void
3894 frontbuffer_retire(struct i915_gem_active *active,
3895                    struct drm_i915_gem_request *request)
3896 {
3897         struct drm_i915_gem_object *obj =
3898                 container_of(active, typeof(*obj), frontbuffer_write);
3899
3900         intel_fb_obj_flush(obj, true, ORIGIN_CS);
3901 }
3902
3903 void i915_gem_object_init(struct drm_i915_gem_object *obj,
3904                           const struct drm_i915_gem_object_ops *ops)
3905 {
3906         mutex_init(&obj->mm.lock);
3907
3908         INIT_LIST_HEAD(&obj->global_link);
3909         INIT_LIST_HEAD(&obj->userfault_link);
3910         INIT_LIST_HEAD(&obj->obj_exec_link);
3911         INIT_LIST_HEAD(&obj->vma_list);
3912         INIT_LIST_HEAD(&obj->batch_pool_link);
3913
3914         obj->ops = ops;
3915
3916         reservation_object_init(&obj->__builtin_resv);
3917         obj->resv = &obj->__builtin_resv;
3918
3919         obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
3920         init_request_active(&obj->frontbuffer_write, frontbuffer_retire);
3921
3922         obj->mm.madv = I915_MADV_WILLNEED;
3923         INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
3924         mutex_init(&obj->mm.get_page.lock);
3925
3926         i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
3927 }
3928
3929 static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
3930         .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
3931                  I915_GEM_OBJECT_IS_SHRINKABLE,
3932         .get_pages = i915_gem_object_get_pages_gtt,
3933         .put_pages = i915_gem_object_put_pages_gtt,
3934 };
3935
3936 /* Note we don't consider signbits :| */
3937 #define overflows_type(x, T) \
3938         (sizeof(x) > sizeof(T) && (x) >> (sizeof(T) * BITS_PER_BYTE))
3939
3940 struct drm_i915_gem_object *
3941 i915_gem_object_create(struct drm_device *dev, u64 size)
3942 {
3943         struct drm_i915_private *dev_priv = to_i915(dev);
3944         struct drm_i915_gem_object *obj;
3945         struct address_space *mapping;
3946         gfp_t mask;
3947         int ret;
3948
3949         /* There is a prevalence of the assumption that we fit the object's
3950          * page count inside a 32bit _signed_ variable. Let's document this and
3951          * catch if we ever need to fix it. In the meantime, if you do spot
3952          * such a local variable, please consider fixing!
3953          */
3954         if (WARN_ON(size >> PAGE_SHIFT > INT_MAX))
3955                 return ERR_PTR(-E2BIG);
3956
3957         if (overflows_type(size, obj->base.size))
3958                 return ERR_PTR(-E2BIG);
3959
3960         obj = i915_gem_object_alloc(dev);
3961         if (obj == NULL)
3962                 return ERR_PTR(-ENOMEM);
3963
3964         ret = drm_gem_object_init(dev, &obj->base, size);
3965         if (ret)
3966                 goto fail;
3967
3968         mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
3969         if (IS_CRESTLINE(dev_priv) || IS_BROADWATER(dev_priv)) {
3970                 /* 965gm cannot relocate objects above 4GiB. */
3971                 mask &= ~__GFP_HIGHMEM;
3972                 mask |= __GFP_DMA32;
3973         }
3974
3975         mapping = obj->base.filp->f_mapping;
3976         mapping_set_gfp_mask(mapping, mask);
3977
3978         i915_gem_object_init(obj, &i915_gem_object_ops);
3979
3980         obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3981         obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3982
3983         if (HAS_LLC(dev_priv)) {
3984                 /* On some devices, we can have the GPU use the LLC (the CPU
3985                  * cache) for about a 10% performance improvement
3986                  * compared to uncached.  Graphics requests other than
3987                  * display scanout are coherent with the CPU in
3988                  * accessing this cache.  This means in this mode we
3989                  * don't need to clflush on the CPU side, and on the
3990                  * GPU side we only need to flush internal caches to
3991                  * get data visible to the CPU.
3992                  *
3993                  * However, we maintain the display planes as UC, and so
3994                  * need to rebind when first used as such.
3995                  */
3996                 obj->cache_level = I915_CACHE_LLC;
3997         } else
3998                 obj->cache_level = I915_CACHE_NONE;
3999
4000         trace_i915_gem_object_create(obj);
4001
4002         return obj;
4003
4004 fail:
4005         i915_gem_object_free(obj);
4006         return ERR_PTR(ret);
4007 }
4008
4009 static bool discard_backing_storage(struct drm_i915_gem_object *obj)
4010 {
4011         /* If we are the last user of the backing storage (be it shmemfs
4012          * pages or stolen etc), we know that the pages are going to be
4013          * immediately released. In this case, we can then skip copying
4014          * back the contents from the GPU.
4015          */
4016
4017         if (obj->mm.madv != I915_MADV_WILLNEED)
4018                 return false;
4019
4020         if (obj->base.filp == NULL)
4021                 return true;
4022
4023         /* At first glance, this looks racy, but then again so would be
4024          * userspace racing mmap against close. However, the first external
4025          * reference to the filp can only be obtained through the
4026          * i915_gem_mmap_ioctl() which safeguards us against the user
4027          * acquiring such a reference whilst we are in the middle of
4028          * freeing the object.
4029          */
4030         return atomic_long_read(&obj->base.filp->f_count) == 1;
4031 }
4032
4033 static void __i915_gem_free_objects(struct drm_i915_private *i915,
4034                                     struct llist_node *freed)
4035 {
4036         struct drm_i915_gem_object *obj, *on;
4037
4038         mutex_lock(&i915->drm.struct_mutex);
4039         intel_runtime_pm_get(i915);
4040         llist_for_each_entry(obj, freed, freed) {
4041                 struct i915_vma *vma, *vn;
4042
4043                 trace_i915_gem_object_destroy(obj);
4044
4045                 GEM_BUG_ON(i915_gem_object_is_active(obj));
4046                 list_for_each_entry_safe(vma, vn,
4047                                          &obj->vma_list, obj_link) {
4048                         GEM_BUG_ON(!i915_vma_is_ggtt(vma));
4049                         GEM_BUG_ON(i915_vma_is_active(vma));
4050                         vma->flags &= ~I915_VMA_PIN_MASK;
4051                         i915_vma_close(vma);
4052                 }
4053                 GEM_BUG_ON(!list_empty(&obj->vma_list));
4054                 GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree));
4055
4056                 list_del(&obj->global_link);
4057         }
4058         intel_runtime_pm_put(i915);
4059         mutex_unlock(&i915->drm.struct_mutex);
4060
4061         llist_for_each_entry_safe(obj, on, freed, freed) {
4062                 GEM_BUG_ON(obj->bind_count);
4063                 GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
4064
4065                 if (obj->ops->release)
4066                         obj->ops->release(obj);
4067
4068                 if (WARN_ON(i915_gem_object_has_pinned_pages(obj)))
4069                         atomic_set(&obj->mm.pages_pin_count, 0);
4070                 __i915_gem_object_put_pages(obj, I915_MM_NORMAL);
4071                 GEM_BUG_ON(obj->mm.pages);
4072
4073                 if (obj->base.import_attach)
4074                         drm_prime_gem_destroy(&obj->base, NULL);
4075
4076                 reservation_object_fini(&obj->__builtin_resv);
4077                 drm_gem_object_release(&obj->base);
4078                 i915_gem_info_remove_obj(i915, obj->base.size);
4079
4080                 kfree(obj->bit_17);
4081                 i915_gem_object_free(obj);
4082         }
4083 }
4084
4085 static void i915_gem_flush_free_objects(struct drm_i915_private *i915)
4086 {
4087         struct llist_node *freed;
4088
4089         freed = llist_del_all(&i915->mm.free_list);
4090         if (unlikely(freed))
4091                 __i915_gem_free_objects(i915, freed);
4092 }
4093
4094 static void __i915_gem_free_work(struct work_struct *work)
4095 {
4096         struct drm_i915_private *i915 =
4097                 container_of(work, struct drm_i915_private, mm.free_work);
4098         struct llist_node *freed;
4099
4100         /* All file-owned VMA should have been released by this point through
4101          * i915_gem_close_object(), or earlier by i915_gem_context_close().
4102          * However, the object may also be bound into the global GTT (e.g.
4103          * older GPUs without per-process support, or for direct access through
4104          * the GTT either for the user or for scanout). Those VMA still need to
4105          * unbound now.
4106          */
4107
4108         while ((freed = llist_del_all(&i915->mm.free_list)))
4109                 __i915_gem_free_objects(i915, freed);
4110 }
4111
4112 static void __i915_gem_free_object_rcu(struct rcu_head *head)
4113 {
4114         struct drm_i915_gem_object *obj =
4115                 container_of(head, typeof(*obj), rcu);
4116         struct drm_i915_private *i915 = to_i915(obj->base.dev);
4117
4118         /* We can't simply use call_rcu() from i915_gem_free_object()
4119          * as we need to block whilst unbinding, and the call_rcu
4120          * task may be called from softirq context. So we take a
4121          * detour through a worker.
4122          */
4123         if (llist_add(&obj->freed, &i915->mm.free_list))
4124                 schedule_work(&i915->mm.free_work);
4125 }
4126
4127 void i915_gem_free_object(struct drm_gem_object *gem_obj)
4128 {
4129         struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
4130
4131         if (obj->mm.quirked)
4132                 __i915_gem_object_unpin_pages(obj);
4133
4134         if (discard_backing_storage(obj))
4135                 obj->mm.madv = I915_MADV_DONTNEED;
4136
4137         /* Before we free the object, make sure any pure RCU-only
4138          * read-side critical sections are complete, e.g.
4139          * i915_gem_busy_ioctl(). For the corresponding synchronized
4140          * lookup see i915_gem_object_lookup_rcu().
4141          */
4142         call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
4143 }
4144
4145 void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
4146 {
4147         lockdep_assert_held(&obj->base.dev->struct_mutex);
4148
4149         GEM_BUG_ON(i915_gem_object_has_active_reference(obj));
4150         if (i915_gem_object_is_active(obj))
4151                 i915_gem_object_set_active_reference(obj);
4152         else
4153                 i915_gem_object_put(obj);
4154 }
4155
4156 static void assert_kernel_context_is_current(struct drm_i915_private *dev_priv)
4157 {
4158         struct intel_engine_cs *engine;
4159         enum intel_engine_id id;
4160
4161         for_each_engine(engine, dev_priv, id)
4162                 GEM_BUG_ON(engine->last_context != dev_priv->kernel_context);
4163 }
4164
4165 int i915_gem_suspend(struct drm_device *dev)
4166 {
4167         struct drm_i915_private *dev_priv = to_i915(dev);
4168         int ret;
4169
4170         intel_suspend_gt_powersave(dev_priv);
4171
4172         mutex_lock(&dev->struct_mutex);
4173
4174         /* We have to flush all the executing contexts to main memory so
4175          * that they can saved in the hibernation image. To ensure the last
4176          * context image is coherent, we have to switch away from it. That
4177          * leaves the dev_priv->kernel_context still active when
4178          * we actually suspend, and its image in memory may not match the GPU
4179          * state. Fortunately, the kernel_context is disposable and we do
4180          * not rely on its state.
4181          */
4182         ret = i915_gem_switch_to_kernel_context(dev_priv);
4183         if (ret)
4184                 goto err;
4185
4186         ret = i915_gem_wait_for_idle(dev_priv,
4187                                      I915_WAIT_INTERRUPTIBLE |
4188                                      I915_WAIT_LOCKED);
4189         if (ret)
4190                 goto err;
4191
4192         i915_gem_retire_requests(dev_priv);
4193         GEM_BUG_ON(dev_priv->gt.active_requests);
4194
4195         assert_kernel_context_is_current(dev_priv);
4196         i915_gem_context_lost(dev_priv);
4197         mutex_unlock(&dev->struct_mutex);
4198
4199         cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
4200         cancel_delayed_work_sync(&dev_priv->gt.retire_work);
4201         flush_delayed_work(&dev_priv->gt.idle_work);
4202         flush_work(&dev_priv->mm.free_work);
4203
4204         /* Assert that we sucessfully flushed all the work and
4205          * reset the GPU back to its idle, low power state.
4206          */
4207         WARN_ON(dev_priv->gt.awake);
4208         WARN_ON(!intel_execlists_idle(dev_priv));
4209
4210         /*
4211          * Neither the BIOS, ourselves or any other kernel
4212          * expects the system to be in execlists mode on startup,
4213          * so we need to reset the GPU back to legacy mode. And the only
4214          * known way to disable logical contexts is through a GPU reset.
4215          *
4216          * So in order to leave the system in a known default configuration,
4217          * always reset the GPU upon unload and suspend. Afterwards we then
4218          * clean up the GEM state tracking, flushing off the requests and
4219          * leaving the system in a known idle state.
4220          *
4221          * Note that is of the upmost importance that the GPU is idle and
4222          * all stray writes are flushed *before* we dismantle the backing
4223          * storage for the pinned objects.
4224          *
4225          * However, since we are uncertain that resetting the GPU on older
4226          * machines is a good idea, we don't - just in case it leaves the
4227          * machine in an unusable condition.
4228          */
4229         if (HAS_HW_CONTEXTS(dev_priv)) {
4230                 int reset = intel_gpu_reset(dev_priv, ALL_ENGINES);
4231                 WARN_ON(reset && reset != -ENODEV);
4232         }
4233
4234         return 0;
4235
4236 err:
4237         mutex_unlock(&dev->struct_mutex);
4238         return ret;
4239 }
4240
4241 void i915_gem_resume(struct drm_device *dev)
4242 {
4243         struct drm_i915_private *dev_priv = to_i915(dev);
4244
4245         WARN_ON(dev_priv->gt.awake);
4246
4247         mutex_lock(&dev->struct_mutex);
4248         i915_gem_restore_gtt_mappings(dev_priv);
4249
4250         /* As we didn't flush the kernel context before suspend, we cannot
4251          * guarantee that the context image is complete. So let's just reset
4252          * it and start again.
4253          */
4254         dev_priv->gt.resume(dev_priv);
4255
4256         mutex_unlock(&dev->struct_mutex);
4257 }
4258
4259 void i915_gem_init_swizzling(struct drm_i915_private *dev_priv)
4260 {
4261         if (INTEL_GEN(dev_priv) < 5 ||
4262             dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
4263                 return;
4264
4265         I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
4266                                  DISP_TILE_SURFACE_SWIZZLING);
4267
4268         if (IS_GEN5(dev_priv))
4269                 return;
4270
4271         I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
4272         if (IS_GEN6(dev_priv))
4273                 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
4274         else if (IS_GEN7(dev_priv))
4275                 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
4276         else if (IS_GEN8(dev_priv))
4277                 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
4278         else
4279                 BUG();
4280 }
4281
4282 static void init_unused_ring(struct drm_i915_private *dev_priv, u32 base)
4283 {
4284         I915_WRITE(RING_CTL(base), 0);
4285         I915_WRITE(RING_HEAD(base), 0);
4286         I915_WRITE(RING_TAIL(base), 0);
4287         I915_WRITE(RING_START(base), 0);
4288 }
4289
4290 static void init_unused_rings(struct drm_i915_private *dev_priv)
4291 {
4292         if (IS_I830(dev_priv)) {
4293                 init_unused_ring(dev_priv, PRB1_BASE);
4294                 init_unused_ring(dev_priv, SRB0_BASE);
4295                 init_unused_ring(dev_priv, SRB1_BASE);
4296                 init_unused_ring(dev_priv, SRB2_BASE);
4297                 init_unused_ring(dev_priv, SRB3_BASE);
4298         } else if (IS_GEN2(dev_priv)) {
4299                 init_unused_ring(dev_priv, SRB0_BASE);
4300                 init_unused_ring(dev_priv, SRB1_BASE);
4301         } else if (IS_GEN3(dev_priv)) {
4302                 init_unused_ring(dev_priv, PRB1_BASE);
4303                 init_unused_ring(dev_priv, PRB2_BASE);
4304         }
4305 }
4306
4307 int
4308 i915_gem_init_hw(struct drm_device *dev)
4309 {
4310         struct drm_i915_private *dev_priv = to_i915(dev);
4311         struct intel_engine_cs *engine;
4312         enum intel_engine_id id;
4313         int ret;
4314
4315         dev_priv->gt.last_init_time = ktime_get();
4316
4317         /* Double layer security blanket, see i915_gem_init() */
4318         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4319
4320         if (HAS_EDRAM(dev_priv) && INTEL_GEN(dev_priv) < 9)
4321                 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
4322
4323         if (IS_HASWELL(dev_priv))
4324                 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ?
4325                            LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
4326
4327         if (HAS_PCH_NOP(dev_priv)) {
4328                 if (IS_IVYBRIDGE(dev_priv)) {
4329                         u32 temp = I915_READ(GEN7_MSG_CTL);
4330                         temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
4331                         I915_WRITE(GEN7_MSG_CTL, temp);
4332                 } else if (INTEL_GEN(dev_priv) >= 7) {
4333                         u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT);
4334                         temp &= ~RESET_PCH_HANDSHAKE_ENABLE;
4335                         I915_WRITE(HSW_NDE_RSTWRN_OPT, temp);
4336                 }
4337         }
4338
4339         i915_gem_init_swizzling(dev_priv);
4340
4341         /*
4342          * At least 830 can leave some of the unused rings
4343          * "active" (ie. head != tail) after resume which
4344          * will prevent c3 entry. Makes sure all unused rings
4345          * are totally idle.
4346          */
4347         init_unused_rings(dev_priv);
4348
4349         BUG_ON(!dev_priv->kernel_context);
4350
4351         ret = i915_ppgtt_init_hw(dev_priv);
4352         if (ret) {
4353                 DRM_ERROR("PPGTT enable HW failed %d\n", ret);
4354                 goto out;
4355         }
4356
4357         /* Need to do basic initialisation of all rings first: */
4358         for_each_engine(engine, dev_priv, id) {
4359                 ret = engine->init_hw(engine);
4360                 if (ret)
4361                         goto out;
4362         }
4363
4364         intel_mocs_init_l3cc_table(dev);
4365
4366         /* We can't enable contexts until all firmware is loaded */
4367         ret = intel_guc_setup(dev);
4368         if (ret)
4369                 goto out;
4370
4371 out:
4372         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4373         return ret;
4374 }
4375
4376 bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value)
4377 {
4378         if (INTEL_INFO(dev_priv)->gen < 6)
4379                 return false;
4380
4381         /* TODO: make semaphores and Execlists play nicely together */
4382         if (i915.enable_execlists)
4383                 return false;
4384
4385         if (value >= 0)
4386                 return value;
4387
4388 #ifdef CONFIG_INTEL_IOMMU
4389         /* Enable semaphores on SNB when IO remapping is off */
4390         if (INTEL_INFO(dev_priv)->gen == 6 && intel_iommu_gfx_mapped)
4391                 return false;
4392 #endif
4393
4394         return true;
4395 }
4396
4397 int i915_gem_init(struct drm_device *dev)
4398 {
4399         struct drm_i915_private *dev_priv = to_i915(dev);
4400         int ret;
4401
4402         mutex_lock(&dev->struct_mutex);
4403
4404         if (!i915.enable_execlists) {
4405                 dev_priv->gt.resume = intel_legacy_submission_resume;
4406                 dev_priv->gt.cleanup_engine = intel_engine_cleanup;
4407         } else {
4408                 dev_priv->gt.resume = intel_lr_context_resume;
4409                 dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
4410         }
4411
4412         /* This is just a security blanket to placate dragons.
4413          * On some systems, we very sporadically observe that the first TLBs
4414          * used by the CS may be stale, despite us poking the TLB reset. If
4415          * we hold the forcewake during initialisation these problems
4416          * just magically go away.
4417          */
4418         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4419
4420         i915_gem_init_userptr(dev_priv);
4421
4422         ret = i915_gem_init_ggtt(dev_priv);
4423         if (ret)
4424                 goto out_unlock;
4425
4426         ret = i915_gem_context_init(dev);
4427         if (ret)
4428                 goto out_unlock;
4429
4430         ret = intel_engines_init(dev);
4431         if (ret)
4432                 goto out_unlock;
4433
4434         ret = i915_gem_init_hw(dev);
4435         if (ret == -EIO) {
4436                 /* Allow engine initialisation to fail by marking the GPU as
4437                  * wedged. But we only want to do this where the GPU is angry,
4438                  * for all other failure, such as an allocation failure, bail.
4439                  */
4440                 DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
4441                 i915_gem_set_wedged(dev_priv);
4442                 ret = 0;
4443         }
4444
4445 out_unlock:
4446         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4447         mutex_unlock(&dev->struct_mutex);
4448
4449         return ret;
4450 }
4451
4452 void
4453 i915_gem_cleanup_engines(struct drm_device *dev)
4454 {
4455         struct drm_i915_private *dev_priv = to_i915(dev);
4456         struct intel_engine_cs *engine;
4457         enum intel_engine_id id;
4458
4459         for_each_engine(engine, dev_priv, id)
4460                 dev_priv->gt.cleanup_engine(engine);
4461 }
4462
4463 void
4464 i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
4465 {
4466         int i;
4467
4468         if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) &&
4469             !IS_CHERRYVIEW(dev_priv))
4470                 dev_priv->num_fence_regs = 32;
4471         else if (INTEL_INFO(dev_priv)->gen >= 4 || IS_I945G(dev_priv) ||
4472                  IS_I945GM(dev_priv) || IS_G33(dev_priv))
4473                 dev_priv->num_fence_regs = 16;
4474         else
4475                 dev_priv->num_fence_regs = 8;
4476
4477         if (intel_vgpu_active(dev_priv))
4478                 dev_priv->num_fence_regs =
4479                                 I915_READ(vgtif_reg(avail_rs.fence_num));
4480
4481         /* Initialize fence registers to zero */
4482         for (i = 0; i < dev_priv->num_fence_regs; i++) {
4483                 struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
4484
4485                 fence->i915 = dev_priv;
4486                 fence->id = i;
4487                 list_add_tail(&fence->link, &dev_priv->mm.fence_list);
4488         }
4489         i915_gem_restore_fences(dev_priv);
4490
4491         i915_gem_detect_bit_6_swizzle(dev_priv);
4492 }
4493
4494 int
4495 i915_gem_load_init(struct drm_device *dev)
4496 {
4497         struct drm_i915_private *dev_priv = to_i915(dev);
4498         int err = -ENOMEM;
4499
4500         dev_priv->objects = KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN);
4501         if (!dev_priv->objects)
4502                 goto err_out;
4503
4504         dev_priv->vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN);
4505         if (!dev_priv->vmas)
4506                 goto err_objects;
4507
4508         dev_priv->requests = KMEM_CACHE(drm_i915_gem_request,
4509                                         SLAB_HWCACHE_ALIGN |
4510                                         SLAB_RECLAIM_ACCOUNT |
4511                                         SLAB_DESTROY_BY_RCU);
4512         if (!dev_priv->requests)
4513                 goto err_vmas;
4514
4515         dev_priv->dependencies = KMEM_CACHE(i915_dependency,
4516                                             SLAB_HWCACHE_ALIGN |
4517                                             SLAB_RECLAIM_ACCOUNT);
4518         if (!dev_priv->dependencies)
4519                 goto err_requests;
4520
4521         mutex_lock(&dev_priv->drm.struct_mutex);
4522         INIT_LIST_HEAD(&dev_priv->gt.timelines);
4523         err = i915_gem_timeline_init__global(dev_priv);
4524         mutex_unlock(&dev_priv->drm.struct_mutex);
4525         if (err)
4526                 goto err_dependencies;
4527
4528         INIT_LIST_HEAD(&dev_priv->context_list);
4529         INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_work);
4530         init_llist_head(&dev_priv->mm.free_list);
4531         INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
4532         INIT_LIST_HEAD(&dev_priv->mm.bound_list);
4533         INIT_LIST_HEAD(&dev_priv->mm.fence_list);
4534         INIT_LIST_HEAD(&dev_priv->mm.userfault_list);
4535         INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
4536                           i915_gem_retire_work_handler);
4537         INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
4538                           i915_gem_idle_work_handler);
4539         init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
4540         init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
4541
4542         dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
4543
4544         init_waitqueue_head(&dev_priv->pending_flip_queue);
4545
4546         dev_priv->mm.interruptible = true;
4547
4548         atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
4549
4550         spin_lock_init(&dev_priv->fb_tracking.lock);
4551
4552         return 0;
4553
4554 err_dependencies:
4555         kmem_cache_destroy(dev_priv->dependencies);
4556 err_requests:
4557         kmem_cache_destroy(dev_priv->requests);
4558 err_vmas:
4559         kmem_cache_destroy(dev_priv->vmas);
4560 err_objects:
4561         kmem_cache_destroy(dev_priv->objects);
4562 err_out:
4563         return err;
4564 }
4565
4566 void i915_gem_load_cleanup(struct drm_device *dev)
4567 {
4568         struct drm_i915_private *dev_priv = to_i915(dev);
4569
4570         WARN_ON(!llist_empty(&dev_priv->mm.free_list));
4571
4572         mutex_lock(&dev_priv->drm.struct_mutex);
4573         i915_gem_timeline_fini(&dev_priv->gt.global_timeline);
4574         WARN_ON(!list_empty(&dev_priv->gt.timelines));
4575         mutex_unlock(&dev_priv->drm.struct_mutex);
4576
4577         kmem_cache_destroy(dev_priv->dependencies);
4578         kmem_cache_destroy(dev_priv->requests);
4579         kmem_cache_destroy(dev_priv->vmas);
4580         kmem_cache_destroy(dev_priv->objects);
4581
4582         /* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */
4583         rcu_barrier();
4584 }
4585
4586 int i915_gem_freeze(struct drm_i915_private *dev_priv)
4587 {
4588         intel_runtime_pm_get(dev_priv);
4589
4590         mutex_lock(&dev_priv->drm.struct_mutex);
4591         i915_gem_shrink_all(dev_priv);
4592         mutex_unlock(&dev_priv->drm.struct_mutex);
4593
4594         intel_runtime_pm_put(dev_priv);
4595
4596         return 0;
4597 }
4598
4599 int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
4600 {
4601         struct drm_i915_gem_object *obj;
4602         struct list_head *phases[] = {
4603                 &dev_priv->mm.unbound_list,
4604                 &dev_priv->mm.bound_list,
4605                 NULL
4606         }, **p;
4607
4608         /* Called just before we write the hibernation image.
4609          *
4610          * We need to update the domain tracking to reflect that the CPU
4611          * will be accessing all the pages to create and restore from the
4612          * hibernation, and so upon restoration those pages will be in the
4613          * CPU domain.
4614          *
4615          * To make sure the hibernation image contains the latest state,
4616          * we update that state just before writing out the image.
4617          *
4618          * To try and reduce the hibernation image, we manually shrink
4619          * the objects as well.
4620          */
4621
4622         mutex_lock(&dev_priv->drm.struct_mutex);
4623         i915_gem_shrink(dev_priv, -1UL, I915_SHRINK_UNBOUND);
4624
4625         for (p = phases; *p; p++) {
4626                 list_for_each_entry(obj, *p, global_link) {
4627                         obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4628                         obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4629                 }
4630         }
4631         mutex_unlock(&dev_priv->drm.struct_mutex);
4632
4633         return 0;
4634 }
4635
4636 void i915_gem_release(struct drm_device *dev, struct drm_file *file)
4637 {
4638         struct drm_i915_file_private *file_priv = file->driver_priv;
4639         struct drm_i915_gem_request *request;
4640
4641         /* Clean up our request list when the client is going away, so that
4642          * later retire_requests won't dereference our soon-to-be-gone
4643          * file_priv.
4644          */
4645         spin_lock(&file_priv->mm.lock);
4646         list_for_each_entry(request, &file_priv->mm.request_list, client_list)
4647                 request->file_priv = NULL;
4648         spin_unlock(&file_priv->mm.lock);
4649
4650         if (!list_empty(&file_priv->rps.link)) {
4651                 spin_lock(&to_i915(dev)->rps.client_lock);
4652                 list_del(&file_priv->rps.link);
4653                 spin_unlock(&to_i915(dev)->rps.client_lock);
4654         }
4655 }
4656
4657 int i915_gem_open(struct drm_device *dev, struct drm_file *file)
4658 {
4659         struct drm_i915_file_private *file_priv;
4660         int ret;
4661
4662         DRM_DEBUG("\n");
4663
4664         file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
4665         if (!file_priv)
4666                 return -ENOMEM;
4667
4668         file->driver_priv = file_priv;
4669         file_priv->dev_priv = to_i915(dev);
4670         file_priv->file = file;
4671         INIT_LIST_HEAD(&file_priv->rps.link);
4672
4673         spin_lock_init(&file_priv->mm.lock);
4674         INIT_LIST_HEAD(&file_priv->mm.request_list);
4675
4676         file_priv->bsd_engine = -1;
4677
4678         ret = i915_gem_context_open(dev, file);
4679         if (ret)
4680                 kfree(file_priv);
4681
4682         return ret;
4683 }
4684
4685 /**
4686  * i915_gem_track_fb - update frontbuffer tracking
4687  * @old: current GEM buffer for the frontbuffer slots
4688  * @new: new GEM buffer for the frontbuffer slots
4689  * @frontbuffer_bits: bitmask of frontbuffer slots
4690  *
4691  * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
4692  * from @old and setting them in @new. Both @old and @new can be NULL.
4693  */
4694 void i915_gem_track_fb(struct drm_i915_gem_object *old,
4695                        struct drm_i915_gem_object *new,
4696                        unsigned frontbuffer_bits)
4697 {
4698         /* Control of individual bits within the mask are guarded by
4699          * the owning plane->mutex, i.e. we can never see concurrent
4700          * manipulation of individual bits. But since the bitfield as a whole
4701          * is updated using RMW, we need to use atomics in order to update
4702          * the bits.
4703          */
4704         BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
4705                      sizeof(atomic_t) * BITS_PER_BYTE);
4706
4707         if (old) {
4708                 WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
4709                 atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
4710         }
4711
4712         if (new) {
4713                 WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
4714                 atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
4715         }
4716 }
4717
4718 /* Allocate a new GEM object and fill it with the supplied data */
4719 struct drm_i915_gem_object *
4720 i915_gem_object_create_from_data(struct drm_device *dev,
4721                                  const void *data, size_t size)
4722 {
4723         struct drm_i915_gem_object *obj;
4724         struct sg_table *sg;
4725         size_t bytes;
4726         int ret;
4727
4728         obj = i915_gem_object_create(dev, round_up(size, PAGE_SIZE));
4729         if (IS_ERR(obj))
4730                 return obj;
4731
4732         ret = i915_gem_object_set_to_cpu_domain(obj, true);
4733         if (ret)
4734                 goto fail;
4735
4736         ret = i915_gem_object_pin_pages(obj);
4737         if (ret)
4738                 goto fail;
4739
4740         sg = obj->mm.pages;
4741         bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size);
4742         obj->mm.dirty = true; /* Backing store is now out of date */
4743         i915_gem_object_unpin_pages(obj);
4744
4745         if (WARN_ON(bytes != size)) {
4746                 DRM_ERROR("Incomplete copy, wrote %zu of %zu", bytes, size);
4747                 ret = -EFAULT;
4748                 goto fail;
4749         }
4750
4751         return obj;
4752
4753 fail:
4754         i915_gem_object_put(obj);
4755         return ERR_PTR(ret);
4756 }
4757
4758 struct scatterlist *
4759 i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
4760                        unsigned int n,
4761                        unsigned int *offset)
4762 {
4763         struct i915_gem_object_page_iter *iter = &obj->mm.get_page;
4764         struct scatterlist *sg;
4765         unsigned int idx, count;
4766
4767         might_sleep();
4768         GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
4769         GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
4770
4771         /* As we iterate forward through the sg, we record each entry in a
4772          * radixtree for quick repeated (backwards) lookups. If we have seen
4773          * this index previously, we will have an entry for it.
4774          *
4775          * Initial lookup is O(N), but this is amortized to O(1) for
4776          * sequential page access (where each new request is consecutive
4777          * to the previous one). Repeated lookups are O(lg(obj->base.size)),
4778          * i.e. O(1) with a large constant!
4779          */
4780         if (n < READ_ONCE(iter->sg_idx))
4781                 goto lookup;
4782
4783         mutex_lock(&iter->lock);
4784
4785         /* We prefer to reuse the last sg so that repeated lookup of this
4786          * (or the subsequent) sg are fast - comparing against the last
4787          * sg is faster than going through the radixtree.
4788          */
4789
4790         sg = iter->sg_pos;
4791         idx = iter->sg_idx;
4792         count = __sg_page_count(sg);
4793
4794         while (idx + count <= n) {
4795                 unsigned long exception, i;
4796                 int ret;
4797
4798                 /* If we cannot allocate and insert this entry, or the
4799                  * individual pages from this range, cancel updating the
4800                  * sg_idx so that on this lookup we are forced to linearly
4801                  * scan onwards, but on future lookups we will try the
4802                  * insertion again (in which case we need to be careful of
4803                  * the error return reporting that we have already inserted
4804                  * this index).
4805                  */
4806                 ret = radix_tree_insert(&iter->radix, idx, sg);
4807                 if (ret && ret != -EEXIST)
4808                         goto scan;
4809
4810                 exception =
4811                         RADIX_TREE_EXCEPTIONAL_ENTRY |
4812                         idx << RADIX_TREE_EXCEPTIONAL_SHIFT;
4813                 for (i = 1; i < count; i++) {
4814                         ret = radix_tree_insert(&iter->radix, idx + i,
4815                                                 (void *)exception);
4816                         if (ret && ret != -EEXIST)
4817                                 goto scan;
4818                 }
4819
4820                 idx += count;
4821                 sg = ____sg_next(sg);
4822                 count = __sg_page_count(sg);
4823         }
4824
4825 scan:
4826         iter->sg_pos = sg;
4827         iter->sg_idx = idx;
4828
4829         mutex_unlock(&iter->lock);
4830
4831         if (unlikely(n < idx)) /* insertion completed by another thread */
4832                 goto lookup;
4833
4834         /* In case we failed to insert the entry into the radixtree, we need
4835          * to look beyond the current sg.
4836          */
4837         while (idx + count <= n) {
4838                 idx += count;
4839                 sg = ____sg_next(sg);
4840                 count = __sg_page_count(sg);
4841         }
4842
4843         *offset = n - idx;
4844         return sg;
4845
4846 lookup:
4847         rcu_read_lock();
4848
4849         sg = radix_tree_lookup(&iter->radix, n);
4850         GEM_BUG_ON(!sg);
4851
4852         /* If this index is in the middle of multi-page sg entry,
4853          * the radixtree will contain an exceptional entry that points
4854          * to the start of that range. We will return the pointer to
4855          * the base page and the offset of this page within the
4856          * sg entry's range.
4857          */
4858         *offset = 0;
4859         if (unlikely(radix_tree_exception(sg))) {
4860                 unsigned long base =
4861                         (unsigned long)sg >> RADIX_TREE_EXCEPTIONAL_SHIFT;
4862
4863                 sg = radix_tree_lookup(&iter->radix, base);
4864                 GEM_BUG_ON(!sg);
4865
4866                 *offset = n - base;
4867         }
4868
4869         rcu_read_unlock();
4870
4871         return sg;
4872 }
4873
4874 struct page *
4875 i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n)
4876 {
4877         struct scatterlist *sg;
4878         unsigned int offset;
4879
4880         GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
4881
4882         sg = i915_gem_object_get_sg(obj, n, &offset);
4883         return nth_page(sg_page(sg), offset);
4884 }
4885
4886 /* Like i915_gem_object_get_page(), but mark the returned page dirty */
4887 struct page *
4888 i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
4889                                unsigned int n)
4890 {
4891         struct page *page;
4892
4893         page = i915_gem_object_get_page(obj, n);
4894         if (!obj->mm.dirty)
4895                 set_page_dirty(page);
4896
4897         return page;
4898 }
4899
4900 dma_addr_t
4901 i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
4902                                 unsigned long n)
4903 {
4904         struct scatterlist *sg;
4905         unsigned int offset;
4906
4907         sg = i915_gem_object_get_sg(obj, n, &offset);
4908         return sg_dma_address(sg) + (offset << PAGE_SHIFT);
4909 }