drivers/gpu/drm/i915/i915_gem.c

   1 /*
   2  * Copyright © 2008-2015 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  *
  23  * Authors:
  24  *    Eric Anholt <eric@anholt.net>
  25  *
  26  */
  27
  28 #include <drm/drm_vma_manager.h>
  29 #include <drm/drm_pci.h>
  30 #include <drm/i915_drm.h>
  31 #include <linux/dma-fence-array.h>
  32 #include <linux/kthread.h>
  33 #include <linux/reservation.h>
  34 #include <linux/shmem_fs.h>
  35 #include <linux/slab.h>
  36 #include <linux/stop_machine.h>
  37 #include <linux/swap.h>
  38 #include <linux/pci.h>
  39 #include <linux/dma-buf.h>
  40 #include <linux/mman.h>
  41
  42 #include "gt/intel_engine_pm.h"
  43 #include "gt/intel_gt_pm.h"
  44 #include "gt/intel_mocs.h"
  45 #include "gt/intel_reset.h"
  46 #include "gt/intel_workarounds.h"
  47
  48 #include "i915_drv.h"
  49 #include "i915_gem_clflush.h"
  50 #include "i915_gemfs.h"
  51 #include "i915_gem_pm.h"
  52 #include "i915_trace.h"
  53 #include "i915_vgpu.h"
  54
  55 #include "intel_display.h"
  56 #include "intel_drv.h"
  57 #include "intel_frontbuffer.h"
  58 #include "intel_pm.h"
  59
  60 static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
  61
  62 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
  63 {
  64         if (obj->cache_dirty)
  65                 return false;
  66
  67         if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
  68                 return true;
  69
  70         return obj->pin_global; /* currently in use by HW, keep flushed */
  71 }
  72
  73 static int
  74 insert_mappable_node(struct i915_ggtt *ggtt,
  75                      struct drm_mm_node *node, u32 size)
  76 {
  77         memset(node, 0, sizeof(*node));
  78         return drm_mm_insert_node_in_range(&ggtt->vm.mm, node,
  79                                            size, 0, I915_COLOR_UNEVICTABLE,
  80                                            0, ggtt->mappable_end,
  81                                            DRM_MM_INSERT_LOW);
  82 }
  83
  84 static void
  85 remove_mappable_node(struct drm_mm_node *node)
  86 {
  87         drm_mm_remove_node(node);
  88 }
  89
  90 /* some bookkeeping */
  91 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
  92                                   u64 size)
  93 {
  94         spin_lock(&dev_priv->mm.object_stat_lock);
  95         dev_priv->mm.object_count++;
  96         dev_priv->mm.object_memory += size;
  97         spin_unlock(&dev_priv->mm.object_stat_lock);
  98 }
  99
 100 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
 101                                      u64 size)
 102 {
 103         spin_lock(&dev_priv->mm.object_stat_lock);
 104         dev_priv->mm.object_count--;
 105         dev_priv->mm.object_memory -= size;
 106         spin_unlock(&dev_priv->mm.object_stat_lock);
 107 }
 108
 109 int
 110 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 111                             struct drm_file *file)
 112 {
 113         struct i915_ggtt *ggtt = &to_i915(dev)->ggtt;
 114         struct drm_i915_gem_get_aperture *args = data;
 115         struct i915_vma *vma;
 116         u64 pinned;
 117
 118         mutex_lock(&ggtt->vm.mutex);
 119
 120         pinned = ggtt->vm.reserved;
 121         list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
 122                 if (i915_vma_is_pinned(vma))
 123                         pinned += vma->node.size;
 124
 125         mutex_unlock(&ggtt->vm.mutex);
 126
 127         args->aper_size = ggtt->vm.total;
 128         args->aper_available_size = args->aper_size - pinned;
 129
 130         return 0;
 131 }
 132
 133 static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
 134 {
 135         struct address_space *mapping = obj->base.filp->f_mapping;
 136         drm_dma_handle_t *phys;
 137         struct sg_table *st;
 138         struct scatterlist *sg;
 139         char *vaddr;
 140         int i;
 141         int err;
 142
 143         if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
 144                 return -EINVAL;
 145
 146         /* Always aligning to the object size, allows a single allocation
 147          * to handle all possible callers, and given typical object sizes,
 148          * the alignment of the buddy allocation will naturally match.
 149          */
 150         phys = drm_pci_alloc(obj->base.dev,
 151                              roundup_pow_of_two(obj->base.size),
 152                              roundup_pow_of_two(obj->base.size));
 153         if (!phys)
 154                 return -ENOMEM;
 155
 156         vaddr = phys->vaddr;
 157         for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
 158                 struct page *page;
 159                 char *src;
 160
 161                 page = shmem_read_mapping_page(mapping, i);
 162                 if (IS_ERR(page)) {
 163                         err = PTR_ERR(page);
 164                         goto err_phys;
 165                 }
 166
 167                 src = kmap_atomic(page);
 168                 memcpy(vaddr, src, PAGE_SIZE);
 169                 drm_clflush_virt_range(vaddr, PAGE_SIZE);
 170                 kunmap_atomic(src);
 171
 172                 put_page(page);
 173                 vaddr += PAGE_SIZE;
 174         }
 175
 176         i915_gem_chipset_flush(to_i915(obj->base.dev));
 177
 178         st = kmalloc(sizeof(*st), GFP_KERNEL);
 179         if (!st) {
 180                 err = -ENOMEM;
 181                 goto err_phys;
 182         }
 183
 184         if (sg_alloc_table(st, 1, GFP_KERNEL)) {
 185                 kfree(st);
 186                 err = -ENOMEM;
 187                 goto err_phys;
 188         }
 189
 190         sg = st->sgl;
 191         sg->offset = 0;
 192         sg->length = obj->base.size;
 193
 194         sg_dma_address(sg) = phys->busaddr;
 195         sg_dma_len(sg) = obj->base.size;
 196
 197         obj->phys_handle = phys;
 198
 199         __i915_gem_object_set_pages(obj, st, sg->length);
 200
 201         return 0;
 202
 203 err_phys:
 204         drm_pci_free(obj->base.dev, phys);
 205
 206         return err;
 207 }
 208
 209 static void __start_cpu_write(struct drm_i915_gem_object *obj)
 210 {
 211         obj->read_domains = I915_GEM_DOMAIN_CPU;
 212         obj->write_domain = I915_GEM_DOMAIN_CPU;
 213         if (cpu_write_needs_clflush(obj))
 214                 obj->cache_dirty = true;
 215 }
 216
 217 void
 218 __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
 219                                 struct sg_table *pages,
 220                                 bool needs_clflush)
 221 {
 222         GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);
 223
 224         if (obj->mm.madv == I915_MADV_DONTNEED)
 225                 obj->mm.dirty = false;
 226
 227         if (needs_clflush &&
 228             (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
 229             !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
 230                 drm_clflush_sg(pages);
 231
 232         __start_cpu_write(obj);
 233 }
 234
 235 static void
 236 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
 237                                struct sg_table *pages)
 238 {
 239         __i915_gem_object_release_shmem(obj, pages, false);
 240
 241         if (obj->mm.dirty) {
 242                 struct address_space *mapping = obj->base.filp->f_mapping;
 243                 char *vaddr = obj->phys_handle->vaddr;
 244                 int i;
 245
 246                 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
 247                         struct page *page;
 248                         char *dst;
 249
 250                         page = shmem_read_mapping_page(mapping, i);
 251                         if (IS_ERR(page))
 252                                 continue;
 253
 254                         dst = kmap_atomic(page);
 255                         drm_clflush_virt_range(vaddr, PAGE_SIZE);
 256                         memcpy(dst, vaddr, PAGE_SIZE);
 257                         kunmap_atomic(dst);
 258
 259                         set_page_dirty(page);
 260                         if (obj->mm.madv == I915_MADV_WILLNEED)
 261                                 mark_page_accessed(page);
 262                         put_page(page);
 263                         vaddr += PAGE_SIZE;
 264                 }
 265                 obj->mm.dirty = false;
 266         }
 267
 268         sg_free_table(pages);
 269         kfree(pages);
 270
 271         drm_pci_free(obj->base.dev, obj->phys_handle);
 272 }
 273
 274 static void
 275 i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
 276 {
 277         i915_gem_object_unpin_pages(obj);
 278 }
 279
 280 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
 281         .get_pages = i915_gem_object_get_pages_phys,
 282         .put_pages = i915_gem_object_put_pages_phys,
 283         .release = i915_gem_object_release_phys,
 284 };
 285
 286 static const struct drm_i915_gem_object_ops i915_gem_object_ops;
 287
 288 int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 289 {
 290         struct i915_vma *vma;
 291         LIST_HEAD(still_in_list);
 292         int ret;
 293
 294         lockdep_assert_held(&obj->base.dev->struct_mutex);
 295
 296         /* Closed vma are removed from the obj->vma_list - but they may
 297          * still have an active binding on the object. To remove those we
 298          * must wait for all rendering to complete to the object (as unbinding
 299          * must anyway), and retire the requests.
 300          */
 301         ret = i915_gem_object_set_to_cpu_domain(obj, false);
 302         if (ret)
 303                 return ret;
 304
 305         spin_lock(&obj->vma.lock);
 306         while (!ret && (vma = list_first_entry_or_null(&obj->vma.list,
 307                                                        struct i915_vma,
 308                                                        obj_link))) {
 309                 list_move_tail(&vma->obj_link, &still_in_list);
 310                 spin_unlock(&obj->vma.lock);
 311
 312                 ret = i915_vma_unbind(vma);
 313
 314                 spin_lock(&obj->vma.lock);
 315         }
 316         list_splice(&still_in_list, &obj->vma.list);
 317         spin_unlock(&obj->vma.lock);
 318
 319         return ret;
 320 }
 321
 322 static long
 323 i915_gem_object_wait_fence(struct dma_fence *fence,
 324                            unsigned int flags,
 325                            long timeout)
 326 {
 327         struct i915_request *rq;
 328
 329         BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1);
 330
 331         if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
 332                 return timeout;
 333
 334         if (!dma_fence_is_i915(fence))
 335                 return dma_fence_wait_timeout(fence,
 336                                               flags & I915_WAIT_INTERRUPTIBLE,
 337                                               timeout);
 338
 339         rq = to_request(fence);
 340         if (i915_request_completed(rq))
 341                 goto out;
 342
 343         timeout = i915_request_wait(rq, flags, timeout);
 344
 345 out:
 346         if (flags & I915_WAIT_LOCKED && i915_request_completed(rq))
 347                 i915_request_retire_upto(rq);
 348
 349         return timeout;
 350 }
 351
 352 static long
 353 i915_gem_object_wait_reservation(struct reservation_object *resv,
 354                                  unsigned int flags,
 355                                  long timeout)
 356 {
 357         unsigned int seq = __read_seqcount_begin(&resv->seq);
 358         struct dma_fence *excl;
 359         bool prune_fences = false;
 360
 361         if (flags & I915_WAIT_ALL) {
 362                 struct dma_fence **shared;
 363                 unsigned int count, i;
 364                 int ret;
 365
 366                 ret = reservation_object_get_fences_rcu(resv,
 367                                                         &excl, &count, &shared);
 368                 if (ret)
 369                         return ret;
 370
 371                 for (i = 0; i < count; i++) {
 372                         timeout = i915_gem_object_wait_fence(shared[i],
 373                                                              flags, timeout);
 374                         if (timeout < 0)
 375                                 break;
 376
 377                         dma_fence_put(shared[i]);
 378                 }
 379
 380                 for (; i < count; i++)
 381                         dma_fence_put(shared[i]);
 382                 kfree(shared);
 383
 384                 /*
 385                  * If both shared fences and an exclusive fence exist,
 386                  * then by construction the shared fences must be later
 387                  * than the exclusive fence. If we successfully wait for
 388                  * all the shared fences, we know that the exclusive fence
 389                  * must all be signaled. If all the shared fences are
 390                  * signaled, we can prune the array and recover the
 391                  * floating references on the fences/requests.
 392                  */
 393                 prune_fences = count && timeout >= 0;
 394         } else {
 395                 excl = reservation_object_get_excl_rcu(resv);
 396         }
 397
 398         if (excl && timeout >= 0)
 399                 timeout = i915_gem_object_wait_fence(excl, flags, timeout);
 400
 401         dma_fence_put(excl);
 402
 403         /*
 404          * Opportunistically prune the fences iff we know they have *all* been
 405          * signaled and that the reservation object has not been changed (i.e.
 406          * no new fences have been added).
 407          */
 408         if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) {
 409                 if (reservation_object_trylock(resv)) {
 410                         if (!__read_seqcount_retry(&resv->seq, seq))
 411                                 reservation_object_add_excl_fence(resv, NULL);
 412                         reservation_object_unlock(resv);
 413                 }
 414         }
 415
 416         return timeout;
 417 }
 418
 419 static void __fence_set_priority(struct dma_fence *fence,
 420                                  const struct i915_sched_attr *attr)
 421 {
 422         struct i915_request *rq;
 423         struct intel_engine_cs *engine;
 424
 425         if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence))
 426                 return;
 427
 428         rq = to_request(fence);
 429         engine = rq->engine;
 430
 431         local_bh_disable();
 432         rcu_read_lock(); /* RCU serialisation for set-wedged protection */
 433         if (engine->schedule)
 434                 engine->schedule(rq, attr);
 435         rcu_read_unlock();
 436         local_bh_enable(); /* kick the tasklets if queues were reprioritised */
 437 }
 438
 439 static void fence_set_priority(struct dma_fence *fence,
 440                                const struct i915_sched_attr *attr)
 441 {
 442         /* Recurse once into a fence-array */
 443         if (dma_fence_is_array(fence)) {
 444                 struct dma_fence_array *array = to_dma_fence_array(fence);
 445                 int i;
 446
 447                 for (i = 0; i < array->num_fences; i++)
 448                         __fence_set_priority(array->fences[i], attr);
 449         } else {
 450                 __fence_set_priority(fence, attr);
 451         }
 452 }
 453
 454 int
 455 i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
 456                               unsigned int flags,
 457                               const struct i915_sched_attr *attr)
 458 {
 459         struct dma_fence *excl;
 460
 461         if (flags & I915_WAIT_ALL) {
 462                 struct dma_fence **shared;
 463                 unsigned int count, i;
 464                 int ret;
 465
 466                 ret = reservation_object_get_fences_rcu(obj->resv,
 467                                                         &excl, &count, &shared);
 468                 if (ret)
 469                         return ret;
 470
 471                 for (i = 0; i < count; i++) {
 472                         fence_set_priority(shared[i], attr);
 473                         dma_fence_put(shared[i]);
 474                 }
 475
 476                 kfree(shared);
 477         } else {
 478                 excl = reservation_object_get_excl_rcu(obj->resv);
 479         }
 480
 481         if (excl) {
 482                 fence_set_priority(excl, attr);
 483                 dma_fence_put(excl);
 484         }
 485         return 0;
 486 }
 487
 488 /**
 489  * Waits for rendering to the object to be completed
 490  * @obj: i915 gem object
 491  * @flags: how to wait (under a lock, for all rendering or just for writes etc)
 492  * @timeout: how long to wait
 493  */
 494 int
 495 i915_gem_object_wait(struct drm_i915_gem_object *obj,
 496                      unsigned int flags,
 497                      long timeout)
 498 {
 499         might_sleep();
 500         GEM_BUG_ON(timeout < 0);
 501
 502         timeout = i915_gem_object_wait_reservation(obj->resv, flags, timeout);
 503         return timeout < 0 ? timeout : 0;
 504 }
 505
 506 static int
 507 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
 508                      struct drm_i915_gem_pwrite *args,
 509                      struct drm_file *file)
 510 {
 511         void *vaddr = obj->phys_handle->vaddr + args->offset;
 512         char __user *user_data = u64_to_user_ptr(args->data_ptr);
 513
 514         /* We manually control the domain here and pretend that it
 515          * remains coherent i.e. in the GTT domain, like shmem_pwrite.
 516          */
 517         intel_fb_obj_invalidate(obj, ORIGIN_CPU);
 518         if (copy_from_user(vaddr, user_data, args->size))
 519                 return -EFAULT;
 520
 521         drm_clflush_virt_range(vaddr, args->size);
 522         i915_gem_chipset_flush(to_i915(obj->base.dev));
 523
 524         intel_fb_obj_flush(obj, ORIGIN_CPU);
 525         return 0;
 526 }
 527
 528 static int
 529 i915_gem_create(struct drm_file *file,
 530                 struct drm_i915_private *dev_priv,
 531                 u64 *size_p,
 532                 u32 *handle_p)
 533 {
 534         struct drm_i915_gem_object *obj;
 535         u32 handle;
 536         u64 size;
 537         int ret;
 538
 539         size = round_up(*size_p, PAGE_SIZE);
 540         if (size == 0)
 541                 return -EINVAL;
 542
 543         /* Allocate the new object */
 544         obj = i915_gem_object_create(dev_priv, size);
 545         if (IS_ERR(obj))
 546                 return PTR_ERR(obj);
 547
 548         ret = drm_gem_handle_create(file, &obj->base, &handle);
 549         /* drop reference from allocate - handle holds it now */
 550         i915_gem_object_put(obj);
 551         if (ret)
 552                 return ret;
 553
 554         *handle_p = handle;
 555         *size_p = size;
 556         return 0;
 557 }
 558
 559 int
 560 i915_gem_dumb_create(struct drm_file *file,
 561                      struct drm_device *dev,
 562                      struct drm_mode_create_dumb *args)
 563 {
 564         int cpp = DIV_ROUND_UP(args->bpp, 8);
 565         u32 format;
 566
 567         switch (cpp) {
 568         case 1:
 569                 format = DRM_FORMAT_C8;
 570                 break;
 571         case 2:
 572                 format = DRM_FORMAT_RGB565;
 573                 break;
 574         case 4:
 575                 format = DRM_FORMAT_XRGB8888;
 576                 break;
 577         default:
 578                 return -EINVAL;
 579         }
 580
 581         /* have to work out size/pitch and return them */
 582         args->pitch = ALIGN(args->width * cpp, 64);
 583
 584         /* align stride to page size so that we can remap */
 585         if (args->pitch > intel_plane_fb_max_stride(to_i915(dev), format,
 586                                                     DRM_FORMAT_MOD_LINEAR))
 587                 args->pitch = ALIGN(args->pitch, 4096);
 588
 589         args->size = args->pitch * args->height;
 590         return i915_gem_create(file, to_i915(dev),
 591                                &args->size, &args->handle);
 592 }
 593
 594 static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
 595 {
 596         return !(obj->cache_level == I915_CACHE_NONE ||
 597                  obj->cache_level == I915_CACHE_WT);
 598 }
 599
 600 /**
 601  * Creates a new mm object and returns a handle to it.
 602  * @dev: drm device pointer
 603  * @data: ioctl data blob
 604  * @file: drm file pointer
 605  */
 606 int
 607 i915_gem_create_ioctl(struct drm_device *dev, void *data,
 608                       struct drm_file *file)
 609 {
 610         struct drm_i915_private *dev_priv = to_i915(dev);
 611         struct drm_i915_gem_create *args = data;
 612
 613         i915_gem_flush_free_objects(dev_priv);
 614
 615         return i915_gem_create(file, dev_priv,
 616                                &args->size, &args->handle);
 617 }
 618
 619 static inline enum fb_op_origin
 620 fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
 621 {
 622         return (domain == I915_GEM_DOMAIN_GTT ?
 623                 obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
 624 }
 625
 626 void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
 627 {
 628         intel_wakeref_t wakeref;
 629
 630         /*
 631          * No actual flushing is required for the GTT write domain for reads
 632          * from the GTT domain. Writes to it "immediately" go to main memory
 633          * as far as we know, so there's no chipset flush. It also doesn't
 634          * land in the GPU render cache.
 635          *
 636          * However, we do have to enforce the order so that all writes through
 637          * the GTT land before any writes to the device, such as updates to
 638          * the GATT itself.
 639          *
 640          * We also have to wait a bit for the writes to land from the GTT.
 641          * An uncached read (i.e. mmio) seems to be ideal for the round-trip
 642          * timing. This issue has only been observed when switching quickly
 643          * between GTT writes and CPU reads from inside the kernel on recent hw,
 644          * and it appears to only affect discrete GTT blocks (i.e. on LLC
 645          * system agents we cannot reproduce this behaviour, until Cannonlake
 646          * that was!).
 647          */
 648
 649         wmb();
 650
 651         if (INTEL_INFO(dev_priv)->has_coherent_ggtt)
 652                 return;
 653
 654         i915_gem_chipset_flush(dev_priv);
 655
 656         with_intel_runtime_pm(dev_priv, wakeref) {
 657                 spin_lock_irq(&dev_priv->uncore.lock);
 658
 659                 POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE));
 660
 661                 spin_unlock_irq(&dev_priv->uncore.lock);
 662         }
 663 }
 664
 665 static void
 666 flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
 667 {
 668         struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 669         struct i915_vma *vma;
 670
 671         if (!(obj->write_domain & flush_domains))
 672                 return;
 673
 674         switch (obj->write_domain) {
 675         case I915_GEM_DOMAIN_GTT:
 676                 i915_gem_flush_ggtt_writes(dev_priv);
 677
 678                 intel_fb_obj_flush(obj,
 679                                    fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
 680
 681                 for_each_ggtt_vma(vma, obj) {
 682                         if (vma->iomap)
 683                                 continue;
 684
 685                         i915_vma_unset_ggtt_write(vma);
 686                 }
 687                 break;
 688
 689         case I915_GEM_DOMAIN_WC:
 690                 wmb();
 691                 break;
 692
 693         case I915_GEM_DOMAIN_CPU:
 694                 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
 695                 break;
 696
 697         case I915_GEM_DOMAIN_RENDER:
 698                 if (gpu_write_needs_clflush(obj))
 699                         obj->cache_dirty = true;
 700                 break;
 701         }
 702
 703         obj->write_domain = 0;
 704 }
 705
 706 /*
 707  * Pins the specified object's pages and synchronizes the object with
 708  * GPU accesses. Sets needs_clflush to non-zero if the caller should
 709  * flush the object from the CPU cache.
 710  */
 711 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 712                                     unsigned int *needs_clflush)
 713 {
 714         int ret;
 715
 716         lockdep_assert_held(&obj->base.dev->struct_mutex);
 717
 718         *needs_clflush = 0;
 719         if (!i915_gem_object_has_struct_page(obj))
 720                 return -ENODEV;
 721
 722         ret = i915_gem_object_wait(obj,
 723                                    I915_WAIT_INTERRUPTIBLE |
 724                                    I915_WAIT_LOCKED,
 725                                    MAX_SCHEDULE_TIMEOUT);
 726         if (ret)
 727                 return ret;
 728
 729         ret = i915_gem_object_pin_pages(obj);
 730         if (ret)
 731                 return ret;
 732
 733         if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
 734             !static_cpu_has(X86_FEATURE_CLFLUSH)) {
 735                 ret = i915_gem_object_set_to_cpu_domain(obj, false);
 736                 if (ret)
 737                         goto err_unpin;
 738                 else
 739                         goto out;
 740         }
 741
 742         flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 743
 744         /* If we're not in the cpu read domain, set ourself into the gtt
 745          * read domain and manually flush cachelines (if required). This
 746          * optimizes for the case when the gpu will dirty the data
 747          * anyway again before the next pread happens.
 748          */
 749         if (!obj->cache_dirty &&
 750             !(obj->read_domains & I915_GEM_DOMAIN_CPU))
 751                 *needs_clflush = CLFLUSH_BEFORE;
 752
 753 out:
 754         /* return with the pages pinned */
 755         return 0;
 756
 757 err_unpin:
 758         i915_gem_object_unpin_pages(obj);
 759         return ret;
 760 }
 761
 762 int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
 763                                      unsigned int *needs_clflush)
 764 {
 765         int ret;
 766
 767         lockdep_assert_held(&obj->base.dev->struct_mutex);
 768
 769         *needs_clflush = 0;
 770         if (!i915_gem_object_has_struct_page(obj))
 771                 return -ENODEV;
 772
 773         ret = i915_gem_object_wait(obj,
 774                                    I915_WAIT_INTERRUPTIBLE |
 775                                    I915_WAIT_LOCKED |
 776                                    I915_WAIT_ALL,
 777                                    MAX_SCHEDULE_TIMEOUT);
 778         if (ret)
 779                 return ret;
 780
 781         ret = i915_gem_object_pin_pages(obj);
 782         if (ret)
 783                 return ret;
 784
 785         if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
 786             !static_cpu_has(X86_FEATURE_CLFLUSH)) {
 787                 ret = i915_gem_object_set_to_cpu_domain(obj, true);
 788                 if (ret)
 789                         goto err_unpin;
 790                 else
 791                         goto out;
 792         }
 793
 794         flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 795
 796         /* If we're not in the cpu write domain, set ourself into the
 797          * gtt write domain and manually flush cachelines (as required).
 798          * This optimizes for the case when the gpu will use the data
 799          * right away and we therefore have to clflush anyway.
 800          */
 801         if (!obj->cache_dirty) {
 802                 *needs_clflush |= CLFLUSH_AFTER;
 803
 804                 /*
 805                  * Same trick applies to invalidate partially written
 806                  * cachelines read before writing.
 807                  */
 808                 if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
 809                         *needs_clflush |= CLFLUSH_BEFORE;
 810         }
 811
 812 out:
 813         intel_fb_obj_invalidate(obj, ORIGIN_CPU);
 814         obj->mm.dirty = true;
 815         /* return with the pages pinned */
 816         return 0;
 817
 818 err_unpin:
 819         i915_gem_object_unpin_pages(obj);
 820         return ret;
 821 }
 822
 823 static int
 824 shmem_pread(struct page *page, int offset, int len, char __user *user_data,
 825             bool needs_clflush)
 826 {
 827         char *vaddr;
 828         int ret;
 829
 830         vaddr = kmap(page);
 831
 832         if (needs_clflush)
 833                 drm_clflush_virt_range(vaddr + offset, len);
 834
 835         ret = __copy_to_user(user_data, vaddr + offset, len);
 836
 837         kunmap(page);
 838
 839         return ret ? -EFAULT : 0;
 840 }
 841
 842 static int
 843 i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
 844                      struct drm_i915_gem_pread *args)
 845 {
 846         char __user *user_data;
 847         u64 remain;
 848         unsigned int needs_clflush;
 849         unsigned int idx, offset;
 850         int ret;
 851
 852         ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
 853         if (ret)
 854                 return ret;
 855
 856         ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
 857         mutex_unlock(&obj->base.dev->struct_mutex);
 858         if (ret)
 859                 return ret;
 860
 861         remain = args->size;
 862         user_data = u64_to_user_ptr(args->data_ptr);
 863         offset = offset_in_page(args->offset);
 864         for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
 865                 struct page *page = i915_gem_object_get_page(obj, idx);
 866                 unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);
 867
 868                 ret = shmem_pread(page, offset, length, user_data,
 869                                   needs_clflush);
 870                 if (ret)
 871                         break;
 872
 873                 remain -= length;
 874                 user_data += length;
 875                 offset = 0;
 876         }
 877
 878         i915_gem_obj_finish_shmem_access(obj);
 879         return ret;
 880 }
 881
 882 static inline bool
 883 gtt_user_read(struct io_mapping *mapping,
 884               loff_t base, int offset,
 885               char __user *user_data, int length)
 886 {
 887         void __iomem *vaddr;
 888         unsigned long unwritten;
 889
 890         /* We can use the cpu mem copy function because this is X86. */
 891         vaddr = io_mapping_map_atomic_wc(mapping, base);
 892         unwritten = __copy_to_user_inatomic(user_data,
 893                                             (void __force *)vaddr + offset,
 894                                             length);
 895         io_mapping_unmap_atomic(vaddr);
 896         if (unwritten) {
 897                 vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
 898                 unwritten = copy_to_user(user_data,
 899                                          (void __force *)vaddr + offset,
 900                                          length);
 901                 io_mapping_unmap(vaddr);
 902         }
 903         return unwritten;
 904 }
 905
 906 static int
 907 i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 908                    const struct drm_i915_gem_pread *args)
 909 {
 910         struct drm_i915_private *i915 = to_i915(obj->base.dev);
 911         struct i915_ggtt *ggtt = &i915->ggtt;
 912         intel_wakeref_t wakeref;
 913         struct drm_mm_node node;
 914         struct i915_vma *vma;
 915         void __user *user_data;
 916         u64 remain, offset;
 917         int ret;
 918
 919         ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
 920         if (ret)
 921                 return ret;
 922
 923         wakeref = intel_runtime_pm_get(i915);
 924         vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
 925                                        PIN_MAPPABLE |
 926                                        PIN_NONFAULT |
 927                                        PIN_NONBLOCK);
 928         if (!IS_ERR(vma)) {
 929                 node.start = i915_ggtt_offset(vma);
 930                 node.allocated = false;
 931                 ret = i915_vma_put_fence(vma);
 932                 if (ret) {
 933                         i915_vma_unpin(vma);
 934                         vma = ERR_PTR(ret);
 935                 }
 936         }
 937         if (IS_ERR(vma)) {
 938                 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
 939                 if (ret)
 940                         goto out_unlock;
 941                 GEM_BUG_ON(!node.allocated);
 942         }
 943
 944         ret = i915_gem_object_set_to_gtt_domain(obj, false);
 945         if (ret)
 946                 goto out_unpin;
 947
 948         mutex_unlock(&i915->drm.struct_mutex);
 949
 950         user_data = u64_to_user_ptr(args->data_ptr);
 951         remain = args->size;
 952         offset = args->offset;
 953
 954         while (remain > 0) {
 955                 /* Operation in this page
 956                  *
 957                  * page_base = page offset within aperture
 958                  * page_offset = offset within page
 959                  * page_length = bytes to copy for this page
 960                  */
 961                 u32 page_base = node.start;
 962                 unsigned page_offset = offset_in_page(offset);
 963                 unsigned page_length = PAGE_SIZE - page_offset;
 964                 page_length = remain < page_length ? remain : page_length;
 965                 if (node.allocated) {
 966                         wmb();
 967                         ggtt->vm.insert_page(&ggtt->vm,
 968                                              i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
 969                                              node.start, I915_CACHE_NONE, 0);
 970                         wmb();
 971                 } else {
 972                         page_base += offset & PAGE_MASK;
 973                 }
 974
 975                 if (gtt_user_read(&ggtt->iomap, page_base, page_offset,
 976                                   user_data, page_length)) {
 977                         ret = -EFAULT;
 978                         break;
 979                 }
 980
 981                 remain -= page_length;
 982                 user_data += page_length;
 983                 offset += page_length;
 984         }
 985
 986         mutex_lock(&i915->drm.struct_mutex);
 987 out_unpin:
 988         if (node.allocated) {
 989                 wmb();
 990                 ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
 991                 remove_mappable_node(&node);
 992         } else {
 993                 i915_vma_unpin(vma);
 994         }
 995 out_unlock:
 996         intel_runtime_pm_put(i915, wakeref);
 997         mutex_unlock(&i915->drm.struct_mutex);
 998
 999         return ret;
1000 }
1001
1002 /**
1003  * Reads data from the object referenced by handle.
1004  * @dev: drm device pointer
1005  * @data: ioctl data blob
1006  * @file: drm file pointer
1007  *
1008  * On error, the contents of *data are undefined.
1009  */
1010 int
1011 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
1012                      struct drm_file *file)
1013 {
1014         struct drm_i915_gem_pread *args = data;
1015         struct drm_i915_gem_object *obj;
1016         int ret;
1017
1018         if (args->size == 0)
1019                 return 0;
1020
1021         if (!access_ok(u64_to_user_ptr(args->data_ptr),
1022                        args->size))
1023                 return -EFAULT;
1024
1025         obj = i915_gem_object_lookup(file, args->handle);
1026         if (!obj)
1027                 return -ENOENT;
1028
1029         /* Bounds check source.  */
1030         if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
1031                 ret = -EINVAL;
1032                 goto out;
1033         }
1034
1035         trace_i915_gem_object_pread(obj, args->offset, args->size);
1036
1037         ret = i915_gem_object_wait(obj,
1038                                    I915_WAIT_INTERRUPTIBLE,
1039                                    MAX_SCHEDULE_TIMEOUT);
1040         if (ret)
1041                 goto out;
1042
1043         ret = i915_gem_object_pin_pages(obj);
1044         if (ret)
1045                 goto out;
1046
1047         ret = i915_gem_shmem_pread(obj, args);
1048         if (ret == -EFAULT || ret == -ENODEV)
1049                 ret = i915_gem_gtt_pread(obj, args);
1050
1051         i915_gem_object_unpin_pages(obj);
1052 out:
1053         i915_gem_object_put(obj);
1054         return ret;
1055 }
1056
1057 /* This is the fast write path which cannot handle
1058  * page faults in the source data
1059  */
1060
1061 static inline bool
1062 ggtt_write(struct io_mapping *mapping,
1063            loff_t base, int offset,
1064            char __user *user_data, int length)
1065 {
1066         void __iomem *vaddr;
1067         unsigned long unwritten;
1068
1069         /* We can use the cpu mem copy function because this is X86. */
1070         vaddr = io_mapping_map_atomic_wc(mapping, base);
1071         unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset,
1072                                                       user_data, length);
1073         io_mapping_unmap_atomic(vaddr);
1074         if (unwritten) {
1075                 vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
1076                 unwritten = copy_from_user((void __force *)vaddr + offset,
1077                                            user_data, length);
1078                 io_mapping_unmap(vaddr);
1079         }
1080
1081         return unwritten;
1082 }
1083
1084 /**
1085  * This is the fast pwrite path, where we copy the data directly from the
1086  * user into the GTT, uncached.
1087  * @obj: i915 GEM object
1088  * @args: pwrite arguments structure
1089  */
1090 static int
1091 i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
1092                          const struct drm_i915_gem_pwrite *args)
1093 {
1094         struct drm_i915_private *i915 = to_i915(obj->base.dev);
1095         struct i915_ggtt *ggtt = &i915->ggtt;
1096         intel_wakeref_t wakeref;
1097         struct drm_mm_node node;
1098         struct i915_vma *vma;
1099         u64 remain, offset;
1100         void __user *user_data;
1101         int ret;
1102
1103         ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
1104         if (ret)
1105                 return ret;
1106
1107         if (i915_gem_object_has_struct_page(obj)) {
1108                 /*
1109                  * Avoid waking the device up if we can fallback, as
1110                  * waking/resuming is very slow (worst-case 10-100 ms
1111                  * depending on PCI sleeps and our own resume time).
1112                  * This easily dwarfs any performance advantage from
1113                  * using the cache bypass of indirect GGTT access.
1114                  */
1115                 wakeref = intel_runtime_pm_get_if_in_use(i915);
1116                 if (!wakeref) {
1117                         ret = -EFAULT;
1118                         goto out_unlock;
1119                 }
1120         } else {
1121                 /* No backing pages, no fallback, we must force GGTT access */
1122                 wakeref = intel_runtime_pm_get(i915);
1123         }
1124
1125         vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
1126                                        PIN_MAPPABLE |
1127                                        PIN_NONFAULT |
1128                                        PIN_NONBLOCK);
1129         if (!IS_ERR(vma)) {
1130                 node.start = i915_ggtt_offset(vma);
1131                 node.allocated = false;
1132                 ret = i915_vma_put_fence(vma);
1133                 if (ret) {
1134                         i915_vma_unpin(vma);
1135                         vma = ERR_PTR(ret);
1136                 }
1137         }
1138         if (IS_ERR(vma)) {
1139                 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
1140                 if (ret)
1141                         goto out_rpm;
1142                 GEM_BUG_ON(!node.allocated);
1143         }
1144
1145         ret = i915_gem_object_set_to_gtt_domain(obj, true);
1146         if (ret)
1147                 goto out_unpin;
1148
1149         mutex_unlock(&i915->drm.struct_mutex);
1150
1151         intel_fb_obj_invalidate(obj, ORIGIN_CPU);
1152
1153         user_data = u64_to_user_ptr(args->data_ptr);
1154         offset = args->offset;
1155         remain = args->size;
1156         while (remain) {
1157                 /* Operation in this page
1158                  *
1159                  * page_base = page offset within aperture
1160                  * page_offset = offset within page
1161                  * page_length = bytes to copy for this page
1162                  */
1163                 u32 page_base = node.start;
1164                 unsigned int page_offset = offset_in_page(offset);
1165                 unsigned int page_length = PAGE_SIZE - page_offset;
1166                 page_length = remain < page_length ? remain : page_length;
1167                 if (node.allocated) {
1168                         wmb(); /* flush the write before we modify the GGTT */
1169                         ggtt->vm.insert_page(&ggtt->vm,
1170                                              i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
1171                                              node.start, I915_CACHE_NONE, 0);
1172                         wmb(); /* flush modifications to the GGTT (insert_page) */
1173                 } else {
1174                         page_base += offset & PAGE_MASK;
1175                 }
1176                 /* If we get a fault while copying data, then (presumably) our
1177                  * source page isn't available.  Return the error and we'll
1178                  * retry in the slow path.
1179                  * If the object is non-shmem backed, we retry again with the
1180                  * path that handles page fault.
1181                  */
1182                 if (ggtt_write(&ggtt->iomap, page_base, page_offset,
1183                                user_data, page_length)) {
1184                         ret = -EFAULT;
1185                         break;
1186                 }
1187
1188                 remain -= page_length;
1189                 user_data += page_length;
1190                 offset += page_length;
1191         }
1192         intel_fb_obj_flush(obj, ORIGIN_CPU);
1193
1194         mutex_lock(&i915->drm.struct_mutex);
1195 out_unpin:
1196         if (node.allocated) {
1197                 wmb();
1198                 ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
1199                 remove_mappable_node(&node);
1200         } else {
1201                 i915_vma_unpin(vma);
1202         }
1203 out_rpm:
1204         intel_runtime_pm_put(i915, wakeref);
1205 out_unlock:
1206         mutex_unlock(&i915->drm.struct_mutex);
1207         return ret;
1208 }
1209
1210 /* Per-page copy function for the shmem pwrite fastpath.
1211  * Flushes invalid cachelines before writing to the target if
1212  * needs_clflush_before is set and flushes out any written cachelines after
1213  * writing if needs_clflush is set.
1214  */
1215 static int
1216 shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
1217              bool needs_clflush_before,
1218              bool needs_clflush_after)
1219 {
1220         char *vaddr;
1221         int ret;
1222
1223         vaddr = kmap(page);
1224
1225         if (needs_clflush_before)
1226                 drm_clflush_virt_range(vaddr + offset, len);
1227
1228         ret = __copy_from_user(vaddr + offset, user_data, len);
1229         if (!ret && needs_clflush_after)
1230                 drm_clflush_virt_range(vaddr + offset, len);
1231
1232         kunmap(page);
1233
1234         return ret ? -EFAULT : 0;
1235 }
1236
1237 static int
1238 i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
1239                       const struct drm_i915_gem_pwrite *args)
1240 {
1241         struct drm_i915_private *i915 = to_i915(obj->base.dev);
1242         void __user *user_data;
1243         u64 remain;
1244         unsigned int partial_cacheline_write;
1245         unsigned int needs_clflush;
1246         unsigned int offset, idx;
1247         int ret;
1248
1249         ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
1250         if (ret)
1251                 return ret;
1252
1253         ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
1254         mutex_unlock(&i915->drm.struct_mutex);
1255         if (ret)
1256                 return ret;
1257
1258         /* If we don't overwrite a cacheline completely we need to be
1259          * careful to have up-to-date data by first clflushing. Don't
1260          * overcomplicate things and flush the entire patch.
1261          */
1262         partial_cacheline_write = 0;
1263         if (needs_clflush & CLFLUSH_BEFORE)
1264                 partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1;
1265
1266         user_data = u64_to_user_ptr(args->data_ptr);
1267         remain = args->size;
1268         offset = offset_in_page(args->offset);
1269         for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
1270                 struct page *page = i915_gem_object_get_page(obj, idx);
1271                 unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);
1272
1273                 ret = shmem_pwrite(page, offset, length, user_data,
1274                                    (offset | length) & partial_cacheline_write,
1275                                    needs_clflush & CLFLUSH_AFTER);
1276                 if (ret)
1277                         break;
1278
1279                 remain -= length;
1280                 user_data += length;
1281                 offset = 0;
1282         }
1283
1284         intel_fb_obj_flush(obj, ORIGIN_CPU);
1285         i915_gem_obj_finish_shmem_access(obj);
1286         return ret;
1287 }
1288
1289 /**
1290  * Writes data to the object referenced by handle.
1291  * @dev: drm device
1292  * @data: ioctl data blob
1293  * @file: drm file
1294  *
1295  * On error, the contents of the buffer that were to be modified are undefined.
1296  */
1297 int
1298 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
1299                       struct drm_file *file)
1300 {
1301         struct drm_i915_gem_pwrite *args = data;
1302         struct drm_i915_gem_object *obj;
1303         int ret;
1304
1305         if (args->size == 0)
1306                 return 0;
1307
1308         if (!access_ok(u64_to_user_ptr(args->data_ptr), args->size))
1309                 return -EFAULT;
1310
1311         obj = i915_gem_object_lookup(file, args->handle);
1312         if (!obj)
1313                 return -ENOENT;
1314
1315         /* Bounds check destination. */
1316         if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
1317                 ret = -EINVAL;
1318                 goto err;
1319         }
1320
1321         /* Writes not allowed into this read-only object */
1322         if (i915_gem_object_is_readonly(obj)) {
1323                 ret = -EINVAL;
1324                 goto err;
1325         }
1326
1327         trace_i915_gem_object_pwrite(obj, args->offset, args->size);
1328
1329         ret = -ENODEV;
1330         if (obj->ops->pwrite)
1331                 ret = obj->ops->pwrite(obj, args);
1332         if (ret != -ENODEV)
1333                 goto err;
1334
1335         ret = i915_gem_object_wait(obj,
1336                                    I915_WAIT_INTERRUPTIBLE |
1337                                    I915_WAIT_ALL,
1338                                    MAX_SCHEDULE_TIMEOUT);
1339         if (ret)
1340                 goto err;
1341
1342         ret = i915_gem_object_pin_pages(obj);
1343         if (ret)
1344                 goto err;
1345
1346         ret = -EFAULT;
1347         /* We can only do the GTT pwrite on untiled buffers, as otherwise
1348          * it would end up going through the fenced access, and we'll get
1349          * different detiling behavior between reading and writing.
1350          * pread/pwrite currently are reading and writing from the CPU
1351          * perspective, requiring manual detiling by the client.
1352          */
1353         if (!i915_gem_object_has_struct_page(obj) ||
1354             cpu_write_needs_clflush(obj))
1355                 /* Note that the gtt paths might fail with non-page-backed user
1356                  * pointers (e.g. gtt mappings when moving data between
1357                  * textures). Fallback to the shmem path in that case.
1358                  */
1359                 ret = i915_gem_gtt_pwrite_fast(obj, args);
1360
1361         if (ret == -EFAULT || ret == -ENOSPC) {
1362                 if (obj->phys_handle)
1363                         ret = i915_gem_phys_pwrite(obj, args, file);
1364                 else
1365                         ret = i915_gem_shmem_pwrite(obj, args);
1366         }
1367
1368         i915_gem_object_unpin_pages(obj);
1369 err:
1370         i915_gem_object_put(obj);
1371         return ret;
1372 }
1373
1374 static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
1375 {
1376         struct drm_i915_private *i915 = to_i915(obj->base.dev);
1377         struct list_head *list;
1378         struct i915_vma *vma;
1379
1380         GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
1381
1382         mutex_lock(&i915->ggtt.vm.mutex);
1383         for_each_ggtt_vma(vma, obj) {
1384                 if (!drm_mm_node_allocated(&vma->node))
1385                         continue;
1386
1387                 list_move_tail(&vma->vm_link, &vma->vm->bound_list);
1388         }
1389         mutex_unlock(&i915->ggtt.vm.mutex);
1390
1391         spin_lock(&i915->mm.obj_lock);
1392         list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
1393         list_move_tail(&obj->mm.link, list);
1394         spin_unlock(&i915->mm.obj_lock);
1395 }
1396
1397 /**
1398  * Called when user space prepares to use an object with the CPU, either
1399  * through the mmap ioctl's mapping or a GTT mapping.
1400  * @dev: drm device
1401  * @data: ioctl data blob
1402  * @file: drm file
1403  */
1404 int
1405 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1406                           struct drm_file *file)
1407 {
1408         struct drm_i915_gem_set_domain *args = data;
1409         struct drm_i915_gem_object *obj;
1410         u32 read_domains = args->read_domains;
1411         u32 write_domain = args->write_domain;
1412         int err;
1413
1414         /* Only handle setting domains to types used by the CPU. */
1415         if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
1416                 return -EINVAL;
1417
1418         /*
1419          * Having something in the write domain implies it's in the read
1420          * domain, and only that read domain.  Enforce that in the request.
1421          */
1422         if (write_domain && read_domains != write_domain)
1423                 return -EINVAL;
1424
1425         if (!read_domains)
1426                 return 0;
1427
1428         obj = i915_gem_object_lookup(file, args->handle);
1429         if (!obj)
1430                 return -ENOENT;
1431
1432         /*
1433          * Already in the desired write domain? Nothing for us to do!
1434          *
1435          * We apply a little bit of cunning here to catch a broader set of
1436          * no-ops. If obj->write_domain is set, we must be in the same
1437          * obj->read_domains, and only that domain. Therefore, if that
1438          * obj->write_domain matches the request read_domains, we are
1439          * already in the same read/write domain and can skip the operation,
1440          * without having to further check the requested write_domain.
1441          */
1442         if (READ_ONCE(obj->write_domain) == read_domains) {
1443                 err = 0;
1444                 goto out;
1445         }
1446
1447         /*
1448          * Try to flush the object off the GPU without holding the lock.
1449          * We will repeat the flush holding the lock in the normal manner
1450          * to catch cases where we are gazumped.
1451          */
1452         err = i915_gem_object_wait(obj,
1453                                    I915_WAIT_INTERRUPTIBLE |
1454                                    I915_WAIT_PRIORITY |
1455                                    (write_domain ? I915_WAIT_ALL : 0),
1456                                    MAX_SCHEDULE_TIMEOUT);
1457         if (err)
1458                 goto out;
1459
1460         /*
1461          * Proxy objects do not control access to the backing storage, ergo
1462          * they cannot be used as a means to manipulate the cache domain
1463          * tracking for that backing storage. The proxy object is always
1464          * considered to be outside of any cache domain.
1465          */
1466         if (i915_gem_object_is_proxy(obj)) {
1467                 err = -ENXIO;
1468                 goto out;
1469         }
1470
1471         /*
1472          * Flush and acquire obj->pages so that we are coherent through
1473          * direct access in memory with previous cached writes through
1474          * shmemfs and that our cache domain tracking remains valid.
1475          * For example, if the obj->filp was moved to swap without us
1476          * being notified and releasing the pages, we would mistakenly
1477          * continue to assume that the obj remained out of the CPU cached
1478          * domain.
1479          */
1480         err = i915_gem_object_pin_pages(obj);
1481         if (err)
1482                 goto out;
1483
1484         err = i915_mutex_lock_interruptible(dev);
1485         if (err)
1486                 goto out_unpin;
1487
1488         if (read_domains & I915_GEM_DOMAIN_WC)
1489                 err = i915_gem_object_set_to_wc_domain(obj, write_domain);
1490         else if (read_domains & I915_GEM_DOMAIN_GTT)
1491                 err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
1492         else
1493                 err = i915_gem_object_set_to_cpu_domain(obj, write_domain);
1494
1495         /* And bump the LRU for this access */
1496         i915_gem_object_bump_inactive_ggtt(obj);
1497
1498         mutex_unlock(&dev->struct_mutex);
1499
1500         if (write_domain != 0)
1501                 intel_fb_obj_invalidate(obj,
1502                                         fb_write_origin(obj, write_domain));
1503
1504 out_unpin:
1505         i915_gem_object_unpin_pages(obj);
1506 out:
1507         i915_gem_object_put(obj);
1508         return err;
1509 }
1510
1511 /**
1512  * Called when user space has done writes to this buffer
1513  * @dev: drm device
1514  * @data: ioctl data blob
1515  * @file: drm file
1516  */
1517 int
1518 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1519                          struct drm_file *file)
1520 {
1521         struct drm_i915_gem_sw_finish *args = data;
1522         struct drm_i915_gem_object *obj;
1523
1524         obj = i915_gem_object_lookup(file, args->handle);
1525         if (!obj)
1526                 return -ENOENT;
1527
1528         /*
1529          * Proxy objects are barred from CPU access, so there is no
1530          * need to ban sw_finish as it is a nop.
1531          */
1532
1533         /* Pinned buffers may be scanout, so flush the cache */
1534         i915_gem_object_flush_if_display(obj);
1535         i915_gem_object_put(obj);
1536
1537         return 0;
1538 }
1539
1540 static inline bool
1541 __vma_matches(struct vm_area_struct *vma, struct file *filp,
1542               unsigned long addr, unsigned long size)
1543 {
1544         if (vma->vm_file != filp)
1545                 return false;
1546
1547         return vma->vm_start == addr &&
1548                (vma->vm_end - vma->vm_start) == PAGE_ALIGN(size);
1549 }
1550
1551 /**
1552  * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
1553  *                       it is mapped to.
1554  * @dev: drm device
1555  * @data: ioctl data blob
1556  * @file: drm file
1557  *
1558  * While the mapping holds a reference on the contents of the object, it doesn't
1559  * imply a ref on the object itself.
1560  *
1561  * IMPORTANT:
1562  *
1563  * DRM driver writers who look a this function as an example for how to do GEM
1564  * mmap support, please don't implement mmap support like here. The modern way
1565  * to implement DRM mmap support is with an mmap offset ioctl (like
1566  * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
1567  * That way debug tooling like valgrind will understand what's going on, hiding
1568  * the mmap call in a driver private ioctl will break that. The i915 driver only
1569  * does cpu mmaps this way because we didn't know better.
1570  */
1571 int
1572 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1573                     struct drm_file *file)
1574 {
1575         struct drm_i915_gem_mmap *args = data;
1576         struct drm_i915_gem_object *obj;
1577         unsigned long addr;
1578
1579         if (args->flags & ~(I915_MMAP_WC))
1580                 return -EINVAL;
1581
1582         if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
1583                 return -ENODEV;
1584
1585         obj = i915_gem_object_lookup(file, args->handle);
1586         if (!obj)
1587                 return -ENOENT;
1588
1589         /* prime objects have no backing filp to GEM mmap
1590          * pages from.
1591          */
1592         if (!obj->base.filp) {
1593                 addr = -ENXIO;
1594                 goto err;
1595         }
1596
1597         if (range_overflows(args->offset, args->size, (u64)obj->base.size)) {
1598                 addr = -EINVAL;
1599                 goto err;
1600         }
1601
1602         addr = vm_mmap(obj->base.filp, 0, args->size,
1603                        PROT_READ | PROT_WRITE, MAP_SHARED,
1604                        args->offset);
1605         if (IS_ERR_VALUE(addr))
1606                 goto err;
1607
1608         if (args->flags & I915_MMAP_WC) {
1609                 struct mm_struct *mm = current->mm;
1610                 struct vm_area_struct *vma;
1611
1612                 if (down_write_killable(&mm->mmap_sem)) {
1613                         addr = -EINTR;
1614                         goto err;
1615                 }
1616                 vma = find_vma(mm, addr);
1617                 if (vma && __vma_matches(vma, obj->base.filp, addr, args->size))
1618                         vma->vm_page_prot =
1619                                 pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
1620                 else
1621                         addr = -ENOMEM;
1622                 up_write(&mm->mmap_sem);
1623                 if (IS_ERR_VALUE(addr))
1624                         goto err;
1625
1626                 /* This may race, but that's ok, it only gets set */
1627                 WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
1628         }
1629         i915_gem_object_put(obj);
1630
1631         args->addr_ptr = (u64)addr;
1632         return 0;
1633
1634 err:
1635         i915_gem_object_put(obj);
1636         return addr;
1637 }
1638
1639 static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj)
1640 {
1641         return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT;
1642 }
1643
1644 /**
1645  * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
1646  *
1647  * A history of the GTT mmap interface:
1648  *
1649  * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
1650  *     aligned and suitable for fencing, and still fit into the available
1651  *     mappable space left by the pinned display objects. A classic problem
1652  *     we called the page-fault-of-doom where we would ping-pong between
1653  *     two objects that could not fit inside the GTT and so the memcpy
1654  *     would page one object in at the expense of the other between every
1655  *     single byte.
1656  *
1657  * 1 - Objects can be any size, and have any compatible fencing (X Y, or none
1658  *     as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
1659  *     object is too large for the available space (or simply too large
1660  *     for the mappable aperture!), a view is created instead and faulted
1661  *     into userspace. (This view is aligned and sized appropriately for
1662  *     fenced access.)
1663  *
1664  * 2 - Recognise WC as a separate cache domain so that we can flush the
1665  *     delayed writes via GTT before performing direct access via WC.
1666  *
1667  * 3 - Remove implicit set-domain(GTT) and synchronisation on initial
1668  *     pagefault; swapin remains transparent.
1669  *
1670  * Restrictions:
1671  *
1672  *  * snoopable objects cannot be accessed via the GTT. It can cause machine
1673  *    hangs on some architectures, corruption on others. An attempt to service
1674  *    a GTT page fault from a snoopable object will generate a SIGBUS.
1675  *
1676  *  * the object must be able to fit into RAM (physical memory, though no
1677  *    limited to the mappable aperture).
1678  *
1679  *
1680  * Caveats:
1681  *
1682  *  * a new GTT page fault will synchronize rendering from the GPU and flush
1683  *    all data to system memory. Subsequent access will not be synchronized.
1684  *
1685  *  * all mappings are revoked on runtime device suspend.
1686  *
1687  *  * there are only 8, 16 or 32 fence registers to share between all users
1688  *    (older machines require fence register for display and blitter access
1689  *    as well). Contention of the fence registers will cause the previous users
1690  *    to be unmapped and any new access will generate new page faults.
1691  *
1692  *  * running out of memory while servicing a fault may generate a SIGBUS,
1693  *    rather than the expected SIGSEGV.
1694  */
1695 int i915_gem_mmap_gtt_version(void)
1696 {
1697         return 3;
1698 }
1699
1700 static inline struct i915_ggtt_view
1701 compute_partial_view(const struct drm_i915_gem_object *obj,
1702                      pgoff_t page_offset,
1703                      unsigned int chunk)
1704 {
1705         struct i915_ggtt_view view;
1706
1707         if (i915_gem_object_is_tiled(obj))
1708                 chunk = roundup(chunk, tile_row_pages(obj));
1709
1710         view.type = I915_GGTT_VIEW_PARTIAL;
1711         view.partial.offset = rounddown(page_offset, chunk);
1712         view.partial.size =
1713                 min_t(unsigned int, chunk,
1714                       (obj->base.size >> PAGE_SHIFT) - view.partial.offset);
1715
1716         /* If the partial covers the entire object, just create a normal VMA. */
1717         if (chunk >= obj->base.size >> PAGE_SHIFT)
1718                 view.type = I915_GGTT_VIEW_NORMAL;
1719
1720         return view;
1721 }
1722
1723 /**
1724  * i915_gem_fault - fault a page into the GTT
1725  * @vmf: fault info
1726  *
1727  * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
1728  * from userspace.  The fault handler takes care of binding the object to
1729  * the GTT (if needed), allocating and programming a fence register (again,
1730  * only if needed based on whether the old reg is still valid or the object
1731  * is tiled) and inserting a new PTE into the faulting process.
1732  *
1733  * Note that the faulting process may involve evicting existing objects
1734  * from the GTT and/or fence registers to make room.  So performance may
1735  * suffer if the GTT working set is large or there are few fence registers
1736  * left.
1737  *
1738  * The current feature set supported by i915_gem_fault() and thus GTT mmaps
1739  * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
1740  */
1741 vm_fault_t i915_gem_fault(struct vm_fault *vmf)
1742 {
1743 #define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT)
1744         struct vm_area_struct *area = vmf->vma;
1745         struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
1746         struct drm_device *dev = obj->base.dev;
1747         struct drm_i915_private *dev_priv = to_i915(dev);
1748         struct i915_ggtt *ggtt = &dev_priv->ggtt;
1749         bool write = area->vm_flags & VM_WRITE;
1750         intel_wakeref_t wakeref;
1751         struct i915_vma *vma;
1752         pgoff_t page_offset;
1753         int srcu;
1754         int ret;
1755
1756         /* Sanity check that we allow writing into this object */
1757         if (i915_gem_object_is_readonly(obj) && write)
1758                 return VM_FAULT_SIGBUS;
1759
1760         /* We don't use vmf->pgoff since that has the fake offset */
1761         page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
1762
1763         trace_i915_gem_object_fault(obj, page_offset, true, write);
1764
1765         ret = i915_gem_object_pin_pages(obj);
1766         if (ret)
1767                 goto err;
1768
1769         wakeref = intel_runtime_pm_get(dev_priv);
1770
1771         srcu = i915_reset_trylock(dev_priv);
1772         if (srcu < 0) {
1773                 ret = srcu;
1774                 goto err_rpm;
1775         }
1776
1777         ret = i915_mutex_lock_interruptible(dev);
1778         if (ret)
1779                 goto err_reset;
1780
1781         /* Access to snoopable pages through the GTT is incoherent. */
1782         if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) {
1783                 ret = -EFAULT;
1784                 goto err_unlock;
1785         }
1786
1787         /* Now pin it into the GTT as needed */
1788         vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
1789                                        PIN_MAPPABLE |
1790                                        PIN_NONBLOCK |
1791                                        PIN_NONFAULT);
1792         if (IS_ERR(vma)) {
1793                 /* Use a partial view if it is bigger than available space */
1794                 struct i915_ggtt_view view =
1795                         compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
1796                 unsigned int flags;
1797
1798                 flags = PIN_MAPPABLE;
1799                 if (view.type == I915_GGTT_VIEW_NORMAL)
1800                         flags |= PIN_NONBLOCK; /* avoid warnings for pinned */
1801
1802                 /*
1803                  * Userspace is now writing through an untracked VMA, abandon
1804                  * all hope that the hardware is able to track future writes.
1805                  */
1806                 obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
1807
1808                 vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
1809                 if (IS_ERR(vma) && !view.type) {
1810                         flags = PIN_MAPPABLE;
1811                         view.type = I915_GGTT_VIEW_PARTIAL;
1812                         vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
1813                 }
1814         }
1815         if (IS_ERR(vma)) {
1816                 ret = PTR_ERR(vma);
1817                 goto err_unlock;
1818         }
1819
1820         ret = i915_vma_pin_fence(vma);
1821         if (ret)
1822                 goto err_unpin;
1823
1824         /* Finally, remap it using the new GTT offset */
1825         ret = remap_io_mapping(area,
1826                                area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT),
1827                                (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT,
1828                                min_t(u64, vma->size, area->vm_end - area->vm_start),
1829                                &ggtt->iomap);
1830         if (ret)
1831                 goto err_fence;
1832
1833         /* Mark as being mmapped into userspace for later revocation */
1834         assert_rpm_wakelock_held(dev_priv);
1835         if (!i915_vma_set_userfault(vma) && !obj->userfault_count++)
1836                 list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
1837         if (CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)
1838                 intel_wakeref_auto(&dev_priv->mm.userfault_wakeref,
1839                                    msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND));
1840         GEM_BUG_ON(!obj->userfault_count);
1841
1842         i915_vma_set_ggtt_write(vma);
1843
1844 err_fence:
1845         i915_vma_unpin_fence(vma);
1846 err_unpin:
1847         __i915_vma_unpin(vma);
1848 err_unlock:
1849         mutex_unlock(&dev->struct_mutex);
1850 err_reset:
1851         i915_reset_unlock(dev_priv, srcu);
1852 err_rpm:
1853         intel_runtime_pm_put(dev_priv, wakeref);
1854         i915_gem_object_unpin_pages(obj);
1855 err:
1856         switch (ret) {
1857         case -EIO:
1858                 /*
1859                  * We eat errors when the gpu is terminally wedged to avoid
1860                  * userspace unduly crashing (gl has no provisions for mmaps to
1861                  * fail). But any other -EIO isn't ours (e.g. swap in failure)
1862                  * and so needs to be reported.
1863                  */
1864                 if (!i915_terminally_wedged(dev_priv))
1865                         return VM_FAULT_SIGBUS;
1866                 /* else: fall through */
1867         case -EAGAIN:
1868                 /*
1869                  * EAGAIN means the gpu is hung and we'll wait for the error
1870                  * handler to reset everything when re-faulting in
1871                  * i915_mutex_lock_interruptible.
1872                  */
1873         case 0:
1874         case -ERESTARTSYS:
1875         case -EINTR:
1876         case -EBUSY:
1877                 /*
1878                  * EBUSY is ok: this just means that another thread
1879                  * already did the job.
1880                  */
1881                 return VM_FAULT_NOPAGE;
1882         case -ENOMEM:
1883                 return VM_FAULT_OOM;
1884         case -ENOSPC:
1885         case -EFAULT:
1886                 return VM_FAULT_SIGBUS;
1887         default:
1888                 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
1889                 return VM_FAULT_SIGBUS;
1890         }
1891 }
1892
1893 static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
1894 {
1895         struct i915_vma *vma;
1896
1897         GEM_BUG_ON(!obj->userfault_count);
1898
1899         obj->userfault_count = 0;
1900         list_del(&obj->userfault_link);
1901         drm_vma_node_unmap(&obj->base.vma_node,
1902                            obj->base.dev->anon_inode->i_mapping);
1903
1904         for_each_ggtt_vma(vma, obj)
1905                 i915_vma_unset_userfault(vma);
1906 }
1907
1908 /**
1909  * i915_gem_release_mmap - remove physical page mappings
1910  * @obj: obj in question
1911  *
1912  * Preserve the reservation of the mmapping with the DRM core code, but
1913  * relinquish ownership of the pages back to the system.
1914  *
1915  * It is vital that we remove the page mapping if we have mapped a tiled
1916  * object through the GTT and then lose the fence register due to
1917  * resource pressure. Similarly if the object has been moved out of the
1918  * aperture, than pages mapped into userspace must be revoked. Removing the
1919  * mapping will then trigger a page fault on the next user access, allowing
1920  * fixup by i915_gem_fault().
1921  */
1922 void
1923 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
1924 {
1925         struct drm_i915_private *i915 = to_i915(obj->base.dev);
1926         intel_wakeref_t wakeref;
1927
1928         /* Serialisation between user GTT access and our code depends upon
1929          * revoking the CPU's PTE whilst the mutex is held. The next user
1930          * pagefault then has to wait until we release the mutex.
1931          *
1932          * Note that RPM complicates somewhat by adding an additional
1933          * requirement that operations to the GGTT be made holding the RPM
1934          * wakeref.
1935          */
1936         lockdep_assert_held(&i915->drm.struct_mutex);
1937         wakeref = intel_runtime_pm_get(i915);
1938
1939         if (!obj->userfault_count)
1940                 goto out;
1941
1942         __i915_gem_object_release_mmap(obj);
1943
1944         /* Ensure that the CPU's PTE are revoked and there are not outstanding
1945          * memory transactions from userspace before we return. The TLB
1946          * flushing implied above by changing the PTE above *should* be
1947          * sufficient, an extra barrier here just provides us with a bit
1948          * of paranoid documentation about our requirement to serialise
1949          * memory writes before touching registers / GSM.
1950          */
1951         wmb();
1952
1953 out:
1954         intel_runtime_pm_put(i915, wakeref);
1955 }
1956
1957 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
1958 {
1959         struct drm_i915_gem_object *obj, *on;
1960         int i;
1961
1962         /*
1963          * Only called during RPM suspend. All users of the userfault_list
1964          * must be holding an RPM wakeref to ensure that this can not
1965          * run concurrently with themselves (and use the struct_mutex for
1966          * protection between themselves).
1967          */
1968
1969         list_for_each_entry_safe(obj, on,
1970                                  &dev_priv->mm.userfault_list, userfault_link)
1971                 __i915_gem_object_release_mmap(obj);
1972
1973         /* The fence will be lost when the device powers down. If any were
1974          * in use by hardware (i.e. they are pinned), we should not be powering
1975          * down! All other fences will be reacquired by the user upon waking.
1976          */
1977         for (i = 0; i < dev_priv->num_fence_regs; i++) {
1978                 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
1979
1980                 /* Ideally we want to assert that the fence register is not
1981                  * live at this point (i.e. that no piece of code will be
1982                  * trying to write through fence + GTT, as that both violates
1983                  * our tracking of activity and associated locking/barriers,
1984                  * but also is illegal given that the hw is powered down).
1985                  *
1986                  * Previously we used reg->pin_count as a "liveness" indicator.
1987                  * That is not sufficient, and we need a more fine-grained
1988                  * tool if we want to have a sanity check here.
1989                  */
1990
1991                 if (!reg->vma)
1992                         continue;
1993
1994                 GEM_BUG_ON(i915_vma_has_userfault(reg->vma));
1995                 reg->dirty = true;
1996         }
1997 }
1998
1999 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
2000 {
2001         struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2002         int err;
2003
2004         err = drm_gem_create_mmap_offset(&obj->base);
2005         if (likely(!err))
2006                 return 0;
2007
2008         /* Attempt to reap some mmap space from dead objects */
2009         do {
2010                 err = i915_gem_wait_for_idle(dev_priv,
2011                                              I915_WAIT_INTERRUPTIBLE,
2012                                              MAX_SCHEDULE_TIMEOUT);
2013                 if (err)
2014                         break;
2015
2016                 i915_gem_drain_freed_objects(dev_priv);
2017                 err = drm_gem_create_mmap_offset(&obj->base);
2018                 if (!err)
2019                         break;
2020
2021         } while (flush_delayed_work(&dev_priv->gem.retire_work));
2022
2023         return err;
2024 }
2025
2026 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
2027 {
2028         drm_gem_free_mmap_offset(&obj->base);
2029 }
2030
2031 int
2032 i915_gem_mmap_gtt(struct drm_file *file,
2033                   struct drm_device *dev,
2034                   u32 handle,
2035                   u64 *offset)
2036 {
2037         struct drm_i915_gem_object *obj;
2038         int ret;
2039
2040         obj = i915_gem_object_lookup(file, handle);
2041         if (!obj)
2042                 return -ENOENT;
2043
2044         ret = i915_gem_object_create_mmap_offset(obj);
2045         if (ret == 0)
2046                 *offset = drm_vma_node_offset_addr(&obj->base.vma_node);
2047
2048         i915_gem_object_put(obj);
2049         return ret;
2050 }
2051
2052 /**
2053  * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
2054  * @dev: DRM device
2055  * @data: GTT mapping ioctl data
2056  * @file: GEM object info
2057  *
2058  * Simply returns the fake offset to userspace so it can mmap it.
2059  * The mmap call will end up in drm_gem_mmap(), which will set things
2060  * up so we can get faults in the handler above.
2061  *
2062  * The fault handler will take care of binding the object into the GTT
2063  * (since it may have been evicted to make room for something), allocating
2064  * a fence register, and mapping the appropriate aperture address into
2065  * userspace.
2066  */
2067 int
2068 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
2069                         struct drm_file *file)
2070 {
2071         struct drm_i915_gem_mmap_gtt *args = data;
2072
2073         return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
2074 }
2075
2076 /* Immediately discard the backing storage */
2077 void __i915_gem_object_truncate(struct drm_i915_gem_object *obj)
2078 {
2079         i915_gem_object_free_mmap_offset(obj);
2080
2081         if (obj->base.filp == NULL)
2082                 return;
2083
2084         /* Our goal here is to return as much of the memory as
2085          * is possible back to the system as we are called from OOM.
2086          * To do this we must instruct the shmfs to drop all of its
2087          * backing pages, *now*.
2088          */
2089         shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
2090         obj->mm.madv = __I915_MADV_PURGED;
2091         obj->mm.pages = ERR_PTR(-EFAULT);
2092 }
2093
2094 /*
2095  * Move pages to appropriate lru and release the pagevec, decrementing the
2096  * ref count of those pages.
2097  */
2098 static void check_release_pagevec(struct pagevec *pvec)
2099 {
2100         check_move_unevictable_pages(pvec);
2101         __pagevec_release(pvec);
2102         cond_resched();
2103 }
2104
2105 static void
2106 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
2107                               struct sg_table *pages)
2108 {
2109         struct sgt_iter sgt_iter;
2110         struct pagevec pvec;
2111         struct page *page;
2112
2113         __i915_gem_object_release_shmem(obj, pages, true);
2114         i915_gem_gtt_finish_pages(obj, pages);
2115
2116         if (i915_gem_object_needs_bit17_swizzle(obj))
2117                 i915_gem_object_save_bit_17_swizzle(obj, pages);
2118
2119         mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping);
2120
2121         pagevec_init(&pvec);
2122         for_each_sgt_page(page, sgt_iter, pages) {
2123                 if (obj->mm.dirty)
2124                         set_page_dirty(page);
2125
2126                 if (obj->mm.madv == I915_MADV_WILLNEED)
2127                         mark_page_accessed(page);
2128
2129                 if (!pagevec_add(&pvec, page))
2130                         check_release_pagevec(&pvec);
2131         }
2132         if (pagevec_count(&pvec))
2133                 check_release_pagevec(&pvec);
2134         obj->mm.dirty = false;
2135
2136         sg_free_table(pages);
2137         kfree(pages);
2138 }
2139
2140 static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
2141 {
2142         struct radix_tree_iter iter;
2143         void __rcu **slot;
2144
2145         rcu_read_lock();
2146         radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0)
2147                 radix_tree_delete(&obj->mm.get_page.radix, iter.index);
2148         rcu_read_unlock();
2149 }
2150
2151 static struct sg_table *
2152 __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
2153 {
2154         struct drm_i915_private *i915 = to_i915(obj->base.dev);
2155         struct sg_table *pages;
2156
2157         pages = fetch_and_zero(&obj->mm.pages);
2158         if (IS_ERR_OR_NULL(pages))
2159                 return pages;
2160
2161         spin_lock(&i915->mm.obj_lock);
2162         list_del(&obj->mm.link);
2163         spin_unlock(&i915->mm.obj_lock);
2164
2165         if (obj->mm.mapping) {
2166                 void *ptr;
2167
2168                 ptr = page_mask_bits(obj->mm.mapping);
2169                 if (is_vmalloc_addr(ptr))
2170                         vunmap(ptr);
2171                 else
2172                         kunmap(kmap_to_page(ptr));
2173
2174                 obj->mm.mapping = NULL;
2175         }
2176
2177         __i915_gem_object_reset_page_iter(obj);
2178         obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
2179
2180         return pages;
2181 }
2182
2183 int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
2184                                 enum i915_mm_subclass subclass)
2185 {
2186         struct sg_table *pages;
2187         int ret;
2188
2189         if (i915_gem_object_has_pinned_pages(obj))
2190                 return -EBUSY;
2191
2192         GEM_BUG_ON(obj->bind_count);
2193
2194         /* May be called by shrinker from within get_pages() (on another bo) */
2195         mutex_lock_nested(&obj->mm.lock, subclass);
2196         if (unlikely(atomic_read(&obj->mm.pages_pin_count))) {
2197                 ret = -EBUSY;
2198                 goto unlock;
2199         }
2200
2201         /*
2202          * ->put_pages might need to allocate memory for the bit17 swizzle
2203          * array, hence protect them from being reaped by removing them from gtt
2204          * lists early.
2205          */
2206         pages = __i915_gem_object_unset_pages(obj);
2207
2208         /*
2209          * XXX Temporary hijinx to avoid updating all backends to handle
2210          * NULL pages. In the future, when we have more asynchronous
2211          * get_pages backends we should be better able to handle the
2212          * cancellation of the async task in a more uniform manner.
2213          */
2214         if (!pages && !i915_gem_object_needs_async_cancel(obj))
2215                 pages = ERR_PTR(-EINVAL);
2216
2217         if (!IS_ERR(pages))
2218                 obj->ops->put_pages(obj, pages);
2219
2220         ret = 0;
2221 unlock:
2222         mutex_unlock(&obj->mm.lock);
2223
2224         return ret;
2225 }
2226
2227 bool i915_sg_trim(struct sg_table *orig_st)
2228 {
2229         struct sg_table new_st;
2230         struct scatterlist *sg, *new_sg;
2231         unsigned int i;
2232
2233         if (orig_st->nents == orig_st->orig_nents)
2234                 return false;
2235
2236         if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN))
2237                 return false;
2238
2239         new_sg = new_st.sgl;
2240         for_each_sg(orig_st->sgl, sg, orig_st->nents, i) {
2241                 sg_set_page(new_sg, sg_page(sg), sg->length, 0);
2242                 sg_dma_address(new_sg) = sg_dma_address(sg);
2243                 sg_dma_len(new_sg) = sg_dma_len(sg);
2244
2245                 new_sg = sg_next(new_sg);
2246         }
2247         GEM_BUG_ON(new_sg); /* Should walk exactly nents and hit the end */
2248
2249         sg_free_table(orig_st);
2250
2251         *orig_st = new_st;
2252         return true;
2253 }
2254
2255 static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2256 {
2257         struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2258         const unsigned long page_count = obj->base.size / PAGE_SIZE;
2259         unsigned long i;
2260         struct address_space *mapping;
2261         struct sg_table *st;
2262         struct scatterlist *sg;
2263         struct sgt_iter sgt_iter;
2264         struct page *page;
2265         unsigned long last_pfn = 0;     /* suppress gcc warning */
2266         unsigned int max_segment = i915_sg_segment_size();
2267         unsigned int sg_page_sizes;
2268         struct pagevec pvec;
2269         gfp_t noreclaim;
2270         int ret;
2271
2272         /*
2273          * Assert that the object is not currently in any GPU domain. As it
2274          * wasn't in the GTT, there shouldn't be any way it could have been in
2275          * a GPU cache
2276          */
2277         GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
2278         GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
2279
2280         /*
2281          * If there's no chance of allocating enough pages for the whole
2282          * object, bail early.
2283          */
2284         if (page_count > totalram_pages())
2285                 return -ENOMEM;
2286
2287         st = kmalloc(sizeof(*st), GFP_KERNEL);
2288         if (st == NULL)
2289                 return -ENOMEM;
2290
2291 rebuild_st:
2292         if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
2293                 kfree(st);
2294                 return -ENOMEM;
2295         }
2296
2297         /*
2298          * Get the list of pages out of our struct file.  They'll be pinned
2299          * at this point until we release them.
2300          *
2301          * Fail silently without starting the shrinker
2302          */
2303         mapping = obj->base.filp->f_mapping;
2304         mapping_set_unevictable(mapping);
2305         noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM);
2306         noreclaim |= __GFP_NORETRY | __GFP_NOWARN;
2307
2308         sg = st->sgl;
2309         st->nents = 0;
2310         sg_page_sizes = 0;
2311         for (i = 0; i < page_count; i++) {
2312                 const unsigned int shrink[] = {
2313                         I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE,
2314                         0,
2315                 }, *s = shrink;
2316                 gfp_t gfp = noreclaim;
2317
2318                 do {
2319                         cond_resched();
2320                         page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2321                         if (!IS_ERR(page))
2322                                 break;
2323
2324                         if (!*s) {
2325                                 ret = PTR_ERR(page);
2326                                 goto err_sg;
2327                         }
2328
2329                         i915_gem_shrink(dev_priv, 2 * page_count, NULL, *s++);
2330
2331                         /*
2332                          * We've tried hard to allocate the memory by reaping
2333                          * our own buffer, now let the real VM do its job and
2334                          * go down in flames if truly OOM.
2335                          *
2336                          * However, since graphics tend to be disposable,
2337                          * defer the oom here by reporting the ENOMEM back
2338                          * to userspace.
2339                          */
2340                         if (!*s) {
2341                                 /* reclaim and warn, but no oom */
2342                                 gfp = mapping_gfp_mask(mapping);
2343
2344                                 /*
2345                                  * Our bo are always dirty and so we require
2346                                  * kswapd to reclaim our pages (direct reclaim
2347                                  * does not effectively begin pageout of our
2348                                  * buffers on its own). However, direct reclaim
2349                                  * only waits for kswapd when under allocation
2350                                  * congestion. So as a result __GFP_RECLAIM is
2351                                  * unreliable and fails to actually reclaim our
2352                                  * dirty pages -- unless you try over and over
2353                                  * again with !__GFP_NORETRY. However, we still
2354                                  * want to fail this allocation rather than
2355                                  * trigger the out-of-memory killer and for
2356                                  * this we want __GFP_RETRY_MAYFAIL.
2357                                  */
2358                                 gfp |= __GFP_RETRY_MAYFAIL;
2359                         }
2360                 } while (1);
2361
2362                 if (!i ||
2363                     sg->length >= max_segment ||
2364                     page_to_pfn(page) != last_pfn + 1) {
2365                         if (i) {
2366                                 sg_page_sizes |= sg->length;
2367                                 sg = sg_next(sg);
2368                         }
2369                         st->nents++;
2370                         sg_set_page(sg, page, PAGE_SIZE, 0);
2371                 } else {
2372                         sg->length += PAGE_SIZE;
2373                 }
2374                 last_pfn = page_to_pfn(page);
2375
2376                 /* Check that the i965g/gm workaround works. */
2377                 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
2378         }
2379         if (sg) { /* loop terminated early; short sg table */
2380                 sg_page_sizes |= sg->length;
2381                 sg_mark_end(sg);
2382         }
2383
2384         /* Trim unused sg entries to avoid wasting memory. */
2385         i915_sg_trim(st);
2386
2387         ret = i915_gem_gtt_prepare_pages(obj, st);
2388         if (ret) {
2389                 /*
2390                  * DMA remapping failed? One possible cause is that
2391                  * it could not reserve enough large entries, asking
2392                  * for PAGE_SIZE chunks instead may be helpful.
2393                  */
2394                 if (max_segment > PAGE_SIZE) {
2395                         for_each_sgt_page(page, sgt_iter, st)
2396                                 put_page(page);
2397                         sg_free_table(st);
2398
2399                         max_segment = PAGE_SIZE;
2400                         goto rebuild_st;
2401                 } else {
2402                         dev_warn(&dev_priv->drm.pdev->dev,
2403                                  "Failed to DMA remap %lu pages\n",
2404                                  page_count);
2405                         goto err_pages;
2406                 }
2407         }
2408
2409         if (i915_gem_object_needs_bit17_swizzle(obj))
2410                 i915_gem_object_do_bit_17_swizzle(obj, st);
2411
2412         __i915_gem_object_set_pages(obj, st, sg_page_sizes);
2413
2414         return 0;
2415
2416 err_sg:
2417         sg_mark_end(sg);
2418 err_pages:
2419         mapping_clear_unevictable(mapping);
2420         pagevec_init(&pvec);
2421         for_each_sgt_page(page, sgt_iter, st) {
2422                 if (!pagevec_add(&pvec, page))
2423                         check_release_pagevec(&pvec);
2424         }
2425         if (pagevec_count(&pvec))
2426                 check_release_pagevec(&pvec);
2427         sg_free_table(st);
2428         kfree(st);
2429
2430         /*
2431          * shmemfs first checks if there is enough memory to allocate the page
2432          * and reports ENOSPC should there be insufficient, along with the usual
2433          * ENOMEM for a genuine allocation failure.
2434          *
2435          * We use ENOSPC in our driver to mean that we have run out of aperture
2436          * space and so want to translate the error from shmemfs back to our
2437          * usual understanding of ENOMEM.
2438          */
2439         if (ret == -ENOSPC)
2440                 ret = -ENOMEM;
2441
2442         return ret;
2443 }
2444
2445 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
2446                                  struct sg_table *pages,
2447                                  unsigned int sg_page_sizes)
2448 {
2449         struct drm_i915_private *i915 = to_i915(obj->base.dev);
2450         unsigned long supported = INTEL_INFO(i915)->page_sizes;
2451         int i;
2452
2453         lockdep_assert_held(&obj->mm.lock);
2454
2455         /* Make the pages coherent with the GPU (flushing any swapin). */
2456         if (obj->cache_dirty) {
2457                 obj->write_domain = 0;
2458                 if (i915_gem_object_has_struct_page(obj))
2459                         drm_clflush_sg(pages);
2460                 obj->cache_dirty = false;
2461         }
2462
2463         obj->mm.get_page.sg_pos = pages->sgl;
2464         obj->mm.get_page.sg_idx = 0;
2465
2466         obj->mm.pages = pages;
2467
2468         if (i915_gem_object_is_tiled(obj) &&
2469             i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
2470                 GEM_BUG_ON(obj->mm.quirked);
2471                 __i915_gem_object_pin_pages(obj);
2472                 obj->mm.quirked = true;
2473         }
2474
2475         GEM_BUG_ON(!sg_page_sizes);
2476         obj->mm.page_sizes.phys = sg_page_sizes;
2477
2478         /*
2479          * Calculate the supported page-sizes which fit into the given
2480          * sg_page_sizes. This will give us the page-sizes which we may be able
2481          * to use opportunistically when later inserting into the GTT. For
2482          * example if phys=2G, then in theory we should be able to use 1G, 2M,
2483          * 64K or 4K pages, although in practice this will depend on a number of
2484          * other factors.
2485          */
2486         obj->mm.page_sizes.sg = 0;
2487         for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) {
2488                 if (obj->mm.page_sizes.phys & ~0u << i)
2489                         obj->mm.page_sizes.sg |= BIT(i);
2490         }
2491         GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg));
2492
2493         spin_lock(&i915->mm.obj_lock);
2494         list_add(&obj->mm.link, &i915->mm.unbound_list);
2495         spin_unlock(&i915->mm.obj_lock);
2496 }
2497
2498 static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2499 {
2500         int err;
2501
2502         if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) {
2503                 DRM_DEBUG("Attempting to obtain a purgeable object\n");
2504                 return -EFAULT;
2505         }
2506
2507         err = obj->ops->get_pages(obj);
2508         GEM_BUG_ON(!err && !i915_gem_object_has_pages(obj));
2509
2510         return err;
2511 }
2512
2513 /* Ensure that the associated pages are gathered from the backing storage
2514  * and pinned into our object. i915_gem_object_pin_pages() may be called
2515  * multiple times before they are released by a single call to
2516  * i915_gem_object_unpin_pages() - once the pages are no longer referenced
2517  * either as a result of memory pressure (reaping pages under the shrinker)
2518  * or as the object is itself released.
2519  */
2520 int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2521 {
2522         int err;
2523
2524         err = mutex_lock_interruptible(&obj->mm.lock);
2525         if (err)
2526                 return err;
2527
2528         if (unlikely(!i915_gem_object_has_pages(obj))) {
2529                 GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
2530
2531                 err = ____i915_gem_object_get_pages(obj);
2532                 if (err)
2533                         goto unlock;
2534
2535                 smp_mb__before_atomic();
2536         }
2537         atomic_inc(&obj->mm.pages_pin_count);
2538
2539 unlock:
2540         mutex_unlock(&obj->mm.lock);
2541         return err;
2542 }
2543
2544 /* The 'mapping' part of i915_gem_object_pin_map() below */
2545 static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
2546                                  enum i915_map_type type)
2547 {
2548         unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
2549         struct sg_table *sgt = obj->mm.pages;
2550         struct sgt_iter sgt_iter;
2551         struct page *page;
2552         struct page *stack_pages[32];
2553         struct page **pages = stack_pages;
2554         unsigned long i = 0;
2555         pgprot_t pgprot;
2556         void *addr;
2557
2558         /* A single page can always be kmapped */
2559         if (n_pages == 1 && type == I915_MAP_WB)
2560                 return kmap(sg_page(sgt->sgl));
2561
2562         if (n_pages > ARRAY_SIZE(stack_pages)) {
2563                 /* Too big for stack -- allocate temporary array instead */
2564                 pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL);
2565                 if (!pages)
2566                         return NULL;
2567         }
2568
2569         for_each_sgt_page(page, sgt_iter, sgt)
2570                 pages[i++] = page;
2571
2572         /* Check that we have the expected number of pages */
2573         GEM_BUG_ON(i != n_pages);
2574
2575         switch (type) {
2576         default:
2577                 MISSING_CASE(type);
2578                 /* fallthrough to use PAGE_KERNEL anyway */
2579         case I915_MAP_WB:
2580                 pgprot = PAGE_KERNEL;
2581                 break;
2582         case I915_MAP_WC:
2583                 pgprot = pgprot_writecombine(PAGE_KERNEL_IO);
2584                 break;
2585         }
2586         addr = vmap(pages, n_pages, 0, pgprot);
2587
2588         if (pages != stack_pages)
2589                 kvfree(pages);
2590
2591         return addr;
2592 }
2593
2594 /* get, pin, and map the pages of the object into kernel space */
2595 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
2596                               enum i915_map_type type)
2597 {
2598         enum i915_map_type has_type;
2599         bool pinned;
2600         void *ptr;
2601         int ret;
2602
2603         if (unlikely(!i915_gem_object_has_struct_page(obj)))
2604                 return ERR_PTR(-ENXIO);
2605
2606         ret = mutex_lock_interruptible(&obj->mm.lock);
2607         if (ret)
2608                 return ERR_PTR(ret);
2609
2610         pinned = !(type & I915_MAP_OVERRIDE);
2611         type &= ~I915_MAP_OVERRIDE;
2612
2613         if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) {
2614                 if (unlikely(!i915_gem_object_has_pages(obj))) {
2615                         GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
2616
2617                         ret = ____i915_gem_object_get_pages(obj);
2618                         if (ret)
2619                                 goto err_unlock;
2620
2621                         smp_mb__before_atomic();
2622                 }
2623                 atomic_inc(&obj->mm.pages_pin_count);
2624                 pinned = false;
2625         }
2626         GEM_BUG_ON(!i915_gem_object_has_pages(obj));
2627
2628         ptr = page_unpack_bits(obj->mm.mapping, &has_type);
2629         if (ptr && has_type != type) {
2630                 if (pinned) {
2631                         ret = -EBUSY;
2632                         goto err_unpin;
2633                 }
2634
2635                 if (is_vmalloc_addr(ptr))
2636                         vunmap(ptr);
2637                 else
2638                         kunmap(kmap_to_page(ptr));
2639
2640                 ptr = obj->mm.mapping = NULL;
2641         }
2642
2643         if (!ptr) {
2644                 ptr = i915_gem_object_map(obj, type);
2645                 if (!ptr) {
2646                         ret = -ENOMEM;
2647                         goto err_unpin;
2648                 }
2649
2650                 obj->mm.mapping = page_pack_bits(ptr, type);
2651         }
2652
2653 out_unlock:
2654         mutex_unlock(&obj->mm.lock);
2655         return ptr;
2656
2657 err_unpin:
2658         atomic_dec(&obj->mm.pages_pin_count);
2659 err_unlock:
2660         ptr = ERR_PTR(ret);
2661         goto out_unlock;
2662 }
2663
2664 void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj,
2665                                  unsigned long offset,
2666                                  unsigned long size)
2667 {
2668         enum i915_map_type has_type;
2669         void *ptr;
2670
2671         GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
2672         GEM_BUG_ON(range_overflows_t(typeof(obj->base.size),
2673                                      offset, size, obj->base.size));
2674
2675         obj->mm.dirty = true;
2676
2677         if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)
2678                 return;
2679
2680         ptr = page_unpack_bits(obj->mm.mapping, &has_type);
2681         if (has_type == I915_MAP_WC)
2682                 return;
2683
2684         drm_clflush_virt_range(ptr + offset, size);
2685         if (size == obj->base.size) {
2686                 obj->write_domain &= ~I915_GEM_DOMAIN_CPU;
2687                 obj->cache_dirty = false;
2688         }
2689 }
2690
2691 static int
2692 i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
2693                            const struct drm_i915_gem_pwrite *arg)
2694 {
2695         struct address_space *mapping = obj->base.filp->f_mapping;
2696         char __user *user_data = u64_to_user_ptr(arg->data_ptr);
2697         u64 remain, offset;
2698         unsigned int pg;
2699
2700         /* Caller already validated user args */
2701         GEM_BUG_ON(!access_ok(user_data, arg->size));
2702
2703         /*
2704          * Before we instantiate/pin the backing store for our use, we
2705          * can prepopulate the shmemfs filp efficiently using a write into
2706          * the pagecache. We avoid the penalty of instantiating all the
2707          * pages, important if the user is just writing to a few and never
2708          * uses the object on the GPU, and using a direct write into shmemfs
2709          * allows it to avoid the cost of retrieving a page (either swapin
2710          * or clearing-before-use) before it is overwritten.
2711          */
2712         if (i915_gem_object_has_pages(obj))
2713                 return -ENODEV;
2714
2715         if (obj->mm.madv != I915_MADV_WILLNEED)
2716                 return -EFAULT;
2717
2718         /*
2719          * Before the pages are instantiated the object is treated as being
2720          * in the CPU domain. The pages will be clflushed as required before
2721          * use, and we can freely write into the pages directly. If userspace
2722          * races pwrite with any other operation; corruption will ensue -
2723          * that is userspace's prerogative!
2724          */
2725
2726         remain = arg->size;
2727         offset = arg->offset;
2728         pg = offset_in_page(offset);
2729
2730         do {
2731                 unsigned int len, unwritten;
2732                 struct page *page;
2733                 void *data, *vaddr;
2734                 int err;
2735                 char c;
2736
2737                 len = PAGE_SIZE - pg;
2738                 if (len > remain)
2739                         len = remain;
2740
2741                 /* Prefault the user page to reduce potential recursion */
2742                 err = __get_user(c, user_data);
2743                 if (err)
2744                         return err;
2745
2746                 err = __get_user(c, user_data + len - 1);
2747                 if (err)
2748                         return err;
2749
2750                 err = pagecache_write_begin(obj->base.filp, mapping,
2751                                             offset, len, 0,
2752                                             &page, &data);
2753                 if (err < 0)
2754                         return err;
2755
2756                 vaddr = kmap_atomic(page);
2757                 unwritten = __copy_from_user_inatomic(vaddr + pg,
2758                                                       user_data,
2759                                                       len);
2760                 kunmap_atomic(vaddr);
2761
2762                 err = pagecache_write_end(obj->base.filp, mapping,
2763                                           offset, len, len - unwritten,
2764                                           page, data);
2765                 if (err < 0)
2766                         return err;
2767
2768                 /* We don't handle -EFAULT, leave it to the caller to check */
2769                 if (unwritten)
2770                         return -ENODEV;
2771
2772                 remain -= len;
2773                 user_data += len;
2774                 offset += len;
2775                 pg = 0;
2776         } while (remain);
2777
2778         return 0;
2779 }
2780
2781 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
2782 {
2783         struct drm_i915_private *i915 = to_i915(gem->dev);
2784         struct drm_i915_gem_object *obj = to_intel_bo(gem);
2785         struct drm_i915_file_private *fpriv = file->driver_priv;
2786         struct i915_lut_handle *lut, *ln;
2787
2788         mutex_lock(&i915->drm.struct_mutex);
2789
2790         list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) {
2791                 struct i915_gem_context *ctx = lut->ctx;
2792                 struct i915_vma *vma;
2793
2794                 GEM_BUG_ON(ctx->file_priv == ERR_PTR(-EBADF));
2795                 if (ctx->file_priv != fpriv)
2796                         continue;
2797
2798                 vma = radix_tree_delete(&ctx->handles_vma, lut->handle);
2799                 GEM_BUG_ON(vma->obj != obj);
2800
2801                 /* We allow the process to have multiple handles to the same
2802                  * vma, in the same fd namespace, by virtue of flink/open.
2803                  */
2804                 GEM_BUG_ON(!vma->open_count);
2805                 if (!--vma->open_count && !i915_vma_is_ggtt(vma))
2806                         i915_vma_close(vma);
2807
2808                 list_del(&lut->obj_link);
2809                 list_del(&lut->ctx_link);
2810
2811                 i915_lut_handle_free(lut);
2812                 __i915_gem_object_release_unless_active(obj);
2813         }
2814
2815         mutex_unlock(&i915->drm.struct_mutex);
2816 }
2817
2818 static unsigned long to_wait_timeout(s64 timeout_ns)
2819 {
2820         if (timeout_ns < 0)
2821                 return MAX_SCHEDULE_TIMEOUT;
2822
2823         if (timeout_ns == 0)
2824                 return 0;
2825
2826         return nsecs_to_jiffies_timeout(timeout_ns);
2827 }
2828
2829 /**
2830  * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
2831  * @dev: drm device pointer
2832  * @data: ioctl data blob
2833  * @file: drm file pointer
2834  *
2835  * Returns 0 if successful, else an error is returned with the remaining time in
2836  * the timeout parameter.
2837  *  -ETIME: object is still busy after timeout
2838  *  -ERESTARTSYS: signal interrupted the wait
2839  *  -ENONENT: object doesn't exist
2840  * Also possible, but rare:
2841  *  -EAGAIN: incomplete, restart syscall
2842  *  -ENOMEM: damn
2843  *  -ENODEV: Internal IRQ fail
2844  *  -E?: The add request failed
2845  *
2846  * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
2847  * non-zero timeout parameter the wait ioctl will wait for the given number of
2848  * nanoseconds on an object becoming unbusy. Since the wait itself does so
2849  * without holding struct_mutex the object may become re-busied before this
2850  * function completes. A similar but shorter * race condition exists in the busy
2851  * ioctl
2852  */
2853 int
2854 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2855 {
2856         struct drm_i915_gem_wait *args = data;
2857         struct drm_i915_gem_object *obj;
2858         ktime_t start;
2859         long ret;
2860
2861         if (args->flags != 0)
2862                 return -EINVAL;
2863
2864         obj = i915_gem_object_lookup(file, args->bo_handle);
2865         if (!obj)
2866                 return -ENOENT;
2867
2868         start = ktime_get();
2869
2870         ret = i915_gem_object_wait(obj,
2871                                    I915_WAIT_INTERRUPTIBLE |
2872                                    I915_WAIT_PRIORITY |
2873                                    I915_WAIT_ALL,
2874                                    to_wait_timeout(args->timeout_ns));
2875
2876         if (args->timeout_ns > 0) {
2877                 args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
2878                 if (args->timeout_ns < 0)
2879                         args->timeout_ns = 0;
2880
2881                 /*
2882                  * Apparently ktime isn't accurate enough and occasionally has a
2883                  * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
2884                  * things up to make the test happy. We allow up to 1 jiffy.
2885                  *
2886                  * This is a regression from the timespec->ktime conversion.
2887                  */
2888                 if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns))
2889                         args->timeout_ns = 0;
2890
2891                 /* Asked to wait beyond the jiffie/scheduler precision? */
2892                 if (ret == -ETIME && args->timeout_ns)
2893                         ret = -EAGAIN;
2894         }
2895
2896         i915_gem_object_put(obj);
2897         return ret;
2898 }
2899
2900 static int wait_for_engines(struct drm_i915_private *i915)
2901 {
2902         if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) {
2903                 dev_err(i915->drm.dev,
2904                         "Failed to idle engines, declaring wedged!\n");
2905                 GEM_TRACE_DUMP();
2906                 i915_gem_set_wedged(i915);
2907                 return -EIO;
2908         }
2909
2910         return 0;
2911 }
2912
2913 static long
2914 wait_for_timelines(struct drm_i915_private *i915,
2915                    unsigned int flags, long timeout)
2916 {
2917         struct i915_gt_timelines *gt = &i915->gt.timelines;
2918         struct i915_timeline *tl;
2919
2920         mutex_lock(&gt->mutex);
2921         list_for_each_entry(tl, &gt->active_list, link) {
2922                 struct i915_request *rq;
2923
2924                 rq = i915_active_request_get_unlocked(&tl->last_request);
2925                 if (!rq)
2926                         continue;
2927
2928                 mutex_unlock(&gt->mutex);
2929
2930                 /*
2931                  * "Race-to-idle".
2932                  *
2933                  * Switching to the kernel context is often used a synchronous
2934                  * step prior to idling, e.g. in suspend for flushing all
2935                  * current operations to memory before sleeping. These we
2936                  * want to complete as quickly as possible to avoid prolonged
2937                  * stalls, so allow the gpu to boost to maximum clocks.
2938                  */
2939                 if (flags & I915_WAIT_FOR_IDLE_BOOST)
2940                         gen6_rps_boost(rq);
2941
2942                 timeout = i915_request_wait(rq, flags, timeout);
2943                 i915_request_put(rq);
2944                 if (timeout < 0)
2945                         return timeout;
2946
2947                 /* restart after reacquiring the lock */
2948                 mutex_lock(&gt->mutex);
2949                 tl = list_entry(&gt->active_list, typeof(*tl), link);
2950         }
2951         mutex_unlock(&gt->mutex);
2952
2953         return timeout;
2954 }
2955
2956 int i915_gem_wait_for_idle(struct drm_i915_private *i915,
2957                            unsigned int flags, long timeout)
2958 {
2959         GEM_TRACE("flags=%x (%s), timeout=%ld%s, awake?=%s\n",
2960                   flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked",
2961                   timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : "",
2962                   yesno(i915->gt.awake));
2963
2964         /* If the device is asleep, we have no requests outstanding */
2965         if (!READ_ONCE(i915->gt.awake))
2966                 return 0;
2967
2968         timeout = wait_for_timelines(i915, flags, timeout);
2969         if (timeout < 0)
2970                 return timeout;
2971
2972         if (flags & I915_WAIT_LOCKED) {
2973                 int err;
2974
2975                 lockdep_assert_held(&i915->drm.struct_mutex);
2976
2977                 err = wait_for_engines(i915);
2978                 if (err)
2979                         return err;
2980
2981                 i915_retire_requests(i915);
2982         }
2983
2984         return 0;
2985 }
2986
2987 static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
2988 {
2989         /*
2990          * We manually flush the CPU domain so that we can override and
2991          * force the flush for the display, and perform it asyncrhonously.
2992          */
2993         flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
2994         if (obj->cache_dirty)
2995                 i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
2996         obj->write_domain = 0;
2997 }
2998
2999 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
3000 {
3001         if (!READ_ONCE(obj->pin_global))
3002                 return;
3003
3004         mutex_lock(&obj->base.dev->struct_mutex);
3005         __i915_gem_object_flush_for_display(obj);
3006         mutex_unlock(&obj->base.dev->struct_mutex);
3007 }
3008
3009 /**
3010  * Moves a single object to the WC read, and possibly write domain.
3011  * @obj: object to act on
3012  * @write: ask for write access or read only
3013  *
3014  * This function returns when the move is complete, including waiting on
3015  * flushes to occur.
3016  */
3017 int
3018 i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
3019 {
3020         int ret;
3021
3022         lockdep_assert_held(&obj->base.dev->struct_mutex);
3023
3024         ret = i915_gem_object_wait(obj,
3025                                    I915_WAIT_INTERRUPTIBLE |
3026                                    I915_WAIT_LOCKED |
3027                                    (write ? I915_WAIT_ALL : 0),
3028                                    MAX_SCHEDULE_TIMEOUT);
3029         if (ret)
3030                 return ret;
3031
3032         if (obj->write_domain == I915_GEM_DOMAIN_WC)
3033                 return 0;
3034
3035         /* Flush and acquire obj->pages so that we are coherent through
3036          * direct access in memory with previous cached writes through
3037          * shmemfs and that our cache domain tracking remains valid.
3038          * For example, if the obj->filp was moved to swap without us
3039          * being notified and releasing the pages, we would mistakenly
3040          * continue to assume that the obj remained out of the CPU cached
3041          * domain.
3042          */
3043         ret = i915_gem_object_pin_pages(obj);
3044         if (ret)
3045                 return ret;
3046
3047         flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
3048
3049         /* Serialise direct access to this object with the barriers for
3050          * coherent writes from the GPU, by effectively invalidating the
3051          * WC domain upon first access.
3052          */
3053         if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
3054                 mb();
3055
3056         /* It should now be out of any other write domains, and we can update
3057          * the domain values for our changes.
3058          */
3059         GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
3060         obj->read_domains |= I915_GEM_DOMAIN_WC;
3061         if (write) {
3062                 obj->read_domains = I915_GEM_DOMAIN_WC;
3063                 obj->write_domain = I915_GEM_DOMAIN_WC;
3064                 obj->mm.dirty = true;
3065         }
3066
3067         i915_gem_object_unpin_pages(obj);
3068         return 0;
3069 }
3070
3071 /**
3072  * Moves a single object to the GTT read, and possibly write domain.
3073  * @obj: object to act on
3074  * @write: ask for write access or read only
3075  *
3076  * This function returns when the move is complete, including waiting on
3077  * flushes to occur.
3078  */
3079 int
3080 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
3081 {
3082         int ret;
3083
3084         lockdep_assert_held(&obj->base.dev->struct_mutex);
3085
3086         ret = i915_gem_object_wait(obj,
3087                                    I915_WAIT_INTERRUPTIBLE |
3088                                    I915_WAIT_LOCKED |
3089                                    (write ? I915_WAIT_ALL : 0),
3090                                    MAX_SCHEDULE_TIMEOUT);
3091         if (ret)
3092                 return ret;
3093
3094         if (obj->write_domain == I915_GEM_DOMAIN_GTT)
3095                 return 0;
3096
3097         /* Flush and acquire obj->pages so that we are coherent through
3098          * direct access in memory with previous cached writes through
3099          * shmemfs and that our cache domain tracking remains valid.
3100          * For example, if the obj->filp was moved to swap without us
3101          * being notified and releasing the pages, we would mistakenly
3102          * continue to assume that the obj remained out of the CPU cached
3103          * domain.
3104          */
3105         ret = i915_gem_object_pin_pages(obj);
3106         if (ret)
3107                 return ret;
3108
3109         flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
3110
3111         /* Serialise direct access to this object with the barriers for
3112          * coherent writes from the GPU, by effectively invalidating the
3113          * GTT domain upon first access.
3114          */
3115         if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
3116                 mb();
3117
3118         /* It should now be out of any other write domains, and we can update
3119          * the domain values for our changes.
3120          */
3121         GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
3122         obj->read_domains |= I915_GEM_DOMAIN_GTT;
3123         if (write) {
3124                 obj->read_domains = I915_GEM_DOMAIN_GTT;
3125                 obj->write_domain = I915_GEM_DOMAIN_GTT;
3126                 obj->mm.dirty = true;
3127         }
3128
3129         i915_gem_object_unpin_pages(obj);
3130         return 0;
3131 }
3132
3133 /**
3134  * Changes the cache-level of an object across all VMA.
3135  * @obj: object to act on
3136  * @cache_level: new cache level to set for the object
3137  *
3138  * After this function returns, the object will be in the new cache-level
3139  * across all GTT and the contents of the backing storage will be coherent,
3140  * with respect to the new cache-level. In order to keep the backing storage
3141  * coherent for all users, we only allow a single cache level to be set
3142  * globally on the object and prevent it from being changed whilst the
3143  * hardware is reading from the object. That is if the object is currently
3144  * on the scanout it will be set to uncached (or equivalent display
3145  * cache coherency) and all non-MOCS GPU access will also be uncached so
3146  * that all direct access to the scanout remains coherent.
3147  */
3148 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
3149                                     enum i915_cache_level cache_level)
3150 {
3151         struct i915_vma *vma;
3152         int ret;
3153
3154         lockdep_assert_held(&obj->base.dev->struct_mutex);
3155
3156         if (obj->cache_level == cache_level)
3157                 return 0;
3158
3159         /* Inspect the list of currently bound VMA and unbind any that would
3160          * be invalid given the new cache-level. This is principally to
3161          * catch the issue of the CS prefetch crossing page boundaries and
3162          * reading an invalid PTE on older architectures.
3163          */
3164 restart:
3165         list_for_each_entry(vma, &obj->vma.list, obj_link) {
3166                 if (!drm_mm_node_allocated(&vma->node))
3167                         continue;
3168
3169                 if (i915_vma_is_pinned(vma)) {
3170                         DRM_DEBUG("can not change the cache level of pinned objects\n");
3171                         return -EBUSY;
3172                 }
3173
3174                 if (!i915_vma_is_closed(vma) &&
3175                     i915_gem_valid_gtt_space(vma, cache_level))
3176                         continue;
3177
3178                 ret = i915_vma_unbind(vma);
3179                 if (ret)
3180                         return ret;
3181
3182                 /* As unbinding may affect other elements in the
3183                  * obj->vma_list (due to side-effects from retiring
3184                  * an active vma), play safe and restart the iterator.
3185                  */
3186                 goto restart;
3187         }
3188
3189         /* We can reuse the existing drm_mm nodes but need to change the
3190          * cache-level on the PTE. We could simply unbind them all and
3191          * rebind with the correct cache-level on next use. However since
3192          * we already have a valid slot, dma mapping, pages etc, we may as
3193          * rewrite the PTE in the belief that doing so tramples upon less
3194          * state and so involves less work.
3195          */
3196         if (obj->bind_count) {
3197                 /* Before we change the PTE, the GPU must not be accessing it.
3198                  * If we wait upon the object, we know that all the bound
3199                  * VMA are no longer active.
3200                  */
3201                 ret = i915_gem_object_wait(obj,
3202                                            I915_WAIT_INTERRUPTIBLE |
3203                                            I915_WAIT_LOCKED |
3204                                            I915_WAIT_ALL,
3205                                            MAX_SCHEDULE_TIMEOUT);
3206                 if (ret)
3207                         return ret;
3208
3209                 if (!HAS_LLC(to_i915(obj->base.dev)) &&
3210                     cache_level != I915_CACHE_NONE) {
3211                         /* Access to snoopable pages through the GTT is
3212                          * incoherent and on some machines causes a hard
3213                          * lockup. Relinquish the CPU mmaping to force
3214                          * userspace to refault in the pages and we can
3215                          * then double check if the GTT mapping is still
3216                          * valid for that pointer access.
3217                          */
3218                         i915_gem_release_mmap(obj);
3219
3220                         /* As we no longer need a fence for GTT access,
3221                          * we can relinquish it now (and so prevent having
3222                          * to steal a fence from someone else on the next
3223                          * fence request). Note GPU activity would have
3224                          * dropped the fence as all snoopable access is
3225                          * supposed to be linear.
3226                          */
3227                         for_each_ggtt_vma(vma, obj) {
3228                                 ret = i915_vma_put_fence(vma);
3229                                 if (ret)
3230                                         return ret;
3231                         }
3232                 } else {
3233                         /* We either have incoherent backing store and
3234                          * so no GTT access or the architecture is fully
3235                          * coherent. In such cases, existing GTT mmaps
3236                          * ignore the cache bit in the PTE and we can
3237                          * rewrite it without confusing the GPU or having
3238                          * to force userspace to fault back in its mmaps.
3239                          */
3240                 }
3241
3242                 list_for_each_entry(vma, &obj->vma.list, obj_link) {
3243                         if (!drm_mm_node_allocated(&vma->node))
3244                                 continue;
3245
3246                         ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
3247                         if (ret)
3248                                 return ret;
3249                 }
3250         }
3251
3252         list_for_each_entry(vma, &obj->vma.list, obj_link)
3253                 vma->node.color = cache_level;
3254         i915_gem_object_set_cache_coherency(obj, cache_level);
3255         obj->cache_dirty = true; /* Always invalidate stale cachelines */
3256
3257         return 0;
3258 }
3259
3260 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
3261                                struct drm_file *file)
3262 {
3263         struct drm_i915_gem_caching *args = data;
3264         struct drm_i915_gem_object *obj;
3265         int err = 0;
3266
3267         rcu_read_lock();
3268         obj = i915_gem_object_lookup_rcu(file, args->handle);
3269         if (!obj) {
3270                 err = -ENOENT;
3271                 goto out;
3272         }
3273
3274         switch (obj->cache_level) {
3275         case I915_CACHE_LLC:
3276         case I915_CACHE_L3_LLC:
3277                 args->caching = I915_CACHING_CACHED;
3278                 break;
3279
3280         case I915_CACHE_WT:
3281                 args->caching = I915_CACHING_DISPLAY;
3282                 break;
3283
3284         default:
3285                 args->caching = I915_CACHING_NONE;
3286                 break;
3287         }
3288 out:
3289         rcu_read_unlock();
3290         return err;
3291 }
3292
3293 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
3294                                struct drm_file *file)
3295 {
3296         struct drm_i915_private *i915 = to_i915(dev);
3297         struct drm_i915_gem_caching *args = data;
3298         struct drm_i915_gem_object *obj;
3299         enum i915_cache_level level;
3300         int ret = 0;
3301
3302         switch (args->caching) {
3303         case I915_CACHING_NONE:
3304                 level = I915_CACHE_NONE;
3305                 break;
3306         case I915_CACHING_CACHED:
3307                 /*
3308                  * Due to a HW issue on BXT A stepping, GPU stores via a
3309                  * snooped mapping may leave stale data in a corresponding CPU
3310                  * cacheline, whereas normally such cachelines would get
3311                  * invalidated.
3312                  */
3313                 if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
3314                         return -ENODEV;
3315
3316                 level = I915_CACHE_LLC;
3317                 break;
3318         case I915_CACHING_DISPLAY:
3319                 level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
3320                 break;
3321         default:
3322                 return -EINVAL;
3323         }
3324
3325         obj = i915_gem_object_lookup(file, args->handle);
3326         if (!obj)
3327                 return -ENOENT;
3328
3329         /*
3330          * The caching mode of proxy object is handled by its generator, and
3331          * not allowed to be changed by userspace.
3332          */
3333         if (i915_gem_object_is_proxy(obj)) {
3334                 ret = -ENXIO;
3335                 goto out;
3336         }
3337
3338         if (obj->cache_level == level)
3339                 goto out;
3340
3341         ret = i915_gem_object_wait(obj,
3342                                    I915_WAIT_INTERRUPTIBLE,
3343                                    MAX_SCHEDULE_TIMEOUT);
3344         if (ret)
3345                 goto out;
3346
3347         ret = i915_mutex_lock_interruptible(dev);
3348         if (ret)
3349                 goto out;
3350
3351         ret = i915_gem_object_set_cache_level(obj, level);
3352         mutex_unlock(&dev->struct_mutex);
3353
3354 out:
3355         i915_gem_object_put(obj);
3356         return ret;
3357 }
3358
3359 /*
3360  * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
3361  * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
3362  * (for pageflips). We only flush the caches while preparing the buffer for
3363  * display, the callers are responsible for frontbuffer flush.
3364  */
3365 struct i915_vma *
3366 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
3367                                      u32 alignment,
3368                                      const struct i915_ggtt_view *view,
3369                                      unsigned int flags)
3370 {
3371         struct i915_vma *vma;
3372         int ret;
3373
3374         lockdep_assert_held(&obj->base.dev->struct_mutex);
3375
3376         /* Mark the global pin early so that we account for the
3377          * display coherency whilst setting up the cache domains.
3378          */
3379         obj->pin_global++;
3380
3381         /* The display engine is not coherent with the LLC cache on gen6.  As
3382          * a result, we make sure that the pinning that is about to occur is
3383          * done with uncached PTEs. This is lowest common denominator for all
3384          * chipsets.
3385          *
3386          * However for gen6+, we could do better by using the GFDT bit instead
3387          * of uncaching, which would allow us to flush all the LLC-cached data
3388          * with that bit in the PTE to main memory with just one PIPE_CONTROL.
3389          */
3390         ret = i915_gem_object_set_cache_level(obj,
3391                                               HAS_WT(to_i915(obj->base.dev)) ?
3392                                               I915_CACHE_WT : I915_CACHE_NONE);
3393         if (ret) {
3394                 vma = ERR_PTR(ret);
3395                 goto err_unpin_global;
3396         }
3397
3398         /* As the user may map the buffer once pinned in the display plane
3399          * (e.g. libkms for the bootup splash), we have to ensure that we
3400          * always use map_and_fenceable for all scanout buffers. However,
3401          * it may simply be too big to fit into mappable, in which case
3402          * put it anyway and hope that userspace can cope (but always first
3403          * try to preserve the existing ABI).
3404          */
3405         vma = ERR_PTR(-ENOSPC);
3406         if ((flags & PIN_MAPPABLE) == 0 &&
3407             (!view || view->type == I915_GGTT_VIEW_NORMAL))
3408                 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
3409                                                flags |
3410                                                PIN_MAPPABLE |
3411                                                PIN_NONBLOCK);
3412         if (IS_ERR(vma))
3413                 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
3414         if (IS_ERR(vma))
3415                 goto err_unpin_global;
3416
3417         vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
3418
3419         __i915_gem_object_flush_for_display(obj);
3420
3421         /* It should now be out of any other write domains, and we can update
3422          * the domain values for our changes.
3423          */
3424         obj->read_domains |= I915_GEM_DOMAIN_GTT;
3425
3426         return vma;
3427
3428 err_unpin_global:
3429         obj->pin_global--;
3430         return vma;
3431 }
3432
3433 void
3434 i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
3435 {
3436         lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
3437
3438         if (WARN_ON(vma->obj->pin_global == 0))
3439                 return;
3440
3441         if (--vma->obj->pin_global == 0)
3442                 vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
3443
3444         /* Bump the LRU to try and avoid premature eviction whilst flipping  */
3445         i915_gem_object_bump_inactive_ggtt(vma->obj);
3446
3447         i915_vma_unpin(vma);
3448 }
3449
3450 /**
3451  * Moves a single object to the CPU read, and possibly write domain.
3452  * @obj: object to act on
3453  * @write: requesting write or read-only access
3454  *
3455  * This function returns when the move is complete, including waiting on
3456  * flushes to occur.
3457  */
3458 int
3459 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
3460 {
3461         int ret;
3462
3463         lockdep_assert_held(&obj->base.dev->struct_mutex);
3464
3465         ret = i915_gem_object_wait(obj,
3466                                    I915_WAIT_INTERRUPTIBLE |
3467                                    I915_WAIT_LOCKED |
3468                                    (write ? I915_WAIT_ALL : 0),
3469                                    MAX_SCHEDULE_TIMEOUT);
3470         if (ret)
3471                 return ret;
3472
3473         flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
3474
3475         /* Flush the CPU cache if it's still invalid. */
3476         if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
3477                 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
3478                 obj->read_domains |= I915_GEM_DOMAIN_CPU;
3479         }
3480
3481         /* It should now be out of any other write domains, and we can update
3482          * the domain values for our changes.
3483          */
3484         GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);
3485
3486         /* If we're writing through the CPU, then the GPU read domains will
3487          * need to be invalidated at next use.
3488          */
3489         if (write)
3490                 __start_cpu_write(obj);
3491
3492         return 0;
3493 }
3494
3495 /* Throttle our rendering by waiting until the ring has completed our requests
3496  * emitted over 20 msec ago.
3497  *
3498  * Note that if we were to use the current jiffies each time around the loop,
3499  * we wouldn't escape the function with any frames outstanding if the time to
3500  * render a frame was over 20ms.
3501  *
3502  * This should get us reasonable parallelism between CPU and GPU but also
3503  * relatively low latency when blocking on a particular request to finish.
3504  */
3505 static int
3506 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
3507 {
3508         struct drm_i915_private *dev_priv = to_i915(dev);
3509         struct drm_i915_file_private *file_priv = file->driver_priv;
3510         unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
3511         struct i915_request *request, *target = NULL;
3512         long ret;
3513
3514         /* ABI: return -EIO if already wedged */
3515         ret = i915_terminally_wedged(dev_priv);
3516         if (ret)
3517                 return ret;
3518
3519         spin_lock(&file_priv->mm.lock);
3520         list_for_each_entry(request, &file_priv->mm.request_list, client_link) {
3521                 if (time_after_eq(request->emitted_jiffies, recent_enough))
3522                         break;
3523
3524                 if (target) {
3525                         list_del(&target->client_link);
3526                         target->file_priv = NULL;
3527                 }
3528
3529                 target = request;
3530         }
3531         if (target)
3532                 i915_request_get(target);
3533         spin_unlock(&file_priv->mm.lock);
3534
3535         if (target == NULL)
3536                 return 0;
3537
3538         ret = i915_request_wait(target,
3539                                 I915_WAIT_INTERRUPTIBLE,
3540                                 MAX_SCHEDULE_TIMEOUT);
3541         i915_request_put(target);
3542
3543         return ret < 0 ? ret : 0;
3544 }
3545
3546 struct i915_vma *
3547 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
3548                          const struct i915_ggtt_view *view,
3549                          u64 size,
3550                          u64 alignment,
3551                          u64 flags)
3552 {
3553         struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
3554         struct i915_address_space *vm = &dev_priv->ggtt.vm;
3555         struct i915_vma *vma;
3556         int ret;
3557
3558         lockdep_assert_held(&obj->base.dev->struct_mutex);
3559
3560         if (flags & PIN_MAPPABLE &&
3561             (!view || view->type == I915_GGTT_VIEW_NORMAL)) {
3562                 /* If the required space is larger than the available
3563                  * aperture, we will not able to find a slot for the
3564                  * object and unbinding the object now will be in
3565                  * vain. Worse, doing so may cause us to ping-pong
3566                  * the object in and out of the Global GTT and
3567                  * waste a lot of cycles under the mutex.
3568                  */
3569                 if (obj->base.size > dev_priv->ggtt.mappable_end)
3570                         return ERR_PTR(-E2BIG);
3571
3572                 /* If NONBLOCK is set the caller is optimistically
3573                  * trying to cache the full object within the mappable
3574                  * aperture, and *must* have a fallback in place for
3575                  * situations where we cannot bind the object. We
3576                  * can be a little more lax here and use the fallback
3577                  * more often to avoid costly migrations of ourselves
3578                  * and other objects within the aperture.
3579                  *
3580                  * Half-the-aperture is used as a simple heuristic.
3581                  * More interesting would to do search for a free
3582                  * block prior to making the commitment to unbind.
3583                  * That caters for the self-harm case, and with a
3584                  * little more heuristics (e.g. NOFAULT, NOEVICT)
3585                  * we could try to minimise harm to others.
3586                  */
3587                 if (flags & PIN_NONBLOCK &&
3588                     obj->base.size > dev_priv->ggtt.mappable_end / 2)
3589                         return ERR_PTR(-ENOSPC);
3590         }
3591
3592         vma = i915_vma_instance(obj, vm, view);
3593         if (IS_ERR(vma))
3594                 return vma;
3595
3596         if (i915_vma_misplaced(vma, size, alignment, flags)) {
3597                 if (flags & PIN_NONBLOCK) {
3598                         if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))
3599                                 return ERR_PTR(-ENOSPC);
3600
3601                         if (flags & PIN_MAPPABLE &&
3602                             vma->fence_size > dev_priv->ggtt.mappable_end / 2)
3603                                 return ERR_PTR(-ENOSPC);
3604                 }
3605
3606                 WARN(i915_vma_is_pinned(vma),
3607                      "bo is already pinned in ggtt with incorrect alignment:"
3608                      " offset=%08x, req.alignment=%llx,"
3609                      " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n",
3610                      i915_ggtt_offset(vma), alignment,
3611                      !!(flags & PIN_MAPPABLE),
3612                      i915_vma_is_map_and_fenceable(vma));
3613                 ret = i915_vma_unbind(vma);
3614                 if (ret)
3615                         return ERR_PTR(ret);
3616         }
3617
3618         ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
3619         if (ret)
3620                 return ERR_PTR(ret);
3621
3622         return vma;
3623 }
3624
3625 static __always_inline u32 __busy_read_flag(u8 id)
3626 {
3627         if (id == (u8)I915_ENGINE_CLASS_INVALID)
3628                 return 0xffff0000u;
3629
3630         GEM_BUG_ON(id >= 16);
3631         return 0x10000u << id;
3632 }
3633
3634 static __always_inline u32 __busy_write_id(u8 id)
3635 {
3636         /*
3637          * The uABI guarantees an active writer is also amongst the read
3638          * engines. This would be true if we accessed the activity tracking
3639          * under the lock, but as we perform the lookup of the object and
3640          * its activity locklessly we can not guarantee that the last_write
3641          * being active implies that we have set the same engine flag from
3642          * last_read - hence we always set both read and write busy for
3643          * last_write.
3644          */
3645         if (id == (u8)I915_ENGINE_CLASS_INVALID)
3646                 return 0xffffffffu;
3647
3648         return (id + 1) | __busy_read_flag(id);
3649 }
3650
3651 static __always_inline unsigned int
3652 __busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u8 id))
3653 {
3654         const struct i915_request *rq;
3655
3656         /*
3657          * We have to check the current hw status of the fence as the uABI
3658          * guarantees forward progress. We could rely on the idle worker
3659          * to eventually flush us, but to minimise latency just ask the
3660          * hardware.
3661          *
3662          * Note we only report on the status of native fences.
3663          */
3664         if (!dma_fence_is_i915(fence))
3665                 return 0;
3666
3667         /* opencode to_request() in order to avoid const warnings */
3668         rq = container_of(fence, const struct i915_request, fence);
3669         if (i915_request_completed(rq))
3670                 return 0;
3671
3672         /* Beware type-expansion follies! */
3673         BUILD_BUG_ON(!typecheck(u8, rq->engine->uabi_class));
3674         return flag(rq->engine->uabi_class);
3675 }
3676
3677 static __always_inline unsigned int
3678 busy_check_reader(const struct dma_fence *fence)
3679 {
3680         return __busy_set_if_active(fence, __busy_read_flag);
3681 }
3682
3683 static __always_inline unsigned int
3684 busy_check_writer(const struct dma_fence *fence)
3685 {
3686         if (!fence)
3687                 return 0;
3688
3689         return __busy_set_if_active(fence, __busy_write_id);
3690 }
3691
3692 int
3693 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
3694                     struct drm_file *file)
3695 {
3696         struct drm_i915_gem_busy *args = data;
3697         struct drm_i915_gem_object *obj;
3698         struct reservation_object_list *list;
3699         unsigned int seq;
3700         int err;
3701
3702         err = -ENOENT;
3703         rcu_read_lock();
3704         obj = i915_gem_object_lookup_rcu(file, args->handle);
3705         if (!obj)
3706                 goto out;
3707
3708         /*
3709          * A discrepancy here is that we do not report the status of
3710          * non-i915 fences, i.e. even though we may report the object as idle,
3711          * a call to set-domain may still stall waiting for foreign rendering.
3712          * This also means that wait-ioctl may report an object as busy,
3713          * where busy-ioctl considers it idle.
3714          *
3715          * We trade the ability to warn of foreign fences to report on which
3716          * i915 engines are active for the object.
3717          *
3718          * Alternatively, we can trade that extra information on read/write
3719          * activity with
3720          *      args->busy =
3721          *              !reservation_object_test_signaled_rcu(obj->resv, true);
3722          * to report the overall busyness. This is what the wait-ioctl does.
3723          *
3724          */
3725 retry:
3726         seq = raw_read_seqcount(&obj->resv->seq);
3727
3728         /* Translate the exclusive fence to the READ *and* WRITE engine */
3729         args->busy = busy_check_writer(rcu_dereference(obj->resv->fence_excl));
3730
3731         /* Translate shared fences to READ set of engines */
3732         list = rcu_dereference(obj->resv->fence);
3733         if (list) {
3734                 unsigned int shared_count = list->shared_count, i;
3735
3736                 for (i = 0; i < shared_count; ++i) {
3737                         struct dma_fence *fence =
3738                                 rcu_dereference(list->shared[i]);
3739
3740                         args->busy |= busy_check_reader(fence);
3741                 }
3742         }
3743
3744         if (args->busy && read_seqcount_retry(&obj->resv->seq, seq))
3745                 goto retry;
3746
3747         err = 0;
3748 out:
3749         rcu_read_unlock();
3750         return err;
3751 }
3752
3753 int
3754 i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
3755                         struct drm_file *file_priv)
3756 {
3757         return i915_gem_ring_throttle(dev, file_priv);
3758 }
3759
3760 int
3761 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
3762                        struct drm_file *file_priv)
3763 {
3764         struct drm_i915_private *dev_priv = to_i915(dev);
3765         struct drm_i915_gem_madvise *args = data;
3766         struct drm_i915_gem_object *obj;
3767         int err;
3768
3769         switch (args->madv) {
3770         case I915_MADV_DONTNEED:
3771         case I915_MADV_WILLNEED:
3772             break;
3773         default:
3774             return -EINVAL;
3775         }
3776
3777         obj = i915_gem_object_lookup(file_priv, args->handle);
3778         if (!obj)
3779                 return -ENOENT;
3780
3781         err = mutex_lock_interruptible(&obj->mm.lock);
3782         if (err)
3783                 goto out;
3784
3785         if (i915_gem_object_has_pages(obj) &&
3786             i915_gem_object_is_tiled(obj) &&
3787             dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
3788                 if (obj->mm.madv == I915_MADV_WILLNEED) {
3789                         GEM_BUG_ON(!obj->mm.quirked);
3790                         __i915_gem_object_unpin_pages(obj);
3791                         obj->mm.quirked = false;
3792                 }
3793                 if (args->madv == I915_MADV_WILLNEED) {
3794                         GEM_BUG_ON(obj->mm.quirked);
3795                         __i915_gem_object_pin_pages(obj);
3796                         obj->mm.quirked = true;
3797                 }
3798         }
3799
3800         if (obj->mm.madv != __I915_MADV_PURGED)
3801                 obj->mm.madv = args->madv;
3802
3803         /* if the object is no longer attached, discard its backing storage */
3804         if (obj->mm.madv == I915_MADV_DONTNEED &&
3805             !i915_gem_object_has_pages(obj))
3806                 __i915_gem_object_truncate(obj);
3807
3808         args->retained = obj->mm.madv != __I915_MADV_PURGED;
3809         mutex_unlock(&obj->mm.lock);
3810
3811 out:
3812         i915_gem_object_put(obj);
3813         return err;
3814 }
3815
3816 static void
3817 frontbuffer_retire(struct i915_active_request *active,
3818                    struct i915_request *request)
3819 {
3820         struct drm_i915_gem_object *obj =
3821                 container_of(active, typeof(*obj), frontbuffer_write);
3822
3823         intel_fb_obj_flush(obj, ORIGIN_CS);
3824 }
3825
3826 void i915_gem_object_init(struct drm_i915_gem_object *obj,
3827                           const struct drm_i915_gem_object_ops *ops)
3828 {
3829         mutex_init(&obj->mm.lock);
3830
3831         spin_lock_init(&obj->vma.lock);
3832         INIT_LIST_HEAD(&obj->vma.list);
3833
3834         INIT_LIST_HEAD(&obj->lut_list);
3835         INIT_LIST_HEAD(&obj->batch_pool_link);
3836
3837         init_rcu_head(&obj->rcu);
3838
3839         obj->ops = ops;
3840
3841         reservation_object_init(&obj->__builtin_resv);
3842         obj->resv = &obj->__builtin_resv;
3843
3844         obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
3845         i915_active_request_init(&obj->frontbuffer_write,
3846                                  NULL, frontbuffer_retire);
3847
3848         obj->mm.madv = I915_MADV_WILLNEED;
3849         INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
3850         mutex_init(&obj->mm.get_page.lock);
3851
3852         i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
3853 }
3854
3855 static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
3856         .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
3857                  I915_GEM_OBJECT_IS_SHRINKABLE,
3858
3859         .get_pages = i915_gem_object_get_pages_gtt,
3860         .put_pages = i915_gem_object_put_pages_gtt,
3861
3862         .pwrite = i915_gem_object_pwrite_gtt,
3863 };
3864
3865 static int i915_gem_object_create_shmem(struct drm_device *dev,
3866                                         struct drm_gem_object *obj,
3867                                         size_t size)
3868 {
3869         struct drm_i915_private *i915 = to_i915(dev);
3870         unsigned long flags = VM_NORESERVE;
3871         struct file *filp;
3872
3873         drm_gem_private_object_init(dev, obj, size);
3874
3875         if (i915->mm.gemfs)
3876                 filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size,
3877                                                  flags);
3878         else
3879                 filp = shmem_file_setup("i915", size, flags);
3880
3881         if (IS_ERR(filp))
3882                 return PTR_ERR(filp);
3883
3884         obj->filp = filp;
3885
3886         return 0;
3887 }
3888
3889 struct drm_i915_gem_object *
3890 i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
3891 {
3892         struct drm_i915_gem_object *obj;
3893         struct address_space *mapping;
3894         unsigned int cache_level;
3895         gfp_t mask;
3896         int ret;
3897
3898         /* There is a prevalence of the assumption that we fit the object's
3899          * page count inside a 32bit _signed_ variable. Let's document this and
3900          * catch if we ever need to fix it. In the meantime, if you do spot
3901          * such a local variable, please consider fixing!
3902          */
3903         if (size >> PAGE_SHIFT > INT_MAX)
3904                 return ERR_PTR(-E2BIG);
3905
3906         if (overflows_type(size, obj->base.size))
3907                 return ERR_PTR(-E2BIG);
3908
3909         obj = i915_gem_object_alloc();
3910         if (obj == NULL)
3911                 return ERR_PTR(-ENOMEM);
3912
3913         ret = i915_gem_object_create_shmem(&dev_priv->drm, &obj->base, size);
3914         if (ret)
3915                 goto fail;
3916
3917         mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
3918         if (IS_I965GM(dev_priv) || IS_I965G(dev_priv)) {
3919                 /* 965gm cannot relocate objects above 4GiB. */
3920                 mask &= ~__GFP_HIGHMEM;
3921                 mask |= __GFP_DMA32;
3922         }
3923
3924         mapping = obj->base.filp->f_mapping;
3925         mapping_set_gfp_mask(mapping, mask);
3926         GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
3927
3928         i915_gem_object_init(obj, &i915_gem_object_ops);
3929
3930         obj->write_domain = I915_GEM_DOMAIN_CPU;
3931         obj->read_domains = I915_GEM_DOMAIN_CPU;
3932
3933         if (HAS_LLC(dev_priv))
3934                 /* On some devices, we can have the GPU use the LLC (the CPU
3935                  * cache) for about a 10% performance improvement
3936                  * compared to uncached.  Graphics requests other than
3937                  * display scanout are coherent with the CPU in
3938                  * accessing this cache.  This means in this mode we
3939                  * don't need to clflush on the CPU side, and on the
3940                  * GPU side we only need to flush internal caches to
3941                  * get data visible to the CPU.
3942                  *
3943                  * However, we maintain the display planes as UC, and so
3944                  * need to rebind when first used as such.
3945                  */
3946                 cache_level = I915_CACHE_LLC;
3947         else
3948                 cache_level = I915_CACHE_NONE;
3949
3950         i915_gem_object_set_cache_coherency(obj, cache_level);
3951
3952         trace_i915_gem_object_create(obj);
3953
3954         return obj;
3955
3956 fail:
3957         i915_gem_object_free(obj);
3958         return ERR_PTR(ret);
3959 }
3960
3961 static bool discard_backing_storage(struct drm_i915_gem_object *obj)
3962 {
3963         /* If we are the last user of the backing storage (be it shmemfs
3964          * pages or stolen etc), we know that the pages are going to be
3965          * immediately released. In this case, we can then skip copying
3966          * back the contents from the GPU.
3967          */
3968
3969         if (obj->mm.madv != I915_MADV_WILLNEED)
3970                 return false;
3971
3972         if (obj->base.filp == NULL)
3973                 return true;
3974
3975         /* At first glance, this looks racy, but then again so would be
3976          * userspace racing mmap against close. However, the first external
3977          * reference to the filp can only be obtained through the
3978          * i915_gem_mmap_ioctl() which safeguards us against the user
3979          * acquiring such a reference whilst we are in the middle of
3980          * freeing the object.
3981          */
3982         return atomic_long_read(&obj->base.filp->f_count) == 1;
3983 }
3984
3985 static void __i915_gem_free_objects(struct drm_i915_private *i915,
3986                                     struct llist_node *freed)
3987 {
3988         struct drm_i915_gem_object *obj, *on;
3989         intel_wakeref_t wakeref;
3990
3991         wakeref = intel_runtime_pm_get(i915);
3992         llist_for_each_entry_safe(obj, on, freed, freed) {
3993                 struct i915_vma *vma, *vn;
3994
3995                 trace_i915_gem_object_destroy(obj);
3996
3997                 mutex_lock(&i915->drm.struct_mutex);
3998
3999                 GEM_BUG_ON(i915_gem_object_is_active(obj));
4000                 list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) {
4001                         GEM_BUG_ON(i915_vma_is_active(vma));
4002                         vma->flags &= ~I915_VMA_PIN_MASK;
4003                         i915_vma_destroy(vma);
4004                 }
4005                 GEM_BUG_ON(!list_empty(&obj->vma.list));
4006                 GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree));
4007
4008                 /* This serializes freeing with the shrinker. Since the free
4009                  * is delayed, first by RCU then by the workqueue, we want the
4010                  * shrinker to be able to free pages of unreferenced objects,
4011                  * or else we may oom whilst there are plenty of deferred
4012                  * freed objects.
4013                  */
4014                 if (i915_gem_object_has_pages(obj)) {
4015                         spin_lock(&i915->mm.obj_lock);
4016                         list_del_init(&obj->mm.link);
4017                         spin_unlock(&i915->mm.obj_lock);
4018                 }
4019
4020                 mutex_unlock(&i915->drm.struct_mutex);
4021
4022                 GEM_BUG_ON(obj->bind_count);
4023                 GEM_BUG_ON(obj->userfault_count);
4024                 GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
4025                 GEM_BUG_ON(!list_empty(&obj->lut_list));
4026
4027                 if (obj->ops->release)
4028                         obj->ops->release(obj);
4029
4030                 if (WARN_ON(i915_gem_object_has_pinned_pages(obj)))
4031                         atomic_set(&obj->mm.pages_pin_count, 0);
4032                 __i915_gem_object_put_pages(obj, I915_MM_NORMAL);
4033                 GEM_BUG_ON(i915_gem_object_has_pages(obj));
4034
4035                 if (obj->base.import_attach)
4036                         drm_prime_gem_destroy(&obj->base, NULL);
4037
4038                 reservation_object_fini(&obj->__builtin_resv);
4039                 drm_gem_object_release(&obj->base);
4040                 i915_gem_info_remove_obj(i915, obj->base.size);
4041
4042                 bitmap_free(obj->bit_17);
4043                 i915_gem_object_free(obj);
4044
4045                 GEM_BUG_ON(!atomic_read(&i915->mm.free_count));
4046                 atomic_dec(&i915->mm.free_count);
4047
4048                 if (on)
4049                         cond_resched();
4050         }
4051         intel_runtime_pm_put(i915, wakeref);
4052 }
4053
4054 static void i915_gem_flush_free_objects(struct drm_i915_private *i915)
4055 {
4056         struct llist_node *freed;
4057
4058         /* Free the oldest, most stale object to keep the free_list short */
4059         freed = NULL;
4060         if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */
4061                 /* Only one consumer of llist_del_first() allowed */
4062                 spin_lock(&i915->mm.free_lock);
4063                 freed = llist_del_first(&i915->mm.free_list);
4064                 spin_unlock(&i915->mm.free_lock);
4065         }
4066         if (unlikely(freed)) {
4067                 freed->next = NULL;
4068                 __i915_gem_free_objects(i915, freed);
4069         }
4070 }
4071
4072 static void __i915_gem_free_work(struct work_struct *work)
4073 {
4074         struct drm_i915_private *i915 =
4075                 container_of(work, struct drm_i915_private, mm.free_work);
4076         struct llist_node *freed;
4077
4078         /*
4079          * All file-owned VMA should have been released by this point through
4080          * i915_gem_close_object(), or earlier by i915_gem_context_close().
4081          * However, the object may also be bound into the global GTT (e.g.
4082          * older GPUs without per-process support, or for direct access through
4083          * the GTT either for the user or for scanout). Those VMA still need to
4084          * unbound now.
4085          */
4086
4087         spin_lock(&i915->mm.free_lock);
4088         while ((freed = llist_del_all(&i915->mm.free_list))) {
4089                 spin_unlock(&i915->mm.free_lock);
4090
4091                 __i915_gem_free_objects(i915, freed);
4092                 if (need_resched())
4093                         return;
4094
4095                 spin_lock(&i915->mm.free_lock);
4096         }
4097         spin_unlock(&i915->mm.free_lock);
4098 }
4099
4100 static void __i915_gem_free_object_rcu(struct rcu_head *head)
4101 {
4102         struct drm_i915_gem_object *obj =
4103                 container_of(head, typeof(*obj), rcu);
4104         struct drm_i915_private *i915 = to_i915(obj->base.dev);
4105
4106         /*
4107          * We reuse obj->rcu for the freed list, so we had better not treat
4108          * it like a rcu_head from this point forwards. And we expect all
4109          * objects to be freed via this path.
4110          */
4111         destroy_rcu_head(&obj->rcu);
4112
4113         /*
4114          * Since we require blocking on struct_mutex to unbind the freed
4115          * object from the GPU before releasing resources back to the
4116          * system, we can not do that directly from the RCU callback (which may
4117          * be a softirq context), but must instead then defer that work onto a
4118          * kthread. We use the RCU callback rather than move the freed object
4119          * directly onto the work queue so that we can mix between using the
4120          * worker and performing frees directly from subsequent allocations for
4121          * crude but effective memory throttling.
4122          */
4123         if (llist_add(&obj->freed, &i915->mm.free_list))
4124                 queue_work(i915->wq, &i915->mm.free_work);
4125 }
4126
4127 void i915_gem_free_object(struct drm_gem_object *gem_obj)
4128 {
4129         struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
4130
4131         if (obj->mm.quirked)
4132                 __i915_gem_object_unpin_pages(obj);
4133
4134         if (discard_backing_storage(obj))
4135                 obj->mm.madv = I915_MADV_DONTNEED;
4136
4137         /*
4138          * Before we free the object, make sure any pure RCU-only
4139          * read-side critical sections are complete, e.g.
4140          * i915_gem_busy_ioctl(). For the corresponding synchronized
4141          * lookup see i915_gem_object_lookup_rcu().
4142          */
4143         atomic_inc(&to_i915(obj->base.dev)->mm.free_count);
4144         call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
4145 }
4146
4147 void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
4148 {
4149         lockdep_assert_held(&obj->base.dev->struct_mutex);
4150
4151         if (!i915_gem_object_has_active_reference(obj) &&
4152             i915_gem_object_is_active(obj))
4153                 i915_gem_object_set_active_reference(obj);
4154         else
4155                 i915_gem_object_put(obj);
4156 }
4157
4158 void i915_gem_sanitize(struct drm_i915_private *i915)
4159 {
4160         intel_wakeref_t wakeref;
4161
4162         GEM_TRACE("\n");
4163
4164         wakeref = intel_runtime_pm_get(i915);
4165         intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
4166
4167         /*
4168          * As we have just resumed the machine and woken the device up from
4169          * deep PCI sleep (presumably D3_cold), assume the HW has been reset
4170          * back to defaults, recovering from whatever wedged state we left it
4171          * in and so worth trying to use the device once more.
4172          */
4173         if (i915_terminally_wedged(i915))
4174                 i915_gem_unset_wedged(i915);
4175
4176         /*
4177          * If we inherit context state from the BIOS or earlier occupants
4178          * of the GPU, the GPU may be in an inconsistent state when we
4179          * try to take over. The only way to remove the earlier state
4180          * is by resetting. However, resetting on earlier gen is tricky as
4181          * it may impact the display and we are uncertain about the stability
4182          * of the reset, so this could be applied to even earlier gen.
4183          */
4184         intel_gt_sanitize(i915, false);
4185
4186         intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
4187         intel_runtime_pm_put(i915, wakeref);
4188
4189         mutex_lock(&i915->drm.struct_mutex);
4190         i915_gem_contexts_lost(i915);
4191         mutex_unlock(&i915->drm.struct_mutex);
4192 }
4193
4194 void i915_gem_init_swizzling(struct drm_i915_private *dev_priv)
4195 {
4196         if (INTEL_GEN(dev_priv) < 5 ||
4197             dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
4198                 return;
4199
4200         I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
4201                                  DISP_TILE_SURFACE_SWIZZLING);
4202
4203         if (IS_GEN(dev_priv, 5))
4204                 return;
4205
4206         I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
4207         if (IS_GEN(dev_priv, 6))
4208                 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
4209         else if (IS_GEN(dev_priv, 7))
4210                 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
4211         else if (IS_GEN(dev_priv, 8))
4212                 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
4213         else
4214                 BUG();
4215 }
4216
4217 static void init_unused_ring(struct drm_i915_private *dev_priv, u32 base)
4218 {
4219         I915_WRITE(RING_CTL(base), 0);
4220         I915_WRITE(RING_HEAD(base), 0);
4221         I915_WRITE(RING_TAIL(base), 0);
4222         I915_WRITE(RING_START(base), 0);
4223 }
4224
4225 static void init_unused_rings(struct drm_i915_private *dev_priv)
4226 {
4227         if (IS_I830(dev_priv)) {
4228                 init_unused_ring(dev_priv, PRB1_BASE);
4229                 init_unused_ring(dev_priv, SRB0_BASE);
4230                 init_unused_ring(dev_priv, SRB1_BASE);
4231                 init_unused_ring(dev_priv, SRB2_BASE);
4232                 init_unused_ring(dev_priv, SRB3_BASE);
4233         } else if (IS_GEN(dev_priv, 2)) {
4234                 init_unused_ring(dev_priv, SRB0_BASE);
4235                 init_unused_ring(dev_priv, SRB1_BASE);
4236         } else if (IS_GEN(dev_priv, 3)) {
4237                 init_unused_ring(dev_priv, PRB1_BASE);
4238                 init_unused_ring(dev_priv, PRB2_BASE);
4239         }
4240 }
4241
4242 int i915_gem_init_hw(struct drm_i915_private *dev_priv)
4243 {
4244         int ret;
4245
4246         dev_priv->gt.last_init_time = ktime_get();
4247
4248         /* Double layer security blanket, see i915_gem_init() */
4249         intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
4250
4251         if (HAS_EDRAM(dev_priv) && INTEL_GEN(dev_priv) < 9)
4252                 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
4253
4254         if (IS_HASWELL(dev_priv))
4255                 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ?
4256                            LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
4257
4258         /* Apply the GT workarounds... */
4259         intel_gt_apply_workarounds(dev_priv);
4260         /* ...and determine whether they are sticking. */
4261         intel_gt_verify_workarounds(dev_priv, "init");
4262
4263         i915_gem_init_swizzling(dev_priv);
4264
4265         /*
4266          * At least 830 can leave some of the unused rings
4267          * "active" (ie. head != tail) after resume which
4268          * will prevent c3 entry. Makes sure all unused rings
4269          * are totally idle.
4270          */
4271         init_unused_rings(dev_priv);
4272
4273         BUG_ON(!dev_priv->kernel_context);
4274         ret = i915_terminally_wedged(dev_priv);
4275         if (ret)
4276                 goto out;
4277
4278         ret = i915_ppgtt_init_hw(dev_priv);
4279         if (ret) {
4280                 DRM_ERROR("Enabling PPGTT failed (%d)\n", ret);
4281                 goto out;
4282         }
4283
4284         ret = intel_wopcm_init_hw(&dev_priv->wopcm);
4285         if (ret) {
4286                 DRM_ERROR("Enabling WOPCM failed (%d)\n", ret);
4287                 goto out;
4288         }
4289
4290         /* We can't enable contexts until all firmware is loaded */
4291         ret = intel_uc_init_hw(dev_priv);
4292         if (ret) {
4293                 DRM_ERROR("Enabling uc failed (%d)\n", ret);
4294                 goto out;
4295         }
4296
4297         intel_mocs_init_l3cc_table(dev_priv);
4298
4299         /* Only when the HW is re-initialised, can we replay the requests */
4300         ret = intel_engines_resume(dev_priv);
4301         if (ret)
4302                 goto cleanup_uc;
4303
4304         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
4305
4306         intel_engines_set_scheduler_caps(dev_priv);
4307         return 0;
4308
4309 cleanup_uc:
4310         intel_uc_fini_hw(dev_priv);
4311 out:
4312         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
4313
4314         return ret;
4315 }
4316
4317 static int __intel_engines_record_defaults(struct drm_i915_private *i915)
4318 {
4319         struct intel_engine_cs *engine;
4320         struct i915_gem_context *ctx;
4321         struct i915_gem_engines *e;
4322         enum intel_engine_id id;
4323         int err = 0;
4324
4325         /*
4326          * As we reset the gpu during very early sanitisation, the current
4327          * register state on the GPU should reflect its defaults values.
4328          * We load a context onto the hw (with restore-inhibit), then switch
4329          * over to a second context to save that default register state. We
4330          * can then prime every new context with that state so they all start
4331          * from the same default HW values.
4332          */
4333
4334         ctx = i915_gem_context_create_kernel(i915, 0);
4335         if (IS_ERR(ctx))
4336                 return PTR_ERR(ctx);
4337
4338         e = i915_gem_context_lock_engines(ctx);
4339
4340         for_each_engine(engine, i915, id) {
4341                 struct intel_context *ce = e->engines[id];
4342                 struct i915_request *rq;
4343
4344                 rq = intel_context_create_request(ce);
4345                 if (IS_ERR(rq)) {
4346                         err = PTR_ERR(rq);
4347                         goto err_active;
4348                 }
4349
4350                 err = 0;
4351                 if (rq->engine->init_context)
4352                         err = rq->engine->init_context(rq);
4353
4354                 i915_request_add(rq);
4355                 if (err)
4356                         goto err_active;
4357         }
4358
4359         /* Flush the default context image to memory, and enable powersaving. */
4360         if (!i915_gem_load_power_context(i915)) {
4361                 err = -EIO;
4362                 goto err_active;
4363         }
4364
4365         for_each_engine(engine, i915, id) {
4366                 struct intel_context *ce = e->engines[id];
4367                 struct i915_vma *state = ce->state;
4368                 void *vaddr;
4369
4370                 if (!state)
4371                         continue;
4372
4373                 GEM_BUG_ON(intel_context_is_pinned(ce));
4374
4375                 /*
4376                  * As we will hold a reference to the logical state, it will
4377                  * not be torn down with the context, and importantly the
4378                  * object will hold onto its vma (making it possible for a
4379                  * stray GTT write to corrupt our defaults). Unmap the vma
4380                  * from the GTT to prevent such accidents and reclaim the
4381                  * space.
4382                  */
4383                 err = i915_vma_unbind(state);
4384                 if (err)
4385                         goto err_active;
4386
4387                 err = i915_gem_object_set_to_cpu_domain(state->obj, false);
4388                 if (err)
4389                         goto err_active;
4390
4391                 engine->default_state = i915_gem_object_get(state->obj);
4392                 i915_gem_object_set_cache_coherency(engine->default_state,
4393                                                     I915_CACHE_LLC);
4394
4395                 /* Check we can acquire the image of the context state */
4396                 vaddr = i915_gem_object_pin_map(engine->default_state,
4397                                                 I915_MAP_FORCE_WB);
4398                 if (IS_ERR(vaddr)) {
4399                         err = PTR_ERR(vaddr);
4400                         goto err_active;
4401                 }
4402
4403                 i915_gem_object_unpin_map(engine->default_state);
4404         }
4405
4406         if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) {
4407                 unsigned int found = intel_engines_has_context_isolation(i915);
4408
4409                 /*
4410                  * Make sure that classes with multiple engine instances all
4411                  * share the same basic configuration.
4412                  */
4413                 for_each_engine(engine, i915, id) {
4414                         unsigned int bit = BIT(engine->uabi_class);
4415                         unsigned int expected = engine->default_state ? bit : 0;
4416
4417                         if ((found & bit) != expected) {
4418                                 DRM_ERROR("mismatching default context state for class %d on engine %s\n",
4419                                           engine->uabi_class, engine->name);
4420                         }
4421                 }
4422         }
4423
4424 out_ctx:
4425         i915_gem_context_unlock_engines(ctx);
4426         i915_gem_context_set_closed(ctx);
4427         i915_gem_context_put(ctx);
4428         return err;
4429
4430 err_active:
4431         /*
4432          * If we have to abandon now, we expect the engines to be idle
4433          * and ready to be torn-down. The quickest way we can accomplish
4434          * this is by declaring ourselves wedged.
4435          */
4436         i915_gem_set_wedged(i915);
4437         goto out_ctx;
4438 }
4439
4440 static int
4441 i915_gem_init_scratch(struct drm_i915_private *i915, unsigned int size)
4442 {
4443         struct drm_i915_gem_object *obj;
4444         struct i915_vma *vma;
4445         int ret;
4446
4447         obj = i915_gem_object_create_stolen(i915, size);
4448         if (!obj)
4449                 obj = i915_gem_object_create_internal(i915, size);
4450         if (IS_ERR(obj)) {
4451                 DRM_ERROR("Failed to allocate scratch page\n");
4452                 return PTR_ERR(obj);
4453         }
4454
4455         vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
4456         if (IS_ERR(vma)) {
4457                 ret = PTR_ERR(vma);
4458                 goto err_unref;
4459         }
4460
4461         ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
4462         if (ret)
4463                 goto err_unref;
4464
4465         i915->gt.scratch = vma;
4466         return 0;
4467
4468 err_unref:
4469         i915_gem_object_put(obj);
4470         return ret;
4471 }
4472
4473 static void i915_gem_fini_scratch(struct drm_i915_private *i915)
4474 {
4475         i915_vma_unpin_and_release(&i915->gt.scratch, 0);
4476 }
4477
4478 static int intel_engines_verify_workarounds(struct drm_i915_private *i915)
4479 {
4480         struct intel_engine_cs *engine;
4481         enum intel_engine_id id;
4482         int err = 0;
4483
4484         if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
4485                 return 0;
4486
4487         for_each_engine(engine, i915, id) {
4488                 if (intel_engine_verify_workarounds(engine, "load"))
4489                         err = -EIO;
4490         }
4491
4492         return err;
4493 }
4494
4495 int i915_gem_init(struct drm_i915_private *dev_priv)
4496 {
4497         int ret;
4498
4499         /* We need to fallback to 4K pages if host doesn't support huge gtt. */
4500         if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_huge_gtt(dev_priv))
4501                 mkwrite_device_info(dev_priv)->page_sizes =
4502                         I915_GTT_PAGE_SIZE_4K;
4503
4504         dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1);
4505
4506         i915_timelines_init(dev_priv);
4507
4508         ret = i915_gem_init_userptr(dev_priv);
4509         if (ret)
4510                 return ret;
4511
4512         ret = intel_uc_init_misc(dev_priv);
4513         if (ret)
4514                 return ret;
4515
4516         ret = intel_wopcm_init(&dev_priv->wopcm);
4517         if (ret)
4518                 goto err_uc_misc;
4519
4520         /* This is just a security blanket to placate dragons.
4521          * On some systems, we very sporadically observe that the first TLBs
4522          * used by the CS may be stale, despite us poking the TLB reset. If
4523          * we hold the forcewake during initialisation these problems
4524          * just magically go away.
4525          */
4526         mutex_lock(&dev_priv->drm.struct_mutex);
4527         intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
4528
4529         ret = i915_gem_init_ggtt(dev_priv);
4530         if (ret) {
4531                 GEM_BUG_ON(ret == -EIO);
4532                 goto err_unlock;
4533         }
4534
4535         ret = i915_gem_init_scratch(dev_priv,
4536                                     IS_GEN(dev_priv, 2) ? SZ_256K : PAGE_SIZE);
4537         if (ret) {
4538                 GEM_BUG_ON(ret == -EIO);
4539                 goto err_ggtt;
4540         }
4541
4542         ret = intel_engines_setup(dev_priv);
4543         if (ret) {
4544                 GEM_BUG_ON(ret == -EIO);
4545                 goto err_unlock;
4546         }
4547
4548         ret = i915_gem_contexts_init(dev_priv);
4549         if (ret) {
4550                 GEM_BUG_ON(ret == -EIO);
4551                 goto err_scratch;
4552         }
4553
4554         ret = intel_engines_init(dev_priv);
4555         if (ret) {
4556                 GEM_BUG_ON(ret == -EIO);
4557                 goto err_context;
4558         }
4559
4560         intel_init_gt_powersave(dev_priv);
4561
4562         ret = intel_uc_init(dev_priv);
4563         if (ret)
4564                 goto err_pm;
4565
4566         ret = i915_gem_init_hw(dev_priv);
4567         if (ret)
4568                 goto err_uc_init;
4569
4570         /*
4571          * Despite its name intel_init_clock_gating applies both display
4572          * clock gating workarounds; GT mmio workarounds and the occasional
4573          * GT power context workaround. Worse, sometimes it includes a context
4574          * register workaround which we need to apply before we record the
4575          * default HW state for all contexts.
4576          *
4577          * FIXME: break up the workarounds and apply them at the right time!
4578          */
4579         intel_init_clock_gating(dev_priv);
4580
4581         ret = intel_engines_verify_workarounds(dev_priv);
4582         if (ret)
4583                 goto err_init_hw;
4584
4585         ret = __intel_engines_record_defaults(dev_priv);
4586         if (ret)
4587                 goto err_init_hw;
4588
4589         if (i915_inject_load_failure()) {
4590                 ret = -ENODEV;
4591                 goto err_init_hw;
4592         }
4593
4594         if (i915_inject_load_failure()) {
4595                 ret = -EIO;
4596                 goto err_init_hw;
4597         }
4598
4599         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
4600         mutex_unlock(&dev_priv->drm.struct_mutex);
4601
4602         return 0;
4603
4604         /*
4605          * Unwinding is complicated by that we want to handle -EIO to mean
4606          * disable GPU submission but keep KMS alive. We want to mark the
4607          * HW as irrevisibly wedged, but keep enough state around that the
4608          * driver doesn't explode during runtime.
4609          */
4610 err_init_hw:
4611         mutex_unlock(&dev_priv->drm.struct_mutex);
4612
4613         i915_gem_set_wedged(dev_priv);
4614         i915_gem_suspend(dev_priv);
4615         i915_gem_suspend_late(dev_priv);
4616
4617         i915_gem_drain_workqueue(dev_priv);
4618
4619         mutex_lock(&dev_priv->drm.struct_mutex);
4620         intel_uc_fini_hw(dev_priv);
4621 err_uc_init:
4622         intel_uc_fini(dev_priv);
4623 err_pm:
4624         if (ret != -EIO) {
4625                 intel_cleanup_gt_powersave(dev_priv);
4626                 intel_engines_cleanup(dev_priv);
4627         }
4628 err_context:
4629         if (ret != -EIO)
4630                 i915_gem_contexts_fini(dev_priv);
4631 err_scratch:
4632         i915_gem_fini_scratch(dev_priv);
4633 err_ggtt:
4634 err_unlock:
4635         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
4636         mutex_unlock(&dev_priv->drm.struct_mutex);
4637
4638 err_uc_misc:
4639         intel_uc_fini_misc(dev_priv);
4640
4641         if (ret != -EIO) {
4642                 i915_gem_cleanup_userptr(dev_priv);
4643                 i915_timelines_fini(dev_priv);
4644         }
4645
4646         if (ret == -EIO) {
4647                 mutex_lock(&dev_priv->drm.struct_mutex);
4648
4649                 /*
4650                  * Allow engine initialisation to fail by marking the GPU as
4651                  * wedged. But we only want to do this where the GPU is angry,
4652                  * for all other failure, such as an allocation failure, bail.
4653                  */
4654                 if (!i915_reset_failed(dev_priv)) {
4655                         i915_load_error(dev_priv,
4656                                         "Failed to initialize GPU, declaring it wedged!\n");
4657                         i915_gem_set_wedged(dev_priv);
4658                 }
4659
4660                 /* Minimal basic recovery for KMS */
4661                 ret = i915_ggtt_enable_hw(dev_priv);
4662                 i915_gem_restore_gtt_mappings(dev_priv);
4663                 i915_gem_restore_fences(dev_priv);
4664                 intel_init_clock_gating(dev_priv);
4665
4666                 mutex_unlock(&dev_priv->drm.struct_mutex);
4667         }
4668
4669         i915_gem_drain_freed_objects(dev_priv);
4670         return ret;
4671 }
4672
4673 void i915_gem_fini(struct drm_i915_private *dev_priv)
4674 {
4675         GEM_BUG_ON(dev_priv->gt.awake);
4676
4677         intel_wakeref_auto_fini(&dev_priv->mm.userfault_wakeref);
4678
4679         i915_gem_suspend_late(dev_priv);
4680         intel_disable_gt_powersave(dev_priv);
4681
4682         /* Flush any outstanding unpin_work. */
4683         i915_gem_drain_workqueue(dev_priv);
4684
4685         mutex_lock(&dev_priv->drm.struct_mutex);
4686         intel_uc_fini_hw(dev_priv);
4687         intel_uc_fini(dev_priv);
4688         intel_engines_cleanup(dev_priv);
4689         i915_gem_contexts_fini(dev_priv);
4690         i915_gem_fini_scratch(dev_priv);
4691         mutex_unlock(&dev_priv->drm.struct_mutex);
4692
4693         intel_wa_list_free(&dev_priv->gt_wa_list);
4694
4695         intel_cleanup_gt_powersave(dev_priv);
4696
4697         intel_uc_fini_misc(dev_priv);
4698         i915_gem_cleanup_userptr(dev_priv);
4699         i915_timelines_fini(dev_priv);
4700
4701         i915_gem_drain_freed_objects(dev_priv);
4702
4703         WARN_ON(!list_empty(&dev_priv->contexts.list));
4704 }
4705
4706 void i915_gem_init_mmio(struct drm_i915_private *i915)
4707 {
4708         i915_gem_sanitize(i915);
4709 }
4710
4711 void
4712 i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
4713 {
4714         int i;
4715
4716         if (INTEL_GEN(dev_priv) >= 7 && !IS_VALLEYVIEW(dev_priv) &&
4717             !IS_CHERRYVIEW(dev_priv))
4718                 dev_priv->num_fence_regs = 32;
4719         else if (INTEL_GEN(dev_priv) >= 4 ||
4720                  IS_I945G(dev_priv) || IS_I945GM(dev_priv) ||
4721                  IS_G33(dev_priv) || IS_PINEVIEW(dev_priv))
4722                 dev_priv->num_fence_regs = 16;
4723         else
4724                 dev_priv->num_fence_regs = 8;
4725
4726         if (intel_vgpu_active(dev_priv))
4727                 dev_priv->num_fence_regs =
4728                                 I915_READ(vgtif_reg(avail_rs.fence_num));
4729
4730         /* Initialize fence registers to zero */
4731         for (i = 0; i < dev_priv->num_fence_regs; i++) {
4732                 struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
4733
4734                 fence->i915 = dev_priv;
4735                 fence->id = i;
4736                 list_add_tail(&fence->link, &dev_priv->mm.fence_list);
4737         }
4738         i915_gem_restore_fences(dev_priv);
4739
4740         i915_gem_detect_bit_6_swizzle(dev_priv);
4741 }
4742
4743 static void i915_gem_init__mm(struct drm_i915_private *i915)
4744 {
4745         spin_lock_init(&i915->mm.object_stat_lock);
4746         spin_lock_init(&i915->mm.obj_lock);
4747         spin_lock_init(&i915->mm.free_lock);
4748
4749         init_llist_head(&i915->mm.free_list);
4750
4751         INIT_LIST_HEAD(&i915->mm.unbound_list);
4752         INIT_LIST_HEAD(&i915->mm.bound_list);
4753         INIT_LIST_HEAD(&i915->mm.fence_list);
4754
4755         INIT_LIST_HEAD(&i915->mm.userfault_list);
4756         intel_wakeref_auto_init(&i915->mm.userfault_wakeref, i915);
4757
4758         INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
4759 }
4760
4761 int i915_gem_init_early(struct drm_i915_private *dev_priv)
4762 {
4763         int err;
4764
4765         intel_gt_pm_init(dev_priv);
4766
4767         INIT_LIST_HEAD(&dev_priv->gt.active_rings);
4768         INIT_LIST_HEAD(&dev_priv->gt.closed_vma);
4769
4770         i915_gem_init__mm(dev_priv);
4771         i915_gem_init__pm(dev_priv);
4772
4773         init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
4774         init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
4775         mutex_init(&dev_priv->gpu_error.wedge_mutex);
4776         init_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu);
4777
4778         atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
4779
4780         spin_lock_init(&dev_priv->fb_tracking.lock);
4781
4782         err = i915_gemfs_init(dev_priv);
4783         if (err)
4784                 DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", err);
4785
4786         return 0;
4787 }
4788
4789 void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
4790 {
4791         i915_gem_drain_freed_objects(dev_priv);
4792         GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));
4793         GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
4794         WARN_ON(dev_priv->mm.object_count);
4795
4796         cleanup_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu);
4797
4798         i915_gemfs_fini(dev_priv);
4799 }
4800
4801 int i915_gem_freeze(struct drm_i915_private *dev_priv)
4802 {
4803         /* Discard all purgeable objects, let userspace recover those as
4804          * required after resuming.
4805          */
4806         i915_gem_shrink_all(dev_priv);
4807
4808         return 0;
4809 }
4810
4811 int i915_gem_freeze_late(struct drm_i915_private *i915)
4812 {
4813         struct drm_i915_gem_object *obj;
4814         struct list_head *phases[] = {
4815                 &i915->mm.unbound_list,
4816                 &i915->mm.bound_list,
4817                 NULL
4818         }, **phase;
4819
4820         /*
4821          * Called just before we write the hibernation image.
4822          *
4823          * We need to update the domain tracking to reflect that the CPU
4824          * will be accessing all the pages to create and restore from the
4825          * hibernation, and so upon restoration those pages will be in the
4826          * CPU domain.
4827          *
4828          * To make sure the hibernation image contains the latest state,
4829          * we update that state just before writing out the image.
4830          *
4831          * To try and reduce the hibernation image, we manually shrink
4832          * the objects as well, see i915_gem_freeze()
4833          */
4834
4835         i915_gem_shrink(i915, -1UL, NULL, I915_SHRINK_UNBOUND);
4836         i915_gem_drain_freed_objects(i915);
4837
4838         mutex_lock(&i915->drm.struct_mutex);
4839         for (phase = phases; *phase; phase++) {
4840                 list_for_each_entry(obj, *phase, mm.link)
4841                         WARN_ON(i915_gem_object_set_to_cpu_domain(obj, true));
4842         }
4843         mutex_unlock(&i915->drm.struct_mutex);
4844
4845         return 0;
4846 }
4847
4848 void i915_gem_release(struct drm_device *dev, struct drm_file *file)
4849 {
4850         struct drm_i915_file_private *file_priv = file->driver_priv;
4851         struct i915_request *request;
4852
4853         /* Clean up our request list when the client is going away, so that
4854          * later retire_requests won't dereference our soon-to-be-gone
4855          * file_priv.
4856          */
4857         spin_lock(&file_priv->mm.lock);
4858         list_for_each_entry(request, &file_priv->mm.request_list, client_link)
4859                 request->file_priv = NULL;
4860         spin_unlock(&file_priv->mm.lock);
4861 }
4862
4863 int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
4864 {
4865         struct drm_i915_file_private *file_priv;
4866         int ret;
4867
4868         DRM_DEBUG("\n");
4869
4870         file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
4871         if (!file_priv)
4872                 return -ENOMEM;
4873
4874         file->driver_priv = file_priv;
4875         file_priv->dev_priv = i915;
4876         file_priv->file = file;
4877
4878         spin_lock_init(&file_priv->mm.lock);
4879         INIT_LIST_HEAD(&file_priv->mm.request_list);
4880
4881         file_priv->bsd_engine = -1;
4882         file_priv->hang_timestamp = jiffies;
4883
4884         ret = i915_gem_context_open(i915, file);
4885         if (ret)
4886                 kfree(file_priv);
4887
4888         return ret;
4889 }
4890
4891 /**
4892  * i915_gem_track_fb - update frontbuffer tracking
4893  * @old: current GEM buffer for the frontbuffer slots
4894  * @new: new GEM buffer for the frontbuffer slots
4895  * @frontbuffer_bits: bitmask of frontbuffer slots
4896  *
4897  * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
4898  * from @old and setting them in @new. Both @old and @new can be NULL.
4899  */
4900 void i915_gem_track_fb(struct drm_i915_gem_object *old,
4901                        struct drm_i915_gem_object *new,
4902                        unsigned frontbuffer_bits)
4903 {
4904         /* Control of individual bits within the mask are guarded by
4905          * the owning plane->mutex, i.e. we can never see concurrent
4906          * manipulation of individual bits. But since the bitfield as a whole
4907          * is updated using RMW, we need to use atomics in order to update
4908          * the bits.
4909          */
4910         BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
4911                      BITS_PER_TYPE(atomic_t));
4912
4913         if (old) {
4914                 WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
4915                 atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
4916         }
4917
4918         if (new) {
4919                 WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
4920                 atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
4921         }
4922 }
4923
4924 /* Allocate a new GEM object and fill it with the supplied data */
4925 struct drm_i915_gem_object *
4926 i915_gem_object_create_from_data(struct drm_i915_private *dev_priv,
4927                                  const void *data, size_t size)
4928 {
4929         struct drm_i915_gem_object *obj;
4930         struct file *file;
4931         size_t offset;
4932         int err;
4933
4934         obj = i915_gem_object_create(dev_priv, round_up(size, PAGE_SIZE));
4935         if (IS_ERR(obj))
4936                 return obj;
4937
4938         GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);
4939
4940         file = obj->base.filp;
4941         offset = 0;
4942         do {
4943                 unsigned int len = min_t(typeof(size), size, PAGE_SIZE);
4944                 struct page *page;
4945                 void *pgdata, *vaddr;
4946
4947                 err = pagecache_write_begin(file, file->f_mapping,
4948                                             offset, len, 0,
4949                                             &page, &pgdata);
4950                 if (err < 0)
4951                         goto fail;
4952
4953                 vaddr = kmap(page);
4954                 memcpy(vaddr, data, len);
4955                 kunmap(page);
4956
4957                 err = pagecache_write_end(file, file->f_mapping,
4958                                           offset, len, len,
4959                                           page, pgdata);
4960                 if (err < 0)
4961                         goto fail;
4962
4963                 size -= len;
4964                 data += len;
4965                 offset += len;
4966         } while (size);
4967
4968         return obj;
4969
4970 fail:
4971         i915_gem_object_put(obj);
4972         return ERR_PTR(err);
4973 }
4974
4975 struct scatterlist *
4976 i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
4977                        unsigned int n,
4978                        unsigned int *offset)
4979 {
4980         struct i915_gem_object_page_iter *iter = &obj->mm.get_page;
4981         struct scatterlist *sg;
4982         unsigned int idx, count;
4983
4984         might_sleep();
4985         GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
4986         GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
4987
4988         /* As we iterate forward through the sg, we record each entry in a
4989          * radixtree for quick repeated (backwards) lookups. If we have seen
4990          * this index previously, we will have an entry for it.
4991          *
4992          * Initial lookup is O(N), but this is amortized to O(1) for
4993          * sequential page access (where each new request is consecutive
4994          * to the previous one). Repeated lookups are O(lg(obj->base.size)),
4995          * i.e. O(1) with a large constant!
4996          */
4997         if (n < READ_ONCE(iter->sg_idx))
4998                 goto lookup;
4999
5000         mutex_lock(&iter->lock);
5001
5002         /* We prefer to reuse the last sg so that repeated lookup of this
5003          * (or the subsequent) sg are fast - comparing against the last
5004          * sg is faster than going through the radixtree.
5005          */
5006
5007         sg = iter->sg_pos;
5008         idx = iter->sg_idx;
5009         count = __sg_page_count(sg);
5010
5011         while (idx + count <= n) {
5012                 void *entry;
5013                 unsigned long i;
5014                 int ret;
5015
5016                 /* If we cannot allocate and insert this entry, or the
5017                  * individual pages from this range, cancel updating the
5018                  * sg_idx so that on this lookup we are forced to linearly
5019                  * scan onwards, but on future lookups we will try the
5020                  * insertion again (in which case we need to be careful of
5021                  * the error return reporting that we have already inserted
5022                  * this index).
5023                  */
5024                 ret = radix_tree_insert(&iter->radix, idx, sg);
5025                 if (ret && ret != -EEXIST)
5026                         goto scan;
5027
5028                 entry = xa_mk_value(idx);
5029                 for (i = 1; i < count; i++) {
5030                         ret = radix_tree_insert(&iter->radix, idx + i, entry);
5031                         if (ret && ret != -EEXIST)
5032                                 goto scan;
5033                 }
5034
5035                 idx += count;
5036                 sg = ____sg_next(sg);
5037                 count = __sg_page_count(sg);
5038         }
5039
5040 scan:
5041         iter->sg_pos = sg;
5042         iter->sg_idx = idx;
5043
5044         mutex_unlock(&iter->lock);
5045
5046         if (unlikely(n < idx)) /* insertion completed by another thread */
5047                 goto lookup;
5048
5049         /* In case we failed to insert the entry into the radixtree, we need
5050          * to look beyond the current sg.
5051          */
5052         while (idx + count <= n) {
5053                 idx += count;
5054                 sg = ____sg_next(sg);
5055                 count = __sg_page_count(sg);
5056         }
5057
5058         *offset = n - idx;
5059         return sg;
5060
5061 lookup:
5062         rcu_read_lock();
5063
5064         sg = radix_tree_lookup(&iter->radix, n);
5065         GEM_BUG_ON(!sg);
5066
5067         /* If this index is in the middle of multi-page sg entry,
5068          * the radix tree will contain a value entry that points
5069          * to the start of that range. We will return the pointer to
5070          * the base page and the offset of this page within the
5071          * sg entry's range.
5072          */
5073         *offset = 0;
5074         if (unlikely(xa_is_value(sg))) {
5075                 unsigned long base = xa_to_value(sg);
5076
5077                 sg = radix_tree_lookup(&iter->radix, base);
5078                 GEM_BUG_ON(!sg);
5079
5080                 *offset = n - base;
5081         }
5082
5083         rcu_read_unlock();
5084
5085         return sg;
5086 }
5087
5088 struct page *
5089 i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n)
5090 {
5091         struct scatterlist *sg;
5092         unsigned int offset;
5093
5094         GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
5095
5096         sg = i915_gem_object_get_sg(obj, n, &offset);
5097         return nth_page(sg_page(sg), offset);
5098 }
5099
5100 /* Like i915_gem_object_get_page(), but mark the returned page dirty */
5101 struct page *
5102 i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
5103                                unsigned int n)
5104 {
5105         struct page *page;
5106
5107         page = i915_gem_object_get_page(obj, n);
5108         if (!obj->mm.dirty)
5109                 set_page_dirty(page);
5110
5111         return page;
5112 }
5113
5114 dma_addr_t
5115 i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj,
5116                                     unsigned long n,
5117                                     unsigned int *len)
5118 {
5119         struct scatterlist *sg;
5120         unsigned int offset;
5121
5122         sg = i915_gem_object_get_sg(obj, n, &offset);
5123
5124         if (len)
5125                 *len = sg_dma_len(sg) - (offset << PAGE_SHIFT);
5126
5127         return sg_dma_address(sg) + (offset << PAGE_SHIFT);
5128 }
5129
5130 dma_addr_t
5131 i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
5132                                 unsigned long n)
5133 {
5134         return i915_gem_object_get_dma_address_len(obj, n, NULL);
5135 }
5136
5137
5138 int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
5139 {
5140         struct sg_table *pages;
5141         int err;
5142
5143         if (align > obj->base.size)
5144                 return -EINVAL;
5145
5146         if (obj->ops == &i915_gem_phys_ops)
5147                 return 0;
5148
5149         if (obj->ops != &i915_gem_object_ops)
5150                 return -EINVAL;
5151
5152         err = i915_gem_object_unbind(obj);
5153         if (err)
5154                 return err;
5155
5156         mutex_lock(&obj->mm.lock);
5157
5158         if (obj->mm.madv != I915_MADV_WILLNEED) {
5159                 err = -EFAULT;
5160                 goto err_unlock;
5161         }
5162
5163         if (obj->mm.quirked) {
5164                 err = -EFAULT;
5165                 goto err_unlock;
5166         }
5167
5168         if (obj->mm.mapping) {
5169                 err = -EBUSY;
5170                 goto err_unlock;
5171         }
5172
5173         pages = __i915_gem_object_unset_pages(obj);
5174
5175         obj->ops = &i915_gem_phys_ops;
5176
5177         err = ____i915_gem_object_get_pages(obj);
5178         if (err)
5179                 goto err_xfer;
5180
5181         /* Perma-pin (until release) the physical set of pages */
5182         __i915_gem_object_pin_pages(obj);
5183
5184         if (!IS_ERR_OR_NULL(pages))
5185                 i915_gem_object_ops.put_pages(obj, pages);
5186         mutex_unlock(&obj->mm.lock);
5187         return 0;
5188
5189 err_xfer:
5190         obj->ops = &i915_gem_object_ops;
5191         if (!IS_ERR_OR_NULL(pages)) {
5192                 unsigned int sg_page_sizes = i915_sg_page_sizes(pages->sgl);
5193
5194                 __i915_gem_object_set_pages(obj, pages, sg_page_sizes);
5195         }
5196 err_unlock:
5197         mutex_unlock(&obj->mm.lock);
5198         return err;
5199 }
5200
5201 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
5202 #include "selftests/scatterlist.c"
5203 #include "selftests/mock_gem_device.c"
5204 #include "selftests/huge_gem_object.c"
5205 #include "selftests/huge_pages.c"
5206 #include "selftests/i915_gem_object.c"
5207 #include "selftests/i915_gem_coherency.c"
5208 #include "selftests/i915_gem.c"
5209 #endif