drivers/gpu/drm/i915/i915_gem.c
1 /*
2  * Copyright © 2008-2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *
26  */
27
28 #include <drm/drm_vma_manager.h>
29 #include <drm/drm_pci.h>
30 #include <drm/i915_drm.h>
31 #include <linux/dma-fence-array.h>
32 #include <linux/kthread.h>
33 #include <linux/reservation.h>
34 #include <linux/shmem_fs.h>
35 #include <linux/slab.h>
36 #include <linux/stop_machine.h>
37 #include <linux/swap.h>
38 #include <linux/pci.h>
39 #include <linux/dma-buf.h>
40 #include <linux/mman.h>
41
42 #include "gt/intel_engine_pm.h"
43 #include "gt/intel_gt_pm.h"
44 #include "gt/intel_mocs.h"
45 #include "gt/intel_reset.h"
46 #include "gt/intel_workarounds.h"
47
48 #include "i915_drv.h"
49 #include "i915_gem_clflush.h"
50 #include "i915_gemfs.h"
51 #include "i915_gem_pm.h"
52 #include "i915_trace.h"
53 #include "i915_vgpu.h"
54
55 #include "intel_display.h"
56 #include "intel_drv.h"
57 #include "intel_frontbuffer.h"
58 #include "intel_pm.h"
59
60 static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
61
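/*
 * Do CPU writes to this object need a clflush before the GPU (or display)
 * can see them? Writes already tracked as cache_dirty need nothing extra,
 * writes to objects that are not coherent-for-write always do, and
 * otherwise we keep globally pinned (in use by HW) objects flushed.
 */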
62 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
63 {
64         if (obj->cache_dirty)
65                 return false;
66
67         if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
68                 return true;
69
70         return obj->pin_global; /* currently in use by HW, keep flushed */
71 }
72
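/*
 * Reserve a node in the CPU-mappable range of the GGTT; used by the
 * pread/pwrite GGTT slow paths below to map object pages one at a time
 * when a full mappable pin cannot be obtained.
 */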
73 static int
74 insert_mappable_node(struct i915_ggtt *ggtt,
75                      struct drm_mm_node *node, u32 size)
76 {
77         memset(node, 0, sizeof(*node));
78         return drm_mm_insert_node_in_range(&ggtt->vm.mm, node,
79                                            size, 0, I915_COLOR_UNEVICTABLE,
80                                            0, ggtt->mappable_end,
81                                            DRM_MM_INSERT_LOW);
82 }
83
84 static void
85 remove_mappable_node(struct drm_mm_node *node)
86 {
87         drm_mm_remove_node(node);
88 }
89
90 /* some bookkeeping */
91 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
92                                   u64 size)
93 {
94         spin_lock(&dev_priv->mm.object_stat_lock);
95         dev_priv->mm.object_count++;
96         dev_priv->mm.object_memory += size;
97         spin_unlock(&dev_priv->mm.object_stat_lock);
98 }
99
100 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
101                                      u64 size)
102 {
103         spin_lock(&dev_priv->mm.object_stat_lock);
104         dev_priv->mm.object_count--;
105         dev_priv->mm.object_memory -= size;
106         spin_unlock(&dev_priv->mm.object_stat_lock);
107 }
108
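/*
 * Report the total GGTT size and an estimate of the space that is not
 * reserved or currently pinned, for userspace heuristics.
 */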
109 int
110 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
111                             struct drm_file *file)
112 {
113         struct i915_ggtt *ggtt = &to_i915(dev)->ggtt;
114         struct drm_i915_gem_get_aperture *args = data;
115         struct i915_vma *vma;
116         u64 pinned;
117
118         mutex_lock(&ggtt->vm.mutex);
119
120         pinned = ggtt->vm.reserved;
121         list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
122                 if (i915_vma_is_pinned(vma))
123                         pinned += vma->node.size;
124
125         mutex_unlock(&ggtt->vm.mutex);
126
127         args->aper_size = ggtt->vm.total;
128         args->aper_available_size = args->aper_size - pinned;
129
130         return 0;
131 }
132
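/*
 * Replace the shmem backing store with a single contiguous DMA allocation:
 * copy each shmem page into one drm_pci_alloc() buffer (clflushing as we
 * go, then dropping our page reference) and publish the buffer as a
 * one-entry sg_table, keeping the allocation in obj->phys_handle.
 */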
133 static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
134 {
135         struct address_space *mapping = obj->base.filp->f_mapping;
136         drm_dma_handle_t *phys;
137         struct sg_table *st;
138         struct scatterlist *sg;
139         char *vaddr;
140         int i;
141         int err;
142
143         if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
144                 return -EINVAL;
145
146         /* Always aligning to the object size allows a single allocation
147          * to handle all possible callers, and given typical object sizes,
148          * the alignment of the buddy allocation will naturally match.
149          */
150         phys = drm_pci_alloc(obj->base.dev,
151                              roundup_pow_of_two(obj->base.size),
152                              roundup_pow_of_two(obj->base.size));
153         if (!phys)
154                 return -ENOMEM;
155
156         vaddr = phys->vaddr;
157         for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
158                 struct page *page;
159                 char *src;
160
161                 page = shmem_read_mapping_page(mapping, i);
162                 if (IS_ERR(page)) {
163                         err = PTR_ERR(page);
164                         goto err_phys;
165                 }
166
167                 src = kmap_atomic(page);
168                 memcpy(vaddr, src, PAGE_SIZE);
169                 drm_clflush_virt_range(vaddr, PAGE_SIZE);
170                 kunmap_atomic(src);
171
172                 put_page(page);
173                 vaddr += PAGE_SIZE;
174         }
175
176         i915_gem_chipset_flush(to_i915(obj->base.dev));
177
178         st = kmalloc(sizeof(*st), GFP_KERNEL);
179         if (!st) {
180                 err = -ENOMEM;
181                 goto err_phys;
182         }
183
184         if (sg_alloc_table(st, 1, GFP_KERNEL)) {
185                 kfree(st);
186                 err = -ENOMEM;
187                 goto err_phys;
188         }
189
190         sg = st->sgl;
191         sg->offset = 0;
192         sg->length = obj->base.size;
193
194         sg_dma_address(sg) = phys->busaddr;
195         sg_dma_len(sg) = obj->base.size;
196
197         obj->phys_handle = phys;
198
199         __i915_gem_object_set_pages(obj, st, sg->length);
200
201         return 0;
202
203 err_phys:
204         drm_pci_free(obj->base.dev, phys);
205
206         return err;
207 }
208
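/*
 * Move the object into the CPU read/write domain, marking the cache dirty
 * if those CPU writes will later need to be clflushed for the hardware.
 */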
209 static void __start_cpu_write(struct drm_i915_gem_object *obj)
210 {
211         obj->read_domains = I915_GEM_DOMAIN_CPU;
212         obj->write_domain = I915_GEM_DOMAIN_CPU;
213         if (cpu_write_needs_clflush(obj))
214                 obj->cache_dirty = true;
215 }
216
217 void
218 __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
219                                 struct sg_table *pages,
220                                 bool needs_clflush)
221 {
222         GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);
223
224         if (obj->mm.madv == I915_MADV_DONTNEED)
225                 obj->mm.dirty = false;
226
227         if (needs_clflush &&
228             (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
229             !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
230                 drm_clflush_sg(pages);
231
232         __start_cpu_write(obj);
233 }
234
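/*
 * Tear down the contiguous DMA copy set up by get_pages_phys above: if the
 * object was dirtied, write the contents back into the shmem pages
 * (clflushing each page first), then free the sg_table and the
 * drm_pci allocation.
 */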
235 static void
236 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
237                                struct sg_table *pages)
238 {
239         __i915_gem_object_release_shmem(obj, pages, false);
240
241         if (obj->mm.dirty) {
242                 struct address_space *mapping = obj->base.filp->f_mapping;
243                 char *vaddr = obj->phys_handle->vaddr;
244                 int i;
245
246                 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
247                         struct page *page;
248                         char *dst;
249
250                         page = shmem_read_mapping_page(mapping, i);
251                         if (IS_ERR(page))
252                                 continue;
253
254                         dst = kmap_atomic(page);
255                         drm_clflush_virt_range(vaddr, PAGE_SIZE);
256                         memcpy(dst, vaddr, PAGE_SIZE);
257                         kunmap_atomic(dst);
258
259                         set_page_dirty(page);
260                         if (obj->mm.madv == I915_MADV_WILLNEED)
261                                 mark_page_accessed(page);
262                         put_page(page);
263                         vaddr += PAGE_SIZE;
264                 }
265                 obj->mm.dirty = false;
266         }
267
268         sg_free_table(pages);
269         kfree(pages);
270
271         drm_pci_free(obj->base.dev, obj->phys_handle);
272 }
273
274 static void
275 i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
276 {
277         i915_gem_object_unpin_pages(obj);
278 }
279
280 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
281         .get_pages = i915_gem_object_get_pages_phys,
282         .put_pages = i915_gem_object_put_pages_phys,
283         .release = i915_gem_object_release_phys,
284 };
285
286 static const struct drm_i915_gem_object_ops i915_gem_object_ops;
287
288 int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
289 {
290         struct i915_vma *vma;
291         LIST_HEAD(still_in_list);
292         int ret;
293
294         lockdep_assert_held(&obj->base.dev->struct_mutex);
295
296         /* Closed vma are removed from the obj->vma.list - but they may
297          * still have an active binding on the object. To remove those we
298          * must wait for all rendering to the object to complete (as unbinding
299          * must anyway), and retire the requests.
300          */
301         ret = i915_gem_object_set_to_cpu_domain(obj, false);
302         if (ret)
303                 return ret;
304
305         spin_lock(&obj->vma.lock);
306         while (!ret && (vma = list_first_entry_or_null(&obj->vma.list,
307                                                        struct i915_vma,
308                                                        obj_link))) {
309                 list_move_tail(&vma->obj_link, &still_in_list);
310                 spin_unlock(&obj->vma.lock);
311
312                 ret = i915_vma_unbind(vma);
313
314                 spin_lock(&obj->vma.lock);
315         }
316         list_splice(&still_in_list, &obj->vma.list);
317         spin_unlock(&obj->vma.lock);
318
319         return ret;
320 }
321
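/*
 * Wait upon a single dma_fence, using the i915 request fast path where
 * possible; returns the remaining timeout in jiffies, or a negative error.
 */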
322 static long
323 i915_gem_object_wait_fence(struct dma_fence *fence,
324                            unsigned int flags,
325                            long timeout)
326 {
327         struct i915_request *rq;
328
329         BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1);
330
331         if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
332                 return timeout;
333
334         if (!dma_fence_is_i915(fence))
335                 return dma_fence_wait_timeout(fence,
336                                               flags & I915_WAIT_INTERRUPTIBLE,
337                                               timeout);
338
339         rq = to_request(fence);
340         if (i915_request_completed(rq))
341                 goto out;
342
343         timeout = i915_request_wait(rq, flags, timeout);
344
345 out:
346         if (flags & I915_WAIT_LOCKED && i915_request_completed(rq))
347                 i915_request_retire_upto(rq);
348
349         return timeout;
350 }
351
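/*
 * Wait upon the exclusive fence and, with I915_WAIT_ALL, every shared
 * fence in the reservation object, then opportunistically prune fences
 * that are known to have signaled.
 */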
352 static long
353 i915_gem_object_wait_reservation(struct reservation_object *resv,
354                                  unsigned int flags,
355                                  long timeout)
356 {
357         unsigned int seq = __read_seqcount_begin(&resv->seq);
358         struct dma_fence *excl;
359         bool prune_fences = false;
360
361         if (flags & I915_WAIT_ALL) {
362                 struct dma_fence **shared;
363                 unsigned int count, i;
364                 int ret;
365
366                 ret = reservation_object_get_fences_rcu(resv,
367                                                         &excl, &count, &shared);
368                 if (ret)
369                         return ret;
370
371                 for (i = 0; i < count; i++) {
372                         timeout = i915_gem_object_wait_fence(shared[i],
373                                                              flags, timeout);
374                         if (timeout < 0)
375                                 break;
376
377                         dma_fence_put(shared[i]);
378                 }
379
380                 for (; i < count; i++)
381                         dma_fence_put(shared[i]);
382                 kfree(shared);
383
384                 /*
385                  * If both shared fences and an exclusive fence exist,
386                  * then by construction the shared fences must be later
387                  * than the exclusive fence. If we successfully wait for
388                  * all the shared fences, we know that the exclusive fence
389                  * must also be signaled. If all the shared fences are
390                  * signaled, we can prune the array and recover the
391                  * floating references on the fences/requests.
392                  */
393                 prune_fences = count && timeout >= 0;
394         } else {
395                 excl = reservation_object_get_excl_rcu(resv);
396         }
397
398         if (excl && timeout >= 0)
399                 timeout = i915_gem_object_wait_fence(excl, flags, timeout);
400
401         dma_fence_put(excl);
402
403         /*
404          * Opportunistically prune the fences iff we know they have *all* been
405          * signaled and that the reservation object has not been changed (i.e.
406          * no new fences have been added).
407          */
408         if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) {
409                 if (reservation_object_trylock(resv)) {
410                         if (!__read_seqcount_retry(&resv->seq, seq))
411                                 reservation_object_add_excl_fence(resv, NULL);
412                         reservation_object_unlock(resv);
413                 }
414         }
415
416         return timeout;
417 }
418
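/*
 * Bump the scheduling priority of the request backing an i915 fence, so
 * that the work a waiter depends on is not stuck behind lower priority
 * requests (see i915_gem_object_wait_priority() below).
 */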
419 static void __fence_set_priority(struct dma_fence *fence,
420                                  const struct i915_sched_attr *attr)
421 {
422         struct i915_request *rq;
423         struct intel_engine_cs *engine;
424
425         if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence))
426                 return;
427
428         rq = to_request(fence);
429         engine = rq->engine;
430
431         local_bh_disable();
432         rcu_read_lock(); /* RCU serialisation for set-wedged protection */
433         if (engine->schedule)
434                 engine->schedule(rq, attr);
435         rcu_read_unlock();
436         local_bh_enable(); /* kick the tasklets if queues were reprioritised */
437 }
438
439 static void fence_set_priority(struct dma_fence *fence,
440                                const struct i915_sched_attr *attr)
441 {
442         /* Recurse once into a fence-array */
443         if (dma_fence_is_array(fence)) {
444                 struct dma_fence_array *array = to_dma_fence_array(fence);
445                 int i;
446
447                 for (i = 0; i < array->num_fences; i++)
448                         __fence_set_priority(array->fences[i], attr);
449         } else {
450                 __fence_set_priority(fence, attr);
451         }
452 }
453
454 int
455 i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
456                               unsigned int flags,
457                               const struct i915_sched_attr *attr)
458 {
459         struct dma_fence *excl;
460
461         if (flags & I915_WAIT_ALL) {
462                 struct dma_fence **shared;
463                 unsigned int count, i;
464                 int ret;
465
466                 ret = reservation_object_get_fences_rcu(obj->resv,
467                                                         &excl, &count, &shared);
468                 if (ret)
469                         return ret;
470
471                 for (i = 0; i < count; i++) {
472                         fence_set_priority(shared[i], attr);
473                         dma_fence_put(shared[i]);
474                 }
475
476                 kfree(shared);
477         } else {
478                 excl = reservation_object_get_excl_rcu(obj->resv);
479         }
480
481         if (excl) {
482                 fence_set_priority(excl, attr);
483                 dma_fence_put(excl);
484         }
485         return 0;
486 }
487
488 /**
489  * i915_gem_object_wait - Waits for rendering to the object to be completed
490  * @obj: i915 gem object
491  * @flags: how to wait (under a lock, for all rendering or just for writes etc)
492  * @timeout: how long to wait
493  */
494 int
495 i915_gem_object_wait(struct drm_i915_gem_object *obj,
496                      unsigned int flags,
497                      long timeout)
498 {
499         might_sleep();
500         GEM_BUG_ON(timeout < 0);
501
502         timeout = i915_gem_object_wait_reservation(obj->resv, flags, timeout);
503         return timeout < 0 ? timeout : 0;
504 }
505
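/*
 * pwrite fast path for phys objects: copy the user data straight into the
 * contiguous allocation and clflush it, bypassing the shmem pages.
 */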
506 static int
507 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
508                      struct drm_i915_gem_pwrite *args,
509                      struct drm_file *file)
510 {
511         void *vaddr = obj->phys_handle->vaddr + args->offset;
512         char __user *user_data = u64_to_user_ptr(args->data_ptr);
513
514         /* We manually control the domain here and pretend that it
515          * remains coherent i.e. in the GTT domain, like shmem_pwrite.
516          */
517         intel_fb_obj_invalidate(obj, ORIGIN_CPU);
518         if (copy_from_user(vaddr, user_data, args->size))
519                 return -EFAULT;
520
521         drm_clflush_virt_range(vaddr, args->size);
522         i915_gem_chipset_flush(to_i915(obj->base.dev));
523
524         intel_fb_obj_flush(obj, ORIGIN_CPU);
525         return 0;
526 }
527
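/*
 * Common creation helper shared by the create and dumb_create ioctls:
 * round the size up to page granularity, allocate the object and return a
 * new handle for it (the handle now holds the only reference).
 */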
528 static int
529 i915_gem_create(struct drm_file *file,
530                 struct drm_i915_private *dev_priv,
531                 u64 *size_p,
532                 u32 *handle_p)
533 {
534         struct drm_i915_gem_object *obj;
535         u32 handle;
536         u64 size;
537         int ret;
538
539         size = round_up(*size_p, PAGE_SIZE);
540         if (size == 0)
541                 return -EINVAL;
542
543         /* Allocate the new object */
544         obj = i915_gem_object_create(dev_priv, size);
545         if (IS_ERR(obj))
546                 return PTR_ERR(obj);
547
548         ret = drm_gem_handle_create(file, &obj->base, &handle);
549         /* drop reference from allocate - handle holds it now */
550         i915_gem_object_put(obj);
551         if (ret)
552                 return ret;
553
554         *handle_p = handle;
555         *size_p = size;
556         return 0;
557 }
558
559 int
560 i915_gem_dumb_create(struct drm_file *file,
561                      struct drm_device *dev,
562                      struct drm_mode_create_dumb *args)
563 {
564         int cpp = DIV_ROUND_UP(args->bpp, 8);
565         u32 format;
566
567         switch (cpp) {
568         case 1:
569                 format = DRM_FORMAT_C8;
570                 break;
571         case 2:
572                 format = DRM_FORMAT_RGB565;
573                 break;
574         case 4:
575                 format = DRM_FORMAT_XRGB8888;
576                 break;
577         default:
578                 return -EINVAL;
579         }
580
581         /* have to work out size/pitch and return them */
582         args->pitch = ALIGN(args->width * cpp, 64);
583
584         /* align stride to page size so that we can remap */
585         if (args->pitch > intel_plane_fb_max_stride(to_i915(dev), format,
586                                                     DRM_FORMAT_MOD_LINEAR))
587                 args->pitch = ALIGN(args->pitch, 4096);
588
589         args->size = args->pitch * args->height;
590         return i915_gem_create(file, to_i915(dev),
591                                &args->size, &args->handle);
592 }
593
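/*
 * GPU writes to a cacheable (neither uncached nor write-through) object
 * may leave dirty cachelines behind; see flush_write_domain() below, which
 * uses this to mark such objects cache_dirty.
 */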
594 static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
595 {
596         return !(obj->cache_level == I915_CACHE_NONE ||
597                  obj->cache_level == I915_CACHE_WT);
598 }
599
600 /**
601  * i915_gem_create_ioctl - Creates a new mm object and returns a handle to it.
602  * @dev: drm device pointer
603  * @data: ioctl data blob
604  * @file: drm file pointer
605  */
606 int
607 i915_gem_create_ioctl(struct drm_device *dev, void *data,
608                       struct drm_file *file)
609 {
610         struct drm_i915_private *dev_priv = to_i915(dev);
611         struct drm_i915_gem_create *args = data;
612
613         i915_gem_flush_free_objects(dev_priv);
614
615         return i915_gem_create(file, dev_priv,
616                                &args->size, &args->handle);
617 }
618
619 static inline enum fb_op_origin
620 fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
621 {
622         return (domain == I915_GEM_DOMAIN_GTT ?
623                 obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
624 }
625
626 void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
627 {
628         intel_wakeref_t wakeref;
629
630         /*
631          * No actual flushing is required for the GTT write domain for reads
632          * from the GTT domain. Writes to it "immediately" go to main memory
633          * as far as we know, so there's no chipset flush. It also doesn't
634          * land in the GPU render cache.
635          *
636          * However, we do have to enforce the order so that all writes through
637          * the GTT land before any writes to the device, such as updates to
638          * the GATT itself.
639          *
640          * We also have to wait a bit for the writes to land from the GTT.
641          * An uncached read (i.e. mmio) seems to be ideal for the round-trip
642          * timing. This issue has only been observed when switching quickly
643          * between GTT writes and CPU reads from inside the kernel on recent hw,
644          * and it appears to only affect discrete GTT blocks (i.e. on LLC
645          * system agents we cannot reproduce this behaviour, until Cannonlake
646          * that was!).
647          */
648
649         wmb();
650
651         if (INTEL_INFO(dev_priv)->has_coherent_ggtt)
652                 return;
653
654         i915_gem_chipset_flush(dev_priv);
655
656         with_intel_runtime_pm(dev_priv, wakeref) {
657                 spin_lock_irq(&dev_priv->uncore.lock);
658
659                 POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE));
660
661                 spin_unlock_irq(&dev_priv->uncore.lock);
662         }
663 }
664
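/*
 * Flush writes pending in the given write domains before the object
 * changes domain: a wmb/chipset flush for GGTT and WC writes, a clflush
 * for CPU writes, or simply marking the cache dirty for GPU render writes.
 */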
665 static void
666 flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
667 {
668         struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
669         struct i915_vma *vma;
670
671         if (!(obj->write_domain & flush_domains))
672                 return;
673
674         switch (obj->write_domain) {
675         case I915_GEM_DOMAIN_GTT:
676                 i915_gem_flush_ggtt_writes(dev_priv);
677
678                 intel_fb_obj_flush(obj,
679                                    fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
680
681                 for_each_ggtt_vma(vma, obj) {
682                         if (vma->iomap)
683                                 continue;
684
685                         i915_vma_unset_ggtt_write(vma);
686                 }
687                 break;
688
689         case I915_GEM_DOMAIN_WC:
690                 wmb();
691                 break;
692
693         case I915_GEM_DOMAIN_CPU:
694                 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
695                 break;
696
697         case I915_GEM_DOMAIN_RENDER:
698                 if (gpu_write_needs_clflush(obj))
699                         obj->cache_dirty = true;
700                 break;
701         }
702
703         obj->write_domain = 0;
704 }
705
706 /*
707  * Pins the specified object's pages and synchronizes the object with
708  * GPU accesses. Sets needs_clflush to non-zero if the caller should
709  * flush the object from the CPU cache.
710  */
711 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
712                                     unsigned int *needs_clflush)
713 {
714         int ret;
715
716         lockdep_assert_held(&obj->base.dev->struct_mutex);
717
718         *needs_clflush = 0;
719         if (!i915_gem_object_has_struct_page(obj))
720                 return -ENODEV;
721
722         ret = i915_gem_object_wait(obj,
723                                    I915_WAIT_INTERRUPTIBLE |
724                                    I915_WAIT_LOCKED,
725                                    MAX_SCHEDULE_TIMEOUT);
726         if (ret)
727                 return ret;
728
729         ret = i915_gem_object_pin_pages(obj);
730         if (ret)
731                 return ret;
732
733         if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
734             !static_cpu_has(X86_FEATURE_CLFLUSH)) {
735                 ret = i915_gem_object_set_to_cpu_domain(obj, false);
736                 if (ret)
737                         goto err_unpin;
738                 else
739                         goto out;
740         }
741
742         flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
743
744         /* If we're not in the cpu read domain, set ourselves into the gtt
745          * read domain and manually flush cachelines (if required). This
746          * optimizes for the case when the gpu will dirty the data
747          * anyway again before the next pread happens.
748          */
749         if (!obj->cache_dirty &&
750             !(obj->read_domains & I915_GEM_DOMAIN_CPU))
751                 *needs_clflush = CLFLUSH_BEFORE;
752
753 out:
754         /* return with the pages pinned */
755         return 0;
756
757 err_unpin:
758         i915_gem_object_unpin_pages(obj);
759         return ret;
760 }
761
762 int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
763                                      unsigned int *needs_clflush)
764 {
765         int ret;
766
767         lockdep_assert_held(&obj->base.dev->struct_mutex);
768
769         *needs_clflush = 0;
770         if (!i915_gem_object_has_struct_page(obj))
771                 return -ENODEV;
772
773         ret = i915_gem_object_wait(obj,
774                                    I915_WAIT_INTERRUPTIBLE |
775                                    I915_WAIT_LOCKED |
776                                    I915_WAIT_ALL,
777                                    MAX_SCHEDULE_TIMEOUT);
778         if (ret)
779                 return ret;
780
781         ret = i915_gem_object_pin_pages(obj);
782         if (ret)
783                 return ret;
784
785         if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
786             !static_cpu_has(X86_FEATURE_CLFLUSH)) {
787                 ret = i915_gem_object_set_to_cpu_domain(obj, true);
788                 if (ret)
789                         goto err_unpin;
790                 else
791                         goto out;
792         }
793
794         flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
795
796         /* If we're not in the cpu write domain, set ourselves into the
797          * gtt write domain and manually flush cachelines (as required).
798          * This optimizes for the case when the gpu will use the data
799          * right away and we therefore have to clflush anyway.
800          */
801         if (!obj->cache_dirty) {
802                 *needs_clflush |= CLFLUSH_AFTER;
803
804                 /*
805                  * Same trick applies to invalidate partially written
806                  * cachelines read before writing.
807                  */
808                 if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
809                         *needs_clflush |= CLFLUSH_BEFORE;
810         }
811
812 out:
813         intel_fb_obj_invalidate(obj, ORIGIN_CPU);
814         obj->mm.dirty = true;
815         /* return with the pages pinned */
816         return 0;
817
818 err_unpin:
819         i915_gem_object_unpin_pages(obj);
820         return ret;
821 }
822
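/* Per-page copy for the shmem pread path: optionally clflush, then copy out. */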
823 static int
824 shmem_pread(struct page *page, int offset, int len, char __user *user_data,
825             bool needs_clflush)
826 {
827         char *vaddr;
828         int ret;
829
830         vaddr = kmap(page);
831
832         if (needs_clflush)
833                 drm_clflush_virt_range(vaddr + offset, len);
834
835         ret = __copy_to_user(user_data, vaddr + offset, len);
836
837         kunmap(page);
838
839         return ret ? -EFAULT : 0;
840 }
841
842 static int
843 i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
844                      struct drm_i915_gem_pread *args)
845 {
846         char __user *user_data;
847         u64 remain;
848         unsigned int needs_clflush;
849         unsigned int idx, offset;
850         int ret;
851
852         ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
853         if (ret)
854                 return ret;
855
856         ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
857         mutex_unlock(&obj->base.dev->struct_mutex);
858         if (ret)
859                 return ret;
860
861         remain = args->size;
862         user_data = u64_to_user_ptr(args->data_ptr);
863         offset = offset_in_page(args->offset);
864         for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
865                 struct page *page = i915_gem_object_get_page(obj, idx);
866                 unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);
867
868                 ret = shmem_pread(page, offset, length, user_data,
869                                   needs_clflush);
870                 if (ret)
871                         break;
872
873                 remain -= length;
874                 user_data += length;
875                 offset = 0;
876         }
877
878         i915_gem_obj_finish_shmem_access(obj);
879         return ret;
880 }
881
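/*
 * Copy from the GGTT aperture to userspace: try an atomic WC mapping
 * first and fall back to a full mapping that may fault; returns non-zero
 * if any bytes could not be copied.
 */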
882 static inline bool
883 gtt_user_read(struct io_mapping *mapping,
884               loff_t base, int offset,
885               char __user *user_data, int length)
886 {
887         void __iomem *vaddr;
888         unsigned long unwritten;
889
890         /* We can use the cpu mem copy function because this is X86. */
891         vaddr = io_mapping_map_atomic_wc(mapping, base);
892         unwritten = __copy_to_user_inatomic(user_data,
893                                             (void __force *)vaddr + offset,
894                                             length);
895         io_mapping_unmap_atomic(vaddr);
896         if (unwritten) {
897                 vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
898                 unwritten = copy_to_user(user_data,
899                                          (void __force *)vaddr + offset,
900                                          length);
901                 io_mapping_unmap(vaddr);
902         }
903         return unwritten;
904 }
905
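/*
 * pread slow path through the GGTT aperture, used when reading via the
 * shmem pages is not possible or has faulted: pin the object into the
 * mappable aperture, or failing that remap it page by page through a
 * temporary GGTT PTE.
 */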
906 static int
907 i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
908                    const struct drm_i915_gem_pread *args)
909 {
910         struct drm_i915_private *i915 = to_i915(obj->base.dev);
911         struct i915_ggtt *ggtt = &i915->ggtt;
912         intel_wakeref_t wakeref;
913         struct drm_mm_node node;
914         struct i915_vma *vma;
915         void __user *user_data;
916         u64 remain, offset;
917         int ret;
918
919         ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
920         if (ret)
921                 return ret;
922
923         wakeref = intel_runtime_pm_get(i915);
924         vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
925                                        PIN_MAPPABLE |
926                                        PIN_NONFAULT |
927                                        PIN_NONBLOCK);
928         if (!IS_ERR(vma)) {
929                 node.start = i915_ggtt_offset(vma);
930                 node.allocated = false;
931                 ret = i915_vma_put_fence(vma);
932                 if (ret) {
933                         i915_vma_unpin(vma);
934                         vma = ERR_PTR(ret);
935                 }
936         }
937         if (IS_ERR(vma)) {
938                 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
939                 if (ret)
940                         goto out_unlock;
941                 GEM_BUG_ON(!node.allocated);
942         }
943
944         ret = i915_gem_object_set_to_gtt_domain(obj, false);
945         if (ret)
946                 goto out_unpin;
947
948         mutex_unlock(&i915->drm.struct_mutex);
949
950         user_data = u64_to_user_ptr(args->data_ptr);
951         remain = args->size;
952         offset = args->offset;
953
954         while (remain > 0) {
955                 /* Operation in this page
956                  *
957                  * page_base = page offset within aperture
958                  * page_offset = offset within page
959                  * page_length = bytes to copy for this page
960                  */
961                 u32 page_base = node.start;
962                 unsigned page_offset = offset_in_page(offset);
963                 unsigned page_length = PAGE_SIZE - page_offset;
964                 page_length = remain < page_length ? remain : page_length;
965                 if (node.allocated) {
966                         wmb();
967                         ggtt->vm.insert_page(&ggtt->vm,
968                                              i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
969                                              node.start, I915_CACHE_NONE, 0);
970                         wmb();
971                 } else {
972                         page_base += offset & PAGE_MASK;
973                 }
974
975                 if (gtt_user_read(&ggtt->iomap, page_base, page_offset,
976                                   user_data, page_length)) {
977                         ret = -EFAULT;
978                         break;
979                 }
980
981                 remain -= page_length;
982                 user_data += page_length;
983                 offset += page_length;
984         }
985
986         mutex_lock(&i915->drm.struct_mutex);
987 out_unpin:
988         if (node.allocated) {
989                 wmb();
990                 ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
991                 remove_mappable_node(&node);
992         } else {
993                 i915_vma_unpin(vma);
994         }
995 out_unlock:
996         intel_runtime_pm_put(i915, wakeref);
997         mutex_unlock(&i915->drm.struct_mutex);
998
999         return ret;
1000 }
1001
1002 /**
1003  * i915_gem_pread_ioctl - Reads data from the object referenced by handle.
1004  * @dev: drm device pointer
1005  * @data: ioctl data blob
1006  * @file: drm file pointer
1007  *
1008  * On error, the contents of *data are undefined.
1009  */
1010 int
1011 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
1012                      struct drm_file *file)
1013 {
1014         struct drm_i915_gem_pread *args = data;
1015         struct drm_i915_gem_object *obj;
1016         int ret;
1017
1018         if (args->size == 0)
1019                 return 0;
1020
1021         if (!access_ok(u64_to_user_ptr(args->data_ptr),
1022                        args->size))
1023                 return -EFAULT;
1024
1025         obj = i915_gem_object_lookup(file, args->handle);
1026         if (!obj)
1027                 return -ENOENT;
1028
1029         /* Bounds check source.  */
1030         if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
1031                 ret = -EINVAL;
1032                 goto out;
1033         }
1034
1035         trace_i915_gem_object_pread(obj, args->offset, args->size);
1036
1037         ret = i915_gem_object_wait(obj,
1038                                    I915_WAIT_INTERRUPTIBLE,
1039                                    MAX_SCHEDULE_TIMEOUT);
1040         if (ret)
1041                 goto out;
1042
1043         ret = i915_gem_object_pin_pages(obj);
1044         if (ret)
1045                 goto out;
1046
1047         ret = i915_gem_shmem_pread(obj, args);
1048         if (ret == -EFAULT || ret == -ENODEV)
1049                 ret = i915_gem_gtt_pread(obj, args);
1050
1051         i915_gem_object_unpin_pages(obj);
1052 out:
1053         i915_gem_object_put(obj);
1054         return ret;
1055 }
1056
1057 /* This is the fast write path which cannot handle
1058  * page faults in the source data
1059  */
1060
1061 static inline bool
1062 ggtt_write(struct io_mapping *mapping,
1063            loff_t base, int offset,
1064            char __user *user_data, int length)
1065 {
1066         void __iomem *vaddr;
1067         unsigned long unwritten;
1068
1069         /* We can use the cpu mem copy function because this is X86. */
1070         vaddr = io_mapping_map_atomic_wc(mapping, base);
1071         unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset,
1072                                                       user_data, length);
1073         io_mapping_unmap_atomic(vaddr);
1074         if (unwritten) {
1075                 vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
1076                 unwritten = copy_from_user((void __force *)vaddr + offset,
1077                                            user_data, length);
1078                 io_mapping_unmap(vaddr);
1079         }
1080
1081         return unwritten;
1082 }
1083
1084 /**
1085  * i915_gem_gtt_pwrite_fast - This is the fast pwrite path, where we copy
1086  * the data directly from the user into the GTT, uncached.
1087  * @obj: i915 GEM object
1088  * @args: pwrite arguments structure
1089  */
1090 static int
1091 i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
1092                          const struct drm_i915_gem_pwrite *args)
1093 {
1094         struct drm_i915_private *i915 = to_i915(obj->base.dev);
1095         struct i915_ggtt *ggtt = &i915->ggtt;
1096         intel_wakeref_t wakeref;
1097         struct drm_mm_node node;
1098         struct i915_vma *vma;
1099         u64 remain, offset;
1100         void __user *user_data;
1101         int ret;
1102
1103         ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
1104         if (ret)
1105                 return ret;
1106
1107         if (i915_gem_object_has_struct_page(obj)) {
1108                 /*
1109                  * Avoid waking the device up if we can fall back, as
1110                  * waking/resuming is very slow (worst-case 10-100 ms
1111                  * depending on PCI sleeps and our own resume time).
1112                  * This easily dwarfs any performance advantage from
1113                  * using the cache bypass of indirect GGTT access.
1114                  */
1115                 wakeref = intel_runtime_pm_get_if_in_use(i915);
1116                 if (!wakeref) {
1117                         ret = -EFAULT;
1118                         goto out_unlock;
1119                 }
1120         } else {
1121                 /* No backing pages, no fallback, we must force GGTT access */
1122                 wakeref = intel_runtime_pm_get(i915);
1123         }
1124
1125         vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
1126                                        PIN_MAPPABLE |
1127                                        PIN_NONFAULT |
1128                                        PIN_NONBLOCK);
1129         if (!IS_ERR(vma)) {
1130                 node.start = i915_ggtt_offset(vma);
1131                 node.allocated = false;
1132                 ret = i915_vma_put_fence(vma);
1133                 if (ret) {
1134                         i915_vma_unpin(vma);
1135                         vma = ERR_PTR(ret);
1136                 }
1137         }
1138         if (IS_ERR(vma)) {
1139                 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
1140                 if (ret)
1141                         goto out_rpm;
1142                 GEM_BUG_ON(!node.allocated);
1143         }
1144
1145         ret = i915_gem_object_set_to_gtt_domain(obj, true);
1146         if (ret)
1147                 goto out_unpin;
1148
1149         mutex_unlock(&i915->drm.struct_mutex);
1150
1151         intel_fb_obj_invalidate(obj, ORIGIN_CPU);
1152
1153         user_data = u64_to_user_ptr(args->data_ptr);
1154         offset = args->offset;
1155         remain = args->size;
1156         while (remain) {
1157                 /* Operation in this page
1158                  *
1159                  * page_base = page offset within aperture
1160                  * page_offset = offset within page
1161                  * page_length = bytes to copy for this page
1162                  */
1163                 u32 page_base = node.start;
1164                 unsigned int page_offset = offset_in_page(offset);
1165                 unsigned int page_length = PAGE_SIZE - page_offset;
1166                 page_length = remain < page_length ? remain : page_length;
1167                 if (node.allocated) {
1168                         wmb(); /* flush the write before we modify the GGTT */
1169                         ggtt->vm.insert_page(&ggtt->vm,
1170                                              i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
1171                                              node.start, I915_CACHE_NONE, 0);
1172                         wmb(); /* flush modifications to the GGTT (insert_page) */
1173                 } else {
1174                         page_base += offset & PAGE_MASK;
1175                 }
1176                 /* If we get a fault while copying data, then (presumably) our
1177                  * source page isn't available.  Return the error and we'll
1178                  * retry in the slow path.
1179                  * If the object is non-shmem backed, we retry again with the
1180                  * path that handles page faults.
1181                  */
1182                 if (ggtt_write(&ggtt->iomap, page_base, page_offset,
1183                                user_data, page_length)) {
1184                         ret = -EFAULT;
1185                         break;
1186                 }
1187
1188                 remain -= page_length;
1189                 user_data += page_length;
1190                 offset += page_length;
1191         }
1192         intel_fb_obj_flush(obj, ORIGIN_CPU);
1193
1194         mutex_lock(&i915->drm.struct_mutex);
1195 out_unpin:
1196         if (node.allocated) {
1197                 wmb();
1198                 ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
1199                 remove_mappable_node(&node);
1200         } else {
1201                 i915_vma_unpin(vma);
1202         }
1203 out_rpm:
1204         intel_runtime_pm_put(i915, wakeref);
1205 out_unlock:
1206         mutex_unlock(&i915->drm.struct_mutex);
1207         return ret;
1208 }
1209
1210 /* Per-page copy function for the shmem pwrite fastpath.
1211  * Flushes invalid cachelines before writing to the target if
1212  * needs_clflush_before is set and flushes out any written cachelines after
1213  * writing if needs_clflush_after is set.
1214  */
1215 static int
1216 shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
1217              bool needs_clflush_before,
1218              bool needs_clflush_after)
1219 {
1220         char *vaddr;
1221         int ret;
1222
1223         vaddr = kmap(page);
1224
1225         if (needs_clflush_before)
1226                 drm_clflush_virt_range(vaddr + offset, len);
1227
1228         ret = __copy_from_user(vaddr + offset, user_data, len);
1229         if (!ret && needs_clflush_after)
1230                 drm_clflush_virt_range(vaddr + offset, len);
1231
1232         kunmap(page);
1233
1234         return ret ? -EFAULT : 0;
1235 }
1236
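/*
 * pwrite via the object's shmem pages, flushing partially written
 * cachelines before and/or after the copy as requested by
 * i915_gem_obj_prepare_shmem_write().
 */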
1237 static int
1238 i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
1239                       const struct drm_i915_gem_pwrite *args)
1240 {
1241         struct drm_i915_private *i915 = to_i915(obj->base.dev);
1242         void __user *user_data;
1243         u64 remain;
1244         unsigned int partial_cacheline_write;
1245         unsigned int needs_clflush;
1246         unsigned int offset, idx;
1247         int ret;
1248
1249         ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
1250         if (ret)
1251                 return ret;
1252
1253         ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
1254         mutex_unlock(&i915->drm.struct_mutex);
1255         if (ret)
1256                 return ret;
1257
1258         /* If we don't overwrite a cacheline completely we need to be
1259          * careful to have up-to-date data by first clflushing. Don't
1260          * overcomplicate things and flush the entire write.
1261          */
1262         partial_cacheline_write = 0;
1263         if (needs_clflush & CLFLUSH_BEFORE)
1264                 partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1;
1265
1266         user_data = u64_to_user_ptr(args->data_ptr);
1267         remain = args->size;
1268         offset = offset_in_page(args->offset);
1269         for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
1270                 struct page *page = i915_gem_object_get_page(obj, idx);
1271                 unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);
1272
1273                 ret = shmem_pwrite(page, offset, length, user_data,
1274                                    (offset | length) & partial_cacheline_write,
1275                                    needs_clflush & CLFLUSH_AFTER);
1276                 if (ret)
1277                         break;
1278
1279                 remain -= length;
1280                 user_data += length;
1281                 offset = 0;
1282         }
1283
1284         intel_fb_obj_flush(obj, ORIGIN_CPU);
1285         i915_gem_obj_finish_shmem_access(obj);
1286         return ret;
1287 }
1288
1289 /**
1290  * i915_gem_pwrite_ioctl - Writes data to the object referenced by handle.
1291  * @dev: drm device
1292  * @data: ioctl data blob
1293  * @file: drm file
1294  *
1295  * On error, the contents of the buffer that were to be modified are undefined.
1296  */
1297 int
1298 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
1299                       struct drm_file *file)
1300 {
1301         struct drm_i915_gem_pwrite *args = data;
1302         struct drm_i915_gem_object *obj;
1303         int ret;
1304
1305         if (args->size == 0)
1306                 return 0;
1307
1308         if (!access_ok(u64_to_user_ptr(args->data_ptr), args->size))
1309                 return -EFAULT;
1310
1311         obj = i915_gem_object_lookup(file, args->handle);
1312         if (!obj)
1313                 return -ENOENT;
1314
1315         /* Bounds check destination. */
1316         if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
1317                 ret = -EINVAL;
1318                 goto err;
1319         }
1320
1321         /* Writes not allowed into this read-only object */
1322         if (i915_gem_object_is_readonly(obj)) {
1323                 ret = -EINVAL;
1324                 goto err;
1325         }
1326
1327         trace_i915_gem_object_pwrite(obj, args->offset, args->size);
1328
1329         ret = -ENODEV;
1330         if (obj->ops->pwrite)
1331                 ret = obj->ops->pwrite(obj, args);
1332         if (ret != -ENODEV)
1333                 goto err;
1334
1335         ret = i915_gem_object_wait(obj,
1336                                    I915_WAIT_INTERRUPTIBLE |
1337                                    I915_WAIT_ALL,
1338                                    MAX_SCHEDULE_TIMEOUT);
1339         if (ret)
1340                 goto err;
1341
1342         ret = i915_gem_object_pin_pages(obj);
1343         if (ret)
1344                 goto err;
1345
1346         ret = -EFAULT;
1347         /* We can only do the GTT pwrite on untiled buffers, as otherwise
1348          * it would end up going through the fenced access, and we'll get
1349          * different detiling behavior between reading and writing.
1350          * pread/pwrite currently are reading and writing from the CPU
1351          * perspective, requiring manual detiling by the client.
1352          */
1353         if (!i915_gem_object_has_struct_page(obj) ||
1354             cpu_write_needs_clflush(obj))
1355                 /* Note that the gtt paths might fail with non-page-backed user
1356                  * pointers (e.g. gtt mappings when moving data between
1357                  * textures). Fall back to the shmem path in that case.
1358                  */
1359                 ret = i915_gem_gtt_pwrite_fast(obj, args);
1360
1361         if (ret == -EFAULT || ret == -ENOSPC) {
1362                 if (obj->phys_handle)
1363                         ret = i915_gem_phys_pwrite(obj, args, file);
1364                 else
1365                         ret = i915_gem_shmem_pwrite(obj, args);
1366         }
1367
1368         i915_gem_object_unpin_pages(obj);
1369 err:
1370         i915_gem_object_put(obj);
1371         return ret;
1372 }
1373
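/*
 * Bump the object and its GGTT vma to the tail of their lists, treating
 * them as most recently used after this CPU access (see the "bump the
 * LRU" call in i915_gem_set_domain_ioctl()).
 */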
1374 static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
1375 {
1376         struct drm_i915_private *i915 = to_i915(obj->base.dev);
1377         struct list_head *list;
1378         struct i915_vma *vma;
1379
1380         GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
1381
1382         mutex_lock(&i915->ggtt.vm.mutex);
1383         for_each_ggtt_vma(vma, obj) {
1384                 if (!drm_mm_node_allocated(&vma->node))
1385                         continue;
1386
1387                 list_move_tail(&vma->vm_link, &vma->vm->bound_list);
1388         }
1389         mutex_unlock(&i915->ggtt.vm.mutex);
1390
1391         spin_lock(&i915->mm.obj_lock);
1392         list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
1393         list_move_tail(&obj->mm.link, list);
1394         spin_unlock(&i915->mm.obj_lock);
1395 }
1396
1397 /**
1398  * i915_gem_set_domain_ioctl - Called when user space prepares to use an
1399  * object with the CPU, through the mmap ioctl's mapping or a GTT mapping.
1400  * @dev: drm device
1401  * @data: ioctl data blob
1402  * @file: drm file
1403  */
1404 int
1405 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1406                           struct drm_file *file)
1407 {
1408         struct drm_i915_gem_set_domain *args = data;
1409         struct drm_i915_gem_object *obj;
1410         u32 read_domains = args->read_domains;
1411         u32 write_domain = args->write_domain;
1412         int err;
1413
1414         /* Only handle setting domains to types used by the CPU. */
1415         if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
1416                 return -EINVAL;
1417
1418         /*
1419          * Having something in the write domain implies it's in the read
1420          * domain, and only that read domain.  Enforce that in the request.
1421          */
1422         if (write_domain && read_domains != write_domain)
1423                 return -EINVAL;
1424
1425         if (!read_domains)
1426                 return 0;
1427
1428         obj = i915_gem_object_lookup(file, args->handle);
1429         if (!obj)
1430                 return -ENOENT;
1431
1432         /*
1433          * Already in the desired write domain? Nothing for us to do!
1434          *
1435          * We apply a little bit of cunning here to catch a broader set of
1436          * no-ops. If obj->write_domain is set, we must be in the same
1437          * obj->read_domains, and only that domain. Therefore, if that
1438          * obj->write_domain matches the request read_domains, we are
1439          * already in the same read/write domain and can skip the operation,
1440          * without having to further check the requested write_domain.
1441          */
1442         if (READ_ONCE(obj->write_domain) == read_domains) {
1443                 err = 0;
1444                 goto out;
1445         }
1446
1447         /*
1448          * Try to flush the object off the GPU without holding the lock.
1449          * We will repeat the flush holding the lock in the normal manner
1450          * to catch cases where we are gazumped.
1451          */
1452         err = i915_gem_object_wait(obj,
1453                                    I915_WAIT_INTERRUPTIBLE |
1454                                    I915_WAIT_PRIORITY |
1455                                    (write_domain ? I915_WAIT_ALL : 0),
1456                                    MAX_SCHEDULE_TIMEOUT);
1457         if (err)
1458                 goto out;
1459
1460         /*
1461          * Proxy objects do not control access to the backing storage, ergo
1462          * they cannot be used as a means to manipulate the cache domain
1463          * tracking for that backing storage. The proxy object is always
1464          * considered to be outside of any cache domain.
1465          */
1466         if (i915_gem_object_is_proxy(obj)) {
1467                 err = -ENXIO;
1468                 goto out;
1469         }
1470
1471         /*
1472          * Flush and acquire obj->pages so that we are coherent through
1473          * direct access in memory with previous cached writes through
1474          * shmemfs and that our cache domain tracking remains valid.
1475          * For example, if the obj->filp was moved to swap without us
1476          * being notified and releasing the pages, we would mistakenly
1477          * continue to assume that the obj remained out of the CPU cached
1478          * domain.
1479          */
1480         err = i915_gem_object_pin_pages(obj);
1481         if (err)
1482                 goto out;
1483
1484         err = i915_mutex_lock_interruptible(dev);
1485         if (err)
1486                 goto out_unpin;
1487
1488         if (read_domains & I915_GEM_DOMAIN_WC)
1489                 err = i915_gem_object_set_to_wc_domain(obj, write_domain);
1490         else if (read_domains & I915_GEM_DOMAIN_GTT)
1491                 err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
1492         else
1493                 err = i915_gem_object_set_to_cpu_domain(obj, write_domain);
1494
1495         /* And bump the LRU for this access */
1496         i915_gem_object_bump_inactive_ggtt(obj);
1497
1498         mutex_unlock(&dev->struct_mutex);
1499
1500         if (write_domain != 0)
1501                 intel_fb_obj_invalidate(obj,
1502                                         fb_write_origin(obj, write_domain));
1503
1504 out_unpin:
1505         i915_gem_object_unpin_pages(obj);
1506 out:
1507         i915_gem_object_put(obj);
1508         return err;
1509 }
1510
1511 /**
1512  * i915_gem_sw_finish_ioctl - Called when user space has done writes to this buffer
1513  * @dev: drm device
1514  * @data: ioctl data blob
1515  * @file: drm file
1516  */
1517 int
1518 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1519                          struct drm_file *file)
1520 {
1521         struct drm_i915_gem_sw_finish *args = data;
1522         struct drm_i915_gem_object *obj;
1523
1524         obj = i915_gem_object_lookup(file, args->handle);
1525         if (!obj)
1526                 return -ENOENT;
1527
1528         /*
1529          * Proxy objects are barred from CPU access, so there is no
1530          * need to ban sw_finish as it is a nop.
1531          */
1532
1533         /* Pinned buffers may be scanout, so flush the cache */
1534         i915_gem_object_flush_if_display(obj);
1535         i915_gem_object_put(obj);
1536
1537         return 0;
1538 }
1539
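/*
 * Check that the vma found at addr is still the mapping we just created:
 * same backing file, same start address and same (page-aligned) size.
 */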
1540 static inline bool
1541 __vma_matches(struct vm_area_struct *vma, struct file *filp,
1542               unsigned long addr, unsigned long size)
1543 {
1544         if (vma->vm_file != filp)
1545                 return false;
1546
1547         return vma->vm_start == addr &&
1548                (vma->vm_end - vma->vm_start) == PAGE_ALIGN(size);
1549 }
1550
1551 /**
1552  * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
1553  *                       it is mapped to.
1554  * @dev: drm device
1555  * @data: ioctl data blob
1556  * @file: drm file
1557  *
1558  * While the mapping holds a reference on the contents of the object, it doesn't
1559  * imply a ref on the object itself.
1560  *
1561  * IMPORTANT:
1562  *
1563  * DRM driver writers who look at this function as an example of how to do GEM
1564  * mmap support: please don't implement mmap support like this. The modern way
1565  * to implement DRM mmap support is with an mmap offset ioctl (like
1566  * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
1567  * That way debug tooling like valgrind will understand what's going on; hiding
1568  * the mmap call in a driver-private ioctl will break that. The i915 driver only
1569  * does cpu mmaps this way because we didn't know better.
1570  */
1571 int
1572 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1573                     struct drm_file *file)
1574 {
1575         struct drm_i915_gem_mmap *args = data;
1576         struct drm_i915_gem_object *obj;
1577         unsigned long addr;
1578
1579         if (args->flags & ~(I915_MMAP_WC))
1580                 return -EINVAL;
1581
1582         if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
1583                 return -ENODEV;
1584
1585         obj = i915_gem_object_lookup(file, args->handle);
1586         if (!obj)
1587                 return -ENOENT;
1588
1589         /* prime objects have no backing filp to GEM mmap
1590          * pages from.
1591          */
1592         if (!obj->base.filp) {
1593                 addr = -ENXIO;
1594                 goto err;
1595         }
1596
1597         if (range_overflows(args->offset, args->size, (u64)obj->base.size)) {
1598                 addr = -EINVAL;
1599                 goto err;
1600         }
1601
1602         addr = vm_mmap(obj->base.filp, 0, args->size,
1603                        PROT_READ | PROT_WRITE, MAP_SHARED,
1604                        args->offset);
1605         if (IS_ERR_VALUE(addr))
1606                 goto err;
1607
1608         if (args->flags & I915_MMAP_WC) {
1609                 struct mm_struct *mm = current->mm;
1610                 struct vm_area_struct *vma;
1611
1612                 if (down_write_killable(&mm->mmap_sem)) {
1613                         addr = -EINTR;
1614                         goto err;
1615                 }
1616                 vma = find_vma(mm, addr);
1617                 if (vma && __vma_matches(vma, obj->base.filp, addr, args->size))
1618                         vma->vm_page_prot =
1619                                 pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
1620                 else
1621                         addr = -ENOMEM;
1622                 up_write(&mm->mmap_sem);
1623                 if (IS_ERR_VALUE(addr))
1624                         goto err;
1625
1626                 /* This may race, but that's ok, it only gets set */
1627                 WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
1628         }
1629         i915_gem_object_put(obj);
1630
1631         args->addr_ptr = (u64)addr;
1632         return 0;
1633
1634 err:
1635         i915_gem_object_put(obj);
1636         return addr;
1637 }
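
/*
 * Illustrative sketch (not part of the driver): userspace typically drives
 * the ioctl above roughly as below, using the uapi names from
 * include/uapi/drm/i915_drm.h; error handling is elided and the snippet is
 * only a hedged example, not a reference implementation.
 *
 *	struct drm_i915_gem_mmap arg = {
 *		.handle = handle,
 *		.size   = obj_size,
 *		.flags  = I915_MMAP_WC,
 *	};
 *	void *ptr = MAP_FAILED;
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP, &arg) == 0)
 *		ptr = (void *)(uintptr_t)arg.addr_ptr;
 *
 * As the kernel-doc above stresses, new drivers should instead hand out an
 * mmap offset and let userspace call mmap(2) on the DRM fd.
 */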
1638
1639 static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj)
1640 {
1641         return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT;
1642 }
1643
1644 /**
1645  * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
1646  *
1647  * A history of the GTT mmap interface:
1648  *
1649  * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to be
1650  *     aligned and suitable for fencing, and still fit into the available
1651  *     mappable space left by the pinned display objects. A classic problem
1652  *     we called the page-fault-of-doom where we would ping-pong between
1653  *     two objects that could not fit inside the GTT and so the memcpy
1654  *     would page one object in at the expense of the other between every
1655  *     single byte.
1656  *
1657  * 1 - Objects can be any size, and have any compatible fencing (X, Y, or none
1658  *     as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
1659  *     object is too large for the available space (or simply too large
1660  *     for the mappable aperture!), a view is created instead and faulted
1661  *     into userspace. (This view is aligned and sized appropriately for
1662  *     fenced access.)
1663  *
1664  * 2 - Recognise WC as a separate cache domain so that we can flush the
1665  *     delayed writes via GTT before performing direct access via WC.
1666  *
1667  * 3 - Remove implicit set-domain(GTT) and synchronisation on initial
1668  *     pagefault; swapin remains transparent.
1669  *
1670  * Restrictions:
1671  *
1672  *  * snoopable objects cannot be accessed via the GTT. Doing so can cause
1673  *    machine hangs on some architectures, corruption on others. An attempt to
1674  *    service a GTT page fault from a snoopable object will generate a SIGBUS.
1675  *
1676  *  * the object must be able to fit into RAM (physical memory, though not
1677  *    limited to the mappable aperture).
1678  *
1679  *
1680  * Caveats:
1681  *
1682  *  * a new GTT page fault will synchronize rendering from the GPU and flush
1683  *    all data to system memory. Subsequent access will not be synchronized.
1684  *
1685  *  * all mappings are revoked on runtime device suspend.
1686  *
1687  *  * there are only 8, 16 or 32 fence registers to share between all users
1688  *    (older machines require a fence register for display and blitter access
1689  *    as well). Contention for the fence registers will cause the previous users
1690  *    to be unmapped and any new access will generate new page faults.
1691  *
1692  *  * running out of memory while servicing a fault may generate a SIGBUS,
1693  *    rather than the expected SIGSEGV.
1694  */
1695 int i915_gem_mmap_gtt_version(void)
1696 {
1697         return 3;
1698 }
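
/*
 * Hedged usage sketch: userspace discovers the feature level reported above
 * through the GETPARAM ioctl, e.g. (uapi names from
 * include/uapi/drm/i915_drm.h):
 *
 *	int version = 0;
 *	struct drm_i915_getparam gp = {
 *		.param = I915_PARAM_MMAP_GTT_VERSION,
 *		.value = &version,
 *	};
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0 && version >= 1)
 *		;	// partial views available, large objects are ok
 */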
1699
1700 static inline struct i915_ggtt_view
1701 compute_partial_view(const struct drm_i915_gem_object *obj,
1702                      pgoff_t page_offset,
1703                      unsigned int chunk)
1704 {
1705         struct i915_ggtt_view view;
1706
1707         if (i915_gem_object_is_tiled(obj))
1708                 chunk = roundup(chunk, tile_row_pages(obj));
1709
1710         view.type = I915_GGTT_VIEW_PARTIAL;
1711         view.partial.offset = rounddown(page_offset, chunk);
1712         view.partial.size =
1713                 min_t(unsigned int, chunk,
1714                       (obj->base.size >> PAGE_SHIFT) - view.partial.offset);
1715
1716         /* If the partial covers the entire object, just create a normal VMA. */
1717         if (chunk >= obj->base.size >> PAGE_SHIFT)
1718                 view.type = I915_GGTT_VIEW_NORMAL;
1719
1720         return view;
1721 }
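
/*
 * Worked example for the helper above (assuming 4K pages, so
 * MIN_CHUNK_PAGES == 256): an untiled 8MiB object has 2048 pages; a fault at
 * page_offset 1000 yields partial.offset = rounddown(1000, 256) = 768 and
 * partial.size = min(256, 2048 - 768) = 256, i.e. pages [768, 1023] are
 * bound. Tiled objects first round the chunk up to whole tile rows, and any
 * object that fits within one chunk (1MiB here) gets a normal, full view.
 */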
1722
1723 /**
1724  * i915_gem_fault - fault a page into the GTT
1725  * @vmf: fault info
1726  *
1727  * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
1728  * from userspace.  The fault handler takes care of binding the object to
1729  * the GTT (if needed), allocating and programming a fence register (again,
1730  * only if needed based on whether the old reg is still valid or the object
1731  * is tiled) and inserting a new PTE into the faulting process.
1732  *
1733  * Note that the faulting process may involve evicting existing objects
1734  * from the GTT and/or fence registers to make room.  So performance may
1735  * suffer if the GTT working set is large or there are few fence registers
1736  * left.
1737  *
1738  * The current feature set supported by i915_gem_fault() and thus GTT mmaps
1739  * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
1740  */
1741 vm_fault_t i915_gem_fault(struct vm_fault *vmf)
1742 {
1743 #define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT)
1744         struct vm_area_struct *area = vmf->vma;
1745         struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
1746         struct drm_device *dev = obj->base.dev;
1747         struct drm_i915_private *dev_priv = to_i915(dev);
1748         struct i915_ggtt *ggtt = &dev_priv->ggtt;
1749         bool write = area->vm_flags & VM_WRITE;
1750         intel_wakeref_t wakeref;
1751         struct i915_vma *vma;
1752         pgoff_t page_offset;
1753         int srcu;
1754         int ret;
1755
1756         /* Sanity check that we allow writing into this object */
1757         if (i915_gem_object_is_readonly(obj) && write)
1758                 return VM_FAULT_SIGBUS;
1759
1760         /* We don't use vmf->pgoff since that has the fake offset */
1761         page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
1762
1763         trace_i915_gem_object_fault(obj, page_offset, true, write);
1764
1765         ret = i915_gem_object_pin_pages(obj);
1766         if (ret)
1767                 goto err;
1768
1769         wakeref = intel_runtime_pm_get(dev_priv);
1770
1771         srcu = i915_reset_trylock(dev_priv);
1772         if (srcu < 0) {
1773                 ret = srcu;
1774                 goto err_rpm;
1775         }
1776
1777         ret = i915_mutex_lock_interruptible(dev);
1778         if (ret)
1779                 goto err_reset;
1780
1781         /* Access to snoopable pages through the GTT is incoherent. */
1782         if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) {
1783                 ret = -EFAULT;
1784                 goto err_unlock;
1785         }
1786
1787         /* Now pin it into the GTT as needed */
1788         vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
1789                                        PIN_MAPPABLE |
1790                                        PIN_NONBLOCK |
1791                                        PIN_NONFAULT);
1792         if (IS_ERR(vma)) {
1793                 /* Use a partial view if it is bigger than available space */
1794                 struct i915_ggtt_view view =
1795                         compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
1796                 unsigned int flags;
1797
1798                 flags = PIN_MAPPABLE;
1799                 if (view.type == I915_GGTT_VIEW_NORMAL)
1800                         flags |= PIN_NONBLOCK; /* avoid warnings for pinned */
1801
1802                 /*
1803                  * Userspace is now writing through an untracked VMA; abandon
1804                  * all hope that the hardware is able to track future writes.
1805                  */
1806                 obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
1807
1808                 vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
1809                 if (IS_ERR(vma) && !view.type) {
1810                         flags = PIN_MAPPABLE;
1811                         view.type = I915_GGTT_VIEW_PARTIAL;
1812                         vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
1813                 }
1814         }
1815         if (IS_ERR(vma)) {
1816                 ret = PTR_ERR(vma);
1817                 goto err_unlock;
1818         }
1819
1820         ret = i915_vma_pin_fence(vma);
1821         if (ret)
1822                 goto err_unpin;
1823
1824         /* Finally, remap it using the new GTT offset */
1825         ret = remap_io_mapping(area,
1826                                area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT),
1827                                (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT,
1828                                min_t(u64, vma->size, area->vm_end - area->vm_start),
1829                                &ggtt->iomap);
1830         if (ret)
1831                 goto err_fence;
1832
1833         /* Mark as being mmapped into userspace for later revocation */
1834         assert_rpm_wakelock_held(dev_priv);
1835         if (!i915_vma_set_userfault(vma) && !obj->userfault_count++)
1836                 list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
1837         GEM_BUG_ON(!obj->userfault_count);
1838
1839         i915_vma_set_ggtt_write(vma);
1840
1841 err_fence:
1842         i915_vma_unpin_fence(vma);
1843 err_unpin:
1844         __i915_vma_unpin(vma);
1845 err_unlock:
1846         mutex_unlock(&dev->struct_mutex);
1847 err_reset:
1848         i915_reset_unlock(dev_priv, srcu);
1849 err_rpm:
1850         intel_runtime_pm_put(dev_priv, wakeref);
1851         i915_gem_object_unpin_pages(obj);
1852 err:
1853         switch (ret) {
1854         case -EIO:
1855                 /*
1856                  * We eat errors when the gpu is terminally wedged to avoid
1857                  * userspace unduly crashing (gl has no provisions for mmaps to
1858                  * fail). But any other -EIO isn't ours (e.g. swap in failure)
1859                  * and so needs to be reported.
1860                  */
1861                 if (!i915_terminally_wedged(dev_priv))
1862                         return VM_FAULT_SIGBUS;
1863                 /* else: fall through */
1864         case -EAGAIN:
1865                 /*
1866                  * EAGAIN means the gpu is hung and we'll wait for the error
1867                  * handler to reset everything when re-faulting in
1868                  * i915_mutex_lock_interruptible.
1869                  */
1870         case 0:
1871         case -ERESTARTSYS:
1872         case -EINTR:
1873         case -EBUSY:
1874                 /*
1875                  * EBUSY is ok: this just means that another thread
1876                  * already did the job.
1877                  */
1878                 return VM_FAULT_NOPAGE;
1879         case -ENOMEM:
1880                 return VM_FAULT_OOM;
1881         case -ENOSPC:
1882         case -EFAULT:
1883                 return VM_FAULT_SIGBUS;
1884         default:
1885                 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
1886                 return VM_FAULT_SIGBUS;
1887         }
1888 }
1889
1890 static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
1891 {
1892         struct i915_vma *vma;
1893
1894         GEM_BUG_ON(!obj->userfault_count);
1895
1896         obj->userfault_count = 0;
1897         list_del(&obj->userfault_link);
1898         drm_vma_node_unmap(&obj->base.vma_node,
1899                            obj->base.dev->anon_inode->i_mapping);
1900
1901         for_each_ggtt_vma(vma, obj)
1902                 i915_vma_unset_userfault(vma);
1903 }
1904
1905 /**
1906  * i915_gem_release_mmap - remove physical page mappings
1907  * @obj: obj in question
1908  *
1909  * Preserve the reservation of the mmapping with the DRM core code, but
1910  * relinquish ownership of the pages back to the system.
1911  *
1912  * It is vital that we remove the page mapping if we have mapped a tiled
1913  * object through the GTT and then lose the fence register due to
1914  * resource pressure. Similarly if the object has been moved out of the
1915  * aperture, then pages mapped into userspace must be revoked. Removing the
1916  * mapping will then trigger a page fault on the next user access, allowing
1917  * fixup by i915_gem_fault().
1918  */
1919 void
1920 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
1921 {
1922         struct drm_i915_private *i915 = to_i915(obj->base.dev);
1923         intel_wakeref_t wakeref;
1924
1925         /* Serialisation between user GTT access and our code depends upon
1926          * revoking the CPU's PTE whilst the mutex is held. The next user
1927          * pagefault then has to wait until we release the mutex.
1928          *
1929          * Note that RPM complicates this somewhat by adding an additional
1930          * requirement that operations to the GGTT be made holding the RPM
1931          * wakeref.
1932          */
1933         lockdep_assert_held(&i915->drm.struct_mutex);
1934         wakeref = intel_runtime_pm_get(i915);
1935
1936         if (!obj->userfault_count)
1937                 goto out;
1938
1939         __i915_gem_object_release_mmap(obj);
1940
1941         /* Ensure that the CPU's PTEs are revoked and there are no outstanding
1942          * memory transactions from userspace before we return. The TLB
1943          * flushing implied by changing the PTEs above *should* be
1944          * sufficient; an extra barrier here just provides us with a bit
1945          * of paranoid documentation about our requirement to serialise
1946          * memory writes before touching registers / GSM.
1947          */
1948         wmb();
1949
1950 out:
1951         intel_runtime_pm_put(i915, wakeref);
1952 }
1953
1954 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
1955 {
1956         struct drm_i915_gem_object *obj, *on;
1957         int i;
1958
1959         /*
1960          * Only called during RPM suspend. All users of the userfault_list
1961          * must be holding an RPM wakeref to ensure that this can not
1962          * run concurrently with themselves (and use the struct_mutex for
1963          * protection between themselves).
1964          */
1965
1966         list_for_each_entry_safe(obj, on,
1967                                  &dev_priv->mm.userfault_list, userfault_link)
1968                 __i915_gem_object_release_mmap(obj);
1969
1970         /* The fences will be lost when the device powers down. If any were
1971          * in use by hardware (i.e. they are pinned), we should not be powering
1972          * down! All other fences will be reacquired by the user upon waking.
1973          */
1974         for (i = 0; i < dev_priv->num_fence_regs; i++) {
1975                 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
1976
1977                 /* Ideally we want to assert that the fence register is not
1978                  * live at this point (i.e. that no piece of code will be
1979                  * trying to write through fence + GTT, as that not only violates
1980                  * our tracking of activity and associated locking/barriers,
1981                  * but is also illegal given that the hw is powered down).
1982                  *
1983                  * Previously we used reg->pin_count as a "liveness" indicator.
1984                  * That is not sufficient, and we need a more fine-grained
1985                  * tool if we want to have a sanity check here.
1986                  */
1987
1988                 if (!reg->vma)
1989                         continue;
1990
1991                 GEM_BUG_ON(i915_vma_has_userfault(reg->vma));
1992                 reg->dirty = true;
1993         }
1994 }
1995
1996 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
1997 {
1998         struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
1999         int err;
2000
2001         err = drm_gem_create_mmap_offset(&obj->base);
2002         if (likely(!err))
2003                 return 0;
2004
2005         /* Attempt to reap some mmap space from dead objects */
2006         do {
2007                 err = i915_gem_wait_for_idle(dev_priv,
2008                                              I915_WAIT_INTERRUPTIBLE,
2009                                              MAX_SCHEDULE_TIMEOUT);
2010                 if (err)
2011                         break;
2012
2013                 i915_gem_drain_freed_objects(dev_priv);
2014                 err = drm_gem_create_mmap_offset(&obj->base);
2015                 if (!err)
2016                         break;
2017
2018         } while (flush_delayed_work(&dev_priv->gem.retire_work));
2019
2020         return err;
2021 }
2022
2023 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
2024 {
2025         drm_gem_free_mmap_offset(&obj->base);
2026 }
2027
2028 int
2029 i915_gem_mmap_gtt(struct drm_file *file,
2030                   struct drm_device *dev,
2031                   u32 handle,
2032                   u64 *offset)
2033 {
2034         struct drm_i915_gem_object *obj;
2035         int ret;
2036
2037         obj = i915_gem_object_lookup(file, handle);
2038         if (!obj)
2039                 return -ENOENT;
2040
2041         ret = i915_gem_object_create_mmap_offset(obj);
2042         if (ret == 0)
2043                 *offset = drm_vma_node_offset_addr(&obj->base.vma_node);
2044
2045         i915_gem_object_put(obj);
2046         return ret;
2047 }
2048
2049 /**
2050  * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
2051  * @dev: DRM device
2052  * @data: GTT mapping ioctl data
2053  * @file: GEM object info
2054  *
2055  * Simply returns the fake offset to userspace so it can mmap it.
2056  * The mmap call will end up in drm_gem_mmap(), which will set things
2057  * up so we can get faults in the handler above.
2058  *
2059  * The fault handler will take care of binding the object into the GTT
2060  * (since it may have been evicted to make room for something), allocating
2061  * a fence register, and mapping the appropriate aperture address into
2062  * userspace.
2063  */
2064 int
2065 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
2066                         struct drm_file *file)
2067 {
2068         struct drm_i915_gem_mmap_gtt *args = data;
2069
2070         return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
2071 }
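
/*
 * Hedged sketch of the userspace side: fetch the fake offset via the ioctl
 * above, then mmap the DRM fd itself so that drm_gem_mmap() routes faults
 * into i915_gem_fault(). Names follow include/uapi/drm/i915_drm.h.
 *
 *	struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
 *	void *ptr = MAP_FAILED;
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg) == 0)
 *		ptr = mmap(NULL, obj_size, PROT_READ | PROT_WRITE,
 *			   MAP_SHARED, fd, arg.offset);
 */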
2072
2073 /* Immediately discard the backing storage */
2074 void __i915_gem_object_truncate(struct drm_i915_gem_object *obj)
2075 {
2076         i915_gem_object_free_mmap_offset(obj);
2077
2078         if (obj->base.filp == NULL)
2079                 return;
2080
2081         /* Our goal here is to return as much of the memory
2082          * as possible back to the system, as we are called from OOM.
2083          * To do this we must instruct the shmfs to drop all of its
2084          * backing pages, *now*.
2085          */
2086         shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
2087         obj->mm.madv = __I915_MADV_PURGED;
2088         obj->mm.pages = ERR_PTR(-EFAULT);
2089 }
2090
2091 /*
2092  * Move pages to appropriate lru and release the pagevec, decrementing the
2093  * ref count of those pages.
2094  */
2095 static void check_release_pagevec(struct pagevec *pvec)
2096 {
2097         check_move_unevictable_pages(pvec);
2098         __pagevec_release(pvec);
2099         cond_resched();
2100 }
2101
2102 static void
2103 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
2104                               struct sg_table *pages)
2105 {
2106         struct sgt_iter sgt_iter;
2107         struct pagevec pvec;
2108         struct page *page;
2109
2110         __i915_gem_object_release_shmem(obj, pages, true);
2111         i915_gem_gtt_finish_pages(obj, pages);
2112
2113         if (i915_gem_object_needs_bit17_swizzle(obj))
2114                 i915_gem_object_save_bit_17_swizzle(obj, pages);
2115
2116         mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping);
2117
2118         pagevec_init(&pvec);
2119         for_each_sgt_page(page, sgt_iter, pages) {
2120                 if (obj->mm.dirty)
2121                         set_page_dirty(page);
2122
2123                 if (obj->mm.madv == I915_MADV_WILLNEED)
2124                         mark_page_accessed(page);
2125
2126                 if (!pagevec_add(&pvec, page))
2127                         check_release_pagevec(&pvec);
2128         }
2129         if (pagevec_count(&pvec))
2130                 check_release_pagevec(&pvec);
2131         obj->mm.dirty = false;
2132
2133         sg_free_table(pages);
2134         kfree(pages);
2135 }
2136
2137 static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
2138 {
2139         struct radix_tree_iter iter;
2140         void __rcu **slot;
2141
2142         rcu_read_lock();
2143         radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0)
2144                 radix_tree_delete(&obj->mm.get_page.radix, iter.index);
2145         rcu_read_unlock();
2146 }
2147
2148 static struct sg_table *
2149 __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
2150 {
2151         struct drm_i915_private *i915 = to_i915(obj->base.dev);
2152         struct sg_table *pages;
2153
2154         pages = fetch_and_zero(&obj->mm.pages);
2155         if (IS_ERR_OR_NULL(pages))
2156                 return pages;
2157
2158         spin_lock(&i915->mm.obj_lock);
2159         list_del(&obj->mm.link);
2160         spin_unlock(&i915->mm.obj_lock);
2161
2162         if (obj->mm.mapping) {
2163                 void *ptr;
2164
2165                 ptr = page_mask_bits(obj->mm.mapping);
2166                 if (is_vmalloc_addr(ptr))
2167                         vunmap(ptr);
2168                 else
2169                         kunmap(kmap_to_page(ptr));
2170
2171                 obj->mm.mapping = NULL;
2172         }
2173
2174         __i915_gem_object_reset_page_iter(obj);
2175         obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
2176
2177         return pages;
2178 }
2179
2180 int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
2181                                 enum i915_mm_subclass subclass)
2182 {
2183         struct sg_table *pages;
2184         int ret;
2185
2186         if (i915_gem_object_has_pinned_pages(obj))
2187                 return -EBUSY;
2188
2189         GEM_BUG_ON(obj->bind_count);
2190
2191         /* May be called by shrinker from within get_pages() (on another bo) */
2192         mutex_lock_nested(&obj->mm.lock, subclass);
2193         if (unlikely(atomic_read(&obj->mm.pages_pin_count))) {
2194                 ret = -EBUSY;
2195                 goto unlock;
2196         }
2197
2198         /*
2199          * ->put_pages might need to allocate memory for the bit17 swizzle
2200          * array, hence protect the pages from being reaped by removing them from gtt
2201          * lists early.
2202          */
2203         pages = __i915_gem_object_unset_pages(obj);
2204
2205         /*
2206          * XXX Temporary hijinx to avoid updating all backends to handle
2207          * NULL pages. In the future, when we have more asynchronous
2208          * get_pages backends we should be better able to handle the
2209          * cancellation of the async task in a more uniform manner.
2210          */
2211         if (!pages && !i915_gem_object_needs_async_cancel(obj))
2212                 pages = ERR_PTR(-EINVAL);
2213
2214         if (!IS_ERR(pages))
2215                 obj->ops->put_pages(obj, pages);
2216
2217         ret = 0;
2218 unlock:
2219         mutex_unlock(&obj->mm.lock);
2220
2221         return ret;
2222 }
2223
2224 bool i915_sg_trim(struct sg_table *orig_st)
2225 {
2226         struct sg_table new_st;
2227         struct scatterlist *sg, *new_sg;
2228         unsigned int i;
2229
2230         if (orig_st->nents == orig_st->orig_nents)
2231                 return false;
2232
2233         if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN))
2234                 return false;
2235
2236         new_sg = new_st.sgl;
2237         for_each_sg(orig_st->sgl, sg, orig_st->nents, i) {
2238                 sg_set_page(new_sg, sg_page(sg), sg->length, 0);
2239                 sg_dma_address(new_sg) = sg_dma_address(sg);
2240                 sg_dma_len(new_sg) = sg_dma_len(sg);
2241
2242                 new_sg = sg_next(new_sg);
2243         }
2244         GEM_BUG_ON(new_sg); /* Should walk exactly nents and hit the end */
2245
2246         sg_free_table(orig_st);
2247
2248         *orig_st = new_st;
2249         return true;
2250 }
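
/*
 * Example of why trimming matters: i915_gem_object_get_pages_gtt() below
 * allocates the sg_table with one entry per page (orig_nents == page_count)
 * but coalesces physically contiguous pages as it fills it in, so a 512-page
 * object may end up using only a handful of entries (nents << orig_nents).
 * i915_sg_trim() copies those few entries into a right-sized table and frees
 * the oversized allocation.
 */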
2251
2252 static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2253 {
2254         struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2255         const unsigned long page_count = obj->base.size / PAGE_SIZE;
2256         unsigned long i;
2257         struct address_space *mapping;
2258         struct sg_table *st;
2259         struct scatterlist *sg;
2260         struct sgt_iter sgt_iter;
2261         struct page *page;
2262         unsigned long last_pfn = 0;     /* suppress gcc warning */
2263         unsigned int max_segment = i915_sg_segment_size();
2264         unsigned int sg_page_sizes;
2265         struct pagevec pvec;
2266         gfp_t noreclaim;
2267         int ret;
2268
2269         /*
2270          * Assert that the object is not currently in any GPU domain. As it
2271          * wasn't in the GTT, there shouldn't be any way it could have been in
2272          * a GPU cache.
2273          */
2274         GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
2275         GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
2276
2277         /*
2278          * If there's no chance of allocating enough pages for the whole
2279          * object, bail early.
2280          */
2281         if (page_count > totalram_pages())
2282                 return -ENOMEM;
2283
2284         st = kmalloc(sizeof(*st), GFP_KERNEL);
2285         if (st == NULL)
2286                 return -ENOMEM;
2287
2288 rebuild_st:
2289         if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
2290                 kfree(st);
2291                 return -ENOMEM;
2292         }
2293
2294         /*
2295          * Get the list of pages out of our struct file.  They'll be pinned
2296          * at this point until we release them.
2297          *
2298          * Fail silently without starting the shrinker
2299          */
2300         mapping = obj->base.filp->f_mapping;
2301         mapping_set_unevictable(mapping);
2302         noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM);
2303         noreclaim |= __GFP_NORETRY | __GFP_NOWARN;
2304
2305         sg = st->sgl;
2306         st->nents = 0;
2307         sg_page_sizes = 0;
2308         for (i = 0; i < page_count; i++) {
2309                 const unsigned int shrink[] = {
2310                         I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE,
2311                         0,
2312                 }, *s = shrink;
2313                 gfp_t gfp = noreclaim;
2314
2315                 do {
2316                         cond_resched();
2317                         page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2318                         if (!IS_ERR(page))
2319                                 break;
2320
2321                         if (!*s) {
2322                                 ret = PTR_ERR(page);
2323                                 goto err_sg;
2324                         }
2325
2326                         i915_gem_shrink(dev_priv, 2 * page_count, NULL, *s++);
2327
2328                         /*
2329                          * We've tried hard to allocate the memory by reaping
2330                          * our own buffer, now let the real VM do its job and
2331                          * go down in flames if truly OOM.
2332                          *
2333                          * However, since graphics tend to be disposable,
2334                          * defer the oom here by reporting the ENOMEM back
2335                          * to userspace.
2336                          */
2337                         if (!*s) {
2338                                 /* reclaim and warn, but no oom */
2339                                 gfp = mapping_gfp_mask(mapping);
2340
2341                                 /*
2342                                  * Our bo are always dirty and so we require
2343                                  * kswapd to reclaim our pages (direct reclaim
2344                                  * does not effectively begin pageout of our
2345                                  * buffers on its own). However, direct reclaim
2346                                  * only waits for kswapd when under allocation
2347                                  * congestion. So as a result __GFP_RECLAIM is
2348                                  * unreliable and fails to actually reclaim our
2349                                  * dirty pages -- unless you try over and over
2350                                  * again with !__GFP_NORETRY. However, we still
2351                                  * want to fail this allocation rather than
2352                                  * trigger the out-of-memory killer and for
2353                                  * this we want __GFP_RETRY_MAYFAIL.
2354                                  */
2355                                 gfp |= __GFP_RETRY_MAYFAIL;
2356                         }
2357                 } while (1);
2358
2359                 if (!i ||
2360                     sg->length >= max_segment ||
2361                     page_to_pfn(page) != last_pfn + 1) {
2362                         if (i) {
2363                                 sg_page_sizes |= sg->length;
2364                                 sg = sg_next(sg);
2365                         }
2366                         st->nents++;
2367                         sg_set_page(sg, page, PAGE_SIZE, 0);
2368                 } else {
2369                         sg->length += PAGE_SIZE;
2370                 }
2371                 last_pfn = page_to_pfn(page);
2372
2373                 /* Check that the i965g/gm workaround works. */
2374                 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
2375         }
2376         if (sg) { /* loop terminated early; short sg table */
2377                 sg_page_sizes |= sg->length;
2378                 sg_mark_end(sg);
2379         }
2380
2381         /* Trim unused sg entries to avoid wasting memory. */
2382         i915_sg_trim(st);
2383
2384         ret = i915_gem_gtt_prepare_pages(obj, st);
2385         if (ret) {
2386                 /*
2387                  * DMA remapping failed? One possible cause is that
2388                  * it could not reserve enough large entries, asking
2389                  * for PAGE_SIZE chunks instead may be helpful.
2390                  */
2391                 if (max_segment > PAGE_SIZE) {
2392                         for_each_sgt_page(page, sgt_iter, st)
2393                                 put_page(page);
2394                         sg_free_table(st);
2395
2396                         max_segment = PAGE_SIZE;
2397                         goto rebuild_st;
2398                 } else {
2399                         dev_warn(&dev_priv->drm.pdev->dev,
2400                                  "Failed to DMA remap %lu pages\n",
2401                                  page_count);
2402                         goto err_pages;
2403                 }
2404         }
2405
2406         if (i915_gem_object_needs_bit17_swizzle(obj))
2407                 i915_gem_object_do_bit_17_swizzle(obj, st);
2408
2409         __i915_gem_object_set_pages(obj, st, sg_page_sizes);
2410
2411         return 0;
2412
2413 err_sg:
2414         sg_mark_end(sg);
2415 err_pages:
2416         mapping_clear_unevictable(mapping);
2417         pagevec_init(&pvec);
2418         for_each_sgt_page(page, sgt_iter, st) {
2419                 if (!pagevec_add(&pvec, page))
2420                         check_release_pagevec(&pvec);
2421         }
2422         if (pagevec_count(&pvec))
2423                 check_release_pagevec(&pvec);
2424         sg_free_table(st);
2425         kfree(st);
2426
2427         /*
2428          * shmemfs first checks if there is enough memory to allocate the page
2429          * and reports ENOSPC should there be insufficient, along with the usual
2430          * ENOMEM for a genuine allocation failure.
2431          *
2432          * We use ENOSPC in our driver to mean that we have run out of aperture
2433          * space and so want to translate the error from shmemfs back to our
2434          * usual understanding of ENOMEM.
2435          */
2436         if (ret == -ENOSPC)
2437                 ret = -ENOMEM;
2438
2439         return ret;
2440 }
2441
2442 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
2443                                  struct sg_table *pages,
2444                                  unsigned int sg_page_sizes)
2445 {
2446         struct drm_i915_private *i915 = to_i915(obj->base.dev);
2447         unsigned long supported = INTEL_INFO(i915)->page_sizes;
2448         int i;
2449
2450         lockdep_assert_held(&obj->mm.lock);
2451
2452         /* Make the pages coherent with the GPU (flushing any swapin). */
2453         if (obj->cache_dirty) {
2454                 obj->write_domain = 0;
2455                 if (i915_gem_object_has_struct_page(obj))
2456                         drm_clflush_sg(pages);
2457                 obj->cache_dirty = false;
2458         }
2459
2460         obj->mm.get_page.sg_pos = pages->sgl;
2461         obj->mm.get_page.sg_idx = 0;
2462
2463         obj->mm.pages = pages;
2464
2465         if (i915_gem_object_is_tiled(obj) &&
2466             i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
2467                 GEM_BUG_ON(obj->mm.quirked);
2468                 __i915_gem_object_pin_pages(obj);
2469                 obj->mm.quirked = true;
2470         }
2471
2472         GEM_BUG_ON(!sg_page_sizes);
2473         obj->mm.page_sizes.phys = sg_page_sizes;
2474
2475         /*
2476          * Calculate the supported page-sizes which fit into the given
2477          * sg_page_sizes. This will give us the page-sizes which we may be able
2478          * to use opportunistically when later inserting into the GTT. For
2479          * example if phys=2G, then in theory we should be able to use 1G, 2M,
2480          * 64K or 4K pages, although in practice this will depend on a number of
2481          * other factors.
2482          */
2483         obj->mm.page_sizes.sg = 0;
2484         for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) {
2485                 if (obj->mm.page_sizes.phys & ~0u << i)
2486                         obj->mm.page_sizes.sg |= BIT(i);
2487         }
2488         GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg));
2489
2490         spin_lock(&i915->mm.obj_lock);
2491         list_add(&obj->mm.link, &i915->mm.unbound_list);
2492         spin_unlock(&i915->mm.obj_lock);
2493 }
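
/*
 * Worked example for the page-size bookkeeping above, assuming the platform
 * supports 4K | 64K | 2M: if the sg list turned out 2M-contiguous
 * (page_sizes.phys has the 2M bit set), every supported size at or below 2M
 * qualifies and page_sizes.sg = 4K | 64K | 2M. If the largest contiguous
 * chunk is only 64K (phys = 4K | 64K), the 2M test (phys & ~0u << 21) fails
 * and page_sizes.sg = 4K | 64K.
 */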
2494
2495 static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2496 {
2497         int err;
2498
2499         if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) {
2500                 DRM_DEBUG("Attempting to obtain a purgeable object\n");
2501                 return -EFAULT;
2502         }
2503
2504         err = obj->ops->get_pages(obj);
2505         GEM_BUG_ON(!err && !i915_gem_object_has_pages(obj));
2506
2507         return err;
2508 }
2509
2510 /* Ensure that the associated pages are gathered from the backing storage
2511  * and pinned into our object. i915_gem_object_pin_pages() may be called
2512  * multiple times before they are released by a single call to
2513  * i915_gem_object_unpin_pages() - once the pages are no longer referenced
2514  * either as a result of memory pressure (reaping pages under the shrinker)
2515  * or as the object is itself released.
2516  */
2517 int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2518 {
2519         int err;
2520
2521         err = mutex_lock_interruptible(&obj->mm.lock);
2522         if (err)
2523                 return err;
2524
2525         if (unlikely(!i915_gem_object_has_pages(obj))) {
2526                 GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
2527
2528                 err = ____i915_gem_object_get_pages(obj);
2529                 if (err)
2530                         goto unlock;
2531
2532                 smp_mb__before_atomic();
2533         }
2534         atomic_inc(&obj->mm.pages_pin_count);
2535
2536 unlock:
2537         mutex_unlock(&obj->mm.lock);
2538         return err;
2539 }
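
/*
 * Hedged usage sketch for the helpers above: callers pin the pages around
 * any CPU access and drop the pin afterwards so the shrinker may reclaim
 * the backing store once the object is idle.
 *
 *	err = i915_gem_object_pin_pages(obj);
 *	if (err)
 *		return err;
 *
 *	... touch obj->mm.pages / obj->mm.get_page ...
 *
 *	i915_gem_object_unpin_pages(obj);
 */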
2540
2541 /* The 'mapping' part of i915_gem_object_pin_map() below */
2542 static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
2543                                  enum i915_map_type type)
2544 {
2545         unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
2546         struct sg_table *sgt = obj->mm.pages;
2547         struct sgt_iter sgt_iter;
2548         struct page *page;
2549         struct page *stack_pages[32];
2550         struct page **pages = stack_pages;
2551         unsigned long i = 0;
2552         pgprot_t pgprot;
2553         void *addr;
2554
2555         /* A single page can always be kmapped */
2556         if (n_pages == 1 && type == I915_MAP_WB)
2557                 return kmap(sg_page(sgt->sgl));
2558
2559         if (n_pages > ARRAY_SIZE(stack_pages)) {
2560                 /* Too big for stack -- allocate temporary array instead */
2561                 pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL);
2562                 if (!pages)
2563                         return NULL;
2564         }
2565
2566         for_each_sgt_page(page, sgt_iter, sgt)
2567                 pages[i++] = page;
2568
2569         /* Check that we have the expected number of pages */
2570         GEM_BUG_ON(i != n_pages);
2571
2572         switch (type) {
2573         default:
2574                 MISSING_CASE(type);
2575                 /* fallthrough to use PAGE_KERNEL anyway */
2576         case I915_MAP_WB:
2577                 pgprot = PAGE_KERNEL;
2578                 break;
2579         case I915_MAP_WC:
2580                 pgprot = pgprot_writecombine(PAGE_KERNEL_IO);
2581                 break;
2582         }
2583         addr = vmap(pages, n_pages, 0, pgprot);
2584
2585         if (pages != stack_pages)
2586                 kvfree(pages);
2587
2588         return addr;
2589 }
2590
2591 /* get, pin, and map the pages of the object into kernel space */
2592 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
2593                               enum i915_map_type type)
2594 {
2595         enum i915_map_type has_type;
2596         bool pinned;
2597         void *ptr;
2598         int ret;
2599
2600         if (unlikely(!i915_gem_object_has_struct_page(obj)))
2601                 return ERR_PTR(-ENXIO);
2602
2603         ret = mutex_lock_interruptible(&obj->mm.lock);
2604         if (ret)
2605                 return ERR_PTR(ret);
2606
2607         pinned = !(type & I915_MAP_OVERRIDE);
2608         type &= ~I915_MAP_OVERRIDE;
2609
2610         if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) {
2611                 if (unlikely(!i915_gem_object_has_pages(obj))) {
2612                         GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
2613
2614                         ret = ____i915_gem_object_get_pages(obj);
2615                         if (ret)
2616                                 goto err_unlock;
2617
2618                         smp_mb__before_atomic();
2619                 }
2620                 atomic_inc(&obj->mm.pages_pin_count);
2621                 pinned = false;
2622         }
2623         GEM_BUG_ON(!i915_gem_object_has_pages(obj));
2624
2625         ptr = page_unpack_bits(obj->mm.mapping, &has_type);
2626         if (ptr && has_type != type) {
2627                 if (pinned) {
2628                         ret = -EBUSY;
2629                         goto err_unpin;
2630                 }
2631
2632                 if (is_vmalloc_addr(ptr))
2633                         vunmap(ptr);
2634                 else
2635                         kunmap(kmap_to_page(ptr));
2636
2637                 ptr = obj->mm.mapping = NULL;
2638         }
2639
2640         if (!ptr) {
2641                 ptr = i915_gem_object_map(obj, type);
2642                 if (!ptr) {
2643                         ret = -ENOMEM;
2644                         goto err_unpin;
2645                 }
2646
2647                 obj->mm.mapping = page_pack_bits(ptr, type);
2648         }
2649
2650 out_unlock:
2651         mutex_unlock(&obj->mm.lock);
2652         return ptr;
2653
2654 err_unpin:
2655         atomic_dec(&obj->mm.pages_pin_count);
2656 err_unlock:
2657         ptr = ERR_PTR(ret);
2658         goto out_unlock;
2659 }
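
/*
 * Hedged usage sketch: i915_gem_object_pin_map() is paired with
 * i915_gem_object_unpin_map() once the CPU pointer is no longer needed,
 * flushing explicitly if the mapping is cacheable, e.g.
 *
 *	void *vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
 *	if (IS_ERR(vaddr))
 *		return PTR_ERR(vaddr);
 *
 *	memcpy(vaddr, data, len);
 *
 *	__i915_gem_object_flush_map(obj, 0, len);
 *	i915_gem_object_unpin_map(obj);
 */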
2660
2661 void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj,
2662                                  unsigned long offset,
2663                                  unsigned long size)
2664 {
2665         enum i915_map_type has_type;
2666         void *ptr;
2667
2668         GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
2669         GEM_BUG_ON(range_overflows_t(typeof(obj->base.size),
2670                                      offset, size, obj->base.size));
2671
2672         obj->mm.dirty = true;
2673
2674         if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)
2675                 return;
2676
2677         ptr = page_unpack_bits(obj->mm.mapping, &has_type);
2678         if (has_type == I915_MAP_WC)
2679                 return;
2680
2681         drm_clflush_virt_range(ptr + offset, size);
2682         if (size == obj->base.size) {
2683                 obj->write_domain &= ~I915_GEM_DOMAIN_CPU;
2684                 obj->cache_dirty = false;
2685         }
2686 }
2687
2688 static int
2689 i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
2690                            const struct drm_i915_gem_pwrite *arg)
2691 {
2692         struct address_space *mapping = obj->base.filp->f_mapping;
2693         char __user *user_data = u64_to_user_ptr(arg->data_ptr);
2694         u64 remain, offset;
2695         unsigned int pg;
2696
2697         /* Caller already validated user args */
2698         GEM_BUG_ON(!access_ok(user_data, arg->size));
2699
2700         /*
2701          * Before we instantiate/pin the backing store for our use, we
2702          * can prepopulate the shmemfs filp efficiently using a write into
2703          * the pagecache. We avoid the penalty of instantiating all the
2704          * pages, important if the user is just writing to a few and never
2705          * uses the object on the GPU, and using a direct write into shmemfs
2706          * allows it to avoid the cost of retrieving a page (either swapin
2707          * or clearing-before-use) before it is overwritten.
2708          */
2709         if (i915_gem_object_has_pages(obj))
2710                 return -ENODEV;
2711
2712         if (obj->mm.madv != I915_MADV_WILLNEED)
2713                 return -EFAULT;
2714
2715         /*
2716          * Before the pages are instantiated the object is treated as being
2717          * in the CPU domain. The pages will be clflushed as required before
2718          * use, and we can freely write into the pages directly. If userspace
2719          * races pwrite with any other operation, corruption will ensue -
2720          * that is userspace's prerogative!
2721          */
2722
2723         remain = arg->size;
2724         offset = arg->offset;
2725         pg = offset_in_page(offset);
2726
2727         do {
2728                 unsigned int len, unwritten;
2729                 struct page *page;
2730                 void *data, *vaddr;
2731                 int err;
2732                 char c;
2733
2734                 len = PAGE_SIZE - pg;
2735                 if (len > remain)
2736                         len = remain;
2737
2738                 /* Prefault the user page to reduce potential recursion */
2739                 err = __get_user(c, user_data);
2740                 if (err)
2741                         return err;
2742
2743                 err = __get_user(c, user_data + len - 1);
2744                 if (err)
2745                         return err;
2746
2747                 err = pagecache_write_begin(obj->base.filp, mapping,
2748                                             offset, len, 0,
2749                                             &page, &data);
2750                 if (err < 0)
2751                         return err;
2752
2753                 vaddr = kmap_atomic(page);
2754                 unwritten = __copy_from_user_inatomic(vaddr + pg,
2755                                                       user_data,
2756                                                       len);
2757                 kunmap_atomic(vaddr);
2758
2759                 err = pagecache_write_end(obj->base.filp, mapping,
2760                                           offset, len, len - unwritten,
2761                                           page, data);
2762                 if (err < 0)
2763                         return err;
2764
2765                 /* We don't handle -EFAULT, leave it to the caller to check */
2766                 if (unwritten)
2767                         return -ENODEV;
2768
2769                 remain -= len;
2770                 user_data += len;
2771                 offset += len;
2772                 pg = 0;
2773         } while (remain);
2774
2775         return 0;
2776 }
2777
2778 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
2779 {
2780         struct drm_i915_private *i915 = to_i915(gem->dev);
2781         struct drm_i915_gem_object *obj = to_intel_bo(gem);
2782         struct drm_i915_file_private *fpriv = file->driver_priv;
2783         struct i915_lut_handle *lut, *ln;
2784
2785         mutex_lock(&i915->drm.struct_mutex);
2786
2787         list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) {
2788                 struct i915_gem_context *ctx = lut->ctx;
2789                 struct i915_vma *vma;
2790
2791                 GEM_BUG_ON(ctx->file_priv == ERR_PTR(-EBADF));
2792                 if (ctx->file_priv != fpriv)
2793                         continue;
2794
2795                 vma = radix_tree_delete(&ctx->handles_vma, lut->handle);
2796                 GEM_BUG_ON(vma->obj != obj);
2797
2798                 /* We allow the process to have multiple handles to the same
2799                  * vma, in the same fd namespace, by virtue of flink/open.
2800                  */
2801                 GEM_BUG_ON(!vma->open_count);
2802                 if (!--vma->open_count && !i915_vma_is_ggtt(vma))
2803                         i915_vma_close(vma);
2804
2805                 list_del(&lut->obj_link);
2806                 list_del(&lut->ctx_link);
2807
2808                 i915_lut_handle_free(lut);
2809                 __i915_gem_object_release_unless_active(obj);
2810         }
2811
2812         mutex_unlock(&i915->drm.struct_mutex);
2813 }
2814
2815 static unsigned long to_wait_timeout(s64 timeout_ns)
2816 {
2817         if (timeout_ns < 0)
2818                 return MAX_SCHEDULE_TIMEOUT;
2819
2820         if (timeout_ns == 0)
2821                 return 0;
2822
2823         return nsecs_to_jiffies_timeout(timeout_ns);
2824 }
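
/*
 * Example of the mapping above: timeout_ns < 0 waits forever
 * (MAX_SCHEDULE_TIMEOUT), timeout_ns == 0 degenerates into a non-blocking
 * busy query, and a positive value such as 16000000 (16ms) is converted to
 * jiffies, so the effective granularity is one scheduler tick.
 */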
2825
2826 /**
2827  * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
2828  * @dev: drm device pointer
2829  * @data: ioctl data blob
2830  * @file: drm file pointer
2831  *
2832  * Returns 0 if successful, else an error is returned with the remaining time in
2833  * the timeout parameter.
2834  *  -ETIME: object is still busy after timeout
2835  *  -ERESTARTSYS: signal interrupted the wait
2836  *  -ENOENT: object doesn't exist
2837  * Also possible, but rare:
2838  *  -EAGAIN: incomplete, restart syscall
2839  *  -ENOMEM: damn
2840  *  -ENODEV: Internal IRQ fail
2841  *  -E?: The add request failed
2842  *
2843  * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
2844  * non-zero timeout parameter the wait ioctl will wait for the given number of
2845  * nanoseconds on an object becoming unbusy. Since the wait itself does so
2846  * without holding struct_mutex the object may become re-busied before this
2847  * function completes. A similar but shorter * race condition exists in the busy
2848  * ioctl
2849  */
2850 int
2851 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2852 {
2853         struct drm_i915_gem_wait *args = data;
2854         struct drm_i915_gem_object *obj;
2855         ktime_t start;
2856         long ret;
2857
2858         if (args->flags != 0)
2859                 return -EINVAL;
2860
2861         obj = i915_gem_object_lookup(file, args->bo_handle);
2862         if (!obj)
2863                 return -ENOENT;
2864
2865         start = ktime_get();
2866
2867         ret = i915_gem_object_wait(obj,
2868                                    I915_WAIT_INTERRUPTIBLE |
2869                                    I915_WAIT_PRIORITY |
2870                                    I915_WAIT_ALL,
2871                                    to_wait_timeout(args->timeout_ns));
2872
2873         if (args->timeout_ns > 0) {
2874                 args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
2875                 if (args->timeout_ns < 0)
2876                         args->timeout_ns = 0;
2877
2878                 /*
2879                  * Apparently ktime isn't accurate enough and occasionally has a
2880                  * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
2881                  * things up to make the test happy. We allow up to 1 jiffy.
2882                  *
2883                  * This is a regression from the timespec->ktime conversion.
2884                  */
2885                 if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns))
2886                         args->timeout_ns = 0;
2887
2888                 /* Asked to wait beyond the jiffie/scheduler precision? */
2889                 if (ret == -ETIME && args->timeout_ns)
2890                         ret = -EAGAIN;
2891         }
2892
2893         i915_gem_object_put(obj);
2894         return ret;
2895 }
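
/*
 * Hedged userspace sketch for the ioctl above (uapi names from
 * include/uapi/drm/i915_drm.h): wait up to 100ms for the object to go idle,
 * or pass timeout_ns = 0 for a pure busy check. On -ETIME the remaining
 * budget is written back into timeout_ns.
 *
 *	struct drm_i915_gem_wait wait = {
 *		.bo_handle  = handle,
 *		.timeout_ns = 100 * 1000 * 1000,
 *	};
 *
 *	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
 */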
2896
2897 static int wait_for_engines(struct drm_i915_private *i915)
2898 {
2899         if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) {
2900                 dev_err(i915->drm.dev,
2901                         "Failed to idle engines, declaring wedged!\n");
2902                 GEM_TRACE_DUMP();
2903                 i915_gem_set_wedged(i915);
2904                 return -EIO;
2905         }
2906
2907         return 0;
2908 }
2909
2910 static long
2911 wait_for_timelines(struct drm_i915_private *i915,
2912                    unsigned int flags, long timeout)
2913 {
2914         struct i915_gt_timelines *gt = &i915->gt.timelines;
2915         struct i915_timeline *tl;
2916
2917         mutex_lock(&gt->mutex);
2918         list_for_each_entry(tl, &gt->active_list, link) {
2919                 struct i915_request *rq;
2920
2921                 rq = i915_active_request_get_unlocked(&tl->last_request);
2922                 if (!rq)
2923                         continue;
2924
2925                 mutex_unlock(&gt->mutex);
2926
2927                 /*
2928                  * "Race-to-idle".
2929                  *
2930                  * Switching to the kernel context is often used as a synchronous
2931                  * step prior to idling, e.g. in suspend for flushing all
2932                  * current operations to memory before sleeping. These we
2933                  * want to complete as quickly as possible to avoid prolonged
2934                  * stalls, so allow the gpu to boost to maximum clocks.
2935                  */
2936                 if (flags & I915_WAIT_FOR_IDLE_BOOST)
2937                         gen6_rps_boost(rq);
2938
2939                 timeout = i915_request_wait(rq, flags, timeout);
2940                 i915_request_put(rq);
2941                 if (timeout < 0)
2942                         return timeout;
2943
2944                 /* restart after reacquiring the lock */
2945                 mutex_lock(&gt->mutex);
2946                 tl = list_entry(&gt->active_list, typeof(*tl), link);
2947         }
2948         mutex_unlock(&gt->mutex);
2949
2950         return timeout;
2951 }
2952
2953 int i915_gem_wait_for_idle(struct drm_i915_private *i915,
2954                            unsigned int flags, long timeout)
2955 {
2956         GEM_TRACE("flags=%x (%s), timeout=%ld%s, awake?=%s\n",
2957                   flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked",
2958                   timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : "",
2959                   yesno(i915->gt.awake));
2960
2961         /* If the device is asleep, we have no requests outstanding */
2962         if (!READ_ONCE(i915->gt.awake))
2963                 return 0;
2964
2965         timeout = wait_for_timelines(i915, flags, timeout);
2966         if (timeout < 0)
2967                 return timeout;
2968
2969         if (flags & I915_WAIT_LOCKED) {
2970                 int err;
2971
2972                 lockdep_assert_held(&i915->drm.struct_mutex);
2973
2974                 err = wait_for_engines(i915);
2975                 if (err)
2976                         return err;
2977
2978                 i915_retire_requests(i915);
2979         }
2980
2981         return 0;
2982 }
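
/*
 * Editor's sketch (illustrative, not part of the driver): one plausible way a
 * caller that already holds struct_mutex might quiesce the GPU with the
 * helper above, using only flags and constants that appear in this file. The
 * minimal error handling is a hypothetical example, not a required pattern.
 *
 *	lockdep_assert_held(&i915->drm.struct_mutex);
 *
 *	err = i915_gem_wait_for_idle(i915,
 *				     I915_WAIT_INTERRUPTIBLE |
 *				     I915_WAIT_LOCKED,
 *				     MAX_SCHEDULE_TIMEOUT);
 *	if (err)
 *		return err;
 */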
2983
2984 static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
2985 {
2986         /*
2987          * We manually flush the CPU domain so that we can override and
2988          * force the flush for the display, and perform it asynchronously.
2989          */
2990         flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
2991         if (obj->cache_dirty)
2992                 i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
2993         obj->write_domain = 0;
2994 }
2995
2996 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
2997 {
2998         if (!READ_ONCE(obj->pin_global))
2999                 return;
3000
3001         mutex_lock(&obj->base.dev->struct_mutex);
3002         __i915_gem_object_flush_for_display(obj);
3003         mutex_unlock(&obj->base.dev->struct_mutex);
3004 }
3005
3006 /**
3007  * Moves a single object to the WC read, and possibly write domain.
3008  * @obj: object to act on
3009  * @write: ask for write access or read only
3010  *
3011  * This function returns when the move is complete, including waiting on
3012  * flushes to occur.
3013  */
3014 int
3015 i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
3016 {
3017         int ret;
3018
3019         lockdep_assert_held(&obj->base.dev->struct_mutex);
3020
3021         ret = i915_gem_object_wait(obj,
3022                                    I915_WAIT_INTERRUPTIBLE |
3023                                    I915_WAIT_LOCKED |
3024                                    (write ? I915_WAIT_ALL : 0),
3025                                    MAX_SCHEDULE_TIMEOUT);
3026         if (ret)
3027                 return ret;
3028
3029         if (obj->write_domain == I915_GEM_DOMAIN_WC)
3030                 return 0;
3031
3032         /* Flush and acquire obj->pages so that we are coherent through
3033          * direct access in memory with previous cached writes through
3034          * shmemfs and that our cache domain tracking remains valid.
3035          * For example, if the obj->filp was moved to swap without us
3036          * being notified and releasing the pages, we would mistakenly
3037          * continue to assume that the obj remained out of the CPU cached
3038          * domain.
3039          */
3040         ret = i915_gem_object_pin_pages(obj);
3041         if (ret)
3042                 return ret;
3043
3044         flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
3045
3046         /* Serialise direct access to this object with the barriers for
3047          * coherent writes from the GPU, by effectively invalidating the
3048          * WC domain upon first access.
3049          */
3050         if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
3051                 mb();
3052
3053         /* It should now be out of any other write domains, and we can update
3054          * the domain values for our changes.
3055          */
3056         GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
3057         obj->read_domains |= I915_GEM_DOMAIN_WC;
3058         if (write) {
3059                 obj->read_domains = I915_GEM_DOMAIN_WC;
3060                 obj->write_domain = I915_GEM_DOMAIN_WC;
3061                 obj->mm.dirty = true;
3062         }
3063
3064         i915_gem_object_unpin_pages(obj);
3065         return 0;
3066 }
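
/*
 * Editor's sketch (illustrative only): a hypothetical caller holding
 * struct_mutex that wants CPU writes through a WC mapping to stay coherent
 * would first move the object into the WC write domain:
 *
 *	ret = i915_gem_object_set_to_wc_domain(obj, true);
 *	if (ret)
 *		return ret;
 *
 * On success obj->write_domain is I915_GEM_DOMAIN_WC and obj->mm.dirty is
 * set, per the bookkeeping above.
 */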
3067
3068 /**
3069  * Moves a single object to the GTT read, and possibly write domain.
3070  * @obj: object to act on
3071  * @write: ask for write access or read only
3072  *
3073  * This function returns when the move is complete, including waiting on
3074  * flushes to occur.
3075  */
3076 int
3077 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
3078 {
3079         int ret;
3080
3081         lockdep_assert_held(&obj->base.dev->struct_mutex);
3082
3083         ret = i915_gem_object_wait(obj,
3084                                    I915_WAIT_INTERRUPTIBLE |
3085                                    I915_WAIT_LOCKED |
3086                                    (write ? I915_WAIT_ALL : 0),
3087                                    MAX_SCHEDULE_TIMEOUT);
3088         if (ret)
3089                 return ret;
3090
3091         if (obj->write_domain == I915_GEM_DOMAIN_GTT)
3092                 return 0;
3093
3094         /* Flush and acquire obj->pages so that we are coherent through
3095          * direct access in memory with previous cached writes through
3096          * shmemfs and that our cache domain tracking remains valid.
3097          * For example, if the obj->filp was moved to swap without us
3098          * being notified and releasing the pages, we would mistakenly
3099          * continue to assume that the obj remained out of the CPU cached
3100          * domain.
3101          */
3102         ret = i915_gem_object_pin_pages(obj);
3103         if (ret)
3104                 return ret;
3105
3106         flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
3107
3108         /* Serialise direct access to this object with the barriers for
3109          * coherent writes from the GPU, by effectively invalidating the
3110          * GTT domain upon first access.
3111          */
3112         if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
3113                 mb();
3114
3115         /* It should now be out of any other write domains, and we can update
3116          * the domain values for our changes.
3117          */
3118         GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
3119         obj->read_domains |= I915_GEM_DOMAIN_GTT;
3120         if (write) {
3121                 obj->read_domains = I915_GEM_DOMAIN_GTT;
3122                 obj->write_domain = I915_GEM_DOMAIN_GTT;
3123                 obj->mm.dirty = true;
3124         }
3125
3126         i915_gem_object_unpin_pages(obj);
3127         return 0;
3128 }
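
/*
 * Editor's sketch (illustrative only): the GTT variant is typically paired
 * with a GGTT pin. A minimal hypothetical sequence, assuming struct_mutex is
 * held and using i915_gem_object_ggtt_pin() defined later in this file:
 *
 *	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
 *	if (IS_ERR(vma))
 *		return PTR_ERR(vma);
 *
 *	ret = i915_gem_object_set_to_gtt_domain(obj, true);
 *	if (ret) {
 *		i915_vma_unpin(vma);
 *		return ret;
 *	}
 */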
3129
3130 /**
3131  * Changes the cache-level of an object across all VMA.
3132  * @obj: object to act on
3133  * @cache_level: new cache level to set for the object
3134  *
3135  * After this function returns, the object will be in the new cache-level
3136  * across all GTT and the contents of the backing storage will be coherent,
3137  * with respect to the new cache-level. In order to keep the backing storage
3138  * coherent for all users, we only allow a single cache level to be set
3139  * globally on the object and prevent it from being changed whilst the
3140  * hardware is reading from the object. That is, if the object is currently
3141  * on the scanout it will be set to uncached (or equivalent display
3142  * cache coherency) and all non-MOCS GPU access will also be uncached so
3143  * that all direct access to the scanout remains coherent.
3144  */
3145 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
3146                                     enum i915_cache_level cache_level)
3147 {
3148         struct i915_vma *vma;
3149         int ret;
3150
3151         lockdep_assert_held(&obj->base.dev->struct_mutex);
3152
3153         if (obj->cache_level == cache_level)
3154                 return 0;
3155
3156         /* Inspect the list of currently bound VMA and unbind any that would
3157          * be invalid given the new cache-level. This is principally to
3158          * catch the issue of the CS prefetch crossing page boundaries and
3159          * reading an invalid PTE on older architectures.
3160          */
3161 restart:
3162         list_for_each_entry(vma, &obj->vma.list, obj_link) {
3163                 if (!drm_mm_node_allocated(&vma->node))
3164                         continue;
3165
3166                 if (i915_vma_is_pinned(vma)) {
3167                         DRM_DEBUG("can not change the cache level of pinned objects\n");
3168                         return -EBUSY;
3169                 }
3170
3171                 if (!i915_vma_is_closed(vma) &&
3172                     i915_gem_valid_gtt_space(vma, cache_level))
3173                         continue;
3174
3175                 ret = i915_vma_unbind(vma);
3176                 if (ret)
3177                         return ret;
3178
3179                 /* As unbinding may affect other elements in the
3180                  * obj->vma_list (due to side-effects from retiring
3181                  * an active vma), play safe and restart the iterator.
3182                  */
3183                 goto restart;
3184         }
3185
3186         /* We can reuse the existing drm_mm nodes but need to change the
3187          * cache-level on the PTE. We could simply unbind them all and
3188          * rebind with the correct cache-level on next use. However since
3189          * we already have a valid slot, dma mapping, pages etc, we may as well
3190          * rewrite the PTE in the belief that doing so tramples upon less
3191          * state and so involves less work.
3192          */
3193         if (obj->bind_count) {
3194                 /* Before we change the PTE, the GPU must not be accessing it.
3195                  * If we wait upon the object, we know that all the bound
3196                  * VMA are no longer active.
3197                  */
3198                 ret = i915_gem_object_wait(obj,
3199                                            I915_WAIT_INTERRUPTIBLE |
3200                                            I915_WAIT_LOCKED |
3201                                            I915_WAIT_ALL,
3202                                            MAX_SCHEDULE_TIMEOUT);
3203                 if (ret)
3204                         return ret;
3205
3206                 if (!HAS_LLC(to_i915(obj->base.dev)) &&
3207                     cache_level != I915_CACHE_NONE) {
3208                         /* Access to snoopable pages through the GTT is
3209                          * incoherent and on some machines causes a hard
3210                          * lockup. Relinquish the CPU mmapping to force
3211                          * userspace to refault in the pages and we can
3212                          * then double check if the GTT mapping is still
3213                          * valid for that pointer access.
3214                          */
3215                         i915_gem_release_mmap(obj);
3216
3217                         /* As we no longer need a fence for GTT access,
3218                          * we can relinquish it now (and so prevent having
3219                          * to steal a fence from someone else on the next
3220                          * fence request). Note GPU activity would have
3221                          * dropped the fence as all snoopable access is
3222                          * supposed to be linear.
3223                          */
3224                         for_each_ggtt_vma(vma, obj) {
3225                                 ret = i915_vma_put_fence(vma);
3226                                 if (ret)
3227                                         return ret;
3228                         }
3229                 } else {
3230                         /* We either have incoherent backing store and
3231                          * so no GTT access or the architecture is fully
3232                          * coherent. In such cases, existing GTT mmaps
3233                          * ignore the cache bit in the PTE and we can
3234                          * rewrite it without confusing the GPU or having
3235                          * to force userspace to fault back in its mmaps.
3236                          */
3237                 }
3238
3239                 list_for_each_entry(vma, &obj->vma.list, obj_link) {
3240                         if (!drm_mm_node_allocated(&vma->node))
3241                                 continue;
3242
3243                         ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
3244                         if (ret)
3245                                 return ret;
3246                 }
3247         }
3248
3249         list_for_each_entry(vma, &obj->vma.list, obj_link)
3250                 vma->node.color = cache_level;
3251         i915_gem_object_set_cache_coherency(obj, cache_level);
3252         obj->cache_dirty = true; /* Always invalidate stale cachelines */
3253
3254         return 0;
3255 }
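
/*
 * Editor's sketch: the most common in-driver use of the helper above is to
 * make an object scanout-safe, exactly as
 * i915_gem_object_pin_to_display_plane() does further down; repeated here as
 * a minimal illustration (i915 being the caller's device pointer):
 *
 *	ret = i915_gem_object_set_cache_level(obj,
 *					      HAS_WT(i915) ? I915_CACHE_WT :
 *							     I915_CACHE_NONE);
 *	if (ret)
 *		return ret;
 */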
3256
3257 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
3258                                struct drm_file *file)
3259 {
3260         struct drm_i915_gem_caching *args = data;
3261         struct drm_i915_gem_object *obj;
3262         int err = 0;
3263
3264         rcu_read_lock();
3265         obj = i915_gem_object_lookup_rcu(file, args->handle);
3266         if (!obj) {
3267                 err = -ENOENT;
3268                 goto out;
3269         }
3270
3271         switch (obj->cache_level) {
3272         case I915_CACHE_LLC:
3273         case I915_CACHE_L3_LLC:
3274                 args->caching = I915_CACHING_CACHED;
3275                 break;
3276
3277         case I915_CACHE_WT:
3278                 args->caching = I915_CACHING_DISPLAY;
3279                 break;
3280
3281         default:
3282                 args->caching = I915_CACHING_NONE;
3283                 break;
3284         }
3285 out:
3286         rcu_read_unlock();
3287         return err;
3288 }
3289
3290 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
3291                                struct drm_file *file)
3292 {
3293         struct drm_i915_private *i915 = to_i915(dev);
3294         struct drm_i915_gem_caching *args = data;
3295         struct drm_i915_gem_object *obj;
3296         enum i915_cache_level level;
3297         int ret = 0;
3298
3299         switch (args->caching) {
3300         case I915_CACHING_NONE:
3301                 level = I915_CACHE_NONE;
3302                 break;
3303         case I915_CACHING_CACHED:
3304                 /*
3305                  * Due to a HW issue on BXT A stepping, GPU stores via a
3306                  * snooped mapping may leave stale data in a corresponding CPU
3307                  * cacheline, whereas normally such cachelines would get
3308                  * invalidated.
3309                  */
3310                 if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
3311                         return -ENODEV;
3312
3313                 level = I915_CACHE_LLC;
3314                 break;
3315         case I915_CACHING_DISPLAY:
3316                 level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
3317                 break;
3318         default:
3319                 return -EINVAL;
3320         }
3321
3322         obj = i915_gem_object_lookup(file, args->handle);
3323         if (!obj)
3324                 return -ENOENT;
3325
3326         /*
3327          * The caching mode of a proxy object is handled by its generator, and
3328          * not allowed to be changed by userspace.
3329          */
3330         if (i915_gem_object_is_proxy(obj)) {
3331                 ret = -ENXIO;
3332                 goto out;
3333         }
3334
3335         if (obj->cache_level == level)
3336                 goto out;
3337
3338         ret = i915_gem_object_wait(obj,
3339                                    I915_WAIT_INTERRUPTIBLE,
3340                                    MAX_SCHEDULE_TIMEOUT);
3341         if (ret)
3342                 goto out;
3343
3344         ret = i915_mutex_lock_interruptible(dev);
3345         if (ret)
3346                 goto out;
3347
3348         ret = i915_gem_object_set_cache_level(obj, level);
3349         mutex_unlock(&dev->struct_mutex);
3350
3351 out:
3352         i915_gem_object_put(obj);
3353         return ret;
3354 }
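
/*
 * Editor's note (illustrative, userspace side): the two ioctls above are
 * normally driven through the usual libdrm wrapper. A hedged sketch, assuming
 * the DRM_IOCTL_I915_GEM_SET_CACHING request number and struct layout from
 * uapi i915_drm.h, with fd and handle being hypothetical caller state:
 *
 *	struct drm_i915_gem_caching arg = {
 *		.handle = handle,
 *		.caching = I915_CACHING_CACHED,
 *	};
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg))
 *		return -errno;
 */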
3355
3356 /*
3357  * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
3358  * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
3359  * (for pageflips). We only flush the caches while preparing the buffer for
3360  * display, the callers are responsible for frontbuffer flush.
3361  */
3362 struct i915_vma *
3363 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
3364                                      u32 alignment,
3365                                      const struct i915_ggtt_view *view,
3366                                      unsigned int flags)
3367 {
3368         struct i915_vma *vma;
3369         int ret;
3370
3371         lockdep_assert_held(&obj->base.dev->struct_mutex);
3372
3373         /* Mark the global pin early so that we account for the
3374          * display coherency whilst setting up the cache domains.
3375          */
3376         obj->pin_global++;
3377
3378         /* The display engine is not coherent with the LLC cache on gen6.  As
3379          * a result, we make sure that the pinning that is about to occur is
3380          * done with uncached PTEs. This is lowest common denominator for all
3381          * chipsets.
3382          *
3383          * However for gen6+, we could do better by using the GFDT bit instead
3384          * of uncaching, which would allow us to flush all the LLC-cached data
3385          * with that bit in the PTE to main memory with just one PIPE_CONTROL.
3386          */
3387         ret = i915_gem_object_set_cache_level(obj,
3388                                               HAS_WT(to_i915(obj->base.dev)) ?
3389                                               I915_CACHE_WT : I915_CACHE_NONE);
3390         if (ret) {
3391                 vma = ERR_PTR(ret);
3392                 goto err_unpin_global;
3393         }
3394
3395         /* As the user may map the buffer once pinned in the display plane
3396          * (e.g. libkms for the bootup splash), we have to ensure that we
3397          * always use map_and_fenceable for all scanout buffers. However,
3398          * it may simply be too big to fit into mappable, in which case
3399          * put it anyway and hope that userspace can cope (but always first
3400          * try to preserve the existing ABI).
3401          */
3402         vma = ERR_PTR(-ENOSPC);
3403         if ((flags & PIN_MAPPABLE) == 0 &&
3404             (!view || view->type == I915_GGTT_VIEW_NORMAL))
3405                 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
3406                                                flags |
3407                                                PIN_MAPPABLE |
3408                                                PIN_NONBLOCK);
3409         if (IS_ERR(vma))
3410                 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
3411         if (IS_ERR(vma))
3412                 goto err_unpin_global;
3413
3414         vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
3415
3416         __i915_gem_object_flush_for_display(obj);
3417
3418         /* It should now be out of any other write domains, and we can update
3419          * the domain values for our changes.
3420          */
3421         obj->read_domains |= I915_GEM_DOMAIN_GTT;
3422
3423         return vma;
3424
3425 err_unpin_global:
3426         obj->pin_global--;
3427         return vma;
3428 }
3429
3430 void
3431 i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
3432 {
3433         lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
3434
3435         if (WARN_ON(vma->obj->pin_global == 0))
3436                 return;
3437
3438         if (--vma->obj->pin_global == 0)
3439                 vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
3440
3441         /* Bump the LRU to try and avoid premature eviction whilst flipping  */
3442         i915_gem_object_bump_inactive_ggtt(vma->obj);
3443
3444         i915_vma_unpin(vma);
3445 }
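
/*
 * Editor's sketch: the pin/unpin pair above is intended to bracket scanout
 * use of a framebuffer object. A minimal hypothetical sequence (struct_mutex
 * held, alignment/view supplied by the caller, error handling elided beyond
 * the pin itself):
 *
 *	vma = i915_gem_object_pin_to_display_plane(obj, alignment, view, 0);
 *	if (IS_ERR(vma))
 *		return PTR_ERR(vma);
 *
 *	... program the plane to scan out from i915_ggtt_offset(vma) ...
 *
 *	i915_gem_object_unpin_from_display_plane(vma);
 */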
3446
3447 /**
3448  * Moves a single object to the CPU read, and possibly write domain.
3449  * @obj: object to act on
3450  * @write: requesting write or read-only access
3451  *
3452  * This function returns when the move is complete, including waiting on
3453  * flushes to occur.
3454  */
3455 int
3456 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
3457 {
3458         int ret;
3459
3460         lockdep_assert_held(&obj->base.dev->struct_mutex);
3461
3462         ret = i915_gem_object_wait(obj,
3463                                    I915_WAIT_INTERRUPTIBLE |
3464                                    I915_WAIT_LOCKED |
3465                                    (write ? I915_WAIT_ALL : 0),
3466                                    MAX_SCHEDULE_TIMEOUT);
3467         if (ret)
3468                 return ret;
3469
3470         flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
3471
3472         /* Flush the CPU cache if it's still invalid. */
3473         if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
3474                 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
3475                 obj->read_domains |= I915_GEM_DOMAIN_CPU;
3476         }
3477
3478         /* It should now be out of any other write domains, and we can update
3479          * the domain values for our changes.
3480          */
3481         GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);
3482
3483         /* If we're writing through the CPU, then the GPU read domains will
3484          * need to be invalidated at next use.
3485          */
3486         if (write)
3487                 __start_cpu_write(obj);
3488
3489         return 0;
3490 }
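
/*
 * Editor's sketch (illustrative): reading an object's contents back with the
 * CPU after GPU writes would typically be bracketed as below, assuming
 * struct_mutex is held. The mapping step uses i915_gem_object_pin_map(),
 * which appears later in this file; I915_MAP_WB is assumed from the object
 * API (only I915_MAP_FORCE_WB is used in this file):
 *
 *	ret = i915_gem_object_set_to_cpu_domain(obj, false);
 *	if (ret)
 *		return ret;
 *
 *	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
 *	...
 *	i915_gem_object_unpin_map(obj);
 */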
3491
3492 /* Throttle our rendering by waiting until the ring has completed our requests
3493  * emitted over 20 msec ago.
3494  *
3495  * Note that if we were to use the current jiffies each time around the loop,
3496  * we wouldn't escape the function with any frames outstanding if the time to
3497  * render a frame was over 20ms.
3498  *
3499  * This should get us reasonable parallelism between CPU and GPU but also
3500  * relatively low latency when blocking on a particular request to finish.
3501  */
3502 static int
3503 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
3504 {
3505         struct drm_i915_private *dev_priv = to_i915(dev);
3506         struct drm_i915_file_private *file_priv = file->driver_priv;
3507         unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
3508         struct i915_request *request, *target = NULL;
3509         long ret;
3510
3511         /* ABI: return -EIO if already wedged */
3512         ret = i915_terminally_wedged(dev_priv);
3513         if (ret)
3514                 return ret;
3515
3516         spin_lock(&file_priv->mm.lock);
3517         list_for_each_entry(request, &file_priv->mm.request_list, client_link) {
3518                 if (time_after_eq(request->emitted_jiffies, recent_enough))
3519                         break;
3520
3521                 if (target) {
3522                         list_del(&target->client_link);
3523                         target->file_priv = NULL;
3524                 }
3525
3526                 target = request;
3527         }
3528         if (target)
3529                 i915_request_get(target);
3530         spin_unlock(&file_priv->mm.lock);
3531
3532         if (target == NULL)
3533                 return 0;
3534
3535         ret = i915_request_wait(target,
3536                                 I915_WAIT_INTERRUPTIBLE,
3537                                 MAX_SCHEDULE_TIMEOUT);
3538         i915_request_put(target);
3539
3540         return ret < 0 ? ret : 0;
3541 }
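
/*
 * Editor's note: DRM_I915_THROTTLE_JIFFIES above is assumed to be the jiffy
 * encoding of the 20 msec window described in the comment, i.e. effectively
 * msecs_to_jiffies(20); the definition itself lives in i915_drv.h, not here.
 */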
3542
3543 struct i915_vma *
3544 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
3545                          const struct i915_ggtt_view *view,
3546                          u64 size,
3547                          u64 alignment,
3548                          u64 flags)
3549 {
3550         struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
3551         struct i915_address_space *vm = &dev_priv->ggtt.vm;
3552         struct i915_vma *vma;
3553         int ret;
3554
3555         lockdep_assert_held(&obj->base.dev->struct_mutex);
3556
3557         if (flags & PIN_MAPPABLE &&
3558             (!view || view->type == I915_GGTT_VIEW_NORMAL)) {
3559                 /* If the required space is larger than the available
3560                  * aperture, we will not be able to find a slot for the
3561                  * object and unbinding the object now will be in
3562                  * vain. Worse, doing so may cause us to ping-pong
3563                  * the object in and out of the Global GTT and
3564                  * waste a lot of cycles under the mutex.
3565                  */
3566                 if (obj->base.size > dev_priv->ggtt.mappable_end)
3567                         return ERR_PTR(-E2BIG);
3568
3569                 /* If NONBLOCK is set the caller is optimistically
3570                  * trying to cache the full object within the mappable
3571                  * aperture, and *must* have a fallback in place for
3572                  * situations where we cannot bind the object. We
3573                  * can be a little more lax here and use the fallback
3574                  * more often to avoid costly migrations of ourselves
3575                  * and other objects within the aperture.
3576                  *
3577                  * Half-the-aperture is used as a simple heuristic.
3578                  * More interesting would be to do a search for a free
3579                  * block prior to making the commitment to unbind.
3580                  * That caters for the self-harm case, and with a
3581                  * little more heuristics (e.g. NOFAULT, NOEVICT)
3582                  * we could try to minimise harm to others.
3583                  */
3584                 if (flags & PIN_NONBLOCK &&
3585                     obj->base.size > dev_priv->ggtt.mappable_end / 2)
3586                         return ERR_PTR(-ENOSPC);
3587         }
3588
3589         vma = i915_vma_instance(obj, vm, view);
3590         if (IS_ERR(vma))
3591                 return vma;
3592
3593         if (i915_vma_misplaced(vma, size, alignment, flags)) {
3594                 if (flags & PIN_NONBLOCK) {
3595                         if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))
3596                                 return ERR_PTR(-ENOSPC);
3597
3598                         if (flags & PIN_MAPPABLE &&
3599                             vma->fence_size > dev_priv->ggtt.mappable_end / 2)
3600                                 return ERR_PTR(-ENOSPC);
3601                 }
3602
3603                 WARN(i915_vma_is_pinned(vma),
3604                      "bo is already pinned in ggtt with incorrect alignment:"
3605                      " offset=%08x, req.alignment=%llx,"
3606                      " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n",
3607                      i915_ggtt_offset(vma), alignment,
3608                      !!(flags & PIN_MAPPABLE),
3609                      i915_vma_is_map_and_fenceable(vma));
3610                 ret = i915_vma_unbind(vma);
3611                 if (ret)
3612                         return ERR_PTR(ret);
3613         }
3614
3615         ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
3616         if (ret)
3617                 return ERR_PTR(ret);
3618
3619         return vma;
3620 }
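
/*
 * Editor's sketch: the "optimistic mappable, then fall back" pattern that the
 * PIN_NONBLOCK handling above is designed for, and which
 * i915_gem_object_pin_to_display_plane() uses earlier in this file:
 *
 *	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
 *				       PIN_MAPPABLE | PIN_NONBLOCK);
 *	if (IS_ERR(vma))
 *		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
 *	if (IS_ERR(vma))
 *		return PTR_ERR(vma);
 */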
3621
3622 static __always_inline u32 __busy_read_flag(u8 id)
3623 {
3624         if (id == (u8)I915_ENGINE_CLASS_INVALID)
3625                 return 0xffff0000u;
3626
3627         GEM_BUG_ON(id >= 16);
3628         return 0x10000u << id;
3629 }
3630
3631 static __always_inline u32 __busy_write_id(u8 id)
3632 {
3633         /*
3634          * The uABI guarantees an active writer is also amongst the read
3635          * engines. This would be true if we accessed the activity tracking
3636          * under the lock, but as we perform the lookup of the object and
3637          * its activity locklessly we can not guarantee that the last_write
3638          * being active implies that we have set the same engine flag from
3639          * last_read - hence we always set both read and write busy for
3640          * last_write.
3641          */
3642         if (id == (u8)I915_ENGINE_CLASS_INVALID)
3643                 return 0xffffffffu;
3644
3645         return (id + 1) | __busy_read_flag(id);
3646 }
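
/*
 * Editor's note: a worked example of the encoding implemented by the two
 * helpers above, assuming the uAPI engine class values from i915_drm.h
 * (e.g. I915_ENGINE_CLASS_COPY == 1):
 *
 *	__busy_read_flag(1) == 0x00020000	(one bit per class in the high word)
 *	__busy_write_id(1)  == 0x00020002	(class + 1 in the low word, plus
 *						 the matching read flag)
 *
 * An invalid class reports all read bits (0xffff0000) or all bits
 * (0xffffffff) respectively, as coded above.
 */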
3647
3648 static __always_inline unsigned int
3649 __busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u8 id))
3650 {
3651         const struct i915_request *rq;
3652
3653         /*
3654          * We have to check the current hw status of the fence as the uABI
3655          * guarantees forward progress. We could rely on the idle worker
3656          * to eventually flush us, but to minimise latency just ask the
3657          * hardware.
3658          *
3659          * Note we only report on the status of native fences.
3660          */
3661         if (!dma_fence_is_i915(fence))
3662                 return 0;
3663
3664         /* opencode to_request() in order to avoid const warnings */
3665         rq = container_of(fence, const struct i915_request, fence);
3666         if (i915_request_completed(rq))
3667                 return 0;
3668
3669         /* Beware type-expansion follies! */
3670         BUILD_BUG_ON(!typecheck(u8, rq->engine->uabi_class));
3671         return flag(rq->engine->uabi_class);
3672 }
3673
3674 static __always_inline unsigned int
3675 busy_check_reader(const struct dma_fence *fence)
3676 {
3677         return __busy_set_if_active(fence, __busy_read_flag);
3678 }
3679
3680 static __always_inline unsigned int
3681 busy_check_writer(const struct dma_fence *fence)
3682 {
3683         if (!fence)
3684                 return 0;
3685
3686         return __busy_set_if_active(fence, __busy_write_id);
3687 }
3688
3689 int
3690 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
3691                     struct drm_file *file)
3692 {
3693         struct drm_i915_gem_busy *args = data;
3694         struct drm_i915_gem_object *obj;
3695         struct reservation_object_list *list;
3696         unsigned int seq;
3697         int err;
3698
3699         err = -ENOENT;
3700         rcu_read_lock();
3701         obj = i915_gem_object_lookup_rcu(file, args->handle);
3702         if (!obj)
3703                 goto out;
3704
3705         /*
3706          * A discrepancy here is that we do not report the status of
3707          * non-i915 fences, i.e. even though we may report the object as idle,
3708          * a call to set-domain may still stall waiting for foreign rendering.
3709          * This also means that wait-ioctl may report an object as busy,
3710          * where busy-ioctl considers it idle.
3711          *
3712          * We trade the ability to warn of foreign fences for reporting which
3713          * i915 engines are active for the object.
3714          *
3715          * Alternatively, we can trade that extra information on read/write
3716          * activity with
3717          *      args->busy =
3718          *              !reservation_object_test_signaled_rcu(obj->resv, true);
3719          * to report the overall busyness. This is what the wait-ioctl does.
3720          *
3721          */
3722 retry:
3723         seq = raw_read_seqcount(&obj->resv->seq);
3724
3725         /* Translate the exclusive fence to the READ *and* WRITE engine */
3726         args->busy = busy_check_writer(rcu_dereference(obj->resv->fence_excl));
3727
3728         /* Translate shared fences to READ set of engines */
3729         list = rcu_dereference(obj->resv->fence);
3730         if (list) {
3731                 unsigned int shared_count = list->shared_count, i;
3732
3733                 for (i = 0; i < shared_count; ++i) {
3734                         struct dma_fence *fence =
3735                                 rcu_dereference(list->shared[i]);
3736
3737                         args->busy |= busy_check_reader(fence);
3738                 }
3739         }
3740
3741         if (args->busy && read_seqcount_retry(&obj->resv->seq, seq))
3742                 goto retry;
3743
3744         err = 0;
3745 out:
3746         rcu_read_unlock();
3747         return err;
3748 }
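
/*
 * Editor's note (illustrative decode on the userspace side): given the
 * encoding above, the caller of the busy ioctl can interpret the result as
 *
 *	reading_classes = args.busy >> 16;     bitmask of uabi engine classes
 *	writing_class   = args.busy & 0xffff;  0 if idle, else class + 1
 *
 * i.e. the read flags live in the high word and the write id in the low word.
 */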
3749
3750 int
3751 i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
3752                         struct drm_file *file_priv)
3753 {
3754         return i915_gem_ring_throttle(dev, file_priv);
3755 }
3756
3757 int
3758 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
3759                        struct drm_file *file_priv)
3760 {
3761         struct drm_i915_private *dev_priv = to_i915(dev);
3762         struct drm_i915_gem_madvise *args = data;
3763         struct drm_i915_gem_object *obj;
3764         int err;
3765
3766         switch (args->madv) {
3767         case I915_MADV_DONTNEED:
3768         case I915_MADV_WILLNEED:
3769                 break;
3770         default:
3771                 return -EINVAL;
3772         }
3773
3774         obj = i915_gem_object_lookup(file_priv, args->handle);
3775         if (!obj)
3776                 return -ENOENT;
3777
3778         err = mutex_lock_interruptible(&obj->mm.lock);
3779         if (err)
3780                 goto out;
3781
3782         if (i915_gem_object_has_pages(obj) &&
3783             i915_gem_object_is_tiled(obj) &&
3784             dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
3785                 if (obj->mm.madv == I915_MADV_WILLNEED) {
3786                         GEM_BUG_ON(!obj->mm.quirked);
3787                         __i915_gem_object_unpin_pages(obj);
3788                         obj->mm.quirked = false;
3789                 }
3790                 if (args->madv == I915_MADV_WILLNEED) {
3791                         GEM_BUG_ON(obj->mm.quirked);
3792                         __i915_gem_object_pin_pages(obj);
3793                         obj->mm.quirked = true;
3794                 }
3795         }
3796
3797         if (obj->mm.madv != __I915_MADV_PURGED)
3798                 obj->mm.madv = args->madv;
3799
3800         /* if the object is no longer attached, discard its backing storage */
3801         if (obj->mm.madv == I915_MADV_DONTNEED &&
3802             !i915_gem_object_has_pages(obj))
3803                 __i915_gem_object_truncate(obj);
3804
3805         args->retained = obj->mm.madv != __I915_MADV_PURGED;
3806         mutex_unlock(&obj->mm.lock);
3807
3808 out:
3809         i915_gem_object_put(obj);
3810         return err;
3811 }
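
/*
 * Editor's sketch (userspace side, illustrative): marking a buffer purgeable
 * and checking whether its pages survived, assuming the usual libdrm wrapper
 * and the DRM_IOCTL_I915_GEM_MADVISE request from uapi i915_drm.h; fd and
 * handle are hypothetical caller state:
 *
 *	struct drm_i915_gem_madvise arg = {
 *		.handle = handle,
 *		.madv = I915_MADV_DONTNEED,
 *	};
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &arg))
 *		return -errno;
 *	if (!arg.retained)
 *		... contents were discarded, reupload before reuse ...
 */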
3812
3813 static void
3814 frontbuffer_retire(struct i915_active_request *active,
3815                    struct i915_request *request)
3816 {
3817         struct drm_i915_gem_object *obj =
3818                 container_of(active, typeof(*obj), frontbuffer_write);
3819
3820         intel_fb_obj_flush(obj, ORIGIN_CS);
3821 }
3822
3823 void i915_gem_object_init(struct drm_i915_gem_object *obj,
3824                           const struct drm_i915_gem_object_ops *ops)
3825 {
3826         mutex_init(&obj->mm.lock);
3827
3828         spin_lock_init(&obj->vma.lock);
3829         INIT_LIST_HEAD(&obj->vma.list);
3830
3831         INIT_LIST_HEAD(&obj->lut_list);
3832         INIT_LIST_HEAD(&obj->batch_pool_link);
3833
3834         init_rcu_head(&obj->rcu);
3835
3836         obj->ops = ops;
3837
3838         reservation_object_init(&obj->__builtin_resv);
3839         obj->resv = &obj->__builtin_resv;
3840
3841         obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
3842         i915_active_request_init(&obj->frontbuffer_write,
3843                                  NULL, frontbuffer_retire);
3844
3845         obj->mm.madv = I915_MADV_WILLNEED;
3846         INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
3847         mutex_init(&obj->mm.get_page.lock);
3848
3849         i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
3850 }
3851
3852 static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
3853         .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
3854                  I915_GEM_OBJECT_IS_SHRINKABLE,
3855
3856         .get_pages = i915_gem_object_get_pages_gtt,
3857         .put_pages = i915_gem_object_put_pages_gtt,
3858
3859         .pwrite = i915_gem_object_pwrite_gtt,
3860 };
3861
3862 static int i915_gem_object_create_shmem(struct drm_device *dev,
3863                                         struct drm_gem_object *obj,
3864                                         size_t size)
3865 {
3866         struct drm_i915_private *i915 = to_i915(dev);
3867         unsigned long flags = VM_NORESERVE;
3868         struct file *filp;
3869
3870         drm_gem_private_object_init(dev, obj, size);
3871
3872         if (i915->mm.gemfs)
3873                 filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size,
3874                                                  flags);
3875         else
3876                 filp = shmem_file_setup("i915", size, flags);
3877
3878         if (IS_ERR(filp))
3879                 return PTR_ERR(filp);
3880
3881         obj->filp = filp;
3882
3883         return 0;
3884 }
3885
3886 struct drm_i915_gem_object *
3887 i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
3888 {
3889         struct drm_i915_gem_object *obj;
3890         struct address_space *mapping;
3891         unsigned int cache_level;
3892         gfp_t mask;
3893         int ret;
3894
3895         /* There is a prevalence of the assumption that we fit the object's
3896          * page count inside a 32bit _signed_ variable. Let's document this and
3897          * catch if we ever need to fix it. In the meantime, if you do spot
3898          * such a local variable, please consider fixing!
3899          */
3900         if (size >> PAGE_SHIFT > INT_MAX)
3901                 return ERR_PTR(-E2BIG);
3902
3903         if (overflows_type(size, obj->base.size))
3904                 return ERR_PTR(-E2BIG);
3905
3906         obj = i915_gem_object_alloc();
3907         if (obj == NULL)
3908                 return ERR_PTR(-ENOMEM);
3909
3910         ret = i915_gem_object_create_shmem(&dev_priv->drm, &obj->base, size);
3911         if (ret)
3912                 goto fail;
3913
3914         mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
3915         if (IS_I965GM(dev_priv) || IS_I965G(dev_priv)) {
3916                 /* 965gm cannot relocate objects above 4GiB. */
3917                 mask &= ~__GFP_HIGHMEM;
3918                 mask |= __GFP_DMA32;
3919         }
3920
3921         mapping = obj->base.filp->f_mapping;
3922         mapping_set_gfp_mask(mapping, mask);
3923         GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
3924
3925         i915_gem_object_init(obj, &i915_gem_object_ops);
3926
3927         obj->write_domain = I915_GEM_DOMAIN_CPU;
3928         obj->read_domains = I915_GEM_DOMAIN_CPU;
3929
3930         if (HAS_LLC(dev_priv))
3931                 /* On some devices, we can have the GPU use the LLC (the CPU
3932                  * cache) for about a 10% performance improvement
3933                  * compared to uncached.  Graphics requests other than
3934                  * display scanout are coherent with the CPU in
3935                  * accessing this cache.  This means in this mode we
3936                  * don't need to clflush on the CPU side, and on the
3937                  * GPU side we only need to flush internal caches to
3938                  * get data visible to the CPU.
3939                  *
3940                  * However, we maintain the display planes as UC, and so
3941                  * need to rebind when first used as such.
3942                  */
3943                 cache_level = I915_CACHE_LLC;
3944         else
3945                 cache_level = I915_CACHE_NONE;
3946
3947         i915_gem_object_set_cache_coherency(obj, cache_level);
3948
3949         trace_i915_gem_object_create(obj);
3950
3951         return obj;
3952
3953 fail:
3954         i915_gem_object_free(obj);
3955         return ERR_PTR(ret);
3956 }
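
/*
 * Editor's sketch: a minimal in-kernel allocation using the constructor
 * above; PAGE_SIZE is simply a convenient example size and the put drops the
 * caller's reference when done:
 *
 *	obj = i915_gem_object_create(i915, PAGE_SIZE);
 *	if (IS_ERR(obj))
 *		return PTR_ERR(obj);
 *	...
 *	i915_gem_object_put(obj);
 */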
3957
3958 static bool discard_backing_storage(struct drm_i915_gem_object *obj)
3959 {
3960         /* If we are the last user of the backing storage (be it shmemfs
3961          * pages or stolen etc), we know that the pages are going to be
3962          * immediately released. In this case, we can then skip copying
3963          * back the contents from the GPU.
3964          */
3965
3966         if (obj->mm.madv != I915_MADV_WILLNEED)
3967                 return false;
3968
3969         if (obj->base.filp == NULL)
3970                 return true;
3971
3972         /* At first glance, this looks racy, but then again so would be
3973          * userspace racing mmap against close. However, the first external
3974          * reference to the filp can only be obtained through the
3975          * i915_gem_mmap_ioctl() which safeguards us against the user
3976          * acquiring such a reference whilst we are in the middle of
3977          * freeing the object.
3978          */
3979         return file_count(obj->base.filp) == 1;
3980 }
3981
3982 static void __i915_gem_free_objects(struct drm_i915_private *i915,
3983                                     struct llist_node *freed)
3984 {
3985         struct drm_i915_gem_object *obj, *on;
3986         intel_wakeref_t wakeref;
3987
3988         wakeref = intel_runtime_pm_get(i915);
3989         llist_for_each_entry_safe(obj, on, freed, freed) {
3990                 struct i915_vma *vma, *vn;
3991
3992                 trace_i915_gem_object_destroy(obj);
3993
3994                 mutex_lock(&i915->drm.struct_mutex);
3995
3996                 GEM_BUG_ON(i915_gem_object_is_active(obj));
3997                 list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) {
3998                         GEM_BUG_ON(i915_vma_is_active(vma));
3999                         vma->flags &= ~I915_VMA_PIN_MASK;
4000                         i915_vma_destroy(vma);
4001                 }
4002                 GEM_BUG_ON(!list_empty(&obj->vma.list));
4003                 GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree));
4004
4005                 /* This serializes freeing with the shrinker. Since the free
4006                  * is delayed, first by RCU then by the workqueue, we want the
4007                  * shrinker to be able to free pages of unreferenced objects,
4008                  * or else we may oom whilst there are plenty of deferred
4009                  * freed objects.
4010                  */
4011                 if (i915_gem_object_has_pages(obj)) {
4012                         spin_lock(&i915->mm.obj_lock);
4013                         list_del_init(&obj->mm.link);
4014                         spin_unlock(&i915->mm.obj_lock);
4015                 }
4016
4017                 mutex_unlock(&i915->drm.struct_mutex);
4018
4019                 GEM_BUG_ON(obj->bind_count);
4020                 GEM_BUG_ON(obj->userfault_count);
4021                 GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
4022                 GEM_BUG_ON(!list_empty(&obj->lut_list));
4023
4024                 if (obj->ops->release)
4025                         obj->ops->release(obj);
4026
4027                 if (WARN_ON(i915_gem_object_has_pinned_pages(obj)))
4028                         atomic_set(&obj->mm.pages_pin_count, 0);
4029                 __i915_gem_object_put_pages(obj, I915_MM_NORMAL);
4030                 GEM_BUG_ON(i915_gem_object_has_pages(obj));
4031
4032                 if (obj->base.import_attach)
4033                         drm_prime_gem_destroy(&obj->base, NULL);
4034
4035                 reservation_object_fini(&obj->__builtin_resv);
4036                 drm_gem_object_release(&obj->base);
4037                 i915_gem_info_remove_obj(i915, obj->base.size);
4038
4039                 bitmap_free(obj->bit_17);
4040                 i915_gem_object_free(obj);
4041
4042                 GEM_BUG_ON(!atomic_read(&i915->mm.free_count));
4043                 atomic_dec(&i915->mm.free_count);
4044
4045                 if (on)
4046                         cond_resched();
4047         }
4048         intel_runtime_pm_put(i915, wakeref);
4049 }
4050
4051 static void i915_gem_flush_free_objects(struct drm_i915_private *i915)
4052 {
4053         struct llist_node *freed;
4054
4055         /* Free the oldest, most stale object to keep the free_list short */
4056         freed = NULL;
4057         if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */
4058                 /* Only one consumer of llist_del_first() allowed */
4059                 spin_lock(&i915->mm.free_lock);
4060                 freed = llist_del_first(&i915->mm.free_list);
4061                 spin_unlock(&i915->mm.free_lock);
4062         }
4063         if (unlikely(freed)) {
4064                 freed->next = NULL;
4065                 __i915_gem_free_objects(i915, freed);
4066         }
4067 }
4068
4069 static void __i915_gem_free_work(struct work_struct *work)
4070 {
4071         struct drm_i915_private *i915 =
4072                 container_of(work, struct drm_i915_private, mm.free_work);
4073         struct llist_node *freed;
4074
4075         /*
4076          * All file-owned VMA should have been released by this point through
4077          * i915_gem_close_object(), or earlier by i915_gem_context_close().
4078          * However, the object may also be bound into the global GTT (e.g.
4079          * older GPUs without per-process support, or for direct access through
4080          * the GTT either for the user or for scanout). Those VMA still need to
4081          * be unbound now.
4082          */
4083
4084         spin_lock(&i915->mm.free_lock);
4085         while ((freed = llist_del_all(&i915->mm.free_list))) {
4086                 spin_unlock(&i915->mm.free_lock);
4087
4088                 __i915_gem_free_objects(i915, freed);
4089                 if (need_resched())
4090                         return;
4091
4092                 spin_lock(&i915->mm.free_lock);
4093         }
4094         spin_unlock(&i915->mm.free_lock);
4095 }
4096
4097 static void __i915_gem_free_object_rcu(struct rcu_head *head)
4098 {
4099         struct drm_i915_gem_object *obj =
4100                 container_of(head, typeof(*obj), rcu);
4101         struct drm_i915_private *i915 = to_i915(obj->base.dev);
4102
4103         /*
4104          * We reuse obj->rcu for the freed list, so we had better not treat
4105          * it like a rcu_head from this point forwards. And we expect all
4106          * objects to be freed via this path.
4107          */
4108         destroy_rcu_head(&obj->rcu);
4109
4110         /*
4111          * Since we require blocking on struct_mutex to unbind the freed
4112          * object from the GPU before releasing resources back to the
4113          * system, we can not do that directly from the RCU callback (which may
4114          * be a softirq context), but must instead then defer that work onto a
4115          * kthread. We use the RCU callback rather than move the freed object
4116          * directly onto the work queue so that we can mix between using the
4117          * worker and performing frees directly from subsequent allocations for
4118          * crude but effective memory throttling.
4119          */
4120         if (llist_add(&obj->freed, &i915->mm.free_list))
4121                 queue_work(i915->wq, &i915->mm.free_work);
4122 }
4123
4124 void i915_gem_free_object(struct drm_gem_object *gem_obj)
4125 {
4126         struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
4127
4128         if (obj->mm.quirked)
4129                 __i915_gem_object_unpin_pages(obj);
4130
4131         if (discard_backing_storage(obj))
4132                 obj->mm.madv = I915_MADV_DONTNEED;
4133
4134         /*
4135          * Before we free the object, make sure any pure RCU-only
4136          * read-side critical sections are complete, e.g.
4137          * i915_gem_busy_ioctl(). For the corresponding synchronized
4138          * lookup see i915_gem_object_lookup_rcu().
4139          */
4140         atomic_inc(&to_i915(obj->base.dev)->mm.free_count);
4141         call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
4142 }
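
/*
 * Editor's note: the deferred-free pipeline implemented by the functions
 * above, summarised for reference (all names appear in this file):
 *
 *	i915_gem_free_object()
 *	  -> call_rcu(__i915_gem_free_object_rcu)       wait for RCU readers
 *	       -> llist_add() onto i915->mm.free_list
 *	            -> queue_work(__i915_gem_free_work)  process context
 *	                 -> __i915_gem_free_objects()    takes struct_mutex,
 *	                                                 unbinds and releases
 */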
4143
4144 void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
4145 {
4146         lockdep_assert_held(&obj->base.dev->struct_mutex);
4147
4148         if (!i915_gem_object_has_active_reference(obj) &&
4149             i915_gem_object_is_active(obj))
4150                 i915_gem_object_set_active_reference(obj);
4151         else
4152                 i915_gem_object_put(obj);
4153 }
4154
4155 void i915_gem_sanitize(struct drm_i915_private *i915)
4156 {
4157         intel_wakeref_t wakeref;
4158
4159         GEM_TRACE("\n");
4160
4161         wakeref = intel_runtime_pm_get(i915);
4162         intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
4163
4164         /*
4165          * As we have just resumed the machine and woken the device up from
4166          * deep PCI sleep (presumably D3_cold), assume the HW has been reset
4167          * back to defaults, recovering from whatever wedged state we left it
4168          * in and so worth trying to use the device once more.
4169          */
4170         if (i915_terminally_wedged(i915))
4171                 i915_gem_unset_wedged(i915);
4172
4173         /*
4174          * If we inherit context state from the BIOS or earlier occupants
4175          * of the GPU, the GPU may be in an inconsistent state when we
4176          * try to take over. The only way to remove the earlier state
4177          * is by resetting. However, resetting on earlier gen is tricky as
4178          * it may impact the display and we are uncertain about the stability
4179          * of the reset, so this could be applied to even earlier gen.
4180          */
4181         intel_gt_sanitize(i915, false);
4182
4183         intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
4184         intel_runtime_pm_put(i915, wakeref);
4185
4186         mutex_lock(&i915->drm.struct_mutex);
4187         i915_gem_contexts_lost(i915);
4188         mutex_unlock(&i915->drm.struct_mutex);
4189 }
4190
4191 void i915_gem_init_swizzling(struct drm_i915_private *dev_priv)
4192 {
4193         if (INTEL_GEN(dev_priv) < 5 ||
4194             dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
4195                 return;
4196
4197         I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
4198                                  DISP_TILE_SURFACE_SWIZZLING);
4199
4200         if (IS_GEN(dev_priv, 5))
4201                 return;
4202
4203         I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
4204         if (IS_GEN(dev_priv, 6))
4205                 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
4206         else if (IS_GEN(dev_priv, 7))
4207                 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
4208         else if (IS_GEN(dev_priv, 8))
4209                 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
4210         else
4211                 BUG();
4212 }
4213
4214 static void init_unused_ring(struct drm_i915_private *dev_priv, u32 base)
4215 {
4216         I915_WRITE(RING_CTL(base), 0);
4217         I915_WRITE(RING_HEAD(base), 0);
4218         I915_WRITE(RING_TAIL(base), 0);
4219         I915_WRITE(RING_START(base), 0);
4220 }
4221
4222 static void init_unused_rings(struct drm_i915_private *dev_priv)
4223 {
4224         if (IS_I830(dev_priv)) {
4225                 init_unused_ring(dev_priv, PRB1_BASE);
4226                 init_unused_ring(dev_priv, SRB0_BASE);
4227                 init_unused_ring(dev_priv, SRB1_BASE);
4228                 init_unused_ring(dev_priv, SRB2_BASE);
4229                 init_unused_ring(dev_priv, SRB3_BASE);
4230         } else if (IS_GEN(dev_priv, 2)) {
4231                 init_unused_ring(dev_priv, SRB0_BASE);
4232                 init_unused_ring(dev_priv, SRB1_BASE);
4233         } else if (IS_GEN(dev_priv, 3)) {
4234                 init_unused_ring(dev_priv, PRB1_BASE);
4235                 init_unused_ring(dev_priv, PRB2_BASE);
4236         }
4237 }
4238
4239 int i915_gem_init_hw(struct drm_i915_private *dev_priv)
4240 {
4241         int ret;
4242
4243         dev_priv->gt.last_init_time = ktime_get();
4244
4245         /* Double layer security blanket, see i915_gem_init() */
4246         intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
4247
4248         if (HAS_EDRAM(dev_priv) && INTEL_GEN(dev_priv) < 9)
4249                 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
4250
4251         if (IS_HASWELL(dev_priv))
4252                 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ?
4253                            LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
4254
4255         /* Apply the GT workarounds... */
4256         intel_gt_apply_workarounds(dev_priv);
4257         /* ...and determine whether they are sticking. */
4258         intel_gt_verify_workarounds(dev_priv, "init");
4259
4260         i915_gem_init_swizzling(dev_priv);
4261
4262         /*
4263          * At least 830 can leave some of the unused rings
4264          * "active" (i.e. head != tail) after resume which
4265          * will prevent c3 entry. Make sure all unused rings
4266          * are totally idle.
4267          */
4268         init_unused_rings(dev_priv);
4269
4270         BUG_ON(!dev_priv->kernel_context);
4271         ret = i915_terminally_wedged(dev_priv);
4272         if (ret)
4273                 goto out;
4274
4275         ret = i915_ppgtt_init_hw(dev_priv);
4276         if (ret) {
4277                 DRM_ERROR("Enabling PPGTT failed (%d)\n", ret);
4278                 goto out;
4279         }
4280
4281         ret = intel_wopcm_init_hw(&dev_priv->wopcm);
4282         if (ret) {
4283                 DRM_ERROR("Enabling WOPCM failed (%d)\n", ret);
4284                 goto out;
4285         }
4286
4287         /* We can't enable contexts until all firmware is loaded */
4288         ret = intel_uc_init_hw(dev_priv);
4289         if (ret) {
4290                 DRM_ERROR("Enabling uc failed (%d)\n", ret);
4291                 goto out;
4292         }
4293
4294         intel_mocs_init_l3cc_table(dev_priv);
4295
4296         /* Only when the HW is re-initialised, can we replay the requests */
4297         ret = intel_engines_resume(dev_priv);
4298         if (ret)
4299                 goto cleanup_uc;
4300
4301         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
4302
4303         intel_engines_set_scheduler_caps(dev_priv);
4304         return 0;
4305
4306 cleanup_uc:
4307         intel_uc_fini_hw(dev_priv);
4308 out:
4309         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
4310
4311         return ret;
4312 }
4313
4314 static int __intel_engines_record_defaults(struct drm_i915_private *i915)
4315 {
4316         struct intel_engine_cs *engine;
4317         struct i915_gem_context *ctx;
4318         struct i915_gem_engines *e;
4319         enum intel_engine_id id;
4320         int err = 0;
4321
4322         /*
4323          * As we reset the gpu during very early sanitisation, the current
4324          * register state on the GPU should reflect its default values.
4325          * We load a context onto the hw (with restore-inhibit), then switch
4326          * over to a second context to save that default register state. We
4327          * can then prime every new context with that state so they all start
4328          * from the same default HW values.
4329          */
4330
4331         ctx = i915_gem_context_create_kernel(i915, 0);
4332         if (IS_ERR(ctx))
4333                 return PTR_ERR(ctx);
4334
4335         e = i915_gem_context_lock_engines(ctx);
4336
4337         for_each_engine(engine, i915, id) {
4338                 struct intel_context *ce = e->engines[id];
4339                 struct i915_request *rq;
4340
4341                 rq = intel_context_create_request(ce);
4342                 if (IS_ERR(rq)) {
4343                         err = PTR_ERR(rq);
4344                         goto err_active;
4345                 }
4346
4347                 err = 0;
4348                 if (rq->engine->init_context)
4349                         err = rq->engine->init_context(rq);
4350
4351                 i915_request_add(rq);
4352                 if (err)
4353                         goto err_active;
4354         }
4355
4356         /* Flush the default context image to memory, and enable powersaving. */
4357         if (!i915_gem_load_power_context(i915)) {
4358                 err = -EIO;
4359                 goto err_active;
4360         }
4361
4362         for_each_engine(engine, i915, id) {
4363                 struct intel_context *ce = e->engines[id];
4364                 struct i915_vma *state = ce->state;
4365                 void *vaddr;
4366
4367                 if (!state)
4368                         continue;
4369
4370                 GEM_BUG_ON(intel_context_is_pinned(ce));
4371
4372                 /*
4373                  * As we will hold a reference to the logical state, it will
4374                  * not be torn down with the context, and importantly the
4375                  * object will hold onto its vma (making it possible for a
4376                  * stray GTT write to corrupt our defaults). Unmap the vma
4377                  * from the GTT to prevent such accidents and reclaim the
4378                  * space.
4379                  */
4380                 err = i915_vma_unbind(state);
4381                 if (err)
4382                         goto err_active;
4383
4384                 err = i915_gem_object_set_to_cpu_domain(state->obj, false);
4385                 if (err)
4386                         goto err_active;
4387
4388                 engine->default_state = i915_gem_object_get(state->obj);
4389                 i915_gem_object_set_cache_coherency(engine->default_state,
4390                                                     I915_CACHE_LLC);
4391
4392                 /* Check we can acquire the image of the context state */
4393                 vaddr = i915_gem_object_pin_map(engine->default_state,
4394                                                 I915_MAP_FORCE_WB);
4395                 if (IS_ERR(vaddr)) {
4396                         err = PTR_ERR(vaddr);
4397                         goto err_active;
4398                 }
4399
4400                 i915_gem_object_unpin_map(engine->default_state);
4401         }
4402
4403         if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) {
4404                 unsigned int found = intel_engines_has_context_isolation(i915);
4405
4406                 /*
4407                  * Make sure that classes with multiple engine instances all
4408                  * share the same basic configuration.
4409                  */
4410                 for_each_engine(engine, i915, id) {
4411                         unsigned int bit = BIT(engine->uabi_class);
4412                         unsigned int expected = engine->default_state ? bit : 0;
4413
4414                         if ((found & bit) != expected) {
4415                                 DRM_ERROR("mismatching default context state for class %d on engine %s\n",
4416                                           engine->uabi_class, engine->name);
4417                         }
4418                 }
4419         }
4420
4421 out_ctx:
4422         i915_gem_context_unlock_engines(ctx);
4423         i915_gem_context_set_closed(ctx);
4424         i915_gem_context_put(ctx);
4425         return err;
4426
4427 err_active:
4428         /*
4429          * If we have to abandon now, we expect the engines to be idle
4430          * and ready to be torn down. The quickest way we can accomplish
4431          * this is by declaring ourselves wedged.
4432          */
4433         i915_gem_set_wedged(i915);
4434         goto out_ctx;
4435 }
4436
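     /*
      * Allocate a single scratch page and pin it high in the global GTT,
      * preferring stolen memory and falling back to an internal object.
      * The resulting vma lives in i915->gt.scratch until
      * i915_gem_fini_scratch(); it is presumably used as a safe backstop
      * target for otherwise-unmapped HW accesses.
      */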
4437 static int
4438 i915_gem_init_scratch(struct drm_i915_private *i915, unsigned int size)
4439 {
4440         struct drm_i915_gem_object *obj;
4441         struct i915_vma *vma;
4442         int ret;
4443
4444         obj = i915_gem_object_create_stolen(i915, size);
4445         if (!obj)
4446                 obj = i915_gem_object_create_internal(i915, size);
4447         if (IS_ERR(obj)) {
4448                 DRM_ERROR("Failed to allocate scratch page\n");
4449                 return PTR_ERR(obj);
4450         }
4451
4452         vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
4453         if (IS_ERR(vma)) {
4454                 ret = PTR_ERR(vma);
4455                 goto err_unref;
4456         }
4457
4458         ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
4459         if (ret)
4460                 goto err_unref;
4461
4462         i915->gt.scratch = vma;
4463         return 0;
4464
4465 err_unref:
4466         i915_gem_object_put(obj);
4467         return ret;
4468 }
4469
4470 static void i915_gem_fini_scratch(struct drm_i915_private *i915)
4471 {
4472         i915_vma_unpin_and_release(&i915->gt.scratch, 0);
4473 }
4474
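     /*
      * Debug-build sanity check: re-verify the "load" time workarounds on
      * every engine and return -EIO if any engine fails, so that the
      * caller treats the GPU as unusable rather than running with a
      * misconfigured engine.
      */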
4475 static int intel_engines_verify_workarounds(struct drm_i915_private *i915)
4476 {
4477         struct intel_engine_cs *engine;
4478         enum intel_engine_id id;
4479         int err = 0;
4480
4481         if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
4482                 return 0;
4483
4484         for_each_engine(engine, i915, id) {
4485                 if (intel_engine_verify_workarounds(engine, "load"))
4486                         err = -EIO;
4487         }
4488
4489         return err;
4490 }
4491
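     /*
      * One-time GEM initialisation at driver load: set up userptr, uc and
      * WOPCM support, the global GTT, the scratch page, the engines and
      * contexts, bring up the HW and finally record the default context
      * image. An -EIO failure disables GPU submission but keeps the
      * driver (and KMS) alive; any other error unwinds completely. See
      * the unwind comment in the error path below.
      */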
4492 int i915_gem_init(struct drm_i915_private *dev_priv)
4493 {
4494         int ret;
4495
4496         /* We need to fall back to 4K pages if the host doesn't support huge GTT. */
4497         if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_huge_gtt(dev_priv))
4498                 mkwrite_device_info(dev_priv)->page_sizes =
4499                         I915_GTT_PAGE_SIZE_4K;
4500
4501         dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1);
4502
4503         i915_timelines_init(dev_priv);
4504
4505         ret = i915_gem_init_userptr(dev_priv);
4506         if (ret)
4507                 return ret;
4508
4509         ret = intel_uc_init_misc(dev_priv);
4510         if (ret)
4511                 return ret;
4512
4513         ret = intel_wopcm_init(&dev_priv->wopcm);
4514         if (ret)
4515                 goto err_uc_misc;
4516
4517         /* This is just a security blanket to placate dragons.
4518          * On some systems, we very sporadically observe that the first TLBs
4519          * used by the CS may be stale, despite us poking the TLB reset. If
4520          * we hold the forcewake during initialisation these problems
4521          * we hold the forcewake during initialisation, these problems
4522          */
4523         mutex_lock(&dev_priv->drm.struct_mutex);
4524         intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
4525
4526         ret = i915_gem_init_ggtt(dev_priv);
4527         if (ret) {
4528                 GEM_BUG_ON(ret == -EIO);
4529                 goto err_unlock;
4530         }
4531
4532         ret = i915_gem_init_scratch(dev_priv,
4533                                     IS_GEN(dev_priv, 2) ? SZ_256K : PAGE_SIZE);
4534         if (ret) {
4535                 GEM_BUG_ON(ret == -EIO);
4536                 goto err_ggtt;
4537         }
4538
4539         ret = intel_engines_setup(dev_priv);
4540         if (ret) {
4541                 GEM_BUG_ON(ret == -EIO);
4542                 goto err_scratch;
4543         }
4544
4545         ret = i915_gem_contexts_init(dev_priv);
4546         if (ret) {
4547                 GEM_BUG_ON(ret == -EIO);
4548                 goto err_scratch;
4549         }
4550
4551         ret = intel_engines_init(dev_priv);
4552         if (ret) {
4553                 GEM_BUG_ON(ret == -EIO);
4554                 goto err_context;
4555         }
4556
4557         intel_init_gt_powersave(dev_priv);
4558
4559         ret = intel_uc_init(dev_priv);
4560         if (ret)
4561                 goto err_pm;
4562
4563         ret = i915_gem_init_hw(dev_priv);
4564         if (ret)
4565                 goto err_uc_init;
4566
4567         /*
4568          * Despite its name, intel_init_clock_gating applies not only display
4569          * clock gating workarounds, but also GT mmio workarounds and the occasional
4570          * GT power context workaround. Worse, sometimes it includes a context
4571          * register workaround which we need to apply before we record the
4572          * default HW state for all contexts.
4573          *
4574          * FIXME: break up the workarounds and apply them at the right time!
4575          */
4576         intel_init_clock_gating(dev_priv);
4577
4578         ret = intel_engines_verify_workarounds(dev_priv);
4579         if (ret)
4580                 goto err_init_hw;
4581
4582         ret = __intel_engines_record_defaults(dev_priv);
4583         if (ret)
4584                 goto err_init_hw;
4585
4586         if (i915_inject_load_failure()) {
4587                 ret = -ENODEV;
4588                 goto err_init_hw;
4589         }
4590
4591         if (i915_inject_load_failure()) {
4592                 ret = -EIO;
4593                 goto err_init_hw;
4594         }
4595
4596         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
4597         mutex_unlock(&dev_priv->drm.struct_mutex);
4598
4599         return 0;
4600
4601         /*
4602          * Unwinding is complicated by the fact that we want to handle -EIO
4603          * to mean disable GPU submission but keep KMS alive. We want to mark
4604          * the HW as irreversibly wedged, but keep enough state around that the
4605          * driver doesn't explode during runtime.
4606          */
4607 err_init_hw:
4608         mutex_unlock(&dev_priv->drm.struct_mutex);
4609
4610         i915_gem_set_wedged(dev_priv);
4611         i915_gem_suspend(dev_priv);
4612         i915_gem_suspend_late(dev_priv);
4613
4614         i915_gem_drain_workqueue(dev_priv);
4615
4616         mutex_lock(&dev_priv->drm.struct_mutex);
4617         intel_uc_fini_hw(dev_priv);
4618 err_uc_init:
4619         intel_uc_fini(dev_priv);
4620 err_pm:
4621         if (ret != -EIO) {
4622                 intel_cleanup_gt_powersave(dev_priv);
4623                 intel_engines_cleanup(dev_priv);
4624         }
4625 err_context:
4626         if (ret != -EIO)
4627                 i915_gem_contexts_fini(dev_priv);
4628 err_scratch:
4629         i915_gem_fini_scratch(dev_priv);
4630 err_ggtt:
4631 err_unlock:
4632         intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
4633         mutex_unlock(&dev_priv->drm.struct_mutex);
4634
4635 err_uc_misc:
4636         intel_uc_fini_misc(dev_priv);
4637
4638         if (ret != -EIO) {
4639                 i915_gem_cleanup_userptr(dev_priv);
4640                 i915_timelines_fini(dev_priv);
4641         }
4642
4643         if (ret == -EIO) {
4644                 mutex_lock(&dev_priv->drm.struct_mutex);
4645
4646                 /*
4647                  * Allow engine initialisation to fail by marking the GPU as
4648                  * wedged. But we only want to do this where the GPU is angry,
4649                  * for all other failures, such as an allocation failure, bail.
4650                  */
4651                 if (!i915_reset_failed(dev_priv)) {
4652                         i915_load_error(dev_priv,
4653                                         "Failed to initialize GPU, declaring it wedged!\n");
4654                         i915_gem_set_wedged(dev_priv);
4655                 }
4656
4657                 /* Minimal recovery for KMS */
4658                 ret = i915_ggtt_enable_hw(dev_priv);
4659                 i915_gem_restore_gtt_mappings(dev_priv);
4660                 i915_gem_restore_fences(dev_priv);
4661                 intel_init_clock_gating(dev_priv);
4662
4663                 mutex_unlock(&dev_priv->drm.struct_mutex);
4664         }
4665
4666         i915_gem_drain_freed_objects(dev_priv);
4667         return ret;
4668 }
4669
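     /*
      * Final GEM teardown, mirroring i915_gem_init(): the GPU must
      * already be parked (hence the GEM_BUG_ON below) before we release
      * the uc firmware, engines, contexts, scratch page and remaining
      * bookkeeping.
      */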
4670 void i915_gem_fini(struct drm_i915_private *dev_priv)
4671 {
4672         GEM_BUG_ON(dev_priv->gt.awake);
4673
4674         i915_gem_suspend_late(dev_priv);
4675         intel_disable_gt_powersave(dev_priv);
4676
4677         /* Flush any outstanding unpin_work. */
4678         i915_gem_drain_workqueue(dev_priv);
4679
4680         mutex_lock(&dev_priv->drm.struct_mutex);
4681         intel_uc_fini_hw(dev_priv);
4682         intel_uc_fini(dev_priv);
4683         intel_engines_cleanup(dev_priv);
4684         i915_gem_contexts_fini(dev_priv);
4685         i915_gem_fini_scratch(dev_priv);
4686         mutex_unlock(&dev_priv->drm.struct_mutex);
4687
4688         intel_wa_list_free(&dev_priv->gt_wa_list);
4689
4690         intel_cleanup_gt_powersave(dev_priv);
4691
4692         intel_uc_fini_misc(dev_priv);
4693         i915_gem_cleanup_userptr(dev_priv);
4694         i915_timelines_fini(dev_priv);
4695
4696         i915_gem_drain_freed_objects(dev_priv);
4697
4698         WARN_ON(!list_empty(&dev_priv->contexts.list));
4699 }
4700
4701 void i915_gem_init_mmio(struct drm_i915_private *i915)
4702 {
4703         i915_gem_sanitize(i915);
4704 }
4705
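     /*
      * Size the fence register pool for the platform (32 on gen7+ except
      * Valleyview/Cherryview, 16 on gen4+ and the later gen3 parts, 8
      * otherwise; under a vGPU we use whatever the host grants us), then
      * initialise every fence, restore the HW state and detect the bit-6
      * swizzling mode.
      */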
4706 void
4707 i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
4708 {
4709         int i;
4710
4711         if (INTEL_GEN(dev_priv) >= 7 && !IS_VALLEYVIEW(dev_priv) &&
4712             !IS_CHERRYVIEW(dev_priv))
4713                 dev_priv->num_fence_regs = 32;
4714         else if (INTEL_GEN(dev_priv) >= 4 ||
4715                  IS_I945G(dev_priv) || IS_I945GM(dev_priv) ||
4716                  IS_G33(dev_priv) || IS_PINEVIEW(dev_priv))
4717                 dev_priv->num_fence_regs = 16;
4718         else
4719                 dev_priv->num_fence_regs = 8;
4720
4721         if (intel_vgpu_active(dev_priv))
4722                 dev_priv->num_fence_regs =
4723                                 I915_READ(vgtif_reg(avail_rs.fence_num));
4724
4725         /* Initialize fence registers to zero */
4726         for (i = 0; i < dev_priv->num_fence_regs; i++) {
4727                 struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
4728
4729                 fence->i915 = dev_priv;
4730                 fence->id = i;
4731                 list_add_tail(&fence->link, &dev_priv->mm.fence_list);
4732         }
4733         i915_gem_restore_fences(dev_priv);
4734
4735         i915_gem_detect_bit_6_swizzle(dev_priv);
4736 }
4737
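     /*
      * Set up the GEM memory-management bookkeeping: the stat, object and
      * free locks, the bound/unbound/fence/userfault lists and the
      * deferred object-free worker.
      */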
4738 static void i915_gem_init__mm(struct drm_i915_private *i915)
4739 {
4740         spin_lock_init(&i915->mm.object_stat_lock);
4741         spin_lock_init(&i915->mm.obj_lock);
4742         spin_lock_init(&i915->mm.free_lock);
4743
4744         init_llist_head(&i915->mm.free_list);
4745
4746         INIT_LIST_HEAD(&i915->mm.unbound_list);
4747         INIT_LIST_HEAD(&i915->mm.bound_list);
4748         INIT_LIST_HEAD(&i915->mm.fence_list);
4749         INIT_LIST_HEAD(&i915->mm.userfault_list);
4750
4751         INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
4752 }
4753
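     /*
      * Software-only GEM initialisation performed early during driver
      * probe: GT power management state, the various lists, locks and
      * waitqueues, and (if possible) a private tmpfs mount used for
      * huge-page backed objects.
      */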
4754 int i915_gem_init_early(struct drm_i915_private *dev_priv)
4755 {
4756         int err;
4757
4758         intel_gt_pm_init(dev_priv);
4759
4760         INIT_LIST_HEAD(&dev_priv->gt.active_rings);
4761         INIT_LIST_HEAD(&dev_priv->gt.closed_vma);
4762
4763         i915_gem_init__mm(dev_priv);
4764         i915_gem_init__pm(dev_priv);
4765
4766         init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
4767         init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
4768         mutex_init(&dev_priv->gpu_error.wedge_mutex);
4769         init_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu);
4770
4771         atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
4772
4773         spin_lock_init(&dev_priv->fb_tracking.lock);
4774
4775         err = i915_gemfs_init(dev_priv);
4776         if (err)
4777                 DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled (%d).\n", err);
4778
4779         return 0;
4780 }
4781
4782 void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
4783 {
4784         i915_gem_drain_freed_objects(dev_priv);
4785         GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));
4786         GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
4787         WARN_ON(dev_priv->mm.object_count);
4788
4789         cleanup_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu);
4790
4791         i915_gemfs_fini(dev_priv);
4792 }
4793
4794 int i915_gem_freeze(struct drm_i915_private *dev_priv)
4795 {
4796         /* Discard all purgeable objects and let userspace recover those as
4797          * required after resuming.
4798          */
4799         i915_gem_shrink_all(dev_priv);
4800
4801         return 0;
4802 }
4803
4804 int i915_gem_freeze_late(struct drm_i915_private *i915)
4805 {
4806         struct drm_i915_gem_object *obj;
4807         struct list_head *phases[] = {
4808                 &i915->mm.unbound_list,
4809                 &i915->mm.bound_list,
4810                 NULL
4811         }, **phase;
4812
4813         /*
4814          * Called just before we write the hibernation image.
4815          *
4816          * We need to update the domain tracking to reflect that the CPU
4817          * will be accessing all the pages to create and restore from the
4818          * hibernation image, and so upon restoration those pages will be in the
4819          * CPU domain.
4820          *
4821          * To make sure the hibernation image contains the latest state,
4822          * we update that state just before writing out the image.
4823          *
4824          * To try to reduce the hibernation image, we manually shrink
4825          * the objects as well; see i915_gem_freeze().
4826          */
4827
4828         i915_gem_shrink(i915, -1UL, NULL, I915_SHRINK_UNBOUND);
4829         i915_gem_drain_freed_objects(i915);
4830
4831         mutex_lock(&i915->drm.struct_mutex);
4832         for (phase = phases; *phase; phase++) {
4833                 list_for_each_entry(obj, *phase, mm.link)
4834                         WARN_ON(i915_gem_object_set_to_cpu_domain(obj, true));
4835         }
4836         mutex_unlock(&i915->drm.struct_mutex);
4837
4838         return 0;
4839 }
4840
4841 void i915_gem_release(struct drm_device *dev, struct drm_file *file)
4842 {
4843         struct drm_i915_file_private *file_priv = file->driver_priv;
4844         struct i915_request *request;
4845
4846         /* Clean up our request list when the client is going away, so that
4847          * later retire_requests won't dereference our soon-to-be-gone
4848          * file_priv.
4849          */
4850         spin_lock(&file_priv->mm.lock);
4851         list_for_each_entry(request, &file_priv->mm.request_list, client_link)
4852                 request->file_priv = NULL;
4853         spin_unlock(&file_priv->mm.lock);
4854 }
4855
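     /*
      * Called for each new drm file opened against the device: allocate
      * the per-client drm_i915_file_private, initialise its request list
      * and hang bookkeeping, and set up the client's context state via
      * i915_gem_context_open().
      */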
4856 int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
4857 {
4858         struct drm_i915_file_private *file_priv;
4859         int ret;
4860
4861         DRM_DEBUG("\n");
4862
4863         file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
4864         if (!file_priv)
4865                 return -ENOMEM;
4866
4867         file->driver_priv = file_priv;
4868         file_priv->dev_priv = i915;
4869         file_priv->file = file;
4870
4871         spin_lock_init(&file_priv->mm.lock);
4872         INIT_LIST_HEAD(&file_priv->mm.request_list);
4873
4874         file_priv->bsd_engine = -1;
4875         file_priv->hang_timestamp = jiffies;
4876
4877         ret = i915_gem_context_open(i915, file);
4878         if (ret)
4879                 kfree(file_priv);
4880
4881         return ret;
4882 }
4883
4884 /**
4885  * i915_gem_track_fb - update frontbuffer tracking
4886  * @old: current GEM buffer for the frontbuffer slots
4887  * @new: new GEM buffer for the frontbuffer slots
4888  * @frontbuffer_bits: bitmask of frontbuffer slots
4889  *
4890  * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
4891  * from @old and setting them in @new. Both @old and @new can be NULL.
4892  */
4893 void i915_gem_track_fb(struct drm_i915_gem_object *old,
4894                        struct drm_i915_gem_object *new,
4895                        unsigned frontbuffer_bits)
4896 {
4897         /* Control of individual bits within the mask is guarded by
4898          * the owning plane->mutex, i.e. we can never see concurrent
4899          * manipulation of individual bits. But since the bitfield as a whole
4900          * is updated using RMW, we need to use atomics in order to update
4901          * the bits.
4902          */
4903         BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
4904                      BITS_PER_TYPE(atomic_t));
4905
4906         if (old) {
4907                 WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
4908                 atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
4909         }
4910
4911         if (new) {
4912                 WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
4913                 atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
4914         }
4915 }
4916
4917 /* Allocate a new GEM object and fill it with the supplied data */
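     /*
      * The data is copied through the shmem page cache using
      * pagecache_write_begin()/pagecache_write_end(), one page at a time,
      * so the backing store is already populated before the object is
      * first used.
      */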
4918 struct drm_i915_gem_object *
4919 i915_gem_object_create_from_data(struct drm_i915_private *dev_priv,
4920                                  const void *data, size_t size)
4921 {
4922         struct drm_i915_gem_object *obj;
4923         struct file *file;
4924         size_t offset;
4925         int err;
4926
4927         obj = i915_gem_object_create(dev_priv, round_up(size, PAGE_SIZE));
4928         if (IS_ERR(obj))
4929                 return obj;
4930
4931         GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);
4932
4933         file = obj->base.filp;
4934         offset = 0;
4935         do {
4936                 unsigned int len = min_t(typeof(size), size, PAGE_SIZE);
4937                 struct page *page;
4938                 void *pgdata, *vaddr;
4939
4940                 err = pagecache_write_begin(file, file->f_mapping,
4941                                             offset, len, 0,
4942                                             &page, &pgdata);
4943                 if (err < 0)
4944                         goto fail;
4945
4946                 vaddr = kmap(page);
4947                 memcpy(vaddr, data, len);
4948                 kunmap(page);
4949
4950                 err = pagecache_write_end(file, file->f_mapping,
4951                                           offset, len, len,
4952                                           page, pgdata);
4953                 if (err < 0)
4954                         goto fail;
4955
4956                 size -= len;
4957                 data += len;
4958                 offset += len;
4959         } while (size);
4960
4961         return obj;
4962
4963 fail:
4964         i915_gem_object_put(obj);
4965         return ERR_PTR(err);
4966 }
4967
4968 struct scatterlist *
4969 i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
4970                        unsigned int n,
4971                        unsigned int *offset)
4972 {
4973         struct i915_gem_object_page_iter *iter = &obj->mm.get_page;
4974         struct scatterlist *sg;
4975         unsigned int idx, count;
4976
4977         might_sleep();
4978         GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
4979         GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
4980
4981         /* As we iterate forward through the sg, we record each entry in a
4982          * radixtree for quick repeated (backwards) lookups. If we have seen
4983          * this index previously, we will have an entry for it.
4984          *
4985          * Initial lookup is O(N), but this is amortized to O(1) for
4986          * sequential page access (where each new request is consecutive
4987          * to the previous one). Repeated lookups are O(lg(obj->base.size)),
4988          * i.e. O(1) with a large constant!
4989          */
4990         if (n < READ_ONCE(iter->sg_idx))
4991                 goto lookup;
4992
4993         mutex_lock(&iter->lock);
4994
4995         /* We prefer to reuse the last sg so that repeated lookups of this
4996          * (or the subsequent) sg are fast - comparing against the last
4997          * sg is faster than going through the radixtree.
4998          */
4999
5000         sg = iter->sg_pos;
5001         idx = iter->sg_idx;
5002         count = __sg_page_count(sg);
5003
5004         while (idx + count <= n) {
5005                 void *entry;
5006                 unsigned long i;
5007                 int ret;
5008
5009                 /* If we cannot allocate and insert this entry, or the
5010                  * individual pages from this range, cancel updating the
5011                  * sg_idx so that on this lookup we are forced to linearly
5012                  * scan onwards, but on future lookups we will try the
5013                  * insertion again (in which case we need to be careful of
5014                  * the error return reporting that we have already inserted
5015                  * this index).
5016                  */
5017                 ret = radix_tree_insert(&iter->radix, idx, sg);
5018                 if (ret && ret != -EEXIST)
5019                         goto scan;
5020
5021                 entry = xa_mk_value(idx);
5022                 for (i = 1; i < count; i++) {
5023                         ret = radix_tree_insert(&iter->radix, idx + i, entry);
5024                         if (ret && ret != -EEXIST)
5025                                 goto scan;
5026                 }
5027
5028                 idx += count;
5029                 sg = ____sg_next(sg);
5030                 count = __sg_page_count(sg);
5031         }
5032
5033 scan:
5034         iter->sg_pos = sg;
5035         iter->sg_idx = idx;
5036
5037         mutex_unlock(&iter->lock);
5038
5039         if (unlikely(n < idx)) /* insertion completed by another thread */
5040                 goto lookup;
5041
5042         /* In case we failed to insert the entry into the radixtree, we need
5043          * to look beyond the current sg.
5044          */
5045         while (idx + count <= n) {
5046                 idx += count;
5047                 sg = ____sg_next(sg);
5048                 count = __sg_page_count(sg);
5049         }
5050
5051         *offset = n - idx;
5052         return sg;
5053
5054 lookup:
5055         rcu_read_lock();
5056
5057         sg = radix_tree_lookup(&iter->radix, n);
5058         GEM_BUG_ON(!sg);
5059
5060         /* If this index is in the middle of a multi-page sg entry,
5061          * the radix tree will contain a value entry that points
5062          * to the start of that range. We will return the pointer to
5063          * the base page and the offset of this page within the
5064          * sg entry's range.
5065          */
5066         *offset = 0;
5067         if (unlikely(xa_is_value(sg))) {
5068                 unsigned long base = xa_to_value(sg);
5069
5070                 sg = radix_tree_lookup(&iter->radix, base);
5071                 GEM_BUG_ON(!sg);
5072
5073                 *offset = n - base;
5074         }
5075
5076         rcu_read_unlock();
5077
5078         return sg;
5079 }
5080
5081 struct page *
5082 i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n)
5083 {
5084         struct scatterlist *sg;
5085         unsigned int offset;
5086
5087         GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
5088
5089         sg = i915_gem_object_get_sg(obj, n, &offset);
5090         return nth_page(sg_page(sg), offset);
5091 }
5092
5093 /* Like i915_gem_object_get_page(), but mark the returned page dirty */
5094 struct page *
5095 i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
5096                                unsigned int n)
5097 {
5098         struct page *page;
5099
5100         page = i915_gem_object_get_page(obj, n);
5101         if (!obj->mm.dirty)
5102                 set_page_dirty(page);
5103
5104         return page;
5105 }
5106
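     /*
      * Look up the DMA address of page @n of the object; if @len is
      * provided, also report how many bytes remain in the same contiguous
      * sg chunk starting at that page.
      */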
5107 dma_addr_t
5108 i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj,
5109                                     unsigned long n,
5110                                     unsigned int *len)
5111 {
5112         struct scatterlist *sg;
5113         unsigned int offset;
5114
5115         sg = i915_gem_object_get_sg(obj, n, &offset);
5116
5117         if (len)
5118                 *len = sg_dma_len(sg) - (offset << PAGE_SHIFT);
5119
5120         return sg_dma_address(sg) + (offset << PAGE_SHIFT);
5121 }
5122
5123 dma_addr_t
5124 i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
5125                                 unsigned long n)
5126 {
5127         return i915_gem_object_get_dma_address_len(obj, n, NULL);
5128 }
5129
5130
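     /*
      * Switch a shmem-backed object over to the contiguous "phys"
      * backend. The object must be unbound, unmapped, unquirked and still
      * marked WILLNEED; on success the new backing pages are perma-pinned
      * until the object is released, while the old shmem pages are handed
      * back to the original backend. Presumably only needed for legacy HW
      * paths that require physically contiguous memory.
      */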
5131 int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
5132 {
5133         struct sg_table *pages;
5134         int err;
5135
5136         if (align > obj->base.size)
5137                 return -EINVAL;
5138
5139         if (obj->ops == &i915_gem_phys_ops)
5140                 return 0;
5141
5142         if (obj->ops != &i915_gem_object_ops)
5143                 return -EINVAL;
5144
5145         err = i915_gem_object_unbind(obj);
5146         if (err)
5147                 return err;
5148
5149         mutex_lock(&obj->mm.lock);
5150
5151         if (obj->mm.madv != I915_MADV_WILLNEED) {
5152                 err = -EFAULT;
5153                 goto err_unlock;
5154         }
5155
5156         if (obj->mm.quirked) {
5157                 err = -EFAULT;
5158                 goto err_unlock;
5159         }
5160
5161         if (obj->mm.mapping) {
5162                 err = -EBUSY;
5163                 goto err_unlock;
5164         }
5165
5166         pages = __i915_gem_object_unset_pages(obj);
5167
5168         obj->ops = &i915_gem_phys_ops;
5169
5170         err = ____i915_gem_object_get_pages(obj);
5171         if (err)
5172                 goto err_xfer;
5173
5174         /* Perma-pin (until release) the physical set of pages */
5175         __i915_gem_object_pin_pages(obj);
5176
5177         if (!IS_ERR_OR_NULL(pages))
5178                 i915_gem_object_ops.put_pages(obj, pages);
5179         mutex_unlock(&obj->mm.lock);
5180         return 0;
5181
5182 err_xfer:
5183         obj->ops = &i915_gem_object_ops;
5184         if (!IS_ERR_OR_NULL(pages)) {
5185                 unsigned int sg_page_sizes = i915_sg_page_sizes(pages->sgl);
5186
5187                 __i915_gem_object_set_pages(obj, pages, sg_page_sizes);
5188         }
5189 err_unlock:
5190         mutex_unlock(&obj->mm.lock);
5191         return err;
5192 }
5193
5194 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
5195 #include "selftests/scatterlist.c"
5196 #include "selftests/mock_gem_device.c"
5197 #include "selftests/huge_gem_object.c"
5198 #include "selftests/huge_pages.c"
5199 #include "selftests/i915_gem_object.c"
5200 #include "selftests/i915_gem_coherency.c"
5201 #include "selftests/i915_gem.c"
5202 #endif