drivers/gpu/drm/i915/i915_gem.c (as of drm-intel-next-2019-03-20)
1 /*
2  * Copyright © 2008-2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *
26  */
27
28 #include <drm/drm_vma_manager.h>
29 #include <drm/drm_pci.h>
30 #include <drm/i915_drm.h>
31 #include <linux/dma-fence-array.h>
32 #include <linux/kthread.h>
33 #include <linux/reservation.h>
34 #include <linux/shmem_fs.h>
35 #include <linux/slab.h>
36 #include <linux/stop_machine.h>
37 #include <linux/swap.h>
38 #include <linux/pci.h>
39 #include <linux/dma-buf.h>
40 #include <linux/mman.h>
41
42 #include "i915_drv.h"
43 #include "i915_gem_clflush.h"
44 #include "i915_gemfs.h"
45 #include "i915_globals.h"
46 #include "i915_reset.h"
47 #include "i915_trace.h"
48 #include "i915_vgpu.h"
49
50 #include "intel_drv.h"
51 #include "intel_frontbuffer.h"
52 #include "intel_mocs.h"
53 #include "intel_workarounds.h"
54
55 static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
56
57 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
58 {
59         if (obj->cache_dirty)
60                 return false;
61
62         if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
63                 return true;
64
65         return obj->pin_global; /* currently in use by HW, keep flushed */
66 }
67
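/*
 * Reserve a scratch node in the CPU-visible (mappable) half of the GGTT.
 * The pread/pwrite slow paths below use this as a one-page window through
 * which object pages are copied when the whole object cannot be pinned
 * into the aperture.
 */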
68 static int
69 insert_mappable_node(struct i915_ggtt *ggtt,
70                      struct drm_mm_node *node, u32 size)
71 {
72         memset(node, 0, sizeof(*node));
73         return drm_mm_insert_node_in_range(&ggtt->vm.mm, node,
74                                            size, 0, I915_COLOR_UNEVICTABLE,
75                                            0, ggtt->mappable_end,
76                                            DRM_MM_INSERT_LOW);
77 }
78
79 static void
80 remove_mappable_node(struct drm_mm_node *node)
81 {
82         drm_mm_remove_node(node);
83 }
84
85 /* some bookkeeping */
86 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
87                                   u64 size)
88 {
89         spin_lock(&dev_priv->mm.object_stat_lock);
90         dev_priv->mm.object_count++;
91         dev_priv->mm.object_memory += size;
92         spin_unlock(&dev_priv->mm.object_stat_lock);
93 }
94
95 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
96                                      u64 size)
97 {
98         spin_lock(&dev_priv->mm.object_stat_lock);
99         dev_priv->mm.object_count--;
100         dev_priv->mm.object_memory -= size;
101         spin_unlock(&dev_priv->mm.object_stat_lock);
102 }
103
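/*
 * Park the GT once the GPU has gone idle: flush any residual interrupt
 * tasklets, park the engines, timelines and PMU, and drop the GT_IRQ
 * wakeref taken in i915_gem_unpark(). Normally runs from the delayed
 * idle worker that i915_gem_park() schedules below.
 */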
104 static void __i915_gem_park(struct drm_i915_private *i915)
105 {
106         intel_wakeref_t wakeref;
107
108         GEM_TRACE("\n");
109
110         lockdep_assert_held(&i915->drm.struct_mutex);
111         GEM_BUG_ON(i915->gt.active_requests);
112         GEM_BUG_ON(!list_empty(&i915->gt.active_rings));
113
114         if (!i915->gt.awake)
115                 return;
116
117         /*
118          * Be paranoid and flush a concurrent interrupt to make sure
119          * we don't reactivate any irq tasklets after parking.
120          *
121          * FIXME: Note that even though we have waited for execlists to be idle,
122          * there may still be an in-flight interrupt even though the CSB
123          * is now empty. synchronize_irq() makes sure that a residual interrupt
124          * is completed before we continue, but it doesn't prevent the HW from
125          * raising a spurious interrupt later. To complete the shield we should
126          * coordinate disabling the CS irq with flushing the interrupts.
127          */
128         synchronize_irq(i915->drm.irq);
129
130         intel_engines_park(i915);
131         i915_timelines_park(i915);
132
133         i915_pmu_gt_parked(i915);
134         i915_vma_parked(i915);
135
136         wakeref = fetch_and_zero(&i915->gt.awake);
137         GEM_BUG_ON(!wakeref);
138
139         if (INTEL_GEN(i915) >= 6)
140                 gen6_rps_idle(i915);
141
142         intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref);
143
144         i915_globals_park();
145 }
146
147 void i915_gem_park(struct drm_i915_private *i915)
148 {
149         GEM_TRACE("\n");
150
151         lockdep_assert_held(&i915->drm.struct_mutex);
152         GEM_BUG_ON(i915->gt.active_requests);
153
154         if (!i915->gt.awake)
155                 return;
156
157         /* Defer the actual call to __i915_gem_park() to prevent ping-pongs */
158         mod_delayed_work(i915->wq, &i915->gt.idle_work, msecs_to_jiffies(100));
159 }
160
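/*
 * Wake the GT for command submission: the reverse of parking above.
 * Callers must hold struct_mutex and a runtime-pm wakeref, and there
 * must already be a request accounted in gt.active_requests.
 */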
161 void i915_gem_unpark(struct drm_i915_private *i915)
162 {
163         GEM_TRACE("\n");
164
165         lockdep_assert_held(&i915->drm.struct_mutex);
166         GEM_BUG_ON(!i915->gt.active_requests);
167         assert_rpm_wakelock_held(i915);
168
169         if (i915->gt.awake)
170                 return;
171
172         /*
173          * It seems that the DMC likes to transition between the DC states a lot
174          * when there are no connected displays (no active power domains) during
175          * command submission.
176          *
177          * This activity has negative impact on the performance of the chip with
178          * huge latencies observed in the interrupt handler and elsewhere.
179          *
180          * Work around it by grabbing a GT IRQ power domain whilst there is any
181          * GT activity, preventing any DC state transitions.
182          */
183         i915->gt.awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
184         GEM_BUG_ON(!i915->gt.awake);
185
186         i915_globals_unpark();
187
188         intel_enable_gt_powersave(i915);
189         i915_update_gfx_val(i915);
190         if (INTEL_GEN(i915) >= 6)
191                 gen6_rps_busy(i915);
192         i915_pmu_gt_unparked(i915);
193
194         intel_engines_unpark(i915);
195
196         i915_queue_hangcheck(i915);
197
198         queue_delayed_work(i915->wq,
199                            &i915->gt.retire_work,
200                            round_jiffies_up_relative(HZ));
201 }
202
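/*
 * GET_APERTURE ioctl: report the total GGTT size and a best-effort
 * estimate of how much of it is still available, i.e. the total minus
 * the reserved space and the currently pinned vma.
 */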
203 int
204 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
205                             struct drm_file *file)
206 {
207         struct i915_ggtt *ggtt = &to_i915(dev)->ggtt;
208         struct drm_i915_gem_get_aperture *args = data;
209         struct i915_vma *vma;
210         u64 pinned;
211
212         mutex_lock(&ggtt->vm.mutex);
213
214         pinned = ggtt->vm.reserved;
215         list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
216                 if (i915_vma_is_pinned(vma))
217                         pinned += vma->node.size;
218
219         mutex_unlock(&ggtt->vm.mutex);
220
221         args->aper_size = ggtt->vm.total;
222         args->aper_available_size = args->aper_size - pinned;
223
224         return 0;
225 }
226
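/*
 * Snapshot the object's shmem pages into a single physically contiguous
 * DMA buffer, sized and aligned to the next power of two of the object
 * size. The allocation becomes obj->phys_handle and the sg table then
 * describes that one contiguous chunk (used for "phys" objects, e.g.
 * hardware that requires physically contiguous memory).
 */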
227 static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
228 {
229         struct address_space *mapping = obj->base.filp->f_mapping;
230         drm_dma_handle_t *phys;
231         struct sg_table *st;
232         struct scatterlist *sg;
233         char *vaddr;
234         int i;
235         int err;
236
237         if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
238                 return -EINVAL;
239
240         /* Always aligning to the object size allows a single allocation
241          * to handle all possible callers, and given typical object sizes,
242          * the alignment of the buddy allocation will naturally match.
243          */
244         phys = drm_pci_alloc(obj->base.dev,
245                              roundup_pow_of_two(obj->base.size),
246                              roundup_pow_of_two(obj->base.size));
247         if (!phys)
248                 return -ENOMEM;
249
250         vaddr = phys->vaddr;
251         for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
252                 struct page *page;
253                 char *src;
254
255                 page = shmem_read_mapping_page(mapping, i);
256                 if (IS_ERR(page)) {
257                         err = PTR_ERR(page);
258                         goto err_phys;
259                 }
260
261                 src = kmap_atomic(page);
262                 memcpy(vaddr, src, PAGE_SIZE);
263                 drm_clflush_virt_range(vaddr, PAGE_SIZE);
264                 kunmap_atomic(src);
265
266                 put_page(page);
267                 vaddr += PAGE_SIZE;
268         }
269
270         i915_gem_chipset_flush(to_i915(obj->base.dev));
271
272         st = kmalloc(sizeof(*st), GFP_KERNEL);
273         if (!st) {
274                 err = -ENOMEM;
275                 goto err_phys;
276         }
277
278         if (sg_alloc_table(st, 1, GFP_KERNEL)) {
279                 kfree(st);
280                 err = -ENOMEM;
281                 goto err_phys;
282         }
283
284         sg = st->sgl;
285         sg->offset = 0;
286         sg->length = obj->base.size;
287
288         sg_dma_address(sg) = phys->busaddr;
289         sg_dma_len(sg) = obj->base.size;
290
291         obj->phys_handle = phys;
292
293         __i915_gem_object_set_pages(obj, st, sg->length);
294
295         return 0;
296
297 err_phys:
298         drm_pci_free(obj->base.dev, phys);
299
300         return err;
301 }
302
303 static void __start_cpu_write(struct drm_i915_gem_object *obj)
304 {
305         obj->read_domains = I915_GEM_DOMAIN_CPU;
306         obj->write_domain = I915_GEM_DOMAIN_CPU;
307         if (cpu_write_needs_clflush(obj))
308                 obj->cache_dirty = true;
309 }
310
311 static void
312 __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
313                                 struct sg_table *pages,
314                                 bool needs_clflush)
315 {
316         GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);
317
318         if (obj->mm.madv == I915_MADV_DONTNEED)
319                 obj->mm.dirty = false;
320
321         if (needs_clflush &&
322             (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
323             !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
324                 drm_clflush_sg(pages);
325
326         __start_cpu_write(obj);
327 }
328
329 static void
330 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
331                                struct sg_table *pages)
332 {
333         __i915_gem_object_release_shmem(obj, pages, false);
334
335         if (obj->mm.dirty) {
336                 struct address_space *mapping = obj->base.filp->f_mapping;
337                 char *vaddr = obj->phys_handle->vaddr;
338                 int i;
339
340                 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
341                         struct page *page;
342                         char *dst;
343
344                         page = shmem_read_mapping_page(mapping, i);
345                         if (IS_ERR(page))
346                                 continue;
347
348                         dst = kmap_atomic(page);
349                         drm_clflush_virt_range(vaddr, PAGE_SIZE);
350                         memcpy(dst, vaddr, PAGE_SIZE);
351                         kunmap_atomic(dst);
352
353                         set_page_dirty(page);
354                         if (obj->mm.madv == I915_MADV_WILLNEED)
355                                 mark_page_accessed(page);
356                         put_page(page);
357                         vaddr += PAGE_SIZE;
358                 }
359                 obj->mm.dirty = false;
360         }
361
362         sg_free_table(pages);
363         kfree(pages);
364
365         drm_pci_free(obj->base.dev, obj->phys_handle);
366 }
367
368 static void
369 i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
370 {
371         i915_gem_object_unpin_pages(obj);
372 }
373
374 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
375         .get_pages = i915_gem_object_get_pages_phys,
376         .put_pages = i915_gem_object_put_pages_phys,
377         .release = i915_gem_object_release_phys,
378 };
379
380 static const struct drm_i915_gem_object_ops i915_gem_object_ops;
381
382 int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
383 {
384         struct i915_vma *vma;
385         LIST_HEAD(still_in_list);
386         int ret;
387
388         lockdep_assert_held(&obj->base.dev->struct_mutex);
389
390         /* Closed vma are removed from the obj->vma.list - but they may
391          * still have an active binding on the object. To remove those we
392          * must wait for all rendering to complete to the object (as unbinding
393          * must anyway), and retire the requests.
394          */
395         ret = i915_gem_object_set_to_cpu_domain(obj, false);
396         if (ret)
397                 return ret;
398
399         spin_lock(&obj->vma.lock);
400         while (!ret && (vma = list_first_entry_or_null(&obj->vma.list,
401                                                        struct i915_vma,
402                                                        obj_link))) {
403                 list_move_tail(&vma->obj_link, &still_in_list);
404                 spin_unlock(&obj->vma.lock);
405
406                 ret = i915_vma_unbind(vma);
407
408                 spin_lock(&obj->vma.lock);
409         }
410         list_splice(&still_in_list, &obj->vma.list);
411         spin_unlock(&obj->vma.lock);
412
413         return ret;
414 }
415
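/*
 * Wait on a single dma_fence, honouring the I915_WAIT_* flags. Native
 * i915 requests take the i915_request_wait() path, and when the caller
 * holds struct_mutex (I915_WAIT_LOCKED) completed requests are retired;
 * foreign fences fall back to dma_fence_wait_timeout(). Returns the
 * remaining timeout, or a negative error code.
 */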
416 static long
417 i915_gem_object_wait_fence(struct dma_fence *fence,
418                            unsigned int flags,
419                            long timeout)
420 {
421         struct i915_request *rq;
422
423         BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1);
424
425         if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
426                 return timeout;
427
428         if (!dma_fence_is_i915(fence))
429                 return dma_fence_wait_timeout(fence,
430                                               flags & I915_WAIT_INTERRUPTIBLE,
431                                               timeout);
432
433         rq = to_request(fence);
434         if (i915_request_completed(rq))
435                 goto out;
436
437         timeout = i915_request_wait(rq, flags, timeout);
438
439 out:
440         if (flags & I915_WAIT_LOCKED && i915_request_completed(rq))
441                 i915_request_retire_upto(rq);
442
443         return timeout;
444 }
445
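/*
 * Wait on the fences tracked by a reservation object: all shared fences
 * plus the exclusive fence if I915_WAIT_ALL is set, otherwise just the
 * exclusive fence. Once everything is known to have signalled, the fence
 * array may be pruned to drop the references it holds.
 */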
446 static long
447 i915_gem_object_wait_reservation(struct reservation_object *resv,
448                                  unsigned int flags,
449                                  long timeout)
450 {
451         unsigned int seq = __read_seqcount_begin(&resv->seq);
452         struct dma_fence *excl;
453         bool prune_fences = false;
454
455         if (flags & I915_WAIT_ALL) {
456                 struct dma_fence **shared;
457                 unsigned int count, i;
458                 int ret;
459
460                 ret = reservation_object_get_fences_rcu(resv,
461                                                         &excl, &count, &shared);
462                 if (ret)
463                         return ret;
464
465                 for (i = 0; i < count; i++) {
466                         timeout = i915_gem_object_wait_fence(shared[i],
467                                                              flags, timeout);
468                         if (timeout < 0)
469                                 break;
470
471                         dma_fence_put(shared[i]);
472                 }
473
474                 for (; i < count; i++)
475                         dma_fence_put(shared[i]);
476                 kfree(shared);
477
478                 /*
479                  * If both shared fences and an exclusive fence exist,
480                  * then by construction the shared fences must be later
481                  * than the exclusive fence. If we successfully wait for
482                  * all the shared fences, we know that the exclusive fence
483          * must also be signaled. If all the shared fences are
484                  * signaled, we can prune the array and recover the
485                  * floating references on the fences/requests.
486                  */
487                 prune_fences = count && timeout >= 0;
488         } else {
489                 excl = reservation_object_get_excl_rcu(resv);
490         }
491
492         if (excl && timeout >= 0)
493                 timeout = i915_gem_object_wait_fence(excl, flags, timeout);
494
495         dma_fence_put(excl);
496
497         /*
498          * Opportunistically prune the fences iff we know they have *all* been
499          * signaled and that the reservation object has not been changed (i.e.
500          * no new fences have been added).
501          */
502         if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) {
503                 if (reservation_object_trylock(resv)) {
504                         if (!__read_seqcount_retry(&resv->seq, seq))
505                                 reservation_object_add_excl_fence(resv, NULL);
506                         reservation_object_unlock(resv);
507                 }
508         }
509
510         return timeout;
511 }
512
513 static void __fence_set_priority(struct dma_fence *fence,
514                                  const struct i915_sched_attr *attr)
515 {
516         struct i915_request *rq;
517         struct intel_engine_cs *engine;
518
519         if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence))
520                 return;
521
522         rq = to_request(fence);
523         engine = rq->engine;
524
525         local_bh_disable();
526         rcu_read_lock(); /* RCU serialisation for set-wedged protection */
527         if (engine->schedule)
528                 engine->schedule(rq, attr);
529         rcu_read_unlock();
530         local_bh_enable(); /* kick the tasklets if queues were reprioritised */
531 }
532
533 static void fence_set_priority(struct dma_fence *fence,
534                                const struct i915_sched_attr *attr)
535 {
536         /* Recurse once into a fence-array */
537         if (dma_fence_is_array(fence)) {
538                 struct dma_fence_array *array = to_dma_fence_array(fence);
539                 int i;
540
541                 for (i = 0; i < array->num_fences; i++)
542                         __fence_set_priority(array->fences[i], attr);
543         } else {
544                 __fence_set_priority(fence, attr);
545         }
546 }
547
548 int
549 i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
550                               unsigned int flags,
551                               const struct i915_sched_attr *attr)
552 {
553         struct dma_fence *excl;
554
555         if (flags & I915_WAIT_ALL) {
556                 struct dma_fence **shared;
557                 unsigned int count, i;
558                 int ret;
559
560                 ret = reservation_object_get_fences_rcu(obj->resv,
561                                                         &excl, &count, &shared);
562                 if (ret)
563                         return ret;
564
565                 for (i = 0; i < count; i++) {
566                         fence_set_priority(shared[i], attr);
567                         dma_fence_put(shared[i]);
568                 }
569
570                 kfree(shared);
571         } else {
572                 excl = reservation_object_get_excl_rcu(obj->resv);
573         }
574
575         if (excl) {
576                 fence_set_priority(excl, attr);
577                 dma_fence_put(excl);
578         }
579         return 0;
580 }
581
582 /**
583  * Waits for rendering to the object to be completed
584  * @obj: i915 gem object
585  * @flags: how to wait (under a lock, for all rendering or just for writes etc)
586  * @timeout: how long to wait
587  */
588 int
589 i915_gem_object_wait(struct drm_i915_gem_object *obj,
590                      unsigned int flags,
591                      long timeout)
592 {
593         might_sleep();
594         GEM_BUG_ON(timeout < 0);
595
596         timeout = i915_gem_object_wait_reservation(obj->resv, flags, timeout);
597         return timeout < 0 ? timeout : 0;
598 }
599
600 static int
601 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
602                      struct drm_i915_gem_pwrite *args,
603                      struct drm_file *file)
604 {
605         void *vaddr = obj->phys_handle->vaddr + args->offset;
606         char __user *user_data = u64_to_user_ptr(args->data_ptr);
607
608         /* We manually control the domain here and pretend that it
609          * remains coherent i.e. in the GTT domain, like shmem_pwrite.
610          */
611         intel_fb_obj_invalidate(obj, ORIGIN_CPU);
612         if (copy_from_user(vaddr, user_data, args->size))
613                 return -EFAULT;
614
615         drm_clflush_virt_range(vaddr, args->size);
616         i915_gem_chipset_flush(to_i915(obj->base.dev));
617
618         intel_fb_obj_flush(obj, ORIGIN_CPU);
619         return 0;
620 }
621
622 static int
623 i915_gem_create(struct drm_file *file,
624                 struct drm_i915_private *dev_priv,
625                 u64 size,
626                 u32 *handle_p)
627 {
628         struct drm_i915_gem_object *obj;
629         int ret;
630         u32 handle;
631
632         size = roundup(size, PAGE_SIZE);
633         if (size == 0)
634                 return -EINVAL;
635
636         /* Allocate the new object */
637         obj = i915_gem_object_create(dev_priv, size);
638         if (IS_ERR(obj))
639                 return PTR_ERR(obj);
640
641         ret = drm_gem_handle_create(file, &obj->base, &handle);
642         /* drop reference from allocate - handle holds it now */
643         i915_gem_object_put(obj);
644         if (ret)
645                 return ret;
646
647         *handle_p = handle;
648         return 0;
649 }
650
651 int
652 i915_gem_dumb_create(struct drm_file *file,
653                      struct drm_device *dev,
654                      struct drm_mode_create_dumb *args)
655 {
656         /* have to work out size/pitch and return them */
657         args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
658         args->size = args->pitch * args->height;
659         return i915_gem_create(file, to_i915(dev),
660                                args->size, &args->handle);
661 }
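/*
 * For example, for a 1920x1080, 32bpp dumb buffer: pitch is
 * ALIGN(1920 * 4, 64) = 7680 bytes and size is 7680 * 1080 = 8294400
 * bytes (already a whole number of pages, so i915_gem_create() keeps
 * it as is).
 */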
662
663 static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
664 {
665         return !(obj->cache_level == I915_CACHE_NONE ||
666                  obj->cache_level == I915_CACHE_WT);
667 }
668
669 /**
670  * Creates a new mm object and returns a handle to it.
671  * @dev: drm device pointer
672  * @data: ioctl data blob
673  * @file: drm file pointer
674  */
675 int
676 i915_gem_create_ioctl(struct drm_device *dev, void *data,
677                       struct drm_file *file)
678 {
679         struct drm_i915_private *dev_priv = to_i915(dev);
680         struct drm_i915_gem_create *args = data;
681
682         i915_gem_flush_free_objects(dev_priv);
683
684         return i915_gem_create(file, dev_priv,
685                                args->size, &args->handle);
686 }
687
688 static inline enum fb_op_origin
689 fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
690 {
691         return (domain == I915_GEM_DOMAIN_GTT ?
692                 obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
693 }
694
695 void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
696 {
697         intel_wakeref_t wakeref;
698
699         /*
700          * No actual flushing is required for the GTT write domain for reads
701          * from the GTT domain. Writes to it "immediately" go to main memory
702          * as far as we know, so there's no chipset flush. It also doesn't
703          * land in the GPU render cache.
704          *
705          * However, we do have to enforce the order so that all writes through
706          * the GTT land before any writes to the device, such as updates to
707          * the GATT itself.
708          *
709          * We also have to wait a bit for the writes to land from the GTT.
710          * An uncached read (i.e. mmio) seems to be ideal for the round-trip
711          * timing. This issue has only been observed when switching quickly
712          * between GTT writes and CPU reads from inside the kernel on recent hw,
713          * and it appears to only affect discrete GTT blocks (i.e. on LLC
714          * system agents we could not reproduce this behaviour, that is
715          * until Cannonlake came along).
716          */
717
718         wmb();
719
720         if (INTEL_INFO(dev_priv)->has_coherent_ggtt)
721                 return;
722
723         i915_gem_chipset_flush(dev_priv);
724
725         with_intel_runtime_pm(dev_priv, wakeref) {
726                 spin_lock_irq(&dev_priv->uncore.lock);
727
728                 POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE));
729
730                 spin_unlock_irq(&dev_priv->uncore.lock);
731         }
732 }
733
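/*
 * Flush outstanding CPU-side writes for the object's current write
 * domain (GTT, WC or CPU) before the domain is switched; GPU (RENDER)
 * writes are only noted by marking the cache as dirty.
 */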
734 static void
735 flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
736 {
737         struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
738         struct i915_vma *vma;
739
740         if (!(obj->write_domain & flush_domains))
741                 return;
742
743         switch (obj->write_domain) {
744         case I915_GEM_DOMAIN_GTT:
745                 i915_gem_flush_ggtt_writes(dev_priv);
746
747                 intel_fb_obj_flush(obj,
748                                    fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
749
750                 for_each_ggtt_vma(vma, obj) {
751                         if (vma->iomap)
752                                 continue;
753
754                         i915_vma_unset_ggtt_write(vma);
755                 }
756                 break;
757
758         case I915_GEM_DOMAIN_WC:
759                 wmb();
760                 break;
761
762         case I915_GEM_DOMAIN_CPU:
763                 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
764                 break;
765
766         case I915_GEM_DOMAIN_RENDER:
767                 if (gpu_write_needs_clflush(obj))
768                         obj->cache_dirty = true;
769                 break;
770         }
771
772         obj->write_domain = 0;
773 }
774
775 /*
776  * Pins the specified object's pages and synchronizes the object with
777  * GPU accesses. Sets needs_clflush to non-zero if the caller should
778  * flush the object from the CPU cache.
779  */
780 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
781                                     unsigned int *needs_clflush)
782 {
783         int ret;
784
785         lockdep_assert_held(&obj->base.dev->struct_mutex);
786
787         *needs_clflush = 0;
788         if (!i915_gem_object_has_struct_page(obj))
789                 return -ENODEV;
790
791         ret = i915_gem_object_wait(obj,
792                                    I915_WAIT_INTERRUPTIBLE |
793                                    I915_WAIT_LOCKED,
794                                    MAX_SCHEDULE_TIMEOUT);
795         if (ret)
796                 return ret;
797
798         ret = i915_gem_object_pin_pages(obj);
799         if (ret)
800                 return ret;
801
802         if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
803             !static_cpu_has(X86_FEATURE_CLFLUSH)) {
804                 ret = i915_gem_object_set_to_cpu_domain(obj, false);
805                 if (ret)
806                         goto err_unpin;
807                 else
808                         goto out;
809         }
810
811         flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
812
813         /* If we're not in the cpu read domain, set ourselves into the gtt
814          * read domain and manually flush cachelines (if required). This
815          * optimizes for the case when the gpu will dirty the data
816          * anyway again before the next pread happens.
817          */
818         if (!obj->cache_dirty &&
819             !(obj->read_domains & I915_GEM_DOMAIN_CPU))
820                 *needs_clflush = CLFLUSH_BEFORE;
821
822 out:
823         /* return with the pages pinned */
824         return 0;
825
826 err_unpin:
827         i915_gem_object_unpin_pages(obj);
828         return ret;
829 }
830
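/*
 * As above, but for CPU writes through the shmem mapping: pin the pages,
 * wait for all GPU access, and report via *needs_clflush whether the
 * caller must flush cachelines before the copy (partially overwritten
 * lines) and/or after it.
 */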
831 int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
832                                      unsigned int *needs_clflush)
833 {
834         int ret;
835
836         lockdep_assert_held(&obj->base.dev->struct_mutex);
837
838         *needs_clflush = 0;
839         if (!i915_gem_object_has_struct_page(obj))
840                 return -ENODEV;
841
842         ret = i915_gem_object_wait(obj,
843                                    I915_WAIT_INTERRUPTIBLE |
844                                    I915_WAIT_LOCKED |
845                                    I915_WAIT_ALL,
846                                    MAX_SCHEDULE_TIMEOUT);
847         if (ret)
848                 return ret;
849
850         ret = i915_gem_object_pin_pages(obj);
851         if (ret)
852                 return ret;
853
854         if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
855             !static_cpu_has(X86_FEATURE_CLFLUSH)) {
856                 ret = i915_gem_object_set_to_cpu_domain(obj, true);
857                 if (ret)
858                         goto err_unpin;
859                 else
860                         goto out;
861         }
862
863         flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
864
865         /* If we're not in the cpu write domain, set ourselves into the
866          * gtt write domain and manually flush cachelines (as required).
867          * This optimizes for the case when the gpu will use the data
868          * right away and we therefore have to clflush anyway.
869          */
870         if (!obj->cache_dirty) {
871                 *needs_clflush |= CLFLUSH_AFTER;
872
873                 /*
874                  * Same trick applies to invalidate partially written
875                  * cachelines read before writing.
876                  */
877                 if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
878                         *needs_clflush |= CLFLUSH_BEFORE;
879         }
880
881 out:
882         intel_fb_obj_invalidate(obj, ORIGIN_CPU);
883         obj->mm.dirty = true;
884         /* return with the pages pinned */
885         return 0;
886
887 err_unpin:
888         i915_gem_object_unpin_pages(obj);
889         return ret;
890 }
891
892 static int
893 shmem_pread(struct page *page, int offset, int len, char __user *user_data,
894             bool needs_clflush)
895 {
896         char *vaddr;
897         int ret;
898
899         vaddr = kmap(page);
900
901         if (needs_clflush)
902                 drm_clflush_virt_range(vaddr + offset, len);
903
904         ret = __copy_to_user(user_data, vaddr + offset, len);
905
906         kunmap(page);
907
908         return ret ? -EFAULT : 0;
909 }
910
911 static int
912 i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
913                      struct drm_i915_gem_pread *args)
914 {
915         char __user *user_data;
916         u64 remain;
917         unsigned int needs_clflush;
918         unsigned int idx, offset;
919         int ret;
920
921         ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
922         if (ret)
923                 return ret;
924
925         ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
926         mutex_unlock(&obj->base.dev->struct_mutex);
927         if (ret)
928                 return ret;
929
930         remain = args->size;
931         user_data = u64_to_user_ptr(args->data_ptr);
932         offset = offset_in_page(args->offset);
933         for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
934                 struct page *page = i915_gem_object_get_page(obj, idx);
935                 unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);
936
937                 ret = shmem_pread(page, offset, length, user_data,
938                                   needs_clflush);
939                 if (ret)
940                         break;
941
942                 remain -= length;
943                 user_data += length;
944                 offset = 0;
945         }
946
947         i915_gem_obj_finish_shmem_access(obj);
948         return ret;
949 }
950
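/*
 * Copy from the GGTT aperture to userspace. Try the atomic (non-faulting)
 * mapping and __copy_to_user_inatomic() first; if the user page needs to
 * be faulted in, fall back to a regular WC mapping and copy_to_user().
 * Returns true if any bytes could not be copied.
 */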
951 static inline bool
952 gtt_user_read(struct io_mapping *mapping,
953               loff_t base, int offset,
954               char __user *user_data, int length)
955 {
956         void __iomem *vaddr;
957         unsigned long unwritten;
958
959         /* We can use the cpu mem copy function because this is X86. */
960         vaddr = io_mapping_map_atomic_wc(mapping, base);
961         unwritten = __copy_to_user_inatomic(user_data,
962                                             (void __force *)vaddr + offset,
963                                             length);
964         io_mapping_unmap_atomic(vaddr);
965         if (unwritten) {
966                 vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
967                 unwritten = copy_to_user(user_data,
968                                          (void __force *)vaddr + offset,
969                                          length);
970                 io_mapping_unmap(vaddr);
971         }
972         return unwritten;
973 }
974
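/*
 * Slow-path pread through the GGTT aperture, used when the shmem path
 * fails with -EFAULT or is unsupported (-ENODEV, no struct pages).
 * Prefer pinning the whole object into the mappable aperture; failing
 * that, fall back to a single scratch PTE and copy one page at a time.
 */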
975 static int
976 i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
977                    const struct drm_i915_gem_pread *args)
978 {
979         struct drm_i915_private *i915 = to_i915(obj->base.dev);
980         struct i915_ggtt *ggtt = &i915->ggtt;
981         intel_wakeref_t wakeref;
982         struct drm_mm_node node;
983         struct i915_vma *vma;
984         void __user *user_data;
985         u64 remain, offset;
986         int ret;
987
988         ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
989         if (ret)
990                 return ret;
991
992         wakeref = intel_runtime_pm_get(i915);
993         vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
994                                        PIN_MAPPABLE |
995                                        PIN_NONFAULT |
996                                        PIN_NONBLOCK);
997         if (!IS_ERR(vma)) {
998                 node.start = i915_ggtt_offset(vma);
999                 node.allocated = false;
1000                 ret = i915_vma_put_fence(vma);
1001                 if (ret) {
1002                         i915_vma_unpin(vma);
1003                         vma = ERR_PTR(ret);
1004                 }
1005         }
1006         if (IS_ERR(vma)) {
1007                 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
1008                 if (ret)
1009                         goto out_unlock;
1010                 GEM_BUG_ON(!node.allocated);
1011         }
1012
1013         ret = i915_gem_object_set_to_gtt_domain(obj, false);
1014         if (ret)
1015                 goto out_unpin;
1016
1017         mutex_unlock(&i915->drm.struct_mutex);
1018
1019         user_data = u64_to_user_ptr(args->data_ptr);
1020         remain = args->size;
1021         offset = args->offset;
1022
1023         while (remain > 0) {
1024                 /* Operation in this page
1025                  *
1026                  * page_base = page offset within aperture
1027                  * page_offset = offset within page
1028                  * page_length = bytes to copy for this page
1029                  */
1030                 u32 page_base = node.start;
1031                 unsigned page_offset = offset_in_page(offset);
1032                 unsigned page_length = PAGE_SIZE - page_offset;
1033                 page_length = remain < page_length ? remain : page_length;
1034                 if (node.allocated) {
1035                         wmb();
1036                         ggtt->vm.insert_page(&ggtt->vm,
1037                                              i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
1038                                              node.start, I915_CACHE_NONE, 0);
1039                         wmb();
1040                 } else {
1041                         page_base += offset & PAGE_MASK;
1042                 }
1043
1044                 if (gtt_user_read(&ggtt->iomap, page_base, page_offset,
1045                                   user_data, page_length)) {
1046                         ret = -EFAULT;
1047                         break;
1048                 }
1049
1050                 remain -= page_length;
1051                 user_data += page_length;
1052                 offset += page_length;
1053         }
1054
1055         mutex_lock(&i915->drm.struct_mutex);
1056 out_unpin:
1057         if (node.allocated) {
1058                 wmb();
1059                 ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
1060                 remove_mappable_node(&node);
1061         } else {
1062                 i915_vma_unpin(vma);
1063         }
1064 out_unlock:
1065         intel_runtime_pm_put(i915, wakeref);
1066         mutex_unlock(&i915->drm.struct_mutex);
1067
1068         return ret;
1069 }
1070
1071 /**
1072  * Reads data from the object referenced by handle.
1073  * @dev: drm device pointer
1074  * @data: ioctl data blob
1075  * @file: drm file pointer
1076  *
1077  * On error, the contents of *data are undefined.
1078  */
1079 int
1080 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
1081                      struct drm_file *file)
1082 {
1083         struct drm_i915_gem_pread *args = data;
1084         struct drm_i915_gem_object *obj;
1085         int ret;
1086
1087         if (args->size == 0)
1088                 return 0;
1089
1090         if (!access_ok(u64_to_user_ptr(args->data_ptr),
1091                        args->size))
1092                 return -EFAULT;
1093
1094         obj = i915_gem_object_lookup(file, args->handle);
1095         if (!obj)
1096                 return -ENOENT;
1097
1098         /* Bounds check source.  */
1099         if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
1100                 ret = -EINVAL;
1101                 goto out;
1102         }
1103
1104         trace_i915_gem_object_pread(obj, args->offset, args->size);
1105
1106         ret = i915_gem_object_wait(obj,
1107                                    I915_WAIT_INTERRUPTIBLE,
1108                                    MAX_SCHEDULE_TIMEOUT);
1109         if (ret)
1110                 goto out;
1111
1112         ret = i915_gem_object_pin_pages(obj);
1113         if (ret)
1114                 goto out;
1115
1116         ret = i915_gem_shmem_pread(obj, args);
1117         if (ret == -EFAULT || ret == -ENODEV)
1118                 ret = i915_gem_gtt_pread(obj, args);
1119
1120         i915_gem_object_unpin_pages(obj);
1121 out:
1122         i915_gem_object_put(obj);
1123         return ret;
1124 }
1125
1126 /* This is the fast write path which cannot handle
1127  * page faults in the source data
1128  */
1129
1130 static inline bool
1131 ggtt_write(struct io_mapping *mapping,
1132            loff_t base, int offset,
1133            char __user *user_data, int length)
1134 {
1135         void __iomem *vaddr;
1136         unsigned long unwritten;
1137
1138         /* We can use the cpu mem copy function because this is X86. */
1139         vaddr = io_mapping_map_atomic_wc(mapping, base);
1140         unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset,
1141                                                       user_data, length);
1142         io_mapping_unmap_atomic(vaddr);
1143         if (unwritten) {
1144                 vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
1145                 unwritten = copy_from_user((void __force *)vaddr + offset,
1146                                            user_data, length);
1147                 io_mapping_unmap(vaddr);
1148         }
1149
1150         return unwritten;
1151 }
1152
1153 /**
1154  * This is the fast pwrite path, where we copy the data directly from the
1155  * user into the GTT, uncached.
1156  * @obj: i915 GEM object
1157  * @args: pwrite arguments structure
1158  */
1159 static int
1160 i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
1161                          const struct drm_i915_gem_pwrite *args)
1162 {
1163         struct drm_i915_private *i915 = to_i915(obj->base.dev);
1164         struct i915_ggtt *ggtt = &i915->ggtt;
1165         intel_wakeref_t wakeref;
1166         struct drm_mm_node node;
1167         struct i915_vma *vma;
1168         u64 remain, offset;
1169         void __user *user_data;
1170         int ret;
1171
1172         ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
1173         if (ret)
1174                 return ret;
1175
1176         if (i915_gem_object_has_struct_page(obj)) {
1177                 /*
1178                  * Avoid waking the device up if we can fall back, as
1179                  * waking/resuming is very slow (worst-case 10-100 ms
1180                  * depending on PCI sleeps and our own resume time).
1181                  * This easily dwarfs any performance advantage from
1182                  * using the cache bypass of indirect GGTT access.
1183                  */
1184                 wakeref = intel_runtime_pm_get_if_in_use(i915);
1185                 if (!wakeref) {
1186                         ret = -EFAULT;
1187                         goto out_unlock;
1188                 }
1189         } else {
1190                 /* No backing pages, no fallback, we must force GGTT access */
1191                 wakeref = intel_runtime_pm_get(i915);
1192         }
1193
1194         vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
1195                                        PIN_MAPPABLE |
1196                                        PIN_NONFAULT |
1197                                        PIN_NONBLOCK);
1198         if (!IS_ERR(vma)) {
1199                 node.start = i915_ggtt_offset(vma);
1200                 node.allocated = false;
1201                 ret = i915_vma_put_fence(vma);
1202                 if (ret) {
1203                         i915_vma_unpin(vma);
1204                         vma = ERR_PTR(ret);
1205                 }
1206         }
1207         if (IS_ERR(vma)) {
1208                 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
1209                 if (ret)
1210                         goto out_rpm;
1211                 GEM_BUG_ON(!node.allocated);
1212         }
1213
1214         ret = i915_gem_object_set_to_gtt_domain(obj, true);
1215         if (ret)
1216                 goto out_unpin;
1217
1218         mutex_unlock(&i915->drm.struct_mutex);
1219
1220         intel_fb_obj_invalidate(obj, ORIGIN_CPU);
1221
1222         user_data = u64_to_user_ptr(args->data_ptr);
1223         offset = args->offset;
1224         remain = args->size;
1225         while (remain) {
1226                 /* Operation in this page
1227                  *
1228                  * page_base = page offset within aperture
1229                  * page_offset = offset within page
1230                  * page_length = bytes to copy for this page
1231                  */
1232                 u32 page_base = node.start;
1233                 unsigned int page_offset = offset_in_page(offset);
1234                 unsigned int page_length = PAGE_SIZE - page_offset;
1235                 page_length = remain < page_length ? remain : page_length;
1236                 if (node.allocated) {
1237                         wmb(); /* flush the write before we modify the GGTT */
1238                         ggtt->vm.insert_page(&ggtt->vm,
1239                                              i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
1240                                              node.start, I915_CACHE_NONE, 0);
1241                         wmb(); /* flush modifications to the GGTT (insert_page) */
1242                 } else {
1243                         page_base += offset & PAGE_MASK;
1244                 }
1245                 /* If we get a fault while copying data, then (presumably) our
1246                  * source page isn't available.  Return the error and we'll
1247                  * retry in the slow path.
1248                  * If the object is non-shmem backed, we retry with the
1249                  * path that handles page faults.
1250                  */
1251                 if (ggtt_write(&ggtt->iomap, page_base, page_offset,
1252                                user_data, page_length)) {
1253                         ret = -EFAULT;
1254                         break;
1255                 }
1256
1257                 remain -= page_length;
1258                 user_data += page_length;
1259                 offset += page_length;
1260         }
1261         intel_fb_obj_flush(obj, ORIGIN_CPU);
1262
1263         mutex_lock(&i915->drm.struct_mutex);
1264 out_unpin:
1265         if (node.allocated) {
1266                 wmb();
1267                 ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
1268                 remove_mappable_node(&node);
1269         } else {
1270                 i915_vma_unpin(vma);
1271         }
1272 out_rpm:
1273         intel_runtime_pm_put(i915, wakeref);
1274 out_unlock:
1275         mutex_unlock(&i915->drm.struct_mutex);
1276         return ret;
1277 }
1278
1279 /* Per-page copy function for the shmem pwrite fastpath.
1280  * Flushes invalid cachelines before writing to the target if
1281  * needs_clflush_before is set and flushes out any written cachelines after
1282  * writing if needs_clflush is set.
1283  */
1284 static int
1285 shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
1286              bool needs_clflush_before,
1287              bool needs_clflush_after)
1288 {
1289         char *vaddr;
1290         int ret;
1291
1292         vaddr = kmap(page);
1293
1294         if (needs_clflush_before)
1295                 drm_clflush_virt_range(vaddr + offset, len);
1296
1297         ret = __copy_from_user(vaddr + offset, user_data, len);
1298         if (!ret && needs_clflush_after)
1299                 drm_clflush_virt_range(vaddr + offset, len);
1300
1301         kunmap(page);
1302
1303         return ret ? -EFAULT : 0;
1304 }
1305
1306 static int
1307 i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
1308                       const struct drm_i915_gem_pwrite *args)
1309 {
1310         struct drm_i915_private *i915 = to_i915(obj->base.dev);
1311         void __user *user_data;
1312         u64 remain;
1313         unsigned int partial_cacheline_write;
1314         unsigned int needs_clflush;
1315         unsigned int offset, idx;
1316         int ret;
1317
1318         ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
1319         if (ret)
1320                 return ret;
1321
1322         ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
1323         mutex_unlock(&i915->drm.struct_mutex);
1324         if (ret)
1325                 return ret;
1326
1327         /* If we don't overwrite a cacheline completely we need to be
1328          * careful to have up-to-date data by first clflushing. Don't
1329          * overcomplicate things and flush the entire range being written.
1330          */
1331         partial_cacheline_write = 0;
1332         if (needs_clflush & CLFLUSH_BEFORE)
1333                 partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1;
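        /*
         * For example, with a 64 byte cacheline partial_cacheline_write is
         * 63, and (offset | length) & 63 below is non-zero whenever a copy
         * does not both start and end on a cacheline boundary, i.e. exactly
         * when the stale data must be flushed out first.
         */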
1334
1335         user_data = u64_to_user_ptr(args->data_ptr);
1336         remain = args->size;
1337         offset = offset_in_page(args->offset);
1338         for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
1339                 struct page *page = i915_gem_object_get_page(obj, idx);
1340                 unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);
1341
1342                 ret = shmem_pwrite(page, offset, length, user_data,
1343                                    (offset | length) & partial_cacheline_write,
1344                                    needs_clflush & CLFLUSH_AFTER);
1345                 if (ret)
1346                         break;
1347
1348                 remain -= length;
1349                 user_data += length;
1350                 offset = 0;
1351         }
1352
1353         intel_fb_obj_flush(obj, ORIGIN_CPU);
1354         i915_gem_obj_finish_shmem_access(obj);
1355         return ret;
1356 }
1357
1358 /**
1359  * Writes data to the object referenced by handle.
1360  * @dev: drm device
1361  * @data: ioctl data blob
1362  * @file: drm file
1363  *
1364  * On error, the contents of the buffer that were to be modified are undefined.
1365  */
1366 int
1367 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
1368                       struct drm_file *file)
1369 {
1370         struct drm_i915_gem_pwrite *args = data;
1371         struct drm_i915_gem_object *obj;
1372         int ret;
1373
1374         if (args->size == 0)
1375                 return 0;
1376
1377         if (!access_ok(u64_to_user_ptr(args->data_ptr), args->size))
1378                 return -EFAULT;
1379
1380         obj = i915_gem_object_lookup(file, args->handle);
1381         if (!obj)
1382                 return -ENOENT;
1383
1384         /* Bounds check destination. */
1385         if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
1386                 ret = -EINVAL;
1387                 goto err;
1388         }
1389
1390         /* Writes not allowed into this read-only object */
1391         if (i915_gem_object_is_readonly(obj)) {
1392                 ret = -EINVAL;
1393                 goto err;
1394         }
1395
1396         trace_i915_gem_object_pwrite(obj, args->offset, args->size);
1397
1398         ret = -ENODEV;
1399         if (obj->ops->pwrite)
1400                 ret = obj->ops->pwrite(obj, args);
1401         if (ret != -ENODEV)
1402                 goto err;
1403
1404         ret = i915_gem_object_wait(obj,
1405                                    I915_WAIT_INTERRUPTIBLE |
1406                                    I915_WAIT_ALL,
1407                                    MAX_SCHEDULE_TIMEOUT);
1408         if (ret)
1409                 goto err;
1410
1411         ret = i915_gem_object_pin_pages(obj);
1412         if (ret)
1413                 goto err;
1414
1415         ret = -EFAULT;
1416         /* We can only do the GTT pwrite on untiled buffers, as otherwise
1417          * it would end up going through the fenced access, and we'll get
1418          * different detiling behavior between reading and writing.
1419          * pread/pwrite currently are reading and writing from the CPU
1420          * perspective, requiring manual detiling by the client.
1421          */
1422         if (!i915_gem_object_has_struct_page(obj) ||
1423             cpu_write_needs_clflush(obj))
1424                 /* Note that the gtt paths might fail with non-page-backed user
1425                  * pointers (e.g. gtt mappings when moving data between
1426                  * textures). Fallback to the shmem path in that case.
1427                  */
1428                 ret = i915_gem_gtt_pwrite_fast(obj, args);
1429
1430         if (ret == -EFAULT || ret == -ENOSPC) {
1431                 if (obj->phys_handle)
1432                         ret = i915_gem_phys_pwrite(obj, args, file);
1433                 else
1434                         ret = i915_gem_shmem_pwrite(obj, args);
1435         }
1436
1437         i915_gem_object_unpin_pages(obj);
1438 err:
1439         i915_gem_object_put(obj);
1440         return ret;
1441 }
1442
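/*
 * Mark the object as recently used: move its bound GGTT vma to the tail
 * of the vm's bound list and the object itself onto the appropriate
 * bound/unbound list, so that eviction and the shrinker consider it last.
 */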
1443 static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
1444 {
1445         struct drm_i915_private *i915 = to_i915(obj->base.dev);
1446         struct list_head *list;
1447         struct i915_vma *vma;
1448
1449         GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
1450
1451         mutex_lock(&i915->ggtt.vm.mutex);
1452         for_each_ggtt_vma(vma, obj) {
1453                 if (!drm_mm_node_allocated(&vma->node))
1454                         continue;
1455
1456                 list_move_tail(&vma->vm_link, &vma->vm->bound_list);
1457         }
1458         mutex_unlock(&i915->ggtt.vm.mutex);
1459
1460         spin_lock(&i915->mm.obj_lock);
1461         list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
1462         list_move_tail(&obj->mm.link, list);
1463         spin_unlock(&i915->mm.obj_lock);
1464 }
1465
1466 /**
1467  * Called when user space prepares to use an object with the CPU, either
1468  * through the mmap ioctl's mapping or a GTT mapping.
1469  * @dev: drm device
1470  * @data: ioctl data blob
1471  * @file: drm file
1472  */
1473 int
1474 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1475                           struct drm_file *file)
1476 {
1477         struct drm_i915_gem_set_domain *args = data;
1478         struct drm_i915_gem_object *obj;
1479         u32 read_domains = args->read_domains;
1480         u32 write_domain = args->write_domain;
1481         int err;
1482
1483         /* Only handle setting domains to types used by the CPU. */
1484         if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
1485                 return -EINVAL;
1486
1487         /* Having something in the write domain implies it's in the read
1488          * domain, and only that read domain.  Enforce that in the request.
1489          */
1490         if (write_domain != 0 && read_domains != write_domain)
1491                 return -EINVAL;
1492
1493         obj = i915_gem_object_lookup(file, args->handle);
1494         if (!obj)
1495                 return -ENOENT;
1496
1497         /* Try to flush the object off the GPU without holding the lock.
1498          * We will repeat the flush holding the lock in the normal manner
1499          * to catch cases where we are gazumped.
1500          */
1501         err = i915_gem_object_wait(obj,
1502                                    I915_WAIT_INTERRUPTIBLE |
1503                                    I915_WAIT_PRIORITY |
1504                                    (write_domain ? I915_WAIT_ALL : 0),
1505                                    MAX_SCHEDULE_TIMEOUT);
1506         if (err)
1507                 goto out;
1508
1509         /*
1510          * Proxy objects do not control access to the backing storage, ergo
1511          * they cannot be used as a means to manipulate the cache domain
1512          * tracking for that backing storage. The proxy object is always
1513          * considered to be outside of any cache domain.
1514          */
1515         if (i915_gem_object_is_proxy(obj)) {
1516                 err = -ENXIO;
1517                 goto out;
1518         }
1519
1520         /*
1521          * Flush and acquire obj->pages so that we are coherent through
1522          * direct access in memory with previous cached writes through
1523          * shmemfs and that our cache domain tracking remains valid.
1524          * For example, if the obj->filp was moved to swap without us
1525          * being notified and releasing the pages, we would mistakenly
1526          * continue to assume that the obj remained out of the CPU cached
1527          * domain.
1528          */
1529         err = i915_gem_object_pin_pages(obj);
1530         if (err)
1531                 goto out;
1532
1533         err = i915_mutex_lock_interruptible(dev);
1534         if (err)
1535                 goto out_unpin;
1536
1537         if (read_domains & I915_GEM_DOMAIN_WC)
1538                 err = i915_gem_object_set_to_wc_domain(obj, write_domain);
1539         else if (read_domains & I915_GEM_DOMAIN_GTT)
1540                 err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
1541         else
1542                 err = i915_gem_object_set_to_cpu_domain(obj, write_domain);
1543
1544         /* And bump the LRU for this access */
1545         i915_gem_object_bump_inactive_ggtt(obj);
1546
1547         mutex_unlock(&dev->struct_mutex);
1548
1549         if (write_domain != 0)
1550                 intel_fb_obj_invalidate(obj,
1551                                         fb_write_origin(obj, write_domain));
1552
1553 out_unpin:
1554         i915_gem_object_unpin_pages(obj);
1555 out:
1556         i915_gem_object_put(obj);
1557         return err;
1558 }
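/*
 * Illustrative userspace sketch (not part of the driver): moving a buffer
 * into the CPU domain for reading and writing before touching a CPU mmap.
 * drm_fd and handle are assumed to already exist; error handling is elided.
 *
 *	struct drm_i915_gem_set_domain sd = {
 *		.handle = handle,
 *		.read_domains = I915_GEM_DOMAIN_CPU,
 *		.write_domain = I915_GEM_DOMAIN_CPU,
 *	};
 *
 *	if (drmIoctl(drm_fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd))
 *		err(1, "set_domain");
 */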
1559
1560 /**
1561  * i915_gem_sw_finish_ioctl - Called when user space has done writes to this buffer
1562  * @dev: drm device
1563  * @data: ioctl data blob
1564  * @file: drm file
1565  */
1566 int
1567 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1568                          struct drm_file *file)
1569 {
1570         struct drm_i915_gem_sw_finish *args = data;
1571         struct drm_i915_gem_object *obj;
1572
1573         obj = i915_gem_object_lookup(file, args->handle);
1574         if (!obj)
1575                 return -ENOENT;
1576
1577         /*
1578          * Proxy objects are barred from CPU access, so there is no
1579          * need to ban sw_finish as it is a nop.
1580          */
1581
1582         /* Pinned buffers may be scanout, so flush the cache */
1583         i915_gem_object_flush_if_display(obj);
1584         i915_gem_object_put(obj);
1585
1586         return 0;
1587 }
1588
1589 static inline bool
1590 __vma_matches(struct vm_area_struct *vma, struct file *filp,
1591               unsigned long addr, unsigned long size)
1592 {
1593         if (vma->vm_file != filp)
1594                 return false;
1595
1596         return vma->vm_start == addr &&
1597                (vma->vm_end - vma->vm_start) == PAGE_ALIGN(size);
1598 }
1599
1600 /**
1601  * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
1602  *                       it is mapped to.
1603  * @dev: drm device
1604  * @data: ioctl data blob
1605  * @file: drm file
1606  *
1607  * While the mapping holds a reference on the contents of the object, it doesn't
1608  * imply a ref on the object itself.
1609  *
1610  * IMPORTANT:
1611  *
1612  * DRM driver writers who look at this function as an example of how to do GEM
1613  * mmap support: please don't implement mmap support like this. The modern way
1614  * to implement DRM mmap support is with an mmap offset ioctl (like
1615  * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
1616  * That way debug tooling like valgrind will understand what's going on; hiding
1617  * the mmap call in a driver-private ioctl will break that. The i915 driver only
1618  * does cpu mmaps this way because we didn't know better.
1619  */
1620 int
1621 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1622                     struct drm_file *file)
1623 {
1624         struct drm_i915_gem_mmap *args = data;
1625         struct drm_i915_gem_object *obj;
1626         unsigned long addr;
1627
1628         if (args->flags & ~(I915_MMAP_WC))
1629                 return -EINVAL;
1630
1631         if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
1632                 return -ENODEV;
1633
1634         obj = i915_gem_object_lookup(file, args->handle);
1635         if (!obj)
1636                 return -ENOENT;
1637
1638         /* prime objects have no backing filp from which to GEM mmap
1639          * pages.
1640          */
1641         if (!obj->base.filp) {
1642                 addr = -ENXIO;
1643                 goto err;
1644         }
1645
1646         if (range_overflows(args->offset, args->size, (u64)obj->base.size)) {
1647                 addr = -EINVAL;
1648                 goto err;
1649         }
1650
1651         addr = vm_mmap(obj->base.filp, 0, args->size,
1652                        PROT_READ | PROT_WRITE, MAP_SHARED,
1653                        args->offset);
1654         if (IS_ERR_VALUE(addr))
1655                 goto err;
1656
1657         if (args->flags & I915_MMAP_WC) {
1658                 struct mm_struct *mm = current->mm;
1659                 struct vm_area_struct *vma;
1660
1661                 if (down_write_killable(&mm->mmap_sem)) {
1662                         addr = -EINTR;
1663                         goto err;
1664                 }
1665                 vma = find_vma(mm, addr);
1666                 if (vma && __vma_matches(vma, obj->base.filp, addr, args->size))
1667                         vma->vm_page_prot =
1668                                 pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
1669                 else
1670                         addr = -ENOMEM;
1671                 up_write(&mm->mmap_sem);
1672                 if (IS_ERR_VALUE(addr))
1673                         goto err;
1674
1675                 /* This may race, but that's ok, it only gets set */
1676                 WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
1677         }
1678         i915_gem_object_put(obj);
1679
1680         args->addr_ptr = (u64)addr;
1681         return 0;
1682
1683 err:
1684         i915_gem_object_put(obj);
1685         return addr;
1686 }
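/*
 * Illustrative userspace sketch (not part of the driver) of the legacy CPU
 * mmap ioctl above; drm_fd, handle and obj_size are assumed to exist and
 * error handling is elided.
 *
 *	struct drm_i915_gem_mmap arg = {
 *		.handle = handle,
 *		.size = obj_size,
 *		.flags = I915_MMAP_WC,	(optional; requires PAT support)
 *	};
 *	void *ptr;
 *
 *	if (drmIoctl(drm_fd, DRM_IOCTL_I915_GEM_MMAP, &arg))
 *		err(1, "mmap ioctl");
 *	ptr = (void *)(uintptr_t)arg.addr_ptr;
 */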
1687
1688 static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj)
1689 {
1690         return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT;
1691 }
1692
1693 /**
1694  * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
1695  *
1696  * A history of the GTT mmap interface:
1697  *
1698  * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to be
1699  *     aligned and suitable for fencing, and still fit into the available
1700  *     mappable space left by the pinned display objects. A classic problem
1701  *     we called the page-fault-of-doom where we would ping-pong between
1702  *     two objects that could not fit inside the GTT and so the memcpy
1703  *     would page one object in at the expense of the other between every
1704  *     single byte.
1705  *
1706  * 1 - Objects can be any size, and have any compatible fencing (X, Y, or none
1707  *     as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
1708  *     object is too large for the available space (or simply too large
1709  *     for the mappable aperture!), a view is created instead and faulted
1710  *     into userspace. (This view is aligned and sized appropriately for
1711  *     fenced access.)
1712  *
1713  * 2 - Recognise WC as a separate cache domain so that we can flush the
1714  *     delayed writes via GTT before performing direct access via WC.
1715  *
1716  * Restrictions:
1717  *
1718  *  * snoopable objects cannot be accessed via the GTT; doing so can cause machine
1719  *    hangs on some architectures and corruption on others. An attempt to service
1720  *    a GTT page fault from a snoopable object will generate a SIGBUS.
1721  *
1722  *  * the object must be able to fit into RAM (physical memory, though not
1723  *    limited to the mappable aperture).
1724  *
1725  *
1726  * Caveats:
1727  *
1728  *  * a new GTT page fault will synchronize rendering from the GPU and flush
1729  *    all data to system memory. Subsequent access will not be synchronized.
1730  *
1731  *  * all mappings are revoked on runtime device suspend.
1732  *
1733  *  * there are only 8, 16 or 32 fence registers to share between all users
1734  *    (older machines require a fence register for display and blitter access
1735  *    as well). Contention for the fence registers will cause the previous users
1736  *    to be unmapped and any new access will generate new page faults.
1737  *
1738  *  * running out of memory while servicing a fault may generate a SIGBUS,
1739  *    rather than the expected SIGSEGV.
1740  */
1741 int i915_gem_mmap_gtt_version(void)
1742 {
1743         return 2;
1744 }
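/*
 * Illustrative userspace sketch (not part of the driver): querying the GTT
 * mmap feature level documented above via GETPARAM. drm_fd is assumed to
 * exist; a negative return or an old kernel leaves ver at its default.
 *
 *	int ver = 0;
 *	struct drm_i915_getparam gp = {
 *		.param = I915_PARAM_MMAP_GTT_VERSION,
 *		.value = &ver,
 *	};
 *
 *	if (drmIoctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0 && ver >= 1)
 *		... partial views are available, see version 1 above ...
 */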
1745
1746 static inline struct i915_ggtt_view
1747 compute_partial_view(const struct drm_i915_gem_object *obj,
1748                      pgoff_t page_offset,
1749                      unsigned int chunk)
1750 {
1751         struct i915_ggtt_view view;
1752
1753         if (i915_gem_object_is_tiled(obj))
1754                 chunk = roundup(chunk, tile_row_pages(obj));
1755
1756         view.type = I915_GGTT_VIEW_PARTIAL;
1757         view.partial.offset = rounddown(page_offset, chunk);
1758         view.partial.size =
1759                 min_t(unsigned int, chunk,
1760                       (obj->base.size >> PAGE_SHIFT) - view.partial.offset);
1761
1762         /* If the partial covers the entire object, just create a normal VMA. */
1763         if (chunk >= obj->base.size >> PAGE_SHIFT)
1764                 view.type = I915_GGTT_VIEW_NORMAL;
1765
1766         return view;
1767 }
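/*
 * Worked example (assuming 4KiB pages and an untiled object): a fault at
 * page_offset 1000 into a 16MiB object (4096 pages) with chunk set to
 * MIN_CHUNK_PAGES (256) yields partial.offset = rounddown(1000, 256) = 768
 * and partial.size = min(256, 4096 - 768) = 256, i.e. a 1MiB view covering
 * pages [768, 1024). Only when chunk covers the whole object (e.g. a 512KiB
 * object, 128 pages) does the view degenerate to I915_GGTT_VIEW_NORMAL.
 */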
1768
1769 /**
1770  * i915_gem_fault - fault a page into the GTT
1771  * @vmf: fault info
1772  *
1773  * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
1774  * from userspace.  The fault handler takes care of binding the object to
1775  * the GTT (if needed), allocating and programming a fence register (again,
1776  * only if needed based on whether the old reg is still valid or the object
1777  * is tiled) and inserting a new PTE into the faulting process.
1778  *
1779  * Note that the faulting process may involve evicting existing objects
1780  * from the GTT and/or fence registers to make room.  So performance may
1781  * suffer if the GTT working set is large or there are few fence registers
1782  * left.
1783  *
1784  * The current feature set supported by i915_gem_fault() and thus GTT mmaps
1785  * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
1786  */
1787 vm_fault_t i915_gem_fault(struct vm_fault *vmf)
1788 {
1789 #define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT)
1790         struct vm_area_struct *area = vmf->vma;
1791         struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
1792         struct drm_device *dev = obj->base.dev;
1793         struct drm_i915_private *dev_priv = to_i915(dev);
1794         struct i915_ggtt *ggtt = &dev_priv->ggtt;
1795         bool write = area->vm_flags & VM_WRITE;
1796         intel_wakeref_t wakeref;
1797         struct i915_vma *vma;
1798         pgoff_t page_offset;
1799         int srcu;
1800         int ret;
1801
1802         /* Sanity check that we allow writing into this object */
1803         if (i915_gem_object_is_readonly(obj) && write)
1804                 return VM_FAULT_SIGBUS;
1805
1806         /* We don't use vmf->pgoff since that has the fake offset */
1807         page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
1808
1809         trace_i915_gem_object_fault(obj, page_offset, true, write);
1810
1811         /* Try to flush the object off the GPU first without holding the lock.
1812          * Upon acquiring the lock, we will perform our sanity checks and then
1813          * repeat the flush holding the lock in the normal manner to catch cases
1814          * where we are gazumped.
1815          */
1816         ret = i915_gem_object_wait(obj,
1817                                    I915_WAIT_INTERRUPTIBLE,
1818                                    MAX_SCHEDULE_TIMEOUT);
1819         if (ret)
1820                 goto err;
1821
1822         ret = i915_gem_object_pin_pages(obj);
1823         if (ret)
1824                 goto err;
1825
1826         wakeref = intel_runtime_pm_get(dev_priv);
1827
1828         srcu = i915_reset_trylock(dev_priv);
1829         if (srcu < 0) {
1830                 ret = srcu;
1831                 goto err_rpm;
1832         }
1833
1834         ret = i915_mutex_lock_interruptible(dev);
1835         if (ret)
1836                 goto err_reset;
1837
1838         /* Access to snoopable pages through the GTT is incoherent. */
1839         if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) {
1840                 ret = -EFAULT;
1841                 goto err_unlock;
1842         }
1843
1844         /* Now pin it into the GTT as needed */
1845         vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
1846                                        PIN_MAPPABLE |
1847                                        PIN_NONBLOCK |
1848                                        PIN_NONFAULT);
1849         if (IS_ERR(vma)) {
1850                 /* Use a partial view if it is bigger than available space */
1851                 struct i915_ggtt_view view =
1852                         compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
1853                 unsigned int flags;
1854
1855                 flags = PIN_MAPPABLE;
1856                 if (view.type == I915_GGTT_VIEW_NORMAL)
1857                         flags |= PIN_NONBLOCK; /* avoid warnings for pinned */
1858
1859                 /*
1860                  * Userspace is now writing through an untracked VMA, abandon
1861                  * all hope that the hardware is able to track future writes.
1862                  */
1863                 obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
1864
1865                 vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
1866                 if (IS_ERR(vma) && !view.type) {
1867                         flags = PIN_MAPPABLE;
1868                         view.type = I915_GGTT_VIEW_PARTIAL;
1869                         vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
1870                 }
1871         }
1872         if (IS_ERR(vma)) {
1873                 ret = PTR_ERR(vma);
1874                 goto err_unlock;
1875         }
1876
1877         ret = i915_gem_object_set_to_gtt_domain(obj, write);
1878         if (ret)
1879                 goto err_unpin;
1880
1881         ret = i915_vma_pin_fence(vma);
1882         if (ret)
1883                 goto err_unpin;
1884
1885         /* Finally, remap it using the new GTT offset */
1886         ret = remap_io_mapping(area,
1887                                area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT),
1888                                (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT,
1889                                min_t(u64, vma->size, area->vm_end - area->vm_start),
1890                                &ggtt->iomap);
1891         if (ret)
1892                 goto err_fence;
1893
1894         /* Mark as being mmapped into userspace for later revocation */
1895         assert_rpm_wakelock_held(dev_priv);
1896         if (!i915_vma_set_userfault(vma) && !obj->userfault_count++)
1897                 list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
1898         GEM_BUG_ON(!obj->userfault_count);
1899
1900         i915_vma_set_ggtt_write(vma);
1901
1902 err_fence:
1903         i915_vma_unpin_fence(vma);
1904 err_unpin:
1905         __i915_vma_unpin(vma);
1906 err_unlock:
1907         mutex_unlock(&dev->struct_mutex);
1908 err_reset:
1909         i915_reset_unlock(dev_priv, srcu);
1910 err_rpm:
1911         intel_runtime_pm_put(dev_priv, wakeref);
1912         i915_gem_object_unpin_pages(obj);
1913 err:
1914         switch (ret) {
1915         case -EIO:
1916                 /*
1917                  * We eat errors when the gpu is terminally wedged to avoid
1918                  * userspace unduly crashing (gl has no provisions for mmaps to
1919                  * fail). But any other -EIO isn't ours (e.g. swap in failure)
1920                  * and so needs to be reported.
1921                  */
1922                 if (!i915_terminally_wedged(dev_priv))
1923                         return VM_FAULT_SIGBUS;
1924                 /* else: fall through */
1925         case -EAGAIN:
1926                 /*
1927                  * EAGAIN means the gpu is hung and we'll wait for the error
1928                  * handler to reset everything when re-faulting in
1929                  * i915_mutex_lock_interruptible.
1930                  */
1931         case 0:
1932         case -ERESTARTSYS:
1933         case -EINTR:
1934         case -EBUSY:
1935                 /*
1936                  * EBUSY is ok: this just means that another thread
1937                  * already did the job.
1938                  */
1939                 return VM_FAULT_NOPAGE;
1940         case -ENOMEM:
1941                 return VM_FAULT_OOM;
1942         case -ENOSPC:
1943         case -EFAULT:
1944                 return VM_FAULT_SIGBUS;
1945         default:
1946                 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
1947                 return VM_FAULT_SIGBUS;
1948         }
1949 }
1950
1951 static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
1952 {
1953         struct i915_vma *vma;
1954
1955         GEM_BUG_ON(!obj->userfault_count);
1956
1957         obj->userfault_count = 0;
1958         list_del(&obj->userfault_link);
1959         drm_vma_node_unmap(&obj->base.vma_node,
1960                            obj->base.dev->anon_inode->i_mapping);
1961
1962         for_each_ggtt_vma(vma, obj)
1963                 i915_vma_unset_userfault(vma);
1964 }
1965
1966 /**
1967  * i915_gem_release_mmap - remove physical page mappings
1968  * @obj: obj in question
1969  *
1970  * Preserve the reservation of the mmapping with the DRM core code, but
1971  * relinquish ownership of the pages back to the system.
1972  *
1973  * It is vital that we remove the page mapping if we have mapped a tiled
1974  * object through the GTT and then lose the fence register due to
1975  * resource pressure. Similarly if the object has been moved out of the
1976  * aperture, then pages mapped into userspace must be revoked. Removing the
1977  * mapping will then trigger a page fault on the next user access, allowing
1978  * fixup by i915_gem_fault().
1979  */
1980 void
1981 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
1982 {
1983         struct drm_i915_private *i915 = to_i915(obj->base.dev);
1984         intel_wakeref_t wakeref;
1985
1986         /* Serialisation between user GTT access and our code depends upon
1987          * revoking the CPU's PTE whilst the mutex is held. The next user
1988          * pagefault then has to wait until we release the mutex.
1989          *
1990          * Note that RPM complicates this somewhat by adding an additional
1991          * requirement that operations to the GGTT be made holding the RPM
1992          * wakeref.
1993          */
1994         lockdep_assert_held(&i915->drm.struct_mutex);
1995         wakeref = intel_runtime_pm_get(i915);
1996
1997         if (!obj->userfault_count)
1998                 goto out;
1999
2000         __i915_gem_object_release_mmap(obj);
2001
2002         /* Ensure that the CPU's PTEs are revoked and there are no outstanding
2003          * memory transactions from userspace before we return. The TLB
2004          * flushing implied by changing the PTEs above *should* be
2005          * sufficient; an extra barrier here just provides us with a bit
2006          * of paranoid documentation about our requirement to serialise
2007          * memory writes before touching registers / GSM.
2008          */
2009         wmb();
2010
2011 out:
2012         intel_runtime_pm_put(i915, wakeref);
2013 }
2014
2015 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
2016 {
2017         struct drm_i915_gem_object *obj, *on;
2018         int i;
2019
2020         /*
2021          * Only called during RPM suspend. All users of the userfault_list
2022          * must be holding an RPM wakeref to ensure that this cannot
2023          * run concurrently with themselves (and use the struct_mutex for
2024          * protection between themselves).
2025          */
2026
2027         list_for_each_entry_safe(obj, on,
2028                                  &dev_priv->mm.userfault_list, userfault_link)
2029                 __i915_gem_object_release_mmap(obj);
2030
2031         /* The fences will be lost when the device powers down. If any were
2032          * in use by hardware (i.e. they are pinned), we should not be powering
2033          * down! All other fences will be reacquired by the user upon waking.
2034          */
2035         for (i = 0; i < dev_priv->num_fence_regs; i++) {
2036                 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
2037
2038                 /* Ideally we want to assert that the fence register is not
2039                  * live at this point (i.e. that no piece of code will be
2040                  * trying to write through fence + GTT, as that both violates
2041                  * our tracking of activity and associated locking/barriers,
2042                  * our tracking of activity and associated locking/barriers, and
2043                  * is also illegal given that the hw is powered down).
2044                  * Previously we used reg->pin_count as a "liveness" indicator.
2045                  * That is not sufficient, and we need a more fine-grained
2046                  * tool if we want to have a sanity check here.
2047                  */
2048
2049                 if (!reg->vma)
2050                         continue;
2051
2052                 GEM_BUG_ON(i915_vma_has_userfault(reg->vma));
2053                 reg->dirty = true;
2054         }
2055 }
2056
2057 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
2058 {
2059         struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2060         int err;
2061
2062         err = drm_gem_create_mmap_offset(&obj->base);
2063         if (likely(!err))
2064                 return 0;
2065
2066         /* Attempt to reap some mmap space from dead objects */
2067         do {
2068                 err = i915_gem_wait_for_idle(dev_priv,
2069                                              I915_WAIT_INTERRUPTIBLE,
2070                                              MAX_SCHEDULE_TIMEOUT);
2071                 if (err)
2072                         break;
2073
2074                 i915_gem_drain_freed_objects(dev_priv);
2075                 err = drm_gem_create_mmap_offset(&obj->base);
2076                 if (!err)
2077                         break;
2078
2079         } while (flush_delayed_work(&dev_priv->gt.retire_work));
2080
2081         return err;
2082 }
2083
2084 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
2085 {
2086         drm_gem_free_mmap_offset(&obj->base);
2087 }
2088
2089 int
2090 i915_gem_mmap_gtt(struct drm_file *file,
2091                   struct drm_device *dev,
2092                   u32 handle,
2093                   u64 *offset)
2094 {
2095         struct drm_i915_gem_object *obj;
2096         int ret;
2097
2098         obj = i915_gem_object_lookup(file, handle);
2099         if (!obj)
2100                 return -ENOENT;
2101
2102         ret = i915_gem_object_create_mmap_offset(obj);
2103         if (ret == 0)
2104                 *offset = drm_vma_node_offset_addr(&obj->base.vma_node);
2105
2106         i915_gem_object_put(obj);
2107         return ret;
2108 }
2109
2110 /**
2111  * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
2112  * @dev: DRM device
2113  * @data: GTT mapping ioctl data
2114  * @file: GEM object info
2115  *
2116  * Simply returns the fake offset to userspace so it can mmap it.
2117  * The mmap call will end up in drm_gem_mmap(), which will set things
2118  * up so we can get faults in the handler above.
2119  *
2120  * The fault handler will take care of binding the object into the GTT
2121  * (since it may have been evicted to make room for something), allocating
2122  * a fence register, and mapping the appropriate aperture address into
2123  * userspace.
2124  */
2125 int
2126 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
2127                         struct drm_file *file)
2128 {
2129         struct drm_i915_gem_mmap_gtt *args = data;
2130
2131         return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
2132 }
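/*
 * Illustrative userspace sketch (not part of the driver): the fake offset
 * returned above is fed straight into mmap() on the DRM fd. drm_fd, handle
 * and obj_size are assumed to exist; error handling is elided.
 *
 *	struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
 *	void *ptr;
 *
 *	if (drmIoctl(drm_fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg))
 *		err(1, "mmap_gtt ioctl");
 *	ptr = mmap(NULL, obj_size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *		   drm_fd, arg.offset);
 */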
2133
2134 /* Immediately discard the backing storage */
2135 static void
2136 i915_gem_object_truncate(struct drm_i915_gem_object *obj)
2137 {
2138         i915_gem_object_free_mmap_offset(obj);
2139
2140         if (obj->base.filp == NULL)
2141                 return;
2142
2143         /* Our goal here is to return as much of the memory as
2144          * possible back to the system, as we are called from OOM.
2145          * To do this we must instruct the shmemfs to drop all of its
2146          * backing pages, *now*.
2147          */
2148         shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
2149         obj->mm.madv = __I915_MADV_PURGED;
2150         obj->mm.pages = ERR_PTR(-EFAULT);
2151 }
2152
2153 /* Try to discard unwanted pages */
2154 void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
2155 {
2156         struct address_space *mapping;
2157
2158         lockdep_assert_held(&obj->mm.lock);
2159         GEM_BUG_ON(i915_gem_object_has_pages(obj));
2160
2161         switch (obj->mm.madv) {
2162         case I915_MADV_DONTNEED:
2163                 i915_gem_object_truncate(obj); /* fall through */
2164         case __I915_MADV_PURGED:
2165                 return;
2166         }
2167
2168         if (obj->base.filp == NULL)
2169                 return;
2170
2171         mapping = obj->base.filp->f_mapping;
2172         invalidate_mapping_pages(mapping, 0, (loff_t)-1);
2173 }
2174
2175 /*
2176  * Move pages to appropriate lru and release the pagevec, decrementing the
2177  * ref count of those pages.
2178  */
2179 static void check_release_pagevec(struct pagevec *pvec)
2180 {
2181         check_move_unevictable_pages(pvec);
2182         __pagevec_release(pvec);
2183         cond_resched();
2184 }
2185
2186 static void
2187 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
2188                               struct sg_table *pages)
2189 {
2190         struct sgt_iter sgt_iter;
2191         struct pagevec pvec;
2192         struct page *page;
2193
2194         __i915_gem_object_release_shmem(obj, pages, true);
2195
2196         i915_gem_gtt_finish_pages(obj, pages);
2197
2198         if (i915_gem_object_needs_bit17_swizzle(obj))
2199                 i915_gem_object_save_bit_17_swizzle(obj, pages);
2200
2201         mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping);
2202
2203         pagevec_init(&pvec);
2204         for_each_sgt_page(page, sgt_iter, pages) {
2205                 if (obj->mm.dirty)
2206                         set_page_dirty(page);
2207
2208                 if (obj->mm.madv == I915_MADV_WILLNEED)
2209                         mark_page_accessed(page);
2210
2211                 if (!pagevec_add(&pvec, page))
2212                         check_release_pagevec(&pvec);
2213         }
2214         if (pagevec_count(&pvec))
2215                 check_release_pagevec(&pvec);
2216         obj->mm.dirty = false;
2217
2218         sg_free_table(pages);
2219         kfree(pages);
2220 }
2221
2222 static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
2223 {
2224         struct radix_tree_iter iter;
2225         void __rcu **slot;
2226
2227         rcu_read_lock();
2228         radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0)
2229                 radix_tree_delete(&obj->mm.get_page.radix, iter.index);
2230         rcu_read_unlock();
2231 }
2232
2233 static struct sg_table *
2234 __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
2235 {
2236         struct drm_i915_private *i915 = to_i915(obj->base.dev);
2237         struct sg_table *pages;
2238
2239         pages = fetch_and_zero(&obj->mm.pages);
2240         if (IS_ERR_OR_NULL(pages))
2241                 return pages;
2242
2243         spin_lock(&i915->mm.obj_lock);
2244         list_del(&obj->mm.link);
2245         spin_unlock(&i915->mm.obj_lock);
2246
2247         if (obj->mm.mapping) {
2248                 void *ptr;
2249
2250                 ptr = page_mask_bits(obj->mm.mapping);
2251                 if (is_vmalloc_addr(ptr))
2252                         vunmap(ptr);
2253                 else
2254                         kunmap(kmap_to_page(ptr));
2255
2256                 obj->mm.mapping = NULL;
2257         }
2258
2259         __i915_gem_object_reset_page_iter(obj);
2260         obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
2261
2262         return pages;
2263 }
2264
2265 int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
2266                                 enum i915_mm_subclass subclass)
2267 {
2268         struct sg_table *pages;
2269         int ret;
2270
2271         if (i915_gem_object_has_pinned_pages(obj))
2272                 return -EBUSY;
2273
2274         GEM_BUG_ON(obj->bind_count);
2275
2276         /* May be called by shrinker from within get_pages() (on another bo) */
2277         mutex_lock_nested(&obj->mm.lock, subclass);
2278         if (unlikely(atomic_read(&obj->mm.pages_pin_count))) {
2279                 ret = -EBUSY;
2280                 goto unlock;
2281         }
2282
2283         /*
2284          * ->put_pages might need to allocate memory for the bit17 swizzle
2285          * array, hence protect them from being reaped by removing them from gtt
2286          * lists early.
2287          */
2288         pages = __i915_gem_object_unset_pages(obj);
2289
2290         /*
2291          * XXX Temporary hijinx to avoid updating all backends to handle
2292          * NULL pages. In the future, when we have more asynchronous
2293          * get_pages backends we should be better able to handle the
2294          * cancellation of the async task in a more uniform manner.
2295          */
2296         if (!pages && !i915_gem_object_needs_async_cancel(obj))
2297                 pages = ERR_PTR(-EINVAL);
2298
2299         if (!IS_ERR(pages))
2300                 obj->ops->put_pages(obj, pages);
2301
2302         ret = 0;
2303 unlock:
2304         mutex_unlock(&obj->mm.lock);
2305
2306         return ret;
2307 }
2308
2309 bool i915_sg_trim(struct sg_table *orig_st)
2310 {
2311         struct sg_table new_st;
2312         struct scatterlist *sg, *new_sg;
2313         unsigned int i;
2314
2315         if (orig_st->nents == orig_st->orig_nents)
2316                 return false;
2317
2318         if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN))
2319                 return false;
2320
2321         new_sg = new_st.sgl;
2322         for_each_sg(orig_st->sgl, sg, orig_st->nents, i) {
2323                 sg_set_page(new_sg, sg_page(sg), sg->length, 0);
2324                 sg_dma_address(new_sg) = sg_dma_address(sg);
2325                 sg_dma_len(new_sg) = sg_dma_len(sg);
2326
2327                 new_sg = sg_next(new_sg);
2328         }
2329         GEM_BUG_ON(new_sg); /* Should walk exactly nents and hit the end */
2330
2331         sg_free_table(orig_st);
2332
2333         *orig_st = new_st;
2334         return true;
2335 }
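/*
 * For example, the shmem backend below allocates one scatterlist entry per
 * 4KiB page (orig_nents == page count), but physically contiguous pages are
 * coalesced as they are added, so a 4MiB object may end up using only a
 * handful of its 1024 entries; trimming copies those into a right-sized
 * table and frees the oversized original.
 */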
2336
2337 static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2338 {
2339         struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2340         const unsigned long page_count = obj->base.size / PAGE_SIZE;
2341         unsigned long i;
2342         struct address_space *mapping;
2343         struct sg_table *st;
2344         struct scatterlist *sg;
2345         struct sgt_iter sgt_iter;
2346         struct page *page;
2347         unsigned long last_pfn = 0;     /* suppress gcc warning */
2348         unsigned int max_segment = i915_sg_segment_size();
2349         unsigned int sg_page_sizes;
2350         struct pagevec pvec;
2351         gfp_t noreclaim;
2352         int ret;
2353
2354         /*
2355          * Assert that the object is not currently in any GPU domain. As it
2356          * wasn't in the GTT, there shouldn't be any way it could have been in
2357          * a GPU cache
2358          */
2359         GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
2360         GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
2361
2362         /*
2363          * If there's no chance of allocating enough pages for the whole
2364          * object, bail early.
2365          */
2366         if (page_count > totalram_pages())
2367                 return -ENOMEM;
2368
2369         st = kmalloc(sizeof(*st), GFP_KERNEL);
2370         if (st == NULL)
2371                 return -ENOMEM;
2372
2373 rebuild_st:
2374         if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
2375                 kfree(st);
2376                 return -ENOMEM;
2377         }
2378
2379         /*
2380          * Get the list of pages out of our struct file.  They'll be pinned
2381          * at this point until we release them.
2382          *
2383          * Fail silently without starting the shrinker
2384          */
2385         mapping = obj->base.filp->f_mapping;
2386         mapping_set_unevictable(mapping);
2387         noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM);
2388         noreclaim |= __GFP_NORETRY | __GFP_NOWARN;
2389
2390         sg = st->sgl;
2391         st->nents = 0;
2392         sg_page_sizes = 0;
2393         for (i = 0; i < page_count; i++) {
2394                 const unsigned int shrink[] = {
2395                         I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE,
2396                         0,
2397                 }, *s = shrink;
2398                 gfp_t gfp = noreclaim;
2399
2400                 do {
2401                         cond_resched();
2402                         page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2403                         if (!IS_ERR(page))
2404                                 break;
2405
2406                         if (!*s) {
2407                                 ret = PTR_ERR(page);
2408                                 goto err_sg;
2409                         }
2410
2411                         i915_gem_shrink(dev_priv, 2 * page_count, NULL, *s++);
2412
2413                         /*
2414                          * We've tried hard to allocate the memory by reaping
2415                          * our own buffer; now let the real VM do its job and
2416                          * go down in flames if truly OOM.
2417                          *
2418                          * However, since graphics tend to be disposable,
2419                          * defer the oom here by reporting the ENOMEM back
2420                          * to userspace.
2421                          */
2422                         if (!*s) {
2423                                 /* reclaim and warn, but no oom */
2424                                 gfp = mapping_gfp_mask(mapping);
2425
2426                                 /*
2427                                  * Our bo are always dirty and so we require
2428                                  * kswapd to reclaim our pages (direct reclaim
2429                                  * does not effectively begin pageout of our
2430                                  * buffers on its own). However, direct reclaim
2431                                  * only waits for kswapd when under allocation
2432                                  * congestion. So as a result __GFP_RECLAIM is
2433                                  * unreliable and fails to actually reclaim our
2434                                  * dirty pages -- unless you try over and over
2435                                  * again with !__GFP_NORETRY. However, we still
2436                                  * want to fail this allocation rather than
2437                                  * trigger the out-of-memory killer and for
2438                                  * this we want __GFP_RETRY_MAYFAIL.
2439                                  */
2440                                 gfp |= __GFP_RETRY_MAYFAIL;
2441                         }
2442                 } while (1);
2443
2444                 if (!i ||
2445                     sg->length >= max_segment ||
2446                     page_to_pfn(page) != last_pfn + 1) {
2447                         if (i) {
2448                                 sg_page_sizes |= sg->length;
2449                                 sg = sg_next(sg);
2450                         }
2451                         st->nents++;
2452                         sg_set_page(sg, page, PAGE_SIZE, 0);
2453                 } else {
2454                         sg->length += PAGE_SIZE;
2455                 }
2456                 last_pfn = page_to_pfn(page);
2457
2458                 /* Check that the i965g/gm workaround works. */
2459                 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
2460         }
2461         if (sg) { /* loop terminated early; short sg table */
2462                 sg_page_sizes |= sg->length;
2463                 sg_mark_end(sg);
2464         }
2465
2466         /* Trim unused sg entries to avoid wasting memory. */
2467         i915_sg_trim(st);
2468
2469         ret = i915_gem_gtt_prepare_pages(obj, st);
2470         if (ret) {
2471                 /*
2472                  * DMA remapping failed? One possible cause is that
2473                  * it could not reserve enough large entries; asking
2474                  * for PAGE_SIZE chunks instead may be helpful.
2475                  */
2476                 if (max_segment > PAGE_SIZE) {
2477                         for_each_sgt_page(page, sgt_iter, st)
2478                                 put_page(page);
2479                         sg_free_table(st);
2480
2481                         max_segment = PAGE_SIZE;
2482                         goto rebuild_st;
2483                 } else {
2484                         dev_warn(&dev_priv->drm.pdev->dev,
2485                                  "Failed to DMA remap %lu pages\n",
2486                                  page_count);
2487                         goto err_pages;
2488                 }
2489         }
2490
2491         if (i915_gem_object_needs_bit17_swizzle(obj))
2492                 i915_gem_object_do_bit_17_swizzle(obj, st);
2493
2494         __i915_gem_object_set_pages(obj, st, sg_page_sizes);
2495
2496         return 0;
2497
2498 err_sg:
2499         sg_mark_end(sg);
2500 err_pages:
2501         mapping_clear_unevictable(mapping);
2502         pagevec_init(&pvec);
2503         for_each_sgt_page(page, sgt_iter, st) {
2504                 if (!pagevec_add(&pvec, page))
2505                         check_release_pagevec(&pvec);
2506         }
2507         if (pagevec_count(&pvec))
2508                 check_release_pagevec(&pvec);
2509         sg_free_table(st);
2510         kfree(st);
2511
2512         /*
2513          * shmemfs first checks if there is enough memory to allocate the page
2514          * and reports ENOSPC should there be insufficient memory, along with the usual
2515          * ENOMEM for a genuine allocation failure.
2516          *
2517          * We use ENOSPC in our driver to mean that we have run out of aperture
2518          * space and so want to translate the error from shmemfs back to our
2519          * usual understanding of ENOMEM.
2520          */
2521         if (ret == -ENOSPC)
2522                 ret = -ENOMEM;
2523
2524         return ret;
2525 }
2526
2527 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
2528                                  struct sg_table *pages,
2529                                  unsigned int sg_page_sizes)
2530 {
2531         struct drm_i915_private *i915 = to_i915(obj->base.dev);
2532         unsigned long supported = INTEL_INFO(i915)->page_sizes;
2533         int i;
2534
2535         lockdep_assert_held(&obj->mm.lock);
2536
2537         obj->mm.get_page.sg_pos = pages->sgl;
2538         obj->mm.get_page.sg_idx = 0;
2539
2540         obj->mm.pages = pages;
2541
2542         if (i915_gem_object_is_tiled(obj) &&
2543             i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
2544                 GEM_BUG_ON(obj->mm.quirked);
2545                 __i915_gem_object_pin_pages(obj);
2546                 obj->mm.quirked = true;
2547         }
2548
2549         GEM_BUG_ON(!sg_page_sizes);
2550         obj->mm.page_sizes.phys = sg_page_sizes;
2551
2552         /*
2553          * Calculate the supported page-sizes which fit into the given
2554          * sg_page_sizes. This will give us the page-sizes which we may be able
2555          * to use opportunistically when later inserting into the GTT. For
2556          * example if phys=2G, then in theory we should be able to use 1G, 2M,
2557          * 64K or 4K pages, although in practice this will depend on a number of
2558          * other factors.
2559          */
2560         obj->mm.page_sizes.sg = 0;
2561         for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) {
2562                 if (obj->mm.page_sizes.phys & ~0u << i)
2563                         obj->mm.page_sizes.sg |= BIT(i);
2564         }
2565         GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg));
2566
2567         spin_lock(&i915->mm.obj_lock);
2568         list_add(&obj->mm.link, &i915->mm.unbound_list);
2569         spin_unlock(&i915->mm.obj_lock);
2570 }
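/*
 * Worked example of the page_sizes.sg computation above: with supported =
 * 4K | 64K | 2M and a scatterlist containing a mix of 2M and 4K segments
 * (phys = 2M | 4K), every supported size up to 2M fits inside the 2M
 * segments, so sg = 4K | 64K | 2M. A fully fragmented table (phys = 4K)
 * leaves only sg = 4K available for GTT insertion.
 */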
2571
2572 static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2573 {
2574         int err;
2575
2576         if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) {
2577                 DRM_DEBUG("Attempting to obtain a purgeable object\n");
2578                 return -EFAULT;
2579         }
2580
2581         err = obj->ops->get_pages(obj);
2582         GEM_BUG_ON(!err && !i915_gem_object_has_pages(obj));
2583
2584         return err;
2585 }
2586
2587 /* Ensure that the associated pages are gathered from the backing storage
2588  * and pinned into our object. i915_gem_object_pin_pages() may be called
2589  * multiple times before they are released by a single call to
2590  * i915_gem_object_unpin_pages() - once the pages are no longer referenced
2591  * either as a result of memory pressure (reaping pages under the shrinker)
2592  * or as the object is itself released.
2593  */
2594 int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2595 {
2596         int err;
2597
2598         err = mutex_lock_interruptible(&obj->mm.lock);
2599         if (err)
2600                 return err;
2601
2602         if (unlikely(!i915_gem_object_has_pages(obj))) {
2603                 GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
2604
2605                 err = ____i915_gem_object_get_pages(obj);
2606                 if (err)
2607                         goto unlock;
2608
2609                 smp_mb__before_atomic();
2610         }
2611         atomic_inc(&obj->mm.pages_pin_count);
2612
2613 unlock:
2614         mutex_unlock(&obj->mm.lock);
2615         return err;
2616 }
2617
2618 /* The 'mapping' part of i915_gem_object_pin_map() below */
2619 static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
2620                                  enum i915_map_type type)
2621 {
2622         unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
2623         struct sg_table *sgt = obj->mm.pages;
2624         struct sgt_iter sgt_iter;
2625         struct page *page;
2626         struct page *stack_pages[32];
2627         struct page **pages = stack_pages;
2628         unsigned long i = 0;
2629         pgprot_t pgprot;
2630         void *addr;
2631
2632         /* A single page can always be kmapped */
2633         if (n_pages == 1 && type == I915_MAP_WB)
2634                 return kmap(sg_page(sgt->sgl));
2635
2636         if (n_pages > ARRAY_SIZE(stack_pages)) {
2637                 /* Too big for stack -- allocate temporary array instead */
2638                 pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL);
2639                 if (!pages)
2640                         return NULL;
2641         }
2642
2643         for_each_sgt_page(page, sgt_iter, sgt)
2644                 pages[i++] = page;
2645
2646         /* Check that we have the expected number of pages */
2647         GEM_BUG_ON(i != n_pages);
2648
2649         switch (type) {
2650         default:
2651                 MISSING_CASE(type);
2652                 /* fallthrough to use PAGE_KERNEL anyway */
2653         case I915_MAP_WB:
2654                 pgprot = PAGE_KERNEL;
2655                 break;
2656         case I915_MAP_WC:
2657                 pgprot = pgprot_writecombine(PAGE_KERNEL_IO);
2658                 break;
2659         }
2660         addr = vmap(pages, n_pages, 0, pgprot);
2661
2662         if (pages != stack_pages)
2663                 kvfree(pages);
2664
2665         return addr;
2666 }
2667
2668 /* get, pin, and map the pages of the object into kernel space */
2669 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
2670                               enum i915_map_type type)
2671 {
2672         enum i915_map_type has_type;
2673         bool pinned;
2674         void *ptr;
2675         int ret;
2676
2677         if (unlikely(!i915_gem_object_has_struct_page(obj)))
2678                 return ERR_PTR(-ENXIO);
2679
2680         ret = mutex_lock_interruptible(&obj->mm.lock);
2681         if (ret)
2682                 return ERR_PTR(ret);
2683
2684         pinned = !(type & I915_MAP_OVERRIDE);
2685         type &= ~I915_MAP_OVERRIDE;
2686
2687         if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) {
2688                 if (unlikely(!i915_gem_object_has_pages(obj))) {
2689                         GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
2690
2691                         ret = ____i915_gem_object_get_pages(obj);
2692                         if (ret)
2693                                 goto err_unlock;
2694
2695                         smp_mb__before_atomic();
2696                 }
2697                 atomic_inc(&obj->mm.pages_pin_count);
2698                 pinned = false;
2699         }
2700         GEM_BUG_ON(!i915_gem_object_has_pages(obj));
2701
2702         ptr = page_unpack_bits(obj->mm.mapping, &has_type);
2703         if (ptr && has_type != type) {
2704                 if (pinned) {
2705                         ret = -EBUSY;
2706                         goto err_unpin;
2707                 }
2708
2709                 if (is_vmalloc_addr(ptr))
2710                         vunmap(ptr);
2711                 else
2712                         kunmap(kmap_to_page(ptr));
2713
2714                 ptr = obj->mm.mapping = NULL;
2715         }
2716
2717         if (!ptr) {
2718                 ptr = i915_gem_object_map(obj, type);
2719                 if (!ptr) {
2720                         ret = -ENOMEM;
2721                         goto err_unpin;
2722                 }
2723
2724                 obj->mm.mapping = page_pack_bits(ptr, type);
2725         }
2726
2727 out_unlock:
2728         mutex_unlock(&obj->mm.lock);
2729         return ptr;
2730
2731 err_unpin:
2732         atomic_dec(&obj->mm.pages_pin_count);
2733 err_unlock:
2734         ptr = ERR_PTR(ret);
2735         goto out_unlock;
2736 }
2737
2738 static int
2739 i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
2740                            const struct drm_i915_gem_pwrite *arg)
2741 {
2742         struct address_space *mapping = obj->base.filp->f_mapping;
2743         char __user *user_data = u64_to_user_ptr(arg->data_ptr);
2744         u64 remain, offset;
2745         unsigned int pg;
2746
2747         /* Before we instantiate/pin the backing store for our use, we
2748          * can prepopulate the shmemfs filp efficiently using a write into
2749          * the pagecache. We avoid the penalty of instantiating all the
2750          * pages, important if the user is just writing to a few and never
2751          * uses the object on the GPU, and using a direct write into shmemfs
2752          * allows it to avoid the cost of retrieving a page (either swapin
2753          * or clearing-before-use) before it is overwritten.
2754          */
2755         if (i915_gem_object_has_pages(obj))
2756                 return -ENODEV;
2757
2758         if (obj->mm.madv != I915_MADV_WILLNEED)
2759                 return -EFAULT;
2760
2761         /* Before the pages are instantiated the object is treated as being
2762          * in the CPU domain. The pages will be clflushed as required before
2763          * use, and we can freely write into the pages directly. If userspace
2764          * races pwrite with any other operation, corruption will ensue;
2765          * that is userspace's prerogative!
2766          */
2767
2768         remain = arg->size;
2769         offset = arg->offset;
2770         pg = offset_in_page(offset);
2771
2772         do {
2773                 unsigned int len, unwritten;
2774                 struct page *page;
2775                 void *data, *vaddr;
2776                 int err;
2777
2778                 len = PAGE_SIZE - pg;
2779                 if (len > remain)
2780                         len = remain;
2781
2782                 err = pagecache_write_begin(obj->base.filp, mapping,
2783                                             offset, len, 0,
2784                                             &page, &data);
2785                 if (err < 0)
2786                         return err;
2787
2788                 vaddr = kmap(page);
2789                 unwritten = copy_from_user(vaddr + pg, user_data, len);
2790                 kunmap(page);
2791
2792                 err = pagecache_write_end(obj->base.filp, mapping,
2793                                           offset, len, len - unwritten,
2794                                           page, data);
2795                 if (err < 0)
2796                         return err;
2797
2798                 if (unwritten)
2799                         return -EFAULT;
2800
2801                 remain -= len;
2802                 user_data += len;
2803                 offset += len;
2804                 pg = 0;
2805         } while (remain);
2806
2807         return 0;
2808 }
2809
2810 static void
2811 i915_gem_retire_work_handler(struct work_struct *work)
2812 {
2813         struct drm_i915_private *dev_priv =
2814                 container_of(work, typeof(*dev_priv), gt.retire_work.work);
2815         struct drm_device *dev = &dev_priv->drm;
2816
2817         /* Come back later if the device is busy... */
2818         if (mutex_trylock(&dev->struct_mutex)) {
2819                 i915_retire_requests(dev_priv);
2820                 mutex_unlock(&dev->struct_mutex);
2821         }
2822
2823         /*
2824          * Keep the retire handler running until we are finally idle.
2825          * We do not need to do this test under locking as in the worst-case
2826          * we queue the retire worker once too often.
2827          */
2828         if (READ_ONCE(dev_priv->gt.awake))
2829                 queue_delayed_work(dev_priv->wq,
2830                                    &dev_priv->gt.retire_work,
2831                                    round_jiffies_up_relative(HZ));
2832 }
2833
2834 static bool switch_to_kernel_context_sync(struct drm_i915_private *i915,
2835                                           unsigned long mask)
2836 {
2837         bool result = true;
2838
2839         /*
2840          * Even if we fail to switch, give whatever is running a small chance
2841          * to save itself before we report the failure. Yes, this may be a
2842          * false positive due to e.g. ENOMEM, caveat emptor!
2843          */
2844         if (i915_gem_switch_to_kernel_context(i915, mask))
2845                 result = false;
2846
2847         if (i915_gem_wait_for_idle(i915,
2848                                    I915_WAIT_LOCKED |
2849                                    I915_WAIT_FOR_IDLE_BOOST,
2850                                    I915_GEM_IDLE_TIMEOUT))
2851                 result = false;
2852
2853         if (!result) {
2854                 if (i915_modparams.reset) { /* XXX hide warning from gem_eio */
2855                         dev_err(i915->drm.dev,
2856                                 "Failed to idle engines, declaring wedged!\n");
2857                         GEM_TRACE_DUMP();
2858                 }
2859
2860                 /* Forcibly cancel outstanding work and leave the gpu quiet. */
2861                 i915_gem_set_wedged(i915);
2862         }
2863
2864         i915_retire_requests(i915); /* ensure we flush after wedging */
2865         return result;
2866 }
2867
2868 static bool load_power_context(struct drm_i915_private *i915)
2869 {
2870         /* Force loading the kernel context on all engines */
2871         if (!switch_to_kernel_context_sync(i915, ALL_ENGINES))
2872                 return false;
2873
2874         /*
2875          * Immediately park the GPU so that we enable powersaving and
2876          * treat it as idle. The next time we issue a request, we will
2877          * unpark and start using the engine->pinned_default_state; otherwise
2878          * it is in limbo and an early reset may fail.
2879          */
2880         __i915_gem_park(i915);
2881
2882         return true;
2883 }
2884
2885 static void
2886 i915_gem_idle_work_handler(struct work_struct *work)
2887 {
2888         struct drm_i915_private *i915 =
2889                 container_of(work, typeof(*i915), gt.idle_work.work);
2890         bool rearm_hangcheck;
2891
2892         if (!READ_ONCE(i915->gt.awake))
2893                 return;
2894
2895         if (READ_ONCE(i915->gt.active_requests))
2896                 return;
2897
2898         rearm_hangcheck =
2899                 cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
2900
2901         if (!mutex_trylock(&i915->drm.struct_mutex)) {
2902                 /* Currently busy, come back later */
2903                 mod_delayed_work(i915->wq,
2904                                  &i915->gt.idle_work,
2905                                  msecs_to_jiffies(50));
2906                 goto out_rearm;
2907         }
2908
2909         /*
2910          * Flush out the last user context, leaving only the pinned
2911          * kernel context resident. Should anything unfortunate happen
2912          * while we are idle (such as the GPU being power cycled), no users
2913          * will be harmed.
2914          */
2915         if (!work_pending(&i915->gt.idle_work.work) &&
2916             !i915->gt.active_requests) {
2917                 ++i915->gt.active_requests; /* don't requeue idle */
2918
2919                 switch_to_kernel_context_sync(i915, i915->gt.active_engines);
2920
2921                 if (!--i915->gt.active_requests) {
2922                         __i915_gem_park(i915);
2923                         rearm_hangcheck = false;
2924                 }
2925         }
2926
2927         mutex_unlock(&i915->drm.struct_mutex);
2928
2929 out_rearm:
2930         if (rearm_hangcheck) {
2931                 GEM_BUG_ON(!i915->gt.awake);
2932                 i915_queue_hangcheck(i915);
2933         }
2934 }
2935
2936 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
2937 {
2938         struct drm_i915_private *i915 = to_i915(gem->dev);
2939         struct drm_i915_gem_object *obj = to_intel_bo(gem);
2940         struct drm_i915_file_private *fpriv = file->driver_priv;
2941         struct i915_lut_handle *lut, *ln;
2942
2943         mutex_lock(&i915->drm.struct_mutex);
2944
2945         list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) {
2946                 struct i915_gem_context *ctx = lut->ctx;
2947                 struct i915_vma *vma;
2948
2949                 GEM_BUG_ON(ctx->file_priv == ERR_PTR(-EBADF));
2950                 if (ctx->file_priv != fpriv)
2951                         continue;
2952
2953                 vma = radix_tree_delete(&ctx->handles_vma, lut->handle);
2954                 GEM_BUG_ON(vma->obj != obj);
2955
2956                 /* We allow the process to have multiple handles to the same
2957                  * vma, in the same fd namespace, by virtue of flink/open.
2958                  */
2959                 GEM_BUG_ON(!vma->open_count);
2960                 if (!--vma->open_count && !i915_vma_is_ggtt(vma))
2961                         i915_vma_close(vma);
2962
2963                 list_del(&lut->obj_link);
2964                 list_del(&lut->ctx_link);
2965
2966                 i915_lut_handle_free(lut);
2967                 __i915_gem_object_release_unless_active(obj);
2968         }
2969
2970         mutex_unlock(&i915->drm.struct_mutex);
2971 }
2972
2973 static unsigned long to_wait_timeout(s64 timeout_ns)
2974 {
2975         if (timeout_ns < 0)
2976                 return MAX_SCHEDULE_TIMEOUT;
2977
2978         if (timeout_ns == 0)
2979                 return 0;
2980
2981         return nsecs_to_jiffies_timeout(timeout_ns);
2982 }
2983
2984 /**
2985  * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
2986  * @dev: drm device pointer
2987  * @data: ioctl data blob
2988  * @file: drm file pointer
2989  *
2990  * Returns 0 if successful, else an error is returned with the remaining time in
2991  * the timeout parameter.
2992  *  -ETIME: object is still busy after timeout
2993  *  -ERESTARTSYS: signal interrupted the wait
2994  *  -ENOENT: object doesn't exist
2995  * Also possible, but rare:
2996  *  -EAGAIN: incomplete, restart syscall
2997  *  -ENOMEM: out of memory
2998  *  -ENODEV: Internal IRQ fail
2999  *  -E?: The add request failed
3000  *
3001  * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
3002  * non-zero timeout parameter the wait ioctl will wait for the given number of
3003  * nanoseconds on an object becoming unbusy. Since the wait itself does so
3004  * without holding struct_mutex, the object may become re-busied before this
3005  * function completes. A similar but shorter race condition exists in the busy
3006  * ioctl.
3007  */
3008 int
3009 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
3010 {
3011         struct drm_i915_gem_wait *args = data;
3012         struct drm_i915_gem_object *obj;
3013         ktime_t start;
3014         long ret;
3015
3016         if (args->flags != 0)
3017                 return -EINVAL;
3018
3019         obj = i915_gem_object_lookup(file, args->bo_handle);
3020         if (!obj)
3021                 return -ENOENT;
3022
3023         start = ktime_get();
3024
3025         ret = i915_gem_object_wait(obj,
3026                                    I915_WAIT_INTERRUPTIBLE |
3027                                    I915_WAIT_PRIORITY |
3028                                    I915_WAIT_ALL,
3029                                    to_wait_timeout(args->timeout_ns));
3030
3031         if (args->timeout_ns > 0) {
3032                 args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
3033                 if (args->timeout_ns < 0)
3034                         args->timeout_ns = 0;
3035
3036                 /*
3037                  * Apparently ktime isn't accurate enough and occasionally has a
3038                  * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
3039                  * things up to make the test happy. We allow up to 1 jiffy.
3040                  *
3041                  * This is a regression from the timespec->ktime conversion.
3042                  */
3043                 if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns))
3044                         args->timeout_ns = 0;
3045
3046                 /* Asked to wait beyond the jiffy/scheduler precision? */
3047                 if (ret == -ETIME && args->timeout_ns)
3048                         ret = -EAGAIN;
3049         }
3050
3051         i915_gem_object_put(obj);
3052         return ret;
3053 }
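
/*
 * Userspace usage sketch for the wait ioctl above. This is illustrative only
 * and not part of the driver; it assumes an open DRM fd, a GEM buffer handle
 * and libdrm's drmIoctl() wrapper:
 *
 *	struct drm_i915_gem_wait wait = {
 *		.bo_handle = handle,
 *		.flags = 0,
 *		.timeout_ns = 1000000000,
 *	};
 *
 *	err = drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
 *
 * On failure, errno is ETIME if the object was still busy when the ~1s budget
 * expired, and wait.timeout_ns reports the time remaining (possibly zero).
 */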
3054
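/*
 * Final check that every engine has settled; if they refuse to idle within
 * I915_IDLE_ENGINES_TIMEOUT, dump the GEM trace and declare the GPU wedged.
 */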
3055 static int wait_for_engines(struct drm_i915_private *i915)
3056 {
3057         if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) {
3058                 dev_err(i915->drm.dev,
3059                         "Failed to idle engines, declaring wedged!\n");
3060                 GEM_TRACE_DUMP();
3061                 i915_gem_set_wedged(i915);
3062                 return -EIO;
3063         }
3064
3065         return 0;
3066 }
3067
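/*
 * Wait on the last request of each active timeline, dropping gt->mutex while
 * we sleep and restarting the walk from the head of the list afterwards.
 * Returns the remaining timeout budget, or a negative error code.
 */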
3068 static long
3069 wait_for_timelines(struct drm_i915_private *i915,
3070                    unsigned int flags, long timeout)
3071 {
3072         struct i915_gt_timelines *gt = &i915->gt.timelines;
3073         struct i915_timeline *tl;
3074
3075         if (!READ_ONCE(i915->gt.active_requests))
3076                 return timeout;
3077
3078         mutex_lock(&gt->mutex);
3079         list_for_each_entry(tl, &gt->active_list, link) {
3080                 struct i915_request *rq;
3081
3082                 rq = i915_active_request_get_unlocked(&tl->last_request);
3083                 if (!rq)
3084                         continue;
3085
3086                 mutex_unlock(&gt->mutex);
3087
3088                 /*
3089                  * "Race-to-idle".
3090                  *
3091          * Switching to the kernel context is often used as a synchronous
3092                  * step prior to idling, e.g. in suspend for flushing all
3093                  * current operations to memory before sleeping. These we
3094                  * want to complete as quickly as possible to avoid prolonged
3095          * stalls, so allow the GPU to boost to maximum clocks.
3096                  */
3097                 if (flags & I915_WAIT_FOR_IDLE_BOOST)
3098                         gen6_rps_boost(rq);
3099
3100                 timeout = i915_request_wait(rq, flags, timeout);
3101                 i915_request_put(rq);
3102                 if (timeout < 0)
3103                         return timeout;
3104
3105                 /* restart after reacquiring the lock */
3106                 mutex_lock(&gt->mutex);
3107                 tl = list_entry(&gt->active_list, typeof(*tl), link);
3108         }
3109         mutex_unlock(&gt->mutex);
3110
3111         return timeout;
3112 }
3113
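/*
 * Flush all outstanding requests: first wait on every active timeline, then,
 * if the caller holds struct_mutex, verify the engines have idled and retire
 * the completed requests.
 */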
3114 int i915_gem_wait_for_idle(struct drm_i915_private *i915,
3115                            unsigned int flags, long timeout)
3116 {
3117         GEM_TRACE("flags=%x (%s), timeout=%ld%s\n",
3118                   flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked",
3119                   timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : "");
3120
3121         /* If the device is asleep, we have no requests outstanding */
3122         if (!READ_ONCE(i915->gt.awake))
3123                 return 0;
3124
3125         timeout = wait_for_timelines(i915, flags, timeout);
3126         if (timeout < 0)
3127                 return timeout;
3128
3129         if (flags & I915_WAIT_LOCKED) {
3130                 int err;
3131
3132                 lockdep_assert_held(&i915->drm.struct_mutex);
3133
3134                 err = wait_for_engines(i915);
3135                 if (err)
3136                         return err;
3137
3138                 i915_retire_requests(i915);
3139         }
3140
3141         return 0;
3142 }
3143
3144 static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
3145 {
3146         /*
3147          * We manually flush the CPU domain so that we can override and
3148          * force the flush for the display, and perform it asynchronously.
3149          */
3150         flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
3151         if (obj->cache_dirty)
3152                 i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
3153         obj->write_domain = 0;
3154 }
3155
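/*
 * Flush any pending CPU writes for an object pinned for display (scanout).
 * We take struct_mutex ourselves, so this must not be called with the lock
 * already held.
 */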
3156 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
3157 {
3158         if (!READ_ONCE(obj->pin_global))
3159                 return;
3160
3161         mutex_lock(&obj->base.dev->struct_mutex);
3162         __i915_gem_object_flush_for_display(obj);
3163         mutex_unlock(&obj->base.dev->struct_mutex);
3164 }
3165
3166 /**
3167  * Moves a single object to the WC read domain, and possibly the write domain.
3168  * @obj: object to act on
3169  * @write: ask for write access or read only
3170  *
3171  * This function returns when the move is complete, including waiting on
3172  * flushes to occur.
3173  */
3174 int
3175 i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
3176 {
3177         int ret;
3178
3179         lockdep_assert_held(&obj->base.dev->struct_mutex);
3180
3181         ret = i915_gem_object_wait(obj,
3182                                    I915_WAIT_INTERRUPTIBLE |
3183                                    I915_WAIT_LOCKED |
3184                                    (write ? I915_WAIT_ALL : 0),
3185                                    MAX_SCHEDULE_TIMEOUT);
3186         if (ret)
3187                 return ret;
3188
3189         if (obj->write_domain == I915_GEM_DOMAIN_WC)
3190                 return 0;
3191