1 /*
2  * Copyright © 2008-2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *
26  */
27
28 #include <drm/drm_vma_manager.h>
29 #include <drm/drm_pci.h>
30 #include <drm/i915_drm.h>
31 #include <linux/dma-fence-array.h>
32 #include <linux/kthread.h>
33 #include <linux/reservation.h>
34 #include <linux/shmem_fs.h>
35 #include <linux/slab.h>
36 #include <linux/stop_machine.h>
37 #include <linux/swap.h>
38 #include <linux/pci.h>
39 #include <linux/dma-buf.h>
40 #include <linux/mman.h>
41
42 #include "gt/intel_engine_pm.h"
43 #include "gt/intel_gt_pm.h"
44 #include "gt/intel_mocs.h"
45 #include "gt/intel_reset.h"
46 #include "gt/intel_workarounds.h"
47
48 #include "i915_drv.h"
49 #include "i915_gem_clflush.h"
50 #include "i915_gemfs.h"
51 #include "i915_gem_pm.h"
52 #include "i915_trace.h"
53 #include "i915_vgpu.h"
54
55 #include "intel_display.h"
56 #include "intel_drv.h"
57 #include "intel_frontbuffer.h"
58 #include "intel_pm.h"
59
60 static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
61
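/*
 * Report whether CPU writes to this object must be flushed out of the CPU
 * cache before other, non-coherent parties can rely on them. A cache that
 * is already tracked as dirty will be flushed later anyway, objects that
 * are not write-coherent always need the flush, and otherwise we only
 * keep the object flushed while it is pinned for use by the HW.
 */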
62 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
63 {
64         if (obj->cache_dirty)
65                 return false;
66
67         if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
68                 return true;
69
70         return obj->pin_global; /* currently in use by HW, keep flushed */
71 }
72
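/*
 * Reserve a node of the requested size inside the CPU-mappable portion of
 * the GGTT, preferring low addresses, so that individual pages can be
 * temporarily inserted for the pread/pwrite slow paths below.
 */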
73 static int
74 insert_mappable_node(struct i915_ggtt *ggtt,
75                      struct drm_mm_node *node, u32 size)
76 {
77         memset(node, 0, sizeof(*node));
78         return drm_mm_insert_node_in_range(&ggtt->vm.mm, node,
79                                            size, 0, I915_COLOR_UNEVICTABLE,
80                                            0, ggtt->mappable_end,
81                                            DRM_MM_INSERT_LOW);
82 }
83
84 static void
85 remove_mappable_node(struct drm_mm_node *node)
86 {
87         drm_mm_remove_node(node);
88 }
89
90 /* some bookkeeping */
91 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
92                                   u64 size)
93 {
94         spin_lock(&dev_priv->mm.object_stat_lock);
95         dev_priv->mm.object_count++;
96         dev_priv->mm.object_memory += size;
97         spin_unlock(&dev_priv->mm.object_stat_lock);
98 }
99
100 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
101                                      u64 size)
102 {
103         spin_lock(&dev_priv->mm.object_stat_lock);
104         dev_priv->mm.object_count--;
105         dev_priv->mm.object_memory -= size;
106         spin_unlock(&dev_priv->mm.object_stat_lock);
107 }
108
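/**
 * Returns the size of the global GTT and an estimate of how much of it
 * is still available, i.e. everything not reserved or currently pinned.
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 */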
109 int
110 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
111                             struct drm_file *file)
112 {
113         struct i915_ggtt *ggtt = &to_i915(dev)->ggtt;
114         struct drm_i915_gem_get_aperture *args = data;
115         struct i915_vma *vma;
116         u64 pinned;
117
118         mutex_lock(&ggtt->vm.mutex);
119
120         pinned = ggtt->vm.reserved;
121         list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
122                 if (i915_vma_is_pinned(vma))
123                         pinned += vma->node.size;
124
125         mutex_unlock(&ggtt->vm.mutex);
126
127         args->aper_size = ggtt->vm.total;
128         args->aper_available_size = args->aper_size - pinned;
129
130         return 0;
131 }
132
133 static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
134 {
135         struct address_space *mapping = obj->base.filp->f_mapping;
136         drm_dma_handle_t *phys;
137         struct sg_table *st;
138         struct scatterlist *sg;
139         char *vaddr;
140         int i;
141         int err;
142
143         if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
144                 return -EINVAL;
145
146         /* Always aligning to the object size allows a single allocation
147          * to handle all possible callers, and given typical object sizes,
148          * the alignment of the buddy allocation will naturally match.
149          */
150         phys = drm_pci_alloc(obj->base.dev,
151                              roundup_pow_of_two(obj->base.size),
152                              roundup_pow_of_two(obj->base.size));
153         if (!phys)
154                 return -ENOMEM;
155
156         vaddr = phys->vaddr;
157         for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
158                 struct page *page;
159                 char *src;
160
161                 page = shmem_read_mapping_page(mapping, i);
162                 if (IS_ERR(page)) {
163                         err = PTR_ERR(page);
164                         goto err_phys;
165                 }
166
167                 src = kmap_atomic(page);
168                 memcpy(vaddr, src, PAGE_SIZE);
169                 drm_clflush_virt_range(vaddr, PAGE_SIZE);
170                 kunmap_atomic(src);
171
172                 put_page(page);
173                 vaddr += PAGE_SIZE;
174         }
175
176         i915_gem_chipset_flush(to_i915(obj->base.dev));
177
178         st = kmalloc(sizeof(*st), GFP_KERNEL);
179         if (!st) {
180                 err = -ENOMEM;
181                 goto err_phys;
182         }
183
184         if (sg_alloc_table(st, 1, GFP_KERNEL)) {
185                 kfree(st);
186                 err = -ENOMEM;
187                 goto err_phys;
188         }
189
190         sg = st->sgl;
191         sg->offset = 0;
192         sg->length = obj->base.size;
193
194         sg_dma_address(sg) = phys->busaddr;
195         sg_dma_len(sg) = obj->base.size;
196
197         obj->phys_handle = phys;
198
199         __i915_gem_object_set_pages(obj, st, sg->length);
200
201         return 0;
202
203 err_phys:
204         drm_pci_free(obj->base.dev, phys);
205
206         return err;
207 }
208
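/* Move the object into the CPU domain for both reads and writes, noting
 * that the cache will need flushing if those CPU writes are not coherent
 * with the HW.
 */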
209 static void __start_cpu_write(struct drm_i915_gem_object *obj)
210 {
211         obj->read_domains = I915_GEM_DOMAIN_CPU;
212         obj->write_domain = I915_GEM_DOMAIN_CPU;
213         if (cpu_write_needs_clflush(obj))
214                 obj->cache_dirty = true;
215 }
216
217 void
218 __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
219                                 struct sg_table *pages,
220                                 bool needs_clflush)
221 {
222         GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);
223
224         if (obj->mm.madv == I915_MADV_DONTNEED)
225                 obj->mm.dirty = false;
226
227         if (needs_clflush &&
228             (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
229             !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
230                 drm_clflush_sg(pages);
231
232         __start_cpu_write(obj);
233 }
234
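/*
 * Tear down the physically contiguous shadow copy: write it back into the
 * shmemfs pages if the object is dirty, then release the sg_table and the
 * underlying DMA allocation.
 */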
235 static void
236 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
237                                struct sg_table *pages)
238 {
239         __i915_gem_object_release_shmem(obj, pages, false);
240
241         if (obj->mm.dirty) {
242                 struct address_space *mapping = obj->base.filp->f_mapping;
243                 char *vaddr = obj->phys_handle->vaddr;
244                 int i;
245
246                 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
247                         struct page *page;
248                         char *dst;
249
250                         page = shmem_read_mapping_page(mapping, i);
251                         if (IS_ERR(page))
252                                 continue;
253
254                         dst = kmap_atomic(page);
255                         drm_clflush_virt_range(vaddr, PAGE_SIZE);
256                         memcpy(dst, vaddr, PAGE_SIZE);
257                         kunmap_atomic(dst);
258
259                         set_page_dirty(page);
260                         if (obj->mm.madv == I915_MADV_WILLNEED)
261                                 mark_page_accessed(page);
262                         put_page(page);
263                         vaddr += PAGE_SIZE;
264                 }
265                 obj->mm.dirty = false;
266         }
267
268         sg_free_table(pages);
269         kfree(pages);
270
271         drm_pci_free(obj->base.dev, obj->phys_handle);
272 }
273
274 static void
275 i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
276 {
277         i915_gem_object_unpin_pages(obj);
278 }
279
280 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
281         .get_pages = i915_gem_object_get_pages_phys,
282         .put_pages = i915_gem_object_put_pages_phys,
283         .release = i915_gem_object_release_phys,
284 };
285
286 static const struct drm_i915_gem_object_ops i915_gem_object_ops;
287
288 int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
289 {
290         struct i915_vma *vma;
291         LIST_HEAD(still_in_list);
292         int ret;
293
294         lockdep_assert_held(&obj->base.dev->struct_mutex);
295
296         /* Closed vma are removed from the obj->vma.list - but they may
297          * still have an active binding on the object. To remove those we
298          * must wait for all rendering to complete to the object (as unbinding
299          * must anyway), and retire the requests.
300          */
301         ret = i915_gem_object_set_to_cpu_domain(obj, false);
302         if (ret)
303                 return ret;
304
305         spin_lock(&obj->vma.lock);
306         while (!ret && (vma = list_first_entry_or_null(&obj->vma.list,
307                                                        struct i915_vma,
308                                                        obj_link))) {
309                 list_move_tail(&vma->obj_link, &still_in_list);
310                 spin_unlock(&obj->vma.lock);
311
312                 ret = i915_vma_unbind(vma);
313
314                 spin_lock(&obj->vma.lock);
315         }
316         list_splice(&still_in_list, &obj->vma.list);
317         spin_unlock(&obj->vma.lock);
318
319         return ret;
320 }
321
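/*
 * Wait upon a single fence, returning the remaining timeout in jiffies
 * (or a negative error code). Foreign fences go through the generic
 * dma-fence wait; i915 fences use i915_request_wait() and, when
 * I915_WAIT_LOCKED is set, completed requests are retired on the way out.
 */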
322 static long
323 i915_gem_object_wait_fence(struct dma_fence *fence,
324                            unsigned int flags,
325                            long timeout)
326 {
327         struct i915_request *rq;
328
329         BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1);
330
331         if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
332                 return timeout;
333
334         if (!dma_fence_is_i915(fence))
335                 return dma_fence_wait_timeout(fence,
336                                               flags & I915_WAIT_INTERRUPTIBLE,
337                                               timeout);
338
339         rq = to_request(fence);
340         if (i915_request_completed(rq))
341                 goto out;
342
343         timeout = i915_request_wait(rq, flags, timeout);
344
345 out:
346         if (flags & I915_WAIT_LOCKED && i915_request_completed(rq))
347                 i915_request_retire_upto(rq);
348
349         return timeout;
350 }
351
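/*
 * Wait upon the fences tracked in the reservation object: just the
 * exclusive fence, or with I915_WAIT_ALL every shared fence as well. On
 * success, any fences known to be signaled may be pruned from the object.
 */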
352 static long
353 i915_gem_object_wait_reservation(struct reservation_object *resv,
354                                  unsigned int flags,
355                                  long timeout)
356 {
357         unsigned int seq = __read_seqcount_begin(&resv->seq);
358         struct dma_fence *excl;
359         bool prune_fences = false;
360
361         if (flags & I915_WAIT_ALL) {
362                 struct dma_fence **shared;
363                 unsigned int count, i;
364                 int ret;
365
366                 ret = reservation_object_get_fences_rcu(resv,
367                                                         &excl, &count, &shared);
368                 if (ret)
369                         return ret;
370
371                 for (i = 0; i < count; i++) {
372                         timeout = i915_gem_object_wait_fence(shared[i],
373                                                              flags, timeout);
374                         if (timeout < 0)
375                                 break;
376
377                         dma_fence_put(shared[i]);
378                 }
379
380                 for (; i < count; i++)
381                         dma_fence_put(shared[i]);
382                 kfree(shared);
383
384                 /*
385                  * If both shared fences and an exclusive fence exist,
386                  * then by construction the shared fences must be later
387                  * than the exclusive fence. If we successfully wait for
388                  * all the shared fences, we know that the exclusive fence
389                  * must also be signaled. If all the shared fences are
390                  * signaled, we can prune the array and recover the
391                  * floating references on the fences/requests.
392                  */
393                 prune_fences = count && timeout >= 0;
394         } else {
395                 excl = reservation_object_get_excl_rcu(resv);
396         }
397
398         if (excl && timeout >= 0)
399                 timeout = i915_gem_object_wait_fence(excl, flags, timeout);
400
401         dma_fence_put(excl);
402
403         /*
404          * Opportunistically prune the fences iff we know they have *all* been
405          * signaled and that the reservation object has not been changed (i.e.
406          * no new fences have been added).
407          */
408         if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) {
409                 if (reservation_object_trylock(resv)) {
410                         if (!__read_seqcount_retry(&resv->seq, seq))
411                                 reservation_object_add_excl_fence(resv, NULL);
412                         reservation_object_unlock(resv);
413                 }
414         }
415
416         return timeout;
417 }
418
419 static void __fence_set_priority(struct dma_fence *fence,
420                                  const struct i915_sched_attr *attr)
421 {
422         struct i915_request *rq;
423         struct intel_engine_cs *engine;
424
425         if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence))
426                 return;
427
428         rq = to_request(fence);
429         engine = rq->engine;
430
431         local_bh_disable();
432         rcu_read_lock(); /* RCU serialisation for set-wedged protection */
433         if (engine->schedule)
434                 engine->schedule(rq, attr);
435         rcu_read_unlock();
436         local_bh_enable(); /* kick the tasklets if queues were reprioritised */
437 }
438
439 static void fence_set_priority(struct dma_fence *fence,
440                                const struct i915_sched_attr *attr)
441 {
442         /* Recurse once into a fence-array */
443         if (dma_fence_is_array(fence)) {
444                 struct dma_fence_array *array = to_dma_fence_array(fence);
445                 int i;
446
447                 for (i = 0; i < array->num_fences; i++)
448                         __fence_set_priority(array->fences[i], attr);
449         } else {
450                 __fence_set_priority(fence, attr);
451         }
452 }
453
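/*
 * Apply the given scheduling attributes to every request this object is
 * currently waiting upon (the exclusive fence, plus every shared fence
 * when I915_WAIT_ALL is set), typically to boost their priority so that
 * the object becomes idle sooner.
 */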
454 int
455 i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
456                               unsigned int flags,
457                               const struct i915_sched_attr *attr)
458 {
459         struct dma_fence *excl;
460
461         if (flags & I915_WAIT_ALL) {
462                 struct dma_fence **shared;
463                 unsigned int count, i;
464                 int ret;
465
466                 ret = reservation_object_get_fences_rcu(obj->resv,
467                                                         &excl, &count, &shared);
468                 if (ret)
469                         return ret;
470
471                 for (i = 0; i < count; i++) {
472                         fence_set_priority(shared[i], attr);
473                         dma_fence_put(shared[i]);
474                 }
475
476                 kfree(shared);
477         } else {
478                 excl = reservation_object_get_excl_rcu(obj->resv);
479         }
480
481         if (excl) {
482                 fence_set_priority(excl, attr);
483                 dma_fence_put(excl);
484         }
485         return 0;
486 }
487
488 /**
489  * Waits for rendering to the object to be completed
490  * @obj: i915 gem object
491  * @flags: how to wait (under a lock, for all rendering or just for writes etc)
492  * @timeout: how long to wait
493  */
494 int
495 i915_gem_object_wait(struct drm_i915_gem_object *obj,
496                      unsigned int flags,
497                      long timeout)
498 {
499         might_sleep();
500         GEM_BUG_ON(timeout < 0);
501
502         timeout = i915_gem_object_wait_reservation(obj->resv, flags, timeout);
503         return timeout < 0 ? timeout : 0;
504 }
505
506 static int
507 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
508                      struct drm_i915_gem_pwrite *args,
509                      struct drm_file *file)
510 {
511         void *vaddr = obj->phys_handle->vaddr + args->offset;
512         char __user *user_data = u64_to_user_ptr(args->data_ptr);
513
514         /* We manually control the domain here and pretend that it
515          * remains coherent i.e. in the GTT domain, like shmem_pwrite.
516          */
517         intel_fb_obj_invalidate(obj, ORIGIN_CPU);
518         if (copy_from_user(vaddr, user_data, args->size))
519                 return -EFAULT;
520
521         drm_clflush_virt_range(vaddr, args->size);
522         i915_gem_chipset_flush(to_i915(obj->base.dev));
523
524         intel_fb_obj_flush(obj, ORIGIN_CPU);
525         return 0;
526 }
527
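/*
 * Common object-creation helper: round the requested size up to a whole
 * number of pages, allocate a new GEM object of that size and publish a
 * handle for it, returning both the handle and the rounded-up size.
 */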
528 static int
529 i915_gem_create(struct drm_file *file,
530                 struct drm_i915_private *dev_priv,
531                 u64 *size_p,
532                 u32 *handle_p)
533 {
534         struct drm_i915_gem_object *obj;
535         u32 handle;
536         u64 size;
537         int ret;
538
539         size = round_up(*size_p, PAGE_SIZE);
540         if (size == 0)
541                 return -EINVAL;
542
543         /* Allocate the new object */
544         obj = i915_gem_object_create(dev_priv, size);
545         if (IS_ERR(obj))
546                 return PTR_ERR(obj);
547
548         ret = drm_gem_handle_create(file, &obj->base, &handle);
549         /* drop reference from allocate - handle holds it now */
550         i915_gem_object_put(obj);
551         if (ret)
552                 return ret;
553
554         *handle_p = handle;
555         *size_p = size;
556         return 0;
557 }
558
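/**
 * Creates a dumb buffer suitable for scanout of the given width/height/bpp.
 * @file: drm file pointer
 * @dev: drm device pointer
 * @args: dumb-buffer description, updated with the chosen pitch and size
 *
 * The pitch is aligned for scanout and, if it exceeds the maximum stride
 * supported for the matching format, rounded up to a page so that the
 * buffer can be remapped.
 */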
559 int
560 i915_gem_dumb_create(struct drm_file *file,
561                      struct drm_device *dev,
562                      struct drm_mode_create_dumb *args)
563 {
564         int cpp = DIV_ROUND_UP(args->bpp, 8);
565         u32 format;
566
567         switch (cpp) {
568         case 1:
569                 format = DRM_FORMAT_C8;
570                 break;
571         case 2:
572                 format = DRM_FORMAT_RGB565;
573                 break;
574         case 4:
575                 format = DRM_FORMAT_XRGB8888;
576                 break;
577         default:
578                 return -EINVAL;
579         }
580
581         /* have to work out size/pitch and return them */
582         args->pitch = ALIGN(args->width * cpp, 64);
583
584         /* align stride to page size so that we can remap */
585         if (args->pitch > intel_plane_fb_max_stride(to_i915(dev), format,
586                                                     DRM_FORMAT_MOD_LINEAR))
587                 args->pitch = ALIGN(args->pitch, 4096);
588
589         args->size = args->pitch * args->height;
590         return i915_gem_create(file, to_i915(dev),
591                                &args->size, &args->handle);
592 }
593
594 static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
595 {
596         return !(obj->cache_level == I915_CACHE_NONE ||
597                  obj->cache_level == I915_CACHE_WT);
598 }
599
600 /**
601  * Creates a new mm object and returns a handle to it.
602  * @dev: drm device pointer
603  * @data: ioctl data blob
604  * @file: drm file pointer
605  */
606 int
607 i915_gem_create_ioctl(struct drm_device *dev, void *data,
608                       struct drm_file *file)
609 {
610         struct drm_i915_private *dev_priv = to_i915(dev);
611         struct drm_i915_gem_create *args = data;
612
613         i915_gem_flush_free_objects(dev_priv);
614
615         return i915_gem_create(file, dev_priv,
616                                &args->size, &args->handle);
617 }
618
619 static inline enum fb_op_origin
620 fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
621 {
622         return (domain == I915_GEM_DOMAIN_GTT ?
623                 obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
624 }
625
626 void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
627 {
628         intel_wakeref_t wakeref;
629
630         /*
631          * No actual flushing is required for the GTT write domain for reads
632          * from the GTT domain. Writes to it "immediately" go to main memory
633          * as far as we know, so there's no chipset flush. It also doesn't
634          * land in the GPU render cache.
635          *
636          * However, we do have to enforce the order so that all writes through
637          * the GTT land before any writes to the device, such as updates to
638          * the GATT itself.
639          *
640          * We also have to wait a bit for the writes to land from the GTT.
641          * An uncached read (i.e. mmio) seems to be ideal for the round-trip
642          * timing. This issue has only been observed when switching quickly
643          * between GTT writes and CPU reads from inside the kernel on recent hw,
644          * and it appears to only affect discrete GTT blocks (i.e. on LLC
645          * system agents we cannot reproduce this behaviour, until Cannonlake
646          * that was!).
647          */
648
649         wmb();
650
651         if (INTEL_INFO(dev_priv)->has_coherent_ggtt)
652                 return;
653
654         i915_gem_chipset_flush(dev_priv);
655
656         with_intel_runtime_pm(dev_priv, wakeref) {
657                 spin_lock_irq(&dev_priv->uncore.lock);
658
659                 POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE));
660
661                 spin_unlock_irq(&dev_priv->uncore.lock);
662         }
663 }
664
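/*
 * Flush any writes pending in the object's current write domain, if that
 * domain is selected by flush_domains: GTT writes are flushed through the
 * chipset and frontbuffer tracking, WC writes need only a memory barrier,
 * CPU writes are clflushed, and render writes merely mark the cache as
 * dirty when the cache level requires it.
 */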
665 static void
666 flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
667 {
668         struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
669         struct i915_vma *vma;
670
671         if (!(obj->write_domain & flush_domains))
672                 return;
673
674         switch (obj->write_domain) {
675         case I915_GEM_DOMAIN_GTT:
676                 i915_gem_flush_ggtt_writes(dev_priv);
677
678                 intel_fb_obj_flush(obj,
679                                    fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
680
681                 for_each_ggtt_vma(vma, obj) {
682                         if (vma->iomap)
683                                 continue;
684
685                         i915_vma_unset_ggtt_write(vma);
686                 }
687                 break;
688
689         case I915_GEM_DOMAIN_WC:
690                 wmb();
691                 break;
692
693         case I915_GEM_DOMAIN_CPU:
694                 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
695                 break;
696
697         case I915_GEM_DOMAIN_RENDER:
698                 if (gpu_write_needs_clflush(obj))
699                         obj->cache_dirty = true;
700                 break;
701         }
702
703         obj->write_domain = 0;
704 }
705
706 /*
707  * Pins the specified object's pages and synchronizes the object with
708  * GPU accesses. Sets needs_clflush to non-zero if the caller should
709  * flush the object from the CPU cache.
710  */
711 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
712                                     unsigned int *needs_clflush)
713 {
714         int ret;
715
716         lockdep_assert_held(&obj->base.dev->struct_mutex);
717
718         *needs_clflush = 0;
719         if (!i915_gem_object_has_struct_page(obj))
720                 return -ENODEV;
721
722         ret = i915_gem_object_wait(obj,
723                                    I915_WAIT_INTERRUPTIBLE |
724                                    I915_WAIT_LOCKED,
725                                    MAX_SCHEDULE_TIMEOUT);
726         if (ret)
727                 return ret;
728
729         ret = i915_gem_object_pin_pages(obj);
730         if (ret)
731                 return ret;
732
733         if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
734             !static_cpu_has(X86_FEATURE_CLFLUSH)) {
735                 ret = i915_gem_object_set_to_cpu_domain(obj, false);
736                 if (ret)
737                         goto err_unpin;
738                 else
739                         goto out;
740         }
741
742         flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
743
744         /* If we're not in the cpu read domain, set ourself into the gtt
745          * read domain and manually flush cachelines (if required). This
746          * optimizes for the case when the gpu will dirty the data
747          * anyway again before the next pread happens.
748          */
749         if (!obj->cache_dirty &&
750             !(obj->read_domains & I915_GEM_DOMAIN_CPU))
751                 *needs_clflush = CLFLUSH_BEFORE;
752
753 out:
754         /* return with the pages pinned */
755         return 0;
756
757 err_unpin:
758         i915_gem_object_unpin_pages(obj);
759         return ret;
760 }
761
762 int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
763                                      unsigned int *needs_clflush)
764 {
765         int ret;
766
767         lockdep_assert_held(&obj->base.dev->struct_mutex);
768
769         *needs_clflush = 0;
770         if (!i915_gem_object_has_struct_page(obj))
771                 return -ENODEV;
772
773         ret = i915_gem_object_wait(obj,
774                                    I915_WAIT_INTERRUPTIBLE |
775                                    I915_WAIT_LOCKED |
776                                    I915_WAIT_ALL,
777                                    MAX_SCHEDULE_TIMEOUT);
778         if (ret)
779                 return ret;
780
781         ret = i915_gem_object_pin_pages(obj);
782         if (ret)
783                 return ret;
784
785         if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
786             !static_cpu_has(X86_FEATURE_CLFLUSH)) {
787                 ret = i915_gem_object_set_to_cpu_domain(obj, true);
788                 if (ret)
789                         goto err_unpin;
790                 else
791                         goto out;
792         }
793
794         flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
795
796         /* If we're not in the cpu write domain, set ourself into the
797          * gtt write domain and manually flush cachelines (as required).
798          * This optimizes for the case when the gpu will use the data
799          * right away and we therefore have to clflush anyway.
800          */
801         if (!obj->cache_dirty) {
802                 *needs_clflush |= CLFLUSH_AFTER;
803
804                 /*
805                  * Same trick applies to invalidate partially written
806                  * cachelines read before writing.
807                  */
808                 if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
809                         *needs_clflush |= CLFLUSH_BEFORE;
810         }
811
812 out:
813         intel_fb_obj_invalidate(obj, ORIGIN_CPU);
814         obj->mm.dirty = true;
815         /* return with the pages pinned */
816         return 0;
817
818 err_unpin:
819         i915_gem_object_unpin_pages(obj);
820         return ret;
821 }
822
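/* Per-page copy function for the shmem pread fastpath: kmap the page,
 * flush invalid cachelines first if needs_clflush is set, then copy the
 * requested range out to userspace.
 */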
823 static int
824 shmem_pread(struct page *page, int offset, int len, char __user *user_data,
825             bool needs_clflush)
826 {
827         char *vaddr;
828         int ret;
829
830         vaddr = kmap(page);
831
832         if (needs_clflush)
833                 drm_clflush_virt_range(vaddr + offset, len);
834
835         ret = __copy_to_user(user_data, vaddr + offset, len);
836
837         kunmap(page);
838
839         return ret ? -EFAULT : 0;
840 }
841
842 static int
843 i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
844                      struct drm_i915_gem_pread *args)
845 {
846         char __user *user_data;
847         u64 remain;
848         unsigned int needs_clflush;
849         unsigned int idx, offset;
850         int ret;
851
852         ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
853         if (ret)
854                 return ret;
855
856         ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
857         mutex_unlock(&obj->base.dev->struct_mutex);
858         if (ret)
859                 return ret;
860
861         remain = args->size;
862         user_data = u64_to_user_ptr(args->data_ptr);
863         offset = offset_in_page(args->offset);
864         for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
865                 struct page *page = i915_gem_object_get_page(obj, idx);
866                 unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);
867
868                 ret = shmem_pread(page, offset, length, user_data,
869                                   needs_clflush);
870                 if (ret)
871                         break;
872
873                 remain -= length;
874                 user_data += length;
875                 offset = 0;
876         }
877
878         i915_gem_obj_finish_shmem_access(obj);
879         return ret;
880 }
881
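/* Copy from the GGTT iomap out to userspace, first via a non-faulting
 * atomic WC mapping and then, if the user buffer faults, again via a
 * full mapping that may sleep. Returns non-zero if the copy failed.
 */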
882 static inline bool
883 gtt_user_read(struct io_mapping *mapping,
884               loff_t base, int offset,
885               char __user *user_data, int length)
886 {
887         void __iomem *vaddr;
888         unsigned long unwritten;
889
890         /* We can use the cpu mem copy function because this is X86. */
891         vaddr = io_mapping_map_atomic_wc(mapping, base);
892         unwritten = __copy_to_user_inatomic(user_data,
893                                             (void __force *)vaddr + offset,
894                                             length);
895         io_mapping_unmap_atomic(vaddr);
896         if (unwritten) {
897                 vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
898                 unwritten = copy_to_user(user_data,
899                                          (void __force *)vaddr + offset,
900                                          length);
901                 io_mapping_unmap(vaddr);
902         }
903         return unwritten;
904 }
905
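/*
 * Slow-path pread through the mappable aperture, used when the shmem path
 * is unavailable or faults: pin the object into the GGTT (or fall back to
 * inserting its pages one at a time into a reserved node) and read through
 * the WC iomap of the aperture.
 */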
906 static int
907 i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
908                    const struct drm_i915_gem_pread *args)
909 {
910         struct drm_i915_private *i915 = to_i915(obj->base.dev);
911         struct i915_ggtt *ggtt = &i915->ggtt;
912         intel_wakeref_t wakeref;
913         struct drm_mm_node node;
914         struct i915_vma *vma;
915         void __user *user_data;
916         u64 remain, offset;
917         int ret;
918
919         ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
920         if (ret)
921                 return ret;
922
923         wakeref = intel_runtime_pm_get(i915);
924         vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
925                                        PIN_MAPPABLE |
926                                        PIN_NONFAULT |
927                                        PIN_NONBLOCK);
928         if (!IS_ERR(vma)) {
929                 node.start = i915_ggtt_offset(vma);
930                 node.allocated = false;
931                 ret = i915_vma_put_fence(vma);
932                 if (ret) {
933                         i915_vma_unpin(vma);
934                         vma = ERR_PTR(ret);
935                 }
936         }
937         if (IS_ERR(vma)) {
938                 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
939                 if (ret)
940                         goto out_unlock;
941                 GEM_BUG_ON(!node.allocated);
942         }
943
944         ret = i915_gem_object_set_to_gtt_domain(obj, false);
945         if (ret)
946                 goto out_unpin;
947
948         mutex_unlock(&i915->drm.struct_mutex);
949
950         user_data = u64_to_user_ptr(args->data_ptr);
951         remain = args->size;
952         offset = args->offset;
953
954         while (remain > 0) {
955                 /* Operation in this page
956                  *
957                  * page_base = page offset within aperture
958                  * page_offset = offset within page
959                  * page_length = bytes to copy for this page
960                  */
961                 u32 page_base = node.start;
962                 unsigned page_offset = offset_in_page(offset);
963                 unsigned page_length = PAGE_SIZE - page_offset;
964                 page_length = remain < page_length ? remain : page_length;
965                 if (node.allocated) {
966                         wmb();
967                         ggtt->vm.insert_page(&ggtt->vm,
968                                              i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
969                                              node.start, I915_CACHE_NONE, 0);
970                         wmb();
971                 } else {
972                         page_base += offset & PAGE_MASK;
973                 }
974
975                 if (gtt_user_read(&ggtt->iomap, page_base, page_offset,
976                                   user_data, page_length)) {
977                         ret = -EFAULT;
978                         break;
979                 }
980
981                 remain -= page_length;
982                 user_data += page_length;
983                 offset += page_length;
984         }
985
986         mutex_lock(&i915->drm.struct_mutex);
987 out_unpin:
988         if (node.allocated) {
989                 wmb();
990                 ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
991                 remove_mappable_node(&node);
992         } else {
993                 i915_vma_unpin(vma);
994         }
995 out_unlock:
996         intel_runtime_pm_put(i915, wakeref);
997         mutex_unlock(&i915->drm.struct_mutex);
998
999         return ret;
1000 }
1001
1002 /**
1003  * Reads data from the object referenced by handle.
1004  * @dev: drm device pointer
1005  * @data: ioctl data blob
1006  * @file: drm file pointer
1007  *
1008  * On error, the contents of *data are undefined.
1009  */
1010 int
1011 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
1012                      struct drm_file *file)
1013 {
1014         struct drm_i915_gem_pread *args = data;
1015         struct drm_i915_gem_object *obj;
1016         int ret;
1017
1018         if (args->size == 0)
1019                 return 0;
1020
1021         if (!access_ok(u64_to_user_ptr(args->data_ptr),
1022                        args->size))
1023                 return -EFAULT;
1024
1025         obj = i915_gem_object_lookup(file, args->handle);
1026         if (!obj)
1027                 return -ENOENT;
1028
1029         /* Bounds check source.  */
1030         if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
1031                 ret = -EINVAL;
1032                 goto out;
1033         }
1034
1035         trace_i915_gem_object_pread(obj, args->offset, args->size);
1036
1037         ret = i915_gem_object_wait(obj,
1038                                    I915_WAIT_INTERRUPTIBLE,
1039                                    MAX_SCHEDULE_TIMEOUT);
1040         if (ret)
1041                 goto out;
1042
1043         ret = i915_gem_object_pin_pages(obj);
1044         if (ret)
1045                 goto out;
1046
1047         ret = i915_gem_shmem_pread(obj, args);
1048         if (ret == -EFAULT || ret == -ENODEV)
1049                 ret = i915_gem_gtt_pread(obj, args);
1050
1051         i915_gem_object_unpin_pages(obj);
1052 out:
1053         i915_gem_object_put(obj);
1054         return ret;
1055 }
1056
1057 /* This is the fast write path which cannot handle
1058  * page faults in the source data
1059  */
1060
1061 static inline bool
1062 ggtt_write(struct io_mapping *mapping,
1063            loff_t base, int offset,
1064            char __user *user_data, int length)
1065 {
1066         void __iomem *vaddr;
1067         unsigned long unwritten;
1068
1069         /* We can use the cpu mem copy function because this is X86. */
1070         vaddr = io_mapping_map_atomic_wc(mapping, base);
1071         unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset,
1072                                                       user_data, length);
1073         io_mapping_unmap_atomic(vaddr);
1074         if (unwritten) {
1075                 vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
1076                 unwritten = copy_from_user((void __force *)vaddr + offset,
1077                                            user_data, length);
1078                 io_mapping_unmap(vaddr);
1079         }
1080
1081         return unwritten;
1082 }
1083
1084 /**
1085  * This is the fast pwrite path, where we copy the data directly from the
1086  * user into the GTT, uncached.
1087  * @obj: i915 GEM object
1088  * @args: pwrite arguments structure
1089  */
1090 static int
1091 i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
1092                          const struct drm_i915_gem_pwrite *args)
1093 {
1094         struct drm_i915_private *i915 = to_i915(obj->base.dev);
1095         struct i915_ggtt *ggtt = &i915->ggtt;
1096         intel_wakeref_t wakeref;
1097         struct drm_mm_node node;
1098         struct i915_vma *vma;
1099         u64 remain, offset;
1100         void __user *user_data;
1101         int ret;
1102
1103         ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
1104         if (ret)
1105                 return ret;
1106
1107         if (i915_gem_object_has_struct_page(obj)) {
1108                 /*
1109                  * Avoid waking the device up if we can fall back, as
1110                  * waking/resuming is very slow (worst-case 10-100 ms
1111                  * depending on PCI sleeps and our own resume time).
1112                  * This easily dwarfs any performance advantage from
1113                  * using the cache bypass of indirect GGTT access.
1114                  */
1115                 wakeref = intel_runtime_pm_get_if_in_use(i915);
1116                 if (!wakeref) {
1117                         ret = -EFAULT;
1118                         goto out_unlock;
1119                 }
1120         } else {
1121                 /* No backing pages, no fallback, we must force GGTT access */
1122                 wakeref = intel_runtime_pm_get(i915);
1123         }
1124
1125         vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
1126                                        PIN_MAPPABLE |
1127                                        PIN_NONFAULT |
1128                                        PIN_NONBLOCK);
1129         if (!IS_ERR(vma)) {
1130                 node.start = i915_ggtt_offset(vma);
1131                 node.allocated = false;
1132                 ret = i915_vma_put_fence(vma);
1133                 if (ret) {
1134                         i915_vma_unpin(vma);
1135                         vma = ERR_PTR(ret);
1136                 }
1137         }
1138         if (IS_ERR(vma)) {
1139                 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
1140                 if (ret)
1141                         goto out_rpm;
1142                 GEM_BUG_ON(!node.allocated);
1143         }
1144
1145         ret = i915_gem_object_set_to_gtt_domain(obj, true);
1146         if (ret)
1147                 goto out_unpin;
1148
1149         mutex_unlock(&i915->drm.struct_mutex);
1150
1151         intel_fb_obj_invalidate(obj, ORIGIN_CPU);
1152
1153         user_data = u64_to_user_ptr(args->data_ptr);
1154         offset = args->offset;
1155         remain = args->size;
1156         while (remain) {
1157                 /* Operation in this page
1158                  *
1159                  * page_base = page offset within aperture
1160                  * page_offset = offset within page
1161                  * page_length = bytes to copy for this page
1162                  */
1163                 u32 page_base = node.start;
1164                 unsigned int page_offset = offset_in_page(offset);
1165                 unsigned int page_length = PAGE_SIZE - page_offset;
1166                 page_length = remain < page_length ? remain : page_length;
1167                 if (node.allocated) {
1168                         wmb(); /* flush the write before we modify the GGTT */
1169                         ggtt->vm.insert_page(&ggtt->vm,
1170                                              i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
1171                                              node.start, I915_CACHE_NONE, 0);
1172                         wmb(); /* flush modifications to the GGTT (insert_page) */
1173                 } else {
1174                         page_base += offset & PAGE_MASK;
1175                 }
1176                 /* If we get a fault while copying data, then (presumably) our
1177                  * source page isn't available.  Return the error and we'll
1178                  * retry in the slow path.
1179                  * If the object is non-shmem backed, we retry again with the
1180                  * path that handles page fault.
1181                  */
1182                 if (ggtt_write(&ggtt->iomap, page_base, page_offset,
1183                                user_data, page_length)) {
1184                         ret = -EFAULT;
1185                         break;
1186                 }
1187
1188                 remain -= page_length;
1189                 user_data += page_length;
1190                 offset += page_length;
1191         }
1192         intel_fb_obj_flush(obj, ORIGIN_CPU);
1193
1194         mutex_lock(&i915->drm.struct_mutex);
1195 out_unpin:
1196         if (node.allocated) {
1197                 wmb();
1198                 ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
1199                 remove_mappable_node(&node);
1200         } else {
1201                 i915_vma_unpin(vma);
1202         }
1203 out_rpm:
1204         intel_runtime_pm_put(i915, wakeref);
1205 out_unlock:
1206         mutex_unlock(&i915->drm.struct_mutex);
1207         return ret;
1208 }
1209
1210 /* Per-page copy function for the shmem pwrite fastpath.
1211  * Flushes invalid cachelines before writing to the target if
1212  * needs_clflush_before is set and flushes out any written cachelines after
1213  * writing if needs_clflush is set.
1214  */
1215 static int
1216 shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
1217              bool needs_clflush_before,
1218              bool needs_clflush_after)
1219 {
1220         char *vaddr;
1221         int ret;
1222
1223         vaddr = kmap(page);
1224
1225         if (needs_clflush_before)
1226                 drm_clflush_virt_range(vaddr + offset, len);
1227
1228         ret = __copy_from_user(vaddr + offset, user_data, len);
1229         if (!ret && needs_clflush_after)
1230                 drm_clflush_virt_range(vaddr + offset, len);
1231
1232         kunmap(page);
1233
1234         return ret ? -EFAULT : 0;
1235 }
1236
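/*
 * Write the user data into the object's shmemfs pages one page at a time,
 * clflushing before and/or after each copy as determined by
 * i915_gem_obj_prepare_shmem_write() and whether the write covers only
 * part of a cacheline.
 */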
1237 static int
1238 i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
1239                       const struct drm_i915_gem_pwrite *args)
1240 {
1241         struct drm_i915_private *i915 = to_i915(obj->base.dev);
1242         void __user *user_data;
1243         u64 remain;
1244         unsigned int partial_cacheline_write;
1245         unsigned int needs_clflush;
1246         unsigned int offset, idx;
1247         int ret;
1248
1249         ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
1250         if (ret)
1251                 return ret;
1252
1253         ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
1254         mutex_unlock(&i915->drm.struct_mutex);
1255         if (ret)
1256                 return ret;
1257
1258         /* If we don't overwrite a cacheline completely we need to be
1259          * careful to have up-to-date data by first clflushing. Don't
1260          * overcomplicate things and flush the entire patch.
1261          */
1262         partial_cacheline_write = 0;
1263         if (needs_clflush & CLFLUSH_BEFORE)
1264                 partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1;
1265
1266         user_data = u64_to_user_ptr(args->data_ptr);
1267         remain = args->size;
1268         offset = offset_in_page(args->offset);
1269         for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
1270                 struct page *page = i915_gem_object_get_page(obj, idx);
1271                 unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);
1272
1273                 ret = shmem_pwrite(page, offset, length, user_data,
1274                                    (offset | length) & partial_cacheline_write,
1275                                    needs_clflush & CLFLUSH_AFTER);
1276                 if (ret)
1277                         break;
1278
1279                 remain -= length;
1280                 user_data += length;
1281                 offset = 0;
1282         }
1283
1284         intel_fb_obj_flush(obj, ORIGIN_CPU);
1285         i915_gem_obj_finish_shmem_access(obj);
1286         return ret;
1287 }
1288
1289 /**
1290  * Writes data to the object referenced by handle.
1291  * @dev: drm device
1292  * @data: ioctl data blob
1293  * @file: drm file
1294  *
1295  * On error, the contents of the buffer that were to be modified are undefined.
1296  */
1297 int
1298 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
1299                       struct drm_file *file)
1300 {
1301         struct drm_i915_gem_pwrite *args = data;
1302         struct drm_i915_gem_object *obj;
1303         int ret;
1304
1305         if (args->size == 0)
1306                 return 0;
1307
1308         if (!access_ok(u64_to_user_ptr(args->data_ptr), args->size))
1309                 return -EFAULT;
1310
1311         obj = i915_gem_object_lookup(file, args->handle);
1312         if (!obj)
1313                 return -ENOENT;
1314
1315         /* Bounds check destination. */
1316         if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
1317                 ret = -EINVAL;
1318                 goto err;
1319         }
1320
1321         /* Writes not allowed into this read-only object */
1322         if (i915_gem_object_is_readonly(obj)) {
1323                 ret = -EINVAL;
1324                 goto err;
1325         }
1326
1327         trace_i915_gem_object_pwrite(obj, args->offset, args->size);
1328
1329         ret = -ENODEV;
1330         if (obj->ops->pwrite)
1331                 ret = obj->ops->pwrite(obj, args);
1332         if (ret != -ENODEV)
1333                 goto err;
1334
1335         ret = i915_gem_object_wait(obj,
1336                                    I915_WAIT_INTERRUPTIBLE |
1337                                    I915_WAIT_ALL,
1338                                    MAX_SCHEDULE_TIMEOUT);
1339         if (ret)
1340                 goto err;
1341
1342         ret = i915_gem_object_pin_pages(obj);
1343         if (ret)
1344                 goto err;
1345
1346         ret = -EFAULT;
1347         /* We can only do the GTT pwrite on untiled buffers, as otherwise
1348          * it would end up going through the fenced access, and we'll get
1349          * different detiling behavior between reading and writing.
1350          * pread/pwrite currently are reading and writing from the CPU
1351          * perspective, requiring manual detiling by the client.
1352          */
1353         if (!i915_gem_object_has_struct_page(obj) ||
1354             cpu_write_needs_clflush(obj))
1355                 /* Note that the gtt paths might fail with non-page-backed user
1356                  * pointers (e.g. gtt mappings when moving data between
1357                  * textures). Fallback to the shmem path in that case.
1358                  */
1359                 ret = i915_gem_gtt_pwrite_fast(obj, args);
1360
1361         if (ret == -EFAULT || ret == -ENOSPC) {
1362                 if (obj->phys_handle)
1363                         ret = i915_gem_phys_pwrite(obj, args, file);
1364                 else
1365                         ret = i915_gem_shmem_pwrite(obj, args);
1366         }
1367
1368         i915_gem_object_unpin_pages(obj);
1369 err:
1370         i915_gem_object_put(obj);
1371         return ret;
1372 }
1373
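/*
 * Mark the object as recently used: move its bound GGTT vmas to the end
 * of the VM's bound list, and the object itself to the end of the
 * appropriate bound/unbound list, so that it is the last candidate for
 * eviction and shrinking.
 */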
1374 static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
1375 {
1376         struct drm_i915_private *i915 = to_i915(obj->base.dev);
1377         struct list_head *list;
1378         struct i915_vma *vma;
1379
1380         GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
1381
1382         mutex_lock(&i915->ggtt.vm.mutex);
1383         for_each_ggtt_vma(vma, obj) {
1384                 if (!drm_mm_node_allocated(&vma->node))
1385                         continue;
1386
1387                 list_move_tail(&vma->vm_link, &vma->vm->bound_list);
1388         }
1389         mutex_unlock(&i915->ggtt.vm.mutex);
1390
1391         spin_lock(&i915->mm.obj_lock);
1392         list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
1393         list_move_tail(&obj->mm.link, list);
1394         spin_unlock(&i915->mm.obj_lock);
1395 }
1396
1397 /**
1398  * Called when user space prepares to use an object with the CPU, either
1399  * through the mmap ioctl's mapping or a GTT mapping.
1400  * @dev: drm device
1401  * @data: ioctl data blob
1402  * @file: drm file
1403  */
1404 int
1405 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1406                           struct drm_file *file)
1407 {
1408         struct drm_i915_gem_set_domain *args = data;
1409         struct drm_i915_gem_object *obj;
1410         u32 read_domains = args->read_domains;
1411         u32 write_domain = args->write_domain;
1412         int err;
1413
1414         /* Only handle setting domains to types used by the CPU. */
1415         if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
1416                 return -EINVAL;
1417
1418         /*
1419          * Having something in the write domain implies it's in the read
1420          * domain, and only that read domain.  Enforce that in the request.
1421          */
1422         if (write_domain && read_domains != write_domain)
1423                 return -EINVAL;
1424
1425         if (!read_domains)
1426                 return 0;
1427
1428         obj = i915_gem_object_lookup(file, args->handle);
1429         if (!obj)
1430                 return -ENOENT;
1431
1432         /*
1433          * Already in the desired write domain? Nothing for us to do!
1434          *
1435          * We apply a little bit of cunning here to catch a broader set of
1436          * no-ops. If obj->write_domain is set, we must be in the same
1437          * obj->read_domains, and only that domain. Therefore, if that
1438          * obj->write_domain matches the request read_domains, we are
1439          * already in the same read/write domain and can skip the operation,
1440          * without having to further check the requested write_domain.
1441          */
1442         if (READ_ONCE(obj->write_domain) == read_domains) {
1443                 err = 0;
1444                 goto out;
1445         }
1446
1447         /*
1448          * Try to flush the object off the GPU without holding the lock.
1449          * We will repeat the flush holding the lock in the normal manner
1450          * to catch cases where we are gazumped.
1451          */
1452         err = i915_gem_object_wait(obj,
1453                                    I915_WAIT_INTERRUPTIBLE |
1454                                    I915_WAIT_PRIORITY |
1455                                    (write_domain ? I915_WAIT_ALL : 0),
1456                                    MAX_SCHEDULE_TIMEOUT);
1457         if (err)
1458                 goto out;
1459
1460         /*
1461          * Proxy objects do not control access to the backing storage, ergo
1462          * they cannot be used as a means to manipulate the cache domain
1463          * tracking for that backing storage. The proxy object is always
1464          * considered to be outside of any cache domain.
1465          */
1466         if (i915_gem_object_is_proxy(obj)) {
1467                 err = -ENXIO;
1468                 goto out;
1469         }
1470
1471         /*
1472          * Flush and acquire obj->pages so that we are coherent through
1473          * direct access in memory with previous cached writes through
1474          * shmemfs and that our cache domain tracking remains valid.
1475          * For example, if the obj->filp was moved to swap without us
1476          * being notified and releasing the pages, we would mistakenly
1477          * continue to assume that the obj remained out of the CPU cached
1478          * domain.
1479          */
1480         err = i915_gem_object_pin_pages(obj);
1481         if (err)
1482                 goto out;
1483
1484         err = i915_mutex_lock_interruptible(dev);
1485         if (err)
1486                 goto out_unpin;
1487
1488         if (read_domains & I915_GEM_DOMAIN_WC)
1489                 err = i915_gem_object_set_to_wc_domain(obj, write_domain);
1490         else if (read_domains & I915_GEM_DOMAIN_GTT)
1491                 err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
1492         else
1493                 err = i915_gem_object_set_to_cpu_domain(obj, write_domain);
1494
1495         /* And bump the LRU for this access */
1496         i915_gem_object_bump_inactive_ggtt(obj);
1497
1498         mutex_unlock(&dev->struct_mutex);
1499
1500         if (write_domain != 0)
1501                 intel_fb_obj_invalidate(obj,
1502                                         fb_write_origin(obj, write_domain));
1503
1504 out_unpin:
1505         i915_gem_object_unpin_pages(obj);
1506 out:
1507         i915_gem_object_put(obj);
1508         return err;
1509 }
1510
1511 /**
1512  * Called when user space has done writes to this buffer
1513  * @dev: drm device
1514  * @data: ioctl data blob
1515  * @file: drm file
1516  */
1517 int
1518 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1519                          struct drm_file *file)
1520 {
1521         struct drm_i915_gem_sw_finish *args = data;
1522         struct drm_i915_gem_object *obj;
1523
1524         obj = i915_gem_object_lookup(file, args->handle);
1525         if (!obj)
1526                 return -ENOENT;
1527
1528         /*
1529          * Proxy objects are barred from CPU access, so there is no
1530          * need to ban sw_finish as it is a nop.
1531          */
1532
1533         /* Pinned buffers may be scanout, so flush the cache */
1534         i915_gem_object_flush_if_display(obj);
1535         i915_gem_object_put(obj);
1536
1537         return 0;
1538 }
1539
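/* Check that the vma found by find_vma() is exactly the mapping we just
 * created with vm_mmap(): same backing file, same start address and the
 * same page-aligned size.
 */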
1540 static inline bool
1541 __vma_matches(struct vm_area_struct *vma, struct file *filp,
1542               unsigned long addr, unsigned long size)
1543 {
1544         if (vma->vm_file != filp)
1545                 return false;
1546
1547         return vma->vm_start == addr &&
1548                (vma->vm_end - vma->vm_start) == PAGE_ALIGN(size);
1549 }
1550
1551 /**
1552  * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
1553  *                       it is mapped to.
1554  * @dev: drm device
1555  * @data: ioctl data blob
1556  * @file: drm file
1557  *
1558  * While the mapping holds a reference on the contents of the object, it doesn't
1559  * imply a ref on the object itself.
1560  *
1561  * IMPORTANT:
1562  *
1563  * DRM driver writers who look at this function as an example for how to do GEM
1564  * mmap support, please don't implement mmap support like here. The modern way
1565  * to implement DRM mmap support is with an mmap offset ioctl (like
1566  * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
1567  * That way debug tooling like valgrind will understand what's going on; hiding
1568  * the mmap call in a driver private ioctl will break that. The i915 driver only
1569  * does cpu mmaps this way because we didn't know better.
1570  */
1571 int
1572 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1573                     struct drm_file *file)
1574 {
1575         struct drm_i915_gem_mmap *args = data;
1576         struct drm_i915_gem_object *obj;
1577         unsigned long addr;
1578
1579         if (args->flags & ~(I915_MMAP_WC))
1580                 return -EINVAL;
1581
1582         if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
1583                 return -ENODEV;
1584
1585         obj = i915_gem_object_lookup(file, args->handle);
1586         if (!obj)
1587                 return -ENOENT;
1588
1589         /* prime objects have no backing filp to GEM mmap
1590          * pages from.
1591          */
1592         if (!obj->base.filp) {
1593                 addr = -ENXIO;
1594                 goto err;
1595         }
1596
1597         if (range_overflows(args->offset, args->size, (u64)obj->base.size)) {
1598                 addr = -EINVAL;
1599                 goto err;
1600         }
1601
1602         addr = vm_mmap(obj->base.filp, 0, args->size,
1603                        PROT_READ | PROT_WRITE, MAP_SHARED,
1604                        args->offset);
1605         if (IS_ERR_VALUE(addr))
1606                 goto err;
1607
1608         if (args->flags & I915_MMAP_WC) {
1609                 struct mm_struct *mm = current->mm;
1610                 struct vm_area_struct *vma;
1611
1612                 if (down_write_killable(&mm->mmap_sem)) {
1613                         addr = -EINTR;
1614                         goto err;
1615                 }
1616                 vma = find_vma(mm, addr);
1617                 if (vma && __vma_matches(vma, obj->base.filp, addr, args->size))
1618                         vma->vm_page_prot =
1619                                 pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
1620                 else
1621                         addr = -ENOMEM;
1622                 up_write(&mm->mmap_sem);
1623                 if (IS_ERR_VALUE(addr))
1624                         goto err;
1625
1626                 /* This may race, but that's ok, it only gets set */
1627                 WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
1628         }
1629         i915_gem_object_put(obj);
1630
1631         args->addr_ptr = (u64)addr;
1632         return 0;
1633
1634 err:
1635         i915_gem_object_put(obj);
1636         return addr;
1637 }
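/*
 * Illustrative userspace sketch (not driver code): a CPU mmap of a GEM
 * object via this legacy ioctl, optionally requesting write-combining.
 * "fd", "handle" and "size" are assumed to be set up elsewhere.
 *
 *	struct drm_i915_gem_mmap arg = {
 *		.handle = handle,
 *		.offset = 0,
 *		.size = size,
 *		.flags = I915_MMAP_WC,
 *	};
 *	void *ptr = NULL;
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP, &arg) == 0)
 *		ptr = (void *)(uintptr_t)arg.addr_ptr;
 */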
1638
1639 static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj)
1640 {
1641         return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT;
1642 }
1643
1644 /**
1645  * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
1646  *
1647  * A history of the GTT mmap interface:
1648  *
1649  * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to be
1650  *     aligned and suitable for fencing, and still fit into the available
1651  *     mappable space left by the pinned display objects. A classic problem
1652  *     we called the page-fault-of-doom where we would ping-pong between
1653  *     two objects that could not fit inside the GTT and so the memcpy
1654  *     would page one object in at the expense of the other between every
1655  *     single byte.
1656  *
1657  * 1 - Objects can be any size, and have any compatible fencing (X, Y, or none
1658  *     as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
1659  *     object is too large for the available space (or simply too large
1660  *     for the mappable aperture!), a view is created instead and faulted
1661  *     into userspace. (This view is aligned and sized appropriately for
1662  *     fenced access.)
1663  *
1664  * 2 - Recognise WC as a separate cache domain so that we can flush the
1665  *     delayed writes via GTT before performing direct access via WC.
1666  *
1667  * 3 - Remove implicit set-domain(GTT) and synchronisation on initial
1668  *     pagefault; swapin remains transparent.
1669  *
1670  * Restrictions:
1671  *
1672  *  * snoopable objects cannot be accessed via the GTT. Doing so can cause machine
1673  *    hangs on some architectures, corruption on others. An attempt to service
1674  *    a GTT page fault from a snoopable object will generate a SIGBUS.
1675  *
1676  *  * the object must be able to fit into RAM (physical memory, though not
1677  *    limited to the mappable aperture).
1678  *
1679  *
1681  *
1682  *  * a new GTT page fault will synchronize rendering from the GPU and flush
1683  *    all data to system memory. Subsequent access will not be synchronized.
1684  *
1685  *  * all mappings are revoked on runtime device suspend.
1686  *
1687  *  * there are only 8, 16 or 32 fence registers to share between all users
1688  *    (older machines require a fence register for display and blitter access
1689  *    as well). Contention of the fence registers will cause the previous users
1690  *    to be unmapped and any new access will generate new page faults.
1691  *
1692  *  * running out of memory while servicing a fault may generate a SIGBUS,
1693  *    rather than the expected SIGSEGV.
1694  */
1695 int i915_gem_mmap_gtt_version(void)
1696 {
1697         return 3;
1698 }
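/*
 * Illustrative userspace sketch (an assumption, not driver code): the feature
 * level above is discovered with the GETPARAM ioctl before relying on any of
 * the newer GTT mmap semantics.
 *
 *	int version = 0;
 *	struct drm_i915_getparam gp = {
 *		.param = I915_PARAM_MMAP_GTT_VERSION,
 *		.value = &version,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
 */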
1699
1700 static inline struct i915_ggtt_view
1701 compute_partial_view(const struct drm_i915_gem_object *obj,
1702                      pgoff_t page_offset,
1703                      unsigned int chunk)
1704 {
1705         struct i915_ggtt_view view;
1706
1707         if (i915_gem_object_is_tiled(obj))
1708                 chunk = roundup(chunk, tile_row_pages(obj));
1709
1710         view.type = I915_GGTT_VIEW_PARTIAL;
1711         view.partial.offset = rounddown(page_offset, chunk);
1712         view.partial.size =
1713                 min_t(unsigned int, chunk,
1714                       (obj->base.size >> PAGE_SHIFT) - view.partial.offset);
1715
1716         /* If the partial covers the entire object, just create a normal VMA. */
1717         if (chunk >= obj->base.size >> PAGE_SHIFT)
1718                 view.type = I915_GGTT_VIEW_NORMAL;
1719
1720         return view;
1721 }
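/*
 * Worked example (a sketch assuming 4KiB pages, so MIN_CHUNK_PAGES == 256):
 * for an untiled 16MiB object (4096 pages) faulting at page_offset 1000,
 * the chunk stays 256 pages, partial.offset = rounddown(1000, 256) = 768 and
 * partial.size = min(256, 4096 - 768) = 256. As 256 < 4096, the view remains
 * I915_GGTT_VIEW_PARTIAL; only for objects no larger than one chunk does it
 * collapse back to a normal VMA.
 */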
1722
1723 /**
1724  * i915_gem_fault - fault a page into the GTT
1725  * @vmf: fault info
1726  *
1727  * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
1728  * from userspace.  The fault handler takes care of binding the object to
1729  * the GTT (if needed), allocating and programming a fence register (again,
1730  * only if needed based on whether the old reg is still valid or the object
1731  * is tiled) and inserting a new PTE into the faulting process.
1732  *
1733  * Note that the faulting process may involve evicting existing objects
1734  * from the GTT and/or fence registers to make room.  So performance may
1735  * suffer if the GTT working set is large or there are few fence registers
1736  * left.
1737  *
1738  * The current feature set supported by i915_gem_fault() and thus GTT mmaps
1739  * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
1740  */
1741 vm_fault_t i915_gem_fault(struct vm_fault *vmf)
1742 {
1743 #define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT)
1744         struct vm_area_struct *area = vmf->vma;
1745         struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
1746         struct drm_device *dev = obj->base.dev;
1747         struct drm_i915_private *dev_priv = to_i915(dev);
1748         struct i915_ggtt *ggtt = &dev_priv->ggtt;
1749         bool write = area->vm_flags & VM_WRITE;
1750         intel_wakeref_t wakeref;
1751         struct i915_vma *vma;
1752         pgoff_t page_offset;
1753         int srcu;
1754         int ret;
1755
1756         /* Sanity check that we allow writing into this object */
1757         if (i915_gem_object_is_readonly(obj) && write)
1758                 return VM_FAULT_SIGBUS;
1759
1760         /* We don't use vmf->pgoff since that has the fake offset */
1761         page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
1762
1763         trace_i915_gem_object_fault(obj, page_offset, true, write);
1764
1765         ret = i915_gem_object_pin_pages(obj);
1766         if (ret)
1767                 goto err;
1768
1769         wakeref = intel_runtime_pm_get(dev_priv);
1770
1771         srcu = i915_reset_trylock(dev_priv);
1772         if (srcu < 0) {
1773                 ret = srcu;
1774                 goto err_rpm;
1775         }
1776
1777         ret = i915_mutex_lock_interruptible(dev);
1778         if (ret)
1779                 goto err_reset;
1780
1781         /* Access to snoopable pages through the GTT is incoherent. */
1782         if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) {
1783                 ret = -EFAULT;
1784                 goto err_unlock;
1785         }
1786
1787         /* Now pin it into the GTT as needed */
1788         vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
1789                                        PIN_MAPPABLE |
1790                                        PIN_NONBLOCK |
1791                                        PIN_NONFAULT);
1792         if (IS_ERR(vma)) {
1793                 /* Use a partial view if it is bigger than available space */
1794                 struct i915_ggtt_view view =
1795                         compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
1796                 unsigned int flags;
1797
1798                 flags = PIN_MAPPABLE;
1799                 if (view.type == I915_GGTT_VIEW_NORMAL)
1800                         flags |= PIN_NONBLOCK; /* avoid warnings for pinned */
1801
1802                 /*
1803                  * Userspace is now writing through an untracked VMA; abandon
1804                  * all hope that the hardware is able to track future writes.
1805                  */
1806                 obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
1807
1808                 vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
1809                 if (IS_ERR(vma) && !view.type) {
1810                         flags = PIN_MAPPABLE;
1811                         view.type = I915_GGTT_VIEW_PARTIAL;
1812                         vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
1813                 }
1814         }
1815         if (IS_ERR(vma)) {
1816                 ret = PTR_ERR(vma);
1817                 goto err_unlock;
1818         }
1819
1820         ret = i915_vma_pin_fence(vma);
1821         if (ret)
1822                 goto err_unpin;
1823
1824         /* Finally, remap it using the new GTT offset */
1825         ret = remap_io_mapping(area,
1826                                area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT),
1827                                (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT,
1828                                min_t(u64, vma->size, area->vm_end - area->vm_start),
1829                                &ggtt->iomap);
1830         if (ret)
1831                 goto err_fence;
1832
1833         /* Mark as being mmapped into userspace for later revocation */
1834         assert_rpm_wakelock_held(dev_priv);
1835         if (!i915_vma_set_userfault(vma) && !obj->userfault_count++)
1836                 list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
1837         if (CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)
1838                 intel_wakeref_auto(&dev_priv->mm.userfault_wakeref,
1839                                    msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND));
1840         GEM_BUG_ON(!obj->userfault_count);
1841
1842         i915_vma_set_ggtt_write(vma);
1843
1844 err_fence:
1845         i915_vma_unpin_fence(vma);
1846 err_unpin:
1847         __i915_vma_unpin(vma);
1848 err_unlock:
1849         mutex_unlock(&dev->struct_mutex);
1850 err_reset:
1851         i915_reset_unlock(dev_priv, srcu);
1852 err_rpm:
1853         intel_runtime_pm_put(dev_priv, wakeref);
1854         i915_gem_object_unpin_pages(obj);
1855 err:
1856         switch (ret) {
1857         case -EIO:
1858                 /*
1859                  * We eat errors when the gpu is terminally wedged to avoid
1860                  * userspace unduly crashing (gl has no provisions for mmaps to
1861                  * fail). But any other -EIO isn't ours (e.g. swap in failure)
1862                  * and so needs to be reported.
1863                  */
1864                 if (!i915_terminally_wedged(dev_priv))
1865                         return VM_FAULT_SIGBUS;
1866                 /* else: fall through */
1867         case -EAGAIN:
1868                 /*
1869                  * EAGAIN means the gpu is hung and we'll wait for the error
1870                  * handler to reset everything when re-faulting in
1871                  * i915_mutex_lock_interruptible.
1872                  */
1873         case 0:
1874         case -ERESTARTSYS:
1875         case -EINTR:
1876         case -EBUSY:
1877                 /*
1878                  * EBUSY is ok: this just means that another thread
1879                  * already did the job.
1880                  */
1881                 return VM_FAULT_NOPAGE;
1882         case -ENOMEM:
1883                 return VM_FAULT_OOM;
1884         case -ENOSPC:
1885         case -EFAULT:
1886                 return VM_FAULT_SIGBUS;
1887         default:
1888                 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
1889                 return VM_FAULT_SIGBUS;
1890         }
1891 }
1892
1893 static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
1894 {
1895         struct i915_vma *vma;
1896
1897         GEM_BUG_ON(!obj->userfault_count);
1898
1899         obj->userfault_count = 0;
1900         list_del(&obj->userfault_link);
1901         drm_vma_node_unmap(&obj->base.vma_node,
1902                            obj->base.dev->anon_inode->i_mapping);
1903
1904         for_each_ggtt_vma(vma, obj)
1905                 i915_vma_unset_userfault(vma);
1906 }
1907
1908 /**
1909  * i915_gem_release_mmap - remove physical page mappings
1910  * @obj: obj in question
1911  *
1912  * Preserve the reservation of the mmapping with the DRM core code, but
1913  * relinquish ownership of the pages back to the system.
1914  *
1915  * It is vital that we remove the page mapping if we have mapped a tiled
1916  * object through the GTT and then lose the fence register due to
1917  * resource pressure. Similarly if the object has been moved out of the
1918  * aperture, then pages mapped into userspace must be revoked. Removing the
1919  * mapping will then trigger a page fault on the next user access, allowing
1920  * fixup by i915_gem_fault().
1921  */
1922 void
1923 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
1924 {
1925         struct drm_i915_private *i915 = to_i915(obj->base.dev);
1926         intel_wakeref_t wakeref;
1927
1928         /* Serialisation between user GTT access and our code depends upon
1929          * revoking the CPU's PTE whilst the mutex is held. The next user
1930          * pagefault then has to wait until we release the mutex.
1931          *
1932          * Note that RPM complicates this somewhat by adding an additional
1933          * requirement that operations to the GGTT be made holding the RPM
1934          * wakeref.
1935          */
1936         lockdep_assert_held(&i915->drm.struct_mutex);
1937         wakeref = intel_runtime_pm_get(i915);
1938
1939         if (!obj->userfault_count)
1940                 goto out;
1941
1942         __i915_gem_object_release_mmap(obj);
1943
1944         /* Ensure that the CPU's PTEs are revoked and there are no outstanding
1945          * memory transactions from userspace before we return. The TLB
1946          * flushing implied by changing the PTE above *should* be
1947          * sufficient, an extra barrier here just provides us with a bit
1948          * of paranoid documentation about our requirement to serialise
1949          * memory writes before touching registers / GSM.
1950          */
1951         wmb();
1952
1953 out:
1954         intel_runtime_pm_put(i915, wakeref);
1955 }
1956
1957 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
1958 {
1959         struct drm_i915_gem_object *obj, *on;
1960         int i;
1961
1962         /*
1963          * Only called during RPM suspend. All users of the userfault_list
1964          * must be holding an RPM wakeref to ensure that this can not
1965          * run concurrently with themselves (and use the struct_mutex for
1966          * protection between themselves).
1967          */
1968
1969         list_for_each_entry_safe(obj, on,
1970                                  &dev_priv->mm.userfault_list, userfault_link)
1971                 __i915_gem_object_release_mmap(obj);
1972
1973         /* The fence will be lost when the device powers down. If any were
1974          * in use by hardware (i.e. they are pinned), we should not be powering
1975          * down! All other fences will be reacquired by the user upon waking.
1976          */
1977         for (i = 0; i < dev_priv->num_fence_regs; i++) {
1978                 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
1979
1980                 /* Ideally we want to assert that the fence register is not
1981                  * live at this point (i.e. that no piece of code will be
1982                  * trying to write through fence + GTT, as that both violates
1983                  * our tracking of activity and associated locking/barriers,
1984                  * but also is illegal given that the hw is powered down).
1985                  *
1986                  * Previously we used reg->pin_count as a "liveness" indicator.
1987                  * That is not sufficient, and we need a more fine-grained
1988                  * tool if we want to have a sanity check here.
1989                  */
1990
1991                 if (!reg->vma)
1992                         continue;
1993
1994                 GEM_BUG_ON(i915_vma_has_userfault(reg->vma));
1995                 reg->dirty = true;
1996         }
1997 }
1998
1999 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
2000 {
2001         struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2002         int err;
2003
2004         err = drm_gem_create_mmap_offset(&obj->base);
2005         if (likely(!err))
2006                 return 0;
2007
2008         /* Attempt to reap some mmap space from dead objects */
2009         do {
2010                 err = i915_gem_wait_for_idle(dev_priv,
2011                                              I915_WAIT_INTERRUPTIBLE,
2012                                              MAX_SCHEDULE_TIMEOUT);
2013                 if (err)
2014                         break;
2015
2016                 i915_gem_drain_freed_objects(dev_priv);
2017                 err = drm_gem_create_mmap_offset(&obj->base);
2018                 if (!err)
2019                         break;
2020
2021         } while (flush_delayed_work(&dev_priv->gem.retire_work));
2022
2023         return err;
2024 }
2025
2026 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
2027 {
2028         drm_gem_free_mmap_offset(&obj->base);
2029 }
2030
2031 int
2032 i915_gem_mmap_gtt(struct drm_file *file,
2033                   struct drm_device *dev,
2034                   u32 handle,
2035                   u64 *offset)
2036 {
2037         struct drm_i915_gem_object *obj;
2038         int ret;
2039
2040         obj = i915_gem_object_lookup(file, handle);
2041         if (!obj)
2042                 return -ENOENT;
2043
2044         ret = i915_gem_object_create_mmap_offset(obj);
2045         if (ret == 0)
2046                 *offset = drm_vma_node_offset_addr(&obj->base.vma_node);
2047
2048         i915_gem_object_put(obj);
2049         return ret;
2050 }
2051
2052 /**
2053  * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
2054  * @dev: DRM device
2055  * @data: GTT mapping ioctl data
2056  * @file: GEM object info
2057  *
2058  * Simply returns the fake offset to userspace so it can mmap it.
2059  * The mmap call will end up in drm_gem_mmap(), which will set things
2060  * up so we can get faults in the handler above.
2061  *
2062  * The fault handler will take care of binding the object into the GTT
2063  * (since it may have been evicted to make room for something), allocating
2064  * a fence register, and mapping the appropriate aperture address into
2065  * userspace.
2066  */
2067 int
2068 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
2069                         struct drm_file *file)
2070 {
2071         struct drm_i915_gem_mmap_gtt *args = data;
2072
2073         return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
2074 }
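/*
 * Illustrative userspace sketch (not driver code): the returned fake offset
 * is fed straight back into mmap() on the DRM fd, which routes the faults
 * into i915_gem_fault() above. "fd", "handle" and "size" are assumed.
 *
 *	struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
 *	void *ptr = MAP_FAILED;
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg) == 0)
 *		ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
 *			   MAP_SHARED, fd, arg.offset);
 */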
2075
2076 /* Immediately discard the backing storage */
2077 void __i915_gem_object_truncate(struct drm_i915_gem_object *obj)
2078 {
2079         i915_gem_object_free_mmap_offset(obj);
2080
2081         if (obj->base.filp == NULL)
2082                 return;
2083
2084         /* Our goal here is to return as much of the memory as
2085          * possible back to the system, as we are called from OOM.
2086          * To do this we must instruct the shmfs to drop all of its
2087          * backing pages, *now*.
2088          */
2089         shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
2090         obj->mm.madv = __I915_MADV_PURGED;
2091         obj->mm.pages = ERR_PTR(-EFAULT);
2092 }
2093
2094 /*
2095  * Move pages to appropriate lru and release the pagevec, decrementing the
2096  * ref count of those pages.
2097  */
2098 static void check_release_pagevec(struct pagevec *pvec)
2099 {
2100         check_move_unevictable_pages(pvec);
2101         __pagevec_release(pvec);
2102         cond_resched();
2103 }
2104
2105 static void
2106 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
2107                               struct sg_table *pages)
2108 {
2109         struct sgt_iter sgt_iter;
2110         struct pagevec pvec;
2111         struct page *page;
2112
2113         __i915_gem_object_release_shmem(obj, pages, true);
2114         i915_gem_gtt_finish_pages(obj, pages);
2115
2116         if (i915_gem_object_needs_bit17_swizzle(obj))
2117                 i915_gem_object_save_bit_17_swizzle(obj, pages);
2118
2119         mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping);
2120
2121         pagevec_init(&pvec);
2122         for_each_sgt_page(page, sgt_iter, pages) {
2123                 if (obj->mm.dirty)
2124                         set_page_dirty(page);
2125
2126                 if (obj->mm.madv == I915_MADV_WILLNEED)
2127                         mark_page_accessed(page);
2128
2129                 if (!pagevec_add(&pvec, page))
2130                         check_release_pagevec(&pvec);
2131         }
2132         if (pagevec_count(&pvec))
2133                 check_release_pagevec(&pvec);
2134         obj->mm.dirty = false;
2135
2136         sg_free_table(pages);
2137         kfree(pages);
2138 }
2139
2140 static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
2141 {
2142         struct radix_tree_iter iter;
2143         void __rcu **slot;
2144
2145         rcu_read_lock();
2146         radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0)
2147                 radix_tree_delete(&obj->mm.get_page.radix, iter.index);
2148         rcu_read_unlock();
2149 }
2150
2151 static struct sg_table *
2152 __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
2153 {
2154         struct drm_i915_private *i915 = to_i915(obj->base.dev);
2155         struct sg_table *pages;
2156
2157         pages = fetch_and_zero(&obj->mm.pages);
2158         if (IS_ERR_OR_NULL(pages))
2159                 return pages;
2160
2161         spin_lock(&i915->mm.obj_lock);
2162         list_del(&obj->mm.link);
2163         spin_unlock(&i915->mm.obj_lock);
2164
2165         if (obj->mm.mapping) {
2166                 void *ptr;
2167
2168                 ptr = page_mask_bits(obj->mm.mapping);
2169                 if (is_vmalloc_addr(ptr))
2170                         vunmap(ptr);
2171                 else
2172                         kunmap(kmap_to_page(ptr));
2173
2174                 obj->mm.mapping = NULL;
2175         }
2176
2177         __i915_gem_object_reset_page_iter(obj);
2178         obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
2179
2180         return pages;
2181 }
2182
2183 int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
2184                                 enum i915_mm_subclass subclass)
2185 {
2186         struct sg_table *pages;
2187         int ret;
2188
2189         if (i915_gem_object_has_pinned_pages(obj))
2190                 return -EBUSY;
2191
2192         GEM_BUG_ON(obj->bind_count);
2193
2194         /* May be called by shrinker from within get_pages() (on another bo) */
2195         mutex_lock_nested(&obj->mm.lock, subclass);
2196         if (unlikely(atomic_read(&obj->mm.pages_pin_count))) {
2197                 ret = -EBUSY;
2198                 goto unlock;
2199         }
2200
2201         /*
2202          * ->put_pages might need to allocate memory for the bit17 swizzle
2203          * array, hence protect them from being reaped by removing them from gtt
2204          * lists early.
2205          */
2206         pages = __i915_gem_object_unset_pages(obj);
2207
2208         /*
2209          * XXX Temporary hijinx to avoid updating all backends to handle
2210          * NULL pages. In the future, when we have more asynchronous
2211          * get_pages backends we should be better able to handle the
2212          * cancellation of the async task in a more uniform manner.
2213          */
2214         if (!pages && !i915_gem_object_needs_async_cancel(obj))
2215                 pages = ERR_PTR(-EINVAL);
2216
2217         if (!IS_ERR(pages))
2218                 obj->ops->put_pages(obj, pages);
2219
2220         ret = 0;
2221 unlock:
2222         mutex_unlock(&obj->mm.lock);
2223
2224         return ret;
2225 }
2226
2227 bool i915_sg_trim(struct sg_table *orig_st)
2228 {
2229         struct sg_table new_st;
2230         struct scatterlist *sg, *new_sg;
2231         unsigned int i;
2232
2233         if (orig_st->nents == orig_st->orig_nents)
2234                 return false;
2235
2236         if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN))
2237                 return false;
2238
2239         new_sg = new_st.sgl;
2240         for_each_sg(orig_st->sgl, sg, orig_st->nents, i) {
2241                 sg_set_page(new_sg, sg_page(sg), sg->length, 0);
2242                 sg_dma_address(new_sg) = sg_dma_address(sg);
2243                 sg_dma_len(new_sg) = sg_dma_len(sg);
2244
2245                 new_sg = sg_next(new_sg);
2246         }
2247         GEM_BUG_ON(new_sg); /* Should walk exactly nents and hit the end */
2248
2249         sg_free_table(orig_st);
2250
2251         *orig_st = new_st;
2252         return true;
2253 }
2254
2255 static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2256 {
2257         struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2258         const unsigned long page_count = obj->base.size / PAGE_SIZE;
2259         unsigned long i;
2260         struct address_space *mapping;
2261         struct sg_table *st;
2262         struct scatterlist *sg;
2263         struct sgt_iter sgt_iter;
2264         struct page *page;
2265         unsigned long last_pfn = 0;     /* suppress gcc warning */
2266         unsigned int max_segment = i915_sg_segment_size();
2267         unsigned int sg_page_sizes;
2268         struct pagevec pvec;
2269         gfp_t noreclaim;
2270         int ret;
2271
2272         /*
2273          * Assert that the object is not currently in any GPU domain. As it
2274          * wasn't in the GTT, there shouldn't be any way it could have been in
2275          * a GPU cache
2276          */
2277         GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
2278         GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
2279
2280         /*
2281          * If there's no chance of allocating enough pages for the whole
2282          * object, bail early.
2283          */
2284         if (page_count > totalram_pages())
2285                 return -ENOMEM;
2286
2287         st = kmalloc(sizeof(*st), GFP_KERNEL);
2288         if (st == NULL)
2289                 return -ENOMEM;
2290
2291 rebuild_st:
2292         if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
2293                 kfree(st);
2294                 return -ENOMEM;
2295         }
2296
2297         /*
2298          * Get the list of pages out of our struct file.  They'll be pinned
2299          * at this point until we release them.
2300          *
2301          * Fail silently without starting the shrinker
2302          */
2303         mapping = obj->base.filp->f_mapping;
2304         mapping_set_unevictable(mapping);
2305         noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM);
2306         noreclaim |= __GFP_NORETRY | __GFP_NOWARN;
2307
2308         sg = st->sgl;
2309         st->nents = 0;
2310         sg_page_sizes = 0;
2311         for (i = 0; i < page_count; i++) {
2312                 const unsigned int shrink[] = {
2313                         I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE,
2314                         0,
2315                 }, *s = shrink;
2316                 gfp_t gfp = noreclaim;
2317
2318                 do {
2319                         cond_resched();
2320                         page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2321                         if (!IS_ERR(page))
2322                                 break;
2323
2324                         if (!*s) {
2325                                 ret = PTR_ERR(page);
2326                                 goto err_sg;
2327                         }
2328
2329                         i915_gem_shrink(dev_priv, 2 * page_count, NULL, *s++);
2330
2331                         /*
2332                          * We've tried hard to allocate the memory by reaping
2333                          * our own buffer, now let the real VM do its job and
2334                          * go down in flames if truly OOM.
2335                          *
2336                          * However, since graphics tend to be disposable,
2337                          * defer the oom here by reporting the ENOMEM back
2338                          * to userspace.
2339                          */
2340                         if (!*s) {
2341                                 /* reclaim and warn, but no oom */
2342                                 gfp = mapping_gfp_mask(mapping);
2343
2344                                 /*
2345                                  * Our bo are always dirty and so we require
2346                                  * kswapd to reclaim our pages (direct reclaim
2347                                  * does not effectively begin pageout of our
2348                                  * buffers on its own). However, direct reclaim
2349                                  * only waits for kswapd when under allocation
2350                                  * congestion. So as a result __GFP_RECLAIM is
2351                                  * unreliable and fails to actually reclaim our
2352                                  * dirty pages -- unless you try over and over
2353                                  * again with !__GFP_NORETRY. However, we still
2354                                  * want to fail this allocation rather than
2355                                  * trigger the out-of-memory killer and for
2356                                  * this we want __GFP_RETRY_MAYFAIL.
2357                                  */
2358                                 gfp |= __GFP_RETRY_MAYFAIL;
2359                         }
2360                 } while (1);
2361
2362                 if (!i ||
2363                     sg->length >= max_segment ||
2364                     page_to_pfn(page) != last_pfn + 1) {
2365                         if (i) {
2366                                 sg_page_sizes |= sg->length;
2367                                 sg = sg_next(sg);
2368                         }
2369                         st->nents++;
2370                         sg_set_page(sg, page, PAGE_SIZE, 0);
2371                 } else {
2372                         sg->length += PAGE_SIZE;
2373                 }
2374                 last_pfn = page_to_pfn(page);
2375
2376                 /* Check that the i965g/gm workaround works. */
2377                 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
2378         }
2379         if (sg) { /* loop terminated early; short sg table */
2380                 sg_page_sizes |= sg->length;
2381                 sg_mark_end(sg);
2382         }
2383
2384         /* Trim unused sg entries to avoid wasting memory. */
2385         i915_sg_trim(st);
2386
2387         ret = i915_gem_gtt_prepare_pages(obj, st);
2388         if (ret) {
2389                 /*
2390                  * DMA remapping failed? One possible cause is that
2391                  * it could not reserve enough large entries; asking
2392                  * for PAGE_SIZE chunks instead may be helpful.
2393                  */
2394                 if (max_segment > PAGE_SIZE) {
2395                         for_each_sgt_page(page, sgt_iter, st)
2396                                 put_page(page);
2397                         sg_free_table(st);
2398
2399                         max_segment = PAGE_SIZE;
2400                         goto rebuild_st;
2401                 } else {
2402                         dev_warn(&dev_priv->drm.pdev->dev,
2403                                  "Failed to DMA remap %lu pages\n",
2404                                  page_count);
2405                         goto err_pages;
2406                 }
2407         }
2408
2409         if (i915_gem_object_needs_bit17_swizzle(obj))
2410                 i915_gem_object_do_bit_17_swizzle(obj, st);
2411
2412         __i915_gem_object_set_pages(obj, st, sg_page_sizes);
2413
2414         return 0;
2415
2416 err_sg:
2417         sg_mark_end(sg);
2418 err_pages:
2419         mapping_clear_unevictable(mapping);
2420         pagevec_init(&pvec);
2421         for_each_sgt_page(page, sgt_iter, st) {
2422                 if (!pagevec_add(&pvec, page))
2423                         check_release_pagevec(&pvec);
2424         }
2425         if (pagevec_count(&pvec))
2426                 check_release_pagevec(&pvec);
2427         sg_free_table(st);
2428         kfree(st);
2429
2430         /*
2431          * shmemfs first checks if there is enough memory to allocate the page
2432          * and reports ENOSPC should there be insufficient, along with the usual
2433          * ENOMEM for a genuine allocation failure.
2434          *
2435          * We use ENOSPC in our driver to mean that we have run out of aperture
2436          * space and so want to translate the error from shmemfs back to our
2437          * usual understanding of ENOMEM.
2438          */
2439         if (ret == -ENOSPC)
2440                 ret = -ENOMEM;
2441
2442         return ret;
2443 }
2444
2445 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
2446                                  struct sg_table *pages,
2447                                  unsigned int sg_page_sizes)
2448 {
2449         struct drm_i915_private *i915 = to_i915(obj->base.dev);
2450         unsigned long supported = INTEL_INFO(i915)->page_sizes;
2451         int i;
2452
2453         lockdep_assert_held(&obj->mm.lock);
2454
2455         /* Make the pages coherent with the GPU (flushing any swapin). */
2456         if (obj->cache_dirty) {
2457                 obj->write_domain = 0;
2458                 if (i915_gem_object_has_struct_page(obj))
2459                         drm_clflush_sg(pages);
2460                 obj->cache_dirty = false;
2461         }
2462
2463         obj->mm.get_page.sg_pos = pages->sgl;
2464         obj->mm.get_page.sg_idx = 0;
2465
2466         obj->mm.pages = pages;
2467
2468         if (i915_gem_object_is_tiled(obj) &&
2469             i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
2470                 GEM_BUG_ON(obj->mm.quirked);
2471                 __i915_gem_object_pin_pages(obj);
2472                 obj->mm.quirked = true;
2473         }
2474
2475         GEM_BUG_ON(!sg_page_sizes);
2476         obj->mm.page_sizes.phys = sg_page_sizes;
2477
2478         /*
2479          * Calculate the supported page-sizes which fit into the given
2480          * sg_page_sizes. This will give us the page-sizes which we may be able
2481          * to use opportunistically when later inserting into the GTT. For
2482          * example if phys=2G, then in theory we should be able to use 1G, 2M,
2483          * 64K or 4K pages, although in practice this will depend on a number of
2484          * other factors.
2485          */
2486         obj->mm.page_sizes.sg = 0;
2487         for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) {
2488                 if (obj->mm.page_sizes.phys & ~0u << i)
2489                         obj->mm.page_sizes.sg |= BIT(i);
2490         }
2491         GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg));
2492
2493         spin_lock(&i915->mm.obj_lock);
2494         list_add(&obj->mm.link, &i915->mm.unbound_list);
2495         spin_unlock(&i915->mm.obj_lock);
2496 }
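/*
 * Worked example for the page_sizes.sg computation above (a sketch assuming
 * the platform's supported mask covers 4K, 64K and 2M): with
 * phys = I915_GTT_PAGE_SIZE_2M | I915_GTT_PAGE_SIZE_4K, every supported bit
 * i in {12, 16, 21} sees phys & (~0u << i) != 0, so sg ends up as
 * 4K | 64K | 2M - i.e. any of those sizes may later be used for GTT
 * insertion where the layout allows it.
 */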
2497
2498 static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2499 {
2500         int err;
2501
2502         if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) {
2503                 DRM_DEBUG("Attempting to obtain a purgeable object\n");
2504                 return -EFAULT;
2505         }
2506
2507         err = obj->ops->get_pages(obj);
2508         GEM_BUG_ON(!err && !i915_gem_object_has_pages(obj));
2509
2510         return err;
2511 }
2512
2513 /* Ensure that the associated pages are gathered from the backing storage
2514  * and pinned into our object. i915_gem_object_pin_pages() may be called
2515  * multiple times before they are released by a single call to
2516  * i915_gem_object_unpin_pages() - once the pages are no longer referenced
2517  * either as a result of memory pressure (reaping pages under the shrinker)
2518  * or as the object is itself released.
2519  */
2520 int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2521 {
2522         int err;
2523
2524         err = mutex_lock_interruptible(&obj->mm.lock);
2525         if (err)
2526                 return err;
2527
2528         if (unlikely(!i915_gem_object_has_pages(obj))) {
2529                 GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
2530
2531                 err = ____i915_gem_object_get_pages(obj);
2532                 if (err)
2533                         goto unlock;
2534
2535                 smp_mb__before_atomic();
2536         }
2537         atomic_inc(&obj->mm.pages_pin_count);
2538
2539 unlock:
2540         mutex_unlock(&obj->mm.lock);
2541         return err;
2542 }
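/*
 * In-kernel usage sketch (illustrative only): callers bracket access to the
 * backing pages with a pin/unpin pair so the shrinker cannot reap them in
 * between.
 *
 *	int err;
 *
 *	err = i915_gem_object_pin_pages(obj);
 *	if (err)
 *		return err;
 *
 *	... access obj->mm.pages ...
 *
 *	i915_gem_object_unpin_pages(obj);
 */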
2543
2544 /* The 'mapping' part of i915_gem_object_pin_map() below */
2545 static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
2546                                  enum i915_map_type type)
2547 {
2548         unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
2549         struct sg_table *sgt = obj->mm.pages;
2550         struct sgt_iter sgt_iter;
2551         struct page *page;
2552         struct page *stack_pages[32];
2553         struct page **pages = stack_pages;
2554         unsigned long i = 0;
2555         pgprot_t pgprot;
2556         void *addr;
2557
2558         /* A single page can always be kmapped */
2559         if (n_pages == 1 && type == I915_MAP_WB)
2560                 return kmap(sg_page(sgt->sgl));
2561
2562         if (n_pages > ARRAY_SIZE(stack_pages)) {
2563                 /* Too big for stack -- allocate temporary array instead */
2564                 pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL);
2565                 if (!pages)
2566                         return NULL;
2567         }
2568
2569         for_each_sgt_page(page, sgt_iter, sgt)
2570                 pages[i++] = page;
2571
2572         /* Check that we have the expected number of pages */
2573         GEM_BUG_ON(i != n_pages);
2574
2575         switch (type) {
2576         default:
2577                 MISSING_CASE(type);
2578                 /* fallthrough to use PAGE_KERNEL anyway */
2579         case I915_MAP_WB:
2580                 pgprot = PAGE_KERNEL;
2581                 break;
2582         case I915_MAP_WC:
2583                 pgprot = pgprot_writecombine(PAGE_KERNEL_IO);
2584                 break;
2585         }
2586         addr = vmap(pages, n_pages, 0, pgprot);
2587
2588         if (pages != stack_pages)
2589                 kvfree(pages);
2590
2591         return addr;
2592 }
2593
2594 /* get, pin, and map the pages of the object into kernel space */
2595 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
2596                               enum i915_map_type type)
2597 {
2598         enum i915_map_type has_type;
2599         bool pinned;
2600         void *ptr;
2601         int ret;
2602
2603         if (unlikely(!i915_gem_object_has_struct_page(obj)))
2604                 return ERR_PTR(-ENXIO);
2605
2606         ret = mutex_lock_interruptible(&obj->mm.lock);
2607         if (ret)
2608                 return ERR_PTR(ret);
2609
2610         pinned = !(type & I915_MAP_OVERRIDE);
2611         type &= ~I915_MAP_OVERRIDE;
2612
2613         if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) {
2614                 if (unlikely(!i915_gem_object_has_pages(obj))) {
2615                         GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
2616
2617                         ret = ____i915_gem_object_get_pages(obj);
2618                         if (ret)
2619                                 goto err_unlock;
2620
2621                         smp_mb__before_atomic();
2622                 }
2623                 atomic_inc(&obj->mm.pages_pin_count);
2624                 pinned = false;
2625         }
2626         GEM_BUG_ON(!i915_gem_object_has_pages(obj));
2627
2628         ptr = page_unpack_bits(obj->mm.mapping, &has_type);
2629         if (ptr && has_type != type) {
2630                 if (pinned) {
2631                         ret = -EBUSY;
2632                         goto err_unpin;
2633                 }
2634
2635                 if (is_vmalloc_addr(ptr))
2636                         vunmap(ptr);
2637                 else
2638                         kunmap(kmap_to_page(ptr));
2639
2640                 ptr = obj->mm.mapping = NULL;
2641         }
2642
2643         if (!ptr) {
2644                 ptr = i915_gem_object_map(obj, type);
2645                 if (!ptr) {
2646                         ret = -ENOMEM;
2647                         goto err_unpin;
2648                 }
2649
2650                 obj->mm.mapping = page_pack_bits(ptr, type);
2651         }
2652
2653 out_unlock:
2654         mutex_unlock(&obj->mm.lock);
2655         return ptr;
2656
2657 err_unpin:
2658         atomic_dec(&obj->mm.pages_pin_count);
2659 err_unlock:
2660         ptr = ERR_PTR(ret);
2661         goto out_unlock;
2662 }
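/*
 * In-kernel usage sketch (illustrative only; the matching unpin helper,
 * i915_gem_object_unpin_map(), lives in the driver headers):
 *
 *	void *vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
 *
 *	if (IS_ERR(vaddr))
 *		return PTR_ERR(vaddr);
 *	memcpy(vaddr, data, len);
 *	i915_gem_object_unpin_map(obj);
 */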
2663
2664 void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj,
2665                                  unsigned long offset,
2666                                  unsigned long size)
2667 {
2668         enum i915_map_type has_type;
2669         void *ptr;
2670
2671         GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
2672         GEM_BUG_ON(range_overflows_t(typeof(obj->base.size),
2673                                      offset, size, obj->base.size));
2674
2675         obj->mm.dirty = true;
2676
2677         if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)
2678                 return;
2679
2680         ptr = page_unpack_bits(obj->mm.mapping, &has_type);
2681         if (has_type == I915_MAP_WC)
2682                 return;
2683
2684         drm_clflush_virt_range(ptr + offset, size);
2685         if (size == obj->base.size) {
2686                 obj->write_domain &= ~I915_GEM_DOMAIN_CPU;
2687                 obj->cache_dirty = false;
2688         }
2689 }
2690
2691 static int
2692 i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
2693                            const struct drm_i915_gem_pwrite *arg)
2694 {
2695         struct address_space *mapping = obj->base.filp->f_mapping;
2696         char __user *user_data = u64_to_user_ptr(arg->data_ptr);
2697         u64 remain, offset;
2698         unsigned int pg;
2699
2700         /* Caller already validated user args */
2701         GEM_BUG_ON(!access_ok(user_data, arg->size));
2702
2703         /*
2704          * Before we instantiate/pin the backing store for our use, we
2705          * can prepopulate the shmemfs filp efficiently using a write into
2706          * the pagecache. We avoid the penalty of instantiating all the
2707          * pages, important if the user is just writing to a few and never
2708          * uses the object on the GPU, and using a direct write into shmemfs
2709          * allows it to avoid the cost of retrieving a page (either swapin
2710          * or clearing-before-use) before it is overwritten.
2711          */
2712         if (i915_gem_object_has_pages(obj))
2713                 return -ENODEV;
2714
2715         if (obj->mm.madv != I915_MADV_WILLNEED)
2716                 return -EFAULT;
2717
2718         /*
2719          * Before the pages are instantiated the object is treated as being
2720          * in the CPU domain. The pages will be clflushed as required before
2721          * use, and we can freely write into the pages directly. If userspace
2722          * races pwrite with any other operation, corruption will ensue -
2723          * that is userspace's prerogative!
2724          */
2725
2726         remain = arg->size;
2727         offset = arg->offset;
2728         pg = offset_in_page(offset);
2729
2730         do {
2731                 unsigned int len, unwritten;
2732                 struct page *page;
2733                 void *data, *vaddr;
2734                 int err;
2735                 char c;
2736
2737                 len = PAGE_SIZE - pg;
2738                 if (len > remain)
2739                         len = remain;
2740
2741                 /* Prefault the user page to reduce potential recursion */
2742                 err = __get_user(c, user_data);
2743                 if (err)
2744                         return err;
2745
2746                 err = __get_user(c, user_data + len - 1);
2747                 if (err)
2748                         return err;
2749
2750                 err = pagecache_write_begin(obj->base.filp, mapping,
2751                                             offset, len, 0,
2752                                             &page, &data);
2753                 if (err < 0)
2754                         return err;
2755
2756                 vaddr = kmap_atomic(page);
2757                 unwritten = __copy_from_user_inatomic(vaddr + pg,
2758                                                       user_data,
2759                                                       len);
2760                 kunmap_atomic(vaddr);
2761
2762                 err = pagecache_write_end(obj->base.filp, mapping,
2763                                           offset, len, len - unwritten,
2764                                           page, data);
2765                 if (err < 0)
2766                         return err;
2767
2768                 /* We don't handle -EFAULT, leave it to the caller to check */
2769                 if (unwritten)
2770                         return -ENODEV;
2771
2772                 remain -= len;
2773                 user_data += len;
2774                 offset += len;
2775                 pg = 0;
2776         } while (remain);
2777
2778         return 0;
2779 }
2780
2781 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
2782 {
2783         struct drm_i915_private *i915 = to_i915(gem->dev);
2784         struct drm_i915_gem_object *obj = to_intel_bo(gem);
2785         struct drm_i915_file_private *fpriv = file->driver_priv;
2786         struct i915_lut_handle *lut, *ln;
2787
2788         mutex_lock(&i915->drm.struct_mutex);
2789
2790         list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) {
2791                 struct i915_gem_context *ctx = lut->ctx;
2792                 struct i915_vma *vma;
2793
2794                 GEM_BUG_ON(ctx->file_priv == ERR_PTR(-EBADF));
2795                 if (ctx->file_priv != fpriv)
2796                         continue;
2797
2798                 vma = radix_tree_delete(&ctx->handles_vma, lut->handle);
2799                 GEM_BUG_ON(vma->obj != obj);
2800
2801                 /* We allow the process to have multiple handles to the same
2802                  * vma, in the same fd namespace, by virtue of flink/open.
2803                  */
2804                 GEM_BUG_ON(!vma->open_count);
2805                 if (!--vma->open_count && !i915_vma_is_ggtt(vma))
2806                         i915_vma_close(vma);
2807
2808                 list_del(&lut->obj_link);
2809                 list_del(&lut->ctx_link);
2810
2811                 i915_lut_handle_free(lut);
2812                 __i915_gem_object_release_unless_active(obj);
2813         }
2814
2815         mutex_unlock(&i915->drm.struct_mutex);
2816 }
2817
2818 static unsigned long to_wait_timeout(s64 timeout_ns)
2819 {
2820         if (timeout_ns < 0)
2821                 return MAX_SCHEDULE_TIMEOUT;
2822
2823         if (timeout_ns == 0)
2824                 return 0;
2825
2826         return nsecs_to_jiffies_timeout(timeout_ns);
2827 }
2828
2829 /**
2830  * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
2831  * @dev: drm device pointer
2832  * @data: ioctl data blob
2833  * @file: drm file pointer
2834  *
2835  * Returns 0 if successful, else an error is returned with the remaining time in
2836  * the timeout parameter.
2837  *  -ETIME: object is still busy after timeout
2838  *  -ERESTARTSYS: signal interrupted the wait
2839  *  -ENOENT: object doesn't exist
2840  * Also possible, but rare:
2841  *  -EAGAIN: incomplete, restart syscall
2842  *  -ENOMEM: damn
2843  *  -ENODEV: Internal IRQ fail
2844  *  -E?: The add request failed
2845  *
2846  * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
2847  * non-zero timeout parameter the wait ioctl will wait for the given number of
2848  * nanoseconds on an object becoming unbusy. Since the wait itself does so
2849  * without holding struct_mutex, the object may become re-busied before this
2850  * function completes. A similar but shorter race condition exists in the busy
2851  * ioctl.
2852  */
2853 int
2854 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2855 {
2856         struct drm_i915_gem_wait *args = data;
2857         struct drm_i915_gem_object *obj;
2858         ktime_t start;
2859         long ret;
2860
2861         if (args->flags != 0)
2862                 return -EINVAL;
2863
2864         obj = i915_gem_object_lookup(file, args->bo_handle);
2865         if (!obj)
2866                 return -ENOENT;
2867
2868         start = ktime_get();
2869
2870         ret = i915_gem_object_wait(obj,
2871                                    I915_WAIT_INTERRUPTIBLE |
2872                                    I915_WAIT_PRIORITY |
2873                                    I915_WAIT_ALL,
2874                                    to_wait_timeout(args->timeout_ns));
2875
2876         if (args->timeout_ns > 0) {
2877                 args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
2878                 if (args->timeout_ns < 0)
2879                         args->timeout_ns = 0;
2880
2881                 /*
2882                  * Apparently ktime isn't accurate enough and occasionally has a
2883                  * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
2884                  * things up to make the test happy. We allow up to 1 jiffy.
2885                  *
2886                  * This is a regression from the timespec->ktime conversion.
2887                  */
2888                 if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns))
2889                         args->timeout_ns = 0;
2890
2891                 /* Asked to wait beyond the jiffie/scheduler precision? */
2892                 if (ret == -ETIME && args->timeout_ns)
2893                         ret = -EAGAIN;
2894         }
2895
2896         i915_gem_object_put(obj);
2897         return ret;
2898 }
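/*
 * Illustrative userspace sketch (not driver code): waiting forever for a
 * buffer to become idle; per the comment above, a timeout_ns of 0 turns this
 * into a busy-style query. "fd" and "handle" are assumed to exist already.
 *
 *	struct drm_i915_gem_wait arg = {
 *		.bo_handle = handle,
 *		.timeout_ns = -1,	/* wait forever */
 *	};
 *	int err = drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT, &arg);
 */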
2899
2900 static int wait_for_engines(struct drm_i915_private *i915)
2901 {
2902         if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) {
2903                 dev_err(i915->drm.dev,
2904                         "Failed to idle engines, declaring wedged!\n");
2905                 GEM_TRACE_DUMP();
2906                 i915_gem_set_wedged(i915);
2907                 return -EIO;
2908         }
2909
2910         return 0;
2911 }
2912
2913 static long
2914 wait_for_timelines(struct drm_i915_private *i915,
2915                    unsigned int flags, long timeout)
2916 {
2917         struct i915_gt_timelines *gt = &i915->gt.timelines;
2918         struct i915_timeline *tl;
2919
2920         mutex_lock(&gt->mutex);
2921         list_for_each_entry(tl, &gt->active_list, link) {
2922                 struct i915_request *rq;
2923
2924                 rq = i915_active_request_get_unlocked(&tl->last_request);
2925                 if (!rq)
2926                         continue;
2927
2928                 mutex_unlock(&gt->mutex);
2929
2930                 /*
2931                  * "Race-to-idle".
2932                  *
2933                  * Switching to the kernel context is often used as a synchronous
2934                  * step prior to idling, e.g. in suspend for flushing all
2935                  * current operations to memory before sleeping. These we
2936                  * want to complete as quickly as possible to avoid prolonged
2937                  * stalls, so allow the gpu to boost to maximum clocks.
2938                  */
2939                 if (flags & I915_WAIT_FOR_IDLE_BOOST)
2940                         gen6_rps_boost(rq);
2941
2942                 timeout = i915_request_wait(rq, flags, timeout);
2943                 i915_request_put(rq);
2944                 if (timeout < 0)
2945                         return timeout;
2946
2947                 /* restart after reacquiring the lock */
2948                 mutex_lock(&gt->mutex);
2949                 tl = list_entry(&gt->active_list, typeof(*tl), link);
2950         }
2951         mutex_unlock(&gt->mutex);
2952
2953         return timeout;
2954 }
2955
2956 int i915_gem_wait_for_idle(struct drm_i915_private *i915,
2957                            unsigned int flags, long timeout)
2958 {
2959         GEM_TRACE("flags=%x (%s), timeout=%ld%s, awake?=%s\n",
2960                   flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked",
2961                   timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : "",
2962                   yesno(i915->gt.awake));
2963
2964         /* If the device is asleep, we have no requests outstanding */
2965         if (!READ_ONCE(i915->gt.awake))
2966                 return 0;
2967
2968         timeout = wait_for_timelines(i915, flags, timeout);
2969         if (timeout < 0)
2970                 return timeout;
2971
2972         if (flags & I915_WAIT_LOCKED) {
2973                 int err;
2974
2975                 lockdep_assert_held(&i915->drm.struct_mutex);
2976
2977                 err = wait_for_engines(i915);
2978                 if (err)
2979                         return err;
2980
2981                 i915_retire_requests(i915);
2982         }
2983
2984         return 0;
2985 }
2986
2987 static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
2988 {
2989         /*
2990          * We manually flush the CPU domain so that we can override and
2991          * force the flush for the display, and perform it asynchronously.
2992          */
2993         flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
2994         if (obj->cache_dirty)
2995                 i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
2996         obj->write_domain = 0;
2997 }
2998
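     /*
      * Flush any pending CPU writes for an object that is being scanned out.
      * The unlocked pin_global check lets us skip taking struct_mutex in the
      * common case where the object is not on the display.
      */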
2999 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
3000 {
3001         if (!READ_ONCE(obj->pin_global))
3002                 return;
3003
3004         mutex_lock(&obj->base.dev->struct_mutex);
3005         __i915_gem_object_flush_for_display(obj);
3006         mutex_unlock(&obj->base.dev->struct_mutex);
3007 }
3008
3009 /**
3010  * Moves a single object to the WC read, and possibly write, domain.
3011  * @obj: object to act on
3012  * @write: ask for write access or read only
3013  *
3014  * This function returns when the move is complete, including waiting on
3015  * flushes to occur.
3016  */
3017 int
3018 i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
3019 {
3020         int ret;
3021
3022         lockdep_assert_held(&obj->base.dev->struct_mutex);
3023
3024         ret = i915_gem_object_wait(obj,
3025                                    I915_WAIT_INTERRUPTIBLE |
3026                                    I915_WAIT_LOCKED |
3027                                    (write ? I915_WAIT_ALL : 0),
3028                                    MAX_SCHEDULE_TIMEOUT);
3029         if (ret)
3030                 return ret;
3031
3032         if (obj->write_domain == I915_GEM_DOMAIN_WC)
3033                 return 0;
3034
3035         /* Flush and acquire obj->pages so that we are coherent through
3036          * direct access in memory with previous cached writes through
3037          * shmemfs and that our cache domain tracking remains valid.
3038          * For example, if the obj->filp was moved to swap without us
3039          * being notified and releasing the pages, we would mistakenly
3040          * continue to assume that the obj remained out of the CPU cached
3041          * domain.
3042          */
3043         ret = i915_gem_object_pin_pages(obj);
3044         if (ret)
3045                 return ret;
3046
3047         flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
3048
3049         /* Serialise direct access to this object with the barriers for
3050          * coherent writes from the GPU, by effectively invalidating the
3051          * WC domain upon first access.
3052          */
3053         if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
3054                 mb();
3055
3056         /* It should now be out of any other write domains, and we can update
3057          * the domain values for our changes.
3058          */
3059         GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
3060         obj->read_domains |= I915_GEM_DOMAIN_WC;
3061         if (write) {
3062                 obj->read_domains = I915_GEM_DOMAIN_WC;
3063                 obj->write_domain = I915_GEM_DOMAIN_WC;
3064                 obj->mm.dirty = true;
3065         }
3066
3067         i915_gem_object_unpin_pages(obj);
3068         return 0;
3069 }
3070
3071 /**
3072  * Moves a single object to the GTT read, and possibly write, domain.
3073  * @obj: object to act on
3074  * @write: ask for write access or read only
3075  *
3076  * This function returns when the move is complete, including waiting on
3077  * flushes to occur.
3078  */
3079 int
3080 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
3081 {
3082         int ret;
3083
3084         lockdep_assert_held(&obj->base.dev->struct_mutex);
3085
3086         ret = i915_gem_object_wait(obj,
3087                                    I915_WAIT_INTERRUPTIBLE |
3088                                    I915_WAIT_LOCKED |
3089                                    (write ? I915_WAIT_ALL : 0),
3090                                    MAX_SCHEDULE_TIMEOUT);
3091         if (ret)
3092                 return ret;
3093
3094         if (obj->write_domain == I915_GEM_DOMAIN_GTT)
3095                 return 0;
3096
3097         /* Flush and acquire obj->pages so that we are coherent through
3098          * direct access in memory with previous cached writes through
3099          * shmemfs and that our cache domain tracking remains valid.
3100          * For example, if the obj->filp was moved to swap without us
3101          * being notified and releasing the pages, we would mistakenly
3102          * continue to assume that the obj remained out of the CPU cached
3103          * domain.
3104          */
3105         ret = i915_gem_object_pin_pages(obj);
3106         if (ret)
3107                 return ret;
3108
3109         flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
3110
3111         /* Serialise direct access to this object with the barriers for
3112          * coherent writes from the GPU, by effectively invalidating the
3113          * GTT domain upon first access.
3114          */
3115         if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
3116                 mb();
3117
3118         /* It should now be out of any other write domains, and we can update
3119          * the domain values for our changes.
3120          */
3121         GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
3122         obj->read_domains |= I915_GEM_DOMAIN_GTT;
3123         if (write) {
3124                 obj->read_domains = I915_GEM_DOMAIN_GTT;
3125                 obj->write_domain = I915_GEM_DOMAIN_GTT;
3126                 obj->mm.dirty = true;
3127         }
3128
3129         i915_gem_object_unpin_pages(obj);
3130         return 0;
3131 }
3132
3133 /**
3134  * Changes the cache-level of an object across all VMA.
3135  * @obj: object to act on
3136  * @cache_level: new cache level to set for the object
3137  *
3138  * After this function returns, the object will be in the new cache-level
3139  * across all GTT and the contents of the backing storage will be coherent
3140  * with respect to the new cache-level. In order to keep the backing storage
3141  * coherent for all users, we only allow a single cache level to be set
3142  * globally on the object and prevent it from being changed whilst the
3143  * hardware is reading from the object. That is, if the object is currently
3144  * on the scanout, it will be set to uncached (or an equivalent display
3145  * cache coherency mode) and all non-MOCS GPU access will also be uncached so
3146  * that all direct access to the scanout remains coherent.
3147  */
3148 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
3149                                     enum i915_cache_level cache_level)
3150 {
3151         struct i915_vma *vma;
3152         int ret;
3153
3154         lockdep_assert_held(&obj->base.dev->struct_mutex);
3155
3156         if (obj->cache_level == cache_level)
3157                 return 0;
3158
3159         /* Inspect the list of currently bound VMA and unbind any that would
3160          * be invalid given the new cache-level. This is principally to
3161          * catch the issue of the CS prefetch crossing page boundaries and
3162          * reading an invalid PTE on older architectures.
3163          */
3164 restart:
3165         list_for_each_entry(vma, &obj->vma.list, obj_link) {
3166                 if (!drm_mm_node_allocated(&vma->node))
3167                         continue;
3168
3169                 if (i915_vma_is_pinned(vma)) {
3170                         DRM_DEBUG("can not change the cache level of pinned objects\n");
3171                         return -EBUSY;
3172                 }
3173
3174                 if (!i915_vma_is_closed(vma) &&
3175                     i915_gem_valid_gtt_space(vma, cache_level))
3176                         continue;
3177
3178                 ret = i915_vma_unbind(vma);
3179                 if (ret)
3180                         return ret;