2 * Copyright © 2008-2015 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
23 * Authors:
24 * Eric Anholt <eric@anholt.net>
28 #include <drm/drm_vma_manager.h>
29 #include <drm/i915_drm.h>
30 #include <linux/dma-fence-array.h>
31 #include <linux/kthread.h>
32 #include <linux/reservation.h>
33 #include <linux/shmem_fs.h>
34 #include <linux/slab.h>
35 #include <linux/stop_machine.h>
36 #include <linux/swap.h>
37 #include <linux/pci.h>
38 #include <linux/dma-buf.h>
41 #include "i915_gem_clflush.h"
42 #include "i915_gemfs.h"
43 #include "i915_reset.h"
44 #include "i915_trace.h"
45 #include "i915_vgpu.h"
47 #include "intel_drv.h"
48 #include "intel_frontbuffer.h"
49 #include "intel_mocs.h"
50 #include "intel_workarounds.h"
52 static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
54 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
59 if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
62 return obj->pin_global; /* currently in use by HW, keep flushed */
66 insert_mappable_node(struct i915_ggtt *ggtt,
67 struct drm_mm_node *node, u32 size)
69 memset(node, 0, sizeof(*node));
70 return drm_mm_insert_node_in_range(&ggtt->vm.mm, node,
71 size, 0, I915_COLOR_UNEVICTABLE,
72 0, ggtt->mappable_end,
77 remove_mappable_node(struct drm_mm_node *node)
79 drm_mm_remove_node(node);
82 /* some bookkeeping */
83 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
86 spin_lock(&dev_priv->mm.object_stat_lock);
87 dev_priv->mm.object_count++;
88 dev_priv->mm.object_memory += size;
89 spin_unlock(&dev_priv->mm.object_stat_lock);
92 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
95 spin_lock(&dev_priv->mm.object_stat_lock);
96 dev_priv->mm.object_count--;
97 dev_priv->mm.object_memory -= size;
98 spin_unlock(&dev_priv->mm.object_stat_lock);
102 i915_gem_wait_for_error(struct i915_gpu_error *error)
109 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
110 * userspace. If it takes that long something really bad is going on and
111 * we should simply try to bail out and fail as gracefully as possible.
113 ret = wait_event_interruptible_timeout(error->reset_queue,
114 !i915_reset_backoff(error),
117 DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
119 } else if (ret < 0) {
126 int i915_mutex_lock_interruptible(struct drm_device *dev)
128 struct drm_i915_private *dev_priv = to_i915(dev);
131 ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
135 ret = mutex_lock_interruptible(&dev->struct_mutex);
142 static u32 __i915_gem_park(struct drm_i915_private *i915)
144 intel_wakeref_t wakeref;
148 lockdep_assert_held(&i915->drm.struct_mutex);
149 GEM_BUG_ON(i915->gt.active_requests);
150 GEM_BUG_ON(!list_empty(&i915->gt.active_rings));
153 return I915_EPOCH_INVALID;
155 GEM_BUG_ON(i915->gt.epoch == I915_EPOCH_INVALID);
158 * Be paranoid and flush a concurrent interrupt to make sure
159 * we don't reactivate any irq tasklets after parking.
161 * FIXME: Note that even though we have waited for execlists to be idle,
162 * there may still be an in-flight interrupt even though the CSB
163 * is now empty. synchronize_irq() makes sure that a residual interrupt
164 * is completed before we continue, but it doesn't prevent the HW from
165 * raising a spurious interrupt later. To complete the shield we should
166 * coordinate disabling the CS irq with flushing the interrupts.
168 synchronize_irq(i915->drm.irq);
170 intel_engines_park(i915);
171 i915_timelines_park(i915);
173 i915_pmu_gt_parked(i915);
174 i915_vma_parked(i915);
176 wakeref = fetch_and_zero(&i915->gt.awake);
177 GEM_BUG_ON(!wakeref);
179 if (INTEL_GEN(i915) >= 6)
182 intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref);
184 return i915->gt.epoch;
187 void i915_gem_park(struct drm_i915_private *i915)
191 lockdep_assert_held(&i915->drm.struct_mutex);
192 GEM_BUG_ON(i915->gt.active_requests);
197 /* Defer the actual call to __i915_gem_park() to prevent ping-pongs */
198 mod_delayed_work(i915->wq, &i915->gt.idle_work, msecs_to_jiffies(100));
201 void i915_gem_unpark(struct drm_i915_private *i915)
205 lockdep_assert_held(&i915->drm.struct_mutex);
206 GEM_BUG_ON(!i915->gt.active_requests);
207 assert_rpm_wakelock_held(i915);
213 * It seems that the DMC likes to transition between the DC states a lot
214 * when there are no connected displays (no active power domains) during
215 * command submission.
217 * This activity has a negative impact on the performance of the chip, with
218 * huge latencies observed in the interrupt handler and elsewhere.
220 * Work around it by grabbing a GT IRQ power domain whilst there is any
221 * GT activity, preventing any DC state transitions.
223 i915->gt.awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
224 GEM_BUG_ON(!i915->gt.awake);
226 if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */
229 intel_enable_gt_powersave(i915);
230 i915_update_gfx_val(i915);
231 if (INTEL_GEN(i915) >= 6)
233 i915_pmu_gt_unparked(i915);
235 intel_engines_unpark(i915);
237 i915_queue_hangcheck(i915);
239 queue_delayed_work(i915->wq,
240 &i915->gt.retire_work,
241 round_jiffies_up_relative(HZ));
245 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
246 struct drm_file *file)
248 struct drm_i915_private *dev_priv = to_i915(dev);
249 struct i915_ggtt *ggtt = &dev_priv->ggtt;
250 struct drm_i915_gem_get_aperture *args = data;
251 struct i915_vma *vma;
254 pinned = ggtt->vm.reserved;
255 mutex_lock(&dev->struct_mutex);
256 list_for_each_entry(vma, &ggtt->vm.active_list, vm_link)
257 if (i915_vma_is_pinned(vma))
258 pinned += vma->node.size;
259 list_for_each_entry(vma, &ggtt->vm.inactive_list, vm_link)
260 if (i915_vma_is_pinned(vma))
261 pinned += vma->node.size;
262 mutex_unlock(&dev->struct_mutex);
264 args->aper_size = ggtt->vm.total;
265 args->aper_available_size = args->aper_size - pinned;
270 static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
272 struct address_space *mapping = obj->base.filp->f_mapping;
273 drm_dma_handle_t *phys;
275 struct scatterlist *sg;
280 if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
283 /* Always aligning to the object size allows a single allocation
284 * to handle all possible callers, and given typical object sizes,
285 * the alignment of the buddy allocation will naturally match.
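 * (Illustrative example: a 12 KiB object is rounded up to a 16 KiB
 * allocation below, and a 16 KiB power-of-two allocation comes back
 * naturally 16 KiB aligned.)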
287 phys = drm_pci_alloc(obj->base.dev,
288 roundup_pow_of_two(obj->base.size),
289 roundup_pow_of_two(obj->base.size));
294 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
298 page = shmem_read_mapping_page(mapping, i);
304 src = kmap_atomic(page);
305 memcpy(vaddr, src, PAGE_SIZE);
306 drm_clflush_virt_range(vaddr, PAGE_SIZE);
313 i915_gem_chipset_flush(to_i915(obj->base.dev));
315 st = kmalloc(sizeof(*st), GFP_KERNEL);
321 if (sg_alloc_table(st, 1, GFP_KERNEL)) {
329 sg->length = obj->base.size;
331 sg_dma_address(sg) = phys->busaddr;
332 sg_dma_len(sg) = obj->base.size;
334 obj->phys_handle = phys;
336 __i915_gem_object_set_pages(obj, st, sg->length);
341 drm_pci_free(obj->base.dev, phys);
346 static void __start_cpu_write(struct drm_i915_gem_object *obj)
348 obj->read_domains = I915_GEM_DOMAIN_CPU;
349 obj->write_domain = I915_GEM_DOMAIN_CPU;
350 if (cpu_write_needs_clflush(obj))
351 obj->cache_dirty = true;
355 __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
356 struct sg_table *pages,
359 GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);
361 if (obj->mm.madv == I915_MADV_DONTNEED)
362 obj->mm.dirty = false;
365 (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
366 !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
367 drm_clflush_sg(pages);
369 __start_cpu_write(obj);
373 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
374 struct sg_table *pages)
376 __i915_gem_object_release_shmem(obj, pages, false);
379 struct address_space *mapping = obj->base.filp->f_mapping;
380 char *vaddr = obj->phys_handle->vaddr;
383 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
387 page = shmem_read_mapping_page(mapping, i);
391 dst = kmap_atomic(page);
392 drm_clflush_virt_range(vaddr, PAGE_SIZE);
393 memcpy(dst, vaddr, PAGE_SIZE);
396 set_page_dirty(page);
397 if (obj->mm.madv == I915_MADV_WILLNEED)
398 mark_page_accessed(page);
402 obj->mm.dirty = false;
405 sg_free_table(pages);
408 drm_pci_free(obj->base.dev, obj->phys_handle);
412 i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
414 i915_gem_object_unpin_pages(obj);
417 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
418 .get_pages = i915_gem_object_get_pages_phys,
419 .put_pages = i915_gem_object_put_pages_phys,
420 .release = i915_gem_object_release_phys,
423 static const struct drm_i915_gem_object_ops i915_gem_object_ops;
425 int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
427 struct i915_vma *vma;
428 LIST_HEAD(still_in_list);
431 lockdep_assert_held(&obj->base.dev->struct_mutex);
433 /* Closed vma are removed from the obj->vma_list - but they may
434 * still have an active binding on the object. To remove those we
435 * must wait for all rendering to complete to the object (as unbinding
436 * must anyway), and retire the requests.
438 ret = i915_gem_object_set_to_cpu_domain(obj, false);
442 while ((vma = list_first_entry_or_null(&obj->vma_list,
445 list_move_tail(&vma->obj_link, &still_in_list);
446 ret = i915_vma_unbind(vma);
450 list_splice(&still_in_list, &obj->vma_list);
456 i915_gem_object_wait_fence(struct dma_fence *fence,
459 struct intel_rps_client *rps_client)
461 struct i915_request *rq;
463 BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1);
465 if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
468 if (!dma_fence_is_i915(fence))
469 return dma_fence_wait_timeout(fence,
470 flags & I915_WAIT_INTERRUPTIBLE,
473 rq = to_request(fence);
474 if (i915_request_completed(rq))
478 * This client is about to stall waiting for the GPU. In many cases
479 * this is undesirable and limits the throughput of the system, as
480 * many clients cannot continue processing user input/output whilst
481 * blocked. RPS autotuning may take tens of milliseconds to respond
482 * to the GPU load and thus incurs additional latency for the client.
483 * We can circumvent that by promoting the GPU frequency to maximum
484 * before we wait. This makes the GPU throttle up much more quickly
485 * (good for benchmarks and user experience, e.g. window animations),
486 * but at a cost of spending more power processing the workload
487 * (bad for battery). Not all clients even want their results
488 * immediately and for them we should just let the GPU select its own
489 * frequency to maximise efficiency. To prevent a single client from
490 * forcing the clocks too high for the whole system, we only allow
491 * each client to waitboost once in a busy period.
493 if (rps_client && !i915_request_started(rq)) {
494 if (INTEL_GEN(rq->i915) >= 6)
495 gen6_rps_boost(rq, rps_client);
498 timeout = i915_request_wait(rq, flags, timeout);
501 if (flags & I915_WAIT_LOCKED && i915_request_completed(rq))
502 i915_request_retire_upto(rq);
508 i915_gem_object_wait_reservation(struct reservation_object *resv,
511 struct intel_rps_client *rps_client)
513 unsigned int seq = __read_seqcount_begin(&resv->seq);
514 struct dma_fence *excl;
515 bool prune_fences = false;
517 if (flags & I915_WAIT_ALL) {
518 struct dma_fence **shared;
519 unsigned int count, i;
522 ret = reservation_object_get_fences_rcu(resv,
523 &excl, &count, &shared);
527 for (i = 0; i < count; i++) {
528 timeout = i915_gem_object_wait_fence(shared[i],
534 dma_fence_put(shared[i]);
537 for (; i < count; i++)
538 dma_fence_put(shared[i]);
542 * If both shared fences and an exclusive fence exist,
543 * then by construction the shared fences must be later
544 * than the exclusive fence. If we successfully wait for
545 * all the shared fences, we know that the exclusive fence
546 * must also be signaled. If all the shared fences are
547 * signaled, we can prune the array and recover the
548 * floating references on the fences/requests.
550 prune_fences = count && timeout >= 0;
552 excl = reservation_object_get_excl_rcu(resv);
555 if (excl && timeout >= 0)
556 timeout = i915_gem_object_wait_fence(excl, flags, timeout,
562 * Opportunistically prune the fences iff we know they have *all* been
563 * signaled and that the reservation object has not been changed (i.e.
564 * no new fences have been added).
566 if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) {
567 if (reservation_object_trylock(resv)) {
568 if (!__read_seqcount_retry(&resv->seq, seq))
569 reservation_object_add_excl_fence(resv, NULL);
570 reservation_object_unlock(resv);
577 static void __fence_set_priority(struct dma_fence *fence,
578 const struct i915_sched_attr *attr)
580 struct i915_request *rq;
581 struct intel_engine_cs *engine;
583 if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence))
586 rq = to_request(fence);
590 rcu_read_lock(); /* RCU serialisation for set-wedged protection */
591 if (engine->schedule)
592 engine->schedule(rq, attr);
594 local_bh_enable(); /* kick the tasklets if queues were reprioritised */
597 static void fence_set_priority(struct dma_fence *fence,
598 const struct i915_sched_attr *attr)
600 /* Recurse once into a fence-array */
601 if (dma_fence_is_array(fence)) {
602 struct dma_fence_array *array = to_dma_fence_array(fence);
605 for (i = 0; i < array->num_fences; i++)
606 __fence_set_priority(array->fences[i], attr);
608 __fence_set_priority(fence, attr);
613 i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
615 const struct i915_sched_attr *attr)
617 struct dma_fence *excl;
619 if (flags & I915_WAIT_ALL) {
620 struct dma_fence **shared;
621 unsigned int count, i;
624 ret = reservation_object_get_fences_rcu(obj->resv,
625 &excl, &count, &shared);
629 for (i = 0; i < count; i++) {
630 fence_set_priority(shared[i], attr);
631 dma_fence_put(shared[i]);
636 excl = reservation_object_get_excl_rcu(obj->resv);
640 fence_set_priority(excl, attr);
647 * Waits for rendering to the object to be completed
648 * @obj: i915 gem object
649 * @flags: how to wait (under a lock, for all rendering or just for writes etc)
650 * @timeout: how long to wait
651 * @rps_client: client (user process) to charge for any waitboosting
654 i915_gem_object_wait(struct drm_i915_gem_object *obj,
657 struct intel_rps_client *rps_client)
660 #if IS_ENABLED(CONFIG_LOCKDEP)
661 GEM_BUG_ON(debug_locks &&
662 !!lockdep_is_held(&obj->base.dev->struct_mutex) !=
663 !!(flags & I915_WAIT_LOCKED));
665 GEM_BUG_ON(timeout < 0);
667 timeout = i915_gem_object_wait_reservation(obj->resv,
670 return timeout < 0 ? timeout : 0;
673 static struct intel_rps_client *to_rps_client(struct drm_file *file)
675 struct drm_i915_file_private *fpriv = file->driver_priv;
677 return &fpriv->rps_client;
681 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
682 struct drm_i915_gem_pwrite *args,
683 struct drm_file *file)
685 void *vaddr = obj->phys_handle->vaddr + args->offset;
686 char __user *user_data = u64_to_user_ptr(args->data_ptr);
688 /* We manually control the domain here and pretend that it
689 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
691 intel_fb_obj_invalidate(obj, ORIGIN_CPU);
692 if (copy_from_user(vaddr, user_data, args->size))
695 drm_clflush_virt_range(vaddr, args->size);
696 i915_gem_chipset_flush(to_i915(obj->base.dev));
698 intel_fb_obj_flush(obj, ORIGIN_CPU);
702 void *i915_gem_object_alloc(struct drm_i915_private *dev_priv)
704 return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL);
707 void i915_gem_object_free(struct drm_i915_gem_object *obj)
709 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
710 kmem_cache_free(dev_priv->objects, obj);
714 i915_gem_create(struct drm_file *file,
715 struct drm_i915_private *dev_priv,
719 struct drm_i915_gem_object *obj;
723 size = roundup(size, PAGE_SIZE);
727 /* Allocate the new object */
728 obj = i915_gem_object_create(dev_priv, size);
732 ret = drm_gem_handle_create(file, &obj->base, &handle);
733 /* drop reference from allocate - handle holds it now */
734 i915_gem_object_put(obj);
743 i915_gem_dumb_create(struct drm_file *file,
744 struct drm_device *dev,
745 struct drm_mode_create_dumb *args)
747 /* have to work out size/pitch and return them */
748 args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
749 args->size = args->pitch * args->height;
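/*
 * Worked example (illustrative): a 1920x1080 buffer at 32 bpp gives
 * pitch = ALIGN(1920 * 4, 64) = 7680 bytes and size = 7680 * 1080 =
 * 8294400 bytes, which i915_gem_create() then rounds up to a whole
 * number of pages.
 */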
750 return i915_gem_create(file, to_i915(dev),
751 args->size, &args->handle);
754 static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
756 return !(obj->cache_level == I915_CACHE_NONE ||
757 obj->cache_level == I915_CACHE_WT);
761 * Creates a new mm object and returns a handle to it.
762 * @dev: drm device pointer
763 * @data: ioctl data blob
764 * @file: drm file pointer
767 i915_gem_create_ioctl(struct drm_device *dev, void *data,
768 struct drm_file *file)
770 struct drm_i915_private *dev_priv = to_i915(dev);
771 struct drm_i915_gem_create *args = data;
773 i915_gem_flush_free_objects(dev_priv);
775 return i915_gem_create(file, dev_priv,
776 args->size, &args->handle);
779 static inline enum fb_op_origin
780 fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
782 return (domain == I915_GEM_DOMAIN_GTT ?
783 obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
786 void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
788 intel_wakeref_t wakeref;
791 * No actual flushing is required for the GTT write domain for reads
792 * from the GTT domain. Writes to it "immediately" go to main memory
793 * as far as we know, so there's no chipset flush. It also doesn't
794 * land in the GPU render cache.
796 * However, we do have to enforce the order so that all writes through
797 * the GTT land before any writes to the device, such as updates to
800 * We also have to wait a bit for the writes to land from the GTT.
801 * An uncached read (i.e. mmio) seems to be ideal for the round-trip
802 * timing. This issue has only been observed when switching quickly
803 * between GTT writes and CPU reads from inside the kernel on recent hw,
804 * and it appears to only affect discrete GTT blocks (i.e. on LLC
805 * system agents we cannot reproduce this behaviour, until Cannonlake
811 if (INTEL_INFO(dev_priv)->has_coherent_ggtt)
814 i915_gem_chipset_flush(dev_priv);
816 with_intel_runtime_pm(dev_priv, wakeref) {
817 spin_lock_irq(&dev_priv->uncore.lock);
819 POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE));
821 spin_unlock_irq(&dev_priv->uncore.lock);
826 flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
828 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
829 struct i915_vma *vma;
831 if (!(obj->write_domain & flush_domains))
834 switch (obj->write_domain) {
835 case I915_GEM_DOMAIN_GTT:
836 i915_gem_flush_ggtt_writes(dev_priv);
838 intel_fb_obj_flush(obj,
839 fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
841 for_each_ggtt_vma(vma, obj) {
845 i915_vma_unset_ggtt_write(vma);
849 case I915_GEM_DOMAIN_WC:
853 case I915_GEM_DOMAIN_CPU:
854 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
857 case I915_GEM_DOMAIN_RENDER:
858 if (gpu_write_needs_clflush(obj))
859 obj->cache_dirty = true;
863 obj->write_domain = 0;
867 * Pins the specified object's pages and synchronizes the object with
868 * GPU accesses. Sets needs_clflush to non-zero if the caller should
869 * flush the object from the CPU cache.
871 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
872 unsigned int *needs_clflush)
876 lockdep_assert_held(&obj->base.dev->struct_mutex);
879 if (!i915_gem_object_has_struct_page(obj))
882 ret = i915_gem_object_wait(obj,
883 I915_WAIT_INTERRUPTIBLE |
885 MAX_SCHEDULE_TIMEOUT,
890 ret = i915_gem_object_pin_pages(obj);
894 if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
895 !static_cpu_has(X86_FEATURE_CLFLUSH)) {
896 ret = i915_gem_object_set_to_cpu_domain(obj, false);
903 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
905 /* If we're not in the cpu read domain, set ourselves into the gtt
906 * read domain and manually flush cachelines (if required). This
907 * optimizes for the case when the gpu will dirty the data
908 * anyway again before the next pread happens.
910 if (!obj->cache_dirty &&
911 !(obj->read_domains & I915_GEM_DOMAIN_CPU))
912 *needs_clflush = CLFLUSH_BEFORE;
915 /* return with the pages pinned */
919 i915_gem_object_unpin_pages(obj);
923 int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
924 unsigned int *needs_clflush)
928 lockdep_assert_held(&obj->base.dev->struct_mutex);
931 if (!i915_gem_object_has_struct_page(obj))
934 ret = i915_gem_object_wait(obj,
935 I915_WAIT_INTERRUPTIBLE |
938 MAX_SCHEDULE_TIMEOUT,
943 ret = i915_gem_object_pin_pages(obj);
947 if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
948 !static_cpu_has(X86_FEATURE_CLFLUSH)) {
949 ret = i915_gem_object_set_to_cpu_domain(obj, true);
956 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
958 /* If we're not in the cpu write domain, set ourselves into the
959 * gtt write domain and manually flush cachelines (as required).
960 * This optimizes for the case when the gpu will use the data
961 * right away and we therefore have to clflush anyway.
963 if (!obj->cache_dirty) {
964 *needs_clflush |= CLFLUSH_AFTER;
967 * Same trick applies to invalidate partially written
968 * cachelines read before writing.
970 if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
971 *needs_clflush |= CLFLUSH_BEFORE;
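/*
 * e.g. an uncached object that is not already in the CPU read domain
 * ends up with CLFLUSH_BEFORE | CLFLUSH_AFTER here, whereas a
 * cache-coherent (LLC/snooped) object normally takes the
 * set_to_cpu_domain() path above and needs neither flush.
 */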
975 intel_fb_obj_invalidate(obj, ORIGIN_CPU);
976 obj->mm.dirty = true;
977 /* return with the pages pinned */
981 i915_gem_object_unpin_pages(obj);
986 shmem_pread(struct page *page, int offset, int len, char __user *user_data,
995 drm_clflush_virt_range(vaddr + offset, len);
997 ret = __copy_to_user(user_data, vaddr + offset, len);
1001 return ret ? -EFAULT : 0;
1005 i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
1006 struct drm_i915_gem_pread *args)
1008 char __user *user_data;
1010 unsigned int needs_clflush;
1011 unsigned int idx, offset;
1014 ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
1018 ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
1019 mutex_unlock(&obj->base.dev->struct_mutex);
1023 remain = args->size;
1024 user_data = u64_to_user_ptr(args->data_ptr);
1025 offset = offset_in_page(args->offset);
1026 for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
1027 struct page *page = i915_gem_object_get_page(obj, idx);
1028 unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);
1030 ret = shmem_pread(page, offset, length, user_data,
1036 user_data += length;
1040 i915_gem_obj_finish_shmem_access(obj);
1045 gtt_user_read(struct io_mapping *mapping,
1046 loff_t base, int offset,
1047 char __user *user_data, int length)
1049 void __iomem *vaddr;
1050 unsigned long unwritten;
1052 /* We can use the cpu mem copy function because this is X86. */
1053 vaddr = io_mapping_map_atomic_wc(mapping, base);
1054 unwritten = __copy_to_user_inatomic(user_data,
1055 (void __force *)vaddr + offset,
1057 io_mapping_unmap_atomic(vaddr);
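/*
 * The atomic WC mapping above cannot take faults on the user buffer;
 * if the non-faulting copy could not complete, fall back below to a
 * regular mapping and a copy_to_user() that is allowed to fault and
 * sleep.
 */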
1059 vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
1060 unwritten = copy_to_user(user_data,
1061 (void __force *)vaddr + offset,
1063 io_mapping_unmap(vaddr);
1069 i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
1070 const struct drm_i915_gem_pread *args)
1072 struct drm_i915_private *i915 = to_i915(obj->base.dev);
1073 struct i915_ggtt *ggtt = &i915->ggtt;
1074 intel_wakeref_t wakeref;
1075 struct drm_mm_node node;
1076 struct i915_vma *vma;
1077 void __user *user_data;
1081 ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
1085 wakeref = intel_runtime_pm_get(i915);
1086 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
1091 node.start = i915_ggtt_offset(vma);
1092 node.allocated = false;
1093 ret = i915_vma_put_fence(vma);
1095 i915_vma_unpin(vma);
1100 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
1103 GEM_BUG_ON(!node.allocated);
1106 ret = i915_gem_object_set_to_gtt_domain(obj, false);
1110 mutex_unlock(&i915->drm.struct_mutex);
1112 user_data = u64_to_user_ptr(args->data_ptr);
1113 remain = args->size;
1114 offset = args->offset;
1116 while (remain > 0) {
1117 /* Operation in this page
1119 * page_base = page offset within aperture
1120 * page_offset = offset within page
1121 * page_length = bytes to copy for this page
1123 u32 page_base = node.start;
1124 unsigned page_offset = offset_in_page(offset);
1125 unsigned page_length = PAGE_SIZE - page_offset;
1126 page_length = remain < page_length ? remain : page_length;
1127 if (node.allocated) {
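/*
 * The object could not be pinned into the mappable aperture, so bind
 * the page currently being read into the single scratch GGTT slot
 * reserved by insert_mappable_node() above.
 */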
1129 ggtt->vm.insert_page(&ggtt->vm,
1130 i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
1131 node.start, I915_CACHE_NONE, 0);
1134 page_base += offset & PAGE_MASK;
1137 if (gtt_user_read(&ggtt->iomap, page_base, page_offset,
1138 user_data, page_length)) {
1143 remain -= page_length;
1144 user_data += page_length;
1145 offset += page_length;
1148 mutex_lock(&i915->drm.struct_mutex);
1150 if (node.allocated) {
1152 ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
1153 remove_mappable_node(&node);
1155 i915_vma_unpin(vma);
1158 intel_runtime_pm_put(i915, wakeref);
1159 mutex_unlock(&i915->drm.struct_mutex);
1165 * Reads data from the object referenced by handle.
1166 * @dev: drm device pointer
1167 * @data: ioctl data blob
1168 * @file: drm file pointer
1170 * On error, the contents of *data are undefined.
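 *
 * A minimal userspace sketch (illustrative only; uapi names from
 * include/uapi/drm/i915_drm.h, error handling omitted):
 *
 *	struct drm_i915_gem_pread pread = {
 *		.handle = handle,
 *		.offset = 0,
 *		.size = length,
 *		.data_ptr = (__u64)(uintptr_t)buf,
 *	};
 *	ioctl(fd, DRM_IOCTL_I915_GEM_PREAD, &pread);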
1173 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
1174 struct drm_file *file)
1176 struct drm_i915_gem_pread *args = data;
1177 struct drm_i915_gem_object *obj;
1180 if (args->size == 0)
1183 if (!access_ok(u64_to_user_ptr(args->data_ptr),
1187 obj = i915_gem_object_lookup(file, args->handle);
1191 /* Bounds check source. */
1192 if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
1197 trace_i915_gem_object_pread(obj, args->offset, args->size);
1199 ret = i915_gem_object_wait(obj,
1200 I915_WAIT_INTERRUPTIBLE,
1201 MAX_SCHEDULE_TIMEOUT,
1202 to_rps_client(file));
1206 ret = i915_gem_object_pin_pages(obj);
1210 ret = i915_gem_shmem_pread(obj, args);
1211 if (ret == -EFAULT || ret == -ENODEV)
1212 ret = i915_gem_gtt_pread(obj, args);
1214 i915_gem_object_unpin_pages(obj);
1216 i915_gem_object_put(obj);
1220 /* This is the fast write path which cannot handle
1221 * page faults in the source data
1225 ggtt_write(struct io_mapping *mapping,
1226 loff_t base, int offset,
1227 char __user *user_data, int length)
1229 void __iomem *vaddr;
1230 unsigned long unwritten;
1232 /* We can use the cpu mem copy function because this is X86. */
1233 vaddr = io_mapping_map_atomic_wc(mapping, base);
1234 unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset,
1236 io_mapping_unmap_atomic(vaddr);
1238 vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
1239 unwritten = copy_from_user((void __force *)vaddr + offset,
1241 io_mapping_unmap(vaddr);
1248 * This is the fast pwrite path, where we copy the data directly from the
1249 * user into the GTT, uncached.
1250 * @obj: i915 GEM object
1251 * @args: pwrite arguments structure
1254 i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
1255 const struct drm_i915_gem_pwrite *args)
1257 struct drm_i915_private *i915 = to_i915(obj->base.dev);
1258 struct i915_ggtt *ggtt = &i915->ggtt;
1259 intel_wakeref_t wakeref;
1260 struct drm_mm_node node;
1261 struct i915_vma *vma;
1263 void __user *user_data;
1266 ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
1270 if (i915_gem_object_has_struct_page(obj)) {
1272 * Avoid waking the device up if we can fallback, as
1273 * waking/resuming is very slow (worst-case 10-100 ms
1274 * depending on PCI sleeps and our own resume time).
1275 * This easily dwarfs any performance advantage from
1276 * using the cache bypass of indirect GGTT access.
1278 wakeref = intel_runtime_pm_get_if_in_use(i915);
1284 /* No backing pages, no fallback, we must force GGTT access */
1285 wakeref = intel_runtime_pm_get(i915);
1288 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
1293 node.start = i915_ggtt_offset(vma);
1294 node.allocated = false;
1295 ret = i915_vma_put_fence(vma);
1297 i915_vma_unpin(vma);
1302 ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
1305 GEM_BUG_ON(!node.allocated);
1308 ret = i915_gem_object_set_to_gtt_domain(obj, true);
1312 mutex_unlock(&i915->drm.struct_mutex);
1314 intel_fb_obj_invalidate(obj, ORIGIN_CPU);
1316 user_data = u64_to_user_ptr(args->data_ptr);
1317 offset = args->offset;
1318 remain = args->size;
1320 /* Operation in this page
1322 * page_base = page offset within aperture
1323 * page_offset = offset within page
1324 * page_length = bytes to copy for this page
1326 u32 page_base = node.start;
1327 unsigned int page_offset = offset_in_page(offset);
1328 unsigned int page_length = PAGE_SIZE - page_offset;
1329 page_length = remain < page_length ? remain : page_length;
1330 if (node.allocated) {
1331 wmb(); /* flush the write before we modify the GGTT */
1332 ggtt->vm.insert_page(&ggtt->vm,
1333 i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
1334 node.start, I915_CACHE_NONE, 0);
1335 wmb(); /* flush modifications to the GGTT (insert_page) */
1337 page_base += offset & PAGE_MASK;
1339 /* If we get a fault while copying data, then (presumably) our
1340 * source page isn't available. Return the error and we'll
1341 * retry in the slow path.
1342 * If the object is non-shmem backed, we retry again with the
1343 * path that handles page fault.
1345 if (ggtt_write(&ggtt->iomap, page_base, page_offset,
1346 user_data, page_length)) {
1351 remain -= page_length;
1352 user_data += page_length;
1353 offset += page_length;
1355 intel_fb_obj_flush(obj, ORIGIN_CPU);
1357 mutex_lock(&i915->drm.struct_mutex);
1359 if (node.allocated) {
1361 ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
1362 remove_mappable_node(&node);
1364 i915_vma_unpin(vma);
1367 intel_runtime_pm_put(i915, wakeref);
1369 mutex_unlock(&i915->drm.struct_mutex);
1373 /* Per-page copy function for the shmem pwrite fastpath.
1374 * Flushes invalid cachelines before writing to the target if
1375 * needs_clflush_before is set and flushes out any written cachelines after
1376 * writing if needs_clflush is set.
1379 shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
1380 bool needs_clflush_before,
1381 bool needs_clflush_after)
1388 if (needs_clflush_before)
1389 drm_clflush_virt_range(vaddr + offset, len);
1391 ret = __copy_from_user(vaddr + offset, user_data, len);
1392 if (!ret && needs_clflush_after)
1393 drm_clflush_virt_range(vaddr + offset, len);
1397 return ret ? -EFAULT : 0;
1401 i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
1402 const struct drm_i915_gem_pwrite *args)
1404 struct drm_i915_private *i915 = to_i915(obj->base.dev);
1405 void __user *user_data;
1407 unsigned int partial_cacheline_write;
1408 unsigned int needs_clflush;
1409 unsigned int offset, idx;
1412 ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
1416 ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
1417 mutex_unlock(&i915->drm.struct_mutex);
1421 /* If we don't overwrite a cacheline completely we need to be
1422 * careful to have up-to-date data by first clflushing. Don't
1423 * overcomplicate things and flush the entire patch.
1425 partial_cacheline_write = 0;
1426 if (needs_clflush & CLFLUSH_BEFORE)
1427 partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1;
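/*
 * e.g. with 64 byte cachelines the mask is 63: a page-aligned 4096 byte
 * write has (0 | 4096) & 63 == 0 and skips the pre-flush, while a write
 * starting at offset 16 covers partial cachelines and is flushed first.
 */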
1429 user_data = u64_to_user_ptr(args->data_ptr);
1430 remain = args->size;
1431 offset = offset_in_page(args->offset);
1432 for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
1433 struct page *page = i915_gem_object_get_page(obj, idx);
1434 unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);
1436 ret = shmem_pwrite(page, offset, length, user_data,
1437 (offset | length) & partial_cacheline_write,
1438 needs_clflush & CLFLUSH_AFTER);
1443 user_data += length;
1447 intel_fb_obj_flush(obj, ORIGIN_CPU);
1448 i915_gem_obj_finish_shmem_access(obj);
1453 * Writes data to the object referenced by handle.
1455 * @data: ioctl data blob
1458 * On error, the contents of the buffer that were to be modified are undefined.
1461 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
1462 struct drm_file *file)
1464 struct drm_i915_gem_pwrite *args = data;
1465 struct drm_i915_gem_object *obj;
1468 if (args->size == 0)
1471 if (!access_ok(u64_to_user_ptr(args->data_ptr), args->size))
1474 obj = i915_gem_object_lookup(file, args->handle);
1478 /* Bounds check destination. */
1479 if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
1484 /* Writes not allowed into this read-only object */
1485 if (i915_gem_object_is_readonly(obj)) {
1490 trace_i915_gem_object_pwrite(obj, args->offset, args->size);
1493 if (obj->ops->pwrite)
1494 ret = obj->ops->pwrite(obj, args);
1498 ret = i915_gem_object_wait(obj,
1499 I915_WAIT_INTERRUPTIBLE |
1501 MAX_SCHEDULE_TIMEOUT,
1502 to_rps_client(file));
1506 ret = i915_gem_object_pin_pages(obj);
1511 /* We can only do the GTT pwrite on untiled buffers, as otherwise
1512 * it would end up going through the fenced access, and we'll get
1513 * different detiling behavior between reading and writing.
1514 * pread/pwrite currently are reading and writing from the CPU
1515 * perspective, requiring manual detiling by the client.
1517 if (!i915_gem_object_has_struct_page(obj) ||
1518 cpu_write_needs_clflush(obj))
1519 /* Note that the gtt paths might fail with non-page-backed user
1520 * pointers (e.g. gtt mappings when moving data between
1521 * textures). Fallback to the shmem path in that case.
1523 ret = i915_gem_gtt_pwrite_fast(obj, args);
1525 if (ret == -EFAULT || ret == -ENOSPC) {
1526 if (obj->phys_handle)
1527 ret = i915_gem_phys_pwrite(obj, args, file);
1529 ret = i915_gem_shmem_pwrite(obj, args);
1532 i915_gem_object_unpin_pages(obj);
1534 i915_gem_object_put(obj);
1538 static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
1540 struct drm_i915_private *i915;
1541 struct list_head *list;
1542 struct i915_vma *vma;
1544 GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
1546 for_each_ggtt_vma(vma, obj) {
1547 if (i915_vma_is_active(vma))
1550 if (!drm_mm_node_allocated(&vma->node))
1553 list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
1556 i915 = to_i915(obj->base.dev);
1557 spin_lock(&i915->mm.obj_lock);
1558 list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
1559 list_move_tail(&obj->mm.link, list);
1560 spin_unlock(&i915->mm.obj_lock);
1564 * Called when user space prepares to use an object with the CPU, either
1565 * through the mmap ioctl's mapping or a GTT mapping.
1567 * @data: ioctl data blob
1571 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1572 struct drm_file *file)
1574 struct drm_i915_gem_set_domain *args = data;
1575 struct drm_i915_gem_object *obj;
1576 uint32_t read_domains = args->read_domains;
1577 uint32_t write_domain = args->write_domain;
1580 /* Only handle setting domains to types used by the CPU. */
1581 if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
1584 /* Having something in the write domain implies it's in the read
1585 * domain, and only that read domain. Enforce that in the request.
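 * (e.g. a request with write_domain = I915_GEM_DOMAIN_GTT must also pass
 * read_domains = I915_GEM_DOMAIN_GTT, and nothing else).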
1587 if (write_domain != 0 && read_domains != write_domain)
1590 obj = i915_gem_object_lookup(file, args->handle);
1594 /* Try to flush the object off the GPU without holding the lock.
1595 * We will repeat the flush holding the lock in the normal manner
1596 * to catch cases where we are gazumped.
1598 err = i915_gem_object_wait(obj,
1599 I915_WAIT_INTERRUPTIBLE |
1600 I915_WAIT_PRIORITY |
1601 (write_domain ? I915_WAIT_ALL : 0),
1602 MAX_SCHEDULE_TIMEOUT,
1603 to_rps_client(file));
1608 * Proxy objects do not control access to the backing storage, ergo
1609 * they cannot be used as a means to manipulate the cache domain
1610 * tracking for that backing storage. The proxy object is always
1611 * considered to be outside of any cache domain.
1613 if (i915_gem_object_is_proxy(obj)) {
1619 * Flush and acquire obj->pages so that we are coherent through
1620 * direct access in memory with previous cached writes through
1621 * shmemfs and that our cache domain tracking remains valid.
1622 * For example, if the obj->filp was moved to swap without us
1623 * being notified and releasing the pages, we would mistakenly
1624 * continue to assume that the obj remained out of the CPU cached
1627 err = i915_gem_object_pin_pages(obj);
1631 err = i915_mutex_lock_interruptible(dev);
1635 if (read_domains & I915_GEM_DOMAIN_WC)
1636 err = i915_gem_object_set_to_wc_domain(obj, write_domain);
1637 else if (read_domains & I915_GEM_DOMAIN_GTT)
1638 err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
1640 err = i915_gem_object_set_to_cpu_domain(obj, write_domain);
1642 /* And bump the LRU for this access */
1643 i915_gem_object_bump_inactive_ggtt(obj);
1645 mutex_unlock(&dev->struct_mutex);
1647 if (write_domain != 0)
1648 intel_fb_obj_invalidate(obj,
1649 fb_write_origin(obj, write_domain));
1652 i915_gem_object_unpin_pages(obj);
1654 i915_gem_object_put(obj);
1659 * Called when user space has done writes to this buffer
1661 * @data: ioctl data blob
1665 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1666 struct drm_file *file)
1668 struct drm_i915_gem_sw_finish *args = data;
1669 struct drm_i915_gem_object *obj;
1671 obj = i915_gem_object_lookup(file, args->handle);
1676 * Proxy objects are barred from CPU access, so there is no
1677 * need to ban sw_finish as it is a nop.
1680 /* Pinned buffers may be scanout, so flush the cache */
1681 i915_gem_object_flush_if_display(obj);
1682 i915_gem_object_put(obj);
1688 * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
1691 * @data: ioctl data blob
1694 * While the mapping holds a reference on the contents of the object, it doesn't
1695 * imply a ref on the object itself.
1699 * DRM driver writers who look at this function as an example for how to do GEM
1700 * mmap support, please don't implement mmap support like here. The modern way
1701 * to implement DRM mmap support is with an mmap offset ioctl (like
1702 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
1703 * That way debug tooling like valgrind will understand what's going on, hiding
1704 * the mmap call in a driver private ioctl will break that. The i915 driver only
1705 * does cpu mmaps this way because we didn't know better.
1708 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1709 struct drm_file *file)
1711 struct drm_i915_gem_mmap *args = data;
1712 struct drm_i915_gem_object *obj;
1715 if (args->flags & ~(I915_MMAP_WC))
1718 if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
1721 obj = i915_gem_object_lookup(file, args->handle);
1725 /* prime objects have no backing filp to GEM mmap
1728 if (!obj->base.filp) {
1729 i915_gem_object_put(obj);
1733 addr = vm_mmap(obj->base.filp, 0, args->size,
1734 PROT_READ | PROT_WRITE, MAP_SHARED,
1736 if (args->flags & I915_MMAP_WC) {
1737 struct mm_struct *mm = current->mm;
1738 struct vm_area_struct *vma;
1740 if (down_write_killable(&mm->mmap_sem)) {
1741 i915_gem_object_put(obj);
1744 vma = find_vma(mm, addr);
1747 pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
1750 up_write(&mm->mmap_sem);
1752 /* This may race, but that's ok, it only gets set */
1753 WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
1755 i915_gem_object_put(obj);
1756 if (IS_ERR((void *)addr))
1759 args->addr_ptr = (uint64_t) addr;
1764 static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj)
1766 return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT;
1770 * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
1772 * A history of the GTT mmap interface:
1774 * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
1775 * be aligned and suitable for fencing, and still fit into the available
1776 * mappable space left by the pinned display objects. A classic problem
1777 * we called the page-fault-of-doom where we would ping-pong between
1778 * two objects that could not fit inside the GTT and so the memcpy
1779 * would page one object in at the expense of the other between every
1782 * 1 - Objects can be any size, and have any compatible fencing (X, Y, or none
1783 * as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
1784 * object is too large for the available space (or simply too large
1785 * for the mappable aperture!), a view is created instead and faulted
1786 * into userspace. (This view is aligned and sized appropriately for
1789 * 2 - Recognise WC as a separate cache domain so that we can flush the
1790 * delayed writes via GTT before performing direct access via WC.
1794 * * snoopable objects cannot be accessed via the GTT. It can cause machine
1795 * hangs on some architectures, corruption on others. An attempt to service
1796 * a GTT page fault from a snoopable object will generate a SIGBUS.
1798 * * the object must be able to fit into RAM (physical memory, though not
1799 * limited to the mappable aperture).
1804 * * a new GTT page fault will synchronize rendering from the GPU and flush
1805 * all data to system memory. Subsequent access will not be synchronized.
1807 * * all mappings are revoked on runtime device suspend.
1809 * * there are only 8, 16 or 32 fence registers to share between all users
1810 * (older machines require fence register for display and blitter access
1811 * as well). Contention of the fence registers will cause the previous users
1812 * to be unmapped and any new access will generate new page faults.
1814 * * running out of memory while servicing a fault may generate a SIGBUS,
1815 * rather than the expected SIGSEGV.
1817 int i915_gem_mmap_gtt_version(void)
1822 static inline struct i915_ggtt_view
1823 compute_partial_view(const struct drm_i915_gem_object *obj,
1824 pgoff_t page_offset,
1827 struct i915_ggtt_view view;
1829 if (i915_gem_object_is_tiled(obj))
1830 chunk = roundup(chunk, tile_row_pages(obj));
1832 view.type = I915_GGTT_VIEW_PARTIAL;
1833 view.partial.offset = rounddown(page_offset, chunk);
1835 min_t(unsigned int, chunk,
1836 (obj->base.size >> PAGE_SHIFT) - view.partial.offset);
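/*
 * e.g. with the MIN_CHUNK_PAGES chunk used by i915_gem_fault() (1 MiB,
 * i.e. 256 pages with 4 KiB pages), a fault at page 1000 of an untiled
 * 4096 page object produces a partial view covering pages 768-1023
 * (rounddown(1000, 256), 256 pages long).
 */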
1838 /* If the partial covers the entire object, just create a normal VMA. */
1839 if (chunk >= obj->base.size >> PAGE_SHIFT)
1840 view.type = I915_GGTT_VIEW_NORMAL;
1846 * i915_gem_fault - fault a page into the GTT
1849 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
1850 * from userspace. The fault handler takes care of binding the object to
1851 * the GTT (if needed), allocating and programming a fence register (again,
1852 * only if needed based on whether the old reg is still valid or the object
1853 * is tiled) and inserting a new PTE into the faulting process.
1855 * Note that the faulting process may involve evicting existing objects
1856 * from the GTT and/or fence registers to make room. So performance may
1857 * suffer if the GTT working set is large or there are few fence registers
1860 * The current feature set supported by i915_gem_fault() and thus GTT mmaps
1861 * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
1863 vm_fault_t i915_gem_fault(struct vm_fault *vmf)
1865 #define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT)
1866 struct vm_area_struct *area = vmf->vma;
1867 struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
1868 struct drm_device *dev = obj->base.dev;
1869 struct drm_i915_private *dev_priv = to_i915(dev);
1870 struct i915_ggtt *ggtt = &dev_priv->ggtt;
1871 bool write = area->vm_flags & VM_WRITE;
1872 intel_wakeref_t wakeref;
1873 struct i915_vma *vma;
1874 pgoff_t page_offset;
1877 /* Sanity check that we allow writing into this object */
1878 if (i915_gem_object_is_readonly(obj) && write)
1879 return VM_FAULT_SIGBUS;
1881 /* We don't use vmf->pgoff since that has the fake offset */
1882 page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
1884 trace_i915_gem_object_fault(obj, page_offset, true, write);
1886 /* Try to flush the object off the GPU first without holding the lock.
1887 * Upon acquiring the lock, we will perform our sanity checks and then
1888 * repeat the flush holding the lock in the normal manner to catch cases
1889 * where we are gazumped.
1891 ret = i915_gem_object_wait(obj,
1892 I915_WAIT_INTERRUPTIBLE,
1893 MAX_SCHEDULE_TIMEOUT,
1898 ret = i915_gem_object_pin_pages(obj);
1902 wakeref = intel_runtime_pm_get(dev_priv);
1904 ret = i915_mutex_lock_interruptible(dev);
1908 /* Access to snoopable pages through the GTT is incoherent. */
1909 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) {
1915 /* Now pin it into the GTT as needed */
1916 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
1921 /* Use a partial view if it is bigger than available space */
1922 struct i915_ggtt_view view =
1923 compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
1926 flags = PIN_MAPPABLE;
1927 if (view.type == I915_GGTT_VIEW_NORMAL)
1928 flags |= PIN_NONBLOCK; /* avoid warnings for pinned */
1931 * Userspace is now writing through an untracked VMA, abandon
1932 * all hope that the hardware is able to track future writes.
1934 obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
1936 vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
1937 if (IS_ERR(vma) && !view.type) {
1938 flags = PIN_MAPPABLE;
1939 view.type = I915_GGTT_VIEW_PARTIAL;
1940 vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
1948 ret = i915_gem_object_set_to_gtt_domain(obj, write);
1952 ret = i915_vma_pin_fence(vma);
1956 /* Finally, remap it using the new GTT offset */
1957 ret = remap_io_mapping(area,
1958 area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT),
1959 (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT,
1960 min_t(u64, vma->size, area->vm_end - area->vm_start),
1965 /* Mark as being mmapped into userspace for later revocation */
1966 assert_rpm_wakelock_held(dev_priv);
1967 if (!i915_vma_set_userfault(vma) && !obj->userfault_count++)
1968 list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
1969 GEM_BUG_ON(!obj->userfault_count);
1971 i915_vma_set_ggtt_write(vma);
1974 i915_vma_unpin_fence(vma);
1976 __i915_vma_unpin(vma);
1978 mutex_unlock(&dev->struct_mutex);
1980 intel_runtime_pm_put(dev_priv, wakeref);
1981 i915_gem_object_unpin_pages(obj);
1986 * We eat errors when the gpu is terminally wedged to avoid
1987 * userspace unduly crashing (gl has no provisions for mmaps to
1988 * fail). But any other -EIO isn't ours (e.g. swap in failure)
1989 * and so needs to be reported.
1991 if (!i915_terminally_wedged(&dev_priv->gpu_error))
1992 return VM_FAULT_SIGBUS;
1993 /* else: fall through */
1996 * EAGAIN means the gpu is hung and we'll wait for the error
1997 * handler to reset everything when re-faulting in
1998 * i915_mutex_lock_interruptible.
2005 * EBUSY is ok: this just means that another thread
2006 * already did the job.
2008 return VM_FAULT_NOPAGE;
2010 return VM_FAULT_OOM;
2013 return VM_FAULT_SIGBUS;
2015 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
2016 return VM_FAULT_SIGBUS;
2020 static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
2022 struct i915_vma *vma;
2024 GEM_BUG_ON(!obj->userfault_count);
2026 obj->userfault_count = 0;
2027 list_del(&obj->userfault_link);
2028 drm_vma_node_unmap(&obj->base.vma_node,
2029 obj->base.dev->anon_inode->i_mapping);
2031 for_each_ggtt_vma(vma, obj)
2032 i915_vma_unset_userfault(vma);
2036 * i915_gem_release_mmap - remove physical page mappings
2037 * @obj: obj in question
2039 * Preserve the reservation of the mmapping with the DRM core code, but
2040 * relinquish ownership of the pages back to the system.
2042 * It is vital that we remove the page mapping if we have mapped a tiled
2043 * object through the GTT and then lose the fence register due to
2044 * resource pressure. Similarly if the object has been moved out of the
2045 * aperture, then pages mapped into userspace must be revoked. Removing the
2046 * mapping will then trigger a page fault on the next user access, allowing
2047 * fixup by i915_gem_fault().
2050 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
2052 struct drm_i915_private *i915 = to_i915(obj->base.dev);
2053 intel_wakeref_t wakeref;
2055 /* Serialisation between user GTT access and our code depends upon
2056 * revoking the CPU's PTE whilst the mutex is held. The next user
2057 * pagefault then has to wait until we release the mutex.
2059 * Note that RPM complicates somewhat by adding an additional
2060 * requirement that operations to the GGTT be made holding the RPM
2063 lockdep_assert_held(&i915->drm.struct_mutex);
2064 wakeref = intel_runtime_pm_get(i915);
2066 if (!obj->userfault_count)
2069 __i915_gem_object_release_mmap(obj);
2071 /* Ensure that the CPU's PTE are revoked and there are not outstanding
2072 * memory transactions from userspace before we return. The TLB
2073 * flushing implied above by changing the PTE above *should* be
2074 * sufficient, an extra barrier here just provides us with a bit
2075 * of paranoid documentation about our requirement to serialise
2076 * memory writes before touching registers / GSM.
2081 intel_runtime_pm_put(i915, wakeref);
2084 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
2086 struct drm_i915_gem_object *obj, *on;
2090 * Only called during RPM suspend. All users of the userfault_list
2091 * must be holding an RPM wakeref to ensure that this can not
2092 * run concurrently with themselves (and use the struct_mutex for
2093 * protection between themselves).
2096 list_for_each_entry_safe(obj, on,
2097 &dev_priv->mm.userfault_list, userfault_link)
2098 __i915_gem_object_release_mmap(obj);
2100 /* The fence will be lost when the device powers down. If any were
2101 * in use by hardware (i.e. they are pinned), we should not be powering
2102 * down! All other fences will be reacquired by the user upon waking.
2104 for (i = 0; i < dev_priv->num_fence_regs; i++) {
2105 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
2107 /* Ideally we want to assert that the fence register is not
2108 * live at this point (i.e. that no piece of code will be
2109 * trying to write through fence + GTT, as that both violates
2110 * our tracking of activity and associated locking/barriers,
2111 * but also is illegal given that the hw is powered down).
2113 * Previously we used reg->pin_count as a "liveness" indicator.
2114 * That is not sufficient, and we need a more fine-grained
2115 * tool if we want to have a sanity check here.
2121 GEM_BUG_ON(i915_vma_has_userfault(reg->vma));
2126 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
2128 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2131 err = drm_gem_create_mmap_offset(&obj->base);
2135 /* Attempt to reap some mmap space from dead objects */
2137 err = i915_gem_wait_for_idle(dev_priv,
2138 I915_WAIT_INTERRUPTIBLE,
2139 MAX_SCHEDULE_TIMEOUT);
2143 i915_gem_drain_freed_objects(dev_priv);
2144 err = drm_gem_create_mmap_offset(&obj->base);
2148 } while (flush_delayed_work(&dev_priv->gt.retire_work));
2153 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
2155 drm_gem_free_mmap_offset(&obj->base);
2159 i915_gem_mmap_gtt(struct drm_file *file,
2160 struct drm_device *dev,
2164 struct drm_i915_gem_object *obj;
2167 obj = i915_gem_object_lookup(file, handle);
2171 ret = i915_gem_object_create_mmap_offset(obj);
2173 *offset = drm_vma_node_offset_addr(&obj->base.vma_node);
2175 i915_gem_object_put(obj);
2180 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
2182 * @data: GTT mapping ioctl data
2183 * @file: GEM object info
2185 * Simply returns the fake offset to userspace so it can mmap it.
2186 * The mmap call will end up in drm_gem_mmap(), which will set things
2187 * up so we can get faults in the handler above.
2189 * The fault handler will take care of binding the object into the GTT
2190 * (since it may have been evicted to make room for something), allocating
2191 * a fence register, and mapping the appropriate aperture address into
2195 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
2196 struct drm_file *file)
2198 struct drm_i915_gem_mmap_gtt *args = data;
2200 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
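/*
 * Rough userspace sketch of consuming the fake offset returned above
 * (illustrative only, error handling omitted):
 *
 *	struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
 *
 *	ioctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg);
 *	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *		   fd, arg.offset);
 */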
2203 /* Immediately discard the backing storage */
2205 i915_gem_object_truncate(struct drm_i915_gem_object *obj)
2207 i915_gem_object_free_mmap_offset(obj);
2209 if (obj->base.filp == NULL)
2212 /* Our goal here is to return as much of the memory as
2213 * is possible back to the system as we are called from OOM.
2214 * To do this we must instruct the shmfs to drop all of its
2215 * backing pages, *now*.
2217 shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
2218 obj->mm.madv = __I915_MADV_PURGED;
2219 obj->mm.pages = ERR_PTR(-EFAULT);
2222 /* Try to discard unwanted pages */
2223 void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
2225 struct address_space *mapping;
2227 lockdep_assert_held(&obj->mm.lock);
2228 GEM_BUG_ON(i915_gem_object_has_pages(obj));
2230 switch (obj->mm.madv) {
2231 case I915_MADV_DONTNEED:
2232 i915_gem_object_truncate(obj);
2233 case __I915_MADV_PURGED:
2237 if (obj->base.filp == NULL)
2240 mapping = obj->base.filp->f_mapping;
2241 invalidate_mapping_pages(mapping, 0, (loff_t)-1);
2245 * Move pages to appropriate lru and release the pagevec, decrementing the
2246 * ref count of those pages.
2248 static void check_release_pagevec(struct pagevec *pvec)
2250 check_move_unevictable_pages(pvec);
2251 __pagevec_release(pvec);
2256 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
2257 struct sg_table *pages)
2259 struct sgt_iter sgt_iter;
2260 struct pagevec pvec;
2263 __i915_gem_object_release_shmem(obj, pages, true);
2265 i915_gem_gtt_finish_pages(obj, pages);
2267 if (i915_gem_object_needs_bit17_swizzle(obj))
2268 i915_gem_object_save_bit_17_swizzle(obj, pages);
2270 mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping);
2272 pagevec_init(&pvec);
2273 for_each_sgt_page(page, sgt_iter, pages) {
2275 set_page_dirty(page);
2277 if (obj->mm.madv == I915_MADV_WILLNEED)
2278 mark_page_accessed(page);
2280 if (!pagevec_add(&pvec, page))
2281 check_release_pagevec(&pvec);
2283 if (pagevec_count(&pvec))
2284 check_release_pagevec(&pvec);
2285 obj->mm.dirty = false;
2287 sg_free_table(pages);
2291 static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
2293 struct radix_tree_iter iter;
2297 radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0)
2298 radix_tree_delete(&obj->mm.get_page.radix, iter.index);
2302 static struct sg_table *
2303 __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
2305 struct drm_i915_private *i915 = to_i915(obj->base.dev);
2306 struct sg_table *pages;
2308 pages = fetch_and_zero(&obj->mm.pages);
2309 if (IS_ERR_OR_NULL(pages))
2312 spin_lock(&i915->mm.obj_lock);
2313 list_del(&obj->mm.link);
2314 spin_unlock(&i915->mm.obj_lock);
2316 if (obj->mm.mapping) {
2319 ptr = page_mask_bits(obj->mm.mapping);
2320 if (is_vmalloc_addr(ptr))
2323 kunmap(kmap_to_page(ptr));
2325 obj->mm.mapping = NULL;
2328 __i915_gem_object_reset_page_iter(obj);
2329 obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
2334 int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
2335 enum i915_mm_subclass subclass)
2337 struct sg_table *pages;
2340 if (i915_gem_object_has_pinned_pages(obj))
2343 GEM_BUG_ON(obj->bind_count);
2345 /* May be called by shrinker from within get_pages() (on another bo) */
2346 mutex_lock_nested(&obj->mm.lock, subclass);
2347 if (unlikely(atomic_read(&obj->mm.pages_pin_count))) {
2353 * ->put_pages might need to allocate memory for the bit17 swizzle
2354 * array, hence protect them from being reaped by removing them from gtt
2357 pages = __i915_gem_object_unset_pages(obj);
2360 * XXX Temporary hijinx to avoid updating all backends to handle
2361 * NULL pages. In the future, when we have more asynchronous
2362 * get_pages backends we should be better able to handle the
2363 * cancellation of the async task in a more uniform manner.
2365 if (!pages && !i915_gem_object_needs_async_cancel(obj))
2366 pages = ERR_PTR(-EINVAL);
2369 obj->ops->put_pages(obj, pages);
2373 mutex_unlock(&obj->mm.lock);
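/*
 * A minimal sketch of how the lock subclass is meant to be used, inferred
 * from the lockdep annotation above (illustrative, not new API): normal
 * release paths take obj->mm.lock with I915_MM_NORMAL, whereas the shrinker,
 * which may already be running inside another object's get_pages(), passes
 * I915_MM_SHRINKER so that lockdep can tell the two nestings apart:
 *
 *	__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
 *	...
 *	__i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
 */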
2378 bool i915_sg_trim(struct sg_table *orig_st)
2380 struct sg_table new_st;
2381 struct scatterlist *sg, *new_sg;
2384 if (orig_st->nents == orig_st->orig_nents)
2387 if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN))
2390 new_sg = new_st.sgl;
2391 for_each_sg(orig_st->sgl, sg, orig_st->nents, i) {
2392 sg_set_page(new_sg, sg_page(sg), sg->length, 0);
2393 sg_dma_address(new_sg) = sg_dma_address(sg);
2394 sg_dma_len(new_sg) = sg_dma_len(sg);
2396 new_sg = sg_next(new_sg);
2398 GEM_BUG_ON(new_sg); /* Should walk exactly nents and hit the end */
2400 sg_free_table(orig_st);
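/*
 * Worked example for the trim above (illustrative numbers): an 8MiB object
 * allocates a table sized for 2048 single-page entries up front. If shmem
 * returns largely contiguous pages, the coalescing loop in
 * i915_gem_object_get_pages_gtt() below may leave far fewer entries in use
 * than were allocated, and reallocating the table to exactly orig_st->nents
 * entries releases the rest:
 *
 *	i915_sg_trim(st);
 *
 * The helper returns false and keeps the original table if the smaller
 * allocation fails, so callers may simply ignore the return value.
 */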
2406 static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2408 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2409 const unsigned long page_count = obj->base.size / PAGE_SIZE;
2411 struct address_space *mapping;
2412 struct sg_table *st;
2413 struct scatterlist *sg;
2414 struct sgt_iter sgt_iter;
2416 unsigned long last_pfn = 0; /* suppress gcc warning */
2417 unsigned int max_segment = i915_sg_segment_size();
2418 unsigned int sg_page_sizes;
2419 struct pagevec pvec;
2424 * Assert that the object is not currently in any GPU domain. As it
2425 * wasn't in the GTT, there shouldn't be any way it could have been in
2428 GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
2429 GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
2432 * If there's no chance of allocating enough pages for the whole
2433 * object, bail early.
2435 if (page_count > totalram_pages())
2438 st = kmalloc(sizeof(*st), GFP_KERNEL);
2443 if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
2449 * Get the list of pages out of our struct file. They'll be pinned
2450 * at this point until we release them.
2452 * Fail silently without starting the shrinker
2454 mapping = obj->base.filp->f_mapping;
2455 mapping_set_unevictable(mapping);
2456 noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM);
2457 noreclaim |= __GFP_NORETRY | __GFP_NOWARN;
2462 for (i = 0; i < page_count; i++) {
2463 const unsigned int shrink[] = {
2464 I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE,
2467 gfp_t gfp = noreclaim;
2471 page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2472 if (likely(!IS_ERR(page)))
2476 ret = PTR_ERR(page);
2480 i915_gem_shrink(dev_priv, 2 * page_count, NULL, *s++);
2483 * We've tried hard to allocate the memory by reaping
2484 * our own buffer, now let the real VM do its job and
2485 * go down in flames if truly OOM.
2487 * However, since graphics tend to be disposable,
2488 * defer the oom here by reporting the ENOMEM back
2492 /* reclaim and warn, but no oom */
2493 gfp = mapping_gfp_mask(mapping);
2496 * Our bo are always dirty and so we require
2497 * kswapd to reclaim our pages (direct reclaim
2498 * does not effectively begin pageout of our
2499 * buffers on its own). However, direct reclaim
2500 * only waits for kswapd when under allocation
2501 * congestion. So as a result __GFP_RECLAIM is
2502 * unreliable and fails to actually reclaim our
2503 * dirty pages -- unless you try over and over
2504 * again with !__GFP_NORETRY. However, we still
2505 * want to fail this allocation rather than
2506 * trigger the out-of-memory killer and for
2507 * this we want __GFP_RETRY_MAYFAIL.
2509 gfp |= __GFP_RETRY_MAYFAIL;
2514 sg->length >= max_segment ||
2515 page_to_pfn(page) != last_pfn + 1) {
2517 sg_page_sizes |= sg->length;
2521 sg_set_page(sg, page, PAGE_SIZE, 0);
2523 sg->length += PAGE_SIZE;
2525 last_pfn = page_to_pfn(page);
2527 /* Check that the i965g/gm workaround works. */
2528 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
2530 if (sg) { /* loop terminated early; short sg table */
2531 sg_page_sizes |= sg->length;
2535 /* Trim unused sg entries to avoid wasting memory. */
2538 ret = i915_gem_gtt_prepare_pages(obj, st);
2541 * DMA remapping failed? One possible cause is that
2542 * it could not reserve enough large entries, asking
2543 * for PAGE_SIZE chunks instead may be helpful.
2545 if (max_segment > PAGE_SIZE) {
2546 for_each_sgt_page(page, sgt_iter, st)
2550 max_segment = PAGE_SIZE;
2553 dev_warn(&dev_priv->drm.pdev->dev,
2554 "Failed to DMA remap %lu pages\n",
2560 if (i915_gem_object_needs_bit17_swizzle(obj))
2561 i915_gem_object_do_bit_17_swizzle(obj, st);
2563 __i915_gem_object_set_pages(obj, st, sg_page_sizes);
2570 mapping_clear_unevictable(mapping);
2571 pagevec_init(&pvec);
2572 for_each_sgt_page(page, sgt_iter, st) {
2573 if (!pagevec_add(&pvec, page))
2574 check_release_pagevec(&pvec);
2576 if (pagevec_count(&pvec))
2577 check_release_pagevec(&pvec);
2582 * shmemfs first checks if there is enough memory to allocate the page
2583 * and reports ENOSPC should there be insufficient, along with the usual
2584 * ENOMEM for a genuine allocation failure.
2586 * We use ENOSPC in our driver to mean that we have run out of aperture
2587 * space and so want to translate the error from shmemfs back to our
2588 * usual understanding of ENOMEM.
2596 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
2597 struct sg_table *pages,
2598 unsigned int sg_page_sizes)
2600 struct drm_i915_private *i915 = to_i915(obj->base.dev);
2601 unsigned long supported = INTEL_INFO(i915)->page_sizes;
2604 lockdep_assert_held(&obj->mm.lock);
2606 obj->mm.get_page.sg_pos = pages->sgl;
2607 obj->mm.get_page.sg_idx = 0;
2609 obj->mm.pages = pages;
2611 if (i915_gem_object_is_tiled(obj) &&
2612 i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
2613 GEM_BUG_ON(obj->mm.quirked);
2614 __i915_gem_object_pin_pages(obj);
2615 obj->mm.quirked = true;
2618 GEM_BUG_ON(!sg_page_sizes);
2619 obj->mm.page_sizes.phys = sg_page_sizes;
2622 * Calculate the supported page-sizes which fit into the given
2623 * sg_page_sizes. This will give us the page-sizes which we may be able
2624 * to use opportunistically when later inserting into the GTT. For
2625 * example if phys=2G, then in theory we should be able to use 1G, 2M,
2626 * 64K or 4K pages, although in practice this will depend on a number of
2629 obj->mm.page_sizes.sg = 0;
2630 for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) {
2631 if (obj->mm.page_sizes.phys & ~0u << i)
2632 obj->mm.page_sizes.sg |= BIT(i);
2634 GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg));
2636 spin_lock(&i915->mm.obj_lock);
2637 list_add(&obj->mm.link, &i915->mm.unbound_list);
2638 spin_unlock(&i915->mm.obj_lock);
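/*
 * Worked example for the page-size mask computed above (illustrative values):
 * suppose the scatterlist was built from a mix of 2M and 64K chunks, so
 * phys = 2M | 64K. With supported = 4K | 64K | 2M, bit i is set in sg
 * whenever phys contains a chunk of at least 2^i bytes, giving
 * sg = 4K | 64K | 2M: the GTT insertion code may later pick any of those
 * page sizes on a per-VMA basis.
 */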
2641 static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2645 if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) {
2646 DRM_DEBUG("Attempting to obtain a purgeable object\n");
2650 err = obj->ops->get_pages(obj);
2651 GEM_BUG_ON(!err && !i915_gem_object_has_pages(obj));
2656 /* Ensure that the associated pages are gathered from the backing storage
2657 * and pinned into our object. i915_gem_object_pin_pages() may be called
2658 * multiple times before they are released by a single call to
2659 * i915_gem_object_unpin_pages() - once the pages are no longer referenced
2660 * either as a result of memory pressure (reaping pages under the shrinker)
2661 * or as the object is itself released.
2663 int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2667 err = mutex_lock_interruptible(&obj->mm.lock);
2671 if (unlikely(!i915_gem_object_has_pages(obj))) {
2672 GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
2674 err = ____i915_gem_object_get_pages(obj);
2678 smp_mb__before_atomic();
2680 atomic_inc(&obj->mm.pages_pin_count);
2683 mutex_unlock(&obj->mm.lock);
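/*
 * A minimal caller sketch for the pin/unpin pairing described above
 * (illustrative only):
 *
 *	err = i915_gem_object_pin_pages(obj);
 *	if (err)
 *		return err;
 *
 *	... access the backing store, e.g. via obj->mm.pages ...
 *
 *	i915_gem_object_unpin_pages(obj);
 *
 * The pin count nests, so pin/unpin pairs may be stacked; the pages only
 * become eligible for the shrinker once the count drops back to zero.
 */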
2687 /* The 'mapping' part of i915_gem_object_pin_map() below */
2688 static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
2689 enum i915_map_type type)
2691 unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
2692 struct sg_table *sgt = obj->mm.pages;
2693 struct sgt_iter sgt_iter;
2695 struct page *stack_pages[32];
2696 struct page **pages = stack_pages;
2697 unsigned long i = 0;
2701 /* A single page can always be kmapped */
2702 if (n_pages == 1 && type == I915_MAP_WB)
2703 return kmap(sg_page(sgt->sgl));
2705 if (n_pages > ARRAY_SIZE(stack_pages)) {
2706 /* Too big for stack -- allocate temporary array instead */
2707 pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL);
2712 for_each_sgt_page(page, sgt_iter, sgt)
2715 /* Check that we have the expected number of pages */
2716 GEM_BUG_ON(i != n_pages);
2721 /* fallthrough to use PAGE_KERNEL anyway */
2723 pgprot = PAGE_KERNEL;
2726 pgprot = pgprot_writecombine(PAGE_KERNEL_IO);
2729 addr = vmap(pages, n_pages, 0, pgprot);
2731 if (pages != stack_pages)
2737 /* get, pin, and map the pages of the object into kernel space */
2738 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
2739 enum i915_map_type type)
2741 enum i915_map_type has_type;
2746 if (unlikely(!i915_gem_object_has_struct_page(obj)))
2747 return ERR_PTR(-ENXIO);
2749 ret = mutex_lock_interruptible(&obj->mm.lock);
2751 return ERR_PTR(ret);
2753 pinned = !(type & I915_MAP_OVERRIDE);
2754 type &= ~I915_MAP_OVERRIDE;
2756 if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) {
2757 if (unlikely(!i915_gem_object_has_pages(obj))) {
2758 GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
2760 ret = ____i915_gem_object_get_pages(obj);
2764 smp_mb__before_atomic();
2766 atomic_inc(&obj->mm.pages_pin_count);
2769 GEM_BUG_ON(!i915_gem_object_has_pages(obj));
2771 ptr = page_unpack_bits(obj->mm.mapping, &has_type);
2772 if (ptr && has_type != type) {
2778 if (is_vmalloc_addr(ptr))
2781 kunmap(kmap_to_page(ptr));
2783 ptr = obj->mm.mapping = NULL;
2787 ptr = i915_gem_object_map(obj, type);
2793 obj->mm.mapping = page_pack_bits(ptr, type);
2797 mutex_unlock(&obj->mm.lock);
2801 atomic_dec(&obj->mm.pages_pin_count);
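/*
 * A minimal caller sketch for pin_map (illustrative; error handling trimmed):
 *
 *	void *vaddr;
 *
 *	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
 *	if (IS_ERR(vaddr))
 *		return PTR_ERR(vaddr);
 *
 *	memcpy(vaddr, data, size);
 *
 *	i915_gem_object_unpin_map(obj);
 *
 * The mapping is cached in obj->mm.mapping, so repeated calls requesting the
 * same type are cheap; asking for a different type tears down the old vmap
 * first, as handled above.
 */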
2808 i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
2809 const struct drm_i915_gem_pwrite *arg)
2811 struct address_space *mapping = obj->base.filp->f_mapping;
2812 char __user *user_data = u64_to_user_ptr(arg->data_ptr);
2816 /* Before we instantiate/pin the backing store for our use, we
2817 * can prepopulate the shmemfs filp efficiently using a write into
2818 * the pagecache. We avoid the penalty of instantiating all the
2819 * pages, important if the user is just writing to a few and never
2820 * uses the object on the GPU, and using a direct write into shmemfs
2821 * allows it to avoid the cost of retrieving a page (either swapin
2822 * or clearing-before-use) before it is overwritten.
2824 if (i915_gem_object_has_pages(obj))
2827 if (obj->mm.madv != I915_MADV_WILLNEED)
2830 /* Before the pages are instantiated the object is treated as being
2831 * in the CPU domain. The pages will be clflushed as required before
2832 * use, and we can freely write into the pages directly. If userspace
2833 * races pwrite with any other operation, corruption will ensue -
2834 * that is userspace's prerogative!
2838 offset = arg->offset;
2839 pg = offset_in_page(offset);
2842 unsigned int len, unwritten;
2847 len = PAGE_SIZE - pg;
2851 err = pagecache_write_begin(obj->base.filp, mapping,
2858 unwritten = copy_from_user(vaddr + pg, user_data, len);
2861 err = pagecache_write_end(obj->base.filp, mapping,
2862 offset, len, len - unwritten,
2879 struct i915_request *
2880 i915_gem_find_active_request(struct intel_engine_cs *engine)
2882 struct i915_request *request, *active = NULL;
2883 unsigned long flags;
2886 * We are called by the error capture, reset and to dump engine
2887 * state at random points in time. In particular, note that neither is
2888 * crucially ordered with an interrupt. After a hang, the GPU is dead
2889 * and we assume that no more writes can happen (we waited long enough
2890 * for all writes that were in transaction to be flushed) - adding an
2891 * extra delay for a recent interrupt is pointless. Hence, we do
2892 * not need an engine->irq_seqno_barrier() before the seqno reads.
2893 * At all other times, we must assume the GPU is still running, but
2894 * we only care about the snapshot of this moment.
2896 spin_lock_irqsave(&engine->timeline.lock, flags);
2897 list_for_each_entry(request, &engine->timeline.requests, link) {
2898 if (__i915_request_completed(request, request->global_seqno))
2904 spin_unlock_irqrestore(&engine->timeline.lock, flags);
2910 i915_gem_retire_work_handler(struct work_struct *work)
2912 struct drm_i915_private *dev_priv =
2913 container_of(work, typeof(*dev_priv), gt.retire_work.work);
2914 struct drm_device *dev = &dev_priv->drm;
2916 /* Come back later if the device is busy... */
2917 if (mutex_trylock(&dev->struct_mutex)) {
2918 i915_retire_requests(dev_priv);
2919 mutex_unlock(&dev->struct_mutex);
2923 * Keep the retire handler running until we are finally idle.
2924 * We do not need to do this test under locking as in the worst-case
2925 * we queue the retire worker once too often.
2927 if (READ_ONCE(dev_priv->gt.awake))
2928 queue_delayed_work(dev_priv->wq,
2929 &dev_priv->gt.retire_work,
2930 round_jiffies_up_relative(HZ));
2933 static void shrink_caches(struct drm_i915_private *i915)
2936 * kmem_cache_shrink() discards empty slabs and reorders partially
2937 * filled slabs to prioritise allocating from the mostly full slabs,
2938 * with the aim of reducing fragmentation.
2940 kmem_cache_shrink(i915->priorities);
2941 kmem_cache_shrink(i915->dependencies);
2942 kmem_cache_shrink(i915->requests);
2943 kmem_cache_shrink(i915->luts);
2944 kmem_cache_shrink(i915->vmas);
2945 kmem_cache_shrink(i915->objects);
2948 struct sleep_rcu_work {
2950 struct rcu_head rcu;
2951 struct work_struct work;
2953 struct drm_i915_private *i915;
2958 same_epoch(struct drm_i915_private *i915, unsigned int epoch)
2961 * There is a small chance that the epoch wrapped since we started
2962 * sleeping. If we assume that epoch is at least a u32, then it will
2963 * take at least 2^32 * 100ms for it to wrap, or about 13.6 years.
2965 return epoch == READ_ONCE(i915->gt.epoch);
2968 static void __sleep_work(struct work_struct *work)
2970 struct sleep_rcu_work *s = container_of(work, typeof(*s), work);
2971 struct drm_i915_private *i915 = s->i915;
2972 unsigned int epoch = s->epoch;
2975 if (same_epoch(i915, epoch))
2976 shrink_caches(i915);
2979 static void __sleep_rcu(struct rcu_head *rcu)
2981 struct sleep_rcu_work *s = container_of(rcu, typeof(*s), rcu);
2982 struct drm_i915_private *i915 = s->i915;
2984 destroy_rcu_head(&s->rcu);
2986 if (same_epoch(i915, s->epoch)) {
2987 INIT_WORK(&s->work, __sleep_work);
2988 queue_work(i915->wq, &s->work);
2995 new_requests_since_last_retire(const struct drm_i915_private *i915)
2997 return (READ_ONCE(i915->gt.active_requests) ||
2998 work_pending(&i915->gt.idle_work.work));
3001 static void assert_kernel_context_is_current(struct drm_i915_private *i915)
3003 struct intel_engine_cs *engine;
3004 enum intel_engine_id id;
3006 if (i915_terminally_wedged(&i915->gpu_error))
3009 GEM_BUG_ON(i915->gt.active_requests);
3010 for_each_engine(engine, i915, id) {
3011 GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline.last_request));
3012 GEM_BUG_ON(engine->last_retired_context !=
3013 to_intel_context(i915->kernel_context, engine));
3018 i915_gem_idle_work_handler(struct work_struct *work)
3020 struct drm_i915_private *dev_priv =
3021 container_of(work, typeof(*dev_priv), gt.idle_work.work);
3022 unsigned int epoch = I915_EPOCH_INVALID;
3023 bool rearm_hangcheck;
3025 if (!READ_ONCE(dev_priv->gt.awake))
3028 if (READ_ONCE(dev_priv->gt.active_requests))
3032 * Flush out the last user context, leaving only the pinned
3033 * kernel context resident. When we are idling on the kernel_context,
3034 * no more new requests (with a context switch) are emitted and we
3035 * can finally rest. A consequence is that the idle work handler is
3036 * always called at least twice before idling (and if the system is
3037 * idle that implies a round trip through the retire worker).
3039 mutex_lock(&dev_priv->drm.struct_mutex);
3040 i915_gem_switch_to_kernel_context(dev_priv);
3041 mutex_unlock(&dev_priv->drm.struct_mutex);
3043 GEM_TRACE("active_requests=%d (after switch-to-kernel-context)\n",
3044 READ_ONCE(dev_priv->gt.active_requests));
3047 * Wait for last execlists context complete, but bail out in case a
3048 * new request is submitted. As we don't trust the hardware, we
3049 * continue on if the wait times out. This is necessary to allow
3050 * the machine to suspend even if the hardware dies, and we will
3051 * try to recover in resume (after depriving the hardware of power,
3052 * it may be in a better mood).
3054 __wait_for(if (new_requests_since_last_retire(dev_priv)) return,
3055 intel_engines_are_idle(dev_priv),
3056 I915_IDLE_ENGINES_TIMEOUT * 1000,
3060 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
3062 if (!mutex_trylock(&dev_priv->drm.struct_mutex)) {
3063 /* Currently busy, come back later */
3064 mod_delayed_work(dev_priv->wq,
3065 &dev_priv->gt.idle_work,
3066 msecs_to_jiffies(50));
3071 * New request retired after this work handler started, extend active
3072 * period until next instance of the work.
3074 if (new_requests_since_last_retire(dev_priv))
3077 epoch = __i915_gem_park(dev_priv);
3079 assert_kernel_context_is_current(dev_priv);
3081 rearm_hangcheck = false;
3083 mutex_unlock(&dev_priv->drm.struct_mutex);
3086 if (rearm_hangcheck) {
3087 GEM_BUG_ON(!dev_priv->gt.awake);
3088 i915_queue_hangcheck(dev_priv);
3092 * When we are idle, it is an opportune time to reap our caches.
3093 * However, we have many objects that utilise RCU and the ordered
3094 * i915->wq that this work is executing on. To try and flush any
3095 * pending frees now we are idle, we first wait for an RCU grace
3096 * period, and then queue a task (that will run last on the wq) to
3097 * shrink and re-optimize the caches.
3099 if (same_epoch(dev_priv, epoch)) {
3100 struct sleep_rcu_work *s = kmalloc(sizeof(*s), GFP_KERNEL);
3102 init_rcu_head(&s->rcu);
3105 call_rcu(&s->rcu, __sleep_rcu);
3110 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
3112 struct drm_i915_private *i915 = to_i915(gem->dev);
3113 struct drm_i915_gem_object *obj = to_intel_bo(gem);
3114 struct drm_i915_file_private *fpriv = file->driver_priv;
3115 struct i915_lut_handle *lut, *ln;
3117 mutex_lock(&i915->drm.struct_mutex);
3119 list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) {
3120 struct i915_gem_context *ctx = lut->ctx;
3121 struct i915_vma *vma;
3123 GEM_BUG_ON(ctx->file_priv == ERR_PTR(-EBADF));
3124 if (ctx->file_priv != fpriv)
3127 vma = radix_tree_delete(&ctx->handles_vma, lut->handle);
3128 GEM_BUG_ON(vma->obj != obj);
3130 /* We allow the process to have multiple handles to the same
3131 * vma, in the same fd namespace, by virtue of flink/open.
3133 GEM_BUG_ON(!vma->open_count);
3134 if (!--vma->open_count && !i915_vma_is_ggtt(vma))
3135 i915_vma_close(vma);
3137 list_del(&lut->obj_link);
3138 list_del(&lut->ctx_link);
3140 kmem_cache_free(i915->luts, lut);
3141 __i915_gem_object_release_unless_active(obj);
3144 mutex_unlock(&i915->drm.struct_mutex);
3147 static unsigned long to_wait_timeout(s64 timeout_ns)
3150 return MAX_SCHEDULE_TIMEOUT;
3152 if (timeout_ns == 0)
3155 return nsecs_to_jiffies_timeout(timeout_ns);
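/*
 * Worked examples of the conversion above: a negative timeout_ns means wait
 * forever (MAX_SCHEDULE_TIMEOUT), zero means poll (the wait reports -ETIME
 * immediately if the object is still busy), and a positive value is converted
 * with nsecs_to_jiffies_timeout(), which rounds up so the wait is never
 * shorter than the caller requested.
 */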
3159 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
3160 * @dev: drm device pointer
3161 * @data: ioctl data blob
3162 * @file: drm file pointer
3164 * Returns 0 if successful, else an error is returned with the remaining time in
3165 * the timeout parameter.
3166 * -ETIME: object is still busy after timeout
3167 * -ERESTARTSYS: signal interrupted the wait
3168 * -ENOENT: object doesn't exist
3169 * Also possible, but rare:
3170 * -EAGAIN: incomplete, restart syscall
3172 * -ENODEV: Internal IRQ fail
3173 * -E?: The add request failed
3175 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
3176 * non-zero timeout parameter the wait ioctl will wait for the given number of
3177 * nanoseconds on an object becoming unbusy. Since the wait itself does so
3178 * without holding struct_mutex the object may become re-busied before this
3179 * function completes. A similar but shorter race condition exists in the busy ioctl.
3183 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
3185 struct drm_i915_gem_wait *args = data;
3186 struct drm_i915_gem_object *obj;
3190 if (args->flags != 0)
3193 obj = i915_gem_object_lookup(file, args->bo_handle);
3197 start = ktime_get();
3199 ret = i915_gem_object_wait(obj,
3200 I915_WAIT_INTERRUPTIBLE |
3201 I915_WAIT_PRIORITY |
3203 to_wait_timeout(args->timeout_ns),
3204 to_rps_client(file));
3206 if (args->timeout_ns > 0) {
3207 args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
3208 if (args->timeout_ns < 0)
3209 args->timeout_ns = 0;
3212 * Apparently ktime isn't accurate enough and occasionally has a
3213 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
3214 * things up to make the test happy. We allow up to 1 jiffy.
3216 * This is a regression from the timespec->ktime conversion.
3218 if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns))
3219 args->timeout_ns = 0;
3221 /* Asked to wait beyond the jiffie/scheduler precision? */
3222 if (ret == -ETIME && args->timeout_ns)
3226 i915_gem_object_put(obj);
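/*
 * A minimal userspace-side sketch of this ioctl (illustrative only; assumes a
 * libdrm-style drmIoctl() wrapper and an open DRM fd):
 *
 *	struct drm_i915_gem_wait wait = {
 *		.bo_handle = handle,
 *		.flags = 0,
 *		.timeout_ns = 500 * 1000 * 1000,
 *	};
 *	int ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
 *
 * On success the object is idle and wait.timeout_ns has been updated with the
 * unconsumed remainder of the 500ms budget; ret == -1 with errno == ETIME
 * means the object was still busy when the timeout expired.
 */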
3230 static long wait_for_timeline(struct i915_timeline *tl,
3231 unsigned int flags, long timeout)
3233 struct i915_request *rq;
3235 rq = i915_gem_active_get_unlocked(&tl->last_request);
3242 * Switching to the kernel context is often used as a synchronous
3243 * step prior to idling, e.g. in suspend for flushing all
3244 * current operations to memory before sleeping. These we
3245 * want to complete as quickly as possible to avoid prolonged
3246 * stalls, so allow the gpu to boost to maximum clocks.
3248 if (flags & I915_WAIT_FOR_IDLE_BOOST)
3249 gen6_rps_boost(rq, NULL);
3251 timeout = i915_request_wait(rq, flags, timeout);
3252 i915_request_put(rq);
3257 static int wait_for_engines(struct drm_i915_private *i915)
3259 if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) {
3260 dev_err(i915->drm.dev,
3261 "Failed to idle engines, declaring wedged!\n");
3263 i915_gem_set_wedged(i915);
3270 int i915_gem_wait_for_idle(struct drm_i915_private *i915,
3271 unsigned int flags, long timeout)
3273 GEM_TRACE("flags=%x (%s), timeout=%ld%s\n",
3274 flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked",
3275 timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : "");
3277 /* If the device is asleep, we have no requests outstanding */
3278 if (!READ_ONCE(i915->gt.awake))
3281 if (flags & I915_WAIT_LOCKED) {
3282 struct i915_timeline *tl;
3285 lockdep_assert_held(&i915->drm.struct_mutex);
3287 list_for_each_entry(tl, &i915->gt.timelines, link) {
3288 timeout = wait_for_timeline(tl, flags, timeout);
3292 if (GEM_SHOW_DEBUG() && !timeout) {
3293 /* Presume that timeout was non-zero to begin with! */
3294 dev_warn(&i915->drm.pdev->dev,
3295 "Missed idle-completion interrupt!\n");
3299 err = wait_for_engines(i915);
3303 i915_retire_requests(i915);
3304 GEM_BUG_ON(i915->gt.active_requests);
3306 struct intel_engine_cs *engine;
3307 enum intel_engine_id id;
3309 for_each_engine(engine, i915, id) {
3310 struct i915_timeline *tl = &engine->timeline;
3312 timeout = wait_for_timeline(tl, flags, timeout);
3321 static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
3324 * We manually flush the CPU domain so that we can override and
3325 * force the flush for the display, and perform it asynchronously.
3327 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
3328 if (obj->cache_dirty)
3329 i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
3330 obj->write_domain = 0;
3333 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
3335 if (!READ_ONCE(obj->pin_global))
3338 mutex_lock(&obj->base.dev->struct_mutex);
3339 __i915_gem_object_flush_for_display(obj);
3340 mutex_unlock(&obj->base.dev->struct_mutex);
3344 * Moves a single object to the WC read, and possibly write domain.
3345 * @obj: object to act on
3346 * @write: ask for write access or read only
3348 * This function returns when the move is complete, including waiting on
3352 i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
3356 lockdep_assert_held(&obj->base.dev->struct_mutex);
3358 ret = i915_gem_object_wait(obj,
3359 I915_WAIT_INTERRUPTIBLE |
3361 (write ? I915_WAIT_ALL : 0),
3362 MAX_SCHEDULE_TIMEOUT,
3367 if (obj->write_domain == I915_GEM_DOMAIN_WC)
3370 /* Flush and acquire obj->pages so that we are coherent through
3371 * direct access in memory with previous cached writes through
3372 * shmemfs and that our cache domain tracking remains valid.
3373 * For example, if the obj->filp was moved to swap without us
3374 * being notified and releasing the pages, we would mistakenly
3375 * continue to assume that the obj remained out of the CPU cached
3378 ret = i915_gem_object_pin_pages(obj);
3382 flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
3384 /* Serialise direct access to this object with the barriers for
3385 * coherent writes from the GPU, by effectively invalidating the
3386 * WC domain upon first access.
3388 if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
3391 /* It should now be out of any other write domains, and we can update
3392 * the domain values for our changes.
3394 GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
3395 obj->read_domains |= I915_GEM_DOMAIN_WC;
3397 obj->read_domains = I915_GEM_DOMAIN_WC;
3398 obj->write_domain = I915_GEM_DOMAIN_WC;
3399 obj->mm.dirty = true;
3402 i915_gem_object_unpin_pages(obj);
3407 * Moves a single object to the GTT read, and possibly write domain.
3408 * @obj: object to act on
3409 * @write: ask for write access or read only
3411 * This function returns when the move is complete, including waiting on
3415 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
3419 lockdep_assert_held(&obj->base.dev->struct_mutex);
3421 ret = i915_gem_object_wait(obj,
3422 I915_WAIT_INTERRUPTIBLE |
3424 (write ? I915_WAIT_ALL : 0),
3425 MAX_SCHEDULE_TIMEOUT,
3430 if (obj->write_domain == I915_GEM_DOMAIN_GTT)
3433 /* Flush and acquire obj->pages so that we are coherent through
3434 * direct access in memory with previous cached writes through
3435 * shmemfs and that our cache domain tracking remains valid.
3436 * For example, if the obj->filp was moved to swap without us
3437 * being notified and releasing the pages, we would mistakenly
3438 * continue to assume that the obj remained out of the CPU cached
3441 ret = i915_gem_object_pin_pages(obj);
3445 flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
3447 /* Serialise direct access to this object with the barriers for
3448 * coherent writes from the GPU, by effectively invalidating the
3449 * GTT domain upon first access.
3451 if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
3454 /* It should now be out of any other write domains, and we can update
3455 * the domain values for our changes.
3457 GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
3458 obj->read_domains |= I915_GEM_DOMAIN_GTT;
3460 obj->read_domains = I915_GEM_DOMAIN_GTT;
3461 obj->write_domain = I915_GEM_DOMAIN_GTT;
3462 obj->mm.dirty = true;
3465 i915_gem_object_unpin_pages(obj);
3470 * Changes the cache-level of an object across all VMA.
3471 * @obj: object to act on
3472 * @cache_level: new cache level to set for the object
3474 * After this function returns, the object will be in the new cache-level
3475 * across all GTT and the contents of the backing storage will be coherent,
3476 * with respect to the new cache-level. In order to keep the backing storage
3477 * coherent for all users, we only allow a single cache level to be set
3478 * globally on the object and prevent it from being changed whilst the
3479 * hardware is reading from the object. That is if the object is currently
3480 * on the scanout it will be set to uncached (or equivalent display
3481 * cache coherency) and all non-MOCS GPU access will also be uncached so
3482 * that all direct access to the scanout remains coherent.
3484 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
3485 enum i915_cache_level cache_level)
3487 struct i915_vma *vma;
3490 lockdep_assert_held(&obj->base.dev->struct_mutex);
3492 if (obj->cache_level == cache_level)
3495 /* Inspect the list of currently bound VMA and unbind any that would
3496 * be invalid given the new cache-level. This is principally to
3497 * catch the issue of the CS prefetch crossing page boundaries and
3498 * reading an invalid PTE on older architectures.
3501 list_for_each_entry(vma, &obj->vma_list, obj_link) {
3502 if (!drm_mm_node_allocated(&vma->node))
3505 if (i915_vma_is_pinned(vma)) {
3506 DRM_DEBUG("can not change the cache level of pinned objects\n");
3510 if (!i915_vma_is_closed(vma) &&
3511 i915_gem_valid_gtt_space(vma, cache_level))
3514 ret = i915_vma_unbind(vma);
3518 /* As unbinding may affect other elements in the
3519 * obj->vma_list (due to side-effects from retiring
3520 * an active vma), play safe and restart the iterator.
3525 /* We can reuse the existing drm_mm nodes but need to change the
3526 * cache-level on the PTE. We could simply unbind them all and
3527 * rebind with the correct cache-level on next use. However since
3528 * we already have a valid slot, dma mapping, pages etc, we may as well
3529 * rewrite the PTE in the belief that doing so tramples upon less
3530 * state and so involves less work.
3532 if (obj->bind_count) {
3533 /* Before we change the PTE, the GPU must not be accessing it.
3534 * If we wait upon the object, we know that all the bound
3535 * VMA are no longer active.
3537 ret = i915_gem_object_wait(obj,
3538 I915_WAIT_INTERRUPTIBLE |
3541 MAX_SCHEDULE_TIMEOUT,
3546 if (!HAS_LLC(to_i915(obj->base.dev)) &&
3547 cache_level != I915_CACHE_NONE) {
3548 /* Access to snoopable pages through the GTT is
3549 * incoherent and on some machines causes a hard
3550 * lockup. Relinquish the CPU mmapping to force
3551 * userspace to refault in the pages and we can
3552 * then double check if the GTT mapping is still
3553 * valid for that pointer access.
3555 i915_gem_release_mmap(obj);
3557 /* As we no longer need a fence for GTT access,
3558 * we can relinquish it now (and so prevent having
3559 * to steal a fence from someone else on the next
3560 * fence request). Note GPU activity would have
3561 * dropped the fence as all snoopable access is
3562 * supposed to be linear.
3564 for_each_ggtt_vma(vma, obj) {
3565 ret = i915_vma_put_fence(vma);
3570 /* We either have incoherent backing store and
3571 * so no GTT access or the architecture is fully
3572 * coherent. In such cases, existing GTT mmaps
3573 * ignore the cache bit in the PTE and we can
3574 * rewrite it without confusing the GPU or having
3575 * to force userspace to fault back in its mmaps.
3579 list_for_each_entry(vma, &obj->vma_list, obj_link) {
3580 if (!drm_mm_node_allocated(&vma->node))
3583 ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
3589 list_for_each_entry(vma, &obj->vma_list, obj_link)
3590 vma->node.color = cache_level;
3591 i915_gem_object_set_cache_coherency(obj, cache_level);
3592 obj->cache_dirty = true; /* Always invalidate stale cachelines */
3597 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
3598 struct drm_file *file)
3600 struct drm_i915_gem_caching *args = data;
3601 struct drm_i915_gem_object *obj;
3605 obj = i915_gem_object_lookup_rcu(file, args->handle);
3611 switch (obj->cache_level) {
3612 case I915_CACHE_LLC:
3613 case I915_CACHE_L3_LLC:
3614 args->caching = I915_CACHING_CACHED;
3618 args->caching = I915_CACHING_DISPLAY;
3622 args->caching = I915_CACHING_NONE;
3630 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
3631 struct drm_file *file)
3633 struct drm_i915_private *i915 = to_i915(dev);
3634 struct drm_i915_gem_caching *args = data;
3635 struct drm_i915_gem_object *obj;
3636 enum i915_cache_level level;
3639 switch (args->caching) {
3640 case I915_CACHING_NONE:
3641 level = I915_CACHE_NONE;
3643 case I915_CACHING_CACHED:
3645 * Due to a HW issue on BXT A stepping, GPU stores via a
3646 * snooped mapping may leave stale data in a corresponding CPU
3647 * cacheline, whereas normally such cachelines would get
3650 if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
3653 level = I915_CACHE_LLC;
3655 case I915_CACHING_DISPLAY:
3656 level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
3662 obj = i915_gem_object_lookup(file, args->handle);
3667 * The caching mode of proxy object is handled by its generator, and
3668 * not allowed to be changed by userspace.
3670 if (i915_gem_object_is_proxy(obj)) {
3675 if (obj->cache_level == level)
3678 ret = i915_gem_object_wait(obj,
3679 I915_WAIT_INTERRUPTIBLE,
3680 MAX_SCHEDULE_TIMEOUT,
3681 to_rps_client(file));
3685 ret = i915_mutex_lock_interruptible(dev);
3689 ret = i915_gem_object_set_cache_level(obj, level);
3690 mutex_unlock(&dev->struct_mutex);
3693 i915_gem_object_put(obj);
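/*
 * A minimal userspace-side sketch of the caching ioctls (illustrative only;
 * assumes a libdrm-style drmIoctl() wrapper):
 *
 *	struct drm_i915_gem_caching arg = {
 *		.handle = handle,
 *		.caching = I915_CACHING_NONE,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg);
 *
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_CACHING, &arg);
 *
 * After the second call, arg.caching reports the level currently in effect
 * for the object (I915_CACHING_NONE, _CACHED or _DISPLAY).
 */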
3698 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
3699 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
3700 * (for pageflips). We only flush the caches while preparing the buffer for
3701 * display, the callers are responsible for frontbuffer flush.
3704 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
3706 const struct i915_ggtt_view *view,
3709 struct i915_vma *vma;
3712 lockdep_assert_held(&obj->base.dev->struct_mutex);
3714 /* Mark the global pin early so that we account for the
3715 * display coherency whilst setting up the cache domains.
3719 /* The display engine is not coherent with the LLC cache on gen6. As
3720 * a result, we make sure that the pinning that is about to occur is
3721 * done with uncached PTEs. This is the lowest common denominator for all
3724 * However for gen6+, we could do better by using the GFDT bit instead
3725 * of uncaching, which would allow us to flush all the LLC-cached data
3726 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
3728 ret = i915_gem_object_set_cache_level(obj,
3729 HAS_WT(to_i915(obj->base.dev)) ?
3730 I915_CACHE_WT : I915_CACHE_NONE);
3733 goto err_unpin_global;
3736 /* As the user may map the buffer once pinned in the display plane
3737 * (e.g. libkms for the bootup splash), we have to ensure that we
3738 * always use map_and_fenceable for all scanout buffers. However,
3739 * it may simply be too big to fit into mappable, in which case
3740 * put it anyway and hope that userspace can cope (but always first
3741 * try to preserve the existing ABI).
3743 vma = ERR_PTR(-ENOSPC);
3744 if ((flags & PIN_MAPPABLE) == 0 &&
3745 (!view || view->type == I915_GGTT_VIEW_NORMAL))
3746 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
3751 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
3753 goto err_unpin_global;
3755 vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
3757 __i915_gem_object_flush_for_display(obj);
3759 /* It should now be out of any other write domains, and we can update
3760 * the domain values for our changes.
3762 obj->read_domains |= I915_GEM_DOMAIN_GTT;
3772 i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
3774 lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
3776 if (WARN_ON(vma->obj->pin_global == 0))
3779 if (--vma->obj->pin_global == 0)
3780 vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
3782 /* Bump the LRU to try and avoid premature eviction whilst flipping */
3783 i915_gem_object_bump_inactive_ggtt(vma->obj);
3785 i915_vma_unpin(vma);
3789 * Moves a single object to the CPU read, and possibly write domain.
3790 * @obj: object to act on
3791 * @write: requesting write or read-only access
3793 * This function returns when the move is complete, including waiting on
3797 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
3801 lockdep_assert_held(&obj->base.dev->struct_mutex);
3803 ret = i915_gem_object_wait(obj,
3804 I915_WAIT_INTERRUPTIBLE |
3806 (write ? I915_WAIT_ALL : 0),
3807 MAX_SCHEDULE_TIMEOUT,
3812 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
3814 /* Flush the CPU cache if it's still invalid. */
3815 if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
3816 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
3817 obj->read_domains |= I915_GEM_DOMAIN_CPU;
3820 /* It should now be out of any other write domains, and we can update
3821 * the domain values for our changes.
3823 GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);
3825 /* If we're writing through the CPU, then the GPU read domains will
3826 * need to be invalidated at next use.
3829 __start_cpu_write(obj);
3834 /* Throttle our rendering by waiting until the ring has completed our requests
3835 * emitted over 20 msec ago.
3837 * Note that if we were to use the current jiffies each time around the loop,
3838 * we wouldn't escape the function with any frames outstanding if the time to
3839 * render a frame was over 20ms.
3841 * This should get us reasonable parallelism between CPU and GPU but also
3842 * relatively low latency when blocking on a particular request to finish.
3845 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
3847 struct drm_i915_private *dev_priv = to_i915(dev);
3848 struct drm_i915_file_private *file_priv = file->driver_priv;
3849 unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
3850 struct i915_request *request, *target = NULL;
3853 /* ABI: return -EIO if already wedged */
3854 if (i915_terminally_wedged(&dev_priv->gpu_error))
3857 spin_lock(&file_priv->mm.lock);
3858 list_for_each_entry(request, &file_priv->mm.request_list, client_link) {
3859 if (time_after_eq(request->emitted_jiffies, recent_enough))
3863 list_del(&target->client_link);
3864 target->file_priv = NULL;
3870 i915_request_get(target);
3871 spin_unlock(&file_priv->mm.lock);
3876 ret = i915_request_wait(target,
3877 I915_WAIT_INTERRUPTIBLE,
3878 MAX_SCHEDULE_TIMEOUT);
3879 i915_request_put(target);
3881 return ret < 0 ? ret : 0;
3885 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
3886 const struct i915_ggtt_view *view,
3891 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
3892 struct i915_address_space *vm = &dev_priv->ggtt.vm;
3893 struct i915_vma *vma;
3896 lockdep_assert_held(&obj->base.dev->struct_mutex);
3898 if (flags & PIN_MAPPABLE &&
3899 (!view || view->type == I915_GGTT_VIEW_NORMAL)) {
3900 /* If the required space is larger than the available
3901 * aperture, we will not be able to find a slot for the
3902 * object and unbinding the object now will be in
3903 * vain. Worse, doing so may cause us to ping-pong
3904 * the object in and out of the Global GTT and
3905 * waste a lot of cycles under the mutex.
3907 if (obj->base.size > dev_priv->ggtt.mappable_end)
3908 return ERR_PTR(-E2BIG);
3910 /* If NONBLOCK is set the caller is optimistically
3911 * trying to cache the full object within the mappable
3912 * aperture, and *must* have a fallback in place for
3913 * situations where we cannot bind the object. We
3914 * can be a little more lax here and use the fallback
3915 * more often to avoid costly migrations of ourselves
3916 * and other objects within the aperture.
3918 * Half-the-aperture is used as a simple heuristic.
3919 * More interesting would be to do a search for a free
3920 * block prior to making the commitment to unbind.
3921 * That caters for the self-harm case, and with a
3922 * little more heuristics (e.g. NOFAULT, NOEVICT)
3923 * we could try to minimise harm to others.
3925 if (flags & PIN_NONBLOCK &&
3926 obj->base.size > dev_priv->ggtt.mappable_end / 2)
3927 return ERR_PTR(-ENOSPC);
3930 vma = i915_vma_instance(obj, vm, view);
3931 if (unlikely(IS_ERR(vma)))
3934 if (i915_vma_misplaced(vma, size, alignment, flags)) {
3935 if (flags & PIN_NONBLOCK) {
3936 if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))
3937 return ERR_PTR(-ENOSPC);
3939 if (flags & PIN_MAPPABLE &&
3940 vma->fence_size > dev_priv->ggtt.mappable_end / 2)
3941 return ERR_PTR(-ENOSPC);
3944 WARN(i915_vma_is_pinned(vma),
3945 "bo is already pinned in ggtt with incorrect alignment:"
3946 " offset=%08x, req.alignment=%llx,"
3947 " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n",
3948 i915_ggtt_offset(vma), alignment,
3949 !!(flags & PIN_MAPPABLE),
3950 i915_vma_is_map_and_fenceable(vma));
3951 ret = i915_vma_unbind(vma);
3953 return ERR_PTR(ret);
3956 ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
3958 return ERR_PTR(ret);
3963 static __always_inline unsigned int __busy_read_flag(unsigned int id)
3965 /* Note that we could alias engines in the execbuf API, but
3966 * that would be very unwise as it prevents userspace from
3967 * fine control over engine selection. Ahem.
3969 * This should be something like EXEC_MAX_ENGINE instead of
3972 BUILD_BUG_ON(I915_NUM_ENGINES > 16);
3973 return 0x10000 << id;
3976 static __always_inline unsigned int __busy_write_id(unsigned int id)
3978 /* The uABI guarantees an active writer is also amongst the read
3979 * engines. This would be true if we accessed the activity tracking
3980 * under the lock, but as we perform the lookup of the object and
3981 * its activity locklessly we can not guarantee that the last_write
3982 * being active implies that we have set the same engine flag from
3983 * last_read - hence we always set both read and write busy for
3986 return id | __busy_read_flag(id);
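/*
 * Worked example of the encoding above: for an engine with uabi id 2,
 * __busy_read_flag(2) == 0x10000 << 2 == 0x40000 and
 * __busy_write_id(2) == 2 | 0x40000 == 0x40002. Userspace therefore finds
 * the id of the last writer in the low 16 bits of args->busy and one read
 * bit per engine in the upper 16 bits.
 */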
3989 static __always_inline unsigned int
3990 __busy_set_if_active(const struct dma_fence *fence,
3991 unsigned int (*flag)(unsigned int id))
3993 struct i915_request *rq;
3995 /* We have to check the current hw status of the fence as the uABI
3996 * guarantees forward progress. We could rely on the idle worker
3997 * to eventually flush us, but to minimise latency just ask the
4000 * Note we only report on the status of native fences.
4002 if (!dma_fence_is_i915(fence))
4005 /* opencode to_request() in order to avoid const warnings */
4006 rq = container_of(fence, struct i915_request, fence);
4007 if (i915_request_completed(rq))
4010 return flag(rq->engine->uabi_id);
4013 static __always_inline unsigned int
4014 busy_check_reader(const struct dma_fence *fence)
4016 return __busy_set_if_active(fence, __busy_read_flag);
4019 static __always_inline unsigned int
4020 busy_check_writer(const struct dma_fence *fence)
4025 return __busy_set_if_active(fence, __busy_write_id);
4029 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
4030 struct drm_file *file)
4032 struct drm_i915_gem_busy *args = data;
4033 struct drm_i915_gem_object *obj;
4034 struct reservation_object_list *list;
4040 obj = i915_gem_object_lookup_rcu(file, args->handle);
4044 /* A discrepancy here is that we do not report the status of
4045 * non-i915 fences, i.e. even though we may report the object as idle,
4046 * a call to set-domain may still stall waiting for foreign rendering.
4047 * This also means that wait-ioctl may report an object as busy,
4048 * where busy-ioctl considers it idle.
4050 * We trade the ability to warn of foreign fences to report on which
4051 * i915 engines are active for the object.
4053 * Alternatively, we can trade that extra information on read/write
4056 * !reservation_object_test_signaled_rcu(obj->resv, true);
4057 * to report the overall busyness. This is what the wait-ioctl does.
4061 seq = raw_read_seqcount(&obj->resv->seq);
4063 /* Translate the exclusive fence to the READ *and* WRITE engine */
4064 args->busy = busy_check_writer(rcu_dereference(obj->resv->fence_excl));
4066 /* Translate shared fences to READ set of engines */
4067 list = rcu_dereference(obj->resv->fence);
4069 unsigned int shared_count = list->shared_count, i;
4071 for (i = 0; i < shared_count; ++i) {
4072 struct dma_fence *fence =
4073 rcu_dereference(list->shared[i]);
4075 args->busy |= busy_check_reader(fence);
4079 if (args->busy && read_seqcount_retry(&obj->resv->seq, seq))
4089 i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
4090 struct drm_file *file_priv)
4092 return i915_gem_ring_throttle(dev, file_priv);
4096 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
4097 struct drm_file *file_priv)
4099 struct drm_i915_private *dev_priv = to_i915(dev);
4100 struct drm_i915_gem_madvise *args = data;
4101 struct drm_i915_gem_object *obj;
4104 switch (args->madv) {
4105 case I915_MADV_DONTNEED:
4106 case I915_MADV_WILLNEED:
4112 obj = i915_gem_object_lookup(file_priv, args->handle);
4116 err = mutex_lock_interruptible(&obj->mm.lock);
4120 if (i915_gem_object_has_pages(obj) &&
4121 i915_gem_object_is_tiled(obj) &&
4122 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
4123 if (obj->mm.madv == I915_MADV_WILLNEED) {
4124 GEM_BUG_ON(!obj->mm.quirked);
4125 __i915_gem_object_unpin_pages(obj);
4126 obj->mm.quirked = false;
4128 if (args->madv == I915_MADV_WILLNEED) {
4129 GEM_BUG_ON(obj->mm.quirked);
4130 __i915_gem_object_pin_pages(obj);
4131 obj->mm.quirked = true;
4135 if (obj->mm.madv != __I915_MADV_PURGED)
4136 obj->mm.madv = args->madv;
4138 /* if the object is no longer attached, discard its backing storage */
4139 if (obj->mm.madv == I915_MADV_DONTNEED &&
4140 !i915_gem_object_has_pages(obj))
4141 i915_gem_object_truncate(obj);
4143 args->retained = obj->mm.madv != __I915_MADV_PURGED;
4144 mutex_unlock(&obj->mm.lock);
4147 i915_gem_object_put(obj);
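/*
 * A minimal userspace-side sketch of the madvise ioctl (illustrative only;
 * assumes a libdrm-style drmIoctl() wrapper):
 *
 *	struct drm_i915_gem_madvise madv = {
 *		.handle = handle,
 *		.madv = I915_MADV_DONTNEED,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
 *
 * and later, before reusing the buffer:
 *
 *	madv.madv = I915_MADV_WILLNEED;
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
 *	if (!madv.retained)
 *		... the shrinker discarded the contents, reinitialise them ...
 */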
4152 frontbuffer_retire(struct i915_gem_active *active, struct i915_request *request)
4154 struct drm_i915_gem_object *obj =
4155 container_of(active, typeof(*obj), frontbuffer_write);
4157 intel_fb_obj_flush(obj, ORIGIN_CS);
4160 void i915_gem_object_init(struct drm_i915_gem_object *obj,
4161 const struct drm_i915_gem_object_ops *ops)
4163 mutex_init(&obj->mm.lock);
4165 INIT_LIST_HEAD(&obj->vma_list);
4166 INIT_LIST_HEAD(&obj->lut_list);
4167 INIT_LIST_HEAD(&obj->batch_pool_link);
4169 init_rcu_head(&obj->rcu);
4173 reservation_object_init(&obj->__builtin_resv);
4174 obj->resv = &obj->__builtin_resv;
4176 obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
4177 init_request_active(&obj->frontbuffer_write, frontbuffer_retire);
4179 obj->mm.madv = I915_MADV_WILLNEED;
4180 INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
4181 mutex_init(&obj->mm.get_page.lock);
4183 i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
4186 static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
4187 .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
4188 I915_GEM_OBJECT_IS_SHRINKABLE,
4190 .get_pages = i915_gem_object_get_pages_gtt,
4191 .put_pages = i915_gem_object_put_pages_gtt,
4193 .pwrite = i915_gem_object_pwrite_gtt,
4196 static int i915_gem_object_create_shmem(struct drm_device *dev,
4197 struct drm_gem_object *obj,
4200 struct drm_i915_private *i915 = to_i915(dev);
4201 unsigned long flags = VM_NORESERVE;
4204 drm_gem_private_object_init(dev, obj, size);
4207 filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size,
4210 filp = shmem_file_setup("i915", size, flags);
4213 return PTR_ERR(filp);
4220 struct drm_i915_gem_object *
4221 i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
4223 struct drm_i915_gem_object *obj;
4224 struct address_space *mapping;
4225 unsigned int cache_level;
4229 /* There is a prevalence of the assumption that we fit the object's
4230 * page count inside a 32bit _signed_ variable. Let's document this and
4231 * catch if we ever need to fix it. In the meantime, if you do spot
4232 * such a local variable, please consider fixing!
4234 if (size >> PAGE_SHIFT > INT_MAX)
4235 return ERR_PTR(-E2BIG);
4237 if (overflows_type(size, obj->base.size))
4238 return ERR_PTR(-E2BIG);
4240 obj = i915_gem_object_alloc(dev_priv);
4242 return ERR_PTR(-ENOMEM);
4244 ret = i915_gem_object_create_shmem(&dev_priv->drm, &obj->base, size);
4248 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
4249 if (IS_I965GM(dev_priv) || IS_I965G(dev_priv)) {
4250 /* 965gm cannot relocate objects above 4GiB. */
4251 mask &= ~__GFP_HIGHMEM;
4252 mask |= __GFP_DMA32;
4255 mapping = obj->base.filp->f_mapping;
4256 mapping_set_gfp_mask(mapping, mask);
4257 GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
4259 i915_gem_object_init(obj, &i915_gem_object_ops);
4261 obj->write_domain = I915_GEM_DOMAIN_CPU;
4262 obj->read_domains = I915_GEM_DOMAIN_CPU;
4264 if (HAS_LLC(dev_priv))
4265 /* On some devices, we can have the GPU use the LLC (the CPU
4266 * cache) for about a 10% performance improvement
4267 * compared to uncached. Graphics requests other than
4268 * display scanout are coherent with the CPU in
4269 * accessing this cache. This means in this mode we
4270 * don't need to clflush on the CPU side, and on the
4271 * GPU side we only need to flush internal caches to
4272 * get data visible to the CPU.
4274 * However, we maintain the display planes as UC, and so
4275 * need to rebind when first used as such.
4277 cache_level = I915_CACHE_LLC;
4279 cache_level = I915_CACHE_NONE;
4281 i915_gem_object_set_cache_coherency(obj, cache_level);
4283 trace_i915_gem_object_create(obj);
4288 i915_gem_object_free(obj);
4289 return ERR_PTR(ret);
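/*
 * A minimal kernel-side caller sketch (illustrative only):
 *
 *	struct drm_i915_gem_object *obj;
 *
 *	obj = i915_gem_object_create(dev_priv, SZ_64K);
 *	if (IS_ERR(obj))
 *		return PTR_ERR(obj);
 *
 *	... pin pages, map, or bind the object into the GGTT as needed ...
 *
 *	i915_gem_object_put(obj);
 *
 * The size is in bytes; impossibly large sizes are rejected with -E2BIG as
 * documented above.
 */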
4292 static bool discard_backing_storage(struct drm_i915_gem_object *obj)
4294 /* If we are the last user of the backing storage (be it shmemfs
4295 * pages or stolen etc), we know that the pages are going to be
4296 * immediately released. In this case, we can then skip copying
4297 * back the contents from the GPU.
4300 if (obj->mm.madv != I915_MADV_WILLNEED)
4303 if (obj->base.filp == NULL)
4306 /* At first glance, this looks racy, but then again so would be
4307 * userspace racing mmap against close. However, the first external
4308 * reference to the filp can only be obtained through the
4309 * i915_gem_mmap_ioctl() which safeguards us against the user
4310 * acquiring such a reference whilst we are in the middle of
4311 * freeing the object.
4313 return atomic_long_read(&obj->base.filp->f_count) == 1;
4316 static void __i915_gem_free_objects(struct drm_i915_private *i915,
4317 struct llist_node *freed)
4319 struct drm_i915_gem_object *obj, *on;
4320 intel_wakeref_t wakeref;
4322 wakeref = intel_runtime_pm_get(i915);
4323 llist_for_each_entry_safe(obj, on, freed, freed) {
4324 struct i915_vma *vma, *vn;
4326 trace_i915_gem_object_destroy(obj);
4328 mutex_lock(&i915->drm.struct_mutex);
4330 GEM_BUG_ON(i915_gem_object_is_active(obj));
4331 list_for_each_entry_safe(vma, vn,
4332 &obj->vma_list, obj_link) {
4333 GEM_BUG_ON(i915_vma_is_active(vma));
4334 vma->flags &= ~I915_VMA_PIN_MASK;
4335 i915_vma_destroy(vma);
4337 GEM_BUG_ON(!list_empty(&obj->vma_list));
4338 GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree));
4340 /* This serializes freeing with the shrinker. Since the free
4341 * is delayed, first by RCU then by the workqueue, we want the
4342 * shrinker to be able to free pages of unreferenced objects,
4343 * or else we may oom whilst there are plenty of deferred
4346 if (i915_gem_object_has_pages(obj)) {
4347 spin_lock(&i915->mm.obj_lock);
4348 list_del_init(&obj->mm.link);
4349 spin_unlock(&i915->mm.obj_lock);
4352 mutex_unlock(&i915->drm.struct_mutex);
4354 GEM_BUG_ON(obj->bind_count);
4355 GEM_BUG_ON(obj->userfault_count);
4356 GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
4357 GEM_BUG_ON(!list_empty(&obj->lut_list));
4359 if (obj->ops->release)
4360 obj->ops->release(obj);
4362 if (WARN_ON(i915_gem_object_has_pinned_pages(obj)))
4363 atomic_set(&obj->mm.pages_pin_count, 0);
4364 __i915_gem_object_put_pages(obj, I915_MM_NORMAL);
4365 GEM_BUG_ON(i915_gem_object_has_pages(obj));
4367 if (obj->base.import_attach)
4368 drm_prime_gem_destroy(&obj->base, NULL);
4370 reservation_object_fini(&obj->__builtin_resv);
4371 drm_gem_object_release(&obj->base);
4372 i915_gem_info_remove_obj(i915, obj->base.size);
4375 i915_gem_object_free(obj);
4377 GEM_BUG_ON(!atomic_read(&i915->mm.free_count));
4378 atomic_dec(&i915->mm.free_count);
4383 intel_runtime_pm_put(i915, wakeref);
4386 static void i915_gem_flush_free_objects(struct drm_i915_private *i915)
4388 struct llist_node *freed;
4390 /* Free the oldest, most stale object to keep the free_list short */
4392 if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */
4393 /* Only one consumer of llist_del_first() allowed */
4394 spin_lock(&i915->mm.free_lock);
4395 freed = llist_del_first(&i915->mm.free_list);
4396 spin_unlock(&i915->mm.free_lock);
4398 if (unlikely(freed)) {
4400 __i915_gem_free_objects(i915, freed);
4404 static void __i915_gem_free_work(struct work_struct *work)
4406 struct drm_i915_private *i915 =
4407 container_of(work, struct drm_i915_private, mm.free_work);
4408 struct llist_node *freed;
4411 * All file-owned VMA should have been released by this point through
4412 * i915_gem_close_object(), or earlier by i915_gem_context_close().
4413 * However, the object may also be bound into the global GTT (e.g.
4414 * older GPUs without per-process support, or for direct access through
4415 * the GTT either for the user or for scanout). Those VMA still need to
4419 spin_lock(&i915->mm.free_lock);
4420 while ((freed = llist_del_all(&i915->mm.free_list))) {
4421 spin_unlock(&i915->mm.free_lock);
4423 __i915_gem_free_objects(i915, freed);
4427 spin_lock(&i915->mm.free_lock);
4429 spin_unlock(&i915->mm.free_lock);
4432 static void __i915_gem_free_object_rcu(struct rcu_head *head)
4434 struct drm_i915_gem_object *obj =
4435 container_of(head, typeof(*obj), rcu);
4436 struct drm_i915_private *i915 = to_i915(obj->base.dev);
4439 * We reuse obj->rcu for the freed list, so we had better not treat
4440 * it like a rcu_head from this point forwards. And we expect all
4441 * objects to be freed via this path.
4443 destroy_rcu_head(&obj->rcu);
4446 * Since we require blocking on struct_mutex to unbind the freed
4447 * object from the GPU before releasing resources back to the
4448 * system, we can not do that directly from the RCU callback (which may
4449 * be a softirq context), but must instead then defer that work onto a
4450 * kthread. We use the RCU callback rather than move the freed object
4451 * directly onto the work queue so that we can mix between using the
4452 * worker and performing frees directly from subsequent allocations for
4453 * crude but effective memory throttling.
4455 if (llist_add(&obj->freed, &i915->mm.free_list))
4456 queue_work(i915->wq, &i915->mm.free_work);
4459 void i915_gem_free_object(struct drm_gem_object *gem_obj)
4461 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
4463 if (obj->mm.quirked)
4464 __i915_gem_object_unpin_pages(obj);
4466 if (discard_backing_storage(obj))
4467 obj->mm.madv = I915_MADV_DONTNEED;
4470 * Before we free the object, make sure any pure RCU-only
4471 * read-side critical sections are complete, e.g.
4472 * i915_gem_busy_ioctl(). For the corresponding synchronized
4473 * lookup see i915_gem_object_lookup_rcu().
4475 atomic_inc(&to_i915(obj->base.dev)->mm.free_count);
4476 call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
4479 void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
4481 lockdep_assert_held(&obj->base.dev->struct_mutex);
4483 if (!i915_gem_object_has_active_reference(obj) &&
4484 i915_gem_object_is_active(obj))
4485 i915_gem_object_set_active_reference(obj);
4487 i915_gem_object_put(obj);
4490 void i915_gem_sanitize(struct drm_i915_private *i915)
4492 intel_wakeref_t wakeref;
4496 mutex_lock(&i915->drm.struct_mutex);
4498 wakeref = intel_runtime_pm_get(i915);
4499 intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
4502 * As we have just resumed the machine and woken the device up from
4503 * deep PCI sleep (presumably D3_cold), assume the HW has been reset
4504 * back to defaults, recovering from whatever wedged state we left it
4505 * in and so worth trying to use the device once more.
4507 if (i915_terminally_wedged(&i915->gpu_error))
4508 i915_gem_unset_wedged(i915);
4511 * If we inherit context state from the BIOS or earlier occupants
4512 * of the GPU, the GPU may be in an inconsistent state when we
4513 * try to take over. The only way to remove the earlier state
4514 * is by resetting. However, resetting on earlier gen is tricky as
4515 * it may impact the display and we are uncertain about the stability
4516 * of the reset, so this could be applied to even earlier gen.
4518 intel_engines_sanitize(i915, false);
4520 intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
4521 intel_runtime_pm_put(i915, wakeref);
4523 i915_gem_contexts_lost(i915);
4524 mutex_unlock(&i915->drm.struct_mutex);
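/*
 * i915_gem_sanitize() is called both from i915_gem_init_mmio() and from
 * i915_gem_suspend_late() below, i.e. whenever we want to bring the GPU
 * back to a known, reset state before trusting it again.
 */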
4527 int i915_gem_suspend(struct drm_i915_private *i915)
4529 intel_wakeref_t wakeref;
4534 wakeref = intel_runtime_pm_get(i915);
4535 intel_suspend_gt_powersave(i915);
4537 mutex_lock(&i915->drm.struct_mutex);
4540 * We have to flush all the executing contexts to main memory so
4541 * that they can be saved in the hibernation image. To ensure the last
4542 * context image is coherent, we have to switch away from it. That
4543 * leaves the i915->kernel_context still active when
4544 * we actually suspend, and its image in memory may not match the GPU
4545 * state. Fortunately, the kernel_context is disposable and we do
4546 * not rely on its state.
4548 if (!i915_terminally_wedged(&i915->gpu_error)) {
4549 ret = i915_gem_switch_to_kernel_context(i915);
4553 ret = i915_gem_wait_for_idle(i915,
4554 I915_WAIT_INTERRUPTIBLE |
4556 I915_WAIT_FOR_IDLE_BOOST,
4557 MAX_SCHEDULE_TIMEOUT);
4558 if (ret && ret != -EIO)
4559 goto err_unlock;
4561 assert_kernel_context_is_current(i915);
4563 i915_retire_requests(i915); /* ensure we flush after wedging */
4565 mutex_unlock(&i915->drm.struct_mutex);
4567 intel_uc_suspend(i915);
4569 cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
4570 cancel_delayed_work_sync(&i915->gt.retire_work);
4573 * As the idle_work rearms itself if it detects a race, play it safe and
4574 * repeat the flush until it is definitely idle.
4576 drain_delayed_work(&i915->gt.idle_work);
4579 * Assert that we successfully flushed all the work and
4580 * reset the GPU back to its idle, low power state.
4582 WARN_ON(i915->gt.awake);
4583 if (WARN_ON(!intel_engines_are_idle(i915)))
4584 i915_gem_set_wedged(i915); /* no hope, discard everything */
4586 intel_runtime_pm_put(i915, wakeref);
4590 mutex_unlock(&i915->drm.struct_mutex);
4591 intel_runtime_pm_put(i915, wakeref);
4595 void i915_gem_suspend_late(struct drm_i915_private *i915)
4597 struct drm_i915_gem_object *obj;
4598 struct list_head *phases[] = {
4599 &i915->mm.unbound_list,
4600 &i915->mm.bound_list,
4605 * Neither the BIOS, ourselves, nor any other kernel
4606 * expects the system to be in execlists mode on startup,
4607 * so we need to reset the GPU back to legacy mode. And the only
4608 * known way to disable logical contexts is through a GPU reset.
4610 * So in order to leave the system in a known default configuration,
4611 * always reset the GPU upon unload and suspend. Afterwards we then
4612 * clean up the GEM state tracking, flushing off the requests and
4613 * leaving the system in a known idle state.
4615 * Note that it is of the utmost importance that the GPU is idle and
4616 * all stray writes are flushed *before* we dismantle the backing
4617 * storage for the pinned objects.
4619 * However, since we are uncertain that resetting the GPU on older
4620 * machines is a good idea, we don't - just in case it leaves the
4621 * machine in an unusable condition.
4624 mutex_lock(&i915->drm.struct_mutex);
4625 for (phase = phases; *phase; phase++) {
4626 list_for_each_entry(obj, *phase, mm.link)
4627 WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
4629 mutex_unlock(&i915->drm.struct_mutex);
4631 intel_uc_sanitize(i915);
4632 i915_gem_sanitize(i915);
4635 void i915_gem_resume(struct drm_i915_private *i915)
4639 WARN_ON(i915->gt.awake);
4641 mutex_lock(&i915->drm.struct_mutex);
4642 intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
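/*
 * The global GTT mappings and fence registers do not survive suspend,
 * so rewrite them before any engine is brought back up.
 */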
4644 i915_gem_restore_gtt_mappings(i915);
4645 i915_gem_restore_fences(i915);
4648 * As we didn't flush the kernel context before suspend, we cannot
4649 * guarantee that the context image is complete. So let's just reset
4650 * it and start again.
4652 i915->gt.resume(i915);
4654 if (i915_gem_init_hw(i915))
4655 goto err_wedged;
4657 intel_uc_resume(i915);
4659 /* Always reload a context for powersaving. */
4660 if (i915_gem_switch_to_kernel_context(i915))
4661 goto err_wedged;
4664 intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
4665 mutex_unlock(&i915->drm.struct_mutex);
4669 if (!i915_terminally_wedged(&i915->gpu_error)) {
4670 DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n");
4671 i915_gem_set_wedged(i915);
4676 void i915_gem_init_swizzling(struct drm_i915_private *dev_priv)
4678 if (INTEL_GEN(dev_priv) < 5 ||
4679 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
4680 return;
4682 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
4683 DISP_TILE_SURFACE_SWIZZLING);
4685 if (IS_GEN(dev_priv, 5))
4686 return;
4688 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
4689 if (IS_GEN(dev_priv, 6))
4690 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
4691 else if (IS_GEN(dev_priv, 7))
4692 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
4693 else if (IS_GEN(dev_priv, 8))
4694 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
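/*
 * Summary of the programming above: gen5 only sets
 * DISP_TILE_SURFACE_SWIZZLING and returns early; gen6+ additionally
 * enable TILECTL swizzling plus the per-gen arbiter bit (ARB_MODE on
 * gen6/7, GAMTARBMODE on gen8).
 */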
4699 static void init_unused_ring(struct drm_i915_private *dev_priv, u32 base)
4701 I915_WRITE(RING_CTL(base), 0);
4702 I915_WRITE(RING_HEAD(base), 0);
4703 I915_WRITE(RING_TAIL(base), 0);
4704 I915_WRITE(RING_START(base), 0);
4707 static void init_unused_rings(struct drm_i915_private *dev_priv)
4709 if (IS_I830(dev_priv)) {
4710 init_unused_ring(dev_priv, PRB1_BASE);
4711 init_unused_ring(dev_priv, SRB0_BASE);
4712 init_unused_ring(dev_priv, SRB1_BASE);
4713 init_unused_ring(dev_priv, SRB2_BASE);
4714 init_unused_ring(dev_priv, SRB3_BASE);
4715 } else if (IS_GEN(dev_priv, 2)) {
4716 init_unused_ring(dev_priv, SRB0_BASE);
4717 init_unused_ring(dev_priv, SRB1_BASE);
4718 } else if (IS_GEN(dev_priv, 3)) {
4719 init_unused_ring(dev_priv, PRB1_BASE);
4720 init_unused_ring(dev_priv, PRB2_BASE);
4724 static int __i915_gem_restart_engines(void *data)
4726 struct drm_i915_private *i915 = data;
4727 struct intel_engine_cs *engine;
4728 enum intel_engine_id id;
4731 for_each_engine(engine, i915, id) {
4732 err = engine->init_hw(engine);
4734 DRM_ERROR("Failed to restart %s (%d)\n",
4743 int i915_gem_init_hw(struct drm_i915_private *dev_priv)
4747 dev_priv->gt.last_init_time = ktime_get();
4749 /* Double layer security blanket, see i915_gem_init() */
4750 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4752 if (HAS_EDRAM(dev_priv) && INTEL_GEN(dev_priv) < 9)
4753 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
4755 if (IS_HASWELL(dev_priv))
4756 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ?
4757 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
4759 /* Apply the GT workarounds... */
4760 intel_gt_apply_workarounds(dev_priv);
4761 /* ...and determine whether they are sticking. */
4762 intel_gt_verify_workarounds(dev_priv, "init");
4764 i915_gem_init_swizzling(dev_priv);
4767 * At least 830 can leave some of the unused rings
4768 * "active" (i.e. head != tail) after resume, which
4769 * will prevent C3 entry. Make sure all unused rings
4770 * are totally idle.
4772 init_unused_rings(dev_priv);
4774 BUG_ON(!dev_priv->kernel_context);
4775 if (i915_terminally_wedged(&dev_priv->gpu_error)) {
4776 ret = -EIO;
4777 goto out;
4778 }
4780 ret = i915_ppgtt_init_hw(dev_priv);
4782 DRM_ERROR("Enabling PPGTT failed (%d)\n", ret);
4786 ret = intel_wopcm_init_hw(&dev_priv->wopcm);
4788 DRM_ERROR("Enabling WOPCM failed (%d)\n", ret);
4792 /* We can't enable contexts until all firmware is loaded */
4793 ret = intel_uc_init_hw(dev_priv);
4795 DRM_ERROR("Enabling uc failed (%d)\n", ret);
4799 intel_mocs_init_l3cc_table(dev_priv);
4801 /* Only when the HW is re-initialised can we replay the requests */
4802 ret = __i915_gem_restart_engines(dev_priv);
4806 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4811 intel_uc_fini_hw(dev_priv);
4813 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4818 static int __intel_engines_record_defaults(struct drm_i915_private *i915)
4820 struct i915_gem_context *ctx;
4821 struct intel_engine_cs *engine;
4822 enum intel_engine_id id;
4826 * As we reset the GPU during very early sanitisation, the current
4827 * register state on the GPU should reflect its default values.
4828 * We load a context onto the hw (with restore-inhibit), then switch
4829 * over to a second context to save that default register state. We
4830 * can then prime every new context with that state so they all start
4831 * from the same default HW values.
4834 ctx = i915_gem_context_create_kernel(i915, 0);
4836 return PTR_ERR(ctx);
4838 for_each_engine(engine, i915, id) {
4839 struct i915_request *rq;
4841 rq = i915_request_alloc(engine, ctx);
4848 if (engine->init_context)
4849 err = engine->init_context(rq);
4851 i915_request_add(rq);
4856 err = i915_gem_switch_to_kernel_context(i915);
4860 if (i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED, HZ / 5)) {
4861 i915_gem_set_wedged(i915);
4862 err = -EIO; /* Caller will declare us wedged */
4866 assert_kernel_context_is_current(i915);
4869 * Immediately park the GPU so that we enable powersaving and
4870 * treat it as idle. The next time we issue a request, we will
4871 * unpark and start using the engine->pinned_default_state, otherwise
4872 * it is in limbo and an early reset may fail.
4874 __i915_gem_park(i915);
4876 for_each_engine(engine, i915, id) {
4877 struct i915_vma *state;
4880 GEM_BUG_ON(to_intel_context(ctx, engine)->pin_count);
4882 state = to_intel_context(ctx, engine)->state;
4887 * As we will hold a reference to the logical state, it will
4888 * not be torn down with the context, and importantly the
4889 * object will hold onto its vma (making it possible for a
4890 * stray GTT write to corrupt our defaults). Unmap the vma
4891 * from the GTT to prevent such accidents and reclaim the
4892 * space.
4894 err = i915_vma_unbind(state);
4898 err = i915_gem_object_set_to_cpu_domain(state->obj, false);
4902 engine->default_state = i915_gem_object_get(state->obj);
4904 /* Check we can acquire the image of the context state */
4905 vaddr = i915_gem_object_pin_map(engine->default_state,
4907 if (IS_ERR(vaddr)) {
4908 err = PTR_ERR(vaddr);
4909 goto err_active;
4910 }
4912 i915_gem_object_unpin_map(engine->default_state);
4915 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) {
4916 unsigned int found = intel_engines_has_context_isolation(i915);
4919 * Make sure that classes with multiple engine instances all
4920 * share the same basic configuration.
4922 for_each_engine(engine, i915, id) {
4923 unsigned int bit = BIT(engine->uabi_class);
4924 unsigned int expected = engine->default_state ? bit : 0;
4926 if ((found & bit) != expected) {
4927 DRM_ERROR("mismatching default context state for class %d on engine %s\n",
4928 engine->uabi_class, engine->name);
4934 i915_gem_context_set_closed(ctx);
4935 i915_gem_context_put(ctx);
4940 * If we have to abandon now, we expect the engines to be idle
4941 * and ready to be torn-down. First try to flush any remaining
4942 * request, ensure we are pointing at the kernel context and
4945 if (WARN_ON(i915_gem_switch_to_kernel_context(i915)))
4948 if (WARN_ON(i915_gem_wait_for_idle(i915,
4950 MAX_SCHEDULE_TIMEOUT)))
4953 i915_gem_contexts_lost(i915);
4958 i915_gem_init_scratch(struct drm_i915_private *i915, unsigned int size)
4960 struct drm_i915_gem_object *obj;
4961 struct i915_vma *vma;
4964 obj = i915_gem_object_create_stolen(i915, size);
4965 if (!obj)
4966 obj = i915_gem_object_create_internal(i915, size);
4967 if (IS_ERR(obj)) {
4968 DRM_ERROR("Failed to allocate scratch page\n");
4969 return PTR_ERR(obj);
4972 vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
4978 ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
4982 i915->gt.scratch = vma;
4986 i915_gem_object_put(obj);
4990 static void i915_gem_fini_scratch(struct drm_i915_private *i915)
4992 i915_vma_unpin_and_release(&i915->gt.scratch, 0);
4995 int i915_gem_init(struct drm_i915_private *dev_priv)
4999 /* We need to fall back to 4K pages if the host doesn't support huge GTT. */
5000 if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_huge_gtt(dev_priv))
5001 mkwrite_device_info(dev_priv)->page_sizes =
5002 I915_GTT_PAGE_SIZE_4K;
5004 dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1);
5006 if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
5007 dev_priv->gt.resume = intel_lr_context_resume;
5008 dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
5010 dev_priv->gt.resume = intel_legacy_submission_resume;
5011 dev_priv->gt.cleanup_engine = intel_engine_cleanup;
5014 ret = i915_gem_init_userptr(dev_priv);
5018 ret = intel_uc_init_misc(dev_priv);
5022 ret = intel_wopcm_init(&dev_priv->wopcm);
5026 /* This is just a security blanket to placate dragons.
5027 * On some systems, we very sporadically observe that the first TLBs
5028 * used by the CS may be stale, despite us poking the TLB reset. If
5029 * we hold the forcewake during initialisation these problems
5030 * just magically go away.
5032 mutex_lock(&dev_priv->drm.struct_mutex);
5033 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
5035 ret = i915_gem_init_ggtt(dev_priv);
5037 GEM_BUG_ON(ret == -EIO);
5041 ret = i915_gem_init_scratch(dev_priv,
5042 IS_GEN(dev_priv, 2) ? SZ_256K : PAGE_SIZE);
5044 GEM_BUG_ON(ret == -EIO);
5048 ret = i915_gem_contexts_init(dev_priv);
5050 GEM_BUG_ON(ret == -EIO);
5054 ret = intel_engines_init(dev_priv);
5056 GEM_BUG_ON(ret == -EIO);
5060 intel_init_gt_powersave(dev_priv);
5062 ret = intel_uc_init(dev_priv);
5066 ret = i915_gem_init_hw(dev_priv);
5071 * Despite its name, intel_init_clock_gating applies display
5072 * clock gating workarounds, GT mmio workarounds and the occasional
5073 * GT power context workaround. Worse, sometimes it includes a context
5074 * register workaround which we need to apply before we record the
5075 * default HW state for all contexts.
5077 * FIXME: break up the workarounds and apply them at the right time!
5079 intel_init_clock_gating(dev_priv);
5081 ret = __intel_engines_record_defaults(dev_priv);
5085 if (i915_inject_load_failure()) {
5090 if (i915_inject_load_failure()) {
5095 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5096 mutex_unlock(&dev_priv->drm.struct_mutex);
5101 * Unwinding is complicated by the fact that we want to handle -EIO to mean
5102 * disable GPU submission but keep KMS alive. We want to mark the
5103 * HW as irreversibly wedged, but keep enough state around that the
5104 * driver doesn't explode during runtime.
5107 mutex_unlock(&dev_priv->drm.struct_mutex);
5109 WARN_ON(i915_gem_suspend(dev_priv));
5110 i915_gem_suspend_late(dev_priv);
5112 i915_gem_drain_workqueue(dev_priv);
5114 mutex_lock(&dev_priv->drm.struct_mutex);
5115 intel_uc_fini_hw(dev_priv);
5117 intel_uc_fini(dev_priv);
5120 intel_cleanup_gt_powersave(dev_priv);
5121 i915_gem_cleanup_engines(dev_priv);
5125 i915_gem_contexts_fini(dev_priv);
5127 i915_gem_fini_scratch(dev_priv);
5130 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5131 mutex_unlock(&dev_priv->drm.struct_mutex);
5134 intel_uc_fini_misc(dev_priv);
5137 i915_gem_cleanup_userptr(dev_priv);
5140 mutex_lock(&dev_priv->drm.struct_mutex);
5143 * Allow engine initialisation to fail by marking the GPU as
5144 * wedged. But we only want to do this when the GPU is angry;
5145 * for any other failure, such as an allocation failure, bail.
5147 if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
5148 i915_load_error(dev_priv,
5149 "Failed to initialize GPU, declaring it wedged!\n");
5150 i915_gem_set_wedged(dev_priv);
5153 /* Minimal basic recovery for KMS */
5154 ret = i915_ggtt_enable_hw(dev_priv);
5155 i915_gem_restore_gtt_mappings(dev_priv);
5156 i915_gem_restore_fences(dev_priv);
5157 intel_init_clock_gating(dev_priv);
5159 mutex_unlock(&dev_priv->drm.struct_mutex);
5162 i915_gem_drain_freed_objects(dev_priv);
5166 void i915_gem_fini(struct drm_i915_private *dev_priv)
5168 i915_gem_suspend_late(dev_priv);
5169 intel_disable_gt_powersave(dev_priv);
5171 /* Flush any outstanding unpin_work. */
5172 i915_gem_drain_workqueue(dev_priv);
5174 mutex_lock(&dev_priv->drm.struct_mutex);
5175 intel_uc_fini_hw(dev_priv);
5176 intel_uc_fini(dev_priv);
5177 i915_gem_cleanup_engines(dev_priv);
5178 i915_gem_contexts_fini(dev_priv);
5179 i915_gem_fini_scratch(dev_priv);
5180 mutex_unlock(&dev_priv->drm.struct_mutex);
5182 intel_wa_list_free(&dev_priv->gt_wa_list);
5184 intel_cleanup_gt_powersave(dev_priv);
5186 intel_uc_fini_misc(dev_priv);
5187 i915_gem_cleanup_userptr(dev_priv);
5189 i915_gem_drain_freed_objects(dev_priv);
5191 WARN_ON(!list_empty(&dev_priv->contexts.list));
5194 void i915_gem_init_mmio(struct drm_i915_private *i915)
5196 i915_gem_sanitize(i915);
5200 i915_gem_cleanup_engines(struct drm_i915_private *dev_priv)
5202 struct intel_engine_cs *engine;
5203 enum intel_engine_id id;
5205 for_each_engine(engine, dev_priv, id)
5206 dev_priv->gt.cleanup_engine(engine);
5210 i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
5214 if (INTEL_GEN(dev_priv) >= 7 && !IS_VALLEYVIEW(dev_priv) &&
5215 !IS_CHERRYVIEW(dev_priv))
5216 dev_priv->num_fence_regs = 32;
5217 else if (INTEL_GEN(dev_priv) >= 4 ||
5218 IS_I945G(dev_priv) || IS_I945GM(dev_priv) ||
5219 IS_G33(dev_priv) || IS_PINEVIEW(dev_priv))
5220 dev_priv->num_fence_regs = 16;
5222 dev_priv->num_fence_regs = 8;
5224 if (intel_vgpu_active(dev_priv))
5225 dev_priv->num_fence_regs =
5226 I915_READ(vgtif_reg(avail_rs.fence_num));
5228 /* Initialize fence registers to zero */
5229 for (i = 0; i < dev_priv->num_fence_regs; i++) {
5230 struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
5232 fence->i915 = dev_priv;
5234 list_add_tail(&fence->link, &dev_priv->mm.fence_list);
5236 i915_gem_restore_fences(dev_priv);
5238 i915_gem_detect_bit_6_swizzle(dev_priv);
5241 static void i915_gem_init__mm(struct drm_i915_private *i915)
5243 spin_lock_init(&i915->mm.object_stat_lock);
5244 spin_lock_init(&i915->mm.obj_lock);
5245 spin_lock_init(&i915->mm.free_lock);
5247 init_llist_head(&i915->mm.free_list);
5249 INIT_LIST_HEAD(&i915->mm.unbound_list);
5250 INIT_LIST_HEAD(&i915->mm.bound_list);
5251 INIT_LIST_HEAD(&i915->mm.fence_list);
5252 INIT_LIST_HEAD(&i915->mm.userfault_list);
5254 INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
5257 int i915_gem_init_early(struct drm_i915_private *dev_priv)
5261 dev_priv->objects = KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN);
5262 if (!dev_priv->objects)
5265 dev_priv->vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN);
5266 if (!dev_priv->vmas)
5269 dev_priv->luts = KMEM_CACHE(i915_lut_handle, 0);
5270 if (!dev_priv->luts)
5273 dev_priv->requests = KMEM_CACHE(i915_request,
5274 SLAB_HWCACHE_ALIGN |
5275 SLAB_RECLAIM_ACCOUNT |
5276 SLAB_TYPESAFE_BY_RCU);
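/*
 * SLAB_TYPESAFE_BY_RCU above means a request's memory may be reused for
 * a new request within an RCU grace period (it is never handed back to
 * the page allocator while RCU readers may still be looking at it), so
 * any RCU-protected lookup must revalidate the request after taking a
 * reference.
 */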
5277 if (!dev_priv->requests)
5280 dev_priv->dependencies = KMEM_CACHE(i915_dependency,
5281 SLAB_HWCACHE_ALIGN |
5282 SLAB_RECLAIM_ACCOUNT);
5283 if (!dev_priv->dependencies)
5286 dev_priv->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN);
5287 if (!dev_priv->priorities)
5288 goto err_dependencies;
5290 INIT_LIST_HEAD(&dev_priv->gt.timelines);
5291 INIT_LIST_HEAD(&dev_priv->gt.active_rings);
5292 INIT_LIST_HEAD(&dev_priv->gt.closed_vma);
5294 i915_gem_init__mm(dev_priv);
5296 INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
5297 i915_gem_retire_work_handler);
5298 INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
5299 i915_gem_idle_work_handler);
5300 init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
5301 init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
5302 mutex_init(&dev_priv->gpu_error.wedge_mutex);
5304 atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
5306 spin_lock_init(&dev_priv->fb_tracking.lock);
5308 err = i915_gemfs_init(dev_priv);
5310 DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled (%d).\n", err);
5315 kmem_cache_destroy(dev_priv->dependencies);
5317 kmem_cache_destroy(dev_priv->requests);
5319 kmem_cache_destroy(dev_priv->luts);
5321 kmem_cache_destroy(dev_priv->vmas);
5323 kmem_cache_destroy(dev_priv->objects);
5328 void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
5330 i915_gem_drain_freed_objects(dev_priv);
5331 GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));
5332 GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
5333 WARN_ON(dev_priv->mm.object_count);
5334 WARN_ON(!list_empty(&dev_priv->gt.timelines));
5336 kmem_cache_destroy(dev_priv->priorities);
5337 kmem_cache_destroy(dev_priv->dependencies);
5338 kmem_cache_destroy(dev_priv->requests);
5339 kmem_cache_destroy(dev_priv->luts);
5340 kmem_cache_destroy(dev_priv->vmas);
5341 kmem_cache_destroy(dev_priv->objects);
5343 /* And ensure that our TYPESAFE_BY_RCU slabs are truly destroyed */
5346 i915_gemfs_fini(dev_priv);
5349 int i915_gem_freeze(struct drm_i915_private *dev_priv)
5351 /* Discard all purgeable objects, let userspace recover those as
5352 * required after resuming.
5354 i915_gem_shrink_all(dev_priv);
5359 int i915_gem_freeze_late(struct drm_i915_private *i915)
5361 struct drm_i915_gem_object *obj;
5362 struct list_head *phases[] = {
5363 &i915->mm.unbound_list,
5364 &i915->mm.bound_list,
5369 * Called just before we write the hibernation image.
5371 * We need to update the domain tracking to reflect that the CPU
5372 * will be accessing all the pages to create and restore from the
5373 * hibernation, and so upon restoration those pages will be in the
5374 * CPU domain.
5376 * To make sure the hibernation image contains the latest state,
5377 * we update that state just before writing out the image.
5379 * To try and reduce the hibernation image, we manually shrink
5380 * the objects as well, see i915_gem_freeze()
5383 i915_gem_shrink(i915, -1UL, NULL, I915_SHRINK_UNBOUND);
5384 i915_gem_drain_freed_objects(i915);
5386 mutex_lock(&i915->drm.struct_mutex);
5387 for (phase = phases; *phase; phase++) {
5388 list_for_each_entry(obj, *phase, mm.link)
5389 WARN_ON(i915_gem_object_set_to_cpu_domain(obj, true));
5391 mutex_unlock(&i915->drm.struct_mutex);
5396 void i915_gem_release(struct drm_device *dev, struct drm_file *file)
5398 struct drm_i915_file_private *file_priv = file->driver_priv;
5399 struct i915_request *request;
5401 /* Clean up our request list when the client is going away, so that
5402 * later retire_requests won't dereference our soon-to-be-gone
5403 * file_priv.
5405 spin_lock(&file_priv->mm.lock);
5406 list_for_each_entry(request, &file_priv->mm.request_list, client_link)
5407 request->file_priv = NULL;
5408 spin_unlock(&file_priv->mm.lock);
5411 int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
5413 struct drm_i915_file_private *file_priv;
5418 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
5422 file->driver_priv = file_priv;
5423 file_priv->dev_priv = i915;
5424 file_priv->file = file;
5426 spin_lock_init(&file_priv->mm.lock);
5427 INIT_LIST_HEAD(&file_priv->mm.request_list);
5429 file_priv->bsd_engine = -1;
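/*
 * -1 means "no BSD engine chosen yet"; a specific video engine is
 * presumably selected later, on the client's first BSD submission.
 */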
5430 file_priv->hang_timestamp = jiffies;
5432 ret = i915_gem_context_open(i915, file);
5440 * i915_gem_track_fb - update frontbuffer tracking
5441 * @old: current GEM buffer for the frontbuffer slots
5442 * @new: new GEM buffer for the frontbuffer slots
5443 * @frontbuffer_bits: bitmask of frontbuffer slots
5445 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
5446 * from @old and setting them in @new. Both @old and @new can be NULL.
5448 void i915_gem_track_fb(struct drm_i915_gem_object *old,
5449 struct drm_i915_gem_object *new,
5450 unsigned frontbuffer_bits)
5452 /* Control of individual bits within the mask is guarded by
5453 * the owning plane->mutex, i.e. we can never see concurrent
5454 * manipulation of individual bits. But since the bitfield as a whole
5455 * is updated using RMW, we need to use atomics in order to update
5456 * the bits.
5458 BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
5459 BITS_PER_TYPE(atomic_t));
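/*
 * The BUILD_BUG_ON above guarantees that INTEL_FRONTBUFFER_BITS_PER_PIPE
 * bits for every pipe still fit inside a single atomic_t.
 */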
5462 WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
5463 atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
5467 WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
5468 atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
5472 /* Allocate a new GEM object and fill it with the supplied data */
5473 struct drm_i915_gem_object *
5474 i915_gem_object_create_from_data(struct drm_i915_private *dev_priv,
5475 const void *data, size_t size)
5477 struct drm_i915_gem_object *obj;
5482 obj = i915_gem_object_create(dev_priv, round_up(size, PAGE_SIZE));
5486 GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);
5488 file = obj->base.filp;
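/*
 * Copy the payload into the object's shmem backing store one page at a
 * time, going through the pagecache write helpers so the pages end up
 * dirtied and uptodate.
 */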
5491 unsigned int len = min_t(typeof(size), size, PAGE_SIZE);
5493 void *pgdata, *vaddr;
5495 err = pagecache_write_begin(file, file->f_mapping,
5502 memcpy(vaddr, data, len);
5505 err = pagecache_write_end(file, file->f_mapping,
5519 i915_gem_object_put(obj);
5520 return ERR_PTR(err);
5523 struct scatterlist *
5524 i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
5526 unsigned int *offset)
5528 struct i915_gem_object_page_iter *iter = &obj->mm.get_page;
5529 struct scatterlist *sg;
5530 unsigned int idx, count;
5533 GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
5534 GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
5536 /* As we iterate forward through the sg, we record each entry in a
5537 * radixtree for quick repeated (backwards) lookups. If we have seen
5538 * this index previously, we will have an entry for it.
5540 * Initial lookup is O(N), but this is amortized to O(1) for
5541 * sequential page access (where each new request is consecutive
5542 * to the previous one). Repeated lookups are O(lg(obj->base.size)),
5543 * i.e. O(1) with a large constant!
5545 if (n < READ_ONCE(iter->sg_idx))
5546 goto lookup;
5548 mutex_lock(&iter->lock);
5550 /* We prefer to reuse the last sg so that repeated lookups of this
5551 * (or the subsequent) sg are fast - comparing against the last
5552 * sg is faster than going through the radixtree.
5557 count = __sg_page_count(sg);
5559 while (idx + count <= n) {
5564 /* If we cannot allocate and insert this entry, or the
5565 * individual pages from this range, cancel updating the
5566 * sg_idx so that on this lookup we are forced to linearly
5567 * scan onwards, but on future lookups we will try the
5568 * insertion again (in which case we need to be careful of
5569 * the error return reporting that we have already inserted
5570 * this index).
5572 ret = radix_tree_insert(&iter->radix, idx, sg);
5573 if (ret && ret != -EEXIST)
5576 entry = xa_mk_value(idx);
5577 for (i = 1; i < count; i++) {
5578 ret = radix_tree_insert(&iter->radix, idx + i, entry);
5579 if (ret && ret != -EEXIST)
5584 sg = ____sg_next(sg);
5585 count = __sg_page_count(sg);
5592 mutex_unlock(&iter->lock);
5594 if (unlikely(n < idx)) /* insertion completed by another thread */
5595 goto lookup;
5597 /* In case we failed to insert the entry into the radixtree, we need
5598 * to look beyond the current sg.
5600 while (idx + count <= n) {
5602 sg = ____sg_next(sg);
5603 count = __sg_page_count(sg);
5612 sg = radix_tree_lookup(&iter->radix, n);
5615 /* If this index is in the middle of a multi-page sg entry,
5616 * the radix tree will contain a value entry that points
5617 * to the start of that range. We will return the pointer to
5618 * the base page and the offset of this page within the
5619 * sg entry.
5622 if (unlikely(xa_is_value(sg))) {
5623 unsigned long base = xa_to_value(sg);
5625 sg = radix_tree_lookup(&iter->radix, base);
5637 i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n)
5639 struct scatterlist *sg;
5640 unsigned int offset;
5642 GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
5644 sg = i915_gem_object_get_sg(obj, n, &offset);
5645 return nth_page(sg_page(sg), offset);
5648 /* Like i915_gem_object_get_page(), but mark the returned page dirty */
5650 i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
5655 page = i915_gem_object_get_page(obj, n);
5657 set_page_dirty(page);
5663 i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
5666 struct scatterlist *sg;
5667 unsigned int offset;
5669 sg = i915_gem_object_get_sg(obj, n, &offset);
5670 return sg_dma_address(sg) + (offset << PAGE_SHIFT);
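/*
 * i915_gem_object_get_page(), i915_gem_object_get_dirty_page() and
 * i915_gem_object_get_dma_address() above are thin wrappers that all
 * rely on i915_gem_object_get_sg() for the cached sg lookup.
 */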
5673 int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
5675 struct sg_table *pages;
5678 if (align > obj->base.size)
5679 return -EINVAL;
5681 if (obj->ops == &i915_gem_phys_ops)
5682 return 0;
5684 if (obj->ops != &i915_gem_object_ops)
5685 return -EINVAL;
5687 err = i915_gem_object_unbind(obj);
5691 mutex_lock(&obj->mm.lock);
5693 if (obj->mm.madv != I915_MADV_WILLNEED) {
5698 if (obj->mm.quirked) {
5703 if (obj->mm.mapping) {
5708 pages = __i915_gem_object_unset_pages(obj);
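/*
 * Swap in the phys ops and repopulate the pages through them; on
 * failure, the original ops and page set are restored below.
 */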
5710 obj->ops = &i915_gem_phys_ops;
5712 err = ____i915_gem_object_get_pages(obj);
5716 /* Perma-pin (until release) the physical set of pages */
5717 __i915_gem_object_pin_pages(obj);
5719 if (!IS_ERR_OR_NULL(pages))
5720 i915_gem_object_ops.put_pages(obj, pages);
5721 mutex_unlock(&obj->mm.lock);
5725 obj->ops = &i915_gem_object_ops;
5726 if (!IS_ERR_OR_NULL(pages)) {
5727 unsigned int sg_page_sizes = i915_sg_page_sizes(pages->sgl);
5729 __i915_gem_object_set_pages(obj, pages, sg_page_sizes);
5732 mutex_unlock(&obj->mm.lock);
5736 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
5737 #include "selftests/scatterlist.c"
5738 #include "selftests/mock_gem_device.c"
5739 #include "selftests/huge_gem_object.c"
5740 #include "selftests/huge_pages.c"
5741 #include "selftests/i915_gem_object.c"
5742 #include "selftests/i915_gem_coherency.c"
5743 #include "selftests/i915_gem.c"