drm/i915: Track the purgeable objects on a separate eviction list
drivers/gpu/drm/i915/gem/i915_gem_domain.c
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_gtt.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"
#include "i915_vma.h"
#include "intel_frontbuffer.h"

static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
        /*
         * We manually flush the CPU domain so that we can override and
         * force the flush for the display, and perform it asynchronously.
         */
        i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
        if (obj->cache_dirty)
                i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
        obj->write_domain = 0;
}

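/*
 * If the object is currently pinned for scanout (pin_global is raised),
 * flush any pending CPU writes so that the display engine sees coherent
 * contents.
 */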
void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
{
        if (!READ_ONCE(obj->pin_global))
                return;

        i915_gem_object_lock(obj);
        __i915_gem_object_flush_for_display(obj);
        i915_gem_object_unlock(obj);
}

/**
 * Moves a single object to the WC read, and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
{
        int ret;

        assert_object_held(obj);

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   (write ? I915_WAIT_ALL : 0),
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        if (obj->write_domain == I915_GEM_DOMAIN_WC)
                return 0;

        /* Flush and acquire obj->pages so that we are coherent through
         * direct access in memory with previous cached writes through
         * shmemfs and that our cache domain tracking remains valid.
         * For example, if the obj->filp was moved to swap without us
         * being notified and releasing the pages, we would mistakenly
         * continue to assume that the obj remained out of the CPU cached
         * domain.
         */
        ret = i915_gem_object_pin_pages(obj);
        if (ret)
                return ret;

        i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);

        /* Serialise direct access to this object with the barriers for
         * coherent writes from the GPU, by effectively invalidating the
         * WC domain upon first access.
         */
        if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
                mb();

        /* It should now be out of any other write domains, and we can update
         * the domain values for our changes.
         */
        GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
        obj->read_domains |= I915_GEM_DOMAIN_WC;
        if (write) {
                obj->read_domains = I915_GEM_DOMAIN_WC;
                obj->write_domain = I915_GEM_DOMAIN_WC;
                obj->mm.dirty = true;
        }

        i915_gem_object_unpin_pages(obj);
        return 0;
}

/**
 * Moves a single object to the GTT read, and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
        int ret;

        assert_object_held(obj);

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   (write ? I915_WAIT_ALL : 0),
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        if (obj->write_domain == I915_GEM_DOMAIN_GTT)
                return 0;

        /* Flush and acquire obj->pages so that we are coherent through
         * direct access in memory with previous cached writes through
         * shmemfs and that our cache domain tracking remains valid.
         * For example, if the obj->filp was moved to swap without us
         * being notified and releasing the pages, we would mistakenly
         * continue to assume that the obj remained out of the CPU cached
         * domain.
         */
        ret = i915_gem_object_pin_pages(obj);
        if (ret)
                return ret;

        i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);

        /* Serialise direct access to this object with the barriers for
         * coherent writes from the GPU, by effectively invalidating the
         * GTT domain upon first access.
         */
        if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
                mb();

        /* It should now be out of any other write domains, and we can update
         * the domain values for our changes.
         */
        GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
        obj->read_domains |= I915_GEM_DOMAIN_GTT;
        if (write) {
                obj->read_domains = I915_GEM_DOMAIN_GTT;
                obj->write_domain = I915_GEM_DOMAIN_GTT;
                obj->mm.dirty = true;
        }

        i915_gem_object_unpin_pages(obj);
        return 0;
}

/**
 * Changes the cache-level of an object across all VMA.
 * @obj: object to act on
 * @cache_level: new cache level to set for the object
 *
 * After this function returns, the object will be in the new cache-level
 * across all GTT and the contents of the backing storage will be coherent
 * with respect to the new cache-level. In order to keep the backing storage
 * coherent for all users, we only allow a single cache level to be set
 * globally on the object and prevent it from being changed whilst the
 * hardware is reading from the object. That is, if the object is currently
 * on the scanout, it will be set to uncached (or an equivalent display
 * cache coherency mode) and all non-MOCS GPU access will also be uncached so
 * that all direct access to the scanout remains coherent.
 */
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
                                    enum i915_cache_level cache_level)
{
        struct i915_vma *vma;
        int ret;

        assert_object_held(obj);

        if (obj->cache_level == cache_level)
                return 0;

        /* Inspect the list of currently bound VMA and unbind any that would
         * be invalid given the new cache-level. This is principally to
         * catch the issue of the CS prefetch crossing page boundaries and
         * reading an invalid PTE on older architectures.
         */
restart:
        list_for_each_entry(vma, &obj->vma.list, obj_link) {
                if (!drm_mm_node_allocated(&vma->node))
                        continue;

                if (i915_vma_is_pinned(vma)) {
                        DRM_DEBUG("cannot change the cache level of pinned objects\n");
                        return -EBUSY;
                }

                if (!i915_vma_is_closed(vma) &&
                    i915_gem_valid_gtt_space(vma, cache_level))
                        continue;

                ret = i915_vma_unbind(vma);
                if (ret)
                        return ret;

                /* As unbinding may affect other elements in the
                 * obj->vma.list (due to side-effects from retiring
                 * an active vma), play safe and restart the iterator.
                 */
                goto restart;
        }

        /* We can reuse the existing drm_mm nodes but need to change the
         * cache-level on the PTE. We could simply unbind them all and
         * rebind with the correct cache-level on next use. However since
         * we already have a valid slot, dma mapping, pages etc, we may as
         * well rewrite the PTE in the belief that doing so tramples upon
         * less state and so involves less work.
         */
        if (obj->bind_count) {
                /* Before we change the PTE, the GPU must not be accessing it.
                 * If we wait upon the object, we know that all the bound
                 * VMA are no longer active.
                 */
                ret = i915_gem_object_wait(obj,
                                           I915_WAIT_INTERRUPTIBLE |
                                           I915_WAIT_ALL,
                                           MAX_SCHEDULE_TIMEOUT);
                if (ret)
                        return ret;

                if (!HAS_LLC(to_i915(obj->base.dev)) &&
                    cache_level != I915_CACHE_NONE) {
                        /* Access to snoopable pages through the GTT is
                         * incoherent and on some machines causes a hard
                         * lockup. Relinquish the CPU mmapping to force
                         * userspace to refault in the pages and we can
                         * then double check if the GTT mapping is still
                         * valid for that pointer access.
                         */
                        i915_gem_object_release_mmap(obj);

                        /* As we no longer need a fence for GTT access,
                         * we can relinquish it now (and so prevent having
                         * to steal a fence from someone else on the next
                         * fence request). Note GPU activity would have
                         * dropped the fence as all snoopable access is
                         * supposed to be linear.
                         */
                        for_each_ggtt_vma(vma, obj) {
                                ret = i915_vma_put_fence(vma);
                                if (ret)
                                        return ret;
                        }
                } else {
                        /* We either have incoherent backing store and
                         * so no GTT access or the architecture is fully
                         * coherent. In such cases, existing GTT mmaps
                         * ignore the cache bit in the PTE and we can
                         * rewrite it without confusing the GPU or having
                         * to force userspace to fault back in its mmaps.
                         */
                }

                list_for_each_entry(vma, &obj->vma.list, obj_link) {
                        if (!drm_mm_node_allocated(&vma->node))
                                continue;

                        ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
                        if (ret)
                                return ret;
                }
        }

        list_for_each_entry(vma, &obj->vma.list, obj_link)
                vma->node.color = cache_level;
        i915_gem_object_set_cache_coherency(obj, cache_level);
        obj->cache_dirty = true; /* Always invalidate stale cachelines */

        return 0;
}

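/*
 * Report the current caching mode of the object back to userspace,
 * translating the internal cache level into the uAPI caching enum.
 */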
int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
                               struct drm_file *file)
{
        struct drm_i915_gem_caching *args = data;
        struct drm_i915_gem_object *obj;
        int err = 0;

        rcu_read_lock();
        obj = i915_gem_object_lookup_rcu(file, args->handle);
        if (!obj) {
                err = -ENOENT;
                goto out;
        }

        switch (obj->cache_level) {
        case I915_CACHE_LLC:
        case I915_CACHE_L3_LLC:
                args->caching = I915_CACHING_CACHED;
                break;

        case I915_CACHE_WT:
                args->caching = I915_CACHING_DISPLAY;
                break;

        default:
                args->caching = I915_CACHING_NONE;
                break;
        }
out:
        rcu_read_unlock();
        return err;
}

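/*
 * Change the caching mode of an object at the user's request, mapping the
 * uAPI caching enum onto a cache level and applying it with
 * i915_gem_object_set_cache_level() under the object lock.
 */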
int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
                               struct drm_file *file)
{
        struct drm_i915_private *i915 = to_i915(dev);
        struct drm_i915_gem_caching *args = data;
        struct drm_i915_gem_object *obj;
        enum i915_cache_level level;
        int ret = 0;

        switch (args->caching) {
        case I915_CACHING_NONE:
                level = I915_CACHE_NONE;
                break;
        case I915_CACHING_CACHED:
                /*
                 * Due to a HW issue on BXT A stepping, GPU stores via a
                 * snooped mapping may leave stale data in a corresponding CPU
                 * cacheline, whereas normally such cachelines would get
                 * invalidated.
                 */
                if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
                        return -ENODEV;

                level = I915_CACHE_LLC;
                break;
        case I915_CACHING_DISPLAY:
                level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
                break;
        default:
                return -EINVAL;
        }

        obj = i915_gem_object_lookup(file, args->handle);
        if (!obj)
                return -ENOENT;

        /*
         * The caching mode of a proxy object is handled by its generator and
         * is not allowed to be changed by userspace.
         */
        if (i915_gem_object_is_proxy(obj)) {
                ret = -ENXIO;
                goto out;
        }

        if (obj->cache_level == level)
                goto out;

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE,
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                goto out;

        ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
        if (ret)
                goto out;

        ret = i915_gem_object_lock_interruptible(obj);
        if (ret == 0) {
                ret = i915_gem_object_set_cache_level(obj, level);
                i915_gem_object_unlock(obj);
        }
        mutex_unlock(&i915->drm.struct_mutex);

out:
        i915_gem_object_put(obj);
        return ret;
}

/*
 * Prepare a buffer for the display plane (scanout, cursors, etc). Can be
 * called from an uninterruptible phase (modesetting) and allows any flushes
 * to be pipelined (for pageflips). We only flush the caches while preparing
 * the buffer for display; the callers are responsible for the frontbuffer
 * flush.
 */
struct i915_vma *
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
                                     u32 alignment,
                                     const struct i915_ggtt_view *view,
                                     unsigned int flags)
{
        struct i915_vma *vma;
        int ret;

        assert_object_held(obj);

        /* Mark the global pin early so that we account for the
         * display coherency whilst setting up the cache domains.
         */
        obj->pin_global++;

        /* The display engine is not coherent with the LLC cache on gen6. As
         * a result, we make sure that the pinning that is about to occur is
         * done with uncached PTEs. This is the lowest common denominator for
         * all chipsets.
         *
         * However for gen6+, we could do better by using the GFDT bit instead
         * of uncaching, which would allow us to flush all the LLC-cached data
         * with that bit in the PTE to main memory with just one PIPE_CONTROL.
         */
        ret = i915_gem_object_set_cache_level(obj,
                                              HAS_WT(to_i915(obj->base.dev)) ?
                                              I915_CACHE_WT : I915_CACHE_NONE);
        if (ret) {
                vma = ERR_PTR(ret);
                goto err_unpin_global;
        }

        /* As the user may map the buffer once pinned in the display plane
         * (e.g. libkms for the bootup splash), we have to ensure that we
         * always use map_and_fenceable for all scanout buffers. However,
         * it may simply be too big to fit into the mappable aperture, in
         * which case we pin it anyway and hope that userspace can cope
         * (but always first try to preserve the existing ABI).
         */
        vma = ERR_PTR(-ENOSPC);
        if ((flags & PIN_MAPPABLE) == 0 &&
            (!view || view->type == I915_GGTT_VIEW_NORMAL))
                vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
                                               flags |
                                               PIN_MAPPABLE |
                                               PIN_NONBLOCK);
        if (IS_ERR(vma))
                vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
        if (IS_ERR(vma))
                goto err_unpin_global;

        vma->display_alignment = max_t(u64, vma->display_alignment, alignment);

        __i915_gem_object_flush_for_display(obj);

        /* It should now be out of any other write domains, and we can update
         * the domain values for our changes.
         */
        obj->read_domains |= I915_GEM_DOMAIN_GTT;

        return vma;

err_unpin_global:
        obj->pin_global--;
        return vma;
}

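/*
 * Move the object's bound GGTT VMAs to the tail of their VM's bound list
 * and, while its pages are still marked WILLNEED, the object itself to the
 * tail of the appropriate mm list, so that it is treated as recently used
 * and is less likely to be chosen for eviction.
 */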
static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
{
        struct drm_i915_private *i915 = to_i915(obj->base.dev);
        struct i915_vma *vma;

        GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));

        mutex_lock(&i915->ggtt.vm.mutex);
        for_each_ggtt_vma(vma, obj) {
                if (!drm_mm_node_allocated(&vma->node))
                        continue;

                list_move_tail(&vma->vm_link, &vma->vm->bound_list);
        }
        mutex_unlock(&i915->ggtt.vm.mutex);

        if (obj->mm.madv == I915_MADV_WILLNEED) {
                struct list_head *list;

                spin_lock(&i915->mm.obj_lock);
                list = obj->bind_count ?
                        &i915->mm.bound_list : &i915->mm.unbound_list;
                list_move_tail(&obj->mm.link, list);
                spin_unlock(&i915->mm.obj_lock);
        }
}

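/*
 * Release a display pin taken by i915_gem_object_pin_to_display_plane(),
 * resetting the display alignment once the last pin is dropped and bumping
 * the object on the inactive lists before unpinning the vma.
 */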
void
i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
{
        struct drm_i915_gem_object *obj = vma->obj;

        assert_object_held(obj);

        if (WARN_ON(obj->pin_global == 0))
                return;

        if (--obj->pin_global == 0)
                vma->display_alignment = I915_GTT_MIN_ALIGNMENT;

        /* Bump the LRU to try and avoid premature eviction whilst flipping */
        i915_gem_object_bump_inactive_ggtt(obj);

        i915_vma_unpin(vma);
}

/**
 * Moves a single object to the CPU read, and possibly write domain.
 * @obj: object to act on
 * @write: requesting write or read-only access
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
        int ret;

        assert_object_held(obj);

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   (write ? I915_WAIT_ALL : 0),
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;

        i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

        /* Flush the CPU cache if it's still invalid. */
        if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
                i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
                obj->read_domains |= I915_GEM_DOMAIN_CPU;
        }

        /* It should now be out of any other write domains, and we can update
         * the domain values for our changes.
         */
        GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);

        /* If we're writing through the CPU, then the GPU read domains will
         * need to be invalidated at next use.
         */
        if (write)
                __start_cpu_write(obj);

        return 0;
}

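/*
 * Map a write domain onto the frontbuffer-tracking origin used for
 * invalidation: GTT-domain writes use the origin recorded on the object,
 * anything else is treated as a plain CPU write.
 */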
static inline enum fb_op_origin
fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
{
        return (domain == I915_GEM_DOMAIN_GTT ?
                obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
                          struct drm_file *file)
{
        struct drm_i915_gem_set_domain *args = data;
        struct drm_i915_gem_object *obj;
        u32 read_domains = args->read_domains;
        u32 write_domain = args->write_domain;
        int err;

        /* Only handle setting domains to types used by the CPU. */
        if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
                return -EINVAL;

        /*
         * Having something in the write domain implies it's in the read
         * domain, and only that read domain.  Enforce that in the request.
         */
        if (write_domain && read_domains != write_domain)
                return -EINVAL;

        if (!read_domains)
                return 0;

        obj = i915_gem_object_lookup(file, args->handle);
        if (!obj)
                return -ENOENT;

        /*
         * Already in the desired write domain? Nothing for us to do!
         *
         * We apply a little bit of cunning here to catch a broader set of
         * no-ops. If obj->write_domain is set, we must be in the same
         * obj->read_domains, and only that domain. Therefore, if that
         * obj->write_domain matches the request read_domains, we are
         * already in the same read/write domain and can skip the operation,
         * without having to further check the requested write_domain.
         */
        if (READ_ONCE(obj->write_domain) == read_domains) {
                err = 0;
                goto out;
        }

        /*
         * Try to flush the object off the GPU without holding the lock.
         * We will repeat the flush holding the lock in the normal manner
         * to catch cases where we are gazumped.
         */
        err = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   I915_WAIT_PRIORITY |
                                   (write_domain ? I915_WAIT_ALL : 0),
                                   MAX_SCHEDULE_TIMEOUT);
        if (err)
                goto out;

        /*
         * Proxy objects do not control access to the backing storage, ergo
         * they cannot be used as a means to manipulate the cache domain
         * tracking for that backing storage. The proxy object is always
         * considered to be outside of any cache domain.
         */
        if (i915_gem_object_is_proxy(obj)) {
                err = -ENXIO;
                goto out;
        }

        /*
         * Flush and acquire obj->pages so that we are coherent through
         * direct access in memory with previous cached writes through
         * shmemfs and that our cache domain tracking remains valid.
         * For example, if the obj->filp was moved to swap without us
         * being notified and releasing the pages, we would mistakenly
         * continue to assume that the obj remained out of the CPU cached
         * domain.
         */
        err = i915_gem_object_pin_pages(obj);
        if (err)
                goto out;

        err = i915_gem_object_lock_interruptible(obj);
        if (err)
                goto out_unpin;

        if (read_domains & I915_GEM_DOMAIN_WC)
                err = i915_gem_object_set_to_wc_domain(obj, write_domain);
        else if (read_domains & I915_GEM_DOMAIN_GTT)
                err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
        else
                err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

        /* And bump the LRU for this access */
        i915_gem_object_bump_inactive_ggtt(obj);

        i915_gem_object_unlock(obj);

        if (write_domain != 0)
                intel_fb_obj_invalidate(obj,
                                        fb_write_origin(obj, write_domain));

out_unpin:
        i915_gem_object_unpin_pages(obj);
out:
        i915_gem_object_put(obj);
        return err;
}

/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
                                 unsigned int *needs_clflush)
{
        int ret;

        *needs_clflush = 0;
        if (!i915_gem_object_has_struct_page(obj))
                return -ENODEV;

        ret = i915_gem_object_lock_interruptible(obj);
        if (ret)
                return ret;

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE,
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                goto err_unlock;

        ret = i915_gem_object_pin_pages(obj);
        if (ret)
                goto err_unlock;

        if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
            !static_cpu_has(X86_FEATURE_CLFLUSH)) {
                ret = i915_gem_object_set_to_cpu_domain(obj, false);
                if (ret)
                        goto err_unpin;
                else
                        goto out;
        }

        i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

        /* If we're not in the CPU read domain, put ourselves into the GTT
         * read domain and manually flush cachelines (if required). This
         * optimizes for the case when the GPU will dirty the data
         * anyway again before the next pread happens.
         */
        if (!obj->cache_dirty &&
            !(obj->read_domains & I915_GEM_DOMAIN_CPU))
                *needs_clflush = CLFLUSH_BEFORE;

out:
        /* return with the pages pinned */
        return 0;

err_unpin:
        i915_gem_object_unpin_pages(obj);
err_unlock:
        i915_gem_object_unlock(obj);
        return ret;
}

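/*
 * Pins the specified object's pages and synchronizes the object with GPU
 * accesses for a CPU write. Sets needs_clflush to a combination of
 * CLFLUSH_BEFORE and CLFLUSH_AFTER if the caller must flush cachelines
 * around the write.
 */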
int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
                                  unsigned int *needs_clflush)
{
        int ret;

        *needs_clflush = 0;
        if (!i915_gem_object_has_struct_page(obj))
                return -ENODEV;

        ret = i915_gem_object_lock_interruptible(obj);
        if (ret)
                return ret;

        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   I915_WAIT_ALL,
                                   MAX_SCHEDULE_TIMEOUT);
        if (ret)
                goto err_unlock;

        ret = i915_gem_object_pin_pages(obj);
        if (ret)
                goto err_unlock;

        if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
            !static_cpu_has(X86_FEATURE_CLFLUSH)) {
                ret = i915_gem_object_set_to_cpu_domain(obj, true);
                if (ret)
                        goto err_unpin;
                else
                        goto out;
        }

        i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

        /* If we're not in the CPU write domain, put ourselves into the GTT
         * write domain and manually flush cachelines (as required).
         * This optimizes for the case when the GPU will use the data
         * right away and we therefore have to clflush anyway.
         */
        if (!obj->cache_dirty) {
                *needs_clflush |= CLFLUSH_AFTER;

                /*
                 * Same trick applies to invalidate partially written
                 * cachelines read before writing.
                 */
                if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
                        *needs_clflush |= CLFLUSH_BEFORE;
        }

out:
        intel_fb_obj_invalidate(obj, ORIGIN_CPU);
        obj->mm.dirty = true;
        /* return with the pages pinned */
        return 0;

err_unpin:
        i915_gem_object_unpin_pages(obj);
err_unlock:
        i915_gem_object_unlock(obj);
        return ret;
}