drm/i915: fix scheduling while holding the new active list spinlock
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f135c903305f6ffec6c4b915533c53d507f0f1eb..4642115902d61a419ed112b39c5680810cbb6d35 100644
@@ -43,8 +43,6 @@ static int i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
                                                     uint64_t offset,
                                                     uint64_t size);
 static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj);
-static int i915_gem_object_get_pages(struct drm_gem_object *obj);
-static void i915_gem_object_put_pages(struct drm_gem_object *obj);
 static int i915_gem_object_wait_rendering(struct drm_gem_object *obj);
 static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
                                           unsigned alignment);
@@ -143,15 +141,27 @@ fast_shmem_read(struct page **pages,
                int length)
 {
        char __iomem *vaddr;
-       int ret;
+       int unwritten;
 
        vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0);
        if (vaddr == NULL)
                return -ENOMEM;
-       ret = __copy_to_user_inatomic(data, vaddr + page_offset, length);
+       unwritten = __copy_to_user_inatomic(data, vaddr + page_offset, length);
        kunmap_atomic(vaddr, KM_USER0);
 
-       return ret;
+       if (unwritten)
+               return -EFAULT;
+
+       return 0;
+}
+
+static int i915_gem_object_needs_bit17_swizzle(struct drm_gem_object *obj)
+{
+       drm_i915_private_t *dev_priv = obj->dev->dev_private;
+       struct drm_i915_gem_object *obj_priv = obj->driver_private;
+
+       return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
+               obj_priv->tiling_mode != I915_TILING_NONE;
 }
 
 static inline int
@@ -181,6 +191,64 @@ slow_shmem_copy(struct page *dst_page,
        return 0;
 }
 
+static inline int
+slow_shmem_bit17_copy(struct page *gpu_page,
+                     int gpu_offset,
+                     struct page *cpu_page,
+                     int cpu_offset,
+                     int length,
+                     int is_read)
+{
+       char *gpu_vaddr, *cpu_vaddr;
+
+       /* Use the unswizzled path if this page isn't affected. */
+       if ((page_to_phys(gpu_page) & (1 << 17)) == 0) {
+               if (is_read)
+                       return slow_shmem_copy(cpu_page, cpu_offset,
+                                              gpu_page, gpu_offset, length);
+               else
+                       return slow_shmem_copy(gpu_page, gpu_offset,
+                                              cpu_page, cpu_offset, length);
+       }
+
+       gpu_vaddr = kmap_atomic(gpu_page, KM_USER0);
+       if (gpu_vaddr == NULL)
+               return -ENOMEM;
+
+       cpu_vaddr = kmap_atomic(cpu_page, KM_USER1);
+       if (cpu_vaddr == NULL) {
+               kunmap_atomic(gpu_vaddr, KM_USER0);
+               return -ENOMEM;
+       }
+
+       /* Copy the data, XORing A6 with A17: on pages whose physical
+        * bit 17 is set, bit 6 of the byte offset flips (offset ^ 64).
+        * Userspace already applies the remaining swizzle bits itself
+        * (A9 for Y tiling, A9 and A10 for X).
+        */
+       while (length > 0) {
+               int cacheline_end = ALIGN(gpu_offset + 1, 64);
+               int this_length = min(cacheline_end - gpu_offset, length);
+               int swizzled_gpu_offset = gpu_offset ^ 64;
+
+               if (is_read) {
+                       memcpy(cpu_vaddr + cpu_offset,
+                              gpu_vaddr + swizzled_gpu_offset,
+                              this_length);
+               } else {
+                       memcpy(gpu_vaddr + swizzled_gpu_offset,
+                              cpu_vaddr + cpu_offset,
+                              this_length);
+               }
+               cpu_offset += this_length;
+               gpu_offset += this_length;
+               length -= this_length;
+       }
+
+       kunmap_atomic(cpu_vaddr, KM_USER1);
+       kunmap_atomic(gpu_vaddr, KM_USER0);
+
+       return 0;
+}
+
 /**
  * This is the fast shmem pread path, which attempts to copy_to_user directly
  * from the backing pages of the object to the user's address space.  On a
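
The helper added above does the chipset's bit-17 swizzle in software: with the
9/10/17 swizzle mode, a page whose physical address has bit 17 set stores its
data with bit 6 of the byte offset flipped, i.e. adjacent 64-byte cachelines
swapped. Userspace cannot compensate for this bit itself, since it never sees
physical addresses and they can change across swapping, which is why the
pread/pwrite dispatchers below route such objects to the slow paths
unconditionally. A standalone sketch of the transform, assuming whole-page
copies for brevity (the kernel helper above also handles spans that start or
end mid-cacheline via cacheline_end):

    #include <stdio.h>
    #include <string.h>

    #define PAGE_SZ 4096

    /* Copy one page, swapping adjacent 64-byte cachelines (offset ^ 64)
     * iff bit 17 of the source page's physical address is set -- the
     * same transform slow_shmem_bit17_copy() applies chunk by chunk. */
    static void bit17_swizzled_copy(char *dst, const char *src,
                                    unsigned long phys_addr)
    {
        int off;

        if ((phys_addr & (1UL << 17)) == 0) {
            memcpy(dst, src, PAGE_SZ);      /* unaffected page */
            return;
        }
        for (off = 0; off < PAGE_SZ; off += 64)
            memcpy(dst + (off ^ 64), src + off, 64);
    }

    int main(void)
    {
        static char src[PAGE_SZ], dst[PAGE_SZ];
        int i;

        for (i = 0; i < PAGE_SZ; i++)
            src[i] = (char)i;
        bit17_swizzled_copy(dst, src, 1UL << 17);
        /* Cachelines 0 and 1 traded places. */
        printf("dst[64] == src[0]? %s\n", dst[64] == src[0] ? "yes" : "no");
        return 0;
    }
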
@@ -269,6 +337,7 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj,
        int page_length;
        int ret;
        uint64_t data_ptr = args->data_ptr;
+       int do_bit17_swizzling;
 
        remain = args->size;
 
@@ -286,13 +355,15 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj,
 
        down_read(&mm->mmap_sem);
        pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
-                                     num_pages, 0, 0, user_pages, NULL);
+                                     num_pages, 1, 0, user_pages, NULL);
        up_read(&mm->mmap_sem);
        if (pinned_pages < num_pages) {
                ret = -EFAULT;
                goto fail_put_user_pages;
        }
 
+       do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
+
        mutex_lock(&dev->struct_mutex);
 
        ret = i915_gem_object_get_pages(obj);
@@ -327,11 +398,20 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj,
                if ((data_page_offset + page_length) > PAGE_SIZE)
                        page_length = PAGE_SIZE - data_page_offset;
 
-               ret = slow_shmem_copy(user_pages[data_page_index],
-                                     data_page_offset,
-                                     obj_priv->pages[shmem_page_index],
-                                     shmem_page_offset,
-                                     page_length);
+               if (do_bit17_swizzling) {
+                       ret = slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index],
+                                                   shmem_page_offset,
+                                                   user_pages[data_page_index],
+                                                   data_page_offset,
+                                                   page_length,
+                                                   1);
+               } else {
+                       ret = slow_shmem_copy(user_pages[data_page_index],
+                                             data_page_offset,
+                                             obj_priv->pages[shmem_page_index],
+                                             shmem_page_offset,
+                                             page_length);
+               }
                if (ret)
                        goto fail_put_pages;
 
@@ -383,9 +463,14 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
                return -EINVAL;
        }
 
-       ret = i915_gem_shmem_pread_fast(dev, obj, args, file_priv);
-       if (ret != 0)
+       if (i915_gem_object_needs_bit17_swizzle(obj)) {
                ret = i915_gem_shmem_pread_slow(dev, obj, args, file_priv);
+       } else {
+               ret = i915_gem_shmem_pread_fast(dev, obj, args, file_priv);
+               if (ret != 0)
+                       ret = i915_gem_shmem_pread_slow(dev, obj, args,
+                                                       file_priv);
+       }
 
        drm_gem_object_unreference(obj);
 
@@ -446,13 +531,16 @@ fast_shmem_write(struct page **pages,
                 int length)
 {
        char __iomem *vaddr;
+       unsigned long unwritten;
 
        vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0);
        if (vaddr == NULL)
                return -ENOMEM;
-       __copy_from_user_inatomic(vaddr + page_offset, data, length);
+       unwritten = __copy_from_user_inatomic(vaddr + page_offset, data, length);
        kunmap_atomic(vaddr, KM_USER0);
 
+       if (unwritten)
+               return -EFAULT;
        return 0;
 }
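
fast_shmem_read above, fast_shmem_write here, and the relocation copy-back
hunks further down all fix variants of the same bug: copy_to_user(),
copy_from_user() and their __*_inatomic cousins return the number of bytes
left uncopied, not a negative errno, and the old code either returned that
count directly or ignored it. A minimal illustration of converting the
convention, using a stand-in copier since the real helpers only exist in
kernel context:

    #include <stdio.h>
    #include <string.h>

    #define EFAULT 14

    /* Stand-in for __copy_from_user_inatomic(): copies what it can and
     * returns the number of bytes left uncopied (0 on full success). */
    static unsigned long copy_emul(char *dst, const char *src,
                                   unsigned long len, unsigned long ok_bytes)
    {
        unsigned long n = len < ok_bytes ? len : ok_bytes;

        memcpy(dst, src, n);
        return len - n;                     /* bytes NOT copied */
    }

    int main(void)
    {
        char dst[16];
        unsigned long unwritten;

        /* Simulate a fault after 10 of 16 bytes. */
        unwritten = copy_emul(dst, "0123456789abcdef", 16, 10);
        /* Kernel-style conversion: any shortfall becomes -EFAULT. */
        printf("ret = %d\n", unwritten ? -EFAULT : 0);
        return 0;
    }
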
 
@@ -724,6 +812,7 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
        int page_length;
        int ret;
        uint64_t data_ptr = args->data_ptr;
+       int do_bit17_swizzling;
 
        remain = args->size;
 
@@ -748,6 +837,8 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
                goto fail_put_user_pages;
        }
 
+       do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
+
        mutex_lock(&dev->struct_mutex);
 
        ret = i915_gem_object_get_pages(obj);
@@ -782,11 +873,20 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
                if ((data_page_offset + page_length) > PAGE_SIZE)
                        page_length = PAGE_SIZE - data_page_offset;
 
-               ret = slow_shmem_copy(obj_priv->pages[shmem_page_index],
-                                     shmem_page_offset,
-                                     user_pages[data_page_index],
-                                     data_page_offset,
-                                     page_length);
+               if (do_bit17_swizzling) {
+                       ret = slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index],
+                                                   shmem_page_offset,
+                                                   user_pages[data_page_index],
+                                                   data_page_offset,
+                                                   page_length,
+                                                   0);
+               } else {
+                       ret = slow_shmem_copy(obj_priv->pages[shmem_page_index],
+                                             shmem_page_offset,
+                                             user_pages[data_page_index],
+                                             data_page_offset,
+                                             page_length);
+               }
                if (ret)
                        goto fail_put_pages;
 
@@ -851,6 +951,8 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
                        ret = i915_gem_gtt_pwrite_slow(dev, obj, args,
                                                       file_priv);
                }
+       } else if (i915_gem_object_needs_bit17_swizzle(obj)) {
+               ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file_priv);
        } else {
                ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file_priv);
                if (ret == -EFAULT) {
@@ -1069,6 +1171,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
        case -EAGAIN:
                return VM_FAULT_OOM;
        case -EFAULT:
+       case -EINVAL:
                return VM_FAULT_SIGBUS;
        default:
                return VM_FAULT_NOPAGE;
@@ -1093,7 +1196,7 @@ i915_gem_create_mmap_offset(struct drm_gem_object *obj)
        struct drm_gem_mm *mm = dev->mm_private;
        struct drm_i915_gem_object *obj_priv = obj->driver_private;
        struct drm_map_list *list;
-       struct drm_map *map;
+       struct drm_local_map *map;
        int ret = 0;
 
        /* Set the object up for mmap'ing */
@@ -1281,7 +1384,7 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
        return 0;
 }
 
-static void
+void
 i915_gem_object_put_pages(struct drm_gem_object *obj)
 {
        struct drm_i915_gem_object *obj_priv = obj->driver_private;
@@ -1293,6 +1396,9 @@ i915_gem_object_put_pages(struct drm_gem_object *obj)
        if (--obj_priv->pages_refcount != 0)
                return;
 
+       if (obj_priv->tiling_mode != I915_TILING_NONE)
+               i915_gem_object_save_bit_17_swizzle(obj);
+
        for (i = 0; i < page_count; i++)
                if (obj_priv->pages[i] != NULL) {
                        if (obj_priv->dirty)
@@ -1321,8 +1427,10 @@ i915_gem_object_move_to_active(struct drm_gem_object *obj, uint32_t seqno)
                obj_priv->active = 1;
        }
        /* Move from whatever list we were on to the tail of execution. */
+       spin_lock(&dev_priv->mm.active_list_lock);
        list_move_tail(&obj_priv->list,
                       &dev_priv->mm.active_list);
+       spin_unlock(&dev_priv->mm.active_list_lock);
        obj_priv->last_rendering_seqno = seqno;
 }
 
@@ -1464,6 +1572,7 @@ i915_gem_retire_request(struct drm_device *dev,
        /* Move any buffers on the active list that are no longer referenced
         * by the ringbuffer to the flushing/inactive lists as appropriate.
         */
+       spin_lock(&dev_priv->mm.active_list_lock);
        while (!list_empty(&dev_priv->mm.active_list)) {
                struct drm_gem_object *obj;
                struct drm_i915_gem_object *obj_priv;
@@ -1478,7 +1587,7 @@ i915_gem_retire_request(struct drm_device *dev,
                 * this seqno.
                 */
                if (obj_priv->last_rendering_seqno != request->seqno)
-                       return;
+                       goto out;
 
 #if WATCH_LRU
                DRM_INFO("%s: retire %d moves to inactive list %p\n",
@@ -1487,9 +1596,22 @@ i915_gem_retire_request(struct drm_device *dev,
 
                if (obj->write_domain != 0)
                        i915_gem_object_move_to_flushing(obj);
-               else
+               else {
+                       /* Take a reference so the move below cannot drop
+                        * the last reference while the spinlock is held:
+                        * the final unreference may free the object and
+                        * sleep, so it is done after dropping the lock.
+                        * Breaking the lock is safe for the list walk
+                        * because each iteration re-reads the list head.
+                        */
+                       drm_gem_object_reference(obj);
                        i915_gem_object_move_to_inactive(obj);
+                       spin_unlock(&dev_priv->mm.active_list_lock);
+                       drm_gem_object_unreference(obj);
+                       spin_lock(&dev_priv->mm.active_list_lock);
+               }
        }
+out:
+       spin_unlock(&dev_priv->mm.active_list_lock);
 }
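
This hunk is the fix named in the commit title. i915_gem_object_move_to_inactive()
drops the active list's reference, and dropping the last reference frees the
object, a path that can sleep; doing that while holding mm.active_list_lock is
the scheduling-while-atomic bug. The patch pins the object first, so the move
under the lock can no longer be the final unreference; the potentially-sleeping
unreference then runs with the lock dropped, and the loop re-reads the list
head after re-locking, which is what makes the lock break safe. A userspace
sketch of the same pattern (pthreads and C11 atomics; all names hypothetical):

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdlib.h>

    struct item {
        struct item *next;
        atomic_int refcount;
    };

    static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct item *head;

    static void item_get(struct item *it)
    {
        atomic_fetch_add(&it->refcount, 1);
    }

    /* Dropping the last reference frees the item; in the kernel
     * analogue this can also sleep, so it must not run locked. */
    static void item_put(struct item *it)
    {
        if (atomic_fetch_sub(&it->refcount, 1) == 1)
            free(it);
    }

    static void retire_all(void)
    {
        pthread_mutex_lock(&list_lock);
        while (head != NULL) {
            struct item *it = head;

            item_get(it);           /* pin: the put below can't free yet */
            head = it->next;        /* list manipulation stays locked */
            item_put(it);           /* drop the list's ref: not the last */
            pthread_mutex_unlock(&list_lock);

            item_put(it);           /* may free -- lock already dropped */

            pthread_mutex_lock(&list_lock); /* re-read head next time */
        }
        pthread_mutex_unlock(&list_lock);
    }

    int main(void)
    {
        int i;

        for (i = 0; i < 3; i++) {
            struct item *it = malloc(sizeof(*it));

            atomic_init(&it->refcount, 1);  /* the list's reference */
            it->next = head;
            head = it;
        }
        retire_all();
        return 0;
    }
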
 
 /**
@@ -1875,7 +1997,7 @@ i915_gem_evict_everything(struct drm_device *dev)
        return ret;
 }
 
-static int
+int
 i915_gem_object_get_pages(struct drm_gem_object *obj)
 {
        struct drm_i915_gem_object *obj_priv = obj->driver_private;
@@ -1913,6 +2035,10 @@ i915_gem_object_get_pages(struct drm_gem_object *obj)
                }
                obj_priv->pages[i] = page;
        }
+
+       if (obj_priv->tiling_mode != I915_TILING_NONE)
+               i915_gem_object_do_bit_17_swizzle(obj);
+
        return 0;
 }
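
The save/do swizzle hooks pair up around the page lifetime because bit-17
swizzling depends on the backing page's physical address: after put_pages the
pages can be swapped out and return at different physical addresses, so data
laid out for one bit-17 value is wrong for the other. In the upstream series
these helpers live in i915_gem_tiling.c (not part of this file's diff):
i915_gem_object_save_bit_17_swizzle() records bit 17 of each page into the
obj_priv->bit_17 bitmap (freed in i915_gem_free_object below) when the pages
are released, and i915_gem_object_do_bit_17_swizzle() re-swaps the cachelines
of any page whose bit changed when pages are reacquired. A sketch of that
bookkeeping, with hypothetical names:

    #include <stdbool.h>
    #include <stddef.h>

    /* One saved bit per page: was physical bit 17 set when the data
     * was last laid out?  (Models obj_priv->bit_17.) */
    struct bit17_record {
        unsigned char *bits;
        size_t npages;
    };

    static bool phys_bit17(unsigned long phys)
    {
        return (phys >> 17) & 1;
    }

    /* put_pages time: snapshot bit 17 of every backing page. */
    static void save_bit17(struct bit17_record *rec, const unsigned long *phys)
    {
        size_t i;

        for (i = 0; i < rec->npages; i++) {
            if (phys_bit17(phys[i]))
                rec->bits[i / 8] |= 1u << (i % 8);
            else
                rec->bits[i / 8] &= ~(1u << (i % 8));
        }
    }

    /* get_pages time: a page needs its 64-byte lines swapped back
     * (offset ^ 64, as in slow_shmem_bit17_copy) iff bit 17 changed. */
    static bool page_needs_fixup(const struct bit17_record *rec,
                                 size_t i, unsigned long new_phys)
    {
        bool was = rec->bits[i / 8] & (1u << (i % 8));

        return was != phys_bit17(new_phys);
    }
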
 
@@ -1987,20 +2113,23 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *reg)
        int regnum = obj_priv->fence_reg;
        uint32_t val;
        uint32_t pitch_val;
+       uint32_t fence_size_bits;
 
-       if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) ||
+       if ((obj_priv->gtt_offset & ~I830_FENCE_START_MASK) ||
            (obj_priv->gtt_offset & (obj->size - 1))) {
-               WARN(1, "%s: object 0x%08x not 1M or size aligned\n",
+               WARN(1, "%s: object 0x%08x not 512K or size aligned\n",
                     __func__, obj_priv->gtt_offset);
                return;
        }
 
        pitch_val = (obj_priv->stride / 128) - 1;
-
+       WARN_ON(pitch_val & ~0x0000000f);
        val = obj_priv->gtt_offset;
        if (obj_priv->tiling_mode == I915_TILING_Y)
                val |= 1 << I830_FENCE_TILING_Y_SHIFT;
-       val |= I830_FENCE_SIZE_BITS(obj->size);
+       fence_size_bits = I830_FENCE_SIZE_BITS(obj->size);
+       WARN_ON(fence_size_bits & ~0x00000f00);
+       val |= fence_size_bits;
        val |= pitch_val << I830_FENCE_PITCH_SHIFT;
        val |= I830_FENCE_REG_VALID;
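
Besides downgrading the claim in the WARN message (i830-class fences need 512K
alignment, not 1M, hence I830_FENCE_START_MASK), the hunk adds WARN_ONs
checking that the computed values fit their register fields: the pitch field
is 4 bits and the size field occupies bits 8-11, so an oversized value would
silently corrupt neighbouring fields when OR-ed into val. A trivial demo of
the field-overflow check, assuming nothing about the register beyond the two
masks above:

    #include <stdio.h>

    /* Nonzero iff 'val' has bits outside 'field_mask' -- the property
     * the WARN_ONs above assert before OR-ing into the register. */
    static unsigned overflows_field(unsigned val, unsigned field_mask)
    {
        return val & ~field_mask;
    }

    int main(void)
    {
        /* 4-bit pitch field, mask 0x0000000f, as in the hunk above. */
        printf("pitch 15: %s\n", overflows_field(15, 0xf) ? "bad" : "ok");
        printf("pitch 16: %s\n", overflows_field(16, 0xf) ? "bad" : "ok");
        /* Size field in bits 8..11, mask 0x00000f00. */
        printf("size 0x0500: %s\n",
               overflows_field(0x0500, 0xf00) ? "bad" : "ok");
        printf("size 0x1500: %s\n",
               overflows_field(0x1500, 0xf00) ? "bad" : "ok");
        return 0;
    }
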
 
@@ -2191,7 +2320,7 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
                return -EBUSY;
        if (alignment == 0)
                alignment = i915_gem_get_gtt_alignment(obj);
-       if (alignment & (PAGE_SIZE - 1)) {
+       if (alignment & (i915_gem_get_gtt_alignment(obj) - 1)) {
                DRM_ERROR("Invalid object alignment requested %u\n", alignment);
                return -EINVAL;
        }
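
The old test only caught alignments that were not page multiples, so a tiled
object could be bound with 4K alignment even though its fence register needs
the object placed at a multiple of its (power-of-two) fence size, which is
what i915_gem_get_gtt_alignment() returns. Because the requirement is a power
of two, x & (req - 1) == 0 holds exactly when x is a multiple of req, which is
what the corrected check exploits. A quick demonstration of the identity:

    #include <assert.h>

    /* For a power-of-two req, x is a multiple of req iff the low
     * log2(req) bits of x are all zero. */
    static int is_aligned(unsigned long x, unsigned long req_pow2)
    {
        return (x & (req_pow2 - 1)) == 0;
    }

    int main(void)
    {
        assert(is_aligned(2ul << 20, 1ul << 20));  /* 2M on a 1M boundary */
        assert(!is_aligned(1ul << 12, 1ul << 20)); /* page-aligned, yet not
                                                      1M-aligned: the case
                                                      the old check missed */
        assert(is_aligned(0, 4096));               /* 0 passes; the kernel
                                                      substitutes a default
                                                      before this check */
        return 0;
    }
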
@@ -2208,15 +2337,20 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
                }
        }
        if (obj_priv->gtt_space == NULL) {
+               bool lists_empty;
+
                /* If the gtt is empty and we're still having trouble
                 * fitting our object in, we're out of memory.
                 */
 #if WATCH_LRU
                DRM_INFO("%s: GTT full, evicting something\n", __func__);
 #endif
-               if (list_empty(&dev_priv->mm.inactive_list) &&
-                   list_empty(&dev_priv->mm.flushing_list) &&
-                   list_empty(&dev_priv->mm.active_list)) {
+               spin_lock(&dev_priv->mm.active_list_lock);
+               lists_empty = (list_empty(&dev_priv->mm.inactive_list) &&
+                              list_empty(&dev_priv->mm.flushing_list) &&
+                              list_empty(&dev_priv->mm.active_list));
+               spin_unlock(&dev_priv->mm.active_list_lock);
+               if (lists_empty) {
                        DRM_ERROR("GTT full, but LRU list empty\n");
                        return -ENOMEM;
                }
@@ -2381,7 +2515,6 @@ i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
 static int
 i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write)
 {
-       struct drm_device *dev = obj->dev;
        int ret;
 
        i915_gem_object_flush_gpu_write_domain(obj);
@@ -2400,7 +2533,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write)
        /* Flush the CPU cache if it's still invalid. */
        if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
                i915_gem_clflush_object(obj);
-               drm_agp_chipset_flush(dev);
 
                obj->read_domains |= I915_GEM_DOMAIN_CPU;
        }
@@ -2612,7 +2744,6 @@ i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj)
 static void
 i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj)
 {
-       struct drm_device *dev = obj->dev;
        struct drm_i915_gem_object *obj_priv = obj->driver_private;
 
        if (!obj_priv->page_cpu_valid)
@@ -2628,7 +2759,6 @@ i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj)
                                continue;
                        drm_clflush_pages(obj_priv->pages + i, 1);
                }
-               drm_agp_chipset_flush(dev);
        }
 
        /* Free the page_cpu_valid mappings which are now stale, whether
@@ -2989,13 +3119,13 @@ i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object *exec_list,
                        drm_free(*relocs, reloc_count * sizeof(**relocs),
                                 DRM_MEM_DRIVER);
                        *relocs = NULL;
-                       return ret;
+                       return -EFAULT;
                }
 
                reloc_index += exec_list[i].relocation_count;
        }
 
-       return ret;
+       return 0;
 }
 
 static int
@@ -3004,23 +3134,28 @@ i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object *exec_list,
                            struct drm_i915_gem_relocation_entry *relocs)
 {
        uint32_t reloc_count = 0, i;
-       int ret;
+       int ret = 0;
 
        for (i = 0; i < buffer_count; i++) {
                struct drm_i915_gem_relocation_entry __user *user_relocs;
+               int unwritten;
 
                user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr;
 
-               if (ret == 0) {
-                       ret = copy_to_user(user_relocs,
-                                          &relocs[reloc_count],
-                                          exec_list[i].relocation_count *
-                                          sizeof(*relocs));
+               unwritten = copy_to_user(user_relocs,
+                                        &relocs[reloc_count],
+                                        exec_list[i].relocation_count *
+                                        sizeof(*relocs));
+
+               if (unwritten) {
+                       ret = -EFAULT;
+                       goto err;
                }
 
                reloc_count += exec_list[i].relocation_count;
        }
 
+err:
        drm_free(relocs, reloc_count * sizeof(*relocs), DRM_MEM_DRIVER);
 
        return ret;
@@ -3230,7 +3365,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
        exec_offset = exec_list[args->buffer_count - 1].offset;
 
 #if WATCH_EXEC
-       i915_gem_dump_object(object_list[args->buffer_count - 1],
+       i915_gem_dump_object(batch_obj,
                              args->batch_len,
                              __func__,
                              ~0);
@@ -3295,10 +3430,12 @@ err:
                                   (uintptr_t) args->buffers_ptr,
                                   exec_list,
                                   sizeof(*exec_list) * args->buffer_count);
-               if (ret)
+               if (ret) {
+                       ret = -EFAULT;
                        DRM_ERROR("failed to copy %d exec entries "
                                  "back to user (%d)\n",
                                  args->buffer_count, ret);
+               }
        }
 
        /* Copy the updated relocations out regardless of current error
@@ -3580,6 +3717,7 @@ void i915_gem_free_object(struct drm_gem_object *obj)
        i915_gem_free_mmap_offset(obj);
 
        drm_free(obj_priv->page_cpu_valid, 1, DRM_MEM_DRIVER);
+       kfree(obj_priv->bit_17);
        drm_free(obj->driver_private, 1, DRM_MEM_DRIVER);
 }
 
@@ -3676,6 +3814,7 @@ i915_gem_idle(struct drm_device *dev)
 
        i915_gem_retire_requests(dev);
 
+       spin_lock(&dev_priv->mm.active_list_lock);
        if (!dev_priv->mm.wedged) {
                /* Active and flushing should now be empty as we've
                 * waited for a sequence higher than any pending execbuffer
@@ -3702,6 +3841,7 @@ i915_gem_idle(struct drm_device *dev)
                obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
                i915_gem_object_move_to_inactive(obj_priv->obj);
        }
+       spin_unlock(&dev_priv->mm.active_list_lock);
 
        while (!list_empty(&dev_priv->mm.flushing_list)) {
                struct drm_i915_gem_object *obj_priv;
@@ -3950,7 +4090,10 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
        if (ret != 0)
                return ret;
 
+       spin_lock(&dev_priv->mm.active_list_lock);
        BUG_ON(!list_empty(&dev_priv->mm.active_list));
+       spin_unlock(&dev_priv->mm.active_list_lock);
+
        BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
        BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
        BUG_ON(!list_empty(&dev_priv->mm.request_list));
@@ -3994,6 +4137,7 @@ i915_gem_load(struct drm_device *dev)
 {
        drm_i915_private_t *dev_priv = dev->dev_private;
 
+       spin_lock_init(&dev_priv->mm.active_list_lock);
        INIT_LIST_HEAD(&dev_priv->mm.active_list);
        INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
        INIT_LIST_HEAD(&dev_priv->mm.inactive_list);