Merge tag 'v2.6.35-rc6' into drm-radeon-next
[sfrench/cifs-2.6.git] / drivers / gpu / drm / radeon / r600_cs.c
index c39c1bc13016075f2bd7151937b3faadc2bf0143..c3ea212e0c3c28284271d64e617b3cebedc0e173 100644 (file)
@@ -25,6 +25,7 @@
  *          Alex Deucher
  *          Jerome Glisse
  */
+#include <linux/kernel.h>
 #include "drmP.h"
 #include "radeon.h"
 #include "r600d.h"
@@ -166,7 +167,7 @@ static void r600_cs_track_init(struct r600_cs_track *track)
 static inline int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i)
 {
        struct r600_cs_track *track = p->track;
-       u32 bpe = 0, pitch, slice_tile_max, size, tmp, height;
+       u32 bpe = 0, pitch, slice_tile_max, size, tmp, height, pitch_align;
        volatile u32 *ib = p->ib->ptr;
 
        if (G_0280A0_TILE_MODE(track->cb_color_info[i])) {
@@ -180,56 +181,57 @@ static inline int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i)
                        i, track->cb_color_info[i]);
                return -EINVAL;
        }
-       pitch = (G_028060_PITCH_TILE_MAX(track->cb_color_size[i]) + 1) << 3;
+       /* pitch is the number of 8x8 tiles per row */
+       pitch = G_028060_PITCH_TILE_MAX(track->cb_color_size[i]) + 1;
        slice_tile_max = G_028060_SLICE_TILE_MAX(track->cb_color_size[i]) + 1;
-       if (!pitch) {
-               dev_warn(p->dev, "%s:%d cb pitch (%d) for %d invalid (0x%08X)\n",
-                       __func__, __LINE__, pitch, i, track->cb_color_size[i]);
-               return -EINVAL;
-       }
-       height = size / (pitch * bpe);
+       height = size / (pitch * 8 * bpe);
        if (height > 8192)
                height = 8192;
+       if (height > 7)
+               height &= ~0x7;
        switch (G_0280A0_ARRAY_MODE(track->cb_color_info[i])) {
        case V_0280A0_ARRAY_LINEAR_GENERAL:
+               /* technically height & 0x7 */
+               break;
        case V_0280A0_ARRAY_LINEAR_ALIGNED:
-               if (pitch & 0x3f) {
-                       dev_warn(p->dev, "%s:%d cb pitch (%d x %d = %d) invalid\n",
-                               __func__, __LINE__, pitch, bpe, pitch * bpe);
+               pitch_align = max((u32)64, (u32)(track->group_size / bpe)) / 8;
+               if (!IS_ALIGNED(pitch, pitch_align)) {
+                       dev_warn(p->dev, "%s:%d cb pitch (%d) invalid\n",
+                                __func__, __LINE__, pitch);
                        return -EINVAL;
                }
-               if ((pitch * bpe) & (track->group_size - 1)) {
-                       dev_warn(p->dev, "%s:%d cb pitch (%d) invalid\n",
-                               __func__, __LINE__, pitch);
+               if (!IS_ALIGNED(height, 8)) {
+                       dev_warn(p->dev, "%s:%d cb height (%d) invalid\n",
+                                __func__, __LINE__, height);
                        return -EINVAL;
                }
                break;
        case V_0280A0_ARRAY_1D_TILED_THIN1:
-               if ((pitch * 8 * bpe * track->nsamples) & (track->group_size - 1)) {
+               pitch_align = max((u32)8, (u32)(track->group_size / (8 * bpe * track->nsamples))) / 8;
+               if (!IS_ALIGNED(pitch, pitch_align)) {
                        dev_warn(p->dev, "%s:%d cb pitch (%d) invalid\n",
-                               __func__, __LINE__, pitch);
+                                __func__, __LINE__, pitch);
+                       return -EINVAL;
+               }
+               if (!IS_ALIGNED(height, 8)) {
+                       dev_warn(p->dev, "%s:%d cb height (%d) invalid\n",
+                                __func__, __LINE__, height);
                        return -EINVAL;
                }
-               height &= ~0x7;
-               if (!height)
-                       height = 8;
                break;
        case V_0280A0_ARRAY_2D_TILED_THIN1:
-               if (pitch & ((8 * track->nbanks) - 1)) {
+               pitch_align = max((u32)track->nbanks,
+                                 (u32)(((track->group_size / 8) / (bpe * track->nsamples)) * track->nbanks));
+               if (!IS_ALIGNED(pitch, pitch_align)) {
                        dev_warn(p->dev, "%s:%d cb pitch (%d) invalid\n",
                                __func__, __LINE__, pitch);
                        return -EINVAL;
                }
-               tmp = pitch * 8 * bpe * track->nsamples;
-               tmp = tmp / track->nbanks;
-               if (tmp & (track->group_size - 1)) {
-                       dev_warn(p->dev, "%s:%d cb pitch (%d) invalid\n",
-                               __func__, __LINE__, pitch);
+               if (!IS_ALIGNED((height / 8), track->nbanks)) {
+                       dev_warn(p->dev, "%s:%d cb height (%d) invalid\n",
+                                __func__, __LINE__, height);
                        return -EINVAL;
                }
-               height &= ~((16 * track->npipes) - 1);
-               if (!height)
-                       height = 16 * track->npipes;
                break;
        default:
                dev_warn(p->dev, "%s invalid tiling %d for %d (0x%08X)\n", __func__,
@@ -238,16 +240,20 @@ static inline int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i)
                return -EINVAL;
        }
        /* check offset */
-       tmp = height * pitch;
+       tmp = height * pitch * 8 * bpe;
        if ((tmp + track->cb_color_bo_offset[i]) > radeon_bo_size(track->cb_color_bo[i])) {
-               dev_warn(p->dev, "%s offset[%d] %d to big\n", __func__, i, track->cb_color_bo_offset[i]);
+               dev_warn(p->dev, "%s offset[%d] %d too big\n", __func__, i, track->cb_color_bo_offset[i]);
+               return -EINVAL;
+       }
+       if (!IS_ALIGNED(track->cb_color_bo_offset[i], track->group_size)) {
+               dev_warn(p->dev, "%s offset[%d] %d not aligned\n", __func__, i, track->cb_color_bo_offset[i]);
                return -EINVAL;
        }
        /* limit max tile */
-       tmp = (height * pitch) >> 6;
+       tmp = (height * pitch * 8) >> 6;
        if (tmp < slice_tile_max)
                slice_tile_max = tmp;
-       tmp = S_028060_PITCH_TILE_MAX((pitch >> 3) - 1) |
+       tmp = S_028060_PITCH_TILE_MAX(pitch - 1) |
                S_028060_SLICE_TILE_MAX(slice_tile_max - 1);
        ib[track->cb_color_size_idx[i]] = tmp;
        return 0;
@@ -289,7 +295,7 @@ static int r600_cs_track_check(struct radeon_cs_parser *p)
        /* Check depth buffer */
        if (G_028800_STENCIL_ENABLE(track->db_depth_control) ||
                G_028800_Z_ENABLE(track->db_depth_control)) {
-               u32 nviews, bpe, ntiles;
+               u32 nviews, bpe, ntiles, pitch, pitch_align, height, size;
                if (track->db_bo == NULL) {
                        dev_warn(p->dev, "z/stencil with no depth buffer\n");
                        return -EINVAL;
@@ -332,6 +338,51 @@ static int r600_cs_track_check(struct radeon_cs_parser *p)
                        }
                        ib[track->db_depth_size_idx] = S_028000_SLICE_TILE_MAX(tmp - 1) | (track->db_depth_size & 0x3FF);
                } else {
+                       size = radeon_bo_size(track->db_bo);
+                       pitch = G_028000_PITCH_TILE_MAX(track->db_depth_size) + 1;
+                       height = size / (pitch * 8 * bpe);
+                       height &= ~0x7;
+                       if (!height)
+                               height = 8;
+
+                       switch (G_028010_ARRAY_MODE(track->db_depth_info)) {
+                       case V_028010_ARRAY_1D_TILED_THIN1:
+                               pitch_align = (max((u32)8, (u32)(track->group_size / (8 * bpe))) / 8);
+                               if (!IS_ALIGNED(pitch, pitch_align)) {
+                                       dev_warn(p->dev, "%s:%d db pitch (%d) invalid\n",
+                                                __func__, __LINE__, pitch);
+                                       return -EINVAL;
+                               }
+                               if (!IS_ALIGNED(height, 8)) {
+                                       dev_warn(p->dev, "%s:%d db height (%d) invalid\n",
+                                                __func__, __LINE__, height);
+                                       return -EINVAL;
+                               }
+                               break;
+                       case V_028010_ARRAY_2D_TILED_THIN1:
+                               pitch_align = max((u32)track->nbanks,
+                                                 (u32)(((track->group_size / 8) / bpe) * track->nbanks));
+                               if (!IS_ALIGNED(pitch, pitch_align)) {
+                                       dev_warn(p->dev, "%s:%d db pitch (%d) invalid\n",
+                                                __func__, __LINE__, pitch);
+                                       return -EINVAL;
+                               }
+                               if ((height / 8) & (track->nbanks - 1)) {
+                                       dev_warn(p->dev, "%s:%d db height (%d) invalid\n",
+                                                __func__, __LINE__, height);
+                                       return -EINVAL;
+                               }
+                               break;
+                       default:
+                               dev_warn(p->dev, "%s invalid tiling %d (0x%08X)\n", __func__,
+                                        G_028010_ARRAY_MODE(track->db_depth_info),
+                                        track->db_depth_info);
+                               return -EINVAL;
+                       }
+                       if (!IS_ALIGNED(track->db_offset, track->group_size)) {
+                               dev_warn(p->dev, "%s offset[%d] %d not aligned\n", __func__, i, track->db_offset);
+                               return -EINVAL;
+                       }
                        ntiles = G_028000_SLICE_TILE_MAX(track->db_depth_size) + 1;
                        nviews = G_028004_SLICE_MAX(track->db_depth_view) + 1;
                        tmp = ntiles * bpe * 64 * nviews;
@@ -585,7 +636,7 @@ static int r600_cs_packet_parse_vline(struct radeon_cs_parser *p)
        header = radeon_get_ib_value(p, h_idx);
        crtc_id = radeon_get_ib_value(p, h_idx + 2 + 7 + 1);
        reg = CP_PACKET0_GET_REG(header);
-       mutex_lock(&p->rdev->ddev->mode_config.mutex);
+
        obj = drm_mode_object_find(p->rdev->ddev, crtc_id, DRM_MODE_OBJECT_CRTC);
        if (!obj) {
                DRM_ERROR("cannot find crtc %d\n", crtc_id);
@@ -620,7 +671,6 @@ static int r600_cs_packet_parse_vline(struct radeon_cs_parser *p)
                ib[h_idx + 4] = AVIVO_D2MODE_VLINE_STATUS >> 2;
        }
 out:
-       mutex_unlock(&p->rdev->ddev->mode_config.mutex);
        return r;
 }
 
@@ -725,7 +775,25 @@ static inline int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx
                track->db_depth_control = radeon_get_ib_value(p, idx);
                break;
        case R_028010_DB_DEPTH_INFO:
-               track->db_depth_info = radeon_get_ib_value(p, idx);
+               if (r600_cs_packet_next_is_pkt3_nop(p)) {
+                       r = r600_cs_packet_next_reloc(p, &reloc);
+                       if (r) {
+                               dev_warn(p->dev, "bad SET_CONTEXT_REG "
+                                        "0x%04X\n", reg);
+                               return -EINVAL;
+                       }
+                       track->db_depth_info = radeon_get_ib_value(p, idx);
+                       ib[idx] &= C_028010_ARRAY_MODE;
+                       track->db_depth_info &= C_028010_ARRAY_MODE;
+                       if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) {
+                               ib[idx] |= S_028010_ARRAY_MODE(V_028010_ARRAY_2D_TILED_THIN1);
+                               track->db_depth_info |= S_028010_ARRAY_MODE(V_028010_ARRAY_2D_TILED_THIN1);
+                       } else {
+                               ib[idx] |= S_028010_ARRAY_MODE(V_028010_ARRAY_1D_TILED_THIN1);
+                               track->db_depth_info |= S_028010_ARRAY_MODE(V_028010_ARRAY_1D_TILED_THIN1);
+                       }
+               } else
+                       track->db_depth_info = radeon_get_ib_value(p, idx);
                break;
        case R_028004_DB_DEPTH_VIEW:
                track->db_depth_view = radeon_get_ib_value(p, idx);
@@ -758,8 +826,25 @@ static inline int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx
        case R_0280B4_CB_COLOR5_INFO:
        case R_0280B8_CB_COLOR6_INFO:
        case R_0280BC_CB_COLOR7_INFO:
-               tmp = (reg - R_0280A0_CB_COLOR0_INFO) / 4;
-               track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
+               if (r600_cs_packet_next_is_pkt3_nop(p)) {
+                       r = r600_cs_packet_next_reloc(p, &reloc);
+                       if (r) {
+                               dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
+                               return -EINVAL;
+                       }
+                       tmp = (reg - R_0280A0_CB_COLOR0_INFO) / 4;
+                       track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
+                       if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) {
+                               ib[idx] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_2D_TILED_THIN1);
+                               track->cb_color_info[tmp] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_2D_TILED_THIN1);
+                       } else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
+                               ib[idx] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_1D_TILED_THIN1);
+                               track->cb_color_info[tmp] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_1D_TILED_THIN1);
+                       }
+               } else {
+                       tmp = (reg - R_0280A0_CB_COLOR0_INFO) / 4;
+                       track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
+               }
                break;
        case R_028060_CB_COLOR0_SIZE:
        case R_028064_CB_COLOR1_SIZE:
@@ -947,8 +1032,9 @@ static inline unsigned minify(unsigned size, unsigned levels)
 }
 
 static void r600_texture_size(unsigned nfaces, unsigned blevel, unsigned nlevels,
-                               unsigned w0, unsigned h0, unsigned d0, unsigned bpe,
-                               unsigned *l0_size, unsigned *mipmap_size)
+                             unsigned w0, unsigned h0, unsigned d0, unsigned bpe,
+                             unsigned pitch_align,
+                             unsigned *l0_size, unsigned *mipmap_size)
 {
        unsigned offset, i, level, face;
        unsigned width, height, depth, rowstride, size;
@@ -961,13 +1047,13 @@ static void r600_texture_size(unsigned nfaces, unsigned blevel, unsigned nlevels
                height = minify(h0, i);
                depth = minify(d0, i);
                for(face = 0; face < nfaces; face++) {
-                       rowstride = ((width * bpe) + 255) & ~255;
+                       rowstride = ALIGN((width * bpe), pitch_align);
                        size = height * rowstride * depth;
                        offset += size;
                        offset = (offset + 0x1f) & ~0x1f;
                }
        }
-       *l0_size = (((w0 * bpe) + 255) & ~255) * h0 * d0;
+       *l0_size = ALIGN((w0 * bpe), pitch_align) * h0 * d0;
        *mipmap_size = offset;
        if (!blevel)
                *mipmap_size -= *l0_size;
@@ -986,16 +1072,23 @@ static void r600_texture_size(unsigned nfaces, unsigned blevel, unsigned nlevels
  * the texture and mipmap bo object are big enough to cover this resource.
  */
 static inline int r600_check_texture_resource(struct radeon_cs_parser *p,  u32 idx,
-                                               struct radeon_bo *texture,
-                                               struct radeon_bo *mipmap)
+                                             struct radeon_bo *texture,
+                                             struct radeon_bo *mipmap,
+                                             u32 tiling_flags)
 {
+       struct r600_cs_track *track = p->track;
        u32 nfaces, nlevels, blevel, w0, h0, d0, bpe = 0;
-       u32 word0, word1, l0_size, mipmap_size;
+       u32 word0, word1, l0_size, mipmap_size, pitch, pitch_align;
 
        /* on legacy kernel we don't perform advanced check */
        if (p->rdev == NULL)
                return 0;
+
        word0 = radeon_get_ib_value(p, idx + 0);
+       if (tiling_flags & RADEON_TILING_MACRO)
+               word0 |= S_038000_TILE_MODE(V_038000_ARRAY_2D_TILED_THIN1);
+       else if (tiling_flags & RADEON_TILING_MICRO)
+               word0 |= S_038000_TILE_MODE(V_038000_ARRAY_1D_TILED_THIN1);
        word1 = radeon_get_ib_value(p, idx + 1);
        w0 = G_038000_TEX_WIDTH(word0) + 1;
        h0 = G_038004_TEX_HEIGHT(word1) + 1;
@@ -1022,11 +1115,55 @@ static inline int r600_check_texture_resource(struct radeon_cs_parser *p,  u32 i
                         __func__, __LINE__, G_038004_DATA_FORMAT(word1));
                return -EINVAL;
        }
+
+       pitch = G_038000_PITCH(word0) + 1;
+       switch (G_038000_TILE_MODE(word0)) {
+       case V_038000_ARRAY_LINEAR_GENERAL:
+               pitch_align = 1;
+               /* XXX check height align */
+               break;
+       case V_038000_ARRAY_LINEAR_ALIGNED:
+               pitch_align = max((u32)64, (u32)(track->group_size / bpe)) / 8;
+               if (!IS_ALIGNED(pitch, pitch_align)) {
+                       dev_warn(p->dev, "%s:%d tex pitch (%d) invalid\n",
+                                __func__, __LINE__, pitch);
+                       return -EINVAL;
+               }
+               /* XXX check height align */
+               break;
+       case V_038000_ARRAY_1D_TILED_THIN1:
+               pitch_align = max((u32)8, (u32)(track->group_size / (8 * bpe))) / 8;
+               if (!IS_ALIGNED(pitch, pitch_align)) {
+                       dev_warn(p->dev, "%s:%d tex pitch (%d) invalid\n",
+                                __func__, __LINE__, pitch);
+                       return -EINVAL;
+               }
+               /* XXX check height align */
+               break;
+       case V_038000_ARRAY_2D_TILED_THIN1:
+               pitch_align = max((u32)track->nbanks,
+                                 (u32)(((track->group_size / 8) / bpe) * track->nbanks));
+               if (!IS_ALIGNED(pitch, pitch_align)) {
+                       dev_warn(p->dev, "%s:%d tex pitch (%d) invalid\n",
+                               __func__, __LINE__, pitch);
+                       return -EINVAL;
+               }
+               /* XXX check height align */
+               break;
+       default:
+               dev_warn(p->dev, "%s invalid tiling %d (0x%08X)\n", __func__,
+                        G_038000_TILE_MODE(word0), word0);
+               return -EINVAL;
+       }
+       /* XXX check offset align */
+
        word0 = radeon_get_ib_value(p, idx + 4);
        word1 = radeon_get_ib_value(p, idx + 5);
        blevel = G_038010_BASE_LEVEL(word0);
        nlevels = G_038014_LAST_LEVEL(word1);
-       r600_texture_size(nfaces, blevel, nlevels, w0, h0, d0, bpe, &l0_size, &mipmap_size);
+       r600_texture_size(nfaces, blevel, nlevels, w0, h0, d0, bpe,
+                         (pitch_align * bpe),
+                         &l0_size, &mipmap_size);
        /* using get ib will give us the offset into the texture bo */
        word0 = radeon_get_ib_value(p, idx + 2);
        if ((l0_size + word0) > radeon_bo_size(texture)) {
@@ -1240,6 +1377,10 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
                                        return -EINVAL;
                                }
                                ib[idx+1+(i*7)+2] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+                               if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
+                                       ib[idx+1+(i*7)+0] |= S_038000_TILE_MODE(V_038000_ARRAY_2D_TILED_THIN1);
+                               else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
+                                       ib[idx+1+(i*7)+0] |= S_038000_TILE_MODE(V_038000_ARRAY_1D_TILED_THIN1);
                                texture = reloc->robj;
                                /* tex mip base */
                                r = r600_cs_packet_next_reloc(p, &reloc);
@@ -1250,7 +1391,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
                                ib[idx+1+(i*7)+3] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
                                mipmap = reloc->robj;
                                r = r600_check_texture_resource(p,  idx+(i*7)+1,
-                                               texture, mipmap);
+                                                               texture, mipmap, reloc->lobj.tiling_flags);
                                if (r)
                                        return r;
                                break;