* Alex Deucher
* Jerome Glisse
*/
+#include <linux/kernel.h>
#include "drmP.h"
#include "radeon.h"
#include "r600d.h"
u32 nbanks;
u32 npipes;
/* value we track */
+ u32 sq_config;
u32 nsamples;
u32 cb_color_base_last[8];
struct radeon_bo *cb_color_bo[8];
{
int i;
+ /* assume DX9 mode */
+ track->sq_config = DX9_CONSTS;
for (i = 0; i < 8; i++) {
track->cb_color_base_last[i] = 0;
track->cb_color_size[i] = 0;
static inline int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i)
{
struct r600_cs_track *track = p->track;
- u32 bpe = 0, pitch, slice_tile_max, size, tmp, height;
+ u32 bpe = 0, pitch, slice_tile_max, size, tmp, height, pitch_align;
volatile u32 *ib = p->ib->ptr;
if (G_0280A0_TILE_MODE(track->cb_color_info[i])) {
i, track->cb_color_info[i]);
return -EINVAL;
}
- pitch = (G_028060_PITCH_TILE_MAX(track->cb_color_size[i]) + 1) << 3;
+ /* pitch is the number of 8x8 tiles per row */
+ pitch = G_028060_PITCH_TILE_MAX(track->cb_color_size[i]) + 1;
slice_tile_max = G_028060_SLICE_TILE_MAX(track->cb_color_size[i]) + 1;
- if (!pitch) {
- dev_warn(p->dev, "%s:%d cb pitch (%d) for %d invalid (0x%08X)\n",
- __func__, __LINE__, pitch, i, track->cb_color_size[i]);
- return -EINVAL;
- }
- height = size / (pitch * bpe);
+ height = size / (pitch * 8 * bpe);
if (height > 8192)
height = 8192;
+ if (height > 7)
+ height &= ~0x7;
switch (G_0280A0_ARRAY_MODE(track->cb_color_info[i])) {
case V_0280A0_ARRAY_LINEAR_GENERAL:
+ /* technically height & 0x7 */
+ break;
case V_0280A0_ARRAY_LINEAR_ALIGNED:
- if (pitch & 0x3f) {
- dev_warn(p->dev, "%s:%d cb pitch (%d x %d = %d) invalid\n",
- __func__, __LINE__, pitch, bpe, pitch * bpe);
+ pitch_align = max((u32)64, (u32)(track->group_size / bpe)) / 8;
+ if (!IS_ALIGNED(pitch, pitch_align)) {
+ dev_warn(p->dev, "%s:%d cb pitch (%d) invalid\n",
+ __func__, __LINE__, pitch);
return -EINVAL;
}
- if ((pitch * bpe) & (track->group_size - 1)) {
- dev_warn(p->dev, "%s:%d cb pitch (%d) invalid\n",
- __func__, __LINE__, pitch);
+ if (!IS_ALIGNED(height, 8)) {
+ dev_warn(p->dev, "%s:%d cb height (%d) invalid\n",
+ __func__, __LINE__, height);
return -EINVAL;
}
break;
case V_0280A0_ARRAY_1D_TILED_THIN1:
- if ((pitch * 8 * bpe * track->nsamples) & (track->group_size - 1)) {
+ pitch_align = max((u32)8, (u32)(track->group_size / (8 * bpe * track->nsamples))) / 8;
+ if (!IS_ALIGNED(pitch, pitch_align)) {
dev_warn(p->dev, "%s:%d cb pitch (%d) invalid\n",
- __func__, __LINE__, pitch);
+ __func__, __LINE__, pitch);
+ return -EINVAL;
+ }
+ if (!IS_ALIGNED(height, 8)) {
+ dev_warn(p->dev, "%s:%d cb height (%d) invalid\n",
+ __func__, __LINE__, height);
return -EINVAL;
}
- height &= ~0x7;
- if (!height)
- height = 8;
break;
case V_0280A0_ARRAY_2D_TILED_THIN1:
- if (pitch & ((8 * track->nbanks) - 1)) {
+ pitch_align = max((u32)track->nbanks,
+ (u32)(((track->group_size / 8) / (bpe * track->nsamples)) * track->nbanks));
+ if (!IS_ALIGNED(pitch, pitch_align)) {
dev_warn(p->dev, "%s:%d cb pitch (%d) invalid\n",
__func__, __LINE__, pitch);
return -EINVAL;
}
- tmp = pitch * 8 * bpe * track->nsamples;
- tmp = tmp / track->nbanks;
- if (tmp & (track->group_size - 1)) {
- dev_warn(p->dev, "%s:%d cb pitch (%d) invalid\n",
- __func__, __LINE__, pitch);
+ if (!IS_ALIGNED((height / 8), track->nbanks)) {
+ dev_warn(p->dev, "%s:%d cb height (%d) invalid\n",
+ __func__, __LINE__, height);
return -EINVAL;
}
- height &= ~((16 * track->npipes) - 1);
- if (!height)
- height = 16 * track->npipes;
break;
default:
dev_warn(p->dev, "%s invalid tiling %d for %d (0x%08X)\n", __func__,
return -EINVAL;
}
/* check offset */
- tmp = height * pitch;
+ tmp = height * pitch * 8 * bpe;
if ((tmp + track->cb_color_bo_offset[i]) > radeon_bo_size(track->cb_color_bo[i])) {
- dev_warn(p->dev, "%s offset[%d] %d to big\n", __func__, i, track->cb_color_bo_offset[i]);
+ dev_warn(p->dev, "%s offset[%d] %d too big\n", __func__, i, track->cb_color_bo_offset[i]);
+ return -EINVAL;
+ }
+ if (!IS_ALIGNED(track->cb_color_bo_offset[i], track->group_size)) {
+ dev_warn(p->dev, "%s offset[%d] %d not aligned\n", __func__, i, track->cb_color_bo_offset[i]);
return -EINVAL;
}
/* limit max tile */
- tmp = (height * pitch) >> 6;
+ tmp = (height * pitch * 8) >> 6;
if (tmp < slice_tile_max)
slice_tile_max = tmp;
- tmp = S_028060_PITCH_TILE_MAX((pitch >> 3) - 1) |
+ tmp = S_028060_PITCH_TILE_MAX(pitch - 1) |
S_028060_SLICE_TILE_MAX(slice_tile_max - 1);
ib[track->cb_color_size_idx[i]] = tmp;
return 0;
/* Check depth buffer */
if (G_028800_STENCIL_ENABLE(track->db_depth_control) ||
G_028800_Z_ENABLE(track->db_depth_control)) {
- u32 nviews, bpe, ntiles;
+ u32 nviews, bpe, ntiles, pitch, pitch_align, height, size;
if (track->db_bo == NULL) {
dev_warn(p->dev, "z/stencil with no depth buffer\n");
return -EINVAL;
}
ib[track->db_depth_size_idx] = S_028000_SLICE_TILE_MAX(tmp - 1) | (track->db_depth_size & 0x3FF);
} else {
+ size = radeon_bo_size(track->db_bo);
+ pitch = G_028000_PITCH_TILE_MAX(track->db_depth_size) + 1;
+ height = size / (pitch * 8 * bpe);
+ height &= ~0x7;
+ if (!height)
+ height = 8;
+
+ switch (G_028010_ARRAY_MODE(track->db_depth_info)) {
+ case V_028010_ARRAY_1D_TILED_THIN1:
+ pitch_align = (max((u32)8, (u32)(track->group_size / (8 * bpe))) / 8);
+ if (!IS_ALIGNED(pitch, pitch_align)) {
+ dev_warn(p->dev, "%s:%d db pitch (%d) invalid\n",
+ __func__, __LINE__, pitch);
+ return -EINVAL;
+ }
+ if (!IS_ALIGNED(height, 8)) {
+ dev_warn(p->dev, "%s:%d db height (%d) invalid\n",
+ __func__, __LINE__, height);
+ return -EINVAL;
+ }
+ break;
+ case V_028010_ARRAY_2D_TILED_THIN1:
+ pitch_align = max((u32)track->nbanks,
+ (u32)(((track->group_size / 8) / bpe) * track->nbanks));
+ if (!IS_ALIGNED(pitch, pitch_align)) {
+ dev_warn(p->dev, "%s:%d db pitch (%d) invalid\n",
+ __func__, __LINE__, pitch);
+ return -EINVAL;
+ }
+ if ((height / 8) & (track->nbanks - 1)) {
+ dev_warn(p->dev, "%s:%d db height (%d) invalid\n",
+ __func__, __LINE__, height);
+ return -EINVAL;
+ }
+ break;
+ default:
+ dev_warn(p->dev, "%s invalid tiling %d (0x%08X)\n", __func__,
+ G_028010_ARRAY_MODE(track->db_depth_info),
+ track->db_depth_info);
+ return -EINVAL;
+ }
+ if (!IS_ALIGNED(track->db_offset, track->group_size)) {
+ dev_warn(p->dev, "%s offset[%d] %d not aligned\n", __func__, i, track->db_offset);
+ return -EINVAL;
+ }
ntiles = G_028000_SLICE_TILE_MAX(track->db_depth_size) + 1;
nviews = G_028004_SLICE_MAX(track->db_depth_view) + 1;
tmp = ntiles * bpe * 64 * nviews;
header = radeon_get_ib_value(p, h_idx);
crtc_id = radeon_get_ib_value(p, h_idx + 2 + 7 + 1);
reg = CP_PACKET0_GET_REG(header);
- mutex_lock(&p->rdev->ddev->mode_config.mutex);
+
obj = drm_mode_object_find(p->rdev->ddev, crtc_id, DRM_MODE_OBJECT_CRTC);
if (!obj) {
DRM_ERROR("cannot find crtc %d\n", crtc_id);
ib[h_idx + 4] = AVIVO_D2MODE_VLINE_STATUS >> 2;
}
out:
- mutex_unlock(&p->rdev->ddev->mode_config.mutex);
return r;
}
tmp =radeon_get_ib_value(p, idx);
ib[idx] = 0;
break;
+ case SQ_CONFIG:
+ track->sq_config = radeon_get_ib_value(p, idx);
+ break;
case R_028800_DB_DEPTH_CONTROL:
track->db_depth_control = radeon_get_ib_value(p, idx);
break;
case R_028010_DB_DEPTH_INFO:
- track->db_depth_info = radeon_get_ib_value(p, idx);
+ if (r600_cs_packet_next_is_pkt3_nop(p)) {
+ r = r600_cs_packet_next_reloc(p, &reloc);
+ if (r) {
+ dev_warn(p->dev, "bad SET_CONTEXT_REG "
+ "0x%04X\n", reg);
+ return -EINVAL;
+ }
+ track->db_depth_info = radeon_get_ib_value(p, idx);
+ ib[idx] &= C_028010_ARRAY_MODE;
+ track->db_depth_info &= C_028010_ARRAY_MODE;
+ if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) {
+ ib[idx] |= S_028010_ARRAY_MODE(V_028010_ARRAY_2D_TILED_THIN1);
+ track->db_depth_info |= S_028010_ARRAY_MODE(V_028010_ARRAY_2D_TILED_THIN1);
+ } else {
+ ib[idx] |= S_028010_ARRAY_MODE(V_028010_ARRAY_1D_TILED_THIN1);
+ track->db_depth_info |= S_028010_ARRAY_MODE(V_028010_ARRAY_1D_TILED_THIN1);
+ }
+ } else
+ track->db_depth_info = radeon_get_ib_value(p, idx);
break;
case R_028004_DB_DEPTH_VIEW:
track->db_depth_view = radeon_get_ib_value(p, idx);
case R_0280B4_CB_COLOR5_INFO:
case R_0280B8_CB_COLOR6_INFO:
case R_0280BC_CB_COLOR7_INFO:
- tmp = (reg - R_0280A0_CB_COLOR0_INFO) / 4;
- track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
+ if (r600_cs_packet_next_is_pkt3_nop(p)) {
+ r = r600_cs_packet_next_reloc(p, &reloc);
+ if (r) {
+ dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
+ return -EINVAL;
+ }
+ tmp = (reg - R_0280A0_CB_COLOR0_INFO) / 4;
+ track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
+ if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) {
+ ib[idx] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_2D_TILED_THIN1);
+ track->cb_color_info[tmp] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_2D_TILED_THIN1);
+ } else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
+ ib[idx] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_1D_TILED_THIN1);
+ track->cb_color_info[tmp] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_1D_TILED_THIN1);
+ }
+ } else {
+ tmp = (reg - R_0280A0_CB_COLOR0_INFO) / 4;
+ track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
+ }
break;
case R_028060_CB_COLOR0_SIZE:
case R_028064_CB_COLOR1_SIZE:
case SQ_PGM_START_VS:
case SQ_PGM_START_GS:
case SQ_PGM_START_PS:
+ case SQ_ALU_CONST_CACHE_GS_0:
+ case SQ_ALU_CONST_CACHE_GS_1:
+ case SQ_ALU_CONST_CACHE_GS_2:
+ case SQ_ALU_CONST_CACHE_GS_3:
+ case SQ_ALU_CONST_CACHE_GS_4:
+ case SQ_ALU_CONST_CACHE_GS_5:
+ case SQ_ALU_CONST_CACHE_GS_6:
+ case SQ_ALU_CONST_CACHE_GS_7:
+ case SQ_ALU_CONST_CACHE_GS_8:
+ case SQ_ALU_CONST_CACHE_GS_9:
+ case SQ_ALU_CONST_CACHE_GS_10:
+ case SQ_ALU_CONST_CACHE_GS_11:
+ case SQ_ALU_CONST_CACHE_GS_12:
+ case SQ_ALU_CONST_CACHE_GS_13:
+ case SQ_ALU_CONST_CACHE_GS_14:
+ case SQ_ALU_CONST_CACHE_GS_15:
+ case SQ_ALU_CONST_CACHE_PS_0:
+ case SQ_ALU_CONST_CACHE_PS_1:
+ case SQ_ALU_CONST_CACHE_PS_2:
+ case SQ_ALU_CONST_CACHE_PS_3:
+ case SQ_ALU_CONST_CACHE_PS_4:
+ case SQ_ALU_CONST_CACHE_PS_5:
+ case SQ_ALU_CONST_CACHE_PS_6:
+ case SQ_ALU_CONST_CACHE_PS_7:
+ case SQ_ALU_CONST_CACHE_PS_8:
+ case SQ_ALU_CONST_CACHE_PS_9:
+ case SQ_ALU_CONST_CACHE_PS_10:
+ case SQ_ALU_CONST_CACHE_PS_11:
+ case SQ_ALU_CONST_CACHE_PS_12:
+ case SQ_ALU_CONST_CACHE_PS_13:
+ case SQ_ALU_CONST_CACHE_PS_14:
+ case SQ_ALU_CONST_CACHE_PS_15:
+ case SQ_ALU_CONST_CACHE_VS_0:
+ case SQ_ALU_CONST_CACHE_VS_1:
+ case SQ_ALU_CONST_CACHE_VS_2:
+ case SQ_ALU_CONST_CACHE_VS_3:
+ case SQ_ALU_CONST_CACHE_VS_4:
+ case SQ_ALU_CONST_CACHE_VS_5:
+ case SQ_ALU_CONST_CACHE_VS_6:
+ case SQ_ALU_CONST_CACHE_VS_7:
+ case SQ_ALU_CONST_CACHE_VS_8:
+ case SQ_ALU_CONST_CACHE_VS_9:
+ case SQ_ALU_CONST_CACHE_VS_10:
+ case SQ_ALU_CONST_CACHE_VS_11:
+ case SQ_ALU_CONST_CACHE_VS_12:
+ case SQ_ALU_CONST_CACHE_VS_13:
+ case SQ_ALU_CONST_CACHE_VS_14:
+ case SQ_ALU_CONST_CACHE_VS_15:
r = r600_cs_packet_next_reloc(p, &reloc);
if (r) {
dev_warn(p->dev, "bad SET_CONTEXT_REG "
}
static void r600_texture_size(unsigned nfaces, unsigned blevel, unsigned nlevels,
- unsigned w0, unsigned h0, unsigned d0, unsigned bpe,
- unsigned *l0_size, unsigned *mipmap_size)
+ unsigned w0, unsigned h0, unsigned d0, unsigned bpe,
+ unsigned pitch_align,
+ unsigned *l0_size, unsigned *mipmap_size)
{
unsigned offset, i, level, face;
unsigned width, height, depth, rowstride, size;
height = minify(h0, i);
depth = minify(d0, i);
for(face = 0; face < nfaces; face++) {
- rowstride = ((width * bpe) + 255) & ~255;
+ rowstride = ALIGN((width * bpe), pitch_align);
size = height * rowstride * depth;
offset += size;
offset = (offset + 0x1f) & ~0x1f;
}
}
- *l0_size = (((w0 * bpe) + 255) & ~255) * h0 * d0;
+ *l0_size = ALIGN((w0 * bpe), pitch_align) * h0 * d0;
*mipmap_size = offset;
if (!blevel)
*mipmap_size -= *l0_size;
* the texture and mipmap bo object are big enough to cover this resource.
*/
static inline int r600_check_texture_resource(struct radeon_cs_parser *p, u32 idx,
- struct radeon_bo *texture,
- struct radeon_bo *mipmap)
+ struct radeon_bo *texture,
+ struct radeon_bo *mipmap,
+ u32 tiling_flags)
{
+ struct r600_cs_track *track = p->track;
u32 nfaces, nlevels, blevel, w0, h0, d0, bpe = 0;
- u32 word0, word1, l0_size, mipmap_size;
+ u32 word0, word1, l0_size, mipmap_size, pitch, pitch_align;
/* on legacy kernel we don't perform advanced check */
if (p->rdev == NULL)
return 0;
+
word0 = radeon_get_ib_value(p, idx + 0);
+ if (tiling_flags & RADEON_TILING_MACRO)
+ word0 |= S_038000_TILE_MODE(V_038000_ARRAY_2D_TILED_THIN1);
+ else if (tiling_flags & RADEON_TILING_MICRO)
+ word0 |= S_038000_TILE_MODE(V_038000_ARRAY_1D_TILED_THIN1);
word1 = radeon_get_ib_value(p, idx + 1);
w0 = G_038000_TEX_WIDTH(word0) + 1;
h0 = G_038004_TEX_HEIGHT(word1) + 1;
__func__, __LINE__, G_038004_DATA_FORMAT(word1));
return -EINVAL;
}
+
+ pitch = G_038000_PITCH(word0) + 1;
+ switch (G_038000_TILE_MODE(word0)) {
+ case V_038000_ARRAY_LINEAR_GENERAL:
+ pitch_align = 1;
+ /* XXX check height align */
+ break;
+ case V_038000_ARRAY_LINEAR_ALIGNED:
+ pitch_align = max((u32)64, (u32)(track->group_size / bpe)) / 8;
+ if (!IS_ALIGNED(pitch, pitch_align)) {
+ dev_warn(p->dev, "%s:%d tex pitch (%d) invalid\n",
+ __func__, __LINE__, pitch);
+ return -EINVAL;
+ }
+ /* XXX check height align */
+ break;
+ case V_038000_ARRAY_1D_TILED_THIN1:
+ pitch_align = max((u32)8, (u32)(track->group_size / (8 * bpe))) / 8;
+ if (!IS_ALIGNED(pitch, pitch_align)) {
+ dev_warn(p->dev, "%s:%d tex pitch (%d) invalid\n",
+ __func__, __LINE__, pitch);
+ return -EINVAL;
+ }
+ /* XXX check height align */
+ break;
+ case V_038000_ARRAY_2D_TILED_THIN1:
+ pitch_align = max((u32)track->nbanks,
+ (u32)(((track->group_size / 8) / bpe) * track->nbanks));
+ if (!IS_ALIGNED(pitch, pitch_align)) {
+ dev_warn(p->dev, "%s:%d tex pitch (%d) invalid\n",
+ __func__, __LINE__, pitch);
+ return -EINVAL;
+ }
+ /* XXX check height align */
+ break;
+ default:
+ dev_warn(p->dev, "%s invalid tiling %d (0x%08X)\n", __func__,
+ G_038000_TILE_MODE(word0), word0);
+ return -EINVAL;
+ }
+ /* XXX check offset align */
+
word0 = radeon_get_ib_value(p, idx + 4);
word1 = radeon_get_ib_value(p, idx + 5);
blevel = G_038010_BASE_LEVEL(word0);
nlevels = G_038014_LAST_LEVEL(word1);
- r600_texture_size(nfaces, blevel, nlevels, w0, h0, d0, bpe, &l0_size, &mipmap_size);
+ r600_texture_size(nfaces, blevel, nlevels, w0, h0, d0, bpe,
+ (pitch_align * bpe),
+ &l0_size, &mipmap_size);
/* using get ib will give us the offset into the texture bo */
word0 = radeon_get_ib_value(p, idx + 2);
if ((l0_size + word0) > radeon_bo_size(texture)) {
return -EINVAL;
}
ib[idx+1+(i*7)+2] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+ if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
+ ib[idx+1+(i*7)+0] |= S_038000_TILE_MODE(V_038000_ARRAY_2D_TILED_THIN1);
+ else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
+ ib[idx+1+(i*7)+0] |= S_038000_TILE_MODE(V_038000_ARRAY_1D_TILED_THIN1);
texture = reloc->robj;
/* tex mip base */
r = r600_cs_packet_next_reloc(p, &reloc);
ib[idx+1+(i*7)+3] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
mipmap = reloc->robj;
r = r600_check_texture_resource(p, idx+(i*7)+1,
- texture, mipmap);
+ texture, mipmap, reloc->lobj.tiling_flags);
if (r)
return r;
break;
}
break;
case PACKET3_SET_ALU_CONST:
- start_reg = (idx_value << 2) + PACKET3_SET_ALU_CONST_OFFSET;
- end_reg = 4 * pkt->count + start_reg - 4;
- if ((start_reg < PACKET3_SET_ALU_CONST_OFFSET) ||
- (start_reg >= PACKET3_SET_ALU_CONST_END) ||
- (end_reg >= PACKET3_SET_ALU_CONST_END)) {
- DRM_ERROR("bad SET_ALU_CONST\n");
- return -EINVAL;
+ if (track->sq_config & DX9_CONSTS) {
+ start_reg = (idx_value << 2) + PACKET3_SET_ALU_CONST_OFFSET;
+ end_reg = 4 * pkt->count + start_reg - 4;
+ if ((start_reg < PACKET3_SET_ALU_CONST_OFFSET) ||
+ (start_reg >= PACKET3_SET_ALU_CONST_END) ||
+ (end_reg >= PACKET3_SET_ALU_CONST_END)) {
+ DRM_ERROR("bad SET_ALU_CONST\n");
+ return -EINVAL;
+ }
}
break;
case PACKET3_SET_BOOL_CONST: