Merge branch 'drm-radeon-testing' of /ssd/git/drm-radeon-next into drm-next-stage
[sfrench/cifs-2.6.git] / drivers / gpu / drm / radeon / radeon_state.c
1 /* radeon_state.c -- State support for Radeon -*- linux-c -*- */
2 /*
3  * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23  * DEALINGS IN THE SOFTWARE.
24  *
25  * Authors:
26  *    Gareth Hughes <gareth@valinux.com>
27  *    Kevin E. Martin <martin@valinux.com>
28  */
29
30 #include "drmP.h"
31 #include "drm.h"
32 #include "drm_buffer.h"
33 #include "drm_sarea.h"
34 #include "radeon_drm.h"
35 #include "radeon_drv.h"
36
37 /* ================================================================
38  * Helper functions for client state checking and fixup
39  */
40
41 static __inline__ int radeon_check_and_fixup_offset(drm_radeon_private_t *
42                                                     dev_priv,
43                                                     struct drm_file * file_priv,
44                                                     u32 *offset)
45 {
46         u64 off = *offset;
47         u32 fb_end = dev_priv->fb_location + dev_priv->fb_size - 1;
48         struct drm_radeon_driver_file_fields *radeon_priv;
49
50         /* Hrm ... the story of the offset ... So this function converts
51          * the various ideas of what userland clients might have for an
52          * offset in the card address space into an offset into the card
53          * address space :) So with a sane client, it should just keep
54          * the value intact and just do some boundary checking. However,
55          * not all clients are sane. Some older clients pass us 0 based
56          * offsets relative to the start of the framebuffer and some may
57          * assume the AGP aperture it appended to the framebuffer, so we
58          * try to detect those cases and fix them up.
59          *
60          * Note: It might be a good idea here to make sure the offset lands
61          * in some "allowed" area to protect things like the PCIE GART...
62          */
63
64         /* First, the best case, the offset already lands in either the
65          * framebuffer or the GART mapped space
66          */
67         if (radeon_check_offset(dev_priv, off))
68                 return 0;
69
70         /* Ok, that didn't happen... now check if we have a zero based
71          * offset that fits in the framebuffer + gart space, apply the
72          * magic offset we get from SETPARAM or calculated from fb_location
73          */
74         if (off < (dev_priv->fb_size + dev_priv->gart_size)) {
75                 radeon_priv = file_priv->driver_priv;
76                 off += radeon_priv->radeon_fb_delta;
77         }
78
79         /* Finally, assume we aimed at a GART offset if beyond the fb */
80         if (off > fb_end)
81                 off = off - fb_end - 1 + dev_priv->gart_vm_start;
82
83         /* Now recheck and fail if out of bounds */
84         if (radeon_check_offset(dev_priv, off)) {
85                 DRM_DEBUG("offset fixed up to 0x%x\n", (unsigned int)off);
86                 *offset = off;
87                 return 0;
88         }
89         return -EINVAL;
90 }
91
92 static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
93                                                      dev_priv,
94                                                      struct drm_file *file_priv,
95                                                      int id, struct drm_buffer *buf)
96 {
97         u32 *data;
98         switch (id) {
99
100         case RADEON_EMIT_PP_MISC:
101                 data = drm_buffer_pointer_to_dword(buf,
102                         (RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4);
103
104                 if (radeon_check_and_fixup_offset(dev_priv, file_priv, data)) {
105                         DRM_ERROR("Invalid depth buffer offset\n");
106                         return -EINVAL;
107                 }
108                 dev_priv->have_z_offset = 1;
109                 break;
110
111         case RADEON_EMIT_PP_CNTL:
112                 data = drm_buffer_pointer_to_dword(buf,
113                         (RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4);
114
115                 if (radeon_check_and_fixup_offset(dev_priv, file_priv, data)) {
116                         DRM_ERROR("Invalid colour buffer offset\n");
117                         return -EINVAL;
118                 }
119                 break;
120
121         case R200_EMIT_PP_TXOFFSET_0:
122         case R200_EMIT_PP_TXOFFSET_1:
123         case R200_EMIT_PP_TXOFFSET_2:
124         case R200_EMIT_PP_TXOFFSET_3:
125         case R200_EMIT_PP_TXOFFSET_4:
126         case R200_EMIT_PP_TXOFFSET_5:
127                 data = drm_buffer_pointer_to_dword(buf, 0);
128                 if (radeon_check_and_fixup_offset(dev_priv, file_priv, data)) {
129                         DRM_ERROR("Invalid R200 texture offset\n");
130                         return -EINVAL;
131                 }
132                 break;
133
134         case RADEON_EMIT_PP_TXFILTER_0:
135         case RADEON_EMIT_PP_TXFILTER_1:
136         case RADEON_EMIT_PP_TXFILTER_2:
137                 data = drm_buffer_pointer_to_dword(buf,
138                         (RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4);
139                 if (radeon_check_and_fixup_offset(dev_priv, file_priv, data)) {
140                         DRM_ERROR("Invalid R100 texture offset\n");
141                         return -EINVAL;
142                 }
143                 break;
144
145         case R200_EMIT_PP_CUBIC_OFFSETS_0:
146         case R200_EMIT_PP_CUBIC_OFFSETS_1:
147         case R200_EMIT_PP_CUBIC_OFFSETS_2:
148         case R200_EMIT_PP_CUBIC_OFFSETS_3:
149         case R200_EMIT_PP_CUBIC_OFFSETS_4:
150         case R200_EMIT_PP_CUBIC_OFFSETS_5:{
151                         int i;
152                         for (i = 0; i < 5; i++) {
153                                 data = drm_buffer_pointer_to_dword(buf, i);
154                                 if (radeon_check_and_fixup_offset(dev_priv,
155                                                                   file_priv,
156                                                                   data)) {
157                                         DRM_ERROR
158                                             ("Invalid R200 cubic texture offset\n");
159                                         return -EINVAL;
160                                 }
161                         }
162                         break;
163                 }
164
165         case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
166         case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
167         case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
168                         int i;
169                         for (i = 0; i < 5; i++) {
170                                 data = drm_buffer_pointer_to_dword(buf, i);
171                                 if (radeon_check_and_fixup_offset(dev_priv,
172                                                                   file_priv,
173                                                                   data)) {
174                                         DRM_ERROR
175                                             ("Invalid R100 cubic texture offset\n");
176                                         return -EINVAL;
177                                 }
178                         }
179                 }
180                 break;
181
182         case R200_EMIT_VAP_CTL:{
183                         RING_LOCALS;
184                         BEGIN_RING(2);
185                         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
186                         ADVANCE_RING();
187                 }
188                 break;
189
190         case RADEON_EMIT_RB3D_COLORPITCH:
191         case RADEON_EMIT_RE_LINE_PATTERN:
192         case RADEON_EMIT_SE_LINE_WIDTH:
193         case RADEON_EMIT_PP_LUM_MATRIX:
194         case RADEON_EMIT_PP_ROT_MATRIX_0:
195         case RADEON_EMIT_RB3D_STENCILREFMASK:
196         case RADEON_EMIT_SE_VPORT_XSCALE:
197         case RADEON_EMIT_SE_CNTL:
198         case RADEON_EMIT_SE_CNTL_STATUS:
199         case RADEON_EMIT_RE_MISC:
200         case RADEON_EMIT_PP_BORDER_COLOR_0:
201         case RADEON_EMIT_PP_BORDER_COLOR_1:
202         case RADEON_EMIT_PP_BORDER_COLOR_2:
203         case RADEON_EMIT_SE_ZBIAS_FACTOR:
204         case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
205         case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
206         case R200_EMIT_PP_TXCBLEND_0:
207         case R200_EMIT_PP_TXCBLEND_1:
208         case R200_EMIT_PP_TXCBLEND_2:
209         case R200_EMIT_PP_TXCBLEND_3:
210         case R200_EMIT_PP_TXCBLEND_4:
211         case R200_EMIT_PP_TXCBLEND_5:
212         case R200_EMIT_PP_TXCBLEND_6:
213         case R200_EMIT_PP_TXCBLEND_7:
214         case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
215         case R200_EMIT_TFACTOR_0:
216         case R200_EMIT_VTX_FMT_0:
217         case R200_EMIT_MATRIX_SELECT_0:
218         case R200_EMIT_TEX_PROC_CTL_2:
219         case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
220         case R200_EMIT_PP_TXFILTER_0:
221         case R200_EMIT_PP_TXFILTER_1:
222         case R200_EMIT_PP_TXFILTER_2:
223         case R200_EMIT_PP_TXFILTER_3:
224         case R200_EMIT_PP_TXFILTER_4:
225         case R200_EMIT_PP_TXFILTER_5:
226         case R200_EMIT_VTE_CNTL:
227         case R200_EMIT_OUTPUT_VTX_COMP_SEL:
228         case R200_EMIT_PP_TAM_DEBUG3:
229         case R200_EMIT_PP_CNTL_X:
230         case R200_EMIT_RB3D_DEPTHXY_OFFSET:
231         case R200_EMIT_RE_AUX_SCISSOR_CNTL:
232         case R200_EMIT_RE_SCISSOR_TL_0:
233         case R200_EMIT_RE_SCISSOR_TL_1:
234         case R200_EMIT_RE_SCISSOR_TL_2:
235         case R200_EMIT_SE_VAP_CNTL_STATUS:
236         case R200_EMIT_SE_VTX_STATE_CNTL:
237         case R200_EMIT_RE_POINTSIZE:
238         case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
239         case R200_EMIT_PP_CUBIC_FACES_0:
240         case R200_EMIT_PP_CUBIC_FACES_1:
241         case R200_EMIT_PP_CUBIC_FACES_2:
242         case R200_EMIT_PP_CUBIC_FACES_3:
243         case R200_EMIT_PP_CUBIC_FACES_4:
244         case R200_EMIT_PP_CUBIC_FACES_5:
245         case RADEON_EMIT_PP_TEX_SIZE_0:
246         case RADEON_EMIT_PP_TEX_SIZE_1:
247         case RADEON_EMIT_PP_TEX_SIZE_2:
248         case R200_EMIT_RB3D_BLENDCOLOR:
249         case R200_EMIT_TCL_POINT_SPRITE_CNTL:
250         case RADEON_EMIT_PP_CUBIC_FACES_0:
251         case RADEON_EMIT_PP_CUBIC_FACES_1:
252         case RADEON_EMIT_PP_CUBIC_FACES_2:
253         case R200_EMIT_PP_TRI_PERF_CNTL:
254         case R200_EMIT_PP_AFS_0:
255         case R200_EMIT_PP_AFS_1:
256         case R200_EMIT_ATF_TFACTOR:
257         case R200_EMIT_PP_TXCTLALL_0:
258         case R200_EMIT_PP_TXCTLALL_1:
259         case R200_EMIT_PP_TXCTLALL_2:
260         case R200_EMIT_PP_TXCTLALL_3:
261         case R200_EMIT_PP_TXCTLALL_4:
262         case R200_EMIT_PP_TXCTLALL_5:
263         case R200_EMIT_VAP_PVS_CNTL:
264                 /* These packets don't contain memory offsets */
265                 break;
266
267         default:
268                 DRM_ERROR("Unknown state packet ID %d\n", id);
269                 return -EINVAL;
270         }
271
272         return 0;
273 }
274
275 static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
276                                                      dev_priv,
277                                                      struct drm_file *file_priv,
278                                                      drm_radeon_kcmd_buffer_t *
279                                                      cmdbuf,
280                                                      unsigned int *cmdsz)
281 {
282         u32 *cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
283         u32 offset, narrays;
284         int count, i, k;
285
286         count = ((*cmd & RADEON_CP_PACKET_COUNT_MASK) >> 16);
287         *cmdsz = 2 + count;
288
289         if ((*cmd & 0xc0000000) != RADEON_CP_PACKET3) {
290                 DRM_ERROR("Not a type 3 packet\n");
291                 return -EINVAL;
292         }
293
294         if (4 * *cmdsz > drm_buffer_unprocessed(cmdbuf->buffer)) {
295                 DRM_ERROR("Packet size larger than size of data provided\n");
296                 return -EINVAL;
297         }
298
299         switch (*cmd & 0xff00) {
300         /* XXX Are there old drivers needing other packets? */
301
302         case RADEON_3D_DRAW_IMMD:
303         case RADEON_3D_DRAW_VBUF:
304         case RADEON_3D_DRAW_INDX:
305         case RADEON_WAIT_FOR_IDLE:
306         case RADEON_CP_NOP:
307         case RADEON_3D_CLEAR_ZMASK:
308 /*      case RADEON_CP_NEXT_CHAR:
309         case RADEON_CP_PLY_NEXTSCAN:
310         case RADEON_CP_SET_SCISSORS: */ /* probably safe but will never need them? */
311                 /* these packets are safe */
312                 break;
313
314         case RADEON_CP_3D_DRAW_IMMD_2:
315         case RADEON_CP_3D_DRAW_VBUF_2:
316         case RADEON_CP_3D_DRAW_INDX_2:
317         case RADEON_3D_CLEAR_HIZ:
318                 /* safe but r200 only */
319                 if (dev_priv->microcode_version != UCODE_R200) {
320                         DRM_ERROR("Invalid 3d packet for r100-class chip\n");
321                         return -EINVAL;
322                 }
323                 break;
324
325         case RADEON_3D_LOAD_VBPNTR:
326
327                 if (count > 18) { /* 12 arrays max */
328                         DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
329                                   count);
330                         return -EINVAL;
331                 }
332
333                 /* carefully check packet contents */
334                 cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
335
336                 narrays = *cmd & ~0xc000;
337                 k = 0;
338                 i = 2;
339                 while ((k < narrays) && (i < (count + 2))) {
340                         i++;            /* skip attribute field */
341                         cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, i);
342                         if (radeon_check_and_fixup_offset(dev_priv, file_priv,
343                                                           cmd)) {
344                                 DRM_ERROR
345                                     ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
346                                      k, i);
347                                 return -EINVAL;
348                         }
349                         k++;
350                         i++;
351                         if (k == narrays)
352                                 break;
353                         /* have one more to process, they come in pairs */
354                         cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, i);
355
356                         if (radeon_check_and_fixup_offset(dev_priv,
357                                                           file_priv, cmd))
358                         {
359                                 DRM_ERROR
360                                     ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
361                                      k, i);
362                                 return -EINVAL;
363                         }
364                         k++;
365                         i++;
366                 }
367                 /* do the counts match what we expect ? */
368                 if ((k != narrays) || (i != (count + 2))) {
369                         DRM_ERROR
370                             ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
371                               k, i, narrays, count + 1);
372                         return -EINVAL;
373                 }
374                 break;
375
376         case RADEON_3D_RNDR_GEN_INDX_PRIM:
377                 if (dev_priv->microcode_version != UCODE_R100) {
378                         DRM_ERROR("Invalid 3d packet for r200-class chip\n");
379                         return -EINVAL;
380                 }
381
382                 cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
383                 if (radeon_check_and_fixup_offset(dev_priv, file_priv, cmd)) {
384                                 DRM_ERROR("Invalid rndr_gen_indx offset\n");
385                                 return -EINVAL;
386                 }
387                 break;
388
389         case RADEON_CP_INDX_BUFFER:
390                 if (dev_priv->microcode_version != UCODE_R200) {
391                         DRM_ERROR("Invalid 3d packet for r100-class chip\n");
392                         return -EINVAL;
393                 }
394
395                 cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
396                 if ((*cmd & 0x8000ffff) != 0x80000810) {
397                         DRM_ERROR("Invalid indx_buffer reg address %08X\n", *cmd);
398                         return -EINVAL;
399                 }
400                 cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 2);
401                 if (radeon_check_and_fixup_offset(dev_priv, file_priv, cmd)) {
402                         DRM_ERROR("Invalid indx_buffer offset is %08X\n", *cmd);
403                         return -EINVAL;
404                 }
405                 break;
406
407         case RADEON_CNTL_HOSTDATA_BLT:
408         case RADEON_CNTL_PAINT_MULTI:
409         case RADEON_CNTL_BITBLT_MULTI:
410                 /* MSB of opcode: next DWORD GUI_CNTL */
411                 cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
412                 if (*cmd & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
413                               | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
414                         u32 *cmd2 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 2);
415                         offset = *cmd2 << 10;
416                         if (radeon_check_and_fixup_offset
417                             (dev_priv, file_priv, &offset)) {
418                                 DRM_ERROR("Invalid first packet offset\n");
419                                 return -EINVAL;
420                         }
421                         *cmd2 = (*cmd2 & 0xffc00000) | offset >> 10;
422                 }
423
424                 if ((*cmd & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
425                     (*cmd & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
426                         u32 *cmd3 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 3);
427                         offset = *cmd << 10;
428                         if (radeon_check_and_fixup_offset
429                             (dev_priv, file_priv, &offset)) {
430                                 DRM_ERROR("Invalid second packet offset\n");
431                                 return -EINVAL;
432                         }
433                         *cmd3 = (*cmd3 & 0xffc00000) | offset >> 10;
434                 }
435                 break;
436
437         default:
438                 DRM_ERROR("Invalid packet type %x\n", *cmd & 0xff00);
439                 return -EINVAL;
440         }
441
442         return 0;
443 }
444
445 /* ================================================================
446  * CP hardware state programming functions
447  */
448
449 static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
450                                              struct drm_clip_rect * box)
451 {
452         RING_LOCALS;
453
454         DRM_DEBUG("   box:  x1=%d y1=%d  x2=%d y2=%d\n",
455                   box->x1, box->y1, box->x2, box->y2);
456
457         BEGIN_RING(4);
458         OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
459         OUT_RING((box->y1 << 16) | box->x1);
460         OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
461         OUT_RING(((box->y2 - 1) << 16) | (box->x2 - 1));
462         ADVANCE_RING();
463 }
464
465 /* Emit 1.1 state
466  */
467 static int radeon_emit_state(drm_radeon_private_t * dev_priv,
468                              struct drm_file *file_priv,
469                              drm_radeon_context_regs_t * ctx,
470                              drm_radeon_texture_regs_t * tex,
471                              unsigned int dirty)
472 {
473         RING_LOCALS;
474         DRM_DEBUG("dirty=0x%08x\n", dirty);
475
476         if (dirty & RADEON_UPLOAD_CONTEXT) {
477                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
478                                                   &ctx->rb3d_depthoffset)) {
479                         DRM_ERROR("Invalid depth buffer offset\n");
480                         return -EINVAL;
481                 }
482
483                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
484                                                   &ctx->rb3d_coloroffset)) {
485                         DRM_ERROR("Invalid depth buffer offset\n");
486                         return -EINVAL;
487                 }
488
489                 BEGIN_RING(14);
490                 OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
491                 OUT_RING(ctx->pp_misc);
492                 OUT_RING(ctx->pp_fog_color);
493                 OUT_RING(ctx->re_solid_color);
494                 OUT_RING(ctx->rb3d_blendcntl);
495                 OUT_RING(ctx->rb3d_depthoffset);
496                 OUT_RING(ctx->rb3d_depthpitch);
497                 OUT_RING(ctx->rb3d_zstencilcntl);
498                 OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
499                 OUT_RING(ctx->pp_cntl);
500                 OUT_RING(ctx->rb3d_cntl);
501                 OUT_RING(ctx->rb3d_coloroffset);
502                 OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
503                 OUT_RING(ctx->rb3d_colorpitch);
504                 ADVANCE_RING();
505         }
506
507         if (dirty & RADEON_UPLOAD_VERTFMT) {
508                 BEGIN_RING(2);
509                 OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
510                 OUT_RING(ctx->se_coord_fmt);
511                 ADVANCE_RING();
512         }
513
514         if (dirty & RADEON_UPLOAD_LINE) {
515                 BEGIN_RING(5);
516                 OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
517                 OUT_RING(ctx->re_line_pattern);
518                 OUT_RING(ctx->re_line_state);
519                 OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
520                 OUT_RING(ctx->se_line_width);
521                 ADVANCE_RING();
522         }
523
524         if (dirty & RADEON_UPLOAD_BUMPMAP) {
525                 BEGIN_RING(5);
526                 OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
527                 OUT_RING(ctx->pp_lum_matrix);
528                 OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
529                 OUT_RING(ctx->pp_rot_matrix_0);
530                 OUT_RING(ctx->pp_rot_matrix_1);
531                 ADVANCE_RING();
532         }
533
534         if (dirty & RADEON_UPLOAD_MASKS) {
535                 BEGIN_RING(4);
536                 OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
537                 OUT_RING(ctx->rb3d_stencilrefmask);
538                 OUT_RING(ctx->rb3d_ropcntl);
539                 OUT_RING(ctx->rb3d_planemask);
540                 ADVANCE_RING();
541         }
542
543         if (dirty & RADEON_UPLOAD_VIEWPORT) {
544                 BEGIN_RING(7);
545                 OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
546                 OUT_RING(ctx->se_vport_xscale);
547                 OUT_RING(ctx->se_vport_xoffset);
548                 OUT_RING(ctx->se_vport_yscale);
549                 OUT_RING(ctx->se_vport_yoffset);
550                 OUT_RING(ctx->se_vport_zscale);
551                 OUT_RING(ctx->se_vport_zoffset);
552                 ADVANCE_RING();
553         }
554
555         if (dirty & RADEON_UPLOAD_SETUP) {
556                 BEGIN_RING(4);
557                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
558                 OUT_RING(ctx->se_cntl);
559                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
560                 OUT_RING(ctx->se_cntl_status);
561                 ADVANCE_RING();
562         }
563
564         if (dirty & RADEON_UPLOAD_MISC) {
565                 BEGIN_RING(2);
566                 OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
567                 OUT_RING(ctx->re_misc);
568                 ADVANCE_RING();
569         }
570
571         if (dirty & RADEON_UPLOAD_TEX0) {
572                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
573                                                   &tex[0].pp_txoffset)) {
574                         DRM_ERROR("Invalid texture offset for unit 0\n");
575                         return -EINVAL;
576                 }
577
578                 BEGIN_RING(9);
579                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5));
580                 OUT_RING(tex[0].pp_txfilter);
581                 OUT_RING(tex[0].pp_txformat);
582                 OUT_RING(tex[0].pp_txoffset);
583                 OUT_RING(tex[0].pp_txcblend);
584                 OUT_RING(tex[0].pp_txablend);
585                 OUT_RING(tex[0].pp_tfactor);
586                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0));
587                 OUT_RING(tex[0].pp_border_color);
588                 ADVANCE_RING();
589         }
590
591         if (dirty & RADEON_UPLOAD_TEX1) {
592                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
593                                                   &tex[1].pp_txoffset)) {
594                         DRM_ERROR("Invalid texture offset for unit 1\n");
595                         return -EINVAL;
596                 }
597
598                 BEGIN_RING(9);
599                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5));
600                 OUT_RING(tex[1].pp_txfilter);
601                 OUT_RING(tex[1].pp_txformat);
602                 OUT_RING(tex[1].pp_txoffset);
603                 OUT_RING(tex[1].pp_txcblend);
604                 OUT_RING(tex[1].pp_txablend);
605                 OUT_RING(tex[1].pp_tfactor);
606                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0));
607                 OUT_RING(tex[1].pp_border_color);
608                 ADVANCE_RING();
609         }
610
611         if (dirty & RADEON_UPLOAD_TEX2) {
612                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
613                                                   &tex[2].pp_txoffset)) {
614                         DRM_ERROR("Invalid texture offset for unit 2\n");
615                         return -EINVAL;
616                 }
617
618                 BEGIN_RING(9);
619                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5));
620                 OUT_RING(tex[2].pp_txfilter);
621                 OUT_RING(tex[2].pp_txformat);
622                 OUT_RING(tex[2].pp_txoffset);
623                 OUT_RING(tex[2].pp_txcblend);
624                 OUT_RING(tex[2].pp_txablend);
625                 OUT_RING(tex[2].pp_tfactor);
626                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0));
627                 OUT_RING(tex[2].pp_border_color);
628                 ADVANCE_RING();
629         }
630
631         return 0;
632 }
633
634 /* Emit 1.2 state
635  */
636 static int radeon_emit_state2(drm_radeon_private_t * dev_priv,
637                               struct drm_file *file_priv,
638                               drm_radeon_state_t * state)
639 {
640         RING_LOCALS;
641
642         if (state->dirty & RADEON_UPLOAD_ZBIAS) {
643                 BEGIN_RING(3);
644                 OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
645                 OUT_RING(state->context2.se_zbias_factor);
646                 OUT_RING(state->context2.se_zbias_constant);
647                 ADVANCE_RING();
648         }
649
650         return radeon_emit_state(dev_priv, file_priv, &state->context,
651                                  state->tex, state->dirty);
652 }
653
654 /* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
655  * 1.3 cmdbuffers allow all previous state to be updated as well as
656  * the tcl scalar and vector areas.
657  */
658 static struct {
659         int start;
660         int len;
661         const char *name;
662 } packet[RADEON_MAX_STATE_PACKETS] = {
663         {RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
664         {RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
665         {RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
666         {RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
667         {RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
668         {RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
669         {RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
670         {RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
671         {RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
672         {RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
673         {RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
674         {RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
675         {RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
676         {RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
677         {RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
678         {RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
679         {RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
680         {RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
681         {RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
682         {RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
683         {RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
684                     "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
685         {R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
686         {R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
687         {R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
688         {R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
689         {R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
690         {R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
691         {R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
692         {R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
693         {R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
694         {R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
695         {R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
696         {R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
697         {R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
698         {R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
699         {R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
700         {R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
701         {R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
702         {R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
703         {R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
704         {R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
705         {R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
706         {R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
707         {R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
708         {R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
709         {R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
710         {R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
711         {R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
712         {R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
713         {R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
714          "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
715         {R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
716         {R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
717         {R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
718         {R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
719         {R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
720         {R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
721         {R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
722         {R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
723         {R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
724         {R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
725         {R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
726                     "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
727         {R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},    /* 61 */
728         {R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
729         {R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
730         {R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
731         {R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
732         {R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
733         {R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
734         {R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
735         {R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
736         {R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
737         {R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
738         {R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
739         {RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
740         {RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
741         {RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
742         {R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
743         {R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
744         {RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
745         {RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
746         {RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
747         {RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
748         {RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
749         {RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
750         {R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
751         {R200_PP_AFS_0, 32, "R200_PP_AFS_0"},     /* 85 */
752         {R200_PP_AFS_1, 32, "R200_PP_AFS_1"},
753         {R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
754         {R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
755         {R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
756         {R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
757         {R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
758         {R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
759         {R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
760         {R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
761 };
762
763 /* ================================================================
764  * Performance monitoring functions
765  */
766
767 static void radeon_clear_box(drm_radeon_private_t * dev_priv,
768                              struct drm_radeon_master_private *master_priv,
769                              int x, int y, int w, int h, int r, int g, int b)
770 {
771         u32 color;
772         RING_LOCALS;
773
774         x += master_priv->sarea_priv->boxes[0].x1;
775         y += master_priv->sarea_priv->boxes[0].y1;
776
777         switch (dev_priv->color_fmt) {
778         case RADEON_COLOR_FORMAT_RGB565:
779                 color = (((r & 0xf8) << 8) |
780                          ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
781                 break;
782         case RADEON_COLOR_FORMAT_ARGB8888:
783         default:
784                 color = (((0xff) << 24) | (r << 16) | (g << 8) | b);
785                 break;
786         }
787
788         BEGIN_RING(4);
789         RADEON_WAIT_UNTIL_3D_IDLE();
790         OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
791         OUT_RING(0xffffffff);
792         ADVANCE_RING();
793
794         BEGIN_RING(6);
795
796         OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
797         OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
798                  RADEON_GMC_BRUSH_SOLID_COLOR |
799                  (dev_priv->color_fmt << 8) |
800                  RADEON_GMC_SRC_DATATYPE_COLOR |
801                  RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);
802
803         if (master_priv->sarea_priv->pfCurrentPage == 1) {
804                 OUT_RING(dev_priv->front_pitch_offset);
805         } else {
806                 OUT_RING(dev_priv->back_pitch_offset);
807         }
808
809         OUT_RING(color);
810
811         OUT_RING((x << 16) | y);
812         OUT_RING((w << 16) | h);
813
814         ADVANCE_RING();
815 }
816
817 static void radeon_cp_performance_boxes(drm_radeon_private_t *dev_priv, struct drm_radeon_master_private *master_priv)
818 {
819         /* Collapse various things into a wait flag -- trying to
820          * guess if userspase slept -- better just to have them tell us.
821          */
822         if (dev_priv->stats.last_frame_reads > 1 ||
823             dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
824                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
825         }
826
827         if (dev_priv->stats.freelist_loops) {
828                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
829         }
830
831         /* Purple box for page flipping
832          */
833         if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
834                 radeon_clear_box(dev_priv, master_priv, 4, 4, 8, 8, 255, 0, 255);
835
836         /* Red box if we have to wait for idle at any point
837          */
838         if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
839                 radeon_clear_box(dev_priv, master_priv, 16, 4, 8, 8, 255, 0, 0);
840
841         /* Blue box: lost context?
842          */
843
844         /* Yellow box for texture swaps
845          */
846         if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
847                 radeon_clear_box(dev_priv, master_priv, 40, 4, 8, 8, 255, 255, 0);
848
849         /* Green box if hardware never idles (as far as we can tell)
850          */
851         if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
852                 radeon_clear_box(dev_priv, master_priv, 64, 4, 8, 8, 0, 255, 0);
853
854         /* Draw bars indicating number of buffers allocated
855          * (not a great measure, easily confused)
856          */
857         if (dev_priv->stats.requested_bufs) {
858                 if (dev_priv->stats.requested_bufs > 100)
859                         dev_priv->stats.requested_bufs = 100;
860
861                 radeon_clear_box(dev_priv, master_priv, 4, 16,
862                                  dev_priv->stats.requested_bufs, 4,
863                                  196, 128, 128);
864         }
865
866         memset(&dev_priv->stats, 0, sizeof(dev_priv->stats));
867
868 }
869
870 /* ================================================================
871  * CP command dispatch functions
872  */
873
874 static void radeon_cp_dispatch_clear(struct drm_device * dev,
875                                      struct drm_master *master,
876                                      drm_radeon_clear_t * clear,
877                                      drm_radeon_clear_rect_t * depth_boxes)
878 {
879         drm_radeon_private_t *dev_priv = dev->dev_private;
880         struct drm_radeon_master_private *master_priv = master->driver_priv;
881         drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
882         drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
883         int nbox = sarea_priv->nbox;
884         struct drm_clip_rect *pbox = sarea_priv->boxes;
885         unsigned int flags = clear->flags;
886         u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
887         int i;
888         RING_LOCALS;
889         DRM_DEBUG("flags = 0x%x\n", flags);
890
891         dev_priv->stats.clears++;
892
893         if (sarea_priv->pfCurrentPage == 1) {
894                 unsigned int tmp = flags;
895
896                 flags &= ~(RADEON_FRONT | RADEON_BACK);
897                 if (tmp & RADEON_FRONT)
898                         flags |= RADEON_BACK;
899                 if (tmp & RADEON_BACK)
900                         flags |= RADEON_FRONT;
901         }
902         if (flags & (RADEON_DEPTH|RADEON_STENCIL)) {
903                 if (!dev_priv->have_z_offset)
904                         printk_once(KERN_ERR "radeon: illegal depth clear request. Buggy mesa detected - please update.\n");
905                 flags &= ~(RADEON_DEPTH | RADEON_STENCIL);
906         }
907
908         if (flags & (RADEON_FRONT | RADEON_BACK)) {
909
910                 BEGIN_RING(4);
911
912                 /* Ensure the 3D stream is idle before doing a
913                  * 2D fill to clear the front or back buffer.
914                  */
915                 RADEON_WAIT_UNTIL_3D_IDLE();
916
917                 OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
918                 OUT_RING(clear->color_mask);
919
920                 ADVANCE_RING();
921
922                 /* Make sure we restore the 3D state next time.
923                  */
924                 sarea_priv->ctx_owner = 0;
925
926                 for (i = 0; i < nbox; i++) {
927                         int x = pbox[i].x1;
928                         int y = pbox[i].y1;
929                         int w = pbox[i].x2 - x;
930                         int h = pbox[i].y2 - y;
931
932                         DRM_DEBUG("%d,%d-%d,%d flags 0x%x\n",
933                                   x, y, w, h, flags);
934
935                         if (flags & RADEON_FRONT) {
936                                 BEGIN_RING(6);
937
938                                 OUT_RING(CP_PACKET3
939                                          (RADEON_CNTL_PAINT_MULTI, 4));
940                                 OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
941                                          RADEON_GMC_BRUSH_SOLID_COLOR |
942                                          (dev_priv->
943                                           color_fmt << 8) |
944                                          RADEON_GMC_SRC_DATATYPE_COLOR |
945                                          RADEON_ROP3_P |
946                                          RADEON_GMC_CLR_CMP_CNTL_DIS);
947
948                                 OUT_RING(dev_priv->front_pitch_offset);
949                                 OUT_RING(clear->clear_color);
950
951                                 OUT_RING((x << 16) | y);
952                                 OUT_RING((w << 16) | h);
953
954                                 ADVANCE_RING();
955                         }
956
957                         if (flags & RADEON_BACK) {
958                                 BEGIN_RING(6);
959
960                                 OUT_RING(CP_PACKET3
961                                          (RADEON_CNTL_PAINT_MULTI, 4));
962                                 OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
963                                          RADEON_GMC_BRUSH_SOLID_COLOR |
964                                          (dev_priv->
965                                           color_fmt << 8) |
966                                          RADEON_GMC_SRC_DATATYPE_COLOR |
967                                          RADEON_ROP3_P |
968                                          RADEON_GMC_CLR_CMP_CNTL_DIS);
969
970                                 OUT_RING(dev_priv->back_pitch_offset);
971                                 OUT_RING(clear->clear_color);
972
973                                 OUT_RING((x << 16) | y);
974                                 OUT_RING((w << 16) | h);
975
976                                 ADVANCE_RING();
977                         }
978                 }
979         }
980
981         /* hyper z clear */
982         /* no docs available, based on reverse engeneering by Stephane Marchesin */
983         if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
984             && (flags & RADEON_CLEAR_FASTZ)) {
985
986                 int i;
987                 int depthpixperline =
988                     dev_priv->depth_fmt ==
989                     RADEON_DEPTH_FORMAT_16BIT_INT_Z ? (dev_priv->depth_pitch /
990                                                        2) : (dev_priv->
991                                                              depth_pitch / 4);
992
993                 u32 clearmask;
994
995                 u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
996                     ((clear->depth_mask & 0xff) << 24);
997
998                 /* Make sure we restore the 3D state next time.
999                  * we haven't touched any "normal" state - still need this?
1000                  */
1001                 sarea_priv->ctx_owner = 0;
1002
1003                 if ((dev_priv->flags & RADEON_HAS_HIERZ)
1004                     && (flags & RADEON_USE_HIERZ)) {
1005                         /* FIXME : reverse engineer that for Rx00 cards */
1006                         /* FIXME : the mask supposedly contains low-res z values. So can't set
1007                            just to the max (0xff? or actually 0x3fff?), need to take z clear
1008                            value into account? */
1009                         /* pattern seems to work for r100, though get slight
1010                            rendering errors with glxgears. If hierz is not enabled for r100,
1011                            only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
1012                            other ones are ignored, and the same clear mask can be used. That's
1013                            very different behaviour than R200 which needs different clear mask
1014                            and different number of tiles to clear if hierz is enabled or not !?!
1015                          */
1016                         clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f;
1017                 } else {
1018                         /* clear mask : chooses the clearing pattern.
1019                            rv250: could be used to clear only parts of macrotiles
1020                            (but that would get really complicated...)?
1021                            bit 0 and 1 (either or both of them ?!?!) are used to
1022                            not clear tile (or maybe one of the bits indicates if the tile is
1023                            compressed or not), bit 2 and 3 to not clear tile 1,...,.
1024                            Pattern is as follows:
1025                            | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
1026                            bits -------------------------------------------------
1027                            | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
1028                            rv100: clearmask covers 2x8 4x1 tiles, but one clear still
1029                            covers 256 pixels ?!?
1030                          */
1031                         clearmask = 0x0;
1032                 }
1033
1034                 BEGIN_RING(8);
1035                 RADEON_WAIT_UNTIL_2D_IDLE();
1036                 OUT_RING_REG(RADEON_RB3D_DEPTHCLEARVALUE,
1037                              tempRB3D_DEPTHCLEARVALUE);
1038                 /* what offset is this exactly ? */
1039                 OUT_RING_REG(RADEON_RB3D_ZMASKOFFSET, 0);
1040                 /* need ctlstat, otherwise get some strange black flickering */
1041                 OUT_RING_REG(RADEON_RB3D_ZCACHE_CTLSTAT,
1042                              RADEON_RB3D_ZC_FLUSH_ALL);
1043                 ADVANCE_RING();
1044
1045                 for (i = 0; i < nbox; i++) {
1046                         int tileoffset, nrtilesx, nrtilesy, j;
1047                         /* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
1048                         if ((dev_priv->flags & RADEON_HAS_HIERZ)
1049                             && !(dev_priv->microcode_version == UCODE_R200)) {
1050                                 /* FIXME : figure this out for r200 (when hierz is enabled). Or
1051                                    maybe r200 actually doesn't need to put the low-res z value into
1052                                    the tile cache like r100, but just needs to clear the hi-level z-buffer?
1053                                    Works for R100, both with hierz and without.
1054                                    R100 seems to operate on 2x1 8x8 tiles, but...
1055                                    odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
1056                                    problematic with resolutions which are not 64 pix aligned? */
1057                                 tileoffset =
1058                                     ((pbox[i].y1 >> 3) * depthpixperline +
1059                                      pbox[i].x1) >> 6;
1060                                 nrtilesx =
1061                                     ((pbox[i].x2 & ~63) -
1062                                      (pbox[i].x1 & ~63)) >> 4;
1063                                 nrtilesy =
1064                                     (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
1065                                 for (j = 0; j <= nrtilesy; j++) {
1066                                         BEGIN_RING(4);
1067                                         OUT_RING(CP_PACKET3
1068                                                  (RADEON_3D_CLEAR_ZMASK, 2));
1069                                         /* first tile */
1070                                         OUT_RING(tileoffset * 8);
1071                                         /* the number of tiles to clear */
1072                                         OUT_RING(nrtilesx + 4);
1073                                         /* clear mask : chooses the clearing pattern. */
1074                                         OUT_RING(clearmask);
1075                                         ADVANCE_RING();
1076                                         tileoffset += depthpixperline >> 6;
1077                                 }
1078                         } else if (dev_priv->microcode_version == UCODE_R200) {
1079                                 /* works for rv250. */
1080                                 /* find first macro tile (8x2 4x4 z-pixels on rv250) */
1081                                 tileoffset =
1082                                     ((pbox[i].y1 >> 3) * depthpixperline +
1083                                      pbox[i].x1) >> 5;
1084                                 nrtilesx =
1085                                     (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
1086                                 nrtilesy =
1087                                     (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
1088                                 for (j = 0; j <= nrtilesy; j++) {
1089                                         BEGIN_RING(4);
1090                                         OUT_RING(CP_PACKET3
1091                                                  (RADEON_3D_CLEAR_ZMASK, 2));
1092                                         /* first tile */
1093                                         /* judging by the first tile offset needed, could possibly
1094                                            directly address/clear 4x4 tiles instead of 8x2 * 4x4
1095                                            macro tiles, though would still need clear mask for
1096                                            right/bottom if truely 4x4 granularity is desired ? */
1097                                         OUT_RING(tileoffset * 16);
1098                                         /* the number of tiles to clear */
1099                                         OUT_RING(nrtilesx + 1);
1100                                         /* clear mask : chooses the clearing pattern. */
1101                                         OUT_RING(clearmask);
1102                                         ADVANCE_RING();
1103                                         tileoffset += depthpixperline >> 5;
1104                                 }
1105                         } else {        /* rv 100 */
1106                                 /* rv100 might not need 64 pix alignment, who knows */
1107                                 /* offsets are, hmm, weird */
1108                                 tileoffset =
1109                                     ((pbox[i].y1 >> 4) * depthpixperline +
1110                                      pbox[i].x1) >> 6;
1111                                 nrtilesx =
1112                                     ((pbox[i].x2 & ~63) -
1113                                      (pbox[i].x1 & ~63)) >> 4;
1114                                 nrtilesy =
1115                                     (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
1116                                 for (j = 0; j <= nrtilesy; j++) {
1117                                         BEGIN_RING(4);
1118                                         OUT_RING(CP_PACKET3
1119                                                  (RADEON_3D_CLEAR_ZMASK, 2));
1120                                         OUT_RING(tileoffset * 128);
1121                                         /* the number of tiles to clear */
1122                                         OUT_RING(nrtilesx + 4);
1123                                         /* clear mask : chooses the clearing pattern. */
1124                                         OUT_RING(clearmask);
1125                                         ADVANCE_RING();
1126                                         tileoffset += depthpixperline >> 6;
1127                                 }
1128                         }
1129                 }
1130
1131                 /* TODO don't always clear all hi-level z tiles */
1132                 if ((dev_priv->flags & RADEON_HAS_HIERZ)
1133                     && (dev_priv->microcode_version == UCODE_R200)
1134                     && (flags & RADEON_USE_HIERZ))
1135                         /* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
1136                         /* FIXME : the mask supposedly contains low-res z values. So can't set
1137                            just to the max (0xff? or actually 0x3fff?), need to take z clear
1138                            value into account? */
1139                 {
1140                         BEGIN_RING(4);
1141                         OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_HIZ, 2));
1142                         OUT_RING(0x0);  /* First tile */
1143                         OUT_RING(0x3cc0);
1144                         OUT_RING((0xff << 22) | (0xff << 6) | 0x003f003f);
1145                         ADVANCE_RING();
1146                 }
1147         }
1148
1149         /* We have to clear the depth and/or stencil buffers by
1150          * rendering a quad into just those buffers.  Thus, we have to
1151          * make sure the 3D engine is configured correctly.
1152          */
1153         else if ((dev_priv->microcode_version == UCODE_R200) &&
1154                 (flags & (RADEON_DEPTH | RADEON_STENCIL))) {
1155
1156                 int tempPP_CNTL;
1157                 int tempRE_CNTL;
1158                 int tempRB3D_CNTL;
1159                 int tempRB3D_ZSTENCILCNTL;
1160                 int tempRB3D_STENCILREFMASK;
1161                 int tempRB3D_PLANEMASK;
1162                 int tempSE_CNTL;
1163                 int tempSE_VTE_CNTL;
1164                 int tempSE_VTX_FMT_0;
1165                 int tempSE_VTX_FMT_1;
1166                 int tempSE_VAP_CNTL;
1167                 int tempRE_AUX_SCISSOR_CNTL;
1168
1169                 tempPP_CNTL = 0;
1170                 tempRE_CNTL = 0;
1171
1172                 tempRB3D_CNTL = depth_clear->rb3d_cntl;
1173
1174                 tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
1175                 tempRB3D_STENCILREFMASK = 0x0;
1176
1177                 tempSE_CNTL = depth_clear->se_cntl;
1178
1179                 /* Disable TCL */
1180
1181                 tempSE_VAP_CNTL = (     /* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */
1182                                           (0x9 <<
1183                                            SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));
1184
1185                 tempRB3D_PLANEMASK = 0x0;
1186
1187                 tempRE_AUX_SCISSOR_CNTL = 0x0;
1188
1189                 tempSE_VTE_CNTL =
1190                     SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;
1191
1192                 /* Vertex format (X, Y, Z, W) */
1193                 tempSE_VTX_FMT_0 =
1194                     SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
1195                     SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
1196                 tempSE_VTX_FMT_1 = 0x0;
1197
1198                 /*
1199                  * Depth buffer specific enables
1200                  */
1201                 if (flags & RADEON_DEPTH) {
1202                         /* Enable depth buffer */
1203                         tempRB3D_CNTL |= RADEON_Z_ENABLE;
1204                 } else {
1205                         /* Disable depth buffer */
1206                         tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
1207                 }
1208
1209                 /*
1210                  * Stencil buffer specific enables
1211                  */
1212                 if (flags & RADEON_STENCIL) {
1213                         tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
1214                         tempRB3D_STENCILREFMASK = clear->depth_mask;
1215                 } else {
1216                         tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
1217                         tempRB3D_STENCILREFMASK = 0x00000000;
1218                 }
1219
1220                 if (flags & RADEON_USE_COMP_ZBUF) {
1221                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1222                             RADEON_Z_DECOMPRESSION_ENABLE;
1223                 }
1224                 if (flags & RADEON_USE_HIERZ) {
1225                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1226                 }
1227
1228                 BEGIN_RING(26);
1229                 RADEON_WAIT_UNTIL_2D_IDLE();
1230
1231                 OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
1232                 OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
1233                 OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
1234                 OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
1235                 OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
1236                              tempRB3D_STENCILREFMASK);
1237                 OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
1238                 OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
1239                 OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
1240                 OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
1241                 OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
1242                 OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
1243                 OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);
1244                 ADVANCE_RING();
1245
1246                 /* Make sure we restore the 3D state next time.
1247                  */
1248                 sarea_priv->ctx_owner = 0;
1249
1250                 for (i = 0; i < nbox; i++) {
1251
1252                         /* Funny that this should be required --
1253                          *  sets top-left?
1254                          */
1255                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1256
1257                         BEGIN_RING(14);
1258                         OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
1259                         OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
1260                                   RADEON_PRIM_WALK_RING |
1261                                   (3 << RADEON_NUM_VERTICES_SHIFT)));
1262                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1263                         OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
1264                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1265                         OUT_RING(0x3f800000);
1266                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1267                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1268                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1269                         OUT_RING(0x3f800000);
1270                         OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
1271                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1272                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1273                         OUT_RING(0x3f800000);
1274                         ADVANCE_RING();
1275                 }
1276         } else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {
1277
1278                 int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
1279
1280                 rb3d_cntl = depth_clear->rb3d_cntl;
1281
1282                 if (flags & RADEON_DEPTH) {
1283                         rb3d_cntl |= RADEON_Z_ENABLE;
1284                 } else {
1285                         rb3d_cntl &= ~RADEON_Z_ENABLE;
1286                 }
1287
1288                 if (flags & RADEON_STENCIL) {
1289                         rb3d_cntl |= RADEON_STENCIL_ENABLE;
1290                         rb3d_stencilrefmask = clear->depth_mask;        /* misnamed field */
1291                 } else {
1292                         rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
1293                         rb3d_stencilrefmask = 0x00000000;
1294                 }
1295
1296                 if (flags & RADEON_USE_COMP_ZBUF) {
1297                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1298                             RADEON_Z_DECOMPRESSION_ENABLE;
1299                 }
1300                 if (flags & RADEON_USE_HIERZ) {
1301                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1302                 }
1303
1304                 BEGIN_RING(13);
1305                 RADEON_WAIT_UNTIL_2D_IDLE();
1306
1307                 OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
1308                 OUT_RING(0x00000000);
1309                 OUT_RING(rb3d_cntl);
1310
1311                 OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
1312                 OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
1313                 OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
1314                 OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
1315                 ADVANCE_RING();
1316
1317                 /* Make sure we restore the 3D state next time.
1318                  */
1319                 sarea_priv->ctx_owner = 0;
1320
1321                 for (i = 0; i < nbox; i++) {
1322
1323                         /* Funny that this should be required --
1324                          *  sets top-left?
1325                          */
1326                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1327
1328                         BEGIN_RING(15);
1329
1330                         OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
1331                         OUT_RING(RADEON_VTX_Z_PRESENT |
1332                                  RADEON_VTX_PKCOLOR_PRESENT);
1333                         OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
1334                                   RADEON_PRIM_WALK_RING |
1335                                   RADEON_MAOS_ENABLE |
1336                                   RADEON_VTX_FMT_RADEON_MODE |
1337                                   (3 << RADEON_NUM_VERTICES_SHIFT)));
1338
1339                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1340                         OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
1341                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1342                         OUT_RING(0x0);
1343
1344                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1345                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1346                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1347                         OUT_RING(0x0);
1348
1349                         OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
1350                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1351                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1352                         OUT_RING(0x0);
1353
1354                         ADVANCE_RING();
1355                 }
1356         }
1357
1358         /* Increment the clear counter.  The client-side 3D driver must
1359          * wait on this value before performing the clear ioctl.  We
1360          * need this because the card's so damned fast...
1361          */
1362         sarea_priv->last_clear++;
1363
1364         BEGIN_RING(4);
1365
1366         RADEON_CLEAR_AGE(sarea_priv->last_clear);
1367         RADEON_WAIT_UNTIL_IDLE();
1368
1369         ADVANCE_RING();
1370 }
1371
1372 static void radeon_cp_dispatch_swap(struct drm_device *dev, struct drm_master *master)
1373 {
1374         drm_radeon_private_t *dev_priv = dev->dev_private;
1375         struct drm_radeon_master_private *master_priv = master->driver_priv;
1376         drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
1377         int nbox = sarea_priv->nbox;
1378         struct drm_clip_rect *pbox = sarea_priv->boxes;
1379         int i;
1380         RING_LOCALS;
1381         DRM_DEBUG("\n");
1382
1383         /* Do some trivial performance monitoring...
1384          */
1385         if (dev_priv->do_boxes)
1386                 radeon_cp_performance_boxes(dev_priv, master_priv);
1387
1388         /* Wait for the 3D stream to idle before dispatching the bitblt.
1389          * This will prevent data corruption between the two streams.
1390          */
1391         BEGIN_RING(2);
1392
1393         RADEON_WAIT_UNTIL_3D_IDLE();
1394
1395         ADVANCE_RING();
1396
1397         for (i = 0; i < nbox; i++) {
1398                 int x = pbox[i].x1;
1399                 int y = pbox[i].y1;
1400                 int w = pbox[i].x2 - x;
1401                 int h = pbox[i].y2 - y;
1402
1403                 DRM_DEBUG("%d,%d-%d,%d\n", x, y, w, h);
1404
1405                 BEGIN_RING(9);
1406
1407                 OUT_RING(CP_PACKET0(RADEON_DP_GUI_MASTER_CNTL, 0));
1408                 OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1409                          RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1410                          RADEON_GMC_BRUSH_NONE |
1411                          (dev_priv->color_fmt << 8) |
1412                          RADEON_GMC_SRC_DATATYPE_COLOR |
1413                          RADEON_ROP3_S |
1414                          RADEON_DP_SRC_SOURCE_MEMORY |
1415                          RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1416
1417                 /* Make this work even if front & back are flipped:
1418                  */
1419                 OUT_RING(CP_PACKET0(RADEON_SRC_PITCH_OFFSET, 1));
1420                 if (sarea_priv->pfCurrentPage == 0) {
1421                         OUT_RING(dev_priv->back_pitch_offset);
1422                         OUT_RING(dev_priv->front_pitch_offset);
1423                 } else {
1424                         OUT_RING(dev_priv->front_pitch_offset);
1425                         OUT_RING(dev_priv->back_pitch_offset);
1426                 }
1427
1428                 OUT_RING(CP_PACKET0(RADEON_SRC_X_Y, 2));
1429                 OUT_RING((x << 16) | y);
1430                 OUT_RING((x << 16) | y);
1431                 OUT_RING((w << 16) | h);
1432
1433                 ADVANCE_RING();
1434         }
1435
1436         /* Increment the frame counter.  The client-side 3D driver must
1437          * throttle the framerate by waiting for this value before
1438          * performing the swapbuffer ioctl.
1439          */
1440         sarea_priv->last_frame++;
1441
1442         BEGIN_RING(4);
1443
1444         RADEON_FRAME_AGE(sarea_priv->last_frame);
1445         RADEON_WAIT_UNTIL_2D_IDLE();
1446
1447         ADVANCE_RING();
1448 }
1449
1450 void radeon_cp_dispatch_flip(struct drm_device *dev, struct drm_master *master)
1451 {
1452         drm_radeon_private_t *dev_priv = dev->dev_private;
1453         struct drm_radeon_master_private *master_priv = master->driver_priv;
1454         struct drm_sarea *sarea = (struct drm_sarea *)master_priv->sarea->handle;
1455         int offset = (master_priv->sarea_priv->pfCurrentPage == 1)
1456             ? dev_priv->front_offset : dev_priv->back_offset;
1457         RING_LOCALS;
1458         DRM_DEBUG("pfCurrentPage=%d\n",
1459                   master_priv->sarea_priv->pfCurrentPage);
1460
1461         /* Do some trivial performance monitoring...
1462          */
1463         if (dev_priv->do_boxes) {
1464                 dev_priv->stats.boxes |= RADEON_BOX_FLIP;
1465                 radeon_cp_performance_boxes(dev_priv, master_priv);
1466         }
1467
1468         /* Update the frame offsets for both CRTCs
1469          */
1470         BEGIN_RING(6);
1471
1472         RADEON_WAIT_UNTIL_3D_IDLE();
1473         OUT_RING_REG(RADEON_CRTC_OFFSET,
1474                      ((sarea->frame.y * dev_priv->front_pitch +
1475                        sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
1476                      + offset);
1477         OUT_RING_REG(RADEON_CRTC2_OFFSET, master_priv->sarea_priv->crtc2_base
1478                      + offset);
1479
1480         ADVANCE_RING();
1481
1482         /* Increment the frame counter.  The client-side 3D driver must
1483          * throttle the framerate by waiting for this value before
1484          * performing the swapbuffer ioctl.
1485          */
1486         master_priv->sarea_priv->last_frame++;
1487         master_priv->sarea_priv->pfCurrentPage =
1488                 1 - master_priv->sarea_priv->pfCurrentPage;
1489
1490         BEGIN_RING(2);
1491
1492         RADEON_FRAME_AGE(master_priv->sarea_priv->last_frame);
1493
1494         ADVANCE_RING();
1495 }
1496
1497 static int bad_prim_vertex_nr(int primitive, int nr)
1498 {
1499         switch (primitive & RADEON_PRIM_TYPE_MASK) {
1500         case RADEON_PRIM_TYPE_NONE:
1501         case RADEON_PRIM_TYPE_POINT:
1502                 return nr < 1;
1503         case RADEON_PRIM_TYPE_LINE:
1504                 return (nr & 1) || nr == 0;
1505         case RADEON_PRIM_TYPE_LINE_STRIP:
1506                 return nr < 2;
1507         case RADEON_PRIM_TYPE_TRI_LIST:
1508         case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
1509         case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
1510         case RADEON_PRIM_TYPE_RECT_LIST:
1511                 return nr % 3 || nr == 0;
1512         case RADEON_PRIM_TYPE_TRI_FAN:
1513         case RADEON_PRIM_TYPE_TRI_STRIP:
1514                 return nr < 3;
1515         default:
1516                 return 1;
1517         }
1518 }
1519
/*
 * Description of one primitive to dispatch, as consumed by
 * radeon_cp_dispatch_vertex() and radeon_cp_dispatch_indices().
 */
typedef struct {
	unsigned int start;	/* byte offset of the primitive data within the buffer */
	unsigned int finish;	/* byte offset just past the primitive data */
	unsigned int prim;	/* hardware primitive word (type bits checked by bad_prim_vertex_nr) */
	unsigned int numverts;	/* vertex count written into the render packet */
	unsigned int offset;	/* added to the GART buffers offset for indexed primitives */
	unsigned int vc_format;	/* vertex format word written into the render packet */
} drm_radeon_tcl_prim_t;
1528
1529 static void radeon_cp_dispatch_vertex(struct drm_device * dev,
1530                                       struct drm_file *file_priv,
1531                                       struct drm_buf * buf,
1532                                       drm_radeon_tcl_prim_t * prim)
1533 {
1534         drm_radeon_private_t *dev_priv = dev->dev_private;
1535         struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
1536         drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
1537         int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
1538         int numverts = (int)prim->numverts;
1539         int nbox = sarea_priv->nbox;
1540         int i = 0;
1541         RING_LOCALS;
1542
1543         DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
1544                   prim->prim,
1545                   prim->vc_format, prim->start, prim->finish, prim->numverts);
1546
1547         if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
1548                 DRM_ERROR("bad prim %x numverts %d\n",
1549                           prim->prim, prim->numverts);
1550                 return;
1551         }
1552
1553         do {
1554                 /* Emit the next cliprect */
1555                 if (i < nbox) {
1556                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1557                 }
1558
1559                 /* Emit the vertex buffer rendering commands */
1560                 BEGIN_RING(5);
1561
1562                 OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
1563                 OUT_RING(offset);
1564                 OUT_RING(numverts);
1565                 OUT_RING(prim->vc_format);
1566                 OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST |
1567                          RADEON_COLOR_ORDER_RGBA |
1568                          RADEON_VTX_FMT_RADEON_MODE |
1569                          (numverts << RADEON_NUM_VERTICES_SHIFT));
1570
1571                 ADVANCE_RING();
1572
1573                 i++;
1574         } while (i < nbox);
1575 }
1576
1577 void radeon_cp_discard_buffer(struct drm_device *dev, struct drm_master *master, struct drm_buf *buf)
1578 {
1579         drm_radeon_private_t *dev_priv = dev->dev_private;
1580         struct drm_radeon_master_private *master_priv = master->driver_priv;
1581         drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
1582         RING_LOCALS;
1583
1584         buf_priv->age = ++master_priv->sarea_priv->last_dispatch;
1585
1586         /* Emit the vertex buffer age */
1587         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600) {
1588                 BEGIN_RING(3);
1589                 R600_DISPATCH_AGE(buf_priv->age);
1590                 ADVANCE_RING();
1591         } else {
1592                 BEGIN_RING(2);
1593                 RADEON_DISPATCH_AGE(buf_priv->age);
1594                 ADVANCE_RING();
1595         }
1596
1597         buf->pending = 1;
1598         buf->used = 0;
1599 }
1600
1601 static void radeon_cp_dispatch_indirect(struct drm_device * dev,
1602                                         struct drm_buf * buf, int start, int end)
1603 {
1604         drm_radeon_private_t *dev_priv = dev->dev_private;
1605         RING_LOCALS;
1606         DRM_DEBUG("buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);
1607
1608         if (start != end) {
1609                 int offset = (dev_priv->gart_buffers_offset
1610                               + buf->offset + start);
1611                 int dwords = (end - start + 3) / sizeof(u32);
1612
1613                 /* Indirect buffer data must be an even number of
1614                  * dwords, so if we've been given an odd number we must
1615                  * pad the data with a Type-2 CP packet.
1616                  */
1617                 if (dwords & 1) {
1618                         u32 *data = (u32 *)
1619                             ((char *)dev->agp_buffer_map->handle
1620                              + buf->offset + start);
1621                         data[dwords++] = RADEON_CP_PACKET2;
1622                 }
1623
1624                 /* Fire off the indirect buffer */
1625                 BEGIN_RING(3);
1626
1627                 OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
1628                 OUT_RING(offset);
1629                 OUT_RING(dwords);
1630
1631                 ADVANCE_RING();
1632         }
1633 }
1634
1635 static void radeon_cp_dispatch_indices(struct drm_device *dev,
1636                                        struct drm_master *master,
1637                                        struct drm_buf * elt_buf,
1638                                        drm_radeon_tcl_prim_t * prim)
1639 {
1640         drm_radeon_private_t *dev_priv = dev->dev_private;
1641         struct drm_radeon_master_private *master_priv = master->driver_priv;
1642         drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
1643         int offset = dev_priv->gart_buffers_offset + prim->offset;
1644         u32 *data;
1645         int dwords;
1646         int i = 0;
1647         int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
1648         int count = (prim->finish - start) / sizeof(u16);
1649         int nbox = sarea_priv->nbox;
1650
1651         DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
1652                   prim->prim,
1653                   prim->vc_format,
1654                   prim->start, prim->finish, prim->offset, prim->numverts);
1655
1656         if (bad_prim_vertex_nr(prim->prim, count)) {
1657                 DRM_ERROR("bad prim %x count %d\n", prim->prim, count);
1658                 return;
1659         }
1660
1661         if (start >= prim->finish || (prim->start & 0x7)) {
1662                 DRM_ERROR("buffer prim %d\n", prim->prim);
1663                 return;
1664         }
1665
1666         dwords = (prim->finish - prim->start + 3) / sizeof(u32);
1667
1668         data = (u32 *) ((char *)dev->agp_buffer_map->handle +
1669                         elt_buf->offset + prim->start);
1670
1671         data[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, dwords - 2);
1672         data[1] = offset;
1673         data[2] = prim->numverts;
1674         data[3] = prim->vc_format;
1675         data[4] = (prim->prim |
1676                    RADEON_PRIM_WALK_IND |
1677                    RADEON_COLOR_ORDER_RGBA |
1678                    RADEON_VTX_FMT_RADEON_MODE |
1679                    (count << RADEON_NUM_VERTICES_SHIFT));
1680
1681         do {
1682                 if (i < nbox)
1683                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1684
1685                 radeon_cp_dispatch_indirect(dev, elt_buf,
1686                                             prim->start, prim->finish);
1687
1688                 i++;
1689         } while (i < nbox);
1690
1691 }
1692
1693 #define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE
1694
1695 static int radeon_cp_dispatch_texture(struct drm_device * dev,
1696                                       struct drm_file *file_priv,
1697                                       drm_radeon_texture_t * tex,
1698                                       drm_radeon_tex_image_t * image)
1699 {
1700         drm_radeon_private_t *dev_priv = dev->dev_private;
1701         struct drm_buf *buf;
1702         u32 format;
1703         u32 *buffer;
1704         const u8 __user *data;
1705         int size, dwords, tex_width, blit_width, spitch;
1706         u32 height;
1707         int i;
1708         u32 texpitch, microtile;
1709         u32 offset, byte_offset;
1710         RING_LOCALS;
1711
1712         if (radeon_check_and_fixup_offset(dev_priv, file_priv, &tex->offset)) {
1713                 DRM_ERROR("Invalid destination offset\n");
1714                 return -EINVAL;
1715         }
1716
1717         dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;
1718
1719         /* Flush the pixel cache.  This ensures no pixel data gets mixed
1720          * up with the texture data from the host data blit, otherwise
1721          * part of the texture image may be corrupted.
1722          */
1723         BEGIN_RING(4);
1724         RADEON_FLUSH_CACHE();
1725         RADEON_WAIT_UNTIL_IDLE();
1726         ADVANCE_RING();
1727
1728         /* The compiler won't optimize away a division by a variable,
1729          * even if the only legal values are powers of two.  Thus, we'll
1730          * use a shift instead.
1731          */
1732         switch (tex->format) {
1733         case RADEON_TXFORMAT_ARGB8888:
1734         case RADEON_TXFORMAT_RGBA8888:
1735                 format = RADEON_COLOR_FORMAT_ARGB8888;
1736                 tex_width = tex->width * 4;
1737                 blit_width = image->width * 4;
1738                 break;
1739         case RADEON_TXFORMAT_AI88:
1740         case RADEON_TXFORMAT_ARGB1555:
1741         case RADEON_TXFORMAT_RGB565:
1742         case RADEON_TXFORMAT_ARGB4444:
1743         case RADEON_TXFORMAT_VYUY422:
1744         case RADEON_TXFORMAT_YVYU422:
1745                 format = RADEON_COLOR_FORMAT_RGB565;
1746                 tex_width = tex->width * 2;
1747                 blit_width = image->width * 2;
1748                 break;
1749         case RADEON_TXFORMAT_I8:
1750         case RADEON_TXFORMAT_RGB332:
1751                 format = RADEON_COLOR_FORMAT_CI8;
1752                 tex_width = tex->width * 1;
1753                 blit_width = image->width * 1;
1754                 break;
1755         default:
1756                 DRM_ERROR("invalid texture format %d\n", tex->format);
1757                 return -EINVAL;
1758         }
1759         spitch = blit_width >> 6;
1760         if (spitch == 0 && image->height > 1)
1761                 return -EINVAL;
1762
1763         texpitch = tex->pitch;
1764         if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
1765                 microtile = 1;
1766                 if (tex_width < 64) {
1767                         texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
1768                         /* we got tiled coordinates, untile them */
1769                         image->x *= 2;
1770                 }
1771         } else
1772                 microtile = 0;
1773
1774         /* this might fail for zero-sized uploads - are those illegal? */
1775         if (!radeon_check_offset(dev_priv, tex->offset + image->height *
1776                                 blit_width - 1)) {
1777                 DRM_ERROR("Invalid final destination offset\n");
1778                 return -EINVAL;
1779         }
1780
1781         DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);
1782
1783         do {
1784                 DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
1785                           tex->offset >> 10, tex->pitch, tex->format,
1786                           image->x, image->y, image->width, image->height);
1787
1788                 /* Make a copy of some parameters in case we have to
1789                  * update them for a multi-pass texture blit.
1790                  */
1791                 height = image->height;
1792                 data = (const u8 __user *)image->data;
1793
1794                 size = height * blit_width;
1795
1796                 if (size > RADEON_MAX_TEXTURE_SIZE) {
1797                         height = RADEON_MAX_TEXTURE_SIZE / blit_width;
1798                         size = height * blit_width;
1799                 } else if (size < 4 && size > 0) {
1800                         size = 4;
1801                 } else if (size == 0) {
1802                         return 0;
1803                 }
1804
1805                 buf = radeon_freelist_get(dev);
1806                 if (0 && !buf) {
1807                         radeon_do_cp_idle(dev_priv);
1808                         buf = radeon_freelist_get(dev);
1809                 }
1810                 if (!buf) {
1811                         DRM_DEBUG("EAGAIN\n");
1812                         if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
1813                                 return -EFAULT;
1814                         return -EAGAIN;
1815                 }
1816
1817                 /* Dispatch the indirect buffer.
1818                  */
1819                 buffer =
1820                     (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
1821                 dwords = size / 4;
1822
1823 #define RADEON_COPY_MT(_buf, _data, _width) \
1824         do { \
1825                 if (DRM_COPY_FROM_USER(_buf, _data, (_width))) {\
1826                         DRM_ERROR("EFAULT on pad, %d bytes\n", (_width)); \
1827                         return -EFAULT; \
1828                 } \
1829         } while(0)
1830
1831                 if (microtile) {
1832                         /* texture micro tiling in use, minimum texture width is thus 16 bytes.
1833                            however, we cannot use blitter directly for texture width < 64 bytes,
1834                            since minimum tex pitch is 64 bytes and we need this to match
1835                            the texture width, otherwise the blitter will tile it wrong.
1836                            Thus, tiling manually in this case. Additionally, need to special
1837                            case tex height = 1, since our actual image will have height 2
1838                            and we need to ensure we don't read beyond the texture size
1839                            from user space. */
1840                         if (tex->height == 1) {
1841                                 if (tex_width >= 64 || tex_width <= 16) {
1842                                         RADEON_COPY_MT(buffer, data,
1843                                                 (int)(tex_width * sizeof(u32)));
1844                                 } else if (tex_width == 32) {
1845                                         RADEON_COPY_MT(buffer, data, 16);
1846                                         RADEON_COPY_MT(buffer + 8,
1847                                                        data + 16, 16);
1848                                 }
1849                         } else if (tex_width >= 64 || tex_width == 16) {
1850                                 RADEON_COPY_MT(buffer, data,
1851                                                (int)(dwords * sizeof(u32)));
1852                         } else if (tex_width < 16) {
1853                                 for (i = 0; i < tex->height; i++) {
1854                                         RADEON_COPY_MT(buffer, data, tex_width);
1855                                         buffer += 4;
1856                                         data += tex_width;
1857                                 }
1858                         } else if (tex_width == 32) {
1859                                 /* TODO: make sure this works when not fitting in one buffer
1860                                    (i.e. 32bytes x 2048...) */
1861                                 for (i = 0; i < tex->height; i += 2) {
1862                                         RADEON_COPY_MT(buffer, data, 16);
1863                                         data += 16;
1864                                         RADEON_COPY_MT(buffer + 8, data, 16);
1865                                         data += 16;
1866                                         RADEON_COPY_MT(buffer + 4, data, 16);
1867                                         data += 16;
1868                                         RADEON_COPY_MT(buffer + 12, data, 16);
1869                                         data += 16;
1870                                         buffer += 16;
1871                                 }
1872                         }
1873                 } else {
1874                         if (tex_width >= 32) {
1875                                 /* Texture image width is larger than the minimum, so we
1876                                  * can upload it directly.
1877                                  */
1878                                 RADEON_COPY_MT(buffer, data,
1879                                                (int)(dwords * sizeof(u32)));
1880                         } else {
1881                                 /* Texture image width is less than the minimum, so we
1882                                  * need to pad out each image scanline to the minimum
1883                                  * width.
1884                                  */
1885                                 for (i = 0; i < tex->height; i++) {
1886                                         RADEON_COPY_MT(buffer, data, tex_width);
1887                                         buffer += 8;
1888                                         data += tex_width;
1889                                 }
1890                         }
1891                 }
1892
1893 #undef RADEON_COPY_MT
1894                 byte_offset = (image->y & ~2047) * blit_width;
1895                 buf->file_priv = file_priv;
1896                 buf->used = size;
1897                 offset = dev_priv->gart_buffers_offset + buf->offset;
1898                 BEGIN_RING(9);
1899                 OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
1900                 OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1901                          RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1902                          RADEON_GMC_BRUSH_NONE |
1903                          (format << 8) |
1904                          RADEON_GMC_SRC_DATATYPE_COLOR |
1905                          RADEON_ROP3_S |
1906                          RADEON_DP_SRC_SOURCE_MEMORY |
1907                          RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1908                 OUT_RING((spitch << 22) | (offset >> 10));
1909                 OUT_RING((texpitch << 22) | ((tex->offset >> 10) + (byte_offset >> 10)));
1910                 OUT_RING(0);
1911                 OUT_RING((image->x << 16) | (image->y % 2048));
1912                 OUT_RING((image->width << 16) | height);
1913                 RADEON_WAIT_UNTIL_2D_IDLE();
1914                 ADVANCE_RING();
1915                 COMMIT_RING();
1916
1917                 radeon_cp_discard_buffer(dev, file_priv->master, buf);
1918
1919                 /* Update the input parameters for next time */
1920                 image->y += height;
1921                 image->height -= height;
1922                 image->data = (const u8 __user *)image->data + size;
1923         } while (image->height > 0);
1924
1925         /* Flush the pixel cache after the blit completes.  This ensures
1926          * the texture data is written out to memory before rendering
1927          * continues.
1928          */
1929         BEGIN_RING(4);
1930         RADEON_FLUSH_CACHE();
1931         RADEON_WAIT_UNTIL_2D_IDLE();
1932         ADVANCE_RING();
1933         COMMIT_RING();
1934
1935         return 0;
1936 }
1937
1938 static void radeon_cp_dispatch_stipple(struct drm_device * dev, u32 * stipple)
1939 {
1940         drm_radeon_private_t *dev_priv = dev->dev_private;
1941         int i;
1942         RING_LOCALS;
1943         DRM_DEBUG("\n");
1944
1945         BEGIN_RING(35);
1946
1947         OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
1948         OUT_RING(0x00000000);
1949
1950         OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
1951         for (i = 0; i < 32; i++) {
1952                 OUT_RING(stipple[i]);
1953         }
1954
1955         ADVANCE_RING();
1956 }
1957
1958 static void radeon_apply_surface_regs(int surf_index,
1959                                       drm_radeon_private_t *dev_priv)
1960 {
1961         if (!dev_priv->mmio)
1962                 return;
1963
1964         radeon_do_cp_idle(dev_priv);
1965
1966         RADEON_WRITE(RADEON_SURFACE0_INFO + 16 * surf_index,
1967                      dev_priv->surfaces[surf_index].flags);
1968         RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16 * surf_index,
1969                      dev_priv->surfaces[surf_index].lower);
1970         RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16 * surf_index,
1971                      dev_priv->surfaces[surf_index].upper);
1972 }
1973
1974 /* Allocates a virtual surface
1975  * doesn't always allocate a real surface, will stretch an existing
1976  * surface when possible.
1977  *
1978  * Note that refcount can be at most 2, since during a free refcount=3
1979  * might mean we have to allocate a new surface which might not always
1980  * be available.
1981  * For example : we allocate three contiguous surfaces ABC. If B is
1982  * freed, we suddenly need two surfaces to store A and C, which might
1983  * not always be available.
1984  */
1985 static int alloc_surface(drm_radeon_surface_alloc_t *new,
1986                          drm_radeon_private_t *dev_priv,
1987                          struct drm_file *file_priv)
1988 {
1989         struct radeon_virt_surface *s;
1990         int i;
1991         int virt_surface_index;
1992         uint32_t new_upper, new_lower;
1993
1994         new_lower = new->address;
1995         new_upper = new_lower + new->size - 1;
1996
1997         /* sanity check */
1998         if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
1999             ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) !=
2000              RADEON_SURF_ADDRESS_FIXED_MASK)
2001             || ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
2002                 return -1;
2003
2004         /* make sure there is no overlap with existing surfaces */
2005         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
2006                 if ((dev_priv->surfaces[i].refcount != 0) &&
2007                     (((new_lower >= dev_priv->surfaces[i].lower) &&
2008                       (new_lower < dev_priv->surfaces[i].upper)) ||
2009                      ((new_lower < dev_priv->surfaces[i].lower) &&
2010                       (new_upper > dev_priv->surfaces[i].lower)))) {
2011                         return -1;
2012                 }
2013         }
2014
2015         /* find a virtual surface */
2016         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++)
2017                 if (dev_priv->virt_surfaces[i].file_priv == NULL)
2018                         break;
2019         if (i == 2 * RADEON_MAX_SURFACES) {
2020                 return -1;
2021         }
2022         virt_surface_index = i;
2023
2024         /* try to reuse an existing surface */
2025         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
2026                 /* extend before */
2027                 if ((dev_priv->surfaces[i].refcount == 1) &&
2028                     (new->flags == dev_priv->surfaces[i].flags) &&
2029                     (new_upper + 1 == dev_priv->surfaces[i].lower)) {
2030                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
2031                         s->surface_index = i;
2032                         s->lower = new_lower;
2033                         s->upper = new_upper;
2034                         s->flags = new->flags;
2035                         s->file_priv = file_priv;
2036                         dev_priv->surfaces[i].refcount++;
2037                         dev_priv->surfaces[i].lower = s->lower;
2038                         radeon_apply_surface_regs(s->surface_index, dev_priv);
2039                         return virt_surface_index;
2040                 }
2041
2042                 /* extend after */
2043                 if ((dev_priv->surfaces[i].refcount == 1) &&
2044                     (new->flags == dev_priv->surfaces[i].flags) &&
2045                     (new_lower == dev_priv->surfaces[i].upper + 1)) {
2046                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
2047                         s->surface_index = i;
2048                         s->lower = new_lower;
2049                         s->upper = new_upper;
2050                         s->flags = new->flags;
2051                         s->file_priv = file_priv;
2052                         dev_priv->surfaces[i].refcount++;
2053                         dev_priv->surfaces[i].upper = s->upper;
2054                         radeon_apply_surface_regs(s->surface_index, dev_priv);
2055                         return virt_surface_index;
2056                 }
2057         }
2058
2059         /* okay, we need a new one */
2060         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
2061                 if (dev_priv->surfaces[i].refcount == 0) {
2062                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
2063                         s->surface_index = i;
2064                         s->lower = new_lower;
2065                         s->upper = new_upper;
2066                         s->flags = new->flags;
2067                         s->file_priv = file_priv;
2068                         dev_priv->surfaces[i].refcount = 1;
2069                         dev_priv->surfaces[i].lower = s->lower;
2070                         dev_priv->surfaces[i].upper = s->upper;
2071                         dev_priv->surfaces[i].flags = s->flags;
2072                         radeon_apply_surface_regs(s->surface_index, dev_priv);
2073                         return virt_surface_index;
2074                 }
2075         }
2076
2077         /* we didn't find anything */
2078         return -1;
2079 }
2080
2081 static int free_surface(struct drm_file *file_priv,
2082                         drm_radeon_private_t * dev_priv,
2083                         int lower)
2084 {
2085         struct radeon_virt_surface *s;
2086         int i;
2087         /* find the virtual surface */
2088         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
2089                 s = &(dev_priv->virt_surfaces[i]);
2090                 if (s->file_priv) {
2091                         if ((lower == s->lower) && (file_priv == s->file_priv))
2092                         {
2093                                 if (dev_priv->surfaces[s->surface_index].
2094                                     lower == s->lower)
2095                                         dev_priv->surfaces[s->surface_index].
2096                                             lower = s->upper;
2097
2098                                 if (dev_priv->surfaces[s->surface_index].
2099                                     upper == s->upper)
2100                                         dev_priv->surfaces[s->surface_index].
2101                                             upper = s->lower;
2102
2103                                 dev_priv->surfaces[s->surface_index].refcount--;
2104                                 if (dev_priv->surfaces[s->surface_index].
2105                                     refcount == 0)
2106                                         dev_priv->surfaces[s->surface_index].
2107                                             flags = 0;
2108                                 s->file_priv = NULL;
2109                                 radeon_apply_surface_regs(s->surface_index,
2110                                                           dev_priv);
2111                                 return 0;
2112                         }
2113                 }
2114         }
2115         return 1;
2116 }
2117
2118 static void radeon_surfaces_release(struct drm_file *file_priv,
2119                                     drm_radeon_private_t * dev_priv)
2120 {
2121         int i;
2122         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
2123                 if (dev_priv->virt_surfaces[i].file_priv == file_priv)
2124                         free_surface(file_priv, dev_priv,
2125                                      dev_priv->virt_surfaces[i].lower);
2126         }
2127 }
2128
2129 /* ================================================================
2130  * IOCTL functions
2131  */
2132 static int radeon_surface_alloc(struct drm_device *dev, void *data, struct drm_file *file_priv)
2133 {
2134         drm_radeon_private_t *dev_priv = dev->dev_private;
2135         drm_radeon_surface_alloc_t *alloc = data;
2136
2137         if (alloc_surface(alloc, dev_priv, file_priv) == -1)
2138                 return -EINVAL;
2139         else
2140                 return 0;
2141 }
2142
2143 static int radeon_surface_free(struct drm_device *dev, void *data, struct drm_file *file_priv)
2144 {
2145         drm_radeon_private_t *dev_priv = dev->dev_private;
2146         drm_radeon_surface_free_t *memfree = data;
2147
2148         if (free_surface(file_priv, dev_priv, memfree->address))
2149                 return -EINVAL;
2150         else
2151                 return 0;
2152 }
2153
2154 static int radeon_cp_clear(struct drm_device *dev, void *data, struct drm_file *file_priv)
2155 {
2156         drm_radeon_private_t *dev_priv = dev->dev_private;
2157         struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
2158         drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
2159         drm_radeon_clear_t *clear = data;
2160         drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
2161         DRM_DEBUG("\n");
2162
2163         LOCK_TEST_WITH_RETURN(dev, file_priv);
2164
2165         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2166
2167         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2168                 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2169
2170         if (DRM_COPY_FROM_USER(&depth_boxes, clear->depth_boxes,
2171                                sarea_priv->nbox * sizeof(depth_boxes[0])))
2172                 return -EFAULT;
2173
2174         radeon_cp_dispatch_clear(dev, file_priv->master, clear, depth_boxes);
2175
2176         COMMIT_RING();
2177         return 0;
2178 }
2179
2180 /* Not sure why this isn't set all the time:
2181  */
2182 static int radeon_do_init_pageflip(struct drm_device *dev, struct drm_master *master)
2183 {
2184         drm_radeon_private_t *dev_priv = dev->dev_private;
2185         struct drm_radeon_master_private *master_priv = master->driver_priv;
2186         RING_LOCALS;
2187
2188         DRM_DEBUG("\n");
2189
2190         BEGIN_RING(6);
2191         RADEON_WAIT_UNTIL_3D_IDLE();
2192         OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
2193         OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
2194                  RADEON_CRTC_OFFSET_FLIP_CNTL);
2195         OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
2196         OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
2197                  RADEON_CRTC_OFFSET_FLIP_CNTL);
2198         ADVANCE_RING();
2199
2200         dev_priv->page_flipping = 1;
2201
2202         if (master_priv->sarea_priv->pfCurrentPage != 1)
2203                 master_priv->sarea_priv->pfCurrentPage = 0;
2204
2205         return 0;
2206 }
2207
2208 /* Swapping and flipping are different operations, need different ioctls.
2209  * They can & should be intermixed to support multiple 3d windows.
2210  */
2211 static int radeon_cp_flip(struct drm_device *dev, void *data, struct drm_file *file_priv)
2212 {
2213         drm_radeon_private_t *dev_priv = dev->dev_private;
2214         DRM_DEBUG("\n");
2215
2216         LOCK_TEST_WITH_RETURN(dev, file_priv);
2217
2218         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2219
2220         if (!dev_priv->page_flipping)
2221                 radeon_do_init_pageflip(dev, file_priv->master);
2222
2223         radeon_cp_dispatch_flip(dev, file_priv->master);
2224
2225         COMMIT_RING();
2226         return 0;
2227 }
2228
2229 static int radeon_cp_swap(struct drm_device *dev, void *data, struct drm_file *file_priv)
2230 {
2231         drm_radeon_private_t *dev_priv = dev->dev_private;
2232         struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
2233         drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
2234
2235         DRM_DEBUG("\n");
2236
2237         LOCK_TEST_WITH_RETURN(dev, file_priv);
2238
2239         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2240
2241         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2242                 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2243
2244         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
2245                 r600_cp_dispatch_swap(dev, file_priv);
2246         else
2247                 radeon_cp_dispatch_swap(dev, file_priv->master);
2248         sarea_priv->ctx_owner = 0;
2249
2250         COMMIT_RING();
2251         return 0;
2252 }
2253
2254 static int radeon_cp_vertex(struct drm_device *dev, void *data, struct drm_file *file_priv)
2255 {
2256         drm_radeon_private_t *dev_priv = dev->dev_private;
2257         struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
2258         drm_radeon_sarea_t *sarea_priv;
2259         struct drm_device_dma *dma = dev->dma;
2260         struct drm_buf *buf;
2261         drm_radeon_vertex_t *vertex = data;
2262         drm_radeon_tcl_prim_t prim;
2263
2264         LOCK_TEST_WITH_RETURN(dev, file_priv);
2265
2266         sarea_priv = master_priv->sarea_priv;
2267
2268         DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
2269                   DRM_CURRENTPID, vertex->idx, vertex->count, vertex->discard);
2270
2271         if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
2272                 DRM_ERROR("buffer index %d (of %d max)\n",
2273                           vertex->idx, dma->buf_count - 1);
2274                 return -EINVAL;
2275         }
2276         if (vertex->prim < 0 || vertex->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2277                 DRM_ERROR("buffer prim %d\n", vertex->prim);
2278                 return -EINVAL;
2279         }
2280
2281         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2282         VB_AGE_TEST_WITH_RETURN(dev_priv);
2283
2284         buf = dma->buflist[vertex->idx];
2285
2286         if (buf->file_priv != file_priv) {
2287                 DRM_ERROR("process %d using buffer owned by %p\n",
2288                           DRM_CURRENTPID, buf->file_priv);
2289                 return -EINVAL;
2290         }
2291         if (buf->pending) {
2292                 DRM_ERROR("sending pending buffer %d\n", vertex->idx);
2293                 return -EINVAL;
2294         }
2295
2296         /* Build up a prim_t record:
2297          */
2298         if (vertex->count) {
2299                 buf->used = vertex->count;      /* not used? */
2300
2301                 if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2302                         if (radeon_emit_state(dev_priv, file_priv,
2303                                               &sarea_priv->context_state,
2304                                               sarea_priv->tex_state,
2305                                               sarea_priv->dirty)) {
2306                                 DRM_ERROR("radeon_emit_state failed\n");
2307                                 return -EINVAL;
2308                         }
2309
2310                         sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2311                                                RADEON_UPLOAD_TEX1IMAGES |
2312                                                RADEON_UPLOAD_TEX2IMAGES |
2313                                                RADEON_REQUIRE_QUIESCENCE);
2314                 }
2315
2316                 prim.start = 0;
2317                 prim.finish = vertex->count;    /* unused */
2318                 prim.prim = vertex->prim;
2319                 prim.numverts = vertex->count;
2320                 prim.vc_format = sarea_priv->vc_format;
2321
2322                 radeon_cp_dispatch_vertex(dev, file_priv, buf, &prim);
2323         }
2324
2325         if (vertex->discard) {
2326                 radeon_cp_discard_buffer(dev, file_priv->master, buf);
2327         }
2328
2329         COMMIT_RING();
2330         return 0;
2331 }
2332
2333 static int radeon_cp_indices(struct drm_device *dev, void *data, struct drm_file *file_priv)
2334 {
2335         drm_radeon_private_t *dev_priv = dev->dev_private;
2336         struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
2337         drm_radeon_sarea_t *sarea_priv;
2338         struct drm_device_dma *dma = dev->dma;
2339         struct drm_buf *buf;
2340         drm_radeon_indices_t *elts = data;
2341         drm_radeon_tcl_prim_t prim;
2342         int count;
2343
2344         LOCK_TEST_WITH_RETURN(dev, file_priv);
2345
2346         sarea_priv = master_priv->sarea_priv;
2347
2348         DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
2349                   DRM_CURRENTPID, elts->idx, elts->start, elts->end,
2350                   elts->discard);
2351
2352         if (elts->idx < 0 || elts->idx >= dma->buf_count) {
2353                 DRM_ERROR("buffer index %d (of %d max)\n",
2354                           elts->idx, dma->buf_count - 1);
2355                 return -EINVAL;
2356         }
2357         if (elts->prim < 0 || elts->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2358                 DRM_ERROR("buffer prim %d\n", elts->prim);
2359                 return -EINVAL;
2360         }
2361
2362         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2363         VB_AGE_TEST_WITH_RETURN(dev_priv);
2364
2365         buf = dma->buflist[elts->idx];
2366
2367         if (buf->file_priv != file_priv) {
2368                 DRM_ERROR("process %d using buffer owned by %p\n",
2369                           DRM_CURRENTPID, buf->file_priv);
2370                 return -EINVAL;
2371         }
2372         if (buf->pending) {
2373                 DRM_ERROR("sending pending buffer %d\n", elts->idx);
2374                 return -EINVAL;
2375         }
2376
2377         count = (elts->end - elts->start) / sizeof(u16);
2378         elts->start -= RADEON_INDEX_PRIM_OFFSET;
2379
2380         if (elts->start & 0x7) {
2381                 DRM_ERROR("misaligned buffer 0x%x\n", elts->start);
2382                 return -EINVAL;
2383         }
2384         if (elts->start < buf->used) {
2385                 DRM_ERROR("no header 0x%x - 0x%x\n", elts->start, buf->used);
2386                 return -EINVAL;
2387         }
2388
2389         buf->used = elts->end;
2390
2391         if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2392                 if (radeon_emit_state(dev_priv, file_priv,
2393                                       &sarea_priv->context_state,
2394                                       sarea_priv->tex_state,
2395                                       sarea_priv->dirty)) {
2396                         DRM_ERROR("radeon_emit_state failed\n");
2397                         return -EINVAL;
2398                 }
2399
2400                 sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2401                                        RADEON_UPLOAD_TEX1IMAGES |
2402                                        RADEON_UPLOAD_TEX2IMAGES |
2403                                        RADEON_REQUIRE_QUIESCENCE);
2404         }
2405
2406         /* Build up a prim_t record:
2407          */
2408         prim.start = elts->start;
2409         prim.finish = elts->end;
2410         prim.prim = elts->prim;
2411         prim.offset = 0;        /* offset from start of dma buffers */
2412         prim.numverts = RADEON_MAX_VB_VERTS;    /* duh */
2413         prim.vc_format = sarea_priv->vc_format;
2414
2415         radeon_cp_dispatch_indices(dev, file_priv->master, buf, &prim);
2416         if (elts->discard) {
2417                 radeon_cp_discard_buffer(dev, file_priv->master, buf);
2418         }
2419
2420         COMMIT_RING();
2421         return 0;
2422 }
2423
2424 static int radeon_cp_texture(struct drm_device *dev, void *data, struct drm_file *file_priv)
2425 {
2426         drm_radeon_private_t *dev_priv = dev->dev_private;
2427         drm_radeon_texture_t *tex = data;
2428         drm_radeon_tex_image_t image;
2429         int ret;
2430
2431         LOCK_TEST_WITH_RETURN(dev, file_priv);
2432
2433         if (tex->image == NULL) {
2434                 DRM_ERROR("null texture image!\n");
2435                 return -EINVAL;
2436         }
2437
2438         if (DRM_COPY_FROM_USER(&image,
2439                                (drm_radeon_tex_image_t __user *) tex->image,
2440                                sizeof(image)))
2441                 return -EFAULT;
2442
2443         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2444         VB_AGE_TEST_WITH_RETURN(dev_priv);
2445
2446         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
2447                 ret = r600_cp_dispatch_texture(dev, file_priv, tex, &image);
2448         else
2449                 ret = radeon_cp_dispatch_texture(dev, file_priv, tex, &image);
2450
2451         return ret;
2452 }
2453
2454 static int radeon_cp_stipple(struct drm_device *dev, void *data, struct drm_file *file_priv)
2455 {
2456         drm_radeon_private_t *dev_priv = dev->dev_private;
2457         drm_radeon_stipple_t *stipple = data;
2458         u32 mask[32];
2459
2460         LOCK_TEST_WITH_RETURN(dev, file_priv);
2461
2462         if (DRM_COPY_FROM_USER(&mask, stipple->mask, 32 * sizeof(u32)))
2463                 return -EFAULT;
2464
2465         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2466
2467         radeon_cp_dispatch_stipple(dev, mask);
2468
2469         COMMIT_RING();
2470         return 0;
2471 }
2472
2473 static int radeon_cp_indirect(struct drm_device *dev, void *data, struct drm_file *file_priv)
2474 {
2475         drm_radeon_private_t *dev_priv = dev->dev_private;
2476         struct drm_device_dma *dma = dev->dma;
2477         struct drm_buf *buf;
2478         drm_radeon_indirect_t *indirect = data;
2479         RING_LOCALS;
2480
2481         LOCK_TEST_WITH_RETURN(dev, file_priv);
2482
2483         DRM_DEBUG("idx=%d s=%d e=%d d=%d\n",
2484                   indirect->idx, indirect->start, indirect->end,
2485                   indirect->discard);
2486
2487         if (indirect->idx < 0 || indirect->idx >= dma->buf_count) {
2488                 DRM_ERROR("buffer index %d (of %d max)\n",
2489                           indirect->idx, dma->buf_count - 1);
2490                 return -EINVAL;
2491         }
2492
2493         buf = dma->buflist[indirect->idx];
2494
2495         if (buf->file_priv != file_priv) {
2496                 DRM_ERROR("process %d using buffer owned by %p\n",
2497                           DRM_CURRENTPID, buf->file_priv);
2498                 return -EINVAL;
2499         }
2500         if (buf->pending) {
2501                 DRM_ERROR("sending pending buffer %d\n", indirect->idx);
2502                 return -EINVAL;
2503         }
2504
2505         if (indirect->start < buf->used) {
2506                 DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
2507                           indirect->start, buf->used);
2508                 return -EINVAL;
2509         }
2510
2511         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2512         VB_AGE_TEST_WITH_RETURN(dev_priv);
2513
2514         buf->used = indirect->end;
2515
2516         /* Dispatch the indirect buffer full of commands from the
2517          * X server.  This is insecure and is thus only available to
2518          * privileged clients.
2519          */
2520         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
2521                 r600_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end);
2522         else {
2523                 /* Wait for the 3D stream to idle before the indirect buffer
2524                  * containing 2D acceleration commands is processed.
2525                  */
2526                 BEGIN_RING(2);
2527                 RADEON_WAIT_UNTIL_3D_IDLE();
2528                 ADVANCE_RING();
2529                 radeon_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end);
2530         }
2531
2532         if (indirect->discard) {
2533                 radeon_cp_discard_buffer(dev, file_priv->master, buf);
2534         }
2535
2536         COMMIT_RING();
2537         return 0;
2538 }
2539
2540 static int radeon_cp_vertex2(struct drm_device *dev, void *data, struct drm_file *file_priv)
2541 {
2542         drm_radeon_private_t *dev_priv = dev->dev_private;
2543         struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
2544         drm_radeon_sarea_t *sarea_priv;
2545         struct drm_device_dma *dma = dev->dma;
2546         struct drm_buf *buf;
2547         drm_radeon_vertex2_t *vertex = data;
2548         int i;
2549         unsigned char laststate;
2550
2551         LOCK_TEST_WITH_RETURN(dev, file_priv);
2552
2553         sarea_priv = master_priv->sarea_priv;
2554
2555         DRM_DEBUG("pid=%d index=%d discard=%d\n",
2556                   DRM_CURRENTPID, vertex->idx, vertex->discard);
2557
2558         if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
2559                 DRM_ERROR("buffer index %d (of %d max)\n",
2560                           vertex->idx, dma->buf_count - 1);
2561                 return -EINVAL;
2562         }
2563
2564         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2565         VB_AGE_TEST_WITH_RETURN(dev_priv);
2566
2567         buf = dma->buflist[vertex->idx];
2568
2569         if (buf->file_priv != file_priv) {
2570                 DRM_ERROR("process %d using buffer owned by %p\n",
2571                           DRM_CURRENTPID, buf->file_priv);
2572                 return -EINVAL;
2573         }
2574
2575         if (buf->pending) {
2576                 DRM_ERROR("sending pending buffer %d\n", vertex->idx);
2577                 return -EINVAL;
2578         }
2579
2580         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2581                 return -EINVAL;
2582
2583         for (laststate = 0xff, i = 0; i < vertex->nr_prims; i++) {
2584                 drm_radeon_prim_t prim;
2585                 drm_radeon_tcl_prim_t tclprim;
2586
2587                 if (DRM_COPY_FROM_USER(&prim, &vertex->prim[i], sizeof(prim)))
2588                         return -EFAULT;
2589
2590                 if (prim.stateidx != laststate) {
2591                         drm_radeon_state_t state;
2592
2593                         if (DRM_COPY_FROM_USER(&state,
2594                                                &vertex->state[prim.stateidx],
2595                                                sizeof(state)))
2596                                 return -EFAULT;
2597
2598                         if (radeon_emit_state2(dev_priv, file_priv, &state)) {
2599                                 DRM_ERROR("radeon_emit_state2 failed\n");
2600                                 return -EINVAL;
2601                         }
2602
2603                         laststate = prim.stateidx;
2604                 }
2605
2606                 tclprim.start = prim.start;
2607                 tclprim.finish = prim.finish;
2608                 tclprim.prim = prim.prim;
2609                 tclprim.vc_format = prim.vc_format;
2610
2611                 if (prim.prim & RADEON_PRIM_WALK_IND) {
2612                         tclprim.offset = prim.numverts * 64;
2613                         tclprim.numverts = RADEON_MAX_VB_VERTS; /* duh */
2614
2615                         radeon_cp_dispatch_indices(dev, file_priv->master, buf, &tclprim);
2616                 } else {
2617                         tclprim.numverts = prim.numverts;
2618                         tclprim.offset = 0;     /* not used */
2619
2620                         radeon_cp_dispatch_vertex(dev, file_priv, buf, &tclprim);
2621                 }
2622
2623                 if (sarea_priv->nbox == 1)
2624                         sarea_priv->nbox = 0;
2625         }
2626
2627         if (vertex->discard) {
2628                 radeon_cp_discard_buffer(dev, file_priv->master, buf);
2629         }
2630
2631         COMMIT_RING();
2632         return 0;
2633 }
2634
2635 static int radeon_emit_packets(drm_radeon_private_t * dev_priv,
2636                                struct drm_file *file_priv,
2637                                drm_radeon_cmd_header_t header,
2638                                drm_radeon_kcmd_buffer_t *cmdbuf)
2639 {
2640         int id = (int)header.packet.packet_id;
2641         int sz, reg;
2642         RING_LOCALS;
2643
2644         if (id >= RADEON_MAX_STATE_PACKETS)
2645                 return -EINVAL;
2646
2647         sz = packet[id].len;
2648         reg = packet[id].start;
2649
2650         if (sz * sizeof(u32) > drm_buffer_unprocessed(cmdbuf->buffer)) {
2651                 DRM_ERROR("Packet size provided larger than data provided\n");
2652                 return -EINVAL;
2653         }
2654
2655         if (radeon_check_and_fixup_packets(dev_priv, file_priv, id,
2656                                 cmdbuf->buffer)) {
2657                 DRM_ERROR("Packet verification failed\n");
2658                 return -EINVAL;
2659         }
2660
2661         BEGIN_RING(sz + 1);
2662         OUT_RING(CP_PACKET0(reg, (sz - 1)));
2663         OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
2664         ADVANCE_RING();
2665
2666         return 0;
2667 }
2668
2669 static __inline__ int radeon_emit_scalars(drm_radeon_private_t *dev_priv,
2670                                           drm_radeon_cmd_header_t header,
2671                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2672 {
2673         int sz = header.scalars.count;
2674         int start = header.scalars.offset;
2675         int stride = header.scalars.stride;
2676         RING_LOCALS;
2677
2678         BEGIN_RING(3 + sz);
2679         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2680         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2681         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2682         OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
2683         ADVANCE_RING();
2684         return 0;
2685 }
2686
2687 /* God this is ugly
2688  */
2689 static __inline__ int radeon_emit_scalars2(drm_radeon_private_t *dev_priv,
2690                                            drm_radeon_cmd_header_t header,
2691                                            drm_radeon_kcmd_buffer_t *cmdbuf)
2692 {
2693         int sz = header.scalars.count;
2694         int start = ((unsigned int)header.scalars.offset) + 0x100;
2695         int stride = header.scalars.stride;
2696         RING_LOCALS;
2697
2698         BEGIN_RING(3 + sz);
2699         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2700         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2701         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2702         OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
2703         ADVANCE_RING();
2704         return 0;
2705 }
2706
2707 static __inline__ int radeon_emit_vectors(drm_radeon_private_t *dev_priv,
2708                                           drm_radeon_cmd_header_t header,
2709                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2710 {
2711         int sz = header.vectors.count;
2712         int start = header.vectors.offset;
2713         int stride = header.vectors.stride;
2714         RING_LOCALS;
2715
2716         BEGIN_RING(5 + sz);
2717         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
2718         OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2719         OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2720         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2721         OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
2722         ADVANCE_RING();
2723
2724         return 0;
2725 }
2726
2727 static __inline__ int radeon_emit_veclinear(drm_radeon_private_t *dev_priv,
2728                                           drm_radeon_cmd_header_t header,
2729                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2730 {
2731         int sz = header.veclinear.count * 4;
2732         int start = header.veclinear.addr_lo | (header.veclinear.addr_hi << 8);
2733         RING_LOCALS;
2734
2735         if (!sz)
2736                 return 0;
2737         if (sz * 4 > drm_buffer_unprocessed(cmdbuf->buffer))
2738                 return -EINVAL;
2739
2740         BEGIN_RING(5 + sz);
2741         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
2742         OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2743         OUT_RING(start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2744         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2745         OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
2746         ADVANCE_RING();
2747
2748         return 0;
2749 }
2750
2751 static int radeon_emit_packet3(struct drm_device * dev,
2752                                struct drm_file *file_priv,
2753                                drm_radeon_kcmd_buffer_t *cmdbuf)
2754 {
2755         drm_radeon_private_t *dev_priv = dev->dev_private;
2756         unsigned int cmdsz;
2757         int ret;
2758         RING_LOCALS;
2759
2760         DRM_DEBUG("\n");
2761
2762         if ((ret = radeon_check_and_fixup_packet3(dev_priv, file_priv,
2763                                                   cmdbuf, &cmdsz))) {
2764                 DRM_ERROR("Packet verification failed\n");
2765                 return ret;
2766         }
2767
2768         BEGIN_RING(cmdsz);
2769         OUT_RING_DRM_BUFFER(cmdbuf->buffer, cmdsz);
2770         ADVANCE_RING();
2771
2772         return 0;
2773 }
2774
2775 static int radeon_emit_packet3_cliprect(struct drm_device *dev,
2776                                         struct drm_file *file_priv,
2777                                         drm_radeon_kcmd_buffer_t *cmdbuf,
2778                                         int orig_nbox)
2779 {
2780         drm_radeon_private_t *dev_priv = dev->dev_private;
2781         struct drm_clip_rect box;
2782         unsigned int cmdsz;
2783         int ret;
2784         struct drm_clip_rect __user *boxes = cmdbuf->boxes;
2785         int i = 0;
2786         RING_LOCALS;
2787
2788         DRM_DEBUG("\n");
2789
2790         if ((ret = radeon_check_and_fixup_packet3(dev_priv, file_priv,
2791                                                   cmdbuf, &cmdsz))) {
2792                 DRM_ERROR("Packet verification failed\n");
2793                 return ret;
2794         }
2795
2796         if (!orig_nbox)
2797                 goto out;
2798
2799         do {
2800                 if (i < cmdbuf->nbox) {
2801                         if (DRM_COPY_FROM_USER(&box, &boxes[i], sizeof(box)))
2802                                 return -EFAULT;
2803                         /* FIXME The second and subsequent times round
2804                          * this loop, send a WAIT_UNTIL_3D_IDLE before
2805                          * calling emit_clip_rect(). This fixes a
2806                          * lockup on fast machines when sending
2807                          * several cliprects with a cmdbuf, as when
2808                          * waving a 2D window over a 3D
2809                          * window. Something in the commands from user
2810                          * space seems to hang the card when they're
2811                          * sent several times in a row. That would be
2812                          * the correct place to fix it but this works
2813                          * around it until I can figure that out - Tim
2814                          * Smith */
2815                         if (i) {
2816                                 BEGIN_RING(2);
2817                                 RADEON_WAIT_UNTIL_3D_IDLE();
2818                                 ADVANCE_RING();
2819                         }
2820                         radeon_emit_clip_rect(dev_priv, &box);
2821                 }
2822
2823                 BEGIN_RING(cmdsz);
2824                 OUT_RING_DRM_BUFFER(cmdbuf->buffer, cmdsz);
2825                 ADVANCE_RING();
2826
2827         } while (++i < cmdbuf->nbox);
2828         if (cmdbuf->nbox == 1)
2829                 cmdbuf->nbox = 0;
2830
2831         return 0;
2832       out:
2833         drm_buffer_advance(cmdbuf->buffer, cmdsz * 4);
2834         return 0;
2835 }
2836
2837 static int radeon_emit_wait(struct drm_device * dev, int flags)
2838 {
2839         drm_radeon_private_t *dev_priv = dev->dev_private;
2840         RING_LOCALS;
2841
2842         DRM_DEBUG("%x\n", flags);
2843         switch (flags) {
2844         case RADEON_WAIT_2D:
2845                 BEGIN_RING(2);
2846                 RADEON_WAIT_UNTIL_2D_IDLE();
2847                 ADVANCE_RING();
2848                 break;
2849         case RADEON_WAIT_3D:
2850                 BEGIN_RING(2);
2851                 RADEON_WAIT_UNTIL_3D_IDLE();
2852                 ADVANCE_RING();
2853                 break;
2854         case RADEON_WAIT_2D | RADEON_WAIT_3D:
2855                 BEGIN_RING(2);
2856                 RADEON_WAIT_UNTIL_IDLE();
2857                 ADVANCE_RING();
2858                 break;
2859         default:
2860                 return -EINVAL;
2861         }
2862
2863         return 0;
2864 }
2865
2866 static int radeon_cp_cmdbuf(struct drm_device *dev, void *data,
2867                 struct drm_file *file_priv)
2868 {
2869         drm_radeon_private_t *dev_priv = dev->dev_private;
2870         struct drm_device_dma *dma = dev->dma;
2871         struct drm_buf *buf = NULL;
2872         drm_radeon_cmd_header_t stack_header;
2873         int idx;
2874         drm_radeon_kcmd_buffer_t *cmdbuf = data;
2875         int orig_nbox;
2876
2877         LOCK_TEST_WITH_RETURN(dev, file_priv);
2878
2879         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2880         VB_AGE_TEST_WITH_RETURN(dev_priv);
2881
2882         if (cmdbuf->bufsz > 64 * 1024 || cmdbuf->bufsz < 0) {
2883                 return -EINVAL;