Pull sn2-mmio-writes into release branch
[sfrench/cifs-2.6.git] / drivers / char / drm / r300_cmdbuf.c
1 /* r300_cmdbuf.c -- Command buffer emission for R300 -*- linux-c -*-
2  *
3  * Copyright (C) The Weather Channel, Inc.  2002.
4  * Copyright (C) 2004 Nicolai Haehnle.
5  * All Rights Reserved.
6  *
7  * The Weather Channel (TM) funded Tungsten Graphics to develop the
8  * initial release of the Radeon 8500 driver under the XFree86 license.
9  * This notice must be preserved.
10  *
11  * Permission is hereby granted, free of charge, to any person obtaining a
12  * copy of this software and associated documentation files (the "Software"),
13  * to deal in the Software without restriction, including without limitation
14  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15  * and/or sell copies of the Software, and to permit persons to whom the
16  * Software is furnished to do so, subject to the following conditions:
17  *
18  * The above copyright notice and this permission notice (including the next
19  * paragraph) shall be included in all copies or substantial portions of the
20  * Software.
21  *
22  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
25  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
26  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
27  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
28  * DEALINGS IN THE SOFTWARE.
29  *
30  * Authors:
31  *    Nicolai Haehnle <prefect_@gmx.net>
32  */
33
34 #include "drmP.h"
35 #include "drm.h"
36 #include "radeon_drm.h"
37 #include "radeon_drv.h"
38 #include "r300_reg.h"
39
/* Number of cliprects that are emitted to the hardware in one batch. */
#define R300_SIMULTANEOUS_CLIPRECTS             4

/* R300_RE_CLIPRECT_CNTL values; the entry at index (nr - 1) is used when
 * nr cliprects are active.
 */
static const int r300_cliprect_cntl[R300_SIMULTANEOUS_CLIPRECTS] = {
        0xAAAA,                 /* 1 cliprect  */
        0xEEEE,                 /* 2 cliprects */
        0xFEFE,                 /* 3 cliprects */
        0xFFFE                  /* 4 cliprects */
};
50
51 /**
52  * Emit up to R300_SIMULTANEOUS_CLIPRECTS cliprects from the given command
53  * buffer, starting with index n.
54  */
55 static int r300_emit_cliprects(drm_radeon_private_t *dev_priv,
56                                drm_radeon_kcmd_buffer_t *cmdbuf, int n)
57 {
58         drm_clip_rect_t box;
59         int nr;
60         int i;
61         RING_LOCALS;
62
63         nr = cmdbuf->nbox - n;
64         if (nr > R300_SIMULTANEOUS_CLIPRECTS)
65                 nr = R300_SIMULTANEOUS_CLIPRECTS;
66
67         DRM_DEBUG("%i cliprects\n", nr);
68
69         if (nr) {
70                 BEGIN_RING(6 + nr * 2);
71                 OUT_RING(CP_PACKET0(R300_RE_CLIPRECT_TL_0, nr * 2 - 1));
72
73                 for (i = 0; i < nr; ++i) {
74                         if (DRM_COPY_FROM_USER_UNCHECKED
75                             (&box, &cmdbuf->boxes[n + i], sizeof(box))) {
76                                 DRM_ERROR("copy cliprect faulted\n");
77                                 return DRM_ERR(EFAULT);
78                         }
79
80                         box.x1 =
81                             (box.x1 +
82                              R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
83                         box.y1 =
84                             (box.y1 +
85                              R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
86                         box.x2 =
87                             (box.x2 +
88                              R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
89                         box.y2 =
90                             (box.y2 +
91                              R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
92
93                         OUT_RING((box.x1 << R300_CLIPRECT_X_SHIFT) |
94                                  (box.y1 << R300_CLIPRECT_Y_SHIFT));
95                         OUT_RING((box.x2 << R300_CLIPRECT_X_SHIFT) |
96                                  (box.y2 << R300_CLIPRECT_Y_SHIFT));
97                 }
98
99                 OUT_RING_REG(R300_RE_CLIPRECT_CNTL, r300_cliprect_cntl[nr - 1]);
100
101                 /* TODO/SECURITY: Force scissors to a safe value, otherwise the
102                  * client might be able to trample over memory.
103                  * The impact should be very limited, but I'd rather be safe than
104                  * sorry.
105                  */
106                 OUT_RING(CP_PACKET0(R300_RE_SCISSORS_TL, 1));
107                 OUT_RING(0);
108                 OUT_RING(R300_SCISSORS_X_MASK | R300_SCISSORS_Y_MASK);
109                 ADVANCE_RING();
110         } else {
111                 /* Why we allow zero cliprect rendering:
112                  * There are some commands in a command buffer that must be submitted
113                  * even when there are no cliprects, e.g. DMA buffer discard
114                  * or state setting (though state setting could be avoided by
115                  * simulating a loss of context).
116                  *
117                  * Now since the cmdbuf interface is so chaotic right now (and is
118                  * bound to remain that way for a bit until things settle down),
119                  * it is basically impossible to filter out the commands that are
120                  * necessary and those that aren't.
121                  *
122                  * So I choose the safe way and don't do any filtering at all;
123                  * instead, I simply set up the engine so that all rendering
124                  * can't produce any fragments.
125                  */
126                 BEGIN_RING(2);
127                 OUT_RING_REG(R300_RE_CLIPRECT_CNTL, 0);
128                 ADVANCE_RING();
129         }
130
131         return 0;
132 }
133
134 static u8 r300_reg_flags[0x10000 >> 2];
135
136 void r300_init_reg_flags(void)
137 {
138         int i;
139         memset(r300_reg_flags, 0, 0x10000 >> 2);
140 #define ADD_RANGE_MARK(reg, count,mark) \
141                 for(i=((reg)>>2);i<((reg)>>2)+(count);i++)\
142                         r300_reg_flags[i]|=(mark);
143
144 #define MARK_SAFE               1
145 #define MARK_CHECK_OFFSET       2
146
147 #define ADD_RANGE(reg, count)   ADD_RANGE_MARK(reg, count, MARK_SAFE)
148
149         /* these match cmducs() command in r300_driver/r300/r300_cmdbuf.c */
150         ADD_RANGE(R300_SE_VPORT_XSCALE, 6);
151         ADD_RANGE(0x2080, 1);
152         ADD_RANGE(R300_SE_VTE_CNTL, 2);
153         ADD_RANGE(0x2134, 2);
154         ADD_RANGE(0x2140, 1);
155         ADD_RANGE(R300_VAP_INPUT_CNTL_0, 2);
156         ADD_RANGE(0x21DC, 1);
157         ADD_RANGE(0x221C, 1);
158         ADD_RANGE(0x2220, 4);
159         ADD_RANGE(0x2288, 1);
160         ADD_RANGE(R300_VAP_OUTPUT_VTX_FMT_0, 2);
161         ADD_RANGE(R300_VAP_PVS_CNTL_1, 3);
162         ADD_RANGE(R300_GB_ENABLE, 1);
163         ADD_RANGE(R300_GB_MSPOS0, 5);
164         ADD_RANGE(R300_TX_CNTL, 1);
165         ADD_RANGE(R300_TX_ENABLE, 1);
166         ADD_RANGE(0x4200, 4);
167         ADD_RANGE(0x4214, 1);
168         ADD_RANGE(R300_RE_POINTSIZE, 1);
169         ADD_RANGE(0x4230, 3);
170         ADD_RANGE(R300_RE_LINE_CNT, 1);
171         ADD_RANGE(0x4238, 1);
172         ADD_RANGE(0x4260, 3);
173         ADD_RANGE(0x4274, 4);
174         ADD_RANGE(0x4288, 5);
175         ADD_RANGE(0x42A0, 1);
176         ADD_RANGE(R300_RE_ZBIAS_T_FACTOR, 4);
177         ADD_RANGE(0x42B4, 1);
178         ADD_RANGE(R300_RE_CULL_CNTL, 1);
179         ADD_RANGE(0x42C0, 2);
180         ADD_RANGE(R300_RS_CNTL_0, 2);
181         ADD_RANGE(R300_RS_INTERP_0, 8);
182         ADD_RANGE(R300_RS_ROUTE_0, 8);
183         ADD_RANGE(0x43A4, 2);
184         ADD_RANGE(0x43E8, 1);
185         ADD_RANGE(R300_PFS_CNTL_0, 3);
186         ADD_RANGE(R300_PFS_NODE_0, 4);
187         ADD_RANGE(R300_PFS_TEXI_0, 64);
188         ADD_RANGE(0x46A4, 5);
189         ADD_RANGE(R300_PFS_INSTR0_0, 64);
190         ADD_RANGE(R300_PFS_INSTR1_0, 64);
191         ADD_RANGE(R300_PFS_INSTR2_0, 64);
192         ADD_RANGE(R300_PFS_INSTR3_0, 64);
193         ADD_RANGE(0x4BC0, 1);
194         ADD_RANGE(0x4BC8, 3);
195         ADD_RANGE(R300_PP_ALPHA_TEST, 2);
196         ADD_RANGE(0x4BD8, 1);
197         ADD_RANGE(R300_PFS_PARAM_0_X, 64);
198         ADD_RANGE(0x4E00, 1);
199         ADD_RANGE(R300_RB3D_CBLEND, 2);
200         ADD_RANGE(R300_RB3D_COLORMASK, 1);
201         ADD_RANGE(0x4E10, 3);
202         ADD_RANGE_MARK(R300_RB3D_COLOROFFSET0, 1, MARK_CHECK_OFFSET);   /* check offset */
203         ADD_RANGE(R300_RB3D_COLORPITCH0, 1);
204         ADD_RANGE(0x4E50, 9);
205         ADD_RANGE(0x4E88, 1);
206         ADD_RANGE(0x4EA0, 2);
207         ADD_RANGE(R300_RB3D_ZSTENCIL_CNTL_0, 3);
208         ADD_RANGE(0x4F10, 4);
209         ADD_RANGE_MARK(R300_RB3D_DEPTHOFFSET, 1, MARK_CHECK_OFFSET);    /* check offset */
210         ADD_RANGE(R300_RB3D_DEPTHPITCH, 1);
211         ADD_RANGE(0x4F28, 1);
212         ADD_RANGE(0x4F30, 2);
213         ADD_RANGE(0x4F44, 1);
214         ADD_RANGE(0x4F54, 1);
215
216         ADD_RANGE(R300_TX_FILTER_0, 16);
217         ADD_RANGE(R300_TX_UNK1_0, 16);
218         ADD_RANGE(R300_TX_SIZE_0, 16);
219         ADD_RANGE(R300_TX_FORMAT_0, 16);
220         ADD_RANGE(R300_TX_PITCH_0, 16);
221         /* Texture offset is dangerous and needs more checking */
222         ADD_RANGE_MARK(R300_TX_OFFSET_0, 16, MARK_CHECK_OFFSET);
223         ADD_RANGE(R300_TX_UNK4_0, 16);
224         ADD_RANGE(R300_TX_BORDER_COLOR_0, 16);
225
226         /* Sporadic registers used as primitives are emitted */
227         ADD_RANGE(0x4f18, 1);
228         ADD_RANGE(R300_RB3D_DSTCACHE_CTLSTAT, 1);
229         ADD_RANGE(R300_VAP_INPUT_ROUTE_0_0, 8);
230         ADD_RANGE(R300_VAP_INPUT_ROUTE_1_0, 8);
231
232 }
233
234 static __inline__ int r300_check_range(unsigned reg, int count)
235 {
236         int i;
237         if (reg & ~0xffff)
238                 return -1;
239         for (i = (reg >> 2); i < (reg >> 2) + count; i++)
240                 if (r300_reg_flags[i] != MARK_SAFE)
241                         return 1;
242         return 0;
243 }
244
245   /* we expect offsets passed to the framebuffer to be either within video memory or
246      within AGP space */
247 static __inline__ int r300_check_offset(drm_radeon_private_t *dev_priv,
248                                         u32 offset)
249 {
250         /* we realy want to check against end of video aperture
251            but this value is not being kept.
252            This code is correct for now (does the same thing as the
253            code that sets MC_FB_LOCATION) in radeon_cp.c */
254         if ((offset >= dev_priv->fb_location) &&
255             (offset < dev_priv->gart_vm_start))
256                 return 0;
257         if ((offset >= dev_priv->gart_vm_start) &&
258             (offset < dev_priv->gart_vm_start + dev_priv->gart_size))
259                 return 0;
260         return 1;
261 }
262
263 static __inline__ int r300_emit_carefully_checked_packet0(drm_radeon_private_t *
264                                                           dev_priv,
265                                                           drm_radeon_kcmd_buffer_t
266                                                           * cmdbuf,
267                                                           drm_r300_cmd_header_t
268                                                           header)
269 {
270         int reg;
271         int sz;
272         int i;
273         int values[64];
274         RING_LOCALS;
275
276         sz = header.packet0.count;
277         reg = (header.packet0.reghi << 8) | header.packet0.reglo;
278
279         if ((sz > 64) || (sz < 0)) {
280                 DRM_ERROR
281                     ("Cannot emit more than 64 values at a time (reg=%04x sz=%d)\n",
282                      reg, sz);
283                 return DRM_ERR(EINVAL);
284         }
285         for (i = 0; i < sz; i++) {
286                 values[i] = ((int *)cmdbuf->buf)[i];
287                 switch (r300_reg_flags[(reg >> 2) + i]) {
288                 case MARK_SAFE:
289                         break;
290                 case MARK_CHECK_OFFSET:
291                         if (r300_check_offset(dev_priv, (u32) values[i])) {
292                                 DRM_ERROR
293                                     ("Offset failed range check (reg=%04x sz=%d)\n",
294                                      reg, sz);
295                                 return DRM_ERR(EINVAL);
296                         }
297                         break;
298                 default:
299                         DRM_ERROR("Register %04x failed check as flag=%02x\n",
300                                   reg + i * 4, r300_reg_flags[(reg >> 2) + i]);
301                         return DRM_ERR(EINVAL);
302                 }
303         }
304
305         BEGIN_RING(1 + sz);
306         OUT_RING(CP_PACKET0(reg, sz - 1));
307         OUT_RING_TABLE(values, sz);
308         ADVANCE_RING();
309
310         cmdbuf->buf += sz * 4;
311         cmdbuf->bufsz -= sz * 4;
312
313         return 0;
314 }
315
316 /**
317  * Emits a packet0 setting arbitrary registers.
318  * Called by r300_do_cp_cmdbuf.
319  *
320  * Note that checks are performed on contents and addresses of the registers
321  */
322 static __inline__ int r300_emit_packet0(drm_radeon_private_t *dev_priv,
323                                         drm_radeon_kcmd_buffer_t *cmdbuf,
324                                         drm_r300_cmd_header_t header)
325 {
326         int reg;
327         int sz;
328         RING_LOCALS;
329
330         sz = header.packet0.count;
331         reg = (header.packet0.reghi << 8) | header.packet0.reglo;
332
333         if (!sz)
334                 return 0;
335
336         if (sz * 4 > cmdbuf->bufsz)
337                 return DRM_ERR(EINVAL);
338
339         if (reg + sz * 4 >= 0x10000) {
340                 DRM_ERROR("No such registers in hardware reg=%04x sz=%d\n", reg,
341                           sz);
342                 return DRM_ERR(EINVAL);
343         }
344
345         if (r300_check_range(reg, sz)) {
346                 /* go and check everything */
347                 return r300_emit_carefully_checked_packet0(dev_priv, cmdbuf,
348                                                            header);
349         }
350         /* the rest of the data is safe to emit, whatever the values the user passed */
351
352         BEGIN_RING(1 + sz);
353         OUT_RING(CP_PACKET0(reg, sz - 1));
354         OUT_RING_TABLE((int *)cmdbuf->buf, sz);
355         ADVANCE_RING();
356
357         cmdbuf->buf += sz * 4;
358         cmdbuf->bufsz -= sz * 4;
359
360         return 0;
361 }
362
363 /**
364  * Uploads user-supplied vertex program instructions or parameters onto
365  * the graphics card.
366  * Called by r300_do_cp_cmdbuf.
367  */
368 static __inline__ int r300_emit_vpu(drm_radeon_private_t *dev_priv,
369                                     drm_radeon_kcmd_buffer_t *cmdbuf,
370                                     drm_r300_cmd_header_t header)
371 {
372         int sz;
373         int addr;
374         RING_LOCALS;
375
376         sz = header.vpu.count;
377         addr = (header.vpu.adrhi << 8) | header.vpu.adrlo;
378
379         if (!sz)
380                 return 0;
381         if (sz * 16 > cmdbuf->bufsz)
382                 return DRM_ERR(EINVAL);
383
384         BEGIN_RING(5 + sz * 4);
385         /* Wait for VAP to come to senses.. */
386         /* there is no need to emit it multiple times, (only once before VAP is programmed,
387            but this optimization is for later */
388         OUT_RING_REG(R300_VAP_PVS_WAITIDLE, 0);
389         OUT_RING_REG(R300_VAP_PVS_UPLOAD_ADDRESS, addr);
390         OUT_RING(CP_PACKET0_TABLE(R300_VAP_PVS_UPLOAD_DATA, sz * 4 - 1));
391         OUT_RING_TABLE((int *)cmdbuf->buf, sz * 4);
392
393         ADVANCE_RING();
394
395         cmdbuf->buf += sz * 16;
396         cmdbuf->bufsz -= sz * 16;
397
398         return 0;
399 }
400
401 /**
402  * Emit a clear packet from userspace.
403  * Called by r300_emit_packet3.
404  */
405 static __inline__ int r300_emit_clear(drm_radeon_private_t *dev_priv,
406                                       drm_radeon_kcmd_buffer_t *cmdbuf)
407 {
408         RING_LOCALS;
409
410         if (8 * 4 > cmdbuf->bufsz)
411                 return DRM_ERR(EINVAL);
412
413         BEGIN_RING(10);
414         OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8));
415         OUT_RING(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
416                  (1 << R300_PRIM_NUM_VERTICES_SHIFT));
417         OUT_RING_TABLE((int *)cmdbuf->buf, 8);
418         ADVANCE_RING();
419
420         cmdbuf->buf += 8 * 4;
421         cmdbuf->bufsz -= 8 * 4;
422
423         return 0;
424 }
425
426 static __inline__ int r300_emit_3d_load_vbpntr(drm_radeon_private_t *dev_priv,
427                                                drm_radeon_kcmd_buffer_t *cmdbuf,
428                                                u32 header)
429 {
430         int count, i, k;
431 #define MAX_ARRAY_PACKET  64
432         u32 payload[MAX_ARRAY_PACKET];
433         u32 narrays;
434         RING_LOCALS;
435
436         count = (header >> 16) & 0x3fff;
437
438         if ((count + 1) > MAX_ARRAY_PACKET) {
439                 DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
440                           count);
441                 return DRM_ERR(EINVAL);
442         }
443         memset(payload, 0, MAX_ARRAY_PACKET * 4);
444         memcpy(payload, cmdbuf->buf + 4, (count + 1) * 4);
445
446         /* carefully check packet contents */
447
448         narrays = payload[0];
449         k = 0;
450         i = 1;
451         while ((k < narrays) && (i < (count + 1))) {
452                 i++;            /* skip attribute field */
453                 if (r300_check_offset(dev_priv, payload[i])) {
454                         DRM_ERROR
455                             ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
456                              k, i);
457                         return DRM_ERR(EINVAL);
458                 }
459                 k++;
460                 i++;
461                 if (k == narrays)
462                         break;
463                 /* have one more to process, they come in pairs */
464                 if (r300_check_offset(dev_priv, payload[i])) {
465                         DRM_ERROR
466                             ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
467                              k, i);
468                         return DRM_ERR(EINVAL);
469                 }
470                 k++;
471                 i++;
472         }
473         /* do the counts match what we expect ? */
474         if ((k != narrays) || (i != (count + 1))) {
475                 DRM_ERROR
476                     ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
477                      k, i, narrays, count + 1);
478                 return DRM_ERR(EINVAL);
479         }
480
481         /* all clear, output packet */
482
483         BEGIN_RING(count + 2);
484         OUT_RING(header);
485         OUT_RING_TABLE(payload, count + 1);
486         ADVANCE_RING();
487
488         cmdbuf->buf += (count + 2) * 4;
489         cmdbuf->bufsz -= (count + 2) * 4;
490
491         return 0;
492 }
493 static __inline__ int r300_emit_bitblt_multi(drm_radeon_private_t *dev_priv,
494                                              drm_radeon_kcmd_buffer_t *cmdbuf)
495 {
496         u32 *cmd = (u32 *) cmdbuf->buf;
497         int count, ret;
498         RING_LOCALS;
499
500         count=(cmd[0]>>16) & 0x3fff;
501
502         if (cmd[0] & 0x8000) {
503                 u32 offset;
504
505                 if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL 
506                               | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
507                         offset = cmd[2] << 10;
508                         ret = r300_check_offset(dev_priv, offset);
509                         if (ret) {
510                                 DRM_ERROR("Invalid bitblt first offset is %08X\n", offset);
511                                 return DRM_ERR(EINVAL);
512                         }
513                 }
514
515                 if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
516                     (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
517                         offset = cmd[3] << 10;
518                         ret = r300_check_offset(dev_priv, offset);
519                         if (ret) {
520                                 DRM_ERROR("Invalid bitblt second offset is %08X\n", offset);
521                                 return DRM_ERR(EINVAL);
522                         }
523                         
524                 }
525         }
526
527         BEGIN_RING(count+2);
528         OUT_RING(cmd[0]);
529         OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
530         ADVANCE_RING();
531
532         cmdbuf->buf += (count+2)*4;
533         cmdbuf->bufsz -= (count+2)*4;
534
535         return 0;
536 }
537
538 static __inline__ int r300_emit_raw_packet3(drm_radeon_private_t *dev_priv,
539                                             drm_radeon_kcmd_buffer_t *cmdbuf)
540 {
541         u32 header;
542         int count;
543         RING_LOCALS;
544
545         if (4 > cmdbuf->bufsz)
546                 return DRM_ERR(EINVAL);
547
548         /* Fixme !! This simply emits a packet without much checking.
549            We need to be smarter. */
550
551         /* obtain first word - actual packet3 header */
552         header = *(u32 *) cmdbuf->buf;
553
554         /* Is it packet 3 ? */
555         if ((header >> 30) != 0x3) {
556                 DRM_ERROR("Not a packet3 header (0x%08x)\n", header);
557                 return DRM_ERR(EINVAL);
558         }
559
560         count = (header >> 16) & 0x3fff;
561
562         /* Check again now that we know how much data to expect */
563         if ((count + 2) * 4 > cmdbuf->bufsz) {
564                 DRM_ERROR
565                     ("Expected packet3 of length %d but have only %d bytes left\n",
566                      (count + 2) * 4, cmdbuf->bufsz);
567                 return DRM_ERR(EINVAL);
568         }
569
570         /* Is it a packet type we know about ? */
571         switch (header & 0xff00) {
572         case RADEON_3D_LOAD_VBPNTR:     /* load vertex array pointers */
573                 return r300_emit_3d_load_vbpntr(dev_priv, cmdbuf, header);
574
575         case RADEON_CNTL_BITBLT_MULTI:
576                 return r300_emit_bitblt_multi(dev_priv, cmdbuf);
577
578         case RADEON_CP_3D_DRAW_IMMD_2:  /* triggers drawing using in-packet vertex data */
579         case RADEON_CP_3D_DRAW_VBUF_2:  /* triggers drawing of vertex buffers setup elsewhere */
580         case RADEON_CP_3D_DRAW_INDX_2:  /* triggers drawing using indices to vertex buffer */
581         case RADEON_CP_INDX_BUFFER:     /* DRAW_INDX_2 without INDX_BUFFER seems to lock up the gpu */
582         case RADEON_WAIT_FOR_IDLE:
583         case RADEON_CP_NOP:
584                 /* these packets are safe */
585                 break;
586         default:
587                 DRM_ERROR("Unknown packet3 header (0x%08x)\n", header);
588                 return DRM_ERR(EINVAL);
589         }
590
591         BEGIN_RING(count + 2);
592         OUT_RING(header);
593         OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
594         ADVANCE_RING();
595
596         cmdbuf->buf += (count + 2) * 4;
597         cmdbuf->bufsz -= (count + 2) * 4;
598
599         return 0;
600 }
601
602 /**
603  * Emit a rendering packet3 from userspace.
604  * Called by r300_do_cp_cmdbuf.
605  */
606 static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv,
607                                         drm_radeon_kcmd_buffer_t *cmdbuf,
608                                         drm_r300_cmd_header_t header)
609 {
610         int n;
611         int ret;
612         char *orig_buf = cmdbuf->buf;
613         int orig_bufsz = cmdbuf->bufsz;
614
615         /* This is a do-while-loop so that we run the interior at least once,
616          * even if cmdbuf->nbox is 0. Compare r300_emit_cliprects for rationale.
617          */
618         n = 0;
619         do {
620                 if (cmdbuf->nbox > R300_SIMULTANEOUS_CLIPRECTS) {
621                         ret = r300_emit_cliprects(dev_priv, cmdbuf, n);
622                         if (ret)
623                                 return ret;
624
625                         cmdbuf->buf = orig_buf;
626                         cmdbuf->bufsz = orig_bufsz;
627                 }
628
629                 switch (header.packet3.packet) {
630                 case R300_CMD_PACKET3_CLEAR:
631                         DRM_DEBUG("R300_CMD_PACKET3_CLEAR\n");
632                         ret = r300_emit_clear(dev_priv, cmdbuf);
633                         if (ret) {
634                                 DRM_ERROR("r300_emit_clear failed\n");
635                                 return ret;
636                         }
637                         break;
638
639                 case R300_CMD_PACKET3_RAW:
640                         DRM_DEBUG("R300_CMD_PACKET3_RAW\n");
641                         ret = r300_emit_raw_packet3(dev_priv, cmdbuf);
642                         if (ret) {
643                                 DRM_ERROR("r300_emit_raw_packet3 failed\n");
644                                 return ret;
645                         }
646                         break;
647
648                 default:
649                         DRM_ERROR("bad packet3 type %i at %p\n",
650                                   header.packet3.packet,
651                                   cmdbuf->buf - sizeof(header));
652                         return DRM_ERR(EINVAL);
653                 }
654
655                 n += R300_SIMULTANEOUS_CLIPRECTS;
656         } while (n < cmdbuf->nbox);
657
658         return 0;
659 }
660
661 /* Some of the R300 chips seem to be extremely touchy about the two registers
662  * that are configured in r300_pacify.
663  * Among the worst offenders seems to be the R300 ND (0x4E44): When userspace
664  * sends a command buffer that contains only state setting commands and a
665  * vertex program/parameter upload sequence, this will eventually lead to a
666  * lockup, unless the sequence is bracketed by calls to r300_pacify.
667  * So we should take great care to *always* call r300_pacify before
668  * *anything* 3D related, and again afterwards. This is what the
669  * call bracket in r300_do_cp_cmdbuf is for.
670  */
671
672 /**
673  * Emit the sequence to pacify R300.
674  */
675 static __inline__ void r300_pacify(drm_radeon_private_t *dev_priv)
676 {
677         RING_LOCALS;
678
679         BEGIN_RING(6);
680         OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
681         OUT_RING(0xa);
682         OUT_RING(CP_PACKET0(0x4f18, 0));
683         OUT_RING(0x3);
684         OUT_RING(CP_PACKET3(RADEON_CP_NOP, 0));
685         OUT_RING(0x0);
686         ADVANCE_RING();
687 }
688
689 /**
690  * Called by r300_do_cp_cmdbuf to update the internal buffer age and state.
691  * The actual age emit is done by r300_do_cp_cmdbuf, which is why you must
692  * be careful about how this function is called.
693  */
694 static void r300_discard_buffer(drm_device_t * dev, drm_buf_t * buf)
695 {
696         drm_radeon_private_t *dev_priv = dev->dev_private;
697         drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
698
699         buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
700         buf->pending = 1;
701         buf->used = 0;
702 }
703
704 /**
705  * Parses and validates a user-supplied command buffer and emits appropriate
706  * commands on the DMA ring buffer.
707  * Called by the ioctl handler function radeon_cp_cmdbuf.
708  */
709 int r300_do_cp_cmdbuf(drm_device_t *dev,
710                       DRMFILE filp,
711                       drm_file_t *filp_priv,
712                       drm_radeon_kcmd_buffer_t *cmdbuf)
713 {
714         drm_radeon_private_t *dev_priv = dev->dev_private;
715         drm_device_dma_t *dma = dev->dma;
716         drm_buf_t *buf = NULL;
717         int emit_dispatch_age = 0;
718         int ret = 0;
719
720         DRM_DEBUG("\n");
721
722         /* See the comment above r300_emit_begin3d for why this call must be here,
723          * and what the cleanup gotos are for. */
724         r300_pacify(dev_priv);
725
726         if (cmdbuf->nbox <= R300_SIMULTANEOUS_CLIPRECTS) {
727                 ret = r300_emit_cliprects(dev_priv, cmdbuf, 0);
728                 if (ret)
729                         goto cleanup;
730         }
731
732         while (cmdbuf->bufsz >= sizeof(drm_r300_cmd_header_t)) {
733                 int idx;
734                 drm_r300_cmd_header_t header;
735
736                 header.u = *(unsigned int *)cmdbuf->buf;
737
738                 cmdbuf->buf += sizeof(header);
739                 cmdbuf->bufsz -= sizeof(header);
740
741                 switch (header.header.cmd_type) {
742                 case R300_CMD_PACKET0:
743                         DRM_DEBUG("R300_CMD_PACKET0\n");
744                         ret = r300_emit_packet0(dev_priv, cmdbuf, header);
745                         if (ret) {
746                                 DRM_ERROR("r300_emit_packet0 failed\n");
747                                 goto cleanup;
748                         }
749                         break;
750
751                 case R300_CMD_VPU:
752                         DRM_DEBUG("R300_CMD_VPU\n");
753                         ret = r300_emit_vpu(dev_priv, cmdbuf, header);
754                         if (ret) {
755                                 DRM_ERROR("r300_emit_vpu failed\n");
756                                 goto cleanup;
757                         }
758                         break;
759
760                 case R300_CMD_PACKET3:
761                         DRM_DEBUG("R300_CMD_PACKET3\n");
762                         ret = r300_emit_packet3(dev_priv, cmdbuf, header);
763                         if (ret) {
764                                 DRM_ERROR("r300_emit_packet3 failed\n");
765                                 goto cleanup;
766                         }
767                         break;
768
769                 case R300_CMD_END3D:
770                         DRM_DEBUG("R300_CMD_END3D\n");
771                         /* TODO:
772                            Ideally userspace driver should not need to issue this call,
773                            i.e. the drm driver should issue it automatically and prevent
774                            lockups.
775
776                            In practice, we do not understand why this call is needed and what
777                            it does (except for some vague guesses that it has to do with cache
778                            coherence) and so the user space driver does it.
779
780                            Once we are sure which uses prevent lockups the code could be moved
781                            into the kernel and the userspace driver will not
782                            need to use this command.
783
784                            Note that issuing this command does not hurt anything
785                            except, possibly, performance */
786                         r300_pacify(dev_priv);
787                         break;
788
789                 case R300_CMD_CP_DELAY:
790                         /* simple enough, we can do it here */
791                         DRM_DEBUG("R300_CMD_CP_DELAY\n");
792                         {
793                                 int i;
794                                 RING_LOCALS;
795
796                                 BEGIN_RING(header.delay.count);
797                                 for (i = 0; i < header.delay.count; i++)
798                                         OUT_RING(RADEON_CP_PACKET2);
799                                 ADVANCE_RING();
800                         }
801                         break;
802
803                 case R300_CMD_DMA_DISCARD:
804                         DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
805                         idx = header.dma.buf_idx;
806                         if (idx < 0 || idx >= dma->buf_count) {
807                                 DRM_ERROR("buffer index %d (of %d max)\n",
808                                           idx, dma->buf_count - 1);
809                                 ret = DRM_ERR(EINVAL);
810                                 goto cleanup;
811                         }
812
813                         buf = dma->buflist[idx];
814                         if (buf->filp != filp || buf->pending) {
815                                 DRM_ERROR("bad buffer %p %p %d\n",
816                                           buf->filp, filp, buf->pending);
817                                 ret = DRM_ERR(EINVAL);
818                                 goto cleanup;
819                         }
820
821                         emit_dispatch_age = 1;
822                         r300_discard_buffer(dev, buf);
823                         break;
824
825                 case R300_CMD_WAIT:
826                         /* simple enough, we can do it here */
827                         DRM_DEBUG("R300_CMD_WAIT\n");
828                         if (header.wait.flags == 0)
829                                 break;  /* nothing to do */
830
831                         {
832                                 RING_LOCALS;
833
834                                 BEGIN_RING(2);
835                                 OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
836                                 OUT_RING((header.wait.flags & 0xf) << 14);
837                                 ADVANCE_RING();
838                         }
839                         break;
840
841                 default:
842                         DRM_ERROR("bad cmd_type %i at %p\n",
843                                   header.header.cmd_type,
844                                   cmdbuf->buf - sizeof(header));
845                         ret = DRM_ERR(EINVAL);
846                         goto cleanup;
847                 }
848         }
849
850         DRM_DEBUG("END\n");
851
852       cleanup:
853         r300_pacify(dev_priv);
854
855         /* We emit the vertex buffer age here, outside the pacifier "brackets"
856          * for two reasons:
857          *  (1) This may coalesce multiple age emissions into a single one and
858          *  (2) more importantly, some chips lock up hard when scratch registers
859          *      are written inside the pacifier bracket.
860          */
861         if (emit_dispatch_age) {
862                 RING_LOCALS;
863
864                 /* Emit the vertex buffer age */
865                 BEGIN_RING(2);
866                 RADEON_DISPATCH_AGE(dev_priv->sarea_priv->last_dispatch);
867                 ADVANCE_RING();
868         }
869
870         COMMIT_RING();
871
872         return ret;
873 }