Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
[sfrench/cifs-2.6.git] / drivers / media / platform / coda / coda-bit.c
1 /*
2  * Coda multi-standard codec IP - BIT processor functions
3  *
4  * Copyright (C) 2012 Vista Silicon S.L.
5  *    Javier Martin, <javier.martin@vista-silicon.com>
6  *    Xavier Duret
7  * Copyright (C) 2012-2014 Philipp Zabel, Pengutronix
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  */
14
15 #include <linux/clk.h>
16 #include <linux/irqreturn.h>
17 #include <linux/kernel.h>
18 #include <linux/log2.h>
19 #include <linux/platform_device.h>
20 #include <linux/reset.h>
21 #include <linux/slab.h>
22 #include <linux/videodev2.h>
23
24 #include <media/v4l2-common.h>
25 #include <media/v4l2-ctrls.h>
26 #include <media/v4l2-fh.h>
27 #include <media/v4l2-mem2mem.h>
28 #include <media/videobuf2-v4l2.h>
29 #include <media/videobuf2-dma-contig.h>
30 #include <media/videobuf2-vmalloc.h>
31
32 #include "coda.h"
33 #include "imx-vdoa.h"
34 #define CREATE_TRACE_POINTS
35 #include "trace.h"
36
37 #define CODA_PARA_BUF_SIZE      (10 * 1024)
38 #define CODA7_PS_BUF_SIZE       0x28000
39 #define CODA9_PS_SAVE_SIZE      (512 * 1024)
40
41 #define CODA_DEFAULT_GAMMA      4096
42 #define CODA9_DEFAULT_GAMMA     24576   /* 0.75 * 32768 */
43
44 static void coda_free_bitstream_buffer(struct coda_ctx *ctx);
45
46 static inline int coda_is_initialized(struct coda_dev *dev)
47 {
48         return coda_read(dev, CODA_REG_BIT_CUR_PC) != 0;
49 }
50
51 static inline unsigned long coda_isbusy(struct coda_dev *dev)
52 {
53         return coda_read(dev, CODA_REG_BIT_BUSY);
54 }
55
56 static int coda_wait_timeout(struct coda_dev *dev)
57 {
58         unsigned long timeout = jiffies + msecs_to_jiffies(1000);
59
60         while (coda_isbusy(dev)) {
61                 if (time_after(jiffies, timeout))
62                         return -ETIMEDOUT;
63         }
64         return 0;
65 }
66
67 static void coda_command_async(struct coda_ctx *ctx, int cmd)
68 {
69         struct coda_dev *dev = ctx->dev;
70
71         if (dev->devtype->product == CODA_960 ||
72             dev->devtype->product == CODA_7541) {
73                 /* Restore context related registers to CODA */
74                 coda_write(dev, ctx->bit_stream_param,
75                                 CODA_REG_BIT_BIT_STREAM_PARAM);
76                 coda_write(dev, ctx->frm_dis_flg,
77                                 CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
78                 coda_write(dev, ctx->frame_mem_ctrl,
79                                 CODA_REG_BIT_FRAME_MEM_CTRL);
80                 coda_write(dev, ctx->workbuf.paddr, CODA_REG_BIT_WORK_BUF_ADDR);
81         }
82
83         if (dev->devtype->product == CODA_960) {
84                 coda_write(dev, 1, CODA9_GDI_WPROT_ERR_CLR);
85                 coda_write(dev, 0, CODA9_GDI_WPROT_RGN_EN);
86         }
87
88         coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY);
89
90         coda_write(dev, ctx->idx, CODA_REG_BIT_RUN_INDEX);
91         coda_write(dev, ctx->params.codec_mode, CODA_REG_BIT_RUN_COD_STD);
92         coda_write(dev, ctx->params.codec_mode_aux, CODA7_REG_BIT_RUN_AUX_STD);
93
94         trace_coda_bit_run(ctx, cmd);
95
96         coda_write(dev, cmd, CODA_REG_BIT_RUN_COMMAND);
97 }
98
99 static int coda_command_sync(struct coda_ctx *ctx, int cmd)
100 {
101         struct coda_dev *dev = ctx->dev;
102         int ret;
103
104         coda_command_async(ctx, cmd);
105         ret = coda_wait_timeout(dev);
106         trace_coda_bit_done(ctx);
107
108         return ret;
109 }
110
111 int coda_hw_reset(struct coda_ctx *ctx)
112 {
113         struct coda_dev *dev = ctx->dev;
114         unsigned long timeout;
115         unsigned int idx;
116         int ret;
117
118         if (!dev->rstc)
119                 return -ENOENT;
120
121         idx = coda_read(dev, CODA_REG_BIT_RUN_INDEX);
122
123         if (dev->devtype->product == CODA_960) {
124                 timeout = jiffies + msecs_to_jiffies(100);
125                 coda_write(dev, 0x11, CODA9_GDI_BUS_CTRL);
126                 while (coda_read(dev, CODA9_GDI_BUS_STATUS) != 0x77) {
127                         if (time_after(jiffies, timeout))
128                                 return -ETIME;
129                         cpu_relax();
130                 }
131         }
132
133         ret = reset_control_reset(dev->rstc);
134         if (ret < 0)
135                 return ret;
136
137         if (dev->devtype->product == CODA_960)
138                 coda_write(dev, 0x00, CODA9_GDI_BUS_CTRL);
139         coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY);
140         coda_write(dev, CODA_REG_RUN_ENABLE, CODA_REG_BIT_CODE_RUN);
141         ret = coda_wait_timeout(dev);
142         coda_write(dev, idx, CODA_REG_BIT_RUN_INDEX);
143
144         return ret;
145 }
146
147 static void coda_kfifo_sync_from_device(struct coda_ctx *ctx)
148 {
149         struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo;
150         struct coda_dev *dev = ctx->dev;
151         u32 rd_ptr;
152
153         rd_ptr = coda_read(dev, CODA_REG_BIT_RD_PTR(ctx->reg_idx));
154         kfifo->out = (kfifo->in & ~kfifo->mask) |
155                       (rd_ptr - ctx->bitstream.paddr);
156         if (kfifo->out > kfifo->in)
157                 kfifo->out -= kfifo->mask + 1;
158 }
159
160 static void coda_kfifo_sync_to_device_full(struct coda_ctx *ctx)
161 {
162         struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo;
163         struct coda_dev *dev = ctx->dev;
164         u32 rd_ptr, wr_ptr;
165
166         rd_ptr = ctx->bitstream.paddr + (kfifo->out & kfifo->mask);
167         coda_write(dev, rd_ptr, CODA_REG_BIT_RD_PTR(ctx->reg_idx));
168         wr_ptr = ctx->bitstream.paddr + (kfifo->in & kfifo->mask);
169         coda_write(dev, wr_ptr, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
170 }
171
172 static void coda_kfifo_sync_to_device_write(struct coda_ctx *ctx)
173 {
174         struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo;
175         struct coda_dev *dev = ctx->dev;
176         u32 wr_ptr;
177
178         wr_ptr = ctx->bitstream.paddr + (kfifo->in & kfifo->mask);
179         coda_write(dev, wr_ptr, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
180 }
181
182 static int coda_bitstream_pad(struct coda_ctx *ctx, u32 size)
183 {
184         unsigned char *buf;
185         u32 n;
186
187         if (size < 6)
188                 size = 6;
189
190         buf = kmalloc(size, GFP_KERNEL);
191         if (!buf)
192                 return -ENOMEM;
193
194         coda_h264_filler_nal(size, buf);
195         n = kfifo_in(&ctx->bitstream_fifo, buf, size);
196         kfree(buf);
197
198         return (n < size) ? -ENOSPC : 0;
199 }
200
201 static int coda_bitstream_queue(struct coda_ctx *ctx,
202                                 struct vb2_v4l2_buffer *src_buf)
203 {
204         u32 src_size = vb2_get_plane_payload(&src_buf->vb2_buf, 0);
205         u32 n;
206
207         n = kfifo_in(&ctx->bitstream_fifo,
208                         vb2_plane_vaddr(&src_buf->vb2_buf, 0), src_size);
209         if (n < src_size)
210                 return -ENOSPC;
211
212         src_buf->sequence = ctx->qsequence++;
213
214         return 0;
215 }
216
217 static bool coda_bitstream_try_queue(struct coda_ctx *ctx,
218                                      struct vb2_v4l2_buffer *src_buf)
219 {
220         unsigned long payload = vb2_get_plane_payload(&src_buf->vb2_buf, 0);
221         int ret;
222
223         if (coda_get_bitstream_payload(ctx) + payload + 512 >=
224             ctx->bitstream.size)
225                 return false;
226
227         if (vb2_plane_vaddr(&src_buf->vb2_buf, 0) == NULL) {
228                 v4l2_err(&ctx->dev->v4l2_dev, "trying to queue empty buffer\n");
229                 return true;
230         }
231
232         /* Add zero padding before the first H.264 buffer, if it is too small */
233         if (ctx->qsequence == 0 && payload < 512 &&
234             ctx->codec->src_fourcc == V4L2_PIX_FMT_H264)
235                 coda_bitstream_pad(ctx, 512 - payload);
236
237         ret = coda_bitstream_queue(ctx, src_buf);
238         if (ret < 0) {
239                 v4l2_err(&ctx->dev->v4l2_dev, "bitstream buffer overflow\n");
240                 return false;
241         }
242         /* Sync read pointer to device */
243         if (ctx == v4l2_m2m_get_curr_priv(ctx->dev->m2m_dev))
244                 coda_kfifo_sync_to_device_write(ctx);
245
246         ctx->hold = false;
247
248         return true;
249 }
250
251 void coda_fill_bitstream(struct coda_ctx *ctx, struct list_head *buffer_list)
252 {
253         struct vb2_v4l2_buffer *src_buf;
254         struct coda_buffer_meta *meta;
255         unsigned long flags;
256         u32 start;
257
258         if (ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG)
259                 return;
260
261         while (v4l2_m2m_num_src_bufs_ready(ctx->fh.m2m_ctx) > 0) {
262                 /*
263                  * Only queue a single JPEG into the bitstream buffer, except
264                  * to increase payload over 512 bytes or if in hold state.
265                  */
266                 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG &&
267                     (coda_get_bitstream_payload(ctx) >= 512) && !ctx->hold)
268                         break;
269
270                 src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
271
272                 /* Drop frames that do not start/end with a SOI/EOI markers */
273                 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG &&
274                     !coda_jpeg_check_buffer(ctx, &src_buf->vb2_buf)) {
275                         v4l2_err(&ctx->dev->v4l2_dev,
276                                  "dropping invalid JPEG frame %d\n",
277                                  ctx->qsequence);
278                         src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
279                         if (buffer_list) {
280                                 struct v4l2_m2m_buffer *m2m_buf;
281
282                                 m2m_buf = container_of(src_buf,
283                                                        struct v4l2_m2m_buffer,
284                                                        vb);
285                                 list_add_tail(&m2m_buf->list, buffer_list);
286                         } else {
287                                 v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_ERROR);
288                         }
289                         continue;
290                 }
291
292                 /* Dump empty buffers */
293                 if (!vb2_get_plane_payload(&src_buf->vb2_buf, 0)) {
294                         src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
295                         v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
296                         continue;
297                 }
298
299                 /* Buffer start position */
300                 start = ctx->bitstream_fifo.kfifo.in &
301                         ctx->bitstream_fifo.kfifo.mask;
302
303                 if (coda_bitstream_try_queue(ctx, src_buf)) {
304                         /*
305                          * Source buffer is queued in the bitstream ringbuffer;
306                          * queue the timestamp and mark source buffer as done
307                          */
308                         src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
309
310                         meta = kmalloc(sizeof(*meta), GFP_KERNEL);
311                         if (meta) {
312                                 meta->sequence = src_buf->sequence;
313                                 meta->timecode = src_buf->timecode;
314                                 meta->timestamp = src_buf->vb2_buf.timestamp;
315                                 meta->start = start;
316                                 meta->end = ctx->bitstream_fifo.kfifo.in &
317                                             ctx->bitstream_fifo.kfifo.mask;
318                                 spin_lock_irqsave(&ctx->buffer_meta_lock,
319                                                   flags);
320                                 list_add_tail(&meta->list,
321                                               &ctx->buffer_meta_list);
322                                 ctx->num_metas++;
323                                 spin_unlock_irqrestore(&ctx->buffer_meta_lock,
324                                                        flags);
325
326                                 trace_coda_bit_queue(ctx, src_buf, meta);
327                         }
328
329                         if (buffer_list) {
330                                 struct v4l2_m2m_buffer *m2m_buf;
331
332                                 m2m_buf = container_of(src_buf,
333                                                        struct v4l2_m2m_buffer,
334                                                        vb);
335                                 list_add_tail(&m2m_buf->list, buffer_list);
336                         } else {
337                                 v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
338                         }
339                 } else {
340                         break;
341                 }
342         }
343 }
344
345 void coda_bit_stream_end_flag(struct coda_ctx *ctx)
346 {
347         struct coda_dev *dev = ctx->dev;
348
349         ctx->bit_stream_param |= CODA_BIT_STREAM_END_FLAG;
350
351         /* If this context is currently running, update the hardware flag */
352         if ((dev->devtype->product == CODA_960) &&
353             coda_isbusy(dev) &&
354             (ctx->idx == coda_read(dev, CODA_REG_BIT_RUN_INDEX))) {
355                 coda_write(dev, ctx->bit_stream_param,
356                            CODA_REG_BIT_BIT_STREAM_PARAM);
357         }
358 }
359
360 static void coda_parabuf_write(struct coda_ctx *ctx, int index, u32 value)
361 {
362         struct coda_dev *dev = ctx->dev;
363         u32 *p = ctx->parabuf.vaddr;
364
365         if (dev->devtype->product == CODA_DX6)
366                 p[index] = value;
367         else
368                 p[index ^ 1] = value;
369 }
370
371 static inline int coda_alloc_context_buf(struct coda_ctx *ctx,
372                                          struct coda_aux_buf *buf, size_t size,
373                                          const char *name)
374 {
375         return coda_alloc_aux_buf(ctx->dev, buf, size, name, ctx->debugfs_entry);
376 }
377
378
379 static void coda_free_framebuffers(struct coda_ctx *ctx)
380 {
381         int i;
382
383         for (i = 0; i < CODA_MAX_FRAMEBUFFERS; i++)
384                 coda_free_aux_buf(ctx->dev, &ctx->internal_frames[i]);
385 }
386
387 static int coda_alloc_framebuffers(struct coda_ctx *ctx,
388                                    struct coda_q_data *q_data, u32 fourcc)
389 {
390         struct coda_dev *dev = ctx->dev;
391         int width, height;
392         int ysize;
393         int ret;
394         int i;
395
396         if (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264 ||
397             ctx->codec->dst_fourcc == V4L2_PIX_FMT_H264 ||
398             ctx->codec->src_fourcc == V4L2_PIX_FMT_MPEG4 ||
399             ctx->codec->dst_fourcc == V4L2_PIX_FMT_MPEG4) {
400                 width = round_up(q_data->width, 16);
401                 height = round_up(q_data->height, 16);
402         } else {
403                 width = round_up(q_data->width, 8);
404                 height = q_data->height;
405         }
406         ysize = width * height;
407
408         /* Allocate frame buffers */
409         for (i = 0; i < ctx->num_internal_frames; i++) {
410                 size_t size;
411                 char *name;
412
413                 if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP)
414                         size = round_up(ysize, 4096) + ysize / 2;
415                 else
416                         size = ysize + ysize / 2;
417                 /* Add space for mvcol buffers */
418                 if (dev->devtype->product != CODA_DX6 &&
419                     (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264 ||
420                      (ctx->codec->src_fourcc == V4L2_PIX_FMT_MPEG4 && i == 0)))
421                         size += ysize / 4;
422                 name = kasprintf(GFP_KERNEL, "fb%d", i);
423                 if (!name) {
424                         coda_free_framebuffers(ctx);
425                         return -ENOMEM;
426                 }
427                 ret = coda_alloc_context_buf(ctx, &ctx->internal_frames[i],
428                                              size, name);
429                 kfree(name);
430                 if (ret < 0) {
431                         coda_free_framebuffers(ctx);
432                         return ret;
433                 }
434         }
435
436         /* Register frame buffers in the parameter buffer */
437         for (i = 0; i < ctx->num_internal_frames; i++) {
438                 u32 y, cb, cr, mvcol;
439
440                 /* Start addresses of Y, Cb, Cr planes */
441                 y = ctx->internal_frames[i].paddr;
442                 cb = y + ysize;
443                 cr = y + ysize + ysize/4;
444                 mvcol = y + ysize + ysize/4 + ysize/4;
445                 if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP) {
446                         cb = round_up(cb, 4096);
447                         mvcol = cb + ysize/2;
448                         cr = 0;
449                         /* Packed 20-bit MSB of base addresses */
450                         /* YYYYYCCC, CCyyyyyc, cccc.... */
451                         y = (y & 0xfffff000) | cb >> 20;
452                         cb = (cb & 0x000ff000) << 12;
453                 }
454                 coda_parabuf_write(ctx, i * 3 + 0, y);
455                 coda_parabuf_write(ctx, i * 3 + 1, cb);
456                 coda_parabuf_write(ctx, i * 3 + 2, cr);
457
458                 if (dev->devtype->product == CODA_DX6)
459                         continue;
460
461                 /* mvcol buffer for h.264 and mpeg4 */
462                 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264)
463                         coda_parabuf_write(ctx, 96 + i, mvcol);
464                 if (ctx->codec->src_fourcc == V4L2_PIX_FMT_MPEG4 && i == 0)
465                         coda_parabuf_write(ctx, 97, mvcol);
466         }
467
468         return 0;
469 }
470
471 static void coda_free_context_buffers(struct coda_ctx *ctx)
472 {
473         struct coda_dev *dev = ctx->dev;
474
475         coda_free_aux_buf(dev, &ctx->slicebuf);
476         coda_free_aux_buf(dev, &ctx->psbuf);
477         if (dev->devtype->product != CODA_DX6)
478                 coda_free_aux_buf(dev, &ctx->workbuf);
479         coda_free_aux_buf(dev, &ctx->parabuf);
480 }
481
482 static int coda_alloc_context_buffers(struct coda_ctx *ctx,
483                                       struct coda_q_data *q_data)
484 {
485         struct coda_dev *dev = ctx->dev;
486         size_t size;
487         int ret;
488
489         if (!ctx->parabuf.vaddr) {
490                 ret = coda_alloc_context_buf(ctx, &ctx->parabuf,
491                                              CODA_PARA_BUF_SIZE, "parabuf");
492                 if (ret < 0)
493                         return ret;
494         }
495
496         if (dev->devtype->product == CODA_DX6)
497                 return 0;
498
499         if (!ctx->slicebuf.vaddr && q_data->fourcc == V4L2_PIX_FMT_H264) {
500                 /* worst case slice size */
501                 size = (DIV_ROUND_UP(q_data->width, 16) *
502                         DIV_ROUND_UP(q_data->height, 16)) * 3200 / 8 + 512;
503                 ret = coda_alloc_context_buf(ctx, &ctx->slicebuf, size,
504                                              "slicebuf");
505                 if (ret < 0)
506                         goto err;
507         }
508
509         if (!ctx->psbuf.vaddr && dev->devtype->product == CODA_7541) {
510                 ret = coda_alloc_context_buf(ctx, &ctx->psbuf,
511                                              CODA7_PS_BUF_SIZE, "psbuf");
512                 if (ret < 0)
513                         goto err;
514         }
515
516         if (!ctx->workbuf.vaddr) {
517                 size = dev->devtype->workbuf_size;
518                 if (dev->devtype->product == CODA_960 &&
519                     q_data->fourcc == V4L2_PIX_FMT_H264)
520                         size += CODA9_PS_SAVE_SIZE;
521                 ret = coda_alloc_context_buf(ctx, &ctx->workbuf, size,
522                                              "workbuf");
523                 if (ret < 0)
524                         goto err;
525         }
526
527         return 0;
528
529 err:
530         coda_free_context_buffers(ctx);
531         return ret;
532 }
533
534 static int coda_encode_header(struct coda_ctx *ctx, struct vb2_v4l2_buffer *buf,
535                               int header_code, u8 *header, int *size)
536 {
537         struct vb2_buffer *vb = &buf->vb2_buf;
538         struct coda_dev *dev = ctx->dev;
539         size_t bufsize;
540         int ret;
541         int i;
542
543         if (dev->devtype->product == CODA_960)
544                 memset(vb2_plane_vaddr(vb, 0), 0, 64);
545
546         coda_write(dev, vb2_dma_contig_plane_dma_addr(vb, 0),
547                    CODA_CMD_ENC_HEADER_BB_START);
548         bufsize = vb2_plane_size(vb, 0);
549         if (dev->devtype->product == CODA_960)
550                 bufsize /= 1024;
551         coda_write(dev, bufsize, CODA_CMD_ENC_HEADER_BB_SIZE);
552         coda_write(dev, header_code, CODA_CMD_ENC_HEADER_CODE);
553         ret = coda_command_sync(ctx, CODA_COMMAND_ENCODE_HEADER);
554         if (ret < 0) {
555                 v4l2_err(&dev->v4l2_dev, "CODA_COMMAND_ENCODE_HEADER timeout\n");
556                 return ret;
557         }
558
559         if (dev->devtype->product == CODA_960) {
560                 for (i = 63; i > 0; i--)
561                         if (((char *)vb2_plane_vaddr(vb, 0))[i] != 0)
562                                 break;
563                 *size = i + 1;
564         } else {
565                 *size = coda_read(dev, CODA_REG_BIT_WR_PTR(ctx->reg_idx)) -
566                         coda_read(dev, CODA_CMD_ENC_HEADER_BB_START);
567         }
568         memcpy(header, vb2_plane_vaddr(vb, 0), *size);
569
570         return 0;
571 }
572
573 static phys_addr_t coda_iram_alloc(struct coda_iram_info *iram, size_t size)
574 {
575         phys_addr_t ret;
576
577         size = round_up(size, 1024);
578         if (size > iram->remaining)
579                 return 0;
580         iram->remaining -= size;
581
582         ret = iram->next_paddr;
583         iram->next_paddr += size;
584
585         return ret;
586 }
587
588 static void coda_setup_iram(struct coda_ctx *ctx)
589 {
590         struct coda_iram_info *iram_info = &ctx->iram_info;
591         struct coda_dev *dev = ctx->dev;
592         int w64, w128;
593         int mb_width;
594         int dbk_bits;
595         int bit_bits;
596         int ip_bits;
597
598         memset(iram_info, 0, sizeof(*iram_info));
599         iram_info->next_paddr = dev->iram.paddr;
600         iram_info->remaining = dev->iram.size;
601
602         if (!dev->iram.vaddr)
603                 return;
604
605         switch (dev->devtype->product) {
606         case CODA_7541:
607                 dbk_bits = CODA7_USE_HOST_DBK_ENABLE | CODA7_USE_DBK_ENABLE;
608                 bit_bits = CODA7_USE_HOST_BIT_ENABLE | CODA7_USE_BIT_ENABLE;
609                 ip_bits = CODA7_USE_HOST_IP_ENABLE | CODA7_USE_IP_ENABLE;
610                 break;
611         case CODA_960:
612                 dbk_bits = CODA9_USE_HOST_DBK_ENABLE | CODA9_USE_DBK_ENABLE;
613                 bit_bits = CODA9_USE_HOST_BIT_ENABLE | CODA7_USE_BIT_ENABLE;
614                 ip_bits = CODA9_USE_HOST_IP_ENABLE | CODA7_USE_IP_ENABLE;
615                 break;
616         default: /* CODA_DX6 */
617                 return;
618         }
619
620         if (ctx->inst_type == CODA_INST_ENCODER) {
621                 struct coda_q_data *q_data_src;
622
623                 q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
624                 mb_width = DIV_ROUND_UP(q_data_src->width, 16);
625                 w128 = mb_width * 128;
626                 w64 = mb_width * 64;
627
628                 /* Prioritize in case IRAM is too small for everything */
629                 if (dev->devtype->product == CODA_7541) {
630                         iram_info->search_ram_size = round_up(mb_width * 16 *
631                                                               36 + 2048, 1024);
632                         iram_info->search_ram_paddr = coda_iram_alloc(iram_info,
633                                                 iram_info->search_ram_size);
634                         if (!iram_info->search_ram_paddr) {
635                                 pr_err("IRAM is smaller than the search ram size\n");
636                                 goto out;
637                         }
638                         iram_info->axi_sram_use |= CODA7_USE_HOST_ME_ENABLE |
639                                                    CODA7_USE_ME_ENABLE;
640                 }
641
642                 /* Only H.264BP and H.263P3 are considered */
643                 iram_info->buf_dbk_y_use = coda_iram_alloc(iram_info, w64);
644                 iram_info->buf_dbk_c_use = coda_iram_alloc(iram_info, w64);
645                 if (!iram_info->buf_dbk_c_use)
646                         goto out;
647                 iram_info->axi_sram_use |= dbk_bits;
648
649                 iram_info->buf_bit_use = coda_iram_alloc(iram_info, w128);
650                 if (!iram_info->buf_bit_use)
651                         goto out;
652                 iram_info->axi_sram_use |= bit_bits;
653
654                 iram_info->buf_ip_ac_dc_use = coda_iram_alloc(iram_info, w128);
655                 if (!iram_info->buf_ip_ac_dc_use)
656                         goto out;
657                 iram_info->axi_sram_use |= ip_bits;
658
659                 /* OVL and BTP disabled for encoder */
660         } else if (ctx->inst_type == CODA_INST_DECODER) {
661                 struct coda_q_data *q_data_dst;
662
663                 q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
664                 mb_width = DIV_ROUND_UP(q_data_dst->width, 16);
665                 w128 = mb_width * 128;
666
667                 iram_info->buf_dbk_y_use = coda_iram_alloc(iram_info, w128);
668                 iram_info->buf_dbk_c_use = coda_iram_alloc(iram_info, w128);
669                 if (!iram_info->buf_dbk_c_use)
670                         goto out;
671                 iram_info->axi_sram_use |= dbk_bits;
672
673                 iram_info->buf_bit_use = coda_iram_alloc(iram_info, w128);
674                 if (!iram_info->buf_bit_use)
675                         goto out;
676                 iram_info->axi_sram_use |= bit_bits;
677
678                 iram_info->buf_ip_ac_dc_use = coda_iram_alloc(iram_info, w128);
679                 if (!iram_info->buf_ip_ac_dc_use)
680                         goto out;
681                 iram_info->axi_sram_use |= ip_bits;
682
683                 /* OVL and BTP unused as there is no VC1 support yet */
684         }
685
686 out:
687         if (!(iram_info->axi_sram_use & CODA7_USE_HOST_IP_ENABLE))
688                 v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
689                          "IRAM smaller than needed\n");
690
691         if (dev->devtype->product == CODA_7541) {
692                 /* TODO - Enabling these causes picture errors on CODA7541 */
693                 if (ctx->inst_type == CODA_INST_DECODER) {
694                         /* fw 1.4.50 */
695                         iram_info->axi_sram_use &= ~(CODA7_USE_HOST_IP_ENABLE |
696                                                      CODA7_USE_IP_ENABLE);
697                 } else {
698                         /* fw 13.4.29 */
699                         iram_info->axi_sram_use &= ~(CODA7_USE_HOST_IP_ENABLE |
700                                                      CODA7_USE_HOST_DBK_ENABLE |
701                                                      CODA7_USE_IP_ENABLE |
702                                                      CODA7_USE_DBK_ENABLE);
703                 }
704         }
705 }
706
707 static u32 coda_supported_firmwares[] = {
708         CODA_FIRMWARE_VERNUM(CODA_DX6, 2, 2, 5),
709         CODA_FIRMWARE_VERNUM(CODA_7541, 1, 4, 50),
710         CODA_FIRMWARE_VERNUM(CODA_960, 2, 1, 5),
711         CODA_FIRMWARE_VERNUM(CODA_960, 2, 3, 10),
712         CODA_FIRMWARE_VERNUM(CODA_960, 3, 1, 1),
713 };
714
715 static bool coda_firmware_supported(u32 vernum)
716 {
717         int i;
718
719         for (i = 0; i < ARRAY_SIZE(coda_supported_firmwares); i++)
720                 if (vernum == coda_supported_firmwares[i])
721                         return true;
722         return false;
723 }
724
725 int coda_check_firmware(struct coda_dev *dev)
726 {
727         u16 product, major, minor, release;
728         u32 data;
729         int ret;
730
731         ret = clk_prepare_enable(dev->clk_per);
732         if (ret)
733                 goto err_clk_per;
734
735         ret = clk_prepare_enable(dev->clk_ahb);
736         if (ret)
737                 goto err_clk_ahb;
738
739         coda_write(dev, 0, CODA_CMD_FIRMWARE_VERNUM);
740         coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY);
741         coda_write(dev, 0, CODA_REG_BIT_RUN_INDEX);
742         coda_write(dev, 0, CODA_REG_BIT_RUN_COD_STD);
743         coda_write(dev, CODA_COMMAND_FIRMWARE_GET, CODA_REG_BIT_RUN_COMMAND);
744         if (coda_wait_timeout(dev)) {
745                 v4l2_err(&dev->v4l2_dev, "firmware get command error\n");
746                 ret = -EIO;
747                 goto err_run_cmd;
748         }
749
750         if (dev->devtype->product == CODA_960) {
751                 data = coda_read(dev, CODA9_CMD_FIRMWARE_CODE_REV);
752                 v4l2_info(&dev->v4l2_dev, "Firmware code revision: %d\n",
753                           data);
754         }
755
756         /* Check we are compatible with the loaded firmware */
757         data = coda_read(dev, CODA_CMD_FIRMWARE_VERNUM);
758         product = CODA_FIRMWARE_PRODUCT(data);
759         major = CODA_FIRMWARE_MAJOR(data);
760         minor = CODA_FIRMWARE_MINOR(data);
761         release = CODA_FIRMWARE_RELEASE(data);
762
763         clk_disable_unprepare(dev->clk_per);
764         clk_disable_unprepare(dev->clk_ahb);
765
766         if (product != dev->devtype->product) {
767                 v4l2_err(&dev->v4l2_dev,
768                          "Wrong firmware. Hw: %s, Fw: %s, Version: %u.%u.%u\n",
769                          coda_product_name(dev->devtype->product),
770                          coda_product_name(product), major, minor, release);
771                 return -EINVAL;
772         }
773
774         v4l2_info(&dev->v4l2_dev, "Initialized %s.\n",
775                   coda_product_name(product));
776
777         if (coda_firmware_supported(data)) {
778                 v4l2_info(&dev->v4l2_dev, "Firmware version: %u.%u.%u\n",
779                           major, minor, release);
780         } else {
781                 v4l2_warn(&dev->v4l2_dev,
782                           "Unsupported firmware version: %u.%u.%u\n",
783                           major, minor, release);
784         }
785
786         return 0;
787
788 err_run_cmd:
789         clk_disable_unprepare(dev->clk_ahb);
790 err_clk_ahb:
791         clk_disable_unprepare(dev->clk_per);
792 err_clk_per:
793         return ret;
794 }
795
796 static void coda9_set_frame_cache(struct coda_ctx *ctx, u32 fourcc)
797 {
798         u32 cache_size, cache_config;
799
800         if (ctx->tiled_map_type == GDI_LINEAR_FRAME_MAP) {
801                 /* Luma 2x0 page, 2x6 cache, chroma 2x0 page, 2x4 cache size */
802                 cache_size = 0x20262024;
803                 cache_config = 2 << CODA9_CACHE_PAGEMERGE_OFFSET;
804         } else {
805                 /* Luma 0x2 page, 4x4 cache, chroma 0x2 page, 4x3 cache size */
806                 cache_size = 0x02440243;
807                 cache_config = 1 << CODA9_CACHE_PAGEMERGE_OFFSET;
808         }
809         coda_write(ctx->dev, cache_size, CODA9_CMD_SET_FRAME_CACHE_SIZE);
810         if (fourcc == V4L2_PIX_FMT_NV12 || fourcc == V4L2_PIX_FMT_YUYV) {
811                 cache_config |= 32 << CODA9_CACHE_LUMA_BUFFER_SIZE_OFFSET |
812                                 16 << CODA9_CACHE_CR_BUFFER_SIZE_OFFSET |
813                                 0 << CODA9_CACHE_CB_BUFFER_SIZE_OFFSET;
814         } else {
815                 cache_config |= 32 << CODA9_CACHE_LUMA_BUFFER_SIZE_OFFSET |
816                                 8 << CODA9_CACHE_CR_BUFFER_SIZE_OFFSET |
817                                 8 << CODA9_CACHE_CB_BUFFER_SIZE_OFFSET;
818         }
819         coda_write(ctx->dev, cache_config, CODA9_CMD_SET_FRAME_CACHE_CONFIG);
820 }
821
822 /*
823  * Encoder context operations
824  */
825
826 static int coda_encoder_reqbufs(struct coda_ctx *ctx,
827                                 struct v4l2_requestbuffers *rb)
828 {
829         struct coda_q_data *q_data_src;
830         int ret;
831
832         if (rb->type != V4L2_BUF_TYPE_VIDEO_OUTPUT)
833                 return 0;
834
835         if (rb->count) {
836                 q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
837                 ret = coda_alloc_context_buffers(ctx, q_data_src);
838                 if (ret < 0)
839                         return ret;
840         } else {
841                 coda_free_context_buffers(ctx);
842         }
843
844         return 0;
845 }
846
847 static int coda_start_encoding(struct coda_ctx *ctx)
848 {
849         struct coda_dev *dev = ctx->dev;
850         struct v4l2_device *v4l2_dev = &dev->v4l2_dev;
851         struct coda_q_data *q_data_src, *q_data_dst;
852         u32 bitstream_buf, bitstream_size;
853         struct vb2_v4l2_buffer *buf;
854         int gamma, ret, value;
855         u32 dst_fourcc;
856         int num_fb;
857         u32 stride;
858
859         q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
860         q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
861         dst_fourcc = q_data_dst->fourcc;
862
863         buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
864         bitstream_buf = vb2_dma_contig_plane_dma_addr(&buf->vb2_buf, 0);
865         bitstream_size = q_data_dst->sizeimage;
866
867         if (!coda_is_initialized(dev)) {
868                 v4l2_err(v4l2_dev, "coda is not initialized.\n");
869                 return -EFAULT;
870         }
871
872         if (dst_fourcc == V4L2_PIX_FMT_JPEG) {
873                 if (!ctx->params.jpeg_qmat_tab[0])
874                         ctx->params.jpeg_qmat_tab[0] = kmalloc(64, GFP_KERNEL);
875                 if (!ctx->params.jpeg_qmat_tab[1])
876                         ctx->params.jpeg_qmat_tab[1] = kmalloc(64, GFP_KERNEL);
877                 coda_set_jpeg_compression_quality(ctx, ctx->params.jpeg_quality);
878         }
879
880         mutex_lock(&dev->coda_mutex);
881
882         coda_write(dev, ctx->parabuf.paddr, CODA_REG_BIT_PARA_BUF_ADDR);
883         coda_write(dev, bitstream_buf, CODA_REG_BIT_RD_PTR(ctx->reg_idx));
884         coda_write(dev, bitstream_buf, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
885         switch (dev->devtype->product) {
886         case CODA_DX6:
887                 coda_write(dev, CODADX6_STREAM_BUF_DYNALLOC_EN |
888                         CODADX6_STREAM_BUF_PIC_RESET, CODA_REG_BIT_STREAM_CTRL);
889                 break;
890         case CODA_960:
891                 coda_write(dev, 0, CODA9_GDI_WPROT_RGN_EN);
892                 /* fallthrough */
893         case CODA_7541:
894                 coda_write(dev, CODA7_STREAM_BUF_DYNALLOC_EN |
895                         CODA7_STREAM_BUF_PIC_RESET, CODA_REG_BIT_STREAM_CTRL);
896                 break;
897         }
898
899         ctx->frame_mem_ctrl &= ~(CODA_FRAME_CHROMA_INTERLEAVE | (0x3 << 9) |
900                                  CODA9_FRAME_TILED2LINEAR);
901         if (q_data_src->fourcc == V4L2_PIX_FMT_NV12)
902                 ctx->frame_mem_ctrl |= CODA_FRAME_CHROMA_INTERLEAVE;
903         if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP)
904                 ctx->frame_mem_ctrl |= (0x3 << 9) | CODA9_FRAME_TILED2LINEAR;
905         coda_write(dev, ctx->frame_mem_ctrl, CODA_REG_BIT_FRAME_MEM_CTRL);
906
907         if (dev->devtype->product == CODA_DX6) {
908                 /* Configure the coda */
909                 coda_write(dev, dev->iram.paddr,
910                            CODADX6_REG_BIT_SEARCH_RAM_BASE_ADDR);
911         }
912
913         /* Could set rotation here if needed */
914         value = 0;
915         switch (dev->devtype->product) {
916         case CODA_DX6:
917                 value = (q_data_src->width & CODADX6_PICWIDTH_MASK)
918                         << CODADX6_PICWIDTH_OFFSET;
919                 value |= (q_data_src->height & CODADX6_PICHEIGHT_MASK)
920                          << CODA_PICHEIGHT_OFFSET;
921                 break;
922         case CODA_7541:
923                 if (dst_fourcc == V4L2_PIX_FMT_H264) {
924                         value = (round_up(q_data_src->width, 16) &
925                                  CODA7_PICWIDTH_MASK) << CODA7_PICWIDTH_OFFSET;
926                         value |= (round_up(q_data_src->height, 16) &
927                                  CODA7_PICHEIGHT_MASK) << CODA_PICHEIGHT_OFFSET;
928                         break;
929                 }
930                 /* fallthrough */
931         case CODA_960:
932                 value = (q_data_src->width & CODA7_PICWIDTH_MASK)
933                         << CODA7_PICWIDTH_OFFSET;
934                 value |= (q_data_src->height & CODA7_PICHEIGHT_MASK)
935                          << CODA_PICHEIGHT_OFFSET;
936         }
937         coda_write(dev, value, CODA_CMD_ENC_SEQ_SRC_SIZE);
938         if (dst_fourcc == V4L2_PIX_FMT_JPEG)
939                 ctx->params.framerate = 0;
940         coda_write(dev, ctx->params.framerate,
941                    CODA_CMD_ENC_SEQ_SRC_F_RATE);
942
943         ctx->params.codec_mode = ctx->codec->mode;
944         switch (dst_fourcc) {
945         case V4L2_PIX_FMT_MPEG4:
946                 if (dev->devtype->product == CODA_960)
947                         coda_write(dev, CODA9_STD_MPEG4,
948                                    CODA_CMD_ENC_SEQ_COD_STD);
949                 else
950                         coda_write(dev, CODA_STD_MPEG4,
951                                    CODA_CMD_ENC_SEQ_COD_STD);
952                 coda_write(dev, 0, CODA_CMD_ENC_SEQ_MP4_PARA);
953                 break;
954         case V4L2_PIX_FMT_H264:
955                 if (dev->devtype->product == CODA_960)
956                         coda_write(dev, CODA9_STD_H264,
957                                    CODA_CMD_ENC_SEQ_COD_STD);
958                 else
959                         coda_write(dev, CODA_STD_H264,
960                                    CODA_CMD_ENC_SEQ_COD_STD);
961                 if (ctx->params.h264_deblk_enabled) {
962                         value = ((ctx->params.h264_deblk_alpha &
963                                   CODA_264PARAM_DEBLKFILTEROFFSETALPHA_MASK) <<
964                                  CODA_264PARAM_DEBLKFILTEROFFSETALPHA_OFFSET) |
965                                 ((ctx->params.h264_deblk_beta &
966                                   CODA_264PARAM_DEBLKFILTEROFFSETBETA_MASK) <<
967                                  CODA_264PARAM_DEBLKFILTEROFFSETBETA_OFFSET);
968                 } else {
969                         value = 1 << CODA_264PARAM_DISABLEDEBLK_OFFSET;
970                 }
971                 coda_write(dev, value, CODA_CMD_ENC_SEQ_264_PARA);
972                 break;
973         case V4L2_PIX_FMT_JPEG:
974                 coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_PARA);
975                 coda_write(dev, ctx->params.jpeg_restart_interval,
976                                 CODA_CMD_ENC_SEQ_JPG_RST_INTERVAL);
977                 coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_THUMB_EN);
978                 coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_THUMB_SIZE);
979                 coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_THUMB_OFFSET);
980
981                 coda_jpeg_write_tables(ctx);
982                 break;
983         default:
984                 v4l2_err(v4l2_dev,
985                          "dst format (0x%08x) invalid.\n", dst_fourcc);
986                 ret = -EINVAL;
987                 goto out;
988         }
989
990         /*
991          * slice mode and GOP size registers are used for thumb size/offset
992          * in JPEG mode
993          */
994         if (dst_fourcc != V4L2_PIX_FMT_JPEG) {
995                 switch (ctx->params.slice_mode) {
996                 case V4L2_MPEG_VIDEO_MULTI_SLICE_MODE_SINGLE:
997                         value = 0;
998                         break;
999                 case V4L2_MPEG_VIDEO_MULTI_SICE_MODE_MAX_MB:
1000                         value  = (ctx->params.slice_max_mb &
1001                                   CODA_SLICING_SIZE_MASK)
1002                                  << CODA_SLICING_SIZE_OFFSET;
1003                         value |= (1 & CODA_SLICING_UNIT_MASK)
1004                                  << CODA_SLICING_UNIT_OFFSET;
1005                         value |=  1 & CODA_SLICING_MODE_MASK;
1006                         break;
1007                 case V4L2_MPEG_VIDEO_MULTI_SICE_MODE_MAX_BYTES:
1008                         value  = (ctx->params.slice_max_bits &
1009                                   CODA_SLICING_SIZE_MASK)
1010                                  << CODA_SLICING_SIZE_OFFSET;
1011                         value |= (0 & CODA_SLICING_UNIT_MASK)
1012                                  << CODA_SLICING_UNIT_OFFSET;
1013                         value |=  1 & CODA_SLICING_MODE_MASK;
1014                         break;
1015                 }
1016                 coda_write(dev, value, CODA_CMD_ENC_SEQ_SLICE_MODE);
1017                 value = ctx->params.gop_size;
1018                 coda_write(dev, value, CODA_CMD_ENC_SEQ_GOP_SIZE);
1019         }
1020
1021         if (ctx->params.bitrate) {
1022                 /* Rate control enabled */
1023                 value = (ctx->params.bitrate & CODA_RATECONTROL_BITRATE_MASK)
1024                         << CODA_RATECONTROL_BITRATE_OFFSET;
1025                 value |=  1 & CODA_RATECONTROL_ENABLE_MASK;
1026                 value |= (ctx->params.vbv_delay &
1027                           CODA_RATECONTROL_INITIALDELAY_MASK)
1028                          << CODA_RATECONTROL_INITIALDELAY_OFFSET;
1029                 if (dev->devtype->product == CODA_960)
1030                         value |= BIT(31); /* disable autoskip */
1031         } else {
1032                 value = 0;
1033         }
1034         coda_write(dev, value, CODA_CMD_ENC_SEQ_RC_PARA);
1035
1036         coda_write(dev, ctx->params.vbv_size, CODA_CMD_ENC_SEQ_RC_BUF_SIZE);
1037         coda_write(dev, ctx->params.intra_refresh,
1038                    CODA_CMD_ENC_SEQ_INTRA_REFRESH);
1039
1040         coda_write(dev, bitstream_buf, CODA_CMD_ENC_SEQ_BB_START);
1041         coda_write(dev, bitstream_size / 1024, CODA_CMD_ENC_SEQ_BB_SIZE);
1042
1043
1044         value = 0;
1045         if (dev->devtype->product == CODA_960)
1046                 gamma = CODA9_DEFAULT_GAMMA;
1047         else
1048                 gamma = CODA_DEFAULT_GAMMA;
1049         if (gamma > 0) {
1050                 coda_write(dev, (gamma & CODA_GAMMA_MASK) << CODA_GAMMA_OFFSET,
1051                            CODA_CMD_ENC_SEQ_RC_GAMMA);
1052         }
1053
1054         if (ctx->params.h264_min_qp || ctx->params.h264_max_qp) {
1055                 coda_write(dev,
1056                            ctx->params.h264_min_qp << CODA_QPMIN_OFFSET |
1057                            ctx->params.h264_max_qp << CODA_QPMAX_OFFSET,
1058                            CODA_CMD_ENC_SEQ_RC_QP_MIN_MAX);
1059         }
1060         if (dev->devtype->product == CODA_960) {
1061                 if (ctx->params.h264_max_qp)
1062                         value |= 1 << CODA9_OPTION_RCQPMAX_OFFSET;
1063                 if (CODA_DEFAULT_GAMMA > 0)
1064                         value |= 1 << CODA9_OPTION_GAMMA_OFFSET;
1065         } else {
1066                 if (CODA_DEFAULT_GAMMA > 0) {
1067                         if (dev->devtype->product == CODA_DX6)
1068                                 value |= 1 << CODADX6_OPTION_GAMMA_OFFSET;
1069                         else
1070                                 value |= 1 << CODA7_OPTION_GAMMA_OFFSET;
1071                 }
1072                 if (ctx->params.h264_min_qp)
1073                         value |= 1 << CODA7_OPTION_RCQPMIN_OFFSET;
1074                 if (ctx->params.h264_max_qp)
1075                         value |= 1 << CODA7_OPTION_RCQPMAX_OFFSET;
1076         }
1077         coda_write(dev, value, CODA_CMD_ENC_SEQ_OPTION);
1078
1079         coda_write(dev, 0, CODA_CMD_ENC_SEQ_RC_INTERVAL_MODE);
1080
1081         coda_setup_iram(ctx);
1082
1083         if (dst_fourcc == V4L2_PIX_FMT_H264) {
1084                 switch (dev->devtype->product) {
1085                 case CODA_DX6:
1086                         value = FMO_SLICE_SAVE_BUF_SIZE << 7;
1087                         coda_write(dev, value, CODADX6_CMD_ENC_SEQ_FMO);
1088                         break;
1089                 case CODA_7541:
1090                         coda_write(dev, ctx->iram_info.search_ram_paddr,
1091                                         CODA7_CMD_ENC_SEQ_SEARCH_BASE);
1092                         coda_write(dev, ctx->iram_info.search_ram_size,
1093                                         CODA7_CMD_ENC_SEQ_SEARCH_SIZE);
1094                         break;
1095                 case CODA_960:
1096                         coda_write(dev, 0, CODA9_CMD_ENC_SEQ_ME_OPTION);
1097                         coda_write(dev, 0, CODA9_CMD_ENC_SEQ_INTRA_WEIGHT);
1098                 }
1099         }
1100
1101         ret = coda_command_sync(ctx, CODA_COMMAND_SEQ_INIT);
1102         if (ret < 0) {
1103                 v4l2_err(v4l2_dev, "CODA_COMMAND_SEQ_INIT timeout\n");
1104                 goto out;
1105         }
1106
1107         if (coda_read(dev, CODA_RET_ENC_SEQ_SUCCESS) == 0) {
1108                 v4l2_err(v4l2_dev, "CODA_COMMAND_SEQ_INIT failed\n");
1109                 ret = -EFAULT;
1110                 goto out;
1111         }
1112         ctx->initialized = 1;
1113
1114         if (dst_fourcc != V4L2_PIX_FMT_JPEG) {
1115                 if (dev->devtype->product == CODA_960)
1116                         ctx->num_internal_frames = 4;
1117                 else
1118                         ctx->num_internal_frames = 2;
1119                 ret = coda_alloc_framebuffers(ctx, q_data_src, dst_fourcc);
1120                 if (ret < 0) {
1121                         v4l2_err(v4l2_dev, "failed to allocate framebuffers\n");
1122                         goto out;
1123                 }
1124                 num_fb = 2;
1125                 stride = q_data_src->bytesperline;
1126         } else {
1127                 ctx->num_internal_frames = 0;
1128                 num_fb = 0;
1129                 stride = 0;
1130         }
1131         coda_write(dev, num_fb, CODA_CMD_SET_FRAME_BUF_NUM);
1132         coda_write(dev, stride, CODA_CMD_SET_FRAME_BUF_STRIDE);
1133
1134         if (dev->devtype->product == CODA_7541) {
1135                 coda_write(dev, q_data_src->bytesperline,
1136                                 CODA7_CMD_SET_FRAME_SOURCE_BUF_STRIDE);
1137         }
1138         if (dev->devtype->product != CODA_DX6) {
1139                 coda_write(dev, ctx->iram_info.buf_bit_use,
1140                                 CODA7_CMD_SET_FRAME_AXI_BIT_ADDR);
1141                 coda_write(dev, ctx->iram_info.buf_ip_ac_dc_use,
1142                                 CODA7_CMD_SET_FRAME_AXI_IPACDC_ADDR);
1143                 coda_write(dev, ctx->iram_info.buf_dbk_y_use,
1144                                 CODA7_CMD_SET_FRAME_AXI_DBKY_ADDR);
1145                 coda_write(dev, ctx->iram_info.buf_dbk_c_use,
1146                                 CODA7_CMD_SET_FRAME_AXI_DBKC_ADDR);
1147                 coda_write(dev, ctx->iram_info.buf_ovl_use,
1148                                 CODA7_CMD_SET_FRAME_AXI_OVL_ADDR);
1149                 if (dev->devtype->product == CODA_960) {
1150                         coda_write(dev, ctx->iram_info.buf_btp_use,
1151                                         CODA9_CMD_SET_FRAME_AXI_BTP_ADDR);
1152
1153                         coda9_set_frame_cache(ctx, q_data_src->fourcc);
1154
1155                         /* FIXME */
1156                         coda_write(dev, ctx->internal_frames[2].paddr,
1157                                    CODA9_CMD_SET_FRAME_SUBSAMP_A);
1158                         coda_write(dev, ctx->internal_frames[3].paddr,
1159                                    CODA9_CMD_SET_FRAME_SUBSAMP_B);
1160                 }
1161         }
1162
1163         ret = coda_command_sync(ctx, CODA_COMMAND_SET_FRAME_BUF);
1164         if (ret < 0) {
1165                 v4l2_err(v4l2_dev, "CODA_COMMAND_SET_FRAME_BUF timeout\n");
1166                 goto out;
1167         }
1168
1169         /* Save stream headers */
1170         buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
1171         switch (dst_fourcc) {
1172         case V4L2_PIX_FMT_H264:
1173                 /*
1174                  * Get SPS in the first frame and copy it to an
1175                  * intermediate buffer.
1176                  */
1177                 ret = coda_encode_header(ctx, buf, CODA_HEADER_H264_SPS,
1178                                          &ctx->vpu_header[0][0],
1179                                          &ctx->vpu_header_size[0]);
1180                 if (ret < 0)
1181                         goto out;
1182
1183                 /*
1184                  * Get PPS in the first frame and copy it to an
1185                  * intermediate buffer.
1186                  */
1187                 ret = coda_encode_header(ctx, buf, CODA_HEADER_H264_PPS,
1188                                          &ctx->vpu_header[1][0],
1189                                          &ctx->vpu_header_size[1]);
1190                 if (ret < 0)
1191                         goto out;
1192
1193                 /*
1194                  * Length of H.264 headers is variable and thus it might not be
1195                  * aligned for the coda to append the encoded frame. In that is
1196                  * the case a filler NAL must be added to header 2.
1197                  */
1198                 ctx->vpu_header_size[2] = coda_h264_padding(
1199                                         (ctx->vpu_header_size[0] +
1200                                          ctx->vpu_header_size[1]),
1201                                          ctx->vpu_header[2]);
1202                 break;
1203         case V4L2_PIX_FMT_MPEG4:
1204                 /*
1205                  * Get VOS in the first frame and copy it to an
1206                  * intermediate buffer
1207                  */
1208                 ret = coda_encode_header(ctx, buf, CODA_HEADER_MP4V_VOS,
1209                                          &ctx->vpu_header[0][0],
1210                                          &ctx->vpu_header_size[0]);
1211                 if (ret < 0)
1212                         goto out;
1213
1214                 ret = coda_encode_header(ctx, buf, CODA_HEADER_MP4V_VIS,
1215                                          &ctx->vpu_header[1][0],
1216                                          &ctx->vpu_header_size[1]);
1217                 if (ret < 0)
1218                         goto out;
1219
1220                 ret = coda_encode_header(ctx, buf, CODA_HEADER_MP4V_VOL,
1221                                          &ctx->vpu_header[2][0],
1222                                          &ctx->vpu_header_size[2]);
1223                 if (ret < 0)
1224                         goto out;
1225                 break;
1226         default:
1227                 /* No more formats need to save headers at the moment */
1228                 break;
1229         }
1230
1231 out:
1232         mutex_unlock(&dev->coda_mutex);
1233         return ret;
1234 }
1235
1236 static int coda_prepare_encode(struct coda_ctx *ctx)
1237 {
1238         struct coda_q_data *q_data_src, *q_data_dst;
1239         struct vb2_v4l2_buffer *src_buf, *dst_buf;
1240         struct coda_dev *dev = ctx->dev;
1241         int force_ipicture;
1242         int quant_param = 0;
1243         u32 pic_stream_buffer_addr, pic_stream_buffer_size;
1244         u32 rot_mode = 0;
1245         u32 dst_fourcc;
1246         u32 reg;
1247
1248         src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
1249         dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
1250         q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
1251         q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
1252         dst_fourcc = q_data_dst->fourcc;
1253
1254         src_buf->sequence = ctx->osequence;
1255         dst_buf->sequence = ctx->osequence;
1256         ctx->osequence++;
1257
1258         force_ipicture = ctx->params.force_ipicture;
1259         if (force_ipicture)
1260                 ctx->params.force_ipicture = false;
1261         else if (ctx->params.gop_size != 0 &&
1262                  (src_buf->sequence % ctx->params.gop_size) == 0)
1263                 force_ipicture = 1;
1264
1265         /*
1266          * Workaround coda firmware BUG that only marks the first
1267          * frame as IDR. This is a problem for some decoders that can't
1268          * recover when a frame is lost.
1269          */
1270         if (!force_ipicture) {
1271                 src_buf->flags |= V4L2_BUF_FLAG_PFRAME;
1272                 src_buf->flags &= ~V4L2_BUF_FLAG_KEYFRAME;
1273         } else {
1274                 src_buf->flags |= V4L2_BUF_FLAG_KEYFRAME;
1275                 src_buf->flags &= ~V4L2_BUF_FLAG_PFRAME;
1276         }
1277
1278         if (dev->devtype->product == CODA_960)
1279                 coda_set_gdi_regs(ctx);
1280
1281         /*
1282          * Copy headers in front of the first frame and forced I frames for
1283          * H.264 only. In MPEG4 they are already copied by the CODA.
1284          */
1285         if (src_buf->sequence == 0 || force_ipicture) {
1286                 pic_stream_buffer_addr =
1287                         vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0) +
1288                         ctx->vpu_header_size[0] +
1289                         ctx->vpu_header_size[1] +
1290                         ctx->vpu_header_size[2];
1291                 pic_stream_buffer_size = q_data_dst->sizeimage -
1292                         ctx->vpu_header_size[0] -
1293                         ctx->vpu_header_size[1] -
1294                         ctx->vpu_header_size[2];
1295                 memcpy(vb2_plane_vaddr(&dst_buf->vb2_buf, 0),
1296                        &ctx->vpu_header[0][0], ctx->vpu_header_size[0]);
1297                 memcpy(vb2_plane_vaddr(&dst_buf->vb2_buf, 0)
1298                         + ctx->vpu_header_size[0], &ctx->vpu_header[1][0],
1299                         ctx->vpu_header_size[1]);
1300                 memcpy(vb2_plane_vaddr(&dst_buf->vb2_buf, 0)
1301                         + ctx->vpu_header_size[0] + ctx->vpu_header_size[1],
1302                         &ctx->vpu_header[2][0], ctx->vpu_header_size[2]);
1303         } else {
1304                 pic_stream_buffer_addr =
1305                         vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
1306                 pic_stream_buffer_size = q_data_dst->sizeimage;
1307         }
1308
1309         if (force_ipicture) {
1310                 switch (dst_fourcc) {
1311                 case V4L2_PIX_FMT_H264:
1312                         quant_param = ctx->params.h264_intra_qp;
1313                         break;
1314                 case V4L2_PIX_FMT_MPEG4:
1315                         quant_param = ctx->params.mpeg4_intra_qp;
1316                         break;
1317                 case V4L2_PIX_FMT_JPEG:
1318                         quant_param = 30;
1319                         break;
1320                 default:
1321                         v4l2_warn(&ctx->dev->v4l2_dev,
1322                                 "cannot set intra qp, fmt not supported\n");
1323                         break;
1324                 }
1325         } else {
1326                 switch (dst_fourcc) {
1327                 case V4L2_PIX_FMT_H264:
1328                         quant_param = ctx->params.h264_inter_qp;
1329                         break;
1330                 case V4L2_PIX_FMT_MPEG4:
1331                         quant_param = ctx->params.mpeg4_inter_qp;
1332                         break;
1333                 default:
1334                         v4l2_warn(&ctx->dev->v4l2_dev,
1335                                 "cannot set inter qp, fmt not supported\n");
1336                         break;
1337                 }
1338         }
1339
1340         /* submit */
1341         if (ctx->params.rot_mode)
1342                 rot_mode = CODA_ROT_MIR_ENABLE | ctx->params.rot_mode;
1343         coda_write(dev, rot_mode, CODA_CMD_ENC_PIC_ROT_MODE);
1344         coda_write(dev, quant_param, CODA_CMD_ENC_PIC_QS);
1345
1346         if (dev->devtype->product == CODA_960) {
1347                 coda_write(dev, 4/*FIXME: 0*/, CODA9_CMD_ENC_PIC_SRC_INDEX);
1348                 coda_write(dev, q_data_src->width, CODA9_CMD_ENC_PIC_SRC_STRIDE);
1349                 coda_write(dev, 0, CODA9_CMD_ENC_PIC_SUB_FRAME_SYNC);
1350
1351                 reg = CODA9_CMD_ENC_PIC_SRC_ADDR_Y;
1352         } else {
1353                 reg = CODA_CMD_ENC_PIC_SRC_ADDR_Y;
1354         }
1355         coda_write_base(ctx, q_data_src, src_buf, reg);
1356
1357         coda_write(dev, force_ipicture << 1 & 0x2,
1358                    CODA_CMD_ENC_PIC_OPTION);
1359
1360         coda_write(dev, pic_stream_buffer_addr, CODA_CMD_ENC_PIC_BB_START);
1361         coda_write(dev, pic_stream_buffer_size / 1024,
1362                    CODA_CMD_ENC_PIC_BB_SIZE);
1363
1364         if (!ctx->streamon_out) {
1365                 /* After streamoff on the output side, set stream end flag */
1366                 ctx->bit_stream_param |= CODA_BIT_STREAM_END_FLAG;
1367                 coda_write(dev, ctx->bit_stream_param,
1368                            CODA_REG_BIT_BIT_STREAM_PARAM);
1369         }
1370
1371         if (dev->devtype->product != CODA_DX6)
1372                 coda_write(dev, ctx->iram_info.axi_sram_use,
1373                                 CODA7_REG_BIT_AXI_SRAM_USE);
1374
1375         trace_coda_enc_pic_run(ctx, src_buf);
1376
1377         coda_command_async(ctx, CODA_COMMAND_PIC_RUN);
1378
1379         return 0;
1380 }
1381
1382 static void coda_finish_encode(struct coda_ctx *ctx)
1383 {
1384         struct vb2_v4l2_buffer *src_buf, *dst_buf;
1385         struct coda_dev *dev = ctx->dev;
1386         u32 wr_ptr, start_ptr;
1387
1388         src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
1389         dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
1390
1391         trace_coda_enc_pic_done(ctx, dst_buf);
1392
1393         /* Get results from the coda */
1394         start_ptr = coda_read(dev, CODA_CMD_ENC_PIC_BB_START);
1395         wr_ptr = coda_read(dev, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
1396
1397         /* Calculate bytesused field */
1398         if (dst_buf->sequence == 0 ||
1399             src_buf->flags & V4L2_BUF_FLAG_KEYFRAME) {
1400                 vb2_set_plane_payload(&dst_buf->vb2_buf, 0, wr_ptr - start_ptr +
1401                                         ctx->vpu_header_size[0] +
1402                                         ctx->vpu_header_size[1] +
1403                                         ctx->vpu_header_size[2]);
1404         } else {
1405                 vb2_set_plane_payload(&dst_buf->vb2_buf, 0, wr_ptr - start_ptr);
1406         }
1407
1408         v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev, "frame size = %u\n",
1409                  wr_ptr - start_ptr);
1410
1411         coda_read(dev, CODA_RET_ENC_PIC_SLICE_NUM);
1412         coda_read(dev, CODA_RET_ENC_PIC_FLAG);
1413
1414         if (coda_read(dev, CODA_RET_ENC_PIC_TYPE) == 0) {
1415                 dst_buf->flags |= V4L2_BUF_FLAG_KEYFRAME;
1416                 dst_buf->flags &= ~V4L2_BUF_FLAG_PFRAME;
1417         } else {
1418                 dst_buf->flags |= V4L2_BUF_FLAG_PFRAME;
1419                 dst_buf->flags &= ~V4L2_BUF_FLAG_KEYFRAME;
1420         }
1421
1422         dst_buf->vb2_buf.timestamp = src_buf->vb2_buf.timestamp;
1423         dst_buf->field = src_buf->field;
1424         dst_buf->flags &= ~V4L2_BUF_FLAG_TSTAMP_SRC_MASK;
1425         dst_buf->flags |=
1426                 src_buf->flags & V4L2_BUF_FLAG_TSTAMP_SRC_MASK;
1427         dst_buf->timecode = src_buf->timecode;
1428
1429         v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
1430
1431         dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
1432         coda_m2m_buf_done(ctx, dst_buf, VB2_BUF_STATE_DONE);
1433
1434         ctx->gopcounter--;
1435         if (ctx->gopcounter < 0)
1436                 ctx->gopcounter = ctx->params.gop_size - 1;
1437
1438         v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
1439                 "job finished: encoding frame (%d) (%s)\n",
1440                 dst_buf->sequence,
1441                 (dst_buf->flags & V4L2_BUF_FLAG_KEYFRAME) ?
1442                 "KEYFRAME" : "PFRAME");
1443 }
1444
1445 static void coda_seq_end_work(struct work_struct *work)
1446 {
1447         struct coda_ctx *ctx = container_of(work, struct coda_ctx, seq_end_work);
1448         struct coda_dev *dev = ctx->dev;
1449
1450         mutex_lock(&ctx->buffer_mutex);
1451         mutex_lock(&dev->coda_mutex);
1452
1453         if (ctx->initialized == 0)
1454                 goto out;
1455
1456         v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
1457                  "%d: %s: sent command 'SEQ_END' to coda\n", ctx->idx,
1458                  __func__);
1459         if (coda_command_sync(ctx, CODA_COMMAND_SEQ_END)) {
1460                 v4l2_err(&dev->v4l2_dev,
1461                          "CODA_COMMAND_SEQ_END failed\n");
1462         }
1463
1464         /*
1465          * FIXME: Sometimes h.264 encoding fails with 8-byte sequences missing
1466          * from the output stream after the h.264 decoder has run. Resetting the
1467          * hardware after the decoder has finished seems to help.
1468          */
1469         if (dev->devtype->product == CODA_960)
1470                 coda_hw_reset(ctx);
1471
1472         kfifo_init(&ctx->bitstream_fifo,
1473                 ctx->bitstream.vaddr, ctx->bitstream.size);
1474
1475         coda_free_framebuffers(ctx);
1476
1477         ctx->initialized = 0;
1478
1479 out:
1480         mutex_unlock(&dev->coda_mutex);
1481         mutex_unlock(&ctx->buffer_mutex);
1482 }
1483
1484 static void coda_bit_release(struct coda_ctx *ctx)
1485 {
1486         mutex_lock(&ctx->buffer_mutex);
1487         coda_free_framebuffers(ctx);
1488         coda_free_context_buffers(ctx);
1489         coda_free_bitstream_buffer(ctx);
1490         mutex_unlock(&ctx->buffer_mutex);
1491 }
1492
1493 const struct coda_context_ops coda_bit_encode_ops = {
1494         .queue_init = coda_encoder_queue_init,
1495         .reqbufs = coda_encoder_reqbufs,
1496         .start_streaming = coda_start_encoding,
1497         .prepare_run = coda_prepare_encode,
1498         .finish_run = coda_finish_encode,
1499         .seq_end_work = coda_seq_end_work,
1500         .release = coda_bit_release,
1501 };
1502
1503 /*
1504  * Decoder context operations
1505  */
1506
1507 static int coda_alloc_bitstream_buffer(struct coda_ctx *ctx,
1508                                        struct coda_q_data *q_data)
1509 {
1510         if (ctx->bitstream.vaddr)
1511                 return 0;
1512
1513         ctx->bitstream.size = roundup_pow_of_two(q_data->sizeimage * 2);
1514         ctx->bitstream.vaddr = dma_alloc_wc(&ctx->dev->plat_dev->dev,
1515                                             ctx->bitstream.size,
1516                                             &ctx->bitstream.paddr, GFP_KERNEL);
1517         if (!ctx->bitstream.vaddr) {
1518                 v4l2_err(&ctx->dev->v4l2_dev,
1519                          "failed to allocate bitstream ringbuffer");
1520                 return -ENOMEM;
1521         }
1522         kfifo_init(&ctx->bitstream_fifo,
1523                    ctx->bitstream.vaddr, ctx->bitstream.size);
1524
1525         return 0;
1526 }
1527
1528 static void coda_free_bitstream_buffer(struct coda_ctx *ctx)
1529 {
1530         if (ctx->bitstream.vaddr == NULL)
1531                 return;
1532
1533         dma_free_wc(&ctx->dev->plat_dev->dev, ctx->bitstream.size,
1534                     ctx->bitstream.vaddr, ctx->bitstream.paddr);
1535         ctx->bitstream.vaddr = NULL;
1536         kfifo_init(&ctx->bitstream_fifo, NULL, 0);
1537 }
1538
1539 static int coda_decoder_reqbufs(struct coda_ctx *ctx,
1540                                 struct v4l2_requestbuffers *rb)
1541 {
1542         struct coda_q_data *q_data_src;
1543         int ret;
1544
1545         if (rb->type != V4L2_BUF_TYPE_VIDEO_OUTPUT)
1546                 return 0;
1547
1548         if (rb->count) {
1549                 q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
1550                 ret = coda_alloc_context_buffers(ctx, q_data_src);
1551                 if (ret < 0)
1552                         return ret;
1553                 ret = coda_alloc_bitstream_buffer(ctx, q_data_src);
1554                 if (ret < 0) {
1555                         coda_free_context_buffers(ctx);
1556                         return ret;
1557                 }
1558         } else {
1559                 coda_free_bitstream_buffer(ctx);
1560                 coda_free_context_buffers(ctx);
1561         }
1562
1563         return 0;
1564 }
1565
1566 static bool coda_reorder_enable(struct coda_ctx *ctx)
1567 {
1568         const char * const *profile_names;
1569         const char * const *level_names;
1570         struct coda_dev *dev = ctx->dev;
1571         int profile, level;
1572
1573         if (dev->devtype->product != CODA_7541 &&
1574             dev->devtype->product != CODA_960)
1575                 return false;
1576
1577         if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG)
1578                 return false;
1579
1580         if (ctx->codec->src_fourcc != V4L2_PIX_FMT_H264)
1581                 return true;
1582
1583         profile = coda_h264_profile(ctx->params.h264_profile_idc);
1584         if (profile < 0) {
1585                 v4l2_warn(&dev->v4l2_dev, "Invalid H264 Profile: %d\n",
1586                          ctx->params.h264_profile_idc);
1587                 return false;
1588         }
1589
1590         level = coda_h264_level(ctx->params.h264_level_idc);
1591         if (level < 0) {
1592                 v4l2_warn(&dev->v4l2_dev, "Invalid H264 Level: %d\n",
1593                          ctx->params.h264_level_idc);
1594                 return false;
1595         }
1596
1597         profile_names = v4l2_ctrl_get_menu(V4L2_CID_MPEG_VIDEO_H264_PROFILE);
1598         level_names = v4l2_ctrl_get_menu(V4L2_CID_MPEG_VIDEO_H264_LEVEL);
1599
1600         v4l2_dbg(1, coda_debug, &dev->v4l2_dev, "H264 Profile/Level: %s L%s\n",
1601                  profile_names[profile], level_names[level]);
1602
1603         /* Baseline profile does not support reordering */
1604         return profile > V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE;
1605 }
1606
1607 static int __coda_start_decoding(struct coda_ctx *ctx)
1608 {
1609         struct coda_q_data *q_data_src, *q_data_dst;
1610         u32 bitstream_buf, bitstream_size;
1611         struct coda_dev *dev = ctx->dev;
1612         int width, height;
1613         u32 src_fourcc, dst_fourcc;
1614         u32 val;
1615         int ret;
1616
1617         v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
1618                  "Video Data Order Adapter: %s\n",
1619                  ctx->use_vdoa ? "Enabled" : "Disabled");
1620
1621         /* Start decoding */
1622         q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
1623         q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
1624         bitstream_buf = ctx->bitstream.paddr;
1625         bitstream_size = ctx->bitstream.size;
1626         src_fourcc = q_data_src->fourcc;
1627         dst_fourcc = q_data_dst->fourcc;
1628
1629         coda_write(dev, ctx->parabuf.paddr, CODA_REG_BIT_PARA_BUF_ADDR);
1630
1631         /* Update coda bitstream read and write pointers from kfifo */
1632         coda_kfifo_sync_to_device_full(ctx);
1633
1634         ctx->frame_mem_ctrl &= ~(CODA_FRAME_CHROMA_INTERLEAVE | (0x3 << 9) |
1635                                  CODA9_FRAME_TILED2LINEAR);
1636         if (dst_fourcc == V4L2_PIX_FMT_NV12 || dst_fourcc == V4L2_PIX_FMT_YUYV)
1637                 ctx->frame_mem_ctrl |= CODA_FRAME_CHROMA_INTERLEAVE;
1638         if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP)
1639                 ctx->frame_mem_ctrl |= (0x3 << 9) |
1640                         ((ctx->use_vdoa) ? 0 : CODA9_FRAME_TILED2LINEAR);
1641         coda_write(dev, ctx->frame_mem_ctrl, CODA_REG_BIT_FRAME_MEM_CTRL);
1642
1643         ctx->display_idx = -1;
1644         ctx->frm_dis_flg = 0;
1645         coda_write(dev, 0, CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
1646
1647         coda_write(dev, bitstream_buf, CODA_CMD_DEC_SEQ_BB_START);
1648         coda_write(dev, bitstream_size / 1024, CODA_CMD_DEC_SEQ_BB_SIZE);
1649         val = 0;
1650         if (coda_reorder_enable(ctx))
1651                 val |= CODA_REORDER_ENABLE;
1652         if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG)
1653                 val |= CODA_NO_INT_ENABLE;
1654         coda_write(dev, val, CODA_CMD_DEC_SEQ_OPTION);
1655
1656         ctx->params.codec_mode = ctx->codec->mode;
1657         if (dev->devtype->product == CODA_960 &&
1658             src_fourcc == V4L2_PIX_FMT_MPEG4)
1659                 ctx->params.codec_mode_aux = CODA_MP4_AUX_MPEG4;
1660         else
1661                 ctx->params.codec_mode_aux = 0;
1662         if (src_fourcc == V4L2_PIX_FMT_MPEG4) {
1663                 coda_write(dev, CODA_MP4_CLASS_MPEG4,
1664                            CODA_CMD_DEC_SEQ_MP4_ASP_CLASS);
1665         }
1666         if (src_fourcc == V4L2_PIX_FMT_H264) {
1667                 if (dev->devtype->product == CODA_7541) {
1668                         coda_write(dev, ctx->psbuf.paddr,
1669                                         CODA_CMD_DEC_SEQ_PS_BB_START);
1670                         coda_write(dev, (CODA7_PS_BUF_SIZE / 1024),
1671                                         CODA_CMD_DEC_SEQ_PS_BB_SIZE);
1672                 }
1673                 if (dev->devtype->product == CODA_960) {
1674                         coda_write(dev, 0, CODA_CMD_DEC_SEQ_X264_MV_EN);
1675                         coda_write(dev, 512, CODA_CMD_DEC_SEQ_SPP_CHUNK_SIZE);
1676                 }
1677         }
1678         if (dev->devtype->product != CODA_960)
1679                 coda_write(dev, 0, CODA_CMD_DEC_SEQ_SRC_SIZE);
1680
1681         ctx->bit_stream_param = CODA_BIT_DEC_SEQ_INIT_ESCAPE;
1682         ret = coda_command_sync(ctx, CODA_COMMAND_SEQ_INIT);
1683         ctx->bit_stream_param = 0;
1684         if (ret) {
1685                 v4l2_err(&dev->v4l2_dev, "CODA_COMMAND_SEQ_INIT timeout\n");
1686                 return ret;
1687         }
1688         ctx->initialized = 1;
1689
1690         /* Update kfifo out pointer from coda bitstream read pointer */
1691         coda_kfifo_sync_from_device(ctx);
1692
1693         if (coda_read(dev, CODA_RET_DEC_SEQ_SUCCESS) == 0) {
1694                 v4l2_err(&dev->v4l2_dev,
1695                         "CODA_COMMAND_SEQ_INIT failed, error code = %d\n",
1696                         coda_read(dev, CODA_RET_DEC_SEQ_ERR_REASON));
1697                 return -EAGAIN;
1698         }
1699
1700         val = coda_read(dev, CODA_RET_DEC_SEQ_SRC_SIZE);
1701         if (dev->devtype->product == CODA_DX6) {
1702                 width = (val >> CODADX6_PICWIDTH_OFFSET) & CODADX6_PICWIDTH_MASK;
1703                 height = val & CODADX6_PICHEIGHT_MASK;
1704         } else {
1705                 width = (val >> CODA7_PICWIDTH_OFFSET) & CODA7_PICWIDTH_MASK;
1706                 height = val & CODA7_PICHEIGHT_MASK;
1707         }
1708
1709         if (width > q_data_dst->bytesperline || height > q_data_dst->height) {
1710                 v4l2_err(&dev->v4l2_dev, "stream is %dx%d, not %dx%d\n",
1711                          width, height, q_data_dst->bytesperline,
1712                          q_data_dst->height);
1713                 return -EINVAL;
1714         }
1715
1716         width = round_up(width, 16);
1717         height = round_up(height, 16);
1718
1719         v4l2_dbg(1, coda_debug, &dev->v4l2_dev, "%s instance %d now: %dx%d\n",
1720                  __func__, ctx->idx, width, height);
1721
1722         ctx->num_internal_frames = coda_read(dev, CODA_RET_DEC_SEQ_FRAME_NEED);
1723         /*
1724          * If the VDOA is used, the decoder needs one additional frame,
1725          * because the frames are freed when the next frame is decoded.
1726          * Otherwise there are visible errors in the decoded frames (green
1727          * regions in displayed frames) and a broken order of frames (earlier
1728          * frames are sporadically displayed after later frames).
1729          */
1730         if (ctx->use_vdoa)
1731                 ctx->num_internal_frames += 1;
1732         if (ctx->num_internal_frames > CODA_MAX_FRAMEBUFFERS) {
1733                 v4l2_err(&dev->v4l2_dev,
1734                          "not enough framebuffers to decode (%d < %d)\n",
1735                          CODA_MAX_FRAMEBUFFERS, ctx->num_internal_frames);
1736                 return -EINVAL;
1737         }
1738
1739         if (src_fourcc == V4L2_PIX_FMT_H264) {
1740                 u32 left_right;
1741                 u32 top_bottom;
1742
1743                 left_right = coda_read(dev, CODA_RET_DEC_SEQ_CROP_LEFT_RIGHT);
1744                 top_bottom = coda_read(dev, CODA_RET_DEC_SEQ_CROP_TOP_BOTTOM);
1745
1746                 q_data_dst->rect.left = (left_right >> 10) & 0x3ff;
1747                 q_data_dst->rect.top = (top_bottom >> 10) & 0x3ff;
1748                 q_data_dst->rect.width = width - q_data_dst->rect.left -
1749                                          (left_right & 0x3ff);
1750                 q_data_dst->rect.height = height - q_data_dst->rect.top -
1751                                           (top_bottom & 0x3ff);
1752         }
1753
1754         ret = coda_alloc_framebuffers(ctx, q_data_dst, src_fourcc);
1755         if (ret < 0) {
1756                 v4l2_err(&dev->v4l2_dev, "failed to allocate framebuffers\n");
1757                 return ret;
1758         }
1759
1760         /* Tell the decoder how many frame buffers we allocated. */
1761         coda_write(dev, ctx->num_internal_frames, CODA_CMD_SET_FRAME_BUF_NUM);
1762         coda_write(dev, width, CODA_CMD_SET_FRAME_BUF_STRIDE);
1763
1764         if (dev->devtype->product != CODA_DX6) {
1765                 /* Set secondary AXI IRAM */
1766                 coda_setup_iram(ctx);
1767
1768                 coda_write(dev, ctx->iram_info.buf_bit_use,
1769                                 CODA7_CMD_SET_FRAME_AXI_BIT_ADDR);
1770                 coda_write(dev, ctx->iram_info.buf_ip_ac_dc_use,
1771                                 CODA7_CMD_SET_FRAME_AXI_IPACDC_ADDR);
1772                 coda_write(dev, ctx->iram_info.buf_dbk_y_use,
1773                                 CODA7_CMD_SET_FRAME_AXI_DBKY_ADDR);
1774                 coda_write(dev, ctx->iram_info.buf_dbk_c_use,
1775                                 CODA7_CMD_SET_FRAME_AXI_DBKC_ADDR);
1776                 coda_write(dev, ctx->iram_info.buf_ovl_use,
1777                                 CODA7_CMD_SET_FRAME_AXI_OVL_ADDR);
1778                 if (dev->devtype->product == CODA_960) {
1779                         coda_write(dev, ctx->iram_info.buf_btp_use,
1780                                         CODA9_CMD_SET_FRAME_AXI_BTP_ADDR);
1781
1782                         coda_write(dev, -1, CODA9_CMD_SET_FRAME_DELAY);
1783                         coda9_set_frame_cache(ctx, dst_fourcc);
1784                 }
1785         }
1786
1787         if (src_fourcc == V4L2_PIX_FMT_H264) {
1788                 coda_write(dev, ctx->slicebuf.paddr,
1789                                 CODA_CMD_SET_FRAME_SLICE_BB_START);
1790                 coda_write(dev, ctx->slicebuf.size / 1024,
1791                                 CODA_CMD_SET_FRAME_SLICE_BB_SIZE);
1792         }
1793
1794         if (dev->devtype->product == CODA_7541) {
1795                 int max_mb_x = 1920 / 16;
1796                 int max_mb_y = 1088 / 16;
1797                 int max_mb_num = max_mb_x * max_mb_y;
1798
1799                 coda_write(dev, max_mb_num << 16 | max_mb_x << 8 | max_mb_y,
1800                                 CODA7_CMD_SET_FRAME_MAX_DEC_SIZE);
1801         } else if (dev->devtype->product == CODA_960) {
1802                 int max_mb_x = 1920 / 16;
1803                 int max_mb_y = 1088 / 16;
1804                 int max_mb_num = max_mb_x * max_mb_y;
1805
1806                 coda_write(dev, max_mb_num << 16 | max_mb_x << 8 | max_mb_y,
1807                                 CODA9_CMD_SET_FRAME_MAX_DEC_SIZE);
1808         }
1809
1810         if (coda_command_sync(ctx, CODA_COMMAND_SET_FRAME_BUF)) {
1811                 v4l2_err(&ctx->dev->v4l2_dev,
1812                          "CODA_COMMAND_SET_FRAME_BUF timeout\n");
1813                 return -ETIMEDOUT;
1814         }
1815
1816         return 0;
1817 }
1818
1819 static int coda_start_decoding(struct coda_ctx *ctx)
1820 {
1821         struct coda_dev *dev = ctx->dev;
1822         int ret;
1823
1824         mutex_lock(&dev->coda_mutex);
1825         ret = __coda_start_decoding(ctx);
1826         mutex_unlock(&dev->coda_mutex);
1827
1828         return ret;
1829 }
1830
1831 static int coda_prepare_decode(struct coda_ctx *ctx)
1832 {
1833         struct vb2_v4l2_buffer *dst_buf;
1834         struct coda_dev *dev = ctx->dev;
1835         struct coda_q_data *q_data_dst;
1836         struct coda_buffer_meta *meta;
1837         unsigned long flags;
1838         u32 rot_mode = 0;
1839         u32 reg_addr, reg_stride;
1840
1841         dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
1842         q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
1843
1844         /* Try to copy source buffer contents into the bitstream ringbuffer */
1845         mutex_lock(&ctx->bitstream_mutex);
1846         coda_fill_bitstream(ctx, NULL);
1847         mutex_unlock(&ctx->bitstream_mutex);
1848
1849         if (coda_get_bitstream_payload(ctx) < 512 &&
1850             (!(ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG))) {
1851                 v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
1852                          "bitstream payload: %d, skipping\n",
1853                          coda_get_bitstream_payload(ctx));
1854                 v4l2_m2m_job_finish(ctx->dev->m2m_dev, ctx->fh.m2m_ctx);
1855                 return -EAGAIN;
1856         }
1857
1858         /* Run coda_start_decoding (again) if not yet initialized */
1859         if (!ctx->initialized) {
1860                 int ret = __coda_start_decoding(ctx);
1861
1862                 if (ret < 0) {
1863                         v4l2_err(&dev->v4l2_dev, "failed to start decoding\n");
1864                         v4l2_m2m_job_finish(ctx->dev->m2m_dev, ctx->fh.m2m_ctx);
1865                         return -EAGAIN;
1866                 } else {
1867                         ctx->initialized = 1;
1868                 }
1869         }
1870
1871         if (dev->devtype->product == CODA_960)
1872                 coda_set_gdi_regs(ctx);
1873
1874         if (ctx->use_vdoa &&
1875             ctx->display_idx >= 0 &&
1876             ctx->display_idx < ctx->num_internal_frames) {
1877                 vdoa_device_run(ctx->vdoa,
1878                                 vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0),
1879                                 ctx->internal_frames[ctx->display_idx].paddr);
1880         } else {
1881                 if (dev->devtype->product == CODA_960) {
1882                         /*
1883                          * The CODA960 seems to have an internal list of
1884                          * buffers with 64 entries that includes the
1885                          * registered frame buffers as well as the rotator
1886                          * buffer output.
1887                          *
1888                          * ROT_INDEX needs to be < 0x40, but >
1889                          * ctx->num_internal_frames.
1890                          */
1891                         coda_write(dev,
1892                                    CODA_MAX_FRAMEBUFFERS + dst_buf->vb2_buf.index,
1893                                    CODA9_CMD_DEC_PIC_ROT_INDEX);
1894
1895                         reg_addr = CODA9_CMD_DEC_PIC_ROT_ADDR_Y;
1896                         reg_stride = CODA9_CMD_DEC_PIC_ROT_STRIDE;
1897                 } else {
1898                         reg_addr = CODA_CMD_DEC_PIC_ROT_ADDR_Y;
1899                         reg_stride = CODA_CMD_DEC_PIC_ROT_STRIDE;
1900                 }
1901                 coda_write_base(ctx, q_data_dst, dst_buf, reg_addr);
1902                 coda_write(dev, q_data_dst->bytesperline, reg_stride);
1903
1904                 rot_mode = CODA_ROT_MIR_ENABLE | ctx->params.rot_mode;
1905         }
1906
1907         coda_write(dev, rot_mode, CODA_CMD_DEC_PIC_ROT_MODE);
1908
1909         switch (dev->devtype->product) {
1910         case CODA_DX6:
1911                 /* TBD */
1912         case CODA_7541:
1913                 coda_write(dev, CODA_PRE_SCAN_EN, CODA_CMD_DEC_PIC_OPTION);
1914                 break;
1915         case CODA_960:
1916                 /* 'hardcode to use interrupt disable mode'? */
1917                 coda_write(dev, (1 << 10), CODA_CMD_DEC_PIC_OPTION);
1918                 break;
1919         }
1920
1921         coda_write(dev, 0, CODA_CMD_DEC_PIC_SKIP_NUM);
1922
1923         coda_write(dev, 0, CODA_CMD_DEC_PIC_BB_START);
1924         coda_write(dev, 0, CODA_CMD_DEC_PIC_START_BYTE);
1925
1926         if (dev->devtype->product != CODA_DX6)
1927                 coda_write(dev, ctx->iram_info.axi_sram_use,
1928                                 CODA7_REG_BIT_AXI_SRAM_USE);
1929
1930         spin_lock_irqsave(&ctx->buffer_meta_lock, flags);
1931         meta = list_first_entry_or_null(&ctx->buffer_meta_list,
1932                                         struct coda_buffer_meta, list);
1933
1934         if (meta && ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG) {
1935
1936                 /* If this is the last buffer in the bitstream, add padding */
1937                 if (meta->end == (ctx->bitstream_fifo.kfifo.in &
1938                                   ctx->bitstream_fifo.kfifo.mask)) {
1939                         static unsigned char buf[512];
1940                         unsigned int pad;
1941
1942                         /* Pad to multiple of 256 and then add 256 more */
1943                         pad = ((0 - meta->end) & 0xff) + 256;
1944
1945                         memset(buf, 0xff, sizeof(buf));
1946
1947                         kfifo_in(&ctx->bitstream_fifo, buf, pad);
1948                 }
1949         }
1950         spin_unlock_irqrestore(&ctx->buffer_meta_lock, flags);
1951
1952         coda_kfifo_sync_to_device_full(ctx);
1953
1954         /* Clear decode success flag */
1955         coda_write(dev, 0, CODA_RET_DEC_PIC_SUCCESS);
1956
1957         trace_coda_dec_pic_run(ctx, meta);
1958
1959         coda_command_async(ctx, CODA_COMMAND_PIC_RUN);
1960
1961         return 0;
1962 }
1963
1964 static void coda_finish_decode(struct coda_ctx *ctx)
1965 {
1966         struct coda_dev *dev = ctx->dev;
1967         struct coda_q_data *q_data_src;
1968         struct coda_q_data *q_data_dst;
1969         struct vb2_v4l2_buffer *dst_buf;
1970         struct coda_buffer_meta *meta;
1971         unsigned long payload;
1972         unsigned long flags;
1973         int width, height;
1974         int decoded_idx;
1975         int display_idx;
1976         u32 src_fourcc;
1977         int success;
1978         u32 err_mb;
1979         int err_vdoa = 0;
1980         u32 val;
1981
1982         /* Update kfifo out pointer from coda bitstream read pointer */
1983         coda_kfifo_sync_from_device(ctx);
1984
1985         /*
1986          * in stream-end mode, the read pointer can overshoot the write pointer
1987          * by up to 512 bytes
1988          */
1989         if (ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG) {
1990                 if (coda_get_bitstream_payload(ctx) >= ctx->bitstream.size - 512)
1991                         kfifo_init(&ctx->bitstream_fifo,
1992                                 ctx->bitstream.vaddr, ctx->bitstream.size);
1993         }
1994
1995         q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
1996         src_fourcc = q_data_src->fourcc;
1997
1998         val = coda_read(dev, CODA_RET_DEC_PIC_SUCCESS);
1999         if (val != 1)
2000                 pr_err("DEC_PIC_SUCCESS = %d\n", val);
2001
2002         success = val & 0x1;
2003         if (!success)
2004                 v4l2_err(&dev->v4l2_dev, "decode failed\n");
2005
2006         if (src_fourcc == V4L2_PIX_FMT_H264) {
2007                 if (val & (1 << 3))
2008                         v4l2_err(&dev->v4l2_dev,
2009                                  "insufficient PS buffer space (%d bytes)\n",
2010                                  ctx->psbuf.size);
2011                 if (val & (1 << 2))
2012                         v4l2_err(&dev->v4l2_dev,
2013                                  "insufficient slice buffer space (%d bytes)\n",
2014                                  ctx->slicebuf.size);
2015         }
2016
2017         val = coda_read(dev, CODA_RET_DEC_PIC_SIZE);
2018         width = (val >> 16) & 0xffff;
2019         height = val & 0xffff;
2020
2021         q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
2022
2023         /* frame crop information */
2024         if (src_fourcc == V4L2_PIX_FMT_H264) {
2025                 u32 left_right;
2026                 u32 top_bottom;
2027
2028                 left_right = coda_read(dev, CODA_RET_DEC_PIC_CROP_LEFT_RIGHT);
2029                 top_bottom = coda_read(dev, CODA_RET_DEC_PIC_CROP_TOP_BOTTOM);
2030
2031                 if (left_right == 0xffffffff && top_bottom == 0xffffffff) {
2032                         /* Keep current crop information */
2033                 } else {
2034                         struct v4l2_rect *rect = &q_data_dst->rect;
2035
2036                         rect->left = left_right >> 16 & 0xffff;
2037                         rect->top = top_bottom >> 16 & 0xffff;
2038                         rect->width = width - rect->left -
2039                                       (left_right & 0xffff);
2040                         rect->height = height - rect->top -
2041                                        (top_bottom & 0xffff);
2042                 }
2043         } else {
2044                 /* no cropping */
2045         }
2046
2047         err_mb = coda_read(dev, CODA_RET_DEC_PIC_ERR_MB);
2048         if (err_mb > 0)
2049                 v4l2_err(&dev->v4l2_dev,
2050                          "errors in %d macroblocks\n", err_mb);
2051
2052         if (dev->devtype->product == CODA_7541) {
2053                 val = coda_read(dev, CODA_RET_DEC_PIC_OPTION);
2054                 if (val == 0) {
2055                         /* not enough bitstream data */
2056                         v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
2057                                  "prescan failed: %d\n", val);
2058                         ctx->hold = true;
2059                         return;
2060                 }
2061         }
2062
2063         /* Wait until the VDOA finished writing the previous display frame */
2064         if (ctx->use_vdoa &&
2065             ctx->display_idx >= 0 &&
2066             ctx->display_idx < ctx->num_internal_frames) {
2067                 err_vdoa = vdoa_wait_for_completion(ctx->vdoa);
2068         }
2069
2070         ctx->frm_dis_flg = coda_read(dev,
2071                                      CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
2072
2073         /* The previous display frame was copied out and can be overwritten */
2074         if (ctx->display_idx >= 0 &&
2075             ctx->display_idx < ctx->num_internal_frames) {
2076                 ctx->frm_dis_flg &= ~(1 << ctx->display_idx);
2077                 coda_write(dev, ctx->frm_dis_flg,
2078                                 CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
2079         }
2080
2081         /*
2082          * The index of the last decoded frame, not necessarily in
2083          * display order, and the index of the next display frame.
2084          * The latter could have been decoded in a previous run.
2085          */
2086         decoded_idx = coda_read(dev, CODA_RET_DEC_PIC_CUR_IDX);
2087         display_idx = coda_read(dev, CODA_RET_DEC_PIC_FRAME_IDX);
2088
2089         if (decoded_idx == -1) {
2090                 /* no frame was decoded, but we might have a display frame */
2091                 if (display_idx >= 0 && display_idx < ctx->num_internal_frames)
2092                         ctx->sequence_offset++;
2093                 else if (ctx->display_idx < 0)
2094                         ctx->hold = true;
2095         } else if (decoded_idx == -2) {
2096                 /* no frame was decoded, we still return remaining buffers */
2097         } else if (decoded_idx < 0 || decoded_idx >= ctx->num_internal_frames) {
2098                 v4l2_err(&dev->v4l2_dev,
2099                          "decoded frame index out of range: %d\n", decoded_idx);
2100         } else {
2101                 val = coda_read(dev, CODA_RET_DEC_PIC_FRAME_NUM) - 1;
2102                 val -= ctx->sequence_offset;
2103                 spin_lock_irqsave(&ctx->buffer_meta_lock, flags);
2104                 if (!list_empty(&ctx->buffer_meta_list)) {
2105                         meta = list_first_entry(&ctx->buffer_meta_list,
2106                                               struct coda_buffer_meta, list);
2107                         list_del(&meta->list);
2108                         ctx->num_metas--;
2109                         spin_unlock_irqrestore(&ctx->buffer_meta_lock, flags);
2110                         /*
2111                          * Clamp counters to 16 bits for comparison, as the HW
2112                          * counter rolls over at this point for h.264. This
2113                          * may be different for other formats, but using 16 bits
2114                          * should be enough to detect most errors and saves us
2115                          * from doing different things based on the format.
2116                          */
2117                         if ((val & 0xffff) != (meta->sequence & 0xffff)) {
2118                                 v4l2_err(&dev->v4l2_dev,
2119                                          "sequence number mismatch (%d(%d) != %d)\n",
2120                                          val, ctx->sequence_offset,
2121                                          meta->sequence);
2122                         }
2123                         ctx->frame_metas[decoded_idx] = *meta;
2124                         kfree(meta);
2125                 } else {
2126                         spin_unlock_irqrestore(&ctx->buffer_meta_lock, flags);
2127                         v4l2_err(&dev->v4l2_dev, "empty timestamp list!\n");
2128                         memset(&ctx->frame_metas[decoded_idx], 0,
2129                                sizeof(struct coda_buffer_meta));
2130                         ctx->frame_metas[decoded_idx].sequence = val;
2131                         ctx->sequence_offset++;
2132                 }
2133
2134                 trace_coda_dec_pic_done(ctx, &ctx->frame_metas[decoded_idx]);
2135
2136                 val = coda_read(dev, CODA_RET_DEC_PIC_TYPE) & 0x7;
2137                 if (val == 0)
2138                         ctx->frame_types[decoded_idx] = V4L2_BUF_FLAG_KEYFRAME;
2139                 else if (val == 1)
2140                         ctx->frame_types[decoded_idx] = V4L2_BUF_FLAG_PFRAME;
2141                 else
2142                         ctx->frame_types[decoded_idx] = V4L2_BUF_FLAG_BFRAME;
2143
2144                 ctx->frame_errors[decoded_idx] = err_mb;
2145         }
2146
2147         if (display_idx == -1) {
2148                 /*
2149                  * no more frames to be decoded, but there could still
2150                  * be rotator output to dequeue
2151                  */
2152                 ctx->hold = true;
2153         } else if (display_idx == -3) {
2154                 /* possibly prescan failure */
2155         } else if (display_idx < 0 || display_idx >= ctx->num_internal_frames) {
2156                 v4l2_err(&dev->v4l2_dev,
2157                          "presentation frame index out of range: %d\n",
2158                          display_idx);
2159         }
2160
2161         /* If a frame was copied out, return it */
2162         if (ctx->display_idx >= 0 &&
2163             ctx->display_idx < ctx->num_internal_frames) {
2164                 dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
2165                 dst_buf->sequence = ctx->osequence++;
2166
2167                 dst_buf->field = V4L2_FIELD_NONE;
2168                 dst_buf->flags &= ~(V4L2_BUF_FLAG_KEYFRAME |
2169                                              V4L2_BUF_FLAG_PFRAME |
2170                                              V4L2_BUF_FLAG_BFRAME);
2171                 dst_buf->flags |= ctx->frame_types[ctx->display_idx];
2172                 meta = &ctx->frame_metas[ctx->display_idx];
2173                 dst_buf->timecode = meta->timecode;
2174                 dst_buf->vb2_buf.timestamp = meta->timestamp;
2175
2176                 trace_coda_dec_rot_done(ctx, dst_buf, meta);
2177
2178                 switch (q_data_dst->fourcc) {
2179                 case V4L2_PIX_FMT_YUYV:
2180                         payload = width * height * 2;
2181                         break;
2182                 case V4L2_PIX_FMT_YUV420:
2183                 case V4L2_PIX_FMT_YVU420:
2184                 case V4L2_PIX_FMT_NV12:
2185                 default:
2186                         payload = width * height * 3 / 2;
2187                         break;
2188                 case V4L2_PIX_FMT_YUV422P:
2189                         payload = width * height * 2;
2190                         break;
2191                 }
2192                 vb2_set_plane_payload(&dst_buf->vb2_buf, 0, payload);
2193
2194                 if (ctx->frame_errors[ctx->display_idx] || err_vdoa)
2195                         coda_m2m_buf_done(ctx, dst_buf, VB2_BUF_STATE_ERROR);
2196                 else
2197                         coda_m2m_buf_done(ctx, dst_buf, VB2_BUF_STATE_DONE);
2198
2199                 v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
2200                         "job finished: decoding frame (%d) (%s)\n",
2201                         dst_buf->sequence,
2202                         (dst_buf->flags & V4L2_BUF_FLAG_KEYFRAME) ?
2203                         "KEYFRAME" : "PFRAME");
2204         } else {
2205                 v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
2206                         "job finished: no frame decoded\n");
2207         }
2208
2209         /* The rotator will copy the current display frame next time */
2210         ctx->display_idx = display_idx;
2211 }
2212
2213 static void coda_decode_timeout(struct coda_ctx *ctx)
2214 {
2215         struct vb2_v4l2_buffer *dst_buf;
2216
2217         /*
2218          * For now this only handles the case where we would deadlock with
2219          * userspace, i.e. userspace issued DEC_CMD_STOP and waits for EOS,
2220          * but after a failed decode run we would hold the context and wait for
2221          * userspace to queue more buffers.
2222          */
2223         if (!(ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG))
2224                 return;
2225
2226         dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
2227         dst_buf->sequence = ctx->qsequence - 1;
2228
2229         coda_m2m_buf_done(ctx, dst_buf, VB2_BUF_STATE_ERROR);
2230 }
2231
2232 const struct coda_context_ops coda_bit_decode_ops = {
2233         .queue_init = coda_decoder_queue_init,
2234         .reqbufs = coda_decoder_reqbufs,
2235         .start_streaming = coda_start_decoding,
2236         .prepare_run = coda_prepare_decode,
2237         .finish_run = coda_finish_decode,
2238         .run_timeout = coda_decode_timeout,
2239         .seq_end_work = coda_seq_end_work,
2240         .release = coda_bit_release,
2241 };
2242
2243 irqreturn_t coda_irq_handler(int irq, void *data)
2244 {
2245         struct coda_dev *dev = data;
2246         struct coda_ctx *ctx;
2247
2248         /* read status register to attend the IRQ */
2249         coda_read(dev, CODA_REG_BIT_INT_STATUS);
2250         coda_write(dev, CODA_REG_BIT_INT_CLEAR_SET,
2251                       CODA_REG_BIT_INT_CLEAR);
2252
2253         ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev);
2254         if (ctx == NULL) {
2255                 v4l2_err(&dev->v4l2_dev,
2256                          "Instance released before the end of transaction\n");
2257                 mutex_unlock(&dev->coda_mutex);
2258                 return IRQ_HANDLED;
2259         }
2260
2261         trace_coda_bit_done(ctx);
2262
2263         if (ctx->aborting) {
2264                 v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
2265                          "task has been aborted\n");
2266         }
2267
2268         if (coda_isbusy(ctx->dev)) {
2269                 v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
2270                          "coda is still busy!!!!\n");
2271                 return IRQ_NONE;
2272         }
2273
2274         complete(&ctx->completion);
2275
2276         return IRQ_HANDLED;
2277 }