2227b4e1206751d9384bb3174dd648f4606ebd21
[sfrench/cifs-2.6.git] / drivers / staging / media / hantro / hantro_h264.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Rockchip RK3288 VPU codec driver
4  *
5  * Copyright (c) 2014 Rockchip Electronics Co., Ltd.
6  *      Hertz Wong <hertz.wong@rock-chips.com>
7  *      Herman Chen <herman.chen@rock-chips.com>
8  *
9  * Copyright (C) 2014 Google, Inc.
10  *      Tomasz Figa <tfiga@chromium.org>
11  */
12
13 #include <linux/types.h>
14 #include <linux/sort.h>
15 #include <media/v4l2-mem2mem.h>
16
17 #include "hantro.h"
18 #include "hantro_hw.h"
19
20 /*
 * Element counts of the arrays in struct hantro_h264_dec_priv_tbl: the CABAC
 * and POC tables are arrays of u32, the scaling list area is an array of u8.
 */
21 #define CABAC_INIT_BUFFER_SIZE          (460 * 2)
22 #define POC_BUFFER_SIZE                 34
23 #define SCALING_LIST_SIZE               (6 * 16 + 6 * 64)
24
/*
 * Two-way comparison helper for the reference list comparators: evaluates to
 * -1 when p0 < p1 and to 1 otherwise. It never evaluates to 0, so equal values
 * compare as "greater" whichever way round they are passed.
 */
25 #define POC_CMP(p0, p1) ((p0) < (p1) ? -1 : 1)
26
27 /*
 * Layout of the auxiliary buffer that hantro_h264_dec_init() allocates with
 * dma_alloc_coherent().
 *
 * cabac_table:  copied once from h264_cabac_table when the buffer is
 *               allocated.
 * poc:          rewritten by prepare_table() on every run: top/bottom field
 *               order counts of each DPB entry, followed by those of the
 *               picture being decoded in entries 32 and 33.
 * scaling_list: rewritten by reorder_scaling_list() on every run: all 4x4
 *               lists followed by all 8x8 lists.
 */
28 struct hantro_h264_dec_priv_tbl {
29         u32 cabac_table[CABAC_INIT_BUFFER_SIZE];
30         u32 poc[POC_BUFFER_SIZE];
31         u8 scaling_list[SCALING_LIST_SIZE];
32 };
33
34 /*
35  * Constant CABAC table.
36  * From drivers/media/platform/rk3288-vpu/rk3288_vpu_hw_h264d.c
37  * in https://chromium.googlesource.com/chromiumos/third_party/kernel,
38  * chromeos-3.14 branch.
39  */
/*
 * hantro_h264_dec_init() copies sizeof(cabac_table) bytes from this array into
 * the private auxiliary buffer, so it must provide at least
 * CABAC_INIT_BUFFER_SIZE words.
 */
40 static const u32 h264_cabac_table[] = {
41         0x14f10236, 0x034a14f1, 0x0236034a, 0xe47fe968, 0xfa35ff36, 0x07330000,
42         0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
43         0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
44         0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
45         0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
46         0x0029003f, 0x003f003f, 0xf7530456, 0x0061f948, 0x0d29033e, 0x000b0137,
47         0x0045ef7f, 0xf3660052, 0xf94aeb6b, 0xe57fe17f, 0xe87fee5f, 0xe57feb72,
48         0xe27fef7b, 0xf473f07a, 0xf573f43f, 0xfe44f154, 0xf368fd46, 0xf85df65a,
49         0xe27fff4a, 0xfa61f95b, 0xec7ffc38, 0xfb52f94c, 0xea7df95d, 0xf557fd4d,
50         0xfb47fc3f, 0xfc44f454, 0xf93ef941, 0x083d0538, 0xfe420140, 0x003dfe4e,
51         0x01320734, 0x0a23002c, 0x0b26012d, 0x002e052c, 0x1f110133, 0x07321c13,
52         0x10210e3e, 0xf36cf164, 0xf365f35b, 0xf45ef658, 0xf054f656, 0xf953f357,
53         0xed5e0146, 0x0048fb4a, 0x123bf866, 0xf164005f, 0xfc4b0248, 0xf54bfd47,
54         0x0f2ef345, 0x003e0041, 0x1525f148, 0x09391036, 0x003e0c48, 0x18000f09,
55         0x08190d12, 0x0f090d13, 0x0a250c12, 0x061d1421, 0x0f1e042d, 0x013a003e,
56         0x073d0c26, 0x0b2d0f27, 0x0b2a0d2c, 0x102d0c29, 0x0a311e22, 0x122a0a37,
57         0x1133112e, 0x00591aed, 0x16ef1aef, 0x1ee71cec, 0x21e925e5, 0x21e928e4,
58         0x26ef21f5, 0x28f129fa, 0x26012911, 0x1efa1b03, 0x1a1625f0, 0x23fc26f8,
59         0x26fd2503, 0x26052a00, 0x23102716, 0x0e301b25, 0x153c0c44, 0x0261fd47,
60         0xfa2afb32, 0xfd36fe3e, 0x003a013f, 0xfe48ff4a, 0xf75bfb43, 0xfb1bfd27,
61         0xfe2c002e, 0xf040f844, 0xf64efa4d, 0xf656f45c, 0xf137f63c, 0xfa3efc41,
62         0xf449f84c, 0xf950f758, 0xef6ef561, 0xec54f54f, 0xfa49fc4a, 0xf356f360,
63         0xf561ed75, 0xf84efb21, 0xfc30fe35, 0xfd3ef347, 0xf64ff456, 0xf35af261,
64         0x0000fa5d, 0xfa54f84f, 0x0042ff47, 0x003efe3c, 0xfe3bfb4b, 0xfd3efc3a,
65         0xf742ff4f, 0x00470344, 0x0a2cf93e, 0x0f240e28, 0x101b0c1d, 0x012c1424,
66         0x1220052a, 0x01300a3e, 0x112e0940, 0xf468f561, 0xf060f958, 0xf855f955,
67         0xf755f358, 0x0442fd4d, 0xfd4cfa4c, 0x0a3aff4c, 0xff53f963, 0xf25f025f,
68         0x004cfb4a, 0x0046f54b, 0x01440041, 0xf249033e, 0x043eff44, 0xf34b0b37,
69         0x05400c46, 0x0f060613, 0x07100c0e, 0x120d0d0b, 0x0d0f0f10, 0x0c170d17,
70         0x0f140e1a, 0x0e2c1128, 0x112f1811, 0x15151916, 0x1f1b161d, 0x13230e32,
71         0x0a39073f, 0xfe4dfc52, 0xfd5e0945, 0xf46d24dd, 0x24de20e6, 0x25e22ce0,
72         0x22ee22f1, 0x28f121f9, 0x23fb2100, 0x2602210d, 0x17230d3a, 0x1dfd1a00,
73         0x161e1ff9, 0x23f122fd, 0x220324ff, 0x2205200b, 0x2305220c, 0x270b1e1d,
74         0x221a1d27, 0x13421f15, 0x1f1f1932, 0xef78ec70, 0xee72f555, 0xf15cf259,
75         0xe647f151, 0xf2500044, 0xf246e838, 0xe944e832, 0xf54a17f3, 0x1af328f1,
76         0x31f22c03, 0x2d062c22, 0x21361352, 0xfd4bff17, 0x0122012b, 0x0036fe37,
77         0x003d0140, 0x0044f75c, 0xf26af361, 0xf15af45a, 0xee58f649, 0xf74ff256,
78         0xf649f646, 0xf645fb42, 0xf740fb3a, 0x023b15f6, 0x18f51cf8, 0x1cff1d03,
79         0x1d092314, 0x1d240e43, 0x14f10236, 0x034a14f1, 0x0236034a, 0xe47fe968,
80         0xfa35ff36, 0x07331721, 0x17021500, 0x01090031, 0xdb760539, 0xf34ef541,
81         0x013e0c31, 0xfc491132, 0x1240092b, 0x1d001a43, 0x105a0968, 0xd27fec68,
82         0x0143f34e, 0xf541013e, 0xfa56ef5f, 0xfa3d092d, 0xfd45fa51, 0xf5600637,
83         0x0743fb56, 0x0258003a, 0xfd4cf65e, 0x05360445, 0xfd510058, 0xf943fb4a,
84         0xfc4afb50, 0xf948013a, 0x0029003f, 0x003f003f, 0xf7530456, 0x0061f948,
85         0x0d29033e, 0x002dfc4e, 0xfd60e57e, 0xe462e765, 0xe943e452, 0xec5ef053,
86         0xea6eeb5b, 0xee66f35d, 0xe37ff95c, 0xfb59f960, 0xf36cfd2e, 0xff41ff39,
87         0xf75dfd4a, 0xf75cf857, 0xe97e0536, 0x063c063b, 0x0645ff30, 0x0044fc45,
88         0xf858fe55, 0xfa4eff4b, 0xf94d0236, 0x0532fd44, 0x0132062a, 0xfc51013f,
89         0xfc460043, 0x0239fe4c, 0x0b230440, 0x013d0b23, 0x12190c18, 0x0d1d0d24,
90         0xf65df949, 0xfe490d2e, 0x0931f964, 0x09350235, 0x0535fe3d, 0x00380038,
91         0xf33ffb3c, 0xff3e0439, 0xfa450439, 0x0e270433, 0x0d440340, 0x013d093f,
92         0x07321027, 0x052c0434, 0x0b30fb3c, 0xff3b003b, 0x1621052c, 0x0e2bff4e,
93         0x003c0945, 0x0b1c0228, 0x032c0031, 0x002e022c, 0x0233002f, 0x0427023e,
94         0x062e0036, 0x0336023a, 0x043f0633, 0x06390735, 0x06340637, 0x0b2d0e24,
95         0x0835ff52, 0x0737fd4e, 0x0f2e161f, 0xff541907, 0x1ef91c03, 0x1c042000,
96         0x22ff1e06, 0x1e062009, 0x1f131a1b, 0x1a1e2514, 0x1c221146, 0x0143053b,
97         0x0943101e, 0x12201223, 0x161d181f, 0x1726122b, 0x14290b3f, 0x093b0940,
98         0xff5efe59, 0xf76cfa4c, 0xfe2c002d, 0x0034fd40, 0xfe3bfc46, 0xfc4bf852,
99         0xef66f74d, 0x0318002a, 0x00300037, 0xfa3bf947, 0xf453f557, 0xe277013a,
100         0xfd1dff24, 0x0126022b, 0xfa37003a, 0x0040fd4a, 0xf65a0046, 0xfc1d051f,
101         0x072a013b, 0xfe3afd48, 0xfd51f561, 0x003a0805, 0x0a0e0e12, 0x0d1b0228,
102         0x003afd46, 0xfa4ff855, 0x0000f36a, 0xf06af657, 0xeb72ee6e, 0xf262ea6e,
103         0xeb6aee67, 0xeb6be96c, 0xe670f660, 0xf45ffb5b, 0xf75dea5e, 0xfb560943,
104         0xfc50f655, 0xff46073c, 0x093a053d, 0x0c320f32, 0x12311136, 0x0a29072e,
105         0xff330731, 0x08340929, 0x062f0237, 0x0d290a2c, 0x06320535, 0x0d31043f,
106         0x0640fe45, 0xfe3b0646, 0x0a2c091f, 0x0c2b0335, 0x0e220a26, 0xfd340d28,
107         0x1120072c, 0x07260d32, 0x0a391a2b, 0x0e0b0b0e, 0x090b120b, 0x150917fe,
108         0x20f120f1, 0x22eb27e9, 0x2adf29e1, 0x2ee426f4, 0x151d2de8, 0x35d330e6,
109         0x41d52bed, 0x27f61e09, 0x121a141b, 0x0039f252, 0xfb4bed61, 0xdd7d1b00,
110         0x1c001ffc, 0x1b062208, 0x1e0a1816, 0x21131620, 0x1a1f1529, 0x1a2c172f,
111         0x10410e47, 0x083c063f, 0x11411518, 0x17141a17, 0x1b201c17, 0x1c181728,
112         0x18201c1d, 0x172a1339, 0x1635163d, 0x0b560c28, 0x0b330e3b, 0xfc4ff947,
113         0xfb45f746, 0xf842f644, 0xed49f445, 0xf046f143, 0xec3eed46, 0xf042ea41,
114         0xec3f09fe, 0x1af721f7, 0x27f929fe, 0x2d033109, 0x2d1b243b, 0xfa42f923,
115         0xf92af82d, 0xfb30f438, 0xfa3cfb3e, 0xf842f84c, 0xfb55fa51, 0xf64df951,
116         0xef50ee49, 0xfc4af653, 0xf747f743, 0xff3df842, 0xf242003b, 0x023b15f3,
117         0x21f227f9, 0x2efe3302, 0x3c063d11, 0x37222a3e, 0x14f10236, 0x034a14f1,
118         0x0236034a, 0xe47fe968, 0xfa35ff36, 0x07331619, 0x22001000, 0xfe090429,
119         0xe3760241, 0xfa47f34f, 0x05340932, 0xfd460a36, 0x1a221316, 0x28003902,
120         0x29241a45, 0xd37ff165, 0xfc4cfa47, 0xf34f0534, 0x0645f35a, 0x0034082b,
121         0xfe45fb52, 0xf660023b, 0x024bfd57, 0xfd640138, 0xfd4afa55, 0x003bfd51,
122         0xf956fb5f, 0xff42ff4d, 0x0146fe56, 0xfb48003d, 0x0029003f, 0x003f003f,
123         0xf7530456, 0x0061f948, 0x0d29033e, 0x0d0f0733, 0x0250d97f, 0xee5bef60,
124         0xe651dd62, 0xe866e961, 0xe577e863, 0xeb6eee66, 0xdc7f0050, 0xfb59f95e,
125         0xfc5c0027, 0x0041f154, 0xdd7ffe49, 0xf468f75b, 0xe17f0337, 0x07380737,
126         0x083dfd35, 0x0044f94a, 0xf758f367, 0xf35bf759, 0xf25cf84c, 0xf457e96e,
127         0xe869f64e, 0xec70ef63, 0xb27fba7f, 0xce7fd27f, 0xfc42fb4e, 0xfc47f848,
128         0x023bff37, 0xf946fa4b, 0xf859de77, 0xfd4b2014, 0x1e16d47f, 0x0036fb3d,
129         0x003aff3c, 0xfd3df843, 0xe754f24a, 0xfb410534, 0x0239003d, 0xf745f546,
130         0x1237fc47, 0x003a073d, 0x09291219, 0x0920052b, 0x092f002c, 0x0033022e,
131         0x1326fc42, 0x0f260c2a, 0x09220059, 0x042d0a1c, 0x0a1f21f5, 0x34d5120f,
132         0x1c0023ea, 0x26e72200, 0x27ee20f4, 0x66a20000, 0x38f121fc, 0x1d0a25fb,
133         0x33e327f7, 0x34de45c6, 0x43c12cfb, 0x200737e3, 0x20010000, 0x1b2421e7,
134         0x22e224e4, 0x26e426e5, 0x22ee23f0, 0x22f220f8, 0x25fa2300, 0x1e0a1c12,
135         0x1a191d29, 0x004b0248, 0x084d0e23, 0x121f1123, 0x151e112d, 0x142a122d,
136         0x1b1a1036, 0x07421038, 0x0b490a43, 0xf674e970, 0xf147f93d, 0x0035fb42,
137         0xf54df750, 0xf754f657, 0xde7feb65, 0xfd27fb35, 0xf93df54b, 0xf14def5b,
138         0xe76be76f, 0xe47af54c, 0xf62cf634, 0xf639f73a, 0xf048f945, 0xfc45fb4a,
139         0xf7560242, 0xf7220120, 0x0b1f0534, 0xfe37fe43, 0x0049f859, 0x03340704,
140         0x0a081108, 0x10130325, 0xff3dfb49, 0xff46fc4e, 0x0000eb7e, 0xe97cec6e,
141         0xe67ee77c, 0xef69e579, 0xe575ef66, 0xe675e574, 0xdf7af65f, 0xf264f85f,
142         0xef6fe472, 0xfa59fe50, 0xfc52f755, 0xf851ff48, 0x05400143, 0x09380045,
143         0x01450745, 0xf945fa43, 0xf04dfe40, 0x023dfa43, 0xfd400239, 0xfd41fd42,
144         0x003e0933, 0xff42fe47, 0xfe4bff46, 0xf7480e3c, 0x1025002f, 0x12230b25,
145         0x0c290a29, 0x02300c29, 0x0d29003b, 0x03321328, 0x03421232, 0x13fa12fa,
146         0x0e001af4, 0x1ff021e7, 0x21ea25e4, 0x27e22ae2, 0x2fd62ddc, 0x31de29ef,
147         0x200945b9, 0x3fc142c0, 0x4db636d9, 0x34dd29f6, 0x240028ff, 0x1e0e1c1a,
148         0x17250c37, 0x0b4125df, 0x27dc28db, 0x26e22edf, 0x2ae228e8, 0x31e326f4,
149         0x28f626fd, 0x2efb1f14, 0x1d1e192c, 0x0c300b31, 0x1a2d1616, 0x17161b15,
150         0x21141a1c, 0x1e181b22, 0x122a1927, 0x12320c46, 0x15360e47, 0x0b531920,
151         0x15311536, 0xfb55fa51, 0xf64df951, 0xef50ee49, 0xfc4af653, 0xf747f743,
152         0xff3df842, 0xf242003b, 0x023b11f6, 0x20f32af7, 0x31fb3500, 0x4003440a,
153         0x421b2f39, 0xfb470018, 0xff24fe2a, 0xfe34f739, 0xfa3ffc41, 0xfc43f952,
154         0xfd51fd4c, 0xf948fa4e, 0xf448f244, 0xfd46fa4c, 0xfb42fb3e, 0x0039fc3d,
155         0xf73c0136, 0x023a11f6, 0x20f32af7, 0x31fb3500, 0x4003440a, 0x421b2f39,
156         0x14f10236, 0x034a14f1, 0x0236034a, 0xe47fe968, 0xfa35ff36, 0x07331d10,
157         0x19000e00, 0xf633fd3e, 0xe5631a10, 0xfc55e866, 0x05390639, 0xef490e39,
158         0x1428140a, 0x1d003600, 0x252a0c61, 0xe07fea75, 0xfe4afc55, 0xe8660539,
159         0xfa5df258, 0xfa2c0437, 0xf559f167, 0xeb741339, 0x143a0454, 0x0660013f,
160         0xfb55f36a, 0x053f064b, 0xfd5aff65, 0x0337fc4f, 0xfe4bf461, 0xf932013c,
161         0x0029003f, 0x003f003f, 0xf7530456, 0x0061f948, 0x0d29033e, 0x0722f758,
162         0xec7fdc7f, 0xef5bf25f, 0xe754e756, 0xf459ef5b, 0xe17ff24c, 0xee67f35a,
163         0xdb7f0b50, 0x054c0254, 0x054efa37, 0x043df253, 0xdb7ffb4f, 0xf568f55b,
164         0xe27f0041, 0xfe4f0048, 0xfc5cfa38, 0x0344f847, 0xf362fc56, 0xf458fb52,
165         0xfd48fc43, 0xf848f059, 0xf745ff3b, 0x05420439, 0xfc47fe47, 0x023aff4a,
166         0xfc2cff45, 0x003ef933, 0xfc2ffa2a, 0xfd29fa35, 0x084cf74e, 0xf5530934,
167         0x0043fb5a, 0x0143f148, 0xfb4bf850, 0xeb53eb40, 0xf31fe740, 0xe35e094b,
168         0x113ff84a, 0xfb23fe1b, 0x0d5b0341, 0xf945084d, 0xf642033e, 0xfd44ec51,
169         0x001e0107, 0xfd17eb4a, 0x1042e97c, 0x11252cee, 0x32deea7f, 0x0427002a,
170         0x07220b1d, 0x081f0625, 0x072a0328, 0x08210d2b, 0x0d24042f, 0x0337023a,
171         0x063c082c, 0x0b2c0e2a, 0x07300438, 0x04340d25, 0x0931133a, 0x0a300c2d,
172         0x00451421, 0x083f23ee, 0x21e71cfd, 0x180a1b00, 0x22f234d4, 0x27e81311,
173         0x1f19241d, 0x1821220f, 0x1e141649, 0x1422131f, 0x1b2c1310, 0x0f240f24,
174         0x151c1915, 0x1e141f0c, 0x1b10182a, 0x005d0e38, 0x0f391a26, 0xe87fe873,
175         0xea52f73e, 0x0035003b, 0xf255f359, 0xf35ef55c, 0xe37feb64, 0xf239f443,
176         0xf547f64d, 0xeb55f058, 0xe968f162, 0xdb7ff652, 0xf830f83d, 0xf842f946,
177         0xf24bf64f, 0xf753f45c, 0xee6cfc4f, 0xea45f04b, 0xfe3a013a, 0xf34ef753,
178         0xfc51f363, 0xf351fa26, 0xf33efa3a, 0xfe3bf049, 0xf64cf356, 0xf753f657,
179         0x0000ea7f, 0xe77fe778, 0xe57fed72, 0xe975e776, 0xe675e871, 0xe476e178,
180         0xdb7cf65e, 0xf166f663, 0xf36ace7f, 0xfb5c1139, 0xfb56f35e, 0xf45bfe4d,
181         0x0047ff49, 0x0440f951, 0x05400f39, 0x01430044, 0xf6430144, 0x004d0240,
182         0x0044fb4e, 0x0737053b, 0x02410e36, 0x0f2c053c, 0x0246fe4c, 0xee560c46,
183         0x0540f446, 0x0b370538, 0x00450241, 0xfa4a0536, 0x0736fa4c, 0xf552fe4d,
184         0xfe4d192a, 0x11f310f7, 0x11f41beb, 0x25e229d8, 0x2ad730d1, 0x27e02ed8,
185         0x34cd2ed7, 0x34d92bed, 0x200b3dc9, 0x38d23ece, 0x51bd2dec, 0x23fe1c0f,
186         0x22012701, 0x1e111426, 0x122d0f36, 0x004f24f0, 0x25f225ef, 0x2001220f,
187         0x1d0f1819, 0x22161f10, 0x23121f1c, 0x2129241c, 0x1b2f153e, 0x121f131a,
188         0x24181817, 0x1b10181e, 0x1f1d1629, 0x162a103c, 0x0f340e3c, 0x034ef07b,
189         0x15351638, 0x193d1521, 0x1332113d, 0xfd4ef84a, 0xf748f648, 0xee4bf447,
190         0xf53ffb46, 0xef4bf248, 0xf043f835, 0xf23bf734, 0xf54409fe, 0x1ef61ffc,
191         0x21ff2107, 0x1f0c2517, 0x1f261440, 0xf747f925, 0xf82cf531, 0xf638f43b,
192         0xf83ff743, 0xfa44f64f, 0xfd4ef84a, 0xf748f648, 0xee4bf447, 0xf53ffb46,
193         0xef4bf248, 0xf043f835, 0xf23bf734, 0xf54409fe, 0x1ef61ffc, 0x21ff2107,
194         0x1f0c2517, 0x1f261440
195 };
196
197 /*
198  * NOTE: The scaling lists are in zig-zag order, apply inverse scanning process
199  * to get the values in matrix order. In addition, the hardware requires bytes
200  * swapped within each subsequent 4 bytes. Both arrays below include both
201  * transformations.
202  */
203 static const u32 zig_zag_4x4[] = {
204         3, 2, 7, 11, 6, 1, 0, 5, 10, 15, 14, 9, 4, 8, 13, 12
205 };
206
207 static const u32 zig_zag_8x8[] = {
208         3, 2, 11, 19, 10, 1, 0, 9, 18, 27, 35, 26, 17, 8, 7, 6,
209         15, 16, 25, 34, 43, 51, 42, 33, 24, 23, 14, 5, 4, 13, 22, 31,
210         32, 41, 50, 59, 58, 49, 40, 39, 30, 21, 12, 20, 29, 38, 47, 48,
211         57, 56, 55, 46, 37, 28, 36, 45, 54, 63, 62, 53, 44, 52, 61, 60
212 };
213
214 static void
215 reorder_scaling_list(struct hantro_ctx *ctx)
216 {
217         const struct hantro_h264_dec_ctrls *ctrls = &ctx->h264_dec.ctrls;
218         const struct v4l2_ctrl_h264_scaling_matrix *scaling = ctrls->scaling;
219         const size_t num_list_4x4 = ARRAY_SIZE(scaling->scaling_list_4x4);
220         const size_t list_len_4x4 = ARRAY_SIZE(scaling->scaling_list_4x4[0]);
221         const size_t num_list_8x8 = ARRAY_SIZE(scaling->scaling_list_8x8);
222         const size_t list_len_8x8 = ARRAY_SIZE(scaling->scaling_list_8x8[0]);
223         struct hantro_h264_dec_priv_tbl *tbl = ctx->h264_dec.priv.cpu;
224         u8 *dst = tbl->scaling_list;
225         const u8 *src;
226         int i, j;
227
228         BUILD_BUG_ON(ARRAY_SIZE(zig_zag_4x4) != list_len_4x4);
229         BUILD_BUG_ON(ARRAY_SIZE(zig_zag_8x8) != list_len_8x8);
230         BUILD_BUG_ON(ARRAY_SIZE(tbl->scaling_list) !=
231                      num_list_4x4 * list_len_4x4 +
232                      num_list_8x8 * list_len_8x8);
233
234         src = &scaling->scaling_list_4x4[0][0];
235         for (i = 0; i < num_list_4x4; ++i) {
236                 for (j = 0; j < list_len_4x4; ++j)
237                         dst[zig_zag_4x4[j]] = src[j];
238                 src += list_len_4x4;
239                 dst += list_len_4x4;
240         }
241
242         src = &scaling->scaling_list_8x8[0][0];
243         for (i = 0; i < num_list_8x8; ++i) {
244                 for (j = 0; j < list_len_8x8; ++j)
245                         dst[zig_zag_8x8[j]] = src[j];
246                 src += list_len_8x8;
247                 dst += list_len_8x8;
248         }
249 }
250
251 static void prepare_table(struct hantro_ctx *ctx)
252 {
253         const struct hantro_h264_dec_ctrls *ctrls = &ctx->h264_dec.ctrls;
254         const struct v4l2_ctrl_h264_decode_params *dec_param = ctrls->decode;
255         struct hantro_h264_dec_priv_tbl *tbl = ctx->h264_dec.priv.cpu;
256         const struct v4l2_h264_dpb_entry *dpb = ctx->h264_dec.dpb;
257         int i;
258
259         for (i = 0; i < HANTRO_H264_DPB_SIZE; ++i) {
260                 tbl->poc[i * 2] = dpb[i].top_field_order_cnt;
261                 tbl->poc[i * 2 + 1] = dpb[i].bottom_field_order_cnt;
262         }
263
264         tbl->poc[32] = dec_param->top_field_order_cnt;
265         tbl->poc[33] = dec_param->bottom_field_order_cnt;
266
267         reorder_scaling_list(ctx);
268 }
269
/*
 * Working state used while building the P/B0/B1 reference lists; it is filled
 * in by init_reflist_builder() and handed to the sort comparators.
 *
 * dpb:               the context's DPB entry array.
 * pocs:              POC per DPB index as computed by get_poc(); only written
 *                    for entries that are active and whose buffer was found.
 * unordered_reflist: DPB indices of the usable entries in DPB order, followed
 *                    by filler values for the remaining slots.
 * curpoc:            POC of the picture being decoded.
 * num_valid:         number of usable entries at the start of
 *                    unordered_reflist; only these are sorted.
 */
270 struct hantro_h264_reflist_builder {
271         const struct v4l2_h264_dpb_entry *dpb;
272         s32 pocs[HANTRO_H264_DPB_SIZE];
273         u8 unordered_reflist[HANTRO_H264_DPB_SIZE];
274         s32 curpoc;
275         u8 num_valid;
276 };
277
278 static s32 get_poc(enum v4l2_field field, s32 top_field_order_cnt,
279                    s32 bottom_field_order_cnt)
280 {
281         switch (field) {
282         case V4L2_FIELD_TOP:
283                 return top_field_order_cnt;
284         case V4L2_FIELD_BOTTOM:
285                 return bottom_field_order_cnt;
286         default:
287                 break;
288         }
289
290         return min(top_field_order_cnt, bottom_field_order_cnt);
291 }
292
293 static void
294 init_reflist_builder(struct hantro_ctx *ctx,
295                      struct hantro_h264_reflist_builder *b)
296 {
297         const struct v4l2_ctrl_h264_decode_params *dec_param;
298         struct vb2_v4l2_buffer *buf = hantro_get_dst_buf(ctx);
299         const struct v4l2_h264_dpb_entry *dpb = ctx->h264_dec.dpb;
300         struct vb2_queue *cap_q = &ctx->fh.m2m_ctx->cap_q_ctx.q;
301         unsigned int i;
302
303         dec_param = ctx->h264_dec.ctrls.decode;
304
305         memset(b, 0, sizeof(*b));
306         b->dpb = dpb;
307         b->curpoc = get_poc(buf->field, dec_param->top_field_order_cnt,
308                             dec_param->bottom_field_order_cnt);
309
310         for (i = 0; i < ARRAY_SIZE(ctx->h264_dec.dpb); i++) {
311                 int buf_idx;
312
313                 if (!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE))
314                         continue;
315
316                 buf_idx = vb2_find_timestamp(cap_q, dpb[i].reference_ts, 0);
317                 if (buf_idx < 0)
318                         continue;
319
320                 buf = to_vb2_v4l2_buffer(vb2_get_buffer(cap_q, buf_idx));
321                 b->pocs[i] = get_poc(buf->field, dpb[i].top_field_order_cnt,
322                                      dpb[i].bottom_field_order_cnt);
323                 b->unordered_reflist[b->num_valid] = i;
324                 b->num_valid++;
325         }
326
327         for (i = b->num_valid; i < ARRAY_SIZE(ctx->h264_dec.dpb); i++)
328                 b->unordered_reflist[i] = i;
329 }
330
331 static int p_ref_list_cmp(const void *ptra, const void *ptrb, const void *data)
332 {
333         const struct hantro_h264_reflist_builder *builder = data;
334         const struct v4l2_h264_dpb_entry *a, *b;
335         u8 idxa, idxb;
336
337         idxa = *((u8 *)ptra);
338         idxb = *((u8 *)ptrb);
339         a = &builder->dpb[idxa];
340         b = &builder->dpb[idxb];
341
342         if ((a->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM) !=
343             (b->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM)) {
344                 /* Short term pics firt. */
345                 if (!(a->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM))
346                         return -1;
347                 else
348                         return 1;
349         }
350
351         /*
352          * Short term pics in descending pic num order, long term ones in
353          * ascending order.
354          */
355         if (!(a->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM))
356                 return b->frame_num - a->frame_num;
357
358         return a->pic_num - b->pic_num;
359 }
360
361 static int b0_ref_list_cmp(const void *ptra, const void *ptrb, const void *data)
362 {
363         const struct hantro_h264_reflist_builder *builder = data;
364         const struct v4l2_h264_dpb_entry *a, *b;
365         s32 poca, pocb;
366         u8 idxa, idxb;
367
368         idxa = *((u8 *)ptra);
369         idxb = *((u8 *)ptrb);
370         a = &builder->dpb[idxa];
371         b = &builder->dpb[idxb];
372
373         if ((a->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM) !=
374             (b->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM)) {
375                 /* Short term pics firt. */
376                 if (!(a->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM))
377                         return -1;
378                 else
379                         return 1;
380         }
381
382         /* Long term pics in ascending pic num order. */
383         if (a->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM)
384                 return a->pic_num - b->pic_num;
385
386         poca = builder->pocs[idxa];
387         pocb = builder->pocs[idxb];
388
389         /*
390          * Short term pics with POC < cur POC first in POC descending order
391          * followed by short term pics with POC > cur POC in POC ascending
392          * order.
393          */
394         if ((poca < builder->curpoc) != (pocb < builder->curpoc))
395                 return POC_CMP(poca, pocb);
396         else if (poca < builder->curpoc)
397                 return POC_CMP(pocb, poca);
398
399         return POC_CMP(poca, pocb);
400 }
401
402 static int b1_ref_list_cmp(const void *ptra, const void *ptrb, const void *data)
403 {
404         const struct hantro_h264_reflist_builder *builder = data;
405         const struct v4l2_h264_dpb_entry *a, *b;
406         s32 poca, pocb;
407         u8 idxa, idxb;
408
409         idxa = *((u8 *)ptra);
410         idxb = *((u8 *)ptrb);
411         a = &builder->dpb[idxa];
412         b = &builder->dpb[idxb];
413
414         if ((a->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM) !=
415             (b->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM)) {
416                 /* Short term pics firt. */
417                 if (!(a->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM))
418                         return -1;
419                 else
420                         return 1;
421         }
422
423         /* Long term pics in ascending pic num order. */
424         if (a->flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM)
425                 return a->pic_num - b->pic_num;
426
427         poca = builder->pocs[idxa];
428         pocb = builder->pocs[idxb];
429
430         /*
431          * Short term pics with POC > cur POC first in POC ascending order
432          * followed by short term pics with POC > cur POC in POC descending
433          * order.
434          */
435         if ((poca < builder->curpoc) != (pocb < builder->curpoc))
436                 return POC_CMP(pocb, poca);
437         else if (poca < builder->curpoc)
438                 return POC_CMP(pocb, poca);
439
440         return POC_CMP(poca, pocb);
441 }
442
443 static void
444 build_p_ref_list(const struct hantro_h264_reflist_builder *builder,
445                  u8 *reflist)
446 {
447         memcpy(reflist, builder->unordered_reflist,
448                sizeof(builder->unordered_reflist));
449         sort_r(reflist, builder->num_valid, sizeof(*reflist),
450                p_ref_list_cmp, NULL, builder);
451 }
452
453 static void
454 build_b_ref_lists(const struct hantro_h264_reflist_builder *builder,
455                   u8 *b0_reflist, u8 *b1_reflist)
456 {
457         memcpy(b0_reflist, builder->unordered_reflist,
458                sizeof(builder->unordered_reflist));
459         sort_r(b0_reflist, builder->num_valid, sizeof(*b0_reflist),
460                b0_ref_list_cmp, NULL, builder);
461
462         memcpy(b1_reflist, builder->unordered_reflist,
463                sizeof(builder->unordered_reflist));
464         sort_r(b1_reflist, builder->num_valid, sizeof(*b1_reflist),
465                b1_ref_list_cmp, NULL, builder);
466
467         if (builder->num_valid > 1 &&
468             !memcmp(b1_reflist, b0_reflist, builder->num_valid))
469                 swap(b1_reflist[0], b1_reflist[1]);
470 }
471
472 static bool dpb_entry_match(const struct v4l2_h264_dpb_entry *a,
473                             const struct v4l2_h264_dpb_entry *b)
474 {
475         return a->top_field_order_cnt == b->top_field_order_cnt &&
476                a->bottom_field_order_cnt == b->bottom_field_order_cnt;
477 }
478
479 static void update_dpb(struct hantro_ctx *ctx)
480 {
481         const struct v4l2_ctrl_h264_decode_params *dec_param;
482         DECLARE_BITMAP(new, ARRAY_SIZE(dec_param->dpb)) = { 0, };
483         DECLARE_BITMAP(used, ARRAY_SIZE(dec_param->dpb)) = { 0, };
484         unsigned int i, j;
485
486         dec_param = ctx->h264_dec.ctrls.decode;
487
488         /* Disable all entries by default. */
489         for (i = 0; i < ARRAY_SIZE(ctx->h264_dec.dpb); i++)
490                 ctx->h264_dec.dpb[i].flags &= ~V4L2_H264_DPB_ENTRY_FLAG_ACTIVE;
491
492         /* Try to match new DPB entries with existing ones by their POCs. */
493         for (i = 0; i < ARRAY_SIZE(dec_param->dpb); i++) {
494                 const struct v4l2_h264_dpb_entry *ndpb = &dec_param->dpb[i];
495
496                 if (!(ndpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE))
497                         continue;
498
499                 /*
500                  * To cut off some comparisons, iterate only on target DPB
501                  * entries which are not used yet.
502                  */
503                 for_each_clear_bit(j, used, ARRAY_SIZE(ctx->h264_dec.dpb)) {
504                         struct v4l2_h264_dpb_entry *cdpb;
505
506                         cdpb = &ctx->h264_dec.dpb[j];
507                         if (cdpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE ||
508                             !dpb_entry_match(cdpb, ndpb))
509                                 continue;
510
511                         *cdpb = *ndpb;
512                         set_bit(j, used);
513                         break;
514                 }
515
516                 if (j == ARRAY_SIZE(ctx->h264_dec.dpb))
517                         set_bit(i, new);
518         }
519
520         /* For entries that could not be matched, use remaining free slots. */
521         for_each_set_bit(i, new, ARRAY_SIZE(dec_param->dpb)) {
522                 const struct v4l2_h264_dpb_entry *ndpb = &dec_param->dpb[i];
523                 struct v4l2_h264_dpb_entry *cdpb;
524
525                 /*
526                  * Both arrays are of the same sizes, so there is no way
527                  * we can end up with no space in target array, unless
528                  * something is buggy.
529                  */
530                 j = find_first_zero_bit(used, ARRAY_SIZE(ctx->h264_dec.dpb));
531                 if (WARN_ON(j >= ARRAY_SIZE(ctx->h264_dec.dpb)))
532                         return;
533
534                 cdpb = &ctx->h264_dec.dpb[j];
535                 *cdpb = *ndpb;
536                 set_bit(j, used);
537         }
538 }
539
540 dma_addr_t hantro_h264_get_ref_buf(struct hantro_ctx *ctx,
541                                    unsigned int dpb_idx)
542 {
543         struct v4l2_h264_dpb_entry *dpb = ctx->h264_dec.dpb;
544         dma_addr_t dma_addr = 0;
545
546         if (dpb[dpb_idx].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)
547                 dma_addr = hantro_get_ref(ctx, dpb[dpb_idx].reference_ts);
548
549         if (!dma_addr) {
550                 struct vb2_v4l2_buffer *dst_buf;
551                 struct vb2_buffer *buf;
552
553                 /*
554                  * If a DPB entry is unused or invalid, address of current
555                  * destination buffer is returned.
556                  */
557                 dst_buf = hantro_get_dst_buf(ctx);
558                 buf = &dst_buf->vb2_buf;
559                 dma_addr = vb2_dma_contig_plane_dma_addr(buf, 0);
560         }
561
562         return dma_addr;
563 }
564
565 int hantro_h264_dec_prepare_run(struct hantro_ctx *ctx)
566 {
567         struct hantro_h264_dec_hw_ctx *h264_ctx = &ctx->h264_dec;
568         struct hantro_h264_dec_ctrls *ctrls = &h264_ctx->ctrls;
569         struct hantro_h264_reflist_builder reflist_builder;
570
571         hantro_prepare_run(ctx);
572
573         ctrls->scaling =
574                 hantro_get_ctrl(ctx, V4L2_CID_MPEG_VIDEO_H264_SCALING_MATRIX);
575         if (WARN_ON(!ctrls->scaling))
576                 return -EINVAL;
577
578         ctrls->decode =
579                 hantro_get_ctrl(ctx, V4L2_CID_MPEG_VIDEO_H264_DECODE_PARAMS);
580         if (WARN_ON(!ctrls->decode))
581                 return -EINVAL;
582
583         ctrls->slices =
584                 hantro_get_ctrl(ctx, V4L2_CID_MPEG_VIDEO_H264_SLICE_PARAMS);
585         if (WARN_ON(!ctrls->slices))
586                 return -EINVAL;
587
588         ctrls->sps =
589                 hantro_get_ctrl(ctx, V4L2_CID_MPEG_VIDEO_H264_SPS);
590         if (WARN_ON(!ctrls->sps))
591                 return -EINVAL;
592
593         ctrls->pps =
594                 hantro_get_ctrl(ctx, V4L2_CID_MPEG_VIDEO_H264_PPS);
595         if (WARN_ON(!ctrls->pps))
596                 return -EINVAL;
597
598         /* Update the DPB with new refs. */
599         update_dpb(ctx);
600
601         /* Prepare data in memory. */
602         prepare_table(ctx);
603
604         /* Build the P/B{0,1} ref lists. */
605         init_reflist_builder(ctx, &reflist_builder);
606         build_p_ref_list(&reflist_builder, h264_ctx->reflists.p);
607         build_b_ref_lists(&reflist_builder, h264_ctx->reflists.b0,
608                           h264_ctx->reflists.b1);
609         return 0;
610 }
611
612 void hantro_h264_dec_exit(struct hantro_ctx *ctx)
613 {
614         struct hantro_dev *vpu = ctx->dev;
615         struct hantro_h264_dec_hw_ctx *h264_dec = &ctx->h264_dec;
616         struct hantro_aux_buf *priv = &h264_dec->priv;
617
618         dma_free_coherent(vpu->dev, priv->size, priv->cpu, priv->dma);
619 }
620
621 int hantro_h264_dec_init(struct hantro_ctx *ctx)
622 {
623         struct hantro_dev *vpu = ctx->dev;
624         struct hantro_h264_dec_hw_ctx *h264_dec = &ctx->h264_dec;
625         struct hantro_aux_buf *priv = &h264_dec->priv;
626         struct hantro_h264_dec_priv_tbl *tbl;
627         struct v4l2_pix_format_mplane pix_mp;
628
629         priv->cpu = dma_alloc_coherent(vpu->dev, sizeof(*tbl), &priv->dma,
630                                        GFP_KERNEL);
631         if (!priv->cpu)
632                 return -ENOMEM;
633
634         priv->size = sizeof(*tbl);
635         tbl = priv->cpu;
636         memcpy(tbl->cabac_table, h264_cabac_table, sizeof(tbl->cabac_table));
637
638         v4l2_fill_pixfmt_mp(&pix_mp, ctx->dst_fmt.pixelformat,
639                             ctx->dst_fmt.width, ctx->dst_fmt.height);
640         h264_dec->pic_size = pix_mp.plane_fmt[0].sizeimage;
641
642         return 0;
643 }