1 /* Linux driver for Philips webcam
2 Decompression for chipset version 2 and 3
3 (C) 2004-2006 Luc Saillard (luc@saillard.org)
5 NOTE: this version of pwc is an unofficial (modified) release of pwc & pwcx
6 driver and thus may have bugs that are not present in the original version.
7 Please send bug reports and support requests to <luc@saillard.org>.
8 The decompression routines have been implemented by reverse-engineering the
9 Nemosoft binary pwcx module. Caveat emptor.
11 This program is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2 of the License, or
14 (at your option) any later version.
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with this program; if not, write to the Free Software
23 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
27 #include "pwc-timon.h"
28 #include "pwc-kiara.h"
29 #include "pwc-dec23.h"
31 #include <linux/string.h>
32 #include <linux/slab.h>
35 * USE_LOOKUP_TABLE_TO_CLAMP
36 * 0: use a C version of this test: { a<0?0:(a>255?255:a) }
37 * 1: use a faster lookup table for CPUs with a big cache (intel)
39 #define USE_LOOKUP_TABLE_TO_CLAMP 1
41 * UNROLL_LOOP_FOR_COPY
42 * 0: use a loop for smaller code (but a little slower)
43 * 1: when unrolling the loop, gcc produces somewhat faster code (perhaps only
44 * valid for the Intel processor class). Activating this option automatically
45 * activates USE_LOOKUP_TABLE_TO_CLAMP
47 #define UNROLL_LOOP_FOR_COPY 1
48 #if UNROLL_LOOP_FOR_COPY
49 # undef USE_LOOKUP_TABLE_TO_CLAMP
50 # define USE_LOOKUP_TABLE_TO_CLAMP 1
/*
 * Fill pdec->table_subblock: 256 rows of 12 values each.
 * Row 0 is a copy of initial_values[]; every following row is the previous
 * row plus values_derivated[], element by element.
 */
53 static void build_subblock_pattern(struct pwc_dec23_private *pdec)
55 static const unsigned int initial_values[12] = {
56 -0x526500, -0x221200, 0x221200, 0x526500,
58 -0x6db480, -0x2d5d00, 0x2d5d00, 0x6db480,
62 static const unsigned int values_derivated[12] = {
63 0xa4ca, 0x4424, -0x4424, -0xa4ca,
65 0xdb69, 0x5aba, -0x5aba, -0xdb69,
68 unsigned int temp_values[12];
/* Work on a local copy so the static start values are never modified. */
71 memcpy(temp_values, initial_values, sizeof(initial_values));
72 for (i = 0; i < 256; i++) {
73 for (j = 0; j < 12; j++) {
74 pdec->table_subblock[i][j] = temp_values[j];
/* Both arrays are unsigned: the negative constants wrap, so this add is modular. */
75 temp_values[j] += values_derivated[j];
/*
 * Walk pdec->table_bitpowermask row by row: one row per bit index (0..7),
 * visiting byte values 0..255 inside each row.
 * NOTE(review): the statements that compute and store each entry are not
 * visible here - confirm the exact value formula against the full source.
 */
80 static void build_bit_powermask_table(struct pwc_dec23_private *pdec)
83 unsigned int bit, byte, mask, val;
84 unsigned int bitpower = 1;
86 for (bit = 0; bit < 8; bit++) {
/* p points at the row belonging to the current bit index. */
88 p = pdec->table_bitpowermask[bit];
89 for (byte = 0; byte < 256; byte++) {
/*
 * Expand one ROM table into the per-compression-mode lookup tables.
 *
 * romtable: 16 compression modes x 8 words.
 * p0004:    output, 16 rows of 1024 bytes; each of the 8 words of a mode
 *           fills its own 128-byte slice (p0 advances by 128 per word).
 * p8004:    output, 16 rows of 256 bytes; a row pointer is taken per mode.
 *           NOTE(review): the stores through p8 are not visible here.
 *
 * For every k in 0..15 a 3-bit field is extracted from the current ROM word
 * and eight entries are written at p0[k + 0x00] .. p0[k + 0x70], holding
 * +1..+4 and -1..-4 times pw, each biased by 0x80 and truncated to a byte.
 * NOTE(review): how pw is derived from the extracted field is not visible
 * here.
 */
100 static void build_table_color(const unsigned int romtable[16][8],
101 unsigned char p0004[16][1024],
102 unsigned char p8004[16][256])
104 int compression_mode, j, k, bit, pw;
105 unsigned char *p0, *p8;
106 const unsigned int *r;
108 /* We have 16 compressions tables */
109 for (compression_mode = 0; compression_mode < 16; compression_mode++) {
110 p0 = p0004[compression_mode];
111 p8 = p8004[compression_mode];
112 r = romtable[compression_mode];
/* One ROM word (r) and one 128-byte output slice (p0) per iteration. */
114 for (j = 0; j < 8; j++, r++, p0 += 128) {
116 for (k = 0; k < 16; k++) {
/* k selects which 3-bit field of the ROM word is used. */
119 else if (k >= 1 && k < 3)
120 bit = (r[0] >> 15) & 7;
121 else if (k >= 3 && k < 6)
122 bit = (r[0] >> 12) & 7;
123 else if (k >= 6 && k < 10)
124 bit = (r[0] >> 9) & 7;
125 else if (k >= 10 && k < 13)
126 bit = (r[0] >> 6) & 7;
127 else if (k >= 13 && k < 15)
128 bit = (r[0] >> 3) & 7;
/* Positive multiples go to offsets 0x00..0x30, negative ones to 0x40..0x70. */
138 p0[k + 0x00] = (1 * pw) + 0x80;
139 p0[k + 0x10] = (2 * pw) + 0x80;
140 p0[k + 0x20] = (3 * pw) + 0x80;
141 p0[k + 0x30] = (4 * pw) + 0x80;
142 p0[k + 0x40] = (-1 * pw) + 0x80;
143 p0[k + 0x50] = (-2 * pw) + 0x80;
144 p0[k + 0x60] = (-3 * pw) + 0x80;
145 p0[k + 0x70] = (-4 * pw) + 0x80;
146 } /* end of for (k=0; k<16; k++, p8++) */
147 } /* end of for (j=0; j<8; j++ , table++) */
148 } /* end of foreach compression_mode */
/*
 * Fill the 256-entry tables pdec->table_dc00 and pdec->table_d800.
 * table_dc00[i] receives offset1 with the ONE_HALF bit cleared and
 * table_d800[i] receives offset2.
 * NOTE(review): the per-iteration updates of offset1/offset2 are not visible
 * here, so the step between consecutive entries must be confirmed.
 */
154 static void fill_table_dc00_d800(struct pwc_dec23_private *pdec)
/* Half of one unit at SCALEBITS fixed-point precision. */
157 #define ONE_HALF (1UL << (SCALEBITS - 1))
159 unsigned int offset1 = ONE_HALF;
160 unsigned int offset2 = 0x0000;
162 for (i=0; i<256; i++) {
163 pdec->table_dc00[i] = offset1 & ~(ONE_HALF);
164 pdec->table_d800[i] = offset2;
172 * To decode the stream:
173 * if look_bits(2) == 0: # op == 2 in the lookup table
176 * elif look_bits(3) == 7: # op == 1 in the lookup table
180 * else: # op == 0 in the lookup table
183 * To speed up processing, we build a lookup table and we take the first 6 bits.
186 * unsigned char op; // operation to execute
187 * unsigned char bits; // bits use to perform operation
188 * unsigned char offset1; // offset to add to access in the table_0004 % 16
189 * unsigned char offset2; // offset to add to access in the table_0004
192 * How to build this table ?
193 * op == 2 when (i%4)==0
194 * op == 1 when (i%8)==7
/*
 * 64 entries of 4 bytes each; decode_block() indexes it with the next 6 bits
 * of the stream (entry n starts at hash_table_ops[n * 4]).
 * As used by decode_block():
 *   byte 0: operation code (op)
 *   byte 1: number of bits handed to skip_nbits()
 *   byte 2: value added to offset1
 *   byte 3: value added to the index into ptable0004
 */
198 static const unsigned char hash_table_ops[64*4] = {
199 0x02, 0x00, 0x00, 0x00,
200 0x00, 0x03, 0x01, 0x00,
201 0x00, 0x04, 0x01, 0x10,
202 0x00, 0x06, 0x01, 0x30,
203 0x02, 0x00, 0x00, 0x00,
204 0x00, 0x03, 0x01, 0x40,
205 0x00, 0x05, 0x01, 0x20,
206 0x01, 0x00, 0x00, 0x00,
207 0x02, 0x00, 0x00, 0x00,
208 0x00, 0x03, 0x01, 0x00,
209 0x00, 0x04, 0x01, 0x50,
210 0x00, 0x05, 0x02, 0x00,
211 0x02, 0x00, 0x00, 0x00,
212 0x00, 0x03, 0x01, 0x40,
213 0x00, 0x05, 0x03, 0x00,
214 0x01, 0x00, 0x00, 0x00,
215 0x02, 0x00, 0x00, 0x00,
216 0x00, 0x03, 0x01, 0x00,
217 0x00, 0x04, 0x01, 0x10,
218 0x00, 0x06, 0x02, 0x10,
219 0x02, 0x00, 0x00, 0x00,
220 0x00, 0x03, 0x01, 0x40,
221 0x00, 0x05, 0x01, 0x60,
222 0x01, 0x00, 0x00, 0x00,
223 0x02, 0x00, 0x00, 0x00,
224 0x00, 0x03, 0x01, 0x00,
225 0x00, 0x04, 0x01, 0x50,
226 0x00, 0x05, 0x02, 0x40,
227 0x02, 0x00, 0x00, 0x00,
228 0x00, 0x03, 0x01, 0x40,
229 0x00, 0x05, 0x03, 0x40,
230 0x01, 0x00, 0x00, 0x00,
231 0x02, 0x00, 0x00, 0x00,
232 0x00, 0x03, 0x01, 0x00,
233 0x00, 0x04, 0x01, 0x10,
234 0x00, 0x06, 0x01, 0x70,
235 0x02, 0x00, 0x00, 0x00,
236 0x00, 0x03, 0x01, 0x40,
237 0x00, 0x05, 0x01, 0x20,
238 0x01, 0x00, 0x00, 0x00,
239 0x02, 0x00, 0x00, 0x00,
240 0x00, 0x03, 0x01, 0x00,
241 0x00, 0x04, 0x01, 0x50,
242 0x00, 0x05, 0x02, 0x00,
243 0x02, 0x00, 0x00, 0x00,
244 0x00, 0x03, 0x01, 0x40,
245 0x00, 0x05, 0x03, 0x00,
246 0x01, 0x00, 0x00, 0x00,
247 0x02, 0x00, 0x00, 0x00,
248 0x00, 0x03, 0x01, 0x00,
249 0x00, 0x04, 0x01, 0x10,
250 0x00, 0x06, 0x02, 0x50,
251 0x02, 0x00, 0x00, 0x00,
252 0x00, 0x03, 0x01, 0x40,
253 0x00, 0x05, 0x01, 0x60,
254 0x01, 0x00, 0x00, 0x00,
255 0x02, 0x00, 0x00, 0x00,
256 0x00, 0x03, 0x01, 0x00,
257 0x00, 0x04, 0x01, 0x50,
258 0x00, 0x05, 0x02, 0x40,
259 0x02, 0x00, 0x00, 0x00,
260 0x00, 0x03, 0x01, 0x40,
261 0x00, 0x05, 0x03, 0x40,
262 0x01, 0x00, 0x00, 0x00
/*
 * 16 patterns of 16 indices, every index in the range 0..11.
 * decode_block() uses MulIdx[offset1][i] to pick which of the 12 values of a
 * table_subblock row is added to pixel i of the 4x4 block.
 */
268 static const unsigned int MulIdx[16][16] = {
269 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
270 {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,},
271 {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,},
272 {4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4,},
273 {6, 7, 8, 9, 7, 10, 11, 8, 8, 11, 10, 7, 9, 8, 7, 6,},
274 {4, 5, 5, 4, 4, 5, 5, 4, 4, 5, 5, 4, 4, 5, 5, 4,},
275 {1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2,},
276 {0, 3, 3, 0, 1, 2, 2, 1, 2, 1, 1, 2, 3, 0, 0, 3,},
277 {0, 1, 2, 3, 3, 2, 1, 0, 3, 2, 1, 0, 0, 1, 2, 3,},
278 {1, 1, 1, 1, 3, 3, 3, 3, 0, 0, 0, 0, 2, 2, 2, 2,},
279 {7, 10, 11, 8, 9, 8, 7, 6, 6, 7, 8, 9, 8, 11, 10, 7,},
280 {4, 5, 5, 4, 5, 4, 4, 5, 5, 4, 4, 5, 4, 5, 5, 4,},
281 {7, 9, 6, 8, 10, 8, 7, 11, 11, 7, 8, 10, 8, 6, 9, 7,},
282 {1, 3, 0, 2, 2, 0, 3, 1, 2, 0, 3, 1, 1, 3, 0, 2,},
283 {1, 2, 2, 1, 3, 0, 0, 3, 0, 3, 3, 0, 2, 1, 1, 2,},
284 {10, 8, 7, 11, 8, 6, 9, 7, 7, 9, 6, 8, 11, 7, 8, 10}
/*
 * CLAMP(x) saturates x to the range 0..255.
 * Two variants are defined below; the first is selected when
 * USE_LOOKUP_TABLE_TO_CLAMP is non-zero.
 */
287 #if USE_LOOKUP_TABLE_TO_CLAMP
/* Number of guard entries on each side of the 0..255 identity range. */
288 #define MAX_OUTER_CROP_VALUE (512)
/* 1280 entries in total; filled at run time by pwc_dec23_init(). */
289 static unsigned char pwc_crop_table[256 + 2*MAX_OUTER_CROP_VALUE];
/* Table variant: only valid for x in [-512, 767]; anything else indexes outside the table. */
290 #define CLAMP(x) (pwc_crop_table[MAX_OUTER_CROP_VALUE+(x)])
/* Arithmetic variant: evaluates x more than once and its last use of x is unparenthesized. */
292 #define CLAMP(x) ((x)>255?255:((x)<0?0:x))
/*
 * Prepare the decompressor for a camera type / command pair.
 *
 * pwc:  device; pwc->decompress_data is allocated here when it is still NULL
 *       and reused otherwise.
 * type: camera type, tested with DEVICE_USE_CODEC3().
 * cmd:  command bytes; only cmd[2] is read in the lines visible here.
 *
 * Selects the ROM tables, derives nbits/scalebits/nbitsmask and rebuilds all
 * lookup tables, including the file-level clamp table.
 * NOTE(review): the return statements and the allocation-failure handling are
 * not visible here - confirm them against the full source.
 */
296 /* If the type or the command change, we rebuild the lookup table */
297 int pwc_dec23_init(struct pwc_device *pwc, int type, unsigned char *cmd)
299 int flags, version, shift, i;
300 struct pwc_dec23_private *pdec;
302 if (pwc->decompress_data == NULL) {
303 pdec = kmalloc(sizeof(struct pwc_dec23_private), GFP_KERNEL);
306 pwc->decompress_data = pdec;
308 pdec = pwc->decompress_data;
310 mutex_init(&pdec->lock);
312 if (DEVICE_USE_CODEC3(type)) {
/* Bits 3-4 of cmd[2] choose the number of bits per coded value. */
313 flags = cmd[2] & 0x18;
315 pdec->nbits = 7; /* More bits, mean more bits to encode the stream, but better quality */
316 else if (flags == 0x10)
/* Codec 3: the top three bits of cmd[2] pick the Kiara ROM table set. */
321 version = cmd[2] >> 5;
322 build_table_color(KiaraRomTable[version][0], pdec->table_0004_pass1, pdec->table_8004_pass1);
323 build_table_color(KiaraRomTable[version][1], pdec->table_0004_pass2, pdec->table_8004_pass2);
/* Timon ROM tables; presumably the non-codec-3 branch - TODO confirm. */
335 version = cmd[2] >> 3;
336 build_table_color(TimonRomTable[version][0], pdec->table_0004_pass1, pdec->table_8004_pass1);
337 build_table_color(TimonRomTable[version][1], pdec->table_0004_pass2, pdec->table_8004_pass2);
340 /* Information can be coded on a variable number of bits, but never more than 8 */
341 shift = 8 - pdec->nbits;
342 pdec->scalebits = SCALEBITS - shift;
343 pdec->nbitsmask = 0xFF >> shift;
345 fill_table_dc00_d800(pdec);
346 build_subblock_pattern(pdec);
347 build_bit_powermask_table(pdec);
349 #if USE_LOOKUP_TABLE_TO_CLAMP
350 /* Build the static table to clamp value [0-255] */
/* Layout: 512 zeros, then the identity 0..255, then 512 entries of 255. */
351 for (i=0;i<MAX_OUTER_CROP_VALUE;i++)
352 pwc_crop_table[i] = 0;
353 for (i=0; i<256; i++)
354 pwc_crop_table[MAX_OUTER_CROP_VALUE+i] = i;
355 for (i=0; i<MAX_OUTER_CROP_VALUE; i++)
356 pwc_crop_table[MAX_OUTER_CROP_VALUE+256+i] = 255;
363 * Copy the 4x4 image block to Y plane buffer
/*
 * Write one 4x4 block into a Y plane.
 *
 * src:            16 values, consumed in order, four per output row.
 * dst:            top-left byte of the destination block.
 * bytes_per_line: distance in bytes between two output rows.
 * scalebits:      right shift applied to each value before clamping to 0..255.
 *
 * Two implementations follow: an unrolled one that indexes the clamp table
 * directly, and a looped one that goes through CLAMP().
 * NOTE(review): the declaration of c is not visible here; it presumably
 * starts at src - confirm.
 */
365 static void copy_image_block_Y(const int *src, unsigned char *dst, unsigned int bytes_per_line, unsigned int scalebits)
367 #if UNROLL_LOOP_FOR_COPY
/* cm points at the entry for value 0, so negative indices land in the zero guard area. */
368 const unsigned char *cm = pwc_crop_table+MAX_OUTER_CROP_VALUE;
370 unsigned char *d = dst;
/* Row 0 */
372 *d++ = cm[c[0] >> scalebits];
373 *d++ = cm[c[1] >> scalebits];
374 *d++ = cm[c[2] >> scalebits];
375 *d++ = cm[c[3] >> scalebits];
/* Row 1 */
377 d = dst + bytes_per_line;
378 *d++ = cm[c[4] >> scalebits];
379 *d++ = cm[c[5] >> scalebits];
380 *d++ = cm[c[6] >> scalebits];
381 *d++ = cm[c[7] >> scalebits];
/* Row 2 */
383 d = dst + bytes_per_line*2;
384 *d++ = cm[c[8] >> scalebits];
385 *d++ = cm[c[9] >> scalebits];
386 *d++ = cm[c[10] >> scalebits];
387 *d++ = cm[c[11] >> scalebits];
/* Row 3 */
389 d = dst + bytes_per_line*3;
390 *d++ = cm[c[12] >> scalebits];
391 *d++ = cm[c[13] >> scalebits];
392 *d++ = cm[c[14] >> scalebits];
393 *d++ = cm[c[15] >> scalebits];
/* Looped variant: c keeps advancing across the four rows. */
397 unsigned char *d = dst;
398 for (i = 0; i < 4; i++, c++)
399 *d++ = CLAMP((*c) >> scalebits);
401 d = dst + bytes_per_line;
402 for (i = 0; i < 4; i++, c++)
403 *d++ = CLAMP((*c) >> scalebits);
405 d = dst + bytes_per_line*2;
406 for (i = 0; i < 4; i++, c++)
407 *d++ = CLAMP((*c) >> scalebits);
409 d = dst + bytes_per_line*3;
410 for (i = 0; i < 4; i++, c++)
411 *d++ = CLAMP((*c) >> scalebits);
416 * Copy the 4x4 image block to a CrCb plane buffer
/*
 * Write one decoded block of 16 values into a chroma plane as two rows of
 * eight bytes, interleaving values taken from two different groups of four.
 *
 * src:            16 decoded values.
 * dst:            first byte of the destination area.
 * bytes_per_line: distance in bytes between the two output rows.
 * scalebits:      right shift applied to each value before clamping to 0..255.
 *
 * NOTE(review): the declarations of c and c1, and any statement between the
 * two loops of the looped variant, are not visible here - confirm the source
 * order used for the second row of that variant.
 */
419 static void copy_image_block_CrCb(const int *src, unsigned char *dst, unsigned int bytes_per_line, unsigned int scalebits)
421 #if UNROLL_LOOP_FOR_COPY
422 /* Unroll all loops */
423 const unsigned char *cm = pwc_crop_table+MAX_OUTER_CROP_VALUE;
425 unsigned char *d = dst;
/* Row 0: values 0..3 interleaved with values 4..7. */
427 *d++ = cm[c[0] >> scalebits];
428 *d++ = cm[c[4] >> scalebits];
429 *d++ = cm[c[1] >> scalebits];
430 *d++ = cm[c[5] >> scalebits];
431 *d++ = cm[c[2] >> scalebits];
432 *d++ = cm[c[6] >> scalebits];
433 *d++ = cm[c[3] >> scalebits];
434 *d++ = cm[c[7] >> scalebits];
/* Row 1: values 12..15 come first here, interleaved with values 8..11. */
436 d = dst + bytes_per_line;
437 *d++ = cm[c[12] >> scalebits];
438 *d++ = cm[c[8] >> scalebits];
439 *d++ = cm[c[13] >> scalebits];
440 *d++ = cm[c[9] >> scalebits];
441 *d++ = cm[c[14] >> scalebits];
442 *d++ = cm[c[10] >> scalebits];
443 *d++ = cm[c[15] >> scalebits];
444 *d++ = cm[c[11] >> scalebits];
/* Looped variant: c1 and c2 advance in lock step, one output pair per iteration. */
448 const int *c2 = src + 4;
449 unsigned char *d = dst;
451 for (i = 0; i < 4; i++, c1++, c2++) {
452 *d++ = CLAMP((*c1) >> scalebits);
453 *d++ = CLAMP((*c2) >> scalebits);
456 d = dst + bytes_per_line;
457 for (i = 0; i < 4; i++, c1++, c2++) {
458 *d++ = CLAMP((*c1) >> scalebits);
459 *d++ = CLAMP((*c2) >> scalebits);
465 * To manage the stream, we keep bits in a 32-bit register.
466 * fill_nbits(n): fill the reservoir with at least n bits
467 * skip_nbits(n): discard n bits from the reservoir
468 * get_nbits(n): fill the reservoir, return the first n bits and discard
469 * them from the reservoir.
470 * __get_nbits(n): faster version of get_nbits(n), but assumes that the reservoir
471 * contains at least n bits. The returned bits are discarded.
/*
 * Bit-reservoir helpers. They operate on pdec->reservoir,
 * pdec->nbits_in_reservoir and pdec->stream.
 * New bytes are OR-ed in above the bits already held and values are read
 * from the low end, so the stream is consumed least-significant bit first.
 * These are macros: pdec and the bit-count arguments are expanded textually
 * (some more than once), so pass side-effect-free expressions only.
 * look_nbits() returns the low bits without consuming them.
 */
473 #define fill_nbits(pdec, nbits_wanted) do { \
474 while (pdec->nbits_in_reservoir<(nbits_wanted)) \
476 pdec->reservoir |= (*(pdec->stream)++) << (pdec->nbits_in_reservoir); \
477 pdec->nbits_in_reservoir += 8; \
481 #define skip_nbits(pdec, nbits_to_skip) do { \
482 pdec->reservoir >>= (nbits_to_skip); \
483 pdec->nbits_in_reservoir -= (nbits_to_skip); \
486 #define get_nbits(pdec, nbits_wanted, result) do { \
487 fill_nbits(pdec, nbits_wanted); \
488 result = (pdec->reservoir) & ((1U<<(nbits_wanted))-1); \
489 skip_nbits(pdec, nbits_wanted); \
492 #define __get_nbits(pdec, nbits_wanted, result) do { \
493 result = (pdec->reservoir) & ((1U<<(nbits_wanted))-1); \
494 skip_nbits(pdec, nbits_wanted); \
497 #define look_nbits(pdec, nbits_wanted) \
498 ((pdec->reservoir) & ((1U<<(nbits_wanted))-1))
501 * Decode a 4x4 pixel block
/*
 * Decode one 4x4 block from the bit stream into pdec->temp_colors[0..15].
 *
 * pdec:       decoder state (bit reservoir, lookup tables, temp_colors).
 * ptable0004: table_0004 row for the current compression index.
 * ptable8004: table_8004 row for the current compression index.
 *
 * Visible flow: read pdec->nbits bits as the primary colour. When the next
 * two bits are zero every pixel gets table_dc00[primary_color]. Otherwise the
 * pixels start from table_d800[primary_color], a 3-bit channel selector
 * offsets both tables, and entries of hash_table_ops (indexed by the next 6
 * bits) drive the addition of table_subblock rows to temp_colors.
 * NOTE(review): several branch headers, the loop around the 6-bit lookups and
 * the handling of op == 2 are not visible here - confirm the control flow
 * against the full source.
 */
503 static void decode_block(struct pwc_dec23_private *pdec,
504 const unsigned char *ptable0004,
505 const unsigned char *ptable8004)
507 unsigned int primary_color;
508 unsigned int channel_v, offset1, op;
/* Make sure 16 bits are buffered so the __get_nbits()/look_nbits() calls below are valid. */
511 fill_nbits(pdec, 16);
512 __get_nbits(pdec, pdec->nbits, primary_color);
514 if (look_nbits(pdec,2) == 0) {
516 /* Very simple, the color is the same for all pixels of the square */
517 for (i = 0; i < 16; i++)
518 pdec->temp_colors[i] = pdec->table_dc00[primary_color];
523 /* This block is encoded with small pattern */
524 for (i = 0; i < 16; i++)
525 pdec->temp_colors[i] = pdec->table_d800[primary_color];
527 __get_nbits(pdec, 3, channel_v);
/* Reverse the order of the three bits: bit 0 <-> bit 2, bit 1 unchanged. */
528 channel_v = ((channel_v & 1) << 2) | (channel_v & 2) | ((channel_v & 4) >> 2);
/* Select the 128-byte / 32-byte slice that belongs to this channel value. */
530 ptable0004 += (channel_v * 128);
531 ptable8004 += (channel_v * 32);
536 unsigned int htable_idx, rows = 0;
537 const unsigned int *block;
540 * xx == 00 :=> end of the block def, remove the two bits from the stream
542 * yxx == any other value
545 fill_nbits(pdec, 16);
546 htable_idx = look_nbits(pdec, 6);
547 op = hash_table_ops[htable_idx * 4];
552 } else if (op == 1) {
553 /* 15bits [ xxxx xxxx yyyy 111 ]
554 * yyy => offset in the table8004
555 * xxx => offset in the tabled004 (tree)
557 unsigned int mask, shift;
558 unsigned int nbits, col1;
562 /* offset1 += yyyy */
563 __get_nbits(pdec, 4, yyyy);
/* ptable8004 holds byte pairs per offset1: [0] = bit count - 1, [1] = shift. */
566 nbits = ptable8004[offset1 * 2];
568 /* col1 = xxxx xxxx */
569 __get_nbits(pdec, nbits+1, col1);
572 mask = pdec->table_bitpowermask[nbits][col1];
573 shift = ptable8004[offset1 * 2 + 1];
/* Bias by 0x80 and keep the low byte: the result is a table_subblock row number. */
574 rows = ((mask << shift) + 0x80) & 0xFF;
576 block = pdec->table_subblock[rows];
577 for (i = 0; i < 16; i++)
578 pdec->temp_colors[i] += block[MulIdx[offset1][i]];
582 * offset1 is coded on 3 bits
586 offset1 += hash_table_ops [htable_idx * 4 + 2];
/* The row number comes straight from ptable0004 in this case. */
589 rows = ptable0004[offset1 + hash_table_ops [htable_idx * 4 + 3]];
590 block = pdec->table_subblock[rows];
591 for (i = 0; i < 16; i++)
592 pdec->temp_colors[i] += block[MulIdx[offset1][i]];
/* Consume the number of bits recorded for this hash table entry. */
594 shift = hash_table_ops[htable_idx * 4 + 1];
595 skip_nbits(pdec, shift);
/*
 * Decompress one band of the image: Y blocks first, then U and V blocks.
 *
 * pdec:                   decoder state; its bit reservoir is reset here.
 * rawyuv:                 compressed data of this band (its first byte is skipped).
 * planar_y/u/v:           destinations inside the three output planes.
 * compressed_image_width: width used to compute the number of blocks
 *                         (width / 4 for Y, width / 8 for each of U and V).
 * real_image_width:       row stride for Y; half of it is the stride for U and V.
 *
 * The first 4 bits read from the stream select the compression tables of
 * both passes.
 * NOTE(review): the loops over nblocks and the advancing of the output
 * pointers are not visible here - confirm against the full source.
 */
602 static void DecompressBand23(struct pwc_dec23_private *pdec,
603 const unsigned char *rawyuv,
604 unsigned char *planar_y,
605 unsigned char *planar_u,
606 unsigned char *planar_v,
607 unsigned int compressed_image_width,
608 unsigned int real_image_width)
610 int compression_index, nblocks;
611 const unsigned char *ptable0004;
612 const unsigned char *ptable8004;
/* Every band starts with an empty bit reservoir. */
615 pdec->nbits_in_reservoir = 0;
616 pdec->stream = rawyuv + 1; /* The first byte of the stream is skipped */
618 get_nbits(pdec, 4, compression_index);
620 /* pass 1: uncompress Y component */
621 nblocks = compressed_image_width / 4;
623 ptable0004 = pdec->table_0004_pass1[compression_index];
624 ptable8004 = pdec->table_8004_pass1[compression_index];
626 /* Each block decode a square of 4x4 */
628 decode_block(pdec, ptable0004, ptable8004);
629 copy_image_block_Y(pdec->temp_colors, planar_y, real_image_width, pdec->scalebits);
634 /* pass 2: uncompress UV component */
635 nblocks = compressed_image_width / 8;
637 ptable0004 = pdec->table_0004_pass2[compression_index];
638 ptable8004 = pdec->table_8004_pass2[compression_index];
640 /* Each block decode a square of 4x4 */
/* One U block, then one V block, both decoded with the pass-2 tables. */
642 decode_block(pdec, ptable0004, ptable8004);
643 copy_image_block_CrCb(pdec->temp_colors, planar_u, real_image_width/2, pdec->scalebits);
645 decode_block(pdec, ptable0004, ptable8004);
646 copy_image_block_CrCb(pdec->temp_colors, planar_v, real_image_width/2, pdec->scalebits);
657 * Uncompress a pwc23 buffer.
/*
 * Decompress a whole frame, band by band, into a planar YUV420P buffer.
 *
 * pwc: device; supplies width, height, vbandlength and decompress_data.
 * NOTE(review): the remaining parameters are not visible in this listing; the
 * body reads compressed bands through src and writes the planes through dst.
 *
 * Output layout relative to dst: the Y plane (width * height bytes), then U
 * at dst + plane_size, then V at dst + plane_size + plane_size / 4.
 * The whole operation runs with pdec->lock held.
 */
662 void pwc_dec23_decompress(const struct pwc_device *pwc,
666 int bandlines_left, bytes_per_block;
667 struct pwc_dec23_private *pdec = pwc->decompress_data;
669 /* YUV420P image format */
670 unsigned char *pout_planar_y;
671 unsigned char *pout_planar_u;
672 unsigned char *pout_planar_v;
673 unsigned int plane_size;
675 mutex_lock(&pdec->lock);
/* A band is 4 image lines high, so one band covers width * 4 bytes of Y. */
677 bandlines_left = pwc->height / 4;
678 bytes_per_block = pwc->width * 4;
679 plane_size = pwc->height * pwc->width;
682 pout_planar_u = dst + plane_size;
683 pout_planar_v = dst + plane_size + plane_size / 4;
685 while (bandlines_left--) {
686 DecompressBand23(pwc->decompress_data,
688 pout_planar_y, pout_planar_u, pout_planar_v,
689 pwc->width, pwc->width);
/* Step to the next compressed band and to the matching output positions. */
690 src += pwc->vbandlength;
691 pout_planar_y += bytes_per_block;
692 pout_planar_u += pwc->width;
693 pout_planar_v += pwc->width;
695 mutex_unlock(&pdec->lock);