compression: added a simple lzxpress test
[sfrench/samba-autobuild/.git] / lib / compression / lzxpress.c
1 /*
2  * Copyright (C) Matthieu Suiche 2008
3  *
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the author nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  */
34
35 #include "replace.h"
36 #include "lzxpress.h"
37
38
39 #define __BUF_POS_CONST(buf,ofs)(((const uint8_t *)buf)+(ofs))
40 #define __PULL_BYTE(buf,ofs) \
41         ((uint8_t)((*__BUF_POS_CONST(buf,ofs)) & 0xFF))
42
43 #ifndef PULL_LE_UINT16
44 #define PULL_LE_UINT16(buf,ofs) ((uint16_t)( \
45         ((uint16_t)(((uint16_t)(__PULL_BYTE(buf,(ofs)+0))) << 0)) | \
46         ((uint16_t)(((uint16_t)(__PULL_BYTE(buf,(ofs)+1))) << 8)) \
47 ))
48 #endif
49
50 #ifndef PULL_LE_UINT32
51 #define PULL_LE_UINT32(buf,ofs) ((uint32_t)( \
52         ((uint32_t)(((uint32_t)(__PULL_BYTE(buf,(ofs)+0))) <<  0)) | \
53         ((uint32_t)(((uint32_t)(__PULL_BYTE(buf,(ofs)+1))) <<  8)) | \
54         ((uint32_t)(((uint32_t)(__PULL_BYTE(buf,(ofs)+2))) << 16)) | \
55         ((uint32_t)(((uint32_t)(__PULL_BYTE(buf,(ofs)+3))) << 24)) \
56 ))
57 #endif
58
59 ssize_t lzxpress_compress(const uint8_t *uncompressed,
60                           uint32_t uncompressed_size,
61                           uint8_t *compressed,
62                           uint32_t max_compressed_size)
63 {
64         uint32_t uncompressed_pos, compressed_pos, byte_left;
65         uint32_t max_offset, best_offset;
66         int32_t offset;
67         uint32_t max_len, len, best_len;
68         const uint8_t *str1, *str2;
69         uint32_t indic;
70         uint8_t *indic_pos;
71         uint32_t indic_bit, nibble_index;
72
73         uint32_t metadata_size;
74         uint16_t metadata;
75         uint16_t *dest;
76
77         if (!uncompressed_size) {
78                 return 0;
79         }
80
81         uncompressed_pos = 0;
82         indic = 0;
83         *(uint32_t *)compressed = 0;
84         compressed_pos = sizeof(uint32_t);
85         indic_pos = &compressed[0];
86
87         byte_left = uncompressed_size;
88         indic_bit = 0;
89         nibble_index = 0;
90
91         if (uncompressed_pos > XPRESS_BLOCK_SIZE)
92                 return 0;
93
94         do {
95                 bool found = false;
96
97                 max_offset = uncompressed_pos;
98
99                 str1 = &uncompressed[uncompressed_pos];
100
101                 best_len = 2;
102                 best_offset = 0;
103
104                 max_offset = MIN(0x1FFF, max_offset);
105
106                 /* search for the longest match in the window for the lookahead buffer */
107                 for (offset = 1; (uint32_t)offset <= max_offset; offset++) {
108                         str2 = &str1[-offset];
109
110                         /* maximum len we can encode into metadata */
111                         max_len = MIN((255 + 15 + 7 + 3), byte_left);
112
113                         for (len = 0; (len < max_len) && (str1[len] == str2[len]); len++);
114
115                         /*
116                          * We check if len is better than the value found before, including the
117                          * sequence of identical bytes
118                          */
119                         if (len > best_len) {
120                                 found = true;
121                                 best_len = len;
122                                 best_offset = offset;
123                         }
124                 }
125
126                 if (found) {
127                         metadata_size = 0;
128                         dest = (uint16_t *)&compressed[compressed_pos];
129
130                         if (best_len < 10) {
131                                 /* Classical meta-data */
132                                 metadata = (uint16_t)(((best_offset - 1) << 3) | (best_len - 3));
133                                 dest[metadata_size / sizeof(uint16_t)] = metadata;
134                                 metadata_size += sizeof(uint16_t);
135                         } else {
136                                 metadata = (uint16_t)(((best_offset - 1) << 3) | 7);
137                                 dest[metadata_size / sizeof(uint16_t)] = metadata;
138                                 metadata_size = sizeof(uint16_t);
139
140                                 if (best_len < (15 + 7 + 3)) {
141                                         /* Shared byte */
142                                         if (!nibble_index) {
143                                                 compressed[compressed_pos + metadata_size] = (best_len - (3 + 7)) & 0xF;
144                                                 metadata_size += sizeof(uint8_t);
145                                         } else {
146                                                 compressed[nibble_index] &= 0xF;
147                                                 compressed[nibble_index] |= (best_len - (3 + 7)) * 16;
148                                         }
149                                 } else if (best_len < (3 + 7 + 15 + 255)) {
150                                         /* Shared byte */
151                                         if (!nibble_index) {
152                                                 compressed[compressed_pos + metadata_size] = 15;
153                                                 metadata_size += sizeof(uint8_t);
154                                         } else {
155                                                 compressed[nibble_index] &= 0xF;
156                                                 compressed[nibble_index] |= (15 * 16);
157                                         }
158
159                                         /* Additional best_len */
160                                         compressed[compressed_pos + metadata_size] = (best_len - (3 + 7 + 15)) & 0xFF;
161                                         metadata_size += sizeof(uint8_t);
162                                 } else {
163                                         /* Shared byte */
164                                         if (!nibble_index) {
165                                                 compressed[compressed_pos + metadata_size] |= 15;
166                                                 metadata_size += sizeof(uint8_t);
167                                         } else {
168                                                 compressed[nibble_index] |= 15 << 4;
169                                         }
170
171                                         /* Additional best_len */
172                                         compressed[compressed_pos + metadata_size] = 255;
173
174                                         metadata_size += sizeof(uint8_t);
175
176                                         compressed[compressed_pos + metadata_size] = (best_len - 3) & 0xFF;
177                                         compressed[compressed_pos + metadata_size + 1] = ((best_len - 3) >> 8) & 0xFF;
178                                         metadata_size += sizeof(uint16_t);
179                                 }
180                         }
181
182                         indic |= 1 << (32 - ((indic_bit % 32) + 1));
183
184                         if (best_len > 9) {
185                                 if (nibble_index == 0) {
186                                         nibble_index = compressed_pos + sizeof(uint16_t);
187                                 } else {
188                                         nibble_index = 0;
189                                 }
190                         }
191
192                         compressed_pos += metadata_size;
193                         uncompressed_pos += best_len;
194                         byte_left -= best_len;
195                 } else {
196                         compressed[compressed_pos++] = uncompressed[uncompressed_pos++];
197                         byte_left--;
198                 }
199                 indic_bit++;
200
201                 if ((indic_bit - 1) % 32 > (indic_bit % 32)) {
202                         *(uint32_t *)indic_pos = indic;
203                         indic = 0;
204                         indic_pos = &compressed[compressed_pos];
205                         compressed_pos += sizeof(uint32_t);
206                 }
207         } while (byte_left > 3);
208
209         do {
210                 compressed[compressed_pos] = uncompressed[uncompressed_pos];
211                 indic_bit++;
212
213                 uncompressed_pos++;
214                 compressed_pos++;
215                 if (((indic_bit - 1) % 32) > (indic_bit % 32)){
216                         *(uint32_t *)indic_pos = indic;
217                         indic = 0;
218                         indic_pos = &compressed[compressed_pos];
219                         compressed_pos += sizeof(uint32_t);
220                 }
221         } while (uncompressed_pos < uncompressed_size);
222
223         if ((indic_bit % 32) > 0) {
224                 for (; (indic_bit % 32) != 0; indic_bit++)
225                         indic |= 0 << (32 - ((indic_bit % 32) + 1));
226
227                 *(uint32_t *)&compressed[compressed_pos] = 0;
228                 *(uint32_t *)indic_pos = indic;
229                 compressed_pos += sizeof(uint32_t);
230         }
231
232         return compressed_pos;
233 }
234
235 ssize_t lzxpress_decompress(const uint8_t *input,
236                             uint32_t input_size,
237                             uint8_t *output,
238                             uint32_t max_output_size)
239 {
240         uint32_t output_index, input_index;
241         uint32_t indicator, indicator_bit;
242         uint32_t length;
243         uint32_t offset;
244         uint32_t nibble_index;
245
246         output_index = 0;
247         input_index = 0;
248         indicator = 0;
249         indicator_bit = 0;
250         length = 0;
251         offset = 0;
252         nibble_index = 0;
253
254         do {
255                 if (indicator_bit == 0) {
256                         indicator = PULL_LE_UINT32(input, input_index);
257                         input_index += sizeof(uint32_t);
258                         indicator_bit = 32;
259                 }
260                 indicator_bit--;
261
262                 /*
263                  * check whether the bit specified by indicator_bit is set or not
264                  * set in indicator. For example, if indicator_bit has value 4
265                  * check whether the 4th bit of the value in indicator is set
266                  */
267                 if (((indicator >> indicator_bit) & 1) == 0) {
268                         output[output_index] = input[input_index];
269                         input_index += sizeof(uint8_t);
270                         output_index += sizeof(uint8_t);
271                 } else {
272                         length = PULL_LE_UINT16(input, input_index);
273                         input_index += sizeof(uint16_t);
274                         offset = length / 8;
275                         length = length % 8;
276
277                         if (length == 7) {
278                                 if (nibble_index == 0) {
279                                         nibble_index = input_index;
280                                         length = input[input_index] % 16;
281                                         input_index += sizeof(uint8_t);
282                                 } else {
283                                         length = input[nibble_index] / 16;
284                                         nibble_index = 0;
285                                 }
286
287                                 if (length == 15) {
288                                         length = input[input_index];
289                                         input_index += sizeof(uint8_t);
290                                         if (length == 255) {
291                                                 length = PULL_LE_UINT16(input, input_index);
292                                                 input_index += sizeof(uint16_t);
293                                                 length -= (15 + 7);
294                                         }
295                                         length += 15;
296                                 }
297                                 length += 7;
298                         }
299
300                         length += 3;
301
302                         do {
303                                 if ((output_index >= max_output_size) || ((offset + 1) > output_index)) break;
304
305                                 output[output_index] = output[output_index - offset - 1];
306
307                                 output_index += sizeof(uint8_t);
308                                 length -= sizeof(uint8_t);
309                         } while (length != 0);
310                 }
311         } while ((output_index < max_output_size) && (input_index < (input_size)));
312
313         return output_index;
314 }