s4:objectclass LDB module - fix a comment
[nivanova/samba-autobuild/.git] / lib / compression / lzxpress.c
1 /*
2  * Copyright (C) Matthieu Suiche 2008
3  *
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the author nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  */
34
35 #include "replace.h"
36 #include "lzxpress.h"
37 #include "../lib/util/byteorder.h"
38
39
/*
 * Byte-wise little-endian load helpers.
 *
 * __BUF_POS_CONST(buf, ofs) yields a `const uint8_t *` pointing `ofs` bytes
 * past `buf`.  The `buf` argument is parenthesised before the cast so that
 * an expression argument (for example `p + 1` with a non-byte pointer type)
 * is evaluated in full before being converted to a byte pointer.
 *
 * __PULL_BYTE(buf, ofs) reads the single byte at that position.
 */
#define __BUF_POS_CONST(buf,ofs) (((const uint8_t *)(buf))+(ofs))
#define __PULL_BYTE(buf,ofs) \
	((uint8_t)((*__BUF_POS_CONST(buf,ofs)) & 0xFF))

/*
 * Assemble a 16-bit value from two consecutive bytes, least significant
 * byte first.  Only defined here when nothing else has provided it.
 */
#ifndef PULL_LE_UINT16
#define PULL_LE_UINT16(buf,ofs) ((uint16_t)( \
	((uint16_t)(((uint16_t)(__PULL_BYTE(buf,(ofs)+0))) << 0)) | \
	((uint16_t)(((uint16_t)(__PULL_BYTE(buf,(ofs)+1))) << 8)) \
))
#endif

/*
 * Assemble a 32-bit value from four consecutive bytes, least significant
 * byte first.  Only defined here when nothing else has provided it.
 */
#ifndef PULL_LE_UINT32
#define PULL_LE_UINT32(buf,ofs) ((uint32_t)( \
	((uint32_t)(((uint32_t)(__PULL_BYTE(buf,(ofs)+0))) <<  0)) | \
	((uint32_t)(((uint32_t)(__PULL_BYTE(buf,(ofs)+1))) <<  8)) | \
	((uint32_t)(((uint32_t)(__PULL_BYTE(buf,(ofs)+2))) << 16)) | \
	((uint32_t)(((uint32_t)(__PULL_BYTE(buf,(ofs)+3))) << 24)) \
))
#endif
59
/* Store a 16-bit value at buf[pos..pos+1], least significant byte first. */
static void lzxpress_store_le16(uint8_t *buf, uint32_t pos, uint16_t value)
{
	buf[pos]     = (uint8_t)(value & 0xFF);
	buf[pos + 1] = (uint8_t)((value >> 8) & 0xFF);
}

/* Store a 32-bit value at buf[pos..pos+3], least significant byte first. */
static void lzxpress_store_le32(uint8_t *buf, uint32_t pos, uint32_t value)
{
	buf[pos]     = (uint8_t)(value & 0xFF);
	buf[pos + 1] = (uint8_t)((value >> 8) & 0xFF);
	buf[pos + 2] = (uint8_t)((value >> 16) & 0xFF);
	buf[pos + 3] = (uint8_t)((value >> 24) & 0xFF);
}

/*
 * Compress `uncompressed_size` bytes from `uncompressed` into `compressed`.
 *
 * Output layout produced by this function:
 *  - A 32-bit little-endian indicator word precedes every group of up to 32
 *    tokens.  Flags are assigned from bit 31 downwards; a set bit marks a
 *    match token, a clear bit marks a literal byte.
 *  - A literal token is the raw byte.
 *  - A match token starts with a 16-bit little-endian value
 *    ((offset - 1) << 3) | min(length - 3, 7).  When the low three bits are
 *    7 the length continues in a 4-bit nibble (two consecutive long matches
 *    share one byte: low nibble first, then high nibble), then, if the
 *    nibble is 15, in one extra byte, and, if that byte is 255, in a 16-bit
 *    little-endian field holding length - 3.
 *  - After the last token, if the current indicator word is only partially
 *    used, it is written back and four zero bytes are appended.
 *
 * Matches are only searched for while more than 3 input bytes remain; the
 * final bytes are always emitted as literals.
 *
 * Returns the number of bytes written to `compressed`, 0 when
 * `uncompressed_size` is 0, or -1 when the output would not fit into
 * `max_compressed_size` bytes.
 */
ssize_t lzxpress_compress(const uint8_t *uncompressed,
			  uint32_t uncompressed_size,
			  uint8_t *compressed,
			  uint32_t max_compressed_size)
{
	/* Longest match the token format above can describe. */
	const uint32_t max_match_len = 255 + 15 + 7 + 3;
	/* Furthest distance searched backwards for a match. */
	const uint32_t max_match_offset = 0x1FFF;

	uint32_t uncompressed_pos = 0;
	uint32_t compressed_pos = 0;
	uint32_t byte_left = uncompressed_size;
	uint32_t indic = 0;		/* flags collected for the current group */
	uint32_t indic_pos = 0;		/* offset of the current indicator word */
	uint32_t indic_bit = 0;		/* number of tokens emitted so far */
	uint32_t nibble_index = 0;	/* offset of a half-used nibble byte, 0 = none */

	if (uncompressed_size == 0) {
		return 0;
	}

	/* Reserve and clear the first indicator word. */
	if (max_compressed_size < sizeof(uint32_t)) {
		return -1;
	}
	lzxpress_store_le32(compressed, 0, 0);
	compressed_pos = sizeof(uint32_t);

	while (byte_left > 0) {
		uint32_t best_len = 2;	/* a match must be strictly longer */
		uint32_t best_offset = 0;

		if (byte_left > 3) {
			const uint8_t *cur = &uncompressed[uncompressed_pos];
			uint32_t max_offset = (uncompressed_pos < max_match_offset) ?
				uncompressed_pos : max_match_offset;
			uint32_t max_len = (byte_left < max_match_len) ?
				byte_left : max_match_len;
			uint32_t offset;

			/* Nearest candidate first; ties keep the smaller offset. */
			for (offset = 1; offset <= max_offset; offset++) {
				const uint8_t *cand = cur - offset;
				uint32_t len = 0;

				while ((len < max_len) && (cur[len] == cand[len])) {
					len++;
				}

				if (len > best_len) {
					best_len = len;
					best_offset = offset;
					if (best_len == max_len) {
						/* Nothing can be strictly longer. */
						break;
					}
				}
			}
		}

		if (best_len > 2) {
			uint32_t metadata_size = sizeof(uint16_t);
			uint32_t needed = sizeof(uint16_t);
			uint16_t metadata;

			/* Work out the token size up front so nothing is half-written. */
			if ((best_len >= (3 + 7)) && (nibble_index == 0)) {
				needed += sizeof(uint8_t);
			}
			if (best_len >= (3 + 7 + 15)) {
				needed += sizeof(uint8_t);
			}
			if (best_len >= max_match_len) {
				needed += sizeof(uint16_t);
			}
			if ((max_compressed_size - compressed_pos) < needed) {
				return -1;
			}

			if (best_len < (3 + 7)) {
				/* Length fits into the three low bits. */
				metadata = (uint16_t)(((best_offset - 1) << 3) |
						      (best_len - 3));
				lzxpress_store_le16(compressed, compressed_pos, metadata);
			} else {
				uint32_t extra = best_len - (3 + 7);
				uint8_t nibble = (uint8_t)((extra < 15) ? extra : 15);

				metadata = (uint16_t)(((best_offset - 1) << 3) | 7);
				lzxpress_store_le16(compressed, compressed_pos, metadata);

				if (nibble_index == 0) {
					/* Open a new nibble byte; its high half stays free. */
					nibble_index = compressed_pos + metadata_size;
					compressed[nibble_index] = nibble;
					metadata_size += sizeof(uint8_t);
				} else {
					/* Fill the high half of the pending nibble byte. */
					compressed[nibble_index] = (uint8_t)(
						(compressed[nibble_index] & 0x0F) |
						(nibble << 4));
					nibble_index = 0;
				}

				if (best_len >= max_match_len) {
					/* Escape byte followed by 16-bit (length - 3). */
					compressed[compressed_pos + metadata_size] = 255;
					metadata_size += sizeof(uint8_t);
					lzxpress_store_le16(compressed,
							    compressed_pos + metadata_size,
							    (uint16_t)(best_len - 3));
					metadata_size += sizeof(uint16_t);
				} else if (best_len >= (3 + 7 + 15)) {
					/* One extra byte holds the remainder. */
					compressed[compressed_pos + metadata_size] =
						(uint8_t)(best_len - (3 + 7 + 15));
					metadata_size += sizeof(uint8_t);
				}
			}

			/* Unsigned shift: bit 31 is used for the first token. */
			indic |= (uint32_t)1 << (31 - (indic_bit % 32));

			compressed_pos += metadata_size;
			uncompressed_pos += best_len;
			byte_left -= best_len;
		} else {
			if ((max_compressed_size - compressed_pos) < sizeof(uint8_t)) {
				return -1;
			}
			compressed[compressed_pos++] = uncompressed[uncompressed_pos++];
			byte_left--;
		}

		indic_bit++;

		if ((indic_bit % 32) == 0) {
			/* Group complete: write it back and open the next one. */
			lzxpress_store_le32(compressed, indic_pos, indic);
			indic = 0;

			if ((max_compressed_size - compressed_pos) < sizeof(uint32_t)) {
				return -1;
			}
			indic_pos = compressed_pos;
			/* Cleared now so it is defined even if no token follows. */
			lzxpress_store_le32(compressed, indic_pos, 0);
			compressed_pos += sizeof(uint32_t);
		}
	}

	if ((indic_bit % 32) != 0) {
		/* Partially used group: unused flag bits stay 0. */
		if ((max_compressed_size - compressed_pos) < sizeof(uint32_t)) {
			return -1;
		}
		lzxpress_store_le32(compressed, compressed_pos, 0);
		lzxpress_store_le32(compressed, indic_pos, indic);
		compressed_pos += sizeof(uint32_t);
	}

	return compressed_pos;
}
235
/* Load a 16-bit value from buf[pos..pos+1], least significant byte first. */
static uint16_t lzxpress_load_le16(const uint8_t *buf, uint32_t pos)
{
	return (uint16_t)((uint16_t)buf[pos] |
			  (uint16_t)((uint16_t)buf[pos + 1] << 8));
}

/* Load a 32-bit value from buf[pos..pos+3], least significant byte first. */
static uint32_t lzxpress_load_le32(const uint8_t *buf, uint32_t pos)
{
	return  (uint32_t)buf[pos] |
	       ((uint32_t)buf[pos + 1] << 8) |
	       ((uint32_t)buf[pos + 2] << 16) |
	       ((uint32_t)buf[pos + 3] << 24);
}

/*
 * Decompress `input_size` bytes from `input` into `output`.
 *
 * The stream is a sequence of 32-bit little-endian indicator words, each
 * followed by up to 32 tokens.  Flags are consumed from bit 31 downwards:
 * a clear bit means the next input byte is copied verbatim, a set bit means
 * a match token follows.  A match token is a 16-bit little-endian value
 * whose upper 13 bits are (distance - 1) and whose low 3 bits are the
 * length; a length field of 7 is extended by a shared nibble, then one
 * byte, then a 16-bit field, as decoded below.
 *
 * Decoding stops when `max_output_size` bytes have been produced or the
 * input is used up; a match that would run past `max_output_size` is cut
 * short at the limit.
 *
 * Returns the number of bytes written to `output`, or -1 when the input is
 * truncated in the middle of an indicator word or token, or when a match
 * refers to data before the start of the output.
 */
ssize_t lzxpress_decompress(const uint8_t *input,
			    uint32_t input_size,
			    uint8_t *output,
			    uint32_t max_output_size)
{
	uint32_t output_index = 0;
	uint32_t input_index = 0;
	uint32_t indicator = 0;
	uint32_t indicator_bit = 0;	/* flags still unread in `indicator` */
	uint32_t nibble_index = 0;	/* offset of a half-read nibble byte, 0 = none */

	/*
	 * Invariant: input_index <= input_size, so the subtractions used in
	 * the bounds checks below cannot wrap.
	 */
	while ((output_index < max_output_size) && (input_index < input_size)) {
		uint32_t length;
		uint32_t offset;

		if (indicator_bit == 0) {
			if ((input_size - input_index) < sizeof(uint32_t)) {
				return -1;
			}
			indicator = lzxpress_load_le32(input, input_index);
			input_index += sizeof(uint32_t);
			if (input_index == input_size) {
				/*
				 * An indicator word with no tokens after it:
				 * treat it as the end of the stream rather
				 * than reading a token past the input.
				 */
				break;
			}
			indicator_bit = 32;
		}
		indicator_bit--;

		if (((indicator >> indicator_bit) & 1) == 0) {
			/* Literal: both indexes are known to be in range here. */
			output[output_index] = input[input_index];
			input_index += sizeof(uint8_t);
			output_index += sizeof(uint8_t);
			continue;
		}

		/* Match token. */
		if ((input_size - input_index) < sizeof(uint16_t)) {
			return -1;
		}
		length = lzxpress_load_le16(input, input_index);
		input_index += sizeof(uint16_t);
		offset = (length >> 3) + 1;	/* distance back into the output */
		length &= 7;

		if (length == 7) {
			if (nibble_index == 0) {
				/* First user of a nibble byte takes the low half. */
				if (input_index >= input_size) {
					return -1;
				}
				nibble_index = input_index;
				length = input[input_index] & 0x0F;
				input_index += sizeof(uint8_t);
			} else {
				/* Second user takes the high half of the same byte. */
				length = input[nibble_index] >> 4;
				nibble_index = 0;
			}

			if (length == 15) {
				if (input_index >= input_size) {
					return -1;
				}
				length = input[input_index];
				input_index += sizeof(uint8_t);

				if (length == 255) {
					if ((input_size - input_index) < sizeof(uint16_t)) {
						return -1;
					}
					length = lzxpress_load_le16(input, input_index);
					input_index += sizeof(uint16_t);
					/*
					 * The 16-bit field stores (length - 3).
					 * Unsigned wrap-around here is cancelled
					 * by the additions that follow.
					 */
					length -= (15 + 7);
				}
				length += 15;
			}
			length += 7;
		}
		length += 3;

		if (offset > output_index) {
			/* Reference to data before the start of the output. */
			return -1;
		}

		/*
		 * Byte-by-byte copy: source and destination may overlap when
		 * the distance is shorter than the length.
		 */
		while ((length != 0) && (output_index < max_output_size)) {
			output[output_index] = output[output_index - offset];
			output_index += sizeof(uint8_t);
			length -= sizeof(uint8_t);
		}
	}

	return output_index;
}