Developer Documentation
snappy.cc
1// Copyright 2005 Google Inc. All Rights Reserved.
2//
3// Redistribution and use in source and binary forms, with or without
4// modification, are permitted provided that the following conditions are
5// met:
6//
7// * Redistributions of source code must retain the above copyright
8// notice, this list of conditions and the following disclaimer.
9// * Redistributions in binary form must reproduce the above
10// copyright notice, this list of conditions and the following disclaimer
11// in the documentation and/or other materials provided with the
12// distribution.
13// * Neither the name of Google Inc. nor the names of its
14// contributors may be used to endorse or promote products derived from
15// this software without specific prior written permission.
16//
17// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29#include "snappy.h"
30#include "snappy-internal.h"
31#include "snappy-sinksource.h"
32
33#include <stdio.h>
34
35#include <algorithm>
36#include <string>
37#include <vector>
38
39
40namespace snappy {
41
42
43// Any hash function will produce a valid compressed bitstream, but a good
44// hash function reduces the number of collisions and thus yields better
45// compression for compressible input, and more speed for incompressible
46// input. Of course, it doesn't hurt if the hash function is reasonably fast
47// either, as it gets called a lot.
48static inline uint32 HashBytes(uint32 bytes, int shift) {
49 uint32 kMul = 0x1e35a7bd;
50 return (bytes * kMul) >> shift;
51}
52static inline uint32 Hash(const char* p, int shift) {
53 return HashBytes(UNALIGNED_LOAD32(p), shift);
54}
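// Worked example (illustrative): CompressFragment() below passes
// shift = 32 - log2(table_size), so the top bits of the 32-bit product
// become the table index. For a 16384-entry (2^14) hash table:
//
//   const int shift = 32 - 14;                 // == 18
//   uint32 h = Hash(p, shift);                 // h < 16384
//   table[h] = ...;                            // always in bounds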
55
56size_t MaxCompressedLength(size_t source_len) {
57 // Compressed data can be defined as:
58 // compressed := item* literal*
59 // item := literal* copy
60 //
61 // The trailing literal sequence has a space blowup of at most 62/60
62 // since a literal of length 60 needs one tag byte + one extra byte
63 // for length information.
64 //
65 // Item blowup is trickier to measure. Suppose the "copy" op copies
66 // 4 bytes of data. Because of a special check in the encoding code,
67 // we produce a 4-byte copy only if the offset is < 65536. Therefore
68 // the copy op takes 3 bytes to encode, and this type of item leads
69 // to at most the 62/60 blowup for representing literals.
70 //
71 // Suppose the "copy" op copies 5 bytes of data. If the offset is big
72 // enough, it will take 5 bytes to encode the copy op. Therefore the
73 // worst case here is a one-byte literal followed by a five-byte copy.
74 // I.e., 6 bytes of input turn into 7 bytes of "compressed" data.
75 //
76 // This last factor dominates the blowup, so the final estimate is:
77 return 32 + source_len + source_len/6;
78}
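// Worked example (illustrative): for a 60000-byte input the bound is
//   32 + 60000 + 60000/6 = 70032
// bytes, i.e. at most roughly 17% expansion plus a small constant, even for
// completely incompressible data.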
79
80enum {
81 LITERAL = 0,
82 COPY_1_BYTE_OFFSET = 1, // 3 bit length + 3 bits of offset in opcode
83 COPY_2_BYTE_OFFSET = 2,
84 COPY_4_BYTE_OFFSET = 3
85};
86static const int kMaximumTagLength = 5; // COPY_4_BYTE_OFFSET plus the actual offset.
87
88// Copy "len" bytes from "src" to "op", one byte at a time. Used for
89// handling COPY operations where the input and output regions may
90// overlap. For example, suppose:
91// src == "ab"
92// op == src + 2
93// len == 20
94// After IncrementalCopy(src, op, len), the result will have
95// eleven copies of "ab"
96// ababababababababababab
97// Note that this does not match the semantics of either memcpy()
98// or memmove().
99static inline void IncrementalCopy(const char* src, char* op, int64_t len) {
100 assert(len > 0);
101 do {
102 *op++ = *src++;
103 } while (--len > 0);
104}
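// Usage sketch (illustrative), reproducing the example from the comment
// above; note that the copy reads bytes it has itself just written:
//
//   char buf[32] = "ab";               // bytes 2..31 are scratch space
//   IncrementalCopy(buf, buf + 2, 20); // writes buf[2..21]
//   // buf now begins with eleven copies of "ab" (22 bytes total)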
105
106// Equivalent to IncrementalCopy except that it can write up to ten extra
107// bytes after the end of the copy, and that it is faster.
108//
109// The main part of this loop is a simple copy of eight bytes at a time until
110// we've copied (at least) the requested amount of bytes. However, if op and
111// src are less than eight bytes apart (indicating a repeating pattern of
112// length < 8), we first need to expand the pattern in order to get the correct
113// results. For instance, if the buffer looks like this, with the eight-byte
114// <src> and <op> patterns marked as intervals:
115//
116// abxxxxxxxxxxxx
117// [------] src
118// [------] op
119//
120// a single eight-byte copy from <src> to <op> will repeat the pattern once,
121// after which we can move <op> two bytes without moving <src>:
122//
123// ababxxxxxxxxxx
124// [------] src
125// [------] op
126//
127// and repeat the exercise until the two no longer overlap.
128//
129// This allows us to do very well in the special case of one single byte
130// repeated many times, without taking a big hit for more general cases.
131//
132// The worst case of extra writing past the end of the match occurs when
133// op - src == 1 and len == 1; the last copy will read from byte positions
134// [0..7] and write to [4..11], whereas it was only supposed to write to
135// position 1. Thus, ten excess bytes.
136
137namespace {
138
139const int kMaxIncrementCopyOverflow = 10;
140
141inline void IncrementalCopyFastPath(const char* src, char* op, int64_t len) {
142 while (PREDICT_FALSE(op - src < 8)) {
143 UnalignedCopy64(src, op);
144 len -= op - src;
145 op += op - src;
146 }
147 while (len > 0) {
148 UnalignedCopy64(src, op);
149 src += 8;
150 op += 8;
151 len -= 8;
152 }
153}
154
155} // namespace
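// Worked trace (illustrative): with op - src == 2 and len == 20, the first
// loop runs twice: the first 8-byte copy doubles the two-byte pattern and
// advances op by 2 (len 20 -> 18), the second advances op by 4 (len -> 14),
// leaving op - src == 8. The second loop then finishes in two 8-byte copies.
// The final copy may write a few bytes past the requested end, which is why
// callers must leave kMaxIncrementCopyOverflow bytes of slack in the output.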
156
157static inline char* EmitLiteral(char* op,
158 const char* literal,
159 int len,
160 bool allow_fast_path) {
161 int n = len - 1; // Zero-length literals are disallowed
162 if (n < 60) {
163 // Fits in tag byte
164 *op++ = LITERAL | (n << 2);
165
166 // The vast majority of copies are below 16 bytes, for which a
167 // call to memcpy is overkill. This fast path can sometimes
168 // copy up to 15 bytes too much, but that is okay in the
169 // main loop, since we have a bit to go on for both sides:
170 //
171 // - The input will always have kInputMarginBytes = 15 extra
172 // available bytes, as long as we're in the main loop, and
173 // if not, allow_fast_path = false.
174 // - The output will always have 32 spare bytes (see
175 // MaxCompressedLength).
176 if (allow_fast_path && len <= 16) {
177 UnalignedCopy64(literal, op);
178 UnalignedCopy64(literal + 8, op + 8);
179 return op + len;
180 }
181 } else {
182 // Encode in upcoming bytes
183 char* base = op;
184 int count = 0;
185 op++;
186 while (n > 0) {
187 *op++ = n & 0xff;
188 n >>= 8;
189 count++;
190 }
191 assert(count >= 1);
192 assert(count <= 4);
193 *base = LITERAL | ((59+count) << 2);
194 }
195 memcpy(op, literal, len);
196 return op + len;
197}
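// Worked examples (illustrative) of the emitted bytes:
//   len == 5:   n == 4 fits in the tag, so we emit the single tag byte
//               0x10 (LITERAL | 4 << 2) followed by the 5 literal bytes.
//   len == 100: n == 99 needs one trailing length byte, so we emit
//               0xf0 (LITERAL | 60 << 2), then 0x63 (== 99), then the
//               100 literal bytes.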
198
199static inline char* EmitCopyLessThan64(char* op, size_t offset, int len) {
200 assert(len <= 64);
201 assert(len >= 4);
202 assert(offset < 65536);
203
204 if ((len < 12) && (offset < 2048)) {
205 size_t len_minus_4 = len - 4;
206 assert(len_minus_4 < 8); // Must fit in 3 bits
207 *op++ = COPY_1_BYTE_OFFSET + ((len_minus_4) << 2) + ((offset >> 8) << 5);
208 *op++ = offset & 0xff;
209 } else {
210 *op++ = COPY_2_BYTE_OFFSET + ((len-1) << 2);
211 LittleEndian::Store16(op, offset);
212 op += 2;
213 }
214 return op;
215}
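// Worked examples (illustrative):
//   len == 10, offset == 700:  short form. len-4 == 6 and offset>>8 == 2,
//     so the tag byte is 0x59 (1 + (6<<2) + (2<<5)) followed by 0xbc
//     (700 & 0xff): two bytes total.
//   len == 40, offset == 700:  long form. The tag byte is 0x9e
//     (2 + (39<<2)) followed by the little-endian offset 0xbc 0x02:
//     three bytes total.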
216
217static inline char* EmitCopy(char* op, size_t offset, int len) {
218 // Emit 64 byte copies but make sure to keep at least four bytes reserved
219 while (PREDICT_FALSE(len >= 68)) {
220 op = EmitCopyLessThan64(op, offset, 64);
221 len -= 64;
222 }
223
224 // Emit an extra 60 byte copy if we have too much data to fit in one copy
225 if (len > 64) {
226 op = EmitCopyLessThan64(op, offset, 60);
227 len -= 60;
228 }
229
230 // Emit remainder
231 op = EmitCopyLessThan64(op, offset, len);
232 return op;
233}
234
235
236bool GetUncompressedLength(const char* start, size_t n, size_t* result) {
237 uint32 v = 0;
238 const char* limit = start + n;
239 if (Varint::Parse32WithLimit(start, limit, &v) != NULL) {
240 *result = v;
241 return true;
242 } else {
243 return false;
244 }
245}
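// Worked example (illustrative): the length prefix is a little-endian
// base-128 varint. An uncompressed length of 300 (0x12c) is stored as the
// two bytes 0xac 0x02: the low seven bits 0x2c with the continuation bit
// set, followed by 300 >> 7 == 2.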
246
247namespace internal {
248uint16* WorkingMemory::GetHashTable(size_t input_size, int* table_size) {
249 // Use smaller hash table when input.size() is smaller, since we
250 // fill the table, incurring O(hash table size) overhead for
251 // compression, and if the input is short, we won't need that
252 // many hash table entries anyway.
253 assert(kMaxHashTableSize >= 256);
254 size_t htsize = 256;
255 while (htsize < kMaxHashTableSize && htsize < input_size) {
256 htsize <<= 1;
257 }
258
259 uint16* table;
260 if (htsize <= ARRAYSIZE(small_table_)) {
261 table = small_table_;
262 } else {
263 if (large_table_ == NULL) {
264 large_table_ = new uint16[kMaxHashTableSize];
265 }
266 table = large_table_;
267 }
268
269 *table_size = htsize;
270 memset(table, 0, htsize * sizeof(*table));
271 return table;
272}
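// Worked example (illustrative): for input_size == 3000 the loop above
// doubles htsize 256 -> 512 -> 1024 -> 2048 -> 4096 and stops, since
// 4096 >= 3000, so the caller gets a 4096-entry (8 KB) table. Only inputs
// larger than kMaxHashTableSize/2 bytes get the full-size table.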
273} // end namespace internal
274
275// For 0 <= offset <= 4, GetUint32AtOffset(GetEightBytesAt(p), offset) will
276// equal UNALIGNED_LOAD32(p + offset). Motivation: On x86-64 hardware we have
277// empirically found that overlapping loads such as
278// UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... UNALIGNED_LOAD32(p+2)
279// are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to uint32.
280//
281// We have different versions for 64- and 32-bit; ideally we would avoid the
282// two functions and just inline the UNALIGNED_LOAD64 call into
283// GetUint32AtOffset, but GCC (at least as of 4.6) is seemingly not clever
284// enough to avoid loading the value multiple times in that case. For 64-bit, the load
285// is done when GetEightBytesAt() is called, whereas for 32-bit, the load is
286// done at GetUint32AtOffset() time.
287
288#ifdef ARCH_K8
289
290typedef uint64 EightBytesReference;
291
292static inline EightBytesReference GetEightBytesAt(const char* ptr) {
293 return UNALIGNED_LOAD64(ptr);
294}
295
296static inline uint32 GetUint32AtOffset(uint64 v, int offset) {
297 assert(offset >= 0);
298 assert(offset <= 4);
299 return v >> (LittleEndian::IsLittleEndian() ? 8 * offset : 32 - 8 * offset);
300}
301
302#else
303
304typedef const char* EightBytesReference;
305
306static inline EightBytesReference GetEightBytesAt(const char* ptr) {
307 return ptr;
308}
309
310static inline uint32 GetUint32AtOffset(const char* v, int offset) {
311 assert(offset >= 0);
312 assert(offset <= 4);
313 return UNALIGNED_LOAD32(v + offset);
314}
315
316#endif
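// Worked example (illustrative): on a little-endian 64-bit target,
// GetEightBytesAt(p) loads p[0..7] with p[0] in the least-significant byte,
// so GetUint32AtOffset(v, 2) == static_cast<uint32>(v >> 16) reproduces the
// bytes p[2..5] -- the same value as UNALIGNED_LOAD32(p + 2) -- while memory
// is touched only once for all five offsets.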
317
318// Flat array compression that does not emit the "uncompressed length"
319// prefix. Compresses "input" string to the "*op" buffer.
320//
321// REQUIRES: "input" is at most "kBlockSize" bytes long.
322// REQUIRES: "op" points to an array of memory that is at least
323// "MaxCompressedLength(input.size())" in size.
324// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
325// REQUIRES: "table_size" is a power of two
326//
327// Returns an "end" pointer into "op" buffer.
328// "end - op" is the compressed size of "input".
329namespace internal {
330char* CompressFragment(const char* input,
331 size_t input_size,
332 char* op,
333 uint16* table,
334 const int table_size) {
335 // "ip" is the input pointer, and "op" is the output pointer.
336 const char* ip = input;
337 assert(input_size <= kBlockSize);
338 assert((table_size & (table_size - 1)) == 0); // table must be power of two
339 const int shift = 32 - Bits::Log2Floor(table_size);
340 assert(static_cast<int>(kuint32max >> shift) == table_size - 1);
341 const char* ip_end = input + input_size;
342 const char* base_ip = ip;
343 // Bytes in [next_emit, ip) will be emitted as literal bytes. Or
344 // [next_emit, ip_end) after the main loop.
345 const char* next_emit = ip;
346
347 const size_t kInputMarginBytes = 15;
348 if (PREDICT_TRUE(input_size >= kInputMarginBytes)) {
349 const char* ip_limit = input + input_size - kInputMarginBytes;
350
351 for (uint32 next_hash = Hash(++ip, shift); ; ) {
352 assert(next_emit < ip);
353 // The body of this loop calls EmitLiteral once and then EmitCopy one or
354 // more times. (The exception is that when we're close to exhausting
355 // the input we goto emit_remainder.)
356 //
357 // In the first iteration of this loop we're just starting, so
358 // there's nothing to copy, so calling EmitLiteral once is
359 // necessary. And we only start a new iteration when the
360 // current iteration has determined that a call to EmitLiteral will
361 // precede the next call to EmitCopy (if any).
362 //
363 // Step 1: Scan forward in the input looking for a 4-byte-long match.
364 // If we get close to exhausting the input then goto emit_remainder.
365 //
366 // Heuristic match skipping: If 32 bytes are scanned with no matches
367 // found, start looking only at every other byte. If 32 more bytes are
368 // scanned, look at every third byte, etc.. When a match is found,
369 // immediately go back to looking at every byte. This is a small loss
370 // (~5% performance, ~0.1% density) for compressible data due to more
371 // bookkeeping, but for non-compressible data (such as JPEG) it's a huge
372 // win since the compressor quickly "realizes" the data is incompressible
373 // and doesn't bother looking for matches everywhere.
374 //
375 // The "skip" variable keeps track of how many bytes there are since the
376 // last match; dividing it by 32 (ie. right-shifting by five) gives the
377 // number of bytes to move ahead for each iteration.
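 // Concretely (illustrative): "skip" starts at 32, so skip++ >> 5 gives a
 // step of 1 for the first 32 probes, 2 for the next 32, 3 for the next 32,
 // and so on; the step only resets to 1 when a match is found and the
 // outer loop restarts with skip == 32.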
378 uint32 skip = 32;
379
380 const char* next_ip = ip;
381 const char* candidate;
382 do {
383 ip = next_ip;
384 uint32 hash = next_hash;
385 assert(hash == Hash(ip, shift));
386 uint32 bytes_between_hash_lookups = skip++ >> 5;
387 next_ip = ip + bytes_between_hash_lookups;
388 if (PREDICT_FALSE(next_ip > ip_limit)) {
389 goto emit_remainder;
390 }
391 next_hash = Hash(next_ip, shift);
392 candidate = base_ip + table[hash];
393 assert(candidate >= base_ip);
394 assert(candidate < ip);
395
396 table[hash] = ip - base_ip;
397 } while (PREDICT_TRUE(UNALIGNED_LOAD32(ip) !=
398 UNALIGNED_LOAD32(candidate)));
399
400 // Step 2: A 4-byte match has been found. We'll later see if more
401 // than 4 bytes match. But, prior to the match, input
402 // bytes [next_emit, ip) are unmatched. Emit them as "literal bytes."
403 assert(next_emit + 16 <= ip_end);
404 op = EmitLiteral(op, next_emit, ip - next_emit, true);
405
406 // Step 3: Call EmitCopy, and then see if another EmitCopy could
407 // be our next move. Repeat until we find no match for the
408 // input immediately after what was consumed by the last EmitCopy call.
409 //
410 // If we exit this loop normally then we need to call EmitLiteral next,
411 // though we don't yet know how big the literal will be. We handle that
412 // by proceeding to the next iteration of the main loop. We also can exit
413 // this loop via goto if we get close to exhausting the input.
414 EightBytesReference input_bytes;
415 uint32 candidate_bytes = 0;
416
417 do {
418 // We have a 4-byte match at ip, and no need to emit any
419 // "literal bytes" prior to ip.
420 const char* base = ip;
421 int matched = 4 + FindMatchLength(candidate + 4, ip + 4, ip_end);
422 ip += matched;
423 size_t offset = base - candidate;
424 assert(0 == memcmp(base, candidate, matched));
425 op = EmitCopy(op, offset, matched);
426 // We could immediately start working at ip now, but to improve
427 // compression we first update table[Hash(ip - 1, ...)].
428 const char* insert_tail = ip - 1;
429 next_emit = ip;
430 if (PREDICT_FALSE(ip >= ip_limit)) {
431 goto emit_remainder;
432 }
433 input_bytes = GetEightBytesAt(insert_tail);
434 uint32 prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift);
435 table[prev_hash] = ip - base_ip - 1;
436 uint32 cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift);
437 candidate = base_ip + table[cur_hash];
438 candidate_bytes = UNALIGNED_LOAD32(candidate);
439 table[cur_hash] = ip - base_ip;
440 } while (GetUint32AtOffset(input_bytes, 1) == candidate_bytes);
441
442 next_hash = HashBytes(GetUint32AtOffset(input_bytes, 2), shift);
443 ++ip;
444 }
445 }
446
447 emit_remainder:
448 // Emit the remaining bytes as a literal
449 if (next_emit < ip_end) {
450 op = EmitLiteral(op, next_emit, ip_end - next_emit, false);
451 }
452
453 return op;
454}
455} // end namespace internal
456
457// Signature of output types needed by decompression code.
458// The decompression code is templatized on a type that obeys this
459// signature so that we do not pay virtual function call overhead in
460// the middle of a tight decompression loop.
461//
462// class DecompressionWriter {
463// public:
464// // Called before decompression
465// void SetExpectedLength(size_t length);
466//
467// // Called after decompression
468// bool CheckLength() const;
469//
470// // Called repeatedly during decompression
471// bool Append(const char* ip, size_t length);
472// bool AppendFromSelf(uint32 offset, size_t length);
473//
474// // The rules for how TryFastAppend differs from Append are somewhat
475// // convoluted:
476// //
477// // - TryFastAppend is allowed to decline (return false) at any
478// // time, for any reason -- just "return false" would be
479// // a perfectly legal implementation of TryFastAppend.
480// // The intention is for TryFastAppend to allow a fast path
481// // in the common case of a small append.
482// // - TryFastAppend is allowed to read up to <available> bytes
483// // from the input buffer, whereas Append is allowed to read
484// // <length>. However, if it returns true, it must leave
485// // at least five (kMaximumTagLength) bytes in the input buffer
486// // afterwards, so that there is always enough space to read the
487// // next tag without checking for a refill.
488// // - TryFastAppend must always return decline (return false)
489// // if <length> is 61 or more, as in this case the literal length is not
490// // decoded fully. In practice, this should not be a big problem,
491// // as it is unlikely that one would implement a fast path accepting
492// // this much data.
493// //
494// bool TryFastAppend(const char* ip, size_t available, size_t length);
495// };
496
497// -----------------------------------------------------------------------
498// Lookup table for decompression code. Generated by ComputeTable() below.
499// -----------------------------------------------------------------------
500
501// Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits
502static const uint32 wordmask[] = {
503 0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu
504};
505
506// Data stored per entry in lookup table:
507// Range Bits-used Description
508// ------------------------------------
509// 1..64 0..7 Literal/copy length encoded in opcode byte
510// 0..7 8..10 Copy offset encoded in opcode byte / 256
511// 0..4 11..13 Extra bytes after opcode
512//
513// We use eight bits for the length even though 7 would have sufficed
514// for two efficiency reasons:
515// (1) Extracting a byte is faster than a bit-field
516// (2) It properly aligns copy offset so we do not need a <<8
517static const uint16 char_table[256] = {
518 0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002,
519 0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004,
520 0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006,
521 0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008,
522 0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a,
523 0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c,
524 0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e,
525 0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010,
526 0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012,
527 0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014,
528 0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016,
529 0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018,
530 0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a,
531 0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c,
532 0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e,
533 0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020,
534 0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022,
535 0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024,
536 0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026,
537 0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028,
538 0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a,
539 0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c,
540 0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e,
541 0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030,
542 0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032,
543 0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034,
544 0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036,
545 0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038,
546 0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a,
547 0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c,
548 0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e,
549 0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040
550};
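// Worked example (illustrative): the tag byte 0x21 encodes a
// COPY_1_BYTE_OFFSET with length 4 and offset bits 8..10 equal to 1.
// Its entry is char_table[0x21] == 0x0904, which the decoder splits as:
//   0x0904 & 0xff  == 4      copy length
//   0x0904 & 0x700 == 0x100  copy_offset, already multiplied by 256
//   0x0904 >> 11   == 1      one extra (low offset) byte follows the tag
// so DecompressAllTags() calls AppendFromSelf(0x100 + trailer, 4), where
// "trailer" is that extra byte.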
551
552// In debug mode, allow optional computation of the table at startup.
553// Also, check that the decompression table is correct.
554#ifndef NDEBUG
555DEFINE_bool(snappy_dump_decompression_table, false,
556 "If true, we print the decompression table at startup.");
557
558static uint16 MakeEntry(unsigned int extra,
559 unsigned int len,
560 unsigned int copy_offset) {
561 // Check that all of the fields fit within the allocated space
562 assert(extra == (extra & 0x7)); // At most 3 bits
563 assert(copy_offset == (copy_offset & 0x7)); // At most 3 bits
564 assert(len == (len & 0x7f)); // At most 7 bits
565 return len | (copy_offset << 8) | (extra << 11);
566}
567
568static void ComputeTable() {
569 uint16 dst[256];
570
571 // Place invalid entries in all places to detect missing initialization
572 int assigned = 0;
573 for (int i = 0; i < 256; i++) {
574 dst[i] = 0xffff;
575 }
576
577 // Small LITERAL entries. We store (len-1) in the top 6 bits.
578 for (unsigned int len = 1; len <= 60; len++) {
579 dst[LITERAL | ((len-1) << 2)] = MakeEntry(0, len, 0);
580 assigned++;
581 }
582
583 // Large LITERAL entries. We use 60..63 in the high 6 bits to
584 // encode the number of bytes of length info that follow the opcode.
585 for (unsigned int extra_bytes = 1; extra_bytes <= 4; extra_bytes++) {
586 // We set the length field in the lookup table to 1 because extra
587 // bytes encode len-1.
588 dst[LITERAL | ((extra_bytes+59) << 2)] = MakeEntry(extra_bytes, 1, 0);
589 assigned++;
590 }
591
592 // COPY_1_BYTE_OFFSET.
593 //
594 // The tag byte in the compressed data stores len-4 in 3 bits, and
595 // offset/256 in 5 bits. offset%256 is stored in the next byte.
596 //
597 // This format is used for length in range [4..11] and offset in
598 // range [0..2047]
599 for (unsigned int len = 4; len < 12; len++) {
600 for (unsigned int offset = 0; offset < 2048; offset += 256) {
601 dst[COPY_1_BYTE_OFFSET | ((len-4)<<2) | ((offset>>8)<<5)] =
602 MakeEntry(1, len, offset>>8);
603 assigned++;
604 }
605 }
606
607 // COPY_2_BYTE_OFFSET.
608 // Tag contains len-1 in top 6 bits, and offset in next two bytes.
609 for (unsigned int len = 1; len <= 64; len++) {
610 dst[COPY_2_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(2, len, 0);
611 assigned++;
612 }
613
614 // COPY_4_BYTE_OFFSET.
615 // Tag contains len-1 in top 6 bits, and offset in next four bytes.
616 for (unsigned int len = 1; len <= 64; len++) {
617 dst[COPY_4_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(4, len, 0);
618 assigned++;
619 }
620
621 // Check that each entry was initialized exactly once.
622 if (assigned != 256) {
623 fprintf(stderr, "ComputeTable: assigned only %d of 256\n", assigned);
624 abort();
625 }
626 for (int i = 0; i < 256; i++) {
627 if (dst[i] == 0xffff) {
628 fprintf(stderr, "ComputeTable: did not assign byte %d\n", i);
629 abort();
630 }
631 }
632
633 if (FLAGS_snappy_dump_decompression_table) {
634 printf("static const uint16 char_table[256] = {\n ");
635 for (int i = 0; i < 256; i++) {
636 printf("0x%04x%s",
637 dst[i],
638 ((i == 255) ? "\n" : (((i%8) == 7) ? ",\n " : ", ")));
639 }
640 printf("};\n");
641 }
642
643 // Check that computed table matched recorded table
644 for (int i = 0; i < 256; i++) {
645 if (dst[i] != char_table[i]) {
646 fprintf(stderr, "ComputeTable: byte %d: computed (%x), expect (%x)\n",
647 i, static_cast<int>(dst[i]), static_cast<int>(char_table[i]));
648 abort();
649 }
650 }
651}
652#endif /* !NDEBUG */
653
654// Helper class for decompression
655class SnappyDecompressor {
656 private:
657 Source* reader_; // Underlying source of bytes to decompress
658 const char* ip_; // Points to next buffered byte
659 const char* ip_limit_; // Points just past buffered bytes
660 uint32 peeked_; // Bytes peeked from reader (need to skip)
661 bool eof_; // Hit end of input without an error?
662 char scratch_[kMaximumTagLength]; // See RefillTag().
663
664 // Ensure that all of the tag metadata for the next tag is available
665 // in [ip_..ip_limit_-1]. Also ensures that [ip,ip+4] is readable even
666 // if (ip_limit_ - ip_ < 5).
667 //
668 // Returns true on success, false on error or end of input.
669 bool RefillTag();
670
671 public:
672 explicit SnappyDecompressor(Source* reader)
673 : reader_(reader),
674 ip_(NULL),
675 ip_limit_(NULL),
676 peeked_(0),
677 eof_(false)
678 {
679 }
680
681 ~SnappyDecompressor() {
682 // Advance past any bytes we peeked at from the reader
683 reader_->Skip(peeked_);
684 }
685
686 // Returns true iff we have hit the end of the input without an error.
687 bool eof() const {
688 return eof_;
689 }
690
691 // Read the uncompressed length stored at the start of the compressed data.
692 // On success, stores the length in *result and returns true.
693 // On failure, returns false.
694 bool ReadUncompressedLength(uint32* result) {
695 assert(ip_ == NULL); // Must not have read anything yet
696 // Length is encoded in 1..5 bytes
697 *result = 0;
698 uint32 shift = 0;
699 while (true) {
700 if (shift >= 32) return false;
701 size_t n;
702 const char* ip = reader_->Peek(&n);
703 if (n == 0) return false;
704 const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
705 reader_->Skip(1);
706 *result |= static_cast<uint32>(c & 0x7f) << shift;
707 if (c < 128) {
708 break;
709 }
710 shift += 7;
711 }
712 return true;
713 }
714
715 // Process all of the tags found in the input.
716 // Stops and returns on error or end of input.
717 template <class Writer>
718 void DecompressAllTags(Writer* writer) {
719 const char* ip = ip_;
720
721 // We could have put this refill fragment only at the beginning of the loop.
722 // However, duplicating it at the end of each branch gives the compiler more
723 // scope to optimize the <ip_limit_ - ip> expression based on the local
724 // context, which overall increases speed.
725 #define MAYBE_REFILL() \
726 if (ip_limit_ - ip < kMaximumTagLength) { \
727 ip_ = ip; \
728 if (!RefillTag()) return; \
729 ip = ip_; \
730 }
731
732 MAYBE_REFILL();
733 for ( ;; ) {
734 const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip++));
735
736 if ((c & 0x3) == LITERAL) {
737 size_t literal_length = (c >> 2) + 1u;
738 if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length)) {
739 assert(literal_length < 61);
740 ip += literal_length;
741 // NOTE(user): There is no MAYBE_REFILL() here, as TryFastAppend()
742 // will not return true unless there's already at least five spare
743 // bytes in addition to the literal.
744 continue;
745 }
746 if (PREDICT_FALSE(literal_length >= 61)) {
747 // Long literal.
748 const size_t literal_length_length = literal_length - 60;
749 literal_length =
750 (LittleEndian::Load32(ip) & wordmask[literal_length_length]) + 1;
751 ip += literal_length_length;
752 }
753
754 size_t avail = ip_limit_ - ip;
755 while (avail < literal_length) {
756 if (!writer->Append(ip, avail)) return;
757 literal_length -= avail;
758 reader_->Skip(peeked_);
759 size_t n;
760 ip = reader_->Peek(&n);
761 avail = n;
762 peeked_ = avail;
763 if (avail == 0) return; // Premature end of input
764 ip_limit_ = ip + avail;
765 }
766 if (!writer->Append(ip, literal_length)) {
767 return;
768 }
769 ip += literal_length;
770 MAYBE_REFILL();
771 } else {
772 const uint32 entry = char_table[c];
773 const uint32 trailer = LittleEndian::Load32(ip) & wordmask[entry >> 11];
774 const uint32 length = entry & 0xff;
775 ip += entry >> 11;
776
777 // copy_offset/256 is encoded in bits 8..10. By just fetching
778 // those bits, we get copy_offset (since the bit-field starts at
779 // bit 8).
780 const uint32 copy_offset = entry & 0x700;
781 if (!writer->AppendFromSelf(copy_offset + trailer, length)) {
782 return;
783 }
784 MAYBE_REFILL();
785 }
786 }
787
788#undef MAYBE_REFILL
789 }
790};
791
792bool SnappyDecompressor::RefillTag() {
793 const char* ip = ip_;
794 if (ip == ip_limit_) {
795 // Fetch a new fragment from the reader
796 reader_->Skip(peeked_); // All peeked bytes are used up
797 size_t n;
798 ip = reader_->Peek(&n);
799 peeked_ = n;
800 if (n == 0) {
801 eof_ = true;
802 return false;
803 }
804 ip_limit_ = ip + n;
805 }
806
807 // Read the tag character
808 assert(ip < ip_limit_);
809 const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
810 const uint32 entry = char_table[c];
811 const uint32 needed = (entry >> 11) + 1; // +1 byte for 'c'
812 assert(needed <= sizeof(scratch_));
813
814 // Read more bytes from reader if needed
815 uint32 nbuf = ip_limit_ - ip;
816 if (nbuf < needed) {
817 // Stitch together bytes from ip and reader to form the word
818 // contents. We store the needed bytes in "scratch_". They
819 // will be consumed immediately by the caller since we do not
820 // read more than we need.
821 memmove(scratch_, ip, nbuf);
822 reader_->Skip(peeked_); // All peeked bytes are used up
823 peeked_ = 0;
824 while (nbuf < needed) {
825 size_t length;
826 const char* src = reader_->Peek(&length);
827 if (length == 0) return false;
828 uint32 to_add = min<uint32>(needed - nbuf, length);
829 memcpy(scratch_ + nbuf, src, to_add);
830 nbuf += to_add;
831 reader_->Skip(to_add);
832 }
833 assert(nbuf == needed);
834 ip_ = scratch_;
835 ip_limit_ = scratch_ + needed;
836 } else if (nbuf < kMaximumTagLength) {
837 // Have enough bytes, but move into scratch_ so that we do not
838 // read past end of input
839 memmove(scratch_, ip, nbuf);
840 reader_->Skip(peeked_); // All peeked bytes are used up
841 peeked_ = 0;
842 ip_ = scratch_;
843 ip_limit_ = scratch_ + nbuf;
844 } else {
845 // Pass pointer to buffer returned by reader_.
846 ip_ = ip;
847 }
848 return true;
849}
850
851template <typename Writer>
852static bool InternalUncompress(Source* r, Writer* writer) {
853 // Read the uncompressed length from the front of the compressed input
854 SnappyDecompressor decompressor(r);
855 uint32 uncompressed_len = 0;
856 if (!decompressor.ReadUncompressedLength(&uncompressed_len)) return false;
857 return InternalUncompressAllTags(&decompressor, writer, uncompressed_len);
858}
859
860template <typename Writer>
861static bool InternalUncompressAllTags(SnappyDecompressor* decompressor,
862 Writer* writer,
863 uint32 uncompressed_len) {
864 writer->SetExpectedLength(uncompressed_len);
865
866 // Process the entire input
867 decompressor->DecompressAllTags(writer);
868 writer->Flush();
869 return (decompressor->eof() && writer->CheckLength());
870}
871
872bool GetUncompressedLength(Source* source, uint32* result) {
873 SnappyDecompressor decompressor(source);
874 return decompressor.ReadUncompressedLength(result);
875}
876
877size_t Compress(Source* reader, Sink* writer) {
878 size_t written = 0;
879 size_t N = reader->Available();
880 char ulength[Varint::kMax32];
881 char* p = Varint::Encode32(ulength, N);
882 writer->Append(ulength, p-ulength);
883 written += (p - ulength);
884
885 internal::WorkingMemory wmem;
886 char* scratch = NULL;
887 char* scratch_output = NULL;
888
889 while (N > 0) {
890 // Get next block to compress (without copying if possible)
891 size_t fragment_size;
892 const char* fragment = reader->Peek(&fragment_size);
893 assert(fragment_size != 0); // premature end of input
894 const size_t num_to_read = min(N, kBlockSize);
895 size_t bytes_read = fragment_size;
896
897 size_t pending_advance = 0;
898 if (bytes_read >= num_to_read) {
899 // Buffer returned by reader is large enough
900 pending_advance = num_to_read;
901 fragment_size = num_to_read;
902 } else {
903 // Read into scratch buffer
904 if (scratch == NULL) {
905 // If this is the last iteration, we want to allocate N bytes
906 // of space, otherwise the max possible kBlockSize space.
907 // num_to_read contains exactly the correct value
908 scratch = new char[num_to_read];
909 }
910 memcpy(scratch, fragment, bytes_read);
911 reader->Skip(bytes_read);
912
913 while (bytes_read < num_to_read) {
914 fragment = reader->Peek(&fragment_size);
915 size_t n = min<size_t>(fragment_size, num_to_read - bytes_read);
916 memcpy(scratch + bytes_read, fragment, n);
917 bytes_read += n;
918 reader->Skip(n);
919 }
920 assert(bytes_read == num_to_read);
921 fragment = scratch;
922 fragment_size = num_to_read;
923 }
924 assert(fragment_size == num_to_read);
925
926 // Get encoding table for compression
927 int table_size;
928 uint16* table = wmem.GetHashTable(num_to_read, &table_size);
929
930 // Compress input_fragment and append to dest
931 const int max_output = MaxCompressedLength(num_to_read);
932
933 // Need a scratch buffer for the output, in case the byte sink doesn't
934 // have room for us directly.
935 if (scratch_output == NULL) {
936 scratch_output = new char[max_output];
937 } else {
938 // Since we encode kBlockSize regions followed by a region
939 // which is <= kBlockSize in length, a previously allocated
940 // scratch_output[] region is big enough for this iteration.
941 }
942 char* dest = writer->GetAppendBuffer(max_output, scratch_output);
943 char* end = internal::CompressFragment(fragment, fragment_size,
944 dest, table, table_size);
945 writer->Append(dest, end - dest);
946 written += (end - dest);
947
948 N -= num_to_read;
949 reader->Skip(pending_advance);
950 }
951
952 delete[] scratch;
953 delete[] scratch_output;
954
955 return written;
956}
957
958// -----------------------------------------------------------------------
959// IOVec interfaces
960// -----------------------------------------------------------------------
961
962// A type that writes to an iovec.
963// Note that this is not a "ByteSink", but a type that matches the
964// Writer template argument to SnappyDecompressor::DecompressAllTags().
965class SnappyIOVecWriter {
966 private:
967 const struct iovec* output_iov_;
968 const size_t output_iov_count_;
969
970 // We are currently writing into output_iov_[curr_iov_index_].
971 int curr_iov_index_;
972
973 // Bytes written to output_iov_[curr_iov_index_] so far.
974 size_t curr_iov_written_;
975
976 // Total bytes decompressed into output_iov_ so far.
977 size_t total_written_;
978
979 // Maximum number of bytes that will be decompressed into output_iov_.
980 size_t output_limit_;
981
982 inline char* GetIOVecPointer(int index, size_t offset) {
983 return reinterpret_cast<char*>(output_iov_[index].iov_base) +
984 offset;
985 }
986
987 public:
988 // Does not take ownership of iov. iov must be valid during the
989 // entire lifetime of the SnappyIOVecWriter.
990 inline SnappyIOVecWriter(const struct iovec* iov, size_t iov_count)
991 : output_iov_(iov),
992 output_iov_count_(iov_count),
993 curr_iov_index_(0),
994 curr_iov_written_(0),
995 total_written_(0),
996 output_limit_(-1) {
997 }
998
999 inline void SetExpectedLength(size_t len) {
1000 output_limit_ = len;
1001 }
1002
1003 inline bool CheckLength() const {
1004 return total_written_ == output_limit_;
1005 }
1006
1007 inline bool Append(const char* ip, size_t len) {
1008 if (total_written_ + len > output_limit_) {
1009 return false;
1010 }
1011
1012 while (len > 0) {
1013 assert(curr_iov_written_ <= output_iov_[curr_iov_index_].iov_len);
1014 if (curr_iov_written_ >= output_iov_[curr_iov_index_].iov_len) {
1015 // This iovec is full. Go to the next one.
1016 if (size_t(curr_iov_index_) + 1 >= output_iov_count_) {
1017 return false;
1018 }
1019 curr_iov_written_ = 0;
1020 ++curr_iov_index_;
1021 }
1022
1023 const size_t to_write = std::min(
1024 len, output_iov_[curr_iov_index_].iov_len - curr_iov_written_);
1025 memcpy(GetIOVecPointer(curr_iov_index_, curr_iov_written_),
1026 ip,
1027 to_write);
1028 curr_iov_written_ += to_write;
1029 total_written_ += to_write;
1030 ip += to_write;
1031 len -= to_write;
1032 }
1033
1034 return true;
1035 }
1036
1037 inline bool TryFastAppend(const char* ip, size_t available, size_t len) {
1038 const size_t space_left = output_limit_ - total_written_;
1039 if (len <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16 &&
1040 output_iov_[curr_iov_index_].iov_len - curr_iov_written_ >= 16) {
1041 // Fast path, used for the majority (about 95%) of invocations.
1042 char* ptr = GetIOVecPointer(curr_iov_index_, curr_iov_written_);
1043 UnalignedCopy64(ip, ptr);
1044 UnalignedCopy64(ip + 8, ptr + 8);
1045 curr_iov_written_ += len;
1046 total_written_ += len;
1047 return true;
1048 }
1049
1050 return false;
1051 }
1052
1053 inline bool AppendFromSelf(size_t offset, size_t len) {
1054 if (offset > total_written_ || offset == 0) {
1055 return false;
1056 }
1057 const size_t space_left = output_limit_ - total_written_;
1058 if (len > space_left) {
1059 return false;
1060 }
1061
1062 // Locate the iovec from which we need to start the copy.
1063 int from_iov_index = curr_iov_index_;
1064 size_t from_iov_offset = curr_iov_written_;
1065 while (offset > 0) {
1066 if (from_iov_offset >= offset) {
1067 from_iov_offset -= offset;
1068 break;
1069 }
1070
1071 offset -= from_iov_offset;
1072 --from_iov_index;
1073 assert(from_iov_index >= 0);
1074 from_iov_offset = output_iov_[from_iov_index].iov_len;
1075 }
1076
1077 // Copy <len> bytes starting from the iovec pointed to by from_iov_index to
1078 // the current iovec.
1079 while (len > 0) {
1080 assert(from_iov_index <= curr_iov_index_);
1081 if (from_iov_index != curr_iov_index_) {
1082 const size_t to_copy = std::min(
1083 output_iov_[from_iov_index].iov_len - from_iov_offset,
1084 len);
1085 Append(GetIOVecPointer(from_iov_index, from_iov_offset), to_copy);
1086 len -= to_copy;
1087 if (len > 0) {
1088 ++from_iov_index;
1089 from_iov_offset = 0;
1090 }
1091 } else {
1092 assert(curr_iov_written_ <= output_iov_[curr_iov_index_].iov_len);
1093 size_t to_copy = std::min(output_iov_[curr_iov_index_].iov_len -
1094 curr_iov_written_,
1095 len);
1096 if (to_copy == 0) {
1097 // This iovec is full. Go to the next one.
1098 if (size_t(curr_iov_index_) + 1 >= output_iov_count_) {
1099 return false;
1100 }
1101 ++curr_iov_index_;
1102 curr_iov_written_ = 0;
1103 continue;
1104 }
1105 if (to_copy > len) {
1106 to_copy = len;
1107 }
1108 IncrementalCopy(GetIOVecPointer(from_iov_index, from_iov_offset),
1109 GetIOVecPointer(curr_iov_index_, curr_iov_written_),
1110 to_copy);
1111 curr_iov_written_ += to_copy;
1112 from_iov_offset += to_copy;
1113 total_written_ += to_copy;
1114 len -= to_copy;
1115 }
1116 }
1117
1118 return true;
1119 }
1120
1121 inline void Flush() {}
1122};
1123
1124bool RawUncompressToIOVec(const char* compressed, size_t compressed_length,
1125 const struct iovec* iov, size_t iov_cnt) {
1126 ByteArraySource reader(compressed, compressed_length);
1127 return RawUncompressToIOVec(&reader, iov, iov_cnt);
1128}
1129
1130bool RawUncompressToIOVec(Source* compressed, const struct iovec* iov,
1131 size_t iov_cnt) {
1132 SnappyIOVecWriter output(iov, iov_cnt);
1133 return InternalUncompress(compressed, &output);
1134}
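// Usage sketch (illustrative; assumes POSIX <sys/uio.h> for struct iovec and
// an uncompressed length of at least two bytes):
//
//   size_t ulen = 0;
//   if (!snappy::GetUncompressedLength(compressed, compressed_length, &ulen))
//     return false;
//   std::vector<char> first(ulen / 2), second(ulen - ulen / 2);
//   struct iovec iov[2];
//   iov[0].iov_base = &first[0];  iov[0].iov_len = first.size();
//   iov[1].iov_base = &second[0]; iov[1].iov_len = second.size();
//   return snappy::RawUncompressToIOVec(compressed, compressed_length,
//                                       iov, 2);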
1135
1136// -----------------------------------------------------------------------
1137// Flat array interfaces
1138// -----------------------------------------------------------------------
1139
1140// A type that writes to a flat array.
1141// Note that this is not a "ByteSink", but a type that matches the
1142// Writer template argument to SnappyDecompressor::DecompressAllTags().
1143class SnappyArrayWriter {
1144 private:
1145 char* base_;
1146 char* op_;
1147 char* op_limit_;
1148
1149 public:
1150 inline explicit SnappyArrayWriter(char* dst)
1151 : base_(dst),
1152 op_(dst),
1153 op_limit_(dst) {
1154 }
1155
1156 inline void SetExpectedLength(size_t len) {
1157 op_limit_ = op_ + len;
1158 }
1159
1160 inline bool CheckLength() const {
1161 return op_ == op_limit_;
1162 }
1163
1164 inline bool Append(const char* ip, size_t len) {
1165 char* op = op_;
1166 const size_t space_left = op_limit_ - op;
1167 if (space_left < len) {
1168 return false;
1169 }
1170 memcpy(op, ip, len);
1171 op_ = op + len;
1172 return true;
1173 }
1174
1175 inline bool TryFastAppend(const char* ip, size_t available, size_t len) {
1176 char* op = op_;
1177 const size_t space_left = op_limit_ - op;
1178 if (len <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16) {
1179 // Fast path, used for the majority (about 95%) of invocations.
1180 UnalignedCopy64(ip, op);
1181 UnalignedCopy64(ip + 8, op + 8);
1182 op_ = op + len;
1183 return true;
1184 } else {
1185 return false;
1186 }
1187 }
1188
1189 inline bool AppendFromSelf(size_t offset, size_t len) {
1190 char* op = op_;
1191 const size_t space_left = op_limit_ - op;
1192
1193 // Check if we try to append from before the start of the buffer.
1194 // Normally this would just be a check for "produced < offset",
1195 // but "produced <= offset - 1u" is equivalent for every case
1196 // except the one where offset==0, where the right side will wrap around
1197 // to a very big number. This is convenient, as offset==0 is another
1198 // invalid case that we also want to catch, so that we do not go
1199 // into an infinite loop.
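 // For example, with produced == 5: offset == 0 wraps to 0xffffffff, so
 // "produced <= offset - 1u" is true and the copy is rejected; offset == 6
 // (one byte before the start of the output) gives 5 <= 5, also rejected;
 // offset == 5 gives 5 <= 4, which is false, so copying from the very start
 // of the buffer is allowed.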
1200 assert(op >= base_);
1201 size_t produced = op - base_;
1202 if (produced <= offset - 1u) {
1203 return false;
1204 }
1205 if (len <= 16 && offset >= 8 && space_left >= 16) {
1206 // Fast path, used for the majority (70-80%) of dynamic invocations.
1207 UnalignedCopy64(op - offset, op);
1208 UnalignedCopy64(op - offset + 8, op + 8);
1209 } else {
1210 if (space_left >= len + kMaxIncrementCopyOverflow) {
1211 IncrementalCopyFastPath(op - offset, op, len);
1212 } else {
1213 if (space_left < len) {
1214 return false;
1215 }
1216 IncrementalCopy(op - offset, op, len);
1217 }
1218 }
1219
1220 op_ = op + len;
1221 return true;
1222 }
1223 inline size_t Produced() const {
1224 return op_ - base_;
1225 }
1226 inline void Flush() {}
1227};
1228
1229bool RawUncompress(const char* compressed, size_t n, char* uncompressed) {
1230 ByteArraySource reader(compressed, n);
1231 return RawUncompress(&reader, uncompressed);
1232}
1233
1234bool RawUncompress(Source* compressed, char* uncompressed) {
1235 SnappyArrayWriter output(uncompressed);
1236 return InternalUncompress(compressed, &output);
1237}
1238
1239bool Uncompress(const char* compressed, size_t n, string* uncompressed) {
1240 size_t ulength;
1241 if (!GetUncompressedLength(compressed, n, &ulength)) {
1242 return false;
1243 }
1244 // On 32-bit builds: max_size() < kuint32max. Check for that instead
1245 // of crashing (e.g., consider externally specified compressed data).
1246 if (ulength > uncompressed->max_size()) {
1247 return false;
1248 }
1249 STLStringResizeUninitialized(uncompressed, ulength);
1250 return RawUncompress(compressed, n, string_as_array(uncompressed));
1251}
1252
1253// A Writer that drops everything on the floor and just does validation
1254class SnappyDecompressionValidator {
1255 private:
1256 size_t expected_;
1257 size_t produced_;
1258
1259 public:
1260 inline SnappyDecompressionValidator() : expected_(0), produced_(0) { }
1261 inline void SetExpectedLength(size_t len) {
1262 expected_ = len;
1263 }
1264 inline bool CheckLength() const {
1265 return expected_ == produced_;
1266 }
1267 inline bool Append(const char* ip, size_t len) {
1268 produced_ += len;
1269 return produced_ <= expected_;
1270 }
1271 inline bool TryFastAppend(const char* ip, size_t available, size_t length) {
1272 return false;
1273 }
1274 inline bool AppendFromSelf(size_t offset, size_t len) {
1275 // See SnappyArrayWriter::AppendFromSelf for an explanation of
1276 // the "offset - 1u" trick.
1277 if (produced_ <= offset - 1u) return false;
1278 produced_ += len;
1279 return produced_ <= expected_;
1280 }
1281 inline void Flush() {}
1282};
1283
1284bool IsValidCompressedBuffer(const char* compressed, size_t n) {
1285 ByteArraySource reader(compressed, n);
1286 SnappyDecompressionValidator writer;
1287 return InternalUncompress(&reader, &writer);
1288}
1289
1290bool IsValidCompressed(Source* compressed) {
1291 SnappyDecompressionValidator writer;
1292 return InternalUncompress(compressed, &writer);
1293}
1294
1295void RawCompress(const char* input,
1296 size_t input_length,
1297 char* compressed,
1298 size_t* compressed_length) {
1299 ByteArraySource reader(input, input_length);
1300 UncheckedByteArraySink writer(compressed);
1301 Compress(&reader, &writer);
1302
1303 // Compute how many bytes were added
1304 *compressed_length = (writer.CurrentDestination() - compressed);
1305}
1306
1307size_t Compress(const char* input, size_t input_length, string* compressed) {
1308 // Pre-grow the buffer to the max length of the compressed output
1309 compressed->resize(MaxCompressedLength(input_length));
1310
1311 size_t compressed_length;
1312 RawCompress(input, input_length, string_as_array(compressed),
1313 &compressed_length);
1314 compressed->resize(compressed_length);
1315 return compressed_length;
1316}
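// Usage sketch (illustrative): round-tripping data through the two
// string-based convenience wrappers defined above.
//
//   std::string original = LoadSomeData();      // hypothetical helper
//   std::string compressed, restored;
//   snappy::Compress(original.data(), original.size(), &compressed);
//   bool ok = snappy::Uncompress(compressed.data(), compressed.size(),
//                                &restored);
//   assert(ok && restored == original);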
1317
1318// -----------------------------------------------------------------------
1319// Sink interface
1320// -----------------------------------------------------------------------
1321
1322// A type that decompresses into a Sink. The template parameter
1323// Allocator must export one method "char* Allocate(int size);", which
1324// allocates a buffer of "size" and appends that to the destination.
1325template <typename Allocator>
1326class SnappyScatteredWriter {
1327 Allocator allocator_;
1328
1329 // We need random access into the data generated so far. Therefore
1330 // we keep track of all of the generated data as an array of blocks.
1331 // All of the blocks except the last have length kBlockSize.
1332 vector<char*> blocks_;
1333 size_t expected_;
1334
1335 // Total size of all fully generated blocks so far
1336 size_t full_size_;
1337
1338 // Pointer into current output block
1339 char* op_base_; // Base of output block
1340 char* op_ptr_; // Pointer to next unfilled byte in block
1341 char* op_limit_; // Pointer just past block
1342
1343 inline size_t Size() const {
1344 return full_size_ + (op_ptr_ - op_base_);
1345 }
1346
1347 bool SlowAppend(const char* ip, size_t len);
1348 bool SlowAppendFromSelf(size_t offset, size_t len);
1349
1350 public:
1351 inline explicit SnappyScatteredWriter(const Allocator& allocator)
1352 : allocator_(allocator),
1353 expected_(0),
1354 full_size_(0),
1355 op_base_(NULL),
1356 op_ptr_(NULL),
1357 op_limit_(NULL) {
1358 }
1359
1360 inline void SetExpectedLength(size_t len) {
1361 assert(blocks_.empty());
1362 expected_ = len;
1363 }
1364
1365 inline bool CheckLength() const {
1366 return Size() == expected_;
1367 }
1368
1369 // Return the number of bytes actually uncompressed so far
1370 inline size_t Produced() const {
1371 return Size();
1372 }
1373
1374 inline bool Append(const char* ip, size_t len) {
1375 size_t avail = op_limit_ - op_ptr_;
1376 if (len <= avail) {
1377 // Fast path
1378 memcpy(op_ptr_, ip, len);
1379 op_ptr_ += len;
1380 return true;
1381 } else {
1382 return SlowAppend(ip, len);
1383 }
1384 }
1385
1386 inline bool TryFastAppend(const char* ip, size_t available, size_t length) {
1387 char* op = op_ptr_;
1388 const int space_left = op_limit_ - op;
1389 if (length <= 16 && available >= 16 + kMaximumTagLength &&
1390 space_left >= 16) {
1391 // Fast path, used for the majority (about 95%) of invocations.
1392 UNALIGNED_STORE64(op, UNALIGNED_LOAD64(ip));
1393 UNALIGNED_STORE64(op + 8, UNALIGNED_LOAD64(ip + 8));
1394 op_ptr_ = op + length;
1395 return true;
1396 } else {
1397 return false;
1398 }
1399 }
1400
1401 inline bool AppendFromSelf(size_t offset, size_t len) {
1402 // See SnappyArrayWriter::AppendFromSelf for an explanation of
1403 // the "offset - 1u" trick.
1404
1405// if (offset - 1u < op_ptr_ - op_base_) { // upstream check; triggers a signed/unsigned warning
1406 if (offset != 0 && long(offset) <= op_ptr_ - op_base_) { // equivalent check, without the warning
1407 const size_t space_left = op_limit_ - op_ptr_;
1408 if (space_left >= len + kMaxIncrementCopyOverflow) {
1409 // Fast path: src and dst in current block.
1410 IncrementalCopyFastPath(op_ptr_ - offset, op_ptr_, len);
1411 op_ptr_ += len;
1412 return true;
1413 }
1414 }
1415 return SlowAppendFromSelf(offset, len);
1416 }
1417
1418 // Called at the end of decompression. We ask the allocator to
1419 // write all blocks to the sink.
1420 inline void Flush() { allocator_.Flush(Produced()); }
1421};
1422
1423template<typename Allocator>
1424bool SnappyScatteredWriter<Allocator>::SlowAppend(const char* ip, size_t len) {
1425 size_t avail = op_limit_ - op_ptr_;
1426 while (len > avail) {
1427 // Completely fill this block
1428 memcpy(op_ptr_, ip, avail);
1429 op_ptr_ += avail;
1430 assert(op_limit_ - op_ptr_ == 0);
1431 full_size_ += (op_ptr_ - op_base_);
1432 len -= avail;
1433 ip += avail;
1434
1435 // Bounds check
1436 if (full_size_ + len > expected_) {
1437 return false;
1438 }
1439
1440 // Make new block
1441 size_t bsize = min<size_t>(kBlockSize, expected_ - full_size_);
1442 op_base_ = allocator_.Allocate(bsize);
1443 op_ptr_ = op_base_;
1444 op_limit_ = op_base_ + bsize;
1445 blocks_.push_back(op_base_);
1446 avail = bsize;
1447 }
1448
1449 memcpy(op_ptr_, ip, len);
1450 op_ptr_ += len;
1451 return true;
1452}
1453
1454template<typename Allocator>
1455bool SnappyScatteredWriter<Allocator>::SlowAppendFromSelf(size_t offset,
1456 size_t len) {
1457 // Overflow check
1458 // See SnappyArrayWriter::AppendFromSelf for an explanation of
1459 // the "offset - 1u" trick.
1460 const size_t cur = Size();
1461 if (offset - 1u >= cur) return false;
1462 if (expected_ - cur < len) return false;
1463
1464 // Currently we shouldn't ever hit this path because Compress() chops the
1465 // input into blocks and does not create cross-block copies. However, it is
1466 // nice if we do not rely on that, since we can get better compression if we
1467 // allow cross-block copies and thus might want to change the compressor in
1468 // the future.
1469 size_t src = cur - offset;
1470 while (len-- > 0) {
1471 char c = blocks_[src >> kBlockLog][src & (kBlockSize-1)];
1472 Append(&c, 1);
1473 src++;
1474 }
1475 return true;
1476}
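// Worked example (illustrative, assuming the usual kBlockLog == 16 /
// kBlockSize == 65536 from snappy.h): for src == 70000 the source byte is
// blocks_[70000 >> 16] == blocks_[1] at offset 70000 & 65535 == 4464.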
1477
1478class SnappySinkAllocator {
1479 public:
1480 explicit SnappySinkAllocator(Sink* dest): dest_(dest) {}
1481 ~SnappySinkAllocator() {}
1482
1483 char* Allocate(int size) {
1484 Datablock block(new char[size], size);
1485 blocks_.push_back(block);
1486 return block.data;
1487 }
1488
1489 // We flush only at the end, because the writer wants
1490 // random access to the blocks and once we hand the
1491 // block over to the sink, we can't access it anymore.
1492 // Also we don't write more than has been actually written
1493 // to the blocks.
1494 void Flush(size_t size) {
1495 size_t size_written = 0;
1496 for (size_t i = 0; i < blocks_.size(); ++i) {
1497 size_t block_size = min<size_t>(blocks_[i].size, size - size_written);
1498 dest_->AppendAndTakeOwnership(blocks_[i].data, block_size,
1499 &SnappySinkAllocator::Deleter, NULL);
1500 size_written += block_size;
1501 }
1502 blocks_.clear();
1503 }
1504
1505 private:
1506 struct Datablock {
1507 char* data;
1508 size_t size;
1509 Datablock(char* p, size_t s) : data(p), size(s) {}
1510 };
1511
1512 static void Deleter(void* arg, const char* bytes, size_t size) {
1513 delete[] bytes;
1514 }
1515
1516 Sink* dest_;
1517 vector<Datablock> blocks_;
1518
1519 // Note: copying this object is allowed
1520};
1521
1522size_t UncompressAsMuchAsPossible(Source* compressed, Sink* uncompressed) {
1523 SnappySinkAllocator allocator(uncompressed);
1524 SnappyScatteredWriter<SnappySinkAllocator> writer(allocator);
1525 InternalUncompress(compressed, &writer);
1526 return writer.Produced();
1527}
1528
1529bool Uncompress(Source* compressed, Sink* uncompressed) {
1530 // Read the uncompressed length from the front of the compressed input
1531 SnappyDecompressor decompressor(compressed);
1532 uint32 uncompressed_len = 0;
1533 if (!decompressor.ReadUncompressedLength(&uncompressed_len)) {
1534 return false;
1535 }
1536
1537 char c;
1538 size_t allocated_size;
1539 char* buf = uncompressed->GetAppendBufferVariable(
1540 1, uncompressed_len, &c, 1, &allocated_size);
1541
1542 // If we can get a flat buffer, then use it, otherwise do block by block
1543 // uncompression
1544 if (allocated_size >= uncompressed_len) {
1545 SnappyArrayWriter writer(buf);
1546 bool result = InternalUncompressAllTags(
1547 &decompressor, &writer, uncompressed_len);
1548 uncompressed->Append(buf, writer.Produced());
1549 return result;
1550 } else {
1551 SnappySinkAllocator allocator(uncompressed);
1552 SnappyScatteredWriter<SnappySinkAllocator> writer(allocator);
1553 return InternalUncompressAllTags(&decompressor, &writer, uncompressed_len);
1554 }
1555}
1556
1557} // end namespace snappy