// Copyright 2015, Joe Tsai. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE.md file. // Package meta implements the XFLATE meta encoding scheme. // // The XFLATE meta encoding is a method of encoding arbitrary data into one // or more RFC 1951 compliant DEFLATE blocks. This encoding has the special // property that when the blocks are decoded by a RFC 1951 compliant // decompressor, they produce absolutely no output. However, when decoded with // the XFLATE meta decoder, it losslessly produces the original input. // // The meta encoding works by encoding arbitrary data into the Huffman tree // definition of dynamic DEFLATE blocks. These blocks have an empty data section // and produce no output. Due to the Huffman definition overhead, the encoded // output is usually larger than the input. However, for most input datasets, // this encoding scheme is able to achieve an efficiency of at least 50%. package meta import ( "fmt" "github.com/dsnet/compress/internal/errors" "github.com/dsnet/compress/internal/prefix" ) // These are the magic values that begin every single meta block. const ( magicVals uint32 = 0x05860004 magicMask uint32 = 0xfffe3fc6 ) // ReverseSearch searches for a meta header in reverse. This returns the last // index where the header was found. If not found, it returns -1. func ReverseSearch(data []byte) int { var magic uint32 for i := len(data) - 1; i >= 0; i-- { magic = (magic << 8) | uint32(data[i]) if magic&magicMask == magicVals { return i } } return -1 } const ( maxSyms = 257 // Maximum number of literal codes (with EOB marker) minHuffLen = 1 // Minimum number of bits for each Huffman code maxHuffLen = 7 // Maximum number of bits for each Huffman code minRepLast = 3 // Minimum number of repeated codes (clen: 16) maxRepLast = 6 // Maximum number of repeated codes (clen: 16) minRepZero = 11 // Minimum number of repeated zeros (clen: 18) maxRepZero = 138 // Maximum number of repeated zeros (clen: 18) ) // These are some constants regarding the theoretical and practical limits for // the meta encoding of a single block. const ( // MinRawBytes and MaxRawBytes are the theoretical minimum and maximum // number of bytes a block can encode. MinRawBytes = 0 MaxRawBytes = 31 // MinEncBytes and MaxEncBytes are the theoretical minimum and maximum // number of bytes an encoded block will occupy. MinEncBytes = 12 MaxEncBytes = 64 // EnsureRawBytes is the maximum number of bytes that a single block is // ensured to encode (computed using brute force). EnsureRawBytes = 22 ) // FinalMode controls or indicates which final bits are set in the last block // in the meta stream. In the meta encoding, there are 2 final bits: // // Stream: This is the final bit from DEFLATE (as defined in RFC 1951). // and indicates that the entire compression stream has come to an end. // This bit indicate absolute termination of the stream. // // Meta: This final bit indicates that the current sequence of meta blocks has // terminated. The decoded data from those blocks form a meta substream. // This bit is used as a form of message framing for the meta encoding format. // // It invalid for the stream final bit to be set, while the meta final bit is // not set. All other combinations are legal. type FinalMode int const ( // FinalNil has neither the stream nor meta final bits set. FinalNil FinalMode = iota // FinalMeta has the final bit set, but not stream final bit. FinalMeta // FinalStream has both the meta and stream final bits set. FinalStream ) // The Huffman encoding used by the XFLATE meta encoding uses a partially // pre-determined HCLEN table. The symbols are 0, 16, 18, and another symbol // between minHuffLen and maxHuffLen, inclusively. The 0 symbol represents a // logical zero in the meta encoding, and the symbol between minHuffLen and // maxHuffLen represents a logical one. Symbols 16 and 18 are used to provide a // primitive form of run-length encoding. The codes that these symbols map to // are fixed and are shown below. // // Code Symbol // 0 <=> 0 (symZero) // 10 <=> minHuffLen..maxHuffLen (symOne) // 110 <=> 16 (symRepLast) // 111 <=> 18 (symRepZero) // // The symZero symbol occupies 1 bit since it is the most commonly needed bit, // while symOne occupies 2 bits. Thus, it is cheaper to encode logical zeros // than it is to encode logical ones. The invert bit in the meta encoding takes // advantage of this fact and allows all data bits to be inverted so that the // number zero bits is greater than the number of one bits. const ( symZero = iota // Symbol to encode a input zero (clen: 0) symOne // Symbol to encode a input one (clen: minHuffLen..maxHuffLen) symRepLast // Symbol to repeat last symbol (clen: 16) symRepZero // Symbol to repeat zero symbol (clen: 18) ) var encHuff, decHuff = func() (enc prefix.Encoder, dec prefix.Decoder) { codes := [4]prefix.PrefixCode{ {Sym: symZero, Len: 1}, {Sym: symOne, Len: 2}, {Sym: symRepLast, Len: 3}, {Sym: symRepZero, Len: 3}, } prefix.GeneratePrefixes(codes[:]) enc.Init(codes[:]) dec.Init(codes[:]) return }() func errorf(c int, f string, a ...interface{}) error { return errors.Error{Code: c, Pkg: "meta", Msg: fmt.Sprintf(f, a...)} } var errClosed = errorf(errors.Closed, "") // oneBitsLUT reports the number of set bits in the input byte. var oneBitsLUT = func() (lut [256]byte) { for i := range lut[:] { for b := byte(i); b > 0; b >>= 1 { lut[i] += b & 1 } } return }() // numBits counts the number of zero and one bits in the byte. func numBits(b byte) (zeros, ones int) { ones = int(oneBitsLUT[b]) zeros = 8 - ones return } // numPads computes number of bits needed to pad n-bits to a byte alignment. func numPads(n uint) uint { return -n & 7 } // btoi converts a bool to a integer 0 or 1. func btoi(b bool) int { if b { return 1 } return 0 }