diff options
| author | magh <magh@maghmogh.com> | 2023-03-06 18:44:55 -0600 |
|---|---|---|
| committer | magh <magh@maghmogh.com> | 2023-03-06 18:44:55 -0600 |
| commit | e80d9d8871b325a04b18f90a9ea4bb7fd148fb25 (patch) | |
| tree | 79dbdb8506b7ff1e92549188d1b94cfc0b3503ae /tools/src/minilzlib/lzma2dec.h | |
Diffstat (limited to 'tools/src/minilzlib/lzma2dec.h')
| -rw-r--r-- | tools/src/minilzlib/lzma2dec.h | 91 |
1 files changed, 91 insertions, 0 deletions
diff --git a/tools/src/minilzlib/lzma2dec.h b/tools/src/minilzlib/lzma2dec.h new file mode 100644 index 0000000..0b31440 --- /dev/null +++ b/tools/src/minilzlib/lzma2dec.h @@ -0,0 +1,91 @@ +/*++ + +Copyright (c) Alex Ionescu. All rights reserved. + +Module Name: + + lzma2dec.h + +Abstract: + + This header file contains C-style data structures and enumerations that map + back to the LZMA2 standard. This includes the encoding of the LZMA2 Control + Byte and the possible LZMA2 Reset States. + +Author: + + Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version + +Environment: + + Windows & Linux, user mode and kernel mode. + +--*/ + +#pragma once + +// +// The most complex LZMA sequence possible is a "match" sequence where the +// the length is > 127 bytes, and the distance is > 127 bytes. This type of +// sequence starts with {1,1} for "match", followed by {1,1,nnnnnnnn} for +// "8-bit encoded length", followed by {1,1,1,1,1,1} to select the distance +// slot (63). That's 18 bits so far, which all come from arithmetic-coded +// bit trees with various probabilities. The next 26 bits are going to be +// fixed-probability, meaning that the bit tree is mathematically hardcoded +// at 50%. Finally, there are the last 4 "align" distance bits which also +// come from an arithmetic-coded bit tree, bringing the total such bits to +// 22. +// +// Each time we have to "normalize" the arithmetic coder, it consumes an +// additional byte. Normalization is done whenever we consume more than 8 +// of the high bits of the coder's range (i.e.: below 2^24), so exactly +// every 8 direct bits (which always halve the range due to their 50%). +// The other bits can have arbitrary probabilities, but in the worst case +// we need to normalize the range every n bits. As such, this is a total of +// 20 worst-case normalization per LZMA sequence. Finally, we do one last +// normalization at the end of LzDecode, to make sure that the decoder is +// always in a normalized state. This means that a compressed chunk should +// be at least 21 bytes if we want to guarantee that LzDecode can never +// read past the current input stream, and avoid range checking. +// +#define LZMA_MAX_SEQUENCE_SIZE 21 + +// +// This describes the different ways an LZMA2 control byte can request a reset +// +typedef enum _LZMA2_COMPRESSED_RESET_STATE +{ + Lzma2NoReset = 0, + Lzma2SimpleReset = 1, + Lzma2PropertyReset = 2, + Lzma2FullReset = 3 +} LZMA2_COMPRESSED_RESET_STATE; + +// +// This describes how an LZMA2 control byte can be parsed +// +typedef union _LZMA2_CONTROL_BYTE +{ + union + { + struct + { + uint8_t ResetState : 2; + uint8_t Reserved : 5; + uint8_t IsLzma : 1; + } Raw; + struct + { + uint8_t RawSize : 5; + uint8_t ResetState : 2; + uint8_t IsLzma : 1; + } Lzma; + struct + { + uint8_t : 7; + uint8_t IsLzma : 1; + } Common; + } u; + uint8_t Value; +} LZMA2_CONTROL_BYTE; +static_assert(sizeof(LZMA2_CONTROL_BYTE) == 1, "Invalid control byte size"); |
