summaryrefslogtreecommitdiff
path: root/tools/src/minilzlib
diff options
context:
space:
mode:
Diffstat (limited to 'tools/src/minilzlib')
-rw-r--r--tools/src/minilzlib/dictbuf.c155
-rw-r--r--tools/src/minilzlib/inputbuf.c144
-rw-r--r--tools/src/minilzlib/lzma2dec.c228
-rw-r--r--tools/src/minilzlib/lzma2dec.h91
-rw-r--r--tools/src/minilzlib/lzmadec.c627
-rw-r--r--tools/src/minilzlib/lzmadec.h114
-rw-r--r--tools/src/minilzlib/minlzlib.h88
-rw-r--r--tools/src/minilzlib/minlzma.h33
-rw-r--r--tools/src/minilzlib/rangedec.c395
-rw-r--r--tools/src/minilzlib/xzstream.c547
-rw-r--r--tools/src/minilzlib/xzstream.h123
11 files changed, 2545 insertions, 0 deletions
diff --git a/tools/src/minilzlib/dictbuf.c b/tools/src/minilzlib/dictbuf.c
new file mode 100644
index 0000000..02875dc
--- /dev/null
+++ b/tools/src/minilzlib/dictbuf.c
@@ -0,0 +1,155 @@
+/*++
+
+Copyright (c) Alex Ionescu. All rights reserved.
+
+Module Name:
+
+ dictbuf.c
+
+Abstract:
+
+ This module implements the management of the LZMA "history buffer" which is
+ often called the "dictionary". Routines for writing into the history buffer
+ as well as for reading back from it are implemented, as well as mechanisms
+ for repeating previous symbols forward into the dictionary. This forms the
+ basis for LZMA match distance-length pairs that are found and decompressed.
+ Note that for simplicity's sake, the dictionary is stored directly in the
+ output buffer, such that no "flushing" or copying is needed back and forth.
+
+Author:
+
+ Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version
+
+Environment:
+
+ Windows & Linux, user mode and kernel mode.
+
+--*/
+
+#include "minlzlib.h"
+
+//
+// State used for the history buffer (dictionary)
+//
+typedef struct _DICTIONARY_STATE
+{
+ //
+ // Buffer, start position, current position, and offset limit in the buffer
+ //
+ uint8_t* Buffer;
+ uint32_t BufferSize;
+ uint32_t Start;
+ uint32_t Offset;
+ uint32_t Limit;
+} DICTIONARY_STATE, *PDICTIONARY_STATE;
+DICTIONARY_STATE Dictionary;
+
+void
+DtInitialize (
+ uint8_t* HistoryBuffer,
+ uint32_t Size
+ )
+{
+ //
+ // Initialize the buffer and reset the position
+ //
+ Dictionary.Buffer = HistoryBuffer;
+ Dictionary.Offset = 0;
+ Dictionary.BufferSize = Size;
+}
+
+bool
+DtSetLimit (
+ uint32_t Limit
+ )
+{
+ //
+ // Make sure that the passed in dictionary limit fits within the size, and
+ // then set this as the new limit. Save the starting point (current offset)
+ //
+ if ((Dictionary.Offset + Limit) > Dictionary.BufferSize)
+ {
+ return false;
+ }
+ Dictionary.Limit = Dictionary.Offset + Limit;
+ Dictionary.Start = Dictionary.Offset;
+ return true;
+}
+
+bool
+DtIsComplete (
+ uint32_t* BytesProcessed
+ )
+{
+ //
+ // Return bytes processed and if the dictionary has been fully written to
+ //
+ *BytesProcessed = Dictionary.Offset - Dictionary.Start;
+ return (Dictionary.Offset == Dictionary.Limit);
+}
+
+bool
+DtCanWrite (
+ uint32_t* Position
+ )
+{
+ //
+ // Return our position and make sure it's not beyond the uncompressed size
+ //
+ *Position = Dictionary.Offset;
+ return (Dictionary.Offset < Dictionary.Limit);
+}
+
+uint8_t
+DtGetSymbol (
+ uint32_t Distance
+ )
+{
+ //
+ // If the dictionary is still empty, just return 0, otherwise, return the
+ // symbol that is Distance bytes backward.
+ //
+ if (Distance > Dictionary.Offset)
+ {
+ return 0;
+ }
+ return Dictionary.Buffer[Dictionary.Offset - Distance];
+}
+
+void
+DtPutSymbol (
+ uint8_t Symbol
+ )
+{
+ //
+ // Write the symbol and advance our position
+ //
+ Dictionary.Buffer[Dictionary.Offset++] = Symbol;
+}
+
+bool
+DtRepeatSymbol (
+ uint32_t Length,
+ uint32_t Distance
+ )
+{
+ //
+ // Make sure we never get asked to write past the end of the dictionary. We
+ // should also not allow the distance to go beyond the current offset since
+ // DtGetSymbol will return 0 thinking the dictionary is empty.
+ //
+ if (((Length + Dictionary.Offset) > Dictionary.Limit) ||
+ (Distance > Dictionary.Offset))
+ {
+ return false;
+ }
+
+ //
+ // Now rewrite the stream of past symbols forward into the dictionary.
+ //
+ do
+ {
+ DtPutSymbol(DtGetSymbol(Distance));
+ } while (--Length > 0);
+ return true;
+}
diff --git a/tools/src/minilzlib/inputbuf.c b/tools/src/minilzlib/inputbuf.c
new file mode 100644
index 0000000..67d652c
--- /dev/null
+++ b/tools/src/minilzlib/inputbuf.c
@@ -0,0 +1,144 @@
+/*++
+
+Copyright (c) Alex Ionescu. All rights reserved.
+
+Module Name:
+
+ inputbuf.c
+
+Abstract:
+
+ This module implements helper functions for managing the input buffer that
+ contains arithmetic-coded LZ77 match distance-length pairs and raw literals
+ Both seeking (such that an external reader can refer to multiple bytes) and
+ reading (capturing) an individual byte are supported. Support for aligning
+ input data to 4 bytes (which is a requirement for XZ-encoded files) is also
+ implemented.
+
+Author:
+
+ Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version
+
+Environment:
+
+ Windows & Linux, user mode and kernel mode.
+
+--*/
+
+#include "minlzlib.h"
+
+//
+// Input Buffer State
+//
+typedef struct _BUFFER_STATE
+{
+ //
+ // Start of the buffer, current offset, current packet end, and total input size
+ //
+ uint8_t* Buffer;
+ uint32_t Offset;
+ uint32_t SoftLimit;
+ uint32_t Size;
+} BUFFER_STATE, * PBUFFER_STATE;
+BUFFER_STATE In;
+
+bool
+BfAlign (
+ void
+ )
+{
+ uint8_t padByte;
+ //
+ // Keep reading until we reach 32-bit alignment. All bytes must be zero.
+ //
+ while (In.Offset & 3)
+ {
+ if (!BfRead(&padByte) || (padByte != 0))
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool
+BfSetSoftLimit (
+ uint32_t Remaining
+ )
+{
+ if ((In.Size - In.Offset) < Remaining)
+ {
+ return false;
+ }
+ In.SoftLimit = In.Offset + Remaining;
+ return true;
+}
+
+void
+BfResetSoftLimit (
+ void
+ )
+{
+ In.SoftLimit = In.Size;
+}
+
+bool
+BfSeek (
+ uint32_t Length,
+ uint8_t** Bytes
+ )
+{
+ //
+ // Make sure the input buffer has enough space to seek the desired size, if
+ // it does, return the current position and then seek past the desired size
+ //
+ if ((In.Offset + Length) > In.SoftLimit)
+ {
+ *Bytes = 0;
+ return false;
+ }
+ *Bytes = &In.Buffer[In.Offset];
+ In.Offset += Length;
+ return true;
+}
+
+uint32_t
+BfTell (
+ void
+ )
+{
+ return In.Offset;
+}
+
+bool
+BfRead (
+ uint8_t* Byte
+ )
+{
+ uint8_t* pByte;
+ //
+ // Seek past the byte and read it
+ //
+ if (!BfSeek(sizeof(*Byte), &pByte))
+ {
+ *Byte = 0;
+ return false;
+ }
+ *Byte = *pByte;
+ return true;
+}
+
+void
+BfInitialize (
+ uint8_t* InputBuffer,
+ uint32_t InputSize
+ )
+{
+ //
+ // Save all the data in the context buffer state
+ //
+ In.Buffer = InputBuffer;
+ In.Size = InputSize;
+ In.SoftLimit = InputSize;
+ In.Offset = 0;
+}
diff --git a/tools/src/minilzlib/lzma2dec.c b/tools/src/minilzlib/lzma2dec.c
new file mode 100644
index 0000000..7a15513
--- /dev/null
+++ b/tools/src/minilzlib/lzma2dec.c
@@ -0,0 +1,228 @@
+/*++
+
+Copyright (c) Alex Ionescu. All rights reserved.
+
+Module Name:
+
+ lzma2dec.c
+
+Abstract:
+
+ This module implements the LZMA2 decoding logic responsible for parsing the
+ LZMA2 Control Byte, the Information Bytes (Compressed & Uncompressed Stream
+ Size), and the Property Byte during the initial Dictionary Reset. Note that
+ this module only implements support for a single such reset (i.e.: archives
+ in "solid" mode).
+
+Author:
+
+ Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version
+
+Environment:
+
+ Windows & Linux, user mode and kernel mode.
+
+--*/
+
+#include "minlzlib.h"
+#include "lzma2dec.h"
+
+bool
+Lz2DecodeChunk (
+ uint32_t* BytesProcessed,
+ uint32_t RawSize,
+ uint16_t CompressedSize
+ )
+{
+ uint32_t bytesProcessed;
+
+ //
+ // Go and decode this chunk, sequence by sequence
+ //
+ if (!LzDecode())
+ {
+ return false;
+ }
+
+ //
+ // In a correctly formatted stream, the last arithmetic-coded sequence must
+ // be zero once we finished with the last chunk. Make sure the stream ended
+ // exactly where we expected it to.
+ //
+ if (!RcIsComplete(&bytesProcessed) || (bytesProcessed != CompressedSize))
+ {
+ return false;
+ }
+
+ //
+ // The entire output stream must have been written to, and the dictionary
+ // must be full now.
+ //
+ if (!DtIsComplete(&bytesProcessed) || (bytesProcessed != RawSize))
+ {
+ return false;
+ }
+ *BytesProcessed += bytesProcessed;
+ return true;
+}
+
+bool
+Lz2DecodeStream (
+ uint32_t* BytesProcessed,
+ bool GetSizeOnly
+ )
+{
+ uint8_t* inBytes;
+ LZMA2_CONTROL_BYTE controlByte;
+ uint8_t propertyByte;
+ uint32_t rawSize;
+ uint16_t compressedSize;
+
+ //
+ // Read the first control byte
+ //
+ *BytesProcessed = 0;
+ while (BfRead(&controlByte.Value))
+ {
+ //
+ // When the LZMA2 control byte is 0, the entire stream is decoded. This
+ // is the only success path out of this function.
+ //
+ if (controlByte.Value == 0)
+ {
+ return true;
+ }
+
+ //
+ // Read the appropriate number of info bytes based on the stream type.
+ //
+ if (!BfSeek((controlByte.u.Common.IsLzma == 1 ) ? 4 : 2, &inBytes))
+ {
+ break;
+ }
+
+ //
+ // For LZMA streams calculate both the uncompressed and compressed size
+ // from the info bytes. Uncompressed streams only have the former.
+ //
+ if (controlByte.u.Common.IsLzma == 1)
+ {
+ rawSize = controlByte.u.Lzma.RawSize << 16;
+ compressedSize = inBytes[2] << 8;
+ compressedSize += inBytes[3] + 1;
+ }
+ else
+ {
+ rawSize = 0;
+ compressedSize = 0;
+ }
+
+ //
+ // Make sure that the output buffer that was supplied is big enough to
+ // fit the uncompressed chunk, unless we're just calculating the size.
+ //
+ rawSize += inBytes[0] << 8;
+ rawSize += inBytes[1] + 1;
+ if (!GetSizeOnly && !DtSetLimit(rawSize))
+ {
+ break;
+ }
+
+ //
+ // Check if the full LZMA state needs to be reset, which must happen at
+ // the start of stream. Also check for a property reset, which occurs
+ // when an LZMA stream follows an uncompressed stream. Separately,
+ // check for a state reset without a property byte (happens rarely,
+ // but does happen in a few compressed streams).
+ //
+ if ((controlByte.u.Lzma.ResetState == Lzma2FullReset) ||
+ (controlByte.u.Lzma.ResetState == Lzma2PropertyReset))
+ {
+ //
+ // Read the LZMA properties and then initialize the decoder.
+ //
+ if (!BfRead(&propertyByte) || !LzInitialize(propertyByte))
+ {
+ break;
+ }
+ }
+ else if (controlByte.u.Lzma.ResetState == Lzma2SimpleReset)
+ {
+ LzResetState();
+ }
+ //
+ // else controlByte.u.Lzma.ResetState == Lzma2NoReset, since a two-bit
+ // field only has four possible values
+ //
+
+ //
+ // Don't do any decompression if the caller only wants to know the size
+ //
+ if (GetSizeOnly)
+ {
+ *BytesProcessed += rawSize;
+ BfSeek((controlByte.u.Common.IsLzma == 1) ? compressedSize : rawSize,
+ &inBytes);
+ continue;
+ }
+ else if (controlByte.u.Common.IsLzma == 0)
+ {
+ //
+ // Seek to the requested size in the input buffer
+ //
+ if (!BfSeek(rawSize, &inBytes))
+ {
+ return false;
+ }
+
+ //
+ // Copy the data into the dictionary as-is
+ //
+ for (uint32_t i = 0; i < rawSize; i++)
+ {
+ DtPutSymbol(inBytes[i]);
+ }
+
+ //
+ // Update bytes and keep going to the next chunk
+ //
+ *BytesProcessed += rawSize;
+ continue;
+ }
+
+ //
+ // Record how many bytes are left in this sequence as our SoftLimit for
+ // the other operations. This allows us to omit most range checking
+ // logic in rangedec.c. This soft limit lasts until reset below.
+ //
+ if (!BfSetSoftLimit(compressedSize))
+ {
+ break;
+ }
+
+ //
+ // Read the initial range and code bytes to initialize the arithmetic
+ // coding decoder, and let it know how much input data exists. We've
+ // already validated that this much space exists in the input buffer.
+ //
+ if (!RcInitialize(&compressedSize))
+ {
+ break;
+ }
+
+ //
+ // Start decoding the LZMA sequences in this chunk
+ //
+ if (!Lz2DecodeChunk(BytesProcessed, rawSize, compressedSize))
+ {
+ break;
+ }
+
+ //
+ // Having decoded that chunk, reset our soft limit (to the full
+ // input stream) so we can read the next chunk.
+ //
+ BfResetSoftLimit();
+ }
+ return false;
+}
diff --git a/tools/src/minilzlib/lzma2dec.h b/tools/src/minilzlib/lzma2dec.h
new file mode 100644
index 0000000..0b31440
--- /dev/null
+++ b/tools/src/minilzlib/lzma2dec.h
@@ -0,0 +1,91 @@
+/*++
+
+Copyright (c) Alex Ionescu. All rights reserved.
+
+Module Name:
+
+ lzma2dec.h
+
+Abstract:
+
+ This header file contains C-style data structures and enumerations that map
+ back to the LZMA2 standard. This includes the encoding of the LZMA2 Control
+ Byte and the possible LZMA2 Reset States.
+
+Author:
+
+ Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version
+
+Environment:
+
+ Windows & Linux, user mode and kernel mode.
+
+--*/
+
+#pragma once
+
+//
+// The most complex LZMA sequence possible is a "match" sequence where the
+// the length is > 127 bytes, and the distance is > 127 bytes. This type of
+// sequence starts with {1,1} for "match", followed by {1,1,nnnnnnnn} for
+// "8-bit encoded length", followed by {1,1,1,1,1,1} to select the distance
+// slot (63). That's 18 bits so far, which all come from arithmetic-coded
+// bit trees with various probabilities. The next 26 bits are going to be
+// fixed-probability, meaning that the bit tree is mathematically hardcoded
+// at 50%. Finally, there are the last 4 "align" distance bits which also
+// come from an arithmetic-coded bit tree, bringing the total such bits to
+// 22.
+//
+// Each time we have to "normalize" the arithmetic coder, it consumes an
+// additional byte. Normalization is done whenever we consume more than 8
+// of the high bits of the coder's range (i.e.: below 2^24), so exactly
+// every 8 direct bits (which always halve the range due to their 50%).
+// The other bits can have arbitrary probabilities, but in the worst case
+// we need to normalize the range every n bits. As such, this is a total of
+// 20 worst-case normalization per LZMA sequence. Finally, we do one last
+// normalization at the end of LzDecode, to make sure that the decoder is
+// always in a normalized state. This means that a compressed chunk should
+// be at least 21 bytes if we want to guarantee that LzDecode can never
+// read past the current input stream, and avoid range checking.
+//
+#define LZMA_MAX_SEQUENCE_SIZE 21
+
+//
+// This describes the different ways an LZMA2 control byte can request a reset
+//
+typedef enum _LZMA2_COMPRESSED_RESET_STATE
+{
+ Lzma2NoReset = 0,
+ Lzma2SimpleReset = 1,
+ Lzma2PropertyReset = 2,
+ Lzma2FullReset = 3
+} LZMA2_COMPRESSED_RESET_STATE;
+
+//
+// This describes how an LZMA2 control byte can be parsed
+//
+typedef union _LZMA2_CONTROL_BYTE
+{
+ union
+ {
+ struct
+ {
+ uint8_t ResetState : 2;
+ uint8_t Reserved : 5;
+ uint8_t IsLzma : 1;
+ } Raw;
+ struct
+ {
+ uint8_t RawSize : 5;
+ uint8_t ResetState : 2;
+ uint8_t IsLzma : 1;
+ } Lzma;
+ struct
+ {
+ uint8_t : 7;
+ uint8_t IsLzma : 1;
+ } Common;
+ } u;
+ uint8_t Value;
+} LZMA2_CONTROL_BYTE;
+static_assert(sizeof(LZMA2_CONTROL_BYTE) == 1, "Invalid control byte size");
diff --git a/tools/src/minilzlib/lzmadec.c b/tools/src/minilzlib/lzmadec.c
new file mode 100644
index 0000000..1a3c420
--- /dev/null
+++ b/tools/src/minilzlib/lzmadec.c
@@ -0,0 +1,627 @@
+/*++
+
+Copyright (c) Alex Ionescu. All rights reserved.
+
+Module Name:
+
+ lzmadec.c
+
+Abstract:
+
+ This module implements the LZMA Decoding Logic responsible for decoding the
+ three possible types of LZMA "packets": matches, repetitions (short \& long)
+ and literals. The probability model for each type of packet is also stored
+ in this file, along with the management of the previously seen packet types
+ (which is tracked as the "sequence").
+
+Author:
+
+ Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version
+
+Environment:
+
+ Windows & Linux, user mode and kernel mode.
+
+--*/
+
+#include "minlzlib.h"
+#include "lzmadec.h"
+
+//
+// Probability Bit Model for Lenghts in Rep and in Match sequences
+//
+typedef struct _LENGTH_DECODER_STATE
+{
+ //
+ // Bit Model for the choosing the type of length encoding
+ //
+ uint16_t Choice;
+ uint16_t Choice2;
+ //
+ // Bit Model for each of the length encodings
+ //
+ uint16_t Low[LZMA_POSITION_COUNT][LZMA_MAX_LOW_LENGTH];
+ uint16_t Mid[LZMA_POSITION_COUNT][LZMA_MAX_MID_LENGTH];
+ uint16_t High[LZMA_MAX_HIGH_LENGTH];
+} LENGTH_DECODER_STATE, * PLENGTH_DECODER_STATE;
+
+//
+// State used for LZMA decoding
+//
+typedef struct _DECODER_STATE
+{
+ //
+ // Current type of sequence last decoded
+ //
+ LZMA_SEQUENCE_STATE Sequence;
+ //
+ // History of last 4 decoded distances
+ //
+ uint32_t Rep0;
+ uint32_t Rep1;
+ uint32_t Rep2;
+ uint32_t Rep3;
+ //
+ // Pending length to repeat from dictionary
+ //
+ uint32_t Len;
+ //
+ // Probability Bit Models for all sequence types
+ //
+ union
+ {
+ struct
+ {
+ //
+ // Literal model
+ //
+ uint16_t Literal[LZMA_LITERAL_CODERS][LZMA_LC_MODEL_SIZE];
+ //
+ // Last-used-distance based models
+ //
+ uint16_t Rep[LzmaMaxState];
+ uint16_t Rep0[LzmaMaxState];
+ uint16_t Rep0Long[LzmaMaxState][LZMA_POSITION_COUNT];
+ uint16_t Rep1[LzmaMaxState];
+ uint16_t Rep2[LzmaMaxState];
+ LENGTH_DECODER_STATE RepLen;
+ //
+ // Explicit distance match based models
+ //
+ uint16_t Match[LzmaMaxState][LZMA_POSITION_COUNT];
+ uint16_t DistSlot[LZMA_FIRST_CONTEXT_DISTANCE_SLOT][LZMA_DISTANCE_SLOTS];
+ uint16_t Dist[(1 << 7) - LZMA_FIRST_FIXED_DISTANCE_SLOT];
+ uint16_t Align[LZMA_DISTANCE_ALIGN_SLOTS];
+ LENGTH_DECODER_STATE MatchLen;
+ } BitModel;
+ uint16_t RawProbabilities[LZMA_BIT_MODEL_SLOTS];
+ } u;
+} DECODER_STATE, *PDECODER_STATE;
+DECODER_STATE Decoder;
+
+//
+// LZMA decoding uses 3 "properties" which determine how the probability
+// bit model will be laid out. These store the number of bits that are used
+// to pick the correct Literal Coder ("lc"), the number of Position bits to
+// select the Literal coder ("lp"), and the number of Position Bits used to
+// select various lengths ("pb"). In LZMA2, these properties are encoded in
+// a single byte with the formula: ((pb * 45) + lp * 9) + lc).
+//
+// We only support the default {lc = 3, lp = 0, pb = 2} properties, which
+// are what the main encoders out there use. This means that a total of 2
+// bits will be used for arithmetic-coded bit trees that are dependent on
+// the current position, and that a total of 3 bits will be used when we
+// pick the arithmetic-coded bit tree used for literal coding. The 0 means
+// this selection will _not_ be dependent on the position in the buffer.
+//
+const uint8_t k_LzSupportedProperties =
+ (LZMA_PB * 45) + (LZMA_LP * 9) + (LZMA_LC);
+
+void
+LzSetLiteral (
+ PLZMA_SEQUENCE_STATE State
+ )
+{
+ if (*State <= LzmaLitShortrepLitLitState)
+ {
+ //
+ // States 0-3 represent packets with at least 2 back-to-back literals,
+ // so another literal now takes us to state 0 (3 back-to-back literals)
+ //
+ *State = LzmaLitLitLitState;
+ }
+ else if (*State <= LzmaLitShortrepState)
+ {
+ //
+ // States 4-6 represent packets with a literal at the end, so seeing
+ // another literal now takes us to 2 back-to-back literals, which are
+ // state packets 1-3.
+ //
+ // States 7-9 represent packets with a literal at the start, followed
+ // by a match/rep/shortrep. Seeing another literal now drops this first
+ // literal and takes us to having a literal at the end, which are state
+ // packets 4-6 that we just described in the paragraph above.
+ //
+ *State = (LZMA_SEQUENCE_STATE)(*State - 3);
+ }
+ else
+ {
+ //
+ // Finally, state 10 and 11 represent cases without a single literal in
+ // the last 2 sequence packets, so seeing a literal now takes us to a
+ // "literal at the end" state, either following a match or a rep.
+ //
+ *State = (LZMA_SEQUENCE_STATE)(*State - 6);
+ }
+}
+
+bool
+LzIsLiteral (
+ LZMA_SEQUENCE_STATE State
+ )
+{
+ //
+ // States 0-6 describe literal packet sequences
+ //
+ return State < LzmaMaxLitState;
+}
+
+void
+LzSetMatch (
+ PLZMA_SEQUENCE_STATE State
+ )
+{
+ //
+ // Move to the appropriate "match" state based on current literal state
+ //
+ *State = LzIsLiteral(*State) ? LzmaLitMatchState : LzmaNonlitMatchState;
+}
+
+void
+LzSetLongRep (
+ PLZMA_SEQUENCE_STATE State
+ )
+{
+ //
+ // Move to the appropriate "long rep" state based on current literal state
+ //
+ *State = LzIsLiteral(*State) ? LzmaLitRepState : LzmaNonlitRepState;
+}
+
+void
+LzSetShortRep (
+ PLZMA_SEQUENCE_STATE State
+ )
+{
+ //
+ // Move to the appropriate "short rep" state based on current literal state
+ //
+ *State = LzIsLiteral(*State) ? LzmaLitShortrepState : LzmaNonlitRepState;
+}
+
+uint16_t*
+LzGetLiteralSlot (
+ void
+ )
+{
+ uint8_t symbol;
+
+ //
+ // To pick the correct literal coder arithmetic-coded bit tree, LZMA uses
+ // the "lc" parameter to choose the number of high bits from the previous
+ // symbol (in the normal case, 3). It then combines that with the "lp"
+ // parameter to choose the number of low bits from the current position in
+ // the dictionary. However, since "lp" is normally 0, we can omit this.
+ //
+ symbol = DtGetSymbol(1);
+ return Decoder.u.BitModel.Literal[symbol >> (8 - LZMA_LC)];
+}
+
+uint16_t*
+LzGetDistSlot (
+ void
+ )
+{
+ uint8_t slotIndex;
+
+ //
+ // There are 4 different arithmetic-coded bit trees which are used to pick
+ // the correct "distance slot" when doing match distance decoding. Each of
+ // them is used based on the length of the symbol that is being repeated.
+ // For lengths of 2, 3, 4 bytes, a dedicated set of distance slots is used.
+ // For lengths of 5 bytes or above, a shared set of distance slots is used.
+ //
+ if (Decoder.Len < (LZMA_FIRST_CONTEXT_DISTANCE_SLOT + LZMA_MIN_LENGTH))
+ {
+ slotIndex = (uint8_t)(Decoder.Len - LZMA_MIN_LENGTH);
+ }
+ else
+ {
+ slotIndex = LZMA_FIRST_CONTEXT_DISTANCE_SLOT - 1;
+ }
+ return Decoder.u.BitModel.DistSlot[slotIndex];
+}
+
+void
+LzDecodeLiteral (
+ void
+ )
+{
+ uint16_t* probArray;
+ uint8_t symbol, matchByte;
+
+ //
+ // First, choose the correct arithmetic-coded bit tree (which is based on
+ // the last symbol we just decoded), then see if we last decoded a literal.
+ //
+ // If so, simply get the symbol from the bit tree as normal. However, if
+ // we didn't last see a literal, we need to read the "match byte" that is
+ // "n" bytes away from the last decoded match. We previously stored this in
+ // rep0.
+ //
+ // Based on this match byte, we'll then use 2 other potential bit trees,
+ // see LzDecodeMatched for more information.
+ //
+ probArray = LzGetLiteralSlot();
+ if (LzIsLiteral(Decoder.Sequence))
+ {
+
+ symbol = RcGetBitTree(probArray, (1 << 8));
+ }
+ else
+ {
+ matchByte = DtGetSymbol(Decoder.Rep0 + 1);
+ symbol = RcDecodeMatchedBitTree(probArray, matchByte);
+ }
+
+ //
+ // Write the symbol and indicate that the last sequence was a literal
+ //
+ DtPutSymbol(symbol);
+ LzSetLiteral(&Decoder.Sequence);
+}
+
+void
+LzDecodeLen (
+ PLENGTH_DECODER_STATE LenState,
+ uint8_t PosBit
+ )
+{
+ uint16_t* probArray;
+ uint16_t limit;
+
+ //
+ // Lenghts of 2 and higher are encoded in 3 possible types of arithmetic-
+ // coded bit trees, depending on the size of the length.
+ //
+ // Lengths 2-9 are encoded in trees called "Low" using 3 bits of data.
+ // Lengths 10-17 are encoded in trees called "Mid" using 3 bits of data.
+ // Lengths 18-273 are encoded in a tree called "high" using 8 bits of data.
+ //
+ // The appropriate "Low" or "Mid" tree is selected based on the bottom 2
+ // position bits (0-3) (in the LZMA standard, this is based on the "pb",
+ // while the "High" tree is shared for all positions.
+ //
+ // Two arithmetic-coded bit trees, called "Choice" and "Choice2" tell us
+ // the type of Length, so we can choose the right tree. {0, n} tells us
+ // to use the Low trees, while {1, 0} tells us to use the Mid trees. Lastly
+ // {1, 1} tells us to use the High tree.
+ //
+ Decoder.Len = LZMA_MIN_LENGTH;
+ if (RcIsBitSet(&LenState->Choice))
+ {
+ if (RcIsBitSet(&LenState->Choice2))
+ {
+ probArray = LenState->High;
+ limit = LZMA_MAX_HIGH_LENGTH;
+ Decoder.Len += LZMA_MAX_LOW_LENGTH + LZMA_MAX_MID_LENGTH;
+ }
+ else
+ {
+ probArray = LenState->Mid[PosBit];
+ limit = LZMA_MAX_MID_LENGTH;
+ Decoder.Len += LZMA_MAX_LOW_LENGTH;
+ }
+ }
+ else
+ {
+ probArray = LenState->Low[PosBit];
+ limit = LZMA_MAX_LOW_LENGTH;
+ }
+ Decoder.Len += RcGetBitTree(probArray, limit);
+}
+
+void
+LzDecodeMatch (
+ uint8_t PosBit
+ )
+{
+ uint16_t* probArray;
+ uint8_t distSlot, distBits;
+
+ //
+ // Decode the length component of the "match" sequence. Then, since we're
+ // about to decode a new distance, update our history by one level.
+ //
+ LzDecodeLen(&Decoder.u.BitModel.MatchLen, PosBit);
+ Decoder.Rep3 = Decoder.Rep2;
+ Decoder.Rep2 = Decoder.Rep1;
+ Decoder.Rep1 = Decoder.Rep0;
+
+ //
+ // Read the first 6 bits, which make up the "distance slot"
+ //
+ probArray = LzGetDistSlot();
+ distSlot = RcGetBitTree(probArray, LZMA_DISTANCE_SLOTS);
+ if (distSlot < LZMA_FIRST_CONTEXT_DISTANCE_SLOT)
+ {
+ //
+ // Slots 0-3 directly encode the distance as a literal number
+ //
+ Decoder.Rep0 = distSlot;
+ }
+ else
+ {
+ //
+ // For slots 4-13, figure out how many "context encoded bits" are used
+ // to encode this distance. The math works out such that slots 4-5 use
+ // 1 bit, 6-7 use 2 bits, 8-9 use 3 bits, and so on and so forth until
+ // slots 12-13 which use 5 bits.
+ //
+ // This gives us anywhere from 1-5 bits, plus the two upper bits which
+ // can either be 0b10 or 0b11 (based on the bottom bit of the distance
+ // slot). Thus, with the context encoded bits, we can represent lengths
+ // anywhere from 0b10[0] to 0b11[11111] (i.e.: 4-127).
+ //
+ // For slots 14-63, we use "fixed 50% probability bits" which are also
+ // called "direct bits". The formula below also tells us how many such
+ // direct bits to use in this scenario. In other words, distBits can
+ // either be the number of "context encoded bits" for slots 4-13, or it
+ // can be the the number of "direct bits" for slots 14-63. This gives
+ // us a range of of 2 to 26 bits, which are then used as middle bits.
+ // Finally, the last 4 bits are called the "align" bits. The smallest
+ // possible number we can encode is now going to be 0b10[00][0000] and
+ // the highest is 0b11[1111111111111111111111111][1111], in other words
+ // 128 to (2^31)-1.
+ //
+ distBits = (distSlot >> 1) - 1;
+ Decoder.Rep0 = (0b10 | (distSlot & 1)) << distBits;
+
+ //
+ // Slots 4-13 have their own arithmetic-coded reverse bit trees. Slots
+ // 14-63 encode the middle "direct bits" with fixed 50% probability and
+ // the bottom 4 "align bits" with a shared arithmetic-coded reverse bit
+ // tree.
+ //
+ if (distSlot < LZMA_FIRST_FIXED_DISTANCE_SLOT)
+ {
+ probArray = &Decoder.u.BitModel.Dist[Decoder.Rep0 - distSlot];
+ }
+ else
+ {
+ Decoder.Rep0 |= RcGetFixed(distBits - LZMA_DISTANCE_ALIGN_BITS) <<
+ LZMA_DISTANCE_ALIGN_BITS;
+ distBits = LZMA_DISTANCE_ALIGN_BITS;
+ probArray = Decoder.u.BitModel.Align;
+ }
+ Decoder.Rep0 |= RcGetReverseBitTree(probArray, distBits);
+ }
+
+ //
+ // Indicate that the last sequence was a "match"
+ //
+ LzSetMatch(&Decoder.Sequence);
+}
+
+void
+LzDecodeRepLen (
+ uint8_t PosBit,
+ bool IsLongRep
+ )
+{
+ //
+ // Decode the length byte and indicate the last sequence was a "rep".
+ // If this is a short rep, then the length is always hard-coded to 1.
+ //
+ if (IsLongRep)
+ {
+ LzDecodeLen(&Decoder.u.BitModel.RepLen, PosBit);
+ LzSetLongRep(&Decoder.Sequence);
+ }
+ else
+ {
+ Decoder.Len = 1;
+ LzSetShortRep(&Decoder.Sequence);
+ }
+}
+
+void
+LzDecodeRep0(
+ uint8_t PosBit
+ )
+{
+ uint8_t bit;
+
+ //
+ // This could be a "short rep" with a length of 1, or a "long rep0" with
+ // a length that we have to decode. The next bit tells us this, using the
+ // arithmetic-coded bit trees stored in "Rep0Long", with 1 tree for each
+ // position bit (0-3).
+ //
+ bit = RcIsBitSet(&Decoder.u.BitModel.Rep0Long[Decoder.Sequence][PosBit]);
+ LzDecodeRepLen(PosBit, bit);
+}
+
+void
+LzDecodeLongRep (
+ uint8_t PosBit
+ )
+{
+ uint32_t newRep;
+
+ //
+ // Read the next 2 bits to figure out which of the recently used distances
+ // we should use for this match. The following three states are possible :
+ //
+ // {0,n} - "Long rep1", where the length is stored in an arithmetic-coded
+ // bit tree, and the distance is the 2nd most recently used distance (Rep1)
+ //
+ // {1,0} - "Long rep2", where the length is stored in an arithmetic-coded
+ // bit tree, and the distance is the 3rd most recently used distance (Rep2)
+ //
+ // {1,1} - "Long rep3", where the length is stored in an arithmetic-coded
+ // bit tree, and the distance is the 4th most recently used distance (Rep3)
+ //
+ // Once we have the right one, we must slide down each previously recently
+ // used distance, so that the distance we're now using (Rep1, Rep2 or Rep3)
+ // becomes "Rep0" again.
+ //
+ if (RcIsBitSet(&Decoder.u.BitModel.Rep1[Decoder.Sequence]))
+ {
+ if (RcIsBitSet(&Decoder.u.BitModel.Rep2[Decoder.Sequence]))
+ {
+ newRep = Decoder.Rep3;
+ Decoder.Rep3 = Decoder.Rep2;
+ }
+ else
+ {
+ newRep = Decoder.Rep2;
+ }
+ Decoder.Rep2 = Decoder.Rep1;
+ }
+ else
+ {
+ newRep = Decoder.Rep1;
+ }
+ Decoder.Rep1 = Decoder.Rep0;
+ Decoder.Rep0 = newRep;
+ LzDecodeRepLen(PosBit, true);
+}
+
+void
+LzDecodeRep (
+ uint8_t PosBit
+ )
+{
+ //
+ // We know this is an LZ77 distance-length pair where the distance is based
+ // on a history of up to 4 previously used distance (Rep0-3). To know which
+ // distance to use, the following 5 bit positions are possible (keeping in
+ // mind that we've already decoded the first 2 bits {1,1} in LzDecode which
+ // got us here in the first place):
+ //
+ // {0,0} - "Short rep", where the length is always 1 and distance is always
+ // the most recently used distance (Rep0).
+ //
+ // {0,1} - "Long rep0", where the length is stored in an arithmetic-coded
+ // bit tree, and the distance is the most recently used distance (Rep0).
+ //
+ // Because both of these possibilities just use Rep0, LzDecodeRep0 handles
+ // these two cases. Otherwise, we use LzDecodeLongRep to read up to two
+ // additional bits to figure out which recently used distance (1, 2, or 3)
+ // to use.
+ //
+ if (RcIsBitSet(&Decoder.u.BitModel.Rep0[Decoder.Sequence]))
+ {
+ LzDecodeLongRep(PosBit);
+ }
+ else
+ {
+ LzDecodeRep0(PosBit);
+ }
+}
+
+bool
+LzDecode (
+ void
+ )
+{
+ uint32_t position;
+ uint8_t posBit;
+
+ //
+ // Get the current position in dictionary, making sure we have input bytes.
+ // Once we run out of bytes, normalize the last arithmetic coded byte and
+ // ensure there's no pending lengths that we haven't yet repeated.
+ //
+ while (DtCanWrite(&position) && RcCanRead())
+ {
+ //
+ // An LZMA packet begins here, which can have 3 possible initial bit
+ // sequences that correspond to the type of encoding that was chosen
+ // to represent the next stream of symbols.
+ //
+ // {0, n} represents a "literal", which LzDecodeLiteral decodes.
+ // Literals are a single byte encoded with arithmetic-coded bit trees
+ //
+ // {1, 0} represents a "match", which LzDecodeMatch decodes.
+ // Matches are typical LZ77 sequences with explicit length and distance
+ //
+ // {1, 1} represents a "rep", which LzDecodeRep decodes.
+ // Reps are LZ77 sequences where the distance is encoded as a reference
+ // to a previously used distance (up to 4 -- called "Rep0-3").
+ //
+ // Once we've decoded either the "match" or the "rep', we now have the
+ // distance in "Rep0" (the most recently used distance) and the length
+ // in "Len", so we will use DtRepeatSymbol to go back in the dictionary
+ // buffer "Rep0" bytes and repeat that character "Len" times.
+ //
+ posBit = position & (LZMA_POSITION_COUNT - 1);
+ if (RcIsBitSet(&Decoder.u.BitModel.Match[Decoder.Sequence][posBit]))
+ {
+ if (RcIsBitSet(&Decoder.u.BitModel.Rep[Decoder.Sequence]))
+ {
+ LzDecodeRep(posBit);
+ }
+ else
+ {
+ LzDecodeMatch(posBit);
+ }
+
+ if (!DtRepeatSymbol(Decoder.Len, Decoder.Rep0 + 1))
+ {
+ return false;
+ }
+ Decoder.Len = 0;
+ }
+ else
+ {
+ LzDecodeLiteral();
+ }
+ }
+ RcNormalize();
+ return (Decoder.Len == 0);
+}
+
+void
+LzResetState (
+ void
+ )
+{
+ //
+ // Initialize decoder to default state in case we're called more than once.
+ // The LZMA "Bit Model" is an adaptive arithmetic-coded probability-based
+ // bit tree which encodes either a "0" or a "1".
+ //
+ Decoder.Sequence = LzmaLitLitLitState;
+ Decoder.Rep0 = Decoder.Rep1 = Decoder.Rep2 = Decoder.Rep3 = 0;
+ static_assert((LZMA_BIT_MODEL_SLOTS * 2) == sizeof(Decoder.u.BitModel),
+ "Invalid size");
+ for (int i = 0; i < LZMA_BIT_MODEL_SLOTS; i++)
+ {
+ RcSetDefaultProbability(&Decoder.u.RawProbabilities[i]);
+ }
+}
+
+bool
+LzInitialize (
+ uint8_t Properties
+ )
+{
+ if (Properties != k_LzSupportedProperties)
+ {
+ return false;
+ }
+ LzResetState();
+ return true;
+}
diff --git a/tools/src/minilzlib/lzmadec.h b/tools/src/minilzlib/lzmadec.h
new file mode 100644
index 0000000..652165d
--- /dev/null
+++ b/tools/src/minilzlib/lzmadec.h
@@ -0,0 +1,114 @@
+/*++
+
+Copyright (c) Alex Ionescu. All rights reserved.
+
+Module Name:
+
+ lzmadec.h
+
+Abstract:
+
+ This header file contains C-style definitions, constants, and enumerations
+ that map back to the LZMA Standard, specifically the probability model that
+ is used for encoding probabilities.
+
+Author:
+
+ Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version
+
+Environment:
+
+ Windows & Linux, user mode and kernel mode.
+
+--*/
+
+#pragma once
+
+//
+// Literals can be 0-255 and are encoded in 3 different types of slots based on
+// the previous literal decoded and the "match byte" used.
+//
+#define LZMA_LITERALS 256
+#define LZMA_LC_TYPES 3
+#define LZMA_LC_MODEL_SIZE (LZMA_LC_TYPES * LZMA_LITERALS)
+
+//
+// These are the hardcoded LZMA properties we support for position and coders
+//
+#define LZMA_LC 3
+#define LZMA_PB 2
+#define LZMA_LP 0
+#define LZMA_LITERAL_CODERS (1 << LZMA_LC)
+#define LZMA_POSITION_COUNT (1 << LZMA_PB)
+
+//
+// Lengths are described in three different ways using "low", "mid", and "high"
+// bit trees. The first two trees encode 3 bits, the last encodes 8. We never
+// encode a length less than 2 bytes, since that's wasteful.
+//
+#define LZMA_MAX_LOW_LENGTH (1 << 3)
+#define LZMA_MAX_MID_LENGTH (1 << 3)
+#define LZMA_MAX_HIGH_LENGTH (1 << 8)
+#define LZMA_MIN_LENGTH 2
+
+//
+// Distances can be encoded in different ways, based on the distance slot.
+// Lengths of 2, 3, 4 bytes are directly encoded with their own slot. Lengths
+// over 5 share a slot, which is then further subdivded into 3 different ways
+// of encoding them, which are described in the source.
+//
+#define LZMA_DISTANCE_SLOTS 64
+#define LZMA_FIRST_CONTEXT_DISTANCE_SLOT 4
+#define LZMA_FIRST_FIXED_DISTANCE_SLOT 14
+#define LZMA_DISTANCE_ALIGN_BITS 4
+#define LZMA_DISTANCE_ALIGN_SLOTS (1 << LZMA_DISTANCE_ALIGN_BITS)
+
+//
+// Total number of probabilities that we need to store
+//
+#define LZMA_BIT_MODEL_SLOTS (1174 + \
+ (LZMA_LITERAL_CODERS * \
+ LZMA_LC_MODEL_SIZE))
+
+//
+// The LZMA probability bit model is typically based on the last LZMA sequences
+// that were decoded. There are 11 such possibilities that are tracked.
+//
+typedef enum _LZMA_SEQUENCE_STATE
+{
+ //
+ // State where we last saw three literals
+ //
+ LzmaLitLitLitState,
+ //
+ // States where we last saw two literals preceeded by a non-literal
+ //
+ LzmaMatchLitLitState,
+ LzmaRepLitLitState,
+ LzmaLitShortrepLitLitState,
+ //
+ // States where we last saw one literal preceeded by a non-literal
+ //
+ LzmaMatchLitState,
+ LzmaRepLitState,
+ LzmaLitShortrepLitState,
+ //
+ // Separator between states where we last saw at least one literal
+ //
+ LzmaMaxLitState,
+ //
+ // States where we last saw a non-literal preceeded by a literal
+ //
+ LzmaLitMatchState = 7,
+ LzmaLitRepState,
+ LzmaLitShortrepState,
+ //
+ // States where we last saw two non-literals
+ //
+ LzmaNonlitMatchState,
+ LzmaNonlitRepState,
+ //
+ // Separator for number of total states
+ //
+ LzmaMaxState
+} LZMA_SEQUENCE_STATE, * PLZMA_SEQUENCE_STATE;
diff --git a/tools/src/minilzlib/minlzlib.h b/tools/src/minilzlib/minlzlib.h
new file mode 100644
index 0000000..c5276ae
--- /dev/null
+++ b/tools/src/minilzlib/minlzlib.h
@@ -0,0 +1,88 @@
+/*++
+
+Copyright (c) Alex Ionescu. All rights reserved.
+
+Module Name:
+
+ minlzlib.h
+
+Abstract:
+
+ This header file is the main include for the minlz library. It contains the
+ internal function definitions for the history \& input buffers, the LZMA and
+ LZMA2 decoders, and the arithmetic (de)coder.
+
+Author:
+
+ Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version
+
+Environment:
+
+ Windows & Linux, user mode and kernel mode.
+
+--*/
+
+#pragma once
+
+//
+// C Standard Headers
+//
+#include <stddef.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <assert.h>
+
+//
+// Input Buffer Management
+//
+bool BfRead(uint8_t* Byte);
+bool BfSeek(uint32_t Length, uint8_t** Bytes);
+uint32_t BfTell(void);
+bool BfAlign(void);
+void BfInitialize(uint8_t* InputBuffer, uint32_t InputSize);
+bool BfSetSoftLimit(uint32_t Remaining);
+void BfResetSoftLimit(void);
+
+//
+// Dictionary (History Buffer) Management
+//
+bool DtRepeatSymbol(uint32_t Length, uint32_t Distance);
+void DtInitialize(uint8_t* HistoryBuffer, uint32_t Position);
+bool DtSetLimit(uint32_t Limit);
+void DtPutSymbol(uint8_t Symbol);
+uint8_t DtGetSymbol(uint32_t Distance);
+bool DtCanWrite(uint32_t* Position);
+bool DtIsComplete(uint32_t* BytesProcessed);
+
+//
+// Range Decoder
+//
+uint8_t RcGetBitTree(uint16_t* BitModel, uint16_t Limit);
+uint8_t RcGetReverseBitTree(uint16_t* BitModel, uint8_t HighestBit);
+uint8_t RcDecodeMatchedBitTree(uint16_t* BitModel, uint8_t MatchByte);
+uint32_t RcGetFixed(uint8_t HighestBit);
+bool RcInitialize(uint16_t* ChunkSize);
+uint8_t RcIsBitSet(uint16_t* Probability);
+void RcNormalize(void);
+bool RcCanRead(void);
+bool RcIsComplete(uint32_t* Offset);
+void RcSetDefaultProbability(uint16_t* Probability);
+
+//
+// LZMA Decoder
+//
+bool LzDecode(void);
+bool LzInitialize(uint8_t Properties);
+void LzResetState(void);
+
+//
+// LZMA2 Decoder
+//
+bool Lz2DecodeStream(uint32_t* BytesProcessed, bool GetSizeOnly);
+#ifdef MINLZ_INTEGRITY_CHECKS
+//
+// Checksum Management
+//
+uint32_t OsComputeCrc32(uint32_t Initial, const uint8_t* Data, uint32_t Length);
+#define Crc32(Buffer, Length) OsComputeCrc32(0, (const uint8_t*)Buffer, Length)
+#endif
diff --git a/tools/src/minilzlib/minlzma.h b/tools/src/minilzlib/minlzma.h
new file mode 100644
index 0000000..f7ca4bd
--- /dev/null
+++ b/tools/src/minilzlib/minlzma.h
@@ -0,0 +1,33 @@
+#pragma once
+
+#include <stdbool.h>
+
+/*!
+ * @brief Decompresses an XZ stream from InputBuffer into OutputBuffer.
+ *
+ * @detail The XZ stream must contain a single block with an LZMA2 filter
+ * and no BJC2 filters, using default LZMA properties, and using
+ * either CRC32 or None as the checksum type.
+ *
+ * @param[in] InputBuffer - A fully formed buffer containing the XZ stream.
+ * @param[in,out] InputSize - The size of the input buffer. On output, the size
+ * consumed from the input buffer.
+ * @param[in] OutputBuffer - A fully allocated buffer to receive the output.
+ * Callers can pass in NULL if they do not intend to decompress,
+ * in combination with setting OutputSize to 0, in order to query
+ * the final expected size of the decompressed buffer.
+ * @param[in,out] OutputSize - On input, the size of the buffer. On output, the
+ * size of the decompressed result.
+ *
+ * @return true - The input buffer was fully decompressed in OutputBuffer,
+ * or no decompression was requested, the size of the decompressed
+ * buffer was returned in OutputSIze.
+ * false - A failure occurred during the decompression process.
+ */
+bool
+XzDecode (
+ uint8_t* InputBuffer,
+ uint32_t* InputSize,
+ uint8_t* OutputBuffer,
+ uint32_t* OutputSize
+ );
diff --git a/tools/src/minilzlib/rangedec.c b/tools/src/minilzlib/rangedec.c
new file mode 100644
index 0000000..6a9f84f
--- /dev/null
+++ b/tools/src/minilzlib/rangedec.c
@@ -0,0 +1,395 @@
+/*++
+
+Copyright (c) Alex Ionescu. All rights reserved.
+
+Module Name:
+
+ rangedec.c
+
+Abstract:
+
+ This module implements the Range Decoder, which is how LZMA describes the
+ arithmetic coder that it uses to represent the binary representation of the
+ LZ77 match length-distance pairs after the initial compression pass. At the
+ implementation level, this coder works with an alphabet of only 2 symbols:
+ the bit "0", and the bit "1", so there are only ever two probability ranges
+ that need to be checked each pass. In LZMA, a probability of 100% encodes a
+ "0", while 0% encodes a "1". Initially, all probabilities are assumed to be
+ 50%. Probabilities are stored using 11-bits (2048 \=\= 100%), and thus use 16
+ bits of storage. Finally, the range decoder is adaptive, meaning that each
+ time a bit is decoded, the probabilities are updated: each 0 increases the
+ probability of another 0, and each 1 decrases it. The algorithm adapts the
+ probabilities using an exponential moving average with a shift ratio of 5.
+
+Author:
+
+ Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version
+
+Environment:
+
+ Windows & Linux, user mode and kernel mode.
+
+--*/
+
+#include "minlzlib.h"
+
+//
+// The range decoder uses 11 probability bits, where 2048 is 100% chance of a 0
+//
+#define LZMA_RC_PROBABILITY_BITS 11
+#define LZMA_RC_MAX_PROBABILITY (1 << LZMA_RC_PROBABILITY_BITS)
+const uint16_t k_LzmaRcHalfProbability = LZMA_RC_MAX_PROBABILITY / 2;
+
+//
+// The range decoder uses an exponential moving average of the last probability
+// hit (match or miss) with an adaptation rate of 5 bits (which falls in the
+// middle of its 11 bits used to encode a probability.
+//
+#define LZMA_RC_ADAPTATION_RATE_SHIFT 5
+
+//
+// The range decoder has enough precision for the range only as long as the top
+// 8 bits are still set. Once it falls below, it needs a renormalization step.
+//
+#define LZMA_RC_MIN_RANGE (1 << 24)
+
+//
+// The range decoder must be initialized with 5 bytes, the first of which is
+// ignored
+//
+#define LZMA_RC_INIT_BYTES 5
+
+//
+// State used for the binary adaptive arithmetic coder (LZMA Range Decoder)
+//
+typedef struct _RANGE_DECODER_STATE
+{
+ //
+ // Start and end location of the current stream's range encoder buffer
+ //
+ uint8_t* Start;
+ uint8_t* Limit;
+ //
+ // Current probability range and 32-bit arithmetic encoded sequence code
+ //
+ uint32_t Range;
+ uint32_t Code;
+} RANGE_DECODER_STATE, *PRANGE_DECODER_STATE;
+RANGE_DECODER_STATE RcState;
+
+bool
+RcInitialize (
+ uint16_t* ChunkSize
+ )
+{
+ uint8_t i, rcByte;
+ uint8_t* chunkEnd;
+
+ //
+ // Make sure that the input buffer has enough space for the requirements of
+ // the range encoder. We (temporarily) seek forward to validate this.
+ //
+ if (!BfSeek(*ChunkSize, &chunkEnd))
+ {
+ return false;
+ }
+ BfSeek(-*ChunkSize, &chunkEnd);
+
+ //
+ // The initial probability range is set to its highest value, after which
+ // the next 5 bytes are used to initialize the initial code. Note that the
+ // first byte outputted by the encoder is always going to be zero, so it is
+ // ignored here.
+ //
+ RcState.Range = (uint32_t)-1;
+ RcState.Code = 0;
+ for (i = 0; i < LZMA_RC_INIT_BYTES; i++)
+ {
+ BfRead(&rcByte);
+ RcState.Code = (RcState.Code << 8) | rcByte;
+ }
+
+ //
+ // Store our current location in the buffer now, and how far we can go on
+ // reading. Then decrease the total chunk size by the count of init bytes,
+ // so that the caller can check, once done (RcIsComplete), if the code has
+ // become 0 exactly when the compressed chunk size has been fully consumed
+ // by the decoder.
+ //
+ BfSeek(0, &RcState.Start);
+ RcState.Limit = RcState.Start + *ChunkSize;
+ *ChunkSize -= LZMA_RC_INIT_BYTES;
+ return true;
+}
+
+bool
+RcCanRead (
+ void
+ )
+{
+ uint8_t* pos;
+ //
+ // We can keep reading symbols as long as we haven't reached the end of the
+ // input buffer yet.
+ //
+ BfSeek(0, &pos);
+ return pos <= RcState.Limit;
+}
+
+bool
+RcIsComplete (
+ uint32_t* BytesProcessed
+ )
+{
+ uint8_t* pos;
+ //
+ // When the last symbol has been decoded, the last code should be zero as
+ // there is nothing left to describe. Return the offset in the buffer where
+ // this occurred (which should be equal to the compressed size).
+ //
+ BfSeek(0, &pos);
+ *BytesProcessed = (uint32_t)(pos - RcState.Start);
+ return (RcState.Code == 0);
+}
+
+void
+RcNormalize (
+ void
+ )
+{
+ uint8_t rcByte;
+ //
+ // Whenever we drop below 24 bits, there is no longer enough precision in
+ // the probability range not to avoid a "stuck" state where we cannot tell
+ // apart the two branches (above/below the probability range) because the
+ // two options appear identical with the number of precision bits that we
+ // have. In this case, shift the state by a byte (8 bits) and read another.
+ //
+ if (RcState.Range < LZMA_RC_MIN_RANGE)
+ {
+ RcState.Range <<= 8;
+ RcState.Code <<= 8;
+ BfRead(&rcByte);
+ RcState.Code |= rcByte;
+ }
+}
+
+void
+RcAdapt (
+ bool Miss,
+ uint16_t* Probability
+ )
+{
+ //
+ // In the canonical range encoders out there (including this one used by
+ // LZMA, we want the probability to adapt (change) as we read more or less
+ // bits that match our expectation. In order to quickly adapt to change,
+ // use an exponential moving average. The standard way of doing this is to
+ // use an integer based adaptation with a shift that's somewhere between
+ // {1, bits-1}. Since LZMA uses 11 bits for its model, 5 is a nice number
+ // that lands exactly between 1 and 10.
+ //
+ if (Miss)
+ {
+ *Probability -= *Probability >> LZMA_RC_ADAPTATION_RATE_SHIFT;
+ }
+ else
+ {
+ *Probability += (LZMA_RC_MAX_PROBABILITY - *Probability) >>
+ LZMA_RC_ADAPTATION_RATE_SHIFT;
+ }
+}
+
+uint8_t
+RcIsBitSet (
+ uint16_t* Probability
+ )
+{
+ uint32_t bound;
+ uint8_t bit;
+
+ //
+ // Always begin by making sure the range has been normalized for precision
+ //
+ RcNormalize();
+
+ //
+ // Check if the current arithmetic code is descried by the next calculated
+ // proportionally-divided probability range. Recall that the probabilities
+ // encode the chance of the symbol (bit) being a 0 -- not a 1!
+ //
+ // Therefore, if the next chunk of the code lies outside of this new range,
+ // we are still on the path to our 0. Otherwise, if the code is now part of
+ // the newly defined range (inclusive), then we produce a 1 and limit the
+ // range to produce a new range and code for the next decoding pass.
+ //
+ bound = (RcState.Range >> LZMA_RC_PROBABILITY_BITS) * *Probability;
+ if (RcState.Code < bound)
+ {
+ RcState.Range = bound;
+ bit = 0;
+ }
+ else
+ {
+ RcState.Range -= bound;
+ RcState.Code -= bound;
+ bit = 1;
+ }
+
+ //
+ // Always finish by adapt the probabilities based on the bit value
+ //
+ RcAdapt(bit, Probability);
+ return bit;
+}
+
+uint8_t
+RcIsFixedBitSet(
+ void
+ )
+{
+ uint8_t bit;
+
+ //
+ // This is a specialized version of RcIsBitSet with two differences:
+ //
+ // First, there is no adaptive probability -- it is hardcoded to 50%.
+ //
+ // Second, because there are 11 bits per probability, and 50% is 1<<10,
+ // "(LZMA_RC_PROBABILITY_BITS) * Probability" is essentially 1. As such,
+ // we can just shift by 1 (in other words, halving the range).
+ //
+ RcNormalize();
+ RcState.Range >>= 1;
+ if (RcState.Code < RcState.Range)
+ {
+ bit = 0;
+ }
+ else
+ {
+ RcState.Code -= RcState.Range;
+ bit = 1;
+ }
+ return bit;
+}
+
+uint8_t
+RcGetBitTree (
+ uint16_t* BitModel,
+ uint16_t Limit
+ )
+{
+ uint16_t symbol;
+
+ //
+ // Context probability bit trees always begin at index 1. Iterate over each
+ // decoded bit and just keep shifting it in place, until we reach the total
+ // expected number of bits, which should never be over 8 (limit is 0x100).
+ //
+ // Once decoded, always subtract the limit back from the symbol since we go
+ // one bit "past" the limit in the loop, as a side effect of the tree being
+ // off-by-one.
+ //
+ for (symbol = 1; symbol < Limit; )
+ {
+ symbol = (symbol << 1) | RcIsBitSet(&BitModel[symbol]);
+ }
+ return (symbol - Limit) & 0xFF;
+}
+
+uint8_t
+RcGetReverseBitTree (
+ uint16_t* BitModel,
+ uint8_t HighestBit
+ )
+{
+ uint16_t symbol;
+ uint8_t i, bit, result;
+
+ //
+ // This is the same logic as in RcGetBitTree, but with the bits actually
+ // encoded in reverse order. We keep track of the probability index as the
+ // "symbol" just like RcGetBitTree, but actually decode the result in the
+ // opposite order.
+ //
+ for (i = 0, symbol = 1, result = 0; i < HighestBit; i++)
+ {
+ bit = RcIsBitSet(&BitModel[symbol]);
+ symbol = (symbol << 1) | bit;
+ result |= bit << i;
+ }
+ return result;
+}
+
+uint8_t
+RcDecodeMatchedBitTree (
+ uint16_t* BitModel,
+ uint8_t MatchByte
+ )
+{
+ uint16_t symbol, bytePos, matchBit;
+ uint8_t bit;
+
+ //
+ // Parse each bit in the "match byte" (see LzDecodeLiteral), which we call
+ // a "match bit".
+ //
+ // Then, treat this as a special bit tree decoding where two possible trees
+ // are used: one for when the "match bit" is set, and a separate one for
+ // when the "match bit" is not set. Since each tree can encode up to 256
+ // symbols, each one has 0x100 slots.
+ //
+ // Finally, we have the original bit tree which we'll revert back to once
+ // the match bits are no longer in play, which we parse for the remainder
+ // of the symbol.
+ //
+ for (bytePos = MatchByte, symbol = 1; symbol < 0x100; bytePos <<= 1)
+ {
+ matchBit = (bytePos >> 7) & 1;
+
+ bit = RcIsBitSet(&BitModel[symbol + (0x100 * (matchBit + 1))]);
+ symbol = (symbol << 1) | bit;
+
+ if (matchBit != bit)
+ {
+ while (symbol < 0x100)
+ {
+ symbol = (symbol << 1) | RcIsBitSet(&BitModel[symbol]);
+ }
+ break;
+ }
+ }
+ return symbol & 0xFF;
+}
+
+uint32_t
+RcGetFixed (
+ uint8_t HighestBit
+ )
+{
+ uint32_t symbol;
+
+ //
+ // Fixed probability bit trees always begin at index 0. Iterate over each
+ // decoded bit and just keep shifting it in place, until we reach the total
+ // expected number of bits (typically never higher than 26 -- the maximum
+ // number of "direct bits" that the distance of a "match" can encode).
+ //
+ symbol = 0;
+ do
+ {
+ symbol = (symbol << 1) | RcIsFixedBitSet();
+ } while (--HighestBit > 0);
+ return symbol;
+}
+
+void
+RcSetDefaultProbability (
+ uint16_t* Probability
+ )
+{
+ //
+ // By default, we initialize the probabilities to 0.5 (50% chance).
+ //
+ *Probability = k_LzmaRcHalfProbability;
+}
diff --git a/tools/src/minilzlib/xzstream.c b/tools/src/minilzlib/xzstream.c
new file mode 100644
index 0000000..dd5078c
--- /dev/null
+++ b/tools/src/minilzlib/xzstream.c
@@ -0,0 +1,547 @@
+/*++
+
+Copyright (c) Alex Ionescu. All rights reserved.
+
+Module Name:
+
+ xzstream.c
+
+Abstract:
+
+ This module implements the XZ stream format decoding, including support for
+ parsing the stream header and block header, and then handing off the block
+ decoding to the LZMA2 decoder. Finally, if "meta checking" is enabled, then
+ the index and stream footer are also parsed and validated. Optionally, each
+ of these component structures can be checked against its CRC32 checksum, if
+ "integrity checking" has been enabled. Note that this library only supports
+ single-stream, single-block XZ files that have CRC32 (or None) set as their
+ block checking algorithm. Finally, no BJC filters are supported, and files
+ with a compressed/uncompressed size metadata indicator are not handled.
+
+Author:
+
+ Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version
+
+Environment:
+
+ Windows & Linux, user mode and kernel mode.
+
+--*/
+
+#define MINLZ_META_CHECKS
+
+#include "minlzlib.h"
+#include "xzstream.h"
+#include "../utils.h"
+
+//
+// XzDecodeBlockHeader can return "I successfully found a block",
+// "I failed/bad block header", or "there was no block header".
+// Though minlzlib explicitly only claims to handle files with a
+// single block, it needs to also handle files with no blocks at all.
+// (Produced by "xz" when compressing an empty input file)
+//
+typedef enum _XZ_DECODE_BLOCK_HEADER_RESULT {
+ XzBlockHeaderFail = 0,
+ XzBlockHeaderSuccess = 1,
+ XzBlockHeaderNoBlock = 2
+} XZ_DECODE_BLOCK_HEADER_RESULT;
+
+const uint8_t k_XzLzma2FilterIdentifier = 0x21;
+
+#ifdef _WIN32
+void __security_check_cookie(_In_ uintptr_t _StackCookie) { (void)(_StackCookie); }
+#endif
+
+#ifdef MINLZ_META_CHECKS
+//
+// XZ Stream Container State
+//
+typedef struct _CONTAINER_STATE
+{
+ //
+ // Size of the XZ header and the index, used to validate against footer
+ //
+ uint32_t HeaderSize;
+ uint32_t IndexSize;
+ //
+ // Size of the compressed block and its checksum
+ //
+ uint32_t UncompressedBlockSize;
+ uint32_t UnpaddedBlockSize;
+ uint32_t ChecksumSize;
+} CONTAINER_STATE, * PCONTAINER_STATE;
+CONTAINER_STATE Container;
+#endif
+
+#ifdef MINLZ_META_CHECKS
+bool
+XzDecodeVli (
+ vli_type* Vli
+ )
+{
+ uint8_t vliByte;
+ uint32_t bitPos;
+
+ //
+ // Read the initial VLI byte (might be the value itself)
+ //
+ if (!BfRead(&vliByte))
+ {
+ return false;
+ }
+ *Vli = vliByte & 0x7F;
+
+ //
+ // Check if this was a complex VLI (and we have space for it)
+ //
+ bitPos = 7;
+ while ((vliByte & 0x80) != 0)
+ {
+ //
+ // Read the next byte
+ //
+ if (!BfRead(&vliByte))
+ {
+ return false;
+ }
+
+ //
+ // Make sure we're not decoding an invalid VLI
+ //
+ if ((bitPos == (7 * VLI_BYTES_MAX)) || (vliByte == 0))
+ {
+ return false;
+ }
+
+ //
+ // Decode it and move to the next 7 bits
+ //
+ *Vli |= (vli_type)((vliByte & 0x7F) << bitPos);
+ bitPos += 7;
+ }
+ return true;
+}
+
+bool
+XzDecodeIndex (
+ void
+ )
+{
+ uint32_t vli;
+ uint8_t* indexStart;
+ uint8_t* indexEnd;
+ uint32_t* pCrc32;
+ uint8_t indexByte;
+
+ //
+ // Remember where the index started so we can compute its size
+ //
+ BfSeek(0, &indexStart);
+
+ //
+ // The index always starts out with an empty byte
+ //
+ if (!BfRead(&indexByte) || (indexByte != 0))
+ {
+ return false;
+ }
+
+ //
+ // Then the count of blocks, which we expect to be 1
+ //
+ if (!XzDecodeVli(&vli) || (vli != 1))
+ {
+ return false;
+ }
+
+ //
+ // Then the unpadded block size, which should match
+ //
+ if (!XzDecodeVli(&vli) || (Container.UnpaddedBlockSize != vli))
+ {
+ return false;
+ }
+
+ //
+ // Then the uncompressed block size, which should match
+ //
+ if (!XzDecodeVli(&vli) || (Container.UncompressedBlockSize != vli))
+ {
+ return false;
+ }
+
+ //
+ // Then we pad to the next multiple of 4
+ //
+ if (!BfAlign())
+ {
+ return false;
+ }
+
+ //
+ // Store the index size with padding to validate the footer later
+ //
+ BfSeek(0, &indexEnd);
+ Container.IndexSize = (uint32_t)(indexEnd - indexStart);
+
+ //
+ // Read the CRC32, which is not part of the index size
+ //
+ if (!BfSeek(sizeof(*pCrc32), (uint8_t**)&pCrc32))
+ {
+ return false;
+ }
+#ifdef MINLZ_INTEGRITY_CHECKS
+ //
+ // Make sure the index is not corrupt
+ //
+ if (Crc32(indexStart, Container.IndexSize) != *pCrc32)
+ {
+ return false;
+ }
+#endif
+ return true;
+}
+
+bool
+XzDecodeStreamFooter (
+ void
+ )
+{
+ PXZ_STREAM_FOOTER streamFooter;
+
+ //
+ // Seek past the footer, making sure we have space in the input stream
+ //
+ if (!BfSeek(sizeof(*streamFooter), (uint8_t**)&streamFooter))
+ {
+ return false;
+ }
+
+ //
+ // Validate the footer magic
+ //
+ if (streamFooter->Magic != 'ZY')
+ {
+ return false;
+ }
+
+ //
+ // Validate no flags other than checksum type are set
+ //
+ if ((streamFooter->u.Flags != 0) &&
+ ((streamFooter->u.s.CheckType != XzCheckTypeCrc32) &&
+ (streamFooter->u.s.CheckType != XzCheckTypeCrc64) &&
+ (streamFooter->u.s.CheckType != XzCheckTypeSha2) &&
+ (streamFooter->u.s.CheckType != XzCheckTypeNone)))
+ {
+ return false;
+ }
+
+ //
+ // Validate if the footer accurately describes the size of the index
+ //
+ if (Container.IndexSize != (streamFooter->BackwardSize * 4))
+ {
+ return false;
+ }
+#ifdef MINLZ_INTEGRITY_CHECKS
+ //
+ // Compute the footer's CRC32 and make sure it's not corrupted
+ //
+ if (Crc32(&streamFooter->BackwardSize,
+ sizeof(streamFooter->BackwardSize) +
+ sizeof(streamFooter->u.Flags)) !=
+ streamFooter->Crc32)
+ {
+ return false;
+ }
+#endif
+ return true;
+}
+#endif
+
+bool
+XzDecodeBlock (
+ uint8_t* OutputBuffer,
+ uint32_t* BlockSize
+ )
+{
+#ifdef MINLZ_META_CHECKS
+ uint8_t *inputStart, *inputEnd;
+#endif
+ //
+ // Decode the LZMA2 stream. If full integrity checking is enabled, also
+ // save the offset before and after decoding, so we can save the block
+ // sizes and compare them against the footer and index after decoding.
+ //
+#ifdef MINLZ_META_CHECKS
+ BfSeek(0, &inputStart);
+#endif
+ if (!Lz2DecodeStream(BlockSize, OutputBuffer == NULL))
+ {
+ return false;
+ }
+#ifdef MINLZ_META_CHECKS
+ BfSeek(0, &inputEnd);
+ Container.UnpaddedBlockSize = Container.HeaderSize +
+ (uint32_t)(inputEnd - inputStart);
+ Container.UncompressedBlockSize = *BlockSize;
+#endif
+ //
+ // After the block data, we need to pad to 32-bit alignment
+ //
+ if (!BfAlign())
+ {
+ return false;
+ }
+#if defined(MINLZ_INTEGRITY_CHECKS) || defined(MINLZ_META_CHECKS)
+ //
+ // Finally, move past the size of the checksum if any, then compare it with
+ // with the actual CRC32 of the block, if integrity checks are enabled. If
+ // meta checks are enabled, update the block size so the index checking can
+ // validate it.
+ //
+ if (!BfSeek(Container.ChecksumSize, &inputEnd))
+ {
+ return false;
+ }
+#endif
+ (void)(OutputBuffer);
+#ifdef MINLZ_INTEGRITY_CHECKS
+ if ((OutputBuffer != NULL) &&
+ (Crc32(OutputBuffer, *BlockSize) != *(uint32_t*)inputEnd))
+ {
+ return false;
+ }
+#endif
+#ifdef MINLZ_META_CHECKS
+ Container.UnpaddedBlockSize += Container.ChecksumSize;
+#endif
+ return true;
+}
+
+bool
+XzDecodeStreamHeader (
+ void
+ )
+{
+ PXZ_STREAM_HEADER streamHeader;
+
+ //
+ // Seek past the header, making sure we have space in the input stream
+ //
+ if (!BfSeek(sizeof(*streamHeader), (uint8_t**)&streamHeader))
+ {
+ return false;
+ }
+#ifdef MINLZ_META_CHECKS
+ //
+ // Validate the header magic
+ //
+ if ((*(uint32_t*)&streamHeader->Magic[1] != 'ZXz7') ||
+ (streamHeader->Magic[0] != 0xFD) ||
+ (streamHeader->Magic[5] != 0x00))
+ {
+ return false;
+ }
+
+ //
+ // Validate no flags other than checksum type are set
+ //
+ if ((streamHeader->u.Flags != 0) &&
+ ((streamHeader->u.s.CheckType != XzCheckTypeCrc32) &&
+ (streamHeader->u.s.CheckType != XzCheckTypeCrc64) &&
+ (streamHeader->u.s.CheckType != XzCheckTypeSha2) &&
+ (streamHeader->u.s.CheckType != XzCheckTypeNone)))
+ {
+ return false;
+ }
+
+ //
+ // Remember that a checksum might come at the end of the block later
+ //
+ if (streamHeader->u.s.CheckType == 0)
+ {
+ Container.ChecksumSize = 0;
+ } else {
+ Container.ChecksumSize = 4 << ((streamHeader->u.s.CheckType - 1) / 3);
+ }
+
+#endif
+#ifdef MINLZ_INTEGRITY_CHECKS
+ //
+ // Compute the header's CRC32 and make sure it's not corrupted
+ //
+ if (Crc32(&streamHeader->u.Flags, sizeof(streamHeader->u.Flags)) !=
+ streamHeader->Crc32)
+ {
+ return false;
+ }
+#endif
+ return true;
+}
+
+XZ_DECODE_BLOCK_HEADER_RESULT
+XzDecodeBlockHeader (
+ void
+ )
+{
+ PXZ_BLOCK_HEADER blockHeader;
+#ifdef MINLZ_META_CHECKS
+ uint32_t size;
+#endif
+ //
+ // Seek past the header, making sure we have space in the input stream
+ //
+ if (!BfSeek(sizeof(*blockHeader), (uint8_t**)&blockHeader))
+ {
+ return XzBlockHeaderFail;
+ }
+ if (blockHeader->Size == 0)
+ {
+ //
+ // That's no block! That's an index!
+ //
+ BfSeek((uint32_t)(-(uint16_t)sizeof(*blockHeader)),
+ (uint8_t**)&blockHeader);
+ return XzBlockHeaderNoBlock;
+ }
+#ifdef MINLZ_META_CHECKS
+ //
+ // Validate that the size of the header is what we expect
+ //
+ Container.HeaderSize = (blockHeader->Size + 1) * 4;
+ if (Container.HeaderSize != sizeof(*blockHeader))
+ {
+ return XzBlockHeaderFail;
+ }
+
+ //
+ // Validate that no additional flags or filters are enabled
+ //
+ if (blockHeader->u.Flags != 0)
+ {
+ return XzBlockHeaderFail;
+ }
+
+ //
+ // Validate that the only filter is the LZMA2 filter
+ //
+ if (blockHeader->LzmaFlags.Id != k_XzLzma2FilterIdentifier)
+ {
+ return XzBlockHeaderFail;
+ }
+
+ //
+ // With the expected number of property bytes
+ //
+ if (blockHeader->LzmaFlags.Size
+ != sizeof(blockHeader->LzmaFlags.u.Properties))
+ {
+ return XzBlockHeaderFail;
+ }
+
+ //
+ // The only property is the dictionary size, make sure it is valid.
+ //
+ // We don't actually need to store or compare the size with anything since
+ // the library expects the caller to always put in a buffer that's large
+ // enough to contain the full uncompressed file (or calling it in "get size
+ // only" mode to get this information).
+ //
+ // This output buffer can thus be smaller than the size of the dictionary
+ // which is absolutely OK as long as that's actually the size of the output
+ // file. If callers pass in a buffer size that's too small, decoding will
+ // fail at later stages anyway, and that's incorrect use of minlzlib.
+ //
+ size = blockHeader->LzmaFlags.u.s.DictionarySize;
+ if (size > 39)
+ {
+ return XzBlockHeaderFail;
+ }
+#ifdef MINLZ_INTEGRITY_CHECKS
+ //
+ // Compute the header's CRC32 and make sure it's not corrupted
+ //
+ if (Crc32(blockHeader,
+ Container.HeaderSize - sizeof(blockHeader->Crc32)) !=
+ blockHeader->Crc32)
+ {
+ return XzBlockHeaderFail;
+ }
+#endif
+#endif
+ return XzBlockHeaderSuccess;
+}
+
+bool
+XzDecode (
+ uint8_t* InputBuffer,
+ uint32_t* InputSize,
+ uint8_t* OutputBuffer,
+ uint32_t* OutputSize
+ )
+{
+
+ //
+ // Initialize the input buffer descriptor and history buffer (dictionary)
+ //
+ BfInitialize(InputBuffer, *InputSize ? *InputSize : UINT32_MAX);
+ DtInitialize(OutputBuffer, *OutputSize);
+
+ //
+ // Decode the stream header for check for validity
+ //
+ if (!XzDecodeStreamHeader())
+ {
+ printf("header decode failed\n");
+ return false;
+ }
+
+ //
+ // Decode the block header for check for validity
+ //
+ switch (XzDecodeBlockHeader())
+ {
+ case XzBlockHeaderFail:
+ printf("block header failed\n");
+ return false;
+ case XzBlockHeaderNoBlock:
+ *OutputSize = 0;
+ break;
+ case XzBlockHeaderSuccess:
+ //
+ // Decode the actual block
+ //
+ if (!XzDecodeBlock(OutputBuffer, OutputSize))
+ {
+ printf("block decode failed\n");
+ return false;
+ }
+ break;
+ }
+
+#ifdef MINLZ_META_CHECKS
+ //
+ // Decode the index for validity checks
+ //
+ if (!XzDecodeIndex())
+ {
+ return false;
+ }
+
+ //
+ // And finally decode the footer as a final set of checks
+ //
+ if (!XzDecodeStreamFooter())
+ {
+ return false;
+ }
+
+ if (!*InputSize)
+ *InputSize = BfTell();
+#endif
+ return true;
+}
diff --git a/tools/src/minilzlib/xzstream.h b/tools/src/minilzlib/xzstream.h
new file mode 100644
index 0000000..f227879
--- /dev/null
+++ b/tools/src/minilzlib/xzstream.h
@@ -0,0 +1,123 @@
+/*++
+
+Copyright (c) Alex Ionescu. All rights reserved.
+
+Module Name:
+
+ xzstream.h
+
+Abstract:
+
+ This header file contains C-style data structures and enumerations that map
+ back to the XZ stream and file format standard, including for the decoding
+ of Variable Length Integers (VLI). This includes definitions for the stream
+ header, block header, index and stream footer, and associated check types.
+
+Author:
+
+ Alex Ionescu (@aionescu) 15-Apr-2020 - Initial version
+
+Environment:
+
+ Windows & Linux, user mode and kernel mode.
+
+--*/
+
+#pragma once
+
+//
+// XZ streams encode certain numbers as "variable length integers", with 7 bits
+// for the data, and a high bit to encode that another byte must be consumed.
+//
+typedef uint32_t vli_type;
+#define VLI_BYTES_MAX (sizeof(vli_type) * 8 / 7)
+
+//
+// These are the possible supported types for integrity checking in an XZ file
+//
+typedef enum _XZ_CHECK_TYPES
+{
+ XzCheckTypeNone = 0,
+ XzCheckTypeCrc32 = 1,
+ XzCheckTypeCrc64 = 4,
+ XzCheckTypeSha2 = 10
+} XZ_CHECK_TYPES;
+
+//
+// This describes the first 12 bytes of any XZ container file / stream
+//
+typedef struct _XZ_STREAM_HEADER
+{
+ uint8_t Magic[6];
+ union
+ {
+ struct
+ {
+ uint8_t ReservedFlags;
+ uint8_t CheckType : 4;
+ uint8_t ReservedType : 4;
+ } s;
+ uint16_t Flags;
+ } u;
+ uint32_t Crc32;
+} XZ_STREAM_HEADER, * PXZ_STREAM_HEADER;
+static_assert(sizeof(XZ_STREAM_HEADER) == 12, "Invalid Stream Header Size");
+
+//
+// This describes the last 12 bytes of any XZ container file / stream
+//
+typedef struct _XZ_STREAM_FOOTER
+{
+ uint32_t Crc32;
+ uint32_t BackwardSize;
+ union
+ {
+ struct
+ {
+ uint8_t ReservedFlags;
+ uint8_t CheckType : 4;
+ uint8_t ReservedType : 4;
+ } s;
+ uint16_t Flags;
+ } u;
+ uint16_t Magic;
+} XZ_STREAM_FOOTER, * PXZ_STREAM_FOOTER;
+static_assert(sizeof(XZ_STREAM_FOOTER) == 12, "Invalid Stream Footer Size");
+
+//
+// This describes the beginning of a compressed payload stored in an XZ stream,
+// with hardcoded expectations for an LZMA2-compressed payload that has 0 extra
+// filters (such as BCJ2).
+//
+typedef struct _XZ_BLOCK_HEADER
+{
+ uint8_t Size;
+ union
+ {
+ struct
+ {
+ uint8_t FilterCount : 2;
+ uint8_t Reserved : 4;
+ uint8_t HasCompressedSize : 1;
+ uint8_t HasUncompressedSize : 1;
+ } s;
+ uint8_t Flags;
+ } u;
+ struct
+ {
+ uint8_t Id;
+ uint8_t Size;
+ union
+ {
+ struct
+ {
+ uint8_t DictionarySize : 6;
+ uint8_t Reserved : 2;
+ } s;
+ uint8_t Properties;
+ } u;
+ } LzmaFlags;
+ uint8_t Padding[3];
+ uint32_t Crc32;
+} XZ_BLOCK_HEADER, * PXZ_BLOCK_HEADER;
+static_assert(sizeof(XZ_BLOCK_HEADER) == 12, "Invalid Block Header Size");