All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] LZMA support in i386-pc kernel
@ 2008-07-02 13:39 Bean
  2008-07-02 14:42 ` Robert Millan
                   ` (2 more replies)
  0 siblings, 3 replies; 14+ messages in thread
From: Bean @ 2008-07-02 13:39 UTC (permalink / raw)
  To: The development of GRUB 2

[-- Attachment #1: Type: text/plain, Size: 972 bytes --]

Hi,

This patch add support for lzma decompression. The assembly code
lzma_decode.S is manually optimized to reduce size. The result decoder
is tiny, only 416 bytes longer than the lzo version.

I also include lzma encode from the LZMA SDK. grub needs to use the
ANSI-C version of encoder, which is only present in the latest 4.58
beta. I can't find ready to use shared library in most distro.
Including the encoder/decoder has advantages as well. We don't need to
worry about the host os, and lzma encoder/decoder can be used in other
place, like font compression.

I use lzma as default, it's still possible to use the old lzo
compression, you just need to add --enable-lzo option when running
configure.

PS, here are some information about the lzma decoder:

properties: lc = 3 lp = 0 pb = 2
memory requirement for the decoder: 15980 bytes

Result of
./grub-mkimage -o core.img biosdisk pc ext2 lvm raid

lzma version: 27,776 bytes
lzo version: 32,768 bytes

-- 
Bean

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: lzma.diff --]
[-- Type: text/x-diff; name=lzma.diff, Size: 160023 bytes --]

diff --git a/Makefile.in b/Makefile.in
index c29c5fd..f586a37 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -87,6 +87,7 @@ UNIFONT_HEX = @UNIFONT_HEX@
 # Options.
 enable_grub_emu = @enable_grub_emu@
 enable_grub_fstest = @enable_grub_fstest@
+enable_lzo = @enable_lzo@
 
 ### General variables.
 
diff --git a/conf/i386-pc.rmk b/conf/i386-pc.rmk
index 7b7924f..b677a96 100644
--- a/conf/i386-pc.rmk
+++ b/conf/i386-pc.rmk
@@ -72,10 +72,15 @@ sbin_UTILITIES += grub-emu
 endif
 
 # For grub-mkimage.
+ifeq ($(enable_lzo), yes)
 grub_mkimage_SOURCES = util/i386/pc/grub-mkimage.c util/misc.c \
 	util/resolve.c
-grub_mkimage_CFLAGS = -DGRUB_MEMORY_MACHINE_LINK_ADDR=$(GRUB_MEMORY_MACHINE_LINK_ADDR)
 grub_mkimage_LDFLAGS = $(LIBLZO)
+else
+grub_mkimage_SOURCES = util/i386/pc/grub-mkimage.c util/misc.c \
+	util/resolve.c lib/LzmaEnc.c lib/LzFind.c
+endif
+grub_mkimage_CFLAGS = -DGRUB_MEMORY_MACHINE_LINK_ADDR=$(GRUB_MEMORY_MACHINE_LINK_ADDR)
 util/i386/pc/grub-mkimage.c_DEPENDENCIES = Makefile
 
 # For grub-setup.
diff --git a/config.h.in b/config.h.in
index 1aef212..c8ff715 100644
--- a/config.h.in
+++ b/config.h.in
@@ -13,6 +13,9 @@
 /* Define it to \"data32\" or \"data32;\" to make GAS happy */
 #undef DATA32
 
+/* Use lzo compression */
+#undef ENABLE_LZO
+
 /* Define it to either end or _end */
 #undef END_SYMBOL
 
diff --git a/configure.ac b/configure.ac
index 5ec7a47..77514b3 100644
--- a/configure.ac
+++ b/configure.ac
@@ -143,19 +143,26 @@ fi
 
 # Check LZO when compiling for the i386-pc.
 if test "$target_cpu"-"$platform" = i386-pc; then
-  # There are three possibilities. LZO version 2 installed with the name
-  # liblzo2, with the name liblzo, and LZO version 1.
-  AC_CHECK_LIB(lzo2, __lzo_init_v2, [LIBLZO="-llzo2"],
-    AC_CHECK_LIB(lzo, __lzo_init_v2, [LIBLZO="-llzo"],
-      AC_CHECK_LIB(lzo, __lzo_init2, [LIBLZO="-llzo"],
-	       AC_MSG_ERROR([LZO library version 1.02 or later is required]))))
-  AC_SUBST(LIBLZO)
-  LIBS="$LIBS $LIBLZO"
-  AC_CHECK_FUNC(lzo1x_999_compress, , 
+  AC_ARG_ENABLE([lzo],
+	      [AS_HELP_STRING([--enable-lzo],
+                             [use lzo to compress kernel (default is lzma)])])
+  [if [ x"$enable_lzo" = xyes ]; then
+    # There are three possibilities. LZO version 2 installed with the name
+    # liblzo2, with the name liblzo, and LZO version 1.]
+    AC_DEFINE([ENABLE_LZO], [1], [Use lzo compression])
+    AC_CHECK_LIB([lzo2], [__lzo_init_v2], [LIBLZO="-llzo2"],
+      [AC_CHECK_LIB([lzo], [__lzo_init_v2], [LIBLZO="-llzo"],
+        [AC_CHECK_LIB([lzo], [__lzo_init2], [LIBLZO="-llzo"],
+	       [AC_MSG_ERROR([LZO library version 1.02 or later is required])])])])
+    AC_SUBST([LIBLZO])
+    [LIBS="$LIBS $LIBLZO"]
+    AC_CHECK_FUNC([lzo1x_999_compress], , 
 	        [AC_MSG_ERROR([LZO1X-999 must be enabled])])
 
-  # LZO version 2 uses lzo/lzo1x.h, while LZO version 1 uses lzo1x.h.
-  AC_CHECK_HEADERS(lzo/lzo1x.h lzo1x.h)
+    [# LZO version 2 uses lzo/lzo1x.h, while LZO version 1 uses lzo1x.h.]
+    AC_CHECK_HEADERS([lzo/lzo1x.h lzo1x.h])  
+  [fi]
+  AC_SUBST([enable_lzo])
 fi
 
 # Check for functions.
diff --git a/include/grub/i386/pc/kernel.h b/include/grub/i386/pc/kernel.h
index 13e8873..d8d9346 100644
--- a/include/grub/i386/pc/kernel.h
+++ b/include/grub/i386/pc/kernel.h
@@ -44,7 +44,11 @@
 #define GRUB_KERNEL_MACHINE_DATA_END		0x50
 
 /* The size of the first region which won't be compressed.  */
+#if defined(ENABLE_LZO)
 #define GRUB_KERNEL_MACHINE_RAW_SIZE		(GRUB_KERNEL_MACHINE_DATA_END + 0x450)
+#else
+#define GRUB_KERNEL_MACHINE_RAW_SIZE		(GRUB_KERNEL_MACHINE_DATA_END + 0x5F0)
+#endif
 
 #ifndef ASM_FILE
 
diff --git a/include/grub/lib/LzFind.h b/include/grub/lib/LzFind.h
new file mode 100644
index 0000000..3d74d0c
--- /dev/null
+++ b/include/grub/lib/LzFind.h
@@ -0,0 +1,116 @@
+/* LzFind.h  -- Match finder for LZ algorithms
+2008-04-04
+Copyright (c) 1999-2008 Igor Pavlov
+You can use any of the following license options:
+  1) GNU Lesser General Public License (GNU LGPL)
+  2) Common Public License (CPL)
+  3) Common Development and Distribution License (CDDL) Version 1.0
+  4) Igor Pavlov, as the author of this code, expressly permits you to
+     statically or dynamically link your code (or bind by name) to this file,
+     while you keep this file unmodified.
+*/
+
+#ifndef __LZFIND_H
+#define __LZFIND_H
+
+#include <grub/lib/LzmaTypes.h>
+
+typedef UInt32 CLzRef;
+
+typedef struct _CMatchFinder
+{
+  Byte *buffer;
+  UInt32 pos;
+  UInt32 posLimit;
+  UInt32 streamPos;
+  UInt32 lenLimit;
+
+  UInt32 cyclicBufferPos;
+  UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */
+
+  UInt32 matchMaxLen;
+  CLzRef *hash;
+  CLzRef *son;
+  UInt32 hashMask;
+  UInt32 cutValue;
+
+  Byte *bufferBase;
+  ISeqInStream *stream;
+  int streamEndWasReached;
+
+  UInt32 blockSize;
+  UInt32 keepSizeBefore;
+  UInt32 keepSizeAfter;
+
+  UInt32 numHashBytes;
+  int directInput;
+  int btMode;
+  /* int skipModeBits; */
+  int bigHash;
+  UInt32 historySize;
+  UInt32 fixedHashSize;
+  UInt32 hashSizeSum;
+  UInt32 numSons;
+  SRes result;
+  UInt32 crc[256];
+} CMatchFinder;
+
+#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((p)->buffer)
+#define Inline_MatchFinder_GetIndexByte(p, index) ((p)->buffer[(Int32)(index)])
+
+#define Inline_MatchFinder_GetNumAvailableBytes(p) ((p)->streamPos - (p)->pos)
+
+int MatchFinder_NeedMove(CMatchFinder *p);
+Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p);
+void MatchFinder_MoveBlock(CMatchFinder *p);
+void MatchFinder_ReadIfRequired(CMatchFinder *p);
+
+void MatchFinder_Construct(CMatchFinder *p);
+
+/* Conditions:
+     historySize <= 3 GB
+     keepAddBufferBefore + matchMaxLen + keepAddBufferAfter < 511MB
+*/
+int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
+    UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
+    ISzAlloc *alloc);
+void MatchFinder_Free(CMatchFinder *p, ISzAlloc *alloc);
+void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, UInt32 numItems);
+void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);
+
+UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *buffer, CLzRef *son,
+    UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue,
+    UInt32 *distances, UInt32 maxLen);
+
+/*
+Conditions:
+  Mf_GetNumAvailableBytes_Func must be called before each Mf_GetMatchLen_Func.
+  Mf_GetPointerToCurrentPos_Func's result must be used only before any other function
+*/
+
+typedef void (*Mf_Init_Func)(void *object);
+typedef Byte (*Mf_GetIndexByte_Func)(void *object, Int32 index);
+typedef UInt32 (*Mf_GetNumAvailableBytes_Func)(void *object);
+typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object);
+typedef UInt32 (*Mf_GetMatches_Func)(void *object, UInt32 *distances);
+typedef void (*Mf_Skip_Func)(void *object, UInt32);
+
+typedef struct _IMatchFinder
+{
+  Mf_Init_Func Init;
+  Mf_GetIndexByte_Func GetIndexByte;
+  Mf_GetNumAvailableBytes_Func GetNumAvailableBytes;
+  Mf_GetPointerToCurrentPos_Func GetPointerToCurrentPos;
+  Mf_GetMatches_Func GetMatches;
+  Mf_Skip_Func Skip;
+} IMatchFinder;
+
+void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable);
+
+void MatchFinder_Init(CMatchFinder *p);
+UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
+UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
+void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
+void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
+
+#endif
diff --git a/include/grub/lib/LzHash.h b/include/grub/lib/LzHash.h
new file mode 100644
index 0000000..59ca7c3
--- /dev/null
+++ b/include/grub/lib/LzHash.h
@@ -0,0 +1,56 @@
+/* LzHash.h  -- HASH functions for LZ algorithms
+2008-03-26
+Copyright (c) 1999-2008 Igor Pavlov
+Read LzFind.h for license options */
+
+#ifndef __LZHASH_H
+#define __LZHASH_H
+
+#define kHash2Size (1 << 10)
+#define kHash3Size (1 << 16)
+#define kHash4Size (1 << 20)
+
+#define kFix3HashSize (kHash2Size)
+#define kFix4HashSize (kHash2Size + kHash3Size)
+#define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)
+
+#define HASH2_CALC hashValue = cur[0] | ((UInt32)cur[1] << 8);
+
+#define HASH3_CALC { \
+  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+  hash2Value = temp & (kHash2Size - 1); \
+  hashValue = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; }
+
+#define HASH4_CALC { \
+  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+  hash2Value = temp & (kHash2Size - 1); \
+  hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); \
+  hashValue = (temp ^ ((UInt32)cur[2] << 8) ^ (p->crc[cur[3]] << 5)) & p->hashMask; }
+
+#define HASH5_CALC { \
+  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+  hash2Value = temp & (kHash2Size - 1); \
+  hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); \
+  hash4Value = (temp ^ ((UInt32)cur[2] << 8) ^ (p->crc[cur[3]] << 5)); \
+  hashValue = (hash4Value ^ (p->crc[cur[4]] << 3)) & p->hashMask; \
+  hash4Value &= (kHash4Size - 1); }
+
+/* #define HASH_ZIP_CALC hashValue = ((cur[0] | ((UInt32)cur[1] << 8)) ^ p->crc[cur[2]]) & 0xFFFF; */
+#define HASH_ZIP_CALC hashValue = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF;
+
+
+#define MT_HASH2_CALC \
+  hash2Value = (p->crc[cur[0]] ^ cur[1]) & (kHash2Size - 1);
+
+#define MT_HASH3_CALC { \
+  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+  hash2Value = temp & (kHash2Size - 1); \
+  hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }
+
+#define MT_HASH4_CALC { \
+  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+  hash2Value = temp & (kHash2Size - 1); \
+  hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); \
+  hash4Value = (temp ^ ((UInt32)cur[2] << 8) ^ (p->crc[cur[3]] << 5)) & (kHash4Size - 1); }
+
+#endif
diff --git a/include/grub/lib/LzmaDec.h b/include/grub/lib/LzmaDec.h
new file mode 100644
index 0000000..8087751
--- /dev/null
+++ b/include/grub/lib/LzmaDec.h
@@ -0,0 +1,232 @@
+/* LzmaDec.h -- LZMA Decoder
+2008-04-29
+Copyright (c) 1999-2008 Igor Pavlov
+You can use any of the following license options:
+  1) GNU Lesser General Public License (GNU LGPL)
+  2) Common Public License (CPL)
+  3) Common Development and Distribution License (CDDL) Version 1.0 
+  4) Igor Pavlov, as the author of this code, expressly permits you to 
+     statically or dynamically link your code (or bind by name) to this file, 
+     while you keep this file unmodified.
+*/
+
+#ifndef __LZMADEC_H
+#define __LZMADEC_H
+
+#include "Types.h"
+
+/* #define _LZMA_PROB32 */
+/* _LZMA_PROB32 can increase the speed on some CPUs, 
+   but memory usage for CLzmaDec::probs will be doubled in that case */
+
+#ifdef _LZMA_PROB32
+#define CLzmaProb UInt32
+#else
+#define CLzmaProb UInt16
+#endif
+
+
+/* ---------- LZMA Properties ---------- */  
+
+#define LZMA_PROPS_SIZE 5
+
+typedef struct _CLzmaProps
+{
+  unsigned lc, lp, pb;
+  UInt32 dicSize;
+} CLzmaProps;
+
+/* LzmaProps_Decode - decodes properties
+Returns:
+  SZ_OK
+  SZ_ERROR_UNSUPPORTED - Unsupported properties
+*/
+
+SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size);
+
+
+/* ---------- LZMA Decoder state ---------- */  
+
+/* LZMA_REQUIRED_INPUT_MAX = number of required input bytes for worst case.
+   Num bits = log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160; */
+
+#define LZMA_REQUIRED_INPUT_MAX 20
+
+typedef struct
+{
+  CLzmaProps prop;
+  CLzmaProb *probs;
+  Byte *dic;
+  const Byte *buf;
+  UInt32 range, code;
+  SizeT dicPos;
+  SizeT dicBufSize;
+  UInt32 processedPos;
+  UInt32 checkDicSize;
+  unsigned state;
+  UInt32 reps[4];
+  unsigned remainLen;
+  int needFlush;
+  int needInitState;
+  UInt32 numProbs;
+  unsigned tempBufSize;
+  Byte tempBuf[LZMA_REQUIRED_INPUT_MAX];
+} CLzmaDec;
+
+#define LzmaDec_Construct(p) { (p)->dic = 0; (p)->probs = 0; }
+
+void LzmaDec_Init(CLzmaDec *p);
+
+/* There are two types of LZMA streams:
+     0) Stream with end mark. That end mark adds about 6 bytes to compressed size.
+     1) Stream without end mark. You must know exact uncompressed size to decompress such stream. */
+
+typedef enum 
+{
+  LZMA_FINISH_ANY,   /* finish at any point */      
+  LZMA_FINISH_END    /* block must be finished at the end */
+} ELzmaFinishMode;
+
+/* ELzmaFinishMode has meaning only if the decoding reaches output limit !!!
+
+   You must use LZMA_FINISH_END, when you know that current output buffer 
+   covers last bytes of block. In other cases you must use LZMA_FINISH_ANY.
+
+   If LZMA decoder sees end marker before reaching output limit, it returns SZ_OK,
+   and output value of destLen will be less than output buffer size limit.
+   You can check status result also.
+
+   You can use multiple checks to test data integrity after full decompression:
+     1) Check Result and "status" variable.
+     2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize.
+     3) Check that output(srcLen) = compressedSize, if you know real compressedSize. 
+        You must use correct finish mode in that case. */ 
+
+typedef enum 
+{
+  LZMA_STATUS_NOT_SPECIFIED,               /* use main error code instead */
+  LZMA_STATUS_FINISHED_WITH_MARK,          /* stream was finished with end mark. */
+  LZMA_STATUS_NOT_FINISHED,                /* stream was not finished */
+  LZMA_STATUS_NEEDS_MORE_INPUT,            /* you must provide more input bytes */   
+  LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK  /* there is probability that stream was finished without end mark */
+} ELzmaStatus;
+
+/* ELzmaStatus is used only as output value for function call */
+
+
+/* ---------- Interfaces ---------- */  
+
+/* There are 3 levels of interfaces:
+     1) Dictionary Interface
+     2) Buffer Interface
+     3) One Call Interface
+   You can select any of these interfaces, but don't mix functions from different 
+   groups for same object. */
+
+
+/* There are two variants to allocate state for Dictionary Interface:
+     1) LzmaDec_Allocate / LzmaDec_Free
+     2) LzmaDec_AllocateProbs / LzmaDec_FreeProbs
+   You can use variant 2, if you set dictionary buffer manually. 
+   For Buffer Interface you must always use variant 1. 
+
+LzmaDec_Allocate* can return:
+  SZ_OK
+  SZ_ERROR_MEM         - Memory allocation error
+  SZ_ERROR_UNSUPPORTED - Unsupported properties
+*/
+   
+SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc);
+void LzmaDec_FreeProbs(CLzmaDec *p, ISzAlloc *alloc);
+
+SRes LzmaDec_Allocate(CLzmaDec *state, const Byte *prop, unsigned propsSize, ISzAlloc *alloc);
+void LzmaDec_Free(CLzmaDec *state, ISzAlloc *alloc);
+
+/* ---------- Dictionary Interface ---------- */  
+
+/* You can use it, if you want to eliminate the overhead for data copying from 
+   dictionary to some other external buffer.
+   You must work with CLzmaDec variables directly in this interface.
+
+   STEPS:
+     LzmaDec_Constr()
+     LzmaDec_Allocate()
+     for (each new stream)
+     {
+       LzmaDec_Init()
+       while (it needs more decompression)
+       {
+         LzmaDec_DecodeToDic()
+         use data from CLzmaDec::dic and update CLzmaDec::dicPos
+       }
+     }
+     LzmaDec_Free()
+*/
+
+/* LzmaDec_DecodeToDic
+   
+   The decoding to internal dictionary buffer (CLzmaDec::dic).
+   You must manually update CLzmaDec::dicPos, if it reaches CLzmaDec::dicBufSize !!! 
+
+finishMode:
+  It has meaning only if the decoding reaches output limit (dicLimit).
+  LZMA_FINISH_ANY - Decode just dicLimit bytes.
+  LZMA_FINISH_END - Stream must be finished after dicLimit.
+
+Returns:
+  SZ_OK
+    status:
+      LZMA_STATUS_FINISHED_WITH_MARK
+      LZMA_STATUS_NOT_FINISHED 
+      LZMA_STATUS_NEEDS_MORE_INPUT
+      LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
+  SZ_ERROR_DATA - Data error
+*/
+
+SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, 
+    const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
+
+
+/* ---------- Buffer Interface ---------- */  
+
+/* It's zlib-like interface.
+   See LzmaDec_DecodeToDic description for information about STEPS and return results,
+   but you must use LzmaDec_DecodeToBuf instead of LzmaDec_DecodeToDic and you don't need
+   to work with CLzmaDec variables manually.
+
+finishMode: 
+  It has meaning only if the decoding reaches output limit (*destLen).
+  LZMA_FINISH_ANY - Decode just destLen bytes.
+  LZMA_FINISH_END - Stream must be finished after (*destLen).
+*/
+
+SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, 
+    const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
+
+
+/* ---------- One Call Interface ---------- */  
+
+/* LzmaDecode
+
+finishMode:
+  It has meaning only if the decoding reaches output limit (*destLen).
+  LZMA_FINISH_ANY - Decode just destLen bytes.
+  LZMA_FINISH_END - Stream must be finished after (*destLen).
+
+Returns:
+  SZ_OK
+    status:
+      LZMA_STATUS_FINISHED_WITH_MARK
+      LZMA_STATUS_NOT_FINISHED 
+      LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
+  SZ_ERROR_DATA - Data error
+  SZ_ERROR_MEM  - Memory allocation error
+  SZ_ERROR_UNSUPPORTED - Unsupported properties
+  SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).
+*/
+
+SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
+    const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode, 
+    ELzmaStatus *status, ISzAlloc *alloc);
+
+#endif
diff --git a/include/grub/lib/LzmaEnc.h b/include/grub/lib/LzmaEnc.h
new file mode 100644
index 0000000..fcd1f12
--- /dev/null
+++ b/include/grub/lib/LzmaEnc.h
@@ -0,0 +1,74 @@
+/*  LzmaEnc.h -- LZMA Encoder
+2008-04-27
+Copyright (c) 1999-2008 Igor Pavlov
+Read LzFind.h for license options */
+
+#ifndef __LZMAENC_H
+#define __LZMAENC_H
+
+#include "LzmaTypes.h"
+
+#define LZMA_PROPS_SIZE 5
+
+typedef struct _CLzmaEncProps
+{
+  int level;       /*  0 <= level <= 9 */
+  UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version
+                      (1 << 12) <= dictSize <= (1 << 30) for 64-bit version
+                       default = (1 << 24) */
+  int lc;          /* 0 <= lc <= 8, default = 3 */
+  int lp;          /* 0 <= lp <= 4, default = 0 */
+  int pb;          /* 0 <= pb <= 4, default = 2 */
+  int algo;        /* 0 - fast, 1 - normal, default = 1 */
+  int fb;          /* 5 <= fb <= 273, default = 32 */
+  int btMode;      /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */
+  int numHashBytes; /* 2, 3 or 4, default = 4 */
+  UInt32 mc;        /* 1 <= mc <= (1 << 30), default = 32 */
+  unsigned writeEndMark;  /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */
+  int numThreads;  /* 1 or 2, default = 2 */
+} CLzmaEncProps;
+
+void LzmaEncProps_Init(CLzmaEncProps *p);
+void LzmaEncProps_Normalize(CLzmaEncProps *p);
+UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2);
+
+
+/* ---------- CLzmaEncHandle Interface ---------- */
+
+/* LzmaEnc_* functions can return the following exit codes:
+Returns:
+  SZ_OK           - OK
+  SZ_ERROR_MEM    - Memory allocation error
+  SZ_ERROR_PARAM  - Incorrect paramater in props
+  SZ_ERROR_WRITE  - Write callback error.
+  SZ_ERROR_PROGRESS - some break from progress callback
+  SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version)
+*/
+
+typedef void * CLzmaEncHandle;
+
+CLzmaEncHandle LzmaEnc_Create(ISzAlloc *alloc);
+void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAlloc *alloc, ISzAlloc *allocBig);
+SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props);
+SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *properties, SizeT *size);
+SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStream *outStream, ISeqInStream *inStream,
+    ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig);
+SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
+    int writeEndMark, ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig);
+
+/* ---------- One Call Interface ---------- */
+
+/* LzmaEncode
+Return code:
+  SZ_OK               - OK
+  SZ_ERROR_MEM        - Memory allocation error
+  SZ_ERROR_PARAM      - Incorrect paramater
+  SZ_ERROR_OUTPUT_EOF - output buffer overflow
+  SZ_ERROR_THREAD     - errors in multithreading functions (only for Mt version)
+*/
+
+SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
+    const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,
+    ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig);
+
+#endif
diff --git a/include/grub/lib/LzmaTypes.h b/include/grub/lib/LzmaTypes.h
new file mode 100644
index 0000000..72b3675
--- /dev/null
+++ b/include/grub/lib/LzmaTypes.h
@@ -0,0 +1,130 @@
+/* Types.h -- Basic types
+2008-04-11
+Igor Pavlov
+Public domain */
+
+#ifndef __7Z_TYPES_H
+#define __7Z_TYPES_H
+
+#define SZ_OK 0
+
+#define SZ_ERROR_DATA 1
+#define SZ_ERROR_MEM 2
+#define SZ_ERROR_CRC 3
+#define SZ_ERROR_UNSUPPORTED 4
+#define SZ_ERROR_PARAM 5
+#define SZ_ERROR_INPUT_EOF 6
+#define SZ_ERROR_OUTPUT_EOF 7
+#define SZ_ERROR_READ 8
+#define SZ_ERROR_WRITE 9
+#define SZ_ERROR_PROGRESS 10
+#define SZ_ERROR_FAIL 11
+#define SZ_ERROR_THREAD 12
+
+#define SZ_ERROR_ARCHIVE 16
+#define SZ_ERROR_NO_ARCHIVE 17
+
+typedef int SRes;
+
+#ifndef RINOK
+#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; }
+#endif
+
+typedef unsigned char Byte;
+typedef short Int16;
+typedef unsigned short UInt16;
+
+#ifdef _LZMA_UINT32_IS_ULONG
+typedef long Int32;
+typedef unsigned long UInt32;
+#else
+typedef int Int32;
+typedef unsigned int UInt32;
+#endif
+
+/* #define _SZ_NO_INT_64 */
+/* define it if your compiler doesn't support 64-bit integers */
+
+#ifdef _SZ_NO_INT_64
+
+typedef long Int64;
+typedef unsigned long UInt64;
+
+#else
+
+#if defined(_MSC_VER) || defined(__BORLANDC__)
+typedef __int64 Int64;
+typedef unsigned __int64 UInt64;
+#else
+typedef long long int Int64;
+typedef unsigned long long int UInt64;
+#endif
+
+#endif
+
+#ifdef _LZMA_NO_SYSTEM_SIZE_T
+typedef UInt32 SizeT;
+#else
+#include <stddef.h>
+typedef size_t SizeT;
+#endif
+
+typedef int Bool;
+#define True 1
+#define False 0
+
+
+#ifdef _MSC_VER
+
+#if _MSC_VER >= 1300
+#define MY_NO_INLINE __declspec(noinline)
+#else
+#define MY_NO_INLINE
+#endif
+
+#define MY_CDECL __cdecl
+#define MY_STD_CALL __stdcall 
+#define MY_FAST_CALL MY_NO_INLINE __fastcall 
+
+#else
+
+#define MY_CDECL
+#define MY_STD_CALL
+#define MY_FAST_CALL
+
+#endif
+
+
+/* The following interfaces use first parameter as pointer to structure */
+
+typedef struct
+{
+  SRes (*Read)(void *p, void *buf, size_t *size);
+    /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
+       (output(*size) < input(*size)) is allowed */
+} ISeqInStream;
+
+typedef struct
+{
+  size_t (*Write)(void *p, const void *buf, size_t size);
+    /* Returns: result - the number of actually written bytes.
+      (result < size) means error */
+} ISeqOutStream;
+
+typedef struct
+{
+  SRes (*Progress)(void *p, UInt64 inSize, UInt64 outSize);
+    /* Returns: result. (result != SZ_OK) means break.
+       Value (UInt64)(Int64)-1 for size means unknown value. */
+} ICompressProgress;
+
+typedef struct
+{
+  void *(*Alloc)(void *p, size_t size);
+  void (*Free)(void *p, void *address); /* address can be 0 */
+} ISzAlloc;
+
+#define IAlloc_Alloc(p, size) (p)->Alloc((p), size)
+#define IAlloc_Free(p, a) (p)->Free((p), a)
+
+#endif
diff --git a/kern/i386/pc/lzma_decode.S b/kern/i386/pc/lzma_decode.S
new file mode 100644
index 0000000..c194292
--- /dev/null
+++ b/kern/i386/pc/lzma_decode.S
@@ -0,0 +1,677 @@
+/*
+ *  GRUB  --  GRand Unified Bootloader
+ *  Copyright (C) 2008  Free Software Foundation, Inc.
+ *
+ *  GRUB is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  GRUB is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GRUB.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define FIXED_PROPS
+
+#define LZMA_BASE_SIZE 1846
+#define LZMA_LIT_SIZE 768
+
+#define LZMA_PROPERTIES_SIZE 5
+
+#define kNumTopBits 24
+#define kTopValue (1 << kNumTopBits)
+
+#define kNumBitModelTotalBits 11
+#define kBitModelTotal (1 << kNumBitModelTotalBits)
+#define kNumMoveBits 5
+
+
+#define kNumPosBitsMax 4
+#define kNumPosStatesMax (1 << kNumPosBitsMax)
+
+#define kLenNumLowBits 3
+#define kLenNumLowSymbols (1 << kLenNumLowBits)
+#define kLenNumMidBits 3
+#define kLenNumMidSymbols (1 << kLenNumMidBits)
+#define kLenNumHighBits 8
+#define kLenNumHighSymbols (1 << kLenNumHighBits)
+
+#define LenChoice 0
+#define LenChoice2 (LenChoice + 1)
+#define LenLow (LenChoice2 + 1)
+#define LenMid (LenLow + (kNumPosStatesMax << kLenNumLowBits))
+#define LenHigh (LenMid + (kNumPosStatesMax << kLenNumMidBits))
+#define kNumLenProbs (LenHigh + kLenNumHighSymbols)
+
+
+#define kNumStates 12
+#define kNumLitStates 7
+
+#define kStartPosModelIndex 4
+#define kEndPosModelIndex 14
+#define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
+
+#define kNumPosSlotBits 6
+#define kNumLenToPosStates 4
+
+#define kNumAlignBits 4
+#define kAlignTableSize (1 << kNumAlignBits)
+
+#define kMatchMinLen 2
+
+#define IsMatch 0
+#define IsRep (IsMatch + (kNumStates << kNumPosBitsMax))
+#define IsRepG0 (IsRep + kNumStates)
+#define IsRepG1 (IsRepG0 + kNumStates)
+#define IsRepG2 (IsRepG1 + kNumStates)
+#define IsRep0Long (IsRepG2 + kNumStates)
+#define PosSlot (IsRep0Long + (kNumStates << kNumPosBitsMax))
+#define SpecPos (PosSlot + (kNumLenToPosStates << kNumPosSlotBits))
+#define Align (SpecPos + kNumFullDistances - kEndPosModelIndex)
+#define LenCoder (Align + kAlignTableSize)
+#define RepLenCoder (LenCoder + kNumLenProbs)
+#define Literal (RepLenCoder + kNumLenProbs)
+
+
+#if 0
+
+DbgOut:
+	pushf
+	pushl	%ebp
+	pushl	%edi
+	pushl	%esi
+	pushl	%edx
+	pushl	%ecx
+	pushl	%ebx
+	pushl	%eax
+
+	call	_DebugPrint
+
+	popl	%eax
+	popl	%ebx
+	popl	%ecx
+	popl	%edx
+	popl	%esi
+	popl	%edi
+	popl	%ebp
+	popf
+
+	ret
+
+
+/*
+ * int LzmaDecodeProperties(CLzmaProperties *propsRes,
+ *                          const unsigned char *propsData,
+ *                          int size);
+ */
+
+_LzmaDecodePropertiesA:
+	movb	(%edx), %dl
+
+	xorl	%ecx, %ecx
+1:
+	cmpb	$45, %dl
+	jb	2f
+	incl	%ecx
+	subb	$45, %dl
+	jmp	1b
+2:
+	movl	%ecx, 8(%eax)		/* pb */
+	xorl	%ecx, %ecx
+1:
+	cmpb	$9, %dl
+	jb	2f
+	incl	%ecx
+	subb	$9, %dl
+2:
+	movl	%ecx, 4(%eax)		/* lp */
+	movb	%dl, %cl
+	movl	%ecx, (%eax)		/* lc */
+
+#endif
+
+#ifndef ASM_FILE
+	xorl	%eax, %eax
+#endif
+	ret
+
+#define out_size	8(%ebp)
+
+#define now_pos		-4(%ebp)
+#define prev_byte	-8(%ebp)
+#define range		-12(%ebp)
+#define code		-16(%ebp)
+#define state		-20(%ebp)
+#define rep0		-24(%ebp)
+#define rep1		-28(%ebp)
+#define rep2		-32(%ebp)
+#define rep3		-36(%ebp)
+
+#ifdef FIXED_PROPS
+
+#define FIXED_LC	3
+#define FIXED_LP	0
+#define FIXED_PB	2
+
+#define POS_STATE_MASK	((1 << (FIXED_PB)) - 1)
+#define LIT_POS_MASK	((1 << (FIXED_LP)) - 1)
+
+#define LOCAL_SIZE	36
+
+#else
+
+#define lc		(%ebx)
+#define lp		4(%ebx)
+#define pb		8(%ebx)
+#define probs		12(%ebx)
+
+#define pos_state_mask	-40(%ebp)
+#define lit_pos_mask	-44(%ebp)
+
+#define LOCAL_SIZE	44
+
+#endif
+
+RangeDecoderBitDecode:
+#ifdef FIXED_PROPS
+	leal	(%ebx, %eax, 4), %eax
+#else
+	shll	$2, %eax
+	addl	probs, %eax
+#endif
+
+	movl	%eax, %ecx
+	movl	(%ecx), %eax
+
+	movl	range, %edx
+	shrl	$kNumBitModelTotalBits, %edx
+	mull	%edx
+
+	cmpl	code, %eax
+	jbe	1f
+
+	movl	%eax, range
+	movl	$kBitModelTotal, %edx
+	subl	(%ecx), %edx
+	shrl	$kNumMoveBits, %edx
+	addl	%edx, (%ecx)
+	clc
+3:
+	pushf
+	cmpl	$kTopValue, range
+	jnc	2f
+	shll	$8, code
+	lodsb
+	movb	%al, code
+	shll	$8, range
+2:
+	popf
+	ret
+1:
+	subl	%eax, range
+	subl	%eax, code
+	movl	(%ecx), %edx
+	shrl	$kNumMoveBits, %edx
+	subl	%edx, (%ecx)
+	stc
+	jmp	3b
+
+RangeDecoderBitTreeDecode:
+RangeDecoderReverseBitTreeDecode:
+	movzbl	%cl, %ecx
+	xorl	%edx, %edx
+	pushl	%edx
+	incl	%edx
+	pushl	%edx
+
+1:
+	pushl	%eax
+	pushl	%ecx
+	pushl	%edx
+
+	addl	%edx, %eax
+	call	RangeDecoderBitDecode
+
+	popl	%edx
+	popl	%ecx
+
+	jnc	2f
+	movl	4(%esp), %eax
+	orl	%eax, 8(%esp)
+	stc
+
+2:
+	adcl	%edx, %edx
+	popl	%eax
+
+	shll	$1, (%esp)
+	loop	1b
+
+	popl	%ecx
+	subl	%ecx, %edx		/* RangeDecoderBitTreeDecode */
+	popl	%ecx			/* RangeDecoderReverseBitTreeDecode */
+	ret
+
+LzmaLenDecode:
+	pushl	%eax
+	addl	$LenChoice, %eax
+	call	RangeDecoderBitDecode
+	popl	%eax
+	jc	1f
+	pushl	$0
+	movb	$kLenNumLowBits, %cl
+	addl	$LenLow, %eax
+2:
+	movl	12(%esp), %edx
+	shll	%cl, %edx
+	addl	%edx, %eax
+3:
+
+	call	RangeDecoderBitTreeDecode
+	popl	%eax
+	addl	%eax, %edx
+	ret
+
+1:
+	pushl	%eax
+	addl	$LenChoice2, %eax
+	call	RangeDecoderBitDecode
+	popl	%eax
+	jc	1f
+	pushl	$kLenNumLowSymbols
+	movb	$kLenNumMidBits, %cl
+	addl	$LenMid, %eax
+	jmp	2b
+
+1:
+	pushl	$(kLenNumLowSymbols + kLenNumMidSymbols)
+	addl	$LenHigh, %eax
+	movb	$kLenNumHighBits, %cl
+	jmp	3b
+
+WriteByte:
+	movb	%al, prev_byte
+	stosb
+	incl	now_pos
+	ret
+
+/*
+ * int LzmaDecode(CLzmaDecoderState *vs,
+ *                const unsigned char *inStream,
+ *                unsigned char *outStream,
+ *                SizeT outSize);
+ */
+
+_LzmaDecodeA:
+
+	pushl	%ebp
+	movl	%esp, %ebp
+	subl	$LOCAL_SIZE, %esp
+
+#ifndef ASM_FILE
+	pushl	%esi
+	pushl	%edi
+	pushl	%ebx
+
+	movl	%eax, %ebx
+	movl	%edx, %esi
+	pushl	%ecx
+#else
+	pushl	%edi
+#endif
+
+	cld
+
+#ifdef FIXED_PROPS
+	movl	%ebx, %edi
+	movl	$(Literal + (LZMA_LIT_SIZE << (FIXED_LC + FIXED_LP))), %ecx
+#else
+	movl	$LZMA_LIT_SIZE, %eax
+	movb	lc, %cl
+	addb	lp, %cl
+	shll	%cl, %eax
+	addl	$Literal, %eax
+	movl	%eax, %ecx
+	movl	probs, %edi
+#endif
+
+	movl	$(kBitModelTotal >> 1), %eax
+
+	rep
+	stosl
+
+	popl	%edi
+
+	xorl	%eax, %eax
+	movl	%eax, now_pos
+	movl	%eax, prev_byte
+	movl	%eax, state
+
+	incl	%eax
+	movl	%eax, rep0
+	movl	%eax, rep1
+	movl	%eax, rep2
+	movl	%eax, rep3
+
+#ifndef FIXED_PROPS
+	movl	%eax, %edx
+	movb	pb, %cl
+	shll	%cl, %edx
+	decl	%edx
+	movl	%edx, pos_state_mask
+
+	movl	%eax, %edx
+	movb	lp, %cl
+	shll	%cl, %edx
+	decl	%edx
+	movl	%edx, lit_pos_mask;
+#endif
+
+	/* RangeDecoderInit */
+	negl	%eax
+	movl	%eax, range
+
+	incl	%eax
+	movb	$5, %cl
+
+1:
+	shll	$8, %eax
+	lodsb
+	loop	1b
+
+	movl	%eax, code
+
+lzma_decode_loop:
+	movl	now_pos, %eax
+	cmpl	out_size, %eax
+
+	jb	1f
+
+#ifndef ASM_FILE
+	xorl	%eax, %eax
+
+	popl	%ebx
+	popl	%edi
+	popl	%esi
+#endif
+
+	movl	%ebp, %esp
+	popl	%ebp
+	ret
+
+1:
+#ifdef FIXED_PROPS
+	andl	$POS_STATE_MASK, %eax
+#else
+	andl	pos_state_mask, %eax
+#endif
+	pushl	%eax				/* posState */
+	movl	state, %edx
+	shll	$kNumPosBitsMax, %edx
+	addl	%edx, %eax
+	pushl	%eax				/* (state << kNumPosBitsMax) + posState */
+
+	call	RangeDecoderBitDecode
+	jc	1f
+
+	movl	now_pos, %eax
+
+#ifdef FIXED_PROPS
+	andl	$LIT_POS_MASK, %eax
+	shll	$FIXED_LC, %eax
+	movl	prev_byte, %edx
+	shrl	$(8 - FIXED_LC), %edx
+#else
+	andl	lit_pos_mask, %eax
+	movb	lc, %cl
+	shll	%cl, %eax
+	negb	%cl
+	addb	$8, %cl
+	movl	prev_byte, %edx
+	shrl	%cl, %edx
+#endif
+
+	addl	%edx, %eax
+	movl	$LZMA_LIT_SIZE, %edx
+	mull	%edx
+	addl	$Literal, %eax
+	pushl	%eax
+
+	incl	%edx			/* edx = 1 */
+
+	movl	rep0, %eax
+	negl	%eax
+	pushl	(%edi, %eax)		/* matchByte */
+
+	cmpb	$kNumLitStates, state
+	jb	5f
+
+	/* LzmaLiteralDecodeMatch */
+
+3:
+	cmpl	$0x100, %edx
+	jae	4f
+
+	xorl	%eax, %eax
+	shlb	$1, (%esp)
+	adcl	%eax, %eax
+
+	pushl	%eax
+	pushl	%edx
+
+	shll	$8, %eax
+	leal	0x100(%edx, %eax), %eax
+	addl	12(%esp), %eax
+	call	RangeDecoderBitDecode
+
+	setc	%al
+	popl	%edx
+	adcl	%edx, %edx
+
+	popl	%ecx
+	cmpb	%cl, %al
+	jz	3b
+
+5:
+
+	/* LzmaLiteralDecode */
+
+	cmpl	$0x100, %edx
+	jae	4f
+
+	pushl	%edx
+	movl	%edx, %eax
+	addl	8(%esp), %eax
+	call	RangeDecoderBitDecode
+	popl	%edx
+	adcl	%edx, %edx
+	jmp	5b
+
+4:
+	addl	$16, %esp
+
+	movb	%dl, %al
+	call	WriteByte
+
+	movb	state, %al
+	cmpb	$4, %al
+	jae	2f
+	xorb	%al, %al
+	jmp	3f
+2:
+	subb	$3, %al
+	cmpb	$7, %al
+	jb	3f
+	subb	$3, %al
+3:
+	movb	%al, state
+	jmp	lzma_decode_loop
+
+1:
+	movl	state, %eax
+	addl	$IsRep, %eax
+	call	RangeDecoderBitDecode
+	jnc	1f
+
+	movl	state, %eax
+	addl	$IsRepG0, %eax
+	call	RangeDecoderBitDecode
+	jc	10f
+
+	movl	(%esp), %eax
+	addl	$IsRep0Long, %eax
+	call	RangeDecoderBitDecode
+	jc	20f
+
+	cmpb	$7, state
+	movb	$9, state
+	jb	100f
+	addb	$2, state
+100:
+
+	movl	$1, %ecx
+
+3:
+	movl	rep0, %edx
+	negl	%edx
+
+4:
+	movb	(%edi, %edx), %al
+	call	WriteByte
+	loop	4b
+
+	popl	%eax
+	popl	%eax
+	jmp	lzma_decode_loop
+
+10:
+	movl	state, %eax
+	addl	$IsRepG1, %eax
+	call	RangeDecoderBitDecode
+	movl	rep1, %edx
+	jnc	100f
+
+	movl	state, %eax
+	addl	$IsRepG2, %eax
+	call	RangeDecoderBitDecode
+	movl	rep2, %edx
+	jnc	1000f
+	movl	rep2, %edx
+	xchgl	rep3, %edx
+1000:
+	pushl	rep1
+	popl	rep2
+100:
+	xchg	rep0, %edx
+	movl	%edx, rep1
+20:
+
+	movl	$RepLenCoder, %eax
+	call	LzmaLenDecode
+
+	cmpb	$7, state
+	movb	$8, state
+	jb	100f
+	addb	$3, state
+100:
+	jmp	2f
+
+1:
+	movl	rep0, %eax
+	xchgl	rep1, %eax
+	xchgl	rep2, %eax
+	movl	%eax, rep3
+
+	cmpb	$7, state
+	movb	$7, state
+	jb	10f
+	addb	$3, state
+10:
+
+	movl	$LenCoder, %eax
+	call	LzmaLenDecode
+	pushl	%edx
+
+	movl	$(kNumLenToPosStates - 1), %eax
+	cmpl	%eax, %edx
+	jbe	100f
+	movl	%eax, %edx
+100:
+	movb	$kNumPosSlotBits, %cl
+	shll	%cl, %edx
+	leal	PosSlot(%edx), %eax
+	call	RangeDecoderBitTreeDecode
+
+	movl	%edx, rep0
+	cmpl	$kStartPosModelIndex, %edx
+	jb	100f
+
+	movl	%edx, %ecx
+	shrl	$1, %ecx
+	decl	%ecx
+
+	movzbl	%dl, %eax
+	andb	$1, %al
+	orb	$2, %al
+	shll	%cl, %eax
+	movl	%eax, rep0
+
+	cmpl	$kEndPosModelIndex, %edx
+	jae	200f
+	movl	rep0, %eax
+	addl	$(SpecPos - 1), %eax
+	subl	%edx, %eax
+	jmp	300f
+200:
+
+	subb	$kNumAlignBits, %cl
+
+	/* RangeDecoderDecodeDirectBits */
+	xorl	%edx, %edx
+
+1000:
+	shrl	$1, range
+	shll	$1, %edx
+
+	movl	range, %eax
+	cmpl	%eax, code
+	jb	2000f
+	subl	%eax, code
+	orb	$1, %dl
+2000:
+
+	cmpl	$kTopValue, %eax
+	jae	3000f
+	shll	$8, range
+	shll	$8, code
+	lodsb
+	movb	%al, code
+
+3000:
+	loop	1000b
+
+	movb	$kNumAlignBits, %cl
+	shll	%cl, %edx
+	addl	%edx, rep0
+
+	movl	$Align, %eax
+
+300:
+	call	RangeDecoderReverseBitTreeDecode
+	addl	%ecx, rep0
+
+100:
+	incl	rep0
+	popl	%edx
+
+2:
+
+	addl	$kMatchMinLen, %edx
+	movl	%edx, %ecx
+
+	jmp	3b
diff --git a/kern/i386/pc/startup.S b/kern/i386/pc/startup.S
index 9542978..964643a 100644
--- a/kern/i386/pc/startup.S
+++ b/kern/i386/pc/startup.S
@@ -200,6 +200,7 @@ codestart:
 	incl	%eax
 	call	EXT_C(grub_gate_a20)
 	
+#if defined(ENABLE_LZO)
 	/* decompress the compressed part and put the result at 1MB */
 	movl	$GRUB_MEMORY_MACHINE_DECOMPRESSION_ADDR, %esi
 	movl	$(START_SYMBOL + GRUB_KERNEL_MACHINE_RAW_SIZE), %edi
@@ -213,6 +214,23 @@ codestart:
 	/* copy back the decompressed part */
 	movl	%eax, %ecx
 	cld
+#else
+	movl	$GRUB_MEMORY_MACHINE_DECOMPRESSION_ADDR, %edi
+	movl	$(START_SYMBOL + GRUB_KERNEL_MACHINE_RAW_SIZE), %esi
+	pushl	%edi
+	pushl	%esi
+	movl	EXT_C(grub_kernel_image_size), %ecx
+	addl	EXT_C(grub_total_module_size), %ecx
+	addl	EXT_C(grub_memdisk_image_size), %ecx
+	subl	$GRUB_KERNEL_MACHINE_RAW_SIZE, %ecx
+	pushl	%ecx
+	leal	(%edi, %ecx), %ebx
+	call	_LzmaDecodeA
+	popl	%ecx
+	popl	%edi
+	popl	%esi
+#endif
+
 	rep
 	movsb
 
@@ -476,7 +494,11 @@ gate_a20_check_state:
 	popl	%ebx
 	ret
 
+#if defined(ENABLE_LZO)
 #include "lzo1x.S"
+#else
+#include "lzma_decode.S"
+#endif
 
 /*
  * The code beyond this point is compressed.  Assert that the uncompressed
diff --git a/lib/LzFind.c b/lib/LzFind.c
new file mode 100644
index 0000000..e3b42e9
--- /dev/null
+++ b/lib/LzFind.c
@@ -0,0 +1,753 @@
+/* LzFind.c  -- Match finder for LZ algorithms
+2008-04-04
+Copyright (c) 1999-2008 Igor Pavlov
+Read LzFind.h for license options */
+
+#include <string.h>
+
+#include <grub/lib/LzFind.h>
+#include <grub/lib/LzHash.h>
+
+#define kEmptyHashValue 0
+#define kMaxValForNormalize ((UInt32)0xFFFFFFFF)
+#define kNormalizeStepMin (1 << 10) /* it must be power of 2 */
+#define kNormalizeMask (~(kNormalizeStepMin - 1))
+#define kMaxHistorySize ((UInt32)3 << 30)
+
+#define kStartMaxLen 3
+
+static void LzInWindow_Free(CMatchFinder *p, ISzAlloc *alloc)
+{
+  if (!p->directInput)
+  {
+    alloc->Free(alloc, p->bufferBase);
+    p->bufferBase = 0;
+  }
+}
+
+/* keepSizeBefore + keepSizeAfter + keepSizeReserv must be < 4G) */
+
+static int LzInWindow_Create(CMatchFinder *p, UInt32 keepSizeReserv, ISzAlloc *alloc)
+{
+  UInt32 blockSize = p->keepSizeBefore + p->keepSizeAfter + keepSizeReserv;
+  if (p->directInput)
+  {
+    p->blockSize = blockSize;
+    return 1;
+  }
+  if (p->bufferBase == 0 || p->blockSize != blockSize)
+  {
+    LzInWindow_Free(p, alloc);
+    p->blockSize = blockSize;
+    p->bufferBase = (Byte *)alloc->Alloc(alloc, (size_t)blockSize);
+  }
+  return (p->bufferBase != 0);
+}
+
+Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; }
+Byte MatchFinder_GetIndexByte(CMatchFinder *p, Int32 index) { return p->buffer[index]; }
+
+UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return p->streamPos - p->pos; }
+
+void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue)
+{
+  p->posLimit -= subValue;
+  p->pos -= subValue;
+  p->streamPos -= subValue;
+}
+
+static void MatchFinder_ReadBlock(CMatchFinder *p)
+{
+  if (p->streamEndWasReached || p->result != SZ_OK)
+    return;
+  for (;;)
+  {
+    Byte *dest = p->buffer + (p->streamPos - p->pos);
+    size_t size = (p->bufferBase + p->blockSize - dest);
+    if (size == 0)
+      return;
+    p->result = p->stream->Read(p->stream, dest, &size);
+    if (p->result != SZ_OK)
+      return;
+    if (size == 0)
+    {
+      p->streamEndWasReached = 1;
+      return;
+    }
+    p->streamPos += (UInt32)size;
+    if (p->streamPos - p->pos > p->keepSizeAfter)
+      return;
+  }
+}
+
+void MatchFinder_MoveBlock(CMatchFinder *p)
+{
+  memmove(p->bufferBase,
+    p->buffer - p->keepSizeBefore,
+    (size_t)(p->streamPos - p->pos + p->keepSizeBefore));
+  p->buffer = p->bufferBase + p->keepSizeBefore;
+}
+
+int MatchFinder_NeedMove(CMatchFinder *p)
+{
+  /* if (p->streamEndWasReached) return 0; */
+  return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter);
+}
+
+void MatchFinder_ReadIfRequired(CMatchFinder *p)
+{
+  if (p->streamEndWasReached)
+    return;
+  if (p->keepSizeAfter >= p->streamPos - p->pos)
+    MatchFinder_ReadBlock(p);
+}
+
+static void MatchFinder_CheckAndMoveAndRead(CMatchFinder *p)
+{
+  if (MatchFinder_NeedMove(p))
+    MatchFinder_MoveBlock(p);
+  MatchFinder_ReadBlock(p);
+}
+
+static void MatchFinder_SetDefaultSettings(CMatchFinder *p)
+{
+  p->cutValue = 32;
+  p->btMode = 1;
+  p->numHashBytes = 4;
+  /* p->skipModeBits = 0; */
+  p->directInput = 0;
+  p->bigHash = 0;
+}
+
+#define kCrcPoly 0xEDB88320
+
+void MatchFinder_Construct(CMatchFinder *p)
+{
+  UInt32 i;
+  p->bufferBase = 0;
+  p->directInput = 0;
+  p->hash = 0;
+  MatchFinder_SetDefaultSettings(p);
+
+  for (i = 0; i < 256; i++)
+  {
+    UInt32 r = i;
+    int j;
+    for (j = 0; j < 8; j++)
+      r = (r >> 1) ^ (kCrcPoly & ~((r & 1) - 1));
+    p->crc[i] = r;
+  }
+}
+
+static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAlloc *alloc)
+{
+  alloc->Free(alloc, p->hash);
+  p->hash = 0;
+}
+
+void MatchFinder_Free(CMatchFinder *p, ISzAlloc *alloc)
+{
+  MatchFinder_FreeThisClassMemory(p, alloc);
+  LzInWindow_Free(p, alloc);
+}
+
+static CLzRef* AllocRefs(UInt32 num, ISzAlloc *alloc)
+{
+  size_t sizeInBytes = (size_t)num * sizeof(CLzRef);
+  if (sizeInBytes / sizeof(CLzRef) != num)
+    return 0;
+  return (CLzRef *)alloc->Alloc(alloc, sizeInBytes);
+}
+
+int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
+    UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
+    ISzAlloc *alloc)
+{
+  UInt32 sizeReserv;
+  if (historySize > kMaxHistorySize)
+  {
+    MatchFinder_Free(p, alloc);
+    return 0;
+  }
+  sizeReserv = historySize >> 1;
+  if (historySize > ((UInt32)2 << 30))
+    sizeReserv = historySize >> 2;
+  sizeReserv += (keepAddBufferBefore + matchMaxLen + keepAddBufferAfter) / 2 + (1 << 19);
+
+  p->keepSizeBefore = historySize + keepAddBufferBefore + 1;
+  p->keepSizeAfter = matchMaxLen + keepAddBufferAfter;
+  /* we need one additional byte, since we use MoveBlock after pos++ and before dictionary using */
+  if (LzInWindow_Create(p, sizeReserv, alloc))
+  {
+    UInt32 newCyclicBufferSize = (historySize /* >> p->skipModeBits */) + 1;
+    UInt32 hs;
+    p->matchMaxLen = matchMaxLen;
+    {
+      p->fixedHashSize = 0;
+      if (p->numHashBytes == 2)
+        hs = (1 << 16) - 1;
+      else
+      {
+        hs = historySize - 1;
+        hs |= (hs >> 1);
+        hs |= (hs >> 2);
+        hs |= (hs >> 4);
+        hs |= (hs >> 8);
+        hs >>= 1;
+        /* hs >>= p->skipModeBits; */
+        hs |= 0xFFFF; /* don't change it! It's required for Deflate */
+        if (hs > (1 << 24))
+        {
+          if (p->numHashBytes == 3)
+            hs = (1 << 24) - 1;
+          else
+            hs >>= 1;
+        }
+      }
+      p->hashMask = hs;
+      hs++;
+      if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size;
+      if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size;
+      if (p->numHashBytes > 4) p->fixedHashSize += kHash4Size;
+      hs += p->fixedHashSize;
+    }
+
+    {
+      UInt32 prevSize = p->hashSizeSum + p->numSons;
+      UInt32 newSize;
+      p->historySize = historySize;
+      p->hashSizeSum = hs;
+      p->cyclicBufferSize = newCyclicBufferSize;
+      p->numSons = (p->btMode ? newCyclicBufferSize * 2 : newCyclicBufferSize);
+      newSize = p->hashSizeSum + p->numSons;
+      if (p->hash != 0 && prevSize == newSize)
+        return 1;
+      MatchFinder_FreeThisClassMemory(p, alloc);
+      p->hash = AllocRefs(newSize, alloc);
+      if (p->hash != 0)
+      {
+        p->son = p->hash + p->hashSizeSum;
+        return 1;
+      }
+    }
+  }
+  MatchFinder_Free(p, alloc);
+  return 0;
+}
+
+static void MatchFinder_SetLimits(CMatchFinder *p)
+{
+  UInt32 limit = kMaxValForNormalize - p->pos;
+  UInt32 limit2 = p->cyclicBufferSize - p->cyclicBufferPos;
+  if (limit2 < limit)
+    limit = limit2;
+  limit2 = p->streamPos - p->pos;
+  if (limit2 <= p->keepSizeAfter)
+  {
+    if (limit2 > 0)
+      limit2 = 1;
+  }
+  else
+    limit2 -= p->keepSizeAfter;
+  if (limit2 < limit)
+    limit = limit2;
+  {
+    UInt32 lenLimit = p->streamPos - p->pos;
+    if (lenLimit > p->matchMaxLen)
+      lenLimit = p->matchMaxLen;
+    p->lenLimit = lenLimit;
+  }
+  p->posLimit = p->pos + limit;
+}
+
+void MatchFinder_Init(CMatchFinder *p)
+{
+  UInt32 i;
+  for(i = 0; i < p->hashSizeSum; i++)
+    p->hash[i] = kEmptyHashValue;
+  p->cyclicBufferPos = 0;
+  p->buffer = p->bufferBase;
+  p->pos = p->streamPos = p->cyclicBufferSize;
+  p->result = SZ_OK;
+  p->streamEndWasReached = 0;
+  MatchFinder_ReadBlock(p);
+  MatchFinder_SetLimits(p);
+}
+
+static UInt32 MatchFinder_GetSubValue(CMatchFinder *p)
+{
+  return (p->pos - p->historySize - 1) & kNormalizeMask;
+}
+
+void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, UInt32 numItems)
+{
+  UInt32 i;
+  for (i = 0; i < numItems; i++)
+  {
+    UInt32 value = items[i];
+    if (value <= subValue)
+      value = kEmptyHashValue;
+    else
+      value -= subValue;
+    items[i] = value;
+  }
+}
+
+static void MatchFinder_Normalize(CMatchFinder *p)
+{
+  UInt32 subValue = MatchFinder_GetSubValue(p);
+  MatchFinder_Normalize3(subValue, p->hash, p->hashSizeSum + p->numSons);
+  MatchFinder_ReduceOffsets(p, subValue);
+}
+
+static void MatchFinder_CheckLimits(CMatchFinder *p)
+{
+  if (p->pos == kMaxValForNormalize)
+    MatchFinder_Normalize(p);
+  if (!p->streamEndWasReached && p->keepSizeAfter == p->streamPos - p->pos)
+    MatchFinder_CheckAndMoveAndRead(p);
+  if (p->cyclicBufferPos == p->cyclicBufferSize)
+    p->cyclicBufferPos = 0;
+  MatchFinder_SetLimits(p);
+}
+
+static UInt32 * Hc_GetMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
+    UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
+    UInt32 *distances, UInt32 maxLen)
+{
+  son[_cyclicBufferPos] = curMatch;
+  for (;;)
+  {
+    UInt32 delta = pos - curMatch;
+    if (cutValue-- == 0 || delta >= _cyclicBufferSize)
+      return distances;
+    {
+      const Byte *pb = cur - delta;
+      curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)];
+      if (pb[maxLen] == cur[maxLen] && *pb == *cur)
+      {
+        UInt32 len = 0;
+        while(++len != lenLimit)
+          if (pb[len] != cur[len])
+            break;
+        if (maxLen < len)
+        {
+          *distances++ = maxLen = len;
+          *distances++ = delta - 1;
+          if (len == lenLimit)
+            return distances;
+        }
+      }
+    }
+  }
+}
+
+UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
+    UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
+    UInt32 *distances, UInt32 maxLen)
+{
+  CLzRef *ptr0 = son + (_cyclicBufferPos << 1) + 1;
+  CLzRef *ptr1 = son + (_cyclicBufferPos << 1);
+  UInt32 len0 = 0, len1 = 0;
+  for (;;)
+  {
+    UInt32 delta = pos - curMatch;
+    if (cutValue-- == 0 || delta >= _cyclicBufferSize)
+    {
+      *ptr0 = *ptr1 = kEmptyHashValue;
+      return distances;
+    }
+    {
+      CLzRef *pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
+      const Byte *pb = cur - delta;
+      UInt32 len = (len0 < len1 ? len0 : len1);
+      if (pb[len] == cur[len])
+      {
+        if (++len != lenLimit && pb[len] == cur[len])
+          while(++len != lenLimit)
+            if (pb[len] != cur[len])
+              break;
+        if (maxLen < len)
+        {
+          *distances++ = maxLen = len;
+          *distances++ = delta - 1;
+          if (len == lenLimit)
+          {
+            *ptr1 = pair[0];
+            *ptr0 = pair[1];
+            return distances;
+          }
+        }
+      }
+      if (pb[len] < cur[len])
+      {
+        *ptr1 = curMatch;
+        ptr1 = pair + 1;
+        curMatch = *ptr1;
+        len1 = len;
+      }
+      else
+      {
+        *ptr0 = curMatch;
+        ptr0 = pair;
+        curMatch = *ptr0;
+        len0 = len;
+      }
+    }
+  }
+}
+
+static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
+    UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue)
+{
+  CLzRef *ptr0 = son + (_cyclicBufferPos << 1) + 1;
+  CLzRef *ptr1 = son + (_cyclicBufferPos << 1);
+  UInt32 len0 = 0, len1 = 0;
+  for (;;)
+  {
+    UInt32 delta = pos - curMatch;
+    if (cutValue-- == 0 || delta >= _cyclicBufferSize)
+    {
+      *ptr0 = *ptr1 = kEmptyHashValue;
+      return;
+    }
+    {
+      CLzRef *pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
+      const Byte *pb = cur - delta;
+      UInt32 len = (len0 < len1 ? len0 : len1);
+      if (pb[len] == cur[len])
+      {
+        while(++len != lenLimit)
+          if (pb[len] != cur[len])
+            break;
+        {
+          if (len == lenLimit)
+          {
+            *ptr1 = pair[0];
+            *ptr0 = pair[1];
+            return;
+          }
+        }
+      }
+      if (pb[len] < cur[len])
+      {
+        *ptr1 = curMatch;
+        ptr1 = pair + 1;
+        curMatch = *ptr1;
+        len1 = len;
+      }
+      else
+      {
+        *ptr0 = curMatch;
+        ptr0 = pair;
+        curMatch = *ptr0;
+        len0 = len;
+      }
+    }
+  }
+}
+
+#define MOVE_POS \
+  ++p->cyclicBufferPos; \
+  p->buffer++; \
+  if (++p->pos == p->posLimit) MatchFinder_CheckLimits(p);
+
+#define MOVE_POS_RET MOVE_POS return offset;
+
+static void MatchFinder_MovePos(CMatchFinder *p) { MOVE_POS; }
+
+#define GET_MATCHES_HEADER2(minLen, ret_op) \
+  UInt32 lenLimit; UInt32 hashValue; const Byte *cur; UInt32 curMatch; \
+  lenLimit = p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \
+  cur = p->buffer;
+
+#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return 0)
+#define SKIP_HEADER(minLen)        GET_MATCHES_HEADER2(minLen, continue)
+
+#define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
+
+#define GET_MATCHES_FOOTER(offset, maxLen) \
+  offset = (UInt32)(GetMatchesSpec1(lenLimit, curMatch, MF_PARAMS(p), \
+  distances + offset, maxLen) - distances); MOVE_POS_RET;
+
+#define SKIP_FOOTER \
+  SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS;
+
+static UInt32 Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+  UInt32 offset;
+  GET_MATCHES_HEADER(2)
+  HASH2_CALC;
+  curMatch = p->hash[hashValue];
+  p->hash[hashValue] = p->pos;
+  offset = 0;
+  GET_MATCHES_FOOTER(offset, 1)
+}
+
+UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+  UInt32 offset;
+  GET_MATCHES_HEADER(3)
+  HASH_ZIP_CALC;
+  curMatch = p->hash[hashValue];
+  p->hash[hashValue] = p->pos;
+  offset = 0;
+  GET_MATCHES_FOOTER(offset, 2)
+}
+
+static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+  UInt32 hash2Value, delta2, maxLen, offset;
+  GET_MATCHES_HEADER(3)
+
+  HASH3_CALC;
+
+  delta2 = p->pos - p->hash[hash2Value];
+  curMatch = p->hash[kFix3HashSize + hashValue];
+
+  p->hash[hash2Value] =
+  p->hash[kFix3HashSize + hashValue] = p->pos;
+
+
+  maxLen = 2;
+  offset = 0;
+  if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur)
+  {
+    for (; maxLen != lenLimit; maxLen++)
+      if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen])
+        break;
+    distances[0] = maxLen;
+    distances[1] = delta2 - 1;
+    offset = 2;
+    if (maxLen == lenLimit)
+    {
+      SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p));
+      MOVE_POS_RET;
+    }
+  }
+  GET_MATCHES_FOOTER(offset, maxLen)
+}
+
+static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+  UInt32 hash2Value, hash3Value, delta2, delta3, maxLen, offset;
+  GET_MATCHES_HEADER(4)
+
+  HASH4_CALC;
+
+  delta2 = p->pos - p->hash[                hash2Value];
+  delta3 = p->pos - p->hash[kFix3HashSize + hash3Value];
+  curMatch = p->hash[kFix4HashSize + hashValue];
+
+  p->hash[                hash2Value] =
+  p->hash[kFix3HashSize + hash3Value] =
+  p->hash[kFix4HashSize + hashValue] = p->pos;
+
+  maxLen = 1;
+  offset = 0;
+  if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur)
+  {
+    distances[0] = maxLen = 2;
+    distances[1] = delta2 - 1;
+    offset = 2;
+  }
+  if (delta2 != delta3 && delta3 < p->cyclicBufferSize && *(cur - delta3) == *cur)
+  {
+    maxLen = 3;
+    distances[offset + 1] = delta3 - 1;
+    offset += 2;
+    delta2 = delta3;
+  }
+  if (offset != 0)
+  {
+    for (; maxLen != lenLimit; maxLen++)
+      if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen])
+        break;
+    distances[offset - 2] = maxLen;
+    if (maxLen == lenLimit)
+    {
+      SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p));
+      MOVE_POS_RET;
+    }
+  }
+  if (maxLen < 3)
+    maxLen = 3;
+  GET_MATCHES_FOOTER(offset, maxLen)
+}
+
+static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+  UInt32 hash2Value, hash3Value, delta2, delta3, maxLen, offset;
+  GET_MATCHES_HEADER(4)
+
+  HASH4_CALC;
+
+  delta2 = p->pos - p->hash[                hash2Value];
+  delta3 = p->pos - p->hash[kFix3HashSize + hash3Value];
+  curMatch = p->hash[kFix4HashSize + hashValue];
+
+  p->hash[                hash2Value] =
+  p->hash[kFix3HashSize + hash3Value] =
+  p->hash[kFix4HashSize + hashValue] = p->pos;
+
+  maxLen = 1;
+  offset = 0;
+  if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur)
+  {
+    distances[0] = maxLen = 2;
+    distances[1] = delta2 - 1;
+    offset = 2;
+  }
+  if (delta2 != delta3 && delta3 < p->cyclicBufferSize && *(cur - delta3) == *cur)
+  {
+    maxLen = 3;
+    distances[offset + 1] = delta3 - 1;
+    offset += 2;
+    delta2 = delta3;
+  }
+  if (offset != 0)
+  {
+    for (; maxLen != lenLimit; maxLen++)
+      if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen])
+        break;
+    distances[offset - 2] = maxLen;
+    if (maxLen == lenLimit)
+    {
+      p->son[p->cyclicBufferPos] = curMatch;
+      MOVE_POS_RET;
+    }
+  }
+  if (maxLen < 3)
+    maxLen = 3;
+  offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
+    distances + offset, maxLen) - (distances));
+  MOVE_POS_RET
+}
+
+UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+  UInt32 offset;
+  GET_MATCHES_HEADER(3)
+  HASH_ZIP_CALC;
+  curMatch = p->hash[hashValue];
+  p->hash[hashValue] = p->pos;
+  offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
+    distances, 2) - (distances));
+  MOVE_POS_RET
+}
+
+static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+  do
+  {
+    SKIP_HEADER(2)
+    HASH2_CALC;
+    curMatch = p->hash[hashValue];
+    p->hash[hashValue] = p->pos;
+    SKIP_FOOTER
+  }
+  while (--num != 0);
+}
+
+void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+  do
+  {
+    SKIP_HEADER(3)
+    HASH_ZIP_CALC;
+    curMatch = p->hash[hashValue];
+    p->hash[hashValue] = p->pos;
+    SKIP_FOOTER
+  }
+  while (--num != 0);
+}
+
+static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+  do
+  {
+    UInt32 hash2Value;
+    SKIP_HEADER(3)
+    HASH3_CALC;
+    curMatch = p->hash[kFix3HashSize + hashValue];
+    p->hash[hash2Value] =
+    p->hash[kFix3HashSize + hashValue] = p->pos;
+    SKIP_FOOTER
+  }
+  while (--num != 0);
+}
+
+static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+  do
+  {
+    UInt32 hash2Value, hash3Value;
+    SKIP_HEADER(4)
+    HASH4_CALC;
+    curMatch = p->hash[kFix4HashSize + hashValue];
+    p->hash[                hash2Value] =
+    p->hash[kFix3HashSize + hash3Value] = p->pos;
+    p->hash[kFix4HashSize + hashValue] = p->pos;
+    SKIP_FOOTER
+  }
+  while (--num != 0);
+}
+
+static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+  do
+  {
+    UInt32 hash2Value, hash3Value;
+    SKIP_HEADER(4)
+    HASH4_CALC;
+    curMatch = p->hash[kFix4HashSize + hashValue];
+    p->hash[                hash2Value] =
+    p->hash[kFix3HashSize + hash3Value] =
+    p->hash[kFix4HashSize + hashValue] = p->pos;
+    p->son[p->cyclicBufferPos] = curMatch;
+    MOVE_POS
+  }
+  while (--num != 0);
+}
+
+void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+  do
+  {
+    SKIP_HEADER(3)
+    HASH_ZIP_CALC;
+    curMatch = p->hash[hashValue];
+    p->hash[hashValue] = p->pos;
+    p->son[p->cyclicBufferPos] = curMatch;
+    MOVE_POS
+  }
+  while (--num != 0);
+}
+
+void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable)
+{
+  vTable->Init = (Mf_Init_Func)MatchFinder_Init;
+  vTable->GetIndexByte = (Mf_GetIndexByte_Func)MatchFinder_GetIndexByte;
+  vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes;
+  vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos;
+  if (!p->btMode)
+  {
+    vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches;
+    vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip;
+  }
+  else if (p->numHashBytes == 2)
+  {
+    vTable->GetMatches = (Mf_GetMatches_Func)Bt2_MatchFinder_GetMatches;
+    vTable->Skip = (Mf_Skip_Func)Bt2_MatchFinder_Skip;
+  }
+  else if (p->numHashBytes == 3)
+  {
+    vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches;
+    vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip;
+  }
+  else
+  {
+    vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches;
+    vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip;
+  }
+}
diff --git a/lib/LzmaDec.c b/lib/LzmaDec.c
new file mode 100644
index 0000000..dbd6e65
--- /dev/null
+++ b/lib/LzmaDec.c
@@ -0,0 +1,1014 @@
+/* LzmaDec.c -- LZMA Decoder
+2008-04-29
+Copyright (c) 1999-2008 Igor Pavlov
+Read LzmaDec.h for license options */
+
+#include "LzmaDec.h"
+
+#include <string.h>
+
+#define kNumTopBits 24
+#define kTopValue ((UInt32)1 << kNumTopBits)
+
+#define kNumBitModelTotalBits 11
+#define kBitModelTotal (1 << kNumBitModelTotalBits)
+#define kNumMoveBits 5
+
+#define RC_INIT_SIZE 5
+
+#define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); }
+
+#define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * ttt; if (code < bound)
+#define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
+#define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits));
+#define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \
+  { UPDATE_0(p); i = (i + i); A0; } else \
+  { UPDATE_1(p); i = (i + i) + 1; A1; } 
+#define GET_BIT(p, i) GET_BIT2(p, i, ; , ;)               
+
+#define TREE_GET_BIT(probs, i) { GET_BIT((probs + i), i); }
+#define TREE_DECODE(probs, limit, i) \
+  { i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; }
+
+/* #define _LZMA_SIZE_OPT */
+
+#ifdef _LZMA_SIZE_OPT
+#define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i)
+#else
+#define TREE_6_DECODE(probs, i) \
+  { i = 1; \
+  TREE_GET_BIT(probs, i); \
+  TREE_GET_BIT(probs, i); \
+  TREE_GET_BIT(probs, i); \
+  TREE_GET_BIT(probs, i); \
+  TREE_GET_BIT(probs, i); \
+  TREE_GET_BIT(probs, i); \
+  i -= 0x40; }
+#endif
+
+#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_ERROR; range <<= 8; code = (code << 8) | (*buf++); }
+
+#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * ttt; if (code < bound)
+#define UPDATE_0_CHECK range = bound;
+#define UPDATE_1_CHECK range -= bound; code -= bound;
+#define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \
+  { UPDATE_0_CHECK; i = (i + i); A0; } else \
+  { UPDATE_1_CHECK; i = (i + i) + 1; A1; } 
+#define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;)               
+#define TREE_DECODE_CHECK(probs, limit, i) \
+  { i = 1; do { GET_BIT_CHECK(probs + i, i) } while(i < limit); i -= limit; }
+
+
+#define kNumPosBitsMax 4
+#define kNumPosStatesMax (1 << kNumPosBitsMax)
+
+#define kLenNumLowBits 3
+#define kLenNumLowSymbols (1 << kLenNumLowBits)
+#define kLenNumMidBits 3
+#define kLenNumMidSymbols (1 << kLenNumMidBits)
+#define kLenNumHighBits 8
+#define kLenNumHighSymbols (1 << kLenNumHighBits)
+
+#define LenChoice 0
+#define LenChoice2 (LenChoice + 1)
+#define LenLow (LenChoice2 + 1)
+#define LenMid (LenLow + (kNumPosStatesMax << kLenNumLowBits))
+#define LenHigh (LenMid + (kNumPosStatesMax << kLenNumMidBits))
+#define kNumLenProbs (LenHigh + kLenNumHighSymbols) 
+
+
+#define kNumStates 12
+#define kNumLitStates 7
+
+#define kStartPosModelIndex 4
+#define kEndPosModelIndex 14
+#define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
+
+#define kNumPosSlotBits 6
+#define kNumLenToPosStates 4
+
+#define kNumAlignBits 4
+#define kAlignTableSize (1 << kNumAlignBits)
+
+#define kMatchMinLen 2
+#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols)
+
+#define IsMatch 0
+#define IsRep (IsMatch + (kNumStates << kNumPosBitsMax))
+#define IsRepG0 (IsRep + kNumStates)
+#define IsRepG1 (IsRepG0 + kNumStates)
+#define IsRepG2 (IsRepG1 + kNumStates)
+#define IsRep0Long (IsRepG2 + kNumStates)
+#define PosSlot (IsRep0Long + (kNumStates << kNumPosBitsMax))
+#define SpecPos (PosSlot + (kNumLenToPosStates << kNumPosSlotBits))
+#define Align (SpecPos + kNumFullDistances - kEndPosModelIndex)
+#define LenCoder (Align + kAlignTableSize)
+#define RepLenCoder (LenCoder + kNumLenProbs)
+#define Literal (RepLenCoder + kNumLenProbs)
+
+#define LZMA_BASE_SIZE 1846
+#define LZMA_LIT_SIZE 768
+
+#define LzmaProps_GetNumProbs(p) ((UInt32)LZMA_BASE_SIZE + (LZMA_LIT_SIZE << ((p)->lc + (p)->lp)))
+
+#if Literal != LZMA_BASE_SIZE
+StopCompilingDueBUG
+#endif
+
+/*
+#define LZMA_STREAM_WAS_FINISHED_ID (-1)
+#define LZMA_SPEC_LEN_OFFSET (-3)
+*/
+
+Byte kLiteralNextStates[kNumStates * 2] = 
+{
+  0, 0, 0, 0, 1, 2, 3,  4,  5,  6,  4,  5, 
+  7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10
+};
+
+#define LZMA_DIC_MIN (1 << 12)
+
+/* First LZMA-symbol is always decoded. 
+And it decodes new LZMA-symbols while (buf < bufLimit), but "buf" is without last normalization 
+Out:
+  Result:
+    0 - OK
+    1 - Error
+  p->remainLen:
+    < kMatchSpecLenStart : normal remain
+    = kMatchSpecLenStart : finished
+    = kMatchSpecLenStart + 1 : Flush marker
+    = kMatchSpecLenStart + 2 : State Init Marker
+*/
+
+static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
+{
+  CLzmaProb *probs = p->probs;
+
+  unsigned state = p->state;
+  UInt32 rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3];
+  unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1;
+  unsigned lpMask = ((unsigned)1 << (p->prop.lp)) - 1;
+  unsigned lc = p->prop.lc;
+
+  Byte *dic = p->dic;
+  SizeT dicBufSize = p->dicBufSize;
+  SizeT dicPos = p->dicPos;
+  
+  UInt32 processedPos = p->processedPos;
+  UInt32 checkDicSize = p->checkDicSize;
+  unsigned len = 0;
+
+  const Byte *buf = p->buf;
+  UInt32 range = p->range;
+  UInt32 code = p->code;
+
+  do
+  {
+    CLzmaProb *prob;
+    UInt32 bound;
+    unsigned ttt;
+    unsigned posState = processedPos & pbMask;
+
+    prob = probs + IsMatch + (state << kNumPosBitsMax) + posState;
+    IF_BIT_0(prob)
+    {
+      unsigned symbol;
+      UPDATE_0(prob);
+      prob = probs + Literal;
+      if (checkDicSize != 0 || processedPos != 0)
+        prob += (LZMA_LIT_SIZE * (((processedPos & lpMask) << lc) + 
+        (dic[(dicPos == 0 ? dicBufSize : dicPos) - 1] >> (8 - lc))));
+
+      if (state < kNumLitStates)
+      {
+        symbol = 1;
+        do { GET_BIT(prob + symbol, symbol) } while (symbol < 0x100);
+      }
+      else
+      {
+        unsigned matchByte = p->dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)];
+        unsigned offs = 0x100;
+        symbol = 1;
+        do
+        {
+          unsigned bit;
+          CLzmaProb *probLit;
+          matchByte <<= 1;
+          bit = (matchByte & offs);
+          probLit = prob + offs + bit + symbol;
+          GET_BIT2(probLit, symbol, offs &= ~bit, offs &= bit)
+        }
+        while (symbol < 0x100);
+      }
+      dic[dicPos++] = (Byte)symbol;
+      processedPos++;
+
+      state = kLiteralNextStates[state];
+      /* if (state < 4) state = 0; else if (state < 10) state -= 3; else state -= 6; */
+      continue;
+    }
+    else             
+    {
+      UPDATE_1(prob);
+      prob = probs + IsRep + state;
+      IF_BIT_0(prob)
+      {
+        UPDATE_0(prob);
+        state += kNumStates;
+        prob = probs + LenCoder;
+      }
+      else
+      {
+        UPDATE_1(prob);
+        if (checkDicSize == 0 && processedPos == 0)
+          return SZ_ERROR_DATA;
+        prob = probs + IsRepG0 + state;
+        IF_BIT_0(prob)
+        {
+          UPDATE_0(prob);
+          prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState;
+          IF_BIT_0(prob)
+          {
+            UPDATE_0(prob);
+            dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)];
+            dicPos++;
+            processedPos++;
+            state = state < kNumLitStates ? 9 : 11;
+            continue;
+          }
+          UPDATE_1(prob);
+        }
+        else
+        {
+          UInt32 distance;
+          UPDATE_1(prob);
+          prob = probs + IsRepG1 + state;
+          IF_BIT_0(prob)
+          {
+            UPDATE_0(prob);
+            distance = rep1;
+          }
+          else 
+          {
+            UPDATE_1(prob);
+            prob = probs + IsRepG2 + state;
+            IF_BIT_0(prob)
+            {
+              UPDATE_0(prob);
+              distance = rep2;
+            }
+            else
+            {
+              UPDATE_1(prob);
+              distance = rep3;
+              rep3 = rep2;
+            }
+            rep2 = rep1;
+          }
+          rep1 = rep0;
+          rep0 = distance;
+        }
+        state = state < kNumLitStates ? 8 : 11;
+        prob = probs + RepLenCoder;
+      }
+      {
+        unsigned limit, offset;
+        CLzmaProb *probLen = prob + LenChoice;
+        IF_BIT_0(probLen)
+        {
+          UPDATE_0(probLen);
+          probLen = prob + LenLow + (posState << kLenNumLowBits);
+          offset = 0;
+          limit = (1 << kLenNumLowBits);
+        }
+        else
+        {
+          UPDATE_1(probLen);
+          probLen = prob + LenChoice2;
+          IF_BIT_0(probLen)
+          {
+            UPDATE_0(probLen);
+            probLen = prob + LenMid + (posState << kLenNumMidBits);
+            offset = kLenNumLowSymbols;
+            limit = (1 << kLenNumMidBits);
+          }
+          else
+          {
+            UPDATE_1(probLen);
+            probLen = prob + LenHigh;
+            offset = kLenNumLowSymbols + kLenNumMidSymbols;
+            limit = (1 << kLenNumHighBits);
+          }
+        }
+        TREE_DECODE(probLen, limit, len);
+        len += offset;
+      }
+
+      if (state >= kNumStates)
+      {
+        UInt32 distance;
+        prob = probs + PosSlot +
+            ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits);
+        TREE_6_DECODE(prob, distance);
+        if (distance >= kStartPosModelIndex)
+        {
+          unsigned posSlot = (unsigned)distance; 
+          int numDirectBits = (int)(((distance >> 1) - 1));
+          distance = (2 | (distance & 1));
+          if (posSlot < kEndPosModelIndex)
+          {
+            distance <<= numDirectBits;
+            prob = probs + SpecPos + distance - posSlot - 1;
+            {
+              UInt32 mask = 1;
+              unsigned i = 1;
+              do
+              {
+                GET_BIT2(prob + i, i, ; , distance |= mask);
+                mask <<= 1;
+              }
+              while(--numDirectBits != 0);
+            }
+          }
+          else
+          {
+            numDirectBits -= kNumAlignBits;
+            do
+            {
+              NORMALIZE
+              range >>= 1;
+              
+              {
+                UInt32 t;
+                code -= range;
+                t = (0 - ((UInt32)code >> 31)); /* (UInt32)((Int32)code >> 31) */
+                distance = (distance << 1) + (t + 1);
+                code += range & t;
+              }
+              /*
+              distance <<= 1;
+              if (code >= range)
+              {
+                code -= range;
+                distance |= 1;
+              }
+              */
+            }
+            while (--numDirectBits != 0);
+            prob = probs + Align;
+            distance <<= kNumAlignBits;
+            {
+              unsigned i = 1;
+              GET_BIT2(prob + i, i, ; , distance |= 1);
+              GET_BIT2(prob + i, i, ; , distance |= 2);
+              GET_BIT2(prob + i, i, ; , distance |= 4);
+              GET_BIT2(prob + i, i, ; , distance |= 8);
+            }
+            if (distance == (UInt32)0xFFFFFFFF)
+            {
+              len += kMatchSpecLenStart;
+              state -= kNumStates;
+              break;
+            }
+          }
+        }
+        rep3 = rep2;
+        rep2 = rep1;
+        rep1 = rep0;
+        rep0 = distance + 1; 
+        if (checkDicSize == 0)
+        {
+          if (distance >= processedPos)
+            return SZ_ERROR_DATA;
+        }
+        else if (distance >= checkDicSize)
+          return SZ_ERROR_DATA;
+        state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;
+        /* state = kLiteralNextStates[state]; */
+      }
+
+      len += kMatchMinLen;
+
+      {
+        SizeT rem = limit - dicPos;
+        unsigned curLen = ((rem < len) ? (unsigned)rem : len);
+        SizeT pos = (dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0);
+
+        processedPos += curLen;
+
+        len -= curLen;
+        if (pos + curLen <= dicBufSize)
+        {
+          Byte *dest = dic + dicPos;
+          ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos;
+          const Byte *lim = dest + curLen;
+          dicPos += curLen;
+          do 
+            *(dest) = (Byte)*(dest + src); 
+          while (++dest != lim);
+        }
+        else
+        {
+          do
+          {
+            dic[dicPos++] = dic[pos];
+            if (++pos == dicBufSize)
+              pos = 0;
+          }
+          while (--curLen != 0);
+        }
+      }
+    }
+  }
+  while (dicPos < limit && buf < bufLimit);
+  NORMALIZE;
+  p->buf = buf;
+  p->range = range;
+  p->code = code;
+  p->remainLen = len;
+  p->dicPos = dicPos;
+  p->processedPos = processedPos;
+  p->reps[0] = rep0;
+  p->reps[1] = rep1;
+  p->reps[2] = rep2;
+  p->reps[3] = rep3;
+  p->state = state;
+
+  return SZ_OK;
+}
+
+static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit)
+{
+  if (p->remainLen != 0 && p->remainLen < kMatchSpecLenStart)
+  {
+    Byte *dic = p->dic;
+    SizeT dicPos = p->dicPos;
+    SizeT dicBufSize = p->dicBufSize;
+    unsigned len = p->remainLen;
+    UInt32 rep0 = p->reps[0];
+    if (limit - dicPos < len)
+      len = (unsigned)(limit - dicPos);
+
+    if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len)
+      p->checkDicSize = p->prop.dicSize;
+
+    p->processedPos += len;
+    p->remainLen -= len;
+    while (len-- != 0)
+    {
+      dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)];
+      dicPos++;
+    }
+    p->dicPos = dicPos;
+  }
+}
+
+/* LzmaDec_DecodeReal2 decodes LZMA-symbols and sets p->needFlush and p->needInit, if required. */
+
+static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
+{
+  do
+  {
+    SizeT limit2 = limit;
+    if (p->checkDicSize == 0)
+    {
+      UInt32 rem = p->prop.dicSize - p->processedPos;
+      if (limit - p->dicPos > rem)
+        limit2 = p->dicPos + rem;
+    }
+    RINOK(LzmaDec_DecodeReal(p, limit2, bufLimit));
+    if (p->processedPos >= p->prop.dicSize)
+      p->checkDicSize = p->prop.dicSize;
+    LzmaDec_WriteRem(p, limit);
+  }
+  while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart);
+
+  if (p->remainLen > kMatchSpecLenStart)
+  {
+    p->remainLen = kMatchSpecLenStart;
+  }
+  return 0;
+}
+
+typedef enum 
+{
+  DUMMY_ERROR, /* unexpected end of input stream */
+  DUMMY_LIT,
+  DUMMY_MATCH,
+  DUMMY_REP
+} ELzmaDummy;
+
+static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inSize)
+{
+  UInt32 range = p->range;
+  UInt32 code = p->code;
+  const Byte *bufLimit = buf + inSize;
+  CLzmaProb *probs = p->probs;
+  unsigned state = p->state;
+  ELzmaDummy res;
+
+  {
+    CLzmaProb *prob;
+    UInt32 bound;
+    unsigned ttt;
+    unsigned posState = (p->processedPos) & ((1 << p->prop.pb) - 1);
+
+    prob = probs + IsMatch + (state << kNumPosBitsMax) + posState;
+    IF_BIT_0_CHECK(prob)
+    {
+      UPDATE_0_CHECK
+
+      /* if (bufLimit - buf >= 7) return DUMMY_LIT; */
+
+      prob = probs + Literal;
+      if (p->checkDicSize != 0 || p->processedPos != 0)
+        prob += (LZMA_LIT_SIZE * 
+          ((((p->processedPos) & ((1 << (p->prop.lp)) - 1)) << p->prop.lc) + 
+          (p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc))));
+
+      if (state < kNumLitStates)
+      {
+        unsigned symbol = 1;
+        do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100);
+      }
+      else
+      {
+        unsigned matchByte = p->dic[p->dicPos - p->reps[0] + 
+            ((p->dicPos < p->reps[0]) ? p->dicBufSize : 0)];
+        unsigned offs = 0x100;
+        unsigned symbol = 1;
+        do
+        {
+          unsigned bit;
+          CLzmaProb *probLit;
+          matchByte <<= 1;
+          bit = (matchByte & offs);
+          probLit = prob + offs + bit + symbol;
+          GET_BIT2_CHECK(probLit, symbol, offs &= ~bit, offs &= bit)
+        }
+        while (symbol < 0x100);
+      }
+      res = DUMMY_LIT;
+    }
+    else             
+    {
+      unsigned len;
+      UPDATE_1_CHECK;
+
+      prob = probs + IsRep + state;
+      IF_BIT_0_CHECK(prob)
+      {
+        UPDATE_0_CHECK;
+        state = 0;
+        prob = probs + LenCoder;
+        res = DUMMY_MATCH;
+      }
+      else
+      {
+        UPDATE_1_CHECK;
+        res = DUMMY_REP;
+        prob = probs + IsRepG0 + state;
+        IF_BIT_0_CHECK(prob)
+        {
+          UPDATE_0_CHECK;
+          prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState;
+          IF_BIT_0_CHECK(prob)
+          {
+            UPDATE_0_CHECK;
+            NORMALIZE_CHECK;
+            return DUMMY_REP;
+          }
+          else
+          {
+            UPDATE_1_CHECK;
+          }
+        }
+        else
+        {
+          UPDATE_1_CHECK;
+          prob = probs + IsRepG1 + state;
+          IF_BIT_0_CHECK(prob)
+          {
+            UPDATE_0_CHECK;
+          }
+          else 
+          {
+            UPDATE_1_CHECK;
+            prob = probs + IsRepG2 + state;
+            IF_BIT_0_CHECK(prob)
+            {
+              UPDATE_0_CHECK;
+            }
+            else
+            {
+              UPDATE_1_CHECK;
+            }
+          }
+        }
+        state = kNumStates;
+        prob = probs + RepLenCoder;
+      }
+      {
+        unsigned limit, offset;
+        CLzmaProb *probLen = prob + LenChoice;
+        IF_BIT_0_CHECK(probLen)
+        {
+          UPDATE_0_CHECK;
+          probLen = prob + LenLow + (posState << kLenNumLowBits);
+          offset = 0;
+          limit = 1 << kLenNumLowBits;
+        }
+        else
+        {
+          UPDATE_1_CHECK;
+          probLen = prob + LenChoice2;
+          IF_BIT_0_CHECK(probLen)
+          {
+            UPDATE_0_CHECK;
+            probLen = prob + LenMid + (posState << kLenNumMidBits);
+            offset = kLenNumLowSymbols;
+            limit = 1 << kLenNumMidBits;
+          }
+          else
+          {
+            UPDATE_1_CHECK;
+            probLen = prob + LenHigh;
+            offset = kLenNumLowSymbols + kLenNumMidSymbols;
+            limit = 1 << kLenNumHighBits;
+          }
+        }
+        TREE_DECODE_CHECK(probLen, limit, len);
+        len += offset;
+      }
+
+      if (state < 4)
+      {
+        unsigned posSlot;
+        prob = probs + PosSlot +
+            ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << 
+            kNumPosSlotBits);
+        TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot);
+        if (posSlot >= kStartPosModelIndex)
+        {
+          int numDirectBits = ((posSlot >> 1) - 1);
+
+          /* if (bufLimit - buf >= 8) return DUMMY_MATCH; */
+
+          if (posSlot < kEndPosModelIndex)
+          {
+            prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits) - posSlot - 1;
+          }
+          else
+          {
+            numDirectBits -= kNumAlignBits;
+            do
+            {
+              NORMALIZE_CHECK
+              range >>= 1;
+              code -= range & (((code - range) >> 31) - 1);
+              /* if (code >= range) code -= range; */
+            }
+            while (--numDirectBits != 0);
+            prob = probs + Align;
+            numDirectBits = kNumAlignBits;
+          }
+          {
+            unsigned i = 1;
+            do
+            {
+              GET_BIT_CHECK(prob + i, i);
+            }
+            while(--numDirectBits != 0);
+          }
+        }
+      }
+    }
+  }
+  NORMALIZE_CHECK;
+  return res;
+}
+
+
+static void LzmaDec_InitRc(CLzmaDec *p, const Byte *data)
+{
+  p->code = ((UInt32)data[1] << 24) | ((UInt32)data[2] << 16) | ((UInt32)data[3] << 8) | ((UInt32)data[4]);
+  p->range = 0xFFFFFFFF;
+  p->needFlush = 0;
+}
+
+void LzmaDec_InitDicAndState(CLzmaDec *p, Bool initDic, Bool initState) 
+{ 
+  p->needFlush = 1; 
+  p->remainLen = 0; 
+  p->tempBufSize = 0; 
+
+  if (initDic)
+  {
+    p->processedPos = 0;
+    p->checkDicSize = 0;
+    p->needInitState = 1;
+  }
+  if (initState)
+    p->needInitState = 1;
+}
+
+void LzmaDec_Init(CLzmaDec *p) 
+{ 
+  p->dicPos = 0; 
+  LzmaDec_InitDicAndState(p, True, True);
+}
+
+static void LzmaDec_InitStateReal(CLzmaDec *p)
+{
+  UInt32 numProbs = Literal + ((UInt32)LZMA_LIT_SIZE << (p->prop.lc + p->prop.lp));
+  UInt32 i;
+  CLzmaProb *probs = p->probs;
+  for (i = 0; i < numProbs; i++)
+    probs[i] = kBitModelTotal >> 1; 
+  p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1;
+  p->state = 0;
+  p->needInitState = 0;
+}
+
+SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *srcLen, 
+    ELzmaFinishMode finishMode, ELzmaStatus *status)
+{
+  SizeT inSize = *srcLen;
+  (*srcLen) = 0;
+  LzmaDec_WriteRem(p, dicLimit);
+  
+  *status = LZMA_STATUS_NOT_SPECIFIED;
+
+  while (p->remainLen != kMatchSpecLenStart)
+  {
+      int checkEndMarkNow;
+
+      if (p->needFlush != 0)
+      {
+        for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--)
+          p->tempBuf[p->tempBufSize++] = *src++;
+        if (p->tempBufSize < RC_INIT_SIZE)
+        {
+          *status = LZMA_STATUS_NEEDS_MORE_INPUT;
+          return SZ_OK;
+        }
+        if (p->tempBuf[0] != 0)
+          return SZ_ERROR_DATA;
+
+        LzmaDec_InitRc(p, p->tempBuf);
+        p->tempBufSize = 0;
+      }
+
+      checkEndMarkNow = 0;
+      if (p->dicPos >= dicLimit)
+      {
+        if (p->remainLen == 0 && p->code == 0)
+        {
+          *status = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK;
+          return SZ_OK;
+        }
+        if (finishMode == LZMA_FINISH_ANY)
+        {
+          *status = LZMA_STATUS_NOT_FINISHED;
+          return SZ_OK;
+        }
+        if (p->remainLen != 0)
+        {
+          *status = LZMA_STATUS_NOT_FINISHED;
+          return SZ_ERROR_DATA;
+        }
+        checkEndMarkNow = 1;
+      }
+
+      if (p->needInitState)
+        LzmaDec_InitStateReal(p);
+  
+      if (p->tempBufSize == 0)
+      {
+        SizeT processed;
+        const Byte *bufLimit;
+        if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
+        {
+          int dummyRes = LzmaDec_TryDummy(p, src, inSize);
+          if (dummyRes == DUMMY_ERROR)
+          {
+            memcpy(p->tempBuf, src, inSize);
+            p->tempBufSize = (unsigned)inSize;
+            (*srcLen) += inSize;
+            *status = LZMA_STATUS_NEEDS_MORE_INPUT;
+            return SZ_OK;
+          }
+          if (checkEndMarkNow && dummyRes != DUMMY_MATCH)
+          {
+            *status = LZMA_STATUS_NOT_FINISHED;
+            return SZ_ERROR_DATA;
+          }
+          bufLimit = src;
+        }
+        else
+          bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX;
+        p->buf = src;
+        if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit) != 0)
+          return SZ_ERROR_DATA;
+        processed = p->buf - src;
+        (*srcLen) += processed;
+        src += processed;
+        inSize -= processed;
+      }
+      else
+      {
+        unsigned rem = p->tempBufSize, lookAhead = 0;
+        while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize)
+          p->tempBuf[rem++] = src[lookAhead++];
+        p->tempBufSize = rem;
+        if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
+        {
+          int dummyRes = LzmaDec_TryDummy(p, p->tempBuf, rem);
+          if (dummyRes == DUMMY_ERROR)
+          {
+            (*srcLen) += lookAhead;
+            *status = LZMA_STATUS_NEEDS_MORE_INPUT;
+            return SZ_OK;
+          }
+          if (checkEndMarkNow && dummyRes != DUMMY_MATCH)
+          {
+            *status = LZMA_STATUS_NOT_FINISHED;
+            return SZ_ERROR_DATA;
+          }
+        }
+        p->buf = p->tempBuf;
+        if (LzmaDec_DecodeReal2(p, dicLimit, p->buf) != 0)
+          return SZ_ERROR_DATA;
+        lookAhead -= (rem - (unsigned)(p->buf - p->tempBuf));
+        (*srcLen) += lookAhead;
+        src += lookAhead;
+        inSize -= lookAhead;
+        p->tempBufSize = 0;
+      }
+  }
+  if (p->code == 0) 
+    *status = LZMA_STATUS_FINISHED_WITH_MARK;
+  return (p->code == 0) ? SZ_OK : SZ_ERROR_DATA;
+}
+
+SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status)
+{
+  SizeT outSize = *destLen;
+  SizeT inSize = *srcLen;
+  *srcLen = *destLen = 0;
+  for (;;)
+  {
+    SizeT inSizeCur = inSize, outSizeCur, dicPos;
+    ELzmaFinishMode curFinishMode;
+    SRes res;
+    if (p->dicPos == p->dicBufSize)
+      p->dicPos = 0;
+    dicPos = p->dicPos;
+    if (outSize > p->dicBufSize - dicPos)
+    {
+      outSizeCur = p->dicBufSize;
+      curFinishMode = LZMA_FINISH_ANY;
+    }
+    else
+    {
+      outSizeCur = dicPos + outSize;
+      curFinishMode = finishMode;
+    }
+
+    res = LzmaDec_DecodeToDic(p, outSizeCur, src, &inSizeCur, curFinishMode, status);
+    src += inSizeCur;
+    inSize -= inSizeCur;
+    *srcLen += inSizeCur;
+    outSizeCur = p->dicPos - dicPos;
+    memcpy(dest, p->dic + dicPos, outSizeCur);
+    dest += outSizeCur;
+    outSize -= outSizeCur;
+    *destLen += outSizeCur;
+    if (res != 0)
+      return res;
+    if (outSizeCur == 0 || outSize == 0)
+      return SZ_OK;
+  }
+}
+
+void LzmaDec_FreeProbs(CLzmaDec *p, ISzAlloc *alloc) 
+{ 
+  alloc->Free(alloc, p->probs);  
+  p->probs = 0; 
+}
+
+static void LzmaDec_FreeDict(CLzmaDec *p, ISzAlloc *alloc) 
+{ 
+  alloc->Free(alloc, p->dic); 
+  p->dic = 0; 
+}
+
+void LzmaDec_Free(CLzmaDec *p, ISzAlloc *alloc)
+{
+  LzmaDec_FreeProbs(p, alloc);
+  LzmaDec_FreeDict(p, alloc);
+}
+
+SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size)
+{
+  UInt32 dicSize; 
+  Byte d;
+  
+  if (size < LZMA_PROPS_SIZE)
+    return SZ_ERROR_UNSUPPORTED;
+  else
+    dicSize = data[1] | ((UInt32)data[2] << 8) | ((UInt32)data[3] << 16) | ((UInt32)data[4] << 24);
+ 
+  if (dicSize < LZMA_DIC_MIN)
+    dicSize = LZMA_DIC_MIN;
+  p->dicSize = dicSize;
+
+  d = data[0];
+  if (d >= (9 * 5 * 5))
+    return SZ_ERROR_UNSUPPORTED;
+
+  p->lc = d % 9;
+  d /= 9;
+  p->pb = d / 5;
+  p->lp = d % 5;
+
+  return SZ_OK;
+}
+
+static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAlloc *alloc)
+{
+  UInt32 numProbs = LzmaProps_GetNumProbs(propNew);
+  if (p->probs == 0 || numProbs != p->numProbs)
+  {
+    LzmaDec_FreeProbs(p, alloc);
+    p->probs = (CLzmaProb *)alloc->Alloc(alloc, numProbs * sizeof(CLzmaProb));
+    p->numProbs = numProbs;
+    if (p->probs == 0)
+      return SZ_ERROR_MEM;
+  }
+  return SZ_OK;
+}
+
+SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc)
+{
+  CLzmaProps propNew;
+  RINOK(LzmaProps_Decode(&propNew, props, propsSize));
+  RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc));
+  p->prop = propNew;
+  return SZ_OK;
+}
+
+SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc)
+{
+  CLzmaProps propNew;
+  SizeT dicBufSize;
+  RINOK(LzmaProps_Decode(&propNew, props, propsSize));
+  RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc));
+  dicBufSize = propNew.dicSize;
+  if (p->dic == 0 || dicBufSize != p->dicBufSize)
+  {
+    LzmaDec_FreeDict(p, alloc);
+    p->dic = (Byte *)alloc->Alloc(alloc, dicBufSize);
+    if (p->dic == 0)
+    {
+      LzmaDec_FreeProbs(p, alloc);
+      return SZ_ERROR_MEM;
+    }
+  }
+  p->dicBufSize = dicBufSize;
+  p->prop = propNew;
+  return SZ_OK;
+}
+
+SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
+    const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode, 
+    ELzmaStatus *status, ISzAlloc *alloc)
+{
+  CLzmaDec p;
+  SRes res;
+  SizeT inSize = *srcLen;
+  SizeT outSize = *destLen;
+  *srcLen = *destLen = 0;
+  if (inSize < RC_INIT_SIZE)
+    return SZ_ERROR_INPUT_EOF;
+
+  LzmaDec_Construct(&p);
+  res = LzmaDec_AllocateProbs(&p, propData, propSize, alloc);
+  if (res != 0)
+    return res;
+  p.dic = dest;
+  p.dicBufSize = outSize;
+
+  LzmaDec_Init(&p);
+  
+  *srcLen = inSize;
+  res = LzmaDec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status);
+
+  if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT)
+    res = SZ_ERROR_INPUT_EOF;
+
+  (*destLen) = p.dicPos;
+  LzmaDec_FreeProbs(&p, alloc);
+  return res;
+}
diff --git a/lib/LzmaEnc.c b/lib/LzmaEnc.c
new file mode 100644
index 0000000..ecbe496
--- /dev/null
+++ b/lib/LzmaEnc.c
@@ -0,0 +1,2332 @@
+/* LzmaEnc.c -- LZMA Encoder
+2008-04-28
+Copyright (c) 1999-2008 Igor Pavlov
+Read LzmaEnc.h for license options */
+
+#include <stdio.h>
+#include <string.h>
+
+#include <grub/lib/LzmaEnc.h>
+
+#include <grub/lib/LzFind.h>
+#ifdef COMPRESS_MF_MT
+#include <grub/lib/LzFindMt.h>
+#endif
+
+/* #define SHOW_STAT */
+/* #define SHOW_STAT2 */
+
+#ifdef SHOW_STAT
+static int ttt = 0;
+#endif
+
+#define kBlockSizeMax ((1 << LZMA_NUM_BLOCK_SIZE_BITS) - 1)
+
+#define kBlockSize (9 << 10)
+#define kUnpackBlockSize (1 << 18)
+#define kMatchArraySize (1 << 21)
+#define kMatchRecordMaxSize ((LZMA_MATCH_LEN_MAX * 2 + 3) * LZMA_MATCH_LEN_MAX)
+
+#define kNumMaxDirectBits (31)
+
+#define kNumTopBits 24
+#define kTopValue ((UInt32)1 << kNumTopBits)
+
+#define kNumBitModelTotalBits 11
+#define kBitModelTotal (1 << kNumBitModelTotalBits)
+#define kNumMoveBits 5
+#define kProbInitValue (kBitModelTotal >> 1)
+
+#define kNumMoveReducingBits 4
+#define kNumBitPriceShiftBits 4
+#define kBitPrice (1 << kNumBitPriceShiftBits)
+
+void LzmaEncProps_Init(CLzmaEncProps *p)
+{
+  p->level = 5;
+  p->dictSize = p->mc = 0;
+  p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1;
+  p->writeEndMark = 0;
+}
+
+void LzmaEncProps_Normalize(CLzmaEncProps *p)
+{
+  int level = p->level;
+  if (level < 0) level = 5;
+  p->level = level;
+  if (p->dictSize == 0) p->dictSize = (level <= 5 ? (1 << (level * 2 + 14)) : (level == 6 ? (1 << 25) : (1 << 26)));
+  if (p->lc < 0) p->lc = 3;
+  if (p->lp < 0) p->lp = 0;
+  if (p->pb < 0) p->pb = 2;
+  if (p->algo < 0) p->algo = (level < 5 ? 0 : 1);
+  if (p->fb < 0) p->fb = (level < 7 ? 32 : 64);
+  if (p->btMode < 0) p->btMode = (p->algo == 0 ? 0 : 1);
+  if (p->numHashBytes < 0) p->numHashBytes = 4;
+  if (p->mc == 0)  p->mc = (16 + (p->fb >> 1)) >> (p->btMode ? 0 : 1);
+  if (p->numThreads < 0) p->numThreads = ((p->btMode && p->algo) ? 2 : 1);
+}
+
+UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2)
+{
+  CLzmaEncProps props = *props2;
+  LzmaEncProps_Normalize(&props);
+  return props.dictSize;
+}
+
+/* #define LZMA_LOG_BSR */
+/* Define it for Intel's CPU */
+
+
+#ifdef LZMA_LOG_BSR
+
+#define kDicLogSizeMaxCompress 30
+
+#define BSR2_RET(pos, res) { unsigned long i; _BitScanReverse(&i, (pos)); res = (i + i) + ((pos >> (i - 1)) & 1); }
+
+UInt32 GetPosSlot1(UInt32 pos)
+{
+  UInt32 res;
+  BSR2_RET(pos, res);
+  return res;
+}
+#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); }
+#define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res); }
+
+#else
+
+#define kNumLogBits (9 + (int)sizeof(size_t) / 2)
+#define kDicLogSizeMaxCompress ((kNumLogBits - 1) * 2 + 7)
+
+void LzmaEnc_FastPosInit(Byte *g_FastPos)
+{
+  int c = 2, slotFast;
+  g_FastPos[0] = 0;
+  g_FastPos[1] = 1;
+
+  for (slotFast = 2; slotFast < kNumLogBits * 2; slotFast++)
+  {
+    UInt32 k = (1 << ((slotFast >> 1) - 1));
+    UInt32 j;
+    for (j = 0; j < k; j++, c++)
+      g_FastPos[c] = (Byte)slotFast;
+  }
+}
+
+#define BSR2_RET(pos, res) { UInt32 i = 6 + ((kNumLogBits - 1) & \
+  (0 - (((((UInt32)1 << (kNumLogBits + 6)) - 1) - pos) >> 31))); \
+  res = p->g_FastPos[pos >> i] + (i * 2); }
+/*
+#define BSR2_RET(pos, res) { res = (pos < (1 << (kNumLogBits + 6))) ? \
+  p->g_FastPos[pos >> 6] + 12 : \
+  p->g_FastPos[pos >> (6 + kNumLogBits - 1)] + (6 + (kNumLogBits - 1)) * 2; }
+*/
+
+#define GetPosSlot1(pos) p->g_FastPos[pos]
+#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); }
+#define GetPosSlot(pos, res) { if (pos < kNumFullDistances) res = p->g_FastPos[pos]; else BSR2_RET(pos, res); }
+
+#endif
+
+
+#define LZMA_NUM_REPS 4
+
+typedef unsigned CState;
+
+typedef struct _COptimal
+{
+  UInt32 price;
+
+  CState state;
+  int prev1IsChar;
+  int prev2;
+
+  UInt32 posPrev2;
+  UInt32 backPrev2;
+
+  UInt32 posPrev;
+  UInt32 backPrev;
+  UInt32 backs[LZMA_NUM_REPS];
+} COptimal;
+
+#define kNumOpts (1 << 12)
+
+#define kNumLenToPosStates 4
+#define kNumPosSlotBits 6
+#define kDicLogSizeMin 0
+#define kDicLogSizeMax 32
+#define kDistTableSizeMax (kDicLogSizeMax * 2)
+
+
+#define kNumAlignBits 4
+#define kAlignTableSize (1 << kNumAlignBits)
+#define kAlignMask (kAlignTableSize - 1)
+
+#define kStartPosModelIndex 4
+#define kEndPosModelIndex 14
+#define kNumPosModels (kEndPosModelIndex - kStartPosModelIndex)
+
+#define kNumFullDistances (1 << (kEndPosModelIndex / 2))
+
+#ifdef _LZMA_PROB32
+#define CLzmaProb UInt32
+#else
+#define CLzmaProb UInt16
+#endif
+
+#define LZMA_PB_MAX 4
+#define LZMA_LC_MAX 8
+#define LZMA_LP_MAX 4
+
+#define LZMA_NUM_PB_STATES_MAX (1 << LZMA_PB_MAX)
+
+
+#define kLenNumLowBits 3
+#define kLenNumLowSymbols (1 << kLenNumLowBits)
+#define kLenNumMidBits 3
+#define kLenNumMidSymbols (1 << kLenNumMidBits)
+#define kLenNumHighBits 8
+#define kLenNumHighSymbols (1 << kLenNumHighBits)
+
+#define kLenNumSymbolsTotal (kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols)
+
+#define LZMA_MATCH_LEN_MIN 2
+#define LZMA_MATCH_LEN_MAX (LZMA_MATCH_LEN_MIN + kLenNumSymbolsTotal - 1)
+
+#define kNumStates 12
+
+typedef struct
+{
+  CLzmaProb choice;
+  CLzmaProb choice2;
+  CLzmaProb low[LZMA_NUM_PB_STATES_MAX << kLenNumLowBits];
+  CLzmaProb mid[LZMA_NUM_PB_STATES_MAX << kLenNumMidBits];
+  CLzmaProb high[kLenNumHighSymbols];
+} CLenEnc;
+
+typedef struct
+{
+  CLenEnc p;
+  UInt32 prices[LZMA_NUM_PB_STATES_MAX][kLenNumSymbolsTotal];
+  UInt32 tableSize;
+  UInt32 counters[LZMA_NUM_PB_STATES_MAX];
+} CLenPriceEnc;
+
+typedef struct _CRangeEnc
+{
+  UInt32 range;
+  Byte cache;
+  UInt64 low;
+  UInt64 cacheSize;
+  Byte *buf;
+  Byte *bufLim;
+  Byte *bufBase;
+  ISeqOutStream *outStream;
+  UInt64 processed;
+  SRes res;
+} CRangeEnc;
+
+typedef struct _CSeqInStreamBuf
+{
+  ISeqInStream funcTable;
+  const Byte *data;
+  SizeT rem;
+} CSeqInStreamBuf;
+
+static SRes MyRead(void *pp, void *data, size_t *size)
+{
+  size_t curSize = *size;
+  CSeqInStreamBuf *p = (CSeqInStreamBuf *)pp;
+  if (p->rem < curSize)
+    curSize = p->rem;
+  memcpy(data, p->data, curSize);
+  p->rem -= curSize;
+  p->data += curSize;
+  *size = curSize;
+  return SZ_OK;
+}
+
+typedef struct
+{
+  CLzmaProb *litProbs;
+
+  CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX];
+  CLzmaProb isRep[kNumStates];
+  CLzmaProb isRepG0[kNumStates];
+  CLzmaProb isRepG1[kNumStates];
+  CLzmaProb isRepG2[kNumStates];
+  CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX];
+
+  CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits];
+  CLzmaProb posEncoders[kNumFullDistances - kEndPosModelIndex];
+  CLzmaProb posAlignEncoder[1 << kNumAlignBits];
+
+  CLenPriceEnc lenEnc;
+  CLenPriceEnc repLenEnc;
+
+  UInt32 reps[LZMA_NUM_REPS];
+  UInt32 state;
+} CSaveState;
+
+typedef struct _CLzmaEnc
+{
+  IMatchFinder matchFinder;
+  void *matchFinderObj;
+
+  #ifdef COMPRESS_MF_MT
+  Bool mtMode;
+  CMatchFinderMt matchFinderMt;
+  #endif
+
+  CMatchFinder matchFinderBase;
+
+  #ifdef COMPRESS_MF_MT
+  Byte pad[128];
+  #endif
+
+  UInt32 optimumEndIndex;
+  UInt32 optimumCurrentIndex;
+
+  Bool longestMatchWasFound;
+  UInt32 longestMatchLength;
+  UInt32 numDistancePairs;
+
+  COptimal opt[kNumOpts];
+
+  #ifndef LZMA_LOG_BSR
+  Byte g_FastPos[1 << kNumLogBits];
+  #endif
+
+  UInt32 ProbPrices[kBitModelTotal >> kNumMoveReducingBits];
+  UInt32 matchDistances[LZMA_MATCH_LEN_MAX * 2 + 2 + 1];
+  UInt32 numFastBytes;
+  UInt32 additionalOffset;
+  UInt32 reps[LZMA_NUM_REPS];
+  UInt32 state;
+
+  UInt32 posSlotPrices[kNumLenToPosStates][kDistTableSizeMax];
+  UInt32 distancesPrices[kNumLenToPosStates][kNumFullDistances];
+  UInt32 alignPrices[kAlignTableSize];
+  UInt32 alignPriceCount;
+
+  UInt32 distTableSize;
+
+  unsigned lc, lp, pb;
+  unsigned lpMask, pbMask;
+
+  CLzmaProb *litProbs;
+
+  CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX];
+  CLzmaProb isRep[kNumStates];
+  CLzmaProb isRepG0[kNumStates];
+  CLzmaProb isRepG1[kNumStates];
+  CLzmaProb isRepG2[kNumStates];
+  CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX];
+
+  CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits];
+  CLzmaProb posEncoders[kNumFullDistances - kEndPosModelIndex];
+  CLzmaProb posAlignEncoder[1 << kNumAlignBits];
+
+  CLenPriceEnc lenEnc;
+  CLenPriceEnc repLenEnc;
+
+  unsigned lclp;
+
+  Bool fastMode;
+
+  CRangeEnc rc;
+
+  Bool writeEndMark;
+  UInt64 nowPos64;
+  UInt32 matchPriceCount;
+  Bool finished;
+  Bool multiThread;
+
+  SRes result;
+  UInt32 dictSize;
+  UInt32 matchFinderCycles;
+
+  ISeqInStream *inStream;
+  CSeqInStreamBuf seqBufInStream;
+
+  CSaveState saveState;
+} CLzmaEnc;
+
+void LzmaEnc_SaveState(CLzmaEncHandle pp)
+{
+  CLzmaEnc *p = (CLzmaEnc *)pp;
+  CSaveState *dest = &p->saveState;
+  int i;
+  dest->lenEnc = p->lenEnc;
+  dest->repLenEnc = p->repLenEnc;
+  dest->state = p->state;
+
+  for (i = 0; i < kNumStates; i++)
+  {
+    memcpy(dest->isMatch[i], p->isMatch[i], sizeof(p->isMatch[i]));
+    memcpy(dest->isRep0Long[i], p->isRep0Long[i], sizeof(p->isRep0Long[i]));
+  }
+  for (i = 0; i < kNumLenToPosStates; i++)
+    memcpy(dest->posSlotEncoder[i], p->posSlotEncoder[i], sizeof(p->posSlotEncoder[i]));
+  memcpy(dest->isRep, p->isRep, sizeof(p->isRep));
+  memcpy(dest->isRepG0, p->isRepG0, sizeof(p->isRepG0));
+  memcpy(dest->isRepG1, p->isRepG1, sizeof(p->isRepG1));
+  memcpy(dest->isRepG2, p->isRepG2, sizeof(p->isRepG2));
+  memcpy(dest->posEncoders, p->posEncoders, sizeof(p->posEncoders));
+  memcpy(dest->posAlignEncoder, p->posAlignEncoder, sizeof(p->posAlignEncoder));
+  memcpy(dest->reps, p->reps, sizeof(p->reps));
+  memcpy(dest->litProbs, p->litProbs, (0x300 << p->lclp) * sizeof(CLzmaProb));
+}
+
+void LzmaEnc_RestoreState(CLzmaEncHandle pp)
+{
+  CLzmaEnc *dest = (CLzmaEnc *)pp;
+  const CSaveState *p = &dest->saveState;
+  int i;
+  dest->lenEnc = p->lenEnc;
+  dest->repLenEnc = p->repLenEnc;
+  dest->state = p->state;
+
+  for (i = 0; i < kNumStates; i++)
+  {
+    memcpy(dest->isMatch[i], p->isMatch[i], sizeof(p->isMatch[i]));
+    memcpy(dest->isRep0Long[i], p->isRep0Long[i], sizeof(p->isRep0Long[i]));
+  }
+  for (i = 0; i < kNumLenToPosStates; i++)
+    memcpy(dest->posSlotEncoder[i], p->posSlotEncoder[i], sizeof(p->posSlotEncoder[i]));
+  memcpy(dest->isRep, p->isRep, sizeof(p->isRep));
+  memcpy(dest->isRepG0, p->isRepG0, sizeof(p->isRepG0));
+  memcpy(dest->isRepG1, p->isRepG1, sizeof(p->isRepG1));
+  memcpy(dest->isRepG2, p->isRepG2, sizeof(p->isRepG2));
+  memcpy(dest->posEncoders, p->posEncoders, sizeof(p->posEncoders));
+  memcpy(dest->posAlignEncoder, p->posAlignEncoder, sizeof(p->posAlignEncoder));
+  memcpy(dest->reps, p->reps, sizeof(p->reps));
+  memcpy(dest->litProbs, p->litProbs, (0x300 << dest->lclp) * sizeof(CLzmaProb));
+}
+
+SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
+{
+  CLzmaEnc *p = (CLzmaEnc *)pp;
+  CLzmaEncProps props = *props2;
+  LzmaEncProps_Normalize(&props);
+
+  if (props.lc > LZMA_LC_MAX || props.lp > LZMA_LP_MAX || props.pb > LZMA_PB_MAX ||
+      props.dictSize > (1 << kDicLogSizeMaxCompress) || props.dictSize > (1 << 30))
+    return SZ_ERROR_PARAM;
+  p->dictSize = props.dictSize;
+  p->matchFinderCycles = props.mc;
+  {
+    unsigned fb = props.fb;
+    if (fb < 5)
+      fb = 5;
+    if (fb > LZMA_MATCH_LEN_MAX)
+      fb = LZMA_MATCH_LEN_MAX;
+    p->numFastBytes = fb;
+  }
+  p->lc = props.lc;
+  p->lp = props.lp;
+  p->pb = props.pb;
+  p->fastMode = (props.algo == 0);
+  p->matchFinderBase.btMode = props.btMode;
+  {
+    UInt32 numHashBytes = 4;
+    if (props.btMode)
+    {
+      if (props.numHashBytes < 2)
+        numHashBytes = 2;
+      else if (props.numHashBytes < 4)
+        numHashBytes = props.numHashBytes;
+    }
+    p->matchFinderBase.numHashBytes = numHashBytes;
+  }
+
+  p->matchFinderBase.cutValue = props.mc;
+
+  p->writeEndMark = props.writeEndMark;
+
+  #ifdef COMPRESS_MF_MT
+  /*
+  if (newMultiThread != _multiThread)
+  {
+    ReleaseMatchFinder();
+    _multiThread = newMultiThread;
+  }
+  */
+  p->multiThread = (props.numThreads > 1);
+  #endif
+
+  return SZ_OK;
+}
+
+static const int kLiteralNextStates[kNumStates] = {0, 0, 0, 0, 1, 2, 3, 4,  5,  6,   4, 5};
+static const int kMatchNextStates[kNumStates]   = {7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10};
+static const int kRepNextStates[kNumStates]     = {8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11};
+static const int kShortRepNextStates[kNumStates]= {9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11};
+
+/*
+  void UpdateChar() { Index = kLiteralNextStates[Index]; }
+  void UpdateMatch() { Index = kMatchNextStates[Index]; }
+  void UpdateRep() { Index = kRepNextStates[Index]; }
+  void UpdateShortRep() { Index = kShortRepNextStates[Index]; }
+*/
+
+#define IsCharState(s) ((s) < 7)
+
+
+#define GetLenToPosState(len) (((len) < kNumLenToPosStates + 1) ? (len) - 2 : kNumLenToPosStates - 1)
+
+#define kInfinityPrice (1 << 30)
+
+static void RangeEnc_Construct(CRangeEnc *p)
+{
+  p->outStream = 0;
+  p->bufBase = 0;
+}
+
+#define RangeEnc_GetProcessed(p) ((p)->processed + ((p)->buf - (p)->bufBase) + (p)->cacheSize)
+
+#define RC_BUF_SIZE (1 << 16)
+static int RangeEnc_Alloc(CRangeEnc *p, ISzAlloc *alloc)
+{
+  if (p->bufBase == 0)
+  {
+    p->bufBase = (Byte *)alloc->Alloc(alloc, RC_BUF_SIZE);
+    if (p->bufBase == 0)
+      return 0;
+    p->bufLim = p->bufBase + RC_BUF_SIZE;
+  }
+  return 1;
+}
+
+static void RangeEnc_Free(CRangeEnc *p, ISzAlloc *alloc)
+{
+  alloc->Free(alloc, p->bufBase);
+  p->bufBase = 0;
+}
+
+static void RangeEnc_Init(CRangeEnc *p)
+{
+  /* Stream.Init(); */
+  p->low = 0;
+  p->range = 0xFFFFFFFF;
+  p->cacheSize = 1;
+  p->cache = 0;
+
+  p->buf = p->bufBase;
+
+  p->processed = 0;
+  p->res = SZ_OK;
+}
+
+static void RangeEnc_FlushStream(CRangeEnc *p)
+{
+  size_t num;
+  if (p->res != SZ_OK)
+    return;
+  num = p->buf - p->bufBase;
+  if (num != p->outStream->Write(p->outStream, p->bufBase, num))
+    p->res = SZ_ERROR_WRITE;
+  p->processed += num;
+  p->buf = p->bufBase;
+}
+
+static void MY_FAST_CALL RangeEnc_ShiftLow(CRangeEnc *p)
+{
+  if ((UInt32)p->low < (UInt32)0xFF000000 || (int)(p->low >> 32) != 0)
+  {
+    Byte temp = p->cache;
+    do
+    {
+      Byte *buf = p->buf;
+      *buf++ = (Byte)(temp + (Byte)(p->low >> 32));
+      p->buf = buf;
+      if (buf == p->bufLim)
+        RangeEnc_FlushStream(p);
+      temp = 0xFF;
+    }
+    while (--p->cacheSize != 0);
+    p->cache = (Byte)((UInt32)p->low >> 24);
+  }
+  p->cacheSize++;
+  p->low = (UInt32)p->low << 8;
+}
+
+static void RangeEnc_FlushData(CRangeEnc *p)
+{
+  int i;
+  for (i = 0; i < 5; i++)
+    RangeEnc_ShiftLow(p);
+}
+
+static void RangeEnc_EncodeDirectBits(CRangeEnc *p, UInt32 value, int numBits)
+{
+  do
+  {
+    p->range >>= 1;
+    p->low += p->range & (0 - ((value >> --numBits) & 1));
+    if (p->range < kTopValue)
+    {
+      p->range <<= 8;
+      RangeEnc_ShiftLow(p);
+    }
+  }
+  while (numBits != 0);
+}
+
+static void RangeEnc_EncodeBit(CRangeEnc *p, CLzmaProb *prob, UInt32 symbol)
+{
+  UInt32 ttt = *prob;
+  UInt32 newBound = (p->range >> kNumBitModelTotalBits) * ttt;
+  if (symbol == 0)
+  {
+    p->range = newBound;
+    ttt += (kBitModelTotal - ttt) >> kNumMoveBits;
+  }
+  else
+  {
+    p->low += newBound;
+    p->range -= newBound;
+    ttt -= ttt >> kNumMoveBits;
+  }
+  *prob = (CLzmaProb)ttt;
+  if (p->range < kTopValue)
+  {
+    p->range <<= 8;
+    RangeEnc_ShiftLow(p);
+  }
+}
+
+static void LitEnc_Encode(CRangeEnc *p, CLzmaProb *probs, UInt32 symbol)
+{
+  symbol |= 0x100;
+  do
+  {
+    RangeEnc_EncodeBit(p, probs + (symbol >> 8), (symbol >> 7) & 1);
+    symbol <<= 1;
+  }
+  while (symbol < 0x10000);
+}
+
+static void LitEnc_EncodeMatched(CRangeEnc *p, CLzmaProb *probs, UInt32 symbol, UInt32 matchByte)
+{
+  UInt32 offs = 0x100;
+  symbol |= 0x100;
+  do
+  {
+    matchByte <<= 1;
+    RangeEnc_EncodeBit(p, probs + (offs + (matchByte & offs) + (symbol >> 8)), (symbol >> 7) & 1);
+    symbol <<= 1;
+    offs &= ~(matchByte ^ symbol);
+  }
+  while (symbol < 0x10000);
+}
+
+void LzmaEnc_InitPriceTables(UInt32 *ProbPrices)
+{
+  UInt32 i;
+  for (i = (1 << kNumMoveReducingBits) / 2; i < kBitModelTotal; i += (1 << kNumMoveReducingBits))
+  {
+    const int kCyclesBits = kNumBitPriceShiftBits;
+    UInt32 w = i;
+    UInt32 bitCount = 0;
+    int j;
+    for (j = 0; j < kCyclesBits; j++)
+    {
+      w = w * w;
+      bitCount <<= 1;
+      while (w >= ((UInt32)1 << 16))
+      {
+        w >>= 1;
+        bitCount++;
+      }
+    }
+    ProbPrices[i >> kNumMoveReducingBits] = ((kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount);
+  }
+}
+
+
+#define GET_PRICE(prob, symbol) \
+  p->ProbPrices[((prob) ^ (((-(int)(symbol))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits];
+
+#define GET_PRICEa(prob, symbol) \
+  ProbPrices[((prob) ^ ((-((int)(symbol))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits];
+
+#define GET_PRICE_0(prob) p->ProbPrices[(prob) >> kNumMoveReducingBits]
+#define GET_PRICE_1(prob) p->ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits]
+
+#define GET_PRICE_0a(prob) ProbPrices[(prob) >> kNumMoveReducingBits]
+#define GET_PRICE_1a(prob) ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits]
+
+static UInt32 LitEnc_GetPrice(const CLzmaProb *probs, UInt32 symbol, UInt32 *ProbPrices)
+{
+  UInt32 price = 0;
+  symbol |= 0x100;
+  do
+  {
+    price += GET_PRICEa(probs[symbol >> 8], (symbol >> 7) & 1);
+    symbol <<= 1;
+  }
+  while (symbol < 0x10000);
+  return price;
+};
+
+static UInt32 LitEnc_GetPriceMatched(const CLzmaProb *probs, UInt32 symbol, UInt32 matchByte, UInt32 *ProbPrices)
+{
+  UInt32 price = 0;
+  UInt32 offs = 0x100;
+  symbol |= 0x100;
+  do
+  {
+    matchByte <<= 1;
+    price += GET_PRICEa(probs[offs + (matchByte & offs) + (symbol >> 8)], (symbol >> 7) & 1);
+    symbol <<= 1;
+    offs &= ~(matchByte ^ symbol);
+  }
+  while (symbol < 0x10000);
+  return price;
+};
+
+
+static void RcTree_Encode(CRangeEnc *rc, CLzmaProb *probs, int numBitLevels, UInt32 symbol)
+{
+  UInt32 m = 1;
+  int i;
+  for (i = numBitLevels; i != 0 ;)
+  {
+    UInt32 bit;
+    i--;
+    bit = (symbol >> i) & 1;
+    RangeEnc_EncodeBit(rc, probs + m, bit);
+    m = (m << 1) | bit;
+  }
+};
+
+static void RcTree_ReverseEncode(CRangeEnc *rc, CLzmaProb *probs, int numBitLevels, UInt32 symbol)
+{
+  UInt32 m = 1;
+  int i;
+  for (i = 0; i < numBitLevels; i++)
+  {
+    UInt32 bit = symbol & 1;
+    RangeEnc_EncodeBit(rc, probs + m, bit);
+    m = (m << 1) | bit;
+    symbol >>= 1;
+  }
+}
+
+static UInt32 RcTree_GetPrice(const CLzmaProb *probs, int numBitLevels, UInt32 symbol, UInt32 *ProbPrices)
+{
+  UInt32 price = 0;
+  symbol |= (1 << numBitLevels);
+  while (symbol != 1)
+  {
+    price += GET_PRICEa(probs[symbol >> 1], symbol & 1);
+    symbol >>= 1;
+  }
+  return price;
+}
+
+static UInt32 RcTree_ReverseGetPrice(const CLzmaProb *probs, int numBitLevels, UInt32 symbol, UInt32 *ProbPrices)
+{
+  UInt32 price = 0;
+  UInt32 m = 1;
+  int i;
+  for (i = numBitLevels; i != 0; i--)
+  {
+    UInt32 bit = symbol & 1;
+    symbol >>= 1;
+    price += GET_PRICEa(probs[m], bit);
+    m = (m << 1) | bit;
+  }
+  return price;
+}
+
+
+static void LenEnc_Init(CLenEnc *p)
+{
+  unsigned i;
+  p->choice = p->choice2 = kProbInitValue;
+  for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << kLenNumLowBits); i++)
+    p->low[i] = kProbInitValue;
+  for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << kLenNumMidBits); i++)
+    p->mid[i] = kProbInitValue;
+  for (i = 0; i < kLenNumHighSymbols; i++)
+    p->high[i] = kProbInitValue;
+}
+
+static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, UInt32 symbol, UInt32 posState)
+{
+  if (symbol < kLenNumLowSymbols)
+  {
+    RangeEnc_EncodeBit(rc, &p->choice, 0);
+    RcTree_Encode(rc, p->low + (posState << kLenNumLowBits), kLenNumLowBits, symbol);
+  }
+  else
+  {
+    RangeEnc_EncodeBit(rc, &p->choice, 1);
+    if (symbol < kLenNumLowSymbols + kLenNumMidSymbols)
+    {
+      RangeEnc_EncodeBit(rc, &p->choice2, 0);
+      RcTree_Encode(rc, p->mid + (posState << kLenNumMidBits), kLenNumMidBits, symbol - kLenNumLowSymbols);
+    }
+    else
+    {
+      RangeEnc_EncodeBit(rc, &p->choice2, 1);
+      RcTree_Encode(rc, p->high, kLenNumHighBits, symbol - kLenNumLowSymbols - kLenNumMidSymbols);
+    }
+  }
+}
+
+static void LenEnc_SetPrices(CLenEnc *p, UInt32 posState, UInt32 numSymbols, UInt32 *prices, UInt32 *ProbPrices)
+{
+  UInt32 a0 = GET_PRICE_0a(p->choice);
+  UInt32 a1 = GET_PRICE_1a(p->choice);
+  UInt32 b0 = a1 + GET_PRICE_0a(p->choice2);
+  UInt32 b1 = a1 + GET_PRICE_1a(p->choice2);
+  UInt32 i = 0;
+  for (i = 0; i < kLenNumLowSymbols; i++)
+  {
+    if (i >= numSymbols)
+      return;
+    prices[i] = a0 + RcTree_GetPrice(p->low + (posState << kLenNumLowBits), kLenNumLowBits, i, ProbPrices);
+  }
+  for (; i < kLenNumLowSymbols + kLenNumMidSymbols; i++)
+  {
+    if (i >= numSymbols)
+      return;
+    prices[i] = b0 + RcTree_GetPrice(p->mid + (posState << kLenNumMidBits), kLenNumMidBits, i - kLenNumLowSymbols, ProbPrices);
+  }
+  for (; i < numSymbols; i++)
+    prices[i] = b1 + RcTree_GetPrice(p->high, kLenNumHighBits, i - kLenNumLowSymbols - kLenNumMidSymbols, ProbPrices);
+}
+
+static void MY_FAST_CALL LenPriceEnc_UpdateTable(CLenPriceEnc *p, UInt32 posState, UInt32 *ProbPrices)
+{
+  LenEnc_SetPrices(&p->p, posState, p->tableSize, p->prices[posState], ProbPrices);
+  p->counters[posState] = p->tableSize;
+}
+
+static void LenPriceEnc_UpdateTables(CLenPriceEnc *p, UInt32 numPosStates, UInt32 *ProbPrices)
+{
+  UInt32 posState;
+  for (posState = 0; posState < numPosStates; posState++)
+    LenPriceEnc_UpdateTable(p, posState, ProbPrices);
+}
+
+static void LenEnc_Encode2(CLenPriceEnc *p, CRangeEnc *rc, UInt32 symbol, UInt32 posState, Bool updatePrice, UInt32 *ProbPrices)
+{
+  LenEnc_Encode(&p->p, rc, symbol, posState);
+  if (updatePrice)
+    if (--p->counters[posState] == 0)
+      LenPriceEnc_UpdateTable(p, posState, ProbPrices);
+}
+
+
+
+
+static void MovePos(CLzmaEnc *p, UInt32 num)
+{
+  #ifdef SHOW_STAT
+  ttt += num;
+  printf("\n MovePos %d", num);
+  #endif
+  if (num != 0)
+  {
+    p->additionalOffset += num;
+    p->matchFinder.Skip(p->matchFinderObj, num);
+  }
+}
+
+static UInt32 ReadMatchDistances(CLzmaEnc *p, UInt32 *numDistancePairsRes)
+{
+  UInt32 lenRes = 0, numDistancePairs;
+  numDistancePairs = p->matchFinder.GetMatches(p->matchFinderObj, p->matchDistances);
+  #ifdef SHOW_STAT
+  printf("\n i = %d numPairs = %d    ", ttt, numDistancePairs / 2);
+  if (ttt >= 61994)
+    ttt = ttt;
+
+  ttt++;
+  {
+    UInt32 i;
+  for (i = 0; i < numDistancePairs; i += 2)
+    printf("%2d %6d   | ", p->matchDistances[i], p->matchDistances[i + 1]);
+  }
+  #endif
+  if (numDistancePairs > 0)
+  {
+    lenRes = p->matchDistances[numDistancePairs - 2];
+    if (lenRes == p->numFastBytes)
+    {
+      UInt32 numAvail = p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) + 1;
+      const Byte *pby = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
+      UInt32 distance = p->matchDistances[numDistancePairs - 1] + 1;
+      if (numAvail > LZMA_MATCH_LEN_MAX)
+        numAvail = LZMA_MATCH_LEN_MAX;
+
+      {
+        const Byte *pby2 = pby - distance;
+        for (; lenRes < numAvail && pby[lenRes] == pby2[lenRes]; lenRes++);
+      }
+    }
+  }
+  p->additionalOffset++;
+  *numDistancePairsRes = numDistancePairs;
+  return lenRes;
+}
+
+
+#define MakeAsChar(p) (p)->backPrev = (UInt32)(-1); (p)->prev1IsChar = False;
+#define MakeAsShortRep(p) (p)->backPrev = 0; (p)->prev1IsChar = False;
+#define IsShortRep(p) ((p)->backPrev == 0)
+
+static UInt32 GetRepLen1Price(CLzmaEnc *p, UInt32 state, UInt32 posState)
+{
+  return
+    GET_PRICE_0(p->isRepG0[state]) +
+    GET_PRICE_0(p->isRep0Long[state][posState]);
+}
+
+static UInt32 GetPureRepPrice(CLzmaEnc *p, UInt32 repIndex, UInt32 state, UInt32 posState)
+{
+  UInt32 price;
+  if (repIndex == 0)
+  {
+    price = GET_PRICE_0(p->isRepG0[state]);
+    price += GET_PRICE_1(p->isRep0Long[state][posState]);
+  }
+  else
+  {
+    price = GET_PRICE_1(p->isRepG0[state]);
+    if (repIndex == 1)
+      price += GET_PRICE_0(p->isRepG1[state]);
+    else
+    {
+      price += GET_PRICE_1(p->isRepG1[state]);
+      price += GET_PRICE(p->isRepG2[state], repIndex - 2);
+    }
+  }
+  return price;
+}
+
+static UInt32 GetRepPrice(CLzmaEnc *p, UInt32 repIndex, UInt32 len, UInt32 state, UInt32 posState)
+{
+  return p->repLenEnc.prices[posState][len - LZMA_MATCH_LEN_MIN] +
+    GetPureRepPrice(p, repIndex, state, posState);
+}
+
+static UInt32 Backward(CLzmaEnc *p, UInt32 *backRes, UInt32 cur)
+{
+  UInt32 posMem = p->opt[cur].posPrev;
+  UInt32 backMem = p->opt[cur].backPrev;
+  p->optimumEndIndex = cur;
+  do
+  {
+    if (p->opt[cur].prev1IsChar)
+    {
+      MakeAsChar(&p->opt[posMem])
+      p->opt[posMem].posPrev = posMem - 1;
+      if (p->opt[cur].prev2)
+      {
+        p->opt[posMem - 1].prev1IsChar = False;
+        p->opt[posMem - 1].posPrev = p->opt[cur].posPrev2;
+        p->opt[posMem - 1].backPrev = p->opt[cur].backPrev2;
+      }
+    }
+    {
+      UInt32 posPrev = posMem;
+      UInt32 backCur = backMem;
+
+      backMem = p->opt[posPrev].backPrev;
+      posMem = p->opt[posPrev].posPrev;
+
+      p->opt[posPrev].backPrev = backCur;
+      p->opt[posPrev].posPrev = cur;
+      cur = posPrev;
+    }
+  }
+  while (cur != 0);
+  *backRes = p->opt[0].backPrev;
+  p->optimumCurrentIndex  = p->opt[0].posPrev;
+  return p->optimumCurrentIndex;
+}
+
+#define LIT_PROBS(pos, prevByte) (p->litProbs + ((((pos) & p->lpMask) << p->lc) + ((prevByte) >> (8 - p->lc))) * 0x300)
+
+static UInt32 GetOptimum(CLzmaEnc *p, UInt32 position, UInt32 *backRes)
+{
+  UInt32 numAvailableBytes, lenMain, numDistancePairs;
+  const Byte *data;
+  UInt32 reps[LZMA_NUM_REPS];
+  UInt32 repLens[LZMA_NUM_REPS];
+  UInt32 repMaxIndex, i;
+  UInt32 *matchDistances;
+  Byte currentByte, matchByte;
+  UInt32 posState;
+  UInt32 matchPrice, repMatchPrice;
+  UInt32 lenEnd;
+  UInt32 len;
+  UInt32 normalMatchPrice;
+  UInt32 cur;
+  if (p->optimumEndIndex != p->optimumCurrentIndex)
+  {
+    const COptimal *opt = &p->opt[p->optimumCurrentIndex];
+    UInt32 lenRes = opt->posPrev - p->optimumCurrentIndex;
+    *backRes = opt->backPrev;
+    p->optimumCurrentIndex = opt->posPrev;
+    return lenRes;
+  }
+  p->optimumCurrentIndex = p->optimumEndIndex = 0;
+
+  numAvailableBytes = p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
+
+  if (!p->longestMatchWasFound)
+  {
+    lenMain = ReadMatchDistances(p, &numDistancePairs);
+  }
+  else
+  {
+    lenMain = p->longestMatchLength;
+    numDistancePairs = p->numDistancePairs;
+    p->longestMatchWasFound = False;
+  }
+
+  data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
+  if (numAvailableBytes < 2)
+  {
+    *backRes = (UInt32)(-1);
+    return 1;
+  }
+  if (numAvailableBytes > LZMA_MATCH_LEN_MAX)
+    numAvailableBytes = LZMA_MATCH_LEN_MAX;
+
+  repMaxIndex = 0;
+  for (i = 0; i < LZMA_NUM_REPS; i++)
+  {
+    UInt32 lenTest;
+    const Byte *data2;
+    reps[i] = p->reps[i];
+    data2 = data - (reps[i] + 1);
+    if (data[0] != data2[0] || data[1] != data2[1])
+    {
+      repLens[i] = 0;
+      continue;
+    }
+    for (lenTest = 2; lenTest < numAvailableBytes && data[lenTest] == data2[lenTest]; lenTest++);
+    repLens[i] = lenTest;
+    if (lenTest > repLens[repMaxIndex])
+      repMaxIndex = i;
+  }
+  if (repLens[repMaxIndex] >= p->numFastBytes)
+  {
+    UInt32 lenRes;
+    *backRes = repMaxIndex;
+    lenRes = repLens[repMaxIndex];
+    MovePos(p, lenRes - 1);
+    return lenRes;
+  }
+
+  matchDistances = p->matchDistances;
+  if (lenMain >= p->numFastBytes)
+  {
+    *backRes = matchDistances[numDistancePairs - 1] + LZMA_NUM_REPS;
+    MovePos(p, lenMain - 1);
+    return lenMain;
+  }
+  currentByte = *data;
+  matchByte = *(data - (reps[0] + 1));
+
+  if (lenMain < 2 && currentByte != matchByte && repLens[repMaxIndex] < 2)
+  {
+    *backRes = (UInt32)-1;
+    return 1;
+  }
+
+  p->opt[0].state = (CState)p->state;
+
+  posState = (position & p->pbMask);
+
+  {
+    const CLzmaProb *probs = LIT_PROBS(position, *(data - 1));
+    p->opt[1].price = GET_PRICE_0(p->isMatch[p->state][posState]) +
+        (!IsCharState(p->state) ?
+          LitEnc_GetPriceMatched(probs, currentByte, matchByte, p->ProbPrices) :
+          LitEnc_GetPrice(probs, currentByte, p->ProbPrices));
+  }
+
+  MakeAsChar(&p->opt[1]);
+
+  matchPrice = GET_PRICE_1(p->isMatch[p->state][posState]);
+  repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[p->state]);
+
+  if (matchByte == currentByte)
+  {
+    UInt32 shortRepPrice = repMatchPrice + GetRepLen1Price(p, p->state, posState);
+    if (shortRepPrice < p->opt[1].price)
+    {
+      p->opt[1].price = shortRepPrice;
+      MakeAsShortRep(&p->opt[1]);
+    }
+  }
+  lenEnd = ((lenMain >= repLens[repMaxIndex]) ? lenMain : repLens[repMaxIndex]);
+
+  if (lenEnd < 2)
+  {
+    *backRes = p->opt[1].backPrev;
+    return 1;
+  }
+
+  p->opt[1].posPrev = 0;
+  for (i = 0; i < LZMA_NUM_REPS; i++)
+    p->opt[0].backs[i] = reps[i];
+
+  len = lenEnd;
+  do
+    p->opt[len--].price = kInfinityPrice;
+  while (len >= 2);
+
+  for (i = 0; i < LZMA_NUM_REPS; i++)
+  {
+    UInt32 repLen = repLens[i];
+    UInt32 price;
+    if (repLen < 2)
+      continue;
+    price = repMatchPrice + GetPureRepPrice(p, i, p->state, posState);
+    do
+    {
+      UInt32 curAndLenPrice = price + p->repLenEnc.prices[posState][repLen - 2];
+      COptimal *opt = &p->opt[repLen];
+      if (curAndLenPrice < opt->price)
+      {
+        opt->price = curAndLenPrice;
+        opt->posPrev = 0;
+        opt->backPrev = i;
+        opt->prev1IsChar = False;
+      }
+    }
+    while (--repLen >= 2);
+  }
+
+  normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[p->state]);
+
+  len = ((repLens[0] >= 2) ? repLens[0] + 1 : 2);
+  if (len <= lenMain)
+  {
+    UInt32 offs = 0;
+    while (len > matchDistances[offs])
+      offs += 2;
+    for (; ; len++)
+    {
+      COptimal *opt;
+      UInt32 distance = matchDistances[offs + 1];
+
+      UInt32 curAndLenPrice = normalMatchPrice + p->lenEnc.prices[posState][len - LZMA_MATCH_LEN_MIN];
+      UInt32 lenToPosState = GetLenToPosState(len);
+      if (distance < kNumFullDistances)
+        curAndLenPrice += p->distancesPrices[lenToPosState][distance];
+      else
+      {
+        UInt32 slot;
+        GetPosSlot2(distance, slot);
+        curAndLenPrice += p->alignPrices[distance & kAlignMask] + p->posSlotPrices[lenToPosState][slot];
+      }
+      opt = &p->opt[len];
+      if (curAndLenPrice < opt->price)
+      {
+        opt->price = curAndLenPrice;
+        opt->posPrev = 0;
+        opt->backPrev = distance + LZMA_NUM_REPS;
+        opt->prev1IsChar = False;
+      }
+      if (len == matchDistances[offs])
+      {
+        offs += 2;
+        if (offs == numDistancePairs)
+          break;
+      }
+    }
+  }
+
+  cur = 0;
+
+    #ifdef SHOW_STAT2
+    if (position >= 0)
+    {
+      unsigned i;
+      printf("\n pos = %4X", position);
+      for (i = cur; i <= lenEnd; i++)
+      printf("\nprice[%4X] = %d", position - cur + i, p->opt[i].price);
+    }
+    #endif
+
+  for (;;)
+  {
+    UInt32 numAvailableBytesFull, newLen, numDistancePairs;
+    COptimal *curOpt;
+    UInt32 posPrev;
+    UInt32 state;
+    UInt32 curPrice;
+    Bool nextIsChar;
+    const Byte *data;
+    Byte currentByte, matchByte;
+    UInt32 posState;
+    UInt32 curAnd1Price;
+    COptimal *nextOpt;
+    UInt32 matchPrice, repMatchPrice;
+    UInt32 numAvailableBytes;
+    UInt32 startLen;
+
+    cur++;
+    if (cur == lenEnd)
+      return Backward(p, backRes, cur);
+
+    numAvailableBytesFull = p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
+    newLen = ReadMatchDistances(p, &numDistancePairs);
+    if (newLen >= p->numFastBytes)
+    {
+      p->numDistancePairs = numDistancePairs;
+      p->longestMatchLength = newLen;
+      p->longestMatchWasFound = True;
+      return Backward(p, backRes, cur);
+    }
+    position++;
+    curOpt = &p->opt[cur];
+    posPrev = curOpt->posPrev;
+    if (curOpt->prev1IsChar)
+    {
+      posPrev--;
+      if (curOpt->prev2)
+      {
+        state = p->opt[curOpt->posPrev2].state;
+        if (curOpt->backPrev2 < LZMA_NUM_REPS)
+          state = kRepNextStates[state];
+        else
+          state = kMatchNextStates[state];
+      }
+      else
+        state = p->opt[posPrev].state;
+      state = kLiteralNextStates[state];
+    }
+    else
+      state = p->opt[posPrev].state;
+    if (posPrev == cur - 1)
+    {
+      if (IsShortRep(curOpt))
+        state = kShortRepNextStates[state];
+      else
+        state = kLiteralNextStates[state];
+    }
+    else
+    {
+      UInt32 pos;
+      const COptimal *prevOpt;
+      if (curOpt->prev1IsChar && curOpt->prev2)
+      {
+        posPrev = curOpt->posPrev2;
+        pos = curOpt->backPrev2;
+        state = kRepNextStates[state];
+      }
+      else
+      {
+        pos = curOpt->backPrev;
+        if (pos < LZMA_NUM_REPS)
+          state = kRepNextStates[state];
+        else
+          state = kMatchNextStates[state];
+      }
+      prevOpt = &p->opt[posPrev];
+      if (pos < LZMA_NUM_REPS)
+      {
+        UInt32 i;
+        reps[0] = prevOpt->backs[pos];
+        for (i = 1; i <= pos; i++)
+          reps[i] = prevOpt->backs[i - 1];
+        for (; i < LZMA_NUM_REPS; i++)
+          reps[i] = prevOpt->backs[i];
+      }
+      else
+      {
+        UInt32 i;
+        reps[0] = (pos - LZMA_NUM_REPS);
+        for (i = 1; i < LZMA_NUM_REPS; i++)
+          reps[i] = prevOpt->backs[i - 1];
+      }
+    }
+    curOpt->state = (CState)state;
+
+    curOpt->backs[0] = reps[0];
+    curOpt->backs[1] = reps[1];
+    curOpt->backs[2] = reps[2];
+    curOpt->backs[3] = reps[3];
+
+    curPrice = curOpt->price;
+    nextIsChar = False;
+    data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
+    currentByte = *data;
+    matchByte = *(data - (reps[0] + 1));
+
+    posState = (position & p->pbMask);
+
+    curAnd1Price = curPrice + GET_PRICE_0(p->isMatch[state][posState]);
+    {
+      const CLzmaProb *probs = LIT_PROBS(position, *(data - 1));
+      curAnd1Price +=
+        (!IsCharState(state) ?
+          LitEnc_GetPriceMatched(probs, currentByte, matchByte, p->ProbPrices) :
+          LitEnc_GetPrice(probs, currentByte, p->ProbPrices));
+    }
+
+    nextOpt = &p->opt[cur + 1];
+
+    if (curAnd1Price < nextOpt->price)
+    {
+      nextOpt->price = curAnd1Price;
+      nextOpt->posPrev = cur;
+      MakeAsChar(nextOpt);
+      nextIsChar = True;
+    }
+
+    matchPrice = curPrice + GET_PRICE_1(p->isMatch[state][posState]);
+    repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[state]);
+
+    if (matchByte == currentByte && !(nextOpt->posPrev < cur && nextOpt->backPrev == 0))
+    {
+      UInt32 shortRepPrice = repMatchPrice + GetRepLen1Price(p, state, posState);
+      if (shortRepPrice <= nextOpt->price)
+      {
+        nextOpt->price = shortRepPrice;
+        nextOpt->posPrev = cur;
+        MakeAsShortRep(nextOpt);
+        nextIsChar = True;
+      }
+    }
+
+    {
+      UInt32 temp = kNumOpts - 1 - cur;
+      if (temp <  numAvailableBytesFull)
+        numAvailableBytesFull = temp;
+    }
+    numAvailableBytes = numAvailableBytesFull;
+
+    if (numAvailableBytes < 2)
+      continue;
+    if (numAvailableBytes > p->numFastBytes)
+      numAvailableBytes = p->numFastBytes;
+    if (!nextIsChar && matchByte != currentByte) /* speed optimization */
+    {
+      /* try Literal + rep0 */
+      UInt32 temp;
+      UInt32 lenTest2;
+      const Byte *data2 = data - (reps[0] + 1);
+      UInt32 limit = p->numFastBytes + 1;
+      if (limit > numAvailableBytesFull)
+        limit = numAvailableBytesFull;
+
+      for (temp = 1; temp < limit && data[temp] == data2[temp]; temp++);
+      lenTest2 = temp - 1;
+      if (lenTest2 >= 2)
+      {
+        UInt32 state2 = kLiteralNextStates[state];
+        UInt32 posStateNext = (position + 1) & p->pbMask;
+        UInt32 nextRepMatchPrice = curAnd1Price +
+            GET_PRICE_1(p->isMatch[state2][posStateNext]) +
+            GET_PRICE_1(p->isRep[state2]);
+        /* for (; lenTest2 >= 2; lenTest2--) */
+        {
+          UInt32 curAndLenPrice;
+          COptimal *opt;
+          UInt32 offset = cur + 1 + lenTest2;
+          while (lenEnd < offset)
+            p->opt[++lenEnd].price = kInfinityPrice;
+          curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext);
+          opt = &p->opt[offset];
+          if (curAndLenPrice < opt->price)
+          {
+            opt->price = curAndLenPrice;
+            opt->posPrev = cur + 1;
+            opt->backPrev = 0;
+            opt->prev1IsChar = True;
+            opt->prev2 = False;
+          }
+        }
+      }
+    }
+
+    startLen = 2; /* speed optimization */
+    {
+    UInt32 repIndex;
+    for (repIndex = 0; repIndex < LZMA_NUM_REPS; repIndex++)
+    {
+      UInt32 lenTest;
+      UInt32 lenTestTemp;
+      UInt32 price;
+      const Byte *data2 = data - (reps[repIndex] + 1);
+      if (data[0] != data2[0] || data[1] != data2[1])
+        continue;
+      for (lenTest = 2; lenTest < numAvailableBytes && data[lenTest] == data2[lenTest]; lenTest++);
+      while (lenEnd < cur + lenTest)
+        p->opt[++lenEnd].price = kInfinityPrice;
+      lenTestTemp = lenTest;
+      price = repMatchPrice + GetPureRepPrice(p, repIndex, state, posState);
+      do
+      {
+        UInt32 curAndLenPrice = price + p->repLenEnc.prices[posState][lenTest - 2];
+        COptimal *opt = &p->opt[cur + lenTest];
+        if (curAndLenPrice < opt->price)
+        {
+          opt->price = curAndLenPrice;
+          opt->posPrev = cur;
+          opt->backPrev = repIndex;
+          opt->prev1IsChar = False;
+        }
+      }
+      while (--lenTest >= 2);
+      lenTest = lenTestTemp;
+
+      if (repIndex == 0)
+        startLen = lenTest + 1;
+
+      /* if (_maxMode) */
+        {
+          UInt32 lenTest2 = lenTest + 1;
+          UInt32 limit = lenTest2 + p->numFastBytes;
+          UInt32 nextRepMatchPrice;
+          if (limit > numAvailableBytesFull)
+            limit = numAvailableBytesFull;
+          for (; lenTest2 < limit && data[lenTest2] == data2[lenTest2]; lenTest2++);
+          lenTest2 -= lenTest + 1;
+          if (lenTest2 >= 2)
+          {
+            UInt32 state2 = kRepNextStates[state];
+            UInt32 posStateNext = (position + lenTest) & p->pbMask;
+            UInt32 curAndLenCharPrice =
+                price + p->repLenEnc.prices[posState][lenTest - 2] +
+                GET_PRICE_0(p->isMatch[state2][posStateNext]) +
+                LitEnc_GetPriceMatched(LIT_PROBS(position + lenTest, data[lenTest - 1]),
+                    data[lenTest], data2[lenTest], p->ProbPrices);
+            state2 = kLiteralNextStates[state2];
+            posStateNext = (position + lenTest + 1) & p->pbMask;
+            nextRepMatchPrice = curAndLenCharPrice +
+                GET_PRICE_1(p->isMatch[state2][posStateNext]) +
+                GET_PRICE_1(p->isRep[state2]);
+
+            /* for (; lenTest2 >= 2; lenTest2--) */
+            {
+              UInt32 curAndLenPrice;
+              COptimal *opt;
+              UInt32 offset = cur + lenTest + 1 + lenTest2;
+              while (lenEnd < offset)
+                p->opt[++lenEnd].price = kInfinityPrice;
+              curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext);
+              opt = &p->opt[offset];
+              if (curAndLenPrice < opt->price)
+              {
+                opt->price = curAndLenPrice;
+                opt->posPrev = cur + lenTest + 1;
+                opt->backPrev = 0;
+                opt->prev1IsChar = True;
+                opt->prev2 = True;
+                opt->posPrev2 = cur;
+                opt->backPrev2 = repIndex;
+              }
+            }
+          }
+        }
+    }
+    }
+    /* for (UInt32 lenTest = 2; lenTest <= newLen; lenTest++) */
+    if (newLen > numAvailableBytes)
+    {
+      newLen = numAvailableBytes;
+      for (numDistancePairs = 0; newLen > matchDistances[numDistancePairs]; numDistancePairs += 2);
+      matchDistances[numDistancePairs] = newLen;
+      numDistancePairs += 2;
+    }
+    if (newLen >= startLen)
+    {
+      UInt32 normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[state]);
+      UInt32 offs, curBack, posSlot;
+      UInt32 lenTest;
+      while (lenEnd < cur + newLen)
+        p->opt[++lenEnd].price = kInfinityPrice;
+
+      offs = 0;
+      while (startLen > matchDistances[offs])
+        offs += 2;
+      curBack = matchDistances[offs + 1];
+      GetPosSlot2(curBack, posSlot);
+      for (lenTest = /*2*/ startLen; ; lenTest++)
+      {
+        UInt32 curAndLenPrice = normalMatchPrice + p->lenEnc.prices[posState][lenTest - LZMA_MATCH_LEN_MIN];
+        UInt32 lenToPosState = GetLenToPosState(lenTest);
+        COptimal *opt;
+        if (curBack < kNumFullDistances)
+          curAndLenPrice += p->distancesPrices[lenToPosState][curBack];
+        else
+          curAndLenPrice += p->posSlotPrices[lenToPosState][posSlot] + p->alignPrices[curBack & kAlignMask];
+
+        opt = &p->opt[cur + lenTest];
+        if (curAndLenPrice < opt->price)
+        {
+          opt->price = curAndLenPrice;
+          opt->posPrev = cur;
+          opt->backPrev = curBack + LZMA_NUM_REPS;
+          opt->prev1IsChar = False;
+        }
+
+        if (/*_maxMode && */lenTest == matchDistances[offs])
+        {
+          /* Try Match + Literal + Rep0 */
+          const Byte *data2 = data - (curBack + 1);
+          UInt32 lenTest2 = lenTest + 1;
+          UInt32 limit = lenTest2 + p->numFastBytes;
+          UInt32 nextRepMatchPrice;
+          if (limit > numAvailableBytesFull)
+            limit = numAvailableBytesFull;
+          for (; lenTest2 < limit && data[lenTest2] == data2[lenTest2]; lenTest2++);
+          lenTest2 -= lenTest + 1;
+          if (lenTest2 >= 2)
+          {
+            UInt32 state2 = kMatchNextStates[state];
+            UInt32 posStateNext = (position + lenTest) & p->pbMask;
+            UInt32 curAndLenCharPrice = curAndLenPrice +
+                GET_PRICE_0(p->isMatch[state2][posStateNext]) +
+                LitEnc_GetPriceMatched(LIT_PROBS(position + lenTest, data[lenTest - 1]),
+                    data[lenTest], data2[lenTest], p->ProbPrices);
+            state2 = kLiteralNextStates[state2];
+            posStateNext = (posStateNext + 1) & p->pbMask;
+            nextRepMatchPrice = curAndLenCharPrice +
+                GET_PRICE_1(p->isMatch[state2][posStateNext]) +
+                GET_PRICE_1(p->isRep[state2]);
+
+            /* for (; lenTest2 >= 2; lenTest2--) */
+            {
+              UInt32 offset = cur + lenTest + 1 + lenTest2;
+              UInt32 curAndLenPrice;
+              COptimal *opt;
+              while (lenEnd < offset)
+                p->opt[++lenEnd].price = kInfinityPrice;
+              curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext);
+              opt = &p->opt[offset];
+              if (curAndLenPrice < opt->price)
+              {
+                opt->price = curAndLenPrice;
+                opt->posPrev = cur + lenTest + 1;
+                opt->backPrev = 0;
+                opt->prev1IsChar = True;
+                opt->prev2 = True;
+                opt->posPrev2 = cur;
+                opt->backPrev2 = curBack + LZMA_NUM_REPS;
+              }
+            }
+          }
+          offs += 2;
+          if (offs == numDistancePairs)
+            break;
+          curBack = matchDistances[offs + 1];
+          if (curBack >= kNumFullDistances)
+            GetPosSlot2(curBack, posSlot);
+        }
+      }
+    }
+  }
+}
+
+#define ChangePair(smallDist, bigDist) (((bigDist) >> 7) > (smallDist))
+
+static UInt32 GetOptimumFast(CLzmaEnc *p, UInt32 *backRes)
+{
+  UInt32 numAvailableBytes = p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
+  UInt32 lenMain, numDistancePairs;
+  const Byte *data;
+  UInt32 repLens[LZMA_NUM_REPS];
+  UInt32 repMaxIndex, i;
+  UInt32 *matchDistances;
+  UInt32 backMain;
+
+  if (!p->longestMatchWasFound)
+  {
+    lenMain = ReadMatchDistances(p, &numDistancePairs);
+  }
+  else
+  {
+    lenMain = p->longestMatchLength;
+    numDistancePairs = p->numDistancePairs;
+    p->longestMatchWasFound = False;
+  }
+
+  data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
+  if (numAvailableBytes > LZMA_MATCH_LEN_MAX)
+    numAvailableBytes = LZMA_MATCH_LEN_MAX;
+  if (numAvailableBytes < 2)
+  {
+    *backRes = (UInt32)(-1);
+    return 1;
+  }
+
+  repMaxIndex = 0;
+
+  for (i = 0; i < LZMA_NUM_REPS; i++)
+  {
+    const Byte *data2 = data - (p->reps[i] + 1);
+    UInt32 len;
+    if (data[0] != data2[0] || data[1] != data2[1])
+    {
+      repLens[i] = 0;
+      continue;
+    }
+    for (len = 2; len < numAvailableBytes && data[len] == data2[len]; len++);
+    if (len >= p->numFastBytes)
+    {
+      *backRes = i;
+      MovePos(p, len - 1);
+      return len;
+    }
+    repLens[i] = len;
+    if (len > repLens[repMaxIndex])
+      repMaxIndex = i;
+  }
+  matchDistances = p->matchDistances;
+  if (lenMain >= p->numFastBytes)
+  {
+    *backRes = matchDistances[numDistancePairs - 1] + LZMA_NUM_REPS;
+    MovePos(p, lenMain - 1);
+    return lenMain;
+  }
+
+  backMain = 0; /* for GCC */
+  if (lenMain >= 2)
+  {
+    backMain = matchDistances[numDistancePairs - 1];
+    while (numDistancePairs > 2 && lenMain == matchDistances[numDistancePairs - 4] + 1)
+    {
+      if (!ChangePair(matchDistances[numDistancePairs - 3], backMain))
+        break;
+      numDistancePairs -= 2;
+      lenMain = matchDistances[numDistancePairs - 2];
+      backMain = matchDistances[numDistancePairs - 1];
+    }
+    if (lenMain == 2 && backMain >= 0x80)
+      lenMain = 1;
+  }
+
+  if (repLens[repMaxIndex] >= 2)
+  {
+    if (repLens[repMaxIndex] + 1 >= lenMain ||
+        (repLens[repMaxIndex] + 2 >= lenMain && (backMain > (1 << 9))) ||
+        (repLens[repMaxIndex] + 3 >= lenMain && (backMain > (1 << 15))))
+    {
+      UInt32 lenRes;
+      *backRes = repMaxIndex;
+      lenRes = repLens[repMaxIndex];
+      MovePos(p, lenRes - 1);
+      return lenRes;
+    }
+  }
+
+  if (lenMain >= 2 && numAvailableBytes > 2)
+  {
+    UInt32 i;
+    numAvailableBytes = p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
+    p->longestMatchLength = ReadMatchDistances(p, &p->numDistancePairs);
+    if (p->longestMatchLength >= 2)
+    {
+      UInt32 newDistance = matchDistances[p->numDistancePairs - 1];
+      if ((p->longestMatchLength >= lenMain && newDistance < backMain) ||
+          (p->longestMatchLength == lenMain + 1 && !ChangePair(backMain, newDistance)) ||
+          (p->longestMatchLength > lenMain + 1) ||
+          (p->longestMatchLength + 1 >= lenMain && lenMain >= 3 && ChangePair(newDistance, backMain)))
+      {
+        p->longestMatchWasFound = True;
+        *backRes = (UInt32)(-1);
+        return 1;
+      }
+    }
+    data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
+    for (i = 0; i < LZMA_NUM_REPS; i++)
+    {
+      UInt32 len;
+      const Byte *data2 = data - (p->reps[i] + 1);
+      if (data[1] != data2[1] || data[2] != data2[2])
+      {
+        repLens[i] = 0;
+        continue;
+      }
+      for (len = 2; len < numAvailableBytes && data[len] == data2[len]; len++);
+      if (len + 1 >= lenMain)
+      {
+        p->longestMatchWasFound = True;
+        *backRes = (UInt32)(-1);
+        return 1;
+      }
+    }
+    *backRes = backMain + LZMA_NUM_REPS;
+    MovePos(p, lenMain - 2);
+    return lenMain;
+  }
+  *backRes = (UInt32)(-1);
+  return 1;
+}
+
+static void WriteEndMarker(CLzmaEnc *p, UInt32 posState)
+{
+  UInt32 len;
+  RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 1);
+  RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 0);
+  p->state = kMatchNextStates[p->state];
+  len = LZMA_MATCH_LEN_MIN;
+  LenEnc_Encode2(&p->lenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices);
+  RcTree_Encode(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], kNumPosSlotBits, (1 << kNumPosSlotBits) - 1);
+  RangeEnc_EncodeDirectBits(&p->rc, (((UInt32)1 << 30) - 1) >> kNumAlignBits, 30 - kNumAlignBits);
+  RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, kAlignMask);
+}
+
+static SRes CheckErrors(CLzmaEnc *p)
+{
+  if (p->result != SZ_OK)
+    return p->result;
+  if (p->rc.res != SZ_OK)
+    p->result = SZ_ERROR_WRITE;
+  if (p->matchFinderBase.result != SZ_OK)
+    p->result = SZ_ERROR_READ;
+  if (p->result != SZ_OK)
+    p->finished = True;
+  return p->result;
+}
+
+static SRes Flush(CLzmaEnc *p, UInt32 nowPos)
+{
+  /* ReleaseMFStream(); */
+  p->finished = True;
+  if (p->writeEndMark)
+    WriteEndMarker(p, nowPos & p->pbMask);
+  RangeEnc_FlushData(&p->rc);
+  RangeEnc_FlushStream(&p->rc);
+  return CheckErrors(p);
+}
+
+static void FillAlignPrices(CLzmaEnc *p)
+{
+  UInt32 i;
+  for (i = 0; i < kAlignTableSize; i++)
+    p->alignPrices[i] = RcTree_ReverseGetPrice(p->posAlignEncoder, kNumAlignBits, i, p->ProbPrices);
+  p->alignPriceCount = 0;
+}
+
+static void FillDistancesPrices(CLzmaEnc *p)
+{
+  UInt32 tempPrices[kNumFullDistances];
+  UInt32 i, lenToPosState;
+  for (i = kStartPosModelIndex; i < kNumFullDistances; i++)
+  {
+    UInt32 posSlot = GetPosSlot1(i);
+    UInt32 footerBits = ((posSlot >> 1) - 1);
+    UInt32 base = ((2 | (posSlot & 1)) << footerBits);
+    tempPrices[i] = RcTree_ReverseGetPrice(p->posEncoders + base - posSlot - 1, footerBits, i - base, p->ProbPrices);
+  }
+
+  for (lenToPosState = 0; lenToPosState < kNumLenToPosStates; lenToPosState++)
+  {
+    UInt32 posSlot;
+    const CLzmaProb *encoder = p->posSlotEncoder[lenToPosState];
+    UInt32 *posSlotPrices = p->posSlotPrices[lenToPosState];
+    for (posSlot = 0; posSlot < p->distTableSize; posSlot++)
+      posSlotPrices[posSlot] = RcTree_GetPrice(encoder, kNumPosSlotBits, posSlot, p->ProbPrices);
+    for (posSlot = kEndPosModelIndex; posSlot < p->distTableSize; posSlot++)
+      posSlotPrices[posSlot] += ((((posSlot >> 1) - 1) - kNumAlignBits) << kNumBitPriceShiftBits);
+
+    {
+      UInt32 *distancesPrices = p->distancesPrices[lenToPosState];
+      UInt32 i;
+      for (i = 0; i < kStartPosModelIndex; i++)
+        distancesPrices[i] = posSlotPrices[i];
+      for (; i < kNumFullDistances; i++)
+        distancesPrices[i] = posSlotPrices[GetPosSlot1(i)] + tempPrices[i];
+    }
+  }
+  p->matchPriceCount = 0;
+}
+
+void LzmaEnc_Construct(CLzmaEnc *p)
+{
+  RangeEnc_Construct(&p->rc);
+  MatchFinder_Construct(&p->matchFinderBase);
+  #ifdef COMPRESS_MF_MT
+  MatchFinderMt_Construct(&p->matchFinderMt);
+  p->matchFinderMt.MatchFinder = &p->matchFinderBase;
+  #endif
+
+  {
+    CLzmaEncProps props;
+    LzmaEncProps_Init(&props);
+    LzmaEnc_SetProps(p, &props);
+  }
+
+  #ifndef LZMA_LOG_BSR
+  LzmaEnc_FastPosInit(p->g_FastPos);
+  #endif
+
+  LzmaEnc_InitPriceTables(p->ProbPrices);
+  p->litProbs = 0;
+  p->saveState.litProbs = 0;
+}
+
+CLzmaEncHandle LzmaEnc_Create(ISzAlloc *alloc)
+{
+  void *p;
+  p = alloc->Alloc(alloc, sizeof(CLzmaEnc));
+  if (p != 0)
+    LzmaEnc_Construct((CLzmaEnc *)p);
+  return p;
+}
+
+void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAlloc *alloc)
+{
+  alloc->Free(alloc, p->litProbs);
+  alloc->Free(alloc, p->saveState.litProbs);
+  p->litProbs = 0;
+  p->saveState.litProbs = 0;
+}
+
+void LzmaEnc_Destruct(CLzmaEnc *p, ISzAlloc *alloc, ISzAlloc *allocBig)
+{
+  #ifdef COMPRESS_MF_MT
+  MatchFinderMt_Destruct(&p->matchFinderMt, allocBig);
+  #endif
+  MatchFinder_Free(&p->matchFinderBase, allocBig);
+  LzmaEnc_FreeLits(p, alloc);
+  RangeEnc_Free(&p->rc, alloc);
+}
+
+void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAlloc *alloc, ISzAlloc *allocBig)
+{
+  LzmaEnc_Destruct((CLzmaEnc *)p, alloc, allocBig);
+  alloc->Free(alloc, p);
+}
+
+static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, Bool useLimits, UInt32 maxPackSize, UInt32 maxUnpackSize)
+{
+  UInt32 nowPos32, startPos32;
+  if (p->inStream != 0)
+  {
+    p->matchFinderBase.stream = p->inStream;
+    p->matchFinder.Init(p->matchFinderObj);
+    p->inStream = 0;
+  }
+
+  if (p->finished)
+    return p->result;
+  RINOK(CheckErrors(p));
+
+  nowPos32 = (UInt32)p->nowPos64;
+  startPos32 = nowPos32;
+
+  if (p->nowPos64 == 0)
+  {
+    UInt32 numDistancePairs;
+    Byte curByte;
+    if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0)
+      return Flush(p, nowPos32);
+    ReadMatchDistances(p, &numDistancePairs);
+    RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][0], 0);
+    p->state = kLiteralNextStates[p->state];
+    curByte = p->matchFinder.GetIndexByte(p->matchFinderObj, 0 - p->additionalOffset);
+    LitEnc_Encode(&p->rc, p->litProbs, curByte);
+    p->additionalOffset--;
+    nowPos32++;
+  }
+
+  if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) != 0)
+  for (;;)
+  {
+    UInt32 pos, len, posState;
+
+    if (p->fastMode)
+      len = GetOptimumFast(p, &pos);
+    else
+      len = GetOptimum(p, nowPos32, &pos);
+
+    #ifdef SHOW_STAT2
+    printf("\n pos = %4X,   len = %d   pos = %d", nowPos32, len, pos);
+    #endif
+
+    posState = nowPos32 & p->pbMask;
+    if (len == 1 && pos == 0xFFFFFFFF)
+    {
+      Byte curByte;
+      CLzmaProb *probs;
+      const Byte *data;
+
+      RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 0);
+      data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset;
+      curByte = *data;
+      probs = LIT_PROBS(nowPos32, *(data - 1));
+      if (IsCharState(p->state))
+        LitEnc_Encode(&p->rc, probs, curByte);
+      else
+        LitEnc_EncodeMatched(&p->rc, probs, curByte, *(data - p->reps[0] - 1));
+      p->state = kLiteralNextStates[p->state];
+    }
+    else
+    {
+      RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 1);
+      if (pos < LZMA_NUM_REPS)
+      {
+        RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 1);
+        if (pos == 0)
+        {
+          RangeEnc_EncodeBit(&p->rc, &p->isRepG0[p->state], 0);
+          RangeEnc_EncodeBit(&p->rc, &p->isRep0Long[p->state][posState], ((len == 1) ? 0 : 1));
+        }
+        else
+        {
+          UInt32 distance = p->reps[pos];
+          RangeEnc_EncodeBit(&p->rc, &p->isRepG0[p->state], 1);
+          if (pos == 1)
+            RangeEnc_EncodeBit(&p->rc, &p->isRepG1[p->state], 0);
+          else
+          {
+            RangeEnc_EncodeBit(&p->rc, &p->isRepG1[p->state], 1);
+            RangeEnc_EncodeBit(&p->rc, &p->isRepG2[p->state], pos - 2);
+            if (pos == 3)
+              p->reps[3] = p->reps[2];
+            p->reps[2] = p->reps[1];
+          }
+          p->reps[1] = p->reps[0];
+          p->reps[0] = distance;
+        }
+        if (len == 1)
+          p->state = kShortRepNextStates[p->state];
+        else
+        {
+          LenEnc_Encode2(&p->repLenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices);
+          p->state = kRepNextStates[p->state];
+        }
+      }
+      else
+      {
+        UInt32 posSlot;
+        RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 0);
+        p->state = kMatchNextStates[p->state];
+        LenEnc_Encode2(&p->lenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices);
+        pos -= LZMA_NUM_REPS;
+        GetPosSlot(pos, posSlot);
+        RcTree_Encode(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], kNumPosSlotBits, posSlot);
+
+        if (posSlot >= kStartPosModelIndex)
+        {
+          UInt32 footerBits = ((posSlot >> 1) - 1);
+          UInt32 base = ((2 | (posSlot & 1)) << footerBits);
+          UInt32 posReduced = pos - base;
+
+          if (posSlot < kEndPosModelIndex)
+            RcTree_ReverseEncode(&p->rc, p->posEncoders + base - posSlot - 1, footerBits, posReduced);
+          else
+          {
+            RangeEnc_EncodeDirectBits(&p->rc, posReduced >> kNumAlignBits, footerBits - kNumAlignBits);
+            RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, posReduced & kAlignMask);
+            p->alignPriceCount++;
+          }
+        }
+        p->reps[3] = p->reps[2];
+        p->reps[2] = p->reps[1];
+        p->reps[1] = p->reps[0];
+        p->reps[0] = pos;
+        p->matchPriceCount++;
+      }
+    }
+    p->additionalOffset -= len;
+    nowPos32 += len;
+    if (p->additionalOffset == 0)
+    {
+      UInt32 processed;
+      if (!p->fastMode)
+      {
+        if (p->matchPriceCount >= (1 << 7))
+          FillDistancesPrices(p);
+        if (p->alignPriceCount >= kAlignTableSize)
+          FillAlignPrices(p);
+      }
+      if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0)
+        break;
+      processed = nowPos32 - startPos32;
+      if (useLimits)
+      {
+        if (processed + kNumOpts + 300 >= maxUnpackSize ||
+            RangeEnc_GetProcessed(&p->rc) + kNumOpts * 2 >= maxPackSize)
+          break;
+      }
+      else if (processed >= (1 << 15))
+      {
+        p->nowPos64 += nowPos32 - startPos32;
+        return CheckErrors(p);
+      }
+    }
+  }
+  p->nowPos64 += nowPos32 - startPos32;
+  return Flush(p, nowPos32);
+}
+
+#define kBigHashDicLimit ((UInt32)1 << 24)
+
+static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAlloc *alloc, ISzAlloc *allocBig)
+{
+  UInt32 beforeSize = kNumOpts;
+  Bool btMode;
+  if (!RangeEnc_Alloc(&p->rc, alloc))
+    return SZ_ERROR_MEM;
+  btMode = (p->matchFinderBase.btMode != 0);
+  #ifdef COMPRESS_MF_MT
+  p->mtMode = (p->multiThread && !p->fastMode && btMode);
+  #endif
+
+  {
+    unsigned lclp = p->lc + p->lp;
+    if (p->litProbs == 0 || p->saveState.litProbs == 0 || p->lclp != lclp)
+    {
+      LzmaEnc_FreeLits(p, alloc);
+      p->litProbs = (CLzmaProb *)alloc->Alloc(alloc, (0x300 << lclp) * sizeof(CLzmaProb));
+      p->saveState.litProbs = (CLzmaProb *)alloc->Alloc(alloc, (0x300 << lclp) * sizeof(CLzmaProb));
+      if (p->litProbs == 0 || p->saveState.litProbs == 0)
+      {
+        LzmaEnc_FreeLits(p, alloc);
+        return SZ_ERROR_MEM;
+      }
+      p->lclp = lclp;
+    }
+  }
+
+  p->matchFinderBase.bigHash = (p->dictSize > kBigHashDicLimit);
+
+  if (beforeSize + p->dictSize < keepWindowSize)
+    beforeSize = keepWindowSize - p->dictSize;
+
+  #ifdef COMPRESS_MF_MT
+  if (p->mtMode)
+  {
+    RINOK(MatchFinderMt_Create(&p->matchFinderMt, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig));
+    p->matchFinderObj = &p->matchFinderMt;
+    MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder);
+  }
+  else
+  #endif
+  {
+    if (!MatchFinder_Create(&p->matchFinderBase, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig))
+      return SZ_ERROR_MEM;
+    p->matchFinderObj = &p->matchFinderBase;
+    MatchFinder_CreateVTable(&p->matchFinderBase, &p->matchFinder);
+  }
+  return SZ_OK;
+}
+
+void LzmaEnc_Init(CLzmaEnc *p)
+{
+  UInt32 i;
+  p->state = 0;
+  for(i = 0 ; i < LZMA_NUM_REPS; i++)
+    p->reps[i] = 0;
+
+  RangeEnc_Init(&p->rc);
+
+
+  for (i = 0; i < kNumStates; i++)
+  {
+    UInt32 j;
+    for (j = 0; j < LZMA_NUM_PB_STATES_MAX; j++)
+    {
+      p->isMatch[i][j] = kProbInitValue;
+      p->isRep0Long[i][j] = kProbInitValue;
+    }
+    p->isRep[i] = kProbInitValue;
+    p->isRepG0[i] = kProbInitValue;
+    p->isRepG1[i] = kProbInitValue;
+    p->isRepG2[i] = kProbInitValue;
+  }
+
+  {
+    UInt32 num = 0x300 << (p->lp + p->lc);
+    for (i = 0; i < num; i++)
+      p->litProbs[i] = kProbInitValue;
+  }
+
+  {
+    for (i = 0; i < kNumLenToPosStates; i++)
+    {
+      CLzmaProb *probs = p->posSlotEncoder[i];
+      UInt32 j;
+      for (j = 0; j < (1 << kNumPosSlotBits); j++)
+        probs[j] = kProbInitValue;
+    }
+  }
+  {
+    for(i = 0; i < kNumFullDistances - kEndPosModelIndex; i++)
+      p->posEncoders[i] = kProbInitValue;
+  }
+
+  LenEnc_Init(&p->lenEnc.p);
+  LenEnc_Init(&p->repLenEnc.p);
+
+  for (i = 0; i < (1 << kNumAlignBits); i++)
+    p->posAlignEncoder[i] = kProbInitValue;
+
+  p->longestMatchWasFound = False;
+  p->optimumEndIndex = 0;
+  p->optimumCurrentIndex = 0;
+  p->additionalOffset = 0;
+
+  p->pbMask = (1 << p->pb) - 1;
+  p->lpMask = (1 << p->lp) - 1;
+}
+
+void LzmaEnc_InitPrices(CLzmaEnc *p)
+{
+  if (!p->fastMode)
+  {
+    FillDistancesPrices(p);
+    FillAlignPrices(p);
+  }
+
+  p->lenEnc.tableSize =
+  p->repLenEnc.tableSize =
+      p->numFastBytes + 1 - LZMA_MATCH_LEN_MIN;
+  LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, p->ProbPrices);
+  LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, p->ProbPrices);
+}
+
+static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, UInt32 keepWindowSize, ISzAlloc *alloc, ISzAlloc *allocBig)
+{
+  UInt32 i;
+  for (i = 0; i < (UInt32)kDicLogSizeMaxCompress; i++)
+    if (p->dictSize <= ((UInt32)1 << i))
+      break;
+  p->distTableSize = i * 2;
+
+  p->finished = False;
+  p->result = SZ_OK;
+  RINOK(LzmaEnc_Alloc(p, keepWindowSize, alloc, allocBig));
+  LzmaEnc_Init(p);
+  LzmaEnc_InitPrices(p);
+  p->nowPos64 = 0;
+  return SZ_OK;
+}
+
+static SRes LzmaEnc_Prepare(CLzmaEncHandle pp, ISeqInStream *inStream, ISeqOutStream *outStream,
+    ISzAlloc *alloc, ISzAlloc *allocBig)
+{
+  CLzmaEnc *p = (CLzmaEnc *)pp;
+  p->inStream = inStream;
+  p->rc.outStream = outStream;
+  return LzmaEnc_AllocAndInit(p, 0, alloc, allocBig);
+}
+
+SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp,
+    ISeqInStream *inStream, UInt32 keepWindowSize,
+    ISzAlloc *alloc, ISzAlloc *allocBig)
+{
+  CLzmaEnc *p = (CLzmaEnc *)pp;
+  p->inStream = inStream;
+  return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig);
+}
+
+static void LzmaEnc_SetInputBuf(CLzmaEnc *p, const Byte *src, SizeT srcLen)
+{
+  p->seqBufInStream.funcTable.Read = MyRead;
+  p->seqBufInStream.data = src;
+  p->seqBufInStream.rem = srcLen;
+}
+
+SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen,
+    UInt32 keepWindowSize, ISzAlloc *alloc, ISzAlloc *allocBig)
+{
+  CLzmaEnc *p = (CLzmaEnc *)pp;
+  LzmaEnc_SetInputBuf(p, src, srcLen);
+  p->inStream = &p->seqBufInStream.funcTable;
+  return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig);
+}
+
+void LzmaEnc_Finish(CLzmaEncHandle pp)
+{
+  #ifdef COMPRESS_MF_MT
+  CLzmaEnc *p = (CLzmaEnc *)pp;
+  if (p->mtMode)
+    MatchFinderMt_ReleaseStream(&p->matchFinderMt);
+  #endif
+}
+
+typedef struct _CSeqOutStreamBuf
+{
+  ISeqOutStream funcTable;
+  Byte *data;
+  SizeT rem;
+  Bool overflow;
+} CSeqOutStreamBuf;
+
+static size_t MyWrite(void *pp, const void *data, size_t size)
+{
+  CSeqOutStreamBuf *p = (CSeqOutStreamBuf *)pp;
+  if (p->rem < size)
+  {
+    size = p->rem;
+    p->overflow = True;
+  }
+  memcpy(p->data, data, size);
+  p->rem -= size;
+  p->data += size;
+  return size;
+}
+
+
+UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp)
+{
+  const CLzmaEnc *p = (CLzmaEnc *)pp;
+  return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
+}
+
+const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp)
+{
+  const CLzmaEnc *p = (CLzmaEnc *)pp;
+  return p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset;
+}
+
+SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, Bool reInit,
+    Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize)
+{
+  CLzmaEnc *p = (CLzmaEnc *)pp;
+  UInt64 nowPos64;
+  SRes res;
+  CSeqOutStreamBuf outStream;
+
+  outStream.funcTable.Write = MyWrite;
+  outStream.data = dest;
+  outStream.rem = *destLen;
+  outStream.overflow = False;
+
+  p->writeEndMark = False;
+  p->finished = False;
+  p->result = SZ_OK;
+
+  if (reInit)
+    LzmaEnc_Init(p);
+  LzmaEnc_InitPrices(p);
+  nowPos64 = p->nowPos64;
+  RangeEnc_Init(&p->rc);
+  p->rc.outStream = &outStream.funcTable;
+
+  res = LzmaEnc_CodeOneBlock(pp, True, desiredPackSize, *unpackSize);
+
+  *unpackSize = (UInt32)(p->nowPos64 - nowPos64);
+  *destLen -= outStream.rem;
+  if (outStream.overflow)
+    return SZ_ERROR_OUTPUT_EOF;
+
+  return res;
+}
+
+SRes LzmaEnc_Encode(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, ICompressProgress *progress,
+    ISzAlloc *alloc, ISzAlloc *allocBig)
+{
+  CLzmaEnc *p = (CLzmaEnc *)pp;
+  SRes res = SZ_OK;
+
+  #ifdef COMPRESS_MF_MT
+  Byte allocaDummy[0x300];
+  int i = 0;
+  for (i = 0; i < 16; i++)
+    allocaDummy[i] = (Byte)i;
+  #endif
+
+  RINOK(LzmaEnc_Prepare(pp, inStream, outStream, alloc, allocBig));
+
+  for (;;)
+  {
+    res = LzmaEnc_CodeOneBlock(pp, False, 0, 0);
+    if (res != SZ_OK || p->finished != 0)
+      break;
+    if (progress != 0)
+    {
+      res = progress->Progress(progress, p->nowPos64, RangeEnc_GetProcessed(&p->rc));
+      if (res != SZ_OK)
+      {
+        res = SZ_ERROR_PROGRESS;
+        break;
+      }
+    }
+  }
+  LzmaEnc_Finish(pp);
+  return res;
+}
+
+SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size)
+{
+  CLzmaEnc *p = (CLzmaEnc *)pp;
+  int i;
+  UInt32 dictSize = p->dictSize;
+  if (*size < LZMA_PROPS_SIZE)
+    return SZ_ERROR_PARAM;
+  *size = LZMA_PROPS_SIZE;
+  props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc);
+
+  for (i = 11; i <= 30; i++)
+  {
+    if (dictSize <= ((UInt32)2 << i))
+    {
+      dictSize = (2 << i);
+      break;
+    }
+    if (dictSize <= ((UInt32)3 << i))
+    {
+      dictSize = (3 << i);
+      break;
+    }
+  }
+
+  for (i = 0; i < 4; i++)
+    props[1 + i] = (Byte)(dictSize >> (8 * i));
+  return SZ_OK;
+}
+
+SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
+    int writeEndMark, ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig)
+{
+  SRes res;
+  CLzmaEnc *p = (CLzmaEnc *)pp;
+
+  CSeqOutStreamBuf outStream;
+
+  LzmaEnc_SetInputBuf(p, src, srcLen);
+
+  outStream.funcTable.Write = MyWrite;
+  outStream.data = dest;
+  outStream.rem = *destLen;
+  outStream.overflow = False;
+
+  p->writeEndMark = writeEndMark;
+  res = LzmaEnc_Encode(pp, &outStream.funcTable, &p->seqBufInStream.funcTable,
+      progress, alloc, allocBig);
+
+  *destLen -= outStream.rem;
+  if (outStream.overflow)
+    return SZ_ERROR_OUTPUT_EOF;
+  return res;
+}
+
+SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
+    const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,
+    ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig)
+{
+  CLzmaEnc *p = (CLzmaEnc *)LzmaEnc_Create(alloc);
+  SRes res;
+  if (p == 0)
+    return SZ_ERROR_MEM;
+
+  res = LzmaEnc_SetProps(p, props);
+  if (res == SZ_OK)
+  {
+    res = LzmaEnc_WriteProperties(p, propsEncoded, propsSize);
+    if (res == SZ_OK)
+      res = LzmaEnc_MemEncode(p, dest, destLen, src, srcLen,
+          writeEndMark, progress, alloc, allocBig);
+  }
+
+  LzmaEnc_Destroy(p, alloc, allocBig);
+  return res;
+}
diff --git a/util/i386/pc/grub-mkimage.c b/util/i386/pc/grub-mkimage.c
index 189ec4e..e604acb 100644
--- a/util/i386/pc/grub-mkimage.c
+++ b/util/i386/pc/grub-mkimage.c
@@ -36,12 +36,22 @@
 #define _GNU_SOURCE	1
 #include <getopt.h>
 
+#if defined(ENABLE_LZO)
+
 #if defined(HAVE_LZO_LZO1X_H)
 # include <lzo/lzo1x.h>
 #elif defined(HAVE_LZO1X_H)
 # include <lzo1x.h>
 #endif
 
+#else
+
+#include <grub/lib/LzmaEnc.h>
+
+#endif
+
+#if defined(ENABLE_LZO)
+
 static void
 compress_kernel (char *kernel_img, size_t kernel_size,
 		 char **core_img, size_t *core_size)
@@ -76,6 +86,48 @@ compress_kernel (char *kernel_img, size_t kernel_size,
   *core_size = (size_t) size + GRUB_KERNEL_MACHINE_RAW_SIZE;
 }
 
+#else
+
+static void *SzAlloc(void *p, size_t size) { p = p; return xmalloc(size); }
+static void SzFree(void *p, void *address) { p = p; free(address); }
+static ISzAlloc g_Alloc = { SzAlloc, SzFree };
+
+static void
+compress_kernel (char *kernel_img, size_t kernel_size,
+		 char **core_img, size_t *core_size)
+{
+  CLzmaEncProps props;
+  unsigned char out_props[5];
+  size_t out_props_size = 5;
+
+  LzmaEncProps_Init(&props);
+  props.dictSize = 1 << 16;
+  props.lc = 3;
+  props.lp = 0;
+  props.pb = 2;
+  props.numThreads = 1;
+
+  grub_util_info ("kernel_img=%p, kernel_size=0x%x", kernel_img, kernel_size);
+  if (kernel_size < GRUB_KERNEL_MACHINE_RAW_SIZE)
+    grub_util_error ("the core image is too small");
+
+  *core_img = xmalloc (kernel_size);
+  memcpy (*core_img, kernel_img, GRUB_KERNEL_MACHINE_RAW_SIZE);
+
+  *core_size = kernel_size - GRUB_KERNEL_MACHINE_RAW_SIZE;
+  if (LzmaEncode((unsigned char *) *core_img + GRUB_KERNEL_MACHINE_RAW_SIZE,
+                 core_size,
+                 (unsigned char *) kernel_img + GRUB_KERNEL_MACHINE_RAW_SIZE,
+                 kernel_size - GRUB_KERNEL_MACHINE_RAW_SIZE,
+                 &props, out_props, &out_props_size,
+                 0, NULL, &g_Alloc, &g_Alloc) != SZ_OK)
+    grub_util_error ("cannot compress the kernel image");
+
+  *core_size += GRUB_KERNEL_MACHINE_RAW_SIZE;
+}
+
+#endif
+
 static void
 generate_image (const char *dir, char *prefix, FILE *out, char *mods[], char *memdisk_path)
 {

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH] LZMA support in i386-pc kernel
  2008-07-02 13:39 [PATCH] LZMA support in i386-pc kernel Bean
@ 2008-07-02 14:42 ` Robert Millan
  2008-07-02 15:11   ` Bean
  2008-07-02 17:50 ` Vesa Jääskeläinen
  2008-07-03 19:37 ` Isaac Dupree
  2 siblings, 1 reply; 14+ messages in thread
From: Robert Millan @ 2008-07-02 14:42 UTC (permalink / raw)
  To: The development of GRUB 2

On Wed, Jul 02, 2008 at 09:39:52PM +0800, Bean wrote:
> Hi,
> 
> This patch add support for lzma decompression. The assembly code
> lzma_decode.S is manually optimized to reduce size. The result decoder
> is tiny, only 416 bytes longer than the lzo version.
> 
> I also include lzma encode from the LZMA SDK. grub needs to use the
> ANSI-C version of encoder, which is only present in the latest 4.58
> beta. I can't find ready to use shared library in most distro.
> Including the encoder/decoder has advantages as well. We don't need to
> worry about the host os, and lzma encoder/decoder can be used in other
> place, like font compression.

Lenny's most likely going to ship with lzma 4.43.  Is that good?

> Result of
> ./grub-mkimage -o core.img biosdisk pc ext2 lvm raid
> 
> lzma version: 27,776 bytes
> lzo version: 32,768 bytes

Amazing.  I'd really like this to make it in time for lenny;  although we're
really pressed for time now.

> diff --git a/include/grub/lib/LzFind.h b/include/grub/lib/LzFind.h
> new file mode 100644
> index 0000000..3d74d0c
> --- /dev/null
> +++ b/include/grub/lib/LzFind.h
> @@ -0,0 +1,116 @@
> +/* LzFind.h  -- Match finder for LZ algorithms
> +2008-04-04
> +Copyright (c) 1999-2008 Igor Pavlov

Have you modified the files from LZMA SDK?  Please add our license boilerplate
to them if you did (e.g. like in kern/i386/pc/lzo1x.S).

> +#define FIXED_PROPS
> [...]
> +#ifdef FIXED_PROPS

What's this option for?  Some comment would be nice.

> +#if 0
> +
> +DbgOut:

This is just for debugging, right?  Maybe "#ifdef DEBUG" or so would be
better, to make it clear.

> +#ifndef ASM_FILE
> +	xorl	%eax, %eax
> +#endif

Why?  I thought ASM_FILE always ought to be defined for asm files in GRUB.

> diff --git a/kern/i386/pc/startup.S b/kern/i386/pc/startup.S
> index 9542978..964643a 100644
> --- a/kern/i386/pc/startup.S
> +++ b/kern/i386/pc/startup.S
> @@ -200,6 +200,7 @@ codestart:
>  	incl	%eax
>  	call	EXT_C(grub_gate_a20)
>  	
> +#if defined(ENABLE_LZO)
>  	/* decompress the compressed part and put the result at 1MB */
>  	movl	$GRUB_MEMORY_MACHINE_DECOMPRESSION_ADDR, %esi
>  	movl	$(START_SYMBOL + GRUB_KERNEL_MACHINE_RAW_SIZE), %edi
> @@ -213,6 +214,23 @@ codestart:
>  	/* copy back the decompressed part */
>  	movl	%eax, %ecx
>  	cld
> +#else
> +	movl	$GRUB_MEMORY_MACHINE_DECOMPRESSION_ADDR, %edi
> +	movl	$(START_SYMBOL + GRUB_KERNEL_MACHINE_RAW_SIZE), %esi
> +	pushl	%edi
> +	pushl	%esi
> +	movl	EXT_C(grub_kernel_image_size), %ecx
> +	addl	EXT_C(grub_total_module_size), %ecx
> +	addl	EXT_C(grub_memdisk_image_size), %ecx
> +	subl	$GRUB_KERNEL_MACHINE_RAW_SIZE, %ecx
> +	pushl	%ecx
> +	leal	(%edi, %ecx), %ebx
> +	call	_LzmaDecodeA
> +	popl	%ecx
> +	popl	%edi
> +	popl	%esi
> +#endif

I suggest having this explicit, like

#elif defined(ENABLE_LZMA)
[...]
#else
#error blah
#endif

> +#if defined(ENABLE_LZO)
>  #include "lzo1x.S"
> +#else
> +#include "lzma_decode.S"
> +#endif

Same here (and in grub-mkimage).


Nice work!

 
-- 
Robert Millan

<GPLv2> I know my rights; I want my phone call!
<DRM> What good is a phone call… if you are unable to speak?
(as seen on /.)



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] LZMA support in i386-pc kernel
  2008-07-02 14:42 ` Robert Millan
@ 2008-07-02 15:11   ` Bean
  2008-07-02 16:00     ` Bean
  2008-07-03 14:06     ` Robert Millan
  0 siblings, 2 replies; 14+ messages in thread
From: Bean @ 2008-07-02 15:11 UTC (permalink / raw)
  To: The development of GRUB 2

On Wed, Jul 2, 2008 at 10:42 PM, Robert Millan <rmh@aybabtu.com> wrote:
> On Wed, Jul 02, 2008 at 09:39:52PM +0800, Bean wrote:
>> Hi,
>>
>> This patch add support for lzma decompression. The assembly code
>> lzma_decode.S is manually optimized to reduce size. The result decoder
>> is tiny, only 416 bytes longer than the lzo version.
>>
>> I also include lzma encode from the LZMA SDK. grub needs to use the
>> ANSI-C version of encoder, which is only present in the latest 4.58
>> beta. I can't find ready to use shared library in most distro.
>> Including the encoder/decoder has advantages as well. We don't need to
>> worry about the host os, and lzma encoder/decoder can be used in other
>> place, like font compression.
>
> Lenny's most likely going to ship with lzma 4.43.  Is that good?

The c encoder first appears in 4.58 beta. Previous version of lzma
only have cpp version.

>
>> Result of
>> ./grub-mkimage -o core.img biosdisk pc ext2 lvm raid
>>
>> lzma version: 27,776 bytes
>> lzo version: 32,768 bytes
>
> Amazing.  I'd really like this to make it in time for lenny;  although we're
> really pressed for time now.
>
>> diff --git a/include/grub/lib/LzFind.h b/include/grub/lib/LzFind.h
>> new file mode 100644
>> index 0000000..3d74d0c
>> --- /dev/null
>> +++ b/include/grub/lib/LzFind.h
>> @@ -0,0 +1,116 @@
>> +/* LzFind.h  -- Match finder for LZ algorithms
>> +2008-04-04
>> +Copyright (c) 1999-2008 Igor Pavlov
>
> Have you modified the files from LZMA SDK?  Please add our license boilerplate
> to them if you did (e.g. like in kern/i386/pc/lzo1x.S).

I change the location of some include files.

>
>> +#define FIXED_PROPS
>> [...]
>> +#ifdef FIXED_PROPS
>
> What's this option for?  Some comment would be nice.

lzma have three properties that control its behavior, lc, lp and pb.
We can use these values as variable or constant. The default value, 3,
0, 2 is nice in most case, we hardly need to change them. So I use it
as constant to save some extra space.

>
>> +#if 0
>> +
>> +DbgOut:
>
> This is just for debugging, right?  Maybe "#ifdef DEBUG" or so would be
> better, to make it clear.

Yes, this is used in debugging. Now it's working, it's not needed
anymore. I think it's better to keep it disable like this. The kernel
raw space is critical, if we happen to define DEBUG, the decoder code
would expand and kernel.img would fail to compile any more.

>
>> +#ifndef ASM_FILE
>> +     xorl    %eax, %eax
>> +#endif
>
> Why?  I thought ASM_FILE always ought to be defined for asm files in GRUB.

This is also used in debugging. Previously, I link it with c program
to check the decoder, which require to keep register like %esi, %edi,
%ebx. But inside grub, there is no need to keep them. I use ASM_FILE
to distinguish between these two conditions.

>
>> diff --git a/kern/i386/pc/startup.S b/kern/i386/pc/startup.S
>> index 9542978..964643a 100644
>> --- a/kern/i386/pc/startup.S
>> +++ b/kern/i386/pc/startup.S
>> @@ -200,6 +200,7 @@ codestart:
>>       incl    %eax
>>       call    EXT_C(grub_gate_a20)
>>
>> +#if defined(ENABLE_LZO)
>>       /* decompress the compressed part and put the result at 1MB */
>>       movl    $GRUB_MEMORY_MACHINE_DECOMPRESSION_ADDR, %esi
>>       movl    $(START_SYMBOL + GRUB_KERNEL_MACHINE_RAW_SIZE), %edi
>> @@ -213,6 +214,23 @@ codestart:
>>       /* copy back the decompressed part */
>>       movl    %eax, %ecx
>>       cld
>> +#else
>> +     movl    $GRUB_MEMORY_MACHINE_DECOMPRESSION_ADDR, %edi
>> +     movl    $(START_SYMBOL + GRUB_KERNEL_MACHINE_RAW_SIZE), %esi
>> +     pushl   %edi
>> +     pushl   %esi
>> +     movl    EXT_C(grub_kernel_image_size), %ecx
>> +     addl    EXT_C(grub_total_module_size), %ecx
>> +     addl    EXT_C(grub_memdisk_image_size), %ecx
>> +     subl    $GRUB_KERNEL_MACHINE_RAW_SIZE, %ecx
>> +     pushl   %ecx
>> +     leal    (%edi, %ecx), %ebx
>> +     call    _LzmaDecodeA
>> +     popl    %ecx
>> +     popl    %edi
>> +     popl    %esi
>> +#endif
>
> I suggest having this explicit, like
>
> #elif defined(ENABLE_LZMA)
> [...]
> #else
> #error blah
> #endif
>
>> +#if defined(ENABLE_LZO)
>>  #include "lzo1x.S"
>> +#else
>> +#include "lzma_decode.S"
>> +#endif
>
> Same here (and in grub-mkimage).

Sounds good to me.

-- 
Bean



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] LZMA support in i386-pc kernel
  2008-07-02 15:11   ` Bean
@ 2008-07-02 16:00     ` Bean
  2008-07-03 18:24       ` Marco Gerards
  2008-07-03 14:06     ` Robert Millan
  1 sibling, 1 reply; 14+ messages in thread
From: Bean @ 2008-07-02 16:00 UTC (permalink / raw)
  To: The development of GRUB 2

[-- Attachment #1: Type: text/plain, Size: 38 bytes --]

Hi,

This is the new patch.

-- 
Bean

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: lzma_2.diff --]
[-- Type: text/x-diff; name=lzma_2.diff, Size: 166443 bytes --]

diff --git a/Makefile.in b/Makefile.in
index c29c5fd..f586a37 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -87,6 +87,7 @@ UNIFONT_HEX = @UNIFONT_HEX@
 # Options.
 enable_grub_emu = @enable_grub_emu@
 enable_grub_fstest = @enable_grub_fstest@
+enable_lzo = @enable_lzo@
 
 ### General variables.
 
diff --git a/conf/i386-pc.rmk b/conf/i386-pc.rmk
index 7b7924f..b677a96 100644
--- a/conf/i386-pc.rmk
+++ b/conf/i386-pc.rmk
@@ -72,10 +72,15 @@ sbin_UTILITIES += grub-emu
 endif
 
 # For grub-mkimage.
+ifeq ($(enable_lzo), yes)
 grub_mkimage_SOURCES = util/i386/pc/grub-mkimage.c util/misc.c \
 	util/resolve.c
-grub_mkimage_CFLAGS = -DGRUB_MEMORY_MACHINE_LINK_ADDR=$(GRUB_MEMORY_MACHINE_LINK_ADDR)
 grub_mkimage_LDFLAGS = $(LIBLZO)
+else
+grub_mkimage_SOURCES = util/i386/pc/grub-mkimage.c util/misc.c \
+	util/resolve.c lib/LzmaEnc.c lib/LzFind.c
+endif
+grub_mkimage_CFLAGS = -DGRUB_MEMORY_MACHINE_LINK_ADDR=$(GRUB_MEMORY_MACHINE_LINK_ADDR)
 util/i386/pc/grub-mkimage.c_DEPENDENCIES = Makefile
 
 # For grub-setup.
diff --git a/config.h.in b/config.h.in
index 1aef212..e2dd41f 100644
--- a/config.h.in
+++ b/config.h.in
@@ -13,6 +13,12 @@
 /* Define it to \"data32\" or \"data32;\" to make GAS happy */
 #undef DATA32
 
+/* Use lzma compression */
+#undef ENABLE_LZMA
+
+/* Use lzo compression */
+#undef ENABLE_LZO
+
 /* Define it to either end or _end */
 #undef END_SYMBOL
 
diff --git a/configure.ac b/configure.ac
index 5ec7a47..e9f699f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -143,19 +143,28 @@ fi
 
 # Check LZO when compiling for the i386-pc.
 if test "$target_cpu"-"$platform" = i386-pc; then
-  # There are three possibilities. LZO version 2 installed with the name
-  # liblzo2, with the name liblzo, and LZO version 1.
-  AC_CHECK_LIB(lzo2, __lzo_init_v2, [LIBLZO="-llzo2"],
-    AC_CHECK_LIB(lzo, __lzo_init_v2, [LIBLZO="-llzo"],
-      AC_CHECK_LIB(lzo, __lzo_init2, [LIBLZO="-llzo"],
-	       AC_MSG_ERROR([LZO library version 1.02 or later is required]))))
-  AC_SUBST(LIBLZO)
-  LIBS="$LIBS $LIBLZO"
-  AC_CHECK_FUNC(lzo1x_999_compress, , 
+  AC_ARG_ENABLE([lzo],
+	      [AS_HELP_STRING([--enable-lzo],
+                             [use lzo to compress kernel (default is lzma)])])
+  [if [ x"$enable_lzo" = xyes ]; then
+    # There are three possibilities. LZO version 2 installed with the name
+    # liblzo2, with the name liblzo, and LZO version 1.]
+    AC_DEFINE([ENABLE_LZO], [1], [Use lzo compression])
+    AC_CHECK_LIB([lzo2], [__lzo_init_v2], [LIBLZO="-llzo2"],
+      [AC_CHECK_LIB([lzo], [__lzo_init_v2], [LIBLZO="-llzo"],
+        [AC_CHECK_LIB([lzo], [__lzo_init2], [LIBLZO="-llzo"],
+	       [AC_MSG_ERROR([LZO library version 1.02 or later is required])])])])
+    AC_SUBST([LIBLZO])
+    [LIBS="$LIBS $LIBLZO"]
+    AC_CHECK_FUNC([lzo1x_999_compress], , 
 	        [AC_MSG_ERROR([LZO1X-999 must be enabled])])
 
-  # LZO version 2 uses lzo/lzo1x.h, while LZO version 1 uses lzo1x.h.
-  AC_CHECK_HEADERS(lzo/lzo1x.h lzo1x.h)
+    [# LZO version 2 uses lzo/lzo1x.h, while LZO version 1 uses lzo1x.h.]
+    AC_CHECK_HEADERS([lzo/lzo1x.h lzo1x.h])
+  [else]
+    AC_DEFINE([ENABLE_LZMA], [1], [Use lzma compression])
+  [fi]
+  AC_SUBST([enable_lzo])
 fi
 
 # Check for functions.
diff --git a/include/grub/i386/pc/kernel.h b/include/grub/i386/pc/kernel.h
index 13e8873..d8d9346 100644
--- a/include/grub/i386/pc/kernel.h
+++ b/include/grub/i386/pc/kernel.h
@@ -44,7 +44,11 @@
 #define GRUB_KERNEL_MACHINE_DATA_END		0x50
 
 /* The size of the first region which won't be compressed.  */
+#if defined(ENABLE_LZO)
 #define GRUB_KERNEL_MACHINE_RAW_SIZE		(GRUB_KERNEL_MACHINE_DATA_END + 0x450)
+#else
+#define GRUB_KERNEL_MACHINE_RAW_SIZE		(GRUB_KERNEL_MACHINE_DATA_END + 0x5F0)
+#endif
 
 #ifndef ASM_FILE
 
diff --git a/include/grub/lib/LzFind.h b/include/grub/lib/LzFind.h
new file mode 100644
index 0000000..eb20c0b
--- /dev/null
+++ b/include/grub/lib/LzFind.h
@@ -0,0 +1,130 @@
+/*
+ *  GRUB  --  GRand Unified Bootloader
+ *  Copyright (c) 1999-2008 Igor Pavlov
+ *  Copyright (C) 2008  Free Software Foundation, Inc.
+ *
+ *  GRUB is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  GRUB is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GRUB.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * This code was taken from LZMA SDK 4.58 beta, and was slightly modified
+ * to adapt it to GRUB's requirement.
+ *
+ * See <http://www.7-zip.org>, for more information about LZMA.
+ */
+
+#ifndef __LZFIND_H
+#define __LZFIND_H
+
+#include <grub/lib/LzmaTypes.h>
+
+typedef UInt32 CLzRef;
+
+typedef struct _CMatchFinder
+{
+  Byte *buffer;
+  UInt32 pos;
+  UInt32 posLimit;
+  UInt32 streamPos;
+  UInt32 lenLimit;
+
+  UInt32 cyclicBufferPos;
+  UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */
+
+  UInt32 matchMaxLen;
+  CLzRef *hash;
+  CLzRef *son;
+  UInt32 hashMask;
+  UInt32 cutValue;
+
+  Byte *bufferBase;
+  ISeqInStream *stream;
+  int streamEndWasReached;
+
+  UInt32 blockSize;
+  UInt32 keepSizeBefore;
+  UInt32 keepSizeAfter;
+
+  UInt32 numHashBytes;
+  int directInput;
+  int btMode;
+  /* int skipModeBits; */
+  int bigHash;
+  UInt32 historySize;
+  UInt32 fixedHashSize;
+  UInt32 hashSizeSum;
+  UInt32 numSons;
+  SRes result;
+  UInt32 crc[256];
+} CMatchFinder;
+
+#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((p)->buffer)
+#define Inline_MatchFinder_GetIndexByte(p, index) ((p)->buffer[(Int32)(index)])
+
+#define Inline_MatchFinder_GetNumAvailableBytes(p) ((p)->streamPos - (p)->pos)
+
+int MatchFinder_NeedMove(CMatchFinder *p);
+Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p);
+void MatchFinder_MoveBlock(CMatchFinder *p);
+void MatchFinder_ReadIfRequired(CMatchFinder *p);
+
+void MatchFinder_Construct(CMatchFinder *p);
+
+/* Conditions:
+     historySize <= 3 GB
+     keepAddBufferBefore + matchMaxLen + keepAddBufferAfter < 511MB
+*/
+int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
+    UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
+    ISzAlloc *alloc);
+void MatchFinder_Free(CMatchFinder *p, ISzAlloc *alloc);
+void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, UInt32 numItems);
+void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);
+
+UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *buffer, CLzRef *son,
+    UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue,
+    UInt32 *distances, UInt32 maxLen);
+
+/*
+Conditions:
+  Mf_GetNumAvailableBytes_Func must be called before each Mf_GetMatchLen_Func.
+  Mf_GetPointerToCurrentPos_Func's result must be used only before any other function
+*/
+
+typedef void (*Mf_Init_Func)(void *object);
+typedef Byte (*Mf_GetIndexByte_Func)(void *object, Int32 index);
+typedef UInt32 (*Mf_GetNumAvailableBytes_Func)(void *object);
+typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object);
+typedef UInt32 (*Mf_GetMatches_Func)(void *object, UInt32 *distances);
+typedef void (*Mf_Skip_Func)(void *object, UInt32);
+
+typedef struct _IMatchFinder
+{
+  Mf_Init_Func Init;
+  Mf_GetIndexByte_Func GetIndexByte;
+  Mf_GetNumAvailableBytes_Func GetNumAvailableBytes;
+  Mf_GetPointerToCurrentPos_Func GetPointerToCurrentPos;
+  Mf_GetMatches_Func GetMatches;
+  Mf_Skip_Func Skip;
+} IMatchFinder;
+
+void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable);
+
+void MatchFinder_Init(CMatchFinder *p);
+UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
+UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
+void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
+void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
+
+#endif
diff --git a/include/grub/lib/LzHash.h b/include/grub/lib/LzHash.h
new file mode 100644
index 0000000..8a77ebe
--- /dev/null
+++ b/include/grub/lib/LzHash.h
@@ -0,0 +1,77 @@
+/*
+ *  GRUB  --  GRand Unified Bootloader
+ *  Copyright (c) 1999-2008 Igor Pavlov
+ *  Copyright (C) 2008  Free Software Foundation, Inc.
+ *
+ *  GRUB is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  GRUB is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GRUB.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * This code was taken from LZMA SDK 4.58 beta, and was slightly modified
+ * to adapt it to GRUB's requirement.
+ *
+ * See <http://www.7-zip.org>, for more information about LZMA.
+ */
+
+#ifndef __LZHASH_H
+#define __LZHASH_H
+
+#define kHash2Size (1 << 10)
+#define kHash3Size (1 << 16)
+#define kHash4Size (1 << 20)
+
+#define kFix3HashSize (kHash2Size)
+#define kFix4HashSize (kHash2Size + kHash3Size)
+#define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)
+
+#define HASH2_CALC hashValue = cur[0] | ((UInt32)cur[1] << 8);
+
+#define HASH3_CALC { \
+  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+  hash2Value = temp & (kHash2Size - 1); \
+  hashValue = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; }
+
+#define HASH4_CALC { \
+  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+  hash2Value = temp & (kHash2Size - 1); \
+  hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); \
+  hashValue = (temp ^ ((UInt32)cur[2] << 8) ^ (p->crc[cur[3]] << 5)) & p->hashMask; }
+
+#define HASH5_CALC { \
+  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+  hash2Value = temp & (kHash2Size - 1); \
+  hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); \
+  hash4Value = (temp ^ ((UInt32)cur[2] << 8) ^ (p->crc[cur[3]] << 5)); \
+  hashValue = (hash4Value ^ (p->crc[cur[4]] << 3)) & p->hashMask; \
+  hash4Value &= (kHash4Size - 1); }
+
+/* #define HASH_ZIP_CALC hashValue = ((cur[0] | ((UInt32)cur[1] << 8)) ^ p->crc[cur[2]]) & 0xFFFF; */
+#define HASH_ZIP_CALC hashValue = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF;
+
+
+#define MT_HASH2_CALC \
+  hash2Value = (p->crc[cur[0]] ^ cur[1]) & (kHash2Size - 1);
+
+#define MT_HASH3_CALC { \
+  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+  hash2Value = temp & (kHash2Size - 1); \
+  hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }
+
+#define MT_HASH4_CALC { \
+  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+  hash2Value = temp & (kHash2Size - 1); \
+  hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); \
+  hash4Value = (temp ^ ((UInt32)cur[2] << 8) ^ (p->crc[cur[3]] << 5)) & (kHash4Size - 1); }
+
+#endif
diff --git a/include/grub/lib/LzmaDec.h b/include/grub/lib/LzmaDec.h
new file mode 100644
index 0000000..ac7cb9e
--- /dev/null
+++ b/include/grub/lib/LzmaDec.h
@@ -0,0 +1,246 @@
+/*
+ *  GRUB  --  GRand Unified Bootloader
+ *  Copyright (c) 1999-2008 Igor Pavlov
+ *  Copyright (C) 2008  Free Software Foundation, Inc.
+ *
+ *  GRUB is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  GRUB is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GRUB.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * This code was taken from LZMA SDK 4.58 beta, and was slightly modified
+ * to adapt it to GRUB's requirement.
+ *
+ * See <http://www.7-zip.org>, for more information about LZMA.
+ */
+
+#ifndef __LZMADEC_H
+#define __LZMADEC_H
+
+#include "Types.h"
+
+/* #define _LZMA_PROB32 */
+/* _LZMA_PROB32 can increase the speed on some CPUs,
+   but memory usage for CLzmaDec::probs will be doubled in that case */
+
+#ifdef _LZMA_PROB32
+#define CLzmaProb UInt32
+#else
+#define CLzmaProb UInt16
+#endif
+
+
+/* ---------- LZMA Properties ---------- */
+
+#define LZMA_PROPS_SIZE 5
+
+typedef struct _CLzmaProps
+{
+  unsigned lc, lp, pb;
+  UInt32 dicSize;
+} CLzmaProps;
+
+/* LzmaProps_Decode - decodes properties
+Returns:
+  SZ_OK
+  SZ_ERROR_UNSUPPORTED - Unsupported properties
+*/
+
+SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size);
+
+
+/* ---------- LZMA Decoder state ---------- */
+
+/* LZMA_REQUIRED_INPUT_MAX = number of required input bytes for worst case.
+   Num bits = log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160; */
+
+#define LZMA_REQUIRED_INPUT_MAX 20
+
+typedef struct
+{
+  CLzmaProps prop;
+  CLzmaProb *probs;
+  Byte *dic;
+  const Byte *buf;
+  UInt32 range, code;
+  SizeT dicPos;
+  SizeT dicBufSize;
+  UInt32 processedPos;
+  UInt32 checkDicSize;
+  unsigned state;
+  UInt32 reps[4];
+  unsigned remainLen;
+  int needFlush;
+  int needInitState;
+  UInt32 numProbs;
+  unsigned tempBufSize;
+  Byte tempBuf[LZMA_REQUIRED_INPUT_MAX];
+} CLzmaDec;
+
+#define LzmaDec_Construct(p) { (p)->dic = 0; (p)->probs = 0; }
+
+void LzmaDec_Init(CLzmaDec *p);
+
+/* There are two types of LZMA streams:
+     0) Stream with end mark. That end mark adds about 6 bytes to compressed size.
+     1) Stream without end mark. You must know exact uncompressed size to decompress such stream. */
+
+typedef enum
+{
+  LZMA_FINISH_ANY,   /* finish at any point */
+  LZMA_FINISH_END    /* block must be finished at the end */
+} ELzmaFinishMode;
+
+/* ELzmaFinishMode has meaning only if the decoding reaches output limit !!!
+
+   You must use LZMA_FINISH_END, when you know that current output buffer
+   covers last bytes of block. In other cases you must use LZMA_FINISH_ANY.
+
+   If LZMA decoder sees end marker before reaching output limit, it returns SZ_OK,
+   and output value of destLen will be less than output buffer size limit.
+   You can check status result also.
+
+   You can use multiple checks to test data integrity after full decompression:
+     1) Check Result and "status" variable.
+     2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize.
+     3) Check that output(srcLen) = compressedSize, if you know real compressedSize.
+        You must use correct finish mode in that case. */
+
+typedef enum
+{
+  LZMA_STATUS_NOT_SPECIFIED,               /* use main error code instead */
+  LZMA_STATUS_FINISHED_WITH_MARK,          /* stream was finished with end mark. */
+  LZMA_STATUS_NOT_FINISHED,                /* stream was not finished */
+  LZMA_STATUS_NEEDS_MORE_INPUT,            /* you must provide more input bytes */
+  LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK  /* there is probability that stream was finished without end mark */
+} ELzmaStatus;
+
+/* ELzmaStatus is used only as output value for function call */
+
+
+/* ---------- Interfaces ---------- */
+
+/* There are 3 levels of interfaces:
+     1) Dictionary Interface
+     2) Buffer Interface
+     3) One Call Interface
+   You can select any of these interfaces, but don't mix functions from different
+   groups for same object. */
+
+
+/* There are two variants to allocate state for Dictionary Interface:
+     1) LzmaDec_Allocate / LzmaDec_Free
+     2) LzmaDec_AllocateProbs / LzmaDec_FreeProbs
+   You can use variant 2, if you set dictionary buffer manually.
+   For Buffer Interface you must always use variant 1.
+
+LzmaDec_Allocate* can return:
+  SZ_OK
+  SZ_ERROR_MEM         - Memory allocation error
+  SZ_ERROR_UNSUPPORTED - Unsupported properties
+*/
+
+SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc);
+void LzmaDec_FreeProbs(CLzmaDec *p, ISzAlloc *alloc);
+
+SRes LzmaDec_Allocate(CLzmaDec *state, const Byte *prop, unsigned propsSize, ISzAlloc *alloc);
+void LzmaDec_Free(CLzmaDec *state, ISzAlloc *alloc);
+
+/* ---------- Dictionary Interface ---------- */
+
+/* You can use it, if you want to eliminate the overhead for data copying from
+   dictionary to some other external buffer.
+   You must work with CLzmaDec variables directly in this interface.
+
+   STEPS:
+     LzmaDec_Constr()
+     LzmaDec_Allocate()
+     for (each new stream)
+     {
+       LzmaDec_Init()
+       while (it needs more decompression)
+       {
+         LzmaDec_DecodeToDic()
+         use data from CLzmaDec::dic and update CLzmaDec::dicPos
+       }
+     }
+     LzmaDec_Free()
+*/
+
+/* LzmaDec_DecodeToDic
+
+   The decoding to internal dictionary buffer (CLzmaDec::dic).
+   You must manually update CLzmaDec::dicPos, if it reaches CLzmaDec::dicBufSize !!!
+
+finishMode:
+  It has meaning only if the decoding reaches output limit (dicLimit).
+  LZMA_FINISH_ANY - Decode just dicLimit bytes.
+  LZMA_FINISH_END - Stream must be finished after dicLimit.
+
+Returns:
+  SZ_OK
+    status:
+      LZMA_STATUS_FINISHED_WITH_MARK
+      LZMA_STATUS_NOT_FINISHED
+      LZMA_STATUS_NEEDS_MORE_INPUT
+      LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
+  SZ_ERROR_DATA - Data error
+*/
+
+SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit,
+    const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
+
+
+/* ---------- Buffer Interface ---------- */
+
+/* It's zlib-like interface.
+   See LzmaDec_DecodeToDic description for information about STEPS and return results,
+   but you must use LzmaDec_DecodeToBuf instead of LzmaDec_DecodeToDic and you don't need
+   to work with CLzmaDec variables manually.
+
+finishMode:
+  It has meaning only if the decoding reaches output limit (*destLen).
+  LZMA_FINISH_ANY - Decode just destLen bytes.
+  LZMA_FINISH_END - Stream must be finished after (*destLen).
+*/
+
+SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen,
+    const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
+
+
+/* ---------- One Call Interface ---------- */
+
+/* LzmaDecode
+
+finishMode:
+  It has meaning only if the decoding reaches output limit (*destLen).
+  LZMA_FINISH_ANY - Decode just destLen bytes.
+  LZMA_FINISH_END - Stream must be finished after (*destLen).
+
+Returns:
+  SZ_OK
+    status:
+      LZMA_STATUS_FINISHED_WITH_MARK
+      LZMA_STATUS_NOT_FINISHED
+      LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
+  SZ_ERROR_DATA - Data error
+  SZ_ERROR_MEM  - Memory allocation error
+  SZ_ERROR_UNSUPPORTED - Unsupported properties
+  SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).
+*/
+
+SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
+    const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode,
+    ELzmaStatus *status, ISzAlloc *alloc);
+
+#endif
diff --git a/include/grub/lib/LzmaEnc.h b/include/grub/lib/LzmaEnc.h
new file mode 100644
index 0000000..8343920
--- /dev/null
+++ b/include/grub/lib/LzmaEnc.h
@@ -0,0 +1,95 @@
+/*
+ *  GRUB  --  GRand Unified Bootloader
+ *  Copyright (c) 1999-2008 Igor Pavlov
+ *  Copyright (C) 2008  Free Software Foundation, Inc.
+ *
+ *  GRUB is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  GRUB is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GRUB.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * This code was taken from LZMA SDK 4.58 beta, and was slightly modified
+ * to adapt it to GRUB's requirement.
+ *
+ * See <http://www.7-zip.org>, for more information about LZMA.
+ */
+
+#ifndef __LZMAENC_H
+#define __LZMAENC_H
+
+#include "LzmaTypes.h"
+
+#define LZMA_PROPS_SIZE 5
+
+typedef struct _CLzmaEncProps
+{
+  int level;       /*  0 <= level <= 9 */
+  UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version
+                      (1 << 12) <= dictSize <= (1 << 30) for 64-bit version
+                       default = (1 << 24) */
+  int lc;          /* 0 <= lc <= 8, default = 3 */
+  int lp;          /* 0 <= lp <= 4, default = 0 */
+  int pb;          /* 0 <= pb <= 4, default = 2 */
+  int algo;        /* 0 - fast, 1 - normal, default = 1 */
+  int fb;          /* 5 <= fb <= 273, default = 32 */
+  int btMode;      /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */
+  int numHashBytes; /* 2, 3 or 4, default = 4 */
+  UInt32 mc;        /* 1 <= mc <= (1 << 30), default = 32 */
+  unsigned writeEndMark;  /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */
+  int numThreads;  /* 1 or 2, default = 2 */
+} CLzmaEncProps;
+
+void LzmaEncProps_Init(CLzmaEncProps *p);
+void LzmaEncProps_Normalize(CLzmaEncProps *p);
+UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2);
+
+
+/* ---------- CLzmaEncHandle Interface ---------- */
+
+/* LzmaEnc_* functions can return the following exit codes:
+Returns:
+  SZ_OK           - OK
+  SZ_ERROR_MEM    - Memory allocation error
+  SZ_ERROR_PARAM  - Incorrect paramater in props
+  SZ_ERROR_WRITE  - Write callback error.
+  SZ_ERROR_PROGRESS - some break from progress callback
+  SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version)
+*/
+
+typedef void * CLzmaEncHandle;
+
+CLzmaEncHandle LzmaEnc_Create(ISzAlloc *alloc);
+void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAlloc *alloc, ISzAlloc *allocBig);
+SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props);
+SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *properties, SizeT *size);
+SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStream *outStream, ISeqInStream *inStream,
+    ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig);
+SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
+    int writeEndMark, ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig);
+
+/* ---------- One Call Interface ---------- */
+
+/* LzmaEncode
+Return code:
+  SZ_OK               - OK
+  SZ_ERROR_MEM        - Memory allocation error
+  SZ_ERROR_PARAM      - Incorrect paramater
+  SZ_ERROR_OUTPUT_EOF - output buffer overflow
+  SZ_ERROR_THREAD     - errors in multithreading functions (only for Mt version)
+*/
+
+SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
+    const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,
+    ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig);
+
+#endif
diff --git a/include/grub/lib/LzmaTypes.h b/include/grub/lib/LzmaTypes.h
new file mode 100644
index 0000000..2769ed7
--- /dev/null
+++ b/include/grub/lib/LzmaTypes.h
@@ -0,0 +1,151 @@
+/*
+ *  GRUB  --  GRand Unified Bootloader
+ *  Copyright (c) 1999-2008 Igor Pavlov
+ *  Copyright (C) 2008  Free Software Foundation, Inc.
+ *
+ *  GRUB is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  GRUB is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GRUB.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * This code was taken from LZMA SDK 4.58 beta, and was slightly modified
+ * to adapt it to GRUB's requirement.
+ *
+ * See <http://www.7-zip.org>, for more information about LZMA.
+ */
+
+#ifndef __7Z_TYPES_H
+#define __7Z_TYPES_H
+
+#define SZ_OK 0
+
+#define SZ_ERROR_DATA 1
+#define SZ_ERROR_MEM 2
+#define SZ_ERROR_CRC 3
+#define SZ_ERROR_UNSUPPORTED 4
+#define SZ_ERROR_PARAM 5
+#define SZ_ERROR_INPUT_EOF 6
+#define SZ_ERROR_OUTPUT_EOF 7
+#define SZ_ERROR_READ 8
+#define SZ_ERROR_WRITE 9
+#define SZ_ERROR_PROGRESS 10
+#define SZ_ERROR_FAIL 11
+#define SZ_ERROR_THREAD 12
+
+#define SZ_ERROR_ARCHIVE 16
+#define SZ_ERROR_NO_ARCHIVE 17
+
+typedef int SRes;
+
+#ifndef RINOK
+#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; }
+#endif
+
+typedef unsigned char Byte;
+typedef short Int16;
+typedef unsigned short UInt16;
+
+#ifdef _LZMA_UINT32_IS_ULONG
+typedef long Int32;
+typedef unsigned long UInt32;
+#else
+typedef int Int32;
+typedef unsigned int UInt32;
+#endif
+
+/* #define _SZ_NO_INT_64 */
+/* define it if your compiler doesn't support 64-bit integers */
+
+#ifdef _SZ_NO_INT_64
+
+typedef long Int64;
+typedef unsigned long UInt64;
+
+#else
+
+#if defined(_MSC_VER) || defined(__BORLANDC__)
+typedef __int64 Int64;
+typedef unsigned __int64 UInt64;
+#else
+typedef long long int Int64;
+typedef unsigned long long int UInt64;
+#endif
+
+#endif
+
+#ifdef _LZMA_NO_SYSTEM_SIZE_T
+typedef UInt32 SizeT;
+#else
+#include <stddef.h>
+typedef size_t SizeT;
+#endif
+
+typedef int Bool;
+#define True 1
+#define False 0
+
+
+#ifdef _MSC_VER
+
+#if _MSC_VER >= 1300
+#define MY_NO_INLINE __declspec(noinline)
+#else
+#define MY_NO_INLINE
+#endif
+
+#define MY_CDECL __cdecl
+#define MY_STD_CALL __stdcall
+#define MY_FAST_CALL MY_NO_INLINE __fastcall
+
+#else
+
+#define MY_CDECL
+#define MY_STD_CALL
+#define MY_FAST_CALL
+
+#endif
+
+
+/* The following interfaces use first parameter as pointer to structure */
+
+typedef struct
+{
+  SRes (*Read)(void *p, void *buf, size_t *size);
+    /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
+       (output(*size) < input(*size)) is allowed */
+} ISeqInStream;
+
+typedef struct
+{
+  size_t (*Write)(void *p, const void *buf, size_t size);
+    /* Returns: result - the number of actually written bytes.
+      (result < size) means error */
+} ISeqOutStream;
+
+typedef struct
+{
+  SRes (*Progress)(void *p, UInt64 inSize, UInt64 outSize);
+    /* Returns: result. (result != SZ_OK) means break.
+       Value (UInt64)(Int64)-1 for size means unknown value. */
+} ICompressProgress;
+
+typedef struct
+{
+  void *(*Alloc)(void *p, size_t size);
+  void (*Free)(void *p, void *address); /* address can be 0 */
+} ISzAlloc;
+
+#define IAlloc_Alloc(p, size) (p)->Alloc((p), size)
+#define IAlloc_Free(p, a) (p)->Free((p), a)
+
+#endif
diff --git a/kern/i386/pc/lzma_decode.S b/kern/i386/pc/lzma_decode.S
new file mode 100644
index 0000000..c194292
--- /dev/null
+++ b/kern/i386/pc/lzma_decode.S
@@ -0,0 +1,677 @@
+/*
+ *  GRUB  --  GRand Unified Bootloader
+ *  Copyright (C) 2008  Free Software Foundation, Inc.
+ *
+ *  GRUB is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  GRUB is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GRUB.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define FIXED_PROPS
+
+#define LZMA_BASE_SIZE 1846
+#define LZMA_LIT_SIZE 768
+
+#define LZMA_PROPERTIES_SIZE 5
+
+#define kNumTopBits 24
+#define kTopValue (1 << kNumTopBits)
+
+#define kNumBitModelTotalBits 11
+#define kBitModelTotal (1 << kNumBitModelTotalBits)
+#define kNumMoveBits 5
+
+
+#define kNumPosBitsMax 4
+#define kNumPosStatesMax (1 << kNumPosBitsMax)
+
+#define kLenNumLowBits 3
+#define kLenNumLowSymbols (1 << kLenNumLowBits)
+#define kLenNumMidBits 3
+#define kLenNumMidSymbols (1 << kLenNumMidBits)
+#define kLenNumHighBits 8
+#define kLenNumHighSymbols (1 << kLenNumHighBits)
+
+#define LenChoice 0
+#define LenChoice2 (LenChoice + 1)
+#define LenLow (LenChoice2 + 1)
+#define LenMid (LenLow + (kNumPosStatesMax << kLenNumLowBits))
+#define LenHigh (LenMid + (kNumPosStatesMax << kLenNumMidBits))
+#define kNumLenProbs (LenHigh + kLenNumHighSymbols)
+
+
+#define kNumStates 12
+#define kNumLitStates 7
+
+#define kStartPosModelIndex 4
+#define kEndPosModelIndex 14
+#define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
+
+#define kNumPosSlotBits 6
+#define kNumLenToPosStates 4
+
+#define kNumAlignBits 4
+#define kAlignTableSize (1 << kNumAlignBits)
+
+#define kMatchMinLen 2
+
+#define IsMatch 0
+#define IsRep (IsMatch + (kNumStates << kNumPosBitsMax))
+#define IsRepG0 (IsRep + kNumStates)
+#define IsRepG1 (IsRepG0 + kNumStates)
+#define IsRepG2 (IsRepG1 + kNumStates)
+#define IsRep0Long (IsRepG2 + kNumStates)
+#define PosSlot (IsRep0Long + (kNumStates << kNumPosBitsMax))
+#define SpecPos (PosSlot + (kNumLenToPosStates << kNumPosSlotBits))
+#define Align (SpecPos + kNumFullDistances - kEndPosModelIndex)
+#define LenCoder (Align + kAlignTableSize)
+#define RepLenCoder (LenCoder + kNumLenProbs)
+#define Literal (RepLenCoder + kNumLenProbs)
+
+
+#if 0
+
+DbgOut:
+	pushf
+	pushl	%ebp
+	pushl	%edi
+	pushl	%esi
+	pushl	%edx
+	pushl	%ecx
+	pushl	%ebx
+	pushl	%eax
+
+	call	_DebugPrint
+
+	popl	%eax
+	popl	%ebx
+	popl	%ecx
+	popl	%edx
+	popl	%esi
+	popl	%edi
+	popl	%ebp
+	popf
+
+	ret
+
+
+/*
+ * int LzmaDecodeProperties(CLzmaProperties *propsRes,
+ *                          const unsigned char *propsData,
+ *                          int size);
+ */
+
+_LzmaDecodePropertiesA:
+	movb	(%edx), %dl
+
+	xorl	%ecx, %ecx
+1:
+	cmpb	$45, %dl
+	jb	2f
+	incl	%ecx
+	subb	$45, %dl
+	jmp	1b
+2:
+	movl	%ecx, 8(%eax)		/* pb */
+	xorl	%ecx, %ecx
+1:
+	cmpb	$9, %dl
+	jb	2f
+	incl	%ecx
+	subb	$9, %dl
+2:
+	movl	%ecx, 4(%eax)		/* lp */
+	movb	%dl, %cl
+	movl	%ecx, (%eax)		/* lc */
+
+#endif
+
+#ifndef ASM_FILE
+	xorl	%eax, %eax
+#endif
+	ret
+
+#define out_size	8(%ebp)
+
+#define now_pos		-4(%ebp)
+#define prev_byte	-8(%ebp)
+#define range		-12(%ebp)
+#define code		-16(%ebp)
+#define state		-20(%ebp)
+#define rep0		-24(%ebp)
+#define rep1		-28(%ebp)
+#define rep2		-32(%ebp)
+#define rep3		-36(%ebp)
+
+#ifdef FIXED_PROPS
+
+#define FIXED_LC	3
+#define FIXED_LP	0
+#define FIXED_PB	2
+
+#define POS_STATE_MASK	((1 << (FIXED_PB)) - 1)
+#define LIT_POS_MASK	((1 << (FIXED_LP)) - 1)
+
+#define LOCAL_SIZE	36
+
+#else
+
+#define lc		(%ebx)
+#define lp		4(%ebx)
+#define pb		8(%ebx)
+#define probs		12(%ebx)
+
+#define pos_state_mask	-40(%ebp)
+#define lit_pos_mask	-44(%ebp)
+
+#define LOCAL_SIZE	44
+
+#endif
+
+RangeDecoderBitDecode:
+#ifdef FIXED_PROPS
+	leal	(%ebx, %eax, 4), %eax
+#else
+	shll	$2, %eax
+	addl	probs, %eax
+#endif
+
+	movl	%eax, %ecx
+	movl	(%ecx), %eax
+
+	movl	range, %edx
+	shrl	$kNumBitModelTotalBits, %edx
+	mull	%edx
+
+	cmpl	code, %eax
+	jbe	1f
+
+	movl	%eax, range
+	movl	$kBitModelTotal, %edx
+	subl	(%ecx), %edx
+	shrl	$kNumMoveBits, %edx
+	addl	%edx, (%ecx)
+	clc
+3:
+	pushf
+	cmpl	$kTopValue, range
+	jnc	2f
+	shll	$8, code
+	lodsb
+	movb	%al, code
+	shll	$8, range
+2:
+	popf
+	ret
+1:
+	subl	%eax, range
+	subl	%eax, code
+	movl	(%ecx), %edx
+	shrl	$kNumMoveBits, %edx
+	subl	%edx, (%ecx)
+	stc
+	jmp	3b
+
+RangeDecoderBitTreeDecode:
+RangeDecoderReverseBitTreeDecode:
+	movzbl	%cl, %ecx
+	xorl	%edx, %edx
+	pushl	%edx
+	incl	%edx
+	pushl	%edx
+
+1:
+	pushl	%eax
+	pushl	%ecx
+	pushl	%edx
+
+	addl	%edx, %eax
+	call	RangeDecoderBitDecode
+
+	popl	%edx
+	popl	%ecx
+
+	jnc	2f
+	movl	4(%esp), %eax
+	orl	%eax, 8(%esp)
+	stc
+
+2:
+	adcl	%edx, %edx
+	popl	%eax
+
+	shll	$1, (%esp)
+	loop	1b
+
+	popl	%ecx
+	subl	%ecx, %edx		/* RangeDecoderBitTreeDecode */
+	popl	%ecx			/* RangeDecoderReverseBitTreeDecode */
+	ret
+
+LzmaLenDecode:
+	pushl	%eax
+	addl	$LenChoice, %eax
+	call	RangeDecoderBitDecode
+	popl	%eax
+	jc	1f
+	pushl	$0
+	movb	$kLenNumLowBits, %cl
+	addl	$LenLow, %eax
+2:
+	movl	12(%esp), %edx
+	shll	%cl, %edx
+	addl	%edx, %eax
+3:
+
+	call	RangeDecoderBitTreeDecode
+	popl	%eax
+	addl	%eax, %edx
+	ret
+
+1:
+	pushl	%eax
+	addl	$LenChoice2, %eax
+	call	RangeDecoderBitDecode
+	popl	%eax
+	jc	1f
+	pushl	$kLenNumLowSymbols
+	movb	$kLenNumMidBits, %cl
+	addl	$LenMid, %eax
+	jmp	2b
+
+1:
+	pushl	$(kLenNumLowSymbols + kLenNumMidSymbols)
+	addl	$LenHigh, %eax
+	movb	$kLenNumHighBits, %cl
+	jmp	3b
+
+WriteByte:
+	movb	%al, prev_byte
+	stosb
+	incl	now_pos
+	ret
+
+/*
+ * int LzmaDecode(CLzmaDecoderState *vs,
+ *                const unsigned char *inStream,
+ *                unsigned char *outStream,
+ *                SizeT outSize);
+ */
+
+_LzmaDecodeA:
+
+	pushl	%ebp
+	movl	%esp, %ebp
+	subl	$LOCAL_SIZE, %esp
+
+#ifndef ASM_FILE
+	pushl	%esi
+	pushl	%edi
+	pushl	%ebx
+
+	movl	%eax, %ebx
+	movl	%edx, %esi
+	pushl	%ecx
+#else
+	pushl	%edi
+#endif
+
+	cld
+
+#ifdef FIXED_PROPS
+	movl	%ebx, %edi
+	movl	$(Literal + (LZMA_LIT_SIZE << (FIXED_LC + FIXED_LP))), %ecx
+#else
+	movl	$LZMA_LIT_SIZE, %eax
+	movb	lc, %cl
+	addb	lp, %cl
+	shll	%cl, %eax
+	addl	$Literal, %eax
+	movl	%eax, %ecx
+	movl	probs, %edi
+#endif
+
+	movl	$(kBitModelTotal >> 1), %eax
+
+	rep
+	stosl
+
+	popl	%edi
+
+	xorl	%eax, %eax
+	movl	%eax, now_pos
+	movl	%eax, prev_byte
+	movl	%eax, state
+
+	incl	%eax
+	movl	%eax, rep0
+	movl	%eax, rep1
+	movl	%eax, rep2
+	movl	%eax, rep3
+
+#ifndef FIXED_PROPS
+	movl	%eax, %edx
+	movb	pb, %cl
+	shll	%cl, %edx
+	decl	%edx
+	movl	%edx, pos_state_mask
+
+	movl	%eax, %edx
+	movb	lp, %cl
+	shll	%cl, %edx
+	decl	%edx
+	movl	%edx, lit_pos_mask;
+#endif
+
+	/* RangeDecoderInit */
+	negl	%eax
+	movl	%eax, range
+
+	incl	%eax
+	movb	$5, %cl
+
+1:
+	shll	$8, %eax
+	lodsb
+	loop	1b
+
+	movl	%eax, code
+
+lzma_decode_loop:
+	movl	now_pos, %eax
+	cmpl	out_size, %eax
+
+	jb	1f
+
+#ifndef ASM_FILE
+	xorl	%eax, %eax
+
+	popl	%ebx
+	popl	%edi
+	popl	%esi
+#endif
+
+	movl	%ebp, %esp
+	popl	%ebp
+	ret
+
+1:
+#ifdef FIXED_PROPS
+	andl	$POS_STATE_MASK, %eax
+#else
+	andl	pos_state_mask, %eax
+#endif
+	pushl	%eax				/* posState */
+	movl	state, %edx
+	shll	$kNumPosBitsMax, %edx
+	addl	%edx, %eax
+	pushl	%eax				/* (state << kNumPosBitsMax) + posState */
+
+	call	RangeDecoderBitDecode
+	jc	1f
+
+	movl	now_pos, %eax
+
+#ifdef FIXED_PROPS
+	andl	$LIT_POS_MASK, %eax
+	shll	$FIXED_LC, %eax
+	movl	prev_byte, %edx
+	shrl	$(8 - FIXED_LC), %edx
+#else
+	andl	lit_pos_mask, %eax
+	movb	lc, %cl
+	shll	%cl, %eax
+	negb	%cl
+	addb	$8, %cl
+	movl	prev_byte, %edx
+	shrl	%cl, %edx
+#endif
+
+	addl	%edx, %eax
+	movl	$LZMA_LIT_SIZE, %edx
+	mull	%edx
+	addl	$Literal, %eax
+	pushl	%eax
+
+	incl	%edx			/* edx = 1 */
+
+	movl	rep0, %eax
+	negl	%eax
+	pushl	(%edi, %eax)		/* matchByte */
+
+	cmpb	$kNumLitStates, state
+	jb	5f
+
+	/* LzmaLiteralDecodeMatch */
+
+3:
+	cmpl	$0x100, %edx
+	jae	4f
+
+	xorl	%eax, %eax
+	shlb	$1, (%esp)
+	adcl	%eax, %eax
+
+	pushl	%eax
+	pushl	%edx
+
+	shll	$8, %eax
+	leal	0x100(%edx, %eax), %eax
+	addl	12(%esp), %eax
+	call	RangeDecoderBitDecode
+
+	setc	%al
+	popl	%edx
+	adcl	%edx, %edx
+
+	popl	%ecx
+	cmpb	%cl, %al
+	jz	3b
+
+5:
+
+	/* LzmaLiteralDecode */
+
+	cmpl	$0x100, %edx
+	jae	4f
+
+	pushl	%edx
+	movl	%edx, %eax
+	addl	8(%esp), %eax
+	call	RangeDecoderBitDecode
+	popl	%edx
+	adcl	%edx, %edx
+	jmp	5b
+
+4:
+	addl	$16, %esp
+
+	movb	%dl, %al
+	call	WriteByte
+
+	movb	state, %al
+	cmpb	$4, %al
+	jae	2f
+	xorb	%al, %al
+	jmp	3f
+2:
+	subb	$3, %al
+	cmpb	$7, %al
+	jb	3f
+	subb	$3, %al
+3:
+	movb	%al, state
+	jmp	lzma_decode_loop
+
+1:
+	movl	state, %eax
+	addl	$IsRep, %eax
+	call	RangeDecoderBitDecode
+	jnc	1f
+
+	movl	state, %eax
+	addl	$IsRepG0, %eax
+	call	RangeDecoderBitDecode
+	jc	10f
+
+	movl	(%esp), %eax
+	addl	$IsRep0Long, %eax
+	call	RangeDecoderBitDecode
+	jc	20f
+
+	cmpb	$7, state
+	movb	$9, state
+	jb	100f
+	addb	$2, state
+100:
+
+	movl	$1, %ecx
+
+3:
+	movl	rep0, %edx
+	negl	%edx
+
+4:
+	movb	(%edi, %edx), %al
+	call	WriteByte
+	loop	4b
+
+	popl	%eax
+	popl	%eax
+	jmp	lzma_decode_loop
+
+10:
+	movl	state, %eax
+	addl	$IsRepG1, %eax
+	call	RangeDecoderBitDecode
+	movl	rep1, %edx
+	jnc	100f
+
+	movl	state, %eax
+	addl	$IsRepG2, %eax
+	call	RangeDecoderBitDecode
+	movl	rep2, %edx
+	jnc	1000f
+	movl	rep2, %edx
+	xchgl	rep3, %edx
+1000:
+	pushl	rep1
+	popl	rep2
+100:
+	xchg	rep0, %edx
+	movl	%edx, rep1
+20:
+
+	movl	$RepLenCoder, %eax
+	call	LzmaLenDecode
+
+	cmpb	$7, state
+	movb	$8, state
+	jb	100f
+	addb	$3, state
+100:
+	jmp	2f
+
+1:
+	movl	rep0, %eax
+	xchgl	rep1, %eax
+	xchgl	rep2, %eax
+	movl	%eax, rep3
+
+	cmpb	$7, state
+	movb	$7, state
+	jb	10f
+	addb	$3, state
+10:
+
+	movl	$LenCoder, %eax
+	call	LzmaLenDecode
+	pushl	%edx
+
+	movl	$(kNumLenToPosStates - 1), %eax
+	cmpl	%eax, %edx
+	jbe	100f
+	movl	%eax, %edx
+100:
+	movb	$kNumPosSlotBits, %cl
+	shll	%cl, %edx
+	leal	PosSlot(%edx), %eax
+	call	RangeDecoderBitTreeDecode
+
+	movl	%edx, rep0
+	cmpl	$kStartPosModelIndex, %edx
+	jb	100f
+
+	movl	%edx, %ecx
+	shrl	$1, %ecx
+	decl	%ecx
+
+	movzbl	%dl, %eax
+	andb	$1, %al
+	orb	$2, %al
+	shll	%cl, %eax
+	movl	%eax, rep0
+
+	cmpl	$kEndPosModelIndex, %edx
+	jae	200f
+	movl	rep0, %eax
+	addl	$(SpecPos - 1), %eax
+	subl	%edx, %eax
+	jmp	300f
+200:
+
+	subb	$kNumAlignBits, %cl
+
+	/* RangeDecoderDecodeDirectBits */
+	xorl	%edx, %edx
+
+1000:
+	shrl	$1, range
+	shll	$1, %edx
+
+	movl	range, %eax
+	cmpl	%eax, code
+	jb	2000f
+	subl	%eax, code
+	orb	$1, %dl
+2000:
+
+	cmpl	$kTopValue, %eax
+	jae	3000f
+	shll	$8, range
+	shll	$8, code
+	lodsb
+	movb	%al, code
+
+3000:
+	loop	1000b
+
+	movb	$kNumAlignBits, %cl
+	shll	%cl, %edx
+	addl	%edx, rep0
+
+	movl	$Align, %eax
+
+300:
+	call	RangeDecoderReverseBitTreeDecode
+	addl	%ecx, rep0
+
+100:
+	incl	rep0
+	popl	%edx
+
+2:
+
+	addl	$kMatchMinLen, %edx
+	movl	%edx, %ecx
+
+	jmp	3b
diff --git a/kern/i386/pc/startup.S b/kern/i386/pc/startup.S
index 9542978..4c6621e 100644
--- a/kern/i386/pc/startup.S
+++ b/kern/i386/pc/startup.S
@@ -200,6 +200,7 @@ codestart:
 	incl	%eax
 	call	EXT_C(grub_gate_a20)
 	
+#if defined(ENABLE_LZO)
 	/* decompress the compressed part and put the result at 1MB */
 	movl	$GRUB_MEMORY_MACHINE_DECOMPRESSION_ADDR, %esi
 	movl	$(START_SYMBOL + GRUB_KERNEL_MACHINE_RAW_SIZE), %edi
@@ -213,6 +214,23 @@ codestart:
 	/* copy back the decompressed part */
 	movl	%eax, %ecx
 	cld
+#elif defined(ENABLE_LZMA)
+	movl	$GRUB_MEMORY_MACHINE_DECOMPRESSION_ADDR, %edi
+	movl	$(START_SYMBOL + GRUB_KERNEL_MACHINE_RAW_SIZE), %esi
+	pushl	%edi
+	pushl	%esi
+	movl	EXT_C(grub_kernel_image_size), %ecx
+	addl	EXT_C(grub_total_module_size), %ecx
+	addl	EXT_C(grub_memdisk_image_size), %ecx
+	subl	$GRUB_KERNEL_MACHINE_RAW_SIZE, %ecx
+	pushl	%ecx
+	leal	(%edi, %ecx), %ebx
+	call	_LzmaDecodeA
+	popl	%ecx
+	popl	%edi
+	popl	%esi
+#endif
+
 	rep
 	movsb
 
@@ -476,7 +494,11 @@ gate_a20_check_state:
 	popl	%ebx
 	ret
 
+#if defined(ENABLE_LZO)
 #include "lzo1x.S"
+#elif defined(ENABLE_LZMA)
+#include "lzma_decode.S"
+#endif
 
 /*
  * The code beyond this point is compressed.  Assert that the uncompressed
diff --git a/lib/LzFind.c b/lib/LzFind.c
new file mode 100644
index 0000000..e9d8e8a
--- /dev/null
+++ b/lib/LzFind.c
@@ -0,0 +1,774 @@
+/*
+ *  GRUB  --  GRand Unified Bootloader
+ *  Copyright (c) 1999-2008 Igor Pavlov
+ *  Copyright (C) 2008  Free Software Foundation, Inc.
+ *
+ *  GRUB is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  GRUB is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GRUB.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * This code was taken from LZMA SDK 4.58 beta, and was slightly modified
+ * to adapt it to GRUB's requirement.
+ *
+ * See <http://www.7-zip.org>, for more information about LZMA.
+ */
+
+#include <string.h>
+
+#include <grub/lib/LzFind.h>
+#include <grub/lib/LzHash.h>
+
+#define kEmptyHashValue 0
+#define kMaxValForNormalize ((UInt32)0xFFFFFFFF)
+#define kNormalizeStepMin (1 << 10) /* it must be power of 2 */
+#define kNormalizeMask (~(kNormalizeStepMin - 1))
+#define kMaxHistorySize ((UInt32)3 << 30)
+
+#define kStartMaxLen 3
+
+static void LzInWindow_Free(CMatchFinder *p, ISzAlloc *alloc)
+{
+  if (!p->directInput)
+  {
+    alloc->Free(alloc, p->bufferBase);
+    p->bufferBase = 0;
+  }
+}
+
+/* keepSizeBefore + keepSizeAfter + keepSizeReserv must be < 4G) */
+
+static int LzInWindow_Create(CMatchFinder *p, UInt32 keepSizeReserv, ISzAlloc *alloc)
+{
+  UInt32 blockSize = p->keepSizeBefore + p->keepSizeAfter + keepSizeReserv;
+  if (p->directInput)
+  {
+    p->blockSize = blockSize;
+    return 1;
+  }
+  if (p->bufferBase == 0 || p->blockSize != blockSize)
+  {
+    LzInWindow_Free(p, alloc);
+    p->blockSize = blockSize;
+    p->bufferBase = (Byte *)alloc->Alloc(alloc, (size_t)blockSize);
+  }
+  return (p->bufferBase != 0);
+}
+
+Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; }
+Byte MatchFinder_GetIndexByte(CMatchFinder *p, Int32 index) { return p->buffer[index]; }
+
+UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return p->streamPos - p->pos; }
+
+void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue)
+{
+  p->posLimit -= subValue;
+  p->pos -= subValue;
+  p->streamPos -= subValue;
+}
+
+static void MatchFinder_ReadBlock(CMatchFinder *p)
+{
+  if (p->streamEndWasReached || p->result != SZ_OK)
+    return;
+  for (;;)
+  {
+    Byte *dest = p->buffer + (p->streamPos - p->pos);
+    size_t size = (p->bufferBase + p->blockSize - dest);
+    if (size == 0)
+      return;
+    p->result = p->stream->Read(p->stream, dest, &size);
+    if (p->result != SZ_OK)
+      return;
+    if (size == 0)
+    {
+      p->streamEndWasReached = 1;
+      return;
+    }
+    p->streamPos += (UInt32)size;
+    if (p->streamPos - p->pos > p->keepSizeAfter)
+      return;
+  }
+}
+
+void MatchFinder_MoveBlock(CMatchFinder *p)
+{
+  memmove(p->bufferBase,
+    p->buffer - p->keepSizeBefore,
+    (size_t)(p->streamPos - p->pos + p->keepSizeBefore));
+  p->buffer = p->bufferBase + p->keepSizeBefore;
+}
+
+int MatchFinder_NeedMove(CMatchFinder *p)
+{
+  /* if (p->streamEndWasReached) return 0; */
+  return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter);
+}
+
+void MatchFinder_ReadIfRequired(CMatchFinder *p)
+{
+  if (p->streamEndWasReached)
+    return;
+  if (p->keepSizeAfter >= p->streamPos - p->pos)
+    MatchFinder_ReadBlock(p);
+}
+
+static void MatchFinder_CheckAndMoveAndRead(CMatchFinder *p)
+{
+  if (MatchFinder_NeedMove(p))
+    MatchFinder_MoveBlock(p);
+  MatchFinder_ReadBlock(p);
+}
+
+static void MatchFinder_SetDefaultSettings(CMatchFinder *p)
+{
+  p->cutValue = 32;
+  p->btMode = 1;
+  p->numHashBytes = 4;
+  /* p->skipModeBits = 0; */
+  p->directInput = 0;
+  p->bigHash = 0;
+}
+
+#define kCrcPoly 0xEDB88320
+
+void MatchFinder_Construct(CMatchFinder *p)
+{
+  UInt32 i;
+  p->bufferBase = 0;
+  p->directInput = 0;
+  p->hash = 0;
+  MatchFinder_SetDefaultSettings(p);
+
+  for (i = 0; i < 256; i++)
+  {
+    UInt32 r = i;
+    int j;
+    for (j = 0; j < 8; j++)
+      r = (r >> 1) ^ (kCrcPoly & ~((r & 1) - 1));
+    p->crc[i] = r;
+  }
+}
+
+static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAlloc *alloc)
+{
+  alloc->Free(alloc, p->hash);
+  p->hash = 0;
+}
+
+void MatchFinder_Free(CMatchFinder *p, ISzAlloc *alloc)
+{
+  MatchFinder_FreeThisClassMemory(p, alloc);
+  LzInWindow_Free(p, alloc);
+}
+
+static CLzRef* AllocRefs(UInt32 num, ISzAlloc *alloc)
+{
+  size_t sizeInBytes = (size_t)num * sizeof(CLzRef);
+  if (sizeInBytes / sizeof(CLzRef) != num)
+    return 0;
+  return (CLzRef *)alloc->Alloc(alloc, sizeInBytes);
+}
+
+int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
+    UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
+    ISzAlloc *alloc)
+{
+  UInt32 sizeReserv;
+  if (historySize > kMaxHistorySize)
+  {
+    MatchFinder_Free(p, alloc);
+    return 0;
+  }
+  sizeReserv = historySize >> 1;
+  if (historySize > ((UInt32)2 << 30))
+    sizeReserv = historySize >> 2;
+  sizeReserv += (keepAddBufferBefore + matchMaxLen + keepAddBufferAfter) / 2 + (1 << 19);
+
+  p->keepSizeBefore = historySize + keepAddBufferBefore + 1;
+  p->keepSizeAfter = matchMaxLen + keepAddBufferAfter;
+  /* we need one additional byte, since we use MoveBlock after pos++ and before dictionary using */
+  if (LzInWindow_Create(p, sizeReserv, alloc))
+  {
+    UInt32 newCyclicBufferSize = (historySize /* >> p->skipModeBits */) + 1;
+    UInt32 hs;
+    p->matchMaxLen = matchMaxLen;
+    {
+      p->fixedHashSize = 0;
+      if (p->numHashBytes == 2)
+        hs = (1 << 16) - 1;
+      else
+      {
+        hs = historySize - 1;
+        hs |= (hs >> 1);
+        hs |= (hs >> 2);
+        hs |= (hs >> 4);
+        hs |= (hs >> 8);
+        hs >>= 1;
+        /* hs >>= p->skipModeBits; */
+        hs |= 0xFFFF; /* don't change it! It's required for Deflate */
+        if (hs > (1 << 24))
+        {
+          if (p->numHashBytes == 3)
+            hs = (1 << 24) - 1;
+          else
+            hs >>= 1;
+        }
+      }
+      p->hashMask = hs;
+      hs++;
+      if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size;
+      if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size;
+      if (p->numHashBytes > 4) p->fixedHashSize += kHash4Size;
+      hs += p->fixedHashSize;
+    }
+
+    {
+      UInt32 prevSize = p->hashSizeSum + p->numSons;
+      UInt32 newSize;
+      p->historySize = historySize;
+      p->hashSizeSum = hs;
+      p->cyclicBufferSize = newCyclicBufferSize;
+      p->numSons = (p->btMode ? newCyclicBufferSize * 2 : newCyclicBufferSize);
+      newSize = p->hashSizeSum + p->numSons;
+      if (p->hash != 0 && prevSize == newSize)
+        return 1;
+      MatchFinder_FreeThisClassMemory(p, alloc);
+      p->hash = AllocRefs(newSize, alloc);
+      if (p->hash != 0)
+      {
+        p->son = p->hash + p->hashSizeSum;
+        return 1;
+      }
+    }
+  }
+  MatchFinder_Free(p, alloc);
+  return 0;
+}
+
+static void MatchFinder_SetLimits(CMatchFinder *p)
+{
+  UInt32 limit = kMaxValForNormalize - p->pos;
+  UInt32 limit2 = p->cyclicBufferSize - p->cyclicBufferPos;
+  if (limit2 < limit)
+    limit = limit2;
+  limit2 = p->streamPos - p->pos;
+  if (limit2 <= p->keepSizeAfter)
+  {
+    if (limit2 > 0)
+      limit2 = 1;
+  }
+  else
+    limit2 -= p->keepSizeAfter;
+  if (limit2 < limit)
+    limit = limit2;
+  {
+    UInt32 lenLimit = p->streamPos - p->pos;
+    if (lenLimit > p->matchMaxLen)
+      lenLimit = p->matchMaxLen;
+    p->lenLimit = lenLimit;
+  }
+  p->posLimit = p->pos + limit;
+}
+
+void MatchFinder_Init(CMatchFinder *p)
+{
+  UInt32 i;
+  for(i = 0; i < p->hashSizeSum; i++)
+    p->hash[i] = kEmptyHashValue;
+  p->cyclicBufferPos = 0;
+  p->buffer = p->bufferBase;
+  p->pos = p->streamPos = p->cyclicBufferSize;
+  p->result = SZ_OK;
+  p->streamEndWasReached = 0;
+  MatchFinder_ReadBlock(p);
+  MatchFinder_SetLimits(p);
+}
+
+static UInt32 MatchFinder_GetSubValue(CMatchFinder *p)
+{
+  return (p->pos - p->historySize - 1) & kNormalizeMask;
+}
+
+void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, UInt32 numItems)
+{
+  UInt32 i;
+  for (i = 0; i < numItems; i++)
+  {
+    UInt32 value = items[i];
+    if (value <= subValue)
+      value = kEmptyHashValue;
+    else
+      value -= subValue;
+    items[i] = value;
+  }
+}
+
+static void MatchFinder_Normalize(CMatchFinder *p)
+{
+  UInt32 subValue = MatchFinder_GetSubValue(p);
+  MatchFinder_Normalize3(subValue, p->hash, p->hashSizeSum + p->numSons);
+  MatchFinder_ReduceOffsets(p, subValue);
+}
+
+static void MatchFinder_CheckLimits(CMatchFinder *p)
+{
+  if (p->pos == kMaxValForNormalize)
+    MatchFinder_Normalize(p);
+  if (!p->streamEndWasReached && p->keepSizeAfter == p->streamPos - p->pos)
+    MatchFinder_CheckAndMoveAndRead(p);
+  if (p->cyclicBufferPos == p->cyclicBufferSize)
+    p->cyclicBufferPos = 0;
+  MatchFinder_SetLimits(p);
+}
+
+static UInt32 * Hc_GetMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
+    UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
+    UInt32 *distances, UInt32 maxLen)
+{
+  son[_cyclicBufferPos] = curMatch;
+  for (;;)
+  {
+    UInt32 delta = pos - curMatch;
+    if (cutValue-- == 0 || delta >= _cyclicBufferSize)
+      return distances;
+    {
+      const Byte *pb = cur - delta;
+      curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)];
+      if (pb[maxLen] == cur[maxLen] && *pb == *cur)
+      {
+        UInt32 len = 0;
+        while(++len != lenLimit)
+          if (pb[len] != cur[len])
+            break;
+        if (maxLen < len)
+        {
+          *distances++ = maxLen = len;
+          *distances++ = delta - 1;
+          if (len == lenLimit)
+            return distances;
+        }
+      }
+    }
+  }
+}
+
+UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
+    UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
+    UInt32 *distances, UInt32 maxLen)
+{
+  CLzRef *ptr0 = son + (_cyclicBufferPos << 1) + 1;
+  CLzRef *ptr1 = son + (_cyclicBufferPos << 1);
+  UInt32 len0 = 0, len1 = 0;
+  for (;;)
+  {
+    UInt32 delta = pos - curMatch;
+    if (cutValue-- == 0 || delta >= _cyclicBufferSize)
+    {
+      *ptr0 = *ptr1 = kEmptyHashValue;
+      return distances;
+    }
+    {
+      CLzRef *pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
+      const Byte *pb = cur - delta;
+      UInt32 len = (len0 < len1 ? len0 : len1);
+      if (pb[len] == cur[len])
+      {
+        if (++len != lenLimit && pb[len] == cur[len])
+          while(++len != lenLimit)
+            if (pb[len] != cur[len])
+              break;
+        if (maxLen < len)
+        {
+          *distances++ = maxLen = len;
+          *distances++ = delta - 1;
+          if (len == lenLimit)
+          {
+            *ptr1 = pair[0];
+            *ptr0 = pair[1];
+            return distances;
+          }
+        }
+      }
+      if (pb[len] < cur[len])
+      {
+        *ptr1 = curMatch;
+        ptr1 = pair + 1;
+        curMatch = *ptr1;
+        len1 = len;
+      }
+      else
+      {
+        *ptr0 = curMatch;
+        ptr0 = pair;
+        curMatch = *ptr0;
+        len0 = len;
+      }
+    }
+  }
+}
+
+static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
+    UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue)
+{
+  CLzRef *ptr0 = son + (_cyclicBufferPos << 1) + 1;
+  CLzRef *ptr1 = son + (_cyclicBufferPos << 1);
+  UInt32 len0 = 0, len1 = 0;
+  for (;;)
+  {
+    UInt32 delta = pos - curMatch;
+    if (cutValue-- == 0 || delta >= _cyclicBufferSize)
+    {
+      *ptr0 = *ptr1 = kEmptyHashValue;
+      return;
+    }
+    {
+      CLzRef *pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
+      const Byte *pb = cur - delta;
+      UInt32 len = (len0 < len1 ? len0 : len1);
+      if (pb[len] == cur[len])
+      {
+        while(++len != lenLimit)
+          if (pb[len] != cur[len])
+            break;
+        {
+          if (len == lenLimit)
+          {
+            *ptr1 = pair[0];
+            *ptr0 = pair[1];
+            return;
+          }
+        }
+      }
+      if (pb[len] < cur[len])
+      {
+        *ptr1 = curMatch;
+        ptr1 = pair + 1;
+        curMatch = *ptr1;
+        len1 = len;
+      }
+      else
+      {
+        *ptr0 = curMatch;
+        ptr0 = pair;
+        curMatch = *ptr0;
+        len0 = len;
+      }
+    }
+  }
+}
+
+#define MOVE_POS \
+  ++p->cyclicBufferPos; \
+  p->buffer++; \
+  if (++p->pos == p->posLimit) MatchFinder_CheckLimits(p);
+
+#define MOVE_POS_RET MOVE_POS return offset;
+
+static void MatchFinder_MovePos(CMatchFinder *p) { MOVE_POS; }
+
+#define GET_MATCHES_HEADER2(minLen, ret_op) \
+  UInt32 lenLimit; UInt32 hashValue; const Byte *cur; UInt32 curMatch; \
+  lenLimit = p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \
+  cur = p->buffer;
+
+#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return 0)
+#define SKIP_HEADER(minLen)        GET_MATCHES_HEADER2(minLen, continue)
+
+#define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
+
+#define GET_MATCHES_FOOTER(offset, maxLen) \
+  offset = (UInt32)(GetMatchesSpec1(lenLimit, curMatch, MF_PARAMS(p), \
+  distances + offset, maxLen) - distances); MOVE_POS_RET;
+
+#define SKIP_FOOTER \
+  SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS;
+
+static UInt32 Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+  UInt32 offset;
+  GET_MATCHES_HEADER(2)
+  HASH2_CALC;
+  curMatch = p->hash[hashValue];
+  p->hash[hashValue] = p->pos;
+  offset = 0;
+  GET_MATCHES_FOOTER(offset, 1)
+}
+
+UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+  UInt32 offset;
+  GET_MATCHES_HEADER(3)
+  HASH_ZIP_CALC;
+  curMatch = p->hash[hashValue];
+  p->hash[hashValue] = p->pos;
+  offset = 0;
+  GET_MATCHES_FOOTER(offset, 2)
+}
+
+static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+  UInt32 hash2Value, delta2, maxLen, offset;
+  GET_MATCHES_HEADER(3)
+
+  HASH3_CALC;
+
+  delta2 = p->pos - p->hash[hash2Value];
+  curMatch = p->hash[kFix3HashSize + hashValue];
+
+  p->hash[hash2Value] =
+  p->hash[kFix3HashSize + hashValue] = p->pos;
+
+
+  maxLen = 2;
+  offset = 0;
+  if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur)
+  {
+    for (; maxLen != lenLimit; maxLen++)
+      if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen])
+        break;
+    distances[0] = maxLen;
+    distances[1] = delta2 - 1;
+    offset = 2;
+    if (maxLen == lenLimit)
+    {
+      SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p));
+      MOVE_POS_RET;
+    }
+  }
+  GET_MATCHES_FOOTER(offset, maxLen)
+}
+
+static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+  UInt32 hash2Value, hash3Value, delta2, delta3, maxLen, offset;
+  GET_MATCHES_HEADER(4)
+
+  HASH4_CALC;
+
+  delta2 = p->pos - p->hash[                hash2Value];
+  delta3 = p->pos - p->hash[kFix3HashSize + hash3Value];
+  curMatch = p->hash[kFix4HashSize + hashValue];
+
+  p->hash[                hash2Value] =
+  p->hash[kFix3HashSize + hash3Value] =
+  p->hash[kFix4HashSize + hashValue] = p->pos;
+
+  maxLen = 1;
+  offset = 0;
+  if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur)
+  {
+    distances[0] = maxLen = 2;
+    distances[1] = delta2 - 1;
+    offset = 2;
+  }
+  if (delta2 != delta3 && delta3 < p->cyclicBufferSize && *(cur - delta3) == *cur)
+  {
+    maxLen = 3;
+    distances[offset + 1] = delta3 - 1;
+    offset += 2;
+    delta2 = delta3;
+  }
+  if (offset != 0)
+  {
+    for (; maxLen != lenLimit; maxLen++)
+      if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen])
+        break;
+    distances[offset - 2] = maxLen;
+    if (maxLen == lenLimit)
+    {
+      SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p));
+      MOVE_POS_RET;
+    }
+  }
+  if (maxLen < 3)
+    maxLen = 3;
+  GET_MATCHES_FOOTER(offset, maxLen)
+}
+
+static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+  UInt32 hash2Value, hash3Value, delta2, delta3, maxLen, offset;
+  GET_MATCHES_HEADER(4)
+
+  HASH4_CALC;
+
+  delta2 = p->pos - p->hash[                hash2Value];
+  delta3 = p->pos - p->hash[kFix3HashSize + hash3Value];
+  curMatch = p->hash[kFix4HashSize + hashValue];
+
+  p->hash[                hash2Value] =
+  p->hash[kFix3HashSize + hash3Value] =
+  p->hash[kFix4HashSize + hashValue] = p->pos;
+
+  maxLen = 1;
+  offset = 0;
+  if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur)
+  {
+    distances[0] = maxLen = 2;
+    distances[1] = delta2 - 1;
+    offset = 2;
+  }
+  if (delta2 != delta3 && delta3 < p->cyclicBufferSize && *(cur - delta3) == *cur)
+  {
+    maxLen = 3;
+    distances[offset + 1] = delta3 - 1;
+    offset += 2;
+    delta2 = delta3;
+  }
+  if (offset != 0)
+  {
+    for (; maxLen != lenLimit; maxLen++)
+      if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen])
+        break;
+    distances[offset - 2] = maxLen;
+    if (maxLen == lenLimit)
+    {
+      p->son[p->cyclicBufferPos] = curMatch;
+      MOVE_POS_RET;
+    }
+  }
+  if (maxLen < 3)
+    maxLen = 3;
+  offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
+    distances + offset, maxLen) - (distances));
+  MOVE_POS_RET
+}
+
+UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+  UInt32 offset;
+  GET_MATCHES_HEADER(3)
+  HASH_ZIP_CALC;
+  curMatch = p->hash[hashValue];
+  p->hash[hashValue] = p->pos;
+  offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
+    distances, 2) - (distances));
+  MOVE_POS_RET
+}
+
+static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+  do
+  {
+    SKIP_HEADER(2)
+    HASH2_CALC;
+    curMatch = p->hash[hashValue];
+    p->hash[hashValue] = p->pos;
+    SKIP_FOOTER
+  }
+  while (--num != 0);
+}
+
+void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+  do
+  {
+    SKIP_HEADER(3)
+    HASH_ZIP_CALC;
+    curMatch = p->hash[hashValue];
+    p->hash[hashValue] = p->pos;
+    SKIP_FOOTER
+  }
+  while (--num != 0);
+}
+
+static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+  do
+  {
+    UInt32 hash2Value;
+    SKIP_HEADER(3)
+    HASH3_CALC;
+    curMatch = p->hash[kFix3HashSize + hashValue];
+    p->hash[hash2Value] =
+    p->hash[kFix3HashSize + hashValue] = p->pos;
+    SKIP_FOOTER
+  }
+  while (--num != 0);
+}
+
+static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+  do
+  {
+    UInt32 hash2Value, hash3Value;
+    SKIP_HEADER(4)
+    HASH4_CALC;
+    curMatch = p->hash[kFix4HashSize + hashValue];
+    p->hash[                hash2Value] =
+    p->hash[kFix3HashSize + hash3Value] = p->pos;
+    p->hash[kFix4HashSize + hashValue] = p->pos;
+    SKIP_FOOTER
+  }
+  while (--num != 0);
+}
+
+static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+  do
+  {
+    UInt32 hash2Value, hash3Value;
+    SKIP_HEADER(4)
+    HASH4_CALC;
+    curMatch = p->hash[kFix4HashSize + hashValue];
+    p->hash[                hash2Value] =
+    p->hash[kFix3HashSize + hash3Value] =
+    p->hash[kFix4HashSize + hashValue] = p->pos;
+    p->son[p->cyclicBufferPos] = curMatch;
+    MOVE_POS
+  }
+  while (--num != 0);
+}
+
+void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+  do
+  {
+    SKIP_HEADER(3)
+    HASH_ZIP_CALC;
+    curMatch = p->hash[hashValue];
+    p->hash[hashValue] = p->pos;
+    p->son[p->cyclicBufferPos] = curMatch;
+    MOVE_POS
+  }
+  while (--num != 0);
+}
+
+void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable)
+{
+  vTable->Init = (Mf_Init_Func)MatchFinder_Init;
+  vTable->GetIndexByte = (Mf_GetIndexByte_Func)MatchFinder_GetIndexByte;
+  vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes;
+  vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos;
+  if (!p->btMode)
+  {
+    vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches;
+    vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip;
+  }
+  else if (p->numHashBytes == 2)
+  {
+    vTable->GetMatches = (Mf_GetMatches_Func)Bt2_MatchFinder_GetMatches;
+    vTable->Skip = (Mf_Skip_Func)Bt2_MatchFinder_Skip;
+  }
+  else if (p->numHashBytes == 3)
+  {
+    vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches;
+    vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip;
+  }
+  else
+  {
+    vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches;
+    vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip;
+  }
+}
diff --git a/lib/LzmaDec.c b/lib/LzmaDec.c
new file mode 100644
index 0000000..8977cb3
--- /dev/null
+++ b/lib/LzmaDec.c
@@ -0,0 +1,1035 @@
+/*
+ *  GRUB  --  GRand Unified Bootloader
+ *  Copyright (c) 1999-2008 Igor Pavlov
+ *  Copyright (C) 2008  Free Software Foundation, Inc.
+ *
+ *  GRUB is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  GRUB is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GRUB.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * This code was taken from LZMA SDK 4.58 beta, and was slightly modified
+ * to adapt it to GRUB's requirement.
+ *
+ * See <http://www.7-zip.org>, for more information about LZMA.
+ */
+
+#include <grub/lib/LzmaDec.h>
+
+#include <string.h>
+
+#define kNumTopBits 24
+#define kTopValue ((UInt32)1 << kNumTopBits)
+
+#define kNumBitModelTotalBits 11
+#define kBitModelTotal (1 << kNumBitModelTotalBits)
+#define kNumMoveBits 5
+
+#define RC_INIT_SIZE 5
+
+#define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); }
+
+#define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * ttt; if (code < bound)
+#define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
+#define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits));
+#define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \
+  { UPDATE_0(p); i = (i + i); A0; } else \
+  { UPDATE_1(p); i = (i + i) + 1; A1; }
+#define GET_BIT(p, i) GET_BIT2(p, i, ; , ;)
+
+#define TREE_GET_BIT(probs, i) { GET_BIT((probs + i), i); }
+#define TREE_DECODE(probs, limit, i) \
+  { i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; }
+
+/* #define _LZMA_SIZE_OPT */
+
+#ifdef _LZMA_SIZE_OPT
+#define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i)
+#else
+#define TREE_6_DECODE(probs, i) \
+  { i = 1; \
+  TREE_GET_BIT(probs, i); \
+  TREE_GET_BIT(probs, i); \
+  TREE_GET_BIT(probs, i); \
+  TREE_GET_BIT(probs, i); \
+  TREE_GET_BIT(probs, i); \
+  TREE_GET_BIT(probs, i); \
+  i -= 0x40; }
+#endif
+
+#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_ERROR; range <<= 8; code = (code << 8) | (*buf++); }
+
+#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * ttt; if (code < bound)
+#define UPDATE_0_CHECK range = bound;
+#define UPDATE_1_CHECK range -= bound; code -= bound;
+#define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \
+  { UPDATE_0_CHECK; i = (i + i); A0; } else \
+  { UPDATE_1_CHECK; i = (i + i) + 1; A1; }
+#define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;)
+#define TREE_DECODE_CHECK(probs, limit, i) \
+  { i = 1; do { GET_BIT_CHECK(probs + i, i) } while(i < limit); i -= limit; }
+
+
+#define kNumPosBitsMax 4
+#define kNumPosStatesMax (1 << kNumPosBitsMax)
+
+#define kLenNumLowBits 3
+#define kLenNumLowSymbols (1 << kLenNumLowBits)
+#define kLenNumMidBits 3
+#define kLenNumMidSymbols (1 << kLenNumMidBits)
+#define kLenNumHighBits 8
+#define kLenNumHighSymbols (1 << kLenNumHighBits)
+
+#define LenChoice 0
+#define LenChoice2 (LenChoice + 1)
+#define LenLow (LenChoice2 + 1)
+#define LenMid (LenLow + (kNumPosStatesMax << kLenNumLowBits))
+#define LenHigh (LenMid + (kNumPosStatesMax << kLenNumMidBits))
+#define kNumLenProbs (LenHigh + kLenNumHighSymbols)
+
+
+#define kNumStates 12
+#define kNumLitStates 7
+
+#define kStartPosModelIndex 4
+#define kEndPosModelIndex 14
+#define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
+
+#define kNumPosSlotBits 6
+#define kNumLenToPosStates 4
+
+#define kNumAlignBits 4
+#define kAlignTableSize (1 << kNumAlignBits)
+
+#define kMatchMinLen 2
+#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols)
+
+#define IsMatch 0
+#define IsRep (IsMatch + (kNumStates << kNumPosBitsMax))
+#define IsRepG0 (IsRep + kNumStates)
+#define IsRepG1 (IsRepG0 + kNumStates)
+#define IsRepG2 (IsRepG1 + kNumStates)
+#define IsRep0Long (IsRepG2 + kNumStates)
+#define PosSlot (IsRep0Long + (kNumStates << kNumPosBitsMax))
+#define SpecPos (PosSlot + (kNumLenToPosStates << kNumPosSlotBits))
+#define Align (SpecPos + kNumFullDistances - kEndPosModelIndex)
+#define LenCoder (Align + kAlignTableSize)
+#define RepLenCoder (LenCoder + kNumLenProbs)
+#define Literal (RepLenCoder + kNumLenProbs)
+
+#define LZMA_BASE_SIZE 1846
+#define LZMA_LIT_SIZE 768
+
+#define LzmaProps_GetNumProbs(p) ((UInt32)LZMA_BASE_SIZE + (LZMA_LIT_SIZE << ((p)->lc + (p)->lp)))
+
+#if Literal != LZMA_BASE_SIZE
+StopCompilingDueBUG
+#endif
+
+/*
+#define LZMA_STREAM_WAS_FINISHED_ID (-1)
+#define LZMA_SPEC_LEN_OFFSET (-3)
+*/
+
+Byte kLiteralNextStates[kNumStates * 2] =
+{
+  0, 0, 0, 0, 1, 2, 3,  4,  5,  6,  4,  5,
+  7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10
+};
+
+#define LZMA_DIC_MIN (1 << 12)
+
+/* First LZMA-symbol is always decoded.
+And it decodes new LZMA-symbols while (buf < bufLimit), but "buf" is without last normalization
+Out:
+  Result:
+    0 - OK
+    1 - Error
+  p->remainLen:
+    < kMatchSpecLenStart : normal remain
+    = kMatchSpecLenStart : finished
+    = kMatchSpecLenStart + 1 : Flush marker
+    = kMatchSpecLenStart + 2 : State Init Marker
+*/
+
+static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
+{
+  CLzmaProb *probs = p->probs;
+
+  unsigned state = p->state;
+  UInt32 rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3];
+  unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1;
+  unsigned lpMask = ((unsigned)1 << (p->prop.lp)) - 1;
+  unsigned lc = p->prop.lc;
+
+  Byte *dic = p->dic;
+  SizeT dicBufSize = p->dicBufSize;
+  SizeT dicPos = p->dicPos;
+
+  UInt32 processedPos = p->processedPos;
+  UInt32 checkDicSize = p->checkDicSize;
+  unsigned len = 0;
+
+  const Byte *buf = p->buf;
+  UInt32 range = p->range;
+  UInt32 code = p->code;
+
+  do
+  {
+    CLzmaProb *prob;
+    UInt32 bound;
+    unsigned ttt;
+    unsigned posState = processedPos & pbMask;
+
+    prob = probs + IsMatch + (state << kNumPosBitsMax) + posState;
+    IF_BIT_0(prob)
+    {
+      unsigned symbol;
+      UPDATE_0(prob);
+      prob = probs + Literal;
+      if (checkDicSize != 0 || processedPos != 0)
+        prob += (LZMA_LIT_SIZE * (((processedPos & lpMask) << lc) +
+        (dic[(dicPos == 0 ? dicBufSize : dicPos) - 1] >> (8 - lc))));
+
+      if (state < kNumLitStates)
+      {
+        symbol = 1;
+        do { GET_BIT(prob + symbol, symbol) } while (symbol < 0x100);
+      }
+      else
+      {
+        unsigned matchByte = p->dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)];
+        unsigned offs = 0x100;
+        symbol = 1;
+        do
+        {
+          unsigned bit;
+          CLzmaProb *probLit;
+          matchByte <<= 1;
+          bit = (matchByte & offs);
+          probLit = prob + offs + bit + symbol;
+          GET_BIT2(probLit, symbol, offs &= ~bit, offs &= bit)
+        }
+        while (symbol < 0x100);
+      }
+      dic[dicPos++] = (Byte)symbol;
+      processedPos++;
+
+      state = kLiteralNextStates[state];
+      /* if (state < 4) state = 0; else if (state < 10) state -= 3; else state -= 6; */
+      continue;
+    }
+    else
+    {
+      UPDATE_1(prob);
+      prob = probs + IsRep + state;
+      IF_BIT_0(prob)
+      {
+        UPDATE_0(prob);
+        state += kNumStates;
+        prob = probs + LenCoder;
+      }
+      else
+      {
+        UPDATE_1(prob);
+        if (checkDicSize == 0 && processedPos == 0)
+          return SZ_ERROR_DATA;
+        prob = probs + IsRepG0 + state;
+        IF_BIT_0(prob)
+        {
+          UPDATE_0(prob);
+          prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState;
+          IF_BIT_0(prob)
+          {
+            UPDATE_0(prob);
+            dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)];
+            dicPos++;
+            processedPos++;
+            state = state < kNumLitStates ? 9 : 11;
+            continue;
+          }
+          UPDATE_1(prob);
+        }
+        else
+        {
+          UInt32 distance;
+          UPDATE_1(prob);
+          prob = probs + IsRepG1 + state;
+          IF_BIT_0(prob)
+          {
+            UPDATE_0(prob);
+            distance = rep1;
+          }
+          else
+          {
+            UPDATE_1(prob);
+            prob = probs + IsRepG2 + state;
+            IF_BIT_0(prob)
+            {
+              UPDATE_0(prob);
+              distance = rep2;
+            }
+            else
+            {
+              UPDATE_1(prob);
+              distance = rep3;
+              rep3 = rep2;
+            }
+            rep2 = rep1;
+          }
+          rep1 = rep0;
+          rep0 = distance;
+        }
+        state = state < kNumLitStates ? 8 : 11;
+        prob = probs + RepLenCoder;
+      }
+      {
+        unsigned limit, offset;
+        CLzmaProb *probLen = prob + LenChoice;
+        IF_BIT_0(probLen)
+        {
+          UPDATE_0(probLen);
+          probLen = prob + LenLow + (posState << kLenNumLowBits);
+          offset = 0;
+          limit = (1 << kLenNumLowBits);
+        }
+        else
+        {
+          UPDATE_1(probLen);
+          probLen = prob + LenChoice2;
+          IF_BIT_0(probLen)
+          {
+            UPDATE_0(probLen);
+            probLen = prob + LenMid + (posState << kLenNumMidBits);
+            offset = kLenNumLowSymbols;
+            limit = (1 << kLenNumMidBits);
+          }
+          else
+          {
+            UPDATE_1(probLen);
+            probLen = prob + LenHigh;
+            offset = kLenNumLowSymbols + kLenNumMidSymbols;
+            limit = (1 << kLenNumHighBits);
+          }
+        }
+        TREE_DECODE(probLen, limit, len);
+        len += offset;
+      }
+
+      if (state >= kNumStates)
+      {
+        UInt32 distance;
+        prob = probs + PosSlot +
+            ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits);
+        TREE_6_DECODE(prob, distance);
+        if (distance >= kStartPosModelIndex)
+        {
+          unsigned posSlot = (unsigned)distance;
+          int numDirectBits = (int)(((distance >> 1) - 1));
+          distance = (2 | (distance & 1));
+          if (posSlot < kEndPosModelIndex)
+          {
+            distance <<= numDirectBits;
+            prob = probs + SpecPos + distance - posSlot - 1;
+            {
+              UInt32 mask = 1;
+              unsigned i = 1;
+              do
+              {
+                GET_BIT2(prob + i, i, ; , distance |= mask);
+                mask <<= 1;
+              }
+              while(--numDirectBits != 0);
+            }
+          }
+          else
+          {
+            numDirectBits -= kNumAlignBits;
+            do
+            {
+              NORMALIZE
+              range >>= 1;
+
+              {
+                UInt32 t;
+                code -= range;
+                t = (0 - ((UInt32)code >> 31)); /* (UInt32)((Int32)code >> 31) */
+                distance = (distance << 1) + (t + 1);
+                code += range & t;
+              }
+              /*
+              distance <<= 1;
+              if (code >= range)
+              {
+                code -= range;
+                distance |= 1;
+              }
+              */
+            }
+            while (--numDirectBits != 0);
+            prob = probs + Align;
+            distance <<= kNumAlignBits;
+            {
+              unsigned i = 1;
+              GET_BIT2(prob + i, i, ; , distance |= 1);
+              GET_BIT2(prob + i, i, ; , distance |= 2);
+              GET_BIT2(prob + i, i, ; , distance |= 4);
+              GET_BIT2(prob + i, i, ; , distance |= 8);
+            }
+            if (distance == (UInt32)0xFFFFFFFF)
+            {
+              len += kMatchSpecLenStart;
+              state -= kNumStates;
+              break;
+            }
+          }
+        }
+        rep3 = rep2;
+        rep2 = rep1;
+        rep1 = rep0;
+        rep0 = distance + 1;
+        if (checkDicSize == 0)
+        {
+          if (distance >= processedPos)
+            return SZ_ERROR_DATA;
+        }
+        else if (distance >= checkDicSize)
+          return SZ_ERROR_DATA;
+        state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;
+        /* state = kLiteralNextStates[state]; */
+      }
+
+      len += kMatchMinLen;
+
+      {
+        SizeT rem = limit - dicPos;
+        unsigned curLen = ((rem < len) ? (unsigned)rem : len);
+        SizeT pos = (dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0);
+
+        processedPos += curLen;
+
+        len -= curLen;
+        if (pos + curLen <= dicBufSize)
+        {
+          Byte *dest = dic + dicPos;
+          ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos;
+          const Byte *lim = dest + curLen;
+          dicPos += curLen;
+          do
+            *(dest) = (Byte)*(dest + src);
+          while (++dest != lim);
+        }
+        else
+        {
+          do
+          {
+            dic[dicPos++] = dic[pos];
+            if (++pos == dicBufSize)
+              pos = 0;
+          }
+          while (--curLen != 0);
+        }
+      }
+    }
+  }
+  while (dicPos < limit && buf < bufLimit);
+  NORMALIZE;
+  p->buf = buf;
+  p->range = range;
+  p->code = code;
+  p->remainLen = len;
+  p->dicPos = dicPos;
+  p->processedPos = processedPos;
+  p->reps[0] = rep0;
+  p->reps[1] = rep1;
+  p->reps[2] = rep2;
+  p->reps[3] = rep3;
+  p->state = state;
+
+  return SZ_OK;
+}
+
+static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit)
+{
+  if (p->remainLen != 0 && p->remainLen < kMatchSpecLenStart)
+  {
+    Byte *dic = p->dic;
+    SizeT dicPos = p->dicPos;
+    SizeT dicBufSize = p->dicBufSize;
+    unsigned len = p->remainLen;
+    UInt32 rep0 = p->reps[0];
+    if (limit - dicPos < len)
+      len = (unsigned)(limit - dicPos);
+
+    if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len)
+      p->checkDicSize = p->prop.dicSize;
+
+    p->processedPos += len;
+    p->remainLen -= len;
+    while (len-- != 0)
+    {
+      dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)];
+      dicPos++;
+    }
+    p->dicPos = dicPos;
+  }
+}
+
+/* LzmaDec_DecodeReal2 decodes LZMA-symbols and sets p->needFlush and p->needInit, if required. */
+
+static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
+{
+  do
+  {
+    SizeT limit2 = limit;
+    if (p->checkDicSize == 0)
+    {
+      UInt32 rem = p->prop.dicSize - p->processedPos;
+      if (limit - p->dicPos > rem)
+        limit2 = p->dicPos + rem;
+    }
+    RINOK(LzmaDec_DecodeReal(p, limit2, bufLimit));
+    if (p->processedPos >= p->prop.dicSize)
+      p->checkDicSize = p->prop.dicSize;
+    LzmaDec_WriteRem(p, limit);
+  }
+  while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart);
+
+  if (p->remainLen > kMatchSpecLenStart)
+  {
+    p->remainLen = kMatchSpecLenStart;
+  }
+  return 0;
+}
+
+typedef enum
+{
+  DUMMY_ERROR, /* unexpected end of input stream */
+  DUMMY_LIT,
+  DUMMY_MATCH,
+  DUMMY_REP
+} ELzmaDummy;
+
+static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inSize)
+{
+  UInt32 range = p->range;
+  UInt32 code = p->code;
+  const Byte *bufLimit = buf + inSize;
+  CLzmaProb *probs = p->probs;
+  unsigned state = p->state;
+  ELzmaDummy res;
+
+  {
+    CLzmaProb *prob;
+    UInt32 bound;
+    unsigned ttt;
+    unsigned posState = (p->processedPos) & ((1 << p->prop.pb) - 1);
+
+    prob = probs + IsMatch + (state << kNumPosBitsMax) + posState;
+    IF_BIT_0_CHECK(prob)
+    {
+      UPDATE_0_CHECK
+
+      /* if (bufLimit - buf >= 7) return DUMMY_LIT; */
+
+      prob = probs + Literal;
+      if (p->checkDicSize != 0 || p->processedPos != 0)
+        prob += (LZMA_LIT_SIZE *
+          ((((p->processedPos) & ((1 << (p->prop.lp)) - 1)) << p->prop.lc) +
+          (p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc))));
+
+      if (state < kNumLitStates)
+      {
+        unsigned symbol = 1;
+        do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100);
+      }
+      else
+      {
+        unsigned matchByte = p->dic[p->dicPos - p->reps[0] +
+            ((p->dicPos < p->reps[0]) ? p->dicBufSize : 0)];
+        unsigned offs = 0x100;
+        unsigned symbol = 1;
+        do
+        {
+          unsigned bit;
+          CLzmaProb *probLit;
+          matchByte <<= 1;
+          bit = (matchByte & offs);
+          probLit = prob + offs + bit + symbol;
+          GET_BIT2_CHECK(probLit, symbol, offs &= ~bit, offs &= bit)
+        }
+        while (symbol < 0x100);
+      }
+      res = DUMMY_LIT;
+    }
+    else
+    {
+      unsigned len;
+      UPDATE_1_CHECK;
+
+      prob = probs + IsRep + state;
+      IF_BIT_0_CHECK(prob)
+      {
+        UPDATE_0_CHECK;
+        state = 0;
+        prob = probs + LenCoder;
+        res = DUMMY_MATCH;
+      }
+      else
+      {
+        UPDATE_1_CHECK;
+        res = DUMMY_REP;
+        prob = probs + IsRepG0 + state;
+        IF_BIT_0_CHECK(prob)
+        {
+          UPDATE_0_CHECK;
+          prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState;
+          IF_BIT_0_CHECK(prob)
+          {
+            UPDATE_0_CHECK;
+            NORMALIZE_CHECK;
+            return DUMMY_REP;
+          }
+          else
+          {
+            UPDATE_1_CHECK;
+          }
+        }
+        else
+        {
+          UPDATE_1_CHECK;
+          prob = probs + IsRepG1 + state;
+          IF_BIT_0_CHECK(prob)
+          {
+            UPDATE_0_CHECK;
+          }
+          else
+          {
+            UPDATE_1_CHECK;
+            prob = probs + IsRepG2 + state;
+            IF_BIT_0_CHECK(prob)
+            {
+              UPDATE_0_CHECK;
+            }
+            else
+            {
+              UPDATE_1_CHECK;
+            }
+          }
+        }
+        state = kNumStates;
+        prob = probs + RepLenCoder;
+      }
+      {
+        unsigned limit, offset;
+        CLzmaProb *probLen = prob + LenChoice;
+        IF_BIT_0_CHECK(probLen)
+        {
+          UPDATE_0_CHECK;
+          probLen = prob + LenLow + (posState << kLenNumLowBits);
+          offset = 0;
+          limit = 1 << kLenNumLowBits;
+        }
+        else
+        {
+          UPDATE_1_CHECK;
+          probLen = prob + LenChoice2;
+          IF_BIT_0_CHECK(probLen)
+          {
+            UPDATE_0_CHECK;
+            probLen = prob + LenMid + (posState << kLenNumMidBits);
+            offset = kLenNumLowSymbols;
+            limit = 1 << kLenNumMidBits;
+          }
+          else
+          {
+            UPDATE_1_CHECK;
+            probLen = prob + LenHigh;
+            offset = kLenNumLowSymbols + kLenNumMidSymbols;
+            limit = 1 << kLenNumHighBits;
+          }
+        }
+        TREE_DECODE_CHECK(probLen, limit, len);
+        len += offset;
+      }
+
+      if (state < 4)
+      {
+        unsigned posSlot;
+        prob = probs + PosSlot +
+            ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) <<
+            kNumPosSlotBits);
+        TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot);
+        if (posSlot >= kStartPosModelIndex)
+        {
+          int numDirectBits = ((posSlot >> 1) - 1);
+
+          /* if (bufLimit - buf >= 8) return DUMMY_MATCH; */
+
+          if (posSlot < kEndPosModelIndex)
+          {
+            prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits) - posSlot - 1;
+          }
+          else
+          {
+            numDirectBits -= kNumAlignBits;
+            do
+            {
+              NORMALIZE_CHECK
+              range >>= 1;
+              code -= range & (((code - range) >> 31) - 1);
+              /* if (code >= range) code -= range; */
+            }
+            while (--numDirectBits != 0);
+            prob = probs + Align;
+            numDirectBits = kNumAlignBits;
+          }
+          {
+            unsigned i = 1;
+            do
+            {
+              GET_BIT_CHECK(prob + i, i);
+            }
+            while(--numDirectBits != 0);
+          }
+        }
+      }
+    }
+  }
+  NORMALIZE_CHECK;
+  return res;
+}
+
+
+static void LzmaDec_InitRc(CLzmaDec *p, const Byte *data)
+{
+  p->code = ((UInt32)data[1] << 24) | ((UInt32)data[2] << 16) | ((UInt32)data[3] << 8) | ((UInt32)data[4]);
+  p->range = 0xFFFFFFFF;
+  p->needFlush = 0;
+}
+
+void LzmaDec_InitDicAndState(CLzmaDec *p, Bool initDic, Bool initState)
+{
+  p->needFlush = 1;
+  p->remainLen = 0;
+  p->tempBufSize = 0;
+
+  if (initDic)
+  {
+    p->processedPos = 0;
+    p->checkDicSize = 0;
+    p->needInitState = 1;
+  }
+  if (initState)
+    p->needInitState = 1;
+}
+
+void LzmaDec_Init(CLzmaDec *p)
+{
+  p->dicPos = 0;
+  LzmaDec_InitDicAndState(p, True, True);
+}
+
+static void LzmaDec_InitStateReal(CLzmaDec *p)
+{
+  UInt32 numProbs = Literal + ((UInt32)LZMA_LIT_SIZE << (p->prop.lc + p->prop.lp));
+  UInt32 i;
+  CLzmaProb *probs = p->probs;
+  for (i = 0; i < numProbs; i++)
+    probs[i] = kBitModelTotal >> 1;
+  p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1;
+  p->state = 0;
+  p->needInitState = 0;
+}
+
+SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *srcLen,
+    ELzmaFinishMode finishMode, ELzmaStatus *status)
+{
+  SizeT inSize = *srcLen;
+  (*srcLen) = 0;
+  LzmaDec_WriteRem(p, dicLimit);
+
+  *status = LZMA_STATUS_NOT_SPECIFIED;
+
+  while (p->remainLen != kMatchSpecLenStart)
+  {
+      int checkEndMarkNow;
+
+      if (p->needFlush != 0)
+      {
+        for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--)
+          p->tempBuf[p->tempBufSize++] = *src++;
+        if (p->tempBufSize < RC_INIT_SIZE)
+        {
+          *status = LZMA_STATUS_NEEDS_MORE_INPUT;
+          return SZ_OK;
+        }
+        if (p->tempBuf[0] != 0)
+          return SZ_ERROR_DATA;
+
+        LzmaDec_InitRc(p, p->tempBuf);
+        p->tempBufSize = 0;
+      }
+
+      checkEndMarkNow = 0;
+      if (p->dicPos >= dicLimit)
+      {
+        if (p->remainLen == 0 && p->code == 0)
+        {
+          *status = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK;
+          return SZ_OK;
+        }
+        if (finishMode == LZMA_FINISH_ANY)
+        {
+          *status = LZMA_STATUS_NOT_FINISHED;
+          return SZ_OK;
+        }
+        if (p->remainLen != 0)
+        {
+          *status = LZMA_STATUS_NOT_FINISHED;
+          return SZ_ERROR_DATA;
+        }
+        checkEndMarkNow = 1;
+      }
+
+      if (p->needInitState)
+        LzmaDec_InitStateReal(p);
+
+      if (p->tempBufSize == 0)
+      {
+        SizeT processed;
+        const Byte *bufLimit;
+        if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
+        {
+          int dummyRes = LzmaDec_TryDummy(p, src, inSize);
+          if (dummyRes == DUMMY_ERROR)
+          {
+            memcpy(p->tempBuf, src, inSize);
+            p->tempBufSize = (unsigned)inSize;
+            (*srcLen) += inSize;
+            *status = LZMA_STATUS_NEEDS_MORE_INPUT;
+            return SZ_OK;
+          }
+          if (checkEndMarkNow && dummyRes != DUMMY_MATCH)
+          {
+            *status = LZMA_STATUS_NOT_FINISHED;
+            return SZ_ERROR_DATA;
+          }
+          bufLimit = src;
+        }
+        else
+          bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX;
+        p->buf = src;
+        if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit) != 0)
+          return SZ_ERROR_DATA;
+        processed = p->buf - src;
+        (*srcLen) += processed;
+        src += processed;
+        inSize -= processed;
+      }
+      else
+      {
+        unsigned rem = p->tempBufSize, lookAhead = 0;
+        while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize)
+          p->tempBuf[rem++] = src[lookAhead++];
+        p->tempBufSize = rem;
+        if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
+        {
+          int dummyRes = LzmaDec_TryDummy(p, p->tempBuf, rem);
+          if (dummyRes == DUMMY_ERROR)
+          {
+            (*srcLen) += lookAhead;
+            *status = LZMA_STATUS_NEEDS_MORE_INPUT;
+            return SZ_OK;
+          }
+          if (checkEndMarkNow && dummyRes != DUMMY_MATCH)
+          {
+            *status = LZMA_STATUS_NOT_FINISHED;
+            return SZ_ERROR_DATA;
+          }
+        }
+        p->buf = p->tempBuf;
+        if (LzmaDec_DecodeReal2(p, dicLimit, p->buf) != 0)
+          return SZ_ERROR_DATA;
+        lookAhead -= (rem - (unsigned)(p->buf - p->tempBuf));
+        (*srcLen) += lookAhead;
+        src += lookAhead;
+        inSize -= lookAhead;
+        p->tempBufSize = 0;
+      }
+  }
+  if (p->code == 0)
+    *status = LZMA_STATUS_FINISHED_WITH_MARK;
+  return (p->code == 0) ? SZ_OK : SZ_ERROR_DATA;
+}
+
+SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status)
+{
+  SizeT outSize = *destLen;
+  SizeT inSize = *srcLen;
+  *srcLen = *destLen = 0;
+  for (;;)
+  {
+    SizeT inSizeCur = inSize, outSizeCur, dicPos;
+    ELzmaFinishMode curFinishMode;
+    SRes res;
+    if (p->dicPos == p->dicBufSize)
+      p->dicPos = 0;
+    dicPos = p->dicPos;
+    if (outSize > p->dicBufSize - dicPos)
+    {
+      outSizeCur = p->dicBufSize;
+      curFinishMode = LZMA_FINISH_ANY;
+    }
+    else
+    {
+      outSizeCur = dicPos + outSize;
+      curFinishMode = finishMode;
+    }
+
+    res = LzmaDec_DecodeToDic(p, outSizeCur, src, &inSizeCur, curFinishMode, status);
+    src += inSizeCur;
+    inSize -= inSizeCur;
+    *srcLen += inSizeCur;
+    outSizeCur = p->dicPos - dicPos;
+    memcpy(dest, p->dic + dicPos, outSizeCur);
+    dest += outSizeCur;
+    outSize -= outSizeCur;
+    *destLen += outSizeCur;
+    if (res != 0)
+      return res;
+    if (outSizeCur == 0 || outSize == 0)
+      return SZ_OK;
+  }
+}
+
+void LzmaDec_FreeProbs(CLzmaDec *p, ISzAlloc *alloc)
+{
+  alloc->Free(alloc, p->probs);
+  p->probs = 0;
+}
+
+static void LzmaDec_FreeDict(CLzmaDec *p, ISzAlloc *alloc)
+{
+  alloc->Free(alloc, p->dic);
+  p->dic = 0;
+}
+
+void LzmaDec_Free(CLzmaDec *p, ISzAlloc *alloc)
+{
+  LzmaDec_FreeProbs(p, alloc);
+  LzmaDec_FreeDict(p, alloc);
+}
+
+SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size)
+{
+  UInt32 dicSize;
+  Byte d;
+
+  if (size < LZMA_PROPS_SIZE)
+    return SZ_ERROR_UNSUPPORTED;
+  else
+    dicSize = data[1] | ((UInt32)data[2] << 8) | ((UInt32)data[3] << 16) | ((UInt32)data[4] << 24);
+
+  if (dicSize < LZMA_DIC_MIN)
+    dicSize = LZMA_DIC_MIN;
+  p->dicSize = dicSize;
+
+  d = data[0];
+  if (d >= (9 * 5 * 5))
+    return SZ_ERROR_UNSUPPORTED;
+
+  p->lc = d % 9;
+  d /= 9;
+  p->pb = d / 5;
+  p->lp = d % 5;
+
+  return SZ_OK;
+}
+
+static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAlloc *alloc)
+{
+  UInt32 numProbs = LzmaProps_GetNumProbs(propNew);
+  if (p->probs == 0 || numProbs != p->numProbs)
+  {
+    LzmaDec_FreeProbs(p, alloc);
+    p->probs = (CLzmaProb *)alloc->Alloc(alloc, numProbs * sizeof(CLzmaProb));
+    p->numProbs = numProbs;
+    if (p->probs == 0)
+      return SZ_ERROR_MEM;
+  }
+  return SZ_OK;
+}
+
+SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc)
+{
+  CLzmaProps propNew;
+  RINOK(LzmaProps_Decode(&propNew, props, propsSize));
+  RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc));
+  p->prop = propNew;
+  return SZ_OK;
+}
+
+SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc)
+{
+  CLzmaProps propNew;
+  SizeT dicBufSize;
+  RINOK(LzmaProps_Decode(&propNew, props, propsSize));
+  RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc));
+  dicBufSize = propNew.dicSize;
+  if (p->dic == 0 || dicBufSize != p->dicBufSize)
+  {
+    LzmaDec_FreeDict(p, alloc);
+    p->dic = (Byte *)alloc->Alloc(alloc, dicBufSize);
+    if (p->dic == 0)
+    {
+      LzmaDec_FreeProbs(p, alloc);
+      return SZ_ERROR_MEM;
+    }
+  }
+  p->dicBufSize = dicBufSize;
+  p->prop = propNew;
+  return SZ_OK;
+}
+
+SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
+    const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode,
+    ELzmaStatus *status, ISzAlloc *alloc)
+{
+  CLzmaDec p;
+  SRes res;
+  SizeT inSize = *srcLen;
+  SizeT outSize = *destLen;
+  *srcLen = *destLen = 0;
+  if (inSize < RC_INIT_SIZE)
+    return SZ_ERROR_INPUT_EOF;
+
+  LzmaDec_Construct(&p);
+  res = LzmaDec_AllocateProbs(&p, propData, propSize, alloc);
+  if (res != 0)
+    return res;
+  p.dic = dest;
+  p.dicBufSize = outSize;
+
+  LzmaDec_Init(&p);
+
+  *srcLen = inSize;
+  res = LzmaDec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status);
+
+  if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT)
+    res = SZ_ERROR_INPUT_EOF;
+
+  (*destLen) = p.dicPos;
+  LzmaDec_FreeProbs(&p, alloc);
+  return res;
+}
diff --git a/lib/LzmaEnc.c b/lib/LzmaEnc.c
new file mode 100644
index 0000000..6bdb703
--- /dev/null
+++ b/lib/LzmaEnc.c
@@ -0,0 +1,2353 @@
+/*
+ *  GRUB  --  GRand Unified Bootloader
+ *  Copyright (c) 1999-2008 Igor Pavlov
+ *  Copyright (C) 2008  Free Software Foundation, Inc.
+ *
+ *  GRUB is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  GRUB is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GRUB.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * This code was taken from LZMA SDK 4.58 beta, and was slightly modified
+ * to adapt it to GRUB's requirement.
+ *
+ * See <http://www.7-zip.org>, for more information about LZMA.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include <grub/lib/LzmaEnc.h>
+
+#include <grub/lib/LzFind.h>
+#ifdef COMPRESS_MF_MT
+#include <grub/lib/LzFindMt.h>
+#endif
+
+/* #define SHOW_STAT */
+/* #define SHOW_STAT2 */
+
+#ifdef SHOW_STAT
+static int ttt = 0;
+#endif
+
+#define kBlockSizeMax ((1 << LZMA_NUM_BLOCK_SIZE_BITS) - 1)
+
+#define kBlockSize (9 << 10)
+#define kUnpackBlockSize (1 << 18)
+#define kMatchArraySize (1 << 21)
+#define kMatchRecordMaxSize ((LZMA_MATCH_LEN_MAX * 2 + 3) * LZMA_MATCH_LEN_MAX)
+
+#define kNumMaxDirectBits (31)
+
+#define kNumTopBits 24
+#define kTopValue ((UInt32)1 << kNumTopBits)
+
+#define kNumBitModelTotalBits 11
+#define kBitModelTotal (1 << kNumBitModelTotalBits)
+#define kNumMoveBits 5
+#define kProbInitValue (kBitModelTotal >> 1)
+
+#define kNumMoveReducingBits 4
+#define kNumBitPriceShiftBits 4
+#define kBitPrice (1 << kNumBitPriceShiftBits)
+
+void LzmaEncProps_Init(CLzmaEncProps *p)
+{
+  p->level = 5;
+  p->dictSize = p->mc = 0;
+  p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1;
+  p->writeEndMark = 0;
+}
+
+void LzmaEncProps_Normalize(CLzmaEncProps *p)
+{
+  int level = p->level;
+  if (level < 0) level = 5;
+  p->level = level;
+  if (p->dictSize == 0) p->dictSize = (level <= 5 ? (1 << (level * 2 + 14)) : (level == 6 ? (1 << 25) : (1 << 26)));
+  if (p->lc < 0) p->lc = 3;
+  if (p->lp < 0) p->lp = 0;
+  if (p->pb < 0) p->pb = 2;
+  if (p->algo < 0) p->algo = (level < 5 ? 0 : 1);
+  if (p->fb < 0) p->fb = (level < 7 ? 32 : 64);
+  if (p->btMode < 0) p->btMode = (p->algo == 0 ? 0 : 1);
+  if (p->numHashBytes < 0) p->numHashBytes = 4;
+  if (p->mc == 0)  p->mc = (16 + (p->fb >> 1)) >> (p->btMode ? 0 : 1);
+  if (p->numThreads < 0) p->numThreads = ((p->btMode && p->algo) ? 2 : 1);
+}
+
+UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2)
+{
+  CLzmaEncProps props = *props2;
+  LzmaEncProps_Normalize(&props);
+  return props.dictSize;
+}
+
+/* #define LZMA_LOG_BSR */
+/* Define it for Intel's CPU */
+
+
+#ifdef LZMA_LOG_BSR
+
+#define kDicLogSizeMaxCompress 30
+
+#define BSR2_RET(pos, res) { unsigned long i; _BitScanReverse(&i, (pos)); res = (i + i) + ((pos >> (i - 1)) & 1); }
+
+UInt32 GetPosSlot1(UInt32 pos)
+{
+  UInt32 res;
+  BSR2_RET(pos, res);
+  return res;
+}
+#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); }
+#define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res); }
+
+#else
+
+#define kNumLogBits (9 + (int)sizeof(size_t) / 2)
+#define kDicLogSizeMaxCompress ((kNumLogBits - 1) * 2 + 7)
+
+void LzmaEnc_FastPosInit(Byte *g_FastPos)
+{
+  int c = 2, slotFast;
+  g_FastPos[0] = 0;
+  g_FastPos[1] = 1;
+
+  for (slotFast = 2; slotFast < kNumLogBits * 2; slotFast++)
+  {
+    UInt32 k = (1 << ((slotFast >> 1) - 1));
+    UInt32 j;
+    for (j = 0; j < k; j++, c++)
+      g_FastPos[c] = (Byte)slotFast;
+  }
+}
+
+#define BSR2_RET(pos, res) { UInt32 i = 6 + ((kNumLogBits - 1) & \
+  (0 - (((((UInt32)1 << (kNumLogBits + 6)) - 1) - pos) >> 31))); \
+  res = p->g_FastPos[pos >> i] + (i * 2); }
+/*
+#define BSR2_RET(pos, res) { res = (pos < (1 << (kNumLogBits + 6))) ? \
+  p->g_FastPos[pos >> 6] + 12 : \
+  p->g_FastPos[pos >> (6 + kNumLogBits - 1)] + (6 + (kNumLogBits - 1)) * 2; }
+*/
+
+#define GetPosSlot1(pos) p->g_FastPos[pos]
+#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); }
+#define GetPosSlot(pos, res) { if (pos < kNumFullDistances) res = p->g_FastPos[pos]; else BSR2_RET(pos, res); }
+
+#endif
+
+
+#define LZMA_NUM_REPS 4
+
+typedef unsigned CState;
+
+typedef struct _COptimal
+{
+  UInt32 price;
+
+  CState state;
+  int prev1IsChar;
+  int prev2;
+
+  UInt32 posPrev2;
+  UInt32 backPrev2;
+
+  UInt32 posPrev;
+  UInt32 backPrev;
+  UInt32 backs[LZMA_NUM_REPS];
+} COptimal;
+
+#define kNumOpts (1 << 12)
+
+#define kNumLenToPosStates 4
+#define kNumPosSlotBits 6
+#define kDicLogSizeMin 0
+#define kDicLogSizeMax 32
+#define kDistTableSizeMax (kDicLogSizeMax * 2)
+
+
+#define kNumAlignBits 4
+#define kAlignTableSize (1 << kNumAlignBits)
+#define kAlignMask (kAlignTableSize - 1)
+
+#define kStartPosModelIndex 4
+#define kEndPosModelIndex 14
+#define kNumPosModels (kEndPosModelIndex - kStartPosModelIndex)
+
+#define kNumFullDistances (1 << (kEndPosModelIndex / 2))
+
+#ifdef _LZMA_PROB32
+#define CLzmaProb UInt32
+#else
+#define CLzmaProb UInt16
+#endif
+
+#define LZMA_PB_MAX 4
+#define LZMA_LC_MAX 8
+#define LZMA_LP_MAX 4
+
+#define LZMA_NUM_PB_STATES_MAX (1 << LZMA_PB_MAX)
+
+
+#define kLenNumLowBits 3
+#define kLenNumLowSymbols (1 << kLenNumLowBits)
+#define kLenNumMidBits 3
+#define kLenNumMidSymbols (1 << kLenNumMidBits)
+#define kLenNumHighBits 8
+#define kLenNumHighSymbols (1 << kLenNumHighBits)
+
+#define kLenNumSymbolsTotal (kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols)
+
+#define LZMA_MATCH_LEN_MIN 2
+#define LZMA_MATCH_LEN_MAX (LZMA_MATCH_LEN_MIN + kLenNumSymbolsTotal - 1)
+
+#define kNumStates 12
+
+typedef struct
+{
+  CLzmaProb choice;
+  CLzmaProb choice2;
+  CLzmaProb low[LZMA_NUM_PB_STATES_MAX << kLenNumLowBits];
+  CLzmaProb mid[LZMA_NUM_PB_STATES_MAX << kLenNumMidBits];
+  CLzmaProb high[kLenNumHighSymbols];
+} CLenEnc;
+
+typedef struct
+{
+  CLenEnc p;
+  UInt32 prices[LZMA_NUM_PB_STATES_MAX][kLenNumSymbolsTotal];
+  UInt32 tableSize;
+  UInt32 counters[LZMA_NUM_PB_STATES_MAX];
+} CLenPriceEnc;
+
+typedef struct _CRangeEnc
+{
+  UInt32 range;
+  Byte cache;
+  UInt64 low;
+  UInt64 cacheSize;
+  Byte *buf;
+  Byte *bufLim;
+  Byte *bufBase;
+  ISeqOutStream *outStream;
+  UInt64 processed;
+  SRes res;
+} CRangeEnc;
+
+typedef struct _CSeqInStreamBuf
+{
+  ISeqInStream funcTable;
+  const Byte *data;
+  SizeT rem;
+} CSeqInStreamBuf;
+
+static SRes MyRead(void *pp, void *data, size_t *size)
+{
+  size_t curSize = *size;
+  CSeqInStreamBuf *p = (CSeqInStreamBuf *)pp;
+  if (p->rem < curSize)
+    curSize = p->rem;
+  memcpy(data, p->data, curSize);
+  p->rem -= curSize;
+  p->data += curSize;
+  *size = curSize;
+  return SZ_OK;
+}
+
+typedef struct
+{
+  CLzmaProb *litProbs;
+
+  CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX];
+  CLzmaProb isRep[kNumStates];
+  CLzmaProb isRepG0[kNumStates];
+  CLzmaProb isRepG1[kNumStates];
+  CLzmaProb isRepG2[kNumStates];
+  CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX];
+
+  CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits];
+  CLzmaProb posEncoders[kNumFullDistances - kEndPosModelIndex];
+  CLzmaProb posAlignEncoder[1 << kNumAlignBits];
+
+  CLenPriceEnc lenEnc;
+  CLenPriceEnc repLenEnc;
+
+  UInt32 reps[LZMA_NUM_REPS];
+  UInt32 state;
+} CSaveState;
+
+typedef struct _CLzmaEnc
+{
+  IMatchFinder matchFinder;
+  void *matchFinderObj;
+
+  #ifdef COMPRESS_MF_MT
+  Bool mtMode;
+  CMatchFinderMt matchFinderMt;
+  #endif
+
+  CMatchFinder matchFinderBase;
+
+  #ifdef COMPRESS_MF_MT
+  Byte pad[128];
+  #endif
+
+  UInt32 optimumEndIndex;
+  UInt32 optimumCurrentIndex;
+
+  Bool longestMatchWasFound;
+  UInt32 longestMatchLength;
+  UInt32 numDistancePairs;
+
+  COptimal opt[kNumOpts];
+
+  #ifndef LZMA_LOG_BSR
+  Byte g_FastPos[1 << kNumLogBits];
+  #endif
+
+  UInt32 ProbPrices[kBitModelTotal >> kNumMoveReducingBits];
+  UInt32 matchDistances[LZMA_MATCH_LEN_MAX * 2 + 2 + 1];
+  UInt32 numFastBytes;
+  UInt32 additionalOffset;
+  UInt32 reps[LZMA_NUM_REPS];
+  UInt32 state;
+
+  UInt32 posSlotPrices[kNumLenToPosStates][kDistTableSizeMax];
+  UInt32 distancesPrices[kNumLenToPosStates][kNumFullDistances];
+  UInt32 alignPrices[kAlignTableSize];
+  UInt32 alignPriceCount;
+
+  UInt32 distTableSize;
+
+  unsigned lc, lp, pb;
+  unsigned lpMask, pbMask;
+
+  CLzmaProb *litProbs;
+
+  CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX];
+  CLzmaProb isRep[kNumStates];
+  CLzmaProb isRepG0[kNumStates];
+  CLzmaProb isRepG1[kNumStates];
+  CLzmaProb isRepG2[kNumStates];
+  CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX];
+
+  CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits];
+  CLzmaProb posEncoders[kNumFullDistances - kEndPosModelIndex];
+  CLzmaProb posAlignEncoder[1 << kNumAlignBits];
+
+  CLenPriceEnc lenEnc;
+  CLenPriceEnc repLenEnc;
+
+  unsigned lclp;
+
+  Bool fastMode;
+
+  CRangeEnc rc;
+
+  Bool writeEndMark;
+  UInt64 nowPos64;
+  UInt32 matchPriceCount;
+  Bool finished;
+  Bool multiThread;
+
+  SRes result;
+  UInt32 dictSize;
+  UInt32 matchFinderCycles;
+
+  ISeqInStream *inStream;
+  CSeqInStreamBuf seqBufInStream;
+
+  CSaveState saveState;
+} CLzmaEnc;
+
+void LzmaEnc_SaveState(CLzmaEncHandle pp)
+{
+  CLzmaEnc *p = (CLzmaEnc *)pp;
+  CSaveState *dest = &p->saveState;
+  int i;
+  dest->lenEnc = p->lenEnc;
+  dest->repLenEnc = p->repLenEnc;
+  dest->state = p->state;
+
+  for (i = 0; i < kNumStates; i++)
+  {
+    memcpy(dest->isMatch[i], p->isMatch[i], sizeof(p->isMatch[i]));
+    memcpy(dest->isRep0Long[i], p->isRep0Long[i], sizeof(p->isRep0Long[i]));
+  }
+  for (i = 0; i < kNumLenToPosStates; i++)
+    memcpy(dest->posSlotEncoder[i], p->posSlotEncoder[i], sizeof(p->posSlotEncoder[i]));
+  memcpy(dest->isRep, p->isRep, sizeof(p->isRep));
+  memcpy(dest->isRepG0, p->isRepG0, sizeof(p->isRepG0));
+  memcpy(dest->isRepG1, p->isRepG1, sizeof(p->isRepG1));
+  memcpy(dest->isRepG2, p->isRepG2, sizeof(p->isRepG2));
+  memcpy(dest->posEncoders, p->posEncoders, sizeof(p->posEncoders));
+  memcpy(dest->posAlignEncoder, p->posAlignEncoder, sizeof(p->posAlignEncoder));
+  memcpy(dest->reps, p->reps, sizeof(p->reps));
+  memcpy(dest->litProbs, p->litProbs, (0x300 << p->lclp) * sizeof(CLzmaProb));
+}
+
+void LzmaEnc_RestoreState(CLzmaEncHandle pp)
+{
+  CLzmaEnc *dest = (CLzmaEnc *)pp;
+  const CSaveState *p = &dest->saveState;
+  int i;
+  dest->lenEnc = p->lenEnc;
+  dest->repLenEnc = p->repLenEnc;
+  dest->state = p->state;
+
+  for (i = 0; i < kNumStates; i++)
+  {
+    memcpy(dest->isMatch[i], p->isMatch[i], sizeof(p->isMatch[i]));
+    memcpy(dest->isRep0Long[i], p->isRep0Long[i], sizeof(p->isRep0Long[i]));
+  }
+  for (i = 0; i < kNumLenToPosStates; i++)
+    memcpy(dest->posSlotEncoder[i], p->posSlotEncoder[i], sizeof(p->posSlotEncoder[i]));
+  memcpy(dest->isRep, p->isRep, sizeof(p->isRep));
+  memcpy(dest->isRepG0, p->isRepG0, sizeof(p->isRepG0));
+  memcpy(dest->isRepG1, p->isRepG1, sizeof(p->isRepG1));
+  memcpy(dest->isRepG2, p->isRepG2, sizeof(p->isRepG2));
+  memcpy(dest->posEncoders, p->posEncoders, sizeof(p->posEncoders));
+  memcpy(dest->posAlignEncoder, p->posAlignEncoder, sizeof(p->posAlignEncoder));
+  memcpy(dest->reps, p->reps, sizeof(p->reps));
+  memcpy(dest->litProbs, p->litProbs, (0x300 << dest->lclp) * sizeof(CLzmaProb));
+}
+
+SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
+{
+  CLzmaEnc *p = (CLzmaEnc *)pp;
+  CLzmaEncProps props = *props2;
+  LzmaEncProps_Normalize(&props);
+
+  if (props.lc > LZMA_LC_MAX || props.lp > LZMA_LP_MAX || props.pb > LZMA_PB_MAX ||
+      props.dictSize > (1 << kDicLogSizeMaxCompress) || props.dictSize > (1 << 30))
+    return SZ_ERROR_PARAM;
+  p->dictSize = props.dictSize;
+  p->matchFinderCycles = props.mc;
+  {
+    unsigned fb = props.fb;
+    if (fb < 5)
+      fb = 5;
+    if (fb > LZMA_MATCH_LEN_MAX)
+      fb = LZMA_MATCH_LEN_MAX;
+    p->numFastBytes = fb;
+  }
+  p->lc = props.lc;
+  p->lp = props.lp;
+  p->pb = props.pb;
+  p->fastMode = (props.algo == 0);
+  p->matchFinderBase.btMode = props.btMode;
+  {
+    UInt32 numHashBytes = 4;
+    if (props.btMode)
+    {
+      if (props.numHashBytes < 2)
+        numHashBytes = 2;
+      else if (props.numHashBytes < 4)
+        numHashBytes = props.numHashBytes;
+    }
+    p->matchFinderBase.numHashBytes = numHashBytes;
+  }
+
+  p->matchFinderBase.cutValue = props.mc;
+
+  p->writeEndMark = props.writeEndMark;
+
+  #ifdef COMPRESS_MF_MT
+  /*
+  if (newMultiThread != _multiThread)
+  {
+    ReleaseMatchFinder();
+    _multiThread = newMultiThread;
+  }
+  */
+  p->multiThread = (props.numThreads > 1);
+  #endif
+
+  return SZ_OK;
+}
+
+static const int kLiteralNextStates[kNumStates] = {0, 0, 0, 0, 1, 2, 3, 4,  5,  6,   4, 5};
+static const int kMatchNextStates[kNumStates]   = {7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10};
+static const int kRepNextStates[kNumStates]     = {8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11};
+static const int kShortRepNextStates[kNumStates]= {9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11};
+
+/*
+  void UpdateChar() { Index = kLiteralNextStates[Index]; }
+  void UpdateMatch() { Index = kMatchNextStates[Index]; }
+  void UpdateRep() { Index = kRepNextStates[Index]; }
+  void UpdateShortRep() { Index = kShortRepNextStates[Index]; }
+*/
+
+#define IsCharState(s) ((s) < 7)
+
+
+#define GetLenToPosState(len) (((len) < kNumLenToPosStates + 1) ? (len) - 2 : kNumLenToPosStates - 1)
+
+#define kInfinityPrice (1 << 30)
+
+static void RangeEnc_Construct(CRangeEnc *p)
+{
+  p->outStream = 0;
+  p->bufBase = 0;
+}
+
+#define RangeEnc_GetProcessed(p) ((p)->processed + ((p)->buf - (p)->bufBase) + (p)->cacheSize)
+
+#define RC_BUF_SIZE (1 << 16)
+static int RangeEnc_Alloc(CRangeEnc *p, ISzAlloc *alloc)
+{
+  if (p->bufBase == 0)
+  {
+    p->bufBase = (Byte *)alloc->Alloc(alloc, RC_BUF_SIZE);
+    if (p->bufBase == 0)
+      return 0;
+    p->bufLim = p->bufBase + RC_BUF_SIZE;
+  }
+  return 1;
+}
+
+static void RangeEnc_Free(CRangeEnc *p, ISzAlloc *alloc)
+{
+  alloc->Free(alloc, p->bufBase);
+  p->bufBase = 0;
+}
+
+static void RangeEnc_Init(CRangeEnc *p)
+{
+  /* Stream.Init(); */
+  p->low = 0;
+  p->range = 0xFFFFFFFF;
+  p->cacheSize = 1;
+  p->cache = 0;
+
+  p->buf = p->bufBase;
+
+  p->processed = 0;
+  p->res = SZ_OK;
+}
+
+static void RangeEnc_FlushStream(CRangeEnc *p)
+{
+  size_t num;
+  if (p->res != SZ_OK)
+    return;
+  num = p->buf - p->bufBase;
+  if (num != p->outStream->Write(p->outStream, p->bufBase, num))
+    p->res = SZ_ERROR_WRITE;
+  p->processed += num;
+  p->buf = p->bufBase;
+}
+
+static void MY_FAST_CALL RangeEnc_ShiftLow(CRangeEnc *p)
+{
+  if ((UInt32)p->low < (UInt32)0xFF000000 || (int)(p->low >> 32) != 0)
+  {
+    Byte temp = p->cache;
+    do
+    {
+      Byte *buf = p->buf;
+      *buf++ = (Byte)(temp + (Byte)(p->low >> 32));
+      p->buf = buf;
+      if (buf == p->bufLim)
+        RangeEnc_FlushStream(p);
+      temp = 0xFF;
+    }
+    while (--p->cacheSize != 0);
+    p->cache = (Byte)((UInt32)p->low >> 24);
+  }
+  p->cacheSize++;
+  p->low = (UInt32)p->low << 8;
+}
+
+static void RangeEnc_FlushData(CRangeEnc *p)
+{
+  int i;
+  for (i = 0; i < 5; i++)
+    RangeEnc_ShiftLow(p);
+}
+
+static void RangeEnc_EncodeDirectBits(CRangeEnc *p, UInt32 value, int numBits)
+{
+  do
+  {
+    p->range >>= 1;
+    p->low += p->range & (0 - ((value >> --numBits) & 1));
+    if (p->range < kTopValue)
+    {
+      p->range <<= 8;
+      RangeEnc_ShiftLow(p);
+    }
+  }
+  while (numBits != 0);
+}
+
+static void RangeEnc_EncodeBit(CRangeEnc *p, CLzmaProb *prob, UInt32 symbol)
+{
+  UInt32 ttt = *prob;
+  UInt32 newBound = (p->range >> kNumBitModelTotalBits) * ttt;
+  if (symbol == 0)
+  {
+    p->range = newBound;
+    ttt += (kBitModelTotal - ttt) >> kNumMoveBits;
+  }
+  else
+  {
+    p->low += newBound;
+    p->range -= newBound;
+    ttt -= ttt >> kNumMoveBits;
+  }
+  *prob = (CLzmaProb)ttt;
+  if (p->range < kTopValue)
+  {
+    p->range <<= 8;
+    RangeEnc_ShiftLow(p);
+  }
+}
+
+static void LitEnc_Encode(CRangeEnc *p, CLzmaProb *probs, UInt32 symbol)
+{
+  symbol |= 0x100;
+  do
+  {
+    RangeEnc_EncodeBit(p, probs + (symbol >> 8), (symbol >> 7) & 1);
+    symbol <<= 1;
+  }
+  while (symbol < 0x10000);
+}
+
+static void LitEnc_EncodeMatched(CRangeEnc *p, CLzmaProb *probs, UInt32 symbol, UInt32 matchByte)
+{
+  UInt32 offs = 0x100;
+  symbol |= 0x100;
+  do
+  {
+    matchByte <<= 1;
+    RangeEnc_EncodeBit(p, probs + (offs + (matchByte & offs) + (symbol >> 8)), (symbol >> 7) & 1);
+    symbol <<= 1;
+    offs &= ~(matchByte ^ symbol);
+  }
+  while (symbol < 0x10000);
+}
+
+void LzmaEnc_InitPriceTables(UInt32 *ProbPrices)
+{
+  UInt32 i;
+  for (i = (1 << kNumMoveReducingBits) / 2; i < kBitModelTotal; i += (1 << kNumMoveReducingBits))
+  {
+    const int kCyclesBits = kNumBitPriceShiftBits;
+    UInt32 w = i;
+    UInt32 bitCount = 0;
+    int j;
+    for (j = 0; j < kCyclesBits; j++)
+    {
+      w = w * w;
+      bitCount <<= 1;
+      while (w >= ((UInt32)1 << 16))
+      {
+        w >>= 1;
+        bitCount++;
+      }
+    }
+    ProbPrices[i >> kNumMoveReducingBits] = ((kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount);
+  }
+}
+
+
+#define GET_PRICE(prob, symbol) \
+  p->ProbPrices[((prob) ^ (((-(int)(symbol))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits];
+
+#define GET_PRICEa(prob, symbol) \
+  ProbPrices[((prob) ^ ((-((int)(symbol))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits];
+
+#define GET_PRICE_0(prob) p->ProbPrices[(prob) >> kNumMoveReducingBits]
+#define GET_PRICE_1(prob) p->ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits]
+
+#define GET_PRICE_0a(prob) ProbPrices[(prob) >> kNumMoveReducingBits]
+#define GET_PRICE_1a(prob) ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits]
+
+static UInt32 LitEnc_GetPrice(const CLzmaProb *probs, UInt32 symbol, UInt32 *ProbPrices)
+{
+  UInt32 price = 0;
+  symbol |= 0x100;
+  do
+  {
+    price += GET_PRICEa(probs[symbol >> 8], (symbol >> 7) & 1);
+    symbol <<= 1;
+  }
+  while (symbol < 0x10000);
+  return price;
+};
+
+static UInt32 LitEnc_GetPriceMatched(const CLzmaProb *probs, UInt32 symbol, UInt32 matchByte, UInt32 *ProbPrices)
+{
+  UInt32 price = 0;
+  UInt32 offs = 0x100;
+  symbol |= 0x100;
+  do
+  {
+    matchByte <<= 1;
+    price += GET_PRICEa(probs[offs + (matchByte & offs) + (symbol >> 8)], (symbol >> 7) & 1);
+    symbol <<= 1;
+    offs &= ~(matchByte ^ symbol);
+  }
+  while (symbol < 0x10000);
+  return price;
+};
+
+
+static void RcTree_Encode(CRangeEnc *rc, CLzmaProb *probs, int numBitLevels, UInt32 symbol)
+{
+  UInt32 m = 1;
+  int i;
+  for (i = numBitLevels; i != 0 ;)
+  {
+    UInt32 bit;
+    i--;
+    bit = (symbol >> i) & 1;
+    RangeEnc_EncodeBit(rc, probs + m, bit);
+    m = (m << 1) | bit;
+  }
+};
+
+static void RcTree_ReverseEncode(CRangeEnc *rc, CLzmaProb *probs, int numBitLevels, UInt32 symbol)
+{
+  UInt32 m = 1;
+  int i;
+  for (i = 0; i < numBitLevels; i++)
+  {
+    UInt32 bit = symbol & 1;
+    RangeEnc_EncodeBit(rc, probs + m, bit);
+    m = (m << 1) | bit;
+    symbol >>= 1;
+  }
+}
+
+static UInt32 RcTree_GetPrice(const CLzmaProb *probs, int numBitLevels, UInt32 symbol, UInt32 *ProbPrices)
+{
+  UInt32 price = 0;
+  symbol |= (1 << numBitLevels);
+  while (symbol != 1)
+  {
+    price += GET_PRICEa(probs[symbol >> 1], symbol & 1);
+    symbol >>= 1;
+  }
+  return price;
+}
+
+static UInt32 RcTree_ReverseGetPrice(const CLzmaProb *probs, int numBitLevels, UInt32 symbol, UInt32 *ProbPrices)
+{
+  UInt32 price = 0;
+  UInt32 m = 1;
+  int i;
+  for (i = numBitLevels; i != 0; i--)
+  {
+    UInt32 bit = symbol & 1;
+    symbol >>= 1;
+    price += GET_PRICEa(probs[m], bit);
+    m = (m << 1) | bit;
+  }
+  return price;
+}
+
+
+static void LenEnc_Init(CLenEnc *p)
+{
+  unsigned i;
+  p->choice = p->choice2 = kProbInitValue;
+  for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << kLenNumLowBits); i++)
+    p->low[i] = kProbInitValue;
+  for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << kLenNumMidBits); i++)
+    p->mid[i] = kProbInitValue;
+  for (i = 0; i < kLenNumHighSymbols; i++)
+    p->high[i] = kProbInitValue;
+}
+
+static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, UInt32 symbol, UInt32 posState)
+{
+  if (symbol < kLenNumLowSymbols)
+  {
+    RangeEnc_EncodeBit(rc, &p->choice, 0);
+    RcTree_Encode(rc, p->low + (posState << kLenNumLowBits), kLenNumLowBits, symbol);
+  }
+  else
+  {
+    RangeEnc_EncodeBit(rc, &p->choice, 1);
+    if (symbol < kLenNumLowSymbols + kLenNumMidSymbols)
+    {
+      RangeEnc_EncodeBit(rc, &p->choice2, 0);
+      RcTree_Encode(rc, p->mid + (posState << kLenNumMidBits), kLenNumMidBits, symbol - kLenNumLowSymbols);
+    }
+    else
+    {
+      RangeEnc_EncodeBit(rc, &p->choice2, 1);
+      RcTree_Encode(rc, p->high, kLenNumHighBits, symbol - kLenNumLowSymbols - kLenNumMidSymbols);
+    }
+  }
+}
+
+static void LenEnc_SetPrices(CLenEnc *p, UInt32 posState, UInt32 numSymbols, UInt32 *prices, UInt32 *ProbPrices)
+{
+  UInt32 a0 = GET_PRICE_0a(p->choice);
+  UInt32 a1 = GET_PRICE_1a(p->choice);
+  UInt32 b0 = a1 + GET_PRICE_0a(p->choice2);
+  UInt32 b1 = a1 + GET_PRICE_1a(p->choice2);
+  UInt32 i = 0;
+  for (i = 0; i < kLenNumLowSymbols; i++)
+  {
+    if (i >= numSymbols)
+      return;
+    prices[i] = a0 + RcTree_GetPrice(p->low + (posState << kLenNumLowBits), kLenNumLowBits, i, ProbPrices);
+  }
+  for (; i < kLenNumLowSymbols + kLenNumMidSymbols; i++)
+  {
+    if (i >= numSymbols)
+      return;
+    prices[i] = b0 + RcTree_GetPrice(p->mid + (posState << kLenNumMidBits), kLenNumMidBits, i - kLenNumLowSymbols, ProbPrices);
+  }
+  for (; i < numSymbols; i++)
+    prices[i] = b1 + RcTree_GetPrice(p->high, kLenNumHighBits, i - kLenNumLowSymbols - kLenNumMidSymbols, ProbPrices);
+}
+
+static void MY_FAST_CALL LenPriceEnc_UpdateTable(CLenPriceEnc *p, UInt32 posState, UInt32 *ProbPrices)
+{
+  LenEnc_SetPrices(&p->p, posState, p->tableSize, p->prices[posState], ProbPrices);
+  p->counters[posState] = p->tableSize;
+}
+
+static void LenPriceEnc_UpdateTables(CLenPriceEnc *p, UInt32 numPosStates, UInt32 *ProbPrices)
+{
+  UInt32 posState;
+  for (posState = 0; posState < numPosStates; posState++)
+    LenPriceEnc_UpdateTable(p, posState, ProbPrices);
+}
+
+static void LenEnc_Encode2(CLenPriceEnc *p, CRangeEnc *rc, UInt32 symbol, UInt32 posState, Bool updatePrice, UInt32 *ProbPrices)
+{
+  LenEnc_Encode(&p->p, rc, symbol, posState);
+  if (updatePrice)
+    if (--p->counters[posState] == 0)
+      LenPriceEnc_UpdateTable(p, posState, ProbPrices);
+}
+
+
+
+
+static void MovePos(CLzmaEnc *p, UInt32 num)
+{
+  #ifdef SHOW_STAT
+  ttt += num;
+  printf("\n MovePos %d", num);
+  #endif
+  if (num != 0)
+  {
+    p->additionalOffset += num;
+    p->matchFinder.Skip(p->matchFinderObj, num);
+  }
+}
+
+static UInt32 ReadMatchDistances(CLzmaEnc *p, UInt32 *numDistancePairsRes)
+{
+  UInt32 lenRes = 0, numDistancePairs;
+  numDistancePairs = p->matchFinder.GetMatches(p->matchFinderObj, p->matchDistances);
+  #ifdef SHOW_STAT
+  printf("\n i = %d numPairs = %d    ", ttt, numDistancePairs / 2);
+  if (ttt >= 61994)
+    ttt = ttt;
+
+  ttt++;
+  {
+    UInt32 i;
+  for (i = 0; i < numDistancePairs; i += 2)
+    printf("%2d %6d   | ", p->matchDistances[i], p->matchDistances[i + 1]);
+  }
+  #endif
+  if (numDistancePairs > 0)
+  {
+    lenRes = p->matchDistances[numDistancePairs - 2];
+    if (lenRes == p->numFastBytes)
+    {
+      UInt32 numAvail = p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) + 1;
+      const Byte *pby = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
+      UInt32 distance = p->matchDistances[numDistancePairs - 1] + 1;
+      if (numAvail > LZMA_MATCH_LEN_MAX)
+        numAvail = LZMA_MATCH_LEN_MAX;
+
+      {
+        const Byte *pby2 = pby - distance;
+        for (; lenRes < numAvail && pby[lenRes] == pby2[lenRes]; lenRes++);
+      }
+    }
+  }
+  p->additionalOffset++;
+  *numDistancePairsRes = numDistancePairs;
+  return lenRes;
+}
+
+
+#define MakeAsChar(p) (p)->backPrev = (UInt32)(-1); (p)->prev1IsChar = False;
+#define MakeAsShortRep(p) (p)->backPrev = 0; (p)->prev1IsChar = False;
+#define IsShortRep(p) ((p)->backPrev == 0)
+
+static UInt32 GetRepLen1Price(CLzmaEnc *p, UInt32 state, UInt32 posState)
+{
+  return
+    GET_PRICE_0(p->isRepG0[state]) +
+    GET_PRICE_0(p->isRep0Long[state][posState]);
+}
+
+static UInt32 GetPureRepPrice(CLzmaEnc *p, UInt32 repIndex, UInt32 state, UInt32 posState)
+{
+  UInt32 price;
+  if (repIndex == 0)
+  {
+    price = GET_PRICE_0(p->isRepG0[state]);
+    price += GET_PRICE_1(p->isRep0Long[state][posState]);
+  }
+  else
+  {
+    price = GET_PRICE_1(p->isRepG0[state]);
+    if (repIndex == 1)
+      price += GET_PRICE_0(p->isRepG1[state]);
+    else
+    {
+      price += GET_PRICE_1(p->isRepG1[state]);
+      price += GET_PRICE(p->isRepG2[state], repIndex - 2);
+    }
+  }
+  return price;
+}
+
+static UInt32 GetRepPrice(CLzmaEnc *p, UInt32 repIndex, UInt32 len, UInt32 state, UInt32 posState)
+{
+  return p->repLenEnc.prices[posState][len - LZMA_MATCH_LEN_MIN] +
+    GetPureRepPrice(p, repIndex, state, posState);
+}
+
+static UInt32 Backward(CLzmaEnc *p, UInt32 *backRes, UInt32 cur)
+{
+  UInt32 posMem = p->opt[cur].posPrev;
+  UInt32 backMem = p->opt[cur].backPrev;
+  p->optimumEndIndex = cur;
+  do
+  {
+    if (p->opt[cur].prev1IsChar)
+    {
+      MakeAsChar(&p->opt[posMem])
+      p->opt[posMem].posPrev = posMem - 1;
+      if (p->opt[cur].prev2)
+      {
+        p->opt[posMem - 1].prev1IsChar = False;
+        p->opt[posMem - 1].posPrev = p->opt[cur].posPrev2;
+        p->opt[posMem - 1].backPrev = p->opt[cur].backPrev2;
+      }
+    }
+    {
+      UInt32 posPrev = posMem;
+      UInt32 backCur = backMem;
+
+      backMem = p->opt[posPrev].backPrev;
+      posMem = p->opt[posPrev].posPrev;
+
+      p->opt[posPrev].backPrev = backCur;
+      p->opt[posPrev].posPrev = cur;
+      cur = posPrev;
+    }
+  }
+  while (cur != 0);
+  *backRes = p->opt[0].backPrev;
+  p->optimumCurrentIndex  = p->opt[0].posPrev;
+  return p->optimumCurrentIndex;
+}
+
+#define LIT_PROBS(pos, prevByte) (p->litProbs + ((((pos) & p->lpMask) << p->lc) + ((prevByte) >> (8 - p->lc))) * 0x300)
+
+static UInt32 GetOptimum(CLzmaEnc *p, UInt32 position, UInt32 *backRes)
+{
+  UInt32 numAvailableBytes, lenMain, numDistancePairs;
+  const Byte *data;
+  UInt32 reps[LZMA_NUM_REPS];
+  UInt32 repLens[LZMA_NUM_REPS];
+  UInt32 repMaxIndex, i;
+  UInt32 *matchDistances;
+  Byte currentByte, matchByte;
+  UInt32 posState;
+  UInt32 matchPrice, repMatchPrice;
+  UInt32 lenEnd;
+  UInt32 len;
+  UInt32 normalMatchPrice;
+  UInt32 cur;
+  if (p->optimumEndIndex != p->optimumCurrentIndex)
+  {
+    const COptimal *opt = &p->opt[p->optimumCurrentIndex];
+    UInt32 lenRes = opt->posPrev - p->optimumCurrentIndex;
+    *backRes = opt->backPrev;
+    p->optimumCurrentIndex = opt->posPrev;
+    return lenRes;
+  }
+  p->optimumCurrentIndex = p->optimumEndIndex = 0;
+
+  numAvailableBytes = p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
+
+  if (!p->longestMatchWasFound)
+  {
+    lenMain = ReadMatchDistances(p, &numDistancePairs);
+  }
+  else
+  {
+    lenMain = p->longestMatchLength;
+    numDistancePairs = p->numDistancePairs;
+    p->longestMatchWasFound = False;
+  }
+
+  data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
+  if (numAvailableBytes < 2)
+  {
+    *backRes = (UInt32)(-1);
+    return 1;
+  }
+  if (numAvailableBytes > LZMA_MATCH_LEN_MAX)
+    numAvailableBytes = LZMA_MATCH_LEN_MAX;
+
+  repMaxIndex = 0;
+  for (i = 0; i < LZMA_NUM_REPS; i++)
+  {
+    UInt32 lenTest;
+    const Byte *data2;
+    reps[i] = p->reps[i];
+    data2 = data - (reps[i] + 1);
+    if (data[0] != data2[0] || data[1] != data2[1])
+    {
+      repLens[i] = 0;
+      continue;
+    }
+    for (lenTest = 2; lenTest < numAvailableBytes && data[lenTest] == data2[lenTest]; lenTest++);
+    repLens[i] = lenTest;
+    if (lenTest > repLens[repMaxIndex])
+      repMaxIndex = i;
+  }
+  if (repLens[repMaxIndex] >= p->numFastBytes)
+  {
+    UInt32 lenRes;
+    *backRes = repMaxIndex;
+    lenRes = repLens[repMaxIndex];
+    MovePos(p, lenRes - 1);
+    return lenRes;
+  }
+
+  matchDistances = p->matchDistances;
+  if (lenMain >= p->numFastBytes)
+  {
+    *backRes = matchDistances[numDistancePairs - 1] + LZMA_NUM_REPS;
+    MovePos(p, lenMain - 1);
+    return lenMain;
+  }
+  currentByte = *data;
+  matchByte = *(data - (reps[0] + 1));
+
+  if (lenMain < 2 && currentByte != matchByte && repLens[repMaxIndex] < 2)
+  {
+    *backRes = (UInt32)-1;
+    return 1;
+  }
+
+  p->opt[0].state = (CState)p->state;
+
+  posState = (position & p->pbMask);
+
+  {
+    const CLzmaProb *probs = LIT_PROBS(position, *(data - 1));
+    p->opt[1].price = GET_PRICE_0(p->isMatch[p->state][posState]) +
+        (!IsCharState(p->state) ?
+          LitEnc_GetPriceMatched(probs, currentByte, matchByte, p->ProbPrices) :
+          LitEnc_GetPrice(probs, currentByte, p->ProbPrices));
+  }
+
+  MakeAsChar(&p->opt[1]);
+
+  matchPrice = GET_PRICE_1(p->isMatch[p->state][posState]);
+  repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[p->state]);
+
+  if (matchByte == currentByte)
+  {
+    UInt32 shortRepPrice = repMatchPrice + GetRepLen1Price(p, p->state, posState);
+    if (shortRepPrice < p->opt[1].price)
+    {
+      p->opt[1].price = shortRepPrice;
+      MakeAsShortRep(&p->opt[1]);
+    }
+  }
+  lenEnd = ((lenMain >= repLens[repMaxIndex]) ? lenMain : repLens[repMaxIndex]);
+
+  if (lenEnd < 2)
+  {
+    *backRes = p->opt[1].backPrev;
+    return 1;
+  }
+
+  p->opt[1].posPrev = 0;
+  for (i = 0; i < LZMA_NUM_REPS; i++)
+    p->opt[0].backs[i] = reps[i];
+
+  len = lenEnd;
+  do
+    p->opt[len--].price = kInfinityPrice;
+  while (len >= 2);
+
+  for (i = 0; i < LZMA_NUM_REPS; i++)
+  {
+    UInt32 repLen = repLens[i];
+    UInt32 price;
+    if (repLen < 2)
+      continue;
+    price = repMatchPrice + GetPureRepPrice(p, i, p->state, posState);
+    do
+    {
+      UInt32 curAndLenPrice = price + p->repLenEnc.prices[posState][repLen - 2];
+      COptimal *opt = &p->opt[repLen];
+      if (curAndLenPrice < opt->price)
+      {
+        opt->price = curAndLenPrice;
+        opt->posPrev = 0;
+        opt->backPrev = i;
+        opt->prev1IsChar = False;
+      }
+    }
+    while (--repLen >= 2);
+  }
+
+  normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[p->state]);
+
+  len = ((repLens[0] >= 2) ? repLens[0] + 1 : 2);
+  if (len <= lenMain)
+  {
+    UInt32 offs = 0;
+    while (len > matchDistances[offs])
+      offs += 2;
+    for (; ; len++)
+    {
+      COptimal *opt;
+      UInt32 distance = matchDistances[offs + 1];
+
+      UInt32 curAndLenPrice = normalMatchPrice + p->lenEnc.prices[posState][len - LZMA_MATCH_LEN_MIN];
+      UInt32 lenToPosState = GetLenToPosState(len);
+      if (distance < kNumFullDistances)
+        curAndLenPrice += p->distancesPrices[lenToPosState][distance];
+      else
+      {
+        UInt32 slot;
+        GetPosSlot2(distance, slot);
+        curAndLenPrice += p->alignPrices[distance & kAlignMask] + p->posSlotPrices[lenToPosState][slot];
+      }
+      opt = &p->opt[len];
+      if (curAndLenPrice < opt->price)
+      {
+        opt->price = curAndLenPrice;
+        opt->posPrev = 0;
+        opt->backPrev = distance + LZMA_NUM_REPS;
+        opt->prev1IsChar = False;
+      }
+      if (len == matchDistances[offs])
+      {
+        offs += 2;
+        if (offs == numDistancePairs)
+          break;
+      }
+    }
+  }
+
+  cur = 0;
+
+    #ifdef SHOW_STAT2
+    if (position >= 0)
+    {
+      unsigned i;
+      printf("\n pos = %4X", position);
+      for (i = cur; i <= lenEnd; i++)
+      printf("\nprice[%4X] = %d", position - cur + i, p->opt[i].price);
+    }
+    #endif
+
+  for (;;)
+  {
+    UInt32 numAvailableBytesFull, newLen, numDistancePairs;
+    COptimal *curOpt;
+    UInt32 posPrev;
+    UInt32 state;
+    UInt32 curPrice;
+    Bool nextIsChar;
+    const Byte *data;
+    Byte currentByte, matchByte;
+    UInt32 posState;
+    UInt32 curAnd1Price;
+    COptimal *nextOpt;
+    UInt32 matchPrice, repMatchPrice;
+    UInt32 numAvailableBytes;
+    UInt32 startLen;
+
+    cur++;
+    if (cur == lenEnd)
+      return Backward(p, backRes, cur);
+
+    numAvailableBytesFull = p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
+    newLen = ReadMatchDistances(p, &numDistancePairs);
+    if (newLen >= p->numFastBytes)
+    {
+      p->numDistancePairs = numDistancePairs;
+      p->longestMatchLength = newLen;
+      p->longestMatchWasFound = True;
+      return Backward(p, backRes, cur);
+    }
+    position++;
+    curOpt = &p->opt[cur];
+    posPrev = curOpt->posPrev;
+    if (curOpt->prev1IsChar)
+    {
+      posPrev--;
+      if (curOpt->prev2)
+      {
+        state = p->opt[curOpt->posPrev2].state;
+        if (curOpt->backPrev2 < LZMA_NUM_REPS)
+          state = kRepNextStates[state];
+        else
+          state = kMatchNextStates[state];
+      }
+      else
+        state = p->opt[posPrev].state;
+      state = kLiteralNextStates[state];
+    }
+    else
+      state = p->opt[posPrev].state;
+    if (posPrev == cur - 1)
+    {
+      if (IsShortRep(curOpt))
+        state = kShortRepNextStates[state];
+      else
+        state = kLiteralNextStates[state];
+    }
+    else
+    {
+      UInt32 pos;
+      const COptimal *prevOpt;
+      if (curOpt->prev1IsChar && curOpt->prev2)
+      {
+        posPrev = curOpt->posPrev2;
+        pos = curOpt->backPrev2;
+        state = kRepNextStates[state];
+      }
+      else
+      {
+        pos = curOpt->backPrev;
+        if (pos < LZMA_NUM_REPS)
+          state = kRepNextStates[state];
+        else
+          state = kMatchNextStates[state];
+      }
+      prevOpt = &p->opt[posPrev];
+      if (pos < LZMA_NUM_REPS)
+      {
+        UInt32 i;
+        reps[0] = prevOpt->backs[pos];
+        for (i = 1; i <= pos; i++)
+          reps[i] = prevOpt->backs[i - 1];
+        for (; i < LZMA_NUM_REPS; i++)
+          reps[i] = prevOpt->backs[i];
+      }
+      else
+      {
+        UInt32 i;
+        reps[0] = (pos - LZMA_NUM_REPS);
+        for (i = 1; i < LZMA_NUM_REPS; i++)
+          reps[i] = prevOpt->backs[i - 1];
+      }
+    }
+    curOpt->state = (CState)state;
+
+    curOpt->backs[0] = reps[0];
+    curOpt->backs[1] = reps[1];
+    curOpt->backs[2] = reps[2];
+    curOpt->backs[3] = reps[3];
+
+    curPrice = curOpt->price;
+    nextIsChar = False;
+    data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
+    currentByte = *data;
+    matchByte = *(data - (reps[0] + 1));
+
+    posState = (position & p->pbMask);
+
+    curAnd1Price = curPrice + GET_PRICE_0(p->isMatch[state][posState]);
+    {
+      const CLzmaProb *probs = LIT_PROBS(position, *(data - 1));
+      curAnd1Price +=
+        (!IsCharState(state) ?
+          LitEnc_GetPriceMatched(probs, currentByte, matchByte, p->ProbPrices) :
+          LitEnc_GetPrice(probs, currentByte, p->ProbPrices));
+    }
+
+    nextOpt = &p->opt[cur + 1];
+
+    if (curAnd1Price < nextOpt->price)
+    {
+      nextOpt->price = curAnd1Price;
+      nextOpt->posPrev = cur;
+      MakeAsChar(nextOpt);
+      nextIsChar = True;
+    }
+
+    matchPrice = curPrice + GET_PRICE_1(p->isMatch[state][posState]);
+    repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[state]);
+
+    if (matchByte == currentByte && !(nextOpt->posPrev < cur && nextOpt->backPrev == 0))
+    {
+      UInt32 shortRepPrice = repMatchPrice + GetRepLen1Price(p, state, posState);
+      if (shortRepPrice <= nextOpt->price)
+      {
+        nextOpt->price = shortRepPrice;
+        nextOpt->posPrev = cur;
+        MakeAsShortRep(nextOpt);
+        nextIsChar = True;
+      }
+    }
+
+    {
+      UInt32 temp = kNumOpts - 1 - cur;
+      if (temp <  numAvailableBytesFull)
+        numAvailableBytesFull = temp;
+    }
+    numAvailableBytes = numAvailableBytesFull;
+
+    if (numAvailableBytes < 2)
+      continue;
+    if (numAvailableBytes > p->numFastBytes)
+      numAvailableBytes = p->numFastBytes;
+    if (!nextIsChar && matchByte != currentByte) /* speed optimization */
+    {
+      /* try Literal + rep0 */
+      UInt32 temp;
+      UInt32 lenTest2;
+      const Byte *data2 = data - (reps[0] + 1);
+      UInt32 limit = p->numFastBytes + 1;
+      if (limit > numAvailableBytesFull)
+        limit = numAvailableBytesFull;
+
+      for (temp = 1; temp < limit && data[temp] == data2[temp]; temp++);
+      lenTest2 = temp - 1;
+      if (lenTest2 >= 2)
+      {
+        UInt32 state2 = kLiteralNextStates[state];
+        UInt32 posStateNext = (position + 1) & p->pbMask;
+        UInt32 nextRepMatchPrice = curAnd1Price +
+            GET_PRICE_1(p->isMatch[state2][posStateNext]) +
+            GET_PRICE_1(p->isRep[state2]);
+        /* for (; lenTest2 >= 2; lenTest2--) */
+        {
+          UInt32 curAndLenPrice;
+          COptimal *opt;
+          UInt32 offset = cur + 1 + lenTest2;
+          while (lenEnd < offset)
+            p->opt[++lenEnd].price = kInfinityPrice;
+          curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext);
+          opt = &p->opt[offset];
+          if (curAndLenPrice < opt->price)
+          {
+            opt->price = curAndLenPrice;
+            opt->posPrev = cur + 1;
+            opt->backPrev = 0;
+            opt->prev1IsChar = True;
+            opt->prev2 = False;
+          }
+        }
+      }
+    }
+
+    startLen = 2; /* speed optimization */
+    {
+    UInt32 repIndex;
+    for (repIndex = 0; repIndex < LZMA_NUM_REPS; repIndex++)
+    {
+      UInt32 lenTest;
+      UInt32 lenTestTemp;
+      UInt32 price;
+      const Byte *data2 = data - (reps[repIndex] + 1);
+      if (data[0] != data2[0] || data[1] != data2[1])
+        continue;
+      for (lenTest = 2; lenTest < numAvailableBytes && data[lenTest] == data2[lenTest]; lenTest++);
+      while (lenEnd < cur + lenTest)
+        p->opt[++lenEnd].price = kInfinityPrice;
+      lenTestTemp = lenTest;
+      price = repMatchPrice + GetPureRepPrice(p, repIndex, state, posState);
+      do
+      {
+        UInt32 curAndLenPrice = price + p->repLenEnc.prices[posState][lenTest - 2];
+        COptimal *opt = &p->opt[cur + lenTest];
+        if (curAndLenPrice < opt->price)
+        {
+          opt->price = curAndLenPrice;
+          opt->posPrev = cur;
+          opt->backPrev = repIndex;
+          opt->prev1IsChar = False;
+        }
+      }
+      while (--lenTest >= 2);
+      lenTest = lenTestTemp;
+
+      if (repIndex == 0)
+        startLen = lenTest + 1;
+
+      /* if (_maxMode) */
+        {
+          UInt32 lenTest2 = lenTest + 1;
+          UInt32 limit = lenTest2 + p->numFastBytes;
+          UInt32 nextRepMatchPrice;
+          if (limit > numAvailableBytesFull)
+            limit = numAvailableBytesFull;
+          for (; lenTest2 < limit && data[lenTest2] == data2[lenTest2]; lenTest2++);
+          lenTest2 -= lenTest + 1;
+          if (lenTest2 >= 2)
+          {
+            UInt32 state2 = kRepNextStates[state];
+            UInt32 posStateNext = (position + lenTest) & p->pbMask;
+            UInt32 curAndLenCharPrice =
+                price + p->repLenEnc.prices[posState][lenTest - 2] +
+                GET_PRICE_0(p->isMatch[state2][posStateNext]) +
+                LitEnc_GetPriceMatched(LIT_PROBS(position + lenTest, data[lenTest - 1]),
+                    data[lenTest], data2[lenTest], p->ProbPrices);
+            state2 = kLiteralNextStates[state2];
+            posStateNext = (position + lenTest + 1) & p->pbMask;
+            nextRepMatchPrice = curAndLenCharPrice +
+                GET_PRICE_1(p->isMatch[state2][posStateNext]) +
+                GET_PRICE_1(p->isRep[state2]);
+
+            /* for (; lenTest2 >= 2; lenTest2--) */
+            {
+              UInt32 curAndLenPrice;
+              COptimal *opt;
+              UInt32 offset = cur + lenTest + 1 + lenTest2;
+              while (lenEnd < offset)
+                p->opt[++lenEnd].price = kInfinityPrice;
+              curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext);
+              opt = &p->opt[offset];
+              if (curAndLenPrice < opt->price)
+              {
+                opt->price = curAndLenPrice;
+                opt->posPrev = cur + lenTest + 1;
+                opt->backPrev = 0;
+                opt->prev1IsChar = True;
+                opt->prev2 = True;
+                opt->posPrev2 = cur;
+                opt->backPrev2 = repIndex;
+              }
+            }
+          }
+        }
+    }
+    }
+    /* for (UInt32 lenTest = 2; lenTest <= newLen; lenTest++) */
+    if (newLen > numAvailableBytes)
+    {
+      newLen = numAvailableBytes;
+      for (numDistancePairs = 0; newLen > matchDistances[numDistancePairs]; numDistancePairs += 2);
+      matchDistances[numDistancePairs] = newLen;
+      numDistancePairs += 2;
+    }
+    if (newLen >= startLen)
+    {
+      UInt32 normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[state]);
+      UInt32 offs, curBack, posSlot;
+      UInt32 lenTest;
+      while (lenEnd < cur + newLen)
+        p->opt[++lenEnd].price = kInfinityPrice;
+
+      offs = 0;
+      while (startLen > matchDistances[offs])
+        offs += 2;
+      curBack = matchDistances[offs + 1];
+      GetPosSlot2(curBack, posSlot);
+      for (lenTest = /*2*/ startLen; ; lenTest++)
+      {
+        UInt32 curAndLenPrice = normalMatchPrice + p->lenEnc.prices[posState][lenTest - LZMA_MATCH_LEN_MIN];
+        UInt32 lenToPosState = GetLenToPosState(lenTest);
+        COptimal *opt;
+        if (curBack < kNumFullDistances)
+          curAndLenPrice += p->distancesPrices[lenToPosState][curBack];
+        else
+          curAndLenPrice += p->posSlotPrices[lenToPosState][posSlot] + p->alignPrices[curBack & kAlignMask];
+
+        opt = &p->opt[cur + lenTest];
+        if (curAndLenPrice < opt->price)
+        {
+          opt->price = curAndLenPrice;
+          opt->posPrev = cur;
+          opt->backPrev = curBack + LZMA_NUM_REPS;
+          opt->prev1IsChar = False;
+        }
+
+        if (/*_maxMode && */lenTest == matchDistances[offs])
+        {
+          /* Try Match + Literal + Rep0 */
+          const Byte *data2 = data - (curBack + 1);
+          UInt32 lenTest2 = lenTest + 1;
+          UInt32 limit = lenTest2 + p->numFastBytes;
+          UInt32 nextRepMatchPrice;
+          if (limit > numAvailableBytesFull)
+            limit = numAvailableBytesFull;
+          for (; lenTest2 < limit && data[lenTest2] == data2[lenTest2]; lenTest2++);
+          lenTest2 -= lenTest + 1;
+          if (lenTest2 >= 2)
+          {
+            UInt32 state2 = kMatchNextStates[state];
+            UInt32 posStateNext = (position + lenTest) & p->pbMask;
+            UInt32 curAndLenCharPrice = curAndLenPrice +
+                GET_PRICE_0(p->isMatch[state2][posStateNext]) +
+                LitEnc_GetPriceMatched(LIT_PROBS(position + lenTest, data[lenTest - 1]),
+                    data[lenTest], data2[lenTest], p->ProbPrices);
+            state2 = kLiteralNextStates[state2];
+            posStateNext = (posStateNext + 1) & p->pbMask;
+            nextRepMatchPrice = curAndLenCharPrice +
+                GET_PRICE_1(p->isMatch[state2][posStateNext]) +
+                GET_PRICE_1(p->isRep[state2]);
+
+            /* for (; lenTest2 >= 2; lenTest2--) */
+            {
+              UInt32 offset = cur + lenTest + 1 + lenTest2;
+              UInt32 curAndLenPrice;
+              COptimal *opt;
+              while (lenEnd < offset)
+                p->opt[++lenEnd].price = kInfinityPrice;
+              curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext);
+              opt = &p->opt[offset];
+              if (curAndLenPrice < opt->price)
+              {
+                opt->price = curAndLenPrice;
+                opt->posPrev = cur + lenTest + 1;
+                opt->backPrev = 0;
+                opt->prev1IsChar = True;
+                opt->prev2 = True;
+                opt->posPrev2 = cur;
+                opt->backPrev2 = curBack + LZMA_NUM_REPS;
+              }
+            }
+          }
+          offs += 2;
+          if (offs == numDistancePairs)
+            break;
+          curBack = matchDistances[offs + 1];
+          if (curBack >= kNumFullDistances)
+            GetPosSlot2(curBack, posSlot);
+        }
+      }
+    }
+  }
+}
+
+#define ChangePair(smallDist, bigDist) (((bigDist) >> 7) > (smallDist))
+
+static UInt32 GetOptimumFast(CLzmaEnc *p, UInt32 *backRes)
+{
+  UInt32 numAvailableBytes = p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
+  UInt32 lenMain, numDistancePairs;
+  const Byte *data;
+  UInt32 repLens[LZMA_NUM_REPS];
+  UInt32 repMaxIndex, i;
+  UInt32 *matchDistances;
+  UInt32 backMain;
+
+  if (!p->longestMatchWasFound)
+  {
+    lenMain = ReadMatchDistances(p, &numDistancePairs);
+  }
+  else
+  {
+    lenMain = p->longestMatchLength;
+    numDistancePairs = p->numDistancePairs;
+    p->longestMatchWasFound = False;
+  }
+
+  data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
+  if (numAvailableBytes > LZMA_MATCH_LEN_MAX)
+    numAvailableBytes = LZMA_MATCH_LEN_MAX;
+  if (numAvailableBytes < 2)
+  {
+    *backRes = (UInt32)(-1);
+    return 1;
+  }
+
+  repMaxIndex = 0;
+
+  for (i = 0; i < LZMA_NUM_REPS; i++)
+  {
+    const Byte *data2 = data - (p->reps[i] + 1);
+    UInt32 len;
+    if (data[0] != data2[0] || data[1] != data2[1])
+    {
+      repLens[i] = 0;
+      continue;
+    }
+    for (len = 2; len < numAvailableBytes && data[len] == data2[len]; len++);
+    if (len >= p->numFastBytes)
+    {
+      *backRes = i;
+      MovePos(p, len - 1);
+      return len;
+    }
+    repLens[i] = len;
+    if (len > repLens[repMaxIndex])
+      repMaxIndex = i;
+  }
+  matchDistances = p->matchDistances;
+  if (lenMain >= p->numFastBytes)
+  {
+    *backRes = matchDistances[numDistancePairs - 1] + LZMA_NUM_REPS;
+    MovePos(p, lenMain - 1);
+    return lenMain;
+  }
+
+  backMain = 0; /* for GCC */
+  if (lenMain >= 2)
+  {
+    backMain = matchDistances[numDistancePairs - 1];
+    while (numDistancePairs > 2 && lenMain == matchDistances[numDistancePairs - 4] + 1)
+    {
+      if (!ChangePair(matchDistances[numDistancePairs - 3], backMain))
+        break;
+      numDistancePairs -= 2;
+      lenMain = matchDistances[numDistancePairs - 2];
+      backMain = matchDistances[numDistancePairs - 1];
+    }
+    if (lenMain == 2 && backMain >= 0x80)
+      lenMain = 1;
+  }
+
+  if (repLens[repMaxIndex] >= 2)
+  {
+    if (repLens[repMaxIndex] + 1 >= lenMain ||
+        (repLens[repMaxIndex] + 2 >= lenMain && (backMain > (1 << 9))) ||
+        (repLens[repMaxIndex] + 3 >= lenMain && (backMain > (1 << 15))))
+    {
+      UInt32 lenRes;
+      *backRes = repMaxIndex;
+      lenRes = repLens[repMaxIndex];
+      MovePos(p, lenRes - 1);
+      return lenRes;
+    }
+  }
+
+  if (lenMain >= 2 && numAvailableBytes > 2)
+  {
+    UInt32 i;
+    numAvailableBytes = p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
+    p->longestMatchLength = ReadMatchDistances(p, &p->numDistancePairs);
+    if (p->longestMatchLength >= 2)
+    {
+      UInt32 newDistance = matchDistances[p->numDistancePairs - 1];
+      if ((p->longestMatchLength >= lenMain && newDistance < backMain) ||
+          (p->longestMatchLength == lenMain + 1 && !ChangePair(backMain, newDistance)) ||
+          (p->longestMatchLength > lenMain + 1) ||
+          (p->longestMatchLength + 1 >= lenMain && lenMain >= 3 && ChangePair(newDistance, backMain)))
+      {
+        p->longestMatchWasFound = True;
+        *backRes = (UInt32)(-1);
+        return 1;
+      }
+    }
+    data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
+    for (i = 0; i < LZMA_NUM_REPS; i++)
+    {
+      UInt32 len;
+      const Byte *data2 = data - (p->reps[i] + 1);
+      if (data[1] != data2[1] || data[2] != data2[2])
+      {
+        repLens[i] = 0;
+        continue;
+      }
+      for (len = 2; len < numAvailableBytes && data[len] == data2[len]; len++);
+      if (len + 1 >= lenMain)
+      {
+        p->longestMatchWasFound = True;
+        *backRes = (UInt32)(-1);
+        return 1;
+      }
+    }
+    *backRes = backMain + LZMA_NUM_REPS;
+    MovePos(p, lenMain - 2);
+    return lenMain;
+  }
+  *backRes = (UInt32)(-1);
+  return 1;
+}
+
+static void WriteEndMarker(CLzmaEnc *p, UInt32 posState)
+{
+  UInt32 len;
+  RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 1);
+  RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 0);
+  p->state = kMatchNextStates[p->state];
+  len = LZMA_MATCH_LEN_MIN;
+  LenEnc_Encode2(&p->lenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices);
+  RcTree_Encode(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], kNumPosSlotBits, (1 << kNumPosSlotBits) - 1);
+  RangeEnc_EncodeDirectBits(&p->rc, (((UInt32)1 << 30) - 1) >> kNumAlignBits, 30 - kNumAlignBits);
+  RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, kAlignMask);
+}
+
+static SRes CheckErrors(CLzmaEnc *p)
+{
+  if (p->result != SZ_OK)
+    return p->result;
+  if (p->rc.res != SZ_OK)
+    p->result = SZ_ERROR_WRITE;
+  if (p->matchFinderBase.result != SZ_OK)
+    p->result = SZ_ERROR_READ;
+  if (p->result != SZ_OK)
+    p->finished = True;
+  return p->result;
+}
+
+static SRes Flush(CLzmaEnc *p, UInt32 nowPos)
+{
+  /* ReleaseMFStream(); */
+  p->finished = True;
+  if (p->writeEndMark)
+    WriteEndMarker(p, nowPos & p->pbMask);
+  RangeEnc_FlushData(&p->rc);
+  RangeEnc_FlushStream(&p->rc);
+  return CheckErrors(p);
+}
+
+static void FillAlignPrices(CLzmaEnc *p)
+{
+  UInt32 i;
+  for (i = 0; i < kAlignTableSize; i++)
+    p->alignPrices[i] = RcTree_ReverseGetPrice(p->posAlignEncoder, kNumAlignBits, i, p->ProbPrices);
+  p->alignPriceCount = 0;
+}
+
+static void FillDistancesPrices(CLzmaEnc *p)
+{
+  UInt32 tempPrices[kNumFullDistances];
+  UInt32 i, lenToPosState;
+  for (i = kStartPosModelIndex; i < kNumFullDistances; i++)
+  {
+    UInt32 posSlot = GetPosSlot1(i);
+    UInt32 footerBits = ((posSlot >> 1) - 1);
+    UInt32 base = ((2 | (posSlot & 1)) << footerBits);
+    tempPrices[i] = RcTree_ReverseGetPrice(p->posEncoders + base - posSlot - 1, footerBits, i - base, p->ProbPrices);
+  }
+
+  for (lenToPosState = 0; lenToPosState < kNumLenToPosStates; lenToPosState++)
+  {
+    UInt32 posSlot;
+    const CLzmaProb *encoder = p->posSlotEncoder[lenToPosState];
+    UInt32 *posSlotPrices = p->posSlotPrices[lenToPosState];
+    for (posSlot = 0; posSlot < p->distTableSize; posSlot++)
+      posSlotPrices[posSlot] = RcTree_GetPrice(encoder, kNumPosSlotBits, posSlot, p->ProbPrices);
+    for (posSlot = kEndPosModelIndex; posSlot < p->distTableSize; posSlot++)
+      posSlotPrices[posSlot] += ((((posSlot >> 1) - 1) - kNumAlignBits) << kNumBitPriceShiftBits);
+
+    {
+      UInt32 *distancesPrices = p->distancesPrices[lenToPosState];
+      UInt32 i;
+      for (i = 0; i < kStartPosModelIndex; i++)
+        distancesPrices[i] = posSlotPrices[i];
+      for (; i < kNumFullDistances; i++)
+        distancesPrices[i] = posSlotPrices[GetPosSlot1(i)] + tempPrices[i];
+    }
+  }
+  p->matchPriceCount = 0;
+}
+
+void LzmaEnc_Construct(CLzmaEnc *p)
+{
+  RangeEnc_Construct(&p->rc);
+  MatchFinder_Construct(&p->matchFinderBase);
+  #ifdef COMPRESS_MF_MT
+  MatchFinderMt_Construct(&p->matchFinderMt);
+  p->matchFinderMt.MatchFinder = &p->matchFinderBase;
+  #endif
+
+  {
+    CLzmaEncProps props;
+    LzmaEncProps_Init(&props);
+    LzmaEnc_SetProps(p, &props);
+  }
+
+  #ifndef LZMA_LOG_BSR
+  LzmaEnc_FastPosInit(p->g_FastPos);
+  #endif
+
+  LzmaEnc_InitPriceTables(p->ProbPrices);
+  p->litProbs = 0;
+  p->saveState.litProbs = 0;
+}
+
+CLzmaEncHandle LzmaEnc_Create(ISzAlloc *alloc)
+{
+  void *p;
+  p = alloc->Alloc(alloc, sizeof(CLzmaEnc));
+  if (p != 0)
+    LzmaEnc_Construct((CLzmaEnc *)p);
+  return p;
+}
+
+void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAlloc *alloc)
+{
+  alloc->Free(alloc, p->litProbs);
+  alloc->Free(alloc, p->saveState.litProbs);
+  p->litProbs = 0;
+  p->saveState.litProbs = 0;
+}
+
+void LzmaEnc_Destruct(CLzmaEnc *p, ISzAlloc *alloc, ISzAlloc *allocBig)
+{
+  #ifdef COMPRESS_MF_MT
+  MatchFinderMt_Destruct(&p->matchFinderMt, allocBig);
+  #endif
+  MatchFinder_Free(&p->matchFinderBase, allocBig);
+  LzmaEnc_FreeLits(p, alloc);
+  RangeEnc_Free(&p->rc, alloc);
+}
+
+void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAlloc *alloc, ISzAlloc *allocBig)
+{
+  LzmaEnc_Destruct((CLzmaEnc *)p, alloc, allocBig);
+  alloc->Free(alloc, p);
+}
+
+static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, Bool useLimits, UInt32 maxPackSize, UInt32 maxUnpackSize)
+{
+  UInt32 nowPos32, startPos32;
+  if (p->inStream != 0)
+  {
+    p->matchFinderBase.stream = p->inStream;
+    p->matchFinder.Init(p->matchFinderObj);
+    p->inStream = 0;
+  }
+
+  if (p->finished)
+    return p->result;
+  RINOK(CheckErrors(p));
+
+  nowPos32 = (UInt32)p->nowPos64;
+  startPos32 = nowPos32;
+
+  if (p->nowPos64 == 0)
+  {
+    UInt32 numDistancePairs;
+    Byte curByte;
+    if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0)
+      return Flush(p, nowPos32);
+    ReadMatchDistances(p, &numDistancePairs);
+    RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][0], 0);
+    p->state = kLiteralNextStates[p->state];
+    curByte = p->matchFinder.GetIndexByte(p->matchFinderObj, 0 - p->additionalOffset);
+    LitEnc_Encode(&p->rc, p->litProbs, curByte);
+    p->additionalOffset--;
+    nowPos32++;
+  }
+
+  if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) != 0)
+  for (;;)
+  {
+    UInt32 pos, len, posState;
+
+    if (p->fastMode)
+      len = GetOptimumFast(p, &pos);
+    else
+      len = GetOptimum(p, nowPos32, &pos);
+
+    #ifdef SHOW_STAT2
+    printf("\n pos = %4X,   len = %d   pos = %d", nowPos32, len, pos);
+    #endif
+
+    posState = nowPos32 & p->pbMask;
+    if (len == 1 && pos == 0xFFFFFFFF)
+    {
+      Byte curByte;
+      CLzmaProb *probs;
+      const Byte *data;
+
+      RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 0);
+      data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset;
+      curByte = *data;
+      probs = LIT_PROBS(nowPos32, *(data - 1));
+      if (IsCharState(p->state))
+        LitEnc_Encode(&p->rc, probs, curByte);
+      else
+        LitEnc_EncodeMatched(&p->rc, probs, curByte, *(data - p->reps[0] - 1));
+      p->state = kLiteralNextStates[p->state];
+    }
+    else
+    {
+      RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 1);
+      if (pos < LZMA_NUM_REPS)
+      {
+        RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 1);
+        if (pos == 0)
+        {
+          RangeEnc_EncodeBit(&p->rc, &p->isRepG0[p->state], 0);
+          RangeEnc_EncodeBit(&p->rc, &p->isRep0Long[p->state][posState], ((len == 1) ? 0 : 1));
+        }
+        else
+        {
+          UInt32 distance = p->reps[pos];
+          RangeEnc_EncodeBit(&p->rc, &p->isRepG0[p->state], 1);
+          if (pos == 1)
+            RangeEnc_EncodeBit(&p->rc, &p->isRepG1[p->state], 0);
+          else
+          {
+            RangeEnc_EncodeBit(&p->rc, &p->isRepG1[p->state], 1);
+            RangeEnc_EncodeBit(&p->rc, &p->isRepG2[p->state], pos - 2);
+            if (pos == 3)
+              p->reps[3] = p->reps[2];
+            p->reps[2] = p->reps[1];
+          }
+          p->reps[1] = p->reps[0];
+          p->reps[0] = distance;
+        }
+        if (len == 1)
+          p->state = kShortRepNextStates[p->state];
+        else
+        {
+          LenEnc_Encode2(&p->repLenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices);
+          p->state = kRepNextStates[p->state];
+        }
+      }
+      else
+      {
+        UInt32 posSlot;
+        RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 0);
+        p->state = kMatchNextStates[p->state];
+        LenEnc_Encode2(&p->lenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices);
+        pos -= LZMA_NUM_REPS;
+        GetPosSlot(pos, posSlot);
+        RcTree_Encode(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], kNumPosSlotBits, posSlot);
+
+        if (posSlot >= kStartPosModelIndex)
+        {
+          UInt32 footerBits = ((posSlot >> 1) - 1);
+          UInt32 base = ((2 | (posSlot & 1)) << footerBits);
+          UInt32 posReduced = pos - base;
+
+          if (posSlot < kEndPosModelIndex)
+            RcTree_ReverseEncode(&p->rc, p->posEncoders + base - posSlot - 1, footerBits, posReduced);
+          else
+          {
+            RangeEnc_EncodeDirectBits(&p->rc, posReduced >> kNumAlignBits, footerBits - kNumAlignBits);
+            RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, posReduced & kAlignMask);
+            p->alignPriceCount++;
+          }
+        }
+        p->reps[3] = p->reps[2];
+        p->reps[2] = p->reps[1];
+        p->reps[1] = p->reps[0];
+        p->reps[0] = pos;
+        p->matchPriceCount++;
+      }
+    }
+    p->additionalOffset -= len;
+    nowPos32 += len;
+    if (p->additionalOffset == 0)
+    {
+      UInt32 processed;
+      if (!p->fastMode)
+      {
+        if (p->matchPriceCount >= (1 << 7))
+          FillDistancesPrices(p);
+        if (p->alignPriceCount >= kAlignTableSize)
+          FillAlignPrices(p);
+      }
+      if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0)
+        break;
+      processed = nowPos32 - startPos32;
+      if (useLimits)
+      {
+        if (processed + kNumOpts + 300 >= maxUnpackSize ||
+            RangeEnc_GetProcessed(&p->rc) + kNumOpts * 2 >= maxPackSize)
+          break;
+      }
+      else if (processed >= (1 << 15))
+      {
+        p->nowPos64 += nowPos32 - startPos32;
+        return CheckErrors(p);
+      }
+    }
+  }
+  p->nowPos64 += nowPos32 - startPos32;
+  return Flush(p, nowPos32);
+}
+
+#define kBigHashDicLimit ((UInt32)1 << 24)
+
+static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAlloc *alloc, ISzAlloc *allocBig)
+{
+  UInt32 beforeSize = kNumOpts;
+  Bool btMode;
+  if (!RangeEnc_Alloc(&p->rc, alloc))
+    return SZ_ERROR_MEM;
+  btMode = (p->matchFinderBase.btMode != 0);
+  #ifdef COMPRESS_MF_MT
+  p->mtMode = (p->multiThread && !p->fastMode && btMode);
+  #endif
+
+  {
+    unsigned lclp = p->lc + p->lp;
+    if (p->litProbs == 0 || p->saveState.litProbs == 0 || p->lclp != lclp)
+    {
+      LzmaEnc_FreeLits(p, alloc);
+      p->litProbs = (CLzmaProb *)alloc->Alloc(alloc, (0x300 << lclp) * sizeof(CLzmaProb));
+      p->saveState.litProbs = (CLzmaProb *)alloc->Alloc(alloc, (0x300 << lclp) * sizeof(CLzmaProb));
+      if (p->litProbs == 0 || p->saveState.litProbs == 0)
+      {
+        LzmaEnc_FreeLits(p, alloc);
+        return SZ_ERROR_MEM;
+      }
+      p->lclp = lclp;
+    }
+  }
+
+  p->matchFinderBase.bigHash = (p->dictSize > kBigHashDicLimit);
+
+  if (beforeSize + p->dictSize < keepWindowSize)
+    beforeSize = keepWindowSize - p->dictSize;
+
+  #ifdef COMPRESS_MF_MT
+  if (p->mtMode)
+  {
+    RINOK(MatchFinderMt_Create(&p->matchFinderMt, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig));
+    p->matchFinderObj = &p->matchFinderMt;
+    MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder);
+  }
+  else
+  #endif
+  {
+    if (!MatchFinder_Create(&p->matchFinderBase, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig))
+      return SZ_ERROR_MEM;
+    p->matchFinderObj = &p->matchFinderBase;
+    MatchFinder_CreateVTable(&p->matchFinderBase, &p->matchFinder);
+  }
+  return SZ_OK;
+}
+
+void LzmaEnc_Init(CLzmaEnc *p)
+{
+  UInt32 i;
+  p->state = 0;
+  for(i = 0 ; i < LZMA_NUM_REPS; i++)
+    p->reps[i] = 0;
+
+  RangeEnc_Init(&p->rc);
+
+
+  for (i = 0; i < kNumStates; i++)
+  {
+    UInt32 j;
+    for (j = 0; j < LZMA_NUM_PB_STATES_MAX; j++)
+    {
+      p->isMatch[i][j] = kProbInitValue;
+      p->isRep0Long[i][j] = kProbInitValue;
+    }
+    p->isRep[i] = kProbInitValue;
+    p->isRepG0[i] = kProbInitValue;
+    p->isRepG1[i] = kProbInitValue;
+    p->isRepG2[i] = kProbInitValue;
+  }
+
+  {
+    UInt32 num = 0x300 << (p->lp + p->lc);
+    for (i = 0; i < num; i++)
+      p->litProbs[i] = kProbInitValue;
+  }
+
+  {
+    for (i = 0; i < kNumLenToPosStates; i++)
+    {
+      CLzmaProb *probs = p->posSlotEncoder[i];
+      UInt32 j;
+      for (j = 0; j < (1 << kNumPosSlotBits); j++)
+        probs[j] = kProbInitValue;
+    }
+  }
+  {
+    for(i = 0; i < kNumFullDistances - kEndPosModelIndex; i++)
+      p->posEncoders[i] = kProbInitValue;
+  }
+
+  LenEnc_Init(&p->lenEnc.p);
+  LenEnc_Init(&p->repLenEnc.p);
+
+  for (i = 0; i < (1 << kNumAlignBits); i++)
+    p->posAlignEncoder[i] = kProbInitValue;
+
+  p->longestMatchWasFound = False;
+  p->optimumEndIndex = 0;
+  p->optimumCurrentIndex = 0;
+  p->additionalOffset = 0;
+
+  p->pbMask = (1 << p->pb) - 1;
+  p->lpMask = (1 << p->lp) - 1;
+}
+
+void LzmaEnc_InitPrices(CLzmaEnc *p)
+{
+  if (!p->fastMode)
+  {
+    FillDistancesPrices(p);
+    FillAlignPrices(p);
+  }
+
+  p->lenEnc.tableSize =
+  p->repLenEnc.tableSize =
+      p->numFastBytes + 1 - LZMA_MATCH_LEN_MIN;
+  LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, p->ProbPrices);
+  LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, p->ProbPrices);
+}
+
+static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, UInt32 keepWindowSize, ISzAlloc *alloc, ISzAlloc *allocBig)
+{
+  UInt32 i;
+  for (i = 0; i < (UInt32)kDicLogSizeMaxCompress; i++)
+    if (p->dictSize <= ((UInt32)1 << i))
+      break;
+  p->distTableSize = i * 2;
+
+  p->finished = False;
+  p->result = SZ_OK;
+  RINOK(LzmaEnc_Alloc(p, keepWindowSize, alloc, allocBig));
+  LzmaEnc_Init(p);
+  LzmaEnc_InitPrices(p);
+  p->nowPos64 = 0;
+  return SZ_OK;
+}
+
+static SRes LzmaEnc_Prepare(CLzmaEncHandle pp, ISeqInStream *inStream, ISeqOutStream *outStream,
+    ISzAlloc *alloc, ISzAlloc *allocBig)
+{
+  CLzmaEnc *p = (CLzmaEnc *)pp;
+  p->inStream = inStream;
+  p->rc.outStream = outStream;
+  return LzmaEnc_AllocAndInit(p, 0, alloc, allocBig);
+}
+
+SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp,
+    ISeqInStream *inStream, UInt32 keepWindowSize,
+    ISzAlloc *alloc, ISzAlloc *allocBig)
+{
+  CLzmaEnc *p = (CLzmaEnc *)pp;
+  p->inStream = inStream;
+  return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig);
+}
+
+static void LzmaEnc_SetInputBuf(CLzmaEnc *p, const Byte *src, SizeT srcLen)
+{
+  p->seqBufInStream.funcTable.Read = MyRead;
+  p->seqBufInStream.data = src;
+  p->seqBufInStream.rem = srcLen;
+}
+
+SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen,
+    UInt32 keepWindowSize, ISzAlloc *alloc, ISzAlloc *allocBig)
+{
+  CLzmaEnc *p = (CLzmaEnc *)pp;
+  LzmaEnc_SetInputBuf(p, src, srcLen);
+  p->inStream = &p->seqBufInStream.funcTable;
+  return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig);
+}
+
+void LzmaEnc_Finish(CLzmaEncHandle pp)
+{
+  #ifdef COMPRESS_MF_MT
+  CLzmaEnc *p = (CLzmaEnc *)pp;
+  if (p->mtMode)
+    MatchFinderMt_ReleaseStream(&p->matchFinderMt);
+  #endif
+}
+
+typedef struct _CSeqOutStreamBuf
+{
+  ISeqOutStream funcTable;
+  Byte *data;
+  SizeT rem;
+  Bool overflow;
+} CSeqOutStreamBuf;
+
+static size_t MyWrite(void *pp, const void *data, size_t size)
+{
+  CSeqOutStreamBuf *p = (CSeqOutStreamBuf *)pp;
+  if (p->rem < size)
+  {
+    size = p->rem;
+    p->overflow = True;
+  }
+  memcpy(p->data, data, size);
+  p->rem -= size;
+  p->data += size;
+  return size;
+}
+
+
+UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp)
+{
+  const CLzmaEnc *p = (CLzmaEnc *)pp;
+  return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
+}
+
+const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp)
+{
+  const CLzmaEnc *p = (CLzmaEnc *)pp;
+  return p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset;
+}
+
+SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, Bool reInit,
+    Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize)
+{
+  CLzmaEnc *p = (CLzmaEnc *)pp;
+  UInt64 nowPos64;
+  SRes res;
+  CSeqOutStreamBuf outStream;
+
+  outStream.funcTable.Write = MyWrite;
+  outStream.data = dest;
+  outStream.rem = *destLen;
+  outStream.overflow = False;
+
+  p->writeEndMark = False;
+  p->finished = False;
+  p->result = SZ_OK;
+
+  if (reInit)
+    LzmaEnc_Init(p);
+  LzmaEnc_InitPrices(p);
+  nowPos64 = p->nowPos64;
+  RangeEnc_Init(&p->rc);
+  p->rc.outStream = &outStream.funcTable;
+
+  res = LzmaEnc_CodeOneBlock(pp, True, desiredPackSize, *unpackSize);
+
+  *unpackSize = (UInt32)(p->nowPos64 - nowPos64);
+  *destLen -= outStream.rem;
+  if (outStream.overflow)
+    return SZ_ERROR_OUTPUT_EOF;
+
+  return res;
+}
+
+SRes LzmaEnc_Encode(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, ICompressProgress *progress,
+    ISzAlloc *alloc, ISzAlloc *allocBig)
+{
+  CLzmaEnc *p = (CLzmaEnc *)pp;
+  SRes res = SZ_OK;
+
+  #ifdef COMPRESS_MF_MT
+  Byte allocaDummy[0x300];
+  int i = 0;
+  for (i = 0; i < 16; i++)
+    allocaDummy[i] = (Byte)i;
+  #endif
+
+  RINOK(LzmaEnc_Prepare(pp, inStream, outStream, alloc, allocBig));
+
+  for (;;)
+  {
+    res = LzmaEnc_CodeOneBlock(pp, False, 0, 0);
+    if (res != SZ_OK || p->finished != 0)
+      break;
+    if (progress != 0)
+    {
+      res = progress->Progress(progress, p->nowPos64, RangeEnc_GetProcessed(&p->rc));
+      if (res != SZ_OK)
+      {
+        res = SZ_ERROR_PROGRESS;
+        break;
+      }
+    }
+  }
+  LzmaEnc_Finish(pp);
+  return res;
+}
+
+SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size)
+{
+  CLzmaEnc *p = (CLzmaEnc *)pp;
+  int i;
+  UInt32 dictSize = p->dictSize;
+  if (*size < LZMA_PROPS_SIZE)
+    return SZ_ERROR_PARAM;
+  *size = LZMA_PROPS_SIZE;
+  props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc);
+
+  for (i = 11; i <= 30; i++)
+  {
+    if (dictSize <= ((UInt32)2 << i))
+    {
+      dictSize = (2 << i);
+      break;
+    }
+    if (dictSize <= ((UInt32)3 << i))
+    {
+      dictSize = (3 << i);
+      break;
+    }
+  }
+
+  for (i = 0; i < 4; i++)
+    props[1 + i] = (Byte)(dictSize >> (8 * i));
+  return SZ_OK;
+}
+
+SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
+    int writeEndMark, ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig)
+{
+  SRes res;
+  CLzmaEnc *p = (CLzmaEnc *)pp;
+
+  CSeqOutStreamBuf outStream;
+
+  LzmaEnc_SetInputBuf(p, src, srcLen);
+
+  outStream.funcTable.Write = MyWrite;
+  outStream.data = dest;
+  outStream.rem = *destLen;
+  outStream.overflow = False;
+
+  p->writeEndMark = writeEndMark;
+  res = LzmaEnc_Encode(pp, &outStream.funcTable, &p->seqBufInStream.funcTable,
+      progress, alloc, allocBig);
+
+  *destLen -= outStream.rem;
+  if (outStream.overflow)
+    return SZ_ERROR_OUTPUT_EOF;
+  return res;
+}
+
+SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
+    const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,
+    ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig)
+{
+  CLzmaEnc *p = (CLzmaEnc *)LzmaEnc_Create(alloc);
+  SRes res;
+  if (p == 0)
+    return SZ_ERROR_MEM;
+
+  res = LzmaEnc_SetProps(p, props);
+  if (res == SZ_OK)
+  {
+    res = LzmaEnc_WriteProperties(p, propsEncoded, propsSize);
+    if (res == SZ_OK)
+      res = LzmaEnc_MemEncode(p, dest, destLen, src, srcLen,
+          writeEndMark, progress, alloc, allocBig);
+  }
+
+  LzmaEnc_Destroy(p, alloc, allocBig);
+  return res;
+}
diff --git a/util/i386/pc/grub-mkimage.c b/util/i386/pc/grub-mkimage.c
index 189ec4e..e992655 100644
--- a/util/i386/pc/grub-mkimage.c
+++ b/util/i386/pc/grub-mkimage.c
@@ -36,12 +36,22 @@
 #define _GNU_SOURCE	1
 #include <getopt.h>
 
+#if defined(ENABLE_LZO)
+
 #if defined(HAVE_LZO_LZO1X_H)
 # include <lzo/lzo1x.h>
 #elif defined(HAVE_LZO1X_H)
 # include <lzo1x.h>
 #endif
 
+#elif defined(ENABLE_LZMA)
+
+#include <grub/lib/LzmaEnc.h>
+
+#endif
+
+#if defined(ENABLE_LZO)
+
 static void
 compress_kernel (char *kernel_img, size_t kernel_size,
 		 char **core_img, size_t *core_size)
@@ -76,6 +86,48 @@ compress_kernel (char *kernel_img, size_t kernel_size,
   *core_size = (size_t) size + GRUB_KERNEL_MACHINE_RAW_SIZE;
 }
 
+#elif defined(ENABLE_LZMA)
+
+static void *SzAlloc(void *p, size_t size) { p = p; return xmalloc(size); }
+static void SzFree(void *p, void *address) { p = p; free(address); }
+static ISzAlloc g_Alloc = { SzAlloc, SzFree };
+
+static void
+compress_kernel (char *kernel_img, size_t kernel_size,
+		 char **core_img, size_t *core_size)
+{
+  CLzmaEncProps props;
+  unsigned char out_props[5];
+  size_t out_props_size = 5;
+
+  LzmaEncProps_Init(&props);
+  props.dictSize = 1 << 16;
+  props.lc = 3;
+  props.lp = 0;
+  props.pb = 2;
+  props.numThreads = 1;
+
+  grub_util_info ("kernel_img=%p, kernel_size=0x%x", kernel_img, kernel_size);
+  if (kernel_size < GRUB_KERNEL_MACHINE_RAW_SIZE)
+    grub_util_error ("the core image is too small");
+
+  *core_img = xmalloc (kernel_size);
+  memcpy (*core_img, kernel_img, GRUB_KERNEL_MACHINE_RAW_SIZE);
+
+  *core_size = kernel_size - GRUB_KERNEL_MACHINE_RAW_SIZE;
+  if (LzmaEncode((unsigned char *) *core_img + GRUB_KERNEL_MACHINE_RAW_SIZE,
+                 core_size,
+                 (unsigned char *) kernel_img + GRUB_KERNEL_MACHINE_RAW_SIZE,
+                 kernel_size - GRUB_KERNEL_MACHINE_RAW_SIZE,
+                 &props, out_props, &out_props_size,
+                 0, NULL, &g_Alloc, &g_Alloc) != SZ_OK)
+    grub_util_error ("cannot compress the kernel image");
+
+  *core_size += GRUB_KERNEL_MACHINE_RAW_SIZE;
+}
+
+#endif
+
 static void
 generate_image (const char *dir, char *prefix, FILE *out, char *mods[], char *memdisk_path)
 {

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH] LZMA support in i386-pc kernel
  2008-07-02 13:39 [PATCH] LZMA support in i386-pc kernel Bean
  2008-07-02 14:42 ` Robert Millan
@ 2008-07-02 17:50 ` Vesa Jääskeläinen
  2008-07-02 18:27   ` Bean
  2008-07-03 19:37 ` Isaac Dupree
  2 siblings, 1 reply; 14+ messages in thread
From: Vesa Jääskeläinen @ 2008-07-02 17:50 UTC (permalink / raw)
  To: The development of GRUB 2

Bean wrote:
> Hi,
> 
> This patch add support for lzma decompression. The assembly code
> lzma_decode.S is manually optimized to reduce size. The result decoder
> is tiny, only 416 bytes longer than the lzo version.

Not bad.

> I also include lzma encode from the LZMA SDK. grub needs to use the
> ANSI-C version of encoder, which is only present in the latest 4.58
> beta. I can't find ready to use shared library in most distro.
> Including the encoder/decoder has advantages as well. We don't need to
> worry about the host os, and lzma encoder/decoder can be used in other
> place, like font compression.

We can load more complete decoder module after we can load modules and 
leave more optimized version in boot code.

> I use lzma as default, it's still possible to use the old lzo
> compression, you just need to add --enable-lzo option when running
> configure.
> 
> PS, here are some information about the lzma decoder:
> 
> properties: lc = 3 lp = 0 pb = 2
> memory requirement for the decoder: 15980 bytes

Can we probe for best values and then use always the best? Perhaps embed 
them as constants to boot code?

> Result of
> ./grub-mkimage -o core.img biosdisk pc ext2 lvm raid
> 
> lzma version: 27,776 bytes
> lzo version: 32,768 bytes

Not bad. Not bad at all.




^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] LZMA support in i386-pc kernel
  2008-07-02 17:50 ` Vesa Jääskeläinen
@ 2008-07-02 18:27   ` Bean
  2008-07-02 18:45     ` Vesa Jääskeläinen
  0 siblings, 1 reply; 14+ messages in thread
From: Bean @ 2008-07-02 18:27 UTC (permalink / raw)
  To: The development of GRUB 2

On Thu, Jul 3, 2008 at 1:50 AM, Vesa Jääskeläinen <chaac@nic.fi> wrote:
>> properties: lc = 3 lp = 0 pb = 2
>> memory requirement for the decoder: 15980 bytes
>
> Can we probe for best values and then use always the best? Perhaps embed
> them as constants to boot code?

Hi,

These are the default value of lzma, they are tricky to set, and not
much can be gained by adjusting them anyway. Inside the boot code, I
already use constant for these value, although it can also uses
variable if macro FIXED_PROPS is not defined.

-- 
Bean



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] LZMA support in i386-pc kernel
  2008-07-02 18:27   ` Bean
@ 2008-07-02 18:45     ` Vesa Jääskeläinen
  0 siblings, 0 replies; 14+ messages in thread
From: Vesa Jääskeläinen @ 2008-07-02 18:45 UTC (permalink / raw)
  To: The development of GRUB 2

Bean wrote:
> On Thu, Jul 3, 2008 at 1:50 AM, Vesa Jääskeläinen <chaac@nic.fi> wrote:
>>> properties: lc = 3 lp = 0 pb = 2
>>> memory requirement for the decoder: 15980 bytes
>> Can we probe for best values and then use always the best? Perhaps embed
>> them as constants to boot code?
> 
> Hi,
> 
> These are the default value of lzma, they are tricky to set, and not
> much can be gained by adjusting them anyway. Inside the boot code, I
> already use constant for these value, although it can also uses
> variable if macro FIXED_PROPS is not defined.
> 

Well... If they are default and you did not pick them by hand. Go with 
defaults then :)




^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] LZMA support in i386-pc kernel
  2008-07-02 15:11   ` Bean
  2008-07-02 16:00     ` Bean
@ 2008-07-03 14:06     ` Robert Millan
  1 sibling, 0 replies; 14+ messages in thread
From: Robert Millan @ 2008-07-03 14:06 UTC (permalink / raw)
  To: The development of GRUB 2

On Wed, Jul 02, 2008 at 11:11:28PM +0800, Bean wrote:
> >
> > Lenny's most likely going to ship with lzma 4.43.  Is that good?
> 
> The c encoder first appears in 4.58 beta. Previous version of lzma
> only have cpp version.

That's too bad, I guess it'll have to be for lenny+1.

> >> +#define FIXED_PROPS
> >> [...]
> >> +#ifdef FIXED_PROPS
> >
> > What's this option for?  Some comment would be nice.
> 
> lzma have three properties that control its behavior, lc, lp and pb.
> We can use these values as variable or constant. The default value, 3,
> 0, 2 is nice in most case, we hardly need to change them. So I use it
> as constant to save some extra space.

Could you make a comment out of this? :-)

> >> +#if 0
> >> +
> >> +DbgOut:
> >
> > This is just for debugging, right?  Maybe "#ifdef DEBUG" or so would be
> > better, to make it clear.
> 
> Yes, this is used in debugging. Now it's working, it's not needed
> anymore. I think it's better to keep it disable like this. The kernel
> raw space is critical, if we happen to define DEBUG, the decoder code
> would expand and kernel.img would fail to compile any more.

Ok.

> >> +#ifndef ASM_FILE
> >> +     xorl    %eax, %eax
> >> +#endif
> >
> > Why?  I thought ASM_FILE always ought to be defined for asm files in GRUB.
> 
> This is also used in debugging. Previously, I link it with c program
> to check the decoder, which require to keep register like %esi, %edi,
> %ebx. But inside grub, there is no need to keep them. I use ASM_FILE
> to distinguish between these two conditions.

Ok.  If you intend to keep it, please add a note explaining why; it
looks a bit puzzling without it.

-- 
Robert Millan

<GPLv2> I know my rights; I want my phone call!
<DRM> What good is a phone call… if you are unable to speak?
(as seen on /.)



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] LZMA support in i386-pc kernel
  2008-07-02 16:00     ` Bean
@ 2008-07-03 18:24       ` Marco Gerards
  2008-07-03 18:56         ` Bean
  0 siblings, 1 reply; 14+ messages in thread
From: Marco Gerards @ 2008-07-03 18:24 UTC (permalink / raw)
  To: The development of GRUB 2

Hi,

Bean <bean123ch@gmail.com> writes:

> This is the new patch.

Can you please include a changelog entry?

--
Marco




^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] LZMA support in i386-pc kernel
  2008-07-03 18:24       ` Marco Gerards
@ 2008-07-03 18:56         ` Bean
  0 siblings, 0 replies; 14+ messages in thread
From: Bean @ 2008-07-03 18:56 UTC (permalink / raw)
  To: The development of GRUB 2

On Fri, Jul 4, 2008 at 2:24 AM, Marco Gerards <mgerards@xs4all.nl> wrote:
> Hi,
>
> Bean <bean123ch@gmail.com> writes:
>
>> This is the new patch.
>
> Can you please include a changelog entry?

Hi,

Oh, this is the changelog

2008-07-04  Bean  <bean123ch@gmail.com>

	* Makefile.in (enable_lzo): New rule.

	* i386-pc.rmk (grub_mkimage_SOURCES): New test with enable_lzo.

	* configure.ac (ENABLE_LZO): New option --enable-lzo.

	* include/grub/i386/pc/kernel.h (GRUB_KERNEL_MACHINE_RAW_SIZE): Change
	its value accordding to the compression algorithm used, lzo or lzma.

	* util/i386/pc/grub-mkimage.c (compress_kernel): Use different compression
	according to macro ENABLE_LZO.

	* kern/i386/pc/startup.S (codestart): Likewise.

	* kern/i386/pc/lzma_decode.S: New file.

	* include/grub/lib/LzFind.h: Likewise.

	* include/grub/lib/LzHash.h: Likewise.

	* include/grub/lib/LzmaDec.h: Likewise.

	* lib/LzFind.c: Likewise.

	* lib/LzmaDec.c: Likewise.

	* lib/LzmaEnc.c: Likewise.

-- 
Bean



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] LZMA support in i386-pc kernel
  2008-07-02 13:39 [PATCH] LZMA support in i386-pc kernel Bean
  2008-07-02 14:42 ` Robert Millan
  2008-07-02 17:50 ` Vesa Jääskeläinen
@ 2008-07-03 19:37 ` Isaac Dupree
  2008-07-03 19:59   ` Bean
  2 siblings, 1 reply; 14+ messages in thread
From: Isaac Dupree @ 2008-07-03 19:37 UTC (permalink / raw)
  To: The development of GRUB 2

by the way, is LZMA decoding slow enough to be noticable on old 
machines, every boot time? (say, more than 1 second to decode core.img?)

-Isaac




^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] LZMA support in i386-pc kernel
  2008-07-03 19:37 ` Isaac Dupree
@ 2008-07-03 19:59   ` Bean
  2008-07-03 20:11     ` Javier Martín
  0 siblings, 1 reply; 14+ messages in thread
From: Bean @ 2008-07-03 19:59 UTC (permalink / raw)
  To: The development of GRUB 2

On Fri, Jul 4, 2008 at 3:37 AM, Isaac Dupree
<id@isaac.cedarswampstudios.org> wrote:
> by the way, is LZMA decoding slow enough to be noticable on old machines,
> every boot time? (say, more than 1 second to decode core.img?)

The code is highly optimized for size, speed may hurt a little bit.
But anyway, today's machine should have no problem with it. Besides,
the time spent to load font file or a true color splash image may well
exceed it

-- 
Bean



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] LZMA support in i386-pc kernel
  2008-07-03 19:59   ` Bean
@ 2008-07-03 20:11     ` Javier Martín
  2008-07-13  2:03       ` Bean
  0 siblings, 1 reply; 14+ messages in thread
From: Javier Martín @ 2008-07-03 20:11 UTC (permalink / raw)
  To: The development of GRUB 2

[-- Attachment #1: Type: text/plain, Size: 754 bytes --]

El vie, 04-07-2008 a las 03:59 +0800, Bean escribió:
> On Fri, Jul 4, 2008 at 3:37 AM, Isaac Dupree
> <id@isaac.cedarswampstudios.org> wrote:
> > by the way, is LZMA decoding slow enough to be noticable on old machines,
> > every boot time? (say, more than 1 second to decode core.img?)
> 
> The code is highly optimized for size, speed may hurt a little bit.
> But anyway, today's machine should have no problem with it. Besides,
> the time spent to load font file or a true color splash image may well
> exceed it
Besides, on old machines there is also the possibility of using LZO - I
doubt someone would try to put a setup which required embedding
[biosdisk pc raid lvm ext2], which is what pushed core.img over the
limit, on a 80386.

[-- Attachment #2: Esta parte del mensaje está firmada digitalmente --]
[-- Type: application/pgp-signature, Size: 827 bytes --]

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] LZMA support in i386-pc kernel
  2008-07-03 20:11     ` Javier Martín
@ 2008-07-13  2:03       ` Bean
  0 siblings, 0 replies; 14+ messages in thread
From: Bean @ 2008-07-13  2:03 UTC (permalink / raw)
  To: The development of GRUB 2

Committed.

-- 
Bean



^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2008-07-13  2:03 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-07-02 13:39 [PATCH] LZMA support in i386-pc kernel Bean
2008-07-02 14:42 ` Robert Millan
2008-07-02 15:11   ` Bean
2008-07-02 16:00     ` Bean
2008-07-03 18:24       ` Marco Gerards
2008-07-03 18:56         ` Bean
2008-07-03 14:06     ` Robert Millan
2008-07-02 17:50 ` Vesa Jääskeläinen
2008-07-02 18:27   ` Bean
2008-07-02 18:45     ` Vesa Jääskeläinen
2008-07-03 19:37 ` Isaac Dupree
2008-07-03 19:59   ` Bean
2008-07-03 20:11     ` Javier Martín
2008-07-13  2:03       ` Bean

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.