diff --git a/.gitignore b/.gitignore
index 8641d7f9963..a31ffdc79c5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -44,3 +44,9 @@ ipch/
 
 # Other files
 .directory
+_codelite
+_zstdbench
+
+lib/zstd_opt_LZ5.c
+lib/zstd_opt_llen.c
+lib/zstd_opt_nollen.c
\ No newline at end of file
diff --git a/.travis.yml b/.travis.yml
index 065e6f143b4..fda0b05b55c 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -2,7 +2,6 @@ language: c
 
 before_install:
   - sudo apt-get update  -qq
-  - sudo apt-get install -qq gcc-arm-linux-gnueabi
   - sudo apt-get install -qq clang
   - sudo apt-get install -qq g++-multilib
   - sudo apt-get install -qq gcc-multilib
@@ -13,7 +12,7 @@ env:
   - ZSTD_TRAVIS_CI_ENV=cmaketest
   - ZSTD_TRAVIS_CI_ENV=clangtest  
   - ZSTD_TRAVIS_CI_ENV=gpptest  
-  - ZSTD_TRAVIS_CI_ENV=armtest  
+  - ZSTD_TRAVIS_CI_ENV=armtest-w-install  
   - ZSTD_TRAVIS_CI_ENV=test  
   - ZSTD_TRAVIS_CI_ENV="-C programs test32"  
   - ZSTD_TRAVIS_CI_ENV="-C programs test-zstd_nolegacy"
diff --git a/Makefile b/Makefile
index 9ff867fbb2a..93d5e05286c 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 # ################################################################
 # zstd - Makefile
-# Copyright (C) Yann Collet 2014-2015
+# Copyright (C) Yann Collet 2014-2016
 # All rights reserved.
 # 
 # BSD license
@@ -27,16 +27,14 @@
 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 # 
 # You can contact the author at :
-#  - zstd source repository : https://github.com/Cyan4973/zstd
-#  - Public forum : https://groups.google.com/forum/#!forum/lz4c
+#  - zstd homepage : http://www.zstd.net/
 # ################################################################
 
 # force a version number : uncomment below export (otherwise, default to the one declared into zstd.h)
-#export VERSION := 0.4.6
+#export VERSION := 0.5.1
 
 PRGDIR  = programs
 ZSTDDIR = lib
-DICTDIR = dictBuilder
 
 # Define nul output
 ifneq (,$(filter Windows%,$(OS)))
@@ -52,7 +50,6 @@ default: zstdprogram
 all: 
 	$(MAKE) -C $(ZSTDDIR) $@
 	$(MAKE) -C $(PRGDIR) $@
-	$(MAKE) -C $(DICTDIR) $@
 
 zstdprogram:
 	$(MAKE) -C $(PRGDIR)
@@ -60,7 +57,6 @@ zstdprogram:
 clean:
 	@$(MAKE) -C $(ZSTDDIR) $@ > $(VOID)
 	@$(MAKE) -C $(PRGDIR) $@ > $(VOID)
-	@$(MAKE) -C $(DICTDIR) $@ > $(VOID)
 	@echo Cleaning completed
 
 
@@ -81,7 +77,6 @@ travis-install:
 
 test:
 	$(MAKE) -C $(PRGDIR) $@
-	$(MAKE) -C $(DICTDIR) $@
 
 cmaketest:
 	cd contrib/cmake ; cmake . ; $(MAKE)
@@ -94,8 +89,34 @@ gpptest: clean
 	$(MAKE) all CC=g++ CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"
 
 armtest: clean
-	$(MAKE) -C $(ZSTDDIR) all CC=arm-linux-gnueabi-gcc MOREFLAGS="-Werror"
-	$(MAKE) -C $(PRGDIR) CC=arm-linux-gnueabi-gcc MOREFLAGS="-Werror -static"
+#	$(MAKE) -C $(ZSTDDIR) all CC=arm-linux-gnueabi-gcc MOREFLAGS="-Werror"
+	$(MAKE) -C $(PRGDIR) datagen   # use native, faster
+	$(MAKE) -C $(PRGDIR) test CC=arm-linux-gnueabi-gcc ZSTDRTTEST= MOREFLAGS=-static # MOREFLAGS="-Werror -static"
+
+# for Travis CI
+arminstall: clean
+	sudo apt-get install -y -q qemu   # -y: assume yes, nobody can answer the confirmation prompt on the CI host
+	sudo apt-get install -y -q binfmt-support
+	sudo apt-get install -y -q qemu-user-static
+	sudo apt-get install -y -q gcc-arm-linux-gnueabi
+
+# for Travis CI
+armtest-w-install: clean arminstall armtest   # relies on prerequisites being processed in the listed order (armtest does not itself depend on arminstall); avoid -j here
+
+ppctest: clean
+	$(MAKE) -C $(PRGDIR) datagen   # use native, faster
+	$(MAKE) -C $(PRGDIR) test CC=powerpc-linux-gnu-gcc ZSTDRTTEST= MOREFLAGS=-static # MOREFLAGS="-Werror -static" 
+
+# for Travis CI
+ppcinstall: clean
+	sudo apt-get install -y -q qemu   # -y: assume yes, nobody can answer the confirmation prompt on the CI host
+	sudo apt-get install -y -q binfmt-support
+	sudo apt-get install -y -q qemu-user-static
+	sudo apt-get update  -q
+	sudo apt-get install -y -q gcc-powerpc-linux-gnu   # unfortunately, doesn't work on Travis CI (package not available)
+
+# for Travis CI
+ppctest-w-install: clean ppcinstall ppctest   # relies on prerequisites being processed in the listed order (ppctest does not itself depend on ppcinstall); avoid -j here
 
 usan: clean
 	$(MAKE) test CC=clang MOREFLAGS="-g -fsanitize=undefined"
diff --git a/NEWS b/NEWS
index 46b170a1058..ccbf15eb583 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,10 @@
+v0.5.1
+New : Optimal parsing => Very high compression modes, thanks to Przemyslaw Skibinski
+Changed : Dictionary builder integrated into libzstd and zstd cli
+Changed (!) : zstd cli now uses "multiple input files" as default mode. See `zstd -h`.
+Fix : high compression modes for big-endian platforms
+New : zstd cli : `-t` | `--test` command
+
 v0.5.0
 New : dictionary builder utility
 Changed : streaming & dictionary API
diff --git a/README.md b/README.md
index 01ac56850e5..b84d8a8ffa5 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@ As a reference, several fast compression algorithms were tested and compared to
 |Name             | Ratio | C.speed | D.speed |
 |-----------------|-------|--------:|--------:|
 |                 |       |   MB/s  |  MB/s   |
-|**zstd 0.4.7 -1**|**2.875**|**330**| **890** |
+|**zstd 0.5.1 -1**|**2.876**|**330**| **890** |
 | [zlib] 1.2.8 -1 | 2.730 |    95   |   360   |
 | brotli -0       | 2.708 |   220   |   430   |
 | QuickLZ 1.5     | 2.237 |   510   |   605   |
@@ -35,38 +35,88 @@ The following test is run on a Core i7-3930K CPU @ 4.5GHz, using [lzbench], an o
 
 Compression Speed vs Ratio | Decompression Speed
 ---------------------------|--------------------
-![Compression Speed vs Ratio](images/CSpeed.png "Compression Speed vs Ratio") | ![Decompression Speed](images/DSpeed.png "Decompression Speed")
+![Compression Speed vs Ratio](images/Cspeed4.png "Compression Speed vs Ratio") | ![Decompression Speed](images/Dspeed4.png "Decompression Speed")
+
+Several algorithms can produce higher compression ratio at slower speed, falling outside of the graph.
+For a larger picture including very slow modes, [click on this link](images/DCspeed5.png).
 
 
 ### The case for Small Data compression
 
-The above chart is applicable to large files or large streams scenarios (200 MB in this case).
+The above chart provides results applicable to large file or large stream scenarios (200 MB in this case).
 Small data (< 64 KB) come with different perspectives.
 The smaller the amount of data to compress, the more difficult it is to achieve any significant compression.
 On reaching the 1 KB region, it becomes almost impossible to compress anything.
-This problem is common to all compression algorithms, and throwing CPU power at it achieves no significant gains.
+This problem is common to all compression algorithms, and throwing CPU power at it achieves little gain.
 
 The reason is, compression algorithms learn from past data how to compress future data.
 But at the beginning of a new file, there is no "past" to build upon.
 
-[Starting with 0.5](https://github.com/Cyan4973/zstd/releases), Zstd now offers [a _Dictionary Builder_ tool](https://github.com/Cyan4973/zstd/tree/master/dictBuilder).
-It can be used to train the algorithm to fit a selected type of data, by providing it with some samples.
-The result is a file (or a byte buffer) called "dictionary", which can be loaded before compression and decompression.
-By using this dictionary, the compression ratio achievable on small data improves dramatically :
+To solve this situation, Zstd now offers a __training mode__,
+which can be used to make the algorithm fit a selected type of data, by providing it with some samples.
+The result of the training is a file called "dictionary", which can be loaded before compression and decompression.
+Using this dictionary, the compression ratio achievable on small data improves dramatically :
 
-| Collection Name    | Direct compression | Dictionary Compression | Gains  | Average unit | Range       |
-| ---------------    | ------------------ | ---------------------- | -----  | ------------:| -----       |
-| Small JSON records | x1.331 - x1.366	  | x5.860 - x6.830        | ~ x4.7 | 300          | 200 - 400   |
-| Mercurial events   | x2.322 - x2.538    | x3.377 - x4.462        | ~ x1.5 | 1.5 KB       | 20 - 200 KB |	
-| Large JSON docs    | x3.813 - x4.043    | x8.935 - x13.366       | ~ x2.8 | 6 KB         | 800 - 20 KB |	
+| Collection Name    | Direct compression | Dictionary Compression | Gains      | Average unit | Range       |
+| ---------------    | ------------------ | ---------------------- | ---------  | ------------:| -----       |
+| Small JSON records | x1.331 - x1.366	  | x5.860 - x6.830        | ~ __x4.7__ | 300          | 200 - 400   |
+| Mercurial events   | x2.322 - x2.538    | x3.377 - x4.462        | ~ __x1.5__ | 1.5 KB       | 20 - 200 KB |	
+| Large JSON docs    | x3.813 - x4.043    | x8.935 - x13.366       | ~ __x2.8__ | 6 KB         | 800 - 20 KB |	
 
-It has to be noted that these compression gains are achieved without any speed loss, and even some faster decompression processing.
+These compression gains are achieved without any speed loss; in general, compression and decompression with a dictionary even prove a bit faster.
 
 Dictionary work if there is some correlation in a family of small data (there is no _universal dictionary_).
 Hence, deploying one dictionary per type of data will provide the greater benefits.
 
 Large documents will benefit proportionally less, since dictionary gains are mostly effective in the first few KB.
-Then there is enough history to build upon, and the compression algorithm can rely on it to compress the rest of the file.
+Then, the compression algorithm will rely more and more on already decoded content to compress the rest of the file.
+
+#### Dictionary compression How To :
+
+##### _Using the Command Line Utility_ :
+
+1) Create the dictionary
+
+`zstd --train FullPathToTrainingSet/* -o dictionaryName`
+
+2) Compression with dictionary
+
+`zstd FILE -D dictionaryName`
+
+3) Decompression with dictionary
+
+`zstd --decompress FILE.zst -D dictionaryName`
+
+##### _Using API_ :
+
+1) Create dictionary
+
+```
+#include "zdict.h"
+(...)
+/* Train a dictionary from a memory buffer `samplesBuffer`, 
+   where `nbSamples` samples have been stored concatenated. */
+size_t dictSize = ZDICT_trainFromBuffer(dictBuffer, dictBufferCapacity,
+                                        samplesBuffer, samplesSizes, nbSamples);
+```
+
+2) Compression with dictionary
+
+```
+#include "zstd.h"
+(...)
+ZSTD_CCtx* context = ZSTD_createCCtx();
+size_t compressedSize = ZSTD_compress_usingDict(context, dst, dstCapacity, src, srcSize, dict, dictSize, compressionLevel);
+```
+
+3) Decompression with dictionary
+
+```
+#include "zstd.h"
+(...)
+ZSTD_DCtx* context = ZSTD_createDCtx();
+size_t regeneratedSize = ZSTD_decompress_usingDict(context, dst, dstCapacity, cSrc, cSrcSize, dict, dictSize);
+```
 
 
 ### Status
diff --git a/contrib/cmake/CMakeLists.txt b/contrib/cmake/CMakeLists.txt
index 3687c9ec2da..41883373578 100644
--- a/contrib/cmake/CMakeLists.txt
+++ b/contrib/cmake/CMakeLists.txt
@@ -1,6 +1,6 @@
 # ################################################################
 # zstd - Makefile
-# Copyright (C) Yann Collet 2014-2015
+# Copyright (C) Yann Collet 2014-2016
 # All rights reserved.
 # 
 # BSD license
diff --git a/contrib/cmake/lib/CMakeLists.txt b/contrib/cmake/lib/CMakeLists.txt
index a8247ddca70..bb2c057556a 100644
--- a/contrib/cmake/lib/CMakeLists.txt
+++ b/contrib/cmake/lib/CMakeLists.txt
@@ -1,6 +1,6 @@
 # ################################################################
 # zstd - Makefile
-# Copyright (C) Yann Collet 2014-2015
+# Copyright (C) Yann Collet 2014-2016
 # All rights reserved.
 # 
 # BSD license
@@ -27,8 +27,7 @@
 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 # 
 # You can contact the author at :
-#  - zstd source repository : https://github.com/Cyan4973/zstd
-#  - Public forum : https://groups.google.com/forum/#!forum/lz4c
+#  - zstd homepage : http://www.zstd.net/
 # ################################################################
 
 # Get library version based on information from input content (use regular exp)
@@ -58,9 +57,11 @@ GetLibraryVersion("${HEADER_CONTENT}" LIBVER_MAJOR LIBVER_MINOR LIBVER_RELEASE)
 MESSAGE("ZSTD VERSION ${LIBVER_MAJOR}.${LIBVER_MINOR}.${LIBVER_RELEASE}")
 
 SET(Sources
+        ${LIBRARY_DIR}/divsufsort.c
         ${LIBRARY_DIR}/fse.c
         ${LIBRARY_DIR}/huff0.c
-        ${LIBRARY_DIR}/zstd_buffered.c
+        ${LIBRARY_DIR}/zbuff.c
+        ${LIBRARY_DIR}/zdict.c
         ${LIBRARY_DIR}/zstd_compress.c
         ${LIBRARY_DIR}/zstd_decompress.c)
 
@@ -73,8 +74,10 @@ SET(Headers
         ${LIBRARY_DIR}/huff0.h
         ${LIBRARY_DIR}/huff0_static.h
         ${LIBRARY_DIR}/mem.h
-        ${LIBRARY_DIR}/zstd_buffered_static.h
-        ${LIBRARY_DIR}/zstd_buffered.h
+        ${LIBRARY_DIR}/zbuff.h
+        ${LIBRARY_DIR}/zbuff_static.h
+        ${LIBRARY_DIR}/zdict.h
+        ${LIBRARY_DIR}/zdict_static.h
         ${LIBRARY_DIR}/zstd_internal.h
         ${LIBRARY_DIR}/zstd_static.h
         ${LIBRARY_DIR}/zstd.h)
@@ -86,13 +89,15 @@ IF (ZSTD_LEGACY_SUPPORT)
     SET(Sources ${Sources}
             ${LIBRARY_LEGACY_DIR}/zstd_v01.c
             ${LIBRARY_LEGACY_DIR}/zstd_v02.c
-            ${LIBRARY_LEGACY_DIR}/zstd_v03.c)
+            ${LIBRARY_LEGACY_DIR}/zstd_v03.c
+            ${LIBRARY_LEGACY_DIR}/zstd_v04.c)
 
     SET(Headers ${Headers}
             ${LIBRARY_LEGACY_DIR}/zstd_legacy.h
             ${LIBRARY_LEGACY_DIR}/zstd_v01.h
             ${LIBRARY_LEGACY_DIR}/zstd_v02.h
-            ${LIBRARY_LEGACY_DIR}/zstd_v03.h)
+            ${LIBRARY_LEGACY_DIR}/zstd_v03.h
+            ${LIBRARY_LEGACY_DIR}/zstd_v04.h)
 ENDIF (ZSTD_LEGACY_SUPPORT)
 
 IF (MSVC)
@@ -161,7 +166,7 @@ IF (UNIX)
     SET(INSTALL_INCLUDE_DIR ${PREFIX}/include)
 
     # install target
-    INSTALL(FILES ${LIBRARY_DIR}/zstd.h DESTINATION ${INSTALL_INCLUDE_DIR})
+    INSTALL(FILES ${LIBRARY_DIR}/zstd.h ${LIBRARY_DIR}/zbuff.h ${LIBRARY_DIR}/zdict.h DESTINATION ${INSTALL_INCLUDE_DIR})
     INSTALL(TARGETS libzstd_static DESTINATION ${INSTALL_LIBRARY_DIR})
     INSTALL(TARGETS libzstd_shared LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR})
 
diff --git a/contrib/cmake/programs/CMakeLists.txt b/contrib/cmake/programs/CMakeLists.txt
index af9a0572510..ebee7c210c6 100644
--- a/contrib/cmake/programs/CMakeLists.txt
+++ b/contrib/cmake/programs/CMakeLists.txt
@@ -1,6 +1,6 @@
 # ################################################################
 # zstd - Makefile
-# Copyright (C) Yann Collet 2014-2015
+# Copyright (C) Yann Collet 2014-2016
 # All rights reserved.
 #
 # BSD license
@@ -27,8 +27,7 @@
 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #
 # You can contact the author at :
-#  - zstd source repository : https://github.com/Cyan4973/zstd
-#  - Public forum : https://groups.google.com/forum/#!forum/lz4c
+#  - zstd homepage : http://www.zstd.net/
 # ################################################################
 
 PROJECT(programs)
@@ -59,7 +58,7 @@ IF (ZSTD_LEGACY_SUPPORT)
     SET(ZSTD_FILEIO_LEGACY ${PROGRAMS_LEGACY_DIR}/fileio_legacy.c)
 ENDIF (ZSTD_LEGACY_SUPPORT)
 
-ADD_EXECUTABLE(zstd ${PROGRAMS_DIR}/zstdcli.c ${PROGRAMS_DIR}/fileio.c ${PROGRAMS_DIR}/bench.c ${PROGRAMS_DIR}/xxhash.c ${PROGRAMS_DIR}/datagen.c ${ZSTD_FILEIO_LEGACY})
+ADD_EXECUTABLE(zstd ${PROGRAMS_DIR}/zstdcli.c ${PROGRAMS_DIR}/fileio.c ${PROGRAMS_DIR}/bench.c ${PROGRAMS_DIR}/xxhash.c ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/dibio.c ${ZSTD_FILEIO_LEGACY})
 TARGET_LINK_LIBRARIES(zstd libzstd_static)
 
 ADD_EXECUTABLE(fullbench ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/fullbench.c)
@@ -69,9 +68,9 @@ ADD_EXECUTABLE(fuzzer ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/xxhash.c ${PROGR
 TARGET_LINK_LIBRARIES(fuzzer libzstd_static)
 
 IF (UNIX)
-    ADD_EXECUTABLE(zstd-noBench ${PROGRAMS_DIR}/zstdcli.c ${PROGRAMS_DIR}/fileio.c ${ZSTD_FILEIO_LEGACY})
-    TARGET_LINK_LIBRARIES(zstd-noBench libzstd_static)
-    SET_TARGET_PROPERTIES(zstd-noBench PROPERTIES COMPILE_DEFINITIONS "ZSTD_NOBENCH")
+    ADD_EXECUTABLE(zstd-frugal ${PROGRAMS_DIR}/zstdcli.c ${PROGRAMS_DIR}/fileio.c)
+    TARGET_LINK_LIBRARIES(zstd-frugal libzstd_static)
+    SET_TARGET_PROPERTIES(zstd-frugal PROPERTIES COMPILE_DEFINITIONS "ZSTD_NOBENCH;ZSTD_NODICT")
 
     ADD_EXECUTABLE(zbufftest ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/xxhash.c ${PROGRAMS_DIR}/zbufftest.c)
     TARGET_LINK_LIBRARIES(zbufftest libzstd_static)
diff --git a/dictBuilder/COPYING b/dictBuilder/COPYING
deleted file mode 100644
index d159169d105..00000000000
--- a/dictBuilder/COPYING
+++ /dev/null
@@ -1,339 +0,0 @@
-                    GNU GENERAL PUBLIC LICENSE
-                       Version 2, June 1991
-
- Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
- 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
-                            Preamble
-
-  The licenses for most software are designed to take away your
-freedom to share and change it.  By contrast, the GNU General Public
-License is intended to guarantee your freedom to share and change free
-software--to make sure the software is free for all its users.  This
-General Public License applies to most of the Free Software
-Foundation's software and to any other program whose authors commit to
-using it.  (Some other Free Software Foundation software is covered by
-the GNU Lesser General Public License instead.)  You can apply it to
-your programs, too.
-
-  When we speak of free software, we are referring to freedom, not
-price.  Our General Public Licenses are designed to make sure that you
-have the freedom to distribute copies of free software (and charge for
-this service if you wish), that you receive source code or can get it
-if you want it, that you can change the software or use pieces of it
-in new free programs; and that you know you can do these things.
-
-  To protect your rights, we need to make restrictions that forbid
-anyone to deny you these rights or to ask you to surrender the rights.
-These restrictions translate to certain responsibilities for you if you
-distribute copies of the software, or if you modify it.
-
-  For example, if you distribute copies of such a program, whether
-gratis or for a fee, you must give the recipients all the rights that
-you have.  You must make sure that they, too, receive or can get the
-source code.  And you must show them these terms so they know their
-rights.
-
-  We protect your rights with two steps: (1) copyright the software, and
-(2) offer you this license which gives you legal permission to copy,
-distribute and/or modify the software.
-
-  Also, for each author's protection and ours, we want to make certain
-that everyone understands that there is no warranty for this free
-software.  If the software is modified by someone else and passed on, we
-want its recipients to know that what they have is not the original, so
-that any problems introduced by others will not reflect on the original
-authors' reputations.
-
-  Finally, any free program is threatened constantly by software
-patents.  We wish to avoid the danger that redistributors of a free
-program will individually obtain patent licenses, in effect making the
-program proprietary.  To prevent this, we have made it clear that any
-patent must be licensed for everyone's free use or not licensed at all.
-
-  The precise terms and conditions for copying, distribution and
-modification follow.
-
-                    GNU GENERAL PUBLIC LICENSE
-   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
-
-  0. This License applies to any program or other work which contains
-a notice placed by the copyright holder saying it may be distributed
-under the terms of this General Public License.  The "Program", below,
-refers to any such program or work, and a "work based on the Program"
-means either the Program or any derivative work under copyright law:
-that is to say, a work containing the Program or a portion of it,
-either verbatim or with modifications and/or translated into another
-language.  (Hereinafter, translation is included without limitation in
-the term "modification".)  Each licensee is addressed as "you".
-
-Activities other than copying, distribution and modification are not
-covered by this License; they are outside its scope.  The act of
-running the Program is not restricted, and the output from the Program
-is covered only if its contents constitute a work based on the
-Program (independent of having been made by running the Program).
-Whether that is true depends on what the Program does.
-
-  1. You may copy and distribute verbatim copies of the Program's
-source code as you receive it, in any medium, provided that you
-conspicuously and appropriately publish on each copy an appropriate
-copyright notice and disclaimer of warranty; keep intact all the
-notices that refer to this License and to the absence of any warranty;
-and give any other recipients of the Program a copy of this License
-along with the Program.
-
-You may charge a fee for the physical act of transferring a copy, and
-you may at your option offer warranty protection in exchange for a fee.
-
-  2. You may modify your copy or copies of the Program or any portion
-of it, thus forming a work based on the Program, and copy and
-distribute such modifications or work under the terms of Section 1
-above, provided that you also meet all of these conditions:
-
-    a) You must cause the modified files to carry prominent notices
-    stating that you changed the files and the date of any change.
-
-    b) You must cause any work that you distribute or publish, that in
-    whole or in part contains or is derived from the Program or any
-    part thereof, to be licensed as a whole at no charge to all third
-    parties under the terms of this License.
-
-    c) If the modified program normally reads commands interactively
-    when run, you must cause it, when started running for such
-    interactive use in the most ordinary way, to print or display an
-    announcement including an appropriate copyright notice and a
-    notice that there is no warranty (or else, saying that you provide
-    a warranty) and that users may redistribute the program under
-    these conditions, and telling the user how to view a copy of this
-    License.  (Exception: if the Program itself is interactive but
-    does not normally print such an announcement, your work based on
-    the Program is not required to print an announcement.)
-
-These requirements apply to the modified work as a whole.  If
-identifiable sections of that work are not derived from the Program,
-and can be reasonably considered independent and separate works in
-themselves, then this License, and its terms, do not apply to those
-sections when you distribute them as separate works.  But when you
-distribute the same sections as part of a whole which is a work based
-on the Program, the distribution of the whole must be on the terms of
-this License, whose permissions for other licensees extend to the
-entire whole, and thus to each and every part regardless of who wrote it.
-
-Thus, it is not the intent of this section to claim rights or contest
-your rights to work written entirely by you; rather, the intent is to
-exercise the right to control the distribution of derivative or
-collective works based on the Program.
-
-In addition, mere aggregation of another work not based on the Program
-with the Program (or with a work based on the Program) on a volume of
-a storage or distribution medium does not bring the other work under
-the scope of this License.
-
-  3. You may copy and distribute the Program (or a work based on it,
-under Section 2) in object code or executable form under the terms of
-Sections 1 and 2 above provided that you also do one of the following:
-
-    a) Accompany it with the complete corresponding machine-readable
-    source code, which must be distributed under the terms of Sections
-    1 and 2 above on a medium customarily used for software interchange; or,
-
-    b) Accompany it with a written offer, valid for at least three
-    years, to give any third party, for a charge no more than your
-    cost of physically performing source distribution, a complete
-    machine-readable copy of the corresponding source code, to be
-    distributed under the terms of Sections 1 and 2 above on a medium
-    customarily used for software interchange; or,
-
-    c) Accompany it with the information you received as to the offer
-    to distribute corresponding source code.  (This alternative is
-    allowed only for noncommercial distribution and only if you
-    received the program in object code or executable form with such
-    an offer, in accord with Subsection b above.)
-
-The source code for a work means the preferred form of the work for
-making modifications to it.  For an executable work, complete source
-code means all the source code for all modules it contains, plus any
-associated interface definition files, plus the scripts used to
-control compilation and installation of the executable.  However, as a
-special exception, the source code distributed need not include
-anything that is normally distributed (in either source or binary
-form) with the major components (compiler, kernel, and so on) of the
-operating system on which the executable runs, unless that component
-itself accompanies the executable.
-
-If distribution of executable or object code is made by offering
-access to copy from a designated place, then offering equivalent
-access to copy the source code from the same place counts as
-distribution of the source code, even though third parties are not
-compelled to copy the source along with the object code.
-
-  4. You may not copy, modify, sublicense, or distribute the Program
-except as expressly provided under this License.  Any attempt
-otherwise to copy, modify, sublicense or distribute the Program is
-void, and will automatically terminate your rights under this License.
-However, parties who have received copies, or rights, from you under
-this License will not have their licenses terminated so long as such
-parties remain in full compliance.
-
-  5. You are not required to accept this License, since you have not
-signed it.  However, nothing else grants you permission to modify or
-distribute the Program or its derivative works.  These actions are
-prohibited by law if you do not accept this License.  Therefore, by
-modifying or distributing the Program (or any work based on the
-Program), you indicate your acceptance of this License to do so, and
-all its terms and conditions for copying, distributing or modifying
-the Program or works based on it.
-
-  6. Each time you redistribute the Program (or any work based on the
-Program), the recipient automatically receives a license from the
-original licensor to copy, distribute or modify the Program subject to
-these terms and conditions.  You may not impose any further
-restrictions on the recipients' exercise of the rights granted herein.
-You are not responsible for enforcing compliance by third parties to
-this License.
-
-  7. If, as a consequence of a court judgment or allegation of patent
-infringement or for any other reason (not limited to patent issues),
-conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License.  If you cannot
-distribute so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you
-may not distribute the Program at all.  For example, if a patent
-license would not permit royalty-free redistribution of the Program by
-all those who receive copies directly or indirectly through you, then
-the only way you could satisfy both it and this License would be to
-refrain entirely from distribution of the Program.
-
-If any portion of this section is held invalid or unenforceable under
-any particular circumstance, the balance of the section is intended to
-apply and the section as a whole is intended to apply in other
-circumstances.
-
-It is not the purpose of this section to induce you to infringe any
-patents or other property right claims or to contest validity of any
-such claims; this section has the sole purpose of protecting the
-integrity of the free software distribution system, which is
-implemented by public license practices.  Many people have made
-generous contributions to the wide range of software distributed
-through that system in reliance on consistent application of that
-system; it is up to the author/donor to decide if he or she is willing
-to distribute software through any other system and a licensee cannot
-impose that choice.
-
-This section is intended to make thoroughly clear what is believed to
-be a consequence of the rest of this License.
-
-  8. If the distribution and/or use of the Program is restricted in
-certain countries either by patents or by copyrighted interfaces, the
-original copyright holder who places the Program under this License
-may add an explicit geographical distribution limitation excluding
-those countries, so that distribution is permitted only in or among
-countries not thus excluded.  In such case, this License incorporates
-the limitation as if written in the body of this License.
-
-  9. The Free Software Foundation may publish revised and/or new versions
-of the General Public License from time to time.  Such new versions will
-be similar in spirit to the present version, but may differ in detail to
-address new problems or concerns.
-
-Each version is given a distinguishing version number.  If the Program
-specifies a version number of this License which applies to it and "any
-later version", you have the option of following the terms and conditions
-either of that version or of any later version published by the Free
-Software Foundation.  If the Program does not specify a version number of
-this License, you may choose any version ever published by the Free Software
-Foundation.
-
-  10. If you wish to incorporate parts of the Program into other free
-programs whose distribution conditions are different, write to the author
-to ask for permission.  For software which is copyrighted by the Free
-Software Foundation, write to the Free Software Foundation; we sometimes
-make exceptions for this.  Our decision will be guided by the two goals
-of preserving the free status of all derivatives of our free software and
-of promoting the sharing and reuse of software generally.
-
-                            NO WARRANTY
-
-  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
-FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
-OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
-PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
-OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
-TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
-PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
-REPAIR OR CORRECTION.
-
-  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
-REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
-INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
-OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
-TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
-YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
-PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGES.
-
-                     END OF TERMS AND CONDITIONS
-
-            How to Apply These Terms to Your New Programs
-
-  If you develop a new program, and you want it to be of the greatest
-possible use to the public, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these terms.
-
-  To do so, attach the following notices to the program.  It is safest
-to attach them to the start of each source file to most effectively
-convey the exclusion of warranty; and each file should have at least
-the "copyright" line and a pointer to where the full notice is found.
-
-    <one line to give the program's name and a brief idea of what it does.>
-    Copyright (C) <year>  <name of author>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-Also add information on how to contact you by electronic and paper mail.
-
-If the program is interactive, make it output a short notice like this
-when it starts in an interactive mode:
-
-    Gnomovision version 69, Copyright (C) year name of author
-    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
-    This is free software, and you are welcome to redistribute it
-    under certain conditions; type `show c' for details.
-
-The hypothetical commands `show w' and `show c' should show the appropriate
-parts of the General Public License.  Of course, the commands you use may
-be called something other than `show w' and `show c'; they could even be
-mouse-clicks or menu items--whatever suits your program.
-
-You should also get your employer (if you work as a programmer) or your
-school, if any, to sign a "copyright disclaimer" for the program, if
-necessary.  Here is a sample; alter the names:
-
-  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
-  `Gnomovision' (which makes passes at compilers) written by James Hacker.
-
-  <signature of Ty Coon>, 1 April 1989
-  Ty Coon, President of Vice
-
-This General Public License does not permit incorporating your program into
-proprietary programs.  If your program is a subroutine library, you may
-consider it more useful to permit linking proprietary applications with the
-library.  If this is what you want to do, use the GNU Lesser General
-Public License instead of this License.
diff --git a/dictBuilder/Makefile b/dictBuilder/Makefile
deleted file mode 100644
index e5a4f1aa708..00000000000
--- a/dictBuilder/Makefile
+++ /dev/null
@@ -1,69 +0,0 @@
-# ##########################################################################
-# Dict Builder - Makefile
-# Copyright (C) Yann Collet 2015
-#
-# GPL v2 License
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License along
-# with this program; if not, write to the Free Software Foundation, Inc.,
-# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-#
-# You can contact the author at :
-#  - ZSTD source repository : http://code.google.com/p/zstd/
-#  - Public forum : https://groups.google.com/forum/#!forum/lz4c
-# ##########################################################################
-
-CPPFLAGS= -I../lib
-CFLAGS ?= -O3  
-CFLAGS += -std=c99 -Wall -Wextra -Wshadow -Wcast-qual -Wcast-align -Wundef -Wstrict-prototypes -Wstrict-aliasing=1
-FLAGS   = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MOREFLAGS)
-
-ZSTDDIR = ../lib
-
-
-# Define *.exe as extension for Windows systems
-ifneq (,$(filter Windows%,$(OS)))
-EXT =.exe
-VOID = nul
-else
-EXT =
-VOID = /dev/null
-endif
-
-
-.PHONY: default all test
-
-default: dictBuilder
-
-all: dictBuilder
-
-dictBuilder: dictBuilder.c dibcli.c divsufsort.c sssort.c trsort.c $(ZSTDDIR)/huff0.c $(ZSTDDIR)/fse.c $(ZSTDDIR)/zstd_decompress.c
-	$(CC)      $(FLAGS) $^ -o $@$(EXT)
-
-clean:
-	@rm -f core *.o tmp* result* *.gcda \
-        dictBuilder$(EXT)
-	@echo Cleaning completed
-
-test: dictBuilder
-	./dictBuilder *
-	@rm dictionary
-
-clangtest: CC = clang
-clangtest: CFLAGS += -Werror
-clangtest: clean dictBuilder
-
-gpptest: CC = g++
-gpptest: CFLAGS=-O3 -Wall -Wextra -Wshadow -Wcast-align -Wcast-qual -Wundef -Werror
-gpptest: clean dictBuilder
-
diff --git a/dictBuilder/dibcli.c b/dictBuilder/dibcli.c
deleted file mode 100644
index 8566c624570..00000000000
--- a/dictBuilder/dibcli.c
+++ /dev/null
@@ -1,263 +0,0 @@
-/*
-  dibcli - Command Line Interface (cli) for Dictionary Builder
-  Copyright (C) Yann Collet 2016
-
-  GPL v2 License
-
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation; either version 2 of the License, or
-  (at your option) any later version.
-
-  This program is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License along
-  with this program; if not, write to the Free Software Foundation, Inc.,
-  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-  You can contact the author at :
-  - zstd source repository : https://github.com/Cyan4973/zstd
-*/
-
-/* **************************************
-*  Compiler Specifics
-****************************************/
-/* Disable some Visual warning messages */
-#ifdef _MSC_VER
-#  pragma warning(disable : 4127)                /* disable: C4127: conditional expression is constant */
-#endif
-
-
-/*-************************************
-*  Includes
-**************************************/
-#include <stdlib.h>   /* exit, calloc, free */
-#include <string.h>   /* strcmp, strlen */
-#include <stdio.h>    /* fprintf, getchar */
-
-#include "dictBuilder.h"
-
-
-/*-************************************
-*  Constants
-**************************************/
-#define PROGRAM_DESCRIPTION "Dictionary builder"
-#ifndef PROGRAM_VERSION
-#  define QUOTE(str) #str
-#  define EXP_Q(str) QUOTE(str)
-#  define PROGRAM_VERSION "v" EXP_Q(DiB_VERSION_MAJOR) "." EXP_Q(DiB_VERSION_MINOR) "." EXP_Q(DiB_VERSION_RELEASE)
-#endif
-#define AUTHOR "Yann Collet"
-#define WELCOME_MESSAGE "*** %s %s %i-bits, by %s ***\n", PROGRAM_DESCRIPTION, PROGRAM_VERSION, (int)(sizeof(void*)*8), AUTHOR
-
-#define KB *(1 <<10)
-#define MB *(1 <<20)
-#define GB *(1U<<30)
-
-static const unsigned compressionLevelDefault = 5;
-static const unsigned selectionLevelDefault = 9;     /* determined experimentally */
-static const unsigned maxDictSizeDefault = 110 KB;
-static const char* dictFileNameDefault = "dictionary";
-
-
-/*-************************************
-*  Display Macros
-**************************************/
-#define DISPLAY(...)           fprintf(g_displayOut, __VA_ARGS__)
-#define DISPLAYLEVEL(l, ...)   if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
-static FILE* g_displayOut;
-static unsigned g_displayLevel = 2;   // 0 : no display  // 1: errors  // 2 : + result + interaction + warnings ;  // 3 : + progression;  // 4 : + information
-
-
-/*-************************************
-*  Exceptions
-**************************************/
-#define DEBUG 0
-#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
-#define EXM_THROW(error, ...)                                             \
-{                                                                         \
-    DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
-    DISPLAYLEVEL(1, "Error %i : ", error);                                \
-    DISPLAYLEVEL(1, __VA_ARGS__);                                         \
-    DISPLAYLEVEL(1, "\n");                                                \
-    exit(error);                                                          \
-}
-
-
-/*-************************************
-*  Command Line
-**************************************/
-static int usage(const char* programName)
-{
-    DISPLAY( "Usage :\n");
-    DISPLAY( "      %s [arg] [filenames]\n", programName);
-    DISPLAY( "\n");
-    DISPLAY( "Arguments :\n");
-    DISPLAY( " -o       : name of dictionary file (default: %s) \n", dictFileNameDefault);
-    DISPLAY( "--maxdict : limit dictionary to specified size (default : %u) \n", maxDictSizeDefault);
-    DISPLAY( " -h/-H    : display help/long help and exit\n");
-    return 0;
-}
-
-static int usage_advanced(const char* programName)
-{
-    DISPLAY(WELCOME_MESSAGE);
-    usage(programName);
-    DISPLAY( "\n");
-    DISPLAY( "Advanced arguments :\n");
-    DISPLAY( " -V     : display Version number and exit\n");
-    DISPLAY( "--fast  : fast sampling mode\n");
-    DISPLAY( " -L#    : target compression level (default: %u)\n", compressionLevelDefault);
-    DISPLAY( " -S#    : dictionary selectivity level (default: %u)\n", selectionLevelDefault);
-    DISPLAY( " -v     : verbose mode\n");
-    DISPLAY( " -q     : suppress notifications; specify twice to suppress errors too\n");
-    return 0;
-}
-
-static int badusage(const char* programName)
-{
-    DISPLAYLEVEL(1, "Incorrect parameters\n");
-    if (g_displayLevel >= 1) usage(programName);
-    return 1;
-}
-
-
-static void waitEnter(void)
-{
-    int unused;
-    DISPLAY("Press enter to continue...\n");
-    unused = getchar();
-    (void)unused;
-}
-
-
-int main(int argCount, const char** argv)
-{
-    int i,
-        main_pause=0,
-        operationResult=0,
-        nextArgumentIsMaxDict=0,
-        nextArgumentIsDictFileName=0;
-    unsigned cLevel = compressionLevelDefault;
-    unsigned maxDictSize = maxDictSizeDefault;
-    unsigned selectionLevel = selectionLevelDefault;
-    const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*));   /* argCount >= 1 */
-    unsigned filenameIdx = 0;
-    const char* programName = argv[0];
-    const char* dictFileName = dictFileNameDefault;
-
-    /* init */
-    g_displayOut = stderr;   /* unfortunately, cannot be set at declaration */
-    if (filenameTable==NULL) EXM_THROW(1, "not enough memory\n");
-    /* Pick out program name from path. Don't rely on stdlib because of conflicting behavior */
-    for (i = (int)strlen(programName); i > 0; i--) { if ((programName[i] == '/') || (programName[i] == '\\')) { i++; break; } }
-    programName += i;
-
-    /* command switches */
-    for(i=1; i<argCount; i++) {
-        const char* argument = argv[i];
-
-        if(!argument) continue;   /* Protection if argument empty */
-
-        if (nextArgumentIsDictFileName) {
-            nextArgumentIsDictFileName=0;
-            dictFileName = argument;
-            continue;
-        }
-
-        if (nextArgumentIsMaxDict) {
-            nextArgumentIsMaxDict = 0;
-            maxDictSize = 0;
-            while ((*argument>='0') && (*argument<='9'))
-                maxDictSize = maxDictSize * 10 + (*argument - '0'), argument++;
-            if (*argument=='k' || *argument=='K')
-                maxDictSize <<= 10;
-            continue;
-        }
-
-        /* long commands (--long-word) */
-        if (!strcmp(argument, "--version")) { g_displayOut=stdout; DISPLAY(WELCOME_MESSAGE); return 0; }
-        if (!strcmp(argument, "--help")) { g_displayOut=stdout; return usage_advanced(programName); }
-        if (!strcmp(argument, "--verbose")) { g_displayLevel++; if (g_displayLevel<3) g_displayLevel=3; continue; }
-        if (!strcmp(argument, "--quiet")) { g_displayLevel--; continue; }
-        if (!strcmp(argument, "--maxdict")) { nextArgumentIsMaxDict=1; continue; }
-        if (!strcmp(argument, "--fast")) { selectionLevel=1; cLevel=1; continue; }
-
-        /* Decode commands (note : aggregated commands are allowed) */
-        if (argument[0]=='-') {
-            argument++;
-
-            while (argument[0]!=0) {
-                switch(argument[0])
-                {
-                    /* Display help */
-                case 'V': g_displayOut=stdout; DISPLAY(WELCOME_MESSAGE); return 0;   /* Version Only */
-                case 'H':
-                case 'h': g_displayOut=stdout; return usage_advanced(programName);
-
-                    /* Selection level */
-                case 'S': argument++;
-                    selectionLevel = 0;
-                    while ((*argument >= '0') && (*argument <= '9'))
-                        selectionLevel *= 10, selectionLevel += *argument++ - '0';
-                    break;
-
-                    /* Selection level */
-                case 'L': argument++;
-                    cLevel = 0;
-                    while ((*argument >= '0') && (*argument <= '9'))
-                        cLevel *= 10, cLevel += *argument++ - '0';
-                    break;
-
-                    /* Verbose mode */
-                case 'v': g_displayLevel++; if (g_displayLevel<3) g_displayLevel=3; argument++; break;
-
-                    /* Quiet mode */
-                case 'q': g_displayLevel--; argument++; break;
-
-                    /* dictionary name */
-                case 'o': nextArgumentIsDictFileName=1; argument++; break;
-
-                    /* Pause at the end (hidden option) */
-                case 'p': main_pause=1; argument++; break;
-
-                    /* unknown command */
-                default : return badusage(programName);
-            }   }
-            continue;
-        }
-
-        /* add filename to list */
-        filenameTable[filenameIdx++] = argument;
-    }
-
-    /* Welcome message (if verbose) */
-    DISPLAYLEVEL(3, WELCOME_MESSAGE);
-
-    /* check nb files */
-    if (filenameIdx==0) return badusage(programName);
-    if (filenameIdx < 100)
-    {
-        DISPLAYLEVEL(2, "Warning : set contains only %u files ... \n", filenameIdx);
-        DISPLAYLEVEL(3, "!! For better results, consider providing > 1.000 samples     !!\n");
-        DISPLAYLEVEL(3, "!! Each sample should preferably be stored as a separate file !!\n");
-    }
-
-    /* building ... */
-    {
-        DiB_params_t param;
-        param.selectivityLevel = selectionLevel;
-        param.compressionLevel = cLevel;
-        DiB_setNotificationLevel(g_displayLevel);
-        operationResult = DiB_trainFromFiles(dictFileName, maxDictSize,
-                                             filenameTable, filenameIdx,
-                                             param);
-    }
-
-    if (main_pause) waitEnter();
-    free((void*)filenameTable);
-    return operationResult;
-}
diff --git a/dictBuilder/dictBuilder.h b/dictBuilder/dictBuilder.h
deleted file mode 100644
index a022583eba1..00000000000
--- a/dictBuilder/dictBuilder.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
-    dictBuilder.h
-    Copyright (C) Yann Collet 2016
-
-    GPL v2 License
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-    You can contact the author at :
-    - zstd source repository : https://github.com/Cyan4973/zstd
-    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
-*/
-
-/* This library is designed for a single-threaded console application.
-*  It exit() and printf() into stderr when it encounters an error condition. */
-
-#ifndef DICTBUILDER_H_001
-#define DICTBUILDER_H_001
-
-/*-*************************************
-*  Version
-***************************************/
-#define DiB_VERSION_MAJOR    0    /* for breaking interface changes  */
-#define DiB_VERSION_MINOR    0    /* for new (non-breaking) interface capabilities */
-#define DiB_VERSION_RELEASE  1    /* for tweaks, bug-fixes, or development */
-#define DiB_VERSION_NUMBER  (DiB_VERSION_MAJOR *100*100 + DiB_VERSION_MINOR *100 + DiB_VERSION_RELEASE)
-unsigned DiB_versionNumber (void);
-
-
-/*-*************************************
-*  Public type
-***************************************/
-typedef struct {
-    unsigned selectivityLevel;   /* 0 means default; larger => bigger selection => larger dictionary */
-    unsigned compressionLevel;   /* 0 means default; target a specific zstd compression level */
-} DiB_params_t;
-
-
-/*-*************************************
-*  Public functions
-***************************************/
-/*! DiB_trainFromBuffer
-    Train a dictionary from a memory buffer @samplesBuffer
-    where @nbSamples samples have been stored concatenated.
-    Each sample size is provided into an orderly table @sampleSizes.
-    Resulting dictionary will be saved into @dictBuffer.
-    @parameters is optional and can be provided with 0 values to mean "default".
-    @result : size of dictionary stored into @dictBuffer (<= @dictBufferSize)
-              or an error code, which can be tested by DiB_isError().
-    note : DiB_trainFromBuffer() will send notifications into stderr if instructed to, using DiB_setNotificationLevel()
-*/
-size_t DiB_trainFromBuffer(void* dictBuffer, size_t dictBufferSize,
-                           const void* samplesBuffer, const size_t* sampleSizes, unsigned nbSamples,
-                           DiB_params_t parameters);
-
-
-/*! DiB_trainFromFiles
-    Train a dictionary from a set of files provided by @fileNamesTable
-    Resulting dictionary is written into file @dictFileName.
-    @parameters is optional and can be provided with 0 values.
-    @result : 0 == ok. Any other : error.
-*/
-int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
-                       const char** fileNamesTable, unsigned nbFiles,
-                       DiB_params_t parameters);
-
-
-/*-*************************************
-*  Helper functions
-***************************************/
-unsigned DiB_isError(size_t errorCode);
-const char* DiB_getErrorName(size_t errorCode);
-
-/*! DiB_setNotificationLevel
-    Set amount of notification to be displayed on the console.
-    default initial value : 0 = no console notification.
-    Note : not thread-safe (use a global constant)
-*/
-void DiB_setNotificationLevel(unsigned l);
-
-
-#endif
diff --git a/dictBuilder/divsufsort.c b/dictBuilder/divsufsort.c
deleted file mode 100644
index 312813597a4..00000000000
--- a/dictBuilder/divsufsort.c
+++ /dev/null
@@ -1,404 +0,0 @@
-/*
- * divsufsort.c for libdivsufsort
- * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/*- Compiler specifics -*/
-#ifdef __clang__
-#pragma clang diagnostic ignored "-Wshorten-64-to-32"
-#endif
-
-/*- Dependencies -*/
-#include "divsufsort_private.h"
-#ifdef _OPENMP
-# include <omp.h>
-#endif
-
-
-/*- Private Functions -*/
-
-/* Sorts suffixes of type B*. */
-static
-saidx_t
-sort_typeBstar(const sauchar_t *T, saidx_t *SA,
-               saidx_t *bucket_A, saidx_t *bucket_B,
-               saidx_t n) {
-  saidx_t *PAb, *ISAb, *buf;
-#ifdef _OPENMP
-  saidx_t *curbuf;
-  saidx_t l;
-#endif
-  saidx_t i, j, k, t, m, bufsize;
-  saint_t c0, c1;
-#ifdef _OPENMP
-  saint_t d0, d1;
-  int tmp;
-#endif
-
-  /* Initialize bucket arrays. */
-  for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; }
-  for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; }
-
-  /* Count the number of occurrences of the first one or two characters of each
-     type A, B and B* suffix. Moreover, store the beginning position of all
-     type B* suffixes into the array SA. */
-  for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) {
-    /* type A suffix. */
-    do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1));
-    if(0 <= i) {
-      /* type B* suffix. */
-      ++BUCKET_BSTAR(c0, c1);
-      SA[--m] = i;
-      /* type B suffix. */
-      for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) {
-        ++BUCKET_B(c0, c1);
-      }
-    }
-  }
-  m = n - m;
-/*
-note:
-  A type B* suffix is lexicographically smaller than a type B suffix that
-  begins with the same first two characters.
-*/
-
-  /* Calculate the index of start/end point of each bucket. */
-  for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) {
-    t = i + BUCKET_A(c0);
-    BUCKET_A(c0) = i + j; /* start point */
-    i = t + BUCKET_B(c0, c0);
-    for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) {
-      j += BUCKET_BSTAR(c0, c1);
-      BUCKET_BSTAR(c0, c1) = j; /* end point */
-      i += BUCKET_B(c0, c1);
-    }
-  }
-
-  if(0 < m) {
-    /* Sort the type B* suffixes by their first two characters. */
-    PAb = SA + n - m; ISAb = SA + m;
-    for(i = m - 2; 0 <= i; --i) {
-      t = PAb[i], c0 = T[t], c1 = T[t + 1];
-      SA[--BUCKET_BSTAR(c0, c1)] = i;
-    }
-    t = PAb[m - 1], c0 = T[t], c1 = T[t + 1];
-    SA[--BUCKET_BSTAR(c0, c1)] = m - 1;
-
-    /* Sort the type B* substrings using sssort. */
-#ifdef _OPENMP
-    tmp = omp_get_max_threads();
-    buf = SA + m, bufsize = (n - (2 * m)) / tmp;
-    c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m;
-#pragma omp parallel default(shared) private(curbuf, k, l, d0, d1, tmp)
-    {
-      tmp = omp_get_thread_num();
-      curbuf = buf + tmp * bufsize;
-      k = 0;
-      for(;;) {
-        #pragma omp critical(sssort_lock)
-        {
-          if(0 < (l = j)) {
-            d0 = c0, d1 = c1;
-            do {
-              k = BUCKET_BSTAR(d0, d1);
-              if(--d1 <= d0) {
-                d1 = ALPHABET_SIZE - 1;
-                if(--d0 < 0) { break; }
-              }
-            } while(((l - k) <= 1) && (0 < (l = k)));
-            c0 = d0, c1 = d1, j = k;
-          }
-        }
-        if(l == 0) { break; }
-        sssort(T, PAb, SA + k, SA + l,
-               curbuf, bufsize, 2, n, *(SA + k) == (m - 1));
-      }
-    }
-#else
-    buf = SA + m, bufsize = n - (2 * m);
-    for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
-      for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
-        i = BUCKET_BSTAR(c0, c1);
-        if(1 < (j - i)) {
-          sssort(T, PAb, SA + i, SA + j,
-                 buf, bufsize, 2, n, *(SA + i) == (m - 1));
-        }
-      }
-    }
-#endif
-
-    /* Compute ranks of type B* substrings. */
-    for(i = m - 1; 0 <= i; --i) {
-      if(0 <= SA[i]) {
-        j = i;
-        do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i]));
-        SA[i + 1] = i - j;
-        if(i <= 0) { break; }
-      }
-      j = i;
-      do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0);
-      ISAb[SA[i]] = j;
-    }
-
-    /* Construct the inverse suffix array of type B* suffixes using trsort. */
-    trsort(ISAb, SA, m, 1);
-
-    /* Set the sorted order of tyoe B* suffixes. */
-    for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
-      for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
-      if(0 <= i) {
-        t = i;
-        for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { }
-        SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t;
-      }
-    }
-
-    /* Calculate the index of start/end point of each bucket. */
-    BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */
-    for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) {
-      i = BUCKET_A(c0 + 1) - 1;
-      for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) {
-        t = i - BUCKET_B(c0, c1);
-        BUCKET_B(c0, c1) = i; /* end point */
-
-        /* Move all type B* suffixes to the correct position. */
-        for(i = t, j = BUCKET_BSTAR(c0, c1);
-            j <= k;
-            --i, --k) { SA[i] = SA[k]; }
-      }
-      BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */
-      BUCKET_B(c0, c0) = i; /* end point */
-    }
-  }
-
-  return m;
-}
-
-/* Constructs the suffix array by using the sorted order of type B* suffixes. */
-static
-void
-construct_SA(const sauchar_t *T, saidx_t *SA,
-             saidx_t *bucket_A, saidx_t *bucket_B,
-             saidx_t n, saidx_t m) {
-  saidx_t *i, *j, *k;
-  saidx_t s;
-  saint_t c0, c1, c2;
-
-  if(0 < m) {
-    /* Construct the sorted order of type B suffixes by using
-       the sorted order of type B* suffixes. */
-    for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
-      /* Scan the suffix array from right to left. */
-      for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
-          j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
-          i <= j;
-          --j) {
-        if(0 < (s = *j)) {
-          assert(T[s] == c1);
-          assert(((s + 1) < n) && (T[s] <= T[s + 1]));
-          assert(T[s - 1] <= T[s]);
-          *j = ~s;
-          c0 = T[--s];
-          if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
-          if(c0 != c2) {
-            if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
-            k = SA + BUCKET_B(c2 = c0, c1);
-          }
-          assert(k < j);
-          *k-- = s;
-        } else {
-          assert(((s == 0) && (T[s] == c1)) || (s < 0));
-          *j = ~s;
-        }
-      }
-    }
-  }
-
-  /* Construct the suffix array by using
-     the sorted order of type B suffixes. */
-  k = SA + BUCKET_A(c2 = T[n - 1]);
-  *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1);
-  /* Scan the suffix array from left to right. */
-  for(i = SA, j = SA + n; i < j; ++i) {
-    if(0 < (s = *i)) {
-      assert(T[s - 1] >= T[s]);
-      c0 = T[--s];
-      if((s == 0) || (T[s - 1] < c0)) { s = ~s; }
-      if(c0 != c2) {
-        BUCKET_A(c2) = k - SA;
-        k = SA + BUCKET_A(c2 = c0);
-      }
-      assert(i < k);
-      *k++ = s;
-    } else {
-      assert(s < 0);
-      *i = ~s;
-    }
-  }
-}
-
-/* Constructs the burrows-wheeler transformed string directly
-   by using the sorted order of type B* suffixes. */
-static
-saidx_t
-construct_BWT(const sauchar_t *T, saidx_t *SA,
-              saidx_t *bucket_A, saidx_t *bucket_B,
-              saidx_t n, saidx_t m) {
-  saidx_t *i, *j, *k, *orig;
-  saidx_t s;
-  saint_t c0, c1, c2;
-
-  if(0 < m) {
-    /* Construct the sorted order of type B suffixes by using
-       the sorted order of type B* suffixes. */
-    for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
-      /* Scan the suffix array from right to left. */
-      for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
-          j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
-          i <= j;
-          --j) {
-        if(0 < (s = *j)) {
-          assert(T[s] == c1);
-          assert(((s + 1) < n) && (T[s] <= T[s + 1]));
-          assert(T[s - 1] <= T[s]);
-          c0 = T[--s];
-          *j = ~((saidx_t)c0);
-          if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
-          if(c0 != c2) {
-            if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
-            k = SA + BUCKET_B(c2 = c0, c1);
-          }
-          assert(k < j);
-          *k-- = s;
-        } else if(s != 0) {
-          *j = ~s;
-#ifndef NDEBUG
-        } else {
-          assert(T[s] == c1);
-#endif
-        }
-      }
-    }
-  }
-
-  /* Construct the BWTed string by using
-     the sorted order of type B suffixes. */
-  k = SA + BUCKET_A(c2 = T[n - 1]);
-  *k++ = (T[n - 2] < c2) ? ~((saidx_t)T[n - 2]) : (n - 1);
-  /* Scan the suffix array from left to right. */
-  for(i = SA, j = SA + n, orig = SA; i < j; ++i) {
-    if(0 < (s = *i)) {
-      assert(T[s - 1] >= T[s]);
-      c0 = T[--s];
-      *i = c0;
-      if((0 < s) && (T[s - 1] < c0)) { s = ~((saidx_t)T[s - 1]); }
-      if(c0 != c2) {
-        BUCKET_A(c2) = k - SA;
-        k = SA + BUCKET_A(c2 = c0);
-      }
-      assert(i < k);
-      *k++ = s;
-    } else if(s != 0) {
-      *i = ~s;
-    } else {
-      orig = i;
-    }
-  }
-
-  return orig - SA;
-}
-
-
-/*---------------------------------------------------------------------------*/
-
-/*- Function -*/
-
-saint_t
-divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n) {
-  saidx_t *bucket_A, *bucket_B;
-  saidx_t m;
-  saint_t err = 0;
-
-  /* Check arguments. */
-  if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; }
-  else if(n == 0) { return 0; }
-  else if(n == 1) { SA[0] = 0; return 0; }
-  else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; }
-
-  bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t));
-  bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t));
-
-  /* Suffixsort. */
-  if((bucket_A != NULL) && (bucket_B != NULL)) {
-    m = sort_typeBstar(T, SA, bucket_A, bucket_B, n);
-    construct_SA(T, SA, bucket_A, bucket_B, n, m);
-  } else {
-    err = -2;
-  }
-
-  free(bucket_B);
-  free(bucket_A);
-
-  return err;
-}
-
-saidx_t
-divbwt(const sauchar_t *T, sauchar_t *U, saidx_t *A, saidx_t n) {
-  saidx_t *B;
-  saidx_t *bucket_A, *bucket_B;
-  saidx_t m, pidx, i;
-
-  /* Check arguments. */
-  if((T == NULL) || (U == NULL) || (n < 0)) { return -1; }
-  else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; }
-
-  if((B = A) == NULL) { B = (saidx_t *)malloc((size_t)(n + 1) * sizeof(saidx_t)); }
-  bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t));
-  bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t));
-
-  /* Burrows-Wheeler Transform. */
-  if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) {
-    m = sort_typeBstar(T, B, bucket_A, bucket_B, n);
-    pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m);
-
-    /* Copy to output string. */
-    U[0] = T[n - 1];
-    for(i = 0; i < pidx; ++i) { U[i + 1] = (sauchar_t)B[i]; }
-    for(i += 1; i < n; ++i) { U[i] = (sauchar_t)B[i]; }
-    pidx += 1;
-  } else {
-    pidx = -2;
-  }
-
-  free(bucket_B);
-  free(bucket_A);
-  if(A == NULL) { free(B); }
-
-  return pidx;
-}
-
-const char *
-divsufsort_version(void) {
-  return PROJECT_VERSION_FULL;
-}
diff --git a/dictBuilder/divsufsort.h b/dictBuilder/divsufsort.h
deleted file mode 100644
index 6d3e648701c..00000000000
--- a/dictBuilder/divsufsort.h
+++ /dev/null
@@ -1,180 +0,0 @@
-/*
- * divsufsort.h for libdivsufsort
- * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef _DIVSUFSORT_H
-#define _DIVSUFSORT_H 1
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-#include <inttypes.h>
-
-#ifndef DIVSUFSORT_API
-# ifdef DIVSUFSORT_BUILD_DLL
-#  define DIVSUFSORT_API 
-# else
-#  define DIVSUFSORT_API 
-# endif
-#endif
-
-/*- Datatypes -*/
-#ifndef SAUCHAR_T
-#define SAUCHAR_T
-typedef uint8_t sauchar_t;
-#endif /* SAUCHAR_T */
-#ifndef SAINT_T
-#define SAINT_T
-typedef int32_t saint_t;
-#endif /* SAINT_T */
-#ifndef SAIDX_T
-#define SAIDX_T
-typedef int32_t saidx_t;
-#endif /* SAIDX_T */
-#ifndef PRIdSAINT_T
-#define PRIdSAINT_T PRId32
-#endif /* PRIdSAINT_T */
-#ifndef PRIdSAIDX_T
-#define PRIdSAIDX_T PRId32
-#endif /* PRIdSAIDX_T */
-
-
-/*- Prototypes -*/
-
-/**
- * Constructs the suffix array of a given string.
- * @param T[0..n-1] The input string.
- * @param SA[0..n-1] The output array of suffixes.
- * @param n The length of the given string.
- * @return 0 if no error occurred, -1 or -2 otherwise.
- */
-DIVSUFSORT_API
-saint_t
-divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n);
-
-/**
- * Constructs the burrows-wheeler transformed string of a given string.
- * @param T[0..n-1] The input string.
- * @param U[0..n-1] The output string. (can be T)
- * @param A[0..n-1] The temporary array. (can be NULL)
- * @param n The length of the given string.
- * @return The primary index if no error occurred, -1 or -2 otherwise.
- */
-DIVSUFSORT_API
-saidx_t
-divbwt(const sauchar_t *T, sauchar_t *U, saidx_t *A, saidx_t n);
-
-/**
- * Returns the version of the divsufsort library.
- * @return The version number string.
- */
-DIVSUFSORT_API
-const char *
-divsufsort_version(void);
-
-
-/**
- * Constructs the burrows-wheeler transformed string of a given string and suffix array.
- * @param T[0..n-1] The input string.
- * @param U[0..n-1] The output string. (can be T)
- * @param SA[0..n-1] The suffix array. (can be NULL)
- * @param n The length of the given string.
- * @param idx The output primary index.
- * @return 0 if no error occurred, -1 or -2 otherwise.
- */
-DIVSUFSORT_API
-saint_t
-bw_transform(const sauchar_t *T, sauchar_t *U,
-             saidx_t *SA /* can NULL */,
-             saidx_t n, saidx_t *idx);
-
-/**
- * Inverse BW-transforms a given BWTed string.
- * @param T[0..n-1] The input string.
- * @param U[0..n-1] The output string. (can be T)
- * @param A[0..n-1] The temporary array. (can be NULL)
- * @param n The length of the given string.
- * @param idx The primary index.
- * @return 0 if no error occurred, -1 or -2 otherwise.
- */
-DIVSUFSORT_API
-saint_t
-inverse_bw_transform(const sauchar_t *T, sauchar_t *U,
-                     saidx_t *A /* can NULL */,
-                     saidx_t n, saidx_t idx);
-
-/**
- * Checks the correctness of a given suffix array.
- * @param T[0..n-1] The input string.
- * @param SA[0..n-1] The input suffix array.
- * @param n The length of the given string.
- * @param verbose The verbose mode.
- * @return 0 if no error occurred.
- */
-DIVSUFSORT_API
-saint_t
-sufcheck(const sauchar_t *T, const saidx_t *SA, saidx_t n, saint_t verbose);
-
-/**
- * Search for the pattern P in the string T.
- * @param T[0..Tsize-1] The input string.
- * @param Tsize The length of the given string.
- * @param P[0..Psize-1] The input pattern string.
- * @param Psize The length of the given pattern string.
- * @param SA[0..SAsize-1] The input suffix array.
- * @param SAsize The length of the given suffix array.
- * @param idx The output index.
- * @return The count of matches if no error occurred, -1 otherwise.
- */
-DIVSUFSORT_API
-saidx_t
-sa_search(const sauchar_t *T, saidx_t Tsize,
-          const sauchar_t *P, saidx_t Psize,
-          const saidx_t *SA, saidx_t SAsize,
-          saidx_t *left);
-
-/**
- * Search for the character c in the string T.
- * @param T[0..Tsize-1] The input string.
- * @param Tsize The length of the given string.
- * @param SA[0..SAsize-1] The input suffix array.
- * @param SAsize The length of the given suffix array.
- * @param c The input character.
- * @param idx The output index.
- * @return The count of matches if no error occurred, -1 otherwise.
- */
-DIVSUFSORT_API
-saidx_t
-sa_simplesearch(const sauchar_t *T, saidx_t Tsize,
-                const saidx_t *SA, saidx_t SAsize,
-                saint_t c, saidx_t *left);
-
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif /* __cplusplus */
-
-#endif /* _DIVSUFSORT_H */
diff --git a/dictBuilder/divsufsort_private.h b/dictBuilder/divsufsort_private.h
deleted file mode 100644
index 0a18f6d28cb..00000000000
--- a/dictBuilder/divsufsort_private.h
+++ /dev/null
@@ -1,212 +0,0 @@
-/*
- * divsufsort_private.h for libdivsufsort
- * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef _DIVSUFSORT_PRIVATE_H
-#define _DIVSUFSORT_PRIVATE_H 1
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/* *************************
-*  Includes
-***************************/
-#include <assert.h>
-#include <stdlib.h>   /* unconditional */
-#include <stdio.h>
-#include "config.h"   /* unconditional */
-
-
-#if HAVE_STRING_H
-# include <string.h>
-#endif
-#if HAVE_MEMORY_H
-# include <memory.h>
-#endif
-#if HAVE_STDDEF_H
-# include <stddef.h>
-#endif
-#if HAVE_STRINGS_H
-# ifdef _WIN32
-#  include <string.h>
-# else
-#  include <strings.h>
-# endif
-#endif
-#if HAVE_INTTYPES_H
-# include <inttypes.h>
-#else
-# if HAVE_STDINT_H
-#  include <stdint.h>
-# endif
-#endif
-#if defined(BUILD_DIVSUFSORT64)
-# include "divsufsort64.h"
-# ifndef SAIDX_T
-#  define SAIDX_T
-#  define saidx_t saidx64_t
-# endif /* SAIDX_T */
-# ifndef PRIdSAIDX_T
-#  define PRIdSAIDX_T PRIdSAIDX64_T
-# endif /* PRIdSAIDX_T */
-# define divsufsort divsufsort64
-# define divbwt divbwt64
-# define divsufsort_version divsufsort64_version
-# define bw_transform bw_transform64
-# define inverse_bw_transform inverse_bw_transform64
-# define sufcheck sufcheck64
-# define sa_search sa_search64
-# define sa_simplesearch sa_simplesearch64
-# define sssort sssort64
-# define trsort trsort64
-#else
-# include "divsufsort.h"
-#endif
-
-
-/*- Constants -*/
-#if !defined(UINT8_MAX)
-# define UINT8_MAX (255)
-#endif /* UINT8_MAX */
-#if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1)
-# undef ALPHABET_SIZE
-#endif
-#if !defined(ALPHABET_SIZE)
-# define ALPHABET_SIZE (UINT8_MAX + 1)
-#endif
-/* for divsufsort.c */
-#define BUCKET_A_SIZE (ALPHABET_SIZE)
-#define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE)
-/* for sssort.c */
-#if defined(SS_INSERTIONSORT_THRESHOLD)
-# if SS_INSERTIONSORT_THRESHOLD < 1
-#  undef SS_INSERTIONSORT_THRESHOLD
-#  define SS_INSERTIONSORT_THRESHOLD (1)
-# endif
-#else
-# define SS_INSERTIONSORT_THRESHOLD (8)
-#endif
-#if defined(SS_BLOCKSIZE)
-# if SS_BLOCKSIZE < 0
-#  undef SS_BLOCKSIZE
-#  define SS_BLOCKSIZE (0)
-# elif 32768 <= SS_BLOCKSIZE
-#  undef SS_BLOCKSIZE
-#  define SS_BLOCKSIZE (32767)
-# endif
-#else
-# define SS_BLOCKSIZE (1024)
-#endif
-/* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */
-#if SS_BLOCKSIZE == 0
-# if defined(BUILD_DIVSUFSORT64)
-#  define SS_MISORT_STACKSIZE (96)
-# else
-#  define SS_MISORT_STACKSIZE (64)
-# endif
-#elif SS_BLOCKSIZE <= 4096
-# define SS_MISORT_STACKSIZE (16)
-#else
-# define SS_MISORT_STACKSIZE (24)
-#endif
-#if defined(BUILD_DIVSUFSORT64)
-# define SS_SMERGE_STACKSIZE (64)
-#else
-# define SS_SMERGE_STACKSIZE (32)
-#endif
-/* for trsort.c */
-#define TR_INSERTIONSORT_THRESHOLD (8)
-#if defined(BUILD_DIVSUFSORT64)
-# define TR_STACKSIZE (96)
-#else
-# define TR_STACKSIZE (64)
-#endif
-
-
-/*- Macros -*/
-#ifndef SWAP
-# define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0)
-#endif /* SWAP */
-#ifndef MIN
-# define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b))
-#endif /* MIN */
-#ifndef MAX
-# define MAX(_a, _b) (((_a) > (_b)) ? (_a) : (_b))
-#endif /* MAX */
-#define STACK_PUSH(_a, _b, _c, _d)\
-  do {\
-    assert(ssize < STACK_SIZE);\
-    stack[ssize].a = (_a), stack[ssize].b = (_b),\
-    stack[ssize].c = (_c), stack[ssize++].d = (_d);\
-  } while(0)
-#define STACK_PUSH5(_a, _b, _c, _d, _e)\
-  do {\
-    assert(ssize < STACK_SIZE);\
-    stack[ssize].a = (_a), stack[ssize].b = (_b),\
-    stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\
-  } while(0)
-#define STACK_POP(_a, _b, _c, _d)\
-  do {\
-    assert(0 <= ssize);\
-    if(ssize == 0) { return; }\
-    (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
-    (_c) = stack[ssize].c, (_d) = stack[ssize].d;\
-  } while(0)
-#define STACK_POP5(_a, _b, _c, _d, _e)\
-  do {\
-    assert(0 <= ssize);\
-    if(ssize == 0) { return; }\
-    (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
-    (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\
-  } while(0)
-/* for divsufsort.c */
-#define BUCKET_A(_c0) bucket_A[(_c0)]
-#if ALPHABET_SIZE == 256
-#define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)])
-#define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)])
-#else
-#define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)])
-#define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)])
-#endif
-
-
-/*- Private Prototypes -*/
-/* sssort.c */
-void
-sssort(const sauchar_t *Td, const saidx_t *PA,
-       saidx_t *first, saidx_t *last,
-       saidx_t *buf, saidx_t bufsize,
-       saidx_t depth, saidx_t n, saint_t lastsuffix);
-/* trsort.c */
-void
-trsort(saidx_t *ISA, saidx_t *SA, saidx_t n, saidx_t depth);
-
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif /* __cplusplus */
-
-#endif /* _DIVSUFSORT_PRIVATE_H */
diff --git a/dictBuilder/lfs.h b/dictBuilder/lfs.h
deleted file mode 100644
index 7ef88f0b4d4..00000000000
--- a/dictBuilder/lfs.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * lfs.h for libdivsufsort
- * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef _LFS_H
-#define _LFS_H 1
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-#ifndef __STRICT_ANSI__
-# define LFS_OFF_T off_t
-# define LFS_FOPEN fopen
-# define LFS_FTELL ftello
-# define LFS_FSEEK fseeko
-# define LFS_PRId  PRIdMAX
-#else
-# define LFS_OFF_T long
-# define LFS_FOPEN fopen
-# define LFS_FTELL ftell
-# define LFS_FSEEK fseek
-# define LFS_PRId "ld"
-#endif
-#ifndef PRIdOFF_T
-# define PRIdOFF_T LFS_PRId
-#endif
-
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif /* __cplusplus */
-
-#endif /* _LFS_H */
diff --git a/dictBuilder/sssort.c b/dictBuilder/sssort.c
deleted file mode 100644
index 07cb5bd1bc4..00000000000
--- a/dictBuilder/sssort.c
+++ /dev/null
@@ -1,844 +0,0 @@
-/*
- * sssort.c for libdivsufsort
- * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/*- Compiler specifics -*/
-#ifdef __clang__
-#pragma clang diagnostic ignored "-Wshorten-64-to-32"
-#endif
-
-#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-/* inline is defined */
-#elif defined(_MSC_VER)
-#  define inline __inline
-#else
-#  define inline /* disable inline */
-#endif
-
-#ifdef _MSC_VER    /* Visual Studio */
-#  pragma warning(disable : 4127)      /* disable: C4127: conditional expression is constant */
-#  define FORCE_INLINE static __forceinline
-#else
-#  if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
-#    ifdef __GNUC__
-#      define FORCE_INLINE static inline __attribute__((always_inline))
-#    else
-#      define FORCE_INLINE static inline
-#    endif
-#  else
-#    define FORCE_INLINE static
-#  endif /* __STDC_VERSION__ */
-#endif
-
-/*- Dependencies -*/
-#include "divsufsort_private.h"
-
-
-/*- Private Functions -*/
-
-static const saint_t lg_table[256]= {
- -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
-  5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
-  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
-  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
-  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
-  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
-  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
-};
-
-#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
-
-static INLINE
-saint_t
-ss_ilg(saidx_t n) {
-#if SS_BLOCKSIZE == 0
-# if defined(BUILD_DIVSUFSORT64)
-  return (n >> 32) ?
-          ((n >> 48) ?
-            ((n >> 56) ?
-              56 + lg_table[(n >> 56) & 0xff] :
-              48 + lg_table[(n >> 48) & 0xff]) :
-            ((n >> 40) ?
-              40 + lg_table[(n >> 40) & 0xff] :
-              32 + lg_table[(n >> 32) & 0xff])) :
-          ((n & 0xffff0000) ?
-            ((n & 0xff000000) ?
-              24 + lg_table[(n >> 24) & 0xff] :
-              16 + lg_table[(n >> 16) & 0xff]) :
-            ((n & 0x0000ff00) ?
-               8 + lg_table[(n >>  8) & 0xff] :
-               0 + lg_table[(n >>  0) & 0xff]));
-# else
-  return (n & 0xffff0000) ?
-          ((n & 0xff000000) ?
-            24 + lg_table[(n >> 24) & 0xff] :
-            16 + lg_table[(n >> 16) & 0xff]) :
-          ((n & 0x0000ff00) ?
-             8 + lg_table[(n >>  8) & 0xff] :
-             0 + lg_table[(n >>  0) & 0xff]);
-# endif
-#elif SS_BLOCKSIZE < 256
-  return lg_table[n];
-#else
-  return (n & 0xff00) ?
-          8 + lg_table[(n >> 8) & 0xff] :
-          0 + lg_table[(n >> 0) & 0xff];
-#endif
-}
-
-#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
-
-#if SS_BLOCKSIZE != 0
-
-static const saint_t sqq_table[256] = {
-  0,  16,  22,  27,  32,  35,  39,  42,  45,  48,  50,  53,  55,  57,  59,  61,
- 64,  65,  67,  69,  71,  73,  75,  76,  78,  80,  81,  83,  84,  86,  87,  89,
- 90,  91,  93,  94,  96,  97,  98,  99, 101, 102, 103, 104, 106, 107, 108, 109,
-110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
-128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
-143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155,
-156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168,
-169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180,
-181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191,
-192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201,
-202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211,
-212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221,
-221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230,
-230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238,
-239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247,
-247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255
-};
-
-static INLINE
-saidx_t
-ss_isqrt(saidx_t x) {
-  saidx_t y, e;
-
-  if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; }
-  e = (x & 0xffff0000) ?
-        ((x & 0xff000000) ?
-          24 + lg_table[(x >> 24) & 0xff] :
-          16 + lg_table[(x >> 16) & 0xff]) :
-        ((x & 0x0000ff00) ?
-           8 + lg_table[(x >>  8) & 0xff] :
-           0 + lg_table[(x >>  0) & 0xff]);
-
-  if(e >= 16) {
-    y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7);
-    if(e >= 24) { y = (y + 1 + x / y) >> 1; }
-    y = (y + 1 + x / y) >> 1;
-  } else if(e >= 8) {
-    y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1;
-  } else {
-    return sqq_table[x] >> 4;
-  }
-
-  return (x < (y * y)) ? y - 1 : y;
-}
-
-#endif /* SS_BLOCKSIZE != 0 */
-
-
-/*---------------------------------------------------------------------------*/
-
-/* Compares two suffixes. */
-static INLINE
-saint_t
-ss_compare(const sauchar_t *T,
-           const saidx_t *p1, const saidx_t *p2,
-           saidx_t depth) {
-  const sauchar_t *U1, *U2, *U1n, *U2n;
-
-  for(U1 = T + depth + *p1,
-      U2 = T + depth + *p2,
-      U1n = T + *(p1 + 1) + 2,
-      U2n = T + *(p2 + 1) + 2;
-      (U1 < U1n) && (U2 < U2n) && (*U1 == *U2);
-      ++U1, ++U2) {
-  }
-
-  return U1 < U1n ?
-        (U2 < U2n ? *U1 - *U2 : 1) :
-        (U2 < U2n ? -1 : 0);
-}
-
-
-/*---------------------------------------------------------------------------*/
-
-#if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1)
-
-/* Insertionsort for small size groups */
-static
-void
-ss_insertionsort(const sauchar_t *T, const saidx_t *PA,
-                 saidx_t *first, saidx_t *last, saidx_t depth) {
-  saidx_t *i, *j;
-  saidx_t t;
-  saint_t r;
-
-  for(i = last - 2; first <= i; --i) {
-    for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) {
-      do { *(j - 1) = *j; } while((++j < last) && (*j < 0));
-      if(last <= j) { break; }
-    }
-    if(r == 0) { *j = ~*j; }
-    *(j - 1) = t;
-  }
-}
-
-#endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */
-
-
-/*---------------------------------------------------------------------------*/
-
-#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
-
-static INLINE
-void
-ss_fixdown(const sauchar_t *Td, const saidx_t *PA,
-           saidx_t *SA, saidx_t i, saidx_t size) {
-  saidx_t j, k;
-  saidx_t v;
-  saint_t c, d, e;
-
-  for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
-    d = Td[PA[SA[k = j++]]];
-    if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; }
-    if(d <= c) { break; }
-  }
-  SA[i] = v;
-}
-
-/* Simple top-down heapsort. */
-static
-void
-ss_heapsort(const sauchar_t *Td, const saidx_t *PA, saidx_t *SA, saidx_t size) {
-  saidx_t i, m;
-  saidx_t t;
-
-  m = size;
-  if((size % 2) == 0) {
-    m--;
-    if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); }
-  }
-
-  for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); }
-  if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); }
-  for(i = m - 1; 0 < i; --i) {
-    t = SA[0], SA[0] = SA[i];
-    ss_fixdown(Td, PA, SA, 0, i);
-    SA[i] = t;
-  }
-}
-
-
-/*---------------------------------------------------------------------------*/
-
-/* Returns the median of three elements. */
-static INLINE
-saidx_t *
-ss_median3(const sauchar_t *Td, const saidx_t *PA,
-           saidx_t *v1, saidx_t *v2, saidx_t *v3) {
-  saidx_t *t;
-  if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); }
-  if(Td[PA[*v2]] > Td[PA[*v3]]) {
-    if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; }
-    else { return v3; }
-  }
-  return v2;
-}
-
-/* Returns the median of five elements. */
-static INLINE
-saidx_t *
-ss_median5(const sauchar_t *Td, const saidx_t *PA,
-           saidx_t *v1, saidx_t *v2, saidx_t *v3, saidx_t *v4, saidx_t *v5) {
-  saidx_t *t;
-  if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); }
-  if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); }
-  if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); }
-  if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); }
-  if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); }
-  if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; }
-  return v3;
-}
-
-/* Returns the pivot element. */
-static INLINE
-saidx_t *
-ss_pivot(const sauchar_t *Td, const saidx_t *PA, saidx_t *first, saidx_t *last) {
-  saidx_t *middle;
-  saidx_t t;
-
-  t = last - first;
-  middle = first + t / 2;
-
-  if(t <= 512) {
-    if(t <= 32) {
-      return ss_median3(Td, PA, first, middle, last - 1);
-    } else {
-      t >>= 2;
-      return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1);
-    }
-  }
-  t >>= 3;
-  first  = ss_median3(Td, PA, first, first + t, first + (t << 1));
-  middle = ss_median3(Td, PA, middle - t, middle, middle + t);
-  last   = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1);
-  return ss_median3(Td, PA, first, middle, last);
-}
-
-
-/*---------------------------------------------------------------------------*/
-
-/* Binary partition for substrings. */
-static INLINE
-saidx_t *
-ss_partition(const saidx_t *PA,
-                    saidx_t *first, saidx_t *last, saidx_t depth) {
-  saidx_t *a, *b;
-  saidx_t t;
-  for(a = first - 1, b = last;;) {
-    for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; }
-    for(; (a < --b) && ((PA[*b] + depth) <  (PA[*b + 1] + 1));) { }
-    if(b <= a) { break; }
-    t = ~*b;
-    *b = *a;
-    *a = t;
-  }
-  if(first < a) { *first = ~*first; }
-  return a;
-}
-
-/* Multikey introsort for medium size groups. */
-static
-void
-ss_mintrosort(const sauchar_t *T, const saidx_t *PA,
-              saidx_t *first, saidx_t *last,
-              saidx_t depth) {
-#define STACK_SIZE SS_MISORT_STACKSIZE
-  struct { saidx_t *a, *b, c; saint_t d; } stack[STACK_SIZE];
-  const sauchar_t *Td;
-  saidx_t *a, *b, *c, *d, *e, *f;
-  saidx_t s, t;
-  saint_t ssize;
-  saint_t limit;
-  saint_t v, x = 0;
-
-  for(ssize = 0, limit = ss_ilg(last - first);;) {
-
-    if((last - first) <= SS_INSERTIONSORT_THRESHOLD) {
-#if 1 < SS_INSERTIONSORT_THRESHOLD
-      if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); }
-#endif
-      STACK_POP(first, last, depth, limit);
-      continue;
-    }
-
-    Td = T + depth;
-    if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); }
-    if(limit < 0) {
-      for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) {
-        if((x = Td[PA[*a]]) != v) {
-          if(1 < (a - first)) { break; }
-          v = x;
-          first = a;
-        }
-      }
-      if(Td[PA[*first] - 1] < v) {
-        first = ss_partition(PA, first, a, depth);
-      }
-      if((a - first) <= (last - a)) {
-        if(1 < (a - first)) {
-          STACK_PUSH(a, last, depth, -1);
-          last = a, depth += 1, limit = ss_ilg(a - first);
-        } else {
-          first = a, limit = -1;
-        }
-      } else {
-        if(1 < (last - a)) {
-          STACK_PUSH(first, a, depth + 1, ss_ilg(a - first));
-          first = a, limit = -1;
-        } else {
-          last = a, depth += 1, limit = ss_ilg(a - first);
-        }
-      }
-      continue;
-    }
-
-    /* choose pivot */
-    a = ss_pivot(Td, PA, first, last);
-    v = Td[PA[*a]];
-    SWAP(*first, *a);
-
-    /* partition */
-    for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { }
-    if(((a = b) < last) && (x < v)) {
-      for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) {
-        if(x == v) { SWAP(*b, *a); ++a; }
-      }
-    }
-    for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { }
-    if((b < (d = c)) && (x > v)) {
-      for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
-        if(x == v) { SWAP(*c, *d); --d; }
-      }
-    }
-    for(; b < c;) {
-      SWAP(*b, *c);
-      for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) {
-        if(x == v) { SWAP(*b, *a); ++a; }
-      }
-      for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
-        if(x == v) { SWAP(*c, *d); --d; }
-      }
-    }
-
-    if(a <= d) {
-      c = b - 1;
-
-      if((s = a - first) > (t = b - a)) { s = t; }
-      for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
-      if((s = d - c) > (t = last - d - 1)) { s = t; }
-      for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
-
-      a = first + (b - a), c = last - (d - c);
-      b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth);
-
-      if((a - first) <= (last - c)) {
-        if((last - c) <= (c - b)) {
-          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
-          STACK_PUSH(c, last, depth, limit);
-          last = a;
-        } else if((a - first) <= (c - b)) {
-          STACK_PUSH(c, last, depth, limit);
-          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
-          last = a;
-        } else {
-          STACK_PUSH(c, last, depth, limit);
-          STACK_PUSH(first, a, depth, limit);
-          first = b, last = c, depth += 1, limit = ss_ilg(c - b);
-        }
-      } else {
-        if((a - first) <= (c - b)) {
-          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
-          STACK_PUSH(first, a, depth, limit);
-          first = c;
-        } else if((last - c) <= (c - b)) {
-          STACK_PUSH(first, a, depth, limit);
-          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
-          first = c;
-        } else {
-          STACK_PUSH(first, a, depth, limit);
-          STACK_PUSH(c, last, depth, limit);
-          first = b, last = c, depth += 1, limit = ss_ilg(c - b);
-        }
-      }
-    } else {
-      limit += 1;
-      if(Td[PA[*first] - 1] < v) {
-        first = ss_partition(PA, first, last, depth);
-        limit = ss_ilg(last - first);
-      }
-      depth += 1;
-    }
-  }
-#undef STACK_SIZE
-}
-
-#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
-
-
-/*---------------------------------------------------------------------------*/
-
-#if SS_BLOCKSIZE != 0
-
-static INLINE
-void
-ss_blockswap(saidx_t *a, saidx_t *b, saidx_t n) {
-  saidx_t t;
-  for(; 0 < n; --n, ++a, ++b) {
-    t = *a, *a = *b, *b = t;
-  }
-}
-
-static INLINE
-void
-ss_rotate(saidx_t *first, saidx_t *middle, saidx_t *last) {
-  saidx_t *a, *b, t;
-  saidx_t l, r;
-  l = middle - first, r = last - middle;
-  for(; (0 < l) && (0 < r);) {
-    if(l == r) { ss_blockswap(first, middle, l); break; }
-    if(l < r) {
-      a = last - 1, b = middle - 1;
-      t = *a;
-      do {
-        *a-- = *b, *b-- = *a;
-        if(b < first) {
-          *a = t;
-          last = a;
-          if((r -= l + 1) <= l) { break; }
-          a -= 1, b = middle - 1;
-          t = *a;
-        }
-      } while(1);
-    } else {
-      a = first, b = middle;
-      t = *a;
-      do {
-        *a++ = *b, *b++ = *a;
-        if(last <= b) {
-          *a = t;
-          first = a + 1;
-          if((l -= r + 1) <= r) { break; }
-          a += 1, b = middle;
-          t = *a;
-        }
-      } while(1);
-    }
-  }
-}
-
-
-/*---------------------------------------------------------------------------*/
-
-static
-void
-ss_inplacemerge(const sauchar_t *T, const saidx_t *PA,
-                saidx_t *first, saidx_t *middle, saidx_t *last,
-                saidx_t depth) {
-  const saidx_t *p;
-  saidx_t *a, *b;
-  saidx_t len, half;
-  saint_t q, r;
-  saint_t x;
-
-  for(;;) {
-    if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); }
-    else                { x = 0; p = PA +  *(last - 1); }
-    for(a = first, len = middle - first, half = len >> 1, r = -1;
-        0 < len;
-        len = half, half >>= 1) {
-      b = a + half;
-      q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth);
-      if(q < 0) {
-        a = b + 1;
-        half -= (len & 1) ^ 1;
-      } else {
-        r = q;
-      }
-    }
-    if(a < middle) {
-      if(r == 0) { *a = ~*a; }
-      ss_rotate(a, middle, last);
-      last -= middle - a;
-      middle = a;
-      if(first == middle) { break; }
-    }
-    --last;
-    if(x != 0) { while(*--last < 0) { } }
-    if(middle == last) { break; }
-  }
-}
-
-
-/*---------------------------------------------------------------------------*/
-
-/* Merge-forward with internal buffer. */
-static
-void
-ss_mergeforward(const sauchar_t *T, const saidx_t *PA,
-                saidx_t *first, saidx_t *middle, saidx_t *last,
-                saidx_t *buf, saidx_t depth) {
-  saidx_t *a, *b, *c, *bufend;
-  saidx_t t;
-  saint_t r;
-
-  bufend = buf + (middle - first) - 1;
-  ss_blockswap(buf, first, middle - first);
-
-  for(t = *(a = first), b = buf, c = middle;;) {
-    r = ss_compare(T, PA + *b, PA + *c, depth);
-    if(r < 0) {
-      do {
-        *a++ = *b;
-        if(bufend <= b) { *bufend = t; return; }
-        *b++ = *a;
-      } while(*b < 0);
-    } else if(r > 0) {
-      do {
-        *a++ = *c, *c++ = *a;
-        if(last <= c) {
-          while(b < bufend) { *a++ = *b, *b++ = *a; }
-          *a = *b, *b = t;
-          return;
-        }
-      } while(*c < 0);
-    } else {
-      *c = ~*c;
-      do {
-        *a++ = *b;
-        if(bufend <= b) { *bufend = t; return; }
-        *b++ = *a;
-      } while(*b < 0);
-
-      do {
-        *a++ = *c, *c++ = *a;
-        if(last <= c) {
-          while(b < bufend) { *a++ = *b, *b++ = *a; }
-          *a = *b, *b = t;
-          return;
-        }
-      } while(*c < 0);
-    }
-  }
-}
-
-/* Merge-backward with internal buffer. */
-static
-void
-ss_mergebackward(const sauchar_t *T, const saidx_t *PA,
-                 saidx_t *first, saidx_t *middle, saidx_t *last,
-                 saidx_t *buf, saidx_t depth) {
-  const saidx_t *p1, *p2;
-  saidx_t *a, *b, *c, *bufend;
-  saidx_t t;
-  saint_t r;
-  saint_t x;
-
-  bufend = buf + (last - middle) - 1;
-  ss_blockswap(buf, middle, last - middle);
-
-  x = 0;
-  if(*bufend < 0)       { p1 = PA + ~*bufend; x |= 1; }
-  else                  { p1 = PA +  *bufend; }
-  if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; }
-  else                  { p2 = PA +  *(middle - 1); }
-  for(t = *(a = last - 1), b = bufend, c = middle - 1;;) {
-    r = ss_compare(T, p1, p2, depth);
-    if(0 < r) {
-      if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
-      *a-- = *b;
-      if(b <= buf) { *buf = t; break; }
-      *b-- = *a;
-      if(*b < 0) { p1 = PA + ~*b; x |= 1; }
-      else       { p1 = PA +  *b; }
-    } else if(r < 0) {
-      if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
-      *a-- = *c, *c-- = *a;
-      if(c < first) {
-        while(buf < b) { *a-- = *b, *b-- = *a; }
-        *a = *b, *b = t;
-        break;
-      }
-      if(*c < 0) { p2 = PA + ~*c; x |= 2; }
-      else       { p2 = PA +  *c; }
-    } else {
-      if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
-      *a-- = ~*b;
-      if(b <= buf) { *buf = t; break; }
-      *b-- = *a;
-      if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
-      *a-- = *c, *c-- = *a;
-      if(c < first) {
-        while(buf < b) { *a-- = *b, *b-- = *a; }
-        *a = *b, *b = t;
-        break;
-      }
-      if(*b < 0) { p1 = PA + ~*b; x |= 1; }
-      else       { p1 = PA +  *b; }
-      if(*c < 0) { p2 = PA + ~*c; x |= 2; }
-      else       { p2 = PA +  *c; }
-    }
-  }
-}
-
-/* D&C based merge. */
-static
-void
-ss_swapmerge(const sauchar_t *T, const saidx_t *PA,
-             saidx_t *first, saidx_t *middle, saidx_t *last,
-             saidx_t *buf, saidx_t bufsize, saidx_t depth) {
-#define STACK_SIZE SS_SMERGE_STACKSIZE
-#define GETIDX(a) ((0 <= (a)) ? (a) : (~(a)))
-#define MERGE_CHECK(a, b, c)\
-  do {\
-    if(((c) & 1) ||\
-       (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\
-      *(a) = ~*(a);\
-    }\
-    if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\
-      *(b) = ~*(b);\
-    }\
-  } while(0)
-  struct { saidx_t *a, *b, *c; saint_t d; } stack[STACK_SIZE];
-  saidx_t *l, *r, *lm, *rm;
-  saidx_t m, len, half;
-  saint_t ssize;
-  saint_t check, next;
-
-  for(check = 0, ssize = 0;;) {
-    if((last - middle) <= bufsize) {
-      if((first < middle) && (middle < last)) {
-        ss_mergebackward(T, PA, first, middle, last, buf, depth);
-      }
-      MERGE_CHECK(first, last, check);
-      STACK_POP(first, middle, last, check);
-      continue;
-    }
-
-    if((middle - first) <= bufsize) {
-      if(first < middle) {
-        ss_mergeforward(T, PA, first, middle, last, buf, depth);
-      }
-      MERGE_CHECK(first, last, check);
-      STACK_POP(first, middle, last, check);
-      continue;
-    }
-
-    for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1;
-        0 < len;
-        len = half, half >>= 1) {
-      if(ss_compare(T, PA + GETIDX(*(middle + m + half)),
-                       PA + GETIDX(*(middle - m - half - 1)), depth) < 0) {
-        m += half + 1;
-        half -= (len & 1) ^ 1;
-      }
-    }
-
-    if(0 < m) {
-      lm = middle - m, rm = middle + m;
-      ss_blockswap(lm, middle, m);
-      l = r = middle, next = 0;
-      if(rm < last) {
-        if(*rm < 0) {
-          *rm = ~*rm;
-          if(first < lm) { for(; *--l < 0;) { } next |= 4; }
-          next |= 1;
-        } else if(first < lm) {
-          for(; *r < 0; ++r) { }
-          next |= 2;
-        }
-      }
-
-      if((l - first) <= (last - r)) {
-        STACK_PUSH(r, rm, last, (next & 3) | (check & 4));
-        middle = lm, last = l, check = (check & 3) | (next & 4);
-      } else {
-        if((next & 2) && (r == middle)) { next ^= 6; }
-        STACK_PUSH(first, lm, l, (check & 3) | (next & 4));
-        first = r, middle = rm, check = (next & 3) | (check & 4);
-      }
-    } else {
-      if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) {
-        *middle = ~*middle;
-      }
-      MERGE_CHECK(first, last, check);
-      STACK_POP(first, middle, last, check);
-    }
-  }
-#undef STACK_SIZE
-}
-
-#endif /* SS_BLOCKSIZE != 0 */
-
-
-/*---------------------------------------------------------------------------*/
-
-/*- Function -*/
-
-/* Substring sort */
-void
-sssort(const sauchar_t *T, const saidx_t *PA,
-       saidx_t *first, saidx_t *last,
-       saidx_t *buf, saidx_t bufsize,
-       saidx_t depth, saidx_t n, saint_t lastsuffix) {
-  saidx_t *a;
-#if SS_BLOCKSIZE != 0
-  saidx_t *b, *middle, *curbuf;
-  saidx_t j, k, curbufsize, limit;
-#endif
-  saidx_t i;
-
-  if(lastsuffix != 0) { ++first; }
-
-#if SS_BLOCKSIZE == 0
-  ss_mintrosort(T, PA, first, last, depth);
-#else
-  if((bufsize < SS_BLOCKSIZE) &&
-      (bufsize < (last - first)) &&
-      (bufsize < (limit = ss_isqrt(last - first)))) {
-    if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; }
-    buf = middle = last - limit, bufsize = limit;
-  } else {
-    middle = last, limit = 0;
-  }
-  for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) {
-#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
-    ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth);
-#elif 1 < SS_BLOCKSIZE
-    ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth);
-#endif
-    curbufsize = last - (a + SS_BLOCKSIZE);
-    curbuf = a + SS_BLOCKSIZE;
-    if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; }
-    for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) {
-      ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth);
-    }
-  }
-#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
-  ss_mintrosort(T, PA, a, middle, depth);
-#elif 1 < SS_BLOCKSIZE
-  ss_insertionsort(T, PA, a, middle, depth);
-#endif
-  for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) {
-    if(i & 1) {
-      ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth);
-      a -= k;
-    }
-  }
-  if(limit != 0) {
-#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
-    ss_mintrosort(T, PA, middle, last, depth);
-#elif 1 < SS_BLOCKSIZE
-    ss_insertionsort(T, PA, middle, last, depth);
-#endif
-    ss_inplacemerge(T, PA, first, middle, last, depth);
-  }
-#endif
-
-  if(lastsuffix != 0) {
-    /* Insert last type B* suffix. */
-    saidx_t PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2;
-    for(a = first, i = *(first - 1);
-        (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth)));
-        ++a) {
-      *(a - 1) = *a;
-    }
-    *(a - 1) = i;
-  }
-}
diff --git a/dictBuilder/trsort.c b/dictBuilder/trsort.c
deleted file mode 100644
index 9e21e68ec1c..00000000000
--- a/dictBuilder/trsort.c
+++ /dev/null
@@ -1,615 +0,0 @@
-/*
- * trsort.c for libdivsufsort
- * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/*- Compiler specifics -*/
-#ifdef __clang__
-#pragma clang diagnostic ignored "-Wshorten-64-to-32"
-#endif
-
-#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-/* inline is defined */
-#elif defined(_MSC_VER)
-#  define inline __inline
-#else
-#  define inline /* disable inline */
-#endif
-
-#ifdef _MSC_VER    /* Visual Studio */
-#  pragma warning(disable : 4127)      /* disable: C4127: conditional expression is constant */
-#  define FORCE_INLINE static __forceinline
-#else
-#  if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
-#    ifdef __GNUC__
-#      define FORCE_INLINE static inline __attribute__((always_inline))
-#    else
-#      define FORCE_INLINE static inline
-#    endif
-#  else
-#    define FORCE_INLINE static
-#  endif /* __STDC_VERSION__ */
-#endif
-
-/*- Dependencies -*/
-#include "divsufsort_private.h"
-
-
-/*- Private Functions -*/
-
-static const saint_t lg_table[256]= {
- -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
-  5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
-  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
-  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
-  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
-  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
-  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
-  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
-};
-
-static INLINE
-saint_t
-tr_ilg(saidx_t n) {
-#if defined(BUILD_DIVSUFSORT64)
-  return (n >> 32) ?
-          ((n >> 48) ?
-            ((n >> 56) ?
-              56 + lg_table[(n >> 56) & 0xff] :
-              48 + lg_table[(n >> 48) & 0xff]) :
-            ((n >> 40) ?
-              40 + lg_table[(n >> 40) & 0xff] :
-              32 + lg_table[(n >> 32) & 0xff])) :
-          ((n & 0xffff0000) ?
-            ((n & 0xff000000) ?
-              24 + lg_table[(n >> 24) & 0xff] :
-              16 + lg_table[(n >> 16) & 0xff]) :
-            ((n & 0x0000ff00) ?
-               8 + lg_table[(n >>  8) & 0xff] :
-               0 + lg_table[(n >>  0) & 0xff]));
-#else
-  return (n & 0xffff0000) ?
-          ((n & 0xff000000) ?
-            24 + lg_table[(n >> 24) & 0xff] :
-            16 + lg_table[(n >> 16) & 0xff]) :
-          ((n & 0x0000ff00) ?
-             8 + lg_table[(n >>  8) & 0xff] :
-             0 + lg_table[(n >>  0) & 0xff]);
-#endif
-}
-
-
-/*---------------------------------------------------------------------------*/
-
-/* Simple insertionsort for small size groups. */
-static
-void
-tr_insertionsort(const saidx_t *ISAd, saidx_t *first, saidx_t *last) {
-  saidx_t *a, *b;
-  saidx_t t, r;
-
-  for(a = first + 1; a < last; ++a) {
-    for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) {
-      do { *(b + 1) = *b; } while((first <= --b) && (*b < 0));
-      if(b < first) { break; }
-    }
-    if(r == 0) { *b = ~*b; }
-    *(b + 1) = t;
-  }
-}
-
-
-/*---------------------------------------------------------------------------*/
-
-static INLINE
-void
-tr_fixdown(const saidx_t *ISAd, saidx_t *SA, saidx_t i, saidx_t size) {
-  saidx_t j, k;
-  saidx_t v;
-  saidx_t c, d, e;
-
-  for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
-    d = ISAd[SA[k = j++]];
-    if(d < (e = ISAd[SA[j]])) { k = j; d = e; }
-    if(d <= c) { break; }
-  }
-  SA[i] = v;
-}
-
-/* Simple top-down heapsort. */
-static
-void
-tr_heapsort(const saidx_t *ISAd, saidx_t *SA, saidx_t size) {
-  saidx_t i, m;
-  saidx_t t;
-
-  m = size;
-  if((size % 2) == 0) {
-    m--;
-    if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); }
-  }
-
-  for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); }
-  if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); }
-  for(i = m - 1; 0 < i; --i) {
-    t = SA[0], SA[0] = SA[i];
-    tr_fixdown(ISAd, SA, 0, i);
-    SA[i] = t;
-  }
-}
-
-
-/*---------------------------------------------------------------------------*/
-
-/* Returns the median of three elements. */
-static INLINE
-saidx_t *
-tr_median3(const saidx_t *ISAd, saidx_t *v1, saidx_t *v2, saidx_t *v3) {
-  saidx_t *t;
-  if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); }
-  if(ISAd[*v2] > ISAd[*v3]) {
-    if(ISAd[*v1] > ISAd[*v3]) { return v1; }
-    else { return v3; }
-  }
-  return v2;
-}
-
-/* Returns the median of five elements. */
-static INLINE
-saidx_t *
-tr_median5(const saidx_t *ISAd,
-           saidx_t *v1, saidx_t *v2, saidx_t *v3, saidx_t *v4, saidx_t *v5) {
-  saidx_t *t;
-  if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); }
-  if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); }
-  if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); }
-  if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); }
-  if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); }
-  if(ISAd[*v3] > ISAd[*v4]) { return v4; }
-  return v3;
-}
-
-/* Returns the pivot element. */
-static INLINE
-saidx_t *
-tr_pivot(const saidx_t *ISAd, saidx_t *first, saidx_t *last) {
-  saidx_t *middle;
-  saidx_t t;
-
-  t = last - first;
-  middle = first + t / 2;
-
-  if(t <= 512) {
-    if(t <= 32) {
-      return tr_median3(ISAd, first, middle, last - 1);
-    } else {
-      t >>= 2;
-      return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1);
-    }
-  }
-  t >>= 3;
-  first  = tr_median3(ISAd, first, first + t, first + (t << 1));
-  middle = tr_median3(ISAd, middle - t, middle, middle + t);
-  last   = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1);
-  return tr_median3(ISAd, first, middle, last);
-}
-
-
-/*---------------------------------------------------------------------------*/
-
-typedef struct _trbudget_t trbudget_t;
-struct _trbudget_t {
-  saidx_t chance;
-  saidx_t remain;
-  saidx_t incval;
-  saidx_t count;
-};
-
-static INLINE
-void
-trbudget_init(trbudget_t *budget, saidx_t chance, saidx_t incval) {
-  budget->chance = chance;
-  budget->remain = budget->incval = incval;
-}
-
-static INLINE
-saint_t
-trbudget_check(trbudget_t *budget, saidx_t size) {
-  if(size <= budget->remain) { budget->remain -= size; return 1; }
-  if(budget->chance == 0) { budget->count += size; return 0; }
-  budget->remain += budget->incval - size;
-  budget->chance -= 1;
-  return 1;
-}
-
-
-/*---------------------------------------------------------------------------*/
-
-static INLINE
-void
-tr_partition(const saidx_t *ISAd,
-             saidx_t *first, saidx_t *middle, saidx_t *last,
-             saidx_t **pa, saidx_t **pb, saidx_t v) {
-  saidx_t *a, *b, *c, *d, *e, *f;
-  saidx_t t, s;
-  saidx_t x = 0;
-
-  for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { }
-  if(((a = b) < last) && (x < v)) {
-    for(; (++b < last) && ((x = ISAd[*b]) <= v);) {
-      if(x == v) { SWAP(*b, *a); ++a; }
-    }
-  }
-  for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { }
-  if((b < (d = c)) && (x > v)) {
-    for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
-      if(x == v) { SWAP(*c, *d); --d; }
-    }
-  }
-  for(; b < c;) {
-    SWAP(*b, *c);
-    for(; (++b < c) && ((x = ISAd[*b]) <= v);) {
-      if(x == v) { SWAP(*b, *a); ++a; }
-    }
-    for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
-      if(x == v) { SWAP(*c, *d); --d; }
-    }
-  }
-
-  if(a <= d) {
-    c = b - 1;
-    if((s = a - first) > (t = b - a)) { s = t; }
-    for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
-    if((s = d - c) > (t = last - d - 1)) { s = t; }
-    for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
-    first += (b - a), last -= (d - c);
-  }
-  *pa = first, *pb = last;
-}
-
-static
-void
-tr_copy(saidx_t *ISA, const saidx_t *SA,
-        saidx_t *first, saidx_t *a, saidx_t *b, saidx_t *last,
-        saidx_t depth) {
-  /* sort suffixes of middle partition
-     by using sorted order of suffixes of left and right partition. */
-  saidx_t *c, *d, *e;
-  saidx_t s, v;
-
-  v = b - SA - 1;
-  for(c = first, d = a - 1; c <= d; ++c) {
-    if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
-      *++d = s;
-      ISA[s] = d - SA;
-    }
-  }
-  for(c = last - 1, e = d + 1, d = b; e < d; --c) {
-    if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
-      *--d = s;
-      ISA[s] = d - SA;
-    }
-  }
-}
-
-static
-void
-tr_partialcopy(saidx_t *ISA, const saidx_t *SA,
-               saidx_t *first, saidx_t *a, saidx_t *b, saidx_t *last,
-               saidx_t depth) {
-  saidx_t *c, *d, *e;
-  saidx_t s, v;
-  saidx_t rank, lastrank, newrank = -1;
-
-  v = b - SA - 1;
-  lastrank = -1;
-  for(c = first, d = a - 1; c <= d; ++c) {
-    if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
-      *++d = s;
-      rank = ISA[s + depth];
-      if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
-      ISA[s] = newrank;
-    }
-  }
-
-  lastrank = -1;
-  for(e = d; first <= e; --e) {
-    rank = ISA[*e];
-    if(lastrank != rank) { lastrank = rank; newrank = e - SA; }
-    if(newrank != rank) { ISA[*e] = newrank; }
-  }
-
-  lastrank = -1;
-  for(c = last - 1, e = d + 1, d = b; e < d; --c) {
-    if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
-      *--d = s;
-      rank = ISA[s + depth];
-      if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
-      ISA[s] = newrank;
-    }
-  }
-}
-
-static
-void
-tr_introsort(saidx_t *ISA, const saidx_t *ISAd,
-             saidx_t *SA, saidx_t *first, saidx_t *last,
-             trbudget_t *budget) {
-#define STACK_SIZE TR_STACKSIZE
-  struct { const saidx_t *a; saidx_t *b, *c; saint_t d, e; }stack[STACK_SIZE];
-  saidx_t *a, *b, *c;
-  saidx_t t;
-  saidx_t v, x = 0;
-  saidx_t incr = ISAd - ISA;
-  saint_t limit, next;
-  saint_t ssize, trlink = -1;
-
-  for(ssize = 0, limit = tr_ilg(last - first);;) {
-
-    if(limit < 0) {
-      if(limit == -1) {
-        /* tandem repeat partition */
-        tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1);
-
-        /* update ranks */
-        if(a < last) {
-          for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
-        }
-        if(b < last) {
-          for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; }
-        }
-
-        /* push */
-        if(1 < (b - a)) {
-          STACK_PUSH5(NULL, a, b, 0, 0);
-          STACK_PUSH5(ISAd - incr, first, last, -2, trlink);
-          trlink = ssize - 2;
-        }
-        if((a - first) <= (last - b)) {
-          if(1 < (a - first)) {
-            STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink);
-            last = a, limit = tr_ilg(a - first);
-          } else if(1 < (last - b)) {
-            first = b, limit = tr_ilg(last - b);
-          } else {
-            STACK_POP5(ISAd, first, last, limit, trlink);
-          }
-        } else {
-          if(1 < (last - b)) {
-            STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink);
-            first = b, limit = tr_ilg(last - b);
-          } else if(1 < (a - first)) {
-            last = a, limit = tr_ilg(a - first);
-          } else {
-            STACK_POP5(ISAd, first, last, limit, trlink);
-          }
-        }
-      } else if(limit == -2) {
-        /* tandem repeat copy */
-        a = stack[--ssize].b, b = stack[ssize].c;
-        if(stack[ssize].d == 0) {
-          tr_copy(ISA, SA, first, a, b, last, ISAd - ISA);
-        } else {
-          if(0 <= trlink) { stack[trlink].d = -1; }
-          tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA);
-        }
-        STACK_POP5(ISAd, first, last, limit, trlink);
-      } else {
-        /* sorted partition */
-        if(0 <= *first) {
-          a = first;
-          do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a));
-          first = a;
-        }
-        if(first < last) {
-          a = first; do { *a = ~*a; } while(*++a < 0);
-          next = (ISA[*a] != ISAd[*a]) ? tr_ilg(a - first + 1) : -1;
-          if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } }
-
-          /* push */
-          if(trbudget_check(budget, a - first)) {
-            if((a - first) <= (last - a)) {
-              STACK_PUSH5(ISAd, a, last, -3, trlink);
-              ISAd += incr, last = a, limit = next;
-            } else {
-              if(1 < (last - a)) {
-                STACK_PUSH5(ISAd + incr, first, a, next, trlink);
-                first = a, limit = -3;
-              } else {
-                ISAd += incr, last = a, limit = next;
-              }
-            }
-          } else {
-            if(0 <= trlink) { stack[trlink].d = -1; }
-            if(1 < (last - a)) {
-              first = a, limit = -3;
-            } else {
-              STACK_POP5(ISAd, first, last, limit, trlink);
-            }
-          }
-        } else {
-          STACK_POP5(ISAd, first, last, limit, trlink);
-        }
-      }
-      continue;
-    }
-
-    if((last - first) <= TR_INSERTIONSORT_THRESHOLD) {
-      tr_insertionsort(ISAd, first, last);
-      limit = -3;
-      continue;
-    }
-
-    if(limit-- == 0) {
-      tr_heapsort(ISAd, first, last - first);
-      for(a = last - 1; first < a; a = b) {
-        for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; }
-      }
-      limit = -3;
-      continue;
-    }
-
-    /* choose pivot */
-    a = tr_pivot(ISAd, first, last);
-    SWAP(*first, *a);
-    v = ISAd[*first];
-
-    /* partition */
-    tr_partition(ISAd, first, first + 1, last, &a, &b, v);
-    if((last - first) != (b - a)) {
-      next = (ISA[*a] != v) ? tr_ilg(b - a) : -1;
-
-      /* update ranks */
-      for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
-      if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } }
-
-      /* push */
-      if((1 < (b - a)) && (trbudget_check(budget, b - a))) {
-        if((a - first) <= (last - b)) {
-          if((last - b) <= (b - a)) {
-            if(1 < (a - first)) {
-              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
-              STACK_PUSH5(ISAd, b, last, limit, trlink);
-              last = a;
-            } else if(1 < (last - b)) {
-              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
-              first = b;
-            } else {
-              ISAd += incr, first = a, last = b, limit = next;
-            }
-          } else if((a - first) <= (b - a)) {
-            if(1 < (a - first)) {
-              STACK_PUSH5(ISAd, b, last, limit, trlink);
-              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
-              last = a;
-            } else {
-              STACK_PUSH5(ISAd, b, last, limit, trlink);
-              ISAd += incr, first = a, last = b, limit = next;
-            }
-          } else {
-            STACK_PUSH5(ISAd, b, last, limit, trlink);
-            STACK_PUSH5(ISAd, first, a, limit, trlink);
-            ISAd += incr, first = a, last = b, limit = next;
-          }
-        } else {
-          if((a - first) <= (b - a)) {
-            if(1 < (last - b)) {
-              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
-              STACK_PUSH5(ISAd, first, a, limit, trlink);
-              first = b;
-            } else if(1 < (a - first)) {
-              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
-              last = a;
-            } else {
-              ISAd += incr, first = a, last = b, limit = next;
-            }
-          } else if((last - b) <= (b - a)) {
-            if(1 < (last - b)) {
-              STACK_PUSH5(ISAd, first, a, limit, trlink);
-              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
-              first = b;
-            } else {
-              STACK_PUSH5(ISAd, first, a, limit, trlink);
-              ISAd += incr, first = a, last = b, limit = next;
-            }
-          } else {
-            STACK_PUSH5(ISAd, first, a, limit, trlink);
-            STACK_PUSH5(ISAd, b, last, limit, trlink);
-            ISAd += incr, first = a, last = b, limit = next;
-          }
-        }
-      } else {
-        if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; }
-        if((a - first) <= (last - b)) {
-          if(1 < (a - first)) {
-            STACK_PUSH5(ISAd, b, last, limit, trlink);
-            last = a;
-          } else if(1 < (last - b)) {
-            first = b;
-          } else {
-            STACK_POP5(ISAd, first, last, limit, trlink);
-          }
-        } else {
-          if(1 < (last - b)) {
-            STACK_PUSH5(ISAd, first, a, limit, trlink);
-            first = b;
-          } else if(1 < (a - first)) {
-            last = a;
-          } else {
-            STACK_POP5(ISAd, first, last, limit, trlink);
-          }
-        }
-      }
-    } else {
-      if(trbudget_check(budget, last - first)) {
-        limit = tr_ilg(last - first), ISAd += incr;
-      } else {
-        if(0 <= trlink) { stack[trlink].d = -1; }
-        STACK_POP5(ISAd, first, last, limit, trlink);
-      }
-    }
-  }
-#undef STACK_SIZE
-}
-
-
-
-/*---------------------------------------------------------------------------*/
-
-/*- Function -*/
-
-/* Tandem repeat sort */
-void
-trsort(saidx_t *ISA, saidx_t *SA, saidx_t n, saidx_t depth) {
-  saidx_t *ISAd;
-  saidx_t *first, *last;
-  trbudget_t budget;
-  saidx_t t, skip, unsorted;
-
-  trbudget_init(&budget, tr_ilg(n) * 2 / 3, n);
-/*  trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */
-  for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) {
-    first = SA;
-    skip = 0;
-    unsorted = 0;
-    do {
-      if((t = *first) < 0) { first -= t; skip += t; }
-      else {
-        if(skip != 0) { *(first + skip) = skip; skip = 0; }
-        last = SA + ISA[t] + 1;
-        if(1 < (last - first)) {
-          budget.count = 0;
-          tr_introsort(ISA, ISAd, SA, first, last, &budget);
-          if(budget.count != 0) { unsorted += budget.count; }
-          else { skip = first - last; }
-        } else if((last - first) == 1) {
-          skip = -1;
-        }
-        first = last;
-      }
-    } while(first < (SA + n));
-    if(skip != 0) { *(first + skip) = skip; }
-    if(unsorted == 0) { break; }
-  }
-}
diff --git a/dictBuilder/utils.c b/dictBuilder/utils.c
deleted file mode 100644
index 90fb23efa89..00000000000
--- a/dictBuilder/utils.c
+++ /dev/null
@@ -1,381 +0,0 @@
-/*
- * utils.c for libdivsufsort
- * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "divsufsort_private.h"
-
-
-/*- Private Function -*/
-
-/* Binary search for inverse bwt. */
-static
-saidx_t
-binarysearch_lower(const saidx_t *A, saidx_t size, saidx_t value) {
-  saidx_t half, i;
-  for(i = 0, half = size >> 1;
-      0 < size;
-      size = half, half >>= 1) {
-    if(A[i + half] < value) {
-      i += half + 1;
-      half -= (size & 1) ^ 1;
-    }
-  }
-  return i;
-}
-
-
-/*- Functions -*/
-
-/* Burrows-Wheeler transform. */
-saint_t
-bw_transform(const sauchar_t *T, sauchar_t *U, saidx_t *SA,
-             saidx_t n, saidx_t *idx) {
-  saidx_t *A, i, j, p, t;
-  saint_t c;
-
-  /* Check arguments. */
-  if((T == NULL) || (U == NULL) || (n < 0) || (idx == NULL)) { return -1; }
-  if(n <= 1) {
-    if(n == 1) { U[0] = T[0]; }
-    *idx = n;
-    return 0;
-  }
-
-  if((A = SA) == NULL) {
-    i = divbwt(T, U, NULL, n);
-    if(0 <= i) { *idx = i; i = 0; }
-    return (saint_t)i;
-  }
-
-  /* BW transform. */
-  if(T == U) {
-    t = n;
-    for(i = 0, j = 0; i < n; ++i) {
-      p = t - 1;
-      t = A[i];
-      if(0 <= p) {
-        c = T[j];
-        U[j] = (j <= p) ? T[p] : (sauchar_t)A[p];
-        A[j] = c;
-        j++;
-      } else {
-        *idx = i;
-      }
-    }
-    p = t - 1;
-    if(0 <= p) {
-      c = T[j];
-      U[j] = (j <= p) ? T[p] : (sauchar_t)A[p];
-      A[j] = c;
-    } else {
-      *idx = i;
-    }
-  } else {
-    U[0] = T[n - 1];
-    for(i = 0; A[i] != 0; ++i) { U[i + 1] = T[A[i] - 1]; }
-    *idx = i + 1;
-    for(++i; i < n; ++i) { U[i] = T[A[i] - 1]; }
-  }
-
-  if(SA == NULL) {
-    /* Deallocate memory. */
-    free(A);
-  }
-
-  return 0;
-}
-
-/* Inverse Burrows-Wheeler transform. */
-saint_t
-inverse_bw_transform(const sauchar_t *T, sauchar_t *U, saidx_t *A,
-                     saidx_t n, saidx_t idx) {
-  saidx_t C[ALPHABET_SIZE];
-  sauchar_t D[ALPHABET_SIZE];
-  saidx_t *B;
-  saidx_t i, p;
-  saint_t c, d;
-
-  /* Check arguments. */
-  if((T == NULL) || (U == NULL) || (n < 0) || (idx < 0) ||
-     (n < idx) || ((0 < n) && (idx == 0))) {
-    return -1;
-  }
-  if(n <= 1) { return 0; }
-
-  if((B = A) == NULL) {
-    /* Allocate n*sizeof(saidx_t) bytes of memory. */
-    if((B = (saidx_t *)malloc((size_t)n * sizeof(saidx_t))) == NULL) { return -2; }
-  }
-
-  /* Inverse BW transform. */
-  for(c = 0; c < ALPHABET_SIZE; ++c) { C[c] = 0; }
-  for(i = 0; i < n; ++i) { ++C[T[i]]; }
-  for(c = 0, d = 0, i = 0; c < ALPHABET_SIZE; ++c) {
-    p = C[c];
-    if(0 < p) {
-      C[c] = i;
-      D[d++] = (sauchar_t)c;
-      i += p;
-    }
-  }
-  for(i = 0; i < idx; ++i) { B[C[T[i]]++] = i; }
-  for( ; i < n; ++i)       { B[C[T[i]]++] = i + 1; }
-  for(c = 0; c < d; ++c) { C[c] = C[D[c]]; }
-  for(i = 0, p = idx; i < n; ++i) {
-    U[i] = D[binarysearch_lower(C, d, p)];
-    p = B[p - 1];
-  }
-
-  if(A == NULL) {
-    /* Deallocate memory. */
-    free(B);
-  }
-
-  return 0;
-}
-
-/* Checks the suffix array SA of the string T. */
-saint_t
-sufcheck(const sauchar_t *T, const saidx_t *SA,
-         saidx_t n, saint_t verbose) {
-  saidx_t C[ALPHABET_SIZE];
-  saidx_t i, p, q, t;
-  saint_t c;
-
-  if(verbose) { fprintf(stderr, "sufcheck: "); }
-
-  /* Check arguments. */
-  if((T == NULL) || (SA == NULL) || (n < 0)) {
-    if(verbose) { fprintf(stderr, "Invalid arguments.\n"); }
-    return -1;
-  }
-  if(n == 0) {
-    if(verbose) { fprintf(stderr, "Done.\n"); }
-    return 0;
-  }
-
-  /* check range: [0..n-1] */
-  for(i = 0; i < n; ++i) {
-    if((SA[i] < 0) || (n <= SA[i])) {
-      if(verbose) {
-        fprintf(stderr, "Out of the range [0,%" PRIdSAIDX_T "].\n"
-                        "  SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n",
-                        n - 1, i, SA[i]);
-      }
-      return -2;
-    }
-  }
-
-  /* check first characters. */
-  for(i = 1; i < n; ++i) {
-    if(T[SA[i - 1]] > T[SA[i]]) {
-      if(verbose) {
-        fprintf(stderr, "Suffixes in wrong order.\n"
-                        "  T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d"
-                        " > T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d\n",
-                        i - 1, SA[i - 1], T[SA[i - 1]], i, SA[i], T[SA[i]]);
-      }
-      return -3;
-    }
-  }
-
-  /* check suffixes. */
-  for(i = 0; i < ALPHABET_SIZE; ++i) { C[i] = 0; }
-  for(i = 0; i < n; ++i) { ++C[T[i]]; }
-  for(i = 0, p = 0; i < ALPHABET_SIZE; ++i) {
-    t = C[i];
-    C[i] = p;
-    p += t;
-  }
-
-  q = C[T[n - 1]];
-  C[T[n - 1]] += 1;
-  for(i = 0; i < n; ++i) {
-    p = SA[i];
-    if(0 < p) {
-      c = T[--p];
-      t = C[c];
-    } else {
-      c = T[p = n - 1];
-      t = q;
-    }
-    if((t < 0) || (p != SA[t])) {
-      if(verbose) {
-        fprintf(stderr, "Suffix in wrong position.\n"
-                        "  SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T " or\n"
-                        "  SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n",
-                        t, (0 <= t) ? SA[t] : -1, i, SA[i]);
-      }
-      return -4;
-    }
-    if(t != q) {
-      ++C[c];
-      if((n <= C[c]) || (T[SA[C[c]]] != c)) { C[c] = -1; }
-    }
-  }
-
-  if(1 <= verbose) { fprintf(stderr, "Done.\n"); }
-  return 0;
-}
-
-
-static
-int
-_compare(const sauchar_t *T, saidx_t Tsize,
-         const sauchar_t *P, saidx_t Psize,
-         saidx_t suf, saidx_t *match) {
-  saidx_t i, j;
-  saint_t r;
-  for(i = suf + *match, j = *match, r = 0;
-      (i < Tsize) && (j < Psize) && ((r = T[i] - P[j]) == 0); ++i, ++j) { }
-  *match = j;
-  return (r == 0) ? -(j != Psize) : r;
-}
-
-/* Search for the pattern P in the string T. */
-saidx_t
-sa_search(const sauchar_t *T, saidx_t Tsize,
-          const sauchar_t *P, saidx_t Psize,
-          const saidx_t *SA, saidx_t SAsize,
-          saidx_t *idx) {
-  saidx_t size, lsize, rsize, half;
-  saidx_t match, lmatch, rmatch;
-  saidx_t llmatch, lrmatch, rlmatch, rrmatch;
-  saidx_t i, j, k;
-  saint_t r;
-
-  if(idx != NULL) { *idx = -1; }
-  if((T == NULL) || (P == NULL) || (SA == NULL) ||
-     (Tsize < 0) || (Psize < 0) || (SAsize < 0)) { return -1; }
-  if((Tsize == 0) || (SAsize == 0)) { return 0; }
-  if(Psize == 0) { if(idx != NULL) { *idx = 0; } return SAsize; }
-
-  for(i = j = k = 0, lmatch = rmatch = 0, size = SAsize, half = size >> 1;
-      0 < size;
-      size = half, half >>= 1) {
-    match = MIN(lmatch, rmatch);
-    r = _compare(T, Tsize, P, Psize, SA[i + half], &match);
-    if(r < 0) {
-      i += half + 1;
-      half -= (size & 1) ^ 1;
-      lmatch = match;
-    } else if(r > 0) {
-      rmatch = match;
-    } else {
-      lsize = half, j = i, rsize = size - half - 1, k = i + half + 1;
-
-      /* left part */
-      for(llmatch = lmatch, lrmatch = match, half = lsize >> 1;
-          0 < lsize;
-          lsize = half, half >>= 1) {
-        lmatch = MIN(llmatch, lrmatch);
-        r = _compare(T, Tsize, P, Psize, SA[j + half], &lmatch);
-        if(r < 0) {
-          j += half + 1;
-          half -= (lsize & 1) ^ 1;
-          llmatch = lmatch;
-        } else {
-          lrmatch = lmatch;
-        }
-      }
-
-      /* right part */
-      for(rlmatch = match, rrmatch = rmatch, half = rsize >> 1;
-          0 < rsize;
-          rsize = half, half >>= 1) {
-        rmatch = MIN(rlmatch, rrmatch);
-        r = _compare(T, Tsize, P, Psize, SA[k + half], &rmatch);
-        if(r <= 0) {
-          k += half + 1;
-          half -= (rsize & 1) ^ 1;
-          rlmatch = rmatch;
-        } else {
-          rrmatch = rmatch;
-        }
-      }
-
-      break;
-    }
-  }
-
-  if(idx != NULL) { *idx = (0 < (k - j)) ? j : i; }
-  return k - j;
-}
-
-/* Search for the character c in the string T. */
-saidx_t
-sa_simplesearch(const sauchar_t *T, saidx_t Tsize,
-                const saidx_t *SA, saidx_t SAsize,
-                saint_t c, saidx_t *idx) {
-  saidx_t size, lsize, rsize, half;
-  saidx_t i, j, k, p;
-  saint_t r;
-
-  if(idx != NULL) { *idx = -1; }
-  if((T == NULL) || (SA == NULL) || (Tsize < 0) || (SAsize < 0)) { return -1; }
-  if((Tsize == 0) || (SAsize == 0)) { return 0; }
-
-  for(i = j = k = 0, size = SAsize, half = size >> 1;
-      0 < size;
-      size = half, half >>= 1) {
-    p = SA[i + half];
-    r = (p < Tsize) ? T[p] - c : -1;
-    if(r < 0) {
-      i += half + 1;
-      half -= (size & 1) ^ 1;
-    } else if(r == 0) {
-      lsize = half, j = i, rsize = size - half - 1, k = i + half + 1;
-
-      /* left part */
-      for(half = lsize >> 1;
-          0 < lsize;
-          lsize = half, half >>= 1) {
-        p = SA[j + half];
-        r = (p < Tsize) ? T[p] - c : -1;
-        if(r < 0) {
-          j += half + 1;
-          half -= (lsize & 1) ^ 1;
-        }
-      }
-
-      /* right part */
-      for(half = rsize >> 1;
-          0 < rsize;
-          rsize = half, half >>= 1) {
-        p = SA[k + half];
-        r = (p < Tsize) ? T[p] - c : -1;
-        if(r <= 0) {
-          k += half + 1;
-          half -= (rsize & 1) ^ 1;
-        }
-      }
-
-      break;
-    }
-  }
-
-  if(idx != NULL) { *idx = (0 < (k - j)) ? j : i; }
-  return k - j;
-}
diff --git a/images/CSpeed.png b/images/CSpeed.png
deleted file mode 100644
index 5ba0561e6d7..00000000000
Binary files a/images/CSpeed.png and /dev/null differ
diff --git a/images/Cspeed4.png b/images/Cspeed4.png
new file mode 100644
index 00000000000..843e5eba847
Binary files /dev/null and b/images/Cspeed4.png differ
diff --git a/images/DCspeed5.png b/images/DCspeed5.png
new file mode 100644
index 00000000000..db5ef3cf9eb
Binary files /dev/null and b/images/DCspeed5.png differ
diff --git a/images/DSpeed.png b/images/DSpeed.png
deleted file mode 100644
index 1cd4713990a..00000000000
Binary files a/images/DSpeed.png and /dev/null differ
diff --git a/images/Dspeed4.png b/images/Dspeed4.png
new file mode 100644
index 00000000000..107e26c605e
Binary files /dev/null and b/images/Dspeed4.png differ
diff --git a/lib/Makefile b/lib/Makefile
index e0ca0c626d3..6bdf2f8763b 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -1,6 +1,6 @@
 # ################################################################
 # ZSTD library - Makefile
-# Copyright (C) Yann Collet 2015
+# Copyright (C) Yann Collet 2015-2016
 # All rights reserved.
 # 
 # BSD license
@@ -28,7 +28,6 @@
 # 
 # You can contact the author at :
 #  - ZSTD homepage : http://www.zstd.net
-#  - ZSTD source repository : https://github.com/Cyan4973/zstd
 # ################################################################
 
 # Version numbers
@@ -52,7 +51,7 @@ FLAGS   = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MOREFLAGS)
 LIBDIR ?= $(PREFIX)/lib
 INCLUDEDIR=$(PREFIX)/include
 
-ZSTD_FILES := zstd_compress.c zstd_decompress.c fse.c huff0.c
+ZSTD_FILES := zstd_compress.c zstd_decompress.c fse.c huff0.c zdict.c divsufsort.c
 ZSTD_LEGACY:= legacy/zstd_v01.c legacy/zstd_v02.c legacy/zstd_v03.c legacy/zstd_v04.c
 
 ifeq ($(ZSTD_LEGACY_SUPPORT), 0)
@@ -119,6 +118,8 @@ install: libzstd libzstd.pc
 	@cp -a libzstd.pc $(DESTDIR)$(LIBDIR)/pkgconfig/
 	@install -m 644 libzstd.a $(DESTDIR)$(LIBDIR)/libzstd.a
 	@install -m 644 zstd.h $(DESTDIR)$(INCLUDEDIR)/zstd.h
+	@install -m 644 zbuff.h $(DESTDIR)$(INCLUDEDIR)/zbuff.h
+	@install -m 644 zdict.h $(DESTDIR)$(INCLUDEDIR)/zdict.h
 	@echo zstd static and shared library installed
 
 uninstall:
@@ -128,6 +129,8 @@ uninstall:
 	@[ -x $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_VER) ] && rm -f $(DESTDIR)$(LIBDIR)/libzstd.$(SHARED_EXT_VER)
 	@[ -f $(DESTDIR)$(LIBDIR)/libzstd.a ] && rm -f $(DESTDIR)$(LIBDIR)/libzstd.a
 	@[ -f $(DESTDIR)$(INCLUDEDIR)/zstd.h ] && rm -f $(DESTDIR)$(INCLUDEDIR)/zstd.h
+	@[ -f $(DESTDIR)$(INCLUDEDIR)/zbuff.h ] && rm -f $(DESTDIR)$(INCLUDEDIR)/zbuff.h
+	@[ -f $(DESTDIR)$(INCLUDEDIR)/zdict.h ] && rm -f $(DESTDIR)$(INCLUDEDIR)/zdict.h
 	@echo zstd libraries successfully uninstalled
 
 endif
diff --git a/lib/README.md b/lib/README.md
new file mode 100644
index 00000000000..a04455488bd
--- /dev/null
+++ b/lib/README.md
@@ -0,0 +1,56 @@
+zstd - library files
+================================
+
+The __lib__ directory contains several files, but depending on target use case, some of them may not be necessary.
+
+#### Minimal library files
+
+##### Shared resources
+
+- [mem.h](mem.h)
+- [error_private.h](error_private.h)
+- [error_public.h](error_public.h)
+
+##### zstd core compression
+
+- [bitstream.h](bitstream.h)
+- fse.c
+- fse.h
+- fse_static.h
+- huff0.c
+- huff0.h
+- huff0_static.h
+- zstd_compress.c
+- zstd_decompress.c
+- zstd_internal.h
+- zstd_opt.h
+- zstd.h
+- zstd_static.h
+
+#### Buffered streaming
+
+This complementary API makes streaming integration easier.
+It is used by the `zstd` command line utility :
+
+- zbuff.c
+- zbuff.h
+- zbuff_static.h
+
+#### Dictionary builder
+
+To create dictionaries from training sets :
+
+- divsufsort.c
+- divsufsort.h
+- zdict.c
+- zdict.h
+- zdict_static.h
+
+#### Miscellaneous
+
+The other files are not source code. They are :
+
+ - LICENSE : contains the BSD license text
+ - Makefile : script to compile or install zstd library (static or dynamic)
+ - libzstd.pc.in : for pkg-config (make install)
+
diff --git a/lib/divsufsort.c b/lib/divsufsort.c
new file mode 100644
index 00000000000..60cceb08832
--- /dev/null
+++ b/lib/divsufsort.c
@@ -0,0 +1,1913 @@
+/*
+ * divsufsort.c for libdivsufsort-lite
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*- Compiler specifics -*/
+#ifdef __clang__
+#pragma clang diagnostic ignored "-Wshorten-64-to-32"
+#endif
+
+#if defined(_MSC_VER)
+#  pragma warning(disable : 4244)
+#  pragma warning(disable : 4127)    /* C4127 : Condition expression is constant */
+#endif
+
+
+/*- Dependencies -*/
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "divsufsort.h"
+
+/*- Constants: ALPHABET_SIZE, SS_INSERTIONSORT_THRESHOLD and SS_BLOCKSIZE may be pre-defined; out-of-range values are corrected -*/
+#if defined(INLINE)   /* any earlier definition of INLINE is discarded */
+# undef INLINE
+#endif
+#if !defined(INLINE)   /* always true at this point: INLINE was undefined just above */
+# define INLINE __inline
+#endif
+#if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1)   /* drop a non-positive override so the default applies */
+# undef ALPHABET_SIZE
+#endif
+#if !defined(ALPHABET_SIZE)
+# define ALPHABET_SIZE (256)
+#endif
+#define BUCKET_A_SIZE (ALPHABET_SIZE)
+#define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE)
+#if defined(SS_INSERTIONSORT_THRESHOLD)
+# if SS_INSERTIONSORT_THRESHOLD < 1   /* clamp an override to a minimum of 1 */
+#  undef SS_INSERTIONSORT_THRESHOLD
+#  define SS_INSERTIONSORT_THRESHOLD (1)
+# endif
+#else
+# define SS_INSERTIONSORT_THRESHOLD (8)
+#endif
+#if defined(SS_BLOCKSIZE)
+# if SS_BLOCKSIZE < 0   /* clamp an override into [0, 32767] */
+#  undef SS_BLOCKSIZE
+#  define SS_BLOCKSIZE (0)
+# elif 32768 <= SS_BLOCKSIZE
+#  undef SS_BLOCKSIZE
+#  define SS_BLOCKSIZE (32767)
+# endif
+#else
+# define SS_BLOCKSIZE (1024)
+#endif
+/* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */
+#if SS_BLOCKSIZE == 0   /* 0 selects the largest stack (96 frames) */
+# define SS_MISORT_STACKSIZE (96)
+#elif SS_BLOCKSIZE <= 4096
+# define SS_MISORT_STACKSIZE (16)
+#else
+# define SS_MISORT_STACKSIZE (24)
+#endif
+#define SS_SMERGE_STACKSIZE (32)
+#define TR_INSERTIONSORT_THRESHOLD (8)
+#define TR_STACKSIZE (64)
+
+
+/*- Macros: SWAP needs a temporary named `t` in scope; STACK_* need `stack`, `ssize` and STACK_SIZE in scope -*/
+#ifndef SWAP
+# define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0)
+#endif /* SWAP */
+#ifndef MIN
+# define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b))
+#endif /* MIN */
+#ifndef MAX
+# define MAX(_a, _b) (((_a) > (_b)) ? (_a) : (_b))
+#endif /* MAX */
+#define STACK_PUSH(_a, _b, _c, _d) /* stores a 4-field frame at stack[ssize] and increments ssize */\
+  do {\
+    assert(ssize < STACK_SIZE);\
+    stack[ssize].a = (_a), stack[ssize].b = (_b),\
+    stack[ssize].c = (_c), stack[ssize++].d = (_d);\
+  } while(0)
+#define STACK_PUSH5(_a, _b, _c, _d, _e) /* same as STACK_PUSH with a fifth field e */\
+  do {\
+    assert(ssize < STACK_SIZE);\
+    stack[ssize].a = (_a), stack[ssize].b = (_b),\
+    stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\
+  } while(0)
+#define STACK_POP(_a, _b, _c, _d) /* executes `return;` in the enclosing function when the stack is empty */\
+  do {\
+    assert(0 <= ssize);\
+    if(ssize == 0) { return; }\
+    (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
+    (_c) = stack[ssize].c, (_d) = stack[ssize].d;\
+  } while(0)
+#define STACK_POP5(_a, _b, _c, _d, _e) /* same as STACK_POP with a fifth field e */\
+  do {\
+    assert(0 <= ssize);\
+    if(ssize == 0) { return; }\
+    (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
+    (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\
+  } while(0)
+#define BUCKET_A(_c0) bucket_A[(_c0)]
+#if ALPHABET_SIZE == 256   /* 256 symbols: index with shift/or instead of a multiplication */
+#define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)])
+#define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)])
+#else
+#define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)])
+#define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)])
+#endif
+
+
+/*- Private Functions -*/
+
+static const int lg_table[256]= {  /* lg_table[i] == floor(log2(i)) for i in 1..255; lg_table[0] == -1 */
+ -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+  5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
+};
+
+#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
+/* Integer base-2 logarithm of n, looked up in lg_table; how many bits of n are examined depends on SS_BLOCKSIZE. */
+static INLINE
+int
+ss_ilg(int n) {
+#if SS_BLOCKSIZE == 0   /* all 32 bits of n are examined: use the highest non-zero byte */
+  return (n & 0xffff0000) ?
+          ((n & 0xff000000) ?
+            24 + lg_table[(n >> 24) & 0xff] :
+            16 + lg_table[(n >> 16) & 0xff]) :
+          ((n & 0x0000ff00) ?
+             8 + lg_table[(n >>  8) & 0xff] :
+             0 + lg_table[(n >>  0) & 0xff]);
+#elif SS_BLOCKSIZE < 256   /* n is used directly as the table index */
+  return lg_table[n];
+#else   /* only the low 16 bits of n are examined */
+  return (n & 0xff00) ?
+          8 + lg_table[(n >> 8) & 0xff] :
+          0 + lg_table[(n >> 0) & 0xff];
+#endif
+}
+
+#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
+
+#if SS_BLOCKSIZE != 0
+
+static const int sqq_table[256] = {  /* entries match floor(16 * sqrt(i)); consumed by ss_isqrt */
+  0,  16,  22,  27,  32,  35,  39,  42,  45,  48,  50,  53,  55,  57,  59,  61,
+ 64,  65,  67,  69,  71,  73,  75,  76,  78,  80,  81,  83,  84,  86,  87,  89,
+ 90,  91,  93,  94,  96,  97,  98,  99, 101, 102, 103, 104, 106, 107, 108, 109,
+110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
+128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
+143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155,
+156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168,
+169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180,
+181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191,
+192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201,
+202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211,
+212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221,
+221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230,
+230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238,
+239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247,
+247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255
+};
+/* Integer square root of x (table estimate plus refinement); returns SS_BLOCKSIZE when x >= SS_BLOCKSIZE^2. */
+static INLINE
+int
+ss_isqrt(int x) {
+  int y, e;
+
+  if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; }
+  e = (x & 0xffff0000) ?   /* e = index of the highest set bit of x, via lg_table */
+        ((x & 0xff000000) ?
+          24 + lg_table[(x >> 24) & 0xff] :
+          16 + lg_table[(x >> 16) & 0xff]) :
+        ((x & 0x0000ff00) ?
+           8 + lg_table[(x >>  8) & 0xff] :
+           0 + lg_table[(x >>  0) & 0xff]);
+
+  if(e >= 16) {
+    y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7);
+    if(e >= 24) { y = (y + 1 + x / y) >> 1; }   /* extra refinement step for the largest inputs */
+    y = (y + 1 + x / y) >> 1;
+  } else if(e >= 8) {
+    y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1;
+  } else {
+    return sqq_table[x] >> 4;   /* x < 256: the table entry divided by 16 is the result */
+  }
+
+  return (x < (y * y)) ? y - 1 : y;   /* step down by one if the estimate overshoots */
+}
+
+#endif /* SS_BLOCKSIZE != 0 */
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Three-way comparison of the substrings designated by *p1 and *p2, starting depth bytes in. */
+static INLINE int
+ss_compare(const unsigned char *T,
+           const int *p1, const int *p2,
+           int depth) {
+  /* each substring ends at T + (next entry after p) + 2 */
+  const unsigned char *lhs = T + depth + p1[0];
+  const unsigned char *rhs = T + depth + p2[0];
+  const unsigned char *const lhs_end = T + p1[1] + 2;
+  const unsigned char *const rhs_end = T + p2[1] + 2;
+
+  while((lhs < lhs_end) && (rhs < rhs_end) && (*lhs == *rhs)) {
+    ++lhs;
+    ++rhs;
+  }
+
+  if(lhs >= lhs_end) { return (rhs < rhs_end) ? -1 : 0; }   /* left side exhausted */
+  if(rhs >= rhs_end) { return 1; }                           /* only right side exhausted */
+  return *lhs - *rhs;                                        /* first differing byte decides */
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+#if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1)
+
+/* Insertionsort for small size groups: sorts [first, last) with ss_compare; an entry equal to its predecessor is stored complemented (~). */
+static
+void
+ss_insertionsort(const unsigned char *T, const int *PA,
+                 int *first, int *last, int depth) {
+  int *i, *j;
+  int t;
+  int r;
+
+  for(i = last - 2; first <= i; --i) {   /* grow the sorted tail leftwards, one element at a time */
+    for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) {
+      do { *(j - 1) = *j; } while((++j < last) && (*j < 0));   /* shift left, carrying along negative (marked) entries */
+      if(last <= j) { break; }
+    }
+    if(r == 0) { *j = ~*j; }   /* equal to the inserted element: mark by complementing */
+    *(j - 1) = t;
+  }
+}
+
+#endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */
+
+
+/*---------------------------------------------------------------------------*/
+
+#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
+/* Sift-down for the heap in SA[0..size) keyed by Td[PA[SA[.]]]: moves SA[i] down while a child has a larger key. */
+static INLINE
+void
+ss_fixdown(const unsigned char *Td, const int *PA,
+           int *SA, int i, int size) {
+  int j, k;
+  int v;
+  int c, d, e;
+
+  for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
+    d = Td[PA[SA[k = j++]]];   /* key of the left child; j now indexes the right child */
+    if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; }   /* pick the child with the larger key */
+    if(d <= c) { break; }
+  }
+  SA[i] = v;
+}
+
+/* Simple top-down heapsort of SA[0..size) by the key Td[PA[SA[.]]], built on ss_fixdown. */
+static
+void
+ss_heapsort(const unsigned char *Td, const int *PA, int *SA, int size) {
+  int i, m;
+  int t;
+
+  m = size;
+  if((size % 2) == 0) {   /* even size: set the last element aside so the heap is built on m = size - 1 entries */
+    m--;
+    if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); }
+  }
+
+  for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); }   /* build the heap */
+  if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); }
+  for(i = m - 1; 0 < i; --i) {   /* move the current root to slot i, then restore the heap on [0, i) */
+    t = SA[0], SA[0] = SA[i];
+    ss_fixdown(Td, PA, SA, 0, i);
+    SA[i] = t;
+  }
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Returns whichever of v1, v2, v3 points at the median key Td[PA[*v]]. */
+static INLINE
+int *
+ss_median3(const unsigned char *Td, const int *PA,
+           int *v1, int *v2, int *v3) {
+  int *lo = v1, *hi = v2;
+
+  /* order the first two candidates so that key(lo) <= key(hi) */
+  if(Td[PA[*v1]] > Td[PA[*v2]]) { lo = v2; hi = v1; }
+  /* when v3 is not below hi, hi sits in the middle */
+  if(Td[PA[*hi]] <= Td[PA[*v3]]) { return hi; }
+  return (Td[PA[*lo]] > Td[PA[*v3]]) ? lo : v3;
+}
+
+/* Returns the median of five elements: the pointer among v1..v5 selected by a fixed compare/swap sequence on Td[PA[*v]]. */
+static INLINE
+int *
+ss_median5(const unsigned char *Td, const int *PA,
+           int *v1, int *v2, int *v3, int *v4, int *v5) {
+  int *t;   /* temporary required by SWAP */
+  if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); }
+  if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); }
+  if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); }
+  if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); }
+  if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); }
+  if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; }
+  return v3;
+}
+
+/* Chooses a pivot in [first, last): median of 3, median of 5, or median of three medians-of-3, by range size. */
+static INLINE
+int *
+ss_pivot(const unsigned char *Td, const int *PA, int *first, int *last) {
+  int *const tail = last - 1;               /* last element of the range */
+  const int count = last - first;
+  int *const center = first + count / 2;
+  int step;
+  int *m_lo, *m_mid, *m_hi;
+
+  if(count <= 32) {
+    return ss_median3(Td, PA, first, center, tail);
+  }
+  if(count <= 512) {
+    step = count >> 2;
+    return ss_median5(Td, PA, first, first + step, center, tail - step, tail);
+  }
+  /* large range: median of the medians taken at the front, centre and back */
+  step = count >> 3;
+  m_lo  = ss_median3(Td, PA, first, first + step, first + (step << 1));
+  m_mid = ss_median3(Td, PA, center - step, center, center + step);
+  m_hi  = ss_median3(Td, PA, tail - (step << 1), tail - step, tail);
+  return ss_median3(Td, PA, m_lo, m_mid, m_hi);
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Binary partition for substrings: entries x with PA[x] + depth >= PA[x + 1] + 1 go to the front (stored as ~x); returns the boundary. */
+static INLINE
+int *
+ss_partition(const int *PA,
+                    int *first, int *last, int depth) {
+  int *a, *b;
+  int t;
+  for(a = first - 1, b = last;;) {
+    for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; }   /* already on the correct side: just mark */
+    for(; (a < --b) && ((PA[*b] + depth) <  (PA[*b + 1] + 1));) { }
+    if(b <= a) { break; }
+    t = ~*b;   /* exchange *a and *b, complementing the one that moves to the front */
+    *b = *a;
+    *a = t;
+  }
+  if(first < a) { *first = ~*first; }   /* undo the complement on the first entry of a non-empty front part */
+  return a;
+}
+
+/* Multikey introsort for medium size groups: iterative (explicit stack) sort of [first, last) on the byte at T + depth. */
+static
+void
+ss_mintrosort(const unsigned char *T, const int *PA,
+              int *first, int *last,
+              int depth) {
+#define STACK_SIZE SS_MISORT_STACKSIZE
+  struct { int *a, *b, c; int d; } stack[STACK_SIZE];   /* frames: a = first, b = last, c = depth, d = limit */
+  const unsigned char *Td;
+  int *a, *b, *c, *d, *e, *f;
+  int s, t;
+  int ssize;
+  int limit;
+  int v, x = 0;
+
+  for(ssize = 0, limit = ss_ilg(last - first);;) {
+
+    if((last - first) <= SS_INSERTIONSORT_THRESHOLD) {   /* small range: insertion sort, then take the next frame */
+#if 1 < SS_INSERTIONSORT_THRESHOLD
+      if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); }
+#endif
+      STACK_POP(first, last, depth, limit);   /* returns from this function once the stack is empty */
+      continue;
+    }
+
+    Td = T + depth;
+    if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); }   /* limit used up: heapsort on the current byte */
+    if(limit < 0) {
+      for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) {   /* look for the end of the first run of equal bytes longer than 1 */
+        if((x = Td[PA[*a]]) != v) {
+          if(1 < (a - first)) { break; }
+          v = x;
+          first = a;
+        }
+      }
+      if(Td[PA[*first] - 1] < v) {
+        first = ss_partition(PA, first, a, depth);
+      }
+      if((a - first) <= (last - a)) {
+        if(1 < (a - first)) {
+          STACK_PUSH(a, last, depth, -1);
+          last = a, depth += 1, limit = ss_ilg(a - first);
+        } else {
+          first = a, limit = -1;
+        }
+      } else {
+        if(1 < (last - a)) {
+          STACK_PUSH(first, a, depth + 1, ss_ilg(a - first));
+          first = a, limit = -1;
+        } else {
+          last = a, depth += 1, limit = ss_ilg(a - first);
+        }
+      }
+      continue;
+    }
+
+    /* choose pivot */
+    a = ss_pivot(Td, PA, first, last);
+    v = Td[PA[*a]];
+    SWAP(*first, *a);
+
+    /* partition */
+    for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { }
+    if(((a = b) < last) && (x < v)) {
+      for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) {
+        if(x == v) { SWAP(*b, *a); ++a; }
+      }
+    }
+    for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { }
+    if((b < (d = c)) && (x > v)) {
+      for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
+        if(x == v) { SWAP(*c, *d); --d; }
+      }
+    }
+    for(; b < c;) {
+      SWAP(*b, *c);
+      for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) {
+        if(x == v) { SWAP(*b, *a); ++a; }
+      }
+      for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
+        if(x == v) { SWAP(*c, *d); --d; }
+      }
+    }
+
+    if(a <= d) {
+      c = b - 1;
+
+      if((s = a - first) > (t = b - a)) { s = t; }
+      for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
+      if((s = d - c) > (t = last - d - 1)) { s = t; }
+      for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
+
+      a = first + (b - a), c = last - (d - c);
+      b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth);
+
+      /* three sub-ranges remain: [first, a), [b, c) at depth + 1, and [c, last); continue with one, push the other two */
+      if((a - first) <= (last - c)) {
+        if((last - c) <= (c - b)) {
+          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
+          STACK_PUSH(c, last, depth, limit);
+          last = a;
+        } else if((a - first) <= (c - b)) {
+          STACK_PUSH(c, last, depth, limit);
+          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
+          last = a;
+        } else {
+          STACK_PUSH(c, last, depth, limit);
+          STACK_PUSH(first, a, depth, limit);
+          first = b, last = c, depth += 1, limit = ss_ilg(c - b);
+        }
+      } else {
+        if((a - first) <= (c - b)) {
+          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
+          STACK_PUSH(first, a, depth, limit);
+          first = c;
+        } else if((last - c) <= (c - b)) {
+          STACK_PUSH(first, a, depth, limit);
+          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
+          first = c;
+        } else {
+          STACK_PUSH(first, a, depth, limit);
+          STACK_PUSH(c, last, depth, limit);
+          first = b, last = c, depth += 1, limit = ss_ilg(c - b);
+        }
+      }
+    } else {
+      limit += 1;   /* give back the limit consumed above before moving on to the next byte */
+      if(Td[PA[*first] - 1] < v) {
+        first = ss_partition(PA, first, last, depth);
+        limit = ss_ilg(last - first);
+      }
+      depth += 1;
+    }
+  }
+#undef STACK_SIZE
+}
+
+#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
+
+
+/*---------------------------------------------------------------------------*/
+
+#if SS_BLOCKSIZE != 0
+/* Exchanges, element by element, the n ints starting at a with the n ints starting at b. */
+static INLINE void
+ss_blockswap(int *a, int *b, int n) {
+  for(; 0 < n; --n) {
+    const int held = *a;
+    *a++ = *b;
+    *b++ = held;
+  }
+}
+/* In-place exchange of the adjacent blocks [first, middle) and [middle, last); equal-length blocks use ss_blockswap. */
+static INLINE
+void
+ss_rotate(int *first, int *middle, int *last) {
+  int *a, *b, t;
+  int l, r;
+  l = middle - first, r = last - middle;   /* lengths of the left and right blocks */
+  for(; (0 < l) && (0 < r);) {
+    if(l == r) { ss_blockswap(first, middle, l); break; }
+    if(l < r) {   /* left block is shorter: work from the back of the range */
+      a = last - 1, b = middle - 1;
+      t = *a;
+      do {
+        *a-- = *b, *b-- = *a;
+        if(b < first) {
+          *a = t;
+          last = a;
+          if((r -= l + 1) <= l) { break; }
+          a -= 1, b = middle - 1;
+          t = *a;
+        }
+      } while(1);
+    } else {   /* right block is shorter: work from the front of the range */
+      a = first, b = middle;
+      t = *a;
+      do {
+        *a++ = *b, *b++ = *a;
+        if(last <= b) {
+          *a = t;
+          first = a + 1;
+          if((l -= r + 1) <= r) { break; }
+          a += 1, b = middle;
+          t = *a;
+        }
+      } while(1);
+    }
+  }
+}
+
+
+/*---------------------------------------------------------------------------*/
+/* Merges [first, middle) with [middle, last) without a buffer: binary-searches the left part for the last right entry, then ss_rotate. */
+static
+void
+ss_inplacemerge(const unsigned char *T, const int *PA,
+                int *first, int *middle, int *last,
+                int depth) {
+  const int *p;
+  int *a, *b;
+  int len, half;
+  int q, r;
+  int x;
+
+  for(;;) {
+    if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); }   /* negative entries are stored complemented; x records that */
+    else                { x = 0; p = PA +  *(last - 1); }
+    for(a = first, len = middle - first, half = len >> 1, r = -1;   /* binary search in [first, middle) */
+        0 < len;
+        len = half, half >>= 1) {
+      b = a + half;
+      q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth);
+      if(q < 0) {
+        a = b + 1;
+        half -= (len & 1) ^ 1;
+      } else {
+        r = q;
+      }
+    }
+    if(a < middle) {
+      if(r == 0) { *a = ~*a; }   /* an equal entry was found: mark it by complementing */
+      ss_rotate(a, middle, last);
+      last -= middle - a;
+      middle = a;
+      if(first == middle) { break; }
+    }
+    --last;
+    if(x != 0) { while(*--last < 0) { } }   /* skip back over the run of negative entries */
+    if(middle == last) { break; }
+  }
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Merge-forward with internal buffer: [first, middle) is swapped into buf, then merged front-to-back with [middle, last). */
+static
+void
+ss_mergeforward(const unsigned char *T, const int *PA,
+                int *first, int *middle, int *last,
+                int *buf, int depth) {
+  int *a, *b, *c, *bufend;
+  int t;
+  int r;
+
+  bufend = buf + (middle - first) - 1;   /* last buffer slot in use */
+  ss_blockswap(buf, first, middle - first);
+
+  for(t = *(a = first), b = buf, c = middle;;) {   /* a = output, b = buffered left part, c = right part */
+    r = ss_compare(T, PA + *b, PA + *c, depth);
+    if(r < 0) {
+      do {
+        *a++ = *b;
+        if(bufend <= b) { *bufend = t; return; }
+        *b++ = *a;
+      } while(*b < 0);
+    } else if(r > 0) {
+      do {
+        *a++ = *c, *c++ = *a;
+        if(last <= c) {
+          while(b < bufend) { *a++ = *b, *b++ = *a; }
+          *a = *b, *b = t;
+          return;
+        }
+      } while(*c < 0);
+    } else {
+      *c = ~*c;   /* equal: mark the right-hand entry by complementing, then emit from both sides */
+      do {
+        *a++ = *b;
+        if(bufend <= b) { *bufend = t; return; }
+        *b++ = *a;
+      } while(*b < 0);
+
+      do {
+        *a++ = *c, *c++ = *a;
+        if(last <= c) {
+          while(b < bufend) { *a++ = *b, *b++ = *a; }
+          *a = *b, *b = t;
+          return;
+        }
+      } while(*c < 0);
+    }
+  }
+}
+
+/* Merge-backward with internal buffer: [middle, last) is swapped into buf, then merged back-to-front with [first, middle). */
+static
+void
+ss_mergebackward(const unsigned char *T, const int *PA,
+                 int *first, int *middle, int *last,
+                 int *buf, int depth) {
+  const int *p1, *p2;
+  int *a, *b, *c, *bufend;
+  int t;
+  int r;
+  int x;
+
+  bufend = buf + (last - middle) - 1;
+  ss_blockswap(buf, middle, last - middle);
+
+  x = 0;   /* bit 1: current buffer entry is complemented; bit 2: current left entry is complemented */
+  if(*bufend < 0)       { p1 = PA + ~*bufend; x |= 1; }
+  else                  { p1 = PA +  *bufend; }
+  if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; }
+  else                  { p2 = PA +  *(middle - 1); }
+  for(t = *(a = last - 1), b = bufend, c = middle - 1;;) {   /* a = output, b = buffered right part, c = left part */
+    r = ss_compare(T, p1, p2, depth);
+    if(0 < r) {
+      if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
+      *a-- = *b;
+      if(b <= buf) { *buf = t; break; }
+      *b-- = *a;
+      if(*b < 0) { p1 = PA + ~*b; x |= 1; }
+      else       { p1 = PA +  *b; }
+    } else if(r < 0) {
+      if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
+      *a-- = *c, *c-- = *a;
+      if(c < first) {
+        while(buf < b) { *a-- = *b, *b-- = *a; }
+        *a = *b, *b = t;
+        break;
+      }
+      if(*c < 0) { p2 = PA + ~*c; x |= 2; }
+      else       { p2 = PA +  *c; }
+    } else {
+      if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
+      *a-- = ~*b;   /* equal: the buffer entry is written complemented */
+      if(b <= buf) { *buf = t; break; }
+      *b-- = *a;
+      if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
+      *a-- = *c, *c-- = *a;
+      if(c < first) {
+        while(buf < b) { *a-- = *b, *b-- = *a; }
+        *a = *b, *b = t;
+        break;
+      }
+      if(*b < 0) { p1 = PA + ~*b; x |= 1; }
+      else       { p1 = PA +  *b; }
+      if(*c < 0) { p2 = PA + ~*c; x |= 2; }
+      else       { p2 = PA +  *c; }
+    }
+  }
+}
+
+/* D&C based merge: merges directly when one side fits in buf (bufsize entries), otherwise splits around middle and iterates via a stack. */
+static
+void
+ss_swapmerge(const unsigned char *T, const int *PA,
+             int *first, int *middle, int *last,
+             int *buf, int bufsize, int depth) {
+#define STACK_SIZE SS_SMERGE_STACKSIZE
+#define GETIDX(a) ((0 <= (a)) ? (a) : (~(a)))
+#define MERGE_CHECK(a, b, c)\
+  do {\
+    if(((c) & 1) ||\
+       (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\
+      *(a) = ~*(a);\
+    }\
+    if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\
+      *(b) = ~*(b);\
+    }\
+  } while(0)
+  struct { int *a, *b, *c; int d; } stack[STACK_SIZE];   /* frames: a = first, b = middle, c = last, d = check flags */
+  int *l, *r, *lm, *rm;
+  int m, len, half;
+  int ssize;
+  int check, next;
+
+  for(check = 0, ssize = 0;;) {
+    if((last - middle) <= bufsize) {   /* right part fits in the buffer */
+      if((first < middle) && (middle < last)) {
+        ss_mergebackward(T, PA, first, middle, last, buf, depth);
+      }
+      MERGE_CHECK(first, last, check);
+      STACK_POP(first, middle, last, check);   /* returns from this function once the stack is empty */
+      continue;
+    }
+
+    if((middle - first) <= bufsize) {   /* left part fits in the buffer */
+      if(first < middle) {
+        ss_mergeforward(T, PA, first, middle, last, buf, depth);
+      }
+      MERGE_CHECK(first, last, check);
+      STACK_POP(first, middle, last, check);
+      continue;
+    }
+
+    for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1;   /* binary search for the split width m */
+        0 < len;
+        len = half, half >>= 1) {
+      if(ss_compare(T, PA + GETIDX(*(middle + m + half)),
+                       PA + GETIDX(*(middle - m - half - 1)), depth) < 0) {
+        m += half + 1;
+        half -= (len & 1) ^ 1;
+      }
+    }
+
+    if(0 < m) {
+      lm = middle - m, rm = middle + m;
+      ss_blockswap(lm, middle, m);   /* exchange the m entries on either side of middle */
+      l = r = middle, next = 0;
+      if(rm < last) {
+        if(*rm < 0) {
+          *rm = ~*rm;
+          if(first < lm) { for(; *--l < 0;) { } next |= 4; }
+          next |= 1;
+        } else if(first < lm) {
+          for(; *r < 0; ++r) { }
+          next |= 2;
+        }
+      }
+
+      if((l - first) <= (last - r)) {   /* continue with the left half, push the right half */
+        STACK_PUSH(r, rm, last, (next & 3) | (check & 4));
+        middle = lm, last = l, check = (check & 3) | (next & 4);
+      } else {   /* continue with the right half, push the left half */
+        if((next & 2) && (r == middle)) { next ^= 6; }
+        STACK_PUSH(first, lm, l, (check & 3) | (next & 4));
+        first = r, middle = rm, check = (next & 3) | (check & 4);
+      }
+    } else {
+      if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) {
+        *middle = ~*middle;
+      }
+      MERGE_CHECK(first, last, check);
+      STACK_POP(first, middle, last, check);
+    }
+  }
+#undef STACK_SIZE
+}
+
+#endif /* SS_BLOCKSIZE != 0 */
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Substring sort of [first, last): block-wise ss_mintrosort/ss_insertionsort plus merges; see lastsuffix handling at both ends. */
+static
+void
+sssort(const unsigned char *T, const int *PA,
+       int *first, int *last,
+       int *buf, int bufsize,
+       int depth, int n, int lastsuffix) {
+  int *a;
+#if SS_BLOCKSIZE != 0
+  int *b, *middle, *curbuf;
+  int j, k, curbufsize, limit;
+#endif
+  int i;
+
+  if(lastsuffix != 0) { ++first; }   /* the first entry is set aside here and inserted at the end of this function */
+
+#if SS_BLOCKSIZE == 0
+  ss_mintrosort(T, PA, first, last, depth);
+#else
+  if((bufsize < SS_BLOCKSIZE) &&
+      (bufsize < (last - first)) &&
+      (bufsize < (limit = ss_isqrt(last - first)))) {
+    if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; }
+    buf = middle = last - limit, bufsize = limit;   /* caller's buffer is too small: use the tail of the range as the buffer */
+  } else {
+    middle = last, limit = 0;
+  }
+  for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) {   /* sort one block of SS_BLOCKSIZE entries per pass */
+#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
+    ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth);
+#elif 1 < SS_BLOCKSIZE
+    ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth);
+#endif
+    curbufsize = last - (a + SS_BLOCKSIZE);
+    curbuf = a + SS_BLOCKSIZE;
+    if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; }
+    for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) {   /* merge with preceding sorted runs of equal size */
+      ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth);
+    }
+  }
+#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
+  ss_mintrosort(T, PA, a, middle, depth);
+#elif 1 < SS_BLOCKSIZE
+  ss_insertionsort(T, PA, a, middle, depth);
+#endif
+  for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) {   /* merge the remaining runs, one per set bit of i */
+    if(i & 1) {
+      ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth);
+      a -= k;
+    }
+  }
+  if(limit != 0) {   /* the tail served as the buffer: sort it now and merge it in without a buffer */
+#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
+    ss_mintrosort(T, PA, middle, last, depth);
+#elif 1 < SS_BLOCKSIZE
+    ss_insertionsort(T, PA, middle, last, depth);
+#endif
+    ss_inplacemerge(T, PA, first, middle, last, depth);
+  }
+#endif
+
+  if(lastsuffix != 0) {
+    /* Insert last type B* suffix. */
+    int PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2;   /* two-entry stand-in so ss_compare can read p1[0] and p1[1] */
+    for(a = first, i = *(first - 1);
+        (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth)));
+        ++a) {
+      *(a - 1) = *a;
+    }
+    *(a - 1) = i;
+  }
+}
+
+
+/*---------------------------------------------------------------------------*/
+/* Integer base-2 logarithm of n: lg_table lookup on the highest non-zero byte of n. */
+static INLINE int
+tr_ilg(int n) {
+  int shift;                                  /* bit offset of the byte handed to lg_table */
+
+  if(n & 0xff000000)      { shift = 24; }
+  else if(n & 0x00ff0000) { shift = 16; }
+  else if(n & 0x0000ff00) { shift =  8; }
+  else                    { shift =  0; }
+
+  return shift + lg_table[(n >> shift) & 0xff];
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Simple insertionsort for small size groups: sorts [first, last) by ISAd[.]; an entry whose key equals the inserted one is complemented (~). */
+static
+void
+tr_insertionsort(const int *ISAd, int *first, int *last) {
+  int *a, *b;
+  int t, r;
+
+  for(a = first + 1; a < last; ++a) {   /* grow the sorted prefix rightwards, one element at a time */
+    for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) {
+      do { *(b + 1) = *b; } while((first <= --b) && (*b < 0));   /* shift right, carrying along negative (marked) entries */
+      if(b < first) { break; }
+    }
+    if(r == 0) { *b = ~*b; }   /* equal key: mark by complementing */
+    *(b + 1) = t;
+  }
+}
+
+
+/*---------------------------------------------------------------------------*/
+/* Sift-down for the heap in SA[0..size) keyed by ISAd[SA[.]]: moves SA[i] down while a child has a larger key. */
+static INLINE
+void
+tr_fixdown(const int *ISAd, int *SA, int i, int size) {
+  int j, k;
+  int v;
+  int c, d, e;
+
+  for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
+    d = ISAd[SA[k = j++]];   /* key of the left child; j now indexes the right child */
+    if(d < (e = ISAd[SA[j]])) { k = j; d = e; }   /* pick the child with the larger key */
+    if(d <= c) { break; }
+  }
+  SA[i] = v;
+}
+
+/* Simple top-down heapsort of SA[0..size) by the key ISAd[SA[.]], built on tr_fixdown. */
+static
+void
+tr_heapsort(const int *ISAd, int *SA, int size) {
+  int i, m;
+  int t;
+
+  m = size;
+  if((size % 2) == 0) {   /* even size: set the last element aside so the heap is built on m = size - 1 entries */
+    m--;
+    if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); }
+  }
+
+  for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); }   /* build the heap */
+  if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); }
+  for(i = m - 1; 0 < i; --i) {   /* move the current root to slot i, then restore the heap on [0, i) */
+    t = SA[0], SA[0] = SA[i];
+    tr_fixdown(ISAd, SA, 0, i);
+    SA[i] = t;
+  }
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Returns whichever of v1, v2, v3 points at the median key ISAd[*v]. */
+static INLINE
+int *
+tr_median3(const int *ISAd, int *v1, int *v2, int *v3) {
+  int *lo = v1, *hi = v2;
+
+  /* order the first two candidates so that key(lo) <= key(hi) */
+  if(ISAd[*v1] > ISAd[*v2]) { lo = v2; hi = v1; }
+  /* when v3 is not below hi, hi sits in the middle */
+  if(ISAd[*hi] <= ISAd[*v3]) { return hi; }
+  return (ISAd[*lo] > ISAd[*v3]) ? lo : v3;
+}
+
+/* Returns the median of five elements: the pointer among v1..v5 selected by a fixed compare/swap sequence on ISAd[*v]. */
+static INLINE
+int *
+tr_median5(const int *ISAd,
+           int *v1, int *v2, int *v3, int *v4, int *v5) {
+  int *t;   /* temporary required by SWAP */
+  if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); }
+  if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); }
+  if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); }
+  if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); }
+  if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); }
+  if(ISAd[*v3] > ISAd[*v4]) { return v4; }
+  return v3;
+}
+
+/* Chooses a pivot in [first, last): median of 3, median of 5, or median of three medians-of-3, by range size. */
+static INLINE
+int *
+tr_pivot(const int *ISAd, int *first, int *last) {
+  int *const tail = last - 1;               /* last element of the range */
+  const int count = last - first;
+  int *const center = first + count / 2;
+  int step;
+  int *m_lo, *m_mid, *m_hi;
+
+  if(count <= 32) {
+    return tr_median3(ISAd, first, center, tail);
+  }
+  if(count <= 512) {
+    step = count >> 2;
+    return tr_median5(ISAd, first, first + step, center, tail - step, tail);
+  }
+  /* large range: median of the medians taken at the front, centre and back */
+  step = count >> 3;
+  m_lo  = tr_median3(ISAd, first, first + step, first + (step << 1));
+  m_mid = tr_median3(ISAd, center - step, center, center + step);
+  m_hi  = tr_median3(ISAd, tail - (step << 1), tail - step, tail);
+  return tr_median3(ISAd, m_lo, m_mid, m_hi);
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+typedef struct _trbudget_t trbudget_t;  /* work budget consulted through trbudget_check */
+struct _trbudget_t {
+  int chance;  /* refills still available; trbudget_check decrements it on each refill */
+  int remain;  /* units left before the next refill is needed */
+  int incval;  /* amount added to remain on each refill */
+  int count;  /* running total of the sizes trbudget_check refused */
+};
+
+static INLINE  /* Resets a budget: `chance` refills of `incval` units each, nothing refused yet. */
+void
+trbudget_init(trbudget_t *budget, int chance, int incval) {
+  budget->chance = chance; budget->remain = budget->incval = incval;
+  budget->count = 0;  /* zeroed so a later `count += size` never reads an indeterminate value */
+}
+
+static INLINE
+int
+trbudget_check(trbudget_t *budget, int size) {
+  /* Grants (1) or refuses (0) a request for `size` units; refusals accumulate in count. */
+  if(budget->remain >= size) { budget->remain -= size; return 1; }
+  if(budget->chance != 0) { budget->chance -= 1; budget->remain += budget->incval - size; return 1; }
+  budget->count += size;
+  return 0;
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+static INLINE  /* Three-way partition of [first, last) around the key v, comparing ISAd[*p]. */
+void
+tr_partition(const int *ISAd,
+             int *first, int *middle, int *last,
+             int **pa, int **pb, int v) {
+  int *a, *b, *c, *d, *e, *f;
+  int t, s;
+  int x = 0;
+  /* Scanning starts at middle; everything in [first, middle) is counted as equal to v. */
+  for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { }
+  if(((a = b) < last) && (x < v)) {
+    for(; (++b < last) && ((x = ISAd[*b]) <= v);) {
+      if(x == v) { SWAP(*b, *a); ++a; }  /* park equal keys at the left end, in [first, a) */
+    }
+  }
+  for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { }
+  if((b < (d = c)) && (x > v)) {
+    for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
+      if(x == v) { SWAP(*c, *d); --d; }  /* park equal keys at the right end, in (d, last) */
+    }
+  }
+  for(; b < c;) {  /* b rests on a key > v and c on a key < v: exchange them and resume both scans */
+    SWAP(*b, *c);
+    for(; (++b < c) && ((x = ISAd[*b]) <= v);) {
+      if(x == v) { SWAP(*b, *a); ++a; }
+    }
+    for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
+      if(x == v) { SWAP(*c, *d); --d; }
+    }
+  }
+  /* Move the parked equal runs from both ends into the middle (a > d appears to occur only when every key equals v). */
+  if(a <= d) {
+    c = b - 1;
+    if((s = a - first) > (t = b - a)) { s = t; }  /* swap only the shorter of the two adjacent runs */
+    for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
+    if((s = d - c) > (t = last - d - 1)) { s = t; }
+    for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
+    first += (b - a), last -= (d - c);
+  }
+  *pa = first, *pb = last;  /* on return [*pa, *pb) is the run of keys equal to v */
+}
+
+static  /* Fills the middle partition [a, b) of SA and assigns each moved entry its final rank in ISA. */
+void
+tr_copy(int *ISA, const int *SA,
+        int *first, int *a, int *b, int *last,
+        int depth) {
+  /* sort suffixes of middle partition
+     by using sorted order of suffixes of left and right partition. */
+  int *c, *d, *e;
+  int s, v;
+
+  v = b - SA - 1;  /* only candidates whose ISA value equals v are copied */
+  for(c = first, d = a - 1; c <= d; ++c) {  /* left to right; d grows as entries are appended, extending the scan */
+    if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
+      *++d = s;
+      ISA[s] = d - SA;  /* the entry's rank becomes its index in SA */
+    }
+  }
+  for(c = last - 1, e = d + 1, d = b; e < d; --c) {  /* right to left, filling the remaining slots downward from b */
+    if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
+      *--d = s;
+      ISA[s] = d - SA;
+    }
+  }
+}
+
+static  /* Variant of tr_copy: copied entries whose ISA[s + depth] values tie end up sharing one new rank. */
+void
+tr_partialcopy(int *ISA, const int *SA,
+               int *first, int *a, int *b, int *last,
+               int depth) {
+  int *c, *d, *e;
+  int s, v;
+  int rank, lastrank, newrank = -1;
+
+  v = b - SA - 1;  /* only candidates whose ISA value equals v are copied */
+  lastrank = -1;
+  for(c = first, d = a - 1; c <= d; ++c) {
+    if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
+      *++d = s;
+      rank = ISA[s + depth];
+      if(lastrank != rank) { lastrank = rank; newrank = d - SA; }  /* a changed rank value opens a new group */
+      ISA[s] = newrank;
+    }
+  }
+
+  lastrank = -1;
+  for(e = d; first <= e; --e) {  /* backward pass over [first, d]: each run of equal ranks takes the index of its last slot */
+    rank = ISA[*e];
+    if(lastrank != rank) { lastrank = rank; newrank = e - SA; }
+    if(newrank != rank) { ISA[*e] = newrank; }
+  }
+
+  lastrank = -1;
+  for(c = last - 1, e = d + 1, d = b; e < d; --c) {  /* mirror of the first loop, scanning from the right end */
+    if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
+      *--d = s;
+      rank = ISA[s + depth];
+      if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
+      ISA[s] = newrank;
+    }
+  }
+}
+
+static  /* Sorts SA[first, last) by the keys ISAd[...] using an explicit work stack; `limit` selects the mode per step. */
+void
+tr_introsort(int *ISA, const int *ISAd,
+             int *SA, int *first, int *last,
+             trbudget_t *budget) {
+#define STACK_SIZE TR_STACKSIZE
+  struct { const int *a; int *b, *c; int d, e; }stack[STACK_SIZE];  /* work items; filled/drained by STACK_PUSH5 / STACK_POP5 (defined elsewhere) */
+  int *a, *b, *c;
+  int t;
+  int v, x = 0;
+  int incr = ISAd - ISA;  /* step added to ISAd whenever the sort descends one level */
+  int limit, next;
+  int ssize, trlink = -1;
+
+  for(ssize = 0, limit = tr_ilg(last - first);;) {  /* limit >= 0: partitioning allowance; -1, -2 and other negatives pick the branches below */
+
+    if(limit < 0) {
+      if(limit == -1) {
+        /* tandem repeat partition */
+        tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1);
+
+        /* update ranks */
+        if(a < last) {
+          for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
+        }
+        if(b < last) {
+          for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; }
+        }
+
+        /* push */
+        if(1 < (b - a)) {
+          STACK_PUSH5(NULL, a, b, 0, 0);  /* remembers the middle range [a, b) for the -2 step */
+          STACK_PUSH5(ISAd - incr, first, last, -2, trlink);
+          trlink = ssize - 2;  /* index of the entry holding (a, b); its d field is set to -1 elsewhere via stack[trlink].d */
+        }
+        if((a - first) <= (last - b)) {
+          if(1 < (a - first)) {
+            STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink);
+            last = a, limit = tr_ilg(a - first);
+          } else if(1 < (last - b)) {
+            first = b, limit = tr_ilg(last - b);
+          } else {
+            STACK_POP5(ISAd, first, last, limit, trlink);
+          }
+        } else {
+          if(1 < (last - b)) {
+            STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink);
+            first = b, limit = tr_ilg(last - b);
+          } else if(1 < (a - first)) {
+            last = a, limit = tr_ilg(a - first);
+          } else {
+            STACK_POP5(ISAd, first, last, limit, trlink);
+          }
+        }
+      } else if(limit == -2) {
+        /* tandem repeat copy */
+        a = stack[--ssize].b, b = stack[ssize].c;  /* presumably the (a, b) pair pushed by the -1 branch */
+        if(stack[ssize].d == 0) {  /* d is still 0 unless some later step flagged this entry through trlink */
+          tr_copy(ISA, SA, first, a, b, last, ISAd - ISA);
+        } else {
+          if(0 <= trlink) { stack[trlink].d = -1; }
+          tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA);
+        }
+        STACK_POP5(ISAd, first, last, limit, trlink);
+      } else {
+        /* sorted partition */
+        if(0 <= *first) {
+          a = first;
+          do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a));  /* non-negative entries take their own index as rank */
+          first = a;
+        }
+        if(first < last) {
+          a = first; do { *a = ~*a; } while(*++a < 0);  /* un-complement a run of negative entries; a stops on the first non-negative one */
+          next = (ISA[*a] != ISAd[*a]) ? tr_ilg(a - first + 1) : -1;
+          if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } }
+
+          /* push */
+          if(trbudget_check(budget, a - first)) {
+            if((a - first) <= (last - a)) {
+              STACK_PUSH5(ISAd, a, last, -3, trlink);
+              ISAd += incr, last = a, limit = next;
+            } else {
+              if(1 < (last - a)) {
+                STACK_PUSH5(ISAd + incr, first, a, next, trlink);
+                first = a, limit = -3;
+              } else {
+                ISAd += incr, last = a, limit = next;
+              }
+            }
+          } else {
+            if(0 <= trlink) { stack[trlink].d = -1; }  /* budget refused: flag the linked entry */
+            if(1 < (last - a)) {
+              first = a, limit = -3;
+            } else {
+              STACK_POP5(ISAd, first, last, limit, trlink);
+            }
+          }
+        } else {
+          STACK_POP5(ISAd, first, last, limit, trlink);
+        }
+      }
+      continue;
+    }
+
+    if((last - first) <= TR_INSERTIONSORT_THRESHOLD) {
+      tr_insertionsort(ISAd, first, last);
+      limit = -3;  /* continue with the "sorted partition" branch */
+      continue;
+    }
+
+    if(limit-- == 0) {  /* allowance used up: heapsort this range instead of partitioning further */
+      tr_heapsort(ISAd, first, last - first);
+      for(a = last - 1; first < a; a = b) {
+        for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; }  /* complement every entry of an equal-key run except its last */
+      }
+      limit = -3;
+      continue;
+    }
+
+    /* choose pivot */
+    a = tr_pivot(ISAd, first, last);
+    SWAP(*first, *a);
+    v = ISAd[*first];
+
+    /* partition */
+    tr_partition(ISAd, first, first + 1, last, &a, &b, v);
+    if((last - first) != (b - a)) {  /* the equal-to-pivot run [a, b) does not cover the whole range */
+      next = (ISA[*a] != v) ? tr_ilg(b - a) : -1;
+
+      /* update ranks */
+      for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
+      if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } }
+
+      /* push */
+      if((1 < (b - a)) && (trbudget_check(budget, b - a))) {  /* the branches below pick, by the three sizes, which ranges are stacked and which is continued */
+        if((a - first) <= (last - b)) {
+          if((last - b) <= (b - a)) {
+            if(1 < (a - first)) {
+              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
+              STACK_PUSH5(ISAd, b, last, limit, trlink);
+              last = a;
+            } else if(1 < (last - b)) {
+              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
+              first = b;
+            } else {
+              ISAd += incr, first = a, last = b, limit = next;
+            }
+          } else if((a - first) <= (b - a)) {
+            if(1 < (a - first)) {
+              STACK_PUSH5(ISAd, b, last, limit, trlink);
+              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
+              last = a;
+            } else {
+              STACK_PUSH5(ISAd, b, last, limit, trlink);
+              ISAd += incr, first = a, last = b, limit = next;
+            }
+          } else {
+            STACK_PUSH5(ISAd, b, last, limit, trlink);
+            STACK_PUSH5(ISAd, first, a, limit, trlink);
+            ISAd += incr, first = a, last = b, limit = next;
+          }
+        } else {
+          if((a - first) <= (b - a)) {
+            if(1 < (last - b)) {
+              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
+              STACK_PUSH5(ISAd, first, a, limit, trlink);
+              first = b;
+            } else if(1 < (a - first)) {
+              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
+              last = a;
+            } else {
+              ISAd += incr, first = a, last = b, limit = next;
+            }
+          } else if((last - b) <= (b - a)) {
+            if(1 < (last - b)) {
+              STACK_PUSH5(ISAd, first, a, limit, trlink);
+              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
+              first = b;
+            } else {
+              STACK_PUSH5(ISAd, first, a, limit, trlink);
+              ISAd += incr, first = a, last = b, limit = next;
+            }
+          } else {
+            STACK_PUSH5(ISAd, first, a, limit, trlink);
+            STACK_PUSH5(ISAd, b, last, limit, trlink);
+            ISAd += incr, first = a, last = b, limit = next;
+          }
+        }
+      } else {  /* middle run is a single entry, or the budget refused it: only the outer ranges go on */
+        if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; }
+        if((a - first) <= (last - b)) {
+          if(1 < (a - first)) {
+            STACK_PUSH5(ISAd, b, last, limit, trlink);
+            last = a;
+          } else if(1 < (last - b)) {
+            first = b;
+          } else {
+            STACK_POP5(ISAd, first, last, limit, trlink);
+          }
+        } else {
+          if(1 < (last - b)) {
+            STACK_PUSH5(ISAd, first, a, limit, trlink);
+            first = b;
+          } else if(1 < (a - first)) {
+            last = a;
+          } else {
+            STACK_POP5(ISAd, first, last, limit, trlink);
+          }
+        }
+      }
+    } else {  /* every key equals the pivot: retry the same range one level deeper if the budget allows */
+      if(trbudget_check(budget, last - first)) {
+        limit = tr_ilg(last - first), ISAd += incr;
+      } else {
+        if(0 <= trlink) { stack[trlink].d = -1; }
+        STACK_POP5(ISAd, first, last, limit, trlink);
+      }
+    }
+  }
+#undef STACK_SIZE
+}
+
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Tandem repeat sort: runs tr_introsort over each unsorted group of SA, pass after pass, updating ISA. */
+static
+void
+trsort(int *ISA, int *SA, int n, int depth) {
+  int *ISAd;
+  int *first, *last;
+  trbudget_t budget;
+  int t, skip, unsorted;
+
+  trbudget_init(&budget, tr_ilg(n) * 2 / 3, n);
+/*  trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */
+  for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) {  /* the offset ISAd - ISA doubles each pass; stops once *SA reaches -n */
+    first = SA;
+    skip = 0;
+    unsorted = 0;
+    do {
+      if((t = *first) < 0) { first -= t; skip += t; }  /* negative entry: jump ahead by -t slots and extend the pending run */
+      else {
+        if(skip != 0) { *(first + skip) = skip; skip = 0; }  /* store the pending (negative) run length at the run's first slot */
+        last = SA + ISA[t] + 1;  /* the group ends one past index ISA[t] */
+        if(1 < (last - first)) {
+          budget.count = 0;  /* reset so count reflects only this call */
+          tr_introsort(ISA, ISAd, SA, first, last, &budget);
+          if(budget.count != 0) { unsorted += budget.count; }
+          else { skip = first - last; }  /* nothing was refused: the whole group joins the pending run */
+        } else if((last - first) == 1) {
+          skip = -1;
+        }
+        first = last;
+      }
+    } while(first < (SA + n));
+    if(skip != 0) { *(first + skip) = skip; }
+    if(unsorted == 0) { break; }  /* no request was refused during this pass: done */
+  }
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Sorts suffixes of type B*. Fills bucket_A / bucket_B and returns m, the number of type B* suffixes. */
+static
+int
+sort_typeBstar(const unsigned char *T, int *SA,
+               int *bucket_A, int *bucket_B,
+               int n, int openMP) {
+  int *PAb, *ISAb, *buf;
+#ifdef LIBBSC_OPENMP
+  int *curbuf;
+  int l;
+#endif
+  int i, j, k, t, m, bufsize;
+  int c0, c1;
+#ifdef LIBBSC_OPENMP
+  int d0, d1;
+#endif
+  (void)openMP;  /* keeps the parameter referenced when LIBBSC_OPENMP is not defined */
+
+  /* Initialize bucket arrays. */
+  for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; }
+  for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; }
+
+  /* Count the number of occurrences of the first one or two characters of each
+     type A, B and B* suffix. Moreover, store the beginning position of all
+     type B* suffixes into the array SA. */
+  for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) {
+    /* type A suffix. */
+    do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1));
+    if(0 <= i) {
+      /* type B* suffix. */
+      ++BUCKET_BSTAR(c0, c1);
+      SA[--m] = i;  /* positions are collected downward from the end of SA */
+      /* type B suffix. */
+      for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) {
+        ++BUCKET_B(c0, c1);
+      }
+    }
+  }
+  m = n - m;  /* m now holds the number of type B* suffixes stored above */
+/*
+note:
+  A type B* suffix is lexicographically smaller than a type B suffix that
+  begins with the same first two characters.
+*/
+
+  /* Calculate the index of start/end point of each bucket. */
+  for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) {
+    t = i + BUCKET_A(c0);
+    BUCKET_A(c0) = i + j; /* start point */
+    i = t + BUCKET_B(c0, c0);
+    for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) {
+      j += BUCKET_BSTAR(c0, c1);
+      BUCKET_BSTAR(c0, c1) = j; /* end point */
+      i += BUCKET_B(c0, c1);
+    }
+  }
+
+  if(0 < m) {
+    /* Sort the type B* suffixes by their first two characters. */
+    PAb = SA + n - m; ISAb = SA + m;  /* PAb: the m positions stored above; ISAb: rank area placed right after the first m slots */
+    for(i = m - 2; 0 <= i; --i) {
+      t = PAb[i], c0 = T[t], c1 = T[t + 1];
+      SA[--BUCKET_BSTAR(c0, c1)] = i;
+    }
+    t = PAb[m - 1], c0 = T[t], c1 = T[t + 1];
+    SA[--BUCKET_BSTAR(c0, c1)] = m - 1;
+
+    /* Sort the type B* substrings using sssort. */
+#ifdef LIBBSC_OPENMP
+    if (openMP)
+    {
+        buf = SA + m;
+        c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m;
+#pragma omp parallel default(shared) private(bufsize, curbuf, k, l, d0, d1)
+        {
+          bufsize = (n - (2 * m)) / omp_get_num_threads();
+          curbuf = buf + omp_get_thread_num() * bufsize;  /* each thread gets its own slice of the spare area */
+          k = 0;
+          for(;;) {
+            #pragma omp critical(sssort_lock)
+            {
+              if(0 < (l = j)) {
+                d0 = c0, d1 = c1;
+                do {
+                  k = BUCKET_BSTAR(d0, d1);
+                  if(--d1 <= d0) {
+                    d1 = ALPHABET_SIZE - 1;
+                    if(--d0 < 0) { break; }
+                  }
+                } while(((l - k) <= 1) && (0 < (l = k)));
+                c0 = d0, c1 = d1, j = k;
+              }
+            }
+            if(l == 0) { break; }
+            sssort(T, PAb, SA + k, SA + l,
+                   curbuf, bufsize, 2, n, *(SA + k) == (m - 1));
+          }
+        }
+    }
+    else
+    {
+        buf = SA + m, bufsize = n - (2 * m);
+        for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
+          for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
+            i = BUCKET_BSTAR(c0, c1);
+            if(1 < (j - i)) {
+              sssort(T, PAb, SA + i, SA + j,
+                     buf, bufsize, 2, n, *(SA + i) == (m - 1));
+            }
+          }
+        }
+    }
+#else
+    buf = SA + m, bufsize = n - (2 * m);
+    for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
+      for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
+        i = BUCKET_BSTAR(c0, c1);
+        if(1 < (j - i)) {  /* only buckets holding more than one entry need sorting */
+          sssort(T, PAb, SA + i, SA + j,
+                 buf, bufsize, 2, n, *(SA + i) == (m - 1));
+        }
+      }
+    }
+#endif
+
+    /* Compute ranks of type B* substrings. */
+    for(i = m - 1; 0 <= i; --i) {
+      if(0 <= SA[i]) {
+        j = i;
+        do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i]));
+        SA[i + 1] = i - j;  /* negative length of the run of non-negative entries just ranked */
+        if(i <= 0) { break; }
+      }
+      j = i;
+      do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0);  /* un-complement the run; all its members share rank j */
+      ISAb[SA[i]] = j;
+    }
+
+    /* Construct the inverse suffix array of type B* suffixes using trsort. */
+    trsort(ISAb, SA, m, 1);
+
+    /* Set the sorted order of type B* suffixes. */
+    for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
+      for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
+      if(0 <= i) {
+        t = i;
+        for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { }
+        SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t;
+      }
+    }
+
+    /* Calculate the index of start/end point of each bucket. */
+    BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */
+    for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) {
+      i = BUCKET_A(c0 + 1) - 1;
+      for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) {
+        t = i - BUCKET_B(c0, c1);
+        BUCKET_B(c0, c1) = i; /* end point */
+
+        /* Move all type B* suffixes to the correct position. */
+        for(i = t, j = BUCKET_BSTAR(c0, c1);
+            j <= k;
+            --i, --k) { SA[i] = SA[k]; }
+      }
+      BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */
+      BUCKET_B(c0, c0) = i; /* end point */
+    }
+  }
+
+  return m;
+}
+
+/* Constructs the suffix array by using the sorted order of type B* suffixes. */
+static
+void
+construct_SA(const unsigned char *T, int *SA,
+             int *bucket_A, int *bucket_B,
+             int n, int m) {
+  int *i, *j, *k;
+  int s;
+  int c0, c1, c2;
+
+  if(0 < m) {  /* m is the type B* count returned by sort_typeBstar; with none, the first pass is skipped */
+    /* Construct the sorted order of type B suffixes by using
+       the sorted order of type B* suffixes. */
+    for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
+      /* Scan the suffix array from right to left. */
+      for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
+          j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
+          i <= j;
+          --j) {
+        if(0 < (s = *j)) {
+          assert(T[s] == c1);
+          assert(((s + 1) < n) && (T[s] <= T[s + 1]));
+          assert(T[s - 1] <= T[s]);
+          *j = ~s;  /* complemented here; the left-to-right scan below flips it back */
+          c0 = T[--s];
+          if((0 < s) && (T[s - 1] > c0)) { s = ~s; }  /* stored complemented when the preceding character is larger */
+          if(c0 != c2) {
+            if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }  /* save the write position of the bucket being left */
+            k = SA + BUCKET_B(c2 = c0, c1);
+          }
+          assert(k < j);
+          *k-- = s;
+        } else {
+          assert(((s == 0) && (T[s] == c1)) || (s < 0));
+          *j = ~s;
+        }
+      }
+    }
+  }
+
+  /* Construct the suffix array by using
+     the sorted order of type B suffixes. */
+  k = SA + BUCKET_A(c2 = T[n - 1]);
+  *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1);  /* reads T[n - 2]; divsufsort handles n < 3 before calling */
+  /* Scan the suffix array from left to right. */
+  for(i = SA, j = SA + n; i < j; ++i) {
+    if(0 < (s = *i)) {
+      assert(T[s - 1] >= T[s]);
+      c0 = T[--s];
+      if((s == 0) || (T[s - 1] < c0)) { s = ~s; }
+      if(c0 != c2) {
+        BUCKET_A(c2) = k - SA;
+        k = SA + BUCKET_A(c2 = c0);
+      }
+      assert(i < k);
+      *k++ = s;
+    } else {
+      assert(s < 0);
+      *i = ~s;  /* complemented entries are restored to their plain value */
+    }
+  }
+}
+
+/* Constructs the burrows-wheeler transformed string directly
+   by using the sorted order of type B* suffixes. Returns the index in SA where suffix 0 was met. */
+static
+int
+construct_BWT(const unsigned char *T, int *SA,
+              int *bucket_A, int *bucket_B,
+              int n, int m) {
+  int *i, *j, *k, *orig;
+  int s;
+  int c0, c1, c2;
+
+  if(0 < m) {
+    /* Construct the sorted order of type B suffixes by using
+       the sorted order of type B* suffixes. */
+    for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
+      /* Scan the suffix array from right to left. */
+      for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
+          j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
+          i <= j;
+          --j) {
+        if(0 < (s = *j)) {
+          assert(T[s] == c1);
+          assert(((s + 1) < n) && (T[s] <= T[s + 1]));
+          assert(T[s - 1] <= T[s]);
+          c0 = T[--s];
+          *j = ~((int)c0);  /* the slot now holds the preceding character, complemented */
+          if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
+          if(c0 != c2) {
+            if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
+            k = SA + BUCKET_B(c2 = c0, c1);
+          }
+          assert(k < j);
+          *k-- = s;
+        } else if(s != 0) {
+          *j = ~s;
+#ifndef NDEBUG
+        } else {
+          assert(T[s] == c1);
+#endif
+        }
+      }
+    }
+  }
+
+  /* Construct the BWTed string by using
+     the sorted order of type B suffixes. */
+  k = SA + BUCKET_A(c2 = T[n - 1]);
+  *k++ = (T[n - 2] < c2) ? ~((int)T[n - 2]) : (n - 1);
+  /* Scan the suffix array from left to right. */
+  for(i = SA, j = SA + n, orig = SA; i < j; ++i) {
+    if(0 < (s = *i)) {
+      assert(T[s - 1] >= T[s]);
+      c0 = T[--s];
+      *i = c0;  /* output character for this slot */
+      if((0 < s) && (T[s - 1] < c0)) { s = ~((int)T[s - 1]); }  /* store the complemented character instead of a position */
+      if(c0 != c2) {
+        BUCKET_A(c2) = k - SA;
+        k = SA + BUCKET_A(c2 = c0);
+      }
+      assert(i < k);
+      *k++ = s;
+    } else if(s != 0) {
+      *i = ~s;  /* complemented character is restored in place */
+    } else {
+      orig = i;  /* slot holding 0; its index is the return value */
+    }
+  }
+
+  return orig - SA;
+}
+
+/* Constructs the burrows-wheeler transformed string directly by using the sorted order
+   of type B* suffixes, and also fills *num_indexes and indexes[] with sampled SA slots. */
+static
+int
+construct_BWT_indexes(const unsigned char *T, int *SA,
+                      int *bucket_A, int *bucket_B,
+                      int n, int m,
+                      unsigned char * num_indexes, int * indexes) {
+  int *i, *j, *k, *orig;
+  int s;
+  int c0, c1, c2;
+  /* mod becomes a low-bit mask: (s & mod) == 0 holds exactly when s is a multiple of mod + 1. */
+  int mod = n / 8;
+  {
+      mod |= mod >> 1;  mod |= mod >> 2;
+      mod |= mod >> 4;  mod |= mod >> 8;
+      mod |= mod >> 16; mod >>= 1;  /* smear the top bit downward, then halve */
+
+      *num_indexes = (unsigned char)((n - 1) / (mod + 1));
+  }
+
+  if(0 < m) {
+    /* Construct the sorted order of type B suffixes by using
+       the sorted order of type B* suffixes. */
+    for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
+      /* Scan the suffix array from right to left. */
+      for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
+          j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
+          i <= j;
+          --j) {
+        if(0 < (s = *j)) {
+          assert(T[s] == c1);
+          assert(((s + 1) < n) && (T[s] <= T[s + 1]));
+          assert(T[s - 1] <= T[s]);
+
+          if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = j - SA;  /* record the SA slot of this sampled position */
+
+          c0 = T[--s];
+          *j = ~((int)c0);
+          if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
+          if(c0 != c2) {
+            if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
+            k = SA + BUCKET_B(c2 = c0, c1);
+          }
+          assert(k < j);
+          *k-- = s;
+        } else if(s != 0) {
+          *j = ~s;
+#ifndef NDEBUG
+        } else {
+          assert(T[s] == c1);
+#endif
+        }
+      }
+    }
+  }
+
+  /* Construct the BWTed string by using
+     the sorted order of type B suffixes. */
+  k = SA + BUCKET_A(c2 = T[n - 1]);
+  if (T[n - 2] < c2) {
+    if (((n - 1) & mod) == 0) indexes[(n - 1) / (mod + 1) - 1] = k - SA;  /* n - 1 is stored as a character below, so sample it now */
+    *k++ = ~((int)T[n - 2]);
+  }
+  else {
+    *k++ = n - 1;
+  }
+
+  /* Scan the suffix array from left to right. */
+  for(i = SA, j = SA + n, orig = SA; i < j; ++i) {
+    if(0 < (s = *i)) {
+      assert(T[s - 1] >= T[s]);
+
+      if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = i - SA;
+
+      c0 = T[--s];
+      *i = c0;
+      if(c0 != c2) {
+        BUCKET_A(c2) = k - SA;
+        k = SA + BUCKET_A(c2 = c0);
+      }
+      assert(i < k);
+      if((0 < s) && (T[s - 1] < c0)) {
+          if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = k - SA;  /* position s is about to be replaced by a character */
+          *k++ = ~((int)T[s - 1]);
+      } else
+        *k++ = s;
+    } else if(s != 0) {
+      *i = ~s;
+    } else {
+      orig = i;  /* slot holding 0; its index is the return value */
+    }
+  }
+
+  return orig - SA;
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/*- Function -*/
+
+int
+divsufsort(const unsigned char *T, int *SA, int n, int openMP) {
+  int *bucket_A, *bucket_B;
+  int m;
+  int err = -2;  /* stays -2 unless both bucket tables get allocated */
+
+  /* Reject invalid input, then settle the trivial lengths directly. */
+  if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; }
+  switch(n) {
+    case 0: return 0;
+    case 1: SA[0] = 0; return 0;
+    case 2: m = (T[0] < T[1]); SA[m ^ 1] = 0; SA[m] = 1; return 0;
+    default: break;
+  }
+
+  /* Working bucket tables; a failed malloc simply reaches free(NULL) below. */
+  bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int));
+  bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int));
+
+  /* Sort the type B* suffixes, then build the full suffix array from them. */
+  if((bucket_A != NULL) && (bucket_B != NULL)) {
+    m = sort_typeBstar(T, SA, bucket_A, bucket_B, n, openMP);
+    construct_SA(T, SA, bucket_A, bucket_B, n, m);
+    err = 0;
+  }
+  free(bucket_B); free(bucket_A);
+  return err;
+}
+
+int
+divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char * num_indexes, int * indexes, int openMP) {
+  int *B = A;      /* work array: the caller's, or one allocated below */
+  int *bucket_A, *bucket_B;
+  int m;
+  int i;
+  int pidx = -2;   /* reported when any allocation fails */
+
+  /* Validate input; lengths 0 and 1 need no sorting. */
+  if((T == NULL) || (U == NULL) || (n < 0)) { return -1; }
+  if(n == 0) { return 0; }
+  if(n == 1) { U[0] = T[0]; return 1; }
+
+  /* Allocate a private work array only when the caller supplied none. */
+  if(B == NULL) { B = (int *)malloc((size_t)(n + 1) * sizeof(int)); }
+  bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int));
+  bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int));
+
+  if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) {
+    /* Sort the type B* suffixes, then build the transform inside B. */
+    m = sort_typeBstar(T, B, bucket_A, bucket_B, n, openMP);
+    pidx = ((num_indexes != NULL) && (indexes != NULL))
+         ? construct_BWT_indexes(T, B, bucket_A, bucket_B, n, m, num_indexes, indexes)
+         : construct_BWT(T, B, bucket_A, bucket_B, n, m);
+
+    /* Emit T[n-1], then B[0..pidx), skip B[pidx], then the rest of B. */
+    U[0] = T[n - 1];
+    for(i = 0; i < pidx; ++i) { U[i + 1] = (unsigned char)B[i]; }
+    for(i = pidx + 1; i < n; ++i) { U[i] = (unsigned char)B[i]; }
+    pidx += 1;
+  }
+
+  /* Release scratch memory; B only when it was allocated here. */
+  free(bucket_B);
+  free(bucket_A);
+  if(A == NULL) { free(B); }
+
+  return pidx;
+}
diff --git a/dictBuilder/config.h b/lib/divsufsort.h
similarity index 52%
rename from dictBuilder/config.h
rename to lib/divsufsort.h
index c2925d335bb..dac0936698c 100644
--- a/dictBuilder/config.h
+++ b/lib/divsufsort.h
@@ -1,5 +1,5 @@
 /*
- * config.h for libdivsufsort
+ * divsufsort.h for libdivsufsort-lite
  * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person
@@ -24,60 +24,44 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
-#ifndef _CONFIG_H
-#define _CONFIG_H 1
+#ifndef _DIVSUFSORT_H
+#define _DIVSUFSORT_H 1
 
 #ifdef __cplusplus
 extern "C" {
 #endif /* __cplusplus */
 
-/** Define to the version of this package. **/
-#define PROJECT_VERSION_FULL "2.0.1"
 
-/** Define to 1 if you have the header files. **/
-#define HAVE_INTTYPES_H 1
-#define HAVE_STDDEF_H 1
-#define HAVE_STDINT_H 1
-#define HAVE_STDLIB_H 1
-#define HAVE_STRING_H 1
-#define HAVE_STRINGS_H 1
-#define HAVE_MEMORY_H 1
-#define HAVE_SYS_TYPES_H 1
+/*- Prototypes -*/
 
-/** for WinIO **/
-/* #undef HAVE_IO_H */
-/* #undef HAVE_FCNTL_H */
-/* #undef HAVE__SETMODE */
-/* #undef HAVE_SETMODE */
-/* #undef HAVE__FILENO */
-/* #undef HAVE_FOPEN_S */
-/* #undef HAVE__O_BINARY */
-/*
-#ifndef HAVE__SETMODE
-# if HAVE_SETMODE
-#  define _setmode setmode
-#  define HAVE__SETMODE 1
-# endif
-# if HAVE__SETMODE && !HAVE__O_BINARY
-#  define _O_BINARY 0
-#  define HAVE__O_BINARY 1
-# endif
-#endif
-*/
-
-/** for inline **/
-#ifndef INLINE
-# define INLINE inline
-#endif
+/**
+ * Constructs the suffix array of a given string.
+ * @param T[0..n-1] The input string.
+ * @param SA[0..n-1] The output array of suffixes.
+ * @param n The length of the given string.
+ * @param openMP enables OpenMP optimization.
+ * @return 0 if no error occurred, -1 on invalid arguments, -2 if memory allocation failed.
+ */
+int
+divsufsort(const unsigned char *T, int *SA, int n, int openMP);
 
-/** for VC++ warning **/
-#ifdef _MSC_VER
-#pragma warning(disable: 4127)
-#endif
+/**
+ * Constructs the burrows-wheeler transformed string of a given string.
+ * @param T[0..n-1] The input string.
+ * @param U[0..n-1] The output string. (can be T)
+ * @param A[0..n-1] The temporary array. (can be NULL)
+ * @param n The length of the given string.
+ * @param num_indexes The length of secondary indexes array. (can be NULL)
+ * @param indexes The secondary indexes array. (can be NULL)
+ * @param openMP enables OpenMP optimization.
+ * @return The primary index if no error occurred, -1 on invalid arguments, -2 if memory allocation failed.
+ */
+int
+divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char * num_indexes, int * indexes, int openMP);
 
 
 #ifdef __cplusplus
 } /* extern "C" */
 #endif /* __cplusplus */
 
-#endif /* _CONFIG_H */
+#endif /* _DIVSUFSORT_H */
diff --git a/lib/legacy/zstd_v02.c b/lib/legacy/zstd_v02.c
index 73136f81a63..860df94a6fd 100644
--- a/lib/legacy/zstd_v02.c
+++ b/lib/legacy/zstd_v02.c
@@ -2133,7 +2133,8 @@ static size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
     if (tableLog > memLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
 
     /* find maxWeight */
-    for (maxW = tableLog; rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
+    for (maxW = tableLog; rankStats[maxW]==0; maxW--)
+        {if (!maxW) return ERROR(GENERIC); }  /* necessarily finds a solution before maxW==0 */
 
     /* Get start index of each weight */
     {
@@ -2465,7 +2466,9 @@ static size_t HUF_readDTableX6 (U32* DTable, const void* src, size_t srcSize)
     if (tableLog > memLog) return ERROR(tableLog_tooLarge);   /* DTable is too small */
 
     /* find maxWeight */
-    for (maxW = tableLog; rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
+    for (maxW = tableLog; rankStats[maxW]==0; maxW--)
+        { if (!maxW) return ERROR(GENERIC); }  /* necessarily finds a solution before maxW==0 */
+
 
     /* Get start index of each weight */
     {
diff --git a/lib/legacy/zstd_v03.c b/lib/legacy/zstd_v03.c
index 5c75eb4e431..6a048fd273b 100644
--- a/lib/legacy/zstd_v03.c
+++ b/lib/legacy/zstd_v03.c
@@ -2133,7 +2133,8 @@ static size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
     if (tableLog > memLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
 
     /* find maxWeight */
-    for (maxW = tableLog; rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
+    for (maxW = tableLog; rankStats[maxW]==0; maxW--)
+        { if (!maxW) return ERROR(GENERIC); }  /* necessarily finds a solution before maxW==0 */
 
     /* Get start index of each weight */
     {
@@ -2465,7 +2466,8 @@ static size_t HUF_readDTableX6 (U32* DTable, const void* src, size_t srcSize)
     if (tableLog > memLog) return ERROR(tableLog_tooLarge);   /* DTable is too small */
 
     /* find maxWeight */
-    for (maxW = tableLog; rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
+    for (maxW = tableLog; rankStats[maxW]==0; maxW--)
+        { if (!maxW) return ERROR(GENERIC); }  /* necessarily finds a solution before maxW==0 */
 
     /* Get start index of each weight */
     {
diff --git a/lib/legacy/zstd_v04.c b/lib/legacy/zstd_v04.c
index 45a3f61cc8d..57d724c27c3 100644
--- a/lib/legacy/zstd_v04.c
+++ b/lib/legacy/zstd_v04.c
@@ -1342,9 +1342,9 @@ typedef struct
 
 MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt)
 {
-    const void* ptr = dt;
-    const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr;
-    DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
+    FSE_DTableHeader DTableH;
+    memcpy(&DTableH, dt, sizeof(DTableH));
+    DStatePtr->state = BIT_readBits(bitD, DTableH.tableLog);
     BIT_reloadDStream(bitD);
     DStatePtr->table = dt + 1;
 }
@@ -1465,7 +1465,7 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
 
 
 /* **************************************************************
-*  Includes
+*  Dependencies
 ****************************************************************/
 #include <stdlib.h>     /* malloc, free, qsort */
 #include <string.h>     /* memcpy, memset */
@@ -1499,7 +1499,7 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
 typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
 
 
-/* **************************************************************
+/*-**************************************************************
 *  Templates
 ****************************************************************/
 /*
@@ -1841,9 +1841,11 @@ static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
                             const void* cSrc, size_t cSrcSize,
                             const FSE_DTable* dt)
 {
-    const void* ptr = dt;
-    const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
-    const U32 fastMode = DTableH->fastMode;
+    FSE_DTableHeader DTableH;
+    U32 fastMode;
+
+    memcpy(&DTableH, dt, sizeof(DTableH));
+    fastMode = DTableH.fastMode;
 
     /* select fast mode (static) */
     if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
@@ -2561,7 +2563,8 @@ static size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
     if (tableLog > memLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
 
     /* find maxWeight */
-    for (maxW = tableLog; rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
+    for (maxW = tableLog; rankStats[maxW]==0; maxW--)
+        { if (!maxW) return ERROR(GENERIC); }  /* necessarily finds a solution before maxW==0 */
 
     /* Get start index of each weight */
     {
@@ -2889,7 +2892,8 @@ static size_t HUF_readDTableX6 (U32* DTable, const void* src, size_t srcSize)
     if (tableLog > memLog) return ERROR(tableLog_tooLarge);   /* DTable is too small */
 
     /* find maxWeight */
-    for (maxW = tableLog; rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
+    for (maxW = tableLog; rankStats[maxW]==0; maxW--)
+        { if (!maxW) return ERROR(GENERIC); }  /* necessarily finds a solution before maxW==0 */
 
     /* Get start index of each weight */
     {
@@ -4245,39 +4249,32 @@ static size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbc, void* dst, size_t* maxDs
                 ip += headerSize;
                 headerSize = ZSTD_getFrameParams(&(zbc->params), zbc->headerBuffer, zbc->hPos);
                 if (ZSTD_isError(headerSize)) return headerSize;
-                if (headerSize)
-                {
+                if (headerSize) {
                     /* not enough input to decode header : tell how many bytes would be necessary */
                     *maxDstSizePtr = 0;
                     return headerSize - zbc->hPos;
-                }
-                // zbc->stage = ZBUFFds_decodeHeader; break;   /* useless : stage follows */
-            }
+            }   }
 
         case ZBUFFds_decodeHeader:
                 /* apply header to create / resize buffers */
                 {
                     size_t neededOutSize = (size_t)1 << zbc->params.windowLog;
                     size_t neededInSize = BLOCKSIZE;   /* a block is never > BLOCKSIZE */
-                    if (zbc->inBuffSize < neededInSize)
-                    {
+                    if (zbc->inBuffSize < neededInSize) {
                         free(zbc->inBuff);
                         zbc->inBuffSize = neededInSize;
                         zbc->inBuff = (char*)malloc(neededInSize);
                         if (zbc->inBuff == NULL) return ERROR(memory_allocation);
                     }
-                    if (zbc->outBuffSize < neededOutSize)
-                    {
+                    if (zbc->outBuffSize < neededOutSize) {
                         free(zbc->outBuff);
                         zbc->outBuffSize = neededOutSize;
                         zbc->outBuff = (char*)malloc(neededOutSize);
                         if (zbc->outBuff == NULL) return ERROR(memory_allocation);
-                    }
-                }
+                }   }
                 if (zbc->dictSize)
                     ZSTD_decompress_insertDictionary(zbc->zc, zbc->dict, zbc->dictSize);
-                if (zbc->hPos)
-                {
+                if (zbc->hPos) {
                     /* some data already loaded into headerBuffer : transfer into inBuff */
                     memcpy(zbc->inBuff, zbc->headerBuffer, zbc->hPos);
                     zbc->inPos = zbc->hPos;
diff --git a/lib/mem.h b/lib/mem.h
index 36ba06f062d..0e357e53099 100644
--- a/lib/mem.h
+++ b/lib/mem.h
@@ -86,7 +86,7 @@ extern "C" {
 /*-**************************************************************
 *  Memory I/O
 *****************************************************************/
-/*!MEM_FORCE_MEMORY_ACCESS
+/* MEM_FORCE_MEMORY_ACCESS :
  * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
  * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
  * The below switch allow to select different access method for improved performance.
@@ -119,11 +119,12 @@ MEM_STATIC unsigned MEM_isLittleEndian(void)
 
 #if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2)
 
-/* violates C standard on structure alignment.
+/* violates C standard, by lying about structure alignment.
 Only use if no other choice to achieve best performance on target platform */
 MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }
 MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
 MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
+MEM_STATIC size_t MEM_readST(const void* memPtr) { return *(const size_t*) memPtr; }   /* native-word read; returns size_t, same prototype as the memcpy() variant */
 
 MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
 MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
@@ -133,11 +134,12 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; }
 
 /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
 /* currently only defined for gcc and icc */
-typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign;
+typedef union { U16 u16; U32 u32; U64 u64; size_t st; } __attribute__((packed)) unalign;
 
 MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
 MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
 MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
+MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalign*)ptr)->st; }   /* native-word read through the packed union; returns size_t, same prototype as the memcpy() variant */
 
 MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
 MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
@@ -163,6 +165,11 @@ MEM_STATIC U64 MEM_read64(const void* memPtr)
     U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
 }
 
+MEM_STATIC size_t MEM_readST(const void* memPtr)
+{
+    size_t val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
 MEM_STATIC void MEM_write16(void* memPtr, U16 value)
 {
     memcpy(memPtr, &value, sizeof(value));
@@ -178,7 +185,7 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value)
     memcpy(memPtr, &value, sizeof(value));
 }
 
-#endif // MEM_FORCE_MEMORY_ACCESS
+#endif /* MEM_FORCE_MEMORY_ACCESS */
 
 
 MEM_STATIC U16 MEM_readLE16(const void* memPtr)
diff --git a/lib/zstd_buffered.c b/lib/zbuff.c
similarity index 99%
rename from lib/zstd_buffered.c
rename to lib/zbuff.c
index 133c54e14ed..4c1eb2cf267 100644
--- a/lib/zstd_buffered.c
+++ b/lib/zbuff.c
@@ -41,7 +41,7 @@
 #include <stdlib.h>
 #include "error_private.h"
 #include "zstd_static.h"
-#include "zstd_buffered_static.h"
+#include "zbuff_static.h"
 
 
 /* *************************************
diff --git a/lib/zstd_buffered.h b/lib/zbuff.h
similarity index 88%
rename from lib/zstd_buffered.h
rename to lib/zbuff.h
index 8aa37650e7a..d3275b7df32 100644
--- a/lib/zstd_buffered.h
+++ b/lib/zbuff.h
@@ -1,6 +1,6 @@
 /*
     Buffered version of Zstd compression library
-    Copyright (C) 2015, Yann Collet.
+    Copyright (C) 2015-2016, Yann Collet.
 
     BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 
@@ -26,14 +26,13 @@
     OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
     You can contact the author at :
-    - zstd source repository : https://github.com/Cyan4973/zstd
-    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+    - zstd homepage : http://www.zstd.net/
 */
 #ifndef ZSTD_BUFFERED_H
 #define ZSTD_BUFFERED_H
 
 /* The objects defined into this file should be considered experimental.
- * They are not labelled stable, as their prototype may change in the future.
+ * They are not considered stable, as their prototype may change in the future.
  * You can use them for tests, provide feedback, or if you can endure risk of future changes.
  */
 
@@ -42,7 +41,7 @@ extern "C" {
 #endif
 
 /* *************************************
-*  Includes
+*  Dependencies
 ***************************************/
 #include <stddef.h>   /* size_t */
 
@@ -75,7 +74,7 @@ ZSTDLIB_API size_t ZBUFF_compressContinue(ZBUFF_CCtx* cctx, void* dst, size_t* d
 ZSTDLIB_API size_t ZBUFF_compressFlush(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr);
 ZSTDLIB_API size_t ZBUFF_compressEnd(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr);
 
-/** ************************************************
+/*-*************************************************
 *  Streaming compression
 *
 *  A ZBUFF_CCtx object is required to track streaming operation.
@@ -123,12 +122,14 @@ ZSTDLIB_API size_t      ZBUFF_freeDCtx(ZBUFF_DCtx* dctx);
 ZSTDLIB_API size_t ZBUFF_decompressInit(ZBUFF_DCtx* dctx);
 ZSTDLIB_API size_t ZBUFF_decompressInitDictionary(ZBUFF_DCtx* dctx, const void* dict, size_t dictSize);
 
-ZSTDLIB_API size_t ZBUFF_decompressContinue(ZBUFF_DCtx* dctx, void* dst, size_t* dstCapacityPtr, const void* src, size_t* srcSizePtr);
+ZSTDLIB_API size_t ZBUFF_decompressContinue(ZBUFF_DCtx* dctx,
+                                            void* dst, size_t* dstCapacityPtr,
+                                      const void* src, size_t* srcSizePtr);
 
-/** ************************************************
+/*-***************************************************************************
 *  Streaming decompression
 *
-*  A ZBUFF_DCtx object is required to track streaming operation.
+*  A ZBUFF_DCtx object is required to track streaming operations.
 *  Use ZBUFF_createDCtx() and ZBUFF_freeDCtx() to create/release resources.
 *  Use ZBUFF_decompressInit() to start a new decompression operation,
 *   or ZBUFF_decompressInitDictionary() if decompression requires a dictionary.
@@ -143,10 +144,10 @@ ZSTDLIB_API size_t ZBUFF_decompressContinue(ZBUFF_DCtx* dctx, void* dst, size_t*
 *            or 0 when a frame is completely decoded
 *            or an error code, which can be tested using ZBUFF_isError().
 *
-*  Hint : recommended buffer sizes (not compulsory) : ZBUFF_recommendedDInSize / ZBUFF_recommendedDOutSize
-*  output : ZBUFF_recommendedDOutSize==128 KB block size is the internal unit, it ensures it's always possible to write a full block when it's decoded.
-*  input : ZBUFF_recommendedDInSize==128Kb+3; just follow indications from ZBUFF_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
-* **************************************************/
+*  Hint : recommended buffer sizes (not compulsory) : ZBUFF_recommendedDInSize() / ZBUFF_recommendedDOutSize()
+*  output : ZBUFF_recommendedDOutSize == 128 KB. Block size is the internal unit, so this ensures it's always possible to write a full block when decoded.
+*  input  : ZBUFF_recommendedDInSize == 128 KB + 3; just follow indications from ZBUFF_decompressContinue() to minimize latency. It should always be <= 128 KB + 3.
+* *******************************************************************************/
 
 
 /* *************************************
@@ -155,7 +156,7 @@ ZSTDLIB_API size_t ZBUFF_decompressContinue(ZBUFF_DCtx* dctx, void* dst, size_t*
 ZSTDLIB_API unsigned ZBUFF_isError(size_t errorCode);
 ZSTDLIB_API const char* ZBUFF_getErrorName(size_t errorCode);
 
-/** The below functions provide recommended buffer sizes for Compression or Decompression operations.
+/** Functions below provide recommended buffer sizes for Compression or Decompression operations.
 *   These sizes are just hints, and tend to offer better latency */
 ZSTDLIB_API size_t ZBUFF_recommendedCInSize(void);
 ZSTDLIB_API size_t ZBUFF_recommendedCOutSize(void);
diff --git a/lib/zstd_buffered_static.h b/lib/zbuff_static.h
similarity index 92%
rename from lib/zstd_buffered_static.h
rename to lib/zbuff_static.h
index 5052f4c3e3c..405508900f3 100644
--- a/lib/zstd_buffered_static.h
+++ b/lib/zbuff_static.h
@@ -1,7 +1,7 @@
 /*
     zstd - buffered version of compression library
     experimental complementary API, for static linking only
-    Copyright (C) 2015, Yann Collet.
+    Copyright (C) 2015-2016, Yann Collet.
 
     BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 
@@ -27,8 +27,7 @@
     OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
     You can contact the author at :
-    - zstd source repository : https://github.com/Cyan4973/zstd
-    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+    - zstd homepage : http://www.zstd.net
 */
 #ifndef ZSTD_BUFFERED_STATIC_H
 #define ZSTD_BUFFERED_STATIC_H
@@ -46,7 +45,7 @@ extern "C" {
 *  Includes
 ***************************************/
 #include "zstd_static.h"     /* ZSTD_parameters */
-#include "zstd_buffered.h"
+#include "zbuff.h"
 
 
 /* *************************************
diff --git a/dictBuilder/dictBuilder.c b/lib/zdict.c
similarity index 67%
rename from dictBuilder/dictBuilder.c
rename to lib/zdict.c
index d8b2bdb3000..d3d5784dda8 100644
--- a/dictBuilder/dictBuilder.c
+++ b/lib/zdict.c
@@ -1,33 +1,41 @@
 /*
-    dictBuilder - dictionary builder for LZ algorithms
+    dictBuilder - dictionary builder for zstd
     Copyright (C) Yann Collet 2016
 
-    GPL v2 License
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
     You can contact the author at :
-    - zstd source repository : https://github.com/Cyan4973/zstd
+    - Zstd homepage : https://www.zstd.net
 */
 
-/* **************************************
+/*-**************************************
 *  Compiler Options
 ****************************************/
 /* Disable some Visual warning messages */
 #ifdef _MSC_VER
-#  define _CRT_SECURE_NO_WARNINGS                /* fopen */
 #  pragma warning(disable : 4127)                /* disable: C4127: conditional expression is constant */
 #endif
 
@@ -41,7 +49,7 @@
 
 
 /*-*************************************
-*  Includes
+*  Dependencies
 ***************************************/
 #include <stdlib.h>        /* malloc, free */
 #include <string.h>        /* memset */
@@ -52,10 +60,11 @@
 
 #include "mem.h"           /* read */
 #include "error_private.h"
-#include "divsufsort.h"
-#include "dictBuilder.h"
-#include "zstd_compress.c"
+#include "fse.h"
 #include "huff0_static.h"
+#include "zstd_internal.h"
+#include "divsufsort.h"
+#include "zdict_static.h"
 
 
 /*-*************************************
@@ -74,8 +83,6 @@
 #define GB *(1U<<30)
 
 #define DICTLISTSIZE 10000
-#define MEMMULT 11
-static const size_t maxMemory = (sizeof(size_t) == 4) ? (2 GB - 64 MB) : ((size_t)(512 MB) << sizeof(size_t));
 
 #define NOISELENGTH 32
 #define PRIME1   2654435761U
@@ -94,16 +101,15 @@ static const size_t g_min_fast_dictContent = 192;
 #define DISPLAY(...)         fprintf(stderr, __VA_ARGS__)
 #define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
 static unsigned g_displayLevel = 0;   /* 0 : no display;   1: errors;   2: default;  4: full information */
-void DiB_setNotificationLevel(unsigned l) { g_displayLevel=l; }
 
 #define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
-            if (DiB_GetMilliSpan(g_time) > refreshRate)  \
+            if (ZDICT_GetMilliSpan(g_time) > refreshRate)  \
             { g_time = clock(); DISPLAY(__VA_ARGS__); \
             if (g_displayLevel>=4) fflush(stdout); } }
 static const unsigned refreshRate = 300;
 static clock_t g_time = 0;
 
-void DiB_printHex(U32 dlevel, const void* ptr, size_t length)
+static void ZDICT_printHex(U32 dlevel, const void* ptr, size_t length)
 {
     const BYTE* const b = (const BYTE*)ptr;
     size_t u;
@@ -116,98 +122,25 @@ void DiB_printHex(U32 dlevel, const void* ptr, size_t length)
 }
 
 
-/*-*************************************
-*  Exceptions
-***************************************/
-#ifndef DEBUG
-#  define DEBUG 0
-#endif
-#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
-#define EXM_THROW(error, ...)                                             \
-{                                                                         \
-    DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
-    DISPLAYLEVEL(1, "Error %i : ", error);                                \
-    DISPLAYLEVEL(1, __VA_ARGS__);                                         \
-    DISPLAYLEVEL(1, "\n");                                                \
-    exit(error);                                                          \
-}
-
-
-/* ********************************************************
+/*-********************************************************
 *  Helper functions
 **********************************************************/
-unsigned DiB_versionNumber (void) { return DiB_VERSION_NUMBER; }
-
-static unsigned DiB_GetMilliSpan(clock_t nPrevious)
+static unsigned ZDICT_GetMilliSpan(clock_t nPrevious)
 {
     clock_t nCurrent = clock();
     unsigned nSpan = (unsigned)(((nCurrent - nPrevious) * 1000) / CLOCKS_PER_SEC);
     return nSpan;
 }
 
-unsigned DiB_isError(size_t errorCode) { return ERR_isError(errorCode); }
-
-const char* DiB_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
+unsigned ZDICT_isError(size_t errorCode) { return ERR_isError(errorCode); }
 
-
-/* ********************************************************
-*  File related operations
-**********************************************************/
-static unsigned long long DiB_getFileSize(const char* infilename)
-{
-    int r;
-#if defined(_MSC_VER)
-    struct _stat64 statbuf;
-    r = _stat64(infilename, &statbuf);
-#else
-    struct stat statbuf;
-    r = stat(infilename, &statbuf);
-#endif
-    if (r || !S_ISREG(statbuf.st_mode)) return 0;   /* No good... */
-    return (unsigned long long)statbuf.st_size;
-}
-
-
-static unsigned long long DiB_getTotalFileSize(const char** fileNamesTable, unsigned nbFiles)
-{
-    unsigned long long total = 0;
-    unsigned n;
-    for (n=0; n<nbFiles; n++)
-        total += DiB_getFileSize(fileNamesTable[n]);
-    return total;
-}
-
-
-static void DiB_loadFiles(void* buffer, size_t bufferSize,
-                          size_t* fileSizes,
-                          const char** fileNamesTable, unsigned nbFiles)
-{
-    char* buff = (char*)buffer;
-    size_t pos = 0;
-    unsigned n;
-
-    for (n=0; n<nbFiles; n++) {
-        size_t readSize;
-        unsigned long long fileSize = DiB_getFileSize(fileNamesTable[n]);
-        FILE* f = fopen(fileNamesTable[n], "rb");
-        if (f==NULL) EXM_THROW(10, "impossible to open file %s", fileNamesTable[n]);
-        DISPLAYLEVEL(2, "Loading %s...       \r", fileNamesTable[n]);
-        if (fileSize > bufferSize-pos) fileSize = 0;  /* stop there, not enough memory to load all files */
-        readSize = fread(buff+pos, 1, (size_t)fileSize, f);
-        if (readSize != (size_t)fileSize) EXM_THROW(11, "could not read %s", fileNamesTable[n]);
-        pos += readSize;
-        fileSizes[n] = (size_t)fileSize;
-        fclose(f);
-    }
-}
+const char* ZDICT_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
 
 
 /*-********************************************************
 *  Dictionary training functions
 **********************************************************/
-static size_t DiB_read_ARCH(const void* p) { size_t r; memcpy(&r, p, sizeof(r)); return r; }
-
-static unsigned DiB_NbCommonBytes (register size_t val)
+static unsigned ZDICT_NbCommonBytes (register size_t val)
 {
     if (MEM_isLittleEndian()) {
         if (MEM_64bits()) {
@@ -266,17 +199,17 @@ static unsigned DiB_NbCommonBytes (register size_t val)
 }
 
 
-/*! DiB_count() :
+/*! ZDICT_count() :
     Count the nb of common bytes between 2 pointers.
     Note : this function presumes end of buffer followed by noisy guard band.
 */
-static size_t DiB_count(const void* pIn, const void* pMatch)
+static size_t ZDICT_count(const void* pIn, const void* pMatch)
 {
     const char* const pStart = (const char*)pIn;
     for (;;) {
-        size_t diff = DiB_read_ARCH(pMatch) ^ DiB_read_ARCH(pIn);
+        size_t diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
         if (!diff) { pIn = (const char*)pIn+sizeof(size_t); pMatch = (const char*)pMatch+sizeof(size_t); continue; }
-        pIn = (const char*)pIn+DiB_NbCommonBytes(diff);
+        pIn = (const char*)pIn+ZDICT_NbCommonBytes(diff);
         return (size_t)((const char*)pIn - pStart);
     }
 }
@@ -288,7 +221,7 @@ typedef struct {
     U32 savings;
 } dictItem;
 
-void DiB_initDictItem(dictItem* d)
+static void ZDICT_initDictItem(dictItem* d)
 {
     d->pos = 1;
     d->length = 0;
@@ -298,9 +231,9 @@ void DiB_initDictItem(dictItem* d)
 
 #define LLIMIT 64          /* heuristic determined experimentally */
 #define MINMATCHLENGTH 7   /* heuristic determined experimentally */
-static dictItem DiB_analyzePos(
+static dictItem ZDICT_analyzePos(
                        BYTE* doneMarks,
-                       const saidx_t* suffix, U32 start,
+                       const int* suffix, U32 start,
                        const void* buffer, U32 minRatio)
 {
     U32 lengthList[LLIMIT] = {0};
@@ -334,12 +267,12 @@ static dictItem DiB_analyzePos(
     /* look forward */
     do {
         end++;
-        length = DiB_count(b + pos, b + suffix[end]);
+        length = ZDICT_count(b + pos, b + suffix[end]);
     } while (length >=MINMATCHLENGTH);
 
     /* look backward */
     do {
-        length = DiB_count(b + pos, b + *(suffix+start-1));
+        length = ZDICT_count(b + pos, b + *(suffix+start-1));
         if (length >=MINMATCHLENGTH) start--;
     } while(length >= MINMATCHLENGTH);
 
@@ -400,14 +333,14 @@ static dictItem DiB_analyzePos(
         /* look forward */
         do {
             end++;
-            length = DiB_count(b + pos, b + suffix[end]);
+            length = ZDICT_count(b + pos, b + suffix[end]);
             if (length >= LLIMIT) length = LLIMIT-1;
             lengthList[length]++;
         } while (length >=MINMATCHLENGTH);
 
         /* look backward */
         do {
-            length = DiB_count(b + pos, b + suffix[start-1]);
+            length = ZDICT_count(b + pos, b + suffix[start-1]);
             if (length >= LLIMIT) length = LLIMIT-1;
             lengthList[length]++;
             if (length >=MINMATCHLENGTH) start--;
@@ -453,7 +386,7 @@ static dictItem DiB_analyzePos(
                 if (testedPos == pos)
                     length = solution.length;
                 else {
-                    length = DiB_count(b+pos, b+testedPos);
+                    length = ZDICT_count(b+pos, b+testedPos);
                     if (length > solution.length) length = solution.length;
                 }
                 pEnd = (U32)(testedPos + length);
@@ -465,11 +398,11 @@ static dictItem DiB_analyzePos(
 }
 
 
-/*! DiB_checkMerge
+/*! ZDICT_checkMerge
     check if dictItem can be merged, do it if possible
     @return : id of destination elt, 0 if not merged
 */
-static U32 DiB_checkMerge(dictItem* table, dictItem elt, U32 eltNbToSkip)
+static U32 ZDICT_checkMerge(dictItem* table, dictItem elt, U32 eltNbToSkip)
 {
     const U32 tableSize = table->pos;
     const U32 max = elt.pos + (elt.length-1);
@@ -513,7 +446,7 @@ static U32 DiB_checkMerge(dictItem* table, dictItem elt, U32 eltNbToSkip)
 }
 
 
-static void DiB_removeDictItem(dictItem* table, U32 id)
+static void ZDICT_removeDictItem(dictItem* table, U32 id)
 {
     /* convention : first element is nb of elts */
     U32 max = table->pos;
@@ -525,15 +458,15 @@ static void DiB_removeDictItem(dictItem* table, U32 id)
 }
 
 
-static void DiB_insertDictItem(dictItem* table, U32 maxSize, dictItem elt)
+static void ZDICT_insertDictItem(dictItem* table, U32 maxSize, dictItem elt)
 {
     /* merge if possible */
-    U32 mergeId = DiB_checkMerge(table, elt, 0);
+    U32 mergeId = ZDICT_checkMerge(table, elt, 0);
     if (mergeId) {
         U32 newMerge = 1;
         while (newMerge) {
-            newMerge = DiB_checkMerge(table, table[mergeId], mergeId);
-            if (newMerge) DiB_removeDictItem(table, mergeId);
+            newMerge = ZDICT_checkMerge(table, table[mergeId], mergeId);
+            if (newMerge) ZDICT_removeDictItem(table, mergeId);
             mergeId = newMerge;
         }
         return;
@@ -555,7 +488,7 @@ static void DiB_insertDictItem(dictItem* table, U32 maxSize, dictItem elt)
 }
 
 
-static U32 DiB_dictSize(const dictItem* dictList)
+static U32 ZDICT_dictSize(const dictItem* dictList)
 {
     U32 u, dictSize = 0;
     for (u=1; u<dictList[0].pos; u++)
@@ -564,32 +497,35 @@ static U32 DiB_dictSize(const dictItem* dictList)
 }
 
 
-static void DiB_trainBuffer(dictItem* dictList, U32 dictListSize,
+static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
                             const void* const buffer, const size_t bufferSize,   /* buffer must end with noisy guard band */
                             const size_t* fileSizes, unsigned nbFiles,
                             U32 shiftRatio, unsigned maxDictSize)
 {
-    saidx_t* const suffix0 = (saidx_t*)malloc((bufferSize+2)*sizeof(*suffix0));
-    saidx_t* const suffix = suffix0+1;
+    int* const suffix0 = (int*)malloc((bufferSize+2)*sizeof(*suffix0));
+    int* const suffix = suffix0+1;
     U32* reverseSuffix = (U32*)malloc((bufferSize)*sizeof(*reverseSuffix));
     BYTE* doneMarks = (BYTE*)malloc((bufferSize+16)*sizeof(*doneMarks));   /* +16 for overflow security */
     U32* filePos = (U32*)malloc(nbFiles * sizeof(*filePos));
     U32 minRatio = nbFiles >> shiftRatio;
-    saint_t errorCode;
+    int divSuftSortResult;
+    size_t result = 0;
 
     /* init */
     DISPLAYLEVEL(2, "\r%70s\r", "");   /* clean display line */
-    if (!suffix0 || !reverseSuffix || !doneMarks || !filePos)
-        EXM_THROW(1, "not enough memory for DiB_trainBuffer");
+    if (!suffix0 || !reverseSuffix || !doneMarks || !filePos) {
+        result = ERROR(memory_allocation);
+        goto _cleanup;
+    }
     if (minRatio < MINRATIO) minRatio = MINRATIO;
     memset(doneMarks, 0, bufferSize+16);
 
     /* sort */
     DISPLAYLEVEL(2, "sorting %u files of total size %u MB ...\n", nbFiles, (U32)(bufferSize>>20));
-    errorCode = divsufsort((const sauchar_t*)buffer, suffix, (saidx_t)bufferSize);
-    if (errorCode != 0) EXM_THROW(2, "sort failed");
-    suffix[bufferSize] = (saidx_t)bufferSize;   /* leads into noise */
-    suffix0[0] = (saidx_t)bufferSize;           /* leads into noise */
+    divSuftSortResult = divsufsort((const unsigned char*)buffer, suffix, (int)bufferSize, 0);
+    if (divSuftSortResult != 0) { result = ERROR(GENERIC); goto _cleanup; }
+    suffix[bufferSize] = (int)bufferSize;   /* leads into noise */
+    suffix0[0] = (int)bufferSize;           /* leads into noise */
     {
         /* build reverse suffix sort */
         size_t pos;
@@ -608,9 +544,9 @@ static void DiB_trainBuffer(dictItem* dictList, U32 dictListSize,
         U32 cursor; for (cursor=0; cursor < bufferSize; ) {
             dictItem solution;
             if (doneMarks[cursor]) { cursor++; continue; }
-            solution = DiB_analyzePos(doneMarks, suffix, reverseSuffix[cursor], buffer, minRatio);
+            solution = ZDICT_analyzePos(doneMarks, suffix, reverseSuffix[cursor], buffer, minRatio);
             if (solution.length==0) { cursor++; continue; }
-            DiB_insertDictItem(dictList, dictListSize, solution);
+            ZDICT_insertDictItem(dictList, dictListSize, solution);
             cursor += solution.length;
             DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / bufferSize * 100);
     }   }
@@ -626,33 +562,16 @@ static void DiB_trainBuffer(dictItem* dictList, U32 dictListSize,
         dictList->pos = n;
     }
 
+_cleanup:
     free(suffix0);
     free(reverseSuffix);
     free(doneMarks);
     free(filePos);
+    return result;
 }
 
 
-static size_t DiB_findMaxMem(unsigned long long requiredMem)
-{
-    size_t step = 8 MB;
-    void* testmem = NULL;
-
-    requiredMem = (((requiredMem >> 23) + 1) << 23);
-    requiredMem += 2 * step;
-    if (requiredMem > maxMemory) requiredMem = maxMemory;
-
-    while (!testmem) {
-        requiredMem -= step;
-        testmem = malloc((size_t)requiredMem);
-    }
-
-    free(testmem);
-    return (size_t)(requiredMem - step);
-}
-
-
-static void DiB_fillNoise(void* buffer, size_t length)
+static void ZDICT_fillNoise(void* buffer, size_t length)
 {
     unsigned acc = PRIME1;
     size_t p=0;;
@@ -672,34 +591,36 @@ typedef struct
 } EStats_ress_t;
 
 
-static void DiB_countEStats(EStats_ress_t esr,
+static void ZDICT_countEStats(EStats_ress_t esr,
                             U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount,
                             const void* src, size_t srcSize)
 {
     const BYTE* bytePtr;
     const U32* u32Ptr;
+    seqStore_t seqStore;
 
     if (srcSize > BLOCKSIZE) srcSize = BLOCKSIZE;   /* protection vs large samples */
     ZSTD_copyCCtx(esr.zc, esr.ref);
     ZSTD_compressBlock(esr.zc, esr.workPlace, BLOCKSIZE, src, srcSize);
+    seqStore = ZSTD_copySeqStore(esr.zc);
 
     /* count stats */
-    for(bytePtr = esr.zc->seqStore.litStart; bytePtr < esr.zc->seqStore.lit; bytePtr++)
+    for(bytePtr = seqStore.litStart; bytePtr < seqStore.lit; bytePtr++)
         countLit[*bytePtr]++;
-    for(u32Ptr = esr.zc->seqStore.offsetStart; u32Ptr < esr.zc->seqStore.offset; u32Ptr++) {
+    for(u32Ptr = seqStore.offsetStart; u32Ptr < seqStore.offset; u32Ptr++) {
         BYTE offcode = (BYTE)ZSTD_highbit(*u32Ptr) + 1;
         if (*u32Ptr==0) offcode=0;
         offsetcodeCount[offcode]++;
     }
-    for(bytePtr = esr.zc->seqStore.matchLengthStart; bytePtr < esr.zc->seqStore.matchLength; bytePtr++)
+    for(bytePtr = seqStore.matchLengthStart; bytePtr < seqStore.matchLength; bytePtr++)
         matchlengthCount[*bytePtr]++;
-    for(bytePtr = esr.zc->seqStore.litLengthStart; bytePtr < esr.zc->seqStore.litLength; bytePtr++)
+    for(bytePtr = seqStore.litLengthStart; bytePtr < seqStore.litLength; bytePtr++)
         litlengthCount[*bytePtr]++;
 }
 
 
-#define OFFCODE_MAX 18
-static size_t DiB_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
+#define OFFCODE_MAX 18  /* only applicable to first block */
+static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
                                  unsigned compressionLevel,
                            const void*  srcBuffer, const size_t* fileSizes, unsigned nbFiles,
                            const void* dictBuffer, size_t  dictBufferSize)
@@ -726,7 +647,11 @@ static size_t DiB_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
     esr.ref = ZSTD_createCCtx();
     esr.zc = ZSTD_createCCtx();
     esr.workPlace = malloc(BLOCKSIZE);
-    if (!esr.ref || !esr.zc || !esr.workPlace) EXM_THROW(30, "Not enough memory");
+    if (!esr.ref || !esr.zc || !esr.workPlace) {
+            eSize = ERROR(memory_allocation);
+            DISPLAYLEVEL(1, "Not enough memory");
+            goto _cleanup;
+    }
     if (compressionLevel==0) compressionLevel=g_compressionLevel_default;
     params = ZSTD_getParams(compressionLevel, dictBufferSize + 15 KB);
     params.strategy = ZSTD_greedy;
@@ -734,7 +659,7 @@ static size_t DiB_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
 
     /* collect stats on all files */
     for (u=0; u<nbFiles; u++) {
-        DiB_countEStats(esr,
+        ZDICT_countEStats(esr,
                         countLit, offcodeCount, matchLengthCount, litlengthCount,
            (const char*)srcBuffer + pos, fileSizes[u]);
         pos += fileSizes[u];
@@ -742,50 +667,82 @@ static size_t DiB_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
 
     /* analyze */
     errorCode = HUF_buildCTable (hufTable, countLit, 255, huffLog);
-    if (HUF_isError(errorCode)) EXM_THROW(31, "HUF_buildCTable error");
+    if (HUF_isError(errorCode)) {
+        eSize = ERROR(GENERIC);
+        DISPLAYLEVEL(1, "HUF_buildCTable error");
+        goto _cleanup;
+    }
     huffLog = (U32)errorCode;
 
     total=0; for (u=0; u<=OFFCODE_MAX; u++) total+=offcodeCount[u];
     errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, OFFCODE_MAX);
-    if (FSE_isError(errorCode)) EXM_THROW(32, "FSE_normalizeCount error with offcodeCount");
+    if (FSE_isError(errorCode)) {
+        eSize = ERROR(GENERIC);
+        DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount");
+        goto _cleanup;
+    }
     Offlog = (U32)errorCode;
 
     total=0; for (u=0; u<=MaxML; u++) total+=matchLengthCount[u];
     errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML);
-    if (FSE_isError(errorCode)) EXM_THROW(33, "FSE_normalizeCount error with matchLengthCount");
+    if (FSE_isError(errorCode)) {
+        eSize = ERROR(GENERIC);
+        DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount");
+        goto _cleanup;
+    }
     mlLog = (U32)errorCode;
 
     total=0; for (u=0; u<=MaxLL; u++) total+=litlengthCount[u];
     errorCode = FSE_normalizeCount(litlengthNCount, llLog, litlengthCount, total, MaxLL);
-    if (FSE_isError(errorCode)) EXM_THROW(34, "FSE_normalizeCount error with litlengthCount");
+    if (FSE_isError(errorCode)) {
+        eSize = ERROR(GENERIC);
+        DISPLAYLEVEL(1, "FSE_normalizeCount error with litlengthCount");
+        goto _cleanup;
+    }
     llLog = (U32)errorCode;
 
     /* write result to buffer */
     errorCode = HUF_writeCTable(dstBuffer, maxDstSize, hufTable, 255, huffLog);
-    if (HUF_isError(errorCode)) EXM_THROW(41, "HUF_writeCTable error");
+    if (HUF_isError(errorCode)) {
+        eSize = ERROR(GENERIC);
+        DISPLAYLEVEL(1, "HUF_writeCTable error");
+        goto _cleanup;
+    }
     dstBuffer = (char*)dstBuffer + errorCode;
     maxDstSize -= errorCode;
     eSize += errorCode;
 
     errorCode = FSE_writeNCount(dstBuffer, maxDstSize, offcodeNCount, OFFCODE_MAX, Offlog);
-    if (FSE_isError(errorCode)) EXM_THROW(42, "FSE_writeNCount error with offcodeNCount");
+    if (FSE_isError(errorCode)) {
+        eSize = ERROR(GENERIC);
+        DISPLAYLEVEL(1, "FSE_writeNCount error with offcodeNCount");
+        goto _cleanup;
+    }
     dstBuffer = (char*)dstBuffer + errorCode;
     maxDstSize -= errorCode;
     eSize += errorCode;
 
     errorCode = FSE_writeNCount(dstBuffer, maxDstSize, matchLengthNCount, MaxML, mlLog);
-    if (FSE_isError(errorCode)) EXM_THROW(43, "FSE_writeNCount error with matchLengthNCount");
+    if (FSE_isError(errorCode)) {
+        eSize = ERROR(GENERIC);
+        DISPLAYLEVEL(1, "FSE_writeNCount error with matchLengthNCount");
+        goto _cleanup;
+    }
     dstBuffer = (char*)dstBuffer + errorCode;
     maxDstSize -= errorCode;
     eSize += errorCode;
 
     errorCode = FSE_writeNCount(dstBuffer, maxDstSize, litlengthNCount, MaxLL, llLog);
-    if (FSE_isError(errorCode)) EXM_THROW(43, "FSE_writeNCount error with litlengthNCount");
+    if (FSE_isError(errorCode)) {
+        eSize = ERROR(GENERIC);
+        DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount");
+        goto _cleanup;
+    }
     dstBuffer = (char*)dstBuffer + errorCode;
     maxDstSize -= errorCode;
     eSize += errorCode;
 
-    /* clean */
+_cleanup:
     ZSTD_freeCCtx(esr.ref);
     ZSTD_freeCCtx(esr.zc);
     free(esr.workPlace);
@@ -794,33 +751,16 @@ static size_t DiB_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
 }
 
 
-static void DiB_saveDict(const char* dictFileName,
-                         const void* buff, size_t buffSize)
-{
-    FILE* f;
-    size_t n;
-
-    f = fopen(dictFileName, "wb");
-    if (f==NULL) EXM_THROW(3, "cannot open %s ", dictFileName);
-
-    n = fwrite(buff, 1, buffSize, f);
-    if (n!=buffSize) EXM_THROW(4, "%s : write error", dictFileName)
-
-    n = (size_t)fclose(f);
-    if (n!=0) EXM_THROW(5, "%s : flush error", dictFileName)
-}
-
-
 #define DIB_FASTSEGMENTSIZE 64
-/*! DiB_fastSampling (based on an idea by Giuseppe Ottaviano)
-    Fill @dictBuffer with stripes of size DIB_FASTSEGMENTSIZE from @samplesBuffer
-    up to @dictSize.
-    Filling starts from the end of @dictBuffer, down to maximum possible.
-    if @dictSize is not a multiply of DIB_FASTSEGMENTSIZE, some bytes at beginning of @dictBuffer won't be used.
-    @return : amount of data written into @dictBuffer
-              or an error Code (if @dictSize or @samplesSize too small)
+/*! ZDICT_fastSampling()  (based on an idea proposed by Giuseppe Ottaviano) :
+    Fill `dictBuffer` with stripes of size DIB_FASTSEGMENTSIZE from `samplesBuffer`,
+    up to `dictSize`.
+    Filling starts from the end of `dictBuffer`, down to maximum possible.
+    if `dictSize` is not a multiple of DIB_FASTSEGMENTSIZE, some bytes at beginning of `dictBuffer` won't be used.
+    @return : amount of data written into `dictBuffer`,
+              or an error code
 */
-static size_t DiB_fastSampling(void* dictBuffer, size_t dictSize,
+static size_t ZDICT_fastSampling(void* dictBuffer, size_t dictSize,
                          const void* samplesBuffer, size_t samplesSize)
 {
     char* dstPtr = (char*)dictBuffer + dictSize;
@@ -851,10 +791,10 @@ static size_t DiB_fastSampling(void* dictBuffer, size_t dictSize,
 }
 
 
-static size_t DiB_trainFromBuffer_internal(
+size_t ZDICT_trainFromBuffer_unsafe(
                             void* dictBuffer, size_t maxDictSize,
                             const void* samplesBuffer, const size_t* sampleSizes, unsigned nbSamples,
-                            DiB_params_t params)
+                            ZDICT_params_t params)
 {
     const U32 dictListSize = MAX( MAX(DICTLISTSIZE, nbSamples), (U32)(maxDictSize/16));
     dictItem* dictList = (dictItem*)malloc(dictListSize * sizeof(*dictList));
@@ -869,14 +809,15 @@ static size_t DiB_trainFromBuffer_internal(
 
     /* init */
     { unsigned u; for (u=0, sBuffSize=0; u<nbSamples; u++) sBuffSize += sampleSizes[u]; }
-    if (!dictList) { DISPLAYLEVEL(1, "not enough memory for DiB_trainFromBuffer"); return ERROR(memory_allocation); }
-    DiB_initDictItem(dictList);
+    if (!dictList) return ERROR(memory_allocation);
+    ZDICT_initDictItem(dictList);
+    g_displayLevel = params.notificationLevel;
     if (selectivity==0) selectivity = g_selectivity_default;
     if (compressionLevel==0) compressionLevel = g_compressionLevel_default;
 
-    /* select stripes */
-    if (selectivity>1) {
-        DiB_trainBuffer(dictList, dictListSize,
+    /* build dictionary */
+    if (selectivity>1) {  /* selectivity == 1 => fast mode */
+        ZDICT_trainBuffer(dictList, dictListSize,
                         samplesBuffer, sBuffSize,
                         sampleSizes, nbSamples,
                         selectivity, (U32)targetDictSize);
@@ -885,7 +826,7 @@ static size_t DiB_trainFromBuffer_internal(
         if (g_displayLevel>= 3) {
             const U32 nb = 25;
             U32 u;
-            U32 dictContentSize = DiB_dictSize(dictList);
+            U32 dictContentSize = ZDICT_dictSize(dictList);
             DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos, dictContentSize);
             DISPLAYLEVEL(3, "list %u best segments \n", nb);
             for (u=1; u<=nb; u++) {
@@ -894,13 +835,13 @@ static size_t DiB_trainFromBuffer_internal(
                 U32 d = MIN(40, l);
                 DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |",
                              u, l, p, dictList[u].savings);
-                DiB_printHex(3, (const char*)samplesBuffer+p, d);
+                ZDICT_printHex(3, (const char*)samplesBuffer+p, d);
                 DISPLAYLEVEL(3, "| \n");
     }   }   }
 
     /* create dictionary */
     {
-        U32 dictContentSize = DiB_dictSize(dictList);
+        U32 dictContentSize = ZDICT_dictSize(dictList);
         size_t hSize;
         BYTE* ptr;
         U32 u;
@@ -918,7 +859,7 @@ static size_t DiB_trainFromBuffer_internal(
         if (selectivity==1) {  /* note could also be used to complete a dictionary, but not necessarily better */
             DISPLAYLEVEL(3, "\r%70s\r", "");   /* clean display line */
             DISPLAYLEVEL(3, "Adding %u KB with fast sampling \n", (U32)(targetDictSize>>10));
-            dictContentSize = (U32)DiB_fastSampling((char*)dictBuffer + g_provision_entropySize,
+            dictContentSize = (U32)ZDICT_fastSampling((char*)dictBuffer + g_provision_entropySize,
                                                targetDictSize, samplesBuffer, sBuffSize);
         }
 
@@ -929,7 +870,7 @@ static size_t DiB_trainFromBuffer_internal(
         /* entropic tables */
         DISPLAYLEVEL(2, "\r%70s\r", "");   /* clean display line */
         DISPLAYLEVEL(2, "statistics ... \n");
-        hSize += DiB_analyzeEntropy((char*)dictBuffer+4, maxDictSize-4,
+        hSize += ZDICT_analyzeEntropy((char*)dictBuffer+4, maxDictSize-4,
                                     compressionLevel,
                                     samplesBuffer, sampleSizes, nbSamples,
                                     (char*)dictBuffer + maxDictSize - dictContentSize, dictContentSize);
@@ -945,76 +886,38 @@ static size_t DiB_trainFromBuffer_internal(
 }
 
 
-/* issue : samplesBuffer need to be followed by a noisy guard band.
-*  work around : duplicate the buffer, and add the noise ? */
-size_t DiB_trainFromBuffer(void* dictBuffer, size_t maxDictSize,
-                           const void* samplesBuffer, const size_t* sampleSizes, unsigned nbSamples,
-                           DiB_params_t params)
+size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
+                           const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
+                           ZDICT_params_t params)
 {
     size_t sBuffSize;
     void* newBuff;
     size_t result;
 
-    { unsigned u; for (u=0, sBuffSize=0; u<nbSamples; u++) sBuffSize += sampleSizes[u]; }
+    { unsigned u; for (u=0, sBuffSize=0; u<nbSamples; u++) sBuffSize += samplesSizes[u]; }
     newBuff = malloc(sBuffSize + NOISELENGTH);
     if (!newBuff) return ERROR(memory_allocation);
 
     memcpy(newBuff, samplesBuffer, sBuffSize);
-    DiB_fillNoise((char*)newBuff + sBuffSize, NOISELENGTH);   /* guard band, for end of buffer condition */
+    ZDICT_fillNoise((char*)newBuff + sBuffSize, NOISELENGTH);   /* guard band, for end of buffer condition */
 
-    result = DiB_trainFromBuffer_internal(dictBuffer, maxDictSize,
-                                        newBuff, sampleSizes, nbSamples,
+    result = ZDICT_trainFromBuffer_unsafe(dictBuffer, dictBufferCapacity,
+                                        newBuff, samplesSizes, nbSamples,
                                         params);
     free(newBuff);
     return result;
 }
 
 
-int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
-                       const char** fileNamesTable, unsigned nbFiles,
-                       DiB_params_t params)
+/* note : the training code needs samplesBuffer to be followed by a noisy guard band.
+*  ZDICT_trainFromBuffer_advanced() works around this by duplicating the buffer and appending the noise. */
+size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
+                             const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
 {
-    void* srcBuffer;
-    size_t benchedSize;
-    size_t* fileSizes = (size_t*)malloc(nbFiles * sizeof(size_t));
-    unsigned long long totalSizeToLoad = DiB_getTotalFileSize(fileNamesTable, nbFiles);
-    void* dictBuffer = malloc(maxDictSize);
-    size_t dictSize;
-    int result = 0;
-
-    /* init */
-    benchedSize = DiB_findMaxMem(totalSizeToLoad * MEMMULT) / MEMMULT;
-    if ((unsigned long long)benchedSize > totalSizeToLoad) benchedSize = (size_t)totalSizeToLoad;
-    if (benchedSize < totalSizeToLoad)
-        DISPLAYLEVEL(1, "Not enough memory; training on %u MB only...\n", (unsigned)(benchedSize >> 20));
-
-    /* Memory allocation & restrictions */
-    srcBuffer = malloc(benchedSize+NOISELENGTH);     /* + noise */
-    if ((!fileSizes) || (!srcBuffer) || (!dictBuffer)) EXM_THROW(12, "not enough memory for DiB_trainFiles");  /* should not happen */
-
-    /* Load input buffer */
-    DiB_loadFiles(srcBuffer, benchedSize, fileSizes, fileNamesTable, nbFiles);
-    DiB_fillNoise((char*)srcBuffer + benchedSize, NOISELENGTH);   /* guard band, for end of buffer condition */
-
-    /* call buffer version */
-    dictSize = DiB_trainFromBuffer_internal(dictBuffer, maxDictSize,
-                        srcBuffer, fileSizes, nbFiles,
-                        params);
-    if (DiB_isError(dictSize))
-    {
-        DISPLAYLEVEL(1, "dictionary training failed : %s", DiB_getErrorName(dictSize));  /* should not happen */
-        result = 1;
-        goto _cleanup;
-    }
-
-    /* save dict */
-    DISPLAYLEVEL(2, "Save dictionary of size %u into file %s \n", (U32)dictSize, dictFileName);
-    DiB_saveDict(dictFileName, dictBuffer, dictSize);
-
-    /* clean up */
-_cleanup:
-    free(srcBuffer);
-    free(dictBuffer);
-    free(fileSizes);
-    return result;
+    ZDICT_params_t params;
+    memset(&params, 0, sizeof(params));
+    return ZDICT_trainFromBuffer_advanced(dictBuffer, dictBufferCapacity,
+                                          samplesBuffer, samplesSizes, nbSamples,
+                                          params);
 }
+
diff --git a/lib/zdict.h b/lib/zdict.h
new file mode 100644
index 00000000000..2ca190ce34a
--- /dev/null
+++ b/lib/zdict.h
@@ -0,0 +1,67 @@
+/*
+    dictBuilder header file
+    Copyright (C) Yann Collet 2016
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+       - Zstd homepage : https://www.zstd.net
+*/
+
+#ifndef DICTBUILDER_H_001
+#define DICTBUILDER_H_001
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*-*************************************
+*  Public functions
+***************************************/
+/*! ZDICT_trainFromBuffer() :
+    Train a dictionary from a memory buffer `samplesBuffer`,
+    where `nbSamples` samples have been stored concatenated.
+    The size of each sample is provided, in order, in the table `samplesSizes`.
+    Resulting dictionary will be saved into `dictBuffer`.
+    @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
+              or an error code, which can be tested by ZDICT_isError().
+*/
+size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
+                             const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
+
+
+/*-*************************************
+*  Helper functions
+***************************************/
+unsigned ZDICT_isError(size_t errorCode);
+const char* ZDICT_getErrorName(size_t errorCode);
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif
diff --git a/lib/zdict_static.h b/lib/zdict_static.h
new file mode 100644
index 00000000000..e5f909ac74f
--- /dev/null
+++ b/lib/zdict_static.h
@@ -0,0 +1,80 @@
+/*
+    dictBuilder header file
+    for static linking only
+    Copyright (C) Yann Collet 2016
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+       - Zstd homepage : https://www.zstd.net
+*/
+
+/* This library is EXPERIMENTAL, below API is not yet stable */
+
+#ifndef DICTBUILDER_STATIC_H_002
+#define DICTBUILDER_STATIC_H_002
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#include "zdict.h"
+
+
+/*-*************************************
+*  Public type
+***************************************/
+typedef struct {
+    unsigned selectivityLevel;   /* 0 means default; larger => bigger selection => larger dictionary */
+    unsigned compressionLevel;   /* 0 means default; target a specific zstd compression level */
+    unsigned notificationLevel;  /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
+    unsigned reserved[3];        /* space for future parameters */
+} ZDICT_params_t;
+
+
+/*-*************************************
+*  Public functions
+***************************************/
+/*! ZDICT_trainFromBuffer_advanced() :
+    Same as ZDICT_trainFromBuffer() with control over more parameters.
+    Any field of `parameters` can be set to 0 to mean "default".
+    @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
+              or an error code, which can be tested by ZDICT_isError().
+    note : ZDICT_trainFromBuffer_advanced() will send notifications into stderr if instructed to, using `parameters.notificationLevel`
+*/
+size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
+                             const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
+                             ZDICT_params_t parameters);
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif  /* DICTBUILDER_STATIC_H_002 */
diff --git a/lib/zstd.h b/lib/zstd.h
index ce56c635157..53ed6973987 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -61,7 +61,7 @@ extern "C" {
 ***************************************/
 #define ZSTD_VERSION_MAJOR    0    /* for breaking interface changes  */
 #define ZSTD_VERSION_MINOR    5    /* for new (non-breaking) interface capabilities */
-#define ZSTD_VERSION_RELEASE  0    /* for tweaks, bug-fixes, or development */
+#define ZSTD_VERSION_RELEASE  1    /* for tweaks, bug-fixes, or development */
 #define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
 ZSTDLIB_API unsigned ZSTD_versionNumber (void);
 
diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c
index c774891b627..7bea6abea9e 100644
--- a/lib/zstd_compress.c
+++ b/lib/zstd_compress.c
@@ -48,7 +48,7 @@
 #endif
 
 
-/* *************************************
+/*-*************************************
 *  Dependencies
 ***************************************/
 #include <stdlib.h>   /* malloc */
@@ -59,36 +59,39 @@
 #include "zstd_internal.h"
 
 
-/* *************************************
+/*-*************************************
 *  Constants
 ***************************************/
 static const U32 g_searchStrength = 8;
 
 
-/* *************************************
+/*-*************************************
 *  Helper functions
 ***************************************/
 size_t ZSTD_compressBound(size_t srcSize) { return FSE_compressBound(srcSize) + 12; }
 
 
-/* *************************************
+/*-*************************************
 *  Sequence storage
 ***************************************/
-typedef struct {
-    void* buffer;
-    U32*  offsetStart;
-    U32*  offset;
-    BYTE* offCodeStart;
-    BYTE* offCode;
-    BYTE* litStart;
-    BYTE* lit;
-    BYTE* litLengthStart;
-    BYTE* litLength;
-    BYTE* matchLengthStart;
-    BYTE* matchLength;
-    BYTE* dumpsStart;
-    BYTE* dumps;
-} seqStore_t;
+/** ZSTD_resetFreqs() : for opt variants */
+static void ZSTD_resetFreqs(seqStore_t* ssPtr)
+{
+    unsigned u;
+    ssPtr->matchLengthSum = 512; // (1<<MLbits);
+    ssPtr->litLengthSum = 256; // (1<<LLbits);
+    ssPtr->litSum = (1<<Litbits);
+    ssPtr->offCodeSum = (1<<Offbits);
+
+    for (u=0; u<=MaxLit; u++)
+        ssPtr->litFreq[u] = 1;
+    for (u=0; u<=MaxLL; u++)
+        ssPtr->litLengthFreq[u] = 1;
+    for (u=0; u<=MaxML; u++)
+        ssPtr->matchLengthFreq[u] = 1;
+    for (u=0; u<=MaxOff; u++)
+        ssPtr->offCodeFreq[u] = 1;
+}
 
 static void ZSTD_resetSeqStore(seqStore_t* ssPtr)
 {
@@ -100,7 +103,7 @@ static void ZSTD_resetSeqStore(seqStore_t* ssPtr)
 }
 
 
-/* *************************************
+/*-*************************************
 *  Context memory management
 ***************************************/
 struct ZSTD_CCtx_s
@@ -130,7 +133,6 @@ struct ZSTD_CCtx_s
     FSE_CTable litlengthCTable   [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
 };
 
-
 ZSTD_CCtx* ZSTD_createCCtx(void)
 {
     return (ZSTD_CCtx*) calloc(1, sizeof(ZSTD_CCtx));
@@ -143,37 +145,40 @@ size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
     return 0;   /* reserved as a potential error code in the future */
 }
 
+seqStore_t ZSTD_copySeqStore(const ZSTD_CCtx* ctx)
+{
+    return ctx->seqStore;
+}
+
 
 static unsigned ZSTD_highbit(U32 val);
 
-/** ZSTD_validateParams
-    correct params value to remain within authorized range
-    optimize for srcSize if srcSize > 0 */
+#define CLAMP(val,min,max) { if (val<min) val=min; else if (val>max) val=max; }
+
+/** ZSTD_validateParams() :
+    correct params value to remain within authorized range,
+    optimize for `srcSize` if srcSize > 0 */
 void ZSTD_validateParams(ZSTD_parameters* params)
 {
-    const U32 btPlus = (params->strategy == ZSTD_btlazy2);
+    const U32 btPlus = (params->strategy == ZSTD_btlazy2) || (params->strategy == ZSTD_btopt);
 
     /* validate params */
     if (MEM_32bits()) if (params->windowLog > 25) params->windowLog = 25;   /* 32 bits mode cannot flush > 24 bits */
-    if (params->windowLog   > ZSTD_WINDOWLOG_MAX) params->windowLog = ZSTD_WINDOWLOG_MAX;
-    if (params->windowLog   < ZSTD_WINDOWLOG_MIN) params->windowLog = ZSTD_WINDOWLOG_MIN;
+    CLAMP(params->windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX);
+    CLAMP(params->contentLog, ZSTD_CONTENTLOG_MIN, ZSTD_CONTENTLOG_MAX);
+    CLAMP(params->hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
+    CLAMP(params->searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
+    CLAMP(params->searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX);
+    CLAMP(params->targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX);
+    if ((U32)params->strategy>(U32)ZSTD_btopt) params->strategy = ZSTD_btopt;
 
     /* correct params, to use less memory */
     if ((params->srcSize > 0) && (params->srcSize < (1<<ZSTD_WINDOWLOG_MAX))) {
         U32 srcLog = ZSTD_highbit((U32)(params->srcSize)-1) + 1;
         if (params->windowLog > srcLog) params->windowLog = srcLog;
     }
-
     if (params->windowLog   < ZSTD_WINDOWLOG_ABSOLUTEMIN) params->windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN;  /* required for frame header */
     if (params->contentLog  > params->windowLog+btPlus) params->contentLog = params->windowLog+btPlus;   /* <= ZSTD_CONTENTLOG_MAX */
-    if (params->contentLog  < ZSTD_CONTENTLOG_MIN) params->contentLog = ZSTD_CONTENTLOG_MIN;
-    if (params->hashLog     > ZSTD_HASHLOG_MAX) params->hashLog = ZSTD_HASHLOG_MAX;
-    if (params->hashLog     < ZSTD_HASHLOG_MIN) params->hashLog = ZSTD_HASHLOG_MIN;
-    if (params->searchLog   > ZSTD_SEARCHLOG_MAX) params->searchLog = ZSTD_SEARCHLOG_MAX;
-    if (params->searchLog   < ZSTD_SEARCHLOG_MIN) params->searchLog = ZSTD_SEARCHLOG_MIN;
-    if (params->searchLength> ZSTD_SEARCHLENGTH_MAX) params->searchLength = ZSTD_SEARCHLENGTH_MAX;
-    if (params->searchLength< ZSTD_SEARCHLENGTH_MIN) params->searchLength = ZSTD_SEARCHLENGTH_MIN;
-    if ((U32)params->strategy>(U32)ZSTD_btlazy2) params->strategy = ZSTD_btlazy2;
 }
 
 
@@ -184,7 +189,7 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
     /* reserve table memory */
     const U32 contentLog = (params.strategy == ZSTD_fast) ? 1 : params.contentLog;
     const size_t tableSpace = ((1 << contentLog) + (1 << params.hashLog)) * sizeof(U32);
-    const size_t neededSpace = tableSpace + (256*sizeof(U32)) + (3*blockSize);
+    const size_t neededSpace = tableSpace + (256*sizeof(U32)) + (3*blockSize) + ((1<<MLbits) + (1<<LLbits) + (1<<Offbits) + (1<<Litbits))*sizeof(U32);
     if (zc->workSpaceSize < neededSpace) {
         free(zc->workSpace);
         zc->workSpace = malloc(neededSpace);
@@ -207,12 +212,20 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
     zc->lowLimit = 0;
     zc->params = params;
     zc->blockSize = blockSize;
-    zc->seqStore.offsetStart = (U32*) (zc->seqStore.buffer);
+
+    zc->seqStore.litFreq = (U32*) (zc->seqStore.buffer);
+    zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<<Litbits);
+    zc->seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (1<<LLbits);
+    zc->seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (1<<MLbits);
+
+    zc->seqStore.offsetStart = zc->seqStore.offCodeFreq + (1<<Offbits);
     zc->seqStore.offCodeStart = (BYTE*) (zc->seqStore.offsetStart + (blockSize>>2));
     zc->seqStore.litStart = zc->seqStore.offCodeStart + (blockSize>>2);
     zc->seqStore.litLengthStart =  zc->seqStore.litStart + blockSize;
     zc->seqStore.matchLengthStart = zc->seqStore.litLengthStart + (blockSize>>2);
     zc->seqStore.dumpsStart = zc->seqStore.matchLengthStart + (blockSize>>2);
+    // zc->seqStore.XXX = zc->seqStore.dumpsStart + (blockSize>>4);
+
     zc->hbSize = 0;
     zc->stage = 0;
     zc->loadedDictEnd = 0;
@@ -528,7 +541,6 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
     const size_t maxCSize = srcSize - minGain;
     BYTE* seqHead;
 
-
     /* Compress literals */
     {
         size_t cSize;
@@ -766,33 +778,9 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const B
 }
 
 
-/* *************************************
+/*-*************************************
 *  Match length counter
 ***************************************/
-static size_t ZSTD_read_ARCH(const void* p) { size_t r; memcpy(&r, p, sizeof(r)); return r; }
-
-static unsigned ZSTD_highbit(U32 val)
-{
-#   if defined(_MSC_VER)   /* Visual */
-    unsigned long r=0;
-    _BitScanReverse(&r, val);
-    return (unsigned)r;
-#   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* GCC Intrinsic */
-    return 31 - __builtin_clz(val);
-#   else   /* Software version */
-    static const int DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
-    U32 v = val;
-    int r;
-    v |= v >> 1;
-    v |= v >> 2;
-    v |= v >> 4;
-    v |= v >> 8;
-    v |= v >> 16;
-    r = DeBruijnClz[(U32)(v * 0x07C4ACDDU) >> 27];
-    return r;
-#   endif
-}
-
 static unsigned ZSTD_NbCommonBytes (register size_t val)
 {
     if (MEM_isLittleEndian()) {
@@ -857,20 +845,19 @@ static size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLim
     const BYTE* const pStart = pIn;
 
     while ((pIn<pInLimit-(sizeof(size_t)-1))) {
-        size_t diff = ZSTD_read_ARCH(pMatch) ^ ZSTD_read_ARCH(pIn);
+        size_t diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
         if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
         pIn += ZSTD_NbCommonBytes(diff);
         return (size_t)(pIn - pStart);
     }
-
     if (MEM_64bits()) if ((pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
     if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
     if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
     return (size_t)(pIn - pStart);
 }
 
-/** ZSTD_count_2segments
-*   can count match length with ip & match in potentially 2 different segments.
+/** ZSTD_count_2segments() :
+*   can count match length with `ip` & `match` in 2 different segments.
 *   convention : on reaching mEnd, match count continue starting from iStart
 */
 static size_t ZSTD_count_2segments(const BYTE* ip, const BYTE* match, const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
@@ -894,15 +881,15 @@ static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read
 
 static const U64 prime5bytes = 889523592379ULL;
 static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u  << (64-40)) * prime5bytes) >> (64-h)) ; }
-static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_read64(p), h); }
+static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); }
 
 static const U64 prime6bytes = 227718039650203ULL;
 static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u  << (64-48)) * prime6bytes) >> (64-h)) ; }
-static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_read64(p), h); }
+static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
 
 static const U64 prime7bytes = 58295818150454627ULL;
 static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u  << (64-56)) * prime7bytes) >> (64-h)) ; }
-static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_read64(p), h); }
+static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }
 
 static size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
 {
@@ -1009,8 +996,7 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* zc,
                 continue;   /* faster when present ... (?) */
     }   }   }
 
-    /* Last Literals */
-    {
+    {   /* Last Literals */
         size_t lastLLSize = iend - anchor;
         memcpy(seqStorePtr->lit, anchor, lastLLSize);
         seqStorePtr->lit += lastLLSize;
@@ -1018,7 +1004,7 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* zc,
 }
 
 
-void ZSTD_compressBlock_fast(ZSTD_CCtx* ctx,
+static void ZSTD_compressBlock_fast(ZSTD_CCtx* ctx,
                        const void* src, size_t srcSize)
 {
     const U32 mls = ctx->params.searchLength;
@@ -1037,8 +1023,7 @@ void ZSTD_compressBlock_fast(ZSTD_CCtx* ctx,
 }
 
 
-//FORCE_INLINE
-void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
+static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
                                  const void* src, size_t srcSize,
                                  const U32 mls)
 {
@@ -1138,7 +1123,7 @@ void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
 }
 
 
-void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx,
+static void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx,
                          const void* src, size_t srcSize)
 {
     const U32 mls = ctx->params.searchLength;
@@ -1157,11 +1142,11 @@ void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx,
 }
 
 
-/* *************************************
+/*-*************************************
 *  Binary Tree search
 ***************************************/
-/** ZSTD_insertBt1 : add one or multiple positions to tree
-*   @ip : assumed <= iend-8
+/** ZSTD_insertBt1() : add one or multiple positions to tree.
+*   ip : assumed <= iend-8 .
 *   @return : nb of positions added */
 static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, const BYTE* const iend, U32 nbCompares,
                           U32 extDict)
@@ -1187,6 +1172,7 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co
     U32 dummy32;   /* to be nullified at the end */
     const U32 windowLow = zc->lowLimit;
     U32 matchEndIdx = current+8;
+    size_t bestLength = 8;
     U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0);
     U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1);
     predictedSmall += (predictedSmall>0);
@@ -1196,9 +1182,9 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co
 
     while (nbCompares-- && (matchIndex > windowLow)) {
         U32* nextPtr = bt + 2*(matchIndex & btMask);
-        const U32* predictPtr = bt + 2*((matchIndex-1) & btMask);   /* written this way, as bt is a roll buffer */
         size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
-
+#if 1   /* note : can create issues when hlog small <= 11 */
+        const U32* predictPtr = bt + 2*((matchIndex-1) & btMask);   /* written this way, as bt is a roll buffer */
         if (matchIndex == predictedSmall) {
             /* no need to check length, result known */
             *smallerPtr = matchIndex;
@@ -1208,7 +1194,6 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co
             predictedSmall = predictPtr[1] + (predictPtr[1]>0);
             continue;
         }
-
         if (matchIndex == predictedLarge) {
             *largerPtr = matchIndex;
             if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
@@ -1217,7 +1202,7 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co
             predictedLarge = predictPtr[0] + (predictPtr[0]>0);
             continue;
         }
-
+#endif
         if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
             match = base + matchIndex;
             if (match[matchLength] == ip[matchLength])
@@ -1229,8 +1214,11 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co
 				match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
         }
 
-        if (matchLength > matchEndIdx - matchIndex)
-            matchEndIdx = matchIndex + (U32)matchLength;
+        if (matchLength > bestLength) {
+            bestLength = matchLength;
+            if (matchLength > matchEndIdx - matchIndex)
+                matchEndIdx = matchIndex + (U32)matchLength;
+        }
 
         if (ip+matchLength == iend)   /* equal : no way to know if inf or sup */
             break;   /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt the tree */
@@ -1252,22 +1240,13 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, co
     }   }
 
     *smallerPtr = *largerPtr = 0;
-    return (matchEndIdx > current + 8) ? matchEndIdx - current - 8 : 1;
+    if (bestLength > 384) return MIN(192, (U32)(bestLength - 384));
+    if (matchEndIdx > current + 8) return matchEndIdx - current - 8;
+    return 1;
 }
 
 
-static void ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls)
-{
-    const BYTE* const base = zc->base;
-    const U32 target = (U32)(ip - base);
-    U32 idx = zc->nextToUpdate;
-
-    for( ; idx < target ; )
-        idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 0);
-}
-
-FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */
-size_t ZSTD_insertBtAndFindBestMatch (
+static size_t ZSTD_insertBtAndFindBestMatch (
                         ZSTD_CCtx* zc,
                         const BYTE* const ip, const BYTE* const iend,
                         size_t* offsetPtr,
@@ -1337,8 +1316,7 @@ size_t ZSTD_insertBtAndFindBestMatch (
             if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
             largerPtr = nextPtr;
             matchIndex = nextPtr[0];
-        }
-    }
+    }   }
 
     *smallerPtr = *largerPtr = 0;
 
@@ -1347,9 +1325,18 @@ size_t ZSTD_insertBtAndFindBestMatch (
 }
 
 
+static void ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls)
+{   /* inserts into the tree every position from zc->nextToUpdate up to `ip` (excluded), calling ZSTD_insertBt1() with extDict==0 */
+    const BYTE* const base = zc->base;
+    const U32 target = (U32)(ip - base);   /* index of `ip`, relative to `base` */
+    U32 idx = zc->nextToUpdate;
+
+    while(idx < target)
+        idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 0);   /* returns nb of positions added, so idx may advance by more than 1 */
+}
+
 /** Tree updater, providing best match */
-FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */
-size_t ZSTD_BtFindBestMatch (
+static size_t ZSTD_BtFindBestMatch (
                         ZSTD_CCtx* zc,
                         const BYTE* const ip, const BYTE* const iLimit,
                         size_t* offsetPtr,
@@ -1361,7 +1348,7 @@ size_t ZSTD_BtFindBestMatch (
 }
 
 
-FORCE_INLINE size_t ZSTD_BtFindBestMatch_selectMLS (
+static size_t ZSTD_BtFindBestMatch_selectMLS (
                         ZSTD_CCtx* zc,   /* Index table will be updated */
                         const BYTE* ip, const BYTE* const iLimit,
                         size_t* offsetPtr,
@@ -1383,14 +1370,12 @@ static void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const B
     const U32 target = (U32)(ip - base);
     U32 idx = zc->nextToUpdate;
 
-    for( ; idx < target ; )
-        idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 1);
+    while (idx < target) idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 1);
 }
 
 
 /** Tree updater, providing best match */
-FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */
-size_t ZSTD_BtFindBestMatch_extDict (
+static size_t ZSTD_BtFindBestMatch_extDict (
                         ZSTD_CCtx* zc,
                         const BYTE* const ip, const BYTE* const iLimit,
                         size_t* offsetPtr,
@@ -1402,7 +1387,7 @@ size_t ZSTD_BtFindBestMatch_extDict (
 }
 
 
-FORCE_INLINE size_t ZSTD_BtFindBestMatch_selectMLS_extDict (
+static size_t ZSTD_BtFindBestMatch_selectMLS_extDict (
                         ZSTD_CCtx* zc,   /* Index table will be updated */
                         const BYTE* ip, const BYTE* const iLimit,
                         size_t* offsetPtr,
@@ -1426,7 +1411,8 @@ FORCE_INLINE size_t ZSTD_BtFindBestMatch_selectMLS_extDict (
 
 /* Update chains up to ip (excluded)
    Assumption : always within prefix (ie. not within extDict) */
-static U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls)
+FORCE_INLINE
+U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls)
 {
     U32* const hashTable  = zc->hashTable;
     const U32 hashLog = zc->params.hashLog;
@@ -1665,6 +1651,18 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
     }
 }
 
+#include "zstd_opt.h"
+
+static void ZSTD_compressBlock_opt_bt(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+{   /* block compressor stored at blockCompressor[0][6] in ZSTD_selectBlockCompressor() (no extDict) */
+    ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 1, 2);   /* NOTE(review): 4th arg presumably selects binary-tree search and 5th a depth, as for ZSTD_compressBlock_lazy_generic() - confirm in zstd_opt.h */
+}
+
+static void ZSTD_compressBlock_opt(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+{   /* block compressor stored at blockCompressor[0][5] in ZSTD_selectBlockCompressor() (no extDict) */
+    ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0, 2);   /* NOTE(review): 4th arg presumably selects the non-tree search and 5th a depth - confirm in zstd_opt.h */
+}
+
 static void ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
 {
     ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 1, 2);
@@ -1879,14 +1877,24 @@ static void ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src,
     ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 1, 2);
 }
 
+static void ZSTD_compressBlock_opt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+{   /* block compressor stored at blockCompressor[1][5] in ZSTD_selectBlockCompressor() (extDict row) */
+    ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 0, 2);   /* NOTE(review): 4th arg presumably selects the non-tree search and 5th a depth - confirm in zstd_opt.h */
+}
+
+static void ZSTD_compressBlock_opt_bt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+{   /* block compressor stored at blockCompressor[1][6] in ZSTD_selectBlockCompressor() (extDict row) */
+    ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 1, 2);   /* NOTE(review): 4th arg presumably selects binary-tree search and 5th a depth - confirm in zstd_opt.h */
+}
+
 
 typedef void (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t srcSize);
 
 static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict)
 {
-    static const ZSTD_blockCompressor blockCompressor[2][5] = {
-        { ZSTD_compressBlock_fast, ZSTD_compressBlock_greedy, ZSTD_compressBlock_lazy,ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2 },
-        { ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict }
+    static const ZSTD_blockCompressor blockCompressor[2][7] = {
+        { ZSTD_compressBlock_fast, ZSTD_compressBlock_greedy, ZSTD_compressBlock_lazy,ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2, ZSTD_compressBlock_opt, ZSTD_compressBlock_opt_bt },
+        { ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_opt_extDict, ZSTD_compressBlock_opt_bt_extDict }
     };
 
     return blockCompressor[extDict][(U32)strat];
@@ -1980,7 +1988,7 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* zc,
 
     /* preemptive overflow correction */
     if (zc->lowLimit > (1<<30)) {
-        U32 btplus = (zc->params.strategy == ZSTD_btlazy2);
+        U32 btplus = (zc->params.strategy == ZSTD_btlazy2) || (zc->params.strategy == ZSTD_btopt);
         U32 contentMask = (1 << (zc->params.contentLog - btplus)) - 1;
         U32 newLowLimit = zc->lowLimit & contentMask;   /* preserve position % contentSize */
         U32 correction = zc->lowLimit - newLowLimit;
@@ -2050,10 +2058,12 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t
     case ZSTD_greedy:
     case ZSTD_lazy:
     case ZSTD_lazy2:
+    case ZSTD_opt:
         ZSTD_insertAndFindFirstIndex (zc, iend-8, zc->params.searchLength);
         break;
 
     case ZSTD_btlazy2:
+    case ZSTD_btopt:
         ZSTD_updateTree(zc, iend-8, iend, 1 << zc->params.searchLog, zc->params.searchLength);
         break;
 
@@ -2256,106 +2266,112 @@ size_t ZSTD_compress(void* dst, size_t maxDstSize, const void* src, size_t srcSi
 }
 
 
-/*- Pre-defined compression levels -*/
+/*-=====  Pre-defined compression levels  =====-*/
 
+#define ZSTD_MAX_CLEVEL 21
 unsigned ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
 
 static const ZSTD_parameters ZSTD_defaultParameters[4][ZSTD_MAX_CLEVEL+1] = {
 {   /* "default" */
-    /*    W,  C,  H,  S,  L, strat */
-    { 0, 18, 12, 12,  1,  4, ZSTD_fast    },  /* level  0 - never used */
-    { 0, 19, 13, 14,  1,  7, ZSTD_fast    },  /* level  1 */
-    { 0, 19, 15, 16,  1,  6, ZSTD_fast    },  /* level  2 */
-    { 0, 20, 18, 20,  1,  6, ZSTD_fast    },  /* level  3 */
-    { 0, 21, 19, 21,  1,  6, ZSTD_fast    },  /* level  4 */
-    { 0, 20, 14, 18,  3,  5, ZSTD_greedy  },  /* level  5 */
-    { 0, 20, 18, 19,  3,  5, ZSTD_greedy  },  /* level  6 */
-    { 0, 21, 17, 20,  3,  5, ZSTD_lazy    },  /* level  7 */
-    { 0, 21, 19, 20,  3,  5, ZSTD_lazy    },  /* level  8 */
-    { 0, 21, 20, 20,  3,  5, ZSTD_lazy2   },  /* level  9 */
-    { 0, 21, 19, 21,  4,  5, ZSTD_lazy2   },  /* level 10 */
-    { 0, 22, 20, 22,  4,  5, ZSTD_lazy2   },  /* level 11 */
-    { 0, 22, 20, 22,  5,  5, ZSTD_lazy2   },  /* level 12 */
-    { 0, 22, 21, 22,  5,  5, ZSTD_lazy2   },  /* level 13 */
-    { 0, 22, 22, 23,  5,  5, ZSTD_lazy2   },  /* level 14 */
-    { 0, 23, 23, 23,  5,  5, ZSTD_lazy2   },  /* level 15 */
-    { 0, 23, 21, 22,  5,  5, ZSTD_btlazy2 },  /* level 16 */
-    { 0, 23, 24, 23,  4,  5, ZSTD_btlazy2 },  /* level 17 */
-    { 0, 25, 24, 23,  5,  5, ZSTD_btlazy2 },  /* level 18 */
-    { 0, 25, 26, 23,  5,  5, ZSTD_btlazy2 },  /* level 19 */
-    { 0, 26, 27, 25,  9,  5, ZSTD_btlazy2 },  /* level 20 */
+    /* l,  W,  C,  H,  S,  L, SL, strat */
+    {  0,  0,  0,  0,  0,  0,  0, ZSTD_fast    },  /* level  0 - never used */
+    {  0, 19, 13, 14,  1,  7,  4, ZSTD_fast    },  /* level  1 */
+    {  0, 19, 15, 16,  1,  6,  4, ZSTD_fast    },  /* level  2 */
+    {  0, 20, 18, 20,  1,  6,  4, ZSTD_fast    },  /* level  3 */
+    {  0, 21, 19, 21,  1,  6,  4, ZSTD_fast    },  /* level  4 */
+    {  0, 20, 14, 18,  3,  5,  4, ZSTD_greedy  },  /* level  5 */
+    {  0, 20, 18, 19,  3,  5,  4, ZSTD_greedy  },  /* level  6 */
+    {  0, 21, 17, 20,  3,  5,  4, ZSTD_lazy    },  /* level  7 */
+    {  0, 21, 19, 20,  3,  5,  4, ZSTD_lazy    },  /* level  8 */
+    {  0, 21, 20, 20,  3,  5,  4, ZSTD_lazy2   },  /* level  9 */
+    {  0, 21, 19, 21,  4,  5,  4, ZSTD_lazy2   },  /* level 10 */
+    {  0, 22, 20, 22,  4,  5,  4, ZSTD_lazy2   },  /* level 11 */
+    {  0, 22, 20, 22,  5,  5,  4, ZSTD_lazy2   },  /* level 12 */
+    {  0, 22, 21, 22,  5,  5,  4, ZSTD_lazy2   },  /* level 13 */
+    {  0, 22, 22, 23,  5,  5,  4, ZSTD_lazy2   },  /* level 14 */
+    {  0, 23, 23, 23,  5,  5,  4, ZSTD_lazy2   },  /* level 15 */
+    {  0, 23, 22, 22,  5,  5,  4, ZSTD_btlazy2 },  /* level 16 */
+    {  0, 24, 24, 23,  4,  5,  4, ZSTD_btlazy2 },  /* level 17 */
+    {  0, 24, 24, 23,  5,  5, 30, ZSTD_btopt   },  /* level 18 */
+    {  0, 25, 25, 24,  5,  4, 40, ZSTD_btopt   },  /* level 19 */
+    {  0, 26, 26, 25,  8,  4,256, ZSTD_btopt   },  /* level 20 */
+    {  0, 26, 27, 25, 10,  4,256, ZSTD_btopt   },  /* level 21 */
 },
 {   /* for srcSize <= 256 KB */
-    /*     W,  C,  H,  S,  L, strat */
-    {  0, 18, 13, 14,  1,  7, ZSTD_fast    },  /* level  0 - never used */
-    {  0, 18, 14, 15,  1,  6, ZSTD_fast    },  /* level  1 */
-    {  0, 18, 14, 15,  1,  5, ZSTD_fast    },  /* level  2 */
-    {  0, 18, 12, 15,  3,  4, ZSTD_greedy  },  /* level  3 */
-    {  0, 18, 13, 15,  4,  4, ZSTD_greedy  },  /* level  4 */
-    {  0, 18, 14, 15,  5,  4, ZSTD_greedy  },  /* level  5 */
-    {  0, 18, 13, 15,  4,  4, ZSTD_lazy    },  /* level  6 */
-    {  0, 18, 14, 16,  5,  4, ZSTD_lazy    },  /* level  7 */
-    {  0, 18, 15, 16,  6,  4, ZSTD_lazy    },  /* level  8 */
-    {  0, 18, 15, 15,  7,  4, ZSTD_lazy    },  /* level  9 */
-    {  0, 18, 16, 16,  7,  4, ZSTD_lazy    },  /* level 10 */
-    {  0, 18, 16, 16,  8,  4, ZSTD_lazy    },  /* level 11 */
-    {  0, 18, 17, 16,  8,  4, ZSTD_lazy    },  /* level 12 */
-    {  0, 18, 17, 16,  9,  4, ZSTD_lazy    },  /* level 13 */
-    {  0, 18, 18, 16,  9,  4, ZSTD_lazy    },  /* level 14 */
-    {  0, 18, 17, 17,  9,  4, ZSTD_lazy2   },  /* level 15 */
-    {  0, 18, 18, 18,  9,  4, ZSTD_lazy2   },  /* level 16 */
-    {  0, 18, 18, 18, 10,  4, ZSTD_lazy2   },  /* level 17 */
-    {  0, 18, 18, 18, 11,  4, ZSTD_lazy2   },  /* level 18 */
-    {  0, 18, 18, 18, 12,  4, ZSTD_lazy2   },  /* level 19 */
-    {  0, 18, 18, 18, 13,  4, ZSTD_lazy2   },  /* level 20 */
+    /* l,  W,  C,  H,  S,  L,  T, strat */
+    {  0,  0,  0,  0,  0,  0,  0, ZSTD_fast    },  /* level  0 */
+    {  0, 18, 14, 15,  1,  6,  4, ZSTD_fast    },  /* level  1 */
+    {  0, 18, 14, 16,  1,  5,  4, ZSTD_fast    },  /* level  2 */
+    {  0, 18, 14, 17,  1,  5,  4, ZSTD_fast    },  /* level  3.*/
+    {  0, 18, 14, 15,  4,  4,  4, ZSTD_greedy  },  /* level  4 */
+    {  0, 18, 16, 17,  4,  4,  4, ZSTD_greedy  },  /* level  5 */
+    {  0, 18, 17, 17,  3,  4,  4, ZSTD_lazy    },  /* level  6 */
+    {  0, 18, 17, 17,  4,  4,  4, ZSTD_lazy    },  /* level  7 */
+    {  0, 18, 17, 17,  4,  4,  4, ZSTD_lazy2   },  /* level  8 */
+    {  0, 18, 17, 17,  5,  4,  4, ZSTD_lazy2   },  /* level  9 */
+    {  0, 18, 17, 17,  6,  4,  4, ZSTD_lazy2   },  /* level 10 */
+    {  0, 18, 17, 17,  7,  4,  4, ZSTD_lazy2   },  /* level 11 */
+    {  0, 18, 18, 17,  4,  4,  4, ZSTD_btlazy2 },  /* level 12 */
+    {  0, 18, 19, 17,  7,  4,  4, ZSTD_btlazy2 },  /* level 13.*/
+    {  0, 18, 17, 19,  8,  4, 24, ZSTD_btopt   },  /* level 14.*/
+    {  0, 18, 19, 19,  8,  4, 48, ZSTD_btopt   },  /* level 15.*/
+    {  0, 18, 19, 18,  9,  4,128, ZSTD_btopt   },  /* level 16.*/
+    {  0, 18, 19, 18,  9,  4,192, ZSTD_btopt   },  /* level 17.*/
+    {  0, 18, 19, 18,  9,  4,256, ZSTD_btopt   },  /* level 18.*/
+    {  0, 18, 19, 18, 10,  4,256, ZSTD_btopt   },  /* level 19.*/
+    {  0, 18, 19, 18, 11,  4,256, ZSTD_btopt   },  /* level 20.*/
+    {  0, 18, 19, 18, 12,  4,256, ZSTD_btopt   },  /* level 21.*/
 },
 {   /* for srcSize <= 128 KB */
-    /*    W,  C,  H,  S,  L, strat */
-    { 0, 17, 12, 12,  1,  4, ZSTD_fast    },  /* level  0 - never used */
-    { 0, 17, 12, 13,  1,  6, ZSTD_fast    },  /* level  1 */
-    { 0, 17, 14, 16,  1,  5, ZSTD_fast    },  /* level  2 */
-    { 0, 17, 15, 17,  1,  5, ZSTD_fast    },  /* level  3 */
-    { 0, 17, 13, 15,  2,  4, ZSTD_greedy  },  /* level  4 */
-    { 0, 17, 15, 17,  3,  4, ZSTD_greedy  },  /* level  5 */
-    { 0, 17, 14, 17,  3,  4, ZSTD_lazy    },  /* level  6 */
-    { 0, 17, 16, 17,  4,  4, ZSTD_lazy    },  /* level  7 */
-    { 0, 17, 16, 17,  4,  4, ZSTD_lazy2   },  /* level  8 */
-    { 0, 17, 17, 16,  5,  4, ZSTD_lazy2   },  /* level  9 */
-    { 0, 17, 17, 16,  6,  4, ZSTD_lazy2   },  /* level 10 */
-    { 0, 17, 17, 16,  7,  4, ZSTD_lazy2   },  /* level 11 */
-    { 0, 17, 17, 16,  8,  4, ZSTD_lazy2   },  /* level 12 */
-    { 0, 17, 18, 16,  4,  4, ZSTD_btlazy2 },  /* level 13 */
-    { 0, 17, 18, 16,  5,  4, ZSTD_btlazy2 },  /* level 14 */
-    { 0, 17, 18, 16,  6,  4, ZSTD_btlazy2 },  /* level 15 */
-    { 0, 17, 18, 16,  7,  4, ZSTD_btlazy2 },  /* level 16 */
-    { 0, 17, 18, 16,  8,  4, ZSTD_btlazy2 },  /* level 17 */
-    { 0, 17, 18, 16,  9,  4, ZSTD_btlazy2 },  /* level 18 */
-    { 0, 17, 18, 16, 10,  4, ZSTD_btlazy2 },  /* level 19 */
-    { 0, 17, 18, 18, 12,  4, ZSTD_btlazy2 },  /* level 20 */
+    /* l,  W,  C,  H,  S,  L,  T, strat */
+    {  0,  0,  0,  0,  0,  0,  0, ZSTD_fast    },  /* level  0 - never used */
+    {  0, 17, 12, 13,  1,  6,  4, ZSTD_fast    },  /* level  1 */
+    {  0, 17, 13, 16,  1,  5,  4, ZSTD_fast    },  /* level  2 */
+    {  0, 17, 13, 14,  2,  5,  4, ZSTD_greedy  },  /* level  3 */
+    {  0, 17, 13, 15,  3,  4,  4, ZSTD_greedy  },  /* level  4 */
+    {  0, 17, 15, 17,  4,  4,  4, ZSTD_greedy  },  /* level  5 */
+    {  0, 17, 16, 17,  3,  4,  4, ZSTD_lazy    },  /* level  6 */
+    {  0, 17, 16, 17,  4,  4,  4, ZSTD_lazy    },  /* level  7 */
+    {  0, 17, 17, 16,  4,  4,  4, ZSTD_lazy2   },  /* level  8 */
+    {  0, 17, 17, 16,  5,  4,  4, ZSTD_lazy2   },  /* level  9 */
+    {  0, 17, 17, 16,  6,  4,  4, ZSTD_lazy2   },  /* level 10 */
+    {  0, 17, 17, 17,  7,  4,  4, ZSTD_lazy2   },  /* level 11 */
+    {  0, 17, 17, 17,  8,  4,  4, ZSTD_lazy2   },  /* level 12 */
+    {  0, 17, 17, 17,  9,  4,  4, ZSTD_lazy2   },  /* level 13 */
+    {  0, 17, 18, 16,  5,  4, 20, ZSTD_btopt   },  /* level 14 */
+    {  0, 17, 18, 16,  9,  4, 48, ZSTD_btopt   },  /* level 15 */
+    {  0, 17, 18, 17,  7,  4,128, ZSTD_btopt   },  /* level 16 */
+    {  0, 17, 18, 17,  8,  4,128, ZSTD_btopt   },  /* level 17 */
+    {  0, 17, 18, 17,  8,  4,256, ZSTD_btopt   },  /* level 18 */
+    {  0, 17, 18, 17,  9,  4,256, ZSTD_btopt   },  /* level 19 */
+    {  0, 17, 18, 17, 10,  4,512, ZSTD_btopt   },  /* level 20 */
+    {  0, 17, 18, 17, 11,  4,512, ZSTD_btopt   },  /* level 21 */
+
 },
 {   /* for srcSize <= 16 KB */
-    /*     W,  C,  H,  S,  L, strat */
-    {  0,  0,  0,  0,  0,  0, ZSTD_fast    },  /* level  0 - never used */
-    {  0, 14, 14, 14,  1,  4, ZSTD_fast    },  /* level  1 */
-    {  0, 14, 14, 16,  1,  4, ZSTD_fast    },  /* level  2 */
-    {  0, 14, 14, 14,  5,  4, ZSTD_greedy  },  /* level  3 */
-    {  0, 14, 14, 14,  8,  4, ZSTD_greedy  },  /* level  4 */
-    {  0, 14, 11, 14,  6,  4, ZSTD_lazy    },  /* level  5 */
-    {  0, 14, 14, 13,  6,  5, ZSTD_lazy    },  /* level  6 */
-    {  0, 14, 14, 14,  7,  6, ZSTD_lazy    },  /* level  7 */
-    {  0, 14, 14, 14,  8,  4, ZSTD_lazy    },  /* level  8 */
-    {  0, 14, 14, 15,  9,  4, ZSTD_lazy    },  /* level  9 */
-    {  0, 14, 14, 15, 10,  4, ZSTD_lazy    },  /* level 10 */
-    {  0, 14, 15, 15,  6,  4, ZSTD_btlazy2 },  /* level 11 */
-    {  0, 14, 15, 15,  7,  4, ZSTD_btlazy2 },  /* level 12 */
-    {  0, 14, 15, 15,  8,  4, ZSTD_btlazy2 },  /* level 13 */
-    {  0, 14, 15, 15,  9,  4, ZSTD_btlazy2 },  /* level 14 */
-    {  0, 14, 15, 15, 10,  4, ZSTD_btlazy2 },  /* level 15 */
-    {  0, 14, 15, 15, 11,  4, ZSTD_btlazy2 },  /* level 16 */
-    {  0, 14, 15, 15, 12,  4, ZSTD_btlazy2 },  /* level 17 */
-    {  0, 14, 15, 15, 13,  4, ZSTD_btlazy2 },  /* level 18 */
-    {  0, 14, 15, 15, 14,  4, ZSTD_btlazy2 },  /* level 19 */
-    {  0, 14, 15, 15, 15,  4, ZSTD_btlazy2 },  /* level 20 */
+    /* l,  W,  C,  H,  S,  L,  T, strat */
+    {  0,  0,  0,  0,  0,  0,  0, ZSTD_fast    },  /* level  0 -- never used */
+    {  0, 14, 14, 14,  1,  4,  4, ZSTD_fast    },  /* level  1 */
+    {  0, 14, 14, 15,  1,  4,  4, ZSTD_fast    },  /* level  2 */
+    {  0, 14, 13, 15,  4,  4,  4, ZSTD_greedy  },  /* level  3 */
+    {  0, 14, 14, 15,  3,  4,  4, ZSTD_lazy    },  /* level  4 */
+    {  0, 14, 14, 14,  6,  4,  4, ZSTD_lazy    },  /* level  5 */
+    {  0, 14, 14, 14,  5,  4,  4, ZSTD_lazy2   },  /* level  6 */
+    {  0, 14, 14, 14,  7,  4,  4, ZSTD_lazy2   },  /* level  7 */
+    {  0, 14, 14, 14,  8,  4,  4, ZSTD_lazy2   },  /* level  8 */
+    {  0, 14, 14, 14,  9,  4,  4, ZSTD_lazy2   },  /* level  9 */
+    {  0, 14, 14, 14, 10,  4,  4, ZSTD_lazy2   },  /* level 10 */
+    {  0, 14, 14, 14, 11,  4,  4, ZSTD_lazy2   },  /* level 11 */
+    {  0, 14, 15, 15, 12,  4, 32, ZSTD_btopt   },  /* level 12 */
+    {  0, 14, 15, 15, 12,  4, 64, ZSTD_btopt   },  /* level 13 */
+    {  0, 14, 15, 15, 12,  4, 96, ZSTD_btopt   },  /* level 14 */
+    {  0, 14, 15, 15, 12,  4,128, ZSTD_btopt   },  /* level 15 */
+    {  0, 14, 15, 15, 12,  4,256, ZSTD_btopt   },  /* level 16 */
+    {  0, 14, 15, 15, 13,  4,256, ZSTD_btopt   },  /* level 17 */
+    {  0, 14, 15, 15, 14,  4,256, ZSTD_btopt   },  /* level 18 */
+    {  0, 14, 15, 15, 15,  4,256, ZSTD_btopt   },  /* level 19 */
+    {  0, 14, 15, 15, 16,  4,256, ZSTD_btopt   },  /* level 20 */
+    {  0, 14, 15, 15, 17,  4,256, ZSTD_btopt   },  /* level 21 */
 },
 };
 
@@ -2368,6 +2384,9 @@ ZSTD_parameters ZSTD_getParams(int compressionLevel, U64 srcSizeHint)
     int tableID = ((srcSizeHint-1) <= 256 KB) + ((srcSizeHint-1) <= 128 KB) + ((srcSizeHint-1) <= 16 KB);   /* intentional underflow for srcSizeHint == 0 */
     if (compressionLevel<=0) compressionLevel = 1;
     if (compressionLevel > ZSTD_MAX_CLEVEL) compressionLevel = ZSTD_MAX_CLEVEL;
+#if ZSTD_OPT_DEBUG >= 1
+    tableID=0;
+#endif
     result = ZSTD_defaultParameters[tableID][compressionLevel];
     result.srcSize = srcSizeHint;
     return result;
diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c
index 529f1b6f21a..bfa0ea334a1 100644
--- a/lib/zstd_decompress.c
+++ b/lib/zstd_decompress.c
@@ -612,6 +612,8 @@ typedef struct {
     const BYTE* dumpsEnd;
 } seqState_t;
 
+
+
 static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
 {
     size_t litLength;
diff --git a/lib/zstd_internal.h b/lib/zstd_internal.h
index 20c62673813..26fc8578519 100644
--- a/lib/zstd_internal.h
+++ b/lib/zstd_internal.h
@@ -32,11 +32,7 @@
 #ifndef ZSTD_CCOMMON_H_MODULE
 #define ZSTD_CCOMMON_H_MODULE
 
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-/* *************************************
+/*-*************************************
 *  Dependencies
 ***************************************/
 #include "mem.h"
@@ -44,17 +40,16 @@ extern "C" {
 #include "zstd_static.h"
 
 
-/* *************************************
+/*-*************************************
 *  Common macros
 ***************************************/
 #define MIN(a,b) ((a)<(b) ? (a) : (b))
 #define MAX(a,b) ((a)>(b) ? (a) : (b))
 
 
-/* *************************************
+/*-*************************************
 *  Common constants
 ***************************************/
-#define ZSTD_MAGICNUMBER 0xFD2FB525   /* v0.5 */
 #define ZSTD_DICT_MAGIC  0xEC30A435
 
 #define KB *(1 <<10)
@@ -82,9 +77,11 @@ static const size_t ZSTD_frameHeaderSize_min = 5;
 #define MINMATCH 4
 #define REPCODE_STARTVALUE 1
 
+#define Litbits  8
 #define MLbits   7
 #define LLbits   6
 #define Offbits  5
+#define MaxLit ((1<<Litbits) - 1)
 #define MaxML  ((1<<MLbits) - 1)
 #define MaxLL  ((1<<LLbits) - 1)
 #define MaxOff ((1<<Offbits)- 1)
@@ -128,9 +125,58 @@ MEM_STATIC void ZSTD_wildcopy(void* dst, const void* src, size_t length)
     while (op < oend);
 }
 
-
-#if defined (__cplusplus)
+MEM_STATIC unsigned ZSTD_highbit(U32 val)   /* @return : index (0-31) of the highest set bit of `val`. `val` must be != 0 */
+{
+#   if defined(_MSC_VER)   /* Visual */
+    unsigned long r=0;
+    _BitScanReverse(&r, val);
+    return (unsigned)r;
+#   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* GCC Intrinsic */
+    return 31 - __builtin_clz(val);   /* __builtin_clz(0) is undefined, hence the val != 0 requirement */
+#   else   /* Software version */
+    static const int DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+    U32 v = val;
+    int r;
+    v |= v >> 1;   /* this and the next 4 lines copy the highest set bit into every lower bit position */
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    r = DeBruijnClz[(U32)(v * 0x07C4ACDDU) >> 27];   /* top 5 bits of the 32-bit product select the table entry */
+    return r;
+#   endif
 }
-#endif
+
+
+/*-*******************************************
+*  Private interfaces
+*********************************************/
+typedef struct {   /* sequence store filled by the block compressors; the pointers are set up from one work buffer in zstd_compress.c */
+    void* buffer;
+    U32*  offsetStart;   /* placed (1<<Offbits) U32 after offCodeFreq by the context setup */
+    U32*  offset;
+    BYTE* offCodeStart;
+    BYTE* offCode;
+    BYTE* litStart;   /* beginning of the literals area */
+    BYTE* lit;   /* write cursor : block compressors memcpy literals here, then advance it */
+    BYTE* litLengthStart;   /* NOTE(review): the other xxxStart / xxx pairs presumably follow the same start / cursor convention - confirm */
+    BYTE* litLength;
+    BYTE* matchLengthStart;
+    BYTE* matchLength;
+    BYTE* dumpsStart;
+    BYTE* dumps;
+    /* opt : fields introduced together with the optimal parser (zstd_opt.h) */
+    U32* matchLengthFreq;   /* offCodeFreq is placed (1<<MLbits) U32 after this pointer by the context setup */
+    U32* litLengthFreq;
+    U32* litFreq;
+    U32* offCodeFreq;
+    U32  matchLengthSum;   /* NOTE(review): the xxxSum fields presumably hold the totals of the matching xxxFreq tables - confirm in zstd_opt.h */
+    U32  litLengthSum;
+    U32  litSum;
+    U32  offCodeSum;
+} seqStore_t;
+
+seqStore_t ZSTD_copySeqStore(const ZSTD_CCtx* ctx);
+
 
 #endif   /* ZSTD_CCOMMON_H_MODULE */
diff --git a/lib/zstd_opt.h b/lib/zstd_opt.h
new file mode 100644
index 00000000000..ec9a2a158ba
--- /dev/null
+++ b/lib/zstd_opt.h
@@ -0,0 +1,1125 @@
+/*
+    ZSTD Optimal mode
+    Copyright (C) 2016, Przemyslaw Skibinski, Yann Collet.
+
+    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+       - Zstd source repository : https://www.zstd.net
+*/
+
+/* Note : this file is intended to be included within zstd_compress.c */
+
+/*-  Dependencies  -*/
+#include <stdio.h>  /* for debug */
+
+
+/*-  Local types  -*/
+typedef struct {   /* one match candidate, as filled by the *GetAllMatches() search functions */
+    U32 off;    /* distance back to the match : current - matchIndex */
+    U32 len;    /* match length in bytes ; includes the backward extension when there is one */
+    U32 back;   /* nb of bytes the match was extended backward, before ip ; always 0 from the binary tree search */
+} ZSTD_match_t;
+
+typedef struct {   /* one cell of the optimal parser's table : opt[pos] describes the cheapest known way to reach position ip+pos */
+    U32 price;    /* estimated cost to reach this position (see ZSTD_getPrice / ZSTD_getLiteralPrice) */
+    U32 off;      /* offset of the step arriving here ; 0 for a repcode match and for a literal step */
+    U32 mlen;     /* length of the step arriving here ; the value 1 denotes a literal */
+    U32 litlen;   /* nb of literals associated with this cell */
+    U32 rep;      /* repeat offset tracked at this position (opt[0].rep starts from rep_1) */
+    U32 rep2;     /* second repeat offset (opt[0].rep2 starts from rep_2) */
+} ZSTD_optimal_t;
+
+
+/*-  Constants  -*/
+#define ZSTD_OPT_NUM   (1<<12)   /* dimension of the parser's opt[] and matches[] tables ; a match longer than this ends a search */
+#define ZSTD_FREQ_THRESHOLD (256)   /* statistics-based prices are used only when litSum is above this value */
+
+/*-  Debug  -*/
+#define ZSTD_OPT_DEBUG 0     /* 0 = silent ; >=1 = ZSTD_LOG_*() traces are sent to printf (original note : "1 = tableID=0") ; >=5 = also check encoded sequences */
+
+#if defined(ZSTD_OPT_DEBUG) && ZSTD_OPT_DEBUG>=1
+    #define ZSTD_LOG_PARSER(...) printf(__VA_ARGS__)   /* traces of the parsing decisions */
+    #define ZSTD_LOG_ENCODE(...) printf(__VA_ARGS__)   /* traces of the stored sequences */
+    #define ZSTD_LOG_TRY_PRICE(...) printf(__VA_ARGS__)   /* traces of the price evaluations */
+#else   /* traces are compiled out : the macros expand to nothing */
+    #define ZSTD_LOG_PARSER(...)
+    #define ZSTD_LOG_ENCODE(...)
+    #define ZSTD_LOG_TRY_PRICE(...)
+#endif
+
+
+FORCE_INLINE U32 ZSTD_getLiteralPriceReal(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals)
+{   /* statistics-based cost of `litLength` literals read from `literals`, their length field included */
+    U32 cost, llCode, remaining;
+
+    if (litLength == 0) return 1;   /* no literal : flat cost, `literals` is not read */
+
+    /* literals : each one costs highbit(litSum) - highbit(its own frequency) */
+    cost = litLength * ZSTD_highbit(seqStorePtr->litSum);
+    for (remaining = litLength; remaining > 0; remaining--)
+        cost -= ZSTD_highbit(seqStorePtr->litFreq[literals[remaining-1]]);
+
+    /* literal length : surcharges for long lengths, then cost of the length code (capped at MaxLL) */
+    if (litLength >= MaxLL) cost += 8;
+    if (litLength >= 255+MaxLL) cost += 16;
+    if (litLength >= (1<<15)) cost += 8;
+    llCode = (litLength < MaxLL) ? litLength : MaxLL;
+
+    return cost + ZSTD_highbit(seqStorePtr->litLengthSum) - ZSTD_highbit(seqStorePtr->litLengthFreq[llCode]);
+}
+
+
+FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals)
+{   /* cost of a run of literals : statistics-based when litSum is above ZSTD_FREQ_THRESHOLD, flat estimate otherwise */
+    if (seqStorePtr->litSum <= ZSTD_FREQ_THRESHOLD)
+        return (litLength * 8) + 1;   /* backup eval : 8 per literal, plus 1 */
+    /* statistics-based eval */
+    return ZSTD_getLiteralPriceReal(seqStorePtr, litLength, literals);
+}
+
+
+FORCE_INLINE U32 ZSTD_getMatchPriceReal(seqStore_t* seqStorePtr, U32 offset, U32 matchLength)
+{   /* statistics-based cost of a match : offset part + match length part */
+    U32 const mlCode = (matchLength < MaxML) ? matchLength : MaxML;   /* lengths >= MaxML share the last slot */
+    U32 cost = 0;
+    BYTE offCode = 0;   /* offset 0 keeps code 0 ; ZSTD_highbit() is never called on 0 */
+
+    /* offset : cost of its code, plus the value of the code itself */
+    if (offset) offCode = (BYTE)ZSTD_highbit(offset) + 1;
+    cost += ZSTD_highbit(seqStorePtr->offCodeSum) - ZSTD_highbit(seqStorePtr->offCodeFreq[offCode]) + offCode;
+
+    /* match length : surcharges for long lengths, then cost of the capped length code */
+    cost += (matchLength >= MaxML ? 8 : 0) + (matchLength >= 255+MaxML ? 16 : 0) + (matchLength >= (1<<15) ? 8 : 0);
+    return cost + ZSTD_highbit(seqStorePtr->matchLengthSum) - ZSTD_highbit(seqStorePtr->matchLengthFreq[mlCode]);
+}
+
+
+FORCE_INLINE U32 ZSTD_getPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength)
+{   /* cost of a full sequence : `litLength` literals followed by a match (offset, matchLength) */
+    if (seqStorePtr->litSum <= ZSTD_FREQ_THRESHOLD)   /* backup eval, used until litSum exceeds the threshold */
+        return (litLength * 8) + ZSTD_highbit(matchLength + 1) + Offbits + ZSTD_highbit(offset + 1);
+    /* statistics-based eval */
+    return ZSTD_getLiteralPriceReal(seqStorePtr, litLength, literals) + ZSTD_getMatchPriceReal(seqStorePtr, offset, matchLength);
+}
+
+
+MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength)
+{   /* records one sequence (litLength literals read from `literals`, then a match) into the statistics read by the ZSTD_get*PriceReal() functions */
+    /* offset code : 0 for offset 0, else highbit(offset)+1. ZSTD_highbit() is only called on a non-zero value, since __builtin_clz(0) is undefined */
+    const BYTE offCode = offset ? (BYTE)(ZSTD_highbit(offset) + 1) : 0;
+    U32 u;
+
+    /* literals : one count per literal byte */
+    seqStorePtr->litSum += litLength;
+    for (u=0; u < litLength; u++)
+        seqStorePtr->litFreq[literals[u]]++;
+
+    /* literal Length : lengths >= MaxLL share the last slot */
+    seqStorePtr->litLengthSum++;
+    if (litLength >= MaxLL)
+        seqStorePtr->litLengthFreq[MaxLL]++;
+    else
+        seqStorePtr->litLengthFreq[litLength]++;
+
+    /* match offset */
+    seqStorePtr->offCodeSum++;
+    seqStorePtr->offCodeFreq[offCode]++;
+
+    /* match Length : lengths >= MaxML share the last slot */
+    seqStorePtr->matchLengthSum++;
+    if (matchLength >= MaxML)
+        seqStorePtr->matchLengthFreq[MaxML]++;
+    else
+        seqStorePtr->matchLengthFreq[matchLength]++;
+}
+
+
+#define SET_PRICE(pos, mlen_, offset_, litlen_, price_)   /* stores a new way to reach opt[pos] ; uses `opt`, `last_pos`, `inr`, `base` of the calling scope ; `pos` is evaluated several times */ \
+    {                                                 \
+        while (last_pos < (pos))  { opt[last_pos+1].price = 1<<30; last_pos++; }   /* newly reached cells start at a very high price ; last_pos ends up >= pos */ \
+        opt[pos].mlen = (mlen_);                       \
+        opt[pos].off = (offset_);                      \
+        opt[pos].litlen = (litlen_);                   \
+        opt[pos].price = (price_);                     \
+        ZSTD_LOG_PARSER("%d: SET price[%d/%d]=%d litlen=%d len=%d off=%d\n", (int)(inr-base), (int)(pos), (int)last_pos, opt[pos].price, opt[pos].litlen, opt[pos].mlen, opt[pos].off); \
+    }
+
+
+
+/*-*************************************
+*  Binary Tree search
+***************************************/
+static U32 ZSTD_insertBtAndGetAllMatches (   /* inserts position `ip` into the binary tree and records every candidate longer than the previous best ; @return : nb of entries written into `matches` */
+                        ZSTD_CCtx* zc,
+                        const BYTE* const ip, const BYTE* const iend,   /* iend : limit given to the byte-counting helpers */
+                        U32 nbCompares, const U32 mls,   /* nbCompares : max nb of candidates visited ; mls : forwarded to ZSTD_hashPtr() */
+                        U32 extDict, ZSTD_match_t* matches, size_t bestLength)   /* extDict : non-zero when candidates below dictLimit must be read through dictBase ; bestLength : incoming value is overwritten below */
+{
+    const BYTE* const base = zc->base;
+    const U32 current = (U32)(ip-base);   /* index of ip, relative to base */
+    const U32 hashLog = zc->params.hashLog;
+    const size_t h  = ZSTD_hashPtr(ip, hashLog, mls);
+    U32* const hashTable = zc->hashTable;
+    U32 matchIndex  = hashTable[h];   /* first candidate : index previously stored for this hash */
+    U32* const bt   = zc->contentTable;
+    const U32 btLog = zc->params.contentLog - 1;
+    const U32 btMask= (1U << btLog) - 1;
+    size_t commonLengthSmaller=0, commonLengthLarger=0;   /* nb of bytes known to be shared with the smaller / larger side visited so far */
+    const BYTE* const dictBase = zc->dictBase;
+    const U32 dictLimit = zc->dictLimit;
+    const BYTE* const dictEnd = dictBase + dictLimit;
+    const BYTE* const prefixStart = base + dictLimit;
+    const U32 btLow = btMask >= current ? 0 : current - btMask;   /* candidates at or below this index stop the search */
+    const U32 windowLow = zc->lowLimit;
+    U32* smallerPtr = bt + 2*(current&btMask);   /* each position owns 2 consecutive U32 in bt : [smaller, larger] */
+    U32* largerPtr  = bt + 2*(current&btMask) + 1;
+    U32 matchEndIdx = current+8;
+    U32 dummy32;   /* to be nullified at the end */
+    U32 mnum = 0;   /* nb of matches recorded so far */
+
+    bestLength = MINMATCH-1;   /* the caller-provided value is discarded : any match longer than MINMATCH-1 gets recorded */
+    hashTable[h] = current;   /* Update Hash Table */
+
+    while (nbCompares-- && (matchIndex > windowLow)) {
+        U32* nextPtr = bt + 2*(matchIndex & btMask);   /* the 2 links of the candidate */
+        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
+        const BYTE* match;
+
+        if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
+            match = base + matchIndex;
+            if (match[matchLength] == ip[matchLength])
+                matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1;
+        } else {
+            match = dictBase + matchIndex;
+            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
+            if (matchIndex+matchLength >= dictLimit)
+                match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
+        }
+
+        if (matchLength > bestLength) {   /* strictly longer than anything recorded : `matches` is therefore sorted by increasing length */
+            if (matchLength > matchEndIdx - matchIndex) matchEndIdx = matchIndex + (U32)matchLength;
+            bestLength = matchLength;
+            matches[mnum].off = current - matchIndex;
+            matches[mnum].len = (U32)matchLength;
+            matches[mnum].back = 0;   /* no backward extension in the binary tree search */
+            mnum++;
+            if (matchLength > ZSTD_OPT_NUM) break;   /* longer than the parser's table : no need to look further */
+            if (ip+matchLength == iend)   /* equal : no way to know if inf or sup */
+                break;   /* drop, to guarantee consistency (miss a little bit of compression) */
+        }
+
+        if (match[matchLength] < ip[matchLength]) {
+            /* match is smaller than current */
+            *smallerPtr = matchIndex;             /* update smaller idx */
+            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
+            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
+            smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
+            matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
+        } else {
+            /* match is larger than current */
+            *largerPtr = matchIndex;
+            commonLengthLarger = matchLength;
+            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
+            largerPtr = nextPtr;
+            matchIndex = nextPtr[0];
+    }   }
+
+    *smallerPtr = *largerPtr = 0;   /* terminate both pending links (they point to dummy32 after a "beyond tree size" stop) */
+
+    zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1;   /* next position to insert : advanced when a match ended more than 8 positions past current */
+    return mnum;
+}
+
+
+/** Tree updater, then match collector (extDict flag = 0). Yields 0 match when `ip` lies in the skipped area */
+static U32 ZSTD_BtGetAllMatches (ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iLimit,
+                                 const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, U32 minml)
+{
+    if (ip >= zc->base + zc->nextToUpdate) {   /* not within the skipped area */
+        ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls);
+        return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 0, matches, minml);
+    }
+    return 0;   /* skipped area */
+}
+
+
+static U32 ZSTD_BtGetAllMatches_selectMLS (
+                        ZSTD_CCtx* zc,   /* Index table will be updated */
+                        const BYTE* ip, const BYTE* const iLowLimit, const BYTE* const iHighLimit,
+                        const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, U32 minml)
+{
+    /* dispatcher : calls ZSTD_BtGetAllMatches() with a literal `mls` of 4, 5 or 6 */
+    (void)iLowLimit;  /* unused */
+    if (matchLengthSearch == 5)
+        return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minml);
+    if (matchLengthSearch == 6)
+        return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minml);
+    /* 4, and any other value */
+    return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minml);
+}
+
+/** Tree updater, then match collector, extDict variant (extDict flag = 1). Yields 0 match when `ip` lies in the skipped area */
+static U32 ZSTD_BtGetAllMatches_extDict (ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iLimit,
+                                         const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, U32 minml)
+{
+    if (ip >= zc->base + zc->nextToUpdate) {   /* not within the skipped area */
+        ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls);
+        return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 1, matches, minml);
+    }
+    return 0;   /* skipped area */
+}
+
+
+static U32 ZSTD_BtGetAllMatches_selectMLS_extDict (
+                        ZSTD_CCtx* zc,   /* Index table will be updated */
+                        const BYTE* ip, const BYTE* const iLowLimit, const BYTE* const iHighLimit,
+                        const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, U32 minml)
+{
+    /* dispatcher : calls ZSTD_BtGetAllMatches_extDict() with a literal `mls` of 4, 5 or 6 */
+    (void)iLowLimit;  /* unused */
+    if (matchLengthSearch == 5)
+        return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minml);
+    if (matchLengthSearch == 6)
+        return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minml);
+    /* 4, and any other value */
+    return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minml);
+}
+
+
+/* ***********************
+*  Hash Chain
+*************************/
+FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */
+U32 ZSTD_HcGetAllMatches_generic (   /* walks the hash chain of `ip` and records every candidate longer than the previous best ; @return : nb of entries written into `matches` */
+                        ZSTD_CCtx* zc,   /* Index table will be updated */
+                        const BYTE* const ip, const BYTE* const iLowLimit, const BYTE* const iHighLimit,   /* iLowLimit : lower bound of the backward extension ; iHighLimit : limit of the forward count */
+                        const U32 maxNbAttempts, const U32 mls, const U32 extDict, ZSTD_match_t* matches, size_t minml)   /* minml : incoming value is overwritten below */
+{
+    U32* const chainTable = zc->contentTable;
+    const U32 chainSize = (1U << zc->params.contentLog);
+    const U32 chainMask = chainSize-1;
+    const BYTE* const base = zc->base;
+    const BYTE* const dictBase = zc->dictBase;
+    const U32 dictLimit = zc->dictLimit;
+    const BYTE* const prefixStart = base + dictLimit;
+    const BYTE* const dictEnd = dictBase + dictLimit;
+    const BYTE* const dictStart  = dictBase + zc->lowLimit;
+    const U32 lowLimit = zc->lowLimit;
+    const U32 current = (U32)(ip-base);   /* index of ip, relative to base */
+    const U32 minChain = current > chainSize ? current - chainSize : 0;   /* candidates at or below this index end the walk */
+    U32 matchIndex;
+    U32 mnum = 0;   /* nb of matches recorded so far */
+    const BYTE* match;
+    U32 nbAttempts=maxNbAttempts;
+    minml=MINMATCH-1;   /* the caller-provided value is discarded : any match longer than MINMATCH-1 gets recorded */
+
+    /* HC4 match finder */
+    matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, mls);
+
+    while ((matchIndex>lowLimit) && (nbAttempts)) {
+        size_t currentMl=0;   /* length of the current candidate ; stays 0 when it is rejected */
+        int back = 0;         /* nb of bytes matched backward, before ip */
+        nbAttempts--;
+        if ((!extDict) || matchIndex >= dictLimit) {
+            match = base + matchIndex;
+            if (match[minml] == ip[minml]) currentMl = ZSTD_count(ip, match, iHighLimit); if (currentMl>0) {   // faster : candidates differing at index minml are not counted
+            //if (MEM_read32(match) == MEM_read32(ip)) { currentMl = ZSTD_count(ip+MINMATCH, match+MINMATCH, iHighLimit)+MINMATCH;  // stronger
+                while ((match-back > prefixStart) && (ip-back > iLowLimit) && (ip[-back-1] == match[-back-1])) back++;   /* backward match extension */
+                currentMl += back;
+            }
+        } else {
+            match = dictBase + matchIndex;
+            if (MEM_read32(match) == MEM_read32(ip)) {   /* assumption : matchIndex <= dictLimit-4 (by table construction) */
+                currentMl = ZSTD_count_2segments(ip+MINMATCH, match+MINMATCH, iHighLimit, dictEnd, prefixStart) + MINMATCH;
+                while ((match-back > dictStart) && (ip-back > iLowLimit) && (ip[-back-1] == match[-back-1])) back++;   /* backward match extension */
+                currentMl += back;
+        }   }
+
+        /* save best solution */
+        if (currentMl > minml) {   /* strictly longer than anything recorded : `matches` is therefore sorted by increasing length */
+            minml = currentMl;   /* NOTE(review) : this length includes `back`, while the filter above reads ip[minml] forward - TODO confirm it cannot index past iHighLimit */
+            matches[mnum].off = current - matchIndex;
+            matches[mnum].len = (U32)currentMl;
+            matches[mnum].back = back;
+            mnum++;
+            if (currentMl > ZSTD_OPT_NUM) break;   /* longer than the parser's table : no need to look further */
+            if (ip+currentMl == iHighLimit) break; /* best possible, and avoid read overflow. NOTE(review) : currentMl includes `back`, so this test presumably does not fire when a backward-extended match reaches iHighLimit - TODO confirm */
+        }
+
+        if (matchIndex <= minChain) break;
+        matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
+    }
+
+    return mnum;
+}
+
+
+static U32 ZSTD_HcGetAllMatches_selectMLS (
+                        ZSTD_CCtx* zc,
+                        const BYTE* ip, const BYTE* const iLowLimit, const BYTE* const iHighLimit,
+                        const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, U32 minml)
+{
+    /* dispatcher : calls the hash chain search (extDict = 0) with a literal `mls` of 4, 5 or 6 */
+    if (matchLengthSearch == 5)
+        return ZSTD_HcGetAllMatches_generic(zc, ip, iLowLimit, iHighLimit, maxNbAttempts, 5, 0, matches, minml);
+    if (matchLengthSearch == 6)
+        return ZSTD_HcGetAllMatches_generic(zc, ip, iLowLimit, iHighLimit, maxNbAttempts, 6, 0, matches, minml);
+    /* 4, and any other value */
+    return ZSTD_HcGetAllMatches_generic(zc, ip, iLowLimit, iHighLimit, maxNbAttempts, 4, 0, matches, minml);
+}
+
+static U32 ZSTD_HcGetAllMatches_selectMLS_extDict (
+                        ZSTD_CCtx* zc,
+                        const BYTE* ip, const BYTE* const iLowLimit, const BYTE* const iHighLimit,
+                        const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, U32 minml)
+{
+    /* dispatcher : calls the hash chain search (extDict = 1) with a literal `mls` of 4, 5 or 6 */
+    if (matchLengthSearch == 5)
+        return ZSTD_HcGetAllMatches_generic(zc, ip, iLowLimit, iHighLimit, maxNbAttempts, 5, 1, matches, minml);
+    if (matchLengthSearch == 6)
+        return ZSTD_HcGetAllMatches_generic(zc, ip, iLowLimit, iHighLimit, maxNbAttempts, 6, 1, matches, minml);
+    /* 4, and any other value */
+    return ZSTD_HcGetAllMatches_generic(zc, ip, iLowLimit, iHighLimit, maxNbAttempts, 4, 1, matches, minml);
+}
+
+
+/*-*******************************
+*  Optimal parser
+*********************************/
+FORCE_INLINE
+void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
+                                    const void* src, size_t srcSize,
+                                    const U32 searchMethod, const U32 depth)
+{
+    seqStore_t* seqStorePtr = &(ctx->seqStore);
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - 8;
+    const BYTE* const base = ctx->base + ctx->dictLimit;
+
+    U32 rep_2=REPCODE_STARTVALUE, rep_1=REPCODE_STARTVALUE;
+    const U32 maxSearches = 1U << ctx->params.searchLog;
+    const U32 mls = ctx->params.searchLength;
+
+    typedef U32 (*getAllMatches_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLowLimit, const BYTE* iHighLimit,
+                        U32 maxNbAttempts, U32 matchLengthSearch, ZSTD_match_t* matches, U32 minml);
+    getAllMatches_f getAllMatches = searchMethod ? ZSTD_BtGetAllMatches_selectMLS : ZSTD_HcGetAllMatches_selectMLS;
+
+    ZSTD_optimal_t opt[ZSTD_OPT_NUM+4];
+    ZSTD_match_t matches[ZSTD_OPT_NUM+1];
+    const BYTE* inr;
+    U32 skip_num, cur, cur2, match_num, last_pos, litlen, price;
+
+    const U32 sufficient_len = ctx->params.targetLength;
+    const U32 faster_get_matches = (ctx->params.strategy == ZSTD_opt);
+
+
+    /* init */
+    ZSTD_resetSeqStore(seqStorePtr);
+    ZSTD_resetFreqs(seqStorePtr);
+    if ((ip-base) < REPCODE_STARTVALUE) ip = base + REPCODE_STARTVALUE;
+
+    /* Match Loop */
+    while (ip < ilimit) {
+        U32 u;
+        U32 mlen=0;
+        U32 best_mlen=0;
+        U32 best_off=0;
+        memset(opt, 0, sizeof(ZSTD_optimal_t));
+        last_pos = 0;
+        inr = ip;
+        opt[0].litlen = (U32)(ip - anchor);
+
+        /* check repCode */
+        if (MEM_read32(ip+1) == MEM_read32(ip+1 - rep_1)) {
+            /* repcode : we take it */
+            mlen = (U32)ZSTD_count(ip+1+MINMATCH, ip+1+MINMATCH-rep_1, iend) + MINMATCH;
+
+            ZSTD_LOG_PARSER("%d: start try REP rep=%d mlen=%d\n", (int)(ip-base), (int)rep_1, (int)mlen);
+            if (depth==0 || mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) {
+                ip+=1; best_mlen = mlen; best_off = 0; cur = 0; last_pos = 1;
+                goto _storeSequence;
+            }
+
+            litlen = opt[0].litlen + 1;
+            do {
+                price = ZSTD_getPrice(seqStorePtr, litlen, anchor, 0, mlen - MINMATCH);
+                if (mlen + 1 > last_pos || price < opt[mlen + 1].price)
+                    SET_PRICE(mlen + 1, mlen, 0, litlen, price);   /* note : macro modifies last_pos */
+                mlen--;
+            } while (mlen >= MINMATCH);
+        }
+
+        best_mlen = (last_pos) ? last_pos : MINMATCH;
+
+        if (faster_get_matches && last_pos)
+           match_num = 0;
+        else
+           match_num = getAllMatches(ctx, ip, ip, iend, maxSearches, mls, matches, best_mlen); /* first search (depth 0) */
+
+        ZSTD_LOG_PARSER("%d: match_num=%d last_pos=%d\n", (int)(ip-base), match_num, last_pos);
+        if (!last_pos && !match_num) { ip++; continue; }
+
+        opt[0].rep = rep_1;
+        opt[0].rep2 = rep_2;
+        opt[0].mlen = 1;
+
+        if (match_num && matches[match_num-1].len > sufficient_len) {
+            best_mlen = matches[match_num-1].len;
+            best_off = matches[match_num-1].off;
+            cur = 0;
+            last_pos = 1;
+            goto _storeSequence;
+        }
+
+       // set prices using matches at position = 0
+       for (u = 0; u < match_num; u++) {
+           mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
+           best_mlen = (matches[u].len < ZSTD_OPT_NUM) ? matches[u].len : ZSTD_OPT_NUM;
+           ZSTD_LOG_PARSER("%d: start Found mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)(ip-base), matches[u].len, matches[u].off, (int)best_mlen, (int)last_pos);
+           litlen = opt[0].litlen;
+           while (mlen <= best_mlen) {
+                price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off, mlen - MINMATCH);
+                if (mlen > last_pos || price < opt[mlen].price)
+                    SET_PRICE(mlen, mlen, matches[u].off, litlen, price);
+                mlen++;
+        }  }
+
+        if (last_pos < MINMATCH) { ip++; continue; }
+
+         /* check further positions */
+        for (skip_num = 0, cur = 1; cur <= last_pos; cur++) {
+           size_t cur_rep;
+           inr = ip + cur;
+
+           if (opt[cur-1].mlen == 1) {
+                litlen = opt[cur-1].litlen + 1;
+                if (cur > litlen) {
+                    price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-litlen);
+                    ZSTD_LOG_TRY_PRICE("%d: TRY1 opt[%d].price=%d price=%d cur=%d litlen=%d\n", (int)(inr-base), cur - litlen, opt[cur - litlen].price, price, cur, litlen);
+                } else
+                    price = ZSTD_getLiteralPrice(seqStorePtr, litlen, anchor);
+           } else {
+                litlen = 1;
+                price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-1);
+                ZSTD_LOG_TRY_PRICE("%d: TRY3 price=%d cur=%d litlen=%d litonly=%d\n", (int)(inr-base), price, cur, litlen, (int)ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-1));
+           }
+
+           ZSTD_LOG_TRY_PRICE("%d: TRY4 price=%d opt[%d].price=%d\n", (int)(inr-base), price, cur, opt[cur].price);
+
+           if (cur > last_pos || price <= opt[cur].price) // || ((price == opt[cur].price) && (opt[cur-1].mlen == 1) && (cur != litlen)))
+                SET_PRICE(cur, 1, 0, litlen, price);
+
+           if (cur == last_pos) break;
+
+           if (inr > ilimit)  /* last match must start at a minimum distance of 8 from oend */
+               continue;
+
+            mlen = opt[cur].mlen;
+
+            if (opt[cur-mlen].off) {
+                opt[cur].rep2 = opt[cur-mlen].rep;
+                opt[cur].rep = opt[cur-mlen].off;
+                ZSTD_LOG_PARSER("%d: COPYREP1 cur=%d mlen=%d rep=%d rep2=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep, opt[cur].rep2);
+            } else {
+                if (cur!=mlen && opt[cur-mlen].litlen == 0) {
+                    opt[cur].rep2 = opt[cur-mlen].rep;
+                    opt[cur].rep = opt[cur-mlen].rep2;
+                    ZSTD_LOG_PARSER("%d: COPYREP2 cur=%d mlen=%d rep=%d rep2=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep, opt[cur].rep2);
+                } else {
+                    opt[cur].rep2 = opt[cur-mlen].rep2;
+                    opt[cur].rep = opt[cur-mlen].rep;
+                    ZSTD_LOG_PARSER("%d: COPYREP3 cur=%d mlen=%d rep=%d rep2=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep, opt[cur].rep2);
+            }   }
+
+           ZSTD_LOG_PARSER("%d: CURRENT price[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep2=%d\n", (int)(inr-base), cur, last_pos, opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep, opt[cur].rep2);
+
+           best_mlen = 0;
+
+           if (!opt[cur].off && opt[cur].mlen != 1) {
+               cur_rep = opt[cur].rep2;
+               ZSTD_LOG_PARSER("%d: try REP2 rep2=%u mlen=%u\n", (int)(inr-base), (U32)cur_rep, mlen);
+           } else {
+               cur_rep = opt[cur].rep;
+               ZSTD_LOG_PARSER("%d: try REP1 rep=%u mlen=%u\n", (int)(inr-base), (U32)cur_rep, mlen);
+           }
+
+           if (MEM_read32(inr) == MEM_read32(inr - cur_rep)) {  // check rep
+               mlen = (U32)ZSTD_count(inr+MINMATCH, inr+MINMATCH - cur_rep, iend) + MINMATCH;
+               ZSTD_LOG_PARSER("%d: Found REP mlen=%d off=%d rep=%d opt[%d].off=%d\n", (int)(inr-base), mlen, 0, opt[cur].rep, cur, opt[cur].off);
+
+               if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) {
+                    best_mlen = mlen;
+                    best_off = 0;
+                    ZSTD_LOG_PARSER("%d: REP sufficient_len=%d best_mlen=%d best_off=%d last_pos=%d\n", (int)(inr-base), sufficient_len, best_mlen, best_off, last_pos);
+                    last_pos = cur + 1;
+                    goto _storeSequence;
+               }
+
+               if (opt[cur].mlen == 1) {
+                    litlen = opt[cur].litlen;
+                    if (cur > litlen) {
+                        price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, 0, mlen - MINMATCH);
+                        ZSTD_LOG_TRY_PRICE("%d: TRY5 opt[%d].price=%d price=%d cur=%d litlen=%d\n", (int)(inr-base), cur - litlen, opt[cur - litlen].price, price, cur, litlen);
+                    } else
+                        price = ZSTD_getPrice(seqStorePtr, litlen, anchor, 0, mlen - MINMATCH);
+                } else {
+                    litlen = 0;
+                    price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, 0, mlen - MINMATCH);
+                    ZSTD_LOG_TRY_PRICE("%d: TRY7 price=%d cur=%d litlen=0 getprice=%d\n", (int)(inr-base), price, cur, (int)ZSTD_getPrice(seqStorePtr, 0, NULL, 0, mlen - MINMATCH));
+                }
+
+                best_mlen = mlen;
+                if (faster_get_matches) skip_num = best_mlen;
+                ZSTD_LOG_PARSER("%d: Found REP mlen=%d off=%d price=%d litlen=%d price[%d]=%d\n", (int)(inr-base), mlen, 0, price, litlen, cur - litlen, opt[cur - litlen].price);
+
+                do {
+                    if (cur + mlen > last_pos || price <= opt[cur + mlen].price)
+                        SET_PRICE(cur + mlen, mlen, 0, litlen, price);
+                    mlen--;
+                } while (mlen >= MINMATCH);
+            }
+
+            if (faster_get_matches && skip_num > 0) { skip_num--; continue; }
+
+            best_mlen = (best_mlen > MINMATCH) ? best_mlen : MINMATCH;
+
+            match_num = getAllMatches(ctx, inr, ip, iend, maxSearches, mls, matches, best_mlen);
+            ZSTD_LOG_PARSER("%d: ZSTD_GetAllMatches match_num=%d\n", (int)(inr-base), match_num);
+
+            if (match_num > 0 && matches[match_num-1].len > sufficient_len) {
+                cur -= matches[match_num-1].back;
+                best_mlen = matches[match_num-1].len;
+                best_off = matches[match_num-1].off;
+                last_pos = cur + 1;
+                goto _storeSequence;
+            }
+
+            /* set prices using matches at position = cur */
+            for (u = 0; u < match_num; u++) {
+                mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
+                cur2 = cur - matches[u].back;
+                best_mlen = (cur2 + matches[u].len < ZSTD_OPT_NUM) ? matches[u].len : ZSTD_OPT_NUM - cur2;
+
+                ZSTD_LOG_PARSER("%d: Found1 cur=%d cur2=%d mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)(inr-base), cur, cur2, matches[u].len, matches[u].off, best_mlen, last_pos);
+                if (mlen < matches[u].back + 1)
+                    mlen = matches[u].back + 1;
+
+                while (mlen <= best_mlen) {
+                    if (opt[cur2].mlen == 1) {
+                        litlen = opt[cur2].litlen;
+                        if (cur2 > litlen)
+                            price = opt[cur2 - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur2-litlen, matches[u].off, mlen - MINMATCH);
+                        else
+                            price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off, mlen - MINMATCH);
+                    } else {
+                        litlen = 0;
+                        price = opt[cur2].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off, mlen - MINMATCH);
+                    }
+
+                    ZSTD_LOG_PARSER("%d: Found2 pred=%d mlen=%d best_mlen=%d off=%d price=%d litlen=%d price[%d]=%d\n", (int)(inr-base), matches[u].back, mlen, best_mlen, matches[u].off, price, litlen, cur - litlen, opt[cur - litlen].price);
+                    ZSTD_LOG_TRY_PRICE("%d: TRY8 price=%d opt[%d].price=%d\n", (int)(inr-base), price, cur2 + mlen, opt[cur2 + mlen].price);
+
+                    if (cur2 + mlen > last_pos || (price < opt[cur2 + mlen].price))
+                        SET_PRICE(cur2 + mlen, mlen, matches[u].off, litlen, price);
+
+                    mlen++;
+        }   }   }   //  for (skip_num = 0, cur = 1; cur <= last_pos; cur++)
+
+        best_mlen = opt[last_pos].mlen;
+        best_off = opt[last_pos].off;
+        cur = last_pos - best_mlen;
+        // printf("%d: start=%d best_mlen=%d best_off=%d cur=%d\n", (int)(ip - base), (int)(start - ip), (int)best_mlen, (int)best_off, cur);
+
+        /* store sequence */
+_storeSequence:   /* cur, last_pos, best_mlen, best_off have to be set */
+        for (u = 1; u <= last_pos; u++)
+            ZSTD_LOG_PARSER("%d: price[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep2=%d\n", (int)(ip-base+u), u, last_pos, opt[u].price, opt[u].off, opt[u].mlen, opt[u].litlen, opt[u].rep, opt[u].rep2);
+        ZSTD_LOG_PARSER("%d: cur=%d/%d best_mlen=%d best_off=%d rep=%d\n", (int)(ip-base+cur), (int)cur, (int)last_pos, (int)best_mlen, (int)best_off, opt[cur].rep);
+
+        opt[0].mlen = 1;
+        U32 offset;
+
+        while (1) {
+            mlen = opt[cur].mlen;
+            ZSTD_LOG_PARSER("%d: cur=%d mlen=%d\n", (int)(ip-base), cur, mlen);
+            offset = opt[cur].off;
+            opt[cur].mlen = best_mlen;
+            opt[cur].off = best_off;
+            best_mlen = mlen;
+            best_off = offset;
+            if (mlen > cur) break;
+            cur -= mlen;
+        }
+
+        for (u = 0; u <= last_pos;) {
+            ZSTD_LOG_PARSER("%d: price2[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep2=%d\n", (int)(ip-base+u), u, last_pos, opt[u].price, opt[u].off, opt[u].mlen, opt[u].litlen, opt[u].rep, opt[u].rep2);
+            u += opt[u].mlen;
+        }
+
+        for (cur=0; cur < last_pos; ) {
+            ZSTD_LOG_PARSER("%d: price3[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep2=%d\n", (int)(ip-base+cur), cur, last_pos, opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep, opt[cur].rep2);
+            mlen = opt[cur].mlen;
+            if (mlen == 1) { ip++; cur++; continue; }
+            offset = opt[cur].off;
+            cur += mlen;
+
+            U32 litLength = (U32)(ip - anchor);
+            ZSTD_LOG_ENCODE("%d/%d: ENCODE1 literals=%d mlen=%d off=%d rep1=%d rep2=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep_1, (int)rep_2);
+
+            if (offset) {
+                rep_2 = rep_1;
+                rep_1 = offset;
+            } else {
+                if (litLength == 0) {
+                    best_off = rep_2;
+                    rep_2 = rep_1;
+                    rep_1 = best_off;
+            }   }
+
+            ZSTD_LOG_ENCODE("%d/%d: ENCODE2 literals=%d mlen=%d off=%d rep1=%d rep2=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep_1, (int)rep_2);
+
+#if ZSTD_OPT_DEBUG >= 5
+            int ml2;
+            if (offset)
+                ml2 = ZSTD_count(ip, ip-offset, iend);
+            else
+                ml2 = ZSTD_count(ip, ip-rep_1, iend);
+            if (ml2 < mlen && ml2 < MINMATCH) {
+                printf("%d: ERROR iend=%d mlen=%d offset=%d ml2=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset, (int)ml2); exit(0); }
+            if (ip < anchor) {
+                printf("%d: ERROR ip < anchor iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); }
+            if (ip - offset < ctx->base) {
+                printf("%d: ERROR ip - offset < base iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); }
+            if ((int)offset >= (1 << ctx->params.windowLog)) {
+                printf("%d: offset >= (1 << params.windowLog) iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); }
+            if (mlen < MINMATCH) {
+                printf("%d: ERROR mlen < MINMATCH iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); }
+            if (ip + mlen > iend) {
+                printf("%d: ERROR ip + mlen >= iend iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); }
+#endif
+
+            ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
+            ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
+            anchor = ip = ip + mlen;
+        }   /* for (cur=0; cur < last_pos; ) */
+
+        /* check immediate repcode */
+        while ( (anchor <= ilimit)
+             && (MEM_read32(anchor) == MEM_read32(anchor - rep_2)) ) {
+            /* store sequence */
+            best_mlen = (U32)ZSTD_count(anchor+MINMATCH, anchor+MINMATCH-rep_2, iend);
+            best_off = rep_2;
+            rep_2 = rep_1;
+            rep_1 = best_off;
+            ZSTD_LOG_ENCODE("%d/%d: ENCODE REP literals=%d mlen=%d off=%d rep1=%d rep2=%d\n", (int)(anchor-base), (int)(iend-base), (int)(0), (int)best_mlen, (int)(0), (int)rep_1, (int)rep_2);
+            ZSTD_updatePrice(seqStorePtr, 0, anchor, 0, best_mlen);
+            ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, best_mlen);
+            anchor += best_mlen+MINMATCH;
+            continue;   /* faster when present ... (?) */
+        }
+        if (anchor > ip) ip = anchor;
+    }
+
+    {   /* Last Literals */
+        size_t lastLLSize = iend - anchor;
+        ZSTD_LOG_ENCODE("%d: lastLLSize literals=%u\n", (int)(ip-base), (U32)lastLLSize);
+        memcpy(seqStorePtr->lit, anchor, lastLLSize);
+        seqStorePtr->lit += lastLLSize;
+    }
+}
+
+/* ZSTD_compressBlock_opt_extDict_generic() : price-based parser for the extDict case (repcode candidates below dictLimit are read through dictBase) ; fills opt[] with prices, backtracks the selected path, then stores its sequences and the last literals into ctx->seqStore */
+FORCE_INLINE
+void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
+                                     const void* src, size_t srcSize,
+                                     const U32 searchMethod, const U32 depth)   /* searchMethod : 0 => Hc match finder, non-zero => Bt match finder ; depth : 0 => a repcode match found at ip+1 is taken immediately */
+{
+    seqStore_t* seqStorePtr = &(ctx->seqStore);
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - 8;
+    const BYTE* const base = ctx->base;
+    const U32 dictLimit = ctx->dictLimit;
+    const BYTE* const prefixStart = base + dictLimit;
+    const BYTE* const dictBase = ctx->dictBase;
+    const BYTE* const dictEnd  = dictBase + dictLimit;
+
+    U32 rep_2=REPCODE_STARTVALUE, rep_1=REPCODE_STARTVALUE;
+    const U32 maxSearches = 1U << ctx->params.searchLog;
+    const U32 mls = ctx->params.searchLength;
+
+    typedef U32 (*getAllMatches_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLowLimit, const BYTE* iHighLimit,
+                        U32 maxNbAttempts, U32 matchLengthSearch, ZSTD_match_t* matches, U32 minml);
+    getAllMatches_f getAllMatches = searchMethod ? ZSTD_BtGetAllMatches_selectMLS_extDict : ZSTD_HcGetAllMatches_selectMLS_extDict;
+
+    ZSTD_optimal_t opt[ZSTD_OPT_NUM+4];
+    ZSTD_match_t matches[ZSTD_OPT_NUM+1];
+    const BYTE* inr;
+    U32 skip_num, cur, cur2, match_num, last_pos, litlen, price;
+
+    const U32 sufficient_len = ctx->params.targetLength;
+    const U32 faster_get_matches = (ctx->params.strategy == ZSTD_opt);
+
+    /* init */
+    ZSTD_resetSeqStore(seqStorePtr);
+    ZSTD_resetFreqs(seqStorePtr);
+    if ((ip - prefixStart) < REPCODE_STARTVALUE) ip += REPCODE_STARTVALUE;
+
+    /* Match Loop */
+    while (ip < ilimit) {
+        U32 u, offset, best_off=0;
+        U32 mlen=0, best_mlen=0;
+        U32 current = (U32)(ip-base);
+        memset(opt, 0, sizeof(ZSTD_optimal_t));   /* clears opt[0] only */
+        last_pos = 0;
+        inr = ip;
+        opt[0].litlen = (U32)(ip - anchor);
+
+        /* check repCode */
+        {
+            const U32 repIndex = (U32)(current+1 - rep_1);
+            const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
+            const BYTE* const repMatch = repBase + repIndex;
+            if ( ((U32)((dictLimit-1) - repIndex) >= 3)   /* intentional overflow */
+               && (MEM_read32(ip+1) == MEM_read32(repMatch)) ) {
+                /* repcode detected we should take it */
+                const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
+                mlen = (U32)ZSTD_count_2segments(ip+1+MINMATCH, repMatch+MINMATCH, iend, repEnd, prefixStart) + MINMATCH;
+
+                ZSTD_LOG_PARSER("%d: start try REP rep=%d mlen=%d\n", (int)(ip-base), (int)rep_1, (int)mlen);
+                if (depth==0 || mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) {
+                    ip+=1; best_mlen = mlen; best_off = 0; cur = 0; last_pos = 1;
+                    goto _storeSequence;
+                }
+
+                litlen = opt[0].litlen + 1;
+                do {
+                    price = ZSTD_getPrice(seqStorePtr, litlen, anchor, 0, mlen - MINMATCH);
+                    if (mlen + 1 > last_pos || price < opt[mlen + 1].price)
+                        SET_PRICE(mlen + 1, mlen, 0, litlen, price);
+                    mlen--;
+                } while (mlen >= MINMATCH);
+        }   }
+
+       best_mlen = (last_pos) ? last_pos : MINMATCH;
+
+       if (faster_get_matches && last_pos)
+           match_num = 0;
+       else
+           match_num = getAllMatches(ctx, ip, ip, iend, maxSearches, mls, matches, best_mlen);  /* first search (depth 0) */
+
+       ZSTD_LOG_PARSER("%d: match_num=%d last_pos=%d\n", (int)(ip-base), match_num, last_pos);
+       if (!last_pos && !match_num) { ip++; continue; }
+
+       opt[0].rep = rep_1;
+       opt[0].rep2 = rep_2;
+       opt[0].mlen = 1;
+
+       if (match_num && matches[match_num-1].len > sufficient_len) {
+            best_mlen = matches[match_num-1].len;
+            best_off = matches[match_num-1].off;
+            cur = 0;
+            last_pos = 1;
+            goto _storeSequence;
+       }
+
+        // set prices using matches at position = 0
+        for (u = 0; u < match_num; u++) {
+            mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
+            best_mlen = (matches[u].len < ZSTD_OPT_NUM) ? matches[u].len : ZSTD_OPT_NUM;
+            ZSTD_LOG_PARSER("%d: start Found mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)(ip-base), matches[u].len, matches[u].off, (int)best_mlen, (int)last_pos);
+            litlen = opt[0].litlen;
+            while (mlen <= best_mlen) {
+                price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off, mlen - MINMATCH);
+                if (mlen > last_pos || price < opt[mlen].price)
+                    SET_PRICE(mlen, mlen, matches[u].off, litlen, price);
+                mlen++;
+        }   }
+
+        if (last_pos < MINMATCH) {
+            // ip += ((ip-anchor) >> g_searchStrength) + 1;   /* jump faster over incompressible sections */
+            ip++; continue;
+        }
+
+        /* check further positions */
+        for (skip_num = 0, cur = 1; cur <= last_pos; cur++) {
+           size_t cur_rep;
+           inr = ip + cur;
+
+           if (opt[cur-1].mlen == 1) {
+                litlen = opt[cur-1].litlen + 1;
+                if (cur > litlen) {
+                    price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-litlen);
+                    ZSTD_LOG_TRY_PRICE("%d: TRY1 opt[%d].price=%d price=%d cur=%d litlen=%d\n", (int)(inr-base), cur - litlen, opt[cur - litlen].price, price, cur, litlen);
+                } else
+                    price = ZSTD_getLiteralPrice(seqStorePtr, litlen, anchor);
+           } else {
+                litlen = 1;
+                price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-1);
+                ZSTD_LOG_TRY_PRICE("%d: TRY3 price=%d cur=%d litlen=%d litonly=%d\n", (int)(inr-base), price, cur, litlen, (int)ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-1));
+           }
+
+           ZSTD_LOG_TRY_PRICE("%d: TRY4 price=%d opt[%d].price=%d\n", (int)(inr-base), price, cur, opt[cur].price);
+
+           if (cur > last_pos || price <= opt[cur].price) // || ((price == opt[cur].price) && (opt[cur-1].mlen == 1) && (cur != litlen)))
+                SET_PRICE(cur, 1, 0, litlen, price);
+
+           if (cur == last_pos) break;
+
+           if (inr > ilimit) // last match must start at a minimum distance of 8 from oend
+               continue;
+
+            mlen = opt[cur].mlen;
+
+            if (opt[cur-mlen].off) {
+                opt[cur].rep2 = opt[cur-mlen].rep;
+                opt[cur].rep = opt[cur-mlen].off;
+                ZSTD_LOG_PARSER("%d: COPYREP1 cur=%d mlen=%d rep=%d rep2=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep, opt[cur].rep2);
+            } else {
+                if (cur!=mlen && opt[cur-mlen].litlen == 0) {
+                    opt[cur].rep2 = opt[cur-mlen].rep;
+                    opt[cur].rep = opt[cur-mlen].rep2;
+                    ZSTD_LOG_PARSER("%d: COPYREP2 cur=%d mlen=%d rep=%d rep2=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep, opt[cur].rep2);
+                } else {
+                    opt[cur].rep2 = opt[cur-mlen].rep2;
+                    opt[cur].rep = opt[cur-mlen].rep;
+                    ZSTD_LOG_PARSER("%d: COPYREP3 cur=%d mlen=%d rep=%d rep2=%d\n", (int)(inr-base), cur, mlen, opt[cur].rep, opt[cur].rep2);
+            }   }
+
+           ZSTD_LOG_PARSER("%d: CURRENT price[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep2=%d\n", (int)(inr-base), cur, last_pos, opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep, opt[cur].rep2);
+
+           best_mlen = 0;
+
+           if (!opt[cur].off && opt[cur].mlen != 1) {
+               cur_rep = opt[cur].rep2;
+               ZSTD_LOG_PARSER("%d: try REP2 rep2=%u mlen=%u\n", (int)(inr-base), (U32)cur_rep, mlen);
+           } else {
+               cur_rep = opt[cur].rep;
+               ZSTD_LOG_PARSER("%d: try REP1 rep=%u mlen=%u\n", (int)(inr-base), (U32)cur_rep, mlen);
+           }
+
+           const U32 repIndex = (U32)(current+cur - cur_rep);
+           const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
+           const BYTE* const repMatch = repBase + repIndex;
+           if ( ((U32)((dictLimit-1) - repIndex) >= 3)   /* intentional overflow */
+              &&(MEM_read32(inr) == MEM_read32(repMatch)) ) {
+                /* repcode detected */
+                const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
+                mlen = (U32)ZSTD_count_2segments(inr+MINMATCH, repMatch+MINMATCH, iend, repEnd, prefixStart) + MINMATCH;
+                ZSTD_LOG_PARSER("%d: Found REP mlen=%d off=%d rep=%d opt[%d].off=%d\n", (int)(inr-base), mlen, 0, opt[cur].rep, cur, opt[cur].off);
+
+                if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) {
+                    best_mlen = mlen;
+                    best_off = 0;
+                    ZSTD_LOG_PARSER("%d: REP sufficient_len=%d best_mlen=%d best_off=%d last_pos=%d\n", (int)(inr-base), sufficient_len, best_mlen, best_off, last_pos);
+                    last_pos = cur + 1;
+                    goto _storeSequence;
+                }
+
+                if (opt[cur].mlen == 1) {
+                    litlen = opt[cur].litlen;
+                    if (cur > litlen) {
+                        price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, 0, mlen - MINMATCH);
+                        ZSTD_LOG_TRY_PRICE("%d: TRY5 opt[%d].price=%d price=%d cur=%d litlen=%d\n", (int)(inr-base), cur - litlen, opt[cur - litlen].price, price, cur, litlen);
+                    } else
+                        price = ZSTD_getPrice(seqStorePtr, litlen, anchor, 0, mlen - MINMATCH);
+                } else {
+                    litlen = 0;
+                    price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, 0, mlen - MINMATCH);
+                    ZSTD_LOG_TRY_PRICE("%d: TRY7 price=%d cur=%d litlen=0 getprice=%d\n", (int)(inr-base), price, cur, (int)ZSTD_getPrice(seqStorePtr, 0, NULL, 0, mlen - MINMATCH));
+                }
+
+                best_mlen = mlen;
+                if (faster_get_matches) skip_num = best_mlen;
+
+                ZSTD_LOG_PARSER("%d: Found REP mlen=%d off=%d price=%d litlen=%d price[%d]=%d\n", (int)(inr-base), mlen, 0, price, litlen, cur - litlen, opt[cur - litlen].price);
+
+                do {
+                    if (cur + mlen > last_pos || price <= opt[cur + mlen].price) // || ((price == opt[cur + mlen].price) && (opt[cur].mlen == 1) && (cur != litlen))) // at equal price prefer REP instead of MATCH
+                        SET_PRICE(cur + mlen, mlen, 0, litlen, price);
+                    mlen--;
+                } while (mlen >= MINMATCH);
+            }
+
+            if (faster_get_matches && skip_num > 0) { skip_num--; continue; }
+
+            best_mlen = (best_mlen > MINMATCH) ? best_mlen : MINMATCH;
+
+            match_num = getAllMatches(ctx, inr, ip, iend, maxSearches, mls, matches, best_mlen);
+            ZSTD_LOG_PARSER("%d: ZSTD_GetAllMatches match_num=%d\n", (int)(inr-base), match_num);
+
+            if (match_num > 0 && matches[match_num-1].len > sufficient_len) {
+                cur -= matches[match_num-1].back;
+                best_mlen = matches[match_num-1].len;
+                best_off = matches[match_num-1].off;
+                last_pos = cur + 1;
+                goto _storeSequence;
+            }
+
+            // set prices using matches at position = cur
+            for (u = 0; u < match_num; u++) {
+                mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
+                cur2 = cur - matches[u].back;
+                best_mlen = (cur2 + matches[u].len < ZSTD_OPT_NUM) ? matches[u].len : ZSTD_OPT_NUM - cur2;
+
+                ZSTD_LOG_PARSER("%d: Found1 cur=%d cur2=%d mlen=%d off=%d best_mlen=%d last_pos=%d\n", (int)(inr-base), cur, cur2, matches[u].len, matches[u].off, best_mlen, last_pos);
+                if (mlen < matches[u].back + 1)
+                    mlen = matches[u].back + 1;
+
+                while (mlen <= best_mlen) {
+                    if (opt[cur2].mlen == 1) {
+                        litlen = opt[cur2].litlen;
+                        if (cur2 > litlen)
+                            price = opt[cur2 - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur2-litlen, matches[u].off, mlen - MINMATCH);
+                        else
+                            price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off, mlen - MINMATCH);
+                    } else {
+                        litlen = 0;
+                        price = opt[cur2].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off, mlen - MINMATCH);
+                    }
+
+                    ZSTD_LOG_PARSER("%d: Found2 pred=%d mlen=%d best_mlen=%d off=%d price=%d litlen=%d price[%d]=%d\n", (int)(inr-base), matches[u].back, mlen, best_mlen, matches[u].off, price, litlen, cur - litlen, opt[cur - litlen].price);
+                    ZSTD_LOG_TRY_PRICE("%d: TRY8 price=%d opt[%d].price=%d\n", (int)(inr-base), price, cur2 + mlen, opt[cur2 + mlen].price);
+
+                    if (cur2 + mlen > last_pos || (price < opt[cur2 + mlen].price))
+                        SET_PRICE(cur2 + mlen, mlen, matches[u].off, litlen, price);
+
+                    mlen++;
+        }   }   }   //  for (skip_num = 0, cur = 1; cur <= last_pos; cur++)
+
+        best_mlen = opt[last_pos].mlen;
+        best_off = opt[last_pos].off;
+        cur = last_pos - best_mlen;
+        // printf("%d: start=%d best_mlen=%d best_off=%d cur=%d\n", (int)(ip - base), (int)(start - ip), (int)best_mlen, (int)best_off, cur);
+
+        /* store sequence */
+_storeSequence: // cur, last_pos, best_mlen, best_off have to be set
+        for (u = 1; u <= last_pos; u++)
+            ZSTD_LOG_PARSER("%d: price[%u/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep2=%d\n", (int)(ip-base+u), u, last_pos, opt[u].price, opt[u].off, opt[u].mlen, opt[u].litlen, opt[u].rep, opt[u].rep2);
+        ZSTD_LOG_PARSER("%d: cur=%d/%d best_mlen=%d best_off=%d rep=%d\n", (int)(ip-base+cur), (int)cur, (int)last_pos, (int)best_mlen, (int)best_off, opt[cur].rep);
+
+        opt[0].mlen = 1;
+        /* walk back from cur towards 0 : each step moves (mlen, off) from the end position of a selection to its start position */
+        while (1) {
+            mlen = opt[cur].mlen;
+            ZSTD_LOG_PARSER("%d: cur=%d mlen=%d\n", (int)(ip-base), cur, mlen);
+            offset = opt[cur].off;
+            opt[cur].mlen = best_mlen;
+            opt[cur].off = best_off;
+            best_mlen = mlen;
+            best_off = offset;
+            if (mlen > cur) break;
+            cur -= mlen;
+        }
+
+        for (u = 0; u <= last_pos; ) {
+            ZSTD_LOG_PARSER("%d: price2[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep2=%d\n", (int)(ip-base+u), u, last_pos, opt[u].price, opt[u].off, opt[u].mlen, opt[u].litlen, opt[u].rep, opt[u].rep2);
+            u += opt[u].mlen;
+        }
+        /* emit the selected path front to back : mlen==1 steps only advance ip (literals), other steps are stored as sequences */
+        for (cur=0; cur < last_pos; ) {
+            U32 litLength;
+            ZSTD_LOG_PARSER("%d: price3[%d/%d]=%d off=%d mlen=%d litlen=%d rep=%d rep2=%d\n", (int)(ip-base+cur), cur, last_pos, opt[cur].price, opt[cur].off, opt[cur].mlen, opt[cur].litlen, opt[cur].rep, opt[cur].rep2);
+            mlen = opt[cur].mlen;
+            if (mlen == 1) { ip++; cur++; continue; }
+            offset = opt[cur].off;
+            cur += mlen;
+
+            litLength = (U32)(ip - anchor);
+            ZSTD_LOG_ENCODE("%d/%d: ENCODE1 literals=%d mlen=%d off=%d rep1=%d rep2=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep_1, (int)rep_2);
+
+            if (offset) {
+                rep_2 = rep_1;
+                rep_1 = offset;
+            } else {
+                if (litLength == 0) {
+                    best_off = rep_2;
+                    rep_2 = rep_1;
+                    rep_1 = best_off;
+            }   }
+
+            ZSTD_LOG_ENCODE("%d/%d: ENCODE2 literals=%d mlen=%d off=%d rep1=%d rep2=%d\n", (int)(ip-base), (int)(iend-base), (int)(litLength), (int)mlen, (int)(offset), (int)rep_1, (int)rep_2);
+
+#if ZSTD_OPT_DEBUG >= 5
+            int ml2;
+            if (offset)
+                ml2 = ZSTD_count(ip, ip-offset, iend);
+            else
+                ml2 = ZSTD_count(ip, ip-rep_1, iend);
+            if (ml2 < mlen && ml2 < MINMATCH) {
+                printf("%d: ERROR iend=%d mlen=%d offset=%d ml2=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset, (int)ml2); exit(0); }
+            if (ip < anchor) {
+                printf("%d: ERROR ip < anchor iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); }
+            if (ip - offset < ctx->base) {
+                printf("%d: ERROR ip - offset < base iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); }
+            if ((int)offset >= (1 << ctx->params.windowLog)) {
+                printf("%d: offset >= (1 << params.windowLog) iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); }
+            if (mlen < MINMATCH) {
+                printf("%d: ERROR mlen < MINMATCH iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); }
+            if (ip + mlen > iend) {
+                printf("%d: ERROR ip + mlen >= iend iend=%d mlen=%d offset=%d\n", (int)(ip - base), (int)(iend - ip), (int)mlen, (int)offset); exit(0); }
+#endif
+
+            ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
+            ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
+            anchor = ip = ip + mlen;
+        }
+
+        /* check immediate repcode */
+        while (anchor <= ilimit) {
+            const U32 repIndex = (U32)((anchor-base) - rep_2);
+            const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
+            const BYTE* const repMatch = repBase + repIndex;
+            if ( ((U32)((dictLimit-1) - repIndex) >= 3)   /* intentional overflow */
+               && (MEM_read32(anchor) == MEM_read32(repMatch)) ) {
+                /* repcode detected, let's take it */
+                const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
+                mlen = (U32)ZSTD_count_2segments(anchor+MINMATCH, repMatch+MINMATCH, iend, repEnd, prefixStart) + MINMATCH;
+                offset = rep_2; rep_2 = rep_1; rep_1 = offset;   /* swap offset history */
+                ZSTD_LOG_ENCODE("%d/%d: ENCODE REP literals=%d mlen=%d off=%d rep1=%d rep2=%d\n", (int)(anchor-base), (int)(iend-base), (int)(0), (int)mlen, (int)(0), (int)rep_1, (int)rep_2);
+                ZSTD_updatePrice(seqStorePtr, 0, anchor, 0, mlen-MINMATCH);
+                ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, mlen-MINMATCH);
+                anchor += mlen;
+                continue;   /* faster when present ... (?) */
+            }
+            break;
+        }
+        if (anchor > ip) ip = anchor;
+    }
+
+    {   /* Last Literals */
+        size_t lastLLSize = iend - anchor;
+        ZSTD_LOG_ENCODE("%d: lastLLSize literals=%u\n", (int)(ip-base), (U32)(lastLLSize));
+        memcpy(seqStorePtr->lit, anchor, lastLLSize);
+        seqStorePtr->lit += lastLLSize;
+    }
+}
diff --git a/lib/zstd_static.h b/lib/zstd_static.h
index 7ceed8fe1a7..61216535b36 100644
--- a/lib/zstd_static.h
+++ b/lib/zstd_static.h
@@ -27,14 +27,14 @@
     OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
     You can contact the author at :
-    - zstd source repository : https://github.com/Cyan4973/zstd
+    - zstd homepage : http://www.zstd.net
 */
 #ifndef ZSTD_STATIC_H
 #define ZSTD_STATIC_H
 
-/* The objects defined into this file shall be considered experimental.
- * They are not considered stable, as their prototype may change in the future.
- * You can use them for tests, provide feedback, or if you can endure risks of future changes.
+/* The prototypes defined within this file are considered experimental.
+ * They should not be used in the context of a DLL, as they may change in the future.
+ * Prefer static linking if you need them, to keep control over breaking changes between versions.
  */
 
 #if defined (__cplusplus)
@@ -48,6 +48,12 @@ extern "C" {
 #include "mem.h"
 
 
+/*-*************************************
+*  Constants
+***************************************/
+#define ZSTD_MAGICNUMBER 0xFD2FB525   /* v0.5 */
+
+
 /*-*************************************
 *  Types
 ***************************************/
@@ -57,31 +63,33 @@ extern "C" {
 #define ZSTD_CONTENTLOG_MAX (ZSTD_WINDOWLOG_MAX+1)
 #define ZSTD_CONTENTLOG_MIN 4
 #define ZSTD_HASHLOG_MAX 28
-#define ZSTD_HASHLOG_MIN 4
+#define ZSTD_HASHLOG_MIN 12
 #define ZSTD_SEARCHLOG_MAX (ZSTD_CONTENTLOG_MAX-1)
 #define ZSTD_SEARCHLOG_MIN 1
 #define ZSTD_SEARCHLENGTH_MAX 7
 #define ZSTD_SEARCHLENGTH_MIN 4
+#define ZSTD_TARGETLENGTH_MIN 4
+#define ZSTD_TARGETLENGTH_MAX 999
 
-/** from faster to stronger */
-typedef enum { ZSTD_fast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2 } ZSTD_strategy;
+/* from faster to stronger */
+typedef enum { ZSTD_fast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2, ZSTD_opt, ZSTD_btopt } ZSTD_strategy;
 
 typedef struct
 {
     U64 srcSize;       /* optional : tells how much bytes are present in the frame. Use 0 if not known. */
     U32 windowLog;     /* largest match distance : larger == more compression, more memory needed during decompression */
     U32 contentLog;    /* full search segment : larger == more compression, slower, more memory (useless for fast) */
-    U32 hashLog;       /* dispatch table : larger == more memory, faster */
+    U32 hashLog;       /* dispatch table : larger == faster, more memory */
     U32 searchLog;     /* nb of searches : larger == more compression, slower */
-    U32 searchLength;  /* size of matches : larger == faster decompression, sometimes less compression */
+    U32 searchLength;  /* match length searched : larger == faster decompression, sometimes less compression */
+    U32 targetLength;  /* acceptable match size for optimal parser (only) : larger == more compression, slower */
     ZSTD_strategy strategy;
 } ZSTD_parameters;
 
 
-/* *************************************
+/*-*************************************
 *  Advanced functions
 ***************************************/
-#define ZSTD_MAX_CLEVEL 20
 ZSTDLIB_API unsigned ZSTD_maxCLevel (void);
 
 /*! ZSTD_getParams() :
@@ -203,7 +211,7 @@ ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t ds
 *  Block functions
 ****************************************/
 /*! Block functions produce and decode raw zstd blocks, without frame metadata.
-    User will have to save and regenerate necessary information to regenerate data, such as block sizes.
+    User will have to take charge of the information required to regenerate data, such as block sizes.
 
     A few rules to respect :
     - Uncompressed block size must be <= 128 KB
@@ -224,13 +232,13 @@ size_t ZSTD_compressBlock  (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, cons
 size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 
 
-/* *************************************
+/*-*************************************
 *  Error management
 ***************************************/
 #include "error_public.h"
 /*! ZSTD_getErrorCode() :
     convert a `size_t` function result into a `ZSTD_error_code` enum type,
-    which can be used to compare directly with enum list within "error_public.h" */
+    which can be used to compare directly with the enum list published in "error_public.h" */
 ZSTD_ErrorCode ZSTD_getError(size_t code);
 
 
diff --git a/programs/Makefile b/programs/Makefile
index 77b4cb46bb2..4a650c48854 100644
--- a/programs/Makefile
+++ b/programs/Makefile
@@ -1,6 +1,6 @@
 # ##########################################################################
 # ZSTD programs - Makefile
-# Copyright (C) Yann Collet 2015
+# Copyright (C) Yann Collet 2015-2016
 #
 # GPL v2 License
 #
@@ -19,13 +19,14 @@
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 #
 # You can contact the author at :
-#  - ZSTD source repository : http://code.google.com/p/zstd/
-#  - Public forum : https://groups.google.com/forum/#!forum/lz4c
+#  - zstd homepage : http://www.zstd.net/
 # ##########################################################################
 # zstd : Command Line Utility, supporting gzip-like arguments
 # datagen : Synthetic and parametrable data generator, for tests
 # fuzzer  : Test tool, to check zstd integrity on target platform
 # fuzzer32: Same as fuzzer, but forced to compile in 32-bits mode
+# zbufftest  : Test tool, to check ZBUFF integrity on target platform
+# zbufftest32: Same as zbufftest, but forced to compile in 32-bits mode
 # fullbench  : Precisely measure speed for each zstd inner function
 # fullbench32: Same as fullbench, but forced to compile in 32-bits mode
 # ##########################################################################
@@ -52,15 +53,15 @@ BINDIR  = $(PREFIX)/bin
 MANDIR  = $(PREFIX)/share/man/man1
 ZSTDDIR = ../lib
 
-ZSTD_FILES := $(ZSTDDIR)/zstd_compress.c $(ZSTDDIR)/zstd_decompress.c $(ZSTDDIR)/fse.c $(ZSTDDIR)/huff0.c
-ZSTD_LEGACY:= $(ZSTDDIR)/legacy/zstd_v01.c $(ZSTDDIR)/legacy/zstd_v02.c $(ZSTDDIR)/legacy/zstd_v03.c $(ZSTDDIR)/legacy/zstd_v04.c
+ZSTD_FILES := $(ZSTDDIR)/huff0.c $(ZSTDDIR)/fse.c $(ZSTDDIR)/zstd_compress.c $(ZSTDDIR)/zstd_decompress.c
 
 ifeq ($(ZSTD_LEGACY_SUPPORT), 0)
 CPPFLAGS  += -DZSTD_LEGACY_SUPPORT=0
+ZSTD_FILES_LEGACY:=
 else
-ZSTD_FILES+= $(ZSTD_LEGACY)
-CPPFLAGS  += -I../lib/legacy -I./legacy -DZSTD_LEGACY_SUPPORT=1
-ZSTD_FILEIO_LEGACY = legacy/fileio_legacy.c
+ZSTD_LEGACY_SUPPORT:=1
+CPPFLAGS  += -I../lib/legacy -I./legacy
+ZSTD_FILES_LEGACY:= $(ZSTDDIR)/legacy/zstd_v01.c $(ZSTDDIR)/legacy/zstd_v02.c $(ZSTDDIR)/legacy/zstd_v03.c $(ZSTDDIR)/legacy/zstd_v04.c legacy/fileio_legacy.c
 endif
 
 
@@ -75,6 +76,7 @@ endif
 
 ZBUFFTEST = -T2mn
 FUZZERTEST= -T5mn
+ZSTDRTTEST= --test-large-data
 
 .PHONY: default all clean install uninstall test test32 test-all
 
@@ -82,29 +84,33 @@ default: zstd
 
 all: zstd zstd32 fullbench fullbench32 fuzzer fuzzer32 zbufftest zbufftest32 paramgrill datagen
 
-zstd  : $(ZSTD_FILES) $(ZSTDDIR)/zstd_buffered.c \
-        zstdcli.c fileio.c $(ZSTD_FILEIO_LEGACY) bench.c xxhash.c datagen.c 
-	$(CC)      $(FLAGS) $^ -o $@$(EXT)
+zstd  : $(ZSTD_FILES) $(ZSTD_FILES_LEGACY) $(ZSTDDIR)/zbuff.c $(ZSTDDIR)/zdict.c $(ZSTDDIR)/divsufsort.c \
+        zstdcli.c fileio.c bench.c xxhash.c datagen.c dibio.c
+	$(CC)      $(FLAGS) -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) $^ -o $@$(EXT)
 
-zstd32: $(ZSTD_FILES) $(ZSTDDIR)/zstd_buffered.c \
-        zstdcli.c fileio.c $(ZSTD_FILEIO_LEGACY) bench.c xxhash.c datagen.c 
-	$(CC) -m32 $(FLAGS) $^ -o $@$(EXT)
+zstd32: $(ZSTD_FILES) $(ZSTD_FILES_LEGACY) $(ZSTDDIR)/zbuff.c $(ZSTDDIR)/zdict.c $(ZSTDDIR)/divsufsort.c \
+        zstdcli.c fileio.c bench.c xxhash.c datagen.c dibio.c 
+	$(CC) -m32 $(FLAGS) -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) $^ -o $@$(EXT)
 
 zstd_nolegacy :
 	$(MAKE) zstd ZSTD_LEGACY_SUPPORT=0
 
 zstd-pgo : MOREFLAGS = -fprofile-generate
 zstd-pgo : clean zstd
+	./zstd -b19i1 $(PROFILE_WITH)
+	./zstd -b16i1 $(PROFILE_WITH)
+	./zstd -b9i2 $(PROFILE_WITH)
 	./zstd -b $(PROFILE_WITH)
+	./zstd -b7i2 $(PROFILE_WITH)
+	./zstd -b5 $(PROFILE_WITH)
 	rm zstd
 	$(MAKE) zstd MOREFLAGS=-fprofile-use
 
-zstd-noBench: $(ZSTD_FILES) $(ZSTDDIR)/zstd_buffered.c \
-        zstdcli.c fileio.c $(ZSTD_FILEIO_LEGACY)
-	$(CC)      $(FLAGS) -DZSTD_NOBENCH $^ -o zstd$(EXT)
+zstd-frugal: $(ZSTD_FILES) $(ZSTDDIR)/zbuff.c zstdcli.c fileio.c
+	$(CC)      $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_LEGACY_SUPPORT=0 $^ -o zstd$(EXT)
 
-zstd-frugal: clean 
-	$(MAKE) zstd-noBench ZSTD_LEGACY_SUPPORT=0 
+zstd-small: clean 
+	CFLAGS="-Os -s" $(MAKE) zstd-frugal 
 
 fullbench  : $(ZSTD_FILES) \
         datagen.c fullbench.c
@@ -122,11 +128,11 @@ fuzzer32: $(ZSTD_FILES) \
       datagen.c xxhash.c fuzzer.c
 	$(CC) -m32 $(FLAGS) $^ -o $@$(EXT)
 
-zbufftest  : $(ZSTD_FILES) $(ZSTDDIR)/zstd_buffered.c \
+zbufftest  : $(ZSTD_FILES) $(ZSTDDIR)/zbuff.c \
       datagen.c xxhash.c zbufftest.c
 	$(CC)      $(FLAGS) $^ -o $@$(EXT)
 
-zbufftest32: $(ZSTD_FILES) $(ZSTDDIR)/zstd_buffered.c \
+zbufftest32: $(ZSTD_FILES) $(ZSTDDIR)/zbuff.c \
       datagen.c xxhash.c zbufftest.c
 	$(CC) -m32 $(FLAGS) $^ -o $@$(EXT)
 
@@ -138,7 +144,7 @@ datagen : datagen.c datagencli.c
 	$(CC)      $(FLAGS) $^ -o $@$(EXT)
 
 clean:
-	@rm -f core *.o tmp* result* *.gcda \
+	@rm -f core *.o tmp* result* *.gcda dictionary *.zst \
         zstd$(EXT) zstd32$(EXT) \
         fullbench$(EXT) fullbench32$(EXT) \
         fuzzer$(EXT) fuzzer32$(EXT) zbufftest$(EXT) zbufftest32$(EXT) \
@@ -178,7 +184,7 @@ test32: test-zstd32 test-fullbench32 test-fuzzer32 test-zbuff32
 test-all: test test32 valgrindTest
 
 zstd-playTests: datagen
-	ZSTD=$(ZSTD) ./playTests.sh --test-large-data
+	ZSTD=$(ZSTD) ./playTests.sh $(ZSTDRTTEST)
 
 test-zstd: ZSTD = ./zstd
 test-zstd: zstd zstd-playTests
@@ -213,12 +219,12 @@ valgrindTest: zstd datagen fuzzer fullbench zbufftest
 	@echo "\n ---- valgrind tests : memory analyzer ----"
 	valgrind --leak-check=yes --error-exitcode=1 ./datagen -g50M > $(VOID)
 	./datagen -g16KB > tmp
-	valgrind --leak-check=yes --error-exitcode=1 ./zstd -vf tmp $(VOID)
+	valgrind --leak-check=yes --error-exitcode=1 ./zstd -vf tmp -o $(VOID)
 	./datagen -g2930KB > tmp
-	valgrind --leak-check=yes --error-exitcode=1 ./zstd -5 -vf tmp tmp2
-	valgrind --leak-check=yes --error-exitcode=1 ./zstd -vdf tmp2 $(VOID)
+	valgrind --leak-check=yes --error-exitcode=1 ./zstd -5 -vf tmp -o tmp2
+	valgrind --leak-check=yes --error-exitcode=1 ./zstd -vdf tmp2 -o $(VOID)
 	./datagen -g64MB > tmp
-	valgrind --leak-check=yes --error-exitcode=1 ./zstd -vf tmp $(VOID)
+	valgrind --leak-check=yes --error-exitcode=1 ./zstd -vf tmp -o $(VOID)
 	@rm tmp
 	valgrind --leak-check=yes --error-exitcode=1 ./fuzzer -T1mn -t1
 	valgrind --leak-check=yes --error-exitcode=1 ./fullbench -i1
diff --git a/programs/dibio.c b/programs/dibio.c
new file mode 100644
index 00000000000..646fe2c60d3
--- /dev/null
+++ b/programs/dibio.c
@@ -0,0 +1,277 @@
+/*
+    dibio - I/O API for dictionary builder
+    Copyright (C) Yann Collet 2016
+
+    GPL v2 License
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    You can contact the author at :
+    - zstd homepage : http://www.zstd.net/
+*/
+
+/*-**************************************
+*  Compiler Options
+****************************************/
+/* Disable some Visual warning messages */
+#ifdef _MSC_VER
+#  define _CRT_SECURE_NO_WARNINGS                /* fopen */
+#  pragma warning(disable : 4127)                /* disable: C4127: conditional expression is constant */
+#endif
+
+/* Unix Large Files support (>4GB) */
+#define _FILE_OFFSET_BITS 64
+#if (defined(__sun__) && (!defined(__LP64__)))   /* Sun Solaris 32-bits requires specific definitions */
+#  define _LARGEFILE_SOURCE
+#elif ! defined(__LP64__)                        /* No point defining Large file for 64 bit */
+#  define _LARGEFILE64_SOURCE
+#endif
+
+
+/*-*************************************
+*  Includes
+***************************************/
+#include <stdlib.h>         /* malloc, free */
+#include <string.h>         /* memset */
+#include <stdio.h>          /* fprintf, fopen, ftello64 */
+#include <sys/types.h>      /* stat64 */
+#include <sys/stat.h>       /* stat64 */
+#include <time.h>           /* clock */
+
+#include "mem.h"            /* read */
+#include "error_private.h"
+#include "zdict_static.h"
+
+
+/*-*************************************
+*  Compiler specifics
+***************************************/
+#if !defined(S_ISREG)
+#  define S_ISREG(x) (((x) & S_IFMT) == S_IFREG)
+#endif
+
+
+/*-*************************************
+*  Constants
+***************************************/
+#define KB *(1 <<10)   /* postfix multipliers : `64 MB` expands to `64 *(1 <<20)` */
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define DICTLISTSIZE 10000
+#define MEMMULT 11   /* DiB_trainFromFiles() probes for MEMMULT x total size of samples, then divides the result by MEMMULT */
+static const size_t maxMemory = (sizeof(size_t) == 4) ? (2 GB - 64 MB) : ((size_t)(512 MB) << sizeof(size_t));   /* upper bound applied by DiB_findMaxMem() */
+
+#define NOISELENGTH 32
+#define PRIME1   2654435761U
+#define PRIME2   2246822519U
+
+
+/*-*************************************
+*  Console display
+***************************************/
+#define DISPLAY(...)         fprintf(stderr, __VA_ARGS__)
+#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }   /* expands to a bare `if` statement : never follow a call with `else` */
+static unsigned g_displayLevel = 0;   /* 0 : no display;   1: errors;   2: default;  4: full information ; overwritten by DiB_trainFromFiles() from params.notificationLevel */
+
+
+/*-*************************************
+*  Exceptions
+***************************************/
+#ifndef DEBUG
+#  define DEBUG 0
+#endif
+#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
+#define EXM_THROW(error, ...)                                             \
+{                                                                         \
+    DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
+    DISPLAYLEVEL(1, "Error %i : ", error);                                \
+    DISPLAYLEVEL(1, __VA_ARGS__);                                         \
+    DISPLAYLEVEL(1, "\n");                                                \
+    exit(error);                                                          \
+}
+
+
+/* ********************************************************
+*  Helper functions
+**********************************************************/
+unsigned DiB_isError(size_t errorCode) { return ERR_isError(errorCode); }   /* thin wrapper : forwards to ERR_isError() */
+
+const char* DiB_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }   /* thin wrapper : forwards to ERR_getErrorName() */
+
+
+/* ********************************************************
+*  File related operations
+**********************************************************/
+static unsigned long long DiB_getFileSize(const char* infilename)
+{   /* @return : size of `infilename` in bytes, or 0 when stat fails or the path is not a regular file */
+#if defined(_MSC_VER)
+    struct _stat64 fileInfo;
+    int const statError = _stat64(infilename, &fileInfo);
+#else
+    struct stat fileInfo;
+    int const statError = stat(infilename, &fileInfo);
+#endif
+    if (statError != 0) return 0;                /* stat failed */
+    if (!S_ISREG(fileInfo.st_mode)) return 0;    /* not a regular file */
+    return (unsigned long long)fileInfo.st_size;
+}
+
+
+static unsigned long long DiB_getTotalFileSize(const char** fileNamesTable, unsigned nbFiles)
+{   /* @return : sum of DiB_getFileSize() over the `nbFiles` entries of `fileNamesTable` */
+    unsigned long long sum = 0;
+    unsigned fileNb = 0;
+    while (fileNb < nbFiles)
+        sum += DiB_getFileSize(fileNamesTable[fileNb++]);
+    return sum;
+}
+
+
+static void DiB_loadFiles(void* buffer, size_t bufferSize,
+                          size_t* fileSizes,
+                          const char** fileNamesTable, unsigned nbFiles)
+{   /* loads files back to back into `buffer` ; fileSizes[n] receives the nb of bytes taken from file n ; exits on open/read error */
+    char* const dst = (char*)buffer;
+    size_t filled = 0;
+    unsigned fileNb;
+    for (fileNb=0; fileNb<nbFiles; fileNb++) {
+        const char* const fileName = fileNamesTable[fileNb];
+        unsigned long long toLoad = DiB_getFileSize(fileName);
+        size_t loaded;
+        FILE* const f = fopen(fileName, "rb");
+        if (f==NULL) EXM_THROW(10, "impossible to open file %s", fileName);
+        DISPLAYLEVEL(2, "Loading %s...       \r", fileName);
+        if (toLoad > bufferSize-filled) toLoad = 0;  /* file does not fit in what remains of the buffer : load nothing from it */
+        loaded = fread(dst+filled, 1, (size_t)toLoad, f);
+        if (loaded != (size_t)toLoad) EXM_THROW(11, "could not read %s", fileName);
+        fileSizes[fileNb] = (size_t)toLoad;
+        filled += loaded;
+        fclose(f);
+    }
+}
+
+
+/*-********************************************************
+*  Dictionary training functions
+**********************************************************/
+static size_t DiB_findMaxMem(unsigned long long requiredMem)
+{   /* @return : a size malloc() could serve (starting from `requiredMem` rounded up, capped to maxMemory), minus a margin of one step ; 0 if none */
+    size_t const step = 8 MB;
+    void* testmem = NULL;
+
+    requiredMem = (((requiredMem >> 23) + 1) << 23);   /* move up to the next multiple of 8 MB */
+    requiredMem += 2 * step;                           /* compensates the first decrement and the final margin */
+    if (requiredMem > maxMemory) requiredMem = maxMemory;
+
+    while (!testmem) {
+        if (requiredMem < 2*step) return 0;   /* nothing left to probe : avoids wrapping below zero and looping on huge sizes */
+        requiredMem -= step;
+        testmem = malloc((size_t)requiredMem);
+    }
+    free(testmem);
+    return (size_t)(requiredMem - step);
+}
+
+
+static void DiB_fillNoise(void* buffer, size_t length)
+{   /* fills `length` bytes of `buffer` with a deterministic pseudo-random sequence */
+    unsigned char* const out = (unsigned char*)buffer;
+    unsigned state = PRIME1;
+    size_t pos;
+    for (pos=0; pos<length; pos++) {
+        state *= PRIME2;
+        out[pos] = (unsigned char)(state >> 21);
+    }
+}
+
+
+static void DiB_saveDict(const char* dictFileName,
+                         const void* buff, size_t buffSize)
+{   /* writes `buffSize` bytes from `buff` into file `dictFileName` ; exits (via EXM_THROW) on any open/write/close error */
+    FILE* const dictFile = fopen(dictFileName, "wb");
+    if (dictFile==NULL) EXM_THROW(3, "cannot open %s ", dictFileName);
+
+    /* write the whole dictionary in a single call */
+    {   size_t const written = fwrite(buff, 1, buffSize, dictFile);
+        if (written!=buffSize) EXM_THROW(4, "%s : write error", dictFileName);
+    }
+
+    {   int const closeError = fclose(dictFile);
+        if (closeError!=0) EXM_THROW(5, "%s : flush error", dictFileName);
+    }
+}
+
+
+/*! ZDICT_trainFromBuffer_unsafe() :
+    Strictly Internal use only !!
+    Same as ZDICT_trainFromBuffer_advanced(), but does not control `samplesBuffer`.
+    `samplesBuffer` must be followed by noisy guard band to avoid out-of-buffer reads.
+    @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
+              or an error code.
+*/
+size_t ZDICT_trainFromBuffer_unsafe(void* dictBuffer, size_t dictBufferCapacity,
+                              const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
+                              ZDICT_params_t parameters);
+
+
+int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
+                       const char** fileNamesTable, unsigned nbFiles,
+                       ZDICT_params_t params)
+{   /* @return : 0 when the dictionary was trained and saved into `dictFileName`, 1 when training failed ; exits on memory or I/O error */
+    void* srcBuffer;
+    size_t benchedSize;
+    size_t* fileSizes = (size_t*)malloc(nbFiles * sizeof(size_t));
+    unsigned long long totalSizeToLoad = DiB_getTotalFileSize(fileNamesTable, nbFiles);
+    void* dictBuffer = malloc(maxDictSize);
+    size_t dictSize;
+    int result = 0;
+
+    /* init : limit the amount of samples to load to what DiB_findMaxMem() reports as allocatable */
+    g_displayLevel = params.notificationLevel;
+    benchedSize = DiB_findMaxMem(totalSizeToLoad * MEMMULT) / MEMMULT;
+    if ((unsigned long long)benchedSize > totalSizeToLoad) benchedSize = (size_t)totalSizeToLoad;
+    if (benchedSize < totalSizeToLoad) {   /* braces : DISPLAYLEVEL expands to a bare `if` */
+        DISPLAYLEVEL(1, "Not enough memory; training on %u MB only...\n", (unsigned)(benchedSize >> 20)); }
+
+    /* Memory allocation & restrictions */
+    srcBuffer = malloc(benchedSize+NOISELENGTH);     /* + noise */
+    if ((!fileSizes) || (!srcBuffer) || (!dictBuffer)) EXM_THROW(12, "not enough memory for DiB_trainFiles");  /* should not happen */
+
+    /* Load input buffer */
+    DiB_loadFiles(srcBuffer, benchedSize, fileSizes, fileNamesTable, nbFiles);
+    DiB_fillNoise((char*)srcBuffer + benchedSize, NOISELENGTH);   /* guard band, for end of buffer condition */
+
+    /* call buffer version */
+    dictSize = ZDICT_trainFromBuffer_unsafe(dictBuffer, maxDictSize,
+                        srcBuffer, fileSizes, nbFiles,
+                        params);
+    if (ZDICT_isError(dictSize)) {
+        DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize));   /* should not happen */
+        result = 1;
+        goto _cleanup;
+    }
+
+    /* save dict */
+    DISPLAYLEVEL(2, "Save dictionary of size %u into file %s \n", (U32)dictSize, dictFileName);
+    DiB_saveDict(dictFileName, dictBuffer, dictSize);
+
+    /* clean up : reached on success and on training failure alike */
+_cleanup:
+    free(srcBuffer);
+    free(dictBuffer);
+    free(fileSizes);
+    return result;
+}
diff --git a/programs/dibio.h b/programs/dibio.h
new file mode 100644
index 00000000000..0ccec4135da
--- /dev/null
+++ b/programs/dibio.h
@@ -0,0 +1,52 @@
+/*
+    dibio.h - I/O API for dictionary builder
+    Copyright (C) Yann Collet 2016
+
+    GPL v2 License
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    You can contact the author at :
+    - zstd homepage : http://www.zstd.net/
+*/
+
+/* This library is designed for a single-threaded console application.
+*  It calls exit() and prints to stderr when it encounters an error condition. */
+
+#ifndef DIBIO_H_003
+#define DIBIO_H_003
+
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#include "zdict_static.h"   /* ZDICT_params_t */
+
+
+/*-*************************************
+*  Public functions
+***************************************/
+/*! DiB_trainFromFiles() :
+    Train a dictionary from a set of files provided by `fileNamesTable`.
+    Resulting dictionary is written into file `dictFileName`.
+    `parameters` is optional and can be provided with values set to 0, meaning "default".
+    @return : 0 == ok. Any other : error.
+*/
+int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
+                       const char** fileNamesTable, unsigned nbFiles,
+                       ZDICT_params_t parameters);
+
+
+#endif
diff --git a/programs/fileio.c b/programs/fileio.c
index ed2a06181d1..028c7db45a7 100644
--- a/programs/fileio.c
+++ b/programs/fileio.c
@@ -1,6 +1,6 @@
 /*
-  fileio.c - File i/o handler
-  Copyright (C) Yann Collet 2013-2015
+  fileio.c - File i/o handler for zstd
+  Copyright (C) Yann Collet 2013-2016
 
   GPL v2 License
 
@@ -19,8 +19,7 @@
   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 
   You can contact the author at :
-  - zstd source repository : https://github.com/Cyan4973/zstd
-  - Public forum : https://groups.google.com/forum/#!forum/lz4c
+  - zstd homepage : http://www.zstd.net
 */
 /*
   Note : this is stand-alone program.
@@ -33,7 +32,7 @@
 *  Tuning options
 ***************************************/
 #ifndef ZSTD_LEGACY_SUPPORT
-/**LEGACY_SUPPORT :
+/* LEGACY_SUPPORT :
 *  decompressor can decode older formats (starting from Zstd 0.1+) */
 #  define ZSTD_LEGACY_SUPPORT 1
 #endif
@@ -53,7 +52,7 @@
 #define _POSIX_SOURCE 1        /* enable fileno() within <stdio.h> on unix */
 
 
-/* *************************************
+/*-*************************************
 *  Includes
 ***************************************/
 #include <stdio.h>      /* fprintf, fopen, fread, _fileno, stdin, stdout */
@@ -66,23 +65,20 @@
 #include "mem.h"
 #include "fileio.h"
 #include "zstd_static.h"   /* ZSTD_magicNumber */
-#include "zstd_buffered_static.h"
+#include "zbuff_static.h"
 
 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
-#  include "zstd_legacy.h"    /* legacy */
-#  include "fileio_legacy.h"  /* legacy */
+#  include "zstd_legacy.h"    /* ZSTD_isLegacy */
+#  include "fileio_legacy.h"  /* FIO_decompressLegacyFrame */
 #endif
 
 
-/* *************************************
+/*-*************************************
 *  OS-specific Includes
 ***************************************/
 #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__)
 #  include <fcntl.h>    /* _O_BINARY */
 #  include <io.h>       /* _setmode, _isatty */
-#  ifdef __MINGW32__
-   // int _fileno(FILE *stream);   /* seems no longer useful /* MINGW somehow forgets to include this windows declaration into <stdio.h> */
-#  endif
 #  define SET_BINARY_MODE(file) { int unused = _setmode(_fileno(file), _O_BINARY); (void)unused; }
 #  define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream))
 #else
@@ -96,7 +92,7 @@
 #endif
 
 
-/* *************************************
+/*-*************************************
 *  Constants
 ***************************************/
 #define KB *(1U<<10)
@@ -116,14 +112,15 @@
 #define BLOCKSIZE      (128 KB)
 #define ROLLBUFFERSIZE (BLOCKSIZE*8*64)
 
-#define FIO_FRAMEHEADERSIZE 5        /* as a define, because needed to allocated table on stack */
-#define FSE_CHECKSUM_SEED        0
+#define FIO_FRAMEHEADERSIZE  5        /* as a define, because needed to allocated table on stack */
+#define FSE_CHECKSUM_SEED    0
 
 #define CACHELINE 64
 
-#define MAX_DICT_SIZE (512 KB)
+#define MAX_DICT_SIZE (1 MB)   /* protection against large input (attack scenario) ; can be changed */
 
-/* *************************************
+
+/*-*************************************
 *  Macros
 ***************************************/
 #define DISPLAY(...)         fprintf(stderr, __VA_ARGS__)
@@ -137,17 +134,18 @@ static U32 g_displayLevel = 2;   /* 0 : no display;   1: errors;   2 : + result
 static const unsigned refreshRate = 150;
 static clock_t g_time = 0;
 
+#define MAX(a,b)   ((a)>(b)?(a):(b))
 
-/* *************************************
+
+/*-*************************************
 *  Local Parameters
 ***************************************/
 static U32 g_overwrite = 0;
-
 void FIO_overwriteMode(void) { g_overwrite=1; }
 void FIO_setNotificationLevel(unsigned level) { g_displayLevel=level; }
 
 
-/* *************************************
+/*-*************************************
 *  Exceptions
 ***************************************/
 #ifndef DEBUG
@@ -164,7 +162,7 @@ void FIO_setNotificationLevel(unsigned level) { g_displayLevel=level; }
 }
 
 
-/* *************************************
+/*-*************************************
 *  Functions
 ***************************************/
 static unsigned FIO_GetMilliSpan(clock_t nPrevious)
@@ -190,54 +188,57 @@ static U64 FIO_getFileSize(const char* infilename)
 }
 
 
-static int FIO_getFiles(FILE** fileOutPtr, FILE** fileInPtr,
-                        const char* dstFileName, const char* srcFileName)
+static FILE* FIO_openSrcFile(const char* srcFileName)
 {
+    FILE* f;
+
     if (!strcmp (srcFileName, stdinmark)) {
         DISPLAYLEVEL(4,"Using stdin for input\n");
-        *fileInPtr = stdin;
+        f = stdin;
         SET_BINARY_MODE(stdin);
     } else {
-        *fileInPtr = fopen(srcFileName, "rb");
+        f = fopen(srcFileName, "rb");
     }
 
-    if ( *fileInPtr==0 ) {
-        DISPLAYLEVEL(1, "Unable to access file for processing: %s\n", srcFileName);
-        return 1;
-    }
+    if ( f==NULL ) DISPLAYLEVEL(1, "zstd: %s: No such file\n", srcFileName);
+
+    return f;
+}
+
+
+static FILE* FIO_openDstFile(const char* dstFileName)
+{
+    FILE* f;
 
     if (!strcmp (dstFileName, stdoutmark)) {
         DISPLAYLEVEL(4,"Using stdout for output\n");
-        *fileOutPtr = stdout;
+        f = stdout;
         SET_BINARY_MODE(stdout);
     } else {
         if (!g_overwrite) {  /* Check if destination file already exists */
-            *fileOutPtr = fopen( dstFileName, "rb" );
-            if (*fileOutPtr != 0) {  /* dest file exists, prompt for overwrite authorization */
-                fclose(*fileOutPtr);
-                DISPLAY("Warning : %s already exists \n", dstFileName);
-                if ((g_displayLevel <= 1) || (*fileInPtr == stdin)) {
+            f = fopen( dstFileName, "rb" );
+            if (f != 0) {  /* dest file exists, prompt for overwrite authorization */
+                fclose(f);
+                if (g_displayLevel <= 1) {
                     /* No interaction possible */
-                    DISPLAY("Operation aborted : %s already exists \n", dstFileName);
-                    return 1;
+                    DISPLAY("zstd: %s already exists; not overwritten  \n", dstFileName);
+                    return 0;
                 }
-                DISPLAY("Overwrite ? (y/N) : ");
+                DISPLAY("zstd: %s already exists; do you wish to overwrite (y/N) ? ", dstFileName);
                 {
                     int ch = getchar();
                     if ((ch!='Y') && (ch!='y')) {
-                        DISPLAY("No. Operation aborted : %s already exists \n", dstFileName);
-                        return 1;
+                        DISPLAY("    not overwritten  \n");
+                        return 0;
                     }
                     while ((ch!=EOF) && (ch!='\n')) ch = getchar();  /* flush rest of input line */
         }   }   }
-        *fileOutPtr = fopen( dstFileName, "wb" );
+        f = fopen( dstFileName, "wb" );
     }
-
-    if (*fileOutPtr==0) EXM_THROW(13, "Pb opening %s", dstFileName);
-
-    return 0;
+    return f;
 }
 
+
 /*!FIO_loadFile
 *  creates a buffer, pointed by *bufferPtr,
 *  loads "filename" content into it
@@ -284,6 +285,8 @@ typedef struct {
     void*  dictBuffer;
     size_t dictBufferSize;
     ZBUFF_CCtx* ctx;
+    FILE* dstFile;
+    FILE* srcFile;
 } cRess_t;
 
 static cRess_t FIO_createCResources(const char* dictFileName)
@@ -317,27 +320,24 @@ static void FIO_freeCResources(cRess_t ress)
 }
 
 
-/*
- * FIO_compressFilename_extRess()
- * result : 0 : compression completed correctly
- *          1 : missing or pb opening srcFileName
+/*! FIO_compressFilename_internal() :
+ *  same as FIO_compressFilename_extRess(), with `ress.srcFile` and `ress.dstFile` already opened
+ *  @return : 0 : compression completed correctly,
+ *            1 : missing or pb opening srcFileName
  */
-static int FIO_compressFilename_extRess(cRess_t ress,
-                                        const char* dstFileName, const char* srcFileName,
-                                        int cLevel)
+static int FIO_compressFilename_internal(cRess_t ress,
+                                         const char* dstFileName, const char* srcFileName,
+                                         int cLevel)
 {
-    FILE* srcFile;
-    FILE* dstFile;
+    FILE* srcFile = ress.srcFile;
+    FILE* dstFile = ress.dstFile;
     U64 filesize = 0;
     U64 compressedfilesize = 0;
     size_t dictSize = ress.dictBufferSize;
     size_t sizeCheck, errorCode;
 
-    /* File check */
-    if (FIO_getFiles(&dstFile, &srcFile, dstFileName, srcFileName)) return 1;
-
     /* init */
-    filesize = FIO_getFileSize(srcFileName) + dictSize;
+    filesize = MAX(FIO_getFileSize(srcFileName),dictSize);
     errorCode = ZBUFF_compressInit_advanced(ress.ctx, ress.dictBuffer, ress.dictBufferSize, ZSTD_getParams(cLevel, filesize));
     if (ZBUFF_isError(errorCode)) EXM_THROW(21, "Error initializing compression : %s", ZBUFF_getErrorName(errorCode));
 
@@ -350,8 +350,7 @@ static int FIO_compressFilename_extRess(cRess_t ress,
         filesize += inSize;
         DISPLAYUPDATE(2, "\rRead : %u MB  ", (U32)(filesize>>20));
 
-        {
-            /* Compress (buffered streaming ensures appropriate formatting) */
+        {   /* Compress using buffered streaming */
             size_t usedInSize = inSize;
             size_t cSize = ress.dstBufferSize;
             size_t result = ZBUFF_compressContinue(ress.ctx, ress.dstBuffer, &cSize, ress.srcBuffer, &usedInSize);
@@ -366,7 +365,6 @@ static int FIO_compressFilename_extRess(cRess_t ress,
             if (sizeCheck!=cSize) EXM_THROW(25, "Write error : cannot write compressed block into %s", dstFileName);
             compressedfilesize += cSize;
         }
-
         DISPLAYUPDATE(2, "\rRead : %u MB  ==> %.2f%%   ", (U32)(filesize>>20), (double)compressedfilesize/filesize*100);
     }
 
@@ -386,11 +384,53 @@ static int FIO_compressFilename_extRess(cRess_t ress,
     DISPLAYLEVEL(2,"Compressed %llu bytes into %llu bytes ==> %.2f%%\n",
         (unsigned long long) filesize, (unsigned long long) compressedfilesize, (double)compressedfilesize/filesize*100);
 
+    return 0;
+}
+
+
+/*! FIO_compressFilename_srcFile() :
+ *  note : `ress.dstFile` must already be opened by the caller ; only srcFileName is opened and closed here
+ *  @return : 0 : compression completed correctly,
+ *            1 : missing or pb opening srcFileName
+ */
+static int FIO_compressFilename_srcFile(cRess_t ress,
+                                        const char* dstFileName, const char* srcFileName,
+                                        int cLevel)
+{
+    int result;
+
+    /* File check */
+    ress.srcFile = FIO_openSrcFile(srcFileName);
+    if (!ress.srcFile) return 1;   /* srcFile could not be opened */
+
+    result = FIO_compressFilename_internal(ress, dstFileName, srcFileName, cLevel);
+
     /* clean */
-    fclose(srcFile);
-    if (fclose(dstFile)) EXM_THROW(28, "Write error : cannot properly close %s", dstFileName);
+    fclose(ress.srcFile);
+    return result;
+}
 
-    return 0;
+
+/*! FIO_compressFilename_extRess() :
+ *  @return : 0 : compression completed correctly,
+ *            1 : srcFileName or dstFileName could not be opened (includes : overwrite of dstFileName refused)
+ */
+static int FIO_compressFilename_extRess(cRess_t ress,
+                                        const char* dstFileName, const char* srcFileName,
+                                        int cLevel)
+{
+    int result;
+
+    ress.srcFile = FIO_openSrcFile(srcFileName);
+    if (ress.srcFile==0) return 1;
+    ress.dstFile = FIO_openDstFile(dstFileName);
+    if (ress.dstFile==0) { fclose(ress.srcFile); return 1; }   /* release the source before giving up */
+
+    result = FIO_compressFilename_internal(ress, dstFileName, srcFileName, cLevel);
+
+    fclose(ress.srcFile);   /* no pb to expect : only reading */
+    if (fclose(ress.dstFile)) EXM_THROW(28, "Write error : cannot properly close %s", dstFileName);
+    return result;
 }
 
 
@@ -431,21 +471,28 @@ int FIO_compressMultipleFilenames(const char** inFileNamesTable, unsigned nbFile
     int missed_files = 0;
     char* dstFileName = (char*)malloc(FNSPACE);
     size_t dfnSize = FNSPACE;
-    const size_t suffixSize = strlen(suffix);
+    const size_t suffixSize = suffix ? strlen(suffix) : 0;
     cRess_t ress;
 
     /* init */
     ress = FIO_createCResources(dictFileName);
 
     /* loop on each file */
-    for (u=0; u<nbFiles; u++) {
-        size_t ifnSize = strlen(inFileNamesTable[u]);
-        if (dfnSize <= ifnSize+suffixSize+1) { free(dstFileName); dfnSize = ifnSize + 20; dstFileName = (char*)malloc(dfnSize); }
-        strcpy(dstFileName, inFileNamesTable[u]);
-        strcat(dstFileName, suffix);
-
-        missed_files += FIO_compressFilename_extRess(ress, dstFileName, inFileNamesTable[u], compressionLevel);
-    }
+    if (!strcmp(suffix, stdoutmark)) {
+        ress.dstFile = stdout;
+        for (u=0; u<nbFiles; u++)
+            missed_files += FIO_compressFilename_srcFile(ress, stdoutmark,
+                                                          inFileNamesTable[u], compressionLevel);
+        if (fclose(ress.dstFile)) EXM_THROW(29, "Write error : cannot properly close %s", stdoutmark);
+    } else {
+        for (u=0; u<nbFiles; u++) {
+            size_t ifnSize = strlen(inFileNamesTable[u]);
+            if (dfnSize <= ifnSize+suffixSize+1) { free(dstFileName); dfnSize = ifnSize + 20; dstFileName = (char*)malloc(dfnSize); }
+            strcpy(dstFileName, inFileNamesTable[u]);
+            strcat(dstFileName, suffix);
+            missed_files += FIO_compressFilename_extRess(ress, dstFileName,
+                                                         inFileNamesTable[u], compressionLevel);
+    }   }
 
     /* Close & Free */
     FIO_freeCResources(ress);
@@ -466,6 +513,7 @@ typedef struct {
     void*  dictBuffer;
     size_t dictBufferSize;
     ZBUFF_DCtx* dctx;
+    FILE*  dstFile;
 } dRess_t;
 
 static dRess_t FIO_createDResources(const char* dictFileName)
@@ -534,15 +582,17 @@ unsigned long long FIO_decompressFrame(dRess_t ress,
 }
 
 
-static int FIO_decompressFile_extRess(dRess_t ress,
-                                      const char* dstFileName, const char* srcFileName)
+/** FIO_decompressSrcFile() :
+    Decompression `srcFileName` into `ress.dstFile`
+    @return : 0 : OK
+              1 : operation not started
+*/
+static int FIO_decompressSrcFile(dRess_t ress, const char* srcFileName)
 {
     unsigned long long filesize = 0;
-    FILE* srcFile;
-    FILE* dstFile;
-
-    /* Init */
-    if (FIO_getFiles(&dstFile, &srcFile, dstFileName, srcFileName)) return 1;
+    FILE* dstFile = ress.dstFile;
+    FILE* srcFile = FIO_openSrcFile(srcFileName);
+    if (srcFile==0) return 1;
 
     /* for each frame */
     for ( ; ; ) {
@@ -551,14 +601,17 @@ static int FIO_decompressFile_extRess(dRess_t ress,
         size_t toRead = 4;
         sizeCheck = fread(ress.srcBuffer, (size_t)1, toRead, srcFile);
         if (sizeCheck==0) break;   /* no more input */
-        if (sizeCheck != toRead) EXM_THROW(31, "Read error : cannot read header");
+        if (sizeCheck != toRead) EXM_THROW(31, "zstd: %s read error : cannot read header", srcFileName);
 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
         if (ZSTD_isLegacy(MEM_readLE32(ress.srcBuffer))) {
             filesize += FIO_decompressLegacyFrame(dstFile, srcFile, MEM_readLE32(ress.srcBuffer));
             continue;
         }
 #endif   /* ZSTD_LEGACY_SUPPORT */
-
+        if (MEM_readLE32(ress.srcBuffer) !=  ZSTD_MAGICNUMBER) {
+            DISPLAYLEVEL(1, "zstd: %s: not in zstd format \n", srcFileName);
+            return 1;
+        }
         filesize += FIO_decompressFrame(ress, dstFile, srcFile, toRead);
     }
 
@@ -568,8 +621,24 @@ static int FIO_decompressFile_extRess(dRess_t ress,
 
     /* Close */
     fclose(srcFile);
-    if (fclose(dstFile)) EXM_THROW(38, "Write error : cannot properly close %s", dstFileName);
+    return 0;
+}
+
+
+/** FIO_decompressFile_extRess() :
+    decompress `srcFileName` into `dstFileName`
+    @return : 0 : OK
+              1 : operation aborted (src not available, dst already taken, etc.)
+*/
+static int FIO_decompressFile_extRess(dRess_t ress,
+                                      const char* dstFileName, const char* srcFileName)
+{
+    ress.dstFile = FIO_openDstFile(dstFileName);
+    if (ress.dstFile==0) return 1;
 
+    {   int const srcError = FIO_decompressSrcFile(ress, srcFileName);   /* must be propagated : the caller counts failed files */
+        if (fclose(ress.dstFile)) EXM_THROW(38, "Write error : cannot properly close %s", dstFileName);
+        if (srcError) return srcError; }   /* dst is closed first, so it is released on failure too */
     return 0;
 }
 
@@ -597,29 +666,42 @@ int FIO_decompressMultipleFilenames(const char** srcNamesTable, unsigned nbFiles
     int missingFiles = 0;
     char* dstFileName = (char*)malloc(FNSPACE);
     size_t dfnSize = FNSPACE;
-    const size_t suffixSize = strlen(suffix);
+    const size_t suffixSize = suffix ? strlen(suffix) : 0;
     dRess_t ress;
 
 	if (dstFileName==NULL) EXM_THROW(70, "not enough memory for dstFileName");
     ress = FIO_createDResources(dictFileName);
 
-    for (u=0; u<nbFiles; u++) {
-        const char* srcFileName = srcNamesTable[u];
-        size_t sfnSize = strlen(srcFileName);
-        const char* suffixPtr = srcFileName + sfnSize - suffixSize;
-        if (dfnSize <= sfnSize-suffixSize+1) { free(dstFileName); dfnSize = sfnSize + 20; dstFileName = (char*)malloc(dfnSize); if (dstFileName==NULL) EXM_THROW(71, "not enough memory for dstFileName"); }
-        if (sfnSize <= suffixSize  ||  strcmp(suffixPtr, suffix) != 0) {
-            DISPLAYLEVEL(1, "File extension doesn't match expected extension (%4s); will not process file: %s\n", suffix, srcFileName);
-            skippedFiles++;
-            continue;
-        }
-        memcpy(dstFileName, srcFileName, sfnSize - suffixSize);
-        dstFileName[sfnSize-suffixSize] = '\0';
-
-        missingFiles += FIO_decompressFile_extRess(ress, dstFileName, srcFileName);
-    }
+    if (!strcmp(suffix, stdoutmark) || !strcmp(suffix, nulmark)) {
+        ress.dstFile = FIO_openDstFile(suffix);
+        if (ress.dstFile == 0) EXM_THROW(71, "cannot open %s", suffix);
+        for (u=0; u<nbFiles; u++)
+            missingFiles += FIO_decompressSrcFile(ress, srcNamesTable[u]);
+        if (fclose(ress.dstFile)) EXM_THROW(39, "Write error : cannot properly close %s", stdoutmark);
+    } else {
+        for (u=0; u<nbFiles; u++) {   /* create dstFileName */
+            const char* srcFileName = srcNamesTable[u];
+            size_t sfnSize = strlen(srcFileName);
+            const char* suffixPtr = srcFileName + sfnSize - suffixSize;
+            if (dfnSize+suffixSize <= sfnSize+1) {
+                free(dstFileName);
+                dfnSize = sfnSize + 20;
+                dstFileName = (char*)malloc(dfnSize);
+                if (dstFileName==NULL) EXM_THROW(71, "not enough memory for dstFileName");
+            }
+            if (sfnSize <= suffixSize || strcmp(suffixPtr, suffix) != 0) {
+                DISPLAYLEVEL(1, "zstd: %s: unknown suffix (%4s expected) -- ignored \n", srcFileName, suffix);
+                skippedFiles++;
+                continue;
+            }
+            memcpy(dstFileName, srcFileName, sfnSize - suffixSize);
+            dstFileName[sfnSize-suffixSize] = '\0';
+
+            missingFiles += FIO_decompressFile_extRess(ress, dstFileName, srcFileName);
+    }   }
 
     FIO_freeDResources(ress);
     free(dstFileName);
     return missingFiles + skippedFiles;
 }
+
diff --git a/programs/fileio.h b/programs/fileio.h
index 0e25d842f84..ee3cf2278a9 100644
--- a/programs/fileio.h
+++ b/programs/fileio.h
@@ -1,6 +1,6 @@
 /*
   fileio.h - file i/o handler
-  Copyright (C) Yann Collet 2013-2015
+  Copyright (C) Yann Collet 2013-2016
 
   GPL v2 License
 
@@ -19,8 +19,7 @@
   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 
   You can contact the author at :
-  - ZSTD source repository : https://github.com/Cyan4973/zstd
-  - Public forum : https://groups.google.com/forum/#!forum/lz4c
+  - ZSTD homepage : http://www.zstd.net/
 */
 #pragma once
 
@@ -33,8 +32,8 @@ extern "C" {
 *  Special i/o constants
 **************************************/
 #define nullString "null"
-#define stdinmark "-"
-#define stdoutmark "-"
+#define stdinmark "stdin"
+#define stdoutmark "stdout"
 #ifdef _WIN32
 #  define nulmark "nul"
 #else
@@ -52,32 +51,29 @@ void FIO_setNotificationLevel(unsigned level);
 /* *************************************
 *  Single File functions
 ***************************************/
+/** FIO_compressFilename() :
+    @return : 0 == ok;  1 == problem with src file. */
 int FIO_compressFilename (const char* outfilename, const char* infilename, const char* dictFileName, int compressionLevel);
-int FIO_decompressFilename (const char* outfilename, const char* infilename, const char* dictFileName);
-/**
-FIO_compressFilename :
-    @result : 0 == ok;  1 == pb with src file.
 
-FIO_decompressFilename :
-    @result : 0 == ok;  1 == pb with src file.
-*/
+/** FIO_decompressFilename() :
+    @return : 0 == ok;  1 == problem with src file. */
+int FIO_decompressFilename (const char* outfilename, const char* infilename, const char* dictFileName);
 
 
 /* *************************************
 *  Multiple File functions
 ***************************************/
+/** FIO_compressMultipleFilenames() :
+    @return : nb of missing files */
 int FIO_compressMultipleFilenames(const char** srcNamesTable, unsigned nbFiles,
                                   const char* suffix,
                                   const char* dictFileName, int compressionLevel);
+
+/** FIO_decompressMultipleFilenames() :
+    @return : nb of missing or skipped files */
 int FIO_decompressMultipleFilenames(const char** srcNamesTable, unsigned nbFiles,
                                     const char* suffix,
                                     const char* dictFileName);
-/**
-FIO_compressMultipleFilenames :
-    @result : nb of missing files
-FIO_decompressMultipleFilenames :
-    @result : nb of missing or skipped files
-*/
 
 
 #if defined (__cplusplus)
diff --git a/programs/fuzzer.c b/programs/fuzzer.c
index c363dbb6e70..f09cf06f2c4 100644
--- a/programs/fuzzer.c
+++ b/programs/fuzzer.c
@@ -91,6 +91,7 @@ static U32 g_testTime = 0;
 /*********************************************************
 *  Fuzzer functions
 *********************************************************/
+#define MIN(a,b) ((a)<(b)?(a):(b))
 #define MAX(a,b) ((a)>(b)?(a):(b))
 
 static U32 FUZ_GetMilliStart(void)
@@ -452,7 +453,8 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit
         crcOrig = XXH64(sampleBuffer, sampleSize, 0);
 
         /* compression test */
-        cLevelMod = MAX(1, 38 - (int)(MAX(9, sampleSizeLog) * 2));   /* use high compression levels with small samples, for speed */
+        /* former formula, kept for reference : cLevelMod = MAX(1, 38 - (int)(MAX(9, sampleSizeLog) * 2)) */
+        cLevelMod = MIN( ZSTD_maxCLevel(), (U32)MAX(1,  55 - 3*(int)sampleSizeLog) );   /* high levels only for small samples, for manageable speed */
         cLevel = (FUZ_rand(&lseed) % cLevelMod) +1;
         cSize = ZSTD_compressCCtx(ctx, cBuffer, cBufferSize, sampleBuffer, sampleSize, cLevel);
         CHECK(ZSTD_isError(cSize), "ZSTD_compressCCtx failed");
diff --git a/programs/paramgrill.c b/programs/paramgrill.c
index a34da88ce7a..23a54d466b6 100644
--- a/programs/paramgrill.c
+++ b/programs/paramgrill.c
@@ -1,6 +1,6 @@
 /*
-    paramgrill.c - parameter tester for zstd_hc
-    Copyright (C) Yann Collet 2015
+    paramgrill.c - parameter tester for zstd
+    Copyright (C) Yann Collet 2015-2016
 
     GPL v2 License
 
@@ -19,11 +19,10 @@
     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 
     You can contact the author at :
-    - zstd source repository : https://github.com/Cyan4973/zstd
-    - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+    - zstd homepage : http://www.zstd.net/
 */
 
-/**************************************
+/*-************************************
 *  Compiler Options
 **************************************/
 /* Disable some Visual warning messages */
@@ -48,8 +47,8 @@
 #endif
 
 
-/**************************************
-*  Includes
+/*-************************************
+*  Dependencies
 **************************************/
 #include <stdlib.h>       /* malloc */
 #include <stdio.h>        /* fprintf, fopen, ftello64 */
@@ -71,7 +70,7 @@
 #include "xxhash.h"
 
 
-/**************************************
+/*-************************************
 *  Compiler Options
 **************************************/
 /* S_ISREG & gettimeofday() are not supported by MSVC */
@@ -80,7 +79,7 @@
 #endif
 
 
-/**************************************
+/*-************************************
 *  Constants
 **************************************/
 #define PROGRAM_DESCRIPTION "ZSTD_HC parameters tester"
@@ -98,6 +97,8 @@
 #define NBLOOPS    2
 #define TIMELOOP   2000
 
+#define NB_LEVELS_TRACKED 30   /* capacity of the per-level tables (g_cSpeedTarget[], winners[]) ; must be > ZSTD_maxCLevel() */
+
 static const size_t maxMemory = (sizeof(size_t)==4)  ?  (2 GB - 64 MB) : (size_t)(1ULL << ((sizeof(size_t)*8)-31));
 #define DEFAULT_CHUNKSIZE   (4<<20)
 
@@ -110,13 +111,13 @@ static const int g_maxVariationTime = 60000;   /* 60 sec */
 static const int g_maxNbVariations = 64;
 
 
-/**************************************
+/*-************************************
 *  Macros
 **************************************/
 #define DISPLAY(...)  fprintf(stderr, __VA_ARGS__)
 
 
-/**************************************
+/*-************************************
 *  Benchmark Parameters
 **************************************/
 static U32 g_nbIterations = NBLOOPS;
@@ -126,7 +127,7 @@ static U32 g_rand = 1;
 static U32 g_singleRun = 0;
 static U32 g_target = 0;
 static U32 g_noSeed = 0;
-static ZSTD_parameters g_params = { 0, 0, 0, 0, 0, 0, ZSTD_greedy };
+static ZSTD_parameters g_params = { 0, 0, 0, 0, 0, 0, 0, ZSTD_greedy };
 
 void BMK_SetNbIterations(int nbLoops)
 {
@@ -135,7 +136,7 @@ void BMK_SetNbIterations(int nbLoops)
 }
 
 
-/*********************************************************
+/*-*******************************************************
 *  Private functions
 *********************************************************/
 
@@ -187,8 +188,7 @@ static size_t BMK_findMaxMem(U64 requiredMem)
     if (requiredMem > maxMemory) requiredMem = maxMemory;
 
     requiredMem += 2*step;
-    while (!testmem)
-    {
+    while (!testmem) {
         requiredMem -= step;
         testmem = (BYTE*) malloc ((size_t)requiredMem);
     }
@@ -226,7 +226,7 @@ U32 FUZ_rand(U32* src)
 }
 
 
-/*********************************************************
+/*-*******************************************************
 *  Bench functions
 *********************************************************/
 typedef struct {
@@ -265,14 +265,14 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr,
     U32 Hlog = params.hashLog;
     U32 Slog = params.searchLog;
     U32 Slength = params.searchLength;
+    U32 Tlength = params.targetLength;
     ZSTD_strategy strat = params.strategy;
     char name[30] = { 0 };
     U64 crcOrig;
 
     /* Memory allocation & restrictions */
-    snprintf(name, 30, "Sw%02uc%02uh%02us%02ul%1ut%1u", Wlog, Clog, Hlog, Slog, Slength, strat);
-    if (!compressedBuffer || !resultBuffer || !blockTable)
-    {
+    snprintf(name, 30, "Sw%02uc%02uh%02us%02ul%1ut%03uS%1u", Wlog, Clog, Hlog, Slog, Slength, Tlength, strat);
+    if (!compressedBuffer || !resultBuffer || !blockTable) {
         DISPLAY("\nError: not enough memory!\n");
         free(compressedBuffer);
         free(resultBuffer);
@@ -290,8 +290,7 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr,
         const char* srcPtr = (const char*)srcBuffer;
         char* cPtr = (char*)compressedBuffer;
         char* resPtr = (char*)resultBuffer;
-        for (i=0; i<nbBlocks; i++)
-        {
+        for (i=0; i<nbBlocks; i++) {
             size_t thisBlockSize = MIN(remaining, blockSize);
             blockTable[i].srcPtr = srcPtr;
             blockTable[i].cPtr = cPtr;
@@ -302,8 +301,7 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr,
             cPtr += blockTable[i].cRoom;
             resPtr += thisBlockSize;
             remaining -= thisBlockSize;
-        }
-    }
+    }   }
 
     /* warmimg up memory */
     RDG_genBuffer(compressedBuffer, maxCompressedSize, 0.10, 0.10, 1);
@@ -318,8 +316,7 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr,
         const int startTime =BMK_GetMilliStart();
 
         DISPLAY("\r%79s\r", "");
-        for (loopNb = 1; loopNb <= g_nbIterations; loopNb++)
-        {
+        for (loopNb = 1; loopNb <= g_nbIterations; loopNb++) {
             int nbLoops;
             int milliTime;
             U32 blockNb;
@@ -336,8 +333,7 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr,
             milliTime = BMK_GetMilliStart();
             while (BMK_GetMilliStart() == milliTime);
             milliTime = BMK_GetMilliStart();
-            while (BMK_GetMilliSpan(milliTime) < TIMELOOP)
-            {
+            while (BMK_GetMilliSpan(milliTime) < TIMELOOP) {
                 for (blockNb=0; blockNb<nbBlocks; blockNb++)
                     blockTable[blockNb].cSize = ZSTD_compress_advanced(ctx,
                                                     blockTable[blockNb].cPtr,  blockTable[blockNb].cRoom,
@@ -367,8 +363,7 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr,
             milliTime = BMK_GetMilliStart();
             while (BMK_GetMilliStart() == milliTime);
             milliTime = BMK_GetMilliStart();
-            for ( ; BMK_GetMilliSpan(milliTime) < TIMELOOP; nbLoops++)
-            {
+            for ( ; BMK_GetMilliSpan(milliTime) < TIMELOOP; nbLoops++) {
                 for (blockNb=0; blockNb<nbBlocks; blockNb++)
                     blockTable[blockNb].resSize = ZSTD_decompress(blockTable[blockNb].resPtr, blockTable[blockNb].srcSize,
                                                                   blockTable[blockNb].cPtr, blockTable[blockNb].cSize);
@@ -384,24 +379,19 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr,
 
             /* CRC Checking */
             crcCheck = XXH64(resultBuffer, srcSize, 0);
-            if (crcOrig!=crcCheck)
-            {
+            if (crcOrig!=crcCheck) {
                 unsigned u;
                 unsigned eBlockSize = (unsigned)(MIN(65536*2, blockSize));
                 DISPLAY("\n!!! WARNING !!! Invalid Checksum : %x != %x\n", (unsigned)crcOrig, (unsigned)crcCheck);
-                for (u=0; u<srcSize; u++)
-                {
-                    if (((const BYTE*)srcBuffer)[u] != ((BYTE*)resultBuffer)[u])
-                    {
+                for (u=0; u<srcSize; u++) {
+                    if (((const BYTE*)srcBuffer)[u] != ((BYTE*)resultBuffer)[u]) {
                         printf("Decoding error at pos %u (block %u, pos %u) \n", u, u / eBlockSize, u % eBlockSize);
                         break;
-                    }
-                }
+                }   }
                 break;
             }
 #endif
-        }
-    }
+    }   }
 
     /* End cleaning */
     DISPLAY("\r");
@@ -415,21 +405,23 @@ const char* g_stratName[] = { "ZSTD_fast   ",
                               "ZSTD_greedy ",
                               "ZSTD_lazy   ",
                               "ZSTD_lazy2  ",
-                              "ZSTD_btlazy2" };
+                              "ZSTD_btlazy2",
+                              "ZSTD_opt    ",
+                              "ZSTD_btopt  " };
 
 static void BMK_printWinner(FILE* f, U32 cLevel, BMK_result_t result, ZSTD_parameters params, size_t srcSize)
 {
     DISPLAY("\r%79s\r", "");
-    fprintf(f,"    {%3u,%3u,%3u,%3u,%3u,%3u, %s },  ",
+    fprintf(f,"    {%3u,%3u,%3u,%3u,%3u,%3u,%3u, %s },  ",
             0, params.windowLog, params.contentLog, params.hashLog, params.searchLog, params.searchLength,
-            g_stratName[(U32)(params.strategy)]);
+            params.targetLength, g_stratName[(U32)(params.strategy)]);
     fprintf(f,
             "/* level %2u */   /* R:%5.3f at %5.1f MB/s - %5.1f MB/s */\n",
             cLevel, (double)srcSize / result.cSize, (double)result.cSpeed / 1000., (double)result.dSpeed / 1000.);
 }
 
 
-static U32 g_cSpeedTarget[ZSTD_MAX_CLEVEL+1] = { 0 };
+static U32 g_cSpeedTarget[NB_LEVELS_TRACKED] = { 0 };   /* compression speed target per level, indexed by cLevel ; NB_LEVELS_TRACKED > ZSTD_maxCLevel() is checked at start of main() */
 
 typedef struct {
     BMK_result_t result;
@@ -438,14 +430,12 @@ typedef struct {
 
 static void BMK_printWinners2(FILE* f, const winnerInfo_t* winners, size_t srcSize)
 {
-    int cLevel;
+    unsigned cLevel;
 
     fprintf(f, "\n /* Proposed configurations : */ \n");
-    fprintf(f, "#define ZSTD_MAX_CLEVEL %2u \n", ZSTD_MAX_CLEVEL);
-    fprintf(f, "static const ZSTD_parameters ZSTD_defaultParameters[ZSTD_MAX_CLEVEL+1] = {\n");
-    fprintf(f, "    /* l,  W,  C,  H,  S,  L, strat */ \n");
+    fprintf(f, "    /* l,  W,  C,  H,  S,  L,  T, strat */ \n");
 
-    for (cLevel=0; cLevel <= ZSTD_MAX_CLEVEL; cLevel++)
+    for (cLevel=0; cLevel <= ZSTD_maxCLevel(); cLevel++)
         BMK_printWinner(f, cLevel, winners[cLevel].result, winners[cLevel].params, srcSize);
 }
 
@@ -465,16 +455,14 @@ static int BMK_seed(winnerInfo_t* winners, const ZSTD_parameters params,
 {
     BMK_result_t testResult;
     int better = 0;
-    int cLevel;
+    unsigned cLevel;
 
     BMK_benchParam(&testResult, srcBuffer, srcSize, ctx, params);
 
-    for (cLevel = 1; cLevel <= ZSTD_MAX_CLEVEL; cLevel++)
-    {
+    for (cLevel = 1; cLevel <= ZSTD_maxCLevel(); cLevel++) {
         if (testResult.cSpeed < g_cSpeedTarget[cLevel])
             continue;   /* not fast enough for this level */
-        if (winners[cLevel].result.cSize==0)
-        {
+        if (winners[cLevel].result.cSize==0) {
             /* first solution for this cLevel */
             winners[cLevel].result = testResult;
             winners[cLevel].params = params;
@@ -483,8 +471,7 @@ static int BMK_seed(winnerInfo_t* winners, const ZSTD_parameters params,
             continue;
         }
 
-        if ((double)testResult.cSize <= ((double)winners[cLevel].result.cSize * (1. + (0.02 / cLevel))) )
-        {
+        if ((double)testResult.cSize <= ((double)winners[cLevel].result.cSize * (1. + (0.02 / cLevel))) ) {
             /* Validate solution is "good enough" */
             double W_ratio = (double)srcSize / testResult.cSize;
             double O_ratio = (double)srcSize / winners[cLevel].result.cSize;
@@ -509,8 +496,7 @@ static int BMK_seed(winnerInfo_t* winners, const ZSTD_parameters params,
             double O_DSpeed_note = O_ratioNote * ( 20 + 2*cLevel) + log((double)winners[cLevel].result.dSpeed);
 
 
-            if (W_DMemUsed_note < O_DMemUsed_note)
-            {
+            if (W_DMemUsed_note < O_DMemUsed_note) {
                 /* uses too much Decompression memory for too little benefit */
                 if (W_ratio > O_ratio)
                 DISPLAY ("Decompression Memory : %5.3f @ %4.1f MB  vs  %5.3f @ %4.1f MB   : not enough for level %i\n",
@@ -518,8 +504,7 @@ static int BMK_seed(winnerInfo_t* winners, const ZSTD_parameters params,
                          O_ratio, (double)(O_DMemUsed) / 1024 / 1024,   cLevel);
                 continue;
             }
-            if (W_CMemUsed_note < O_CMemUsed_note)
-            {
+            if (W_CMemUsed_note < O_CMemUsed_note) {
                 /* uses too much memory for compression for too little benefit */
                 if (W_ratio > O_ratio)
                 DISPLAY ("Compression Memory : %5.3f @ %4.1f MB  vs  %5.3f @ %4.1f MB   : not enough for level %i\n",
@@ -527,8 +512,7 @@ static int BMK_seed(winnerInfo_t* winners, const ZSTD_parameters params,
                          O_ratio, (double)(O_CMemUsed) / 1024 / 1024,   cLevel);
                 continue;
             }
-            if (W_CSpeed_note   < O_CSpeed_note  )
-            {
+            if (W_CSpeed_note   < O_CSpeed_note  ) {
                 /* too large compression speed difference for the compression benefit */
                 if (W_ratio > O_ratio)
                 DISPLAY ("Compression Speed : %5.3f @ %4.1f MB/s  vs  %5.3f @ %4.1f MB/s   : not enough for level %i\n",
@@ -536,8 +520,7 @@ static int BMK_seed(winnerInfo_t* winners, const ZSTD_parameters params,
                          O_ratio, (double)(winners[cLevel].result.cSpeed) / 1000.,   cLevel);
                 continue;
             }
-            if (W_DSpeed_note   < O_DSpeed_note  )
-            {
+            if (W_DSpeed_note   < O_DSpeed_note  ) {
                 /* too large decompression speed difference for the compression benefit */
                 if (W_ratio > O_ratio)
                 DISPLAY ("Decompression Speed : %5.3f @ %4.1f MB/s  vs  %5.3f @ %4.1f MB/s   : not enough for level %i\n",
@@ -554,9 +537,7 @@ static int BMK_seed(winnerInfo_t* winners, const ZSTD_parameters params,
             BMK_printWinner(stdout, cLevel, testResult, params, srcSize);
 
             better = 1;
-        }
-
-    }
+    }   }
 
     return better;
 }
@@ -567,10 +548,9 @@ static ZSTD_parameters* sanitizeParams(ZSTD_parameters params)
 {
     g_params = params;
     if (params.strategy == ZSTD_fast)
-    {
-        g_params.contentLog = 0;
-        g_params.searchLog = 0;
-    }
+        g_params.contentLog = 0, g_params.searchLog = 0;
+    if ((params.strategy != ZSTD_opt) && (params.strategy != ZSTD_btopt ))
+        g_params.targetLength = 0;
     return &g_params;
 }
 
@@ -578,9 +558,8 @@ static ZSTD_parameters* sanitizeParams(ZSTD_parameters params)
 static void paramVariation(ZSTD_parameters* p)
 {
     U32 nbChanges = (FUZ_rand(&g_rand) & 3) + 1;
-    for (; nbChanges; nbChanges--)
-    {
-        const U32 changeID = FUZ_rand(&g_rand) % 12;
+    for (; nbChanges; nbChanges--) {
+        const U32 changeID = FUZ_rand(&g_rand) % 14;
         switch(changeID)
         {
         case 0:
@@ -607,6 +586,10 @@ static void paramVariation(ZSTD_parameters* p)
             p->strategy = (ZSTD_strategy)(((U32)p->strategy)+1); break;
         case 11:
             p->strategy = (ZSTD_strategy)(((U32)p->strategy)-1); break;
+        case 12:
+            p->targetLength *= 1 + ((double)(FUZ_rand(&g_rand)&255)) / 256.; break;
+        case 13:
+            p->targetLength /= 1 + ((double)(FUZ_rand(&g_rand)&255)) / 256.; break;
         }
     }
     ZSTD_validateParams(p);
@@ -632,8 +615,7 @@ static void playAround(FILE* f, winnerInfo_t* winners,
     int nbVariations = 0;
     const int startTime = BMK_GetMilliStart();
 
-    while (BMK_GetMilliSpan(startTime) < g_maxVariationTime)
-    {
+    while (BMK_GetMilliSpan(startTime) < g_maxVariationTime) {
         ZSTD_parameters p = params;
 
         if (nbVariations++ > g_maxNbVariations) break;
@@ -658,15 +640,15 @@ static void playAround(FILE* f, winnerInfo_t* winners,
 static void potentialRandomParams(ZSTD_parameters* p, U32 inverseChance)
 {
     U32 chance = (FUZ_rand(&g_rand) % (inverseChance+1));
-    if (!chance)
-    {
+    if (!chance) {
         /* totally random entry */
         p->contentLog = FUZ_rand(&g_rand) % (ZSTD_CONTENTLOG_MAX+1 - ZSTD_CONTENTLOG_MIN) + ZSTD_CONTENTLOG_MIN;
         p->hashLog    = FUZ_rand(&g_rand) % (ZSTD_HASHLOG_MAX+1 - ZSTD_HASHLOG_MIN) + ZSTD_HASHLOG_MIN;
         p->searchLog  = FUZ_rand(&g_rand) % (ZSTD_SEARCHLOG_MAX+1 - ZSTD_SEARCHLOG_MIN) + ZSTD_SEARCHLOG_MIN;
         p->windowLog  = FUZ_rand(&g_rand) % (ZSTD_WINDOWLOG_MAX+1 - ZSTD_WINDOWLOG_MIN) + ZSTD_WINDOWLOG_MIN;
         p->searchLength=FUZ_rand(&g_rand) % (ZSTD_SEARCHLENGTH_MAX+1 - ZSTD_SEARCHLENGTH_MIN) + ZSTD_SEARCHLENGTH_MIN;
-        p->strategy   = (ZSTD_strategy) (FUZ_rand(&g_rand) % (ZSTD_btlazy2+1));
+        p->targetLength=FUZ_rand(&g_rand) % (ZSTD_TARGETLENGTH_MAX+1 - ZSTD_TARGETLENGTH_MIN) + ZSTD_TARGETLENGTH_MIN;
+        p->strategy   = (ZSTD_strategy) (FUZ_rand(&g_rand) % (ZSTD_btopt +1));
         ZSTD_validateParams(p);
     }
 }
@@ -676,9 +658,8 @@ static void BMK_selectRandomStart(
                        const void* srcBuffer, size_t srcSize,
                        ZSTD_CCtx* ctx)
 {
-    U32 id = (FUZ_rand(&g_rand) % (ZSTD_MAX_CLEVEL+1));
-    if ((id==0) || (winners[id].params.windowLog==0))
-    {
+    U32 id = (FUZ_rand(&g_rand) % (ZSTD_maxCLevel()+1));
+    if ((id==0) || (winners[id].params.windowLog==0)) {
         /* totally random entry */
         ZSTD_parameters p;
         potentialRandomParams(&p, 1);
@@ -695,14 +676,14 @@ static void BMK_benchMem(void* srcBuffer, size_t srcSize)
 {
     ZSTD_CCtx* ctx = ZSTD_createCCtx();
     ZSTD_parameters params;
-    winnerInfo_t winners[ZSTD_MAX_CLEVEL+1];
+    winnerInfo_t winners[NB_LEVELS_TRACKED];
     int i;
+    unsigned u;
     const char* rfName = "grillResults.txt";
     FILE* f;
     const size_t blockSize = g_blockSize ? g_blockSize : srcSize;
 
-    if (g_singleRun)
-    {
+    if (g_singleRun) {
         BMK_result_t testResult;
         g_params.srcSize = blockSize;
         ZSTD_validateParams(&g_params);
@@ -718,8 +699,7 @@ static void BMK_benchMem(void* srcBuffer, size_t srcSize)
 
     if (g_target)
         g_cSpeedTarget[1] = g_target * 1000;
-    else
-    {
+    else {
         /* baseline config for level 1 */
         BMK_result_t testResult;
         params = ZSTD_getParams(1, blockSize);
@@ -728,14 +708,13 @@ static void BMK_benchMem(void* srcBuffer, size_t srcSize)
     }
 
     /* establish speed objectives (relative to level 1) */
-    for (i=2; i<=ZSTD_MAX_CLEVEL; i++)
-        g_cSpeedTarget[i] = (g_cSpeedTarget[i-1] * 25) >> 5;
+    for (u=2; u<=ZSTD_maxCLevel(); u++)
+        g_cSpeedTarget[u] = (g_cSpeedTarget[u-1] * 25) >> 5;
 
     /* populate initial solution */
     {
-        const int maxSeeds = g_noSeed ? 1 : ZSTD_MAX_CLEVEL;
-        for (i=1; i<=maxSeeds; i++)
-        {
+        const int maxSeeds = g_noSeed ? 1 : ZSTD_maxCLevel();
+        for (i=1; i<=maxSeeds; i++) {
             params = ZSTD_getParams(i, blockSize);
             ZSTD_validateParams(&params);
             BMK_seed(winners, params, srcBuffer, srcSize, ctx);
@@ -746,8 +725,7 @@ static void BMK_benchMem(void* srcBuffer, size_t srcSize)
     /* start tests */
     {
         const int milliStart = BMK_GetMilliStart();
-        do
-        {
+        do {
             BMK_selectRandomStart(f, winners, srcBuffer, srcSize, ctx);
         } while (BMK_GetMilliSpan(milliStart) < g_grillDuration);
     }
@@ -764,17 +742,13 @@ static void BMK_benchMem(void* srcBuffer, size_t srcSize)
 
 static int benchSample(void)
 {
-    char* origBuff;
+    void* origBuff;
     size_t benchedSize = sampleSize;
     const char* name = "Sample 10MiB";
 
     /* Allocation */
-    origBuff = (char*) malloc((size_t)benchedSize);
-    if(!origBuff)
-    {
-        DISPLAY("\nError: not enough memory!\n");
-        return 12;
-    }
+    origBuff = malloc(benchedSize);
+    if (!origBuff) { DISPLAY("\nError: not enough memory!\n"); return 12; }
 
     /* Fill buffer */
     RDG_genBuffer(origBuff, benchedSize, g_compressibility, 0.0, 0);
@@ -794,8 +768,7 @@ int benchFiles(char** fileNamesTable, int nbFiles)
     int fileIdx=0;
 
     /* Loop for each file */
-    while (fileIdx<nbFiles)
-    {
+    while (fileIdx<nbFiles) {
         FILE* inFile;
         char* inFileName;
         U64   inFileSize;
@@ -806,25 +779,21 @@ int benchFiles(char** fileNamesTable, int nbFiles)
         /* Check file existence */
         inFileName = fileNamesTable[fileIdx++];
         inFile = fopen( inFileName, "rb" );
-        if (inFile==NULL)
-        {
+        if (inFile==NULL) {
             DISPLAY( "Pb opening %s\n", inFileName);
             return 11;
         }
 
         /* Memory allocation & restrictions */
         inFileSize = BMK_GetFileSize(inFileName);
-        benchedSize = (size_t) BMK_findMaxMem(inFileSize*3) / 3;
+        benchedSize = BMK_findMaxMem(inFileSize*3) / 3;
         if ((U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize;
         if (benchedSize < inFileSize)
-        {
             DISPLAY("Not enough memory for '%s' full size; testing %i MB only...\n", inFileName, (int)(benchedSize>>20));
-        }
 
         /* Alloc */
         origBuff = (char*) malloc((size_t)benchedSize);
-        if(!origBuff)
-        {
+        if(!origBuff) {
             DISPLAY("\nError: not enough memory!\n");
             fclose(inFile);
             return 12;
@@ -835,8 +804,7 @@ int benchFiles(char** fileNamesTable, int nbFiles)
         readSize = fread(origBuff, 1, benchedSize, inFile);
         fclose(inFile);
 
-        if(readSize != benchedSize)
-        {
+        if(readSize != benchedSize) {
             DISPLAY("\nError: problem reading file '%s' !!    \n", inFileName);
             free(origBuff);
             return 13;
@@ -862,8 +830,7 @@ int optimizeForSize(char* inFileName)
 
     /* Check file existence */
     inFile = fopen( inFileName, "rb" );
-    if (inFile==NULL)
-    {
+    if (inFile==NULL) {
         DISPLAY( "Pb opening %s\n", inFileName);
         return 11;
     }
@@ -873,14 +840,11 @@ int optimizeForSize(char* inFileName)
     benchedSize = (size_t) BMK_findMaxMem(inFileSize*3) / 3;
     if ((U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize;
     if (benchedSize < inFileSize)
-    {
         DISPLAY("Not enough memory for '%s' full size; testing %i MB only...\n", inFileName, (int)(benchedSize>>20));
-    }
 
     /* Alloc */
     origBuff = (char*) malloc((size_t)benchedSize);
-    if(!origBuff)
-    {
+    if(!origBuff) {
         DISPLAY("\nError: not enough memory!\n");
         fclose(inFile);
         return 12;
@@ -891,8 +855,7 @@ int optimizeForSize(char* inFileName)
     readSize = fread(origBuff, 1, benchedSize, inFile);
     fclose(inFile);
 
-    if(readSize != benchedSize)
-    {
+    if(readSize != benchedSize) {
         DISPLAY("\nError: problem reading file '%s' !!    \n", inFileName);
         free(origBuff);
         return 13;
@@ -916,9 +879,8 @@ int optimizeForSize(char* inFileName)
 
         /* find best solution from default params */
         {
-            const int maxSeeds = g_noSeed ? 1 : ZSTD_MAX_CLEVEL;
-            for (i=1; i<=maxSeeds; i++)
-            {
+            const int maxSeeds = g_noSeed ? 1 : ZSTD_maxCLevel();
+            for (i=1; i<=maxSeeds; i++) {
                 params = ZSTD_getParams(i, blockSize);
                 BMK_benchParam(&candidate, origBuff, benchedSize, ctx, params);
                 if ( (candidate.cSize < winner.result.cSize)
@@ -927,16 +889,14 @@ int optimizeForSize(char* inFileName)
                     winner.params = params;
                     winner.result = candidate;
                     BMK_printWinner(stdout, i, winner.result, winner.params, benchedSize);
-                }
-            }
+            }   }
         }
         BMK_printWinner(stdout, 99, winner.result, winner.params, benchedSize);
 
         /* start tests */
         {
             const int milliStart = BMK_GetMilliStart();
-            do
-            {
+            do {
                 params = winner.params;
                 paramVariation(&params);
                 potentialRandomParams(&params, 16);
@@ -950,13 +910,11 @@ int optimizeForSize(char* inFileName)
 
                 /* improvement found => new winner */
                 if ( (candidate.cSize < winner.result.cSize)
-                   ||((candidate.cSize == winner.result.cSize) && (candidate.cSpeed > winner.result.cSpeed)) )
-                {
+                   ||((candidate.cSize == winner.result.cSize) && (candidate.cSpeed > winner.result.cSpeed)) ) {
                     winner.params = params;
                     winner.result = candidate;
                     BMK_printWinner(stdout, 99, winner.result, winner.params, benchedSize);
                 }
-
             } while (BMK_GetMilliSpan(milliStart) < g_grillDuration);
         }
 
@@ -972,7 +930,7 @@ int optimizeForSize(char* inFileName)
 }
 
 
-int usage(char* exename)
+static int usage(char* exename)
 {
     DISPLAY( "Usage :\n");
     DISPLAY( "      %s [arg] file\n", exename);
@@ -982,16 +940,17 @@ int usage(char* exename)
     return 0;
 }
 
-int usage_advanced(void)
+static int usage_advanced(void)
 {
     DISPLAY( "\nAdvanced options :\n");
     DISPLAY( " -i#    : iteration loops [1-9](default : %i)\n", NBLOOPS);
     DISPLAY( " -B#    : cut input into blocks of size # (default : single block)\n");
     DISPLAY( " -P#    : generated sample compressibility (default : %.1f%%)\n", COMPRESSIBILITY_DEFAULT * 100);
+    DISPLAY( " -S     : Single run\n");
     return 0;
 }
 
-int badusage(char* exename)
+static int badusage(char* exename)
 {
     DISPLAY("Wrong parameters\n");
     usage(exename);
@@ -1008,6 +967,12 @@ int main(int argc, char** argv)
     U32 optimizer = 0;
     U32 main_pause = 0;
 
+    /* checks */
+    if (NB_LEVELS_TRACKED <= ZSTD_maxCLevel()) {
+        DISPLAY("Error : NB_LEVELS_TRACKED <= ZSTD_maxCLevel() \n");
+        exit(1);
+    }
+
     /* Welcome message */
     DISPLAY(WELCOME_MESSAGE);
 
@@ -1022,12 +987,10 @@ int main(int argc, char** argv)
         if(!strcmp(argument,"--no-seed")) { g_noSeed = 1; continue; }
 
         /* Decode command (note : aggregated commands are allowed) */
-        if (argument[0]=='-')
-        {
+        if (argument[0]=='-') {
             argument++;
 
-            while (argument[0]!=0)
-            {
+            while (argument[0]!=0) {
 
                 switch(argument[0])
                 {
@@ -1050,8 +1013,7 @@ int main(int argc, char** argv)
                     argument++;
                     {
                         U32 proba32 = 0;
-                        while ((argument[0]>= '0') && (argument[0]<= '9'))
-                        {
+                        while ((argument[0]>= '0') && (argument[0]<= '9')) {
                             proba32 *= 10;
                             proba32 += argument[0] - '0';
                             argument++;
@@ -1070,8 +1032,7 @@ int main(int argc, char** argv)
                     g_singleRun = 1;
                     argument++;
                     g_params = ZSTD_getParams(2, g_blockSize);
-                    for ( ; ; )
-                    {
+                    for ( ; ; ) {
                         switch(*argument)
                         {
                         case 'w':
@@ -1104,14 +1065,16 @@ int main(int argc, char** argv)
                             while ((*argument>= '0') && (*argument<='9'))
                                 g_params.searchLength *= 10, g_params.searchLength += *argument++ - '0';
                             continue;
-                        case 't':  /* strategy */
-                            g_params.strategy = (ZSTD_strategy)0;
+                        case 't':  /* target length */
+                            g_params.targetLength = 0;
                             argument++;
                             while ((*argument>= '0') && (*argument<='9'))
-                            {
-                                g_params.strategy = (ZSTD_strategy)((U32)g_params.strategy *10);
-                                g_params.strategy = (ZSTD_strategy)((U32)g_params.strategy + *argument++ - '0');
-                            }
+                                g_params.targetLength *= 10, g_params.targetLength += *argument++ - '0';
+                            continue;
+                        case 'S':  /* strategy */
+                            argument++;
+                            while ((*argument>= '0') && (*argument<='9'))
+                                g_params.strategy = (ZSTD_strategy)(*argument++ - '0');
                             continue;
                         case 'L':
                             {
@@ -1132,8 +1095,7 @@ int main(int argc, char** argv)
                 case 'T':
                     argument++;
                     g_target = 0;
-                    while ((*argument >= '0') && (*argument <= '9'))
-                    {
+                    while ((*argument >= '0') && (*argument <= '9')) {
                         g_target *= 10;
                         g_target += *argument - '0';
                         argument++;
@@ -1167,8 +1129,7 @@ int main(int argc, char** argv)
 
     if (filenamesStart==0)
         result = benchSample();
-    else
-    {
+    else {
         if (optimizer)
             result = optimizeForSize(input_filename);
         else
@@ -1179,4 +1140,3 @@ int main(int argc, char** argv)
 
     return result;
 }
-
diff --git a/programs/playTests.sh b/programs/playTests.sh
index 5d641ec598d..ec625ee849e 100755
--- a/programs/playTests.sh
+++ b/programs/playTests.sh
@@ -16,28 +16,45 @@ roundTripTest() {
     rm -f tmp1 tmp2
     echo "roundTripTest: ./datagen $1 $p | $ZSTD -v$c | $ZSTD -d"
     ./datagen $1 $p | md5sum > tmp1
-    ./datagen $1 $p | $ZSTD -v$c | $ZSTD -d  | md5sum > tmp2
+    ./datagen $1 $p | $ZSTD -vq$c | $ZSTD -d  | md5sum > tmp2
     diff -q tmp1 tmp2
 }
 
 [ -n "$ZSTD" ] || die "ZSTD variable must be defined!"
 
-printf "\n**** frame concatenation **** "
+
+printf "\n**** simple tests **** \n"   # printf: echo's handling of "\n" varies between shells
+./datagen > tmp
+$ZSTD tmp
+$ZSTD -99 tmp && die "too large compression level undetected"
+$ZSTD tmp -c > tmpCompressed
+$ZSTD tmp --stdout > tmpCompressed
+$ZSTD -d tmpCompressed && die "wrong suffix error not detected!"
+$ZSTD -d tmpCompressed -c > tmpResult
+$ZSTD --decompress tmpCompressed -c > tmpResult
+$ZSTD --decompress tmpCompressed --stdout > tmpResult
+$ZSTD -q tmp && die "overwrite check failed!"
+$ZSTD -q -f tmp
+$ZSTD -q --force tmp
+
+
+printf "\n**** frame concatenation **** \n"
 
 echo "hello " > hello.tmp
 echo "world!" > world.tmp
 cat hello.tmp world.tmp > helloworld.tmp
-$ZSTD hello.tmp > hello.zstd
-$ZSTD world.tmp > world.zstd
+$ZSTD -c hello.tmp > hello.zstd
+$ZSTD -c world.tmp > world.zstd
 cat hello.zstd world.zstd > helloworld.zstd
-$ZSTD -df helloworld.zstd > result.tmp
+$ZSTD -dc helloworld.zstd > result.tmp
 cat result.tmp
 sdiff helloworld.tmp result.tmp
 rm ./*.tmp ./*.zstd
 
 echo frame concatenation test completed
 
-echo "**** flush write error test **** "
+
+printf "\n**** flush write error test **** \n"
 
 echo "echo foo | $ZSTD > /dev/full"
 echo foo | $ZSTD > /dev/full && die "write error not detected!"
@@ -45,30 +62,52 @@ echo "echo foo | $ZSTD | $ZSTD -d > /dev/full"
 echo foo | $ZSTD | $ZSTD -d > /dev/full && die "write error not detected!"
 
 
-echo "*** dictionary tests *** "
+printf "\n**** dictionary tests **** \n"
 
 ./datagen > tmpDict
 ./datagen -g1M | md5sum > tmp1
-./datagen -g1M | $ZSTD -D tmpDict | $ZSTD -D tmpDict -dv | md5sum > tmp2
+./datagen -g1M | $ZSTD -D tmpDict | $ZSTD -D tmpDict -dvq | md5sum > tmp2
 diff -q tmp1 tmp2
 
-echo "*** multiple files tests *** "
+printf "\n**** multiple files tests **** \n"
 
 ./datagen -s1        > tmp1 2> /dev/null
 ./datagen -s2 -g100K > tmp2 2> /dev/null
 ./datagen -s3 -g1M   > tmp3 2> /dev/null
-$ZSTD -f -m tmp*
+$ZSTD -f tmp*
+echo "compress tmp* : "
 ls -ls tmp*
 rm tmp1 tmp2 tmp3
-$ZSTD -df -m *.zst
+echo "decompress tmp* : "
+$ZSTD -df *.zst
 ls -ls tmp*
-$ZSTD -f -m tmp1 notHere tmp2 && die "missing file not detected!"
-rm tmp*
+echo "compress tmp* into stdout > tmpall : "
+$ZSTD -c tmp1 tmp2 tmp3 > tmpall
+ls -ls tmp*
+echo "decompress tmpall* into stdout > tmpdec : "
+cp tmpall tmpall2
+$ZSTD -dc tmpall* > tmpdec
+ls -ls tmp*
+echo "compress multiple files including a missing one (notHere) : "
+$ZSTD -f tmp1 notHere tmp2 && die "missing file not detected!"
 
-echo "**** zstd round-trip tests **** "
+printf "\n**** integrity tests **** \n"
+echo "test one file (tmp1.zst) "
+$ZSTD -t tmp1.zst
+$ZSTD --test tmp1.zst
+echo "test multiple files (*.zst) "
+$ZSTD -t *.zst
+echo "test good and bad files (*) "
+$ZSTD -t * && die "bad files not detected !"
+
+printf "\n**** zstd round-trip tests **** \n"
 
 roundTripTest
-roundTripTest '' 6
+roundTripTest -g512K 6    # greedy, hash chain
+roundTripTest -g512K 16   # btlazy2 
+roundTripTest -g512K 19   # btopt
+
+rm tmp*
 
 if [ "$1" != "--test-large-data" ]; then
     echo "Skipping large data tests"
@@ -102,3 +141,6 @@ roundTripTest -g50000000 -P94 19
 
 roundTripTest -g99000000 -P99 20
 roundTripTest -g6000000000 -P99 q
+
+rm tmp*
+
diff --git a/programs/xxhash.c b/programs/xxhash.c
index d33113fe276..352d1e54067 100644
--- a/programs/xxhash.c
+++ b/programs/xxhash.c
@@ -175,7 +175,7 @@ static U64 XXH_read64(const void* memPtr)
     return val;
 }
 
-#endif // XXH_FORCE_DIRECT_MEMORY_ACCESS
+#endif   /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
 
 
 /* ****************************************
diff --git a/programs/zbufftest.c b/programs/zbufftest.c
index a8257dfc049..aa57b576aa1 100644
--- a/programs/zbufftest.c
+++ b/programs/zbufftest.c
@@ -41,7 +41,7 @@
 #include <sys/timeb.h>   /* timeb */
 #include <string.h>      /* strcmp */
 #include "mem.h"
-#include "zstd_buffered.h"
+#include "zbuff.h"
 #include "zstd.h"        /* ZSTD_compressBound() */
 #include "datagen.h"     /* RDG_genBuffer */
 #include "xxhash.h"      /* XXH64 */
diff --git a/programs/zstd.1 b/programs/zstd.1
index 8d69c4ddfd9..27d607f5c31 100644
--- a/programs/zstd.1
+++ b/programs/zstd.1
@@ -14,7 +14,7 @@
 
 .SH SYNOPSIS
 .TP 5
-\fBzstd\fR [\fBOPTIONS\fR] [-|INPUT-FILE] <OUTPUT-FILE>
+\fBzstd\fR [\fBOPTIONS\fR] [-|INPUT-FILE] [-o <OUTPUT-FILE>]
 .PP
 .B unzstd
 is equivalent to
@@ -28,15 +28,13 @@ is equivalent to
 .SH DESCRIPTION
 .PP
 \fBzstd\fR is a fast lossless compression algorithm.
-It is based on the \fBLZ77\fR family, with FSE & huff0 entropy stage.
-zstd offers compression speed > 200 MB/s per core.
-It also features a fast decoder, with speed > 500 MB/s per core.
+It is based on the \fBLZ77\fR family, with further FSE & huff0 entropy stages.
+\fBzstd\fR offers configurable compression speed, with fast modes at > 200 MB/s per core.
+It also features a very fast decoder, with speed > 500 MB/s per core.
 
 \fBzstd\fR command line is generally similar to gzip, but features the following differences :
  - Original files are preserved
- - By default, \fBzstd file1 file2\fR means : compress file1 \fBinto\fR file2.
-     Use \fB-m\fR command if you want : compress file1 into file1.zstd and file2 into file2.zst
- - By default, when compressing files, \fBzstd\fR displays advancement notification and result summary.
+ - By default, when compressing a single file, \fBzstd\fR displays progress notifications and result summary.
      Use \fB-q\fR to turn them off
 
 
@@ -45,21 +43,19 @@ It also features a fast decoder, with speed > 500 MB/s per core.
 .SH OPTIONS
 .TP
 .B \-#
- # compression level [1-19](default:1)
+ # compression level [1-21] (default:1)
 .TP
-.B \-d
+.BR \-d ", " --decompress
  decompression
 .TP
-.B \-f
- overwrite output without prompting
+.B \-D file
+ use `file` as Dictionary to compress or decompress FILE(s)
 .TP
-.BR \-m ", " --multiple
- multiple files mode
- In this mode, multiple files on the command line means compression or decompression of each named file
- Notifications are also turned off by default
+.B \-o file
+ save result into `file` (only possible with a single input FILE)
 .TP
-.B \-D
- Use next file as dictionary content for compress / decompression
+.BR \-f ", " --force
+ overwrite output without prompting
 .TP
 .BR \-h/\-H ", " --help
  display help/long help and exit
@@ -73,17 +69,47 @@ It also features a fast decoder, with speed > 500 MB/s per core.
 .BR \-q ", " --quiet
  suppress warnings and notifications; specify twice to suppress errors too
 .TP
-.B \-c 
+.BR \-c ", " --stdout
  force write to standard output, even if it is the console
+
+.SH DICTIONARY
+.PP
+\fBzstd\fR offers \fIdictionary\fR compression, useful for very small files and messages.
+It's possible to train \fBzstd\fR with some samples, the result of which is saved into a file called `dictionary`.
+Then during compression and decompression, make reference to the same dictionary.
+It will improve compression ratio of small files.
+Typical gains range from ~10% (at 64KB) to x5 better (at <1KB).
+.TP
+.B \--train FILEs
+ use FILEs as training set to create a dictionary.
+ The training set should contain a lot of small files (> 100),
+ and weigh typically 100x the target dictionary size
+ (for example, 10 MB for a 100 KB dictionary)
+.TP
+.B \-o file
+ dictionary saved into `file` (default: dictionary)
 .TP
-.B \-z
- force compression
+.B \--maxdict #
+ limit dictionary to specified size (default : 112640) 
+.TP
+.B \-s#
+ dictionary selectivity level (default: 9)
+ the smaller the value, the denser the dictionary, improving its efficiency but reducing its possible maximum size.
+
+.SH BENCHMARK
 .TP
 .B \-b#
  benchmark file(s) using compression level #
 .TP
 .B \-i#
  iteration loops [1-9](default : 3), benchmark mode only
+.TP
+.B \-B#
+ cut file into independent blocks of size # (default: no block)
+.TP
+.B \-r#
+ test all compression levels from 1 to # (default: disabled)
+
 
 .SH BUGS
 Report bugs at:- https://github.com/Cyan4973/zstd/issues
diff --git a/programs/zstdcli.c b/programs/zstdcli.c
index 1fe1b8b3204..abe13013c3e 100644
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@@ -1,6 +1,6 @@
 /*
   zstdcli - Command Line Interface (cli) for zstd
-  Copyright (C) Yann Collet 2014-2015
+  Copyright (C) Yann Collet 2014-2016
 
   GPL v2 License
 
@@ -19,25 +19,23 @@
   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 
   You can contact the author at :
-  - zstd source repository : https://github.com/Cyan4973/zstd
-  - ztsd public forum : https://groups.google.com/forum/#!forum/lz4c
+  - zstd homepage : http://www.zstd.net/
 */
 /*
-  Note : this is user program.
-  It is not part of zstd compression library.
-  The license of this compression CLI program is GPLv2.
-  The license of zstd library is BSD.
+  Note : this is a user program, not part of libzstd.
+  The license of this command line program is GPLv2.
+  The license of libzstd is BSD.
 */
 
 
-/**************************************
+/*-************************************
 *  Compiler Options
 **************************************/
 #define _CRT_SECURE_NO_WARNINGS  /* Visual : removes warning from strcpy */
 #define _POSIX_SOURCE 1          /* triggers fileno() within <stdio.h> on unix */
 
 
-/**************************************
+/*-************************************
 *  Includes
 **************************************/
 #include <stdio.h>    /* fprintf, getchar */
@@ -47,18 +45,18 @@
 #ifndef ZSTD_NOBENCH
 #  include "bench.h"  /* BMK_benchFiles, BMK_SetNbIterations */
 #endif
-#include "zstd.h"     /* ZSTD version numbers */
+#include "zstd_static.h" /* ZSTD_maxCLevel, ZSTD version numbers  */
+#ifndef ZSTD_NODICT
+#  include "dibio.h"  /* DiB_trainFromFiles */
+#endif
 
 
-/**************************************
+/*-************************************
 *  OS-specific Includes
 **************************************/
 #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__)
 #  include <fcntl.h>    /* _O_BINARY */
 #  include <io.h>       /* _setmode, _isatty */
-#  ifdef __MINGW32__
-   /* int _fileno(FILE *stream);   // seems no longer useful // MINGW somehow forgets to include this windows declaration into <stdio.h> */
-#  endif
 #  define SET_BINARY_MODE(file) _setmode(_fileno(file), _O_BINARY)
 #  define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream))
 #else
@@ -68,7 +66,7 @@
 #endif
 
 
-/**************************************
+/*-************************************
 *  Constants
 **************************************/
 #define COMPRESSOR_NAME "zstd command line interface"
@@ -78,7 +76,8 @@
 #  define ZSTD_VERSION "v" EXPAND_AND_QUOTE(ZSTD_VERSION_MAJOR) "." EXPAND_AND_QUOTE(ZSTD_VERSION_MINOR) "." EXPAND_AND_QUOTE(ZSTD_VERSION_RELEASE)
 #endif
 #define AUTHOR "Yann Collet"
-#define WELCOME_MESSAGE "*** %s %i-bits %s, by %s (%s) ***\n", COMPRESSOR_NAME, (int)(sizeof(void*)*8), ZSTD_VERSION, AUTHOR, __DATE__
+#define WELCOME_MESSAGE "*** %s %i-bits %s, by %s ***\n", COMPRESSOR_NAME, (int)(sizeof(void*)*8), ZSTD_VERSION, AUTHOR
+
 #define ZSTD_EXTENSION ".zst"
 #define ZSTD_CAT "zstdcat"
 #define ZSTD_UNZSTD "unzstd"
@@ -87,46 +86,36 @@
 #define MB *(1 <<20)
 #define GB *(1U<<30)
 
+static const char* g_defaultDictName = "dictionary";
+static const unsigned g_defaultMaxDictSize = 110 KB;
+static const unsigned g_defaultDictCLevel = 5;
+static const unsigned g_defaultSelectivityLevel = 9;
+
 
-/**************************************
+/*-************************************
 *  Display Macros
 **************************************/
 #define DISPLAY(...)           fprintf(displayOut, __VA_ARGS__)
 #define DISPLAYLEVEL(l, ...)   if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
 static FILE* displayOut;
-static unsigned displayLevel = 2;   // 0 : no display  // 1: errors  // 2 : + result + interaction + warnings ;  // 3 : + progression;  // 4 : + information
-
-
-/**************************************
-*  Exceptions
-**************************************/
-#define DEBUG 0
-#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
-#define EXM_THROW(error, ...)                                             \
-{                                                                         \
-    DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
-    DISPLAYLEVEL(1, "Error %i : ", error);                                \
-    DISPLAYLEVEL(1, __VA_ARGS__);                                         \
-    DISPLAYLEVEL(1, "\n");                                                \
-    exit(error);                                                          \
-}
+static unsigned displayLevel = 2;   /* 0 : no display,  1: errors,  2 : + result + interaction + warnings,  3 : + progression,  4 : + information */
 
 
-/**************************************
+/*-************************************
 *  Command Line
 **************************************/
 static int usage(const char* programName)
 {
     DISPLAY( "Usage :\n");
-    DISPLAY( "      %s [arg] [input] [output]\n", programName);
+    DISPLAY( "      %s [args] [FILE(s)] [-o file]\n", programName);
     DISPLAY( "\n");
-    DISPLAY( "input   : a filename\n");
+    DISPLAY( "FILE    : a filename\n");
     DISPLAY( "          with no FILE, or when FILE is - , read standard input\n");
     DISPLAY( "Arguments :\n");
-    DISPLAY( " -#     : # compression level (1-19, default:1) \n");
+    DISPLAY( " -#     : # compression level (1-%u, default:1) \n", ZSTD_maxCLevel());
     DISPLAY( " -d     : decompression \n");
     DISPLAY( " -D file: use `file` as Dictionary \n");
-    //DISPLAY( " -z     : force compression\n");
+    DISPLAY( " -o file: result stored into `file` (only if 1 input file) \n");
     DISPLAY( " -f     : overwrite output without prompting \n");
     DISPLAY( " -h/-H  : display help/long help and exit\n");
     return 0;
@@ -139,16 +128,23 @@ static int usage_advanced(const char* programName)
     DISPLAY( "\n");
     DISPLAY( "Advanced arguments :\n");
     DISPLAY( " -V     : display Version number and exit\n");
+    DISPLAY( " -t     : test compressed file integrity \n");
     DISPLAY( " -v     : verbose mode\n");
     DISPLAY( " -q     : suppress warnings; specify twice to suppress errors too\n");
-    DISPLAY( " -m     : multiple input filenames mode \n");
     DISPLAY( " -c     : force write to standard output, even if it is the console\n");
+#ifndef ZSTD_NODICT
+    DISPLAY( "Dictionary builder :\n");
+    DISPLAY( "--train : create a dictionary from a training set of files \n");
+    DISPLAY( " -o file: `file` is dictionary name (default: %s) \n", g_defaultDictName);
+    DISPLAY( "--maxdict:limit dictionary to specified size (default : %u) \n", g_defaultMaxDictSize);
+    DISPLAY( " -s#    : dictionary selectivity level (default: %u)\n", g_defaultSelectivityLevel);
+#endif
 #ifndef ZSTD_NOBENCH
     DISPLAY( "Benchmark arguments :\n");
     DISPLAY( " -b#    : benchmark file(s), using # compression level (default : 1) \n");
-    DISPLAY( " -B#    : cut file into independent blocks of size # (default : no block)\n");
     DISPLAY( " -i#    : iteration loops [1-9](default : 3)\n");
-    DISPLAY( " -r#    : test all compression levels from 1 to # (default : disabled)\n");
+    DISPLAY( " -B#    : cut file into independent blocks of size # (default: no block)\n");
+    DISPLAY( " -r#    : test all compression levels from 1 to # (default: disabled)\n");
 #endif
     return 0;
 }
@@ -178,8 +174,10 @@ int main(int argCount, const char** argv)
         forceStdout=0,
         main_pause=0,
         nextEntryIsDictionary=0,
-        multiple=0,
-        operationResult=0;
+        operationResult=0,
+        dictBuild=0,
+        nextArgumentIsOutFileName=0,
+        nextArgumentIsMaxDict=0;
     unsigned cLevel = 1;
     const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*));   /* argCount >= 1 */
     unsigned filenameIdx = 0;
@@ -187,11 +185,13 @@ int main(int argCount, const char** argv)
     const char* outFileName = NULL;
     const char* dictFileName = NULL;
     char* dynNameSpace = NULL;
-    const char extension[] = ZSTD_EXTENSION;
     int rangeBench = 1;
+    unsigned maxDictSize = g_defaultMaxDictSize;
+    unsigned dictCLevel = g_defaultDictCLevel;
+    unsigned dictSelect = g_defaultSelectivityLevel;
 
     /* init */
-    (void)rangeBench;   /* not used when ZSTD_NOBENCH set */
+    (void)rangeBench; (void)dictCLevel;   /* not used when ZSTD_NOBENCH / ZSTD_NODICT set */
     if (filenameTable==NULL) { DISPLAY("not enough memory\n"); exit(1); }
     displayOut = stderr;
     /* Pick out program name from path. Don't rely on stdlib because of conflicting behavior */
@@ -203,43 +203,46 @@ int main(int argCount, const char** argv)
     if (!strcmp(programName, ZSTD_CAT)) { decode=1; forceStdout=1; displayLevel=1; outFileName=stdoutmark; }
 
     /* command switches */
-    for(i=1; i<argCount; i++)
-    {
+    for(i=1; i<argCount; i++) {
         const char* argument = argv[i];
-
         if(!argument) continue;   /* Protection if argument empty */
 
         /* long commands (--long-word) */
+        if (!strcmp(argument, "--decompress")) { decode=1; continue; }
+        if (!strcmp(argument, "--force")) {  FIO_overwriteMode(); continue; }
         if (!strcmp(argument, "--version")) { displayOut=stdout; DISPLAY(WELCOME_MESSAGE); return 0; }
         if (!strcmp(argument, "--help")) { displayOut=stdout; return usage_advanced(programName); }
-        if (!strcmp(argument, "--multiple")) { multiple=1; continue; }
         if (!strcmp(argument, "--verbose")) { displayLevel=4; continue; }
         if (!strcmp(argument, "--quiet")) { displayLevel--; continue; }
+        if (!strcmp(argument, "--stdout")) { forceStdout=1; outFileName=stdoutmark; displayLevel=1; continue; }
+        if (!strcmp(argument, "--test")) { decode=1; outFileName=nulmark; FIO_overwriteMode(); continue; }
+        if (!strcmp(argument, "--train")) { dictBuild=1; outFileName=g_defaultDictName; continue; }
+        if (!strcmp(argument, "--maxdict")) { nextArgumentIsMaxDict=1; continue; }
+        if (!strcmp(argument, "--keep")) { continue; }   /* does nothing, since preserving input is default; for gzip/xz compatibility */
+
+        /* '-' means stdin/stdout */
+        if (!strcmp(argument, "-")){
+            if (!filenameIdx) { filenameIdx=1, filenameTable[0]=stdinmark; continue; }
+            outFileName=stdoutmark; continue;
+        }
 
         /* Decode commands (note : aggregated commands are allowed) */
-        if (argument[0]=='-')
-        {
-            /* '-' means stdin/stdout */
-            if (argument[1]==0)
-            {
-                if (!filenameIdx) { filenameIdx=1, filenameTable[0]=stdinmark; continue; }
-                outFileName=stdoutmark; continue;
-            }
-
+        if (argument[0]=='-') {
             argument++;
 
-            while (argument[0]!=0)
-            {
+            while (argument[0]!=0) {
+
                 /* compression Level */
-                if ((*argument>='0') && (*argument<='9'))
-                {
+                if ((*argument>='0') && (*argument<='9')) {
                     cLevel = 0;
-                    while ((*argument >= '0') && (*argument <= '9'))
-                    {
+                    while ((*argument >= '0') && (*argument <= '9')) {
                         cLevel *= 10;
                         cLevel += *argument - '0';
                         argument++;
                     }
+                    dictCLevel = cLevel;
+                    if (dictCLevel > ZSTD_maxCLevel())
+                        return badusage(programName);
                     continue;
                 }
 
@@ -250,24 +253,15 @@ int main(int argCount, const char** argv)
                 case 'H':
                 case 'h': displayOut=stdout; return usage_advanced(programName);
 
-                    /* Compression (default) */
-                //case 'z': forceCompress = 1; break;
-
-                    /* Decoding */
+                     /* Decoding */
                 case 'd': decode=1; argument++; break;
 
-                    /* Multiple input files */
-                case 'm': multiple=1; argument++; break;
-
                     /* Force stdout, even if stdout==console */
                 case 'c': forceStdout=1; outFileName=stdoutmark; displayLevel=1; argument++; break;
 
                     /* Use file content as dictionary */
                 case 'D': nextEntryIsDictionary = 1; argument++; break;
 
-                    /* Test -- not implemented */
-                /* case 't': decode=1; LZ4IO_setOverwrite(1); output_filename=nulmark; break; */
-
                     /* Overwrite */
                 case 'f': FIO_overwriteMode(); argument++; break;
 
@@ -280,6 +274,12 @@ int main(int argCount, const char** argv)
                     /* keep source file (default anyway, so useless; for gzip/xz compatibility) */
                 case 'k': argument++; break;
 
+                    /* test compressed file */
+                case 't': decode=1; outFileName=nulmark; FIO_overwriteMode(); argument++; break;
+
+                    /* output file name (dictionary name when used with --train) */
+                case 'o': nextArgumentIsOutFileName=1; argument++; break;
+
 #ifndef ZSTD_NOBENCH
                     /* Benchmark */
                 case 'b': bench=1; argument++; break;
@@ -316,6 +316,13 @@ int main(int argCount, const char** argv)
                         break;
 #endif   /* ZSTD_NOBENCH */
 
+                    /* Selection level */
+                case 's': argument++;
+                    dictSelect = 0;
+                    while ((*argument >= '0') && (*argument <= '9'))
+                        dictSelect *= 10, dictSelect += *argument++ - '0';
+                    break;
+
                     /* Pause at the end (hidden option) */
                 case 'p': main_pause=1; argument++; break;
 
@@ -326,14 +333,29 @@ int main(int argCount, const char** argv)
             continue;
         }
 
-        /* dictionary */
-        if (nextEntryIsDictionary)
-        {
+        if (nextEntryIsDictionary) {
             nextEntryIsDictionary = 0;
             dictFileName = argument;
             continue;
         }
 
+        if (nextArgumentIsOutFileName) {
+            nextArgumentIsOutFileName = 0;
+            outFileName = argument;
+            if (!strcmp(outFileName, "-")) outFileName = stdoutmark;
+            continue;
+        }
+
+        if (nextArgumentIsMaxDict) {
+            nextArgumentIsMaxDict = 0;
+            maxDictSize = 0;
+            while ((*argument>='0') && (*argument<='9'))
+                maxDictSize = maxDictSize * 10 + (*argument - '0'), argument++;
+            if (*argument=='k' || *argument=='K')
+                maxDictSize <<= 10;
+            continue;
+        }
+
         /* add filename to list */
         filenameTable[filenameIdx++] = argument;
     }
@@ -342,81 +364,54 @@ int main(int argCount, const char** argv)
     DISPLAYLEVEL(3, WELCOME_MESSAGE);
 
     /* Check if benchmark is selected */
-    if (bench)
-    {
+    if (bench) {
 #ifndef ZSTD_NOBENCH
         BMK_benchFiles(filenameTable, filenameIdx, dictFileName, cLevel*rangeBench);
 #endif
         goto _end;
     }
 
-    /* No input filename ==> use stdin */
-    if(!filenameIdx) filenameIdx=1, filenameTable[0]=stdinmark;
-
-    /* Check if input defined as console; trigger an error in this case */
-    if (!strcmp(filenameTable[0], stdinmark) && IS_CONSOLE(stdin) ) return badusage(programName);
-
-    /* No output filename ==> try to select one automatically (when possible) */
-    if (filenameIdx>=2) outFileName = filenameTable[1];
-    while (!outFileName)   /* while : just to allow break statement */
-    {
-        if (!IS_CONSOLE(stdout)) { outFileName=stdoutmark; break; }   /* Default to stdout whenever possible (i.e. not a console) */
-        if (!decode)   /* compression to file */
-        {
-            size_t l = strlen(filenameTable[0]);
-            dynNameSpace = (char*)calloc(1,l+5);
-            if (dynNameSpace==NULL) { DISPLAY("not enough memory\n"); exit(1); }
-            strcpy(dynNameSpace, filenameTable[0]);
-            strcpy(dynNameSpace+l, ZSTD_EXTENSION);
-            outFileName = dynNameSpace;
-            DISPLAYLEVEL(2, "Compressed filename will be : %s \n", outFileName);
-            break;
-        }
-        /* decompression to file (automatic name will work only if input filename has correct format extension) */
-        {
-            size_t filenameSize = strlen(filenameTable[0]);
-            if (strcmp(filenameTable[0] + (filenameSize-4), extension))
-            {
-                 DISPLAYLEVEL(1, "unknown suffix - cannot determine destination filename\n");
-                 return badusage(programName);
-            }
-            dynNameSpace = (char*)calloc(1,filenameSize+1);
-            if (dynNameSpace==NULL) { DISPLAY("not enough memory\n"); exit(1); }
-            outFileName = dynNameSpace;
-            strcpy(dynNameSpace, filenameTable[0]);
-            dynNameSpace[filenameSize-4]=0;
-            DISPLAYLEVEL(2, "Decoding file %s \n", outFileName);
-        }
+    /* Check if dictionary builder is selected */
+    if (dictBuild) {
+#ifndef ZSTD_NODICT
+        ZDICT_params_t dictParams;
+        dictParams.compressionLevel = dictCLevel;
+        dictParams.selectivityLevel = dictSelect;
+        dictParams.notificationLevel = displayLevel;
+        DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, dictParams);
+#endif
+        goto _end;
     }
 
-    /* Check if output is defined as console; trigger an error in this case */
-    if (!strcmp(outFileName,stdoutmark) && IS_CONSOLE(stdout) && !forceStdout) return badusage(programName);
+    /* No input filename ==> use stdin; output defaults to stdout unless one was already selected (-o file, -t/--test) */
+    if(!filenameIdx) { filenameIdx=1; filenameTable[0]=stdinmark; if (!outFileName) outFileName=stdoutmark; }
 
-    /* No warning message in pure pipe mode (stdin + stdout) or multiple mode */
-    if (!strcmp(filenameTable[0], stdinmark) && !strcmp(outFileName,stdoutmark) && (displayLevel==2)) displayLevel=1;
-    if (multiple && (displayLevel==2)) displayLevel=1;
+    /* Check if input/output defined as console; trigger an error in this case */
+    if (!strcmp(filenameTable[0], stdinmark) && IS_CONSOLE(stdin) ) return badusage(programName);
+    if (outFileName && !strcmp(outFileName, stdoutmark) && IS_CONSOLE(stdout) && !forceStdout) return badusage(programName);
 
-    if ((!multiple) && (filenameIdx>2))
-    {
-        DISPLAY("Too many files on the command line (%u > 2). Do you mean -m ? \n", filenameIdx);
+    /* user-selected output filename, only possible with a single file */
+    if (outFileName && strcmp(outFileName,stdoutmark) && strcmp(outFileName,nulmark) && (filenameIdx>1)) {
+        DISPLAY("Too many files (%u) on the command line. \n", filenameIdx);
         return filenameIdx;
     }
 
+    /* No warning message in pipe mode (stdin + stdout) or multiple mode; outFileName can still be NULL here (input given as "-" with no output selected) */
+    if (!strcmp(filenameTable[0], stdinmark) && outFileName && !strcmp(outFileName,stdoutmark) && (displayLevel==2)) displayLevel=1;
+    if ((filenameIdx>1) && (displayLevel==2)) displayLevel=1;
+
     /* IO Stream/File */
     FIO_setNotificationLevel(displayLevel);
-    if (decode)
-    {
-      if (multiple)
-        operationResult = FIO_decompressMultipleFilenames(filenameTable, filenameIdx, ZSTD_EXTENSION, dictFileName);
-      else
+    if (decode) {
+      if (filenameIdx==1 && outFileName)
         operationResult = FIO_decompressFilename(outFileName, filenameTable[0], dictFileName);
-    }
-    else
-    {
-        if (multiple)
-          operationResult = FIO_compressMultipleFilenames(filenameTable, filenameIdx, ZSTD_EXTENSION, dictFileName, cLevel);
-        else
+      else
+        operationResult = FIO_decompressMultipleFilenames(filenameTable, filenameIdx, outFileName ? outFileName : ZSTD_EXTENSION, dictFileName);
+    } else {  /* compression */
+        if (filenameIdx==1 && outFileName)
           operationResult = FIO_compressFilename(outFileName, filenameTable[0], dictFileName, cLevel);
+        else
+          operationResult = FIO_compressMultipleFilenames(filenameTable, filenameIdx, outFileName ? outFileName : ZSTD_EXTENSION, dictFileName, cLevel);
     }
 
 _end:
diff --git a/visual/2013/dictBuilder/dictBuilder.vcxproj b/visual/2013/dictBuilder/dictBuilder.vcxproj
deleted file mode 100644
index 0260102b560..00000000000
--- a/visual/2013/dictBuilder/dictBuilder.vcxproj
+++ /dev/null
@@ -1,183 +0,0 @@
-﻿<?xml version="1.0" encoding="utf-8"?>
-<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <ItemGroup Label="ProjectConfigurations">
-    <ProjectConfiguration Include="Debug|Win32">
-      <Configuration>Debug</Configuration>
-      <Platform>Win32</Platform>
-    </ProjectConfiguration>
-    <ProjectConfiguration Include="Debug|x64">
-      <Configuration>Debug</Configuration>
-      <Platform>x64</Platform>
-    </ProjectConfiguration>
-    <ProjectConfiguration Include="Release|Win32">
-      <Configuration>Release</Configuration>
-      <Platform>Win32</Platform>
-    </ProjectConfiguration>
-    <ProjectConfiguration Include="Release|x64">
-      <Configuration>Release</Configuration>
-      <Platform>x64</Platform>
-    </ProjectConfiguration>
-  </ItemGroup>
-  <PropertyGroup Label="Globals">
-    <ProjectGuid>{D4C01A3D-F609-4DA6-B53F-88D063CCE993}</ProjectGuid>
-    <Keyword>Win32Proj</Keyword>
-    <RootNamespace>fuzzer</RootNamespace>
-  </PropertyGroup>
-  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
-    <ConfigurationType>Application</ConfigurationType>
-    <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v120</PlatformToolset>
-    <CharacterSet>Unicode</CharacterSet>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
-    <ConfigurationType>Application</ConfigurationType>
-    <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v120</PlatformToolset>
-    <CharacterSet>Unicode</CharacterSet>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
-    <ConfigurationType>Application</ConfigurationType>
-    <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v120</PlatformToolset>
-    <WholeProgramOptimization>true</WholeProgramOptimization>
-    <CharacterSet>Unicode</CharacterSet>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
-    <ConfigurationType>Application</ConfigurationType>
-    <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v120</PlatformToolset>
-    <WholeProgramOptimization>true</WholeProgramOptimization>
-    <CharacterSet>Unicode</CharacterSet>
-  </PropertyGroup>
-  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
-  <ImportGroup Label="ExtensionSettings">
-  </ImportGroup>
-  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-  </ImportGroup>
-  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-  </ImportGroup>
-  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-  </ImportGroup>
-  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-  </ImportGroup>
-  <PropertyGroup Label="UserMacros" />
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
-    <LinkIncremental>true</LinkIncremental>
-    <IncludePath>$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\legacy;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
-    <RunCodeAnalysis>true</RunCodeAnalysis>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-    <LinkIncremental>true</LinkIncremental>
-    <IncludePath>$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
-    <RunCodeAnalysis>true</RunCodeAnalysis>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
-    <LinkIncremental>false</LinkIncremental>
-    <IncludePath>$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
-    <RunCodeAnalysis>true</RunCodeAnalysis>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
-    <LinkIncremental>false</LinkIncremental>
-    <IncludePath>$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib;$(VCInstallDir)include;$(VCInstallDir)atlmfc\include;$(WindowsSDK_IncludePath);</IncludePath>
-    <RunCodeAnalysis>true</RunCodeAnalysis>
-  </PropertyGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
-    <ClCompile>
-      <PrecompiledHeader>
-      </PrecompiledHeader>
-      <WarningLevel>Level4</WarningLevel>
-      <Optimization>Disabled</Optimization>
-      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <EnablePREfast>true</EnablePREfast>
-      <AdditionalOptions>/analyze:stacksize25000 %(AdditionalOptions)</AdditionalOptions>
-    </ClCompile>
-    <Link>
-      <SubSystem>Console</SubSystem>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-      <AdditionalDependencies>setargv.obj;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
-    </Link>
-  </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-    <ClCompile>
-      <PrecompiledHeader>
-      </PrecompiledHeader>
-      <WarningLevel>Level4</WarningLevel>
-      <Optimization>Disabled</Optimization>
-      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <EnablePREfast>true</EnablePREfast>
-      <AdditionalOptions>/analyze:stacksize25000 %(AdditionalOptions)</AdditionalOptions>
-    </ClCompile>
-    <Link>
-      <SubSystem>Console</SubSystem>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-    </Link>
-  </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
-    <ClCompile>
-      <WarningLevel>Level4</WarningLevel>
-      <PrecompiledHeader>
-      </PrecompiledHeader>
-      <Optimization>MaxSpeed</Optimization>
-      <FunctionLevelLinking>true</FunctionLevelLinking>
-      <IntrinsicFunctions>true</IntrinsicFunctions>
-      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <EnablePREfast>true</EnablePREfast>
-      <AdditionalOptions>/analyze:stacksize25000 %(AdditionalOptions)</AdditionalOptions>
-    </ClCompile>
-    <Link>
-      <SubSystem>Console</SubSystem>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-      <EnableCOMDATFolding>true</EnableCOMDATFolding>
-      <OptimizeReferences>true</OptimizeReferences>
-    </Link>
-  </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
-    <ClCompile>
-      <WarningLevel>Level4</WarningLevel>
-      <PrecompiledHeader>
-      </PrecompiledHeader>
-      <Optimization>MaxSpeed</Optimization>
-      <FunctionLevelLinking>true</FunctionLevelLinking>
-      <IntrinsicFunctions>true</IntrinsicFunctions>
-      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <EnablePREfast>true</EnablePREfast>
-      <AdditionalOptions>/analyze:stacksize25000 %(AdditionalOptions)</AdditionalOptions>
-    </ClCompile>
-    <Link>
-      <SubSystem>Console</SubSystem>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-      <EnableCOMDATFolding>true</EnableCOMDATFolding>
-      <OptimizeReferences>true</OptimizeReferences>
-    </Link>
-  </ItemDefinitionGroup>
-  <ItemGroup>
-    <ClCompile Include="..\..\..\dictBuilder\dibcli.c" />
-    <ClCompile Include="..\..\..\dictBuilder\dictBuilder.c" />
-    <ClCompile Include="..\..\..\dictBuilder\divsufsort.c" />
-    <ClCompile Include="..\..\..\dictBuilder\sssort.c" />
-    <ClCompile Include="..\..\..\dictBuilder\trsort.c" />
-    <ClCompile Include="..\..\..\dictBuilder\utils.c" />
-    <ClCompile Include="..\..\..\lib\fse.c" />
-    <ClCompile Include="..\..\..\lib\huff0.c" />
-    <ClCompile Include="..\..\..\lib\zstd_decompress.c" />
-  </ItemGroup>
-  <ItemGroup>
-    <ClInclude Include="..\..\..\dictBuilder\config.h" />
-    <ClInclude Include="..\..\..\dictBuilder\dictBuilder.h" />
-    <ClInclude Include="..\..\..\dictBuilder\divsufsort.h" />
-    <ClInclude Include="..\..\..\dictBuilder\divsufsort_private.h" />
-    <ClInclude Include="..\..\..\dictBuilder\lfs.h" />
-    <ClInclude Include="..\..\..\lib\fse.h" />
-    <ClInclude Include="..\..\..\lib\huff0.h" />
-    <ClInclude Include="..\..\..\lib\huff0_static.h" />
-    <ClInclude Include="..\..\..\lib\zstd.h" />
-  </ItemGroup>
-  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
-  <ImportGroup Label="ExtensionTargets">
-  </ImportGroup>
-</Project>
\ No newline at end of file
diff --git a/visual/2013/dictBuilder/dictBuilder.vcxproj.filters b/visual/2013/dictBuilder/dictBuilder.vcxproj.filters
deleted file mode 100644
index a38cc30e9fb..00000000000
--- a/visual/2013/dictBuilder/dictBuilder.vcxproj.filters
+++ /dev/null
@@ -1,75 +0,0 @@
-﻿<?xml version="1.0" encoding="utf-8"?>
-<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <ItemGroup>
-    <Filter Include="Fichiers sources">
-      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
-      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
-    </Filter>
-    <Filter Include="Fichiers d%27en-tête">
-      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
-      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
-    </Filter>
-    <Filter Include="Fichiers de ressources">
-      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
-      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
-    </Filter>
-  </ItemGroup>
-  <ItemGroup>
-    <ClCompile Include="..\..\..\dictBuilder\dibcli.c">
-      <Filter>Fichiers sources</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\dictBuilder\dictBuilder.c">
-      <Filter>Fichiers sources</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\dictBuilder\divsufsort.c">
-      <Filter>Fichiers sources</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\dictBuilder\sssort.c">
-      <Filter>Fichiers sources</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\dictBuilder\trsort.c">
-      <Filter>Fichiers sources</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\dictBuilder\utils.c">
-      <Filter>Fichiers sources</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\fse.c">
-      <Filter>Fichiers sources</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\huff0.c">
-      <Filter>Fichiers sources</Filter>
-    </ClCompile>
-    <ClCompile Include="..\..\..\lib\zstd_decompress.c">
-      <Filter>Fichiers sources</Filter>
-    </ClCompile>
-  </ItemGroup>
-  <ItemGroup>
-    <ClInclude Include="..\..\..\dictBuilder\config.h">
-      <Filter>Fichiers d%27en-tête</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\dictBuilder\dictBuilder.h">
-      <Filter>Fichiers d%27en-tête</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\dictBuilder\divsufsort.h">
-      <Filter>Fichiers d%27en-tête</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\dictBuilder\divsufsort_private.h">
-      <Filter>Fichiers d%27en-tête</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\dictBuilder\lfs.h">
-      <Filter>Fichiers d%27en-tête</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\fse.h">
-      <Filter>Fichiers d%27en-tête</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\huff0.h">
-      <Filter>Fichiers d%27en-tête</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\huff0_static.h">
-      <Filter>Fichiers d%27en-tête</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\..\lib\zstd.h">
-      <Filter>Fichiers d%27en-tête</Filter>
-    </ClInclude>
-  </ItemGroup>
-</Project>
\ No newline at end of file
diff --git a/visual/2013/fullbench/fullbench.vcxproj b/visual/2013/fullbench/fullbench.vcxproj
index c0d737682ec..3797960f460 100644
--- a/visual/2013/fullbench/fullbench.vcxproj
+++ b/visual/2013/fullbench/fullbench.vcxproj
@@ -161,9 +161,6 @@
   <ItemGroup>
     <ClCompile Include="..\..\..\lib\fse.c" />
     <ClCompile Include="..\..\..\lib\huff0.c" />
-    <ClCompile Include="..\..\..\lib\legacy\zstd_v01.c" />
-    <ClCompile Include="..\..\..\lib\legacy\zstd_v02.c" />
-    <ClCompile Include="..\..\..\lib\legacy\zstd_v03.c" />
     <ClCompile Include="..\..\..\lib\zstd_compress.c" />
     <ClCompile Include="..\..\..\lib\zstd_decompress.c" />
     <ClCompile Include="..\..\..\programs\datagen.c" />
diff --git a/visual/2013/fullbench/fullbench.vcxproj.filters b/visual/2013/fullbench/fullbench.vcxproj.filters
index c3db1976806..3a82000fb55 100644
--- a/visual/2013/fullbench/fullbench.vcxproj.filters
+++ b/visual/2013/fullbench/fullbench.vcxproj.filters
@@ -24,24 +24,15 @@
     <ClCompile Include="..\..\..\programs\datagen.c">
       <Filter>Fichiers sources</Filter>
     </ClCompile>
-    <ClCompile Include="..\..\..\lib\legacy\zstd_v01.c">
-      <Filter>Fichiers sources</Filter>
-    </ClCompile>
     <ClCompile Include="..\..\..\lib\huff0.c">
       <Filter>Fichiers sources</Filter>
     </ClCompile>
-    <ClCompile Include="..\..\..\lib\legacy\zstd_v02.c">
-      <Filter>Fichiers sources</Filter>
-    </ClCompile>
     <ClCompile Include="..\..\..\lib\zstd_compress.c">
       <Filter>Fichiers sources</Filter>
     </ClCompile>
     <ClCompile Include="..\..\..\lib\zstd_decompress.c">
       <Filter>Fichiers sources</Filter>
     </ClCompile>
-    <ClCompile Include="..\..\..\lib\legacy\zstd_v03.c">
-      <Filter>Fichiers sources</Filter>
-    </ClCompile>
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="..\..\..\lib\fse.h">
diff --git a/visual/2013/zstd.sln b/visual/2013/zstd.sln
index c132ddda639..3186fc67089 100644
--- a/visual/2013/zstd.sln
+++ b/visual/2013/zstd.sln
@@ -1,7 +1,7 @@
 ﻿
 Microsoft Visual Studio Solution File, Format Version 12.00
-# Visual Studio 14
-VisualStudioVersion = 14.0.24720.0
+# Visual Studio 2013
+VisualStudioVersion = 12.0.40629.0
 MinimumVisualStudioVersion = 10.0.40219.1
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "zstd", "zstd\zstd.vcxproj", "{4E52A41A-F33B-4C7A-8C36-A1A6B4F4277C}"
 EndProject
@@ -11,8 +11,6 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "fullbench", "fullbench\full
 EndProject
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "zstdlib", "zstdlib\zstdlib.vcxproj", "{8BFD8150-94D5-4BF9-8A50-7BD9929A0850}"
 EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "dictBuilder", "dictBuilder\dictBuilder.vcxproj", "{D4C01A3D-F609-4DA6-B53F-88D063CCE993}"
-EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|Win32 = Debug|Win32
@@ -53,14 +51,6 @@ Global
 		{8BFD8150-94D5-4BF9-8A50-7BD9929A0850}.Release|Win32.Build.0 = Release|Win32
 		{8BFD8150-94D5-4BF9-8A50-7BD9929A0850}.Release|x64.ActiveCfg = Release|x64
 		{8BFD8150-94D5-4BF9-8A50-7BD9929A0850}.Release|x64.Build.0 = Release|x64
-		{D4C01A3D-F609-4DA6-B53F-88D063CCE993}.Debug|Win32.ActiveCfg = Debug|Win32
-		{D4C01A3D-F609-4DA6-B53F-88D063CCE993}.Debug|Win32.Build.0 = Debug|Win32
-		{D4C01A3D-F609-4DA6-B53F-88D063CCE993}.Debug|x64.ActiveCfg = Debug|x64
-		{D4C01A3D-F609-4DA6-B53F-88D063CCE993}.Debug|x64.Build.0 = Debug|x64
-		{D4C01A3D-F609-4DA6-B53F-88D063CCE993}.Release|Win32.ActiveCfg = Release|Win32
-		{D4C01A3D-F609-4DA6-B53F-88D063CCE993}.Release|Win32.Build.0 = Release|Win32
-		{D4C01A3D-F609-4DA6-B53F-88D063CCE993}.Release|x64.ActiveCfg = Release|x64
-		{D4C01A3D-F609-4DA6-B53F-88D063CCE993}.Release|x64.Build.0 = Release|x64
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE
diff --git a/visual/2013/zstd/zstd.vcxproj b/visual/2013/zstd/zstd.vcxproj
index 1c3f0d1f244..45319769081 100644
--- a/visual/2013/zstd/zstd.vcxproj
+++ b/visual/2013/zstd/zstd.vcxproj
@@ -19,23 +19,27 @@
     </ProjectConfiguration>
   </ItemGroup>
   <ItemGroup>
+    <ClCompile Include="..\..\..\lib\divsufsort.c" />
     <ClCompile Include="..\..\..\lib\fse.c" />
     <ClCompile Include="..\..\..\lib\huff0.c" />
     <ClCompile Include="..\..\..\lib\legacy\zstd_v01.c" />
     <ClCompile Include="..\..\..\lib\legacy\zstd_v02.c" />
     <ClCompile Include="..\..\..\lib\legacy\zstd_v03.c" />
     <ClCompile Include="..\..\..\lib\legacy\zstd_v04.c" />
-    <ClCompile Include="..\..\..\lib\zstd_buffered.c" />
+    <ClCompile Include="..\..\..\lib\zbuff.c" />
+    <ClCompile Include="..\..\..\lib\zdict.c" />
     <ClCompile Include="..\..\..\lib\zstd_compress.c" />
     <ClCompile Include="..\..\..\lib\zstd_decompress.c" />
     <ClCompile Include="..\..\..\programs\bench.c" />
     <ClCompile Include="..\..\..\programs\datagen.c" />
+    <ClCompile Include="..\..\..\programs\dibio.c" />
     <ClCompile Include="..\..\..\programs\fileio.c" />
     <ClCompile Include="..\..\..\programs\legacy\fileio_legacy.c" />
     <ClCompile Include="..\..\..\programs\xxhash.c" />
     <ClCompile Include="..\..\..\programs\zstdcli.c" />
   </ItemGroup>
   <ItemGroup>
+    <ClInclude Include="..\..\..\lib\divsufsort.h" />
     <ClInclude Include="..\..\..\lib\fse.h" />
     <ClInclude Include="..\..\..\lib\fse_static.h" />
     <ClInclude Include="..\..\..\lib\huff0.h" />
@@ -45,6 +49,10 @@
     <ClInclude Include="..\..\..\lib\legacy\zstd_v02.h" />
     <ClInclude Include="..\..\..\lib\legacy\zstd_v03.h" />
     <ClInclude Include="..\..\..\lib\legacy\zstd_v04.h" />
+    <ClInclude Include="..\..\..\lib\zbuff.h" />
+    <ClInclude Include="..\..\..\lib\zbuff_static.h" />
+    <ClInclude Include="..\..\..\lib\zdict.h" />
+    <ClInclude Include="..\..\..\lib\zdict_static.h" />
     <ClInclude Include="..\..\..\lib\zstd.h" />
     <ClInclude Include="..\..\..\lib\zstd_buffered.h" />
     <ClInclude Include="..\..\..\lib\zstd_buffered_static.h" />
@@ -52,6 +60,7 @@
     <ClInclude Include="..\..\..\lib\zstd_static.h" />
     <ClInclude Include="..\..\..\programs\bench.h" />
     <ClInclude Include="..\..\..\programs\datagen.h" />
+    <ClInclude Include="..\..\..\programs\dibio.h" />
     <ClInclude Include="..\..\..\programs\fileio.h" />
     <ClInclude Include="..\..\..\programs\legacy\fileio_legacy.h" />
     <ClInclude Include="..\..\..\programs\xxhash.h" />
diff --git a/visual/2013/zstd/zstd.vcxproj.filters b/visual/2013/zstd/zstd.vcxproj.filters
index e8ab9fe5878..31a97801987 100644
--- a/visual/2013/zstd/zstd.vcxproj.filters
+++ b/visual/2013/zstd/zstd.vcxproj.filters
@@ -48,9 +48,6 @@
     <ClCompile Include="..\..\..\lib\zstd_decompress.c">
       <Filter>Fichiers sources</Filter>
     </ClCompile>
-    <ClCompile Include="..\..\..\lib\zstd_buffered.c">
-      <Filter>Fichiers sources</Filter>
-    </ClCompile>
     <ClCompile Include="..\..\..\lib\legacy\zstd_v03.c">
       <Filter>Fichiers sources</Filter>
     </ClCompile>
@@ -60,6 +57,18 @@
     <ClCompile Include="..\..\..\lib\legacy\zstd_v04.c">
       <Filter>Fichiers sources</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\..\lib\divsufsort.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\lib\zbuff.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\lib\zdict.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\..\programs\dibio.c">
+      <Filter>Fichiers sources</Filter>
+    </ClCompile>
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="..\..\..\lib\fse.h">
@@ -119,5 +128,23 @@
     <ClInclude Include="..\..\..\lib\legacy\zstd_v04.h">
       <Filter>Fichiers d%27en-tête</Filter>
     </ClInclude>
+    <ClInclude Include="..\..\..\lib\divsufsort.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\zbuff.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\zbuff_static.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\zdict.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\lib\zdict_static.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\programs\dibio.h">
+      <Filter>Fichiers d%27en-tête</Filter>
+    </ClInclude>
   </ItemGroup>
 </Project>
\ No newline at end of file
diff --git a/visual/2013/zstdlib/zstdlib.vcxproj b/visual/2013/zstdlib/zstdlib.vcxproj
index 05a03ab8dba..b13bc98ff7c 100644
--- a/visual/2013/zstdlib/zstdlib.vcxproj
+++ b/visual/2013/zstdlib/zstdlib.vcxproj
@@ -21,7 +21,7 @@
   <ItemGroup>
     <ClCompile Include="..\..\..\lib\fse.c" />
     <ClCompile Include="..\..\..\lib\huff0.c" />
-    <ClCompile Include="..\..\..\lib\zstd_buffered.c" />
+    <ClCompile Include="..\..\..\lib\zbuff.c" />
     <ClCompile Include="..\..\..\lib\zstd_compress.c" />
     <ClCompile Include="..\..\..\lib\zstd_decompress.c" />
   </ItemGroup>
@@ -34,9 +34,9 @@
     <ClInclude Include="..\..\..\lib\huff0.h" />
     <ClInclude Include="..\..\..\lib\huff0_static.h" />
     <ClInclude Include="..\..\..\lib\mem.h" />
+    <ClInclude Include="..\..\..\lib\zbuff.h" />
+    <ClInclude Include="..\..\..\lib\zbuff_static.h" />
     <ClInclude Include="..\..\..\lib\zstd.h" />
-    <ClInclude Include="..\..\..\lib\zstd_buffered.h" />
-    <ClInclude Include="..\..\..\lib\zstd_buffered_static.h" />
     <ClInclude Include="..\..\..\lib\zstd_internal.h" />
     <ClInclude Include="..\..\..\lib\zstd_static.h" />
     <ClInclude Include="resource.h" />
diff --git a/visual/2013/zstdlib/zstdlib.vcxproj.filters b/visual/2013/zstdlib/zstdlib.vcxproj.filters
index e4bdf7dbff1..ffb457b0b15 100644
--- a/visual/2013/zstdlib/zstdlib.vcxproj.filters
+++ b/visual/2013/zstdlib/zstdlib.vcxproj.filters
@@ -21,15 +21,15 @@
     <ClCompile Include="..\..\..\lib\huff0.c">
       <Filter>Source Files</Filter>
     </ClCompile>
-    <ClCompile Include="..\..\..\lib\zstd_buffered.c">
-      <Filter>Source Files</Filter>
-    </ClCompile>
     <ClCompile Include="..\..\..\lib\zstd_compress.c">
       <Filter>Source Files</Filter>
     </ClCompile>
     <ClCompile Include="..\..\..\lib\zstd_decompress.c">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\..\lib\zbuff.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="..\..\..\lib\fse.h">
@@ -59,19 +59,19 @@
     <ClInclude Include="..\..\..\lib\zstd_internal.h">
       <Filter>Header Files</Filter>
     </ClInclude>
-    <ClInclude Include="..\..\..\lib\zstd_buffered.h">
+    <ClInclude Include="..\..\..\lib\mem.h">
       <Filter>Header Files</Filter>
     </ClInclude>
-    <ClInclude Include="..\..\..\lib\zstd_buffered_static.h">
+    <ClInclude Include="..\..\..\lib\error_private.h">
       <Filter>Header Files</Filter>
     </ClInclude>
-    <ClInclude Include="..\..\..\lib\mem.h">
+    <ClInclude Include="..\..\..\lib\error_public.h">
       <Filter>Header Files</Filter>
     </ClInclude>
-    <ClInclude Include="..\..\..\lib\error_private.h">
+    <ClInclude Include="..\..\..\lib\zbuff.h">
       <Filter>Header Files</Filter>
     </ClInclude>
-    <ClInclude Include="..\..\..\lib\error_public.h">
+    <ClInclude Include="..\..\..\lib\zbuff_static.h">
       <Filter>Header Files</Filter>
     </ClInclude>
   </ItemGroup>