From 7a3ab588c750404048a6f3dff732bfeb0d100510 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 15 Dec 2015 11:25:12 +0100 Subject: [PATCH 01/19] minor fixes --- programs/fileio.c | 4 ++-- programs/zstdcli.c | 10 ++++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/programs/fileio.c b/programs/fileio.c index 81635b934cf..05bdb21ee67 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -270,7 +270,7 @@ unsigned long long FIO_compressFilename(const char* output_filename, const char* int seekResult; if (dictSize > 1 GB) EXM_THROW(21, "Dictionary file %s is too large", dictFileName); /* avoid extreme cases */ DISPLAYLEVEL(2,"Dictionary %s is too large : using last %u bytes only \n", dictFileName, MAX_DICT_SIZE); - seekResult = fseek(dictHandle, (size_t)(dictSize-MAX_DICT_SIZE), SEEK_SET); /* use end of file */ + seekResult = fseek(dictHandle, (long int)(dictSize-MAX_DICT_SIZE), SEEK_SET); /* use end of file */ if (seekResult != 0) EXM_THROW(21, "Error seeking into dictionary file %s", dictFileName); dictSize = MAX_DICT_SIZE; } @@ -417,7 +417,7 @@ unsigned long long FIO_decompressFilename(const char* output_filename, const cha int seekResult; if (dictSize > 1 GB) EXM_THROW(21, "Dictionary file %s is too large", dictFileName); /* avoid extreme cases */ DISPLAYLEVEL(2,"Dictionary %s is too large : using last %u bytes only \n", dictFileName, MAX_DICT_SIZE); - seekResult = fseek(dictHandle, dictSize-MAX_DICT_SIZE, SEEK_SET); /* use end of file */ + seekResult = fseek(dictHandle, (long int)(dictSize-MAX_DICT_SIZE), SEEK_SET); /* use end of file */ if (seekResult != 0) EXM_THROW(21, "Error seeking into dictionary file %s", dictFileName); dictSize = MAX_DICT_SIZE; } diff --git a/programs/zstdcli.c b/programs/zstdcli.c index e04e40fd001..b8c1310653d 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -43,23 +43,25 @@ #include /* fprintf, getchar */ #include /* exit, calloc, free */ #include /* strcmp, strlen */ -#include "bench.h" /* BMK_benchFiles, BMK_SetNbIterations */ #include "fileio.h" +#ifndef ZSTD_NOBENCH +# include "bench.h" /* BMK_benchFiles, BMK_SetNbIterations */ +#endif /************************************** * OS-specific Includes **************************************/ #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__) -# include // _O_BINARY -# include // _setmode, _isatty +# include /* _O_BINARY */ +# include /* _setmode, _isatty */ # ifdef __MINGW32__ /* int _fileno(FILE *stream); // seems no longer useful // MINGW somehow forgets to include this windows declaration into */ # endif # define SET_BINARY_MODE(file) _setmode(_fileno(file), _O_BINARY) # define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream)) #else -# include // isatty +# include /* isatty */ # define SET_BINARY_MODE(file) # define IS_CONSOLE(stdStream) isatty(fileno(stdStream)) #endif From ed699e692d59f3f4857837a7ee0a1f40e0695b0e Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 16 Dec 2015 02:37:24 +0100 Subject: [PATCH 02/19] benchmark multiple files --- programs/bench.c | 278 ++++++++++++++++++++++++----------------------- 1 file changed, 143 insertions(+), 135 deletions(-) diff --git a/programs/bench.c b/programs/bench.c index c48112f1d5b..f55219b4b76 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -72,6 +72,10 @@ # define S_ISREG(x) (((x) & S_IFMT) == S_IFREG) #endif +#ifdef _MSC_VER +#define snprintf sprintf_s +#endif + /* ************************************* * Constants @@ -90,9 +94,28 @@ static U32 g_compressibilityDefault = 50; /* ************************************* -* Macros +* console display ***************************************/ -#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) +#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) +#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } +static U32 g_displayLevel = 2; /* 0 : no display; 1: errors; 2 : + result + interaction + warnings; 3 : + progression; 4 : + information */ + + +/* ************************************* +* Exceptions +***************************************/ +#ifndef DEBUG +# define DEBUG 0 +#endif +#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__); +#define EXM_THROW(error, ...) \ +{ \ + DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \ + DISPLAYLEVEL(1, "Error %i : ", error); \ + DISPLAYLEVEL(1, __VA_ARGS__); \ + DISPLAYLEVEL(1, "\n"); \ + exit(error); \ +} /* ************************************* @@ -156,13 +179,27 @@ static int BMK_GetMilliSpan( int nTimeStart ) return nSpan; } +static U64 BMK_getFileSize(const char* infilename) +{ + int r; +#if defined(_MSC_VER) + struct _stat64 statbuf; + r = _stat64(infilename, &statbuf); +#else + struct stat statbuf; + r = stat(infilename, &statbuf); +#endif + if (r || !S_ISREG(statbuf.st_mode)) return 0; /* No good... */ + return (U64)statbuf.st_size; +} + /* ******************************************************** * Bench functions **********************************************************/ typedef struct { - char* srcPtr; + const char* srcPtr; size_t srcSize; char* cPtr; size_t cRoom; @@ -175,53 +212,52 @@ typedef size_t (*compressor_t) (void* dst, size_t maxDstSize, const void* src, s #define MIN(a,b) ((a)<(b) ? (a) : (b)) -static int BMK_benchMem(void* srcBuffer, size_t srcSize, const char* fileName, int cLevel) +static int BMK_benchMem(const void* srcBuffer, size_t srcSize, + const char* displayName, int cLevel, + const char** fileNames, U32 nbFiles) { const size_t blockSize = (g_blockSize ? g_blockSize : srcSize) + (!srcSize); /* avoid div by 0 */ - const U32 nbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize); - blockParam_t* const blockTable = (blockParam_t*) malloc(nbBlocks * sizeof(blockParam_t)); - const size_t maxCompressedSize = (size_t)nbBlocks * ZSTD_compressBound(blockSize); + const U32 maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles; + blockParam_t* const blockTable = (blockParam_t*) malloc(maxNbBlocks * sizeof(blockParam_t)); + const size_t maxCompressedSize = (size_t)maxNbBlocks * ZSTD_compressBound(blockSize); void* const compressedBuffer = malloc(maxCompressedSize); void* const resultBuffer = malloc(srcSize); const compressor_t compressor = ZSTD_compress; - U64 crcOrig; + U64 crcOrig = XXH64(srcBuffer, srcSize, 0); + U32 nbBlocks = 0; /* init */ - if (strlen(fileName)>16) - fileName += strlen(fileName)-16; + if (strlen(displayName)>17) displayName += strlen(displayName)-17; /* can only display 17 characters */ /* Memory allocation & restrictions */ if (!compressedBuffer || !resultBuffer || !blockTable) - { - DISPLAY("\nError: not enough memory!\n"); - free(compressedBuffer); - free(resultBuffer); - free(blockTable); - return 12; - } - - /* Calculating input Checksum */ - crcOrig = XXH64(srcBuffer, srcSize, 0); + EXM_THROW(31, "not enough memory"); /* Init blockTable data */ { - U32 i; - size_t remaining = srcSize; - char* srcPtr = (char*)srcBuffer; + U32 fileNb; + const char* srcPtr = (const char*)srcBuffer; char* cPtr = (char*)compressedBuffer; char* resPtr = (char*)resultBuffer; - for (i=0; i=2) ? BMK_getFileSize(fileNames[fileNb]) : srcSize; + size_t remaining = (size_t)fileSize; + U32 nbBlocksforThisFile = (U32)((fileSize + (blockSize-1)) / blockSize); + U32 blockEnd = nbBlocks + nbBlocksforThisFile; + for ( ; nbBlocks\r", loopNb, fileName, (U32)srcSize); + DISPLAY("%2i-%-17.17s :%10u ->\r", loopNb, displayName, (U32)srcSize); memset(compressedBuffer, 0xE5, maxCompressedSize); nbLoops = 0; @@ -264,7 +300,7 @@ static int BMK_benchMem(void* srcBuffer, size_t srcSize, const char* fileName, i cSize += blockTable[blockNb].cSize; if ((double)milliTime < fastestC*nbLoops) fastestC = (double)milliTime / nbLoops; ratio = (double)srcSize / (double)cSize; - DISPLAY("%2i-%-17.17s :%10i ->%10i (%5.3f),%6.1f MB/s\r", loopNb, fileName, (int)srcSize, (int)cSize, ratio, (double)srcSize / fastestC / 1000.); + DISPLAY("%2i-%-17.17s :%10i ->%10i (%5.3f),%6.1f MB/s\r", loopNb, displayName, (int)srcSize, (int)cSize, ratio, (double)srcSize / fastestC / 1000.); #if 1 /* Decompression */ @@ -283,7 +319,7 @@ static int BMK_benchMem(void* srcBuffer, size_t srcSize, const char* fileName, i milliTime = BMK_GetMilliSpan(milliTime); if ((double)milliTime < fastestD*nbLoops) fastestD = (double)milliTime / nbLoops; - DISPLAY("%2i-%-17.17s :%10i ->%10i (%5.3f),%6.1f MB/s ,%6.1f MB/s\r", loopNb, fileName, (int)srcSize, (int)cSize, ratio, (double)srcSize / fastestC / 1000., (double)srcSize / fastestD / 1000.); + DISPLAY("%2i-%-17.17s :%10i ->%10i (%5.3f),%6.1f MB/s ,%6.1f MB/s\r", loopNb, displayName, (int)srcSize, (int)cSize, ratio, (double)srcSize / fastestC / 1000., (double)srcSize / fastestD / 1000.); /* CRC Checking */ crcCheck = XXH64(resultBuffer, srcSize, 0); @@ -291,10 +327,10 @@ static int BMK_benchMem(void* srcBuffer, size_t srcSize, const char* fileName, i { unsigned u; unsigned eBlockSize = (unsigned)(MIN(65536*2, blockSize)); - DISPLAY("\n!!! WARNING !!! %14s : Invalid Checksum : %x != %x\n", fileName, (unsigned)crcOrig, (unsigned)crcCheck); + DISPLAY("\n!!! WARNING !!! %14s : Invalid Checksum : %x != %x\n", displayName, (unsigned)crcOrig, (unsigned)crcCheck); for (u=0; u%10i (%5.3f),%6.1f MB/s ,%6.1f MB/s \n", cLevel, fileName, (int)srcSize, (int)cSize, ratio, (double)srcSize / fastestC / 1000., (double)srcSize / fastestD / 1000.); + DISPLAY("%2i-%-17.17s :%10i ->%10i (%5.3f),%6.1f MB/s ,%6.1f MB/s \n", cLevel, displayName, (int)srcSize, (int)cSize, ratio, (double)srcSize / fastestC / 1000., (double)srcSize / fastestD / 1000.); } - /* End cleaning */ + /* clean up */ free(compressedBuffer); free(resultBuffer); return 0; } -static U64 BMK_GetFileSize(const char* infilename) -{ - int r; -#if defined(_MSC_VER) - struct _stat64 statbuf; - r = _stat64(infilename, &statbuf); -#else - struct stat statbuf; - r = stat(infilename, &statbuf); -#endif - if (r || !S_ISREG(statbuf.st_mode)) return 0; /* No good... */ - return (U64)statbuf.st_size; -} - static size_t BMK_findMaxMem(U64 requiredMem) { size_t step = 64 MB; @@ -349,103 +371,99 @@ static size_t BMK_findMaxMem(U64 requiredMem) return (size_t)(requiredMem - step); } -static int BMK_benchOneFile(const char* inFileName, int cLevel) +static void BMK_benchCLevel(void* srcBuffer, size_t benchedSize, + const char* displayName, int cLevel, + const char** fileNamesTable, unsigned nbFiles) { - FILE* inFile; - U64 inFileSize; - size_t benchedSize, readSize; - void* srcBuffer; - int result=0; - - /* Check file existence */ - inFile = fopen(inFileName, "rb"); - if (inFile == NULL) + if (cLevel < 0) { - DISPLAY("Pb opening %s\n", inFileName); - return 11; + int l; + for (l=1; l <= -cLevel; l++) + BMK_benchMem(srcBuffer, benchedSize, displayName, l, fileNamesTable, nbFiles); + return; } + BMK_benchMem(srcBuffer, benchedSize, displayName, cLevel, fileNamesTable, nbFiles); +} - /* Memory allocation & restrictions */ - inFileSize = BMK_GetFileSize(inFileName); - benchedSize = BMK_findMaxMem(inFileSize * 3) / 3; - if ((U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize; - if (benchedSize < inFileSize) - DISPLAY("Not enough memory for '%s' full size; testing %i MB only...\n", inFileName, (int)(benchedSize >> 20)); - srcBuffer = malloc(benchedSize); - if (!srcBuffer) - { - DISPLAY("\nError: not enough memory!\n"); - fclose(inFile); - return 12; - } +static U64 BMK_getTotalFileSize(const char** fileNamesTable, unsigned nbFiles) +{ + U64 total = 0; + unsigned n; + for (n=0; n bufferSize-pos) fileSize = bufferSize-pos; + readSize = fread(buff+pos, 1, (size_t)fileSize, f); + if (readSize != (size_t)fileSize) EXM_THROW(11, "could not read %s", fileNamesTable[n]); + pos += readSize; + fclose(f); } +} + +static void BMK_benchFileTable(const char** fileNamesTable, unsigned nbFiles, int cLevel) +{ + void* srcBuffer; + size_t benchedSize; + U64 totalSizeToLoad = BMK_getTotalFileSize(fileNamesTable, nbFiles); + char mfName[20] = {0}; + const char* displayName = NULL; + + /* Memory allocation & restrictions */ + benchedSize = BMK_findMaxMem(totalSizeToLoad * 3) / 3; + if ((U64)benchedSize > totalSizeToLoad) benchedSize = (size_t)totalSizeToLoad; + if (benchedSize < totalSizeToLoad) + DISPLAY("Not enough memory; testing %u MB only...\n", (U32)(benchedSize >> 20)); + srcBuffer = malloc(benchedSize); + if (!srcBuffer) EXM_THROW(12, "not enough memory"); + + /* Load input buffer */ + BMK_loadFiles(srcBuffer, benchedSize, fileNamesTable, nbFiles); /* Bench */ - if (cLevel<0) - { - int l; - for (l=1; l <= -cLevel; l++) - result = BMK_benchMem(srcBuffer, benchedSize, inFileName, l); - } - else - result = BMK_benchMem(srcBuffer, benchedSize, inFileName, cLevel); + snprintf (mfName, sizeof(mfName), " %u files", nbFiles); + if (nbFiles > 1) displayName = mfName; + else displayName = fileNamesTable[0]; + + BMK_benchCLevel(srcBuffer, benchedSize, displayName, cLevel, fileNamesTable, nbFiles); /* clean up */ free(srcBuffer); - DISPLAY("\n"); - return result; } -static int BMK_syntheticTest(int cLevel, double compressibility) +static void BMK_syntheticTest(int cLevel, double compressibility) { + char name[20] = {0}; size_t benchedSize = 10000000; void* srcBuffer = malloc(benchedSize); - int result=0; - char name[20] = {0}; /* Memory allocation */ - if (!srcBuffer) - { - DISPLAY("\nError: not enough memory!\n"); - free(srcBuffer); - return 12; - } + if (!srcBuffer) EXM_THROW(21, "not enough memory"); /* Fill input buffer */ RDG_genBuffer(srcBuffer, benchedSize, compressibility, 0.0, 0); /* Bench */ -#ifdef _MSC_VER - sprintf_s(name, 20, "Synthetic %2u%%", (unsigned)(compressibility*100)); -#else - snprintf (name, 20, "Synthetic %2u%%", (unsigned)(compressibility*100)); -#endif - /* Bench */ - if (cLevel<0) - { - int l; - for (l=1; l <= -cLevel; l++) - result = BMK_benchMem(srcBuffer, benchedSize, name, l); - } - else - result = BMK_benchMem(srcBuffer, benchedSize, name, cLevel); + snprintf (name, sizeof(name), "Synthetic %2u%%", (unsigned)(compressibility*100)); + BMK_benchCLevel(srcBuffer, benchedSize, name, cLevel, NULL, 1); - /* End */ + /* clean up */ free(srcBuffer); - DISPLAY("\n"); - return result; } @@ -454,19 +472,9 @@ int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles, unsigned cLeve double compressibility = (double)g_compressibilityDefault / 100; if (nbFiles == 0) - { BMK_syntheticTest(cLevel, compressibility); - } else - { - /* Loop for each file */ - unsigned fileIdx = 0; - while (fileIdx Date: Wed, 16 Dec 2015 02:44:56 +0100 Subject: [PATCH 03/19] fixed Visual warning --- programs/bench.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/programs/bench.c b/programs/bench.c index f55219b4b76..6b061f12b30 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -27,7 +27,10 @@ * Compiler Options ****************************************/ /* Disable some Visual warning messages */ -#define _CRT_SECURE_NO_WARNINGS /* fopen */ +#ifdef _MSC_VER +# define _CRT_SECURE_NO_WARNINGS /* fopen */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +#endif /* Unix Large Files support (>4GB) */ #define _FILE_OFFSET_BITS 64 From 7061135d330342212147d01762c53f0bda78a0a0 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 16 Dec 2015 03:01:03 +0100 Subject: [PATCH 04/19] faster init for multi-files bench --- programs/bench.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/programs/bench.c b/programs/bench.c index 6b061f12b30..940ddf00399 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -217,7 +217,7 @@ typedef size_t (*compressor_t) (void* dst, size_t maxDstSize, const void* src, s static int BMK_benchMem(const void* srcBuffer, size_t srcSize, const char* displayName, int cLevel, - const char** fileNames, U32 nbFiles) + const size_t* fileSizes, U32 nbFiles) { const size_t blockSize = (g_blockSize ? g_blockSize : srcSize) + (!srcSize); /* avoid div by 0 */ const U32 maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles; @@ -244,9 +244,8 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, char* resPtr = (char*)resultBuffer; for (fileNb=0; fileNb=2) ? BMK_getFileSize(fileNames[fileNb]) : srcSize; - size_t remaining = (size_t)fileSize; - U32 nbBlocksforThisFile = (U32)((fileSize + (blockSize-1)) / blockSize); + size_t remaining = fileSizes[fileNb]; + U32 nbBlocksforThisFile = (U32)((remaining + (blockSize-1)) / blockSize); U32 blockEnd = nbBlocks + nbBlocksforThisFile; for ( ; nbBlocks> 20)); srcBuffer = malloc(benchedSize); + fileSizes = malloc(nbFiles * sizeof(size_t)); if (!srcBuffer) EXM_THROW(12, "not enough memory"); /* Load input buffer */ - BMK_loadFiles(srcBuffer, benchedSize, fileNamesTable, nbFiles); + BMK_loadFiles(srcBuffer, benchedSize, fileSizes, fileNamesTable, nbFiles); /* Bench */ snprintf (mfName, sizeof(mfName), " %u files", nbFiles); if (nbFiles > 1) displayName = mfName; else displayName = fileNamesTable[0]; - BMK_benchCLevel(srcBuffer, benchedSize, displayName, cLevel, fileNamesTable, nbFiles); + BMK_benchCLevel(srcBuffer, benchedSize, displayName, cLevel, fileSizes, nbFiles); /* clean up */ free(srcBuffer); + free(fileSizes); } @@ -463,7 +468,7 @@ static void BMK_syntheticTest(int cLevel, double compressibility) /* Bench */ snprintf (name, sizeof(name), "Synthetic %2u%%", (unsigned)(compressibility*100)); - BMK_benchCLevel(srcBuffer, benchedSize, name, cLevel, NULL, 1); + BMK_benchCLevel(srcBuffer, benchedSize, name, cLevel, &benchedSize, 1); /* clean up */ free(srcBuffer); From a52c98d23a827eb2b8b8be395df092cb3deab4b0 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 16 Dec 2015 03:12:31 +0100 Subject: [PATCH 05/19] fix Visual warning --- programs/bench.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/bench.c b/programs/bench.c index 940ddf00399..dcd937f6a4f 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -415,7 +415,7 @@ static void BMK_loadFiles(void* buffer, size_t bufferSize, readSize = fread(buff+pos, 1, (size_t)fileSize, f); if (readSize != (size_t)fileSize) EXM_THROW(11, "could not read %s", fileNamesTable[n]); pos += readSize; - fileSizes[n] = fileSize; + fileSizes[n] = (size_t)fileSize; fclose(f); } } From 880486ff0508d65dfe2674c8aa17782c1f531cb7 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 16 Dec 2015 03:37:21 +0100 Subject: [PATCH 06/19] fix g++ warning --- programs/bench.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/bench.c b/programs/bench.c index dcd937f6a4f..cc135602b7c 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -435,7 +435,7 @@ static void BMK_benchFileTable(const char** fileNamesTable, unsigned nbFiles, in if (benchedSize < totalSizeToLoad) DISPLAY("Not enough memory; testing %u MB only...\n", (U32)(benchedSize >> 20)); srcBuffer = malloc(benchedSize); - fileSizes = malloc(nbFiles * sizeof(size_t)); + fileSizes = (size_t*)malloc(nbFiles * sizeof(size_t)); if (!srcBuffer) EXM_THROW(12, "not enough memory"); /* Load input buffer */ From ad84ac93753b6f1a3ac5d772bfd304c515c6bd30 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 16 Dec 2015 03:57:46 +0100 Subject: [PATCH 07/19] reduce memory usage --- programs/bench.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/bench.c b/programs/bench.c index cc135602b7c..676b039525c 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -222,7 +222,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, const size_t blockSize = (g_blockSize ? g_blockSize : srcSize) + (!srcSize); /* avoid div by 0 */ const U32 maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles; blockParam_t* const blockTable = (blockParam_t*) malloc(maxNbBlocks * sizeof(blockParam_t)); - const size_t maxCompressedSize = (size_t)maxNbBlocks * ZSTD_compressBound(blockSize); + const size_t maxCompressedSize = ZSTD_compressBound(srcSize) + (maxNbBlocks * 512); /* add some room for safety */ void* const compressedBuffer = malloc(maxCompressedSize); void* const resultBuffer = malloc(srcSize); const compressor_t compressor = ZSTD_compress; From f54f57023c92818e66b52b25ee9353e9133a6ea0 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 16 Dec 2015 19:38:54 +0100 Subject: [PATCH 08/19] update params for 256K --- lib/zstd_static.h | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/lib/zstd_static.h b/lib/zstd_static.h index ebb8c83cf4b..1b43e69e3e2 100644 --- a/lib/zstd_static.h +++ b/lib/zstd_static.h @@ -207,28 +207,28 @@ static const ZSTD_parameters ZSTD_defaultParameters[4][ZSTD_MAX_CLEVEL+1] = { { 0, 26, 27, 25, 9, 5, ZSTD_btlazy2 }, /* level 20 */ }, { /* for srcSize <= 256 KB */ - /* W, C, H, S, L, strat */ - { 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 - never used */ - { 0, 18, 16, 15, 1, 7, ZSTD_fast }, /* level 1 */ - { 0, 18, 16, 16, 1, 7, ZSTD_fast }, /* level 2 */ - { 0, 18, 18, 18, 1, 7, ZSTD_fast }, /* level 3 */ - { 0, 18, 14, 15, 4, 6, ZSTD_greedy }, /* level 4 */ - { 0, 18, 16, 16, 1, 6, ZSTD_lazy }, /* level 5 */ - { 0, 18, 15, 15, 3, 6, ZSTD_lazy }, /* level 6 */ - { 0, 18, 15, 15, 4, 6, ZSTD_lazy }, /* level 7 */ - { 0, 18, 16, 18, 4, 6, ZSTD_lazy }, /* level 8 */ - { 0, 18, 18, 18, 4, 6, ZSTD_lazy }, /* level 9 */ - { 0, 18, 18, 18, 5, 6, ZSTD_lazy }, /* level 10 */ - { 0, 18, 18, 19, 6, 6, ZSTD_lazy }, /* level 11 */ - { 0, 18, 18, 19, 7, 6, ZSTD_lazy }, /* level 12 */ - { 0, 18, 19, 15, 7, 5, ZSTD_btlazy2 }, /* level 13 */ - { 0, 18, 19, 16, 8, 5, ZSTD_btlazy2 }, /* level 14 */ - { 0, 18, 19, 17, 9, 5, ZSTD_btlazy2 }, /* level 15 */ - { 0, 18, 19, 17, 10, 5, ZSTD_btlazy2 }, /* level 16 */ - { 0, 18, 19, 17, 11, 5, ZSTD_btlazy2 }, /* level 17 */ - { 0, 18, 19, 17, 12, 5, ZSTD_btlazy2 }, /* level 18 */ - { 0, 18, 19, 17, 13, 5, ZSTD_btlazy2 }, /* level 19 */ - { 0, 18, 19, 17, 14, 5, ZSTD_btlazy2 }, /* level 20 */ + /* W, C, H, S, L, strat */ + { 0, 18, 13, 14, 1, 7, ZSTD_fast }, /* level 0 - never used */ + { 0, 18, 14, 15, 1, 6, ZSTD_fast }, /* level 1 */ + { 0, 18, 14, 15, 1, 5, ZSTD_fast }, /* level 2 */ + { 0, 18, 12, 15, 3, 7, ZSTD_greedy }, /* level 3 */ + { 0, 18, 13, 15, 4, 7, ZSTD_greedy }, /* level 4 */ + { 0, 18, 14, 15, 5, 7, ZSTD_greedy }, /* level 5 */ + { 0, 18, 13, 15, 4, 7, ZSTD_lazy }, /* level 6 */ + { 0, 18, 14, 16, 5, 7, ZSTD_lazy }, /* level 7 */ + { 0, 18, 15, 16, 6, 7, ZSTD_lazy }, /* level 8 */ + { 0, 18, 15, 15, 7, 7, ZSTD_lazy }, /* level 9 */ + { 0, 18, 16, 16, 7, 7, ZSTD_lazy }, /* level 10 */ + { 0, 18, 16, 16, 8, 4, ZSTD_lazy }, /* level 11 */ + { 0, 18, 17, 16, 8, 4, ZSTD_lazy }, /* level 12 */ + { 0, 18, 17, 16, 9, 4, ZSTD_lazy }, /* level 13 */ + { 0, 18, 18, 16, 9, 4, ZSTD_lazy }, /* level 14 */ + { 0, 18, 17, 17, 9, 4, ZSTD_lazy2 }, /* level 15 */ + { 0, 18, 18, 18, 9, 4, ZSTD_lazy2 }, /* level 16 */ + { 0, 18, 18, 18, 10, 4, ZSTD_lazy2 }, /* level 17 */ + { 0, 18, 18, 18, 11, 4, ZSTD_lazy2 }, /* level 18 */ + { 0, 18, 18, 18, 12, 4, ZSTD_lazy2 }, /* level 19 */ + { 0, 18, 18, 18, 13, 4, ZSTD_lazy2 }, /* level 20 */ }, { /* for srcSize <= 128 KB */ /* W, C, H, S, L, strat */ From 367060b8749b70dabd42d589db18a51fb878396f Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 17 Dec 2015 00:07:10 +0100 Subject: [PATCH 09/19] larger cBuffer --- programs/bench.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/bench.c b/programs/bench.c index 676b039525c..da12d888b5c 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -222,7 +222,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, const size_t blockSize = (g_blockSize ? g_blockSize : srcSize) + (!srcSize); /* avoid div by 0 */ const U32 maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles; blockParam_t* const blockTable = (blockParam_t*) malloc(maxNbBlocks * sizeof(blockParam_t)); - const size_t maxCompressedSize = ZSTD_compressBound(srcSize) + (maxNbBlocks * 512); /* add some room for safety */ + const size_t maxCompressedSize = ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024); /* add some room for safety */ void* const compressedBuffer = malloc(maxCompressedSize); void* const resultBuffer = malloc(srcSize); const compressor_t compressor = ZSTD_compress; From 4f13703b2f7cf4c1f7d8da26a11c24e1018c75b0 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 17 Dec 2015 02:23:58 +0100 Subject: [PATCH 10/19] added : -m : compress multiple files in a single command --- programs/fileio.c | 247 +++++++++++++++++++++++++++++++++++++++++++++ programs/fileio.h | 13 ++- programs/zstdcli.c | 20 +++- 3 files changed, 275 insertions(+), 5 deletions(-) diff --git a/programs/fileio.c b/programs/fileio.c index 05bdb21ee67..17dfee665a7 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -236,6 +236,212 @@ static U64 FIO_getFileSize(const char* infilename) } +static int FIO_getFiles(const char* input_filename, const char* output_filename, + FILE** pfinput, FILE** pfoutput) +{ + + if (!strcmp (input_filename, stdinmark)) + { + DISPLAYLEVEL(4,"Using stdin for input\n"); + *pfinput = stdin; + SET_BINARY_MODE(stdin); + } + else + { + *pfinput = fopen(input_filename, "rb"); + } + + if ( *pfinput==0 ) + { + DISPLAYLEVEL(1, "Unable to access file for processing: %s\n", input_filename); + return 1; + } + + if (!strcmp (output_filename, stdoutmark)) + { + DISPLAYLEVEL(4,"Using stdout for output\n"); + *pfoutput = stdout; + SET_BINARY_MODE(stdout); + } + else + { + /* Check if destination file already exists */ + *pfoutput=0; + *pfoutput = fopen( output_filename, "rb" ); + if (*pfoutput!=0) + { + fclose(*pfoutput); + if (!g_overwrite) + { + int ch = 'Y'; + DISPLAYLEVEL(2, "Warning : %s already exists\n", output_filename); + if ((g_displayLevel <= 1) || (*pfinput == stdin)) + EXM_THROW(11, "Operation aborted : %s already exists", output_filename); /* No interaction possible */ + DISPLAYLEVEL(2, "Overwrite ? (Y/n) : "); + while((ch = getchar()) != '\n' && ch != EOF) /* flush integrated */ + if ((ch!='Y') && (ch!='y')) EXM_THROW(12, "No. Operation aborted : %s already exists", output_filename); + } + } + *pfoutput = fopen( output_filename, "wb" ); + } + + if (*pfoutput==0) EXM_THROW(13, "Pb opening %s", output_filename); + + return 0; +} + + +/* ********************************************************************** +* Compression +************************************************************************/ +typedef struct { + void* srcBuffer; + size_t srcBufferSize; + void* dstBuffer; + size_t dstBufferSize; + void* dictBuffer; + size_t dictBufferSize; + ZBUFF_CCtx* ctx; +} cRess_t; + +static cRess_t FIO_createCResources(const char* dictFileName) +{ + cRess_t ress; + + ress.ctx = ZBUFF_createCCtx(); + if (ress.ctx == NULL) EXM_THROW(30, "Allocation error : can't create ZBUFF context"); + + /* Allocate Memory */ + ress.srcBufferSize = ZBUFF_recommendedCInSize(); + ress.srcBuffer = malloc(ress.srcBufferSize); + ress.dstBufferSize = ZBUFF_recommendedCOutSize(); + ress.dstBuffer = malloc(ress.dstBufferSize); + if (!ress.srcBuffer || !ress.dstBuffer) EXM_THROW(31, "Allocation error : not enough memory"); + + /* dictionary */ + ress.dictBuffer = NULL; + ress.dictBufferSize = 0; + if (dictFileName) + { + FILE* dictHandle; + size_t readSize, dictSize; + DISPLAYLEVEL(4,"Using %s as dictionary \n", dictFileName); + dictHandle = fopen(dictFileName, "rb"); + if (dictHandle==0) EXM_THROW(31, "Error opening dictionary file %s", dictFileName); + dictSize = FIO_getFileSize(dictFileName); + if (dictSize > MAX_DICT_SIZE) + { + int seekResult; + if (dictSize > 1 GB) EXM_THROW(32, "Dictionary file %s is too large", dictFileName); /* avoid extreme cases */ + DISPLAYLEVEL(2,"Dictionary %s is too large : using last %u bytes only \n", dictFileName, MAX_DICT_SIZE); + seekResult = fseek(dictHandle, (long int)(dictSize-MAX_DICT_SIZE), SEEK_SET); /* use end of file */ + if (seekResult != 0) EXM_THROW(33, "Error seeking into dictionary file %s", dictFileName); + dictSize = MAX_DICT_SIZE; + } + ress.dictBuffer = (BYTE*)malloc((size_t)dictSize); + if (ress.dictBuffer==NULL) EXM_THROW(34, "Allocation error : not enough memory for dictBuffer"); + readSize = fread(ress.dictBuffer, 1, (size_t)dictSize, dictHandle); + if (readSize!=dictSize) EXM_THROW(35, "Error reading dictionary file %s", dictFileName); + fclose(dictHandle); + ress.dictBufferSize = dictSize; + } + + return ress; +} + +static void FIO_freeCResources(cRess_t ress) +{ + size_t errorCode; + free(ress.srcBuffer); + free(ress.dstBuffer); + free(ress.dictBuffer); + errorCode = ZBUFF_freeCCtx(ress.ctx); + if (ZBUFF_isError(errorCode)) EXM_THROW(38, "Error : can't release ZBUFF context resource : %s", ZBUFF_getErrorName(errorCode)); +} + + +/* + * FIO_compressFilename_extRess() + * result : 0 : compression completed correctly + * 1 : missing or pb opening srcFileName + */ +static int FIO_compressFilename_extRess(cRess_t ress, + const char* srcFileName, const char* dstFileName, + int cLevel) +{ + FILE* srcFile; + FILE* dstFile; + U64 filesize = 0; + U64 compressedfilesize = 0; + size_t dictSize = ress.dictBufferSize; + size_t sizeCheck, errorCode; + + /* File check */ + if (FIO_getFiles(srcFileName, dstFileName, &srcFile, &dstFile)) return 1; + + /* init */ + filesize = FIO_getFileSize(srcFileName) + dictSize; + errorCode = ZBUFF_compressInit_advanced(ress.ctx, ZSTD_getParams(cLevel, filesize)); + if (ZBUFF_isError(errorCode)) EXM_THROW(21, "Error initializing compression"); + errorCode = ZBUFF_compressWithDictionary(ress.ctx, ress.dictBuffer, ress.dictBufferSize); + if (ZBUFF_isError(errorCode)) EXM_THROW(22, "Error initializing dictionary"); + + /* Main compression loop */ + filesize = 0; + while (1) + { + size_t inSize; + + /* Fill input Buffer */ + inSize = fread(ress.srcBuffer, (size_t)1, ress.srcBufferSize, srcFile); + if (inSize==0) break; + filesize += inSize; + DISPLAYUPDATE(2, "\rRead : %u MB ", (U32)(filesize>>20)); + + { + /* Compress (buffered streaming ensures appropriate formatting) */ + size_t usedInSize = inSize; + size_t cSize = ress.dstBufferSize; + size_t result = ZBUFF_compressContinue(ress.ctx, ress.dstBuffer, &cSize, ress.srcBuffer, &usedInSize); + if (ZBUFF_isError(result)) + EXM_THROW(23, "Compression error : %s ", ZBUFF_getErrorName(result)); + if (inSize != usedInSize) + /* inBuff should be entirely consumed since buffer sizes are recommended ones */ + EXM_THROW(24, "Compression error : input block not fully consumed"); + + /* Write cBlock */ + sizeCheck = fwrite(ress.dstBuffer, 1, cSize, dstFile); + if (sizeCheck!=cSize) EXM_THROW(25, "Write error : cannot write compressed block into %s", dstFileName); + compressedfilesize += cSize; + } + + DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%% ", (U32)(filesize>>20), (double)compressedfilesize/filesize*100); + } + + /* End of Frame */ + { + size_t cSize = ress.dstBufferSize; + size_t result = ZBUFF_compressEnd(ress.ctx, ress.dstBuffer, &cSize); + if (result!=0) EXM_THROW(26, "Compression error : cannot create frame end"); + + sizeCheck = fwrite(ress.dstBuffer, 1, cSize, dstFile); + if (sizeCheck!=cSize) EXM_THROW(27, "Write error : cannot write frame end into %s", dstFileName); + compressedfilesize += cSize; + } + + /* Status */ + DISPLAYLEVEL(2, "\r%79s\r", ""); + DISPLAYLEVEL(2,"Compressed %llu bytes into %llu bytes ==> %.2f%%\n", + (unsigned long long) filesize, (unsigned long long) compressedfilesize, (double)compressedfilesize/filesize*100); + + /* clean */ + fclose(srcFile); + if (fclose(dstFile)) EXM_THROW(28, "Write error : cannot properly close %s", dstFileName); + + return 0; +} + + unsigned long long FIO_compressFilename(const char* output_filename, const char* input_filename, const char* dictFileName, int cLevel) { @@ -349,6 +555,47 @@ unsigned long long FIO_compressFilename(const char* output_filename, const char* } +#define FNSPACE 30 +int FIO_compressMultipleFilenames(const char** inFileNamesTable, int ifntSize, + const char* suffix, + const char* dictFileName, int compressionLevel) +{ + int i; + int missed_files = 0; + char* dstFileName = (char*)malloc(FNSPACE); + size_t dfnSize = FNSPACE; + const size_t suffixSize = strlen(suffix); + cRess_t ress; + + /* init */ + ress = FIO_createCResources(dictFileName); + + /* loop on each file */ + for (i=0; i Date: Thu, 17 Dec 2015 02:48:26 +0100 Subject: [PATCH 11/19] fixed : conversion warning --- programs/fileio.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/programs/fileio.c b/programs/fileio.c index 17dfee665a7..7ef251bceb6 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -324,7 +324,8 @@ static cRess_t FIO_createCResources(const char* dictFileName) if (dictFileName) { FILE* dictHandle; - size_t readSize, dictSize; + size_t readSize; + U64 dictSize; DISPLAYLEVEL(4,"Using %s as dictionary \n", dictFileName); dictHandle = fopen(dictFileName, "rb"); if (dictHandle==0) EXM_THROW(31, "Error opening dictionary file %s", dictFileName); @@ -343,7 +344,7 @@ static cRess_t FIO_createCResources(const char* dictFileName) readSize = fread(ress.dictBuffer, 1, (size_t)dictSize, dictHandle); if (readSize!=dictSize) EXM_THROW(35, "Error reading dictionary file %s", dictFileName); fclose(dictHandle); - ress.dictBufferSize = dictSize; + ress.dictBufferSize = (size_t)dictSize; } return ress; From 9d90922d49fba9dc3365ebc05dde5e8da3942975 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 17 Dec 2015 14:09:55 +0100 Subject: [PATCH 12/19] refactored file compression --- programs/fileio.c | 186 +++++++++++++--------------------------------- programs/fileio.h | 6 +- 2 files changed, 53 insertions(+), 139 deletions(-) diff --git a/programs/fileio.c b/programs/fileio.c index 7ef251bceb6..7ec5f6c5bcc 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -204,11 +204,11 @@ static void FIO_getFileHandles(FILE** pfinput, FILE** pfoutput, const char* inpu fclose(*pfoutput); if (!g_overwrite) { - char ch; + char ch='N'; if (g_displayLevel <= 1) /* No interaction possible */ EXM_THROW(11, "Operation aborted : %s already exists", output_filename); DISPLAYLEVEL(2, "Warning : %s already exists\n", output_filename); - DISPLAYLEVEL(2, "Overwrite ? (Y/N) : "); + DISPLAYLEVEL(2, "Overwrite ? (y/N) : "); ch = (char)getchar(); if ((ch!='Y') && (ch!='y')) EXM_THROW(11, "Operation aborted : %s already exists", output_filename); } @@ -236,56 +236,55 @@ static U64 FIO_getFileSize(const char* infilename) } -static int FIO_getFiles(const char* input_filename, const char* output_filename, - FILE** pfinput, FILE** pfoutput) +static int FIO_getFiles(FILE** fileOutPtr, FILE** fileInPtr, + const char* dstFileName, const char* srcFileName) { - - if (!strcmp (input_filename, stdinmark)) + if (!strcmp (srcFileName, stdinmark)) { DISPLAYLEVEL(4,"Using stdin for input\n"); - *pfinput = stdin; + *fileInPtr = stdin; SET_BINARY_MODE(stdin); } else { - *pfinput = fopen(input_filename, "rb"); + *fileInPtr = fopen(srcFileName, "rb"); } - if ( *pfinput==0 ) + if ( *fileInPtr==0 ) { - DISPLAYLEVEL(1, "Unable to access file for processing: %s\n", input_filename); + DISPLAYLEVEL(1, "Unable to access file for processing: %s\n", srcFileName); return 1; } - if (!strcmp (output_filename, stdoutmark)) + if (!strcmp (dstFileName, stdoutmark)) { DISPLAYLEVEL(4,"Using stdout for output\n"); - *pfoutput = stdout; + *fileOutPtr = stdout; SET_BINARY_MODE(stdout); } else { /* Check if destination file already exists */ - *pfoutput=0; - *pfoutput = fopen( output_filename, "rb" ); - if (*pfoutput!=0) + if (!g_overwrite) { - fclose(*pfoutput); - if (!g_overwrite) + *fileOutPtr = fopen( dstFileName, "rb" ); + if (*fileOutPtr != 0) { - int ch = 'Y'; - DISPLAYLEVEL(2, "Warning : %s already exists\n", output_filename); - if ((g_displayLevel <= 1) || (*pfinput == stdin)) - EXM_THROW(11, "Operation aborted : %s already exists", output_filename); /* No interaction possible */ - DISPLAYLEVEL(2, "Overwrite ? (Y/n) : "); - while((ch = getchar()) != '\n' && ch != EOF) /* flush integrated */ - if ((ch!='Y') && (ch!='y')) EXM_THROW(12, "No. Operation aborted : %s already exists", output_filename); + /* prompt for overwrite authorization */ + int ch = 'N'; + fclose(*fileOutPtr); + DISPLAYLEVEL(2, "Warning : %s already exists\n", dstFileName); + if ((g_displayLevel <= 1) || (*fileInPtr == stdin)) + EXM_THROW(11, "Operation aborted : %s already exists", dstFileName); /* No interaction possible */ + DISPLAYLEVEL(2, "Overwrite ? (y/N) : "); + while((ch = getchar()) != '\n' && ch != EOF); /* flush integrated */ + if ((ch!='Y') && (ch!='y')) EXM_THROW(12, "No. Operation aborted : %s already exists", dstFileName); } } - *pfoutput = fopen( output_filename, "wb" ); + *fileOutPtr = fopen( dstFileName, "wb" ); } - if (*pfoutput==0) EXM_THROW(13, "Pb opening %s", output_filename); + if (*fileOutPtr==0) EXM_THROW(13, "Pb opening %s", dstFileName); return 0; } @@ -367,7 +366,7 @@ static void FIO_freeCResources(cRess_t ress) * 1 : missing or pb opening srcFileName */ static int FIO_compressFilename_extRess(cRess_t ress, - const char* srcFileName, const char* dstFileName, + const char* dstFileName, const char* srcFileName, int cLevel) { FILE* srcFile; @@ -378,7 +377,7 @@ static int FIO_compressFilename_extRess(cRess_t ress, size_t sizeCheck, errorCode; /* File check */ - if (FIO_getFiles(srcFileName, dstFileName, &srcFile, &dstFile)) return 1; + if (FIO_getFiles(&dstFile, &srcFile, dstFileName, srcFileName)) return 1; /* init */ filesize = FIO_getFileSize(srcFileName) + dictSize; @@ -443,125 +442,40 @@ static int FIO_compressFilename_extRess(cRess_t ress, } -unsigned long long FIO_compressFilename(const char* output_filename, const char* input_filename, - const char* dictFileName, int cLevel) +int FIO_compressFilename(const char* dstFileName, const char* srcFileName, + const char* dictFileName, int compressionLevel) { - U64 filesize = 0; - U64 compressedfilesize = 0; - U64 dictSize = 0; - BYTE* inBuff, *outBuff, *dictBuff=NULL; - size_t inBuffSize = ZBUFF_recommendedCInSize(); - size_t outBuffSize = ZBUFF_recommendedCOutSize(); - FILE* finput; - FILE* foutput; - size_t sizeCheck, errorCode; - ZBUFF_CCtx* ctx; - - /* Allocate Memory */ - ctx = ZBUFF_createCCtx(); - inBuff = (BYTE*)malloc(inBuffSize); - outBuff = (BYTE*)malloc(outBuffSize); - if (!inBuff || !outBuff || !ctx) EXM_THROW(20, "Allocation error : not enough memory"); - - /* dictionary */ - if (dictFileName) - { - FILE* dictHandle; - size_t readSize; - DISPLAYLEVEL(4,"Using %s as dictionary \n", dictFileName); - dictHandle = fopen(dictFileName, "rb"); - if (dictHandle==0) EXM_THROW(21, "Error opening dictionary file %s", dictFileName); - dictSize = FIO_getFileSize(dictFileName); - if (dictSize > MAX_DICT_SIZE) - { - int seekResult; - if (dictSize > 1 GB) EXM_THROW(21, "Dictionary file %s is too large", dictFileName); /* avoid extreme cases */ - DISPLAYLEVEL(2,"Dictionary %s is too large : using last %u bytes only \n", dictFileName, MAX_DICT_SIZE); - seekResult = fseek(dictHandle, (long int)(dictSize-MAX_DICT_SIZE), SEEK_SET); /* use end of file */ - if (seekResult != 0) EXM_THROW(21, "Error seeking into dictionary file %s", dictFileName); - dictSize = MAX_DICT_SIZE; - } - dictBuff = (BYTE*)malloc((size_t)dictSize); - if (dictBuff==NULL) EXM_THROW(20, "Allocation error : not enough memory for dictBuff"); - readSize = fread(dictBuff, 1, (size_t)dictSize, dictHandle); - if (readSize!=dictSize) EXM_THROW(21, "Error reading dictionary file %s", dictFileName); - fclose(dictHandle); - } - - /* init */ - FIO_getFileHandles(&finput, &foutput, input_filename, output_filename); - filesize = FIO_getFileSize(input_filename) + dictSize; - errorCode = ZBUFF_compressInit_advanced(ctx, ZSTD_getParams(cLevel, filesize)); - if (ZBUFF_isError(errorCode)) EXM_THROW(22, "Error initializing compression"); - errorCode = ZBUFF_compressWithDictionary(ctx, dictBuff, (size_t)dictSize); - if (ZBUFF_isError(errorCode)) EXM_THROW(22, "Error initializing dictionary"); - - /* Main compression loop */ - filesize = 0; - while (1) - { - size_t inSize; - - /* Fill input Buffer */ - inSize = fread(inBuff, (size_t)1, inBuffSize, finput); - if (inSize==0) break; - filesize += inSize; - DISPLAYUPDATE(2, "\rRead : %u MB ", (U32)(filesize>>20)); + clock_t start, end; + cRess_t ress; + int issueWithSrcFile = 0; - { - /* Compress (buffered streaming ensures appropriate formatting) */ - size_t usedInSize = inSize; - size_t cSize = outBuffSize; - size_t result = ZBUFF_compressContinue(ctx, outBuff, &cSize, inBuff, &usedInSize); - if (ZBUFF_isError(result)) - EXM_THROW(23, "Compression error : %s ", ZBUFF_getErrorName(result)); - if (inSize != usedInSize) - /* inBuff should be entirely consumed since buffer sizes are recommended ones */ - EXM_THROW(24, "Compression error : input block not fully consumed"); + /* Init */ + start = clock(); + ress = FIO_createCResources(dictFileName); - /* Write cBlock */ - sizeCheck = fwrite(outBuff, 1, cSize, foutput); - if (sizeCheck!=cSize) EXM_THROW(25, "Write error : cannot write compressed block into %s", output_filename); - compressedfilesize += cSize; - } + /* Compress File */ + issueWithSrcFile += FIO_compressFilename_extRess(ress, dstFileName, srcFileName, compressionLevel); - DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%% ", (U32)(filesize>>20), (double)compressedfilesize/filesize*100); - } + /* Free resources */ + FIO_freeCResources(ress); - /* End of Frame */ + /* Final Status */ + end = clock(); { - size_t cSize = outBuffSize; - size_t result = ZBUFF_compressEnd(ctx, outBuff, &cSize); - if (result!=0) EXM_THROW(26, "Compression error : cannot create frame end"); - - sizeCheck = fwrite(outBuff, 1, cSize, foutput); - if (sizeCheck!=cSize) EXM_THROW(27, "Write error : cannot write frame end into %s", output_filename); - compressedfilesize += cSize; + double seconds = (double)(end - start) / CLOCKS_PER_SEC; + DISPLAYLEVEL(4, "Completed in %.2f sec \n", seconds); } - /* Status */ - DISPLAYLEVEL(2, "\r%79s\r", ""); - DISPLAYLEVEL(2,"Compressed %llu bytes into %llu bytes ==> %.2f%%\n", - (unsigned long long) filesize, (unsigned long long) compressedfilesize, (double)compressedfilesize/filesize*100); - - /* clean */ - free(inBuff); - free(outBuff); - free(dictBuff); - ZBUFF_freeCCtx(ctx); - fclose(finput); - if (fclose(foutput)) EXM_THROW(28, "Write error : cannot properly close %s", output_filename); - - return compressedfilesize; + return issueWithSrcFile; } #define FNSPACE 30 -int FIO_compressMultipleFilenames(const char** inFileNamesTable, int ifntSize, +int FIO_compressMultipleFilenames(const char** inFileNamesTable, unsigned nbFiles, const char* suffix, const char* dictFileName, int compressionLevel) { - int i; + unsigned u; int missed_files = 0; char* dstFileName = (char*)malloc(FNSPACE); size_t dfnSize = FNSPACE; @@ -572,14 +486,14 @@ int FIO_compressMultipleFilenames(const char** inFileNamesTable, int ifntSize, ress = FIO_createCResources(dictFileName); /* loop on each file */ - for (i=0; i Date: Thu, 17 Dec 2015 20:30:14 +0100 Subject: [PATCH 13/19] added : -m : decompress multiple files --- Makefile | 2 +- NEWS | 3 + lib/zstd.h | 2 +- programs/Makefile | 2 +- programs/fileio.c | 322 ++++++++++++++++++++++-------------------- programs/fileio.h | 11 +- programs/playTests.sh | 13 ++ programs/zstd.1 | 34 +++-- programs/zstdcli.c | 21 ++- 9 files changed, 236 insertions(+), 174 deletions(-) diff --git a/Makefile b/Makefile index abec33f8671..a741034ad89 100644 --- a/Makefile +++ b/Makefile @@ -32,7 +32,7 @@ # ################################################################ # Version number -export VERSION := 0.4.4 +export VERSION := 0.4.5 PRGDIR = programs ZSTDDIR = lib diff --git a/NEWS b/NEWS index 079483a3c85..ee9a4585c90 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,6 @@ +v0.4.5 +new : -m/--multiple : compress/decompress multiple files + v0.4.4 Fixed : high compression modes for Windows 32 bits new : external dictionary API extended to buffered mode and accessible through command line diff --git a/lib/zstd.h b/lib/zstd.h index b0c841f3a9a..d6eb0b517bb 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -62,7 +62,7 @@ extern "C" { ***************************************/ #define ZSTD_VERSION_MAJOR 0 /* for breaking interface changes */ #define ZSTD_VERSION_MINOR 4 /* for new (non-breaking) interface capabilities */ -#define ZSTD_VERSION_RELEASE 3 /* for tweaks, bug-fixes, or development */ +#define ZSTD_VERSION_RELEASE 5 /* for tweaks, bug-fixes, or development */ #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) ZSTDLIB_API unsigned ZSTD_versionNumber (void); diff --git a/programs/Makefile b/programs/Makefile index 822f2d24bbe..57fa87c11f5 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -30,7 +30,7 @@ # fullbench32: Same as fullbench, but forced to compile in 32-bits mode # ########################################################################## -VERSION?= 0.4.4 +VERSION?= 0.4.5 DESTDIR?= PREFIX ?= /usr/local diff --git a/programs/fileio.c b/programs/fileio.c index 7ec5f6c5bcc..3867301a995 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -175,52 +175,6 @@ static unsigned FIO_GetMilliSpan(clock_t nPrevious) } -static void FIO_getFileHandles(FILE** pfinput, FILE** pfoutput, const char* input_filename, const char* output_filename) -{ - if (!strcmp (input_filename, stdinmark)) - { - DISPLAYLEVEL(4,"Using stdin for input\n"); - *pfinput = stdin; - SET_BINARY_MODE(stdin); - } - else - { - *pfinput = fopen(input_filename, "rb"); - } - - if (!strcmp (output_filename, stdoutmark)) - { - DISPLAYLEVEL(4,"Using stdout for output\n"); - *pfoutput = stdout; - SET_BINARY_MODE(stdout); - } - else - { - /* Check if destination file already exists */ - *pfoutput=0; - if (strcmp(output_filename,nulmark)) *pfoutput = fopen( output_filename, "rb" ); - if (*pfoutput!=0) - { - fclose(*pfoutput); - if (!g_overwrite) - { - char ch='N'; - if (g_displayLevel <= 1) /* No interaction possible */ - EXM_THROW(11, "Operation aborted : %s already exists", output_filename); - DISPLAYLEVEL(2, "Warning : %s already exists\n", output_filename); - DISPLAYLEVEL(2, "Overwrite ? (y/N) : "); - ch = (char)getchar(); - if ((ch!='Y') && (ch!='y')) EXM_THROW(11, "Operation aborted : %s already exists", output_filename); - } - } - *pfoutput = fopen( output_filename, "wb" ); - } - - if ( *pfinput==0 ) EXM_THROW(12, "Pb opening src : %s", input_filename); - if ( *pfoutput==0) EXM_THROW(13, "Pb opening dst : %s", output_filename); -} - - static U64 FIO_getFileSize(const char* infilename) { int r; @@ -273,12 +227,20 @@ static int FIO_getFiles(FILE** fileOutPtr, FILE** fileInPtr, /* prompt for overwrite authorization */ int ch = 'N'; fclose(*fileOutPtr); - DISPLAYLEVEL(2, "Warning : %s already exists\n", dstFileName); + DISPLAY("Warning : %s already exists \n", dstFileName); if ((g_displayLevel <= 1) || (*fileInPtr == stdin)) - EXM_THROW(11, "Operation aborted : %s already exists", dstFileName); /* No interaction possible */ - DISPLAYLEVEL(2, "Overwrite ? (y/N) : "); + { + /* No interaction possible */ + DISPLAY("Operation aborted : %s already exists \n", dstFileName); + return 1; + } + DISPLAY("Overwrite ? (y/N) : "); while((ch = getchar()) != '\n' && ch != EOF); /* flush integrated */ - if ((ch!='Y') && (ch!='y')) EXM_THROW(12, "No. Operation aborted : %s already exists", dstFileName); + if ((ch!='Y') && (ch!='y')) + { + DISPLAY("No. Operation aborted : %s already exists \n", dstFileName); + return 1; + } } } *fileOutPtr = fopen( dstFileName, "wb" ); @@ -289,6 +251,42 @@ static int FIO_getFiles(FILE** fileOutPtr, FILE** fileInPtr, return 0; } +/*!FIO_loadFile +* creates a buffer, pointed by *bufferPtr, +* loads "filename" content into it +* up to MAX_DICT_SIZE bytes +*/ +static size_t FIO_loadFile(void** bufferPtr, const char* fileName) +{ + FILE* fileHandle; + size_t readSize; + U64 fileSize; + + *bufferPtr = NULL; + if (fileName == NULL) + return 0; + + DISPLAYLEVEL(4,"Loading %s as dictionary \n", fileName); + fileHandle = fopen(fileName, "rb"); + if (fileHandle==0) EXM_THROW(31, "Error opening file %s", fileName); + fileSize = FIO_getFileSize(fileName); + if (fileSize > MAX_DICT_SIZE) + { + int seekResult; + if (fileSize > 1 GB) EXM_THROW(32, "Dictionary file %s is too large", fileName); /* avoid extreme cases */ + DISPLAYLEVEL(2,"Dictionary %s is too large : using last %u bytes only \n", fileName, MAX_DICT_SIZE); + seekResult = fseek(fileHandle, (long int)(fileSize-MAX_DICT_SIZE), SEEK_SET); /* use end of file */ + if (seekResult != 0) EXM_THROW(33, "Error seeking into file %s", fileName); + fileSize = MAX_DICT_SIZE; + } + *bufferPtr = (BYTE*)malloc((size_t)fileSize); + if (*bufferPtr==NULL) EXM_THROW(34, "Allocation error : not enough memory for dictBuffer"); + readSize = fread(*bufferPtr, 1, (size_t)fileSize, fileHandle); + if (readSize!=fileSize) EXM_THROW(35, "Error reading dictionary file %s", fileName); + fclose(fileHandle); + return (size_t)fileSize; +} + /* ********************************************************************** * Compression @@ -318,33 +316,7 @@ static cRess_t FIO_createCResources(const char* dictFileName) if (!ress.srcBuffer || !ress.dstBuffer) EXM_THROW(31, "Allocation error : not enough memory"); /* dictionary */ - ress.dictBuffer = NULL; - ress.dictBufferSize = 0; - if (dictFileName) - { - FILE* dictHandle; - size_t readSize; - U64 dictSize; - DISPLAYLEVEL(4,"Using %s as dictionary \n", dictFileName); - dictHandle = fopen(dictFileName, "rb"); - if (dictHandle==0) EXM_THROW(31, "Error opening dictionary file %s", dictFileName); - dictSize = FIO_getFileSize(dictFileName); - if (dictSize > MAX_DICT_SIZE) - { - int seekResult; - if (dictSize > 1 GB) EXM_THROW(32, "Dictionary file %s is too large", dictFileName); /* avoid extreme cases */ - DISPLAYLEVEL(2,"Dictionary %s is too large : using last %u bytes only \n", dictFileName, MAX_DICT_SIZE); - seekResult = fseek(dictHandle, (long int)(dictSize-MAX_DICT_SIZE), SEEK_SET); /* use end of file */ - if (seekResult != 0) EXM_THROW(33, "Error seeking into dictionary file %s", dictFileName); - dictSize = MAX_DICT_SIZE; - } - ress.dictBuffer = (BYTE*)malloc((size_t)dictSize); - if (ress.dictBuffer==NULL) EXM_THROW(34, "Allocation error : not enough memory for dictBuffer"); - readSize = fread(ress.dictBuffer, 1, (size_t)dictSize, dictHandle); - if (readSize!=dictSize) EXM_THROW(35, "Error reading dictionary file %s", dictFileName); - fclose(dictHandle); - ress.dictBufferSize = (size_t)dictSize; - } + ress.dictBufferSize = FIO_loadFile(&(ress.dictBuffer), dictFileName); return ress; } @@ -504,48 +476,80 @@ int FIO_compressMultipleFilenames(const char** inFileNamesTable, unsigned nbFile } - - /* ************************************************************************** * Decompression ****************************************************************************/ +typedef struct { + void* srcBuffer; + size_t srcBufferSize; + void* dstBuffer; + size_t dstBufferSize; + void* dictBuffer; + size_t dictBufferSize; + ZBUFF_DCtx* dctx; +} dRess_t; + +static dRess_t FIO_createDResources(const char* dictFileName) +{ + dRess_t ress; + + /* init */ + ress.dctx = ZBUFF_createDCtx(); + if (ress.dctx==NULL) EXM_THROW(60, "Can't create ZBUFF decompression context"); + /* Allocate Memory */ + ress.srcBufferSize = ZBUFF_recommendedDInSize(); + ress.srcBuffer = malloc(ress.srcBufferSize); + ress.dstBufferSize = ZBUFF_recommendedDOutSize(); + ress.dstBuffer = malloc(ress.dstBufferSize); + if (!ress.srcBuffer || !ress.dstBuffer) EXM_THROW(61, "Allocation error : not enough memory"); -unsigned long long FIO_decompressFrame(FILE* foutput, FILE* finput, - BYTE* inBuff, size_t inBuffSize, size_t alreadyLoaded, - BYTE* outBuff, size_t outBuffSize, - BYTE* dictBuff, size_t dictSize, - ZBUFF_DCtx* dctx) + /* dictionary */ + ress.dictBufferSize = FIO_loadFile(&(ress.dictBuffer), dictFileName); + + return ress; +} + +static void FIO_freeDResources(dRess_t ress) +{ + size_t errorCode = ZBUFF_freeDCtx(ress.dctx); + if (ZBUFF_isError(errorCode)) EXM_THROW(69, "Error : can't free ZBUFF context resource : %s", ZBUFF_getErrorName(errorCode)); + free(ress.srcBuffer); + free(ress.dstBuffer); + free(ress.dictBuffer); +} + + +unsigned long long FIO_decompressFrame(dRess_t ress, + FILE* foutput, FILE* finput, size_t alreadyLoaded) { U64 frameSize = 0; size_t readSize=alreadyLoaded; /* Main decompression Loop */ - ZBUFF_decompressInit(dctx); - ZBUFF_decompressWithDictionary(dctx, dictBuff, dictSize); + ZBUFF_decompressInit(ress.dctx); + ZBUFF_decompressWithDictionary(ress.dctx, ress.dictBuffer, ress.dictBufferSize); while (1) { /* Decode */ size_t sizeCheck; - size_t inSize=readSize, decodedSize=outBuffSize; - size_t inStart=0; - size_t toRead = ZBUFF_decompressContinue(dctx, outBuff, &decodedSize, inBuff+inStart, &inSize); + size_t inSize=readSize, decodedSize=ress.dstBufferSize; + size_t toRead = ZBUFF_decompressContinue(ress.dctx, ress.dstBuffer, &decodedSize, ress.srcBuffer, &inSize); if (ZBUFF_isError(toRead)) EXM_THROW(36, "Decoding error : %s", ZBUFF_getErrorName(toRead)); readSize -= inSize; - inStart += inSize; /* Write block */ - sizeCheck = fwrite(outBuff, 1, decodedSize, foutput); + sizeCheck = fwrite(ress.dstBuffer, 1, decodedSize, foutput); if (sizeCheck != decodedSize) EXM_THROW(37, "Write error : unable to write data block to destination file"); frameSize += decodedSize; DISPLAYUPDATE(2, "\rDecoded : %u MB... ", (U32)(frameSize>>20) ); if (toRead == 0) break; - if (readSize) continue; /* still some data left within inBuff */ + if (readSize) EXM_THROW(38, "Decoding error : should consume entire input"); /* Fill input buffer */ - if (toRead > inBuffSize) EXM_THROW(34, "too large block"); - readSize = fread(inBuff, 1, toRead, finput); + if (toRead > ress.srcBufferSize) EXM_THROW(34, "too large block"); + readSize = fread(ress.srcBuffer, 1, toRead, finput); if (readSize != toRead) EXM_THROW(35, "Read error"); } @@ -553,88 +557,96 @@ unsigned long long FIO_decompressFrame(FILE* foutput, FILE* finput, } -unsigned long long FIO_decompressFilename(const char* output_filename, const char* input_filename, const char* dictFileName) +static int FIO_decompressFile_extRess(dRess_t ress, + const char* dstFileName, const char* srcFileName) { - FILE* finput, *foutput; - BYTE* inBuff=NULL; - size_t inBuffSize = ZBUFF_recommendedDInSize(); - BYTE* outBuff=NULL; - size_t outBuffSize = ZBUFF_recommendedDOutSize(); - BYTE* dictBuff=NULL; - size_t dictSize = 0; - U64 filesize = 0; - size_t toRead; - - /* dictionary */ - if (dictFileName) - { - FILE* dictHandle; - size_t readSize; - DISPLAYLEVEL(4,"Using %s as dictionary \n", dictFileName); - dictHandle = fopen(dictFileName, "rb"); - if (dictHandle==0) EXM_THROW(21, "Error opening dictionary file %s", dictFileName); - dictSize = (size_t)FIO_getFileSize(dictFileName); - if (dictSize > MAX_DICT_SIZE) - { - int seekResult; - if (dictSize > 1 GB) EXM_THROW(21, "Dictionary file %s is too large", dictFileName); /* avoid extreme cases */ - DISPLAYLEVEL(2,"Dictionary %s is too large : using last %u bytes only \n", dictFileName, MAX_DICT_SIZE); - seekResult = fseek(dictHandle, (long int)(dictSize-MAX_DICT_SIZE), SEEK_SET); /* use end of file */ - if (seekResult != 0) EXM_THROW(21, "Error seeking into dictionary file %s", dictFileName); - dictSize = MAX_DICT_SIZE; - } - dictBuff = (BYTE*)malloc(dictSize); - if (dictBuff==NULL) EXM_THROW(20, "Allocation error : not enough memory for dictBuff"); - readSize = fread(dictBuff, 1, (size_t)dictSize, dictHandle); - if (readSize!=dictSize) EXM_THROW(21, "Error reading dictionary file %s", dictFileName); - fclose(dictHandle); - } + unsigned long long filesize = 0; + FILE* srcFile; + FILE* dstFile; /* Init */ - ZBUFF_DCtx* dctx = ZBUFF_createDCtx(); - FIO_getFileHandles(&finput, &foutput, input_filename, output_filename); - - /* Allocate Memory (if needed) */ - inBuff = (BYTE*)malloc(inBuffSize); - outBuff = (BYTE*)malloc(outBuffSize); - if (!inBuff || !outBuff) EXM_THROW(33, "Allocation error : not enough memory"); + if (FIO_getFiles(&dstFile, &srcFile, dstFileName, srcFileName)) return 1; /* for each frame */ for ( ; ; ) { size_t sizeCheck; /* check magic number -> version */ - toRead = 4; - sizeCheck = fread(inBuff, (size_t)1, toRead, finput); + size_t toRead = 4; + sizeCheck = fread(ress.srcBuffer, (size_t)1, toRead, srcFile); if (sizeCheck==0) break; /* no more input */ if (sizeCheck != toRead) EXM_THROW(31, "Read error : cannot read header"); #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1) - if (ZSTD_isLegacy(MEM_readLE32(inBuff))) + if (ZSTD_isLegacy(MEM_readLE32(ress.srcBuffer))) { - filesize += FIO_decompressLegacyFrame(foutput, finput, MEM_readLE32(inBuff)); + filesize += FIO_decompressLegacyFrame(dstFile, srcFile, MEM_readLE32(ress.srcBuffer)); continue; } #endif /* ZSTD_LEGACY_SUPPORT */ - filesize += FIO_decompressFrame(foutput, finput, - inBuff, inBuffSize, toRead, - outBuff, outBuffSize, - dictBuff, dictSize, - dctx); + filesize += FIO_decompressFrame(ress, dstFile, srcFile, toRead); } + /* Final Status */ DISPLAYLEVEL(2, "\r%79s\r", ""); - DISPLAYLEVEL(2, "Decoded %llu bytes \n", (long long unsigned)filesize); + DISPLAYLEVEL(2, "Successfully decoded %llu bytes \n", filesize); - /* clean */ - free(inBuff); - free(outBuff); - free(dictBuff); - ZBUFF_freeDCtx(dctx); - fclose(finput); - if (fclose(foutput)) EXM_THROW(38, "Write error : cannot properly close %s", output_filename); - - return filesize; + /* Close */ + fclose(srcFile); + if (fclose(dstFile)) EXM_THROW(38, "Write error : cannot properly close %s", dstFileName); + + return 0; } +int FIO_decompressFilename(const char* dstFileName, const char* srcFileName, + const char* dictFileName) +{ + int missingFiles = 0; + dRess_t ress = FIO_createDResources(dictFileName); + + missingFiles += FIO_decompressFile_extRess(ress, dstFileName, srcFileName); + + FIO_freeDResources(ress); + return missingFiles; +} + + +#define MAXSUFFIXSIZE 8 +int FIO_decompressMultipleFilenames(const char** srcNamesTable, unsigned nbFiles, + const char* suffix, + const char* dictFileName) +{ + unsigned u; + int skippedFiles = 0; + int missingFiles = 0; + char* dstFileName = (char*)malloc(FNSPACE); + size_t dfnSize = FNSPACE; + const size_t suffixSize = strlen(suffix); + dRess_t ress; + + if (dstFileName==NULL) EXM_THROW(70, "not enough memory for dstFileName"); + ress = FIO_createDResources(dictFileName); + + for (u=0; u tmp2 diff -q tmp1 tmp2 +echo "*** multiple files tests *** " + +./datagen -s1 > tmp1 2> /dev/null +./datagen -s2 -g100K > tmp2 2> /dev/null +./datagen -s3 -g1M > tmp3 2> /dev/null +./zstd -f -m tmp* +ls -ls tmp* +rm tmp1 tmp2 tmp3 +./zstd -df -m *.zst +ls -ls tmp* +./zstd -f -m tmp1 notHere tmp2 && die "missing file not detected!" +rm tmp* + echo "**** zstd round-trip tests **** " roundTripTest diff --git a/programs/zstd.1 b/programs/zstd.1 index fdc8cc462f8..8d69c4ddfd9 100644 --- a/programs/zstd.1 +++ b/programs/zstd.1 @@ -31,12 +31,21 @@ is equivalent to It is based on the \fBLZ77\fR family, with FSE & huff0 entropy stage. zstd offers compression speed > 200 MB/s per core. It also features a fast decoder, with speed > 500 MB/s per core. + +\fBzstd\fR command line is generally similar to gzip, but features the following differences : + - Original files are preserved + - By default, \fBzstd file1 file2\fR means : compress file1 \fBinto\fR file2. + Use \fB-m\fR command if you want : compress file1 into file1.zstd and file2 into file2.zst + - By default, when compressing files, \fBzstd\fR displays advancement notification and result summary. + Use \fB-q\fR to turn them off + + \fBzstd\fR supports the following options : .SH OPTIONS .TP -.B \-1 - fast compression (default) +.B \-# + # compression level [1-19](default:1) .TP .B \-d decompression @@ -44,6 +53,14 @@ It also features a fast decoder, with speed > 500 MB/s per core. .B \-f overwrite output without prompting .TP +.BR \-m ", " --multiple + multiple files mode + In this mode, multiple files on the command line means compression or decompression of each named file + Notifications are also turned off by default +.TP +.B \-D + Use next file as dictionary content for compress / decompression +.TP .BR \-h/\-H ", " --help display help/long help and exit .TP @@ -53,20 +70,17 @@ It also features a fast decoder, with speed > 500 MB/s per core. .BR \-v ", " --verbose verbose mode .TP -.B \-q - suppress warnings; specify twice to suppress errors too +.BR \-q ", " --quiet + suppress warnings and notifications; specify twice to suppress errors too .TP -.B \-c +.B \-c force write to standard output, even if it is the console .TP -.B \-t - test compressed file integrity -.TP .B \-z force compression .TP -.B \-b - benchmark file(s) +.B \-b# + benchmark file(s) using compression level # .TP .B \-i# iteration loops [1-9](default : 3), benchmark mode only diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 37d48b8e883..9610e0fd4f9 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -47,6 +47,7 @@ #ifndef ZSTD_NOBENCH # include "bench.h" /* BMK_benchFiles, BMK_SetNbIterations */ #endif +#include "zstd.h" /* ZSTD version numbers */ /************************************** @@ -72,7 +73,9 @@ **************************************/ #define COMPRESSOR_NAME "zstd command line interface" #ifndef ZSTD_VERSION -# define ZSTD_VERSION "v0.4.4" +# define QUOTE(str) #str +# define EXPAND_AND_QUOTE(str) QUOTE(str) +# define ZSTD_VERSION "v" EXPAND_AND_QUOTE(ZSTD_VERSION_MAJOR) "." EXPAND_AND_QUOTE(ZSTD_VERSION_MINOR) "." EXPAND_AND_QUOTE(ZSTD_VERSION_RELEASE) #endif #define AUTHOR "Yann Collet" #define WELCOME_MESSAGE "*** %s %i-bits %s, by %s (%s) ***\n", COMPRESSOR_NAME, (int)(sizeof(void*)*8), ZSTD_VERSION, AUTHOR, __DATE__ @@ -140,7 +143,6 @@ static int usage_advanced(const char* programName) DISPLAY( " -m : multiple input filenames mode"); DISPLAY( " -c : force write to standard output, even if it is the console\n"); DISPLAY( " -D file: use file content as Dictionary \n"); - //DISPLAY( " -t : test compressed file integrity\n"); #ifndef ZSTD_NOBENCH DISPLAY( "Benchmark arguments :\n"); DISPLAY( " -b# : benchmark file(s), using # compression level (default : 1) \n"); @@ -210,7 +212,9 @@ int main(int argCount, const char** argv) /* long commands (--long-word) */ if (!strcmp(argument, "--version")) { displayOut=stdout; DISPLAY(WELCOME_MESSAGE); return 0; } if (!strcmp(argument, "--help")) { displayOut=stdout; return usage_advanced(programName); } + if (!strcmp(argument, "--multiple")) { multiple=1; continue; } if (!strcmp(argument, "--verbose")) { displayLevel=4; continue; } + if (!strcmp(argument, "--quiet")) { displayLevel--; continue; } /* Decode commands (note : aggregated commands are allowed) */ if (argument[0]=='-') @@ -400,16 +404,27 @@ int main(int argCount, const char** argv) if (!strcmp(inFileName, stdinmark) && !strcmp(outFileName,stdoutmark) && (displayLevel==2)) displayLevel=1; if (multiple && (displayLevel==2)) displayLevel=1; + if ((!multiple) && (nbFiles>2)) + { + DISPLAY("Too many files on the command line (%u > 2). Do you mean -m ? \n", nbFiles); + return nbFiles; + } + /* IO Stream/File */ FIO_setNotificationLevel(displayLevel); if (decode) + { + if (multiple) + operationResult = FIO_decompressMultipleFilenames(argv+fileNameStart, nbFiles, ZSTD_EXTENSION, dictFileName); + else FIO_decompressFilename(outFileName, inFileName, dictFileName); + } else { if (multiple) operationResult = FIO_compressMultipleFilenames(argv+fileNameStart, nbFiles, ZSTD_EXTENSION, dictFileName, cLevel); else - FIO_compressFilename(outFileName, inFileName, dictFileName, cLevel); + operationResult = FIO_compressFilename(outFileName, inFileName, dictFileName, cLevel); } _end: From 035c5429ba6b6f0cb34a272890932dd85a899393 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 17 Dec 2015 23:12:07 +0100 Subject: [PATCH 14/19] fix test32 --- programs/playTests.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/programs/playTests.sh b/programs/playTests.sh index 5829ad2a2ed..afffd5eb53b 100755 --- a/programs/playTests.sh +++ b/programs/playTests.sh @@ -54,12 +54,12 @@ echo "*** multiple files tests *** " ./datagen -s1 > tmp1 2> /dev/null ./datagen -s2 -g100K > tmp2 2> /dev/null ./datagen -s3 -g1M > tmp3 2> /dev/null -./zstd -f -m tmp* +$ZSTD -f -m tmp* ls -ls tmp* rm tmp1 tmp2 tmp3 -./zstd -df -m *.zst +$ZSTD -df -m *.zst ls -ls tmp* -./zstd -f -m tmp1 notHere tmp2 && die "missing file not detected!" +$ZSTD -f -m tmp1 notHere tmp2 && die "missing file not detected!" rm tmp* echo "**** zstd round-trip tests **** " From fdcad6d3e12b923f75a88dcf69637328b5a91303 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 17 Dec 2015 23:50:15 +0100 Subject: [PATCH 15/19] added ZSTD_compress_usingDict() --- lib/zstd_compress.c | 38 ++++++++++++++++++++++++++++++++++++++ lib/zstd_static.h | 10 +++++++++- programs/bench.c | 7 ++----- programs/bench.h | 2 +- 4 files changed, 50 insertions(+), 7 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 83f85c770f4..9ed4cc9e2fe 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -2180,3 +2180,41 @@ size_t ZSTD_compress(void* dst, size_t maxDstSize, const void* src, size_t srcSi free(ctxBody.workSpace); /* can't free ctxBody, since it's on stack; free heap content */ return result; } + +size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, + void* dst, size_t maxDstSize, + const void* src, size_t srcSize, + const void* dict, size_t dictSize, + int compressionLevel) +{ + BYTE* const ostart = (BYTE*)dst; + BYTE* op = ostart; + size_t oSize; + + /* Header */ + oSize = ZSTD_compressBegin_advanced(ctx, dst, maxDstSize, ZSTD_getParams(compressionLevel, srcSize+dictSize)); + if (ZSTD_isError(oSize)) return oSize; + op += oSize; + maxDstSize -= oSize; + + if (dict) + { + oSize = ZSTD_compress_insertDictionary(ctx, dict, dictSize); + if (ZSTD_isError(oSize)) return oSize; + } + + /* body (compression) */ + oSize = ZSTD_compressContinue(ctx, op, maxDstSize, src, srcSize); + if (ZSTD_isError(oSize)) return oSize; + op += oSize; + maxDstSize -= oSize; + + /* Close frame */ + oSize = ZSTD_compressEnd(ctx, op, maxDstSize); + if (ZSTD_isError(oSize)) return oSize; + op += oSize; + + return (op - ostart); +} + + diff --git a/lib/zstd_static.h b/lib/zstd_static.h index 1b43e69e3e2..9638af11ef5 100644 --- a/lib/zstd_static.h +++ b/lib/zstd_static.h @@ -98,9 +98,17 @@ ZSTDLIB_API size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, const void* src, size_t srcSize, ZSTD_parameters params); +/** ZSTD_compress_usingDict +* Same as ZSTD_compressCCtx(), using a Dictionary content as prefix */ +ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, + void* dst, size_t maxDstSize, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + int compressionLevel); + /* ************************************** -* Streaming functions (bufferless mode) +* Streaming functions (direct mode) ****************************************/ ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, void* dst, size_t maxDstSize, int compressionLevel); ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, ZSTD_parameters params); diff --git a/programs/bench.c b/programs/bench.c index da12d888b5c..8849935c6e8 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -211,8 +211,6 @@ typedef struct size_t resSize; } blockParam_t; -typedef size_t (*compressor_t) (void* dst, size_t maxDstSize, const void* src, size_t srcSize, int compressionLevel); - #define MIN(a,b) ((a)<(b) ? (a) : (b)) static int BMK_benchMem(const void* srcBuffer, size_t srcSize, @@ -225,7 +223,6 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, const size_t maxCompressedSize = ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024); /* add some room for safety */ void* const compressedBuffer = malloc(maxCompressedSize); void* const resultBuffer = malloc(srcSize); - const compressor_t compressor = ZSTD_compress; U64 crcOrig = XXH64(srcBuffer, srcSize, 0); U32 nbBlocks = 0; @@ -292,7 +289,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, while (BMK_GetMilliSpan(milliTime) < TIMELOOP) { for (blockNb=0; blockNb Date: Fri, 18 Dec 2015 01:26:48 +0100 Subject: [PATCH 16/19] benchmark can use dictionary --- lib/zstd_compress.c | 57 ++++++++++------------------------ lib/zstd_decompress.c | 29 ++++++++++++++--- lib/zstd_static.h | 49 +++++++++++++++++++---------- programs/bench.c | 72 +++++++++++++++++++++++++++++++++---------- programs/bench.h | 3 +- programs/paramgrill.c | 1 + programs/zstdcli.c | 4 +-- 7 files changed, 134 insertions(+), 81 deletions(-) diff --git a/lib/zstd_compress.c b/lib/zstd_compress.c index 9ed4cc9e2fe..c6d81ebec51 100644 --- a/lib/zstd_compress.c +++ b/lib/zstd_compress.c @@ -2117,7 +2117,7 @@ size_t ZSTD_compressBegin(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, int comp } -/** ZSTD_compressEnd +/*! ZSTD_compressEnd * Write frame epilogue * @return : nb of bytes written into dst (or an error code) */ size_t ZSTD_compressEnd(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize) @@ -2139,6 +2139,7 @@ size_t ZSTD_compressEnd(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize) size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize, + const void* dict,size_t dictSize, ZSTD_parameters params) { BYTE* const ostart = (BYTE*)dst; @@ -2151,9 +2152,15 @@ size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, op += oSize; maxDstSize -= oSize; + /* dictionary */ + if (dict) + { + oSize = ZSTD_compress_insertDictionary(ctx, dict, dictSize); + if (ZSTD_isError(oSize)) return oSize; + } + /* body (compression) */ - ctx->base = (const BYTE*)src; - oSize = ZSTD_compress_generic (ctx, op, maxDstSize, src, srcSize); + oSize = ZSTD_compressContinue (ctx, op, maxDstSize, src, srcSize); if(ZSTD_isError(oSize)) return oSize; op += oSize; maxDstSize -= oSize; @@ -2166,9 +2173,14 @@ size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, return (op - ostart); } +size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize, const void* dict, size_t dictSize, int compressionLevel) +{ + return ZSTD_compress_advanced(ctx, dst, maxDstSize, src, srcSize, dict, dictSize, ZSTD_getParams(compressionLevel, srcSize+dictSize)); +} + size_t ZSTD_compressCCtx (ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize, int compressionLevel) { - return ZSTD_compress_advanced(ctx, dst, maxDstSize, src, srcSize, ZSTD_getParams(compressionLevel, srcSize)); + return ZSTD_compress_advanced(ctx, dst, maxDstSize, src, srcSize, NULL, 0, ZSTD_getParams(compressionLevel, srcSize)); } size_t ZSTD_compress(void* dst, size_t maxDstSize, const void* src, size_t srcSize, int compressionLevel) @@ -2181,40 +2193,3 @@ size_t ZSTD_compress(void* dst, size_t maxDstSize, const void* src, size_t srcSi return result; } -size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, - void* dst, size_t maxDstSize, - const void* src, size_t srcSize, - const void* dict, size_t dictSize, - int compressionLevel) -{ - BYTE* const ostart = (BYTE*)dst; - BYTE* op = ostart; - size_t oSize; - - /* Header */ - oSize = ZSTD_compressBegin_advanced(ctx, dst, maxDstSize, ZSTD_getParams(compressionLevel, srcSize+dictSize)); - if (ZSTD_isError(oSize)) return oSize; - op += oSize; - maxDstSize -= oSize; - - if (dict) - { - oSize = ZSTD_compress_insertDictionary(ctx, dict, dictSize); - if (ZSTD_isError(oSize)) return oSize; - } - - /* body (compression) */ - oSize = ZSTD_compressContinue(ctx, op, maxDstSize, src, srcSize); - if (ZSTD_isError(oSize)) return oSize; - op += oSize; - maxDstSize -= oSize; - - /* Close frame */ - oSize = ZSTD_compressEnd(ctx, op, maxDstSize); - if (ZSTD_isError(oSize)) return oSize; - op += oSize; - - return (op - ostart); -} - - diff --git a/lib/zstd_decompress.c b/lib/zstd_decompress.c index e1c30a8d6f5..3431e327d2a 100644 --- a/lib/zstd_decompress.c +++ b/lib/zstd_decompress.c @@ -676,7 +676,10 @@ static size_t ZSTD_decompressBlock( } -size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) +size_t ZSTD_decompress_usingDict(ZSTD_DCtx* ctx, + void* dst, size_t maxDstSize, + const void* src, size_t srcSize, + const void* dict, size_t dictSize) { const BYTE* ip = (const BYTE*)src; const BYTE* iend = ip + srcSize; @@ -686,9 +689,19 @@ size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const v size_t remainingSize = srcSize; blockProperties_t blockProperties; - /* init */ - ctx->vBase = ctx->base = ctx->dictEnd = dst; + ZSTD_resetDCtx(ctx); + if (dict) + { + ZSTD_decompress_insertDictionary(ctx, dict, dictSize); + ctx->dictEnd = ctx->previousDstEnd; + ctx->vBase = (const char*)dst - ((const char*)(ctx->previousDstEnd) - (const char*)(ctx->base)); + ctx->base = dst; + } + else + { + ctx->vBase = ctx->base = ctx->dictEnd = dst; + } /* Frame Header */ { @@ -749,10 +762,16 @@ size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const v return op-ostart; } + +size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + return ZSTD_decompress_usingDict(dctx, dst, maxDstSize, src, srcSize, NULL, 0); +} + size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize) { - ZSTD_DCtx ctx; - return ZSTD_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize); + ZSTD_DCtx dctx; + return ZSTD_decompressDCtx(&dctx, dst, maxDstSize, src, srcSize); } diff --git a/lib/zstd_static.h b/lib/zstd_static.h index 9638af11ef5..f78d464c101 100644 --- a/lib/zstd_static.h +++ b/lib/zstd_static.h @@ -80,7 +80,7 @@ typedef struct /* ************************************* -* Advanced function +* Advanced functions ***************************************/ /** ZSTD_getParams * return ZSTD_parameters structure for a selected compression level and srcSize. @@ -91,21 +91,40 @@ ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, U64 srcSizeHint * correct params value to remain within authorized range */ ZSTDLIB_API void ZSTD_validateParams(ZSTD_parameters* params); -/** ZSTD_compress_advanced -* Same as ZSTD_compressCCtx(), with fine-tune control of each compression parameter */ -ZSTDLIB_API size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, - void* dst, size_t maxDstSize, - const void* src, size_t srcSize, - ZSTD_parameters params); - /** ZSTD_compress_usingDict -* Same as ZSTD_compressCCtx(), using a Dictionary content as prefix */ +* Same as ZSTD_compressCCtx(), using a Dictionary content as prefix +* Note : dict can be NULL, in which case, it's equivalent to ZSTD_compressCCtx() */ ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize, const void* dict,size_t dictSize, int compressionLevel); +/** ZSTD_compress_advanced +* Same as ZSTD_compress_usingDict(), with fine-tune control of each compression parameter */ +ZSTDLIB_API size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, + void* dst, size_t maxDstSize, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_parameters params); + +/** Decompression context management */ +typedef struct ZSTD_DCtx_s ZSTD_DCtx; +ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void); +ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); + +/** ZSTD_decompressDCtx +* Same as ZSTD_decompress, with pre-allocated DCtx structure */ +size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); + +/** ZSTD_decompress_usingDict +* Same as ZSTD_decompressDCtx, using a Dictionary content as prefix +* Note : dict can be NULL, in which case, it's equivalent to ZSTD_decompressDCtx() */ +size_t ZSTD_decompress_usingDict(ZSTD_DCtx* ctx, + void* dst, size_t maxDstSize, + const void* src, size_t srcSize, + const void* dict, size_t dictSize); + /* ************************************** * Streaming functions (direct mode) @@ -118,7 +137,7 @@ ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t maxD ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t maxDstSize); /** - Streaming compression, bufferless mode + Streaming compression, direct mode (bufferless) A ZSTD_CCtx object is required to track streaming operations. Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage it. @@ -139,13 +158,10 @@ ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t maxDstSiz Finish a frame with ZSTD_compressEnd(), which will write the epilogue. Without it, the frame will be considered incomplete by decoders. - You can then re-use ZSTD_CCtx to compress new frames. -*/ + You can then reuse ZSTD_CCtx to compress new frames. +*/ -typedef struct ZSTD_DCtx_s ZSTD_DCtx; -ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void); -ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); ZSTDLIB_API size_t ZSTD_resetDCtx(ZSTD_DCtx* dctx); ZSTDLIB_API size_t ZSTD_getFrameParams(ZSTD_parameters* params, const void* src, size_t srcSize); @@ -168,7 +184,8 @@ ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t ma >0 : means there is not enough data into src. Provides the expected size to successfully decode header. errorCode, which can be tested using ZSTD_isError() (For example, if it's not a ZSTD header) - Then, you can optionally insert a dictionary. This operation must mimic the compressor behavior, otherwise decompression will fail or be corrupted. + Then, you can optionally insert a dictionary. + This operation must mimic the compressor behavior, otherwise decompression will fail or be corrupted. Then it's possible to start decompression. Use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively. diff --git a/programs/bench.c b/programs/bench.c index 8849935c6e8..b80d6990bec 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -63,7 +63,7 @@ #endif #include "mem.h" -#include "zstd.h" +#include "zstd_static.h" #include "xxhash.h" #include "datagen.h" /* RDG_genBuffer */ @@ -215,7 +215,8 @@ typedef struct static int BMK_benchMem(const void* srcBuffer, size_t srcSize, const char* displayName, int cLevel, - const size_t* fileSizes, U32 nbFiles) + const size_t* fileSizes, U32 nbFiles, + const void* dictBuffer, size_t dictBufferSize) { const size_t blockSize = (g_blockSize ? g_blockSize : srcSize) + (!srcSize); /* avoid div by 0 */ const U32 maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles; @@ -223,6 +224,8 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, const size_t maxCompressedSize = ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024); /* add some room for safety */ void* const compressedBuffer = malloc(maxCompressedSize); void* const resultBuffer = malloc(srcSize); + ZSTD_CCtx* ctx = ZSTD_createCCtx(); + ZSTD_DCtx* dctx = ZSTD_createDCtx(); U64 crcOrig = XXH64(srcBuffer, srcSize, 0); U32 nbBlocks = 0; @@ -230,7 +233,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, if (strlen(displayName)>17) displayName += strlen(displayName)-17; /* can only display 17 characters */ /* Memory allocation & restrictions */ - if (!compressedBuffer || !resultBuffer || !blockTable) + if (!compressedBuffer || !resultBuffer || !blockTable || !ctx || !dctx) EXM_THROW(31, "not enough memory"); /* Init blockTable data */ @@ -289,7 +292,11 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize, while (BMK_GetMilliSpan(milliTime) < TIMELOOP) { for (blockNb=0; blockNb%10i (%5.3f),%6.1f MB/s ,%6.1f MB/s \n", cLevel, displayName, (int)srcSize, (int)cSize, ratio, (double)srcSize / fastestC / 1000., (double)srcSize / fastestD / 1000.); + else + DISPLAY("X \n"); } /* clean up */ free(compressedBuffer); free(resultBuffer); + ZSTD_freeCCtx(ctx); + ZSTD_freeDCtx(dctx); return 0; } @@ -372,16 +385,23 @@ static size_t BMK_findMaxMem(U64 requiredMem) static void BMK_benchCLevel(void* srcBuffer, size_t benchedSize, const char* displayName, int cLevel, - const size_t* fileSizes, unsigned nbFiles) + const size_t* fileSizes, unsigned nbFiles, + const void* dictBuffer, size_t dictBufferSize) { if (cLevel < 0) { int l; for (l=1; l <= -cLevel; l++) - BMK_benchMem(srcBuffer, benchedSize, displayName, l, fileSizes, nbFiles); + BMK_benchMem(srcBuffer, benchedSize, + displayName, l, + fileSizes, nbFiles, + dictBuffer, dictBufferSize); return; } - BMK_benchMem(srcBuffer, benchedSize, displayName, cLevel, fileSizes, nbFiles); + BMK_benchMem(srcBuffer, benchedSize, + displayName, cLevel, + fileSizes, nbFiles, + dictBuffer, dictBufferSize); } static U64 BMK_getTotalFileSize(const char** fileNamesTable, unsigned nbFiles) @@ -417,22 +437,37 @@ static void BMK_loadFiles(void* buffer, size_t bufferSize, } } -static void BMK_benchFileTable(const char** fileNamesTable, unsigned nbFiles, int cLevel) +static void BMK_benchFileTable(const char** fileNamesTable, unsigned nbFiles, + const char* dictFileName, int cLevel) { void* srcBuffer; size_t benchedSize; - size_t* fileSizes; + void* dictBuffer = NULL; + size_t dictBufferSize = 0; + size_t* fileSizes = (size_t*)malloc(nbFiles * sizeof(size_t)); U64 totalSizeToLoad = BMK_getTotalFileSize(fileNamesTable, nbFiles); char mfName[20] = {0}; const char* displayName = NULL; + if (!fileSizes) EXM_THROW(12, "not enough memory for fileSizes"); + + /* Load dictionary */ + if (dictFileName != NULL) + { + U64 dictFileSize = BMK_getFileSize(dictFileName); + if (dictFileSize > 64 MB) EXM_THROW(10, "dictionary file %s too large", dictFileName); + dictBufferSize = (size_t)dictFileSize; + dictBuffer = malloc(dictBufferSize); + if (dictBuffer==NULL) EXM_THROW(11, "not enough memory for dictionary (%u bytes)", (U32)dictBufferSize); + BMK_loadFiles(dictBuffer, dictBufferSize, fileSizes, &dictFileName, 1); + } + /* Memory allocation & restrictions */ benchedSize = BMK_findMaxMem(totalSizeToLoad * 3) / 3; if ((U64)benchedSize > totalSizeToLoad) benchedSize = (size_t)totalSizeToLoad; if (benchedSize < totalSizeToLoad) DISPLAY("Not enough memory; testing %u MB only...\n", (U32)(benchedSize >> 20)); srcBuffer = malloc(benchedSize); - fileSizes = (size_t*)malloc(nbFiles * sizeof(size_t)); if (!srcBuffer) EXM_THROW(12, "not enough memory"); /* Load input buffer */ @@ -443,10 +478,14 @@ static void BMK_benchFileTable(const char** fileNamesTable, unsigned nbFiles, in if (nbFiles > 1) displayName = mfName; else displayName = fileNamesTable[0]; - BMK_benchCLevel(srcBuffer, benchedSize, displayName, cLevel, fileSizes, nbFiles); + BMK_benchCLevel(srcBuffer, benchedSize, + displayName, cLevel, + fileSizes, nbFiles, + dictBuffer, dictBufferSize); /* clean up */ free(srcBuffer); + free(dictBuffer); free(fileSizes); } @@ -465,21 +504,22 @@ static void BMK_syntheticTest(int cLevel, double compressibility) /* Bench */ snprintf (name, sizeof(name), "Synthetic %2u%%", (unsigned)(compressibility*100)); - BMK_benchCLevel(srcBuffer, benchedSize, name, cLevel, &benchedSize, 1); + BMK_benchCLevel(srcBuffer, benchedSize, name, cLevel, &benchedSize, 1, NULL, 0); /* clean up */ free(srcBuffer); } -int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles, int cLevel) +int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles, + const char* dictFileName, int cLevel) { double compressibility = (double)g_compressibilityDefault / 100; if (nbFiles == 0) BMK_syntheticTest(cLevel, compressibility); else - BMK_benchFileTable(fileNamesTable, nbFiles, cLevel); + BMK_benchFileTable(fileNamesTable, nbFiles, dictFileName, cLevel); return 0; } diff --git a/programs/bench.h b/programs/bench.h index 8cd6794086f..9ae83690cc3 100644 --- a/programs/bench.h +++ b/programs/bench.h @@ -26,7 +26,8 @@ /* Main function */ -int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles, int cLevel); +int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles, + const char* dictFileName, int cLevel); /* Set Parameters */ void BMK_SetNbIterations(int nbLoops); diff --git a/programs/paramgrill.c b/programs/paramgrill.c index 44d3ffdc7fb..a34da88ce7a 100644 --- a/programs/paramgrill.c +++ b/programs/paramgrill.c @@ -342,6 +342,7 @@ static size_t BMK_benchParam(BMK_result_t* resultPtr, blockTable[blockNb].cSize = ZSTD_compress_advanced(ctx, blockTable[blockNb].cPtr, blockTable[blockNb].cRoom, blockTable[blockNb].srcPtr, blockTable[blockNb].srcSize, + NULL, 0, params); nbLoops++; } diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 9610e0fd4f9..8c5a98776e6 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -140,7 +140,7 @@ static int usage_advanced(const char* programName) DISPLAY( " -V : display Version number and exit\n"); DISPLAY( " -v : verbose mode\n"); DISPLAY( " -q : suppress warnings; specify twice to suppress errors too\n"); - DISPLAY( " -m : multiple input filenames mode"); + DISPLAY( " -m : multiple input filenames mode \n"); DISPLAY( " -c : force write to standard output, even if it is the console\n"); DISPLAY( " -D file: use file content as Dictionary \n"); #ifndef ZSTD_NOBENCH @@ -354,7 +354,7 @@ int main(int argCount, const char** argv) if (bench) { #ifndef ZSTD_NOBENCH - BMK_benchFiles(argv+fileNameStart, nbFiles, cLevel*rangeBench); + BMK_benchFiles(argv+fileNameStart, nbFiles, dictFileName, cLevel*rangeBench); #endif goto _end; } From 17d188fa38f350f5fdadff737adf9b6be62df4ea Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 18 Dec 2015 02:14:46 +0100 Subject: [PATCH 17/19] robust file list --- programs/zstdcli.c | 65 +++++++++++++++++++++------------------------- 1 file changed, 30 insertions(+), 35 deletions(-) diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 8c5a98776e6..1526e0ea64a 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -181,20 +181,22 @@ int main(int argCount, const char** argv) multiple=0, operationResult=0; unsigned cLevel = 1; + const char** filenameTable = NULL; + unsigned filenameIdx = 0; const char* programName = argv[0]; - const char* inFileName = NULL; const char* outFileName = NULL; const char* dictFileName = NULL; char* dynNameSpace = NULL; const char extension[] = ZSTD_EXTENSION; - unsigned fileNameStart = 0; - unsigned nbFiles = 0; int rangeBench = 1; /* init */ - (void)rangeBench; (void)nbFiles; (void)fileNameStart; /* not used when ZSTD_NOBENCH set */ + (void)rangeBench; /* not used when ZSTD_NOBENCH set */ + filenameTable = (const char**)malloc(argCount * sizeof(const char*)); + if (filenameTable==NULL) { DISPLAY("not enough memory\n"); exit(1); } + memset(filenameTable, 0, argCount * sizeof(const char*)); displayOut = stderr; - /* Pick out basename component. Don't rely on stdlib because of conflicting behavior. */ + /* Pick out program name from path. Don't rely on stdlib because of conflicting behavior */ for (i = (int)strlen(programName); i > 0; i--) { if (programName[i] == '/') { i++; break; } } programName += i; @@ -222,9 +224,8 @@ int main(int argCount, const char** argv) /* '-' means stdin/stdout */ if (argument[1]==0) { - if (!inFileName) inFileName=stdinmark; - else outFileName=stdoutmark; - continue; + if (!filenameIdx) { filenameIdx=1, filenameTable[0]=stdinmark; continue; } + outFileName=stdoutmark; continue; } argument++; @@ -335,16 +336,8 @@ int main(int argCount, const char** argv) continue; } - /* first provided filename is input */ - if (!inFileName) { inFileName = argument; fileNameStart = i; nbFiles = argCount-i; continue; } - - /* second provided filename is output */ - if (!outFileName) - { - outFileName = argument; - if (!strcmp (outFileName, nullString)) outFileName = nulmark; - continue; - } + /* add filename to list */ + filenameTable[filenameIdx++] = argument; } /* Welcome message (if verbose) */ @@ -354,27 +347,28 @@ int main(int argCount, const char** argv) if (bench) { #ifndef ZSTD_NOBENCH - BMK_benchFiles(argv+fileNameStart, nbFiles, dictFileName, cLevel*rangeBench); + BMK_benchFiles(filenameTable, filenameIdx, dictFileName, cLevel*rangeBench); #endif goto _end; } /* No input filename ==> use stdin */ - if(!inFileName) { inFileName=stdinmark; } + if(!filenameIdx) filenameIdx=1, filenameTable[0]=stdinmark; /* Check if input defined as console; trigger an error in this case */ - if (!strcmp(inFileName, stdinmark) && IS_CONSOLE(stdin) ) return badusage(programName); + if (!strcmp(filenameTable[0], stdinmark) && IS_CONSOLE(stdin) ) return badusage(programName); /* No output filename ==> try to select one automatically (when possible) */ - while (!outFileName) + outFileName = filenameTable[1]; + while (!outFileName) /* while : just to allow break statement */ { if (!IS_CONSOLE(stdout)) { outFileName=stdoutmark; break; } /* Default to stdout whenever possible (i.e. not a console) */ if (!decode) /* compression to file */ { - size_t l = strlen(inFileName); + size_t l = strlen(filenameTable[0]); dynNameSpace = (char*)calloc(1,l+5); if (dynNameSpace==NULL) { DISPLAY("not enough memory\n"); exit(1); } - strcpy(dynNameSpace, inFileName); + strcpy(dynNameSpace, filenameTable[0]); strcpy(dynNameSpace+l, ZSTD_EXTENSION); outFileName = dynNameSpace; DISPLAYLEVEL(2, "Compressed filename will be : %s \n", outFileName); @@ -382,8 +376,8 @@ int main(int argCount, const char** argv) } /* decompression to file (automatic name will work only if input filename has correct format extension) */ { - size_t filenameSize = strlen(inFileName); - if (strcmp(inFileName + (filenameSize-4), extension)) + size_t filenameSize = strlen(filenameTable[0]); + if (strcmp(filenameTable[0] + (filenameSize-4), extension)) { DISPLAYLEVEL(1, "unknown suffix - cannot determine destination filename\n"); return badusage(programName); @@ -391,7 +385,7 @@ int main(int argCount, const char** argv) dynNameSpace = (char*)calloc(1,filenameSize+1); if (dynNameSpace==NULL) { DISPLAY("not enough memory\n"); exit(1); } outFileName = dynNameSpace; - strcpy(dynNameSpace, inFileName); + strcpy(dynNameSpace, filenameTable[0]); dynNameSpace[filenameSize-4]=0; DISPLAYLEVEL(2, "Decoding file %s \n", outFileName); } @@ -401,13 +395,13 @@ int main(int argCount, const char** argv) if (!strcmp(outFileName,stdoutmark) && IS_CONSOLE(stdout) && !forceStdout) return badusage(programName); /* No warning message in pure pipe mode (stdin + stdout) or multiple mode */ - if (!strcmp(inFileName, stdinmark) && !strcmp(outFileName,stdoutmark) && (displayLevel==2)) displayLevel=1; + if (!strcmp(filenameTable[0], stdinmark) && !strcmp(outFileName,stdoutmark) && (displayLevel==2)) displayLevel=1; if (multiple && (displayLevel==2)) displayLevel=1; - if ((!multiple) && (nbFiles>2)) + if ((!multiple) && (filenameIdx>2)) { - DISPLAY("Too many files on the command line (%u > 2). Do you mean -m ? \n", nbFiles); - return nbFiles; + DISPLAY("Too many files on the command line (%u > 2). Do you mean -m ? \n", filenameIdx); + return filenameIdx; } /* IO Stream/File */ @@ -415,20 +409,21 @@ int main(int argCount, const char** argv) if (decode) { if (multiple) - operationResult = FIO_decompressMultipleFilenames(argv+fileNameStart, nbFiles, ZSTD_EXTENSION, dictFileName); + operationResult = FIO_decompressMultipleFilenames(filenameTable, filenameIdx, ZSTD_EXTENSION, dictFileName); else - FIO_decompressFilename(outFileName, inFileName, dictFileName); + operationResult = FIO_decompressFilename(outFileName, filenameTable[0], dictFileName); } else { if (multiple) - operationResult = FIO_compressMultipleFilenames(argv+fileNameStart, nbFiles, ZSTD_EXTENSION, dictFileName, cLevel); + operationResult = FIO_compressMultipleFilenames(filenameTable, filenameIdx, ZSTD_EXTENSION, dictFileName, cLevel); else - operationResult = FIO_compressFilename(outFileName, inFileName, dictFileName, cLevel); + operationResult = FIO_compressFilename(outFileName, filenameTable[0], dictFileName, cLevel); } _end: if (main_pause) waitEnter(); free(dynNameSpace); + free(filenameTable); return operationResult; } From 6a4583542a735b016c7fa36e51c3460bde24b055 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 18 Dec 2015 02:51:14 +0100 Subject: [PATCH 18/19] fixed asan warning --- programs/playTests.sh | 3 +++ programs/zstdcli.c | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/programs/playTests.sh b/programs/playTests.sh index afffd5eb53b..5d641ec598d 100755 --- a/programs/playTests.sh +++ b/programs/playTests.sh @@ -39,9 +39,12 @@ echo frame concatenation test completed echo "**** flush write error test **** " +echo "echo foo | $ZSTD > /dev/full" echo foo | $ZSTD > /dev/full && die "write error not detected!" +echo "echo foo | $ZSTD | $ZSTD -d > /dev/full" echo foo | $ZSTD | $ZSTD -d > /dev/full && die "write error not detected!" + echo "*** dictionary tests *** " ./datagen > tmpDict diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 1526e0ea64a..b218d3a2ca0 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -192,7 +192,7 @@ int main(int argCount, const char** argv) /* init */ (void)rangeBench; /* not used when ZSTD_NOBENCH set */ - filenameTable = (const char**)malloc(argCount * sizeof(const char*)); + filenameTable = (const char**)malloc(argCount * sizeof(const char*)); /* argCount >= 1 */ if (filenameTable==NULL) { DISPLAY("not enough memory\n"); exit(1); } memset(filenameTable, 0, argCount * sizeof(const char*)); displayOut = stderr; @@ -359,7 +359,7 @@ int main(int argCount, const char** argv) if (!strcmp(filenameTable[0], stdinmark) && IS_CONSOLE(stdin) ) return badusage(programName); /* No output filename ==> try to select one automatically (when possible) */ - outFileName = filenameTable[1]; + if (filenameIdx>=2) outFileName = filenameTable[1]; while (!outFileName) /* while : just to allow break statement */ { if (!IS_CONSOLE(stdout)) { outFileName=stdoutmark; break; } /* Default to stdout whenever possible (i.e. not a console) */ From 324a3e27c35939759676da343c0c3f79367c5fae Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 18 Dec 2015 03:19:27 +0100 Subject: [PATCH 19/19] fixed visual warning --- programs/zstdcli.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/programs/zstdcli.c b/programs/zstdcli.c index b218d3a2ca0..564686c622a 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -181,7 +181,7 @@ int main(int argCount, const char** argv) multiple=0, operationResult=0; unsigned cLevel = 1; - const char** filenameTable = NULL; + const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*)); /* argCount >= 1 */ unsigned filenameIdx = 0; const char* programName = argv[0]; const char* outFileName = NULL; @@ -192,9 +192,7 @@ int main(int argCount, const char** argv) /* init */ (void)rangeBench; /* not used when ZSTD_NOBENCH set */ - filenameTable = (const char**)malloc(argCount * sizeof(const char*)); /* argCount >= 1 */ if (filenameTable==NULL) { DISPLAY("not enough memory\n"); exit(1); } - memset(filenameTable, 0, argCount * sizeof(const char*)); displayOut = stderr; /* Pick out program name from path. Don't rely on stdlib because of conflicting behavior */ for (i = (int)strlen(programName); i > 0; i--) { if (programName[i] == '/') { i++; break; } } @@ -424,6 +422,6 @@ int main(int argCount, const char** argv) _end: if (main_pause) waitEnter(); free(dynNameSpace); - free(filenameTable); + free((void*)filenameTable); return operationResult; }