From 9458cbcce4362c1f3a5aa3929a2119522811d71e Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Fri, 20 Dec 2024 17:32:28 -0500 Subject: [PATCH] [opt] Fix too short of match getting generated The optimal parser with LDM enabled using minMatch > 3 could generate a match length of 3 when minMatch >= 4. This is not allowed. 1. Fix the bug 2. Add validation logic to `ZSTD_buildSeqStore()` in debug mode for all block compressors that checks we never generate too short a match. This way we don't rely on the `generate_sequences` fuzzer to find this issue. Credit to OSS-Fuzz --- lib/compress/zstd_compress.c | 22 ++++++++++++++++++++++ lib/compress/zstd_opt.c | 18 +++++++++++------- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 3d78b7da355..fe496cf9c28 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -3217,6 +3217,27 @@ static size_t ZSTD_fastSequenceLengthSum(ZSTD_Sequence const* seqBuf, size_t seq return litLenSum + matchLenSum; } +/** + * Function to validate sequences produced by a block compressor. + */ +static void ZSTD_validateSeqStore(const seqStore_t* seqStore, const ZSTD_compressionParameters* cParams) +{ +#if DEBUGLEVEL >= 1 + const seqDef* seq = seqStore->sequencesStart; + const seqDef* const seqEnd = seqStore->sequences; + size_t const matchLenLowerBound = cParams->minMatch == 3 ? 3 : 4; + for (; seq < seqEnd; ++seq) { + const ZSTD_sequenceLength seqLength = ZSTD_getSequenceLength(seqStore, seq); + assert(seqLength.matchLength >= matchLenLowerBound); + (void)seqLength; + (void)matchLenLowerBound; + } +#else + (void)seqStore; + (void)cParams; +#endif +} + typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) @@ -3380,6 +3401,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) { const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize; ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize); } } + ZSTD_validateSeqStore(&zc->seqStore, &zc->appliedParams.cParams); return ZSTDbss_compress; } diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c index e4a8ee81ea5..ec51cb4ae7a 100644 --- a/lib/compress/zstd_opt.c +++ b/lib/compress/zstd_opt.c @@ -972,7 +972,7 @@ ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock return; } - /* Matches may be < MINMATCH by this process. In that case, we will reject them + /* Matches may be < minMatch by this process. In that case, we will reject them when we are deciding whether or not to add the ldm */ optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining; optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining; @@ -994,7 +994,8 @@ ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock * into 'matches'. Maintains the correct ordering of 'matches'. */ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches, - const ZSTD_optLdm_t* optLdm, U32 currPosInBlock) + const ZSTD_optLdm_t* optLdm, U32 currPosInBlock, + U32 minMatch) { U32 const posDiff = currPosInBlock - optLdm->startPosInBlock; /* Note: ZSTD_match_t actually contains offBase and matchLength (before subtracting MINMATCH) */ @@ -1003,7 +1004,7 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches, /* Ensure that current block position is not outside of the match */ if (currPosInBlock < optLdm->startPosInBlock || currPosInBlock >= optLdm->endPosInBlock - || candidateMatchLength < MINMATCH) { + || candidateMatchLength < minMatch) { return; } @@ -1023,7 +1024,8 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches, static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_t* matches, U32* nbMatches, - U32 currPosInBlock, U32 remainingBytes) + U32 currPosInBlock, U32 remainingBytes, + U32 minMatch) { if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) { return; @@ -1040,7 +1042,7 @@ ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, } ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes); } - ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock); + ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock, minMatch); } @@ -1122,7 +1124,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, U32 const ll0 = !litlen; U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch); ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches, - (U32)(ip-istart), (U32)(iend-ip)); + (U32)(ip-istart), (U32)(iend-ip), + minMatch); if (!nbMatches) { DEBUGLOG(8, "no match found at cPos %u", (unsigned)(ip-istart)); ip++; @@ -1276,7 +1279,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, U32 matchNb; ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches, - (U32)(inr-istart), (U32)(iend-inr)); + (U32)(inr-istart), (U32)(iend-inr), + minMatch); if (!nbMatches) { DEBUGLOG(7, "rPos:%u : no match found", cur);