Skip to content

Commit

Permalink
[opt] Fix too short of match getting generated
Browse files Browse the repository at this point in the history
The optimal parser with LDM enabled using minMatch > 3 could generate a match
length of 3 when minMatch >= 4. This is not allowed.

1. Fix the bug
2. Add validation logic to `ZSTD_buildSeqStore()` in debug mode for all block
   compressors that checks we never generate too short a match. This way we don't
   rely on the `generate_sequences` fuzzer to find this issue.

Credit to OSS-Fuzz
  • Loading branch information
Nick Terrell committed Dec 20, 2024
1 parent 5a7f5c7 commit 9458cbc
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 7 deletions.
22 changes: 22 additions & 0 deletions lib/compress/zstd_compress.c
Original file line number Diff line number Diff line change
Expand Up @@ -3217,6 +3217,27 @@ static size_t ZSTD_fastSequenceLengthSum(ZSTD_Sequence const* seqBuf, size_t seq
return litLenSum + matchLenSum;
}

/**
* Function to validate sequences produced by a block compressor.
*/
static void ZSTD_validateSeqStore(const seqStore_t* seqStore, const ZSTD_compressionParameters* cParams)
{
#if DEBUGLEVEL >= 1
const seqDef* seq = seqStore->sequencesStart;
const seqDef* const seqEnd = seqStore->sequences;
size_t const matchLenLowerBound = cParams->minMatch == 3 ? 3 : 4;
for (; seq < seqEnd; ++seq) {
const ZSTD_sequenceLength seqLength = ZSTD_getSequenceLength(seqStore, seq);
assert(seqLength.matchLength >= matchLenLowerBound);
(void)seqLength;
(void)matchLenLowerBound;
}
#else
(void)seqStore;
(void)cParams;
#endif
}

typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e;

static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
Expand Down Expand Up @@ -3380,6 +3401,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
{ const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize;
ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize);
} }
ZSTD_validateSeqStore(&zc->seqStore, &zc->appliedParams.cParams);
return ZSTDbss_compress;
}

Expand Down
18 changes: 11 additions & 7 deletions lib/compress/zstd_opt.c
Original file line number Diff line number Diff line change
Expand Up @@ -972,7 +972,7 @@ ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock
return;
}

/* Matches may be < MINMATCH by this process. In that case, we will reject them
/* Matches may be < minMatch by this process. In that case, we will reject them
when we are deciding whether or not to add the ldm */
optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
Expand All @@ -994,7 +994,8 @@ ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock
* into 'matches'. Maintains the correct ordering of 'matches'.
*/
static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
const ZSTD_optLdm_t* optLdm, U32 currPosInBlock)
const ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
U32 minMatch)
{
U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
/* Note: ZSTD_match_t actually contains offBase and matchLength (before subtracting MINMATCH) */
Expand All @@ -1003,7 +1004,7 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
/* Ensure that current block position is not outside of the match */
if (currPosInBlock < optLdm->startPosInBlock
|| currPosInBlock >= optLdm->endPosInBlock
|| candidateMatchLength < MINMATCH) {
|| candidateMatchLength < minMatch) {
return;
}

Expand All @@ -1023,7 +1024,8 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
static void
ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
ZSTD_match_t* matches, U32* nbMatches,
U32 currPosInBlock, U32 remainingBytes)
U32 currPosInBlock, U32 remainingBytes,
U32 minMatch)
{
if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
return;
Expand All @@ -1040,7 +1042,7 @@ ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
}
ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
}
ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock, minMatch);
}


Expand Down Expand Up @@ -1122,7 +1124,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
U32 const ll0 = !litlen;
U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
(U32)(ip-istart), (U32)(iend-ip));
(U32)(ip-istart), (U32)(iend-ip),
minMatch);
if (!nbMatches) {
DEBUGLOG(8, "no match found at cPos %u", (unsigned)(ip-istart));
ip++;
Expand Down Expand Up @@ -1276,7 +1279,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
U32 matchNb;

ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
(U32)(inr-istart), (U32)(iend-inr));
(U32)(inr-istart), (U32)(iend-inr),
minMatch);

if (!nbMatches) {
DEBUGLOG(7, "rPos:%u : no match found", cur);
Expand Down

0 comments on commit 9458cbc

Please sign in to comment.