From: Dan Carpenter <dan.carpenter@oracle.com>
To: terrelln@fb.com, stanjo74 <stanjo@usa.net>
Cc: kernel-janitors@vger.kernel.org
Subject: [bug report] lib: zstd: Upgrade to latest upstream zstd version 1.4.10
Date: Fri, 8 Oct 2021 09:37:04 +0300 [thread overview]
Message-ID: <20211008063704.GA5370@kili> (raw)
Hello Nick Terrell,
I grepped the README for zstd and it doesn't appear there is a mailing
list to add to the CC.
The patch c684b4e9a301: "lib: zstd: Upgrade to latest upstream zstd
version 1.4.10" from Sep 11, 2020, leads to the following Smatch static
checker warnings:
lib/zstd/compress/zstd_opt.c:695 ZSTD_insertBtAndGetAllMatches() warn: should this be 'nbCompares == -1'
lib/zstd/compress/zstd_lazy.c:360 ZSTD_DUBT_findBestMatch() warn: should this be 'nbCompares == -1'
lib/zstd/compress/zstd_lazy.c:1267 ZSTD_compressBlock_lazy_extDict_generic() warn: inconsistent indenting
lib/zstd/compress/zstd_compress.c:726 ZSTD_CCtxParams_setParameter() warn: no lower bound on 'value'
lib/zstd/decompress/zstd_decompress.c:177 ZSTD_createDDictHashSet() warn: variable dereferenced before check 'ret' (see line 174)
lib/zstd/compress/zstd_opt.c
508 FORCE_INLINE_TEMPLATE
509 U32 ZSTD_insertBtAndGetAllMatches (
510 ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */
511 ZSTD_matchState_t* ms,
512 U32* nextToUpdate3,
513 const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
514 const U32 rep[ZSTD_REP_NUM],
515 U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
516 const U32 lengthToBeat,
517 U32 const mls /* template */)
518 {
519 const ZSTD_compressionParameters* const cParams = &ms->cParams;
520 U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
521 const BYTE* const base = ms->window.base;
522 U32 const curr = (U32)(ip-base);
523 U32 const hashLog = cParams->hashLog;
524 U32 const minMatch = (mls==3) ? 3 : 4;
525 U32* const hashTable = ms->hashTable;
526 size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
527 U32 matchIndex = hashTable[h];
528 U32* const bt = ms->chainTable;
529 U32 const btLog = cParams->chainLog - 1;
530 U32 const btMask= (1U << btLog) - 1;
531 size_t commonLengthSmaller=0, commonLengthLarger=0;
532 const BYTE* const dictBase = ms->window.dictBase;
533 U32 const dictLimit = ms->window.dictLimit;
534 const BYTE* const dictEnd = dictBase + dictLimit;
535 const BYTE* const prefixStart = base + dictLimit;
536 U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
537 U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
538 U32 const matchLow = windowLow ? windowLow : 1;
539 U32* smallerPtr = bt + 2*(curr&btMask);
540 U32* largerPtr = bt + 2*(curr&btMask) + 1;
541 U32 matchEndIdx = curr+8+1; /* farthest referenced position of any match => detects repetitive patterns */
542 U32 dummy32; /* to be nullified at the end */
543 U32 mnum = 0;
544 U32 nbCompares = 1U << cParams->searchLog;
^^^^^^^^^^^^^^
This is unsigned.
545
546 const ZSTD_matchState_t* dms = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
547 const ZSTD_compressionParameters* const dmsCParams =
548 dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL;
549 const BYTE* const dmsBase = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL;
550 const BYTE* const dmsEnd = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL;
551 U32 const dmsHighLimit = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0;
552 U32 const dmsLowLimit = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0;
553 U32 const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0;
554 U32 const dmsHashLog = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog;
555 U32 const dmsBtLog = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog;
556 U32 const dmsBtMask = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0;
557 U32 const dmsBtLow = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;
558
559 size_t bestLength = lengthToBeat-1;
560 DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr);
561
562 /* check repCode */
563 assert(ll0 <= 1); /* necessarily 1 or 0 */
564 { U32 const lastR = ZSTD_REP_NUM + ll0;
565 U32 repCode;
566 for (repCode = ll0; repCode < lastR; repCode++) {
567 U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
568 U32 const repIndex = curr - repOffset;
569 U32 repLen = 0;
570 assert(curr >= dictLimit);
571 if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) { /* equivalent to `curr > repIndex >= dictLimit` */
572 /* We must validate the repcode offset because when we're using a dictionary the
573 * valid offset range shrinks when the dictionary goes out of bounds.
574 */
575 if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
576 repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
577 }
578 } else { /* repIndex < dictLimit || repIndex >= curr */
579 const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
580 dmsBase + repIndex - dmsIndexDelta :
581 dictBase + repIndex;
582 assert(curr >= windowLow);
583 if ( dictMode == ZSTD_extDict
584 && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow) /* equivalent to `curr > repIndex >= windowLow` */
585 & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
586 && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
587 repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
588 }
589 if (dictMode == ZSTD_dictMatchState
590 && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `curr > repIndex >= dmsLowLimit` */
591 & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */
592 && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
593 repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
594 } }
595 /* save longer solution */
596 if (repLen > bestLength) {
597 DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
598 repCode, ll0, repOffset, repLen);
599 bestLength = repLen;
600 matches[mnum].off = repCode - ll0;
601 matches[mnum].len = (U32)repLen;
602 mnum++;
603 if ( (repLen > sufficient_len)
604 | (ip+repLen == iLimit) ) { /* best possible */
605 return mnum;
606 } } } }
607
608 /* HC3 match finder */
609 if ((mls == 3) /*static*/ && (bestLength < mls)) {
610 U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
611 if ((matchIndex3 >= matchLow)
612 & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
613 size_t mlen;
614 if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
615 const BYTE* const match = base + matchIndex3;
616 mlen = ZSTD_count(ip, match, iLimit);
617 } else {
618 const BYTE* const match = dictBase + matchIndex3;
619 mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
620 }
621
622 /* save best solution */
623 if (mlen >= mls /* == 3 > bestLength */) {
624 DEBUGLOG(8, "found small match with hlog3, of length %u",
625 (U32)mlen);
626 bestLength = mlen;
627 assert(curr > matchIndex3);
628 assert(mnum==0); /* no prior solution */
629 matches[0].off = (curr - matchIndex3) + ZSTD_REP_MOVE;
630 matches[0].len = (U32)mlen;
631 mnum = 1;
632 if ( (mlen > sufficient_len) |
633 (ip+mlen == iLimit) ) { /* best possible length */
634 ms->nextToUpdate = curr+1; /* skip insertion */
635 return 1;
636 } } }
637 /* no dictMatchState lookup: dicts don't have a populated HC3 table */
638 }
639
640 hashTable[h] = curr; /* Update Hash Table */
641
642 while (nbCompares-- && (matchIndex >= matchLow)) {
^^^^^^^^^^^^
This is a post op so it will exit the loop with nbCompares set to -1U.
643 U32* const nextPtr = bt + 2*(matchIndex & btMask);
644 const BYTE* match;
645 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
646 assert(curr > matchIndex);
647
648 if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
649 assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */
650 match = base + matchIndex;
651 if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */
652 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
653 } else {
654 match = dictBase + matchIndex;
655 assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */
656 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
657 if (matchIndex+matchLength >= dictLimit)
658 match = base + matchIndex; /* prepare for match[matchLength] read */
659 }
660
661 if (matchLength > bestLength) {
662 DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
663 (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE);
664 assert(matchEndIdx > matchIndex);
665 if (matchLength > matchEndIdx - matchIndex)
666 matchEndIdx = matchIndex + (U32)matchLength;
667 bestLength = matchLength;
668 matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE;
669 matches[mnum].len = (U32)matchLength;
670 mnum++;
671 if ( (matchLength > ZSTD_OPT_NUM)
672 | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
673 if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
^^^^^^^^^^^^^^
Or it could exit here.
674 break; /* drop, to preserve bt consistency (miss a little bit of compression) */
675 }
676 }
677
678 if (match[matchLength] < ip[matchLength]) {
679 /* match smaller than current */
680 *smallerPtr = matchIndex; /* update smaller idx */
681 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
682 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
683 smallerPtr = nextPtr+1; /* new candidate => larger than match, which was smaller than current */
684 matchIndex = nextPtr[1]; /* new matchIndex, larger than previous, closer to current */
685 } else {
686 *largerPtr = matchIndex;
687 commonLengthLarger = matchLength;
688 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
689 largerPtr = nextPtr;
690 matchIndex = nextPtr[0];
691 } }
692
693 *smallerPtr = *largerPtr = 0;
694
--> 695 if (dictMode == ZSTD_dictMatchState && nbCompares) {
^^^^^^^^^^
The should this be check for if nbCompares == -1? This could be
intentional. Hard to tell.
696 size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
697 U32 dictMatchIndex = dms->hashTable[dmsH];
698 const U32* const dmsBt = dms->chainTable;
699 commonLengthSmaller = commonLengthLarger = 0;
700 while (nbCompares-- && (dictMatchIndex > dmsLowLimit)) {
701 const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
702 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
703 const BYTE* match = dmsBase + dictMatchIndex;
704 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart);
705 if (dictMatchIndex+matchLength >= dmsHighLimit)
706 match = base + dictMatchIndex + dmsIndexDelta; /* to prepare for next usage of match[matchLength] */
707
708 if (matchLength > bestLength) {
709 matchIndex = dictMatchIndex + dmsIndexDelta;
710 DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
711 (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE);
712 if (matchLength > matchEndIdx - matchIndex)
713 matchEndIdx = matchIndex + (U32)matchLength;
714 bestLength = matchLength;
715 matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE;
716 matches[mnum].len = (U32)matchLength;
717 mnum++;
718 if ( (matchLength > ZSTD_OPT_NUM)
719 | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
720 break; /* drop, to guarantee consistency (miss a little bit of compression) */
721 }
722 }
723
724 if (dictMatchIndex <= dmsBtLow) { break; } /* beyond tree size, stop the search */
725 if (match[matchLength] < ip[matchLength]) {
726 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
727 dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
728 } else {
729 /* match is larger than current */
730 commonLengthLarger = matchLength;
731 dictMatchIndex = nextPtr[0];
732 }
733 }
734 }
735
736 assert(matchEndIdx > curr+8);
737 ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
738 return mnum;
739 }
regards,
dan carpenter
next reply other threads:[~2021-10-08 6:37 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-10-08 6:37 Dan Carpenter [this message]
2021-10-08 17:59 ` [bug report] lib: zstd: Upgrade to latest upstream zstd version 1.4.10 Nick Terrell
2021-10-08 18:28 ` Nick Terrell
2021-10-09 8:25 ` Dan Carpenter
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20211008063704.GA5370@kili \
--to=dan.carpenter@oracle.com \
--cc=kernel-janitors@vger.kernel.org \
--cc=stanjo@usa.net \
--cc=terrelln@fb.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.