From: David Laight <david.laight.linux@gmail.com>
To: Lasse Collin <lasse.collin@tukaani.org>
Cc: Andrew Morton <akpm@linux-foundation.org>,
linux-kernel@vger.kernel.org,
Nathan Chancellor <nathan@kernel.org>,
Thorsten Blum <thorsten.blum@linux.dev>
Subject: Re: [PATCH 1/2] lib/xz: Use size_t instead of uint32_t in a few places
Date: Mon, 15 Jun 2026 09:43:39 +0100 [thread overview]
Message-ID: <20260615094339.4e2a1dcb@pumpkin> (raw)
In-Reply-To: <20260614160521.924710-1-lasse.collin@tukaani.org>
On Sun, 14 Jun 2026 19:05:17 +0300
Lasse Collin <lasse.collin@tukaani.org> wrote:
> Reduce the number of uint32_t <-> size_t conversions a little. Eliminating
> such conversions entirely would require changing almost all uint32_t to
> size_t, which would look confusing and increase the sizes of the structs
> even more. Going the other way, converting everything to uint32_t, isn't
> possible because the input and output buffers use size_t in struct xz_buf.
>
> Now both arguments to min() have the same type. This is required to for
> compatibility with PowerPC boot code[1] whose min() is strict like
> include/linux/minmax.h was before the commit d03eba99f5bf ("minmax: allow
> min()/max()/clamp() if the arguments have the same signedness.").
>
> Swap the order of the "state" and "len" in struct lzma_dec to avoid
> padding in the middle of the struct when size_t is 64 bits. The
> reordering doesn't change the size of the struct; the padding just
> appears at the end instead.
>
> dict_flush() used to truncate size_t to uint32_t when returning.
> This wasn't a bug; the value is always small enough.
>
> Reported-by: Nathan Chancellor <nathan@kernel.org>
> Closes: https://lore.kernel.org/lkml/20260610232323.GA1071374@ax162/ [1]
> Cc: Thorsten Blum <thorsten.blum@linux.dev>
> Cc: David Laight <david.laight.linux@gmail.com>
> Signed-off-by: Lasse Collin <lasse.collin@tukaani.org>
> ---
> lib/xz/xz_dec_lzma2.c | 40 ++++++++++++++++++++--------------------
> lib/xz/xz_lzma2.h | 2 +-
> 2 files changed, 21 insertions(+), 21 deletions(-)
>
> diff --git a/lib/xz/xz_dec_lzma2.c b/lib/xz/xz_dec_lzma2.c
> index 9d80342b9c6b..68ae9c33b6a8 100644
> --- a/lib/xz/xz_dec_lzma2.c
> +++ b/lib/xz/xz_dec_lzma2.c
> @@ -135,14 +135,16 @@ struct lzma_dec {
> uint32_t rep2;
> uint32_t rep3;
>
> - /* Types of the most recently seen LZMA symbols */
> - enum lzma_state state;
> -
> /*
> * Length of a match. This is updated so that dict_repeat can
> - * be called again to finish repeating the whole match.
> + * be called again to finish repeating the whole match. This is
> + * size_t because a pointer to this is passed to dict_repeat,
> + * and there it's nicer to have size_t instead of uint32_t.
> */
> - uint32_t len;
> + size_t len;
> +
> + /* Types of the most recently seen LZMA symbols */
> + enum lzma_state state;
>
> /*
> * LZMA properties or related bit masks (number of literal
> @@ -228,13 +230,13 @@ struct lzma2_dec {
> enum lzma2_seq next_sequence;
>
> /* Uncompressed size of LZMA chunk (2 MiB at maximum) */
> - uint32_t uncompressed;
> + size_t uncompressed;
>
> /*
> * Compressed size of LZMA chunk or compressed/uncompressed
> * size of uncompressed chunk (64 KiB at maximum)
> */
> - uint32_t compressed;
> + size_t compressed;
>
> /*
> * True if dictionary reset is needed. This is false before
> @@ -273,7 +275,7 @@ struct xz_dec_lzma2 {
> * decoder calls. See lzma2_lzma() for details.
> */
> struct {
> - uint32_t size;
> + size_t size;
> uint8_t buf[3 * LZMA_IN_REQUIRED];
> } temp;
> };
> @@ -320,7 +322,7 @@ static inline bool dict_has_space(const struct dictionary *dict)
> * still empty. This special case is needed for single-call decoding to
> * avoid writing a '\0' to the end of the destination buffer.
> */
> -static inline uint32_t dict_get(const struct dictionary *dict, uint32_t dist)
> +static inline uint32_t dict_get(const struct dictionary *dict, size_t dist)
> {
> size_t offset = dict->pos - dist - 1;
>
> @@ -346,10 +348,10 @@ static inline void dict_put(struct dictionary *dict, uint8_t byte)
> * invalid, false is returned. On success, true is returned and *len is
> * updated to indicate how many bytes were left to be repeated.
> */
> -static bool dict_repeat(struct dictionary *dict, uint32_t *len, uint32_t dist)
> +static bool dict_repeat(struct dictionary *dict, size_t *len, size_t dist)
> {
> size_t back;
> - uint32_t left;
> + size_t left;
>
> if (dist >= dict->full || dist >= dict->size)
> return false;
> @@ -375,7 +377,7 @@ static bool dict_repeat(struct dictionary *dict, uint32_t *len, uint32_t dist)
>
> /* Copy uncompressed data as is from input to dictionary and output buffers. */
> static void dict_uncompressed(struct dictionary *dict, struct xz_buf *b,
> - uint32_t *left)
> + size_t *left)
> {
> size_t copy_size;
>
> @@ -433,7 +435,7 @@ static void dict_uncompressed(struct dictionary *dict, struct xz_buf *b,
> * enough space in b->out. This is guaranteed because caller uses dict_limit()
> * before decoding data into the dictionary.
> */
> -static uint32_t dict_flush(struct dictionary *dict, struct xz_buf *b)
> +static size_t dict_flush(struct dictionary *dict, struct xz_buf *b)
> {
> size_t copy_size = dict->pos - dict->start;
>
> @@ -878,7 +880,7 @@ static bool lzma_props(struct xz_dec_lzma2 *s, uint8_t props)
> static bool lzma2_lzma(struct xz_dec_lzma2 *s, struct xz_buf *b)
> {
> size_t in_avail;
> - uint32_t tmp;
> + size_t tmp;
>
> in_avail = b->in_size - b->in_pos;
> if (s->temp.size > 0 || s->lzma2.compressed == 0) {
> @@ -1046,25 +1048,23 @@ enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s, struct xz_buf *b)
>
> case SEQ_UNCOMPRESSED_1:
> s->lzma2.uncompressed
> - += (uint32_t)b->in[b->in_pos++] << 8;
> + += (size_t)b->in[b->in_pos++] << 8;
The type of that casts can't matter.
Indeed all these casts can be deleted.
(The value read from the uint8_t variable is promoted to int before
it is used.)
David
> s->lzma2.sequence = SEQ_UNCOMPRESSED_2;
> break;
>
> case SEQ_UNCOMPRESSED_2:
> s->lzma2.uncompressed
> - += (uint32_t)b->in[b->in_pos++] + 1;
> + += (size_t)b->in[b->in_pos++] + 1;
> s->lzma2.sequence = SEQ_COMPRESSED_0;
> break;
>
> case SEQ_COMPRESSED_0:
> - s->lzma2.compressed
> - = (uint32_t)b->in[b->in_pos++] << 8;
> + s->lzma2.compressed = (size_t)b->in[b->in_pos++] << 8;
> s->lzma2.sequence = SEQ_COMPRESSED_1;
> break;
>
> case SEQ_COMPRESSED_1:
> - s->lzma2.compressed
> - += (uint32_t)b->in[b->in_pos++] + 1;
> + s->lzma2.compressed += (size_t)b->in[b->in_pos++] + 1;
> s->lzma2.sequence = s->lzma2.next_sequence;
> break;
>
> diff --git a/lib/xz/xz_lzma2.h b/lib/xz/xz_lzma2.h
> index d2632b7dfb9c..a612ce4fd450 100644
> --- a/lib/xz/xz_lzma2.h
> +++ b/lib/xz/xz_lzma2.h
> @@ -143,7 +143,7 @@ static inline bool lzma_state_is_literal(enum lzma_state state)
> * Get the index of the appropriate probability array for decoding
> * the distance slot.
> */
> -static inline uint32_t lzma_get_dist_state(uint32_t len)
> +static inline size_t lzma_get_dist_state(size_t len)
> {
> return len < DIST_STATES + MATCH_LEN_MIN
> ? len - MATCH_LEN_MIN : DIST_STATES - 1;
next prev parent reply other threads:[~2026-06-15 8:43 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-14 16:05 [PATCH 1/2] lib/xz: Use size_t instead of uint32_t in a few places Lasse Collin
2026-06-14 16:05 ` [PATCH 2/2] lib/xz: Fix comments Lasse Collin
2026-06-15 8:43 ` David Laight [this message]
2026-06-15 11:28 ` [PATCH 1/2] lib/xz: Use size_t instead of uint32_t in a few places Thorsten Blum
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260615094339.4e2a1dcb@pumpkin \
--to=david.laight.linux@gmail.com \
--cc=akpm@linux-foundation.org \
--cc=lasse.collin@tukaani.org \
--cc=linux-kernel@vger.kernel.org \
--cc=nathan@kernel.org \
--cc=thorsten.blum@linux.dev \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox