Re: [PATCH 1/2] lib/xz: Use size_t instead of uint32_t in a few places

All of lore.kernel.org
 help / color / mirror / Atom feed

From: David Laight <david.laight.linux@gmail.com>
To: Lasse Collin <lasse.collin@tukaani.org>
Cc: Andrew Morton <akpm@linux-foundation.org>,
	linux-kernel@vger.kernel.org,
	Nathan Chancellor <nathan@kernel.org>,
	Thorsten Blum <thorsten.blum@linux.dev>
Subject: Re: [PATCH 1/2] lib/xz: Use size_t instead of uint32_t in a few places
Date: Mon, 15 Jun 2026 09:43:39 +0100	[thread overview]
Message-ID: <20260615094339.4e2a1dcb@pumpkin> (raw)
In-Reply-To: <20260614160521.924710-1-lasse.collin@tukaani.org>

On Sun, 14 Jun 2026 19:05:17 +0300
Lasse Collin <lasse.collin@tukaani.org> wrote:

> Reduce the number of uint32_t <-> size_t conversions a little. Eliminating
> such conversions entirely would require changing almost all uint32_t to
> size_t, which would look confusing and increase the sizes of the structs
> even more. Going the other way, converting everything to uint32_t, isn't
> possible because the input and output buffers use size_t in struct xz_buf.
> 
> Now both arguments to min() have the same type. This is required to for
> compatibility with PowerPC boot code[1] whose min() is strict like
> include/linux/minmax.h was before the commit d03eba99f5bf ("minmax: allow
> min()/max()/clamp() if the arguments have the same signedness.").
> 
> Swap the order of the "state" and "len" in struct lzma_dec to avoid
> padding in the middle of the struct when size_t is 64 bits. The
> reordering doesn't change the size of the struct; the padding just
> appears at the end instead.
> 
> dict_flush() used to truncate size_t to uint32_t when returning.
> This wasn't a bug; the value is always small enough.
> 
> Reported-by: Nathan Chancellor <nathan@kernel.org>
> Closes: https://lore.kernel.org/lkml/20260610232323.GA1071374@ax162/ [1]
> Cc: Thorsten Blum <thorsten.blum@linux.dev>
> Cc: David Laight <david.laight.linux@gmail.com>
> Signed-off-by: Lasse Collin <lasse.collin@tukaani.org>
> ---
>  lib/xz/xz_dec_lzma2.c | 40 ++++++++++++++++++++--------------------
>  lib/xz/xz_lzma2.h     |  2 +-
>  2 files changed, 21 insertions(+), 21 deletions(-)
> 
> diff --git a/lib/xz/xz_dec_lzma2.c b/lib/xz/xz_dec_lzma2.c
> index 9d80342b9c6b..68ae9c33b6a8 100644
> --- a/lib/xz/xz_dec_lzma2.c
> +++ b/lib/xz/xz_dec_lzma2.c
> @@ -135,14 +135,16 @@ struct lzma_dec {
>  	uint32_t rep2;
>  	uint32_t rep3;
>  
> -	/* Types of the most recently seen LZMA symbols */
> -	enum lzma_state state;
> -
>  	/*
>  	 * Length of a match. This is updated so that dict_repeat can
> -	 * be called again to finish repeating the whole match.
> +	 * be called again to finish repeating the whole match. This is
> +	 * size_t because a pointer to this is passed to dict_repeat,
> +	 * and there it's nicer to have size_t instead of uint32_t.
>  	 */
> -	uint32_t len;
> +	size_t len;
> +
> +	/* Types of the most recently seen LZMA symbols */
> +	enum lzma_state state;
>  
>  	/*
>  	 * LZMA properties or related bit masks (number of literal
> @@ -228,13 +230,13 @@ struct lzma2_dec {
>  	enum lzma2_seq next_sequence;
>  
>  	/* Uncompressed size of LZMA chunk (2 MiB at maximum) */
> -	uint32_t uncompressed;
> +	size_t uncompressed;
>  
>  	/*
>  	 * Compressed size of LZMA chunk or compressed/uncompressed
>  	 * size of uncompressed chunk (64 KiB at maximum)
>  	 */
> -	uint32_t compressed;
> +	size_t compressed;
>  
>  	/*
>  	 * True if dictionary reset is needed. This is false before
> @@ -273,7 +275,7 @@ struct xz_dec_lzma2 {
>  	 * decoder calls. See lzma2_lzma() for details.
>  	 */
>  	struct {
> -		uint32_t size;
> +		size_t size;
>  		uint8_t buf[3 * LZMA_IN_REQUIRED];
>  	} temp;
>  };
> @@ -320,7 +322,7 @@ static inline bool dict_has_space(const struct dictionary *dict)
>   * still empty. This special case is needed for single-call decoding to
>   * avoid writing a '\0' to the end of the destination buffer.
>   */
> -static inline uint32_t dict_get(const struct dictionary *dict, uint32_t dist)
> +static inline uint32_t dict_get(const struct dictionary *dict, size_t dist)
>  {
>  	size_t offset = dict->pos - dist - 1;
>  
> @@ -346,10 +348,10 @@ static inline void dict_put(struct dictionary *dict, uint8_t byte)
>   * invalid, false is returned. On success, true is returned and *len is
>   * updated to indicate how many bytes were left to be repeated.
>   */
> -static bool dict_repeat(struct dictionary *dict, uint32_t *len, uint32_t dist)
> +static bool dict_repeat(struct dictionary *dict, size_t *len, size_t dist)
>  {
>  	size_t back;
> -	uint32_t left;
> +	size_t left;
>  
>  	if (dist >= dict->full || dist >= dict->size)
>  		return false;
> @@ -375,7 +377,7 @@ static bool dict_repeat(struct dictionary *dict, uint32_t *len, uint32_t dist)
>  
>  /* Copy uncompressed data as is from input to dictionary and output buffers. */
>  static void dict_uncompressed(struct dictionary *dict, struct xz_buf *b,
> -			      uint32_t *left)
> +			      size_t *left)
>  {
>  	size_t copy_size;
>  
> @@ -433,7 +435,7 @@ static void dict_uncompressed(struct dictionary *dict, struct xz_buf *b,
>   * enough space in b->out. This is guaranteed because caller uses dict_limit()
>   * before decoding data into the dictionary.
>   */
> -static uint32_t dict_flush(struct dictionary *dict, struct xz_buf *b)
> +static size_t dict_flush(struct dictionary *dict, struct xz_buf *b)
>  {
>  	size_t copy_size = dict->pos - dict->start;
>  
> @@ -878,7 +880,7 @@ static bool lzma_props(struct xz_dec_lzma2 *s, uint8_t props)
>  static bool lzma2_lzma(struct xz_dec_lzma2 *s, struct xz_buf *b)
>  {
>  	size_t in_avail;
> -	uint32_t tmp;
> +	size_t tmp;
>  
>  	in_avail = b->in_size - b->in_pos;
>  	if (s->temp.size > 0 || s->lzma2.compressed == 0) {
> @@ -1046,25 +1048,23 @@ enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s, struct xz_buf *b)
>  
>  		case SEQ_UNCOMPRESSED_1:
>  			s->lzma2.uncompressed
> -					+= (uint32_t)b->in[b->in_pos++] << 8;
> +					+= (size_t)b->in[b->in_pos++] << 8;

The type of that casts can't matter.
Indeed all these casts can be deleted.
(The value read from the uint8_t variable is promoted to int before
it is used.)

	David

>  			s->lzma2.sequence = SEQ_UNCOMPRESSED_2;
>  			break;
>  
>  		case SEQ_UNCOMPRESSED_2:
>  			s->lzma2.uncompressed
> -					+= (uint32_t)b->in[b->in_pos++] + 1;
> +					+= (size_t)b->in[b->in_pos++] + 1;
>  			s->lzma2.sequence = SEQ_COMPRESSED_0;
>  			break;
>  
>  		case SEQ_COMPRESSED_0:
> -			s->lzma2.compressed
> -					= (uint32_t)b->in[b->in_pos++] << 8;
> +			s->lzma2.compressed = (size_t)b->in[b->in_pos++] << 8;
>  			s->lzma2.sequence = SEQ_COMPRESSED_1;
>  			break;
>  
>  		case SEQ_COMPRESSED_1:
> -			s->lzma2.compressed
> -					+= (uint32_t)b->in[b->in_pos++] + 1;
> +			s->lzma2.compressed += (size_t)b->in[b->in_pos++] + 1;
>  			s->lzma2.sequence = s->lzma2.next_sequence;
>  			break;
>  
> diff --git a/lib/xz/xz_lzma2.h b/lib/xz/xz_lzma2.h
> index d2632b7dfb9c..a612ce4fd450 100644
> --- a/lib/xz/xz_lzma2.h
> +++ b/lib/xz/xz_lzma2.h
> @@ -143,7 +143,7 @@ static inline bool lzma_state_is_literal(enum lzma_state state)
>   * Get the index of the appropriate probability array for decoding
>   * the distance slot.
>   */
> -static inline uint32_t lzma_get_dist_state(uint32_t len)
> +static inline size_t lzma_get_dist_state(size_t len)
>  {
>  	return len < DIST_STATES + MATCH_LEN_MIN
>  			? len - MATCH_LEN_MIN : DIST_STATES - 1;

next prev parent reply	other threads:[~2026-06-15  8:43 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-14 16:05 [PATCH 1/2] lib/xz: Use size_t instead of uint32_t in a few places Lasse Collin
2026-06-14 16:05 ` [PATCH 2/2] lib/xz: Fix comments Lasse Collin
2026-06-15  8:43 ` David Laight [this message]
2026-06-15 19:26   ` [PATCH 1/2] lib/xz: Use size_t instead of uint32_t in a few places Lasse Collin
2026-06-15 11:28 ` Thorsten Blum

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260615094339.4e2a1dcb@pumpkin \
    --to=david.laight.linux@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=lasse.collin@tukaani.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=nathan@kernel.org \
    --cc=thorsten.blum@linux.dev \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.