[PATCH net] net: rnpgbe: fix mailbox endianness handling

Netdev List
 help / color / mirror / Atom feed

* [PATCH net] net: rnpgbe: fix mailbox endianness handling
@ 2026-06-17  8:35 Dong Yibo
  2026-06-17  9:40 ` Andrew Lunn
  0 siblings, 1 reply; 6+ messages in thread
From: Dong Yibo @ 2026-06-17  8:35 UTC (permalink / raw)
  To: andrew+netdev, davem, edumazet, kuba, pabeni, vadim.fedorenko
  Cc: netdev, linux-kernel, dong100, yaojun

Mailbox data is exchanged through 32-bit MMIO accesses but the
mailbox payload is defined using little-endian FW structures with
__le16 and __le32 fields.

The mailbox read/write helpers previously operated on raw u32
buffers without performing endian conversion. On big-endian
systems this causes mailbox payload fields to be byte-swapped in
memory, resulting in corrupted FW command and reply structures.

Convert mailbox data between CPU-endian MMIO values and the
little-endian mailbox wire format using cpu_to_le32() on reads and
le32_to_cpu() on writes.

Also switch the helper interfaces to use void */const void * since
the mailbox transport layer operates on opaque payload buffers
rather than native-endian u32 arrays.

Fixes: 4543534c3ef5 ("net: rnpgbe: Add basic mbx ops support")
Signed-off-by: Dong Yibo <dong100@mucse.com>
---
 drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx.c   | 16 ++++++++++------
 drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx.h   |  5 +++--
 .../net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.c    |  7 +++----
 3 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx.c b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx.c
index de5e29230b3c..0fccfc49ffc7 100644
--- a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx.c
+++ b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx.c
@@ -166,10 +166,12 @@ static void mucse_mbx_inc_pf_ack(struct mucse_hw *hw)
  *
  * Return: 0 on success, negative errno on failure
  **/
-static int mucse_read_mbx_pf(struct mucse_hw *hw, u32 *msg, u16 size)
+static int mucse_read_mbx_pf(struct mucse_hw *hw, void *msg, u16 size)
 {
 	const int size_in_words = size / sizeof(u32);
 	struct mucse_mbx_info *mbx = &hw->mbx;
+	int off = MUCSE_MBX_FWPF_SHM;
+	__le32 *msg_le32 = msg;
 	int err;
 
 	err = mucse_obtain_mbx_lock_pf(hw);
@@ -177,7 +179,7 @@ static int mucse_read_mbx_pf(struct mucse_hw *hw, u32 *msg, u16 size)
 		return err;
 
 	for (int i = 0; i < size_in_words; i++)
-		msg[i] = mbx_data_rd32(mbx, MUCSE_MBX_FWPF_SHM + 4 * i);
+		msg_le32[i] = cpu_to_le32(mbx_data_rd32(mbx, off + 4 * i));
 	/* Hw needs write data_reg at last */
 	mbx_data_wr32(mbx, MUCSE_MBX_FWPF_SHM, 0);
 	/* flush reqs as we have read this request data */
@@ -236,7 +238,7 @@ static int mucse_poll_for_msg(struct mucse_hw *hw)
  * Return: 0 if it successfully received a message notification and
  * copied it into the receive buffer, negative errno on failure
  **/
-int mucse_poll_and_read_mbx(struct mucse_hw *hw, u32 *msg, u16 size)
+int mucse_poll_and_read_mbx(struct mucse_hw *hw, void *msg, u16 size)
 {
 	int err;
 
@@ -290,10 +292,11 @@ static void mucse_mbx_inc_pf_req(struct mucse_hw *hw)
  * Return: 0 if it successfully copied message into the buffer,
  * negative errno on failure
  **/
-static int mucse_write_mbx_pf(struct mucse_hw *hw, u32 *msg, u16 size)
+static int mucse_write_mbx_pf(struct mucse_hw *hw, const void *msg, u16 size)
 {
 	const int size_in_words = size / sizeof(u32);
 	struct mucse_mbx_info *mbx = &hw->mbx;
+	const __le32 *msg_le32 = msg;
 	int err;
 
 	err = mucse_obtain_mbx_lock_pf(hw);
@@ -301,7 +304,8 @@ static int mucse_write_mbx_pf(struct mucse_hw *hw, u32 *msg, u16 size)
 		return err;
 
 	for (int i = 0; i < size_in_words; i++)
-		mbx_data_wr32(mbx, MUCSE_MBX_FWPF_SHM + i * 4, msg[i]);
+		mbx_data_wr32(mbx, MUCSE_MBX_FWPF_SHM + i * 4,
+			      le32_to_cpu(msg_le32[i]));
 
 	/* flush acks as we are overwriting the message buffer */
 	hw->mbx.fw_ack = mucse_mbx_get_fwack(mbx);
@@ -360,7 +364,7 @@ static int mucse_poll_for_ack(struct mucse_hw *hw)
  * Return: 0 if it successfully copied message into the buffer and
  * received an ack to that message within delay * timeout_cnt period
  **/
-int mucse_write_and_wait_ack_mbx(struct mucse_hw *hw, u32 *msg, u16 size)
+int mucse_write_and_wait_ack_mbx(struct mucse_hw *hw, const void *msg, u16 size)
 {
 	int err;
 
diff --git a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx.h b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx.h
index e6fcc8d1d3ca..25bfc97c24c0 100644
--- a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx.h
+++ b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx.h
@@ -14,7 +14,8 @@
 #define MUCSE_MBX_REQ             BIT(0) /* Request a req to mailbox */
 #define MUCSE_MBX_PFU             BIT(3) /* PF owns the mailbox buffer */
 
-int mucse_write_and_wait_ack_mbx(struct mucse_hw *hw, u32 *msg, u16 size);
+int mucse_write_and_wait_ack_mbx(struct mucse_hw *hw,
+				 const void *msg, u16 size);
 void mucse_init_mbx_params_pf(struct mucse_hw *hw);
-int mucse_poll_and_read_mbx(struct mucse_hw *hw, u32 *msg, u16 size);
+int mucse_poll_and_read_mbx(struct mucse_hw *hw, void *msg, u16 size);
 #endif /* _RNPGBE_MBX_H */
diff --git a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.c b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.c
index 8c8bd5e8e1db..2ac97915a098 100644
--- a/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.c
+++ b/drivers/net/ethernet/mucse/rnpgbe/rnpgbe_mbx_fw.c
@@ -28,12 +28,11 @@ static int mucse_fw_send_cmd_wait_resp(struct mucse_hw *hw,
 	int err;
 
 	mutex_lock(&hw->mbx.lock);
-	err = mucse_write_and_wait_ack_mbx(hw, (u32 *)req, len);
+	err = mucse_write_and_wait_ack_mbx(hw, req, len);
 	if (err)
 		goto out;
 	do {
-		err = mucse_poll_and_read_mbx(hw, (u32 *)reply,
-					      sizeof(*reply));
+		err = mucse_poll_and_read_mbx(hw, reply, sizeof(*reply));
 		if (err)
 			goto out;
 		/* mucse_write_and_wait_ack_mbx return 0 means fw has
@@ -125,7 +124,7 @@ int mucse_mbx_powerup(struct mucse_hw *hw, bool is_powerup)
 
 	len = le16_to_cpu(req.datalen);
 	mutex_lock(&hw->mbx.lock);
-	err = mucse_write_and_wait_ack_mbx(hw, (u32 *)&req, len);
+	err = mucse_write_and_wait_ack_mbx(hw, &req, len);
 	mutex_unlock(&hw->mbx.lock);
 
 	return err;
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH net] net: rnpgbe: fix mailbox endianness handling
  2026-06-17  8:35 [PATCH net] net: rnpgbe: fix mailbox endianness handling Dong Yibo
@ 2026-06-17  9:40 ` Andrew Lunn
  2026-06-17 11:46   ` Yibo Dong
  0 siblings, 1 reply; 6+ messages in thread
From: Andrew Lunn @ 2026-06-17  9:40 UTC (permalink / raw)
  To: Dong Yibo
  Cc: andrew+netdev, davem, edumazet, kuba, pabeni, vadim.fedorenko,
	netdev, linux-kernel, yaojun

On Wed, Jun 17, 2026 at 04:35:31PM +0800, Dong Yibo wrote:
> Mailbox data is exchanged through 32-bit MMIO accesses but the
> mailbox payload is defined using little-endian FW structures with
> __le16 and __le32 fields.

Given you are using __le16 and __le32, why did sparse not find these
issues? It would be good to understand this, because if sparse missed
this, what else has sparse missed which is also broken?

	Andrew

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net] net: rnpgbe: fix mailbox endianness handling
  2026-06-17  9:40 ` Andrew Lunn
@ 2026-06-17 11:46   ` Yibo Dong
  2026-06-17 12:09     ` Andrew Lunn
  0 siblings, 1 reply; 6+ messages in thread
From: Yibo Dong @ 2026-06-17 11:46 UTC (permalink / raw)
  To: Andrew Lunn
  Cc: andrew+netdev, davem, edumazet, kuba, pabeni, vadim.fedorenko,
	netdev, linux-kernel, yaojun

On Wed, Jun 17, 2026 at 11:40:42AM +0200, Andrew Lunn wrote:

Hi Andrew:
> On Wed, Jun 17, 2026 at 04:35:31PM +0800, Dong Yibo wrote:
> > Mailbox data is exchanged through 32-bit MMIO accesses but the
> > mailbox payload is defined using little-endian FW structures with
> > __le16 and __le32 fields.
> 
> Given you are using __le16 and __le32, why did sparse not find these
> issues? It would be good to understand this, because if sparse missed
> this, what else has sparse missed which is also broken?
> 
> 	Andrew
> 

My understanding is as follows:
The firmware structures are defined with __le16 / __le32 for wire format,
but the original code cast these struct pointers to u32 * before passing
them to the mailbox read/write routines:
- Send path: (u32 *)&req -> msg buffer -> writel()
- Receive path: readl() -> msg buffer -> (u32 *)&reply
Sparse only sees pure u32 = u32 assignments here, so no type mismatch is
reported. In fact, readl()/writel() operate on 'native CPU-ordered u32
values', not little-endian values.
The __le annotations correctly describe the firmware wire format, but
the original mailbox transport using plain u32 * buffers erased all endian
type information at the MMIO boundary, hiding this mismatch from sparse.

I have also checked the rest of the rnpgbe driver: all __le types are
confined strictly to mailbox firmware structures, and this fix covers all
MMIO <-> structure data transfer paths. Comparisons between two __le fields
(e.g., reply->opcode != req->opcode) are safe, as both values share the
same byte order.

Thanks for your feedback!

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net] net: rnpgbe: fix mailbox endianness handling
  2026-06-17 11:46   ` Yibo Dong
@ 2026-06-17 12:09     ` Andrew Lunn
  2026-06-17 14:05       ` Yibo Dong
  0 siblings, 1 reply; 6+ messages in thread
From: Andrew Lunn @ 2026-06-17 12:09 UTC (permalink / raw)
  To: Yibo Dong
  Cc: andrew+netdev, davem, edumazet, kuba, pabeni, vadim.fedorenko,
	netdev, linux-kernel, yaojun

> My understanding is as follows:
> The firmware structures are defined with __le16 / __le32 for wire format,
> but the original code cast these struct pointers to u32 * before passing
> them to the mailbox read/write routines:
> - Send path: (u32 *)&req -> msg buffer -> writel()
> - Receive path: readl() -> msg buffer -> (u32 *)&reply
> Sparse only sees pure u32 = u32 assignments here, so no type mismatch is
> reported.

Can the code be changed so that it does not need the cast? Casts are
bad, as you have just shown. This is something I try to push back on:
avoiding casts makes you think about types and avoid issues like this.

	Andrew

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net] net: rnpgbe: fix mailbox endianness handling
  2026-06-17 12:09     ` Andrew Lunn
@ 2026-06-17 14:05       ` Yibo Dong
  2026-06-17 20:45         ` Jakub Kicinski
  0 siblings, 1 reply; 6+ messages in thread
From: Yibo Dong @ 2026-06-17 14:05 UTC (permalink / raw)
  To: Andrew Lunn
  Cc: andrew+netdev, davem, edumazet, kuba, pabeni, vadim.fedorenko,
	netdev, linux-kernel, yaojun

On Wed, Jun 17, 2026 at 02:09:00PM +0200, Andrew Lunn wrote:
> > My understanding is as follows:
> > The firmware structures are defined with __le16 / __le32 for wire format,
> > but the original code cast these struct pointers to u32 * before passing
> > them to the mailbox read/write routines:
> > - Send path: (u32 *)&req -> msg buffer -> writel()
> > - Receive path: readl() -> msg buffer -> (u32 *)&reply
> > Sparse only sees pure u32 = u32 assignments here, so no type mismatch is
> > reported.
> 
> Can the code be changed so that it does not need the cast? Casts are
> bad, as you have just shown. This is something i try to push back on,
> it makes you think about types and avoid issues like this.
> 
> 	Andrew
> 
Thinking... Yes. A few possibilities:

1. Make all fields __le32, then extract via shifts:
   struct mbx_fw_cmd_req {
       __le32 word0;  // [15:0]=flags  [31:16]=opcode
       __le32 word1;  // [15:0]=datalen [31:16]=ret_value
       ...
   };
   But that's painful — le32_to_cpu(req.word0) >> 16 vs req.opcode.

2. Use a union to keep named fields while also exposing __le32[] access:
   union mbx_fw_cmd_req_u {
       struct mbx_fw_cmd_req req;
       __le32 dwords[sizeof(struct mbx_fw_cmd_req) / sizeof(__le32)];
   };
   union mbx_fw_cmd_reply_u {
       struct mbx_fw_cmd_reply reply;
       __le32 dwords[sizeof(struct mbx_fw_cmd_reply) / sizeof(__le32)];
   };

   The transport interface becomes:
   int mucse_write_mbx_pf(struct mucse_hw *hw, const __le32 *msg, u16 size);
   int mucse_read_mbx_pf(struct mucse_hw *hw, __le32 *msg, u16 size);

   Callers would use:
   union mbx_fw_cmd_req_u cmd = {};
   cmd.req.opcode = cpu_to_le16(...);
   cmd.req.flags  = cpu_to_le16(...);
   mucse_write_mbx_pf(hw, cmd.dwords, sizeof(cmd.req));

   If the transport layer forgets the cpu_to_le32()/le32_to_cpu()
   conversion, sparse would catch it because msg is __le32 * and
   mbx_data_rd32() returns u32.

   The downside is an extra union wrapper and an extra level in field
   access (cmd.req.opcode vs req.opcode) — a minor inconvenience.

Do you have a preference between these, or another approach?

Thanks for the feedback.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net] net: rnpgbe: fix mailbox endianness handling
  2026-06-17 14:05       ` Yibo Dong
@ 2026-06-17 20:45         ` Jakub Kicinski
  0 siblings, 0 replies; 6+ messages in thread
From: Jakub Kicinski @ 2026-06-17 20:45 UTC (permalink / raw)
  To: Yibo Dong
  Cc: Andrew Lunn, andrew+netdev, davem, edumazet, pabeni,
	vadim.fedorenko, netdev, linux-kernel, yaojun

On Wed, 17 Jun 2026 22:05:30 +0800 Yibo Dong wrote:
> On Wed, Jun 17, 2026 at 02:09:00PM +0200, Andrew Lunn wrote:
> > > My understanding is as follows:
> > > The firmware structures are defined with __le16 / __le32 for wire format,
> > > but the original code cast these struct pointers to u32 * before passing
> > > them to the mailbox read/write routines:
> > > - Send path: (u32 *)&req -> msg buffer -> writel()
> > > - Receive path: readl() -> msg buffer -> (u32 *)&reply
> > > Sparse only sees pure u32 = u32 assignments here, so no type mismatch is
> > > reported.  
> > 
> > Can the code be changed so that it does not need the cast? Casts are
> > bad, as you have just shown. This is something i try to push back on,
> > it makes you think about types and avoid issues like this.
> > 
> > 	Andrew
> >   
> Thinking... Yes. A few possibilities:
> 
> 1. Make all fields __le32, then extract via shifts:
>    struct mbx_fw_cmd_req {
>        __le32 word0;  // [15:0]=flags  [31:16]=opcode
>        __le32 word1;  // [15:0]=datalen [31:16]=ret_value
>        ...
>    };
>    But that's painful — le32_to_cpu(req.word0) >> 16 vs req.opcode.
> 
> 2. Use a union to keep named fields while also exposing __le32[] access:
>    union mbx_fw_cmd_req_u {
>        struct mbx_fw_cmd_req req;
>        __le32 dwords[sizeof(struct mbx_fw_cmd_req) / sizeof(__le32)];
>    };
>    union mbx_fw_cmd_reply_u {
>        struct mbx_fw_cmd_reply reply;
>        __le32 dwords[sizeof(struct mbx_fw_cmd_reply) / sizeof(__le32)];
>    };
> 
>    The transport interface becomes:
>    int mucse_write_mbx_pf(struct mucse_hw *hw, const __le32 *msg, u16 size);
>    int mucse_read_mbx_pf(struct mucse_hw *hw, __le32 *msg, u16 size);
> 
>    Callers would use:
>    union mbx_fw_cmd_req_u cmd = {};
>    cmd.req.opcode = cpu_to_le16(...);
>    cmd.req.flags  = cpu_to_le16(...);
>    mucse_write_mbx_pf(hw, cmd.dwords, sizeof(cmd.req));
> 
>    If the transport layer forgets le32_to_cpu(), sparse would catch it
>    because msg is __le32 * and mbx_data_rd32() returns u32.
> 
>    The downside is an extra union wrapper and an extra level in field
>    access (cmd.req.opcode vs req.opcode) — a minor inconvenience.
> 
> Do you have a preference between these, or another approach?
> 
> Thanks for the feedback.

3. Maybe use memcpy_toio() to transfer the data without any byteswaps?

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2026-06-17 20:45 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2026-06-17  8:35 [PATCH net] net: rnpgbe: fix mailbox endianness handling Dong Yibo
2026-06-17  9:40 ` Andrew Lunn
2026-06-17 11:46   ` Yibo Dong
2026-06-17 12:09     ` Andrew Lunn
2026-06-17 14:05       ` Yibo Dong
2026-06-17 20:45         ` Jakub Kicinski

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox