Re: [PATCH v6 net-next 5/5] octeontx2-af: npc: Support for custom KPU profile from filesystem

public inbox for netdev@vger.kernel.org
 help / color / mirror / Atom feed

From: Ratheesh Kannoth <rkannoth@marvell.com>
To: Simon Horman <horms@kernel.org>
Cc: <shshitrit@nvidia.com>, <cjubran@nvidia.com>,
	<netdev@vger.kernel.org>, <matttbe@kernel.org>,
	<edumazet@google.com>, <pabeni@redhat.com>, <saeedm@nvidia.com>,
	<sgoutham@marvell.com>, <davem@davemloft.net>,
	<donald.hunter@gmail.com>, <jiri@resnulli.us>,
	<chuck.lever@oracle.com>, <linux-kernel@vger.kernel.org>,
	<kuba@kernel.org>
Subject: Re: [PATCH v6 net-next 5/5] octeontx2-af: npc: Support for custom KPU profile from filesystem
Date: Tue, 24 Mar 2026 09:39:58 +0530	[thread overview]
Message-ID: <acIOloT6IX4RmEeZ@rkannoth-OptiPlex-7090> (raw)
In-Reply-To: <acCbtThJb-PA8tni@rkannoth-OptiPlex-7090>

On 2026-03-23 at 07:17:33, Ratheesh Kannoth (rkannoth@marvell.com) wrote:
> On 2026-03-20 at 22:24:32, Simon Horman (horms@kernel.org) wrote:
> >
> > > @@ -665,9 +675,15 @@ void npc_cn20k_parser_profile_init(struct rvu *rvu, int blkaddr)
> > >  	num_pkinds = rvu->kpu.pkinds;
> > >  	num_pkinds = min_t(int, hw->npc_pkinds, num_pkinds);
> > >
> > > -	for (idx = 0; idx < num_pkinds; idx++)
> > > -		npc_config_kpmaction(rvu, blkaddr, &rvu->kpu.ikpu[idx],
> > > +	/* Cn20k does not support Custom profile from filesystem */
> > > +	for (idx = 0; idx < num_pkinds; idx++) {
> > > +		act = npc_get_ikpu_nth_entry(rvu, idx);
> > > +		if (!act)
> > > +			continue;
> > > +
> > > +		npc_config_kpmaction(rvu, blkaddr, act,
> > >  				     0, idx, true);
> > > +	}
> >
> > When a filesystem-loaded profile is used on cn20k (from_fs=true),
> > npc_get_ikpu_nth_entry() returns &profile->ikpu2[idx]:
> >
> > drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c:npc_get_ikpu_nth_entry() {
> >     struct npc_kpu_profile_adapter *profile = &rvu->kpu;
> >
> >     if (profile->from_fs)
> >         return &profile->ikpu2[idx];
> >
> >     return &profile->ikpu[n];
> > }
> >
> > However, ikpu2 is never allocated for the cn20k path. The allocation
> > happens inside npc_apply_custom_kpu() in the from_fs block:
> >
> > drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c:npc_apply_custom_kpu() {
> >     if (is_cn20k(rvu->pdev))
> >         return npc_cn20k_apply_custom_kpu(rvu, profile);
> >     ...
> >     if (from_fs) {
> >         profile->ikpu2 = devm_kcalloc(rvu->dev, 1,
> >                                       sizeof(ikpu_action_entries),
> >                                       GFP_KERNEL);
> >         ...
> >     }
> > }
> >
> > But this from_fs block is never reached for cn20k because the function
> > returns early via npc_cn20k_apply_custom_kpu().
> >
> > Does this code dereference a NULL-offset pointer causing a kernel crash?
> >
> > The same issue appears to affect cam2 and action2 pointers which are
> > allocated in npc_alloc_kpu_cam2_n_action2() but never called for cn20k.
> >
> > This was raised by Jakub Kicinski during v4 review on 2026-03-16.
> This is false positive. from_fs is never set to "true" for cn20k. In npc_load_kpu_profile(), if soc is
> cn20k, it skips "loading from filesystem" method. So npc_get_ikpu_nth_entry()
> will never return ikpu2.
>
> >
> > > diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
> > > index 8d260bcfbf38..8d8feb7742e8 100644
> > > --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
> > > +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
> >
> > [ ... ]
> >
> > > @@ -1730,32 +1835,89 @@ static int npc_apply_custom_kpu(struct rvu *rvu,
> > >  		return -EINVAL;
> > >  	}
> > >
> > > +	*fw_kpus = fw->kpus;
> > > +
> > > +	sfw = devm_kcalloc(rvu->dev, 1, sizeof(*sfw), GFP_KERNEL);
> > > +	if (!sfw)
> > > +		return -ENOMEM;
> > > +
> > > +	memcpy(sfw, fw, sizeof(*sfw));
> > > +
> > >  	profile->custom = 1;
> > > -	profile->name = fw->name;
> > > +	profile->name = sfw->name;
> > >  	profile->version = le64_to_cpu(fw->version);
> > > -	profile->mcam_kex_prfl.mkex = &fw->mkex;
> > > -	profile->lt_def = &fw->lt_def;
> > > +	profile->mcam_kex_prfl.mkex = &sfw->mkex;
> > > +	profile->lt_def = &sfw->lt_def;
> > > +
> > > +	/* Binary blob contains ikpu actions entries at start of data[0] */
> > > +	if (from_fs) {
> > > +		profile->ikpu2 = devm_kcalloc(rvu->dev, 1,
> > > +					      sizeof(ikpu_action_entries),
> > > +					      GFP_KERNEL);
> > > +		if (!profile->ikpu2)
> > > +			return -ENOMEM;
> > > +
> > > +		action = (struct npc_kpu_profile_action *)(fw->data + offset);
> > > +
> > > +		if (rvu->kpu_fwdata_sz < hdr_sz + sizeof(ikpu_action_entries))
> > > +			return -ENOMEM;
> > > +
> > > +		memcpy((void *)profile->ikpu2, action, sizeof(ikpu_action_entries));
> > > +		offset += sizeof(ikpu_action_entries);
> > > +	}
> > > +
> > >  	for (kpu = 0; kpu < fw->kpus; kpu++) {
> > >  		fw_kpu = (struct npc_kpu_fwdata *)(fw->data + offset);
> > > -		if (fw_kpu->entries > KPU_MAX_CST_ENT)
> > > -			dev_warn(rvu->dev,
> > > -				 "Too many custom entries on KPU%d: %d > %d\n",
> > > -				 kpu, fw_kpu->entries, KPU_MAX_CST_ENT);
> > > -		entries = min(fw_kpu->entries, KPU_MAX_CST_ENT);
> > > -		cam = (struct npc_kpu_profile_cam *)fw_kpu->data;
> > > -		offset += sizeof(*fw_kpu) + fw_kpu->entries * sizeof(*cam);
> > > +		if (!from_fs) {
> > > +			if (fw_kpu->entries > KPU_MAX_CST_ENT)
> > > +				dev_warn(rvu->dev,
> > > +					 "Too many custom entries on KPU%d: %d > %d\n",
> > > +					 kpu, fw_kpu->entries, KPU_MAX_CST_ENT);
> > > +			entries = min(fw_kpu->entries, KPU_MAX_CST_ENT);
> > > +			cam = (struct npc_kpu_profile_cam *)fw_kpu->data;
> > > +			offset += sizeof(*fw_kpu) + fw_kpu->entries * sizeof(*cam);
> > > +			action = (struct npc_kpu_profile_action *)(fw->data + offset);
> > > +			offset += fw_kpu->entries * sizeof(*action);
> > > +			if (rvu->kpu_fwdata_sz < hdr_sz + offset) {
> > > +				dev_warn(rvu->dev,
> > > +					 "Profile size mismatch on KPU%i parsing.\n",
> > > +					 kpu + 1);
> > > +				return -EINVAL;
> > > +			}
> > > +			for (entry = 0; entry < entries; entry++) {
> > > +				profile->kpu[kpu].cam[entry] = cam[entry];
> > > +				profile->kpu[kpu].action[entry] = action[entry];
> > > +			}
> > > +			continue;
> > > +		}
> > > +		entries = fw_kpu->entries;
> > > +		dev_info(rvu->dev,
> > > +			 "Loading %u entries on KPU%d\n", entries, kpu);
> > > +
> > > +		cam2 = (struct npc_kpu_profile_cam2 *)fw_kpu->data;
> > > +		offset += sizeof(*fw_kpu) + fw_kpu->entries * sizeof(*cam2);
> > >  		action = (struct npc_kpu_profile_action *)(fw->data + offset);
> > >  		offset += fw_kpu->entries * sizeof(*action);
> > >  		if (rvu->kpu_fwdata_sz < hdr_sz + offset) {
> > >  			dev_warn(rvu->dev,
> > > -				 "Profile size mismatch on KPU%i parsing.\n",
> > > +				 "profile size mismatch on kpu%i parsing.\n",
> > >  				 kpu + 1);
> > >  			return -EINVAL;
> > >  		}
> > > +
> > > +		profile->kpu[kpu].cam_entries2 = entries;
> > > +		profile->kpu[kpu].action_entries2 = entries;
> > > +		ret = npc_alloc_kpu_cam2_n_action2(rvu, kpu, entries);
> > > +		if (ret) {
> > > +			dev_warn(rvu->dev,
> > > +				 "profile entry allocation failed for kpu=%d for %d entries\n",
> > > +				 kpu, entries);
> > > +			return -EINVAL;
> > > +		}
> > > +
> > >  		for (entry = 0; entry < entries; entry++) {
> > > -			profile->kpu[kpu].cam[entry] = cam[entry];
> > > -			profile->kpu[kpu].action[entry] = action[entry];
> > > +			profile->kpu[kpu].cam2[entry] = cam2[entry];
> > > +			profile->kpu[kpu].action2[entry] = action[entry];
> > >  		}
> > >  	}
> >
> > Could this loop overflow and hang the kernel if firmware specifies more
> > than 65535 entries?
> >
> > The loop variable 'entry' is declared as int at the top of the function,
> > but in the from_fs path, 'entries' is read directly from the firmware
> > blob:
> >
> >     entries = fw_kpu->entries;
> >
> > In the non-from_fs path, entries is capped:
> >
> >     entries = min(fw_kpu->entries, KPU_MAX_CST_ENT);
> >
> > But in the from_fs path there is no cap on entries. If a corrupt or
> > malicious firmware file specifies fw_kpu->entries with a very large
> > value, the loop could run for an extremely long time or cause other
> > issues.
> >
> > The kernel should not rely on filesystem data being well-formed.
> > Validation is needed for the from_fs case.
> >
> > This was raised by Simon Horman during v3 review on 2026-03-10. The
> > author acknowledged this in v3 saying they would fix it in v5, but the
> > issue persists in the current version.
> ACK. Last AI review suggest is - either to use "int" as loop variable or do a capping
> on the value. I did modify it to "int".

Hi Simon,

I noticed that this concern has been raised again even after adding "CAPPING"
https://netdev-ai.bots.linux.dev/ai-review.html?id=c0bd3eea-bbc4-46ae-86da-c295d1100233

"If a corrupt or malicious firmware file in /lib/firmware/kpu specifies fw_kpu->entries with a very
large value that doesn't exceed rvu->hw->npc_kpu_entries, the loop could run for an extremely long time."

I would like to clarify that the following line in the code effectively caps the value of entries,
even in cases where the value read from the filesystem may be corrupted:

entries = min(fw_kpu->entries, rvu->hw->npc_kpu_entries);

Here, rvu->hw->npc_kpu_entries is derived from hardware registers and represents the
maximum limit supported by the hardware.

Please let me know if you see any gaps in this reasoning or if further safeguards are required.

     prev parent reply	other threads:[~2026-03-24  4:10 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-19  5:55 [PATCH v6 net-next 0/5] octeontx2-af: npc: Enhancements Ratheesh Kannoth
2026-03-19  5:55 ` [PATCH v6 net-next 1/5] octeontx2-af: npc: cn20k: debugfs enhancements Ratheesh Kannoth
2026-03-19  5:55 ` [PATCH v6 net-next 2/5] devlink: Implement devlink param multi attribute nested data values Ratheesh Kannoth
2026-03-21  7:05   ` kernel test robot
2026-03-19  5:55 ` [PATCH v6 net-next 3/5] octeontx2-af: npc: cn20k: add subbank search order control Ratheesh Kannoth
2026-03-19  5:55 ` [PATCH v6 net-next 4/5] octeontx2-af: npc: cn20k: dynamically allocate and free default MCAM entries Ratheesh Kannoth
2026-03-19  5:55 ` [PATCH v6 net-next 5/5] octeontx2-af: npc: Support for custom KPU profile from filesystem Ratheesh Kannoth
2026-03-20 16:54   ` Simon Horman
2026-03-23  1:47     ` Ratheesh Kannoth
2026-03-24  4:09       ` Ratheesh Kannoth [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=acIOloT6IX4RmEeZ@rkannoth-OptiPlex-7090 \
    --to=rkannoth@marvell.com \
    --cc=chuck.lever@oracle.com \
    --cc=cjubran@nvidia.com \
    --cc=davem@davemloft.net \
    --cc=donald.hunter@gmail.com \
    --cc=edumazet@google.com \
    --cc=horms@kernel.org \
    --cc=jiri@resnulli.us \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=matttbe@kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=saeedm@nvidia.com \
    --cc=sgoutham@marvell.com \
    --cc=shshitrit@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox