Netdev List
 help / color / mirror / Atom feed
* Re: memory leak in rds_send_probe
From: syzbot @ 2019-07-23 22:17 UTC (permalink / raw)
  To: akpm, catalin.marinas, davem, dvyukov, jack, kirill.shutemov,
	koct9i, linux-kernel, linux-mm, linux-rdma, neilb, netdev,
	rds-devel, ross.zwisler, santosh.shilimkar, syzkaller-bugs,
	torvalds, willy
In-Reply-To: <000000000000ad1dfe058e5b89ab@google.com>

syzbot has bisected this bug to:

commit af49a63e101eb62376cc1d6bd25b97eb8c691d54
Author: Matthew Wilcox <willy@linux.intel.com>
Date:   Sat May 21 00:03:33 2016 +0000

     radix-tree: change naming conventions in radix_tree_shrink

bisection log:  https://syzkaller.appspot.com/x/bisect.txt?x=176528c8600000
start commit:   c6dd78fc Merge branch 'x86-urgent-for-linus' of git://git...
git tree:       upstream
final crash:    https://syzkaller.appspot.com/x/report.txt?x=14e528c8600000
console output: https://syzkaller.appspot.com/x/log.txt?x=10e528c8600000
kernel config:  https://syzkaller.appspot.com/x/.config?x=8de7d700ea5ac607
dashboard link: https://syzkaller.appspot.com/bug?extid=5134cdf021c4ed5aaa5f
syz repro:      https://syzkaller.appspot.com/x/repro.syz?x=145df0c8600000
C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=170001f4600000

Reported-by: syzbot+5134cdf021c4ed5aaa5f@syzkaller.appspotmail.com
Fixes: af49a63e101e ("radix-tree: change naming conventions in  
radix_tree_shrink")

For information about bisection process see: https://goo.gl/tpsmEJ#bisection

^ permalink raw reply

* Re: [PATCH bpf] libbpf: fix using uninitialized ioctl results
From: Alexei Starovoitov @ 2019-07-23 22:18 UTC (permalink / raw)
  To: Ilya Maximets
  Cc: Network Development, bpf, David S. Miller, Magnus Karlsson,
	Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko
In-Reply-To: <20190723120810.28801-1-i.maximets@samsung.com>

On Tue, Jul 23, 2019 at 5:08 AM Ilya Maximets <i.maximets@samsung.com> wrote:
>
> 'channels.max_combined' initialized only on ioctl success and
> errno is only valid on ioctl failure.
>
> The code doesn't produce any runtime issues, but makes memory
> sanitizers angry:
>
>  Conditional jump or move depends on uninitialised value(s)
>     at 0x55C056F: xsk_get_max_queues (xsk.c:336)
>     by 0x55C05B2: xsk_create_bpf_maps (xsk.c:354)
>     by 0x55C089F: xsk_setup_xdp_prog (xsk.c:447)
>     by 0x55C0E57: xsk_socket__create (xsk.c:601)
>   Uninitialised value was created by a stack allocation
>     at 0x55C04CD: xsk_get_max_queues (xsk.c:318)
>
> Additionally fixed warning on uninitialized bytes in ioctl arguments:
>
>  Syscall param ioctl(SIOCETHTOOL) points to uninitialised byte(s)
>     at 0x648D45B: ioctl (in /usr/lib64/libc-2.28.so)
>     by 0x55C0546: xsk_get_max_queues (xsk.c:330)
>     by 0x55C05B2: xsk_create_bpf_maps (xsk.c:354)
>     by 0x55C089F: xsk_setup_xdp_prog (xsk.c:447)
>     by 0x55C0E57: xsk_socket__create (xsk.c:601)
>   Address 0x1ffefff378 is on thread 1's stack
>   in frame #1, created by xsk_get_max_queues (xsk.c:318)
>   Uninitialised value was created by a stack allocation
>     at 0x55C04CD: xsk_get_max_queues (xsk.c:318)
>
> CC: Magnus Karlsson <magnus.karlsson@intel.com>
> Fixes: 1cad07884239 ("libbpf: add support for using AF_XDP sockets")
> Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
> ---
>  tools/lib/bpf/xsk.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
> index 5007b5d4fd2c..c4f912dc30f9 100644
> --- a/tools/lib/bpf/xsk.c
> +++ b/tools/lib/bpf/xsk.c
> @@ -317,7 +317,7 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
>
>  static int xsk_get_max_queues(struct xsk_socket *xsk)
>  {
> -       struct ethtool_channels channels;
> +       struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS };
>         struct ifreq ifr;
>         int fd, err, ret;
>
> @@ -325,7 +325,7 @@ static int xsk_get_max_queues(struct xsk_socket *xsk)
>         if (fd < 0)
>                 return -errno;
>
> -       channels.cmd = ETHTOOL_GCHANNELS;
> +       memset(&ifr, 0, sizeof(ifr));

I refactored this bit into
struct ifreq ifr = {};
based on Andrii suggestion and pushed to bpf tree.
Thanks

>         ifr.ifr_data = (void *)&channels;
>         strncpy(ifr.ifr_name, xsk->ifname, IFNAMSIZ - 1);
>         ifr.ifr_name[IFNAMSIZ - 1] = '\0';
> @@ -335,7 +335,7 @@ static int xsk_get_max_queues(struct xsk_socket *xsk)
>                 goto out;
>         }
>
> -       if (channels.max_combined == 0 || errno == EOPNOTSUPP)
> +       if (err || channels.max_combined == 0)
>                 /* If the device says it has no channels, then all traffic
>                  * is sent to a single stream, so max queues = 1.
>                  */
> --
> 2.17.1
>

^ permalink raw reply

* [PATCH] mt76_init_sband_2g: null check the allocation
From: Navid Emamdoost @ 2019-07-23 22:19 UTC (permalink / raw)
  Cc: emamd001, kjlu, smccaman, secalert, Navid Emamdoost,
	Jakub Kicinski, Kalle Valo, David S. Miller, Matthias Brugger,
	linux-wireless, netdev, linux-arm-kernel, linux-mediatek,
	linux-kernel

devm_kzalloc may fail and return NULL. So the null check is needed.

Signed-off-by: Navid Emamdoost <navid.emamdoost@gmail.com>
---
 drivers/net/wireless/mediatek/mt7601u/init.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt7601u/init.c b/drivers/net/wireless/mediatek/mt7601u/init.c
index 9bfac9f1d47f..cada48800928 100644
--- a/drivers/net/wireless/mediatek/mt7601u/init.c
+++ b/drivers/net/wireless/mediatek/mt7601u/init.c
@@ -557,6 +557,9 @@ mt76_init_sband_2g(struct mt7601u_dev *dev)
 {
 	dev->sband_2g = devm_kzalloc(dev->dev, sizeof(*dev->sband_2g),
 				     GFP_KERNEL);
+	if (!dev->sband_2g)
+		return -ENOMEM;
+
 	dev->hw->wiphy->bands[NL80211_BAND_2GHZ] = dev->sband_2g;
 
 	WARN_ON(dev->ee->reg.start - 1 + dev->ee->reg.num >
-- 
2.17.1


^ permalink raw reply related

* Re: memory leak in rds_send_probe
From: Andrew Morton @ 2019-07-23 22:23 UTC (permalink / raw)
  To: syzbot
  Cc: catalin.marinas, davem, dvyukov, jack, kirill.shutemov, koct9i,
	linux-kernel, linux-mm, linux-rdma, neilb, netdev, rds-devel,
	ross.zwisler, santosh.shilimkar, syzkaller-bugs, torvalds, willy
In-Reply-To: <00000000000034c84a058e608d45@google.com>

On Tue, 23 Jul 2019 15:17:00 -0700 syzbot <syzbot+5134cdf021c4ed5aaa5f@syzkaller.appspotmail.com> wrote:

> syzbot has bisected this bug to:
> 
> commit af49a63e101eb62376cc1d6bd25b97eb8c691d54
> Author: Matthew Wilcox <willy@linux.intel.com>
> Date:   Sat May 21 00:03:33 2016 +0000
> 
>      radix-tree: change naming conventions in radix_tree_shrink
> 
> bisection log:  https://syzkaller.appspot.com/x/bisect.txt?x=176528c8600000
> start commit:   c6dd78fc Merge branch 'x86-urgent-for-linus' of git://git...
> git tree:       upstream
> final crash:    https://syzkaller.appspot.com/x/report.txt?x=14e528c8600000
> console output: https://syzkaller.appspot.com/x/log.txt?x=10e528c8600000
> kernel config:  https://syzkaller.appspot.com/x/.config?x=8de7d700ea5ac607
> dashboard link: https://syzkaller.appspot.com/bug?extid=5134cdf021c4ed5aaa5f
> syz repro:      https://syzkaller.appspot.com/x/repro.syz?x=145df0c8600000
> C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=170001f4600000
> 
> Reported-by: syzbot+5134cdf021c4ed5aaa5f@syzkaller.appspotmail.com
> Fixes: af49a63e101e ("radix-tree: change naming conventions in  
> radix_tree_shrink")
> 
> For information about bisection process see: https://goo.gl/tpsmEJ#bisection

That's rather hard to believe.  af49a63e101eb6237 simply renames a
couple of local variables.


^ permalink raw reply

* Re: [PATCH net-next 1/3] enetc: Add mdio bus driver for the PCIe MDIO endpoint
From: Andrew Lunn @ 2019-07-23 22:24 UTC (permalink / raw)
  To: Claudiu Manoil
  Cc: David S . Miller, devicetree, netdev, alexandru.marginean,
	linux-kernel, Li Yang, Rob Herring, linux-arm-kernel
In-Reply-To: <1563894955-545-2-git-send-email-claudiu.manoil@nxp.com>

> +	bus = mdiobus_alloc_size(sizeof(u32 *));
> +	if (!bus)
> +		return -ENOMEM;
> +

> +	bus->priv = pci_iomap_range(pdev, 0, ENETC_MDIO_REG_OFFSET, 0);

This got me confused for a while. You allocate space for a u32
pointer. bus->priv will point to this space. However, you are not
using this space, you {ab}use the pointer to directly hold the return
from pci_iomap_range(). This works, but sparse is probably unhappy,
and you are wasting the space the u32 pointer takes.

    Andrew

^ permalink raw reply

* [PATCH] stmmac_dt_phy: null check the allocation
From: Navid Emamdoost @ 2019-07-23 22:28 UTC (permalink / raw)
  Cc: emamd001, kjlu, smccaman, secalert, Navid Emamdoost,
	Giuseppe Cavallaro, Alexandre Torgue, Jose Abreu, David S. Miller,
	Maxime Coquelin, netdev, linux-stm32, linux-arm-kernel,
	linux-kernel

devm_kzalloc may fail and return NULL. So the null check is needed.

Signed-off-by: Navid Emamdoost <navid.emamdoost@gmail.com>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 73fc2524372e..392f8d9539c1 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -342,10 +342,13 @@ static int stmmac_dt_phy(struct plat_stmmacenet_data *plat,
 		mdio = true;
 	}
 
-	if (mdio)
+	if (mdio) {
 		plat->mdio_bus_data =
 			devm_kzalloc(dev, sizeof(struct stmmac_mdio_bus_data),
 				     GFP_KERNEL);
+		if (!plat->mdio_bus_data)
+			return -ENOMEM;
+	}
 	return 0;
 }
 
-- 
2.17.1


^ permalink raw reply related

* Re: [PATCH v3 4/7] net: Reorder the contents of skb_frag_t
From: Saeed Mahameed @ 2019-07-23 22:29 UTC (permalink / raw)
  To: willy@infradead.org, davem@davemloft.net
  Cc: hch@lst.de, netdev@vger.kernel.org
In-Reply-To: <20190712134345.19767-5-willy@infradead.org>

On Fri, 2019-07-12 at 06:43 -0700, Matthew Wilcox wrote:
> From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
> 
> Match the layout of bio_vec.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  include/linux/skbuff.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index 7910935410e6..b9dc8b4f24b1 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -314,8 +314,8 @@ struct skb_frag_struct {
>  	struct {
>  		struct page *p;
>  	} page;
> -	__u32 page_offset;
>  	__u32 size;
> +	__u32 page_offset;
>  };
>  

Why do you need this patch? this struct is going to be removed
downstream eventually ..

>  /**

^ permalink raw reply

* Re: [PATCH v3 6/7] net: Rename skb_frag_t size to bv_len
From: Saeed Mahameed @ 2019-07-23 22:33 UTC (permalink / raw)
  To: willy@infradead.org, davem@davemloft.net
  Cc: hch@lst.de, netdev@vger.kernel.org
In-Reply-To: <20190712134345.19767-7-willy@infradead.org>

On Fri, 2019-07-12 at 06:43 -0700, Matthew Wilcox wrote:
> From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
> 
> Improved compatibility with bvec
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  include/linux/skbuff.h | 10 +++++-----
>  1 file changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index 8076e2ba8349..e849e411d1f3 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -312,7 +312,7 @@ typedef struct skb_frag_struct skb_frag_t;
>  
>  struct skb_frag_struct {
>  	struct page *bv_page;
> -	__u32 size;
> +	unsigned int bv_len;
>  	__u32 page_offset;

Why do you keep page_offset name and type as is ? it will make the last
patch much cleaner if you change it to "unsigned int bv_offset".
unsigned int and __u32 on both 32bit and 64bit are the same aren't they
?

>  };
>  
> @@ -322,7 +322,7 @@ struct skb_frag_struct {
>   */
>  static inline unsigned int skb_frag_size(const skb_frag_t *frag)
>  {
> -	return frag->size;
> +	return frag->bv_len;
>  }
>  
>  /**
> @@ -332,7 +332,7 @@ static inline unsigned int skb_frag_size(const
> skb_frag_t *frag)
>   */
>  static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int
> size)
>  {
> -	frag->size = size;
> +	frag->bv_len = size;
>  }
>  
>  /**
> @@ -342,7 +342,7 @@ static inline void skb_frag_size_set(skb_frag_t
> *frag, unsigned int size)
>   */
>  static inline void skb_frag_size_add(skb_frag_t *frag, int delta)
>  {
> -	frag->size += delta;
> +	frag->bv_len += delta;
>  }
>  
>  /**
> @@ -352,7 +352,7 @@ static inline void skb_frag_size_add(skb_frag_t
> *frag, int delta)
>   */
>  static inline void skb_frag_size_sub(skb_frag_t *frag, int delta)
>  {
> -	frag->size -= delta;
> +	frag->bv_len -= delta;
>  }
>  
>  /**

^ permalink raw reply

* Re: [PATCH v4 net-next 02/19] ionic: Add hardware init and device commands
From: Shannon Nelson @ 2019-07-23 22:50 UTC (permalink / raw)
  To: David Miller; +Cc: netdev
In-Reply-To: <20190723.141833.384334163321137202.davem@davemloft.net>

On 7/23/19 2:18 PM, David Miller wrote:
> From: Shannon Nelson <snelson@pensando.io>
> Date: Mon, 22 Jul 2019 14:40:06 -0700
>
>> +void ionic_init_devinfo(struct ionic_dev *idev)
>> +{
>> +	idev->dev_info.asic_type = ioread8(&idev->dev_info_regs->asic_type);
>> +	idev->dev_info.asic_rev = ioread8(&idev->dev_info_regs->asic_rev);
>> +
>> +	memcpy_fromio(idev->dev_info.fw_version,
>> +		      idev->dev_info_regs->fw_version,
>> +		      IONIC_DEVINFO_FWVERS_BUFLEN);
>> +
>> +	memcpy_fromio(idev->dev_info.serial_num,
>> +		      idev->dev_info_regs->serial_num,
>> +		      IONIC_DEVINFO_SERIAL_BUFLEN);
>   ...
>> +	sig = ioread32(&idev->dev_info_regs->signature);
> I think if you are going to use the io{read,write}{8,16,32,64}()
> interfaces then you should use io{read,write}{8,16,32,64}_rep()
> instead of memcpy_{to,from}io().
>
Sure.
sln


^ permalink raw reply

* Re: [PATCH v4 net-next 05/19] ionic: Add interrupts and doorbells
From: Shannon Nelson @ 2019-07-23 22:50 UTC (permalink / raw)
  To: David Miller; +Cc: netdev
In-Reply-To: <20190723.142448.414859031558093111.davem@davemloft.net>

On 7/23/19 2:24 PM, David Miller wrote:
> From: Shannon Nelson <snelson@pensando.io>
> Date: Mon, 22 Jul 2019 14:40:09 -0700
>
>> The ionic interrupt model is based on interrupt control blocks
>> accessed through the PCI BAR.  Doorbell registers are used by
>> the driver to signal to the NIC that requests are waiting on
>> the message queues.  Interrupts are used by the NIC to signal
>> to the driver that answers are waiting on the completion queues.
>>
>> Signed-off-by: Shannon Nelson <snelson@pensando.io>
> After applying this patch we get a warning:
>
> drivers/net/ethernet/pensando/ionic/ionic_lif.c:33:13: warning: ‘ionic_intr_free’ defined but not used [-Wunused-function]
>   static void ionic_intr_free(struct lif *lif, int index)
>               ^~~~~~~~~~~~~~~
> drivers/net/ethernet/pensando/ionic/ionic_lif.c:15:12: warning: ‘ionic_intr_alloc’ defined but not used [-Wunused-function]
>   static int ionic_intr_alloc(struct lif *lif, struct intr *intr)
>              ^~~~~~~~~~~~~~~~

Yes, and they get used in the next patch.  This is so I can keep similar 
bits of functionality together in one patch and to keep the next patch a 
little smaller.

> Also:
>
>> +	lif->dbid_inuse = kzalloc(BITS_TO_LONGS(lif->dbid_count) * sizeof(long),
>> +				  GFP_KERNEL);
> You can use bitmap_alloc() and friends from linux/bitmap.h for this kind of stuff.

Sure.
sln


^ permalink raw reply

* Re: [PATCH v4 net-next 06/19] ionic: Add basic adminq support
From: Shannon Nelson @ 2019-07-23 22:50 UTC (permalink / raw)
  To: David Miller; +Cc: netdev
In-Reply-To: <20190723.142738.638894843366352833.davem@davemloft.net>

On 7/23/19 2:27 PM, David Miller wrote:
> From: Shannon Nelson <snelson@pensando.io>
> Date: Mon, 22 Jul 2019 14:40:10 -0700
>
>> +struct queue {
>   ...
>> +struct cq {
>   ...
>> +struct napi_stats {
>   ...
>> +struct q_stats {
>   ...
>> +struct qcq {
> Using names like these and "dev_queue" are just asking for conflicts with the
> global datastructure namespace both now and in the future.
>
> Please put ionic_ or similar as a prefix to these data structure names.
>
> Thank you.

Sure
sln



^ permalink raw reply

* Re: [PATCH v4 net-next 11/19] ionic: Add Rx filter and rx_mode ndo support
From: Shannon Nelson @ 2019-07-23 22:50 UTC (permalink / raw)
  To: David Miller; +Cc: netdev
In-Reply-To: <20190723.143326.197667027838462669.davem@davemloft.net>

On 7/23/19 2:33 PM, David Miller wrote:
> From: Shannon Nelson <snelson@pensando.io>
> Date: Mon, 22 Jul 2019 14:40:15 -0700
>
>> +	if (in_interrupt()) {
>> +		work = kzalloc(sizeof(*work), GFP_ATOMIC);
>> +		if (!work) {
>> +			netdev_err(lif->netdev, "%s OOM\n", __func__);
>> +			return -ENOMEM;
>> +		}
>> +		work->type = add ? DW_TYPE_RX_ADDR_ADD : DW_TYPE_RX_ADDR_DEL;
>> +		memcpy(work->addr, addr, ETH_ALEN);
>> +		netdev_dbg(lif->netdev, "deferred: rx_filter %s %pM\n",
>> +			   add ? "add" : "del", addr);
>> +		ionic_lif_deferred_enqueue(&lif->deferred, work);
>> +	} else {
>> +		netdev_dbg(lif->netdev, "rx_filter %s %pM\n",
>> +			   add ? "add" : "del", addr);
>> +		if (add)
>> +			return ionic_lif_addr_add(lif, addr);
>> +		else
>> +			return ionic_lif_addr_del(lif, addr);
>> +	}
> I don't know about this.
>
> Generally interface address changes are expected to be synchronous.
Yeah, this bothers me a bit as well, but the address change calls come 
in under spin_lock_bh(), and I'm reluctant to make an AdminQ call under 
the _bh that could block for a few seconds.

sln


^ permalink raw reply

* Re: [PATCH v4 net-next 13/19] ionic: Add initial ethtool support
From: Shannon Nelson @ 2019-07-23 22:50 UTC (permalink / raw)
  To: David Miller; +Cc: netdev
In-Reply-To: <20190723.143507.1690183028498872104.davem@davemloft.net>

On 7/23/19 2:35 PM, David Miller wrote:
> From: Shannon Nelson <snelson@pensando.io>
> Date: Mon, 22 Jul 2019 14:40:17 -0700
>
>> +static int ionic_get_link_ksettings(struct net_device *netdev,
>> +				    struct ethtool_link_ksettings *ks)
>> +{
>> +	struct lif *lif = netdev_priv(netdev);
>> +	struct ionic_dev *idev = &lif->ionic->idev;
>> +	int copper_seen = 0;
> Reverse christmas tree ordering here please.

Sure.  Most of these are because I wanted to do the one assignment 
first, from which the others are based.  I'll rework these to add the 
init lines after the declarations.  Same in the other files.

sln

>> +static int ionic_set_link_ksettings(struct net_device *netdev,
>> +				    const struct ethtool_link_ksettings *ks)
>> +{
>> +	struct lif *lif = netdev_priv(netdev);
>> +	struct ionic *ionic = lif->ionic;
>> +	struct ionic_dev *idev = &lif->ionic->idev;
>> +	u8 fec_type = PORT_FEC_TYPE_NONE;
>> +	u32 req_rs, req_fc;
>> +	int err = 0;
> Likewise.
>> +static void ionic_get_pauseparam(struct net_device *netdev,
>> +				 struct ethtool_pauseparam *pause)
>> +{
>> +	struct lif *lif = netdev_priv(netdev);
>> +	struct ionic_dev *idev = &lif->ionic->idev;
>> +	uint8_t pause_type = idev->port_info->config.pause_type;
> Likewise.
>
>> +static int ionic_set_pauseparam(struct net_device *netdev,
>> +				struct ethtool_pauseparam *pause)
>> +{
>> +	struct lif *lif = netdev_priv(netdev);
>> +	struct ionic *ionic = lif->ionic;
>> +	struct ionic_dev *idev = &lif->ionic->idev;
>> +	u32 requested_pause;
>> +	int err;
> Likewise.
>> +static int ionic_get_module_info(struct net_device *netdev,
>> +				 struct ethtool_modinfo *modinfo)
>> +
>> +{
>> +	struct lif *lif = netdev_priv(netdev);
>> +	struct ionic_dev *idev = &lif->ionic->idev;
>> +	struct xcvr_status *xcvr;
> Likewise.
>
>> +static int ionic_get_module_eeprom(struct net_device *netdev,
>> +				   struct ethtool_eeprom *ee,
>> +				   u8 *data)
>> +{
>> +	struct lif *lif = netdev_priv(netdev);
>> +	struct ionic_dev *idev = &lif->ionic->idev;
>> +	struct xcvr_status *xcvr;
>> +	u32 len;
> Likewise.


^ permalink raw reply

* Re: [PATCH v4 net-next 19/19] ionic: Add basic devlink interface
From: Shannon Nelson @ 2019-07-23 22:50 UTC (permalink / raw)
  To: David Miller; +Cc: netdev
In-Reply-To: <20190723.144055.138556918172139772.davem@davemloft.net>

On 7/23/19 2:40 PM, David Miller wrote:
> From: Shannon Nelson <snelson@pensando.io>
> Date: Mon, 22 Jul 2019 14:40:23 -0700
>
>> +struct ionic *ionic_devlink_alloc(struct device *dev)
>> +{
>> +	struct devlink *dl;
>> +	struct ionic *ionic;
> Reverse christmas tree please.

Yep, I missed this one.

Thanks for your review time.
sln



^ permalink raw reply

* Re: [PATCH v4 net-next 11/19] ionic: Add Rx filter and rx_mode ndo support
From: Saeed Mahameed @ 2019-07-23 22:51 UTC (permalink / raw)
  To: snelson@pensando.io, davem@davemloft.net; +Cc: netdev@vger.kernel.org
In-Reply-To: <20190723.143326.197667027838462669.davem@davemloft.net>

On Tue, 2019-07-23 at 14:33 -0700, David Miller wrote:
> From: Shannon Nelson <snelson@pensando.io>
> Date: Mon, 22 Jul 2019 14:40:15 -0700
> 
> > +	if (in_interrupt()) {
> > +		work = kzalloc(sizeof(*work), GFP_ATOMIC);
> > +		if (!work) {
> > +			netdev_err(lif->netdev, "%s OOM\n", __func__);
> > +			return -ENOMEM;
> > +		}
> > +		work->type = add ? DW_TYPE_RX_ADDR_ADD :
> > DW_TYPE_RX_ADDR_DEL;
> > +		memcpy(work->addr, addr, ETH_ALEN);
> > +		netdev_dbg(lif->netdev, "deferred: rx_filter %s %pM\n",
> > +			   add ? "add" : "del", addr);
> > +		ionic_lif_deferred_enqueue(&lif->deferred, work);
> > +	} else {
> > +		netdev_dbg(lif->netdev, "rx_filter %s %pM\n",
> > +			   add ? "add" : "del", addr);
> > +		if (add)
> > +			return ionic_lif_addr_add(lif, addr);
> > +		else
> > +			return ionic_lif_addr_del(lif, addr);
> > +	}
> 
> I don't know about this.
> 
> Generally interface address changes are expected to be synchronous.

Well, drivers can't sleep on set_rx_mode ndo, dev_set_rx_mode is
holding netif_addr_lock_bh.

Some drivers need to wait for firmware/device completion to program the
new address and they have to do this asynchronously to avoid atomic
context. I assume this driver is having the same issue due to
"ionic_adminq_post_wait()" .. 


^ permalink raw reply

* Re: [PATCH TEST] net: bridge: mcast: fix possible uses of stale pointers
From: kbuild test robot @ 2019-07-23 22:54 UTC (permalink / raw)
  To: Nikolay Aleksandrov
  Cc: kbuild-all, Martin Weinelt, bridge, Roopa Prabhu, netdev
In-Reply-To: <908e9e90-70cc-7bbe-f83f-0810c9ef3925@cumulusnetworks.com>

Hi Nikolay,

I love your patch! Perhaps something to improve:

[auto build test WARNING on net/master]
[cannot apply to v5.3-rc1 next-20190723]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Nikolay-Aleksandrov/net-bridge-mcast-fix-possible-uses-of-stale-pointers/20190702-083354

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>


coccinelle warnings: (new ones prefixed by >>)

>> net/bridge/br_multicast.c:999:8-14: ERROR: application of sizeof to pointer

Please review and possibly fold the followup patch.

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

^ permalink raw reply

* [PATCH] fix noderef.cocci warnings
From: kbuild test robot @ 2019-07-23 22:54 UTC (permalink / raw)
  To: Nikolay Aleksandrov
  Cc: kbuild-all, Martin Weinelt, bridge, Roopa Prabhu, netdev
In-Reply-To: <908e9e90-70cc-7bbe-f83f-0810c9ef3925@cumulusnetworks.com>

From: kbuild test robot <lkp@intel.com>

net/bridge/br_multicast.c:999:8-14: ERROR: application of sizeof to pointer

 sizeof when applied to a pointer typed expression gives the size of
 the pointer

Generated by: scripts/coccinelle/misc/noderef.cocci

Fixes: 17c91348ed8b ("Use-after-free in br_multicast_rcv")
CC: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: kbuild test robot <lkp@intel.com>
---

url:    https://github.com/0day-ci/linux/commits/Nikolay-Aleksandrov/net-bridge-mcast-fix-possible-uses-of-stale-pointers/20190702-083354

 br_multicast.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -996,7 +996,7 @@ static int br_ip6_multicast_mld2_report(
 			return -EINVAL;
 
 		_nsrcs = skb_header_pointer(skb, nsrcs_offset,
-					   sizeof(_nsrcs), &__nsrcs);
+					   sizeof(*_nsrcs), &__nsrcs);
 		if (!_nsrcs)
 			return -EINVAL;
 

^ permalink raw reply

* Re: [PATCH v2] drivers: net: xgene: Remove acpi_has_method() calls
From: Bjorn Helgaas @ 2019-07-23 22:56 UTC (permalink / raw)
  To: Kelsey Skunberg
  Cc: iyappan, keyur, quan, David Miller, netdev, linux-kernel,
	Bjorn Helgaas, Shuah Khan, linux-kernel-mentees
In-Reply-To: <20190723185811.8548-1-skunberg.kelsey@gmail.com>

On Tue, Jul 23, 2019 at 1:59 PM Kelsey Skunberg
<skunberg.kelsey@gmail.com> wrote:
>
> acpi_evaluate_object will already return an error if the needed method
> does not exist. Remove unnecessary acpi_has_method() calls and check the
> returned acpi_status for failure instead.

> diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c
> index 6453fc2ebb1f..5d637b46b2bf 100644
> --- a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c
> +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c
> @@ -437,6 +437,7 @@ static void xgene_sgmac_tx_disable(struct xgene_enet_pdata *p)
>  static int xgene_enet_reset(struct xgene_enet_pdata *p)
>  {
>         struct device *dev = &p->pdev->dev;
> +       acpi_status status;
>
>         if (!xgene_ring_mgr_init(p))
>                 return -ENODEV;
> @@ -460,14 +461,13 @@ static int xgene_enet_reset(struct xgene_enet_pdata *p)
>                 }
>         } else {
>  #ifdef CONFIG_ACPI
> -               if (acpi_has_method(ACPI_HANDLE(&p->pdev->dev), "_RST"))
> -                       acpi_evaluate_object(ACPI_HANDLE(&p->pdev->dev),
> -                                            "_RST", NULL, NULL);
> -               else if (acpi_has_method(ACPI_HANDLE(&p->pdev->dev), "_INI"))
> +               status = acpi_evaluate_object(ACPI_HANDLE(&p->pdev->dev),
> +                                             "_RST", NULL, NULL);
> +               if (ACPI_FAILURE(status)) {
>                         acpi_evaluate_object(ACPI_HANDLE(&p->pdev->dev),
>                                              "_INI", NULL, NULL);
> +               }
>  #endif
> -       }

Oops, I don't think you intended to remove that brace.

If you haven't found it already, CONFIG_COMPILE_TEST is useful for
building things that wouldn't normally be buildable on your arch.

>         if (!p->port_id) {
>                 xgene_enet_ecc_init(p);

^ permalink raw reply

* Re: [PATCH v2 bpf-next 1/4] bpf: unprivileged BPF access via /dev/bpf
From: Song Liu @ 2019-07-23 22:56 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Kees Cook, linux-security@vger.kernel.org, Networking, bpf,
	Alexei Starovoitov, Daniel Borkmann, Kernel Team, Lorenz Bauer,
	Jann Horn, Greg KH, Linux API
In-Reply-To: <CALCETrX2bMnwC6_t4b_G-hzJSfMPrkK4YKs5ebcecv2LJ0rt3w@mail.gmail.com>



> On Jul 23, 2019, at 8:11 AM, Andy Lutomirski <luto@kernel.org> wrote:
> 
> On Mon, Jul 22, 2019 at 1:54 PM Song Liu <songliubraving@fb.com> wrote:
>> 
>> Hi Andy, Lorenz, and all,
>> 
>>> On Jul 2, 2019, at 2:32 PM, Andy Lutomirski <luto@kernel.org> wrote:
>>> 
>>> On Tue, Jul 2, 2019 at 2:04 PM Kees Cook <keescook@chromium.org> wrote:
>>>> 
>>>> On Mon, Jul 01, 2019 at 06:59:13PM -0700, Andy Lutomirski wrote:
>>>>> I think I'm understanding your motivation.  You're not trying to make
>>>>> bpf() generically usable without privilege -- you're trying to create
>>>>> a way to allow certain users to access dangerous bpf functionality
>>>>> within some limits.
>>>>> 
>>>>> That's a perfectly fine goal, but I think you're reinventing the
>>>>> wheel, and the wheel you're reinventing is quite complicated and
>>>>> already exists.  I think you should teach bpftool to be secure when
>>>>> installed setuid root or with fscaps enabled and put your policy in
>>>>> bpftool.  If you want to harden this a little bit, it would seem
>>>>> entirely reasonable to add a new CAP_BPF_ADMIN and change some, but
>>>>> not all, of the capable() checks to check CAP_BPF_ADMIN instead of the
>>>>> capabilities that they currently check.
>>>> 
>>>> If finer grained controls are wanted, it does seem like the /dev/bpf
>>>> path makes the most sense. open, request abilities, use fd. The open can
>>>> be mediated by DAC and LSM. The request can be mediated by LSM. This
>>>> provides a way to add policy at the LSM level and at the tool level.
>>>> (i.e. For tool-level controls: leave LSM wide open, make /dev/bpf owned
>>>> by "bpfadmin" and bpftool becomes setuid "bpfadmin". For fine-grained
>>>> controls, leave /dev/bpf wide open and add policy to SELinux, etc.)
>>>> 
>>>> With only a new CAP, you don't get the fine-grained controls. (The
>>>> "request abilities" part is the key there.)
>>> 
>>> Sure you do: the effective set.  It has somewhat bizarre defaults, but
>>> I don't think that's a real problem.  Also, this wouldn't be like
>>> CAP_DAC_READ_SEARCH -- you can't accidentally use your BPF caps.
>>> 
>>> I think that a /dev capability-like object isn't totally nuts, but I
>>> think we should do it well, and this patch doesn't really achieve
>>> that.  But I don't think bpf wants fine-grained controls like this at
>>> all -- as I pointed upthread, a fine-grained solution really wants
>>> different treatment for the different capable() checks, and a bunch of
>>> them won't resemble capabilities or /dev/bpf at all.
>> 
>> With 5.3-rc1 out, I am back on this. :)
>> 
>> How about we modify the set as:
>>  1. Introduce sys_bpf_with_cap() that takes fd of /dev/bpf.
> 
> I'm fine with this in principle, but:
> 
>>  2. Better handling of capable() calls through bpf code. I guess the
>>     biggest problem here is is_priv in verifier.c:bpf_check().
> 
> I think it would be good to understand exactly what /dev/bpf will
> enable one to do.  Without some care, it would just become the next
> CAP_SYS_ADMIN: if you can open it, sure, you're not root, but you can
> intercept network traffic, modify cgroup behavior, and do plenty of
> other things, any of which can probably be used to completely take
> over the system.

Well, yes. sys_bpf() is pretty powerful. 

The goal of /dev/bpf is to enable special users to call sys_bpf(). In 
the meanwhile, such users should not take down the whole system easily
by accident, e.g., with rm -rf /.

It is similar to CAP_BPF_ADMIN, without really adding the CAP_.  

I think adding new CAP_ requires much more effort. 

> 
> It would also be nice to understand why you can't do what you need to
> do entirely in user code using setuid or fscaps.

It is not very easy to achieve the same control: only certain users can
run certain tools (bpftool, etc.). 

The closest approach I can find is:
  1. use libcap (pam_cap) to give CAP_SETUID to certain users;
  2. add setuid(0) to bpftool.

The difference between this approach and /dev/bpf is that certain users
would be able to run other tools that call setuid(). Though I am not 
sure how many tools call setuid(), and how risky they are. 

> 
> Finally, at risk of rehashing some old arguments, I'll point out that
> the bpf() syscall is an unusual design to begin with.  As an example,
> consider bpf_prog_attach().  Outside of bpf(), if I want to change the
> behavior of a cgroup, I would write to a file in
> /sys/kernel/cgroup/unified/whatever/, and normal DAC and MAC rules
> apply.  With bpf(), however, I just call bpf() to attach a program to
> the cgroup.  bpf() says "oh, you are capable(CAP_NET_ADMIN) -- go for
> it!".  Unless I missed something major, and I just re-read the code,
> there is no check that the caller has write or LSM permission to
> anything at all in cgroupfs, and the existing API would make it very
> awkward to impose any kind of DAC rules here.
> 
> So I think it might actually be time to repay some techincal debt and
> come up with a real fix.  As a less intrusive approach, you could see
> about requiring ownership of the cgroup directory instead of
> CAP_NET_ADMIN.  As a more intrusive but perhaps better approach, you
> could invert the logic to to make it work like everything outside of
> cgroup: add pseudo-files like bpf.inet_ingress to the cgroup
> directories, and require a writable fd to *that* to a new improved
> attach API.  If a user could do:
> 
> int fd = open("/sys/fs/cgroup/.../bpf.inet_attach", O_RDWR);  /* usual
> DAC and MAC policy applies */
> int bpf_fd = setup the bpf stuff;  /* no privilege required, unless
> the program is huge or needs is_priv */
> bpf(BPF_IMPROVED_ATTACH, target = fd, program = bpf_fd);
> 
> there would be no capabilities or global privilege at all required for
> this.  It would just work with cgroup delegation, containers, etc.
> 
> I think you could even pull off this type of API change with only
> libbpf changes.  In particular, there's this code:
> 
> int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type,
>                    unsigned int flags)
> {
>        union bpf_attr attr;
> 
>        memset(&attr, 0, sizeof(attr));
>        attr.target_fd     = target_fd;
>        attr.attach_bpf_fd = prog_fd;
>        attr.attach_type   = type;
>        attr.attach_flags  = flags;
> 
>        return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr));
> }
> 
> This would instead do something like:
> 
> int specific_target_fd = openat(target_fd, bpf_type_to_target[type], O_RDWR);
> attr.target_fd = specific_target_fd;
> ...
> 
> return sys_bpf(BPF_PROG_IMPROVED_ATTACH, &attr, sizeof(attr));
> 
> Would this solve your problem without needing /dev/bpf at all?

This gives fine grain access control. I think it solves the problem. 
But it also requires a lot of rework to sys_bpf(). And it may also 
break backward/forward compatibility?

Personally, I think it is an overkill for the original motivation: 
call sys_bpf() with special user instead of root. 

Alexei, Daniel: what do you think about this? 

Thanks,
Song

^ permalink raw reply

* Re: [PATCH v2 bpf-next] libbpf: provide more helpful message on uninitialized global var
From: Alexei Starovoitov @ 2019-07-23 23:00 UTC (permalink / raw)
  To: Song Liu, Andrii Nakryiko
  Cc: bpf, netdev@vger.kernel.org, daniel@iogearbox.net,
	andrii.nakryiko@gmail.com, Kernel Team
In-Reply-To: <08DD65ED-34B4-47C0-B5ED-9A354CF5BA35@fb.com>

On 7/23/19 3:03 PM, Song Liu wrote:
>> On Jul 23, 2019, at 2:11 PM, Andrii Nakryiko<andriin@fb.com>  wrote:
>>
>> When BPF program defines uninitialized global variable, it's put into
>> a special COMMON section. Libbpf will reject such programs, but will
>> provide very unhelpful message with garbage-looking section index.
>>
>> This patch detects special section cases and gives more explicit error
>> message.
>>
>> Signed-off-by: Andrii Nakryiko<andriin@fb.com>
> Acked-by: Song Liu<songliubraving@fb.com>
> 

Applied. Thanks

^ permalink raw reply

* Re: [PATCH v4 net-next 02/19] ionic: Add hardware init and device commands
From: David Miller @ 2019-07-23 23:05 UTC (permalink / raw)
  To: snelson; +Cc: netdev
In-Reply-To: <59e45fd2-3c62-58cf-cf63-935d17703d2c@pensando.io>

From: Shannon Nelson <snelson@pensando.io>
Date: Tue, 23 Jul 2019 15:50:22 -0700

> On 7/23/19 2:18 PM, David Miller wrote:
>> From: Shannon Nelson <snelson@pensando.io>
>> Date: Mon, 22 Jul 2019 14:40:06 -0700
>>
>>> +void ionic_init_devinfo(struct ionic_dev *idev)
>>> +{
>>> + idev->dev_info.asic_type = ioread8(&idev->dev_info_regs->asic_type);
>>> + idev->dev_info.asic_rev = ioread8(&idev->dev_info_regs->asic_rev);
>>> +
>>> +	memcpy_fromio(idev->dev_info.fw_version,
>>> +		      idev->dev_info_regs->fw_version,
>>> +		      IONIC_DEVINFO_FWVERS_BUFLEN);
>>> +
>>> +	memcpy_fromio(idev->dev_info.serial_num,
>>> +		      idev->dev_info_regs->serial_num,
>>> +		      IONIC_DEVINFO_SERIAL_BUFLEN);
>>   ...
>>> +	sig = ioread32(&idev->dev_info_regs->signature);
>> I think if you are going to use the io{read,write}{8,16,32,64}()
>> interfaces then you should use io{read,write}{8,16,32,64}_rep()
>> instead of memcpy_{to,from}io().
>>
> Sure.

Note, I could be wrong.  Please test.

I think the operation of the two things might be different.

^ permalink raw reply

* Re: [PATCH v4 net-next 11/19] ionic: Add Rx filter and rx_mode ndo support
From: David Miller @ 2019-07-23 23:06 UTC (permalink / raw)
  To: snelson; +Cc: netdev
In-Reply-To: <e0c8417c-75bf-837f-01b5-60df302dafa7@pensando.io>

From: Shannon Nelson <snelson@pensando.io>
Date: Tue, 23 Jul 2019 15:50:43 -0700

> On 7/23/19 2:33 PM, David Miller wrote:
>> Generally interface address changes are expected to be synchronous.
> Yeah, this bothers me a bit as well, but the address change calls come
> in under spin_lock_bh(), and I'm reluctant to make an AdminQ call
> under the _bh that could block for a few seconds.

So it's not about memory allocation but rather the fact that the device
might take a while to complete?

Can you start the operation synchronously yet complete it async?

^ permalink raw reply

* Re: [PATCH v2 bpf-next 0/5] switch samples and tests to libbpf perf buffer API
From: Alexei Starovoitov @ 2019-07-23 23:09 UTC (permalink / raw)
  To: Andrii Nakryiko, bpf@vger.kernel.org, netdev@vger.kernel.org,
	daniel@iogearbox.net, Song Liu
  Cc: andrii.nakryiko@gmail.com, Kernel Team
In-Reply-To: <20190723213445.1732339-1-andriin@fb.com>

On 7/23/19 2:34 PM, Andrii Nakryiko wrote:
> There were few more tests and samples that were using custom perf buffer setup
> code from trace_helpers.h. This patch set gets rid of all the usages of those
> and removes helpers themselves. Libbpf provides nicer, but equally powerful
> set of APIs to work with perf ring buffers, so let's have all the samples use
> 
> v1->v2:
> - make logging message one long line instead of two (Song).

Applied. Thanks

^ permalink raw reply

* Re: [PATCH v2] drivers: net: xgene: Remove acpi_has_method() calls
From: Kelsey Skunberg @ 2019-07-23 23:17 UTC (permalink / raw)
  To: bjorn
  Cc: iyappan, keyur, quan, David Miller, netdev, linux-kernel,
	Shuah Khan, linux-kernel-mentees
In-Reply-To: <CABhMZUVAcJwJpN8BKZTTU7jUW6881KdBtoYs_3kSn+tDtOVqNw@mail.gmail.com>

On Tue, Jul 23, 2019 at 05:56:04PM -0500, Bjorn Helgaas wrote:
> On Tue, Jul 23, 2019 at 1:59 PM Kelsey Skunberg
> <skunberg.kelsey@gmail.com> wrote:
> >
> > acpi_evaluate_object will already return an error if the needed method
> > does not exist. Remove unnecessary acpi_has_method() calls and check the
> > returned acpi_status for failure instead.
> 
> > diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c
> > index 6453fc2ebb1f..5d637b46b2bf 100644
> > --- a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c
> > +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c
> > @@ -437,6 +437,7 @@ static void xgene_sgmac_tx_disable(struct xgene_enet_pdata *p)
> >  static int xgene_enet_reset(struct xgene_enet_pdata *p)
> >  {
> >         struct device *dev = &p->pdev->dev;
> > +       acpi_status status;
> >
> >         if (!xgene_ring_mgr_init(p))
> >                 return -ENODEV;
> > @@ -460,14 +461,13 @@ static int xgene_enet_reset(struct xgene_enet_pdata *p)
> >                 }
> >         } else {
> >  #ifdef CONFIG_ACPI
> > -               if (acpi_has_method(ACPI_HANDLE(&p->pdev->dev), "_RST"))
> > -                       acpi_evaluate_object(ACPI_HANDLE(&p->pdev->dev),
> > -                                            "_RST", NULL, NULL);
> > -               else if (acpi_has_method(ACPI_HANDLE(&p->pdev->dev), "_INI"))
> > +               status = acpi_evaluate_object(ACPI_HANDLE(&p->pdev->dev),
> > +                                             "_RST", NULL, NULL);
> > +               if (ACPI_FAILURE(status)) {
> >                         acpi_evaluate_object(ACPI_HANDLE(&p->pdev->dev),
> >                                              "_INI", NULL, NULL);
> > +               }
> >  #endif
> > -       }
> 
> Oops, I don't think you intended to remove that brace.
> 
> If you haven't found it already, CONFIG_COMPILE_TEST is useful for
> building things that wouldn't normally be buildable on your arch.

Thank you very much for catching that. I did not know about
CONFIG_COMPILE_TEST yet and that will be very useful. It's clear why my
build test wasn't coming up with the same errors now. I know for future
patches now and will certainly get this one fixed.
Thank you again.

-Kelsey

^ permalink raw reply

* Re: [PATCH v4 net-next 11/19] ionic: Add Rx filter and rx_mode ndo support
From: Saeed Mahameed @ 2019-07-23 23:20 UTC (permalink / raw)
  To: snelson@pensando.io, netdev@vger.kernel.org, davem@davemloft.net
In-Reply-To: <20190722214023.9513-12-snelson@pensando.io>

On Mon, 2019-07-22 at 14:40 -0700, Shannon Nelson wrote:
> Add the Rx filtering and rx_mode NDO callbacks.  Also add
> the deferred work thread handling needed to manage the filter
> requests otuside of the netif_addr_lock spinlock.
> 
> Signed-off-by: Shannon Nelson <snelson@pensando.io>
> ---
>  .../net/ethernet/pensando/ionic/ionic_lif.c   | 389
> +++++++++++++++++-
>  .../net/ethernet/pensando/ionic/ionic_lif.h   |  29 ++
>  2 files changed, 412 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
> b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
> index 72ac0fd56b69..efcda1337f91 100644
> --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
> +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
> @@ -12,9 +12,55 @@
>  #include "ionic_lif.h"
>  #include "ionic_debugfs.h"
>  
> +static void ionic_lif_rx_mode(struct lif *lif, unsigned int
> rx_mode);
> +static int ionic_lif_addr_add(struct lif *lif, const u8 *addr);
> +static int ionic_lif_addr_del(struct lif *lif, const u8 *addr);
> +
>  static int ionic_set_nic_features(struct lif *lif, netdev_features_t
> features);
>  static int ionic_notifyq_clean(struct lif *lif, int budget);
>  
> +static void ionic_lif_deferred_work(struct work_struct *work)
> +{
> +	struct lif *lif = container_of(work, struct lif,
> deferred.work);
> +	struct ionic_deferred *def = &lif->deferred;
> +	struct ionic_deferred_work *w = NULL;
> +
> +	spin_lock_bh(&def->lock);
> +	if (!list_empty(&def->list)) {
> +		w = list_first_entry(&def->list,
> +				     struct ionic_deferred_work, list);
> +		list_del(&w->list);
> +	}
> +	spin_unlock_bh(&def->lock);
> +
> +	if (w) {
> +		switch (w->type) {
> +		case DW_TYPE_RX_MODE:
> +			ionic_lif_rx_mode(lif, w->rx_mode);
> +			break;
> +		case DW_TYPE_RX_ADDR_ADD:
> +			ionic_lif_addr_add(lif, w->addr);
> +			break;
> +		case DW_TYPE_RX_ADDR_DEL:
> +			ionic_lif_addr_del(lif, w->addr);
> +			break;
> +		default:
> +			break;
> +		}
> +		kfree(w);
> +		schedule_work(&def->work);
> +	}
> +}
> +
> +static void ionic_lif_deferred_enqueue(struct ionic_deferred *def,
> +				       struct ionic_deferred_work
> *work)
> +{
> +	spin_lock_bh(&def->lock);
> +	list_add_tail(&work->list, &def->list);
> +	spin_unlock_bh(&def->lock);
> +	schedule_work(&def->work);
> +}
> +
>  int ionic_open(struct net_device *netdev)
>  {
>  	struct lif *lif = netdev_priv(netdev);
> @@ -180,6 +226,235 @@ static int ionic_notifyq_clean(struct lif *lif,
> int budget)
>  	return work_done;
>  }
>  
> +static int ionic_lif_addr_add(struct lif *lif, const u8 *addr)
> +{
> +	struct ionic_admin_ctx ctx = {
> +		.work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
> +		.cmd.rx_filter_add = {
> +			.opcode = CMD_OPCODE_RX_FILTER_ADD,
> +			.lif_index = cpu_to_le16(lif->index),
> +			.match = cpu_to_le16(RX_FILTER_MATCH_MAC),
> +		},
> +	};
> +	struct rx_filter *f;
> +	int err;
> +
> +	/* don't bother if we already have it */
> +	spin_lock_bh(&lif->rx_filters.lock);
> +	f = ionic_rx_filter_by_addr(lif, addr);
> +	spin_unlock_bh(&lif->rx_filters.lock);
> +	if (f)
> +		return 0;
> +
> +	netdev_dbg(lif->netdev, "rx_filter add ADDR %pM (id %d)\n",
> addr,
> +		   ctx.comp.rx_filter_add.filter_id);
> +
> +	memcpy(ctx.cmd.rx_filter_add.mac.addr, addr, ETH_ALEN);
> +	err = ionic_adminq_post_wait(lif, &ctx);
> +	if (err)
> +		return err;
> +
> +	return ionic_rx_filter_save(lif, 0, RXQ_INDEX_ANY, 0, &ctx);
> +}
> +
> +static int ionic_lif_addr_del(struct lif *lif, const u8 *addr)
> +{
> +	struct ionic_admin_ctx ctx = {
> +		.work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
> +		.cmd.rx_filter_del = {
> +			.opcode = CMD_OPCODE_RX_FILTER_DEL,
> +			.lif_index = cpu_to_le16(lif->index),
> +		},
> +	};
> +	struct rx_filter *f;
> +	int err;
> +
> +	spin_lock_bh(&lif->rx_filters.lock);
> +	f = ionic_rx_filter_by_addr(lif, addr);
> +	if (!f) {
> +		spin_unlock_bh(&lif->rx_filters.lock);
> +		return -ENOENT;
> +	}
> +
> +	ctx.cmd.rx_filter_del.filter_id = cpu_to_le32(f->filter_id);
> +	ionic_rx_filter_free(lif, f);
> +	spin_unlock_bh(&lif->rx_filters.lock);
> +
> +	err = ionic_adminq_post_wait(lif, &ctx);
> +	if (err)
> +		return err;
> +
> +	netdev_dbg(lif->netdev, "rx_filter del ADDR %pM (id %d)\n",
> addr,
> +		   ctx.cmd.rx_filter_del.filter_id);
> +
> +	return 0;
> +}
> +
> +static int ionic_lif_addr(struct lif *lif, const u8 *addr, bool add)
> +{
> +	struct ionic *ionic = lif->ionic;
> +	struct ionic_deferred_work *work;
> +	unsigned int nmfilters;
> +	unsigned int nufilters;
> +
> +	if (add) {
> +		/* Do we have space for this filter?  We test the
> counters
> +		 * here before checking the need for deferral so that
> we
> +		 * can return an overflow error to the stack.
> +		 */
> +		nmfilters = le32_to_cpu(ionic-
> >ident.lif.eth.max_mcast_filters);
> +		nufilters = le32_to_cpu(ionic-
> >ident.lif.eth.max_ucast_filters);
> +
> +		if ((is_multicast_ether_addr(addr) && lif->nmcast <
> nmfilters))
> +			lif->nmcast++;
> +		else if (!is_multicast_ether_addr(addr) &&
> +			 lif->nucast < nufilters)
> +			lif->nucast++;
> +		else
> +			return -ENOSPC;
> +	} else {
> +		if (is_multicast_ether_addr(addr) && lif->nmcast)
> +			lif->nmcast--;
> +		else if (!is_multicast_ether_addr(addr) && lif->nucast)
> +			lif->nucast--;
> +	}
> +
> +	if (in_interrupt()) {
> +		work = kzalloc(sizeof(*work), GFP_ATOMIC);
> +		if (!work) {
> +			netdev_err(lif->netdev, "%s OOM\n", __func__);
> +			return -ENOMEM;
> +		}
> +		work->type = add ? DW_TYPE_RX_ADDR_ADD :
> DW_TYPE_RX_ADDR_DEL;
> +		memcpy(work->addr, addr, ETH_ALEN);
> +		netdev_dbg(lif->netdev, "deferred: rx_filter %s %pM\n",
> +			   add ? "add" : "del", addr);
> +		ionic_lif_deferred_enqueue(&lif->deferred, work);
> +	} else {
> +		netdev_dbg(lif->netdev, "rx_filter %s %pM\n",
> +			   add ? "add" : "del", addr);
> +		if (add)
> +			return ionic_lif_addr_add(lif, addr);
> +		else
> +			return ionic_lif_addr_del(lif, addr);
> +	}
> +
> +	return 0;
> +}
> +
> +static int ionic_addr_add(struct net_device *netdev, const u8 *addr)
> +{
> +	return ionic_lif_addr(netdev_priv(netdev), addr, true);
> +}
> +
> +static int ionic_addr_del(struct net_device *netdev, const u8 *addr)
> +{
> +	return ionic_lif_addr(netdev_priv(netdev), addr, false);
> +}
> +
> +static void ionic_lif_rx_mode(struct lif *lif, unsigned int rx_mode)
> +{
> +	struct ionic_admin_ctx ctx = {
> +		.work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
> +		.cmd.rx_mode_set = {
> +			.opcode = CMD_OPCODE_RX_MODE_SET,
> +			.lif_index = cpu_to_le16(lif->index),
> +			.rx_mode = cpu_to_le16(rx_mode),
> +		},
> +	};
> +	char buf[128];
> +	int err;
> +	int i;
> +#define REMAIN(__x) (sizeof(buf) - (__x))
> +
> +	i = snprintf(buf, sizeof(buf), "rx_mode 0x%04x -> 0x%04x:",
> +		     lif->rx_mode, rx_mode);
> +	if (rx_mode & RX_MODE_F_UNICAST)
> +		i += snprintf(&buf[i], REMAIN(i), "
> RX_MODE_F_UNICAST");
> +	if (rx_mode & RX_MODE_F_MULTICAST)
> +		i += snprintf(&buf[i], REMAIN(i), "
> RX_MODE_F_MULTICAST");
> +	if (rx_mode & RX_MODE_F_BROADCAST)
> +		i += snprintf(&buf[i], REMAIN(i), "
> RX_MODE_F_BROADCAST");
> +	if (rx_mode & RX_MODE_F_PROMISC)
> +		i += snprintf(&buf[i], REMAIN(i), "
> RX_MODE_F_PROMISC");
> +	if (rx_mode & RX_MODE_F_ALLMULTI)
> +		i += snprintf(&buf[i], REMAIN(i), "
> RX_MODE_F_ALLMULTI");
> +	netdev_dbg(lif->netdev, "lif%d %s\n", lif->index, buf);
> +
> +	err = ionic_adminq_post_wait(lif, &ctx);
> +	if (err)
> +		netdev_warn(lif->netdev, "set rx_mode 0x%04x failed:
> %d\n",
> +			    rx_mode, err);
> +	else
> +		lif->rx_mode = rx_mode;
> +}
> +
> +static void _ionic_lif_rx_mode(struct lif *lif, unsigned int
> rx_mode)
> +{
> +	struct ionic_deferred_work *work;
> +
> +	if (in_interrupt()) {
> +		work = kzalloc(sizeof(*work), GFP_ATOMIC);
> +		if (!work) {
> +			netdev_err(lif->netdev, "%s OOM\n", __func__);
> +			return;
> +		}
> +		work->type = DW_TYPE_RX_MODE;
> +		work->rx_mode = rx_mode;
> +		netdev_dbg(lif->netdev, "deferred: rx_mode\n");
> +		ionic_lif_deferred_enqueue(&lif->deferred, work);
> +	} else {
> +		ionic_lif_rx_mode(lif, rx_mode);
> +	}
> +}
> +
> +static void ionic_set_rx_mode(struct net_device *netdev)
> +{
> +	struct lif *lif = netdev_priv(netdev);
> +	struct identity *ident = &lif->ionic->ident;
> +	unsigned int nfilters;
> +	unsigned int rx_mode;
> +
> +	rx_mode = RX_MODE_F_UNICAST;
> +	rx_mode |= (netdev->flags & IFF_MULTICAST) ?
> RX_MODE_F_MULTICAST : 0;
> +	rx_mode |= (netdev->flags & IFF_BROADCAST) ?
> RX_MODE_F_BROADCAST : 0;
> +	rx_mode |= (netdev->flags & IFF_PROMISC) ? RX_MODE_F_PROMISC :
> 0;
> +	rx_mode |= (netdev->flags & IFF_ALLMULTI) ? RX_MODE_F_ALLMULTI
> : 0;
> +
> +	/* sync unicast addresses
> +	 * next check to see if we're in an overflow state
> +	 *    if so, we track that we overflowed and enable NIC PROMISC
> +	 *    else if the overflow is set and not needed
> +	 *       we remove our overflow flag and check the netdev flags
> +	 *       to see if we can disable NIC PROMISC
> +	 */
> +	__dev_uc_sync(netdev, ionic_addr_add, ionic_addr_del);
> +	nfilters = le32_to_cpu(ident->lif.eth.max_ucast_filters);
> +	if (netdev_uc_count(netdev) + 1 > nfilters) {
> +		rx_mode |= RX_MODE_F_PROMISC;
> +		lif->uc_overflow = true;
> +	} else if (lif->uc_overflow) {
> +		lif->uc_overflow = false;
> +		if (!(netdev->flags & IFF_PROMISC))
> +			rx_mode &= ~RX_MODE_F_PROMISC;
> +	}
> +
> +	/* same for multicast */
> +	__dev_mc_sync(netdev, ionic_addr_add, ionic_addr_del);
> +	nfilters = le32_to_cpu(ident->lif.eth.max_mcast_filters);
> +	if (netdev_mc_count(netdev) > nfilters) {
> +		rx_mode |= RX_MODE_F_ALLMULTI;
> +		lif->mc_overflow = true;
> +	} else if (lif->mc_overflow) {
> +		lif->mc_overflow = false;
> +		if (!(netdev->flags & IFF_ALLMULTI))
> +			rx_mode &= ~RX_MODE_F_ALLMULTI;
> +	}
> +
> +	if (lif->rx_mode != rx_mode)
> +		_ionic_lif_rx_mode(lif, rx_mode);
> +}
> +
>  static int ionic_set_features(struct net_device *netdev,
>  			      netdev_features_t features)
>  {
> @@ -196,8 +471,26 @@ static int ionic_set_features(struct net_device
> *netdev,
>  
>  static int ionic_set_mac_address(struct net_device *netdev, void
> *sa)
>  {
> -	netdev_info(netdev, "%s: stubbed\n", __func__);
> -	return 0;
> +	struct sockaddr *addr = sa;
> +	u8 *mac;
> +
> +	mac = (u8 *)addr->sa_data;
> +	if (ether_addr_equal(netdev->dev_addr, mac))
> +		return 0;
> +
> +	if (!is_valid_ether_addr(mac))
> +		return -EADDRNOTAVAIL;
> +
> +	if (!is_zero_ether_addr(netdev->dev_addr)) {
> +		netdev_info(netdev, "deleting mac addr %pM\n",
> +			    netdev->dev_addr);
> +		ionic_addr_del(netdev, netdev->dev_addr);
> +	}
> +
> +	memcpy(netdev->dev_addr, mac, netdev->addr_len);
> +	netdev_info(netdev, "updating mac addr %pM\n", mac);
> +
> +	return ionic_addr_add(netdev, mac);
>  }
>  
>  static int ionic_change_mtu(struct net_device *netdev, int new_mtu)
> @@ -232,20 +525,63 @@ static void ionic_tx_timeout(struct net_device
> *netdev)
>  static int ionic_vlan_rx_add_vid(struct net_device *netdev, __be16
> proto,
>  				 u16 vid)
>  {
> -	netdev_info(netdev, "%s: stubbed\n", __func__);
> -	return 0;
> +	struct lif *lif = netdev_priv(netdev);
> +	struct ionic_admin_ctx ctx = {
> +		.work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
> +		.cmd.rx_filter_add = {
> +			.opcode = CMD_OPCODE_RX_FILTER_ADD,
> +			.lif_index = cpu_to_le16(lif->index),
> +			.match = cpu_to_le16(RX_FILTER_MATCH_VLAN),
> +			.vlan.vlan = cpu_to_le16(vid),
> +		},
> +	};
> +	int err;
> +
> +	err = ionic_adminq_post_wait(lif, &ctx);
> +	if (err)
> +		return err;
> +
> +	netdev_dbg(netdev, "rx_filter add VLAN %d (id %d)\n", vid,
> +		   ctx.comp.rx_filter_add.filter_id);
> +
> +	return ionic_rx_filter_save(lif, 0, RXQ_INDEX_ANY, 0, &ctx);
>  }
>  
>  static int ionic_vlan_rx_kill_vid(struct net_device *netdev, __be16
> proto,
>  				  u16 vid)
>  {
> -	netdev_info(netdev, "%s: stubbed\n", __func__);
> -	return 0;
> +	struct lif *lif = netdev_priv(netdev);
> +	struct ionic_admin_ctx ctx = {
> +		.work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
> +		.cmd.rx_filter_del = {
> +			.opcode = CMD_OPCODE_RX_FILTER_DEL,
> +			.lif_index = cpu_to_le16(lif->index),
> +		},
> +	};
> +	struct rx_filter *f;
> +
> +	spin_lock_bh(&lif->rx_filters.lock);
> +
> +	f = ionic_rx_filter_by_vlan(lif, vid);
> +	if (!f) {
> +		spin_unlock_bh(&lif->rx_filters.lock);
> +		return -ENOENT;
> +	}
> +
> +	netdev_dbg(netdev, "rx_filter del VLAN %d (id %d)\n", vid,
> +		   le32_to_cpu(ctx.cmd.rx_filter_del.filter_id));
> +
> +	ctx.cmd.rx_filter_del.filter_id = cpu_to_le32(f->filter_id);
> +	ionic_rx_filter_free(lif, f);
> +	spin_unlock_bh(&lif->rx_filters.lock);
> +
> +	return ionic_adminq_post_wait(lif, &ctx);
>  }
>  
>  static const struct net_device_ops ionic_netdev_ops = {
>  	.ndo_open               = ionic_open,
>  	.ndo_stop               = ionic_stop,
> +	.ndo_set_rx_mode	= ionic_set_rx_mode,
>  	.ndo_set_features	= ionic_set_features,
>  	.ndo_set_mac_address	= ionic_set_mac_address,
>  	.ndo_validate_addr	= eth_validate_addr,
> @@ -546,6 +882,10 @@ static struct lif *ionic_lif_alloc(struct ionic
> *ionic, unsigned int index)
>  
>  	spin_lock_init(&lif->adminq_lock);
>  
> +	spin_lock_init(&lif->deferred.lock);
> +	INIT_LIST_HEAD(&lif->deferred.list);
> +	INIT_WORK(&lif->deferred.work, ionic_lif_deferred_work);
> +
>  	/* allocate lif info */
>  	lif->info_sz = ALIGN(sizeof(*lif->info), PAGE_SIZE);
>  	lif->info = dma_alloc_coherent(dev, lif->info_sz,
> @@ -607,6 +947,8 @@ static void ionic_lif_free(struct lif *lif)
>  	ionic_qcqs_free(lif);
>  	ionic_lif_reset(lif);
>  

I don't think you want deferred.work running while reset is executing..
cancel_work_sync should happen as early as you close the netdevice.

I assume ionic_lif_reset will flush all configurations and you don't
need to cleanup anything manually?  or any data structure stored in
driver ?

> +	cancel_work_sync(&lif->deferred.work);
> +
>  	/* free lif info */
>  	dma_free_coherent(dev, lif->info_sz, lif->info, lif->info_pa);
>  	lif->info = NULL;
> @@ -975,6 +1317,37 @@ static int ionic_init_nic_features(struct lif
> *lif)
>  	return 0;
>  }
>  
> +static int ionic_station_set(struct lif *lif)
> +{
> +	struct net_device *netdev = lif->netdev;
> +	struct ionic_admin_ctx ctx = {
> +		.work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
> +		.cmd.lif_getattr = {
> +			.opcode = CMD_OPCODE_LIF_GETATTR,
> +			.index = cpu_to_le16(lif->index),
> +			.attr = IONIC_LIF_ATTR_MAC,
> +		},
> +	};
> +	int err;
> +
> +	err = ionic_adminq_post_wait(lif, &ctx);
> +	if (err)
> +		return err;
> +
> +	if (!is_zero_ether_addr(netdev->dev_addr)) {
> +		netdev_dbg(lif->netdev, "deleting station MAC addr
> %pM\n",
> +			   netdev->dev_addr);
> +		ionic_lif_addr(lif, netdev->dev_addr, false);
> +	}
> +	memcpy(netdev->dev_addr, ctx.comp.lif_getattr.mac,
> +	       netdev->addr_len);
> +	netdev_dbg(lif->netdev, "adding station MAC addr %pM\n",
> +		   netdev->dev_addr);
> +	ionic_lif_addr(lif, netdev->dev_addr, true);
> +
> +	return 0;
> +}
> +
>  static int ionic_lif_init(struct lif *lif)
>  {
>  	struct ionic_dev *idev = &lif->ionic->idev;
> @@ -1039,6 +1412,10 @@ static int ionic_lif_init(struct lif *lif)
>  	if (err)
>  		goto err_out_notifyq_deinit;
>  
> +	err = ionic_station_set(lif);
> +	if (err)
> +		goto err_out_notifyq_deinit;
> +
>  	set_bit(LIF_INITED, lif->state);
>  
>  	return 0;
> diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h
> b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
> index 9f112aa69033..20b4fa573f77 100644
> --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h
> +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
> @@ -60,6 +60,29 @@ struct qcq {
>  #define napi_to_qcq(napi)	container_of(napi, struct qcq, napi)
>  #define napi_to_cq(napi)	(&napi_to_qcq(napi)->cq)
>  
> +enum ionic_deferred_work_type {
> +	DW_TYPE_RX_MODE,
> +	DW_TYPE_RX_ADDR_ADD,
> +	DW_TYPE_RX_ADDR_DEL,
> +	DW_TYPE_LINK_STATUS,
> +	DW_TYPE_LIF_RESET,
> +};
> +
> +struct ionic_deferred_work {
> +	struct list_head list;
> +	enum ionic_deferred_work_type type;
> +	union {
> +		unsigned int rx_mode;
> +		u8 addr[ETH_ALEN];
> +	};
> +};
> +
> +struct ionic_deferred {
> +	spinlock_t lock;		/* lock for deferred work list */
> +	struct list_head list;
> +	struct work_struct work;
> +};
> +
>  enum lif_state_flags {
>  	LIF_INITED,
>  	LIF_UP,
> @@ -87,13 +110,19 @@ struct lif {
>  	u64 last_eid;
>  	unsigned int neqs;
>  	unsigned int nxqs;
> +	unsigned int rx_mode;
>  	u64 hw_features;
> +	bool mc_overflow;
> +	unsigned int nmcast;
> +	bool uc_overflow;
> +	unsigned int nucast;
>  
>  	struct lif_info *info;
>  	dma_addr_t info_pa;
>  	u32 info_sz;
>  
>  	struct rx_filters rx_filters;
> +	struct ionic_deferred deferred;
>  	unsigned long *dbid_inuse;
>  	unsigned int dbid_count;
>  	struct dentry *dentry;

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox