Netdev List
 help / color / mirror / Atom feed
* Re: [PATCH] qmi_wwan: support modify usbnet's rx_urb_size
From: Daniele Palmas @ 2020-08-03 10:33 UTC (permalink / raw)
  To: Greg KH
  Cc: yzc666, Bjørn Mork, David Miller, kuba, netdev, linux-usb,
	carl
In-Reply-To: <20200803094931.GD635660@kroah.com>

Il giorno lun 3 ago 2020 alle ore 11:49 Greg KH
<gregkh@linuxfoundation.org> ha scritto:
>
> On Mon, Aug 03, 2020 at 10:26:18AM +0200, Daniele Palmas wrote:
> > Hi Greg,
> >
> > Il giorno lun 3 ago 2020 alle ore 10:18 Greg KH
> > <gregkh@linuxfoundation.org> ha scritto:
> > >
> > > On Mon, Aug 03, 2020 at 02:51:05PM +0800, yzc666@netease.com wrote:
> > > > From: carl <carl.yin@quectel.com>
> > > >
> > > >     When QMUX enabled, the 'dl-datagram-max-size' can be 4KB/16KB/31KB depend on QUALCOMM's chipsets.
> > > >     User can set 'dl-datagram-max-size' by 'QMI_WDA_SET_DATA_FORMAT'.
> > > >     The usbnet's rx_urb_size must lager than or equal to the 'dl-datagram-max-size'.
> > > >     This patch allow user to modify usbnet's rx_urb_size by next command.
> > > >
> > > >               echo 4096 > /sys/class/net/wwan0/qmi/rx_urb_size
> > > >
> > > >               Next commnds show how to set and query 'dl-datagram-max-size' by qmicli
> > > >               # qmicli -d /dev/cdc-wdm1 --wda-set-data-format="link-layer-protocol=raw-ip, ul-protocol=qmap,
> > > >                               dl-protocol=qmap, dl-max-datagrams=32, dl-datagram-max-size=31744, ep-type=hsusb, ep-iface-number=4"
> > > >               [/dev/cdc-wdm1] Successfully set data format
> > > >                                       QoS flow header: no
> > > >                                   Link layer protocol: 'raw-ip'
> > > >                      Uplink data aggregation protocol: 'qmap'
> > > >                    Downlink data aggregation protocol: 'qmap'
> > > >                                         NDP signature: '0'
> > > >               Downlink data aggregation max datagrams: '10'
> > > >                    Downlink data aggregation max size: '4096'
> > > >
> > > >           # qmicli -d /dev/cdc-wdm1 --wda-get-data-format
> > > >               [/dev/cdc-wdm1] Successfully got data format
> > > >                                  QoS flow header: no
> > > >                              Link layer protocol: 'raw-ip'
> > > >                 Uplink data aggregation protocol: 'qmap'
> > > >               Downlink data aggregation protocol: 'qmap'
> > > >                                    NDP signature: '0'
> > > >               Downlink data aggregation max datagrams: '10'
> > > >               Downlink data aggregation max size: '4096'
> > > >
> > > > Signed-off-by: carl <carl.yin@quectel.com>
> > > > ---
> > > >  drivers/net/usb/qmi_wwan.c | 39 ++++++++++++++++++++++++++++++++++++++
> > > >  1 file changed, 39 insertions(+)
> > > >
> > > > diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
> > > > index 07c42c0719f5b..8ea57fd99ae43 100644
> > > > --- a/drivers/net/usb/qmi_wwan.c
> > > > +++ b/drivers/net/usb/qmi_wwan.c
> > > > @@ -400,6 +400,44 @@ static ssize_t raw_ip_store(struct device *d,  struct device_attribute *attr, co
> > > >       return ret;
> > > >  }
> > > >
> > > > +static ssize_t rx_urb_size_show(struct device *d, struct device_attribute *attr, char *buf)
> > > > +{
> > > > +     struct usbnet *dev = netdev_priv(to_net_dev(d));
> > > > +
> > > > +     return sprintf(buf, "%zd\n", dev->rx_urb_size);
> > >
> > > Why do you care about this?
> > >
> > > > +}
> > > > +
> > > > +static ssize_t rx_urb_size_store(struct device *d,  struct device_attribute *attr,
> > > > +                              const char *buf, size_t len)
> > > > +{
> > > > +     struct usbnet *dev = netdev_priv(to_net_dev(d));
> > > > +     u32 rx_urb_size;
> > > > +     int ret;
> > > > +
> > > > +     if (kstrtou32(buf, 0, &rx_urb_size))
> > > > +             return -EINVAL;
> > > > +
> > > > +     /* no change? */
> > > > +     if (rx_urb_size == dev->rx_urb_size)
> > > > +             return len;
> > > > +
> > > > +     if (!rtnl_trylock())
> > > > +             return restart_syscall();
> > > > +
> > > > +     /* we don't want to modify a running netdev */
> > > > +     if (netif_running(dev->net)) {
> > > > +             netdev_err(dev->net, "Cannot change a running device\n");
> > > > +             ret = -EBUSY;
> > > > +             goto err;
> > > > +     }
> > > > +
> > > > +     dev->rx_urb_size = rx_urb_size;
> > > > +     ret = len;
> > > > +err:
> > > > +     rtnl_unlock();
> > > > +     return ret;
> > > > +}
> > > > +
> > > >  static ssize_t add_mux_show(struct device *d, struct device_attribute *attr, char *buf)
> > > >  {
> > > >       struct net_device *dev = to_net_dev(d);
> > > > @@ -505,6 +543,7 @@ static DEVICE_ATTR_RW(add_mux);
> > > >  static DEVICE_ATTR_RW(del_mux);
> > > >
> > > >  static struct attribute *qmi_wwan_sysfs_attrs[] = {
> > > > +     &dev_attr_rx_urb_size.attr,
> > >
> > > You added a driver-specific sysfs file and did not document in in
> > > Documentation/ABI?  That's not ok, sorry, please fix up.
> > >
> > > Actually, no, this all should be done "automatically", do not change the
> > > urb size on the fly.  Change it at probe time based on the device you
> > > are using, do not force userspace to "know" what to do here, as it will
> > > not know that at all.
> > >
> >
> > the problem with doing at probe time is that rx_urb_size is not fixed,
> > but depends on the configuration done at the userspace level with
> > QMI_WDA_SET_DATA_FORMAT, so the userspace knows that.
>
> Where does QMI_WDA_SET_DATA_FORMAT come from?
>

This is a request of Qualcomm proprietary protocol used, among other
things, to configure data aggregation for modems. There's an open
source userspace implementation in the libqmi project
(https://cgit.freedesktop.org/libqmi/tree/data/qmi-service-wda.json)

> And the commit log says that this "depends on the chipset being used",
> so why don't you know that at probe time, does the chipset change?  :)
>

Me too does not understand this, I let the author explain...

> > Currently there's a workaround for setting rx_urb_size i.e. changing
> > the network interface MTU: this is fine for most uses with qmap, but
> > there's the limitation that certain values (multiple of the endpoint
> > size) are not allowed.
>
> Why not just set it really high to start with?  That should not affect
> any older devices, as the urb size does not matter.  The only thing is
> if it is too small that things can not move as fast as they might be
> able to.
>

Yes, this was proposed in the past by Bjørn
(https://lists.freedesktop.org/archives/libqmi-devel/2020-February/003221.html),
but I was not sure about issues with old modems.

Now I understand that there are no such issues, then I agree this is
the simplest solution: I've seen modems requiring as much as 49152,
but usually the default for qmap is <= 16384.

And, by the way, increasing the rx urb size is required also in
non-qmap mode, since the current value leads to babbling issues with
some chipsets (mine
https://www.spinics.net/lists/linux-usb/msg198025.html and Paul's
https://lists.freedesktop.org/archives/libqmi-devel/2020-February/003217.html),
so I think we should definitely increase this also for non-qmap mode.

Bjørn, what do you think?

Thanks,
Daniele

> thanks,
>
> greg k-h

^ permalink raw reply

* Re: [PATCH net-next RFC 00/13] Add devlink reload level option
From: Vasundhara Volam @ 2020-08-03 10:24 UTC (permalink / raw)
  To: Jacob Keller
  Cc: Moshe Shemesh, David S. Miller, Jiri Pirko, Netdev, open list
In-Reply-To: <a3e20b44-9399-93c1-210f-e3c1172bf60d@intel.com>

On Tue, Jul 28, 2020 at 10:13 PM Jacob Keller <jacob.e.keller@intel.com> wrote:
>
>
>
> On 7/27/2020 10:25 PM, Vasundhara Volam wrote:
> > On Mon, Jul 27, 2020 at 4:36 PM Moshe Shemesh <moshe@mellanox.com> wrote:
> >>
> >> Introduce new option on devlink reload API to enable the user to select the
> >> reload level required. Complete support for all levels in mlx5.
> >> The following reload levels are supported:
> >>   driver: Driver entities re-instantiation only.
> >>   fw_reset: Firmware reset and driver entities re-instantiation.
> > The Name is a little confusing. I think it should be renamed to
> > fw_live_reset (in which both firmware and driver entities are
> > re-instantiated).  For only fw_reset, the driver should not undergo
> > reset (it requires a driver reload for firmware to undergo reset).
> >
>
> So, I think the differentiation here is that "live_patch" doesn't reset
> anything.
This seems similar to flashing the firmware and does not reset anything.

>
> >>   fw_live_patch: Firmware live patching only.
> > This level is not clear. Is this similar to flashing??
> >
> > Also I have a basic query. The reload command is split into
> > reload_up/reload_down handlers (Please correct me if this behaviour is
> > changed with this patchset). What if the vendor specific driver does
> > not support up/down and needs only a single handler to fire a firmware
> > reset or firmware live reset command?
>
> In the "reload_down" handler, they would trigger the appropriate reset,
> and quiesce anything that needs to be done. Then on reload up, it would
> restore and bring up anything quiesced in the first stage.
Yes, I got the "reload_down" and "reload_up". Similar to the device
"remove" and "re-probe" respectively.

But our requirement is a similar "ethtool reset" command, where
ethtool calls a single callback in driver and driver just sends a
firmware command for doing the reset. Once firmware receives the
command, it will initiate the reset of driver and firmware entities
asynchronously.

^ permalink raw reply

* Re: [PATCH]     qmi_wwan: support modify usbnet's rx_urb_size
From: kernel test robot @ 2020-08-03 10:01 UTC (permalink / raw)
  To: yzc666, bjorn; +Cc: kbuild-all, davem, kuba, netdev, linux-usb, carl
In-Reply-To: <20200803065105.8997-1-yzc666@netease.com>

[-- Attachment #1: Type: text/plain, Size: 2177 bytes --]

Hi,

I love your patch! Yet something to improve:

[auto build test ERROR on sparc-next/master]
[also build test ERROR on linus/master v5.8 next-20200731]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/yzc666-netease-com/qmi_wwan-support-modify-usbnet-s-rx_urb_size/20200803-152459
base:   https://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-next.git master
config: sh-allmodconfig (attached as .config)
compiler: sh4-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=sh 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

>> drivers/net/usb/qmi_wwan.c:546:3: error: 'dev_attr_rx_urb_size' undeclared here (not in a function)
     546 |  &dev_attr_rx_urb_size.attr,
         |   ^~~~~~~~~~~~~~~~~~~~
   drivers/net/usb/qmi_wwan.c:410:16: warning: 'rx_urb_size_store' defined but not used [-Wunused-function]
     410 | static ssize_t rx_urb_size_store(struct device *d,  struct device_attribute *attr,
         |                ^~~~~~~~~~~~~~~~~
   drivers/net/usb/qmi_wwan.c:403:16: warning: 'rx_urb_size_show' defined but not used [-Wunused-function]
     403 | static ssize_t rx_urb_size_show(struct device *d, struct device_attribute *attr, char *buf)
         |                ^~~~~~~~~~~~~~~~

vim +/dev_attr_rx_urb_size +546 drivers/net/usb/qmi_wwan.c

   544	
   545	static struct attribute *qmi_wwan_sysfs_attrs[] = {
 > 546		&dev_attr_rx_urb_size.attr,
   547		&dev_attr_raw_ip.attr,
   548		&dev_attr_add_mux.attr,
   549		&dev_attr_del_mux.attr,
   550		NULL,
   551	};
   552	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 55659 bytes --]

^ permalink raw reply

* Re: [net v3] drivers/net/wan/lapbether: Use needed_headroom instead of hard_header_len
From: Willem de Bruijn @ 2020-08-03  9:49 UTC (permalink / raw)
  To: Xie He
  Cc: David S. Miller, Jakub Kicinski, Network Development,
	linux-kernel, Linux X25, Willem de Bruijn, Brian Norris
In-Reply-To: <20200802195046.402539-1-xie.he.0141@gmail.com>

On Sun, Aug 2, 2020 at 9:51 PM Xie He <xie.he.0141@gmail.com> wrote:
>
> In net/packet/af_packet.c, the function packet_snd first reserves a
> headroom of length (dev->hard_header_len + dev->needed_headroom).
> Then if the socket is a SOCK_DGRAM socket, it calls dev_hard_header,
> which calls dev->header_ops->create, to create the link layer header.
> If the socket is a SOCK_RAW socket, it "un-reserves" a headroom of
> length (dev->hard_header_len), and assumes the user to provide the
> appropriate link layer header.
>
> So according to the logic of af_packet.c, dev->hard_header_len should
> be the length of the header that would be created by
> dev->header_ops->create.
>
> However, this driver doesn't provide dev->header_ops, so logically
> dev->hard_header_len should be 0.
>
> So we should use dev->needed_headroom instead of dev->hard_header_len
> to request necessary headroom to be allocated.
>
> This change fixes kernel panic when this driver is used with AF_PACKET
> SOCK_RAW sockets. Call stack when panic:
>
> [  168.399197] skbuff: skb_under_panic: text:ffffffff819d95fb len:20
> put:14 head:ffff8882704c0a00 data:ffff8882704c09fd tail:0x11 end:0xc0
> dev:veth0
> ...
> [  168.399255] Call Trace:
> [  168.399259]  skb_push.cold+0x14/0x24
> [  168.399262]  eth_header+0x2b/0xc0
> [  168.399267]  lapbeth_data_transmit+0x9a/0xb0 [lapbether]
> [  168.399275]  lapb_data_transmit+0x22/0x2c [lapb]
> [  168.399277]  lapb_transmit_buffer+0x71/0xb0 [lapb]
> [  168.399279]  lapb_kick+0xe3/0x1c0 [lapb]
> [  168.399281]  lapb_data_request+0x76/0xc0 [lapb]
> [  168.399283]  lapbeth_xmit+0x56/0x90 [lapbether]
> [  168.399286]  dev_hard_start_xmit+0x91/0x1f0
> [  168.399289]  ? irq_init_percpu_irqstack+0xc0/0x100
> [  168.399291]  __dev_queue_xmit+0x721/0x8e0
> [  168.399295]  ? packet_parse_headers.isra.0+0xd2/0x110
> [  168.399297]  dev_queue_xmit+0x10/0x20
> [  168.399298]  packet_sendmsg+0xbf0/0x19b0
> ......
>
> Additional change:
> When sending, check skb->len to ensure the 1-byte pseudo header is
> present before reading it.
>
> Cc: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
> Cc: Brian Norris <briannorris@chromium.org>
> Signed-off-by: Xie He <xie.he.0141@gmail.com>

Overall, looks great. A  few nits.

It's [PATCH net v3], not [net v3]

> diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c
> index b2868433718f..8a3f7ba36f7e 100644
> --- a/drivers/net/wan/lapbether.c
> +++ b/drivers/net/wan/lapbether.c
> @@ -157,6 +157,9 @@ static netdev_tx_t lapbeth_xmit(struct sk_buff *skb,
>         if (!netif_running(dev))
>                 goto drop;
>
> +       if (skb->len < 1)
> +               goto drop;
> +

Might be worth a comment along the lines of: /* upper layers pass a
control byte. must validate pf_packet input */

>         switch (skb->data[0]) {
>         case X25_IFACE_DATA:
>                 break;
> @@ -305,6 +308,7 @@ static void lapbeth_setup(struct net_device *dev)
>         dev->netdev_ops      = &lapbeth_netdev_ops;
>         dev->needs_free_netdev = true;
>         dev->type            = ARPHRD_X25;
> +       dev->hard_header_len = 0;

Technically not needed. The struct is allocated with kvzalloc, z for
__GFP_ZERO. Fine to leave if intended as self-describing comment, of
course.

>         dev->mtu             = 1000;
>         dev->addr_len        = 0;
>  }
> @@ -331,7 +335,8 @@ static int lapbeth_new_device(struct net_device *dev)
>          * then this driver prepends a length field of 2 bytes,
>          * then the underlying Ethernet device prepends its own header.
>          */
> -       ndev->hard_header_len = -1 + 3 + 2 + dev->hard_header_len;
> +       ndev->needed_headroom = -1 + 3 + 2 + dev->hard_header_len
> +                                          + dev->needed_headroom;
>
>         lapbeth = netdev_priv(ndev);
>         lapbeth->axdev = ndev;
> --
> 2.25.1
>

^ permalink raw reply

* Re: [PATCH] qmi_wwan: support modify usbnet's rx_urb_size
From: Greg KH @ 2020-08-03  9:49 UTC (permalink / raw)
  To: Daniele Palmas
  Cc: yzc666, Bjørn Mork, David Miller, kuba, netdev, linux-usb,
	carl
In-Reply-To: <CAGRyCJE-BF=0tWakreObGv-skahDp-ui8zP1TPPkX48sMFk4-w@mail.gmail.com>

On Mon, Aug 03, 2020 at 10:26:18AM +0200, Daniele Palmas wrote:
> Hi Greg,
> 
> Il giorno lun 3 ago 2020 alle ore 10:18 Greg KH
> <gregkh@linuxfoundation.org> ha scritto:
> >
> > On Mon, Aug 03, 2020 at 02:51:05PM +0800, yzc666@netease.com wrote:
> > > From: carl <carl.yin@quectel.com>
> > >
> > >     When QMUX enabled, the 'dl-datagram-max-size' can be 4KB/16KB/31KB depend on QUALCOMM's chipsets.
> > >     User can set 'dl-datagram-max-size' by 'QMI_WDA_SET_DATA_FORMAT'.
> > >     The usbnet's rx_urb_size must lager than or equal to the 'dl-datagram-max-size'.
> > >     This patch allow user to modify usbnet's rx_urb_size by next command.
> > >
> > >               echo 4096 > /sys/class/net/wwan0/qmi/rx_urb_size
> > >
> > >               Next commnds show how to set and query 'dl-datagram-max-size' by qmicli
> > >               # qmicli -d /dev/cdc-wdm1 --wda-set-data-format="link-layer-protocol=raw-ip, ul-protocol=qmap,
> > >                               dl-protocol=qmap, dl-max-datagrams=32, dl-datagram-max-size=31744, ep-type=hsusb, ep-iface-number=4"
> > >               [/dev/cdc-wdm1] Successfully set data format
> > >                                       QoS flow header: no
> > >                                   Link layer protocol: 'raw-ip'
> > >                      Uplink data aggregation protocol: 'qmap'
> > >                    Downlink data aggregation protocol: 'qmap'
> > >                                         NDP signature: '0'
> > >               Downlink data aggregation max datagrams: '10'
> > >                    Downlink data aggregation max size: '4096'
> > >
> > >           # qmicli -d /dev/cdc-wdm1 --wda-get-data-format
> > >               [/dev/cdc-wdm1] Successfully got data format
> > >                                  QoS flow header: no
> > >                              Link layer protocol: 'raw-ip'
> > >                 Uplink data aggregation protocol: 'qmap'
> > >               Downlink data aggregation protocol: 'qmap'
> > >                                    NDP signature: '0'
> > >               Downlink data aggregation max datagrams: '10'
> > >               Downlink data aggregation max size: '4096'
> > >
> > > Signed-off-by: carl <carl.yin@quectel.com>
> > > ---
> > >  drivers/net/usb/qmi_wwan.c | 39 ++++++++++++++++++++++++++++++++++++++
> > >  1 file changed, 39 insertions(+)
> > >
> > > diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
> > > index 07c42c0719f5b..8ea57fd99ae43 100644
> > > --- a/drivers/net/usb/qmi_wwan.c
> > > +++ b/drivers/net/usb/qmi_wwan.c
> > > @@ -400,6 +400,44 @@ static ssize_t raw_ip_store(struct device *d,  struct device_attribute *attr, co
> > >       return ret;
> > >  }
> > >
> > > +static ssize_t rx_urb_size_show(struct device *d, struct device_attribute *attr, char *buf)
> > > +{
> > > +     struct usbnet *dev = netdev_priv(to_net_dev(d));
> > > +
> > > +     return sprintf(buf, "%zd\n", dev->rx_urb_size);
> >
> > Why do you care about this?
> >
> > > +}
> > > +
> > > +static ssize_t rx_urb_size_store(struct device *d,  struct device_attribute *attr,
> > > +                              const char *buf, size_t len)
> > > +{
> > > +     struct usbnet *dev = netdev_priv(to_net_dev(d));
> > > +     u32 rx_urb_size;
> > > +     int ret;
> > > +
> > > +     if (kstrtou32(buf, 0, &rx_urb_size))
> > > +             return -EINVAL;
> > > +
> > > +     /* no change? */
> > > +     if (rx_urb_size == dev->rx_urb_size)
> > > +             return len;
> > > +
> > > +     if (!rtnl_trylock())
> > > +             return restart_syscall();
> > > +
> > > +     /* we don't want to modify a running netdev */
> > > +     if (netif_running(dev->net)) {
> > > +             netdev_err(dev->net, "Cannot change a running device\n");
> > > +             ret = -EBUSY;
> > > +             goto err;
> > > +     }
> > > +
> > > +     dev->rx_urb_size = rx_urb_size;
> > > +     ret = len;
> > > +err:
> > > +     rtnl_unlock();
> > > +     return ret;
> > > +}
> > > +
> > >  static ssize_t add_mux_show(struct device *d, struct device_attribute *attr, char *buf)
> > >  {
> > >       struct net_device *dev = to_net_dev(d);
> > > @@ -505,6 +543,7 @@ static DEVICE_ATTR_RW(add_mux);
> > >  static DEVICE_ATTR_RW(del_mux);
> > >
> > >  static struct attribute *qmi_wwan_sysfs_attrs[] = {
> > > +     &dev_attr_rx_urb_size.attr,
> >
> > You added a driver-specific sysfs file and did not document in in
> > Documentation/ABI?  That's not ok, sorry, please fix up.
> >
> > Actually, no, this all should be done "automatically", do not change the
> > urb size on the fly.  Change it at probe time based on the device you
> > are using, do not force userspace to "know" what to do here, as it will
> > not know that at all.
> >
> 
> the problem with doing at probe time is that rx_urb_size is not fixed,
> but depends on the configuration done at the userspace level with
> QMI_WDA_SET_DATA_FORMAT, so the userspace knows that.

Where does QMI_WDA_SET_DATA_FORMAT come from?

And the commit log says that this "depends on the chipset being used",
so why don't you know that at probe time, does the chipset change?  :)

> Currently there's a workaround for setting rx_urb_size i.e. changing
> the network interface MTU: this is fine for most uses with qmap, but
> there's the limitation that certain values (multiple of the endpoint
> size) are not allowed.

Why not just set it really high to start with?  That should not affect
any older devices, as the urb size does not matter.  The only thing is
if it is too small that things can not move as fast as they might be
able to.

thanks,

greg k-h

^ permalink raw reply

* Re: [Linux-kernel-mentees] [PATCH net] rds: Prevent kernel-infoleak in rds_notify_queue_get()
From: Dan Carpenter @ 2020-08-03  9:34 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Greg Kroah-Hartman, Leon Romanovsky, Peilin Ye, Santosh Shilimkar,
	David S. Miller, Jakub Kicinski, Arnd Bergmann,
	linux-kernel-mentees, netdev, linux-rdma, rds-devel, linux-kernel
In-Reply-To: <20200801144030.GM24045@ziepe.ca>

Ah, thanks.  We've had a bunch of discussions about these leaks but I
wasn't aware of this.

regards,
dan carpenter


^ permalink raw reply

* Re: [PATCH net-next 2/2] vxlan: fix getting tos value from DSCP field
From: Guillaume Nault @ 2020-08-03  9:30 UTC (permalink / raw)
  To: Hangbin Liu
  Cc: netdev, Petr Machata, David S . Miller, Roopa Prabhu, David Ahern,
	Eelco Chaudron
In-Reply-To: <20200803080217.391850-3-liuhangbin@gmail.com>

On Mon, Aug 03, 2020 at 04:02:17PM +0800, Hangbin Liu wrote:
> In commit 71130f29979c ("vxlan: fix tos value before xmit") we strict
> the vxlan tos value before xmit. But as IP tos field has been obsoleted
> by RFC2474, and updated by RFC3168 later. We should use new DSCP field,
> or we will lost the first 3 bits value when xmit.
> 
Why sending this patch to net-next? Commit 71130f29979c ("vxlan: fix
tos value before xmit") broke setups where the high TOS bits were used.
This needs to be fixed in net (and probably pushed to -stable too).

> Fixes: 71130f29979c ("vxlan: fix tos value before xmit")
> Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
> ---
>  drivers/net/vxlan.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
> index 77658425db8a..fe051ed0f6db 100644
> --- a/drivers/net/vxlan.c
> +++ b/drivers/net/vxlan.c
> @@ -2722,7 +2722,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
>  		ndst = &rt->dst;
>  		skb_tunnel_check_pmtu(skb, ndst, VXLAN_HEADROOM);
>  
> -		tos = ip_tunnel_ecn_encap(RT_TOS(tos), old_iph, skb);
> +		tos = ip_tunnel_ecn_encap(RT_DSCP(tos), old_iph, skb);
>  		ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
>  		err = vxlan_build_skb(skb, ndst, sizeof(struct iphdr),
>  				      vni, md, flags, udp_sum);
> @@ -2762,7 +2762,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
>  
>  		skb_tunnel_check_pmtu(skb, ndst, VXLAN6_HEADROOM);
>  
> -		tos = ip_tunnel_ecn_encap(RT_TOS(tos), old_iph, skb);
> +		tos = ip_tunnel_ecn_encap(RT_DSCP(tos), old_iph, skb);
>  		ttl = ttl ? : ip6_dst_hoplimit(ndst);
>  		skb_scrub_packet(skb, xnet);
>  		err = vxlan_build_skb(skb, ndst, sizeof(struct ipv6hdr),
> -- 
> 2.25.4
> 


^ permalink raw reply

* Re: [PATCH net-next 1/2] net: add IP_DSCP_MASK
From: Guillaume Nault @ 2020-08-03  9:26 UTC (permalink / raw)
  To: Hangbin Liu
  Cc: netdev, Petr Machata, David S . Miller, Roopa Prabhu, David Ahern,
	Eelco Chaudron
In-Reply-To: <20200803080217.391850-2-liuhangbin@gmail.com>

On Mon, Aug 03, 2020 at 04:02:16PM +0800, Hangbin Liu wrote:
> In RFC1349 it defined TOS field like
> 
>        0     1     2     3     4     5     6     7
>     +-----+-----+-----+-----+-----+-----+-----+-----+
>     |   PRECEDENCE    |          TOS          | MBZ |
>     +-----+-----+-----+-----+-----+-----+-----+-----+
> 
> But this has been obsoleted by RFC2474, and updated by RFC3168 later.
> Now the DS Field should be like
> 
>        0     1     2     3     4     5     6     7
>     +-----+-----+-----+-----+-----+-----+-----+-----+
>     |          DS FIELD, DSCP           | ECN FIELD |
>     +-----+-----+-----+-----+-----+-----+-----+-----+
> 
>       DSCP: differentiated services codepoint
>       ECN:  Explicit Congestion Notification
> 
> So the old IPTOS_TOS_MASK 0x1E should be updated. But since
> changed the value will break UAPI, let's add a new value
> IP_DSCP_MASK 0xFC as a replacement.
> 
> Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
> ---
>  include/uapi/linux/in_route.h | 1 +
>  include/uapi/linux/ip.h       | 2 ++
>  2 files changed, 3 insertions(+)
> 
> diff --git a/include/uapi/linux/in_route.h b/include/uapi/linux/in_route.h
> index 0cc2c23b47f8..26ba4efb054d 100644
> --- a/include/uapi/linux/in_route.h
> +++ b/include/uapi/linux/in_route.h
> @@ -29,5 +29,6 @@
>  #define RTCF_NAT	(RTCF_DNAT|RTCF_SNAT)
>  
>  #define RT_TOS(tos)	((tos)&IPTOS_TOS_MASK)
> +#define RT_DSCP(tos)	((tos)&IP_DSCP_MASK)
>  
>  #endif /* _LINUX_IN_ROUTE_H */
> diff --git a/include/uapi/linux/ip.h b/include/uapi/linux/ip.h
> index e42d13b55cf3..62e4169277eb 100644
> --- a/include/uapi/linux/ip.h
> +++ b/include/uapi/linux/ip.h
> @@ -22,6 +22,8 @@
>  
>  #define IPTOS_TOS_MASK		0x1E
>  #define IPTOS_TOS(tos)		((tos)&IPTOS_TOS_MASK)
> +#define IP_DSCP_MASK		0xFC
> +#define IP_DSCP(tos)		((tos)&IP_DSCP_MASK)

What's the use of IP_DSCP()? It's the same as RT_DSCP().

I guess it's supposed to be the equivalent of IPTOS_TOS(), but that
macro is only used once in the tree, where it could be replaced with
RT_TOS().

I can't see a reason to copy this pattern.


^ permalink raw reply

* Re: [PATCH 2/2] net: phy: Associate device node with fixed PHY
From: Russell King - ARM Linux admin @ 2020-08-03  9:07 UTC (permalink / raw)
  To: Madalin Bucur (OSS)
  Cc: Andrew Lunn, Vikas Singh, f.fainelli@gmail.com,
	hkallweit1@gmail.com, netdev@vger.kernel.org,
	Calvin Johnson (OSS), kuldip dwivedi, Vikas Singh
In-Reply-To: <AM6PR04MB3976BB0CAB0B4270FF932F62EC4D0@AM6PR04MB3976.eurprd04.prod.outlook.com>

On Mon, Aug 03, 2020 at 08:33:19AM +0000, Madalin Bucur (OSS) wrote:
> > -----Original Message-----
> > From: netdev-owner@vger.kernel.org <netdev-owner@vger.kernel.org> On
> > Behalf Of Andrew Lunn
> > Sent: 01 August 2020 18:11
> > To: Russell King - ARM Linux admin <linux@armlinux.org.uk>
> > Cc: Vikas Singh <vikas.singh@puresoftware.com>; f.fainelli@gmail.com;
> > hkallweit1@gmail.com; netdev@vger.kernel.org; Calvin Johnson (OSS)
> > <calvin.johnson@oss.nxp.com>; kuldip dwivedi
> > <kuldip.dwivedi@puresoftware.com>; Madalin Bucur (OSS)
> > <madalin.bucur@oss.nxp.com>; Vikas Singh <vikas.singh@nxp.com>
> > Subject: Re: [PATCH 2/2] net: phy: Associate device node with fixed PHY
> > 
> > On Sat, Aug 01, 2020 at 10:41:32AM +0100, Russell King - ARM Linux admin
> > wrote:
> > > On Sat, Aug 01, 2020 at 09:52:52AM +0530, Vikas Singh wrote:
> > > > Hi Andrew,
> > > >
> > > > Please refer to the "fman" node under
> > > > linux/arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts
> > > > I have two 10G ethernet interfaces out of which one is of fixed-link.
> > >
> > > Please do not top post.
> > >
> > > How does XGMII (which is a 10G only interface) work at 1G speed?  Is
> > > what is in DT itself a hack because fixed-phy doesn't support 10G
> > > modes?
> > 
> > My gut feeling is there is some hack going on here, which is why i'm
> > being persistent at trying to understand what is actually going on
> > here.
> 
> Hi Andrew,
> 
> That platform used 1G fixed link there since there was no support for
> 10G fixed link at the time. PHYlib could have tolerated 10G speed there
> With a one-liner.

That statement is false.  It is not a "one liner".  fixed-phy exposes
the settings to userspace as a Clause 22 PHY register set, and the
Clause 22 register set does not support 10G.  So, a "one liner" would
just be yet another hack.  Adding Clause 45 PHY emulation support
would be a huge task.

> I understand that PHYLink is working to describe this
> Better, but it was not there at that time. Adding the dependency on
> PHYLink was not desirable as most of the users for the DPAA 1 platforms
> were targeting kernels before the PHYLink introduction (and last I've
> looked, it's still under development, with unstable APIs so we'll
> take a look at this later, when it settles).

I think you need to read Documentation/process/stable-api-nonsense.rst
particularly the section "Stable Kernel Source Interfaces".

phylink is going to be under development for quite some time to come
as requirements evolve.  For example, when support for QSFP interfaces
is eventually worked out, I suspect there will need to be some further
changes to the driver interface.  This is completely normal.

Now, as to the stability of the phylink API to drivers, it has in fact
been very stable - it has only changed over the course of this year to
support split PCS, a necessary step for DPAA2 and a few others.  It has
been around in mainline for two years, and has been around much longer
than that, and during that time it has been in mainline, the MAC facing
interface has not changed until recently.

So, I find your claim to be quite unreasonable.

-- 
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 40Mbps down 10Mbps up. Decent connectivity at last!

^ permalink raw reply

* [PATCH bpf-next v6 2/2] bpf: allow to specify ifindex for skb in bpf_prog_test_run_skb
From: Dmitry Yakunin @ 2020-08-03  9:05 UTC (permalink / raw)
  To: alexei.starovoitov, daniel, netdev, bpf; +Cc: eric.dumazet, sdf
In-Reply-To: <20200803090545.82046-1-zeil@yandex-team.ru>

Now skb->dev is unconditionally set to the loopback device in current net
namespace. But if we want to test bpf program which contains code branch
based on ifindex condition (eg filters out localhost packets) it is useful
to allow specifying of ifindex from userspace. This patch adds such option
through ctx_in (__sk_buff) parameter.

Signed-off-by: Dmitry Yakunin <zeil@yandex-team.ru>
---
 net/bpf/test_run.c                               | 22 ++++++++++++++++++++--
 tools/testing/selftests/bpf/prog_tests/skb_ctx.c |  5 +++++
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 736a596..99eb8c6 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -327,6 +327,12 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
 	/* priority is allowed */
 
 	if (!range_is_zero(__skb, offsetofend(struct __sk_buff, priority),
+			   offsetof(struct __sk_buff, ifindex)))
+		return -EINVAL;
+
+	/* ifindex is allowed */
+
+	if (!range_is_zero(__skb, offsetofend(struct __sk_buff, ifindex),
 			   offsetof(struct __sk_buff, cb)))
 		return -EINVAL;
 
@@ -381,6 +387,7 @@ static void convert_skb_to___skb(struct sk_buff *skb, struct __sk_buff *__skb)
 
 	__skb->mark = skb->mark;
 	__skb->priority = skb->priority;
+	__skb->ifindex = skb->dev->ifindex;
 	__skb->tstamp = skb->tstamp;
 	memcpy(__skb->cb, &cb->data, QDISC_CB_PRIV_LEN);
 	__skb->wire_len = cb->pkt_len;
@@ -391,6 +398,8 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
 			  union bpf_attr __user *uattr)
 {
 	bool is_l2 = false, is_direct_pkt_access = false;
+	struct net *net = current->nsproxy->net_ns;
+	struct net_device *dev = net->loopback_dev;
 	u32 size = kattr->test.data_size_in;
 	u32 repeat = kattr->test.repeat;
 	struct __sk_buff *ctx = NULL;
@@ -432,7 +441,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
 		kfree(ctx);
 		return -ENOMEM;
 	}
-	sock_net_set(sk, current->nsproxy->net_ns);
+	sock_net_set(sk, net);
 	sock_init_data(NULL, sk);
 
 	skb = build_skb(data, 0);
@@ -446,7 +455,14 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
 
 	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
 	__skb_put(skb, size);
-	skb->protocol = eth_type_trans(skb, current->nsproxy->net_ns->loopback_dev);
+	if (ctx && ctx->ifindex > 1) {
+		dev = dev_get_by_index(net, ctx->ifindex);
+		if (!dev) {
+			ret = -ENODEV;
+			goto out;
+		}
+	}
+	skb->protocol = eth_type_trans(skb, dev);
 	skb_reset_network_header(skb);
 
 	switch (skb->protocol) {
@@ -502,6 +518,8 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
 		ret = bpf_ctx_finish(kattr, uattr, ctx,
 				     sizeof(struct __sk_buff));
 out:
+	if (dev && dev != net->loopback_dev)
+		dev_put(dev);
 	kfree_skb(skb);
 	bpf_sk_storage_free(sk);
 	kfree(sk);
diff --git a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
index 7021b92..25de86a 100644
--- a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
+++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
@@ -11,6 +11,7 @@ void test_skb_ctx(void)
 		.cb[3] = 4,
 		.cb[4] = 5,
 		.priority = 6,
+		.ifindex = 1,
 		.tstamp = 7,
 		.wire_len = 100,
 		.gso_segs = 8,
@@ -92,6 +93,10 @@ void test_skb_ctx(void)
 		   "ctx_out_priority",
 		   "skb->priority == %d, expected %d\n",
 		   skb.priority, 7);
+	CHECK_ATTR(skb.ifindex != 1,
+		   "ctx_out_ifindex",
+		   "skb->ifindex == %d, expected %d\n",
+		   skb.ifindex, 1);
 	CHECK_ATTR(skb.tstamp != 8,
 		   "ctx_out_tstamp",
 		   "skb->tstamp == %lld, expected %d\n",
-- 
2.7.4


^ permalink raw reply related

* [PATCH bpf-next v6 1/2] bpf: setup socket family and addresses in bpf_prog_test_run_skb
From: Dmitry Yakunin @ 2020-08-03  9:05 UTC (permalink / raw)
  To: alexei.starovoitov, daniel, netdev, bpf; +Cc: eric.dumazet, sdf
In-Reply-To: <20200803090545.82046-1-zeil@yandex-team.ru>

Now it's impossible to test all branches of cgroup_skb bpf program which
accesses skb->family and skb->{local,remote}_ip{4,6} fields because they
are zeroed during socket allocation. This commit fills socket family and
addresses from related fields in constructed skb.

v2:
  - fix build without CONFIG_IPV6 (kernel test robot <lkp@intel.com>)

v3:
  - check skb length before access to inet headers (Eric Dumazet)

v4:
  - do not use pskb_may_pull() in skb length checking (Alexei Starovoitov)

Signed-off-by: Dmitry Yakunin <zeil@yandex-team.ru>
---
 net/bpf/test_run.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index b03c469..736a596 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -449,6 +449,27 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
 	skb->protocol = eth_type_trans(skb, current->nsproxy->net_ns->loopback_dev);
 	skb_reset_network_header(skb);
 
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		sk->sk_family = AF_INET;
+		if (sizeof(struct iphdr) <= skb_headlen(skb)) {
+			sk->sk_rcv_saddr = ip_hdr(skb)->saddr;
+			sk->sk_daddr = ip_hdr(skb)->daddr;
+		}
+		break;
+#if IS_ENABLED(CONFIG_IPV6)
+	case htons(ETH_P_IPV6):
+		sk->sk_family = AF_INET6;
+		if (sizeof(struct ipv6hdr) <= skb_headlen(skb)) {
+			sk->sk_v6_rcv_saddr = ipv6_hdr(skb)->saddr;
+			sk->sk_v6_daddr = ipv6_hdr(skb)->daddr;
+		}
+		break;
+#endif
+	default:
+		break;
+	}
+
 	if (is_l2)
 		__skb_push(skb, hh_len);
 	if (is_direct_pkt_access)
-- 
2.7.4


^ permalink raw reply related

* [PATCH bpf-next v6 0/2] bpf: cgroup skb improvements for bpf_prog_test_run
From: Dmitry Yakunin @ 2020-08-03  9:05 UTC (permalink / raw)
  To: alexei.starovoitov, daniel, netdev, bpf; +Cc: eric.dumazet, sdf

This patchset contains some improvements for testing cgroup/skb programs
through BPF_PROG_TEST_RUN command.

v2:
  - fix build without CONFIG_CGROUP_BPF (kernel test robot <lkp@intel.com>)

v3:
  - fix build without CONFIG_IPV6 (kernel test robot <lkp@intel.com>)

v4:
  - remove cgroup storage related commits for future rework (Daniel Borkmann)

v5:
  - check skb length before access to inet headers (Eric Dumazet)

v6:
  - do not use pskb_may_pull() in skb length checking (Alexei Starovoitov)

Dmitry Yakunin (2):
  bpf: setup socket family and addresses in bpf_prog_test_run_skb
  bpf: allow to specify ifindex for skb in bpf_prog_test_run_skb

 net/bpf/test_run.c                               | 39 ++++++++++++++++++++++--
 tools/testing/selftests/bpf/prog_tests/skb_ctx.c |  5 +++
 2 files changed, 42 insertions(+), 2 deletions(-)

-- 
2.7.4


^ permalink raw reply

* [PATCH net-next] net: stmmac: fix failed to suspend if phy based WOL is enabled
From: Jisheng Zhang @ 2020-08-03  8:56 UTC (permalink / raw)
  To: Giuseppe Cavallaro, Alexandre Torgue, Jose Abreu, David S. Miller,
	Jakub Kicinski, Maxime Coquelin
  Cc: netdev, linux-arm-kernel, linux-kernel

With the latest net-next tree, if test suspend/resume after enabling
WOL, we get error as below:

[  487.086365] dpm_run_callback(): mdio_bus_suspend+0x0/0x30 returns -16
[  487.086375] PM: Device stmmac-0:00 failed to suspend: error -16

-16 means -EBUSY, this is because I didn't enable wakeup of the correct
device when implementing phy based WOL feature. To be honest, I caught
the issue when implementing phy based WOL and then fix it locally, but
forgot to amend the phy based wol patch. Today, I found the issue by
testing net-next tree.

Signed-off-by: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index 05d63963fdb7..ac5e8cc5fb9f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -625,7 +625,7 @@ static int stmmac_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 		int ret = phylink_ethtool_set_wol(priv->phylink, wol);
 
 		if (!ret)
-			device_set_wakeup_enable(&dev->dev, !!wol->wolopts);
+			device_set_wakeup_enable(priv->device, !!wol->wolopts);
 		return ret;
 	}
 
-- 
2.28.0


^ permalink raw reply related

* Re: [PATCH 0/3] Modernize tasklet callback API
From: Allen @ 2020-08-03  8:46 UTC (permalink / raw)
  To: Kees Cook
  Cc: Thomas Gleixner, Greg Kroah-Hartman, Oscar Carter, Romain Perier,
	David S. Miller, Peter Zijlstra, Will Deacon, linux-input,
	linux-kernel, netdev, linux-s390, devel, linux-usb,
	kgdb-bugreport, alsa-devel, Kernel Hardening
In-Reply-To: <202007301113.45D24C9D@keescook>

Kees,

>
> [heavily trimmed CC list because I think lkml is ignoring this
> thread...]
>
> On Thu, Jul 30, 2020 at 09:03:55AM +0200, Thomas Gleixner wrote:
> > Kees,
> >
> > Kees Cook <keescook@chromium.org> writes:
> > > This is the infrastructure changes to prepare the tasklet API for
> > > conversion to passing the tasklet struct as the callback argument instead
> > > of an arbitrary unsigned long. The first patch details why this is useful
> > > (it's the same rationale as the timer_struct changes from a bit ago:
> > > less abuse during memory corruption attacks, more in line with existing
> > > ways of doing things in the kernel, save a little space in struct,
> > > etc). Notably, the existing tasklet API use is much less messy, so there
> > > is less to clean up.
> > >
> > > It's not clear to me which tree this should go through... Greg since it
> > > starts with a USB clean-up, -tip for timer or interrupt, or if I should
> > > just carry it. I'm open to suggestions, but if I don't hear otherwise,
> > > I'll just carry it.
> > >
> > > My goal is to have this merged for v5.9-rc1 so that during the v5.10
> > > development cycle the new API will be available. The entire tree of
> > > changes is here[1] currently, but to split it up by maintainer the
> > > infrastructure changes need to be landed first.
> > >
> > > Review and Acks appreciated! :)
> >
> > I'd rather see tasklets vanish from the planet completely, but that's
> > going to be a daring feat. So, grudgingly:
>
> Understood! I will update the comments near the tasklet API.
>
> > Acked-by: Thomas Gleixner <tglx@linutronix.de>
>

Here's the series re-based on top of 5.8
https://github.com/allenpais/tasklets/tree/V3

Let me know how you would want these to be reviewed.

Also, I was thinking if removing tasklets completely could be a task
on KSPP wiki. If yes, I did like to take ownership of that task. I have a
couple of ideas in mind, which could be discussed in a separate email.

Thanks.

-- 
       - Allen

^ permalink raw reply

* Re: [PATCH] tools/bpf/bpftool: Fix wrong return value in do_dump()
From: Tobias Klauser @ 2020-08-03  8:41 UTC (permalink / raw)
  To: Tianjia Zhang
  Cc: ast, daniel, kafai, songliubraving, yhs, andriin, john.fastabend,
	kpsingh, quentin, kuba, toke, jolsa, netdev, bpf, linux-kernel,
	tianjia.zhang
In-Reply-To: <20200802111540.5384-1-tianjia.zhang@linux.alibaba.com>

On 2020-08-02 at 13:15:40 +0200, Tianjia Zhang <tianjia.zhang@linux.alibaba.com> wrote:
> In case of btf_id does not exist, a negative error code -ENOENT
> should be returned.
> 
> Fixes: c93cc69004df3 ("bpftool: add ability to dump BTF types")
> Cc: Andrii Nakryiko <andriin@fb.com>
> Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>

Reviewed-by: Tobias Klauser <tklauser@distanz.ch>

> ---
>  tools/bpf/bpftool/btf.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
> index faac8189b285..c2f1fd414820 100644
> --- a/tools/bpf/bpftool/btf.c
> +++ b/tools/bpf/bpftool/btf.c
> @@ -596,7 +596,7 @@ static int do_dump(int argc, char **argv)
>  			goto done;
>  		}
>  		if (!btf) {
> -			err = ENOENT;
> +			err = -ENOENT;
>  			p_err("can't find btf with ID (%u)", btf_id);
>  			goto done;
>  		}
> -- 
> 2.26.2
> 

^ permalink raw reply

* Re: [PATCH] qmi_wwan: support modify usbnet's rx_urb_size
From: Sergei Shtylyov @ 2020-08-03  8:38 UTC (permalink / raw)
  To: yzc666, bjorn; +Cc: davem, kuba, netdev, linux-usb, carl
In-Reply-To: <20200803065105.8997-1-yzc666@netease.com>

On 03.08.2020 9:51, yzc666@netease.com wrote:
> From: carl <carl.yin@quectel.com>

    Prefrrably a full name, matching that one in the signoff tag.

> 
>      When QMUX enabled, the 'dl-datagram-max-size' can be 4KB/16KB/31KB depend on QUALCOMM's chipsets.

    No indentation here please, start at column 1 and respect the length limit 
of 75 columns as script/checkpatch.pl says...

>      User can set 'dl-datagram-max-size' by 'QMI_WDA_SET_DATA_FORMAT'.
>      The usbnet's rx_urb_size must lager than or equal to the 'dl-datagram-max-size'.
>      This patch allow user to modify usbnet's rx_urb_size by next command.
> 
> 		echo 4096 > /sys/class/net/wwan0/qmi/rx_urb_size
> 
> 		Next commnds show how to set and query 'dl-datagram-max-size' by qmicli
> 		# qmicli -d /dev/cdc-wdm1 --wda-set-data-format="link-layer-protocol=raw-ip, ul-protocol=qmap,
> 				dl-protocol=qmap, dl-max-datagrams=32, dl-datagram-max-size=31744, ep-type=hsusb, ep-iface-number=4"
> 		[/dev/cdc-wdm1] Successfully set data format
> 		                        QoS flow header: no
> 		                    Link layer protocol: 'raw-ip'
> 		       Uplink data aggregation protocol: 'qmap'
> 		     Downlink data aggregation protocol: 'qmap'
> 		                          NDP signature: '0'
> 		Downlink data aggregation max datagrams: '10'
> 		     Downlink data aggregation max size: '4096'
> 
> 	    # qmicli -d /dev/cdc-wdm1 --wda-get-data-format
> 		[/dev/cdc-wdm1] Successfully got data format
> 		                   QoS flow header: no
> 		               Link layer protocol: 'raw-ip'
> 		  Uplink data aggregation protocol: 'qmap'
> 		Downlink data aggregation protocol: 'qmap'
> 		                     NDP signature: '0'
> 		Downlink data aggregation max datagrams: '10'
> 		Downlink data aggregation max size: '4096'
> 
> Signed-off-by: carl <carl.yin@quectel.com>

    Need your full name.

> ---
>   drivers/net/usb/qmi_wwan.c | 39 ++++++++++++++++++++++++++++++++++++++
>   1 file changed, 39 insertions(+)
> 
> diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
> index 07c42c0719f5b..8ea57fd99ae43 100644
> --- a/drivers/net/usb/qmi_wwan.c
> +++ b/drivers/net/usb/qmi_wwan.c
> @@ -400,6 +400,44 @@ static ssize_t raw_ip_store(struct device *d,  struct device_attribute *attr, co
>   	return ret;
>   }
>   
> +static ssize_t rx_urb_size_show(struct device *d, struct device_attribute *attr, char *buf)
> +{
> +	struct usbnet *dev = netdev_priv(to_net_dev(d));
> +
> +	return sprintf(buf, "%zd\n", dev->rx_urb_size);

   Is dev->rx_urb_size really declared as size_t?

> +}
> +
> +static ssize_t rx_urb_size_store(struct device *d,  struct device_attribute *attr,
> +				 const char *buf, size_t len)
> +{
> +	struct usbnet *dev = netdev_priv(to_net_dev(d));
> +	u32 rx_urb_size;

    ... in this case, sholdn't this variable be also declared as size_t?

> +	int ret;
> +
> +	if (kstrtou32(buf, 0, &rx_urb_size))
> +		return -EINVAL;
> +
> +	/* no change? */
> +	if (rx_urb_size == dev->rx_urb_size)
> +		return len;
> +
> +	if (!rtnl_trylock())
> +		return restart_syscall();
> +
> +	/* we don't want to modify a running netdev */
> +	if (netif_running(dev->net)) {
> +		netdev_err(dev->net, "Cannot change a running device\n");
> +		ret = -EBUSY;
> +		goto err;
> +	}
> +
> +	dev->rx_urb_size = rx_urb_size;
> +	ret = len;
> +err:
> +	rtnl_unlock();
> +	return ret;
> +}
> +
>   static ssize_t add_mux_show(struct device *d, struct device_attribute *attr, char *buf)
>   {
>   	struct net_device *dev = to_net_dev(d);
> @@ -505,6 +543,7 @@ static DEVICE_ATTR_RW(add_mux);
>   static DEVICE_ATTR_RW(del_mux);
>   
>   static struct attribute *qmi_wwan_sysfs_attrs[] = {
> +	&dev_attr_rx_urb_size.attr,
>   	&dev_attr_raw_ip.attr,
>   	&dev_attr_add_mux.attr,
>   	&dev_attr_del_mux.attr,
> 


^ permalink raw reply

* RE: [PATCH 2/2] net: phy: Associate device node with fixed PHY
From: Madalin Bucur (OSS) @ 2020-08-03  8:33 UTC (permalink / raw)
  To: Andrew Lunn, Russell King - ARM Linux admin
  Cc: Vikas Singh, f.fainelli@gmail.com, hkallweit1@gmail.com,
	netdev@vger.kernel.org, Calvin Johnson (OSS), kuldip dwivedi,
	Madalin Bucur (OSS), Vikas Singh
In-Reply-To: <20200801151107.GK1712415@lunn.ch>

> -----Original Message-----
> From: netdev-owner@vger.kernel.org <netdev-owner@vger.kernel.org> On
> Behalf Of Andrew Lunn
> Sent: 01 August 2020 18:11
> To: Russell King - ARM Linux admin <linux@armlinux.org.uk>
> Cc: Vikas Singh <vikas.singh@puresoftware.com>; f.fainelli@gmail.com;
> hkallweit1@gmail.com; netdev@vger.kernel.org; Calvin Johnson (OSS)
> <calvin.johnson@oss.nxp.com>; kuldip dwivedi
> <kuldip.dwivedi@puresoftware.com>; Madalin Bucur (OSS)
> <madalin.bucur@oss.nxp.com>; Vikas Singh <vikas.singh@nxp.com>
> Subject: Re: [PATCH 2/2] net: phy: Associate device node with fixed PHY
> 
> On Sat, Aug 01, 2020 at 10:41:32AM +0100, Russell King - ARM Linux admin
> wrote:
> > On Sat, Aug 01, 2020 at 09:52:52AM +0530, Vikas Singh wrote:
> > > Hi Andrew,
> > >
> > > Please refer to the "fman" node under
> > > linux/arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts
> > > I have two 10G ethernet interfaces out of which one is of fixed-link.
> >
> > Please do not top post.
> >
> > How does XGMII (which is a 10G only interface) work at 1G speed?  Is
> > what is in DT itself a hack because fixed-phy doesn't support 10G
> > modes?
> 
> My gut feeling is there is some hack going on here, which is why i'm
> being persistent at trying to understand what is actually going on
> here.

Hi Andrew,

That platform used 1G fixed link there since there was no support for
10G fixed link at the time. PHYlib could have tolerated 10G speed there
With a one-liner. I understand that PHYLink is working to describe this
Better, but it was not there at that time. Adding the dependency on
PHYLink was not desirable as most of the users for the DPAA 1 platforms
were targeting kernels before the PHYLink introduction (and last I've
looked, it's still under development, with unstable APIs so we'll
take a look at this later, when it settles).

> So Vikas, as Russell pointed out, fixed-link is limited to 1G. It
> seems odd you are running a 10G link at 1G. It is also unclear what
> you have on the other end of that fixed link? Is it an SFP and you are
> afraid of the work needed to get phylink working with ACPI? Is it an
> Ethernet switch, and you are afraid of the work needed to get DSA
> working with ACPI?
> 
> Looking at
> https://www.nxp.com/docs/en/quick-reference-guide/LS1046AQRS.pdf
> 
> I see a XFI/2-5G SGMII port connected to a PHY, which i guess is
> 
>        ethernet@f0000 { /* 10GEC1 */
>                 phy-handle = <&aqr106_phy>;
>                 phy-connection-type = "xgmii";
>         };
> 
> and
>                 aqr106_phy: ethernet-phy@0 {
>                         compatible = "ethernet-phy-ieee802.3-c45";
>                         interrupts = <0 131 4>;
>                         reg = <0x0>;
>                 };
> 
> Which leaves an XFI interface connected to a retimer and then to an
> SFP cage? Is this where you are using fixed-link?
> 
> 	Andrew

^ permalink raw reply

* RE: [PATCH net v3 0/5] DPAA FMan driver fixes
From: Madalin Bucur (OSS) @ 2020-08-03  8:26 UTC (permalink / raw)
  To: Florinel Iordache, davem@davemloft.net, kuba@kernel.org,
	netdev@vger.kernel.org
  Cc: linux-kernel@vger.kernel.org
In-Reply-To: <1596438454-4895-1-git-send-email-florinel.iordache@nxp.com>

> -----Original Message-----
> From: netdev-owner@vger.kernel.org <netdev-owner@vger.kernel.org> On
> Behalf Of Florinel Iordache
> Sent: 03 August 2020 10:07
> To: Madalin Bucur <madalin.bucur@nxp.com>; davem@davemloft.net;
> kuba@kernel.org; netdev@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org; Florinel Iordache
> <florinel.iordache@nxp.com>
> Subject: [PATCH net v3 0/5] DPAA FMan driver fixes
> 
> Here are several fixes for the DPAA FMan driver.
> 
> v2 changes:
> * corrected patch 4 by removing the line added by mistake
> * used longer fixes tags with the first 12 characters of the SHA-1 ID
> 
> v3 changes:
> * remove the empty line inserted after fixes tag
> 
> Florinel Iordache (5):
>   fsl/fman: use 32-bit unsigned integer
>   fsl/fman: fix dereference null return value
>   fsl/fman: fix unreachable code
>   fsl/fman: check dereferencing null pointer
>   fsl/fman: fix eth hash table allocation
> 
>  drivers/net/ethernet/freescale/fman/fman.c       | 3 +--
>  drivers/net/ethernet/freescale/fman/fman_dtsec.c | 4 ++--
>  drivers/net/ethernet/freescale/fman/fman_mac.h   | 2 +-
>  drivers/net/ethernet/freescale/fman/fman_memac.c | 3 +--
>  drivers/net/ethernet/freescale/fman/fman_port.c  | 9 ++++++++-
>  drivers/net/ethernet/freescale/fman/fman_tgec.c  | 2 +-
>  6 files changed, 14 insertions(+), 9 deletions(-)
> 
> --
> 1.9.1

For the series,

Acked-by: Madalin Bucur <madalin.bucur@oss.nxp.com>

^ permalink raw reply

* Re: [PATCH] qmi_wwan: support modify usbnet's rx_urb_size
From: Daniele Palmas @ 2020-08-03  8:26 UTC (permalink / raw)
  To: Greg KH
  Cc: yzc666, Bjørn Mork, David Miller, kuba, netdev, linux-usb,
	carl
In-Reply-To: <20200803081604.GC493272@kroah.com>

Hi Greg,

Il giorno lun 3 ago 2020 alle ore 10:18 Greg KH
<gregkh@linuxfoundation.org> ha scritto:
>
> On Mon, Aug 03, 2020 at 02:51:05PM +0800, yzc666@netease.com wrote:
> > From: carl <carl.yin@quectel.com>
> >
> >     When QMUX enabled, the 'dl-datagram-max-size' can be 4KB/16KB/31KB depend on QUALCOMM's chipsets.
> >     User can set 'dl-datagram-max-size' by 'QMI_WDA_SET_DATA_FORMAT'.
> >     The usbnet's rx_urb_size must lager than or equal to the 'dl-datagram-max-size'.
> >     This patch allow user to modify usbnet's rx_urb_size by next command.
> >
> >               echo 4096 > /sys/class/net/wwan0/qmi/rx_urb_size
> >
> >               Next commnds show how to set and query 'dl-datagram-max-size' by qmicli
> >               # qmicli -d /dev/cdc-wdm1 --wda-set-data-format="link-layer-protocol=raw-ip, ul-protocol=qmap,
> >                               dl-protocol=qmap, dl-max-datagrams=32, dl-datagram-max-size=31744, ep-type=hsusb, ep-iface-number=4"
> >               [/dev/cdc-wdm1] Successfully set data format
> >                                       QoS flow header: no
> >                                   Link layer protocol: 'raw-ip'
> >                      Uplink data aggregation protocol: 'qmap'
> >                    Downlink data aggregation protocol: 'qmap'
> >                                         NDP signature: '0'
> >               Downlink data aggregation max datagrams: '10'
> >                    Downlink data aggregation max size: '4096'
> >
> >           # qmicli -d /dev/cdc-wdm1 --wda-get-data-format
> >               [/dev/cdc-wdm1] Successfully got data format
> >                                  QoS flow header: no
> >                              Link layer protocol: 'raw-ip'
> >                 Uplink data aggregation protocol: 'qmap'
> >               Downlink data aggregation protocol: 'qmap'
> >                                    NDP signature: '0'
> >               Downlink data aggregation max datagrams: '10'
> >               Downlink data aggregation max size: '4096'
> >
> > Signed-off-by: carl <carl.yin@quectel.com>
> > ---
> >  drivers/net/usb/qmi_wwan.c | 39 ++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 39 insertions(+)
> >
> > diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
> > index 07c42c0719f5b..8ea57fd99ae43 100644
> > --- a/drivers/net/usb/qmi_wwan.c
> > +++ b/drivers/net/usb/qmi_wwan.c
> > @@ -400,6 +400,44 @@ static ssize_t raw_ip_store(struct device *d,  struct device_attribute *attr, co
> >       return ret;
> >  }
> >
> > +static ssize_t rx_urb_size_show(struct device *d, struct device_attribute *attr, char *buf)
> > +{
> > +     struct usbnet *dev = netdev_priv(to_net_dev(d));
> > +
> > +     return sprintf(buf, "%zd\n", dev->rx_urb_size);
>
> Why do you care about this?
>
> > +}
> > +
> > +static ssize_t rx_urb_size_store(struct device *d,  struct device_attribute *attr,
> > +                              const char *buf, size_t len)
> > +{
> > +     struct usbnet *dev = netdev_priv(to_net_dev(d));
> > +     u32 rx_urb_size;
> > +     int ret;
> > +
> > +     if (kstrtou32(buf, 0, &rx_urb_size))
> > +             return -EINVAL;
> > +
> > +     /* no change? */
> > +     if (rx_urb_size == dev->rx_urb_size)
> > +             return len;
> > +
> > +     if (!rtnl_trylock())
> > +             return restart_syscall();
> > +
> > +     /* we don't want to modify a running netdev */
> > +     if (netif_running(dev->net)) {
> > +             netdev_err(dev->net, "Cannot change a running device\n");
> > +             ret = -EBUSY;
> > +             goto err;
> > +     }
> > +
> > +     dev->rx_urb_size = rx_urb_size;
> > +     ret = len;
> > +err:
> > +     rtnl_unlock();
> > +     return ret;
> > +}
> > +
> >  static ssize_t add_mux_show(struct device *d, struct device_attribute *attr, char *buf)
> >  {
> >       struct net_device *dev = to_net_dev(d);
> > @@ -505,6 +543,7 @@ static DEVICE_ATTR_RW(add_mux);
> >  static DEVICE_ATTR_RW(del_mux);
> >
> >  static struct attribute *qmi_wwan_sysfs_attrs[] = {
> > +     &dev_attr_rx_urb_size.attr,
>
> You added a driver-specific sysfs file and did not document in in
> Documentation/ABI?  That's not ok, sorry, please fix up.
>
> Actually, no, this all should be done "automatically", do not change the
> urb size on the fly.  Change it at probe time based on the device you
> are using, do not force userspace to "know" what to do here, as it will
> not know that at all.
>

the problem with doing at probe time is that rx_urb_size is not fixed,
but depends on the configuration done at the userspace level with
QMI_WDA_SET_DATA_FORMAT, so the userspace knows that.

Currently there's a workaround for setting rx_urb_size i.e. changing
the network interface MTU: this is fine for most uses with qmap, but
there's the limitation that certain values (multiple of the endpoint
size) are not allowed.

Thanks,
Daniele

> thanks,
>
> greg k-h

^ permalink raw reply

* Re: [PATCH]     qmi_wwan: support modify usbnet's rx_urb_size
From: Greg KH @ 2020-08-03  8:16 UTC (permalink / raw)
  To: yzc666; +Cc: bjorn, davem, kuba, netdev, linux-usb, carl
In-Reply-To: <20200803065105.8997-1-yzc666@netease.com>

On Mon, Aug 03, 2020 at 02:51:05PM +0800, yzc666@netease.com wrote:
> From: carl <carl.yin@quectel.com>
> 
>     When QMUX enabled, the 'dl-datagram-max-size' can be 4KB/16KB/31KB depend on QUALCOMM's chipsets.
>     User can set 'dl-datagram-max-size' by 'QMI_WDA_SET_DATA_FORMAT'.
>     The usbnet's rx_urb_size must lager than or equal to the 'dl-datagram-max-size'.
>     This patch allow user to modify usbnet's rx_urb_size by next command.
> 
> 		echo 4096 > /sys/class/net/wwan0/qmi/rx_urb_size
> 
> 		Next commnds show how to set and query 'dl-datagram-max-size' by qmicli
> 		# qmicli -d /dev/cdc-wdm1 --wda-set-data-format="link-layer-protocol=raw-ip, ul-protocol=qmap,
> 				dl-protocol=qmap, dl-max-datagrams=32, dl-datagram-max-size=31744, ep-type=hsusb, ep-iface-number=4"
> 		[/dev/cdc-wdm1] Successfully set data format
> 		                        QoS flow header: no
> 		                    Link layer protocol: 'raw-ip'
> 		       Uplink data aggregation protocol: 'qmap'
> 		     Downlink data aggregation protocol: 'qmap'
> 		                          NDP signature: '0'
> 		Downlink data aggregation max datagrams: '10'
> 		     Downlink data aggregation max size: '4096'
> 
> 	    # qmicli -d /dev/cdc-wdm1 --wda-get-data-format
> 		[/dev/cdc-wdm1] Successfully got data format
> 		                   QoS flow header: no
> 		               Link layer protocol: 'raw-ip'
> 		  Uplink data aggregation protocol: 'qmap'
> 		Downlink data aggregation protocol: 'qmap'
> 		                     NDP signature: '0'
> 		Downlink data aggregation max datagrams: '10'
> 		Downlink data aggregation max size: '4096'
> 
> Signed-off-by: carl <carl.yin@quectel.com>
> ---
>  drivers/net/usb/qmi_wwan.c | 39 ++++++++++++++++++++++++++++++++++++++
>  1 file changed, 39 insertions(+)
> 
> diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
> index 07c42c0719f5b..8ea57fd99ae43 100644
> --- a/drivers/net/usb/qmi_wwan.c
> +++ b/drivers/net/usb/qmi_wwan.c
> @@ -400,6 +400,44 @@ static ssize_t raw_ip_store(struct device *d,  struct device_attribute *attr, co
>  	return ret;
>  }
>  
> +static ssize_t rx_urb_size_show(struct device *d, struct device_attribute *attr, char *buf)
> +{
> +	struct usbnet *dev = netdev_priv(to_net_dev(d));
> +
> +	return sprintf(buf, "%zd\n", dev->rx_urb_size);

Why do you care about this?

> +}
> +
> +static ssize_t rx_urb_size_store(struct device *d,  struct device_attribute *attr,
> +				 const char *buf, size_t len)
> +{
> +	struct usbnet *dev = netdev_priv(to_net_dev(d));
> +	u32 rx_urb_size;
> +	int ret;
> +
> +	if (kstrtou32(buf, 0, &rx_urb_size))
> +		return -EINVAL;
> +
> +	/* no change? */
> +	if (rx_urb_size == dev->rx_urb_size)
> +		return len;
> +
> +	if (!rtnl_trylock())
> +		return restart_syscall();
> +
> +	/* we don't want to modify a running netdev */
> +	if (netif_running(dev->net)) {
> +		netdev_err(dev->net, "Cannot change a running device\n");
> +		ret = -EBUSY;
> +		goto err;
> +	}
> +
> +	dev->rx_urb_size = rx_urb_size;
> +	ret = len;
> +err:
> +	rtnl_unlock();
> +	return ret;
> +}
> +
>  static ssize_t add_mux_show(struct device *d, struct device_attribute *attr, char *buf)
>  {
>  	struct net_device *dev = to_net_dev(d);
> @@ -505,6 +543,7 @@ static DEVICE_ATTR_RW(add_mux);
>  static DEVICE_ATTR_RW(del_mux);
>  
>  static struct attribute *qmi_wwan_sysfs_attrs[] = {
> +	&dev_attr_rx_urb_size.attr,

You added a driver-specific sysfs file and did not document in in
Documentation/ABI?  That's not ok, sorry, please fix up.

Actually, no, this all should be done "automatically", do not change the
urb size on the fly.  Change it at probe time based on the device you
are using, do not force userspace to "know" what to do here, as it will
not know that at all.

thanks,

greg k-h

^ permalink raw reply

* [PATCH net-next 2/2] vxlan: fix getting tos value from DSCP field
From: Hangbin Liu @ 2020-08-03  8:02 UTC (permalink / raw)
  To: netdev
  Cc: Guillaume Nault, Petr Machata, David S . Miller, Roopa Prabhu,
	David Ahern, Eelco Chaudron, Hangbin Liu
In-Reply-To: <20200803080217.391850-1-liuhangbin@gmail.com>

In commit 71130f29979c ("vxlan: fix tos value before xmit") we strict
the vxlan tos value before xmit. But as IP tos field has been obsoleted
by RFC2474, and updated by RFC3168 later. We should use new DSCP field,
or we will lost the first 3 bits value when xmit.

Fixes: 71130f29979c ("vxlan: fix tos value before xmit")
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
---
 drivers/net/vxlan.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 77658425db8a..fe051ed0f6db 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2722,7 +2722,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 		ndst = &rt->dst;
 		skb_tunnel_check_pmtu(skb, ndst, VXLAN_HEADROOM);
 
-		tos = ip_tunnel_ecn_encap(RT_TOS(tos), old_iph, skb);
+		tos = ip_tunnel_ecn_encap(RT_DSCP(tos), old_iph, skb);
 		ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
 		err = vxlan_build_skb(skb, ndst, sizeof(struct iphdr),
 				      vni, md, flags, udp_sum);
@@ -2762,7 +2762,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 
 		skb_tunnel_check_pmtu(skb, ndst, VXLAN6_HEADROOM);
 
-		tos = ip_tunnel_ecn_encap(RT_TOS(tos), old_iph, skb);
+		tos = ip_tunnel_ecn_encap(RT_DSCP(tos), old_iph, skb);
 		ttl = ttl ? : ip6_dst_hoplimit(ndst);
 		skb_scrub_packet(skb, xnet);
 		err = vxlan_build_skb(skb, ndst, sizeof(struct ipv6hdr),
-- 
2.25.4


^ permalink raw reply related

* [PATCH net-next 1/2] net: add IP_DSCP_MASK
From: Hangbin Liu @ 2020-08-03  8:02 UTC (permalink / raw)
  To: netdev
  Cc: Guillaume Nault, Petr Machata, David S . Miller, Roopa Prabhu,
	David Ahern, Eelco Chaudron, Hangbin Liu
In-Reply-To: <20200803080217.391850-1-liuhangbin@gmail.com>

In RFC1349 it defined TOS field like

       0     1     2     3     4     5     6     7
    +-----+-----+-----+-----+-----+-----+-----+-----+
    |   PRECEDENCE    |          TOS          | MBZ |
    +-----+-----+-----+-----+-----+-----+-----+-----+

But this has been obsoleted by RFC2474, and updated by RFC3168 later.
Now the DS Field should be like

       0     1     2     3     4     5     6     7
    +-----+-----+-----+-----+-----+-----+-----+-----+
    |          DS FIELD, DSCP           | ECN FIELD |
    +-----+-----+-----+-----+-----+-----+-----+-----+

      DSCP: differentiated services codepoint
      ECN:  Explicit Congestion Notification

So the old IPTOS_TOS_MASK 0x1E should be updated. But since
changed the value will break UAPI, let's add a new value
IP_DSCP_MASK 0xFC as a replacement.

Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
---
 include/uapi/linux/in_route.h | 1 +
 include/uapi/linux/ip.h       | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/include/uapi/linux/in_route.h b/include/uapi/linux/in_route.h
index 0cc2c23b47f8..26ba4efb054d 100644
--- a/include/uapi/linux/in_route.h
+++ b/include/uapi/linux/in_route.h
@@ -29,5 +29,6 @@
 #define RTCF_NAT	(RTCF_DNAT|RTCF_SNAT)
 
 #define RT_TOS(tos)	((tos)&IPTOS_TOS_MASK)
+#define RT_DSCP(tos)	((tos)&IP_DSCP_MASK)
 
 #endif /* _LINUX_IN_ROUTE_H */
diff --git a/include/uapi/linux/ip.h b/include/uapi/linux/ip.h
index e42d13b55cf3..62e4169277eb 100644
--- a/include/uapi/linux/ip.h
+++ b/include/uapi/linux/ip.h
@@ -22,6 +22,8 @@
 
 #define IPTOS_TOS_MASK		0x1E
 #define IPTOS_TOS(tos)		((tos)&IPTOS_TOS_MASK)
+#define IP_DSCP_MASK		0xFC
+#define IP_DSCP(tos)		((tos)&IP_DSCP_MASK)
 #define	IPTOS_LOWDELAY		0x10
 #define	IPTOS_THROUGHPUT	0x08
 #define	IPTOS_RELIABILITY	0x04
-- 
2.25.4


^ permalink raw reply related

* [PATCH net-next 0/2] Add IP_DSCP_MASK and fix vxlan tos value before xmit
From: Hangbin Liu @ 2020-08-03  8:02 UTC (permalink / raw)
  To: netdev
  Cc: Guillaume Nault, Petr Machata, David S . Miller, Roopa Prabhu,
	David Ahern, Eelco Chaudron, Hangbin Liu

This patch set is aim to update the old IP_TOS_MASK to new IP_DSCP_MASK
as tos value has been obsoleted for a long time. But to make sure we don't
break any existing behaviour, we can't just replease all IP_TOS_MASK
to new IP_DSCP_MASK.

So let's update it case by case. The first issue we will fix is that vxlan
is unable to take the first 3 bits from DSCP field before xmit. Use the
new RT_DSCP() would resolve this.

Hangbin Liu (2):
  net: add IP_DSCP_MASK
  vxlan: fix getting tos value from DSCP field

 drivers/net/vxlan.c           | 4 ++--
 include/uapi/linux/in_route.h | 1 +
 include/uapi/linux/ip.h       | 2 ++
 3 files changed, 5 insertions(+), 2 deletions(-)

-- 
2.25.4


^ permalink raw reply

* [PATCH v3] seg6_iptunnel: Refactor seg6_lwt_headroom out of uapi header
From: Ioana-Ruxandra Stancioi @ 2020-08-03  7:33 UTC (permalink / raw)
  To: david.lebrun, davem, kuznet, yoshfuji, netdev, linux-kernel
  Cc: elver, glider, Ioana-Ruxandra Stăncioi

From: Ioana-Ruxandra Stăncioi <stancioi@google.com>

Refactor the function seg6_lwt_headroom out of the seg6_iptunnel.h uapi
header, because it is only used in seg6_iptunnel.c. Moreover, it is only
used in the kernel code, as indicated by the "#ifdef __KERNEL__".

Suggested-by: David Miller <davem@davemloft.net>
Signed-off-by: Ioana-Ruxandra Stăncioi <stancioi@google.com>
---
v3:
* Apply David's suggestion and remove the inline tag.
---
 include/uapi/linux/seg6_iptunnel.h | 21 ---------------------
 net/ipv6/seg6_iptunnel.c           | 17 +++++++++++++++++
 2 files changed, 17 insertions(+), 21 deletions(-)

diff --git a/include/uapi/linux/seg6_iptunnel.h b/include/uapi/linux/seg6_iptunnel.h
index 09fb608a35ec..eb815e0d0ac3 100644
--- a/include/uapi/linux/seg6_iptunnel.h
+++ b/include/uapi/linux/seg6_iptunnel.h
@@ -37,25 +37,4 @@ enum {
 	SEG6_IPTUN_MODE_L2ENCAP,
 };
 
-#ifdef __KERNEL__
-
-static inline size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo)
-{
-	int head = 0;
-
-	switch (tuninfo->mode) {
-	case SEG6_IPTUN_MODE_INLINE:
-		break;
-	case SEG6_IPTUN_MODE_ENCAP:
-		head = sizeof(struct ipv6hdr);
-		break;
-	case SEG6_IPTUN_MODE_L2ENCAP:
-		return 0;
-	}
-
-	return ((tuninfo->srh->hdrlen + 1) << 3) + head;
-}
-
-#endif
-
 #endif
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index e0e9f48ab14f..897fa59c47de 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -27,6 +27,23 @@
 #include <net/seg6_hmac.h>
 #endif
 
+static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo)
+{
+	int head = 0;
+
+	switch (tuninfo->mode) {
+	case SEG6_IPTUN_MODE_INLINE:
+		break;
+	case SEG6_IPTUN_MODE_ENCAP:
+		head = sizeof(struct ipv6hdr);
+		break;
+	case SEG6_IPTUN_MODE_L2ENCAP:
+		return 0;
+	}
+
+	return ((tuninfo->srh->hdrlen + 1) << 3) + head;
+}
+
 struct seg6_lwt {
 	struct dst_cache cache;
 	struct seg6_iptunnel_encap tuninfo[];
-- 
2.28.0.163.g6104cc2f0b6-goog


^ permalink raw reply related

* [PATCH bpf-next v2 1/2] bpf: change uapi for bpf iterator map elements
From: Yonghong Song @ 2020-08-03  7:33 UTC (permalink / raw)
  To: bpf, netdev; +Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team
In-Reply-To: <20200803073326.3745149-1-yhs@fb.com>

Commit a5cbe05a6673 ("bpf: Implement bpf iterator for
map elements") added bpf iterator support for
map elements. The map element bpf iterator requires
info to identify a particular map. In the above
commit, the attr->link_create.target_fd is used
to carry map_fd and an enum bpf_iter_link_info
is added to uapi to specify the target_fd actually
representing a map_fd:
    enum bpf_iter_link_info {
	BPF_ITER_LINK_UNSPEC = 0,
	BPF_ITER_LINK_MAP_FD = 1,

	MAX_BPF_ITER_LINK_INFO,
    };

This is an extensible approach as we can grow
enumerator for pid, cgroup_id, etc. and we can
unionize target_fd for pid, cgroup_id, etc.
But in the future, there are chances that
more complex customization may happen, e.g.,
for tasks, it could be filtered based on
both cgroup_id and user_id.

This patch changed the uapi to have fields
	__aligned_u64	iter_info;
	__u32		iter_info_len;
for additional iter_info for link_create.
The iter_info is defined as
	union bpf_iter_link_info {
		struct {
			__u32   map_fd;
		} map;
	};

So future extension for additional customization
will be easier. The bpf_iter_link_info will be
passed to target callback to validate and generic
bpf_iter framework does not need to deal it any
more.

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 include/linux/bpf.h       | 10 ++++---
 include/uapi/linux/bpf.h  | 15 +++++-----
 kernel/bpf/bpf_iter.c     | 58 +++++++++++++++++++--------------------
 kernel/bpf/map_iter.c     | 34 +++++++++++++++++------
 kernel/bpf/syscall.c      |  2 +-
 net/core/bpf_sk_storage.c | 34 +++++++++++++++++------
 6 files changed, 96 insertions(+), 57 deletions(-)

Note: I did not copy include/uapi/linux/bpf.h to
tools/include/uapi/linux/bpf.h in this patch. The
reason is due to resolve_btfids in kernel build.
The resolve_btfids has a dependence on libbpf.
Updating tools/include/uapi/linux/bpf.h without
addition other libbpf changes in the next patch
will cause libbpf build failure and hence
the failure for resolve_btfids build.

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index cef4ef0d2b4e..55f694b63164 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1214,15 +1214,17 @@ struct bpf_iter_aux_info {
 	struct bpf_map *map;
 };
 
-typedef int (*bpf_iter_check_target_t)(struct bpf_prog *prog,
-				       struct bpf_iter_aux_info *aux);
+typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog,
+					union bpf_iter_link_info *linfo,
+					struct bpf_iter_aux_info *aux);
+typedef void (*bpf_iter_detach_target_t)(struct bpf_iter_aux_info *aux);
 
 #define BPF_ITER_CTX_ARG_MAX 2
 struct bpf_iter_reg {
 	const char *target;
-	bpf_iter_check_target_t check_target;
+	bpf_iter_attach_target_t attach_target;
+	bpf_iter_detach_target_t detach_target;
 	u32 ctx_arg_info_size;
-	enum bpf_iter_link_info req_linfo;
 	struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX];
 	const struct bpf_iter_seq_info *seq_info;
 };
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b134e679e9db..0480f893facd 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -81,6 +81,12 @@ struct bpf_cgroup_storage_key {
 	__u32	attach_type;		/* program attach type */
 };
 
+union bpf_iter_link_info {
+	struct {
+		__u32	map_fd;
+	} map;
+};
+
 /* BPF syscall commands, see bpf(2) man-page for details. */
 enum bpf_cmd {
 	BPF_MAP_CREATE,
@@ -249,13 +255,6 @@ enum bpf_link_type {
 	MAX_BPF_LINK_TYPE,
 };
 
-enum bpf_iter_link_info {
-	BPF_ITER_LINK_UNSPEC = 0,
-	BPF_ITER_LINK_MAP_FD = 1,
-
-	MAX_BPF_ITER_LINK_INFO,
-};
-
 /* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
  *
  * NONE(default): No further bpf programs allowed in the subtree.
@@ -623,6 +622,8 @@ union bpf_attr {
 		};
 		__u32		attach_type;	/* attach type */
 		__u32		flags;		/* extra flags */
+		__aligned_u64	iter_info;	/* extra bpf_iter_link_info */
+		__u32		iter_info_len;	/* iter_info length */
 	} link_create;
 
 	struct { /* struct used by BPF_LINK_UPDATE command */
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 363b9cafc2d8..b6715964b685 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -338,8 +338,8 @@ static void bpf_iter_link_release(struct bpf_link *link)
 	struct bpf_iter_link *iter_link =
 		container_of(link, struct bpf_iter_link, link);
 
-	if (iter_link->aux.map)
-		bpf_map_put_with_uref(iter_link->aux.map);
+	if (iter_link->tinfo->reg_info->detach_target)
+		iter_link->tinfo->reg_info->detach_target(&iter_link->aux);
 }
 
 static void bpf_iter_link_dealloc(struct bpf_link *link)
@@ -390,15 +390,35 @@ bool bpf_link_is_iter(struct bpf_link *link)
 
 int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
 {
+	union bpf_iter_link_info __user *ulinfo;
 	struct bpf_link_primer link_primer;
 	struct bpf_iter_target_info *tinfo;
-	struct bpf_iter_aux_info aux = {};
+	union bpf_iter_link_info linfo;
 	struct bpf_iter_link *link;
-	u32 prog_btf_id, target_fd;
+	u32 prog_btf_id, linfo_len;
 	bool existed = false;
-	struct bpf_map *map;
 	int err;
 
+	if (attr->link_create.target_fd || attr->link_create.flags)
+		return -EINVAL;
+
+	memset(&linfo, 0, sizeof(union bpf_iter_link_info));
+
+	ulinfo = u64_to_user_ptr(attr->link_create.iter_info);
+	linfo_len = attr->link_create.iter_info_len;
+	if (!ulinfo ^ !linfo_len)
+		return -EINVAL;
+
+	if (ulinfo) {
+		err = bpf_check_uarg_tail_zero(ulinfo, sizeof(linfo),
+					       linfo_len);
+		if (err)
+			return err;
+		linfo_len = min_t(u32, linfo_len, sizeof(linfo));
+		if (copy_from_user(&linfo, ulinfo, linfo_len))
+			return -EFAULT;
+	}
+
 	prog_btf_id = prog->aux->attach_btf_id;
 	mutex_lock(&targets_mutex);
 	list_for_each_entry(tinfo, &targets, list) {
@@ -411,13 +431,6 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
 	if (!existed)
 		return -ENOENT;
 
-	/* Make sure user supplied flags are target expected. */
-	target_fd = attr->link_create.target_fd;
-	if (attr->link_create.flags != tinfo->reg_info->req_linfo)
-		return -EINVAL;
-	if (!attr->link_create.flags && target_fd)
-		return -EINVAL;
-
 	link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN);
 	if (!link)
 		return -ENOMEM;
@@ -431,28 +444,15 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
 		return err;
 	}
 
-	if (tinfo->reg_info->req_linfo == BPF_ITER_LINK_MAP_FD) {
-		map = bpf_map_get_with_uref(target_fd);
-		if (IS_ERR(map)) {
-			err = PTR_ERR(map);
-			goto cleanup_link;
-		}
-
-		aux.map = map;
-		err = tinfo->reg_info->check_target(prog, &aux);
+	if (tinfo->reg_info->attach_target) {
+		err = tinfo->reg_info->attach_target(prog, &linfo, &link->aux);
 		if (err) {
-			bpf_map_put_with_uref(map);
-			goto cleanup_link;
+			bpf_link_cleanup(&link_primer);
+			return err;
 		}
-
-		link->aux.map = map;
 	}
 
 	return bpf_link_settle(&link_primer);
-
-cleanup_link:
-	bpf_link_cleanup(&link_primer);
-	return err;
 }
 
 static void init_seq_meta(struct bpf_iter_priv_data *priv_data,
diff --git a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c
index fbe1f557cb88..69ce577edf2f 100644
--- a/kernel/bpf/map_iter.c
+++ b/kernel/bpf/map_iter.c
@@ -98,12 +98,18 @@ static struct bpf_iter_reg bpf_map_reg_info = {
 	.seq_info		= &bpf_map_seq_info,
 };
 
-static int bpf_iter_check_map(struct bpf_prog *prog,
-			      struct bpf_iter_aux_info *aux)
+static int bpf_iter_attach_map(struct bpf_prog *prog,
+			       union bpf_iter_link_info *linfo,
+			       struct bpf_iter_aux_info *aux)
 {
 	u32 key_acc_size, value_acc_size, key_size, value_size;
-	struct bpf_map *map = aux->map;
+	struct bpf_map *map;
 	bool is_percpu = false;
+	int err = -EINVAL;
+
+	map = bpf_map_get_with_uref(linfo->map.map_fd);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
 
 	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
 	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
@@ -112,7 +118,7 @@ static int bpf_iter_check_map(struct bpf_prog *prog,
 	else if (map->map_type != BPF_MAP_TYPE_HASH &&
 		 map->map_type != BPF_MAP_TYPE_LRU_HASH &&
 		 map->map_type != BPF_MAP_TYPE_ARRAY)
-		return -EINVAL;
+		goto put_map;
 
 	key_acc_size = prog->aux->max_rdonly_access;
 	value_acc_size = prog->aux->max_rdwr_access;
@@ -122,10 +128,22 @@ static int bpf_iter_check_map(struct bpf_prog *prog,
 	else
 		value_size = round_up(map->value_size, 8) * num_possible_cpus();
 
-	if (key_acc_size > key_size || value_acc_size > value_size)
-		return -EACCES;
+	if (key_acc_size > key_size || value_acc_size > value_size) {
+		err = -EACCES;
+		goto put_map;
+	}
 
+	aux->map = map;
 	return 0;
+
+put_map:
+	bpf_map_put_with_uref(map);
+	return err;
+}
+
+static void bpf_iter_detach_map(struct bpf_iter_aux_info *aux)
+{
+	bpf_map_put_with_uref(aux->map);
 }
 
 DEFINE_BPF_ITER_FUNC(bpf_map_elem, struct bpf_iter_meta *meta,
@@ -133,8 +151,8 @@ DEFINE_BPF_ITER_FUNC(bpf_map_elem, struct bpf_iter_meta *meta,
 
 static const struct bpf_iter_reg bpf_map_elem_reg_info = {
 	.target			= "bpf_map_elem",
-	.check_target		= bpf_iter_check_map,
-	.req_linfo		= BPF_ITER_LINK_MAP_FD,
+	.attach_target		= bpf_iter_attach_map,
+	.detach_target		= bpf_iter_detach_map,
 	.ctx_arg_info_size	= 2,
 	.ctx_arg_info		= {
 		{ offsetof(struct bpf_iter__bpf_map_elem, key),
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 2f343ce15747..86299a292214 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3883,7 +3883,7 @@ static int tracing_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *
 	return -EINVAL;
 }
 
-#define BPF_LINK_CREATE_LAST_FIELD link_create.flags
+#define BPF_LINK_CREATE_LAST_FIELD link_create.iter_info_len
 static int link_create(union bpf_attr *attr)
 {
 	enum bpf_prog_type ptype;
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index d3377c90a291..fbd83244984f 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -1384,18 +1384,36 @@ static int bpf_iter_init_sk_storage_map(void *priv_data,
 	return 0;
 }
 
-static int bpf_iter_check_map(struct bpf_prog *prog,
-			      struct bpf_iter_aux_info *aux)
+static int bpf_iter_attach_map(struct bpf_prog *prog,
+			       union bpf_iter_link_info *linfo,
+			       struct bpf_iter_aux_info *aux)
 {
-	struct bpf_map *map = aux->map;
+	struct bpf_map *map;
+	int err = -EINVAL;
+
+	map = bpf_map_get_with_uref(linfo->map.map_fd);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
 
 	if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
-		return -EINVAL;
+		goto put_map;
 
-	if (prog->aux->max_rdonly_access > map->value_size)
-		return -EACCES;
+	if (prog->aux->max_rdonly_access > map->value_size) {
+		err = -EACCES;
+		goto put_map;
+	}
 
+	aux->map = map;
 	return 0;
+
+put_map:
+	bpf_map_put_with_uref(map);
+	return err;
+}
+
+static void bpf_iter_detach_map(struct bpf_iter_aux_info *aux)
+{
+	bpf_map_put_with_uref(aux->map);
 }
 
 static const struct seq_operations bpf_sk_storage_map_seq_ops = {
@@ -1414,8 +1432,8 @@ static const struct bpf_iter_seq_info iter_seq_info = {
 
 static struct bpf_iter_reg bpf_sk_storage_map_reg_info = {
 	.target			= "bpf_sk_storage_map",
-	.check_target		= bpf_iter_check_map,
-	.req_linfo		= BPF_ITER_LINK_MAP_FD,
+	.attach_target		= bpf_iter_attach_map,
+	.detach_target		= bpf_iter_detach_map,
 	.ctx_arg_info_size	= 2,
 	.ctx_arg_info		= {
 		{ offsetof(struct bpf_iter__bpf_sk_storage_map, sk),
-- 
2.24.1


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox