Netdev List
 help / color / mirror / Atom feed
* Re: [PATCH net-next 1/2] net: Add Transparent Ethernet Bridging GRO support.
From: Or Gerlitz @ 2014-12-31  9:19 UTC (permalink / raw)
  To: Jesse Gross; +Cc: David Miller, Linux Netdev List
In-Reply-To: <1419995416-29987-1-git-send-email-jesse@nicira.com>

On Wed, Dec 31, 2014 at 5:10 AM, Jesse Gross <jesse@nicira.com> wrote:
> Currently the only tunnel protocol that supports GRO with encapsulated
> Ethernet is VXLAN. This pulls out the Ethernet code into a proper layer
> so that it can be used by other tunnel protocols such as GRE and Geneve.

Hi Jesse,

Thanks for taking care of that. I had also coded this with the
intention of adding GRO support for OVS's TEB-based GRE, but didn't
manage to submit it before your post... Anyway, I would recommend that
you break this patch into two:

1. basic TEB GRO support
2. refactoring of the VXLAN GRO logic to use it

Or.

>
> Signed-off-by: Jesse Gross <jesse@nicira.com>
> ---
>  drivers/net/vxlan.c         | 53 +++-----------------------
>  include/linux/etherdevice.h |  4 ++
>  net/ethernet/eth.c          | 92 +++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 102 insertions(+), 47 deletions(-)
>
> diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
> index 7fbd89f..2ab0922 100644
> --- a/drivers/net/vxlan.c
> +++ b/drivers/net/vxlan.c
> @@ -549,10 +549,7 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, struct sk_buff
>  {
>         struct sk_buff *p, **pp = NULL;
>         struct vxlanhdr *vh, *vh2;
> -       struct ethhdr *eh, *eh2;
> -       unsigned int hlen, off_vx, off_eth;
> -       const struct packet_offload *ptype;
> -       __be16 type;
> +       unsigned int hlen, off_vx;
>         int flush = 1;
>
>         off_vx = skb_gro_offset(skb);
> @@ -563,17 +560,6 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, struct sk_buff
>                 if (unlikely(!vh))
>                         goto out;
>         }
> -       skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */
> -       skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr));
> -
> -       off_eth = skb_gro_offset(skb);
> -       hlen = off_eth + sizeof(*eh);
> -       eh   = skb_gro_header_fast(skb, off_eth);
> -       if (skb_gro_header_hard(skb, hlen)) {
> -               eh = skb_gro_header_slow(skb, hlen, off_eth);
> -               if (unlikely(!eh))
> -                       goto out;
> -       }
>
>         flush = 0;
>
> @@ -582,28 +568,16 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, struct sk_buff
>                         continue;
>
>                 vh2 = (struct vxlanhdr *)(p->data + off_vx);
> -               eh2 = (struct ethhdr   *)(p->data + off_eth);
> -               if (vh->vx_vni != vh2->vx_vni || compare_ether_header(eh, eh2)) {
> +               if (vh->vx_vni != vh2->vx_vni) {
>                         NAPI_GRO_CB(p)->same_flow = 0;
>                         continue;
>                 }
>         }
>
> -       type = eh->h_proto;
> -
> -       rcu_read_lock();
> -       ptype = gro_find_receive_by_type(type);
> -       if (ptype == NULL) {
> -               flush = 1;
> -               goto out_unlock;
> -       }
> -
> -       skb_gro_pull(skb, sizeof(*eh)); /* pull inner eth header */
> -       skb_gro_postpull_rcsum(skb, eh, sizeof(*eh));
> -       pp = ptype->callbacks.gro_receive(head, skb);
> +       skb_gro_pull(skb, sizeof(struct vxlanhdr));
> +       skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr));
> +       pp = eth_gro_receive(head, skb);
>
> -out_unlock:
> -       rcu_read_unlock();
>  out:
>         NAPI_GRO_CB(skb)->flush |= flush;
>
> @@ -612,24 +586,9 @@ out:
>
>  static int vxlan_gro_complete(struct sk_buff *skb, int nhoff)
>  {
> -       struct ethhdr *eh;
> -       struct packet_offload *ptype;
> -       __be16 type;
> -       int vxlan_len  = sizeof(struct vxlanhdr) + sizeof(struct ethhdr);
> -       int err = -ENOSYS;
> -
>         udp_tunnel_gro_complete(skb, nhoff);
>
> -       eh = (struct ethhdr *)(skb->data + nhoff + sizeof(struct vxlanhdr));
> -       type = eh->h_proto;
> -
> -       rcu_read_lock();
> -       ptype = gro_find_complete_by_type(type);
> -       if (ptype != NULL)
> -               err = ptype->callbacks.gro_complete(skb, nhoff + vxlan_len);
> -
> -       rcu_read_unlock();
> -       return err;
> +       return eth_gro_complete(skb, nhoff + sizeof(struct vxlanhdr));
>  }
>
>  /* Notify netdevs that UDP port started listening */
> diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
> index 41c891d..1d869d1 100644
> --- a/include/linux/etherdevice.h
> +++ b/include/linux/etherdevice.h
> @@ -52,6 +52,10 @@ struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs,
>  #define alloc_etherdev(sizeof_priv) alloc_etherdev_mq(sizeof_priv, 1)
>  #define alloc_etherdev_mq(sizeof_priv, count) alloc_etherdev_mqs(sizeof_priv, count, count)
>
> +struct sk_buff **eth_gro_receive(struct sk_buff **head,
> +                                struct sk_buff *skb);
> +int eth_gro_complete(struct sk_buff *skb, int nhoff);
> +
>  /* Reserved Ethernet Addresses per IEEE 802.1Q */
>  static const u8 eth_reserved_addr_base[ETH_ALEN] __aligned(2) =
>  { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
> diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
> index 33a140e..238f38d 100644
> --- a/net/ethernet/eth.c
> +++ b/net/ethernet/eth.c
> @@ -424,3 +424,95 @@ ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len)
>         return scnprintf(buf, PAGE_SIZE, "%*phC\n", len, addr);
>  }
>  EXPORT_SYMBOL(sysfs_format_mac);
> +
> +struct sk_buff **eth_gro_receive(struct sk_buff **head,
> +                                struct sk_buff *skb)
> +{
> +       struct sk_buff *p, **pp = NULL;
> +       struct ethhdr *eh, *eh2;
> +       unsigned int hlen, off_eth;
> +       const struct packet_offload *ptype;
> +       __be16 type;
> +       int flush = 1;
> +
> +       off_eth = skb_gro_offset(skb);
> +       hlen = off_eth + sizeof(*eh);
> +       eh = skb_gro_header_fast(skb, off_eth);
> +       if (skb_gro_header_hard(skb, hlen)) {
> +               eh = skb_gro_header_slow(skb, hlen, off_eth);
> +               if (unlikely(!eh))
> +                       goto out;
> +       }
> +
> +       flush = 0;
> +
> +       for (p = *head; p; p = p->next) {
> +               if (!NAPI_GRO_CB(p)->same_flow)
> +                       continue;
> +
> +               eh2 = (struct ethhdr *)(p->data + off_eth);
> +               if (compare_ether_header(eh, eh2)) {
> +                       NAPI_GRO_CB(p)->same_flow = 0;
> +                       continue;
> +               }
> +       }
> +
> +       type = eh->h_proto;
> +
> +       rcu_read_lock();
> +       ptype = gro_find_receive_by_type(type);
> +       if (ptype == NULL) {
> +               flush = 1;
> +               goto out_unlock;
> +       }
> +
> +       skb_gro_pull(skb, sizeof(*eh));
> +       skb_gro_postpull_rcsum(skb, eh, sizeof(*eh));
> +       pp = ptype->callbacks.gro_receive(head, skb);
> +
> +out_unlock:
> +       rcu_read_unlock();
> +out:
> +       NAPI_GRO_CB(skb)->flush |= flush;
> +
> +       return pp;
> +}
> +EXPORT_SYMBOL(eth_gro_receive);
> +
> +int eth_gro_complete(struct sk_buff *skb, int nhoff)
> +{
> +       struct ethhdr *eh = (struct ethhdr *)(skb->data + nhoff);
> +       __be16 type = eh->h_proto;
> +       struct packet_offload *ptype;
> +       int err = -ENOSYS;
> +
> +       if (skb->encapsulation)
> +               skb_set_inner_mac_header(skb, nhoff);
> +
> +       rcu_read_lock();
> +       ptype = gro_find_complete_by_type(type);
> +       if (ptype != NULL)
> +               err = ptype->callbacks.gro_complete(skb, nhoff +
> +                                                   sizeof(struct ethhdr));
> +
> +       rcu_read_unlock();
> +       return err;
> +}
> +EXPORT_SYMBOL(eth_gro_complete);
> +
> +static struct packet_offload eth_packet_offload __read_mostly = {
> +       .type = cpu_to_be16(ETH_P_TEB),
> +       .callbacks = {
> +               .gro_receive = eth_gro_receive,
> +               .gro_complete = eth_gro_complete,
> +       },
> +};
> +
> +static int __init eth_offload_init(void)
> +{
> +       dev_add_offload(&eth_packet_offload);
> +
> +       return 0;
> +}
> +
> +fs_initcall(eth_offload_init);
> --
> 1.9.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* [PATCH] wait_for_completion_timeout does not return < 0
From: Nicholas Mc Guire @ 2014-12-31  9:04 UTC (permalink / raw)
  To: Inaky Perez-Gonzalez; +Cc: linux-wimax, netdev, linux-kernel, Nicholas Mc Guire

This only removes a misleading comment: wait_for_completion_timeout
does not return < 0, so there is never a negative result to be
passed on.

patch is against linux-next 3.19.0-rc1 -next-20141226

Signed-off-by: Nicholas Mc Guire <der.herr@hofr.at>
---
 drivers/net/wimax/i2400m/driver.c |    1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/wimax/i2400m/driver.c b/drivers/net/wimax/i2400m/driver.c
index 9c78090..0a6384e 100644
--- a/drivers/net/wimax/i2400m/driver.c
+++ b/drivers/net/wimax/i2400m/driver.c
@@ -197,7 +197,6 @@ int i2400m_op_reset(struct wimax_dev *wimax_dev)
 		result = -ETIMEDOUT;
 	else if (result > 0)
 		result = ctx.result;
-	/* if result < 0, pass it on */
 	mutex_lock(&i2400m->init_mutex);
 	i2400m->reset_ctx = NULL;
 	mutex_unlock(&i2400m->init_mutex);
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH] brcmfmac: avoid duplicated suspend/resume operation
From: Fu, Zhonghui @ 2014-12-31  8:20 UTC (permalink / raw)
  To: brudley, Arend van Spriel, Franky Lin, meuleman, kvalo, linville,
	pieterpg, hdegoede, wens, linux-wireless, brcm80211-dev-list,
	netdev, linux-kernel

>From e34419970a07bfcd365f9c66bdfa552188a0cd26 Mon Sep 17 00:00:00 2001
From: Zhonghui Fu <zhonghui.fu@linux.intel.com>
Date: Mon, 29 Dec 2014 21:25:31 +0800
Subject: [PATCH] brcmfmac: avoid duplicated suspend/resume operation

The WiFi chip has 2 SDIO functions, so the PM core triggers the
suspend/resume operations twice for one WiFi chip, doing the same
things both times. This patch avoids that.

Signed-off-by: Zhonghui Fu <zhonghui.fu@linux.intel.com>
---
 drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c |   19 +++++++++++++++++--
 1 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c
index 3c06e93..eee7818 100644
--- a/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c
+++ b/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c
@@ -1139,11 +1139,18 @@ void brcmf_sdio_wowl_config(struct device *dev, bool enabled)
 static int brcmf_ops_sdio_suspend(struct device *dev)
 {
 	struct brcmf_bus *bus_if = dev_get_drvdata(dev);
-	struct brcmf_sdio_dev *sdiodev = bus_if->bus_priv.sdio;
+	struct brcmf_sdio_dev *sdiodev;
 	mmc_pm_flag_t sdio_flags;
+	struct sdio_func *func = dev_to_sdio_func(dev);
 
 	brcmf_dbg(SDIO, "Enter\n");
 
+	if (func->num == 2) {
+		return 0;
+	}
+
+	sdiodev = bus_if->bus_priv.sdio;
+
 	atomic_set(&sdiodev->suspend, true);
 
 	if (sdiodev->wowl_enabled) {
@@ -1164,9 +1171,17 @@ static int brcmf_ops_sdio_suspend(struct device *dev)
 static int brcmf_ops_sdio_resume(struct device *dev)
 {
 	struct brcmf_bus *bus_if = dev_get_drvdata(dev);
-	struct brcmf_sdio_dev *sdiodev = bus_if->bus_priv.sdio;
+	struct brcmf_sdio_dev *sdiodev;
+	struct sdio_func *func = dev_to_sdio_func(dev);
 
 	brcmf_dbg(SDIO, "Enter\n");
+
+	if (func->num == 2) {
+		return 0;
+	}
+
+	sdiodev = bus_if->bus_priv.sdio;
+
 	if (sdiodev->pdata->oob_irq_supported)
 		disable_irq_wake(sdiodev->pdata->oob_irq_nr);
 	brcmf_sdio_wd_timer(sdiodev->bus, BRCMF_WD_POLL_MS);
-- 1.7.1

^ permalink raw reply related

* Re: [Question] What's the noop_qdisc introduced for in the kernel?
From: Dennis Chen @ 2014-12-31  7:31 UTC (permalink / raw)
  To: Cong Wang; +Cc: netdev
In-Reply-To: <CAHA+R7O_ypxh4DjQ1eeKZyGFpYbKQAgKkEjrmk400Ud+zLeEow@mail.gmail.com>

On Wed, Dec 31, 2014 at 4:15 AM, Cong Wang <cwang@twopensource.com> wrote:
> On Tue, Dec 30, 2014 at 1:23 AM, Dennis Chen <kernel.org.gnu@gmail.com> wrote:
>> After google and the code reading, seems this Qdisc instance is only
>> used for the initialization purpose, I can't find the reason that this
>> object introduced in the kernel. Does anybody know what the historical
>> reason is for this invention? the purpose or the benefit for this
>> Qdisc object?
>>
>
> Not just for initialization, it is kinda a null qdisc when
> the previous qdisc gets removed:
>
>         /* ... and graft new one */
>         if (qdisc == NULL)
>                 qdisc = &noop_qdisc;
>
> or the entire device is not activated yet. It guarantees no
> packets can be sent out via this qdisc.

Thanks Cong, got it. The device will use the noop_qdisc when it's not
activated yet...

BTW, is there a method you would recommend for finding the code
that first introduced the noop_qdisc in the kernel? I tried searching
the LKML archives and Google, but it doesn't seem easy to do.

-- 
Den

^ permalink raw reply

* pull request: bluetooth-next 2014-12-31
From: Johan Hedberg @ 2014-12-31  7:25 UTC (permalink / raw)
  To: davem; +Cc: netdev, linux-bluetooth

[-- Attachment #1: Type: text/plain, Size: 6568 bytes --]

Hi Dave,

Here's the first batch of bluetooth patches for 3.20.

 - Cleanups & fixes to ieee802154 drivers
 - Fix synchronization of mgmt commands with respective HCI commands
 - Add self-tests for LE pairing crypto functionality
 - Remove 'BlueFritz!' specific handling from core using a new quirk flag
 - Public address configuration support for ath3012
 - Refactor debugfs support into a dedicated file
 - Initial support for LE Data Length Extension feature from Bluetooth 4.2

Please let me know if there are any issues pulling. Thanks.

Johan

---
The following changes since commit 86c8fc4bbe14b8950e62d379bb57722427ad3d67:

  Merge tag 'mac80211-for-davem-2014-12-18' of git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211 (2014-12-18 15:33:49 -0500)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth-next.git for-upstream

for you to fetch changes up to e64b4fb66cc428cef0a62ea899b671c1aad05f3a:

  Bluetooth: Add timing information to ECDH test case runs (2014-12-30 10:32:11 +0200)

----------------------------------------------------------------
Alexander Aring (11):
      nl802154: introduce cca mode enums
      ieee802154: rework cca setting
      nl802154: introduce support for cca settings
      at86rf230: add reset state cca handling
      at86rf230: remove if branch
      at86rf230: make at86rf230_async_error inline
      at86rf230: fix context pointer handling
      at86rf230: remove unnecessary assign
      at86rf230: cleanup check on trac status
      mac802154: iface: check concurrent ifaces
      ieee802154: iface: move multiple node type check

Andrey Yurovsky (2):
      at86rf230: fix register read for part version
      at86rf230: remove version check for AT86RF212

Johan Hedberg (12):
      Bluetooth: Split hci_update_page_scan into two functions
      Bluetooth: Split hci_request helpers to hci_request.[ch]
      Bluetooth: Add hci_request support for hci_update_background_scan
      Bluetooth: Fix Remove Device to wait for HCI before sending cmd_complete
      Bluetooth: Fix Add Device to wait for HCI before sending cmd_complete
      Bluetooth: Add return parameter to cmd_complete callbacks
      Bluetooth: Move hci_update_page_scan to hci_request.c
      Bluetooth: Fix const declarations for smp_f5 and smp_f6
      Bluetooth: Add support for ECDH test cases
      Bluetooth: Add skeleton for SMP self-tests
      Bluetooth: Add legacy SMP tests
      Bluetooth: Add LE Secure Connections tests for SMP

Jukka Rissanen (1):
      Bluetooth: 6lowpan: Add IPSP PSM value

Marcel Holtmann (17):
      Bluetooth: Support static address when BR/EDR has been disabled
      Bluetooth: Add skeleton functions for debugfs creation
      Bluetooth: Move common debugfs file creation into hci_debugfs.c
      Bluetooth: Move BR/EDR debugfs file creation into hci_debugfs.c
      Bluetooth: Move LE debugfs file creation into hci_debugfs.c
      Bluetooth: Add structures for LE Data Length Extension feature
      Bluetooth: Enable basics for LE Data Length Extension feature
      Bluetooth: Store default and maximum LE data length settings
      Bluetooth: Create debugfs directory for each connection handle
      Bluetooth: Remove duplicate constant for RFCOMM PSM
      Bluetooth: Introduce HCI_QUIRK_BROKEN_LOCAL_COMMANDS constant
      Bluetooth: bfusb: Set the HCI_QUIRK_BROKEN_LOCAL_COMMANDS quirk
      Bluetooth: btusb: Set the HCI_QUIRK_BROKEN_LOCAL_COMMANDS quirk
      Bluetooth: Remove BlueFritz! specific check from initialization
      Bluetooth: Add support for self testing framework
      Bluetooth: Add timing information to SMP test case runs
      Bluetooth: Add timing information to ECDH test case runs

Stefan Schmidt (6):
      ieee802154/at86rf230: Remove unneeded blank lines
      ieee802154/at86rf230: Align to opening parenthesis
      ieee802154/at86rf230: Fix typo unkown -> unknown
      ieee802154/cc2520: Remove extra blank lines
      ieee802154/mrf24j40: Fix typo begining -> beginning
      ieee802154/mrf24j40: Fix alignment of parenthesis

Toshi Kikuchi (1):
      Bluetooth: btusb: support public address configuration for ath3012

Varka Bhadram (3):
      cc2520: use devm_kzalloc(.., sizeof(*pointer), ..) pattern
      cc2520: remove 'ret' goto label
      cc2520: fix zero perm_extended_addr address

Wolfram Sang (1):
      net: ieee802154: don't use devm_pinctrl_get_select_default() in probe

 drivers/bluetooth/bfusb.c          |    2 +
 drivers/bluetooth/btusb.c          |   34 +-
 drivers/net/ieee802154/at86rf230.c |   80 +-
 drivers/net/ieee802154/cc2520.c    |   27 +-
 drivers/net/ieee802154/mrf24j40.c  |    6 +-
 include/net/bluetooth/hci.h        |   53 ++
 include/net/bluetooth/hci_core.h   |   35 +-
 include/net/bluetooth/l2cap.h      |    1 +
 include/net/bluetooth/rfcomm.h     |    2 -
 include/net/cfg802154.h            |   10 +-
 include/net/ieee802154_netdev.h    |    4 +-
 include/net/mac802154.h            |    5 +-
 include/net/nl802154.h             |   45 +-
 net/bluetooth/Kconfig              |   27 +
 net/bluetooth/Makefile             |    4 +-
 net/bluetooth/af_bluetooth.c       |    6 +
 net/bluetooth/hci_conn.c           |    4 +
 net/bluetooth/hci_core.c           | 1640 +----------------------------------
 net/bluetooth/hci_debugfs.c        | 1076 +++++++++++++++++++++++
 net/bluetooth/hci_debugfs.h        |   26 +
 net/bluetooth/hci_event.c          |   71 +-
 net/bluetooth/hci_request.c        |  555 ++++++++++++
 net/bluetooth/hci_request.h        |   54 ++
 net/bluetooth/mgmt.c               |  265 ++++--
 net/bluetooth/rfcomm/core.c        |    4 +-
 net/bluetooth/selftest.c           |  244 ++++++
 net/bluetooth/selftest.h           |   45 +
 net/bluetooth/smp.c                |  335 ++++++-
 net/bluetooth/smp.h                |   13 +
 net/ieee802154/nl-mac.c            |    4 +-
 net/ieee802154/nl802154.c          |   46 +-
 net/ieee802154/rdev-ops.h          |    7 +
 net/ieee802154/sysfs.c             |    2 +-
 net/mac802154/cfg.c                |   21 +
 net/mac802154/driver-ops.h         |    5 +-
 net/mac802154/iface.c              |  100 ++-
 net/mac802154/mac_cmd.c            |    6 +-
 37 files changed, 3084 insertions(+), 1780 deletions(-)
 create mode 100644 net/bluetooth/hci_debugfs.c
 create mode 100644 net/bluetooth/hci_debugfs.h
 create mode 100644 net/bluetooth/hci_request.c
 create mode 100644 net/bluetooth/hci_request.h
 create mode 100644 net/bluetooth/selftest.c
 create mode 100644 net/bluetooth/selftest.h


[-- Attachment #2: Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply

* Possible BUG in ipv6_find_hdr function for fragmented packets
From: Rahul Sharma @ 2014-12-31  7:03 UTC (permalink / raw)
  To: netdev

Hello netdev,

I have observed a problem when I added an AH header before protocol
header (OSPFv3) while implementing authentication support for OSPFv3.

Problem: Fragmented packets which include authentication header don't
get reassembled in the kernel. This was because ipv6_find_hdr returns
ENOENT for the non-first fragment since AH is an extension header.

Firstly, the comment before ipv6_find_hdr -- "Note that non-1st
fragment is special case that "the protocol number of last header" is
"next header" field in Fragment header" (where 'last header' doesn't
include AH or other extension headers) -- looks incorrect according to
the description of the fragmentation process in RFC 2460. The RFC
clearly states that the Next Header value in the fragments identifies
the first header of the Fragmentable Part of the original packet, which
could be AH (51) as in our case.

This code looks like a problem:
if (_frag_off) {
253                                 if (target < 0 &&
254                                     ((!ipv6_ext_hdr(hp->nexthdr)) ||
255                                      hp->nexthdr == NEXTHDR_NONE)) {
256                                         if (fragoff)
257                                                 *fragoff = _frag_off;
258                                         return hp->nexthdr;
259                                 }
260                                 return -ENOENT;
261                         }

For non-first fragments, the 'next header' in the fragment header
would *always* be AUTH (or whatever extension header is the first
header in first fragment). But the above code will keep on returning
ENOENT for the non-first fragment in such cases.

Solution: I suggest we do away with this check
((!ipv6_ext_hdr(hp->nexthdr)) || hp->nexthdr == NEXTHDR_NONE) and
simply return hp->nexthdr if _frag_off is non-zero. I tested it on
my machine and it works. Adding a special case for NEXTHDR_AUTH also
works for me.

Thanks,
Rahul

^ permalink raw reply

* Re: Fw: [Bug 82471] New: net/core/dev.c skb_war_bad_offload
From: Richard Laager @ 2014-12-31  6:51 UTC (permalink / raw)
  To: netdev
In-Reply-To: <1418805852.5277.25.camel@watermelon.coderich.net>

[-- Attachment #1: Type: text/plain, Size: 703 bytes --]

I was able to do some bisection using Ubuntu-packaged kernels.

The kernel from Precise (3.2.0-74.109) works (on a Trusty system).

On 3.5.0-51-generic (3.5.0-51.76) from Quantal, I get a different kind
of brokenness. I don't get a stack dump, but I get this kernel message
printed:
skbuff: bond0.7: received packets cannot be forwarded while LRO is enabled

The kernels from Raring (3.8.0-35.50) and Saucy (3.11.0-26.45) are
broken in the same way as Trusty.

Does that skbuff error message provide any clue? If not, it seems that
I'll have to bisect from working-on-3.2 to broken-on-3.5, then revert
that offending commit in each test as I bisect again from 3.5 to 3.8?

-- 
Richard

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 490 bytes --]

^ permalink raw reply

* [PATCH net-next] net: More vlan tests before registering netdevice
From: Yuval Mintz @ 2014-12-31  6:35 UTC (permalink / raw)
  To: davem; +Cc: netdev, Yuval Mintz

When register_netdevice() is called, the netdevice's vlan filtering feature
and supplied callbacks are checked to make sure the vlan implementation is
not buggy.
This adds an additional test - check that the vlan_features were filled
correctly, as vlan devices inherit those as their own features;
incorrect values set there would later prevent the vlan interface itself
from being registered [as it doesn't implement the filtering ndos].

Signed-off-by: Yuval Mintz <Yuval.Mintz@qlogic.com>
---
Hi Dave,

Not sure why we take such a defensive approach regarding this feature.
Perhaps it would have been better to simply remove these checks altogether.

Cheers,
Yuval
---

 net/core/dev.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 3f191da..8a663b2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6253,10 +6253,11 @@ int register_netdevice(struct net_device *dev)
 		}
 	}
 
-	if (((dev->hw_features | dev->features) &
-	     NETIF_F_HW_VLAN_CTAG_FILTER) &&
-	    (!dev->netdev_ops->ndo_vlan_rx_add_vid ||
-	     !dev->netdev_ops->ndo_vlan_rx_kill_vid)) {
+	if ((((dev->hw_features | dev->features) &
+	      NETIF_F_HW_VLAN_CTAG_FILTER) &&
+	     (!dev->netdev_ops->ndo_vlan_rx_add_vid ||
+	      !dev->netdev_ops->ndo_vlan_rx_kill_vid)) ||
+	    (dev->vlan_features & NETIF_F_HW_VLAN_CTAG_FILTER) {
 		netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n");
 		ret = -EINVAL;
 		goto err_uninit;
-- 
1.9.3

^ permalink raw reply related

* Re: [PATCH V3 for 3.19]  rtlwifi: Fix error when accessing unmapped memory in skb
From: Eric Biggers @ 2014-12-31  5:07 UTC (permalink / raw)
  To: Larry Finger; +Cc: kvalo, linux-wireless, netdev, Stable
In-Reply-To: <1419996787-17395-1-git-send-email-Larry.Finger@lwfinger.net>

On Tue, Dec 30, 2014 at 09:33:07PM -0600, Larry Finger wrote:
> v3 - Unmap skb before trying to allocate a new one so as to not leak mapping.

Looks good to me, although I'm not sure about the handling of DMA mapping errors
(perhaps that's something that drivers typically don't even try to handle?).
Anyway, the skb allocation issue appears to be resolved now.  I am running your
patch with an extra hack to inject some occasional skb allocation failures, and
I haven't noticed any problems except dropped packets.

Eric

^ permalink raw reply

* [PATCH V3 for 3.19]  rtlwifi: Fix error when accessing unmapped memory in skb
From: Larry Finger @ 2014-12-31  3:33 UTC (permalink / raw)
  To: kvalo; +Cc: linux-wireless, Larry Finger, netdev, Stable, Eric Biggers

These drivers use 9100-byte receive buffers, thus allocating an skb requires
an order-3 memory allocation. Under heavy memory loads and fragmentation, such
a request can fail. Previous versions of the driver have dropped the packet
and reused the old buffer; however, the new version introduced a bug in that
it released the old buffer before trying to allocate a new one. The previous
method is implemented here. The skb is unmapped before any attempt is made to
allocate another.

Signed-off-by: Larry Finger <Larry.Finger@lwfinger.net>
Cc: Stable <stable@vger.kernel.org>  [v3.18]
Reported-by: Eric Biggers <ebiggers3@gmail.com>
Cc: Eric Biggers <ebiggers3@gmail.com>
---

V2 - Fixes an error in the logic of V1. Realtek is working on a change to
     the RX buffer allocation, but that is likely to be too invasive for
     a fix to -rc or stable. In the meantime, this will help.
v3 - Unmap skb before trying to allocate a new one so as to not leak mapping.

Larry
---

 drivers/net/wireless/rtlwifi/pci.c | 32 ++++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)

Index: wireless-drivers/drivers/net/wireless/rtlwifi/pci.c
===================================================================
--- wireless-drivers.orig/drivers/net/wireless/rtlwifi/pci.c
+++ wireless-drivers/drivers/net/wireless/rtlwifi/pci.c
@@ -666,7 +666,8 @@ tx_status_ok:
 }
 
 static int _rtl_pci_init_one_rxdesc(struct ieee80211_hw *hw,
-				    u8 *entry, int rxring_idx, int desc_idx)
+				    struct sk_buff *new_skb, u8 *entry,
+				    int rxring_idx, int desc_idx)
 {
 	struct rtl_priv *rtlpriv = rtl_priv(hw);
 	struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
@@ -674,11 +675,15 @@ static int _rtl_pci_init_one_rxdesc(stru
 	u8 tmp_one = 1;
 	struct sk_buff *skb;
 
+	if (likely(new_skb)) {
+		skb = new_skb;
+		goto remap;
+	}
 	skb = dev_alloc_skb(rtlpci->rxbuffersize);
 	if (!skb)
 		return 0;
-	rtlpci->rx_ring[rxring_idx].rx_buf[desc_idx] = skb;
 
+remap:
 	/* just set skb->cb to mapping addr for pci_unmap_single use */
 	*((dma_addr_t *)skb->cb) =
 		pci_map_single(rtlpci->pdev, skb_tail_pointer(skb),
@@ -686,6 +691,7 @@ static int _rtl_pci_init_one_rxdesc(stru
 	bufferaddress = *((dma_addr_t *)skb->cb);
 	if (pci_dma_mapping_error(rtlpci->pdev, bufferaddress))
 		return 0;
+	rtlpci->rx_ring[rxring_idx].rx_buf[desc_idx] = skb;
 	if (rtlpriv->use_new_trx_flow) {
 		rtlpriv->cfg->ops->set_desc(hw, (u8 *)entry, false,
 					    HW_DESC_RX_PREPARE,
@@ -781,6 +787,7 @@ static void _rtl_pci_rx_interrupt(struct
 		/*rx pkt */
 		struct sk_buff *skb = rtlpci->rx_ring[rxring_idx].rx_buf[
 				      rtlpci->rx_ring[rxring_idx].idx];
+		struct sk_buff *new_skb;
 
 		if (rtlpriv->use_new_trx_flow) {
 			rx_remained_cnt =
@@ -807,6 +814,13 @@ static void _rtl_pci_rx_interrupt(struct
 		pci_unmap_single(rtlpci->pdev, *((dma_addr_t *)skb->cb),
 				 rtlpci->rxbuffersize, PCI_DMA_FROMDEVICE);
 
+		/* get a new skb - if fail, old one will be reused */
+		new_skb = dev_alloc_skb(rtlpci->rxbuffersize);
+		if (unlikely(!new_skb)) {
+			pr_err("Allocation of new skb failed in %s\n",
+			       __func__);
+			goto no_new;
+		}
 		if (rtlpriv->use_new_trx_flow) {
 			buffer_desc =
 			  &rtlpci->rx_ring[rxring_idx].buffer_desc
@@ -911,14 +925,16 @@ static void _rtl_pci_rx_interrupt(struct
 			schedule_work(&rtlpriv->works.lps_change_work);
 		}
 end:
+		skb = new_skb;
+no_new:
 		if (rtlpriv->use_new_trx_flow) {
-			_rtl_pci_init_one_rxdesc(hw, (u8 *)buffer_desc,
+			_rtl_pci_init_one_rxdesc(hw, skb, (u8 *)buffer_desc,
 						 rxring_idx,
-					       rtlpci->rx_ring[rxring_idx].idx);
+						 rtlpci->rx_ring[rxring_idx].idx);
 		} else {
-			_rtl_pci_init_one_rxdesc(hw, (u8 *)pdesc, rxring_idx,
+			_rtl_pci_init_one_rxdesc(hw, skb, (u8 *)pdesc,
+						 rxring_idx,
 						 rtlpci->rx_ring[rxring_idx].idx);
-
 			if (rtlpci->rx_ring[rxring_idx].idx ==
 			    rtlpci->rxringcount - 1)
 				rtlpriv->cfg->ops->set_desc(hw, (u8 *)pdesc,
@@ -1307,7 +1323,7 @@ static int _rtl_pci_init_rx_ring(struct
 		rtlpci->rx_ring[rxring_idx].idx = 0;
 		for (i = 0; i < rtlpci->rxringcount; i++) {
 			entry = &rtlpci->rx_ring[rxring_idx].buffer_desc[i];
-			if (!_rtl_pci_init_one_rxdesc(hw, (u8 *)entry,
+			if (!_rtl_pci_init_one_rxdesc(hw, NULL, (u8 *)entry,
 						      rxring_idx, i))
 				return -ENOMEM;
 		}
@@ -1332,7 +1348,7 @@ static int _rtl_pci_init_rx_ring(struct
 
 		for (i = 0; i < rtlpci->rxringcount; i++) {
 			entry = &rtlpci->rx_ring[rxring_idx].desc[i];
-			if (!_rtl_pci_init_one_rxdesc(hw, (u8 *)entry,
+			if (!_rtl_pci_init_one_rxdesc(hw, NULL, (u8 *)entry,
 						      rxring_idx, i))
 				return -ENOMEM;
 		}

^ permalink raw reply

* Re: [PATCH 23/23 V2 for 3.19]  rtlwifi: Fix error when accessing unmapped memory in skb
From: Larry Finger @ 2014-12-31  3:31 UTC (permalink / raw)
  To: Eric Biggers; +Cc: kvalo, linux-wireless, netdev, Stable
In-Reply-To: <20141231004947.GA2007@zzz>

On 12/30/2014 06:49 PM, Eric Biggers wrote:
> On Sat, Dec 27, 2014 at 02:17:37PM -0600, Larry Finger wrote:
>> These drivers use 9100-byte receive buffers, thus allocating an skb requires
>> an O(3) memory allocation. Under heavy memory loads and fragmentation, such
>> a request can fail. Previous versions of the driver have dropped the packet
>> and reused the old buffer; however, the new version introduced a bug in that
>> it released the old buffer before trying to allocate a new one. The previous
>> method is implemented here.
>
> It looks like in the out-of-memory path, pci_map_single() gets called while the
> skb is still mapped.  Won't this leak the IOMMU mapping?

Good catch. I do not know much about leaking the IOMMU mapping; however it is 
easy to do the unmapping before trying to allocate a new skb.

Thanks,

Larry

^ permalink raw reply

* [PATCH net-next 2/2] geneve: Add Geneve GRO support
From: Jesse Gross @ 2014-12-31  3:10 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Joe Stringer
In-Reply-To: <1419995416-29987-1-git-send-email-jesse@nicira.com>

From: Joe Stringer <joestringer@nicira.com>

This results in an approximately 30% increase in throughput
when handling encapsulated bulk traffic.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
---
 net/ipv4/geneve.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 95 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c
index 394a200..19e256e 100644
--- a/net/ipv4/geneve.c
+++ b/net/ipv4/geneve.c
@@ -149,6 +149,99 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt,
 }
 EXPORT_SYMBOL_GPL(geneve_xmit_skb);
 
+static int geneve_hlen(struct genevehdr *gh)
+{
+	return sizeof(*gh) + gh->opt_len * 4;
+}
+
+static struct sk_buff **geneve_gro_receive(struct sk_buff **head,
+					   struct sk_buff *skb)
+{
+	struct sk_buff *p, **pp = NULL;
+	struct genevehdr *gh, *gh2;
+	unsigned int hlen, gh_len, off_gnv;
+	const struct packet_offload *ptype;
+	__be16 type;
+	int flush = 1;
+
+	off_gnv = skb_gro_offset(skb);
+	hlen = off_gnv + sizeof(*gh);
+	gh = skb_gro_header_fast(skb, off_gnv);
+	if (skb_gro_header_hard(skb, hlen)) {
+		gh = skb_gro_header_slow(skb, hlen, off_gnv);
+		if (unlikely(!gh))
+			goto out;
+	}
+
+	if (gh->ver != GENEVE_VER || gh->oam)
+		goto out;
+	gh_len = geneve_hlen(gh);
+
+	hlen = off_gnv + gh_len;
+	if (skb_gro_header_hard(skb, hlen)) {
+		gh = skb_gro_header_slow(skb, hlen, off_gnv);
+		if (unlikely(!gh))
+			goto out;
+	}
+
+	flush = 0;
+
+	for (p = *head; p; p = p->next) {
+		if (!NAPI_GRO_CB(p)->same_flow)
+			continue;
+
+		gh2 = (struct genevehdr *)(p->data + off_gnv);
+		if (gh->opt_len != gh2->opt_len ||
+		    memcmp(gh, gh2, gh_len)) {
+			NAPI_GRO_CB(p)->same_flow = 0;
+			continue;
+		}
+	}
+
+	type = gh->proto_type;
+
+	rcu_read_lock();
+	ptype = gro_find_receive_by_type(type);
+	if (ptype == NULL) {
+		flush = 1;
+		goto out_unlock;
+	}
+
+	skb_gro_pull(skb, gh_len);
+	skb_gro_postpull_rcsum(skb, gh, gh_len);
+	pp = ptype->callbacks.gro_receive(head, skb);
+
+out_unlock:
+	rcu_read_unlock();
+out:
+	NAPI_GRO_CB(skb)->flush |= flush;
+
+	return pp;
+}
+
+static int geneve_gro_complete(struct sk_buff *skb, int nhoff)
+{
+	struct genevehdr *gh;
+	struct packet_offload *ptype;
+	__be16 type;
+	int gh_len;
+	int err = -ENOSYS;
+
+	udp_tunnel_gro_complete(skb, nhoff);
+
+	gh = (struct genevehdr *)(skb->data + nhoff);
+	gh_len = geneve_hlen(gh);
+	type = gh->proto_type;
+
+	rcu_read_lock();
+	ptype = gro_find_complete_by_type(type);
+	if (ptype != NULL)
+		err = ptype->callbacks.gro_complete(skb, nhoff + gh_len);
+
+	rcu_read_unlock();
+	return err;
+}
+
 static void geneve_notify_add_rx_port(struct geneve_sock *gs)
 {
 	struct sock *sk = gs->sock->sk;
@@ -278,8 +371,8 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port,
 
 	/* Initialize the geneve udp offloads structure */
 	gs->udp_offloads.port = port;
-	gs->udp_offloads.callbacks.gro_receive = NULL;
-	gs->udp_offloads.callbacks.gro_complete = NULL;
+	gs->udp_offloads.callbacks.gro_receive  = geneve_gro_receive;
+	gs->udp_offloads.callbacks.gro_complete = geneve_gro_complete;
 
 	spin_lock(&gn->sock_lock);
 	hlist_add_head_rcu(&gs->hlist, gs_head(net, port));
-- 
1.9.1

^ permalink raw reply related

* [PATCH net-next 1/2] net: Add Transparent Ethernet Bridging GRO support.
From: Jesse Gross @ 2014-12-31  3:10 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

Currently the only tunnel protocol that supports GRO with encapsulated
Ethernet is VXLAN. This pulls out the Ethernet code into a proper layer
so that it can be used by other tunnel protocols such as GRE and Geneve.

Signed-off-by: Jesse Gross <jesse@nicira.com>
---
 drivers/net/vxlan.c         | 53 +++-----------------------
 include/linux/etherdevice.h |  4 ++
 net/ethernet/eth.c          | 92 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 102 insertions(+), 47 deletions(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 7fbd89f..2ab0922 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -549,10 +549,7 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, struct sk_buff
 {
 	struct sk_buff *p, **pp = NULL;
 	struct vxlanhdr *vh, *vh2;
-	struct ethhdr *eh, *eh2;
-	unsigned int hlen, off_vx, off_eth;
-	const struct packet_offload *ptype;
-	__be16 type;
+	unsigned int hlen, off_vx;
 	int flush = 1;
 
 	off_vx = skb_gro_offset(skb);
@@ -563,17 +560,6 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, struct sk_buff
 		if (unlikely(!vh))
 			goto out;
 	}
-	skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */
-	skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr));
-
-	off_eth = skb_gro_offset(skb);
-	hlen = off_eth + sizeof(*eh);
-	eh   = skb_gro_header_fast(skb, off_eth);
-	if (skb_gro_header_hard(skb, hlen)) {
-		eh = skb_gro_header_slow(skb, hlen, off_eth);
-		if (unlikely(!eh))
-			goto out;
-	}
 
 	flush = 0;
 
@@ -582,28 +568,16 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, struct sk_buff
 			continue;
 
 		vh2 = (struct vxlanhdr *)(p->data + off_vx);
-		eh2 = (struct ethhdr   *)(p->data + off_eth);
-		if (vh->vx_vni != vh2->vx_vni || compare_ether_header(eh, eh2)) {
+		if (vh->vx_vni != vh2->vx_vni) {
 			NAPI_GRO_CB(p)->same_flow = 0;
 			continue;
 		}
 	}
 
-	type = eh->h_proto;
-
-	rcu_read_lock();
-	ptype = gro_find_receive_by_type(type);
-	if (ptype == NULL) {
-		flush = 1;
-		goto out_unlock;
-	}
-
-	skb_gro_pull(skb, sizeof(*eh)); /* pull inner eth header */
-	skb_gro_postpull_rcsum(skb, eh, sizeof(*eh));
-	pp = ptype->callbacks.gro_receive(head, skb);
+	skb_gro_pull(skb, sizeof(struct vxlanhdr));
+	skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr));
+	pp = eth_gro_receive(head, skb);
 
-out_unlock:
-	rcu_read_unlock();
 out:
 	NAPI_GRO_CB(skb)->flush |= flush;
 
@@ -612,24 +586,9 @@ out:
 
 static int vxlan_gro_complete(struct sk_buff *skb, int nhoff)
 {
-	struct ethhdr *eh;
-	struct packet_offload *ptype;
-	__be16 type;
-	int vxlan_len  = sizeof(struct vxlanhdr) + sizeof(struct ethhdr);
-	int err = -ENOSYS;
-
 	udp_tunnel_gro_complete(skb, nhoff);
 
-	eh = (struct ethhdr *)(skb->data + nhoff + sizeof(struct vxlanhdr));
-	type = eh->h_proto;
-
-	rcu_read_lock();
-	ptype = gro_find_complete_by_type(type);
-	if (ptype != NULL)
-		err = ptype->callbacks.gro_complete(skb, nhoff + vxlan_len);
-
-	rcu_read_unlock();
-	return err;
+	return eth_gro_complete(skb, nhoff + sizeof(struct vxlanhdr));
 }
 
 /* Notify netdevs that UDP port started listening */
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 41c891d..1d869d1 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -52,6 +52,10 @@ struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs,
 #define alloc_etherdev(sizeof_priv) alloc_etherdev_mq(sizeof_priv, 1)
 #define alloc_etherdev_mq(sizeof_priv, count) alloc_etherdev_mqs(sizeof_priv, count, count)
 
+struct sk_buff **eth_gro_receive(struct sk_buff **head,
+				 struct sk_buff *skb);
+int eth_gro_complete(struct sk_buff *skb, int nhoff);
+
 /* Reserved Ethernet Addresses per IEEE 802.1Q */
 static const u8 eth_reserved_addr_base[ETH_ALEN] __aligned(2) =
 { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 33a140e..238f38d 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -424,3 +424,95 @@ ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len)
 	return scnprintf(buf, PAGE_SIZE, "%*phC\n", len, addr);
 }
 EXPORT_SYMBOL(sysfs_format_mac);
+
+struct sk_buff **eth_gro_receive(struct sk_buff **head,
+				 struct sk_buff *skb)
+{
+	struct sk_buff *p, **pp = NULL;
+	struct ethhdr *eh, *eh2;
+	unsigned int hlen, off_eth;
+	const struct packet_offload *ptype;
+	__be16 type;
+	int flush = 1;
+
+	off_eth = skb_gro_offset(skb);
+	hlen = off_eth + sizeof(*eh);
+	eh = skb_gro_header_fast(skb, off_eth);
+	if (skb_gro_header_hard(skb, hlen)) {
+		eh = skb_gro_header_slow(skb, hlen, off_eth);
+		if (unlikely(!eh))
+			goto out;
+	}
+
+	flush = 0;
+
+	for (p = *head; p; p = p->next) {
+		if (!NAPI_GRO_CB(p)->same_flow)
+			continue;
+
+		eh2 = (struct ethhdr *)(p->data + off_eth);
+		if (compare_ether_header(eh, eh2)) {
+			NAPI_GRO_CB(p)->same_flow = 0;
+			continue;
+		}
+	}
+
+	type = eh->h_proto;
+
+	rcu_read_lock();
+	ptype = gro_find_receive_by_type(type);
+	if (ptype == NULL) {
+		flush = 1;
+		goto out_unlock;
+	}
+
+	skb_gro_pull(skb, sizeof(*eh));
+	skb_gro_postpull_rcsum(skb, eh, sizeof(*eh));
+	pp = ptype->callbacks.gro_receive(head, skb);
+
+out_unlock:
+	rcu_read_unlock();
+out:
+	NAPI_GRO_CB(skb)->flush |= flush;
+
+	return pp;
+}
+EXPORT_SYMBOL(eth_gro_receive);
+
+int eth_gro_complete(struct sk_buff *skb, int nhoff)
+{
+	struct ethhdr *eh = (struct ethhdr *)(skb->data + nhoff);
+	__be16 type = eh->h_proto;
+	struct packet_offload *ptype;
+	int err = -ENOSYS;
+
+	if (skb->encapsulation)
+		skb_set_inner_mac_header(skb, nhoff);
+
+	rcu_read_lock();
+	ptype = gro_find_complete_by_type(type);
+	if (ptype != NULL)
+		err = ptype->callbacks.gro_complete(skb, nhoff +
+						    sizeof(struct ethhdr));
+
+	rcu_read_unlock();
+	return err;
+}
+EXPORT_SYMBOL(eth_gro_complete);
+
+static struct packet_offload eth_packet_offload __read_mostly = {
+	.type = cpu_to_be16(ETH_P_TEB),
+	.callbacks = {
+		.gro_receive = eth_gro_receive,
+		.gro_complete = eth_gro_complete,
+	},
+};
+
+static int __init eth_offload_init(void)
+{
+	dev_add_offload(&eth_packet_offload);
+
+	return 0;
+}
+
+fs_initcall(eth_offload_init);
-- 
1.9.1

^ permalink raw reply related

* Re: [PATCH 23/23 V2 for 3.19]  rtlwifi: Fix error when accessing unmapped memory in skb
From: Eric Biggers @ 2014-12-31  0:49 UTC (permalink / raw)
  To: Larry Finger; +Cc: kvalo, linux-wireless, netdev, Stable
In-Reply-To: <1419711457-21469-1-git-send-email-Larry.Finger@lwfinger.net>

On Sat, Dec 27, 2014 at 02:17:37PM -0600, Larry Finger wrote:
> These drivers use 9100-byte receive buffers, thus allocating an skb requires
> an O(3) memory allocation. Under heavy memory loads and fragmentation, such
> a request can fail. Previous versions of the driver have dropped the packet
> and reused the old buffer; however, the new version introduced a bug in that
> it released the old buffer before trying to allocate a new one. The previous
> method is implemented here.

It looks like in the out-of-memory path, pci_map_single() gets called while the
skb is still mapped.  Won't this leak the IOMMU mapping?

^ permalink raw reply

* net-next is OPEN
From: David Miller @ 2014-12-31  0:06 UTC (permalink / raw)
  To: netdev; +Cc: linux-wireless, netfilter-devel


Just FYI...

^ permalink raw reply

* Re: [PATCH net-next 2/2] net: gianfar: add missing __iomem annotation
From: David Miller @ 2014-12-31  0:05 UTC (permalink / raw)
  To: haokexin; +Cc: netdev, claudiu.manoil
In-Reply-To: <1419401145-8967-2-git-send-email-haokexin@gmail.com>

From: Kevin Hao <haokexin@gmail.com>
Date: Wed, 24 Dec 2014 14:05:45 +0800

> Fix the following sparse warning:
> drivers/net/ethernet/freescale/gianfar.c:3521:60: warning: incorrect type in argument 1 (different address spaces)
> drivers/net/ethernet/freescale/gianfar.c:3521:60:    expected unsigned int [noderef] <asn:2>*addr
> drivers/net/ethernet/freescale/gianfar.c:3521:60:    got unsigned int [usertype] *rfbptr
> drivers/net/ethernet/freescale/gianfar.c:205:16: warning: incorrect type in assignment (different address spaces)
> drivers/net/ethernet/freescale/gianfar.c:205:16:    expected unsigned int [usertype] *rfbptr
> drivers/net/ethernet/freescale/gianfar.c:205:16:    got unsigned int [noderef] <asn:2>*<noident>
> drivers/net/ethernet/freescale/gianfar.c:2918:44: warning: incorrect type in argument 1 (different address spaces)
> drivers/net/ethernet/freescale/gianfar.c:2918:44:    expected unsigned int [noderef] <asn:2>*addr
> drivers/net/ethernet/freescale/gianfar.c:2918:44:    got unsigned int [usertype] *rfbptr
> 
> Signed-off-by: Kevin Hao <haokexin@gmail.com>

Applied.

^ permalink raw reply

* Re: [PATCH net-next 1/2] net: gianfar: mark the local functions static
From: David Miller @ 2014-12-31  0:05 UTC (permalink / raw)
  To: haokexin; +Cc: netdev, claudiu.manoil
In-Reply-To: <1419401145-8967-1-git-send-email-haokexin@gmail.com>

From: Kevin Hao <haokexin@gmail.com>
Date: Wed, 24 Dec 2014 14:05:44 +0800

> Signed-off-by: Kevin Hao <haokexin@gmail.com>

Applied.

^ permalink raw reply

* Re: [PATCH net-next V2] virtio-net: don't do header check for dodgy gso packets
From: David Miller @ 2014-12-30 23:53 UTC (permalink / raw)
  To: jasowang; +Cc: netdev, virtualization, linux-kernel, mst
In-Reply-To: <1419390232-14906-1-git-send-email-jasowang@redhat.com>

From: Jason Wang <jasowang@redhat.com>
Date: Wed, 24 Dec 2014 11:03:52 +0800

> There's no need to do header check for virtio-net since:
> 
> - Host sets dodgy for all gso packets from guest and checks the header.
> - Host should be prepared for all kinds of evil packets from guest, since
>   a malicious guest can send any kind of packet.
> 
> So this patch sets NETIF_F_GSO_ROBUST for virtio-net to skip the check.
> 
> Cc: Rusty Russell <rusty@rustcorp.com.au>
> Cc: Michael S. Tsirkin <mst@redhat.com>
> Acked-by: Michael S. Tsirkin <mst@redhat.com>
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> ---
> Changes from V1:
> - typo fixes

Applied.

^ permalink raw reply

* Re: [PATCH 4/27] atheros: atlx: Use setup_timer
From: David Miller @ 2014-12-30 23:35 UTC (permalink / raw)
  To: Julia.Lawall; +Cc: jcliburn, kernel-janitors, chris.snook, netdev, linux-kernel
In-Reply-To: <1419604558-29743-4-git-send-email-Julia.Lawall@lip6.fr>

From: Julia Lawall <Julia.Lawall@lip6.fr>
Date: Fri, 26 Dec 2014 15:35:34 +0100

> Convert a call to init_timer and accompanying initializations of
> the timer's data and function fields to a call to setup_timer.
> 
> A simplified version of the semantic match that fixes this problem is as
> follows: (http://coccinelle.lip6.fr/)
> 
> // <smpl>
> @@
> expression t,f,d;
> @@
> 
> -init_timer(&t);
> +setup_timer(&t,f,d);
> -t.function = f;
> -t.data = d;
> // </smpl>
> 
> Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>

Applied.

^ permalink raw reply

* Re: [PATCH 3/27] atl1e: Use setup_timer
From: David Miller @ 2014-12-30 23:35 UTC (permalink / raw)
  To: Julia.Lawall; +Cc: jcliburn, kernel-janitors, chris.snook, netdev, linux-kernel
In-Reply-To: <1419604558-29743-5-git-send-email-Julia.Lawall@lip6.fr>

From: Julia Lawall <Julia.Lawall@lip6.fr>
Date: Fri, 26 Dec 2014 15:35:35 +0100

> Convert a call to init_timer and accompanying initializations of
> the timer's data and function fields to a call to setup_timer.
> 
> A simplified version of the semantic match that fixes this problem is as
> follows: (http://coccinelle.lip6.fr/)
> 
> // <smpl>
> @@
> expression t,f,d;
> @@
> 
> -init_timer(&t);
> +setup_timer(&t,f,d);
> -t.function = f;
> -t.data = d;
> // </smpl>
> 
> Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>

Applied.

^ permalink raw reply

* Re: [PATCH 11/27] ksz884x: Use setup_timer
From: David Miller @ 2014-12-30 23:35 UTC (permalink / raw)
  To: Julia.Lawall; +Cc: netdev, kernel-janitors, linux-kernel
In-Reply-To: <1419604558-29743-12-git-send-email-Julia.Lawall@lip6.fr>

From: Julia Lawall <Julia.Lawall@lip6.fr>
Date: Fri, 26 Dec 2014 15:35:42 +0100

> Convert a call to init_timer and accompanying initializations of
> the timer's data and function fields to a call to setup_timer.
> 
> A simplified version of the semantic match that fixes this problem is as
> follows: (http://coccinelle.lip6.fr/)
> 
> // <smpl>
> @@
> expression t,f,d;
> @@
> 
> -init_timer(&t);
> +setup_timer(&t,f,d);
> -t.function = f;
> -t.data = d;
> // </smpl>
> 
> Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>

Applied.

^ permalink raw reply

* Re: [PATCH 15/27] net: sxgbe: Use setup_timer
From: David Miller @ 2014-12-30 23:34 UTC (permalink / raw)
  To: Julia.Lawall
  Cc: bh74.an, kernel-janitors, ks.giri, vipul.pandya, netdev,
	linux-kernel
In-Reply-To: <1419604558-29743-16-git-send-email-Julia.Lawall@lip6.fr>

From: Julia Lawall <Julia.Lawall@lip6.fr>
Date: Fri, 26 Dec 2014 15:35:46 +0100

> Convert a call to init_timer and accompanying initializations of
> the timer's data and function fields to a call to setup_timer.
> 
> A simplified version of the semantic match that fixes this problem is as
> follows: (http://coccinelle.lip6.fr/)
> 
> // <smpl>
> @@
> expression t,f,d;
> @@
> 
> -init_timer(&t);
> +setup_timer(&t,f,d);
> -t.function = f;
> -t.data = d;
> // </smpl>
> 
> Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>

Applied.

^ permalink raw reply

* Re: [PATCH net-next 00/11] Time Counter fixes and improvements
From: David Miller @ 2014-12-30 23:32 UTC (permalink / raw)
  To: richardcochran
  Cc: netdev, linux-kernel, amirv, ariel.elior, carolyn.wyborny,
	Frank.Li, jeffrey.t.kirsher, john.stultz, matthew.vick, mlichvar,
	mugunthanvnm, ogerlitz, tglx, thomas.lendacky
In-Reply-To: <cover.1418504883.git.richardcochran@gmail.com>

From: Richard Cochran <richardcochran@gmail.com>
Date: Sun, 21 Dec 2014 19:46:55 +0100

> Several PTP Hardware Clock (PHC) drivers implement the clock in
> software using the timecounter/cyclecounter code. This series adds one
> simple improvement and one more subtle fix to the shared timecounter
> facility. Credit for this series goes to Janusz Użycki, who pointed
> the issues out to me off list.

Series applied, thanks Richard.

^ permalink raw reply

* Re: am335x: cpsw: interrupt failure
From: Felipe Balbi @ 2014-12-30 23:22 UTC (permalink / raw)
  To: Felipe Balbi
  Cc: Tony Lindgren, Yegor Yefremov, netdev, N, Mugunthan V,
	linux-omap@vger.kernel.org
In-Reply-To: <20141229171355.GJ29379@saruman>

[-- Attachment #1: Type: text/plain, Size: 2910 bytes --]

Hi,

On Mon, Dec 29, 2014 at 11:13:55AM -0600, Felipe Balbi wrote:
> > > > >>> U-Boot version: 2014.07
> > > > >>> Kernel config is omap2plus with enabled USB
> > > > >>>
> > > > >>> # cat /proc/version
> > > > >>> Linux version 3.18.0 (user@user-VirtualBox) (gcc version 4.8.3
> > > > >>> 20140320 (prerelease) (Sourcery CodeBench Lite 2014.05-29) ) #6 SMP
> > > > >>> Mon Dec 8 22:47:43 CET 2014
> > > > >>
> > > > >> Wasn't GCC 4.8.x total crap for building ARM kernels ? IIRC it was even
> > > > >> blacklisted. Can you try with 4.9.x just to make sure ?
> > > > >
> > > > > Will do.
> > > > 
> > > > Adding linux-omap. Beginning of this discussion:
> > > > http://comments.gmane.org/gmane.linux.network/341427
> > > > 
> > > > Quick summary: starting with kernel 3.18 or commit
> > > > 55601c9f24670ba926ebdd4d712ac3b177232330 am335x (at least BBB and some
> > > > custom boards) stalls at high network load. Reproducible via nuttcp
> > > > within some minutes
> > > > 
> > > > nuttcp -S (on BBB)
> > > > nuttcp -t -N 4 -T30m 192.168.1.235 (on host)
> > > > 
> > > > As Felipe Balbi suggested, I tried both 4.8.3 and 4.9.2 toolchains,
> > > > but both show the same behavior.
> > > > 
> > > > Linux version 3.18.0 (user@user-VirtualBox) (gcc version 4.8.3
> > > > 20140320 (prerelease) (Sourcery CodeBench Lite 2014.05-29) ) #6 SMP
> > > > Mon Dec 8 22:47:43 CET 2014
> > > > Linux version 3.18.1 (user@user-VirtualBox) (gcc version 4.9.2
> > > > (Buildroot 2015.02-git-00582-g10b9761) ) #1 SMP Mon Dec 29 09:22:29
> > > > CET 2014
> > > > 
> > > > Let me know, if you can reproduce this issue.
> > > 
> > > finally managed to reproduce this, it took quite a bit of effort though.
> > > I'll see if I can gather more information about the problem.
> > 
> > Maybe check if the irqnr is 127 (or the last reserved interrupt)
> > in irq-omap-intc.c. If so, also print out the previous interrupt.
> > It seems the intc uses the last reserved interrupt to signal a
> > spurious interrupt for the previous irqnr, so we should probably
> > add some handling for that.
> > 
> > If the previous interrupt is a cpsw interrupt, then there's probably
> > something wrong with cpsw interrupt handling. Either a missing
> > read-back to flush posted write in the cpsw interrupt handler,
> > or the EOI registers are written at a wrong time.
> 
> yeah, I'll go over it, but I first need to reproduce it again. Just
> rebooted to try again and after half an hour, couldn't reproduce it
> anymore. Interesting race to end the year :-)

alright, managed to reproduce it multiple times and I'm pretty confident I've
found the bug. Right now I'm testing with AM437x and AM335x to make sure
it's really working. If it's still running until tomorrow I'll send a
preliminary patch but I want to leave this running for quite a few days
before calling it "fixed".

-- 
balbi

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply

* Re: [PATCH net-next] net: stmmac: add BQL support
From: Beniamino Galvani @ 2014-12-30 23:13 UTC (permalink / raw)
  To: Dave Taht
  Cc: Giuseppe Cavallaro, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <CAA93jw4GeWOqeZu5SjOYhhORiH6XCrv7uPTjrgE9O1QWESCxqw@mail.gmail.com>

On Mon, Dec 29, 2014 at 09:42:01AM -0800, Dave Taht wrote:
> On Sun, Dec 28, 2014 at 1:48 PM, Beniamino Galvani <b.galvani@gmail.com> wrote:
> > On Sun, Dec 28, 2014 at 08:25:40AM -0800, Dave Taht wrote:
> >> On Sun, Dec 28, 2014 at 6:57 AM, Beniamino Galvani <b.galvani@gmail.com> wrote:
> >> > Add support for Byte Queue Limits to the STMicro MAC driver.
> >>
> >> Thank you!
> >>
> >> > Tested on a Amlogic S805 Cortex-A5 board, where the use of BQL
> >> > slightly decreases the ping latency from ~10ms to ~3ms when the
> >> > 100Mbps link is saturated by TCP streams. No difference is
> >> > observed at 1Gbps.
> >>
> >> I see the plural. With TSQ in place it is hard (without something like
> >> the rrul test driving multiple streams) to drive a driver to
> >> saturation with small numbers of flows. This was with pfifo_fast, not
> >> sch_fq, at 100mbit?
> >
> > Hi Dave,
> >
> > yes, this was with pfifo_fast and I used 4 iperf TCP streams. The total
> > throughput didn't seem to increase adding more streams.
> 
> >>
> >> Can this board actually drive a full gigabit in the first place? Until
> >> now most of the low end arm boards I have seen only came with
> >> a 100mbit mac, and the gig ones lacking offloads seemed to peak
> >> out at about 600mbit.
> >
> > I measured a throughput of 650mbit in rx and 600mbit in tx.
> 
> You might want to try the rrul test which tests both directions and
> latency at the same time.

I will try it, thanks.

> 
> In my case I have been trying to find a low-cost chip that could do soft
> rate limiting (htb) + fq_codel at up to 300mbit/sec, as that is about
> the peak speed
> we will be getting from cable modems, and these are horribly overbuffered,
> at these speeds too, with 1.2sec of bidirectional latency observed at
> 120mbit/12mbit.
> 
> I'm open to crazy ideas like trying to find a use for the gpu, etc, to
> get there.
> 
> >
> >>
> >> Under my christmas tree landed a quad core A5 (odroid-c1), also an
> >> xgene and zedboard - both of the latter are a-needing BQL,
> >> and I haven't booted the udroid yet. Hopefully it is the
> >> same driver you just improved.
> >
> > I'm using the odroid-c1 too, with this tree based on the recent
> > Amlogic mainline work:
> >
> >   https://github.com/bengal/linux/tree/meson8b
> 
> Oh, cool, thx!
> 
> > Unfortunately at the moment the support for the board is very basic
> > (for example, SMP is not working yet) but it's enough to do some NIC
> > tests.
> 
> Good to know. Have you looked at xmit_more yet?
> 
> http://lwn.net/Articles/615238/

I don't know if I have implemented it correctly, but I found that the
improvement with xmit_more is so small as to be barely observable, maybe
because the cost for starting the hardware transmission is very low
(it's a single mmio write).

Beniamino

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox