From: Jiri Pirko <jpirko@redhat.com>
To: "Nicolas de Pesloüan" <nicolas.2p.debian@gmail.com>
Cc: Andy Gospodarek <andy@greyhouse.net>,
netdev@vger.kernel.org, davem@davemloft.net,
shemminger@linux-foundation.org, kaber@trash.net,
fubar@us.ibm.com, eric.dumazet@gmail.com
Subject: Re: [patch net-next-2.6] net: reinject arps into bonding slave instead of master
Date: Thu, 10 Mar 2011 07:48:30 +0100 [thread overview]
Message-ID: <20110310064829.GA3261@psychotron.redhat.com> (raw)
In-Reply-To: <4D77FCC1.5050904@gmail.com>
>But for all others setups, where there exist some net_devices before
>the "untagging" one, you would face some troubles. For example, with
>eth0+eth1 -> br0 -> br0.100, you cannot untag before entering
>__netif_receive_skb. If you do so, the bridge would receive untagged
>frame and if the frame is not for the local host, the bridge would
>forward an untagged frame while it is expected to forward a tagged
>one. Even if the bridge is in a position to know the frame *was*
>tagged, we cannot expect the bridge to do special processing to
>handle this situation. Doing so would break layering.
I disagree.
eth0 -> untag on early __netif_receive_skb (sets up skb->vlan_tci)
->rx_handler of bridge
->br0 -> tag is detected by vlan_tx_tag_present()
-> reinject to __netif_receive_skb with skb->dev == br0.100
This way the flow would be very similar to vlan-hw-accel, am I right?
I have following patch in mind. Note it's raw DRAFT.
Signed-off-by: Jiri Pirko <jpirko@redhat.com>
---
include/linux/if_vlan.h | 5 ++
net/8021q/vlan.c | 8 --
net/8021q/vlan_core.c | 106 +++++++++++++++++++++++++++++
net/8021q/vlan_dev.c | 172 -----------------------------------------------
net/core/dev.c | 18 ++++-
5 files changed, 125 insertions(+), 184 deletions(-)
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 635e1fa..dd914e2 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -133,6 +133,7 @@ extern u16 vlan_dev_vlan_id(const struct net_device *dev);
extern int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
u16 vlan_tci, int polling);
extern bool vlan_hwaccel_do_receive(struct sk_buff **skb);
+extern void vlan_emulate_hwaccel(struct sk_buff **skbp);
extern gro_result_t
vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp,
unsigned int vlan_tci, struct sk_buff *skb);
@@ -173,6 +174,10 @@ static inline bool vlan_hwaccel_do_receive(struct sk_buff **skb)
return false;
}
+void vlan_emulate_hwaccel(struct sk_buff **skbp)
+{
+}
+
static inline gro_result_t
vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp,
unsigned int vlan_tci, struct sk_buff *skb)
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 7850412..59f0a9d 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -49,11 +49,6 @@ const char vlan_version[] = DRV_VERSION;
static const char vlan_copyright[] = "Ben Greear <greearb@candelatech.com>";
static const char vlan_buggyright[] = "David S. Miller <davem@redhat.com>";
-static struct packet_type vlan_packet_type __read_mostly = {
- .type = cpu_to_be16(ETH_P_8021Q),
- .func = vlan_skb_recv, /* VLAN receive method */
-};
-
/* End of global variables definitions. */
static void vlan_group_free(struct vlan_group *grp)
@@ -688,7 +683,6 @@ static int __init vlan_proto_init(void)
if (err < 0)
goto err4;
- dev_add_pack(&vlan_packet_type);
vlan_ioctl_set(vlan_ioctl_handler);
return 0;
@@ -709,8 +703,6 @@ static void __exit vlan_cleanup_module(void)
unregister_netdevice_notifier(&vlan_notifier_block);
- dev_remove_pack(&vlan_packet_type);
-
unregister_pernet_subsys(&vlan_net_ops);
rcu_barrier(); /* Wait for completion of call_rcu()'s */
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index ce8e3ab..c324e4d 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -88,3 +88,109 @@ gro_result_t vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp,
return napi_gro_frags(napi);
}
EXPORT_SYMBOL(vlan_gro_frags);
+
+static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb)
+{
+ if (vlan_dev_info(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) {
+ if (skb_cow(skb, skb_headroom(skb)) < 0)
+ skb = NULL;
+ if (skb) {
+ /* Lifted from Gleb's VLAN code... */
+ memmove(skb->data - ETH_HLEN,
+ skb->data - VLAN_ETH_HLEN, 12);
+ skb->mac_header += VLAN_HLEN;
+ }
+ }
+
+ return skb;
+}
+
+static inline void vlan_set_encap_proto(struct sk_buff *skb,
+ struct vlan_hdr *vhdr)
+{
+ __be16 proto;
+ unsigned char *rawp;
+
+ /*
+ * Was a VLAN packet, grab the encapsulated protocol, which the layer
+ * three protocols care about.
+ */
+
+ proto = vhdr->h_vlan_encapsulated_proto;
+ if (ntohs(proto) >= 1536) {
+ skb->protocol = proto;
+ return;
+ }
+
+ rawp = skb->data;
+ if (*(unsigned short *)rawp == 0xFFFF)
+ /*
+ * This is a magic hack to spot IPX packets. Older Novell
+ * breaks the protocol design and runs IPX over 802.3 without
+ * an 802.2 LLC layer. We look for FFFF which isn't a used
+ * 802.2 SSAP/DSAP. This won't work for fault tolerant netware
+ * but does for the rest.
+ */
+ skb->protocol = htons(ETH_P_802_3);
+ else
+ /*
+ * Real 802.2 LLC
+ */
+ skb->protocol = htons(ETH_P_802_2);
+}
+
+/*
+ * Determine the packet's protocol ID. The rule here is that we
+ * assume 802.3 if the type field is short enough to be a length.
+ * This is normal practice and works for any 'now in use' protocol.
+ *
+ * Also, at this point we assume that we ARE dealing exclusively with
+ * VLAN packets, or packets that should be made into VLAN packets based
+ * on a default VLAN ID.
+ *
+ * NOTE: Should be similar to ethernet/eth.c.
+ *
+ * SANITY NOTE: This method is called when a packet is moving up the stack
+ * towards userland. To get here, it would have already passed
+ * through the ethernet/eth.c eth_type_trans() method.
+ * SANITY NOTE 2: We are referencing to the VLAN_HDR frields, which MAY be
+ * stored UNALIGNED in the memory. RISC systems don't like
+ * such cases very much...
+ * SANITY NOTE 2a: According to Dave Miller & Alexey, it will always be
+ * aligned, so there doesn't need to be any of the unaligned
+ * stuff. It has been commented out now... --Ben
+ *
+ */
+void vlan_emulate_hwaccel(struct sk_buff **skbp)
+{
+ struct sk_buff *skb = *skbp;
+ struct vlan_hdr *vhdr;
+ u16 vlan_tci;
+
+ *skbp = skb = skb_share_check(skb, GFP_ATOMIC);
+ if (unlikely(!skb))
+ goto err_free;
+
+ if (unlikely(!pskb_may_pull(skb, VLAN_HLEN)))
+ goto err_free;
+
+ vhdr = (struct vlan_hdr *) skb->data;
+ vlan_tci = ntohs(vhdr->h_vlan_TCI);
+ __vlan_hwaccel_put_tag(skb, vlan_tci);
+
+ skb_pull_rcsum(skb, VLAN_HLEN);
+ vlan_set_encap_proto(skb, vhdr);
+
+ skb = vlan_check_reorder_header(skb);
+ if (unlikely(!skb)) {
+ goto err_free;
+ }
+
+ return;
+
+err_free:
+ kfree_skb(skb);
+ *skbp = NULL;
+ return;
+}
+
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index ae610f0..57d2f40 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -65,178 +65,6 @@ static int vlan_dev_rebuild_header(struct sk_buff *skb)
return 0;
}
-static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb)
-{
- if (vlan_dev_info(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) {
- if (skb_cow(skb, skb_headroom(skb)) < 0)
- skb = NULL;
- if (skb) {
- /* Lifted from Gleb's VLAN code... */
- memmove(skb->data - ETH_HLEN,
- skb->data - VLAN_ETH_HLEN, 12);
- skb->mac_header += VLAN_HLEN;
- }
- }
-
- return skb;
-}
-
-static inline void vlan_set_encap_proto(struct sk_buff *skb,
- struct vlan_hdr *vhdr)
-{
- __be16 proto;
- unsigned char *rawp;
-
- /*
- * Was a VLAN packet, grab the encapsulated protocol, which the layer
- * three protocols care about.
- */
-
- proto = vhdr->h_vlan_encapsulated_proto;
- if (ntohs(proto) >= 1536) {
- skb->protocol = proto;
- return;
- }
-
- rawp = skb->data;
- if (*(unsigned short *)rawp == 0xFFFF)
- /*
- * This is a magic hack to spot IPX packets. Older Novell
- * breaks the protocol design and runs IPX over 802.3 without
- * an 802.2 LLC layer. We look for FFFF which isn't a used
- * 802.2 SSAP/DSAP. This won't work for fault tolerant netware
- * but does for the rest.
- */
- skb->protocol = htons(ETH_P_802_3);
- else
- /*
- * Real 802.2 LLC
- */
- skb->protocol = htons(ETH_P_802_2);
-}
-
-/*
- * Determine the packet's protocol ID. The rule here is that we
- * assume 802.3 if the type field is short enough to be a length.
- * This is normal practice and works for any 'now in use' protocol.
- *
- * Also, at this point we assume that we ARE dealing exclusively with
- * VLAN packets, or packets that should be made into VLAN packets based
- * on a default VLAN ID.
- *
- * NOTE: Should be similar to ethernet/eth.c.
- *
- * SANITY NOTE: This method is called when a packet is moving up the stack
- * towards userland. To get here, it would have already passed
- * through the ethernet/eth.c eth_type_trans() method.
- * SANITY NOTE 2: We are referencing to the VLAN_HDR frields, which MAY be
- * stored UNALIGNED in the memory. RISC systems don't like
- * such cases very much...
- * SANITY NOTE 2a: According to Dave Miller & Alexey, it will always be
- * aligned, so there doesn't need to be any of the unaligned
- * stuff. It has been commented out now... --Ben
- *
- */
-int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *ptype, struct net_device *orig_dev)
-{
- struct vlan_hdr *vhdr;
- struct vlan_pcpu_stats *rx_stats;
- struct net_device *vlan_dev;
- u16 vlan_id;
- u16 vlan_tci;
-
- skb = skb_share_check(skb, GFP_ATOMIC);
- if (skb == NULL)
- goto err_free;
-
- if (unlikely(!pskb_may_pull(skb, VLAN_HLEN)))
- goto err_free;
-
- vhdr = (struct vlan_hdr *)skb->data;
- vlan_tci = ntohs(vhdr->h_vlan_TCI);
- vlan_id = vlan_tci & VLAN_VID_MASK;
-
- rcu_read_lock();
- vlan_dev = vlan_find_dev(dev, vlan_id);
-
- /* If the VLAN device is defined, we use it.
- * If not, and the VID is 0, it is a 802.1p packet (not
- * really a VLAN), so we will just netif_rx it later to the
- * original interface, but with the skb->proto set to the
- * wrapped proto: we do nothing here.
- */
-
- if (!vlan_dev) {
- if (vlan_id) {
- pr_debug("%s: ERROR: No net_device for VID: %u on dev: %s\n",
- __func__, vlan_id, dev->name);
- goto err_unlock;
- }
- rx_stats = NULL;
- } else {
- skb->dev = vlan_dev;
-
- rx_stats = this_cpu_ptr(vlan_dev_info(skb->dev)->vlan_pcpu_stats);
-
- u64_stats_update_begin(&rx_stats->syncp);
- rx_stats->rx_packets++;
- rx_stats->rx_bytes += skb->len;
-
- skb->priority = vlan_get_ingress_priority(skb->dev, vlan_tci);
-
- pr_debug("%s: priority: %u for TCI: %hu\n",
- __func__, skb->priority, vlan_tci);
-
- switch (skb->pkt_type) {
- case PACKET_BROADCAST:
- /* Yeah, stats collect these together.. */
- /* stats->broadcast ++; // no such counter :-( */
- break;
-
- case PACKET_MULTICAST:
- rx_stats->rx_multicast++;
- break;
-
- case PACKET_OTHERHOST:
- /* Our lower layer thinks this is not local, let's make
- * sure.
- * This allows the VLAN to have a different MAC than the
- * underlying device, and still route correctly.
- */
- if (!compare_ether_addr(eth_hdr(skb)->h_dest,
- skb->dev->dev_addr))
- skb->pkt_type = PACKET_HOST;
- break;
- default:
- break;
- }
- u64_stats_update_end(&rx_stats->syncp);
- }
-
- skb_pull_rcsum(skb, VLAN_HLEN);
- vlan_set_encap_proto(skb, vhdr);
-
- if (vlan_dev) {
- skb = vlan_check_reorder_header(skb);
- if (!skb) {
- rx_stats->rx_errors++;
- goto err_unlock;
- }
- }
-
- netif_rx(skb);
-
- rcu_read_unlock();
- return NET_RX_SUCCESS;
-
-err_unlock:
- rcu_read_unlock();
-err_free:
- atomic_long_inc(&dev->rx_dropped);
- kfree_skb(skb);
- return NET_RX_DROP;
-}
static inline u16
vlan_dev_get_egress_qos_mask(struct net_device *dev, struct sk_buff *skb)
diff --git a/net/core/dev.c b/net/core/dev.c
index 9f66de9..f3315ef 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3133,10 +3133,6 @@ static int __netif_receive_skb(struct sk_buff *skb)
if (netpoll_receive_skb(skb))
return NET_RX_DROP;
- if (!skb->skb_iif)
- skb->skb_iif = skb->dev->ifindex;
- orig_dev = skb->dev;
-
skb_reset_network_header(skb);
skb_reset_transport_header(skb);
skb->mac_len = skb->network_header - skb->mac_header;
@@ -3145,6 +3141,20 @@ static int __netif_receive_skb(struct sk_buff *skb)
rcu_read_lock();
+ if (!skb->skb_iif) {
+ skb->skb_iif = skb->dev->ifindex;
+ /*
+ * frame is here for the first time so lets check if it's vlan
+ * one and emulate vlan hwaccel in that case
+ */
+ if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
+ vlan_emulate_hwaccel(&skb);
+ if (!skb)
+ goto out;
+ }
+ }
+ orig_dev = skb->dev;
+
another_round:
__this_cpu_inc(softnet_data.processed);
--
1.7.4
next prev parent reply other threads:[~2011-03-10 6:48 UTC|newest]
Thread overview: 73+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-03-05 10:29 [patch net-next-2.6 0/8] mostly bonding rx path changes Jiri Pirko
2011-03-05 10:29 ` [patch net-next-2.6 1/8] af_packet: use skb->skb_iif instead of orig_dev->ifindex Jiri Pirko
2011-03-05 14:03 ` Nicolas de Pesloüan
2011-03-05 10:29 ` [patch net-next-2.6 2/8] bonding: register slave pointer for rx_handler Jiri Pirko
2011-03-05 14:06 ` Nicolas de Pesloüan
2011-03-05 14:27 ` Jiri Pirko
2011-03-05 14:38 ` Nicolas de Pesloüan
2011-03-05 10:29 ` [patch net-next-2.6 3/8] net: get rid of multiple bond-related netdevice->priv_flags Jiri Pirko
2011-03-05 14:14 ` Nicolas de Pesloüan
2011-03-05 14:37 ` Ben Hutchings
2011-03-05 14:46 ` Nicolas de Pesloüan
2011-03-05 10:29 ` [patch net-next-2.6 4/8] bonding: wrap slave state work Jiri Pirko
2011-03-05 15:21 ` Nicolas de Pesloüan
2011-03-07 9:58 ` Jiri Pirko
2011-03-07 19:55 ` Nicolas de Pesloüan
2011-03-08 7:18 ` Jiri Pirko
2011-03-08 21:23 ` Nicolas de Pesloüan
2011-03-05 10:29 ` [patch net-next-2.6 5/8] bonding: get rid of IFF_SLAVE_INACTIVE netdev->priv_flag Jiri Pirko
2011-03-05 14:18 ` Nicolas de Pesloüan
2011-03-05 10:29 ` [patch net-next-2.6 6/8] bonding: move processing of recv handlers into handle_frame() Jiri Pirko
2011-03-05 14:33 ` Nicolas de Pesloüan
2011-03-05 14:43 ` Jiri Pirko
2011-03-05 14:50 ` Nicolas de Pesloüan
2011-03-06 12:24 ` Nicolas de Pesloüan
2011-03-06 13:34 ` Jiri Pirko
2011-03-06 14:25 ` Nicolas de Pesloüan
2011-03-06 16:32 ` Michał Mirosław
2011-03-06 17:37 ` Nicolas de Pesloüan
2011-03-07 12:51 ` [patch net-next-2.6] net: reinject arps into bonding slave instead of master Jiri Pirko
2011-03-07 14:32 ` Andy Gospodarek
2011-03-07 20:12 ` Nicolas de Pesloüan
2011-03-07 21:19 ` Jiri Pirko
2011-03-07 21:30 ` Nicolas de Pesloüan
2011-03-07 22:43 ` Andy Gospodarek
2011-03-07 23:09 ` Nicolas de Pesloüan
2011-03-08 2:43 ` Andy Gospodarek
2011-03-08 21:34 ` Nicolas de Pesloüan
2011-03-08 7:13 ` Jiri Pirko
2011-03-08 13:42 ` Andy Gospodarek
2011-03-08 21:44 ` Nicolas de Pesloüan
2011-03-09 7:45 ` Jiri Pirko
2011-03-09 14:49 ` Nicolas de Pesloüan
2011-03-09 15:09 ` Jiri Pirko
2011-03-09 15:28 ` Nicolas de Pesloüan
2011-03-09 17:11 ` Jiri Pirko
2011-03-09 22:18 ` Nicolas de Pesloüan
2011-03-10 6:48 ` Jiri Pirko [this message]
2011-03-10 20:44 ` Nicolas de Pesloüan
2011-03-10 20:52 ` Jiri Pirko
2011-03-10 21:05 ` Jiri Pirko
2011-03-09 20:51 ` Jiri Pirko
2011-03-09 13:33 ` Neil Horman
2011-03-05 10:29 ` [patch net-next-2.6 7/8] net: introduce rx_handler results and logic around that Jiri Pirko
2011-03-05 12:48 ` Ben Hutchings
2011-03-05 14:52 ` Nicolas de Pesloüan
2011-03-05 14:54 ` Jiri Pirko
2011-03-05 15:06 ` Nicolas de Pesloüan
2011-03-05 15:13 ` [patch net-next-2.6] net: comment rx_handler results Jiri Pirko
2011-03-05 15:27 ` Nicolas de Pesloüan
2011-03-05 15:37 ` Jiri Pirko
2011-03-05 15:50 ` Nicolas de Pesloüan
2011-03-06 20:00 ` [PATCH net-next-2.6] net: enhance the documentation for rx_handler Nicolas de Pesloüan
2011-03-07 9:54 ` Jiri Pirko
2011-03-07 16:36 ` Stephen Hemminger
2011-03-07 20:01 ` [PATCH net-next-2.6 V2] " Nicolas de Pesloüan
2011-03-05 15:31 ` [patch net-next-2.6 7/8 v2] net: introduce rx_handler results and logic around that Jiri Pirko
2011-03-05 10:29 ` [patch net-next-2.6 8/8] net: get rid of orig_dev parameter of packet handlers Jiri Pirko
2011-03-05 15:05 ` Nicolas de Pesloüan
2011-03-05 15:15 ` Jiri Pirko
2011-03-05 15:32 ` [patch net-next-2.6 8/8 v2] " Jiri Pirko
2011-03-05 16:56 ` Nicolas de Pesloüan
2011-03-05 22:07 ` Jiri Pirko
2011-03-05 22:18 ` Nicolas de Pesloüan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20110310064829.GA3261@psychotron.redhat.com \
--to=jpirko@redhat.com \
--cc=andy@greyhouse.net \
--cc=davem@davemloft.net \
--cc=eric.dumazet@gmail.com \
--cc=fubar@us.ibm.com \
--cc=kaber@trash.net \
--cc=netdev@vger.kernel.org \
--cc=nicolas.2p.debian@gmail.com \
--cc=shemminger@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.