From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
stable@vger.kernel.org, David L Stevens <dlstevens@us.ibm.com>,
"David S. Miller" <davem@davemloft.net>
Subject: [PATCH 3.13 33/65] vxlan: fix nonfunctional neigh_reduce()
Date: Fri, 11 Apr 2014 09:11:05 -0700 [thread overview]
Message-ID: <20140411161002.302383123@linuxfoundation.org> (raw)
In-Reply-To: <20140411160957.714773410@linuxfoundation.org>
3.13-stable review patch. If anyone has any objections, please let me know.
------------------
From: David Stevens <dlstevens@us.ibm.com>
[ Upstream commit 4b29dba9c085a4fb79058fb1c45a2f6257ca3dfa ]
The VXLAN neigh_reduce() code is completely non-functional since
check-in. Specific errors:
1) The original code drops all packets with a multicast destination address,
even though neighbor solicitations are sent to the solicited-node
address, a multicast address. The code after this check was never run.
2) The neighbor table lookup used the IPv6 header destination, which is the
solicited node address, rather than the target address from the
neighbor solicitation. So neighbor lookups would always fail if it
got this far. The same wrong address was also reported for L3MISSes.
3) The code calls ndisc_send_na(), which does a send on the tunnel device.
The context for neigh_reduce() is the transmit path, vxlan_xmit(),
where the host or a bridge-attached neighbor is trying to transmit
a neighbor solicitation. To respond to it, the tunnel endpoint needs
to do a *receive* of the appropriate neighbor advertisement. Doing a
send would only try to send the advertisement, encapsulated, to the
remote destinations in the fdb -- hosts that definitely did not do the
corresponding solicitation.
4) The code uses the tunnel endpoint IPv6 forwarding flag to determine the
isrouter flag in the advertisement. This has nothing to do with whether
or not the target is a router, and generally won't be set since the
tunnel endpoint is bridging, not routing, traffic.
The patch below creates a proxy neighbor advertisement to respond to
neighbor solicitations as intended, providing proper IPv6 support for neighbor
reduction.
Signed-off-by: David L Stevens <dlstevens@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
drivers/net/vxlan.c | 127 ++++++++++++++++++++++++++++++++++++++++++++++------
1 file changed, 113 insertions(+), 14 deletions(-)
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1232,15 +1232,103 @@ out:
}
#if IS_ENABLED(CONFIG_IPV6)
+
+static struct sk_buff *vxlan_na_create(struct sk_buff *request,
+ struct neighbour *n, bool isrouter)
+{
+ struct net_device *dev = request->dev;
+ struct sk_buff *reply;
+ struct nd_msg *ns, *na;
+ struct ipv6hdr *pip6;
+ u8 *daddr;
+ int na_olen = 8; /* opt hdr + ETH_ALEN for target */
+ int ns_olen;
+ int i, len;
+
+ if (dev == NULL)
+ return NULL;
+
+ len = LL_RESERVED_SPACE(dev) + sizeof(struct ipv6hdr) +
+ sizeof(*na) + na_olen + dev->needed_tailroom;
+ reply = alloc_skb(len, GFP_ATOMIC);
+ if (reply == NULL)
+ return NULL;
+
+ reply->protocol = htons(ETH_P_IPV6);
+ reply->dev = dev;
+ skb_reserve(reply, LL_RESERVED_SPACE(request->dev));
+ skb_push(reply, sizeof(struct ethhdr));
+ skb_set_mac_header(reply, 0);
+
+ ns = (struct nd_msg *)skb_transport_header(request);
+
+ daddr = eth_hdr(request)->h_source;
+ ns_olen = request->len - skb_transport_offset(request) - sizeof(*ns);
+ for (i = 0; i < ns_olen-1; i += (ns->opt[i+1]<<3)) {
+ if (ns->opt[i] == ND_OPT_SOURCE_LL_ADDR) {
+ daddr = ns->opt + i + sizeof(struct nd_opt_hdr);
+ break;
+ }
+ }
+
+ /* Ethernet header */
+ memcpy(eth_hdr(reply)->h_dest, daddr, ETH_ALEN);
+ memcpy(eth_hdr(reply)->h_source, n->ha, ETH_ALEN);
+ eth_hdr(reply)->h_proto = htons(ETH_P_IPV6);
+ reply->protocol = htons(ETH_P_IPV6);
+
+ skb_pull(reply, sizeof(struct ethhdr));
+ skb_set_network_header(reply, 0);
+ skb_put(reply, sizeof(struct ipv6hdr));
+
+ /* IPv6 header */
+
+ pip6 = ipv6_hdr(reply);
+ memset(pip6, 0, sizeof(struct ipv6hdr));
+ pip6->version = 6;
+ pip6->priority = ipv6_hdr(request)->priority;
+ pip6->nexthdr = IPPROTO_ICMPV6;
+ pip6->hop_limit = 255;
+ pip6->daddr = ipv6_hdr(request)->saddr;
+ pip6->saddr = *(struct in6_addr *)n->primary_key;
+
+ skb_pull(reply, sizeof(struct ipv6hdr));
+ skb_set_transport_header(reply, 0);
+
+ na = (struct nd_msg *)skb_put(reply, sizeof(*na) + na_olen);
+
+ /* Neighbor Advertisement */
+ memset(na, 0, sizeof(*na)+na_olen);
+ na->icmph.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
+ na->icmph.icmp6_router = isrouter;
+ na->icmph.icmp6_override = 1;
+ na->icmph.icmp6_solicited = 1;
+ na->target = ns->target;
+ memcpy(&na->opt[2], n->ha, ETH_ALEN);
+ na->opt[0] = ND_OPT_TARGET_LL_ADDR;
+ na->opt[1] = na_olen >> 3;
+
+ na->icmph.icmp6_cksum = csum_ipv6_magic(&pip6->saddr,
+ &pip6->daddr, sizeof(*na)+na_olen, IPPROTO_ICMPV6,
+ csum_partial(na, sizeof(*na)+na_olen, 0));
+
+ pip6->payload_len = htons(sizeof(*na)+na_olen);
+
+ skb_push(reply, sizeof(struct ipv6hdr));
+
+ reply->ip_summed = CHECKSUM_UNNECESSARY;
+
+ return reply;
+}
+
static int neigh_reduce(struct net_device *dev, struct sk_buff *skb)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
- struct neighbour *n;
- union vxlan_addr ipa;
+ struct nd_msg *msg;
const struct ipv6hdr *iphdr;
const struct in6_addr *saddr, *daddr;
- struct nd_msg *msg;
- struct inet6_dev *in6_dev = NULL;
+ struct neighbour *n;
+ struct inet6_dev *in6_dev;
in6_dev = __in6_dev_get(dev);
if (!in6_dev)
@@ -1253,19 +1341,20 @@ static int neigh_reduce(struct net_devic
saddr = &iphdr->saddr;
daddr = &iphdr->daddr;
- if (ipv6_addr_loopback(daddr) ||
- ipv6_addr_is_multicast(daddr))
- goto out;
-
msg = (struct nd_msg *)skb_transport_header(skb);
if (msg->icmph.icmp6_code != 0 ||
msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
goto out;
- n = neigh_lookup(ipv6_stub->nd_tbl, daddr, dev);
+ if (ipv6_addr_loopback(daddr) ||
+ ipv6_addr_is_multicast(&msg->target))
+ goto out;
+
+ n = neigh_lookup(ipv6_stub->nd_tbl, &msg->target, dev);
if (n) {
struct vxlan_fdb *f;
+ struct sk_buff *reply;
if (!(n->nud_state & NUD_CONNECTED)) {
neigh_release(n);
@@ -1279,13 +1368,23 @@ static int neigh_reduce(struct net_devic
goto out;
}
- ipv6_stub->ndisc_send_na(dev, n, saddr, &msg->target,
- !!in6_dev->cnf.forwarding,
- true, false, false);
+ reply = vxlan_na_create(skb, n,
+ !!(f ? f->flags & NTF_ROUTER : 0));
+
neigh_release(n);
+
+ if (reply == NULL)
+ goto out;
+
+ if (netif_rx_ni(reply) == NET_RX_DROP)
+ dev->stats.rx_dropped++;
+
} else if (vxlan->flags & VXLAN_F_L3MISS) {
- ipa.sin6.sin6_addr = *daddr;
- ipa.sa.sa_family = AF_INET6;
+ union vxlan_addr ipa = {
+ .sin6.sin6_addr = msg->target,
+ .sa.sa_family = AF_INET6,
+ };
+
vxlan_ip_miss(dev, &ipa);
}
next prev parent reply other threads:[~2014-04-11 16:11 UTC|newest]
Thread overview: 68+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-04-11 16:10 [PATCH 3.13 00/65] 3.13.10-stable review Greg Kroah-Hartman
2014-04-11 16:10 ` [PATCH 3.13 01/65] selinux: correctly label /proc inodes in use before the policy is loaded Greg Kroah-Hartman
2014-04-11 16:10 ` [PATCH 3.13 02/65] net: fix for a race condition in the inet frag code Greg Kroah-Hartman
2014-04-11 16:10 ` [PATCH 3.13 03/65] net: sctp: fix skb leakage in COOKIE ECHO path of chunk->auth_chunk Greg Kroah-Hartman
2014-04-11 16:10 ` [PATCH 3.13 04/65] bridge: multicast: add sanity check for query source addresses Greg Kroah-Hartman
2014-04-11 16:10 ` [PATCH 3.13 05/65] tipc: allow connection shutdown callback to be invoked in advance Greg Kroah-Hartman
2014-04-11 16:10 ` [PATCH 3.13 06/65] tipc: fix connection refcount leak Greg Kroah-Hartman
2014-04-11 16:10 ` [PATCH 3.13 07/65] tipc: drop subscriber connection id invalidation Greg Kroah-Hartman
2014-04-11 16:10 ` [PATCH 3.13 08/65] tipc: fix memory leak during module removal Greg Kroah-Hartman
2014-04-11 16:10 ` [PATCH 3.13 09/65] tipc: dont log disabled tasklet handler errors Greg Kroah-Hartman
2014-04-11 16:10 ` [PATCH 3.13 10/65] inet: frag: make sure forced eviction removes all frags Greg Kroah-Hartman
2014-04-11 16:10 ` [PATCH 3.13 11/65] net: unix: non blocking recvmsg() should not return -EINTR Greg Kroah-Hartman
2014-04-11 16:10 ` [PATCH 3.13 12/65] ipv6: Fix exthdrs offload registration Greg Kroah-Hartman
2014-04-11 16:10 ` [PATCH 3.13 13/65] ipv6: dont set DST_NOCOUNT for remotely added routes Greg Kroah-Hartman
2014-04-11 16:10 ` [PATCH 3.13 14/65] bnx2: Fix shutdown sequence Greg Kroah-Hartman
2014-04-11 16:10 ` [PATCH 3.13 15/65] pkt_sched: fq: do not hold qdisc lock while allocating memory Greg Kroah-Hartman
2014-04-11 16:10 ` [PATCH 3.13 16/65] Xen-netback: Fix issue caused by using gso_type wrongly Greg Kroah-Hartman
2014-04-11 16:10 ` [PATCH 3.13 17/65] vlan: Set correct source MAC address with TX VLAN offload enabled Greg Kroah-Hartman
2014-04-11 16:10 ` [PATCH 3.13 18/65] skbuff: skb_segment: s/frag/nskb_frag/ Greg Kroah-Hartman
2014-04-11 16:10 ` [PATCH 3.13 19/65] skbuff: skb_segment: s/skb_frag/frag/ Greg Kroah-Hartman
2014-04-11 16:10 ` [PATCH 3.13 20/65] skbuff: skb_segment: s/skb/head_skb/ Greg Kroah-Hartman
2014-04-11 16:10 ` [PATCH 3.13 21/65] skbuff: skb_segment: s/fskb/list_skb/ Greg Kroah-Hartman
2014-04-11 16:10 ` [PATCH 3.13 22/65] skbuff: skb_segment: orphan frags before copying Greg Kroah-Hartman
2014-04-11 16:10 ` [PATCH 3.13 23/65] tcp: tcp_release_cb() should release socket ownership Greg Kroah-Hartman
2014-04-11 16:10 ` [PATCH 3.13 24/65] bridge: multicast: add sanity check for general query destination Greg Kroah-Hartman
2014-04-11 16:10 ` [PATCH 3.13 25/65] bridge: multicast: enable snooping on general queries only Greg Kroah-Hartman
2014-04-11 16:10 ` [PATCH 3.13 26/65] net: socket: error on a negative msg_namelen Greg Kroah-Hartman
2014-04-11 16:10 ` [PATCH 3.13 27/65] bonding: set correct vlan id for alb xmit path Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.13 28/65] eth: fec: Fix lost promiscuous mode after reconnecting cable Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.13 29/65] ipv6: Avoid unnecessary temporary addresses being generated Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.13 30/65] ipv6: ip6_append_data_mtu do not handle the mtu of the second fragment properly Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.13 31/65] net: cdc_ncm: fix control message ordering Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.13 32/65] vxlan: fix potential NULL dereference in arp_reduce() Greg Kroah-Hartman
2014-04-11 16:11 ` Greg Kroah-Hartman [this message]
2014-04-11 16:11 ` [PATCH 3.13 34/65] tcp: syncookies: do not use getnstimeofday() Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.13 35/65] rtnetlink: fix fdb notification flags Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.13 36/65] ipmr: fix mfc " Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.13 37/65] ip6mr: " Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.13 38/65] net: micrel : ks8851-ml: add vdd-supply support Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.13 39/65] netpoll: fix the skb check in pkt_is_ns Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.13 40/65] tipc: fix spinlock recursion bug for failed subscriptions Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.13 41/65] ip_tunnel: Fix dst ref-count Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.13 42/65] tg3: Do not include vlan acceleration features in vlan_features Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.13 43/65] virtio-net: correct error handling of virtqueue_kick() Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.13 44/65] usbnet: include wait queue head in device structure Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.13 45/65] vlan: Set hard_header_len according to available acceleration Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.13 46/65] vhost: fix total length when packets are too short Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.13 47/65] vhost: validate vhost_get_vq_desc return value Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.13 48/65] tcp: fix get_timewait4_sock() delay computation on 64bit Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.13 49/65] xen-netback: remove pointless clause from if statement Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.13 50/65] ipv6: some ipv6 statistic counters failed to disable bh Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.13 51/65] netlink: dont compare the nul-termination in nla_strcmp Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.13 52/65] xen-netback: disable rogue vif in kthread context Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.13 53/65] Call efx_set_channels() before efx->type->dimension_resources() Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.13 54/65] net: vxlan: fix crash when interface is created with no group Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.13 55/65] isdnloop: Validate NUL-terminated strings from user Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.13 56/65] isdnloop: several buffer overflows Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.13 57/65] rds: prevent dereference of a NULL device in rds_iw_laddr_check Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.13 58/65] powernow-k6: disable cache when changing frequency Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.13 59/65] powernow-k6: correctly initialize default parameters Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.13 60/65] powernow-k6: reorder frequencies Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.13 61/65] ARC: [nsimosci] Change .dts to use generic 8250 UART Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.13 62/65] ARC: [nsimosci] Unbork console Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.13 63/65] futex: Allow architectures to skip futex_atomic_cmpxchg_inatomic() test Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.13 64/65] m68k: Skip " Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.13 65/65] crypto: ghash-clmulni-intel - use C implementation for setkey() Greg Kroah-Hartman
2014-04-11 21:45 ` [PATCH 3.13 00/65] 3.13.10-stable review Guenter Roeck
2014-04-11 23:46 ` Shuah Khan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20140411161002.302383123@linuxfoundation.org \
--to=gregkh@linuxfoundation.org \
--cc=davem@davemloft.net \
--cc=dlstevens@us.ibm.com \
--cc=linux-kernel@vger.kernel.org \
--cc=stable@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).