netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Mike Rapoport <mike.rapoport@ravellosystems.com>
To: Stephen Hemminger <stephen@networkplumber.org>
Cc: Thomas Graf <tgraf@suug.ch>, netdev@vger.kernel.org
Subject: Re: [PATCH net-next v3 2/2] vxlan: allow specifying multiple default destinations
Date: Sun, 2 Jun 2013 13:29:42 +0300	[thread overview]
Message-ID: <20130602102942.GA21706@zed> (raw)
In-Reply-To: <20130531091740.70b3e077@nehalam.linuxnetplumber.net>

On Fri, May 31, 2013 at 7:17 PM, Stephen Hemminger <stephen@networkplumber.org> wrote:
> Looking at this code in more detail, I see a slew of problems.
>
> First the list of destinations isn't really a list. The default one is still
> embedded in the fdb entry. This means you can't change it safely.
>
> Also the notification via netlink only sends back a single destination
> value.
>
> And the lack of locking on the open coded link list means it is not safe since
> the forwarding table is used with RCU. In order to be safe, proper RCU
> barriers would be needed or better yet convert to list_rcu..
>
> Overall, I feel guilty for not inspecting this more closely and am surprised
> that others did not catch the lack of locking.

I've tried to convert remotes list to use hlist_rcu. The patch below implements the conversion, but it does not address the netlink notification issue.

>From 4de2001606a258462369520231643874e3e5c14c Mon Sep 17 00:00:00 2001
From: Mike Rapoport <mike.rapoport@ravellosystems.com>
Date: Sun, 2 Jun 2013 13:21:23 +0300
Subject: [PATCH] vxlan: convert remotes list to hlist_rcu

Signed-off-by: Mike Rapoport <mike.rapoport@ravellosystems.com>
---
 drivers/net/vxlan.c | 75 +++++++++++++++++++++++++++++++++--------------------
 1 file changed, 47 insertions(+), 28 deletions(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 8111565..664853e 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -101,7 +101,8 @@ struct vxlan_rdst {
 	__be16			 remote_port;
 	u32			 remote_vni;
 	u32			 remote_ifindex;
-	struct vxlan_rdst	*remote_next;
+	struct hlist_node	 hlist;
+	struct rcu_head		 rcu;
 };
 
 /* Forwarding table entry */
@@ -110,7 +111,7 @@ struct vxlan_fdb {
 	struct rcu_head	  rcu;
 	unsigned long	  updated;	/* jiffies */
 	unsigned long	  used;
-	struct vxlan_rdst remote;
+	struct hlist_head remotes;
 	u16		  state;	/* see ndm_state */
 	u8		  flags;	/* see ndm_flags */
 	u8		  eth_addr[ETH_ALEN];
@@ -163,6 +164,13 @@ static inline struct hlist_head *vs_head(struct net *net, __be16 port)
 	return &vn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)];
 }
 
+/* The first remote destination in FDB remotes list */
+static inline struct vxlan_rdst *remotes_head(struct vxlan_fdb *f)
+{
+	return hlist_entry(hlist_first_rcu(&f->remotes),
+			   struct vxlan_rdst, hlist);
+}
+
 /* Find VXLAN socket based on network namespace and UDP port */
 static struct vxlan_sock *vxlan_find_port(struct net *net, __be16 port)
 {
@@ -278,7 +286,7 @@ static void vxlan_fdb_notify(struct vxlan_dev *vxlan,
 	if (skb == NULL)
 		goto errout;
 
-	err = vxlan_fdb_info(skb, vxlan, fdb, 0, 0, type, 0, &fdb->remote);
+	err = vxlan_fdb_info(skb, vxlan, fdb, 0, 0, type, 0, remotes_head(fdb));
 	if (err < 0) {
 		/* -EMSGSIZE implies BUG in vxlan_nlmsg_size() */
 		WARN_ON(err == -EMSGSIZE);
@@ -297,11 +305,14 @@ static void vxlan_ip_miss(struct net_device *dev, __be32 ipa)
 {
 	struct vxlan_dev *vxlan = netdev_priv(dev);
 	struct vxlan_fdb f;
+	struct vxlan_rdst remote;
 
 	memset(&f, 0, sizeof f);
 	f.state = NUD_STALE;
-	f.remote.remote_ip = ipa; /* goes to NDA_DST */
-	f.remote.remote_vni = VXLAN_N_VID;
+	remote.remote_ip = ipa; /* goes to NDA_DST */
+	remote.remote_vni = VXLAN_N_VID;
+
+	hlist_add_head_rcu(&remote.hlist, &f.remotes);
 
 	vxlan_fdb_notify(vxlan, &f, RTM_GETNEIGH);
 }
@@ -370,17 +381,16 @@ static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan,
 static int vxlan_fdb_append(struct vxlan_fdb *f,
 			    __be32 ip, __be16 port, __u32 vni, __u32 ifindex)
 {
-	struct vxlan_rdst *rd_prev, *rd;
+	struct vxlan_rdst *rd;
 
-	rd_prev = NULL;
-	for (rd = &f->remote; rd; rd = rd->remote_next) {
+	hlist_for_each_entry_rcu(rd, &f->remotes, hlist) {
 		if (rd->remote_ip == ip &&
 		    rd->remote_port == port &&
 		    rd->remote_vni == vni &&
 		    rd->remote_ifindex == ifindex)
 			return 0;
-		rd_prev = rd;
 	}
+
 	rd = kmalloc(sizeof(*rd), GFP_ATOMIC);
 	if (rd == NULL)
 		return -ENOBUFS;
@@ -388,8 +398,9 @@ static int vxlan_fdb_append(struct vxlan_fdb *f,
 	rd->remote_port = port;
 	rd->remote_vni = vni;
 	rd->remote_ifindex = ifindex;
-	rd->remote_next = NULL;
-	rd_prev->remote_next = rd;
+
+	hlist_add_head_rcu(&rd->hlist, &f->remotes);
+
 	return 1;
 }
 
@@ -441,16 +452,13 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
 			return -ENOMEM;
 
 		notify = 1;
-		f->remote.remote_ip = ip;
-		f->remote.remote_port = port;
-		f->remote.remote_vni = vni;
-		f->remote.remote_ifindex = ifindex;
-		f->remote.remote_next = NULL;
 		f->state = state;
 		f->flags = ndm_flags;
 		f->updated = f->used = jiffies;
 		memcpy(f->eth_addr, mac, ETH_ALEN);
 
+		vxlan_fdb_append(f, ip, port, vni, ifindex);
+
 		++vxlan->addrcnt;
 		hlist_add_head_rcu(&f->hlist,
 				   vxlan_fdb_head(vxlan, mac));
@@ -462,16 +470,22 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
 	return 0;
 }
 
+static void vxlan_rdst_free(struct rcu_head *head)
+{
+	struct vxlan_rdst *rd = container_of(head, struct vxlan_rdst, rcu);
+	kfree(rd);
+}
+
 static void vxlan_fdb_free(struct rcu_head *head)
 {
 	struct vxlan_fdb *f = container_of(head, struct vxlan_fdb, rcu);
+	struct vxlan_rdst *rd;
 
-	while (f->remote.remote_next) {
-		struct vxlan_rdst *rd = f->remote.remote_next;
-
-		f->remote.remote_next = rd->remote_next;
-		kfree(rd);
+	hlist_for_each_safe(rd, &f->remotes, hlist) {
+		hlist_del_rcu(&rd->hlist);
+		call_rcu(&rd->rcu, vxlan_rdst_free);
 	}
+
 	kfree(f);
 }
 
@@ -581,7 +595,7 @@ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
 
 		hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) {
 			struct vxlan_rdst *rd;
-			for (rd = &f->remote; rd; rd = rd->remote_next) {
+			hlist_for_each_entry_rcu(rd, &f->remotes, hlist) {
 				if (idx < cb->args[0])
 					goto skip;
 
@@ -613,15 +627,17 @@ static void vxlan_snoop(struct net_device *dev,
 
 	f = vxlan_find_mac(vxlan, src_mac);
 	if (likely(f)) {
-		if (likely(f->remote.remote_ip == src_ip))
+		struct vxlan_rdst *remote = remotes_head(f);
+
+		if (likely(remote->remote_ip == src_ip))
 			return;
 
 		if (net_ratelimit())
 			netdev_info(dev,
 				    "%pM migrated from %pI4 to %pI4\n",
-				    src_mac, &f->remote.remote_ip, &src_ip);
+				    src_mac, &remote->remote_ip, &src_ip);
 
-		f->remote.remote_ip = src_ip;
+		remote->remote_ip = src_ip;
 		f->updated = jiffies;
 	} else {
 		/* learned new entry */
@@ -856,6 +872,7 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb)
 	if (n) {
 		struct vxlan_fdb *f;
 		struct sk_buff	*reply;
+		struct vxlan_rdst *remote;
 
 		if (!(n->nud_state & NUD_CONNECTED)) {
 			neigh_release(n);
@@ -863,7 +880,8 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb)
 		}
 
 		f = vxlan_find_mac(vxlan, n->ha);
-		if (f && f->remote.remote_ip == htonl(INADDR_ANY)) {
+		remote = remotes_head(f);
+		if (f && remote->remote_ip == htonl(INADDR_ANY)) {
 			/* bridge-local neighbor */
 			neigh_release(n);
 			goto out;
@@ -1181,12 +1199,13 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
 		    !is_multicast_ether_addr(eth->h_dest))
 			vxlan_fdb_miss(vxlan, eth->h_dest);
 	} else
-		rdst0 = &f->remote;
+		rdst0 = remotes_head(f);
 
 	rc = NETDEV_TX_OK;
 
 	/* if there are multiple destinations, send copies */
-	for (rdst = rdst0->remote_next; rdst; rdst = rdst->remote_next) {
+	rdst = rdst0;
+	hlist_for_each_entry_continue_rcu(rdst, hlist) {
 		struct sk_buff *skb1;
 
 		skb1 = skb_clone(skb, GFP_ATOMIC);
-- 
1.8.1.5


--
Sincerely yours,
Mike.

  reply	other threads:[~2013-06-02 10:29 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-05-29 10:00 [PATCH net-next v3 0/2] vxlan: allow specifying multiple default destinations Mike Rapoport
2013-05-29 10:00 ` [PATCH net-next v3 1/2] vxlan: introduce vxlan_rdst_append Mike Rapoport
2013-05-29 22:56   ` Stephen Hemminger
2013-05-30  8:42     ` Mike Rapoport
2013-05-29 10:00 ` [PATCH net-next v3 2/2] vxlan: allow specifying multiple default destinations Mike Rapoport
2013-05-30 11:09   ` Thomas Graf
2013-05-30 11:16     ` Mike Rapoport
2013-05-30 11:37       ` Thomas Graf
2013-05-31 16:17         ` Stephen Hemminger
2013-06-02 10:29           ` Mike Rapoport [this message]
2013-06-03 15:57             ` Stephen Hemminger
2013-06-03 19:47               ` Mike Rapoport
2013-06-03 18:26             ` [RFC] vxlan: convert remote list to list_rcu Stephen Hemminger
2013-06-03 20:18               ` David Stevens
2013-06-03 20:45                 ` Stephen Hemminger
2013-06-03 21:46                   ` David Stevens
2013-06-04  9:18                     ` Mike Rapoport
2013-06-04 12:48                       ` David Stevens
2013-06-04 17:20                         ` Mike Rapoport
2013-06-04 19:02                           ` David Stevens
2013-06-05 12:53                             ` Mike Rapoport
2013-06-04  9:10                 ` Mike Rapoport
2013-06-04 16:00                   ` Stephen Hemminger
2013-06-04 16:29                     ` David Stevens
2013-06-04 17:22                       ` Mike Rapoport
2013-05-29 10:00 ` [PATCH iproute2] vxlan: allow specifying multiple default destinations Mike Rapoport
2013-05-29 10:13   ` Cong Wang
2013-05-29 10:52     ` Mike Rapoport
2013-05-29 22:56       ` Stephen Hemminger
2013-05-30  8:42         ` Mike Rapoport
2013-05-30 11:44           ` Thomas Graf
2013-05-30 12:46             ` Mike Rapoport
2013-05-30 15:57               ` Thomas Graf
2013-06-02  7:09                 ` Mike Rapoport
2013-06-05  4:30                   ` Stephen Hemminger
2013-06-05 12:58                     ` Mike Rapoport
2013-05-30 17:07             ` Stephen Hemminger

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20130602102942.GA21706@zed \
    --to=mike.rapoport@ravellosystems.com \
    --cc=netdev@vger.kernel.org \
    --cc=stephen@networkplumber.org \
    --cc=tgraf@suug.ch \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).