From: Mike Rapoport <mike.rapoport@ravellosystems.com>
To: Stephen Hemminger <stephen@networkplumber.org>
Cc: Thomas Graf <tgraf@suug.ch>, netdev@vger.kernel.org
Subject: Re: [PATCH net-next v3 2/2] vxlan: allow specifying multiple default destinations
Date: Sun, 2 Jun 2013 13:29:42 +0300 [thread overview]
Message-ID: <20130602102942.GA21706@zed> (raw)
In-Reply-To: <20130531091740.70b3e077@nehalam.linuxnetplumber.net>
On Fri, May 31, 2013 at 7:17 PM, Stephen Hemminger <stephen@networkplumber.org> wrote:
> Looking at this code in more detail, I see a slew of problems.
>
> First the list of destinations isn't really a list. The default one is still
> embedded in the fdb entry. This means you can't change it safely.
>
> Also the notification via netlink only sends back a single destination
> value.
>
> And the lack of locking on the open coded link list means it is not safe since
> the forwarding table is used with RCU. In order to be safe, proper RCU
> barriers would be needed or better yet convert to list_rcu..
>
> Overall, I feel guilty for not inspecting this more closely and am surprised
> that others did not catch the lack of locking.
I've tried to convert remotes list to use hlist_rcu. The patch below implements the conversion, but it does not address the netlink notification issue.
>From 4de2001606a258462369520231643874e3e5c14c Mon Sep 17 00:00:00 2001
From: Mike Rapoport <mike.rapoport@ravellosystems.com>
Date: Sun, 2 Jun 2013 13:21:23 +0300
Subject: [PATCH] vxlan: convert remotes list to hlist_rcu
Signed-off-by: Mike Rapoport <mike.rapoport@ravellosystems.com>
---
drivers/net/vxlan.c | 75 +++++++++++++++++++++++++++++++++--------------------
1 file changed, 47 insertions(+), 28 deletions(-)
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 8111565..664853e 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -101,7 +101,8 @@ struct vxlan_rdst {
__be16 remote_port;
u32 remote_vni;
u32 remote_ifindex;
- struct vxlan_rdst *remote_next;
+ struct hlist_node hlist;
+ struct rcu_head rcu;
};
/* Forwarding table entry */
@@ -110,7 +111,7 @@ struct vxlan_fdb {
struct rcu_head rcu;
unsigned long updated; /* jiffies */
unsigned long used;
- struct vxlan_rdst remote;
+ struct hlist_head remotes;
u16 state; /* see ndm_state */
u8 flags; /* see ndm_flags */
u8 eth_addr[ETH_ALEN];
@@ -163,6 +164,13 @@ static inline struct hlist_head *vs_head(struct net *net, __be16 port)
return &vn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)];
}
+/* The first remote destination in FDB remotes list */
+static inline struct vxlan_rdst *remotes_head(struct vxlan_fdb *f)
+{
+ return hlist_entry(hlist_first_rcu(&f->remotes),
+ struct vxlan_rdst, hlist);
+}
+
/* Find VXLAN socket based on network namespace and UDP port */
static struct vxlan_sock *vxlan_find_port(struct net *net, __be16 port)
{
@@ -278,7 +286,7 @@ static void vxlan_fdb_notify(struct vxlan_dev *vxlan,
if (skb == NULL)
goto errout;
- err = vxlan_fdb_info(skb, vxlan, fdb, 0, 0, type, 0, &fdb->remote);
+ err = vxlan_fdb_info(skb, vxlan, fdb, 0, 0, type, 0, remotes_head(fdb));
if (err < 0) {
/* -EMSGSIZE implies BUG in vxlan_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);
@@ -297,11 +305,14 @@ static void vxlan_ip_miss(struct net_device *dev, __be32 ipa)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_fdb f;
+ struct vxlan_rdst remote;
memset(&f, 0, sizeof f);
f.state = NUD_STALE;
- f.remote.remote_ip = ipa; /* goes to NDA_DST */
- f.remote.remote_vni = VXLAN_N_VID;
+ remote.remote_ip = ipa; /* goes to NDA_DST */
+ remote.remote_vni = VXLAN_N_VID;
+
+ hlist_add_head_rcu(&remote.hlist, &f.remotes);
vxlan_fdb_notify(vxlan, &f, RTM_GETNEIGH);
}
@@ -370,17 +381,16 @@ static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan,
static int vxlan_fdb_append(struct vxlan_fdb *f,
__be32 ip, __be16 port, __u32 vni, __u32 ifindex)
{
- struct vxlan_rdst *rd_prev, *rd;
+ struct vxlan_rdst *rd;
- rd_prev = NULL;
- for (rd = &f->remote; rd; rd = rd->remote_next) {
+ hlist_for_each_entry_rcu(rd, &f->remotes, hlist) {
if (rd->remote_ip == ip &&
rd->remote_port == port &&
rd->remote_vni == vni &&
rd->remote_ifindex == ifindex)
return 0;
- rd_prev = rd;
}
+
rd = kmalloc(sizeof(*rd), GFP_ATOMIC);
if (rd == NULL)
return -ENOBUFS;
@@ -388,8 +398,9 @@ static int vxlan_fdb_append(struct vxlan_fdb *f,
rd->remote_port = port;
rd->remote_vni = vni;
rd->remote_ifindex = ifindex;
- rd->remote_next = NULL;
- rd_prev->remote_next = rd;
+
+ hlist_add_head_rcu(&rd->hlist, &f->remotes);
+
return 1;
}
@@ -441,16 +452,13 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
return -ENOMEM;
notify = 1;
- f->remote.remote_ip = ip;
- f->remote.remote_port = port;
- f->remote.remote_vni = vni;
- f->remote.remote_ifindex = ifindex;
- f->remote.remote_next = NULL;
f->state = state;
f->flags = ndm_flags;
f->updated = f->used = jiffies;
memcpy(f->eth_addr, mac, ETH_ALEN);
+ vxlan_fdb_append(f, ip, port, vni, ifindex);
+
++vxlan->addrcnt;
hlist_add_head_rcu(&f->hlist,
vxlan_fdb_head(vxlan, mac));
@@ -462,16 +470,22 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
return 0;
}
+static void vxlan_rdst_free(struct rcu_head *head)
+{
+ struct vxlan_rdst *rd = container_of(head, struct vxlan_rdst, rcu);
+ kfree(rd);
+}
+
static void vxlan_fdb_free(struct rcu_head *head)
{
struct vxlan_fdb *f = container_of(head, struct vxlan_fdb, rcu);
+ struct vxlan_rdst *rd;
- while (f->remote.remote_next) {
- struct vxlan_rdst *rd = f->remote.remote_next;
-
- f->remote.remote_next = rd->remote_next;
- kfree(rd);
+ hlist_for_each_safe(rd, &f->remotes, hlist) {
+ hlist_del_rcu(&rd->hlist);
+ call_rcu(&rd->rcu, vxlan_rdst_free);
}
+
kfree(f);
}
@@ -581,7 +595,7 @@ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) {
struct vxlan_rdst *rd;
- for (rd = &f->remote; rd; rd = rd->remote_next) {
+ hlist_for_each_entry_rcu(rd, &f->remotes, hlist) {
if (idx < cb->args[0])
goto skip;
@@ -613,15 +627,17 @@ static void vxlan_snoop(struct net_device *dev,
f = vxlan_find_mac(vxlan, src_mac);
if (likely(f)) {
- if (likely(f->remote.remote_ip == src_ip))
+ struct vxlan_rdst *remote = remotes_head(f);
+
+ if (likely(remote->remote_ip == src_ip))
return;
if (net_ratelimit())
netdev_info(dev,
"%pM migrated from %pI4 to %pI4\n",
- src_mac, &f->remote.remote_ip, &src_ip);
+ src_mac, &remote->remote_ip, &src_ip);
- f->remote.remote_ip = src_ip;
+ remote->remote_ip = src_ip;
f->updated = jiffies;
} else {
/* learned new entry */
@@ -856,6 +872,7 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb)
if (n) {
struct vxlan_fdb *f;
struct sk_buff *reply;
+ struct vxlan_rdst *remote;
if (!(n->nud_state & NUD_CONNECTED)) {
neigh_release(n);
@@ -863,7 +880,8 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb)
}
f = vxlan_find_mac(vxlan, n->ha);
- if (f && f->remote.remote_ip == htonl(INADDR_ANY)) {
+ remote = remotes_head(f);
+ if (f && remote->remote_ip == htonl(INADDR_ANY)) {
/* bridge-local neighbor */
neigh_release(n);
goto out;
@@ -1181,12 +1199,13 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
!is_multicast_ether_addr(eth->h_dest))
vxlan_fdb_miss(vxlan, eth->h_dest);
} else
- rdst0 = &f->remote;
+ rdst0 = remotes_head(f);
rc = NETDEV_TX_OK;
/* if there are multiple destinations, send copies */
- for (rdst = rdst0->remote_next; rdst; rdst = rdst->remote_next) {
+ rdst = rdst0;
+ hlist_for_each_entry_continue_rcu(rdst, hlist) {
struct sk_buff *skb1;
skb1 = skb_clone(skb, GFP_ATOMIC);
--
1.8.1.5
--
Sincerely yours,
Mike.
next prev parent reply other threads:[~2013-06-02 10:29 UTC|newest]
Thread overview: 37+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-05-29 10:00 [PATCH net-next v3 0/2] vxlan: allow specifying multiple default destinations Mike Rapoport
2013-05-29 10:00 ` [PATCH net-next v3 1/2] vxlan: introduce vxlan_rdst_append Mike Rapoport
2013-05-29 22:56 ` Stephen Hemminger
2013-05-30 8:42 ` Mike Rapoport
2013-05-29 10:00 ` [PATCH net-next v3 2/2] vxlan: allow specifying multiple default destinations Mike Rapoport
2013-05-30 11:09 ` Thomas Graf
2013-05-30 11:16 ` Mike Rapoport
2013-05-30 11:37 ` Thomas Graf
2013-05-31 16:17 ` Stephen Hemminger
2013-06-02 10:29 ` Mike Rapoport [this message]
2013-06-03 15:57 ` Stephen Hemminger
2013-06-03 19:47 ` Mike Rapoport
2013-06-03 18:26 ` [RFC] vxlan: convert remote list to list_rcu Stephen Hemminger
2013-06-03 20:18 ` David Stevens
2013-06-03 20:45 ` Stephen Hemminger
2013-06-03 21:46 ` David Stevens
2013-06-04 9:18 ` Mike Rapoport
2013-06-04 12:48 ` David Stevens
2013-06-04 17:20 ` Mike Rapoport
2013-06-04 19:02 ` David Stevens
2013-06-05 12:53 ` Mike Rapoport
2013-06-04 9:10 ` Mike Rapoport
2013-06-04 16:00 ` Stephen Hemminger
2013-06-04 16:29 ` David Stevens
2013-06-04 17:22 ` Mike Rapoport
2013-05-29 10:00 ` [PATCH iproute2] vxlan: allow specifying multiple default destinations Mike Rapoport
2013-05-29 10:13 ` Cong Wang
2013-05-29 10:52 ` Mike Rapoport
2013-05-29 22:56 ` Stephen Hemminger
2013-05-30 8:42 ` Mike Rapoport
2013-05-30 11:44 ` Thomas Graf
2013-05-30 12:46 ` Mike Rapoport
2013-05-30 15:57 ` Thomas Graf
2013-06-02 7:09 ` Mike Rapoport
2013-06-05 4:30 ` Stephen Hemminger
2013-06-05 12:58 ` Mike Rapoport
2013-05-30 17:07 ` Stephen Hemminger
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20130602102942.GA21706@zed \
--to=mike.rapoport@ravellosystems.com \
--cc=netdev@vger.kernel.org \
--cc=stephen@networkplumber.org \
--cc=tgraf@suug.ch \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.