From: Krishna Kumar <krkumar2@in.ibm.com>
To: davem@davemloft.net, arnd@arndb.de
Cc: bhutchings@solarflare.com, netdev@vger.kernel.org,
mst@redhat.com, Krishna Kumar <krkumar2@in.ibm.com>,
therbert@google.com
Subject: [PATCH v3 2/2] macvtap: Implement multiqueue macvtap driver
Date: Tue, 03 Aug 2010 08:33:03 +0530 [thread overview]
Message-ID: <20100803030303.8486.67862.sendpatchset@krkumar2.in.ibm.com> (raw)
In-Reply-To: <20100803030256.8486.82622.sendpatchset@krkumar2.in.ibm.com>
From: Krishna Kumar <krkumar2@in.ibm.com>
Implement a multiqueue facility for the macvtap driver. The idea is that
a macvtap device can be opened multiple times, and the resulting fds can
be registered, e.g., as backends for vhost.
Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
---
drivers/net/macvtap.c | 89 ++++++++++++++++++++++++++++-------
include/linux/if_macvlan.h | 9 +++
2 files changed, 80 insertions(+), 18 deletions(-)
diff -ruNp org/include/linux/if_macvlan.h new/include/linux/if_macvlan.h
--- org/include/linux/if_macvlan.h 2010-08-03 08:19:57.000000000 +0530
+++ new/include/linux/if_macvlan.h 2010-08-03 08:20:39.000000000 +0530
@@ -40,6 +40,12 @@ struct macvlan_rx_stats {
unsigned long rx_errors;
};
+/*
+ * Maximum number of times a macvtap device can be opened. This can be used
+ * to configure the number of receive queues, e.g. for multiqueue virtio.
+ */
+#define MAX_MACVTAP_QUEUES (NR_CPUS < 16 ? NR_CPUS : 16)
+
struct macvlan_dev {
struct net_device *dev;
struct list_head list;
@@ -50,7 +56,8 @@ struct macvlan_dev {
enum macvlan_mode mode;
int (*receive)(struct sk_buff *skb);
int (*forward)(struct net_device *dev, struct sk_buff *skb);
- struct macvtap_queue *tap;
+ struct macvtap_queue *taps[MAX_MACVTAP_QUEUES];
+ int numvtaps;
};
static inline void macvlan_count_rx(const struct macvlan_dev *vlan,
diff -ruNp org/drivers/net/macvtap.c new/drivers/net/macvtap.c
--- org/drivers/net/macvtap.c 2010-08-03 08:19:57.000000000 +0530
+++ new/drivers/net/macvtap.c 2010-08-03 08:19:57.000000000 +0530
@@ -84,26 +84,45 @@ static const struct proto_ops macvtap_so
static DEFINE_SPINLOCK(macvtap_lock);
/*
- * Choose the next free queue, for now there is only one
+ * get_slot: return an [unused/occupied] slot in vlan->taps[]:
+ * - if 'q' is NULL, return the first empty slot;
+ * - otherwise, return the slot this pointer occupies.
*/
+static int get_slot(struct macvlan_dev *vlan, struct macvtap_queue *q)
+{
+ int i;
+
+ for (i = 0; i < MAX_MACVTAP_QUEUES; i++) {
+ if (rcu_dereference(vlan->taps[i]) == q)
+ return i;
+ }
+
+ /* Should never happen */
+ BUG_ON(1);
+}
+
static int macvtap_set_queue(struct net_device *dev, struct file *file,
struct macvtap_queue *q)
{
struct macvlan_dev *vlan = netdev_priv(dev);
+ int index;
int err = -EBUSY;
spin_lock(&macvtap_lock);
- if (rcu_dereference(vlan->tap))
+ if (vlan->numvtaps == MAX_MACVTAP_QUEUES)
goto out;
err = 0;
+ index = get_slot(vlan, NULL);
rcu_assign_pointer(q->vlan, vlan);
- rcu_assign_pointer(vlan->tap, q);
+ rcu_assign_pointer(vlan->taps[index], q);
sock_hold(&q->sk);
q->file = file;
file->private_data = q;
+ vlan->numvtaps++;
+
out:
spin_unlock(&macvtap_lock);
return err;
@@ -124,9 +143,12 @@ static void macvtap_put_queue(struct mac
spin_lock(&macvtap_lock);
vlan = rcu_dereference(q->vlan);
if (vlan) {
- rcu_assign_pointer(vlan->tap, NULL);
+ int index = get_slot(vlan, q);
+
+ rcu_assign_pointer(vlan->taps[index], NULL);
rcu_assign_pointer(q->vlan, NULL);
sock_put(&q->sk);
+ --vlan->numvtaps;
}
spin_unlock(&macvtap_lock);
@@ -136,39 +158,72 @@ static void macvtap_put_queue(struct mac
}
/*
- * Since we only support one queue, just dereference the pointer.
+ * Select a queue based on the rxq of the device on which this packet
+ * arrived. If the incoming device is not mq, calculate a flow hash to
+ * select a queue. vlan->numvtaps is cached in case it decreases during
+ * the execution of this function.
*/
static struct macvtap_queue *macvtap_get_queue(struct net_device *dev,
struct sk_buff *skb)
{
struct macvlan_dev *vlan = netdev_priv(dev);
+ struct macvtap_queue *tap = NULL;
+ int numvtaps = vlan->numvtaps;
+ u16 rxq;
+
+ if (!numvtaps)
+ goto out;
+
+ if (likely(skb_rx_queue_recorded(skb))) {
+ rxq = skb_get_rx_queue(skb);
+
+ while (unlikely(rxq >= numvtaps))
+ rxq -= numvtaps;
- return rcu_dereference(vlan->tap);
+ tap = rcu_dereference(vlan->taps[rxq]);
+ if (tap)
+ goto out;
+ }
+
+ rxq = skb_calculate_flow(dev, skb);
+ if (rxq < 0)
+ rxq = smp_processor_id();
+
+ tap = rcu_dereference(vlan->taps[rxq & (numvtaps - 1)]);
+
+out:
+ return tap;
}
/*
* The net_device is going away, give up the reference
- * that it holds on the queue (all the queues one day)
- * and safely set the pointer from the queues to NULL.
+ * that it holds on all queues and safely set the pointer
+ * from the queues to NULL.
*/
static void macvtap_del_queues(struct net_device *dev)
{
struct macvlan_dev *vlan = netdev_priv(dev);
- struct macvtap_queue *q;
+ struct macvtap_queue *q, *qlist[MAX_MACVTAP_QUEUES];
+ int i, j = 0;
+ /* macvtap_put_queue can free some slots, so go through all slots */
spin_lock(&macvtap_lock);
- q = rcu_dereference(vlan->tap);
- if (!q) {
- spin_unlock(&macvtap_lock);
- return;
+ for (i = 0; i < MAX_MACVTAP_QUEUES && vlan->numvtaps; i++) {
+ q = rcu_dereference(vlan->taps[i]);
+ if (q) {
+ qlist[j++] = q;
+ rcu_assign_pointer(vlan->taps[i], NULL);
+ rcu_assign_pointer(q->vlan, NULL);
+ vlan->numvtaps--;
+ }
}
-
- rcu_assign_pointer(vlan->tap, NULL);
- rcu_assign_pointer(q->vlan, NULL);
+ BUG_ON(vlan->numvtaps != 0);
spin_unlock(&macvtap_lock);
synchronize_rcu();
- sock_put(&q->sk);
+
+ for (--j; j >= 0; j--)
+ sock_put(&qlist[j]->sk);
}
/*
next prev parent reply other threads:[~2010-08-03 3:03 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-08-03 3:02 [PATCH v3 1/2] core: Factor out flow calculation from get_rps_cpu Krishna Kumar
2010-08-03 3:03 ` Krishna Kumar [this message]
2010-08-03 4:05 ` Changli Gao
2010-08-03 5:57 ` Krishna Kumar2
2010-08-03 6:11 ` Changli Gao
2010-08-03 7:18 ` Changli Gao
2010-08-03 8:32 ` Arnd Bergmann
2010-08-03 22:36 ` Sridhar Samudrala
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20100803030303.8486.67862.sendpatchset@krkumar2.in.ibm.com \
--to=krkumar2@in.ibm.com \
--cc=arnd@arndb.de \
--cc=bhutchings@solarflare.com \
--cc=davem@davemloft.net \
--cc=mst@redhat.com \
--cc=netdev@vger.kernel.org \
--cc=therbert@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).