* [PATCH v2 1/2] core: Factor out flow calculation from get_rps_cpu
@ 2010-08-02 14:33 Krishna Kumar
2010-08-02 14:33 ` [PATCH v2 2/2] macvtap: Implement multiqueue macvtap driver Krishna Kumar
0 siblings, 1 reply; 5+ messages in thread
From: Krishna Kumar @ 2010-08-02 14:33 UTC (permalink / raw)
To: davem, arnd; +Cc: bhutchings, netdev, therbert, Krishna Kumar, mst
From: Krishna Kumar <krkumar2@in.ibm.com>
Factor out flow calculation code from get_rps_cpu, since macvtap
driver can use the same code.
Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
---
net/core/dev.c | 94 +++++++++++++++++++++++++++++------------------
1 file changed, 58 insertions(+), 36 deletions(-)
diff -ruNp org/net/core/dev.c new/net/core/dev.c
--- org/net/core/dev.c 2010-08-02 10:06:59.000000000 +0530
+++ new/net/core/dev.c 2010-08-02 19:29:34.000000000 +0530
@@ -2263,51 +2263,24 @@ static inline void ____napi_schedule(str
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
}
-#ifdef CONFIG_RPS
-
-/* One global table that all flow-based protocols share. */
-struct rps_sock_flow_table *rps_sock_flow_table __read_mostly;
-EXPORT_SYMBOL(rps_sock_flow_table);
-
/*
- * get_rps_cpu is called from netif_receive_skb and returns the target
- * CPU from the RPS map of the receiving queue for a given skb.
- * rcu_read_lock must be held on entry.
+ * skb_calculate_flow: calculate a flow hash based on src/dst addresses
+ * and src/dst port numbers. On success, returns a hash number (> 0),
+ * otherwise -1.
*/
-static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
- struct rps_dev_flow **rflowp)
+int skb_calculate_flow(struct net_device *dev, struct sk_buff *skb)
{
+ int hash = skb->rxhash;
struct ipv6hdr *ip6;
struct iphdr *ip;
- struct netdev_rx_queue *rxqueue;
- struct rps_map *map;
- struct rps_dev_flow_table *flow_table;
- struct rps_sock_flow_table *sock_flow_table;
- int cpu = -1;
u8 ip_proto;
- u16 tcpu;
u32 addr1, addr2, ihl;
union {
u32 v32;
u16 v16[2];
} ports;
- if (skb_rx_queue_recorded(skb)) {
- u16 index = skb_get_rx_queue(skb);
- if (unlikely(index >= dev->num_rx_queues)) {
- WARN_ONCE(dev->num_rx_queues > 1, "%s received packet "
- "on queue %u, but number of RX queues is %u\n",
- dev->name, index, dev->num_rx_queues);
- goto done;
- }
- rxqueue = dev->_rx + index;
- } else
- rxqueue = dev->_rx;
-
- if (!rxqueue->rps_map && !rxqueue->rps_flow_table)
- goto done;
-
- if (skb->rxhash)
+ if (hash)
goto got_hash; /* Skip hash computation on packet header */
switch (skb->protocol) {
@@ -2334,6 +2307,7 @@ static int get_rps_cpu(struct net_device
default:
goto done;
}
+
switch (ip_proto) {
case IPPROTO_TCP:
case IPPROTO_UDP:
@@ -2356,11 +2330,59 @@ static int get_rps_cpu(struct net_device
/* get a consistent hash (same value on both flow directions) */
if (addr2 < addr1)
swap(addr1, addr2);
- skb->rxhash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
- if (!skb->rxhash)
- skb->rxhash = 1;
+
+ hash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
+ if (!hash)
+ hash = 1;
got_hash:
+ return hash;
+
+done:
+ return -1;
+}
+EXPORT_SYMBOL(skb_calculate_flow);
+
+#ifdef CONFIG_RPS
+
+/* One global table that all flow-based protocols share. */
+struct rps_sock_flow_table *rps_sock_flow_table __read_mostly;
+EXPORT_SYMBOL(rps_sock_flow_table);
+
+/*
+ * get_rps_cpu is called from netif_receive_skb and returns the target
+ * CPU from the RPS map of the receiving queue for a given skb.
+ * rcu_read_lock must be held on entry.
+ */
+static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
+ struct rps_dev_flow **rflowp)
+{
+ struct netdev_rx_queue *rxqueue;
+ struct rps_map *map;
+ struct rps_dev_flow_table *flow_table;
+ struct rps_sock_flow_table *sock_flow_table;
+ int cpu = -1;
+ u16 tcpu;
+
+ if (skb_rx_queue_recorded(skb)) {
+ u16 index = skb_get_rx_queue(skb);
+ if (unlikely(index >= dev->num_rx_queues)) {
+ WARN_ONCE(dev->num_rx_queues > 1, "%s received packet "
+ "on queue %u, but number of RX queues is %u\n",
+ dev->name, index, dev->num_rx_queues);
+ goto done;
+ }
+ rxqueue = dev->_rx + index;
+ } else
+ rxqueue = dev->_rx;
+
+ if (!rxqueue->rps_map && !rxqueue->rps_flow_table)
+ goto done;
+
+ skb->rxhash = skb_calculate_flow(dev, skb);
+ if (skb->rxhash < 0)
+ goto done;
+
flow_table = rcu_dereference(rxqueue->rps_flow_table);
sock_flow_table = rcu_dereference(rps_sock_flow_table);
if (flow_table && sock_flow_table) {
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH v2 2/2] macvtap: Implement multiqueue macvtap driver
2010-08-02 14:33 [PATCH v2 1/2] core: Factor out flow calculation from get_rps_cpu Krishna Kumar
@ 2010-08-02 14:33 ` Krishna Kumar
2010-08-02 15:52 ` Arnd Bergmann
0 siblings, 1 reply; 5+ messages in thread
From: Krishna Kumar @ 2010-08-02 14:33 UTC (permalink / raw)
To: davem, arnd; +Cc: bhutchings, netdev, mst, Krishna Kumar, therbert
From: Krishna Kumar <krkumar2@in.ibm.com>
Implement multiqueue facility for macvtap driver. The idea is that
a macvtap device can be opened multiple times and the fd's can be
used to register eg, as backend for vhost.
Please review.
Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
---
drivers/net/macvtap.c | 89 ++++++++++++++++++++++++++++-------
include/linux/if_macvlan.h | 11 +++-
include/linux/netdevice.h | 1
3 files changed, 83 insertions(+), 18 deletions(-)
diff -ruNp org/include/linux/netdevice.h new/include/linux/netdevice.h
--- org/include/linux/netdevice.h 2010-07-25 16:57:07.000000000 +0530
+++ new/include/linux/netdevice.h 2010-08-02 16:05:57.000000000 +0530
@@ -2253,6 +2253,7 @@ static inline const char *netdev_name(co
return dev->name;
}
+extern int skb_calculate_flow(struct net_device *dev, struct sk_buff *skb);
extern int netdev_printk(const char *level, const struct net_device *dev,
const char *format, ...)
__attribute__ ((format (printf, 3, 4)));
diff -ruNp org/include/linux/if_macvlan.h new/include/linux/if_macvlan.h
--- org/include/linux/if_macvlan.h 2010-08-02 15:32:33.000000000 +0530
+++ new/include/linux/if_macvlan.h 2010-08-02 15:32:33.000000000 +0530
@@ -40,6 +40,14 @@ struct macvlan_rx_stats {
unsigned long rx_errors;
};
+#define MIN(x, y) (((x) < (y)) ? (x) : (y))
+
+/*
+ * Maximum times a macvtap device can be opened. This can be used to
+ * configure the number of receive queue, e.g. for multiqueue virtio.
+ */
+#define MAX_MACVTAP_QUEUES MIN(16, NR_CPUS)
+
struct macvlan_dev {
struct net_device *dev;
struct list_head list;
@@ -50,7 +58,8 @@ struct macvlan_dev {
enum macvlan_mode mode;
int (*receive)(struct sk_buff *skb);
int (*forward)(struct net_device *dev, struct sk_buff *skb);
- struct macvtap_queue *tap;
+ struct macvtap_queue *taps[MAX_MACVTAP_QUEUES];
+ int numvtaps;
};
static inline void macvlan_count_rx(const struct macvlan_dev *vlan,
diff -ruNp org/drivers/net/macvtap.c new/drivers/net/macvtap.c
--- org/drivers/net/macvtap.c 2010-07-28 15:10:10.000000000 +0530
+++ new/drivers/net/macvtap.c 2010-08-02 17:48:38.000000000 +0530
@@ -84,26 +84,45 @@ static const struct proto_ops macvtap_so
static DEFINE_SPINLOCK(macvtap_lock);
/*
- * Choose the next free queue, for now there is only one
+ * get_slot: return a [unused/occupied] slot in vlan->taps[]:
+ * - if 'q' is NULL, return the first empty slot;
+ * - otherwise, return the slot this pointer occupies.
*/
+static int get_slot(struct macvlan_dev *vlan, struct macvtap_queue *q)
+{
+ int i;
+
+ for (i = 0; i < MAX_MACVTAP_QUEUES; i++) {
+ if (rcu_dereference(vlan->taps[i]) == q)
+ return i;
+ }
+
+ /* Should never happen */
+ BUG_ON(1);
+}
+
static int macvtap_set_queue(struct net_device *dev, struct file *file,
struct macvtap_queue *q)
{
struct macvlan_dev *vlan = netdev_priv(dev);
+ int index;
int err = -EBUSY;
spin_lock(&macvtap_lock);
- if (rcu_dereference(vlan->tap))
+ if (vlan->numvtaps == MAX_MACVTAP_QUEUES)
goto out;
err = 0;
+ index = get_slot(vlan, NULL);
rcu_assign_pointer(q->vlan, vlan);
- rcu_assign_pointer(vlan->tap, q);
+ rcu_assign_pointer(vlan->taps[index], q);
sock_hold(&q->sk);
q->file = file;
file->private_data = q;
+ vlan->numvtaps++;
+
out:
spin_unlock(&macvtap_lock);
return err;
@@ -124,9 +143,12 @@ static void macvtap_put_queue(struct mac
spin_lock(&macvtap_lock);
vlan = rcu_dereference(q->vlan);
if (vlan) {
- rcu_assign_pointer(vlan->tap, NULL);
+ int index = get_slot(vlan, q);
+
+ rcu_assign_pointer(vlan->taps[index], NULL);
rcu_assign_pointer(q->vlan, NULL);
sock_put(&q->sk);
+ --vlan->numvtaps;
}
spin_unlock(&macvtap_lock);
@@ -136,39 +158,72 @@ static void macvtap_put_queue(struct mac
}
/*
- * Since we only support one queue, just dereference the pointer.
+ * Select a queue based on the rxq of the device on which this packet
+ * arrived. If the incoming device is not mq, calculate a flow hash to
+ * select a queue. vlan->numvtaps is cached in case it reduces during
+ * the execution of this function.
*/
static struct macvtap_queue *macvtap_get_queue(struct net_device *dev,
struct sk_buff *skb)
{
struct macvlan_dev *vlan = netdev_priv(dev);
+ struct macvtap_queue *tap = NULL;
+ int numvtaps = vlan->numvtaps;
+ u16 rxq;
+
+ if (!numvtaps)
+ goto out;
+
+ if (skb_rx_queue_recorded(skb)) {
+ rxq = skb_get_rx_queue(skb);
+
+ while (unlikely(rxq >= numvtaps))
+ rxq -= numvtaps;
- return rcu_dereference(vlan->tap);
+ tap = rcu_dereference(vlan->taps[rxq]);
+ if (tap)
+ goto out;
+ }
+
+ rxq = skb_calculate_flow(dev, skb);
+ if (rxq < 0)
+ rxq = smp_processor_id();
+
+ tap = rcu_dereference(vlan->taps[rxq & (numvtaps - 1)]);
+
+out:
+ return tap;
}
/*
* The net_device is going away, give up the reference
- * that it holds on the queue (all the queues one day)
- * and safely set the pointer from the queues to NULL.
+ * that it holds on all queues and safely set the pointer
+ * from the queues to NULL.
*/
static void macvtap_del_queues(struct net_device *dev)
{
struct macvlan_dev *vlan = netdev_priv(dev);
- struct macvtap_queue *q;
+ struct macvtap_queue *q, *qlist[MAX_MACVTAP_QUEUES];
+ int i, j = 0;
+ /* macvtap_put_queue can free some slots, so go through all slots */
spin_lock(&macvtap_lock);
- q = rcu_dereference(vlan->tap);
- if (!q) {
- spin_unlock(&macvtap_lock);
- return;
+ for (i = 0; i < MAX_MACVTAP_QUEUES && vlan->numvtaps; i++) {
+ q = rcu_dereference(vlan->taps[i]);
+ if (q) {
+ qlist[j++] = q;
+ rcu_assign_pointer(vlan->taps[i], NULL);
+ rcu_assign_pointer(q->vlan, NULL);
+ vlan->numvtaps--;
+ }
}
-
- rcu_assign_pointer(vlan->tap, NULL);
- rcu_assign_pointer(q->vlan, NULL);
+ BUG_ON(vlan->numvtaps != 0);
spin_unlock(&macvtap_lock);
synchronize_rcu();
- sock_put(&q->sk);
+
+ for (--j; j >= 0; j--)
+ sock_put(&qlist[j]->sk);
}
/*
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH v2 2/2] macvtap: Implement multiqueue macvtap driver
2010-08-02 14:33 ` [PATCH v2 2/2] macvtap: Implement multiqueue macvtap driver Krishna Kumar
@ 2010-08-02 15:52 ` Arnd Bergmann
2010-08-02 16:28 ` Krishna Kumar2
0 siblings, 1 reply; 5+ messages in thread
From: Arnd Bergmann @ 2010-08-02 15:52 UTC (permalink / raw)
To: Krishna Kumar; +Cc: davem, bhutchings, netdev, mst, therbert
On Monday 02 August 2010, Krishna Kumar wrote:
> Implement multiqueue facility for macvtap driver. The idea is that
> a macvtap device can be opened multiple times and the fd's can be
> used to register eg, as backend for vhost.
>
> Please review.
Only two very minor points from my side:
> diff -ruNp org/include/linux/netdevice.h new/include/linux/netdevice.h
> --- org/include/linux/netdevice.h 2010-07-25 16:57:07.000000000 +0530
> +++ new/include/linux/netdevice.h 2010-08-02 16:05:57.000000000 +0530
> @@ -2253,6 +2253,7 @@ static inline const char *netdev_name(co
> return dev->name;
> }
>
> +extern int skb_calculate_flow(struct net_device *dev, struct sk_buff *skb);
> extern int netdev_printk(const char *level, const struct net_device *dev,
> const char *format, ...)
> __attribute__ ((format (printf, 3, 4)));
This logically belongs into the first patch.
> diff -ruNp org/include/linux/if_macvlan.h new/include/linux/if_macvlan.h
> --- org/include/linux/if_macvlan.h 2010-08-02 15:32:33.000000000 +0530
> +++ new/include/linux/if_macvlan.h 2010-08-02 15:32:33.000000000 +0530
> @@ -40,6 +40,14 @@ struct macvlan_rx_stats {
> unsigned long rx_errors;
> };
>
> +#define MIN(x, y) (((x) < (y)) ? (x) : (y))
> +
> +/*
> + * Maximum times a macvtap device can be opened. This can be used to
> + * configure the number of receive queue, e.g. for multiqueue virtio.
> + */
> +#define MAX_MACVTAP_QUEUES MIN(16, NR_CPUS)
> +
Please use the existing min() or min_t() macro instead of providing your own.
Arnd
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH v2 2/2] macvtap: Implement multiqueue macvtap driver
2010-08-02 15:52 ` Arnd Bergmann
@ 2010-08-02 16:28 ` Krishna Kumar2
2010-08-02 16:34 ` Arnd Bergmann
0 siblings, 1 reply; 5+ messages in thread
From: Krishna Kumar2 @ 2010-08-02 16:28 UTC (permalink / raw)
To: Arnd Bergmann; +Cc: bhutchings, davem, mst, netdev, therbert
Hi Arnd,
Thanks for your comments. The declaration was in the 2nd patch
since the function was not used outside net/core/dev.c after the
1st patch is applied, but now I think you are right.
Regarding min/min_t, I had tried both and got this error:
"include/linux/if_macvlan.h:57: error: braced-group within expression
allowed only inside a function"
Please let me know if there is any alternative (curly braces cannot be
used outside of functions). Otherwise one change required is to add:
#ifndef MIN
#endif
I will wait for a few hours and resubmit the patches.
thanks,
- KK
Arnd Bergmann <arnd@arndb.de> wrote on 08/02/2010 09:22:28 PM:
> Arnd Bergmann <arnd@arndb.de>
> 08/02/2010 09:22 PM
>
> To
>
> Krishna Kumar2/India/IBM@IBMIN
>
> cc
>
> davem@davemloft.net, bhutchings@solarflare.com,
> netdev@vger.kernel.org, mst@redhat.com, therbert@google.com
>
> Subject
>
> Re: [PATCH v2 2/2] macvtap: Implement multiqueue macvtap driver
>
> On Monday 02 August 2010, Krishna Kumar wrote:
> > Implement multiqueue facility for macvtap driver. The idea is that
> > a macvtap device can be opened multiple times and the fd's can be
> > used to register eg, as backend for vhost.
> >
> > Please review.
>
> Only two very minor points from my side:
>
> > diff -ruNp org/include/linux/netdevice.h new/include/linux/netdevice.h
> > --- org/include/linux/netdevice.h 2010-07-25 16:57:07.000000000 +0530
> > +++ new/include/linux/netdevice.h 2010-08-02 16:05:57.000000000 +0530
> > @@ -2253,6 +2253,7 @@ static inline const char *netdev_name(co
> > return dev->name;
> > }
> >
> > +extern int skb_calculate_flow(struct net_device *dev, struct sk_buff
*skb);
> > extern int netdev_printk(const char *level, const struct net_device
*dev,
> > const char *format, ...)
> > __attribute__ ((format (printf, 3, 4)));
>
> This logically belongs into the first patch.
>
> > diff -ruNp org/include/linux/if_macvlan.h
new/include/linux/if_macvlan.h
> > --- org/include/linux/if_macvlan.h 2010-08-02 15:32:33.000000000
+0530
> > +++ new/include/linux/if_macvlan.h 2010-08-02 15:32:33.000000000
+0530
> > @@ -40,6 +40,14 @@ struct macvlan_rx_stats {
> > unsigned long rx_errors;
> > };
> >
> > +#define MIN(x, y) (((x) < (y)) ? (x) : (y))
> > +
> > +/*
> > + * Maximum times a macvtap device can be opened. This can be used to
> > + * configure the number of receive queue, e.g. for multiqueue virtio.
> > + */
> > +#define MAX_MACVTAP_QUEUES MIN(16, NR_CPUS)
> > +
>
> Please use the existing min() or min_t() macro instead of providing your
own.
>
> Arnd
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH v2 2/2] macvtap: Implement multiqueue macvtap driver
2010-08-02 16:28 ` Krishna Kumar2
@ 2010-08-02 16:34 ` Arnd Bergmann
0 siblings, 0 replies; 5+ messages in thread
From: Arnd Bergmann @ 2010-08-02 16:34 UTC (permalink / raw)
To: Krishna Kumar2; +Cc: bhutchings, davem, mst, netdev, therbert
On Monday 02 August 2010, Krishna Kumar2 wrote:
> "include/linux/if_macvlan.h:57: error: braced-group within expression
> allowed only inside a function"
> Please let me know if there is any alternative (curly braces cannot be
> used outside of functions). Otherwise one change required is to add:
>
> #ifndef MIN
> #endif
>
> I will wait for a few hours and resubmit the patches.
Maybe just open-code the minimum computation:
#define MAX_MACVTAP_QUEUES (NR_CPUS < 16 ? NR_CPUS : 16)
ARnd
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2010-08-02 16:34 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-08-02 14:33 [PATCH v2 1/2] core: Factor out flow calculation from get_rps_cpu Krishna Kumar
2010-08-02 14:33 ` [PATCH v2 2/2] macvtap: Implement multiqueue macvtap driver Krishna Kumar
2010-08-02 15:52 ` Arnd Bergmann
2010-08-02 16:28 ` Krishna Kumar2
2010-08-02 16:34 ` Arnd Bergmann
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).