netdev.vger.kernel.org archive mirror
* [PATCH v5] net: batch skb dequeueing from softnet input_pkt_queue
@ 2010-04-22  9:09 Changli Gao
  2010-04-22  9:43 ` David Miller
  2010-04-22 11:37 ` Eric Dumazet
  0 siblings, 2 replies; 9+ messages in thread
From: Changli Gao @ 2010-04-22  9:09 UTC (permalink / raw)
  To: David S. Miller; +Cc: jamal, Tom Herbert, Eric Dumazet, netdev, Changli Gao

batch skb dequeueing from softnet input_pkt_queue

batch skb dequeueing from softnet input_pkt_queue to reduce potential lock
contention when RPS is enabled. input_pkt_queue is reimplemented as a singly
linked list (FIFO) to keep enqueueing and dequeueing as fast as possible, and
input_pkt_queue_lock is moved into the RPS section to save 4 bytes on 32-bit
machines.

Note: input_pkt_queue_len is not decreased until process_backlog() returns.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
---
 include/linux/netdevice.h |   12 ++++-
 net/core/dev.c            |   99 +++++++++++++++++++++++++++++++++-------------
 2 files changed, 82 insertions(+), 29 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3c5ed5f..58abdd5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1387,6 +1387,7 @@ struct softnet_data {
 	struct Qdisc		*output_queue;
 	struct list_head	poll_list;
 	struct sk_buff		*completion_queue;
+	struct sk_buff		*process_queue;
 
 #ifdef CONFIG_RPS
 	struct softnet_data	*rps_ipi_list;
@@ -1396,15 +1397,20 @@ struct softnet_data {
 	struct softnet_data	*rps_ipi_next;
 	unsigned int		cpu;
 	unsigned int		input_queue_head;
+	spinlock_t		input_pkt_queue_lock;
 #endif
-	struct sk_buff_head	input_pkt_queue;
+	unsigned int		input_pkt_queue_len;
+	struct sk_buff		*input_pkt_queue_head;
+	struct sk_buff		**input_pkt_queue_tailp;
+
 	struct napi_struct	backlog;
 };
 
-static inline void input_queue_head_incr(struct softnet_data *sd)
+static inline void input_queue_head_add(struct softnet_data *sd,
+					unsigned int len)
 {
 #ifdef CONFIG_RPS
-	sd->input_queue_head++;
+	sd->input_queue_head += len;
 #endif
 }
 
diff --git a/net/core/dev.c b/net/core/dev.c
index e904c47..f37c223 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -211,14 +211,14 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
 static inline void rps_lock(struct softnet_data *sd)
 {
 #ifdef CONFIG_RPS
-	spin_lock(&sd->input_pkt_queue.lock);
+	spin_lock(&sd->input_pkt_queue_lock);
 #endif
 }
 
 static inline void rps_unlock(struct softnet_data *sd)
 {
 #ifdef CONFIG_RPS
-	spin_unlock(&sd->input_pkt_queue.lock);
+	spin_unlock(&sd->input_pkt_queue_lock);
 #endif
 }
 
@@ -2409,12 +2409,15 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
 	__get_cpu_var(netdev_rx_stat).total++;
 
 	rps_lock(sd);
-	if (sd->input_pkt_queue.qlen <= netdev_max_backlog) {
-		if (sd->input_pkt_queue.qlen) {
+	if (sd->input_pkt_queue_len <= netdev_max_backlog) {
+		if (sd->input_pkt_queue_len) {
 enqueue:
-			__skb_queue_tail(&sd->input_pkt_queue, skb);
+			skb->next = NULL;
+			*sd->input_pkt_queue_tailp = skb;
+			sd->input_pkt_queue_tailp = &skb->next;
+			sd->input_pkt_queue_len++;
 #ifdef CONFIG_RPS
-			*qtail = sd->input_queue_head + sd->input_pkt_queue.qlen;
+			*qtail = sd->input_queue_head + sd->input_pkt_queue_len;
 #endif
 			rps_unlock(sd);
 			local_irq_restore(flags);
@@ -2927,19 +2930,37 @@ EXPORT_SYMBOL(netif_receive_skb);
 /* Network device is going away, flush any packets still pending
  * Called with irqs disabled.
  */
-static void flush_backlog(void *arg)
+
+static struct sk_buff **__flush_backlog(struct softnet_data *sd,
+					struct sk_buff **pskb,
+					struct net_device *dev)
 {
-	struct net_device *dev = arg;
-	struct softnet_data *sd = &__get_cpu_var(softnet_data);
-	struct sk_buff *skb, *tmp;
+	struct sk_buff *skb;
 
-	rps_lock(sd);
-	skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp)
+	while (*pskb) {
+		skb = *pskb;
 		if (skb->dev == dev) {
-			__skb_unlink(skb, &sd->input_pkt_queue);
+			*pskb = skb->next;
 			kfree_skb(skb);
-			input_queue_head_incr(sd);
+			input_queue_head_add(sd, 1);
+			sd->input_pkt_queue_len--;
+		} else {
+			pskb = &skb->next;
 		}
+	}
+
+	return pskb;
+}
+
+static void flush_backlog(void *arg)
+{
+	struct softnet_data *sd = &__get_cpu_var(softnet_data);
+	struct sk_buff **tailp;
+
+	rps_lock(sd);
+	tailp = __flush_backlog(sd, &sd->input_pkt_queue_head, arg);
+	sd->input_pkt_queue_tailp = tailp;
+	__flush_backlog(sd, &sd->process_queue, arg);
 	rps_unlock(sd);
 }
 
@@ -3249,24 +3270,39 @@ static int process_backlog(struct napi_struct *napi, int quota)
 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
 
 	napi->weight = weight_p;
+	local_irq_disable();
 	do {
 		struct sk_buff *skb;
 
-		local_irq_disable();
+		while (sd->process_queue) {
+			skb = sd->process_queue;
+			sd->process_queue = skb->next;
+			local_irq_enable();
+			__netif_receive_skb(skb);
+			if (++work >= quota) {
+				local_irq_disable();
+				rps_lock(sd);
+				goto out;
+			}
+			local_irq_disable();
+		}
+
 		rps_lock(sd);
-		skb = __skb_dequeue(&sd->input_pkt_queue);
-		if (!skb) {
+		if (sd->input_pkt_queue_head == NULL) {
 			__napi_complete(napi);
-			rps_unlock(sd);
-			local_irq_enable();
 			break;
 		}
-		input_queue_head_incr(sd);
+		sd->process_queue = sd->input_pkt_queue_head;
+		sd->input_pkt_queue_head = NULL;
+		sd->input_pkt_queue_tailp = &sd->input_pkt_queue_head;
 		rps_unlock(sd);
-		local_irq_enable();
+	} while (1);
 
-		__netif_receive_skb(skb);
-	} while (++work < quota);
+out:
+	sd->input_pkt_queue_len -= work;
+	input_queue_head_add(sd, work);
+	rps_unlock(sd);
+	local_irq_enable();
 
 	return work;
 }
@@ -5621,10 +5657,17 @@ static int dev_cpu_callback(struct notifier_block *nfb,
 	local_irq_enable();
 
 	/* Process offline CPU's input_pkt_queue */
-	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
+	while ((skb = oldsd->input_pkt_queue_head)) {
+		oldsd->input_pkt_queue_head = skb->next;
+		netif_rx(skb);
+	}
+	while ((skb = oldsd->process_queue)) {
+		oldsd->process_queue = skb->next;
 		netif_rx(skb);
-		input_queue_head_incr(oldsd);
 	}
+	oldsd->input_pkt_queue_tailp = &oldsd->input_pkt_queue_head;
+	input_queue_head_add(oldsd, oldsd->input_pkt_queue_len);
+	oldsd->input_pkt_queue_len = 0;
 
 	return NOTIFY_OK;
 }
@@ -5842,11 +5885,15 @@ static int __init net_dev_init(void)
 	for_each_possible_cpu(i) {
 		struct softnet_data *sd = &per_cpu(softnet_data, i);
 
-		skb_queue_head_init(&sd->input_pkt_queue);
+		sd->input_pkt_queue_head = NULL;
+		sd->input_pkt_queue_tailp = &sd->input_pkt_queue_head;
+		sd->input_pkt_queue_len = 0;
+		sd->process_queue = NULL;
 		sd->completion_queue = NULL;
 		INIT_LIST_HEAD(&sd->poll_list);
 
 #ifdef CONFIG_RPS
+		spin_lock_init(&sd->input_pkt_queue_lock);
 		sd->csd.func = rps_trigger_softirq;
 		sd->csd.info = sd;
 		sd->csd.flags = 0;


* Re: [PATCH v5] net: batch skb dequeueing from softnet input_pkt_queue
  2010-04-22  9:09 [PATCH v5] net: batch skb dequeueing from softnet input_pkt_queue Changli Gao
@ 2010-04-22  9:43 ` David Miller
  2010-04-22 12:27   ` Changli Gao
  2010-04-22 11:37 ` Eric Dumazet
  1 sibling, 1 reply; 9+ messages in thread
From: David Miller @ 2010-04-22  9:43 UTC (permalink / raw)
  To: xiaosuo; +Cc: hadi, therbert, eric.dumazet, netdev

From: Changli Gao <xiaosuo@gmail.com>
Date: Thu, 22 Apr 2010 17:09:17 +0800

> +	unsigned int		input_pkt_queue_len;
> +	struct sk_buff		*input_pkt_queue_head;
> +	struct sk_buff		**input_pkt_queue_tailp;
> +

Please do not ignore Stephen Hemminger's feedback.

We already have enough odd SKB queue implementations, we
do not need yet another one in a core location.  This makes
it harder and harder to eventually convert sk_buff to use
"struct list_head".

Instead, use "struct sk_buff_head" and the lockless accessors
(__skb_insert, etc.) and initializer (__skb_queue_head_init).
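
For illustration, a minimal sketch of that shape (hedged: the helper
names below are made up for this example, keeping process_queue as a
sk_buff_head is an assumption, and rps_lock() is the wrapper from your
patch; this is not existing mainline code):

static void backlog_enqueue(struct softnet_data *sd, struct sk_buff *skb)
{
	/* caller runs with IRQs disabled on the owning CPU */
	rps_lock(sd);	/* spin_lock(&sd->input_pkt_queue.lock) under RPS */
	__skb_queue_tail(&sd->input_pkt_queue, skb);	/* lockless helper */
	rps_unlock(sd);
}

static void backlog_init(struct softnet_data *sd)
{
	skb_queue_head_init(&sd->input_pkt_queue);	/* inits the lock too */
	__skb_queue_head_init(&sd->process_queue);	/* CPU-local, no lock */
}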


* Re: [PATCH v5] net: batch skb dequeueing from softnet input_pkt_queue
  2010-04-22  9:09 [PATCH v5] net: batch skb dequeueing from softnet input_pkt_queue Changli Gao
  2010-04-22  9:43 ` David Miller
@ 2010-04-22 11:37 ` Eric Dumazet
  2010-04-22 12:17   ` jamal
  2010-04-22 13:06   ` Changli Gao
  1 sibling, 2 replies; 9+ messages in thread
From: Eric Dumazet @ 2010-04-22 11:37 UTC (permalink / raw)
  To: Changli Gao; +Cc: David S. Miller, jamal, Tom Herbert, netdev

On Thursday 22 April 2010 at 17:09 +0800, Changli Gao wrote:
> [...]
> @@ -3249,24 +3270,39 @@ static int process_backlog(struct napi_struct *napi, int quota)
>  	struct softnet_data *sd = &__get_cpu_var(softnet_data);
>  
>  	napi->weight = weight_p;
> +	local_irq_disable();
>  	do {
>  		struct sk_buff *skb;
>  
> -		local_irq_disable();
> +		while (sd->process_queue) {
> +			skb = sd->process_queue;
> +			sd->process_queue = skb->next;
> +			local_irq_enable();
> +			__netif_receive_skb(skb);
> +			if (++work >= quota) {
> +				local_irq_disable();
> +				rps_lock(sd);
> +				goto out;
> +			}
> +			local_irq_disable();
> +		}
> +
>  		rps_lock(sd);
> -		skb = __skb_dequeue(&sd->input_pkt_queue);
> -		if (!skb) {
> +		if (sd->input_pkt_queue_head == NULL) {
>  			__napi_complete(napi);
> -			rps_unlock(sd);
> -			local_irq_enable();
>  			break;
>  		}
> -		input_queue_head_incr(sd);
> +		sd->process_queue = sd->input_pkt_queue_head;
> +		sd->input_pkt_queue_head = NULL;
> +		sd->input_pkt_queue_tailp = &sd->input_pkt_queue_head;
>  		rps_unlock(sd);
> -		local_irq_enable();
> +	} while (1);
>  
> -		__netif_receive_skb(skb);
> -	} while (++work < quota);
> +out:
> +	sd->input_pkt_queue_len -= work;
> +	input_queue_head_add(sd, work);
> +	rps_unlock(sd);
> +	local_irq_enable();
>  



Please reorder things better.

Most likely this function is called for one packet.

In your version you take the rps_lock()/rps_unlock() path twice, so
it'll be slower.

Once to 'transfer' one list to the process list.

Once to be able to do the 'out:' label post-processing.





* Re: [PATCH v5] net: batch skb dequeueing from softnet input_pkt_queue
  2010-04-22 11:37 ` Eric Dumazet
@ 2010-04-22 12:17   ` jamal
  2010-04-22 13:06   ` Changli Gao
  1 sibling, 0 replies; 9+ messages in thread
From: jamal @ 2010-04-22 12:17 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Changli Gao, David S. Miller, Tom Herbert, netdev

On Thu, 2010-04-22 at 13:37 +0200, Eric Dumazet wrote:

> Please reorder things better.
> 
> Most likely this function is called for one packet.
> 
> In your version you take the rps_lock()/rps_unlock() path twice, so
> it'll be slower.
> 
> Once to 'transfer' one list to the process list.
> 
> Once to be able to do the 'out:' label post-processing.
> 

Ok, once it is settled what the right changes are, I will test.

cheers,
jamal



* Re: [PATCH v5] net: batch skb dequeueing from softnet input_pkt_queue
  2010-04-22  9:43 ` David Miller
@ 2010-04-22 12:27   ` Changli Gao
  2010-04-22 14:24     ` Eric Dumazet
  0 siblings, 1 reply; 9+ messages in thread
From: Changli Gao @ 2010-04-22 12:27 UTC (permalink / raw)
  To: David Miller, Eric Dumazet; +Cc: hadi, therbert, netdev

On Thu, Apr 22, 2010 at 5:43 PM, David Miller <davem@davemloft.net> wrote:
> From: Changli Gao <xiaosuo@gmail.com>
> Date: Thu, 22 Apr 2010 17:09:17 +0800
>
>> +     unsigned int            input_pkt_queue_len;
>> +     struct sk_buff          *input_pkt_queue_head;
>> +     struct sk_buff          **input_pkt_queue_tailp;
>> +
>
> Please do not ignore Stephen Hemminger's feedback.
>
> We already have enough odd SKB queue implementations, we
> do not need yet another one in a core location.  This makes
> it harder and harder to eventually convert sk_buff to use
> "struct list_head".
>
> Instead, use "struct sk_buff_head" and the lockless accessors
> (__skb_insert, etc.) and initializer (__skb_queue_head_init).
>

If I want to keep softnet_data small, I have to access the internal
fields of sk_buff_head and modify them in a hacky way, which doesn't
sound good. If not, softnet_data will become:

struct softnet_data {
        struct Qdisc            *output_queue;
        struct list_head        poll_list;
        struct sk_buff          *completion_queue;
        struct sk_buff_head     process_queue;

#ifdef CONFIG_RPS
        struct softnet_data     *rps_ipi_list;

        /* Elements below can be accessed between CPUs for RPS */
        struct call_single_data csd ____cacheline_aligned_in_smp;
        struct softnet_data     *rps_ipi_next;
        unsigned int            cpu;
        unsigned int            input_queue_head;
#endif
        unsigned int            input_pkt_queue_len;
        struct sk_buff_head     input_pkt_queue;
        struct napi_struct      backlog;
};

Eric, do you think it is too fat?


* Re: [PATCH v5] net: batch skb dequeueing from softnet input_pkt_queue
  2010-04-22 11:37 ` Eric Dumazet
  2010-04-22 12:17   ` jamal
@ 2010-04-22 13:06   ` Changli Gao
  2010-04-22 14:33     ` Eric Dumazet
  1 sibling, 1 reply; 9+ messages in thread
From: Changli Gao @ 2010-04-22 13:06 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David S. Miller, jamal, Tom Herbert, netdev

On Thu, Apr 22, 2010 at 7:37 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
> Please reorder things better.
>
> Most likely this function is called for one packet.
>
> In your version you take the rps_lock()/rps_unlock() path twice, so
> it'll be slower.
>
> Once to 'transfer' one list to the process list.
>
> Once to be able to do the 'out:' label post-processing.
>

It doesn't make any difference. We have to hold rps_lock to update
input_pkt_queue_len unless we use another variable to record the
length of the process queue, or an atomic variable.

I think it is better to use another variable to record the length
of the process queue and update it before process_backlog()
returns. For one packet, there is only one lock/unlock. There is
only the one issue you raised: a cache miss due to summing the two
queues' lengths. I'll change softnet_data to:

struct softnet_data {
        struct Qdisc            *output_queue;
        struct list_head        poll_list;
        struct sk_buff          *completion_queue;
        struct sk_buff_head     process_queue;

#ifdef CONFIG_RPS
        struct softnet_data     *rps_ipi_list;

        /* Elements below can be accessed between CPUs for RPS */
        struct call_single_data csd ____cacheline_aligned_in_smp;
        struct softnet_data     *rps_ipi_next;
        unsigned int            cpu;
        unsigned int            input_queue_head;
#endif
        unsigned int            process_queue_len;
        struct sk_buff_head     input_pkt_queue;
        struct napi_struct      backlog;
};

For one packet, we have to update process_queue_len anyway. For
more packets, we only change process_queue_len once, just before
process_backlog() returns, so the process_queue_len updates are
batched.
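
A rough sketch of what I mean (backlog_account() is just a made-up
name for illustration; the enqueue side would then check
process_queue_len + input_pkt_queue.qlen against netdev_max_backlog):

static void backlog_account(struct softnet_data *sd, unsigned int work)
{
	rps_lock(sd);
	/* one batched update per process_backlog() call,
	 * not one per packet */
	sd->process_queue_len -= work;
	input_queue_head_add(sd, work);
	rps_unlock(sd);
}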

-- 
Regards,
Changli Gao(xiaosuo@gmail.com)


* Re: [PATCH v5] net: batch skb dequeueing from softnet input_pkt_queue
  2010-04-22 12:27   ` Changli Gao
@ 2010-04-22 14:24     ` Eric Dumazet
  0 siblings, 0 replies; 9+ messages in thread
From: Eric Dumazet @ 2010-04-22 14:24 UTC (permalink / raw)
  To: Changli Gao; +Cc: David Miller, hadi, therbert, netdev

On Thursday 22 April 2010 at 20:27 +0800, Changli Gao wrote:
> On Thu, Apr 22, 2010 at 5:43 PM, David Miller <davem@davemloft.net> wrote:
> > From: Changli Gao <xiaosuo@gmail.com>
> > Date: Thu, 22 Apr 2010 17:09:17 +0800
> >
> >> +     unsigned int            input_pkt_queue_len;
> >> +     struct sk_buff          *input_pkt_queue_head;
> >> +     struct sk_buff          **input_pkt_queue_tailp;
> >> +
> >
> > Please do not ignore Stephen Hemminger's feedback.
> >
> > We already have enough odd SKB queue implementations, we
> > do not need yet another one in a core location.  This makes
> > it harder and harder to eventually convert sk_buff to use
> > "struct list_head".
> >
> > Instead, use "struct sk_buff_head" and the lockless accessors
> > (__skb_insert, etc.) and initializer (__skb_queue_head_init).
> >
> 
> If I want to keep softnet_data small, I have to access the internal
> fields of sk_buff_head and modify them in a hacky way, which doesn't
> sound good. If not, softnet_data will become:
> 
> struct softnet_data {
>         struct Qdisc            *output_queue;
>         struct list_head        poll_list;
>         struct sk_buff          *completion_queue;
>         struct sk_buff_head     process_queue;
> 
> #ifdef CONFIG_RPS
>         struct softnet_data     *rps_ipi_list;
> 
>         /* Elements below can be accessed between CPUs for RPS */
>         struct call_single_data csd ____cacheline_aligned_in_smp;
>         struct softnet_data     *rps_ipi_next;
>         unsigned int            cpu;
>         unsigned int            input_queue_head;
> #endif
>         unsigned int            input_pkt_queue_len;
>         struct sk_buff_head     input_pkt_queue;
>         struct napi_struct      backlog;
> };
> 
> Eric, do you think it is too fat?

No it is not, as long as we are careful to separate cache lines.

By the way, input_pkt_queue_len is already in input_pkt_queue (.len)

Thanks




* Re: [PATCH v5] net: batch skb dequeueing from softnet input_pkt_queue
  2010-04-22 13:06   ` Changli Gao
@ 2010-04-22 14:33     ` Eric Dumazet
  2010-04-22 14:54       ` Changli Gao
  0 siblings, 1 reply; 9+ messages in thread
From: Eric Dumazet @ 2010-04-22 14:33 UTC (permalink / raw)
  To: Changli Gao; +Cc: David S. Miller, jamal, Tom Herbert, netdev

On Thursday 22 April 2010 at 21:06 +0800, Changli Gao wrote:
> On Thu, Apr 22, 2010 at 7:37 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> >
> > Please reorder things better.
> >
> > Most likely this function is called for one packet.
> >
> > In your version you take the rps_lock()/rps_unlock() path twice, so
> > it'll be slower.
> >
> > Once to 'transfer' one list to the process list.
> >
> > Once to be able to do the 'out:' label post-processing.
> >
> 
> It doesn't make any difference. We have to hold rps_lock to update
> input_pkt_queue_len unless we use another variable to record the
> length of the process queue, or an atomic variable.
> 

It does make a difference, damn it.

I really, really start to think you don't read what I wrote, or you
don't care.

Damn, can't you update all the things at once, taking this lock only
once?

You focus on having an ultra-precise count of pkt_queue.len, but we don't
care at all! We only want a _limit_, or else the box can be killed by
a DoS.

If in practice this limit can be 2*limit, that's OK.

Can't you understand this?

> I think it is better to use another variable to record the length
> of the process queue and update it before process_backlog()
> returns. For one packet, there is only one lock/unlock. There is
> only the one issue you raised: a cache miss due to summing the two
> queues' lengths. I'll change softnet_data to:
> 
> struct softnet_data {
>         struct Qdisc            *output_queue;
>         struct list_head        poll_list;
>         struct sk_buff          *completion_queue;
>         struct sk_buff_head     process_queue;
> 
> #ifdef CONFIG_RPS
>         struct softnet_data     *rps_ipi_list;
> 
>         /* Elements below can be accessed between CPUs for RPS */
>         struct call_single_data csd ____cacheline_aligned_in_smp;
>         struct softnet_data     *rps_ipi_next;
>         unsigned int            cpu;
>         unsigned int            input_queue_head;
> #endif
>         unsigned int            process_queue_len;
>         struct sk_buff_head     input_pkt_queue;
>         struct napi_struct      backlog;
> };
> 
> For one packet, we have to update process_queue_len anyway. For
> more packets, we only change process_queue_len once, just before
> process_backlog() returns, so the process_queue_len updates are
> batched.
> 

We need one limit. Not two limits.

I already told you how to do it, but you ignored me and started yet
another convoluted thing.


process_backlog() transfers the queue to its own queue and resets pkt_len
to 0 (only once).

End of story.

The maximum number of packets queued to this CPU's softnet_data will be
2 * old_limit.

So what?
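
Something like this sketch (illustrative only, not tested; it reuses
the existing sk_buff_head splice helpers and the patch's
input_queue_head_add()):

static int process_backlog(struct napi_struct *napi, int quota)
{
	struct softnet_data *sd = &__get_cpu_var(softnet_data);
	int work = 0;

	napi->weight = weight_p;
	local_irq_disable();
	while (work < quota) {
		struct sk_buff *skb = __skb_dequeue(&sd->process_queue);

		if (skb) {
			local_irq_enable();
			__netif_receive_skb(skb);	/* lock not held */
			work++;
			local_irq_disable();
			continue;
		}

		rps_lock(sd);
		if (skb_queue_empty(&sd->input_pkt_queue)) {
			__napi_complete(napi);
			rps_unlock(sd);
			break;
		}
		/* One lock section does all the accounting and the
		 * transfer; input_pkt_queue.qlen drops back to zero, so
		 * the enqueue-side check effectively allows up to
		 * 2 * netdev_max_backlog packets per CPU. */
		input_queue_head_add(sd, skb_queue_len(&sd->input_pkt_queue));
		skb_queue_splice_tail_init(&sd->input_pkt_queue,
					   &sd->process_queue);
		rps_unlock(sd);
	}
	local_irq_enable();

	return work;
}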




* Re: [PATCH v5] net: batch skb dequeueing from softnet input_pkt_queue
  2010-04-22 14:33     ` Eric Dumazet
@ 2010-04-22 14:54       ` Changli Gao
  0 siblings, 0 replies; 9+ messages in thread
From: Changli Gao @ 2010-04-22 14:54 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David S. Miller, jamal, Tom Herbert, netdev

On Thu, Apr 22, 2010 at 10:33 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
> It does make a difference, damn it.
>
> I really, really start to think you don't read what I wrote, or you
> don't care.

I misunderstood it. Sorry.

>
> Damn, can't you update all the things at once, taking this lock only
> once?
>
> You focus on having an ultra-precise count of pkt_queue.len, but we don't
> care at all! We only want a _limit_, or else the box can be killed by
> a DoS.
>
> If in practice this limit can be 2*limit, that's OK.
>
> Can't you understand this?
>
>
> We need one limit. Not two limits.
>
> I already told you how to do it, but you ignored me and started yet
> another convoluted thing.
>
>
> process_backlog() transfers the queue to its own queue and resets pkt_len
> to 0 (only once).
>
> End of story.
>
> The maximum number of packets queued to this CPU's softnet_data will be
> 2 * old_limit.
>
> So what?
>

Now, I think I really understand. We don't need a precise limit, so
an additional queue is enough.

-- 
Regards,
Changli Gao(xiaosuo@gmail.com)

