All of lore.kernel.org
 help / color / mirror / Atom feed
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
To: Wei Liu <wei.liu2@citrix.com>
Cc: annie.li@oracle.com, david.vrabel@citrix.com,
	ian.campbell@citrix.com, xen-devel@lists.xen.org
Subject: Re: [PATCH 2/3] xen-netback: switch to per-cpu scratch space
Date: Tue, 28 May 2013 09:18:41 -0400	[thread overview]
Message-ID: <20130528131841.GA724@phenom.dumpdata.com> (raw)
In-Reply-To: <1369654183-10536-3-git-send-email-wei.liu2@citrix.com>

On Mon, May 27, 2013 at 12:29:42PM +0100, Wei Liu wrote:
> There are maximum nr_onlie_cpus netback threads running. We can make use
> of per-cpu scratch space to reduce the size of buffer space when we move
> to 1:1 model.
> 
> In the unlikely event when per-cpu scratch space is not available,
> processing routines will refuse to run on that CPU.
> 
> Signed-off-by: Wei Liu <wei.liu2@citrix.com>
> ---
>  drivers/net/xen-netback/netback.c |  247 ++++++++++++++++++++++++++++++-------
>  1 file changed, 204 insertions(+), 43 deletions(-)
> 
> diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
> index 54853be..0f69eda 100644
> --- a/drivers/net/xen-netback/netback.c
> +++ b/drivers/net/xen-netback/netback.c
> @@ -37,6 +37,7 @@
>  #include <linux/kthread.h>
>  #include <linux/if_vlan.h>
>  #include <linux/udp.h>
> +#include <linux/cpu.h>
>  
>  #include <net/tcp.h>
>  
> @@ -95,6 +96,24 @@ struct netbk_rx_meta {
>  
>  #define MAX_BUFFER_OFFSET PAGE_SIZE
>  
> +/* Coalescing tx requests before copying makes number of grant
> + * copy ops greater or equal to number of slots required. In
> + * worst case a tx request consumes 2 gnttab_copy. So the size
> + * of tx_copy_ops array should be 2*MAX_PENDING_REQS.
> + */
> +#define TX_COPY_OPS_SIZE (2*MAX_PENDING_REQS)
> +DEFINE_PER_CPU(struct gnttab_copy *, tx_copy_ops);

static
> +
> +/* Given MAX_BUFFER_OFFSET of 4096 the worst case is that each
> + * head/fragment page uses 2 copy operations because it
> + * straddles two buffers in the frontend. So the size of following
> + * arrays should be 2*XEN_NETIF_RX_RING_SIZE.
> + */
> +#define GRANT_COPY_OP_SIZE (2*XEN_NETIF_RX_RING_SIZE)
> +#define META_SIZE (2*XEN_NETIF_RX_RING_SIZE)
> +DEFINE_PER_CPU(struct gnttab_copy *, grant_copy_op);
> +DEFINE_PER_CPU(struct netbk_rx_meta *, meta);

static for both of them.

> +
>  struct xen_netbk {
>  	wait_queue_head_t wq;
>  	struct task_struct *task;
> @@ -116,21 +135,7 @@ struct xen_netbk {
>  	atomic_t netfront_count;
>  
>  	struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
> -	/* Coalescing tx requests before copying makes number of grant
> -	 * copy ops greater or equal to number of slots required. In
> -	 * worst case a tx request consumes 2 gnttab_copy.
> -	 */
> -	struct gnttab_copy tx_copy_ops[2*MAX_PENDING_REQS];
> -
>  	u16 pending_ring[MAX_PENDING_REQS];
> -
> -	/*
> -	 * Given MAX_BUFFER_OFFSET of 4096 the worst case is that each
> -	 * head/fragment page uses 2 copy operations because it
> -	 * straddles two buffers in the frontend.
> -	 */
> -	struct gnttab_copy grant_copy_op[2*XEN_NETIF_RX_RING_SIZE];
> -	struct netbk_rx_meta meta[2*XEN_NETIF_RX_RING_SIZE];
>  };
>  
>  static struct xen_netbk *xen_netbk;
> @@ -608,12 +613,31 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk)
>  	int count;
>  	unsigned long offset;
>  	struct skb_cb_overlay *sco;
> +	struct gnttab_copy *gco = get_cpu_var(grant_copy_op);
> +	struct netbk_rx_meta *m = get_cpu_var(meta);
> +	static int unusable_count;
>  
>  	struct netrx_pending_operations npo = {
> -		.copy  = netbk->grant_copy_op,
> -		.meta  = netbk->meta,
> +		.copy = gco,
> +		.meta = m,
>  	};
>  
> +	if (gco == NULL || m == NULL) {
> +		put_cpu_var(grant_copy_op);
> +		put_cpu_var(meta);
> +		if (unusable_count == 1000) {

printk_ratelimited ?

> +			printk(KERN_ALERT
> +			       "xen-netback: "
> +			       "CPU %d scratch space is not available,"
> +			       " not doing any TX work for netback/%d\n",
> +			       smp_processor_id(),
> +			       (int)(netbk - xen_netbk));

So ... are you going to retry it? Drop it? Can you include in the message the
the mechanism by which you are going to recover?

> +			unusable_count = 0;
> +		} else
> +			unusable_count++;
> +		return;
> +	}
> +
>  	skb_queue_head_init(&rxq);
>  
>  	count = 0;
> @@ -635,27 +659,30 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk)
>  			break;
>  	}
>  
> -	BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta));
> +	BUG_ON(npo.meta_prod > META_SIZE);
>  
> -	if (!npo.copy_prod)
> +	if (!npo.copy_prod) {
> +		put_cpu_var(grant_copy_op);
> +		put_cpu_var(meta);
>  		return;
> +	}
>  
> -	BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));
> -	gnttab_batch_copy(netbk->grant_copy_op, npo.copy_prod);
> +	BUG_ON(npo.copy_prod > GRANT_COPY_OP_SIZE);
> +	gnttab_batch_copy(gco, npo.copy_prod);
>  
>  	while ((skb = __skb_dequeue(&rxq)) != NULL) {
>  		sco = (struct skb_cb_overlay *)skb->cb;
>  
>  		vif = netdev_priv(skb->dev);
>  
> -		if (netbk->meta[npo.meta_cons].gso_size && vif->gso_prefix) {
> +		if (m[npo.meta_cons].gso_size && vif->gso_prefix) {
>  			resp = RING_GET_RESPONSE(&vif->rx,
>  						vif->rx.rsp_prod_pvt++);
>  
>  			resp->flags = XEN_NETRXF_gso_prefix | XEN_NETRXF_more_data;
>  
> -			resp->offset = netbk->meta[npo.meta_cons].gso_size;
> -			resp->id = netbk->meta[npo.meta_cons].id;
> +			resp->offset = m[npo.meta_cons].gso_size;
> +			resp->id = m[npo.meta_cons].id;
>  			resp->status = sco->meta_slots_used;
>  
>  			npo.meta_cons++;
> @@ -680,12 +707,12 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk)
>  			flags |= XEN_NETRXF_data_validated;
>  
>  		offset = 0;
> -		resp = make_rx_response(vif, netbk->meta[npo.meta_cons].id,
> +		resp = make_rx_response(vif, m[npo.meta_cons].id,
>  					status, offset,
> -					netbk->meta[npo.meta_cons].size,
> +					m[npo.meta_cons].size,
>  					flags);
>  
> -		if (netbk->meta[npo.meta_cons].gso_size && !vif->gso_prefix) {
> +		if (m[npo.meta_cons].gso_size && !vif->gso_prefix) {
>  			struct xen_netif_extra_info *gso =
>  				(struct xen_netif_extra_info *)
>  				RING_GET_RESPONSE(&vif->rx,
> @@ -693,7 +720,7 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk)
>  
>  			resp->flags |= XEN_NETRXF_extra_info;
>  
> -			gso->u.gso.size = netbk->meta[npo.meta_cons].gso_size;
> +			gso->u.gso.size = m[npo.meta_cons].gso_size;
>  			gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
>  			gso->u.gso.pad = 0;
>  			gso->u.gso.features = 0;
> @@ -703,7 +730,7 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk)
>  		}
>  
>  		netbk_add_frag_responses(vif, status,
> -					 netbk->meta + npo.meta_cons + 1,
> +					 m + npo.meta_cons + 1,
>  					 sco->meta_slots_used);
>  
>  		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret);
> @@ -726,6 +753,9 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk)
>  	if (!skb_queue_empty(&netbk->rx_queue) &&
>  			!timer_pending(&netbk->net_timer))
>  		xen_netbk_kick_thread(netbk);
> +
> +	put_cpu_var(grant_copy_op);
> +	put_cpu_var(meta);
>  }
>  
>  void xen_netbk_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb)
> @@ -1351,9 +1381,10 @@ static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
>  	return false;
>  }
>  
> -static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
> +static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk,
> +					struct gnttab_copy *tco)
>  {
> -	struct gnttab_copy *gop = netbk->tx_copy_ops, *request_gop;
> +	struct gnttab_copy *gop = tco, *request_gop;
>  	struct sk_buff *skb;
>  	int ret;
>  
> @@ -1531,16 +1562,17 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
>  		vif->tx.req_cons = idx;
>  		xen_netbk_check_rx_xenvif(vif);
>  
> -		if ((gop-netbk->tx_copy_ops) >= ARRAY_SIZE(netbk->tx_copy_ops))
> +		if ((gop-tco) >= TX_COPY_OPS_SIZE)
>  			break;
>  	}
>  
> -	return gop - netbk->tx_copy_ops;
> +	return gop - tco;
>  }
>  
> -static void xen_netbk_tx_submit(struct xen_netbk *netbk)
> +static void xen_netbk_tx_submit(struct xen_netbk *netbk,
> +				struct gnttab_copy *tco)
>  {
> -	struct gnttab_copy *gop = netbk->tx_copy_ops;
> +	struct gnttab_copy *gop = tco;
>  	struct sk_buff *skb;
>  
>  	while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {
> @@ -1615,15 +1647,37 @@ static void xen_netbk_tx_submit(struct xen_netbk *netbk)
>  static void xen_netbk_tx_action(struct xen_netbk *netbk)
>  {
>  	unsigned nr_gops;
> +	struct gnttab_copy *tco;
> +	static int unusable_count;
> +
> +	tco = get_cpu_var(tx_copy_ops);
> +
> +	if (tco == NULL) {
> +		put_cpu_var(tx_copy_ops);
> +		if (unusable_count == 1000) {
> +			printk(KERN_ALERT

Ditto. printk_ratelimited.

> +			       "xen-netback: "
> +			       "CPU %d scratch space is not available,"
> +			       " not doing any RX work for netback/%d\n",
> +			       smp_processor_id(),
> +			       (int)(netbk - xen_netbk));

And can you explain what the recovery mechanism is?

> +		} else
> +			unusable_count++;
> +		return;
> +	}
>  
> -	nr_gops = xen_netbk_tx_build_gops(netbk);
> +	nr_gops = xen_netbk_tx_build_gops(netbk, tco);
>  
> -	if (nr_gops == 0)
> +	if (nr_gops == 0) {
> +		put_cpu_var(tx_copy_ops);
>  		return;
> +	}
> +
> +	gnttab_batch_copy(tco, nr_gops);
>  
> -	gnttab_batch_copy(netbk->tx_copy_ops, nr_gops);
> +	xen_netbk_tx_submit(netbk, tco);
>  
> -	xen_netbk_tx_submit(netbk);
> +	put_cpu_var(tx_copy_ops);
>  }
>  
>  static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx,
> @@ -1760,6 +1814,93 @@ static int xen_netbk_kthread(void *data)
>  	return 0;
>  }
>  
> +static int __create_percpu_scratch_space(unsigned int cpu)
> +{
> +	if (per_cpu(tx_copy_ops, cpu) ||
> +	    per_cpu(grant_copy_op, cpu) ||
> +	    per_cpu(meta, cpu))
> +		return 0;
> +
> +	per_cpu(tx_copy_ops, cpu) =
> +		vzalloc_node(sizeof(struct gnttab_copy) * TX_COPY_OPS_SIZE,
> +			     cpu_to_node(cpu));
> +
> +	per_cpu(grant_copy_op, cpu) =
> +		vzalloc_node(sizeof(struct gnttab_copy) * GRANT_COPY_OP_SIZE,
> +			     cpu_to_node(cpu));
> +
> +	per_cpu(meta, cpu) =
> +		vzalloc_node(sizeof(struct netbk_rx_meta) * META_SIZE,
> +			     cpu_to_node(cpu));
> +
> +	if (!per_cpu(tx_copy_ops, cpu) ||
> +	    !per_cpu(grant_copy_op, cpu) ||
> +	    !per_cpu(meta, cpu))
> +		return -ENOMEM;

And no freeing? Ah you require the __free_percpu_scratch_space to
do the job for you. Um, why not do it here instead of depending
on the calleer to clean up the mess?

Say:
		{
			__free_percpu_scratch_space(cpu);
			return -ENOMEM;
		}
?


> +
> +	return 0;
> +}
> +
> +static void __free_percpu_scratch_space(unsigned( int cpu)
> +{
> +	void *tmp;
> +
> +	tmp = per_cpu(tx_copy_ops, cpu);
> +	per_cpu(tx_copy_ops, cpu) = NULL;
> +	vfree(tmp);
> +
> +	tmp = per_cpu(grant_copy_op, cpu);
> +	per_cpu(grant_copy_op, cpu) = NULL;
> +	vfree(tmp);
> +
> +	tmp = per_cpu(meta, cpu);
> +	per_cpu(meta, cpu) = NULL;
> +	vfree(tmp);
> +}
> +
> +static int __netback_percpu_callback(struct notifier_block *nfb,
> +				     unsigned long action, void *hcpu)
> +{
> +	unsigned int cpu = (unsigned long)hcpu;
> +	int rc = NOTIFY_DONE;
> +
> +	switch (action) {
> +	case CPU_ONLINE:
> +	case CPU_ONLINE_FROZEN:
> +		printk(KERN_INFO "xen-netback: CPU %d online, creating scratch space\n",
> +		       cpu);

I think this is more of pr_debug type.

> +		rc = __create_percpu_scratch_space(cpu);
> +		if (rc) {
> +			printk(KERN_ALERT "xen-netback: failed to create scratch space for CPU %d\n",
> +			       cpu);
> +			/* There is really nothing more we can do. Free any
> +			 * partially allocated scratch space. When processing
> +			 * routines get to run they will just print warning
> +			 * message and stop processing.
> +			 */
> +			__free_percpu_scratch_space(cpu);

Ugh. Could the code skip creating a kthread on a CPU for which
the per_cpu(meta, cpu) == NULL?

> +			rc = NOTIFY_BAD;
> +		} else
> +			rc = NOTIFY_OK;
> +		break;
> +	case CPU_DEAD:
> +	case CPU_DEAD_FROZEN:
> +		printk(KERN_INFO "xen-netback: CPU %d offline, destroying scratch space\n",
> +		       cpu);
> +		__free_percpu_scratch_space(cpu);
> +		rc = NOTIFY_OK;
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	return rc;
> +}
> +
> +static struct notifier_block netback_notifier_block = {
> +	.notifier_call = __netback_percpu_callback,
> +};
> +
>  void xen_netbk_unmap_frontend_rings(struct xenvif *vif)
>  {
>  	if (vif->tx.sring)
> @@ -1810,6 +1951,7 @@ static int __init netback_init(void)
>  	int i;
>  	int rc = 0;
>  	int group;
> +	int cpu;
>  
>  	if (!xen_domain())
>  		return -ENODEV;
> @@ -1821,10 +1963,21 @@ static int __init netback_init(void)
>  		fatal_skb_slots = XEN_NETBK_LEGACY_SLOTS_MAX;
>  	}
>  
> +	for_each_online_cpu(cpu) {
> +		rc = __create_percpu_scratch_space(cpu);
> +		if (rc) {
> +			rc = -ENOMEM;
> +			goto failed_init;
> +		}
> +	}
> +	register_hotcpu_notifier(&netback_notifier_block);
> +
>  	xen_netbk_group_nr = num_online_cpus();
>  	xen_netbk = vzalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr);
> -	if (!xen_netbk)
> -		return -ENOMEM;
> +	if (!xen_netbk) {
> +		goto failed_init;
> +		rc = -ENOMEM;
> +	}
>  
>  	for (group = 0; group < xen_netbk_group_nr; group++) {
>  		struct xen_netbk *netbk = &xen_netbk[group];
> @@ -1849,7 +2002,7 @@ static int __init netback_init(void)
>  			printk(KERN_ALERT "kthread_create() fails at netback\n");
>  			del_timer(&netbk->net_timer);
>  			rc = PTR_ERR(netbk->task);
> -			goto failed_init;
> +			goto failed_init_destroy_kthreads;
>  		}
>  
>  		kthread_bind(netbk->task, group);
> @@ -1865,17 +2018,20 @@ static int __init netback_init(void)
>  
>  	rc = xenvif_xenbus_init();
>  	if (rc)
> -		goto failed_init;
> +		goto failed_init_destroy_kthreads;
>  
>  	return 0;
>  
> -failed_init:
> +failed_init_destroy_kthreads:
>  	while (--group >= 0) {
>  		struct xen_netbk *netbk = &xen_netbk[group];
>  		del_timer(&netbk->net_timer);
>  		kthread_stop(netbk->task);
>  	}
>  	vfree(xen_netbk);
> +failed_init:
> +	for_each_online_cpu(cpu)
> +		__free_percpu_scratch_space(cpu);
>  	return rc;
>  
>  }
> @@ -1899,6 +2055,11 @@ static void __exit netback_fini(void)
>  	}
>  
>  	vfree(xen_netbk);
> +
> +	unregister_hotcpu_notifier(&netback_notifier_block);
> +
> +	for_each_online_cpu(i)
> +		__free_percpu_scratch_space(i);
>  }
>  module_exit(netback_fini);
>  
> -- 
> 1.7.10.4
> 

  parent reply	other threads:[~2013-05-28 13:18 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-05-27 11:29 [PATCH 0/3 V2] xen-netback: switch to NAPI + kthread 1:1 model Wei Liu
2013-05-27 11:29 ` [PATCH 1/3] xen-netback: remove page tracking facility Wei Liu
2013-05-28  9:21   ` David Vrabel
2013-05-29  1:43     ` Matt Wilson
2013-05-29  8:12       ` Wei Liu
2013-05-27 11:29 ` [PATCH 2/3] xen-netback: switch to per-cpu scratch space Wei Liu
2013-05-28  9:47   ` annie li
2013-05-28 10:17     ` Wei Liu
2013-05-28 13:18   ` Konrad Rzeszutek Wilk [this message]
2013-05-28 13:36     ` David Vrabel
2013-05-28 13:54       ` Wei Liu
2013-05-27 11:29 ` [PATCH 3/3] xen-netback: switch to NAPI + kthread 1:1 model Wei Liu
2013-05-28 13:37   ` David Vrabel
2013-05-28 13:40     ` Wei Liu
2013-05-28 14:35 ` [PATCH 0/3 V2] " annie li
2013-05-28 15:09   ` Wei Liu
2013-06-11 10:06 ` David Vrabel
2013-06-11 10:15   ` Wei Liu
2013-06-12 13:44     ` Andrew Bennieston
2013-06-13  9:01       ` Wei Liu
2013-06-13 11:18         ` Andrew Bennieston
2013-06-13 13:06           ` Wei Liu
2013-07-03 12:45     ` Andrew Bennieston
2013-07-03 16:07       ` Wei Liu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20130528131841.GA724@phenom.dumpdata.com \
    --to=konrad.wilk@oracle.com \
    --cc=annie.li@oracle.com \
    --cc=david.vrabel@citrix.com \
    --cc=ian.campbell@citrix.com \
    --cc=wei.liu2@citrix.com \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.