All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Michael S. Tsirkin" <mst@redhat.com>
To: Dan Aloni <alonid@stratoscale.com>
Cc: kvm@vger.kernel.org
Subject: Re: [PATCH] drivers/vhost/scsi.c: avoid a 10-order allocation
Date: Sun, 18 Aug 2013 12:18:39 +0300	[thread overview]
Message-ID: <20130818091838.GB17111@redhat.com> (raw)
In-Reply-To: <1376815736-21003-1-git-send-email-alonid@stratoscale.com>

On Sun, Aug 18, 2013 at 11:48:56AM +0300, Dan Aloni wrote:
> On 3.10.7 and x86_64, as a result of sizeof(struct vhost_scsi) being
> 2152960 bytes, the allocation failed once on my development machine.
> 
> It seems prudent to split the bulk of it, which is the vqs array,
> into separately allocated parts. sizeof(struct vhost_virtqueue) is
> currently 16816 bytes.
> 
> Signed-off-by: Dan Aloni <alonid@stratoscale.com>

This extra indirection is likely to have a measurable cost, though.

The net core saw a similar problem; it was fixed in the patch
    net: allow large number of tx queues

So let's do it in a similar way: try to allocate with
GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT
and, if that fails, fall back to vmalloc().

To free, we can do
       if (is_vmalloc_addr())
               vfree();
       else
               kfree();



> ---
>  drivers/vhost/scsi.c | 68 +++++++++++++++++++++++++++++++++-------------------
>  1 file changed, 44 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
> index 7014202..7f605b6 100644
> --- a/drivers/vhost/scsi.c
> +++ b/drivers/vhost/scsi.c
> @@ -171,6 +171,7 @@ enum {
>  #define VHOST_SCSI_MAX_EVENT	128
>  
>  struct vhost_scsi_virtqueue {
> +	int index;
>  	struct vhost_virtqueue vq;
>  	/*
>  	 * Reference counting for inflight reqs, used for flush operation. At
> @@ -191,7 +192,7 @@ struct vhost_scsi {
>  	char vs_vhost_wwpn[TRANSPORT_IQN_LEN];
>  
>  	struct vhost_dev dev;
> -	struct vhost_scsi_virtqueue vqs[VHOST_SCSI_MAX_VQ];
> +	struct vhost_scsi_virtqueue *vqs[VHOST_SCSI_MAX_VQ];
>  
>  	struct vhost_work vs_completion_work; /* cmd completion work item */
>  	struct llist_head vs_completion_list; /* cmd completion queue */
> @@ -234,18 +235,18 @@ static void tcm_vhost_init_inflight(struct vhost_scsi *vs,
>  	int idx, i;
>  
>  	for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
> -		vq = &vs->vqs[i].vq;
> +		vq = &vs->vqs[i]->vq;
>  
>  		mutex_lock(&vq->mutex);
>  
>  		/* store old infight */
> -		idx = vs->vqs[i].inflight_idx;
> +		idx = vs->vqs[i]->inflight_idx;
>  		if (old_inflight)
> -			old_inflight[i] = &vs->vqs[i].inflights[idx];
> +			old_inflight[i] = &vs->vqs[i]->inflights[idx];
>  
>  		/* setup new infight */
> -		vs->vqs[i].inflight_idx = idx ^ 1;
> -		new_inflight = &vs->vqs[i].inflights[idx ^ 1];
> +		vs->vqs[i]->inflight_idx = idx ^ 1;
> +		new_inflight = &vs->vqs[i]->inflights[idx ^ 1];
>  		kref_init(&new_inflight->kref);
>  		init_completion(&new_inflight->comp);
>  
> @@ -530,7 +531,7 @@ static void tcm_vhost_free_evt(struct vhost_scsi *vs, struct tcm_vhost_evt *evt)
>  static struct tcm_vhost_evt *tcm_vhost_allocate_evt(struct vhost_scsi *vs,
>  	u32 event, u32 reason)
>  {
> -	struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
> +	struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT]->vq;
>  	struct tcm_vhost_evt *evt;
>  
>  	if (vs->vs_events_nr > VHOST_SCSI_MAX_EVENT) {
> @@ -575,7 +576,7 @@ static void vhost_scsi_free_cmd(struct tcm_vhost_cmd *tv_cmd)
>  static void tcm_vhost_do_evt_work(struct vhost_scsi *vs,
>  	struct tcm_vhost_evt *evt)
>  {
> -	struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
> +	struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT]->vq;
>  	struct virtio_scsi_event *event = &evt->event;
>  	struct virtio_scsi_event __user *eventp;
>  	unsigned out, in;
> @@ -626,7 +627,7 @@ static void tcm_vhost_evt_work(struct vhost_work *work)
>  {
>  	struct vhost_scsi *vs = container_of(work, struct vhost_scsi,
>  					vs_event_work);
> -	struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
> +	struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT]->vq;
>  	struct tcm_vhost_evt *evt;
>  	struct llist_node *llnode;
>  
> @@ -680,7 +681,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
>  			struct vhost_scsi_virtqueue *q;
>  			vhost_add_used(tv_cmd->tvc_vq, tv_cmd->tvc_vq_desc, 0);
>  			q = container_of(tv_cmd->tvc_vq, struct vhost_scsi_virtqueue, vq);
> -			vq = q - vs->vqs;
> +			vq = q->index;
>  			__set_bit(vq, signal);
>  		} else
>  			pr_err("Faulted on virtio_scsi_cmd_resp\n");
> @@ -691,7 +692,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
>  	vq = -1;
>  	while ((vq = find_next_bit(signal, VHOST_SCSI_MAX_VQ, vq + 1))
>  		< VHOST_SCSI_MAX_VQ)
> -		vhost_signal(&vs->dev, &vs->vqs[vq].vq);
> +		vhost_signal(&vs->dev, &vs->vqs[vq]->vq);
>  }
>  
>  static struct tcm_vhost_cmd *vhost_scsi_allocate_cmd(
> @@ -1108,7 +1109,7 @@ static void vhost_scsi_handle_kick(struct vhost_work *work)
>  
>  static void vhost_scsi_flush_vq(struct vhost_scsi *vs, int index)
>  {
> -	vhost_poll_flush(&vs->vqs[index].vq.poll);
> +	vhost_poll_flush(&vs->vqs[index]->vq.poll);
>  }
>  
>  /* Callers must hold dev mutex */
> @@ -1163,7 +1164,7 @@ static int vhost_scsi_set_endpoint(
>  	/* Verify that ring has been setup correctly. */
>  	for (index = 0; index < vs->dev.nvqs; ++index) {
>  		/* Verify that ring has been setup correctly. */
> -		if (!vhost_vq_access_ok(&vs->vqs[index].vq)) {
> +		if (!vhost_vq_access_ok(&vs->vqs[index]->vq)) {
>  			ret = -EFAULT;
>  			goto out;
>  		}
> @@ -1210,7 +1211,7 @@ static int vhost_scsi_set_endpoint(
>  		memcpy(vs->vs_vhost_wwpn, t->vhost_wwpn,
>  		       sizeof(vs->vs_vhost_wwpn));
>  		for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
> -			vq = &vs->vqs[i].vq;
> +			vq = &vs->vqs[i]->vq;
>  			/* Flushing the vhost_work acts as synchronize_rcu */
>  			mutex_lock(&vq->mutex);
>  			rcu_assign_pointer(vq->private_data, vs_tpg);
> @@ -1251,7 +1252,7 @@ static int vhost_scsi_clear_endpoint(
>  	mutex_lock(&vs->dev.mutex);
>  	/* Verify that ring has been setup correctly. */
>  	for (index = 0; index < vs->dev.nvqs; ++index) {
> -		if (!vhost_vq_access_ok(&vs->vqs[index].vq)) {
> +		if (!vhost_vq_access_ok(&vs->vqs[index]->vq)) {
>  			ret = -EFAULT;
>  			goto err_dev;
>  		}
> @@ -1291,7 +1292,7 @@ static int vhost_scsi_clear_endpoint(
>  	}
>  	if (match) {
>  		for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
> -			vq = &vs->vqs[i].vq;
> +			vq = &vs->vqs[i]->vq;
>  			/* Flushing the vhost_work acts as synchronize_rcu */
>  			mutex_lock(&vq->mutex);
>  			rcu_assign_pointer(vq->private_data, NULL);
> @@ -1346,6 +1347,18 @@ static int vhost_scsi_open(struct inode *inode, struct file *f)
>  	if (!s)
>  		return -ENOMEM;
>  
> +	for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
> +		s->vqs[i] = kzalloc(sizeof(*s->vqs[0]), GFP_KERNEL);;
> +		if (!s->vqs[i]) {
> +			while (i >= 1)
> +				kfree(s->vqs[--i]);
> +			kfree(s);
> +			return -ENOMEM;
> +		}
> +
> +		s->vqs[i]->index = i;
> +	}
> +
>  	vqs = kmalloc(VHOST_SCSI_MAX_VQ * sizeof(*vqs), GFP_KERNEL);
>  	if (!vqs) {
>  		kfree(s);
> @@ -1358,19 +1371,22 @@ static int vhost_scsi_open(struct inode *inode, struct file *f)
>  	s->vs_events_nr = 0;
>  	s->vs_events_missed = false;
>  
> -	vqs[VHOST_SCSI_VQ_CTL] = &s->vqs[VHOST_SCSI_VQ_CTL].vq;
> -	vqs[VHOST_SCSI_VQ_EVT] = &s->vqs[VHOST_SCSI_VQ_EVT].vq;
> -	s->vqs[VHOST_SCSI_VQ_CTL].vq.handle_kick = vhost_scsi_ctl_handle_kick;
> -	s->vqs[VHOST_SCSI_VQ_EVT].vq.handle_kick = vhost_scsi_evt_handle_kick;
> +	vqs[VHOST_SCSI_VQ_CTL] = &s->vqs[VHOST_SCSI_VQ_CTL]->vq;
> +	vqs[VHOST_SCSI_VQ_EVT] = &s->vqs[VHOST_SCSI_VQ_EVT]->vq;
> +	s->vqs[VHOST_SCSI_VQ_CTL]->vq.handle_kick = vhost_scsi_ctl_handle_kick;
> +	s->vqs[VHOST_SCSI_VQ_EVT]->vq.handle_kick = vhost_scsi_evt_handle_kick;
>  	for (i = VHOST_SCSI_VQ_IO; i < VHOST_SCSI_MAX_VQ; i++) {
> -		vqs[i] = &s->vqs[i].vq;
> -		s->vqs[i].vq.handle_kick = vhost_scsi_handle_kick;
> +		vqs[i] = &s->vqs[i]->vq;
> +		s->vqs[i]->vq.handle_kick = vhost_scsi_handle_kick;
>  	}
>  	r = vhost_dev_init(&s->dev, vqs, VHOST_SCSI_MAX_VQ);
>  
>  	tcm_vhost_init_inflight(s, NULL);
>  
>  	if (r < 0) {
> +		for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
> +			kfree(s->vqs[i]);
> +		}
>  		kfree(vqs);
>  		kfree(s);
>  		return r;
> @@ -1384,6 +1400,7 @@ static int vhost_scsi_release(struct inode *inode, struct file *f)
>  {
>  	struct vhost_scsi *s = f->private_data;
>  	struct vhost_scsi_target t;
> +	int i;
>  
>  	mutex_lock(&s->dev.mutex);
>  	memcpy(t.vhost_wwpn, s->vs_vhost_wwpn, sizeof(t.vhost_wwpn));
> @@ -1393,6 +1410,9 @@ static int vhost_scsi_release(struct inode *inode, struct file *f)
>  	vhost_dev_cleanup(&s->dev, false);
>  	/* Jobs can re-queue themselves in evt kick handler. Do extra flush. */
>  	vhost_scsi_flush(s);
> +	for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
> +		kfree(s->vqs[i]);
> +	}
>  	kfree(s->dev.vqs);
>  	kfree(s);
>  	return 0;
> @@ -1409,7 +1429,7 @@ static long vhost_scsi_ioctl(struct file *f, unsigned int ioctl,
>  	u32 events_missed;
>  	u64 features;
>  	int r, abi_version = VHOST_SCSI_ABI_VERSION;
> -	struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
> +	struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT]->vq;
>  
>  	switch (ioctl) {
>  	case VHOST_SCSI_SET_ENDPOINT:
> @@ -1537,7 +1557,7 @@ static void tcm_vhost_do_plug(struct tcm_vhost_tpg *tpg,
>  	else
>  		reason = VIRTIO_SCSI_EVT_RESET_REMOVED;
>  
> -	vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
> +	vq = &vs->vqs[VHOST_SCSI_VQ_EVT]->vq;
>  	mutex_lock(&vq->mutex);
>  	tcm_vhost_send_evt(vs, tpg, lun,
>  			VIRTIO_SCSI_T_TRANSPORT_RESET, reason);
> -- 
> 1.8.1.4

  reply	other threads:[~2013-08-18  9:16 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-08-18  8:48 [PATCH] drivers/vhost/scsi.c: avoid a 10-order allocation Dan Aloni
2013-08-18  9:18 ` Michael S. Tsirkin [this message]
2013-09-04  9:02   ` Michael S. Tsirkin
2013-09-04  9:27     ` Dan Aloni
2013-09-04  9:27     ` Dan Aloni

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20130818091838.GB17111@redhat.com \
    --to=mst@redhat.com \
    --cc=alonid@stratoscale.com \
    --cc=kvm@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.