All of lore.kernel.org
 help / color / mirror / Atom feed
From: Paolo Bonzini <pbonzini@redhat.com>
To: Boaz Harrosh <bharrosh@panasas.com>
Cc: Wang Sen <senwang@linux.vnet.ibm.com>,
	linux-scsi@vger.kernel.org, JBottomley@parallels.com,
	stefanha@linux.vnet.ibm.com, mc@linux.vnet.ibm.com,
	linux-kernel@vger.kernel.org
Subject: Re: performance improvements for the sglist API (Re: [PATCH] scsi: virtio-scsi: Fix address translation failure of HighMem pages used by sg list)
Date: Wed, 25 Jul 2012 17:16:29 +0200	[thread overview]
Message-ID: <50100DCD.7030102@redhat.com> (raw)
In-Reply-To: <50100C37.1060408@redhat.com>

Il 25/07/2012 17:09, Paolo Bonzini ha scritto:
> Il 25/07/2012 16:36, Boaz Harrosh ha scritto:
>>>>
>>>> I did test the patch with value-assignment.
>>>>
>>
>> Still you should use the sg_set_page()!!
>> 1. It is not allowed to directly manipulate sg entries. One should always
>>    use the proper accessor. Even if open coding does work and is not a bug
>>    it should not be used anyway!
>> 2. Future code that will support chaining will need to do as I say so why
>>    change it then, again?
> 
> Future code that will support chaining will not copy anything at all.
> 
> Also, and more important, note that I am _not_ calling sg_init_table
> before the loop, only once in the driver initialization.  That's because
> memset in sg_init_table is an absolute performance killer, especially if
> you have to do it in a critical section; and I'm not making this up, see
> blk_rq_map_sg:
> 
>                           * If the driver previously mapped a shorter
>                           * list, we could see a termination bit
>                           * prematurely unless it fully inits the sg
>                           * table on each mapping. We KNOW that there
>                           * must be more entries here or the driver
>                           * would be buggy, so force clear the
>                           * termination bit to avoid doing a full
>                           * sg_init_table() in drivers for each command.
>                           */
>                           sg->page_link &= ~0x02;
>                           sg = sg_next(sg);
> 
> So let's instead fix the API so that I (and blk-merge.c) can touch
> memory just once.  For example you could add __sg_set_page and
> __sg_set_buf, basically the equivalent of
> 
>     memset(sg, 0, sizeof(*sg));
>     sg_set_{page,buf}(sg, page, len, offset);
> 
> Calling these functions would be fine if you later add a manual call to
> sg_mark_end, again the same as blk-merge.c does.  See the attached
> untested/uncompiled patch.
> 
> And value assignment would be the same as a
> 
>     __sg_set_page(sg, sg_page(page), sg->length, sg->offset);

ENOPATCH...

diff --git a/block/blk-merge.c b/block/blk-merge.c
index 160035f..00ba3d4 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -146,7 +146,9 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq,
 new_segment:
 			if (!sg)
 				sg = sglist;
-			else {
+			else
+				sg = sg_next(sg);
+
 				/*
 				 * If the driver previously mapped a shorter
 				 * list, we could see a termination bit
@@ -158,11 +160,7 @@ new_segment:
 				 * termination bit to avoid doing a full
 				 * sg_init_table() in drivers for each command.
 				 */
-				sg->page_link &= ~0x02;
-				sg = sg_next(sg);
-			}
-
-			sg_set_page(sg, bvec->bv_page, nbytes, bvec->bv_offset);
+			__sg_set_page(sg, bvec->bv_page, nbytes, bvec->bv_offset);
 			nsegs++;
 		}
 		bvprv = bvec;
@@ -182,12 +180,11 @@ new_segment:
 		if (rq->cmd_flags & REQ_WRITE)
 			memset(q->dma_drain_buffer, 0, q->dma_drain_size);
 
-		sg->page_link &= ~0x02;
 		sg = sg_next(sg);
-		sg_set_page(sg, virt_to_page(q->dma_drain_buffer),
-			    q->dma_drain_size,
-			    ((unsigned long)q->dma_drain_buffer) &
-			    (PAGE_SIZE - 1));
+		__sg_set_page(sg, virt_to_page(q->dma_drain_buffer),
+			      q->dma_drain_size,
+			      ((unsigned long)q->dma_drain_buffer) &
+			      (PAGE_SIZE - 1));
 		nsegs++;
 		rq->extra_len += q->dma_drain_size;
 	}
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index ac9586d..d6a937e 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -44,32 +44,23 @@ struct sg_table {
 #define sg_chain_ptr(sg)	\
 	((struct scatterlist *) ((sg)->page_link & ~0x03))
 
-/**
- * sg_assign_page - Assign a given page to an SG entry
- * @sg:		    SG entry
- * @page:	    The page
- *
- * Description:
- *   Assign page to sg entry. Also see sg_set_page(), the most commonly used
- *   variant.
- *
- **/
-static inline void sg_assign_page(struct scatterlist *sg, struct page *page)
+static inline void __sg_set_page(struct scatterlist *sg, struct page *page,
+				 unsigned int len, unsigned int offset)
 {
-	unsigned long page_link = sg->page_link & 0x3;
-
 	/*
 	 * In order for the low bit stealing approach to work, pages
 	 * must be aligned at a 32-bit boundary as a minimum.
 	 */
 	BUG_ON((unsigned long) page & 0x03);
 #ifdef CONFIG_DEBUG_SG
-	BUG_ON(sg->sg_magic != SG_MAGIC);
-	BUG_ON(sg_is_chain(sg));
+	sg->sg_magic = SG_MAGIC;
 #endif
-	sg->page_link = page_link | (unsigned long) page;
+	sg->page_link = page;
+	sg->offset = offset;
+	sg->length = len;
 }
 
+
 /**
  * sg_set_page - Set sg entry to point at given page
  * @sg:		 SG entry
@@ -87,9 +78,13 @@ static inline void sg_assign_page(struct scatterlist *sg, struct page *page)
 static inline void sg_set_page(struct scatterlist *sg, struct page *page,
 			       unsigned int len, unsigned int offset)
 {
-	sg_assign_page(sg, page);
-	sg->offset = offset;
-	sg->length = len;
+	unsigned long flags = sg->page_link & 0x3;
+#ifdef CONFIG_DEBUG_SG
+	BUG_ON(sg->sg_magic != SG_MAGIC);
+	BUG_ON(sg_is_chain(sg));
+#endif
+	__sg_set_page(sg, page, len, offset);
+	sg->page_link |= flags;
 }
 
 static inline struct page *sg_page(struct scatterlist *sg)
@@ -101,6 +96,12 @@ static inline struct page *sg_page(struct scatterlist *sg)
 	return (struct page *)((sg)->page_link & ~0x3);
 }
 
+static inline void __sg_set_buf(struct scatterlist *sg, const void *buf,
+				unsigned int buflen)
+{
+	__sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf));
+}
+
 /**
  * sg_set_buf - Set sg entry to point at given data
  * @sg:		 SG entry

  reply	other threads:[~2012-07-25 15:16 UTC|newest]

Thread overview: 47+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-07-25  8:29 [PATCH] scsi: virtio-scsi: Fix address translation failure of HighMem pages used by sg list Wang Sen
2012-07-25  8:44 ` Paolo Bonzini
2012-07-25  9:22   ` Boaz Harrosh
2012-07-25  9:22     ` Boaz Harrosh
2012-07-25  9:41     ` Paolo Bonzini
2012-07-25 12:34       ` Boaz Harrosh
2012-07-25 12:34         ` Boaz Harrosh
2012-07-25 12:49         ` Paolo Bonzini
2012-07-25 13:26           ` Boaz Harrosh
2012-07-25 13:26             ` Boaz Harrosh
2012-07-25 13:36             ` Paolo Bonzini
2012-07-25 14:36               ` Boaz Harrosh
2012-07-25 14:36                 ` Boaz Harrosh
2012-07-25 15:09                 ` performance improvements for the sglist API (Re: [PATCH] scsi: virtio-scsi: Fix address translation failure of HighMem pages used by sg list) Paolo Bonzini
2012-07-25 15:16                   ` Paolo Bonzini [this message]
2012-07-25 14:17             ` virtio(-scsi) vs. chained sg_lists (was " Paolo Bonzini
2012-07-25 15:28               ` Boaz Harrosh
2012-07-25 15:28                 ` Boaz Harrosh
2012-07-25 17:43                 ` Paolo Bonzini
2012-07-25 19:16                   ` Boaz Harrosh
2012-07-25 19:16                     ` Boaz Harrosh
2012-07-25 20:06                     ` Paolo Bonzini
2012-07-25 21:04                       ` Boaz Harrosh
2012-07-25 21:04                         ` Boaz Harrosh
2012-07-26  7:23                         ` Paolo Bonzini
2012-07-26  7:56                           ` Boaz Harrosh
2012-07-26  7:56                             ` Boaz Harrosh
2012-07-26  7:58                             ` Paolo Bonzini
2012-07-26 13:05                               ` Paolo Bonzini
2012-07-27  6:27                                 ` Rusty Russell
2012-07-27  6:27                                   ` Rusty Russell
2012-07-27  8:11                                   ` Paolo Bonzini
2012-07-29 23:50                                     ` Rusty Russell
2012-07-29 23:50                                       ` Rusty Russell
2012-07-30  7:12                                       ` Paolo Bonzini
2012-07-30  8:56                                         ` Boaz Harrosh
2012-07-30  8:56                                           ` Boaz Harrosh
2012-07-25 10:41   ` [PATCH] scsi: virtio-scsi: Fix address translation failure of HighMem pages used by sg list Stefan Hajnoczi
2012-07-25 11:48     ` Sen Wang
2012-07-25 11:44   ` Sen Wang
2012-07-25 12:40     ` Boaz Harrosh
2012-07-25 12:40       ` Boaz Harrosh
2012-07-27  3:12       ` Wang Sen
2012-07-27  6:50         ` Paolo Bonzini
2012-07-25 10:04 ` Rolf Eike Beer
2012-07-25 10:04   ` Rolf Eike Beer
2012-07-25 11:46   ` Sen Wang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=50100DCD.7030102@redhat.com \
    --to=pbonzini@redhat.com \
    --cc=JBottomley@parallels.com \
    --cc=bharrosh@panasas.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-scsi@vger.kernel.org \
    --cc=mc@linux.vnet.ibm.com \
    --cc=senwang@linux.vnet.ibm.com \
    --cc=stefanha@linux.vnet.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.