linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Paolo Bonzini <pbonzini@redhat.com>
To: Boaz Harrosh <bharrosh@panasas.com>
Cc: Wang Sen <senwang@linux.vnet.ibm.com>,
	linux-scsi@vger.kernel.org, JBottomley@parallels.com,
	stefanha@linux.vnet.ibm.com, mc@linux.vnet.ibm.com,
	linux-kernel@vger.kernel.org
Subject: Re: performance improvements for the sglist API (Re: [PATCH] scsi: virtio-scsi: Fix address translation failure of HighMem pages used by sg list)
Date: Wed, 25 Jul 2012 17:16:29 +0200	[thread overview]
Message-ID: <50100DCD.7030102@redhat.com> (raw)
In-Reply-To: <50100C37.1060408@redhat.com>

Il 25/07/2012 17:09, Paolo Bonzini ha scritto:
> Il 25/07/2012 16:36, Boaz Harrosh ha scritto:
>>>>
>>>> I did test the patch with value-assignment.
>>>>
>>
>> Still you should use the sg_set_page()!!
>> 1. It is not allowed to directly manipulate sg entries. One should always
>>    use the proper accessor. Even if open coding does work and is not a bug
>>    it should not be used anyway!
>> 2. Future code that will support chaining will need to do as I say so why
>>    change it then, again?
> 
> Future code that will support chaining will not copy anything at all.
> 
> Also, and more important, note that I am _not_ calling sg_init_table
> before the loop, only once in the driver initialization.  That's because
> memset in sg_init_table is an absolute performance killer, especially if
> you have to do it in a critical section; and I'm not making this up, see
> blk_rq_map_sg:
> 
>                           * If the driver previously mapped a shorter
>                           * list, we could see a termination bit
>                           * prematurely unless it fully inits the sg
>                           * table on each mapping. We KNOW that there
>                           * must be more entries here or the driver
>                           * would be buggy, so force clear the
>                           * termination bit to avoid doing a full
>                           * sg_init_table() in drivers for each command.
>                           */
>                           sg->page_link &= ~0x02;
>                           sg = sg_next(sg);
> 
> So let's instead fix the API so that I (and blk-merge.c) can touch
> memory just once.  For example you could add __sg_set_page and
> __sg_set_buf, basically the equivalent of
> 
>     memset(sg, 0, sizeof(*sg));
>     sg_set_{page,buf}(sg, page, len, offset);
> 
> Calling these functions would be fine if you later add a manual call to
> sg_mark_end, again the same as blk-merge.c does.  See the attached
> untested/uncompiled patch.
> 
> And value assignment would be the same as a
> 
>     __sg_set_page(sg, sg_page(page), sg->length, sg->offset);

ENOPATCH...

diff --git a/block/blk-merge.c b/block/blk-merge.c
index 160035f..00ba3d4 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -146,7 +146,9 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq,
 new_segment:
 			if (!sg)
 				sg = sglist;
-			else {
+			else
+				sg = sg_next(sg);
+
 				/*
 				 * If the driver previously mapped a shorter
 				 * list, we could see a termination bit
@@ -158,11 +160,7 @@ new_segment:
 				 * termination bit to avoid doing a full
 				 * sg_init_table() in drivers for each command.
 				 */
-				sg->page_link &= ~0x02;
-				sg = sg_next(sg);
-			}
-
-			sg_set_page(sg, bvec->bv_page, nbytes, bvec->bv_offset);
+			__sg_set_page(sg, bvec->bv_page, nbytes, bvec->bv_offset);
 			nsegs++;
 		}
 		bvprv = bvec;
@@ -182,12 +180,11 @@ new_segment:
 		if (rq->cmd_flags & REQ_WRITE)
 			memset(q->dma_drain_buffer, 0, q->dma_drain_size);
 
-		sg->page_link &= ~0x02;
 		sg = sg_next(sg);
-		sg_set_page(sg, virt_to_page(q->dma_drain_buffer),
-			    q->dma_drain_size,
-			    ((unsigned long)q->dma_drain_buffer) &
-			    (PAGE_SIZE - 1));
+		__sg_set_page(sg, virt_to_page(q->dma_drain_buffer),
+			      q->dma_drain_size,
+			      ((unsigned long)q->dma_drain_buffer) &
+			      (PAGE_SIZE - 1));
 		nsegs++;
 		rq->extra_len += q->dma_drain_size;
 	}
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index ac9586d..d6a937e 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -44,32 +44,23 @@ struct sg_table {
 #define sg_chain_ptr(sg)	\
 	((struct scatterlist *) ((sg)->page_link & ~0x03))
 
-/**
- * sg_assign_page - Assign a given page to an SG entry
- * @sg:		    SG entry
- * @page:	    The page
- *
- * Description:
- *   Assign page to sg entry. Also see sg_set_page(), the most commonly used
- *   variant.
- *
- **/
-static inline void sg_assign_page(struct scatterlist *sg, struct page *page)
+static inline void __sg_set_page(struct scatterlist *sg, struct page *page,
+				 unsigned int len, unsigned int offset)
 {
-	unsigned long page_link = sg->page_link & 0x3;
-
 	/*
 	 * In order for the low bit stealing approach to work, pages
 	 * must be aligned at a 32-bit boundary as a minimum.
 	 */
 	BUG_ON((unsigned long) page & 0x03);
 #ifdef CONFIG_DEBUG_SG
-	BUG_ON(sg->sg_magic != SG_MAGIC);
-	BUG_ON(sg_is_chain(sg));
+	sg->sg_magic = SG_MAGIC;
 #endif
-	sg->page_link = page_link | (unsigned long) page;
+	sg->page_link = page;
+	sg->offset = offset;
+	sg->length = len;
 }
 
+
 /**
  * sg_set_page - Set sg entry to point at given page
  * @sg:		 SG entry
@@ -87,9 +78,13 @@ static inline void sg_assign_page(struct scatterlist *sg, struct page *page)
 static inline void sg_set_page(struct scatterlist *sg, struct page *page,
 			       unsigned int len, unsigned int offset)
 {
-	sg_assign_page(sg, page);
-	sg->offset = offset;
-	sg->length = len;
+	unsigned long flags = sg->page_link & 0x3;
+#ifdef CONFIG_DEBUG_SG
+	BUG_ON(sg->sg_magic != SG_MAGIC);
+	BUG_ON(sg_is_chain(sg));
+#endif
+	__sg_set_page(sg, page, len, offset);
+	sg->page_link |= flags;
 }
 
 static inline struct page *sg_page(struct scatterlist *sg)
@@ -101,6 +96,12 @@ static inline struct page *sg_page(struct scatterlist *sg)
 	return (struct page *)((sg)->page_link & ~0x3);
 }
 
+static inline void __sg_set_buf(struct scatterlist *sg, const void *buf,
+				unsigned int buflen)
+{
+	__sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf));
+}
+
 /**
  * sg_set_buf - Set sg entry to point at given data
  * @sg:		 SG entry


  reply	other threads:[~2012-07-25 15:17 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-07-25  8:29 [PATCH] scsi: virtio-scsi: Fix address translation failure of HighMem pages used by sg list Wang Sen
2012-07-25  8:44 ` Paolo Bonzini
2012-07-25  9:22   ` Boaz Harrosh
2012-07-25  9:41     ` Paolo Bonzini
2012-07-25 12:34       ` Boaz Harrosh
2012-07-25 12:49         ` Paolo Bonzini
2012-07-25 13:26           ` Boaz Harrosh
2012-07-25 13:36             ` Paolo Bonzini
2012-07-25 14:36               ` Boaz Harrosh
2012-07-25 15:09                 ` performance improvements for the sglist API (Re: [PATCH] scsi: virtio-scsi: Fix address translation failure of HighMem pages used by sg list) Paolo Bonzini
2012-07-25 15:16                   ` Paolo Bonzini [this message]
2012-07-25 14:17             ` virtio(-scsi) vs. chained sg_lists (was " Paolo Bonzini
2012-07-25 15:28               ` Boaz Harrosh
2012-07-25 17:43                 ` Paolo Bonzini
2012-07-25 19:16                   ` Boaz Harrosh
2012-07-25 20:06                     ` Paolo Bonzini
2012-07-25 21:04                       ` Boaz Harrosh
2012-07-26  7:23                         ` Paolo Bonzini
2012-07-26  7:56                           ` Boaz Harrosh
2012-07-26  7:58                             ` Paolo Bonzini
2012-07-26 13:05                               ` Paolo Bonzini
2012-07-27  6:27                                 ` Rusty Russell
2012-07-27  8:11                                   ` Paolo Bonzini
2012-07-29 23:50                                     ` Rusty Russell
2012-07-30  7:12                                       ` Paolo Bonzini
2012-07-30  8:56                                         ` Boaz Harrosh
2012-07-25 10:41   ` [PATCH] scsi: virtio-scsi: Fix address translation failure of HighMem pages used by sg list Stefan Hajnoczi
2012-07-25 11:48     ` Sen Wang
2012-07-25 11:44   ` Sen Wang
2012-07-25 12:40     ` Boaz Harrosh
2012-07-27  3:12       ` Wang Sen
2012-07-27  6:50         ` Paolo Bonzini
2012-07-25 10:04 ` Rolf Eike Beer
2012-07-25 11:46   ` Sen Wang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=50100DCD.7030102@redhat.com \
    --to=pbonzini@redhat.com \
    --cc=JBottomley@parallels.com \
    --cc=bharrosh@panasas.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-scsi@vger.kernel.org \
    --cc=mc@linux.vnet.ibm.com \
    --cc=senwang@linux.vnet.ibm.com \
    --cc=stefanha@linux.vnet.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).