All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dennis Dalessandro <dennis.dalessandro-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
To: dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Dean Luick <dean.luick-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>,
	Sebastian Sanchez
	<sebastian.sanchez-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Subject: [PATCH 07/13] IB/hfi1: Combine shift copy and byte copy for SGE reads
Date: Sun, 25 Sep 2016 07:41:39 -0700	[thread overview]
Message-ID: <20160925144137.10261.50915.stgit@scvm10.sc.intel.com> (raw)
In-Reply-To: <20160925143656.10261.85231.stgit-9QXIwq+3FY+1XWohqUldA0EOCMrvLtNR@public.gmane.org>

From: Sebastian Sanchez <sebastian.sanchez-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>

Prevent over-reading the SGE length by using byte
reads for non quad-word reads.

Reviewed-by: Dean Luick <dean.luick-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Sebastian Sanchez <sebastian.sanchez-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
 drivers/infiniband/hw/hfi1/pio_copy.c |  160 +++++----------------------------
 1 files changed, 23 insertions(+), 137 deletions(-)

diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c
index 7b0aa19..aa77736 100644
--- a/drivers/infiniband/hw/hfi1/pio_copy.c
+++ b/drivers/infiniband/hw/hfi1/pio_copy.c
@@ -165,9 +165,6 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
 	preempt_enable();
 }
 
-/* USE_SHIFTS is faster in user-space tests on a Xeon X5570 @ 2.93GHz */
-#define USE_SHIFTS 1
-#ifdef USE_SHIFTS
 /*
  * Handle carry bytes using shifts and masks.
  *
@@ -187,126 +184,6 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
 #define mshift(x) (8 * (x))
 
 /*
- * Read nbytes bytes from "from" and return them in the LSB bytes
- * of pbuf->carry.  Other bytes are zeroed.  Any previous value
- * pbuf->carry is lost.
- *
- * NOTES:
- * o do not read from from if nbytes is zero
- * o from may _not_ be u64 aligned
- * o nbytes must not span a QW boundary
- */
-static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
-				  unsigned int nbytes)
-{
-	unsigned long off;
-
-	if (nbytes == 0) {
-		pbuf->carry.val64 = 0;
-	} else {
-		/* align our pointer */
-		off = (unsigned long)from & 0x7;
-		from = (void *)((unsigned long)from & ~0x7l);
-		pbuf->carry.val64 = ((*(u64 *)from)
-				<< zshift(nbytes + off))/* zero upper bytes */
-				>> zshift(nbytes);	/* place at bottom */
-	}
-	pbuf->carry_bytes = nbytes;
-}
-
-/*
- * Read nbytes bytes from "from" and put them at the next significant bytes
- * of pbuf->carry.  Unused bytes are zeroed.  It is expected that the extra
- * read does not overfill carry.
- *
- * NOTES:
- * o from may _not_ be u64 aligned
- * o nbytes may span a QW boundary
- */
-static inline void read_extra_bytes(struct pio_buf *pbuf,
-				    const void *from, unsigned int nbytes)
-{
-	unsigned long off = (unsigned long)from & 0x7;
-	unsigned int room, xbytes;
-
-	/* align our pointer */
-	from = (void *)((unsigned long)from & ~0x7l);
-
-	/* check count first - don't read anything if count is zero */
-	while (nbytes) {
-		/* find the number of bytes in this u64 */
-		room = 8 - off;	/* this u64 has room for this many bytes */
-		xbytes = min(room, nbytes);
-
-		/*
-		 * shift down to zero lower bytes, shift up to zero upper
-		 * bytes, shift back down to move into place
-		 */
-		pbuf->carry.val64 |= (((*(u64 *)from)
-					>> mshift(off))
-					<< zshift(xbytes))
-					>> zshift(xbytes + pbuf->carry_bytes);
-		off = 0;
-		pbuf->carry_bytes += xbytes;
-		nbytes -= xbytes;
-		from += sizeof(u64);
-	}
-}
-
-/*
- * Write a quad word using parts of pbuf->carry and the next 8 bytes of src.
- * Put the unused part of the next 8 bytes of src into the LSB bytes of
- * pbuf->carry with the upper bytes zeroed..
- *
- * NOTES:
- * o result must keep unused bytes zeroed
- * o src must be u64 aligned
- */
-static inline void merge_write8(
-	struct pio_buf *pbuf,
-	void __iomem *dest,
-	const void *src)
-{
-	u64 new, temp;
-
-	new = *(u64 *)src;
-	temp = pbuf->carry.val64 | (new << mshift(pbuf->carry_bytes));
-	writeq(temp, dest);
-	pbuf->carry.val64 = new >> zshift(pbuf->carry_bytes);
-}
-
-/*
- * Write a quad word using all bytes of carry.
- */
-static inline void carry8_write8(union mix carry, void __iomem *dest)
-{
-	writeq(carry.val64, dest);
-}
-
-/*
- * Write a quad word using all the valid bytes of carry.  If carry
- * has zero valid bytes, nothing is written.
- * Returns 0 on nothing written, non-zero on quad word written.
- */
-static inline int carry_write8(struct pio_buf *pbuf, void __iomem *dest)
-{
-	if (pbuf->carry_bytes) {
-		/* unused bytes are always kept zeroed, so just write */
-		writeq(pbuf->carry.val64, dest);
-		return 1;
-	}
-
-	return 0;
-}
-
-#else /* USE_SHIFTS */
-/*
- * Handle carry bytes using byte copies.
- *
- * NOTE: the value the unused portion of carry is left uninitialized.
- */
-
-/*
  * Jump copy - no-loop copy for < 8 bytes.
  */
 static inline void jcopy(u8 *dest, const u8 *src, u32 n)
@@ -314,18 +191,25 @@ static inline void jcopy(u8 *dest, const u8 *src, u32 n)
 	switch (n) {
 	case 7:
 		*dest++ = *src++;
+		/* fall through */
 	case 6:
 		*dest++ = *src++;
+		/* fall through */
 	case 5:
 		*dest++ = *src++;
+		/* fall through */
 	case 4:
 		*dest++ = *src++;
+		/* fall through */
 	case 3:
 		*dest++ = *src++;
+		/* fall through */
 	case 2:
 		*dest++ = *src++;
+		/* fall through */
 	case 1:
 		*dest++ = *src++;
+		/* fall through */
 	}
 }
 
@@ -341,6 +225,7 @@ static inline void jcopy(u8 *dest, const u8 *src, u32 n)
 static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
 				  unsigned int nbytes)
 {
+	pbuf->carry.val64 = 0;
 	jcopy(&pbuf->carry.val8[0], from, nbytes);
 	pbuf->carry_bytes = nbytes;
 }
@@ -362,25 +247,30 @@ static inline void read_extra_bytes(struct pio_buf *pbuf,
 
 /*
  * Write a quad word using parts of pbuf->carry and the next 8 bytes of src.
- * Put the unused part of the next 8 bytes of src into the low bytes of
- * pbuf->carry.
+ * Put the unused part of the next 8 bytes of src into the LSB bytes of
+ * pbuf->carry with the upper bytes zeroed..
+ *
+ * NOTES:
+ * o result must keep unused bytes zeroed
+ * o src must be u64 aligned
  */
 static inline void merge_write8(
 	struct pio_buf *pbuf,
-	void *dest,
+	void __iomem *dest,
 	const void *src)
 {
-	u32 remainder = 8 - pbuf->carry_bytes;
+	u64 new, temp;
 
-	jcopy(&pbuf->carry.val8[pbuf->carry_bytes], src, remainder);
-	writeq(pbuf->carry.val64, dest);
-	jcopy(&pbuf->carry.val8[0], src + remainder, pbuf->carry_bytes);
+	new = *(u64 *)src;
+	temp = pbuf->carry.val64 | (new << mshift(pbuf->carry_bytes));
+	writeq(temp, dest);
+	pbuf->carry.val64 = new >> zshift(pbuf->carry_bytes);
 }
 
 /*
  * Write a quad word using all bytes of carry.
  */
-static inline void carry8_write8(union mix carry, void *dest)
+static inline void carry8_write8(union mix carry, void __iomem *dest)
 {
 	writeq(carry.val64, dest);
 }
@@ -390,20 +280,16 @@ static inline void carry8_write8(union mix carry, void *dest)
  * has zero valid bytes, nothing is written.
  * Returns 0 on nothing written, non-zero on quad word written.
  */
-static inline int carry_write8(struct pio_buf *pbuf, void *dest)
+static inline int carry_write8(struct pio_buf *pbuf, void __iomem *dest)
 {
 	if (pbuf->carry_bytes) {
-		u64 zero = 0;
-
-		jcopy(&pbuf->carry.val8[pbuf->carry_bytes], (u8 *)&zero,
-		      8 - pbuf->carry_bytes);
+		/* unused bytes are always kept zeroed, so just write */
 		writeq(pbuf->carry.val64, dest);
 		return 1;
 	}
 
 	return 0;
 }
-#endif /* USE_SHIFTS */
 
 /*
  * Segmented PIO Copy - start

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

  parent reply	other threads:[~2016-09-25 14:41 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-09-25 14:40 [PATCH 00/13] IB/hfi1,qib: Round 2 of hfi1/qib fixes for 4.9 Dennis Dalessandro
2016-09-25 14:40 ` Dennis Dalessandro
     [not found] ` <20160925143656.10261.85231.stgit-9QXIwq+3FY+1XWohqUldA0EOCMrvLtNR@public.gmane.org>
2016-09-25 14:40   ` [PATCH 01/13] IB/hfi1: Consolidate pio control masks into single definition Dennis Dalessandro
2016-09-25 14:41   ` [PATCH 02/13] IB/qib: Remove qpt_mask global Dennis Dalessandro
2016-09-25 14:41   ` [PATCH 03/13] IB/hfi1: Remove filtering of Set(PkeyTable) in HFI SMA Dennis Dalessandro
2016-09-25 14:41   ` [PATCH 04/13] IB/hfi1: Increase default settings of max_cqes and max_qps Dennis Dalessandro
2016-09-25 14:41   ` [PATCH 05/13] IB/hfi1: Extend i2c timeout Dennis Dalessandro
2016-09-25 14:41   ` [PATCH 06/13] IB/hfi1: Do not read more than a SGE length Dennis Dalessandro
2016-09-25 14:41   ` Dennis Dalessandro [this message]
2016-09-25 14:41   ` [PATCH 08/13] IB/hfi1: Fix defered ack race with qp destroy Dennis Dalessandro
2016-09-25 14:41     ` Dennis Dalessandro
2016-09-25 14:41   ` [PATCH 09/13] IB/hfi1: Act on external device timeout Dennis Dalessandro
2016-09-25 14:42   ` [PATCH 10/13] IB/hfi1: Adjust hardware buffering parameter Dennis Dalessandro
2016-09-25 14:42   ` [PATCH 11/13] IB/hfi1: Cleanup tasklet refs in comments Dennis Dalessandro
2016-09-25 14:42   ` [PATCH 12/13] IB/hfi1: Remove unused variable from devdata Dennis Dalessandro
2016-09-25 14:42   ` [PATCH 13/13] IB/hfi1: Fix resource release in context allocation Dennis Dalessandro
2016-10-02 13:29   ` [PATCH 00/13] IB/hfi1,qib: Round 2 of hfi1/qib fixes for 4.9 Doug Ledford
2016-10-02 13:29     ` Doug Ledford

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160925144137.10261.50915.stgit@scvm10.sc.intel.com \
    --to=dennis.dalessandro-ral2jqcrhueavxtiumwx3w@public.gmane.org \
    --cc=dean.luick-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org \
    --cc=dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org \
    --cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=sebastian.sanchez-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.