From: Dean Nelson <dcn@sgi.com>
To: linux-ia64@vger.kernel.org
Subject: Re: Silent data corruption caused by XPC V2.
Date: Tue, 08 Aug 2006 17:22:47 +0000 [thread overview]
Message-ID: <20060808172247.GA18538@sgi.com> (raw)
In-Reply-To: <20060807174933.GB24663@lnx-holt.americas.sgi.com>
Jack Steiner identified a problem where XPC can cause a silent
data corruption. On module load, the placement may cause the
xpc_remote_copy_buffer to span two physical pages. DMA transfers are
done to the start virtual address translated to physical.
This patch changes the buffer from a statically allocated buffer to a
kmalloc'd buffer. Dean Nelson reviewed this before posting. I have
tested it in the configuration that was showing the memory corruption
and verified it works. I also added a DBUG_ON statement to help catch
this if a similar situation is encountered.
Signed-off-by: Robin Holt <holt@sgi.com>
Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Jack Steiner <steiner@sgi.com>
----
Keith, thanks for the feedback. Yes, the #of pages could potentially exceed
two pages. They don't today, but this DBUG_ON() was intended to catch future
changes to the code that may result in doing a BTE transfer to a physically
non-contiguously mapped destination.
Tony, here's a new version of the patch which addresses Keith's concern
about dealing with more than two physical pages.
Thanks,
Dean
Index: linux-2.6/arch/ia64/sn/kernel/xpc_channel.c
=================================--- linux-2.6.orig/arch/ia64/sn/kernel/xpc_channel.c 2006-08-08 10:36:52.657268672 -0500
+++ linux-2.6/arch/ia64/sn/kernel/xpc_channel.c 2006-08-08 10:36:55.249589616 -0500
@@ -279,8 +279,8 @@
return part->reason;
}
- bte_ret = xp_bte_copy((u64) src, (u64) ia64_tpa((u64) dst),
- (u64) cnt, (BTE_NORMAL | BTE_WACQUIRE), NULL);
+ bte_ret = xp_bte_copy((u64) src, (u64) dst, (u64) cnt,
+ (BTE_NORMAL | BTE_WACQUIRE), NULL);
if (bte_ret = BTE_SUCCESS) {
return xpcSuccess;
}
Index: linux-2.6/arch/ia64/sn/kernel/xpc_partition.c
=================================--- linux-2.6.orig/arch/ia64/sn/kernel/xpc_partition.c 2006-08-08 10:36:52.657268672 -0500
+++ linux-2.6/arch/ia64/sn/kernel/xpc_partition.c 2006-08-08 10:36:55.249589616 -0500
@@ -71,19 +71,15 @@
* Generic buffer used to store a local copy of portions of a remote
* partition's reserved page (either its header and part_nasids mask,
* or its vars).
- *
- * xpc_discovery runs only once and is a seperate thread that is
- * very likely going to be processing in parallel with receiving
- * interrupts.
*/
-char ____cacheline_aligned xpc_remote_copy_buffer[XPC_RP_HEADER_SIZE +
- XP_NASID_MASK_BYTES];
+char *xpc_remote_copy_buffer;
+void *xpc_remote_copy_buffer_base;
/*
* Guarantee that the kmalloc'd memory is cacheline aligned.
*/
-static void *
+void *
xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
{
/* see if kmalloc will give us cachline aligned memory by default */
@@ -148,7 +144,7 @@
}
}
- bte_res = xp_bte_copy(rp_pa, ia64_tpa(buf), buf_len,
+ bte_res = xp_bte_copy(rp_pa, buf, buf_len,
(BTE_NOTIFY | BTE_WACQUIRE), NULL);
if (bte_res != BTE_SUCCESS) {
dev_dbg(xpc_part, "xp_bte_copy failed %i\n", bte_res);
@@ -447,7 +443,7 @@
/* pull the remote_hb cache line */
bres = xp_bte_copy(part->remote_vars_pa,
- ia64_tpa((u64) remote_vars),
+ (u64) remote_vars,
XPC_RP_VARS_SIZE,
(BTE_NOTIFY | BTE_WACQUIRE), NULL);
if (bres != BTE_SUCCESS) {
@@ -498,8 +494,7 @@
/* pull over the reserved page header and part_nasids mask */
-
- bres = xp_bte_copy(*remote_rp_pa, ia64_tpa((u64) remote_rp),
+ bres = xp_bte_copy(*remote_rp_pa, (u64) remote_rp,
XPC_RP_HEADER_SIZE + xp_nasid_mask_bytes,
(BTE_NOTIFY | BTE_WACQUIRE), NULL);
if (bres != BTE_SUCCESS) {
@@ -554,11 +549,8 @@
return xpcVarsNotSet;
}
-
/* pull over the cross partition variables */
-
- bres = xp_bte_copy(remote_vars_pa, ia64_tpa((u64) remote_vars),
- XPC_RP_VARS_SIZE,
+ bres = xp_bte_copy(remote_vars_pa, (u64) remote_vars, XPC_RP_VARS_SIZE,
(BTE_NOTIFY | BTE_WACQUIRE), NULL);
if (bres != BTE_SUCCESS) {
return xpc_map_bte_errors(bres);
@@ -1239,7 +1231,7 @@
part_nasid_pa = (u64) XPC_RP_PART_NASIDS(part->remote_rp_pa);
- bte_res = xp_bte_copy(part_nasid_pa, ia64_tpa((u64) nasid_mask),
+ bte_res = xp_bte_copy(part_nasid_pa, (u64) nasid_mask,
xp_nasid_mask_bytes, (BTE_NOTIFY | BTE_WACQUIRE), NULL);
return xpc_map_bte_errors(bte_res);
Index: linux-2.6/arch/ia64/sn/kernel/xpc_main.c
=================================--- linux-2.6.orig/arch/ia64/sn/kernel/xpc_main.c 2006-08-08 10:36:52.657268672 -0500
+++ linux-2.6/arch/ia64/sn/kernel/xpc_main.c 2006-08-08 10:36:55.253590112 -0500
@@ -1052,6 +1052,8 @@
if (xpc_sysctl) {
unregister_sysctl_table(xpc_sysctl);
}
+
+ kfree(xpc_remote_copy_buffer_base);
}
@@ -1212,24 +1214,20 @@
partid_t partid;
struct xpc_partition *part;
pid_t pid;
+ size_t buf_size;
if (!ia64_platform_is("sn2")) {
return -ENODEV;
}
- /*
- * xpc_remote_copy_buffer is used as a temporary buffer for bte_copy'ng
- * various portions of a partition's reserved page. Its size is based
- * on the size of the reserved page header and part_nasids mask. So we
- * need to ensure that the other items will fit as well.
- */
- if (XPC_RP_VARS_SIZE > XPC_RP_HEADER_SIZE + XP_NASID_MASK_BYTES) {
- dev_err(xpc_part, "xpc_remote_copy_buffer is not big enough\n");
- return -EPERM;
- }
- DBUG_ON((u64) xpc_remote_copy_buffer !- L1_CACHE_ALIGN((u64) xpc_remote_copy_buffer));
+
+ buf_size = max(XPC_RP_VARS_SIZE,
+ XPC_RP_HEADER_SIZE + XP_NASID_MASK_BYTES);
+ xpc_remote_copy_buffer = xpc_kmalloc_cacheline_aligned(buf_size,
+ GFP_KERNEL, &xpc_remote_copy_buffer_base);
+ if (xpc_remote_copy_buffer = NULL)
+ return -ENOMEM;
snprintf(xpc_part->bus_id, BUS_ID_SIZE, "part");
snprintf(xpc_chan->bus_id, BUS_ID_SIZE, "chan");
@@ -1293,6 +1291,8 @@
if (xpc_sysctl) {
unregister_sysctl_table(xpc_sysctl);
}
+
+ kfree(xpc_remote_copy_buffer_base);
return -EBUSY;
}
@@ -1311,6 +1311,8 @@
if (xpc_sysctl) {
unregister_sysctl_table(xpc_sysctl);
}
+
+ kfree(xpc_remote_copy_buffer_base);
return -EBUSY;
}
@@ -1362,6 +1364,8 @@
if (xpc_sysctl) {
unregister_sysctl_table(xpc_sysctl);
}
+
+ kfree(xpc_remote_copy_buffer_base);
return -EBUSY;
}
Index: linux-2.6/include/asm-ia64/sn/xpc.h
=================================--- linux-2.6.orig/include/asm-ia64/sn/xpc.h 2006-08-08 10:36:52.657268672 -0500
+++ linux-2.6/include/asm-ia64/sn/xpc.h 2006-08-08 10:36:55.253590112 -0500
@@ -683,7 +683,9 @@
extern struct xpc_rsvd_page *xpc_rsvd_page;
extern struct xpc_vars_part *xpc_vars_part;
extern struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1];
-extern char xpc_remote_copy_buffer[];
+extern char *xpc_remote_copy_buffer;
+extern void *xpc_remote_copy_buffer_base;
+extern void *xpc_kmalloc_cacheline_aligned(size_t, gfp_t, void **);
extern struct xpc_rsvd_page *xpc_rsvd_page_init(void);
extern void xpc_allow_IPI_ops(void);
extern void xpc_restrict_IPI_ops(void);
Index: linux-2.6/include/asm-ia64/sn/xp.h
=================================--- linux-2.6.orig/include/asm-ia64/sn/xp.h 2006-08-08 10:36:52.657268672 -0500
+++ linux-2.6/include/asm-ia64/sn/xp.h 2006-08-08 11:39:09.825721267 -0500
@@ -60,23 +60,32 @@
* the bte_copy() once in the hope that the failure was due to a temporary
* aberration (i.e., the link going down temporarily).
*
- * See bte_copy for definition of the input parameters.
+ * src - physical address of the source of the transfer.
+ * vdst - virtual address of the destination of the transfer.
+ * len - number of bytes to transfer from source to destination.
+ * mode - see bte_copy() for definition.
+ * notification - see bte_copy() for definition.
*
* Note: xp_bte_copy() should never be called while holding a spinlock.
*/
static inline bte_result_t
-xp_bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification)
+xp_bte_copy(u64 src, u64 vdst, u64 len, u64 mode, void *notification)
{
bte_result_t ret;
+ u64 pdst = ia64_tpa(vdst);
- ret = bte_copy(src, dest, len, mode, notification);
+ /* ensure that the physically mapped memory is contiguous */
+ DBUG_ON(REGION_NUMBER(vdst) != RGN_KERNEL &&
+ REGION_NUMBER(vdst) != RGN_UNCACHED &&
+ PAGE_SIZE - (vdst & ~PAGE_MASK) < len);
+ ret = bte_copy(src, pdst, len, mode, notification);
if (ret != BTE_SUCCESS) {
if (!in_interrupt()) {
cond_resched();
}
- ret = bte_copy(src, dest, len, mode, notification);
+ ret = bte_copy(src, pdst, len, mode, notification);
}
return ret;
next prev parent reply other threads:[~2006-08-08 17:22 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-08-07 17:49 Silent data corruption caused by XPC V2 Robin Holt
2006-08-08 1:02 ` Keith Owens
2006-08-08 17:22 ` Dean Nelson [this message]
2006-08-08 17:44 ` Luck, Tony
2006-08-08 18:52 ` Yu, Fenghua
2006-08-08 19:14 ` Dean Nelson
2006-08-08 20:03 ` Dean Nelson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20060808172247.GA18538@sgi.com \
--to=dcn@sgi.com \
--cc=linux-ia64@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.