* Silent data corruption caused by XPC V2.
@ 2006-08-07 17:49 Robin Holt
2006-08-08 1:02 ` Keith Owens
` (5 more replies)
0 siblings, 6 replies; 7+ messages in thread
From: Robin Holt @ 2006-08-07 17:49 UTC (permalink / raw)
To: linux-ia64
Jack Steiner identified a problem where XPC can cause a silent
data corruption. On module load, the placement may cause the
xpc_remote_copy_buffer to span two physical pages. DMA transfers are
done to the start virtual address translated to physical.
This patch changes the buffer from a statically allocated buffer to a
kmalloc'd buffer. Dean Nelson reviewed this before posting. I have
tested it in the configuration that was showing the memory corruption
and verified it works. I also added DBUG_ON statements to help catch
this if a similar situation is encountered.
Signed-off-by: Robin Holt <holt@sgi.com>
Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Jack Steiner <steiner@sgi.com>
----
I originally built this against a git tree that I was using for a
git-bisect and did not catch that error before posting. Here is a
repost with that corrected. I am reposting this assuming you
accept our argument about the DBUG_ON() being left as is. If not,
Dean Nelson will repost later today.
Sorry for the confusion,
Robin
Index: linux-2.6/arch/ia64/sn/kernel/xpc_channel.c
=================================--- linux-2.6.orig/arch/ia64/sn/kernel/xpc_channel.c 2006-08-07 12:37:56.187180666 -0500
+++ linux-2.6/arch/ia64/sn/kernel/xpc_channel.c 2006-08-07 12:37:58.935517909 -0500
@@ -274,6 +274,7 @@ xpc_pull_remote_cachelines(struct xpc_pa
DBUG_ON((u64) src != L1_CACHE_ALIGN((u64) src));
DBUG_ON((u64) dst != L1_CACHE_ALIGN((u64) dst));
DBUG_ON(cnt != L1_CACHE_ALIGN(cnt));
+ DBUG_ON((ia64_tpa(dst) + cnt) != ia64_tpa(&((char *) dst)[cnt]));
if (part->act_state = XPC_P_DEACTIVATING) {
return part->reason;
Index: linux-2.6/arch/ia64/sn/kernel/xpc_partition.c
=================================--- linux-2.6.orig/arch/ia64/sn/kernel/xpc_partition.c 2006-08-07 12:37:56.187180666 -0500
+++ linux-2.6/arch/ia64/sn/kernel/xpc_partition.c 2006-08-07 12:41:41.550828812 -0500
@@ -71,19 +71,15 @@ struct xpc_partition xpc_partitions[XP_M
* Generic buffer used to store a local copy of portions of a remote
* partition's reserved page (either its header and part_nasids mask,
* or its vars).
- *
- * xpc_discovery runs only once and is a seperate thread that is
- * very likely going to be processing in parallel with receiving
- * interrupts.
*/
-char ____cacheline_aligned xpc_remote_copy_buffer[XPC_RP_HEADER_SIZE +
- XP_NASID_MASK_BYTES];
+char *xpc_remote_copy_buffer;
+void *xpc_remote_copy_buffer_base;
/*
* Guarantee that the kmalloc'd memory is cacheline aligned.
*/
-static void *
+void *
xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
{
/* see if kmalloc will give us cachline aligned memory by default */
@@ -148,6 +144,9 @@ xpc_get_rsvd_page_pa(int nasid)
}
}
+ DBUG_ON((ia64_tpa(buf) + buf_len) !+ ia64_tpa(&((char *) buf)[buf_len]));
+
bte_res = xp_bte_copy(rp_pa, ia64_tpa(buf), buf_len,
(BTE_NOTIFY | BTE_WACQUIRE), NULL);
if (bte_res != BTE_SUCCESS) {
@@ -445,6 +444,9 @@ xpc_check_remote_hb(void)
continue;
}
+ DBUG_ON((ia64_tpa(remote_vars) + XPC_RP_VARS_SIZE) !+ ia64_tpa(&((char *) remote_vars)[XPC_RP_VARS_SIZE]));
+
/* pull the remote_hb cache line */
bres = xp_bte_copy(part->remote_vars_pa,
ia64_tpa((u64) remote_vars),
@@ -496,9 +498,11 @@ xpc_get_remote_rp(int nasid, u64 *discov
return xpcNoRsvdPageAddr;
}
+ DBUG_ON((ia64_tpa(remote_rp + XPC_RP_HEADER_SIZE + xp_nasid_mask_bytes) !+ ia64_tpa(&((char *) remote_rp)[XPC_RP_HEADER_SIZE + xp_nasid_mask_bytes])));
- /* pull over the reserved page header and part_nasids mask */
+ /* pull over the reserved page header and part_nasids mask */
bres = xp_bte_copy(*remote_rp_pa, ia64_tpa((u64) remote_rp),
XPC_RP_HEADER_SIZE + xp_nasid_mask_bytes,
(BTE_NOTIFY | BTE_WACQUIRE), NULL);
@@ -554,9 +558,11 @@ xpc_get_remote_vars(u64 remote_vars_pa,
return xpcVarsNotSet;
}
+ DBUG_ON((ia64_tpa(remote_vars) + XPC_RP_VARS_SIZE) !+ ia64_tpa(&((char *) remote_vars)[XPC_RP_VARS_SIZE]));
- /* pull over the cross partition variables */
+ /* pull over the cross partition variables */
bres = xp_bte_copy(remote_vars_pa, ia64_tpa((u64) remote_vars),
XPC_RP_VARS_SIZE,
(BTE_NOTIFY | BTE_WACQUIRE), NULL);
@@ -1239,6 +1245,9 @@ xpc_initiate_partid_to_nasids(partid_t p
part_nasid_pa = (u64) XPC_RP_PART_NASIDS(part->remote_rp_pa);
+ DBUG_ON((ia64_tpa(nasid_mask) + xp_nasid_mask_bytes) !+ ia64_tpa(&((char *) nasid_mask)[xp_nasid_mask_bytes]));
+
bte_res = xp_bte_copy(part_nasid_pa, ia64_tpa((u64) nasid_mask),
xp_nasid_mask_bytes, (BTE_NOTIFY | BTE_WACQUIRE), NULL);
Index: linux-2.6/arch/ia64/sn/kernel/xpc_main.c
=================================--- linux-2.6.orig/arch/ia64/sn/kernel/xpc_main.c 2006-08-07 12:37:56.187180666 -0500
+++ linux-2.6/arch/ia64/sn/kernel/xpc_main.c 2006-08-07 12:37:58.939518400 -0500
@@ -1052,6 +1052,8 @@ xpc_do_exit(enum xpc_retval reason)
if (xpc_sysctl) {
unregister_sysctl_table(xpc_sysctl);
}
+
+ kfree(xpc_remote_copy_buffer_base);
}
@@ -1212,24 +1214,19 @@ xpc_init(void)
partid_t partid;
struct xpc_partition *part;
pid_t pid;
+ size_t buf_size;
if (!ia64_platform_is("sn2")) {
return -ENODEV;
}
- /*
- * xpc_remote_copy_buffer is used as a temporary buffer for bte_copy'ng
- * various portions of a partition's reserved page. Its size is based
- * on the size of the reserved page header and part_nasids mask. So we
- * need to ensure that the other items will fit as well.
- */
- if (XPC_RP_VARS_SIZE > XPC_RP_HEADER_SIZE + XP_NASID_MASK_BYTES) {
- dev_err(xpc_part, "xpc_remote_copy_buffer is not big enough\n");
- return -EPERM;
- }
- DBUG_ON((u64) xpc_remote_copy_buffer !- L1_CACHE_ALIGN((u64) xpc_remote_copy_buffer));
+
+ buf_size = max(XPC_RP_VARS_SIZE, XPC_RP_HEADER_SIZE + XP_NASID_MASK_BYTES);
+ xpc_remote_copy_buffer = xpc_kmalloc_cacheline_aligned(buf_size, GFP_KERNEL,
+ &xpc_remote_copy_buffer_base);
+ if (xpc_remote_copy_buffer = NULL)
+ return -ENOMEM;
snprintf(xpc_part->bus_id, BUS_ID_SIZE, "part");
snprintf(xpc_chan->bus_id, BUS_ID_SIZE, "chan");
@@ -1293,6 +1290,8 @@ xpc_init(void)
if (xpc_sysctl) {
unregister_sysctl_table(xpc_sysctl);
}
+
+ kfree(xpc_remote_copy_buffer_base);
return -EBUSY;
}
@@ -1311,6 +1310,8 @@ xpc_init(void)
if (xpc_sysctl) {
unregister_sysctl_table(xpc_sysctl);
}
+
+ kfree(xpc_remote_copy_buffer_base);
return -EBUSY;
}
@@ -1362,6 +1363,8 @@ xpc_init(void)
if (xpc_sysctl) {
unregister_sysctl_table(xpc_sysctl);
}
+
+ kfree(xpc_remote_copy_buffer_base);
return -EBUSY;
}
Index: linux-2.6/include/asm-ia64/sn/xpc.h
=================================--- linux-2.6.orig/include/asm-ia64/sn/xpc.h 2006-08-07 12:37:56.187180666 -0500
+++ linux-2.6/include/asm-ia64/sn/xpc.h 2006-08-07 12:43:44.201871144 -0500
@@ -684,7 +684,9 @@ extern struct xpc_vars *xpc_vars;
extern struct xpc_rsvd_page *xpc_rsvd_page;
extern struct xpc_vars_part *xpc_vars_part;
extern struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1];
-extern char xpc_remote_copy_buffer[];
+extern char *xpc_remote_copy_buffer;
+extern void *xpc_remote_copy_buffer_base;
+extern void * xpc_kmalloc_cacheline_aligned(size_t, gfp_t, void **);
extern struct xpc_rsvd_page *xpc_rsvd_page_init(void);
extern void xpc_allow_IPI_ops(void);
extern void xpc_restrict_IPI_ops(void);
@@ -720,6 +722,7 @@ extern void xpc_teardown_infrastructure(
+
static inline void
xpc_wakeup_channel_mgr(struct xpc_partition *part)
{
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: Silent data corruption caused by XPC V2.
2006-08-07 17:49 Silent data corruption caused by XPC V2 Robin Holt
@ 2006-08-08 1:02 ` Keith Owens
2006-08-08 17:22 ` Dean Nelson
` (4 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Keith Owens @ 2006-08-08 1:02 UTC (permalink / raw)
To: linux-ia64
Robin Holt (on Mon, 7 Aug 2006 12:49:33 -0500) wrote:
>Jack Steiner identified a problem where XPC can cause a silent
>data corruption. On module load, the placement may cause the
>xpc_remote_copy_buffer to span two physical pages. DMA transfers are
>done to the start virtual address translated to physical.
>
>This patch changes the buffer from a statically allocated buffer to a
>kmalloc'd buffer. Dean Nelson reviewed this before posting. I have
>tested it in the configuration that was showing the memory corruption
>and verified it works. I also added DBUG_ON statements to help catch
>this if a similar situation is encountered.
>
>Index: linux-2.6/arch/ia64/sn/kernel/xpc_channel.c
>=================================>--- linux-2.6.orig/arch/ia64/sn/kernel/xpc_channel.c 2006-08-07 12:37:56.187180666 -0500
>+++ linux-2.6/arch/ia64/sn/kernel/xpc_channel.c 2006-08-07 12:37:58.935517909 -0500
>@@ -274,6 +274,7 @@ xpc_pull_remote_cachelines(struct xpc_pa
> DBUG_ON((u64) src != L1_CACHE_ALIGN((u64) src));
> DBUG_ON((u64) dst != L1_CACHE_ALIGN((u64) dst));
> DBUG_ON(cnt != L1_CACHE_ALIGN(cnt));
>+ DBUG_ON((ia64_tpa(dst) + cnt) != ia64_tpa(&((char *) dst)[cnt]));
Can the byte count be greater than 2 pages? If it can, then that debug
statement is not going to catch this case:
Virtual page Physical page
A X
A+1 Y
A+2 X+2
because you only test the last page.
FWIW, I find this more readable instead of indexing then taking the
address of the result.
DBUG_ON((ia64_tpa(dst) + cnt) != ia64_tpa((char *) dst + cnt);
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: Silent data corruption caused by XPC V2.
2006-08-07 17:49 Silent data corruption caused by XPC V2 Robin Holt
2006-08-08 1:02 ` Keith Owens
@ 2006-08-08 17:22 ` Dean Nelson
2006-08-08 17:44 ` Luck, Tony
` (3 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Dean Nelson @ 2006-08-08 17:22 UTC (permalink / raw)
To: linux-ia64
Jack Steiner identified a problem where XPC can cause a silent
data corruption. On module load, the placement may cause the
xpc_remote_copy_buffer to span two physical pages. DMA transfers are
done to the start virtual address translated to physical.
This patch changes the buffer from a statically allocated buffer to a
kmalloc'd buffer. Dean Nelson reviewed this before posting. I have
tested it in the configuration that was showing the memory corruption
and verified it works. I also added a DBUG_ON statement to help catch
this if a similar situation is encountered.
Signed-off-by: Robin Holt <holt@sgi.com>
Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Jack Steiner <steiner@sgi.com>
----
Keith, thanks for the feedback. Yes, the #of pages could potentially exceed
two pages. They don't today, but this DBUG_ON() was intended to catch future
changes to the code that may result in doing a BTE transfer to a physically
non-contiguously mapped destination.
Tony, here's a new version of the patch which addresses Keith's concern
about dealing with more than two physical pages.
Thanks,
Dean
Index: linux-2.6/arch/ia64/sn/kernel/xpc_channel.c
=================================--- linux-2.6.orig/arch/ia64/sn/kernel/xpc_channel.c 2006-08-08 10:36:52.657268672 -0500
+++ linux-2.6/arch/ia64/sn/kernel/xpc_channel.c 2006-08-08 10:36:55.249589616 -0500
@@ -279,8 +279,8 @@
return part->reason;
}
- bte_ret = xp_bte_copy((u64) src, (u64) ia64_tpa((u64) dst),
- (u64) cnt, (BTE_NORMAL | BTE_WACQUIRE), NULL);
+ bte_ret = xp_bte_copy((u64) src, (u64) dst, (u64) cnt,
+ (BTE_NORMAL | BTE_WACQUIRE), NULL);
if (bte_ret = BTE_SUCCESS) {
return xpcSuccess;
}
Index: linux-2.6/arch/ia64/sn/kernel/xpc_partition.c
=================================--- linux-2.6.orig/arch/ia64/sn/kernel/xpc_partition.c 2006-08-08 10:36:52.657268672 -0500
+++ linux-2.6/arch/ia64/sn/kernel/xpc_partition.c 2006-08-08 10:36:55.249589616 -0500
@@ -71,19 +71,15 @@
* Generic buffer used to store a local copy of portions of a remote
* partition's reserved page (either its header and part_nasids mask,
* or its vars).
- *
- * xpc_discovery runs only once and is a seperate thread that is
- * very likely going to be processing in parallel with receiving
- * interrupts.
*/
-char ____cacheline_aligned xpc_remote_copy_buffer[XPC_RP_HEADER_SIZE +
- XP_NASID_MASK_BYTES];
+char *xpc_remote_copy_buffer;
+void *xpc_remote_copy_buffer_base;
/*
* Guarantee that the kmalloc'd memory is cacheline aligned.
*/
-static void *
+void *
xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
{
/* see if kmalloc will give us cachline aligned memory by default */
@@ -148,7 +144,7 @@
}
}
- bte_res = xp_bte_copy(rp_pa, ia64_tpa(buf), buf_len,
+ bte_res = xp_bte_copy(rp_pa, buf, buf_len,
(BTE_NOTIFY | BTE_WACQUIRE), NULL);
if (bte_res != BTE_SUCCESS) {
dev_dbg(xpc_part, "xp_bte_copy failed %i\n", bte_res);
@@ -447,7 +443,7 @@
/* pull the remote_hb cache line */
bres = xp_bte_copy(part->remote_vars_pa,
- ia64_tpa((u64) remote_vars),
+ (u64) remote_vars,
XPC_RP_VARS_SIZE,
(BTE_NOTIFY | BTE_WACQUIRE), NULL);
if (bres != BTE_SUCCESS) {
@@ -498,8 +494,7 @@
/* pull over the reserved page header and part_nasids mask */
-
- bres = xp_bte_copy(*remote_rp_pa, ia64_tpa((u64) remote_rp),
+ bres = xp_bte_copy(*remote_rp_pa, (u64) remote_rp,
XPC_RP_HEADER_SIZE + xp_nasid_mask_bytes,
(BTE_NOTIFY | BTE_WACQUIRE), NULL);
if (bres != BTE_SUCCESS) {
@@ -554,11 +549,8 @@
return xpcVarsNotSet;
}
-
/* pull over the cross partition variables */
-
- bres = xp_bte_copy(remote_vars_pa, ia64_tpa((u64) remote_vars),
- XPC_RP_VARS_SIZE,
+ bres = xp_bte_copy(remote_vars_pa, (u64) remote_vars, XPC_RP_VARS_SIZE,
(BTE_NOTIFY | BTE_WACQUIRE), NULL);
if (bres != BTE_SUCCESS) {
return xpc_map_bte_errors(bres);
@@ -1239,7 +1231,7 @@
part_nasid_pa = (u64) XPC_RP_PART_NASIDS(part->remote_rp_pa);
- bte_res = xp_bte_copy(part_nasid_pa, ia64_tpa((u64) nasid_mask),
+ bte_res = xp_bte_copy(part_nasid_pa, (u64) nasid_mask,
xp_nasid_mask_bytes, (BTE_NOTIFY | BTE_WACQUIRE), NULL);
return xpc_map_bte_errors(bte_res);
Index: linux-2.6/arch/ia64/sn/kernel/xpc_main.c
=================================--- linux-2.6.orig/arch/ia64/sn/kernel/xpc_main.c 2006-08-08 10:36:52.657268672 -0500
+++ linux-2.6/arch/ia64/sn/kernel/xpc_main.c 2006-08-08 10:36:55.253590112 -0500
@@ -1052,6 +1052,8 @@
if (xpc_sysctl) {
unregister_sysctl_table(xpc_sysctl);
}
+
+ kfree(xpc_remote_copy_buffer_base);
}
@@ -1212,24 +1214,20 @@
partid_t partid;
struct xpc_partition *part;
pid_t pid;
+ size_t buf_size;
if (!ia64_platform_is("sn2")) {
return -ENODEV;
}
- /*
- * xpc_remote_copy_buffer is used as a temporary buffer for bte_copy'ng
- * various portions of a partition's reserved page. Its size is based
- * on the size of the reserved page header and part_nasids mask. So we
- * need to ensure that the other items will fit as well.
- */
- if (XPC_RP_VARS_SIZE > XPC_RP_HEADER_SIZE + XP_NASID_MASK_BYTES) {
- dev_err(xpc_part, "xpc_remote_copy_buffer is not big enough\n");
- return -EPERM;
- }
- DBUG_ON((u64) xpc_remote_copy_buffer !- L1_CACHE_ALIGN((u64) xpc_remote_copy_buffer));
+
+ buf_size = max(XPC_RP_VARS_SIZE,
+ XPC_RP_HEADER_SIZE + XP_NASID_MASK_BYTES);
+ xpc_remote_copy_buffer = xpc_kmalloc_cacheline_aligned(buf_size,
+ GFP_KERNEL, &xpc_remote_copy_buffer_base);
+ if (xpc_remote_copy_buffer = NULL)
+ return -ENOMEM;
snprintf(xpc_part->bus_id, BUS_ID_SIZE, "part");
snprintf(xpc_chan->bus_id, BUS_ID_SIZE, "chan");
@@ -1293,6 +1291,8 @@
if (xpc_sysctl) {
unregister_sysctl_table(xpc_sysctl);
}
+
+ kfree(xpc_remote_copy_buffer_base);
return -EBUSY;
}
@@ -1311,6 +1311,8 @@
if (xpc_sysctl) {
unregister_sysctl_table(xpc_sysctl);
}
+
+ kfree(xpc_remote_copy_buffer_base);
return -EBUSY;
}
@@ -1362,6 +1364,8 @@
if (xpc_sysctl) {
unregister_sysctl_table(xpc_sysctl);
}
+
+ kfree(xpc_remote_copy_buffer_base);
return -EBUSY;
}
Index: linux-2.6/include/asm-ia64/sn/xpc.h
=================================--- linux-2.6.orig/include/asm-ia64/sn/xpc.h 2006-08-08 10:36:52.657268672 -0500
+++ linux-2.6/include/asm-ia64/sn/xpc.h 2006-08-08 10:36:55.253590112 -0500
@@ -683,7 +683,9 @@
extern struct xpc_rsvd_page *xpc_rsvd_page;
extern struct xpc_vars_part *xpc_vars_part;
extern struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1];
-extern char xpc_remote_copy_buffer[];
+extern char *xpc_remote_copy_buffer;
+extern void *xpc_remote_copy_buffer_base;
+extern void *xpc_kmalloc_cacheline_aligned(size_t, gfp_t, void **);
extern struct xpc_rsvd_page *xpc_rsvd_page_init(void);
extern void xpc_allow_IPI_ops(void);
extern void xpc_restrict_IPI_ops(void);
Index: linux-2.6/include/asm-ia64/sn/xp.h
=================================--- linux-2.6.orig/include/asm-ia64/sn/xp.h 2006-08-08 10:36:52.657268672 -0500
+++ linux-2.6/include/asm-ia64/sn/xp.h 2006-08-08 11:39:09.825721267 -0500
@@ -60,23 +60,32 @@
* the bte_copy() once in the hope that the failure was due to a temporary
* aberration (i.e., the link going down temporarily).
*
- * See bte_copy for definition of the input parameters.
+ * src - physical address of the source of the transfer.
+ * vdst - virtual address of the destination of the transfer.
+ * len - number of bytes to transfer from source to destination.
+ * mode - see bte_copy() for definition.
+ * notification - see bte_copy() for definition.
*
* Note: xp_bte_copy() should never be called while holding a spinlock.
*/
static inline bte_result_t
-xp_bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification)
+xp_bte_copy(u64 src, u64 vdst, u64 len, u64 mode, void *notification)
{
bte_result_t ret;
+ u64 pdst = ia64_tpa(vdst);
- ret = bte_copy(src, dest, len, mode, notification);
+ /* ensure that the physically mapped memory is contiguous */
+ DBUG_ON(REGION_NUMBER(vdst) != RGN_KERNEL &&
+ REGION_NUMBER(vdst) != RGN_UNCACHED &&
+ PAGE_SIZE - (vdst & ~PAGE_MASK) < len);
+ ret = bte_copy(src, pdst, len, mode, notification);
if (ret != BTE_SUCCESS) {
if (!in_interrupt()) {
cond_resched();
}
- ret = bte_copy(src, dest, len, mode, notification);
+ ret = bte_copy(src, pdst, len, mode, notification);
}
return ret;
^ permalink raw reply [flat|nested] 7+ messages in thread
* RE: Silent data corruption caused by XPC V2.
2006-08-07 17:49 Silent data corruption caused by XPC V2 Robin Holt
2006-08-08 1:02 ` Keith Owens
2006-08-08 17:22 ` Dean Nelson
@ 2006-08-08 17:44 ` Luck, Tony
2006-08-08 18:52 ` Yu, Fenghua
` (2 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Luck, Tony @ 2006-08-08 17:44 UTC (permalink / raw)
To: linux-ia64
+ /* ensure that the physically mapped memory is contiguous */
+ DBUG_ON(REGION_NUMBER(vdst) != RGN_KERNEL &&
+ REGION_NUMBER(vdst) != RGN_UNCACHED &&
+ PAGE_SIZE - (vdst & ~PAGE_MASK) < len);
I thought that part of the point of this patch was that
by using kmalloc() for the buffer, you actually guaranteed
that the whole thing is contiguous (since kmalloc will give
you an order(N) page for requests greater than a page.
This test is a bit convoluted, but looks it will ping you
whenever a transfer crosses a page boundary, regardless of
whether the pages are contiguous or not. Certainly the
comment seems tenuously connected to the code.
-Tony
^ permalink raw reply [flat|nested] 7+ messages in thread
* RE: Silent data corruption caused by XPC V2.
2006-08-07 17:49 Silent data corruption caused by XPC V2 Robin Holt
` (2 preceding siblings ...)
2006-08-08 17:44 ` Luck, Tony
@ 2006-08-08 18:52 ` Yu, Fenghua
2006-08-08 19:14 ` Dean Nelson
2006-08-08 20:03 ` Dean Nelson
5 siblings, 0 replies; 7+ messages in thread
From: Yu, Fenghua @ 2006-08-08 18:52 UTC (permalink / raw)
To: linux-ia64
Seems the following part is missed in your patch:
--- arch/ia64/sn/kernel/xpc_main.c 2006-02-13 12:35:03.000000000
-0800
+++ arch/ia64/sn/kernel/xpc_main.c.new 2006-08-08 11:47:03.000000000
-0700
@@ -1374,6 +1374,7 @@ xpc_init(void)
complete(&xpc_discovery_exited);
xpc_do_exit(xpcUnloading);
+ kfree(xpc_remote_copy_buffer_base);
return -EBUSY;
}
Thanks.
-Fenghua
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: Silent data corruption caused by XPC V2.
2006-08-07 17:49 Silent data corruption caused by XPC V2 Robin Holt
` (3 preceding siblings ...)
2006-08-08 18:52 ` Yu, Fenghua
@ 2006-08-08 19:14 ` Dean Nelson
2006-08-08 20:03 ` Dean Nelson
5 siblings, 0 replies; 7+ messages in thread
From: Dean Nelson @ 2006-08-08 19:14 UTC (permalink / raw)
To: linux-ia64
On Tue, Aug 08, 2006 at 11:52:41AM -0700, Yu, Fenghua wrote:
>
> Seems the following part is missed in your patch:
>
> --- arch/ia64/sn/kernel/xpc_main.c 2006-02-13 12:35:03.000000000
> -0800
> +++ arch/ia64/sn/kernel/xpc_main.c.new 2006-08-08 11:47:03.000000000
> -0700
> @@ -1374,6 +1374,7 @@ xpc_init(void)
> complete(&xpc_discovery_exited);
>
> xpc_do_exit(xpcUnloading);
> + kfree(xpc_remote_copy_buffer_base);
> return -EBUSY;
> }
Actually, the very last line of xpc_do_exit() is the following:
kfree(xpc_remote_copy_buffer_base);
}
I realize it's perhaps not the most straight-forward arrangement, but it
does have something to do with the various avenues through which our exit
processing can get invoked (not just by rmmod's call to xpc_exit()).
I really do appreciate your feedback. Thanks.
Dean
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: Silent data corruption caused by XPC V2.
2006-08-07 17:49 Silent data corruption caused by XPC V2 Robin Holt
` (4 preceding siblings ...)
2006-08-08 19:14 ` Dean Nelson
@ 2006-08-08 20:03 ` Dean Nelson
5 siblings, 0 replies; 7+ messages in thread
From: Dean Nelson @ 2006-08-08 20:03 UTC (permalink / raw)
To: linux-ia64
Jack Steiner identified a problem where XPC can cause a silent
data corruption. On module load, the placement may cause the
xpc_remote_copy_buffer to span two physical pages. DMA transfers are
done to the start virtual address translated to physical.
This patch changes the buffer from a statically allocated buffer to a
kmalloc'd buffer. Dean Nelson reviewed this before posting. I have
tested it in the configuration that was showing the memory corruption
and verified it works. I also added a BUG_ON statement to help catch
this if a similar situation is encountered.
Signed-off-by: Robin Holt <holt@sgi.com>
Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Jack Steiner <steiner@sgi.com>
----
On Tue, Aug 08, 2006 at 10:44:24AM -0700, Luck, Tony wrote:
> + /* ensure that the physically mapped memory is contiguous */
> + DBUG_ON(REGION_NUMBER(vdst) != RGN_KERNEL &&
> + REGION_NUMBER(vdst) != RGN_UNCACHED &&
> + PAGE_SIZE - (vdst & ~PAGE_MASK) < len);
>
> I thought that part of the point of this patch was that
> by using kmalloc() for the buffer, you actually guaranteed
> that the whole thing is contiguous (since kmalloc will give
> you an order(N) page for requests greater than a page.
>
> This test is a bit convoluted, but looks it will ping you
> whenever a transfer crosses a page boundary, regardless of
> whether the pages are contiguous or not. Certainly the
> comment seems tenuously connected to the code.
>
> -Tony
This is a new version of the patch which hopefully will be more in line
with what you'd like to see. I've simplified the test down to a single
BUG_ON(REGION_NUMBER(vdst) != RGN_KERNEL);
since we currently only utilize region 7 buffers.
Thanks,
Dean
Index: linux-2.6/arch/ia64/sn/kernel/xpc_channel.c
=================================--- linux-2.6.orig/arch/ia64/sn/kernel/xpc_channel.c 2006-08-08 14:30:06.750362170 -0500
+++ linux-2.6/arch/ia64/sn/kernel/xpc_channel.c 2006-08-08 14:30:09.350665624 -0500
@@ -279,8 +279,8 @@
return part->reason;
}
- bte_ret = xp_bte_copy((u64) src, (u64) ia64_tpa((u64) dst),
- (u64) cnt, (BTE_NORMAL | BTE_WACQUIRE), NULL);
+ bte_ret = xp_bte_copy((u64) src, (u64) dst, (u64) cnt,
+ (BTE_NORMAL | BTE_WACQUIRE), NULL);
if (bte_ret = BTE_SUCCESS) {
return xpcSuccess;
}
Index: linux-2.6/arch/ia64/sn/kernel/xpc_partition.c
=================================--- linux-2.6.orig/arch/ia64/sn/kernel/xpc_partition.c 2006-08-08 14:30:06.750362170 -0500
+++ linux-2.6/arch/ia64/sn/kernel/xpc_partition.c 2006-08-08 14:30:09.350665624 -0500
@@ -71,19 +71,15 @@
* Generic buffer used to store a local copy of portions of a remote
* partition's reserved page (either its header and part_nasids mask,
* or its vars).
- *
- * xpc_discovery runs only once and is a seperate thread that is
- * very likely going to be processing in parallel with receiving
- * interrupts.
*/
-char ____cacheline_aligned xpc_remote_copy_buffer[XPC_RP_HEADER_SIZE +
- XP_NASID_MASK_BYTES];
+char *xpc_remote_copy_buffer;
+void *xpc_remote_copy_buffer_base;
/*
* Guarantee that the kmalloc'd memory is cacheline aligned.
*/
-static void *
+void *
xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
{
/* see if kmalloc will give us cachline aligned memory by default */
@@ -148,7 +144,7 @@
}
}
- bte_res = xp_bte_copy(rp_pa, ia64_tpa(buf), buf_len,
+ bte_res = xp_bte_copy(rp_pa, buf, buf_len,
(BTE_NOTIFY | BTE_WACQUIRE), NULL);
if (bte_res != BTE_SUCCESS) {
dev_dbg(xpc_part, "xp_bte_copy failed %i\n", bte_res);
@@ -447,7 +443,7 @@
/* pull the remote_hb cache line */
bres = xp_bte_copy(part->remote_vars_pa,
- ia64_tpa((u64) remote_vars),
+ (u64) remote_vars,
XPC_RP_VARS_SIZE,
(BTE_NOTIFY | BTE_WACQUIRE), NULL);
if (bres != BTE_SUCCESS) {
@@ -498,8 +494,7 @@
/* pull over the reserved page header and part_nasids mask */
-
- bres = xp_bte_copy(*remote_rp_pa, ia64_tpa((u64) remote_rp),
+ bres = xp_bte_copy(*remote_rp_pa, (u64) remote_rp,
XPC_RP_HEADER_SIZE + xp_nasid_mask_bytes,
(BTE_NOTIFY | BTE_WACQUIRE), NULL);
if (bres != BTE_SUCCESS) {
@@ -554,11 +549,8 @@
return xpcVarsNotSet;
}
-
/* pull over the cross partition variables */
-
- bres = xp_bte_copy(remote_vars_pa, ia64_tpa((u64) remote_vars),
- XPC_RP_VARS_SIZE,
+ bres = xp_bte_copy(remote_vars_pa, (u64) remote_vars, XPC_RP_VARS_SIZE,
(BTE_NOTIFY | BTE_WACQUIRE), NULL);
if (bres != BTE_SUCCESS) {
return xpc_map_bte_errors(bres);
@@ -1239,7 +1231,7 @@
part_nasid_pa = (u64) XPC_RP_PART_NASIDS(part->remote_rp_pa);
- bte_res = xp_bte_copy(part_nasid_pa, ia64_tpa((u64) nasid_mask),
+ bte_res = xp_bte_copy(part_nasid_pa, (u64) nasid_mask,
xp_nasid_mask_bytes, (BTE_NOTIFY | BTE_WACQUIRE), NULL);
return xpc_map_bte_errors(bte_res);
Index: linux-2.6/arch/ia64/sn/kernel/xpc_main.c
=================================--- linux-2.6.orig/arch/ia64/sn/kernel/xpc_main.c 2006-08-08 14:30:06.750362170 -0500
+++ linux-2.6/arch/ia64/sn/kernel/xpc_main.c 2006-08-08 14:30:09.350665624 -0500
@@ -1052,6 +1052,8 @@
if (xpc_sysctl) {
unregister_sysctl_table(xpc_sysctl);
}
+
+ kfree(xpc_remote_copy_buffer_base);
}
@@ -1212,24 +1214,20 @@
partid_t partid;
struct xpc_partition *part;
pid_t pid;
+ size_t buf_size;
if (!ia64_platform_is("sn2")) {
return -ENODEV;
}
- /*
- * xpc_remote_copy_buffer is used as a temporary buffer for bte_copy'ng
- * various portions of a partition's reserved page. Its size is based
- * on the size of the reserved page header and part_nasids mask. So we
- * need to ensure that the other items will fit as well.
- */
- if (XPC_RP_VARS_SIZE > XPC_RP_HEADER_SIZE + XP_NASID_MASK_BYTES) {
- dev_err(xpc_part, "xpc_remote_copy_buffer is not big enough\n");
- return -EPERM;
- }
- DBUG_ON((u64) xpc_remote_copy_buffer !- L1_CACHE_ALIGN((u64) xpc_remote_copy_buffer));
+
+ buf_size = max(XPC_RP_VARS_SIZE,
+ XPC_RP_HEADER_SIZE + XP_NASID_MASK_BYTES);
+ xpc_remote_copy_buffer = xpc_kmalloc_cacheline_aligned(buf_size,
+ GFP_KERNEL, &xpc_remote_copy_buffer_base);
+ if (xpc_remote_copy_buffer = NULL)
+ return -ENOMEM;
snprintf(xpc_part->bus_id, BUS_ID_SIZE, "part");
snprintf(xpc_chan->bus_id, BUS_ID_SIZE, "chan");
@@ -1293,6 +1291,8 @@
if (xpc_sysctl) {
unregister_sysctl_table(xpc_sysctl);
}
+
+ kfree(xpc_remote_copy_buffer_base);
return -EBUSY;
}
@@ -1311,6 +1311,8 @@
if (xpc_sysctl) {
unregister_sysctl_table(xpc_sysctl);
}
+
+ kfree(xpc_remote_copy_buffer_base);
return -EBUSY;
}
@@ -1362,6 +1364,8 @@
if (xpc_sysctl) {
unregister_sysctl_table(xpc_sysctl);
}
+
+ kfree(xpc_remote_copy_buffer_base);
return -EBUSY;
}
Index: linux-2.6/include/asm-ia64/sn/xpc.h
=================================--- linux-2.6.orig/include/asm-ia64/sn/xpc.h 2006-08-08 14:30:06.750362170 -0500
+++ linux-2.6/include/asm-ia64/sn/xpc.h 2006-08-08 14:30:09.354666091 -0500
@@ -683,7 +683,9 @@
extern struct xpc_rsvd_page *xpc_rsvd_page;
extern struct xpc_vars_part *xpc_vars_part;
extern struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1];
-extern char xpc_remote_copy_buffer[];
+extern char *xpc_remote_copy_buffer;
+extern void *xpc_remote_copy_buffer_base;
+extern void *xpc_kmalloc_cacheline_aligned(size_t, gfp_t, void **);
extern struct xpc_rsvd_page *xpc_rsvd_page_init(void);
extern void xpc_allow_IPI_ops(void);
extern void xpc_restrict_IPI_ops(void);
Index: linux-2.6/include/asm-ia64/sn/xp.h
=================================--- linux-2.6.orig/include/asm-ia64/sn/xp.h 2006-08-08 14:30:06.750362170 -0500
+++ linux-2.6/include/asm-ia64/sn/xp.h 2006-08-08 14:32:11.968983275 -0500
@@ -60,23 +60,37 @@
* the bte_copy() once in the hope that the failure was due to a temporary
* aberration (i.e., the link going down temporarily).
*
- * See bte_copy for definition of the input parameters.
+ * src - physical address of the source of the transfer.
+ * vdst - virtual address of the destination of the transfer.
+ * len - number of bytes to transfer from source to destination.
+ * mode - see bte_copy() for definition.
+ * notification - see bte_copy() for definition.
*
* Note: xp_bte_copy() should never be called while holding a spinlock.
*/
static inline bte_result_t
-xp_bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification)
+xp_bte_copy(u64 src, u64 vdst, u64 len, u64 mode, void *notification)
{
bte_result_t ret;
+ u64 pdst = ia64_tpa(vdst);
- ret = bte_copy(src, dest, len, mode, notification);
+ /*
+ * Ensure that the physically mapped memory is contiguous.
+ *
+ * We do this by ensuring that the memory is from region 7 only.
+ * If the need should arise to use memory from one of the other
+ * regions, then modify the BUG_ON() statement to ensure that the
+ * memory from that region is always physically contiguous.
+ */
+ BUG_ON(REGION_NUMBER(vdst) != RGN_KERNEL);
+ ret = bte_copy(src, pdst, len, mode, notification);
if (ret != BTE_SUCCESS) {
if (!in_interrupt()) {
cond_resched();
}
- ret = bte_copy(src, dest, len, mode, notification);
+ ret = bte_copy(src, pdst, len, mode, notification);
}
return ret;
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2006-08-08 20:03 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-08-07 17:49 Silent data corruption caused by XPC V2 Robin Holt
2006-08-08 1:02 ` Keith Owens
2006-08-08 17:22 ` Dean Nelson
2006-08-08 17:44 ` Luck, Tony
2006-08-08 18:52 ` Yu, Fenghua
2006-08-08 19:14 ` Dean Nelson
2006-08-08 20:03 ` Dean Nelson
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox