public inbox for linux-ia64@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 2/4] SGI Altix cross partition functionality
@ 2004-06-16 16:35 Dean Nelson
  2004-06-16 17:28 ` Christoph Hellwig
                   ` (14 more replies)
  0 siblings, 15 replies; 16+ messages in thread
From: Dean Nelson @ 2004-06-16 16:35 UTC (permalink / raw)
  To: linux-ia64

This patch contains the shim module (XP) which interfaces between the
communication module (XPC) and the functional support modules (like XPNET).


Index: linux/arch/ia64/Kconfig
=================================--- linux.orig/arch/ia64/Kconfig	2004-04-23 12:54:23.000000000 -0500
+++ linux/arch/ia64/Kconfig	2004-04-23 12:55:57.000000000 -0500
@@ -203,6 +203,17 @@
 	depends on !IA64_HP_SIM
 	default y
 
+config IA64_SGI_SN_XPC
+	tristate "Support DMA Messaging between SGI machines"
+	depends on (IA64_SGI_SN2 || IA64_GENERIC)
+	default m if (IA64_SGI_SN2 || IA64_GENERIC)
+	help
+	  An SGI machine can be divided into multiple Single System
+	  Images which act independently of each other and have
+	  hardware based memory protection from the others.  Enabling
+	  this feature will allow limited communication between
+	  those System Images without allowing write access.
+
 config IA64_SGI_SN_SIM
 	bool "SGI Medusa Simulator Support"
 	depends on IA64_SGI_SN2
Index: linux/arch/ia64/sn/kernel/Makefile
=================================--- linux.orig/arch/ia64/sn/kernel/Makefile	2004-04-23 12:54:23.000000000 -0500
+++ linux/arch/ia64/sn/kernel/Makefile	2004-04-23 13:36:21.000000000 -0500
@@ -9,3 +9,6 @@
 
 obj-y				+= probe.o setup.o bte.o irq.o mca.o idle.o sn2/
 obj-$(CONFIG_IA64_GENERIC)      += machvec.o
+obj-$(CONFIG_IA64_SGI_SN_XPC)	+= xp.o
+xp-y				:= xp_main.o xp_nofault.o
+CFLAGS_xp_main.o += -I $(TOPDIR)/arch/$(ARCH)/kdb
Index: linux/arch/ia64/sn/kernel/xp_main.c
=================================--- linux.orig/arch/ia64/sn/kernel/xp_main.c	1969-12-31 18:00:00.000000000 -0600
+++ linux/arch/ia64/sn/kernel/xp_main.c	2004-04-23 12:55:57.000000000 -0500
@@ -0,0 +1,381 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2004 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+
+/*
+ * Cross Partition (XP) base.
+ *
+ *	XP provides a base from which XPMEM and XPNET can interact
+ *	with XPC, yet not be dependent on XPC.
+ *
+ */
+
+
+#ifndef SN_PROM
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <asm/sn/intr.h>
+#include <asm/sn/sn_sal.h>
+#include <asm/sn/xp.h>
+#else /* ! SN_PROM */
+#include "xp.h"
+#include "xpc_stubs.h"
+#endif /* ! SN_PROM */
+
+
+/*
+ * xpc_registrations[] keeps track of xpc_connect()'s done by the kernel-level
+ * users of XPC.
+ */
+xpc_registration_t xpc_registrations[XPC_NCHANNELS];
+
+
+/*
+ * Initialize the XPC interface to inidicate that XPC isn't loaded.
+ */
+static xpc_t xpc_notloaded(void) { return xpcNotLoaded; }
+
+xpc_interface_t xpc_interface = {
+	(void (*)(int)) xpc_notloaded,
+	(void (*)(int)) xpc_notloaded,
+	(xpc_t (*)(partid_t, int, u32, void **)) xpc_notloaded,
+	(xpc_t (*)(partid_t, int, void *)) xpc_notloaded,
+	(xpc_t (*)(partid_t, int, void *, xpc_notify_func_t, void *))
+								xpc_notloaded,
+	(void (*)(partid_t, int, void *)) xpc_notloaded,
+	(xpc_t (*)(partid_t, void *)) xpc_notloaded
+};
+
+
+#ifndef SN_PROM
+
+#ifdef CONFIG_KDB
+
+static void
+xpc_kdb_print_users(xpc_registration_t *registration, int ch_number)
+{
+	kdb_printf("xpc_registrations[channel=%d] (0x%p):\n", ch_number,
+							(void *) registration);
+
+	kdb_printf("\t&sema=0x%p\n", (void *) &registration->sema);
+	kdb_printf("\tfunc=0x%p\n", (void *) registration->func);
+	kdb_printf("\tkey=0x%p\n", registration->key);
+	kdb_printf("\tnentries=%d\n", registration->nentries);
+	kdb_printf("\tmsg_size=%d\n", registration->msg_size);
+	kdb_printf("\tassigned_limit=%d\n", registration->assigned_limit);
+	kdb_printf("\tidle_limit=%d\n", registration->idle_limit);
+}
+
+
+/*
+ * Display current XPC users who have registered via xpc_connect().
+ *
+ *	xpcusers [ <channel> ]
+ */
+static int
+xpc_kdb_users(int argc, const char **argv, const char **envp,
+			struct pt_regs *regs)
+{
+	int ret;
+	xpc_registration_t *registration;
+	int ch_number;
+
+
+	if (argc > 1) {
+		return KDB_ARGCOUNT;
+
+	} else if (argc = 1) {
+		ret = kdbgetularg(argv[1], (unsigned long *) &ch_number);
+		if (ret) {
+			return ret;
+		}
+		if (ch_number < 0 || ch_number >= XPC_NCHANNELS) {
+			kdb_printf("invalid channel #\n");
+			return KDB_BADINT;
+		}
+		registration = &xpc_registrations[ch_number];
+		xpc_kdb_print_users(registration, ch_number);
+
+	} else {
+		for (ch_number = 0; ch_number < XPC_NCHANNELS; ch_number++) {
+			registration = &xpc_registrations[ch_number];
+
+			/* if !XPC_CHANNEL_REGISTERED(ch_number) */
+			if (registration->func = NULL) {
+				continue;
+			}
+			xpc_kdb_print_users(registration, ch_number);
+		}
+	}
+	return 0;
+}
+
+#endif /* CONFIG_KDB */
+
+
+static void
+xp_init_xpc(void)
+{
+	int ch_number;
+
+
+#ifdef CONFIG_KDB
+	(void) kdb_register("xpcusers", xpc_kdb_users, "[ <channel> ]",
+				"Display xpc_registration_t entries", 0);
+#endif /* CONFIG_KDB */
+
+	/* initialize the connection registration semaphores */
+	for (ch_number = 0; ch_number < XPC_NCHANNELS; ch_number++) {
+		sema_init(&xpc_registrations[ch_number].sema, 1);  /* mutex */
+	}
+}
+
+
+static void
+xp_exit_xpc(void)
+{
+#ifdef CONFIG_KDB
+	(void) kdb_unregister("xpcusers");
+#endif /* CONFIG_KDB */
+}
+
+#endif /* ! SN_PROM */
+
+
+/*
+ * XPC calls this when it (the XPC module) has been loaded.
+ */
+void
+xpc_set_interface(void (*connect)(int),
+		void (*disconnect)(int),
+		xpc_t (*allocate)(partid_t, int, u32, void **),
+		xpc_t (*send)(partid_t, int, void *),
+		xpc_t (*send_notify)(partid_t, int, void *, xpc_notify_func_t,
+								void *),
+		void (*received)(partid_t, int, void *),
+		xpc_t (*partid_to_nasids)(partid_t, void *))
+{
+	xpc_interface.connect = connect;
+	xpc_interface.disconnect = disconnect;
+	xpc_interface.allocate = allocate;
+	xpc_interface.send = send;
+	xpc_interface.send_notify = send_notify;
+	xpc_interface.received = received;
+	xpc_interface.partid_to_nasids = partid_to_nasids;
+}
+
+
+/*
+ * XPC calls this when it (the XPC module) is being unloaded.
+ */
+void
+xpc_clear_interface(void)
+{
+	xpc_interface.connect = (void (*)(int)) xpc_notloaded;
+	xpc_interface.disconnect = (void (*)(int)) xpc_notloaded;
+	xpc_interface.allocate = (xpc_t (*)(partid_t, int, u32, void **))
+					xpc_notloaded;
+	xpc_interface.send = (xpc_t (*)(partid_t, int, void *)) xpc_notloaded;
+	xpc_interface.send_notify = (xpc_t (*)(partid_t, int, void *,
+				    xpc_notify_func_t, void *)) xpc_notloaded;
+	xpc_interface.received = (void (*)(partid_t, int, void *))
+					xpc_notloaded;
+	xpc_interface.partid_to_nasids = (xpc_t (*)(partid_t, void *))
+					xpc_notloaded;
+}
+
+
+/*
+ * Register for automatic establishment of a channel connection whenever
+ * a partition comes up.
+ *
+ * Arguments:
+ *
+ *	ch_number - channel # to register for connection.
+ *	func - function to call for asynchronous notification of channel
+ *	       state changes (i.e., connection, disconnection, error) and
+ *	       the arrival of incoming messages.
+ *      key - pointer to optional user-defined value that gets passed back
+ *	      to the user on any callouts made to func.
+ *	payload_size - size in bytes of the XPC message's payload area which
+ *		       contains a user-defined message. The user should make
+ *		       this large enough to hold their largest message.
+ *	nentries - max #of XPC message entries a message queue can contain.
+ *		   The actual number, which is determined when a connection
+ * 		   is established and may be less then requested, will be
+ *		   passed to the user via the xpcConnected callout.
+ *	assigned_limit - max number of kthreads allowed to be processing
+ * 			 messages (per connection) at any given instant.
+ *	idle_limit - max number of kthreads allowed to be idle at any given
+ * 		     instant.
+ */
+xpc_t
+xpc_connect(int ch_number, xpc_channel_func_t func, void *key, u16 payload_size,
+		u16 nentries, u32 assigned_limit, u32 idle_limit)
+{
+	xpc_registration_t *registration;
+
+
+	XP_ASSERT(ch_number >= 0 && ch_number < XPC_NCHANNELS);
+	XP_ASSERT(payload_size != 0 && nentries != 0);
+	XP_ASSERT(func != NULL);
+	XP_ASSERT(assigned_limit != 0 && idle_limit <= assigned_limit);
+
+	registration = &xpc_registrations[ch_number];
+
+	if (down_interruptible(&registration->sema) != 0) {
+		return xpcInterrupted;
+	}
+
+	/* if XPC_CHANNEL_REGISTERED(ch_number) */
+	if (registration->func != NULL) {
+		up(&registration->sema);
+		return xpcAlreadyRegistered;
+	}
+
+	/* register the channel for connection */
+	registration->msg_size = XPC_MSG_SIZE(payload_size);
+	registration->nentries = nentries;
+	registration->assigned_limit = assigned_limit;
+	registration->idle_limit = idle_limit;
+	registration->key = key;
+	registration->func = func;
+
+	up(&registration->sema);
+
+	xpc_interface.connect(ch_number);
+
+	return xpcSuccess;
+}
+
+
+/*
+ * Remove the registration for automatic connection of the specified channel
+ * when a partition comes up.
+ *
+ * Before returning this xpc_disconnect() will wait for all connections on the
+ * specified channel have been closed/torndown. So the caller can be assured
+ * that they will not be receiving any more callouts from XPC to their
+ * function registered via xpc_connect().
+ *
+ * Arguments:
+ *
+ *	ch_number - channel # to unregister.
+ */
+void
+xpc_disconnect(int ch_number)
+{
+	xpc_registration_t *registration;
+
+
+	XP_ASSERT(ch_number >= 0 && ch_number < XPC_NCHANNELS);
+
+	registration = &xpc_registrations[ch_number];
+
+	/*
+	 * We've decided not to make this a down_interruptible(), since
+	 * we figured XPMEM and XPNET will just turn around and call
+	 * xpc_disconnect() again anyways, so we might as well wait, if
+	 * need be.
+	 */
+	down(&registration->sema);
+
+	/* if !XPC_CHANNEL_REGISTERED(ch_number) */
+	if (registration->func = NULL) {
+		up(&registration->sema);
+		return;
+	}
+
+	/* remove the connection registration for the specified channel */
+	registration->func = NULL;
+	registration->key = NULL;
+	registration->nentries = 0;
+	registration->msg_size = 0;
+	registration->assigned_limit = 0;
+	registration->idle_limit = 0;
+
+	xpc_interface.disconnect(ch_number);
+
+	up(&registration->sema);
+
+	return;
+}
+
+
+#ifndef SN_PROM
+
+int __init
+xp_init(void)
+{
+	int ret;
+	int (* pior_func)(void *) = xp_nofault_PIOR;
+	int (* pior_err_func)(void) = xp_error_PIOR;
+
+
+	if (!ia64_platform_is("sn2")) {
+		return -ENODEV;
+	}
+
+
+	/*
+	 * Register a nofault code region which performs a cross-partition
+	 * PIO read. If the PIO read times out, the MCA handler will consume
+	 * the error and return to a kernel-provided instruction to indicate
+	 * an error. This PIO read exists because it is guaranteed to timeout
+	 * if the destination is down (AMO operations do not timeout on at
+	 * least some CPUs on Shubs <= v1.2, which unfortunately we have to
+	 * work around).
+	 */
+	if ((ret = sn_register_nofault_code(*(u64 *) pior_func,
+					*(u64 *) pior_err_func,
+					*(u64 *) pior_err_func, 1, 1)) != 0) {
+		printk(KERN_ERR "XP: can't register nofault code, error=%d\n",
+			ret);
+	}
+
+
+	xp_init_xpc();
+
+	return 0;
+}
+module_init(xp_init);
+
+
+void __exit
+xp_exit(void)
+{
+	int (* pior_func)(void *) = xp_nofault_PIOR;
+	int (* pior_err_func)(void) = xp_error_PIOR;
+
+
+	xp_exit_xpc();
+
+
+	/* unregister the PIO nofault code region */
+	(void) sn_register_nofault_code(*(u64 *) pior_func,
+					*(u64 *) pior_err_func,
+					*(u64 *) pior_err_func, 1, 0);
+}
+module_exit(xp_exit);
+
+
+MODULE_AUTHOR("Silicon Graphics, Inc.");
+MODULE_DESCRIPTION("Cross Partition (XP) base");
+MODULE_LICENSE("GPL");
+
+EXPORT_SYMBOL(xp_nofault_PIOR);
+EXPORT_SYMBOL(xpc_registrations);
+EXPORT_SYMBOL(xpc_interface);
+EXPORT_SYMBOL(xpc_clear_interface);
+EXPORT_SYMBOL(xpc_set_interface);
+EXPORT_SYMBOL(xpc_connect);
+EXPORT_SYMBOL(xpc_disconnect);
+
+#endif /* ! SN_PROM */
+
Index: linux/arch/ia64/sn/kernel/xp_nofault.S
=================================--- linux.orig/arch/ia64/sn/kernel/xp_nofault.S	1969-12-31 18:00:00.000000000 -0600
+++ linux/arch/ia64/sn/kernel/xp_nofault.S	2004-04-23 12:55:57.000000000 -0500
@@ -0,0 +1,31 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2004 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+
+/*
+ * The xp_nofault_PIOR function takes a pointer to a remote PIO register
+ * and attempts to load and consume a value from it.  This function
+ * will be registered as a nofault code block.  In the event that the
+ * PIO read fails, the MCA handler will force the error to look
+ * corrected and vector to the xp_error_PIOR which will return an error.
+ *
+ *	extern int xp_nofault_PIOR(void *remote_register);
+ */
+
+	.global xp_nofault_PIOR
+xp_nofault_PIOR:
+	mov	r8=r0			// Stage a success return value
+	ld8.acq	r9=[r32];;		// PIO Read the specified register
+	adds	r9=1,r9			// Add to force a consume
+	br.ret.sptk.many b0;;		// Return success
+
+	.global xp_error_PIOR
+xp_error_PIOR:
+	mov	r8=1			// Return value of 1
+	br.ret.sptk.many b0;;		// Return failure
+
Index: linux/include/asm-ia64/sn/xp.h
=================================--- linux.orig/include/asm-ia64/sn/xp.h	1969-12-31 18:00:00.000000000 -0600
+++ linux/include/asm-ia64/sn/xp.h	2004-04-23 13:36:40.000000000 -0500
@@ -0,0 +1,552 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2004 Silicon Graphics, Inc. All rights reserved.
+ */
+
+
+/*
+ * External Cross Partition (XP) structures and defines.
+ */
+
+
+#ifndef _ASM_IA64_SN_XP_H
+#define _ASM_IA64_SN_XP_H
+
+
+#ifndef	SN_PROM
+#include <linux/version.h>
+#include <linux/cache.h>
+#include <asm/sn/types.h>
+#include <asm/sn/bte.h>
+#include <asm/hardirq.h>
+#else /* ! SN_PROM */
+#include "xpc_types.h"
+#endif /* ! SN_PROM */
+
+
+#ifdef CONFIG_KDB
+#include <linux/kdb.h>
+#include <linux/kdbprivate.h>
+
+#define XP_ASSERT_ALWAYS(_expr)						\
+	if (!(_expr)) {							\
+		printk(KERN_EMERG "Assertion [ %s ] failed!\n"		\
+			"Assertion is in %s() [%s, line %d]!\n",	\
+			#_expr, __FUNCTION__, __FILE__, __LINE__);	\
+		KDB_ENTER();						\
+	}
+#else /* CONFIG_KDB */
+#ifndef	SN_PROM
+#define XP_ASSERT_ALWAYS(_expr)						\
+	if (!(_expr)) {							\
+		printk(KERN_EMERG "Assertion [ %s ] failed!\n"		\
+			"Assertion is in %s() [%s, line %d]!\n",	\
+			#_expr, __FUNCTION__, __FILE__, __LINE__);	\
+		BUG();							\
+	}
+#else /* ! SN_PROM */
+#define XP_ASSERT_ALWAYS(_expr)						\
+	if (!(_expr)) {							\
+		printf("XP_ASSERT %s failed at %s line %d\n",		\
+			#_expr, __FILE__, __LINE__);			\
+	}
+#endif /* ! SN_PROM */
+#endif /* CONFIG_KDB */
+
+
+#ifdef XP_ASSERT_ON
+#define XP_ASSERT(_expr)	XP_ASSERT_ALWAYS(_expr)
+#else
+#define XP_ASSERT(_expr)
+#endif
+
+
+/*
+ * Define the number of u64s required to represent all the C-brick nasids
+ * as a bitmap.  The cross-partition kernel modules deal only with
+ * C-brick nasids, thus the need for bitmaps which don't account for
+ * odd-numbered (non C-brick) nasids.
+ */
+#define XP_MAX_NASIDS		(MAX_NASIDS / 2)
+#define XP_NUM_NASID_WORDS	((XP_MAX_NASIDS + 63)/ 64)
+
+
+/*
+ * Wrapper for bte_copy() that should it return a failure status will retry
+ * the bte_copy() once in the hope that the failure was due to a temporary
+ * aberration (i.e., the link going down temporarily).
+ *
+ * See bte_copy for definition of the input parameters.
+ */
+static __inline__ bte_result_t
+xp_bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification)
+{
+	bte_result_t ret;
+
+
+	ret = bte_copy(src, dest, len, mode, notification);
+
+	if (ret != BTE_SUCCESS) {
+		if (!in_interrupt()) {
+			cond_resched();
+		}
+		ret = bte_copy(src, dest, len, mode, notification);
+	}
+
+	return ret;
+}
+
+
+/*
+ * XPC establishes channel connections between the local partition and any
+ * other partition that is currently up. Over these channels, kernel-level
+ * `users' can communicate with their counterparts on the other partitions.
+ *
+ * The maxinum number of channels is limited to eight. For performance reasons,
+ * the internal cross partition structures require sixteen bytes per channel,
+ * and eight allows all of this interface-shared info to fit in one cache line.
+ *
+ * XPC_NCHANNELS reflects the total number of channels currently defined.
+ * If the need for additional channels arises, one can simply increase
+ * XPC_NCHANNELS accordingly. If the day should come where that number
+ * exceeds the MAXIMUM number of channels allowed (eight), then one will need
+ * to make changes to the XPC code to allow for this.
+ *
+ * 
+ *    CHANNEL | USER          | PURPOSE                                  
+ *   ---------+---------------+---------------------------------------------
+ *    0       | XPMEM         | cross partition memory driver
+ *   ---------+---------------+---------------------------------------------
+ *    1       | XPNET         | cross partition network driver
+ *   ---------+---------------+---------------------------------------------
+ *    2       | XPCT          | XPC test channel
+ *   ---------+---------------+---------------------------------------------
+ *    3-7     | unused        | reserved for future use
+ *   ---------+---------------+---------------------------------------------
+ *
+ */
+#define XPC_MEM_CHANNEL		0	/* memory channel number */
+#define	XPC_NET_CHANNEL		1	/* network channel number */
+#define	XPC_TEST_CHANNEL	2	/* test channel number */
+
+#define	XPC_NCHANNELS		3	/* #of defined channels */
+#define XPC_MAX_NCHANNELS	8	/* max #of channels allowed */
+
+#if XPC_NCHANNELS > XPC_MAX_NCHANNELS
+#error	XPC_NCHANNELS exceeds MAXIMUM allowed.
+#endif
+
+
+/*
+ * The format of an XPC message is as follows:
+ *
+ *      +-------+--------------------------------+
+ *      | flags |////////////////////////////////|
+ *      +-------+--------------------------------+
+ *      |             message #                  |
+ *      +----------------------------------------+
+ *      |     payload (user-defined message)     |
+ *      |                                        |
+ *         		:
+ *      |                                        |
+ *      +----------------------------------------+
+ *
+ * The size of the payload is defined by the user via xpc_connect(). A user-
+ * defined message resides in the payload area.
+ *
+ * The user should have no dealings with the message header, but only the
+ * message's payload. When a message entry is allocated (via xpc_allocate())
+ * a pointer to the payload area is returned and not the actual beginning of
+ * the XPC message. The user then constructs a message in the payload area
+ * and passes that pointer as an argument on xpc_send() or xpc_send_notify().
+ *
+ * The size of a message entry (within a message queue) must be a cacheline
+ * sized multiple in order to facilitate the BTE transfer of messages from one
+ * message queue to another. A macro, XPC_MSG_SIZE(), is provided for the user
+ * that wants to fit as many msg entries as possible in a given memory size
+ * (e.g. a memory page).
+ */
+typedef struct {
+	volatile u8 flags;	/* FOR XPC INTERNAL USE ONLY */
+	u8 reserved[7];		/* FOR XPC INTERNAL USE ONLY */
+	s64 number;		/* FOR XPC INTERNAL USE ONLY */
+
+	u64 payload;		/* user defined portion of message */
+} xpc_msg_t;
+
+
+#define XPC_MSG_PAYLOAD_OFFSET	(u64) (&((xpc_msg_t *)0)->payload)
+#define XPC_MSG_SIZE(_payload_size)					\
+	L1_CACHE_ALIGN(XPC_MSG_PAYLOAD_OFFSET + (_payload_size))
+
+
+/*
+ * Define the return values and values passed to user's callout functions. 
+ * (It is important to add new value codes at the end just preceding
+ * xpcUnknownReason, which must have the highest numerical value.)
+ */
+typedef enum {
+	xpcSuccess = 0,
+
+	xpcNotConnected,	/*  1: channel is not connected */
+	xpcConnected,		/*  2: channel connected (opened) */
+	xpcRETIRED1,		/*  3: (formerly xpcDisconnected) */
+
+	xpcMsgReceived,		/*  4: message received */
+	xpcMsgDelivered,	/*  5: message delivered and acknowledged */
+
+	xpcRETIRED2,		/*  6: (formerly xpcTransferFailed) */
+
+	xpcNoWait,		/*  7: operation would require wait */
+	xpcRetry,		/*  8: retry operation */
+	xpcTimeout,		/*  9: timeout in xpc_allocate_msg_wait() */
+	xpcInterrupted,		/* 10: interrupted wait */
+
+	xpcUnequalMsgSizes,	/* 11: message size disparity between sides */
+	xpcInvalidAddress,	/* 12: invalid address */
+
+	xpcNoMemory,		/* 13: no memory available for XPC structures */
+	xpcLackOfResources,	/* 14: insufficient resources for operation */
+	xpcUnregistered,	/* 15: channel is not registered */
+	xpcAlreadyRegistered,	/* 16: channel is already registered */
+
+	xpcPartitionDown,	/* 17: remote partition is down */
+	xpcNotLoaded,		/* 18: XPC module is not loaded */
+	xpcUnloading,		/* 19: this side is unloading XPC module */
+
+	xpcBadMagic,		/* 20: XPC MAGIC string not found */
+
+	xpcReactivating,	/* 21: remote partition was reactivated */
+
+	xpcUnregistering,	/* 22: this side is unregistering channel */
+	xpcOtherUnregistering,	/* 23: other side is unregistering channel */
+
+	xpcCloneKThread,	/* 24: cloning kernel thread */
+	xpcCloneKThreadFailed,	/* 25: cloning kernel thread failed */
+
+	xpcNoHeartbeat,		/* 26: remote partition has no heartbeat */
+
+	xpcPioReadError,	/* 27: PIO read error */
+	xpcPhysAddrRegFailed,	/* 28: registration of phys addr range failed */
+
+	xpcBteDirectoryError,	/* 29: maps to BTEFAIL_DIR */
+	xpcBtePoisonError,	/* 30: maps to BTEFAIL_POISON */
+	xpcBteWriteError,	/* 31: maps to BTEFAIL_WERR */
+	xpcBteAccessError,	/* 32: maps to BTEFAIL_ACCESS */
+	xpcBtePWriteError,	/* 33: maps to BTEFAIL_PWERR */
+	xpcBtePReadError,	/* 34: maps to BTEFAIL_PRERR */
+	xpcBteTimeOutError,	/* 35: maps to BTEFAIL_TOUT */
+	xpcBteXtalkError,	/* 36: maps to BTEFAIL_XTERR */
+	xpcBteNotAvailable,	/* 37: maps to BTEFAIL_NOTAVAIL */
+	xpcBteUnmappedError,	/* 38: unmapped BTEFAIL_ error */
+
+	xpcBadVersion,		/* 39: bad version number */
+	xpcVarsNotSet,		/* 40: the XPC variables are not set up */
+	xpcNoRsvdPageAddr,	/* 41: unable to get rsvd page's phys addr */
+	xpcInvalidPartid,	/* 42: invalid partition ID */
+	xpcLocalPartid,		/* 43: local partition ID */
+
+	xpcUnknownReason	/* 44: unknown reason -- must be last in list */
+} xpc_t;
+
+
+//>>> is inline a good idea with all the strings?
+static __inline__ char *
+xpc_get_ascii_reason_code(xpc_t reason)
+{
+	switch (reason) {
+	case xpcSuccess:		return "";
+	case xpcNotConnected:		return "xpcNotConnected";
+	case xpcConnected:		return "xpcConnected";
+	case xpcRETIRED1:		return "xpcRETIRED1";
+	case xpcMsgReceived:		return "xpcMsgReceived";
+	case xpcMsgDelivered:		return "xpcMsgDelivered";
+	case xpcRETIRED2:		return "xpcRETIRED2";
+	case xpcNoWait:			return "xpcNoWait";
+	case xpcRetry:			return "xpcRetry";
+	case xpcTimeout:		return "xpcTimeout";
+	case xpcInterrupted:		return "xpcInterrupted";
+	case xpcUnequalMsgSizes:	return "xpcUnequalMsgSizes";
+	case xpcInvalidAddress:		return "xpcInvalidAddress";
+	case xpcNoMemory:		return "xpcNoMemory";
+	case xpcLackOfResources:	return "xpcLackOfResources";
+	case xpcUnregistered:		return "xpcUnregistered";
+	case xpcAlreadyRegistered:	return "xpcAlreadyRegistered";
+	case xpcPartitionDown:		return "xpcPartitionDown";
+	case xpcNotLoaded:		return "xpcNotLoaded";
+	case xpcUnloading:		return "xpcUnloading";
+	case xpcBadMagic:		return "xpcBadMagic";
+	case xpcReactivating:		return "xpcReactivating";
+	case xpcUnregistering:		return "xpcUnregistering";
+	case xpcOtherUnregistering:	return "xpcOtherUnregistering";
+	case xpcCloneKThread:		return "xpcCloneKThread";
+	case xpcCloneKThreadFailed:	return "xpcCloneKThreadFailed";
+	case xpcNoHeartbeat:		return "xpcNoHeartbeat";
+	case xpcPioReadError:		return "xpcPioReadError";
+	case xpcPhysAddrRegFailed:	return "xpcPhysAddrRegFailed";
+	case xpcBteDirectoryError:	return "xpcBteDirectoryError";
+	case xpcBtePoisonError:		return "xpcBtePoisonError";
+	case xpcBteWriteError:		return "xpcBteWriteError";
+	case xpcBteAccessError:		return "xpcBteAccessError";
+	case xpcBtePWriteError:		return "xpcBtePWriteError";
+	case xpcBtePReadError:		return "xpcBtePReadError";
+	case xpcBteTimeOutError:	return "xpcBteTimeOutError";
+	case xpcBteXtalkError:		return "xpcBteXtalkError";
+	case xpcBteNotAvailable:	return "xpcBteNotAvailable";
+	case xpcBteUnmappedError:	return "xpcBteUnmappedError";
+	case xpcBadVersion:		return "xpcBadVersion";
+	case xpcVarsNotSet:		return "xpcVarsNotSet";
+	case xpcNoRsvdPageAddr:		return "xpcNoRsvdPageAddr";
+	case xpcInvalidPartid:		return "xpcInvalidPartid";
+	case xpcLocalPartid:		return "xpcLocalPartid";
+	case xpcUnknownReason:		return "xpcUnknownReason";
+	default:			return "undefined reason code";
+	}
+}
+
+
+/*
+ * Define the callout function types used by XPC to update the user on
+ * connection activity and state changes (via the user function registered by
+ * xpc_connect()) and to notify them of messages received and delivered (via
+ * the user function registered by xpc_send_notify()).
+ *
+ * The two function types are xpc_channel_func_t and xpc_notify_func_t and
+ * both share the following arguments, with the exception of "data", which
+ * only xpc_channel_func_t has.
+ *
+ * Arguments:
+ *
+ *	reason - reason code. (See following table.)
+ *	partid - partition ID associated with condition.
+ *	ch_number - channel # associated with condition.
+ *	data - pointer to optional data. (See following table.)
+ *	key - pointer to optional user-defined value provided as the "key"
+ *	      argument to xpc_connect() or xpc_send_notify().
+ *
+ * In the following table the "Optional Data" column applies to callouts made
+ * to functions registered by xpc_connect(). A "NA" in that column indicates
+ * that this reason code can be passed to functions registered by
+ * xpc_send_notify() (i.e. they don't have data arguments).
+ *
+ * Also, the first three reason codes in the following table indicate
+ * success, whereas the others indicate failure. When a failure reason code
+ * is received, one can assume that the channel is not connected.
+ *
+ *
+ * Reason Code          | Cause                          | Optional Data
+ * ===========+================+==========+ * xpcConnected         | connection has been established| max #of entries
+ *                      | to the specified partition on  | allowed in message
+ *                      | the specified channel          | queue 
+ * ---------------------+--------------------------------+---------------------
+ * xpcMsgReceived       | an XPC message arrived from    | address of payload
+ *                      | the specified partition on the |
+ *                      | specified channel              | [the user must call
+ *                      |                                | xpc_received() when
+ *                      |                                | finished with the
+ *                      |                                | payload]
+ * ---------------------+--------------------------------+---------------------
+ * xpcMsgDelivered      | notification that the message  | NA
+ *                      | was delivered to the intended  |
+ *                      | recipient and that they have   |
+ *                      | acknowledged its receipt by    |
+ *                      | calling xpc_received()         |
+ * ===========+================+==========+ * xpcUnequalMsgSizes   | can't connect to the specified | NULL
+ *                      | partition on the specified     |
+ *                      | channel because of mismatched  |
+ *                      | message sizes                  |
+ * ---------------------+--------------------------------+---------------------
+ * xpcNoMemory          | insufficient memory avaiable   | NULL
+ *                      | to allocate message queue      | 
+ * ---------------------+--------------------------------+---------------------
+ * xpcLackOfResources   | lack of resources to create    | NULL
+ *                      | the necessary kthreads to      |
+ *                      | support the channel            |
+ * ---------------------+--------------------------------+---------------------
+ * xpcUnregistering     | this side's user has           | NULL or NA
+ *                      | unregistered by calling        | 
+ *                      | xpc_disconnect()               |
+ * ---------------------+--------------------------------+---------------------
+ * xpcOtherUnregistering| the other side's user has      | NULL or NA
+ *                      | unregistered by calling        |
+ *                      | xpc_disconnect()               |
+ * ---------------------+--------------------------------+---------------------
+ * xpcNoHeartbeat       | the other side's XPC is no     | NULL or NA
+ *                      | longer heartbeating            |
+ *                      |                                |
+ * ---------------------+--------------------------------+---------------------
+ * xpcUnloading         | this side's XPC module is      | NULL or NA
+ *                      | being unloaded                 |
+ *                      |                                |
+ * ---------------------+--------------------------------+---------------------
+ * xpcOtherUnloading    | the other side's XPC module is | NULL or NA
+ *                      | is being unloaded              |
+ *                      |                                |
+ * ---------------------+--------------------------------+---------------------
+ * xpcPioReadError      | xp_nofault_PIOR() returned an  | NULL or NA
+ *                      | error while sending an IPI     |
+ *                      |                                |
+ * ---------------------+--------------------------------+---------------------
+ * xpcInvalidAddress    | the address either received or | NULL or NA
+ *                      | sent by the specified partition|
+ *                      | is invalid                     |
+ * ---------------------+--------------------------------+---------------------
+ * xpcBteNotAvailable   | attempt to pull data from the  | NULL or NA
+ * xpcBtePoisonError    | specified partition over the   |
+ * xpcBteWriteError     | specified channel via a        |
+ * xpcBteAccessError    | bte_copy() failed              |
+ * xpcBteTimeOutError   |                                |
+ * xpcBteXtalkError     |                                |
+ * xpcBteDirectoryError |                                |
+ * xpcBteGenericError   |                                |
+ * xpcBteUnmappedError  |                                |
+ * ---------------------+--------------------------------+---------------------
+ * xpcUnknownReason     | the specified channel to the   | NULL or NA
+ *                      | specified partition was        |
+ *                      | unavailable for unknown reasons|
+ * ===========+================+==========+ */
+
+typedef void (*xpc_channel_func_t)(xpc_t reason, partid_t partid, int ch_number,
+		void *data, void *key);
+
+typedef void (*xpc_notify_func_t)(xpc_t reason, partid_t partid, int ch_number,
+		void *key);
+
+
+/*
+ * The following is a registration entry. There is a global array of these,
+ * one per channel. It is used to record the connection registration made
+ * by the users of XPC (XPMEM and XPNET). As long as a registration entry
+ * exists, for any partition that comes up, XPC will attempt to establish a
+ * connection on that channel. Notification that a connection has been made
+ * will occur via the xpc_channel_func_t function.
+ */
+typedef struct {
+
+	struct semaphore sema;
+
+	/*
+	 * Function to call when aynchronous notification is required for
+	 * such events as, a connection established/lost, or an incomming
+	 * message received, or an error condition encountered. A non-NULL
+	 * func field indicates that there is an active registration for
+	 * the channel.
+	 */
+	volatile xpc_channel_func_t func;
+	void *key;			/* pointer to user's key */
+
+	u16 nentries;			/* #of msg entries in local msg queue */
+	u16 msg_size;			/* message queue's message size */
+	u32 assigned_limit;		/* limit on #of assigned kthreads */
+	u32 idle_limit;			/* limit on #of idle kthreads */
+} ____cacheline_aligned xpc_registration_t;
+
+
+#define XPC_CHANNEL_REGISTERED(_c)	(xpc_registrations[_c].func != NULL)
+
+
+/* the following are valid xpc_allocate() flags */
+#define XPC_WAIT	0		/* wait flag */
+#define XPC_NOWAIT	1		/* no wait flag */
+
+
+#ifndef __XPC_MAIN__
+
+
+typedef struct {
+	void (*connect)(int);
+	void (*disconnect)(int);
+	xpc_t (*allocate)(partid_t, int, u32, void **);
+	xpc_t (*send)(partid_t, int, void *);
+	xpc_t (*send_notify)(partid_t, int, void *, xpc_notify_func_t, void *);
+	void (*received)(partid_t, int, void *);
+	xpc_t (*partid_to_nasids)(partid_t, void *);
+} xpc_interface_t;
+
+
+extern xpc_interface_t xpc_interface;
+
+#ifdef	SN_PROM
+extern int xpc_init(void);
+extern void xpc_exit(void);
+#endif /* SN_PROM */
+
+extern xpc_t xpc_connect(int, xpc_channel_func_t, void *, u16, u16, u32, u32);
+extern void xpc_disconnect(int);
+
+
+/*
+ * extern xpc_t xpc_allocate(partid_t partid, int ch_number, u32 flags,
+ *				void **payload);
+ */
+#define xpc_allocate(_partid, _ch_number, _flags, _payload)		\
+		xpc_interface.allocate(_partid, _ch_number, _flags, _payload)
+
+
+/*
+ * extern xpc_t xpc_send(partid_t partid, int ch_number, void *payload);
+ */
+#define xpc_send(_partid, _ch_number, _payload)				\
+		xpc_interface.send(_partid, _ch_number, _payload)
+
+
+/*
+ * extern xpc_t xpc_send_notify(partid_t partid, int ch_number, void *payload,
+ *				xpc_notify_func_t func, void *key);
+ */
+#define xpc_send_notify(_partid, _ch_number, _payload, _func, _key)	\
+		xpc_interface.send_notify(_partid, _ch_number, _payload,\
+					_func, _key)
+
+
+/*
+ * extern void xpc_received(partid_t partid, int ch_number, void *payload);
+ */
+#define xpc_received(_partid, _ch_number, _payload)			\
+		xpc_interface.received(_partid, _ch_number, _payload)
+
+
+/*
+ * extern xpc_t xpc_partid_to_nasids(partid_t partid, void * nasids);
+ */
+#define xpc_partid_to_nasids(_partid, _nasids)				\
+		xpc_interface.partid_to_nasids(_partid, _nasids)
+
+
+#else /* __XPC_MAIN__ */
+
+extern void xpc_set_interface(void (*)(int),
+		void (*)(int),
+		xpc_t (*)(partid_t, int, u32, void **),
+		xpc_t (*)(partid_t, int, void *),
+		xpc_t (*)(partid_t, int, void *, xpc_notify_func_t, void *),
+		void (*)(partid_t, int, void *),
+		xpc_t (*)(partid_t, void *));
+extern void xpc_clear_interface(void);
+
+extern void xpc_initiate_connect(int);
+extern void xpc_initiate_disconnect(int);
+extern xpc_t xpc_allocate(partid_t, int, u32, void **);
+extern xpc_t xpc_send(partid_t, int, void *);
+extern xpc_t xpc_send_notify(partid_t, int, void *, xpc_notify_func_t, void *);
+extern void xpc_received(partid_t, int, void *);
+extern xpc_t xpc_partid_to_nasids(partid_t, void *);
+
+#endif /* __XPC_MAIN__ */
+
+#ifdef	SN_PROM
+extern void xpc_poll(void);
+#endif /* SN_PROM */
+
+extern int xp_nofault_PIOR(void *);
+extern int xp_error_PIOR(void);
+
+
+#endif /* _ASM_IA64_SN_XP_H */
+
Index: linux/include/asm-ia64/sn/xp_dbgtk.h
=================================--- linux.orig/include/asm-ia64/sn/xp_dbgtk.h	1969-12-31 18:00:00.000000000 -0600
+++ linux/include/asm-ia64/sn/xp_dbgtk.h	2004-04-23 12:55:57.000000000 -0500
@@ -0,0 +1,52 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2004 Silicon Graphics, Inc. All rights reserved.
+ */
+
+
+/*
+ * Cross Partition (XP)'s dbgtk related definitions.
+ */
+
+
+#ifndef _ASM_IA64_SN_XP_DBGTK_H
+#define _ASM_IA64_SN_XP_DBGTK_H
+
+
+#if defined(CONFIG_DBGTK) || defined(CONFIG_DBGTK_MODULE)
+
+#include <linux/dbgtk.h>
+
+#else /* defined(CONFIG_DBGTK) || defined(CONFIG_DBGTK_MODULE) */
+
+#define DECLARE_DPRINTK(_mn, _hl, _dcs, _dps, _sd)
+#define EXTERN_DPRINTK(_mn)
+#define REG_DPRINTK(_mn)
+#define UNREG_DPRINTK(_mn)
+#define DPRINTK(_mn, mask, fmt...)
+#define DPRINTK_ALWAYS(_mn, mask, fmt...) printk(fmt)
+
+#define DECLARE_DTRACE(_mn, _dtm, _sd)
+#define EXTERN_DTRACE(_mn)
+#define REG_DTRACE(_mn)
+#define UNREG_DTRACE(_mn)
+#define DTRACE(_mn, mask, cs, p1, p2)
+#define DTRACEI(_mn, mask, cs, p1, p2)
+#define DTRACE_L(_mn, mask, cs, p1, p2, p3, p4, p5, p6)
+#define DTRACEI_L(_mn, mask, cs, p1, p2, p3, p4, p5, p6)
+
+#define DECLARE_DCOUNTER(_pd, _cn)
+#define EXTERN_DCOUNTER(_cn)
+#define REG_DCOUNTER(_cn)
+#define UNREG_DCOUNTER(_cn)
+#define DCOUNT(_cn)
+#define DCOUNT_CLEAR(_cn)
+
+#endif /* defined(CONFIG_DBGTK) || defined(CONFIG_DBGTK_MODULE) */
+
+
+#endif /* _ASM_IA64_SN_XP_DBGTK_H */
+

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/4] SGI Altix cross partition functionality
  2004-06-16 16:35 [PATCH 2/4] SGI Altix cross partition functionality Dean Nelson
@ 2004-06-16 17:28 ` Christoph Hellwig
  2004-06-16 20:22 ` Keith Owens
                   ` (13 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: Christoph Hellwig @ 2004-06-16 17:28 UTC (permalink / raw)
  To: linux-ia64

> +	default m if (IA64_SGI_SN2 || IA64_GENERIC)

really?

> +#ifndef SN_PROM
> +#include <linux/kernel.h>
> +#include <linux/interrupt.h>
> +#include <linux/module.h>
> +#include <asm/sn/intr.h>
> +#include <asm/sn/sn_sal.h>
> +#include <asm/sn/xp.h>
> +#else /* ! SN_PROM */
> +#include "xp.h"
> +#include "xpc_stubs.h"
> +#endif /* ! SN_PROM */

Please kill all the prom compat gunk.  Once non-SGI people touch it you can't
easily merge it back anyway for license reasons.

> +
> +
> +/*
> + * Initialize the XPC interface to inidicate that XPC isn't loaded.
> + */
> +static xpc_t xpc_notloaded(void) { return xpcNotLoaded; }
> +
> +xpc_interface_t xpc_interface = {
> +	(void (*)(int)) xpc_notloaded,
> +	(void (*)(int)) xpc_notloaded,
> +	(xpc_t (*)(partid_t, int, u32, void **)) xpc_notloaded,
> +	(xpc_t (*)(partid_t, int, void *)) xpc_notloaded,
> +	(xpc_t (*)(partid_t, int, void *, xpc_notify_func_t, void *))
> +								xpc_notloaded,
> +	(void (*)(partid_t, int, void *)) xpc_notloaded,
> +	(xpc_t (*)(partid_t, void *)) xpc_notloaded
> +};

Yikes.  So you define a method table where only one possible provider exists
anyway?  Please merge xp and xpc into a single module and get rid off all this
junk.

> +#ifdef CONFIG_KDB
> +
> +static void
> +xpc_kdb_print_users(xpc_registration_t *registration, int ch_number)
> +{
> +	kdb_printf("xpc_registrations[channel=%d] (0x%p):\n", ch_number,
> +							(void *) registration);
> +
> +	kdb_printf("\t&sema=0x%p\n", (void *) &registration->sema);
> +	kdb_printf("\tfunc=0x%p\n", (void *) registration->func);
> +	kdb_printf("\tkey=0x%p\n", registration->key);
> +	kdb_printf("\tnentries=%d\n", registration->nentries);
> +	kdb_printf("\tmsg_size=%d\n", registration->msg_size);
> +	kdb_printf("\tassigned_limit=%d\n", registration->assigned_limit);
> +	kdb_printf("\tidle_limit=%d\n", registration->idle_limit);
> +}

IIRC Keith objected against adding kdb-related code to mainline in the past.
And IMHO it's indeed better placed in the kdb patch.

> +	XP_ASSERT(ch_number >= 0 && ch_number < XPC_NCHANNELS);
> +	XP_ASSERT(payload_size != 0 && nentries != 0);
> +	XP_ASSERT(func != NULL);
> +	XP_ASSERT(assigned_limit != 0 && idle_limit <= assigned_limit);

Please use BUG_ON()

> +	xpc_registration_t *registration;

An d btw, please get rid of all these awfull typedefs.

> +/*
> + * Wrapper for bte_copy() that should it return a failure status will retry
> + * the bte_copy() once in the hope that the failure was due to a temporary
> + * aberration (i.e., the link going down temporarily).
> + *
> + * See bte_copy for definition of the input parameters.
> + */
> +static __inline__ bte_result_t
> +xp_bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification)
> +{
> +	bte_result_t ret;
> +
> +
> +	ret = bte_copy(src, dest, len, mode, notification);
> +
> +	if (ret != BTE_SUCCESS) {
> +		if (!in_interrupt()) {
> +			cond_resched();
> +		}

If you're ever calling this from under a spinlock you're screwed..

> + * 
> + *    CHANNEL | USER          | PURPOSE                                  
> + *   ---------+---------------+---------------------------------------------
> + *    0       | XPMEM         | cross partition memory driver
> + *   ---------+---------------+---------------------------------------------

What's that?  Can't find it in your driver submission.

> +typedef struct {
> +	volatile u8 flags;	/* FOR XPC INTERNAL USE ONLY */

volatile is always a BIG warning sign.  What does it try to do?

> +//>>> is inline a good idea with all the strings?
> +static __inline__ char *
> +xpc_get_ascii_reason_code(xpc_t reason)

No.

> Index: linux/include/asm-ia64/sn/xp_dbgtk.h
> =================================> --- linux.orig/include/asm-ia64/sn/xp_dbgtk.h	1969-12-31 18:00:00.000000000 -0600
> +++ linux/include/asm-ia64/sn/xp_dbgtk.h	2004-04-23 12:55:57.000000000 -0500
> @@ -0,0 +1,52 @@
> +/*
> + * This file is subject to the terms and conditions of the GNU General Public
> + * License.  See the file "COPYING" in the main directory of this archive
> + * for more details.
> + *
> + * Copyright (C) 2004 Silicon Graphics, Inc. All rights reserved.
> + */
> +
> +
> +/*
> + * Cross Partition (XP)'s dbgtk related definitions.
> + */
> +
> +
> +#ifndef _ASM_IA64_SN_XP_DBGTK_H
> +#define _ASM_IA64_SN_XP_DBGTK_H
> +
> +
> +#if defined(CONFIG_DBGTK) || defined(CONFIG_DBGTK_MODULE)
> +
> +#include <linux/dbgtk.h>
> +
> +#else /* defined(CONFIG_DBGTK) || defined(CONFIG_DBGTK_MODULE) */
> +
> +#define DECLARE_DPRINTK(_mn, _hl, _dcs, _dps, _sd)
> +#define EXTERN_DPRINTK(_mn)
> +#define REG_DPRINTK(_mn)
> +#define UNREG_DPRINTK(_mn)
> +#define DPRINTK(_mn, mask, fmt...)
> +#define DPRINTK_ALWAYS(_mn, mask, fmt...) printk(fmt)

Bah, please kill all that crap.


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/4] SGI Altix cross partition functionality
  2004-06-16 16:35 [PATCH 2/4] SGI Altix cross partition functionality Dean Nelson
  2004-06-16 17:28 ` Christoph Hellwig
@ 2004-06-16 20:22 ` Keith Owens
  2004-07-29 18:36 ` Dean Nelson
                   ` (12 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: Keith Owens @ 2004-06-16 20:22 UTC (permalink / raw)
  To: linux-ia64

On Wed, 16 Jun 2004 18:28:22 +0100, 
Christoph Hellwig <hch@infradead.org> wrote:
>> +static void
>> +xpc_kdb_print_users(xpc_registration_t *registration, int ch_number)
>> +{
>> +	kdb_printf("xpc_registrations[channel=%d] (0x%p):\n", ch_number,
>> +							(void *) registration);
>> +
>> +	kdb_printf("\t&sema=0x%p\n", (void *) &registration->sema);
>> +	kdb_printf("\tfunc=0x%p\n", (void *) registration->func);
>> +	kdb_printf("\tkey=0x%p\n", registration->key);
>> +	kdb_printf("\tnentries=%d\n", registration->nentries);
>> +	kdb_printf("\tmsg_size=%d\n", registration->msg_size);
>> +	kdb_printf("\tassigned_limit=%d\n", registration->assigned_limit);
>> +	kdb_printf("\tidle_limit=%d\n", registration->idle_limit);
>> +}
>
>IIRC Keith objected against adding kdb-related code to mainline in the past.

No, I objected to mainline kdb code like a serial console hook being
scattered across other patches.  This is not mainline kdb code, it is
using kdb to add a debugging feature to other code.  I have no
objection to that, code such as ACPI has done that in the past.

>And IMHO it's indeed better placed in the kdb patch.

Absolutely not.  kdb has no interest in the debugging entries of
individual subsystems, in the same way that printk() does not care who
calls it.

BTW, with kdb v4.4 you no longer need the extra CFLAGS, drop
CFLAGS_xp_main.o += -I $(TOPDIR)/arch/$(ARCH)/kdb


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/4] SGI Altix cross partition functionality
  2004-06-16 16:35 [PATCH 2/4] SGI Altix cross partition functionality Dean Nelson
  2004-06-16 17:28 ` Christoph Hellwig
  2004-06-16 20:22 ` Keith Owens
@ 2004-07-29 18:36 ` Dean Nelson
  2004-08-31 19:22 ` [PATCH 2/4] SGI Altix cross partition functionality (1st revision) Dean Nelson
                   ` (11 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: Dean Nelson @ 2004-07-29 18:36 UTC (permalink / raw)
  To: linux-ia64

On Wed, Jun 16, 2004 at 06:28:22PM +0100, Christoph Hellwig wrote:
> > +	XP_ASSERT(ch_number >= 0 && ch_number < XPC_NCHANNELS);
> > +	XP_ASSERT(payload_size != 0 && nentries != 0);
> > +	XP_ASSERT(func != NULL);
> > +	XP_ASSERT(assigned_limit != 0 && idle_limit <= assigned_limit);
> 
> Please use BUG_ON()

As you may have seen, on the lkml I asked for comments on the value of
creating a conditionally compiled version of BUG_ON(). I called it
DBUG_ON() and modeled it after dev_dbg(), which is conditionally
compiled based on #ifdef DEBUG. The subject of the lkml thread is:
[RFC] replace assorted ASSERT()s by something officially sanctioned

The feedback I got raised two issues:

	1. For a large number of ASSERT()s, WARN_ON() would be a better
	   fit than BUG_ON().

	2. The granularity of #ifdef DEBUG is too course. A finer
	   granularity on the driver level makes more sense. (This
	   also holds true for dev_dbg()).

To the first, I suggested that a conditionally compiled version of WARN_ON()
be added as well, i.e., DWARN_ON().

To the second, I didn't have much to say other than to agree. (I was hoping
the community would chime in with a community acceptable policy on this
matter.)

I still feel strongly that there is a need for a conditionally compiled
version of BUG_ON and WARN_ON() for the reason given in my RFC. (And I
really don't care what the name of the macro is.)

I haven't pushed any further on the matter because of the underwhelming
feedback to my RFC.

I'm bringing all of this to your attention because of the existing
XP_ASSERT()s in my proposed patches. I've dealt with the ones that
deserved to be BUG_ON()s. But I've got a lot of them that really (in
my opinion) should be conditionally compiled in. They really are only
necessary while in a debug mode.

So, I was wondering if you would accept my #define'ng a local DBUG_ON()
(or XP_ASSERT()) based on BUG_ON() and whether #ifdef DEBUG (or #ifdef
XP_DEBUG) is true?

Thanks,
Dean


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/4] SGI Altix cross partition functionality (1st revision)
  2004-06-16 16:35 [PATCH 2/4] SGI Altix cross partition functionality Dean Nelson
                   ` (2 preceding siblings ...)
  2004-07-29 18:36 ` Dean Nelson
@ 2004-08-31 19:22 ` Dean Nelson
  2004-09-01 10:19 ` Robin Holt
                   ` (10 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: Dean Nelson @ 2004-08-31 19:22 UTC (permalink / raw)
  To: linux-ia64

On Tue, Aug 24, 2004 at 08:17:33PM +0100, Christoph Hellwig wrote:
> On Tue, Aug 24, 2004 at 01:23:44PM -0500, Dean Nelson wrote:
> > Index: linux/arch/ia64/Kconfig
> > =================================> > --- linux.orig/arch/ia64/Kconfig	2004-08-17 13:31:26.000000000 -0500
> > +++ linux/arch/ia64/Kconfig	2004-08-23 11:39:50.000000000 -0500
> > @@ -189,6 +189,16 @@
> >  	depends on !IA64_HP_SIM
> >  	default y
> >  
> > +config IA64_SGI_SN_XPC
> > +	tristate "Support DMA Messaging between SGI machines"
> 
> Why do you have three different option when the only way they're usefull
> is to have all three enabled at the same time.

By this I asssume you mean that rather than having both IA64_SGI_SN_XPC
and IA64_SGI_SN_XPNET, we should have just one, like IA64_SGI_SN_XP, that
causes all the parts to get built. Correct?


> > +	case xpcMsgReceived:		return "xpcMsgReceived";
> > +	case xpcMsgDelivered:		return "xpcMsgDelivered";
> 
> Please don't add strerror-lookalikes to the kernel.

I'm not clear on this. Do you mean ditch xpc_get_ascii_reason_code()
altogether and have no mapping of a numeric reason code to an ASCII
string?

Or do you mean that the ASCII mapping shouldn't be to the same string,
but rather make the string more meaningful? For example, map
xpcNoHeartbeat to "remote partition has no heartbeat" instead of to
"xpcNoHeartbeat".

If you meant the former: XPC makes dev_info() calls to indicate that a
channel to a partition has connected and disconnected (and why). The why
is a reason code mapped to an ASCII string. Would you prefer that we put
out a numeric value as opposed to a more meaningful ASCII string?


> > +	for (ch_number = 0; ch_number < XPC_NCHANNELS; ch_number++) {
> > +		sema_init(&xpc_registrations[ch_number].sema, 1);  /* mutex */
> > +	}
> 
> A single mutex wouldn't do it?  It doesn't exactly look like it's used in
> fast-paths

Yeah, you're right it's not a fast-path and a single mutex would work.
I kind of like putting the lock within the data structure it's protecting.
When you get the lock, you've already got the data of interest in your
cache (obviously this depends on the size of the structure and where in
the structure the data you're interested in resides). We're not talking
about very much memory lost because of this. (The way it is we only end
up with a total of two semaphores, instead of just one.)

Would you be okay with the thing remaining as it is?


> > +	 */
> > +	if ((ret = sn_register_nofault_code(*(u64 *) pior_func,
> > +					*(u64 *) pior_err_func,
> > +					*(u64 *) pior_err_func, 1, 1)) != 0) {
> 
> Is the strange casting really unavoidable?

Yeah, it is unavoidable. sn_register_nofault_code() is expecting 'the address
of', not 'a pointer to the address of'. I did modify the code a bit so that
the cast is done at another spot, perhaps, this will be more acceptable to
you.

	void __exit
	xp_exit(void)
	{
		u64 func_addr = *(u64 *) xp_nofault_PIOR;
		u64 err_func_addr = *(u64 *) xp_error_PIOR;
                                                                                
		/* unregister the PIO read nofault code region */
		(void) sn_register_nofault_code(func_addr, err_func_addr,
						err_func_addr, 1, 0);
	}


Thanks,
Dean

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/4] SGI Altix cross partition functionality (1st revision)
  2004-06-16 16:35 [PATCH 2/4] SGI Altix cross partition functionality Dean Nelson
                   ` (3 preceding siblings ...)
  2004-08-31 19:22 ` [PATCH 2/4] SGI Altix cross partition functionality (1st revision) Dean Nelson
@ 2004-09-01 10:19 ` Robin Holt
  2004-09-04 11:12 ` Christoph Hellwig
                   ` (9 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: Robin Holt @ 2004-09-01 10:19 UTC (permalink / raw)
  To: linux-ia64

> > > +	case xpcMsgReceived:		return "xpcMsgReceived";
> > > +	case xpcMsgDelivered:		return "xpcMsgDelivered";
> > 
> > Please don't add strerror-lookalikes to the kernel.
> 
...
> 
> If you meant the former: XPC makes dev_info() calls to indicate that a
> channel to a partition has connected and disconnected (and why). The why
> is a reason code mapped to an ASCII string. Would you prefer that we put
> out a numeric value as opposed to a more meaningful ASCII string?

Please no magic numbers in logs.  This always leads to customers calling
support with the "what does 13 mean and should I be concerned"?  If the
community objects to the case statement above, then find a different
way that implements the textual name.

> 
> 
> > > +	for (ch_number = 0; ch_number < XPC_NCHANNELS; ch_number++) {
> > > +		sema_init(&xpc_registrations[ch_number].sema, 1);  /* mutex */
> > > +	}
> > 
> > A single mutex wouldn't do it?  It doesn't exactly look like it's used in
> > fast-paths
> 
> Yeah, you're right it's not a fast-path and a single mutex would work.
> I kind of like putting the lock within the data structure it's protecting.
> When you get the lock, you've already got the data of interest in your
> cache (obviously this depends on the size of the structure and where in
> the structure the data you're interested in resides). We're not talking
> about very much memory lost because of this. (The way it is we only end
> up with a total of two semaphores, instead of just one.)

I like keeping the lock protecting as little as possible.  This has been
drilled into peoples heads here at SGI since the early Cray days.  We have
always been told to keep locks protecting a single cohesive group of data.

Just my 2 cents.

Thanks,
Robin

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/4] SGI Altix cross partition functionality (1st revision)
  2004-06-16 16:35 [PATCH 2/4] SGI Altix cross partition functionality Dean Nelson
                   ` (4 preceding siblings ...)
  2004-09-01 10:19 ` Robin Holt
@ 2004-09-04 11:12 ` Christoph Hellwig
  2004-09-04 11:15 ` Christoph Hellwig
                   ` (8 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: Christoph Hellwig @ 2004-09-04 11:12 UTC (permalink / raw)
  To: linux-ia64

> > Why do you have three different option when the only way they're usefull
> > is to have all three enabled at the same time.
> 
> By this I asssume you mean that rather than having both IA64_SGI_SN_XPC
> and IA64_SGI_SN_XPNET, we should have just one, like IA64_SGI_SN_XP, that
> causes all the parts to get built. Correct?

Yes.

> > > +	case xpcMsgReceived:		return "xpcMsgReceived";
> > > +	case xpcMsgDelivered:		return "xpcMsgDelivered";
> > 
> > Please don't add strerror-lookalikes to the kernel.
> 
> I'm not clear on this. Do you mean ditch xpc_get_ascii_reason_code()
> altogether and have no mapping of a numeric reason code to an ASCII
> string?

Yes.  strerror() is entirely userspace policy.

> > A single mutex wouldn't do it?  It doesn't exactly look like it's used in
> > fast-paths
> 
> Yeah, you're right it's not a fast-path and a single mutex would work.
> I kind of like putting the lock within the data structure it's protecting.
> When you get the lock, you've already got the data of interest in your
> cache (obviously this depends on the size of the structure and where in
> the structure the data you're interested in resides). We're not talking
> about very much memory lost because of this. (The way it is we only end
> up with a total of two semaphores, instead of just one.)

Well, Linux is difficult.  First rule of thumb is keep it simple, and
if you ever need a fastpath semaphore you're better of making it a separate
entinity from the registration sempahore.


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/4] SGI Altix cross partition functionality (1st revision)
  2004-06-16 16:35 [PATCH 2/4] SGI Altix cross partition functionality Dean Nelson
                   ` (5 preceding siblings ...)
  2004-09-04 11:12 ` Christoph Hellwig
@ 2004-09-04 11:15 ` Christoph Hellwig
  2004-09-04 16:35 ` Russ Anderson
                   ` (7 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: Christoph Hellwig @ 2004-09-04 11:15 UTC (permalink / raw)
  To: linux-ia64

> Please no magic numbers in logs.  This always leads to customers calling
> support with the "what does 13 mean and should I be concerned"?  If the
> community objects to the case statement above, then find a different
> way that implements the textual name.

So run a postprocessor over the log.

> I like keeping the lock protecting as little as possible.  This has been
> drilled into peoples heads here at SGI since the early Cray days.  We have
> always been told to keep locks protecting a single cohesive group of data.

And because of that IRIX isa huge mess with far too much locks :)  Keep
it as simple as possible and optimize where optimization is needed.
Needless complexity is the root of all evil.


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/4] SGI Altix cross partition functionality (1st revision)
  2004-06-16 16:35 [PATCH 2/4] SGI Altix cross partition functionality Dean Nelson
                   ` (6 preceding siblings ...)
  2004-09-04 11:15 ` Christoph Hellwig
@ 2004-09-04 16:35 ` Russ Anderson
  2004-09-04 16:55 ` Christoph Hellwig
                   ` (6 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: Russ Anderson @ 2004-09-04 16:35 UTC (permalink / raw)
  To: linux-ia64

Christoph Hellwig wrote:
> 
> > I like keeping the lock protecting as little as possible.  This has been
> > drilled into peoples heads here at SGI since the early Cray days.  We have
> > always been told to keep locks protecting a single cohesive group of data.
> 
> Keep it as simple as possible and optimize where optimization is needed.
> Needless complexity is the root of all evil.

IMHO, Dean's code is simple and not complex.  There is a lock per channel.
What's so complex about that?

Dean's code> + for (ch_number = 0; ch_number < XPC_NCHANNELS; ch_number++) {
Dean's code> +         sema_init(&xpc_registrations[ch_number].sema, 1);  /* mutex */
Dean's code> + }

Perhaps I'm missing what you mean by "complexity".  I understand that a reasonable
way to modify a mono-CPU kernel to run on a dual-CPU system is to add a big
kernel lock.  And that as the number of CPUs increase, the locks need to be
finer grain to avoid excessive lock contention.  And that identifying and
breaking up the hot locks is a part of that process.  Cray went through
that process with COS, unicos, SGI with Irix, and now the community
with Linux.

What Dean is doing, and what the Cray and SGI people have learned over
the last couple decades of hard work, is that it is simpler and less complex 
to design in fine grain locks to avoid scaling problems.  We know that CPUs 
will get faster, the number of CPUs will increase, as will the number of nodes 
and amount of memory.  And as they increase, we know that big locks will get 
hot and need to broken up.  So that is why you will find people that
believe that it is simpler and less complex to design in fine grain locks,
to avoid having to track down and fix scaling bugs.  

Thanks,
-- 
Russ Anderson, OS RAS/Partitioning Project Lead  
SGI - Silicon Graphics Inc          rja@sgi.com

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/4] SGI Altix cross partition functionality (1st revision)
  2004-06-16 16:35 [PATCH 2/4] SGI Altix cross partition functionality Dean Nelson
                   ` (7 preceding siblings ...)
  2004-09-04 16:35 ` Russ Anderson
@ 2004-09-04 16:55 ` Christoph Hellwig
  2004-09-04 16:57 ` Christoph Hellwig
                   ` (5 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: Christoph Hellwig @ 2004-09-04 16:55 UTC (permalink / raw)
  To: linux-ia64

On Sat, Sep 04, 2004 at 11:35:11AM -0500, Russ Anderson wrote:
> Perhaps I'm missing what you mean by "complexity".  I understand that a reasonable
> way to modify a mono-CPU kernel to run on a dual-CPU system is to add a big
> kernel lock.  And that as the number of CPUs increase, the locks need to be
> finer grain to avoid excessive lock contention.  And that identifying and
> breaking up the hot locks is a part of that process.  Cray went through
> that process with COS, unicos, SGI with Irix, and now the community
> with Linux.
> 
> What Dean is doing, and what the Cray and SGI people have learned over
> the last couple decades of hard work, is that it is simpler and less complex 
> to design in fine grain locks to avoid scaling problems.  We know that CPUs 
> will get faster, the number of CPUs will increase, as will the number of nodes 
> and amount of memory.  And as they increase, we know that big locks will get 
> hot and need to broken up.  So that is why you will find people that
> believe that it is simpler and less complex to design in fine grain locks,
> to avoid having to track down and fix scaling bugs.  

Have you not looked at the code or are you publically trying to make a fool
of yourself?

The lock is taken during xfc_connect/disconnect which happen exactly at
ifconfig up/down time.  Please explain me why ifconfig scalability matters
to SGI.


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/4] SGI Altix cross partition functionality (1st revision)
  2004-06-16 16:35 [PATCH 2/4] SGI Altix cross partition functionality Dean Nelson
                   ` (8 preceding siblings ...)
  2004-09-04 16:55 ` Christoph Hellwig
@ 2004-09-04 16:57 ` Christoph Hellwig
  2004-09-05 11:45 ` Robin Holt
                   ` (4 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: Christoph Hellwig @ 2004-09-04 16:57 UTC (permalink / raw)
  To: linux-ia64

On Sat, Sep 04, 2004 at 05:55:16PM +0100, Christoph Hellwig wrote:
> > hot and need to broken up.  So that is why you will find people that
> > believe that it is simpler and less complex to design in fine grain locks,
> > to avoid having to track down and fix scaling bugs.  
> 
> Have you not looked at the code or are you publically trying to make a fool
> of yourself?
> 
> The lock is taken during xfc_connect/disconnect which happen exactly at
> ifconfig up/down time.  Please explain me why ifconfig scalability matters
> to SGI.

And btw, as your trying to play mister smartass here:  if you removed
support for the second possible channel instead of keeping it as hook
for your propritary xpmem module we'd have only one lock anyway.  What
about just completely killing support for that second channel and we
don't need to dicuss this issue at all.


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/4] SGI Altix cross partition functionality (1st revision)
  2004-06-16 16:35 [PATCH 2/4] SGI Altix cross partition functionality Dean Nelson
                   ` (9 preceding siblings ...)
  2004-09-04 16:57 ` Christoph Hellwig
@ 2004-09-05 11:45 ` Robin Holt
  2004-12-20 18:45 ` Dean Nelson
                   ` (3 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: Robin Holt @ 2004-09-05 11:45 UTC (permalink / raw)
  To: linux-ia64

On Sat, Sep 04, 2004 at 11:35:11AM -0500, Russ Anderson wrote:
> Christoph Hellwig wrote:
> > 
> > > I like keeping the lock protecting as little as possible.  This has been
> > > drilled into peoples heads here at SGI since the early Cray days.  We have
> > > always been told to keep locks protecting a single cohesive group of data.
> > 
> > Keep it as simple as possible and optimize where optimization is needed.
> > Needless complexity is the root of all evil.
> 
> IMHO, Dean's code is simple and not complex.  There is a lock per channel.
> What's so complex about that?
> 
> Dean's code> + for (ch_number = 0; ch_number < XPC_NCHANNELS; ch_number++) {
> Dean's code> +         sema_init(&xpc_registrations[ch_number].sema, 1);  /* mutex */
> Dean's code> + }
> 
...
> 
> What Dean is doing, and what the Cray and SGI people have learned over
> the last couple decades of hard work, is that it is simpler and less complex 
> to design in fine grain locks to avoid scaling problems.  We know that CPUs 
> will get faster, the number of CPUs will increase, as will the number of nodes 
> and amount of memory.  And as they increase, we know that big locks will get 
> hot and need to broken up.  So that is why you will find people that
> believe that it is simpler and less complex to design in fine grain locks,
> to avoid having to track down and fix scaling bugs.  

I would add that I think a lock that is protecting a channel is clearer to
understand than a lock per partition protecting all the channels in it.  When
I look at the partition structure and see a lock for the channels, I would,
at first glance, assume it is protecting addition or removal of the channels.

I remember someone once saying "Speaking clearly is saying what the other
person understands."  I think that applies here.

Robin

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/4] SGI Altix cross partition functionality (1st revision)
  2004-06-16 16:35 [PATCH 2/4] SGI Altix cross partition functionality Dean Nelson
                   ` (10 preceding siblings ...)
  2004-09-05 11:45 ` Robin Holt
@ 2004-12-20 18:45 ` Dean Nelson
  2004-12-21 12:20 ` Dean Nelson
                   ` (2 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: Dean Nelson @ 2004-12-20 18:45 UTC (permalink / raw)
  To: linux-ia64

On Tue, Aug 24, 2004 at 08:17:33PM +0100, Christoph Hellwig wrote:
> On Tue, Aug 24, 2004 at 01:23:44PM -0500, Dean Nelson wrote:
> > This patch contains the shim module (XP) which interfaces between the
> > communication module (XPC) and the functional support modules (like XPNET).
> > 
> is to have all three enabled at the same time.  Also as I mentioned previously
> please merge at least xp and xpc into a single module.
> 

As mentioned in the prologue to this series of patches having XP separate
from XPC has proven invaluable for debugging, which I'm guessing is not
a big selling point for you. But the main reason why we created it was
for XPMEM.

XPMEM, like XPNET, is a user of XPC and provides the ability to share
memory between processes on different partitions. It also provides the
ability for processes within the same partition to share memory.

XPMEM now carries the GNU General Public License. (You can find the source
at bonnie.engr.sgi.com:/proj/sgilinux/stout/isms/opensource/xpmem. This
source has been shipped to some customers as part of a BETA test program
and will be generally available with the next release of ProPack.) We are
planning to push XPMEM to the community as soon as we can find the time to
do so. (Note that it suffers from a number of the same 'issues' that were
raised about XP[C|NET], and we plan on addressing these before the initial
submission.)

Having the XP module separate from XPC allows for XPC to come and go
as needed without disturbing XPMEM and its users. This allows a site to
run a particular partition in isolation for a time (XPC not loaded) and
then open it up to the other partitions (XPC loaded).

In light of the above I would like to keep XP and XPC as separate modules.

Thanks,
Dean


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/4] SGI Altix cross partition functionality (1st revision)
  2004-06-16 16:35 [PATCH 2/4] SGI Altix cross partition functionality Dean Nelson
                   ` (11 preceding siblings ...)
  2004-12-20 18:45 ` Dean Nelson
@ 2004-12-21 12:20 ` Dean Nelson
  2005-01-05 11:35 ` Christoph Hellwig
  2005-01-05 11:35 ` Christoph Hellwig
  14 siblings, 0 replies; 16+ messages in thread
From: Dean Nelson @ 2004-12-21 12:20 UTC (permalink / raw)
  To: linux-ia64

On Sat, Sep 04, 2004 at 12:12:57PM +0100, Christoph Hellwig wrote:
> > > A single mutex wouldn't do it?  It doesn't exactly look like it's used in
> > > fast-paths
> > 
> > Yeah, you're right it's not a fast-path and a single mutex would work.
> > I kind of like putting the lock within the data structure it's protecting.
> > When you get the lock, you've already got the data of interest in your
> > cache (obviously this depends on the size of the structure and where in
> > the structure the data you're interested in resides). We're not talking
> > about very much memory lost because of this. (The way it is we only end
> > up with a total of two semaphores, instead of just one.)
> 
> Well, Linux is difficult.  First rule of thumb is keep it simple, and
> if you ever need a fastpath semaphore you're better of making it a separate
> entinity from the registration sempahore.

I agree with the idea of keeping things simple, and therefore started
replacing the per-channel semaphores (i.e., the sema field in the
xpc_registration structure) by a single semaphore for all channels.

I ran into a problem of sorts with xpc_disconnect(), which grabs the
single semaphore and then calls xpc_interface.disconnect() while
holding that semaphore. This function won't return until all connections
to remote partitions have disconnected, which can be a while. (The
waiting is necessary in order to guarantee that no thread will reference
anything in XPC once the module is unloaded.) This wasn't a problem when
there was a semaphore per channel, since you have to wait for an
'rmmod XPNET' to complete before you can do a subsequent 'insmod XPNET'.
But now one won't be able to do an insmod or rmmod of XPMEM during the
time an 'rmmod XPNET' is executing.

To not hold the semaphore across the call to xpc_interface.disconnect()
would require the addition of a 'flag' field in the xpc_registration
structure. This new field would be used to indicate whether a channel
is registered or not, and if registered, whether it is unregistering.
(Currently, the xpc_registration structure's 'func' field is used to
indicate whether a channel is registered or not, and holding the
semaphore keeps another process from registering the channel, since
we've cleared the 'func' field before calling xpc_interface.disconnect().)

I'm not sure that adding the 'flag' field is simpler than keeping things
the way they are with the per-channel semaphores.

Any thoughts?

Thanks,
Dean

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/4] SGI Altix cross partition functionality (1st revision)
  2004-06-16 16:35 [PATCH 2/4] SGI Altix cross partition functionality Dean Nelson
                   ` (12 preceding siblings ...)
  2004-12-21 12:20 ` Dean Nelson
@ 2005-01-05 11:35 ` Christoph Hellwig
  2005-01-05 11:35 ` Christoph Hellwig
  14 siblings, 0 replies; 16+ messages in thread
From: Christoph Hellwig @ 2005-01-05 11:35 UTC (permalink / raw)
  To: linux-ia64

On Mon, Dec 20, 2004 at 12:45:48PM -0600, Dean Nelson wrote:
> Having the XP module separate from XPC allows for XPC to come and go
> as needed without disturbing XPMEM and its users. This allows a site to
> run a particular partition in isolation for a time (XPC not loaded) and
> then open it up to the other partitions (XPC loaded).
> 
> In light of the above I would like to keep XP and XPC as separate modules.

I still think it's a bad idea, but I'm fed up with this discussion so go
ahead.


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/4] SGI Altix cross partition functionality (1st revision)
  2004-06-16 16:35 [PATCH 2/4] SGI Altix cross partition functionality Dean Nelson
                   ` (13 preceding siblings ...)
  2005-01-05 11:35 ` Christoph Hellwig
@ 2005-01-05 11:35 ` Christoph Hellwig
  14 siblings, 0 replies; 16+ messages in thread
From: Christoph Hellwig @ 2005-01-05 11:35 UTC (permalink / raw)
  To: linux-ia64

On Tue, Dec 21, 2004 at 06:20:08AM -0600, Dean Nelson wrote:
> I'm not sure that adding the 'flag' field is simpler than keeping things
> the way they are with the per-channel semaphores.

Well, keep them for now.


^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2005-01-05 11:35 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2004-06-16 16:35 [PATCH 2/4] SGI Altix cross partition functionality Dean Nelson
2004-06-16 17:28 ` Christoph Hellwig
2004-06-16 20:22 ` Keith Owens
2004-07-29 18:36 ` Dean Nelson
2004-08-31 19:22 ` [PATCH 2/4] SGI Altix cross partition functionality (1st revision) Dean Nelson
2004-09-01 10:19 ` Robin Holt
2004-09-04 11:12 ` Christoph Hellwig
2004-09-04 11:15 ` Christoph Hellwig
2004-09-04 16:35 ` Russ Anderson
2004-09-04 16:55 ` Christoph Hellwig
2004-09-04 16:57 ` Christoph Hellwig
2004-09-05 11:45 ` Robin Holt
2004-12-20 18:45 ` Dean Nelson
2004-12-21 12:20 ` Dean Nelson
2005-01-05 11:35 ` Christoph Hellwig
2005-01-05 11:35 ` Christoph Hellwig

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox