* [PATCH 1/4] SGI Altix cross partition functionality (1st revision)
2004-08-24 18:00 [PATCH 0/4] SGI Altix cross partition functionality (1st revision) Dean Nelson
@ 2004-08-24 18:22 ` Dean Nelson
2004-08-24 19:13 ` Christoph Hellwig
2004-08-24 18:23 ` [PATCH 2/4] " Dean Nelson
` (2 subsequent siblings)
3 siblings, 1 reply; 7+ messages in thread
From: Dean Nelson @ 2004-08-24 18:22 UTC (permalink / raw)
To: linux-ia64, netdev
This patch exports the symbols needed by XP[C|NET].
Signed-off-by: Dean Nelson <dcn@sgi.com>
Index: bk-linux-2.6/arch/ia64/kernel/smpboot.c
===================================================================
--- bk-linux-2.6.orig/arch/ia64/kernel/smpboot.c 2004-08-23 14:38:36.000000000 -0500
+++ bk-linux-2.6/arch/ia64/kernel/smpboot.c 2004-08-24 07:29:11.000000000 -0500
@@ -498,6 +498,7 @@
EXPORT_SYMBOL(cpu_to_node_map);
/* which logical CPUs are on which nodes */
cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned;
+EXPORT_SYMBOL(node_to_cpu_mask);
/*
* Build cpu to node mapping and initialize the per node cpu masks.
Index: bk-linux-2.6/arch/ia64/sn/kernel/setup.c
===================================================================
--- bk-linux-2.6.orig/arch/ia64/sn/kernel/setup.c 2004-08-23 14:38:37.000000000 -0500
+++ bk-linux-2.6/arch/ia64/sn/kernel/setup.c 2004-08-24 07:29:11.000000000 -0500
@@ -50,6 +50,7 @@
#include <asm/sn/sn2/shub.h>
DEFINE_PER_CPU(struct pda_s, pda_percpu);
+EXPORT_PER_CPU_SYMBOL(pda_percpu);
#define MAX_PHYS_MEMORY (1UL << 49) /* 1 TB */
@@ -65,8 +66,11 @@
unsigned long sn_rtc_cycles_per_second;
partid_t sn_partid = -1;
+EXPORT_SYMBOL(sn_partid);
char sn_system_serial_number_string[128];
+EXPORT_SYMBOL(sn_system_serial_number_string);
u64 sn_partition_serial_number;
+EXPORT_SYMBOL(sn_partition_serial_number);
short physical_node_map[MAX_PHYSNODE_ID];
Index: bk-linux-2.6/kernel/sched.c
===================================================================
--- bk-linux-2.6.orig/kernel/sched.c 2004-08-23 14:39:35.000000000 -0500
+++ bk-linux-2.6/kernel/sched.c 2004-08-24 07:29:11.000000000 -0500
@@ -2814,6 +2814,7 @@
{
return setscheduler(pid, policy, param);
}
+EXPORT_SYMBOL(sys_sched_setscheduler);
/**
* sys_sched_setparam - set/change the RT priority of a thread
Index: bk-linux-2.6/mm/page_alloc.c
===================================================================
--- bk-linux-2.6.orig/mm/page_alloc.c 2004-08-23 14:38:16.000000000 -0500
+++ bk-linux-2.6/mm/page_alloc.c 2004-08-24 07:29:11.000000000 -0500
@@ -40,6 +40,7 @@
unsigned long totalhigh_pages;
long nr_swap_pages;
int numnodes = 1;
+EXPORT_SYMBOL(numnodes);
int sysctl_lower_zone_protection = 0;
EXPORT_SYMBOL(totalram_pages);
Index: bk-linux-2.6/include/asm-ia64/sn/sn_sal.h
===================================================================
--- bk-linux-2.6.orig/include/asm-ia64/sn/sn_sal.h 2004-08-23 14:40:00.000000000 -0500
+++ bk-linux-2.6/include/asm-ia64/sn/sn_sal.h 2004-08-24 07:29:11.000000000 -0500
@@ -494,7 +494,8 @@
ia64_sn_partition_serial_get(void)
{
struct ia64_sal_retval ret_stuff;
- SAL_CALL(ret_stuff, SN_SAL_PARTITION_SERIAL_GET, 0, 0, 0, 0, 0, 0, 0);
+ ia64_sal_oemcall_reentrant(&ret_stuff, SN_SAL_PARTITION_SERIAL_GET, 0,
+ 0, 0, 0, 0, 0, 0);
if (ret_stuff.status != 0)
return 0;
return ret_stuff.v0;
@@ -502,11 +503,10 @@
static inline u64
sn_partition_serial_number_val(void) {
- if (sn_partition_serial_number) {
- return(sn_partition_serial_number);
- } else {
- return(sn_partition_serial_number = ia64_sn_partition_serial_get());
+ if (unlikely(sn_partition_serial_number == 0)) {
+ sn_partition_serial_number = ia64_sn_partition_serial_get();
}
+ return sn_partition_serial_number;
}
/*
@@ -517,8 +517,8 @@
ia64_sn_sysctl_partition_get(nasid_t nasid)
{
struct ia64_sal_retval ret_stuff;
- SAL_CALL(ret_stuff, SN_SAL_SYSCTL_PARTITION_GET, nasid,
- 0, 0, 0, 0, 0, 0);
+ ia64_sal_oemcall_nolock(&ret_stuff, SN_SAL_SYSCTL_PARTITION_GET, nasid,
+ 0, 0, 0, 0, 0, 0);
if (ret_stuff.status != 0)
return INVALID_PARTID;
return ((partid_t)ret_stuff.v0);
@@ -532,11 +532,38 @@
static inline partid_t
sn_local_partid(void) {
- if (sn_partid < 0) {
- return (sn_partid = ia64_sn_sysctl_partition_get(cpuid_to_nasid(smp_processor_id())));
- } else {
- return sn_partid;
+ if (unlikely(sn_partid < 0)) {
+ sn_partid = ia64_sn_sysctl_partition_get(cpuid_to_nasid(smp_processor_id()));
}
+ return sn_partid;
+}
+
+/*
+ * Returns the physical address of the partition's reserved page through
+ * an iterative number of calls.
+ *
+ * On first call, 'cookie' and 'len' should be set to 0, and 'addr'
+ * set to the nasid of the partition whose reserved page's address is
+ * being sought.
+ * On subsequent calls, pass the values that were passed back on the
+ * previous call.
+ *
+ * While the return status equals SALRET_MORE_PASSES, keep calling
+ * this function after first copying 'len' bytes starting at 'addr'
+ * into 'buf'. Once the return status equals SALRET_OK, 'addr' will
+ * be the physical address of the partition's reserved page. If the
+ * return status equals neither of these, an error has occurred.
+ */
+static inline s64
+sn_partition_reserved_page_pa(u64 buf, u64 *cookie, u64 *addr, u64 *len)
+{
+ struct ia64_sal_retval rv;
+ ia64_sal_oemcall_reentrant(&rv, SN_SAL_GET_PARTITION_ADDR, *cookie,
+ *addr, buf, *len, 0, 0, 0);
+ *cookie = rv.v0;
+ *addr = rv.v1;
+ *len = rv.v2;
+ return rv.status;
}
/*
@@ -558,8 +585,8 @@
sn_register_xp_addr_region(u64 paddr, u64 len, int operation)
{
struct ia64_sal_retval ret_stuff;
- SAL_CALL(ret_stuff, SN_SAL_XP_ADDR_REGION, paddr, len, (u64)operation,
- 0, 0, 0, 0);
+ ia64_sal_oemcall(&ret_stuff, SN_SAL_XP_ADDR_REGION, paddr, len,
+ (u64)operation, 0, 0, 0, 0);
return ret_stuff.status;
}
@@ -583,8 +610,8 @@
} else {
call = SN_SAL_NO_FAULT_ZONE_PHYSICAL;
}
- SAL_CALL(ret_stuff, call, start_addr, end_addr, return_addr, (u64)1,
- 0, 0, 0);
+ ia64_sal_oemcall(&ret_stuff, call, start_addr, end_addr, return_addr,
+ (u64)1, 0, 0, 0);
return ret_stuff.status;
}
@@ -605,8 +632,8 @@
sn_change_coherence(u64 *new_domain, u64 *old_domain)
{
struct ia64_sal_retval ret_stuff;
- SAL_CALL(ret_stuff, SN_SAL_COHERENCE, new_domain, old_domain, 0, 0,
- 0, 0, 0);
+ ia64_sal_oemcall(&ret_stuff, SN_SAL_COHERENCE, (u64)new_domain,
+ (u64)old_domain, 0, 0, 0, 0, 0);
return ret_stuff.status;
}
@@ -625,8 +652,8 @@
cnodeid = nasid_to_cnodeid(get_node_number(paddr));
spin_lock(&NODEPDA(cnodeid)->bist_lock);
local_irq_save(irq_flags);
- SAL_CALL_NOLOCK(ret_stuff, SN_SAL_MEMPROTECT, paddr, len, nasid_array,
- perms, 0, 0, 0);
+ ia64_sal_oemcall_nolock(&ret_stuff, SN_SAL_MEMPROTECT, paddr, len,
+ (u64)nasid_array, perms, 0, 0, 0);
local_irq_restore(irq_flags);
spin_unlock(&NODEPDA(cnodeid)->bist_lock);
return ret_stuff.status;
^ permalink raw reply [flat|nested] 7+ messages in thread* Re: [PATCH 1/4] SGI Altix cross partition functionality (1st revision)
2004-08-24 18:22 ` [PATCH 1/4] " Dean Nelson
@ 2004-08-24 19:13 ` Christoph Hellwig
0 siblings, 0 replies; 7+ messages in thread
From: Christoph Hellwig @ 2004-08-24 19:13 UTC (permalink / raw)
To: Dean Nelson; +Cc: linux-ia64, netdev
> --- bk-linux-2.6.orig/kernel/sched.c 2004-08-23 14:39:35.000000000 -0500
> +++ bk-linux-2.6/kernel/sched.c 2004-08-24 07:29:11.000000000 -0500
> @@ -2814,6 +2814,7 @@
> {
> return setscheduler(pid, policy, param);
> }
> +EXPORT_SYMBOL(sys_sched_setscheduler);
As said previously you're not supposed to mess with this one.
> long nr_swap_pages;
> int numnodes = 1;
> +EXPORT_SYMBOL(numnodes);
Looking at the two routines that use this I think those two should always
go into the core kernel code. That way you also get rid of the
pda_percpu export.
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH 2/4] SGI Altix cross partition functionality (1st revision)
2004-08-24 18:00 [PATCH 0/4] SGI Altix cross partition functionality (1st revision) Dean Nelson
2004-08-24 18:22 ` [PATCH 1/4] " Dean Nelson
@ 2004-08-24 18:23 ` Dean Nelson
2004-08-24 19:17 ` Christoph Hellwig
2004-08-24 18:26 ` [PATCH 3/4] " Dean Nelson
2004-08-24 18:27 ` [PATCH 4/4] " Dean Nelson
3 siblings, 1 reply; 7+ messages in thread
From: Dean Nelson @ 2004-08-24 18:23 UTC (permalink / raw)
To: linux-ia64, netdev
This patch contains the shim module (XP) which interfaces between the
communication module (XPC) and the functional support modules (like XPNET).
Signed-off-by: Dean Nelson <dcn@sgi.com>
Index: linux/arch/ia64/Kconfig
===================================================================
--- linux.orig/arch/ia64/Kconfig 2004-08-17 13:31:26.000000000 -0500
+++ linux/arch/ia64/Kconfig 2004-08-23 11:39:50.000000000 -0500
@@ -189,6 +189,16 @@
depends on !IA64_HP_SIM
default y
+config IA64_SGI_SN_XPC
+ tristate "Support DMA Messaging between SGI machines"
+ depends on FETCHOP
+ help
+ An SGI machine can be divided into multiple Single System
+ Images which act independently of each other and have
+ hardware based memory protection from the others. Enabling
+ this feature will allow limited communication between
+ those System Images without allowing write access.
+
config IA64_SGI_SN_SIM
bool "SGI Medusa Simulator Support"
depends on IA64_SGI_SN2
Index: linux/arch/ia64/sn/kernel/Makefile
===================================================================
--- linux.orig/arch/ia64/sn/kernel/Makefile 2004-06-16 00:18:59.000000000 -0500
+++ linux/arch/ia64/sn/kernel/Makefile 2004-08-23 10:55:03.000000000 -0500
@@ -9,3 +9,5 @@
obj-y += probe.o setup.o bte.o irq.o mca.o idle.o sn2/
obj-$(CONFIG_IA64_GENERIC) += machvec.o
+obj-$(CONFIG_IA64_SGI_SN_XPC) += xp.o
+xp-y := xp_main.o xp_kdb.o xp_nofault.o
Index: linux/arch/ia64/sn/kernel/xp_main.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux/arch/ia64/sn/kernel/xp_main.c 2004-08-23 10:55:03.000000000 -0500
@@ -0,0 +1,358 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2004 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+
+/*
+ * Cross Partition (XP) base.
+ *
+ * XP provides a base from which its users can interact
+ * with XPC, yet not be dependent on XPC.
+ *
+ */
+
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <asm/sn/intr.h>
+#include <asm/sn/sn_sal.h>
+#include <asm/sn/xp.h>
+
+
+/*
+ * xpc_registrations[] keeps track of xpc_connect()'s done by the kernel-level
+ * users of XPC.
+ */
+struct xpc_registration xpc_registrations[XPC_NCHANNELS];
+
+
+/*
+ * Initialize the XPC interface to indicate that XPC isn't loaded.
+ */
+static enum xpc_retval xpc_notloaded(void) { return xpcNotLoaded; }
+
+struct xpc_interface xpc_interface = {
+ (void (*)(int)) xpc_notloaded,
+ (void (*)(int)) xpc_notloaded,
+ (enum xpc_retval (*)(partid_t, int, u32, void **)) xpc_notloaded,
+ (enum xpc_retval (*)(partid_t, int, void *)) xpc_notloaded,
+ (enum xpc_retval (*)(partid_t, int, void *, xpc_notify_func, void *))
+ xpc_notloaded,
+ (void (*)(partid_t, int, void *)) xpc_notloaded,
+ (enum xpc_retval (*)(partid_t, void *)) xpc_notloaded
+};
+
+
+char *
+xpc_get_ascii_reason_code(enum xpc_retval reason)
+{
+ switch (reason) {
+ case xpcSuccess: return "";
+ case xpcNotConnected: return "xpcNotConnected";
+ case xpcConnected: return "xpcConnected";
+ case xpcRETIRED1: return "xpcRETIRED1";
+ case xpcMsgReceived: return "xpcMsgReceived";
+ case xpcMsgDelivered: return "xpcMsgDelivered";
+ case xpcRETIRED2: return "xpcRETIRED2";
+ case xpcNoWait: return "xpcNoWait";
+ case xpcRetry: return "xpcRetry";
+ case xpcTimeout: return "xpcTimeout";
+ case xpcInterrupted: return "xpcInterrupted";
+ case xpcUnequalMsgSizes: return "xpcUnequalMsgSizes";
+ case xpcInvalidAddress: return "xpcInvalidAddress";
+ case xpcNoMemory: return "xpcNoMemory";
+ case xpcLackOfResources: return "xpcLackOfResources";
+ case xpcUnregistered: return "xpcUnregistered";
+ case xpcAlreadyRegistered: return "xpcAlreadyRegistered";
+ case xpcPartitionDown: return "xpcPartitionDown";
+ case xpcNotLoaded: return "xpcNotLoaded";
+ case xpcUnloading: return "xpcUnloading";
+ case xpcBadMagic: return "xpcBadMagic";
+ case xpcReactivating: return "xpcReactivating";
+ case xpcUnregistering: return "xpcUnregistering";
+ case xpcOtherUnregistering: return "xpcOtherUnregistering";
+ case xpcCloneKThread: return "xpcCloneKThread";
+ case xpcCloneKThreadFailed: return "xpcCloneKThreadFailed";
+ case xpcNoHeartbeat: return "xpcNoHeartbeat";
+ case xpcPioReadError: return "xpcPioReadError";
+ case xpcPhysAddrRegFailed: return "xpcPhysAddrRegFailed";
+ case xpcBteDirectoryError: return "xpcBteDirectoryError";
+ case xpcBtePoisonError: return "xpcBtePoisonError";
+ case xpcBteWriteError: return "xpcBteWriteError";
+ case xpcBteAccessError: return "xpcBteAccessError";
+ case xpcBtePWriteError: return "xpcBtePWriteError";
+ case xpcBtePReadError: return "xpcBtePReadError";
+ case xpcBteTimeOutError: return "xpcBteTimeOutError";
+ case xpcBteXtalkError: return "xpcBteXtalkError";
+ case xpcBteNotAvailable: return "xpcBteNotAvailable";
+ case xpcBteUnmappedError: return "xpcBteUnmappedError";
+ case xpcBadVersion: return "xpcBadVersion";
+ case xpcVarsNotSet: return "xpcVarsNotSet";
+ case xpcNoRsvdPageAddr: return "xpcNoRsvdPageAddr";
+ case xpcInvalidPartid: return "xpcInvalidPartid";
+ case xpcLocalPartid: return "xpcLocalPartid";
+ case xpcUnknownReason: return "xpcUnknownReason";
+ default: return "undefined reason code";
+ }
+}
+
+
+static void
+xp_init_xpc(void)
+{
+ extern void xp_kdb_register(void);
+ int ch_number;
+
+
+ xp_kdb_register();
+
+ /* initialize the connection registration semaphores */
+ for (ch_number = 0; ch_number < XPC_NCHANNELS; ch_number++) {
+ sema_init(&xpc_registrations[ch_number].sema, 1); /* mutex */
+ }
+}
+
+
+static void
+xp_exit_xpc(void)
+{
+ extern void xp_kdb_unregister(void);
+
+
+ xp_kdb_unregister();
+}
+
+
+/*
+ * XPC calls this when it (the XPC module) has been loaded.
+ */
+void
+xpc_set_interface(void (*connect)(int),
+ void (*disconnect)(int),
+ enum xpc_retval (*allocate)(partid_t, int, u32, void **),
+ enum xpc_retval (*send)(partid_t, int, void *),
+ enum xpc_retval (*send_notify)(partid_t, int, void *,
+ xpc_notify_func, void *),
+ void (*received)(partid_t, int, void *),
+ enum xpc_retval (*partid_to_nasids)(partid_t, void *))
+{
+ xpc_interface.connect = connect;
+ xpc_interface.disconnect = disconnect;
+ xpc_interface.allocate = allocate;
+ xpc_interface.send = send;
+ xpc_interface.send_notify = send_notify;
+ xpc_interface.received = received;
+ xpc_interface.partid_to_nasids = partid_to_nasids;
+}
+
+
+/*
+ * XPC calls this when it (the XPC module) is being unloaded.
+ */
+void
+xpc_clear_interface(void)
+{
+ xpc_interface.connect = (void (*)(int)) xpc_notloaded;
+ xpc_interface.disconnect = (void (*)(int)) xpc_notloaded;
+ xpc_interface.allocate = (enum xpc_retval (*)(partid_t, int, u32,
+ void **)) xpc_notloaded;
+ xpc_interface.send = (enum xpc_retval (*)(partid_t, int, void *))
+ xpc_notloaded;
+ xpc_interface.send_notify = (enum xpc_retval (*)(partid_t, int, void *,
+ xpc_notify_func, void *)) xpc_notloaded;
+ xpc_interface.received = (void (*)(partid_t, int, void *))
+ xpc_notloaded;
+ xpc_interface.partid_to_nasids = (enum xpc_retval (*)(partid_t, void *))
+ xpc_notloaded;
+}
+
+
+/*
+ * Register for automatic establishment of a channel connection whenever
+ * a partition comes up.
+ *
+ * Arguments:
+ *
+ * ch_number - channel # to register for connection.
+ * func - function to call for asynchronous notification of channel
+ * state changes (i.e., connection, disconnection, error) and
+ * the arrival of incoming messages.
+ * key - pointer to optional user-defined value that gets passed back
+ * to the user on any callouts made to func.
+ * payload_size - size in bytes of the XPC message's payload area which
+ * contains a user-defined message. The user should make
+ * this large enough to hold their largest message.
+ * nentries - max #of XPC message entries a message queue can contain.
+ * The actual number, which is determined when a connection
+ * is established and may be less than requested, will be
+ * passed to the user via the xpcConnected callout.
+ * assigned_limit - max number of kthreads allowed to be processing
+ * messages (per connection) at any given instant.
+ * idle_limit - max number of kthreads allowed to be idle at any given
+ * instant.
+ */
+enum xpc_retval
+xpc_connect(int ch_number, xpc_channel_func func, void *key, u16 payload_size,
+ u16 nentries, u32 assigned_limit, u32 idle_limit)
+{
+ struct xpc_registration *registration;
+
+
+ DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS);
+ DBUG_ON(payload_size == 0 || nentries == 0);
+ DBUG_ON(func == NULL);
+ DBUG_ON(assigned_limit == 0 || idle_limit > assigned_limit);
+
+ registration = &xpc_registrations[ch_number];
+
+ if (down_interruptible(&registration->sema) != 0) {
+ return xpcInterrupted;
+ }
+
+ /* if XPC_CHANNEL_REGISTERED(ch_number) */
+ if (registration->func != NULL) {
+ up(&registration->sema);
+ return xpcAlreadyRegistered;
+ }
+
+ /* register the channel for connection */
+ registration->msg_size = XPC_MSG_SIZE(payload_size);
+ registration->nentries = nentries;
+ registration->assigned_limit = assigned_limit;
+ registration->idle_limit = idle_limit;
+ registration->key = key;
+ registration->func = func;
+
+ up(&registration->sema);
+
+ xpc_interface.connect(ch_number);
+
+ return xpcSuccess;
+}
+
+
+/*
+ * Remove the registration for automatic connection of the specified channel
+ * when a partition comes up.
+ *
+ * Before returning, xpc_disconnect() will wait until all connections on the
+ * specified channel have been closed/torn down. So the caller can be assured
+ * that they will not be receiving any more callouts from XPC to their
+ * function registered via xpc_connect().
+ *
+ * Arguments:
+ *
+ * ch_number - channel # to unregister.
+ */
+void
+xpc_disconnect(int ch_number)
+{
+ struct xpc_registration *registration;
+
+
+ DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS);
+
+ registration = &xpc_registrations[ch_number];
+
+ /*
+ * We've decided not to make this a down_interruptible(), since we
+ * figured XPC's users will just turn around and call xpc_disconnect()
+ * again anyways, so we might as well wait, if need be.
+ */
+ down(&registration->sema);
+
+ /* if !XPC_CHANNEL_REGISTERED(ch_number) */
+ if (registration->func == NULL) {
+ up(&registration->sema);
+ return;
+ }
+
+ /* remove the connection registration for the specified channel */
+ registration->func = NULL;
+ registration->key = NULL;
+ registration->nentries = 0;
+ registration->msg_size = 0;
+ registration->assigned_limit = 0;
+ registration->idle_limit = 0;
+
+ xpc_interface.disconnect(ch_number);
+
+ up(&registration->sema);
+
+ return;
+}
+
+
+int __init
+xp_init(void)
+{
+ int ret;
+ int (* pior_func)(void *) = xp_nofault_PIOR;
+ int (* pior_err_func)(void) = xp_error_PIOR;
+
+
+ if (!ia64_platform_is("sn2")) {
+ return -ENODEV;
+ }
+
+
+ /*
+ * Register a nofault code region which performs a cross-partition
+ * PIO read. If the PIO read times out, the MCA handler will consume
+ * the error and return to a kernel-provided instruction to indicate
+ * an error. This PIO read exists because it is guaranteed to timeout
+ * if the destination is down (AMO operations do not timeout on at
+ * least some CPUs on Shubs <= v1.2, which unfortunately we have to
+ * work around).
+ */
+ if ((ret = sn_register_nofault_code(*(u64 *) pior_func,
+ *(u64 *) pior_err_func,
+ *(u64 *) pior_err_func, 1, 1)) != 0) {
+ printk(KERN_ERR "XP: can't register nofault code, error=%d\n",
+ ret);
+ }
+
+
+ xp_init_xpc();
+
+ return 0;
+}
+module_init(xp_init);
+
+
+void __exit
+xp_exit(void)
+{
+ int (* pior_func)(void *) = xp_nofault_PIOR;
+ int (* pior_err_func)(void) = xp_error_PIOR;
+
+
+ xp_exit_xpc();
+
+
+ /* unregister the PIO nofault code region */
+ (void) sn_register_nofault_code(*(u64 *) pior_func,
+ *(u64 *) pior_err_func,
+ *(u64 *) pior_err_func, 1, 0);
+}
+module_exit(xp_exit);
+
+
+MODULE_AUTHOR("Silicon Graphics, Inc.");
+MODULE_DESCRIPTION("Cross Partition (XP) base");
+MODULE_LICENSE("GPL");
+
+EXPORT_SYMBOL(xp_nofault_PIOR);
+EXPORT_SYMBOL(xpc_get_ascii_reason_code);
+EXPORT_SYMBOL(xpc_registrations);
+EXPORT_SYMBOL(xpc_interface);
+EXPORT_SYMBOL(xpc_clear_interface);
+EXPORT_SYMBOL(xpc_set_interface);
+EXPORT_SYMBOL(xpc_connect);
+EXPORT_SYMBOL(xpc_disconnect);
+
Index: linux/arch/ia64/sn/kernel/xp_nofault.S
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux/arch/ia64/sn/kernel/xp_nofault.S 2004-08-23 10:55:03.000000000 -0500
@@ -0,0 +1,31 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2004 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+
+/*
+ * The xp_nofault_PIOR function takes a pointer to a remote PIO register
+ * and attempts to load and consume a value from it. This function
+ * will be registered as a nofault code block. In the event that the
+ * PIO read fails, the MCA handler will force the error to look
+ * corrected and vector to the xp_error_PIOR which will return an error.
+ *
+ * extern int xp_nofault_PIOR(void *remote_register);
+ */
+
+ .global xp_nofault_PIOR
+xp_nofault_PIOR:
+ mov r8=r0 // Stage a success return value
+ ld8.acq r9=[r32];; // PIO Read the specified register
+ adds r9=1,r9 // Add to force a consume
+ br.ret.sptk.many b0;; // Return success
+
+ .global xp_error_PIOR
+xp_error_PIOR:
+ mov r8=1 // Return value of 1
+ br.ret.sptk.many b0;; // Return failure
+
Index: linux/include/asm-ia64/sn/xp.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux/include/asm-ia64/sn/xp.h 2004-08-23 10:55:03.000000000 -0500
@@ -0,0 +1,426 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2004 Silicon Graphics, Inc. All rights reserved.
+ */
+
+
+/*
+ * External Cross Partition (XP) structures and defines.
+ */
+
+
+#ifndef _ASM_IA64_SN_XP_H
+#define _ASM_IA64_SN_XP_H
+
+
+#include <linux/version.h>
+#include <linux/cache.h>
+#include <asm/sn/types.h>
+#include <asm/sn/bte.h>
+#include <asm/hardirq.h>
+
+
+#ifdef USE_DBUG_ON
+#define DBUG_ON(condition) BUG_ON(condition)
+#else
+#define DBUG_ON(condition)
+#endif
+
+
+/*
+ * Define the number of u64s required to represent all the C-brick nasids
+ * as a bitmap. The cross-partition kernel modules deal only with
+ * C-brick nasids, thus the need for bitmaps which don't account for
+ * odd-numbered (non C-brick) nasids.
+ */
+#define XP_MAX_NASIDS (MAX_NASIDS / 2)
+#define XP_NUM_NASID_WORDS ((XP_MAX_NASIDS + 63)/ 64)
+
+
+/*
+ * Wrapper for bte_copy() that, should it return a failure status, will retry
+ * the bte_copy() once in the hope that the failure was due to a temporary
+ * aberration (i.e., the link going down temporarily).
+ *
+ * See bte_copy for definition of the input parameters.
+ *
+ * Note: xp_bte_copy() should never be called while holding a spinlock.
+ */
+static inline bte_result_t
+xp_bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification)
+{
+ bte_result_t ret;
+
+
+ ret = bte_copy(src, dest, len, mode, notification);
+
+ if (ret != BTE_SUCCESS) {
+ if (!in_interrupt()) {
+ cond_resched();
+ }
+ ret = bte_copy(src, dest, len, mode, notification);
+ }
+
+ return ret;
+}
+
+
+/*
+ * XPC establishes channel connections between the local partition and any
+ * other partition that is currently up. Over these channels, kernel-level
+ * `users' can communicate with their counterparts on the other partitions.
+ *
+ * The maximum number of channels is limited to eight. For performance reasons,
+ * the internal cross partition structures require sixteen bytes per channel,
+ * and eight allows all of this interface-shared info to fit in one cache line.
+ *
+ * XPC_NCHANNELS reflects the total number of channels currently defined.
+ * If the need for additional channels arises, one can simply increase
+ * XPC_NCHANNELS accordingly. If the day should come where that number
+ * exceeds the MAXIMUM number of channels allowed (eight), then one will need
+ * to make changes to the XPC code to allow for this.
+ */
+#define XPC_MEM_CHANNEL 0 /* memory channel number */
+#define XPC_NET_CHANNEL 1 /* network channel number */
+
+#define XPC_NCHANNELS 2 /* #of defined channels */
+#define XPC_MAX_NCHANNELS 8 /* max #of channels allowed */
+
+#if XPC_NCHANNELS > XPC_MAX_NCHANNELS
+#error XPC_NCHANNELS exceeds MAXIMUM allowed.
+#endif
+
+
+/*
+ * The format of an XPC message is as follows:
+ *
+ * +-------+--------------------------------+
+ * | flags |////////////////////////////////|
+ * +-------+--------------------------------+
+ * | message # |
+ * +----------------------------------------+
+ * | payload (user-defined message) |
+ * | |
+ * :
+ * | |
+ * +----------------------------------------+
+ *
+ * The size of the payload is defined by the user via xpc_connect(). A user-
+ * defined message resides in the payload area.
+ *
+ * The user should have no dealings with the message header, but only the
+ * message's payload. When a message entry is allocated (via xpc_allocate())
+ * a pointer to the payload area is returned and not the actual beginning of
+ * the XPC message. The user then constructs a message in the payload area
+ * and passes that pointer as an argument on xpc_send() or xpc_send_notify().
+ *
+ * The size of a message entry (within a message queue) must be a cacheline
+ * sized multiple in order to facilitate the BTE transfer of messages from one
+ * message queue to another. A macro, XPC_MSG_SIZE(), is provided for the user
+ * that wants to fit as many msg entries as possible in a given memory size
+ * (e.g. a memory page).
+ */
+struct xpc_msg {
+ u8 flags; /* FOR XPC INTERNAL USE ONLY */
+ u8 reserved[7]; /* FOR XPC INTERNAL USE ONLY */
+ s64 number; /* FOR XPC INTERNAL USE ONLY */
+
+ u64 payload; /* user defined portion of message */
+};
+
+
+#define XPC_MSG_PAYLOAD_OFFSET (u64) (&((struct xpc_msg *)0)->payload)
+#define XPC_MSG_SIZE(_payload_size) \
+ L1_CACHE_ALIGN(XPC_MSG_PAYLOAD_OFFSET + (_payload_size))
+
+
+/*
+ * Define the return values and values passed to user's callout functions.
+ * (It is important to add new value codes at the end just preceding
+ * xpcUnknownReason, which must have the highest numerical value.)
+ */
+enum xpc_retval {
+ xpcSuccess = 0,
+
+ xpcNotConnected, /* 1: channel is not connected */
+ xpcConnected, /* 2: channel connected (opened) */
+ xpcRETIRED1, /* 3: (formerly xpcDisconnected) */
+
+ xpcMsgReceived, /* 4: message received */
+ xpcMsgDelivered, /* 5: message delivered and acknowledged */
+
+ xpcRETIRED2, /* 6: (formerly xpcTransferFailed) */
+
+ xpcNoWait, /* 7: operation would require wait */
+ xpcRetry, /* 8: retry operation */
+ xpcTimeout, /* 9: timeout in xpc_allocate_msg_wait() */
+ xpcInterrupted, /* 10: interrupted wait */
+
+ xpcUnequalMsgSizes, /* 11: message size disparity between sides */
+ xpcInvalidAddress, /* 12: invalid address */
+
+ xpcNoMemory, /* 13: no memory available for XPC structures */
+ xpcLackOfResources, /* 14: insufficient resources for operation */
+ xpcUnregistered, /* 15: channel is not registered */
+ xpcAlreadyRegistered, /* 16: channel is already registered */
+
+ xpcPartitionDown, /* 17: remote partition is down */
+ xpcNotLoaded, /* 18: XPC module is not loaded */
+ xpcUnloading, /* 19: this side is unloading XPC module */
+
+ xpcBadMagic, /* 20: XPC MAGIC string not found */
+
+ xpcReactivating, /* 21: remote partition was reactivated */
+
+ xpcUnregistering, /* 22: this side is unregistering channel */
+ xpcOtherUnregistering, /* 23: other side is unregistering channel */
+
+ xpcCloneKThread, /* 24: cloning kernel thread */
+ xpcCloneKThreadFailed, /* 25: cloning kernel thread failed */
+
+ xpcNoHeartbeat, /* 26: remote partition has no heartbeat */
+
+ xpcPioReadError, /* 27: PIO read error */
+ xpcPhysAddrRegFailed, /* 28: registration of phys addr range failed */
+
+ xpcBteDirectoryError, /* 29: maps to BTEFAIL_DIR */
+ xpcBtePoisonError, /* 30: maps to BTEFAIL_POISON */
+ xpcBteWriteError, /* 31: maps to BTEFAIL_WERR */
+ xpcBteAccessError, /* 32: maps to BTEFAIL_ACCESS */
+ xpcBtePWriteError, /* 33: maps to BTEFAIL_PWERR */
+ xpcBtePReadError, /* 34: maps to BTEFAIL_PRERR */
+ xpcBteTimeOutError, /* 35: maps to BTEFAIL_TOUT */
+ xpcBteXtalkError, /* 36: maps to BTEFAIL_XTERR */
+ xpcBteNotAvailable, /* 37: maps to BTEFAIL_NOTAVAIL */
+ xpcBteUnmappedError, /* 38: unmapped BTEFAIL_ error */
+
+ xpcBadVersion, /* 39: bad version number */
+ xpcVarsNotSet, /* 40: the XPC variables are not set up */
+ xpcNoRsvdPageAddr, /* 41: unable to get rsvd page's phys addr */
+ xpcInvalidPartid, /* 42: invalid partition ID */
+ xpcLocalPartid, /* 43: local partition ID */
+
+ xpcUnknownReason /* 44: unknown reason -- must be last in list */
+};
+
+
+/*
+ * Define the callout function types used by XPC to update the user on
+ * connection activity and state changes (via the user function registered by
+ * xpc_connect()) and to notify them of messages received and delivered (via
+ * the user function registered by xpc_send_notify()).
+ *
+ * The two function types are xpc_channel_func and xpc_notify_func and
+ * both share the following arguments, with the exception of "data", which
+ * only xpc_channel_func has.
+ *
+ * Arguments:
+ *
+ * reason - reason code. (See following table.)
+ * partid - partition ID associated with condition.
+ * ch_number - channel # associated with condition.
+ * data - pointer to optional data. (See following table.)
+ * key - pointer to optional user-defined value provided as the "key"
+ * argument to xpc_connect() or xpc_send_notify().
+ *
+ * In the following table the "Optional Data" column applies to callouts made
+ * to functions registered by xpc_connect(). A "NA" in that column indicates
+ * that this reason code can be passed to functions registered by
+ * xpc_send_notify() (i.e. they don't have data arguments).
+ *
+ * Also, the first three reason codes in the following table indicate
+ * success, whereas the others indicate failure. When a failure reason code
+ * is received, one can assume that the channel is not connected.
+ *
+ *
+ * Reason Code | Cause | Optional Data
+ * =====================+================================+=====================
+ * xpcConnected | connection has been established| max #of entries
+ * | to the specified partition on | allowed in message
+ * | the specified channel | queue
+ * ---------------------+--------------------------------+---------------------
+ * xpcMsgReceived | an XPC message arrived from | address of payload
+ * | the specified partition on the |
+ * | specified channel | [the user must call
+ * | | xpc_received() when
+ * | | finished with the
+ * | | payload]
+ * ---------------------+--------------------------------+---------------------
+ * xpcMsgDelivered | notification that the message | NA
+ * | was delivered to the intended |
+ * | recipient and that they have |
+ * | acknowledged its receipt by |
+ * | calling xpc_received() |
+ * =====================+================================+=====================
+ * xpcUnequalMsgSizes | can't connect to the specified | NULL
+ * | partition on the specified |
+ * | channel because of mismatched |
+ * | message sizes |
+ * ---------------------+--------------------------------+---------------------
+ * xpcNoMemory | insufficient memory available | NULL
+ * | to allocate message queue |
+ * ---------------------+--------------------------------+---------------------
+ * xpcLackOfResources | lack of resources to create | NULL
+ * | the necessary kthreads to |
+ * | support the channel |
+ * ---------------------+--------------------------------+---------------------
+ * xpcUnregistering | this side's user has | NULL or NA
+ * | unregistered by calling |
+ * | xpc_disconnect() |
+ * ---------------------+--------------------------------+---------------------
+ * xpcOtherUnregistering| the other side's user has | NULL or NA
+ * | unregistered by calling |
+ * | xpc_disconnect() |
+ * ---------------------+--------------------------------+---------------------
+ * xpcNoHeartbeat | the other side's XPC is no | NULL or NA
+ * | longer heartbeating |
+ * | |
+ * ---------------------+--------------------------------+---------------------
+ * xpcUnloading | this side's XPC module is | NULL or NA
+ * | being unloaded |
+ * | |
+ * ---------------------+--------------------------------+---------------------
+ * xpcOtherUnloading | the other side's XPC module is | NULL or NA
+ * | being unloaded |
+ * | |
+ * ---------------------+--------------------------------+---------------------
+ * xpcPioReadError | xp_nofault_PIOR() returned an | NULL or NA
+ * | error while sending an IPI |
+ * | |
+ * ---------------------+--------------------------------+---------------------
+ * xpcInvalidAddress | the address either received or | NULL or NA
+ * | sent by the specified partition|
+ * | is invalid |
+ * ---------------------+--------------------------------+---------------------
+ * xpcBteNotAvailable | attempt to pull data from the | NULL or NA
+ * xpcBtePoisonError | specified partition over the |
+ * xpcBteWriteError | specified channel via a |
+ * xpcBteAccessError | bte_copy() failed |
+ * xpcBteTimeOutError | |
+ * xpcBteXtalkError | |
+ * xpcBteDirectoryError | |
+ * xpcBteGenericError | |
+ * xpcBteUnmappedError | |
+ * ---------------------+--------------------------------+---------------------
+ * xpcUnknownReason | the specified channel to the | NULL or NA
+ * | specified partition was |
+ * | unavailable for unknown reasons|
+ * =====================+================================+=====================
+ */
+
+typedef void (*xpc_channel_func)(enum xpc_retval reason, partid_t partid,
+ int ch_number, void *data, void *key);
+
+typedef void (*xpc_notify_func)(enum xpc_retval reason, partid_t partid,
+ int ch_number, void *key);
+
+
+/*
+ * The following is a registration entry. There is a global array of these,
+ * one per channel. It is used to record the connection registration made
+ * by the users of XPC. As long as a registration entry exists, for any
+ * partition that comes up, XPC will attempt to establish a connection on
+ * that channel. Notification that a connection has been made will occur via
+ * the xpc_channel_func function.
+ */
+struct xpc_registration {
+
+ struct semaphore sema; /* per-channel lock guarding this entry */
+
+ /*
+ * Function to call when asynchronous notification is required for
+ * such events as a connection established/lost, an incoming
+ * message received, or an error condition encountered. A non-NULL
+ * func field indicates that there is an active registration for
+ * the channel.
+ */
+ xpc_channel_func func;
+ void *key; /* pointer to user's key */
+
+ u16 nentries; /* #of msg entries in local msg queue */
+ u16 msg_size; /* message queue's message size */
+ u32 assigned_limit; /* limit on #of assigned kthreads */
+ u32 idle_limit; /* limit on #of idle kthreads */
+} ____cacheline_aligned;
+
+
+#define XPC_CHANNEL_REGISTERED(_c) (xpc_registrations[_c].func != NULL)
+
+
+/* the following are valid xpc_allocate() flags */
+#define XPC_WAIT 0 /* wait flag */
+#define XPC_NOWAIT 1 /* no wait flag */
+
+
+struct xpc_interface {
+ void (*connect)(int);
+ void (*disconnect)(int);
+ enum xpc_retval (*allocate)(partid_t, int, u32, void **);
+ enum xpc_retval (*send)(partid_t, int, void *);
+ enum xpc_retval (*send_notify)(partid_t, int, void *,
+ xpc_notify_func, void *);
+ void (*received)(partid_t, int, void *);
+ enum xpc_retval (*partid_to_nasids)(partid_t, void *);
+};
+
+
+extern struct xpc_interface xpc_interface;
+
+extern void xpc_set_interface(void (*)(int),
+ void (*)(int),
+ enum xpc_retval (*)(partid_t, int, u32, void **),
+ enum xpc_retval (*)(partid_t, int, void *),
+ enum xpc_retval (*)(partid_t, int, void *, xpc_notify_func,
+ void *),
+ void (*)(partid_t, int, void *),
+ enum xpc_retval (*)(partid_t, void *));
+extern void xpc_clear_interface(void);
+
+
+extern enum xpc_retval xpc_connect(int, xpc_channel_func, void *, u16,
+ u16, u32, u32);
+extern void xpc_disconnect(int);
+
+static inline enum xpc_retval
+xpc_allocate(partid_t partid, int ch_number, u32 flags, void **payload)
+{
+ return xpc_interface.allocate(partid, ch_number, flags, payload);
+}
+
+static inline enum xpc_retval
+xpc_send(partid_t partid, int ch_number, void *payload)
+{
+ return xpc_interface.send(partid, ch_number, payload);
+}
+
+static inline enum xpc_retval
+xpc_send_notify(partid_t partid, int ch_number, void *payload,
+ xpc_notify_func func, void *key)
+{
+ return xpc_interface.send_notify(partid, ch_number, payload, func, key);
+}
+/* Tell XPC that the user is finished with a received message payload. */
+static inline void
+xpc_received(partid_t partid, int ch_number, void *payload)
+{
+	xpc_interface.received(partid, ch_number, payload);
+}
+
+static inline enum xpc_retval
+xpc_partid_to_nasids(partid_t partid, void *nasids)
+{
+ return xpc_interface.partid_to_nasids(partid, nasids);
+}
+
+
+extern char *xpc_get_ascii_reason_code(enum xpc_retval);
+
+extern int xp_nofault_PIOR(void *);
+extern int xp_error_PIOR(void);
+
+
+#endif /* _ASM_IA64_SN_XP_H */
+
Index: linux/arch/ia64/sn/kernel/xp_kdb.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux/arch/ia64/sn/kernel/xp_kdb.c 2004-08-23 10:55:03.000000000 -0500
@@ -0,0 +1,109 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2004 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+
+/*
+ * Cross Partition (XP) base kdb support.
+ *
+ * This is the part of XP that provides kdb functions for
+ * debugging purposes.
+ *
+ */
+
+
+#include <linux/kernel.h>
+#ifdef CONFIG_KDB
+#include <linux/kdb.h>
+#include <linux/kdbprivate.h>
+#endif
+#include <asm/sn/xp.h>
+
+
+#ifdef CONFIG_KDB
+/* Dump every field of one xpc_registration entry to the kdb console. */
+static void
+xpc_kdb_print_users(struct xpc_registration *registration, int ch_number)
+{
+	kdb_printf("xpc_registrations[channel=%d] (0x%p):\n", ch_number,
+							(void *) registration);
+
+	kdb_printf("\t&sema=0x%p\n", (void *) &registration->sema);
+	kdb_printf("\tfunc=0x%p\n", (void *) registration->func);
+	kdb_printf("\tkey=0x%p\n", registration->key);
+	kdb_printf("\tnentries=%d\n", registration->nentries);
+	kdb_printf("\tmsg_size=%d\n", registration->msg_size);
+	kdb_printf("\tassigned_limit=%u\n", registration->assigned_limit);
+	kdb_printf("\tidle_limit=%u\n", registration->idle_limit);
+}
+
+
+/*
+ * Display current XPC users who have registered via xpc_connect().
+ * With no argument, every registered channel is shown.
+ *	xpcusers [ <channel> ]
+ */
+static int
+xpc_kdb_users(int argc, const char **argv, const char **envp,
+							struct pt_regs *regs)
+{
+	extern struct xpc_registration xpc_registrations[];
+	int ret;
+	struct xpc_registration *registration;
+	int ch_number;
+	unsigned long arg;	/* kdbgetularg() stores a full unsigned long */
+
+	if (argc > 1) {
+		return KDB_ARGCOUNT;
+
+	} else if (argc == 1) {
+		ret = kdbgetularg(argv[1], &arg);
+		if (ret) {
+			return ret;
+		}
+		if (arg >= XPC_NCHANNELS) {
+			kdb_printf("invalid channel #\n");
+			return KDB_BADINT;
+		}
+		ch_number = (int) arg;	/* range-checked above */
+		registration = &xpc_registrations[ch_number];
+		xpc_kdb_print_users(registration, ch_number);
+	} else {
+		for (ch_number = 0; ch_number < XPC_NCHANNELS; ch_number++) {
+			registration = &xpc_registrations[ch_number];
+
+			/* if !XPC_CHANNEL_REGISTERED(ch_number) */
+			if (registration->func == NULL) {
+				continue;
+			}
+			xpc_kdb_print_users(registration, ch_number);
+		}
+	}
+	return 0;
+}
+
+#endif /* CONFIG_KDB */
+
+/* Register the "xpcusers" kdb command; a no-op without CONFIG_KDB. */
+void
+xp_kdb_register(void)
+{
+#ifdef CONFIG_KDB
+ (void) kdb_register("xpcusers", xpc_kdb_users, "[ <channel> ]",
+ "Display struct xpc_registration entries", 0);
+#endif /* CONFIG_KDB */
+}
+
+/* Remove the "xpcusers" kdb command; a no-op without CONFIG_KDB. */
+void
+xp_kdb_unregister(void)
+{
+#ifdef CONFIG_KDB
+ (void) kdb_unregister("xpcusers");
+#endif /* CONFIG_KDB */
+}
+
^ permalink raw reply [flat|nested] 7+ messages in thread* Re: [PATCH 2/4] SGI Altix cross partition functionality (1st revision)
2004-08-24 18:23 ` [PATCH 2/4] " Dean Nelson
@ 2004-08-24 19:17 ` Christoph Hellwig
0 siblings, 0 replies; 7+ messages in thread
From: Christoph Hellwig @ 2004-08-24 19:17 UTC (permalink / raw)
To: Dean Nelson; +Cc: linux-ia64, netdev
On Tue, Aug 24, 2004 at 01:23:44PM -0500, Dean Nelson wrote:
> This patch contains the shim module (XP) which interfaces between the
> communication module (XPC) and the functional support modules (like XPNET).
>
> Signed-off-by: Dean Nelson <dcn@sgi.com>
>
>
> Index: linux/arch/ia64/Kconfig
> ===================================================================
> --- linux.orig/arch/ia64/Kconfig 2004-08-17 13:31:26.000000000 -0500
> +++ linux/arch/ia64/Kconfig 2004-08-23 11:39:50.000000000 -0500
> @@ -189,6 +189,16 @@
> depends on !IA64_HP_SIM
> default y
>
> +config IA64_SGI_SN_XPC
> + tristate "Support DMA Messaging between SGI machines"
Why do you have three different options when the only way they're useful
is to have all three enabled at the same time? Also, as I mentioned previously,
please merge at least xp and xpc into a single module.
> + depends on FETCHOP
Please make sure the fetchop driver is at least posted publically
before submitting anything that depends on it.
> +xp-y := xp_main.o xp_kdb.o xp_nofault.o
Please kill all the kdb hooks for mainline submission.
> + case xpcMsgReceived: return "xpcMsgReceived";
> + case xpcMsgDelivered: return "xpcMsgDelivered";
Please don't add strerror-lookalikes to the kernel.
> + for (ch_number = 0; ch_number < XPC_NCHANNELS; ch_number++) {
> + sema_init(&xpc_registrations[ch_number].sema, 1); /* mutex */
> + }
A single mutex wouldn't do it? It doesn't exactly look like it's used in
fast-paths
> + */
> + if ((ret = sn_register_nofault_code(*(u64 *) pior_func,
> + *(u64 *) pior_err_func,
> + *(u64 *) pior_err_func, 1, 1)) != 0) {
Is the strange casting really unavoidable?
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH 3/4] SGI Altix cross partition functionality (1st revision)
2004-08-24 18:00 [PATCH 0/4] SGI Altix cross partition functionality (1st revision) Dean Nelson
2004-08-24 18:22 ` [PATCH 1/4] " Dean Nelson
2004-08-24 18:23 ` [PATCH 2/4] " Dean Nelson
@ 2004-08-24 18:26 ` Dean Nelson
2004-08-24 18:27 ` [PATCH 4/4] " Dean Nelson
3 siblings, 0 replies; 7+ messages in thread
From: Dean Nelson @ 2004-08-24 18:26 UTC (permalink / raw)
To: linux-ia64, netdev
[-- Attachment #1: Type: text/plain, Size: 157 bytes --]
This patch contains the communication module (XPC) for cross partition
communication on a partitioned SGI Altix.
Signed-off-by: Dean Nelson <dcn@sgi.com>
[-- Attachment #2: patch-3.gz --]
[-- Type: application/x-gunzip, Size: 41057 bytes --]
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH 4/4] SGI Altix cross partition functionality (1st revision)
2004-08-24 18:00 [PATCH 0/4] SGI Altix cross partition functionality (1st revision) Dean Nelson
` (2 preceding siblings ...)
2004-08-24 18:26 ` [PATCH 3/4] " Dean Nelson
@ 2004-08-24 18:27 ` Dean Nelson
3 siblings, 0 replies; 7+ messages in thread
From: Dean Nelson @ 2004-08-24 18:27 UTC (permalink / raw)
To: linux-ia64, netdev
This patch contains the cross partition pseudo-ethernet driver (XPNET)
functional support module.
Signed-off-by: Dean Nelson <dcn@sgi.com>
Index: bk-linux-2.6/arch/ia64/Kconfig
===================================================================
--- bk-linux-2.6.orig/arch/ia64/Kconfig 2004-08-24 07:36:43.000000000 -0500
+++ bk-linux-2.6/arch/ia64/Kconfig 2004-08-24 07:38:23.000000000 -0500
@@ -199,6 +199,16 @@
this feature will allow limited communication between
those System Images without allowing write access.
+config IA64_SGI_SN_XPNET
+ tristate "SGI DMA pseudo-ethernet driver"
+ depends on IA64_SGI_SN_XPC
+ help
+ An SGI machine can be divided into multiple Single System
+ Images which act independently of each other and have
+ hardware based memory protection from the others. Enabling
+ this feature will produce a network adapter that can be
+ used to communicate directly between SSIs.
+
config IA64_SGI_SN_SIM
bool "SGI Medusa Simulator Support"
depends on IA64_SGI_SN2
Index: bk-linux-2.6/arch/ia64/sn/kernel/Makefile
===================================================================
--- bk-linux-2.6.orig/arch/ia64/sn/kernel/Makefile 2004-08-24 07:38:21.000000000 -0500
+++ bk-linux-2.6/arch/ia64/sn/kernel/Makefile 2004-08-24 07:38:23.000000000 -0500
@@ -13,3 +13,4 @@
xp-y := xp_main.o xp_kdb.o xp_nofault.o
obj-$(CONFIG_IA64_SGI_SN_XPC) += xpc.o
xpc-y := xpc_main.o xpc_kdb.o xpc_channel.o xpc_partition.o
+obj-$(CONFIG_IA64_SGI_SN_XPNET) += xpnet.o
Index: bk-linux-2.6/arch/ia64/sn/kernel/xpnet.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ bk-linux-2.6/arch/ia64/sn/kernel/xpnet.c 2004-08-24 09:18:54.000000000 -0500
@@ -0,0 +1,714 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1999,2001-2004 Silicon Graphics, Inc. All rights reserved.
+ */
+
+
+/*
+ * Cross Partition Network Interface (XPNET) support
+ *
+ * XPNET provides a virtual network layered on top of the Cross
+ * Partition communication layer.
+ *
+ * XPNET provides direct point-to-point and broadcast-like support
+ * for an ethernet-like device. The ethernet broadcast medium is
+ * replaced with a point-to-point message structure which passes
+ * pointers to a DMA-capable block that a remote partition should
+ * retrieve and pass to the upper level networking layer.
+ *
+ */
+
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/delay.h>
+#include <linux/ethtool.h>
+#include <linux/mii.h>
+#include <linux/smp.h>
+#include <linux/string.h>
+#include <asm/sn/bte.h>
+#include <asm/sn/io.h>
+#include <asm/sn/sn_sal.h>
+#include <asm/types.h>
+#include <asm/atomic.h>
+#include <asm/sn/xp.h>
+
+
+/*
+ * The message payload transferred by XPC.
+ *
+ * buf_pa is the physical address where the DMA should pull from.
+ *
+ * NOTE: for performance reasons, buf_pa should _ALWAYS_ begin on a
+ * cacheline boundary. To accomplish this, we record the number of
+ * bytes from the beginning of the first cacheline to the first useful
+ * byte of the skb (leadin_ignore) and the number of bytes from the
+ * last useful byte of the skb to the end of the last cacheline
+ * (tailout_ignore).
+ *
+ * size is the number of bytes to transfer which includes the skb->len
+ * (useful bytes of the senders skb) plus the leadin and tailout
+ */
+struct xpnet_message {
+ u16 version; /* Version for this message */
+ u16 embedded_bytes; /* #of bytes embedded in XPC message */
+ u32 magic; /* Special number indicating this is xpnet */
+ u64 buf_pa; /* phys address of buffer to retrieve */
+ u32 size; /* #of bytes in buffer */
+ u8 leadin_ignore; /* #of bytes to ignore at the beginning */
+ u8 tailout_ignore; /* #of bytes to ignore at the end */
+ unsigned char data; /* body of small packets */
+};
+
+/*
+ * Determine the size of our message, the cacheline aligned size,
+ * and then the number of messages we will request from XPC.
+ *
+ * XPC expects each message to exist in an individual cacheline.
+ */
+#define XPNET_MSG_SIZE (L1_CACHE_BYTES - XPC_MSG_PAYLOAD_OFFSET)
+#define XPNET_MSG_DATA_MAX \
+ (XPNET_MSG_SIZE - (u64)(&((struct xpnet_message *)0)->data))
+#define XPNET_MSG_ALIGNED_SIZE (L1_CACHE_ALIGN(XPNET_MSG_SIZE))
+#define XPNET_MSG_NENTRIES (PAGE_SIZE / XPNET_MSG_ALIGNED_SIZE)
+
+
+#define XPNET_MAX_KTHREADS (XPNET_MSG_NENTRIES + 1)
+#define XPNET_MAX_IDLE_KTHREADS (XPNET_MSG_NENTRIES + 1)
+
+/*
+ * Version number of XPNET implementation. XPNET can always talk to versions
+ * with same major #, and never talk to versions with a different major #.
+ */
+#define _XPNET_VERSION(_major, _minor) (((_major) << 4) | (_minor))
+#define XPNET_VERSION_MAJOR(_v) ((_v) >> 4)
+#define XPNET_VERSION_MINOR(_v) ((_v) & 0xf)
+
+#define XPNET_VERSION _XPNET_VERSION(1,0) /* version 1.0 */
+#define XPNET_VERSION_EMBED _XPNET_VERSION(1,1) /* version 1.1 */
+#define XPNET_MAGIC 0x88786984 /* "XNET" */
+/* A message is valid when its major version and magic number match ours. */
+#define XPNET_VALID_MSG(_m)						     \
+   ((XPNET_VERSION_MAJOR((_m)->version) == XPNET_VERSION_MAJOR(XPNET_VERSION)) \
+    && ((_m)->magic == XPNET_MAGIC))
+
+#define XPNET_DEVICE_NAME "xp0"
+
+
+/*
+ * When messages are queued with xpc_send_notify, a kmalloc'd buffer
+ * of the following type is passed as a notification cookie. When the
+ * notification function is called, we use the cookie to decide
+ * whether all outstanding message sends have completed. The skb can
+ * then be released.
+ */
+struct xpnet_pending_msg {
+ struct list_head free_list;
+ struct sk_buff *skb;
+ atomic_t use_count;
+};
+
+/* driver specific structure pointed to by the device structure */
+struct xpnet_dev_private {
+ struct net_device_stats stats;
+};
+
+struct net_device *xpnet_device;
+
+/*
+ * When we are notified of other partitions activating, we add them to
+ * our bitmask of partitions to which we broadcast.
+ */
+static u64 xpnet_broadcast_partitions;
+/* protect above */
+static spinlock_t xpnet_broadcast_lock = SPIN_LOCK_UNLOCKED;
+
+/*
+ * Since the Block Transfer Engine (BTE) is being used for the transfer
+ * and it relies upon cache-line size transfers, we need to reserve at
+ * least one cache-line for head and tail alignment. The BTE is
+ * limited to 8MB transfers.
+ *
+ * Testing has shown that changing MTU to greater than 64KB has no effect
+ * on TCP as the two sides negotiate a Max Segment Size that is limited
+ * to 64K. Other protocols May use packets greater than this, but for
+ * now, the default is 64KB.
+ */
+#define XPNET_MAX_MTU (0x800000UL - L1_CACHE_BYTES)
+/* 32KB has been determined to be the ideal */
+#define XPNET_DEF_MTU (0x8000UL)
+
+
+/*
+ * The partition id is encapsulated in the MAC address. The following
+ * define locates the octet the partid is in.
+ */
+#define XPNET_PARTID_OCTET 1
+#define XPNET_LICENSE_OCTET 2
+
+
+/*
+ * Define the XPNET debug device structure that is to be used with dev_dbg(),
+ * dev_err(), dev_warn(), and dev_info().
+ */
+struct device_driver xpnet_dbg_name = {
+ .name = "xpnet"
+};
+
+struct device xpnet_dbg_subname = {
+ .bus_id = {0}, /* set to "" */
+ .driver = &xpnet_dbg_name
+};
+
+struct device *xpnet = &xpnet_dbg_subname;
+
+/*
+ * Packet was received by XPC and forwarded to us; build an skb and pass it up.
+ */
+static void
+xpnet_receive(partid_t partid, int channel, struct xpnet_message *msg)
+{
+	struct sk_buff *skb;
+	bte_result_t bret;
+	struct xpnet_dev_private *priv =
+		(struct xpnet_dev_private *) xpnet_device->priv;
+
+
+	if (!XPNET_VALID_MSG(msg)) {
+		/*
+		 * Wrong XPNET major version or bad magic number. Ignore.
+		 */
+		xpc_received(partid, channel, (void *) msg);
+
+		priv->stats.rx_errors++;
+
+		return;
+	}
+	dev_dbg(xpnet, "received 0x%lx, %d, %d, %d\n", msg->buf_pa, msg->size,
+		msg->leadin_ignore, msg->tailout_ignore);
+
+
+	/* reserve an extra cache line */
+	skb = dev_alloc_skb(msg->size + L1_CACHE_BYTES);
+	if (!skb) {
+		dev_err(xpnet, "failed on dev_alloc_skb(%d)\n",
+			msg->size + L1_CACHE_BYTES);
+
+		xpc_received(partid, channel, (void *) msg);
+
+		priv->stats.rx_errors++;
+
+		return;
+	}
+
+	/*
+	 * The allocated skb has some reserved space.
+	 * In order to use bte_copy, we need to get the
+	 * skb->data pointer moved forward.
+	 */
+	skb_reserve(skb, (L1_CACHE_BYTES - ((u64)skb->data &
+					    (L1_CACHE_BYTES - 1)) +
+			  msg->leadin_ignore));
+
+	/*
+	 * Update the tail pointer to indicate data actually
+	 * transferred.
+	 */
+	skb_put(skb, (msg->size - msg->leadin_ignore - msg->tailout_ignore));
+
+	/*
+	 * Move the data over from the other side.
+	 */
+	if ((XPNET_VERSION_MINOR(msg->version) == 1) &&
+				(msg->embedded_bytes != 0)) {
+		dev_dbg(xpnet, "copying embedded message. memcpy(0x%p, 0x%p, "
+			"%lu)\n", skb->data, &msg->data,
+			(size_t) msg->embedded_bytes);
+
+		memcpy(skb->data, &msg->data, (size_t) msg->embedded_bytes);
+	} else {
+		dev_dbg(xpnet, "transferring buffer to the skb->data area;\n\t"
+			"bte_copy(0x%p, 0x%p, %u)\n", (void *)msg->buf_pa,
+			(void *)__pa((u64)skb->data & ~(L1_CACHE_BYTES - 1)),
+			msg->size);
+
+		bret = bte_copy(msg->buf_pa,
+				__pa((u64)skb->data & ~(L1_CACHE_BYTES - 1)),
+				msg->size, (BTE_NOTIFY | BTE_WACQUIRE), NULL);
+
+		if (bret != BTE_SUCCESS) {
+			// >>> NOTE(review): skb is not freed on this path (leak).
+			// >>> Original note: skb appears in_use, so we can't
+			// >>> just call dev_kfree_skb; needs better cleanup.
+			dev_err(xpnet, "bte_copy(0x%p, 0x%p, 0x%x) returned "
+				"error=0x%x\n", (void *)msg->buf_pa,
+				(void *)__pa((u64)skb->data &
+							~(L1_CACHE_BYTES - 1)),
+				msg->size, bret);
+
+			xpc_received(partid, channel, (void *) msg);
+
+			priv->stats.rx_errors++;
+
+			return;
+		}
+	}
+
+	dev_dbg(xpnet, "<skb->head=0x%p skb->data=0x%p skb->tail=0x%p "
+		"skb->end=0x%p skb->len=%d\n", (void *) skb->head,
+		(void *) skb->data, (void *) skb->tail, (void *) skb->end,
+		skb->len);
+
+	skb->dev = xpnet_device;
+	skb->protocol = eth_type_trans(skb, xpnet_device);
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+	dev_dbg(xpnet, "passing skb to network layer; \n\tskb->head=0x%p "
+		"skb->data=0x%p skb->tail=0x%p skb->end=0x%p skb->len=%d\n",
+		(void *) skb->head, (void *) skb->data, (void *) skb->tail,
+		(void *) skb->end, skb->len);
+
+	/* eth_type_trans() pulled the MAC header, so count ETH_HLEN back in */
+	priv->stats.rx_packets++;
+	priv->stats.rx_bytes += skb->len + ETH_HLEN;
+
+	netif_rx_ni(skb);
+	xpc_received(partid, channel, (void *) msg);
+}
+
+
+/*
+ * This is the handler which XPC calls during any sort of change in
+ * state or message reception on a connection.
+ */
+static void
+xpnet_connection_activity(enum xpc_retval reason, partid_t partid, int channel,
+ void *data, void *key)
+{
+ long bp; /* copy of the partition mask taken under the lock */
+
+
+ DBUG_ON(partid <= 0 || partid >= MAX_PARTITIONS);
+ DBUG_ON(channel != XPC_NET_CHANNEL);
+
+ switch(reason) {
+ case xpcMsgReceived: /* message received */
+ DBUG_ON(data == NULL);
+
+ xpnet_receive(partid, channel, (struct xpnet_message *) data);
+ break;
+
+ case xpcConnected: /* connection completed to a partition */
+ spin_lock_bh(&xpnet_broadcast_lock);
+ xpnet_broadcast_partitions |= 1UL << (partid -1 );
+ bp = xpnet_broadcast_partitions;
+ spin_unlock_bh(&xpnet_broadcast_lock);
+
+ netif_carrier_on(xpnet_device);
+
+ dev_dbg(xpnet, "%s connection created to partition %d; "
+ "xpnet_broadcast_partitions=0x%lx\n",
+ xpnet_device->name, partid, bp);
+ break;
+
+ default: /* every other reason code is handled as a disconnect */
+ spin_lock_bh(&xpnet_broadcast_lock);
+ xpnet_broadcast_partitions &= ~(1UL << (partid -1 ));
+ bp = xpnet_broadcast_partitions;
+ spin_unlock_bh(&xpnet_broadcast_lock);
+
+ if (bp == 0) { /* no partition left connected */
+ netif_carrier_off(xpnet_device);
+ }
+
+ dev_dbg(xpnet, "%s disconnected from partition %d; "
+ "xpnet_broadcast_partitions=0x%lx\n",
+ xpnet_device->name, partid, bp);
+ break;
+
+ }
+}
+
+/* ifconfig up: register with XPC on the network channel. */
+static int
+xpnet_dev_open(struct net_device *dev)
+{
+ enum xpc_retval ret;
+
+
+ dev_dbg(xpnet, "calling xpc_connect(%d, 0x%p, NULL, %ld, %ld, %d, "
+ "%d)\n", XPC_NET_CHANNEL, xpnet_connection_activity,
+ XPNET_MSG_SIZE, XPNET_MSG_NENTRIES, XPNET_MAX_KTHREADS,
+ XPNET_MAX_IDLE_KTHREADS);
+
+ ret = xpc_connect(XPC_NET_CHANNEL, xpnet_connection_activity, NULL,
+ XPNET_MSG_SIZE, XPNET_MSG_NENTRIES,
+ XPNET_MAX_KTHREADS, XPNET_MAX_IDLE_KTHREADS);
+ if (ret != xpcSuccess) {
+ dev_err(xpnet, "ifconfig up of %s failed on XPC connect, "
+ "ret=%d\n", dev->name, ret);
+
+ return -ENOMEM; /* every xpc_connect() failure maps to -ENOMEM */
+ }
+
+ dev_dbg(xpnet, "ifconfig up of %s; XPC connected\n", dev->name);
+
+ return 0;
+}
+
+/* ifconfig down: drop our XPC registration on the network channel. */
+static int
+xpnet_dev_stop(struct net_device *dev)
+{
+ xpc_disconnect(XPC_NET_CHANNEL);
+
+ dev_dbg(xpnet, "ifconfig down of %s; XPC disconnected\n", dev->name);
+
+ return 0;
+}
+
+/* Set a new MTU; accepts 68..XPNET_MAX_MTU inclusive, else -EINVAL. */
+static int
+xpnet_dev_change_mtu(struct net_device *dev, int new_mtu)
+{
+ /* 68 comes from min TCP+IP+MAC header */
+ if ((new_mtu < 68) || (new_mtu > XPNET_MAX_MTU)) {
+ dev_err(xpnet, "ifconfig %s mtu %d failed; value must be "
+ "between 68 and %ld\n", dev->name, new_mtu,
+ XPNET_MAX_MTU);
+ return -EINVAL;
+ }
+
+ dev->mtu = new_mtu;
+ dev_dbg(xpnet, "ifconfig %s mtu set to %d\n", dev->name, new_mtu);
+ return 0;
+}
+
+
+/*
+ * Required for the net_device structure.
+ */
+static int
+xpnet_dev_set_config(struct net_device *dev, struct ifmap *new_map)
+{
+ return 0;
+}
+
+
+/*
+ * Return statistics to the caller.
+ */
+static struct net_device_stats *
+xpnet_dev_get_stats(struct net_device *dev)
+{
+ struct xpnet_dev_private *priv;
+
+
+ priv = (struct xpnet_dev_private *) dev->priv;
+
+ return &priv->stats;
+}
+
+
+/*
+ * Notification that the other end has received the message and
+ * DMA'd the skb information. At this point, they are done with
+ * our side. When all recipients are done processing, we
+ * release the skb and then release our pending message structure.
+ */
+static void
+xpnet_send_completed(enum xpc_retval reason, partid_t partid, int channel,
+			void *__qm)
+{
+	struct xpnet_pending_msg *queued_msg =
+		(struct xpnet_pending_msg *) __qm;
+
+
+	DBUG_ON(queued_msg == NULL);
+
+	dev_dbg(xpnet, "message to %d notified with reason %d\n",
+		partid, reason);
+
+	/* use_count also holds the sender's own reference; last one out frees */
+	if (atomic_dec_return(&queued_msg->use_count) == 0) {
+		dev_dbg(xpnet, "all acks for skb->head=0x%p\n",
+			(void *) queued_msg->skb->head);
+		/* may be called from any context, hence the _any variant */
+		dev_kfree_skb_any(queued_msg->skb);
+		kfree(queued_msg);
+	}
+}
+
+
+/*
+ * Network layer has formatted a packet (skb) and is ready to place it
+ * "on the wire". Prepare and send an xpnet_message to all partitions
+ * which have connected with us and are targets of this packet. Always
+ * returns 0: the skb is consumed (sent or dropped), never requeued.
+ *
+ * MAC-NOTE: the second octet of the destination MAC address holds the
+ * destination partid; 0xff there means broadcast to all partitions.
+ */
+static int
+xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct xpnet_pending_msg *queued_msg;
+	enum xpc_retval ret;
+	struct xpnet_message *msg;
+	u64 start_addr, end_addr;
+	long dp;
+	u8 second_mac_octet;
+	partid_t dest_partid;
+	struct xpnet_dev_private *priv;
+	u16 embedded_bytes;
+
+
+	priv = (struct xpnet_dev_private *) dev->priv;
+
+
+	dev_dbg(xpnet, ">skb->head=0x%p skb->data=0x%p skb->tail=0x%p "
+		"skb->end=0x%p skb->len=%d\n", (void *) skb->head,
+		(void *) skb->data, (void *) skb->tail, (void *) skb->end,
+		skb->len);
+
+
+	/*
+	 * The xpnet_pending_msg tracks how many outstanding
+	 * xpc_send_notifies are relying on this skb. When none
+	 * remain, release the skb.
+	 */
+	queued_msg = kmalloc(sizeof(struct xpnet_pending_msg), GFP_ATOMIC);
+	if (queued_msg == NULL) {
+		dev_warn(xpnet, "failed to kmalloc %ld bytes; dropping "
+			"packet\n", sizeof(struct xpnet_pending_msg));
+		/* really drop: free the skb and return 0 so it isn't requeued */
+		priv->stats.tx_errors++;
+		dev_kfree_skb(skb);
+		return 0;
+	}
+
+
+	/* get the beginning of the first cacheline and end of last */
+	start_addr = ((u64) skb->data & ~(L1_CACHE_BYTES - 1));
+	end_addr = L1_CACHE_ALIGN((u64) skb->tail);
+
+	/* calculate how many bytes to embed in the XPC message */
+	embedded_bytes = 0;
+	if (unlikely(skb->len <= XPNET_MSG_DATA_MAX)) {
+		/* skb->data does fit so embed */
+		embedded_bytes = skb->len;
+	}
+
+
+	/*
+	 * Since the send occurs asynchronously, we set the count to one
+	 * and begin sending. Any sends that happen to complete before
+	 * we are done sending will not free the skb. We will be left
+	 * with that task during exit. This also handles the case of
+	 * a packet destined for a partition which is no longer up.
+	 */
+	atomic_set(&queued_msg->use_count, 1);
+	queued_msg->skb = skb;
+
+	second_mac_octet = skb->data[XPNET_PARTID_OCTET];
+	if (second_mac_octet == 0xff) {
+		/* we are being asked to broadcast to all partitions */
+		dp = xpnet_broadcast_partitions;
+	} else if (second_mac_octet != 0 &&
+				second_mac_octet < MAX_PARTITIONS) {
+		dp = xpnet_broadcast_partitions &
+					(1UL << (second_mac_octet - 1));
+	} else {
+		/* 0 or an out-of-range partid (shift would overflow). Ignore */
+		dp = 0;
+	}
+	dev_dbg(xpnet, "destination Partitions mask (dp) = 0x%lx\n", dp);
+
+	/*
+	 * If we wanted to allow promiscuous mode to work like an
+	 * unswitched network, this would be a good point to OR in a
+	 * mask of partitions which should be receiving all packets.
+	 */
+
+	/*
+	 * Main send loop.
+	 */
+	for (dest_partid = 1; dp && dest_partid < MAX_PARTITIONS;
+	     dest_partid++) {
+
+
+		if (!(dp & (1UL << (dest_partid - 1)))) {
+			/* not destined for this partition */
+			continue;
+		}
+
+		/* remove this partition from the destinations mask */
+		dp &= ~(1UL << (dest_partid - 1));
+
+
+		/* found a partition to send to */
+
+		ret = xpc_allocate(dest_partid, XPC_NET_CHANNEL,
+				   XPC_NOWAIT, (void **)&msg);
+		if (unlikely(ret != xpcSuccess)) {
+			continue;
+		}
+
+		msg->embedded_bytes = embedded_bytes;
+		if (unlikely(embedded_bytes != 0)) {
+			msg->version = XPNET_VERSION_EMBED;
+			dev_dbg(xpnet, "calling memcpy(0x%p, 0x%p, 0x%lx)\n",
+				&msg->data, skb->data, (size_t) embedded_bytes);
+			memcpy(&msg->data, skb->data, (size_t) embedded_bytes);
+		} else {
+			msg->version = XPNET_VERSION;
+		}
+		msg->magic = XPNET_MAGIC;
+		msg->size = end_addr - start_addr;
+		msg->leadin_ignore = (u64) skb->data - start_addr;
+		msg->tailout_ignore = end_addr - (u64) skb->tail;
+		msg->buf_pa = __pa(start_addr);
+
+		dev_dbg(xpnet, "sending XPC message to %d:%d\nmsg->buf_pa="
+			"0x%lx, msg->size=%u, msg->leadin_ignore=%u, "
+			"msg->tailout_ignore=%u\n", dest_partid,
+			XPC_NET_CHANNEL, msg->buf_pa, msg->size,
+			msg->leadin_ignore, msg->tailout_ignore);
+
+		/* one reference per notify; xpnet_send_completed() drops it */
+		atomic_inc(&queued_msg->use_count);
+
+		ret = xpc_send_notify(dest_partid, XPC_NET_CHANNEL, msg,
+				      xpnet_send_completed, queued_msg);
+		if (unlikely(ret != xpcSuccess)) {
+			atomic_dec(&queued_msg->use_count);
+			continue;
+		}
+
+	}
+
+	/* count before dropping our reference: skb may be freed after that */
+	priv->stats.tx_packets++;
+	priv->stats.tx_bytes += skb->len;
+
+	if (atomic_dec_return(&queued_msg->use_count) == 0) {
+		dev_dbg(xpnet, "no partitions to receive packet destined for "
+			"%d\n", dest_partid);
+
+		dev_kfree_skb(skb);
+		kfree(queued_msg);
+	}
+
+	return 0;
+}
+
+
+/*
+ * Deal with transmit timeouts coming from the network layer.
+ */
+static void
+xpnet_dev_tx_timeout (struct net_device *dev)
+{
+ struct xpnet_dev_private *priv;
+
+
+ priv = (struct xpnet_dev_private *) dev->priv;
+
+ priv->stats.tx_errors++;
+ return;
+}
+
+/* Module init: allocate, configure and register the xpnet net_device. */
+static int __init
+xpnet_init(void)
+{
+ int i;
+ u32 license_num;
+ int result = -ENOMEM;
+
+
+ dev_info(xpnet, "registering network device %s\n", XPNET_DEVICE_NAME);
+
+ /*
+ * use ether_setup() to init the majority of our device
+ * structure and then override the necessary pieces.
+ */
+ xpnet_device = alloc_netdev(sizeof(struct xpnet_dev_private),
+ XPNET_DEVICE_NAME, ether_setup);
+ if (xpnet_device == NULL) {
+ return -ENOMEM;
+ }
+
+ netif_carrier_off(xpnet_device);
+
+ xpnet_device->mtu = XPNET_DEF_MTU;
+ xpnet_device->change_mtu = xpnet_dev_change_mtu;
+ xpnet_device->open = xpnet_dev_open;
+ xpnet_device->get_stats = xpnet_dev_get_stats;
+ xpnet_device->stop = xpnet_dev_stop;
+ xpnet_device->hard_start_xmit = xpnet_dev_hard_start_xmit;
+ xpnet_device->tx_timeout = xpnet_dev_tx_timeout;
+ xpnet_device->set_config = xpnet_dev_set_config;
+
+ /*
+ * Multicast assumes the LSB of the first octet is set for multicast
+ * MAC addresses. We chose the first octet of the MAC to be unlikely
+ * to collide with any vendor's officially issued MAC.
+ */
+ xpnet_device->dev_addr[0] = 0xfe;
+ xpnet_device->dev_addr[XPNET_PARTID_OCTET] = sn_local_partid();
+ license_num = sn_partition_serial_number_val();
+ for (i = 3; i >= 0; i--) { /* low byte goes into the highest octet */
+ xpnet_device->dev_addr[XPNET_LICENSE_OCTET + i] =
+ license_num & 0xff;
+ license_num = license_num >> 8;
+ }
+
+ /*
+ * ether_setup() sets this to a multicast device. We are
+ * really not supporting multicast at this time.
+ */
+ xpnet_device->flags &= ~IFF_MULTICAST;
+
+ /*
+ * No need to checksum as it is a DMA transfer. The BTE will
+ * report an error if the data is not retrievable and the
+ * packet will be dropped.
+ */
+ xpnet_device->features = NETIF_F_NO_CSUM | NETIF_F_HIGHDMA;
+
+ result = register_netdev(xpnet_device);
+ if (result != 0) {
+ free_netdev(xpnet_device); /* registration failed: undo the alloc */
+ }
+
+ return result;
+}
+module_init(xpnet_init);
+
+/* Module exit: unregister the xpnet net_device and free it. */
+static void __exit
+xpnet_exit(void)
+{
+ dev_info(xpnet, "unregistering network device %s\n",
+ xpnet_device[0].name);
+
+ unregister_netdev(xpnet_device);
+
+ free_netdev(xpnet_device);
+}
+module_exit(xpnet_exit);
+
+
+MODULE_AUTHOR("Silicon Graphics, Inc.");
+MODULE_DESCRIPTION("Cross Partition Network adapter (XPNET)");
+MODULE_LICENSE("GPL");
+
^ permalink raw reply [flat|nested] 7+ messages in thread