From: Bjorn Helgaas <bjorn_helgaas@hp.com>
To: linux-ia64@vger.kernel.org
Subject: [Linux-ia64] SAL error record logging/decoding
Date: Wed, 07 May 2003 23:41:08 +0000 [thread overview]
Message-ID: <marc-linux-ia64-105590723705660@msgid-missing> (raw)
[-- Attachment #1: Type: text/plain, Size: 2001 bytes --]
The MCA/INIT/CMC/CPE log decoding currently in arch/ia64/kernel/mca.c
has some problems:
- It doesn't know much about OEM-specific sections.
- At boot-time, it sometimes takes so long to print
the log to the console that the BSP erroneously
assumes an AP is stuck. This sometimes causes
*another* MCA.
- The log goes ONLY to the console, where the output
may be lost.
So here's some fodder for discussion. I don't claim that this is ready
for prime time; I just want to get some feedback on whether this
is a reasonable approach.
The attached patch (against 2.4.21-rc1) makes the raw, binary
error records straight from SAL available via files in /proc:
/proc/sal/cpu<n>/{mca,init,cmc,cpe}
If you read the file, you get the raw data. If you write "clear" to
it, you invalidate the current error record (which, as I read the spec,
may make another pending record available to be read).
The idea is that:
- An rc script run at boot-time can save all the logs in
files, clearing each afterwards.
- A user-level analysis tool can decode them as needed
(perhaps also run from the same rc script above).
- The user-level analyzer need not be open-source, if
people are worried about IP in the OEM-specific sections.
- A baseline open-source analyzer can provide at least the
functionality available today in the kernel decoder.
So, attached are the kernel patch against 2.4.21-rc1 and a simple
user program ("salinfo") to decode the logs. Note that the kernel
patch removes the ia64_sal_clear_state_info() calls from mca.c, so the
error records will be preserved until the user program can read them.
This feels like the right thing to me (only a user program
can know that the logs have been saved somewhere safe), but
no doubt there are issues here.
The user-space analyzer is derived from the current kernel code
in mca.c and should produce identical output. For now, I left
all the decoding code in the kernel as well, but ultimately it could be
removed.
Bjorn
[-- Attachment #2: diffs --]
[-- Type: text/x-diff, Size: 6379 bytes --]
===== arch/ia64/kernel/mca.c 1.23 vs edited =====
--- 1.23/arch/ia64/kernel/mca.c Fri Apr 18 04:07:09 2003
+++ edited/arch/ia64/kernel/mca.c Fri May 2 11:24:15 2003
@@ -156,10 +156,6 @@
*/
platform_err = ia64_log_print(sal_info_type, (prfunc_t)printk);
- /* temporary: only clear SAL logs on hardware-corrected errors
- or if we're logging an error after an MCA-initiated reboot */
- if ((sal_info_type > 1) || (called_from_init))
- ia64_sal_clear_state_info(sal_info_type);
return platform_err;
}
@@ -1235,9 +1231,6 @@
proc_ptr = &plog_ptr->proc_err;
ia64_process_min_state_save(&SAL_LPI_PSI_INFO(proc_ptr)->min_state_area);
-
- /* Clear the INIT SAL logs now that they have been saved in the OS buffer */
- ia64_sal_clear_state_info(SAL_INFO_TYPE_INIT);
init_handler_platform(proc_ptr, pt, sw); /* call platform specific routines */
}
===== arch/ia64/kernel/salinfo.c 1.1 vs edited =====
--- 1.1/arch/ia64/kernel/salinfo.c Thu Sep 12 10:43:47 2002
+++ edited/arch/ia64/kernel/salinfo.c Tue May 6 14:53:28 2003
@@ -4,6 +4,8 @@
* Creates entries in /proc/sal for various system features.
*
* Copyright (c) 2001 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (c) 2003 Hewlett-Packard Co
+ * Bjorn Helgaas <bjorn_helgaas@hp.com>
*
* 10/30/2001 jbarnes@sgi.com copied much of Stephane's palinfo
* code to create this file
@@ -12,8 +14,10 @@
#include <linux/types.h>
#include <linux/proc_fs.h>
#include <linux/module.h>
+#include <linux/smp.h>
#include <asm/sal.h>
+#include <asm/uaccess.h>
MODULE_AUTHOR("Jesse Barnes <jbarnes@sgi.com>");
MODULE_DESCRIPTION("/proc interface to IA-64 SAL features");
@@ -40,25 +44,191 @@
#define NR_SALINFO_ENTRIES (sizeof(salinfo_entries)/sizeof(salinfo_entry_t))
-/*
- * One for each feature and one more for the directory entry...
- */
-static struct proc_dir_entry *salinfo_proc_entries[NR_SALINFO_ENTRIES + 1];
+static char *salinfo_log_name[] = {
+ "mca",
+ "init",
+ "cmc",
+ "cpe",
+};
+
+static struct proc_dir_entry *salinfo_proc_entries[
+ ARRAY_SIZE(salinfo_entries) + /* /proc/sal/bus_lock */
+ (NR_CPUS * ARRAY_SIZE(salinfo_log_name)) + /* /proc/sal/cpu0/mca */
+ NR_CPUS + /* /proc/sal/cpu0 */
+ 1]; /* /proc/sal */
+
+struct salinfo_log_data {
+ int type;
+ u8 *log_buffer;
+ u64 log_size;
+};
+
+static void
+salinfo_log_read_cpu(void *context)
+{
+ struct salinfo_log_data *info = context;
+ u64 size;
+
+ size = ia64_sal_get_state_info_size(info->type);
+ info->log_buffer = kmalloc(size, GFP_ATOMIC);
+ if (!info->log_buffer)
+ return;
+
+ info->log_size = ia64_sal_get_state_info(info->type, (u64 *) info->log_buffer);
+}
+
+static ssize_t
+salinfo_log_read(struct file *file, char *buffer, size_t count, loff_t *ppos)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ struct proc_dir_entry *entry = (struct proc_dir_entry *) inode->u.generic_ip;
+ struct salinfo_log_data info;
+ int cpu, ret;
+ void *data;
+ size_t size;
+
+ if (!suser())
+ return -EPERM;
+
+ MOD_INC_USE_COUNT;
+
+ cpu = (u64) entry->data >> 16;
+ info.type = (u64) entry->data & 0xffff;
+
+ if (cpu == smp_processor_id())
+ salinfo_log_read_cpu(&info);
+ else {
+#ifdef CONFIG_SMP
+ smp_call_function_single(cpu, salinfo_log_read_cpu, &info, 0, 1);
+#else
+ printk(KERN_ERR "%s: trying to read CPU %d data from %d\n",
+ __FUNCTION__, cpu, smp_processor_id());
+ info.log_buffer = 0;
+#endif
+ }
+
+ if (!info.log_buffer || *ppos >= info.log_size) {
+ ret = 0;
+ goto out;
+ }
+
+ data = info.log_buffer + file->f_pos;
+ size = info.log_size - file->f_pos;
+ if (size > count)
+ size = count;
+
+ if (copy_to_user(buffer, data, size)) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ *ppos += size;
+ ret = size;
+
+out:
+ kfree(info.log_buffer);
+
+ MOD_DEC_USE_COUNT;
+
+ return ret;
+}
+
+static void
+salinfo_log_write_cpu(void *context)
+{
+ u64 type = (u64) context;
+
+ ia64_sal_clear_state_info(type);
+}
+
+static ssize_t
+salinfo_log_write(struct file *file, const char *buffer, size_t count, loff_t *ppos)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ struct proc_dir_entry *entry = (struct proc_dir_entry *) inode->u.generic_ip;
+ char cmd[16];
+ int cpu;
+ u64 type;
+
+ if (!suser())
+ return -EPERM;
+
+ if (ppos != &file->f_pos)
+ return -ESPIPE;
+
+ memset(cmd, 0, sizeof(cmd));
+ if (copy_from_user(cmd, buffer, sizeof(cmd)))
+ return -EFAULT;
+
+ if (strncmp(cmd, "clear", 5))
+ return count;
+
+ MOD_INC_USE_COUNT;
+
+ cpu = (u64) entry->data >> 16;
+ type = (u64) entry->data & 0xffff;
+
+ if (cpu == smp_processor_id())
+ salinfo_log_write_cpu((void *) type);
+ else {
+#ifdef CONFIG_SMP
+ smp_call_function_single(cpu, salinfo_log_write_cpu, (void *) type, 0, 1);
+#else
+ printk(KERN_ERR "%s: trying to clear CPU %d data from %d\n",
+ __FUNCTION__, cpu, smp_processor_id());
+#endif
+ }
+
+ MOD_DEC_USE_COUNT;
+
+ return count;
+}
+
+static struct file_operations salinfo_log_fops = {
+ .read = salinfo_log_read,
+ .write = salinfo_log_write,
+};
static int __init
salinfo_init(void)
{
struct proc_dir_entry *salinfo_dir; /* /proc/sal dir entry */
struct proc_dir_entry **sdir = salinfo_proc_entries; /* keeps track of every entry */
- int i;
+ struct proc_dir_entry *cpu_dir, *entry;
+#define CPUSTR "cpu%d"
+ char name[sizeof(CPUSTR)];
+ int i, j;
salinfo_dir = proc_mkdir("sal", NULL);
+ if (!salinfo_dir)
+ return 0;
for (i=0; i < NR_SALINFO_ENTRIES; i++) {
/* pass the feature bit in question as misc data */
*sdir++ = create_proc_read_entry (salinfo_entries[i].name, 0, salinfo_dir,
salinfo_read, (void *)salinfo_entries[i].feature);
}
+
+ for (i = 0; i < NR_CPUS; i++) {
+ if (!cpu_online(i))
+ continue;
+
+ sprintf(name, CPUSTR, i);
+ cpu_dir = proc_mkdir(name, salinfo_dir);
+ if (!cpu_dir)
+ continue;
+
+ for (j = 0; j < ARRAY_SIZE(salinfo_log_name); j++) {
+ entry = create_proc_entry(salinfo_log_name[j], 0, cpu_dir);
+ if (entry) {
+ entry->proc_fops = &salinfo_log_fops;
+ entry->data = (void *) ((u64) i << 16 | j);
+ *sdir++ = entry;
+ }
+ }
+ *sdir++ = cpu_dir;
+ }
+
*sdir++ = salinfo_dir;
return 0;
@@ -69,7 +239,7 @@
{
int i = 0;
- for (i = 0; i < NR_SALINFO_ENTRIES ; i++) {
+ for (i = 0; i < ARRAY_SIZE(salinfo_proc_entries); i++) {
if (salinfo_proc_entries[i])
remove_proc_entry (salinfo_proc_entries[i]->name, NULL);
}
[-- Attachment #3: salinfo.tar.gz --]
[-- Type: application/x-tgz, Size: 32027 bytes --]
next reply other threads:[~2003-05-07 23:41 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2003-05-07 23:41 Bjorn Helgaas [this message]
2003-05-08 0:05 ` [Linux-ia64] SAL error record logging/decoding David Mosberger
2003-05-08 0:13 ` Luck, Tony
2003-05-08 19:32 ` Bjorn Helgaas
2003-05-20 22:58 ` Bjorn Helgaas
2003-05-21 18:06 ` Luck, Tony
2003-05-21 20:48 ` Luck, Tony
2003-05-21 21:51 ` Luck, Tony
2003-05-22 21:29 ` Bjorn Helgaas
2003-05-23 0:24 ` Bjorn Helgaas
2003-05-23 15:42 ` Luck, Tony
2003-05-28 23:26 ` Bjorn Helgaas
2003-05-29 0:07 ` Keith Owens
2003-05-29 1:34 ` Bjorn Helgaas
2003-05-29 1:37 ` Keith Owens
2003-05-29 20:49 ` Luck, Tony
2003-05-29 21:31 ` Bjorn Helgaas
2003-05-29 21:47 ` Luck, Tony
2003-05-29 22:38 ` Bjorn Helgaas
2003-05-29 23:33 ` Luck, Tony
2003-05-30 11:56 ` Matthew Wilcox
2003-05-30 20:27 ` Bjorn Helgaas
2003-05-30 20:31 ` Bjorn Helgaas
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=marc-linux-ia64-105590723705660@msgid-missing \
--to=bjorn_helgaas@hp.com \
--cc=linux-ia64@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox