Merge branch 'apei' into release

This commit is contained in:
Len Brown 2010-08-14 23:55:47 -04:00
commit feb29c5175
8 changed files with 392 additions and 101 deletions

View file

@ -80,7 +80,7 @@ int apei_write_mce(struct mce *m)
rcd.hdr.revision = CPER_RECORD_REV;
rcd.hdr.signature_end = CPER_SIG_END;
rcd.hdr.section_count = 1;
rcd.hdr.error_severity = CPER_SER_FATAL;
rcd.hdr.error_severity = CPER_SEV_FATAL;
/* timestamp, platform_id, partition_id are all invalid */
rcd.hdr.validation_bits = 0;
rcd.hdr.record_length = sizeof(rcd);
@ -96,7 +96,7 @@ int apei_write_mce(struct mce *m)
rcd.sec_hdr.validation_bits = 0;
rcd.sec_hdr.flags = CPER_SEC_PRIMARY;
rcd.sec_hdr.section_type = CPER_SECTION_TYPE_MCE;
rcd.sec_hdr.section_severity = CPER_SER_FATAL;
rcd.sec_hdr.section_severity = CPER_SEV_FATAL;
memcpy(&rcd.mce, m, sizeof(*m));

View file

@ -28,3 +28,12 @@ config ACPI_APEI_EINJ
EINJ provides a hardware error injection mechanism, it is
mainly used for debugging and testing the other parts of
APEI and some other RAS features.
config ACPI_APEI_ERST_DEBUG
tristate "APEI Error Record Serialization Table (ERST) Debug Support"
depends on ACPI_APEI
help
ERST is a way provided by APEI to save and retrieve hardware
error information to and from a persistent store. Enable this
if you want to debug and test the ERST kernel support
and firmware implementation.

View file

@ -1,5 +1,6 @@
obj-$(CONFIG_ACPI_APEI) += apei.o
obj-$(CONFIG_ACPI_APEI_GHES) += ghes.o
obj-$(CONFIG_ACPI_APEI_EINJ) += einj.o
obj-$(CONFIG_ACPI_APEI_ERST_DEBUG) += erst-dbg.o
apei-y := apei-base.o hest.o cper.o erst.o

View file

@ -482,14 +482,14 @@ err_unmap_ioport:
list_for_each_entry(res, &resources->ioport, list) {
if (res == res_bak)
break;
release_mem_region(res->start, res->end - res->start);
release_region(res->start, res->end - res->start);
}
res_bak = NULL;
err_unmap_iomem:
list_for_each_entry(res, &resources->iomem, list) {
if (res == res_bak)
break;
release_region(res->start, res->end - res->start);
release_mem_region(res->start, res->end - res->start);
}
return -EINVAL;
}

View file

@ -0,0 +1,207 @@
/*
* APEI Error Record Serialization Table debug support
*
* ERST is a way provided by APEI to save and retrieve hardware error
* information to and from a persistent store. This file provides the
* debugging/testing support for ERST kernel support and firmware
* implementation.
*
* Copyright 2010 Intel Corp.
* Author: Huang Ying <ying.huang@intel.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/uaccess.h>
#include <acpi/apei.h>
#include <linux/miscdevice.h>
#include "apei-internal.h"
/* Prefix prepended to every log message emitted by this file */
#define ERST_DBG_PFX "ERST DBG: "
/* Largest record, in bytes, that the read and write handlers accept */
#define ERST_DBG_RECORD_LEN_MAX 4096
/*
 * Scratch buffer shared by erst_dbg_read() and erst_dbg_write(). It is
 * reallocated when a larger record is encountered and freed in
 * erst_dbg_exit(). erst_dbg_buf_len is the recorded size of the buffer.
 */
static void *erst_dbg_buf;
static unsigned int erst_dbg_buf_len;
/* Prevent erst_dbg_read/write from being invoked concurrently */
static DEFINE_MUTEX(erst_dbg_mutex);
/*
 * Open handler for the ERST debug device.
 *
 * Returns -ENODEV when erst_disable is set; otherwise returns whatever
 * nonseekable_open() reports for this inode/file pair.
 */
static int erst_dbg_open(struct inode *inode, struct file *file)
{
	return erst_disable ? -ENODEV : nonseekable_open(inode, file);
}
/*
 * ioctl handler for the ERST debug device.
 *
 * APEI_ERST_CLEAR_RECORD:     @arg points to a u64 record ID in user
 *                             space; the record is cleared through
 *                             erst_clear() and its result is returned.
 * APEI_ERST_GET_RECORD_COUNT: @arg points to a u32 in user space that
 *                             receives the value of erst_get_record_count().
 *
 * Returns -EFAULT when the record ID cannot be copied in, the error
 * reported by the ERST helper or by put_user(), and -ENOTTY for any
 * other command.
 */
static long erst_dbg_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
{
	u64 record_id;
	u32 record_count;
	int ret;

	if (cmd == APEI_ERST_CLEAR_RECORD) {
		if (copy_from_user(&record_id, (void __user *)arg,
				   sizeof(record_id)))
			return -EFAULT;
		return erst_clear(record_id);
	}

	if (cmd == APEI_ERST_GET_RECORD_COUNT) {
		ret = erst_get_record_count();
		if (ret < 0)
			return ret;
		record_count = ret;
		/* put_user() yields 0 on success, an error code otherwise */
		return put_user(record_count, (u32 __user *)arg);
	}

	return -ENOTTY;
}
/*
 * Read the next error record from ERST into a user buffer.
 *
 * @filp:  file being read (not used).
 * @ubuf:  user buffer that receives the record.
 * @usize: size of @ubuf in bytes.
 * @off:   file offset; must be 0.
 *
 * Records that vanish between the ID lookup and the read (-ENOENT from
 * erst_read()) are skipped and the next record is tried.
 *
 * Returns the record length on success and 0 when there is no more
 * record. Errors: -EINVAL if *@off is non-zero or the record does not
 * fit in @usize, -EINTR if waiting for the mutex was interrupted, -EIO
 * if the record is longer than ERST_DBG_RECORD_LEN_MAX, -ENOMEM if the
 * scratch buffer cannot be grown, -EFAULT if the copy to user space
 * fails, or the error reported by erst_get_next_record_id()/erst_read().
 */
static ssize_t erst_dbg_read(struct file *filp, char __user *ubuf,
			     size_t usize, loff_t *off)
{
	int rc;
	ssize_t len = 0;
	u64 id;

	if (*off != 0)
		return -EINVAL;

	if (mutex_lock_interruptible(&erst_dbg_mutex) != 0)
		return -EINTR;

retry_next:
	rc = erst_get_next_record_id(&id);
	if (rc)
		goto out;
	/* no more record */
	if (id == APEI_ERST_INVALID_RECORD_ID) {
		/*
		 * len may still hold -ENOENT from a record that was skipped
		 * above; reset it so end-of-records is reported as 0.
		 */
		len = 0;
		goto out;
	}
retry:
	rc = len = erst_read(id, erst_dbg_buf, erst_dbg_buf_len);
	/* The record may be cleared by others, try read next record */
	if (rc == -ENOENT)
		goto retry_next;
	if (rc < 0)
		goto out;
	if (len > ERST_DBG_RECORD_LEN_MAX) {
		pr_warning(ERST_DBG_PFX
			   "Record (ID: 0x%llx) length is too long: %zd\n",
			   id, len);
		rc = -EIO;
		goto out;
	}
	if (len > erst_dbg_buf_len) {
		void *p;

		/*
		 * Allocate the larger buffer before releasing the old one,
		 * so that a failed allocation leaves erst_dbg_buf and
		 * erst_dbg_buf_len consistent with each other.
		 */
		rc = -ENOMEM;
		p = kmalloc(len, GFP_KERNEL);
		if (!p)
			goto out;
		kfree(erst_dbg_buf);
		erst_dbg_buf = p;
		erst_dbg_buf_len = len;
		goto retry;
	}

	rc = -EINVAL;
	if (len > usize)
		goto out;

	rc = -EFAULT;
	if (copy_to_user(ubuf, erst_dbg_buf, len))
		goto out;
	rc = 0;
out:
	mutex_unlock(&erst_dbg_mutex);
	return rc ? rc : len;
}
/*
 * Write one error record supplied by user space into ERST.
 *
 * @filp:  file being written (not used).
 * @ubuf:  user buffer holding one complete record, starting with a
 *         struct cper_record_header.
 * @usize: number of bytes in @ubuf; must equal the record_length field
 *         of the header.
 * @off:   file offset (not used).
 *
 * Returns @usize on success. Errors: -EPERM without CAP_SYS_ADMIN,
 * -EINVAL if @usize exceeds ERST_DBG_RECORD_LEN_MAX, is smaller than the
 * record header, or differs from the header's record_length, -EINTR if
 * waiting for the mutex was interrupted, -ENOMEM if the scratch buffer
 * cannot be grown, -EFAULT if the copy from user space fails, or the
 * negative error reported by erst_write().
 */
static ssize_t erst_dbg_write(struct file *filp, const char __user *ubuf,
			      size_t usize, loff_t *off)
{
	int rc;
	struct cper_record_header *rcd;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (usize > ERST_DBG_RECORD_LEN_MAX) {
		pr_err(ERST_DBG_PFX "Too long record to be written\n");
		return -EINVAL;
	}
	/*
	 * The header is inspected below; a shorter write would make that
	 * read run past the copied data (or through a NULL buffer when
	 * nothing has been allocated yet).
	 */
	if (usize < sizeof(*rcd))
		return -EINVAL;

	if (mutex_lock_interruptible(&erst_dbg_mutex))
		return -EINTR;
	if (usize > erst_dbg_buf_len) {
		void *p;

		/*
		 * Allocate the larger buffer before releasing the old one,
		 * so that a failed allocation leaves erst_dbg_buf and
		 * erst_dbg_buf_len consistent with each other.
		 */
		rc = -ENOMEM;
		p = kmalloc(usize, GFP_KERNEL);
		if (!p)
			goto out;
		kfree(erst_dbg_buf);
		erst_dbg_buf = p;
		erst_dbg_buf_len = usize;
	}
	rc = copy_from_user(erst_dbg_buf, ubuf, usize);
	if (rc) {
		rc = -EFAULT;
		goto out;
	}
	rcd = erst_dbg_buf;
	rc = -EINVAL;
	if (rcd->record_length != usize)
		goto out;

	rc = erst_write(erst_dbg_buf);

out:
	mutex_unlock(&erst_dbg_mutex);
	return rc < 0 ? rc : usize;
}
/* File operations backing the ERST debug misc device */
static const struct file_operations erst_dbg_ops = {
	.owner		= THIS_MODULE,
	.open		= erst_dbg_open,
	.unlocked_ioctl	= erst_dbg_ioctl,
	.read		= erst_dbg_read,
	.write		= erst_dbg_write,
};
/* Misc device "erst_dbg", registered with a dynamically assigned minor */
static struct miscdevice erst_dbg_dev = {
	.name	= "erst_dbg",
	.minor	= MISC_DYNAMIC_MINOR,
	.fops	= &erst_dbg_ops,
};
/*
 * Module entry point: register the ERST debug misc device and pass the
 * result of misc_register() back to the caller.
 */
static __init int erst_dbg_init(void)
{
	int err = misc_register(&erst_dbg_dev);

	return err;
}
/*
 * Module exit point: deregister the ERST debug misc device, then free
 * the scratch buffer used by the read and write handlers.
 */
static __exit void erst_dbg_exit(void)
{
/* The device is deregistered before the buffer its handlers use is freed */
misc_deregister(&erst_dbg_dev);
kfree(erst_dbg_buf);
}
module_init(erst_dbg_init);
module_exit(erst_dbg_exit);
MODULE_AUTHOR("Huang Ying");
MODULE_DESCRIPTION("APEI Error Record Serialization Table debug support");
MODULE_LICENSE("GPL");

View file

@ -41,6 +41,8 @@
#include <linux/interrupt.h>
#include <linux/cper.h>
#include <linux/kdebug.h>
#include <linux/platform_device.h>
#include <linux/mutex.h>
#include <acpi/apei.h>
#include <acpi/atomicio.h>
#include <acpi/hed.h>
@ -87,6 +89,7 @@ struct ghes {
* used for that.
*/
static LIST_HEAD(ghes_sci);
static DEFINE_MUTEX(ghes_list_mutex);
static struct ghes *ghes_new(struct acpi_hest_generic *generic)
{
@ -132,26 +135,26 @@ static void ghes_fini(struct ghes *ghes)
}
enum {
GHES_SER_NO = 0x0,
GHES_SER_CORRECTED = 0x1,
GHES_SER_RECOVERABLE = 0x2,
GHES_SER_PANIC = 0x3,
GHES_SEV_NO = 0x0,
GHES_SEV_CORRECTED = 0x1,
GHES_SEV_RECOVERABLE = 0x2,
GHES_SEV_PANIC = 0x3,
};
static inline int ghes_severity(int severity)
{
switch (severity) {
case CPER_SER_INFORMATIONAL:
return GHES_SER_NO;
case CPER_SER_CORRECTED:
return GHES_SER_CORRECTED;
case CPER_SER_RECOVERABLE:
return GHES_SER_RECOVERABLE;
case CPER_SER_FATAL:
return GHES_SER_PANIC;
case CPER_SEV_INFORMATIONAL:
return GHES_SEV_NO;
case CPER_SEV_CORRECTED:
return GHES_SEV_CORRECTED;
case CPER_SEV_RECOVERABLE:
return GHES_SEV_RECOVERABLE;
case CPER_SEV_FATAL:
return GHES_SEV_PANIC;
default:
/* Unknown, go panic */
return GHES_SER_PANIC;
return GHES_SEV_PANIC;
}
}
@ -237,16 +240,16 @@ static void ghes_clear_estatus(struct ghes *ghes)
static void ghes_do_proc(struct ghes *ghes)
{
int ser, processed = 0;
int sev, processed = 0;
struct acpi_hest_generic_data *gdata;
ser = ghes_severity(ghes->estatus->error_severity);
sev = ghes_severity(ghes->estatus->error_severity);
apei_estatus_for_each_section(ghes->estatus, gdata) {
#ifdef CONFIG_X86_MCE
if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
CPER_SEC_PLATFORM_MEM)) {
apei_mce_report_mem_error(
ser == GHES_SER_CORRECTED,
sev == GHES_SEV_CORRECTED,
(struct cper_sec_mem_err *)(gdata+1));
processed = 1;
}
@ -293,18 +296,15 @@ static struct notifier_block ghes_notifier_sci = {
.notifier_call = ghes_notify_sci,
};
static int hest_ghes_parse(struct acpi_hest_header *hest_hdr, void *data)
static int __devinit ghes_probe(struct platform_device *ghes_dev)
{
struct acpi_hest_generic *generic;
struct ghes *ghes = NULL;
int rc = 0;
int rc = -EINVAL;
if (hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR)
return 0;
generic = (struct acpi_hest_generic *)hest_hdr;
generic = ghes_dev->dev.platform_data;
if (!generic->enabled)
return 0;
return -ENODEV;
if (generic->error_block_length <
sizeof(struct acpi_hest_generic_status)) {
@ -327,62 +327,91 @@ static int hest_ghes_parse(struct acpi_hest_header *hest_hdr, void *data)
ghes = NULL;
goto err;
}
switch (generic->notify.type) {
case ACPI_HEST_NOTIFY_POLLED:
pr_warning(GHES_PFX
"Generic hardware error source: %d notified via POLL is not supported!\n",
generic->header.source_id);
break;
case ACPI_HEST_NOTIFY_EXTERNAL:
case ACPI_HEST_NOTIFY_LOCAL:
pr_warning(GHES_PFX
"Generic hardware error source: %d notified via IRQ is not supported!\n",
generic->header.source_id);
break;
case ACPI_HEST_NOTIFY_SCI:
if (generic->notify.type == ACPI_HEST_NOTIFY_SCI) {
mutex_lock(&ghes_list_mutex);
if (list_empty(&ghes_sci))
register_acpi_hed_notifier(&ghes_notifier_sci);
list_add_rcu(&ghes->list, &ghes_sci);
break;
case ACPI_HEST_NOTIFY_NMI:
pr_warning(GHES_PFX
"Generic hardware error source: %d notified via NMI is not supported!\n",
generic->header.source_id);
break;
default:
pr_warning(FW_WARN GHES_PFX
"Unknown notification type: %u for generic hardware error source: %d\n",
generic->notify.type, generic->header.source_id);
break;
mutex_unlock(&ghes_list_mutex);
} else {
unsigned char *notify = NULL;
switch (generic->notify.type) {
case ACPI_HEST_NOTIFY_POLLED:
notify = "POLL";
break;
case ACPI_HEST_NOTIFY_EXTERNAL:
case ACPI_HEST_NOTIFY_LOCAL:
notify = "IRQ";
break;
case ACPI_HEST_NOTIFY_NMI:
notify = "NMI";
break;
}
if (notify) {
pr_warning(GHES_PFX
"Generic hardware error source: %d notified via %s is not supported!\n",
generic->header.source_id, notify);
} else {
pr_warning(FW_WARN GHES_PFX
"Unknown notification type: %u for generic hardware error source: %d\n",
generic->notify.type, generic->header.source_id);
}
rc = -ENODEV;
goto err;
}
platform_set_drvdata(ghes_dev, ghes);
return 0;
err:
if (ghes)
ghes_fini(ghes);
return rc;
}
static void ghes_cleanup(void)
{
struct ghes *ghes, *nghes;
if (!list_empty(&ghes_sci))
unregister_acpi_hed_notifier(&ghes_notifier_sci);
synchronize_rcu();
list_for_each_entry_safe(ghes, nghes, &ghes_sci, list) {
list_del(&ghes->list);
if (ghes) {
ghes_fini(ghes);
kfree(ghes);
}
return rc;
}
static int __devexit ghes_remove(struct platform_device *ghes_dev)
{
struct ghes *ghes;
struct acpi_hest_generic *generic;
ghes = platform_get_drvdata(ghes_dev);
generic = ghes->generic;
switch (generic->notify.type) {
case ACPI_HEST_NOTIFY_SCI:
mutex_lock(&ghes_list_mutex);
list_del_rcu(&ghes->list);
if (list_empty(&ghes_sci))
unregister_acpi_hed_notifier(&ghes_notifier_sci);
mutex_unlock(&ghes_list_mutex);
break;
default:
BUG();
break;
}
synchronize_rcu();
ghes_fini(ghes);
kfree(ghes);
platform_set_drvdata(ghes_dev, NULL);
return 0;
}
static struct platform_driver ghes_platform_driver = {
.driver = {
.name = "GHES",
.owner = THIS_MODULE,
},
.probe = ghes_probe,
.remove = ghes_remove,
};
static int __init ghes_init(void)
{
int rc;
if (acpi_disabled)
return -ENODEV;
@ -391,32 +420,12 @@ static int __init ghes_init(void)
return -EINVAL;
}
rc = apei_hest_parse(hest_ghes_parse, NULL);
if (rc) {
pr_err(GHES_PFX
"Error during parsing HEST generic hardware error sources.\n");
goto err_cleanup;
}
if (list_empty(&ghes_sci)) {
pr_info(GHES_PFX
"No functional generic hardware error sources.\n");
rc = -ENODEV;
goto err_cleanup;
}
pr_info(GHES_PFX
"Generic Hardware Error Source support is initialized.\n");
return 0;
err_cleanup:
ghes_cleanup();
return rc;
return platform_driver_register(&ghes_platform_driver);
}
static void __exit ghes_exit(void)
{
ghes_cleanup();
platform_driver_unregister(&ghes_platform_driver);
}
module_init(ghes_init);
@ -425,3 +434,4 @@ module_exit(ghes_exit);
MODULE_AUTHOR("Huang Ying");
MODULE_DESCRIPTION("APEI Generic Hardware Error Source support");
MODULE_LICENSE("GPL");
MODULE_ALIAS("platform:GHES");

View file

@ -34,6 +34,7 @@
#include <linux/kdebug.h>
#include <linux/highmem.h>
#include <linux/io.h>
#include <linux/platform_device.h>
#include <acpi/apei.h>
#include "apei-internal.h"
@ -47,11 +48,6 @@ EXPORT_SYMBOL_GPL(hest_disable);
static struct acpi_table_hest *hest_tab;
static int hest_void_parse(struct acpi_hest_header *hest_hdr, void *data)
{
return 0;
}
static int hest_esrc_len_tab[ACPI_HEST_TYPE_RESERVED] = {
[ACPI_HEST_TYPE_IA32_CHECK] = -1, /* need further calculation */
[ACPI_HEST_TYPE_IA32_CORRECTED_CHECK] = -1,
@ -125,6 +121,69 @@ int apei_hest_parse(apei_hest_func_t func, void *data)
}
EXPORT_SYMBOL_GPL(apei_hest_parse);
/*
 * Bookkeeping passed to hest_parse_ghes() while the HEST is walked:
 * the platform devices added so far and how many of them there are.
 */
struct ghes_arr {
/* array of registered GHES platform devices, allocated by hest_ghes_dev_register() */
struct platform_device **ghes_devs;
/* number of valid entries in ghes_devs */
unsigned int count;
};
/*
 * apei_hest_parse() callback that counts generic hardware error sources.
 *
 * @hest_hdr: HEST entry currently being visited.
 * @data:     pointer to the unsigned int counter supplied by hest_init();
 *            incremented for every ACPI_HEST_TYPE_GENERIC_ERROR entry.
 *
 * Always returns 0.
 */
static int hest_parse_ghes_count(struct acpi_hest_header *hest_hdr, void *data)
{
	/* Use the same type as the counter object the caller passes in */
	unsigned int *count = data;

	if (hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR)
		(*count)++;
	return 0;
}
/*
 * apei_hest_parse() callback that creates a "GHES" platform device for
 * one enabled generic hardware error source.
 *
 * @hest_hdr: HEST entry currently being visited.
 * @data:     struct ghes_arr that collects the devices added so far.
 *
 * Entries that are not of type ACPI_HEST_TYPE_GENERIC_ERROR, or that are
 * not enabled, are ignored. Returns 0 on success or when the entry is
 * ignored, -ENOMEM if the device cannot be allocated, or the error from
 * platform_device_add().
 */
static int hest_parse_ghes(struct acpi_hest_header *hest_hdr, void *data)
{
	struct ghes_arr *arr = data;
	struct acpi_hest_generic *generic;
	struct platform_device *pdev;
	int err;

	if (hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR)
		return 0;

	generic = (struct acpi_hest_generic *)hest_hdr;
	if (!generic->enabled)
		return 0;

	pdev = platform_device_alloc("GHES", hest_hdr->source_id);
	if (!pdev)
		return -ENOMEM;

	/*
	 * NOTE(review): platform_data is pointed straight at the HEST entry;
	 * presumably the platform core kfree()s platform_data when the
	 * device is released - confirm this pointer is safe to hand over.
	 */
	pdev->dev.platform_data = generic;

	err = platform_device_add(pdev);
	if (err) {
		/* drop the reference taken by platform_device_alloc() */
		platform_device_put(pdev);
		return err;
	}

	arr->ghes_devs[arr->count] = pdev;
	arr->count++;
	return 0;
}
static int hest_ghes_dev_register(unsigned int ghes_count)
{
int rc, i;
struct ghes_arr ghes_arr;
ghes_arr.count = 0;
ghes_arr.ghes_devs = kmalloc(sizeof(void *) * ghes_count, GFP_KERNEL);
if (!ghes_arr.ghes_devs)
return -ENOMEM;
rc = apei_hest_parse(hest_parse_ghes, &ghes_arr);
if (rc)
goto err;
out:
kfree(ghes_arr.ghes_devs);
return rc;
err:
for (i = 0; i < ghes_arr.count; i++)
platform_device_unregister(ghes_arr.ghes_devs[i]);
goto out;
}
static int __init setup_hest_disable(char *str)
{
hest_disable = 1;
@ -137,6 +196,7 @@ static int __init hest_init(void)
{
acpi_status status;
int rc = -ENODEV;
unsigned int ghes_count = 0;
if (acpi_disabled)
goto err;
@ -158,7 +218,11 @@ static int __init hest_init(void)
goto err;
}
rc = apei_hest_parse(hest_void_parse, NULL);
rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count);
if (rc)
goto err;
rc = hest_ghes_dev_register(ghes_count);
if (rc)
goto err;

View file

@ -39,10 +39,10 @@
* Severity definition for error_severity in struct cper_record_header
* and section_severity in struct cper_section_descriptor
*/
#define CPER_SER_RECOVERABLE 0x0
#define CPER_SER_FATAL 0x1
#define CPER_SER_CORRECTED 0x2
#define CPER_SER_INFORMATIONAL 0x3
#define CPER_SEV_RECOVERABLE 0x0
#define CPER_SEV_FATAL 0x1
#define CPER_SEV_CORRECTED 0x2
#define CPER_SEV_INFORMATIONAL 0x3
/*
* Validation bits difinition for validation_bits in struct