Merge branch 'pci/aer' into next

* pci/aer:
  PCI/AER: Fix aer_probe() kernel-doc comment
  PCI/AER: Cache capability position
  PCI/AER: Avoid memory allocation in interrupt handling path
  ACPI / APEI: Send correct severity to calculate AER severity
  PCI/AER: Remove duplicate AER severity translation
  PCI/AER: Remove aerdriver.forceload kernel parameter
  PCI/AER: Remove aerdriver.nosourceid kernel parameter
  x86/PCI: VMD: Add quirk for AER to ignore source ID
  PCI/AER: Add bus flag to skip source ID matching

Conflicts:
	drivers/pci/probe.c
This commit is contained in:
Bjorn Helgaas 2016-10-03 09:42:57 -05:00
commit 4dc2db096a
10 changed files with 66 additions and 88 deletions

View file

@ -49,25 +49,17 @@ depends on CONFIG_PCIEPORTBUS, so pls. set CONFIG_PCIEPORTBUS=y and
CONFIG_PCIEAER = y.
2.2 Load PCI Express AER Root Driver
There is a case where a system has AER support in BIOS. Enabling the AER
Root driver and having AER support in BIOS may result unpredictable
behavior. To avoid this conflict, a successful load of the AER Root driver
requires ACPI _OSC support in the BIOS to allow the AER Root driver to
request for native control of AER. See the PCI FW 3.0 Specification for
details regarding OSC usage. Currently, lots of firmwares don't provide
_OSC support while they use PCI Express. To support such firmwares,
forceload, a parameter of type bool, could enable AER to continue to
be initiated although firmwares have no _OSC support. To enable the
walkaround, pls. add aerdriver.forceload=y to kernel boot parameter line
when booting kernel. Note that forceload=n by default.
nosourceid, another parameter of type bool, can be used when broken
hardware (mostly chipsets) has root ports that cannot obtain the reporting
source ID. nosourceid=n by default.
Some systems have AER support in firmware. Enabling Linux AER support at
the same time the firmware handles AER may result in unpredictable
behavior. Therefore, Linux does not handle AER events unless the firmware
grants AER control to the OS via the ACPI _OSC method. See the PCI FW 3.0
Specification for details regarding _OSC usage.
2.3 AER error output
When a PCI-E AER error is captured, an error message will be outputted to
console. If it's a correctable error, it is outputted as a warning.
When a PCIe AER error is captured, an error message is output to the
console. If it's a correctable error, it is output as a warning.
Otherwise, it is printed as an error. Users can therefore choose a
different log level to filter out correctable error messages.

View file

@ -457,7 +457,7 @@ static void ghes_do_proc(struct ghes *ghes,
devfn = PCI_DEVFN(pcie_err->device_id.device,
pcie_err->device_id.function);
aer_severity = cper_severity_to_aer(sev);
aer_severity = cper_severity_to_aer(gdata->error_severity);
/*
* If firmware reset the component to contain

View file

@ -66,7 +66,7 @@ static int pcie_aer_disable;
void pci_no_aer(void)
{
pcie_aer_disable = 1; /* has priority over 'forceload' */
pcie_aer_disable = 1;
}
bool pci_aer_available(void)
@ -130,7 +130,7 @@ static void aer_enable_rootport(struct aer_rpc *rpc)
pcie_capability_clear_word(pdev, PCI_EXP_RTCTL,
SYSTEM_ERROR_INTR_ON_MESG_MASK);
aer_pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR);
aer_pos = pdev->aer_cap;
/* Clear error status */
pci_read_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, &reg32);
pci_write_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, reg32);
@ -169,7 +169,7 @@ static void aer_disable_rootport(struct aer_rpc *rpc)
*/
set_downstream_devices_error_reporting(pdev, false);
pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR);
pos = pdev->aer_cap;
/* Disable Root's interrupt in response to error messages */
pci_read_config_dword(pdev, pos + PCI_ERR_ROOT_COMMAND, &reg32);
reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
@ -196,7 +196,7 @@ irqreturn_t aer_irq(int irq, void *context)
unsigned long flags;
int pos;
pos = pci_find_ext_capability(pdev->port, PCI_EXT_CAP_ID_ERR);
pos = pdev->port->aer_cap;
/*
* Must lock access to Root Error Status Reg, Root Error ID Reg,
* and Root error producer/consumer index
@ -290,7 +290,6 @@ static void aer_remove(struct pcie_device *dev)
/**
* aer_probe - initialize resources
* @dev: pointer to the pcie_dev data structure
* @id: pointer to the service id data structure
*
* Invoked when PCI Express bus loads AER service driver.
*/
@ -300,11 +299,6 @@ static int aer_probe(struct pcie_device *dev)
struct aer_rpc *rpc;
struct device *device = &dev->device;
/* Init */
status = aer_init(dev);
if (status)
return status;
/* Alloc rpc data structure */
rpc = aer_alloc_rpc(dev);
if (!rpc) {
@ -339,7 +333,7 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
u32 reg32;
int pos;
pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
pos = dev->aer_cap;
/* Disable Root's interrupt in response to error messages */
pci_read_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, &reg32);
@ -392,7 +386,7 @@ static void aer_error_resume(struct pci_dev *dev)
pcie_capability_write_word(dev, PCI_EXP_DEVSTA, reg16);
/* Clean AER Root Error Status */
pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
pos = dev->aer_cap;
pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &mask);
if (dev->error_state == pci_channel_io_normal)

View file

@ -60,6 +60,7 @@ struct aer_rpc {
struct pcie_device *rpd; /* Root Port device */
struct work_struct dpc_handler;
struct aer_err_source e_sources[AER_ERROR_SOURCES_MAX];
struct aer_err_info e_info;
unsigned short prod_idx; /* Error Producer Index */
unsigned short cons_idx; /* Error Consumer Index */
int isr;
@ -105,7 +106,6 @@ static inline pci_ers_result_t merge_result(enum pci_ers_result orig,
}
extern struct bus_type pcie_port_bus_type;
int aer_init(struct pcie_device *dev);
void aer_isr(struct work_struct *work);
void aer_print_error(struct pci_dev *dev, struct aer_err_info *info);
void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info);
@ -121,11 +121,4 @@ static inline int pcie_aer_get_firmware_first(struct pci_dev *pci_dev)
return 0;
}
#endif
static inline void pcie_aer_force_firmware_first(struct pci_dev *pci_dev,
int enable)
{
pci_dev->__aer_firmware_first = !!enable;
pci_dev->__aer_firmware_first_valid = 1;
}
#endif /* _AERDRV_H_ */

View file

@ -27,11 +27,6 @@
#include <linux/kfifo.h>
#include "aerdrv.h"
static bool forceload;
static bool nosourceid;
module_param(forceload, bool, 0);
module_param(nosourceid, bool, 0);
#define PCI_EXP_AER_FLAGS (PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \
PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE)
@ -40,7 +35,7 @@ int pci_enable_pcie_error_reporting(struct pci_dev *dev)
if (pcie_aer_get_firmware_first(dev))
return -EIO;
if (!pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR))
if (!dev->aer_cap)
return -EIO;
return pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_AER_FLAGS);
@ -62,7 +57,7 @@ int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
int pos;
u32 status;
pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
pos = dev->aer_cap;
if (!pos)
return -EIO;
@ -83,7 +78,7 @@ int pci_cleanup_aer_error_status_regs(struct pci_dev *dev)
if (!pci_is_pcie(dev))
return -ENODEV;
pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
pos = dev->aer_cap;
if (!pos)
return -EIO;
@ -102,6 +97,12 @@ int pci_cleanup_aer_error_status_regs(struct pci_dev *dev)
return 0;
}
/**
 * pci_aer_init - look up and cache the AER capability offset of a device
 * @dev: PCI device being set up
 *
 * Stores the result of the PCI_EXT_CAP_ID_ERR extended-capability lookup in
 * dev->aer_cap, so later AER code can read the cached offset instead of
 * searching config space again.  The device is then passed to
 * pci_cleanup_aer_error_status_regs().
 *
 * Returns the value returned by pci_cleanup_aer_error_status_regs().
 */
int pci_aer_init(struct pci_dev *dev)
{
	int ret;

	dev->aer_cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
	ret = pci_cleanup_aer_error_status_regs(dev);

	return ret;
}
/**
* add_error_device - list device to be handled
* @e_info: pointer to error info
@ -132,7 +133,8 @@ static bool is_error_source(struct pci_dev *dev, struct aer_err_info *e_info)
* When bus id is equal to 0, it might be a bad id
* reported by root port.
*/
if (!nosourceid && (PCI_BUS_NUM(e_info->id) != 0)) {
if ((PCI_BUS_NUM(e_info->id) != 0) &&
!(dev->bus->bus_flags & PCI_BUS_FLAGS_NO_AERSID)) {
/* Device ID match? */
if (e_info->id == ((dev->bus->number << 8) | dev->devfn))
return true;
@ -144,10 +146,10 @@ static bool is_error_source(struct pci_dev *dev, struct aer_err_info *e_info)
/*
* When either
* 1) nosourceid==y;
* 2) bus id is equal to 0. Some ports might lose the bus
* 1) bus id is equal to 0. Some ports might lose the bus
* id of error source id;
* 3) There are multiple errors and prior id comparing fails;
* 2) bus flag PCI_BUS_FLAGS_NO_AERSID is set
* 3) There are multiple errors and prior ID comparing fails;
* We check AER status registers to find possible reporter.
*/
if (atomic_read(&dev->enable_cnt) == 0)
@ -158,7 +160,7 @@ static bool is_error_source(struct pci_dev *dev, struct aer_err_info *e_info)
if (!(reg16 & PCI_EXP_AER_FLAGS))
return false;
pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
pos = dev->aer_cap;
if (!pos)
return false;
@ -555,7 +557,7 @@ static void handle_error_source(struct pcie_device *aerdev,
* Correctable error does not need software intervention.
* No need to go through error recovery process.
*/
pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
pos = dev->aer_cap;
if (pos)
pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS,
info->status);
@ -647,7 +649,7 @@ static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
info->status = 0;
info->tlp_header_valid = 0;
pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
pos = dev->aer_cap;
/* The device might not support AER */
if (!pos)
@ -715,15 +717,8 @@ static inline void aer_process_err_devices(struct pcie_device *p_device,
static void aer_isr_one_error(struct pcie_device *p_device,
struct aer_err_source *e_src)
{
struct aer_err_info *e_info;
/* struct aer_err_info might be big, so we allocate it with slab */
e_info = kmalloc(sizeof(struct aer_err_info), GFP_KERNEL);
if (!e_info) {
dev_printk(KERN_DEBUG, &p_device->port->dev,
"Can't allocate mem when processing AER errors\n");
return;
}
struct aer_rpc *rpc = get_service_data(p_device);
struct aer_err_info *e_info = &rpc->e_info;
/*
* There is a possibility that both correctable error and
@ -762,8 +757,6 @@ static void aer_isr_one_error(struct pcie_device *p_device,
if (find_source_device(p_device->port, e_info))
aer_process_err_devices(p_device, e_info);
}
kfree(e_info);
}
/**
@ -812,19 +805,3 @@ void aer_isr(struct work_struct *work)
aer_isr_one_error(p_device, &e_src);
mutex_unlock(&rpc->rpc_mutex);
}
/**
* aer_init - provide AER initialization
* @dev: pointer to AER pcie device
*
* Invoked when AER service driver is loaded.
*/
int aer_init(struct pcie_device *dev)
{
if (forceload) {
dev_printk(KERN_DEBUG, &dev->device,
"aerdrv forceload requested.\n");
pcie_aer_force_firmware_first(dev->port, 0);
}
return 0;
}

View file

@ -219,15 +219,13 @@ int cper_severity_to_aer(int cper_severity)
}
EXPORT_SYMBOL_GPL(cper_severity_to_aer);
void cper_print_aer(struct pci_dev *dev, int cper_severity,
void cper_print_aer(struct pci_dev *dev, int aer_severity,
struct aer_capability_regs *aer)
{
int aer_severity, layer, agent, status_strs_size, tlp_header_valid = 0;
int layer, agent, status_strs_size, tlp_header_valid = 0;
u32 status, mask;
const char **status_strs;
aer_severity = cper_severity_to_aer(cper_severity);
if (aer_severity == AER_CORRECTABLE) {
status = aer->cor_status;
mask = aer->cor_mask;

View file

@ -1666,10 +1666,11 @@ static void pci_init_capabilities(struct pci_dev *dev)
/* Enable ACS P2P upstream forwarding */
pci_enable_acs(dev);
pci_cleanup_aer_error_status_regs(dev);
/* Precision Time Measurement */
pci_ptm_init(dev);
/* Advanced Error Reporting */
pci_aer_init(dev);
}
/*

View file

@ -4428,3 +4428,20 @@ static void quirk_intel_qat_vf_cap(struct pci_dev *pdev)
}
}
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x443, quirk_intel_qat_vf_cap);
/*
 * A VMD-enabled root port rewrites the source ID of every message to that
 * of the VMD device, so the ID cannot be used to match the faulting device.
 * Flag the bus so the AER driver skips source ID matching and instead walks
 * the child device tree, reading AER registers to find the reporter.
 */
static void quirk_no_aersid(struct pci_dev *pdev)
{
	/* Nothing to do unless the bus belongs to a VMD domain */
	if (!pdev->bus->sysdata || pci_domain_nr(pdev->bus) < 0x10000)
		return;

	pdev->bus->bus_flags |= PCI_BUS_FLAGS_NO_AERSID;
}
/* Register quirk_no_aersid as an early fixup for Intel devices 0x2030-0x2033 */
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2030, quirk_no_aersid);
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2031, quirk_no_aersid);
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2032, quirk_no_aersid);
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2033, quirk_no_aersid);

View file

@ -63,7 +63,7 @@ static inline int pci_cleanup_aer_error_status_regs(struct pci_dev *dev)
}
#endif
void cper_print_aer(struct pci_dev *dev, int cper_severity,
void cper_print_aer(struct pci_dev *dev, int aer_severity,
struct aer_capability_regs *aer);
int cper_severity_to_aer(int cper_severity);
void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,

View file

@ -187,8 +187,9 @@ enum pci_irq_reroute_variant {
typedef unsigned short __bitwise pci_bus_flags_t;
enum pci_bus_flags {
PCI_BUS_FLAGS_NO_MSI = (__force pci_bus_flags_t) 1,
PCI_BUS_FLAGS_NO_MMRBC = (__force pci_bus_flags_t) 2,
PCI_BUS_FLAGS_NO_MSI = (__force pci_bus_flags_t) 1,
PCI_BUS_FLAGS_NO_MMRBC = (__force pci_bus_flags_t) 2,
PCI_BUS_FLAGS_NO_AERSID = (__force pci_bus_flags_t) 4,
};
/* These values come from the PCI Express Spec */
@ -268,6 +269,9 @@ struct pci_dev {
unsigned int class; /* 3 bytes: (base,sub,prog-if) */
u8 revision; /* PCI revision, low byte of class word */
u8 hdr_type; /* PCI header type (`multi' flag masked out) */
#ifdef CONFIG_PCIEAER
u16 aer_cap; /* AER capability offset */
#endif
u8 pcie_cap; /* PCIe capability offset */
u8 msi_cap; /* MSI capability offset */
u8 msix_cap; /* MSI-X capability offset */
@ -1374,9 +1378,11 @@ static inline bool pcie_aspm_support_enabled(void) { return false; }
#ifdef CONFIG_PCIEAER
void pci_no_aer(void);
bool pci_aer_available(void);
int pci_aer_init(struct pci_dev *dev);
#else
static inline void pci_no_aer(void) { }
static inline bool pci_aer_available(void) { return false; }
static inline int pci_aer_init(struct pci_dev *d) { return -ENODEV; }
#endif
#ifdef CONFIG_PCIE_ECRC