[PATCH 0/2][SRU Bionic] acpi: apei: Send all PCIe errors to AER driver

Previous Topic Next Topic
 
classic Classic list List threaded Threaded
4 messages Options
Reply | Threaded
Open this post in threaded view
|

[PATCH 0/2][SRU Bionic] acpi: apei: Send all PCIe errors to AER driver

dann frazier-4
BugLink: https://bugs.launchpad.net/bugs/1769730

Clean cherry picks. Tested on a Qualcomm QDF2400 (arm64) and a Dell PowerEdge
R810 (x86).

Tyler Baicar (2):
  ACPI: APEI: handle PCIe AER errors in separate function
  ACPI: APEI: call into AER handling regardless of severity

 drivers/acpi/apei/ghes.c | 76 ++++++++++++++++++++++++----------------
 1 file changed, 46 insertions(+), 30 deletions(-)

--
2.17.0


--
kernel-team mailing list
[hidden email]
https://lists.ubuntu.com/mailman/listinfo/kernel-team
Reply | Threaded
Open this post in threaded view
|

[PATCH 1/2][SRU Bionic] ACPI: APEI: handle PCIe AER errors in separate function

dann frazier-4
From: Tyler Baicar <[hidden email]>

BugLink: https://bugs.launchpad.net/bugs/1769730

Move PCIe AER error handling code into a separate function.

Signed-off-by: Tyler Baicar <[hidden email]>
Reviewed-by: Borislav Petkov <[hidden email]>
Signed-off-by: Rafael J. Wysocki <[hidden email]>
(cherry picked from commit 3c5b977f06b754b00a49ee7bf1595491afab7de6)
Signed-off-by: dann frazier <[hidden email]>
---
 drivers/acpi/apei/ghes.c | 64 +++++++++++++++++++++-------------------
 1 file changed, 34 insertions(+), 30 deletions(-)

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 6402f7fad3bb..f67eb763e950 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -414,6 +414,39 @@ static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int
 #endif
 }
 
+static void ghes_handle_aer(struct acpi_hest_generic_data *gdata, int sev, int sec_sev)
+{
+#ifdef CONFIG_ACPI_APEI_PCIEAER
+ struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata);
+
+ if (sev == GHES_SEV_RECOVERABLE &&
+    sec_sev == GHES_SEV_RECOVERABLE &&
+    pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID &&
+    pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) {
+ unsigned int devfn;
+ int aer_severity;
+
+ devfn = PCI_DEVFN(pcie_err->device_id.device,
+  pcie_err->device_id.function);
+ aer_severity = cper_severity_to_aer(gdata->error_severity);
+
+ /*
+ * If firmware reset the component to contain
+ * the error, we must reinitialize it before
+ * use, so treat it as a fatal AER error.
+ */
+ if (gdata->flags & CPER_SEC_RESET)
+ aer_severity = AER_FATAL;
+
+ aer_recover_queue(pcie_err->device_id.segment,
+  pcie_err->device_id.bus,
+  devfn, aer_severity,
+  (struct aer_capability_regs *)
+  pcie_err->aer_info);
+ }
+#endif
+}
+
 static void ghes_do_proc(struct ghes *ghes,
  const struct acpi_hest_generic_status *estatus)
 {
@@ -441,38 +474,9 @@ static void ghes_do_proc(struct ghes *ghes,
  arch_apei_report_mem_error(sev, mem_err);
  ghes_handle_memory_failure(gdata, sev);
  }
-#ifdef CONFIG_ACPI_APEI_PCIEAER
  else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
- struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata);
-
- if (sev == GHES_SEV_RECOVERABLE &&
-    sec_sev == GHES_SEV_RECOVERABLE &&
-    pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID &&
-    pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) {
- unsigned int devfn;
- int aer_severity;
-
- devfn = PCI_DEVFN(pcie_err->device_id.device,
-  pcie_err->device_id.function);
- aer_severity = cper_severity_to_aer(gdata->error_severity);
-
- /*
- * If firmware reset the component to contain
- * the error, we must reinitialize it before
- * use, so treat it as a fatal AER error.
- */
- if (gdata->flags & CPER_SEC_RESET)
- aer_severity = AER_FATAL;
-
- aer_recover_queue(pcie_err->device_id.segment,
-  pcie_err->device_id.bus,
-  devfn, aer_severity,
-  (struct aer_capability_regs *)
-  pcie_err->aer_info);
- }
-
+ ghes_handle_aer(gdata, sev, sec_sev);
  }
-#endif
  else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
  struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata);
 
--
2.17.0


--
kernel-team mailing list
[hidden email]
https://lists.ubuntu.com/mailman/listinfo/kernel-team
Reply | Threaded
Open this post in threaded view
|

[PATCH 2/2][SRU Bionic] ACPI: APEI: call into AER handling regardless of severity

dann frazier-4
In reply to this post by dann frazier-4
From: Tyler Baicar <[hidden email]>

BugLink: https://bugs.launchpad.net/bugs/1769730

Currently the GHES code only calls into the AER driver for
recoverable type errors. This is incorrect because errors of
other severities do not get logged by the AER driver and do not
get exposed to user space via the AER trace event. So, call
into the AER driver for PCIe errors regardless of the severity.

Signed-off-by: Tyler Baicar <[hidden email]>
Reviewed-by: Borislav Petkov <[hidden email]>
Signed-off-by: Rafael J. Wysocki <[hidden email]>
(cherry picked from commit 9852ce9ae213d39a98f161db84b90b047fbdc436)
Signed-off-by: dann frazier <[hidden email]>
---
 drivers/acpi/apei/ghes.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index f67eb763e950..cc65d1992635 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -414,14 +414,26 @@ static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int
 #endif
 }
 
-static void ghes_handle_aer(struct acpi_hest_generic_data *gdata, int sev, int sec_sev)
+/*
+ * PCIe AER errors need to be sent to the AER driver for reporting and
+ * recovery. The GHES severities map to the following AER severities and
+ * require the following handling:
+ *
+ * GHES_SEV_CORRECTABLE -> AER_CORRECTABLE
+ *     These need to be reported by the AER driver but no recovery is
+ *     necessary.
+ * GHES_SEV_RECOVERABLE -> AER_NONFATAL
+ * GHES_SEV_RECOVERABLE && CPER_SEC_RESET -> AER_FATAL
+ *     These both need to be reported and recovered from by the AER driver.
+ * GHES_SEV_PANIC does not make it to this handling since the kernel must
+ *     panic.
+ */
+static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
 {
 #ifdef CONFIG_ACPI_APEI_PCIEAER
  struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata);
 
- if (sev == GHES_SEV_RECOVERABLE &&
-    sec_sev == GHES_SEV_RECOVERABLE &&
-    pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID &&
+ if (pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID &&
     pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) {
  unsigned int devfn;
  int aer_severity;
@@ -475,7 +487,7 @@ static void ghes_do_proc(struct ghes *ghes,
  ghes_handle_memory_failure(gdata, sev);
  }
  else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
- ghes_handle_aer(gdata, sev, sec_sev);
+ ghes_handle_aer(gdata);
  }
  else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
  struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata);
--
2.17.0


--
kernel-team mailing list
[hidden email]
https://lists.ubuntu.com/mailman/listinfo/kernel-team
Reply | Threaded
Open this post in threaded view
|

ACK: [PATCH 0/2][SRU Bionic] acpi: apei: Send all PCIe errors to AER driver

Kleber Souza
In reply to this post by dann frazier-4
On 05/08/18 00:07, dann frazier wrote:

> BugLink: https://bugs.launchpad.net/bugs/1769730
>
> Clean cherry picks. Tested on a Qualcomm QDF2400 (arm64) and a Dell PowerEdge
> R810 (x86).
>
> Tyler Baicar (2):
>   ACPI: APEI: handle PCIe AER errors in separate function
>   ACPI: APEI: call into AER handling regardless of severity
>
>  drivers/acpi/apei/ghes.c | 76 ++++++++++++++++++++++++----------------
>  1 file changed, 46 insertions(+), 30 deletions(-)
>

Acked-by: Kleber Sacilotto de Souza <[hidden email]>

--
kernel-team mailing list
[hidden email]
https://lists.ubuntu.com/mailman/listinfo/kernel-team