diff --git a/Documentation/ABI/testing/debugfs-aest b/Documentation/ABI/testing/debugfs-aest new file mode 100644 index 0000000000000..cc41ea7032c72 --- /dev/null +++ b/Documentation/ABI/testing/debugfs-aest @@ -0,0 +1,99 @@ +What: /sys/kernel/debug/aest/<name>.<id>/ +Date: Dec 2025 +KernelVersion: 6.19 +Contact: Ruidong Tian +Description: + Directory representing an AEST device. <name> is the device type, + like: + + - processor + - memory + - smmu + - ... + + <id> is the unique ID for this device. + +What: /sys/kernel/debug/aest/<name>.<id>/<node_name>/* +Date: Dec 2025 +KernelVersion: 6.19 +Contact: Ruidong Tian +Description: + Attributes of an AEST node that belongs to this device. The format + of node name is: - + + See more at: + https://developer.arm.com/documentation/den0085/latest/ + +What: /sys/kernel/debug/aest/<name>.<id>/<node_name>/ce_threshold +Date: Dec 2025 +KernelVersion: 6.19 +Contact: Ruidong Tian +Description: + (WO) Write the CE threshold to all records of this node. Fails + if the input exceeds the maximum threshold. + +What: /sys/kernel/debug/aest/<name>.<id>/<node_name>/err_count +Date: Dec 2025 +KernelVersion: 6.19 +Contact: Ruidong Tian +Description: + (RO) Outputs error statistics for all error records of this node. + +What: /sys/kernel/debug/aest/<name>.<id>/<node_name>/record/err_* +Date: Dec 2025 +KernelVersion: 6.19 +Contact: Ruidong Tian +Description: + (RO) Read the err_* register and return its value. + +What: /sys/kernel/debug/aest/<name>.<id>/<node_name>/record/ce_threshold +Date: Dec 2025 +KernelVersion: 6.19 +Contact: Ruidong Tian +Description: + (RW) Read and write the CE threshold of this record. Fails + if the input exceeds the maximum threshold. + +What: /sys/kernel/debug/aest/<name>.<id>/<node_name>/record/err_count +Date: Dec 2025 +KernelVersion: 6.19 +Contact: Ruidong Tian +Description: + (RO) Outputs error statistics for this record. + +What: /sys/kernel/debug/aest/<name>.<id>/<node_name>/record/inject/err_* +Date: Dec 2025 +KernelVersion: 6.19 +Contact: Ruidong Tian +Description: + (RW) These registers are used to simulate soft injection errors + by holding error register values.
You can write any values + to them. To trigger the injection, you need to write to soft_inject + last. The validity of the injected error depends on the + value written to err_status. + + Accepts values - any. + +What: /sys/kernel/debug/aest/<name>.<id>/<node_name>/record/inject/soft_inject +Date: Dec 2025 +KernelVersion: 6.19 +Contact: Ruidong Tian +Description: + (WO) Write any value to this file to trigger the error + injection. Make sure you have specified all necessary error + parameters, i.e. this write should be the last step when + injecting errors. + + Accepts values - any. + +What: /sys/kernel/debug/aest/<name>.<id>/<node_name>/record/inject/hard_inject +Date: Dec 2025 +KernelVersion: 6.19 +Contact: Ruidong Tian +Description: + (WO) If the AEST table provides error injection registers, + you can write to them via this interface. For instance, + values can be written to the ERXPFGCTL register. The post-injection + behavior is then determined by the hardware specification. + + Accepts values - any. diff --git a/Documentation/devicetree/bindings/arm/arm,aest.yaml b/Documentation/devicetree/bindings/arm/arm,aest.yaml new file mode 100644 index 0000000000000..7809a0d382703 --- /dev/null +++ b/Documentation/devicetree/bindings/arm/arm,aest.yaml @@ -0,0 +1,406 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/arm/arm,aest.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Arm Error Source Table (AEST) + +maintainers: + - Umang Chheda + +description: + The Arm Error Source Table (AEST) describes RAS error sources and their + register interfaces. Each error source exposes one or more error records + through either system registers or a memory-mapped register window, and + may signal errors via interrupts. The top-level node acts as a container + for one or more child nodes, each describing a single AEST error source. + Refer to the Arm AEST specification (DEN0085 / DDI 0587B) for details.
+ Flag bit constants for use in DT source files are defined in + . + +properties: + compatible: + const: arm,aest + + "#address-cells": + const: 2 + + "#size-cells": + const: 2 + + ranges: true + +required: + - compatible + +additionalProperties: false + +patternProperties: + "^aest-[a-z0-9-]+(@[0-9a-f]+)?$": + type: object + description: + An AEST error source node describing one error source defined by + the Arm AEST specification. + + properties: + compatible: + description: + Identifies the type of AEST error source. Each value corresponds to + a distinct error source class defined by the Arm AEST specification. + arm,aest-proxy represents a proxy error source that forwards errors + from another error source. + enum: + - arm,aest-processor + - arm,aest-memory + - arm,aest-smmu + - arm,aest-gic + - arm,aest-pcie + - arm,aest-vendor + - arm,aest-proxy + + reg: + description: + Register ranges for the error source. Absence of reg implies + system-register access (interface type 0). A single range implies + memory-mapped access (interface type 1). Two ranges imply + single-record memory-mapped access (interface type 2). + minItems: 1 + maxItems: 4 + + reg-names: + description: + Names for the register ranges. The base error-record window is + unnamed (or first entry). Optional named ranges provide access to + the fault-injection, error-group, and interrupt-config register + windows defined by the AEST specification. + minItems: 1 + maxItems: 4 + items: + enum: + - fault-inject + - err-group + - irq-config + + interrupts: + description: Interrupts associated with the error source. + minItems: 1 + maxItems: 2 + + interrupt-names: + description: Names of the interrupts associated with the error source. + minItems: 1 + maxItems: 2 + items: + enum: + - fhi + - eri + + arm,fhi-flags: + description: + Bitmask of flags for the fault-handling interrupt (FHI), as defined + in the AEST node interrupt structure flags field. 
Constants are + defined in - AEST_IRQ_MODE_LEVEL (0), + AEST_IRQ_MODE_EDGE (1). + $ref: /schemas/types.yaml#/definitions/uint32 + + arm,eri-flags: + description: + Bitmask of flags for the error-recovery interrupt (ERI), as defined + in the AEST node interrupt structure flags field. Constants are + defined in . + $ref: /schemas/types.yaml#/definitions/uint32 + + arm,interface-flags: + description: | + Bitmask of interface flags for the error source, as defined in the + AEST node interface flags field. Constants are defined in + : + AEST_XFACE_SHARED (bit 0) - shared error source, + AEST_XFACE_CLEAR_MISC (bit 1) - clear MISC registers on error, + AEST_XFACE_ERROR_DEVICE (bit 2) - error node device present, + AEST_XFACE_AFFINITY (bit 3) - affinity information valid, + AEST_XFACE_ERROR_GROUP (bit 4) - error group register window present, + AEST_XFACE_FAULT_INJECT (bit 5) - fault injection register window present, + AEST_XFACE_INT_CONFIG (bit 6) - interrupt config register window present. + For system-register interface nodes (no reg property), only + AEST_XFACE_CLEAR_MISC is meaningful; the MMIO window flags + (AEST_XFACE_ERROR_GROUP, AEST_XFACE_FAULT_INJECT, + AEST_XFACE_INT_CONFIG) have no effect without a base address. + $ref: /schemas/types.yaml#/definitions/uint32 + + arm,group-format: + description: | + Page-granularity of the error record group register window, which + determines the MMIO mapping size, the number of ERRGSR registers, + and the width of the record-implemented and status-reporting bitmaps. + Constants are defined in : + AEST_GROUP_FORMAT_4K (0) - 4K window, 1 ERRGSR, up to 64 records, + AEST_GROUP_FORMAT_16K (1) - 16K window, 4 ERRGSRs, up to 256 records, + AEST_GROUP_FORMAT_64K (2) - 64K window, 14 ERRGSRs, up to 896 records. + Required for memory-mapped nodes (reg present) where it controls + the ioremap size and ERRGSR layout. For system-register nodes + (no reg property) this property is optional and defaults to + AEST_GROUP_FORMAT_4K. 
+ $ref: /schemas/types.yaml#/definitions/uint32 + enum: [0, 1, 2] + + arm,num-records: + description: Number of error records implemented by this error source. + $ref: /schemas/types.yaml#/definitions/uint32 + + arm,record-impl: + description: + Bitmap of implemented error records within this error source. Bit N + set to 0 means error record N is implemented and must be polled. + $ref: /schemas/types.yaml#/definitions/uint64-array + + arm,status-reporting: + description: + Bitmap indicating which error records support status reporting via + the ERRGSR register. Bit N set to 1 means record N does not report + through ERRGSR and must be polled explicitly. + $ref: /schemas/types.yaml#/definitions/uint64-array + + arm,addressing-mode: + description: + Bitmap indicating the address type reported in ERR_ADDR for each + error record. Bit N set to 0 means record N reports System Physical + Addresses (SPA); bit N set to 1 means record N reports node-specific + Logical Addresses (LA) that require OS translation to SPA. + $ref: /schemas/types.yaml#/definitions/uint64-array + + arm,processor-flags: + description: + Bitmask indicating the scope of a processor error source, as defined + in the AEST processor node flags field. Constants are defined in + - AEST_PROC_GLOBAL (bit 0), + AEST_PROC_SHARED (bit 1). + $ref: /schemas/types.yaml#/definitions/uint32 + + arm,resource-type: + description: | + Type of processor resource associated with this error source. + Constants are defined in : + AEST_RESOURCE_CACHE (0), + AEST_RESOURCE_TLB (1), + AEST_RESOURCE_GENERIC (2). + $ref: /schemas/types.yaml#/definitions/uint32 + enum: [0, 1, 2] + + arm,cache-ref: + description: + Phandle to the cache node associated with this processor error source. + $ref: /schemas/types.yaml#/definitions/phandle + + arm,tlb-level: + description: TLB level identifier for this processor TLB error source. 
+ $ref: /schemas/types.yaml#/definitions/uint32 + + arm,resource-ref: + description: + Generic resource reference identifier for this processor error source. + $ref: /schemas/types.yaml#/definitions/uint32 + + arm,proximity-domain: + description: + SRAT proximity domain of the memory node associated with this error + source. + $ref: /schemas/types.yaml#/definitions/uint32 + + arm,smmu-ref: + description: + Phandle to the SMMU node in the IORT associated with this error + source. + $ref: /schemas/types.yaml#/definitions/phandle + + arm,smmu-subcomponent: + description: + SMMU subcomponent reference identifier for this error source, as + defined in the AEST SMMU node structure. + $ref: /schemas/types.yaml#/definitions/uint32 + + arm,gic-type: + description: | + GIC component type for this error source, as defined in the AEST GIC + node structure. Constants are defined in : + AEST_GIC_CPU (0), + AEST_GIC_DISTRIBUTOR (1), + AEST_GIC_REDISTRIBUTOR (2), + AEST_GIC_ITS (3). + $ref: /schemas/types.yaml#/definitions/uint32 + enum: [0, 1, 2, 3] + + arm,gic-instance: + description: + GIC instance identifier for this error source, used to distinguish + multiple instances of the same GIC component type. + $ref: /schemas/types.yaml#/definitions/uint32 + + arm,pcie-segment: + description: + PCI segment number of the PCIe root port associated with this error + source, corresponding to the IORT node reference. + $ref: /schemas/types.yaml#/definitions/uint32 + + arm,vendor-hid: + description: + 8-character ACPI Hardware ID string identifying the vendor error + source, as defined in the AEST vendor node structure. + $ref: /schemas/types.yaml#/definitions/string + + arm,vendor-uid: + description: + ACPI unique instance identifier for this vendor error source, used + to distinguish multiple instances with the same hardware ID. 
+ $ref: /schemas/types.yaml#/definitions/uint32 + + required: + - compatible + - arm,num-records + + allOf: + - if: + required: + - reg + then: + required: + - arm,group-format + - if: + properties: + compatible: + contains: + const: arm,aest-processor + then: + properties: + arm,processor-flags: {} + arm,resource-type: {} + arm,cache-ref: {} + arm,tlb-level: {} + arm,resource-ref: {} + else: + properties: + arm,processor-flags: false + arm,resource-type: false + arm,cache-ref: false + arm,tlb-level: false + arm,resource-ref: false + + - if: + properties: + compatible: + contains: + const: arm,aest-memory + then: + required: + - arm,proximity-domain + properties: + arm,proximity-domain: {} + else: + properties: + arm,proximity-domain: false + + - if: + properties: + compatible: + contains: + const: arm,aest-smmu + then: + required: + - arm,smmu-ref + properties: + arm,smmu-ref: {} + arm,smmu-subcomponent: {} + else: + properties: + arm,smmu-ref: false + arm,smmu-subcomponent: false + + - if: + properties: + compatible: + contains: + const: arm,aest-gic + then: + properties: + arm,gic-type: {} + arm,gic-instance: {} + else: + properties: + arm,gic-type: false + arm,gic-instance: false + + - if: + properties: + compatible: + contains: + const: arm,aest-pcie + then: + required: + - arm,pcie-segment + properties: + arm,pcie-segment: {} + else: + properties: + arm,pcie-segment: false + + - if: + properties: + compatible: + contains: + const: arm,aest-vendor + then: + required: + - arm,vendor-hid + properties: + arm,vendor-hid: {} + arm,vendor-uid: {} + else: + properties: + arm,vendor-hid: false + arm,vendor-uid: false + + unevaluatedProperties: false + +examples: + - | + #include + #include + + aest { + compatible = "arm,aest"; + #address-cells = <2>; + #size-cells = <2>; + + /* System-register based processor error source (no reg property) */ + aest-processor-0 { + compatible = "arm,aest-processor"; + arm,num-records = <2>; + arm,record-impl = /bits/ 64 <0x3>; + 
arm,status-reporting = /bits/ 64 <0x0>; + arm,addressing-mode = /bits/ 64 <0x0>; + arm,processor-flags = ; + arm,resource-type = ; + interrupts = ; + interrupt-names = "fhi"; + }; + + /* Memory-mapped memory controller error source */ + aest-memory-0@50010000 { + compatible = "arm,aest-memory"; + reg = <0x0 0x50010000 0x0 0x1000>, + <0x0 0x50011000 0x0 0x1000>, + <0x0 0x50012000 0x0 0x1000>; + reg-names = "err-group", "fault-inject", "irq-config"; + arm,group-format = ; + arm,num-records = <4>; + arm,record-impl = /bits/ 64 <0xf>; + arm,status-reporting = /bits/ 64 <0x0>; + arm,addressing-mode = /bits/ 64 <0x0>; + arm,interface-flags = ; + arm,proximity-domain = <0>; + interrupts = , + ; + interrupt-names = "fhi", "eri"; + }; + }; diff --git a/MAINTAINERS b/MAINTAINERS index f35e1769fa729..d8993970d0f4b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -333,6 +333,17 @@ S: Maintained F: drivers/acpi/arm64 F: include/linux/acpi_iort.h +ACPI AEST +M: Ruidong Tian +L: linux-acpi@vger.kernel.org +L: linux-arm-kernel@lists.infradead.org +S: Supported +F: Documentation/ABI/testing/debugfs-aest +F: arch/arm64/include/asm/ras.h +F: drivers/acpi/arm64/aest.c +F: drivers/ras/aest/ +F: include/linux/acpi_aest.h + ACPI FOR RISC-V (ACPI/riscv) M: Sunil V L L: linux-acpi@vger.kernel.org diff --git a/arch/arm64/boot/dts/qcom/lemans.dtsi b/arch/arm64/boot/dts/qcom/lemans.dtsi index fe6e763518230..199ea1f9a8d58 100644 --- a/arch/arm64/boot/dts/qcom/lemans.dtsi +++ b/arch/arm64/boot/dts/qcom/lemans.dtsi @@ -4,6 +4,7 @@ * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ +#include #include #include #include @@ -29,6 +30,46 @@ #address-cells = <2>; #size-cells = <2>; + aest { + compatible = "arm,aest"; + #address-cells = <2>; + #size-cells = <2>; + ranges; + + aest-processor-0 { + compatible = "arm,aest-processor"; + arm,num-records = <1>; + arm,record-impl = /bits/ 64 <0x0>; + arm,status-reporting = /bits/ 64 <0x0>; + arm,addressing-mode = /bits/ 64 <0x0>; + arm,processor-flags = ; + interrupts = ; + interrupt-names = "fhi"; + }; + + aest-l3-cluster0 { + compatible = "arm,aest-processor"; + arm,num-records = <2>; + arm,record-impl = /bits/ 64 <0x1>; + arm,status-reporting = /bits/ 64 <0x0>; + arm,addressing-mode = /bits/ 64 <0x0>; + arm,processor-flags = ; + interrupts = ; + interrupt-names = "fhi"; + }; + + aest-l3-cluster1 { + compatible = "arm,aest-processor"; + arm,num-records = <2>; + arm,record-impl = /bits/ 64 <0x1>; + arm,status-reporting = /bits/ 64 <0x0>; + arm,addressing-mode = /bits/ 64 <0x0>; + arm,processor-flags = ; + interrupts = ; + interrupt-names = "fhi"; + }; + }; + clocks { xo_board_clk: xo-board-clk { compatible = "fixed-clock"; diff --git a/arch/arm64/boot/dts/qcom/monaco.dtsi b/arch/arm64/boot/dts/qcom/monaco.dtsi index 7b1d57460f1e6..8e43ceed7d84a 100644 --- a/arch/arm64/boot/dts/qcom/monaco.dtsi +++ b/arch/arm64/boot/dts/qcom/monaco.dtsi @@ -3,6 +3,7 @@ * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ +#include #include #include #include @@ -29,6 +30,46 @@ #address-cells = <2>; #size-cells = <2>; + aest { + compatible = "arm,aest"; + #address-cells = <2>; + #size-cells = <2>; + ranges; + + aest-processor-0 { + compatible = "arm,aest-processor"; + arm,num-records = <1>; + arm,record-impl = /bits/ 64 <0x0>; + arm,status-reporting = /bits/ 64 <0x0>; + arm,addressing-mode = /bits/ 64 <0x0>; + arm,processor-flags = ; + interrupts = ; + interrupt-names = "fhi"; + }; + + aest-l3-cluster0 { + compatible = "arm,aest-processor"; + arm,num-records = <2>; + arm,record-impl = /bits/ 64 <0x1>; + arm,status-reporting = /bits/ 64 <0x0>; + arm,addressing-mode = /bits/ 64 <0x0>; + arm,processor-flags = ; + interrupts = ; + interrupt-names = "fhi"; + }; + + aest-l3-cluster1 { + compatible = "arm,aest-processor"; + arm,num-records = <2>; + arm,record-impl = /bits/ 64 <0x1>; + arm,status-reporting = /bits/ 64 <0x0>; + arm,addressing-mode = /bits/ 64 <0x0>; + arm,processor-flags = ; + interrupts = ; + interrupt-names = "fhi"; + }; + }; + clocks { xo_board_clk: xo-board-clk { compatible = "fixed-clock"; diff --git a/arch/arm64/include/asm/arm-cmn.h b/arch/arm64/include/asm/arm-cmn.h new file mode 100644 index 0000000000000..1b9f506797944 --- /dev/null +++ b/arch/arm64/include/asm/arm-cmn.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2015 ARM Ltd. 
+ */ +#ifndef __ASM_ARM_CMN_H +#define __ASM_ARM_CMN_H + +#include + +/* Common register stuff */ +#define CMN_NODE_INFO 0x0000 +#define CMN_NI_NODE_TYPE GENMASK_ULL(15, 0) +#define CMN_NI_NODE_ID GENMASK_ULL(31, 16) +#define CMN_NI_LOGICAL_ID GENMASK_ULL(47, 32) + +enum cmn_node_type { + CMN_TYPE_INVALID, + CMN_TYPE_DVM, + CMN_TYPE_CFG, + CMN_TYPE_DTC, + CMN_TYPE_HNI, + CMN_TYPE_HNF, + CMN_TYPE_XP, + CMN_TYPE_SBSX, + CMN_TYPE_MPAM_S, + CMN_TYPE_MPAM_NS, + CMN_TYPE_RNI, + CMN_TYPE_RND = 0xd, + CMN_TYPE_RNSAM = 0xf, + CMN_TYPE_MTSX, + CMN_TYPE_HNP, + CMN_TYPE_CXRA = 0x100, + CMN_TYPE_CXHA, + CMN_TYPE_CXLA, + CMN_TYPE_CCRA, + CMN_TYPE_CCHA, + CMN_TYPE_CCLA, + CMN_TYPE_CCLA_RNI, + CMN_TYPE_HNS = 0x200, + CMN_TYPE_HNS_MPAM_S, + CMN_TYPE_HNS_MPAM_NS, + CMN_TYPE_APB = 0x1000, + /* Not a real node type */ + CMN_TYPE_WP = 0x7770 +}; + +#endif /* __ASM_ARM_CMN_H */ diff --git a/arch/arm64/include/asm/ras.h b/arch/arm64/include/asm/ras.h new file mode 100644 index 0000000000000..02cf15278d9ff --- /dev/null +++ b/arch/arm64/include/asm/ras.h @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_RAS_H +#define __ASM_RAS_H + +#include +#include + +/* ERRFR */ +#define ERR_FR_CE GENMASK_ULL(54, 53) +#define ERR_FR_RP BIT(15) +#define ERR_FR_CEC GENMASK_ULL(14, 12) + +#define ERR_FR_RP_SINGLE_COUNTER 0 +#define ERR_FR_RP_DOUBLE_COUNTER 1 + +#define ERR_FR_CEC_0B_COUNTER 0 +#define ERR_FR_CEC_8B_COUNTER BIT(1) +#define ERR_FR_CEC_16B_COUNTER BIT(2) + +/* ERRMISC0 */ + +/* ERRFR.CEC == 0b010, ERRFR.RP == 0 */ +#define ERR_MISC0_8B_OF BIT(39) +#define ERR_MISC0_8B_CEC GENMASK_ULL(38, 32) + +/* ERRFR.CEC == 0b100, ERRFR.RP == 0 */ +#define ERR_MISC0_16B_OF BIT(47) +#define ERR_MISC0_16B_CEC GENMASK_ULL(46, 32) + +#define ERR_MISC0_CEC_SHIFT 31 /* NOTE(review): both CEC fields above start at bit 32, so shifting by 31 makes the *_CEC_MAX values below 0xFE/0xFFFE, twice the 0x7F/0x7FFF a 7/15-bit counter can hold - confirm 31 is intended */ + +#define ERR_8B_CEC_MAX (ERR_MISC0_8B_CEC >> ERR_MISC0_CEC_SHIFT) +#define ERR_16B_CEC_MAX (ERR_MISC0_16B_CEC >> ERR_MISC0_CEC_SHIFT) + +/* ERRFR.CEC == 0b100, ERRFR.RP == 1 */ +#define ERR_MISC0_16B_OFO BIT(63) +#define
ERR_MISC0_16B_CECO GENMASK_ULL(62, 48) +#define ERR_MISC0_16B_OFR BIT(47) +#define ERR_MISC0_16B_CECR GENMASK_ULL(46, 32) + +/* ERRSTATUS */ +#define ERR_STATUS_AV BIT(31) +#define ERR_STATUS_V BIT(30) +#define ERR_STATUS_UE BIT(29) +#define ERR_STATUS_ER BIT(28) +#define ERR_STATUS_OF BIT(27) +#define ERR_STATUS_MV BIT(26) +#define ERR_STATUS_CE (BIT(25) | BIT(24)) +#define ERR_STATUS_DE BIT(23) +#define ERR_STATUS_PN BIT(22) +#define ERR_STATUS_UET (BIT(21) | BIT(20)) +#define ERR_STATUS_CI BIT(19) +#define ERR_STATUS_IERR GENMASK_ULL(15, 8) +#define ERR_STATUS_SERR GENMASK_ULL(7, 0) + +/* These bits are write-one-to-clear */ +#define ERR_STATUS_W1TC \ + (ERR_STATUS_AV | ERR_STATUS_V | ERR_STATUS_UE | ERR_STATUS_ER | \ + ERR_STATUS_OF | ERR_STATUS_MV | ERR_STATUS_CE | ERR_STATUS_DE | \ + ERR_STATUS_PN | ERR_STATUS_UET | ERR_STATUS_CI) + +#define ERR_STATUS_UET_UC 0 +#define ERR_STATUS_UET_UEU 1 +#define ERR_STATUS_UET_UEO 2 +#define ERR_STATUS_UET_UER 3 + +/* ERRADDR */ +#define ERR_ADDR_AI BIT(61) +#define ERR_ADDR_PADDR GENMASK_ULL(55, 0) + +/* ERRCTLR */ +#define ERR_CTLR_CFI BIT(8) +#define ERR_CTLR_FI BIT(3) +#define ERR_CTLR_UI BIT(2) + +/* ERRDEVARCH */ +#define ERRDEVARCH_REV GENMASK(19, 16) + +enum ras_ce_threshold { + RAS_CE_THRESHOLD_0B, + RAS_CE_THRESHOLD_8B, + RAS_CE_THRESHOLD_16B, + RAS_CE_THRESHOLD_32B, + UNKNOWN, +}; + +struct ras_ext_regs { + u64 err_fr; + u64 err_ctlr; + u64 err_status; + u64 err_addr; + u64 err_misc[4]; +}; + +#endif /* __ASM_RAS_H */ diff --git a/drivers/acpi/arm64/Kconfig b/drivers/acpi/arm64/Kconfig index f2fd79f22e7d8..52df190356c82 100644 --- a/drivers/acpi/arm64/Kconfig +++ b/drivers/acpi/arm64/Kconfig @@ -24,3 +24,14 @@ config ACPI_APMT config ACPI_MPAM bool + +config ACPI_AEST + bool "ARM Error Source Table Support" + depends on ARM64_RAS_EXTN + + help + The Arm Error Source Table (AEST) provides details on ACPI + extensions that enable kernel-first handling of errors in a + system that supports the Armv8 RAS
extensions. + + If set, the kernel will report and log hardware errors. diff --git a/drivers/acpi/arm64/Makefile b/drivers/acpi/arm64/Makefile index 9390b57cb5648..bad77fdbf8dd0 100644 --- a/drivers/acpi/arm64/Makefile +++ b/drivers/acpi/arm64/Makefile @@ -7,5 +7,6 @@ obj-$(CONFIG_ACPI_IORT) += iort.o obj-$(CONFIG_ACPI_MPAM) += mpam.o obj-$(CONFIG_ACPI_PROCESSOR_IDLE) += cpuidle.o obj-$(CONFIG_ARM_AMBA) += amba.o +obj-$(CONFIG_ACPI_AEST) += aest.o obj-y += dma.o init.o obj-y += thermal_cpufreq.o diff --git a/drivers/acpi/arm64/aest.c b/drivers/acpi/arm64/aest.c new file mode 100644 index 0000000000000..b8359b95f40f9 --- /dev/null +++ b/drivers/acpi/arm64/aest.c @@ -0,0 +1,311 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ARM Error Source Table Support + * + * Copyright (c) 2025, Alibaba Group. + */ + +#include +#include +#include + +#include "init.h" + +#undef pr_fmt +#define pr_fmt(fmt) "ACPI AEST: " fmt + +static struct xarray *aest_array; + +static void __init aest_init_interface(struct acpi_aest_hdr *hdr, + struct acpi_aest_node *node) +{ + struct acpi_aest_node_interface_header *interface; + + interface = ACPI_ADD_PTR(struct acpi_aest_node_interface_header, hdr, + hdr->node_interface_offset); + + node->type = hdr->type; + node->interface_hdr = interface; + + switch (interface->group_format) { + case ACPI_AEST_NODE_GROUP_FORMAT_4K: { + struct acpi_aest_node_interface_4k *interface_4k = + (struct acpi_aest_node_interface_4k *)(interface + 1); + + node->common = &interface_4k->common; + node->record_implemented = + (unsigned long *)&interface_4k->error_record_implemented; + node->status_reporting = + (unsigned long *)&interface_4k->error_status_reporting; + node->addressing_mode = + (unsigned long *)&interface_4k->addressing_mode; + break; + } + case ACPI_AEST_NODE_GROUP_FORMAT_16K: { + struct acpi_aest_node_interface_16k *interface_16k = + (struct acpi_aest_node_interface_16k *)(interface + 1); + + node->common = &interface_16k->common; + 
node->record_implemented = + (unsigned long *)interface_16k->error_record_implemented; + node->status_reporting = + (unsigned long *)interface_16k->error_status_reporting; + node->addressing_mode = + (unsigned long *)interface_16k->addressing_mode; + break; + } + case ACPI_AEST_NODE_GROUP_FORMAT_64K: { + struct acpi_aest_node_interface_64k *interface_64k = + (struct acpi_aest_node_interface_64k *)(interface + 1); + + node->common = &interface_64k->common; + node->record_implemented = + (unsigned long *)interface_64k->error_record_implemented; + node->status_reporting = + (unsigned long *)interface_64k->error_status_reporting; + node->addressing_mode = + (unsigned long *)interface_64k->addressing_mode; + break; + } + default: + pr_err("invalid group format: %d\n", interface->group_format); + } + + node->interrupt = ACPI_ADD_PTR(struct acpi_aest_node_interrupt_v2, hdr, + hdr->node_interrupt_offset); + + node->interrupt_count = hdr->node_interrupt_count; +} + +static struct aest_hnode *__init +acpi_aest_alloc_ahnode(struct acpi_aest_node *node, u64 error_device_id) +{ + struct aest_hnode *ahnode __free(kfree) = NULL; + + ahnode = kzalloc(sizeof(*ahnode), GFP_KERNEL); + if (!ahnode) + return NULL; + + INIT_LIST_HEAD(&ahnode->list); + ahnode->id = error_device_id; + ahnode->count = 0; + ahnode->type = node->type; + + return_ptr(ahnode); +} +static int __init acpi_aest_init_node(struct acpi_aest_hdr *aest_hdr) +{ + struct aest_hnode *ahnode; + u64 error_device_id; + struct acpi_aest_node *node; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + + node->spec_pointer = + ACPI_ADD_PTR(void, aest_hdr, aest_hdr->node_specific_offset); + if (aest_hdr->type == ACPI_AEST_PROCESSOR_ERROR_NODE) + node->processor_spec_pointer = + ACPI_ADD_PTR(void, node->spec_pointer, + sizeof(struct acpi_aest_processor)); + + aest_init_interface(aest_hdr, node); + + if (node->interrupt_count <= 0) + return -EINVAL; + + error_device_id = node->interrupt[0].gsiv; + 
ahnode = xa_load(aest_array, error_device_id); + if (!ahnode) { + ahnode = acpi_aest_alloc_ahnode(node, error_device_id); + if (!ahnode) + return -ENOMEM; + xa_store(aest_array, error_device_id, ahnode, GFP_KERNEL); + } + + list_add_tail(&node->list, &ahnode->list); + ahnode->count++; + + return 0; +} + +static int __init acpi_aest_init_nodes(struct acpi_table_header *aest_table) +{ + struct acpi_aest_hdr *aest_node, *aest_end; + struct acpi_table_aest *aest; + int rc; + + aest = (struct acpi_table_aest *)aest_table; + aest_node = ACPI_ADD_PTR(struct acpi_aest_hdr, aest, + sizeof(struct acpi_table_header)); + aest_end = ACPI_ADD_PTR(struct acpi_aest_hdr, aest, aest_table->length); + + while (aest_node < aest_end) { + if (((u64)aest_node + aest_node->length) > (u64)aest_end) { + pr_warn(FW_WARN + "AEST node pointer overflow, bad table.\n"); + return -EINVAL; + } + + rc = acpi_aest_init_node(aest_node); + if (rc) + return rc; + + aest_node = ACPI_ADD_PTR(struct acpi_aest_hdr, aest_node, + aest_node->length); + } + + return 0; +} + +static int acpi_aest_parse_irqs(struct platform_device *pdev, + struct acpi_aest_node *anode, + struct resource *res, int *res_idx, int irqs[2]) +{ + int i; + struct acpi_aest_node_interrupt_v2 *interrupt; + int trigger, irq; + + for (i = 0; i < anode->interrupt_count; i++) { + interrupt = &anode->interrupt[i]; + if (irqs[interrupt->type]) + continue; + + trigger = (interrupt->flags & AEST_INTERRUPT_MODE) ? + ACPI_LEVEL_SENSITIVE : + ACPI_EDGE_SENSITIVE; + + irq = acpi_register_gsi(&pdev->dev, interrupt->gsiv, trigger, + ACPI_ACTIVE_HIGH); + if (irq <= 0) { + pr_err("failed to map AEST GSI %d\n", interrupt->gsiv); + return irq; + } + + res[*res_idx].start = irq; + res[*res_idx].end = irq; + res[*res_idx].flags = IORESOURCE_IRQ; + res[*res_idx].name = interrupt->type ? 
AEST_ERI_NAME : + AEST_FHI_NAME; + + (*res_idx)++; + + irqs[interrupt->type] = irq; + } + + return 0; +} + +DEFINE_FREE(res, struct resource *, if (_T) kfree(_T)) +static struct platform_device *__init +acpi_aest_alloc_pdev(struct aest_hnode *ahnode, int index) +{ + struct platform_device *pdev __free(platform_device_put) = + platform_device_alloc("AEST", index++); + struct resource *res __free(res); + struct acpi_aest_node *anode; + int ret, size, j, irq[AEST_MAX_INTERRUPT_PER_NODE] = { 0 }; + + if (!pdev) + return ERR_PTR(-ENOMEM); + + res = kcalloc(ahnode->count + AEST_MAX_INTERRUPT_PER_NODE, sizeof(*res), + GFP_KERNEL); + if (!res) + return ERR_PTR(-ENOMEM); + + j = 0; + list_for_each_entry(anode, &ahnode->list, list) { + if (anode->interface_hdr->type != + ACPI_AEST_NODE_SYSTEM_REGISTER) { + res[j].name = AEST_NODE_NAME; + res[j].start = anode->interface_hdr->address; + switch (anode->interface_hdr->group_format) { + case ACPI_AEST_NODE_GROUP_FORMAT_4K: + size = 4 * KB; + break; + case ACPI_AEST_NODE_GROUP_FORMAT_16K: + size = 16 * KB; + break; + case ACPI_AEST_NODE_GROUP_FORMAT_64K: + size = 64 * KB; + break; + default: + size = 4 * KB; + } + res[j].end = res[j].start + size - 1; + res[j].flags = IORESOURCE_MEM; + } + + ret = acpi_aest_parse_irqs(pdev, anode, res, &j, irq); + if (ret) + return ERR_PTR(ret); + } + + ret = platform_device_add_resources(pdev, res, j); + if (ret) + return ERR_PTR(ret); + + ret = platform_device_add_data(pdev, &ahnode, sizeof(ahnode)); + if (ret) + return ERR_PTR(ret); + + ret = platform_device_add(pdev); + if (ret) + return ERR_PTR(ret); + + return_ptr(pdev); +} +static int __init acpi_aest_alloc_pdevs(void) +{ + int ret = 0, index = 0; + struct aest_hnode *ahnode = NULL; + unsigned long i; + + xa_for_each(aest_array, i, ahnode) { + struct platform_device *pdev = + acpi_aest_alloc_pdev(ahnode, index++); + + if (IS_ERR(pdev)) { + ret = PTR_ERR(pdev); + break; + } + } + + return ret; +} + +static int __init acpi_aest_init(void) +{ 
+ int ret; + + if (acpi_disabled) + return 0; + + struct acpi_table_header *aest_table __free(acpi_put_table) = + acpi_get_table_pointer(ACPI_SIG_AEST, 0); + if (IS_ERR(aest_table)) + return 0; + + aest_array = kzalloc(sizeof(struct xarray), GFP_KERNEL); + if (!aest_array) + return -ENOMEM; + + xa_init(aest_array); + + ret = acpi_aest_init_nodes(aest_table); + if (ret) { + pr_err("Failed init aest node %d\n", ret); + return ret; + } + + ret = acpi_aest_alloc_pdevs(); + if (ret) { + pr_err("Failed alloc pdev %d\n", ret); + return ret; + } + + return 0; +} +subsys_initcall_sync(acpi_aest_init); diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index f5305c8fdca43..4d0702f16a0ff 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -2,6 +2,7 @@ // Copyright (C) 2016-2020 Arm Limited // ARM CMN/CI interconnect PMU driver +#include #include #include #include @@ -19,11 +20,6 @@ #include /* Common register stuff */ -#define CMN_NODE_INFO 0x0000 -#define CMN_NI_NODE_TYPE GENMASK_ULL(15, 0) -#define CMN_NI_NODE_ID GENMASK_ULL(31, 16) -#define CMN_NI_LOGICAL_ID GENMASK_ULL(47, 32) - #define CMN_CHILD_INFO 0x0080 #define CMN_CI_CHILD_COUNT GENMASK_ULL(15, 0) #define CMN_CI_CHILD_PTR_OFFSET GENMASK_ULL(31, 16) @@ -242,37 +238,6 @@ enum cmn_revision { REV_CI700_R2P0, }; -enum cmn_node_type { - CMN_TYPE_INVALID, - CMN_TYPE_DVM, - CMN_TYPE_CFG, - CMN_TYPE_DTC, - CMN_TYPE_HNI, - CMN_TYPE_HNF, - CMN_TYPE_XP, - CMN_TYPE_SBSX, - CMN_TYPE_MPAM_S, - CMN_TYPE_MPAM_NS, - CMN_TYPE_RNI, - CMN_TYPE_RND = 0xd, - CMN_TYPE_RNSAM = 0xf, - CMN_TYPE_MTSX, - CMN_TYPE_HNP, - CMN_TYPE_CXRA = 0x100, - CMN_TYPE_CXHA, - CMN_TYPE_CXLA, - CMN_TYPE_CCRA, - CMN_TYPE_CCHA, - CMN_TYPE_CCLA, - CMN_TYPE_CCLA_RNI, - CMN_TYPE_HNS = 0x200, - CMN_TYPE_HNS_MPAM_S, - CMN_TYPE_HNS_MPAM_NS, - CMN_TYPE_APB = 0x1000, - /* Not a real node type */ - CMN_TYPE_WP = 0x7770 -}; - enum cmn_filter_select { SEL_NONE = -1, SEL_OCCUP1ID, diff --git a/drivers/ras/Kconfig b/drivers/ras/Kconfig index 
fc4f4bb94a4c6..61a2a05d9c949 100644 --- a/drivers/ras/Kconfig +++ b/drivers/ras/Kconfig @@ -33,6 +33,7 @@ if RAS source "arch/x86/ras/Kconfig" source "drivers/ras/amd/atl/Kconfig" +source "drivers/ras/aest/Kconfig" config RAS_FMPM tristate "FRU Memory Poison Manager" diff --git a/drivers/ras/Makefile b/drivers/ras/Makefile index 11f95d59d3972..72411ee9deafd 100644 --- a/drivers/ras/Makefile +++ b/drivers/ras/Makefile @@ -5,3 +5,4 @@ obj-$(CONFIG_RAS_CEC) += cec.o obj-$(CONFIG_RAS_FMPM) += amd/fmpm.o obj-y += amd/atl/ +obj-y += aest/ diff --git a/drivers/ras/aest/Kconfig b/drivers/ras/aest/Kconfig new file mode 100644 index 0000000000000..ca034255faddf --- /dev/null +++ b/drivers/ras/aest/Kconfig @@ -0,0 +1,28 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# ARM Error Source Table Support +# +# Copyright (c) 2025, Alibaba Group. +# + +config AEST + tristate "ARM AEST Driver" + depends on ACPI_AEST || OF_AEST + depends on RAS + help + The Arm Error Source Table (AEST) provides details on ACPI + extensions that enable kernel-first handling of errors in a + system that supports the Armv8 RAS extensions. + + If set, the kernel will report and log hardware errors. + +config OF_AEST + bool "ARM Error Source Table DT Support" + depends on ARM64_RAS_EXTN && OF + help + Enable support for discovering ARM RAS error sources using the + Device Tree based Arm Error Source Table (AEST) specification. + This allows the kernel to enumerate and manage hardware error + reporting blocks described in firmware for ARMv8 and later + systems. Select this option if your platform describes AEST + nodes in Device Tree and relies on RAS error handling. 
diff --git a/drivers/ras/aest/Makefile b/drivers/ras/aest/Makefile new file mode 100644 index 0000000000000..2997952901c05 --- /dev/null +++ b/drivers/ras/aest/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0-only + +obj-$(CONFIG_AEST) += aest.o + +aest-y := aest-core.o +aest-y += aest-sysfs.o +aest-y += aest-inject.o +aest-y += aest-cmn.o + +obj-$(CONFIG_OF_AEST) += aest-of.o diff --git a/drivers/ras/aest/aest-cmn.c b/drivers/ras/aest/aest-cmn.c new file mode 100644 index 0000000000000..ad82ed163a8c5 --- /dev/null +++ b/drivers/ras/aest/aest-cmn.c @@ -0,0 +1,330 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ARM Error Source Table CMN700 Support + * + * Copyright (c) 2025, Alibaba Inc + */ + +#include + +#include "aest.h" + +/* + * CMN includes 5 device types, each device type has an error group register set + * which contains a set of error records. The struct aest_cmn_700 represents + * one CMN Instance, and the struct aest_cmn_700_child represents one CMN device. + * The error records of CMN use the memory-mapped single error record view, so one + * record corresponds to one AEST node, which means there will be hundreds of + * AEST nodes for CMN. As described in chapter 2.6.3.4 of the Arm ACPI Spec[1], we + * use vendor-defined data to recognize the device type of an AEST node. So the AEST + * driver can enumerate all CMN AEST nodes to initialize struct aest_cmn_700 and + * aest_cmn_700_child with HID, UID and other CMN info described in AEST or CMN + * registers. + * + * Each CMN Instance has its own error interrupt and the struct aest_cmn_700 + * is passed to interrupt context. The OS checks the error group register set to + * locate the record which reports the error. The whole procedure is similar to + * chapter 3.8 in the Arm CMN Spec[2]. + * + * The CMN RAS architecture is shown as follows: + * + * +----+ + * -->|XP | ...... + * | +----+ + * | + * | +----+ ......
+ * | |HNI | +----------------+ + * | +----+ ->|record/AEST node| + * | | +----------------+ + * +------------+ | +----+ | . + * |CMN Instance|--| |HNF |---| . + * +------------+ | +----+ | . + * | | +----------------+ + * | +----+ ->|record/AEST node| + * | |SBSX| +----------------+ + * | +----+ ...... + * | + * | +----+ + * -->|CCG | ...... + * +----+ + * + * [1]: https://developer.arm.com/documentation/den0093/latest + * [2]: https://developer.arm.com/documentation/102308/latest + */ + +#define CMN_RAS_DEV_NUM 6 +#define CMN700_ERRGSR_NUM 8 +#define CMN_MAX_UID 8 +#define CMN_ERRDEVARCH 0x3FB8 +#define CMN_ERRDEVARCH_REV GENMASK(19, 16) +#define CMN_ERRGSR_OFFSET 0x3000 + +struct cmn_vendor_data { + int node_type; + int node_id; + int logic_id; +}; + +struct cmn_config { + int errgsr_num; + int dev_num; + int ras_ver; + const int *node_id_map; + const char *const *node_name; + int (*errgsr_mapping)(int errgsr_bit); + u64 (*errgsr_offset)(u64 hnd_ofset, int node_idx); +}; + +static const char *const cmn700_node_name[] = { + [CMN_TYPE_HNI] = "HNI", [CMN_TYPE_HNF] = "HNF", + [CMN_TYPE_XP] = "XP", [CMN_TYPE_SBSX] = "SBSX", + [CMN_TYPE_CXRA] = "RND", [CMN_TYPE_MTSX] = "MTSX", +}; + +static const int cmn700_node_id_map[] = { + [CMN_TYPE_HNI] = 1, [CMN_TYPE_HNF] = 2, [CMN_TYPE_XP] = 0, + [CMN_TYPE_SBSX] = 3, [CMN_TYPE_CXRA] = 4, [CMN_TYPE_MTSX] = 5, +}; + +static u64 cmn_dev_array[CMN_MAX_UID]; +static struct cmn_config *cmn_config; + +static u64 cmn700_errgsr_offset(u64 hnd_offset, int node_idx) +{ + return hnd_offset + CMN_ERRGSR_OFFSET + + (node_idx * 2) * CMN700_ERRGSR_NUM * 8; +} + +static struct cmn_config cmn700_config = { + .errgsr_num = CMN700_ERRGSR_NUM, + .dev_num = CMN_RAS_DEV_NUM, + .ras_ver = 1, + .node_name = cmn700_node_name, + .node_id_map = cmn700_node_id_map, + .errgsr_mapping = cmn700_errgsr_mapping, + .errgsr_offset = cmn700_errgsr_offset, +}; + +static acpi_status aest_cmn_700_resource_ioremap(struct acpi_resource *res, + void *data) +{ + struct 
acpi_resource_address64 addr64; + u32 *uid = data; + acpi_status status; + + status = acpi_resource_to_address64(res, &addr64); + if (ACPI_FAILURE(status) || (addr64.resource_type != ACPI_MEMORY_RANGE)) + return AE_OK; + + cmn_dev_array[*uid] = (u64)ioremap(addr64.address.minimum, + addr64.address.address_length); + + pr_debug("CMN device resource [%llx-%llx] ioremap to %llx\n", + addr64.address.minimum, addr64.address.maximum, + cmn_dev_array[*uid]); + + return AE_CTRL_TERMINATE; +} + +static acpi_status aest_cmn_get_dev_by_uid(acpi_handle handle, u32 level, + void *data, void **return_value) +{ + u32 *match_uid = data; + acpi_status status; + unsigned long long uid; + + status = acpi_evaluate_integer(handle, METHOD_NAME__UID, NULL, &uid); + if (ACPI_FAILURE(status)) { + pr_err("Do not find devive\n"); + return_ACPI_STATUS(status); + } + + if (uid != *match_uid) + return AE_OK; + + pr_debug("CMN device instance %llx, walk through resource\n", uid); + + status = acpi_walk_resources(handle, METHOD_NAME__CRS, + aest_cmn_700_resource_ioremap, data); + + if (ACPI_FAILURE(status)) { + pr_err("Device do not have resource\n"); + return_ACPI_STATUS(status); + } + + return AE_CTRL_TERMINATE; +} + +static inline int aest_cmn_node_ver(void *base) +{ + return FIELD_GET(CMN_ERRDEVARCH_REV, + readl_relaxed(base + CMN_ERRDEVARCH)); +} + +static int aest_cmn_init_node(struct aest_device *adev, + struct aest_node *cmn_node, + struct acpi_aest_node *anode, u64 type, + u64 errgsr_addr) +{ + cmn_node->info = anode; + cmn_node->name = devm_kasprintf(adev->dev, GFP_KERNEL, "%s", + cmn_config->node_name[type]); + if (!cmn_node->name) + return -ENOMEM; + cmn_node->errgsr = (void *)errgsr_addr; + cmn_node->type = anode->type; + cmn_node->adev = adev; + cmn_node->version = cmn_config->ras_ver; + cmn_node->errgsr_num = cmn_config->errgsr_num; + cmn_node->errgsr_mapping = cmn_config->errgsr_mapping; + cmn_node->record_count = cmn_node->errgsr_num * BITS_PER_LONG / 2; + 
cmn_node->record_implemented = devm_bitmap_zalloc( + adev->dev, cmn_node->record_count, GFP_KERNEL); + if (!cmn_node->record_implemented) + return -ENOMEM; + bitmap_set(cmn_node->record_implemented, 0, cmn_node->record_count); + + cmn_node->status_reporting = devm_bitmap_zalloc( + adev->dev, cmn_node->record_count, GFP_KERNEL); + if (!cmn_node->status_reporting) + return -ENOMEM; + bitmap_set(cmn_node->status_reporting, 0, cmn_node->record_count); + + cmn_node->records = devm_kcalloc(adev->dev, cmn_node->record_count, + sizeof(struct aest_record), + GFP_KERNEL); + if (!cmn_node->records) + return -ENOMEM; + + aest_node_dbg(cmn_node, "Node init with errgsr %llx\n", errgsr_addr); + + return 0; +} + +static int aest_cmn_reorgnize_node(struct aest_device *adev, + struct acpi_aest_node *anode, u64 base) +{ + struct aest_node *cmn_node; + u64 hnd_offset, cmn_node_offset, reg, logic_id, type, node_id; + u64 errgsr_addr, hnd_base; + struct aest_record *record; + int ret, node_index; + struct cmn_vendor_data *vendor_data; + + if (anode->interface_hdr->type != + ACPI_AEST_NODE_SINGLE_RECORD_MEMORY_MAPPED) { + aest_dev_err(adev, "CMN just use single memory mapping\n"); + return -ENODEV; + } + + hnd_offset = *((u64 *)anode->vendor->vendor_specific_data); + cmn_node_offset = *((u64 *)&anode->vendor->vendor_specific_data[8]); + + reg = readq_relaxed((void *)base + cmn_node_offset + CMN_NODE_INFO); + + logic_id = FIELD_GET(CMN_NI_LOGICAL_ID, reg); + type = FIELD_GET(CMN_NI_NODE_TYPE, reg); + node_id = FIELD_GET(CMN_NI_NODE_ID, reg); + + hnd_base = base + hnd_offset; + node_index = cmn_config->node_id_map[type]; + errgsr_addr = base + cmn_config->errgsr_offset(hnd_offset, node_index); + + // node not register, create it + cmn_node = &adev->nodes[node_index]; + if (!cmn_node->errgsr) { + ret = aest_cmn_init_node(adev, cmn_node, anode, type, + errgsr_addr); + if (ret) + return -ENOMEM; + } + + aest_dev_dbg(adev, "node type %llx, id %llx, offset %llx\n", type, + logic_id, 
cmn_node_offset); + + if (!test_bit(0, anode->record_implemented)) + clear_bit(logic_id, cmn_node->record_implemented); + + if (!test_bit(0, anode->status_reporting)) + clear_bit(logic_id, cmn_node->status_reporting); + + record = &cmn_node->records[logic_id]; + record->name = + devm_kasprintf(adev->dev, GFP_KERNEL, "record%lld", logic_id); + if (!record->name) + return -ENOMEM; + record->regs_base = devm_ioremap( + adev->dev, (resource_size_t)anode->interface_hdr->address, + sizeof(struct ras_ext_regs)); + if (!record->regs_base) + return -ENOMEM; + record->addressing_mode = test_bit(0, anode->addressing_mode); + record->node = cmn_node; + record->index = logic_id; + record->access = &aest_access[anode->interface_hdr->type]; + + vendor_data = devm_kzalloc(adev->dev, sizeof(struct cmn_vendor_data), + GFP_KERNEL); + vendor_data->node_type = type; + vendor_data->node_id = node_id; + vendor_data->logic_id = logic_id; + + record->vendor_data = vendor_data; + record->vendor_data_size = sizeof(struct cmn_vendor_data); + + aest_record_dbg(record, "base %llx\n", anode->interface_hdr->address); + + return 0; +} + +// reorgnize cmn node +static int aest_cmn_probe(struct aest_device *adev, struct aest_hnode *ahnode) +{ + acpi_status status; + u64 base; + int ret = 0; + struct acpi_aest_node *anode; + char name[9]; + + anode = list_first_entry(&ahnode->list, struct acpi_aest_node, list); + if (!anode) + return -ENODEV; + + if (!cmn_dev_array[anode->vendor->acpi_uid]) { + snprintf(name, 9, "%s", anode->vendor->acpi_hid); + status = acpi_get_devices(name, aest_cmn_get_dev_by_uid, + &anode->vendor->acpi_uid, NULL); + if (ACPI_FAILURE(status)) { + aest_dev_err(adev, "Can not find base\n"); + return_ACPI_STATUS(status); + } + } + base = cmn_dev_array[anode->vendor->acpi_uid]; + if (!base) { + aest_dev_err(adev, "Device base invalid\n"); + return -ENODEV; + } + + adev->type = anode->type; + adev->node_cnt = cmn_config->dev_num; + adev->nodes = devm_kcalloc(adev->dev, adev->node_cnt, 
+ sizeof(struct aest_node), GFP_KERNEL); + if (!adev->nodes) + return -ENOMEM; + aest_set_name(adev, ahnode); + + list_for_each_entry(anode, &ahnode->list, list) { + ret = aest_cmn_reorgnize_node(adev, anode, base); + if (ret) + return ret; + } + + return 0; +} + +int aest_cmn700_probe(struct aest_device *adev, struct aest_hnode *ahnode) +{ + cmn_config = &cmn700_config; + + return aest_cmn_probe(adev, ahnode); +} diff --git a/drivers/ras/aest/aest-core.c b/drivers/ras/aest/aest-core.c new file mode 100644 index 0000000000000..9ce782a66edfc --- /dev/null +++ b/drivers/ras/aest/aest-core.c @@ -0,0 +1,1107 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ARM Error Source Table Support + * + * Copyright (c) 2025, Alibaba Group. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "aest.h" + +DEFINE_PER_CPU(struct aest_device, percpu_adev); + +#undef pr_fmt +#define pr_fmt(fmt) "AEST: " fmt + +static bool aest_panic_on_ue; +module_param(aest_panic_on_ue, bool, 0644); +MODULE_PARM_DESC(aest_panic_on_ue, + "Panic on unrecoverable error: 0=off 1=on (default: 1)"); + +#ifdef CONFIG_DEBUG_FS +struct dentry *aest_debugfs; +#endif +/* + * This memory pool is only to be used to save AEST node in AEST irq context. + * There can be 500 AEST node at most. + */ +#define AEST_NODE_ALLOCED_MAX 500 + +#define AEST_LOG_PREFIX_BUFFER 64 + +BLOCKING_NOTIFIER_HEAD(aest_decoder_chain); + +static void aest_print(struct aest_event *event) +{ + static atomic_t seqno = { 0 }; + unsigned int curr_seqno; + char pfx_seq[AEST_LOG_PREFIX_BUFFER]; + int index; + struct ras_ext_regs *regs; + + curr_seqno = atomic_inc_return(&seqno); + snprintf(pfx_seq, sizeof(pfx_seq), "{%u}" HW_ERR, curr_seqno); + pr_info("%sHardware error from AEST %s\n", pfx_seq, event->node_name); + + switch (event->type) { + case ACPI_AEST_PROCESSOR_ERROR_NODE: + /* + * For shared/global nodes (e.g. 
cluster L3 cache, DSU), + * id0 is the CPU that handled the interrupt — not the error + * source itself. The node_name already identifies the resource + * (e.g. "processor.cache.1"). Print a distinct message so the + * log is not confused with a per-PE CPU error. + */ + if (event->proc_flags & + (ACPI_AEST_PROC_FLAG_SHARED | ACPI_AEST_PROC_FLAG_GLOBAL)) + pr_err("%s Error from shared processor resource (interrupt handled on CPU%d)\n", + pfx_seq, event->id0); + else + pr_err("%s Error from CPU%d\n", pfx_seq, event->id0); + break; + case ACPI_AEST_MEMORY_ERROR_NODE: + pr_err("%s Error from memory at SRAT proximity domain %#x\n", + pfx_seq, event->id0); + break; + case ACPI_AEST_SMMU_ERROR_NODE: + pr_err("%s Error from SMMU IORT node %#x subcomponent %#x\n", + pfx_seq, event->id0, event->id1); + break; + case ACPI_AEST_VENDOR_ERROR_NODE: + pr_err("%s Error from vendor hid %8.8s uid %#x\n", pfx_seq, + event->hid, event->id1); + break; + case ACPI_AEST_GIC_ERROR_NODE: + pr_err("%s Error from GIC type %#x instance %#x\n", pfx_seq, + event->id0, event->id1); + break; + default: + pr_err("%s Unknown AEST node type\n", pfx_seq); + return; + } + + index = event->index; + regs = &event->regs; + + pr_err("%s ERR%dFR: 0x%llx\n", pfx_seq, index, regs->err_fr); + pr_err("%s ERR%dCTRL: 0x%llx\n", pfx_seq, index, regs->err_ctlr); + pr_err("%s ERR%dSTATUS: 0x%llx\n", pfx_seq, index, regs->err_status); + if (regs->err_status & ERR_STATUS_AV) + pr_err("%s ERR%dADDR: 0x%llx\n", pfx_seq, index, + regs->err_addr); + + if (regs->err_status & ERR_STATUS_MV) { + pr_err("%s ERR%dMISC0: 0x%llx\n", pfx_seq, index, + regs->err_misc[0]); + pr_err("%s ERR%dMISC1: 0x%llx\n", pfx_seq, index, + regs->err_misc[1]); + pr_err("%s ERR%dMISC2: 0x%llx\n", pfx_seq, index, + regs->err_misc[2]); + pr_err("%s ERR%dMISC3: 0x%llx\n", pfx_seq, index, + regs->err_misc[3]); + } + + trace_arm_ras_ext_event(event->type, event->id0, event->id1, + event->index, event->hid, &event->regs, + event->vendor_data, 
event->vendor_data_size); +} + +static void aest_handle_memory_failure(u64 addr) +{ + unsigned long pfn; + + pfn = PHYS_PFN(addr); + + if (!pfn_valid(pfn)) { + pr_warn(HW_ERR "Invalid physical address: %#llx\n", addr); + return; + } + +#ifdef CONFIG_MEMORY_FAILURE + memory_failure(pfn, 0); +#endif +} + +static void init_aest_event(struct aest_event *event, + struct aest_record *record, + struct ras_ext_regs *regs) +{ + struct aest_node *node = record->node; + struct acpi_aest_node *info = node->info; + + event->type = node->type; + event->node_name = node->name; + switch (node->type) { + case ACPI_AEST_PROCESSOR_ERROR_NODE: + if (info->processor->flags & + (ACPI_AEST_PROC_FLAG_SHARED | ACPI_AEST_PROC_FLAG_GLOBAL)) + event->id0 = smp_processor_id(); + else + event->id0 = get_cpu_for_acpi_id( + info->processor->processor_id); + + event->id1 = info->processor->resource_type; + event->proc_flags = info->processor->flags; + break; + case ACPI_AEST_MEMORY_ERROR_NODE: + event->id0 = info->memory->srat_proximity_domain; + break; + case ACPI_AEST_SMMU_ERROR_NODE: + event->id0 = info->smmu->iort_node_reference; + event->id1 = info->smmu->subcomponent_reference; + break; + case ACPI_AEST_VENDOR_ERROR_NODE: + event->id0 = 0; + event->id1 = info->vendor->acpi_uid; + event->hid = info->vendor->acpi_hid; + break; + case ACPI_AEST_GIC_ERROR_NODE: + event->id0 = info->gic->interface_type; + event->id1 = info->gic->instance_id; + break; + default: + event->id0 = 0; + event->id1 = 0; + } + + memcpy(&event->regs, regs, sizeof(*regs)); + event->index = record->index; + event->addressing_mode = record->addressing_mode; + event->vendor_data_size = record->vendor_data_size; + event->vendor_data = record->vendor_data; +} + +static int aest_node_gen_pool_add(struct aest_device *adev, + struct aest_record *record, + struct ras_ext_regs *regs) +{ + struct aest_event *event; + + if (!adev->pool) + return -EINVAL; + + event = (void *)gen_pool_alloc(adev->pool, sizeof(*event)); + if (!event) + 
return -ENOMEM; + + memset(event, 0, sizeof(*event)); + init_aest_event(event, record, regs); + llist_add(&event->llnode, &adev->event_list); + + if (regs->err_status & ERR_STATUS_CE) + record->count.ce++; + if (regs->err_status & ERR_STATUS_DE) + record->count.de++; + if (regs->err_status & ERR_STATUS_UE) { + switch (regs->err_status & ERR_STATUS_UET) { + case ERR_STATUS_UET_UC: + record->count.uc++; + break; + case ERR_STATUS_UET_UEU: + record->count.ueu++; + break; + case ERR_STATUS_UET_UER: + record->count.uer++; + break; + case ERR_STATUS_UET_UEO: + record->count.ueo++; + break; + } + } + + return 0; +} + +static void aest_log(struct aest_record *record, struct ras_ext_regs *regs) +{ + struct aest_device *adev = record->node->adev; + + if (!aest_node_gen_pool_add(adev, record, regs)) + schedule_work(&adev->aest_work); +} + +void aest_register_decode_chain(struct notifier_block *nb) +{ + blocking_notifier_chain_register(&aest_decoder_chain, nb); +} +EXPORT_SYMBOL_GPL(aest_register_decode_chain); + +void aest_unregister_decode_chain(struct notifier_block *nb) +{ + blocking_notifier_chain_unregister(&aest_decoder_chain, nb); +} +EXPORT_SYMBOL_GPL(aest_unregister_decode_chain); + +static void aest_node_pool_process(struct work_struct *work) +{ + struct llist_node *head; + struct aest_event *event; + struct aest_device *adev = + container_of(work, struct aest_device, aest_work); + u64 status, addr; + + head = llist_del_all(&adev->event_list); + if (!head) + return; + + head = llist_reverse_order(head); + llist_for_each_entry(event, head, llnode) { + aest_print(event); + + status = event->regs.err_status; + if (!(event->regs.err_addr & ERR_ADDR_AI) && + (status & (ERR_STATUS_UE | ERR_STATUS_DE))) { + if (event->addressing_mode == AEST_ADDREESS_SPA) + addr = event->regs.err_addr & PHYS_MASK; + aest_handle_memory_failure(addr); + } + + blocking_notifier_call_chain(&aest_decoder_chain, 0, event); + gen_pool_free(adev->pool, (unsigned long)event, sizeof(*event)); + } +} 
+ +static int aest_node_pool_init(struct aest_device *adev) +{ + unsigned long addr, size; + + size = ilog2(sizeof(struct aest_event)); + adev->pool = + devm_gen_pool_create(adev->dev, size, -1, dev_name(adev->dev)); + if (!adev->pool) + return -ENOMEM; + + size = PAGE_ALIGN(size * AEST_NODE_ALLOCED_MAX); + addr = (unsigned long)devm_kzalloc(adev->dev, size, GFP_KERNEL); + if (!addr) + return -ENOMEM; + + return gen_pool_add(adev->pool, addr, size, -1); +} + +static void aest_panic(struct aest_record *record, struct ras_ext_regs *regs, + char *msg) +{ + struct aest_event event = { 0 }; + + init_aest_event(&event, record, regs); + + aest_print(&event); + + panic(msg); +} + +void aest_proc_record(struct aest_record *record, void *data, bool fake) +{ + struct ras_ext_regs regs = { 0 }; + int *count = data; + u64 ue; + + regs.err_status = record_read(record, ERXSTATUS); + if (!(regs.err_status & ERR_STATUS_V)) + return; + + (*count)++; + + if (regs.err_status & ERR_STATUS_AV) + regs.err_addr = record_read(record, ERXADDR); + + regs.err_fr = record_read(record, ERXFR); + regs.err_ctlr = record_read(record, ERXCTLR); + + if (regs.err_status & ERR_STATUS_MV) { + regs.err_misc[0] = record_read(record, ERXMISC0); + regs.err_misc[1] = record_read(record, ERXMISC1); + if (record->node->version >= ID_AA64PFR0_EL1_RAS_V1P1) { + regs.err_misc[2] = record_read(record, ERXMISC2); + regs.err_misc[3] = record_read(record, ERXMISC3); + } + + if (record->node->info->interface_hdr->flags & + AEST_XFACE_FLAG_CLEAR_MISC) { + record_write(record, ERXMISC0, 0); + record_write(record, ERXMISC1, 0); + if (record->node->version >= ID_AA64PFR0_EL1_RAS_V1P1) { + record_write(record, ERXMISC2, 0); + record_write(record, ERXMISC3, 0); + } + /* ce count is 0 if record do not support ce */ + } else if (record->ce.count > 0) + record_write(record, ERXMISC0, record->ce.reg_val); + } + + /* panic if unrecoverable and uncontainable error encountered */ + ue = FIELD_GET(ERR_STATUS_UET, regs.err_status); 
+ if ((regs.err_status & ERR_STATUS_UE) && + (ue == ERR_STATUS_UET_UC || ue == ERR_STATUS_UET_UEU)) { + if (fake) + aest_record_info( + record, + "Simulated error! Skip panic due to fault injection\n"); + else if (aest_panic_on_ue) + aest_panic(record, ®s, + "AEST: unrecoverable error encountered"); + else + aest_record_err(record, "UE detected, panic suppressed\n"); + } + + aest_log(record, ®s); + + /* Write-one-to-clear the bits we've seen */ + regs.err_status &= ERR_STATUS_W1TC; + + /* Multi bit filed need to write all-ones to clear. */ + if (regs.err_status & ERR_STATUS_CE) + regs.err_status |= ERR_STATUS_CE; + + /* Multi bit filed need to write all-ones to clear. */ + if (regs.err_status & ERR_STATUS_UET) + regs.err_status |= ERR_STATUS_UET; + + record_write(record, ERXSTATUS, regs.err_status); +} + +void aest_node_foreach_record(void (*func)(struct aest_record *, void *, bool), + struct aest_node *node, void *data, + unsigned long *bitmap) +{ + int i; + + for_each_clear_bit(i, bitmap, node->record_count) { + aest_select_record(node, i); + + func(&node->records[i], data, false); + + aest_sync(node); + } +} + +static int aest_proc(struct aest_node *node) +{ + int count = 0, i, j, size = node->record_count, record_idx; + u64 err_group = 0; + + aest_node_dbg(node, "Poll bitmap %*pb\n", size, + node->record_implemented); + aest_node_foreach_record(aest_proc_record, node, &count, + node->record_implemented); + + if (!node->errgsr) + return count; + + aest_node_dbg(node, "Report bitmap %*pb\n", size, + node->status_reporting); + for (i = 0; i < BITS_TO_U64(size); i++) { + err_group = readq_relaxed((void *)node->errgsr + i * 8); + for_each_set_bit(j, (unsigned long *)&err_group, + BITS_PER_LONG) { + record_idx = + node->errgsr_mapping(i * BITS_PER_LONG + j); + aest_node_dbg(node, "errgsr[%d]: bit %d occur error\n", + i, record_idx); + /* + * Error group base is only valid in Memory Map node, + * so driver do not need to write select register and + * sync. 
+			 */
+			/* never index records[] with an unmapped bit */
+			if (record_idx < 0 || record_idx >= size)
+				continue;
+			if (test_bit(record_idx, node->status_reporting))
+				continue;
+			aest_proc_record(&node->records[record_idx], &count,
+					 false);
+		}
+	}
+
+	return count;
+}
+
+/* Interrupt handler: poll every populated node of the device. */
+static irqreturn_t aest_irq_func(int irq, void *input)
+{
+	struct aest_device *adev = input;
+	int i;
+
+	for (i = 0; i < adev->node_cnt; i++) {
+		if (!adev->nodes[i].record_count)
+			continue;
+		aest_proc(&adev->nodes[i]);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Request every interrupt recorded in adev->irq[]. Per-CPU interrupts are
+ * requested with request_percpu_irq(), the others with devm_request_irq().
+ * On failure the per-CPU interrupts requested so far are released; the
+ * devm-managed ones are released by the driver core.
+ */
+static int aest_register_irq(struct aest_device *adev)
+{
+	int i, irq, ret;
+	char *irq_desc;
+
+	irq_desc = devm_kasprintf(adev->dev, GFP_KERNEL, "%s.%s.",
+				  dev_driver_string(adev->dev),
+				  dev_name(adev->dev));
+	if (!irq_desc)
+		return -ENOMEM;
+
+	for (i = 0; i < MAX_GSI_PER_NODE; i++) {
+		irq = adev->irq[i];
+
+		if (!irq)
+			continue;
+
+		if (irq_is_percpu_devid(irq))
+			ret = request_percpu_irq(irq, aest_irq_func, irq_desc,
+						 adev->adev_oncore);
+		else
+			ret = devm_request_irq(adev->dev, irq, aest_irq_func, 0,
+					       irq_desc, adev);
+		if (ret)
+			goto free;
+	}
+	return 0;
+
+free:
+	/* Entry i failed to register: unwind only the entries before it. */
+	while (--i >= 0) {
+		irq = adev->irq[i];
+
+		if (irq && irq_is_percpu_devid(irq))
+			free_percpu_irq(irq, adev->adev_oncore);
+	}
+
+	return ret;
+}
+
+/* Enable the ERXCTLR interrupt bits matching the interrupts the device has. */
+static void aest_enable_irq(struct aest_record *record)
+{
+	u64 err_ctlr;
+	struct aest_device *adev = record->node->adev;
+
+	err_ctlr = record_read(record, ERXCTLR);
+
+	if (adev->irq[ACPI_AEST_NODE_FAULT_HANDLING])
+		err_ctlr |= (ERR_CTLR_FI | ERR_CTLR_CFI);
+	if (adev->irq[ACPI_AEST_NODE_ERROR_RECOVERY])
+		err_ctlr |= ERR_CTLR_UI;
+
+	record_write(record, ERXCTLR, err_ctlr);
+}
+
+/*
+ * Write the GSIVs from the firmware table to the node's interrupt config
+ * registers (fault handling at offset 0, error recovery at offset 8).
+ */
+static void aest_config_irq(struct aest_node *node)
+{
+	int i;
+	struct acpi_aest_node_interrupt_v2 *interrupt;
+
+	if (!node->irq_config)
+		return;
+
+	for (i = 0; i < node->info->interrupt_count; i++) {
+		interrupt = &node->info->interrupt[i];
+
+		if (interrupt->type == ACPI_AEST_NODE_FAULT_HANDLING)
+			writeq_relaxed(interrupt->gsiv, node->irq_config);
+
+		if (interrupt->type == ACPI_AEST_NODE_ERROR_RECOVERY)
+			writeq_relaxed(interrupt->gsiv, node->irq_config + 8);
+
+		aest_node_dbg(node, "config irq type %d gsiv %d at %llx",
+			      interrupt->type, interrupt->gsiv,
+			      (u64)node->irq_config);
+	}
+}
+
+/* Derive the CE counter width from the CEC and RP fields of ERXFR. */
+static enum ras_ce_threshold aest_get_ce_threshold(struct aest_record *record)
+{
+	u64 err_fr, err_fr_cec, err_fr_rp = -1;
+
+	err_fr = record_read(record, ERXFR);
+	err_fr_cec = FIELD_GET(ERR_FR_CEC, err_fr);
+	err_fr_rp = FIELD_GET(ERR_FR_RP, err_fr);
+
+	if (err_fr_cec == ERR_FR_CEC_0B_COUNTER)
+		return RAS_CE_THRESHOLD_0B;
+	else if (err_fr_rp == ERR_FR_RP_DOUBLE_COUNTER)
+		return RAS_CE_THRESHOLD_32B;
+	else if (err_fr_cec == ERR_FR_CEC_8B_COUNTER)
+		return RAS_CE_THRESHOLD_8B;
+	else if (err_fr_cec == ERR_FR_CEC_16B_COUNTER)
+		return RAS_CE_THRESHOLD_16B;
+	else
+		return UNKNOWN;
+}
+
+static const struct ce_threshold_info ce_info[] = {
+	[RAS_CE_THRESHOLD_0B] = { 0 },
+	[RAS_CE_THRESHOLD_8B] = {
+		.max_count = ERR_8B_CEC_MAX,
+		.mask = ERR_MISC0_8B_CEC,
+		.shift = ERR_MISC0_CEC_SHIFT,
+	},
+	[RAS_CE_THRESHOLD_16B] = {
+		.max_count = ERR_16B_CEC_MAX,
+		.mask = ERR_MISC0_16B_CEC,
+		.shift = ERR_MISC0_CEC_SHIFT,
+	},
+};
+
+/*
+ * Program the default CE threshold into ERXMISC0 for records with an 8 or
+ * 16 bit counter. Records with no counter, a 32 bit counter or an unknown
+ * layout are left untouched: ce_info[] has no entry for them.
+ */
+static void aest_set_ce_threshold(struct aest_record *record)
+{
+	u64 err_misc0;
+	struct ce_threshold *ce = &record->ce;
+	const struct ce_threshold_info *info;
+
+	record->threshold_type = aest_get_ce_threshold(record);
+
+	switch (record->threshold_type) {
+	case RAS_CE_THRESHOLD_0B:
+		aest_record_dbg(record, "do not support CE threshold!\n");
+		return;
+	case RAS_CE_THRESHOLD_8B:
+		aest_record_dbg(record, "support 8 bit CE threshold!\n");
+		break;
+	case RAS_CE_THRESHOLD_16B:
+		aest_record_dbg(record, "support 16 bit CE threshold!\n");
+		break;
+	case RAS_CE_THRESHOLD_32B:
+		aest_record_dbg(record, "not support 32 bit CE threshold!\n");
+		return;
+	default:
+		aest_record_dbg(record, "Unknown misc0 ce threshold!\n");
+		return;
+	}
+
+	err_misc0 = record_read(record, ERXMISC0);
+	info = &ce_info[record->threshold_type];
+	ce->info = info;
+
+	// Default CE threshold is 1.
+	ce->count = info->max_count;
+	ce->threshold = DEFAULT_CE_THRESHOLD;
+	ce->reg_val = err_misc0 | info->mask;
+
+	record_write(record, ERXMISC0, ce->reg_val);
+	aest_record_dbg(record, "CE threshold is %llx, controlled by Kernel",
+			ce->threshold);
+}
+
+/*
+ * RAS version of a node: the ERRDEVARCH revision for GIC nodes (0 if the
+ * register cannot be mapped), the CPU's ID_AA64PFR0_EL1.RAS field otherwise.
+ */
+static int get_aest_node_ver(struct aest_node *node)
+{
+	u64 reg;
+	void *devarch_base;
+
+	if (node->type == ACPI_AEST_GIC_ERROR_NODE) {
+		devarch_base = ioremap(node->info->interface_hdr->address +
+					       GIC_ERRDEVARCH,
+				       PAGE_SIZE);
+		if (!devarch_base)
+			return 0;
+
+		reg = readl_relaxed(devarch_base);
+		iounmap(devarch_base);
+
+		return FIELD_GET(ERRDEVARCH_REV, reg);
+	}
+
+	return FIELD_GET(ID_AA64PFR0_EL1_RAS_MASK, read_cpuid(ID_AA64PFR0_EL1));
+}
+
+/* Initialise record @i of @node (name, register base, accessors). */
+static int aest_init_record(struct aest_record *record, int i,
+			    struct aest_node *node)
+{
+	struct device *dev = node->adev->dev;
+
+	record->name = devm_kasprintf(dev, GFP_KERNEL, "record%d", i);
+	if (!record->name)
+		return -ENOMEM;
+
+	if (node->base)
+		record->regs_base =
+			node->base + sizeof(struct ras_ext_regs) * i;
+
+	record->access = &aest_access[node->info->interface_hdr->type];
+	record->addressing_mode = test_bit(i, node->info->addressing_mode);
+	record->index = i;
+	record->node = node;
+
+	aest_record_dbg(record, "base: %p, index: %d, address mode: %x\n",
+			record->regs_base, record->index,
+			record->addressing_mode);
+	return 0;
+}
+
+/* Per-record bring-up: program the CE threshold if CE is supported, then
+ * enable the record's interrupts.
+ */
+static void aest_online_record(struct aest_record *record, void *data,
+			       bool __unused)
+{
+	if (record_read(record, ERXFR) & ERR_FR_CE)
+		aest_set_ce_threshold(record);
+
+	aest_enable_irq(record);
+}
+
+static void aest_online_oncore_node(struct aest_node *node)
+{
+	int count;
+
+	count = aest_proc(node);
+	aest_node_dbg(node, "Find %d error on CPU%d before AEST probe\n", count,
+		      smp_processor_id());
+
+	aest_node_foreach_record(aest_online_record, node, NULL,
+				 node->record_implemented);
+
+	aest_node_foreach_record(aest_online_record, node, NULL,
+				 node->status_reporting);
+}
+
+/* Runs on the target CPU: online its nodes and enable its per-CPU IRQs. */
+static void aest_online_oncore_dev(void *data)
+{
+	int fhi_irq, eri_irq, i;
+	struct aest_device *adev = this_cpu_ptr(data);
+
+	for (i = 0; i < adev->node_cnt; i++)
+		aest_online_oncore_node(&adev->nodes[i]);
+
+	fhi_irq = adev->irq[ACPI_AEST_NODE_FAULT_HANDLING];
+	if (fhi_irq > 0)
+		enable_percpu_irq(fhi_irq, IRQ_TYPE_NONE);
+	eri_irq = adev->irq[ACPI_AEST_NODE_ERROR_RECOVERY];
+	if (eri_irq > 0)
+		enable_percpu_irq(eri_irq, IRQ_TYPE_NONE);
+}
+
+/* Runs on the target CPU: disable its per-CPU IRQs. */
+static void aest_offline_oncore_dev(void *data)
+{
+	int fhi_irq, eri_irq;
+	struct aest_device *adev = this_cpu_ptr(data);
+
+	fhi_irq = adev->irq[ACPI_AEST_NODE_FAULT_HANDLING];
+	if (fhi_irq > 0)
+		disable_percpu_irq(fhi_irq);
+	eri_irq = adev->irq[ACPI_AEST_NODE_ERROR_RECOVERY];
+	if (eri_irq > 0)
+		disable_percpu_irq(eri_irq);
+}
+
+static void aest_online_dev(struct aest_device *adev)
+{
+	int count, i;
+	struct aest_node *node;
+
+	for (i = 0; i < adev->node_cnt; i++) {
+		node = &adev->nodes[i];
+
+		if (!node->name)
+			continue;
+
+		count = aest_proc(node);
+		aest_node_dbg(node, "Find %d error before AEST probe\n", count);
+
+		aest_config_irq(node);
+
+		aest_node_foreach_record(aest_online_record, node, NULL,
+					 node->record_implemented);
+		aest_node_foreach_record(aest_online_record, node, NULL,
+					 node->status_reporting);
+	}
+}
+
+static int aest_starting_cpu(unsigned int cpu)
+{
+	pr_debug("CPU%d starting\n", cpu);
+	aest_online_oncore_dev(&percpu_adev);
+
+	return 0;
+}
+
+static int aest_dying_cpu(unsigned int cpu)
+{
+	pr_debug("CPU%d dying\n", cpu);
+	aest_offline_oncore_dev(&percpu_adev);
+
+	return 0;
+}
+
+/*
+ * Platform remove: for processor devices that were set up with per-CPU
+ * interrupts, disable them on every CPU and free them. Everything else is
+ * devm-managed.
+ */
+static void aest_device_remove(struct platform_device *pdev)
+{
+	struct aest_device *adev = platform_get_drvdata(pdev);
+	int i;
+
+	platform_set_drvdata(pdev, NULL);
+
+	if (adev->type != ACPI_AEST_PROCESSOR_ERROR_NODE)
+		return;
+
+	/* adev_oncore is only set when the device uses per-CPU interrupts */
+	if (!adev->adev_oncore)
+		return;
+
+	on_each_cpu(aest_offline_oncore_dev, adev->adev_oncore, 1);
+
+	for (i = 0; i < MAX_GSI_PER_NODE; i++) {
+		if (adev->irq[i] && irq_is_percpu_devid(adev->irq[i]))
+			free_percpu_irq(adev->irq[i], adev->adev_oncore);
+	}
+}
+
+/* Build the devm-allocated display name of @node; NULL on allocation failure. */
+static char *alloc_aest_node_name(struct aest_node *node)
+{
+	char *name;
+
+	switch (node->type) {
+	case ACPI_AEST_PROCESSOR_ERROR_NODE:
+		/*
+		 * Shared/global processor nodes (e.g. cluster L3 cache, DSU)
+		 * have processor_id=0 and use smp_processor_id() at error-log
+		 * time — using processor_id in the name would produce the same
+		 * "processor.0" string for every shared node and every CPU0
+		 * per-PE node, making logs ambiguous.
+		 *
+		 * For shared/global nodes, build the name from the resource
+		 * type and the device id so each node gets a unique, meaningful
+		 * name (e.g. "processor.cache.1", "processor.tlb.2").
+		 *
+		 * For per-PE nodes, keep the original "processor." form.
+		 */
+		if (node->info->processor->flags &
+		    (ACPI_AEST_PROC_FLAG_SHARED | ACPI_AEST_PROC_FLAG_GLOBAL)) {
+			static const char *const res_name[] = {
+				[ACPI_AEST_CACHE_RESOURCE] = "cache",
+				[ACPI_AEST_TLB_RESOURCE] = "tlb",
+				[ACPI_AEST_GENERIC_RESOURCE] = "generic",
+			};
+			u8 rtype = node->info->processor->resource_type;
+			const char *rstr = (rtype < ARRAY_SIZE(res_name) &&
+					    res_name[rtype]) ?
+					   res_name[rtype] : "unknown";
+
+			name = devm_kasprintf(node->adev->dev, GFP_KERNEL,
+					      "%s.%s.%d",
+					      aest_node_name[node->type],
+					      rstr,
+					      node->adev->id);
+		} else {
+			name = devm_kasprintf(node->adev->dev, GFP_KERNEL,
+					      "%s.%d",
+					      aest_node_name[node->type],
+					      node->info->processor->processor_id);
+		}
+		break;
+	case ACPI_AEST_MEMORY_ERROR_NODE:
+	case ACPI_AEST_SMMU_ERROR_NODE:
+	case ACPI_AEST_VENDOR_ERROR_NODE:
+	case ACPI_AEST_GIC_ERROR_NODE:
+	case ACPI_AEST_PCIE_ERROR_NODE:
+	case ACPI_AEST_PROXY_ERROR_NODE:
+		name = devm_kasprintf(node->adev->dev, GFP_KERNEL, "%s.%llx",
+				      aest_node_name[node->type],
+				      node->info->interface_hdr->address);
+		break;
+	default:
+		name = devm_kasprintf(node->adev->dev, GFP_KERNEL, "Unknown");
+	}
+
+	return name;
+}
+
+/*
+ * Set node->errgsr for memory-mapped nodes: inside the node's own register
+ * window unless the interface flags say firmware supplies a separate error
+ * group register base, which is then mapped on its own.
+ */
+static int aest_node_set_errgsr(struct aest_device *adev,
+				struct aest_node *node)
+{
+	struct acpi_aest_node *anode = node->info;
+	u64 errgsr_base = anode->common->error_group_register_base;
+
+	if (anode->interface_hdr->type != ACPI_AEST_NODE_MEMORY_MAPPED)
+		return 0;
+
+	if (!node->base)
+		return 0;
+
+	if (!(anode->interface_hdr->flags & AEST_XFACE_FLAG_ERROR_GROUP)) {
+		node->errgsr = node->base + node->group->errgsr_offset;
+		return 0;
+	}
+
+	if (!errgsr_base)
+		return -EINVAL;
+
+	node->errgsr = devm_ioremap(adev->dev, errgsr_base, PAGE_SIZE);
+	if (!node->errgsr)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/*
+ * Initialise @node from its firmware description @anode: map the register
+ * window and the optional fault-injection / interrupt-config / error-group
+ * registers, then allocate and initialise its records.
+ */
+static int aest_init_node(struct aest_device *adev, struct aest_node *node,
+			  struct acpi_aest_node *anode)
+{
+	int i, ret;
+	u64 address, flags;
+
+	node->adev = adev;
+	node->info = anode;
+	node->type = anode->type;
+	node->version = get_aest_node_ver(node);
+	node->errgsr_mapping = default_errgsr_mapping;
+	node->name = alloc_aest_node_name(node);
+	if (!node->name)
+		return -ENOMEM;
+	node->record_implemented = anode->record_implemented;
+	node->status_reporting = anode->status_reporting;
+	node->group = &aest_group_config[anode->interface_hdr->group_format];
+
+	address = anode->interface_hdr->address;
+	if (address) {
+		node->base =
+			devm_ioremap(adev->dev, address, node->group->size);
+		if (!node->base)
+			return -ENOMEM;
+	}
+
+	flags = anode->interface_hdr->flags;
+	address = node->info->common->fault_inject_register_base;
+	if ((flags & AEST_XFACE_FLAG_FAULT_INJECT) && address) {
+		if (address - anode->interface_hdr->address < node->group->size)
+			node->inj = node->base +
+				    (address - anode->interface_hdr->address);
+		else {
+			node->inj = devm_ioremap(adev->dev, address, PAGE_SIZE);
+			if (!node->inj)
+				return -ENOMEM;
+		}
+	}
+
+	address = node->info->common->interrupt_config_register_base;
+	if ((flags & AEST_XFACE_FLAG_INT_CONFIG) && address) {
+		if (address - anode->interface_hdr->address < node->group->size)
+			node->irq_config =
+				node->base +
+				(address - anode->interface_hdr->address);
+		else {
+			node->irq_config =
+				devm_ioremap(adev->dev, address, PAGE_SIZE);
+			if (!node->irq_config)
+				return -ENOMEM;
+		}
+	}
+
+	ret = aest_node_set_errgsr(adev, node);
+	if (ret)
+		return ret;
+
+	node->record_count = anode->interface_hdr->error_record_count;
+	node->records = devm_kcalloc(adev->dev, node->record_count,
+				     sizeof(struct aest_record), GFP_KERNEL);
+	if (!node->records)
+		return -ENOMEM;
+
+	node->errgsr_num = DIV_ROUND_UP(node->record_count, BITS_PER_LONG);
+	for (i = 0; i < node->record_count; i++) {
+		ret = aest_init_record(&node->records[i], i, node);
+		if (ret)
+			return ret;
+	}
+	aest_node_dbg(node, "%d records, base: %llx, errgsr: %llx\n",
+		      node->record_count, (u64)node->base, (u64)node->errgsr);
+	return 0;
+}
+
+/* Allocate adev->nodes and initialise one node per entry of @ahnode->list. */
+static int aest_init_nodes(struct aest_device *adev, struct aest_hnode *ahnode)
+{
+	struct acpi_aest_node *anode;
+	struct aest_node *node;
+	int ret, i = 0;
+
+	adev->node_cnt = ahnode->count;
+	adev->nodes = devm_kcalloc(adev->dev, adev->node_cnt,
+				   sizeof(struct aest_node), GFP_KERNEL);
+	if (!adev->nodes)
+		return -ENOMEM;
+
+	list_for_each_entry(anode, &ahnode->list, list) {
+		adev->type = anode->type;
+
+		node = &adev->nodes[i++];
+		ret = aest_init_node(adev, node, anode);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Give every possible CPU its own copy of @adev in percpu_adev, with
+ * private copies of the node and record arrays.
+ */
+static int __setup_ppi(struct aest_device *adev)
+{
+	int cpu, i;
+	struct aest_device *oncore_adev;
+	struct aest_node *oncore_node;
+	size_t size;
+
+	adev->adev_oncore = &percpu_adev;
+	for_each_possible_cpu(cpu) {
+		oncore_adev = per_cpu_ptr(&percpu_adev, cpu);
+		memcpy(oncore_adev, adev, sizeof(struct aest_device));
+
+		oncore_adev->nodes =
+			devm_kcalloc(adev->dev, oncore_adev->node_cnt,
+				     sizeof(struct aest_node), GFP_KERNEL);
+		if (!oncore_adev->nodes)
+			return -ENOMEM;
+
+		size = adev->node_cnt * sizeof(struct aest_node);
+		memcpy(oncore_adev->nodes, adev->nodes, size);
+		for (i = 0; i < oncore_adev->node_cnt; i++) {
+			oncore_node = &oncore_adev->nodes[i];
+			oncore_node->records = devm_kcalloc(
+				adev->dev, oncore_node->record_count,
+				sizeof(struct aest_record), GFP_KERNEL);
+			if (!oncore_node->records)
+				return -ENOMEM;
+
+			size = oncore_node->record_count *
+			       sizeof(struct aest_record);
+			memcpy(oncore_node->records, adev->nodes[i].records,
+			       size);
+		}
+
+		aest_dev_dbg(adev, "Init device on CPU%d.\n", cpu);
+	}
+
+	return 0;
+}
+
+/*
+ * Look up the optional fault-handling and error-recovery interrupts of
+ * @pdev and record them in adev->irq[]. When either is a per-CPU interrupt
+ * the per-CPU device copies are set up as well.
+ */
+static int aest_setup_irq(struct platform_device *pdev,
+			  struct aest_device *adev)
+{
+	int fhi_irq, eri_irq;
+
+	fhi_irq = platform_get_irq_byname_optional(pdev, AEST_FHI_NAME);
+	if (fhi_irq == -EPROBE_DEFER)
+		return fhi_irq;
+	if (fhi_irq > 0)
+		adev->irq[0] = fhi_irq;
+
+	eri_irq = platform_get_irq_byname_optional(pdev, AEST_ERI_NAME);
+	if (eri_irq == -EPROBE_DEFER)
+		return eri_irq;
+	if (eri_irq > 0)
+		adev->irq[1] = eri_irq;
+
+	/* Allocate and initialise the percpu device pointer for PPI */
+	if ((fhi_irq > 0 && irq_is_percpu(fhi_irq)) ||
+	    (eri_irq > 0 && irq_is_percpu(eri_irq)))
+		return __setup_ppi(adev);
+
+	return 0;
+}
+
+static struct aest_vendor_match vendor_match[] = {
+	{ "ARMHC700", &aest_cmn700_probe },
+	{},
+};
+
+static int aest_vendor_probe(struct aest_device *adev,
+			     struct aest_hnode *ahnode)
+{
+	int i;
+	struct acpi_aest_node *anode;
+
+	anode = list_first_entry(&ahnode->list, struct
acpi_aest_node, list); + if (!anode) + return -ENODEV; + + aest_dev_dbg(adev, "Try to probe vendor node %s\n", + anode->vendor->acpi_hid); + for (i = 0; i < ARRAY_SIZE(vendor_match); i++) { + if (!strncmp(vendor_match[i].hid, anode->vendor->acpi_hid, 8)) + return vendor_match[i].probe(adev, ahnode); + } + + return 0; +} + +static int aest_device_probe(struct platform_device *pdev) +{ + int ret; + struct aest_device *adev; + struct aest_hnode *ahnode; + + ahnode = *((struct aest_hnode **)pdev->dev.platform_data); + if (!ahnode) + return -ENODEV; + + adev = devm_kzalloc(&pdev->dev, sizeof(*adev), GFP_KERNEL); + if (!adev) + return -ENOMEM; + adev->dev = &pdev->dev; + adev->id = pdev->id; + aest_set_name(adev, ahnode); + + INIT_WORK(&adev->aest_work, aest_node_pool_process); + ret = aest_node_pool_init(adev); + if (ret) { + aest_dev_err(adev, "Failed init aest node pool.\n"); + return ret; + } + init_llist_head(&adev->event_list); + + if (ahnode->type == ACPI_AEST_VENDOR_ERROR_NODE) + ret = aest_vendor_probe(adev, ahnode); + else + ret = aest_init_nodes(adev, ahnode); + if (ret) + return ret; + + ret = aest_setup_irq(pdev, adev); + if (ret) + return ret; + + ret = aest_register_irq(adev); + if (ret) { + aest_dev_err(adev, "register irq failed\n"); + return ret; + } + + if (aest_dev_is_oncore(adev)) + ret = cpuhp_setup_state(CPUHP_AP_ARM_AEST_STARTING, + "drivers/acpi/arm64/aest:starting", + aest_starting_cpu, aest_dying_cpu); + else + aest_online_dev(adev); + if (ret) + return ret; + + platform_set_drvdata(pdev, adev); + + aest_dev_init_debugfs(adev); + + aest_dev_dbg(adev, "Node cnt: %x, id: %x\n", adev->node_cnt, adev->id); + + return 0; +} + +static struct platform_driver aest_driver = { + .driver = { + .name = "AEST", + }, + .probe = aest_device_probe, + .remove = aest_device_remove, +}; + +static int __init aest_init(void) +{ +#ifdef CONFIG_DEBUG_FS + aest_debugfs = debugfs_create_dir("aest", NULL); +#endif + + return platform_driver_register(&aest_driver); +} 
+module_init(aest_init); + +static void __exit aest_exit(void) +{ +#ifdef CONFIG_DEBUG_FS + debugfs_remove(aest_debugfs); +#endif + + platform_driver_unregister(&aest_driver); +} +module_exit(aest_exit); + +MODULE_DESCRIPTION("ARM AEST Driver"); +MODULE_AUTHOR("Ruidong Tian "); +MODULE_LICENSE("GPL"); diff --git a/drivers/ras/aest/aest-inject.c b/drivers/ras/aest/aest-inject.c new file mode 100644 index 0000000000000..fe6ccac8338e4 --- /dev/null +++ b/drivers/ras/aest/aest-inject.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ARM Error Source Table Support + * + * Copyright (c) 2024, Alibaba Group. + */ + +#include "aest.h" + +static struct ras_ext_regs regs_inj; + +struct inj_attr { + struct attribute attr; + ssize_t (*show)(struct aest_node *n, struct inj_attr *a, char *b); + ssize_t (*store)(struct aest_node *n, struct inj_attr *a, const char *b, + size_t c); +}; + +struct aest_inject { + struct aest_node *node; + struct kobject kobj; +}; + +#define to_inj(k) container_of(k, struct aest_inject, kobj) +#define to_inj_attr(a) container_of(a, struct inj_attr, attr) + +static u64 aest_sysreg_read_inject(void *__unused, u32 offset) +{ + u64 *p = (u64 *)®s_inj; + + return p[offset/8]; +} + +static void aest_sysreg_write_inject(void *base, u32 offset, u64 val) +{ + u64 *p = (u64 *)®s_inj; + + p[offset/8] = val; +} + +static u64 aest_iomem_read_inject(void *base, u32 offset) +{ + u64 *p = (u64 *)®s_inj; + + return p[offset/8]; +} + +static void aest_iomem_write_inject(void *base, u32 offset, u64 val) +{ + u64 *p = (u64 *)®s_inj; + + p[offset/8] = val; +} + +static struct aest_access aest_access_inject[] = { + [ACPI_AEST_NODE_SYSTEM_REGISTER] = { + .read = aest_sysreg_read_inject, + .write = aest_sysreg_write_inject, + }, + + [ACPI_AEST_NODE_MEMORY_MAPPED] = { + .read = aest_iomem_read_inject, + .write = aest_iomem_write_inject, + }, + [ACPI_AEST_NODE_SINGLE_RECORD_MEMORY_MAPPED] = { + .read = aest_iomem_read_inject, + .write = aest_iomem_write_inject, 
+ }, + { } +}; + +static int soft_inject_store(void *data, u64 val) +{ + int count = 0; + struct aest_record record_inj, *record = data; + struct aest_node node_inj, *node = record->node; + + memcpy(&node_inj, node, sizeof(*node)); + node_inj.name = "AEST-injection"; + + record_inj.access = &aest_access_inject[node->info->interface_hdr->type]; + record_inj.node = &node_inj; + record_inj.index = record->index; + + regs_inj.err_status |= ERR_STATUS_V; + + aest_proc_record(&record_inj, &count, true); + + if (count != 1) + return -EIO; + + return 0; +} +DEFINE_DEBUGFS_ATTRIBUTE(soft_inject_ops, NULL, soft_inject_store, "%llu\n"); + +static int hard_inject_store(void *data, u64 val) +{ + struct aest_record *record = data; + struct aest_node *node = record->node; + + if (!node->inj) + return -EPERM; + + aest_select_record(node, record->index); + record_write(record, ERXPFGCTL, val); + record_write(record, ERXPFGCDN, 0x100); + aest_sync(node); + + return 0; +} +DEFINE_DEBUGFS_ATTRIBUTE(hard_inject_ops, NULL, hard_inject_store, "%llu\n"); + +void aest_inject_init_debugfs(struct aest_record *record) +{ + struct dentry *inj; + + inj = debugfs_create_dir("inject", record->debugfs); + + debugfs_create_u64("err_fr", 0600, inj, ®s_inj.err_fr); + debugfs_create_u64("err_ctrl", 0600, inj, ®s_inj.err_ctlr); + debugfs_create_u64("err_status", 0600, inj, ®s_inj.err_status); + debugfs_create_u64("err_addr", 0600, inj, ®s_inj.err_addr); + debugfs_create_u64("err_misc0", 0600, inj, ®s_inj.err_misc[0]); + debugfs_create_u64("err_misc1", 0600, inj, ®s_inj.err_misc[1]); + debugfs_create_u64("err_misc2", 0600, inj, ®s_inj.err_misc[2]); + debugfs_create_u64("err_misc3", 0600, inj, ®s_inj.err_misc[3]); + debugfs_create_file("soft_inject", 0400, inj, record, &soft_inject_ops); + + if (record->node->inj) + debugfs_create_file("hard_inject", 0400, inj, record, &hard_inject_ops); +} diff --git a/drivers/ras/aest/aest-of.c b/drivers/ras/aest/aest-of.c new file mode 100644 index 
0000000000000..939db2c417427 --- /dev/null +++ b/drivers/ras/aest/aest-of.c @@ -0,0 +1,673 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. + */ + +#include +#include +#include +#include +#include +#include +#include + +#undef pr_fmt +#define pr_fmt(fmt) "DT AEST: " fmt + +struct dt_aest_priv { + struct xarray aest_array; + u32 node_id; +}; + +static const struct of_device_id dt_aest_child_match[] = { + { .compatible = "arm,aest-processor", .data = (void *)ACPI_AEST_PROCESSOR_ERROR_NODE }, + { .compatible = "arm,aest-memory", .data = (void *)ACPI_AEST_MEMORY_ERROR_NODE }, + { .compatible = "arm,aest-smmu", .data = (void *)ACPI_AEST_SMMU_ERROR_NODE }, + { .compatible = "arm,aest-vendor", .data = (void *)ACPI_AEST_VENDOR_ERROR_NODE }, + { .compatible = "arm,aest-gic", .data = (void *)ACPI_AEST_GIC_ERROR_NODE }, + { .compatible = "arm,aest-pcie", .data = (void *)ACPI_AEST_PCIE_ERROR_NODE }, + { .compatible = "arm,aest-proxy", .data = (void *)ACPI_AEST_PROXY_ERROR_NODE }, + { } +}; + +static int dt_aest_node_type(struct device_node *np) +{ + const struct of_device_id *match; + + match = of_match_node(dt_aest_child_match, np); + if (!match) { + pr_warn("unknown compatible for %pOF\n", np); + return -EINVAL; + } + return (int)(uintptr_t)match->data; +} + +static struct aest_hnode *dt_aest_alloc_hnode(int node_type, u32 id) +{ + struct aest_hnode *ahnode; + + ahnode = kzalloc_obj(*ahnode, GFP_KERNEL); + if (!ahnode) + return NULL; + + INIT_LIST_HEAD(&ahnode->list); + ahnode->count = 0; + ahnode->id = id; + ahnode->type = node_type; + return ahnode; +} + +static int dt_aest_build_interface(struct device_node *np, + struct acpi_aest_node *anode) +{ + struct acpi_aest_node_interface_header *hdr; + struct acpi_aest_node_interface_common *common; + struct resource res; + struct resource named_res; + u32 gfmt = 0, flags = 0, nrec = 1; + u32 itype; + int ret; + size_t body_sz; + + /* + * Deduce interface type 
from the presence and count of reg entries: + * no reg -> system-register access (type 0) + * 1 range -> memory-mapped access (type 1) + * 2+ ranges -> single-record MMIO (type 2) + */ + if (!of_property_present(np, "reg")) + itype = ACPI_AEST_NODE_SYSTEM_REGISTER; + else if (of_property_count_elems_of_size(np, "reg", sizeof(u32)) <= + (of_n_addr_cells(np) + of_n_size_cells(np))) + itype = ACPI_AEST_NODE_MEMORY_MAPPED; + else + itype = ACPI_AEST_NODE_SINGLE_RECORD_MEMORY_MAPPED; + + of_property_read_u32(np, "arm,group-format", &gfmt); + of_property_read_u32(np, "arm,interface-flags", &flags); + of_property_read_u32(np, "arm,num-records", &nrec); + + switch (gfmt) { + case ACPI_AEST_NODE_GROUP_FORMAT_16K: + body_sz = sizeof(struct acpi_aest_node_interface_16k); + break; + case ACPI_AEST_NODE_GROUP_FORMAT_64K: + body_sz = sizeof(struct acpi_aest_node_interface_64k); + break; + default: + body_sz = sizeof(struct acpi_aest_node_interface_4k); + break; + } + + hdr = kzalloc(sizeof(*hdr) + body_sz, GFP_KERNEL); + if (!hdr) + return -ENOMEM; + + /* Fill header */ + hdr->type = (u8)itype; + hdr->group_format = (u8)gfmt; + hdr->flags = flags; + hdr->error_record_count = nrec; + hdr->error_record_index = 0; + + if (itype != ACPI_AEST_NODE_SYSTEM_REGISTER) { + ret = of_address_to_resource(np, 0, &res); + if (ret) { + pr_err("node %pOF: missing 'reg' for MMIO interface\n", np); + kfree(hdr); + return ret; + } + hdr->address = res.start; + } + + switch (gfmt) { + case ACPI_AEST_NODE_GROUP_FORMAT_4K: { + struct acpi_aest_node_interface_4k *b = + (struct acpi_aest_node_interface_4k *)(hdr + 1); + of_property_read_u64(np, "arm,record-impl", + &b->error_record_implemented); + of_property_read_u64(np, "arm,status-reporting", + &b->error_status_reporting); + of_property_read_u64(np, "arm,addressing-mode", + &b->addressing_mode); + common = &b->common; + anode->record_implemented = + (unsigned long *)&b->error_record_implemented; + anode->status_reporting = + (unsigned long 
*)&b->error_status_reporting; + anode->addressing_mode = + (unsigned long *)&b->addressing_mode; + break; + } + case ACPI_AEST_NODE_GROUP_FORMAT_16K: { + struct acpi_aest_node_interface_16k *b = + (struct acpi_aest_node_interface_16k *)(hdr + 1); + of_property_read_u64_array(np, "arm,record-impl", + b->error_record_implemented, 4); + of_property_read_u64_array(np, "arm,status-reporting", + b->error_status_reporting, 4); + of_property_read_u64_array(np, "arm,addressing-mode", + b->addressing_mode, 4); + common = &b->common; + anode->record_implemented = + (unsigned long *)b->error_record_implemented; + anode->status_reporting = + (unsigned long *)b->error_status_reporting; + anode->addressing_mode = + (unsigned long *)b->addressing_mode; + break; + } + case ACPI_AEST_NODE_GROUP_FORMAT_64K: { + struct acpi_aest_node_interface_64k *b = + (struct acpi_aest_node_interface_64k *)(hdr + 1); + of_property_read_u64_array(np, "arm,record-impl", + b->error_record_implemented, 14); + of_property_read_u64_array(np, "arm,status-reporting", + b->error_status_reporting, 14); + of_property_read_u64_array(np, "arm,addressing-mode", + b->addressing_mode, 14); + common = &b->common; + anode->record_implemented = + (unsigned long *)b->error_record_implemented; + anode->status_reporting = + (unsigned long *)b->error_status_reporting; + anode->addressing_mode = + (unsigned long *)b->addressing_mode; + break; + } + default: + pr_err("node %pOF: unsupported group-format %u\n", np, gfmt); + kfree(hdr); + return -EINVAL; + } + + if (!of_address_to_resource(np, of_property_match_string( + np, "reg-names", "fault-inject"), &named_res)) + common->fault_inject_register_base = named_res.start; + + if (!of_address_to_resource(np, of_property_match_string( + np, "reg-names", "err-group"), &named_res)) + common->error_group_register_base = named_res.start; + + if (!of_address_to_resource(np, of_property_match_string( + np, "reg-names", "irq-config"), &named_res)) + 
common->interrupt_config_register_base = named_res.start; + + anode->interface_hdr = hdr; + anode->common = common; + + return 0; +} + +static int dt_aest_build_interrupt(struct device_node *np, + struct acpi_aest_node *anode) +{ + struct acpi_aest_node_interrupt_v2 *irq_arr; + int fhi_irq, eri_irq, count = 0; + u32 fhi_flags = 0, eri_flags = 0; + + of_property_read_u32(np, "arm,fhi-flags", &fhi_flags); + of_property_read_u32(np, "arm,eri-flags", &eri_flags); + + fhi_irq = of_irq_get_byname(np, "fhi"); + if (fhi_irq == -EPROBE_DEFER) + return -EPROBE_DEFER; + if (fhi_irq < 0 && fhi_irq != -EINVAL) { + const char *name = NULL; + + of_property_read_string(np, "interrupt-names", &name); + + pr_warn("node %pOF: failed to map FHI IRQ: %d (interrupt-names[0]=\"%s\", want \"%s\")\n", + np, fhi_irq, name ?: "", "fhi"); + } + eri_irq = of_irq_get_byname(np, "eri"); + if (eri_irq == -EPROBE_DEFER) + return -EPROBE_DEFER; + if (eri_irq < 0 && eri_irq != -EINVAL) { + const char *name = NULL; + + of_property_read_string_index(np, "interrupt-names", 1, &name); + + pr_warn("node %pOF: failed to map ERI IRQ: %d (interrupt-names[1]=\"%s\", want \"%s\")\n", + np, eri_irq, name ?: "", "eri"); + } + + if (fhi_irq > 0) + count++; + if (eri_irq > 0) + count++; + + if (!count) { + anode->interrupt = NULL; + anode->interrupt_count = 0; + return 0; + } + + irq_arr = kcalloc(count, sizeof(*irq_arr), GFP_KERNEL); + if (!irq_arr) + return -ENOMEM; + + count = 0; + if (fhi_irq > 0) { + irq_arr[count].gsiv = fhi_irq; + irq_arr[count].flags = AEST_INTERRUPT_MODE | fhi_flags; + irq_arr[count].type = ACPI_AEST_NODE_FAULT_HANDLING; + count++; + } + if (eri_irq > 0) { + irq_arr[count].gsiv = eri_irq; + irq_arr[count].flags = eri_flags; + irq_arr[count].type = ACPI_AEST_NODE_ERROR_RECOVERY; + count++; + } + + anode->interrupt = irq_arr; + anode->interrupt_count = count; + return 0; +} + +static int dt_aest_build_node_specific(struct device_node *np, + struct acpi_aest_node *anode, + int node_type) +{ 
+ switch (node_type) { + + case ACPI_AEST_PROCESSOR_ERROR_NODE: { + struct acpi_aest_processor *proc; + u32 rtype = 0, pflags = 0; + + proc = kzalloc_obj(*proc, GFP_KERNEL); + if (!proc) + return -ENOMEM; + + of_property_read_u32(np, "arm,resource-type", &rtype); + of_property_read_u32(np, "arm,processor-flags", &pflags); + + proc->resource_type = (u8)rtype; + proc->flags = (u8)pflags; + + /* Processor cache/TLB/generic sub-structure */ + switch (rtype) { + case ACPI_AEST_CACHE_RESOURCE: { + struct acpi_aest_processor_cache *c; + struct device_node *cache_np; + + c = kzalloc_obj(*c, GFP_KERNEL); + if (!c) { + kfree(proc); + return -ENOMEM; + } + + cache_np = of_parse_phandle(np, "arm,cache-ref", 0); + if (cache_np) { + c->cache_reference = cache_np->phandle; + of_node_put(cache_np); + } + anode->cache = c; + break; + } + case ACPI_AEST_TLB_RESOURCE: { + struct acpi_aest_processor_tlb *t; + + t = kzalloc_obj(*t, GFP_KERNEL); + if (!t) { + kfree(proc); + return -ENOMEM; + } + of_property_read_u32(np, "arm,tlb-level", + &t->tlb_level); + anode->tlb = t; + break; + } + default: { + struct acpi_aest_processor_generic *g; + + g = kzalloc_obj(*g, GFP_KERNEL); + if (!g) { + kfree(proc); + return -ENOMEM; + } + of_property_read_u32(np, "arm,resource-ref", + &g->resource); + anode->generic = g; + break; + } + } + anode->processor = proc; + break; + } + + case ACPI_AEST_MEMORY_ERROR_NODE: { + struct acpi_aest_memory *mem; + + mem = kzalloc_obj(*mem, GFP_KERNEL); + + if (!mem) + return -ENOMEM; + of_property_read_u32(np, "arm,proximity-domain", + &mem->srat_proximity_domain); + anode->memory = mem; + break; + } + + case ACPI_AEST_SMMU_ERROR_NODE: { + struct acpi_aest_smmu *smmu; + struct device_node *smmu_np; + + smmu = kzalloc_obj(*smmu, GFP_KERNEL); + + if (!smmu) + return -ENOMEM; + smmu_np = of_parse_phandle(np, "arm,smmu-ref", 0); + if (smmu_np) { + /* Use the DT node offset as the IORT reference */ + smmu->iort_node_reference = smmu_np->phandle; + of_node_put(smmu_np); + 
} + of_property_read_u32(np, "arm,smmu-subcomponent", + &smmu->subcomponent_reference); + anode->smmu = smmu; + break; + } + + case ACPI_AEST_VENDOR_ERROR_NODE: { + struct acpi_aest_vendor_v2 *vendor; + const char *hid = "ARMHC000"; + + vendor = kzalloc_obj(*vendor, GFP_KERNEL); + + if (!vendor) + return -ENOMEM; + of_property_read_string(np, "arm,vendor-hid", &hid); + strscpy(vendor->acpi_hid, hid, sizeof(vendor->acpi_hid)); + of_property_read_u32(np, "arm,vendor-uid", + &vendor->acpi_uid); + anode->vendor = vendor; + break; + } + + case ACPI_AEST_GIC_ERROR_NODE: { + struct acpi_aest_gic *gic; + + gic = kzalloc_obj(*gic, GFP_KERNEL); + + if (!gic) + return -ENOMEM; + of_property_read_u32(np, "arm,gic-type", + &gic->interface_type); + of_property_read_u32(np, "arm,gic-instance", + &gic->instance_id); + anode->gic = gic; + break; + } + + case ACPI_AEST_PCIE_ERROR_NODE: { + struct acpi_aest_pcie *pcie; + + pcie = kzalloc_obj(*pcie, GFP_KERNEL); + + if (!pcie) + return -ENOMEM; + of_property_read_u32(np, "arm,pcie-segment", + &pcie->iort_node_reference); + anode->pcie = pcie; + break; + } + + case ACPI_AEST_PROXY_ERROR_NODE: + /* No node-specific data for proxy nodes */ + anode->spec_pointer = NULL; + break; + + default: + return -EINVAL; + } + + return 0; +} + +static struct acpi_aest_node * +dt_aest_alloc_anode(struct device_node *np, int node_type) +{ + struct acpi_aest_node *anode; + int ret; + + anode = kzalloc_obj(*anode, GFP_KERNEL); + if (!anode) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&anode->list); + anode->type = node_type; + + ret = dt_aest_build_interface(np, anode); + if (ret) + goto err_free; + + ret = dt_aest_build_node_specific(np, anode, node_type); + if (ret) + goto err_free; + + ret = dt_aest_build_interrupt(np, anode); + if (ret) + goto err_free; + + return anode; + +err_free: + kfree(anode->interface_hdr); + kfree(anode->spec_pointer); + kfree(anode->processor_spec_pointer); + kfree(anode); + return ERR_PTR(ret); +} + +static int 
dt_aest_init_one_node(struct device_node *np, + struct dt_aest_priv *priv) +{ + int node_type; + struct aest_hnode *ahnode; + struct acpi_aest_node *anode; + + node_type = dt_aest_node_type(np); + if (node_type < 0) { + pr_warn("unknown node type for %pOF, skipping\n", np); + return 0; + } + + ahnode = dt_aest_alloc_hnode(node_type, priv->node_id); + if (!ahnode) + return -ENOMEM; + + anode = dt_aest_alloc_anode(np, node_type); + if (IS_ERR(anode)) { + kfree(ahnode); + return PTR_ERR(anode); + } + + list_add_tail(&anode->list, &ahnode->list); + ahnode->count = 1; + + if (xa_err(xa_store(&priv->aest_array, priv->node_id, + ahnode, GFP_KERNEL))) { + kfree(anode); + kfree(ahnode); + return -ENOMEM; + } + priv->node_id++; + return 0; +} + +static int dt_aest_init_nodes(struct device_node *aest_root, + struct dt_aest_priv *priv) +{ + struct device_node *np; + int ret; + + for_each_available_child_of_node(aest_root, np) { + ret = dt_aest_init_one_node(np, priv); + if (ret) { + pr_err("failed to init node %pOF: %d\n", np, ret); + of_node_put(np); + return ret; + } + } + return 0; +} + +static struct platform_device *dt_aest_alloc_pdev(struct aest_hnode *ahnode, + int index) +{ + struct platform_device *pdev; + struct resource *res; + struct acpi_aest_node *anode; + int ret, size, j; + int irq[AEST_MAX_INTERRUPT_PER_NODE] = { 0 }; + + pdev = platform_device_alloc("AEST", index); + if (!pdev) + return ERR_PTR(-ENOMEM); + + res = kcalloc(ahnode->count + AEST_MAX_INTERRUPT_PER_NODE, + sizeof(*res), GFP_KERNEL); + if (!res) { + platform_device_put(pdev); + return ERR_PTR(-ENOMEM); + } + + j = 0; + list_for_each_entry(anode, &ahnode->list, list) { + if (anode->interface_hdr->type != + ACPI_AEST_NODE_SYSTEM_REGISTER) { + res[j].name = AEST_NODE_NAME; + res[j].start = anode->interface_hdr->address; + + switch (anode->interface_hdr->group_format) { + case ACPI_AEST_NODE_GROUP_FORMAT_4K: + size = 4 * KB; break; + case ACPI_AEST_NODE_GROUP_FORMAT_16K: + size = 16 * KB; break; + case 
ACPI_AEST_NODE_GROUP_FORMAT_64K: + size = 64 * KB; break; + default: + size = 4 * KB; + } + res[j].end = res[j].start + size - 1; + res[j].flags = IORESOURCE_MEM; + j++; + } + + if (anode->interrupt && anode->interrupt_count > 0) { + int k; + + for (k = 0; k < anode->interrupt_count && + k < AEST_MAX_INTERRUPT_PER_NODE; k++) { + + struct acpi_aest_node_interrupt_v2 *intr = + &anode->interrupt[k]; + int itype = intr->type; + int virq = intr->gsiv; + struct irq_data *irqd; + + if (!virq) + continue; + if (itype >= AEST_MAX_INTERRUPT_PER_NODE) + continue; + if (irq[itype] == virq) + continue; + irq[itype] = virq; + /* + * aest_config_irq() writes intr->gsiv directly + * to the hardware IRQ-config register, so it + * must hold the GIC hardware SPI number, not the + * Linux virtual IRQ. Convert here now that we + * have the virq in hand; the resource still gets + * the virq so devm_request_irq() works correctly. + */ + irqd = irq_get_irq_data(virq); + if (irqd) + intr->gsiv = irqd->hwirq; + + res[j].name = (itype == ACPI_AEST_NODE_FAULT_HANDLING) + ? 
AEST_FHI_NAME : AEST_ERI_NAME; + res[j].start = virq; + res[j].end = virq; + res[j].flags = IORESOURCE_IRQ; + j++; + } + } + } + + ret = platform_device_add_resources(pdev, res, j); + kfree(res); + if (ret) { + platform_device_put(pdev); + return ERR_PTR(ret); + } + + ret = platform_device_add_data(pdev, &ahnode, sizeof(ahnode)); + if (ret) { + platform_device_put(pdev); + return ERR_PTR(ret); + } + + ret = platform_device_add(pdev); + if (ret) { + platform_device_put(pdev); + return ERR_PTR(ret); + } + + return pdev; +} + +static int dt_aest_alloc_pdevs(struct dt_aest_priv *priv) +{ + struct aest_hnode *ahnode; + unsigned long i; + int ret = 0, index = 0; + + xa_for_each(&priv->aest_array, i, ahnode) { + struct platform_device *pdev = + dt_aest_alloc_pdev(ahnode, index++); + if (IS_ERR(pdev)) { + ret = PTR_ERR(pdev); + pr_err("failed to alloc pdev for node %u: %d\n", + ahnode->id, ret); + break; + } + } + return ret; +} + +static int __init dt_aest_init(void) +{ + struct device_node *aest_root; + struct dt_aest_priv priv = {}; + int ret; + + if (!acpi_disabled) + return 0; + + aest_root = of_find_compatible_node(NULL, NULL, "arm,aest"); + if (!aest_root) + return 0; + + xa_init(&priv.aest_array); + + ret = dt_aest_init_nodes(aest_root, &priv); + of_node_put(aest_root); + if (ret) { + pr_err("failed to init AEST nodes: %d\n", ret); + return ret; + } + + ret = dt_aest_alloc_pdevs(&priv); + if (ret) { + pr_err("failed to alloc AEST pdevs: %d\n", ret); + return ret; + } + + pr_info("registered %u AEST error source(s) from DT\n", priv.node_id); + + return 0; +} +subsys_initcall_sync(dt_aest_init); diff --git a/drivers/ras/aest/aest-sysfs.c b/drivers/ras/aest/aest-sysfs.c new file mode 100644 index 0000000000000..b36190bb3b3e4 --- /dev/null +++ b/drivers/ras/aest/aest-sysfs.c @@ -0,0 +1,239 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ARM Error Source Table Support + * + * Copyright (c) 2025, Alibaba Group. 
+ */ + +#include "aest.h" + +static void +aest_store_threshold(struct aest_record *record, void *data) +{ + u64 err_misc0, *threshold = data; + struct ce_threshold *ce = &record->ce; + + if (*threshold > ce->info->max_count) + return; + + ce->threshold = *threshold; + ce->count = ce->info->max_count - ce->threshold + 1; + + err_misc0 = record_read(record, ERXMISC0); + ce->reg_val = (err_misc0 & ~ce->info->mask) | + (ce->count << ce->info->shift); + + record_write(record, ERXMISC0, ce->reg_val); +} + +static void +aest_error_count(struct aest_record *record, void *data) +{ + struct record_count *count = data; + + count->ce += record->count.ce; + count->de += record->count.de; + count->uc += record->count.uc; + count->ueu += record->count.ueu; + count->uer += record->count.uer; + count->ueo += record->count.ueo; +} + +/******************************************************************************* + * + * Debugfs for AEST node + * + ******************************************************************************/ + +static int aest_node_err_count_show(struct seq_file *m, void *data) +{ + struct aest_node *node = m->private; + struct record_count count = { 0 }; + int i; + + for (i = 0; i < node->record_count; i++) + if (!test_bit(i, node->record_implemented)) + aest_error_count(&node->records[i], &count); + + seq_printf(m, "CE: %llu\n" + "DE: %llu\n" + "UC: %llu\n" + "UEU: %llu\n" + "UEO: %llu\n" + "UER: %llu\n", + count.ce, count.de, count.uc, count.ueu, + count.uer, count.ueo); + return 0; +} +DEFINE_SHOW_ATTRIBUTE(aest_node_err_count); + +/******************************************************************************* + * + * Attribute for AEST record + * + ******************************************************************************/ + +#define DEFINE_AEST_DEBUGFS_ATTR(name, offset) \ +static int name##_get(void *data, u64 *val) \ +{ \ + struct aest_record *record = data; \ + *val = record_read(record, offset); \ + return 0; \ +} \ +static int name##_set(void 
*data, u64 val) \ +{ \ + struct aest_record *record = data; \ + record_write(record, offset, val); \ + return 0; \ +} \ +DEFINE_DEBUGFS_ATTRIBUTE(name##_ops, name##_get, name##_set, "%#llx\n") + +DEFINE_AEST_DEBUGFS_ATTR(err_fr, ERXFR); +DEFINE_AEST_DEBUGFS_ATTR(err_ctrl, ERXCTLR); +DEFINE_AEST_DEBUGFS_ATTR(err_status, ERXSTATUS); +DEFINE_AEST_DEBUGFS_ATTR(err_addr, ERXADDR); +DEFINE_AEST_DEBUGFS_ATTR(err_misc0, ERXMISC0); +DEFINE_AEST_DEBUGFS_ATTR(err_misc1, ERXMISC1); +DEFINE_AEST_DEBUGFS_ATTR(err_misc2, ERXMISC2); +DEFINE_AEST_DEBUGFS_ATTR(err_misc3, ERXMISC3); + +static int record_ce_threshold_get(void *data, u64 *val) +{ + struct aest_record *record = data; + + *val = record->ce.threshold; + return 0; +} + +static int record_ce_threshold_set(void *data, u64 val) +{ + u64 threshold = val; + struct aest_record *record = data; + + aest_store_threshold(record, &threshold); + + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(record_ce_threshold_ops, record_ce_threshold_get, + record_ce_threshold_set, "%llu\n"); + +static int aest_record_err_count_show(struct seq_file *m, void *data) +{ + struct aest_record *record = m->private; + struct record_count count = { 0 }; + + aest_error_count(record, &count); + + seq_printf(m, "CE: %llu\n" + "DE: %llu\n" + "UC: %llu\n" + "UEU: %llu\n" + "UEO: %llu\n" + "UER: %llu\n", + count.ce, count.de, count.uc, count.ueu, + count.uer, count.ueo); + return 0; +} +DEFINE_SHOW_ATTRIBUTE(aest_record_err_count); + +static void aest_record_init_debugfs(struct aest_record *record) +{ + debugfs_create_file("err_fr", 0600, record->debugfs, record, + &err_fr_ops); + debugfs_create_file("err_ctrl", 0600, record->debugfs, record, + &err_ctrl_ops); + debugfs_create_file("err_status", 0600, record->debugfs, record, + &err_status_ops); + debugfs_create_file("err_addr", 0600, record->debugfs, record, + &err_addr_ops); + debugfs_create_file("err_misc0", 0600, record->debugfs, record, + &err_misc0_ops); + debugfs_create_file("err_misc1", 0600, record->debugfs, 
record, + &err_misc1_ops); + debugfs_create_file("err_misc2", 0600, record->debugfs, record, + &err_misc2_ops); + debugfs_create_file("err_misc3", 0600, record->debugfs, record, + &err_misc3_ops); + debugfs_create_file("err_count", 0400, record->debugfs, record, + &aest_record_err_count_fops); + debugfs_create_file("ce_threshold", 0600, record->debugfs, record, + &record_ce_threshold_ops); + aest_inject_init_debugfs(record); +} + +static void +aest_node_init_debugfs(struct aest_node *node) +{ + int i; + struct aest_record *record; + + debugfs_create_file("err_count", 0400, node->debugfs, node, + &aest_node_err_count_fops); + + for (i = 0; i < node->record_count; i++) { + record = &node->records[i]; + if (!record->name) + continue; + /* Skip records not implemented on this node. */ + if (test_bit(i, node->record_implemented)) + continue; + record->debugfs = debugfs_create_dir(record->name, + node->debugfs); + aest_record_init_debugfs(record); + } +} + +static void +aest_oncore_dev_init_debugfs(struct aest_device *adev) +{ + int cpu, i; + struct aest_node *node; + struct aest_device *percpu_dev; + char name[16]; + + for_each_possible_cpu(cpu) { + percpu_dev = per_cpu_ptr(adev->adev_oncore, cpu); + + snprintf(name, sizeof(name), "processor%u", cpu); + percpu_dev->debugfs = debugfs_create_dir(name, adev->debugfs); + + for (i = 0; i < adev->node_cnt; i++) { + node = &percpu_dev->nodes[i]; + + /* + * Use adev->nodes[i].name (the original) rather than + * node->name from the per-CPU copy. The per-CPU copy + * receives node->name via shallow memcpy in __setup_ppi; + * the original is the authoritative, guaranteed-valid + * string. 
+ */ + node->debugfs = debugfs_create_dir(adev->nodes[i].name, + percpu_dev->debugfs); + aest_node_init_debugfs(node); + } + } +} + +void aest_dev_init_debugfs(struct aest_device *adev) +{ + int i; + struct aest_node *node; + + if (!aest_debugfs) + dev_err(adev->dev, "debugfs not enabled\n"); + + adev->debugfs = debugfs_create_dir(dev_name(adev->dev), aest_debugfs); + if (aest_dev_is_oncore(adev)) { + aest_oncore_dev_init_debugfs(adev); + return; + } + + for (i = 0; i < adev->node_cnt; i++) { + node = &adev->nodes[i]; + if (!node->name) + continue; + node->debugfs = debugfs_create_dir(node->name, adev->debugfs); + aest_node_init_debugfs(node); + } +} diff --git a/drivers/ras/aest/aest.h b/drivers/ras/aest/aest.h new file mode 100644 index 0000000000000..9704af97fee89 --- /dev/null +++ b/drivers/ras/aest/aest.h @@ -0,0 +1,423 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * ARM Error Source Table Support + * + * Copyright (c) 2025, Alibaba Group. + */ + +#include +#include +#include +#include + +#define MAX_GSI_PER_NODE 2 +#define DEFAULT_CE_THRESHOLD 1 + +#define record_read(record, offset) \ + record->access->read(record->regs_base, offset) +#define record_write(record, offset, val) \ + record->access->write(record->regs_base, offset, val) + +#define aest_dev_err(__adev, format, ...) \ + dev_err((__adev)->dev, format, ##__VA_ARGS__) +#define aest_dev_info(__adev, format, ...) \ + dev_info((__adev)->dev, format, ##__VA_ARGS__) +#define aest_dev_dbg(__adev, format, ...) \ + dev_dbg((__adev)->dev, format, ##__VA_ARGS__) + +#define aest_node_err(__node, format, ...) \ + dev_err((__node)->adev->dev, "%s: " format, (__node)->name, \ + ##__VA_ARGS__) +#define aest_node_info(__node, format, ...) \ + dev_info((__node)->adev->dev, "%s: " format, (__node)->name, \ + ##__VA_ARGS__) +#define aest_node_dbg(__node, format, ...) \ + dev_dbg((__node)->adev->dev, "%s: " format, (__node)->name, \ + ##__VA_ARGS__) + +#define aest_record_err(__record, format, ...) 
\
+	dev_err((__record)->node->adev->dev, "%s: %s: " format, \
+		(__record)->node->name, (__record)->name, ##__VA_ARGS__)
+#define aest_record_info(__record, format, ...) \
+	dev_info((__record)->node->adev->dev, "%s: %s: " format, \
+		 (__record)->node->name, (__record)->name, ##__VA_ARGS__)
+#define aest_record_dbg(__record, format, ...) \
+	dev_dbg((__record)->node->adev->dev, "%s: %s: " format, \
+		(__record)->node->name, (__record)->name, ##__VA_ARGS__)
+
+/*
+ * Per group format (4K/16K/64K): ERRGSR offset, register window size and
+ * number of ERRGSR registers.
+ */
+#define ERXGROUP_4K_OFFSET	0xE00
+#define ERXGROUP_16K_OFFSET	0x3800
+#define ERXGROUP_64K_OFFSET	0xE000
+#define ERXGROUP_4K_SIZE	(4 * KB)
+#define ERXGROUP_16K_SIZE	(16 * KB)
+#define ERXGROUP_64K_SIZE	(64 * KB)
+#define ERXGROUP_4K_ERRGSR_NUM	1
+#define ERXGROUP_16K_ERRGSR_NUM	4
+#define ERXGROUP_64K_ERRGSR_NUM	14
+
+/* Offsets of the error record registers, passed to record_read/write(). */
+#define ERXFR		0x0
+#define ERXCTLR		0x8
+#define ERXSTATUS	0x10
+#define ERXADDR		0x18
+#define ERXMISC0	0x20
+#define ERXMISC1	0x28
+#define ERXMISC2	0x30
+#define ERXMISC3	0x38
+#define ERXPFGF		0x800
+#define ERXPFGCTL	0x808
+#define ERXPFGCDN	0x810
+
+#define GIC_ERRDEVARCH	0xFFBC
+
+/* Root debugfs directory of the driver; defined outside this header. */
+extern struct dentry *aest_debugfs;
+
+/* One reported error: identification of its source plus a register snapshot. */
+struct aest_event {
+	struct llist_node llnode;
+	char *node_name;
+	u32 type;
+	/*
+	 * Different nodes have different meanings:
+	 * - Processor node : processor number.
+	 * - Memory node : SRAT proximity domain.
+	 * - SMMU node : IORT proximity domain.
+	 * - GIC node : interface type.
+	 */
+	u32 id0;
+	/*
+	 * Different nodes have different meanings:
+	 * - Processor node : processor resource type.
+	 * - Memory node : None.
+	 * - SMMU node : subcomponent reference.
+	 * - Vendor node : Unique ID.
+	 * - GIC node : instance identifier.
+	 */
+	u32 id1;
+	/* Vendor node : hardware ID. */
+	char *hid;
+	u32 index;
+	/* Processor node: ACPI_AEST_PROC_FLAG_* bitmask (SHARED/GLOBAL) */
+	u8 proc_flags;
+	u64 ce_threshold;
+	int addressing_mode;
+	struct ras_ext_regs regs;
+
+	/*
+	 * This field is used to store vendor-specific data for decoding the
+	 * error record by the EDAC driver.
+	 */
+	void *vendor_data;
+	size_t vendor_data_size;
+};
+
+/* Register accessors for one interface type (see the aest_access[] table). */
+struct aest_access {
+	u64 (*read)(void *base, u32 offset);
+	void (*write)(void *base, u32 offset, u64 val);
+};
+
+/*
+ * Constant description of a corrected-error counter layout.
+ * NOTE(review): presumably max_count/mask/shift describe the counter field
+ * inside a MISC register - confirm against the users of this struct.
+ */
+struct ce_threshold_info {
+	const u64 max_count;
+	const u64 mask;
+	const u64 shift;
+};
+
+/* Per-record corrected-error threshold state. */
+struct ce_threshold {
+	const struct ce_threshold_info *info;
+	u64 count;
+	u64 threshold;
+	u64 reg_val;
+};
+
+/*
+ * Per-type error counters, printed by the "err_count" debugfs files.
+ * NOTE(review): the names presumably follow the Arm RAS error types
+ * (corrected, deferred, uncontainable, recoverable, restartable,
+ * unrecoverable) - confirm.
+ */
+struct record_count {
+	u64 ce;
+	u64 de;
+	u64 uc;
+	u64 uer;
+	u64 ueo;
+	u64 ueu;
+};
+
+/* One error record of a node. */
+struct aest_record {
+	char *name;
+	int index;
+	void __iomem *regs_base;
+
+	/*
+	 * This bit specifies the addressing mode to populate the ERR_ADDR
+	 * register:
+	 * 0b: Error record reports System Physical Addresses (SPA) in
+	 *     the ERR_ADDR register.
+	 * 1b: Error record reports error node-specific Logical Addresses (LA)
+	 *     in the ERR_ADDR register. OS must use other means to translate
+	 *     the reported LA into SPA.
+	 */
+	int addressing_mode;
+	struct aest_node *node;
+	const struct aest_access *access;
+
+	struct ce_threshold ce;
+	enum ras_ce_threshold threshold_type;
+	struct record_count count;
+	struct dentry *debugfs;
+
+	void *vendor_data;
+	size_t vendor_data_size;
+};
+
+/* Geometry of one error record group format. */
+struct aest_group {
+	int type;
+	int errgsr_num;
+	size_t size;
+	u64 errgsr_offset;
+};
+
+/* Group geometry, indexed by ACPI_AEST_NODE_GROUP_FORMAT_*. */
+static const struct aest_group aest_group_config[] = {
+	[ACPI_AEST_NODE_GROUP_FORMAT_4K] = {
+		.type = ACPI_AEST_NODE_GROUP_FORMAT_4K,
+		.errgsr_num = ERXGROUP_4K_ERRGSR_NUM,
+		.size = ERXGROUP_4K_SIZE,
+		.errgsr_offset = ERXGROUP_4K_OFFSET,
+	},
+	[ACPI_AEST_NODE_GROUP_FORMAT_16K] = {
+		.type = ACPI_AEST_NODE_GROUP_FORMAT_16K,
+		.errgsr_num = ERXGROUP_16K_ERRGSR_NUM,
+		.size = ERXGROUP_16K_SIZE,
+		.errgsr_offset = ERXGROUP_16K_OFFSET,
+	},
+	[ACPI_AEST_NODE_GROUP_FORMAT_64K] = {
+		.type = ACPI_AEST_NODE_GROUP_FORMAT_64K,
+		.errgsr_num = ERXGROUP_64K_ERRGSR_NUM,
+		.size = ERXGROUP_64K_SIZE,
+		.errgsr_offset = ERXGROUP_64K_OFFSET,
+	},
+};
+
+struct aest_node {
+	char *name;
+	u8 type;
+	void *errgsr;
+	void *base;
+	void *inj;
+	void *irq_config;
+
+	/*
+	 * This bitmap indicates which of the error records within this error
+	 * node are implemented.
+	 * Bit[n] of this field pertains to error record corresponding to
+	 * index n in this error group.
+	 * Bit[n] = 0b: Error record at index n is implemented.
+	 * Bit[n] = 1b: Error record at index n is not implemented.
+	 */
+	unsigned long *record_implemented;
+	/*
+	 * This bitmap indicates which of the error records within this error
+	 * node support error status reporting using the ERRGSR register.
+	 * Bit[n] of this field pertains to error record corresponding to
+	 * index n in this error group.
+	 * Bit[n] = 0b: Error record at index n supports error status reporting
+	 *              through ERRGSR.S.
+	 * Bit[n] = 1b: Error record at index n does not support error reporting
+	 *              through the ERRGSR.S bit. If this error record is
+	 *              implemented, then it must be polled explicitly for
+	 *              error events.
+	 */
+	unsigned long *status_reporting;
+	int version;
+	/*
+	 * Usually bit[n] in errgsr indicates that the [n]th error record within
+	 * this error node reports an error. But some components may have
+	 * different rules.
+	 * For example, CMN700 TRM 4.3.5.12 says:
+	 * ``` Error occurs when the index is even and Fault
+	 * occurs when the index is odd. ```
+	 * Bit[2n]: record[n] reports ERROR.
+	 * Bit[2n + 1]: record[n] reports FAULT.
+	 * The errgsr_mapping function is used to map an errgsr bit to a record
+	 * index for various components.
+ */ + int (*errgsr_mapping)(int errgsr_bit); + int errgsr_num; + + const struct aest_group *group; + struct aest_device *adev; + struct acpi_aest_node *info; + + int record_count; + struct aest_record *records; + struct dentry *debugfs; + struct aest_node __percpu *oncore_node; +}; + +struct aest_device { + struct device *dev; + u32 type; + int node_cnt; + struct aest_node *nodes; + u32 id; + int irq[MAX_GSI_PER_NODE]; + + struct work_struct aest_work; + struct gen_pool *pool; + struct llist_head event_list; + struct dentry *debugfs; + struct aest_device __percpu *adev_oncore; +}; + +static const char *const aest_node_name[] = { + [ACPI_AEST_PROCESSOR_ERROR_NODE] = "processor", + [ACPI_AEST_MEMORY_ERROR_NODE] = "memory", + [ACPI_AEST_SMMU_ERROR_NODE] = "smmu", + [ACPI_AEST_VENDOR_ERROR_NODE] = "vendor", + [ACPI_AEST_GIC_ERROR_NODE] = "gic", + [ACPI_AEST_PCIE_ERROR_NODE] = "pcie", + [ACPI_AEST_PROXY_ERROR_NODE] = "proxy", +}; + +struct aest_vendor_match { + char hid[ACPI_ID_LEN]; + int (*probe)(struct aest_device *adev, struct aest_hnode *anode); +}; + +static inline int aest_set_name(struct aest_device *adev, + struct aest_hnode *ahnode) +{ + adev->dev->init_name = devm_kasprintf(adev->dev, GFP_KERNEL, "%s%d", + aest_node_name[ahnode->type], + adev->id); + if (!adev->dev->init_name) + return -ENOMEM; + + return 0; +} + +#define CASE_READ(res, x) \ + case (x): { \ + res = read_sysreg_s(SYS_##x##_EL1); \ + break; \ + } + +#define CASE_WRITE(val, x) \ + case (x): { \ + write_sysreg_s((val), SYS_##x##_EL1); \ + break; \ + } + +static inline u64 aest_sysreg_read(void *__unused, u32 offset) +{ + u64 res; + + switch (offset) { + CASE_READ(res, ERXFR) + CASE_READ(res, ERXCTLR) + CASE_READ(res, ERXSTATUS) + CASE_READ(res, ERXADDR) + CASE_READ(res, ERXMISC0) + CASE_READ(res, ERXMISC1) + CASE_READ(res, ERXMISC2) + CASE_READ(res, ERXMISC3) + CASE_READ(res, ERXPFGF) + CASE_READ(res, ERXPFGCTL) + CASE_READ(res, ERXPFGCDN) + default : + res = 0; + } + return res; +} + +static 
inline void aest_sysreg_write(void *base, u32 offset, u64 val) +{ + switch (offset) { + CASE_WRITE(val, ERXFR) + CASE_WRITE(val, ERXCTLR) + CASE_WRITE(val, ERXSTATUS) + CASE_WRITE(val, ERXADDR) + CASE_WRITE(val, ERXMISC0) + CASE_WRITE(val, ERXMISC1) + CASE_WRITE(val, ERXMISC2) + CASE_WRITE(val, ERXMISC3) + CASE_WRITE(val, ERXPFGF) + CASE_WRITE(val, ERXPFGCTL) + CASE_WRITE(val, ERXPFGCDN) + default : + return; + } +} + +static inline u64 aest_iomem_read(void *base, u32 offset) +{ + return readq_relaxed(base + offset); +} + +static inline void aest_iomem_write(void *base, u32 offset, u64 val) +{ + writeq_relaxed(val, base + offset); +} + +/* access type is decided by AEST interface type. */ +static const struct aest_access aest_access[] = { + [ACPI_AEST_NODE_SYSTEM_REGISTER] = { + .read = aest_sysreg_read, + .write = aest_sysreg_write, + }, + [ACPI_AEST_NODE_MEMORY_MAPPED] = { + .read = aest_iomem_read, + .write = aest_iomem_write, + }, + [ACPI_AEST_NODE_SINGLE_RECORD_MEMORY_MAPPED] = { + .read = aest_iomem_read, + .write = aest_iomem_write, + }, + { } +}; + +/* + * Each PE may has multi error record, you must selects an error + * record to be accessed through the Error Record System + * registers. + */ +static inline void aest_select_record(struct aest_node *node, int index) +{ + if (node->type == ACPI_AEST_PROCESSOR_ERROR_NODE) { + write_sysreg_s(index, SYS_ERRSELR_EL1); + isb(); + } +} + +/* Ensure all writes has taken effect. */ +static inline void aest_sync(struct aest_node *node) +{ + if (node->type == ACPI_AEST_PROCESSOR_ERROR_NODE) + isb(); +} + +static inline bool aest_dev_is_oncore(struct aest_device *adev) +{ + /* + * A processor node is "on-core" (uses PPI + cpuhp) only when its + * interrupt is a per-CPU PPI. A shared processor node (e.g. cluster + * L3 cache, DSU) uses an SPI and must follow the non-oncore path + * (aest_online_dev) so that aest_config_irq and aest_online_dev are + * called instead of cpuhp_setup_state. 
+	 */
+	if (adev->type != ACPI_AEST_PROCESSOR_ERROR_NODE)
+		return false;
+	return irq_is_percpu(adev->irq[ACPI_AEST_NODE_FAULT_HANDLING]) ||
+	       irq_is_percpu(adev->irq[ACPI_AEST_NODE_ERROR_RECOVERY]);
+}
+
+/* Default ERRGSR mapping: bit n reports for record n. */
+static inline int default_errgsr_mapping(int errgsr_bit)
+{
+	return errgsr_bit;
+}
+
+/* CMN700 ERRGSR mapping: bits 2n and 2n + 1 both report for record n. */
+static inline int cmn700_errgsr_mapping(int errgsr_bit)
+{
+	return errgsr_bit / 2;
+}
+
+/* Implemented outside this header. */
+void aest_dev_init_debugfs(struct aest_device *adev);
+void aest_inject_init_debugfs(struct aest_record *record);
+void aest_proc_record(struct aest_record *record, void *data, bool fake);
+void aest_node_foreach_record(void (*func)(struct aest_record *, void *, bool),
+			      struct aest_node *node, void *data,
+			      unsigned long *bitmap);
+
+int aest_cmn700_probe(struct aest_device *adev, struct aest_hnode *ahnode);
diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c
index 03df3db623346..c8858b745021c 100644
--- a/drivers/ras/ras.c
+++ b/drivers/ras/ras.c
@@ -115,6 +115,9 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(extlog_mem_event);
 EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event);
 EXPORT_TRACEPOINT_SYMBOL_GPL(non_standard_event);
 EXPORT_TRACEPOINT_SYMBOL_GPL(arm_event);
+#ifdef CONFIG_ARM64_RAS_EXTN
+EXPORT_TRACEPOINT_SYMBOL_GPL(arm_ras_ext_event);
+#endif
 
 static int __init parse_ras_param(char *str)
 {
diff --git a/include/dt-bindings/arm/aest.h b/include/dt-bindings/arm/aest.h
new file mode 100644
index 0000000000000..43679314e98e8
--- /dev/null
+++ b/include/dt-bindings/arm/aest.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * This header provides constants for the Arm Error Source Table (AEST)
+ * DT binding (Documentation/devicetree/bindings/arm/arm,aest.yaml).
+ */
+
+#ifndef _DT_BINDINGS_ARM_AEST_H
+#define _DT_BINDINGS_ARM_AEST_H
+
+/*
+ * arm,interface-flags - AEST node interface flags field.
+ * Bit flags: every value is a distinct power of two.
+ */
+#define AEST_XFACE_SHARED		1
+#define AEST_XFACE_CLEAR_MISC		2
+#define AEST_XFACE_ERROR_DEVICE		4
+#define AEST_XFACE_AFFINITY		8
+#define AEST_XFACE_ERROR_GROUP		16
+#define AEST_XFACE_FAULT_INJECT		32
+#define AEST_XFACE_INT_CONFIG		64
+
+/* arm,fhi-flags / arm,eri-flags - AEST node interrupt flags field */
+#define AEST_IRQ_MODE_LEVEL		0
+#define AEST_IRQ_MODE_EDGE		1
+
+/* arm,processor-flags - AEST processor node flags field (bit flags) */
+#define AEST_PROC_GLOBAL		1
+#define AEST_PROC_SHARED		2
+
+/* arm,group-format - error record group register window page size */
+#define AEST_GROUP_FORMAT_4K		0
+#define AEST_GROUP_FORMAT_16K		1
+#define AEST_GROUP_FORMAT_64K		2
+
+/* arm,resource-type - processor resource type */
+#define AEST_RESOURCE_CACHE		0
+#define AEST_RESOURCE_TLB		1
+#define AEST_RESOURCE_GENERIC		2
+
+/* arm,gic-type - GIC component type */
+#define AEST_GIC_CPU			0
+#define AEST_GIC_DISTRIBUTOR		1
+#define AEST_GIC_REDISTRIBUTOR		2
+#define AEST_GIC_ITS			3
+
+#endif /* _DT_BINDINGS_ARM_AEST_H */
diff --git a/include/linux/acpi_aest.h b/include/linux/acpi_aest.h
new file mode 100644
index 0000000000000..3a899f57f92fb
--- /dev/null
+++ b/include/linux/acpi_aest.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ACPI_AEST_H__
+#define __ACPI_AEST_H__
+
+#include
+#include
+
+/* AEST resource name */
+#define AEST_NODE_NAME	"AEST:NODE"
+#define AEST_FHI_NAME	"AEST:FHI"
+#define AEST_ERI_NAME	"AEST:ERI"
+
+/* AEST component: processor node flag bits */
+#define ACPI_AEST_PROC_FLAG_GLOBAL	(1<<0)
+#define ACPI_AEST_PROC_FLAG_SHARED	(1<<1)
+
+/*
+ * Addressing mode values.
+ * NOTE(review): "ADDREESS" looks like a misspelling of "ADDRESS"; renaming
+ * requires updating every user of these macros.
+ */
+#define AEST_ADDREESS_SPA	0
+#define AEST_ADDREESS_LA	1
+
+/* AEST interrupt */
+#define AEST_INTERRUPT_MODE	BIT(0)
+
+#define AEST_INTERRUPT_FHI_UE_SUPPORT		BIT(0)
+#define AEST_INTERRUPT_FHI_UE_NO_SUPPORT	BIT(1)
+
+#define AEST_MAX_INTERRUPT_PER_NODE	2
+
+/* AEST interface flag bits */
+#define AEST_XFACE_FLAG_SHARED		(1 << 0)
+#define AEST_XFACE_FLAG_CLEAR_MISC	(1 << 1)
+#define AEST_XFACE_FLAG_ERROR_DEVICE	(1 << 2)
+#define AEST_XFACE_FLAG_AFFINITY	(1 << 3)
+#define AEST_XFACE_FLAG_ERROR_GROUP	(1 << 4)
+#define AEST_XFACE_FLAG_FAULT_INJECT	(1 << 5)
+#define AEST_XFACE_FLAG_INT_CONFIG	(1 << 6)
+
+/*
+ * Size units in bytes.
+ * NOTE(review): KB/MB/GB are very generic names for a header under
+ * include/linux and may clash with other definitions; the SZ_* constants
+ * from <linux/sizes.h> look like a drop-in alternative - confirm.
+ */
+#define KB 1024
+#define MB (1024 * KB)
+#define GB (1024 * MB)
+
+/* List entry describing one group of AEST nodes (count, id and type). */
+struct aest_hnode {
+	struct list_head list;
+	int count;
+	u32 id;
+	int type;
+};
+
+/*
+ * One AEST node: pointers to its interface, node-specific and interrupt
+ * structures.
+ */
+struct acpi_aest_node {
+	struct list_head list;
+	int type;
+	struct acpi_aest_node_interface_header *interface_hdr;
+	unsigned long *record_implemented;
+	unsigned long *status_reporting;
+	unsigned long *addressing_mode;
+	struct acpi_aest_node_interface_common *common;
+	/* Node-specific data; presumably selected by 'type' - confirm. */
+	union {
+		struct acpi_aest_processor *processor;
+		struct acpi_aest_memory *memory;
+		struct acpi_aest_smmu *smmu;
+		struct acpi_aest_vendor_v2 *vendor;
+		struct acpi_aest_gic *gic;
+		struct acpi_aest_pcie *pcie;
+		struct acpi_aest_proxy *proxy;
+		void *spec_pointer;
+	};
+	/* Processor resource data (cache, TLB or generic). */
+	union {
+		struct acpi_aest_processor_cache *cache;
+		struct acpi_aest_processor_tlb *tlb;
+		struct acpi_aest_processor_generic *generic;
+		void *processor_spec_pointer;
+	};
+	struct acpi_aest_node_interrupt_v2 *interrupt;
+	int interrupt_count;
+};
+#endif /* __ACPI_AEST_H__ */
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 22ba327ec2278..e7b553241b305 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -178,6 +178,7 @@ enum cpuhp_state {
 	CPUHP_AP_HYPERV_TIMER_STARTING,
 	/* Must be the last timer callback */
 	CPUHP_AP_DUMMY_TIMER_STARTING,
+	CPUHP_AP_ARM_AEST_STARTING,
 	CPUHP_AP_ARM_XEN_STARTING,
 	CPUHP_AP_ARM_XEN_RUNSTATE_STARTING,
 	CPUHP_AP_ARM_CORESIGHT_STARTING,
diff --git a/include/linux/ras.h b/include/linux/ras.h
index 468941bfe855f..05096f049dacb 100644
--- a/include/linux/ras.h
+++ b/include/linux/ras.h
@@ -63,4 +63,12 @@ amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err) { return -EINVAL; }
 #define GET_LOGICAL_INDEX(mpidr) -EINVAL
 #endif
/* CONFIG_ARM || CONFIG_ARM64 */ +#if IS_ENABLED(CONFIG_AEST) +void aest_register_decode_chain(struct notifier_block *nb); +void aest_unregister_decode_chain(struct notifier_block *nb); +#else +static inline void aest_register_decode_chain(struct notifier_block *nb) {} +static inline void aest_unregister_decode_chain(struct notifier_block *nb) {} +#endif /* CONFIG_AEST */ + #endif /* __RAS_H__ */ diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index fdb785fa4613a..c4063f7ad7342 100644 --- a/include/ras/ras_event.h +++ b/include/ras/ras_event.h @@ -381,6 +381,77 @@ TRACE_EVENT(aer_event, "Not available") ); #endif /* CONFIG_PCIEAER */ + +/* + * ARM RAS Extension Events Report + * + * This event is generated when an error reported by the ARM RAS extension + * hardware is detected. + */ + +#ifdef CONFIG_ARM64_RAS_EXTN +#include +TRACE_EVENT(arm_ras_ext_event, + + TP_PROTO(const u8 type, + const u32 id0, + const u32 id1, + const u32 index, + char *hid, + struct ras_ext_regs *regs, + const u8 *data, + const u32 len), + + TP_ARGS(type, id0, id1, index, hid, regs, data, len), + + TP_STRUCT__entry( + __field(u8, type) + __field(u32, id0) + __field(u32, id1) + __field(u32, index) + __field(char *, hid) + __field(u64, err_fr) + __field(u64, err_ctlr) + __field(u64, err_status) + __field(u64, err_addr) + __field(u64, err_misc0) + __field(u64, err_misc1) + __field(u64, err_misc2) + __field(u64, err_misc3) + __field(u32, len) + __dynamic_array(u8, buf, len) + ), + + TP_fast_assign( + __entry->type = type; + __entry->id0 = id0; + __entry->id1 = id1; + __entry->index = index; + __entry->hid = hid; + __entry->err_fr = regs->err_fr; + __entry->err_ctlr = regs->err_ctlr; + __entry->err_status = regs->err_status; + __entry->err_addr = regs->err_addr; + __entry->err_misc0 = regs->err_misc[0]; + __entry->err_misc1 = regs->err_misc[1]; + __entry->err_misc2 = regs->err_misc[2]; + __entry->err_misc3 = regs->err_misc[3]; + __entry->len = len; + 
memcpy(__get_dynamic_array(buf), data, len); + ), + + TP_printk("type: %d; id0: %d; id1: %d; index: %d; hid: %s; " + "ERR_FR: %llx; ERR_CTLR: %llx; ERR_STATUS: %llx; " + "ERR_ADDR: %llx; ERR_MISC0: %llx; ERR_MISC1: %llx; " + "ERR_MISC2: %llx; ERR_MISC3: %llx; data len:%d; raw data:%s", + __entry->type, __entry->id0, __entry->id1, __entry->index, + __entry->hid, __entry->err_fr, __entry->err_ctlr, + __entry->err_status, __entry->err_addr, __entry->err_misc0, + __entry->err_misc1, __entry->err_misc2, __entry->err_misc3, + __entry->len, + __print_hex(__get_dynamic_array(buf), __entry->len)) +); +#endif /* CONFIG_ARM64_RAS_EXTN */ #endif /* _TRACE_HW_EVENT_MC_H */ /* This part must be outside protection */