Commit | Line | Data |
---|---|---|
82413e56 SKR |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* | |
3 | * Bluefield-specific EDAC driver. | |
4 | * | |
5 | * Copyright (c) 2019 Mellanox Technologies. | |
6 | */ | |
7 | ||
8 | #include <linux/acpi.h> | |
9 | #include <linux/arm-smccc.h> | |
10 | #include <linux/bitfield.h> | |
11 | #include <linux/edac.h> | |
12 | #include <linux/io.h> | |
13 | #include <linux/module.h> | |
14 | #include <linux/platform_device.h> | |
15 | ||
16 | #include "edac_module.h" | |
17 | ||
#define DRIVER_NAME			"bluefield-edac"

/*
 * Register map of the Mellanox BlueField EMI (External Memory Interface).
 *
 * Plain names are byte offsets added to the mapped EMI base; names with a
 * double underscore are bit fields of the register defined right above them.
 */

/* ECC event counters: single errors in [15:0], double errors in [31:16]. */
#define MLXBF_ECC_CNT			0x340
#define MLXBF_ECC_CNT__SERR_CNT		GENMASK(15, 0)
#define MLXBF_ECC_CNT__DERR_CNT		GENMASK(31, 16)

/* Written by the driver to clear the single/double errors it has reported. */
#define MLXBF_ECC_ERR			0x348
#define MLXBF_ECC_ERR__SECC		BIT(0)
#define MLXBF_ECC_ERR__DECC		BIT(16)

/* Writing START makes the EMI latch the details of the last ECC error. */
#define MLXBF_ECC_LATCH_SEL		0x354
#define MLXBF_ECC_LATCH_SEL__START	BIT(24)

/* Low 32 bits of the latched error address. */
#define MLXBF_ERR_ADDR_0		0x358

/* High 32 bits of the latched error address. */
#define MLXBF_ERR_ADDR_1		0x37c

/* Kind of the latched error (single/double) and its ECC syndrome. */
#define MLXBF_SYNDROM			0x35c
#define MLXBF_SYNDROM__DERR		BIT(0)
#define MLXBF_SYNDROM__SERR		BIT(1)
#define MLXBF_SYNDROM__SYN		GENMASK(25, 16)

/* Extra details of the latched error; ERR_PRANK is used to pick the DIMM. */
#define MLXBF_ADD_INFO			0x364
#define MLXBF_ADD_INFO__ERR_PRANK	GENMASK(9, 8)

/* Upper bound on DIMM slots per memory controller accepted at probe time. */
#define MLXBF_EDAC_MAX_DIMM_PER_MC	2
/* Value assigned to the grain of every populated DIMM. */
#define MLXBF_EDAC_ERROR_GRAIN		8
49 | ||
/*
 * MLNX_SIP_GET_DIMM_INFO - SMC request describing the DIMM in a given slot.
 *
 * Arguments:
 *   a0:   MLNX_SIP_GET_DIMM_INFO
 *   a1:   (memory controller index) << 16 | (DIMM index in that controller)
 *   a2-7: not used
 *
 * Result:
 *   a0:   DIMM description, laid out as the MLXBF_DIMM_INFO__* fields below
 *   a1-3: not used
 */
#define MLNX_SIP_GET_DIMM_INFO		0x82000008

/* Bit layout of the value returned in a0 by MLNX_SIP_GET_DIMM_INFO. */
#define MLXBF_DIMM_INFO__SIZE_GB	GENMASK_ULL(15, 0)	/* 0: slot empty */
#define MLXBF_DIMM_INFO__IS_RDIMM	BIT(16)
#define MLXBF_DIMM_INFO__IS_LRDIMM	BIT(17)
#define MLXBF_DIMM_INFO__IS_NVDIMM	BIT(18)
#define MLXBF_DIMM_INFO__RANKS		GENMASK_ULL(23, 21)
#define MLXBF_DIMM_INFO__PACKAGE_X	GENMASK_ULL(31, 24)	/* 4, 8 or 16 */
73 | ||
74 | struct bluefield_edac_priv { | |
75 | int dimm_ranks[MLXBF_EDAC_MAX_DIMM_PER_MC]; | |
76 | void __iomem *emi_base; | |
77 | int dimm_per_mc; | |
78 | }; | |
79 | ||
80 | static u64 smc_call1(u64 smc_op, u64 smc_arg) | |
81 | { | |
82 | struct arm_smccc_res res; | |
83 | ||
84 | arm_smccc_smc(smc_op, smc_arg, 0, 0, 0, 0, 0, 0, &res); | |
85 | ||
86 | return res.a0; | |
87 | } | |
88 | ||
89 | /* | |
90 | * Gather the ECC information from the External Memory Interface registers | |
91 | * and report it to the edac handler. | |
92 | */ | |
93 | static void bluefield_gather_report_ecc(struct mem_ctl_info *mci, | |
94 | int error_cnt, | |
95 | int is_single_ecc) | |
96 | { | |
97 | struct bluefield_edac_priv *priv = mci->pvt_info; | |
98 | u32 dram_additional_info, err_prank, edea0, edea1; | |
99 | u32 ecc_latch_select, dram_syndrom, serr, derr, syndrom; | |
100 | enum hw_event_mc_err_type ecc_type; | |
101 | u64 ecc_dimm_addr; | |
102 | int ecc_dimm; | |
103 | ||
104 | ecc_type = is_single_ecc ? HW_EVENT_ERR_CORRECTED : | |
105 | HW_EVENT_ERR_UNCORRECTED; | |
106 | ||
107 | /* | |
108 | * Tell the External Memory Interface to populate the relevant | |
109 | * registers with information about the last ECC error occurrence. | |
110 | */ | |
111 | ecc_latch_select = MLXBF_ECC_LATCH_SEL__START; | |
112 | writel(ecc_latch_select, priv->emi_base + MLXBF_ECC_LATCH_SEL); | |
113 | ||
114 | /* | |
115 | * Verify that the ECC reported info in the registers is of the | |
116 | * same type as the one asked to report. If not, just report the | |
117 | * error without the detailed information. | |
118 | */ | |
119 | dram_syndrom = readl(priv->emi_base + MLXBF_SYNDROM); | |
120 | serr = FIELD_GET(MLXBF_SYNDROM__SERR, dram_syndrom); | |
121 | derr = FIELD_GET(MLXBF_SYNDROM__DERR, dram_syndrom); | |
122 | syndrom = FIELD_GET(MLXBF_SYNDROM__SYN, dram_syndrom); | |
123 | ||
124 | if ((is_single_ecc && !serr) || (!is_single_ecc && !derr)) { | |
125 | edac_mc_handle_error(ecc_type, mci, error_cnt, 0, 0, 0, | |
126 | 0, 0, -1, mci->ctl_name, ""); | |
127 | return; | |
128 | } | |
129 | ||
130 | dram_additional_info = readl(priv->emi_base + MLXBF_ADD_INFO); | |
131 | err_prank = FIELD_GET(MLXBF_ADD_INFO__ERR_PRANK, dram_additional_info); | |
132 | ||
133 | ecc_dimm = (err_prank >= 2 && priv->dimm_ranks[0] <= 2) ? 1 : 0; | |
134 | ||
135 | edea0 = readl(priv->emi_base + MLXBF_ERR_ADDR_0); | |
136 | edea1 = readl(priv->emi_base + MLXBF_ERR_ADDR_1); | |
137 | ||
138 | ecc_dimm_addr = ((u64)edea1 << 32) | edea0; | |
139 | ||
140 | edac_mc_handle_error(ecc_type, mci, error_cnt, | |
141 | PFN_DOWN(ecc_dimm_addr), | |
142 | offset_in_page(ecc_dimm_addr), | |
143 | syndrom, ecc_dimm, 0, 0, mci->ctl_name, ""); | |
144 | } | |
145 | ||
146 | static void bluefield_edac_check(struct mem_ctl_info *mci) | |
147 | { | |
148 | struct bluefield_edac_priv *priv = mci->pvt_info; | |
149 | u32 ecc_count, single_error_count, double_error_count, ecc_error = 0; | |
150 | ||
151 | /* | |
152 | * The memory controller might not be initialized by the firmware | |
153 | * when there isn't memory, which may lead to bad register readings. | |
154 | */ | |
155 | if (mci->edac_cap == EDAC_FLAG_NONE) | |
156 | return; | |
157 | ||
158 | ecc_count = readl(priv->emi_base + MLXBF_ECC_CNT); | |
159 | single_error_count = FIELD_GET(MLXBF_ECC_CNT__SERR_CNT, ecc_count); | |
160 | double_error_count = FIELD_GET(MLXBF_ECC_CNT__DERR_CNT, ecc_count); | |
161 | ||
162 | if (single_error_count) { | |
163 | ecc_error |= MLXBF_ECC_ERR__SECC; | |
164 | ||
165 | bluefield_gather_report_ecc(mci, single_error_count, 1); | |
166 | } | |
167 | ||
168 | if (double_error_count) { | |
169 | ecc_error |= MLXBF_ECC_ERR__DECC; | |
170 | ||
171 | bluefield_gather_report_ecc(mci, double_error_count, 0); | |
172 | } | |
173 | ||
174 | /* Write to clear reported errors. */ | |
175 | if (ecc_count) | |
176 | writel(ecc_error, priv->emi_base + MLXBF_ECC_ERR); | |
177 | } | |
178 | ||
179 | /* Initialize the DIMMs information for the given memory controller. */ | |
180 | static void bluefield_edac_init_dimms(struct mem_ctl_info *mci) | |
181 | { | |
182 | struct bluefield_edac_priv *priv = mci->pvt_info; | |
183 | int mem_ctrl_idx = mci->mc_idx; | |
184 | struct dimm_info *dimm; | |
185 | u64 smc_info, smc_arg; | |
186 | int is_empty = 1, i; | |
187 | ||
188 | for (i = 0; i < priv->dimm_per_mc; i++) { | |
189 | dimm = mci->dimms[i]; | |
190 | ||
191 | smc_arg = mem_ctrl_idx << 16 | i; | |
192 | smc_info = smc_call1(MLNX_SIP_GET_DIMM_INFO, smc_arg); | |
193 | ||
194 | if (!FIELD_GET(MLXBF_DIMM_INFO__SIZE_GB, smc_info)) { | |
195 | dimm->mtype = MEM_EMPTY; | |
196 | continue; | |
197 | } | |
198 | ||
199 | is_empty = 0; | |
200 | ||
201 | dimm->edac_mode = EDAC_SECDED; | |
202 | ||
203 | if (FIELD_GET(MLXBF_DIMM_INFO__IS_NVDIMM, smc_info)) | |
204 | dimm->mtype = MEM_NVDIMM; | |
205 | else if (FIELD_GET(MLXBF_DIMM_INFO__IS_LRDIMM, smc_info)) | |
206 | dimm->mtype = MEM_LRDDR4; | |
207 | else if (FIELD_GET(MLXBF_DIMM_INFO__IS_RDIMM, smc_info)) | |
208 | dimm->mtype = MEM_RDDR4; | |
209 | else | |
210 | dimm->mtype = MEM_DDR4; | |
211 | ||
212 | dimm->nr_pages = | |
213 | FIELD_GET(MLXBF_DIMM_INFO__SIZE_GB, smc_info) * | |
214 | (SZ_1G / PAGE_SIZE); | |
215 | dimm->grain = MLXBF_EDAC_ERROR_GRAIN; | |
216 | ||
217 | /* Mem controller for BlueField only supports x4, x8 and x16 */ | |
218 | switch (FIELD_GET(MLXBF_DIMM_INFO__PACKAGE_X, smc_info)) { | |
219 | case 4: | |
220 | dimm->dtype = DEV_X4; | |
221 | break; | |
222 | case 8: | |
223 | dimm->dtype = DEV_X8; | |
224 | break; | |
225 | case 16: | |
226 | dimm->dtype = DEV_X16; | |
227 | break; | |
228 | default: | |
229 | dimm->dtype = DEV_UNKNOWN; | |
230 | } | |
231 | ||
232 | priv->dimm_ranks[i] = | |
233 | FIELD_GET(MLXBF_DIMM_INFO__RANKS, smc_info); | |
234 | } | |
235 | ||
236 | if (is_empty) | |
237 | mci->edac_cap = EDAC_FLAG_NONE; | |
238 | else | |
239 | mci->edac_cap = EDAC_FLAG_SECDED; | |
240 | } | |
241 | ||
242 | static int bluefield_edac_mc_probe(struct platform_device *pdev) | |
243 | { | |
244 | struct bluefield_edac_priv *priv; | |
245 | struct device *dev = &pdev->dev; | |
246 | struct edac_mc_layer layers[1]; | |
247 | struct mem_ctl_info *mci; | |
248 | struct resource *emi_res; | |
249 | unsigned int mc_idx, dimm_count; | |
250 | int rc, ret; | |
251 | ||
252 | /* Read the MSS (Memory SubSystem) index from ACPI table. */ | |
253 | if (device_property_read_u32(dev, "mss_number", &mc_idx)) { | |
254 | dev_warn(dev, "bf_edac: MSS number unknown\n"); | |
255 | return -EINVAL; | |
256 | } | |
257 | ||
258 | /* Read the DIMMs per MC from ACPI table. */ | |
259 | if (device_property_read_u32(dev, "dimm_per_mc", &dimm_count)) { | |
260 | dev_warn(dev, "bf_edac: DIMMs per MC unknown\n"); | |
261 | return -EINVAL; | |
262 | } | |
263 | ||
264 | if (dimm_count > MLXBF_EDAC_MAX_DIMM_PER_MC) { | |
265 | dev_warn(dev, "bf_edac: DIMMs per MC not valid\n"); | |
266 | return -EINVAL; | |
267 | } | |
268 | ||
269 | emi_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); | |
270 | if (!emi_res) | |
271 | return -EINVAL; | |
272 | ||
273 | layers[0].type = EDAC_MC_LAYER_SLOT; | |
274 | layers[0].size = dimm_count; | |
275 | layers[0].is_virt_csrow = true; | |
276 | ||
277 | mci = edac_mc_alloc(mc_idx, ARRAY_SIZE(layers), layers, sizeof(*priv)); | |
278 | if (!mci) | |
279 | return -ENOMEM; | |
280 | ||
281 | priv = mci->pvt_info; | |
282 | ||
283 | priv->dimm_per_mc = dimm_count; | |
284 | priv->emi_base = devm_ioremap_resource(dev, emi_res); | |
285 | if (IS_ERR(priv->emi_base)) { | |
286 | dev_err(dev, "failed to map EMI IO resource\n"); | |
287 | ret = PTR_ERR(priv->emi_base); | |
288 | goto err; | |
289 | } | |
290 | ||
291 | mci->pdev = dev; | |
292 | mci->mtype_cap = MEM_FLAG_DDR4 | MEM_FLAG_RDDR4 | | |
293 | MEM_FLAG_LRDDR4 | MEM_FLAG_NVDIMM; | |
294 | mci->edac_ctl_cap = EDAC_FLAG_SECDED; | |
295 | ||
296 | mci->mod_name = DRIVER_NAME; | |
297 | mci->ctl_name = "BlueField_Memory_Controller"; | |
298 | mci->dev_name = dev_name(dev); | |
299 | mci->edac_check = bluefield_edac_check; | |
300 | ||
301 | /* Initialize mci with the actual populated DIMM information. */ | |
302 | bluefield_edac_init_dimms(mci); | |
303 | ||
304 | platform_set_drvdata(pdev, mci); | |
305 | ||
306 | /* Register with EDAC core */ | |
307 | rc = edac_mc_add_mc(mci); | |
308 | if (rc) { | |
309 | dev_err(dev, "failed to register with EDAC core\n"); | |
310 | ret = rc; | |
311 | goto err; | |
312 | } | |
313 | ||
314 | /* Only POLL mode supported so far. */ | |
315 | edac_op_state = EDAC_OPSTATE_POLL; | |
316 | ||
317 | return 0; | |
318 | ||
319 | err: | |
320 | edac_mc_free(mci); | |
321 | ||
322 | return ret; | |
323 | ||
324 | } | |
325 | ||
a5347591 | 326 | static void bluefield_edac_mc_remove(struct platform_device *pdev) |
82413e56 SKR |
327 | { |
328 | struct mem_ctl_info *mci = platform_get_drvdata(pdev); | |
329 | ||
330 | edac_mc_del_mc(&pdev->dev); | |
331 | edac_mc_free(mci); | |
82413e56 SKR |
332 | } |
333 | ||
334 | static const struct acpi_device_id bluefield_mc_acpi_ids[] = { | |
335 | {"MLNXBF08", 0}, | |
336 | {} | |
337 | }; | |
338 | ||
339 | MODULE_DEVICE_TABLE(acpi, bluefield_mc_acpi_ids); | |
340 | ||
341 | static struct platform_driver bluefield_edac_mc_driver = { | |
342 | .driver = { | |
343 | .name = DRIVER_NAME, | |
344 | .acpi_match_table = bluefield_mc_acpi_ids, | |
345 | }, | |
346 | .probe = bluefield_edac_mc_probe, | |
a5347591 | 347 | .remove_new = bluefield_edac_mc_remove, |
82413e56 SKR |
348 | }; |
349 | ||
350 | module_platform_driver(bluefield_edac_mc_driver); | |
351 | ||
352 | MODULE_DESCRIPTION("Mellanox BlueField memory edac driver"); | |
353 | MODULE_AUTHOR("Mellanox Technologies"); | |
354 | MODULE_LICENSE("GPL v2"); |