Commit | Line | Data |
---|---|---|
8747f363 GS |
1 | /* |
2 | * The file intends to implement the functions needed by EEH, which is | |
3 | * built on IODA compliant chip. Actually, lots of functions related | |
4 | * to EEH would be built based on the OPAL APIs. | |
5 | * | |
6 | * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013. | |
7 | * | |
8 | * This program is free software; you can redistribute it and/or modify | |
9 | * it under the terms of the GNU General Public License as published by | |
10 | * the Free Software Foundation; either version 2 of the License, or | |
11 | * (at your option) any later version. | |
12 | */ | |
13 | ||
14 | #include <linux/bootmem.h> | |
8998897b | 15 | #include <linux/debugfs.h> |
8747f363 GS |
16 | #include <linux/delay.h> |
17 | #include <linux/init.h> | |
18 | #include <linux/io.h> | |
19 | #include <linux/irq.h> | |
20 | #include <linux/kernel.h> | |
21 | #include <linux/msi.h> | |
7cb9d93d | 22 | #include <linux/notifier.h> |
8747f363 GS |
23 | #include <linux/pci.h> |
24 | #include <linux/string.h> | |
25 | ||
26 | #include <asm/eeh.h> | |
27 | #include <asm/eeh_event.h> | |
28 | #include <asm/io.h> | |
29 | #include <asm/iommu.h> | |
30 | #include <asm/msi_bitmap.h> | |
31 | #include <asm/opal.h> | |
32 | #include <asm/pci-bridge.h> | |
33 | #include <asm/ppc-pci.h> | |
34 | #include <asm/tce.h> | |
35 | ||
36 | #include "powernv.h" | |
37 | #include "pci.h" | |
38 | ||
/*
 * Buffer handed to firmware when fetching P7IOC hub diag-data. It is
 * allocated (one zeroed page) by ioda_eeh_post_init() for IODA1 PHBs
 * only and stays NULL otherwise.
 */
static char *hub_diag;

/* Becomes 1 once the OPAL event notifier has been registered */
static int ioda_eeh_nb_init;
41 | ||
42 | static int ioda_eeh_event(struct notifier_block *nb, | |
43 | unsigned long events, void *change) | |
44 | { | |
45 | uint64_t changed_evts = (uint64_t)change; | |
46 | ||
47 | /* We simply send special EEH event */ | |
48 | if ((changed_evts & OPAL_EVENT_PCI_ERROR) && | |
49 | (events & OPAL_EVENT_PCI_ERROR)) | |
50 | eeh_send_failure_event(NULL); | |
51 | ||
52 | return 0; | |
53 | } | |
54 | ||
55 | static struct notifier_block ioda_eeh_nb = { | |
56 | .notifier_call = ioda_eeh_event, | |
57 | .next = NULL, | |
58 | .priority = 0 | |
59 | }; | |
70f942db | 60 | |
8998897b | 61 | #ifdef CONFIG_DEBUG_FS |
ff6bdcd9 | 62 | static int ioda_eeh_dbgfs_set(void *data, int offset, u64 val) |
8998897b GS |
63 | { |
64 | struct pci_controller *hose = data; | |
65 | struct pnv_phb *phb = hose->private_data; | |
66 | ||
ff6bdcd9 | 67 | out_be64(phb->regs + offset, val); |
8998897b GS |
68 | return 0; |
69 | } | |
70 | ||
ff6bdcd9 | 71 | static int ioda_eeh_dbgfs_get(void *data, int offset, u64 *val) |
8998897b GS |
72 | { |
73 | struct pci_controller *hose = data; | |
74 | struct pnv_phb *phb = hose->private_data; | |
75 | ||
ff6bdcd9 | 76 | *val = in_be64(phb->regs + offset); |
8998897b GS |
77 | return 0; |
78 | } | |
79 | ||
ff6bdcd9 GS |
80 | static int ioda_eeh_outb_dbgfs_set(void *data, u64 val) |
81 | { | |
82 | return ioda_eeh_dbgfs_set(data, 0xD10, val); | |
83 | } | |
84 | ||
85 | static int ioda_eeh_outb_dbgfs_get(void *data, u64 *val) | |
86 | { | |
87 | return ioda_eeh_dbgfs_get(data, 0xD10, val); | |
88 | } | |
89 | ||
90 | static int ioda_eeh_inbA_dbgfs_set(void *data, u64 val) | |
91 | { | |
92 | return ioda_eeh_dbgfs_set(data, 0xD90, val); | |
93 | } | |
94 | ||
95 | static int ioda_eeh_inbA_dbgfs_get(void *data, u64 *val) | |
96 | { | |
97 | return ioda_eeh_dbgfs_get(data, 0xD90, val); | |
98 | } | |
99 | ||
100 | static int ioda_eeh_inbB_dbgfs_set(void *data, u64 val) | |
101 | { | |
102 | return ioda_eeh_dbgfs_set(data, 0xE10, val); | |
103 | } | |
104 | ||
105 | static int ioda_eeh_inbB_dbgfs_get(void *data, u64 *val) | |
106 | { | |
107 | return ioda_eeh_dbgfs_get(data, 0xE10, val); | |
108 | } | |
109 | ||
110 | DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_outb_dbgfs_ops, ioda_eeh_outb_dbgfs_get, | |
111 | ioda_eeh_outb_dbgfs_set, "0x%llx\n"); | |
112 | DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_inbA_dbgfs_ops, ioda_eeh_inbA_dbgfs_get, | |
113 | ioda_eeh_inbA_dbgfs_set, "0x%llx\n"); | |
114 | DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_inbB_dbgfs_ops, ioda_eeh_inbB_dbgfs_get, | |
115 | ioda_eeh_inbB_dbgfs_set, "0x%llx\n"); | |
8998897b GS |
116 | #endif /* CONFIG_DEBUG_FS */ |
117 | ||
73370c66 GS |
118 | /** |
119 | * ioda_eeh_post_init - Chip dependent post initialization | |
120 | * @hose: PCI controller | |
121 | * | |
122 | * The function will be called after eeh PEs and devices | |
123 | * have been built. That means the EEH is ready to supply | |
124 | * service with I/O cache. | |
125 | */ | |
126 | static int ioda_eeh_post_init(struct pci_controller *hose) | |
127 | { | |
128 | struct pnv_phb *phb = hose->private_data; | |
7cb9d93d GS |
129 | int ret; |
130 | ||
131 | /* Register OPAL event notifier */ | |
132 | if (!ioda_eeh_nb_init) { | |
133 | ret = opal_notifier_register(&ioda_eeh_nb); | |
134 | if (ret) { | |
135 | pr_err("%s: Can't register OPAL event notifier (%d)\n", | |
136 | __func__, ret); | |
137 | return ret; | |
138 | } | |
139 | ||
140 | ioda_eeh_nb_init = 1; | |
141 | } | |
73370c66 | 142 | |
20bb842b GS |
143 | /* We needn't HUB diag-data on PHB3 */ |
144 | if (phb->type == PNV_PHB_IODA1 && !hub_diag) { | |
145 | hub_diag = (char *)__get_free_page(GFP_KERNEL | __GFP_ZERO); | |
70f942db | 146 | if (!hub_diag) { |
20bb842b GS |
147 | pr_err("%s: Out of memory !\n", __func__); |
148 | return -ENOMEM; | |
70f942db | 149 | } |
20bb842b | 150 | } |
70f942db | 151 | |
8998897b | 152 | #ifdef CONFIG_DEBUG_FS |
ff6bdcd9 GS |
153 | if (phb->dbgfs) { |
154 | debugfs_create_file("err_injct_outbound", 0600, | |
155 | phb->dbgfs, hose, | |
156 | &ioda_eeh_outb_dbgfs_ops); | |
157 | debugfs_create_file("err_injct_inboundA", 0600, | |
20bb842b | 158 | phb->dbgfs, hose, |
ff6bdcd9 GS |
159 | &ioda_eeh_inbA_dbgfs_ops); |
160 | debugfs_create_file("err_injct_inboundB", 0600, | |
161 | phb->dbgfs, hose, | |
162 | &ioda_eeh_inbB_dbgfs_ops); | |
163 | } | |
8998897b GS |
164 | #endif |
165 | ||
20bb842b | 166 | phb->eeh_state |= PNV_EEH_STATE_ENABLED; |
73370c66 GS |
167 | |
168 | return 0; | |
169 | } | |
170 | ||
eb005983 GS |
171 | /** |
172 | * ioda_eeh_set_option - Set EEH operation or I/O setting | |
173 | * @pe: EEH PE | |
174 | * @option: options | |
175 | * | |
176 | * Enable or disable EEH option for the indicated PE. The | |
177 | * function also can be used to enable I/O or DMA for the | |
178 | * PE. | |
179 | */ | |
180 | static int ioda_eeh_set_option(struct eeh_pe *pe, int option) | |
181 | { | |
182 | s64 ret; | |
183 | u32 pe_no; | |
184 | struct pci_controller *hose = pe->phb; | |
185 | struct pnv_phb *phb = hose->private_data; | |
186 | ||
187 | /* Check on PE number */ | |
188 | if (pe->addr < 0 || pe->addr >= phb->ioda.total_pe) { | |
189 | pr_err("%s: PE address %x out of range [0, %x] " | |
190 | "on PHB#%x\n", | |
191 | __func__, pe->addr, phb->ioda.total_pe, | |
192 | hose->global_number); | |
193 | return -EINVAL; | |
194 | } | |
195 | ||
196 | pe_no = pe->addr; | |
197 | switch (option) { | |
198 | case EEH_OPT_DISABLE: | |
199 | ret = -EEXIST; | |
200 | break; | |
201 | case EEH_OPT_ENABLE: | |
202 | ret = 0; | |
203 | break; | |
204 | case EEH_OPT_THAW_MMIO: | |
205 | ret = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, | |
206 | OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO); | |
207 | if (ret) { | |
208 | pr_warning("%s: Failed to enable MMIO for " | |
209 | "PHB#%x-PE#%x, err=%lld\n", | |
210 | __func__, hose->global_number, pe_no, ret); | |
211 | return -EIO; | |
212 | } | |
213 | ||
214 | break; | |
215 | case EEH_OPT_THAW_DMA: | |
216 | ret = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, | |
217 | OPAL_EEH_ACTION_CLEAR_FREEZE_DMA); | |
218 | if (ret) { | |
219 | pr_warning("%s: Failed to enable DMA for " | |
220 | "PHB#%x-PE#%x, err=%lld\n", | |
221 | __func__, hose->global_number, pe_no, ret); | |
222 | return -EIO; | |
223 | } | |
224 | ||
225 | break; | |
226 | default: | |
227 | pr_warning("%s: Invalid option %d\n", __func__, option); | |
228 | return -EINVAL; | |
229 | } | |
230 | ||
231 | return ret; | |
232 | } | |
233 | ||
8c41a7f3 GS |
234 | /** |
235 | * ioda_eeh_get_state - Retrieve the state of PE | |
236 | * @pe: EEH PE | |
237 | * | |
238 | * The PE's state should be retrieved from the PEEV, PEST | |
239 | * IODA tables. Since the OPAL has exported the function | |
240 | * to do it, it'd better to use that. | |
241 | */ | |
242 | static int ioda_eeh_get_state(struct eeh_pe *pe) | |
243 | { | |
244 | s64 ret = 0; | |
245 | u8 fstate; | |
246 | u16 pcierr; | |
247 | u32 pe_no; | |
248 | int result; | |
249 | struct pci_controller *hose = pe->phb; | |
250 | struct pnv_phb *phb = hose->private_data; | |
251 | ||
252 | /* | |
253 | * Sanity check on PE address. The PHB PE address should | |
254 | * be zero. | |
255 | */ | |
256 | if (pe->addr < 0 || pe->addr >= phb->ioda.total_pe) { | |
257 | pr_err("%s: PE address %x out of range [0, %x] " | |
258 | "on PHB#%x\n", | |
259 | __func__, pe->addr, phb->ioda.total_pe, | |
260 | hose->global_number); | |
261 | return EEH_STATE_NOT_SUPPORT; | |
262 | } | |
263 | ||
264 | /* Retrieve PE status through OPAL */ | |
265 | pe_no = pe->addr; | |
266 | ret = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, | |
267 | &fstate, &pcierr, NULL); | |
268 | if (ret) { | |
269 | pr_err("%s: Failed to get EEH status on " | |
270 | "PHB#%x-PE#%x\n, err=%lld\n", | |
271 | __func__, hose->global_number, pe_no, ret); | |
272 | return EEH_STATE_NOT_SUPPORT; | |
273 | } | |
274 | ||
275 | /* Check PHB status */ | |
276 | if (pe->type & EEH_PE_PHB) { | |
277 | result = 0; | |
278 | result &= ~EEH_STATE_RESET_ACTIVE; | |
279 | ||
280 | if (pcierr != OPAL_EEH_PHB_ERROR) { | |
281 | result |= EEH_STATE_MMIO_ACTIVE; | |
282 | result |= EEH_STATE_DMA_ACTIVE; | |
283 | result |= EEH_STATE_MMIO_ENABLED; | |
284 | result |= EEH_STATE_DMA_ENABLED; | |
285 | } | |
286 | ||
287 | return result; | |
288 | } | |
289 | ||
290 | /* Parse result out */ | |
291 | result = 0; | |
292 | switch (fstate) { | |
293 | case OPAL_EEH_STOPPED_NOT_FROZEN: | |
294 | result &= ~EEH_STATE_RESET_ACTIVE; | |
295 | result |= EEH_STATE_MMIO_ACTIVE; | |
296 | result |= EEH_STATE_DMA_ACTIVE; | |
297 | result |= EEH_STATE_MMIO_ENABLED; | |
298 | result |= EEH_STATE_DMA_ENABLED; | |
299 | break; | |
300 | case OPAL_EEH_STOPPED_MMIO_FREEZE: | |
301 | result &= ~EEH_STATE_RESET_ACTIVE; | |
302 | result |= EEH_STATE_DMA_ACTIVE; | |
303 | result |= EEH_STATE_DMA_ENABLED; | |
304 | break; | |
305 | case OPAL_EEH_STOPPED_DMA_FREEZE: | |
306 | result &= ~EEH_STATE_RESET_ACTIVE; | |
307 | result |= EEH_STATE_MMIO_ACTIVE; | |
308 | result |= EEH_STATE_MMIO_ENABLED; | |
309 | break; | |
310 | case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE: | |
311 | result &= ~EEH_STATE_RESET_ACTIVE; | |
312 | break; | |
313 | case OPAL_EEH_STOPPED_RESET: | |
314 | result |= EEH_STATE_RESET_ACTIVE; | |
315 | break; | |
316 | case OPAL_EEH_STOPPED_TEMP_UNAVAIL: | |
317 | result |= EEH_STATE_UNAVAILABLE; | |
318 | break; | |
319 | case OPAL_EEH_STOPPED_PERM_UNAVAIL: | |
320 | result |= EEH_STATE_NOT_SUPPORT; | |
321 | break; | |
322 | default: | |
323 | pr_warning("%s: Unexpected EEH status 0x%x " | |
324 | "on PHB#%x-PE#%x\n", | |
325 | __func__, fstate, hose->global_number, pe_no); | |
326 | } | |
327 | ||
328 | return result; | |
329 | } | |
330 | ||
9d5cab00 GS |
331 | static int ioda_eeh_pe_clear(struct eeh_pe *pe) |
332 | { | |
333 | struct pci_controller *hose; | |
334 | struct pnv_phb *phb; | |
335 | u32 pe_no; | |
336 | u8 fstate; | |
337 | u16 pcierr; | |
338 | s64 ret; | |
339 | ||
340 | pe_no = pe->addr; | |
341 | hose = pe->phb; | |
342 | phb = pe->phb->private_data; | |
343 | ||
344 | /* Clear the EEH error on the PE */ | |
345 | ret = opal_pci_eeh_freeze_clear(phb->opal_id, | |
346 | pe_no, OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); | |
347 | if (ret) { | |
348 | pr_err("%s: Failed to clear EEH error for " | |
349 | "PHB#%x-PE#%x, err=%lld\n", | |
350 | __func__, hose->global_number, pe_no, ret); | |
351 | return -EIO; | |
352 | } | |
353 | ||
354 | /* | |
355 | * Read the PE state back and verify that the frozen | |
356 | * state has been removed. | |
357 | */ | |
358 | ret = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, | |
359 | &fstate, &pcierr, NULL); | |
360 | if (ret) { | |
361 | pr_err("%s: Failed to get EEH status on " | |
362 | "PHB#%x-PE#%x\n, err=%lld\n", | |
363 | __func__, hose->global_number, pe_no, ret); | |
364 | return -EIO; | |
365 | } | |
366 | ||
367 | if (fstate != OPAL_EEH_STOPPED_NOT_FROZEN) { | |
368 | pr_err("%s: Frozen state not cleared on " | |
369 | "PHB#%x-PE#%x, sts=%x\n", | |
370 | __func__, hose->global_number, pe_no, fstate); | |
371 | return -EIO; | |
372 | } | |
373 | ||
374 | return 0; | |
375 | } | |
376 | ||
377 | static s64 ioda_eeh_phb_poll(struct pnv_phb *phb) | |
378 | { | |
379 | s64 rc = OPAL_HARDWARE; | |
380 | ||
381 | while (1) { | |
382 | rc = opal_pci_poll(phb->opal_id); | |
383 | if (rc <= 0) | |
384 | break; | |
385 | ||
386 | msleep(rc); | |
387 | } | |
388 | ||
389 | return rc; | |
390 | } | |
391 | ||
392 | static int ioda_eeh_phb_reset(struct pci_controller *hose, int option) | |
393 | { | |
394 | struct pnv_phb *phb = hose->private_data; | |
395 | s64 rc = OPAL_HARDWARE; | |
396 | ||
397 | pr_debug("%s: Reset PHB#%x, option=%d\n", | |
398 | __func__, hose->global_number, option); | |
399 | ||
400 | /* Issue PHB complete reset request */ | |
401 | if (option == EEH_RESET_FUNDAMENTAL || | |
402 | option == EEH_RESET_HOT) | |
403 | rc = opal_pci_reset(phb->opal_id, | |
404 | OPAL_PHB_COMPLETE, | |
405 | OPAL_ASSERT_RESET); | |
406 | else if (option == EEH_RESET_DEACTIVATE) | |
407 | rc = opal_pci_reset(phb->opal_id, | |
408 | OPAL_PHB_COMPLETE, | |
409 | OPAL_DEASSERT_RESET); | |
410 | if (rc < 0) | |
411 | goto out; | |
412 | ||
413 | /* | |
414 | * Poll state of the PHB until the request is done | |
415 | * successfully. | |
416 | */ | |
417 | rc = ioda_eeh_phb_poll(phb); | |
418 | out: | |
419 | if (rc != OPAL_SUCCESS) | |
420 | return -EIO; | |
421 | ||
422 | return 0; | |
423 | } | |
424 | ||
425 | static int ioda_eeh_root_reset(struct pci_controller *hose, int option) | |
426 | { | |
427 | struct pnv_phb *phb = hose->private_data; | |
428 | s64 rc = OPAL_SUCCESS; | |
429 | ||
430 | pr_debug("%s: Reset PHB#%x, option=%d\n", | |
431 | __func__, hose->global_number, option); | |
432 | ||
433 | /* | |
434 | * During the reset deassert time, we needn't care | |
435 | * the reset scope because the firmware does nothing | |
436 | * for fundamental or hot reset during deassert phase. | |
437 | */ | |
438 | if (option == EEH_RESET_FUNDAMENTAL) | |
439 | rc = opal_pci_reset(phb->opal_id, | |
440 | OPAL_PCI_FUNDAMENTAL_RESET, | |
441 | OPAL_ASSERT_RESET); | |
442 | else if (option == EEH_RESET_HOT) | |
443 | rc = opal_pci_reset(phb->opal_id, | |
444 | OPAL_PCI_HOT_RESET, | |
445 | OPAL_ASSERT_RESET); | |
446 | else if (option == EEH_RESET_DEACTIVATE) | |
447 | rc = opal_pci_reset(phb->opal_id, | |
448 | OPAL_PCI_HOT_RESET, | |
449 | OPAL_DEASSERT_RESET); | |
450 | if (rc < 0) | |
451 | goto out; | |
452 | ||
453 | /* Poll state of the PHB until the request is done */ | |
454 | rc = ioda_eeh_phb_poll(phb); | |
455 | out: | |
456 | if (rc != OPAL_SUCCESS) | |
457 | return -EIO; | |
458 | ||
459 | return 0; | |
460 | } | |
461 | ||
462 | static int ioda_eeh_bridge_reset(struct pci_controller *hose, | |
463 | struct pci_dev *dev, int option) | |
464 | { | |
465 | u16 ctrl; | |
466 | ||
467 | pr_debug("%s: Reset device %04x:%02x:%02x.%01x with option %d\n", | |
468 | __func__, hose->global_number, dev->bus->number, | |
469 | PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn), option); | |
470 | ||
471 | switch (option) { | |
472 | case EEH_RESET_FUNDAMENTAL: | |
473 | case EEH_RESET_HOT: | |
474 | pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &ctrl); | |
475 | ctrl |= PCI_BRIDGE_CTL_BUS_RESET; | |
476 | pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl); | |
477 | break; | |
478 | case EEH_RESET_DEACTIVATE: | |
479 | pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &ctrl); | |
480 | ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET; | |
481 | pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl); | |
482 | break; | |
483 | } | |
484 | ||
485 | return 0; | |
486 | } | |
487 | ||
488 | /** | |
489 | * ioda_eeh_reset - Reset the indicated PE | |
490 | * @pe: EEH PE | |
491 | * @option: reset option | |
492 | * | |
493 | * Do reset on the indicated PE. For PCI bus sensitive PE, | |
494 | * we need to reset the parent p2p bridge. The PHB has to | |
495 | * be reinitialized if the p2p bridge is root bridge. For | |
496 | * PCI device sensitive PE, we will try to reset the device | |
497 | * through FLR. For now, we don't have OPAL APIs to do HARD | |
498 | * reset yet, so all reset would be SOFT (HOT) reset. | |
499 | */ | |
500 | static int ioda_eeh_reset(struct eeh_pe *pe, int option) | |
501 | { | |
502 | struct pci_controller *hose = pe->phb; | |
503 | struct eeh_dev *edev; | |
504 | struct pci_dev *dev; | |
505 | int ret; | |
506 | ||
507 | /* | |
508 | * Anyway, we have to clear the problematic state for the | |
509 | * corresponding PE. However, we needn't do it if the PE | |
510 | * is PHB associated. That means the PHB is having fatal | |
511 | * errors and it needs reset. Further more, the AIB interface | |
512 | * isn't reliable any more. | |
513 | */ | |
514 | if (!(pe->type & EEH_PE_PHB) && | |
515 | (option == EEH_RESET_HOT || | |
516 | option == EEH_RESET_FUNDAMENTAL)) { | |
517 | ret = ioda_eeh_pe_clear(pe); | |
518 | if (ret) | |
519 | return -EIO; | |
520 | } | |
521 | ||
522 | /* | |
523 | * The rules applied to reset, either fundamental or hot reset: | |
524 | * | |
525 | * We always reset the direct upstream bridge of the PE. If the | |
526 | * direct upstream bridge isn't root bridge, we always take hot | |
527 | * reset no matter what option (fundamental or hot) is. Otherwise, | |
528 | * we should do the reset according to the required option. | |
529 | */ | |
530 | if (pe->type & EEH_PE_PHB) { | |
531 | ret = ioda_eeh_phb_reset(hose, option); | |
532 | } else { | |
533 | if (pe->type & EEH_PE_DEVICE) { | |
534 | /* | |
535 | * If it's device PE, we didn't refer to the parent | |
536 | * PCI bus yet. So we have to figure it out indirectly. | |
537 | */ | |
538 | edev = list_first_entry(&pe->edevs, | |
539 | struct eeh_dev, list); | |
540 | dev = eeh_dev_to_pci_dev(edev); | |
541 | dev = dev->bus->self; | |
542 | } else { | |
543 | /* | |
544 | * If it's bus PE, the parent PCI bus is already there | |
545 | * and just pick it up. | |
546 | */ | |
547 | dev = pe->bus->self; | |
548 | } | |
549 | ||
550 | /* | |
551 | * Do reset based on the fact that the direct upstream bridge | |
552 | * is root bridge (port) or not. | |
553 | */ | |
554 | if (dev->bus->number == 0) | |
555 | ret = ioda_eeh_root_reset(hose, option); | |
556 | else | |
557 | ret = ioda_eeh_bridge_reset(hose, dev, option); | |
558 | } | |
559 | ||
560 | return ret; | |
561 | } | |
562 | ||
bf90dfea GS |
563 | /** |
564 | * ioda_eeh_get_log - Retrieve error log | |
565 | * @pe: EEH PE | |
566 | * @severity: Severity level of the log | |
567 | * @drv_log: buffer to store the log | |
568 | * @len: space of the log buffer | |
569 | * | |
570 | * The function is used to retrieve error log from P7IOC. | |
571 | */ | |
572 | static int ioda_eeh_get_log(struct eeh_pe *pe, int severity, | |
573 | char *drv_log, unsigned long len) | |
574 | { | |
575 | s64 ret; | |
576 | unsigned long flags; | |
577 | struct pci_controller *hose = pe->phb; | |
578 | struct pnv_phb *phb = hose->private_data; | |
579 | ||
580 | spin_lock_irqsave(&phb->lock, flags); | |
581 | ||
582 | ret = opal_pci_get_phb_diag_data2(phb->opal_id, | |
583 | phb->diag.blob, PNV_PCI_DIAG_BUF_SIZE); | |
584 | if (ret) { | |
585 | spin_unlock_irqrestore(&phb->lock, flags); | |
586 | pr_warning("%s: Failed to get log for PHB#%x-PE#%x\n", | |
587 | __func__, hose->global_number, pe->addr); | |
588 | return -EIO; | |
589 | } | |
590 | ||
591 | /* | |
592 | * FIXME: We probably need log the error in somewhere. | |
593 | * Lets make it up in future. | |
594 | */ | |
595 | /* pr_info("%s", phb->diag.blob); */ | |
596 | ||
597 | spin_unlock_irqrestore(&phb->lock, flags); | |
598 | ||
599 | return 0; | |
600 | } | |
601 | ||
/**
 * ioda_eeh_configure_bridge - Configure the PCI bridges for the indicated PE
 * @pe: EEH PE (unused)
 *
 * A PE may include PCI bridges which have to be configured correctly
 * for the PE to work. Nothing needs to be done on P7IOC because the
 * reset function already covers everything, so this is a no-op.
 *
 * Return: always 0.
 */
static int ioda_eeh_configure_bridge(struct eeh_pe *pe)
{
	(void)pe;

	return 0;
}
615 | ||
70f942db GS |
616 | static void ioda_eeh_hub_diag_common(struct OpalIoP7IOCErrorData *data) |
617 | { | |
618 | /* GEM */ | |
619 | pr_info(" GEM XFIR: %016llx\n", data->gemXfir); | |
620 | pr_info(" GEM RFIR: %016llx\n", data->gemRfir); | |
621 | pr_info(" GEM RIRQFIR: %016llx\n", data->gemRirqfir); | |
622 | pr_info(" GEM Mask: %016llx\n", data->gemMask); | |
623 | pr_info(" GEM RWOF: %016llx\n", data->gemRwof); | |
624 | ||
625 | /* LEM */ | |
626 | pr_info(" LEM FIR: %016llx\n", data->lemFir); | |
627 | pr_info(" LEM Error Mask: %016llx\n", data->lemErrMask); | |
628 | pr_info(" LEM Action 0: %016llx\n", data->lemAction0); | |
629 | pr_info(" LEM Action 1: %016llx\n", data->lemAction1); | |
630 | pr_info(" LEM WOF: %016llx\n", data->lemWof); | |
631 | } | |
632 | ||
633 | static void ioda_eeh_hub_diag(struct pci_controller *hose) | |
634 | { | |
635 | struct pnv_phb *phb = hose->private_data; | |
636 | struct OpalIoP7IOCErrorData *data; | |
637 | long rc; | |
638 | ||
639 | data = (struct OpalIoP7IOCErrorData *)ioda_eeh_hub_diag; | |
640 | rc = opal_pci_get_hub_diag_data(phb->hub_id, data, PAGE_SIZE); | |
641 | if (rc != OPAL_SUCCESS) { | |
642 | pr_warning("%s: Failed to get HUB#%llx diag-data (%ld)\n", | |
643 | __func__, phb->hub_id, rc); | |
644 | return; | |
645 | } | |
646 | ||
647 | switch (data->type) { | |
648 | case OPAL_P7IOC_DIAG_TYPE_RGC: | |
649 | pr_info("P7IOC diag-data for RGC\n\n"); | |
650 | ioda_eeh_hub_diag_common(data); | |
651 | pr_info(" RGC Status: %016llx\n", data->rgc.rgcStatus); | |
652 | pr_info(" RGC LDCP: %016llx\n", data->rgc.rgcLdcp); | |
653 | break; | |
654 | case OPAL_P7IOC_DIAG_TYPE_BI: | |
655 | pr_info("P7IOC diag-data for BI %s\n\n", | |
656 | data->bi.biDownbound ? "Downbound" : "Upbound"); | |
657 | ioda_eeh_hub_diag_common(data); | |
658 | pr_info(" BI LDCP 0: %016llx\n", data->bi.biLdcp0); | |
659 | pr_info(" BI LDCP 1: %016llx\n", data->bi.biLdcp1); | |
660 | pr_info(" BI LDCP 2: %016llx\n", data->bi.biLdcp2); | |
661 | pr_info(" BI Fence Status: %016llx\n", data->bi.biFenceStatus); | |
662 | break; | |
663 | case OPAL_P7IOC_DIAG_TYPE_CI: | |
664 | pr_info("P7IOC diag-data for CI Port %d\\nn", | |
665 | data->ci.ciPort); | |
666 | ioda_eeh_hub_diag_common(data); | |
667 | pr_info(" CI Port Status: %016llx\n", data->ci.ciPortStatus); | |
668 | pr_info(" CI Port LDCP: %016llx\n", data->ci.ciPortLdcp); | |
669 | break; | |
670 | case OPAL_P7IOC_DIAG_TYPE_MISC: | |
671 | pr_info("P7IOC diag-data for MISC\n\n"); | |
672 | ioda_eeh_hub_diag_common(data); | |
673 | break; | |
674 | case OPAL_P7IOC_DIAG_TYPE_I2C: | |
675 | pr_info("P7IOC diag-data for I2C\n\n"); | |
676 | ioda_eeh_hub_diag_common(data); | |
677 | break; | |
678 | default: | |
679 | pr_warning("%s: Invalid type of HUB#%llx diag-data (%d)\n", | |
680 | __func__, phb->hub_id, data->type); | |
681 | } | |
682 | } | |
683 | ||
684 | static void ioda_eeh_p7ioc_phb_diag(struct pci_controller *hose, | |
685 | struct OpalIoPhbErrorCommon *common) | |
686 | { | |
687 | struct OpalIoP7IOCPhbErrorData *data; | |
688 | int i; | |
689 | ||
690 | data = (struct OpalIoP7IOCPhbErrorData *)common; | |
691 | ||
692 | pr_info("P7IOC PHB#%x Diag-data (Version: %d)\n\n", | |
693 | hose->global_number, common->version); | |
694 | ||
695 | pr_info(" brdgCtl: %08x\n", data->brdgCtl); | |
696 | ||
697 | pr_info(" portStatusReg: %08x\n", data->portStatusReg); | |
698 | pr_info(" rootCmplxStatus: %08x\n", data->rootCmplxStatus); | |
699 | pr_info(" busAgentStatus: %08x\n", data->busAgentStatus); | |
700 | ||
701 | pr_info(" deviceStatus: %08x\n", data->deviceStatus); | |
702 | pr_info(" slotStatus: %08x\n", data->slotStatus); | |
703 | pr_info(" linkStatus: %08x\n", data->linkStatus); | |
704 | pr_info(" devCmdStatus: %08x\n", data->devCmdStatus); | |
705 | pr_info(" devSecStatus: %08x\n", data->devSecStatus); | |
706 | ||
707 | pr_info(" rootErrorStatus: %08x\n", data->rootErrorStatus); | |
708 | pr_info(" uncorrErrorStatus: %08x\n", data->uncorrErrorStatus); | |
709 | pr_info(" corrErrorStatus: %08x\n", data->corrErrorStatus); | |
710 | pr_info(" tlpHdr1: %08x\n", data->tlpHdr1); | |
711 | pr_info(" tlpHdr2: %08x\n", data->tlpHdr2); | |
712 | pr_info(" tlpHdr3: %08x\n", data->tlpHdr3); | |
713 | pr_info(" tlpHdr4: %08x\n", data->tlpHdr4); | |
714 | pr_info(" sourceId: %08x\n", data->sourceId); | |
715 | ||
716 | pr_info(" errorClass: %016llx\n", data->errorClass); | |
717 | pr_info(" correlator: %016llx\n", data->correlator); | |
718 | pr_info(" p7iocPlssr: %016llx\n", data->p7iocPlssr); | |
719 | pr_info(" p7iocCsr: %016llx\n", data->p7iocCsr); | |
720 | pr_info(" lemFir: %016llx\n", data->lemFir); | |
721 | pr_info(" lemErrorMask: %016llx\n", data->lemErrorMask); | |
722 | pr_info(" lemWOF: %016llx\n", data->lemWOF); | |
723 | pr_info(" phbErrorStatus: %016llx\n", data->phbErrorStatus); | |
724 | pr_info(" phbFirstErrorStatus: %016llx\n", data->phbFirstErrorStatus); | |
725 | pr_info(" phbErrorLog0: %016llx\n", data->phbErrorLog0); | |
726 | pr_info(" phbErrorLog1: %016llx\n", data->phbErrorLog1); | |
727 | pr_info(" mmioErrorStatus: %016llx\n", data->mmioErrorStatus); | |
728 | pr_info(" mmioFirstErrorStatus: %016llx\n", data->mmioFirstErrorStatus); | |
729 | pr_info(" mmioErrorLog0: %016llx\n", data->mmioErrorLog0); | |
730 | pr_info(" mmioErrorLog1: %016llx\n", data->mmioErrorLog1); | |
731 | pr_info(" dma0ErrorStatus: %016llx\n", data->dma0ErrorStatus); | |
732 | pr_info(" dma0FirstErrorStatus: %016llx\n", data->dma0FirstErrorStatus); | |
733 | pr_info(" dma0ErrorLog0: %016llx\n", data->dma0ErrorLog0); | |
734 | pr_info(" dma0ErrorLog1: %016llx\n", data->dma0ErrorLog1); | |
735 | pr_info(" dma1ErrorStatus: %016llx\n", data->dma1ErrorStatus); | |
736 | pr_info(" dma1FirstErrorStatus: %016llx\n", data->dma1FirstErrorStatus); | |
737 | pr_info(" dma1ErrorLog0: %016llx\n", data->dma1ErrorLog0); | |
738 | pr_info(" dma1ErrorLog1: %016llx\n", data->dma1ErrorLog1); | |
739 | ||
740 | for (i = 0; i < OPAL_P7IOC_NUM_PEST_REGS; i++) { | |
741 | if ((data->pestA[i] >> 63) == 0 && | |
742 | (data->pestB[i] >> 63) == 0) | |
743 | continue; | |
744 | ||
745 | pr_info(" PE[%3d] PESTA: %016llx\n", i, data->pestA[i]); | |
746 | pr_info(" PESTB: %016llx\n", data->pestB[i]); | |
747 | } | |
748 | } | |
749 | ||
750 | static void ioda_eeh_phb_diag(struct pci_controller *hose) | |
751 | { | |
752 | struct pnv_phb *phb = hose->private_data; | |
753 | struct OpalIoPhbErrorCommon *common; | |
754 | long rc; | |
755 | ||
756 | common = (struct OpalIoPhbErrorCommon *)phb->diag.blob; | |
757 | rc = opal_pci_get_phb_diag_data2(phb->opal_id, common, PAGE_SIZE); | |
758 | if (rc != OPAL_SUCCESS) { | |
759 | pr_warning("%s: Failed to get diag-data for PHB#%x (%ld)\n", | |
760 | __func__, hose->global_number, rc); | |
761 | return; | |
762 | } | |
763 | ||
764 | switch (common->ioType) { | |
765 | case OPAL_PHB_ERROR_DATA_TYPE_P7IOC: | |
766 | ioda_eeh_p7ioc_phb_diag(hose, common); | |
767 | break; | |
768 | default: | |
769 | pr_warning("%s: Unrecognized I/O chip %d\n", | |
770 | __func__, common->ioType); | |
771 | } | |
772 | } | |
773 | ||
774 | static int ioda_eeh_get_phb_pe(struct pci_controller *hose, | |
775 | struct eeh_pe **pe) | |
776 | { | |
777 | struct eeh_pe *phb_pe; | |
778 | ||
779 | phb_pe = eeh_phb_pe_get(hose); | |
780 | if (!phb_pe) { | |
781 | pr_warning("%s Can't find PE for PHB#%d\n", | |
782 | __func__, hose->global_number); | |
783 | return -EEXIST; | |
784 | } | |
785 | ||
786 | *pe = phb_pe; | |
787 | return 0; | |
788 | } | |
789 | ||
790 | static int ioda_eeh_get_pe(struct pci_controller *hose, | |
791 | u16 pe_no, struct eeh_pe **pe) | |
792 | { | |
793 | struct eeh_pe *phb_pe, *dev_pe; | |
794 | struct eeh_dev dev; | |
795 | ||
796 | /* Find the PHB PE */ | |
797 | if (ioda_eeh_get_phb_pe(hose, &phb_pe)) | |
798 | return -EEXIST; | |
799 | ||
800 | /* Find the PE according to PE# */ | |
801 | memset(&dev, 0, sizeof(struct eeh_dev)); | |
802 | dev.phb = hose; | |
803 | dev.pe_config_addr = pe_no; | |
804 | dev_pe = eeh_pe_get(&dev); | |
805 | if (!dev_pe) { | |
806 | pr_warning("%s: Can't find PE for PHB#%x - PE#%x\n", | |
807 | __func__, hose->global_number, pe_no); | |
808 | return -EEXIST; | |
809 | } | |
810 | ||
811 | *pe = dev_pe; | |
812 | return 0; | |
813 | } | |
814 | ||
815 | /** | |
816 | * ioda_eeh_next_error - Retrieve next error for EEH core to handle | |
817 | * @pe: The affected PE | |
818 | * | |
819 | * The function is expected to be called by EEH core while it gets | |
820 | * special EEH event (without binding PE). The function calls to | |
821 | * OPAL APIs for next error to handle. The informational error is | |
822 | * handled internally by platform. However, the dead IOC, dead PHB, | |
823 | * fenced PHB and frozen PE should be handled by EEH core eventually. | |
824 | */ | |
825 | static int ioda_eeh_next_error(struct eeh_pe **pe) | |
826 | { | |
827 | struct pci_controller *hose, *tmp; | |
828 | struct pnv_phb *phb; | |
829 | u64 frozen_pe_no; | |
830 | u16 err_type, severity; | |
831 | long rc; | |
832 | int ret = 1; | |
833 | ||
7cb9d93d GS |
834 | /* |
835 | * While running here, it's safe to purge the event queue. | |
836 | * And we should keep the cached OPAL notifier event sychronized | |
837 | * between the kernel and firmware. | |
838 | */ | |
70f942db | 839 | eeh_remove_event(NULL); |
7cb9d93d | 840 | opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul); |
70f942db GS |
841 | |
842 | list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { | |
843 | /* | |
844 | * If the subordinate PCI buses of the PHB has been | |
845 | * removed, we needn't take care of it any more. | |
846 | */ | |
847 | phb = hose->private_data; | |
0b9e267d | 848 | if (phb->eeh_state & PNV_EEH_STATE_REMOVED) |
70f942db GS |
849 | continue; |
850 | ||
851 | rc = opal_pci_next_error(phb->opal_id, | |
852 | &frozen_pe_no, &err_type, &severity); | |
853 | ||
854 | /* If OPAL API returns error, we needn't proceed */ | |
855 | if (rc != OPAL_SUCCESS) { | |
20212703 MQ |
856 | pr_devel("%s: Invalid return value on " |
857 | "PHB#%x (0x%lx) from opal_pci_next_error", | |
858 | __func__, hose->global_number, rc); | |
70f942db GS |
859 | continue; |
860 | } | |
861 | ||
862 | /* If the PHB doesn't have error, stop processing */ | |
863 | if (err_type == OPAL_EEH_NO_ERROR || | |
864 | severity == OPAL_EEH_SEV_NO_ERROR) { | |
20212703 MQ |
865 | pr_devel("%s: No error found on PHB#%x\n", |
866 | __func__, hose->global_number); | |
70f942db GS |
867 | continue; |
868 | } | |
869 | ||
870 | /* | |
871 | * Processing the error. We're expecting the error with | |
872 | * highest priority reported upon multiple errors on the | |
873 | * specific PHB. | |
874 | */ | |
20212703 MQ |
875 | pr_devel("%s: Error (%d, %d, %llu) on PHB#%x\n", |
876 | __func__, err_type, severity, | |
877 | frozen_pe_no, hose->global_number); | |
70f942db GS |
878 | switch (err_type) { |
879 | case OPAL_EEH_IOC_ERROR: | |
880 | if (severity == OPAL_EEH_SEV_IOC_DEAD) { | |
881 | list_for_each_entry_safe(hose, tmp, | |
882 | &hose_list, list_node) { | |
883 | phb = hose->private_data; | |
0b9e267d | 884 | phb->eeh_state |= PNV_EEH_STATE_REMOVED; |
70f942db GS |
885 | } |
886 | ||
56ca4fde | 887 | pr_err("EEH: dead IOC detected\n"); |
70f942db GS |
888 | ret = 4; |
889 | goto out; | |
56ca4fde GS |
890 | } else if (severity == OPAL_EEH_SEV_INF) { |
891 | pr_info("EEH: IOC informative error " | |
892 | "detected\n"); | |
70f942db | 893 | ioda_eeh_hub_diag(hose); |
56ca4fde | 894 | } |
70f942db GS |
895 | |
896 | break; | |
897 | case OPAL_EEH_PHB_ERROR: | |
898 | if (severity == OPAL_EEH_SEV_PHB_DEAD) { | |
899 | if (ioda_eeh_get_phb_pe(hose, pe)) | |
900 | break; | |
901 | ||
56ca4fde GS |
902 | pr_err("EEH: dead PHB#%x detected\n", |
903 | hose->global_number); | |
0b9e267d | 904 | phb->eeh_state |= PNV_EEH_STATE_REMOVED; |
70f942db GS |
905 | ret = 3; |
906 | goto out; | |
907 | } else if (severity == OPAL_EEH_SEV_PHB_FENCED) { | |
908 | if (ioda_eeh_get_phb_pe(hose, pe)) | |
909 | break; | |
910 | ||
56ca4fde GS |
911 | pr_err("EEH: fenced PHB#%x detected\n", |
912 | hose->global_number); | |
70f942db GS |
913 | ret = 2; |
914 | goto out; | |
56ca4fde GS |
915 | } else if (severity == OPAL_EEH_SEV_INF) { |
916 | pr_info("EEH: PHB#%x informative error " | |
917 | "detected\n", | |
918 | hose->global_number); | |
70f942db | 919 | ioda_eeh_phb_diag(hose); |
56ca4fde | 920 | } |
70f942db GS |
921 | |
922 | break; | |
923 | case OPAL_EEH_PE_ERROR: | |
924 | if (ioda_eeh_get_pe(hose, frozen_pe_no, pe)) | |
925 | break; | |
926 | ||
56ca4fde GS |
927 | pr_err("EEH: Frozen PE#%x on PHB#%x detected\n", |
928 | (*pe)->addr, (*pe)->phb->global_number); | |
70f942db GS |
929 | ret = 1; |
930 | goto out; | |
931 | } | |
932 | } | |
933 | ||
934 | ret = 0; | |
935 | out: | |
936 | return ret; | |
937 | } | |
938 | ||
8747f363 | 939 | struct pnv_eeh_ops ioda_eeh_ops = { |
73370c66 | 940 | .post_init = ioda_eeh_post_init, |
eb005983 | 941 | .set_option = ioda_eeh_set_option, |
8c41a7f3 | 942 | .get_state = ioda_eeh_get_state, |
9d5cab00 | 943 | .reset = ioda_eeh_reset, |
bf90dfea GS |
944 | .get_log = ioda_eeh_get_log, |
945 | .configure_bridge = ioda_eeh_configure_bridge, | |
70f942db | 946 | .next_error = ioda_eeh_next_error |
8747f363 | 947 | }; |