powerpc/eeh: Emulate EEH recovery for VFIO devices
arch/powerpc/kernel/eeh.c
/*
 * Copyright IBM Corporation 2001, 2005, 2006
 * Copyright Dave Engebretsen & Todd Inglett 2001
 * Copyright Linas Vepstas 2005, 2006
 * Copyright 2001-2012 IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com>
 */

#include <linux/delay.h>
#include <linux/debugfs.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/pci.h>
#include <linux/iommu.h>
#include <linux/proc_fs.h>
#include <linux/rbtree.h>
#include <linux/reboot.h>
#include <linux/seq_file.h>
#include <linux/spinlock.h>
#include <linux/export.h>
#include <linux/of.h>

#include <linux/atomic.h>
#include <asm/debug.h>
#include <asm/eeh.h>
#include <asm/eeh_event.h>
#include <asm/io.h>
#include <asm/iommu.h>
#include <asm/machdep.h>
#include <asm/ppc-pci.h>
#include <asm/rtas.h>

/** Overview:
 *  EEH, or "Extended Error Handling", is a PCI bridge technology for
 *  dealing with PCI bus errors that can't be dealt with within the
 *  usual PCI framework, except by check-stopping the CPU.  Systems
 *  that are designed for high-availability/reliability cannot afford
 *  to crash due to a "mere" PCI error, thus the need for EEH.
 *  An EEH-capable bridge operates by converting a detected error
 *  into a "slot freeze", taking the PCI adapter off-line, making
 *  the slot behave, from the OS'es point of view, as if the slot
 *  were "empty": all reads return 0xff's and all writes are silently
 *  ignored.  EEH slot isolation events can be triggered by parity
 *  errors on the address or data busses (e.g. during posted writes),
 *  which in turn might be caused by low voltage on the bus, dust,
 *  vibration, humidity, radioactivity or plain-old failed hardware.
 *
 *  Note, however, that one of the leading causes of EEH slot
 *  freeze events is buggy device drivers, buggy device microcode,
 *  or buggy device hardware.  This is because any attempt by the
 *  device to bus-master data to a memory address that is not
 *  assigned to the device will trigger a slot freeze.  (The idea
 *  is to prevent devices-gone-wild from corrupting system memory.)
 *  Buggy hardware/drivers will have a miserable time co-existing
 *  with EEH.
 *
 *  Ideally, a PCI device driver, when suspecting that an isolation
 *  event has occurred (e.g. by reading 0xff's), will then ask EEH
 *  whether this is the case, and then take appropriate steps to
 *  reset the PCI slot, the PCI device, and then resume operations.
 *  However, until that day, the checking is done here, with the
 *  eeh_check_failure() routine embedded in the MMIO macros.  If
 *  the slot is found to be isolated, an "EEH Event" is synthesized
 *  and sent out for processing.
 */

/* If a device driver keeps reading an MMIO register in an interrupt
 * handler after a slot isolation event, it might be broken.
 * This sets the threshold for how many read attempts we allow
 * before printing an error message.
 */
#define EEH_MAX_FAILS	2100000

/* Time to wait for a PCI slot to report status, in milliseconds */
#define PCI_BUS_RESET_WAIT_MSEC	(5*60*1000)

/*
 * EEH probe mode support, which is part of the flags,
 * is to support multiple platforms for EEH. Some platforms
 * like pSeries do PCI emulation based on the device tree.
 * However, other platforms like powernv probe PCI devices
 * from hardware. The flag is used to distinguish that.
 * In addition, struct eeh_ops::probe would be invoked for
 * the particular OF node or PCI device so that the corresponding
 * PE would be created there.
 */
int eeh_subsystem_flags;
EXPORT_SYMBOL(eeh_subsystem_flags);

/* Platform dependent EEH operations */
struct eeh_ops *eeh_ops = NULL;

/* Lock to avoid races due to multiple reports of an error */
DEFINE_RAW_SPINLOCK(confirm_error_lock);

/* Lock to protect passed flags */
static DEFINE_MUTEX(eeh_dev_mutex);

/* Buffer for reporting pci register dumps. It's here in BSS, and
 * not dynamically alloced, so that it ends up in RMO where RTAS
 * can access it.
 */
#define EEH_PCI_REGS_LOG_LEN	4096
static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];

/*
 * The struct is used to maintain the EEH global statistic
 * information. Besides, the EEH global statistics will be
 * exported to user space through procfs.
 */
struct eeh_stats {
	u64 no_device;		/* PCI device not found		*/
	u64 no_dn;		/* OF node not found		*/
	u64 no_cfg_addr;	/* Config address not found	*/
	u64 ignored_check;	/* EEH check skipped		*/
	u64 total_mmio_ffs;	/* Total EEH checks		*/
	u64 false_positives;	/* Unnecessary EEH checks	*/
	u64 slot_resets;	/* PE reset			*/
};

static struct eeh_stats eeh_stats;

#define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE)

static int __init eeh_setup(char *str)
{
	if (!strcmp(str, "off"))
		eeh_add_flag(EEH_FORCE_DISABLED);

	return 1;
}
__setup("eeh=", eeh_setup);

/**
 * eeh_gather_pci_data - Copy assorted PCI config space registers to buff
 * @edev: device to report data for
 * @buf: pointer to the buffer in which to log
 * @len: amount of room in buffer
 *
 * This routine captures assorted PCI configuration space data,
 * and puts them into a buffer for RTAS error logging.
 */
static size_t eeh_gather_pci_data(struct eeh_dev *edev, char *buf, size_t len)
{
	struct device_node *dn = eeh_dev_to_of_node(edev);
	u32 cfg;
	int cap, i;
	int n = 0, l = 0;
	char buffer[128];

	n += scnprintf(buf+n, len-n, "%s\n", dn->full_name);
	pr_warn("EEH: of node=%s\n", dn->full_name);

	eeh_ops->read_config(dn, PCI_VENDOR_ID, 4, &cfg);
	n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
	pr_warn("EEH: PCI device/vendor: %08x\n", cfg);

	eeh_ops->read_config(dn, PCI_COMMAND, 4, &cfg);
	n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
	pr_warn("EEH: PCI cmd/status register: %08x\n", cfg);

	/* Gather bridge-specific registers */
	if (edev->mode & EEH_DEV_BRIDGE) {
		eeh_ops->read_config(dn, PCI_SEC_STATUS, 2, &cfg);
		n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg);
		pr_warn("EEH: Bridge secondary status: %04x\n", cfg);

		eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &cfg);
		n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg);
		pr_warn("EEH: Bridge control: %04x\n", cfg);
	}

	/* Dump out the PCI-X command and status regs */
	cap = edev->pcix_cap;
	if (cap) {
		eeh_ops->read_config(dn, cap, 4, &cfg);
		n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg);
		pr_warn("EEH: PCI-X cmd: %08x\n", cfg);

		eeh_ops->read_config(dn, cap+4, 4, &cfg);
		n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg);
		pr_warn("EEH: PCI-X status: %08x\n", cfg);
	}

	/* If PCI-E capable, dump PCI-E cap 10 */
	cap = edev->pcie_cap;
	if (cap) {
		n += scnprintf(buf+n, len-n, "pci-e cap10:\n");
		pr_warn("EEH: PCI-E capabilities and status follow:\n");

		for (i=0; i<=8; i++) {
			eeh_ops->read_config(dn, cap+4*i, 4, &cfg);
			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);

			if ((i % 4) == 0) {
				if (i != 0)
					pr_warn("%s\n", buffer);

				l = scnprintf(buffer, sizeof(buffer),
					      "EEH: PCI-E %02x: %08x ",
					      4*i, cfg);
			} else {
				l += scnprintf(buffer+l, sizeof(buffer)-l,
					       "%08x ", cfg);
			}

		}

		pr_warn("%s\n", buffer);
	}

	/* If AER capable, dump it */
	cap = edev->aer_cap;
	if (cap) {
		n += scnprintf(buf+n, len-n, "pci-e AER:\n");
		pr_warn("EEH: PCI-E AER capability register set follows:\n");

		for (i=0; i<=13; i++) {
			eeh_ops->read_config(dn, cap+4*i, 4, &cfg);
			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);

			if ((i % 4) == 0) {
				if (i != 0)
					pr_warn("%s\n", buffer);

				l = scnprintf(buffer, sizeof(buffer),
					      "EEH: PCI-E AER %02x: %08x ",
					      4*i, cfg);
			} else {
				l += scnprintf(buffer+l, sizeof(buffer)-l,
					       "%08x ", cfg);
			}
		}

		pr_warn("%s\n", buffer);
	}

	return n;
}

/**
 * eeh_slot_error_detail - Generate combined log including driver log and error log
 * @pe: EEH PE
 * @severity: temporary or permanent error log
 *
 * This routine should be called to generate the combined log, which
 * is comprised of driver log and error log. The driver log is figured
 * out from the config space of the corresponding PCI device, while
 * the error log is fetched through a platform dependent function call.
 */
void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
{
	size_t loglen = 0;
	struct eeh_dev *edev, *tmp;

	/*
	 * When the PHB is fenced or dead, it's pointless to collect
	 * the data from PCI config space because it should return
	 * 0xFF's. For ER, we still retrieve the data from the PCI
	 * config space.
	 *
	 * For pHyp, we have to enable IO for log retrieval. Otherwise,
	 * 0xFF's is always returned from PCI config space.
	 */
	if (!(pe->type & EEH_PE_PHB)) {
		if (eeh_has_flag(EEH_ENABLE_IO_FOR_LOG))
			eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
		eeh_ops->configure_bridge(pe);
		eeh_pe_restore_bars(pe);

		pci_regs_buf[0] = 0;
		eeh_pe_for_each_dev(pe, edev, tmp) {
			loglen += eeh_gather_pci_data(edev, pci_regs_buf + loglen,
						      EEH_PCI_REGS_LOG_LEN - loglen);
		}
	}

	eeh_ops->get_log(pe, severity, pci_regs_buf, loglen);
}

/**
 * eeh_token_to_phys - Convert EEH address token to phys address
 * @token: I/O token, should be address in the form 0xA....
 *
 * This routine should be called to convert a virtual I/O address
 * to a physical one.
 */
static inline unsigned long eeh_token_to_phys(unsigned long token)
{
	pte_t *ptep;
	unsigned long pa;
	int hugepage_shift;

	/*
	 * We won't find hugepages here, iomem
	 */
	ptep = find_linux_pte_or_hugepte(init_mm.pgd, token, &hugepage_shift);
	if (!ptep)
		return token;
	WARN_ON(hugepage_shift);
	pa = pte_pfn(*ptep) << PAGE_SHIFT;

	return pa | (token & (PAGE_SIZE-1));
}

/*
 * On the PowerNV platform, we might already have a fenced PHB there.
 * For that case, it's meaningless to recover the frozen PE. Instead,
 * we have to handle the fenced PHB first.
 */
static int eeh_phb_check_failure(struct eeh_pe *pe)
{
	struct eeh_pe *phb_pe;
	unsigned long flags;
	int ret;

	if (!eeh_has_flag(EEH_PROBE_MODE_DEV))
		return -EPERM;

	/* Find the PHB PE */
	phb_pe = eeh_phb_pe_get(pe->phb);
	if (!phb_pe) {
		pr_warn("%s Can't find PE for PHB#%d\n",
			__func__, pe->phb->global_number);
		return -EEXIST;
	}

	/* If the PHB has been in problematic state */
	eeh_serialize_lock(&flags);
	if (phb_pe->state & EEH_PE_ISOLATED) {
		ret = 0;
		goto out;
	}

	/* Check PHB state */
	ret = eeh_ops->get_state(phb_pe, NULL);
	if ((ret < 0) ||
	    (ret == EEH_STATE_NOT_SUPPORT) ||
	    (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
	    (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
		ret = 0;
		goto out;
	}

	/* Isolate the PHB and send event */
	eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED);
	eeh_serialize_unlock(flags);

	pr_err("EEH: PHB#%x failure detected, location: %s\n",
		phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe));
	dump_stack();
	eeh_send_failure_event(phb_pe);

	return 1;
out:
	eeh_serialize_unlock(flags);
	return ret;
}

/**
 * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze
 * @edev: eeh device
 *
 * Check for an EEH failure for the given device node.  Call this
 * routine if the result of a read was all 0xff's and you want to
 * find out if this is due to an EEH slot freeze.  This routine
 * will query firmware for the EEH status.
 *
 * Returns 0 if there has not been an EEH error; otherwise returns
 * a non-zero value and queues up a slot isolation event notification.
 *
 * It is safe to call this routine in an interrupt context.
 */
int eeh_dev_check_failure(struct eeh_dev *edev)
{
	int ret;
	int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
	unsigned long flags;
	struct device_node *dn;
	struct pci_dev *dev;
	struct eeh_pe *pe, *parent_pe, *phb_pe;
	int rc = 0;
	const char *location;

	eeh_stats.total_mmio_ffs++;

	if (!eeh_enabled())
		return 0;

	if (!edev) {
		eeh_stats.no_dn++;
		return 0;
	}
	dn = eeh_dev_to_of_node(edev);
	dev = eeh_dev_to_pci_dev(edev);
	pe = eeh_dev_to_pe(edev);

	/* Access to IO BARs might get this far and still not want checking. */
	if (!pe) {
		eeh_stats.ignored_check++;
		pr_debug("EEH: Ignored check for %s %s\n",
			eeh_pci_name(dev), dn->full_name);
		return 0;
	}

	if (!pe->addr && !pe->config_addr) {
		eeh_stats.no_cfg_addr++;
		return 0;
	}

	/*
	 * On the PowerNV platform, we might already have a fenced PHB
	 * there, and we need to take care of that first.
	 */
	ret = eeh_phb_check_failure(pe);
	if (ret > 0)
		return ret;

	/*
	 * If the PE isn't owned by us, we shouldn't check the
	 * state. Instead, let the owner handle it if the PE has
	 * been frozen.
	 */
	if (eeh_pe_passed(pe))
		return 0;

	/* If we already have a pending isolation event for this
	 * slot, we know it's bad already, we don't need to check.
	 * Do this checking under a lock; as multiple PCI devices
	 * in one slot might report errors simultaneously, and we
	 * only want one error recovery routine running.
	 */
	eeh_serialize_lock(&flags);
	rc = 1;
	if (pe->state & EEH_PE_ISOLATED) {
		pe->check_count++;
		if (pe->check_count % EEH_MAX_FAILS == 0) {
			location = of_get_property(dn, "ibm,loc-code", NULL);
			printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
				"location=%s driver=%s pci addr=%s\n",
				pe->check_count, location,
				eeh_driver_name(dev), eeh_pci_name(dev));
			printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n",
				eeh_driver_name(dev));
			dump_stack();
		}
		goto dn_unlock;
	}

	/*
	 * Now test for an EEH failure.  This is VERY expensive.
	 * Note that the eeh_config_addr may be a parent device
	 * in the case of a device behind a bridge, or it may be
	 * function zero of a multi-function device.
	 * In any case they must share a common PHB.
	 */
	ret = eeh_ops->get_state(pe, NULL);

	/* Note that config-io to empty slots may fail;
	 * they are empty when they don't have children.
	 * We will punt under any of the following conditions: failure
	 * to get the PE's state, EEH not supported or permanently
	 * unavailable, or the PE is in a good state.
	 */
	if ((ret < 0) ||
	    (ret == EEH_STATE_NOT_SUPPORT) ||
	    ((ret & active_flags) == active_flags)) {
		eeh_stats.false_positives++;
		pe->false_positives++;
		rc = 0;
		goto dn_unlock;
	}

	/*
	 * It should be a corner case that the parent PE has been
	 * put into frozen state as well. We should take care of
	 * that first.
	 */
	parent_pe = pe->parent;
	while (parent_pe) {
		/* Hit the ceiling ? */
		if (parent_pe->type & EEH_PE_PHB)
			break;

		/* Frozen parent PE ? */
		ret = eeh_ops->get_state(parent_pe, NULL);
		if (ret > 0 &&
		    (ret & active_flags) != active_flags)
			pe = parent_pe;

		/* Next parent level */
		parent_pe = parent_pe->parent;
	}

	eeh_stats.slot_resets++;

	/* Avoid repeated reports of this failure, including problems
	 * with other functions on this device, and functions under
	 * bridges.
	 */
	eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
	eeh_serialize_unlock(flags);

	/* Most EEH events are due to device driver bugs.  Having
	 * a stack trace will help the device-driver authors figure
	 * out what happened.  So print that out.
	 */
	phb_pe = eeh_phb_pe_get(pe->phb);
	pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
	       pe->phb->global_number, pe->addr);
	pr_err("EEH: PE location: %s, PHB location: %s\n",
	       eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
	dump_stack();

	eeh_send_failure_event(pe);

	return 1;

dn_unlock:
	eeh_serialize_unlock(flags);
	return rc;
}

EXPORT_SYMBOL_GPL(eeh_dev_check_failure);

/**
 * eeh_check_failure - Check if all 1's data is due to EEH slot freeze
 * @token: I/O address
 *
 * Check for an EEH failure at the given I/O address. Call this
 * routine if the result of a read was all 0xff's and you want to
 * find out if this is due to an EEH slot freeze event. This routine
 * will query firmware for the EEH status.
 *
 * Note this routine is safe to call in an interrupt context.
 */
int eeh_check_failure(const volatile void __iomem *token)
{
	unsigned long addr;
	struct eeh_dev *edev;

	/* Finding the phys addr + pci device; this is pretty quick. */
	addr = eeh_token_to_phys((unsigned long __force) token);
	edev = eeh_addr_cache_get_dev(addr);
	if (!edev) {
		eeh_stats.no_device++;
		return 0;
	}

	return eeh_dev_check_failure(edev);
}
EXPORT_SYMBOL(eeh_check_failure);
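
/*
 * Illustrative only (not part of this file's logic): a driver that reads
 * all 1's from MMIO and suspects a slot freeze could ask EEH explicitly,
 * roughly as sketched below. "priv", "regs" and "STATUS_OFF" are
 * hypothetical names used just for the example.
 *
 *	u32 val = readl(priv->regs + STATUS_OFF);
 *
 *	if (val == 0xffffffff &&
 *	    eeh_check_failure(priv->regs + STATUS_OFF)) {
 *		// Slot is isolated; an EEH event has been queued and the
 *		// driver's PCI error handlers will be invoked for recovery.
 *		return -EIO;
 *	}
 *
 * In practice the same check is already embedded in the powerpc MMIO
 * accessors, as described in the overview comment at the top of this file.
 */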


/**
 * eeh_pci_enable - Enable MMIO or DMA transfers for this slot
 * @pe: EEH PE
 *
 * This routine should be called to reenable frozen MMIO or DMA
 * so that it would work correctly again. It's useful while doing
 * recovery or log collection on the indicated device.
 */
int eeh_pci_enable(struct eeh_pe *pe, int function)
{
	int active_flag, rc;

	/*
	 * pHyp doesn't allow enabling IO or DMA on an unfrozen PE.
	 * Also, it's pointless to enable them on an unfrozen PE. So
	 * we have to check before enabling IO or DMA.
	 */
	switch (function) {
	case EEH_OPT_THAW_MMIO:
		active_flag = EEH_STATE_MMIO_ACTIVE;
		break;
	case EEH_OPT_THAW_DMA:
		active_flag = EEH_STATE_DMA_ACTIVE;
		break;
	case EEH_OPT_DISABLE:
	case EEH_OPT_ENABLE:
	case EEH_OPT_FREEZE_PE:
		active_flag = 0;
		break;
	default:
		pr_warn("%s: Invalid function %d\n",
			__func__, function);
		return -EINVAL;
	}

	/*
	 * Check if IO or DMA has been enabled before
	 * enabling them.
	 */
	if (active_flag) {
		rc = eeh_ops->get_state(pe, NULL);
		if (rc < 0)
			return rc;

		/* Needn't enable it at all */
		if (rc == EEH_STATE_NOT_SUPPORT)
			return 0;

		/* It's already enabled */
		if (rc & active_flag)
			return 0;
	}


	/* Issue the request */
	rc = eeh_ops->set_option(pe, function);
	if (rc)
		pr_warn("%s: Unexpected state change %d on "
			"PHB#%d-PE#%x, err=%d\n",
			__func__, function, pe->phb->global_number,
			pe->addr, rc);

	/* Check if the request is finished successfully */
	if (active_flag) {
		rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
		if (rc <= 0)
			return rc;

		if (rc & active_flag)
			return 0;

		return -EIO;
	}

	return rc;
}
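
/*
 * A minimal usage sketch (assumption, for illustration only): log
 * collection code that needs config space to be readable on a frozen PE
 * can thaw MMIO first, much as eeh_slot_error_detail() does above.
 *
 *	if (eeh_pci_enable(pe, EEH_OPT_THAW_MMIO) == 0) {
 *		// MMIO is active again; PCI config reads should no longer
 *		// return all 0xFF's for devices in this PE.
 *	}
 */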

/**
 * pcibios_set_pcie_reset_state - Set PCI-E reset state
 * @dev: pci device struct
 * @state: reset state to enter
 *
 * Return value:
 *	0 if success
 */
int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
{
	struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
	struct eeh_pe *pe = eeh_dev_to_pe(edev);

	if (!pe) {
		pr_err("%s: No PE found on PCI device %s\n",
			__func__, pci_name(dev));
		return -EINVAL;
	}

	switch (state) {
	case pcie_deassert_reset:
		eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
		eeh_pe_state_clear(pe, EEH_PE_RESET);
		break;
	case pcie_hot_reset:
		eeh_pe_state_mark(pe, EEH_PE_RESET);
		eeh_ops->reset(pe, EEH_RESET_HOT);
		break;
	case pcie_warm_reset:
		eeh_pe_state_mark(pe, EEH_PE_RESET);
		eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
		break;
	default:
		eeh_pe_state_clear(pe, EEH_PE_RESET);
		return -EINVAL;
	}

	return 0;
}
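
/*
 * Drivers normally reach this through the generic PCI helper rather than
 * calling it directly. A hedged sketch (the surrounding driver code and
 * the 100ms hold time are assumptions for illustration):
 *
 *	pci_set_pcie_reset_state(pdev, pcie_hot_reset);
 *	msleep(100);			// hold the reset for a while
 *	pci_set_pcie_reset_state(pdev, pcie_deassert_reset);
 *
 * On powerpc this ends up asserting and deasserting the reset on the
 * whole PE that contains @pdev.
 */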

/**
 * eeh_set_dev_freset - Check the required reset for the indicated device
 * @data: EEH device
 * @flag: return value
 *
 * Each device might have its preferred reset type: fundamental or
 * hot reset. The routine is used to collect the information for
 * the indicated device and its children so that the bunch of
 * devices could be reset properly.
 */
static void *eeh_set_dev_freset(void *data, void *flag)
{
	struct pci_dev *dev;
	unsigned int *freset = (unsigned int *)flag;
	struct eeh_dev *edev = (struct eeh_dev *)data;

	dev = eeh_dev_to_pci_dev(edev);
	if (dev)
		*freset |= dev->needs_freset;

	return NULL;
}

/**
 * eeh_reset_pe_once - Assert the pci #RST line for 1/4 second
 * @pe: EEH PE
 *
 * Assert the PCI #RST line for 1/4 second.
 */
static void eeh_reset_pe_once(struct eeh_pe *pe)
{
	unsigned int freset = 0;

	/* Determine type of EEH reset required for
	 * Partitionable Endpoint, a hot-reset (1)
	 * or a fundamental reset (3).
	 * A fundamental reset required by any device under
	 * Partitionable Endpoint trumps hot-reset.
	 */
	eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset);

	if (freset)
		eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
	else
		eeh_ops->reset(pe, EEH_RESET_HOT);

	eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
}

/**
 * eeh_reset_pe - Reset the indicated PE
 * @pe: EEH PE
 *
 * This routine should be called to reset the indicated device, including
 * the PE. A PE might include multiple PCI devices and sometimes PCI bridges
 * might be involved as well.
 */
int eeh_reset_pe(struct eeh_pe *pe)
{
	int flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
	int i, rc;

	/* Take three shots at resetting the bus */
	for (i=0; i<3; i++) {
		eeh_reset_pe_once(pe);

		/*
		 * EEH_PE_ISOLATED is expected to be removed after
		 * BAR restore.
		 */
		rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
		if ((rc & flags) == flags)
			return 0;

		if (rc < 0) {
			pr_err("%s: Unrecoverable slot failure on PHB#%d-PE#%x",
				__func__, pe->phb->global_number, pe->addr);
			return -1;
		}
		pr_err("EEH: bus reset %d failed on PHB#%d-PE#%x, rc=%d\n",
			i+1, pe->phb->global_number, pe->addr, rc);
	}

	return -1;
}

/**
 * eeh_save_bars - Save device bars
 * @edev: PCI device associated EEH device
 *
 * Save the values of the device bars. Unlike the restore
 * routine, this routine is *not* recursive. This is because
 * PCI devices are added individually; but, for the restore,
 * an entire slot is reset at a time.
 */
void eeh_save_bars(struct eeh_dev *edev)
{
	int i;
	struct device_node *dn;

	if (!edev)
		return;
	dn = eeh_dev_to_of_node(edev);

	for (i = 0; i < 16; i++)
		eeh_ops->read_config(dn, i * 4, 4, &edev->config_space[i]);

	/*
	 * For PCI bridges including the root port, we need to enable bus
	 * mastering explicitly. Otherwise, they can't fetch IODA table
	 * entries correctly. So we cache the bit in advance so that
	 * we can restore it after reset, either PHB range or PE range.
	 */
	if (edev->mode & EEH_DEV_BRIDGE)
		edev->config_space[1] |= PCI_COMMAND_MASTER;
}

/**
 * eeh_ops_register - Register platform dependent EEH operations
 * @ops: platform dependent EEH operations
 *
 * Register the platform dependent EEH operation callback
 * functions. The platform should call this function before
 * any other EEH operations.
 */
int __init eeh_ops_register(struct eeh_ops *ops)
{
	if (!ops->name) {
		pr_warn("%s: Invalid EEH ops name for %p\n",
			__func__, ops);
		return -EINVAL;
	}

	if (eeh_ops && eeh_ops != ops) {
		pr_warn("%s: EEH ops of platform %s already existing (%s)\n",
			__func__, eeh_ops->name, ops->name);
		return -EEXIST;
	}

	eeh_ops = ops;

	return 0;
}

/**
 * eeh_ops_unregister - Unregister platform dependent EEH operations
 * @name: name of EEH platform operations
 *
 * Unregister the platform dependent EEH operation callback
 * functions.
 */
int __exit eeh_ops_unregister(const char *name)
{
	if (!name || !strlen(name)) {
		pr_warn("%s: Invalid EEH ops name\n",
			__func__);
		return -EINVAL;
	}

	if (eeh_ops && !strcmp(eeh_ops->name, name)) {
		eeh_ops = NULL;
		return 0;
	}

	return -EEXIST;
}

static int eeh_reboot_notifier(struct notifier_block *nb,
			       unsigned long action, void *unused)
{
	eeh_clear_flag(EEH_ENABLED);
	return NOTIFY_DONE;
}

static struct notifier_block eeh_reboot_nb = {
	.notifier_call = eeh_reboot_notifier,
};

/**
 * eeh_init - EEH initialization
 *
 * Initialize EEH by trying to enable it for all of the adapters in the system.
 * As a side effect we can determine here if eeh is supported at all.
 * Note that we leave EEH on so failed config cycles won't cause a machine
 * check. If a user turns off EEH for a particular adapter they are really
 * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't
 * grant access to a slot if EEH isn't enabled, and so we always enable
 * EEH for all slots/all devices.
 *
 * The eeh-force-off option disables EEH checking globally, for all slots.
 * Even if force-off is set, the EEH hardware is still enabled, so that
 * newer systems can boot.
 */
int eeh_init(void)
{
	struct pci_controller *hose, *tmp;
	struct device_node *phb;
	static int cnt = 0;
	int ret = 0;

	/*
	 * We have to delay the initialization on PowerNV until after
	 * the PCI hierarchy tree has been built, because the PEs
	 * are figured out based on PCI devices instead of device
	 * tree nodes.
	 */
	if (machine_is(powernv) && cnt++ <= 0)
		return ret;

	/* Register reboot notifier */
	ret = register_reboot_notifier(&eeh_reboot_nb);
	if (ret) {
		pr_warn("%s: Failed to register notifier (%d)\n",
			__func__, ret);
		return ret;
	}

	/* call platform initialization function */
	if (!eeh_ops) {
		pr_warn("%s: Platform EEH operation not found\n",
			__func__);
		return -EEXIST;
	} else if ((ret = eeh_ops->init())) {
		pr_warn("%s: Failed to call platform init function (%d)\n",
			__func__, ret);
		return ret;
	}

	/* Initialize EEH event */
	ret = eeh_event_init();
	if (ret)
		return ret;

	/* Enable EEH for all adapters */
	if (eeh_has_flag(EEH_PROBE_MODE_DEVTREE)) {
		list_for_each_entry_safe(hose, tmp,
			&hose_list, list_node) {
			phb = hose->dn;
			traverse_pci_devices(phb, eeh_ops->of_probe, NULL);
		}
	} else if (eeh_has_flag(EEH_PROBE_MODE_DEV)) {
		list_for_each_entry_safe(hose, tmp,
			&hose_list, list_node)
			pci_walk_bus(hose->bus, eeh_ops->dev_probe, NULL);
	} else {
		pr_warn("%s: Invalid probe mode %x",
			__func__, eeh_subsystem_flags);
		return -EINVAL;
	}

	/*
	 * Call platform post-initialization. Actually, it's a good chance
	 * to inform the platform that EEH is ready to supply service once
	 * the I/O cache stuff has been built up.
	 */
	if (eeh_ops->post_init) {
		ret = eeh_ops->post_init();
		if (ret)
			return ret;
	}

	if (eeh_enabled())
		pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
	else
		pr_warn("EEH: No capable adapters found\n");

	return ret;
}

core_initcall_sync(eeh_init);

/**
 * eeh_add_device_early - Enable EEH for the indicated device_node
 * @dn: device node for which to set up EEH
 *
 * This routine must be used to perform EEH initialization for PCI
 * devices that were added after system boot (e.g. hotplug, dlpar).
 * This routine must be called before any i/o is performed to the
 * adapter (including any config-space i/o).
 * Whether this actually enables EEH or not for this device depends
 * on the CEC architecture, the type of the device, earlier boot
 * command-line arguments, etc.
 */
void eeh_add_device_early(struct device_node *dn)
{
	struct pci_controller *phb;

	/*
	 * If we're doing an EEH probe based on PCI device, we
	 * delay the probe until the late stage because the PCI
	 * device isn't available at this moment.
	 */
	if (!eeh_has_flag(EEH_PROBE_MODE_DEVTREE))
		return;

	if (!of_node_to_eeh_dev(dn))
		return;
	phb = of_node_to_eeh_dev(dn)->phb;

	/* USB Bus children of PCI devices will not have BUID's */
	if (NULL == phb || 0 == phb->buid)
		return;

	eeh_ops->of_probe(dn, NULL);
}

/**
 * eeh_add_device_tree_early - Enable EEH for the indicated device
 * @dn: device node
 *
 * This routine must be used to perform EEH initialization for the
 * indicated PCI device that was added after system boot (e.g.
 * hotplug, dlpar).
 */
void eeh_add_device_tree_early(struct device_node *dn)
{
	struct device_node *sib;

	for_each_child_of_node(dn, sib)
		eeh_add_device_tree_early(sib);
	eeh_add_device_early(dn);
}
EXPORT_SYMBOL_GPL(eeh_add_device_tree_early);

/**
 * eeh_add_device_late - Perform EEH initialization for the indicated pci device
 * @dev: pci device for which to set up EEH
 *
 * This routine must be used to complete EEH initialization for PCI
 * devices that were added after system boot (e.g. hotplug, dlpar).
 */
void eeh_add_device_late(struct pci_dev *dev)
{
	struct device_node *dn;
	struct eeh_dev *edev;

	if (!dev || !eeh_enabled())
		return;

	pr_debug("EEH: Adding device %s\n", pci_name(dev));

	dn = pci_device_to_OF_node(dev);
	edev = of_node_to_eeh_dev(dn);
	if (edev->pdev == dev) {
		pr_debug("EEH: Already referenced !\n");
		return;
	}

	/*
	 * The EEH cache might not be removed correctly because of
	 * an unbalanced kref to the device during unplug time, which
	 * relies on pcibios_release_device(). So we have to remove
	 * that here explicitly.
	 */
	if (edev->pdev) {
		eeh_rmv_from_parent_pe(edev);
		eeh_addr_cache_rmv_dev(edev->pdev);
		eeh_sysfs_remove_device(edev->pdev);
		edev->mode &= ~EEH_DEV_SYSFS;

		/*
		 * The PCI device should have been removed already,
		 * even though that didn't happen cleanly. So we
		 * needn't call into the error handler afterwards.
		 */
		edev->mode |= EEH_DEV_NO_HANDLER;

		edev->pdev = NULL;
		dev->dev.archdata.edev = NULL;
	}

	edev->pdev = dev;
	dev->dev.archdata.edev = edev;

	/*
	 * We have to do the EEH probe here because the PCI device
	 * hasn't been created yet in the early stage.
	 */
	if (eeh_has_flag(EEH_PROBE_MODE_DEV))
		eeh_ops->dev_probe(dev, NULL);

	eeh_addr_cache_insert_dev(dev);
}

/**
 * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus
 * @bus: PCI bus
 *
 * This routine must be used to perform EEH initialization for PCI
 * devices which are attached to the indicated PCI bus. The PCI bus
 * is added after system boot through hotplug or dlpar.
 */
void eeh_add_device_tree_late(struct pci_bus *bus)
{
	struct pci_dev *dev;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		eeh_add_device_late(dev);
		if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
			struct pci_bus *subbus = dev->subordinate;
			if (subbus)
				eeh_add_device_tree_late(subbus);
		}
	}
}
EXPORT_SYMBOL_GPL(eeh_add_device_tree_late);

/**
 * eeh_add_sysfs_files - Add EEH sysfs files for the indicated PCI bus
 * @bus: PCI bus
 *
 * This routine must be used to add EEH sysfs files for PCI
 * devices which are attached to the indicated PCI bus. The PCI bus
 * is added after system boot through hotplug or dlpar.
 */
void eeh_add_sysfs_files(struct pci_bus *bus)
{
	struct pci_dev *dev;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		eeh_sysfs_add_device(dev);
		if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
			struct pci_bus *subbus = dev->subordinate;
			if (subbus)
				eeh_add_sysfs_files(subbus);
		}
	}
}
EXPORT_SYMBOL_GPL(eeh_add_sysfs_files);
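
/*
 * Rough sketch of how hotplug/DLPAR paths are expected to wire the
 * helpers above together (ordering is an assumption for illustration;
 * "dn" and "bus" are hypothetical variables in the caller):
 *
 *	eeh_add_device_tree_early(dn);	// before any config-space i/o
 *	... the PCI core scans and adds the devices on "bus" ...
 *	eeh_add_device_tree_late(bus);	// once pci_dev structs exist
 *	eeh_add_sysfs_files(bus);	// once devices are in sysfs
 */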

/**
 * eeh_remove_device - Undo EEH setup for the indicated pci device
 * @dev: pci device to be removed
 *
 * This routine should be called when a device is removed from
 * a running system (e.g. by hotplug or dlpar).  It unregisters
 * the PCI device from the EEH subsystem.  I/O errors affecting
 * this device will no longer be detected after this call; thus,
 * i/o errors affecting this slot may leave this device unusable.
 */
void eeh_remove_device(struct pci_dev *dev)
{
	struct eeh_dev *edev;

	if (!dev || !eeh_enabled())
		return;
	edev = pci_dev_to_eeh_dev(dev);

	/* Unregister the device with the EEH/PCI address search system */
	pr_debug("EEH: Removing device %s\n", pci_name(dev));

	if (!edev || !edev->pdev || !edev->pe) {
		pr_debug("EEH: Not referenced !\n");
		return;
	}

	/*
	 * During the hotplug for EEH error recovery, we need the EEH
	 * device attached to the parent PE in order for BAR restore
	 * a bit later. So we keep it for BAR restore and remove it
	 * from the parent PE during the BAR restore.
	 */
	edev->pdev = NULL;
	dev->dev.archdata.edev = NULL;
	if (!(edev->pe->state & EEH_PE_KEEP))
		eeh_rmv_from_parent_pe(edev);
	else
		edev->mode |= EEH_DEV_DISCONNECTED;

	/*
	 * We're removing the device from the PCI subsystem, which means
	 * the PCI device driver doesn't support EEH, or doesn't support
	 * it well. So we rely on hotplug completely to do the recovery
	 * for the specific PCI device.
	 */
	edev->mode |= EEH_DEV_NO_HANDLER;

	eeh_addr_cache_rmv_dev(dev);
	eeh_sysfs_remove_device(dev);
	edev->mode &= ~EEH_DEV_SYSFS;
}

int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state)
{
	int ret;

	ret = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
	if (ret) {
		pr_warn("%s: Failure %d enabling IO on PHB#%x-PE#%x\n",
			__func__, ret, pe->phb->global_number, pe->addr);
		return ret;
	}

	ret = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
	if (ret) {
		pr_warn("%s: Failure %d enabling DMA on PHB#%x-PE#%x\n",
			__func__, ret, pe->phb->global_number, pe->addr);
		return ret;
	}

	/* Clear software isolated state */
	if (sw_state && (pe->state & EEH_PE_ISOLATED))
		eeh_pe_state_clear(pe, EEH_PE_ISOLATED);

	return ret;
}


static struct pci_device_id eeh_reset_ids[] = {
	{ PCI_DEVICE(0x19a2, 0x0710) },	/* Emulex, BE     */
	{ PCI_DEVICE(0x10df, 0xe220) },	/* Emulex, Lancer */
	{ 0 }
};

static int eeh_pe_change_owner(struct eeh_pe *pe)
{
	struct eeh_dev *edev, *tmp;
	struct pci_dev *pdev;
	struct pci_device_id *id;
	int flags, ret;

	/* Check PE state */
	flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
	ret = eeh_ops->get_state(pe, NULL);
	if (ret < 0 || ret == EEH_STATE_NOT_SUPPORT)
		return 0;

	/* Unfrozen PE, nothing to do */
	if ((ret & flags) == flags)
		return 0;

	/* Frozen PE, check if it needs PE level reset */
	eeh_pe_for_each_dev(pe, edev, tmp) {
		pdev = eeh_dev_to_pci_dev(edev);
		if (!pdev)
			continue;

		for (id = &eeh_reset_ids[0]; id->vendor != 0; id++) {
			if (id->vendor != PCI_ANY_ID &&
			    id->vendor != pdev->vendor)
				continue;
			if (id->device != PCI_ANY_ID &&
			    id->device != pdev->device)
				continue;
			if (id->subvendor != PCI_ANY_ID &&
			    id->subvendor != pdev->subsystem_vendor)
				continue;
			if (id->subdevice != PCI_ANY_ID &&
			    id->subdevice != pdev->subsystem_device)
				continue;

			goto reset;
		}
	}

	return eeh_unfreeze_pe(pe, true);

reset:
	return eeh_pe_reset_and_recover(pe);
}

/**
 * eeh_dev_open - Increase count of pass through devices for PE
 * @pdev: PCI device
 *
 * Increase the count of passed through devices for the indicated
 * PE. As a result, the EEH errors detected on the PE won't be
 * reported. The PE owner will be responsible for detection
 * and recovery.
 */
int eeh_dev_open(struct pci_dev *pdev)
{
	struct eeh_dev *edev;
	int ret = -ENODEV;

	mutex_lock(&eeh_dev_mutex);

	/* No PCI device ? */
	if (!pdev)
		goto out;

	/* No EEH device or PE ? */
	edev = pci_dev_to_eeh_dev(pdev);
	if (!edev || !edev->pe)
		goto out;

	/*
	 * The PE might have been put into frozen state, but we
	 * didn't detect that yet. The passed through PCI devices
	 * in a frozen PE won't work properly. Clear the frozen state
	 * in advance.
	 */
	ret = eeh_pe_change_owner(edev->pe);
	if (ret)
		goto out;

	/* Increase PE's pass through count */
	atomic_inc(&edev->pe->pass_dev_cnt);
	mutex_unlock(&eeh_dev_mutex);

	return 0;
out:
	mutex_unlock(&eeh_dev_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(eeh_dev_open);

/**
 * eeh_dev_release - Decrease count of pass through devices for PE
 * @pdev: PCI device
 *
 * Decrease the count of pass through devices for the indicated PE. If
 * there is no passed through device in the PE, the EEH errors detected
 * on the PE will be reported and handled as usual.
 */
void eeh_dev_release(struct pci_dev *pdev)
{
	struct eeh_dev *edev;

	mutex_lock(&eeh_dev_mutex);

	/* No PCI device ? */
	if (!pdev)
		goto out;

	/* No EEH device ? */
	edev = pci_dev_to_eeh_dev(pdev);
	if (!edev || !edev->pe || !eeh_pe_passed(edev->pe))
		goto out;

	/* Decrease PE's pass through count */
	atomic_dec(&edev->pe->pass_dev_cnt);
	WARN_ON(atomic_read(&edev->pe->pass_dev_cnt) < 0);
	eeh_pe_change_owner(edev->pe);
out:
	mutex_unlock(&eeh_dev_mutex);
}
EXPORT_SYMBOL(eeh_dev_release);
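
/*
 * A hedged sketch of how a pass-through backend (e.g. the VFIO path this
 * API was added for) might bracket device ownership; error handling and
 * the surrounding VFIO plumbing are omitted:
 *
 *	if (eeh_dev_open(pdev))		// EEH now ignores errors on this PE
 *		goto fail;
 *	... guest owns the device and drives its own error recovery ...
 *	eeh_dev_release(pdev);		// host EEH handling resumes
 */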

#ifdef CONFIG_IOMMU_API

static int dev_has_iommu_table(struct device *dev, void *data)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct pci_dev **ppdev = data;
	struct iommu_table *tbl;

	if (!dev)
		return 0;

	tbl = get_iommu_table_base(dev);
	if (tbl && tbl->it_group) {
		*ppdev = pdev;
		return 1;
	}

	return 0;
}

/**
 * eeh_iommu_group_to_pe - Convert IOMMU group to EEH PE
 * @group: IOMMU group
 *
 * The routine is called to convert an IOMMU group to an EEH PE.
 */
struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group)
{
	struct pci_dev *pdev = NULL;
	struct eeh_dev *edev;
	int ret;

	/* No IOMMU group ? */
	if (!group)
		return NULL;

	ret = iommu_group_for_each_dev(group, &pdev, dev_has_iommu_table);
	if (!ret || !pdev)
		return NULL;

	/* No EEH device or PE ? */
	edev = pci_dev_to_eeh_dev(pdev);
	if (!edev || !edev->pe)
		return NULL;

	return edev->pe;
}
EXPORT_SYMBOL_GPL(eeh_iommu_group_to_pe);
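
/*
 * Illustrative only: a VFIO-style caller holding an IOMMU group could
 * resolve the PE it needs to operate on roughly like this (assumed flow,
 * not a definitive API contract):
 *
 *	struct eeh_pe *pe = eeh_iommu_group_to_pe(group);
 *
 *	if (!pe)
 *		return -ENODEV;		// group has no EEH-capable device
 *	state = eeh_pe_get_state(pe);	// then query or reset the PE
 */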

#endif /* CONFIG_IOMMU_API */

/**
 * eeh_pe_set_option - Set options for the indicated PE
 * @pe: EEH PE
 * @option: requested option
 *
 * The routine is called to enable or disable EEH functionality
 * on the indicated PE, or to enable IO or DMA for the frozen PE.
 */
int eeh_pe_set_option(struct eeh_pe *pe, int option)
{
	int ret = 0;

	/* Invalid PE ? */
	if (!pe)
		return -ENODEV;

	/*
	 * EEH functionality could possibly be disabled, just
	 * return an error for that case. And the EEH functionality
	 * isn't expected to be disabled on one specific PE.
	 */
	switch (option) {
	case EEH_OPT_ENABLE:
		if (eeh_enabled()) {
			ret = eeh_pe_change_owner(pe);
			break;
		}
		ret = -EIO;
		break;
	case EEH_OPT_DISABLE:
		break;
	case EEH_OPT_THAW_MMIO:
	case EEH_OPT_THAW_DMA:
		if (!eeh_ops || !eeh_ops->set_option) {
			ret = -ENOENT;
			break;
		}

		ret = eeh_pci_enable(pe, option);
		break;
	default:
		pr_debug("%s: Option %d out of range (%d, %d)\n",
			__func__, option, EEH_OPT_DISABLE, EEH_OPT_THAW_DMA);
		ret = -EINVAL;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_set_option);

/**
 * eeh_pe_get_state - Retrieve PE's state
 * @pe: EEH PE
 *
 * Retrieve the PE's state, which includes 3 aspects: enabled
 * DMA, enabled IO and asserted reset.
 */
int eeh_pe_get_state(struct eeh_pe *pe)
{
	int result, ret = 0;
	bool rst_active, dma_en, mmio_en;

	/* Existing PE ? */
	if (!pe)
		return -ENODEV;

	if (!eeh_ops || !eeh_ops->get_state)
		return -ENOENT;

	result = eeh_ops->get_state(pe, NULL);
	rst_active = !!(result & EEH_STATE_RESET_ACTIVE);
	dma_en = !!(result & EEH_STATE_DMA_ENABLED);
	mmio_en = !!(result & EEH_STATE_MMIO_ENABLED);

	if (rst_active)
		ret = EEH_PE_STATE_RESET;
	else if (dma_en && mmio_en)
		ret = EEH_PE_STATE_NORMAL;
	else if (!dma_en && !mmio_en)
		ret = EEH_PE_STATE_STOPPED_IO_DMA;
	else if (!dma_en && mmio_en)
		ret = EEH_PE_STATE_STOPPED_DMA;
	else
		ret = EEH_PE_STATE_UNAVAIL;

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_get_state);
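
/*
 * Sketch of interpreting the return value (assumed caller-side handling,
 * mirroring the classification above):
 *
 *	switch (eeh_pe_get_state(pe)) {
 *	case EEH_PE_STATE_NORMAL:		// MMIO and DMA both enabled
 *		break;
 *	case EEH_PE_STATE_STOPPED_IO_DMA:	// frozen: both blocked
 *	case EEH_PE_STATE_STOPPED_DMA:		// MMIO works, DMA blocked
 *		// caller decides to thaw or reset the PE
 *		break;
 *	case EEH_PE_STATE_RESET:		// reset currently asserted
 *	default:
 *		break;
 *	}
 */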

static int eeh_pe_reenable_devices(struct eeh_pe *pe)
{
	struct eeh_dev *edev, *tmp;
	struct pci_dev *pdev;
	int ret = 0;

	/* Restore config space */
	eeh_pe_restore_bars(pe);

	/*
	 * Reenable PCI devices as the devices passed
	 * through are always enabled before the reset.
	 */
	eeh_pe_for_each_dev(pe, edev, tmp) {
		pdev = eeh_dev_to_pci_dev(edev);
		if (!pdev)
			continue;

		ret = pci_reenable_device(pdev);
		if (ret) {
			pr_warn("%s: Failure %d reenabling %s\n",
				__func__, ret, pci_name(pdev));
			return ret;
		}
	}

	/* The PE is still in frozen state */
	return eeh_unfreeze_pe(pe, true);
}

/**
 * eeh_pe_reset - Issue PE reset according to specified type
 * @pe: EEH PE
 * @option: reset type
 *
 * The routine is called to reset the specified PE with the
 * indicated type, either fundamental reset or hot reset.
 * PE reset is the most important part of error recovery.
 */
int eeh_pe_reset(struct eeh_pe *pe, int option)
{
	int ret = 0;

	/* Invalid PE ? */
	if (!pe)
		return -ENODEV;

	if (!eeh_ops || !eeh_ops->set_option || !eeh_ops->reset)
		return -ENOENT;

	switch (option) {
	case EEH_RESET_DEACTIVATE:
		ret = eeh_ops->reset(pe, option);
		eeh_pe_state_clear(pe, EEH_PE_RESET);
		if (ret)
			break;

		ret = eeh_pe_reenable_devices(pe);
		break;
	case EEH_RESET_HOT:
	case EEH_RESET_FUNDAMENTAL:
		/*
		 * Proactively freeze the PE to drop all MMIO access
		 * during reset, which should be banned as it always
		 * causes a recursive EEH error.
		 */
		eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);

		eeh_pe_state_mark(pe, EEH_PE_RESET);
		ret = eeh_ops->reset(pe, option);
		break;
	default:
		pr_debug("%s: Unsupported option %d\n",
			__func__, option);
		ret = -EINVAL;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_reset);

/**
 * eeh_pe_configure - Configure PCI bridges after PE reset
 * @pe: EEH PE
 *
 * The routine is called to restore the PCI config space for
 * those PCI devices, especially PCI bridges, affected by the PE
 * reset issued previously.
 */
int eeh_pe_configure(struct eeh_pe *pe)
{
	int ret = 0;

	/* Invalid PE ? */
	if (!pe)
		return -ENODEV;

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_configure);
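
/*
 * Hedged sketch of the recovery sequence a PE owner (for instance the
 * VFIO/EEH user-space path) is expected to drive with the helpers above;
 * retry logic and error handling are left out, and the exact ordering is
 * an assumption for illustration:
 *
 *	eeh_pe_set_option(pe, EEH_OPT_ENABLE);
 *	eeh_pe_reset(pe, EEH_RESET_HOT);	// or EEH_RESET_FUNDAMENTAL
 *	eeh_pe_reset(pe, EEH_RESET_DEACTIVATE);
 *	eeh_pe_configure(pe);			// restore bridges/config space
 */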

static int proc_eeh_show(struct seq_file *m, void *v)
{
	if (!eeh_enabled()) {
		seq_printf(m, "EEH Subsystem is globally disabled\n");
		seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs);
	} else {
		seq_printf(m, "EEH Subsystem is enabled\n");
		seq_printf(m,
				"no device=%llu\n"
				"no device node=%llu\n"
				"no config address=%llu\n"
				"check not wanted=%llu\n"
				"eeh_total_mmio_ffs=%llu\n"
				"eeh_false_positives=%llu\n"
				"eeh_slot_resets=%llu\n",
				eeh_stats.no_device,
				eeh_stats.no_dn,
				eeh_stats.no_cfg_addr,
				eeh_stats.ignored_check,
				eeh_stats.total_mmio_ffs,
				eeh_stats.false_positives,
				eeh_stats.slot_resets);
	}

	return 0;
}

static int proc_eeh_open(struct inode *inode, struct file *file)
{
	return single_open(file, proc_eeh_show, NULL);
}

static const struct file_operations proc_eeh_operations = {
	.open      = proc_eeh_open,
	.read      = seq_read,
	.llseek    = seq_lseek,
	.release   = single_release,
};

#ifdef CONFIG_DEBUG_FS
static int eeh_enable_dbgfs_set(void *data, u64 val)
{
	if (val)
		eeh_clear_flag(EEH_FORCE_DISABLED);
	else
		eeh_add_flag(EEH_FORCE_DISABLED);

	/* Notify the backend */
	if (eeh_ops->post_init)
		eeh_ops->post_init();

	return 0;
}

static int eeh_enable_dbgfs_get(void *data, u64 *val)
{
	if (eeh_enabled())
		*val = 0x1ul;
	else
		*val = 0x0ul;
	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get,
			eeh_enable_dbgfs_set, "0x%llx\n");
#endif

static int __init eeh_init_proc(void)
{
	if (machine_is(pseries) || machine_is(powernv)) {
		proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations);
#ifdef CONFIG_DEBUG_FS
		debugfs_create_file("eeh_enable", 0600,
				    powerpc_debugfs_root, NULL,
				    &eeh_enable_dbgfs_ops);
#endif
	}

	return 0;
}
__initcall(eeh_init_proc);