Commit | Line | Data |
---|---|---|
172ca926 | 1 | /* |
172ca926 LV |
2 | * This program is free software; you can redistribute it and/or modify |
3 | * it under the terms of the GNU General Public License as published by | |
4 | * the Free Software Foundation; either version 2 of the License, or | |
5 | * (at your option) any later version. | |
6 | * | |
7 | * This program is distributed in the hope that it will be useful, | |
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
10 | * GNU General Public License for more details. | |
11 | * | |
12 | * You should have received a copy of the GNU General Public License | |
13 | * along with this program; if not, write to the Free Software | |
14 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
15 | * | |
16 | * Copyright (c) 2005 Linas Vepstas <linas@linas.org> | |
17 | */ | |
18 | ||
ac325acd | 19 | #include <linux/delay.h> |
172ca926 | 20 | #include <linux/list.h> |
62fe91bb | 21 | #include <linux/sched.h> |
c8608558 | 22 | #include <linux/semaphore.h> |
172ca926 | 23 | #include <linux/pci.h> |
5a0e3ad6 | 24 | #include <linux/slab.h> |
ecf89e58 | 25 | #include <linux/kthread.h> |
172ca926 | 26 | #include <asm/eeh_event.h> |
77bd7415 | 27 | #include <asm/ppc-pci.h> |
172ca926 LV |
28 | |
29 | /** Overview: | |
30 | * EEH error states may be detected within exception handlers; | |
31 | * however, the recovery processing needs to occur asynchronously | |
32 | * in a normal kernel context and not an interrupt context. | |
33 | * This pair of routines creates an event and queues it onto an | |
34 | * event list, where a dedicated kernel thread can drive recovery. | |
35 | */ | |
36 | ||
34af946a | 37 | static DEFINE_SPINLOCK(eeh_eventlist_lock); |
c8608558 | 38 | static struct semaphore eeh_eventlist_sem; |
635218c7 | 39 | static LIST_HEAD(eeh_eventlist); |
8c33fd11 | 40 | |
172ca926 | 41 | /** |
29f8bf1b | 42 | * eeh_event_handler - Dispatch EEH events. |
172ca926 | 43 | * @dummy - unused |
8c33fd11 LV |
44 | * |
45 | * The detection of a frozen slot can occur inside an interrupt, | |
46 | * where it can be hard to do anything about it. The goal of this | |
47 | * routine is to pull these detection events out of the context | |
48 | * of the interrupt handler, and re-dispatch them for processing | |
49 | * at a later time in a normal context. | |
172ca926 LV |
50 | */ |
51 | static int eeh_event_handler(void * dummy) | |
52 | { | |
53 | unsigned long flags; | |
40a7cd92 | 54 | struct eeh_event *event; |
120dc496 | 55 | struct eeh_pe *pe; |
172ca926 | 56 | |
c8608558 | 57 | while (!kthread_should_stop()) { |
5459ae14 GS |
58 | if (down_interruptible(&eeh_eventlist_sem)) |
59 | break; | |
c8608558 GS |
60 | |
61 | /* Fetch EEH event from the queue */ | |
62 | spin_lock_irqsave(&eeh_eventlist_lock, flags); | |
63 | event = NULL; | |
64 | if (!list_empty(&eeh_eventlist)) { | |
65 | event = list_entry(eeh_eventlist.next, | |
66 | struct eeh_event, list); | |
67 | list_del(&event->list); | |
68 | } | |
69 | spin_unlock_irqrestore(&eeh_eventlist_lock, flags); | |
70 | if (!event) | |
71 | continue; | |
72 | ||
73 | /* We might have event without binding PE */ | |
74 | pe = event->pe; | |
75 | if (pe) { | |
0b5381a6 | 76 | if (pe->type & EEH_PE_PHB) |
1f52f176 | 77 | pr_info("EEH: Detected error on PHB#%x\n", |
0b5381a6 GS |
78 | pe->phb->global_number); |
79 | else | |
80 | pr_info("EEH: Detected PCI bus error on " | |
1f52f176 | 81 | "PHB#%x-PE#%x\n", |
0b5381a6 | 82 | pe->phb->global_number, pe->addr); |
68701780 | 83 | eeh_handle_normal_event(pe); |
c8608558 | 84 | } else { |
68701780 | 85 | eeh_handle_special_event(); |
c8608558 GS |
86 | } |
87 | ||
88 | kfree(event); | |
172ca926 LV |
89 | } |
90 | ||
91 | return 0; | |
92 | } | |
93 | ||
94 | /** | |
c8608558 | 95 | * eeh_event_init - Start kernel thread to handle EEH events |
29f8bf1b GS |
96 | * |
97 | * This routine is called to start the kernel thread for processing | |
98 | * EEH event. | |
172ca926 | 99 | */ |
c8608558 | 100 | int eeh_event_init(void) |
172ca926 | 101 | { |
c8608558 GS |
102 | struct task_struct *t; |
103 | int ret = 0; | |
104 | ||
105 | /* Initialize semaphore */ | |
106 | sema_init(&eeh_eventlist_sem, 0); | |
107 | ||
108 | t = kthread_run(eeh_event_handler, NULL, "eehd"); | |
109 | if (IS_ERR(t)) { | |
110 | ret = PTR_ERR(t); | |
111 | pr_err("%s: Failed to start EEH daemon (%d)\n", | |
112 | __func__, ret); | |
113 | return ret; | |
114 | } | |
115 | ||
116 | return 0; | |
172ca926 LV |
117 | } |
118 | ||
119 | /** | |
29f8bf1b | 120 | * eeh_send_failure_event - Generate a PCI error event |
c533b46c | 121 | * @pe: EEH PE |
172ca926 LV |
122 | * |
123 | * This routine can be called within an interrupt context; | |
124 | * the actual event will be delivered in a normal context | |
125 | * (from a workqueue). | |
126 | */ | |
c533b46c | 127 | int eeh_send_failure_event(struct eeh_pe *pe) |
172ca926 LV |
128 | { |
129 | unsigned long flags; | |
130 | struct eeh_event *event; | |
172ca926 | 131 | |
c533b46c GS |
132 | event = kzalloc(sizeof(*event), GFP_ATOMIC); |
133 | if (!event) { | |
134 | pr_err("EEH: out of memory, event not handled\n"); | |
135 | return -ENOMEM; | |
136 | } | |
137 | event->pe = pe; | |
172ca926 LV |
138 | |
139 | /* We may or may not be called in an interrupt context */ | |
140 | spin_lock_irqsave(&eeh_eventlist_lock, flags); | |
141 | list_add(&event->list, &eeh_eventlist); | |
142 | spin_unlock_irqrestore(&eeh_eventlist_lock, flags); | |
143 | ||
c8608558 GS |
144 | /* For EEH deamon to knick in */ |
145 | up(&eeh_eventlist_sem); | |
172ca926 LV |
146 | |
147 | return 0; | |
148 | } | |
99866595 GS |
149 | |
150 | /** | |
151 | * eeh_remove_event - Remove EEH event from the queue | |
152 | * @pe: Event binding to the PE | |
5c7a35e3 | 153 | * @force: Event will be removed unconditionally |
99866595 GS |
154 | * |
155 | * On PowerNV platform, we might have subsequent coming events | |
156 | * is part of the former one. For that case, those subsequent | |
157 | * coming events are totally duplicated and unnecessary, thus | |
158 | * they should be removed. | |
159 | */ | |
5c7a35e3 | 160 | void eeh_remove_event(struct eeh_pe *pe, bool force) |
99866595 GS |
161 | { |
162 | unsigned long flags; | |
163 | struct eeh_event *event, *tmp; | |
164 | ||
5c7a35e3 GS |
165 | /* |
166 | * If we have NULL PE passed in, we have dead IOC | |
167 | * or we're sure we can report all existing errors | |
168 | * by the caller. | |
169 | * | |
170 | * With "force", the event with associated PE that | |
171 | * have been isolated, the event won't be removed | |
172 | * to avoid event lost. | |
173 | */ | |
99866595 GS |
174 | spin_lock_irqsave(&eeh_eventlist_lock, flags); |
175 | list_for_each_entry_safe(event, tmp, &eeh_eventlist, list) { | |
5c7a35e3 GS |
176 | if (!force && event->pe && |
177 | (event->pe->state & EEH_PE_ISOLATED)) | |
178 | continue; | |
179 | ||
99866595 GS |
180 | if (!pe) { |
181 | list_del(&event->list); | |
182 | kfree(event); | |
183 | } else if (pe->type & EEH_PE_PHB) { | |
184 | if (event->pe && event->pe->phb == pe->phb) { | |
185 | list_del(&event->list); | |
186 | kfree(event); | |
187 | } | |
188 | } else if (event->pe == pe) { | |
189 | list_del(&event->list); | |
190 | kfree(event); | |
191 | } | |
192 | } | |
193 | spin_unlock_irqrestore(&eeh_eventlist_lock, flags); | |
194 | } |