Commit | Line | Data |
---|---|---|
3d155f8c RD |
1 | /* |
2 | * Copyright (c) 2005 Cisco Systems. All rights reserved. | |
3 | * | |
4 | * This software is available to you under a choice of one of two | |
5 | * licenses. You may choose to be licensed under the terms of the GNU | |
6 | * General Public License (GPL) Version 2, available from the file | |
7 | * COPYING in the main directory of this source tree, or the | |
8 | * OpenIB.org BSD license below: | |
9 | * | |
10 | * Redistribution and use in source and binary forms, with or | |
11 | * without modification, are permitted provided that the following | |
12 | * conditions are met: | |
13 | * | |
14 | * - Redistributions of source code must retain the above | |
15 | * copyright notice, this list of conditions and the following | |
16 | * disclaimer. | |
17 | * | |
18 | * - Redistributions in binary form must reproduce the above | |
19 | * copyright notice, this list of conditions and the following | |
20 | * disclaimer in the documentation and/or other materials | |
21 | * provided with the distribution. | |
22 | * | |
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | |
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | |
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | |
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
30 | * SOFTWARE. | |
31 | * | |
32 | * $Id$ | |
33 | */ | |
34 | ||
8c65b4a6 TS |
35 | #include <linux/jiffies.h> |
36 | #include <linux/timer.h> | |
b3b30f5e | 37 | #include <linux/workqueue.h> |
8c65b4a6 | 38 | |
3d155f8c RD |
39 | #include "mthca_dev.h" |
40 | ||
41 | enum { | |
42 | MTHCA_CATAS_POLL_INTERVAL = 5 * HZ, | |
43 | ||
44 | MTHCA_CATAS_TYPE_INTERNAL = 0, | |
45 | MTHCA_CATAS_TYPE_UPLINK = 3, | |
46 | MTHCA_CATAS_TYPE_DDR = 4, | |
47 | MTHCA_CATAS_TYPE_PARITY = 5, | |
48 | }; | |
49 | ||
50 | static DEFINE_SPINLOCK(catas_lock); | |
51 | ||
b3b30f5e JM |
52 | static LIST_HEAD(catas_list); |
53 | static struct workqueue_struct *catas_wq; | |
54 | static struct work_struct catas_work; | |
55 | ||
56 | static int catas_reset_disable; | |
57 | module_param_named(catas_reset_disable, catas_reset_disable, int, 0644); | |
58 | MODULE_PARM_DESC(catas_reset_disable, "disable reset on catastrophic event if nonzero"); | |
59 | ||
c4028958 | 60 | static void catas_reset(struct work_struct *work) |
b3b30f5e JM |
61 | { |
62 | struct mthca_dev *dev, *tmpdev; | |
63 | LIST_HEAD(tlist); | |
64 | int ret; | |
65 | ||
66 | mutex_lock(&mthca_device_mutex); | |
67 | ||
68 | spin_lock_irq(&catas_lock); | |
69 | list_splice_init(&catas_list, &tlist); | |
70 | spin_unlock_irq(&catas_lock); | |
71 | ||
72 | list_for_each_entry_safe(dev, tmpdev, &tlist, catas_err.list) { | |
73 | ret = __mthca_restart_one(dev->pdev); | |
74 | if (ret) | |
75 | mthca_err(dev, "Reset failed (%d)\n", ret); | |
76 | else | |
77 | mthca_dbg(dev, "Reset succeeded\n"); | |
78 | } | |
79 | ||
80 | mutex_unlock(&mthca_device_mutex); | |
81 | } | |
82 | ||
3d155f8c RD |
83 | static void handle_catas(struct mthca_dev *dev) |
84 | { | |
85 | struct ib_event event; | |
b3b30f5e | 86 | unsigned long flags; |
3d155f8c RD |
87 | const char *type; |
88 | int i; | |
89 | ||
90 | event.device = &dev->ib_dev; | |
91 | event.event = IB_EVENT_DEVICE_FATAL; | |
92 | event.element.port_num = 0; | |
93 | ||
94 | ib_dispatch_event(&event); | |
95 | ||
96 | switch (swab32(readl(dev->catas_err.map)) >> 24) { | |
97 | case MTHCA_CATAS_TYPE_INTERNAL: | |
98 | type = "internal error"; | |
99 | break; | |
100 | case MTHCA_CATAS_TYPE_UPLINK: | |
101 | type = "uplink bus error"; | |
102 | break; | |
103 | case MTHCA_CATAS_TYPE_DDR: | |
104 | type = "DDR data error"; | |
105 | break; | |
106 | case MTHCA_CATAS_TYPE_PARITY: | |
107 | type = "internal parity error"; | |
108 | break; | |
109 | default: | |
110 | type = "unknown error"; | |
111 | break; | |
112 | } | |
113 | ||
114 | mthca_err(dev, "Catastrophic error detected: %s\n", type); | |
115 | for (i = 0; i < dev->catas_err.size; ++i) | |
116 | mthca_err(dev, " buf[%02x]: %08x\n", | |
117 | i, swab32(readl(dev->catas_err.map + i))); | |
b3b30f5e JM |
118 | |
119 | if (catas_reset_disable) | |
120 | return; | |
121 | ||
122 | spin_lock_irqsave(&catas_lock, flags); | |
123 | list_add(&dev->catas_err.list, &catas_list); | |
124 | queue_work(catas_wq, &catas_work); | |
125 | spin_unlock_irqrestore(&catas_lock, flags); | |
3d155f8c RD |
126 | } |
127 | ||
128 | static void poll_catas(unsigned long dev_ptr) | |
129 | { | |
130 | struct mthca_dev *dev = (struct mthca_dev *) dev_ptr; | |
131 | unsigned long flags; | |
132 | int i; | |
133 | ||
134 | for (i = 0; i < dev->catas_err.size; ++i) | |
135 | if (readl(dev->catas_err.map + i)) { | |
136 | handle_catas(dev); | |
137 | return; | |
138 | } | |
139 | ||
140 | spin_lock_irqsave(&catas_lock, flags); | |
0b4ff2c0 | 141 | if (!dev->catas_err.stop) |
3d155f8c RD |
142 | mod_timer(&dev->catas_err.timer, |
143 | jiffies + MTHCA_CATAS_POLL_INTERVAL); | |
144 | spin_unlock_irqrestore(&catas_lock, flags); | |
145 | ||
146 | return; | |
147 | } | |
148 | ||
149 | void mthca_start_catas_poll(struct mthca_dev *dev) | |
150 | { | |
151 | unsigned long addr; | |
152 | ||
153 | init_timer(&dev->catas_err.timer); | |
154 | dev->catas_err.stop = 0; | |
155 | dev->catas_err.map = NULL; | |
156 | ||
157 | addr = pci_resource_start(dev->pdev, 0) + | |
158 | ((pci_resource_len(dev->pdev, 0) - 1) & | |
159 | dev->catas_err.addr); | |
160 | ||
161 | if (!request_mem_region(addr, dev->catas_err.size * 4, | |
162 | DRV_NAME)) { | |
163 | mthca_warn(dev, "couldn't request catastrophic error region " | |
164 | "at 0x%lx/0x%x\n", addr, dev->catas_err.size * 4); | |
165 | return; | |
166 | } | |
167 | ||
168 | dev->catas_err.map = ioremap(addr, dev->catas_err.size * 4); | |
169 | if (!dev->catas_err.map) { | |
170 | mthca_warn(dev, "couldn't map catastrophic error region " | |
171 | "at 0x%lx/0x%x\n", addr, dev->catas_err.size * 4); | |
172 | release_mem_region(addr, dev->catas_err.size * 4); | |
173 | return; | |
174 | } | |
175 | ||
176 | dev->catas_err.timer.data = (unsigned long) dev; | |
177 | dev->catas_err.timer.function = poll_catas; | |
178 | dev->catas_err.timer.expires = jiffies + MTHCA_CATAS_POLL_INTERVAL; | |
b3b30f5e | 179 | INIT_LIST_HEAD(&dev->catas_err.list); |
3d155f8c RD |
180 | add_timer(&dev->catas_err.timer); |
181 | } | |
182 | ||
183 | void mthca_stop_catas_poll(struct mthca_dev *dev) | |
184 | { | |
185 | spin_lock_irq(&catas_lock); | |
186 | dev->catas_err.stop = 1; | |
187 | spin_unlock_irq(&catas_lock); | |
188 | ||
189 | del_timer_sync(&dev->catas_err.timer); | |
190 | ||
191 | if (dev->catas_err.map) { | |
192 | iounmap(dev->catas_err.map); | |
193 | release_mem_region(pci_resource_start(dev->pdev, 0) + | |
194 | ((pci_resource_len(dev->pdev, 0) - 1) & | |
195 | dev->catas_err.addr), | |
196 | dev->catas_err.size * 4); | |
197 | } | |
b3b30f5e JM |
198 | |
199 | spin_lock_irq(&catas_lock); | |
200 | list_del(&dev->catas_err.list); | |
201 | spin_unlock_irq(&catas_lock); | |
202 | } | |
203 | ||
204 | int __init mthca_catas_init(void) | |
205 | { | |
c4028958 | 206 | INIT_WORK(&catas_work, catas_reset); |
b3b30f5e JM |
207 | |
208 | catas_wq = create_singlethread_workqueue("mthca_catas"); | |
209 | if (!catas_wq) | |
210 | return -ENOMEM; | |
211 | ||
212 | return 0; | |
213 | } | |
214 | ||
215 | void mthca_catas_cleanup(void) | |
216 | { | |
217 | destroy_workqueue(catas_wq); | |
3d155f8c | 218 | } |