Commit | Line | Data |
---|---|---|
4cdadfd5 DW |
1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* Copyright(c) 2020 Intel Corporation. All rights reserved. */ | |
5f60d5f6 | 3 | #include <linux/unaligned.h> |
4faf31b4 | 4 | #include <linux/io-64-nonatomic-lo-hi.h> |
229e8828 | 5 | #include <linux/moduleparam.h> |
4cdadfd5 | 6 | #include <linux/module.h> |
229e8828 | 7 | #include <linux/delay.h> |
fae8817a | 8 | #include <linux/sizes.h> |
b39cb105 | 9 | #include <linux/mutex.h> |
30af9729 | 10 | #include <linux/list.h> |
4cdadfd5 | 11 | #include <linux/pci.h> |
2905cb52 | 12 | #include <linux/aer.h> |
4cdadfd5 | 13 | #include <linux/io.h> |
8d8081ce | 14 | #include <cxl/mailbox.h> |
5161a55c | 15 | #include "cxlmem.h" |
af9cae9f | 16 | #include "cxlpci.h" |
8adaf747 | 17 | #include "cxl.h" |
1ad3f701 | 18 | #include "pmu.h" |
8adaf747 BW |
19 | |
20 | /** | |
21e9f767 | 21 | * DOC: cxl pci |
8adaf747 | 22 | * |
21e9f767 BW |
23 | * This implements the PCI exclusive functionality for a CXL device as it is |
24 | * defined by the Compute Express Link specification. CXL devices may surface | |
ed97afb5 BW |
25 | * certain functionality even if it isn't CXL enabled. While this driver is |
26 | * focused around the PCI specific aspects of a CXL device, it binds to the | |
27 | * specific CXL memory device class code, and therefore the implementation of | |
28 | * cxl_pci is focused around CXL memory devices. | |
8adaf747 BW |
29 | * |
30 | * The driver has several responsibilities, mainly: | |
31 | * - Create the memX device and register on the CXL bus. | |
32 | * - Enumerate device's register interface and map them. | |
ed97afb5 BW |
33 | * - Registers nvdimm bridge device with cxl_core. |
34 | * - Registers a CXL mailbox with cxl_core. | |
8adaf747 BW |
35 | */ |
36 | ||
/*
 * Non-zero while the doorbell bit is set in the mailbox control register of
 * @cxlds. Performs one 32-bit MMIO read per evaluation.
 */
#define cxl_doorbell_busy(cxlds)					\
	(readl((cxlds)->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET) &		\
	 CXLDEV_MBOX_CTRL_DOORBELL)
40 | ||
/*
 * CXL 2.0 - 8.2.8.4
 *
 * NOTE: despite the _MS suffix, this value is expressed in jiffies
 * (2 * HZ, i.e. two seconds worth of ticks), not in milliseconds.
 */
#define CXL_MAILBOX_TIMEOUT_MS (2 * HZ)
43 | ||
229e8828 BW |
44 | /* |
45 | * CXL 2.0 ECN "Add Mailbox Ready Time" defines a capability field to | |
46 | * dictate how long to wait for the mailbox to become ready. The new | |
47 | * field allows the device to tell software the amount of time to wait | |
48 | * before mailbox ready. This field per the spec theoretically allows | |
49 | * for up to 255 seconds. 255 seconds is unreasonably long, its longer | |
50 | * than the maximum SATA port link recovery wait. Default to 60 seconds | |
51 | * until someone builds a CXL device that needs more time in practice. | |
52 | */ | |
53 | static unsigned short mbox_ready_timeout = 60; | |
54 | module_param(mbox_ready_timeout, ushort, 0644); | |
2e4ba0ec | 55 | MODULE_PARM_DESC(mbox_ready_timeout, "seconds to wait for mailbox ready"); |
229e8828 | 56 | |
5e2411ae | 57 | static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds) |
8adaf747 BW |
58 | { |
59 | const unsigned long start = jiffies; | |
60 | unsigned long end = start; | |
61 | ||
5e2411ae | 62 | while (cxl_doorbell_busy(cxlds)) { |
8adaf747 BW |
63 | end = jiffies; |
64 | ||
65 | if (time_after(end, start + CXL_MAILBOX_TIMEOUT_MS)) { | |
66 | /* Check again in case preempted before timeout test */ | |
5e2411ae | 67 | if (!cxl_doorbell_busy(cxlds)) |
8adaf747 BW |
68 | break; |
69 | return -ETIMEDOUT; | |
70 | } | |
71 | cpu_relax(); | |
72 | } | |
73 | ||
5e2411ae | 74 | dev_dbg(cxlds->dev, "Doorbell wait took %dms", |
8adaf747 BW |
75 | jiffies_to_msecs(end) - jiffies_to_msecs(start)); |
76 | return 0; | |
77 | } | |
78 | ||
/*
 * Rate-limited error print that appends the decoded device state.
 * @dev:    struct device to log against
 * @status: memory device status register value; tested against
 *          CXLMDEV_DEV_FATAL and CXLMDEV_FW_HALT
 * @msg:    must be a string literal, it is concatenated into the format
 *
 * @status is parenthesized so that compound expressions (e.g. "a | b")
 * are masked as a whole instead of being split by operator precedence.
 */
#define cxl_err(dev, status, msg)					\
	dev_err_ratelimited((dev), msg ", device state %s%s\n",	\
			    ((status) & CXLMDEV_DEV_FATAL) ? " fatal" : "", \
			    ((status) & CXLMDEV_FW_HALT) ? " firmware-halt" : "")
8adaf747 | 83 | |
/*
 * Rate-limited error print for a failed mailbox command; reports the
 * command opcode and the decoded device state.
 * @dev:    struct device to log against
 * @cmd:    pointer to an object with an ->opcode member
 * @status: memory device status register value; tested against
 *          CXLMDEV_DEV_FATAL and CXLMDEV_FW_HALT
 * @msg:    must be a string literal, it is concatenated into the format
 *
 * @status is parenthesized so that compound expressions (e.g. "a | b")
 * are masked as a whole instead of being split by operator precedence.
 */
#define cxl_cmd_err(dev, cmd, status, msg)				\
	dev_err_ratelimited((dev), msg " (opcode: %#x), device state %s%s\n", \
			    (cmd)->opcode,				\
			    ((status) & CXLMDEV_DEV_FATAL) ? " fatal" : "", \
			    ((status) & CXLMDEV_FW_HALT) ? " firmware-halt" : "")
8adaf747 | 89 | |
/*
 * The dev_id cookie handed to a threaded irq has to be globally unique.
 * Each requested irq therefore gets its own cxl_dev_id instance, which
 * simply points back at the owning cxlds.
 */
struct cxl_dev_id {
	struct cxl_dev_state *cxlds;	/* device state the irq belongs to */
};
97 | ||
98 | static int cxl_request_irq(struct cxl_dev_state *cxlds, int irq, | |
08b8a8c0 | 99 | irq_handler_t thread_fn) |
9f7a320d DB |
100 | { |
101 | struct device *dev = cxlds->dev; | |
102 | struct cxl_dev_id *dev_id; | |
103 | ||
9f7a320d DB |
104 | dev_id = devm_kzalloc(dev, sizeof(*dev_id), GFP_KERNEL); |
105 | if (!dev_id) | |
106 | return -ENOMEM; | |
107 | dev_id->cxlds = cxlds; | |
108 | ||
08b8a8c0 DW |
109 | return devm_request_threaded_irq(dev, irq, NULL, thread_fn, |
110 | IRQF_SHARED | IRQF_ONESHOT, NULL, | |
111 | dev_id); | |
9f7a320d DB |
112 | } |
113 | ||
ccadf131 DB |
114 | static bool cxl_mbox_background_complete(struct cxl_dev_state *cxlds) |
115 | { | |
116 | u64 reg; | |
117 | ||
118 | reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET); | |
119 | return FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_PCT_MASK, reg) == 100; | |
120 | } | |
121 | ||
122 | static irqreturn_t cxl_pci_mbox_irq(int irq, void *id) | |
123 | { | |
0c36b6ad DB |
124 | u64 reg; |
125 | u16 opcode; | |
ccadf131 DB |
126 | struct cxl_dev_id *dev_id = id; |
127 | struct cxl_dev_state *cxlds = dev_id->cxlds; | |
8d8081ce | 128 | struct cxl_mailbox *cxl_mbox = &cxlds->cxl_mbox; |
aeaefabc | 129 | struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); |
ccadf131 | 130 | |
8ea9c33d DB |
131 | if (!cxl_mbox_background_complete(cxlds)) |
132 | return IRQ_NONE; | |
133 | ||
0c36b6ad DB |
134 | reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET); |
135 | opcode = FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_OPCODE_MASK, reg); | |
136 | if (opcode == CXL_MBOX_OP_SANITIZE) { | |
8d8081ce | 137 | mutex_lock(&cxl_mbox->mbox_mutex); |
aeaefabc | 138 | if (mds->security.sanitize_node) |
e30a1065 | 139 | mod_delayed_work(system_wq, &mds->security.poll_dwork, 0); |
8d8081ce | 140 | mutex_unlock(&cxl_mbox->mbox_mutex); |
0c36b6ad DB |
141 | } else { |
142 | /* short-circuit the wait in __cxl_pci_mbox_send_cmd() */ | |
8d8081ce | 143 | rcuwait_wake_up(&cxl_mbox->mbox_wait); |
0c36b6ad | 144 | } |
ccadf131 DB |
145 | |
146 | return IRQ_HANDLED; | |
147 | } | |
148 | ||
0c36b6ad DB |
149 | /* |
150 | * Sanitization operation polling mode. | |
151 | */ | |
152 | static void cxl_mbox_sanitize_work(struct work_struct *work) | |
153 | { | |
aeaefabc DW |
154 | struct cxl_memdev_state *mds = |
155 | container_of(work, typeof(*mds), security.poll_dwork.work); | |
156 | struct cxl_dev_state *cxlds = &mds->cxlds; | |
8d8081ce | 157 | struct cxl_mailbox *cxl_mbox = &cxlds->cxl_mbox; |
0c36b6ad | 158 | |
8d8081ce | 159 | mutex_lock(&cxl_mbox->mbox_mutex); |
0c36b6ad | 160 | if (cxl_mbox_background_complete(cxlds)) { |
aeaefabc | 161 | mds->security.poll_tmo_secs = 0; |
aeaefabc DW |
162 | if (mds->security.sanitize_node) |
163 | sysfs_notify_dirent(mds->security.sanitize_node); | |
33981838 | 164 | mds->security.sanitize_active = false; |
48dcdbb1 | 165 | |
0c36b6ad DB |
166 | dev_dbg(cxlds->dev, "Sanitization operation ended\n"); |
167 | } else { | |
aeaefabc | 168 | int timeout = mds->security.poll_tmo_secs + 10; |
0c36b6ad | 169 | |
aeaefabc | 170 | mds->security.poll_tmo_secs = min(15 * 60, timeout); |
e30a1065 | 171 | schedule_delayed_work(&mds->security.poll_dwork, timeout * HZ); |
0c36b6ad | 172 | } |
8d8081ce | 173 | mutex_unlock(&cxl_mbox->mbox_mutex); |
0c36b6ad DB |
174 | } |
175 | ||
8adaf747 | 176 | /** |
ed97afb5 | 177 | * __cxl_pci_mbox_send_cmd() - Execute a mailbox command |
8d8081ce | 178 | * @cxl_mbox: CXL mailbox context |
8adaf747 BW |
179 | * @mbox_cmd: Command to send to the memory device. |
180 | * | |
181 | * Context: Any context. Expects mbox_mutex to be held. | |
182 | * Return: -ETIMEDOUT if timeout occurred waiting for completion. 0 on success. | |
183 | * Caller should check the return code in @mbox_cmd to make sure it | |
184 | * succeeded. | |
185 | * | |
186 | * This is a generic form of the CXL mailbox send command thus only using the | |
187 | * registers defined by the mailbox capability ID - CXL 2.0 8.2.8.4. Memory | |
188 | * devices, and perhaps other types of CXL devices may have further information | |
189 | * available upon error conditions. Driver facilities wishing to send mailbox | |
190 | * commands should use the wrapper command. | |
191 | * | |
192 | * The CXL spec allows for up to two mailboxes. The intention is for the primary | |
193 | * mailbox to be OS controlled and the secondary mailbox to be used by system | |
194 | * firmware. This allows the OS and firmware to communicate with the device and | |
195 | * not need to coordinate with each other. The driver only uses the primary | |
196 | * mailbox. | |
197 | */ | |
8d8081ce | 198 | static int __cxl_pci_mbox_send_cmd(struct cxl_mailbox *cxl_mbox, |
b64955a9 | 199 | struct cxl_mbox_cmd *mbox_cmd) |
8adaf747 | 200 | { |
8d8081ce DJ |
201 | struct cxl_dev_state *cxlds = mbox_to_cxlds(cxl_mbox); |
202 | struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); | |
5e2411ae IW |
203 | void __iomem *payload = cxlds->regs.mbox + CXLDEV_MBOX_PAYLOAD_OFFSET; |
204 | struct device *dev = cxlds->dev; | |
8adaf747 BW |
205 | u64 cmd_reg, status_reg; |
206 | size_t out_len; | |
207 | int rc; | |
208 | ||
8d8081ce | 209 | lockdep_assert_held(&cxl_mbox->mbox_mutex); |
8adaf747 BW |
210 | |
211 | /* | |
212 | * Here are the steps from 8.2.8.4 of the CXL 2.0 spec. | |
213 | * 1. Caller reads MB Control Register to verify doorbell is clear | |
214 | * 2. Caller writes Command Register | |
215 | * 3. Caller writes Command Payload Registers if input payload is non-empty | |
216 | * 4. Caller writes MB Control Register to set doorbell | |
217 | * 5. Caller either polls for doorbell to be clear or waits for interrupt if configured | |
218 | * 6. Caller reads MB Status Register to fetch Return code | |
219 | * 7. If command successful, Caller reads Command Register to get Payload Length | |
220 | * 8. If output payload is non-empty, host reads Command Payload Registers | |
221 | * | |
222 | * Hardware is free to do whatever it wants before the doorbell is rung, | |
223 | * and isn't allowed to change anything after it clears the doorbell. As | |
224 | * such, steps 2 and 3 can happen in any order, and steps 6, 7, 8 can | |
225 | * also happen in any order (though some orders might not make sense). | |
226 | */ | |
227 | ||
228 | /* #1 */ | |
5e2411ae | 229 | if (cxl_doorbell_busy(cxlds)) { |
4f195ee7 DW |
230 | u64 md_status = |
231 | readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); | |
232 | ||
233 | cxl_cmd_err(cxlds->dev, mbox_cmd, md_status, | |
234 | "mailbox queue busy"); | |
8adaf747 BW |
235 | return -EBUSY; |
236 | } | |
237 | ||
0c36b6ad DB |
238 | /* |
239 | * With sanitize polling, hardware might be done and the poller still | |
240 | * not be in sync. Ensure no new command comes in until so. Keep the | |
241 | * hardware semantics and only allow device health status. | |
242 | */ | |
aeaefabc | 243 | if (mds->security.poll_tmo_secs > 0) { |
0c36b6ad DB |
244 | if (mbox_cmd->opcode != CXL_MBOX_OP_GET_HEALTH_INFO) |
245 | return -EBUSY; | |
246 | } | |
247 | ||
8adaf747 BW |
248 | cmd_reg = FIELD_PREP(CXLDEV_MBOX_CMD_COMMAND_OPCODE_MASK, |
249 | mbox_cmd->opcode); | |
250 | if (mbox_cmd->size_in) { | |
251 | if (WARN_ON(!mbox_cmd->payload_in)) | |
252 | return -EINVAL; | |
253 | ||
254 | cmd_reg |= FIELD_PREP(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK, | |
255 | mbox_cmd->size_in); | |
256 | memcpy_toio(payload, mbox_cmd->payload_in, mbox_cmd->size_in); | |
257 | } | |
258 | ||
259 | /* #2, #3 */ | |
5e2411ae | 260 | writeq(cmd_reg, cxlds->regs.mbox + CXLDEV_MBOX_CMD_OFFSET); |
8adaf747 BW |
261 | |
262 | /* #4 */ | |
852db33c | 263 | dev_dbg(dev, "Sending command: 0x%04x\n", mbox_cmd->opcode); |
8adaf747 | 264 | writel(CXLDEV_MBOX_CTRL_DOORBELL, |
5e2411ae | 265 | cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET); |
8adaf747 BW |
266 | |
267 | /* #5 */ | |
5e2411ae | 268 | rc = cxl_pci_mbox_wait_for_doorbell(cxlds); |
8adaf747 | 269 | if (rc == -ETIMEDOUT) { |
4f195ee7 DW |
270 | u64 md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); |
271 | ||
272 | cxl_cmd_err(cxlds->dev, mbox_cmd, md_status, "mailbox timeout"); | |
8adaf747 BW |
273 | return rc; |
274 | } | |
275 | ||
276 | /* #6 */ | |
5e2411ae | 277 | status_reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_STATUS_OFFSET); |
8adaf747 BW |
278 | mbox_cmd->return_code = |
279 | FIELD_GET(CXLDEV_MBOX_STATUS_RET_CODE_MASK, status_reg); | |
280 | ||
ccadf131 DB |
281 | /* |
282 | * Handle the background command in a synchronous manner. | |
283 | * | |
284 | * All other mailbox commands will serialize/queue on the mbox_mutex, | |
285 | * which we currently hold. Furthermore this also guarantees that | |
286 | * cxl_mbox_background_complete() checks are safe amongst each other, | |
287 | * in that no new bg operation can occur in between. | |
288 | * | |
289 | * Background operations are timesliced in accordance with the nature | |
290 | * of the command. In the event of timeout, the mailbox state is | |
291 | * indeterminate until the next successful command submission and the | |
292 | * driver can get back in sync with the hardware state. | |
293 | */ | |
294 | if (mbox_cmd->return_code == CXL_MBOX_CMD_RC_BACKGROUND) { | |
295 | u64 bg_status_reg; | |
0c36b6ad DB |
296 | int i, timeout; |
297 | ||
298 | /* | |
299 | * Sanitization is a special case which monopolizes the device | |
300 | * and cannot be timesliced. Handle asynchronously instead, | |
301 | * and allow userspace to poll(2) for completion. | |
302 | */ | |
303 | if (mbox_cmd->opcode == CXL_MBOX_OP_SANITIZE) { | |
33981838 DW |
304 | if (mds->security.sanitize_active) |
305 | return -EBUSY; | |
306 | ||
e30a1065 DW |
307 | /* give first timeout a second */ |
308 | timeout = 1; | |
309 | mds->security.poll_tmo_secs = timeout; | |
33981838 | 310 | mds->security.sanitize_active = true; |
e30a1065 DW |
311 | schedule_delayed_work(&mds->security.poll_dwork, |
312 | timeout * HZ); | |
0c36b6ad DB |
313 | dev_dbg(dev, "Sanitization operation started\n"); |
314 | goto success; | |
315 | } | |
ccadf131 DB |
316 | |
317 | dev_dbg(dev, "Mailbox background operation (0x%04x) started\n", | |
318 | mbox_cmd->opcode); | |
319 | ||
0c36b6ad | 320 | timeout = mbox_cmd->poll_interval_ms; |
ccadf131 | 321 | for (i = 0; i < mbox_cmd->poll_count; i++) { |
8d8081ce DJ |
322 | if (rcuwait_wait_event_timeout(&cxl_mbox->mbox_wait, |
323 | cxl_mbox_background_complete(cxlds), | |
324 | TASK_UNINTERRUPTIBLE, | |
325 | msecs_to_jiffies(timeout)) > 0) | |
ccadf131 DB |
326 | break; |
327 | } | |
328 | ||
329 | if (!cxl_mbox_background_complete(cxlds)) { | |
330 | dev_err(dev, "timeout waiting for background (%d ms)\n", | |
331 | timeout * mbox_cmd->poll_count); | |
332 | return -ETIMEDOUT; | |
333 | } | |
334 | ||
335 | bg_status_reg = readq(cxlds->regs.mbox + | |
336 | CXLDEV_MBOX_BG_CMD_STATUS_OFFSET); | |
337 | mbox_cmd->return_code = | |
338 | FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_RC_MASK, | |
339 | bg_status_reg); | |
340 | dev_dbg(dev, | |
341 | "Mailbox background operation (0x%04x) completed\n", | |
342 | mbox_cmd->opcode); | |
343 | } | |
344 | ||
92fcc1ab | 345 | if (mbox_cmd->return_code != CXL_MBOX_CMD_RC_SUCCESS) { |
c43e036d DB |
346 | dev_dbg(dev, "Mailbox operation had an error: %s\n", |
347 | cxl_mbox_cmd_rc2str(mbox_cmd)); | |
cbe83a20 | 348 | return 0; /* completed but caller must check return_code */ |
8adaf747 BW |
349 | } |
350 | ||
0c36b6ad | 351 | success: |
8adaf747 | 352 | /* #7 */ |
5e2411ae | 353 | cmd_reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_CMD_OFFSET); |
8adaf747 BW |
354 | out_len = FIELD_GET(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK, cmd_reg); |
355 | ||
356 | /* #8 */ | |
357 | if (out_len && mbox_cmd->payload_out) { | |
358 | /* | |
359 | * Sanitize the copy. If hardware misbehaves, out_len per the | |
360 | * spec can actually be greater than the max allowed size (21 | |
361 | * bits available but spec defined 1M max). The caller also may | |
362 | * have requested less data than the hardware supplied even | |
363 | * within spec. | |
364 | */ | |
59f8d151 | 365 | size_t n; |
8adaf747 | 366 | |
8d8081ce | 367 | n = min3(mbox_cmd->size_out, cxl_mbox->payload_size, out_len); |
8adaf747 BW |
368 | memcpy_fromio(mbox_cmd->payload_out, payload, n); |
369 | mbox_cmd->size_out = n; | |
370 | } else { | |
371 | mbox_cmd->size_out = 0; | |
372 | } | |
373 | ||
374 | return 0; | |
375 | } | |
376 | ||
8d8081ce | 377 | static int cxl_pci_mbox_send(struct cxl_mailbox *cxl_mbox, |
59f8d151 | 378 | struct cxl_mbox_cmd *cmd) |
b64955a9 DW |
379 | { |
380 | int rc; | |
381 | ||
8d8081ce DJ |
382 | mutex_lock_io(&cxl_mbox->mbox_mutex); |
383 | rc = __cxl_pci_mbox_send_cmd(cxl_mbox, cmd); | |
384 | mutex_unlock(&cxl_mbox->mbox_mutex); | |
b64955a9 DW |
385 | |
386 | return rc; | |
387 | } | |
388 | ||
d72a4caf | 389 | static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds, bool irq_avail) |
8adaf747 | 390 | { |
59f8d151 | 391 | struct cxl_dev_state *cxlds = &mds->cxlds; |
8d8081ce | 392 | struct cxl_mailbox *cxl_mbox = &cxlds->cxl_mbox; |
5e2411ae | 393 | const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET); |
59f8d151 | 394 | struct device *dev = cxlds->dev; |
229e8828 | 395 | unsigned long timeout; |
e30a1065 | 396 | int irq, msgnum; |
229e8828 | 397 | u64 md_status; |
e30a1065 | 398 | u32 ctrl; |
229e8828 BW |
399 | |
400 | timeout = jiffies + mbox_ready_timeout * HZ; | |
401 | do { | |
402 | md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); | |
403 | if (md_status & CXLMDEV_MBOX_IF_READY) | |
404 | break; | |
405 | if (msleep_interruptible(100)) | |
406 | break; | |
407 | } while (!time_after(jiffies, timeout)); | |
408 | ||
409 | if (!(md_status & CXLMDEV_MBOX_IF_READY)) { | |
59f8d151 | 410 | cxl_err(dev, md_status, "timeout awaiting mailbox ready"); |
4f195ee7 DW |
411 | return -ETIMEDOUT; |
412 | } | |
413 | ||
414 | /* | |
415 | * A command may be in flight from a previous driver instance, | |
416 | * think kexec, do one doorbell wait so that | |
417 | * __cxl_pci_mbox_send_cmd() can assume that it is the only | |
418 | * source for future doorbell busy events. | |
419 | */ | |
420 | if (cxl_pci_mbox_wait_for_doorbell(cxlds) != 0) { | |
59f8d151 | 421 | cxl_err(dev, md_status, "timeout awaiting mailbox idle"); |
4f195ee7 | 422 | return -ETIMEDOUT; |
229e8828 | 423 | } |
8adaf747 | 424 | |
8d8081ce DJ |
425 | cxl_mbox->mbox_send = cxl_pci_mbox_send; |
426 | cxl_mbox->payload_size = | |
8adaf747 BW |
427 | 1 << FIELD_GET(CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK, cap); |
428 | ||
429 | /* | |
430 | * CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register | |
431 | * | |
432 | * If the size is too small, mandatory commands will not work and so | |
433 | * there's no point in going forward. If the size is too large, there's | |
434 | * no harm is soft limiting it. | |
435 | */ | |
8d8081ce DJ |
436 | cxl_mbox->payload_size = min_t(size_t, cxl_mbox->payload_size, SZ_1M); |
437 | if (cxl_mbox->payload_size < 256) { | |
59f8d151 | 438 | dev_err(dev, "Mailbox is too small (%zub)", |
8d8081ce | 439 | cxl_mbox->payload_size); |
8adaf747 BW |
440 | return -ENXIO; |
441 | } | |
442 | ||
8d8081ce | 443 | dev_dbg(dev, "Mailbox payload sized %zu", cxl_mbox->payload_size); |
8adaf747 | 444 | |
e30a1065 | 445 | INIT_DELAYED_WORK(&mds->security.poll_dwork, cxl_mbox_sanitize_work); |
ccadf131 | 446 | |
e30a1065 | 447 | /* background command interrupts are optional */ |
d72a4caf | 448 | if (!(cap & CXLDEV_MBOX_CAP_BG_CMD_IRQ) || !irq_avail) |
e30a1065 | 449 | return 0; |
ccadf131 | 450 | |
e30a1065 DW |
451 | msgnum = FIELD_GET(CXLDEV_MBOX_CAP_IRQ_MSGNUM_MASK, cap); |
452 | irq = pci_irq_vector(to_pci_dev(cxlds->dev), msgnum); | |
453 | if (irq < 0) | |
454 | return 0; | |
ccadf131 | 455 | |
08b8a8c0 | 456 | if (cxl_request_irq(cxlds, irq, cxl_pci_mbox_irq)) |
ccadf131 | 457 | return 0; |
ccadf131 | 458 | |
e30a1065 DW |
459 | dev_dbg(cxlds->dev, "Mailbox interrupts enabled\n"); |
460 | /* enable background command mbox irq support */ | |
461 | ctrl = readl(cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET); | |
462 | ctrl |= CXLDEV_MBOX_CTRL_BG_CMD_IRQ; | |
463 | writel(ctrl, cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET); | |
0c36b6ad | 464 | |
8adaf747 BW |
465 | return 0; |
466 | } | |
467 | ||
733b57f2 RR |
468 | /* |
469 | * Assume that any RCIEP that emits the CXL memory expander class code | |
470 | * is an RCD | |
471 | */ | |
472 | static bool is_cxl_restricted(struct pci_dev *pdev) | |
1b0a1a2a | 473 | { |
733b57f2 | 474 | return pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END; |
30af9729 IW |
475 | } |
476 | ||
733b57f2 | 477 | static int cxl_rcrb_get_comp_regs(struct pci_dev *pdev, |
7a01213d KD |
478 | struct cxl_register_map *map, |
479 | struct cxl_dport *dport) | |
30af9729 | 480 | { |
733b57f2 | 481 | resource_size_t component_reg_phys; |
4cdadfd5 | 482 | |
733b57f2 | 483 | *map = (struct cxl_register_map) { |
dd22581f | 484 | .host = &pdev->dev, |
733b57f2 RR |
485 | .resource = CXL_RESOURCE_NONE, |
486 | }; | |
08422378 | 487 | |
dd2617eb LM |
488 | struct cxl_port *port __free(put_cxl_port) = |
489 | cxl_pci_find_port(pdev, &dport); | |
733b57f2 RR |
490 | if (!port) |
491 | return -EPROBE_DEFER; | |
30af9729 | 492 | |
733b57f2 | 493 | component_reg_phys = cxl_rcd_component_reg_phys(&pdev->dev, dport); |
733b57f2 RR |
494 | if (component_reg_phys == CXL_RESOURCE_NONE) |
495 | return -ENXIO; | |
496 | ||
497 | map->resource = component_reg_phys; | |
498 | map->reg_type = CXL_REGLOC_RBI_COMPONENT; | |
499 | map->max_size = CXL_COMPONENT_REG_BLOCK_SIZE; | |
30af9729 IW |
500 | |
501 | return 0; | |
502 | } | |
503 | ||
d076bb8c TB |
504 | static int cxl_pci_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, |
505 | struct cxl_register_map *map) | |
85afc317 BW |
506 | { |
507 | int rc; | |
5b68705d | 508 | |
85afc317 | 509 | rc = cxl_find_regblock(pdev, type, map); |
1d5a4159 | 510 | |
733b57f2 RR |
511 | /* |
512 | * If the Register Locator DVSEC does not exist, check if it | |
513 | * is an RCH and try to extract the Component Registers from | |
514 | * an RCRB. | |
515 | */ | |
7a01213d KD |
516 | if (rc && type == CXL_REGLOC_RBI_COMPONENT && is_cxl_restricted(pdev)) { |
517 | struct cxl_dport *dport; | |
518 | struct cxl_port *port __free(put_cxl_port) = | |
519 | cxl_pci_find_port(pdev, &dport); | |
520 | if (!port) | |
521 | return -EPROBE_DEFER; | |
522 | ||
523 | rc = cxl_rcrb_get_comp_regs(pdev, map, dport); | |
524 | if (rc) | |
525 | return rc; | |
526 | ||
527 | rc = cxl_dport_map_rcd_linkcap(pdev, dport); | |
528 | if (rc) | |
529 | return rc; | |
530 | ||
531 | } else if (rc) { | |
85afc317 | 532 | return rc; |
7a01213d | 533 | } |
85afc317 | 534 | |
d076bb8c | 535 | return cxl_setup_regs(map); |
0a19bfc8 DW |
536 | } |
537 | ||
248529ed DJ |
538 | static int cxl_pci_ras_unmask(struct pci_dev *pdev) |
539 | { | |
248529ed DJ |
540 | struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); |
541 | void __iomem *addr; | |
542 | u32 orig_val, val, mask; | |
543 | u16 cap; | |
544 | int rc; | |
545 | ||
546 | if (!cxlds->regs.ras) { | |
547 | dev_dbg(&pdev->dev, "No RAS registers.\n"); | |
548 | return 0; | |
549 | } | |
550 | ||
0339dc39 | 551 | /* BIOS has PCIe AER error control */ |
55b8ff06 | 552 | if (!pcie_aer_is_native(pdev)) |
0339dc39 | 553 | return 0; |
248529ed DJ |
554 | |
555 | rc = pcie_capability_read_word(pdev, PCI_EXP_DEVCTL, &cap); | |
556 | if (rc) | |
557 | return rc; | |
558 | ||
559 | if (cap & PCI_EXP_DEVCTL_URRE) { | |
560 | addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_MASK_OFFSET; | |
561 | orig_val = readl(addr); | |
562 | ||
f3c8a37a DW |
563 | mask = CXL_RAS_UNCORRECTABLE_MASK_MASK | |
564 | CXL_RAS_UNCORRECTABLE_MASK_F256B_MASK; | |
248529ed DJ |
565 | val = orig_val & ~mask; |
566 | writel(val, addr); | |
567 | dev_dbg(&pdev->dev, | |
568 | "Uncorrectable RAS Errors Mask: %#x -> %#x\n", | |
569 | orig_val, val); | |
570 | } | |
571 | ||
572 | if (cap & PCI_EXP_DEVCTL_CERE) { | |
573 | addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_MASK_OFFSET; | |
574 | orig_val = readl(addr); | |
575 | val = orig_val & ~CXL_RAS_CORRECTABLE_MASK_MASK; | |
576 | writel(val, addr); | |
577 | dev_dbg(&pdev->dev, "Correctable RAS Errors Mask: %#x -> %#x\n", | |
578 | orig_val, val); | |
579 | } | |
580 | ||
581 | return 0; | |
2905cb52 DW |
582 | } |
583 | ||
/* devm action callback: release the event buffer with kvfree(). */
static void free_event_buf(void *buf)
{
	kvfree(buf);
}
588 | ||
589 | /* | |
590 | * There is a single buffer for reading event logs from the mailbox. All logs | |
59f8d151 | 591 | * share this buffer protected by the mds->event_log_lock. |
6ebe28f9 | 592 | */ |
59f8d151 | 593 | static int cxl_mem_alloc_event_buf(struct cxl_memdev_state *mds) |
6ebe28f9 | 594 | { |
8d8081ce | 595 | struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; |
6ebe28f9 IW |
596 | struct cxl_get_event_payload *buf; |
597 | ||
8d8081ce | 598 | buf = kvmalloc(cxl_mbox->payload_size, GFP_KERNEL); |
6ebe28f9 IW |
599 | if (!buf) |
600 | return -ENOMEM; | |
59f8d151 | 601 | mds->event.buf = buf; |
6ebe28f9 | 602 | |
59f8d151 | 603 | return devm_add_action_or_reset(mds->cxlds.dev, free_event_buf, buf); |
6ebe28f9 IW |
604 | } |
605 | ||
d72a4caf | 606 | static bool cxl_alloc_irq_vectors(struct pci_dev *pdev) |
a49aa814 DB |
607 | { |
608 | int nvecs; | |
609 | ||
610 | /* | |
611 | * Per CXL 3.0 3.1.1 CXL.io Endpoint a function on a CXL device must | |
612 | * not generate INTx messages if that function participates in | |
613 | * CXL.cache or CXL.mem. | |
614 | * | |
615 | * Additionally pci_alloc_irq_vectors() handles calling | |
616 | * pci_free_irq_vectors() automatically despite not being called | |
617 | * pcim_*. See pci_setup_msi_context(). | |
618 | */ | |
619 | nvecs = pci_alloc_irq_vectors(pdev, 1, CXL_PCI_DEFAULT_MAX_VECTORS, | |
620 | PCI_IRQ_MSIX | PCI_IRQ_MSI); | |
621 | if (nvecs < 1) { | |
622 | dev_dbg(&pdev->dev, "Failed to alloc irq vectors: %d\n", nvecs); | |
d72a4caf | 623 | return false; |
a49aa814 | 624 | } |
d72a4caf | 625 | return true; |
a49aa814 DB |
626 | } |
627 | ||
a49aa814 DB |
628 | static irqreturn_t cxl_event_thread(int irq, void *id) |
629 | { | |
630 | struct cxl_dev_id *dev_id = id; | |
631 | struct cxl_dev_state *cxlds = dev_id->cxlds; | |
59f8d151 | 632 | struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); |
a49aa814 DB |
633 | u32 status; |
634 | ||
635 | do { | |
636 | /* | |
637 | * CXL 3.0 8.2.8.3.1: The lower 32 bits are the status; | |
638 | * ignore the reserved upper 32 bits | |
639 | */ | |
640 | status = readl(cxlds->regs.status + CXLDEV_DEV_EVENT_STATUS_OFFSET); | |
641 | /* Ignore logs unknown to the driver */ | |
642 | status &= CXLDEV_EVENT_STATUS_ALL; | |
643 | if (!status) | |
644 | break; | |
59f8d151 | 645 | cxl_mem_get_event_records(mds, status); |
a49aa814 DB |
646 | cond_resched(); |
647 | } while (status); | |
648 | ||
649 | return IRQ_HANDLED; | |
650 | } | |
651 | ||
652 | static int cxl_event_req_irq(struct cxl_dev_state *cxlds, u8 setting) | |
653 | { | |
9f7a320d | 654 | struct pci_dev *pdev = to_pci_dev(cxlds->dev); |
a49aa814 DB |
655 | int irq; |
656 | ||
657 | if (FIELD_GET(CXLDEV_EVENT_INT_MODE_MASK, setting) != CXL_INT_MSI_MSIX) | |
658 | return -ENXIO; | |
659 | ||
a49aa814 DB |
660 | irq = pci_irq_vector(pdev, |
661 | FIELD_GET(CXLDEV_EVENT_INT_MSGNUM_MASK, setting)); | |
662 | if (irq < 0) | |
663 | return irq; | |
664 | ||
08b8a8c0 | 665 | return cxl_request_irq(cxlds, irq, cxl_event_thread); |
a49aa814 DB |
666 | } |
667 | ||
59f8d151 | 668 | static int cxl_event_get_int_policy(struct cxl_memdev_state *mds, |
a49aa814 DB |
669 | struct cxl_event_interrupt_policy *policy) |
670 | { | |
b5209da3 | 671 | struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; |
a49aa814 DB |
672 | struct cxl_mbox_cmd mbox_cmd = { |
673 | .opcode = CXL_MBOX_OP_GET_EVT_INT_POLICY, | |
674 | .payload_out = policy, | |
675 | .size_out = sizeof(*policy), | |
676 | }; | |
677 | int rc; | |
678 | ||
b5209da3 | 679 | rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); |
a49aa814 | 680 | if (rc < 0) |
59f8d151 DW |
681 | dev_err(mds->cxlds.dev, |
682 | "Failed to get event interrupt policy : %d", rc); | |
a49aa814 DB |
683 | |
684 | return rc; | |
685 | } | |
686 | ||
59f8d151 | 687 | static int cxl_event_config_msgnums(struct cxl_memdev_state *mds, |
a49aa814 DB |
688 | struct cxl_event_interrupt_policy *policy) |
689 | { | |
b5209da3 | 690 | struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; |
a49aa814 DB |
691 | struct cxl_mbox_cmd mbox_cmd; |
692 | int rc; | |
693 | ||
694 | *policy = (struct cxl_event_interrupt_policy) { | |
695 | .info_settings = CXL_INT_MSI_MSIX, | |
696 | .warn_settings = CXL_INT_MSI_MSIX, | |
697 | .failure_settings = CXL_INT_MSI_MSIX, | |
698 | .fatal_settings = CXL_INT_MSI_MSIX, | |
699 | }; | |
700 | ||
701 | mbox_cmd = (struct cxl_mbox_cmd) { | |
702 | .opcode = CXL_MBOX_OP_SET_EVT_INT_POLICY, | |
703 | .payload_in = policy, | |
704 | .size_in = sizeof(*policy), | |
705 | }; | |
706 | ||
b5209da3 | 707 | rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd); |
a49aa814 | 708 | if (rc < 0) { |
59f8d151 | 709 | dev_err(mds->cxlds.dev, "Failed to set event interrupt policy : %d", |
a49aa814 DB |
710 | rc); |
711 | return rc; | |
712 | } | |
713 | ||
714 | /* Retrieve final interrupt settings */ | |
59f8d151 | 715 | return cxl_event_get_int_policy(mds, policy); |
a49aa814 DB |
716 | } |
717 | ||
59f8d151 | 718 | static int cxl_event_irqsetup(struct cxl_memdev_state *mds) |
a49aa814 | 719 | { |
59f8d151 | 720 | struct cxl_dev_state *cxlds = &mds->cxlds; |
a49aa814 DB |
721 | struct cxl_event_interrupt_policy policy; |
722 | int rc; | |
723 | ||
59f8d151 | 724 | rc = cxl_event_config_msgnums(mds, &policy); |
a49aa814 DB |
725 | if (rc) |
726 | return rc; | |
727 | ||
728 | rc = cxl_event_req_irq(cxlds, policy.info_settings); | |
729 | if (rc) { | |
730 | dev_err(cxlds->dev, "Failed to get interrupt for event Info log\n"); | |
731 | return rc; | |
732 | } | |
733 | ||
734 | rc = cxl_event_req_irq(cxlds, policy.warn_settings); | |
735 | if (rc) { | |
736 | dev_err(cxlds->dev, "Failed to get interrupt for event Warn log\n"); | |
737 | return rc; | |
738 | } | |
739 | ||
740 | rc = cxl_event_req_irq(cxlds, policy.failure_settings); | |
741 | if (rc) { | |
742 | dev_err(cxlds->dev, "Failed to get interrupt for event Failure log\n"); | |
743 | return rc; | |
744 | } | |
745 | ||
746 | rc = cxl_event_req_irq(cxlds, policy.fatal_settings); | |
747 | if (rc) { | |
748 | dev_err(cxlds->dev, "Failed to get interrupt for event Fatal log\n"); | |
749 | return rc; | |
750 | } | |
751 | ||
752 | return 0; | |
753 | } | |
754 | ||
755 | static bool cxl_event_int_is_fw(u8 setting) | |
756 | { | |
757 | u8 mode = FIELD_GET(CXLDEV_EVENT_INT_MODE_MASK, setting); | |
758 | ||
759 | return mode == CXL_INT_FW; | |
760 | } | |
761 | ||
762 | static int cxl_event_config(struct pci_host_bridge *host_bridge, | |
d72a4caf | 763 | struct cxl_memdev_state *mds, bool irq_avail) |
a49aa814 DB |
764 | { |
765 | struct cxl_event_interrupt_policy policy; | |
766 | int rc; | |
767 | ||
768 | /* | |
769 | * When BIOS maintains CXL error reporting control, it will process | |
770 | * event records. Only one agent can do so. | |
771 | */ | |
772 | if (!host_bridge->native_cxl_error) | |
773 | return 0; | |
774 | ||
d72a4caf IW |
775 | if (!irq_avail) { |
776 | dev_info(mds->cxlds.dev, "No interrupt support, disable event processing.\n"); | |
777 | return 0; | |
778 | } | |
779 | ||
59f8d151 | 780 | rc = cxl_event_get_int_policy(mds, &policy); |
a49aa814 DB |
781 | if (rc) |
782 | return rc; | |
783 | ||
784 | if (cxl_event_int_is_fw(policy.info_settings) || | |
785 | cxl_event_int_is_fw(policy.warn_settings) || | |
786 | cxl_event_int_is_fw(policy.failure_settings) || | |
787 | cxl_event_int_is_fw(policy.fatal_settings)) { | |
59f8d151 DW |
788 | dev_err(mds->cxlds.dev, |
789 | "FW still in control of Event Logs despite _OSC settings\n"); | |
a49aa814 DB |
790 | return -EBUSY; |
791 | } | |
792 | ||
0f6f0d68 IW |
793 | rc = cxl_mem_alloc_event_buf(mds); |
794 | if (rc) | |
795 | return rc; | |
796 | ||
59f8d151 | 797 | rc = cxl_event_irqsetup(mds); |
a49aa814 DB |
798 | if (rc) |
799 | return rc; | |
800 | ||
59f8d151 | 801 | cxl_mem_get_event_records(mds, CXLDEV_EVENT_STATUS_ALL); |
a49aa814 DB |
802 | |
803 | return 0; | |
804 | } | |
805 | ||
8d8081ce DJ |
806 | static int cxl_pci_type3_init_mailbox(struct cxl_dev_state *cxlds) |
807 | { | |
808 | int rc; | |
809 | ||
810 | /* | |
811 | * Fail the init if there's no mailbox. For a type3 this is out of spec. | |
812 | */ | |
813 | if (!cxlds->reg_map.device_map.mbox.valid) | |
814 | return -ENODEV; | |
815 | ||
816 | rc = cxl_mailbox_init(&cxlds->cxl_mbox, cxlds->dev); | |
817 | if (rc) | |
818 | return rc; | |
819 | ||
820 | return 0; | |
821 | } | |
822 | ||
c5eaec79 KD |
823 | static ssize_t rcd_pcie_cap_emit(struct device *dev, u16 offset, char *buf, size_t width) |
824 | { | |
825 | struct cxl_dev_state *cxlds = dev_get_drvdata(dev); | |
826 | struct cxl_memdev *cxlmd = cxlds->cxlmd; | |
827 | struct device *root_dev; | |
828 | struct cxl_dport *dport; | |
829 | struct cxl_port *root __free(put_cxl_port) = | |
830 | cxl_mem_find_port(cxlmd, &dport); | |
831 | ||
832 | if (!root) | |
833 | return -ENXIO; | |
834 | ||
835 | root_dev = root->uport_dev; | |
836 | if (!root_dev) | |
837 | return -ENXIO; | |
838 | ||
09ceba3a LM |
839 | if (!dport->regs.rcd_pcie_cap) |
840 | return -ENXIO; | |
841 | ||
c5eaec79 KD |
842 | guard(device)(root_dev); |
843 | if (!root_dev->driver) | |
844 | return -ENXIO; | |
845 | ||
846 | switch (width) { | |
847 | case 2: | |
848 | return sysfs_emit(buf, "%#x\n", | |
849 | readw(dport->regs.rcd_pcie_cap + offset)); | |
850 | case 4: | |
851 | return sysfs_emit(buf, "%#x\n", | |
852 | readl(dport->regs.rcd_pcie_cap + offset)); | |
853 | default: | |
854 | return -EINVAL; | |
855 | } | |
856 | } | |
857 | ||
858 | static ssize_t rcd_link_cap_show(struct device *dev, | |
859 | struct device_attribute *attr, char *buf) | |
860 | { | |
861 | return rcd_pcie_cap_emit(dev, PCI_EXP_LNKCAP, buf, sizeof(u32)); | |
862 | } | |
863 | static DEVICE_ATTR_RO(rcd_link_cap); | |
864 | ||
865 | static ssize_t rcd_link_ctrl_show(struct device *dev, | |
866 | struct device_attribute *attr, char *buf) | |
867 | { | |
868 | return rcd_pcie_cap_emit(dev, PCI_EXP_LNKCTL, buf, sizeof(u16)); | |
869 | } | |
870 | static DEVICE_ATTR_RO(rcd_link_ctrl); | |
871 | ||
872 | static ssize_t rcd_link_status_show(struct device *dev, | |
873 | struct device_attribute *attr, char *buf) | |
874 | { | |
875 | return rcd_pcie_cap_emit(dev, PCI_EXP_LNKSTA, buf, sizeof(u16)); | |
876 | } | |
877 | static DEVICE_ATTR_RO(rcd_link_status); | |
878 | ||
879 | static struct attribute *cxl_rcd_attrs[] = { | |
880 | &dev_attr_rcd_link_cap.attr, | |
881 | &dev_attr_rcd_link_ctrl.attr, | |
882 | &dev_attr_rcd_link_status.attr, | |
883 | NULL | |
884 | }; | |
885 | ||
886 | static umode_t cxl_rcd_visible(struct kobject *kobj, struct attribute *a, int n) | |
887 | { | |
888 | struct device *dev = kobj_to_dev(kobj); | |
889 | struct pci_dev *pdev = to_pci_dev(dev); | |
890 | ||
891 | if (is_cxl_restricted(pdev)) | |
892 | return a->mode; | |
893 | ||
894 | return 0; | |
895 | } | |
896 | ||
897 | static struct attribute_group cxl_rcd_group = { | |
898 | .attrs = cxl_rcd_attrs, | |
899 | .is_visible = cxl_rcd_visible, | |
900 | }; | |
901 | __ATTRIBUTE_GROUPS(cxl_rcd); | |
902 | ||
ed97afb5 | 903 | static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) |
4cdadfd5 | 904 | { |
6ebe28f9 | 905 | struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus); |
8e4c411c | 906 | struct cxl_dpa_info range_info = { 0 }; |
59f8d151 DW |
907 | struct cxl_memdev_state *mds; |
908 | struct cxl_dev_state *cxlds; | |
85afc317 | 909 | struct cxl_register_map map; |
21083f51 | 910 | struct cxl_memdev *cxlmd; |
448a60e8 HY |
911 | int rc, pmu_count; |
912 | unsigned int i; | |
d72a4caf | 913 | bool irq_avail; |
8adaf747 | 914 | |
5a2328f4 DW |
915 | /* |
916 | * Double check the anonymous union trickery in struct cxl_regs | |
917 | * FIXME switch to struct_group() | |
918 | */ | |
919 | BUILD_BUG_ON(offsetof(struct cxl_regs, memdev) != | |
920 | offsetof(struct cxl_regs, device_regs.memdev)); | |
921 | ||
8adaf747 BW |
922 | rc = pcim_enable_device(pdev); |
923 | if (rc) | |
924 | return rc; | |
a49aa814 | 925 | pci_set_master(pdev); |
4cdadfd5 | 926 | |
59f8d151 DW |
927 | mds = cxl_memdev_state_create(&pdev->dev); |
928 | if (IS_ERR(mds)) | |
929 | return PTR_ERR(mds); | |
930 | cxlds = &mds->cxlds; | |
2905cb52 | 931 | pci_set_drvdata(pdev, cxlds); |
1b0a1a2a | 932 | |
0a19bfc8 | 933 | cxlds->rcd = is_cxl_restricted(pdev); |
bcc79ea3 | 934 | cxlds->serial = pci_get_dsn(pdev); |
06e279e5 | 935 | cxlds->cxl_dvsec = pci_find_dvsec_capability( |
962f1e79 | 936 | pdev, PCI_VENDOR_ID_CXL, CXL_DVSEC_PCIE_DEVICE); |
06e279e5 BW |
937 | if (!cxlds->cxl_dvsec) |
938 | dev_warn(&pdev->dev, | |
939 | "Device DVSEC not present, skip CXL.mem init\n"); | |
940 | ||
d076bb8c | 941 | rc = cxl_pci_setup_regs(pdev, CXL_REGLOC_RBI_MEMDEV, &map); |
85afc317 BW |
942 | if (rc) |
943 | return rc; | |
944 | ||
57340804 | 945 | rc = cxl_map_device_regs(&map, &cxlds->regs.device_regs); |
8adaf747 BW |
946 | if (rc) |
947 | return rc; | |
948 | ||
4112a08d BW |
949 | /* |
950 | * If the component registers can't be found, the cxl_pci driver may | |
951 | * still be useful for management functions so don't return an error. | |
952 | */ | |
2dd18279 RR |
953 | rc = cxl_pci_setup_regs(pdev, CXL_REGLOC_RBI_COMPONENT, |
954 | &cxlds->reg_map); | |
4112a08d BW |
955 | if (rc) |
956 | dev_warn(&pdev->dev, "No component registers (%d)\n", rc); | |
2dd18279 | 957 | else if (!cxlds->reg_map.component_map.ras.valid) |
f1d0525e | 958 | dev_dbg(&pdev->dev, "RAS registers not found\n"); |
4112a08d | 959 | |
2dd18279 | 960 | rc = cxl_map_component_regs(&cxlds->reg_map, &cxlds->regs.component, |
57340804 | 961 | BIT(CXL_CM_CAP_CAP_ID_RAS)); |
bd09626b DW |
962 | if (rc) |
963 | dev_dbg(&pdev->dev, "Failed to map RAS capability.\n"); | |
964 | ||
8d8081ce DJ |
965 | rc = cxl_pci_type3_init_mailbox(cxlds); |
966 | if (rc) | |
967 | return rc; | |
968 | ||
e764f122 DJ |
969 | rc = cxl_await_media_ready(cxlds); |
970 | if (rc == 0) | |
971 | cxlds->media_ready = true; | |
972 | else | |
973 | dev_warn(&pdev->dev, "Media not active (%d)\n", rc); | |
974 | ||
d72a4caf | 975 | irq_avail = cxl_alloc_irq_vectors(pdev); |
f279d0bc | 976 | |
d72a4caf | 977 | rc = cxl_pci_setup_mailbox(mds, irq_avail); |
8adaf747 BW |
978 | if (rc) |
979 | return rc; | |
980 | ||
59f8d151 | 981 | rc = cxl_enumerate_cmds(mds); |
472b1ce6 BW |
982 | if (rc) |
983 | return rc; | |
984 | ||
59f8d151 | 985 | rc = cxl_set_timestamp(mds); |
fa884345 JC |
986 | if (rc) |
987 | return rc; | |
988 | ||
59f8d151 | 989 | rc = cxl_poison_state_init(mds); |
d0abf578 AS |
990 | if (rc) |
991 | return rc; | |
992 | ||
59f8d151 | 993 | rc = cxl_dev_state_identify(mds); |
b39cb105 DW |
994 | if (rc) |
995 | return rc; | |
996 | ||
8e4c411c DW |
997 | rc = cxl_mem_dpa_fetch(mds, &range_info); |
998 | if (rc) | |
999 | return rc; | |
1000 | ||
1001 | rc = cxl_dpa_setup(cxlds, &range_info); | |
f847502a IW |
1002 | if (rc) |
1003 | return rc; | |
1004 | ||
f0e6a232 DJ |
1005 | rc = devm_cxl_setup_features(cxlds); |
1006 | if (rc) | |
1007 | dev_dbg(&pdev->dev, "No CXL Features discovered\n"); | |
1008 | ||
f29a824b | 1009 | cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlds); |
21083f51 DW |
1010 | if (IS_ERR(cxlmd)) |
1011 | return PTR_ERR(cxlmd); | |
1012 | ||
f29a824b | 1013 | rc = devm_cxl_setup_fw_upload(&pdev->dev, mds); |
9521875b VV |
1014 | if (rc) |
1015 | return rc; | |
1016 | ||
5f2da197 DW |
1017 | rc = devm_cxl_sanitize_setup_notifier(&pdev->dev, cxlmd); |
1018 | if (rc) | |
1019 | return rc; | |
1020 | ||
dc915672 | 1021 | rc = devm_cxl_setup_fwctl(&pdev->dev, cxlmd); |
858ce2f5 DJ |
1022 | if (rc) |
1023 | dev_dbg(&pdev->dev, "No CXL FWCTL setup\n"); | |
1024 | ||
1ad3f701 | 1025 | pmu_count = cxl_count_regblock(pdev, CXL_REGLOC_RBI_PMU); |
448a60e8 HY |
1026 | if (pmu_count < 0) |
1027 | return pmu_count; | |
1028 | ||
1ad3f701 JC |
1029 | for (i = 0; i < pmu_count; i++) { |
1030 | struct cxl_pmu_regs pmu_regs; | |
1031 | ||
1032 | rc = cxl_find_regblock_instance(pdev, CXL_REGLOC_RBI_PMU, &map, i); | |
1033 | if (rc) { | |
1034 | dev_dbg(&pdev->dev, "Could not find PMU regblock\n"); | |
1035 | break; | |
1036 | } | |
1037 | ||
e8db0701 | 1038 | rc = cxl_map_pmu_regs(&map, &pmu_regs); |
1ad3f701 JC |
1039 | if (rc) { |
1040 | dev_dbg(&pdev->dev, "Could not map PMU regs\n"); | |
1041 | break; | |
1042 | } | |
1043 | ||
1044 | rc = devm_cxl_pmu_add(cxlds->dev, &pmu_regs, cxlmd->id, i, CXL_PMU_MEMDEV); | |
1045 | if (rc) { | |
1046 | dev_dbg(&pdev->dev, "Could not add PMU instance\n"); | |
1047 | break; | |
1048 | } | |
1049 | } | |
1050 | ||
d72a4caf | 1051 | rc = cxl_event_config(host_bridge, mds, irq_avail); |
a49aa814 DB |
1052 | if (rc) |
1053 | return rc; | |
6ebe28f9 | 1054 | |
da4d8c83 | 1055 | if (cxl_pci_ras_unmask(pdev)) |
248529ed DJ |
1056 | dev_dbg(&pdev->dev, "No RAS reporting unmasked\n"); |
1057 | ||
2905cb52 DW |
1058 | pci_save_state(pdev); |
1059 | ||
21083f51 | 1060 | return rc; |
4cdadfd5 DW |
1061 | } |
1062 | ||
1063 | static const struct pci_device_id cxl_mem_pci_tbl[] = { | |
1064 | /* PCI class code for CXL.mem Type-3 Devices */ | |
1065 | { PCI_DEVICE_CLASS((PCI_CLASS_MEMORY_CXL << 8 | CXL_MEMORY_PROGIF), ~0)}, | |
1066 | { /* terminate list */ }, | |
1067 | }; | |
1068 | MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl); | |
1069 | ||
2905cb52 DW |
1070 | static pci_ers_result_t cxl_slot_reset(struct pci_dev *pdev) |
1071 | { | |
1072 | struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); | |
1073 | struct cxl_memdev *cxlmd = cxlds->cxlmd; | |
1074 | struct device *dev = &cxlmd->dev; | |
1075 | ||
1076 | dev_info(&pdev->dev, "%s: restart CXL.mem after slot reset\n", | |
1077 | dev_name(dev)); | |
1078 | pci_restore_state(pdev); | |
1079 | if (device_attach(dev) <= 0) | |
1080 | return PCI_ERS_RESULT_DISCONNECT; | |
1081 | return PCI_ERS_RESULT_RECOVERED; | |
1082 | } | |
1083 | ||
1084 | static void cxl_error_resume(struct pci_dev *pdev) | |
1085 | { | |
1086 | struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); | |
1087 | struct cxl_memdev *cxlmd = cxlds->cxlmd; | |
1088 | struct device *dev = &cxlmd->dev; | |
1089 | ||
1090 | dev_info(&pdev->dev, "%s: error resume %s\n", dev_name(dev), | |
1091 | dev->driver ? "successful" : "failed"); | |
1092 | } | |
1093 | ||
934edcd4 DJ |
1094 | static void cxl_reset_done(struct pci_dev *pdev) |
1095 | { | |
1096 | struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); | |
1097 | struct cxl_memdev *cxlmd = cxlds->cxlmd; | |
1098 | struct device *dev = &pdev->dev; | |
1099 | ||
1100 | /* | |
1101 | * FLR does not expect to touch the HDM decoders and related | |
1102 | * registers. SBR, however, will wipe all device configurations. | |
1103 | * Issue a warning if there was an active decoder before the reset | |
1104 | * that no longer exists. | |
1105 | */ | |
1106 | guard(device)(&cxlmd->dev); | |
1107 | if (cxlmd->endpoint && | |
1108 | cxl_endpoint_decoder_reset_detected(cxlmd->endpoint)) { | |
1109 | dev_crit(dev, "SBR happened without memory regions removal.\n"); | |
1110 | dev_crit(dev, "System may be unstable if regions hosted system memory.\n"); | |
1111 | add_taint(TAINT_USER, LOCKDEP_STILL_OK); | |
1112 | } | |
1113 | } | |
1114 | ||
2905cb52 DW |
1115 | static const struct pci_error_handlers cxl_error_handlers = { |
1116 | .error_detected = cxl_error_detected, | |
1117 | .slot_reset = cxl_slot_reset, | |
1118 | .resume = cxl_error_resume, | |
6155ccc9 | 1119 | .cor_error_detected = cxl_cor_error_detected, |
934edcd4 | 1120 | .reset_done = cxl_reset_done, |
2905cb52 DW |
1121 | }; |
1122 | ||
ed97afb5 | 1123 | static struct pci_driver cxl_pci_driver = { |
4cdadfd5 DW |
1124 | .name = KBUILD_MODNAME, |
1125 | .id_table = cxl_mem_pci_tbl, | |
ed97afb5 | 1126 | .probe = cxl_pci_probe, |
2905cb52 | 1127 | .err_handler = &cxl_error_handlers, |
c5eaec79 | 1128 | .dev_groups = cxl_rcd_groups, |
4cdadfd5 DW |
1129 | .driver = { |
1130 | .probe_type = PROBE_PREFER_ASYNCHRONOUS, | |
1131 | }, | |
1132 | }; | |
1133 | ||
c19ac30e IW |
1134 | #define CXL_EVENT_HDR_FLAGS_REC_SEVERITY GENMASK(1, 0) |
1135 | static void cxl_handle_cper_event(enum cxl_event_type ev_type, | |
1136 | struct cxl_cper_event_rec *rec) | |
1137 | { | |
1138 | struct cper_cxl_event_devid *device_id = &rec->hdr.device_id; | |
1139 | struct pci_dev *pdev __free(pci_dev_put) = NULL; | |
1140 | enum cxl_event_log_type log_type; | |
1141 | struct cxl_dev_state *cxlds; | |
1142 | unsigned int devfn; | |
1143 | u32 hdr_flags; | |
1144 | ||
1145 | pr_debug("CPER event %d for device %u:%u:%u.%u\n", ev_type, | |
1146 | device_id->segment_num, device_id->bus_num, | |
1147 | device_id->device_num, device_id->func_num); | |
1148 | ||
1149 | devfn = PCI_DEVFN(device_id->device_num, device_id->func_num); | |
1150 | pdev = pci_get_domain_bus_and_slot(device_id->segment_num, | |
1151 | device_id->bus_num, devfn); | |
1152 | if (!pdev) | |
1153 | return; | |
1154 | ||
1155 | guard(device)(&pdev->dev); | |
1156 | if (pdev->driver != &cxl_pci_driver) | |
1157 | return; | |
1158 | ||
1159 | cxlds = pci_get_drvdata(pdev); | |
1160 | if (!cxlds) | |
1161 | return; | |
1162 | ||
1163 | /* Fabricate a log type */ | |
1164 | hdr_flags = get_unaligned_le24(rec->event.generic.hdr.flags); | |
1165 | log_type = FIELD_GET(CXL_EVENT_HDR_FLAGS_REC_SEVERITY, hdr_flags); | |
1166 | ||
1167 | cxl_event_trace_record(cxlds->cxlmd, log_type, ev_type, | |
1168 | &uuid_null, &rec->event); | |
1169 | } | |
1170 | ||
1171 | static void cxl_cper_work_fn(struct work_struct *work) | |
1172 | { | |
1173 | struct cxl_cper_work_data wd; | |
1174 | ||
1175 | while (cxl_cper_kfifo_get(&wd)) | |
1176 | cxl_handle_cper_event(wd.event_type, &wd.rec); | |
1177 | } | |
1178 | static DECLARE_WORK(cxl_cper_work, cxl_cper_work_fn); | |
1179 | ||
1180 | static int __init cxl_pci_driver_init(void) | |
1181 | { | |
1182 | int rc; | |
1183 | ||
1184 | rc = pci_register_driver(&cxl_pci_driver); | |
1185 | if (rc) | |
1186 | return rc; | |
1187 | ||
1188 | rc = cxl_cper_register_work(&cxl_cper_work); | |
1189 | if (rc) | |
1190 | pci_unregister_driver(&cxl_pci_driver); | |
1191 | ||
1192 | return rc; | |
1193 | } | |
1194 | ||
1195 | static void __exit cxl_pci_driver_exit(void) | |
1196 | { | |
1197 | cxl_cper_unregister_work(&cxl_cper_work); | |
1198 | cancel_work_sync(&cxl_cper_work); | |
1199 | pci_unregister_driver(&cxl_pci_driver); | |
1200 | } | |
1201 | ||
1202 | module_init(cxl_pci_driver_init); | |
1203 | module_exit(cxl_pci_driver_exit); | |
a0caa197 | 1204 | MODULE_DESCRIPTION("CXL: PCI manageability"); |
4cdadfd5 | 1205 | MODULE_LICENSE("GPL v2"); |
cdd30ebb | 1206 | MODULE_IMPORT_NS("CXL"); |