// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2020 Intel Corporation. All rights reserved. */
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/moduleparam.h>
#include <linux/module.h>
#include <linux/delay.h>
#include <linux/sizes.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/pci.h>
#include <linux/pci-doe.h>
#include <linux/aer.h>
#include <linux/io.h>
#include "cxlmem.h"
#include "cxlpci.h"
#include "cxl.h"

/**
 * DOC: cxl pci
 *
 * This implements the PCI exclusive functionality for a CXL device as it is
 * defined by the Compute Express Link specification. CXL devices may surface
 * certain functionality even if it isn't CXL enabled. While this driver is
 * focused around the PCI specific aspects of a CXL device, it binds to the
 * specific CXL memory device class code, and therefore the implementation of
 * cxl_pci is focused around CXL memory devices.
 *
 * The driver has several responsibilities, mainly:
 *  - Create the memX device and register it on the CXL bus.
 *  - Enumerate the device's register interface and map it.
 *  - Register an nvdimm bridge device with cxl_core.
 *  - Register a CXL mailbox with cxl_core.
 */

#define cxl_doorbell_busy(cxlds)                                \
	(readl((cxlds)->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET) &  \
	 CXLDEV_MBOX_CTRL_DOORBELL)

/* CXL 2.0 - 8.2.8.4; value is in jiffies despite the _MS suffix */
#define CXL_MAILBOX_TIMEOUT_MS (2 * HZ)

/*
 * CXL 2.0 ECN "Add Mailbox Ready Time" defines a capability field to
 * dictate how long to wait for the mailbox to become ready. The new
 * field allows the device to tell software the amount of time to wait
 * before mailbox ready. This field per the spec theoretically allows
 * for up to 255 seconds. 255 seconds is unreasonably long, it's longer
 * than the maximum SATA port link recovery wait. Default to 60 seconds
 * until someone builds a CXL device that needs more time in practice.
 */
static unsigned short mbox_ready_timeout = 60;
module_param(mbox_ready_timeout, ushort, 0644);
MODULE_PARM_DESC(mbox_ready_timeout, "seconds to wait for mailbox ready");

static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds)
{
	const unsigned long start = jiffies;
	unsigned long end = start;

	while (cxl_doorbell_busy(cxlds)) {
		end = jiffies;

		if (time_after(end, start + CXL_MAILBOX_TIMEOUT_MS)) {
			/* Check again in case preempted before timeout test */
			if (!cxl_doorbell_busy(cxlds))
				break;
			return -ETIMEDOUT;
		}
		cpu_relax();
	}

	dev_dbg(cxlds->dev, "Doorbell wait took %dms",
		jiffies_to_msecs(end) - jiffies_to_msecs(start));
	return 0;
}

#define cxl_err(dev, status, msg)                                        \
	dev_err_ratelimited(dev, msg ", device state %s%s\n",           \
			    status & CXLMDEV_DEV_FATAL ? " fatal" : "",  \
			    status & CXLMDEV_FW_HALT ? " firmware-halt" : "")

#define cxl_cmd_err(dev, cmd, status, msg)                                  \
	dev_err_ratelimited(dev, msg " (opcode: %#x), device state %s%s\n", \
			    (cmd)->opcode,                                  \
			    status & CXLMDEV_DEV_FATAL ? " fatal" : "",     \
			    status & CXLMDEV_FW_HALT ? " firmware-halt" : "")

/**
 * __cxl_pci_mbox_send_cmd() - Execute a mailbox command
 * @cxlds: The device state to communicate with.
 * @mbox_cmd: Command to send to the memory device.
 *
 * Context: Any context. Expects mbox_mutex to be held.
 * Return: -ETIMEDOUT if timeout occurred waiting for completion. 0 on success.
 *         Caller should check the return code in @mbox_cmd to make sure it
 *         succeeded.
 *
 * This is a generic form of the CXL mailbox send command thus only using the
 * registers defined by the mailbox capability ID - CXL 2.0 8.2.8.4. Memory
 * devices, and perhaps other types of CXL devices may have further information
 * available upon error conditions. Driver facilities wishing to send mailbox
 * commands should use the wrapper command.
 *
 * The CXL spec allows for up to two mailboxes. The intention is for the primary
 * mailbox to be OS controlled and the secondary mailbox to be used by system
 * firmware. This allows the OS and firmware to communicate with the device and
 * not need to coordinate with each other. The driver only uses the primary
 * mailbox.
 */
static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds,
				   struct cxl_mbox_cmd *mbox_cmd)
{
	void __iomem *payload = cxlds->regs.mbox + CXLDEV_MBOX_PAYLOAD_OFFSET;
	struct device *dev = cxlds->dev;
	u64 cmd_reg, status_reg;
	size_t out_len;
	int rc;

	lockdep_assert_held(&cxlds->mbox_mutex);

	/*
	 * Here are the steps from 8.2.8.4 of the CXL 2.0 spec.
	 *   1. Caller reads MB Control Register to verify doorbell is clear
	 *   2. Caller writes Command Register
	 *   3. Caller writes Command Payload Registers if input payload is non-empty
	 *   4. Caller writes MB Control Register to set doorbell
	 *   5. Caller either polls for doorbell to be clear or waits for interrupt if configured
	 *   6. Caller reads MB Status Register to fetch Return code
	 *   7. If command successful, Caller reads Command Register to get Payload Length
	 *   8. If output payload is non-empty, host reads Command Payload Registers
	 *
	 * Hardware is free to do whatever it wants before the doorbell is rung,
	 * and isn't allowed to change anything after it clears the doorbell. As
	 * such, steps 2 and 3 can happen in any order, and steps 6, 7, 8 can
	 * also happen in any order (though some orders might not make sense).
	 */

	/* #1 */
	if (cxl_doorbell_busy(cxlds)) {
		u64 md_status =
			readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);

		cxl_cmd_err(cxlds->dev, mbox_cmd, md_status,
			    "mailbox queue busy");
		return -EBUSY;
	}

	cmd_reg = FIELD_PREP(CXLDEV_MBOX_CMD_COMMAND_OPCODE_MASK,
			     mbox_cmd->opcode);
	if (mbox_cmd->size_in) {
		if (WARN_ON(!mbox_cmd->payload_in))
			return -EINVAL;

		cmd_reg |= FIELD_PREP(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK,
				      mbox_cmd->size_in);
		memcpy_toio(payload, mbox_cmd->payload_in, mbox_cmd->size_in);
	}

	/* #2, #3 */
	writeq(cmd_reg, cxlds->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);

	/* #4 */
	dev_dbg(dev, "Sending command: 0x%04x\n", mbox_cmd->opcode);
	writel(CXLDEV_MBOX_CTRL_DOORBELL,
	       cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);

	/* #5 */
	rc = cxl_pci_mbox_wait_for_doorbell(cxlds);
	if (rc == -ETIMEDOUT) {
		u64 md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);

		cxl_cmd_err(cxlds->dev, mbox_cmd, md_status, "mailbox timeout");
		return rc;
	}

	/* #6 */
	status_reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_STATUS_OFFSET);
	mbox_cmd->return_code =
		FIELD_GET(CXLDEV_MBOX_STATUS_RET_CODE_MASK, status_reg);

	if (mbox_cmd->return_code != CXL_MBOX_CMD_RC_SUCCESS) {
		dev_dbg(dev, "Mailbox operation had an error: %s\n",
			cxl_mbox_cmd_rc2str(mbox_cmd));
		return 0; /* completed but caller must check return_code */
	}

	/* #7 */
	cmd_reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);
	out_len = FIELD_GET(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK, cmd_reg);

	/* #8 */
	if (out_len && mbox_cmd->payload_out) {
		/*
		 * Sanitize the copy. If hardware misbehaves, out_len per the
		 * spec can actually be greater than the max allowed size (21
		 * bits available but spec defined 1M max). The caller also may
		 * have requested less data than the hardware supplied even
		 * within spec.
		 */
		size_t n = min3(mbox_cmd->size_out, cxlds->payload_size, out_len);

		memcpy_fromio(mbox_cmd->payload_out, payload, n);
		mbox_cmd->size_out = n;
	} else {
		mbox_cmd->size_out = 0;
	}

	return 0;
}

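/*
 * Serialize mailbox access: take mbox_mutex (accounting the wait as I/O
 * wait) so that only one command is in flight at a time.
 */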
static int cxl_pci_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd)
{
	int rc;

	mutex_lock_io(&cxlds->mbox_mutex);
	rc = __cxl_pci_mbox_send_cmd(cxlds, cmd);
	mutex_unlock(&cxlds->mbox_mutex);

	return rc;
}

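/*
 * Poll for mailbox-ready per the "Add Mailbox Ready Time" ECN, flush any
 * command left in flight by a previous driver instance (e.g. kexec), then
 * record the negotiated payload size and install cxl_pci_mbox_send().
 */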
static int cxl_pci_setup_mailbox(struct cxl_dev_state *cxlds)
{
	const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET);
	unsigned long timeout;
	u64 md_status;

	timeout = jiffies + mbox_ready_timeout * HZ;
	do {
		md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
		if (md_status & CXLMDEV_MBOX_IF_READY)
			break;
		if (msleep_interruptible(100))
			break;
	} while (!time_after(jiffies, timeout));

	if (!(md_status & CXLMDEV_MBOX_IF_READY)) {
		cxl_err(cxlds->dev, md_status,
			"timeout awaiting mailbox ready");
		return -ETIMEDOUT;
	}

	/*
	 * A command may be in flight from a previous driver instance,
	 * think kexec, do one doorbell wait so that
	 * __cxl_pci_mbox_send_cmd() can assume that it is the only
	 * source for future doorbell busy events.
	 */
	if (cxl_pci_mbox_wait_for_doorbell(cxlds) != 0) {
		cxl_err(cxlds->dev, md_status, "timeout awaiting mailbox idle");
		return -ETIMEDOUT;
	}

	cxlds->mbox_send = cxl_pci_mbox_send;
	cxlds->payload_size =
		1 << FIELD_GET(CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK, cap);

	/*
	 * CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register
	 *
	 * If the size is too small, mandatory commands will not work and so
	 * there's no point in going forward. If the size is too large, there's
	 * no harm in soft limiting it.
	 */
	cxlds->payload_size = min_t(size_t, cxlds->payload_size, SZ_1M);
	if (cxlds->payload_size < 256) {
		dev_err(cxlds->dev, "Mailbox is too small (%zub)",
			cxlds->payload_size);
		return -ENXIO;
	}

	dev_dbg(cxlds->dev, "Mailbox payload sized %zu",
		cxlds->payload_size);

	return 0;
}

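/* Temporarily map a register block so that its capabilities can be probed. */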
static int cxl_map_regblock(struct pci_dev *pdev, struct cxl_register_map *map)
{
	struct device *dev = &pdev->dev;

	map->base = ioremap(map->resource, map->max_size);
	if (!map->base) {
		dev_err(dev, "failed to map registers\n");
		return -ENOMEM;
	}

	dev_dbg(dev, "Mapped CXL Memory Device resource %pa\n", &map->resource);
	return 0;
}

static void cxl_unmap_regblock(struct pci_dev *pdev,
			       struct cxl_register_map *map)
{
	iounmap(map->base);
	map->base = NULL;
}

static int cxl_probe_regs(struct pci_dev *pdev, struct cxl_register_map *map)
{
	struct cxl_component_reg_map *comp_map;
	struct cxl_device_reg_map *dev_map;
	struct device *dev = &pdev->dev;
	void __iomem *base = map->base;

	switch (map->reg_type) {
	case CXL_REGLOC_RBI_COMPONENT:
		comp_map = &map->component_map;
		cxl_probe_component_regs(dev, base, comp_map);
		if (!comp_map->hdm_decoder.valid) {
			dev_err(dev, "HDM decoder registers not found\n");
			return -ENXIO;
		}

		if (!comp_map->ras.valid)
			dev_dbg(dev, "RAS registers not found\n");

		dev_dbg(dev, "Set up component registers\n");
		break;
	case CXL_REGLOC_RBI_MEMDEV:
		dev_map = &map->device_map;
		cxl_probe_device_regs(dev, base, dev_map);
		if (!dev_map->status.valid || !dev_map->mbox.valid ||
		    !dev_map->memdev.valid) {
			dev_err(dev, "registers not found: %s%s%s\n",
				!dev_map->status.valid ? "status " : "",
				!dev_map->mbox.valid ? "mbox " : "",
				!dev_map->memdev.valid ? "memdev " : "");
			return -ENXIO;
		}

		dev_dbg(dev, "Probing device registers...\n");
		break;
	default:
		break;
	}

	return 0;
}

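/*
 * Find a register block of @type, map it, probe its capabilities into @map,
 * then drop the temporary mapping; callers map only what they need.
 */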
static int cxl_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type,
			  struct cxl_register_map *map)
{
	int rc;

	rc = cxl_find_regblock(pdev, type, map);
	if (rc)
		return rc;

	rc = cxl_map_regblock(pdev, map);
	if (rc)
		return rc;

	rc = cxl_probe_regs(pdev, map);
	cxl_unmap_regblock(pdev, map);

	return rc;
}

static void cxl_pci_destroy_doe(void *mbs)
{
	xa_destroy(mbs);
}

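/*
 * Walk every DOE capability on the device and stash a mailbox object for
 * each in cxlds->doe_mbs; teardown is handled by a devm action on the
 * XArray.
 */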
static void devm_cxl_pci_create_doe(struct cxl_dev_state *cxlds)
{
	struct device *dev = cxlds->dev;
	struct pci_dev *pdev = to_pci_dev(dev);
	u16 off = 0;

	xa_init(&cxlds->doe_mbs);
	if (devm_add_action(&pdev->dev, cxl_pci_destroy_doe, &cxlds->doe_mbs)) {
		dev_err(dev, "Failed to create XArray for DOEs\n");
		return;
	}

	/*
	 * Mailbox creation is best effort. Higher layers must determine if
	 * the lack of a mailbox for their protocol is a device failure or not.
	 */
	pci_doe_for_each_off(pdev, off) {
		struct pci_doe_mb *doe_mb;

		doe_mb = pcim_doe_create_mb(pdev, off);
		if (IS_ERR(doe_mb)) {
			dev_err(dev, "Failed to create MB object for MB @ %x\n",
				off);
			continue;
		}

		if (!pci_request_config_region_exclusive(pdev, off,
							 PCI_DOE_CAP_SIZEOF,
							 dev_name(dev)))
			pci_err(pdev, "Failed to exclude DOE registers\n");

		if (xa_insert(&cxlds->doe_mbs, off, doe_mb, GFP_KERNEL)) {
			dev_err(dev, "xa_insert failed to insert MB @ %x\n",
				off);
			continue;
		}

		dev_dbg(dev, "Created DOE mailbox @%x\n", off);
	}
}

/*
 * Assume that any RCIEP that emits the CXL memory expander class code
 * is an RCD
 */
static bool is_cxl_restricted(struct pci_dev *pdev)
{
	return pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END;
}

/*
 * CXL v3.0 6.2.3 Table 6-4
 * The table indicates that if PCIe Flit Mode is set, then CXL is in 256B flits
 * mode, otherwise it's 68B flits mode.
 */
static bool cxl_pci_flit_256(struct pci_dev *pdev)
{
	u16 lnksta2;

	pcie_capability_read_word(pdev, PCI_EXP_LNKSTA2, &lnksta2);
	return lnksta2 & PCI_EXP_LNKSTA2_FLIT;
}

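/*
 * Unmask CXL RAS errors when the OS owns error handling: clear the
 * uncorrectable and/or correctable mask bits according to the device's
 * URRE/CERE reporting enables.
 */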
static int cxl_pci_ras_unmask(struct pci_dev *pdev)
{
	struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus);
	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
	void __iomem *addr;
	u32 orig_val, val, mask;
	u16 cap;
	int rc;

	if (!cxlds->regs.ras) {
		dev_dbg(&pdev->dev, "No RAS registers.\n");
		return 0;
	}

	/* BIOS has CXL error control */
	if (!host_bridge->native_cxl_error)
		return -ENXIO;

	rc = pcie_capability_read_word(pdev, PCI_EXP_DEVCTL, &cap);
	if (rc)
		return rc;

	if (cap & PCI_EXP_DEVCTL_URRE) {
		addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_MASK_OFFSET;
		orig_val = readl(addr);

		mask = CXL_RAS_UNCORRECTABLE_MASK_MASK;
		if (!cxl_pci_flit_256(pdev))
			mask &= ~CXL_RAS_UNCORRECTABLE_MASK_F256B_MASK;
		val = orig_val & ~mask;
		writel(val, addr);
		dev_dbg(&pdev->dev,
			"Uncorrectable RAS Errors Mask: %#x -> %#x\n",
			orig_val, val);
	}

	if (cap & PCI_EXP_DEVCTL_CERE) {
		addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_MASK_OFFSET;
		orig_val = readl(addr);
		val = orig_val & ~CXL_RAS_CORRECTABLE_MASK_MASK;
		writel(val, addr);
		dev_dbg(&pdev->dev, "Correctable RAS Errors Mask: %#x -> %#x\n",
			orig_val, val);
	}

	return 0;
}

static void free_event_buf(void *buf)
{
	kvfree(buf);
}

/*
 * There is a single buffer for reading event logs from the mailbox. All logs
 * share this buffer protected by the cxlds->event_log_lock.
 */
static int cxl_mem_alloc_event_buf(struct cxl_dev_state *cxlds)
{
	struct cxl_get_event_payload *buf;

	buf = kvmalloc(cxlds->payload_size, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;
	cxlds->event.buf = buf;

	return devm_add_action_or_reset(cxlds->dev, free_event_buf, buf);
}

static int cxl_alloc_irq_vectors(struct pci_dev *pdev)
{
	int nvecs;

	/*
	 * Per CXL 3.0 3.1.1 CXL.io Endpoint a function on a CXL device must
	 * not generate INTx messages if that function participates in
	 * CXL.cache or CXL.mem.
	 *
	 * Additionally pci_alloc_irq_vectors() handles calling
	 * pci_free_irq_vectors() automatically despite not being called
	 * pcim_*. See pci_setup_msi_context().
	 */
	nvecs = pci_alloc_irq_vectors(pdev, 1, CXL_PCI_DEFAULT_MAX_VECTORS,
				      PCI_IRQ_MSIX | PCI_IRQ_MSI);
	if (nvecs < 1) {
		dev_dbg(&pdev->dev, "Failed to alloc irq vectors: %d\n", nvecs);
		return -ENXIO;
	}
	return 0;
}

struct cxl_dev_id {
	struct cxl_dev_state *cxlds;
};

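/*
 * Event interrupt thread: keep fetching event records until the status
 * register reports no more pending logs known to the driver.
 */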
static irqreturn_t cxl_event_thread(int irq, void *id)
{
	struct cxl_dev_id *dev_id = id;
	struct cxl_dev_state *cxlds = dev_id->cxlds;
	u32 status;

	do {
		/*
		 * CXL 3.0 8.2.8.3.1: The lower 32 bits are the status;
		 * ignore the reserved upper 32 bits
		 */
		status = readl(cxlds->regs.status + CXLDEV_DEV_EVENT_STATUS_OFFSET);
		/* Ignore logs unknown to the driver */
		status &= CXLDEV_EVENT_STATUS_ALL;
		if (!status)
			break;
		cxl_mem_get_event_records(cxlds, status);
		cond_resched();
	} while (status);

	return IRQ_HANDLED;
}

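/*
 * Request a threaded IRQ for the vector named in an event interrupt policy
 * @setting; only MSI/MSI-X delivery is supported.
 */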
static int cxl_event_req_irq(struct cxl_dev_state *cxlds, u8 setting)
{
	struct device *dev = cxlds->dev;
	struct pci_dev *pdev = to_pci_dev(dev);
	struct cxl_dev_id *dev_id;
	int irq;

	if (FIELD_GET(CXLDEV_EVENT_INT_MODE_MASK, setting) != CXL_INT_MSI_MSIX)
		return -ENXIO;

	/* dev_id must be globally unique and must contain the cxlds */
	dev_id = devm_kzalloc(dev, sizeof(*dev_id), GFP_KERNEL);
	if (!dev_id)
		return -ENOMEM;
	dev_id->cxlds = cxlds;

	irq = pci_irq_vector(pdev,
			     FIELD_GET(CXLDEV_EVENT_INT_MSGNUM_MASK, setting));
	if (irq < 0)
		return irq;

	return devm_request_threaded_irq(dev, irq, NULL, cxl_event_thread,
					 IRQF_SHARED | IRQF_ONESHOT, NULL,
					 dev_id);
}

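/* Read the device's current event interrupt policy via the mailbox. */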
static int cxl_event_get_int_policy(struct cxl_dev_state *cxlds,
				    struct cxl_event_interrupt_policy *policy)
{
	struct cxl_mbox_cmd mbox_cmd = {
		.opcode = CXL_MBOX_OP_GET_EVT_INT_POLICY,
		.payload_out = policy,
		.size_out = sizeof(*policy),
	};
	int rc;

	rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
	if (rc < 0)
		dev_err(cxlds->dev, "Failed to get event interrupt policy: %d",
			rc);

	return rc;
}

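/*
 * Ask the device to signal all four event logs via MSI/MSI-X, then read
 * back the policy it actually accepted.
 */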
static int cxl_event_config_msgnums(struct cxl_dev_state *cxlds,
				    struct cxl_event_interrupt_policy *policy)
{
	struct cxl_mbox_cmd mbox_cmd;
	int rc;

	*policy = (struct cxl_event_interrupt_policy) {
		.info_settings = CXL_INT_MSI_MSIX,
		.warn_settings = CXL_INT_MSI_MSIX,
		.failure_settings = CXL_INT_MSI_MSIX,
		.fatal_settings = CXL_INT_MSI_MSIX,
	};

	mbox_cmd = (struct cxl_mbox_cmd) {
		.opcode = CXL_MBOX_OP_SET_EVT_INT_POLICY,
		.payload_in = policy,
		.size_in = sizeof(*policy),
	};

	rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
	if (rc < 0) {
		dev_err(cxlds->dev, "Failed to set event interrupt policy: %d",
			rc);
		return rc;
	}

	/* Retrieve final interrupt settings */
	return cxl_event_get_int_policy(cxlds, policy);
}

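/* Wire up one interrupt per event log: Info, Warn, Failure, and Fatal. */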
static int cxl_event_irqsetup(struct cxl_dev_state *cxlds)
{
	struct cxl_event_interrupt_policy policy;
	int rc;

	rc = cxl_event_config_msgnums(cxlds, &policy);
	if (rc)
		return rc;

	rc = cxl_event_req_irq(cxlds, policy.info_settings);
	if (rc) {
		dev_err(cxlds->dev, "Failed to get interrupt for event Info log\n");
		return rc;
	}

	rc = cxl_event_req_irq(cxlds, policy.warn_settings);
	if (rc) {
		dev_err(cxlds->dev, "Failed to get interrupt for event Warn log\n");
		return rc;
	}

	rc = cxl_event_req_irq(cxlds, policy.failure_settings);
	if (rc) {
		dev_err(cxlds->dev, "Failed to get interrupt for event Failure log\n");
		return rc;
	}

	rc = cxl_event_req_irq(cxlds, policy.fatal_settings);
	if (rc) {
		dev_err(cxlds->dev, "Failed to get interrupt for event Fatal log\n");
		return rc;
	}

	return 0;
}

static bool cxl_event_int_is_fw(u8 setting)
{
	u8 mode = FIELD_GET(CXLDEV_EVENT_INT_MODE_MASK, setting);

	return mode == CXL_INT_FW;
}

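/*
 * Configure event handling when the OS owns CXL error control: allocate the
 * shared event buffer, verify firmware has released the logs, request the
 * IRQs, and drain any records that accumulated before probe.
 */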
static int cxl_event_config(struct pci_host_bridge *host_bridge,
			    struct cxl_dev_state *cxlds)
{
	struct cxl_event_interrupt_policy policy;
	int rc;

	/*
	 * When BIOS maintains CXL error reporting control, it will process
	 * event records. Only one agent can do so.
	 */
	if (!host_bridge->native_cxl_error)
		return 0;

	rc = cxl_mem_alloc_event_buf(cxlds);
	if (rc)
		return rc;

	rc = cxl_event_get_int_policy(cxlds, &policy);
	if (rc)
		return rc;

	if (cxl_event_int_is_fw(policy.info_settings) ||
	    cxl_event_int_is_fw(policy.warn_settings) ||
	    cxl_event_int_is_fw(policy.failure_settings) ||
	    cxl_event_int_is_fw(policy.fatal_settings)) {
		dev_err(cxlds->dev, "FW still in control of Event Logs despite _OSC settings\n");
		return -EBUSY;
	}

	rc = cxl_event_irqsetup(cxlds);
	if (rc)
		return rc;

	cxl_mem_get_event_records(cxlds, CXLDEV_EVENT_STATUS_ALL);

	return 0;
}

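/*
 * Main probe path: enable the PCI device, map the memdev and component
 * register blocks, bring up the mailbox, enumerate commands and device
 * info, and register the resulting memdev on the CXL bus.
 */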
static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus);
	struct cxl_register_map map;
	struct cxl_memdev *cxlmd;
	struct cxl_dev_state *cxlds;
	int rc;

	/*
	 * Double check the anonymous union trickery in struct cxl_regs
	 * FIXME switch to struct_group()
	 */
	BUILD_BUG_ON(offsetof(struct cxl_regs, memdev) !=
		     offsetof(struct cxl_regs, device_regs.memdev));

	rc = pcim_enable_device(pdev);
	if (rc)
		return rc;
	pci_set_master(pdev);

	cxlds = cxl_dev_state_create(&pdev->dev);
	if (IS_ERR(cxlds))
		return PTR_ERR(cxlds);
	pci_set_drvdata(pdev, cxlds);

	cxlds->rcd = is_cxl_restricted(pdev);
	cxlds->serial = pci_get_dsn(pdev);
	cxlds->cxl_dvsec = pci_find_dvsec_capability(
		pdev, PCI_DVSEC_VENDOR_ID_CXL, CXL_DVSEC_PCIE_DEVICE);
	if (!cxlds->cxl_dvsec)
		dev_warn(&pdev->dev,
			 "Device DVSEC not present, skip CXL.mem init\n");

	rc = cxl_setup_regs(pdev, CXL_REGLOC_RBI_MEMDEV, &map);
	if (rc)
		return rc;

	rc = cxl_map_device_regs(&pdev->dev, &cxlds->regs.device_regs, &map);
	if (rc)
		return rc;

	/*
	 * If the component registers can't be found, the cxl_pci driver may
	 * still be useful for management functions so don't return an error.
	 */
	cxlds->component_reg_phys = CXL_RESOURCE_NONE;
	rc = cxl_setup_regs(pdev, CXL_REGLOC_RBI_COMPONENT, &map);
	if (rc)
		dev_warn(&pdev->dev, "No component registers (%d)\n", rc);

	cxlds->component_reg_phys = map.resource;

	devm_cxl_pci_create_doe(cxlds);

	rc = cxl_map_component_regs(&pdev->dev, &cxlds->regs.component,
				    &map, BIT(CXL_CM_CAP_CAP_ID_RAS));
	if (rc)
		dev_dbg(&pdev->dev, "Failed to map RAS capability.\n");

	rc = cxl_pci_setup_mailbox(cxlds);
	if (rc)
		return rc;

	rc = cxl_enumerate_cmds(cxlds);
	if (rc)
		return rc;

	rc = cxl_set_timestamp(cxlds);
	if (rc)
		return rc;

	rc = cxl_dev_state_identify(cxlds);
	if (rc)
		return rc;

	rc = cxl_mem_create_range_info(cxlds);
	if (rc)
		return rc;

	rc = cxl_alloc_irq_vectors(pdev);
	if (rc)
		return rc;

	cxlmd = devm_cxl_add_memdev(cxlds);
	if (IS_ERR(cxlmd))
		return PTR_ERR(cxlmd);

	rc = cxl_event_config(host_bridge, cxlds);
	if (rc)
		return rc;

	/* RAS unmask failures are non-fatal, do not fail the probe */
	rc = cxl_pci_ras_unmask(pdev);
	if (rc)
		dev_dbg(&pdev->dev, "No RAS reporting unmasked\n");

	pci_save_state(pdev);

	return 0;
}

static const struct pci_device_id cxl_mem_pci_tbl[] = {
	/* PCI class code for CXL.mem Type-3 Devices */
	{ PCI_DEVICE_CLASS((PCI_CLASS_MEMORY_CXL << 8 | CXL_MEMORY_PROGIF), ~0)},
	{ /* terminate list */ },
};
MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl);

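/*
 * AER recovery: after a slot reset, restore config space and reattach the
 * memdev so the CXL.mem stack comes back up.
 */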
static pci_ers_result_t cxl_slot_reset(struct pci_dev *pdev)
{
	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
	struct cxl_memdev *cxlmd = cxlds->cxlmd;
	struct device *dev = &cxlmd->dev;

	dev_info(&pdev->dev, "%s: restart CXL.mem after slot reset\n",
		 dev_name(dev));
	pci_restore_state(pdev);
	if (device_attach(dev) <= 0)
		return PCI_ERS_RESULT_DISCONNECT;
	return PCI_ERS_RESULT_RECOVERED;
}

static void cxl_error_resume(struct pci_dev *pdev)
{
	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
	struct cxl_memdev *cxlmd = cxlds->cxlmd;
	struct device *dev = &cxlmd->dev;

	dev_info(&pdev->dev, "%s: error resume %s\n", dev_name(dev),
		 dev->driver ? "successful" : "failed");
}

static const struct pci_error_handlers cxl_error_handlers = {
	.error_detected = cxl_error_detected,
	.slot_reset = cxl_slot_reset,
	.resume = cxl_error_resume,
	.cor_error_detected = cxl_cor_error_detected,
};

static struct pci_driver cxl_pci_driver = {
	.name			= KBUILD_MODNAME,
	.id_table		= cxl_mem_pci_tbl,
	.probe			= cxl_pci_probe,
	.err_handler		= &cxl_error_handlers,
	.driver = {
		.probe_type	= PROBE_PREFER_ASYNCHRONOUS,
	},
};

MODULE_LICENSE("GPL v2");
module_pci_driver(cxl_pci_driver);
MODULE_IMPORT_NS(CXL);