PCI/MSI: Deobfuscate virtual MSI-X
[linux-block.git] drivers/pci/msi.c

// SPDX-License-Identifier: GPL-2.0
/*
 * PCI Message Signaled Interrupt (MSI)
 *
 * Copyright (C) 2003-2004 Intel
 * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com)
 * Copyright (C) 2016 Christoph Hellwig.
 */

#include <linux/err.h>
#include <linux/mm.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/export.h>
#include <linux/ioport.h>
#include <linux/pci.h>
#include <linux/proc_fs.h>
#include <linux/msi.h>
#include <linux/smp.h>
#include <linux/errno.h>
#include <linux/io.h>
#include <linux/acpi_iort.h>
#include <linux/slab.h>
#include <linux/irqdomain.h>
#include <linux/of_irq.h>

#include "pci.h"

#ifdef CONFIG_PCI_MSI

static int pci_msi_enable = 1;
int pci_msi_ignore_mask;

#define msix_table_size(flags)	((flags & PCI_MSIX_FLAGS_QSIZE) + 1)

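/*
 * Editor's illustrative note (not part of the original file): the MSI-X
 * Table Size field in the Message Control word is encoded as N-1, so a
 * raw QSIZE value of 0x007 means the device implements 8 table entries,
 * and the architectural maximum of 0x7FF means 2048 entries.
 */
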
#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN
static int pci_msi_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
	struct irq_domain *domain;

	domain = dev_get_msi_domain(&dev->dev);
	if (domain && irq_domain_is_hierarchy(domain))
		return msi_domain_alloc_irqs(domain, &dev->dev, nvec);

	return arch_setup_msi_irqs(dev, nvec, type);
}

static void pci_msi_teardown_msi_irqs(struct pci_dev *dev)
{
	struct irq_domain *domain;

	domain = dev_get_msi_domain(&dev->dev);
	if (domain && irq_domain_is_hierarchy(domain))
		msi_domain_free_irqs(domain, &dev->dev);
	else
		arch_teardown_msi_irqs(dev);
}
#else
#define pci_msi_setup_msi_irqs		arch_setup_msi_irqs
#define pci_msi_teardown_msi_irqs	arch_teardown_msi_irqs
#endif

#ifdef CONFIG_PCI_MSI_ARCH_FALLBACKS
/* Arch hooks */
int __weak arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
{
	return -EINVAL;
}

void __weak arch_teardown_msi_irq(unsigned int irq)
{
}

int __weak arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
	struct msi_desc *entry;
	int ret;

	/*
	 * If an architecture wants to support multiple MSI, it needs to
	 * override arch_setup_msi_irqs()
	 */
	if (type == PCI_CAP_ID_MSI && nvec > 1)
		return 1;

	for_each_pci_msi_entry(entry, dev) {
		ret = arch_setup_msi_irq(dev, entry);
		if (ret < 0)
			return ret;
		if (ret > 0)
			return -ENOSPC;
	}

	return 0;
}

void __weak arch_teardown_msi_irqs(struct pci_dev *dev)
{
	int i;
	struct msi_desc *entry;

	for_each_pci_msi_entry(entry, dev)
		if (entry->irq)
			for (i = 0; i < entry->nvec_used; i++)
				arch_teardown_msi_irq(entry->irq + i);
}
#endif /* CONFIG_PCI_MSI_ARCH_FALLBACKS */

static void default_restore_msi_irq(struct pci_dev *dev, int irq)
{
	struct msi_desc *entry;

	entry = NULL;
	if (dev->msix_enabled) {
		for_each_pci_msi_entry(entry, dev) {
			if (irq == entry->irq)
				break;
		}
	} else if (dev->msi_enabled) {
		entry = irq_get_msi_desc(irq);
	}

	if (entry)
		__pci_write_msi_msg(entry, &entry->msg);
}

void __weak arch_restore_msi_irqs(struct pci_dev *dev)
{
	return default_restore_msi_irqs(dev);
}

static inline __attribute_const__ u32 msi_mask(unsigned x)
{
	/* Don't shift by >= width of type */
	if (x >= 5)
		return 0xffffffff;
	return (1 << (1 << x)) - 1;
}

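/*
 * Editor's illustrative note (not part of the original file): the Multiple
 * Message Capable field is a log2 encoding, so msi_mask() expands it into
 * a bitmap covering the vectors it describes:
 *
 *	msi_mask(0) == 0x01		(1 vector)
 *	msi_mask(3) == 0xff		(8 vectors)
 *	msi_mask(5) == 0xffffffff	(clamped so we never shift by >= 32)
 */
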
/*
 * PCI 2.3 does not specify mask bits for each MSI interrupt. Attempting to
 * mask all MSI interrupts by clearing the MSI enable bit does not work
 * reliably as devices without an INTx disable bit will then generate a
 * level IRQ which will never be cleared.
 */
static void __pci_msi_desc_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
{
	raw_spinlock_t *lock = &desc->dev->msi_lock;
	unsigned long flags;

	if (pci_msi_ignore_mask || !desc->msi_attrib.maskbit)
		return;

	raw_spin_lock_irqsave(lock, flags);
	desc->msi_mask &= ~mask;
	desc->msi_mask |= flag;
	pci_write_config_dword(msi_desc_to_pci_dev(desc), desc->mask_pos,
			       desc->msi_mask);
	raw_spin_unlock_irqrestore(lock, flags);
}

static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
{
	__pci_msi_desc_mask_irq(desc, mask, flag);
}

static void __iomem *pci_msix_desc_addr(struct msi_desc *desc)
{
	return desc->mask_base + desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
}

/*
 * This internal function does not flush PCI writes to the device.
 * All users must ensure that they read from the device before either
 * assuming that the device state is up to date, or returning out of this
 * file. This saves a few milliseconds when initialising devices with lots
 * of MSI-X interrupts.
 */
static u32 __pci_msix_desc_mask_irq(struct msi_desc *desc, u32 flag)
{
	void __iomem *desc_addr = pci_msix_desc_addr(desc);
	u32 ctrl = desc->msix_ctrl;

	if (pci_msi_ignore_mask || desc->msi_attrib.is_virtual)
		return 0;

	ctrl &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
	if (flag & PCI_MSIX_ENTRY_CTRL_MASKBIT)
		ctrl |= PCI_MSIX_ENTRY_CTRL_MASKBIT;

	writel(ctrl, desc_addr + PCI_MSIX_ENTRY_VECTOR_CTRL);

	return ctrl;
}

static void msix_mask_irq(struct msi_desc *desc, u32 flag)
{
	desc->msix_ctrl = __pci_msix_desc_mask_irq(desc, flag);
}

static void msi_set_mask_bit(struct irq_data *data, u32 flag)
{
	struct msi_desc *desc = irq_data_get_msi_desc(data);

	if (desc->msi_attrib.is_msix) {
		msix_mask_irq(desc, flag);
		readl(desc->mask_base);		/* Flush write to device */
	} else {
		unsigned offset = data->irq - desc->irq;
		msi_mask_irq(desc, 1 << offset, flag << offset);
	}
}

/**
 * pci_msi_mask_irq - Generic IRQ chip callback to mask PCI/MSI interrupts
 * @data:	pointer to irqdata associated to that interrupt
 */
void pci_msi_mask_irq(struct irq_data *data)
{
	msi_set_mask_bit(data, 1);
}
EXPORT_SYMBOL_GPL(pci_msi_mask_irq);

/**
 * pci_msi_unmask_irq - Generic IRQ chip callback to unmask PCI/MSI interrupts
 * @data:	pointer to irqdata associated to that interrupt
 */
void pci_msi_unmask_irq(struct irq_data *data)
{
	msi_set_mask_bit(data, 0);
}
EXPORT_SYMBOL_GPL(pci_msi_unmask_irq);

void default_restore_msi_irqs(struct pci_dev *dev)
{
	struct msi_desc *entry;

	for_each_pci_msi_entry(entry, dev)
		default_restore_msi_irq(dev, entry->irq);
}

void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
{
	struct pci_dev *dev = msi_desc_to_pci_dev(entry);

	BUG_ON(dev->current_state != PCI_D0);

	if (entry->msi_attrib.is_msix) {
		void __iomem *base = pci_msix_desc_addr(entry);

		if (WARN_ON_ONCE(entry->msi_attrib.is_virtual))
			return;

		msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR);
		msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR);
		msg->data = readl(base + PCI_MSIX_ENTRY_DATA);
	} else {
		int pos = dev->msi_cap;
		u16 data;

		pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_LO,
				      &msg->address_lo);
		if (entry->msi_attrib.is_64) {
			pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_HI,
					      &msg->address_hi);
			pci_read_config_word(dev, pos + PCI_MSI_DATA_64, &data);
		} else {
			msg->address_hi = 0;
			pci_read_config_word(dev, pos + PCI_MSI_DATA_32, &data);
		}
		msg->data = data;
	}
}

void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
{
	struct pci_dev *dev = msi_desc_to_pci_dev(entry);

	if (dev->current_state != PCI_D0 || pci_dev_is_disconnected(dev)) {
		/* Don't touch the hardware now */
	} else if (entry->msi_attrib.is_msix) {
		void __iomem *base = pci_msix_desc_addr(entry);
		bool unmasked = !(entry->msix_ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT);

		if (entry->msi_attrib.is_virtual)
			goto skip;

		/*
		 * The specification mandates that the entry is masked
		 * when the message is modified:
		 *
		 * "If software changes the Address or Data value of an
		 * entry while the entry is unmasked, the result is
		 * undefined."
		 */
		if (unmasked)
			__pci_msix_desc_mask_irq(entry, PCI_MSIX_ENTRY_CTRL_MASKBIT);

		writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR);
		writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR);
		writel(msg->data, base + PCI_MSIX_ENTRY_DATA);

		if (unmasked)
			__pci_msix_desc_mask_irq(entry, 0);

		/* Ensure that the writes are visible in the device */
		readl(base + PCI_MSIX_ENTRY_DATA);
	} else {
		int pos = dev->msi_cap;
		u16 msgctl;

		pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl);
		msgctl &= ~PCI_MSI_FLAGS_QSIZE;
		msgctl |= entry->msi_attrib.multiple << 4;
		pci_write_config_word(dev, pos + PCI_MSI_FLAGS, msgctl);

		pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_LO,
				       msg->address_lo);
		if (entry->msi_attrib.is_64) {
			pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_HI,
					       msg->address_hi);
			pci_write_config_word(dev, pos + PCI_MSI_DATA_64,
					      msg->data);
		} else {
			pci_write_config_word(dev, pos + PCI_MSI_DATA_32,
					      msg->data);
		}
		/* Ensure that the writes are visible in the device */
		pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl);
	}

skip:
	entry->msg = *msg;

	if (entry->write_msi_msg)
		entry->write_msi_msg(entry, entry->write_msi_msg_data);
}

void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg)
{
	struct msi_desc *entry = irq_get_msi_desc(irq);

	__pci_write_msi_msg(entry, msg);
}
EXPORT_SYMBOL_GPL(pci_write_msi_msg);

static void free_msi_irqs(struct pci_dev *dev)
{
	struct list_head *msi_list = dev_to_msi_list(&dev->dev);
	struct msi_desc *entry, *tmp;
	struct attribute **msi_attrs;
	struct device_attribute *dev_attr;
	int i, count = 0;

	for_each_pci_msi_entry(entry, dev)
		if (entry->irq)
			for (i = 0; i < entry->nvec_used; i++)
				BUG_ON(irq_has_action(entry->irq + i));

	pci_msi_teardown_msi_irqs(dev);

	list_for_each_entry_safe(entry, tmp, msi_list, list) {
		if (entry->msi_attrib.is_msix) {
			if (list_is_last(&entry->list, msi_list))
				iounmap(entry->mask_base);
		}

		list_del(&entry->list);
		free_msi_entry(entry);
	}

	if (dev->msi_irq_groups) {
		sysfs_remove_groups(&dev->dev.kobj, dev->msi_irq_groups);
		msi_attrs = dev->msi_irq_groups[0]->attrs;
		while (msi_attrs[count]) {
			dev_attr = container_of(msi_attrs[count],
						struct device_attribute, attr);
			kfree(dev_attr->attr.name);
			kfree(dev_attr);
			++count;
		}
		kfree(msi_attrs);
		kfree(dev->msi_irq_groups[0]);
		kfree(dev->msi_irq_groups);
		dev->msi_irq_groups = NULL;
	}
}

static void pci_intx_for_msi(struct pci_dev *dev, int enable)
{
	if (!(dev->dev_flags & PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG))
		pci_intx(dev, enable);
}

static void pci_msi_set_enable(struct pci_dev *dev, int enable)
{
	u16 control;

	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
	control &= ~PCI_MSI_FLAGS_ENABLE;
	if (enable)
		control |= PCI_MSI_FLAGS_ENABLE;
	pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control);
}

static void __pci_restore_msi_state(struct pci_dev *dev)
{
	u16 control;
	struct msi_desc *entry;

	if (!dev->msi_enabled)
		return;

	entry = irq_get_msi_desc(dev->irq);

	pci_intx_for_msi(dev, 0);
	pci_msi_set_enable(dev, 0);
	arch_restore_msi_irqs(dev);

	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
	msi_mask_irq(entry, msi_mask(entry->msi_attrib.multi_cap),
		     entry->msi_mask);
	control &= ~PCI_MSI_FLAGS_QSIZE;
	control |= (entry->msi_attrib.multiple << 4) | PCI_MSI_FLAGS_ENABLE;
	pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control);
}

static void pci_msix_clear_and_set_ctrl(struct pci_dev *dev, u16 clear, u16 set)
{
	u16 ctrl;

	pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &ctrl);
	ctrl &= ~clear;
	ctrl |= set;
	pci_write_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, ctrl);
}

static void __pci_restore_msix_state(struct pci_dev *dev)
{
	struct msi_desc *entry;

	if (!dev->msix_enabled)
		return;
	BUG_ON(list_empty(dev_to_msi_list(&dev->dev)));

	/* route the table */
	pci_intx_for_msi(dev, 0);
	pci_msix_clear_and_set_ctrl(dev, 0,
				PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL);

	arch_restore_msi_irqs(dev);
	for_each_pci_msi_entry(entry, dev)
		msix_mask_irq(entry, entry->msix_ctrl);

	pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0);
}

void pci_restore_msi_state(struct pci_dev *dev)
{
	__pci_restore_msi_state(dev);
	__pci_restore_msix_state(dev);
}
EXPORT_SYMBOL_GPL(pci_restore_msi_state);

static ssize_t msi_mode_show(struct device *dev, struct device_attribute *attr,
			     char *buf)
{
	struct msi_desc *entry;
	unsigned long irq;
	int retval;

	retval = kstrtoul(attr->attr.name, 10, &irq);
	if (retval)
		return retval;

	entry = irq_get_msi_desc(irq);
	if (!entry)
		return -ENODEV;

	return sysfs_emit(buf, "%s\n",
			  entry->msi_attrib.is_msix ? "msix" : "msi");
}

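/*
 * Editor's illustrative note (not part of the original file): each vector
 * appears as a read-only sysfs attribute named after its Linux IRQ number
 * under /sys/bus/pci/devices/<BDF>/msi_irqs/, and reading one invokes
 * msi_mode_show() above, yielding "msi" or "msix". The device address and
 * IRQ number below are made up:
 *
 *	$ cat /sys/bus/pci/devices/0000:00:1f.6/msi_irqs/27
 *	msi
 */
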
static int populate_msi_sysfs(struct pci_dev *pdev)
{
	struct attribute **msi_attrs;
	struct attribute *msi_attr;
	struct device_attribute *msi_dev_attr;
	struct attribute_group *msi_irq_group;
	const struct attribute_group **msi_irq_groups;
	struct msi_desc *entry;
	int ret = -ENOMEM;
	int num_msi = 0;
	int count = 0;
	int i;

	/* Determine how many msi entries we have */
	for_each_pci_msi_entry(entry, pdev)
		num_msi += entry->nvec_used;
	if (!num_msi)
		return 0;

	/* Dynamically create the MSI attributes for the PCI device */
	msi_attrs = kcalloc(num_msi + 1, sizeof(void *), GFP_KERNEL);
	if (!msi_attrs)
		return -ENOMEM;
	for_each_pci_msi_entry(entry, pdev) {
		for (i = 0; i < entry->nvec_used; i++) {
			msi_dev_attr = kzalloc(sizeof(*msi_dev_attr), GFP_KERNEL);
			if (!msi_dev_attr)
				goto error_attrs;
			msi_attrs[count] = &msi_dev_attr->attr;

			sysfs_attr_init(&msi_dev_attr->attr);
			msi_dev_attr->attr.name = kasprintf(GFP_KERNEL, "%d",
							    entry->irq + i);
			if (!msi_dev_attr->attr.name)
				goto error_attrs;
			msi_dev_attr->attr.mode = S_IRUGO;
			msi_dev_attr->show = msi_mode_show;
			++count;
		}
	}

	msi_irq_group = kzalloc(sizeof(*msi_irq_group), GFP_KERNEL);
	if (!msi_irq_group)
		goto error_attrs;
	msi_irq_group->name = "msi_irqs";
	msi_irq_group->attrs = msi_attrs;

	msi_irq_groups = kcalloc(2, sizeof(void *), GFP_KERNEL);
	if (!msi_irq_groups)
		goto error_irq_group;
	msi_irq_groups[0] = msi_irq_group;

	ret = sysfs_create_groups(&pdev->dev.kobj, msi_irq_groups);
	if (ret)
		goto error_irq_groups;
	pdev->msi_irq_groups = msi_irq_groups;

	return 0;

error_irq_groups:
	kfree(msi_irq_groups);
error_irq_group:
	kfree(msi_irq_group);
error_attrs:
	count = 0;
	msi_attr = msi_attrs[count];
	while (msi_attr) {
		msi_dev_attr = container_of(msi_attr, struct device_attribute, attr);
		kfree(msi_attr->name);
		kfree(msi_dev_attr);
		++count;
		msi_attr = msi_attrs[count];
	}
	kfree(msi_attrs);
	return ret;
}

static struct msi_desc *
msi_setup_entry(struct pci_dev *dev, int nvec, struct irq_affinity *affd)
{
	struct irq_affinity_desc *masks = NULL;
	struct msi_desc *entry;
	u16 control;

	if (affd)
		masks = irq_create_affinity_masks(nvec, affd);

	/* MSI Entry Initialization */
	entry = alloc_msi_entry(&dev->dev, nvec, masks);
	if (!entry)
		goto out;

	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);

	entry->msi_attrib.is_msix	= 0;
	entry->msi_attrib.is_64		= !!(control & PCI_MSI_FLAGS_64BIT);
	entry->msi_attrib.is_virtual	= 0;
	entry->msi_attrib.entry_nr	= 0;
	entry->msi_attrib.maskbit	= !!(control & PCI_MSI_FLAGS_MASKBIT);
	entry->msi_attrib.default_irq	= dev->irq;	/* Save IOAPIC IRQ */
	entry->msi_attrib.multi_cap	= (control & PCI_MSI_FLAGS_QMASK) >> 1;
	entry->msi_attrib.multiple	= ilog2(__roundup_pow_of_two(nvec));

	if (control & PCI_MSI_FLAGS_64BIT)
		entry->mask_pos = dev->msi_cap + PCI_MSI_MASK_64;
	else
		entry->mask_pos = dev->msi_cap + PCI_MSI_MASK_32;

	/* Save the initial mask status */
	if (entry->msi_attrib.maskbit)
		pci_read_config_dword(dev, entry->mask_pos, &entry->msi_mask);

out:
	kfree(masks);
	return entry;
}

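/*
 * Editor's illustrative note (not part of the original file): "multiple"
 * stores the granted vector count in the same log2 encoding the hardware
 * uses, so a request for nvec == 3 is rounded up to the next power of two:
 *
 *	ilog2(__roundup_pow_of_two(3)) == ilog2(4) == 2
 *
 * which tells the device that 4 (2^2) vectors were allocated.
 */
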
static int msi_verify_entries(struct pci_dev *dev)
{
	struct msi_desc *entry;

	if (!dev->no_64bit_msi)
		return 0;

	for_each_pci_msi_entry(entry, dev) {
		if (entry->msg.address_hi) {
			pci_err(dev, "arch assigned 64-bit MSI address %#x%08x but device only supports 32 bits\n",
				entry->msg.address_hi, entry->msg.address_lo);
			return -EIO;
		}
	}
	return 0;
}

/**
 * msi_capability_init - configure device's MSI capability structure
 * @dev: pointer to the pci_dev data structure of MSI device function
 * @nvec: number of interrupts to allocate
 * @affd: description of automatic IRQ affinity assignments (may be %NULL)
 *
 * Setup the MSI capability structure of the device with the requested
 * number of interrupts. A return value of zero indicates the successful
 * setup of an entry with the new MSI IRQ. A negative return value indicates
 * an error, and a positive return value indicates the number of interrupts
 * which could have been allocated.
 */
static int msi_capability_init(struct pci_dev *dev, int nvec,
			       struct irq_affinity *affd)
{
	struct msi_desc *entry;
	int ret;
	unsigned mask;

	pci_msi_set_enable(dev, 0);	/* Disable MSI during set up */

	entry = msi_setup_entry(dev, nvec, affd);
	if (!entry)
		return -ENOMEM;

	/* All MSIs are unmasked by default; mask them all */
	mask = msi_mask(entry->msi_attrib.multi_cap);
	msi_mask_irq(entry, mask, mask);

	list_add_tail(&entry->list, dev_to_msi_list(&dev->dev));

	/* Configure MSI capability structure */
	ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI);
	if (ret)
		goto err;

	ret = msi_verify_entries(dev);
	if (ret)
		goto err;

	ret = populate_msi_sysfs(dev);
	if (ret)
		goto err;

	/* Set MSI enabled bits */
	pci_intx_for_msi(dev, 0);
	pci_msi_set_enable(dev, 1);
	dev->msi_enabled = 1;

	pcibios_free_irq(dev);
	dev->irq = entry->irq;
	return 0;

err:
	msi_mask_irq(entry, mask, 0);
	free_msi_irqs(dev);
	return ret;
}

static void __iomem *msix_map_region(struct pci_dev *dev, unsigned nr_entries)
{
	resource_size_t phys_addr;
	u32 table_offset;
	unsigned long flags;
	u8 bir;

	pci_read_config_dword(dev, dev->msix_cap + PCI_MSIX_TABLE,
			      &table_offset);
	bir = (u8)(table_offset & PCI_MSIX_TABLE_BIR);
	flags = pci_resource_flags(dev, bir);
	if (!flags || (flags & IORESOURCE_UNSET))
		return NULL;

	table_offset &= PCI_MSIX_TABLE_OFFSET;
	phys_addr = pci_resource_start(dev, bir) + table_offset;

	return ioremap(phys_addr, nr_entries * PCI_MSIX_ENTRY_SIZE);
}

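/*
 * Editor's illustrative note (not part of the original file): the MSI-X
 * Table register packs the BAR Indicator Register (BIR) into bits 2:0 and
 * the table offset into the remaining bits. A raw value of 0x00002003
 * therefore means the table lives in the BAR indicated by BIR 3, starting
 * 0x2000 bytes into it, and the mapping above covers
 * nr_entries * PCI_MSIX_ENTRY_SIZE (16) bytes.
 */
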
static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
			      struct msix_entry *entries, int nvec,
			      struct irq_affinity *affd)
{
	struct irq_affinity_desc *curmsk, *masks = NULL;
	struct msi_desc *entry;
	void __iomem *addr;
	int ret, i;
	int vec_count = pci_msix_vec_count(dev);

	if (affd)
		masks = irq_create_affinity_masks(nvec, affd);

	for (i = 0, curmsk = masks; i < nvec; i++) {
		entry = alloc_msi_entry(&dev->dev, 1, curmsk);
		if (!entry) {
			if (!i)
				iounmap(base);
			else
				free_msi_irqs(dev);
			/* Not enough memory. Don't try again */
			ret = -ENOMEM;
			goto out;
		}

		entry->msi_attrib.is_msix	= 1;
		entry->msi_attrib.is_64		= 1;

		if (entries)
			entry->msi_attrib.entry_nr = entries[i].entry;
		else
			entry->msi_attrib.entry_nr = i;

		entry->msi_attrib.is_virtual =
			entry->msi_attrib.entry_nr >= vec_count;

		entry->msi_attrib.default_irq	= dev->irq;
		entry->mask_base		= base;

		if (!entry->msi_attrib.is_virtual) {
			addr = pci_msix_desc_addr(entry);
			entry->msix_ctrl = readl(addr + PCI_MSIX_ENTRY_VECTOR_CTRL);
		}

		list_add_tail(&entry->list, dev_to_msi_list(&dev->dev));
		if (masks)
			curmsk++;
	}
	ret = 0;
out:
	kfree(masks);
	return ret;
}

static void msix_update_entries(struct pci_dev *dev, struct msix_entry *entries)
{
	struct msi_desc *entry;

	for_each_pci_msi_entry(entry, dev) {
		if (entries) {
			entries->vector = entry->irq;
			entries++;
		}
	}
}

static void msix_mask_all(void __iomem *base, int tsize)
{
	u32 ctrl = PCI_MSIX_ENTRY_CTRL_MASKBIT;
	int i;

	for (i = 0; i < tsize; i++, base += PCI_MSIX_ENTRY_SIZE)
		writel(ctrl, base + PCI_MSIX_ENTRY_VECTOR_CTRL);
}

/**
 * msix_capability_init - configure device's MSI-X capability
 * @dev: pointer to the pci_dev data structure of MSI-X device function
 * @entries: pointer to an array of struct msix_entry entries
 * @nvec: number of @entries
 * @affd: Optional pointer to enable automatic affinity assignment
 *
 * Setup the MSI-X capability structure of the device function with the
 * requested number of MSI-X IRQs. A return value of zero indicates the
 * successful setup of the requested MSI-X entries with allocated IRQs; a
 * negative value indicates an error, and a positive value reports the
 * number of IRQs that could have been allocated instead.
 **/
static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries,
				int nvec, struct irq_affinity *affd)
{
	void __iomem *base;
	int ret, tsize;
	u16 control;

	/*
	 * Some devices require MSI-X to be enabled before the MSI-X
	 * registers can be accessed. Mask all the vectors to prevent
	 * interrupts coming in before they're fully set up.
	 */
	pci_msix_clear_and_set_ctrl(dev, 0, PCI_MSIX_FLAGS_MASKALL |
				    PCI_MSIX_FLAGS_ENABLE);

	pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control);
	/* Request & Map MSI-X table region */
	tsize = msix_table_size(control);
	base = msix_map_region(dev, tsize);
	if (!base) {
		ret = -ENOMEM;
		goto out_disable;
	}

	/* Ensure that all table entries are masked. */
	msix_mask_all(base, tsize);

	ret = msix_setup_entries(dev, base, entries, nvec, affd);
	if (ret)
		goto out_disable;

	ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX);
	if (ret)
		goto out_avail;

	/* Check if all MSI entries honor device restrictions */
	ret = msi_verify_entries(dev);
	if (ret)
		goto out_free;

	msix_update_entries(dev, entries);

	ret = populate_msi_sysfs(dev);
	if (ret)
		goto out_free;

	/* Set MSI-X enabled bits and unmask the function */
	pci_intx_for_msi(dev, 0);
	dev->msix_enabled = 1;
	pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0);

	pcibios_free_irq(dev);
	return 0;

out_avail:
	if (ret < 0) {
		/*
		 * If we had some success, report the number of IRQs
		 * we succeeded in setting up.
		 */
		struct msi_desc *entry;
		int avail = 0;

		for_each_pci_msi_entry(entry, dev) {
			if (entry->irq != 0)
				avail++;
		}
		if (avail != 0)
			ret = avail;
	}

out_free:
	free_msi_irqs(dev);

out_disable:
	pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0);

	return ret;
}

/**
 * pci_msi_supported - check whether MSI may be enabled on a device
 * @dev: pointer to the pci_dev data structure of MSI device function
 * @nvec: how many MSIs have been requested?
 *
 * Look at global flags, the device itself, and its parent buses
 * to determine if MSI/-X are supported for the device. If MSI/-X is
 * supported return 1, else return 0.
 **/
static int pci_msi_supported(struct pci_dev *dev, int nvec)
{
	struct pci_bus *bus;

	/* MSI must be globally enabled and supported by the device */
	if (!pci_msi_enable)
		return 0;

	if (!dev || dev->no_msi)
		return 0;

	/*
	 * You can't ask to have zero or fewer MSIs configured:
	 *  a) it's stupid ..
	 *  b) the list manipulation code assumes nvec >= 1.
	 */
	if (nvec < 1)
		return 0;

	/*
	 * Any bridge which does NOT route MSI transactions from its
	 * secondary bus to its primary bus must set NO_MSI flag on
	 * the secondary pci_bus.
	 *
	 * The NO_MSI flag can either be set directly by:
	 * - arch-specific PCI host bus controller drivers (deprecated)
	 * - quirks for specific PCI bridges
	 *
	 * or indirectly by platform-specific PCI host bridge drivers by
	 * advertising the 'msi_domain' property, which results in
	 * the NO_MSI flag when no MSI domain is found for this bridge
	 * at probe time.
	 */
	for (bus = dev->bus; bus; bus = bus->parent)
		if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
			return 0;

	return 1;
}

/**
 * pci_msi_vec_count - Return the number of MSI vectors a device can send
 * @dev: device to report about
 *
 * This function returns the number of MSI vectors a device requested via
 * Multiple Message Capable register. It returns a negative errno if the
 * device is not capable of sending MSI interrupts. Otherwise, the call
 * succeeds and returns a power of two, up to a maximum of 2^5 (32),
 * according to the MSI specification.
 **/
int pci_msi_vec_count(struct pci_dev *dev)
{
	int ret;
	u16 msgctl;

	if (!dev->msi_cap)
		return -EINVAL;

	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &msgctl);
	ret = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1);

	return ret;
}
EXPORT_SYMBOL(pci_msi_vec_count);

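/*
 * Editor's illustrative note (not part of the original file): the Multiple
 * Message Capable field sits in bits 3:1 of the Message Control word, so a
 * device whose (msgctl & PCI_MSI_FLAGS_QMASK) >> 1 evaluates to 3 reports
 * 1 << 3 == 8 requestable vectors; the encoding tops out at 2^5 == 32.
 */
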
static void pci_msi_shutdown(struct pci_dev *dev)
{
	struct msi_desc *desc;
	u32 mask;

	if (!pci_msi_enable || !dev || !dev->msi_enabled)
		return;

	BUG_ON(list_empty(dev_to_msi_list(&dev->dev)));
	desc = first_pci_msi_entry(dev);

	pci_msi_set_enable(dev, 0);
	pci_intx_for_msi(dev, 1);
	dev->msi_enabled = 0;

	/* Return the device with MSI unmasked as initial states */
	mask = msi_mask(desc->msi_attrib.multi_cap);
	msi_mask_irq(desc, mask, 0);

	/* Restore dev->irq to its default pin-assertion IRQ */
	dev->irq = desc->msi_attrib.default_irq;
	pcibios_alloc_irq(dev);
}

void pci_disable_msi(struct pci_dev *dev)
{
	if (!pci_msi_enable || !dev || !dev->msi_enabled)
		return;

	pci_msi_shutdown(dev);
	free_msi_irqs(dev);
}
EXPORT_SYMBOL(pci_disable_msi);

/**
 * pci_msix_vec_count - return the number of device's MSI-X table entries
 * @dev: pointer to the pci_dev data structure of MSI-X device function
 *
 * This function returns the number of device's MSI-X table entries and
 * therefore the number of MSI-X vectors device is capable of sending.
 * It returns a negative errno if the device is not capable of sending MSI-X
 * interrupts.
 **/
int pci_msix_vec_count(struct pci_dev *dev)
{
	u16 control;

	if (!dev->msix_cap)
		return -EINVAL;

	pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control);
	return msix_table_size(control);
}
EXPORT_SYMBOL(pci_msix_vec_count);

static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries,
			     int nvec, struct irq_affinity *affd, int flags)
{
	int nr_entries;
	int i, j;

	if (!pci_msi_supported(dev, nvec) || dev->current_state != PCI_D0)
		return -EINVAL;

	nr_entries = pci_msix_vec_count(dev);
	if (nr_entries < 0)
		return nr_entries;
	if (nvec > nr_entries && !(flags & PCI_IRQ_VIRTUAL))
		return nr_entries;

	if (entries) {
		/* Check for any invalid entries */
		for (i = 0; i < nvec; i++) {
			if (entries[i].entry >= nr_entries)
				return -EINVAL;		/* invalid entry */
			for (j = i + 1; j < nvec; j++) {
				if (entries[i].entry == entries[j].entry)
					return -EINVAL;	/* duplicate entry */
			}
		}
	}

	/* Check whether driver already requested for MSI IRQ */
	if (dev->msi_enabled) {
		pci_info(dev, "can't enable MSI-X (MSI IRQ already assigned)\n");
		return -EINVAL;
	}
	return msix_capability_init(dev, entries, nvec, affd);
}

static void pci_msix_shutdown(struct pci_dev *dev)
{
	struct msi_desc *entry;

	if (!pci_msi_enable || !dev || !dev->msix_enabled)
		return;

	if (pci_dev_is_disconnected(dev)) {
		dev->msix_enabled = 0;
		return;
	}

	/* Return the device with MSI-X masked as initial states */
	for_each_pci_msi_entry(entry, dev)
		__pci_msix_desc_mask_irq(entry, 1);

	pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0);
	pci_intx_for_msi(dev, 1);
	dev->msix_enabled = 0;
	pcibios_alloc_irq(dev);
}

void pci_disable_msix(struct pci_dev *dev)
{
	if (!pci_msi_enable || !dev || !dev->msix_enabled)
		return;

	pci_msix_shutdown(dev);
	free_msi_irqs(dev);
}
EXPORT_SYMBOL(pci_disable_msix);

void pci_no_msi(void)
{
	pci_msi_enable = 0;
}

/**
 * pci_msi_enabled - is MSI enabled?
 *
 * Returns true if MSI has not been disabled by the command-line option
 * pci=nomsi.
 **/
int pci_msi_enabled(void)
{
	return pci_msi_enable;
}
EXPORT_SYMBOL(pci_msi_enabled);

static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec,
				  struct irq_affinity *affd)
{
	int nvec;
	int rc;

	if (!pci_msi_supported(dev, minvec) || dev->current_state != PCI_D0)
		return -EINVAL;

	/* Check whether driver already requested MSI-X IRQs */
	if (dev->msix_enabled) {
		pci_info(dev, "can't enable MSI (MSI-X already enabled)\n");
		return -EINVAL;
	}

	if (maxvec < minvec)
		return -ERANGE;

	if (WARN_ON_ONCE(dev->msi_enabled))
		return -EINVAL;

	nvec = pci_msi_vec_count(dev);
	if (nvec < 0)
		return nvec;
	if (nvec < minvec)
		return -ENOSPC;

	if (nvec > maxvec)
		nvec = maxvec;

	for (;;) {
		if (affd) {
			nvec = irq_calc_affinity_vectors(minvec, nvec, affd);
			if (nvec < minvec)
				return -ENOSPC;
		}

		rc = msi_capability_init(dev, nvec, affd);
		if (rc == 0)
			return nvec;

		if (rc < 0)
			return rc;
		if (rc < minvec)
			return -ENOSPC;

		nvec = rc;
	}
}

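/*
 * Editor's illustrative note (not part of the original file): the loop
 * above degrades a request gracefully. Asking for minvec == 1 and
 * maxvec == 32 on a device whose capability advertises only 8 vectors
 * first clamps nvec to 8; if the setup then reports that only 4 vectors
 * could have been allocated (a positive return), the loop retries with
 * nvec == 4, and -ENOSPC is returned only once fewer than minvec remain.
 */
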
/* deprecated, don't use */
int pci_enable_msi(struct pci_dev *dev)
{
	int rc = __pci_enable_msi_range(dev, 1, 1, NULL);
	if (rc < 0)
		return rc;
	return 0;
}
EXPORT_SYMBOL(pci_enable_msi);

static int __pci_enable_msix_range(struct pci_dev *dev,
				   struct msix_entry *entries, int minvec,
				   int maxvec, struct irq_affinity *affd,
				   int flags)
{
	int rc, nvec = maxvec;

	if (maxvec < minvec)
		return -ERANGE;

	if (WARN_ON_ONCE(dev->msix_enabled))
		return -EINVAL;

	for (;;) {
		if (affd) {
			nvec = irq_calc_affinity_vectors(minvec, nvec, affd);
			if (nvec < minvec)
				return -ENOSPC;
		}

		rc = __pci_enable_msix(dev, entries, nvec, affd, flags);
		if (rc == 0)
			return nvec;

		if (rc < 0)
			return rc;
		if (rc < minvec)
			return -ENOSPC;

		nvec = rc;
	}
}

/**
 * pci_enable_msix_range - configure device's MSI-X capability structure
 * @dev: pointer to the pci_dev data structure of MSI-X device function
 * @entries: pointer to an array of MSI-X entries
 * @minvec: minimum number of MSI-X IRQs requested
 * @maxvec: maximum number of MSI-X IRQs requested
 *
 * Setup the MSI-X capability structure of device function with a maximum
 * possible number of interrupts in the range between @minvec and @maxvec
 * upon its software driver call to request for MSI-X mode enabled on its
 * hardware device function. It returns a negative errno if an error occurs.
 * If it succeeds, it returns the actual number of interrupts allocated and
 * indicates the successful configuration of MSI-X capability structure
 * with new allocated MSI-X interrupts.
 **/
int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
			  int minvec, int maxvec)
{
	return __pci_enable_msix_range(dev, entries, minvec, maxvec, NULL, 0);
}
EXPORT_SYMBOL(pci_enable_msix_range);
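
/*
 * Editor's illustrative usage sketch (not part of the original file; pdev,
 * my_handler and my_dev_ctx are hypothetical driver names):
 *
 *	struct msix_entry entries[4];
 *	int i, nvec, err;
 *
 *	for (i = 0; i < 4; i++)
 *		entries[i].entry = i;
 *	nvec = pci_enable_msix_range(pdev, entries, 1, 4);
 *	if (nvec < 0)
 *		return nvec;
 *	(entries[i].vector now holds the Linux IRQ for table entry i)
 *	err = request_irq(entries[0].vector, my_handler, 0, "my_dev",
 *			  my_dev_ctx);
 */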

/**
 * pci_alloc_irq_vectors_affinity - allocate multiple IRQs for a device
 * @dev: PCI device to operate on
 * @min_vecs: minimum number of vectors required (must be >= 1)
 * @max_vecs: maximum (desired) number of vectors
 * @flags: flags or quirks for the allocation
 * @affd: optional description of the affinity requirements
 *
 * Allocate up to @max_vecs interrupt vectors for @dev, using MSI-X or MSI
 * vectors if available, and fall back to a single legacy vector
 * if neither is available. Return the number of vectors allocated,
 * (which might be smaller than @max_vecs) if successful, or a negative
 * error code on error. If less than @min_vecs interrupt vectors are
 * available for @dev the function will fail with -ENOSPC.
 *
 * To get the Linux IRQ number used for a vector that can be passed to
 * request_irq() use the pci_irq_vector() helper.
 */
int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
				   unsigned int max_vecs, unsigned int flags,
				   struct irq_affinity *affd)
{
	struct irq_affinity msi_default_affd = {0};
	int nvecs = -ENOSPC;

	if (flags & PCI_IRQ_AFFINITY) {
		if (!affd)
			affd = &msi_default_affd;
	} else {
		if (WARN_ON(affd))
			affd = NULL;
	}

	if (flags & PCI_IRQ_MSIX) {
		nvecs = __pci_enable_msix_range(dev, NULL, min_vecs, max_vecs,
						affd, flags);
		if (nvecs > 0)
			return nvecs;
	}

	if (flags & PCI_IRQ_MSI) {
		nvecs = __pci_enable_msi_range(dev, min_vecs, max_vecs, affd);
		if (nvecs > 0)
			return nvecs;
	}

	/* use legacy IRQ if allowed */
	if (flags & PCI_IRQ_LEGACY) {
		if (min_vecs == 1 && dev->irq) {
			/*
			 * Invoke the affinity spreading logic to ensure that
			 * the device driver can adjust queue configuration
			 * for the single interrupt case.
			 */
			if (affd)
				irq_create_affinity_masks(1, affd);
			pci_intx(dev, 1);
			return 1;
		}
	}

	return nvecs;
}
EXPORT_SYMBOL(pci_alloc_irq_vectors_affinity);
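
/*
 * Editor's illustrative usage sketch (not part of the original file; pdev,
 * my_irq_handler and my_queues are hypothetical). Most drivers call the
 * pci_alloc_irq_vectors() wrapper, which passes a NULL affinity descriptor:
 *
 *	nvec = pci_alloc_irq_vectors(pdev, 1, 8,
 *			PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY);
 *	if (nvec < 0)
 *		return nvec;
 *	for (i = 0; i < nvec; i++) {
 *		err = request_irq(pci_irq_vector(pdev, i), my_irq_handler,
 *				  0, "my_dev", &my_queues[i]);
 *		if (err)
 *			goto err_free_vectors;
 *	}
 *
 * On teardown, free each IRQ and then call pci_free_irq_vectors(pdev).
 */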

/**
 * pci_free_irq_vectors - free previously allocated IRQs for a device
 * @dev: PCI device to operate on
 *
 * Undoes the allocations and enabling in pci_alloc_irq_vectors().
 */
void pci_free_irq_vectors(struct pci_dev *dev)
{
	pci_disable_msix(dev);
	pci_disable_msi(dev);
}
EXPORT_SYMBOL(pci_free_irq_vectors);

/**
 * pci_irq_vector - return Linux IRQ number of a device vector
 * @dev: PCI device to operate on
 * @nr: device-relative interrupt vector index (0-based).
 */
int pci_irq_vector(struct pci_dev *dev, unsigned int nr)
{
	if (dev->msix_enabled) {
		struct msi_desc *entry;
		int i = 0;

		for_each_pci_msi_entry(entry, dev) {
			if (i == nr)
				return entry->irq;
			i++;
		}
		WARN_ON_ONCE(1);
		return -EINVAL;
	}

	if (dev->msi_enabled) {
		struct msi_desc *entry = first_pci_msi_entry(dev);

		if (WARN_ON_ONCE(nr >= entry->nvec_used))
			return -EINVAL;
	} else {
		if (WARN_ON_ONCE(nr > 0))
			return -EINVAL;
	}

	return dev->irq + nr;
}
EXPORT_SYMBOL(pci_irq_vector);

/**
 * pci_irq_get_affinity - return the affinity of a particular MSI vector
 * @dev: PCI device to operate on
 * @nr: device-relative interrupt vector index (0-based).
 */
const struct cpumask *pci_irq_get_affinity(struct pci_dev *dev, int nr)
{
	if (dev->msix_enabled) {
		struct msi_desc *entry;
		int i = 0;

		for_each_pci_msi_entry(entry, dev) {
			if (i == nr)
				return &entry->affinity->mask;
			i++;
		}
		WARN_ON_ONCE(1);
		return NULL;
	} else if (dev->msi_enabled) {
		struct msi_desc *entry = first_pci_msi_entry(dev);

		if (WARN_ON_ONCE(!entry || !entry->affinity ||
				 nr >= entry->nvec_used))
			return NULL;

		return &entry->affinity[nr].mask;
	} else {
		return cpu_possible_mask;
	}
}
EXPORT_SYMBOL(pci_irq_get_affinity);

struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc)
{
	return to_pci_dev(desc->dev);
}
EXPORT_SYMBOL(msi_desc_to_pci_dev);

void *msi_desc_to_pci_sysdata(struct msi_desc *desc)
{
	struct pci_dev *dev = msi_desc_to_pci_dev(desc);

	return dev->bus->sysdata;
}
EXPORT_SYMBOL_GPL(msi_desc_to_pci_sysdata);

#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN
/**
 * pci_msi_domain_write_msg - Helper to write MSI message to PCI config space
 * @irq_data:	Pointer to interrupt data of the MSI interrupt
 * @msg:	Pointer to the message
 */
void pci_msi_domain_write_msg(struct irq_data *irq_data, struct msi_msg *msg)
{
	struct msi_desc *desc = irq_data_get_msi_desc(irq_data);

	/*
	 * For MSI-X desc->irq is always equal to irq_data->irq. For
	 * MSI only the first interrupt of MULTI MSI passes the test.
	 */
	if (desc->irq == irq_data->irq)
		__pci_write_msi_msg(desc, msg);
}

/**
 * pci_msi_domain_calc_hwirq - Generate a unique ID for an MSI source
 * @desc:	Pointer to the MSI descriptor
 *
 * The ID number is only used within the irqdomain.
 */
static irq_hw_number_t pci_msi_domain_calc_hwirq(struct msi_desc *desc)
{
	struct pci_dev *dev = msi_desc_to_pci_dev(desc);

	return (irq_hw_number_t)desc->msi_attrib.entry_nr |
		pci_dev_id(dev) << 11 |
		(pci_domain_nr(dev->bus) & 0xFFFFFFFF) << 27;
}

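/*
 * Editor's illustrative note (not part of the original file): for a
 * hypothetical function 0000:03:00.1 using table entry 2, pci_dev_id()
 * yields (0x03 << 8) | PCI_DEVFN(0, 1) == 0x0301, so the hwirq becomes
 * 2 | (0x0301 << 11) | (0 << 27); entry_nr fills bits 10:0, the requester
 * ID bits 26:11, and the PCI domain the bits above.
 */
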
static inline bool pci_msi_desc_is_multi_msi(struct msi_desc *desc)
{
	return !desc->msi_attrib.is_msix && desc->nvec_used > 1;
}

/**
 * pci_msi_domain_check_cap - Verify that @domain supports the capabilities
 *			      for @dev
 * @domain: The interrupt domain to check
 * @info: The domain info for verification
 * @dev: The device to check
 *
 * Returns:
 *  0 if the functionality is supported
 *  1 if Multi MSI is requested, but the domain does not support it
 *  -ENOTSUPP otherwise
 */
int pci_msi_domain_check_cap(struct irq_domain *domain,
			     struct msi_domain_info *info, struct device *dev)
{
	struct msi_desc *desc = first_pci_msi_entry(to_pci_dev(dev));

	/* Special handling to support __pci_enable_msi_range() */
	if (pci_msi_desc_is_multi_msi(desc) &&
	    !(info->flags & MSI_FLAG_MULTI_PCI_MSI))
		return 1;
	else if (desc->msi_attrib.is_msix && !(info->flags & MSI_FLAG_PCI_MSIX))
		return -ENOTSUPP;

	return 0;
}

static int pci_msi_domain_handle_error(struct irq_domain *domain,
				       struct msi_desc *desc, int error)
{
	/* Special handling to support __pci_enable_msi_range() */
	if (pci_msi_desc_is_multi_msi(desc) && error == -ENOSPC)
		return 1;

	return error;
}

static void pci_msi_domain_set_desc(msi_alloc_info_t *arg,
				    struct msi_desc *desc)
{
	arg->desc = desc;
	arg->hwirq = pci_msi_domain_calc_hwirq(desc);
}

static struct msi_domain_ops pci_msi_domain_ops_default = {
	.set_desc	= pci_msi_domain_set_desc,
	.msi_check	= pci_msi_domain_check_cap,
	.handle_error	= pci_msi_domain_handle_error,
};

static void pci_msi_domain_update_dom_ops(struct msi_domain_info *info)
{
	struct msi_domain_ops *ops = info->ops;

	if (ops == NULL) {
		info->ops = &pci_msi_domain_ops_default;
	} else {
		if (ops->set_desc == NULL)
			ops->set_desc = pci_msi_domain_set_desc;
		if (ops->msi_check == NULL)
			ops->msi_check = pci_msi_domain_check_cap;
		if (ops->handle_error == NULL)
			ops->handle_error = pci_msi_domain_handle_error;
	}
}

static void pci_msi_domain_update_chip_ops(struct msi_domain_info *info)
{
	struct irq_chip *chip = info->chip;

	BUG_ON(!chip);
	if (!chip->irq_write_msi_msg)
		chip->irq_write_msi_msg = pci_msi_domain_write_msg;
	if (!chip->irq_mask)
		chip->irq_mask = pci_msi_mask_irq;
	if (!chip->irq_unmask)
		chip->irq_unmask = pci_msi_unmask_irq;
}

/**
 * pci_msi_create_irq_domain - Create an MSI interrupt domain
 * @fwnode:	Optional fwnode of the interrupt controller
 * @info:	MSI domain info
 * @parent:	Parent irq domain
 *
 * Updates the domain and chip ops and creates an MSI interrupt domain.
 *
 * Returns:
 * A domain pointer or NULL in case of failure.
 */
struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode,
					     struct msi_domain_info *info,
					     struct irq_domain *parent)
{
	struct irq_domain *domain;

	if (WARN_ON(info->flags & MSI_FLAG_LEVEL_CAPABLE))
		info->flags &= ~MSI_FLAG_LEVEL_CAPABLE;

	if (info->flags & MSI_FLAG_USE_DEF_DOM_OPS)
		pci_msi_domain_update_dom_ops(info);
	if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS)
		pci_msi_domain_update_chip_ops(info);

	info->flags |= MSI_FLAG_ACTIVATE_EARLY;
	if (IS_ENABLED(CONFIG_GENERIC_IRQ_RESERVATION_MODE))
		info->flags |= MSI_FLAG_MUST_REACTIVATE;

	/* PCI-MSI is oneshot-safe */
	info->chip->flags |= IRQCHIP_ONESHOT_SAFE;

	domain = msi_create_irq_domain(fwnode, info, parent);
	if (!domain)
		return NULL;

	irq_domain_update_bus_token(domain, DOMAIN_BUS_PCI_MSI);
	return domain;
}
EXPORT_SYMBOL_GPL(pci_msi_create_irq_domain);

/*
 * Users of the generic MSI infrastructure expect a device to have a single ID,
 * so with DMA aliases we have to pick the least-worst compromise. Devices with
 * DMA phantom functions tend to still emit MSIs from the real function number,
 * so we ignore those and only consider topological aliases where either the
 * alias device or RID appears on a different bus number. We also make the
 * reasonable assumption that bridges are walked in an upstream direction (so
 * the last one seen wins), and the much braver assumption that the most likely
 * case is that of PCI->PCIe so we should always use the alias RID. This echoes
 * the logic from intel_irq_remapping's set_msi_sid(), which presumably works
 * well enough in practice; in the face of the horrible PCIe<->PCI-X conditions
 * for taking ownership all we can really do is close our eyes and hope...
 */
static int get_msi_id_cb(struct pci_dev *pdev, u16 alias, void *data)
{
	u32 *pa = data;
	u8 bus = PCI_BUS_NUM(*pa);

	if (pdev->bus->number != bus || PCI_BUS_NUM(alias) != bus)
		*pa = alias;

	return 0;
}

/**
 * pci_msi_domain_get_msi_rid - Get the MSI requester id (RID)
 * @domain:	The interrupt domain
 * @pdev:	The PCI device.
 *
 * The RID for a device is formed from the alias, with a firmware-supplied
 * mapping applied.
 *
 * Returns: The RID.
 */
u32 pci_msi_domain_get_msi_rid(struct irq_domain *domain, struct pci_dev *pdev)
{
	struct device_node *of_node;
	u32 rid = pci_dev_id(pdev);

	pci_for_each_dma_alias(pdev, get_msi_id_cb, &rid);

	of_node = irq_domain_get_of_node(domain);
	rid = of_node ? of_msi_map_id(&pdev->dev, of_node, rid) :
			iort_msi_map_id(&pdev->dev, rid);

	return rid;
}

/**
 * pci_msi_get_device_domain - Get the MSI domain for a given PCI device
 * @pdev:	The PCI device
 *
 * Use the firmware data to find a device-specific MSI domain
 * (i.e. not one that is set as a default).
 *
 * Returns: The corresponding MSI domain or NULL if none has been found.
 */
struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev)
{
	struct irq_domain *dom;
	u32 rid = pci_dev_id(pdev);

	pci_for_each_dma_alias(pdev, get_msi_id_cb, &rid);
	dom = of_msi_map_get_device_domain(&pdev->dev, rid, DOMAIN_BUS_PCI_MSI);
	if (!dom)
		dom = iort_get_device_domain(&pdev->dev, rid,
					     DOMAIN_BUS_PCI_MSI);
	return dom;
}

/**
 * pci_dev_has_special_msi_domain - Check whether the device is handled by
 *				    a non-standard PCI-MSI domain
 * @pdev:	The PCI device to check.
 *
 * Returns: True if the device irqdomain or the bus irqdomain is
 * non-standard PCI/MSI.
 */
bool pci_dev_has_special_msi_domain(struct pci_dev *pdev)
{
	struct irq_domain *dom = dev_get_msi_domain(&pdev->dev);

	if (!dom)
		dom = dev_get_msi_domain(&pdev->bus->dev);

	if (!dom)
		return true;

	return dom->bus_token != DOMAIN_BUS_PCI_MSI;
}

#endif /* CONFIG_PCI_MSI_IRQ_DOMAIN */
#endif /* CONFIG_PCI_MSI */

void pci_msi_init(struct pci_dev *dev)
{
	u16 ctrl;

	/*
	 * Disable the MSI hardware to avoid screaming interrupts
	 * during boot. This is the power on reset default so
	 * usually this should be a noop.
	 */
	dev->msi_cap = pci_find_capability(dev, PCI_CAP_ID_MSI);
	if (!dev->msi_cap)
		return;

	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &ctrl);
	if (ctrl & PCI_MSI_FLAGS_ENABLE)
		pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS,
				      ctrl & ~PCI_MSI_FLAGS_ENABLE);

	if (!(ctrl & PCI_MSI_FLAGS_64BIT))
		dev->no_64bit_msi = 1;
}

void pci_msix_init(struct pci_dev *dev)
{
	u16 ctrl;

	dev->msix_cap = pci_find_capability(dev, PCI_CAP_ID_MSIX);
	if (!dev->msix_cap)
		return;

	pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &ctrl);
	if (ctrl & PCI_MSIX_FLAGS_ENABLE)
		pci_write_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS,
				      ctrl & ~PCI_MSIX_FLAGS_ENABLE);
}