treewide: Replace GPLv2 boilerplate/reference with SPDX - rule 156
[linux-block.git] / arch / powerpc / platforms / pseries / iommu.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
4  *
5  * Rewrite, cleanup:
6  *
7  * Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation
8  * Copyright (C) 2006 Olof Johansson <olof@lixom.net>
9  *
10  * Dynamic DMA mapping support, pSeries-specific parts, both SMP and LPAR.
11  */
12
13 #include <linux/init.h>
14 #include <linux/types.h>
15 #include <linux/slab.h>
16 #include <linux/mm.h>
17 #include <linux/memblock.h>
18 #include <linux/spinlock.h>
19 #include <linux/string.h>
20 #include <linux/pci.h>
21 #include <linux/dma-mapping.h>
22 #include <linux/crash_dump.h>
23 #include <linux/memory.h>
24 #include <linux/of.h>
25 #include <linux/iommu.h>
26 #include <linux/rculist.h>
27 #include <asm/io.h>
28 #include <asm/prom.h>
29 #include <asm/rtas.h>
30 #include <asm/iommu.h>
31 #include <asm/pci-bridge.h>
32 #include <asm/machdep.h>
33 #include <asm/firmware.h>
34 #include <asm/tce.h>
35 #include <asm/ppc-pci.h>
36 #include <asm/udbg.h>
37 #include <asm/mmzone.h>
38 #include <asm/plpar_wrappers.h>
39
40 #include "pseries.h"
41
42 static struct iommu_table_group *iommu_pseries_alloc_group(int node)
43 {
44         struct iommu_table_group *table_group;
45         struct iommu_table *tbl;
46
47         table_group = kzalloc_node(sizeof(struct iommu_table_group), GFP_KERNEL,
48                            node);
49         if (!table_group)
50                 return NULL;
51
52         tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, node);
53         if (!tbl)
54                 goto free_group;
55
56         INIT_LIST_HEAD_RCU(&tbl->it_group_list);
57         kref_init(&tbl->it_kref);
58
59         table_group->tables[0] = tbl;
60
61         return table_group;
62
63 free_group:
64         kfree(table_group);
65         return NULL;
66 }
67
68 static void iommu_pseries_free_group(struct iommu_table_group *table_group,
69                 const char *node_name)
70 {
71         struct iommu_table *tbl;
72
73         if (!table_group)
74                 return;
75
76         tbl = table_group->tables[0];
77 #ifdef CONFIG_IOMMU_API
78         if (table_group->group) {
79                 iommu_group_put(table_group->group);
80                 BUG_ON(table_group->group);
81         }
82 #endif
83         iommu_tce_table_put(tbl);
84
85         kfree(table_group);
86 }
87
88 static int tce_build_pSeries(struct iommu_table *tbl, long index,
89                               long npages, unsigned long uaddr,
90                               enum dma_data_direction direction,
91                               unsigned long attrs)
92 {
93         u64 proto_tce;
94         __be64 *tcep;
95         u64 rpn;
96
97         proto_tce = TCE_PCI_READ; // Read allowed
98
99         if (direction != DMA_TO_DEVICE)
100                 proto_tce |= TCE_PCI_WRITE;
101
102         tcep = ((__be64 *)tbl->it_base) + index;
103
104         while (npages--) {
105                 /* can't move this out since we might cross MEMBLOCK boundary */
106                 rpn = __pa(uaddr) >> TCE_SHIFT;
107                 *tcep = cpu_to_be64(proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT);
108
109                 uaddr += TCE_PAGE_SIZE;
110                 tcep++;
111         }
112         return 0;
113 }
114
115
116 static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages)
117 {
118         __be64 *tcep;
119
120         tcep = ((__be64 *)tbl->it_base) + index;
121
122         while (npages--)
123                 *(tcep++) = 0;
124 }
125
126 static unsigned long tce_get_pseries(struct iommu_table *tbl, long index)
127 {
128         __be64 *tcep;
129
130         tcep = ((__be64 *)tbl->it_base) + index;
131
132         return be64_to_cpu(*tcep);
133 }
134
135 static void tce_free_pSeriesLP(struct iommu_table*, long, long);
136 static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long);
137
138 static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
139                                 long npages, unsigned long uaddr,
140                                 enum dma_data_direction direction,
141                                 unsigned long attrs)
142 {
143         u64 rc = 0;
144         u64 proto_tce, tce;
145         u64 rpn;
146         int ret = 0;
147         long tcenum_start = tcenum, npages_start = npages;
148
149         rpn = __pa(uaddr) >> TCE_SHIFT;
150         proto_tce = TCE_PCI_READ;
151         if (direction != DMA_TO_DEVICE)
152                 proto_tce |= TCE_PCI_WRITE;
153
154         while (npages--) {
155                 tce = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT;
156                 rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, tce);
157
158                 if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
159                         ret = (int)rc;
160                         tce_free_pSeriesLP(tbl, tcenum_start,
161                                            (npages_start - (npages + 1)));
162                         break;
163                 }
164
165                 if (rc && printk_ratelimit()) {
166                         printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc);
167                         printk("\tindex   = 0x%llx\n", (u64)tbl->it_index);
168                         printk("\ttcenum  = 0x%llx\n", (u64)tcenum);
169                         printk("\ttce val = 0x%llx\n", tce );
170                         dump_stack();
171                 }
172
173                 tcenum++;
174                 rpn++;
175         }
176         return ret;
177 }
178
179 static DEFINE_PER_CPU(__be64 *, tce_page);
180
181 static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
182                                      long npages, unsigned long uaddr,
183                                      enum dma_data_direction direction,
184                                      unsigned long attrs)
185 {
186         u64 rc = 0;
187         u64 proto_tce;
188         __be64 *tcep;
189         u64 rpn;
190         long l, limit;
191         long tcenum_start = tcenum, npages_start = npages;
192         int ret = 0;
193         unsigned long flags;
194
195         if ((npages == 1) || !firmware_has_feature(FW_FEATURE_MULTITCE)) {
196                 return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
197                                            direction, attrs);
198         }
199
200         local_irq_save(flags);  /* to protect tcep and the page behind it */
201
202         tcep = __this_cpu_read(tce_page);
203
204         /* This is safe to do since interrupts are off when we're called
205          * from iommu_alloc{,_sg}()
206          */
207         if (!tcep) {
208                 tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
209                 /* If allocation fails, fall back to the loop implementation */
210                 if (!tcep) {
211                         local_irq_restore(flags);
212                         return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
213                                             direction, attrs);
214                 }
215                 __this_cpu_write(tce_page, tcep);
216         }
217
218         rpn = __pa(uaddr) >> TCE_SHIFT;
219         proto_tce = TCE_PCI_READ;
220         if (direction != DMA_TO_DEVICE)
221                 proto_tce |= TCE_PCI_WRITE;
222
223         /* We can map max one pageful of TCEs at a time */
224         do {
225                 /*
226                  * Set up the page with TCE data, looping through and setting
227                  * the values.
228                  */
229                 limit = min_t(long, npages, 4096/TCE_ENTRY_SIZE);
230
231                 for (l = 0; l < limit; l++) {
232                         tcep[l] = cpu_to_be64(proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT);
233                         rpn++;
234                 }
235
236                 rc = plpar_tce_put_indirect((u64)tbl->it_index,
237                                             (u64)tcenum << 12,
238                                             (u64)__pa(tcep),
239                                             limit);
240
241                 npages -= limit;
242                 tcenum += limit;
243         } while (npages > 0 && !rc);
244
245         local_irq_restore(flags);
246
247         if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
248                 ret = (int)rc;
249                 tce_freemulti_pSeriesLP(tbl, tcenum_start,
250                                         (npages_start - (npages + limit)));
251                 return ret;
252         }
253
254         if (rc && printk_ratelimit()) {
255                 printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc);
256                 printk("\tindex   = 0x%llx\n", (u64)tbl->it_index);
257                 printk("\tnpages  = 0x%llx\n", (u64)npages);
258                 printk("\ttce[0] val = 0x%llx\n", tcep[0]);
259                 dump_stack();
260         }
261         return ret;
262 }
263
264 static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)
265 {
266         u64 rc;
267
268         while (npages--) {
269                 rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, 0);
270
271                 if (rc && printk_ratelimit()) {
272                         printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc);
273                         printk("\tindex   = 0x%llx\n", (u64)tbl->it_index);
274                         printk("\ttcenum  = 0x%llx\n", (u64)tcenum);
275                         dump_stack();
276                 }
277
278                 tcenum++;
279         }
280 }
281
282
283 static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)
284 {
285         u64 rc;
286
287         if (!firmware_has_feature(FW_FEATURE_MULTITCE))
288                 return tce_free_pSeriesLP(tbl, tcenum, npages);
289
290         rc = plpar_tce_stuff((u64)tbl->it_index, (u64)tcenum << 12, 0, npages);
291
292         if (rc && printk_ratelimit()) {
293                 printk("tce_freemulti_pSeriesLP: plpar_tce_stuff failed\n");
294                 printk("\trc      = %lld\n", rc);
295                 printk("\tindex   = 0x%llx\n", (u64)tbl->it_index);
296                 printk("\tnpages  = 0x%llx\n", (u64)npages);
297                 dump_stack();
298         }
299 }
300
301 static unsigned long tce_get_pSeriesLP(struct iommu_table *tbl, long tcenum)
302 {
303         u64 rc;
304         unsigned long tce_ret;
305
306         rc = plpar_tce_get((u64)tbl->it_index, (u64)tcenum << 12, &tce_ret);
307
308         if (rc && printk_ratelimit()) {
309                 printk("tce_get_pSeriesLP: plpar_tce_get failed. rc=%lld\n", rc);
310                 printk("\tindex   = 0x%llx\n", (u64)tbl->it_index);
311                 printk("\ttcenum  = 0x%llx\n", (u64)tcenum);
312                 dump_stack();
313         }
314
315         return tce_ret;
316 }
317
318 /* this is compatible with cells for the device tree property */
319 struct dynamic_dma_window_prop {
320         __be32  liobn;          /* tce table number */
321         __be64  dma_base;       /* address hi,lo */
322         __be32  tce_shift;      /* ilog2(tce_page_size) */
323         __be32  window_shift;   /* ilog2(tce_window_size) */
324 };
325
326 struct direct_window {
327         struct device_node *device;
328         const struct dynamic_dma_window_prop *prop;
329         struct list_head list;
330 };
331
332 /* Dynamic DMA Window support */
333 struct ddw_query_response {
334         u32 windows_available;
335         u32 largest_available_block;
336         u32 page_size;
337         u32 migration_capable;
338 };
339
340 struct ddw_create_response {
341         u32 liobn;
342         u32 addr_hi;
343         u32 addr_lo;
344 };
345
/* Name of the device tree property that describes a direct window. */
#define DIRECT64_PROPNAME "linux,direct64-ddr-window-info"

/* All known direct windows (struct direct_window entries). */
static LIST_HEAD(direct_window_list);
/* Guards the list against races between memory on/offline and window creation. */
static DEFINE_SPINLOCK(direct_window_list_lock);
/* Keeps a window from being initialised twice for the same device. */
static DEFINE_MUTEX(direct_window_init_mutex);
352
353 static int tce_clearrange_multi_pSeriesLP(unsigned long start_pfn,
354                                         unsigned long num_pfn, const void *arg)
355 {
356         const struct dynamic_dma_window_prop *maprange = arg;
357         int rc;
358         u64 tce_size, num_tce, dma_offset, next;
359         u32 tce_shift;
360         long limit;
361
362         tce_shift = be32_to_cpu(maprange->tce_shift);
363         tce_size = 1ULL << tce_shift;
364         next = start_pfn << PAGE_SHIFT;
365         num_tce = num_pfn << PAGE_SHIFT;
366
367         /* round back to the beginning of the tce page size */
368         num_tce += next & (tce_size - 1);
369         next &= ~(tce_size - 1);
370
371         /* covert to number of tces */
372         num_tce |= tce_size - 1;
373         num_tce >>= tce_shift;
374
375         do {
376                 /*
377                  * Set up the page with TCE data, looping through and setting
378                  * the values.
379                  */
380                 limit = min_t(long, num_tce, 512);
381                 dma_offset = next + be64_to_cpu(maprange->dma_base);
382
383                 rc = plpar_tce_stuff((u64)be32_to_cpu(maprange->liobn),
384                                              dma_offset,
385                                              0, limit);
386                 next += limit * tce_size;
387                 num_tce -= limit;
388         } while (num_tce > 0 && !rc);
389
390         return rc;
391 }
392
393 static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
394                                         unsigned long num_pfn, const void *arg)
395 {
396         const struct dynamic_dma_window_prop *maprange = arg;
397         u64 tce_size, num_tce, dma_offset, next, proto_tce, liobn;
398         __be64 *tcep;
399         u32 tce_shift;
400         u64 rc = 0;
401         long l, limit;
402
403         local_irq_disable();    /* to protect tcep and the page behind it */
404         tcep = __this_cpu_read(tce_page);
405
406         if (!tcep) {
407                 tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
408                 if (!tcep) {
409                         local_irq_enable();
410                         return -ENOMEM;
411                 }
412                 __this_cpu_write(tce_page, tcep);
413         }
414
415         proto_tce = TCE_PCI_READ | TCE_PCI_WRITE;
416
417         liobn = (u64)be32_to_cpu(maprange->liobn);
418         tce_shift = be32_to_cpu(maprange->tce_shift);
419         tce_size = 1ULL << tce_shift;
420         next = start_pfn << PAGE_SHIFT;
421         num_tce = num_pfn << PAGE_SHIFT;
422
423         /* round back to the beginning of the tce page size */
424         num_tce += next & (tce_size - 1);
425         next &= ~(tce_size - 1);
426
427         /* covert to number of tces */
428         num_tce |= tce_size - 1;
429         num_tce >>= tce_shift;
430
431         /* We can map max one pageful of TCEs at a time */
432         do {
433                 /*
434                  * Set up the page with TCE data, looping through and setting
435                  * the values.
436                  */
437                 limit = min_t(long, num_tce, 4096/TCE_ENTRY_SIZE);
438                 dma_offset = next + be64_to_cpu(maprange->dma_base);
439
440                 for (l = 0; l < limit; l++) {
441                         tcep[l] = cpu_to_be64(proto_tce | next);
442                         next += tce_size;
443                 }
444
445                 rc = plpar_tce_put_indirect(liobn,
446                                             dma_offset,
447                                             (u64)__pa(tcep),
448                                             limit);
449
450                 num_tce -= limit;
451         } while (num_tce > 0 && !rc);
452
453         /* error cleanup: caller will clear whole range */
454
455         local_irq_enable();
456         return rc;
457 }
458
/*
 * Adapter around tce_setrange_multi_pSeriesLP() for callers whose callback
 * prototype takes a non-const "void *" argument.
 */
static int tce_setrange_multi_pSeriesLP_walk(unsigned long start_pfn,
		unsigned long num_pfn, void *arg)
{
	const void *maprange = arg;

	return tce_setrange_multi_pSeriesLP(start_pfn, num_pfn, maprange);
}
464
465 static void iommu_table_setparms(struct pci_controller *phb,
466                                  struct device_node *dn,
467                                  struct iommu_table *tbl)
468 {
469         struct device_node *node;
470         const unsigned long *basep;
471         const u32 *sizep;
472
473         node = phb->dn;
474
475         basep = of_get_property(node, "linux,tce-base", NULL);
476         sizep = of_get_property(node, "linux,tce-size", NULL);
477         if (basep == NULL || sizep == NULL) {
478                 printk(KERN_ERR "PCI_DMA: iommu_table_setparms: %pOF has "
479                                 "missing tce entries !\n", dn);
480                 return;
481         }
482
483         tbl->it_base = (unsigned long)__va(*basep);
484
485         if (!is_kdump_kernel())
486                 memset((void *)tbl->it_base, 0, *sizep);
487
488         tbl->it_busno = phb->bus->number;
489         tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K;
490
491         /* Units of tce entries */
492         tbl->it_offset = phb->dma_window_base_cur >> tbl->it_page_shift;
493
494         /* Test if we are going over 2GB of DMA space */
495         if (phb->dma_window_base_cur + phb->dma_window_size > 0x80000000ul) {
496                 udbg_printf("PCI_DMA: Unexpected number of IOAs under this PHB.\n");
497                 panic("PCI_DMA: Unexpected number of IOAs under this PHB.\n");
498         }
499
500         phb->dma_window_base_cur += phb->dma_window_size;
501
502         /* Set the tce table size - measured in entries */
503         tbl->it_size = phb->dma_window_size >> tbl->it_page_shift;
504
505         tbl->it_index = 0;
506         tbl->it_blocksize = 16;
507         tbl->it_type = TCE_PCI;
508 }
509
510 /*
511  * iommu_table_setparms_lpar
512  *
513  * Function: On pSeries LPAR systems, return TCE table info, given a pci bus.
514  */
515 static void iommu_table_setparms_lpar(struct pci_controller *phb,
516                                       struct device_node *dn,
517                                       struct iommu_table *tbl,
518                                       struct iommu_table_group *table_group,
519                                       const __be32 *dma_window)
520 {
521         unsigned long offset, size;
522
523         of_parse_dma_window(dn, dma_window, &tbl->it_index, &offset, &size);
524
525         tbl->it_busno = phb->bus->number;
526         tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K;
527         tbl->it_base   = 0;
528         tbl->it_blocksize  = 16;
529         tbl->it_type = TCE_PCI;
530         tbl->it_offset = offset >> tbl->it_page_shift;
531         tbl->it_size = size >> tbl->it_page_shift;
532
533         table_group->tce32_start = offset;
534         table_group->tce32_size = size;
535 }
536
537 struct iommu_table_ops iommu_table_pseries_ops = {
538         .set = tce_build_pSeries,
539         .clear = tce_free_pSeries,
540         .get = tce_get_pseries
541 };
542
543 static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
544 {
545         struct device_node *dn;
546         struct iommu_table *tbl;
547         struct device_node *isa_dn, *isa_dn_orig;
548         struct device_node *tmp;
549         struct pci_dn *pci;
550         int children;
551
552         dn = pci_bus_to_OF_node(bus);
553
554         pr_debug("pci_dma_bus_setup_pSeries: setting up bus %pOF\n", dn);
555
556         if (bus->self) {
557                 /* This is not a root bus, any setup will be done for the
558                  * device-side of the bridge in iommu_dev_setup_pSeries().
559                  */
560                 return;
561         }
562         pci = PCI_DN(dn);
563
564         /* Check if the ISA bus on the system is under
565          * this PHB.
566          */
567         isa_dn = isa_dn_orig = of_find_node_by_type(NULL, "isa");
568
569         while (isa_dn && isa_dn != dn)
570                 isa_dn = isa_dn->parent;
571
572         of_node_put(isa_dn_orig);
573
574         /* Count number of direct PCI children of the PHB. */
575         for (children = 0, tmp = dn->child; tmp; tmp = tmp->sibling)
576                 children++;
577
578         pr_debug("Children: %d\n", children);
579
580         /* Calculate amount of DMA window per slot. Each window must be
581          * a power of two (due to pci_alloc_consistent requirements).
582          *
583          * Keep 256MB aside for PHBs with ISA.
584          */
585
586         if (!isa_dn) {
587                 /* No ISA/IDE - just set window size and return */
588                 pci->phb->dma_window_size = 0x80000000ul; /* To be divided */
589
590                 while (pci->phb->dma_window_size * children > 0x80000000ul)
591                         pci->phb->dma_window_size >>= 1;
592                 pr_debug("No ISA/IDE, window size is 0x%llx\n",
593                          pci->phb->dma_window_size);
594                 pci->phb->dma_window_base_cur = 0;
595
596                 return;
597         }
598
599         /* If we have ISA, then we probably have an IDE
600          * controller too. Allocate a 128MB table but
601          * skip the first 128MB to avoid stepping on ISA
602          * space.
603          */
604         pci->phb->dma_window_size = 0x8000000ul;
605         pci->phb->dma_window_base_cur = 0x8000000ul;
606
607         pci->table_group = iommu_pseries_alloc_group(pci->phb->node);
608         tbl = pci->table_group->tables[0];
609
610         iommu_table_setparms(pci->phb, dn, tbl);
611         tbl->it_ops = &iommu_table_pseries_ops;
612         iommu_init_table(tbl, pci->phb->node);
613
614         /* Divide the rest (1.75GB) among the children */
615         pci->phb->dma_window_size = 0x80000000ul;
616         while (pci->phb->dma_window_size * children > 0x70000000ul)
617                 pci->phb->dma_window_size >>= 1;
618
619         pr_debug("ISA/IDE, window size is 0x%llx\n", pci->phb->dma_window_size);
620 }
621
#ifdef CONFIG_IOMMU_API
/*
 * Replace the TCE at @index with *@tce (combined with the permission bits
 * for *@direction) and hand back what was there before.
 *
 * The old entry is read with plpar_tce_get() and the new one written with
 * plpar_tce_put(), both under tbl->large_pool.lock with interrupts
 * disabled. On success *@direction and *@tce are overwritten with the
 * direction and the value (permission bits removed) of the old entry; on
 * failure they are left untouched.
 *
 * Returns 0 on success or the non-zero code of the failing hypervisor call.
 */
static int tce_exchange_pseries(struct iommu_table *tbl, long index, unsigned
				long *tce, enum dma_data_direction *direction)
{
	unsigned long ioba = (unsigned long) index << tbl->it_page_shift;
	u64 perms = iommu_direction_to_tce_perm(*direction);
	unsigned long newtce = *tce | perms;
	unsigned long oldtce = 0;
	unsigned long irqflags;
	long rc;

	spin_lock_irqsave(&tbl->large_pool.lock, irqflags);

	rc = plpar_tce_get((u64)tbl->it_index, ioba, &oldtce);
	if (rc)
		goto unlock;

	rc = plpar_tce_put((u64)tbl->it_index, ioba, newtce);
	if (rc)
		goto unlock;

	*direction = iommu_tce_direction(oldtce);
	*tce = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE);

unlock:
	spin_unlock_irqrestore(&tbl->large_pool.lock, irqflags);

	return rc;
}
#endif
648
649 struct iommu_table_ops iommu_table_lpar_multi_ops = {
650         .set = tce_buildmulti_pSeriesLP,
651 #ifdef CONFIG_IOMMU_API
652         .exchange = tce_exchange_pseries,
653 #endif
654         .clear = tce_freemulti_pSeriesLP,
655         .get = tce_get_pSeriesLP
656 };
657
658 static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
659 {
660         struct iommu_table *tbl;
661         struct device_node *dn, *pdn;
662         struct pci_dn *ppci;
663         const __be32 *dma_window = NULL;
664
665         dn = pci_bus_to_OF_node(bus);
666
667         pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %pOF\n",
668                  dn);
669
670         /* Find nearest ibm,dma-window, walking up the device tree */
671         for (pdn = dn; pdn != NULL; pdn = pdn->parent) {
672                 dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
673                 if (dma_window != NULL)
674                         break;
675         }
676
677         if (dma_window == NULL) {
678                 pr_debug("  no ibm,dma-window property !\n");
679                 return;
680         }
681
682         ppci = PCI_DN(pdn);
683
684         pr_debug("  parent is %pOF, iommu_table: 0x%p\n",
685                  pdn, ppci->table_group);
686
687         if (!ppci->table_group) {
688                 ppci->table_group = iommu_pseries_alloc_group(ppci->phb->node);
689                 tbl = ppci->table_group->tables[0];
690                 iommu_table_setparms_lpar(ppci->phb, pdn, tbl,
691                                 ppci->table_group, dma_window);
692                 tbl->it_ops = &iommu_table_lpar_multi_ops;
693                 iommu_init_table(tbl, ppci->phb->node);
694                 iommu_register_group(ppci->table_group,
695                                 pci_domain_nr(bus), 0);
696                 pr_debug("  created table: %p\n", ppci->table_group);
697         }
698 }
699
700
701 static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
702 {
703         struct device_node *dn;
704         struct iommu_table *tbl;
705
706         pr_debug("pci_dma_dev_setup_pSeries: %s\n", pci_name(dev));
707
708         dn = dev->dev.of_node;
709
710         /* If we're the direct child of a root bus, then we need to allocate
711          * an iommu table ourselves. The bus setup code should have setup
712          * the window sizes already.
713          */
714         if (!dev->bus->self) {
715                 struct pci_controller *phb = PCI_DN(dn)->phb;
716
717                 pr_debug(" --> first child, no bridge. Allocating iommu table.\n");
718                 PCI_DN(dn)->table_group = iommu_pseries_alloc_group(phb->node);
719                 tbl = PCI_DN(dn)->table_group->tables[0];
720                 iommu_table_setparms(phb, dn, tbl);
721                 tbl->it_ops = &iommu_table_pseries_ops;
722                 iommu_init_table(tbl, phb->node);
723                 set_iommu_table_base(&dev->dev, tbl);
724                 return;
725         }
726
727         /* If this device is further down the bus tree, search upwards until
728          * an already allocated iommu table is found and use that.
729          */
730
731         while (dn && PCI_DN(dn) && PCI_DN(dn)->table_group == NULL)
732                 dn = dn->parent;
733
734         if (dn && PCI_DN(dn))
735                 set_iommu_table_base(&dev->dev,
736                                 PCI_DN(dn)->table_group->tables[0]);
737         else
738                 printk(KERN_WARNING "iommu: Device %s has no iommu table\n",
739                        pci_name(dev));
740 }
741
742 static int __read_mostly disable_ddw;
743
744 static int __init disable_ddw_setup(char *str)
745 {
746         disable_ddw = 1;
747         printk(KERN_INFO "ppc iommu: disabling ddw.\n");
748
749         return 0;
750 }
751
752 early_param("disable_ddw", disable_ddw_setup);
753
754 static void remove_ddw(struct device_node *np, bool remove_prop)
755 {
756         struct dynamic_dma_window_prop *dwp;
757         struct property *win64;
758         u32 ddw_avail[3];
759         u64 liobn;
760         int ret = 0;
761
762         ret = of_property_read_u32_array(np, "ibm,ddw-applicable",
763                                          &ddw_avail[0], 3);
764
765         win64 = of_find_property(np, DIRECT64_PROPNAME, NULL);
766         if (!win64)
767                 return;
768
769         if (ret || win64->length < sizeof(*dwp))
770                 goto delprop;
771
772         dwp = win64->value;
773         liobn = (u64)be32_to_cpu(dwp->liobn);
774
775         /* clear the whole window, note the arg is in kernel pages */
776         ret = tce_clearrange_multi_pSeriesLP(0,
777                 1ULL << (be32_to_cpu(dwp->window_shift) - PAGE_SHIFT), dwp);
778         if (ret)
779                 pr_warn("%pOF failed to clear tces in window.\n",
780                         np);
781         else
782                 pr_debug("%pOF successfully cleared tces in window.\n",
783                          np);
784
785         ret = rtas_call(ddw_avail[2], 1, 1, NULL, liobn);
786         if (ret)
787                 pr_warn("%pOF: failed to remove direct window: rtas returned "
788                         "%d to ibm,remove-pe-dma-window(%x) %llx\n",
789                         np, ret, ddw_avail[2], liobn);
790         else
791                 pr_debug("%pOF: successfully removed direct window: rtas returned "
792                         "%d to ibm,remove-pe-dma-window(%x) %llx\n",
793                         np, ret, ddw_avail[2], liobn);
794
795 delprop:
796         if (remove_prop)
797                 ret = of_remove_property(np, win64);
798         if (ret)
799                 pr_warn("%pOF: failed to remove direct window property: %d\n",
800                         np, ret);
801 }
802
803 static u64 find_existing_ddw(struct device_node *pdn)
804 {
805         struct direct_window *window;
806         const struct dynamic_dma_window_prop *direct64;
807         u64 dma_addr = 0;
808
809         spin_lock(&direct_window_list_lock);
810         /* check if we already created a window and dupe that config if so */
811         list_for_each_entry(window, &direct_window_list, list) {
812                 if (window->device == pdn) {
813                         direct64 = window->prop;
814                         dma_addr = be64_to_cpu(direct64->dma_base);
815                         break;
816                 }
817         }
818         spin_unlock(&direct_window_list_lock);
819
820         return dma_addr;
821 }
822
823 static int find_existing_ddw_windows(void)
824 {
825         int len;
826         struct device_node *pdn;
827         struct direct_window *window;
828         const struct dynamic_dma_window_prop *direct64;
829
830         if (!firmware_has_feature(FW_FEATURE_LPAR))
831                 return 0;
832
833         for_each_node_with_property(pdn, DIRECT64_PROPNAME) {
834                 direct64 = of_get_property(pdn, DIRECT64_PROPNAME, &len);
835                 if (!direct64)
836                         continue;
837
838                 window = kzalloc(sizeof(*window), GFP_KERNEL);
839                 if (!window || len < sizeof(struct dynamic_dma_window_prop)) {
840                         kfree(window);
841                         remove_ddw(pdn, true);
842                         continue;
843                 }
844
845                 window->device = pdn;
846                 window->prop = direct64;
847                 spin_lock(&direct_window_list_lock);
848                 list_add(&window->list, &direct_window_list);
849                 spin_unlock(&direct_window_list_lock);
850         }
851
852         return 0;
853 }
854 machine_arch_initcall(pseries, find_existing_ddw_windows);
855
856 static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail,
857                         struct ddw_query_response *query)
858 {
859         struct device_node *dn;
860         struct pci_dn *pdn;
861         u32 cfg_addr;
862         u64 buid;
863         int ret;
864
865         /*
866          * Get the config address and phb buid of the PE window.
867          * Rely on eeh to retrieve this for us.
868          * Retrieve them from the pci device, not the node with the
869          * dma-window property
870          */
871         dn = pci_device_to_OF_node(dev);
872         pdn = PCI_DN(dn);
873         buid = pdn->phb->buid;
874         cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8));
875
876         ret = rtas_call(ddw_avail[0], 3, 5, (u32 *)query,
877                   cfg_addr, BUID_HI(buid), BUID_LO(buid));
878         dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x"
879                 " returned %d\n", ddw_avail[0], cfg_addr, BUID_HI(buid),
880                 BUID_LO(buid), ret);
881         return ret;
882 }
883
884 static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail,
885                         struct ddw_create_response *create, int page_shift,
886                         int window_shift)
887 {
888         struct device_node *dn;
889         struct pci_dn *pdn;
890         u32 cfg_addr;
891         u64 buid;
892         int ret;
893
894         /*
895          * Get the config address and phb buid of the PE window.
896          * Rely on eeh to retrieve this for us.
897          * Retrieve them from the pci device, not the node with the
898          * dma-window property
899          */
900         dn = pci_device_to_OF_node(dev);
901         pdn = PCI_DN(dn);
902         buid = pdn->phb->buid;
903         cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8));
904
905         do {
906                 /* extra outputs are LIOBN and dma-addr (hi, lo) */
907                 ret = rtas_call(ddw_avail[1], 5, 4, (u32 *)create,
908                                 cfg_addr, BUID_HI(buid), BUID_LO(buid),
909                                 page_shift, window_shift);
910         } while (rtas_busy_delay(ret));
911         dev_info(&dev->dev,
912                 "ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d "
913                 "(liobn = 0x%x starting addr = %x %x)\n", ddw_avail[1],
914                  cfg_addr, BUID_HI(buid), BUID_LO(buid), page_shift,
915                  window_shift, ret, create->liobn, create->addr_hi, create->addr_lo);
916
917         return ret;
918 }
919
/*
 * One entry per PE device-tree node for which enable_ddw() has already
 * failed; enable_ddw() consults the list so that it does not retry the
 * window setup for the same node.
 */
struct failed_ddw_pdn {
        struct device_node *pdn;        /* node passed to enable_ddw() */
        struct list_head list;          /* link on failed_ddw_pdn_list */
};

/* Read and extended by enable_ddw() while it holds direct_window_init_mutex. */
static LIST_HEAD(failed_ddw_pdn_list);
926
927 static phys_addr_t ddw_memory_hotplug_max(void)
928 {
929         phys_addr_t max_addr = memory_hotplug_max();
930         struct device_node *memory;
931
932         for_each_node_by_type(memory, "memory") {
933                 unsigned long start, size;
934                 int n_mem_addr_cells, n_mem_size_cells, len;
935                 const __be32 *memcell_buf;
936
937                 memcell_buf = of_get_property(memory, "reg", &len);
938                 if (!memcell_buf || len <= 0)
939                         continue;
940
941                 n_mem_addr_cells = of_n_addr_cells(memory);
942                 n_mem_size_cells = of_n_size_cells(memory);
943
944                 start = of_read_number(memcell_buf, n_mem_addr_cells);
945                 memcell_buf += n_mem_addr_cells;
946                 size = of_read_number(memcell_buf, n_mem_size_cells);
947                 memcell_buf += n_mem_size_cells;
948
949                 max_addr = max_t(phys_addr_t, max_addr, start + size);
950         }
951
952         return max_addr;
953 }
954
955 /*
956  * If the PE supports dynamic dma windows, and there is space for a table
957  * that can map all pages in a linear offset, then setup such a table,
958  * and record the dma-offset in the struct device.
959  *
960  * dev: the pci device we are checking
961  * pdn: the parent pe node with the ibm,dma_window property
962  * Future: also check if we can remap the base window for our base page size
963  *
964  * returns the dma offset for use by the direct mapped DMA code.
965  */
966 static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
967 {
968         int len, ret;
969         struct ddw_query_response query;
970         struct ddw_create_response create;
971         int page_shift;
972         u64 dma_addr, max_addr;
973         struct device_node *dn;
974         u32 ddw_avail[3];
975         struct direct_window *window;
976         struct property *win64;
977         struct dynamic_dma_window_prop *ddwprop;
978         struct failed_ddw_pdn *fpdn;
979
980         mutex_lock(&direct_window_init_mutex);
981
982         dma_addr = find_existing_ddw(pdn);
983         if (dma_addr != 0)
984                 goto out_unlock;
985
986         /*
987          * If we already went through this for a previous function of
988          * the same device and failed, we don't want to muck with the
989          * DMA window again, as it will race with in-flight operations
990          * and can lead to EEHs. The above mutex protects access to the
991          * list.
992          */
993         list_for_each_entry(fpdn, &failed_ddw_pdn_list, list) {
994                 if (fpdn->pdn == pdn)
995                         goto out_unlock;
996         }
997
998         /*
999          * the ibm,ddw-applicable property holds the tokens for:
1000          * ibm,query-pe-dma-window
1001          * ibm,create-pe-dma-window
1002          * ibm,remove-pe-dma-window
1003          * for the given node in that order.
1004          * the property is actually in the parent, not the PE
1005          */
1006         ret = of_property_read_u32_array(pdn, "ibm,ddw-applicable",
1007                                          &ddw_avail[0], 3);
1008         if (ret)
1009                 goto out_failed;
1010
1011        /*
1012          * Query if there is a second window of size to map the
1013          * whole partition.  Query returns number of windows, largest
1014          * block assigned to PE (partition endpoint), and two bitmasks
1015          * of page sizes: supported and supported for migrate-dma.
1016          */
1017         dn = pci_device_to_OF_node(dev);
1018         ret = query_ddw(dev, ddw_avail, &query);
1019         if (ret != 0)
1020                 goto out_failed;
1021
1022         if (query.windows_available == 0) {
1023                 /*
1024                  * no additional windows are available for this device.
1025                  * We might be able to reallocate the existing window,
1026                  * trading in for a larger page size.
1027                  */
1028                 dev_dbg(&dev->dev, "no free dynamic windows");
1029                 goto out_failed;
1030         }
1031         if (query.page_size & 4) {
1032                 page_shift = 24; /* 16MB */
1033         } else if (query.page_size & 2) {
1034                 page_shift = 16; /* 64kB */
1035         } else if (query.page_size & 1) {
1036                 page_shift = 12; /* 4kB */
1037         } else {
1038                 dev_dbg(&dev->dev, "no supported direct page size in mask %x",
1039                           query.page_size);
1040                 goto out_failed;
1041         }
1042         /* verify the window * number of ptes will map the partition */
1043         /* check largest block * page size > max memory hotplug addr */
1044         max_addr = ddw_memory_hotplug_max();
1045         if (query.largest_available_block < (max_addr >> page_shift)) {
1046                 dev_dbg(&dev->dev, "can't map partition max 0x%llx with %u "
1047                           "%llu-sized pages\n", max_addr,  query.largest_available_block,
1048                           1ULL << page_shift);
1049                 goto out_failed;
1050         }
1051         len = order_base_2(max_addr);
1052         win64 = kzalloc(sizeof(struct property), GFP_KERNEL);
1053         if (!win64) {
1054                 dev_info(&dev->dev,
1055                         "couldn't allocate property for 64bit dma window\n");
1056                 goto out_failed;
1057         }
1058         win64->name = kstrdup(DIRECT64_PROPNAME, GFP_KERNEL);
1059         win64->value = ddwprop = kmalloc(sizeof(*ddwprop), GFP_KERNEL);
1060         win64->length = sizeof(*ddwprop);
1061         if (!win64->name || !win64->value) {
1062                 dev_info(&dev->dev,
1063                         "couldn't allocate property name and value\n");
1064                 goto out_free_prop;
1065         }
1066
1067         ret = create_ddw(dev, ddw_avail, &create, page_shift, len);
1068         if (ret != 0)
1069                 goto out_free_prop;
1070
1071         ddwprop->liobn = cpu_to_be32(create.liobn);
1072         ddwprop->dma_base = cpu_to_be64(((u64)create.addr_hi << 32) |
1073                         create.addr_lo);
1074         ddwprop->tce_shift = cpu_to_be32(page_shift);
1075         ddwprop->window_shift = cpu_to_be32(len);
1076
1077         dev_dbg(&dev->dev, "created tce table LIOBN 0x%x for %pOF\n",
1078                   create.liobn, dn);
1079
1080         window = kzalloc(sizeof(*window), GFP_KERNEL);
1081         if (!window)
1082                 goto out_clear_window;
1083
1084         ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT,
1085                         win64->value, tce_setrange_multi_pSeriesLP_walk);
1086         if (ret) {
1087                 dev_info(&dev->dev, "failed to map direct window for %pOF: %d\n",
1088                          dn, ret);
1089                 goto out_free_window;
1090         }
1091
1092         ret = of_add_property(pdn, win64);
1093         if (ret) {
1094                 dev_err(&dev->dev, "unable to add dma window property for %pOF: %d",
1095                          pdn, ret);
1096                 goto out_free_window;
1097         }
1098
1099         window->device = pdn;
1100         window->prop = ddwprop;
1101         spin_lock(&direct_window_list_lock);
1102         list_add(&window->list, &direct_window_list);
1103         spin_unlock(&direct_window_list_lock);
1104
1105         dma_addr = be64_to_cpu(ddwprop->dma_base);
1106         goto out_unlock;
1107
1108 out_free_window:
1109         kfree(window);
1110
1111 out_clear_window:
1112         remove_ddw(pdn, true);
1113
1114 out_free_prop:
1115         kfree(win64->name);
1116         kfree(win64->value);
1117         kfree(win64);
1118
1119 out_failed:
1120
1121         fpdn = kzalloc(sizeof(*fpdn), GFP_KERNEL);
1122         if (!fpdn)
1123                 goto out_unlock;
1124         fpdn->pdn = pdn;
1125         list_add(&fpdn->list, &failed_ddw_pdn_list);
1126
1127 out_unlock:
1128         mutex_unlock(&direct_window_init_mutex);
1129         return dma_addr;
1130 }
1131
1132 static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
1133 {
1134         struct device_node *pdn, *dn;
1135         struct iommu_table *tbl;
1136         const __be32 *dma_window = NULL;
1137         struct pci_dn *pci;
1138
1139         pr_debug("pci_dma_dev_setup_pSeriesLP: %s\n", pci_name(dev));
1140
1141         /* dev setup for LPAR is a little tricky, since the device tree might
1142          * contain the dma-window properties per-device and not necessarily
1143          * for the bus. So we need to search upwards in the tree until we
1144          * either hit a dma-window property, OR find a parent with a table
1145          * already allocated.
1146          */
1147         dn = pci_device_to_OF_node(dev);
1148         pr_debug("  node is %pOF\n", dn);
1149
1150         for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group;
1151              pdn = pdn->parent) {
1152                 dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
1153                 if (dma_window)
1154                         break;
1155         }
1156
1157         if (!pdn || !PCI_DN(pdn)) {
1158                 printk(KERN_WARNING "pci_dma_dev_setup_pSeriesLP: "
1159                        "no DMA window found for pci dev=%s dn=%pOF\n",
1160                                  pci_name(dev), dn);
1161                 return;
1162         }
1163         pr_debug("  parent is %pOF\n", pdn);
1164
1165         pci = PCI_DN(pdn);
1166         if (!pci->table_group) {
1167                 pci->table_group = iommu_pseries_alloc_group(pci->phb->node);
1168                 tbl = pci->table_group->tables[0];
1169                 iommu_table_setparms_lpar(pci->phb, pdn, tbl,
1170                                 pci->table_group, dma_window);
1171                 tbl->it_ops = &iommu_table_lpar_multi_ops;
1172                 iommu_init_table(tbl, pci->phb->node);
1173                 iommu_register_group(pci->table_group,
1174                                 pci_domain_nr(pci->phb->bus), 0);
1175                 pr_debug("  created table: %p\n", pci->table_group);
1176         } else {
1177                 pr_debug("  found DMA window, table: %p\n", pci->table_group);
1178         }
1179
1180         set_iommu_table_base(&dev->dev, pci->table_group->tables[0]);
1181         iommu_add_device(pci->table_group, &dev->dev);
1182 }
1183
1184 static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask)
1185 {
1186         struct device_node *dn = pci_device_to_OF_node(pdev), *pdn;
1187         const __be32 *dma_window = NULL;
1188
1189         /* only attempt to use a new window if 64-bit DMA is requested */
1190         if (dma_mask < DMA_BIT_MASK(64))
1191                 return false;
1192
1193         dev_dbg(&pdev->dev, "node is %pOF\n", dn);
1194
1195         /*
1196          * the device tree might contain the dma-window properties
1197          * per-device and not necessarily for the bus. So we need to
1198          * search upwards in the tree until we either hit a dma-window
1199          * property, OR find a parent with a table already allocated.
1200          */
1201         for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group;
1202                         pdn = pdn->parent) {
1203                 dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
1204                 if (dma_window)
1205                         break;
1206         }
1207
1208         if (pdn && PCI_DN(pdn)) {
1209                 pdev->dev.archdata.dma_offset = enable_ddw(pdev, pdn);
1210                 if (pdev->dev.archdata.dma_offset)
1211                         return true;
1212         }
1213
1214         return false;
1215 }
1216
1217 static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action,
1218                 void *data)
1219 {
1220         struct direct_window *window;
1221         struct memory_notify *arg = data;
1222         int ret = 0;
1223
1224         switch (action) {
1225         case MEM_GOING_ONLINE:
1226                 spin_lock(&direct_window_list_lock);
1227                 list_for_each_entry(window, &direct_window_list, list) {
1228                         ret |= tce_setrange_multi_pSeriesLP(arg->start_pfn,
1229                                         arg->nr_pages, window->prop);
1230                         /* XXX log error */
1231                 }
1232                 spin_unlock(&direct_window_list_lock);
1233                 break;
1234         case MEM_CANCEL_ONLINE:
1235         case MEM_OFFLINE:
1236                 spin_lock(&direct_window_list_lock);
1237                 list_for_each_entry(window, &direct_window_list, list) {
1238                         ret |= tce_clearrange_multi_pSeriesLP(arg->start_pfn,
1239                                         arg->nr_pages, window->prop);
1240                         /* XXX log error */
1241                 }
1242                 spin_unlock(&direct_window_list_lock);
1243                 break;
1244         default:
1245                 break;
1246         }
1247         if (ret && action != MEM_CANCEL_ONLINE)
1248                 return NOTIFY_BAD;
1249
1250         return NOTIFY_OK;
1251 }
1252
1253 static struct notifier_block iommu_mem_nb = {
1254         .notifier_call = iommu_mem_notifier,
1255 };
1256
1257 static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *data)
1258 {
1259         int err = NOTIFY_OK;
1260         struct of_reconfig_data *rd = data;
1261         struct device_node *np = rd->dn;
1262         struct pci_dn *pci = PCI_DN(np);
1263         struct direct_window *window;
1264
1265         switch (action) {
1266         case OF_RECONFIG_DETACH_NODE:
1267                 /*
1268                  * Removing the property will invoke the reconfig
1269                  * notifier again, which causes dead-lock on the
1270                  * read-write semaphore of the notifier chain. So
1271                  * we have to remove the property when releasing
1272                  * the device node.
1273                  */
1274                 remove_ddw(np, false);
1275                 if (pci && pci->table_group)
1276                         iommu_pseries_free_group(pci->table_group,
1277                                         np->full_name);
1278
1279                 spin_lock(&direct_window_list_lock);
1280                 list_for_each_entry(window, &direct_window_list, list) {
1281                         if (window->device == np) {
1282                                 list_del(&window->list);
1283                                 kfree(window);
1284                                 break;
1285                         }
1286                 }
1287                 spin_unlock(&direct_window_list_lock);
1288                 break;
1289         default:
1290                 err = NOTIFY_DONE;
1291                 break;
1292         }
1293         return err;
1294 }
1295
1296 static struct notifier_block iommu_reconfig_nb = {
1297         .notifier_call = iommu_reconfig_notifier,
1298 };
1299
1300 /* These are called very early. */
1301 void iommu_init_early_pSeries(void)
1302 {
1303         if (of_chosen && of_get_property(of_chosen, "linux,iommu-off", NULL))
1304                 return;
1305
1306         if (firmware_has_feature(FW_FEATURE_LPAR)) {
1307                 pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeriesLP;
1308                 pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeriesLP;
1309                 if (!disable_ddw)
1310                         pseries_pci_controller_ops.iommu_bypass_supported =
1311                                 iommu_bypass_supported_pSeriesLP;
1312         } else {
1313                 pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeries;
1314                 pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeries;
1315         }
1316
1317
1318         of_reconfig_notifier_register(&iommu_reconfig_nb);
1319         register_memory_notifier(&iommu_mem_nb);
1320
1321         set_pci_dma_ops(&dma_iommu_ops);
1322 }
1323
1324 static int __init disable_multitce(char *str)
1325 {
1326         if (strcmp(str, "off") == 0 &&
1327             firmware_has_feature(FW_FEATURE_LPAR) &&
1328             firmware_has_feature(FW_FEATURE_MULTITCE)) {
1329                 printk(KERN_INFO "Disabling MULTITCE firmware feature\n");
1330                 powerpc_firmware_features &= ~FW_FEATURE_MULTITCE;
1331         }
1332         return 1;
1333 }
1334
1335 __setup("multitce=", disable_multitce);
1336
1337 static int tce_iommu_bus_notifier(struct notifier_block *nb,
1338                 unsigned long action, void *data)
1339 {
1340         struct device *dev = data;
1341
1342         switch (action) {
1343         case BUS_NOTIFY_DEL_DEVICE:
1344                 iommu_del_device(dev);
1345                 return 0;
1346         default:
1347                 return 0;
1348         }
1349 }
1350
1351 static struct notifier_block tce_iommu_bus_nb = {
1352         .notifier_call = tce_iommu_bus_notifier,
1353 };
1354
1355 static int __init tce_iommu_bus_notifier_init(void)
1356 {
1357         bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb);
1358         return 0;
1359 }
1360 machine_subsys_initcall_sync(pseries, tce_iommu_bus_notifier_init);