Commit | Line | Data |
---|---|---|
1da177e4 | 1 | /* |
1da177e4 LT |
2 | * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation |
3 | * | |
bc97ce95 | 4 | * Rewrite, cleanup: |
1da177e4 | 5 | * |
91f14480 | 6 | * Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation |
bc97ce95 | 7 | * Copyright (C) 2006 Olof Johansson <olof@lixom.net> |
1da177e4 LT |
8 | * |
9 | * Dynamic DMA mapping support, pSeries-specific parts, both SMP and LPAR. | |
10 | * | |
bc97ce95 | 11 | * |
1da177e4 LT |
12 | * This program is free software; you can redistribute it and/or modify |
13 | * it under the terms of the GNU General Public License as published by | |
14 | * the Free Software Foundation; either version 2 of the License, or | |
15 | * (at your option) any later version. | |
bc97ce95 | 16 | * |
1da177e4 LT |
17 | * This program is distributed in the hope that it will be useful, |
18 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 | * GNU General Public License for more details. | |
bc97ce95 | 21 | * |
1da177e4 LT |
22 | * You should have received a copy of the GNU General Public License |
23 | * along with this program; if not, write to the Free Software | |
24 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
25 | */ | |
26 | ||
1da177e4 LT |
27 | #include <linux/init.h> |
28 | #include <linux/types.h> | |
29 | #include <linux/slab.h> | |
30 | #include <linux/mm.h> | |
beacc6da | 31 | #include <linux/memblock.h> |
1da177e4 LT |
32 | #include <linux/spinlock.h> |
33 | #include <linux/string.h> | |
34 | #include <linux/pci.h> | |
35 | #include <linux/dma-mapping.h> | |
62a8bd6c | 36 | #include <linux/crash_dump.h> |
4e8b0cf4 | 37 | #include <linux/memory.h> |
1cf3d8b3 | 38 | #include <linux/of.h> |
ac9a5889 | 39 | #include <linux/iommu.h> |
0eaf4def | 40 | #include <linux/rculist.h> |
1da177e4 LT |
41 | #include <asm/io.h> |
42 | #include <asm/prom.h> | |
43 | #include <asm/rtas.h> | |
1da177e4 LT |
44 | #include <asm/iommu.h> |
45 | #include <asm/pci-bridge.h> | |
46 | #include <asm/machdep.h> | |
1ababe11 | 47 | #include <asm/firmware.h> |
c707ffcf | 48 | #include <asm/tce.h> |
d387899f | 49 | #include <asm/ppc-pci.h> |
2249ca9d | 50 | #include <asm/udbg.h> |
4e8b0cf4 | 51 | #include <asm/mmzone.h> |
212bebb4 | 52 | #include <asm/plpar_wrappers.h> |
a1218720 | 53 | |
38ae9ec4 | 54 | #include "pseries.h" |
1da177e4 | 55 | |
b348aa65 AK |
56 | static struct iommu_table_group *iommu_pseries_alloc_group(int node) |
57 | { | |
4dd9eab3 ME |
58 | struct iommu_table_group *table_group; |
59 | struct iommu_table *tbl; | |
b348aa65 AK |
60 | |
61 | table_group = kzalloc_node(sizeof(struct iommu_table_group), GFP_KERNEL, | |
62 | node); | |
63 | if (!table_group) | |
4dd9eab3 | 64 | return NULL; |
b348aa65 AK |
65 | |
66 | tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, node); | |
67 | if (!tbl) | |
4dd9eab3 | 68 | goto free_group; |
b348aa65 | 69 | |
0eaf4def | 70 | INIT_LIST_HEAD_RCU(&tbl->it_group_list); |
e5afdf9d | 71 | kref_init(&tbl->it_kref); |
0eaf4def | 72 | |
b348aa65 AK |
73 | table_group->tables[0] = tbl; |
74 | ||
75 | return table_group; | |
76 | ||
4dd9eab3 ME |
77 | free_group: |
78 | kfree(table_group); | |
b348aa65 AK |
79 | return NULL; |
80 | } | |
81 | ||
82 | static void iommu_pseries_free_group(struct iommu_table_group *table_group, | |
ac9a5889 AK |
83 | const char *node_name) |
84 | { | |
b348aa65 AK |
85 | struct iommu_table *tbl; |
86 | ||
87 | if (!table_group) | |
88 | return; | |
89 | ||
0eaf4def | 90 | tbl = table_group->tables[0]; |
ac9a5889 | 91 | #ifdef CONFIG_IOMMU_API |
b348aa65 AK |
92 | if (table_group->group) { |
93 | iommu_group_put(table_group->group); | |
94 | BUG_ON(table_group->group); | |
ac9a5889 AK |
95 | } |
96 | #endif | |
e5afdf9d | 97 | iommu_tce_table_put(tbl); |
b348aa65 AK |
98 | |
99 | kfree(table_group); | |
ac9a5889 AK |
100 | } |
101 | ||
6490c490 | 102 | static int tce_build_pSeries(struct iommu_table *tbl, long index, |
bc97ce95 | 103 | long npages, unsigned long uaddr, |
4f3dd8a0 | 104 | enum dma_data_direction direction, |
00085f1e | 105 | unsigned long attrs) |
1da177e4 | 106 | { |
bc97ce95 | 107 | u64 proto_tce; |
df015604 | 108 | __be64 *tcep, *tces; |
bc97ce95 | 109 | u64 rpn; |
1da177e4 | 110 | |
bc97ce95 | 111 | proto_tce = TCE_PCI_READ; // Read allowed |
1da177e4 LT |
112 | |
113 | if (direction != DMA_TO_DEVICE) | |
bc97ce95 | 114 | proto_tce |= TCE_PCI_WRITE; |
1da177e4 | 115 | |
df015604 | 116 | tces = tcep = ((__be64 *)tbl->it_base) + index; |
1da177e4 LT |
117 | |
118 | while (npages--) { | |
95f72d1e | 119 | /* can't move this out since we might cross MEMBLOCK boundary */ |
474e3d56 | 120 | rpn = __pa(uaddr) >> TCE_SHIFT; |
df015604 | 121 | *tcep = cpu_to_be64(proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT); |
1da177e4 | 122 | |
d0035c62 | 123 | uaddr += TCE_PAGE_SIZE; |
bc97ce95 | 124 | tcep++; |
1da177e4 | 125 | } |
6490c490 | 126 | return 0; |
1da177e4 LT |
127 | } |
128 | ||
129 | ||
130 | static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages) | |
131 | { | |
df015604 | 132 | __be64 *tcep, *tces; |
1da177e4 | 133 | |
df015604 | 134 | tces = tcep = ((__be64 *)tbl->it_base) + index; |
bc97ce95 OJ |
135 | |
136 | while (npages--) | |
137 | *(tcep++) = 0; | |
1da177e4 LT |
138 | } |
139 | ||
5f50867b HM |
140 | static unsigned long tce_get_pseries(struct iommu_table *tbl, long index) |
141 | { | |
df015604 | 142 | __be64 *tcep; |
5f50867b | 143 | |
df015604 | 144 | tcep = ((__be64 *)tbl->it_base) + index; |
5f50867b | 145 | |
df015604 | 146 | return be64_to_cpu(*tcep); |
5f50867b | 147 | } |
1da177e4 | 148 | |
6490c490 RJ |
149 | static void tce_free_pSeriesLP(struct iommu_table*, long, long); |
150 | static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long); | |
151 | ||
152 | static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, | |
1da177e4 | 153 | long npages, unsigned long uaddr, |
4f3dd8a0 | 154 | enum dma_data_direction direction, |
00085f1e | 155 | unsigned long attrs) |
1da177e4 | 156 | { |
6490c490 | 157 | u64 rc = 0; |
bc97ce95 OJ |
158 | u64 proto_tce, tce; |
159 | u64 rpn; | |
6490c490 RJ |
160 | int ret = 0; |
161 | long tcenum_start = tcenum, npages_start = npages; | |
1da177e4 | 162 | |
474e3d56 | 163 | rpn = __pa(uaddr) >> TCE_SHIFT; |
bc97ce95 | 164 | proto_tce = TCE_PCI_READ; |
1da177e4 | 165 | if (direction != DMA_TO_DEVICE) |
bc97ce95 | 166 | proto_tce |= TCE_PCI_WRITE; |
1da177e4 LT |
167 | |
168 | while (npages--) { | |
bc97ce95 OJ |
169 | tce = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT; |
170 | rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, tce); | |
171 | ||
6490c490 RJ |
172 | if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) { |
173 | ret = (int)rc; | |
174 | tce_free_pSeriesLP(tbl, tcenum_start, | |
175 | (npages_start - (npages + 1))); | |
176 | break; | |
177 | } | |
178 | ||
1da177e4 | 179 | if (rc && printk_ratelimit()) { |
fe333321 IM |
180 | printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc); |
181 | printk("\tindex = 0x%llx\n", (u64)tbl->it_index); | |
182 | printk("\ttcenum = 0x%llx\n", (u64)tcenum); | |
183 | printk("\ttce val = 0x%llx\n", tce ); | |
4ff52b4d | 184 | dump_stack(); |
1da177e4 | 185 | } |
bc97ce95 | 186 | |
1da177e4 | 187 | tcenum++; |
bc97ce95 | 188 | rpn++; |
1da177e4 | 189 | } |
6490c490 | 190 | return ret; |
1da177e4 LT |
191 | } |
192 | ||
/*
 * Per-CPU scratch page in which batches of TCE values are staged before
 * being handed to plpar_tce_put_indirect().  Allocated lazily on first use.
 */
static DEFINE_PER_CPU(__be64 *, tce_page);

/*
 * Install @npages TCE entries in batches via plpar_tce_put_indirect().
 *
 * Falls back to tce_build_pSeriesLP() when only a single page is requested,
 * when the firmware lacks FW_FEATURE_MULTITCE, or when the per-CPU scratch
 * page cannot be allocated.
 *
 * Returns 0 normally.  If the hypervisor returns H_NOT_ENOUGH_RESOURCES the
 * entries installed by earlier batches are freed and that code is returned
 * (cast to int).  Any other non-zero hcall result stops the loop and is
 * logged (rate limited), but 0 is still returned.
 */
static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
				     long npages, unsigned long uaddr,
				     enum dma_data_direction direction,
				     unsigned long attrs)
{
	u64 rc = 0;
	u64 proto_tce;
	__be64 *tcep;
	u64 rpn;
	long l, limit;
	long tcenum_start = tcenum, npages_start = npages;
	int ret = 0;
	unsigned long flags;

	if ((npages == 1) || !firmware_has_feature(FW_FEATURE_MULTITCE)) {
		return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
					   direction, attrs);
	}

	local_irq_save(flags);	/* to protect tcep and the page behind it */

	tcep = __this_cpu_read(tce_page);

	/* This is safe to do since interrupts are off when we're called
	 * from iommu_alloc{,_sg}()
	 */
	if (!tcep) {
		tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
		/* If allocation fails, fall back to the loop implementation */
		if (!tcep) {
			local_irq_restore(flags);
			return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
					    direction, attrs);
		}
		/* Cache the page for later calls on this CPU. */
		__this_cpu_write(tce_page, tcep);
	}

	rpn = __pa(uaddr) >> TCE_SHIFT;
	proto_tce = TCE_PCI_READ;
	if (direction != DMA_TO_DEVICE)
		proto_tce |= TCE_PCI_WRITE;

	/* We can map max one pageful of TCEs at a time */
	do {
		/*
		 * Set up the page with TCE data, looping through and setting
		 * the values.
		 */
		limit = min_t(long, npages, 4096/TCE_ENTRY_SIZE);

		for (l = 0; l < limit; l++) {
			tcep[l] = cpu_to_be64(proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT);
			rpn++;
		}

		rc = plpar_tce_put_indirect((u64)tbl->it_index,
					    (u64)tcenum << 12,
					    (u64)__pa(tcep),
					    limit);

		/* Counters advance even when the call above failed. */
		npages -= limit;
		tcenum += limit;
	} while (npages > 0 && !rc);

	local_irq_restore(flags);

	if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
		ret = (int)rc;
		/*
		 * npages was already reduced for the failed batch; adding
		 * limit back gives the count installed by earlier batches.
		 */
		tce_freemulti_pSeriesLP(tbl, tcenum_start,
					(npages_start - (npages + limit)));
		return ret;
	}

	if (rc && printk_ratelimit()) {
		printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc);
		printk("\tindex = 0x%llx\n", (u64)tbl->it_index);
		printk("\tnpages = 0x%llx\n", (u64)npages);
		/* NOTE(review): tcep is read after interrupts were re-enabled */
		printk("\ttce[0] val = 0x%llx\n", tcep[0]);
		dump_stack();
	}
	return ret;
}
277 | ||
278 | static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages) | |
279 | { | |
280 | u64 rc; | |
1da177e4 | 281 | |
1da177e4 | 282 | while (npages--) { |
bc97ce95 | 283 | rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, 0); |
1da177e4 LT |
284 | |
285 | if (rc && printk_ratelimit()) { | |
fe333321 IM |
286 | printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc); |
287 | printk("\tindex = 0x%llx\n", (u64)tbl->it_index); | |
288 | printk("\ttcenum = 0x%llx\n", (u64)tcenum); | |
4ff52b4d | 289 | dump_stack(); |
1da177e4 LT |
290 | } |
291 | ||
292 | tcenum++; | |
293 | } | |
294 | } | |
295 | ||
296 | ||
297 | static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages) | |
298 | { | |
299 | u64 rc; | |
1da177e4 | 300 | |
da004c36 AK |
301 | if (!firmware_has_feature(FW_FEATURE_MULTITCE)) |
302 | return tce_free_pSeriesLP(tbl, tcenum, npages); | |
303 | ||
bc97ce95 | 304 | rc = plpar_tce_stuff((u64)tbl->it_index, (u64)tcenum << 12, 0, npages); |
1da177e4 LT |
305 | |
306 | if (rc && printk_ratelimit()) { | |
307 | printk("tce_freemulti_pSeriesLP: plpar_tce_stuff failed\n"); | |
fe333321 IM |
308 | printk("\trc = %lld\n", rc); |
309 | printk("\tindex = 0x%llx\n", (u64)tbl->it_index); | |
310 | printk("\tnpages = 0x%llx\n", (u64)npages); | |
4ff52b4d | 311 | dump_stack(); |
1da177e4 LT |
312 | } |
313 | } | |
314 | ||
5f50867b HM |
315 | static unsigned long tce_get_pSeriesLP(struct iommu_table *tbl, long tcenum) |
316 | { | |
317 | u64 rc; | |
318 | unsigned long tce_ret; | |
319 | ||
5f50867b HM |
320 | rc = plpar_tce_get((u64)tbl->it_index, (u64)tcenum << 12, &tce_ret); |
321 | ||
322 | if (rc && printk_ratelimit()) { | |
fe333321 IM |
323 | printk("tce_get_pSeriesLP: plpar_tce_get failed. rc=%lld\n", rc); |
324 | printk("\tindex = 0x%llx\n", (u64)tbl->it_index); | |
325 | printk("\ttcenum = 0x%llx\n", (u64)tcenum); | |
4ff52b4d | 326 | dump_stack(); |
5f50867b HM |
327 | } |
328 | ||
329 | return tce_ret; | |
330 | } | |
331 | ||
/*
 * Description of one dynamic DMA window as stored in a device tree
 * property; all fields are big-endian.
 */
/* this is compatible with cells for the device tree property */
struct dynamic_dma_window_prop {
	__be32	liobn;		/* tce table number */
	__be64	dma_base;	/* address hi,lo */
	__be32	tce_shift;	/* ilog2(tce_page_size) */
	__be32	window_shift;	/* ilog2(tce_window_size) */
};
339 | ||
/* One known direct DMA window; entries are linked on direct_window_list. */
struct direct_window {
	struct device_node *device;	/* node the window belongs to */
	const struct dynamic_dma_window_prop *prop;	/* window description */
	struct list_head list;		/* link in direct_window_list */
};
345 | ||
/* Dynamic DMA Window support */
/*
 * Four 32-bit values describing available DDW resources.
 * NOTE(review): presumably filled from the firmware DDW query call made by
 * query_ddw() -- confirm against that function.
 */
struct ddw_query_response {
	u32 windows_available;
	u32 largest_available_block;
	u32 page_size;
	u32 migration_capable;
};
353 | ||
/*
 * Three 32-bit values identifying a window: its liobn and an address split
 * into high and low halves.
 * NOTE(review): presumably the output of the firmware DDW create call --
 * confirm against the caller.
 */
struct ddw_create_response {
	u32 liobn;
	u32 addr_hi;
	u32 addr_lo;
};
359 | ||
/* All known direct windows (struct direct_window entries). */
static LIST_HEAD(direct_window_list);
/* prevents races between memory on/offline and window creation */
static DEFINE_SPINLOCK(direct_window_list_lock);
/* protects initializing window twice for same device */
static DEFINE_MUTEX(direct_window_init_mutex);
/* Name of the device tree property holding a dynamic_dma_window_prop. */
#define DIRECT64_PROPNAME "linux,direct64-ddr-window-info"
366 | ||
367 | static int tce_clearrange_multi_pSeriesLP(unsigned long start_pfn, | |
368 | unsigned long num_pfn, const void *arg) | |
369 | { | |
370 | const struct dynamic_dma_window_prop *maprange = arg; | |
371 | int rc; | |
372 | u64 tce_size, num_tce, dma_offset, next; | |
373 | u32 tce_shift; | |
374 | long limit; | |
375 | ||
376 | tce_shift = be32_to_cpu(maprange->tce_shift); | |
377 | tce_size = 1ULL << tce_shift; | |
378 | next = start_pfn << PAGE_SHIFT; | |
379 | num_tce = num_pfn << PAGE_SHIFT; | |
380 | ||
381 | /* round back to the beginning of the tce page size */ | |
382 | num_tce += next & (tce_size - 1); | |
383 | next &= ~(tce_size - 1); | |
384 | ||
385 | /* covert to number of tces */ | |
386 | num_tce |= tce_size - 1; | |
387 | num_tce >>= tce_shift; | |
388 | ||
389 | do { | |
390 | /* | |
391 | * Set up the page with TCE data, looping through and setting | |
392 | * the values. | |
393 | */ | |
394 | limit = min_t(long, num_tce, 512); | |
395 | dma_offset = next + be64_to_cpu(maprange->dma_base); | |
396 | ||
397 | rc = plpar_tce_stuff((u64)be32_to_cpu(maprange->liobn), | |
398 | dma_offset, | |
399 | 0, limit); | |
22b38298 | 400 | next += limit * tce_size; |
4e8b0cf4 NA |
401 | num_tce -= limit; |
402 | } while (num_tce > 0 && !rc); | |
403 | ||
404 | return rc; | |
405 | } | |
406 | ||
/*
 * Map a range of kernel pages into a dynamic DMA window.
 *
 * @start_pfn: first kernel page frame of the range
 * @num_pfn:   number of kernel pages in the range
 * @arg:       const struct dynamic_dma_window_prop * describing the window
 *
 * Each TCE is built from the byte address of its page with read and write
 * permission set, and is placed at that address plus the window's dma_base.
 * TCEs are staged in the per-CPU tce_page and installed with
 * plpar_tce_put_indirect(), at most one page of entries per call.
 *
 * Runs with local interrupts disabled and unconditionally re-enables them
 * before returning.  Returns -ENOMEM if the scratch page cannot be
 * allocated, otherwise the result of the last hcall.
 */
static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
					unsigned long num_pfn, const void *arg)
{
	const struct dynamic_dma_window_prop *maprange = arg;
	u64 tce_size, num_tce, dma_offset, next, proto_tce, liobn;
	__be64 *tcep;
	u32 tce_shift;
	u64 rc = 0;
	long l, limit;

	local_irq_disable();	/* to protect tcep and the page behind it */
	tcep = __this_cpu_read(tce_page);

	if (!tcep) {
		tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
		if (!tcep) {
			local_irq_enable();
			return -ENOMEM;
		}
		/* Cache the page for later calls on this CPU. */
		__this_cpu_write(tce_page, tcep);
	}

	proto_tce = TCE_PCI_READ | TCE_PCI_WRITE;

	liobn = (u64)be32_to_cpu(maprange->liobn);
	tce_shift = be32_to_cpu(maprange->tce_shift);
	tce_size = 1ULL << tce_shift;
	next = start_pfn << PAGE_SHIFT;
	num_tce = num_pfn << PAGE_SHIFT;

	/* round back to the beginning of the tce page size */
	num_tce += next & (tce_size - 1);
	next &= ~(tce_size - 1);

	/* convert to number of tces */
	num_tce |= tce_size - 1;
	num_tce >>= tce_shift;

	/* We can map max one pageful of TCEs at a time */
	do {
		/*
		 * Set up the page with TCE data, looping through and setting
		 * the values.
		 */
		limit = min_t(long, num_tce, 4096/TCE_ENTRY_SIZE);
		/* Must be computed before the loop below advances next. */
		dma_offset = next + be64_to_cpu(maprange->dma_base);

		for (l = 0; l < limit; l++) {
			tcep[l] = cpu_to_be64(proto_tce | next);
			next += tce_size;
		}

		rc = plpar_tce_put_indirect(liobn,
					    dma_offset,
					    (u64)__pa(tcep),
					    limit);

		num_tce -= limit;
	} while (num_tce > 0 && !rc);

	/* error cleanup: caller will clear whole range */

	local_irq_enable();
	return rc;
}
472 | ||
/*
 * Adapter with a non-const void *arg parameter that forwards unchanged to
 * tce_setrange_multi_pSeriesLP().
 * NOTE(review): presumably exists to match a range-walk callback signature;
 * confirm against the caller.
 */
static int tce_setrange_multi_pSeriesLP_walk(unsigned long start_pfn,
		unsigned long num_pfn, void *arg)
{
	return tce_setrange_multi_pSeriesLP(start_pfn, num_pfn, arg);
}
478 | ||
1da177e4 LT |
479 | static void iommu_table_setparms(struct pci_controller *phb, |
480 | struct device_node *dn, | |
bc97ce95 | 481 | struct iommu_table *tbl) |
1da177e4 LT |
482 | { |
483 | struct device_node *node; | |
b7d6bf4f | 484 | const unsigned long *basep; |
9938c474 | 485 | const u32 *sizep; |
1da177e4 | 486 | |
44ef3390 | 487 | node = phb->dn; |
1da177e4 | 488 | |
e2eb6392 SR |
489 | basep = of_get_property(node, "linux,tce-base", NULL); |
490 | sizep = of_get_property(node, "linux,tce-size", NULL); | |
1da177e4 | 491 | if (basep == NULL || sizep == NULL) { |
b7c670d6 RH |
492 | printk(KERN_ERR "PCI_DMA: iommu_table_setparms: %pOF has " |
493 | "missing tce entries !\n", dn); | |
1da177e4 LT |
494 | return; |
495 | } | |
496 | ||
497 | tbl->it_base = (unsigned long)__va(*basep); | |
5f50867b | 498 | |
62a8bd6c | 499 | if (!is_kdump_kernel()) |
54622f10 | 500 | memset((void *)tbl->it_base, 0, *sizep); |
1da177e4 LT |
501 | |
502 | tbl->it_busno = phb->bus->number; | |
3a553170 | 503 | tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K; |
bc97ce95 | 504 | |
1da177e4 | 505 | /* Units of tce entries */ |
3a553170 | 506 | tbl->it_offset = phb->dma_window_base_cur >> tbl->it_page_shift; |
bc97ce95 | 507 | |
1da177e4 | 508 | /* Test if we are going over 2GB of DMA space */ |
3c2822cc OJ |
509 | if (phb->dma_window_base_cur + phb->dma_window_size > 0x80000000ul) { |
510 | udbg_printf("PCI_DMA: Unexpected number of IOAs under this PHB.\n"); | |
bc97ce95 | 511 | panic("PCI_DMA: Unexpected number of IOAs under this PHB.\n"); |
3c2822cc | 512 | } |
bc97ce95 | 513 | |
1da177e4 LT |
514 | phb->dma_window_base_cur += phb->dma_window_size; |
515 | ||
516 | /* Set the tce table size - measured in entries */ | |
3a553170 | 517 | tbl->it_size = phb->dma_window_size >> tbl->it_page_shift; |
1da177e4 LT |
518 | |
519 | tbl->it_index = 0; | |
520 | tbl->it_blocksize = 16; | |
521 | tbl->it_type = TCE_PCI; | |
522 | } | |
523 | ||
524 | /* | |
525 | * iommu_table_setparms_lpar | |
526 | * | |
527 | * Function: On pSeries LPAR systems, return TCE table info, given a pci bus. | |
1da177e4 LT |
528 | */ |
529 | static void iommu_table_setparms_lpar(struct pci_controller *phb, | |
530 | struct device_node *dn, | |
531 | struct iommu_table *tbl, | |
b6e1f6ad | 532 | struct iommu_table_group *table_group, |
2083f681 | 533 | const __be32 *dma_window) |
1da177e4 | 534 | { |
4c76e0bc JK |
535 | unsigned long offset, size; |
536 | ||
4c76e0bc | 537 | of_parse_dma_window(dn, dma_window, &tbl->it_index, &offset, &size); |
1da177e4 | 538 | |
b8c49def | 539 | tbl->it_busno = phb->bus->number; |
3a553170 | 540 | tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K; |
1da177e4 | 541 | tbl->it_base = 0; |
1da177e4 LT |
542 | tbl->it_blocksize = 16; |
543 | tbl->it_type = TCE_PCI; | |
3a553170 AP |
544 | tbl->it_offset = offset >> tbl->it_page_shift; |
545 | tbl->it_size = size >> tbl->it_page_shift; | |
b6e1f6ad AK |
546 | |
547 | table_group->tce32_start = offset; | |
548 | table_group->tce32_size = size; | |
1da177e4 LT |
549 | } |
550 | ||
da004c36 AK |
551 | struct iommu_table_ops iommu_table_pseries_ops = { |
552 | .set = tce_build_pSeries, | |
553 | .clear = tce_free_pSeries, | |
554 | .get = tce_get_pseries | |
555 | }; | |
556 | ||
12d04eef | 557 | static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) |
1da177e4 | 558 | { |
3c2822cc | 559 | struct device_node *dn; |
1da177e4 | 560 | struct iommu_table *tbl; |
3c2822cc OJ |
561 | struct device_node *isa_dn, *isa_dn_orig; |
562 | struct device_node *tmp; | |
563 | struct pci_dn *pci; | |
564 | int children; | |
1da177e4 | 565 | |
3c2822cc | 566 | dn = pci_bus_to_OF_node(bus); |
12d04eef | 567 | |
b7c670d6 | 568 | pr_debug("pci_dma_bus_setup_pSeries: setting up bus %pOF\n", dn); |
3c2822cc OJ |
569 | |
570 | if (bus->self) { | |
571 | /* This is not a root bus, any setup will be done for the | |
572 | * device-side of the bridge in iommu_dev_setup_pSeries(). | |
573 | */ | |
574 | return; | |
575 | } | |
12d04eef | 576 | pci = PCI_DN(dn); |
3c2822cc OJ |
577 | |
578 | /* Check if the ISA bus on the system is under | |
579 | * this PHB. | |
1da177e4 | 580 | */ |
3c2822cc | 581 | isa_dn = isa_dn_orig = of_find_node_by_type(NULL, "isa"); |
1da177e4 | 582 | |
3c2822cc OJ |
583 | while (isa_dn && isa_dn != dn) |
584 | isa_dn = isa_dn->parent; | |
585 | ||
498b6514 | 586 | of_node_put(isa_dn_orig); |
1da177e4 | 587 | |
d3c58fb1 | 588 | /* Count number of direct PCI children of the PHB. */ |
3c2822cc | 589 | for (children = 0, tmp = dn->child; tmp; tmp = tmp->sibling) |
d3c58fb1 | 590 | children++; |
1da177e4 | 591 | |
f7ebf352 | 592 | pr_debug("Children: %d\n", children); |
1da177e4 | 593 | |
3c2822cc OJ |
594 | /* Calculate amount of DMA window per slot. Each window must be |
595 | * a power of two (due to pci_alloc_consistent requirements). | |
596 | * | |
597 | * Keep 256MB aside for PHBs with ISA. | |
598 | */ | |
1da177e4 | 599 | |
3c2822cc OJ |
600 | if (!isa_dn) { |
601 | /* No ISA/IDE - just set window size and return */ | |
602 | pci->phb->dma_window_size = 0x80000000ul; /* To be divided */ | |
603 | ||
604 | while (pci->phb->dma_window_size * children > 0x80000000ul) | |
605 | pci->phb->dma_window_size >>= 1; | |
41febbc8 | 606 | pr_debug("No ISA/IDE, window size is 0x%llx\n", |
f7ebf352 | 607 | pci->phb->dma_window_size); |
3c2822cc OJ |
608 | pci->phb->dma_window_base_cur = 0; |
609 | ||
610 | return; | |
1da177e4 | 611 | } |
3c2822cc OJ |
612 | |
613 | /* If we have ISA, then we probably have an IDE | |
614 | * controller too. Allocate a 128MB table but | |
615 | * skip the first 128MB to avoid stepping on ISA | |
616 | * space. | |
617 | */ | |
618 | pci->phb->dma_window_size = 0x8000000ul; | |
619 | pci->phb->dma_window_base_cur = 0x8000000ul; | |
620 | ||
b348aa65 AK |
621 | pci->table_group = iommu_pseries_alloc_group(pci->phb->node); |
622 | tbl = pci->table_group->tables[0]; | |
3c2822cc OJ |
623 | |
624 | iommu_table_setparms(pci->phb, dn, tbl); | |
da004c36 | 625 | tbl->it_ops = &iommu_table_pseries_ops; |
b348aa65 | 626 | iommu_init_table(tbl, pci->phb->node); |
3c2822cc OJ |
627 | |
628 | /* Divide the rest (1.75GB) among the children */ | |
629 | pci->phb->dma_window_size = 0x80000000ul; | |
630 | while (pci->phb->dma_window_size * children > 0x70000000ul) | |
631 | pci->phb->dma_window_size >>= 1; | |
632 | ||
41febbc8 | 633 | pr_debug("ISA/IDE, window size is 0x%llx\n", pci->phb->dma_window_size); |
1da177e4 LT |
634 | } |
635 | ||
b6e1f6ad AK |
#ifdef CONFIG_IOMMU_API
/*
 * Replace the TCE at @index with a new value and hand back the old one.
 *
 * On entry *@tce and *@direction describe the new mapping.  If both the
 * read and the write hcall succeed, *@direction is set to the direction
 * derived from the previous TCE and *@tce to the previous TCE with its
 * read/write permission bits masked off; on failure both are left
 * unchanged.  The get/put pair runs under the table's large_pool lock with
 * interrupts saved.
 *
 * Returns 0 on success or the failing hcall's return code.
 */
static int tce_exchange_pseries(struct iommu_table *tbl, long index, unsigned
				long *tce, enum dma_data_direction *direction)
{
	const unsigned long ioba = (unsigned long) index << tbl->it_page_shift;
	const u64 perm = iommu_direction_to_tce_perm(*direction);
	const unsigned long newtce = *tce | perm;
	unsigned long oldtce = 0;
	unsigned long flags;
	long rc;

	spin_lock_irqsave(&tbl->large_pool.lock, flags);

	rc = plpar_tce_get((u64)tbl->it_index, ioba, &oldtce);
	if (rc)
		goto unlock;

	rc = plpar_tce_put((u64)tbl->it_index, ioba, newtce);
	if (rc)
		goto unlock;

	*direction = iommu_tce_direction(oldtce);
	*tce = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE);

unlock:
	spin_unlock_irqrestore(&tbl->large_pool.lock, flags);

	return rc;
}
#endif
662 | ||
da004c36 AK |
663 | struct iommu_table_ops iommu_table_lpar_multi_ops = { |
664 | .set = tce_buildmulti_pSeriesLP, | |
b6e1f6ad AK |
665 | #ifdef CONFIG_IOMMU_API |
666 | .exchange = tce_exchange_pseries, | |
667 | #endif | |
da004c36 AK |
668 | .clear = tce_freemulti_pSeriesLP, |
669 | .get = tce_get_pSeriesLP | |
670 | }; | |
1da177e4 | 671 | |
12d04eef | 672 | static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) |
1da177e4 LT |
673 | { |
674 | struct iommu_table *tbl; | |
675 | struct device_node *dn, *pdn; | |
1635317f | 676 | struct pci_dn *ppci; |
2083f681 | 677 | const __be32 *dma_window = NULL; |
1da177e4 | 678 | |
1da177e4 LT |
679 | dn = pci_bus_to_OF_node(bus); |
680 | ||
b7c670d6 RH |
681 | pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %pOF\n", |
682 | dn); | |
12d04eef | 683 | |
1da177e4 LT |
684 | /* Find nearest ibm,dma-window, walking up the device tree */ |
685 | for (pdn = dn; pdn != NULL; pdn = pdn->parent) { | |
e2eb6392 | 686 | dma_window = of_get_property(pdn, "ibm,dma-window", NULL); |
1da177e4 LT |
687 | if (dma_window != NULL) |
688 | break; | |
689 | } | |
690 | ||
691 | if (dma_window == NULL) { | |
f7ebf352 | 692 | pr_debug(" no ibm,dma-window property !\n"); |
1da177e4 LT |
693 | return; |
694 | } | |
695 | ||
e07102db | 696 | ppci = PCI_DN(pdn); |
12d04eef | 697 | |
b7c670d6 RH |
698 | pr_debug(" parent is %pOF, iommu_table: 0x%p\n", |
699 | pdn, ppci->table_group); | |
12d04eef | 700 | |
b348aa65 AK |
701 | if (!ppci->table_group) { |
702 | ppci->table_group = iommu_pseries_alloc_group(ppci->phb->node); | |
703 | tbl = ppci->table_group->tables[0]; | |
b6e1f6ad AK |
704 | iommu_table_setparms_lpar(ppci->phb, pdn, tbl, |
705 | ppci->table_group, dma_window); | |
da004c36 | 706 | tbl->it_ops = &iommu_table_lpar_multi_ops; |
b348aa65 AK |
707 | iommu_init_table(tbl, ppci->phb->node); |
708 | iommu_register_group(ppci->table_group, | |
709 | pci_domain_nr(bus), 0); | |
710 | pr_debug(" created table: %p\n", ppci->table_group); | |
1da177e4 | 711 | } |
1da177e4 LT |
712 | } |
713 | ||
714 | ||
12d04eef | 715 | static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) |
1da177e4 | 716 | { |
12d04eef | 717 | struct device_node *dn; |
3c2822cc | 718 | struct iommu_table *tbl; |
1da177e4 | 719 | |
f7ebf352 | 720 | pr_debug("pci_dma_dev_setup_pSeries: %s\n", pci_name(dev)); |
1da177e4 | 721 | |
58f9b0b0 | 722 | dn = dev->dev.of_node; |
1da177e4 | 723 | |
3c2822cc OJ |
724 | /* If we're the direct child of a root bus, then we need to allocate |
725 | * an iommu table ourselves. The bus setup code should have setup | |
726 | * the window sizes already. | |
727 | */ | |
728 | if (!dev->bus->self) { | |
12d04eef BH |
729 | struct pci_controller *phb = PCI_DN(dn)->phb; |
730 | ||
f7ebf352 | 731 | pr_debug(" --> first child, no bridge. Allocating iommu table.\n"); |
b348aa65 AK |
732 | PCI_DN(dn)->table_group = iommu_pseries_alloc_group(phb->node); |
733 | tbl = PCI_DN(dn)->table_group->tables[0]; | |
12d04eef | 734 | iommu_table_setparms(phb, dn, tbl); |
da004c36 | 735 | tbl->it_ops = &iommu_table_pseries_ops; |
b348aa65 | 736 | iommu_init_table(tbl, phb->node); |
4617082e | 737 | set_iommu_table_base(&dev->dev, tbl); |
3c2822cc OJ |
738 | return; |
739 | } | |
740 | ||
741 | /* If this device is further down the bus tree, search upwards until | |
742 | * an already allocated iommu table is found and use that. | |
743 | */ | |
744 | ||
b348aa65 | 745 | while (dn && PCI_DN(dn) && PCI_DN(dn)->table_group == NULL) |
1da177e4 LT |
746 | dn = dn->parent; |
747 | ||
c409c631 | 748 | if (dn && PCI_DN(dn)) |
b348aa65 AK |
749 | set_iommu_table_base(&dev->dev, |
750 | PCI_DN(dn)->table_group->tables[0]); | |
c409c631 | 751 | else |
12d04eef BH |
752 | printk(KERN_WARNING "iommu: Device %s has no iommu table\n", |
753 | pci_name(dev)); | |
1da177e4 LT |
754 | } |
755 | ||
4e8b0cf4 NA |
/* Non-zero once "disable_ddw" was given on the kernel command line. */
static int __read_mostly disable_ddw;

/*
 * Early-parameter handler for "disable_ddw": sets the disable_ddw flag and
 * logs the fact.  @str is ignored.  Always returns 0.
 */
static int __init disable_ddw_setup(char *str)
{
	disable_ddw = 1;
	printk(KERN_INFO "ppc iommu: disabling ddw.\n");

	return 0;
}

early_param("disable_ddw", disable_ddw_setup);
767 | ||
5efbabe0 | 768 | static void remove_ddw(struct device_node *np, bool remove_prop) |
4e8b0cf4 NA |
769 | { |
770 | struct dynamic_dma_window_prop *dwp; | |
771 | struct property *win64; | |
9410e018 | 772 | u32 ddw_avail[3]; |
4e8b0cf4 | 773 | u64 liobn; |
9410e018 AK |
774 | int ret = 0; |
775 | ||
776 | ret = of_property_read_u32_array(np, "ibm,ddw-applicable", | |
777 | &ddw_avail[0], 3); | |
4e8b0cf4 | 778 | |
4e8b0cf4 | 779 | win64 = of_find_property(np, DIRECT64_PROPNAME, NULL); |
2573f684 | 780 | if (!win64) |
4e8b0cf4 NA |
781 | return; |
782 | ||
9410e018 | 783 | if (ret || win64->length < sizeof(*dwp)) |
2573f684 MM |
784 | goto delprop; |
785 | ||
4e8b0cf4 NA |
786 | dwp = win64->value; |
787 | liobn = (u64)be32_to_cpu(dwp->liobn); | |
788 | ||
789 | /* clear the whole window, note the arg is in kernel pages */ | |
790 | ret = tce_clearrange_multi_pSeriesLP(0, | |
791 | 1ULL << (be32_to_cpu(dwp->window_shift) - PAGE_SHIFT), dwp); | |
792 | if (ret) | |
f2c2cbcc JP |
793 | pr_warn("%pOF failed to clear tces in window.\n", |
794 | np); | |
4e8b0cf4 | 795 | else |
b7c670d6 RH |
796 | pr_debug("%pOF successfully cleared tces in window.\n", |
797 | np); | |
4e8b0cf4 | 798 | |
ae69e1ed NA |
799 | ret = rtas_call(ddw_avail[2], 1, 1, NULL, liobn); |
800 | if (ret) | |
f2c2cbcc | 801 | pr_warn("%pOF: failed to remove direct window: rtas returned " |
ae69e1ed | 802 | "%d to ibm,remove-pe-dma-window(%x) %llx\n", |
b7c670d6 | 803 | np, ret, ddw_avail[2], liobn); |
ae69e1ed | 804 | else |
b7c670d6 | 805 | pr_debug("%pOF: successfully removed direct window: rtas returned " |
ae69e1ed | 806 | "%d to ibm,remove-pe-dma-window(%x) %llx\n", |
b7c670d6 | 807 | np, ret, ddw_avail[2], liobn); |
4e8b0cf4 | 808 | |
2573f684 | 809 | delprop: |
5efbabe0 GS |
810 | if (remove_prop) |
811 | ret = of_remove_property(np, win64); | |
2573f684 | 812 | if (ret) |
f2c2cbcc | 813 | pr_warn("%pOF: failed to remove direct window property: %d\n", |
b7c670d6 | 814 | np, ret); |
2573f684 | 815 | } |
4e8b0cf4 | 816 | |
b73a635f | 817 | static u64 find_existing_ddw(struct device_node *pdn) |
4e8b0cf4 | 818 | { |
4e8b0cf4 NA |
819 | struct direct_window *window; |
820 | const struct dynamic_dma_window_prop *direct64; | |
821 | u64 dma_addr = 0; | |
822 | ||
4e8b0cf4 NA |
823 | spin_lock(&direct_window_list_lock); |
824 | /* check if we already created a window and dupe that config if so */ | |
825 | list_for_each_entry(window, &direct_window_list, list) { | |
826 | if (window->device == pdn) { | |
827 | direct64 = window->prop; | |
df015604 | 828 | dma_addr = be64_to_cpu(direct64->dma_base); |
4e8b0cf4 NA |
829 | break; |
830 | } | |
831 | } | |
832 | spin_unlock(&direct_window_list_lock); | |
833 | ||
834 | return dma_addr; | |
835 | } | |
836 | ||
c8566780 | 837 | static int find_existing_ddw_windows(void) |
4e8b0cf4 | 838 | { |
97e7dc52 | 839 | int len; |
c8566780 | 840 | struct device_node *pdn; |
97e7dc52 | 841 | struct direct_window *window; |
4e8b0cf4 | 842 | const struct dynamic_dma_window_prop *direct64; |
4e8b0cf4 | 843 | |
c8566780 MM |
844 | if (!firmware_has_feature(FW_FEATURE_LPAR)) |
845 | return 0; | |
846 | ||
847 | for_each_node_with_property(pdn, DIRECT64_PROPNAME) { | |
97e7dc52 | 848 | direct64 = of_get_property(pdn, DIRECT64_PROPNAME, &len); |
c8566780 MM |
849 | if (!direct64) |
850 | continue; | |
851 | ||
97e7dc52 NA |
852 | window = kzalloc(sizeof(*window), GFP_KERNEL); |
853 | if (!window || len < sizeof(struct dynamic_dma_window_prop)) { | |
854 | kfree(window); | |
5efbabe0 | 855 | remove_ddw(pdn, true); |
97e7dc52 NA |
856 | continue; |
857 | } | |
c8566780 | 858 | |
97e7dc52 NA |
859 | window->device = pdn; |
860 | window->prop = direct64; | |
861 | spin_lock(&direct_window_list_lock); | |
862 | list_add(&window->list, &direct_window_list); | |
863 | spin_unlock(&direct_window_list_lock); | |
4e8b0cf4 NA |
864 | } |
865 | ||
c8566780 | 866 | return 0; |
4e8b0cf4 | 867 | } |
c8566780 | 868 | machine_arch_initcall(pseries, find_existing_ddw_windows); |
4e8b0cf4 | 869 | |
b73a635f | 870 | static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail, |
4e8b0cf4 NA |
871 | struct ddw_query_response *query) |
872 | { | |
8445a87f GP |
873 | struct device_node *dn; |
874 | struct pci_dn *pdn; | |
4e8b0cf4 NA |
875 | u32 cfg_addr; |
876 | u64 buid; | |
877 | int ret; | |
878 | ||
879 | /* | |
880 | * Get the config address and phb buid of the PE window. | |
881 | * Rely on eeh to retrieve this for us. | |
882 | * Retrieve them from the pci device, not the node with the | |
883 | * dma-window property | |
884 | */ | |
8445a87f GP |
885 | dn = pci_device_to_OF_node(dev); |
886 | pdn = PCI_DN(dn); | |
887 | buid = pdn->phb->buid; | |
8a934efe | 888 | cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8)); |
39baadbf | 889 | |
b73a635f | 890 | ret = rtas_call(ddw_avail[0], 3, 5, (u32 *)query, |
4e8b0cf4 NA |
891 | cfg_addr, BUID_HI(buid), BUID_LO(buid)); |
892 | dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x" | |
b73a635f | 893 | " returned %d\n", ddw_avail[0], cfg_addr, BUID_HI(buid), |
4e8b0cf4 NA |
894 | BUID_LO(buid), ret); |
895 | return ret; | |
896 | } | |
897 | ||
b73a635f | 898 | static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail, |
4e8b0cf4 NA |
899 | struct ddw_create_response *create, int page_shift, |
900 | int window_shift) | |
901 | { | |
8445a87f GP |
902 | struct device_node *dn; |
903 | struct pci_dn *pdn; | |
4e8b0cf4 NA |
904 | u32 cfg_addr; |
905 | u64 buid; | |
906 | int ret; | |
907 | ||
908 | /* | |
909 | * Get the config address and phb buid of the PE window. | |
910 | * Rely on eeh to retrieve this for us. | |
911 | * Retrieve them from the pci device, not the node with the | |
912 | * dma-window property | |
913 | */ | |
8445a87f GP |
914 | dn = pci_device_to_OF_node(dev); |
915 | pdn = PCI_DN(dn); | |
916 | buid = pdn->phb->buid; | |
8a934efe | 917 | cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8)); |
4e8b0cf4 NA |
918 | |
919 | do { | |
920 | /* extra outputs are LIOBN and dma-addr (hi, lo) */ | |
9410e018 AK |
921 | ret = rtas_call(ddw_avail[1], 5, 4, (u32 *)create, |
922 | cfg_addr, BUID_HI(buid), BUID_LO(buid), | |
923 | page_shift, window_shift); | |
4e8b0cf4 NA |
924 | } while (rtas_busy_delay(ret)); |
925 | dev_info(&dev->dev, | |
926 | "ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d " | |
b73a635f | 927 | "(liobn = 0x%x starting addr = %x %x)\n", ddw_avail[1], |
4e8b0cf4 NA |
928 | cfg_addr, BUID_HI(buid), BUID_LO(buid), page_shift, |
929 | window_shift, ret, create->liobn, create->addr_hi, create->addr_lo); | |
930 | ||
931 | return ret; | |
932 | } | |
933 | ||
61435690 NA |
934 | struct failed_ddw_pdn { |
935 | struct device_node *pdn; | |
936 | struct list_head list; | |
937 | }; | |
938 | ||
939 | static LIST_HEAD(failed_ddw_pdn_list); | |
940 | ||
68c0449e AK |
941 | static phys_addr_t ddw_memory_hotplug_max(void) |
942 | { | |
943 | phys_addr_t max_addr = memory_hotplug_max(); | |
944 | struct device_node *memory; | |
945 | ||
946 | for_each_node_by_type(memory, "memory") { | |
947 | unsigned long start, size; | |
948 | int ranges, n_mem_addr_cells, n_mem_size_cells, len; | |
949 | const __be32 *memcell_buf; | |
950 | ||
951 | memcell_buf = of_get_property(memory, "reg", &len); | |
952 | if (!memcell_buf || len <= 0) | |
953 | continue; | |
954 | ||
955 | n_mem_addr_cells = of_n_addr_cells(memory); | |
956 | n_mem_size_cells = of_n_size_cells(memory); | |
957 | ||
958 | /* ranges in cell */ | |
959 | ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells); | |
960 | ||
961 | start = of_read_number(memcell_buf, n_mem_addr_cells); | |
962 | memcell_buf += n_mem_addr_cells; | |
963 | size = of_read_number(memcell_buf, n_mem_size_cells); | |
964 | memcell_buf += n_mem_size_cells; | |
965 | ||
966 | max_addr = max_t(phys_addr_t, max_addr, start + size); | |
967 | } | |
968 | ||
969 | return max_addr; | |
970 | } | |
971 | ||
4e8b0cf4 NA |
972 | /* |
973 | * If the PE supports dynamic dma windows, and there is space for a table | |
974 | * that can map all pages in a linear offset, then setup such a table, | |
975 | * and record the dma-offset in the struct device. | |
976 | * | |
977 | * dev: the pci device we are checking | |
978 | * pdn: the parent pe node with the ibm,dma_window property | |
979 | * Future: also check if we can remap the base window for our base page size | |
980 | * | |
9ae2fdde | 981 | * returns the dma offset for use by the direct mapped DMA code. |
4e8b0cf4 NA |
982 | */ |
983 | static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) | |
984 | { | |
985 | int len, ret; | |
986 | struct ddw_query_response query; | |
987 | struct ddw_create_response create; | |
988 | int page_shift; | |
989 | u64 dma_addr, max_addr; | |
990 | struct device_node *dn; | |
9410e018 | 991 | u32 ddw_avail[3]; |
4e8b0cf4 | 992 | struct direct_window *window; |
76730334 | 993 | struct property *win64; |
4e8b0cf4 | 994 | struct dynamic_dma_window_prop *ddwprop; |
61435690 | 995 | struct failed_ddw_pdn *fpdn; |
4e8b0cf4 NA |
996 | |
997 | mutex_lock(&direct_window_init_mutex); | |
998 | ||
b73a635f | 999 | dma_addr = find_existing_ddw(pdn); |
4e8b0cf4 NA |
1000 | if (dma_addr != 0) |
1001 | goto out_unlock; | |
1002 | ||
61435690 NA |
1003 | /* |
1004 | * If we already went through this for a previous function of | |
1005 | * the same device and failed, we don't want to muck with the | |
1006 | * DMA window again, as it will race with in-flight operations | |
1007 | * and can lead to EEHs. The above mutex protects access to the | |
1008 | * list. | |
1009 | */ | |
1010 | list_for_each_entry(fpdn, &failed_ddw_pdn_list, list) { | |
b7c670d6 | 1011 | if (fpdn->pdn == pdn) |
61435690 NA |
1012 | goto out_unlock; |
1013 | } | |
1014 | ||
4e8b0cf4 NA |
1015 | /* |
1016 | * the ibm,ddw-applicable property holds the tokens for: | |
1017 | * ibm,query-pe-dma-window | |
1018 | * ibm,create-pe-dma-window | |
1019 | * ibm,remove-pe-dma-window | |
1020 | * for the given node in that order. | |
1021 | * the property is actually in the parent, not the PE | |
1022 | */ | |
9410e018 AK |
1023 | ret = of_property_read_u32_array(pdn, "ibm,ddw-applicable", |
1024 | &ddw_avail[0], 3); | |
1025 | if (ret) | |
ae69e1ed | 1026 | goto out_failed; |
25ebc45b | 1027 | |
ae69e1ed | 1028 | /* |
4e8b0cf4 NA |
1029 | * Query if there is a second window of size to map the |
1030 | * whole partition. Query returns number of windows, largest | |
1031 | * block assigned to PE (partition endpoint), and two bitmasks | |
1032 | * of page sizes: supported and supported for migrate-dma. | |
1033 | */ | |
1034 | dn = pci_device_to_OF_node(dev); | |
b73a635f | 1035 | ret = query_ddw(dev, ddw_avail, &query); |
4e8b0cf4 | 1036 | if (ret != 0) |
ae69e1ed | 1037 | goto out_failed; |
4e8b0cf4 NA |
1038 | |
1039 | if (query.windows_available == 0) { | |
1040 | /* | |
1041 | * no additional windows are available for this device. | |
1042 | * We might be able to reallocate the existing window, | |
1043 | * trading in for a larger page size. | |
1044 | */ | |
1045 | dev_dbg(&dev->dev, "no free dynamic windows"); | |
ae69e1ed | 1046 | goto out_failed; |
4e8b0cf4 | 1047 | } |
9410e018 | 1048 | if (query.page_size & 4) { |
4e8b0cf4 | 1049 | page_shift = 24; /* 16MB */ |
9410e018 | 1050 | } else if (query.page_size & 2) { |
4e8b0cf4 | 1051 | page_shift = 16; /* 64kB */ |
9410e018 | 1052 | } else if (query.page_size & 1) { |
4e8b0cf4 NA |
1053 | page_shift = 12; /* 4kB */ |
1054 | } else { | |
1055 | dev_dbg(&dev->dev, "no supported direct page size in mask %x", | |
1056 | query.page_size); | |
ae69e1ed | 1057 | goto out_failed; |
4e8b0cf4 NA |
1058 | } |
1059 | /* verify the window * number of ptes will map the partition */ | |
1060 | /* check largest block * page size > max memory hotplug addr */ | |
68c0449e | 1061 | max_addr = ddw_memory_hotplug_max(); |
9410e018 | 1062 | if (query.largest_available_block < (max_addr >> page_shift)) { |
8ab102d6 | 1063 | dev_dbg(&dev->dev, "can't map partition max 0x%llx with %u " |
4e8b0cf4 NA |
1064 | "%llu-sized pages\n", max_addr, query.largest_available_block, |
1065 | 1ULL << page_shift); | |
ae69e1ed | 1066 | goto out_failed; |
4e8b0cf4 NA |
1067 | } |
1068 | len = order_base_2(max_addr); | |
1069 | win64 = kzalloc(sizeof(struct property), GFP_KERNEL); | |
1070 | if (!win64) { | |
1071 | dev_info(&dev->dev, | |
1072 | "couldn't allocate property for 64bit dma window\n"); | |
ae69e1ed | 1073 | goto out_failed; |
4e8b0cf4 NA |
1074 | } |
1075 | win64->name = kstrdup(DIRECT64_PROPNAME, GFP_KERNEL); | |
1076 | win64->value = ddwprop = kmalloc(sizeof(*ddwprop), GFP_KERNEL); | |
76730334 | 1077 | win64->length = sizeof(*ddwprop); |
4e8b0cf4 NA |
1078 | if (!win64->name || !win64->value) { |
1079 | dev_info(&dev->dev, | |
1080 | "couldn't allocate property name and value\n"); | |
1081 | goto out_free_prop; | |
1082 | } | |
1083 | ||
b73a635f | 1084 | ret = create_ddw(dev, ddw_avail, &create, page_shift, len); |
4e8b0cf4 NA |
1085 | if (ret != 0) |
1086 | goto out_free_prop; | |
1087 | ||
9410e018 AK |
1088 | ddwprop->liobn = cpu_to_be32(create.liobn); |
1089 | ddwprop->dma_base = cpu_to_be64(((u64)create.addr_hi << 32) | | |
1090 | create.addr_lo); | |
4e8b0cf4 NA |
1091 | ddwprop->tce_shift = cpu_to_be32(page_shift); |
1092 | ddwprop->window_shift = cpu_to_be32(len); | |
1093 | ||
b7c670d6 RH |
1094 | dev_dbg(&dev->dev, "created tce table LIOBN 0x%x for %pOF\n", |
1095 | create.liobn, dn); | |
4e8b0cf4 NA |
1096 | |
1097 | window = kzalloc(sizeof(*window), GFP_KERNEL); | |
1098 | if (!window) | |
1099 | goto out_clear_window; | |
1100 | ||
1101 | ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT, | |
1102 | win64->value, tce_setrange_multi_pSeriesLP_walk); | |
1103 | if (ret) { | |
b7c670d6 RH |
1104 | dev_info(&dev->dev, "failed to map direct window for %pOF: %d\n", |
1105 | dn, ret); | |
7a19081f | 1106 | goto out_free_window; |
4e8b0cf4 NA |
1107 | } |
1108 | ||
79d1c712 | 1109 | ret = of_add_property(pdn, win64); |
4e8b0cf4 | 1110 | if (ret) { |
b7c670d6 RH |
1111 | dev_err(&dev->dev, "unable to add dma window property for %pOF: %d", |
1112 | pdn, ret); | |
7a19081f | 1113 | goto out_free_window; |
4e8b0cf4 NA |
1114 | } |
1115 | ||
1116 | window->device = pdn; | |
1117 | window->prop = ddwprop; | |
1118 | spin_lock(&direct_window_list_lock); | |
1119 | list_add(&window->list, &direct_window_list); | |
1120 | spin_unlock(&direct_window_list_lock); | |
1121 | ||
9410e018 | 1122 | dma_addr = be64_to_cpu(ddwprop->dma_base); |
4e8b0cf4 NA |
1123 | goto out_unlock; |
1124 | ||
7a19081f JL |
1125 | out_free_window: |
1126 | kfree(window); | |
1127 | ||
4e8b0cf4 | 1128 | out_clear_window: |
5efbabe0 | 1129 | remove_ddw(pdn, true); |
4e8b0cf4 NA |
1130 | |
1131 | out_free_prop: | |
1132 | kfree(win64->name); | |
1133 | kfree(win64->value); | |
1134 | kfree(win64); | |
1135 | ||
ae69e1ed | 1136 | out_failed: |
25ebc45b | 1137 | |
61435690 NA |
1138 | fpdn = kzalloc(sizeof(*fpdn), GFP_KERNEL); |
1139 | if (!fpdn) | |
1140 | goto out_unlock; | |
1141 | fpdn->pdn = pdn; | |
1142 | list_add(&fpdn->list, &failed_ddw_pdn_list); | |
1143 | ||
4e8b0cf4 NA |
1144 | out_unlock: |
1145 | mutex_unlock(&direct_window_init_mutex); | |
1146 | return dma_addr; | |
1147 | } | |
1148 | ||
12d04eef | 1149 | static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) |
1da177e4 LT |
1150 | { |
1151 | struct device_node *pdn, *dn; | |
1152 | struct iommu_table *tbl; | |
2083f681 | 1153 | const __be32 *dma_window = NULL; |
1635317f | 1154 | struct pci_dn *pci; |
1da177e4 | 1155 | |
f7ebf352 | 1156 | pr_debug("pci_dma_dev_setup_pSeriesLP: %s\n", pci_name(dev)); |
12d04eef | 1157 | |
1da177e4 | 1158 | /* dev setup for LPAR is a little tricky, since the device tree might |
25985edc | 1159 | * contain the dma-window properties per-device and not necessarily |
1da177e4 LT |
1160 | * for the bus. So we need to search upwards in the tree until we |
1161 | * either hit a dma-window property, OR find a parent with a table | |
1162 | * already allocated. | |
1163 | */ | |
1164 | dn = pci_device_to_OF_node(dev); | |
b7c670d6 | 1165 | pr_debug(" node is %pOF\n", dn); |
5d2efba6 | 1166 | |
b348aa65 | 1167 | for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group; |
1635317f | 1168 | pdn = pdn->parent) { |
e2eb6392 | 1169 | dma_window = of_get_property(pdn, "ibm,dma-window", NULL); |
1da177e4 LT |
1170 | if (dma_window) |
1171 | break; | |
1172 | } | |
1173 | ||
650f7b3b LV |
1174 | if (!pdn || !PCI_DN(pdn)) { |
1175 | printk(KERN_WARNING "pci_dma_dev_setup_pSeriesLP: " | |
b7c670d6 RH |
1176 | "no DMA window found for pci dev=%s dn=%pOF\n", |
1177 | pci_name(dev), dn); | |
650f7b3b LV |
1178 | return; |
1179 | } | |
b7c670d6 | 1180 | pr_debug(" parent is %pOF\n", pdn); |
12d04eef | 1181 | |
e07102db | 1182 | pci = PCI_DN(pdn); |
b348aa65 AK |
1183 | if (!pci->table_group) { |
1184 | pci->table_group = iommu_pseries_alloc_group(pci->phb->node); | |
1185 | tbl = pci->table_group->tables[0]; | |
b6e1f6ad AK |
1186 | iommu_table_setparms_lpar(pci->phb, pdn, tbl, |
1187 | pci->table_group, dma_window); | |
da004c36 | 1188 | tbl->it_ops = &iommu_table_lpar_multi_ops; |
b348aa65 AK |
1189 | iommu_init_table(tbl, pci->phb->node); |
1190 | iommu_register_group(pci->table_group, | |
1191 | pci_domain_nr(pci->phb->bus), 0); | |
1192 | pr_debug(" created table: %p\n", pci->table_group); | |
de113217 | 1193 | } else { |
b348aa65 | 1194 | pr_debug(" found DMA window, table: %p\n", pci->table_group); |
1da177e4 LT |
1195 | } |
1196 | ||
b348aa65 | 1197 | set_iommu_table_base(&dev->dev, pci->table_group->tables[0]); |
c4e9d3c1 | 1198 | iommu_add_device(pci->table_group, &dev->dev); |
1da177e4 | 1199 | } |
4e8b0cf4 | 1200 | |
9ae2fdde | 1201 | static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask) |
4e8b0cf4 | 1202 | { |
9ae2fdde | 1203 | struct device_node *dn = pci_device_to_OF_node(pdev), *pdn; |
2083f681 | 1204 | const __be32 *dma_window = NULL; |
4e8b0cf4 | 1205 | |
4e8b0cf4 | 1206 | /* only attempt to use a new window if 64-bit DMA is requested */ |
9ae2fdde CH |
1207 | if (dma_mask < DMA_BIT_MASK(64)) |
1208 | return false; | |
4e8b0cf4 | 1209 | |
9ae2fdde | 1210 | dev_dbg(&pdev->dev, "node is %pOF\n", dn); |
4e8b0cf4 | 1211 | |
9ae2fdde CH |
1212 | /* |
1213 | * the device tree might contain the dma-window properties | |
1214 | * per-device and not necessarily for the bus. So we need to | |
1215 | * search upwards in the tree until we either hit a dma-window | |
1216 | * property, OR find a parent with a table already allocated. | |
1217 | */ | |
1218 | for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group; | |
1219 | pdn = pdn->parent) { | |
1220 | dma_window = of_get_property(pdn, "ibm,dma-window", NULL); | |
1221 | if (dma_window) | |
1222 | break; | |
4e8b0cf4 NA |
1223 | } |
1224 | ||
9ae2fdde | 1225 | if (pdn && PCI_DN(pdn)) { |
0617fc0c CH |
1226 | pdev->dev.archdata.dma_offset = enable_ddw(pdev, pdn); |
1227 | if (pdev->dev.archdata.dma_offset) | |
9ae2fdde | 1228 | return true; |
6a5c7be5 MM |
1229 | } |
1230 | ||
9ae2fdde | 1231 | return false; |
6a5c7be5 MM |
1232 | } |
1233 | ||
4e8b0cf4 NA |
1234 | static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action, |
1235 | void *data) | |
1236 | { | |
1237 | struct direct_window *window; | |
1238 | struct memory_notify *arg = data; | |
1239 | int ret = 0; | |
1240 | ||
1241 | switch (action) { | |
1242 | case MEM_GOING_ONLINE: | |
1243 | spin_lock(&direct_window_list_lock); | |
1244 | list_for_each_entry(window, &direct_window_list, list) { | |
1245 | ret |= tce_setrange_multi_pSeriesLP(arg->start_pfn, | |
1246 | arg->nr_pages, window->prop); | |
1247 | /* XXX log error */ | |
1248 | } | |
1249 | spin_unlock(&direct_window_list_lock); | |
1250 | break; | |
1251 | case MEM_CANCEL_ONLINE: | |
1252 | case MEM_OFFLINE: | |
1253 | spin_lock(&direct_window_list_lock); | |
1254 | list_for_each_entry(window, &direct_window_list, list) { | |
1255 | ret |= tce_clearrange_multi_pSeriesLP(arg->start_pfn, | |
1256 | arg->nr_pages, window->prop); | |
1257 | /* XXX log error */ | |
1258 | } | |
1259 | spin_unlock(&direct_window_list_lock); | |
1260 | break; | |
1261 | default: | |
1262 | break; | |
1263 | } | |
1264 | if (ret && action != MEM_CANCEL_ONLINE) | |
1265 | return NOTIFY_BAD; | |
1266 | ||
1267 | return NOTIFY_OK; | |
1268 | } | |
1269 | ||
1270 | static struct notifier_block iommu_mem_nb = { | |
1271 | .notifier_call = iommu_mem_notifier, | |
1272 | }; | |
1273 | ||
f5242e5a | 1274 | static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *data) |
bed59275 SR |
1275 | { |
1276 | int err = NOTIFY_OK; | |
f5242e5a GL |
1277 | struct of_reconfig_data *rd = data; |
1278 | struct device_node *np = rd->dn; | |
bed59275 | 1279 | struct pci_dn *pci = PCI_DN(np); |
4e8b0cf4 | 1280 | struct direct_window *window; |
bed59275 SR |
1281 | |
1282 | switch (action) { | |
1cf3d8b3 | 1283 | case OF_RECONFIG_DETACH_NODE: |
5efbabe0 GS |
1284 | /* |
1285 | * Removing the property will invoke the reconfig | |
1286 | * notifier again, which causes dead-lock on the | |
1287 | * read-write semaphore of the notifier chain. So | |
1288 | * we have to remove the property when releasing | |
1289 | * the device node. | |
1290 | */ | |
1291 | remove_ddw(np, false); | |
b348aa65 AK |
1292 | if (pci && pci->table_group) |
1293 | iommu_pseries_free_group(pci->table_group, | |
ac9a5889 | 1294 | np->full_name); |
4e8b0cf4 NA |
1295 | |
1296 | spin_lock(&direct_window_list_lock); | |
1297 | list_for_each_entry(window, &direct_window_list, list) { | |
1298 | if (window->device == np) { | |
1299 | list_del(&window->list); | |
1300 | kfree(window); | |
1301 | break; | |
1302 | } | |
1303 | } | |
1304 | spin_unlock(&direct_window_list_lock); | |
bed59275 SR |
1305 | break; |
1306 | default: | |
1307 | err = NOTIFY_DONE; | |
1308 | break; | |
1309 | } | |
1310 | return err; | |
1311 | } | |
1312 | ||
1313 | static struct notifier_block iommu_reconfig_nb = { | |
1314 | .notifier_call = iommu_reconfig_notifier, | |
1315 | }; | |
1da177e4 | 1316 | |
1da177e4 LT |
1317 | /* These are called very early. */ |
1318 | void iommu_init_early_pSeries(void) | |
1319 | { | |
a8daac8a | 1320 | if (of_chosen && of_get_property(of_chosen, "linux,iommu-off", NULL)) |
1da177e4 | 1321 | return; |
1da177e4 | 1322 | |
57cfb814 | 1323 | if (firmware_has_feature(FW_FEATURE_LPAR)) { |
38ae9ec4 DA |
1324 | pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeriesLP; |
1325 | pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeriesLP; | |
9ae2fdde CH |
1326 | if (!disable_ddw) |
1327 | pseries_pci_controller_ops.iommu_bypass_supported = | |
1328 | iommu_bypass_supported_pSeriesLP; | |
1da177e4 | 1329 | } else { |
38ae9ec4 DA |
1330 | pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeries; |
1331 | pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeries; | |
1da177e4 LT |
1332 | } |
1333 | ||
1334 | ||
1cf3d8b3 | 1335 | of_reconfig_notifier_register(&iommu_reconfig_nb); |
4e8b0cf4 | 1336 | register_memory_notifier(&iommu_mem_nb); |
1da177e4 | 1337 | |
98747770 | 1338 | set_pci_dma_ops(&dma_iommu_ops); |
1da177e4 LT |
1339 | } |
1340 | ||
4e89a2d8 WS |
1341 | static int __init disable_multitce(char *str) |
1342 | { | |
1343 | if (strcmp(str, "off") == 0 && | |
1344 | firmware_has_feature(FW_FEATURE_LPAR) && | |
1345 | firmware_has_feature(FW_FEATURE_MULTITCE)) { | |
1346 | printk(KERN_INFO "Disabling MULTITCE firmware feature\n"); | |
4e89a2d8 WS |
1347 | powerpc_firmware_features &= ~FW_FEATURE_MULTITCE; |
1348 | } | |
1349 | return 1; | |
1350 | } | |
1351 | ||
1352 | __setup("multitce=", disable_multitce); | |
4ad04e59 | 1353 | |
c4e9d3c1 AK |
1354 | static int tce_iommu_bus_notifier(struct notifier_block *nb, |
1355 | unsigned long action, void *data) | |
1356 | { | |
1357 | struct device *dev = data; | |
1358 | ||
1359 | switch (action) { | |
1360 | case BUS_NOTIFY_DEL_DEVICE: | |
1361 | iommu_del_device(dev); | |
1362 | return 0; | |
1363 | default: | |
1364 | return 0; | |
1365 | } | |
1366 | } | |
1367 | ||
1368 | static struct notifier_block tce_iommu_bus_nb = { | |
1369 | .notifier_call = tce_iommu_bus_notifier, | |
1370 | }; | |
1371 | ||
1372 | static int __init tce_iommu_bus_notifier_init(void) | |
1373 | { | |
1374 | bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb); | |
1375 | return 0; | |
1376 | } | |
4ad04e59 | 1377 | machine_subsys_initcall_sync(pseries, tce_iommu_bus_notifier_init); |