Commit | Line | Data |
---|---|---|
1a59d1b8 | 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
1da177e4 | 2 | /* |
1da177e4 LT |
3 | * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation |
4 | * | |
bc97ce95 | 5 | * Rewrite, cleanup: |
1da177e4 | 6 | * |
91f14480 | 7 | * Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation |
bc97ce95 | 8 | * Copyright (C) 2006 Olof Johansson <olof@lixom.net> |
1da177e4 LT |
9 | * |
10 | * Dynamic DMA mapping support, pSeries-specific parts, both SMP and LPAR. | |
1da177e4 LT |
11 | */ |
12 | ||
1da177e4 LT |
13 | #include <linux/init.h> |
14 | #include <linux/types.h> | |
15 | #include <linux/slab.h> | |
16 | #include <linux/mm.h> | |
beacc6da | 17 | #include <linux/memblock.h> |
1da177e4 LT |
18 | #include <linux/spinlock.h> |
19 | #include <linux/string.h> | |
20 | #include <linux/pci.h> | |
21 | #include <linux/dma-mapping.h> | |
62a8bd6c | 22 | #include <linux/crash_dump.h> |
4e8b0cf4 | 23 | #include <linux/memory.h> |
f431a8cd | 24 | #include <linux/vmalloc.h> |
1cf3d8b3 | 25 | #include <linux/of.h> |
2500763d | 26 | #include <linux/of_address.h> |
ac9a5889 | 27 | #include <linux/iommu.h> |
0eaf4def | 28 | #include <linux/rculist.h> |
1da177e4 LT |
29 | #include <asm/io.h> |
30 | #include <asm/prom.h> | |
31 | #include <asm/rtas.h> | |
1da177e4 LT |
32 | #include <asm/iommu.h> |
33 | #include <asm/pci-bridge.h> | |
34 | #include <asm/machdep.h> | |
1ababe11 | 35 | #include <asm/firmware.h> |
c707ffcf | 36 | #include <asm/tce.h> |
d387899f | 37 | #include <asm/ppc-pci.h> |
2249ca9d | 38 | #include <asm/udbg.h> |
4e8b0cf4 | 39 | #include <asm/mmzone.h> |
212bebb4 | 40 | #include <asm/plpar_wrappers.h> |
a1218720 | 41 | |
38ae9ec4 | 42 | #include "pseries.h" |
1da177e4 | 43 | |
/* Indices into the "ibm,ddw-applicable" property: the RTAS call tokens
 * used to query/create/remove a dynamic DMA window for a PE.
 */
enum {
	DDW_QUERY_PE_DMA_WIN  = 0,
	DDW_CREATE_PE_DMA_WIN = 1,
	DDW_REMOVE_PE_DMA_WIN = 2,

	/* Number of entries expected in the property. */
	DDW_APPLICABLE_SIZE
};

/* Indices into the "ibm,ddw-extensions" property. */
enum {
	DDW_EXT_SIZE = 0,		/* number of extension entries */
	DDW_EXT_RESET_DMA_WIN = 1,	/* token to reset the default window */
	DDW_EXT_QUERY_OUT_SIZE = 2	/* output size of the query RTAS call */
};
57 | ||
4ff8677a | 58 | static struct iommu_table *iommu_pseries_alloc_table(int node) |
b348aa65 | 59 | { |
4dd9eab3 | 60 | struct iommu_table *tbl; |
b348aa65 | 61 | |
b348aa65 AK |
62 | tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, node); |
63 | if (!tbl) | |
4ff8677a | 64 | return NULL; |
b348aa65 | 65 | |
0eaf4def | 66 | INIT_LIST_HEAD_RCU(&tbl->it_group_list); |
e5afdf9d | 67 | kref_init(&tbl->it_kref); |
4ff8677a LB |
68 | return tbl; |
69 | } | |
0eaf4def | 70 | |
af199e6c | 71 | #ifdef CONFIG_IOMMU_API |
b09c031d | 72 | static struct iommu_table_group_ops spapr_tce_table_group_ops; |
af199e6c | 73 | #endif |
b09c031d | 74 | |
4ff8677a LB |
75 | static struct iommu_table_group *iommu_pseries_alloc_group(int node) |
76 | { | |
77 | struct iommu_table_group *table_group; | |
78 | ||
79 | table_group = kzalloc_node(sizeof(*table_group), GFP_KERNEL, node); | |
80 | if (!table_group) | |
81 | return NULL; | |
b348aa65 | 82 | |
9d67c943 AK |
83 | #ifdef CONFIG_IOMMU_API |
84 | table_group->ops = &spapr_tce_table_group_ops; | |
85 | table_group->pgsizes = SZ_4K; | |
86 | #endif | |
87 | ||
4ff8677a LB |
88 | table_group->tables[0] = iommu_pseries_alloc_table(node); |
89 | if (table_group->tables[0]) | |
90 | return table_group; | |
b348aa65 | 91 | |
4dd9eab3 | 92 | kfree(table_group); |
b348aa65 AK |
93 | return NULL; |
94 | } | |
95 | ||
/*
 * Release an iommu_table_group and the tables it owns.
 * @node_name is unused here; kept for the caller's convenience.
 */
static void iommu_pseries_free_group(struct iommu_table_group *table_group,
		const char *node_name)
{
	if (!table_group)
		return;

#ifdef CONFIG_IOMMU_API
	if (table_group->group) {
		iommu_group_put(table_group->group);
		/* the release callback must have cleared ->group */
		BUG_ON(table_group->group);
	}
#endif

	/* Default DMA window table is at index 0, while DDW at 1. SR-IOV
	 * adapters only have table on index 0(if not direct mapped).
	 */
	if (table_group->tables[0])
		iommu_tce_table_put(table_group->tables[0]);

	if (table_group->tables[1])
		iommu_tce_table_put(table_group->tables[1]);

	kfree(table_group);
}
120 | ||
/*
 * Populate @npages TCE entries directly in the in-memory table (bare
 * metal / non-LPAR path), mapping virtual address @uaddr onwards.
 * Always returns 0.
 */
static int tce_build_pSeries(struct iommu_table *tbl, long index,
			     long npages, unsigned long uaddr,
			     enum dma_data_direction direction,
			     unsigned long attrs)
{
	u64 proto_tce;
	__be64 *tcep;
	u64 rpn;
	const unsigned long tceshift = tbl->it_page_shift;
	const unsigned long pagesize = IOMMU_PAGE_SIZE(tbl);

	proto_tce = TCE_PCI_READ; // Read allowed

	/* anything but device-bound DMA also needs write permission */
	if (direction != DMA_TO_DEVICE)
		proto_tce |= TCE_PCI_WRITE;

	tcep = ((__be64 *)tbl->it_base) + index;

	while (npages--) {
		/* can't move this out since we might cross MEMBLOCK boundary */
		rpn = __pa(uaddr) >> tceshift;
		*tcep = cpu_to_be64(proto_tce | rpn << tceshift);

		uaddr += pagesize;
		tcep++;
	}
	return 0;
}
149 | ||
150 | ||
f431a8cd | 151 | static void tce_clear_pSeries(struct iommu_table *tbl, long index, long npages) |
1da177e4 | 152 | { |
c05f57fd | 153 | __be64 *tcep; |
1da177e4 | 154 | |
c05f57fd | 155 | tcep = ((__be64 *)tbl->it_base) + index; |
bc97ce95 OJ |
156 | |
157 | while (npages--) | |
158 | *(tcep++) = 0; | |
1da177e4 LT |
159 | } |
160 | ||
5f50867b HM |
161 | static unsigned long tce_get_pseries(struct iommu_table *tbl, long index) |
162 | { | |
df015604 | 163 | __be64 *tcep; |
5f50867b | 164 | |
df015604 | 165 | tcep = ((__be64 *)tbl->it_base) + index; |
5f50867b | 166 | |
df015604 | 167 | return be64_to_cpu(*tcep); |
5f50867b | 168 | } |
1da177e4 | 169 | |
#ifdef CONFIG_IOMMU_API
/*
 * Allocate the zeroed userspace-view array (one entry per TCE) used by
 * the IOMMU API to track guest mappings.  Returns 0, -EPERM if the
 * table uses indirect levels (never the case here), or -ENOMEM.
 */
static long pseries_tce_iommu_userspace_view_alloc(struct iommu_table *tbl)
{
	/* one slot per TCE, rounded up to whole pages for vzalloc */
	unsigned long cb = ALIGN(sizeof(tbl->it_userspace[0]) * tbl->it_size, PAGE_SIZE);
	unsigned long *uas;

	if (tbl->it_indirect_levels) /* Impossible */
		return -EPERM;

	/* catch double allocation */
	WARN_ON(tbl->it_userspace);

	uas = vzalloc(cb);
	if (!uas)
		return -ENOMEM;

	tbl->it_userspace = (__be64 *) uas;

	return 0;
}
#endif
f431a8cd SB |
190 | |
/* Free the userspace-view array and clear the pointer (vfree(NULL) is a no-op). */
static void tce_iommu_userspace_view_free(struct iommu_table *tbl)
{
	vfree(tbl->it_userspace);
	tbl->it_userspace = NULL;
}
196 | ||
197 | static void tce_free_pSeries(struct iommu_table *tbl) | |
198 | { | |
199 | if (!tbl->it_userspace) | |
200 | tce_iommu_userspace_view_free(tbl); | |
201 | } | |
202 | ||
0c634baf | 203 | static void tce_free_pSeriesLP(unsigned long liobn, long, long, long); |
6490c490 RJ |
204 | static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long); |
205 | ||
/*
 * LPAR path: program TCEs one at a time via the H_PUT_TCE hcall
 * (plpar_tce_put).  On H_NOT_ENOUGH_RESOURCES, the entries already
 * installed in this call are torn down and the error is returned.
 */
static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift,
			       long npages, unsigned long uaddr,
			       enum dma_data_direction direction,
			       unsigned long attrs)
{
	u64 rc = 0;
	u64 proto_tce, tce;
	u64 rpn;
	int ret = 0;
	/* remember the start so we can undo partial progress on failure */
	long tcenum_start = tcenum, npages_start = npages;

	rpn = __pa(uaddr) >> tceshift;
	proto_tce = TCE_PCI_READ;
	if (direction != DMA_TO_DEVICE)
		proto_tce |= TCE_PCI_WRITE;

	while (npages--) {
		tce = proto_tce | rpn << tceshift;
		rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, tce);

		if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
			ret = (int)rc;
			/* roll back the entries installed so far */
			tce_free_pSeriesLP(liobn, tcenum_start, tceshift,
					   (npages_start - (npages + 1)));
			break;
		}

		if (rc && printk_ratelimit()) {
			printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc);
			printk("\tindex = 0x%llx\n", (u64)liobn);
			printk("\ttcenum = 0x%llx\n", (u64)tcenum);
			printk("\ttce val = 0x%llx\n", tce );
			dump_stack();
		}

		tcenum++;
		rpn++;
	}
	return ret;
}
246 | ||
df015604 | 247 | static DEFINE_PER_CPU(__be64 *, tce_page); |
1da177e4 | 248 | |
/*
 * LPAR path: program TCEs in batches via the H_PUT_TCE_INDIRECT hcall,
 * staging up to one page of TCE values in a per-CPU bounce page
 * (tce_page).  Falls back to the one-at-a-time tce_build_pSeriesLP()
 * when only one page is mapped, when the firmware lacks the indirect
 * call, or when the bounce page cannot be allocated.
 */
static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
				    long npages, unsigned long uaddr,
				    enum dma_data_direction direction,
				    unsigned long attrs)
{
	u64 rc = 0;
	u64 proto_tce;
	__be64 *tcep;
	u64 rpn;
	long l, limit;
	long tcenum_start = tcenum, npages_start = npages;
	int ret = 0;
	unsigned long flags;
	const unsigned long tceshift = tbl->it_page_shift;

	if ((npages == 1) || !firmware_has_feature(FW_FEATURE_PUT_TCE_IND)) {
		return tce_build_pSeriesLP(tbl->it_index, tcenum,
					   tceshift, npages, uaddr,
					   direction, attrs);
	}

	local_irq_save(flags);	/* to protect tcep and the page behind it */

	tcep = __this_cpu_read(tce_page);

	/* This is safe to do since interrupts are off when we're called
	 * from iommu_alloc{,_sg}()
	 */
	if (!tcep) {
		tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
		/* If allocation fails, fall back to the loop implementation */
		if (!tcep) {
			local_irq_restore(flags);
			return tce_build_pSeriesLP(tbl->it_index, tcenum,
					tceshift,
					npages, uaddr, direction, attrs);
		}
		__this_cpu_write(tce_page, tcep);
	}

	rpn = __pa(uaddr) >> tceshift;
	proto_tce = TCE_PCI_READ;
	if (direction != DMA_TO_DEVICE)
		proto_tce |= TCE_PCI_WRITE;

	/* We can map max one pageful of TCEs at a time */
	do {
		/*
		 * Set up the page with TCE data, looping through and setting
		 * the values.
		 */
		limit = min_t(long, npages, 4096 / TCE_ENTRY_SIZE);

		for (l = 0; l < limit; l++) {
			tcep[l] = cpu_to_be64(proto_tce | rpn << tceshift);
			rpn++;
		}

		rc = plpar_tce_put_indirect((u64)tbl->it_index,
					    (u64)tcenum << tceshift,
					    (u64)__pa(tcep),
					    limit);

		npages -= limit;
		tcenum += limit;
	} while (npages > 0 && !rc);

	local_irq_restore(flags);

	if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
		ret = (int)rc;
		/* undo everything mapped before the failing batch */
		tce_freemulti_pSeriesLP(tbl, tcenum_start,
					(npages_start - (npages + limit)));
		return ret;
	}

	if (rc && printk_ratelimit()) {
		printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc);
		printk("\tindex = 0x%llx\n", (u64)tbl->it_index);
		printk("\tnpages = 0x%llx\n", (u64)npages);
		printk("\ttce[0] val = 0x%llx\n", tcep[0]);
		dump_stack();
	}
	return ret;
}
334 | ||
0c634baf LB |
/*
 * LPAR path: clear @npages TCEs one at a time by writing 0 through the
 * H_PUT_TCE hcall.  Failures are only logged (rate-limited).
 */
static void tce_free_pSeriesLP(unsigned long liobn, long tcenum, long tceshift,
			       long npages)
{
	u64 rc;

	while (npages--) {
		rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, 0);

		if (rc && printk_ratelimit()) {
			printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc);
			printk("\tindex = 0x%llx\n", (u64)liobn);
			printk("\ttcenum = 0x%llx\n", (u64)tcenum);
			dump_stack();
		}

		tcenum++;
	}
}
353 | ||
354 | ||
/*
 * LPAR path: clear @npages TCEs in batches of up to 512 via the
 * H_STUFF_TCE hcall; falls back to one-at-a-time tce_free_pSeriesLP()
 * when the firmware does not support it.
 */
static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)
{
	u64 rc;
	long rpages = npages;
	unsigned long limit;

	if (!firmware_has_feature(FW_FEATURE_STUFF_TCE))
		return tce_free_pSeriesLP(tbl->it_index, tcenum,
					  tbl->it_page_shift, npages);

	do {
		limit = min_t(unsigned long, rpages, 512);

		rc = plpar_tce_stuff((u64)tbl->it_index,
				     (u64)tcenum << tbl->it_page_shift, 0, limit);

		rpages -= limit;
		tcenum += limit;
	} while (rpages > 0 && !rc);

	if (rc && printk_ratelimit()) {
		printk("tce_freemulti_pSeriesLP: plpar_tce_stuff failed\n");
		printk("\trc = %lld\n", rc);
		printk("\tindex = 0x%llx\n", (u64)tbl->it_index);
		printk("\tnpages = 0x%llx\n", (u64)npages);
		dump_stack();
	}
}
383 | ||
5f50867b HM |
384 | static unsigned long tce_get_pSeriesLP(struct iommu_table *tbl, long tcenum) |
385 | { | |
386 | u64 rc; | |
387 | unsigned long tce_ret; | |
388 | ||
0c634baf LB |
389 | rc = plpar_tce_get((u64)tbl->it_index, |
390 | (u64)tcenum << tbl->it_page_shift, &tce_ret); | |
5f50867b HM |
391 | |
392 | if (rc && printk_ratelimit()) { | |
fe333321 IM |
393 | printk("tce_get_pSeriesLP: plpar_tce_get failed. rc=%lld\n", rc); |
394 | printk("\tindex = 0x%llx\n", (u64)tbl->it_index); | |
395 | printk("\ttcenum = 0x%llx\n", (u64)tcenum); | |
4ff52b4d | 396 | dump_stack(); |
5f50867b HM |
397 | } |
398 | ||
399 | return tce_ret; | |
400 | } | |
401 | ||
/* this is compatible with cells for the device tree property */
struct dynamic_dma_window_prop {
	__be32 liobn;		/* tce table number */
	__be64 dma_base;	/* address hi,lo */
	__be32 tce_shift;	/* ilog2(tce_page_size) */
	__be32 window_shift;	/* ilog2(tce_window_size) */
};

/* One entry on dma_win_list: a DMA window tied to a device node. */
struct dma_win {
	struct device_node *device;
	const struct dynamic_dma_window_prop *prop;
	bool direct;		/* true for a direct-mapped window */
	struct list_head list;
};
416 | ||
/* Dynamic DMA Window support */

/* Output of the DDW query RTAS call (DDW_QUERY_PE_DMA_WIN). */
struct ddw_query_response {
	u32 windows_available;
	u64 largest_available_block;
	u32 page_size;
	u32 migration_capable;
};

/* Output of the DDW create RTAS call (DDW_CREATE_PE_DMA_WIN). */
struct ddw_create_response {
	u32 liobn;
	u32 addr_hi;
	u32 addr_lo;
};
430 | ||
57dbbe59 | 431 | static LIST_HEAD(dma_win_list); |
4e8b0cf4 | 432 | /* prevents races between memory on/offline and window creation */ |
57dbbe59 | 433 | static DEFINE_SPINLOCK(dma_win_list_lock); |
4e8b0cf4 | 434 | /* protects initializing window twice for same device */ |
57dbbe59 | 435 | static DEFINE_MUTEX(dma_win_init_mutex); |
4e8b0cf4 NA |
436 | |
437 | static int tce_clearrange_multi_pSeriesLP(unsigned long start_pfn, | |
438 | unsigned long num_pfn, const void *arg) | |
439 | { | |
440 | const struct dynamic_dma_window_prop *maprange = arg; | |
441 | int rc; | |
442 | u64 tce_size, num_tce, dma_offset, next; | |
443 | u32 tce_shift; | |
444 | long limit; | |
445 | ||
446 | tce_shift = be32_to_cpu(maprange->tce_shift); | |
447 | tce_size = 1ULL << tce_shift; | |
448 | next = start_pfn << PAGE_SHIFT; | |
449 | num_tce = num_pfn << PAGE_SHIFT; | |
450 | ||
451 | /* round back to the beginning of the tce page size */ | |
452 | num_tce += next & (tce_size - 1); | |
453 | next &= ~(tce_size - 1); | |
454 | ||
455 | /* covert to number of tces */ | |
456 | num_tce |= tce_size - 1; | |
457 | num_tce >>= tce_shift; | |
458 | ||
459 | do { | |
460 | /* | |
461 | * Set up the page with TCE data, looping through and setting | |
462 | * the values. | |
463 | */ | |
464 | limit = min_t(long, num_tce, 512); | |
465 | dma_offset = next + be64_to_cpu(maprange->dma_base); | |
466 | ||
467 | rc = plpar_tce_stuff((u64)be32_to_cpu(maprange->liobn), | |
468 | dma_offset, | |
469 | 0, limit); | |
22b38298 | 470 | next += limit * tce_size; |
4e8b0cf4 NA |
471 | num_tce -= limit; |
472 | } while (num_tce > 0 && !rc); | |
473 | ||
474 | return rc; | |
475 | } | |
476 | ||
/*
 * Map the pfn range [start_pfn, start_pfn+num_pfn) 1:1 into a dynamic
 * window described by @arg (struct dynamic_dma_window_prop), using
 * H_PUT_TCE_INDIRECT with the per-CPU bounce page, or the one-at-a-time
 * path when the firmware lacks the indirect call.  On error the caller
 * is expected to clear the whole range.
 */
static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
		unsigned long num_pfn, const void *arg)
{
	const struct dynamic_dma_window_prop *maprange = arg;
	u64 tce_size, num_tce, dma_offset, next, proto_tce, liobn;
	__be64 *tcep;
	u32 tce_shift;
	u64 rc = 0;
	long l, limit;

	if (!firmware_has_feature(FW_FEATURE_PUT_TCE_IND)) {
		unsigned long tceshift = be32_to_cpu(maprange->tce_shift);
		unsigned long dmastart = (start_pfn << PAGE_SHIFT) +
				be64_to_cpu(maprange->dma_base);
		unsigned long tcenum = dmastart >> tceshift;
		unsigned long npages = num_pfn << PAGE_SHIFT >> tceshift;
		void *uaddr = __va(start_pfn << PAGE_SHIFT);

		return tce_build_pSeriesLP(be32_to_cpu(maprange->liobn),
				tcenum, tceshift, npages, (unsigned long) uaddr,
				DMA_BIDIRECTIONAL, 0);
	}

	local_irq_disable();	/* to protect tcep and the page behind it */
	tcep = __this_cpu_read(tce_page);

	if (!tcep) {
		tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
		if (!tcep) {
			local_irq_enable();
			return -ENOMEM;
		}
		__this_cpu_write(tce_page, tcep);
	}

	proto_tce = TCE_PCI_READ | TCE_PCI_WRITE;

	liobn = (u64)be32_to_cpu(maprange->liobn);
	tce_shift = be32_to_cpu(maprange->tce_shift);
	tce_size = 1ULL << tce_shift;
	next = start_pfn << PAGE_SHIFT;
	num_tce = num_pfn << PAGE_SHIFT;

	/* round back to the beginning of the tce page size */
	num_tce += next & (tce_size - 1);
	next &= ~(tce_size - 1);

	/* convert to number of tces */
	num_tce |= tce_size - 1;
	num_tce >>= tce_shift;

	/* We can map max one pageful of TCEs at a time */
	do {
		/*
		 * Set up the page with TCE data, looping through and setting
		 * the values.
		 */
		limit = min_t(long, num_tce, 4096 / TCE_ENTRY_SIZE);
		dma_offset = next + be64_to_cpu(maprange->dma_base);

		for (l = 0; l < limit; l++) {
			tcep[l] = cpu_to_be64(proto_tce | next);
			next += tce_size;
		}

		rc = plpar_tce_put_indirect(liobn,
					    dma_offset,
					    (u64)__pa(tcep),
					    limit);

		num_tce -= limit;
	} while (num_tce > 0 && !rc);

	/* error cleanup: caller will clear whole range */

	local_irq_enable();
	return rc;
}
555 | ||
/* Adapter for walk_system_ram_range(): non-const @arg wrapper. */
static int tce_setrange_multi_pSeriesLP_walk(unsigned long start_pfn,
		unsigned long num_pfn, void *arg)
{
	return tce_setrange_multi_pSeriesLP(start_pfn, num_pfn, arg);
}
561 | ||
fc8cba8f LB |
562 | static void iommu_table_setparms_common(struct iommu_table *tbl, unsigned long busno, |
563 | unsigned long liobn, unsigned long win_addr, | |
564 | unsigned long window_size, unsigned long page_shift, | |
565 | void *base, struct iommu_table_ops *table_ops) | |
566 | { | |
567 | tbl->it_busno = busno; | |
568 | tbl->it_index = liobn; | |
569 | tbl->it_offset = win_addr >> page_shift; | |
570 | tbl->it_size = window_size >> page_shift; | |
571 | tbl->it_page_shift = page_shift; | |
572 | tbl->it_base = (unsigned long)base; | |
573 | tbl->it_blocksize = 16; | |
574 | tbl->it_type = TCE_PCI; | |
575 | tbl->it_ops = table_ops; | |
576 | } | |
577 | ||
578 | struct iommu_table_ops iommu_table_pseries_ops; | |
579 | ||
1da177e4 LT |
/*
 * Configure @tbl for the non-LPAR case from the PHB's linux,tce-base /
 * linux,tce-size properties, carving the next slice out of the PHB's
 * 2GB DMA space and advancing dma_window_base_cur past it.
 */
static void iommu_table_setparms(struct pci_controller *phb,
				 struct device_node *dn,
				 struct iommu_table *tbl)
{
	struct device_node *node;
	const unsigned long *basep;
	const u32 *sizep;

	/* Test if we are going over 2GB of DMA space */
	if (phb->dma_window_base_cur + phb->dma_window_size > SZ_2G) {
		udbg_printf("PCI_DMA: Unexpected number of IOAs under this PHB.\n");
		panic("PCI_DMA: Unexpected number of IOAs under this PHB.\n");
	}

	node = phb->dn;
	basep = of_get_property(node, "linux,tce-base", NULL);
	sizep = of_get_property(node, "linux,tce-size", NULL);
	if (basep == NULL || sizep == NULL) {
		printk(KERN_ERR "PCI_DMA: iommu_table_setparms: %pOF has "
				"missing tce entries !\n", dn);
		return;
	}

	iommu_table_setparms_common(tbl, phb->bus->number, 0, phb->dma_window_base_cur,
				    phb->dma_window_size, IOMMU_PAGE_SHIFT_4K,
				    __va(*basep), &iommu_table_pseries_ops);

	/* a kdump kernel inherits live TCEs from the crashed kernel */
	if (!is_kdump_kernel())
		memset((void *)tbl->it_base, 0, *sizep);

	phb->dma_window_base_cur += phb->dma_window_size;
}
612 | ||
struct iommu_table_ops iommu_table_lpar_multi_ops;

/* Ops for the non-LPAR (direct in-memory TCE table) case. */
struct iommu_table_ops iommu_table_pseries_ops = {
	.set = tce_build_pSeries,
	.clear = tce_clear_pSeries,
	.get = tce_get_pseries
};
620 | ||
/*
 * Non-LPAR bus setup: runs only for root buses (PHBs).  Carves the
 * PHB's DMA space into power-of-two per-slot windows; if an ISA bus
 * hangs off this PHB, a 128MB table is set up for it (skipping the
 * first 128MB of DMA space) before dividing the rest among children.
 */
static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
{
	struct device_node *dn;
	struct iommu_table *tbl;
	struct device_node *isa_dn, *isa_dn_orig;
	struct device_node *tmp;
	struct pci_dn *pci;
	int children;

	dn = pci_bus_to_OF_node(bus);

	pr_debug("pci_dma_bus_setup_pSeries: setting up bus %pOF\n", dn);

	if (bus->self) {
		/* This is not a root bus, any setup will be done for the
		 * device-side of the bridge in iommu_dev_setup_pSeries().
		 */
		return;
	}
	pci = PCI_DN(dn);

	/* Check if the ISA bus on the system is under
	 * this PHB.
	 */
	isa_dn = isa_dn_orig = of_find_node_by_type(NULL, "isa");

	while (isa_dn && isa_dn != dn)
		isa_dn = isa_dn->parent;

	of_node_put(isa_dn_orig);

	/* Count number of direct PCI children of the PHB. */
	for (children = 0, tmp = dn->child; tmp; tmp = tmp->sibling)
		children++;

	pr_debug("Children: %d\n", children);

	/* Calculate amount of DMA window per slot. Each window must be
	 * a power of two (due to pci_alloc_consistent requirements).
	 *
	 * Keep 256MB aside for PHBs with ISA.
	 */

	if (!isa_dn) {
		/* No ISA/IDE - just set window size and return */
		pci->phb->dma_window_size = 0x80000000ul; /* To be divided */

		while (pci->phb->dma_window_size * children > 0x80000000ul)
			pci->phb->dma_window_size >>= 1;
		pr_debug("No ISA/IDE, window size is 0x%llx\n",
			 pci->phb->dma_window_size);
		pci->phb->dma_window_base_cur = 0;

		return;
	}

	/* If we have ISA, then we probably have an IDE
	 * controller too. Allocate a 128MB table but
	 * skip the first 128MB to avoid stepping on ISA
	 * space.
	 */
	pci->phb->dma_window_size = 0x8000000ul;
	pci->phb->dma_window_base_cur = 0x8000000ul;

	pci->table_group = iommu_pseries_alloc_group(pci->phb->node);
	tbl = pci->table_group->tables[0];

	iommu_table_setparms(pci->phb, dn, tbl);

	if (!iommu_init_table(tbl, pci->phb->node, 0, 0))
		panic("Failed to initialize iommu table");

	/* Divide the rest (1.75GB) among the children */
	pci->phb->dma_window_size = 0x80000000ul;
	while (pci->phb->dma_window_size * children > 0x70000000ul)
		pci->phb->dma_window_size >>= 1;

	pr_debug("ISA/IDE, window size is 0x%llx\n", pci->phb->dma_window_size);
}
700 | ||
#ifdef CONFIG_IOMMU_API
/*
 * Atomically (under the table's large-pool lock) exchange one TCE:
 * read the old entry via plpar_tce_get(), write the new one via
 * plpar_tce_put(), and return the old RPN/direction through the
 * in/out parameters.  Returns the hcall status.
 */
static int tce_exchange_pseries(struct iommu_table *tbl, long index, unsigned
				long *tce, enum dma_data_direction *direction)
{
	long rc;
	unsigned long ioba = (unsigned long) index << tbl->it_page_shift;
	unsigned long flags, oldtce = 0;
	u64 proto_tce = iommu_direction_to_tce_perm(*direction);
	unsigned long newtce = *tce | proto_tce;

	spin_lock_irqsave(&tbl->large_pool.lock, flags);

	rc = plpar_tce_get((u64)tbl->it_index, ioba, &oldtce);
	if (!rc)
		rc = plpar_tce_put((u64)tbl->it_index, ioba, newtce);

	if (!rc) {
		*direction = iommu_tce_direction(oldtce);
		/* hand back the old address with the permission bits stripped */
		*tce = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
	}

	spin_unlock_irqrestore(&tbl->large_pool.lock, flags);

	return rc;
}

/*
 * Return a pointer to the userspace-view slot for @index, or NULL when
 * no userspace view has been allocated for this table.
 */
static __be64 *tce_useraddr_pSeriesLP(struct iommu_table *tbl, long index,
				      bool __always_unused alloc)
{
	return tbl->it_userspace ? &tbl->it_userspace[index - tbl->it_offset] : NULL;
}
#endif
733 | ||
/* Ops for the LPAR case (TCEs programmed through hypervisor calls). */
struct iommu_table_ops iommu_table_lpar_multi_ops = {
	.set = tce_buildmulti_pSeriesLP,
#ifdef CONFIG_IOMMU_API
	.xchg_no_kill = tce_exchange_pseries,
	.useraddrptr = tce_useraddr_pSeriesLP,
#endif
	.clear = tce_freemulti_pSeriesLP,
	.get = tce_get_pSeriesLP,
	.free = tce_free_pSeries
};
1da177e4 | 744 | |
#ifdef CONFIG_IOMMU_API
/*
 * When the DMA window properties might have been removed,
 * the parent node has the table_group setup on it.
 *
 * Walk up from @dev's OF node and return the first ancestor whose
 * pci_dn owns @table_group, or NULL if none does.
 */
static struct device_node *pci_dma_find_parent_node(struct pci_dev *dev,
						    struct iommu_table_group *table_group)
{
	struct device_node *dn = pci_device_to_OF_node(dev);
	struct pci_dn *rpdn;

	for (; dn && PCI_DN(dn); dn = dn->parent) {
		rpdn = PCI_DN(dn);

		if (table_group == rpdn->table_group)
			return dn;
	}

	return NULL;
}
#endif
f431a8cd | 766 | |
/*
 * Find nearest ibm,dma-window (default DMA window) or direct DMA window or
 * dynamic 64bit DMA window, walking up the device tree.
 *
 * Returns the node carrying the window property (or NULL) and, when
 * @prop is non-NULL, fills it with the parsed window description; a
 * DDW property takes precedence over the default window.
 */
static struct device_node *pci_dma_find(struct device_node *dn,
					struct dynamic_dma_window_prop *prop)
{
	const __be32 *default_prop = NULL;
	const __be32 *ddw_prop = NULL;
	struct device_node *rdn = NULL;
	bool default_win = false, ddw_win = false;

	for ( ; dn && PCI_DN(dn); dn = dn->parent) {
		default_prop = of_get_property(dn, "ibm,dma-window", NULL);
		if (default_prop) {
			rdn = dn;
			default_win = true;
		}
		ddw_prop = of_get_property(dn, DIRECT64_PROPNAME, NULL);
		if (ddw_prop) {
			rdn = dn;
			ddw_win = true;
			break;
		}
		ddw_prop = of_get_property(dn, DMA64_PROPNAME, NULL);
		if (ddw_prop) {
			rdn = dn;
			ddw_win = true;
			break;
		}

		/* At least found default window, which is the case for normal boot */
		if (default_win)
			break;
	}

	/* For PCI devices there will always be a DMA window, either on the device
	 * or parent bus
	 */
	WARN_ON(!(default_win | ddw_win));

	/* caller doesn't want to get DMA window property */
	if (!prop)
		return rdn;

	/* parse DMA window property. During normal system boot, only default
	 * DMA window is passed in OF. But, for kdump, a dedicated adapter might
	 * have both default and DDW in FDT. In this scenario, DDW takes precedence
	 * over default window.
	 */
	if (ddw_win) {
		struct dynamic_dma_window_prop *p;

		/* DDW property is already laid out in our in-memory format */
		p = (struct dynamic_dma_window_prop *)ddw_prop;
		prop->liobn = p->liobn;
		prop->dma_base = p->dma_base;
		prop->tce_shift = p->tce_shift;
		prop->window_shift = p->window_shift;
	} else if (default_win) {
		unsigned long offset, size, liobn;

		of_parse_dma_window(rdn, default_prop, &liobn, &offset, &size);

		prop->liobn = cpu_to_be32((u32)liobn);
		prop->dma_base = cpu_to_be64(offset);
		prop->tce_shift = cpu_to_be32(IOMMU_PAGE_SHIFT_4K);
		prop->window_shift = cpu_to_be32(order_base_2(size));
	}

	return rdn;
}
838 | ||
/*
 * Set up the IOMMU table for an LPAR PCI bus: locate the DMA window
 * property on the bus node (or a parent), and build/register the bus-level
 * iommu_table group from it.
 */
static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
{
	struct iommu_table *tbl;
	struct device_node *dn, *pdn;
	struct pci_dn *ppci;
	struct dynamic_dma_window_prop prop;	/* filled in by pci_dma_find() */

	dn = pci_bus_to_OF_node(bus);

	pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %pOF\n",
		 dn);

	/* Walk up from the bus node looking for a DMA window; on success
	 * pdn is the node that carries it and prop holds the parsed window. */
	pdn = pci_dma_find(dn, &prop);

	/* In PPC architecture, there will always be DMA window on bus or one of the
	 * parent bus. During reboot, there will be ibm,dma-window property to
	 * define DMA window. For kdump, there will at least be default window or DDW
	 * or both.
	 * There is an exception to the above. In case the PE goes into frozen
	 * state, firmware may not provide ibm,dma-window property at the time
	 * of LPAR boot up.
	 */

	if (!pdn) {
		pr_debug(" no ibm,dma-window property !\n");
		return;
	}

	ppci = PCI_DN(pdn);

	pr_debug("  parent is %pOF, iommu_table: 0x%p\n",
		 pdn, ppci->table_group);

	/* Only create the table group once per parent node; subsequent buses
	 * under the same node reuse the existing group. */
	if (!ppci->table_group) {
		ppci->table_group = iommu_pseries_alloc_group(ppci->phb->node);
		tbl = ppci->table_group->tables[0];

		/* Window geometry comes straight from the parsed property
		 * (big-endian in the device tree, hence the be*_to_cpu). */
		iommu_table_setparms_common(tbl, ppci->phb->bus->number,
				be32_to_cpu(prop.liobn),
				be64_to_cpu(prop.dma_base),
				1ULL << be32_to_cpu(prop.window_shift),
				be32_to_cpu(prop.tce_shift), NULL,
				&iommu_table_lpar_multi_ops);

		/* No usable DMA window at boot is unrecoverable */
		if (!iommu_init_table(tbl, ppci->phb->node, 0, 0))
			panic("Failed to initialize iommu table");

		iommu_register_group(ppci->table_group,
				pci_domain_nr(bus), 0);
		pr_debug("  created table: %p\n", ppci->table_group);
	}
}
891 | ||
892 | ||
/*
 * Attach a device to an IOMMU table: either allocate one (direct child of
 * a root bus) or inherit the nearest ancestor's already-built table.
 */
static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
{
	struct device_node *dn;
	struct iommu_table *tbl;

	pr_debug("pci_dma_dev_setup_pSeries: %s\n", pci_name(dev));

	dn = dev->dev.of_node;

	/* If we're the direct child of a root bus, then we need to allocate
	 * an iommu table ourselves. The bus setup code should have setup
	 * the window sizes already.
	 */
	if (!dev->bus->self) {
		struct pci_controller *phb = PCI_DN(dn)->phb;

		pr_debug(" --> first child, no bridge. Allocating iommu table.\n");
		PCI_DN(dn)->table_group = iommu_pseries_alloc_group(phb->node);
		tbl = PCI_DN(dn)->table_group->tables[0];
		iommu_table_setparms(phb, dn, tbl);

		/* A device with no working DMA window cannot be tolerated */
		if (!iommu_init_table(tbl, phb->node, 0, 0))
			panic("Failed to initialize iommu table");

		set_iommu_table_base(&dev->dev, tbl);
		return;
	}

	/* If this device is further down the bus tree, search upwards until
	 * an already allocated iommu table is found and use that.
	 */

	while (dn && PCI_DN(dn) && PCI_DN(dn)->table_group == NULL)
		dn = dn->parent;

	if (dn && PCI_DN(dn))
		set_iommu_table_base(&dev->dev,
				PCI_DN(dn)->table_group->tables[0]);
	else
		printk(KERN_WARNING "iommu: Device %s has no iommu table\n",
		       pci_name(dev));
}
935 | ||
4e8b0cf4 NA |
/* Non-zero when DDW (dynamic DMA windows) was disabled on the kernel
 * command line via the "disable_ddw" early parameter below. */
static int __read_mostly disable_ddw;

static int __init disable_ddw_setup(char *str)
{
	disable_ddw = 1;
	printk(KERN_INFO "ppc iommu: disabling ddw.\n");

	return 0;
}

early_param("disable_ddw", disable_ddw_setup);
947 | ||
7ed2ed2d | 948 | static void clean_dma_window(struct device_node *np, struct dynamic_dma_window_prop *dwp) |
4e8b0cf4 | 949 | { |
74d0b399 | 950 | int ret; |
9410e018 | 951 | |
4e8b0cf4 NA |
952 | ret = tce_clearrange_multi_pSeriesLP(0, |
953 | 1ULL << (be32_to_cpu(dwp->window_shift) - PAGE_SHIFT), dwp); | |
954 | if (ret) | |
f2c2cbcc JP |
955 | pr_warn("%pOF failed to clear tces in window.\n", |
956 | np); | |
4e8b0cf4 | 957 | else |
b7c670d6 RH |
958 | pr_debug("%pOF successfully cleared tces in window.\n", |
959 | np); | |
7ed2ed2d LB |
960 | } |
961 | ||
/*
 * Call only if DMA window is clean.
 *
 * Issues the ibm,remove-pe-dma-window RTAS call to delete the window
 * identified by @liobn. Failure is only logged; the caller cannot
 * recover beyond that.
 */
static void __remove_dma_window(struct device_node *np, u32 *ddw_avail, u64 liobn)
{
	int ret;

	ret = rtas_call(ddw_avail[DDW_REMOVE_PE_DMA_WIN], 1, 1, NULL, liobn);
	if (ret)
		pr_warn("%pOF: failed to remove DMA window: rtas returned "
			"%d to ibm,remove-pe-dma-window(%x) %llx\n",
			np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn);
	else
		pr_debug("%pOF: successfully removed DMA window: rtas returned "
			"%d to ibm,remove-pe-dma-window(%x) %llx\n",
			np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn);
}
979 | ||
7ed2ed2d | 980 | static void remove_dma_window(struct device_node *np, u32 *ddw_avail, |
f431a8cd | 981 | struct property *win, bool cleanup) |
7ed2ed2d LB |
982 | { |
983 | struct dynamic_dma_window_prop *dwp; | |
984 | u64 liobn; | |
985 | ||
986 | dwp = win->value; | |
987 | liobn = (u64)be32_to_cpu(dwp->liobn); | |
988 | ||
f431a8cd SB |
989 | if (cleanup) |
990 | clean_dma_window(np, dwp); | |
7ed2ed2d LB |
991 | __remove_dma_window(np, ddw_avail, liobn); |
992 | } | |
993 | ||
f431a8cd SB |
994 | static void copy_property(struct device_node *pdn, const char *from, const char *to) |
995 | { | |
996 | struct property *src, *dst; | |
997 | ||
998 | src = of_find_property(pdn, from, NULL); | |
999 | if (!src) | |
1000 | return; | |
1001 | ||
1002 | dst = kzalloc(sizeof(*dst), GFP_KERNEL); | |
1003 | if (!dst) | |
1004 | return; | |
1005 | ||
1006 | dst->name = kstrdup(to, GFP_KERNEL); | |
1007 | dst->value = kmemdup(src->value, src->length, GFP_KERNEL); | |
1008 | dst->length = src->length; | |
1009 | if (!dst->name || !dst->value) | |
1010 | return; | |
1011 | ||
1012 | if (of_add_property(pdn, dst)) { | |
1013 | pr_err("Unable to add DMA window property for %pOF", pdn); | |
1014 | goto free_prop; | |
1015 | } | |
1016 | ||
1017 | return; | |
1018 | ||
1019 | free_prop: | |
1020 | kfree(dst->name); | |
1021 | kfree(dst->value); | |
1022 | kfree(dst); | |
1023 | } | |
1024 | ||
/*
 * Find the DMA window property @win_name on @np and remove the underlying
 * firmware window (clearing TCEs first if @cleanup). If @remove_prop, the
 * device-tree property itself is also deleted, after backing up a default
 * "ibm,dma-window" so a later DLPAR add can restore it.
 *
 * Returns -EINVAL if the property does not exist, 0 otherwise (failures
 * past that point are logged, not propagated).
 */
static int remove_dma_window_named(struct device_node *np, bool remove_prop, const char *win_name,
				   bool cleanup)
{
	struct property *win;
	u32 ddw_avail[DDW_APPLICABLE_SIZE];
	int ret = 0;

	win = of_find_property(np, win_name, NULL);
	if (!win)
		return -EINVAL;

	/* Need the RTAS tokens (query/create/remove) to talk to firmware */
	ret = of_property_read_u32_array(np, "ibm,ddw-applicable",
					 &ddw_avail[0], DDW_APPLICABLE_SIZE);
	if (ret)
		return 0;

	/* Only a full-sized property describes a real window to tear down */
	if (win->length >= sizeof(struct dynamic_dma_window_prop))
		remove_dma_window(np, ddw_avail, win, cleanup);

	if (!remove_prop)
		return 0;

	/* Default window property if removed is lost as reset-pe doesn't restore it.
	 * Though FDT has a copy of it, the DLPAR hotplugged devices will not have a
	 * node on FDT until next reboot. So, back it up.
	 */
	if ((strcmp(win_name, "ibm,dma-window") == 0) &&
	    !of_find_property(np, "ibm,dma-window-saved", NULL))
		copy_property(np, win_name, "ibm,dma-window-saved");

	ret = of_remove_property(np, win);
	if (ret)
		pr_warn("%pOF: failed to remove DMA window property: %d\n",
			np, ret);
	return 0;
}
4e8b0cf4 | 1061 | |
3bf983e4 GB |
1062 | static bool find_existing_ddw(struct device_node *pdn, u64 *dma_addr, int *window_shift, |
1063 | bool *direct_mapping) | |
4e8b0cf4 | 1064 | { |
57dbbe59 LB |
1065 | struct dma_win *window; |
1066 | const struct dynamic_dma_window_prop *dma64; | |
2ca73c54 | 1067 | bool found = false; |
4e8b0cf4 | 1068 | |
57dbbe59 | 1069 | spin_lock(&dma_win_list_lock); |
4e8b0cf4 | 1070 | /* check if we already created a window and dupe that config if so */ |
57dbbe59 | 1071 | list_for_each_entry(window, &dma_win_list, list) { |
4e8b0cf4 | 1072 | if (window->device == pdn) { |
57dbbe59 LB |
1073 | dma64 = window->prop; |
1074 | *dma_addr = be64_to_cpu(dma64->dma_base); | |
1075 | *window_shift = be32_to_cpu(dma64->window_shift); | |
3bf983e4 | 1076 | *direct_mapping = window->direct; |
2ca73c54 | 1077 | found = true; |
4e8b0cf4 NA |
1078 | break; |
1079 | } | |
1080 | } | |
57dbbe59 | 1081 | spin_unlock(&dma_win_list_lock); |
4e8b0cf4 | 1082 | |
2ca73c54 | 1083 | return found; |
4e8b0cf4 NA |
1084 | } |
1085 | ||
57dbbe59 LB |
1086 | static struct dma_win *ddw_list_new_entry(struct device_node *pdn, |
1087 | const struct dynamic_dma_window_prop *dma64) | |
92a23219 | 1088 | { |
57dbbe59 | 1089 | struct dma_win *window; |
92a23219 LB |
1090 | |
1091 | window = kzalloc(sizeof(*window), GFP_KERNEL); | |
1092 | if (!window) | |
1093 | return NULL; | |
1094 | ||
1095 | window->device = pdn; | |
1096 | window->prop = dma64; | |
d61cd13e | 1097 | window->direct = false; |
92a23219 LB |
1098 | |
1099 | return window; | |
1100 | } | |
1101 | ||
/*
 * Scan the device tree for nodes carrying DMA window property @name and
 * register each valid window on dma_win_list. Malformed properties are
 * removed outright.
 */
static void find_existing_ddw_windows_named(const char *name)
{
	int len;
	struct device_node *pdn;
	struct dma_win *window;
	const struct dynamic_dma_window_prop *dma64;

	for_each_node_with_property(pdn, name) {
		dma64 = of_get_property(pdn, name, &len);
		/* Truncated or missing payload: the window is unusable, drop it */
		if (!dma64 || len < sizeof(*dma64)) {
			remove_dma_window_named(pdn, true, name, true);
			continue;
		}

		/* If at the time of system initialization, there are DDWs in OF,
		 * it means this is during kexec. DDW could be direct or dynamic.
		 * We will just mark DDWs as "dynamic" since this is kdump path,
		 * no need to worry about performance. ddw_list_new_entry() will
		 * set window->direct = false.
		 */
		window = ddw_list_new_entry(pdn, dma64);
		if (!window) {
			/* Breaking out of the iterator: drop its node reference */
			of_node_put(pdn);
			break;
		}

		spin_lock(&dma_win_list_lock);
		list_add(&window->list, &dma_win_list);
		spin_unlock(&dma_win_list_lock);
	}
}
1133 | ||
/*
 * Arch initcall: on LPAR, rediscover DDWs already present in the device
 * tree (both direct- and dynamic-mapped variants), e.g. after kexec.
 */
static int find_existing_ddw_windows(void)
{
	if (!firmware_has_feature(FW_FEATURE_LPAR))
		return 0;

	find_existing_ddw_windows_named(DIRECT64_PROPNAME);
	find_existing_ddw_windows_named(DMA64_PROPNAME);

	return 0;
}
machine_arch_initcall(pseries, find_existing_ddw_windows);
4e8b0cf4 | 1145 | |
80f02512 LB |
1146 | /** |
1147 | * ddw_read_ext - Get the value of an DDW extension | |
1148 | * @np: device node from which the extension value is to be read. | |
1149 | * @extnum: index number of the extension. | |
1150 | * @value: pointer to return value, modified when extension is available. | |
1151 | * | |
1152 | * Checks if "ibm,ddw-extensions" exists for this node, and get the value | |
1153 | * on index 'extnum'. | |
1154 | * It can be used only to check if a property exists, passing value == NULL. | |
1155 | * | |
1156 | * Returns: | |
1157 | * 0 if extension successfully read | |
1158 | * -EINVAL if the "ibm,ddw-extensions" does not exist, | |
1159 | * -ENODATA if "ibm,ddw-extensions" does not have a value, and | |
1160 | * -EOVERFLOW if "ibm,ddw-extensions" does not contain this extension. | |
1161 | */ | |
1162 | static inline int ddw_read_ext(const struct device_node *np, int extnum, | |
1163 | u32 *value) | |
1164 | { | |
1165 | static const char propname[] = "ibm,ddw-extensions"; | |
1166 | u32 count; | |
1167 | int ret; | |
1168 | ||
1169 | ret = of_property_read_u32_index(np, propname, DDW_EXT_SIZE, &count); | |
1170 | if (ret) | |
1171 | return ret; | |
1172 | ||
1173 | if (count < extnum) | |
1174 | return -EOVERFLOW; | |
1175 | ||
1176 | if (!value) | |
1177 | value = &count; | |
1178 | ||
1179 | return of_property_read_u32_index(np, propname, extnum, value); | |
1180 | } | |
1181 | ||
/*
 * Run the ibm,query-pe-dma-windows RTAS call for @dev's PE and unpack the
 * result into @query. @parent is the node carrying "ibm,ddw-extensions",
 * which decides whether the call returns 5 or 6 output cells.
 */
static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail,
		     struct ddw_query_response *query,
		     struct device_node *parent)
{
	struct device_node *dn;
	struct pci_dn *pdn;
	u32 cfg_addr, ext_query, query_out[5];
	u64 buid;
	int ret, out_sz;

	/*
	 * From LoPAR level 2.8, "ibm,ddw-extensions" index 3 can rule how many
	 * output parameters ibm,query-pe-dma-windows will have, ranging from
	 * 5 to 6.
	 */
	ret = ddw_read_ext(parent, DDW_EXT_QUERY_OUT_SIZE, &ext_query);
	if (!ret && ext_query == 1)
		out_sz = 6;
	else
		out_sz = 5;

	/*
	 * Get the config address and phb buid of the PE window.
	 * Rely on eeh to retrieve this for us.
	 * Retrieve them from the pci device, not the node with the
	 * dma-window property
	 */
	dn = pci_device_to_OF_node(dev);
	pdn = PCI_DN(dn);
	buid = pdn->phb->buid;
	cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8));

	ret = rtas_call(ddw_avail[DDW_QUERY_PE_DMA_WIN], 3, out_sz, query_out,
			cfg_addr, BUID_HI(buid), BUID_LO(buid));

	switch (out_sz) {
	case 5:
		query->windows_available = query_out[0];
		query->largest_available_block = query_out[1];
		query->page_size = query_out[2];
		query->migration_capable = query_out[3];
		break;
	case 6:
		/* 6-cell layout: largest block is a 64-bit value split hi/lo */
		query->windows_available = query_out[0];
		query->largest_available_block = ((u64)query_out[1] << 32) |
			query_out[2];
		query->page_size = query_out[3];
		query->migration_capable = query_out[4];
		break;
	}

	dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x returned %d, lb=%llx ps=%x wn=%d\n",
		 ddw_avail[DDW_QUERY_PE_DMA_WIN], cfg_addr, BUID_HI(buid),
		 BUID_LO(buid), ret, query->largest_available_block,
		 query->page_size, query->windows_available);

	return ret;
}
1240 | ||
/*
 * Ask firmware (ibm,create-pe-dma-window) to create a DMA window of
 * 2^@window_shift bytes with 2^@page_shift TCE pages for @dev's PE.
 * On success @create holds the new window's LIOBN and start address
 * (hi/lo halves). Returns the RTAS status code.
 */
static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail,
		      struct ddw_create_response *create, int page_shift,
		      int window_shift)
{
	struct device_node *dn;
	struct pci_dn *pdn;
	u32 cfg_addr;
	u64 buid;
	int ret;

	/*
	 * Get the config address and phb buid of the PE window.
	 * Rely on eeh to retrieve this for us.
	 * Retrieve them from the pci device, not the node with the
	 * dma-window property
	 */
	dn = pci_device_to_OF_node(dev);
	pdn = PCI_DN(dn);
	buid = pdn->phb->buid;
	cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8));

	/* Retry for as long as RTAS reports a busy/retry status */
	do {
		/* extra outputs are LIOBN and dma-addr (hi, lo) */
		ret = rtas_call(ddw_avail[DDW_CREATE_PE_DMA_WIN], 5, 4,
				(u32 *)create, cfg_addr, BUID_HI(buid),
				BUID_LO(buid), page_shift, window_shift);
	} while (rtas_busy_delay(ret));
	dev_info(&dev->dev,
		"ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d "
		"(liobn = 0x%x starting addr = %x %x)\n",
		 ddw_avail[DDW_CREATE_PE_DMA_WIN], cfg_addr, BUID_HI(buid),
		 BUID_LO(buid), page_shift, window_shift, ret, create->liobn,
		 create->addr_hi, create->addr_lo);

	return ret;
}
1277 | ||
61435690 NA |
/*
 * Record of a PE node for which DDW setup already failed, so later
 * functions of the same device skip retrying (see enable_ddw()).
 */
struct failed_ddw_pdn {
	struct device_node *pdn;	/* PE parent node that failed */
	struct list_head list;		/* entry on failed_ddw_pdn_list */
};

static LIST_HEAD(failed_ddw_pdn_list);
1284 | ||
68c0449e AK |
1285 | static phys_addr_t ddw_memory_hotplug_max(void) |
1286 | { | |
2500763d | 1287 | resource_size_t max_addr = memory_hotplug_max(); |
68c0449e AK |
1288 | struct device_node *memory; |
1289 | ||
1290 | for_each_node_by_type(memory, "memory") { | |
2500763d | 1291 | struct resource res; |
68c0449e | 1292 | |
2500763d | 1293 | if (of_address_to_resource(memory, 0, &res)) |
68c0449e AK |
1294 | continue; |
1295 | ||
2500763d | 1296 | max_addr = max_t(resource_size_t, max_addr, res.end + 1); |
68c0449e AK |
1297 | } |
1298 | ||
1299 | return max_addr; | |
1300 | } | |
1301 | ||
8c0d5159 LB |
/*
 * Platforms supporting the DDW option starting with LoPAR level 2.7 implement
 * ibm,ddw-extensions, which carries the rtas token for
 * ibm,reset-pe-dma-windows.
 * That rtas-call can be used to restore the default DMA window for the device.
 */
static void reset_dma_window(struct pci_dev *dev, struct device_node *par_dn)
{
	int ret;
	u32 cfg_addr, reset_dma_win;
	u64 buid;
	struct device_node *dn;
	struct pci_dn *pdn;

	/* Token lives in the parent's "ibm,ddw-extensions"; nothing to do
	 * if the platform doesn't provide it. */
	ret = ddw_read_ext(par_dn, DDW_EXT_RESET_DMA_WIN, &reset_dma_win);
	if (ret)
		return;

	/* PE config address and PHB BUID identify the target window */
	dn = pci_device_to_OF_node(dev);
	pdn = PCI_DN(dn);
	buid = pdn->phb->buid;
	cfg_addr = (pdn->busno << 16) | (pdn->devfn << 8);

	ret = rtas_call(reset_dma_win, 3, 1, NULL, cfg_addr, BUID_HI(buid),
			BUID_LO(buid));
	if (ret)
		dev_info(&dev->dev,
			 "ibm,reset-pe-dma-windows(%x) %x %x %x returned %d ",
			 reset_dma_win, cfg_addr, BUID_HI(buid), BUID_LO(buid),
			 ret);
}
1333 | ||
47272411 LB |
1334 | /* Return largest page shift based on "IO Page Sizes" output of ibm,query-pe-dma-window. */ |
1335 | static int iommu_get_page_shift(u32 query_page_size) | |
1336 | { | |
38727311 | 1337 | /* Supported IO page-sizes according to LoPAR, note that 2M is out of order */ |
47272411 LB |
1338 | const int shift[] = { |
1339 | __builtin_ctzll(SZ_4K), __builtin_ctzll(SZ_64K), __builtin_ctzll(SZ_16M), | |
1340 | __builtin_ctzll(SZ_32M), __builtin_ctzll(SZ_64M), __builtin_ctzll(SZ_128M), | |
38727311 | 1341 | __builtin_ctzll(SZ_256M), __builtin_ctzll(SZ_16G), __builtin_ctzll(SZ_2M) |
47272411 LB |
1342 | }; |
1343 | ||
1344 | int i = ARRAY_SIZE(shift) - 1; | |
38727311 | 1345 | int ret = 0; |
47272411 LB |
1346 | |
1347 | /* | |
1348 | * On LoPAR, ibm,query-pe-dma-window outputs "IO Page Sizes" using a bit field: | |
1349 | * - bit 31 means 4k pages are supported, | |
1350 | * - bit 30 means 64k pages are supported, and so on. | |
1351 | * Larger pagesizes map more memory with the same amount of TCEs, so start probing them. | |
1352 | */ | |
1353 | for (; i >= 0 ; i--) { | |
1354 | if (query_page_size & (1 << i)) | |
38727311 | 1355 | ret = max(ret, shift[i]); |
47272411 LB |
1356 | } |
1357 | ||
38727311 | 1358 | return ret; |
47272411 LB |
1359 | } |
1360 | ||
7ed2ed2d LB |
1361 | static struct property *ddw_property_create(const char *propname, u32 liobn, u64 dma_addr, |
1362 | u32 page_shift, u32 window_shift) | |
1363 | { | |
1364 | struct dynamic_dma_window_prop *ddwprop; | |
1365 | struct property *win64; | |
1366 | ||
1367 | win64 = kzalloc(sizeof(*win64), GFP_KERNEL); | |
1368 | if (!win64) | |
1369 | return NULL; | |
1370 | ||
1371 | win64->name = kstrdup(propname, GFP_KERNEL); | |
1372 | ddwprop = kzalloc(sizeof(*ddwprop), GFP_KERNEL); | |
1373 | win64->value = ddwprop; | |
1374 | win64->length = sizeof(*ddwprop); | |
1375 | if (!win64->name || !win64->value) { | |
1376 | kfree(win64->name); | |
1377 | kfree(win64->value); | |
1378 | kfree(win64); | |
1379 | return NULL; | |
1380 | } | |
1381 | ||
1382 | ddwprop->liobn = cpu_to_be32(liobn); | |
1383 | ddwprop->dma_base = cpu_to_be64(dma_addr); | |
1384 | ddwprop->tce_shift = cpu_to_be32(page_shift); | |
1385 | ddwprop->window_shift = cpu_to_be32(window_shift); | |
1386 | ||
1387 | return win64; | |
1388 | } | |
1389 | ||
4e8b0cf4 NA |
1390 | /* |
1391 | * If the PE supports dynamic dma windows, and there is space for a table | |
1392 | * that can map all pages in a linear offset, then setup such a table, | |
1393 | * and record the dma-offset in the struct device. | |
1394 | * | |
1395 | * dev: the pci device we are checking | |
1396 | * pdn: the parent pe node with the ibm,dma_window property | |
1397 | * Future: also check if we can remap the base window for our base page size | |
1398 | * | |
2ca73c54 | 1399 | * returns true if can map all pages (direct mapping), false otherwise.. |
4e8b0cf4 | 1400 | */ |
2ca73c54 | 1401 | static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) |
4e8b0cf4 | 1402 | { |
bf6e2d56 AK |
1403 | int len = 0, ret; |
1404 | int max_ram_len = order_base_2(ddw_memory_hotplug_max()); | |
4e8b0cf4 NA |
1405 | struct ddw_query_response query; |
1406 | struct ddw_create_response create; | |
1407 | int page_shift; | |
ff5163bb | 1408 | u64 win_addr, dynamic_offset = 0; |
381ceda8 | 1409 | const char *win_name; |
4e8b0cf4 | 1410 | struct device_node *dn; |
cac3e629 | 1411 | u32 ddw_avail[DDW_APPLICABLE_SIZE]; |
57dbbe59 | 1412 | struct dma_win *window; |
76730334 | 1413 | struct property *win64; |
61435690 | 1414 | struct failed_ddw_pdn *fpdn; |
381ceda8 | 1415 | bool default_win_removed = false, direct_mapping = false; |
ff5163bb | 1416 | bool dynamic_mapping = false; |
bf6e2d56 | 1417 | bool pmem_present; |
381ceda8 | 1418 | struct pci_dn *pci = PCI_DN(pdn); |
b1fc44ea | 1419 | struct property *default_win = NULL; |
bf6e2d56 AK |
1420 | |
1421 | dn = of_find_node_by_type(NULL, "ibm,pmemory"); | |
1422 | pmem_present = dn != NULL; | |
1423 | of_node_put(dn); | |
4e8b0cf4 | 1424 | |
57dbbe59 | 1425 | mutex_lock(&dma_win_init_mutex); |
4e8b0cf4 | 1426 | |
3bf983e4 | 1427 | if (find_existing_ddw(pdn, &dev->dev.archdata.dma_offset, &len, &direct_mapping)) |
4e8b0cf4 NA |
1428 | goto out_unlock; |
1429 | ||
61435690 NA |
1430 | /* |
1431 | * If we already went through this for a previous function of | |
1432 | * the same device and failed, we don't want to muck with the | |
1433 | * DMA window again, as it will race with in-flight operations | |
1434 | * and can lead to EEHs. The above mutex protects access to the | |
1435 | * list. | |
1436 | */ | |
1437 | list_for_each_entry(fpdn, &failed_ddw_pdn_list, list) { | |
b7c670d6 | 1438 | if (fpdn->pdn == pdn) |
61435690 NA |
1439 | goto out_unlock; |
1440 | } | |
1441 | ||
4e8b0cf4 NA |
1442 | /* |
1443 | * the ibm,ddw-applicable property holds the tokens for: | |
1444 | * ibm,query-pe-dma-window | |
1445 | * ibm,create-pe-dma-window | |
1446 | * ibm,remove-pe-dma-window | |
1447 | * for the given node in that order. | |
1448 | * the property is actually in the parent, not the PE | |
1449 | */ | |
9410e018 | 1450 | ret = of_property_read_u32_array(pdn, "ibm,ddw-applicable", |
cac3e629 | 1451 | &ddw_avail[0], DDW_APPLICABLE_SIZE); |
9410e018 | 1452 | if (ret) |
ae69e1ed | 1453 | goto out_failed; |
25ebc45b | 1454 | |
ae69e1ed | 1455 | /* |
4e8b0cf4 NA |
1456 | * Query if there is a second window of size to map the |
1457 | * whole partition. Query returns number of windows, largest | |
1458 | * block assigned to PE (partition endpoint), and two bitmasks | |
1459 | * of page sizes: supported and supported for migrate-dma. | |
1460 | */ | |
1461 | dn = pci_device_to_OF_node(dev); | |
80f02512 | 1462 | ret = query_ddw(dev, ddw_avail, &query, pdn); |
4e8b0cf4 | 1463 | if (ret != 0) |
ae69e1ed | 1464 | goto out_failed; |
4e8b0cf4 | 1465 | |
8c0d5159 LB |
1466 | /* |
1467 | * If there is no window available, remove the default DMA window, | |
1468 | * if it's present. This will make all the resources available to the | |
1469 | * new DDW window. | |
1470 | * If anything fails after this, we need to restore it, so also check | |
1471 | * for extensions presence. | |
1472 | */ | |
4e8b0cf4 | 1473 | if (query.windows_available == 0) { |
8c0d5159 LB |
1474 | int reset_win_ext; |
1475 | ||
92fe01b7 | 1476 | /* DDW + IOMMU on single window may fail if there is any allocation */ |
b1fc44ea | 1477 | if (iommu_table_in_use(pci->table_group->tables[0])) { |
92fe01b7 AK |
1478 | dev_warn(&dev->dev, "current IOMMU table in use, can't be replaced.\n"); |
1479 | goto out_failed; | |
1480 | } | |
1481 | ||
8c0d5159 LB |
1482 | default_win = of_find_property(pdn, "ibm,dma-window", NULL); |
1483 | if (!default_win) | |
1484 | goto out_failed; | |
1485 | ||
1486 | reset_win_ext = ddw_read_ext(pdn, DDW_EXT_RESET_DMA_WIN, NULL); | |
1487 | if (reset_win_ext) | |
1488 | goto out_failed; | |
1489 | ||
f431a8cd | 1490 | remove_dma_window(pdn, ddw_avail, default_win, true); |
8c0d5159 LB |
1491 | default_win_removed = true; |
1492 | ||
1493 | /* Query again, to check if the window is available */ | |
1494 | ret = query_ddw(dev, ddw_avail, &query, pdn); | |
1495 | if (ret != 0) | |
1496 | goto out_failed; | |
1497 | ||
1498 | if (query.windows_available == 0) { | |
1499 | /* no windows are available for this device. */ | |
1500 | dev_dbg(&dev->dev, "no free dynamic windows"); | |
1501 | goto out_failed; | |
1502 | } | |
4e8b0cf4 | 1503 | } |
47272411 LB |
1504 | |
1505 | page_shift = iommu_get_page_shift(query.page_size); | |
1506 | if (!page_shift) { | |
57dbbe59 LB |
1507 | dev_dbg(&dev->dev, "no supported page size in mask %x", |
1508 | query.page_size); | |
ae69e1ed | 1509 | goto out_failed; |
4e8b0cf4 | 1510 | } |
381ceda8 | 1511 | |
bf6e2d56 AK |
1512 | /* |
1513 | * The "ibm,pmemory" can appear anywhere in the address space. | |
1514 | * Assuming it is still backed by page structs, try MAX_PHYSMEM_BITS | |
1515 | * for the upper limit and fallback to max RAM otherwise but this | |
1516 | * disables device::dma_ops_bypass. | |
1517 | */ | |
1518 | len = max_ram_len; | |
1519 | if (pmem_present) { | |
1520 | if (query.largest_available_block >= | |
1521 | (1ULL << (MAX_PHYSMEM_BITS - page_shift))) | |
a9d2f9bb | 1522 | len = MAX_PHYSMEM_BITS; |
bf6e2d56 AK |
1523 | else |
1524 | dev_info(&dev->dev, "Skipping ibm,pmemory"); | |
1525 | } | |
1526 | ||
381ceda8 | 1527 | /* check if the available block * number of ptes will map everything */ |
bf6e2d56 AK |
1528 | if (query.largest_available_block < (1ULL << (len - page_shift))) { |
1529 | dev_dbg(&dev->dev, | |
1530 | "can't map partition max 0x%llx with %llu %llu-sized pages\n", | |
1531 | 1ULL << len, | |
1532 | query.largest_available_block, | |
1533 | 1ULL << page_shift); | |
381ceda8 | 1534 | |
381ceda8 | 1535 | len = order_base_2(query.largest_available_block << page_shift); |
ff5163bb GB |
1536 | |
1537 | dynamic_mapping = true; | |
381ceda8 | 1538 | } else { |
ad397602 AK |
1539 | direct_mapping = !default_win_removed || |
1540 | (len == MAX_PHYSMEM_BITS) || | |
1541 | (!pmem_present && (len == max_ram_len)); | |
ff5163bb GB |
1542 | |
1543 | /* DDW is big enough to direct map RAM. If there is vPMEM, check | |
1544 | * if enough space is left in DDW where we can dynamically | |
1545 | * allocate TCEs for vPMEM. For now, this Hybrid sharing of DDW | |
1546 | * is only for SR-IOV devices. | |
1547 | */ | |
1548 | if (default_win_removed && pmem_present && !direct_mapping) { | |
1549 | /* DDW is big enough to be split */ | |
1550 | if ((query.largest_available_block << page_shift) >= | |
1551 | MIN_DDW_VPMEM_DMA_WINDOW + (1ULL << max_ram_len)) { | |
1552 | direct_mapping = true; | |
1553 | ||
1554 | /* offset of the Dynamic part of DDW */ | |
1555 | dynamic_offset = 1ULL << max_ram_len; | |
1556 | } | |
1557 | ||
1558 | /* DDW will at least have dynamic allocation */ | |
1559 | dynamic_mapping = true; | |
1560 | ||
1561 | /* create max size DDW possible */ | |
1562 | len = order_base_2(query.largest_available_block | |
1563 | << page_shift); | |
1564 | } | |
4e8b0cf4 | 1565 | } |
4e8b0cf4 | 1566 | |
ff5163bb GB |
1567 | /* Even if the DDW is split into both direct mapped RAM and dynamically |
1568 | * mapped vPMEM, the DDW property in OF will be marked as Direct. | |
1569 | */ | |
1570 | win_name = direct_mapping ? DIRECT64_PROPNAME : DMA64_PROPNAME; | |
1571 | ||
b73a635f | 1572 | ret = create_ddw(dev, ddw_avail, &create, page_shift, len); |
4e8b0cf4 | 1573 | if (ret != 0) |
7ed2ed2d | 1574 | goto out_failed; |
4e8b0cf4 | 1575 | |
b7c670d6 RH |
1576 | dev_dbg(&dev->dev, "created tce table LIOBN 0x%x for %pOF\n", |
1577 | create.liobn, dn); | |
4e8b0cf4 | 1578 | |
7ed2ed2d | 1579 | win_addr = ((u64)create.addr_hi << 32) | create.addr_lo; |
381ceda8 LB |
1580 | win64 = ddw_property_create(win_name, create.liobn, win_addr, page_shift, len); |
1581 | ||
7ed2ed2d LB |
1582 | if (!win64) { |
1583 | dev_info(&dev->dev, | |
1584 | "couldn't allocate property, property name, or value\n"); | |
1585 | goto out_remove_win; | |
1586 | } | |
1587 | ||
1588 | ret = of_add_property(pdn, win64); | |
1589 | if (ret) { | |
57dbbe59 | 1590 | dev_err(&dev->dev, "unable to add DMA window property for %pOF: %d", |
7ed2ed2d LB |
1591 | pdn, ret); |
1592 | goto out_free_prop; | |
1593 | } | |
1594 | ||
1595 | window = ddw_list_new_entry(pdn, win64->value); | |
4e8b0cf4 | 1596 | if (!window) |
7ed2ed2d | 1597 | goto out_del_prop; |
4e8b0cf4 | 1598 | |
ff5163bb | 1599 | window->direct = direct_mapping; |
d61cd13e | 1600 | |
ff5163bb | 1601 | if (direct_mapping) { |
381ceda8 LB |
1602 | /* DDW maps the whole partition, so enable direct DMA mapping */ |
1603 | ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT, | |
1604 | win64->value, tce_setrange_multi_pSeriesLP_walk); | |
1605 | if (ret) { | |
57dbbe59 | 1606 | dev_info(&dev->dev, "failed to map DMA window for %pOF: %d\n", |
381ceda8 | 1607 | dn, ret); |
4e8b0cf4 | 1608 | |
fb4ee2b3 AK |
1609 | /* Make sure to clean DDW if any TCE was set*/ |
1610 | clean_dma_window(pdn, win64->value); | |
381ceda8 LB |
1611 | goto out_del_list; |
1612 | } | |
aed6e494 SB |
1613 | if (default_win_removed) { |
1614 | iommu_tce_table_put(pci->table_group->tables[0]); | |
1615 | pci->table_group->tables[0] = NULL; | |
1616 | set_iommu_table_base(&dev->dev, NULL); | |
1617 | } | |
ff5163bb GB |
1618 | } |
1619 | ||
1620 | if (dynamic_mapping) { | |
381ceda8 LB |
1621 | struct iommu_table *newtbl; |
1622 | int i; | |
d853adc7 | 1623 | unsigned long start = 0, end = 0; |
ff5163bb | 1624 | u64 dynamic_addr, dynamic_len; |
d61cd13e | 1625 | |
381ceda8 LB |
1626 | for (i = 0; i < ARRAY_SIZE(pci->phb->mem_resources); i++) { |
1627 | const unsigned long mask = IORESOURCE_MEM_64 | IORESOURCE_MEM; | |
1628 | ||
1629 | /* Look for MMIO32 */ | |
d853adc7 AK |
1630 | if ((pci->phb->mem_resources[i].flags & mask) == IORESOURCE_MEM) { |
1631 | start = pci->phb->mem_resources[i].start; | |
1632 | end = pci->phb->mem_resources[i].end; | |
381ceda8 | 1633 | break; |
d853adc7 | 1634 | } |
381ceda8 LB |
1635 | } |
1636 | ||
381ceda8 LB |
1637 | /* New table for using DDW instead of the default DMA window */ |
1638 | newtbl = iommu_pseries_alloc_table(pci->phb->node); | |
1639 | if (!newtbl) { | |
1640 | dev_dbg(&dev->dev, "couldn't create new IOMMU table\n"); | |
1641 | goto out_del_list; | |
1642 | } | |
1643 | ||
ff5163bb GB |
1644 | /* If the DDW is split between directly mapped RAM and Dynamic |
1645 | * mapped for TCES, offset into the DDW where the dynamic part | |
1646 | * begins. | |
1647 | */ | |
1648 | dynamic_addr = win_addr + dynamic_offset; | |
1649 | dynamic_len = (1UL << len) - dynamic_offset; | |
1650 | iommu_table_setparms_common(newtbl, pci->phb->bus->number, create.liobn, | |
1651 | dynamic_addr, dynamic_len, page_shift, NULL, | |
1652 | &iommu_table_lpar_multi_ops); | |
d853adc7 | 1653 | iommu_init_table(newtbl, pci->phb->node, start, end); |
381ceda8 | 1654 | |
aed6e494 | 1655 | pci->table_group->tables[default_win_removed ? 0 : 1] = newtbl; |
381ceda8 | 1656 | |
381ceda8 | 1657 | set_iommu_table_base(&dev->dev, newtbl); |
4e8b0cf4 NA |
1658 | } |
1659 | ||
b1fc44ea | 1660 | if (default_win_removed) { |
b1fc44ea | 1661 | /* default_win is valid here because default_win_removed == true */ |
f431a8cd SB |
1662 | if (!of_find_property(pdn, "ibm,dma-window-saved", NULL)) |
1663 | copy_property(pdn, "ibm,dma-window", "ibm,dma-window-saved"); | |
b1fc44ea AK |
1664 | of_remove_property(pdn, default_win); |
1665 | dev_info(&dev->dev, "Removed default DMA window for %pOF\n", pdn); | |
1666 | } | |
1667 | ||
57dbbe59 LB |
1668 | spin_lock(&dma_win_list_lock); |
1669 | list_add(&window->list, &dma_win_list); | |
1670 | spin_unlock(&dma_win_list_lock); | |
4e8b0cf4 | 1671 | |
7ed2ed2d | 1672 | dev->dev.archdata.dma_offset = win_addr; |
4e8b0cf4 NA |
1673 | goto out_unlock; |
1674 | ||
7ed2ed2d | 1675 | out_del_list: |
7a19081f JL |
1676 | kfree(window); |
1677 | ||
7ed2ed2d LB |
1678 | out_del_prop: |
1679 | of_remove_property(pdn, win64); | |
4e8b0cf4 NA |
1680 | |
1681 | out_free_prop: | |
1682 | kfree(win64->name); | |
1683 | kfree(win64->value); | |
1684 | kfree(win64); | |
1685 | ||
7ed2ed2d LB |
1686 | out_remove_win: |
1687 | /* DDW is clean, so it's ok to call this directly. */ | |
1688 | __remove_dma_window(pdn, ddw_avail, create.liobn); | |
1689 | ||
ae69e1ed | 1690 | out_failed: |
8c0d5159 LB |
1691 | if (default_win_removed) |
1692 | reset_dma_window(dev, pdn); | |
25ebc45b | 1693 | |
61435690 NA |
1694 | fpdn = kzalloc(sizeof(*fpdn), GFP_KERNEL); |
1695 | if (!fpdn) | |
1696 | goto out_unlock; | |
1697 | fpdn->pdn = pdn; | |
1698 | list_add(&fpdn->list, &failed_ddw_pdn_list); | |
1699 | ||
4e8b0cf4 | 1700 | out_unlock: |
57dbbe59 | 1701 | mutex_unlock(&dma_win_init_mutex); |
bf6e2d56 | 1702 | |
ff5163bb GB |
1703 | /* If we have persistent memory and the window size is not big enough |
1704 | * to directly map both RAM and vPMEM, then we need to set DMA limit. | |
bf6e2d56 | 1705 | */ |
ff5163bb GB |
1706 | if (pmem_present && direct_mapping && len != MAX_PHYSMEM_BITS) |
1707 | dev->dev.bus_dma_limit = dev->dev.archdata.dma_offset + | |
1708 | (1ULL << max_ram_len); | |
bf6e2d56 | 1709 | |
fb4ee2b3 | 1710 | return direct_mapping; |
4e8b0cf4 NA |
1711 | } |
1712 | ||
6af67f2d SB |
1713 | static __u64 query_page_size_to_mask(u32 query_page_size) |
1714 | { | |
1715 | const long shift[] = { | |
1716 | (SZ_4K), (SZ_64K), (SZ_16M), | |
1717 | (SZ_32M), (SZ_64M), (SZ_128M), | |
1718 | (SZ_256M), (SZ_16G), (SZ_2M) | |
1719 | }; | |
1720 | int i, ret = 0; | |
1721 | ||
1722 | for (i = 0; i < ARRAY_SIZE(shift); i++) { | |
1723 | if (query_page_size & (1 << i)) | |
1724 | ret |= shift[i]; | |
1725 | } | |
1726 | ||
1727 | return ret; | |
1728 | } | |
1729 | ||
1730 | static void spapr_tce_init_table_group(struct pci_dev *pdev, | |
1731 | struct device_node *pdn, | |
1732 | struct dynamic_dma_window_prop prop) | |
1733 | { | |
1734 | struct iommu_table_group *table_group = PCI_DN(pdn)->table_group; | |
1735 | u32 ddw_avail[DDW_APPLICABLE_SIZE]; | |
1736 | ||
1737 | struct ddw_query_response query; | |
1738 | int ret; | |
1739 | ||
1740 | /* Only for normal boot with default window. Doesn't matter during | |
1741 | * kdump, since these will not be used during kdump. | |
1742 | */ | |
1743 | if (is_kdump_kernel()) | |
1744 | return; | |
1745 | ||
1746 | if (table_group->max_dynamic_windows_supported != 0) | |
1747 | return; /* already initialized */ | |
1748 | ||
1749 | table_group->tce32_start = be64_to_cpu(prop.dma_base); | |
1750 | table_group->tce32_size = 1 << be32_to_cpu(prop.window_shift); | |
1751 | ||
1752 | if (!of_find_property(pdn, "ibm,dma-window", NULL)) | |
1753 | dev_err(&pdev->dev, "default dma window missing!\n"); | |
1754 | ||
1755 | ret = of_property_read_u32_array(pdn, "ibm,ddw-applicable", | |
1756 | &ddw_avail[0], DDW_APPLICABLE_SIZE); | |
1757 | if (ret) { | |
1758 | table_group->max_dynamic_windows_supported = -1; | |
1759 | return; | |
1760 | } | |
1761 | ||
1762 | ret = query_ddw(pdev, ddw_avail, &query, pdn); | |
1763 | if (ret) { | |
1764 | dev_err(&pdev->dev, "%s: query_ddw failed\n", __func__); | |
1765 | table_group->max_dynamic_windows_supported = -1; | |
1766 | return; | |
1767 | } | |
1768 | ||
1769 | if (query.windows_available == 0) | |
1770 | table_group->max_dynamic_windows_supported = 1; | |
1771 | else | |
1772 | table_group->max_dynamic_windows_supported = IOMMU_TABLE_GROUP_MAX_TABLES; | |
1773 | ||
1774 | table_group->max_levels = 1; | |
1775 | table_group->pgsizes |= query_page_size_to_mask(query.page_size); | |
1776 | } | |
1777 | ||
12d04eef | 1778 | static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) |
1da177e4 LT |
1779 | { |
1780 | struct device_node *pdn, *dn; | |
1781 | struct iommu_table *tbl; | |
1635317f | 1782 | struct pci_dn *pci; |
09a3c1e4 | 1783 | struct dynamic_dma_window_prop prop; |
1da177e4 | 1784 | |
f7ebf352 | 1785 | pr_debug("pci_dma_dev_setup_pSeriesLP: %s\n", pci_name(dev)); |
12d04eef | 1786 | |
1da177e4 | 1787 | /* dev setup for LPAR is a little tricky, since the device tree might |
25985edc | 1788 | * contain the dma-window properties per-device and not necessarily |
1da177e4 LT |
1789 | * for the bus. So we need to search upwards in the tree until we |
1790 | * either hit a dma-window property, OR find a parent with a table | |
1791 | * already allocated. | |
1792 | */ | |
1793 | dn = pci_device_to_OF_node(dev); | |
b7c670d6 | 1794 | pr_debug(" node is %pOF\n", dn); |
5d2efba6 | 1795 | |
09a3c1e4 | 1796 | pdn = pci_dma_find(dn, &prop); |
650f7b3b LV |
1797 | if (!pdn || !PCI_DN(pdn)) { |
1798 | printk(KERN_WARNING "pci_dma_dev_setup_pSeriesLP: " | |
b7c670d6 RH |
1799 | "no DMA window found for pci dev=%s dn=%pOF\n", |
1800 | pci_name(dev), dn); | |
650f7b3b LV |
1801 | return; |
1802 | } | |
b7c670d6 | 1803 | pr_debug(" parent is %pOF\n", pdn); |
12d04eef | 1804 | |
e07102db | 1805 | pci = PCI_DN(pdn); |
b348aa65 AK |
1806 | if (!pci->table_group) { |
1807 | pci->table_group = iommu_pseries_alloc_group(pci->phb->node); | |
1808 | tbl = pci->table_group->tables[0]; | |
09a3c1e4 GB |
1809 | |
1810 | iommu_table_setparms_common(tbl, pci->phb->bus->number, | |
1811 | be32_to_cpu(prop.liobn), | |
1812 | be64_to_cpu(prop.dma_base), | |
1813 | 1ULL << be32_to_cpu(prop.window_shift), | |
1814 | be32_to_cpu(prop.tce_shift), NULL, | |
1815 | &iommu_table_lpar_multi_ops); | |
1816 | ||
201ed7f3 | 1817 | iommu_init_table(tbl, pci->phb->node, 0, 0); |
b348aa65 AK |
1818 | iommu_register_group(pci->table_group, |
1819 | pci_domain_nr(pci->phb->bus), 0); | |
1820 | pr_debug(" created table: %p\n", pci->table_group); | |
de113217 | 1821 | } else { |
b348aa65 | 1822 | pr_debug(" found DMA window, table: %p\n", pci->table_group); |
1da177e4 LT |
1823 | } |
1824 | ||
6af67f2d SB |
1825 | spapr_tce_init_table_group(dev, pdn, prop); |
1826 | ||
b348aa65 | 1827 | set_iommu_table_base(&dev->dev, pci->table_group->tables[0]); |
c4e9d3c1 | 1828 | iommu_add_device(pci->table_group, &dev->dev); |
1da177e4 | 1829 | } |
4e8b0cf4 | 1830 | |
9ae2fdde | 1831 | static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask) |
4e8b0cf4 | 1832 | { |
9ae2fdde | 1833 | struct device_node *dn = pci_device_to_OF_node(pdev), *pdn; |
4e8b0cf4 | 1834 | |
4e8b0cf4 | 1835 | /* only attempt to use a new window if 64-bit DMA is requested */ |
9ae2fdde CH |
1836 | if (dma_mask < DMA_BIT_MASK(64)) |
1837 | return false; | |
4e8b0cf4 | 1838 | |
9ae2fdde | 1839 | dev_dbg(&pdev->dev, "node is %pOF\n", dn); |
4e8b0cf4 | 1840 | |
9ae2fdde CH |
1841 | /* |
1842 | * the device tree might contain the dma-window properties | |
1843 | * per-device and not necessarily for the bus. So we need to | |
1844 | * search upwards in the tree until we either hit a dma-window | |
1845 | * property, OR find a parent with a table already allocated. | |
1846 | */ | |
b1fc44ea | 1847 | pdn = pci_dma_find(dn, NULL); |
2ca73c54 LB |
1848 | if (pdn && PCI_DN(pdn)) |
1849 | return enable_ddw(pdev, pdn); | |
6a5c7be5 | 1850 | |
9ae2fdde | 1851 | return false; |
6a5c7be5 MM |
1852 | } |
1853 | ||
af199e6c | 1854 | #ifdef CONFIG_IOMMU_API |
b09c031d SB |
1855 | /* |
1856 | * A simple iommu_table_group_ops which only allows reusing the existing | |
1857 | * iommu_table. This handles VFIO for POWER7 or the nested KVM. | |
1858 | * The ops does not allow creating windows and only allows reusing the existing | |
1859 | * one if it matches table_group->tce32_start/tce32_size/page_shift. | |
1860 | */ | |
1861 | static unsigned long spapr_tce_get_table_size(__u32 page_shift, | |
1862 | __u64 window_size, __u32 levels) | |
1863 | { | |
1864 | unsigned long size; | |
1865 | ||
1866 | if (levels > 1) | |
1867 | return ~0U; | |
1868 | size = window_size >> (page_shift - 3); | |
1869 | return size; | |
1870 | } | |
1871 | ||
f431a8cd SB |
1872 | static struct pci_dev *iommu_group_get_first_pci_dev(struct iommu_group *group) |
1873 | { | |
1874 | struct pci_dev *pdev = NULL; | |
1875 | int ret; | |
1876 | ||
1877 | /* No IOMMU group ? */ | |
1878 | if (!group) | |
1879 | return NULL; | |
1880 | ||
1881 | ret = iommu_group_for_each_dev(group, &pdev, dev_has_iommu_table); | |
1882 | if (!ret || !pdev) | |
1883 | return NULL; | |
1884 | return pdev; | |
1885 | } | |
1886 | ||
1887 | static void restore_default_dma_window(struct pci_dev *pdev, struct device_node *pdn) | |
1888 | { | |
1889 | reset_dma_window(pdev, pdn); | |
1890 | copy_property(pdn, "ibm,dma-window-saved", "ibm,dma-window"); | |
1891 | } | |
1892 | ||
1893 | static long remove_dynamic_dma_windows(struct pci_dev *pdev, struct device_node *pdn) | |
1894 | { | |
1895 | struct pci_dn *pci = PCI_DN(pdn); | |
1896 | struct dma_win *window; | |
1897 | bool direct_mapping; | |
1898 | int len; | |
1899 | ||
1900 | if (find_existing_ddw(pdn, &pdev->dev.archdata.dma_offset, &len, &direct_mapping)) { | |
1901 | remove_dma_window_named(pdn, true, direct_mapping ? | |
1902 | DIRECT64_PROPNAME : DMA64_PROPNAME, true); | |
1903 | if (!direct_mapping) { | |
1904 | WARN_ON(!pci->table_group->tables[0] && !pci->table_group->tables[1]); | |
1905 | ||
1906 | if (pci->table_group->tables[1]) { | |
1907 | iommu_tce_table_put(pci->table_group->tables[1]); | |
1908 | pci->table_group->tables[1] = NULL; | |
1909 | } else if (pci->table_group->tables[0]) { | |
1910 | /* Default window was removed and only the DDW exists */ | |
1911 | iommu_tce_table_put(pci->table_group->tables[0]); | |
1912 | pci->table_group->tables[0] = NULL; | |
1913 | } | |
1914 | } | |
1915 | spin_lock(&dma_win_list_lock); | |
1916 | list_for_each_entry(window, &dma_win_list, list) { | |
1917 | if (window->device == pdn) { | |
1918 | list_del(&window->list); | |
1919 | kfree(window); | |
1920 | break; | |
1921 | } | |
1922 | } | |
1923 | spin_unlock(&dma_win_list_lock); | |
1924 | } | |
1925 | ||
1926 | return 0; | |
1927 | } | |
1928 | ||
1929 | static long pseries_setup_default_iommu_config(struct iommu_table_group *table_group, | |
1930 | struct device *dev) | |
1931 | { | |
1932 | struct pci_dev *pdev = to_pci_dev(dev); | |
1933 | const __be32 *default_prop; | |
1934 | long liobn, offset, size; | |
1935 | struct device_node *pdn; | |
1936 | struct iommu_table *tbl; | |
1937 | struct pci_dn *pci; | |
1938 | ||
1939 | pdn = pci_dma_find_parent_node(pdev, table_group); | |
1940 | if (!pdn || !PCI_DN(pdn)) { | |
1941 | dev_warn(&pdev->dev, "No table_group configured for the node %pOF\n", pdn); | |
1942 | return -1; | |
1943 | } | |
1944 | pci = PCI_DN(pdn); | |
1945 | ||
1946 | /* The default window is restored if not present already on removal of DDW. | |
1947 | * However, if used by VFIO SPAPR sub driver, the user's order of removal of | |
1948 | * windows might have been different to not leading to auto restoration, | |
1949 | * suppose the DDW was removed first followed by the default one. | |
1950 | * So, restore the default window with reset-pe-dma call explicitly. | |
1951 | */ | |
1952 | restore_default_dma_window(pdev, pdn); | |
1953 | ||
1954 | default_prop = of_get_property(pdn, "ibm,dma-window", NULL); | |
1955 | of_parse_dma_window(pdn, default_prop, &liobn, &offset, &size); | |
1956 | tbl = iommu_pseries_alloc_table(pci->phb->node); | |
1957 | if (!tbl) { | |
1958 | dev_err(&pdev->dev, "couldn't create new IOMMU table\n"); | |
1959 | return -1; | |
1960 | } | |
1961 | ||
1962 | iommu_table_setparms_common(tbl, pci->phb->bus->number, liobn, offset, | |
1963 | size, IOMMU_PAGE_SHIFT_4K, NULL, | |
1964 | &iommu_table_lpar_multi_ops); | |
1965 | iommu_init_table(tbl, pci->phb->node, 0, 0); | |
1966 | ||
1967 | pci->table_group->tables[0] = tbl; | |
1968 | set_iommu_table_base(&pdev->dev, tbl); | |
1969 | ||
1970 | return 0; | |
1971 | } | |
1972 | ||
1973 | static bool is_default_window_request(struct iommu_table_group *table_group, __u32 page_shift, | |
1974 | __u64 window_size) | |
1975 | { | |
1976 | if ((window_size <= table_group->tce32_size) && | |
1977 | (page_shift == IOMMU_PAGE_SHIFT_4K)) | |
1978 | return true; | |
1979 | ||
1980 | return false; | |
1981 | } | |
1982 | ||
b09c031d SB |
1983 | static long spapr_tce_create_table(struct iommu_table_group *table_group, int num, |
1984 | __u32 page_shift, __u64 window_size, __u32 levels, | |
1985 | struct iommu_table **ptbl) | |
1986 | { | |
f431a8cd SB |
1987 | struct pci_dev *pdev = iommu_group_get_first_pci_dev(table_group->group); |
1988 | u32 ddw_avail[DDW_APPLICABLE_SIZE]; | |
1989 | struct ddw_create_response create; | |
1990 | unsigned long liobn, offset, size; | |
1991 | unsigned long start = 0, end = 0; | |
1992 | struct ddw_query_response query; | |
1993 | const __be32 *default_prop; | |
1994 | struct failed_ddw_pdn *fpdn; | |
1995 | unsigned int window_shift; | |
1996 | struct device_node *pdn; | |
1997 | struct iommu_table *tbl; | |
1998 | struct dma_win *window; | |
1999 | struct property *win64; | |
2000 | struct pci_dn *pci; | |
2001 | u64 win_addr; | |
2002 | int len, i; | |
2003 | long ret; | |
b09c031d | 2004 | |
f431a8cd | 2005 | if (!is_power_of_2(window_size) || levels > 1) |
b09c031d SB |
2006 | return -EINVAL; |
2007 | ||
f431a8cd SB |
2008 | window_shift = order_base_2(window_size); |
2009 | ||
2010 | mutex_lock(&dma_win_init_mutex); | |
2011 | ||
2012 | ret = -ENODEV; | |
2013 | ||
2014 | pdn = pci_dma_find_parent_node(pdev, table_group); | |
2015 | if (!pdn || !PCI_DN(pdn)) { /* Niether of 32s|64-bit exist! */ | |
2016 | dev_warn(&pdev->dev, "No dma-windows exist for the node %pOF\n", pdn); | |
2017 | goto out_failed; | |
2018 | } | |
2019 | pci = PCI_DN(pdn); | |
2020 | ||
2021 | /* If the enable DDW failed for the pdn, dont retry! */ | |
2022 | list_for_each_entry(fpdn, &failed_ddw_pdn_list, list) { | |
2023 | if (fpdn->pdn == pdn) { | |
2024 | dev_info(&pdev->dev, "%pOF in failed DDW device list\n", pdn); | |
2025 | goto out_unlock; | |
2026 | } | |
2027 | } | |
2028 | ||
2029 | tbl = iommu_pseries_alloc_table(pci->phb->node); | |
2030 | if (!tbl) { | |
2031 | dev_dbg(&pdev->dev, "couldn't create new IOMMU table\n"); | |
2032 | goto out_unlock; | |
2033 | } | |
2034 | ||
2035 | if (num == 0) { | |
2036 | bool direct_mapping; | |
2037 | /* The request is not for default window? Ensure there is no DDW window already */ | |
2038 | if (!is_default_window_request(table_group, page_shift, window_size)) { | |
2039 | if (find_existing_ddw(pdn, &pdev->dev.archdata.dma_offset, &len, | |
2040 | &direct_mapping)) { | |
2041 | dev_warn(&pdev->dev, "%pOF: 64-bit window already present.", pdn); | |
2042 | ret = -EPERM; | |
2043 | goto out_unlock; | |
2044 | } | |
2045 | } else { | |
2046 | /* Request is for Default window, ensure there is no DDW if there is a | |
2047 | * need to reset. reset-pe otherwise removes the DDW also | |
2048 | */ | |
2049 | default_prop = of_get_property(pdn, "ibm,dma-window", NULL); | |
2050 | if (!default_prop) { | |
2051 | if (find_existing_ddw(pdn, &pdev->dev.archdata.dma_offset, &len, | |
2052 | &direct_mapping)) { | |
2053 | dev_warn(&pdev->dev, "%pOF: Attempt to create window#0 when 64-bit window is present. Preventing the attempt as that would destroy the 64-bit window", | |
2054 | pdn); | |
2055 | ret = -EPERM; | |
2056 | goto out_unlock; | |
2057 | } | |
2058 | ||
2059 | restore_default_dma_window(pdev, pdn); | |
2060 | ||
2061 | default_prop = of_get_property(pdn, "ibm,dma-window", NULL); | |
2062 | of_parse_dma_window(pdn, default_prop, &liobn, &offset, &size); | |
2063 | /* Limit the default window size to window_size */ | |
2064 | iommu_table_setparms_common(tbl, pci->phb->bus->number, liobn, | |
2065 | offset, 1UL << window_shift, | |
2066 | IOMMU_PAGE_SHIFT_4K, NULL, | |
2067 | &iommu_table_lpar_multi_ops); | |
2068 | iommu_init_table(tbl, pci->phb->node, start, end); | |
2069 | ||
2070 | table_group->tables[0] = tbl; | |
2071 | ||
2072 | mutex_unlock(&dma_win_init_mutex); | |
2073 | ||
2074 | goto exit; | |
2075 | } | |
2076 | } | |
2077 | } | |
2078 | ||
2079 | ret = of_property_read_u32_array(pdn, "ibm,ddw-applicable", | |
2080 | &ddw_avail[0], DDW_APPLICABLE_SIZE); | |
2081 | if (ret) { | |
2082 | dev_info(&pdev->dev, "ibm,ddw-applicable not found\n"); | |
2083 | goto out_failed; | |
2084 | } | |
2085 | ret = -ENODEV; | |
2086 | ||
2087 | pr_err("%s: Calling query %pOF\n", __func__, pdn); | |
2088 | ret = query_ddw(pdev, ddw_avail, &query, pdn); | |
2089 | if (ret) | |
2090 | goto out_failed; | |
2091 | ret = -ENODEV; | |
2092 | ||
2093 | len = window_shift; | |
2094 | if (query.largest_available_block < (1ULL << (len - page_shift))) { | |
2095 | dev_dbg(&pdev->dev, "can't map window 0x%llx with %llu %llu-sized pages\n", | |
2096 | 1ULL << len, query.largest_available_block, | |
2097 | 1ULL << page_shift); | |
2098 | ret = -EINVAL; /* Retry with smaller window size */ | |
2099 | goto out_unlock; | |
2100 | } | |
2101 | ||
2102 | if (create_ddw(pdev, ddw_avail, &create, page_shift, len)) { | |
2103 | pr_err("%s: Create ddw failed %pOF\n", __func__, pdn); | |
2104 | goto out_failed; | |
2105 | } | |
2106 | ||
2107 | win_addr = ((u64)create.addr_hi << 32) | create.addr_lo; | |
2108 | win64 = ddw_property_create(DMA64_PROPNAME, create.liobn, win_addr, page_shift, len); | |
2109 | if (!win64) | |
2110 | goto remove_window; | |
2111 | ||
2112 | ret = of_add_property(pdn, win64); | |
2113 | if (ret) { | |
2114 | dev_err(&pdev->dev, "unable to add DMA window property for %pOF: %ld", pdn, ret); | |
2115 | goto free_property; | |
2116 | } | |
2117 | ret = -ENODEV; | |
2118 | ||
2119 | window = ddw_list_new_entry(pdn, win64->value); | |
2120 | if (!window) | |
2121 | goto remove_property; | |
2122 | ||
2123 | window->direct = false; | |
2124 | ||
2125 | for (i = 0; i < ARRAY_SIZE(pci->phb->mem_resources); i++) { | |
2126 | const unsigned long mask = IORESOURCE_MEM_64 | IORESOURCE_MEM; | |
2127 | ||
2128 | /* Look for MMIO32 */ | |
2129 | if ((pci->phb->mem_resources[i].flags & mask) == IORESOURCE_MEM) { | |
2130 | start = pci->phb->mem_resources[i].start; | |
2131 | end = pci->phb->mem_resources[i].end; | |
2132 | break; | |
2133 | } | |
2134 | } | |
2135 | ||
2136 | /* New table for using DDW instead of the default DMA window */ | |
2137 | iommu_table_setparms_common(tbl, pci->phb->bus->number, create.liobn, win_addr, | |
2138 | 1UL << len, page_shift, NULL, &iommu_table_lpar_multi_ops); | |
2139 | iommu_init_table(tbl, pci->phb->node, start, end); | |
2140 | ||
2141 | pci->table_group->tables[num] = tbl; | |
2142 | set_iommu_table_base(&pdev->dev, tbl); | |
2143 | pdev->dev.archdata.dma_offset = win_addr; | |
2144 | ||
2145 | spin_lock(&dma_win_list_lock); | |
2146 | list_add(&window->list, &dma_win_list); | |
2147 | spin_unlock(&dma_win_list_lock); | |
2148 | ||
2149 | mutex_unlock(&dma_win_init_mutex); | |
2150 | ||
2151 | goto exit; | |
2152 | ||
2153 | remove_property: | |
2154 | of_remove_property(pdn, win64); | |
2155 | free_property: | |
2156 | kfree(win64->name); | |
2157 | kfree(win64->value); | |
2158 | kfree(win64); | |
2159 | remove_window: | |
2160 | __remove_dma_window(pdn, ddw_avail, create.liobn); | |
2161 | ||
2162 | out_failed: | |
2163 | fpdn = kzalloc(sizeof(*fpdn), GFP_KERNEL); | |
2164 | if (!fpdn) | |
2165 | goto out_unlock; | |
2166 | fpdn->pdn = pdn; | |
2167 | list_add(&fpdn->list, &failed_ddw_pdn_list); | |
2168 | ||
2169 | out_unlock: | |
2170 | mutex_unlock(&dma_win_init_mutex); | |
2171 | ||
2172 | return ret; | |
2173 | exit: | |
2174 | /* Allocate the userspace view */ | |
2175 | pseries_tce_iommu_userspace_view_alloc(tbl); | |
2176 | tbl->it_allocated_size = spapr_tce_get_table_size(page_shift, window_size, levels); | |
2177 | ||
b09c031d | 2178 | *ptbl = iommu_tce_table_get(tbl); |
f431a8cd | 2179 | |
b09c031d SB |
2180 | return 0; |
2181 | } | |
2182 | ||
f431a8cd SB |
2183 | static bool is_default_window_table(struct iommu_table_group *table_group, struct iommu_table *tbl) |
2184 | { | |
2185 | if (((tbl->it_size << tbl->it_page_shift) <= table_group->tce32_size) && | |
2186 | (tbl->it_page_shift == IOMMU_PAGE_SHIFT_4K)) | |
2187 | return true; | |
2188 | ||
2189 | return false; | |
2190 | } | |
2191 | ||
b09c031d SB |
2192 | static long spapr_tce_set_window(struct iommu_table_group *table_group, |
2193 | int num, struct iommu_table *tbl) | |
2194 | { | |
2195 | return tbl == table_group->tables[num] ? 0 : -EPERM; | |
2196 | } | |
2197 | ||
2198 | static long spapr_tce_unset_window(struct iommu_table_group *table_group, int num) | |
2199 | { | |
f431a8cd SB |
2200 | struct pci_dev *pdev = iommu_group_get_first_pci_dev(table_group->group); |
2201 | struct device_node *dn = pci_device_to_OF_node(pdev), *pdn; | |
2202 | struct iommu_table *tbl = table_group->tables[num]; | |
2203 | struct failed_ddw_pdn *fpdn; | |
2204 | struct dma_win *window; | |
2205 | const char *win_name; | |
2206 | int ret = -ENODEV; | |
2207 | ||
2208 | mutex_lock(&dma_win_init_mutex); | |
2209 | ||
2210 | if ((num == 0) && is_default_window_table(table_group, tbl)) | |
2211 | win_name = "ibm,dma-window"; | |
2212 | else | |
2213 | win_name = DMA64_PROPNAME; | |
2214 | ||
2215 | pdn = pci_dma_find(dn, NULL); | |
2216 | if (!pdn || !PCI_DN(pdn)) { /* Niether of 32s|64-bit exist! */ | |
2217 | dev_warn(&pdev->dev, "No dma-windows exist for the node %pOF\n", pdn); | |
2218 | goto out_failed; | |
2219 | } | |
2220 | ||
2221 | /* Dont clear the TCEs, User should have done it */ | |
2222 | if (remove_dma_window_named(pdn, true, win_name, false)) { | |
2223 | pr_err("%s: The existing DDW removal failed for node %pOF\n", __func__, pdn); | |
2224 | goto out_failed; /* Could not remove it either! */ | |
2225 | } | |
2226 | ||
2227 | if (strcmp(win_name, DMA64_PROPNAME) == 0) { | |
2228 | spin_lock(&dma_win_list_lock); | |
2229 | list_for_each_entry(window, &dma_win_list, list) { | |
2230 | if (window->device == pdn) { | |
2231 | list_del(&window->list); | |
2232 | kfree(window); | |
2233 | break; | |
2234 | } | |
2235 | } | |
2236 | spin_unlock(&dma_win_list_lock); | |
2237 | } | |
2238 | ||
2239 | iommu_tce_table_put(table_group->tables[num]); | |
2240 | table_group->tables[num] = NULL; | |
2241 | ||
2242 | ret = 0; | |
2243 | ||
2244 | goto out_unlock; | |
2245 | ||
2246 | out_failed: | |
2247 | fpdn = kzalloc(sizeof(*fpdn), GFP_KERNEL); | |
2248 | if (!fpdn) | |
2249 | goto out_unlock; | |
2250 | fpdn->pdn = pdn; | |
2251 | list_add(&fpdn->list, &failed_ddw_pdn_list); | |
2252 | ||
2253 | out_unlock: | |
2254 | mutex_unlock(&dma_win_init_mutex); | |
2255 | ||
2256 | return ret; | |
b09c031d SB |
2257 | } |
2258 | ||
f431a8cd | 2259 | static long spapr_tce_take_ownership(struct iommu_table_group *table_group, struct device *dev) |
b09c031d | 2260 | { |
f431a8cd SB |
2261 | struct iommu_table *tbl = table_group->tables[0]; |
2262 | struct pci_dev *pdev = to_pci_dev(dev); | |
2263 | struct device_node *dn = pci_device_to_OF_node(pdev); | |
2264 | struct device_node *pdn; | |
b09c031d | 2265 | |
f431a8cd SB |
2266 | /* SRIOV VFs using direct map by the host driver OR multifunction devices |
2267 | * where the ownership was taken on the attempt by the first function | |
2268 | */ | |
2269 | if (!tbl && (table_group->max_dynamic_windows_supported != 1)) | |
2270 | return 0; | |
b09c031d | 2271 | |
f431a8cd | 2272 | mutex_lock(&dma_win_init_mutex); |
b09c031d | 2273 | |
f431a8cd SB |
2274 | pdn = pci_dma_find(dn, NULL); |
2275 | if (!pdn || !PCI_DN(pdn)) { /* Niether of 32s|64-bit exist! */ | |
2276 | dev_warn(&pdev->dev, "No dma-windows exist for the node %pOF\n", pdn); | |
2277 | mutex_unlock(&dma_win_init_mutex); | |
2278 | return -1; | |
2279 | } | |
b09c031d | 2280 | |
f431a8cd SB |
2281 | /* |
2282 | * Though rtas call reset-pe removes the DDW, it doesn't clear the entries on the table | |
2283 | * if there are any. In case of direct map, the entries will be left over, which | |
2284 | * is fine for PEs with 2 DMA windows where the second window is created with create-pe | |
2285 | * at which point the table is cleared. However, on VFs having only one DMA window, the | |
2286 | * default window would end up seeing the entries left over from the direct map done | |
2287 | * on the second window. So, remove the ddw explicitly so that clean_dma_window() | |
2288 | * cleans up the entries if any. | |
2289 | */ | |
2290 | if (remove_dynamic_dma_windows(pdev, pdn)) { | |
2291 | dev_warn(&pdev->dev, "The existing DDW removal failed for node %pOF\n", pdn); | |
2292 | mutex_unlock(&dma_win_init_mutex); | |
2293 | return -1; | |
2294 | } | |
2295 | ||
2296 | /* The table_group->tables[0] is not null now, it must be the default window | |
2297 | * Remove it, let the userspace create it as it needs. | |
2298 | */ | |
2299 | if (table_group->tables[0]) { | |
2300 | remove_dma_window_named(pdn, true, "ibm,dma-window", true); | |
2301 | iommu_tce_table_put(tbl); | |
2302 | table_group->tables[0] = NULL; | |
b09c031d | 2303 | } |
f431a8cd SB |
2304 | set_iommu_table_base(dev, NULL); |
2305 | ||
2306 | mutex_unlock(&dma_win_init_mutex); | |
2307 | ||
b09c031d SB |
2308 | return 0; |
2309 | } | |
2310 | ||
f431a8cd | 2311 | static void spapr_tce_release_ownership(struct iommu_table_group *table_group, struct device *dev) |
b09c031d | 2312 | { |
f431a8cd | 2313 | struct iommu_table *tbl = table_group->tables[0]; |
b09c031d | 2314 | |
f431a8cd SB |
2315 | if (tbl) { /* Default window already restored */ |
2316 | return; | |
2317 | } | |
b09c031d | 2318 | |
f431a8cd | 2319 | mutex_lock(&dma_win_init_mutex); |
b09c031d | 2320 | |
f431a8cd SB |
2321 | /* Restore the default window */ |
2322 | pseries_setup_default_iommu_config(table_group, dev); | |
2323 | ||
2324 | mutex_unlock(&dma_win_init_mutex); | |
2325 | ||
2326 | return; | |
b09c031d SB |
2327 | } |
2328 | ||
2329 | static struct iommu_table_group_ops spapr_tce_table_group_ops = { | |
2330 | .get_table_size = spapr_tce_get_table_size, | |
2331 | .create_table = spapr_tce_create_table, | |
2332 | .set_window = spapr_tce_set_window, | |
2333 | .unset_window = spapr_tce_unset_window, | |
2334 | .take_ownership = spapr_tce_take_ownership, | |
2335 | .release_ownership = spapr_tce_release_ownership, | |
2336 | }; | |
af199e6c | 2337 | #endif |
b09c031d | 2338 | |
4e8b0cf4 NA |
2339 | static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action, |
2340 | void *data) | |
2341 | { | |
57dbbe59 | 2342 | struct dma_win *window; |
4e8b0cf4 NA |
2343 | struct memory_notify *arg = data; |
2344 | int ret = 0; | |
2345 | ||
2346 | switch (action) { | |
2347 | case MEM_GOING_ONLINE: | |
57dbbe59 LB |
2348 | spin_lock(&dma_win_list_lock); |
2349 | list_for_each_entry(window, &dma_win_list, list) { | |
d61cd13e GB |
2350 | if (window->direct) { |
2351 | ret |= tce_setrange_multi_pSeriesLP(arg->start_pfn, | |
2352 | arg->nr_pages, window->prop); | |
2353 | } | |
4e8b0cf4 NA |
2354 | /* XXX log error */ |
2355 | } | |
57dbbe59 | 2356 | spin_unlock(&dma_win_list_lock); |
4e8b0cf4 NA |
2357 | break; |
2358 | case MEM_CANCEL_ONLINE: | |
2359 | case MEM_OFFLINE: | |
57dbbe59 LB |
2360 | spin_lock(&dma_win_list_lock); |
2361 | list_for_each_entry(window, &dma_win_list, list) { | |
d61cd13e GB |
2362 | if (window->direct) { |
2363 | ret |= tce_clearrange_multi_pSeriesLP(arg->start_pfn, | |
2364 | arg->nr_pages, window->prop); | |
2365 | } | |
4e8b0cf4 NA |
2366 | /* XXX log error */ |
2367 | } | |
57dbbe59 | 2368 | spin_unlock(&dma_win_list_lock); |
4e8b0cf4 NA |
2369 | break; |
2370 | default: | |
2371 | break; | |
2372 | } | |
2373 | if (ret && action != MEM_CANCEL_ONLINE) | |
2374 | return NOTIFY_BAD; | |
2375 | ||
2376 | return NOTIFY_OK; | |
2377 | } | |
2378 | ||
2379 | static struct notifier_block iommu_mem_nb = { | |
2380 | .notifier_call = iommu_mem_notifier, | |
2381 | }; | |
2382 | ||
f5242e5a | 2383 | static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *data) |
bed59275 SR |
2384 | { |
2385 | int err = NOTIFY_OK; | |
f5242e5a GL |
2386 | struct of_reconfig_data *rd = data; |
2387 | struct device_node *np = rd->dn; | |
bed59275 | 2388 | struct pci_dn *pci = PCI_DN(np); |
57dbbe59 | 2389 | struct dma_win *window; |
bed59275 SR |
2390 | |
2391 | switch (action) { | |
1cf3d8b3 | 2392 | case OF_RECONFIG_DETACH_NODE: |
5efbabe0 GS |
2393 | /* |
2394 | * Removing the property will invoke the reconfig | |
2395 | * notifier again, which causes dead-lock on the | |
2396 | * read-write semaphore of the notifier chain. So | |
2397 | * we have to remove the property when releasing | |
2398 | * the device node. | |
2399 | */ | |
f431a8cd SB |
2400 | if (remove_dma_window_named(np, false, DIRECT64_PROPNAME, true)) |
2401 | remove_dma_window_named(np, false, DMA64_PROPNAME, true); | |
381ceda8 | 2402 | |
b348aa65 AK |
2403 | if (pci && pci->table_group) |
2404 | iommu_pseries_free_group(pci->table_group, | |
ac9a5889 | 2405 | np->full_name); |
4e8b0cf4 | 2406 | |
57dbbe59 LB |
2407 | spin_lock(&dma_win_list_lock); |
2408 | list_for_each_entry(window, &dma_win_list, list) { | |
4e8b0cf4 NA |
2409 | if (window->device == np) { |
2410 | list_del(&window->list); | |
2411 | kfree(window); | |
2412 | break; | |
2413 | } | |
2414 | } | |
57dbbe59 | 2415 | spin_unlock(&dma_win_list_lock); |
bed59275 SR |
2416 | break; |
2417 | default: | |
2418 | err = NOTIFY_DONE; | |
2419 | break; | |
2420 | } | |
2421 | return err; | |
2422 | } | |
2423 | ||
2424 | static struct notifier_block iommu_reconfig_nb = { | |
2425 | .notifier_call = iommu_reconfig_notifier, | |
2426 | }; | |
1da177e4 | 2427 | |
1da177e4 | 2428 | /* These are called very early. */ |
e14ff96d | 2429 | void __init iommu_init_early_pSeries(void) |
1da177e4 | 2430 | { |
a8daac8a | 2431 | if (of_chosen && of_get_property(of_chosen, "linux,iommu-off", NULL)) |
1da177e4 | 2432 | return; |
1da177e4 | 2433 | |
57cfb814 | 2434 | if (firmware_has_feature(FW_FEATURE_LPAR)) { |
38ae9ec4 DA |
2435 | pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeriesLP; |
2436 | pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeriesLP; | |
9ae2fdde CH |
2437 | if (!disable_ddw) |
2438 | pseries_pci_controller_ops.iommu_bypass_supported = | |
2439 | iommu_bypass_supported_pSeriesLP; | |
1da177e4 | 2440 | } else { |
38ae9ec4 DA |
2441 | pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeries; |
2442 | pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeries; | |
1da177e4 LT |
2443 | } |
2444 | ||
2445 | ||
1cf3d8b3 | 2446 | of_reconfig_notifier_register(&iommu_reconfig_nb); |
4e8b0cf4 | 2447 | register_memory_notifier(&iommu_mem_nb); |
1da177e4 | 2448 | |
d862b441 | 2449 | set_pci_dma_ops(&dma_iommu_ops); |
1da177e4 LT |
2450 | } |
2451 | ||
4e89a2d8 WS |
2452 | static int __init disable_multitce(char *str) |
2453 | { | |
2454 | if (strcmp(str, "off") == 0 && | |
2455 | firmware_has_feature(FW_FEATURE_LPAR) && | |
17a0364c AK |
2456 | (firmware_has_feature(FW_FEATURE_PUT_TCE_IND) || |
2457 | firmware_has_feature(FW_FEATURE_STUFF_TCE))) { | |
4e89a2d8 | 2458 | printk(KERN_INFO "Disabling MULTITCE firmware feature\n"); |
17a0364c AK |
2459 | powerpc_firmware_features &= |
2460 | ~(FW_FEATURE_PUT_TCE_IND | FW_FEATURE_STUFF_TCE); | |
4e89a2d8 WS |
2461 | } |
2462 | return 1; | |
2463 | } | |
2464 | ||
2465 | __setup("multitce=", disable_multitce); | |
4ad04e59 | 2466 | |
a9409044 AK |
#ifdef CONFIG_SPAPR_TCE_IOMMU
/*
 * Resolve the IOMMU group for @pdev by locating the device node that
 * carries its DMA window (via pci_dma_find()) and returning a new
 * reference to that node's table group.  ERR_PTR(-ENODEV) if any link
 * in the chain is missing.
 */
struct iommu_group *pSeries_pci_device_group(struct pci_controller *hose,
					     struct pci_dev *pdev)
{
	struct device_node *pdn = pci_dma_find(pdev->dev.of_node, NULL);
	struct pci_dn *pci;

	if (!pdn || !PCI_DN(pdn))
		return ERR_PTR(-ENODEV);

	pci = PCI_DN(pdn);
	if (!pci->table_group || !pci->table_group->group)
		return ERR_PTR(-ENODEV);

	return iommu_group_ref_get(pci->table_group->group);
}
#endif