Commit | Line | Data |
---|---|---|
1a59d1b8 | 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
1da177e4 | 2 | /* |
1da177e4 LT |
3 | * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation |
4 | * | |
bc97ce95 | 5 | * Rewrite, cleanup: |
1da177e4 | 6 | * |
91f14480 | 7 | * Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation |
bc97ce95 | 8 | * Copyright (C) 2006 Olof Johansson <olof@lixom.net> |
1da177e4 LT |
9 | * |
10 | * Dynamic DMA mapping support, pSeries-specific parts, both SMP and LPAR. | |
1da177e4 LT |
11 | */ |
12 | ||
1da177e4 LT |
13 | #include <linux/init.h> |
14 | #include <linux/types.h> | |
15 | #include <linux/slab.h> | |
16 | #include <linux/mm.h> | |
beacc6da | 17 | #include <linux/memblock.h> |
1da177e4 LT |
18 | #include <linux/spinlock.h> |
19 | #include <linux/string.h> | |
20 | #include <linux/pci.h> | |
21 | #include <linux/dma-mapping.h> | |
62a8bd6c | 22 | #include <linux/crash_dump.h> |
4e8b0cf4 | 23 | #include <linux/memory.h> |
1cf3d8b3 | 24 | #include <linux/of.h> |
2500763d | 25 | #include <linux/of_address.h> |
ac9a5889 | 26 | #include <linux/iommu.h> |
0eaf4def | 27 | #include <linux/rculist.h> |
1da177e4 LT |
28 | #include <asm/io.h> |
29 | #include <asm/prom.h> | |
30 | #include <asm/rtas.h> | |
1da177e4 LT |
31 | #include <asm/iommu.h> |
32 | #include <asm/pci-bridge.h> | |
33 | #include <asm/machdep.h> | |
1ababe11 | 34 | #include <asm/firmware.h> |
c707ffcf | 35 | #include <asm/tce.h> |
d387899f | 36 | #include <asm/ppc-pci.h> |
2249ca9d | 37 | #include <asm/udbg.h> |
4e8b0cf4 | 38 | #include <asm/mmzone.h> |
212bebb4 | 39 | #include <asm/plpar_wrappers.h> |
a1218720 | 40 | |
38ae9ec4 | 41 | #include "pseries.h" |
1da177e4 | 42 | |
/*
 * Indices 0..2 name the query / create / remove PE DMA window operations.
 * DDW_APPLICABLE_SIZE follows the last index and therefore equals the
 * number of entries (3).
 * NOTE(review): presumably these index a firmware-provided array of DDW
 * call tokens - confirm against the users of these constants.
 */
enum {
	DDW_QUERY_PE_DMA_WIN,		/* 0 */
	DDW_CREATE_PE_DMA_WIN,		/* 1 */
	DDW_REMOVE_PE_DMA_WIN,		/* 2 */

	DDW_APPLICABLE_SIZE		/* 3: number of entries above */
};
50 | ||
/*
 * Indices for the DDW extension entries.
 * NOTE(review): presumably these index a firmware "extensions" property
 * whose first cell is its size - confirm against the users.
 */
enum {
	DDW_EXT_SIZE,			/* 0 */
	DDW_EXT_RESET_DMA_WIN,		/* 1 */
	DDW_EXT_QUERY_OUT_SIZE		/* 2 */
};
56 | ||
4ff8677a | 57 | static struct iommu_table *iommu_pseries_alloc_table(int node) |
b348aa65 | 58 | { |
4dd9eab3 | 59 | struct iommu_table *tbl; |
b348aa65 | 60 | |
b348aa65 AK |
61 | tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, node); |
62 | if (!tbl) | |
4ff8677a | 63 | return NULL; |
b348aa65 | 64 | |
0eaf4def | 65 | INIT_LIST_HEAD_RCU(&tbl->it_group_list); |
e5afdf9d | 66 | kref_init(&tbl->it_kref); |
4ff8677a LB |
67 | return tbl; |
68 | } | |
0eaf4def | 69 | |
4ff8677a LB |
70 | static struct iommu_table_group *iommu_pseries_alloc_group(int node) |
71 | { | |
72 | struct iommu_table_group *table_group; | |
73 | ||
74 | table_group = kzalloc_node(sizeof(*table_group), GFP_KERNEL, node); | |
75 | if (!table_group) | |
76 | return NULL; | |
b348aa65 | 77 | |
9d67c943 AK |
78 | #ifdef CONFIG_IOMMU_API |
79 | table_group->ops = &spapr_tce_table_group_ops; | |
80 | table_group->pgsizes = SZ_4K; | |
81 | #endif | |
82 | ||
4ff8677a LB |
83 | table_group->tables[0] = iommu_pseries_alloc_table(node); |
84 | if (table_group->tables[0]) | |
85 | return table_group; | |
b348aa65 | 86 | |
4dd9eab3 | 87 | kfree(table_group); |
b348aa65 AK |
88 | return NULL; |
89 | } | |
90 | ||
91 | static void iommu_pseries_free_group(struct iommu_table_group *table_group, | |
ac9a5889 AK |
92 | const char *node_name) |
93 | { | |
b348aa65 AK |
94 | if (!table_group) |
95 | return; | |
96 | ||
ac9a5889 | 97 | #ifdef CONFIG_IOMMU_API |
b348aa65 AK |
98 | if (table_group->group) { |
99 | iommu_group_put(table_group->group); | |
100 | BUG_ON(table_group->group); | |
ac9a5889 AK |
101 | } |
102 | #endif | |
1f7aacc5 GB |
103 | |
104 | /* Default DMA window table is at index 0, while DDW at 1. SR-IOV | |
105 | * adapters only have table on index 1. | |
106 | */ | |
107 | if (table_group->tables[0]) | |
108 | iommu_tce_table_put(table_group->tables[0]); | |
109 | ||
110 | if (table_group->tables[1]) | |
111 | iommu_tce_table_put(table_group->tables[1]); | |
b348aa65 AK |
112 | |
113 | kfree(table_group); | |
ac9a5889 AK |
114 | } |
115 | ||
6490c490 | 116 | static int tce_build_pSeries(struct iommu_table *tbl, long index, |
bc97ce95 | 117 | long npages, unsigned long uaddr, |
4f3dd8a0 | 118 | enum dma_data_direction direction, |
00085f1e | 119 | unsigned long attrs) |
1da177e4 | 120 | { |
bc97ce95 | 121 | u64 proto_tce; |
c05f57fd | 122 | __be64 *tcep; |
bc97ce95 | 123 | u64 rpn; |
0c634baf LB |
124 | const unsigned long tceshift = tbl->it_page_shift; |
125 | const unsigned long pagesize = IOMMU_PAGE_SIZE(tbl); | |
1da177e4 | 126 | |
bc97ce95 | 127 | proto_tce = TCE_PCI_READ; // Read allowed |
1da177e4 LT |
128 | |
129 | if (direction != DMA_TO_DEVICE) | |
bc97ce95 | 130 | proto_tce |= TCE_PCI_WRITE; |
1da177e4 | 131 | |
c05f57fd | 132 | tcep = ((__be64 *)tbl->it_base) + index; |
1da177e4 LT |
133 | |
134 | while (npages--) { | |
95f72d1e | 135 | /* can't move this out since we might cross MEMBLOCK boundary */ |
0c634baf LB |
136 | rpn = __pa(uaddr) >> tceshift; |
137 | *tcep = cpu_to_be64(proto_tce | rpn << tceshift); | |
1da177e4 | 138 | |
0c634baf | 139 | uaddr += pagesize; |
bc97ce95 | 140 | tcep++; |
1da177e4 | 141 | } |
6490c490 | 142 | return 0; |
1da177e4 LT |
143 | } |
144 | ||
145 | ||
146 | static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages) | |
147 | { | |
c05f57fd | 148 | __be64 *tcep; |
1da177e4 | 149 | |
c05f57fd | 150 | tcep = ((__be64 *)tbl->it_base) + index; |
bc97ce95 OJ |
151 | |
152 | while (npages--) | |
153 | *(tcep++) = 0; | |
1da177e4 LT |
154 | } |
155 | ||
5f50867b HM |
156 | static unsigned long tce_get_pseries(struct iommu_table *tbl, long index) |
157 | { | |
df015604 | 158 | __be64 *tcep; |
5f50867b | 159 | |
df015604 | 160 | tcep = ((__be64 *)tbl->it_base) + index; |
5f50867b | 161 | |
df015604 | 162 | return be64_to_cpu(*tcep); |
5f50867b | 163 | } |
1da177e4 | 164 | |
0c634baf | 165 | static void tce_free_pSeriesLP(unsigned long liobn, long, long, long); |
6490c490 RJ |
166 | static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long); |
167 | ||
7559d3d2 | 168 | static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift, |
1da177e4 | 169 | long npages, unsigned long uaddr, |
4f3dd8a0 | 170 | enum dma_data_direction direction, |
00085f1e | 171 | unsigned long attrs) |
1da177e4 | 172 | { |
6490c490 | 173 | u64 rc = 0; |
bc97ce95 OJ |
174 | u64 proto_tce, tce; |
175 | u64 rpn; | |
6490c490 RJ |
176 | int ret = 0; |
177 | long tcenum_start = tcenum, npages_start = npages; | |
1da177e4 | 178 | |
7559d3d2 | 179 | rpn = __pa(uaddr) >> tceshift; |
bc97ce95 | 180 | proto_tce = TCE_PCI_READ; |
1da177e4 | 181 | if (direction != DMA_TO_DEVICE) |
bc97ce95 | 182 | proto_tce |= TCE_PCI_WRITE; |
1da177e4 LT |
183 | |
184 | while (npages--) { | |
0c634baf | 185 | tce = proto_tce | rpn << tceshift; |
7559d3d2 | 186 | rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, tce); |
bc97ce95 | 187 | |
6490c490 RJ |
188 | if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) { |
189 | ret = (int)rc; | |
0c634baf | 190 | tce_free_pSeriesLP(liobn, tcenum_start, tceshift, |
6490c490 RJ |
191 | (npages_start - (npages + 1))); |
192 | break; | |
193 | } | |
194 | ||
1da177e4 | 195 | if (rc && printk_ratelimit()) { |
fe333321 | 196 | printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc); |
7559d3d2 | 197 | printk("\tindex = 0x%llx\n", (u64)liobn); |
fe333321 IM |
198 | printk("\ttcenum = 0x%llx\n", (u64)tcenum); |
199 | printk("\ttce val = 0x%llx\n", tce ); | |
4ff52b4d | 200 | dump_stack(); |
1da177e4 | 201 | } |
bc97ce95 | 202 | |
1da177e4 | 203 | tcenum++; |
bc97ce95 | 204 | rpn++; |
1da177e4 | 205 | } |
6490c490 | 206 | return ret; |
1da177e4 LT |
207 | } |
208 | ||
df015604 | 209 | static DEFINE_PER_CPU(__be64 *, tce_page); |
1da177e4 | 210 | |
6490c490 | 211 | static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, |
1da177e4 | 212 | long npages, unsigned long uaddr, |
4f3dd8a0 | 213 | enum dma_data_direction direction, |
00085f1e | 214 | unsigned long attrs) |
1da177e4 | 215 | { |
6490c490 | 216 | u64 rc = 0; |
bc97ce95 | 217 | u64 proto_tce; |
df015604 | 218 | __be64 *tcep; |
bc97ce95 | 219 | u64 rpn; |
1da177e4 | 220 | long l, limit; |
6490c490 RJ |
221 | long tcenum_start = tcenum, npages_start = npages; |
222 | int ret = 0; | |
c1703e85 | 223 | unsigned long flags; |
0c634baf | 224 | const unsigned long tceshift = tbl->it_page_shift; |
1da177e4 | 225 | |
17a0364c | 226 | if ((npages == 1) || !firmware_has_feature(FW_FEATURE_PUT_TCE_IND)) { |
7559d3d2 | 227 | return tce_build_pSeriesLP(tbl->it_index, tcenum, |
0c634baf | 228 | tceshift, npages, uaddr, |
6490c490 | 229 | direction, attrs); |
541b2755 | 230 | } |
1da177e4 | 231 | |
c1703e85 AB |
232 | local_irq_save(flags); /* to protect tcep and the page behind it */ |
233 | ||
69111bac | 234 | tcep = __this_cpu_read(tce_page); |
1da177e4 LT |
235 | |
236 | /* This is safe to do since interrupts are off when we're called | |
237 | * from iommu_alloc{,_sg}() | |
238 | */ | |
239 | if (!tcep) { | |
df015604 | 240 | tcep = (__be64 *)__get_free_page(GFP_ATOMIC); |
1da177e4 | 241 | /* If allocation fails, fall back to the loop implementation */ |
541b2755 | 242 | if (!tcep) { |
c1703e85 | 243 | local_irq_restore(flags); |
7559d3d2 | 244 | return tce_build_pSeriesLP(tbl->it_index, tcenum, |
0c634baf | 245 | tceshift, |
7559d3d2 | 246 | npages, uaddr, direction, attrs); |
541b2755 | 247 | } |
69111bac | 248 | __this_cpu_write(tce_page, tcep); |
1da177e4 LT |
249 | } |
250 | ||
0c634baf | 251 | rpn = __pa(uaddr) >> tceshift; |
bc97ce95 | 252 | proto_tce = TCE_PCI_READ; |
1da177e4 | 253 | if (direction != DMA_TO_DEVICE) |
bc97ce95 | 254 | proto_tce |= TCE_PCI_WRITE; |
1da177e4 LT |
255 | |
256 | /* We can map max one pageful of TCEs at a time */ | |
257 | do { | |
258 | /* | |
259 | * Set up the page with TCE data, looping through and setting | |
260 | * the values. | |
261 | */ | |
14b5d59a | 262 | limit = min_t(long, npages, 4096 / TCE_ENTRY_SIZE); |
1da177e4 LT |
263 | |
264 | for (l = 0; l < limit; l++) { | |
0c634baf | 265 | tcep[l] = cpu_to_be64(proto_tce | rpn << tceshift); |
bc97ce95 | 266 | rpn++; |
1da177e4 LT |
267 | } |
268 | ||
269 | rc = plpar_tce_put_indirect((u64)tbl->it_index, | |
0c634baf | 270 | (u64)tcenum << tceshift, |
474e3d56 | 271 | (u64)__pa(tcep), |
1da177e4 LT |
272 | limit); |
273 | ||
274 | npages -= limit; | |
275 | tcenum += limit; | |
276 | } while (npages > 0 && !rc); | |
277 | ||
c1703e85 AB |
278 | local_irq_restore(flags); |
279 | ||
6490c490 RJ |
280 | if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) { |
281 | ret = (int)rc; | |
282 | tce_freemulti_pSeriesLP(tbl, tcenum_start, | |
283 | (npages_start - (npages + limit))); | |
284 | return ret; | |
285 | } | |
286 | ||
1da177e4 | 287 | if (rc && printk_ratelimit()) { |
fe333321 IM |
288 | printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc); |
289 | printk("\tindex = 0x%llx\n", (u64)tbl->it_index); | |
290 | printk("\tnpages = 0x%llx\n", (u64)npages); | |
291 | printk("\ttce[0] val = 0x%llx\n", tcep[0]); | |
4ff52b4d | 292 | dump_stack(); |
1da177e4 | 293 | } |
6490c490 | 294 | return ret; |
1da177e4 LT |
295 | } |
296 | ||
0c634baf LB |
297 | static void tce_free_pSeriesLP(unsigned long liobn, long tcenum, long tceshift, |
298 | long npages) | |
1da177e4 LT |
299 | { |
300 | u64 rc; | |
1da177e4 | 301 | |
1da177e4 | 302 | while (npages--) { |
0c634baf | 303 | rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, 0); |
1da177e4 LT |
304 | |
305 | if (rc && printk_ratelimit()) { | |
fe333321 | 306 | printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc); |
7559d3d2 | 307 | printk("\tindex = 0x%llx\n", (u64)liobn); |
fe333321 | 308 | printk("\ttcenum = 0x%llx\n", (u64)tcenum); |
4ff52b4d | 309 | dump_stack(); |
1da177e4 LT |
310 | } |
311 | ||
312 | tcenum++; | |
313 | } | |
314 | } | |
315 | ||
316 | ||
317 | static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages) | |
318 | { | |
319 | u64 rc; | |
9d2ccf00 GB |
320 | long rpages = npages; |
321 | unsigned long limit; | |
1da177e4 | 322 | |
17a0364c | 323 | if (!firmware_has_feature(FW_FEATURE_STUFF_TCE)) |
0c634baf LB |
324 | return tce_free_pSeriesLP(tbl->it_index, tcenum, |
325 | tbl->it_page_shift, npages); | |
da004c36 | 326 | |
9d2ccf00 GB |
327 | do { |
328 | limit = min_t(unsigned long, rpages, 512); | |
329 | ||
330 | rc = plpar_tce_stuff((u64)tbl->it_index, | |
331 | (u64)tcenum << tbl->it_page_shift, 0, limit); | |
332 | ||
333 | rpages -= limit; | |
334 | tcenum += limit; | |
335 | } while (rpages > 0 && !rc); | |
1da177e4 LT |
336 | |
337 | if (rc && printk_ratelimit()) { | |
338 | printk("tce_freemulti_pSeriesLP: plpar_tce_stuff failed\n"); | |
fe333321 IM |
339 | printk("\trc = %lld\n", rc); |
340 | printk("\tindex = 0x%llx\n", (u64)tbl->it_index); | |
341 | printk("\tnpages = 0x%llx\n", (u64)npages); | |
4ff52b4d | 342 | dump_stack(); |
1da177e4 LT |
343 | } |
344 | } | |
345 | ||
5f50867b HM |
346 | static unsigned long tce_get_pSeriesLP(struct iommu_table *tbl, long tcenum) |
347 | { | |
348 | u64 rc; | |
349 | unsigned long tce_ret; | |
350 | ||
0c634baf LB |
351 | rc = plpar_tce_get((u64)tbl->it_index, |
352 | (u64)tcenum << tbl->it_page_shift, &tce_ret); | |
5f50867b HM |
353 | |
354 | if (rc && printk_ratelimit()) { | |
fe333321 IM |
355 | printk("tce_get_pSeriesLP: plpar_tce_get failed. rc=%lld\n", rc); |
356 | printk("\tindex = 0x%llx\n", (u64)tbl->it_index); | |
357 | printk("\ttcenum = 0x%llx\n", (u64)tcenum); | |
4ff52b4d | 358 | dump_stack(); |
5f50867b HM |
359 | } |
360 | ||
361 | return tce_ret; | |
362 | } | |
363 | ||
25985edc | 364 | /* this is compatible with cells for the device tree property */ |
4e8b0cf4 NA |
365 | struct dynamic_dma_window_prop { |
366 | __be32 liobn; /* tce table number */ | |
367 | __be64 dma_base; /* address hi,lo */ | |
368 | __be32 tce_shift; /* ilog2(tce_page_size) */ | |
369 | __be32 window_shift; /* ilog2(tce_window_size) */ | |
370 | }; | |
371 | ||
57dbbe59 | 372 | struct dma_win { |
4e8b0cf4 NA |
373 | struct device_node *device; |
374 | const struct dynamic_dma_window_prop *prop; | |
d61cd13e | 375 | bool direct; |
4e8b0cf4 NA |
376 | struct list_head list; |
377 | }; | |
378 | ||
379 | /* Dynamic DMA Window support */ | |
380 | struct ddw_query_response { | |
9410e018 | 381 | u32 windows_available; |
80f02512 | 382 | u64 largest_available_block; |
9410e018 AK |
383 | u32 page_size; |
384 | u32 migration_capable; | |
4e8b0cf4 NA |
385 | }; |
386 | ||
387 | struct ddw_create_response { | |
9410e018 AK |
388 | u32 liobn; |
389 | u32 addr_hi; | |
390 | u32 addr_lo; | |
4e8b0cf4 NA |
391 | }; |
392 | ||
/* List head for the DMA windows tracked by this file. */
static LIST_HEAD(dma_win_list);
/* Guards against races between memory on/offline and window creation. */
static DEFINE_SPINLOCK(dma_win_list_lock);
/* Guards against initializing a window twice for the same device. */
static DEFINE_MUTEX(dma_win_init_mutex);
4e8b0cf4 NA |
398 | |
399 | static int tce_clearrange_multi_pSeriesLP(unsigned long start_pfn, | |
400 | unsigned long num_pfn, const void *arg) | |
401 | { | |
402 | const struct dynamic_dma_window_prop *maprange = arg; | |
403 | int rc; | |
404 | u64 tce_size, num_tce, dma_offset, next; | |
405 | u32 tce_shift; | |
406 | long limit; | |
407 | ||
408 | tce_shift = be32_to_cpu(maprange->tce_shift); | |
409 | tce_size = 1ULL << tce_shift; | |
410 | next = start_pfn << PAGE_SHIFT; | |
411 | num_tce = num_pfn << PAGE_SHIFT; | |
412 | ||
413 | /* round back to the beginning of the tce page size */ | |
414 | num_tce += next & (tce_size - 1); | |
415 | next &= ~(tce_size - 1); | |
416 | ||
417 | /* covert to number of tces */ | |
418 | num_tce |= tce_size - 1; | |
419 | num_tce >>= tce_shift; | |
420 | ||
421 | do { | |
422 | /* | |
423 | * Set up the page with TCE data, looping through and setting | |
424 | * the values. | |
425 | */ | |
426 | limit = min_t(long, num_tce, 512); | |
427 | dma_offset = next + be64_to_cpu(maprange->dma_base); | |
428 | ||
429 | rc = plpar_tce_stuff((u64)be32_to_cpu(maprange->liobn), | |
430 | dma_offset, | |
431 | 0, limit); | |
22b38298 | 432 | next += limit * tce_size; |
4e8b0cf4 NA |
433 | num_tce -= limit; |
434 | } while (num_tce > 0 && !rc); | |
435 | ||
436 | return rc; | |
437 | } | |
438 | ||
439 | static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, | |
440 | unsigned long num_pfn, const void *arg) | |
441 | { | |
442 | const struct dynamic_dma_window_prop *maprange = arg; | |
df015604 AB |
443 | u64 tce_size, num_tce, dma_offset, next, proto_tce, liobn; |
444 | __be64 *tcep; | |
4e8b0cf4 NA |
445 | u32 tce_shift; |
446 | u64 rc = 0; | |
447 | long l, limit; | |
448 | ||
17a0364c | 449 | if (!firmware_has_feature(FW_FEATURE_PUT_TCE_IND)) { |
7559d3d2 AK |
450 | unsigned long tceshift = be32_to_cpu(maprange->tce_shift); |
451 | unsigned long dmastart = (start_pfn << PAGE_SHIFT) + | |
452 | be64_to_cpu(maprange->dma_base); | |
453 | unsigned long tcenum = dmastart >> tceshift; | |
454 | unsigned long npages = num_pfn << PAGE_SHIFT >> tceshift; | |
455 | void *uaddr = __va(start_pfn << PAGE_SHIFT); | |
456 | ||
457 | return tce_build_pSeriesLP(be32_to_cpu(maprange->liobn), | |
458 | tcenum, tceshift, npages, (unsigned long) uaddr, | |
459 | DMA_BIDIRECTIONAL, 0); | |
460 | } | |
461 | ||
4e8b0cf4 | 462 | local_irq_disable(); /* to protect tcep and the page behind it */ |
69111bac | 463 | tcep = __this_cpu_read(tce_page); |
4e8b0cf4 NA |
464 | |
465 | if (!tcep) { | |
df015604 | 466 | tcep = (__be64 *)__get_free_page(GFP_ATOMIC); |
4e8b0cf4 NA |
467 | if (!tcep) { |
468 | local_irq_enable(); | |
469 | return -ENOMEM; | |
470 | } | |
69111bac | 471 | __this_cpu_write(tce_page, tcep); |
4e8b0cf4 NA |
472 | } |
473 | ||
474 | proto_tce = TCE_PCI_READ | TCE_PCI_WRITE; | |
475 | ||
476 | liobn = (u64)be32_to_cpu(maprange->liobn); | |
477 | tce_shift = be32_to_cpu(maprange->tce_shift); | |
478 | tce_size = 1ULL << tce_shift; | |
479 | next = start_pfn << PAGE_SHIFT; | |
480 | num_tce = num_pfn << PAGE_SHIFT; | |
481 | ||
482 | /* round back to the beginning of the tce page size */ | |
483 | num_tce += next & (tce_size - 1); | |
484 | next &= ~(tce_size - 1); | |
485 | ||
486 | /* covert to number of tces */ | |
487 | num_tce |= tce_size - 1; | |
488 | num_tce >>= tce_shift; | |
489 | ||
490 | /* We can map max one pageful of TCEs at a time */ | |
491 | do { | |
492 | /* | |
493 | * Set up the page with TCE data, looping through and setting | |
494 | * the values. | |
495 | */ | |
2747fd26 | 496 | limit = min_t(long, num_tce, 4096 / TCE_ENTRY_SIZE); |
4e8b0cf4 NA |
497 | dma_offset = next + be64_to_cpu(maprange->dma_base); |
498 | ||
499 | for (l = 0; l < limit; l++) { | |
df015604 | 500 | tcep[l] = cpu_to_be64(proto_tce | next); |
4e8b0cf4 NA |
501 | next += tce_size; |
502 | } | |
503 | ||
504 | rc = plpar_tce_put_indirect(liobn, | |
505 | dma_offset, | |
474e3d56 | 506 | (u64)__pa(tcep), |
4e8b0cf4 NA |
507 | limit); |
508 | ||
509 | num_tce -= limit; | |
510 | } while (num_tce > 0 && !rc); | |
511 | ||
512 | /* error cleanup: caller will clear whole range */ | |
513 | ||
514 | local_irq_enable(); | |
515 | return rc; | |
516 | } | |
517 | ||
/*
 * Adapter with a non-const @arg parameter that forwards unchanged to
 * tce_setrange_multi_pSeriesLP() and returns its result.
 */
static int tce_setrange_multi_pSeriesLP_walk(unsigned long start_pfn,
		unsigned long num_pfn, void *arg)
{
	const void *maprange = arg;

	return tce_setrange_multi_pSeriesLP(start_pfn, num_pfn, maprange);
}
523 | ||
fc8cba8f LB |
524 | static void iommu_table_setparms_common(struct iommu_table *tbl, unsigned long busno, |
525 | unsigned long liobn, unsigned long win_addr, | |
526 | unsigned long window_size, unsigned long page_shift, | |
527 | void *base, struct iommu_table_ops *table_ops) | |
528 | { | |
529 | tbl->it_busno = busno; | |
530 | tbl->it_index = liobn; | |
531 | tbl->it_offset = win_addr >> page_shift; | |
532 | tbl->it_size = window_size >> page_shift; | |
533 | tbl->it_page_shift = page_shift; | |
534 | tbl->it_base = (unsigned long)base; | |
535 | tbl->it_blocksize = 16; | |
536 | tbl->it_type = TCE_PCI; | |
537 | tbl->it_ops = table_ops; | |
538 | } | |
539 | ||
540 | struct iommu_table_ops iommu_table_pseries_ops; | |
541 | ||
1da177e4 LT |
542 | static void iommu_table_setparms(struct pci_controller *phb, |
543 | struct device_node *dn, | |
bc97ce95 | 544 | struct iommu_table *tbl) |
1da177e4 LT |
545 | { |
546 | struct device_node *node; | |
b7d6bf4f | 547 | const unsigned long *basep; |
9938c474 | 548 | const u32 *sizep; |
1da177e4 | 549 | |
fc8cba8f LB |
550 | /* Test if we are going over 2GB of DMA space */ |
551 | if (phb->dma_window_base_cur + phb->dma_window_size > SZ_2G) { | |
552 | udbg_printf("PCI_DMA: Unexpected number of IOAs under this PHB.\n"); | |
553 | panic("PCI_DMA: Unexpected number of IOAs under this PHB.\n"); | |
554 | } | |
1da177e4 | 555 | |
fc8cba8f | 556 | node = phb->dn; |
e2eb6392 SR |
557 | basep = of_get_property(node, "linux,tce-base", NULL); |
558 | sizep = of_get_property(node, "linux,tce-size", NULL); | |
1da177e4 | 559 | if (basep == NULL || sizep == NULL) { |
b7c670d6 RH |
560 | printk(KERN_ERR "PCI_DMA: iommu_table_setparms: %pOF has " |
561 | "missing tce entries !\n", dn); | |
1da177e4 LT |
562 | return; |
563 | } | |
564 | ||
fc8cba8f LB |
565 | iommu_table_setparms_common(tbl, phb->bus->number, 0, phb->dma_window_base_cur, |
566 | phb->dma_window_size, IOMMU_PAGE_SHIFT_4K, | |
567 | __va(*basep), &iommu_table_pseries_ops); | |
5f50867b | 568 | |
62a8bd6c | 569 | if (!is_kdump_kernel()) |
54622f10 | 570 | memset((void *)tbl->it_base, 0, *sizep); |
1da177e4 | 571 | |
1da177e4 | 572 | phb->dma_window_base_cur += phb->dma_window_size; |
1da177e4 LT |
573 | } |
574 | ||
fc8cba8f LB |
575 | struct iommu_table_ops iommu_table_lpar_multi_ops; |
576 | ||
da004c36 AK |
577 | struct iommu_table_ops iommu_table_pseries_ops = { |
578 | .set = tce_build_pSeries, | |
579 | .clear = tce_free_pSeries, | |
580 | .get = tce_get_pseries | |
581 | }; | |
582 | ||
12d04eef | 583 | static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) |
1da177e4 | 584 | { |
3c2822cc | 585 | struct device_node *dn; |
1da177e4 | 586 | struct iommu_table *tbl; |
3c2822cc OJ |
587 | struct device_node *isa_dn, *isa_dn_orig; |
588 | struct device_node *tmp; | |
589 | struct pci_dn *pci; | |
590 | int children; | |
1da177e4 | 591 | |
3c2822cc | 592 | dn = pci_bus_to_OF_node(bus); |
12d04eef | 593 | |
b7c670d6 | 594 | pr_debug("pci_dma_bus_setup_pSeries: setting up bus %pOF\n", dn); |
3c2822cc OJ |
595 | |
596 | if (bus->self) { | |
597 | /* This is not a root bus, any setup will be done for the | |
598 | * device-side of the bridge in iommu_dev_setup_pSeries(). | |
599 | */ | |
600 | return; | |
601 | } | |
12d04eef | 602 | pci = PCI_DN(dn); |
3c2822cc OJ |
603 | |
604 | /* Check if the ISA bus on the system is under | |
605 | * this PHB. | |
1da177e4 | 606 | */ |
3c2822cc | 607 | isa_dn = isa_dn_orig = of_find_node_by_type(NULL, "isa"); |
1da177e4 | 608 | |
3c2822cc OJ |
609 | while (isa_dn && isa_dn != dn) |
610 | isa_dn = isa_dn->parent; | |
611 | ||
498b6514 | 612 | of_node_put(isa_dn_orig); |
1da177e4 | 613 | |
d3c58fb1 | 614 | /* Count number of direct PCI children of the PHB. */ |
3c2822cc | 615 | for (children = 0, tmp = dn->child; tmp; tmp = tmp->sibling) |
d3c58fb1 | 616 | children++; |
1da177e4 | 617 | |
f7ebf352 | 618 | pr_debug("Children: %d\n", children); |
1da177e4 | 619 | |
3c2822cc OJ |
620 | /* Calculate amount of DMA window per slot. Each window must be |
621 | * a power of two (due to pci_alloc_consistent requirements). | |
622 | * | |
623 | * Keep 256MB aside for PHBs with ISA. | |
624 | */ | |
1da177e4 | 625 | |
3c2822cc OJ |
626 | if (!isa_dn) { |
627 | /* No ISA/IDE - just set window size and return */ | |
628 | pci->phb->dma_window_size = 0x80000000ul; /* To be divided */ | |
629 | ||
630 | while (pci->phb->dma_window_size * children > 0x80000000ul) | |
631 | pci->phb->dma_window_size >>= 1; | |
41febbc8 | 632 | pr_debug("No ISA/IDE, window size is 0x%llx\n", |
f7ebf352 | 633 | pci->phb->dma_window_size); |
3c2822cc OJ |
634 | pci->phb->dma_window_base_cur = 0; |
635 | ||
636 | return; | |
1da177e4 | 637 | } |
3c2822cc OJ |
638 | |
639 | /* If we have ISA, then we probably have an IDE | |
640 | * controller too. Allocate a 128MB table but | |
641 | * skip the first 128MB to avoid stepping on ISA | |
642 | * space. | |
643 | */ | |
644 | pci->phb->dma_window_size = 0x8000000ul; | |
645 | pci->phb->dma_window_base_cur = 0x8000000ul; | |
646 | ||
b348aa65 AK |
647 | pci->table_group = iommu_pseries_alloc_group(pci->phb->node); |
648 | tbl = pci->table_group->tables[0]; | |
3c2822cc OJ |
649 | |
650 | iommu_table_setparms(pci->phb, dn, tbl); | |
fc8cba8f | 651 | |
4be518d8 AK |
652 | if (!iommu_init_table(tbl, pci->phb->node, 0, 0)) |
653 | panic("Failed to initialize iommu table"); | |
3c2822cc OJ |
654 | |
655 | /* Divide the rest (1.75GB) among the children */ | |
656 | pci->phb->dma_window_size = 0x80000000ul; | |
657 | while (pci->phb->dma_window_size * children > 0x70000000ul) | |
658 | pci->phb->dma_window_size >>= 1; | |
659 | ||
41febbc8 | 660 | pr_debug("ISA/IDE, window size is 0x%llx\n", pci->phb->dma_window_size); |
1da177e4 LT |
661 | } |
662 | ||
b6e1f6ad AK |
663 | #ifdef CONFIG_IOMMU_API |
664 | static int tce_exchange_pseries(struct iommu_table *tbl, long index, unsigned | |
cad32d9d | 665 | long *tce, enum dma_data_direction *direction) |
b6e1f6ad AK |
666 | { |
667 | long rc; | |
668 | unsigned long ioba = (unsigned long) index << tbl->it_page_shift; | |
669 | unsigned long flags, oldtce = 0; | |
670 | u64 proto_tce = iommu_direction_to_tce_perm(*direction); | |
671 | unsigned long newtce = *tce | proto_tce; | |
672 | ||
673 | spin_lock_irqsave(&tbl->large_pool.lock, flags); | |
674 | ||
675 | rc = plpar_tce_get((u64)tbl->it_index, ioba, &oldtce); | |
676 | if (!rc) | |
677 | rc = plpar_tce_put((u64)tbl->it_index, ioba, newtce); | |
678 | ||
679 | if (!rc) { | |
680 | *direction = iommu_tce_direction(oldtce); | |
681 | *tce = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE); | |
682 | } | |
683 | ||
684 | spin_unlock_irqrestore(&tbl->large_pool.lock, flags); | |
685 | ||
686 | return rc; | |
687 | } | |
688 | #endif | |
689 | ||
da004c36 AK |
690 | struct iommu_table_ops iommu_table_lpar_multi_ops = { |
691 | .set = tce_buildmulti_pSeriesLP, | |
b6e1f6ad | 692 | #ifdef CONFIG_IOMMU_API |
021b7868 | 693 | .xchg_no_kill = tce_exchange_pseries, |
b6e1f6ad | 694 | #endif |
da004c36 AK |
695 | .clear = tce_freemulti_pSeriesLP, |
696 | .get = tce_get_pSeriesLP | |
697 | }; | |
1da177e4 | 698 | |
b1fc44ea AK |
699 | /* |
700 | * Find nearest ibm,dma-window (default DMA window) or direct DMA window or | |
701 | * dynamic 64bit DMA window, walking up the device tree. | |
702 | */ | |
703 | static struct device_node *pci_dma_find(struct device_node *dn, | |
09a3c1e4 | 704 | struct dynamic_dma_window_prop *prop) |
b1fc44ea | 705 | { |
09a3c1e4 GB |
706 | const __be32 *default_prop = NULL; |
707 | const __be32 *ddw_prop = NULL; | |
708 | struct device_node *rdn = NULL; | |
709 | bool default_win = false, ddw_win = false; | |
b1fc44ea AK |
710 | |
711 | for ( ; dn && PCI_DN(dn); dn = dn->parent) { | |
09a3c1e4 GB |
712 | default_prop = of_get_property(dn, "ibm,dma-window", NULL); |
713 | if (default_prop) { | |
714 | rdn = dn; | |
715 | default_win = true; | |
716 | } | |
717 | ddw_prop = of_get_property(dn, DIRECT64_PROPNAME, NULL); | |
718 | if (ddw_prop) { | |
719 | rdn = dn; | |
720 | ddw_win = true; | |
721 | break; | |
722 | } | |
723 | ddw_prop = of_get_property(dn, DMA64_PROPNAME, NULL); | |
724 | if (ddw_prop) { | |
725 | rdn = dn; | |
726 | ddw_win = true; | |
727 | break; | |
b1fc44ea | 728 | } |
09a3c1e4 GB |
729 | |
730 | /* At least found default window, which is the case for normal boot */ | |
731 | if (default_win) | |
732 | break; | |
b1fc44ea AK |
733 | } |
734 | ||
09a3c1e4 GB |
735 | /* For PCI devices there will always be a DMA window, either on the device |
736 | * or parent bus | |
737 | */ | |
738 | WARN_ON(!(default_win | ddw_win)); | |
739 | ||
740 | /* caller doesn't want to get DMA window property */ | |
741 | if (!prop) | |
742 | return rdn; | |
743 | ||
744 | /* parse DMA window property. During normal system boot, only default | |
745 | * DMA window is passed in OF. But, for kdump, a dedicated adapter might | |
746 | * have both default and DDW in FDT. In this scenario, DDW takes precedence | |
747 | * over default window. | |
748 | */ | |
749 | if (ddw_win) { | |
750 | struct dynamic_dma_window_prop *p; | |
751 | ||
752 | p = (struct dynamic_dma_window_prop *)ddw_prop; | |
753 | prop->liobn = p->liobn; | |
754 | prop->dma_base = p->dma_base; | |
755 | prop->tce_shift = p->tce_shift; | |
756 | prop->window_shift = p->window_shift; | |
757 | } else if (default_win) { | |
758 | unsigned long offset, size, liobn; | |
759 | ||
760 | of_parse_dma_window(rdn, default_prop, &liobn, &offset, &size); | |
761 | ||
762 | prop->liobn = cpu_to_be32((u32)liobn); | |
763 | prop->dma_base = cpu_to_be64(offset); | |
764 | prop->tce_shift = cpu_to_be32(IOMMU_PAGE_SHIFT_4K); | |
765 | prop->window_shift = cpu_to_be32(order_base_2(size)); | |
766 | } | |
767 | ||
768 | return rdn; | |
b1fc44ea AK |
769 | } |
770 | ||
12d04eef | 771 | static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) |
1da177e4 LT |
772 | { |
773 | struct iommu_table *tbl; | |
774 | struct device_node *dn, *pdn; | |
1635317f | 775 | struct pci_dn *ppci; |
09a3c1e4 | 776 | struct dynamic_dma_window_prop prop; |
1da177e4 | 777 | |
1da177e4 LT |
778 | dn = pci_bus_to_OF_node(bus); |
779 | ||
b7c670d6 RH |
780 | pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %pOF\n", |
781 | dn); | |
12d04eef | 782 | |
09a3c1e4 | 783 | pdn = pci_dma_find(dn, &prop); |
1da177e4 | 784 | |
09a3c1e4 GB |
785 | /* In PPC architecture, there will always be DMA window on bus or one of the |
786 | * parent bus. During reboot, there will be ibm,dma-window property to | |
787 | * define DMA window. For kdump, there will at least be default window or DDW | |
788 | * or both. | |
789 | */ | |
1da177e4 | 790 | |
e07102db | 791 | ppci = PCI_DN(pdn); |
12d04eef | 792 | |
b7c670d6 RH |
793 | pr_debug(" parent is %pOF, iommu_table: 0x%p\n", |
794 | pdn, ppci->table_group); | |
12d04eef | 795 | |
b348aa65 AK |
796 | if (!ppci->table_group) { |
797 | ppci->table_group = iommu_pseries_alloc_group(ppci->phb->node); | |
798 | tbl = ppci->table_group->tables[0]; | |
fc8cba8f | 799 | |
09a3c1e4 GB |
800 | iommu_table_setparms_common(tbl, ppci->phb->bus->number, |
801 | be32_to_cpu(prop.liobn), | |
802 | be64_to_cpu(prop.dma_base), | |
803 | 1ULL << be32_to_cpu(prop.window_shift), | |
804 | be32_to_cpu(prop.tce_shift), NULL, | |
805 | &iommu_table_lpar_multi_ops); | |
806 | ||
807 | /* Only for normal boot with default window. Doesn't matter even | |
808 | * if we set these with DDW which is 64bit during kdump, since | |
809 | * these will not be used during kdump. | |
810 | */ | |
811 | ppci->table_group->tce32_start = be64_to_cpu(prop.dma_base); | |
812 | ppci->table_group->tce32_size = 1 << be32_to_cpu(prop.window_shift); | |
813 | ||
814 | if (!iommu_init_table(tbl, ppci->phb->node, 0, 0)) | |
815 | panic("Failed to initialize iommu table"); | |
816 | ||
b348aa65 AK |
817 | iommu_register_group(ppci->table_group, |
818 | pci_domain_nr(bus), 0); | |
819 | pr_debug(" created table: %p\n", ppci->table_group); | |
1da177e4 | 820 | } |
1da177e4 LT |
821 | } |
822 | ||
823 | ||
12d04eef | 824 | static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) |
1da177e4 | 825 | { |
12d04eef | 826 | struct device_node *dn; |
3c2822cc | 827 | struct iommu_table *tbl; |
1da177e4 | 828 | |
f7ebf352 | 829 | pr_debug("pci_dma_dev_setup_pSeries: %s\n", pci_name(dev)); |
1da177e4 | 830 | |
58f9b0b0 | 831 | dn = dev->dev.of_node; |
1da177e4 | 832 | |
3c2822cc OJ |
833 | /* If we're the direct child of a root bus, then we need to allocate |
834 | * an iommu table ourselves. The bus setup code should have setup | |
835 | * the window sizes already. | |
836 | */ | |
837 | if (!dev->bus->self) { | |
12d04eef BH |
838 | struct pci_controller *phb = PCI_DN(dn)->phb; |
839 | ||
f7ebf352 | 840 | pr_debug(" --> first child, no bridge. Allocating iommu table.\n"); |
b348aa65 AK |
841 | PCI_DN(dn)->table_group = iommu_pseries_alloc_group(phb->node); |
842 | tbl = PCI_DN(dn)->table_group->tables[0]; | |
12d04eef | 843 | iommu_table_setparms(phb, dn, tbl); |
fc8cba8f | 844 | |
4be518d8 AK |
845 | if (!iommu_init_table(tbl, phb->node, 0, 0)) |
846 | panic("Failed to initialize iommu table"); | |
847 | ||
4617082e | 848 | set_iommu_table_base(&dev->dev, tbl); |
3c2822cc OJ |
849 | return; |
850 | } | |
851 | ||
852 | /* If this device is further down the bus tree, search upwards until | |
853 | * an already allocated iommu table is found and use that. | |
854 | */ | |
855 | ||
b348aa65 | 856 | while (dn && PCI_DN(dn) && PCI_DN(dn)->table_group == NULL) |
1da177e4 LT |
857 | dn = dn->parent; |
858 | ||
c409c631 | 859 | if (dn && PCI_DN(dn)) |
b348aa65 AK |
860 | set_iommu_table_base(&dev->dev, |
861 | PCI_DN(dn)->table_group->tables[0]); | |
c409c631 | 862 | else |
12d04eef BH |
863 | printk(KERN_WARNING "iommu: Device %s has no iommu table\n", |
864 | pci_name(dev)); | |
1da177e4 LT |
865 | } |
866 | ||
4e8b0cf4 NA |
867 | static int __read_mostly disable_ddw; |
868 | ||
869 | static int __init disable_ddw_setup(char *str) | |
870 | { | |
871 | disable_ddw = 1; | |
872 | printk(KERN_INFO "ppc iommu: disabling ddw.\n"); | |
873 | ||
874 | return 0; | |
875 | } | |
876 | ||
877 | early_param("disable_ddw", disable_ddw_setup); | |
878 | ||
7ed2ed2d | 879 | static void clean_dma_window(struct device_node *np, struct dynamic_dma_window_prop *dwp) |
4e8b0cf4 | 880 | { |
74d0b399 | 881 | int ret; |
9410e018 | 882 | |
4e8b0cf4 NA |
883 | ret = tce_clearrange_multi_pSeriesLP(0, |
884 | 1ULL << (be32_to_cpu(dwp->window_shift) - PAGE_SHIFT), dwp); | |
885 | if (ret) | |
f2c2cbcc JP |
886 | pr_warn("%pOF failed to clear tces in window.\n", |
887 | np); | |
4e8b0cf4 | 888 | else |
b7c670d6 RH |
889 | pr_debug("%pOF successfully cleared tces in window.\n", |
890 | np); | |
7ed2ed2d LB |
891 | } |
892 | ||
893 | /* | |
894 | * Call only if DMA window is clean. | |
895 | */ | |
896 | static void __remove_dma_window(struct device_node *np, u32 *ddw_avail, u64 liobn) | |
897 | { | |
898 | int ret; | |
4e8b0cf4 | 899 | |
cac3e629 | 900 | ret = rtas_call(ddw_avail[DDW_REMOVE_PE_DMA_WIN], 1, 1, NULL, liobn); |
ae69e1ed | 901 | if (ret) |
7ed2ed2d | 902 | pr_warn("%pOF: failed to remove DMA window: rtas returned " |
ae69e1ed | 903 | "%d to ibm,remove-pe-dma-window(%x) %llx\n", |
cac3e629 | 904 | np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn); |
ae69e1ed | 905 | else |
7ed2ed2d | 906 | pr_debug("%pOF: successfully removed DMA window: rtas returned " |
ae69e1ed | 907 | "%d to ibm,remove-pe-dma-window(%x) %llx\n", |
cac3e629 | 908 | np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn); |
74d0b399 LB |
909 | } |
910 | ||
7ed2ed2d LB |
911 | static void remove_dma_window(struct device_node *np, u32 *ddw_avail, |
912 | struct property *win) | |
913 | { | |
914 | struct dynamic_dma_window_prop *dwp; | |
915 | u64 liobn; | |
916 | ||
917 | dwp = win->value; | |
918 | liobn = (u64)be32_to_cpu(dwp->liobn); | |
919 | ||
920 | clean_dma_window(np, dwp); | |
921 | __remove_dma_window(np, ddw_avail, liobn); | |
922 | } | |
923 | ||
a5fd9512 | 924 | static int remove_ddw(struct device_node *np, bool remove_prop, const char *win_name) |
74d0b399 LB |
925 | { |
926 | struct property *win; | |
927 | u32 ddw_avail[DDW_APPLICABLE_SIZE]; | |
928 | int ret = 0; | |
929 | ||
a5fd9512 LB |
930 | win = of_find_property(np, win_name, NULL); |
931 | if (!win) | |
932 | return -EINVAL; | |
933 | ||
74d0b399 LB |
934 | ret = of_property_read_u32_array(np, "ibm,ddw-applicable", |
935 | &ddw_avail[0], DDW_APPLICABLE_SIZE); | |
936 | if (ret) | |
a5fd9512 | 937 | return 0; |
74d0b399 | 938 | |
74d0b399 LB |
939 | |
940 | if (win->length >= sizeof(struct dynamic_dma_window_prop)) | |
941 | remove_dma_window(np, ddw_avail, win); | |
942 | ||
943 | if (!remove_prop) | |
a5fd9512 | 944 | return 0; |
4e8b0cf4 | 945 | |
74d0b399 | 946 | ret = of_remove_property(np, win); |
2573f684 | 947 | if (ret) |
57dbbe59 | 948 | pr_warn("%pOF: failed to remove DMA window property: %d\n", |
b7c670d6 | 949 | np, ret); |
a5fd9512 | 950 | return 0; |
2573f684 | 951 | } |
4e8b0cf4 | 952 | |
3bf983e4 GB |
953 | static bool find_existing_ddw(struct device_node *pdn, u64 *dma_addr, int *window_shift, |
954 | bool *direct_mapping) | |
4e8b0cf4 | 955 | { |
57dbbe59 LB |
956 | struct dma_win *window; |
957 | const struct dynamic_dma_window_prop *dma64; | |
2ca73c54 | 958 | bool found = false; |
4e8b0cf4 | 959 | |
57dbbe59 | 960 | spin_lock(&dma_win_list_lock); |
4e8b0cf4 | 961 | /* check if we already created a window and dupe that config if so */ |
57dbbe59 | 962 | list_for_each_entry(window, &dma_win_list, list) { |
4e8b0cf4 | 963 | if (window->device == pdn) { |
57dbbe59 LB |
964 | dma64 = window->prop; |
965 | *dma_addr = be64_to_cpu(dma64->dma_base); | |
966 | *window_shift = be32_to_cpu(dma64->window_shift); | |
3bf983e4 | 967 | *direct_mapping = window->direct; |
2ca73c54 | 968 | found = true; |
4e8b0cf4 NA |
969 | break; |
970 | } | |
971 | } | |
57dbbe59 | 972 | spin_unlock(&dma_win_list_lock); |
4e8b0cf4 | 973 | |
2ca73c54 | 974 | return found; |
4e8b0cf4 NA |
975 | } |
976 | ||
57dbbe59 LB |
977 | static struct dma_win *ddw_list_new_entry(struct device_node *pdn, |
978 | const struct dynamic_dma_window_prop *dma64) | |
92a23219 | 979 | { |
57dbbe59 | 980 | struct dma_win *window; |
92a23219 LB |
981 | |
982 | window = kzalloc(sizeof(*window), GFP_KERNEL); | |
983 | if (!window) | |
984 | return NULL; | |
985 | ||
986 | window->device = pdn; | |
987 | window->prop = dma64; | |
d61cd13e | 988 | window->direct = false; |
92a23219 LB |
989 | |
990 | return window; | |
991 | } | |
992 | ||
8599395d | 993 | static void find_existing_ddw_windows_named(const char *name) |
4e8b0cf4 | 994 | { |
97e7dc52 | 995 | int len; |
c8566780 | 996 | struct device_node *pdn; |
57dbbe59 | 997 | struct dma_win *window; |
8599395d | 998 | const struct dynamic_dma_window_prop *dma64; |
c8566780 | 999 | |
8599395d LB |
1000 | for_each_node_with_property(pdn, name) { |
1001 | dma64 = of_get_property(pdn, name, &len); | |
1002 | if (!dma64 || len < sizeof(*dma64)) { | |
1003 | remove_ddw(pdn, true, name); | |
97e7dc52 NA |
1004 | continue; |
1005 | } | |
c8566780 | 1006 | |
09a3c1e4 GB |
1007 | /* If at the time of system initialization, there are DDWs in OF, |
1008 | * it means this is during kexec. DDW could be direct or dynamic. | |
1009 | * We will just mark DDWs as "dynamic" since this is kdump path, | |
1010 | * no need to worry about perforance. ddw_list_new_entry() will | |
1011 | * set window->direct = false. | |
1012 | */ | |
8599395d | 1013 | window = ddw_list_new_entry(pdn, dma64); |
915b368f WJ |
1014 | if (!window) { |
1015 | of_node_put(pdn); | |
92a23219 | 1016 | break; |
915b368f | 1017 | } |
92a23219 | 1018 | |
57dbbe59 LB |
1019 | spin_lock(&dma_win_list_lock); |
1020 | list_add(&window->list, &dma_win_list); | |
1021 | spin_unlock(&dma_win_list_lock); | |
4e8b0cf4 | 1022 | } |
8599395d LB |
1023 | } |
1024 | ||
1025 | static int find_existing_ddw_windows(void) | |
1026 | { | |
1027 | if (!firmware_has_feature(FW_FEATURE_LPAR)) | |
1028 | return 0; | |
1029 | ||
1030 | find_existing_ddw_windows_named(DIRECT64_PROPNAME); | |
381ceda8 | 1031 | find_existing_ddw_windows_named(DMA64_PROPNAME); |
4e8b0cf4 | 1032 | |
c8566780 | 1033 | return 0; |
4e8b0cf4 | 1034 | } |
c8566780 | 1035 | machine_arch_initcall(pseries, find_existing_ddw_windows); |
4e8b0cf4 | 1036 | |
80f02512 LB |
1037 | /** |
1038 | * ddw_read_ext - Get the value of an DDW extension | |
1039 | * @np: device node from which the extension value is to be read. | |
1040 | * @extnum: index number of the extension. | |
1041 | * @value: pointer to return value, modified when extension is available. | |
1042 | * | |
1043 | * Checks if "ibm,ddw-extensions" exists for this node, and get the value | |
1044 | * on index 'extnum'. | |
1045 | * It can be used only to check if a property exists, passing value == NULL. | |
1046 | * | |
1047 | * Returns: | |
1048 | * 0 if extension successfully read | |
1049 | * -EINVAL if the "ibm,ddw-extensions" does not exist, | |
1050 | * -ENODATA if "ibm,ddw-extensions" does not have a value, and | |
1051 | * -EOVERFLOW if "ibm,ddw-extensions" does not contain this extension. | |
1052 | */ | |
1053 | static inline int ddw_read_ext(const struct device_node *np, int extnum, | |
1054 | u32 *value) | |
1055 | { | |
1056 | static const char propname[] = "ibm,ddw-extensions"; | |
1057 | u32 count; | |
1058 | int ret; | |
1059 | ||
1060 | ret = of_property_read_u32_index(np, propname, DDW_EXT_SIZE, &count); | |
1061 | if (ret) | |
1062 | return ret; | |
1063 | ||
1064 | if (count < extnum) | |
1065 | return -EOVERFLOW; | |
1066 | ||
1067 | if (!value) | |
1068 | value = &count; | |
1069 | ||
1070 | return of_property_read_u32_index(np, propname, extnum, value); | |
1071 | } | |
1072 | ||
b73a635f | 1073 | static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail, |
80f02512 LB |
1074 | struct ddw_query_response *query, |
1075 | struct device_node *parent) | |
4e8b0cf4 | 1076 | { |
8445a87f GP |
1077 | struct device_node *dn; |
1078 | struct pci_dn *pdn; | |
80f02512 | 1079 | u32 cfg_addr, ext_query, query_out[5]; |
4e8b0cf4 | 1080 | u64 buid; |
80f02512 LB |
1081 | int ret, out_sz; |
1082 | ||
1083 | /* | |
1084 | * From LoPAR level 2.8, "ibm,ddw-extensions" index 3 can rule how many | |
1085 | * output parameters ibm,query-pe-dma-windows will have, ranging from | |
1086 | * 5 to 6. | |
1087 | */ | |
1088 | ret = ddw_read_ext(parent, DDW_EXT_QUERY_OUT_SIZE, &ext_query); | |
1089 | if (!ret && ext_query == 1) | |
1090 | out_sz = 6; | |
1091 | else | |
1092 | out_sz = 5; | |
4e8b0cf4 NA |
1093 | |
1094 | /* | |
1095 | * Get the config address and phb buid of the PE window. | |
1096 | * Rely on eeh to retrieve this for us. | |
1097 | * Retrieve them from the pci device, not the node with the | |
1098 | * dma-window property | |
1099 | */ | |
8445a87f GP |
1100 | dn = pci_device_to_OF_node(dev); |
1101 | pdn = PCI_DN(dn); | |
1102 | buid = pdn->phb->buid; | |
8a934efe | 1103 | cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8)); |
39baadbf | 1104 | |
80f02512 | 1105 | ret = rtas_call(ddw_avail[DDW_QUERY_PE_DMA_WIN], 3, out_sz, query_out, |
cac3e629 | 1106 | cfg_addr, BUID_HI(buid), BUID_LO(buid)); |
80f02512 LB |
1107 | |
1108 | switch (out_sz) { | |
1109 | case 5: | |
1110 | query->windows_available = query_out[0]; | |
1111 | query->largest_available_block = query_out[1]; | |
1112 | query->page_size = query_out[2]; | |
1113 | query->migration_capable = query_out[3]; | |
1114 | break; | |
1115 | case 6: | |
1116 | query->windows_available = query_out[0]; | |
1117 | query->largest_available_block = ((u64)query_out[1] << 32) | | |
1118 | query_out[2]; | |
1119 | query->page_size = query_out[3]; | |
1120 | query->migration_capable = query_out[4]; | |
1121 | break; | |
1122 | } | |
1123 | ||
a28a2eff AK |
1124 | dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x returned %d, lb=%llx ps=%x wn=%d\n", |
1125 | ddw_avail[DDW_QUERY_PE_DMA_WIN], cfg_addr, BUID_HI(buid), | |
1126 | BUID_LO(buid), ret, query->largest_available_block, | |
1127 | query->page_size, query->windows_available); | |
1128 | ||
4e8b0cf4 NA |
1129 | return ret; |
1130 | } | |
1131 | ||
b73a635f | 1132 | static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail, |
4e8b0cf4 NA |
1133 | struct ddw_create_response *create, int page_shift, |
1134 | int window_shift) | |
1135 | { | |
8445a87f GP |
1136 | struct device_node *dn; |
1137 | struct pci_dn *pdn; | |
4e8b0cf4 NA |
1138 | u32 cfg_addr; |
1139 | u64 buid; | |
1140 | int ret; | |
1141 | ||
1142 | /* | |
1143 | * Get the config address and phb buid of the PE window. | |
1144 | * Rely on eeh to retrieve this for us. | |
1145 | * Retrieve them from the pci device, not the node with the | |
1146 | * dma-window property | |
1147 | */ | |
8445a87f GP |
1148 | dn = pci_device_to_OF_node(dev); |
1149 | pdn = PCI_DN(dn); | |
1150 | buid = pdn->phb->buid; | |
8a934efe | 1151 | cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8)); |
4e8b0cf4 NA |
1152 | |
1153 | do { | |
1154 | /* extra outputs are LIOBN and dma-addr (hi, lo) */ | |
cac3e629 LB |
1155 | ret = rtas_call(ddw_avail[DDW_CREATE_PE_DMA_WIN], 5, 4, |
1156 | (u32 *)create, cfg_addr, BUID_HI(buid), | |
1157 | BUID_LO(buid), page_shift, window_shift); | |
4e8b0cf4 NA |
1158 | } while (rtas_busy_delay(ret)); |
1159 | dev_info(&dev->dev, | |
1160 | "ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d " | |
cac3e629 LB |
1161 | "(liobn = 0x%x starting addr = %x %x)\n", |
1162 | ddw_avail[DDW_CREATE_PE_DMA_WIN], cfg_addr, BUID_HI(buid), | |
1163 | BUID_LO(buid), page_shift, window_shift, ret, create->liobn, | |
1164 | create->addr_hi, create->addr_lo); | |
4e8b0cf4 NA |
1165 | |
1166 | return ret; | |
1167 | } | |
1168 | ||
61435690 NA |
1169 | struct failed_ddw_pdn { |
1170 | struct device_node *pdn; | |
1171 | struct list_head list; | |
1172 | }; | |
1173 | ||
1174 | static LIST_HEAD(failed_ddw_pdn_list); | |
1175 | ||
68c0449e AK |
1176 | static phys_addr_t ddw_memory_hotplug_max(void) |
1177 | { | |
2500763d | 1178 | resource_size_t max_addr = memory_hotplug_max(); |
68c0449e AK |
1179 | struct device_node *memory; |
1180 | ||
1181 | for_each_node_by_type(memory, "memory") { | |
2500763d | 1182 | struct resource res; |
68c0449e | 1183 | |
2500763d | 1184 | if (of_address_to_resource(memory, 0, &res)) |
68c0449e AK |
1185 | continue; |
1186 | ||
2500763d | 1187 | max_addr = max_t(resource_size_t, max_addr, res.end + 1); |
68c0449e AK |
1188 | } |
1189 | ||
1190 | return max_addr; | |
1191 | } | |
1192 | ||
8c0d5159 LB |
1193 | /* |
1194 | * Platforms supporting the DDW option starting with LoPAR level 2.7 implement | |
1195 | * ibm,ddw-extensions, which carries the rtas token for | |
1196 | * ibm,reset-pe-dma-windows. | |
1197 | * That rtas-call can be used to restore the default DMA window for the device. | |
1198 | */ | |
1199 | static void reset_dma_window(struct pci_dev *dev, struct device_node *par_dn) | |
1200 | { | |
1201 | int ret; | |
1202 | u32 cfg_addr, reset_dma_win; | |
1203 | u64 buid; | |
1204 | struct device_node *dn; | |
1205 | struct pci_dn *pdn; | |
1206 | ||
1207 | ret = ddw_read_ext(par_dn, DDW_EXT_RESET_DMA_WIN, &reset_dma_win); | |
1208 | if (ret) | |
1209 | return; | |
1210 | ||
1211 | dn = pci_device_to_OF_node(dev); | |
1212 | pdn = PCI_DN(dn); | |
1213 | buid = pdn->phb->buid; | |
1214 | cfg_addr = (pdn->busno << 16) | (pdn->devfn << 8); | |
1215 | ||
1216 | ret = rtas_call(reset_dma_win, 3, 1, NULL, cfg_addr, BUID_HI(buid), | |
1217 | BUID_LO(buid)); | |
1218 | if (ret) | |
1219 | dev_info(&dev->dev, | |
1220 | "ibm,reset-pe-dma-windows(%x) %x %x %x returned %d ", | |
1221 | reset_dma_win, cfg_addr, BUID_HI(buid), BUID_LO(buid), | |
1222 | ret); | |
1223 | } | |
1224 | ||
47272411 LB |
1225 | /* Return largest page shift based on "IO Page Sizes" output of ibm,query-pe-dma-window. */ |
1226 | static int iommu_get_page_shift(u32 query_page_size) | |
1227 | { | |
38727311 | 1228 | /* Supported IO page-sizes according to LoPAR, note that 2M is out of order */ |
47272411 LB |
1229 | const int shift[] = { |
1230 | __builtin_ctzll(SZ_4K), __builtin_ctzll(SZ_64K), __builtin_ctzll(SZ_16M), | |
1231 | __builtin_ctzll(SZ_32M), __builtin_ctzll(SZ_64M), __builtin_ctzll(SZ_128M), | |
38727311 | 1232 | __builtin_ctzll(SZ_256M), __builtin_ctzll(SZ_16G), __builtin_ctzll(SZ_2M) |
47272411 LB |
1233 | }; |
1234 | ||
1235 | int i = ARRAY_SIZE(shift) - 1; | |
38727311 | 1236 | int ret = 0; |
47272411 LB |
1237 | |
1238 | /* | |
1239 | * On LoPAR, ibm,query-pe-dma-window outputs "IO Page Sizes" using a bit field: | |
1240 | * - bit 31 means 4k pages are supported, | |
1241 | * - bit 30 means 64k pages are supported, and so on. | |
1242 | * Larger pagesizes map more memory with the same amount of TCEs, so start probing them. | |
1243 | */ | |
1244 | for (; i >= 0 ; i--) { | |
1245 | if (query_page_size & (1 << i)) | |
38727311 | 1246 | ret = max(ret, shift[i]); |
47272411 LB |
1247 | } |
1248 | ||
38727311 | 1249 | return ret; |
47272411 LB |
1250 | } |
1251 | ||
7ed2ed2d LB |
1252 | static struct property *ddw_property_create(const char *propname, u32 liobn, u64 dma_addr, |
1253 | u32 page_shift, u32 window_shift) | |
1254 | { | |
1255 | struct dynamic_dma_window_prop *ddwprop; | |
1256 | struct property *win64; | |
1257 | ||
1258 | win64 = kzalloc(sizeof(*win64), GFP_KERNEL); | |
1259 | if (!win64) | |
1260 | return NULL; | |
1261 | ||
1262 | win64->name = kstrdup(propname, GFP_KERNEL); | |
1263 | ddwprop = kzalloc(sizeof(*ddwprop), GFP_KERNEL); | |
1264 | win64->value = ddwprop; | |
1265 | win64->length = sizeof(*ddwprop); | |
1266 | if (!win64->name || !win64->value) { | |
1267 | kfree(win64->name); | |
1268 | kfree(win64->value); | |
1269 | kfree(win64); | |
1270 | return NULL; | |
1271 | } | |
1272 | ||
1273 | ddwprop->liobn = cpu_to_be32(liobn); | |
1274 | ddwprop->dma_base = cpu_to_be64(dma_addr); | |
1275 | ddwprop->tce_shift = cpu_to_be32(page_shift); | |
1276 | ddwprop->window_shift = cpu_to_be32(window_shift); | |
1277 | ||
1278 | return win64; | |
1279 | } | |
1280 | ||
4e8b0cf4 NA |
1281 | /* |
1282 | * If the PE supports dynamic dma windows, and there is space for a table | |
1283 | * that can map all pages in a linear offset, then setup such a table, | |
1284 | * and record the dma-offset in the struct device. | |
1285 | * | |
1286 | * dev: the pci device we are checking | |
1287 | * pdn: the parent pe node with the ibm,dma_window property | |
1288 | * Future: also check if we can remap the base window for our base page size | |
1289 | * | |
2ca73c54 | 1290 | * returns true if can map all pages (direct mapping), false otherwise.. |
4e8b0cf4 | 1291 | */ |
2ca73c54 | 1292 | static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) |
4e8b0cf4 | 1293 | { |
bf6e2d56 AK |
1294 | int len = 0, ret; |
1295 | int max_ram_len = order_base_2(ddw_memory_hotplug_max()); | |
4e8b0cf4 NA |
1296 | struct ddw_query_response query; |
1297 | struct ddw_create_response create; | |
1298 | int page_shift; | |
7ed2ed2d | 1299 | u64 win_addr; |
381ceda8 | 1300 | const char *win_name; |
4e8b0cf4 | 1301 | struct device_node *dn; |
cac3e629 | 1302 | u32 ddw_avail[DDW_APPLICABLE_SIZE]; |
57dbbe59 | 1303 | struct dma_win *window; |
76730334 | 1304 | struct property *win64; |
61435690 | 1305 | struct failed_ddw_pdn *fpdn; |
381ceda8 | 1306 | bool default_win_removed = false, direct_mapping = false; |
bf6e2d56 | 1307 | bool pmem_present; |
381ceda8 | 1308 | struct pci_dn *pci = PCI_DN(pdn); |
b1fc44ea | 1309 | struct property *default_win = NULL; |
bf6e2d56 AK |
1310 | |
1311 | dn = of_find_node_by_type(NULL, "ibm,pmemory"); | |
1312 | pmem_present = dn != NULL; | |
1313 | of_node_put(dn); | |
4e8b0cf4 | 1314 | |
57dbbe59 | 1315 | mutex_lock(&dma_win_init_mutex); |
4e8b0cf4 | 1316 | |
3bf983e4 | 1317 | if (find_existing_ddw(pdn, &dev->dev.archdata.dma_offset, &len, &direct_mapping)) |
4e8b0cf4 NA |
1318 | goto out_unlock; |
1319 | ||
61435690 NA |
1320 | /* |
1321 | * If we already went through this for a previous function of | |
1322 | * the same device and failed, we don't want to muck with the | |
1323 | * DMA window again, as it will race with in-flight operations | |
1324 | * and can lead to EEHs. The above mutex protects access to the | |
1325 | * list. | |
1326 | */ | |
1327 | list_for_each_entry(fpdn, &failed_ddw_pdn_list, list) { | |
b7c670d6 | 1328 | if (fpdn->pdn == pdn) |
61435690 NA |
1329 | goto out_unlock; |
1330 | } | |
1331 | ||
4e8b0cf4 NA |
1332 | /* |
1333 | * the ibm,ddw-applicable property holds the tokens for: | |
1334 | * ibm,query-pe-dma-window | |
1335 | * ibm,create-pe-dma-window | |
1336 | * ibm,remove-pe-dma-window | |
1337 | * for the given node in that order. | |
1338 | * the property is actually in the parent, not the PE | |
1339 | */ | |
9410e018 | 1340 | ret = of_property_read_u32_array(pdn, "ibm,ddw-applicable", |
cac3e629 | 1341 | &ddw_avail[0], DDW_APPLICABLE_SIZE); |
9410e018 | 1342 | if (ret) |
ae69e1ed | 1343 | goto out_failed; |
25ebc45b | 1344 | |
ae69e1ed | 1345 | /* |
4e8b0cf4 NA |
1346 | * Query if there is a second window of size to map the |
1347 | * whole partition. Query returns number of windows, largest | |
1348 | * block assigned to PE (partition endpoint), and two bitmasks | |
1349 | * of page sizes: supported and supported for migrate-dma. | |
1350 | */ | |
1351 | dn = pci_device_to_OF_node(dev); | |
80f02512 | 1352 | ret = query_ddw(dev, ddw_avail, &query, pdn); |
4e8b0cf4 | 1353 | if (ret != 0) |
ae69e1ed | 1354 | goto out_failed; |
4e8b0cf4 | 1355 | |
8c0d5159 LB |
1356 | /* |
1357 | * If there is no window available, remove the default DMA window, | |
1358 | * if it's present. This will make all the resources available to the | |
1359 | * new DDW window. | |
1360 | * If anything fails after this, we need to restore it, so also check | |
1361 | * for extensions presence. | |
1362 | */ | |
4e8b0cf4 | 1363 | if (query.windows_available == 0) { |
8c0d5159 LB |
1364 | int reset_win_ext; |
1365 | ||
92fe01b7 | 1366 | /* DDW + IOMMU on single window may fail if there is any allocation */ |
b1fc44ea | 1367 | if (iommu_table_in_use(pci->table_group->tables[0])) { |
92fe01b7 AK |
1368 | dev_warn(&dev->dev, "current IOMMU table in use, can't be replaced.\n"); |
1369 | goto out_failed; | |
1370 | } | |
1371 | ||
8c0d5159 LB |
1372 | default_win = of_find_property(pdn, "ibm,dma-window", NULL); |
1373 | if (!default_win) | |
1374 | goto out_failed; | |
1375 | ||
1376 | reset_win_ext = ddw_read_ext(pdn, DDW_EXT_RESET_DMA_WIN, NULL); | |
1377 | if (reset_win_ext) | |
1378 | goto out_failed; | |
1379 | ||
1380 | remove_dma_window(pdn, ddw_avail, default_win); | |
1381 | default_win_removed = true; | |
1382 | ||
1383 | /* Query again, to check if the window is available */ | |
1384 | ret = query_ddw(dev, ddw_avail, &query, pdn); | |
1385 | if (ret != 0) | |
1386 | goto out_failed; | |
1387 | ||
1388 | if (query.windows_available == 0) { | |
1389 | /* no windows are available for this device. */ | |
1390 | dev_dbg(&dev->dev, "no free dynamic windows"); | |
1391 | goto out_failed; | |
1392 | } | |
4e8b0cf4 | 1393 | } |
47272411 LB |
1394 | |
1395 | page_shift = iommu_get_page_shift(query.page_size); | |
1396 | if (!page_shift) { | |
57dbbe59 LB |
1397 | dev_dbg(&dev->dev, "no supported page size in mask %x", |
1398 | query.page_size); | |
ae69e1ed | 1399 | goto out_failed; |
4e8b0cf4 | 1400 | } |
381ceda8 LB |
1401 | |
1402 | ||
bf6e2d56 AK |
1403 | /* |
1404 | * The "ibm,pmemory" can appear anywhere in the address space. | |
1405 | * Assuming it is still backed by page structs, try MAX_PHYSMEM_BITS | |
1406 | * for the upper limit and fallback to max RAM otherwise but this | |
1407 | * disables device::dma_ops_bypass. | |
1408 | */ | |
1409 | len = max_ram_len; | |
1410 | if (pmem_present) { | |
1411 | if (query.largest_available_block >= | |
1412 | (1ULL << (MAX_PHYSMEM_BITS - page_shift))) | |
a9d2f9bb | 1413 | len = MAX_PHYSMEM_BITS; |
bf6e2d56 AK |
1414 | else |
1415 | dev_info(&dev->dev, "Skipping ibm,pmemory"); | |
1416 | } | |
1417 | ||
381ceda8 | 1418 | /* check if the available block * number of ptes will map everything */ |
bf6e2d56 AK |
1419 | if (query.largest_available_block < (1ULL << (len - page_shift))) { |
1420 | dev_dbg(&dev->dev, | |
1421 | "can't map partition max 0x%llx with %llu %llu-sized pages\n", | |
1422 | 1ULL << len, | |
1423 | query.largest_available_block, | |
1424 | 1ULL << page_shift); | |
381ceda8 | 1425 | |
381ceda8 LB |
1426 | len = order_base_2(query.largest_available_block << page_shift); |
1427 | win_name = DMA64_PROPNAME; | |
1428 | } else { | |
ad397602 AK |
1429 | direct_mapping = !default_win_removed || |
1430 | (len == MAX_PHYSMEM_BITS) || | |
1431 | (!pmem_present && (len == max_ram_len)); | |
1432 | win_name = direct_mapping ? DIRECT64_PROPNAME : DMA64_PROPNAME; | |
4e8b0cf4 | 1433 | } |
4e8b0cf4 | 1434 | |
b73a635f | 1435 | ret = create_ddw(dev, ddw_avail, &create, page_shift, len); |
4e8b0cf4 | 1436 | if (ret != 0) |
7ed2ed2d | 1437 | goto out_failed; |
4e8b0cf4 | 1438 | |
b7c670d6 RH |
1439 | dev_dbg(&dev->dev, "created tce table LIOBN 0x%x for %pOF\n", |
1440 | create.liobn, dn); | |
4e8b0cf4 | 1441 | |
7ed2ed2d | 1442 | win_addr = ((u64)create.addr_hi << 32) | create.addr_lo; |
381ceda8 LB |
1443 | win64 = ddw_property_create(win_name, create.liobn, win_addr, page_shift, len); |
1444 | ||
7ed2ed2d LB |
1445 | if (!win64) { |
1446 | dev_info(&dev->dev, | |
1447 | "couldn't allocate property, property name, or value\n"); | |
1448 | goto out_remove_win; | |
1449 | } | |
1450 | ||
1451 | ret = of_add_property(pdn, win64); | |
1452 | if (ret) { | |
57dbbe59 | 1453 | dev_err(&dev->dev, "unable to add DMA window property for %pOF: %d", |
7ed2ed2d LB |
1454 | pdn, ret); |
1455 | goto out_free_prop; | |
1456 | } | |
1457 | ||
1458 | window = ddw_list_new_entry(pdn, win64->value); | |
4e8b0cf4 | 1459 | if (!window) |
7ed2ed2d | 1460 | goto out_del_prop; |
4e8b0cf4 | 1461 | |
381ceda8 | 1462 | if (direct_mapping) { |
d61cd13e GB |
1463 | window->direct = true; |
1464 | ||
381ceda8 LB |
1465 | /* DDW maps the whole partition, so enable direct DMA mapping */ |
1466 | ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT, | |
1467 | win64->value, tce_setrange_multi_pSeriesLP_walk); | |
1468 | if (ret) { | |
57dbbe59 | 1469 | dev_info(&dev->dev, "failed to map DMA window for %pOF: %d\n", |
381ceda8 | 1470 | dn, ret); |
4e8b0cf4 | 1471 | |
fb4ee2b3 AK |
1472 | /* Make sure to clean DDW if any TCE was set*/ |
1473 | clean_dma_window(pdn, win64->value); | |
381ceda8 LB |
1474 | goto out_del_list; |
1475 | } | |
1476 | } else { | |
1477 | struct iommu_table *newtbl; | |
1478 | int i; | |
d853adc7 | 1479 | unsigned long start = 0, end = 0; |
381ceda8 | 1480 | |
d61cd13e GB |
1481 | window->direct = false; |
1482 | ||
381ceda8 LB |
1483 | for (i = 0; i < ARRAY_SIZE(pci->phb->mem_resources); i++) { |
1484 | const unsigned long mask = IORESOURCE_MEM_64 | IORESOURCE_MEM; | |
1485 | ||
1486 | /* Look for MMIO32 */ | |
d853adc7 AK |
1487 | if ((pci->phb->mem_resources[i].flags & mask) == IORESOURCE_MEM) { |
1488 | start = pci->phb->mem_resources[i].start; | |
1489 | end = pci->phb->mem_resources[i].end; | |
381ceda8 | 1490 | break; |
d853adc7 | 1491 | } |
381ceda8 LB |
1492 | } |
1493 | ||
381ceda8 LB |
1494 | /* New table for using DDW instead of the default DMA window */ |
1495 | newtbl = iommu_pseries_alloc_table(pci->phb->node); | |
1496 | if (!newtbl) { | |
1497 | dev_dbg(&dev->dev, "couldn't create new IOMMU table\n"); | |
1498 | goto out_del_list; | |
1499 | } | |
1500 | ||
1501 | iommu_table_setparms_common(newtbl, pci->phb->bus->number, create.liobn, win_addr, | |
1502 | 1UL << len, page_shift, NULL, &iommu_table_lpar_multi_ops); | |
d853adc7 | 1503 | iommu_init_table(newtbl, pci->phb->node, start, end); |
381ceda8 LB |
1504 | |
1505 | pci->table_group->tables[1] = newtbl; | |
1506 | ||
381ceda8 | 1507 | set_iommu_table_base(&dev->dev, newtbl); |
4e8b0cf4 NA |
1508 | } |
1509 | ||
b1fc44ea AK |
1510 | if (default_win_removed) { |
1511 | iommu_tce_table_put(pci->table_group->tables[0]); | |
1512 | pci->table_group->tables[0] = NULL; | |
1513 | ||
1514 | /* default_win is valid here because default_win_removed == true */ | |
1515 | of_remove_property(pdn, default_win); | |
1516 | dev_info(&dev->dev, "Removed default DMA window for %pOF\n", pdn); | |
1517 | } | |
1518 | ||
57dbbe59 LB |
1519 | spin_lock(&dma_win_list_lock); |
1520 | list_add(&window->list, &dma_win_list); | |
1521 | spin_unlock(&dma_win_list_lock); | |
4e8b0cf4 | 1522 | |
7ed2ed2d | 1523 | dev->dev.archdata.dma_offset = win_addr; |
4e8b0cf4 NA |
1524 | goto out_unlock; |
1525 | ||
7ed2ed2d | 1526 | out_del_list: |
7a19081f JL |
1527 | kfree(window); |
1528 | ||
7ed2ed2d LB |
1529 | out_del_prop: |
1530 | of_remove_property(pdn, win64); | |
4e8b0cf4 NA |
1531 | |
1532 | out_free_prop: | |
1533 | kfree(win64->name); | |
1534 | kfree(win64->value); | |
1535 | kfree(win64); | |
1536 | ||
7ed2ed2d LB |
1537 | out_remove_win: |
1538 | /* DDW is clean, so it's ok to call this directly. */ | |
1539 | __remove_dma_window(pdn, ddw_avail, create.liobn); | |
1540 | ||
ae69e1ed | 1541 | out_failed: |
8c0d5159 LB |
1542 | if (default_win_removed) |
1543 | reset_dma_window(dev, pdn); | |
25ebc45b | 1544 | |
61435690 NA |
1545 | fpdn = kzalloc(sizeof(*fpdn), GFP_KERNEL); |
1546 | if (!fpdn) | |
1547 | goto out_unlock; | |
1548 | fpdn->pdn = pdn; | |
1549 | list_add(&fpdn->list, &failed_ddw_pdn_list); | |
1550 | ||
4e8b0cf4 | 1551 | out_unlock: |
57dbbe59 | 1552 | mutex_unlock(&dma_win_init_mutex); |
bf6e2d56 AK |
1553 | |
1554 | /* | |
1555 | * If we have persistent memory and the window size is only as big | |
1556 | * as RAM, then we failed to create a window to cover persistent | |
1557 | * memory and need to set the DMA limit. | |
1558 | */ | |
fb4ee2b3 | 1559 | if (pmem_present && direct_mapping && len == max_ram_len) |
2ca73c54 | 1560 | dev->dev.bus_dma_limit = dev->dev.archdata.dma_offset + (1ULL << len); |
bf6e2d56 | 1561 | |
fb4ee2b3 | 1562 | return direct_mapping; |
4e8b0cf4 NA |
1563 | } |
1564 | ||
12d04eef | 1565 | static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) |
1da177e4 LT |
1566 | { |
1567 | struct device_node *pdn, *dn; | |
1568 | struct iommu_table *tbl; | |
1635317f | 1569 | struct pci_dn *pci; |
09a3c1e4 | 1570 | struct dynamic_dma_window_prop prop; |
1da177e4 | 1571 | |
f7ebf352 | 1572 | pr_debug("pci_dma_dev_setup_pSeriesLP: %s\n", pci_name(dev)); |
12d04eef | 1573 | |
1da177e4 | 1574 | /* dev setup for LPAR is a little tricky, since the device tree might |
25985edc | 1575 | * contain the dma-window properties per-device and not necessarily |
1da177e4 LT |
1576 | * for the bus. So we need to search upwards in the tree until we |
1577 | * either hit a dma-window property, OR find a parent with a table | |
1578 | * already allocated. | |
1579 | */ | |
1580 | dn = pci_device_to_OF_node(dev); | |
b7c670d6 | 1581 | pr_debug(" node is %pOF\n", dn); |
5d2efba6 | 1582 | |
09a3c1e4 | 1583 | pdn = pci_dma_find(dn, &prop); |
650f7b3b LV |
1584 | if (!pdn || !PCI_DN(pdn)) { |
1585 | printk(KERN_WARNING "pci_dma_dev_setup_pSeriesLP: " | |
b7c670d6 RH |
1586 | "no DMA window found for pci dev=%s dn=%pOF\n", |
1587 | pci_name(dev), dn); | |
650f7b3b LV |
1588 | return; |
1589 | } | |
b7c670d6 | 1590 | pr_debug(" parent is %pOF\n", pdn); |
12d04eef | 1591 | |
e07102db | 1592 | pci = PCI_DN(pdn); |
b348aa65 AK |
1593 | if (!pci->table_group) { |
1594 | pci->table_group = iommu_pseries_alloc_group(pci->phb->node); | |
1595 | tbl = pci->table_group->tables[0]; | |
09a3c1e4 GB |
1596 | |
1597 | iommu_table_setparms_common(tbl, pci->phb->bus->number, | |
1598 | be32_to_cpu(prop.liobn), | |
1599 | be64_to_cpu(prop.dma_base), | |
1600 | 1ULL << be32_to_cpu(prop.window_shift), | |
1601 | be32_to_cpu(prop.tce_shift), NULL, | |
1602 | &iommu_table_lpar_multi_ops); | |
1603 | ||
1604 | /* Only for normal boot with default window. Doesn't matter even | |
1605 | * if we set these with DDW which is 64bit during kdump, since | |
1606 | * these will not be used during kdump. | |
1607 | */ | |
1608 | pci->table_group->tce32_start = be64_to_cpu(prop.dma_base); | |
1609 | pci->table_group->tce32_size = 1 << be32_to_cpu(prop.window_shift); | |
fc8cba8f | 1610 | |
201ed7f3 | 1611 | iommu_init_table(tbl, pci->phb->node, 0, 0); |
b348aa65 AK |
1612 | iommu_register_group(pci->table_group, |
1613 | pci_domain_nr(pci->phb->bus), 0); | |
1614 | pr_debug(" created table: %p\n", pci->table_group); | |
de113217 | 1615 | } else { |
b348aa65 | 1616 | pr_debug(" found DMA window, table: %p\n", pci->table_group); |
1da177e4 LT |
1617 | } |
1618 | ||
b348aa65 | 1619 | set_iommu_table_base(&dev->dev, pci->table_group->tables[0]); |
c4e9d3c1 | 1620 | iommu_add_device(pci->table_group, &dev->dev); |
1da177e4 | 1621 | } |
4e8b0cf4 | 1622 | |
9ae2fdde | 1623 | static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask) |
4e8b0cf4 | 1624 | { |
9ae2fdde | 1625 | struct device_node *dn = pci_device_to_OF_node(pdev), *pdn; |
4e8b0cf4 | 1626 | |
4e8b0cf4 | 1627 | /* only attempt to use a new window if 64-bit DMA is requested */ |
9ae2fdde CH |
1628 | if (dma_mask < DMA_BIT_MASK(64)) |
1629 | return false; | |
4e8b0cf4 | 1630 | |
9ae2fdde | 1631 | dev_dbg(&pdev->dev, "node is %pOF\n", dn); |
4e8b0cf4 | 1632 | |
9ae2fdde CH |
1633 | /* |
1634 | * the device tree might contain the dma-window properties | |
1635 | * per-device and not necessarily for the bus. So we need to | |
1636 | * search upwards in the tree until we either hit a dma-window | |
1637 | * property, OR find a parent with a table already allocated. | |
1638 | */ | |
b1fc44ea | 1639 | pdn = pci_dma_find(dn, NULL); |
2ca73c54 LB |
1640 | if (pdn && PCI_DN(pdn)) |
1641 | return enable_ddw(pdev, pdn); | |
6a5c7be5 | 1642 | |
9ae2fdde | 1643 | return false; |
6a5c7be5 MM |
1644 | } |
1645 | ||
4e8b0cf4 NA |
1646 | static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action, |
1647 | void *data) | |
1648 | { | |
57dbbe59 | 1649 | struct dma_win *window; |
4e8b0cf4 NA |
1650 | struct memory_notify *arg = data; |
1651 | int ret = 0; | |
1652 | ||
1653 | switch (action) { | |
1654 | case MEM_GOING_ONLINE: | |
57dbbe59 LB |
1655 | spin_lock(&dma_win_list_lock); |
1656 | list_for_each_entry(window, &dma_win_list, list) { | |
d61cd13e GB |
1657 | if (window->direct) { |
1658 | ret |= tce_setrange_multi_pSeriesLP(arg->start_pfn, | |
1659 | arg->nr_pages, window->prop); | |
1660 | } | |
4e8b0cf4 NA |
1661 | /* XXX log error */ |
1662 | } | |
57dbbe59 | 1663 | spin_unlock(&dma_win_list_lock); |
4e8b0cf4 NA |
1664 | break; |
1665 | case MEM_CANCEL_ONLINE: | |
1666 | case MEM_OFFLINE: | |
57dbbe59 LB |
1667 | spin_lock(&dma_win_list_lock); |
1668 | list_for_each_entry(window, &dma_win_list, list) { | |
d61cd13e GB |
1669 | if (window->direct) { |
1670 | ret |= tce_clearrange_multi_pSeriesLP(arg->start_pfn, | |
1671 | arg->nr_pages, window->prop); | |
1672 | } | |
4e8b0cf4 NA |
1673 | /* XXX log error */ |
1674 | } | |
57dbbe59 | 1675 | spin_unlock(&dma_win_list_lock); |
4e8b0cf4 NA |
1676 | break; |
1677 | default: | |
1678 | break; | |
1679 | } | |
1680 | if (ret && action != MEM_CANCEL_ONLINE) | |
1681 | return NOTIFY_BAD; | |
1682 | ||
1683 | return NOTIFY_OK; | |
1684 | } | |
1685 | ||
1686 | static struct notifier_block iommu_mem_nb = { | |
1687 | .notifier_call = iommu_mem_notifier, | |
1688 | }; | |
1689 | ||
f5242e5a | 1690 | static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *data) |
bed59275 SR |
1691 | { |
1692 | int err = NOTIFY_OK; | |
f5242e5a GL |
1693 | struct of_reconfig_data *rd = data; |
1694 | struct device_node *np = rd->dn; | |
bed59275 | 1695 | struct pci_dn *pci = PCI_DN(np); |
57dbbe59 | 1696 | struct dma_win *window; |
bed59275 SR |
1697 | |
1698 | switch (action) { | |
1cf3d8b3 | 1699 | case OF_RECONFIG_DETACH_NODE: |
5efbabe0 GS |
1700 | /* |
1701 | * Removing the property will invoke the reconfig | |
1702 | * notifier again, which causes dead-lock on the | |
1703 | * read-write semaphore of the notifier chain. So | |
1704 | * we have to remove the property when releasing | |
1705 | * the device node. | |
1706 | */ | |
381ceda8 LB |
1707 | if (remove_ddw(np, false, DIRECT64_PROPNAME)) |
1708 | remove_ddw(np, false, DMA64_PROPNAME); | |
1709 | ||
b348aa65 AK |
1710 | if (pci && pci->table_group) |
1711 | iommu_pseries_free_group(pci->table_group, | |
ac9a5889 | 1712 | np->full_name); |
4e8b0cf4 | 1713 | |
57dbbe59 LB |
1714 | spin_lock(&dma_win_list_lock); |
1715 | list_for_each_entry(window, &dma_win_list, list) { | |
4e8b0cf4 NA |
1716 | if (window->device == np) { |
1717 | list_del(&window->list); | |
1718 | kfree(window); | |
1719 | break; | |
1720 | } | |
1721 | } | |
57dbbe59 | 1722 | spin_unlock(&dma_win_list_lock); |
bed59275 SR |
1723 | break; |
1724 | default: | |
1725 | err = NOTIFY_DONE; | |
1726 | break; | |
1727 | } | |
1728 | return err; | |
1729 | } | |
1730 | ||
1731 | static struct notifier_block iommu_reconfig_nb = { | |
1732 | .notifier_call = iommu_reconfig_notifier, | |
1733 | }; | |
1da177e4 | 1734 | |
1da177e4 | 1735 | /* These are called very early. */ |
e14ff96d | 1736 | void __init iommu_init_early_pSeries(void) |
1da177e4 | 1737 | { |
a8daac8a | 1738 | if (of_chosen && of_get_property(of_chosen, "linux,iommu-off", NULL)) |
1da177e4 | 1739 | return; |
1da177e4 | 1740 | |
57cfb814 | 1741 | if (firmware_has_feature(FW_FEATURE_LPAR)) { |
38ae9ec4 DA |
1742 | pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeriesLP; |
1743 | pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeriesLP; | |
9ae2fdde CH |
1744 | if (!disable_ddw) |
1745 | pseries_pci_controller_ops.iommu_bypass_supported = | |
1746 | iommu_bypass_supported_pSeriesLP; | |
1da177e4 | 1747 | } else { |
38ae9ec4 DA |
1748 | pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeries; |
1749 | pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeries; | |
1da177e4 LT |
1750 | } |
1751 | ||
1752 | ||
1cf3d8b3 | 1753 | of_reconfig_notifier_register(&iommu_reconfig_nb); |
4e8b0cf4 | 1754 | register_memory_notifier(&iommu_mem_nb); |
1da177e4 | 1755 | |
d862b441 | 1756 | set_pci_dma_ops(&dma_iommu_ops); |
1da177e4 LT |
1757 | } |
1758 | ||
4e89a2d8 WS |
1759 | static int __init disable_multitce(char *str) |
1760 | { | |
1761 | if (strcmp(str, "off") == 0 && | |
1762 | firmware_has_feature(FW_FEATURE_LPAR) && | |
17a0364c AK |
1763 | (firmware_has_feature(FW_FEATURE_PUT_TCE_IND) || |
1764 | firmware_has_feature(FW_FEATURE_STUFF_TCE))) { | |
4e89a2d8 | 1765 | printk(KERN_INFO "Disabling MULTITCE firmware feature\n"); |
17a0364c AK |
1766 | powerpc_firmware_features &= |
1767 | ~(FW_FEATURE_PUT_TCE_IND | FW_FEATURE_STUFF_TCE); | |
4e89a2d8 WS |
1768 | } |
1769 | return 1; | |
1770 | } | |
1771 | ||
1772 | __setup("multitce=", disable_multitce); | |
4ad04e59 | 1773 | |
a9409044 AK |
#ifdef CONFIG_SPAPR_TCE_IOMMU
/*
 * pSeries_pci_device_group - look up the iommu group serving @pdev.
 *
 * @hose: PCI controller the device sits behind (unused here)
 * @pdev: PCI device whose group is wanted
 *
 * Locates the device-tree node holding the DMA window information for
 * @pdev via pci_dma_find() and follows its PCI_DN data to the table group.
 *
 * Returns the value of iommu_group_ref_get() on the group, or
 * ERR_PTR(-ENODEV) when the node, its PCI_DN data, the table group or the
 * group itself is missing.
 */
struct iommu_group *pSeries_pci_device_group(struct pci_controller *hose,
					     struct pci_dev *pdev)
{
	struct device_node *dev_node = pdev->dev.of_node;
	struct device_node *win_node;
	struct pci_dn *pdn;

	win_node = pci_dma_find(dev_node, NULL);
	pdn = win_node ? PCI_DN(win_node) : NULL;

	/* Every link of the chain must exist before the group can be used. */
	if (!pdn || !pdn->table_group || !pdn->table_group->group)
		return ERR_PTR(-ENODEV);

	return iommu_group_ref_get(pdn->table_group->group);
}
#endif