Commit | Line | Data |
---|---|---|
191c2287 AK |
1 | // SPDX-License-Identifier: GPL-2.0+ |
2 | /* | |
3 | * TCE helpers for IODA PCI/PCIe on PowerNV platforms | |
4 | * | |
5 | * Copyright 2018 IBM Corp. | |
6 | * | |
7 | * This program is free software; you can redistribute it and/or | |
8 | * modify it under the terms of the GNU General Public License | |
9 | * as published by the Free Software Foundation; either version | |
10 | * 2 of the License, or (at your option) any later version. | |
11 | */ | |
12 | ||
13 | #include <linux/kernel.h> | |
14 | #include <linux/iommu.h> | |
15 | ||
16 | #include <asm/iommu.h> | |
17 | #include <asm/tce.h> | |
18 | #include "pci.h" | |
19 | ||
20 | void pnv_pci_setup_iommu_table(struct iommu_table *tbl, | |
21 | void *tce_mem, u64 tce_size, | |
22 | u64 dma_offset, unsigned int page_shift) | |
23 | { | |
24 | tbl->it_blocksize = 16; | |
25 | tbl->it_base = (unsigned long)tce_mem; | |
26 | tbl->it_page_shift = page_shift; | |
27 | tbl->it_offset = dma_offset >> tbl->it_page_shift; | |
28 | tbl->it_index = 0; | |
29 | tbl->it_size = tce_size >> 3; | |
30 | tbl->it_busno = 0; | |
31 | tbl->it_type = TCE_PCI; | |
32 | } | |
33 | ||
9bc98c8a AK |
34 | static __be64 *pnv_alloc_tce_level(int nid, unsigned int shift) |
35 | { | |
36 | struct page *tce_mem = NULL; | |
37 | __be64 *addr; | |
38 | ||
39 | tce_mem = alloc_pages_node(nid, GFP_KERNEL, shift - PAGE_SHIFT); | |
40 | if (!tce_mem) { | |
41 | pr_err("Failed to allocate a TCE memory, level shift=%d\n", | |
42 | shift); | |
43 | return NULL; | |
44 | } | |
45 | addr = page_address(tce_mem); | |
46 | memset(addr, 0, 1UL << shift); | |
47 | ||
48 | return addr; | |
49 | } | |
50 | ||
a68bd126 | 51 | static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx, bool alloc) |
191c2287 | 52 | { |
090bad39 | 53 | __be64 *tmp = user ? tbl->it_userspace : (__be64 *) tbl->it_base; |
191c2287 AK |
54 | int level = tbl->it_indirect_levels; |
55 | const long shift = ilog2(tbl->it_level_size); | |
56 | unsigned long mask = (tbl->it_level_size - 1) << (level * shift); | |
57 | ||
58 | while (level) { | |
59 | int n = (idx & mask) >> (level * shift); | |
a68bd126 AK |
60 | unsigned long tce; |
61 | ||
62 | if (tmp[n] == 0) { | |
63 | __be64 *tmp2; | |
64 | ||
65 | if (!alloc) | |
66 | return NULL; | |
67 | ||
68 | tmp2 = pnv_alloc_tce_level(tbl->it_nid, | |
69 | ilog2(tbl->it_level_size) + 3); | |
70 | if (!tmp2) | |
71 | return NULL; | |
72 | ||
73 | tmp[n] = cpu_to_be64(__pa(tmp2) | | |
74 | TCE_PCI_READ | TCE_PCI_WRITE); | |
75 | } | |
76 | tce = be64_to_cpu(tmp[n]); | |
191c2287 AK |
77 | |
78 | tmp = __va(tce & ~(TCE_PCI_READ | TCE_PCI_WRITE)); | |
79 | idx &= ~mask; | |
80 | mask >>= shift; | |
81 | --level; | |
82 | } | |
83 | ||
84 | return tmp + idx; | |
85 | } | |
86 | ||
87 | int pnv_tce_build(struct iommu_table *tbl, long index, long npages, | |
88 | unsigned long uaddr, enum dma_data_direction direction, | |
89 | unsigned long attrs) | |
90 | { | |
91 | u64 proto_tce = iommu_direction_to_tce_perm(direction); | |
92 | u64 rpn = __pa(uaddr) >> tbl->it_page_shift; | |
93 | long i; | |
94 | ||
95 | if (proto_tce & TCE_PCI_WRITE) | |
96 | proto_tce |= TCE_PCI_READ; | |
97 | ||
98 | for (i = 0; i < npages; i++) { | |
99 | unsigned long newtce = proto_tce | | |
100 | ((rpn + i) << tbl->it_page_shift); | |
101 | unsigned long idx = index - tbl->it_offset + i; | |
102 | ||
a68bd126 | 103 | *(pnv_tce(tbl, false, idx, true)) = cpu_to_be64(newtce); |
191c2287 AK |
104 | } |
105 | ||
106 | return 0; | |
107 | } | |
108 | ||
109 | #ifdef CONFIG_IOMMU_API | |
110 | int pnv_tce_xchg(struct iommu_table *tbl, long index, | |
a68bd126 AK |
111 | unsigned long *hpa, enum dma_data_direction *direction, |
112 | bool alloc) | |
191c2287 AK |
113 | { |
114 | u64 proto_tce = iommu_direction_to_tce_perm(*direction); | |
115 | unsigned long newtce = *hpa | proto_tce, oldtce; | |
116 | unsigned long idx = index - tbl->it_offset; | |
a68bd126 | 117 | __be64 *ptce = NULL; |
191c2287 AK |
118 | |
119 | BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl)); | |
120 | ||
a68bd126 AK |
121 | if (*direction == DMA_NONE) { |
122 | ptce = pnv_tce(tbl, false, idx, false); | |
123 | if (!ptce) { | |
124 | *hpa = 0; | |
125 | return 0; | |
126 | } | |
127 | } | |
128 | ||
129 | if (!ptce) { | |
130 | ptce = pnv_tce(tbl, false, idx, alloc); | |
131 | if (!ptce) | |
132 | return alloc ? H_HARDWARE : H_TOO_HARD; | |
133 | } | |
134 | ||
191c2287 AK |
135 | if (newtce & TCE_PCI_WRITE) |
136 | newtce |= TCE_PCI_READ; | |
137 | ||
a68bd126 | 138 | oldtce = be64_to_cpu(xchg(ptce, cpu_to_be64(newtce))); |
191c2287 AK |
139 | *hpa = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE); |
140 | *direction = iommu_tce_direction(oldtce); | |
141 | ||
142 | return 0; | |
143 | } | |
090bad39 | 144 | |
a68bd126 | 145 | __be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index, bool alloc) |
090bad39 AK |
146 | { |
147 | if (WARN_ON_ONCE(!tbl->it_userspace)) | |
148 | return NULL; | |
149 | ||
a68bd126 | 150 | return pnv_tce(tbl, true, index - tbl->it_offset, alloc); |
090bad39 | 151 | } |
191c2287 AK |
152 | #endif |
153 | ||
154 | void pnv_tce_free(struct iommu_table *tbl, long index, long npages) | |
155 | { | |
156 | long i; | |
157 | ||
158 | for (i = 0; i < npages; i++) { | |
159 | unsigned long idx = index - tbl->it_offset + i; | |
a68bd126 | 160 | __be64 *ptce = pnv_tce(tbl, false, idx, false); |
191c2287 | 161 | |
a68bd126 AK |
162 | if (ptce) |
163 | *ptce = cpu_to_be64(0); | |
191c2287 AK |
164 | } |
165 | } | |
166 | ||
167 | unsigned long pnv_tce_get(struct iommu_table *tbl, long index) | |
168 | { | |
a68bd126 AK |
169 | __be64 *ptce = pnv_tce(tbl, false, index - tbl->it_offset, false); |
170 | ||
171 | if (!ptce) | |
172 | return 0; | |
090bad39 AK |
173 | |
174 | return be64_to_cpu(*ptce); | |
191c2287 AK |
175 | } |
176 | ||
177 | static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr, | |
178 | unsigned long size, unsigned int levels) | |
179 | { | |
180 | const unsigned long addr_ul = (unsigned long) addr & | |
181 | ~(TCE_PCI_READ | TCE_PCI_WRITE); | |
182 | ||
183 | if (levels) { | |
184 | long i; | |
185 | u64 *tmp = (u64 *) addr_ul; | |
186 | ||
187 | for (i = 0; i < size; ++i) { | |
188 | unsigned long hpa = be64_to_cpu(tmp[i]); | |
189 | ||
190 | if (!(hpa & (TCE_PCI_READ | TCE_PCI_WRITE))) | |
191 | continue; | |
192 | ||
193 | pnv_pci_ioda2_table_do_free_pages(__va(hpa), size, | |
194 | levels - 1); | |
195 | } | |
196 | } | |
197 | ||
198 | free_pages(addr_ul, get_order(size << 3)); | |
199 | } | |
200 | ||
201 | void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl) | |
202 | { | |
203 | const unsigned long size = tbl->it_indirect_levels ? | |
204 | tbl->it_level_size : tbl->it_size; | |
205 | ||
206 | if (!tbl->it_size) | |
207 | return; | |
208 | ||
209 | pnv_pci_ioda2_table_do_free_pages((__be64 *)tbl->it_base, size, | |
210 | tbl->it_indirect_levels); | |
090bad39 AK |
211 | if (tbl->it_userspace) { |
212 | pnv_pci_ioda2_table_do_free_pages(tbl->it_userspace, size, | |
213 | tbl->it_indirect_levels); | |
214 | } | |
191c2287 AK |
215 | } |
216 | ||
217 | static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned int shift, | |
218 | unsigned int levels, unsigned long limit, | |
219 | unsigned long *current_offset, unsigned long *total_allocated) | |
220 | { | |
191c2287 | 221 | __be64 *addr, *tmp; |
9bc98c8a | 222 | unsigned long allocated = 1UL << shift; |
191c2287 AK |
223 | unsigned int entries = 1UL << (shift - 3); |
224 | long i; | |
225 | ||
9bc98c8a | 226 | addr = pnv_alloc_tce_level(nid, shift); |
191c2287 AK |
227 | *total_allocated += allocated; |
228 | ||
229 | --levels; | |
230 | if (!levels) { | |
231 | *current_offset += allocated; | |
232 | return addr; | |
233 | } | |
234 | ||
235 | for (i = 0; i < entries; ++i) { | |
236 | tmp = pnv_pci_ioda2_table_do_alloc_pages(nid, shift, | |
237 | levels, limit, current_offset, total_allocated); | |
238 | if (!tmp) | |
239 | break; | |
240 | ||
241 | addr[i] = cpu_to_be64(__pa(tmp) | | |
242 | TCE_PCI_READ | TCE_PCI_WRITE); | |
243 | ||
244 | if (*current_offset >= limit) | |
245 | break; | |
246 | } | |
247 | ||
248 | return addr; | |
249 | } | |
250 | ||
251 | long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, | |
252 | __u32 page_shift, __u64 window_size, __u32 levels, | |
090bad39 | 253 | bool alloc_userspace_copy, struct iommu_table *tbl) |
191c2287 | 254 | { |
090bad39 | 255 | void *addr, *uas = NULL; |
191c2287 | 256 | unsigned long offset = 0, level_shift, total_allocated = 0; |
090bad39 | 257 | unsigned long total_allocated_uas = 0; |
191c2287 AK |
258 | const unsigned int window_shift = ilog2(window_size); |
259 | unsigned int entries_shift = window_shift - page_shift; | |
260 | unsigned int table_shift = max_t(unsigned int, entries_shift + 3, | |
261 | PAGE_SHIFT); | |
262 | const unsigned long tce_table_size = 1UL << table_shift; | |
a68bd126 | 263 | unsigned int tmplevels = levels; |
191c2287 AK |
264 | |
265 | if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS)) | |
266 | return -EINVAL; | |
267 | ||
268 | if (!is_power_of_2(window_size)) | |
269 | return -EINVAL; | |
270 | ||
a68bd126 AK |
271 | if (alloc_userspace_copy && (window_size > (1ULL << 32))) |
272 | tmplevels = 1; | |
273 | ||
191c2287 AK |
274 | /* Adjust direct table size from window_size and levels */ |
275 | entries_shift = (entries_shift + levels - 1) / levels; | |
276 | level_shift = entries_shift + 3; | |
277 | level_shift = max_t(unsigned int, level_shift, PAGE_SHIFT); | |
278 | ||
7233b8ca | 279 | if ((level_shift - 3) * levels + page_shift >= 55) |
191c2287 AK |
280 | return -EINVAL; |
281 | ||
282 | /* Allocate TCE table */ | |
283 | addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift, | |
a68bd126 | 284 | tmplevels, tce_table_size, &offset, &total_allocated); |
191c2287 AK |
285 | |
286 | /* addr==NULL means that the first level allocation failed */ | |
287 | if (!addr) | |
288 | return -ENOMEM; | |
289 | ||
290 | /* | |
291 | * First level was allocated but some lower level failed as | |
292 | * we did not allocate as much as we wanted, | |
293 | * release partially allocated table. | |
294 | */ | |
a68bd126 | 295 | if (tmplevels == levels && offset < tce_table_size) |
090bad39 AK |
296 | goto free_tces_exit; |
297 | ||
298 | /* Allocate userspace view of the TCE table */ | |
299 | if (alloc_userspace_copy) { | |
300 | offset = 0; | |
301 | uas = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift, | |
302 | levels, tce_table_size, &offset, | |
303 | &total_allocated_uas); | |
304 | if (!uas) | |
305 | goto free_tces_exit; | |
a68bd126 AK |
306 | if (tmplevels == levels && (offset < tce_table_size || |
307 | total_allocated_uas != total_allocated)) | |
090bad39 | 308 | goto free_uas_exit; |
191c2287 AK |
309 | } |
310 | ||
311 | /* Setup linux iommu table */ | |
312 | pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, bus_offset, | |
313 | page_shift); | |
314 | tbl->it_level_size = 1ULL << (level_shift - 3); | |
315 | tbl->it_indirect_levels = levels - 1; | |
316 | tbl->it_allocated_size = total_allocated; | |
090bad39 | 317 | tbl->it_userspace = uas; |
a68bd126 | 318 | tbl->it_nid = nid; |
191c2287 | 319 | |
a68bd126 | 320 | pr_debug("Created TCE table: ws=%08llx ts=%lx @%08llx base=%lx uas=%p levels=%d/%d\n", |
090bad39 | 321 | window_size, tce_table_size, bus_offset, tbl->it_base, |
a68bd126 | 322 | tbl->it_userspace, tmplevels, levels); |
191c2287 AK |
323 | |
324 | return 0; | |
090bad39 AK |
325 | |
326 | free_uas_exit: | |
327 | pnv_pci_ioda2_table_do_free_pages(uas, | |
328 | 1ULL << (level_shift - 3), levels - 1); | |
329 | free_tces_exit: | |
330 | pnv_pci_ioda2_table_do_free_pages(addr, | |
331 | 1ULL << (level_shift - 3), levels - 1); | |
332 | ||
333 | return -ENOMEM; | |
191c2287 AK |
334 | } |
335 | ||
336 | static void pnv_iommu_table_group_link_free(struct rcu_head *head) | |
337 | { | |
338 | struct iommu_table_group_link *tgl = container_of(head, | |
339 | struct iommu_table_group_link, rcu); | |
340 | ||
341 | kfree(tgl); | |
342 | } | |
343 | ||
344 | void pnv_pci_unlink_table_and_group(struct iommu_table *tbl, | |
345 | struct iommu_table_group *table_group) | |
346 | { | |
347 | long i; | |
348 | bool found; | |
349 | struct iommu_table_group_link *tgl; | |
350 | ||
351 | if (!tbl || !table_group) | |
352 | return; | |
353 | ||
354 | /* Remove link to a group from table's list of attached groups */ | |
355 | found = false; | |
356 | list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) { | |
357 | if (tgl->table_group == table_group) { | |
358 | list_del_rcu(&tgl->next); | |
359 | call_rcu(&tgl->rcu, pnv_iommu_table_group_link_free); | |
360 | found = true; | |
361 | break; | |
362 | } | |
363 | } | |
364 | if (WARN_ON(!found)) | |
365 | return; | |
366 | ||
367 | /* Clean a pointer to iommu_table in iommu_table_group::tables[] */ | |
368 | found = false; | |
369 | for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { | |
370 | if (table_group->tables[i] == tbl) { | |
371 | table_group->tables[i] = NULL; | |
372 | found = true; | |
373 | break; | |
374 | } | |
375 | } | |
376 | WARN_ON(!found); | |
377 | } | |
378 | ||
379 | long pnv_pci_link_table_and_group(int node, int num, | |
380 | struct iommu_table *tbl, | |
381 | struct iommu_table_group *table_group) | |
382 | { | |
383 | struct iommu_table_group_link *tgl = NULL; | |
384 | ||
385 | if (WARN_ON(!tbl || !table_group)) | |
386 | return -EINVAL; | |
387 | ||
388 | tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL, | |
389 | node); | |
390 | if (!tgl) | |
391 | return -ENOMEM; | |
392 | ||
393 | tgl->table_group = table_group; | |
394 | list_add_rcu(&tgl->next, &tbl->it_group_list); | |
395 | ||
396 | table_group->tables[num] = tbl; | |
397 | ||
398 | return 0; | |
399 | } |