Commit | Line | Data |
---|---|---|
58ccf019 JM |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* | |
3 | * Copyright (c) 2022, Oracle and/or its affiliates. | |
4 | * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved | |
5 | */ | |
6 | #include <linux/iova_bitmap.h> | |
7 | #include <linux/mm.h> | |
ea00d4ed | 8 | #include <linux/slab.h> |
58ccf019 JM |
9 | #include <linux/highmem.h> |
10 | ||
11 | #define BITS_PER_PAGE (PAGE_SIZE * BITS_PER_BYTE) | |
12 | ||
/*
 * struct iova_bitmap_map - A bitmap representing an IOVA range
 *
 * Describes the window of user bitmap pages that is currently pinned.
 *
 * For example, something recording dirty IOVAs is handed a struct
 * iova_bitmap, the general structure used to iterate the total IOVA range.
 * The struct iova_bitmap_map, though, represents only the subset of said
 * IOVA space whose bitmap pages are pinned by its parent structure
 * (struct iova_bitmap).
 *
 * The user does not need to know the exact location of the bits in the
 * bitmap. From the user perspective the only API available is
 * iova_bitmap_set(), which records an IOVA *range* in the bitmap by setting
 * the corresponding bits.
 *
 * The user bitmap is an array of u64 where each bit represents an IOVA
 * range of (1 << pgshift) bytes. With @iova taken relative to the base IOVA
 * of the bitmap, the bit tracking it is:
 *
 *   data[(iova >> pgshift) / 64] & (1ULL << ((iova >> pgshift) % 64))
 */
struct iova_bitmap_map {
	/*
	 * IOVA represented by the first bit of the pinned window; that bit
	 * lives @pgoff bytes into the first pinned page
	 */
	unsigned long iova;

	/* log2 of the IOVA granule that each bit covers */
	unsigned long pgshift;

	/* byte offset into the first pinned user page where the window starts */
	unsigned long pgoff;

	/* number of entries of @pages that are currently pinned */
	unsigned long npages;

	/* pinned user pages holding the bitmap data */
	struct page **pages;
};
50 | ||
51 | /* | |
52 | * struct iova_bitmap - The IOVA bitmap object | |
53 | * | |
54 | * Main data structure for iterating over the bitmap data. | |
55 | * | |
56 | * Abstracts the pinning work and iterates in IOVA ranges. | |
57 | * It uses a windowing scheme and pins the bitmap in relatively | |
58 | * big ranges e.g. | |
59 | * | |
60 | * The bitmap object uses one base page to store all the pinned pages | |
61 | * pointers related to the bitmap. For sizeof(struct page*) == 8 it stores | |
62 | * 512 struct page pointers which, if the base page size is 4K, it means | |
63 | * 2M of bitmap data is pinned at a time. If the iova_bitmap page size is | |
64 | * also 4K then the range window to iterate is 64G. | |
65 | * | |
66 | * For example iterating on a total IOVA range of 4G..128G, it will walk | |
67 | * through this set of ranges: | |
68 | * | |
69 | * 4G - 68G-1 (64G) | |
70 | * 68G - 128G-1 (64G) | |
71 | * | |
72 | * An example of the APIs on how to use/iterate over the IOVA bitmap: | |
73 | * | |
74 | * bitmap = iova_bitmap_alloc(iova, length, page_size, data); | |
75 | * if (IS_ERR(bitmap)) | |
76 | * return PTR_ERR(bitmap); | |
77 | * | |
78 | * ret = iova_bitmap_for_each(bitmap, arg, dirty_reporter_fn); | |
79 | * | |
80 | * iova_bitmap_free(bitmap); | |
81 | * | |
82 | * Each iteration of the @dirty_reporter_fn is called with a unique @iova | |
83 | * and @length argument, indicating the current range available through the | |
84 | * iova_bitmap. The @dirty_reporter_fn uses iova_bitmap_set() to mark dirty | |
85 | * areas (@iova_length) within that provided range, as following: | |
86 | * | |
87 | * iova_bitmap_set(bitmap, iova, iova_length); | |
88 | * | |
89 | * The internals of the object uses an index @mapped_base_index that indexes | |
90 | * which u64 word of the bitmap is mapped, up to @mapped_total_index. | |
91 | * Those keep being incremented until @mapped_total_index is reached while | |
92 | * mapping up to PAGE_SIZE / sizeof(struct page*) maximum of pages. | |
93 | * | |
94 | * The IOVA bitmap is usually located on what tracks DMA mapped ranges or | |
95 | * some form of IOVA range tracking that co-relates to the user passed | |
96 | * bitmap. | |
97 | */ | |
98 | struct iova_bitmap { | |
99 | /* IOVA range representing the currently mapped bitmap data */ | |
100 | struct iova_bitmap_map mapped; | |
101 | ||
102 | /* userspace address of the bitmap */ | |
d18411ec | 103 | u8 __user *bitmap; |
58ccf019 JM |
104 | |
105 | /* u64 index that @mapped points to */ | |
106 | unsigned long mapped_base_index; | |
107 | ||
108 | /* how many u64 can we walk in total */ | |
109 | unsigned long mapped_total_index; | |
110 | ||
111 | /* base IOVA of the whole bitmap */ | |
112 | unsigned long iova; | |
113 | ||
114 | /* length of the IOVA range for the whole bitmap */ | |
115 | size_t length; | |
2780025e JM |
116 | |
117 | /* length of the IOVA range set ahead the pinned pages */ | |
118 | unsigned long set_ahead_length; | |
58ccf019 JM |
119 | }; |
120 | ||
121 | /* | |
122 | * Converts a relative IOVA to a bitmap index. | |
123 | * This function provides the index into the u64 array (bitmap::bitmap) | |
124 | * for a given IOVA offset. | |
125 | * Relative IOVA means relative to the bitmap::mapped base IOVA | |
126 | * (stored in mapped::iova). All computations in this file are done using | |
127 | * relative IOVAs and thus avoid an extra subtraction against mapped::iova. | |
128 | * The user API iova_bitmap_set() always uses a regular absolute IOVAs. | |
129 | */ | |
130 | static unsigned long iova_bitmap_offset_to_index(struct iova_bitmap *bitmap, | |
131 | unsigned long iova) | |
132 | { | |
133 | unsigned long pgsize = 1 << bitmap->mapped.pgshift; | |
134 | ||
135 | return iova / (BITS_PER_TYPE(*bitmap->bitmap) * pgsize); | |
136 | } | |
137 | ||
138 | /* | |
139 | * Converts a bitmap index to a *relative* IOVA. | |
140 | */ | |
141 | static unsigned long iova_bitmap_index_to_offset(struct iova_bitmap *bitmap, | |
142 | unsigned long index) | |
143 | { | |
144 | unsigned long pgshift = bitmap->mapped.pgshift; | |
145 | ||
146 | return (index * BITS_PER_TYPE(*bitmap->bitmap)) << pgshift; | |
147 | } | |
148 | ||
149 | /* | |
150 | * Returns the base IOVA of the mapped range. | |
151 | */ | |
152 | static unsigned long iova_bitmap_mapped_iova(struct iova_bitmap *bitmap) | |
153 | { | |
154 | unsigned long skip = bitmap->mapped_base_index; | |
155 | ||
156 | return bitmap->iova + iova_bitmap_index_to_offset(bitmap, skip); | |
157 | } | |
158 | ||
159 | /* | |
160 | * Pins the bitmap user pages for the current range window. | |
161 | * This is internal to IOVA bitmap and called when advancing the | |
162 | * index (@mapped_base_index) or allocating the bitmap. | |
163 | */ | |
164 | static int iova_bitmap_get(struct iova_bitmap *bitmap) | |
165 | { | |
166 | struct iova_bitmap_map *mapped = &bitmap->mapped; | |
167 | unsigned long npages; | |
d18411ec | 168 | u8 __user *addr; |
58ccf019 JM |
169 | long ret; |
170 | ||
171 | /* | |
172 | * @mapped_base_index is the index of the currently mapped u64 words | |
173 | * that we have access. Anything before @mapped_base_index is not | |
174 | * mapped. The range @mapped_base_index .. @mapped_total_index-1 is | |
175 | * mapped but capped at a maximum number of pages. | |
176 | */ | |
177 | npages = DIV_ROUND_UP((bitmap->mapped_total_index - | |
178 | bitmap->mapped_base_index) * | |
179 | sizeof(*bitmap->bitmap), PAGE_SIZE); | |
180 | ||
58ccf019 JM |
181 | /* |
182 | * Bitmap address to be pinned is calculated via pointer arithmetic | |
183 | * with bitmap u64 word index. | |
184 | */ | |
185 | addr = bitmap->bitmap + bitmap->mapped_base_index; | |
186 | ||
4bbcbc6e JM |
187 | /* |
188 | * We always cap at max number of 'struct page' a base page can fit. | |
189 | * This is, for example, on x86 means 2M of bitmap data max. | |
190 | */ | |
191 | npages = min(npages + !!offset_in_page(addr), | |
192 | PAGE_SIZE / sizeof(struct page *)); | |
193 | ||
58ccf019 JM |
194 | ret = pin_user_pages_fast((unsigned long)addr, npages, |
195 | FOLL_WRITE, mapped->pages); | |
196 | if (ret <= 0) | |
197 | return -EFAULT; | |
198 | ||
199 | mapped->npages = (unsigned long)ret; | |
200 | /* Base IOVA where @pages point to i.e. bit 0 of the first page */ | |
201 | mapped->iova = iova_bitmap_mapped_iova(bitmap); | |
202 | ||
203 | /* | |
204 | * offset of the page where pinned pages bit 0 is located. | |
205 | * This handles the case where the bitmap is not PAGE_SIZE | |
206 | * aligned. | |
207 | */ | |
208 | mapped->pgoff = offset_in_page(addr); | |
209 | return 0; | |
210 | } | |
211 | ||
212 | /* | |
213 | * Unpins the bitmap user pages and clears @npages | |
214 | * (un)pinning is abstracted from API user and it's done when advancing | |
215 | * the index or freeing the bitmap. | |
216 | */ | |
217 | static void iova_bitmap_put(struct iova_bitmap *bitmap) | |
218 | { | |
219 | struct iova_bitmap_map *mapped = &bitmap->mapped; | |
220 | ||
221 | if (mapped->npages) { | |
222 | unpin_user_pages(mapped->pages, mapped->npages); | |
223 | mapped->npages = 0; | |
224 | } | |
225 | } | |
226 | ||
227 | /** | |
228 | * iova_bitmap_alloc() - Allocates an IOVA bitmap object | |
229 | * @iova: Start address of the IOVA range | |
230 | * @length: Length of the IOVA range | |
231 | * @page_size: Page size of the IOVA bitmap. It defines what each bit | |
232 | * granularity represents | |
233 | * @data: Userspace address of the bitmap | |
234 | * | |
235 | * Allocates an IOVA object and initializes all its fields including the | |
236 | * first user pages of @data. | |
237 | * | |
238 | * Return: A pointer to a newly allocated struct iova_bitmap | |
239 | * or ERR_PTR() on error. | |
240 | */ | |
241 | struct iova_bitmap *iova_bitmap_alloc(unsigned long iova, size_t length, | |
242 | unsigned long page_size, u64 __user *data) | |
243 | { | |
244 | struct iova_bitmap_map *mapped; | |
245 | struct iova_bitmap *bitmap; | |
246 | int rc; | |
247 | ||
248 | bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL); | |
249 | if (!bitmap) | |
250 | return ERR_PTR(-ENOMEM); | |
251 | ||
252 | mapped = &bitmap->mapped; | |
253 | mapped->pgshift = __ffs(page_size); | |
d18411ec | 254 | bitmap->bitmap = (u8 __user *)data; |
58ccf019 JM |
255 | bitmap->mapped_total_index = |
256 | iova_bitmap_offset_to_index(bitmap, length - 1) + 1; | |
257 | bitmap->iova = iova; | |
258 | bitmap->length = length; | |
259 | mapped->iova = iova; | |
260 | mapped->pages = (struct page **)__get_free_page(GFP_KERNEL); | |
261 | if (!mapped->pages) { | |
262 | rc = -ENOMEM; | |
263 | goto err; | |
264 | } | |
265 | ||
266 | rc = iova_bitmap_get(bitmap); | |
267 | if (rc) | |
268 | goto err; | |
269 | return bitmap; | |
270 | ||
271 | err: | |
272 | iova_bitmap_free(bitmap); | |
273 | return ERR_PTR(rc); | |
274 | } | |
13578d4e | 275 | EXPORT_SYMBOL_NS_GPL(iova_bitmap_alloc, IOMMUFD); |
58ccf019 JM |
276 | |
277 | /** | |
278 | * iova_bitmap_free() - Frees an IOVA bitmap object | |
279 | * @bitmap: IOVA bitmap to free | |
280 | * | |
281 | * It unpins and releases pages array memory and clears any leftover | |
282 | * state. | |
283 | */ | |
284 | void iova_bitmap_free(struct iova_bitmap *bitmap) | |
285 | { | |
286 | struct iova_bitmap_map *mapped = &bitmap->mapped; | |
287 | ||
288 | iova_bitmap_put(bitmap); | |
289 | ||
290 | if (mapped->pages) { | |
291 | free_page((unsigned long)mapped->pages); | |
292 | mapped->pages = NULL; | |
293 | } | |
294 | ||
295 | kfree(bitmap); | |
296 | } | |
13578d4e | 297 | EXPORT_SYMBOL_NS_GPL(iova_bitmap_free, IOMMUFD); |
58ccf019 JM |
298 | |
299 | /* | |
300 | * Returns the remaining bitmap indexes from mapped_total_index to process for | |
301 | * the currently pinned bitmap pages. | |
302 | */ | |
303 | static unsigned long iova_bitmap_mapped_remaining(struct iova_bitmap *bitmap) | |
304 | { | |
f38044e5 JM |
305 | unsigned long remaining, bytes; |
306 | ||
b058ea3a | 307 | bytes = (bitmap->mapped.npages << PAGE_SHIFT) - bitmap->mapped.pgoff; |
58ccf019 JM |
308 | |
309 | remaining = bitmap->mapped_total_index - bitmap->mapped_base_index; | |
310 | remaining = min_t(unsigned long, remaining, | |
d18411ec | 311 | DIV_ROUND_UP(bytes, sizeof(*bitmap->bitmap))); |
58ccf019 JM |
312 | |
313 | return remaining; | |
314 | } | |
315 | ||
316 | /* | |
317 | * Returns the length of the mapped IOVA range. | |
318 | */ | |
319 | static unsigned long iova_bitmap_mapped_length(struct iova_bitmap *bitmap) | |
320 | { | |
321 | unsigned long max_iova = bitmap->iova + bitmap->length - 1; | |
322 | unsigned long iova = iova_bitmap_mapped_iova(bitmap); | |
323 | unsigned long remaining; | |
324 | ||
325 | /* | |
326 | * iova_bitmap_mapped_remaining() returns a number of indexes which | |
327 | * when converted to IOVA gives us a max length that the bitmap | |
328 | * pinned data can cover. Afterwards, that is capped to | |
329 | * only cover the IOVA range in @bitmap::iova .. @bitmap::length. | |
330 | */ | |
331 | remaining = iova_bitmap_index_to_offset(bitmap, | |
332 | iova_bitmap_mapped_remaining(bitmap)); | |
333 | ||
334 | if (iova + remaining - 1 > max_iova) | |
335 | remaining -= ((iova + remaining - 1) - max_iova); | |
336 | ||
337 | return remaining; | |
338 | } | |
339 | ||
340 | /* | |
341 | * Returns true if there's not more data to iterate. | |
342 | */ | |
343 | static bool iova_bitmap_done(struct iova_bitmap *bitmap) | |
344 | { | |
345 | return bitmap->mapped_base_index >= bitmap->mapped_total_index; | |
346 | } | |
347 | ||
2780025e JM |
348 | static int iova_bitmap_set_ahead(struct iova_bitmap *bitmap, |
349 | size_t set_ahead_length) | |
350 | { | |
351 | int ret = 0; | |
352 | ||
353 | while (set_ahead_length > 0 && !iova_bitmap_done(bitmap)) { | |
354 | unsigned long length = iova_bitmap_mapped_length(bitmap); | |
355 | unsigned long iova = iova_bitmap_mapped_iova(bitmap); | |
356 | ||
357 | ret = iova_bitmap_get(bitmap); | |
358 | if (ret) | |
359 | break; | |
360 | ||
361 | length = min(length, set_ahead_length); | |
362 | iova_bitmap_set(bitmap, iova, length); | |
363 | ||
364 | set_ahead_length -= length; | |
365 | bitmap->mapped_base_index += | |
366 | iova_bitmap_offset_to_index(bitmap, length - 1) + 1; | |
367 | iova_bitmap_put(bitmap); | |
368 | } | |
369 | ||
370 | bitmap->set_ahead_length = 0; | |
371 | return ret; | |
372 | } | |
373 | ||
58ccf019 JM |
374 | /* |
375 | * Advances to the next range, releases the current pinned | |
376 | * pages and pins the next set of bitmap pages. | |
377 | * Returns 0 on success or otherwise errno. | |
378 | */ | |
379 | static int iova_bitmap_advance(struct iova_bitmap *bitmap) | |
380 | { | |
381 | unsigned long iova = iova_bitmap_mapped_length(bitmap) - 1; | |
382 | unsigned long count = iova_bitmap_offset_to_index(bitmap, iova) + 1; | |
383 | ||
384 | bitmap->mapped_base_index += count; | |
385 | ||
386 | iova_bitmap_put(bitmap); | |
387 | if (iova_bitmap_done(bitmap)) | |
388 | return 0; | |
389 | ||
2780025e JM |
390 | /* Iterate, set and skip any bits requested for next iteration */ |
391 | if (bitmap->set_ahead_length) { | |
392 | int ret; | |
393 | ||
394 | ret = iova_bitmap_set_ahead(bitmap, bitmap->set_ahead_length); | |
395 | if (ret) | |
396 | return ret; | |
397 | } | |
398 | ||
58ccf019 JM |
399 | /* When advancing the index we pin the next set of bitmap pages */ |
400 | return iova_bitmap_get(bitmap); | |
401 | } | |
402 | ||
403 | /** | |
404 | * iova_bitmap_for_each() - Iterates over the bitmap | |
405 | * @bitmap: IOVA bitmap to iterate | |
406 | * @opaque: Additional argument to pass to the callback | |
407 | * @fn: Function that gets called for each IOVA range | |
408 | * | |
409 | * Helper function to iterate over bitmap data representing a portion of IOVA | |
410 | * space. It hides the complexity of iterating bitmaps and translating the | |
411 | * mapped bitmap user pages into IOVA ranges to process. | |
412 | * | |
413 | * Return: 0 on success, and an error on failure either upon | |
414 | * iteration or when the callback returns an error. | |
415 | */ | |
416 | int iova_bitmap_for_each(struct iova_bitmap *bitmap, void *opaque, | |
417 | iova_bitmap_fn_t fn) | |
418 | { | |
419 | int ret = 0; | |
420 | ||
421 | for (; !iova_bitmap_done(bitmap) && !ret; | |
422 | ret = iova_bitmap_advance(bitmap)) { | |
423 | ret = fn(bitmap, iova_bitmap_mapped_iova(bitmap), | |
424 | iova_bitmap_mapped_length(bitmap), opaque); | |
425 | if (ret) | |
426 | break; | |
427 | } | |
428 | ||
429 | return ret; | |
430 | } | |
13578d4e | 431 | EXPORT_SYMBOL_NS_GPL(iova_bitmap_for_each, IOMMUFD); |
58ccf019 JM |
432 | |
433 | /** | |
434 | * iova_bitmap_set() - Records an IOVA range in bitmap | |
435 | * @bitmap: IOVA bitmap | |
436 | * @iova: IOVA to start | |
437 | * @length: IOVA range length | |
438 | * | |
439 | * Set the bits corresponding to the range [iova .. iova+length-1] in | |
440 | * the user bitmap. | |
441 | * | |
58ccf019 JM |
442 | */ |
443 | void iova_bitmap_set(struct iova_bitmap *bitmap, | |
444 | unsigned long iova, size_t length) | |
445 | { | |
446 | struct iova_bitmap_map *mapped = &bitmap->mapped; | |
b058ea3a JM |
447 | unsigned long cur_bit = ((iova - mapped->iova) >> |
448 | mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE; | |
449 | unsigned long last_bit = (((iova + length - 1) - mapped->iova) >> | |
450 | mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE; | |
a4ab7ded | 451 | unsigned long last_page_idx = mapped->npages - 1; |
58ccf019 JM |
452 | |
453 | do { | |
b058ea3a JM |
454 | unsigned int page_idx = cur_bit / BITS_PER_PAGE; |
455 | unsigned int offset = cur_bit % BITS_PER_PAGE; | |
456 | unsigned int nbits = min(BITS_PER_PAGE - offset, | |
457 | last_bit - cur_bit + 1); | |
458 | void *kaddr; | |
58ccf019 | 459 | |
a4ab7ded JM |
460 | if (unlikely(page_idx > last_page_idx)) |
461 | break; | |
462 | ||
58ccf019 | 463 | kaddr = kmap_local_page(mapped->pages[page_idx]); |
b058ea3a | 464 | bitmap_set(kaddr, offset, nbits); |
58ccf019 | 465 | kunmap_local(kaddr); |
b058ea3a JM |
466 | cur_bit += nbits; |
467 | } while (cur_bit <= last_bit); | |
2780025e JM |
468 | |
469 | if (unlikely(cur_bit <= last_bit)) { | |
470 | bitmap->set_ahead_length = | |
471 | ((last_bit - cur_bit + 1) << bitmap->mapped.pgshift); | |
472 | } | |
58ccf019 | 473 | } |
13578d4e | 474 | EXPORT_SYMBOL_NS_GPL(iova_bitmap_set, IOMMUFD); |