RDMA/odp: Use mmu_interval_notifier_insert()
[linux-2.6-block.git] drivers/infiniband/core/umem_odp.c
/*
 * Copyright (c) 2014 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials
 *   provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/types.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>
#include <linux/pid.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/vmalloc.h>
#include <linux/hugetlb.h>
#include <linux/interval_tree.h>
#include <linux/pagemap.h>

#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_umem_odp.h>

#include "uverbs.h"

static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp,
				   const struct mmu_interval_notifier_ops *ops)
{
	int ret;

	umem_odp->umem.is_odp = 1;
	mutex_init(&umem_odp->umem_mutex);

	if (!umem_odp->is_implicit_odp) {
		size_t page_size = 1UL << umem_odp->page_shift;
		unsigned long start;
		unsigned long end;
		size_t pages;

		start = ALIGN_DOWN(umem_odp->umem.address, page_size);
		if (check_add_overflow(umem_odp->umem.address,
				       (unsigned long)umem_odp->umem.length,
				       &end))
			return -EOVERFLOW;
		end = ALIGN(end, page_size);
		if (unlikely(end < page_size))
			return -EOVERFLOW;

		pages = (end - start) >> umem_odp->page_shift;
		if (!pages)
			return -EINVAL;

		umem_odp->page_list = kvcalloc(
			pages, sizeof(*umem_odp->page_list), GFP_KERNEL);
		if (!umem_odp->page_list)
			return -ENOMEM;

		umem_odp->dma_list = kvcalloc(
			pages, sizeof(*umem_odp->dma_list), GFP_KERNEL);
		if (!umem_odp->dma_list) {
			ret = -ENOMEM;
			goto out_page_list;
		}

		ret = mmu_interval_notifier_insert(&umem_odp->notifier,
						   umem_odp->umem.owning_mm,
						   start, end - start, ops);
		if (ret)
			goto out_dma_list;
	}

	return 0;

out_dma_list:
	kvfree(umem_odp->dma_list);
out_page_list:
	kvfree(umem_odp->page_list);
	return ret;
}

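/*
 * Illustrative sketch (not part of the original file): the @ops passed to
 * ib_init_umem_odp() is a driver-supplied mmu_interval_notifier_ops. A
 * minimal invalidate callback could, under stated assumptions, look roughly
 * like the following; my_odp_invalidate and my_odp_ops are hypothetical names.
 *
 *	static bool my_odp_invalidate(struct mmu_interval_notifier *mni,
 *				      const struct mmu_notifier_range *range,
 *				      unsigned long cur_seq)
 *	{
 *		struct ib_umem_odp *umem_odp =
 *			container_of(mni, struct ib_umem_odp, notifier);
 *
 *		if (!mmu_notifier_range_blockable(range))
 *			return false;
 *		mutex_lock(&umem_odp->umem_mutex);
 *		mmu_interval_set_seq(mni, cur_seq);
 *		ib_umem_odp_unmap_dma_pages(umem_odp, range->start, range->end);
 *		mutex_unlock(&umem_odp->umem_mutex);
 *		return true;
 *	}
 *
 *	static const struct mmu_interval_notifier_ops my_odp_ops = {
 *		.invalidate = my_odp_invalidate,
 *	};
 */
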
/**
 * ib_umem_odp_alloc_implicit - Allocate a parent implicit ODP umem
 *
 * Implicit ODP umems do not have a VA range and do not have any page lists.
 * They exist only to hold the per_mm reference to help the driver create
 * children umems.
 *
 * @udata: udata from the syscall being used to create the umem
 * @access: ib_reg_mr access flags
 */
struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_udata *udata,
					       int access)
{
	struct ib_ucontext *context =
		container_of(udata, struct uverbs_attr_bundle, driver_udata)
			->context;
	struct ib_umem *umem;
	struct ib_umem_odp *umem_odp;
	int ret;

	if (access & IB_ACCESS_HUGETLB)
		return ERR_PTR(-EINVAL);

	if (!context)
		return ERR_PTR(-EIO);

	umem_odp = kzalloc(sizeof(*umem_odp), GFP_KERNEL);
	if (!umem_odp)
		return ERR_PTR(-ENOMEM);
	umem = &umem_odp->umem;
	umem->ibdev = context->device;
	umem->writable = ib_access_writable(access);
	umem->owning_mm = current->mm;
	umem_odp->is_implicit_odp = 1;
	umem_odp->page_shift = PAGE_SHIFT;

	umem_odp->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
	ret = ib_init_umem_odp(umem_odp, NULL);
	if (ret) {
		put_pid(umem_odp->tgid);
		kfree(umem_odp);
		return ERR_PTR(ret);
	}
	return umem_odp;
}
EXPORT_SYMBOL(ib_umem_odp_alloc_implicit);
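
/*
 * Illustrative use (hypothetical driver code, not part of this file): a
 * driver implementing implicit ODP would allocate only the parent up front
 * and populate it lazily from its page-fault path, e.g.:
 *
 *	parent = ib_umem_odp_alloc_implicit(udata, access_flags);
 *	if (IS_ERR(parent))
 *		return ERR_CAST(parent);
 *
 * Children covering the faulting ranges are then created on demand with
 * ib_umem_odp_alloc_child() below.
 */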

/**
 * ib_umem_odp_alloc_child - Allocate a child ODP umem under an implicit
 *                           parent ODP umem
 *
 * @root: The parent umem enclosing the child. This must be allocated using
 *        ib_umem_odp_alloc_implicit()
 * @addr: The starting userspace VA
 * @size: The length of the userspace VA
 * @ops: MMU interval ops, currently only @invalidate
 */
struct ib_umem_odp *
ib_umem_odp_alloc_child(struct ib_umem_odp *root, unsigned long addr,
			size_t size,
			const struct mmu_interval_notifier_ops *ops)
{
	/*
	 * Caller must ensure that root cannot be freed during the call to
	 * ib_umem_odp_alloc_child.
	 */
	struct ib_umem_odp *odp_data;
	struct ib_umem *umem;
	int ret;

	if (WARN_ON(!root->is_implicit_odp))
		return ERR_PTR(-EINVAL);

	odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL);
	if (!odp_data)
		return ERR_PTR(-ENOMEM);
	umem = &odp_data->umem;
	umem->ibdev = root->umem.ibdev;
	umem->length = size;
	umem->address = addr;
	umem->writable = root->umem.writable;
	umem->owning_mm = root->umem.owning_mm;
	odp_data->page_shift = PAGE_SHIFT;
	odp_data->notifier.ops = ops;

	odp_data->tgid = get_pid(root->tgid);
	ret = ib_init_umem_odp(odp_data, ops);
	if (ret) {
		put_pid(odp_data->tgid);
		kfree(odp_data);
		return ERR_PTR(ret);
	}
	return odp_data;
}
EXPORT_SYMBOL(ib_umem_odp_alloc_child);
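
/*
 * Illustrative sketch (hypothetical, not part of this file): an implicit-ODP
 * page-fault handler might carve a child out of the parent for the faulting
 * range, using the driver's own notifier ops (my_odp_ops, fault_addr and
 * child_size are placeholders):
 *
 *	child = ib_umem_odp_alloc_child(parent, fault_addr & PAGE_MASK,
 *					child_size, &my_odp_ops);
 *	if (IS_ERR(child))
 *		return PTR_ERR(child);
 */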

/**
 * ib_umem_odp_get - Create a umem_odp for a userspace va
 *
 * @udata: userspace context to pin memory for
 * @addr: userspace virtual address to start at
 * @size: length of region to pin
 * @access: IB_ACCESS_xxx flags for memory being pinned
 * @ops: MMU interval ops, currently only @invalidate
 *
 * The driver should use this when the access flags indicate ODP memory. It
 * avoids pinning; instead it stores the mm for future page fault handling in
 * conjunction with MMU notifiers.
 */
struct ib_umem_odp *ib_umem_odp_get(struct ib_udata *udata, unsigned long addr,
				    size_t size, int access,
				    const struct mmu_interval_notifier_ops *ops)
{
	struct ib_umem_odp *umem_odp;
	struct ib_ucontext *context;
	struct mm_struct *mm;
	int ret;

	if (!udata)
		return ERR_PTR(-EIO);

	context = container_of(udata, struct uverbs_attr_bundle, driver_udata)
			  ->context;
	if (!context)
		return ERR_PTR(-EIO);

	if (WARN_ON_ONCE(!(access & IB_ACCESS_ON_DEMAND)))
		return ERR_PTR(-EINVAL);

	umem_odp = kzalloc(sizeof(struct ib_umem_odp), GFP_KERNEL);
	if (!umem_odp)
		return ERR_PTR(-ENOMEM);

	umem_odp->umem.ibdev = context->device;
	umem_odp->umem.length = size;
	umem_odp->umem.address = addr;
	umem_odp->umem.writable = ib_access_writable(access);
	umem_odp->umem.owning_mm = mm = current->mm;
	umem_odp->notifier.ops = ops;

	umem_odp->page_shift = PAGE_SHIFT;
	if (access & IB_ACCESS_HUGETLB) {
		struct vm_area_struct *vma;
		struct hstate *h;

		down_read(&mm->mmap_sem);
		vma = find_vma(mm, ib_umem_start(umem_odp));
		if (!vma || !is_vm_hugetlb_page(vma)) {
			up_read(&mm->mmap_sem);
			ret = -EINVAL;
			goto err_free;
		}
		h = hstate_vma(vma);
		umem_odp->page_shift = huge_page_shift(h);
		up_read(&mm->mmap_sem);
	}

	umem_odp->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
	ret = ib_init_umem_odp(umem_odp, ops);
	if (ret)
		goto err_put_pid;
	return umem_odp;

err_put_pid:
	put_pid(umem_odp->tgid);
err_free:
	kfree(umem_odp);
	return ERR_PTR(ret);
}
EXPORT_SYMBOL(ib_umem_odp_get);
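
/*
 * Illustrative sketch (hypothetical driver code, not part of this file):
 * registering an ODP MR would obtain the umem without pinning, passing the
 * driver's notifier ops (my_odp_ops is a placeholder):
 *
 *	odp = ib_umem_odp_get(udata, start, length, access_flags, &my_odp_ops);
 *	if (IS_ERR(odp))
 *		return ERR_CAST(odp);
 *
 * Pages are then brought in later, from the page-fault path, with
 * ib_umem_odp_map_dma_pages().
 */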

void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
{
	/*
	 * Ensure that no more pages are mapped in the umem.
	 *
	 * It is the driver's responsibility to ensure, before calling us,
	 * that the hardware will not attempt to access the MR any more.
	 */
	if (!umem_odp->is_implicit_odp) {
		mutex_lock(&umem_odp->umem_mutex);
		ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
					    ib_umem_end(umem_odp));
		mutex_unlock(&umem_odp->umem_mutex);
		mmu_interval_notifier_remove(&umem_odp->notifier);
		kvfree(umem_odp->dma_list);
		kvfree(umem_odp->page_list);
		put_pid(umem_odp->tgid);
	}
	kfree(umem_odp);
}
EXPORT_SYMBOL(ib_umem_odp_release);

/*
 * Map for DMA and insert a single page into the on-demand paging page tables.
 *
 * @umem_odp: the umem to insert the page to.
 * @page_index: index in the umem to add the page to.
 * @page: the page struct to map and add.
 * @access_mask: access permissions needed for this page.
 * @current_seq: sequence number for synchronization with invalidations.
 *               The sequence number is obtained from
 *               mmu_interval_read_begin(&umem_odp->notifier).
 *
 * The function returns -EFAULT if the DMA mapping operation fails. It returns
 * -EAGAIN if a concurrent invalidation prevents us from updating the page.
 *
 * The page is released via put_user_page even if the operation failed. For
 * on-demand pinning, the page is released whenever it isn't stored in the
 * umem.
 */
static int ib_umem_odp_map_dma_single_page(
		struct ib_umem_odp *umem_odp,
		unsigned int page_index,
		struct page *page,
		u64 access_mask,
		unsigned long current_seq)
{
	struct ib_device *dev = umem_odp->umem.ibdev;
	dma_addr_t dma_addr;
	int ret = 0;

	if (mmu_interval_check_retry(&umem_odp->notifier, current_seq)) {
		ret = -EAGAIN;
		goto out;
	}
	if (!(umem_odp->dma_list[page_index])) {
		dma_addr =
			ib_dma_map_page(dev, page, 0, BIT(umem_odp->page_shift),
					DMA_BIDIRECTIONAL);
		if (ib_dma_mapping_error(dev, dma_addr)) {
			ret = -EFAULT;
			goto out;
		}
		umem_odp->dma_list[page_index] = dma_addr | access_mask;
		umem_odp->page_list[page_index] = page;
		umem_odp->npages++;
	} else if (umem_odp->page_list[page_index] == page) {
		umem_odp->dma_list[page_index] |= access_mask;
	} else {
		/*
		 * There is a race here where we could have done:
		 *
		 *         CPU0                          CPU1
		 *   get_user_pages()
		 *                                    invalidate()
		 *                                    page_fault()
		 *   mutex_lock(umem_mutex)
		 *    page from GUP != page in ODP
		 *
		 * It should be prevented by the retry test above as reading
		 * the seq number should be reliable under the
		 * umem_mutex. Thus something is really not working right if
		 * things get here.
		 */
		WARN(true,
		     "Got different pages in IB device and from get_user_pages. IB device page: %p, gup page: %p\n",
		     umem_odp->page_list[page_index], page);
		ret = -EAGAIN;
	}

out:
	put_user_page(page);
	return ret;
}

/**
 * ib_umem_odp_map_dma_pages - Pin and DMA map userspace memory in an ODP MR.
 *
 * Pins the range of pages passed in the argument, and maps them to
 * DMA addresses. The DMA addresses of the mapped pages are updated in
 * umem_odp->dma_list.
 *
 * Returns the number of pages mapped on success, or a negative error code
 * on failure.
 * An -EAGAIN error code is returned when a concurrent mmu notifier prevents
 * the function from completing its task.
 * An -ENOENT error code indicates that the userspace process is being
 * terminated and mm was already destroyed.
 * @umem_odp: the umem to map and pin
 * @user_virt: the address from which we need to map.
 * @bcnt: the minimal number of bytes to pin and map. The mapping might be
 *        bigger due to alignment, and may also be smaller in case of an error
 *        pinning or mapping a page. The actual number of pages mapped is
 *        returned in the return value.
 * @access_mask: bit mask of the requested access permissions for the given
 *               range.
 * @current_seq: the MMU notifier sequence value for synchronization with
 *               invalidations. The sequence number is obtained from
 *               mmu_interval_read_begin(&umem_odp->notifier) before calling
 *               this function.
 */
int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
			      u64 bcnt, u64 access_mask,
			      unsigned long current_seq)
{
	struct task_struct *owning_process = NULL;
	struct mm_struct *owning_mm = umem_odp->umem.owning_mm;
	struct page **local_page_list = NULL;
	u64 page_mask, off;
	int j, k, ret = 0, start_idx, npages = 0;
	unsigned int flags = 0, page_shift;
	phys_addr_t p = 0;

	if (access_mask == 0)
		return -EINVAL;

	if (user_virt < ib_umem_start(umem_odp) ||
	    user_virt + bcnt > ib_umem_end(umem_odp))
		return -EFAULT;

	local_page_list = (struct page **)__get_free_page(GFP_KERNEL);
	if (!local_page_list)
		return -ENOMEM;

	page_shift = umem_odp->page_shift;
	page_mask = ~(BIT(page_shift) - 1);
	off = user_virt & (~page_mask);
	user_virt = user_virt & page_mask;
	bcnt += off; /* Charge for the first page offset as well. */

	/*
	 * owning_process is allowed to be NULL, this means somehow the mm is
	 * existing beyond the lifetime of the originating process. Presumably
	 * mmget_not_zero will fail in this case.
	 */
	owning_process = get_pid_task(umem_odp->tgid, PIDTYPE_PID);
	if (!owning_process || !mmget_not_zero(owning_mm)) {
		ret = -EINVAL;
		goto out_put_task;
	}

	if (access_mask & ODP_WRITE_ALLOWED_BIT)
		flags |= FOLL_WRITE;

	start_idx = (user_virt - ib_umem_start(umem_odp)) >> page_shift;
	k = start_idx;

	while (bcnt > 0) {
		const size_t gup_num_pages = min_t(size_t,
				(bcnt + BIT(page_shift) - 1) >> page_shift,
				PAGE_SIZE / sizeof(struct page *));

		down_read(&owning_mm->mmap_sem);
		/*
		 * Note: this might result in redundant page getting. We can
		 * avoid this by checking dma_list to be 0 before calling
		 * get_user_pages. However, this makes the code much more
		 * complex (and doesn't gain us much performance in most use
		 * cases).
		 */
		npages = get_user_pages_remote(owning_process, owning_mm,
					       user_virt, gup_num_pages,
					       flags, local_page_list, NULL, NULL);
		up_read(&owning_mm->mmap_sem);

		if (npages < 0) {
			if (npages != -EAGAIN)
				pr_warn("fail to get %zu user pages with error %d\n", gup_num_pages, npages);
			else
				pr_debug("fail to get %zu user pages with error %d\n", gup_num_pages, npages);
			break;
		}

		bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt);
		mutex_lock(&umem_odp->umem_mutex);
		for (j = 0; j < npages; j++, user_virt += PAGE_SIZE) {
			if (user_virt & ~page_mask) {
				p += PAGE_SIZE;
				if (page_to_phys(local_page_list[j]) != p) {
					ret = -EFAULT;
					break;
				}
				put_user_page(local_page_list[j]);
				continue;
			}

			ret = ib_umem_odp_map_dma_single_page(
					umem_odp, k, local_page_list[j],
					access_mask, current_seq);
			if (ret < 0) {
				if (ret != -EAGAIN)
					pr_warn("ib_umem_odp_map_dma_single_page failed with error %d\n", ret);
				else
					pr_debug("ib_umem_odp_map_dma_single_page failed with error %d\n", ret);
				break;
			}

			p = page_to_phys(local_page_list[j]);
			k++;
		}
		mutex_unlock(&umem_odp->umem_mutex);

		if (ret < 0) {
			/*
			 * Release pages, remembering that the first page
			 * to hit an error was already released by
			 * ib_umem_odp_map_dma_single_page().
			 */
			if (npages - (j + 1) > 0)
				put_user_pages(&local_page_list[j + 1],
					       npages - (j + 1));
			break;
		}
	}

	if (ret >= 0) {
		if (npages < 0 && k == start_idx)
			ret = npages;
		else
			ret = k - start_idx;
	}

	mmput(owning_mm);
out_put_task:
	if (owning_process)
		put_task_struct(owning_process);
	free_page((unsigned long)local_page_list);
	return ret;
}
EXPORT_SYMBOL(ib_umem_odp_map_dma_pages);
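
/*
 * Illustrative sketch (hypothetical, not part of this file): a driver
 * page-fault handler would typically pair ib_umem_odp_map_dma_pages() with
 * the interval notifier sequence, roughly as follows (the device page-table
 * update itself is elided):
 *
 *	unsigned long seq;
 *	int npages;
 *
 *	seq = mmu_interval_read_begin(&odp->notifier);
 *	npages = ib_umem_odp_map_dma_pages(odp, user_va, bcnt, access_mask,
 *					   seq);
 *	if (npages < 0)
 *		return npages;
 *
 *	mutex_lock(&odp->umem_mutex);
 *	if (!mmu_interval_read_retry(&odp->notifier, seq)) {
 *		... update the device page tables from odp->dma_list ...
 *	}
 *	mutex_unlock(&odp->umem_mutex);
 */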

void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
				 u64 bound)
{
	int idx;
	u64 addr;
	struct ib_device *dev = umem_odp->umem.ibdev;

	lockdep_assert_held(&umem_odp->umem_mutex);

	virt = max_t(u64, virt, ib_umem_start(umem_odp));
	bound = min_t(u64, bound, ib_umem_end(umem_odp));
	/*
	 * Note that while this function runs, the caller holds umem_mutex
	 * and, on the invalidation path, has already bumped the notifier
	 * sequence, so racing page faults cannot complete against us. We
	 * might be racing with other invalidations, so we must make sure we
	 * free each page only once.
	 */
	for (addr = virt; addr < bound; addr += BIT(umem_odp->page_shift)) {
		idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift;
		if (umem_odp->page_list[idx]) {
			struct page *page = umem_odp->page_list[idx];
			dma_addr_t dma = umem_odp->dma_list[idx];
			dma_addr_t dma_addr = dma & ODP_DMA_ADDR_MASK;

			WARN_ON(!dma_addr);

			ib_dma_unmap_page(dev, dma_addr,
					  BIT(umem_odp->page_shift),
					  DMA_BIDIRECTIONAL);
			if (dma & ODP_WRITE_ALLOWED_BIT) {
				struct page *head_page = compound_head(page);
				/*
				 * set_page_dirty prefers being called with
				 * the page lock. However, MMU notifiers are
				 * called sometimes with and sometimes without
				 * the lock. We rely on the umem_mutex instead
				 * to prevent other mmu notifiers from
				 * continuing and allowing the page mapping to
				 * be removed.
				 */
				set_page_dirty(head_page);
			}
			umem_odp->page_list[idx] = NULL;
			umem_odp->dma_list[idx] = 0;
			umem_odp->npages--;
		}
	}
}
EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages);