Merge tag 'parisc-for-6.1-1' of git://git.kernel.org/pub/scm/linux/kernel/git/deller...
[linux-block.git] / lib / test_hmm.c
CommitLineData
b2ef9f5a
RC
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * This is a module to test the HMM (Heterogeneous Memory Management)
4 * mirror and zone device private memory migration APIs of the kernel.
5 * Userspace programs can register with the driver to mirror their own address
6 * space and can use the device to read/write any valid virtual address.
7 */
8#include <linux/init.h>
9#include <linux/fs.h>
10#include <linux/mm.h>
11#include <linux/module.h>
12#include <linux/kernel.h>
13#include <linux/cdev.h>
14#include <linux/device.h>
dc90f084 15#include <linux/memremap.h>
b2ef9f5a
RC
16#include <linux/mutex.h>
17#include <linux/rwsem.h>
18#include <linux/sched.h>
19#include <linux/slab.h>
20#include <linux/highmem.h>
21#include <linux/delay.h>
22#include <linux/pagemap.h>
23#include <linux/hmm.h>
24#include <linux/vmalloc.h>
25#include <linux/swap.h>
26#include <linux/swapops.h>
27#include <linux/sched/mm.h>
28#include <linux/platform_device.h>
b659baea 29#include <linux/rmap.h>
730ff521
CH
30#include <linux/mmu_notifier.h>
31#include <linux/migrate.h>
b2ef9f5a
RC
32
33#include "test_hmm_uapi.h"
34
/* Number of entries in the dmirror_devices[] table. */
#define DMIRROR_NDEVICES		4
/* NOTE(review): not referenced in the visible part of this file. */
#define DMIRROR_RANGE_FAULT_TIMEOUT	1000
/* Size in bytes of one simulated device memory chunk. */
#define DEVMEM_CHUNK_SIZE		(256 * 1024 * 1024U)
/* Number of slots added to devmem_chunks[] every time it has to grow. */
#define DEVMEM_CHUNKS_RESERVE		16
39
4c2e0f76
AS
/*
 * BACKING_PAGE() - return the page that really holds the data of @page.
 *
 * A device_private page is only a dummy struct page standing in for a
 * piece of device memory; dmirror_devmem_alloc_page() allocates a real
 * system memory page as its backing storage and records it in
 * zone_device_data, so that is the page returned here.  Any other page
 * (in particular a device_coherent page, which is real CPU-accessible
 * memory) is used directly and returned unchanged.
 */
#define BACKING_PAGE(page) (is_device_private_page((page)) ? \
			   (page)->zone_device_data : (page))
50
25b80162
AS
51static unsigned long spm_addr_dev0;
52module_param(spm_addr_dev0, long, 0644);
53MODULE_PARM_DESC(spm_addr_dev0,
54 "Specify start address for SPM (special purpose memory) used for device 0. By setting this Coherent device type will be used. Make sure spm_addr_dev1 is set too. Minimum SPM size should be DEVMEM_CHUNK_SIZE.");
55
56static unsigned long spm_addr_dev1;
57module_param(spm_addr_dev1, long, 0644);
58MODULE_PARM_DESC(spm_addr_dev1,
59 "Specify start address for SPM (special purpose memory) used for device 1. By setting this Coherent device type will be used. Make sure spm_addr_dev0 is set too. Minimum SPM size should be DEVMEM_CHUNK_SIZE.");
60
b2ef9f5a
RC
61static const struct dev_pagemap_ops dmirror_devmem_ops;
62static const struct mmu_interval_notifier_ops dmirror_min_ops;
63static dev_t dmirror_dev;
b2ef9f5a
RC
64
65struct dmirror_device;
66
/*
 * Kernel-side staging buffer used to move data between user space and the
 * mirrored pages.
 */
struct dmirror_bounce {
	void			*ptr;		/* vmalloc()ed buffer */
	unsigned long		size;		/* size of @ptr in bytes */
	unsigned long		addr;		/* virtual address @ptr starts at */
	unsigned long		cpages;		/* pages copied so far */
};
73
/* Pointer tags stored alongside the page pointers kept in dmirror->pt. */
#define DPT_XA_TAG_ATOMIC	1UL	/* set by dmirror_atomic_map() */
#define DPT_XA_TAG_WRITE	3UL	/* entry may be written through */
76
77/*
78 * Data structure to track address ranges and register for mmu interval
79 * notifier updates.
80 */
81struct dmirror_interval {
82 struct mmu_interval_notifier notifier;
83 struct dmirror *dmirror;
84};
85
86/*
87 * Data attached to the open device file.
88 * Note that it might be shared after a fork().
89 */
90struct dmirror {
91 struct dmirror_device *mdevice;
92 struct xarray pt;
93 struct mmu_interval_notifier notifier;
94 struct mutex mutex;
95};
96
97/*
98 * ZONE_DEVICE pages for migration and simulating device memory.
99 */
100struct dmirror_chunk {
101 struct dev_pagemap pagemap;
102 struct dmirror_device *mdevice;
103};
104
105/*
106 * Per device data.
107 */
108struct dmirror_device {
109 struct cdev cdevice;
188f4826 110 unsigned int zone_device_type;
6a760f58 111 struct device device;
b2ef9f5a
RC
112
113 unsigned int devmem_capacity;
114 unsigned int devmem_count;
115 struct dmirror_chunk **devmem_chunks;
116 struct mutex devmem_lock; /* protects the above */
117
118 unsigned long calloc;
119 unsigned long cfree;
120 struct page *free_pages;
121 spinlock_t lock; /* protects the above */
122};
123
124static struct dmirror_device dmirror_devices[DMIRROR_NDEVICES];
125
126static int dmirror_bounce_init(struct dmirror_bounce *bounce,
127 unsigned long addr,
128 unsigned long size)
129{
130 bounce->addr = addr;
131 bounce->size = size;
132 bounce->cpages = 0;
133 bounce->ptr = vmalloc(size);
134 if (!bounce->ptr)
135 return -ENOMEM;
136 return 0;
137}
138
4c2e0f76
AS
139static bool dmirror_is_private_zone(struct dmirror_device *mdevice)
140{
141 return (mdevice->zone_device_type ==
142 HMM_DMIRROR_MEMORY_DEVICE_PRIVATE) ? true : false;
143}
144
145static enum migrate_vma_direction
146dmirror_select_device(struct dmirror *dmirror)
147{
148 return (dmirror->mdevice->zone_device_type ==
149 HMM_DMIRROR_MEMORY_DEVICE_PRIVATE) ?
150 MIGRATE_VMA_SELECT_DEVICE_PRIVATE :
151 MIGRATE_VMA_SELECT_DEVICE_COHERENT;
152}
153
b2ef9f5a
RC
154static void dmirror_bounce_fini(struct dmirror_bounce *bounce)
155{
156 vfree(bounce->ptr);
157}
158
159static int dmirror_fops_open(struct inode *inode, struct file *filp)
160{
161 struct cdev *cdev = inode->i_cdev;
162 struct dmirror *dmirror;
163 int ret;
164
165 /* Mirror this process address space */
166 dmirror = kzalloc(sizeof(*dmirror), GFP_KERNEL);
167 if (dmirror == NULL)
168 return -ENOMEM;
169
170 dmirror->mdevice = container_of(cdev, struct dmirror_device, cdevice);
171 mutex_init(&dmirror->mutex);
172 xa_init(&dmirror->pt);
173
174 ret = mmu_interval_notifier_insert(&dmirror->notifier, current->mm,
175 0, ULONG_MAX & PAGE_MASK, &dmirror_min_ops);
176 if (ret) {
177 kfree(dmirror);
178 return ret;
179 }
180
181 filp->private_data = dmirror;
182 return 0;
183}
184
185static int dmirror_fops_release(struct inode *inode, struct file *filp)
186{
187 struct dmirror *dmirror = filp->private_data;
188
189 mmu_interval_notifier_remove(&dmirror->notifier);
190 xa_destroy(&dmirror->pt);
191 kfree(dmirror);
192 return 0;
193}
194
195static struct dmirror_device *dmirror_page_to_device(struct page *page)
196
197{
198 return container_of(page->pgmap, struct dmirror_chunk,
199 pagemap)->mdevice;
200}
201
202static int dmirror_do_fault(struct dmirror *dmirror, struct hmm_range *range)
203{
204 unsigned long *pfns = range->hmm_pfns;
205 unsigned long pfn;
206
207 for (pfn = (range->start >> PAGE_SHIFT);
208 pfn < (range->end >> PAGE_SHIFT);
209 pfn++, pfns++) {
210 struct page *page;
211 void *entry;
212
213 /*
214 * Since we asked for hmm_range_fault() to populate pages,
215 * it shouldn't return an error entry on success.
216 */
217 WARN_ON(*pfns & HMM_PFN_ERROR);
218 WARN_ON(!(*pfns & HMM_PFN_VALID));
219
220 page = hmm_pfn_to_page(*pfns);
221 WARN_ON(!page);
222
223 entry = page;
224 if (*pfns & HMM_PFN_WRITE)
225 entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE);
226 else if (WARN_ON(range->default_flags & HMM_PFN_WRITE))
227 return -EFAULT;
228 entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
229 if (xa_is_err(entry))
230 return xa_err(entry);
231 }
232
233 return 0;
234}
235
236static void dmirror_do_update(struct dmirror *dmirror, unsigned long start,
237 unsigned long end)
238{
239 unsigned long pfn;
240 void *entry;
241
242 /*
243 * The XArray doesn't hold references to pages since it relies on
244 * the mmu notifier to clear page pointers when they become stale.
245 * Therefore, it is OK to just clear the entry.
246 */
247 xa_for_each_range(&dmirror->pt, pfn, entry, start >> PAGE_SHIFT,
248 end >> PAGE_SHIFT)
249 xa_erase(&dmirror->pt, pfn);
250}
251
252static bool dmirror_interval_invalidate(struct mmu_interval_notifier *mni,
253 const struct mmu_notifier_range *range,
254 unsigned long cur_seq)
255{
256 struct dmirror *dmirror = container_of(mni, struct dmirror, notifier);
257
7d17e83a
RC
258 /*
259 * Ignore invalidation callbacks for device private pages since
260 * the invalidation is handled as part of the migration process.
261 */
262 if (range->event == MMU_NOTIFY_MIGRATE &&
6b49bf6d 263 range->owner == dmirror->mdevice)
7d17e83a
RC
264 return true;
265
b2ef9f5a
RC
266 if (mmu_notifier_range_blockable(range))
267 mutex_lock(&dmirror->mutex);
268 else if (!mutex_trylock(&dmirror->mutex))
269 return false;
270
271 mmu_interval_set_seq(mni, cur_seq);
272 dmirror_do_update(dmirror, range->start, range->end);
273
274 mutex_unlock(&dmirror->mutex);
275 return true;
276}
277
278static const struct mmu_interval_notifier_ops dmirror_min_ops = {
279 .invalidate = dmirror_interval_invalidate,
280};
281
282static int dmirror_range_fault(struct dmirror *dmirror,
283 struct hmm_range *range)
284{
285 struct mm_struct *mm = dmirror->notifier.mm;
286 unsigned long timeout =
287 jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
288 int ret;
289
290 while (true) {
291 if (time_after(jiffies, timeout)) {
292 ret = -EBUSY;
293 goto out;
294 }
295
296 range->notifier_seq = mmu_interval_read_begin(range->notifier);
89154dd5 297 mmap_read_lock(mm);
b2ef9f5a 298 ret = hmm_range_fault(range);
89154dd5 299 mmap_read_unlock(mm);
b2ef9f5a
RC
300 if (ret) {
301 if (ret == -EBUSY)
302 continue;
303 goto out;
304 }
305
306 mutex_lock(&dmirror->mutex);
307 if (mmu_interval_read_retry(range->notifier,
308 range->notifier_seq)) {
309 mutex_unlock(&dmirror->mutex);
310 continue;
311 }
312 break;
313 }
314
315 ret = dmirror_do_fault(dmirror, range);
316
317 mutex_unlock(&dmirror->mutex);
318out:
319 return ret;
320}
321
322static int dmirror_fault(struct dmirror *dmirror, unsigned long start,
323 unsigned long end, bool write)
324{
325 struct mm_struct *mm = dmirror->notifier.mm;
326 unsigned long addr;
327 unsigned long pfns[64];
328 struct hmm_range range = {
329 .notifier = &dmirror->notifier,
330 .hmm_pfns = pfns,
331 .pfn_flags_mask = 0,
332 .default_flags =
333 HMM_PFN_REQ_FAULT | (write ? HMM_PFN_REQ_WRITE : 0),
334 .dev_private_owner = dmirror->mdevice,
335 };
336 int ret = 0;
337
338 /* Since the mm is for the mirrored process, get a reference first. */
339 if (!mmget_not_zero(mm))
340 return 0;
341
342 for (addr = start; addr < end; addr = range.end) {
343 range.start = addr;
344 range.end = min(addr + (ARRAY_SIZE(pfns) << PAGE_SHIFT), end);
345
346 ret = dmirror_range_fault(dmirror, &range);
347 if (ret)
348 break;
349 }
350
351 mmput(mm);
352 return ret;
353}
354
355static int dmirror_do_read(struct dmirror *dmirror, unsigned long start,
356 unsigned long end, struct dmirror_bounce *bounce)
357{
358 unsigned long pfn;
359 void *ptr;
360
361 ptr = bounce->ptr + ((start - bounce->addr) & PAGE_MASK);
362
363 for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++) {
364 void *entry;
365 struct page *page;
366 void *tmp;
367
368 entry = xa_load(&dmirror->pt, pfn);
369 page = xa_untag_pointer(entry);
370 if (!page)
371 return -ENOENT;
372
373 tmp = kmap(page);
374 memcpy(ptr, tmp, PAGE_SIZE);
375 kunmap(page);
376
377 ptr += PAGE_SIZE;
378 bounce->cpages++;
379 }
380
381 return 0;
382}
383
384static int dmirror_read(struct dmirror *dmirror, struct hmm_dmirror_cmd *cmd)
385{
386 struct dmirror_bounce bounce;
387 unsigned long start, end;
388 unsigned long size = cmd->npages << PAGE_SHIFT;
389 int ret;
390
391 start = cmd->addr;
392 end = start + size;
393 if (end < start)
394 return -EINVAL;
395
396 ret = dmirror_bounce_init(&bounce, start, size);
397 if (ret)
398 return ret;
399
400 while (1) {
401 mutex_lock(&dmirror->mutex);
402 ret = dmirror_do_read(dmirror, start, end, &bounce);
403 mutex_unlock(&dmirror->mutex);
404 if (ret != -ENOENT)
405 break;
406
407 start = cmd->addr + (bounce.cpages << PAGE_SHIFT);
408 ret = dmirror_fault(dmirror, start, end, false);
409 if (ret)
410 break;
411 cmd->faults++;
412 }
413
414 if (ret == 0) {
415 if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr,
416 bounce.size))
417 ret = -EFAULT;
418 }
419 cmd->cpages = bounce.cpages;
420 dmirror_bounce_fini(&bounce);
421 return ret;
422}
423
424static int dmirror_do_write(struct dmirror *dmirror, unsigned long start,
425 unsigned long end, struct dmirror_bounce *bounce)
426{
427 unsigned long pfn;
428 void *ptr;
429
430 ptr = bounce->ptr + ((start - bounce->addr) & PAGE_MASK);
431
432 for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++) {
433 void *entry;
434 struct page *page;
435 void *tmp;
436
437 entry = xa_load(&dmirror->pt, pfn);
438 page = xa_untag_pointer(entry);
439 if (!page || xa_pointer_tag(entry) != DPT_XA_TAG_WRITE)
440 return -ENOENT;
441
442 tmp = kmap(page);
443 memcpy(tmp, ptr, PAGE_SIZE);
444 kunmap(page);
445
446 ptr += PAGE_SIZE;
447 bounce->cpages++;
448 }
449
450 return 0;
451}
452
453static int dmirror_write(struct dmirror *dmirror, struct hmm_dmirror_cmd *cmd)
454{
455 struct dmirror_bounce bounce;
456 unsigned long start, end;
457 unsigned long size = cmd->npages << PAGE_SHIFT;
458 int ret;
459
460 start = cmd->addr;
461 end = start + size;
462 if (end < start)
463 return -EINVAL;
464
465 ret = dmirror_bounce_init(&bounce, start, size);
466 if (ret)
467 return ret;
468 if (copy_from_user(bounce.ptr, u64_to_user_ptr(cmd->ptr),
469 bounce.size)) {
470 ret = -EFAULT;
471 goto fini;
472 }
473
474 while (1) {
475 mutex_lock(&dmirror->mutex);
476 ret = dmirror_do_write(dmirror, start, end, &bounce);
477 mutex_unlock(&dmirror->mutex);
478 if (ret != -ENOENT)
479 break;
480
481 start = cmd->addr + (bounce.cpages << PAGE_SHIFT);
482 ret = dmirror_fault(dmirror, start, end, true);
483 if (ret)
484 break;
485 cmd->faults++;
486 }
487
488fini:
489 cmd->cpages = bounce.cpages;
490 dmirror_bounce_fini(&bounce);
491 return ret;
492}
493
25b80162 494static int dmirror_allocate_chunk(struct dmirror_device *mdevice,
b2ef9f5a
RC
495 struct page **ppage)
496{
497 struct dmirror_chunk *devmem;
25b80162 498 struct resource *res = NULL;
b2ef9f5a
RC
499 unsigned long pfn;
500 unsigned long pfn_first;
501 unsigned long pfn_last;
502 void *ptr;
25b80162 503 int ret = -ENOMEM;
b2ef9f5a 504
a4574f63
DW
505 devmem = kzalloc(sizeof(*devmem), GFP_KERNEL);
506 if (!devmem)
25b80162 507 return ret;
a4574f63 508
25b80162
AS
509 switch (mdevice->zone_device_type) {
510 case HMM_DMIRROR_MEMORY_DEVICE_PRIVATE:
511 res = request_free_mem_region(&iomem_resource, DEVMEM_CHUNK_SIZE,
512 "hmm_dmirror");
513 if (IS_ERR_OR_NULL(res))
514 goto err_devmem;
515 devmem->pagemap.range.start = res->start;
516 devmem->pagemap.range.end = res->end;
517 devmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
518 break;
519 case HMM_DMIRROR_MEMORY_DEVICE_COHERENT:
520 devmem->pagemap.range.start = (MINOR(mdevice->cdevice.dev) - 2) ?
521 spm_addr_dev0 :
522 spm_addr_dev1;
523 devmem->pagemap.range.end = devmem->pagemap.range.start +
524 DEVMEM_CHUNK_SIZE - 1;
525 devmem->pagemap.type = MEMORY_DEVICE_COHERENT;
526 break;
527 default:
528 ret = -EINVAL;
a4574f63 529 goto err_devmem;
25b80162 530 }
a4574f63 531
b7b3c01b 532 devmem->pagemap.nr_range = 1;
a4574f63
DW
533 devmem->pagemap.ops = &dmirror_devmem_ops;
534 devmem->pagemap.owner = mdevice;
535
b2ef9f5a
RC
536 mutex_lock(&mdevice->devmem_lock);
537
538 if (mdevice->devmem_count == mdevice->devmem_capacity) {
539 struct dmirror_chunk **new_chunks;
540 unsigned int new_capacity;
541
542 new_capacity = mdevice->devmem_capacity +
543 DEVMEM_CHUNKS_RESERVE;
544 new_chunks = krealloc(mdevice->devmem_chunks,
545 sizeof(new_chunks[0]) * new_capacity,
546 GFP_KERNEL);
547 if (!new_chunks)
a4574f63 548 goto err_release;
b2ef9f5a
RC
549 mdevice->devmem_capacity = new_capacity;
550 mdevice->devmem_chunks = new_chunks;
551 }
b2ef9f5a 552 ptr = memremap_pages(&devmem->pagemap, numa_node_id());
25b80162
AS
553 if (IS_ERR_OR_NULL(ptr)) {
554 if (ptr)
555 ret = PTR_ERR(ptr);
556 else
557 ret = -EFAULT;
a4574f63 558 goto err_release;
25b80162 559 }
b2ef9f5a
RC
560
561 devmem->mdevice = mdevice;
a4574f63
DW
562 pfn_first = devmem->pagemap.range.start >> PAGE_SHIFT;
563 pfn_last = pfn_first + (range_len(&devmem->pagemap.range) >> PAGE_SHIFT);
b2ef9f5a
RC
564 mdevice->devmem_chunks[mdevice->devmem_count++] = devmem;
565
566 mutex_unlock(&mdevice->devmem_lock);
567
568 pr_info("added new %u MB chunk (total %u chunks, %u MB) PFNs [0x%lx 0x%lx)\n",
569 DEVMEM_CHUNK_SIZE / (1024 * 1024),
570 mdevice->devmem_count,
571 mdevice->devmem_count * (DEVMEM_CHUNK_SIZE / (1024 * 1024)),
572 pfn_first, pfn_last);
573
574 spin_lock(&mdevice->lock);
575 for (pfn = pfn_first; pfn < pfn_last; pfn++) {
576 struct page *page = pfn_to_page(pfn);
577
578 page->zone_device_data = mdevice->free_pages;
579 mdevice->free_pages = page;
580 }
581 if (ppage) {
582 *ppage = mdevice->free_pages;
583 mdevice->free_pages = (*ppage)->zone_device_data;
584 mdevice->calloc++;
585 }
586 spin_unlock(&mdevice->lock);
587
25b80162 588 return 0;
b2ef9f5a 589
b2ef9f5a 590err_release:
b2ef9f5a 591 mutex_unlock(&mdevice->devmem_lock);
25b80162
AS
592 if (res && devmem->pagemap.type == MEMORY_DEVICE_PRIVATE)
593 release_mem_region(devmem->pagemap.range.start,
594 range_len(&devmem->pagemap.range));
a4574f63
DW
595err_devmem:
596 kfree(devmem);
597
25b80162 598 return ret;
b2ef9f5a
RC
599}
600
601static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
602{
603 struct page *dpage = NULL;
4c2e0f76 604 struct page *rpage = NULL;
b2ef9f5a
RC
605
606 /*
4c2e0f76
AS
607 * For ZONE_DEVICE private type, this is a fake device so we allocate
608 * real system memory to store our device memory.
609 * For ZONE_DEVICE coherent type we use the actual dpage to store the
610 * data and ignore rpage.
b2ef9f5a 611 */
4c2e0f76
AS
612 if (dmirror_is_private_zone(mdevice)) {
613 rpage = alloc_page(GFP_HIGHUSER);
614 if (!rpage)
615 return NULL;
616 }
b2ef9f5a
RC
617 spin_lock(&mdevice->lock);
618
619 if (mdevice->free_pages) {
620 dpage = mdevice->free_pages;
621 mdevice->free_pages = dpage->zone_device_data;
622 mdevice->calloc++;
623 spin_unlock(&mdevice->lock);
624 } else {
625 spin_unlock(&mdevice->lock);
25b80162 626 if (dmirror_allocate_chunk(mdevice, &dpage))
b2ef9f5a
RC
627 goto error;
628 }
629
630 dpage->zone_device_data = rpage;
b2ef9f5a
RC
631 lock_page(dpage);
632 return dpage;
633
634error:
4c2e0f76
AS
635 if (rpage)
636 __free_page(rpage);
b2ef9f5a
RC
637 return NULL;
638}
639
640static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args,
641 struct dmirror *dmirror)
642{
643 struct dmirror_device *mdevice = dmirror->mdevice;
644 const unsigned long *src = args->src;
645 unsigned long *dst = args->dst;
646 unsigned long addr;
647
648 for (addr = args->start; addr < args->end; addr += PAGE_SIZE,
649 src++, dst++) {
650 struct page *spage;
651 struct page *dpage;
652 struct page *rpage;
653
654 if (!(*src & MIGRATE_PFN_MIGRATE))
655 continue;
656
657 /*
658 * Note that spage might be NULL which is OK since it is an
659 * unallocated pte_none() or read-only zero page.
660 */
661 spage = migrate_pfn_to_page(*src);
4c2e0f76
AS
662 if (WARN(spage && is_zone_device_page(spage),
663 "page already in device spage pfn: 0x%lx\n",
664 page_to_pfn(spage)))
665 continue;
b2ef9f5a 666
b2ef9f5a
RC
667 dpage = dmirror_devmem_alloc_page(mdevice);
668 if (!dpage)
669 continue;
670
4c2e0f76 671 rpage = BACKING_PAGE(dpage);
b2ef9f5a
RC
672 if (spage)
673 copy_highpage(rpage, spage);
674 else
675 clear_highpage(rpage);
676
677 /*
678 * Normally, a device would use the page->zone_device_data to
679 * point to the mirror but here we use it to hold the page for
680 * the simulated device memory and that page holds the pointer
681 * to the mirror.
682 */
683 rpage->zone_device_data = dmirror;
684
4c2e0f76
AS
685 pr_debug("migrating from sys to dev pfn src: 0x%lx pfn dst: 0x%lx\n",
686 page_to_pfn(spage), page_to_pfn(dpage));
ab09243a 687 *dst = migrate_pfn(page_to_pfn(dpage));
b2ef9f5a
RC
688 if ((*src & MIGRATE_PFN_WRITE) ||
689 (!spage && args->vma->vm_flags & VM_WRITE))
690 *dst |= MIGRATE_PFN_WRITE;
691 }
692}
693
b659baea
AP
694static int dmirror_check_atomic(struct dmirror *dmirror, unsigned long start,
695 unsigned long end)
696{
697 unsigned long pfn;
698
699 for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++) {
700 void *entry;
b659baea
AP
701
702 entry = xa_load(&dmirror->pt, pfn);
b659baea
AP
703 if (xa_pointer_tag(entry) == DPT_XA_TAG_ATOMIC)
704 return -EPERM;
705 }
706
707 return 0;
708}
709
710static int dmirror_atomic_map(unsigned long start, unsigned long end,
711 struct page **pages, struct dmirror *dmirror)
712{
713 unsigned long pfn, mapped = 0;
714 int i;
715
716 /* Map the migrated pages into the device's page tables. */
717 mutex_lock(&dmirror->mutex);
718
719 for (i = 0, pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++, i++) {
720 void *entry;
721
722 if (!pages[i])
723 continue;
724
725 entry = pages[i];
726 entry = xa_tag_pointer(entry, DPT_XA_TAG_ATOMIC);
727 entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
728 if (xa_is_err(entry)) {
729 mutex_unlock(&dmirror->mutex);
730 return xa_err(entry);
731 }
732
733 mapped++;
734 }
735
736 mutex_unlock(&dmirror->mutex);
737 return mapped;
738}
739
b2ef9f5a
RC
740static int dmirror_migrate_finalize_and_map(struct migrate_vma *args,
741 struct dmirror *dmirror)
742{
743 unsigned long start = args->start;
744 unsigned long end = args->end;
745 const unsigned long *src = args->src;
746 const unsigned long *dst = args->dst;
747 unsigned long pfn;
748
749 /* Map the migrated pages into the device's page tables. */
750 mutex_lock(&dmirror->mutex);
751
752 for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++,
753 src++, dst++) {
754 struct page *dpage;
755 void *entry;
756
757 if (!(*src & MIGRATE_PFN_MIGRATE))
758 continue;
759
760 dpage = migrate_pfn_to_page(*dst);
761 if (!dpage)
762 continue;
763
4c2e0f76 764 entry = BACKING_PAGE(dpage);
b2ef9f5a
RC
765 if (*dst & MIGRATE_PFN_WRITE)
766 entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE);
767 entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
768 if (xa_is_err(entry)) {
769 mutex_unlock(&dmirror->mutex);
770 return xa_err(entry);
771 }
772 }
773
774 mutex_unlock(&dmirror->mutex);
775 return 0;
776}
777
b659baea
AP
778static int dmirror_exclusive(struct dmirror *dmirror,
779 struct hmm_dmirror_cmd *cmd)
780{
781 unsigned long start, end, addr;
782 unsigned long size = cmd->npages << PAGE_SHIFT;
783 struct mm_struct *mm = dmirror->notifier.mm;
784 struct page *pages[64];
785 struct dmirror_bounce bounce;
786 unsigned long next;
787 int ret;
788
789 start = cmd->addr;
790 end = start + size;
791 if (end < start)
792 return -EINVAL;
793
794 /* Since the mm is for the mirrored process, get a reference first. */
795 if (!mmget_not_zero(mm))
796 return -EINVAL;
797
798 mmap_read_lock(mm);
799 for (addr = start; addr < end; addr = next) {
ed913b05 800 unsigned long mapped = 0;
b659baea
AP
801 int i;
802
803 if (end < addr + (ARRAY_SIZE(pages) << PAGE_SHIFT))
804 next = end;
805 else
806 next = addr + (ARRAY_SIZE(pages) << PAGE_SHIFT);
807
808 ret = make_device_exclusive_range(mm, addr, next, pages, NULL);
ed913b05
ML
809 /*
810 * Do dmirror_atomic_map() iff all pages are marked for
811 * exclusive access to avoid accessing uninitialized
812 * fields of pages.
813 */
814 if (ret == (next - addr) >> PAGE_SHIFT)
815 mapped = dmirror_atomic_map(addr, next, pages, dmirror);
b659baea
AP
816 for (i = 0; i < ret; i++) {
817 if (pages[i]) {
818 unlock_page(pages[i]);
819 put_page(pages[i]);
820 }
821 }
822
823 if (addr + (mapped << PAGE_SHIFT) < next) {
824 mmap_read_unlock(mm);
825 mmput(mm);
826 return -EBUSY;
827 }
828 }
829 mmap_read_unlock(mm);
830 mmput(mm);
831
832 /* Return the migrated data for verification. */
833 ret = dmirror_bounce_init(&bounce, start, size);
834 if (ret)
835 return ret;
836 mutex_lock(&dmirror->mutex);
837 ret = dmirror_do_read(dmirror, start, end, &bounce);
838 mutex_unlock(&dmirror->mutex);
839 if (ret == 0) {
840 if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr,
841 bounce.size))
842 ret = -EFAULT;
843 }
844
845 cmd->cpages = bounce.cpages;
846 dmirror_bounce_fini(&bounce);
847 return ret;
848}
849
4c2e0f76
AS
850static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args,
851 struct dmirror *dmirror)
852{
853 const unsigned long *src = args->src;
854 unsigned long *dst = args->dst;
855 unsigned long start = args->start;
856 unsigned long end = args->end;
857 unsigned long addr;
858
859 for (addr = start; addr < end; addr += PAGE_SIZE,
860 src++, dst++) {
861 struct page *dpage, *spage;
862
863 spage = migrate_pfn_to_page(*src);
864 if (!spage || !(*src & MIGRATE_PFN_MIGRATE))
865 continue;
866
867 if (WARN_ON(!is_device_private_page(spage) &&
868 !is_device_coherent_page(spage)))
869 continue;
870 spage = BACKING_PAGE(spage);
871 dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr);
872 if (!dpage)
873 continue;
874 pr_debug("migrating from dev to sys pfn src: 0x%lx pfn dst: 0x%lx\n",
875 page_to_pfn(spage), page_to_pfn(dpage));
876
877 lock_page(dpage);
878 xa_erase(&dmirror->pt, addr >> PAGE_SHIFT);
879 copy_highpage(dpage, spage);
880 *dst = migrate_pfn(page_to_pfn(dpage));
881 if (*src & MIGRATE_PFN_WRITE)
882 *dst |= MIGRATE_PFN_WRITE;
883 }
884 return 0;
885}
886
887static unsigned long
888dmirror_successful_migrated_pages(struct migrate_vma *migrate)
889{
890 unsigned long cpages = 0;
891 unsigned long i;
892
893 for (i = 0; i < migrate->npages; i++) {
894 if (migrate->src[i] & MIGRATE_PFN_VALID &&
895 migrate->src[i] & MIGRATE_PFN_MIGRATE)
896 cpages++;
897 }
898 return cpages;
899}
900
901static int dmirror_migrate_to_system(struct dmirror *dmirror,
902 struct hmm_dmirror_cmd *cmd)
b2ef9f5a
RC
903{
904 unsigned long start, end, addr;
905 unsigned long size = cmd->npages << PAGE_SHIFT;
906 struct mm_struct *mm = dmirror->notifier.mm;
907 struct vm_area_struct *vma;
4c2e0f76
AS
908 unsigned long src_pfns[64] = { 0 };
909 unsigned long dst_pfns[64] = { 0 };
910 struct migrate_vma args;
911 unsigned long next;
912 int ret;
913
914 start = cmd->addr;
915 end = start + size;
916 if (end < start)
917 return -EINVAL;
918
919 /* Since the mm is for the mirrored process, get a reference first. */
920 if (!mmget_not_zero(mm))
921 return -EINVAL;
922
923 cmd->cpages = 0;
924 mmap_read_lock(mm);
925 for (addr = start; addr < end; addr = next) {
926 vma = vma_lookup(mm, addr);
927 if (!vma || !(vma->vm_flags & VM_READ)) {
928 ret = -EINVAL;
929 goto out;
930 }
931 next = min(end, addr + (ARRAY_SIZE(src_pfns) << PAGE_SHIFT));
932 if (next > vma->vm_end)
933 next = vma->vm_end;
934
935 args.vma = vma;
936 args.src = src_pfns;
937 args.dst = dst_pfns;
938 args.start = addr;
939 args.end = next;
940 args.pgmap_owner = dmirror->mdevice;
941 args.flags = dmirror_select_device(dmirror);
942
943 ret = migrate_vma_setup(&args);
944 if (ret)
945 goto out;
946
947 pr_debug("Migrating from device mem to sys mem\n");
948 dmirror_devmem_fault_alloc_and_copy(&args, dmirror);
949
950 migrate_vma_pages(&args);
951 cmd->cpages += dmirror_successful_migrated_pages(&args);
952 migrate_vma_finalize(&args);
953 }
954out:
955 mmap_read_unlock(mm);
956 mmput(mm);
957
958 return ret;
959}
960
961static int dmirror_migrate_to_device(struct dmirror *dmirror,
962 struct hmm_dmirror_cmd *cmd)
963{
964 unsigned long start, end, addr;
965 unsigned long size = cmd->npages << PAGE_SHIFT;
966 struct mm_struct *mm = dmirror->notifier.mm;
967 struct vm_area_struct *vma;
968 unsigned long src_pfns[64] = { 0 };
969 unsigned long dst_pfns[64] = { 0 };
b2ef9f5a
RC
970 struct dmirror_bounce bounce;
971 struct migrate_vma args;
972 unsigned long next;
973 int ret;
974
975 start = cmd->addr;
976 end = start + size;
977 if (end < start)
978 return -EINVAL;
979
980 /* Since the mm is for the mirrored process, get a reference first. */
981 if (!mmget_not_zero(mm))
982 return -EINVAL;
983
89154dd5 984 mmap_read_lock(mm);
b2ef9f5a 985 for (addr = start; addr < end; addr = next) {
46e6b31d
LH
986 vma = vma_lookup(mm, addr);
987 if (!vma || !(vma->vm_flags & VM_READ)) {
b2ef9f5a
RC
988 ret = -EINVAL;
989 goto out;
990 }
991 next = min(end, addr + (ARRAY_SIZE(src_pfns) << PAGE_SHIFT));
992 if (next > vma->vm_end)
993 next = vma->vm_end;
994
995 args.vma = vma;
996 args.src = src_pfns;
997 args.dst = dst_pfns;
998 args.start = addr;
999 args.end = next;
7d17e83a 1000 args.pgmap_owner = dmirror->mdevice;
5143192c 1001 args.flags = MIGRATE_VMA_SELECT_SYSTEM;
b2ef9f5a
RC
1002 ret = migrate_vma_setup(&args);
1003 if (ret)
1004 goto out;
1005
4c2e0f76 1006 pr_debug("Migrating from sys mem to device mem\n");
b2ef9f5a
RC
1007 dmirror_migrate_alloc_and_copy(&args, dmirror);
1008 migrate_vma_pages(&args);
1009 dmirror_migrate_finalize_and_map(&args, dmirror);
1010 migrate_vma_finalize(&args);
1011 }
89154dd5 1012 mmap_read_unlock(mm);
b2ef9f5a
RC
1013 mmput(mm);
1014
4c2e0f76
AS
1015 /*
1016 * Return the migrated data for verification.
1017 * Only for pages in device zone
1018 */
b2ef9f5a
RC
1019 ret = dmirror_bounce_init(&bounce, start, size);
1020 if (ret)
1021 return ret;
1022 mutex_lock(&dmirror->mutex);
1023 ret = dmirror_do_read(dmirror, start, end, &bounce);
1024 mutex_unlock(&dmirror->mutex);
1025 if (ret == 0) {
1026 if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr,
1027 bounce.size))
1028 ret = -EFAULT;
1029 }
1030 cmd->cpages = bounce.cpages;
1031 dmirror_bounce_fini(&bounce);
1032 return ret;
1033
1034out:
89154dd5 1035 mmap_read_unlock(mm);
b2ef9f5a
RC
1036 mmput(mm);
1037 return ret;
1038}
1039
1040static void dmirror_mkentry(struct dmirror *dmirror, struct hmm_range *range,
1041 unsigned char *perm, unsigned long entry)
1042{
1043 struct page *page;
1044
1045 if (entry & HMM_PFN_ERROR) {
1046 *perm = HMM_DMIRROR_PROT_ERROR;
1047 return;
1048 }
1049 if (!(entry & HMM_PFN_VALID)) {
1050 *perm = HMM_DMIRROR_PROT_NONE;
1051 return;
1052 }
1053
1054 page = hmm_pfn_to_page(entry);
1055 if (is_device_private_page(page)) {
1056 /* Is the page migrated to this device or some other? */
1057 if (dmirror->mdevice == dmirror_page_to_device(page))
1058 *perm = HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL;
1059 else
1060 *perm = HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE;
4c2e0f76
AS
1061 } else if (is_device_coherent_page(page)) {
1062 /* Is the page migrated to this device or some other? */
1063 if (dmirror->mdevice == dmirror_page_to_device(page))
1064 *perm = HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL;
1065 else
1066 *perm = HMM_DMIRROR_PROT_DEV_COHERENT_REMOTE;
b2ef9f5a
RC
1067 } else if (is_zero_pfn(page_to_pfn(page)))
1068 *perm = HMM_DMIRROR_PROT_ZERO;
1069 else
1070 *perm = HMM_DMIRROR_PROT_NONE;
1071 if (entry & HMM_PFN_WRITE)
1072 *perm |= HMM_DMIRROR_PROT_WRITE;
1073 else
1074 *perm |= HMM_DMIRROR_PROT_READ;
e478425b
RC
1075 if (hmm_pfn_to_map_order(entry) + PAGE_SHIFT == PMD_SHIFT)
1076 *perm |= HMM_DMIRROR_PROT_PMD;
1077 else if (hmm_pfn_to_map_order(entry) + PAGE_SHIFT == PUD_SHIFT)
1078 *perm |= HMM_DMIRROR_PROT_PUD;
b2ef9f5a
RC
1079}
1080
1081static bool dmirror_snapshot_invalidate(struct mmu_interval_notifier *mni,
1082 const struct mmu_notifier_range *range,
1083 unsigned long cur_seq)
1084{
1085 struct dmirror_interval *dmi =
1086 container_of(mni, struct dmirror_interval, notifier);
1087 struct dmirror *dmirror = dmi->dmirror;
1088
1089 if (mmu_notifier_range_blockable(range))
1090 mutex_lock(&dmirror->mutex);
1091 else if (!mutex_trylock(&dmirror->mutex))
1092 return false;
1093
1094 /*
1095 * Snapshots only need to set the sequence number since any
1096 * invalidation in the interval invalidates the whole snapshot.
1097 */
1098 mmu_interval_set_seq(mni, cur_seq);
1099
1100 mutex_unlock(&dmirror->mutex);
1101 return true;
1102}
1103
1104static const struct mmu_interval_notifier_ops dmirror_mrn_ops = {
1105 .invalidate = dmirror_snapshot_invalidate,
1106};
1107
1108static int dmirror_range_snapshot(struct dmirror *dmirror,
1109 struct hmm_range *range,
1110 unsigned char *perm)
1111{
1112 struct mm_struct *mm = dmirror->notifier.mm;
1113 struct dmirror_interval notifier;
1114 unsigned long timeout =
1115 jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
1116 unsigned long i;
1117 unsigned long n;
1118 int ret = 0;
1119
1120 notifier.dmirror = dmirror;
1121 range->notifier = &notifier.notifier;
1122
1123 ret = mmu_interval_notifier_insert(range->notifier, mm,
1124 range->start, range->end - range->start,
1125 &dmirror_mrn_ops);
1126 if (ret)
1127 return ret;
1128
1129 while (true) {
1130 if (time_after(jiffies, timeout)) {
1131 ret = -EBUSY;
1132 goto out;
1133 }
1134
1135 range->notifier_seq = mmu_interval_read_begin(range->notifier);
1136
89154dd5 1137 mmap_read_lock(mm);
b2ef9f5a 1138 ret = hmm_range_fault(range);
89154dd5 1139 mmap_read_unlock(mm);
b2ef9f5a
RC
1140 if (ret) {
1141 if (ret == -EBUSY)
1142 continue;
1143 goto out;
1144 }
1145
1146 mutex_lock(&dmirror->mutex);
1147 if (mmu_interval_read_retry(range->notifier,
1148 range->notifier_seq)) {
1149 mutex_unlock(&dmirror->mutex);
1150 continue;
1151 }
1152 break;
1153 }
1154
1155 n = (range->end - range->start) >> PAGE_SHIFT;
1156 for (i = 0; i < n; i++)
1157 dmirror_mkentry(dmirror, range, perm + i, range->hmm_pfns[i]);
1158
1159 mutex_unlock(&dmirror->mutex);
1160out:
1161 mmu_interval_notifier_remove(range->notifier);
1162 return ret;
1163}
1164
1165static int dmirror_snapshot(struct dmirror *dmirror,
1166 struct hmm_dmirror_cmd *cmd)
1167{
1168 struct mm_struct *mm = dmirror->notifier.mm;
1169 unsigned long start, end;
1170 unsigned long size = cmd->npages << PAGE_SHIFT;
1171 unsigned long addr;
1172 unsigned long next;
1173 unsigned long pfns[64];
1174 unsigned char perm[64];
1175 char __user *uptr;
1176 struct hmm_range range = {
1177 .hmm_pfns = pfns,
1178 .dev_private_owner = dmirror->mdevice,
1179 };
1180 int ret = 0;
1181
1182 start = cmd->addr;
1183 end = start + size;
1184 if (end < start)
1185 return -EINVAL;
1186
1187 /* Since the mm is for the mirrored process, get a reference first. */
1188 if (!mmget_not_zero(mm))
1189 return -EINVAL;
1190
1191 /*
1192 * Register a temporary notifier to detect invalidations even if it
1193 * overlaps with other mmu_interval_notifiers.
1194 */
1195 uptr = u64_to_user_ptr(cmd->ptr);
1196 for (addr = start; addr < end; addr = next) {
1197 unsigned long n;
1198
1199 next = min(addr + (ARRAY_SIZE(pfns) << PAGE_SHIFT), end);
1200 range.start = addr;
1201 range.end = next;
1202
1203 ret = dmirror_range_snapshot(dmirror, &range, perm);
1204 if (ret)
1205 break;
1206
1207 n = (range.end - range.start) >> PAGE_SHIFT;
1208 if (copy_to_user(uptr, perm, n)) {
1209 ret = -EFAULT;
1210 break;
1211 }
1212
1213 cmd->cpages += n;
1214 uptr += n;
1215 }
1216 mmput(mm);
1217
1218 return ret;
1219}
1220
1221static long dmirror_fops_unlocked_ioctl(struct file *filp,
1222 unsigned int command,
1223 unsigned long arg)
1224{
1225 void __user *uarg = (void __user *)arg;
1226 struct hmm_dmirror_cmd cmd;
1227 struct dmirror *dmirror;
1228 int ret;
1229
1230 dmirror = filp->private_data;
1231 if (!dmirror)
1232 return -EINVAL;
1233
1234 if (copy_from_user(&cmd, uarg, sizeof(cmd)))
1235 return -EFAULT;
1236
1237 if (cmd.addr & ~PAGE_MASK)
1238 return -EINVAL;
1239 if (cmd.addr >= (cmd.addr + (cmd.npages << PAGE_SHIFT)))
1240 return -EINVAL;
1241
1242 cmd.cpages = 0;
1243 cmd.faults = 0;
1244
1245 switch (command) {
1246 case HMM_DMIRROR_READ:
1247 ret = dmirror_read(dmirror, &cmd);
1248 break;
1249
1250 case HMM_DMIRROR_WRITE:
1251 ret = dmirror_write(dmirror, &cmd);
1252 break;
1253
4c2e0f76
AS
1254 case HMM_DMIRROR_MIGRATE_TO_DEV:
1255 ret = dmirror_migrate_to_device(dmirror, &cmd);
1256 break;
1257
1258 case HMM_DMIRROR_MIGRATE_TO_SYS:
1259 ret = dmirror_migrate_to_system(dmirror, &cmd);
b2ef9f5a
RC
1260 break;
1261
b659baea
AP
1262 case HMM_DMIRROR_EXCLUSIVE:
1263 ret = dmirror_exclusive(dmirror, &cmd);
1264 break;
1265
1266 case HMM_DMIRROR_CHECK_EXCLUSIVE:
1267 ret = dmirror_check_atomic(dmirror, cmd.addr,
1268 cmd.addr + (cmd.npages << PAGE_SHIFT));
1269 break;
1270
b2ef9f5a
RC
1271 case HMM_DMIRROR_SNAPSHOT:
1272 ret = dmirror_snapshot(dmirror, &cmd);
1273 break;
1274
1275 default:
1276 return -EINVAL;
1277 }
1278 if (ret)
1279 return ret;
1280
1281 if (copy_to_user(uarg, &cmd, sizeof(cmd)))
1282 return -EFAULT;
1283
1284 return 0;
1285}
1286
87c01d57
AP
1287static int dmirror_fops_mmap(struct file *file, struct vm_area_struct *vma)
1288{
1289 unsigned long addr;
1290
1291 for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
1292 struct page *page;
1293 int ret;
1294
1295 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
1296 if (!page)
1297 return -ENOMEM;
1298
1299 ret = vm_insert_page(vma, addr, page);
1300 if (ret) {
1301 __free_page(page);
1302 return ret;
1303 }
1304 put_page(page);
1305 }
1306
1307 return 0;
1308}
1309
b2ef9f5a
RC
1310static const struct file_operations dmirror_fops = {
1311 .open = dmirror_fops_open,
1312 .release = dmirror_fops_release,
87c01d57 1313 .mmap = dmirror_fops_mmap,
b2ef9f5a
RC
1314 .unlocked_ioctl = dmirror_fops_unlocked_ioctl,
1315 .llseek = default_llseek,
1316 .owner = THIS_MODULE,
1317};
1318
1319static void dmirror_devmem_free(struct page *page)
1320{
4c2e0f76 1321 struct page *rpage = BACKING_PAGE(page);
b2ef9f5a
RC
1322 struct dmirror_device *mdevice;
1323
4c2e0f76 1324 if (rpage != page)
b2ef9f5a
RC
1325 __free_page(rpage);
1326
1327 mdevice = dmirror_page_to_device(page);
b2ef9f5a
RC
1328 spin_lock(&mdevice->lock);
1329 mdevice->cfree++;
1330 page->zone_device_data = mdevice->free_pages;
1331 mdevice->free_pages = page;
1332 spin_unlock(&mdevice->lock);
1333}
1334
b2ef9f5a
RC
1335static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf)
1336{
1337 struct migrate_vma args;
4c2e0f76
AS
1338 unsigned long src_pfns = 0;
1339 unsigned long dst_pfns = 0;
b2ef9f5a
RC
1340 struct page *rpage;
1341 struct dmirror *dmirror;
1342 vm_fault_t ret;
1343
1344 /*
1345 * Normally, a device would use the page->zone_device_data to point to
1346 * the mirror but here we use it to hold the page for the simulated
1347 * device memory and that page holds the pointer to the mirror.
1348 */
1349 rpage = vmf->page->zone_device_data;
1350 dmirror = rpage->zone_device_data;
1351
1352 /* FIXME demonstrate how we can adjust migrate range */
1353 args.vma = vmf->vma;
1354 args.start = vmf->address;
1355 args.end = args.start + PAGE_SIZE;
1356 args.src = &src_pfns;
1357 args.dst = &dst_pfns;
5143192c 1358 args.pgmap_owner = dmirror->mdevice;
4c2e0f76 1359 args.flags = dmirror_select_device(dmirror);
b2ef9f5a
RC
1360
1361 if (migrate_vma_setup(&args))
1362 return VM_FAULT_SIGBUS;
1363
7d17e83a 1364 ret = dmirror_devmem_fault_alloc_and_copy(&args, dmirror);
b2ef9f5a
RC
1365 if (ret)
1366 return ret;
1367 migrate_vma_pages(&args);
7d17e83a
RC
1368 /*
1369 * No device finalize step is needed since
1370 * dmirror_devmem_fault_alloc_and_copy() will have already
1371 * invalidated the device page table.
1372 */
b2ef9f5a
RC
1373 migrate_vma_finalize(&args);
1374 return 0;
1375}
1376
1377static const struct dev_pagemap_ops dmirror_devmem_ops = {
1378 .page_free = dmirror_devmem_free,
1379 .migrate_to_ram = dmirror_devmem_fault,
1380};
1381
1382static int dmirror_device_init(struct dmirror_device *mdevice, int id)
1383{
1384 dev_t dev;
1385 int ret;
1386
1387 dev = MKDEV(MAJOR(dmirror_dev), id);
1388 mutex_init(&mdevice->devmem_lock);
1389 spin_lock_init(&mdevice->lock);
1390
1391 cdev_init(&mdevice->cdevice, &dmirror_fops);
1392 mdevice->cdevice.owner = THIS_MODULE;
6a760f58
MP
1393 device_initialize(&mdevice->device);
1394 mdevice->device.devt = dev;
1395
1396 ret = dev_set_name(&mdevice->device, "hmm_dmirror%u", id);
1397 if (ret)
1398 return ret;
1399
1400 ret = cdev_device_add(&mdevice->cdevice, &mdevice->device);
b2ef9f5a
RC
1401 if (ret)
1402 return ret;
1403
25b80162
AS
1404 /* Build a list of free ZONE_DEVICE struct pages */
1405 return dmirror_allocate_chunk(mdevice, NULL);
b2ef9f5a
RC
1406}
1407
1408static void dmirror_device_remove(struct dmirror_device *mdevice)
1409{
1410 unsigned int i;
1411
1412 if (mdevice->devmem_chunks) {
1413 for (i = 0; i < mdevice->devmem_count; i++) {
1414 struct dmirror_chunk *devmem =
1415 mdevice->devmem_chunks[i];
1416
1417 memunmap_pages(&devmem->pagemap);
25b80162
AS
1418 if (devmem->pagemap.type == MEMORY_DEVICE_PRIVATE)
1419 release_mem_region(devmem->pagemap.range.start,
1420 range_len(&devmem->pagemap.range));
b2ef9f5a
RC
1421 kfree(devmem);
1422 }
1423 kfree(mdevice->devmem_chunks);
1424 }
1425
6a760f58 1426 cdev_device_del(&mdevice->cdevice, &mdevice->device);
b2ef9f5a
RC
1427}
1428
1429static int __init hmm_dmirror_init(void)
1430{
1431 int ret;
188f4826
AS
1432 int id = 0;
1433 int ndevices = 0;
b2ef9f5a
RC
1434
1435 ret = alloc_chrdev_region(&dmirror_dev, 0, DMIRROR_NDEVICES,
1436 "HMM_DMIRROR");
1437 if (ret)
1438 goto err_unreg;
1439
188f4826
AS
1440 memset(dmirror_devices, 0, DMIRROR_NDEVICES * sizeof(dmirror_devices[0]));
1441 dmirror_devices[ndevices++].zone_device_type =
1442 HMM_DMIRROR_MEMORY_DEVICE_PRIVATE;
1443 dmirror_devices[ndevices++].zone_device_type =
1444 HMM_DMIRROR_MEMORY_DEVICE_PRIVATE;
4c2e0f76
AS
1445 if (spm_addr_dev0 && spm_addr_dev1) {
1446 dmirror_devices[ndevices++].zone_device_type =
1447 HMM_DMIRROR_MEMORY_DEVICE_COHERENT;
1448 dmirror_devices[ndevices++].zone_device_type =
1449 HMM_DMIRROR_MEMORY_DEVICE_COHERENT;
1450 }
188f4826 1451 for (id = 0; id < ndevices; id++) {
b2ef9f5a
RC
1452 ret = dmirror_device_init(dmirror_devices + id, id);
1453 if (ret)
1454 goto err_chrdev;
1455 }
1456
b2ef9f5a
RC
1457 pr_info("HMM test module loaded. This is only for testing HMM.\n");
1458 return 0;
1459
1460err_chrdev:
1461 while (--id >= 0)
1462 dmirror_device_remove(dmirror_devices + id);
1463 unregister_chrdev_region(dmirror_dev, DMIRROR_NDEVICES);
1464err_unreg:
1465 return ret;
1466}
1467
1468static void __exit hmm_dmirror_exit(void)
1469{
1470 int id;
1471
b2ef9f5a 1472 for (id = 0; id < DMIRROR_NDEVICES; id++)
4c2e0f76
AS
1473 if (dmirror_devices[id].zone_device_type)
1474 dmirror_device_remove(dmirror_devices + id);
b2ef9f5a
RC
1475 unregister_chrdev_region(dmirror_dev, DMIRROR_NDEVICES);
1476}
1477
1478module_init(hmm_dmirror_init);
1479module_exit(hmm_dmirror_exit);
1480MODULE_LICENSE("GPL");