Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * linux/mm/msync.c | |
3 | * | |
4 | * Copyright (C) 1994-1999 Linus Torvalds | |
5 | */ | |
6 | ||
7 | /* | |
8 | * The msync() system call. | |
9 | */ | |
10 | #include <linux/slab.h> | |
11 | #include <linux/pagemap.h> | |
8f2e9f15 | 12 | #include <linux/fs.h> |
1da177e4 LT |
13 | #include <linux/mm.h> |
14 | #include <linux/mman.h> | |
15 | #include <linux/hugetlb.h> | |
9c50823e AM |
16 | #include <linux/writeback.h> |
17 | #include <linux/file.h> | |
1da177e4 LT |
18 | #include <linux/syscalls.h> |
19 | ||
20 | #include <asm/pgtable.h> | |
21 | #include <asm/tlbflush.h> | |
22 | ||
9c50823e | 23 | static unsigned long msync_pte_range(struct vm_area_struct *vma, pmd_t *pmd, |
1da177e4 LT |
24 | unsigned long addr, unsigned long end) |
25 | { | |
26 | pte_t *pte; | |
705e87c0 | 27 | spinlock_t *ptl; |
0c942a45 | 28 | int progress = 0; |
9c50823e | 29 | unsigned long ret = 0; |
1da177e4 | 30 | |
0c942a45 | 31 | again: |
705e87c0 | 32 | pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); |
1da177e4 | 33 | do { |
1da177e4 LT |
34 | struct page *page; |
35 | ||
0c942a45 HD |
36 | if (progress >= 64) { |
37 | progress = 0; | |
705e87c0 | 38 | if (need_resched() || need_lockbreak(ptl)) |
0c942a45 HD |
39 | break; |
40 | } | |
41 | progress++; | |
1da177e4 LT |
42 | if (!pte_present(*pte)) |
43 | continue; | |
b4955ce3 AK |
44 | if (!pte_maybe_dirty(*pte)) |
45 | continue; | |
6aab341e LT |
46 | page = vm_normal_page(vma, addr, *pte); |
47 | if (!page) | |
1da177e4 | 48 | continue; |
1da177e4 | 49 | if (ptep_clear_flush_dirty(vma, addr, pte) || |
9c50823e AM |
50 | page_test_and_clear_dirty(page)) |
51 | ret += set_page_dirty(page); | |
0c942a45 | 52 | progress += 3; |
1da177e4 | 53 | } while (pte++, addr += PAGE_SIZE, addr != end); |
705e87c0 HD |
54 | pte_unmap_unlock(pte - 1, ptl); |
55 | cond_resched(); | |
0c942a45 HD |
56 | if (addr != end) |
57 | goto again; | |
9c50823e | 58 | return ret; |
1da177e4 LT |
59 | } |
60 | ||
9c50823e AM |
61 | static inline unsigned long msync_pmd_range(struct vm_area_struct *vma, |
62 | pud_t *pud, unsigned long addr, unsigned long end) | |
1da177e4 LT |
63 | { |
64 | pmd_t *pmd; | |
65 | unsigned long next; | |
9c50823e | 66 | unsigned long ret = 0; |
1da177e4 LT |
67 | |
68 | pmd = pmd_offset(pud, addr); | |
69 | do { | |
70 | next = pmd_addr_end(addr, end); | |
71 | if (pmd_none_or_clear_bad(pmd)) | |
72 | continue; | |
9c50823e | 73 | ret += msync_pte_range(vma, pmd, addr, next); |
1da177e4 | 74 | } while (pmd++, addr = next, addr != end); |
9c50823e | 75 | return ret; |
1da177e4 LT |
76 | } |
77 | ||
9c50823e AM |
78 | static inline unsigned long msync_pud_range(struct vm_area_struct *vma, |
79 | pgd_t *pgd, unsigned long addr, unsigned long end) | |
1da177e4 LT |
80 | { |
81 | pud_t *pud; | |
82 | unsigned long next; | |
9c50823e | 83 | unsigned long ret = 0; |
1da177e4 LT |
84 | |
85 | pud = pud_offset(pgd, addr); | |
86 | do { | |
87 | next = pud_addr_end(addr, end); | |
88 | if (pud_none_or_clear_bad(pud)) | |
89 | continue; | |
9c50823e | 90 | ret += msync_pmd_range(vma, pud, addr, next); |
1da177e4 | 91 | } while (pud++, addr = next, addr != end); |
9c50823e | 92 | return ret; |
1da177e4 LT |
93 | } |
94 | ||
9c50823e | 95 | static unsigned long msync_page_range(struct vm_area_struct *vma, |
1da177e4 LT |
96 | unsigned long addr, unsigned long end) |
97 | { | |
1da177e4 LT |
98 | pgd_t *pgd; |
99 | unsigned long next; | |
9c50823e | 100 | unsigned long ret = 0; |
1da177e4 LT |
101 | |
102 | /* For hugepages we can't go walking the page table normally, | |
103 | * but that's ok, hugetlbfs is memory based, so we don't need | |
b5810039 | 104 | * to do anything more on an msync(). |
b5810039 | 105 | */ |
6aab341e | 106 | if (vma->vm_flags & VM_HUGETLB) |
9c50823e | 107 | return 0; |
1da177e4 LT |
108 | |
109 | BUG_ON(addr >= end); | |
705e87c0 | 110 | pgd = pgd_offset(vma->vm_mm, addr); |
1da177e4 | 111 | flush_cache_range(vma, addr, end); |
1da177e4 LT |
112 | do { |
113 | next = pgd_addr_end(addr, end); | |
114 | if (pgd_none_or_clear_bad(pgd)) | |
115 | continue; | |
9c50823e | 116 | ret += msync_pud_range(vma, pgd, addr, next); |
1da177e4 | 117 | } while (pgd++, addr = next, addr != end); |
9c50823e | 118 | return ret; |
1da177e4 LT |
119 | } |
120 | ||
/*
 * MS_SYNC syncs the entire file - including mappings.
 *
 * MS_ASYNC does not start I/O (it used to, up to 2.5.67).  Instead, it just
 * marks the relevant pages dirty.  The application may now run fsync() to
 * write out the dirty pages and wait on the writeout and check the result.
 * Or the application may run fadvise(FADV_DONTNEED) against the fd to start
 * async writeout immediately.
 * So by _not_ starting I/O in MS_ASYNC we provide complete flexibility to
 * applications.
 */
9c50823e AM |
132 | static int msync_interval(struct vm_area_struct *vma, unsigned long addr, |
133 | unsigned long end, int flags, | |
134 | unsigned long *nr_pages_dirtied) | |
1da177e4 | 135 | { |
1da177e4 LT |
136 | struct file *file = vma->vm_file; |
137 | ||
138 | if ((flags & MS_INVALIDATE) && (vma->vm_flags & VM_LOCKED)) | |
139 | return -EBUSY; | |
140 | ||
707c21c8 | 141 | if (file && (vma->vm_flags & VM_SHARED)) |
9c50823e | 142 | *nr_pages_dirtied = msync_page_range(vma, addr, end); |
707c21c8 | 143 | return 0; |
1da177e4 LT |
144 | } |
145 | ||
146 | asmlinkage long sys_msync(unsigned long start, size_t len, int flags) | |
147 | { | |
148 | unsigned long end; | |
149 | struct vm_area_struct *vma; | |
676758bd AM |
150 | int unmapped_error = 0; |
151 | int error = -EINVAL; | |
9c50823e | 152 | int done = 0; |
1da177e4 | 153 | |
1da177e4 LT |
154 | if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC)) |
155 | goto out; | |
156 | if (start & ~PAGE_MASK) | |
157 | goto out; | |
158 | if ((flags & MS_ASYNC) && (flags & MS_SYNC)) | |
159 | goto out; | |
160 | error = -ENOMEM; | |
161 | len = (len + ~PAGE_MASK) & PAGE_MASK; | |
162 | end = start + len; | |
163 | if (end < start) | |
164 | goto out; | |
165 | error = 0; | |
166 | if (end == start) | |
167 | goto out; | |
168 | /* | |
169 | * If the interval [start,end) covers some unmapped address ranges, | |
170 | * just ignore them, but return -ENOMEM at the end. | |
171 | */ | |
9c50823e AM |
172 | down_read(¤t->mm->mmap_sem); |
173 | if (flags & MS_SYNC) | |
174 | current->flags |= PF_SYNCWRITE; | |
1da177e4 | 175 | vma = find_vma(current->mm, start); |
676758bd AM |
176 | if (!vma) { |
177 | error = -ENOMEM; | |
178 | goto out_unlock; | |
179 | } | |
9c50823e AM |
180 | do { |
181 | unsigned long nr_pages_dirtied = 0; | |
182 | struct file *file; | |
183 | ||
1da177e4 LT |
184 | /* Here start < vma->vm_end. */ |
185 | if (start < vma->vm_start) { | |
186 | unmapped_error = -ENOMEM; | |
187 | start = vma->vm_start; | |
188 | } | |
189 | /* Here vma->vm_start <= start < vma->vm_end. */ | |
190 | if (end <= vma->vm_end) { | |
191 | if (start < end) { | |
9c50823e AM |
192 | error = msync_interval(vma, start, end, flags, |
193 | &nr_pages_dirtied); | |
1da177e4 | 194 | if (error) |
9c50823e | 195 | goto out_unlock; |
1da177e4 LT |
196 | } |
197 | error = unmapped_error; | |
9c50823e AM |
198 | done = 1; |
199 | } else { | |
200 | /* Here vma->vm_start <= start < vma->vm_end < end. */ | |
201 | error = msync_interval(vma, start, vma->vm_end, flags, | |
202 | &nr_pages_dirtied); | |
203 | if (error) | |
204 | goto out_unlock; | |
1da177e4 | 205 | } |
9c50823e | 206 | file = vma->vm_file; |
1da177e4 | 207 | start = vma->vm_end; |
9c50823e AM |
208 | if ((flags & MS_ASYNC) && file && nr_pages_dirtied) { |
209 | get_file(file); | |
210 | up_read(¤t->mm->mmap_sem); | |
211 | balance_dirty_pages_ratelimited_nr(file->f_mapping, | |
212 | nr_pages_dirtied); | |
213 | fput(file); | |
214 | down_read(¤t->mm->mmap_sem); | |
215 | vma = find_vma(current->mm, start); | |
707c21c8 AM |
216 | } else if ((flags & MS_SYNC) && file && |
217 | (vma->vm_flags & VM_SHARED)) { | |
707c21c8 AM |
218 | get_file(file); |
219 | up_read(¤t->mm->mmap_sem); | |
8f2e9f15 | 220 | error = do_fsync(file, 0); |
707c21c8 AM |
221 | fput(file); |
222 | down_read(¤t->mm->mmap_sem); | |
223 | if (error) | |
224 | goto out_unlock; | |
225 | vma = find_vma(current->mm, start); | |
9c50823e AM |
226 | } else { |
227 | vma = vma->vm_next; | |
228 | } | |
676758bd | 229 | } while (vma && !done); |
9c50823e | 230 | out_unlock: |
1da177e4 | 231 | current->flags &= ~PF_SYNCWRITE; |
9c50823e AM |
232 | up_read(¤t->mm->mmap_sem); |
233 | out: | |
1da177e4 LT |
234 | return error; |
235 | } |