Commit | Line | Data |
---|---|---|
1507f512 MR |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* | |
3 | * Copyright IBM Corporation, 2021 | |
4 | * | |
5 | * Author: Mike Rapoport <rppt@linux.ibm.com> | |
6 | */ | |
7 | ||
8 | #include <linux/mm.h> | |
9 | #include <linux/fs.h> | |
10 | #include <linux/swap.h> | |
11 | #include <linux/mount.h> | |
12 | #include <linux/memfd.h> | |
13 | #include <linux/bitops.h> | |
14 | #include <linux/printk.h> | |
15 | #include <linux/pagemap.h> | |
16 | #include <linux/syscalls.h> | |
17 | #include <linux/pseudo_fs.h> | |
18 | #include <linux/secretmem.h> | |
19 | #include <linux/set_memory.h> | |
20 | #include <linux/sched/signal.h> | |
21 | ||
22 | #include <uapi/linux/magic.h> | |
23 | ||
24 | #include <asm/tlbflush.h> | |
25 | ||
26 | #include "internal.h" | |
27 | ||
28 | #undef pr_fmt | |
29 | #define pr_fmt(fmt) "secretmem: " fmt | |
30 | ||
31 | /* | |
32 | * Define mode and flag masks to allow validation of the system call | |
33 | * parameters. | |
34 | */ | |
35 | #define SECRETMEM_MODE_MASK (0x0) | |
36 | #define SECRETMEM_FLAGS_MASK SECRETMEM_MODE_MASK | |
37 | ||
b758fe6d | 38 | static bool secretmem_enable __ro_after_init = 1; |
1507f512 MR |
39 | module_param_named(enable, secretmem_enable, bool, 0400); |
40 | MODULE_PARM_DESC(secretmem_enable, | |
41 | "Enable secretmem and memfd_secret(2) system call"); | |
42 | ||
87066fdd | 43 | static atomic_t secretmem_users; |
9a436f8f MR |
44 | |
45 | bool secretmem_active(void) | |
46 | { | |
87066fdd | 47 | return !!atomic_read(&secretmem_users); |
9a436f8f MR |
48 | } |
49 | ||
1507f512 MR |
50 | static vm_fault_t secretmem_fault(struct vm_fault *vmf) |
51 | { | |
52 | struct address_space *mapping = vmf->vma->vm_file->f_mapping; | |
53 | struct inode *inode = file_inode(vmf->vma->vm_file); | |
54 | pgoff_t offset = vmf->pgoff; | |
55 | gfp_t gfp = vmf->gfp_mask; | |
56 | unsigned long addr; | |
57 | struct page *page; | |
84ac0130 | 58 | vm_fault_t ret; |
1507f512 MR |
59 | int err; |
60 | ||
61 | if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode)) | |
62 | return vmf_error(-EINVAL); | |
63 | ||
84ac0130 MR |
64 | filemap_invalidate_lock_shared(mapping); |
65 | ||
1507f512 MR |
66 | retry: |
67 | page = find_lock_page(mapping, offset); | |
68 | if (!page) { | |
69 | page = alloc_page(gfp | __GFP_ZERO); | |
84ac0130 MR |
70 | if (!page) { |
71 | ret = VM_FAULT_OOM; | |
72 | goto out; | |
73 | } | |
1507f512 MR |
74 | |
75 | err = set_direct_map_invalid_noflush(page); | |
76 | if (err) { | |
77 | put_page(page); | |
84ac0130 MR |
78 | ret = vmf_error(err); |
79 | goto out; | |
1507f512 MR |
80 | } |
81 | ||
82 | __SetPageUptodate(page); | |
83 | err = add_to_page_cache_lru(page, mapping, offset, gfp); | |
84 | if (unlikely(err)) { | |
85 | put_page(page); | |
86 | /* | |
87 | * If a split of large page was required, it | |
88 | * already happened when we marked the page invalid | |
89 | * which guarantees that this call won't fail | |
90 | */ | |
91 | set_direct_map_default_noflush(page); | |
92 | if (err == -EEXIST) | |
93 | goto retry; | |
94 | ||
84ac0130 MR |
95 | ret = vmf_error(err); |
96 | goto out; | |
1507f512 MR |
97 | } |
98 | ||
99 | addr = (unsigned long)page_address(page); | |
100 | flush_tlb_kernel_range(addr, addr + PAGE_SIZE); | |
101 | } | |
102 | ||
103 | vmf->page = page; | |
84ac0130 MR |
104 | ret = VM_FAULT_LOCKED; |
105 | ||
106 | out: | |
107 | filemap_invalidate_unlock_shared(mapping); | |
108 | return ret; | |
1507f512 MR |
109 | } |
110 | ||
111 | static const struct vm_operations_struct secretmem_vm_ops = { | |
112 | .fault = secretmem_fault, | |
113 | }; | |
114 | ||
9a436f8f MR |
115 | static int secretmem_release(struct inode *inode, struct file *file) |
116 | { | |
87066fdd | 117 | atomic_dec(&secretmem_users); |
9a436f8f MR |
118 | return 0; |
119 | } | |
120 | ||
1507f512 MR |
121 | static int secretmem_mmap(struct file *file, struct vm_area_struct *vma) |
122 | { | |
123 | unsigned long len = vma->vm_end - vma->vm_start; | |
124 | ||
125 | if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0) | |
126 | return -EINVAL; | |
127 | ||
b0cc5e89 | 128 | if (!mlock_future_ok(vma->vm_mm, vma->vm_flags | VM_LOCKED, len)) |
1507f512 MR |
129 | return -EAGAIN; |
130 | ||
1c71222e | 131 | vm_flags_set(vma, VM_LOCKED | VM_DONTDUMP); |
1507f512 MR |
132 | vma->vm_ops = &secretmem_vm_ops; |
133 | ||
134 | return 0; | |
135 | } | |
136 | ||
137 | bool vma_is_secretmem(struct vm_area_struct *vma) | |
138 | { | |
139 | return vma->vm_ops == &secretmem_vm_ops; | |
140 | } | |
141 | ||
142 | static const struct file_operations secretmem_fops = { | |
9a436f8f | 143 | .release = secretmem_release, |
1507f512 MR |
144 | .mmap = secretmem_mmap, |
145 | }; | |
146 | ||
5409548d MWO |
147 | static int secretmem_migrate_folio(struct address_space *mapping, |
148 | struct folio *dst, struct folio *src, enum migrate_mode mode) | |
1507f512 MR |
149 | { |
150 | return -EBUSY; | |
151 | } | |
152 | ||
6612ed24 | 153 | static void secretmem_free_folio(struct folio *folio) |
1507f512 | 154 | { |
6612ed24 MWO |
155 | set_direct_map_default_noflush(&folio->page); |
156 | folio_zero_segment(folio, 0, folio_size(folio)); | |
1507f512 MR |
157 | } |
158 | ||
159 | const struct address_space_operations secretmem_aops = { | |
46de8b97 | 160 | .dirty_folio = noop_dirty_folio, |
6612ed24 | 161 | .free_folio = secretmem_free_folio, |
5409548d | 162 | .migrate_folio = secretmem_migrate_folio, |
1507f512 MR |
163 | }; |
164 | ||
c1632a0f | 165 | static int secretmem_setattr(struct mnt_idmap *idmap, |
f9b141f9 AR |
166 | struct dentry *dentry, struct iattr *iattr) |
167 | { | |
168 | struct inode *inode = d_inode(dentry); | |
84ac0130 | 169 | struct address_space *mapping = inode->i_mapping; |
f9b141f9 | 170 | unsigned int ia_valid = iattr->ia_valid; |
84ac0130 MR |
171 | int ret; |
172 | ||
173 | filemap_invalidate_lock(mapping); | |
f9b141f9 AR |
174 | |
175 | if ((ia_valid & ATTR_SIZE) && inode->i_size) | |
84ac0130 MR |
176 | ret = -EINVAL; |
177 | else | |
c1632a0f | 178 | ret = simple_setattr(idmap, dentry, iattr); |
84ac0130 MR |
179 | |
180 | filemap_invalidate_unlock(mapping); | |
f9b141f9 | 181 | |
84ac0130 | 182 | return ret; |
f9b141f9 AR |
183 | } |
184 | ||
185 | static const struct inode_operations secretmem_iops = { | |
186 | .setattr = secretmem_setattr, | |
187 | }; | |
188 | ||
1507f512 MR |
189 | static struct vfsmount *secretmem_mnt; |
190 | ||
191 | static struct file *secretmem_file_create(unsigned long flags) | |
192 | { | |
98001fd6 | 193 | struct file *file; |
1507f512 | 194 | struct inode *inode; |
2bfe15c5 CG |
195 | const char *anon_name = "[secretmem]"; |
196 | const struct qstr qname = QSTR_INIT(anon_name, strlen(anon_name)); | |
197 | int err; | |
1507f512 MR |
198 | |
199 | inode = alloc_anon_inode(secretmem_mnt->mnt_sb); | |
200 | if (IS_ERR(inode)) | |
201 | return ERR_CAST(inode); | |
202 | ||
2bfe15c5 CG |
203 | err = security_inode_init_security_anon(inode, &qname, NULL); |
204 | if (err) { | |
205 | file = ERR_PTR(err); | |
206 | goto err_free_inode; | |
207 | } | |
208 | ||
1507f512 MR |
209 | file = alloc_file_pseudo(inode, secretmem_mnt, "secretmem", |
210 | O_RDWR, &secretmem_fops); | |
211 | if (IS_ERR(file)) | |
212 | goto err_free_inode; | |
213 | ||
214 | mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER); | |
215 | mapping_set_unevictable(inode->i_mapping); | |
216 | ||
f9b141f9 | 217 | inode->i_op = &secretmem_iops; |
1507f512 MR |
218 | inode->i_mapping->a_ops = &secretmem_aops; |
219 | ||
220 | /* pretend we are a normal file with zero size */ | |
221 | inode->i_mode |= S_IFREG; | |
222 | inode->i_size = 0; | |
223 | ||
224 | return file; | |
225 | ||
226 | err_free_inode: | |
227 | iput(inode); | |
228 | return file; | |
229 | } | |
230 | ||
231 | SYSCALL_DEFINE1(memfd_secret, unsigned int, flags) | |
232 | { | |
233 | struct file *file; | |
234 | int fd, err; | |
235 | ||
236 | /* make sure local flags do not confict with global fcntl.h */ | |
237 | BUILD_BUG_ON(SECRETMEM_FLAGS_MASK & O_CLOEXEC); | |
238 | ||
239 | if (!secretmem_enable) | |
240 | return -ENOSYS; | |
241 | ||
242 | if (flags & ~(SECRETMEM_FLAGS_MASK | O_CLOEXEC)) | |
243 | return -EINVAL; | |
cb685432 MWO |
244 | if (atomic_read(&secretmem_users) < 0) |
245 | return -ENFILE; | |
1507f512 MR |
246 | |
247 | fd = get_unused_fd_flags(flags & O_CLOEXEC); | |
248 | if (fd < 0) | |
249 | return fd; | |
250 | ||
251 | file = secretmem_file_create(flags); | |
252 | if (IS_ERR(file)) { | |
253 | err = PTR_ERR(file); | |
254 | goto err_put_fd; | |
255 | } | |
256 | ||
257 | file->f_flags |= O_LARGEFILE; | |
258 | ||
87066fdd | 259 | atomic_inc(&secretmem_users); |
855d4443 | 260 | fd_install(fd, file); |
1507f512 MR |
261 | return fd; |
262 | ||
263 | err_put_fd: | |
264 | put_unused_fd(fd); | |
265 | return err; | |
266 | } | |
267 | ||
268 | static int secretmem_init_fs_context(struct fs_context *fc) | |
269 | { | |
270 | return init_pseudo(fc, SECRETMEM_MAGIC) ? 0 : -ENOMEM; | |
271 | } | |
272 | ||
273 | static struct file_system_type secretmem_fs = { | |
274 | .name = "secretmem", | |
275 | .init_fs_context = secretmem_init_fs_context, | |
276 | .kill_sb = kill_anon_super, | |
277 | }; | |
278 | ||
1ea41595 | 279 | static int __init secretmem_init(void) |
1507f512 | 280 | { |
1507f512 | 281 | if (!secretmem_enable) |
f7c5b1aa | 282 | return 0; |
1507f512 MR |
283 | |
284 | secretmem_mnt = kern_mount(&secretmem_fs); | |
285 | if (IS_ERR(secretmem_mnt)) | |
4eb5bbde | 286 | return PTR_ERR(secretmem_mnt); |
1507f512 MR |
287 | |
288 | /* prevent secretmem mappings from ever getting PROT_EXEC */ | |
289 | secretmem_mnt->mnt_flags |= MNT_NOEXEC; | |
290 | ||
f7c5b1aa | 291 | return 0; |
1507f512 MR |
292 | } |
293 | fs_initcall(secretmem_init); |