// SPDX-License-Identifier: GPL-2.0-only
/*
 * Landlock LSM - Filesystem management and hooks
 *
 * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net>
 * Copyright © 2018-2020 ANSSI
 * Copyright © 2021-2022 Microsoft Corporation
 */

#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/bits.h>
#include <linux/compiler_types.h>
#include <linux/dcache.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/limits.h>
#include <linux/list.h>
#include <linux/lsm_hooks.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/path.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h>
#include <linux/stat.h>
#include <linux/types.h>
#include <linux/wait_bit.h>
#include <linux/workqueue.h>
#include <uapi/linux/landlock.h>

#include "common.h"
#include "cred.h"
#include "fs.h"
#include "limits.h"
#include "object.h"
#include "ruleset.h"
#include "setup.h"

/* Underlying object management */

static void release_inode(struct landlock_object *const object)
	__releases(object->lock)
{
	struct inode *const inode = object->underobj;
	struct super_block *sb;

	if (!inode) {
		spin_unlock(&object->lock);
		return;
	}

	/*
	 * Protects against concurrent use by hook_sb_delete() of the reference
	 * to the underlying inode.
	 */
	object->underobj = NULL;
	/*
	 * Makes sure that if the filesystem is concurrently unmounted,
	 * hook_sb_delete() will wait for us to finish iput().
	 */
	sb = inode->i_sb;
	atomic_long_inc(&landlock_superblock(sb)->inode_refs);
	spin_unlock(&object->lock);
	/*
	 * Because object->underobj was not NULL, hook_sb_delete() and
	 * get_inode_object() guarantee that it is safe to reset
	 * landlock_inode(inode)->object while it is not NULL.  It is therefore
	 * not necessary to lock inode->i_lock.
	 */
	rcu_assign_pointer(landlock_inode(inode)->object, NULL);
	/*
	 * Now, new rules can safely be tied to @inode with get_inode_object().
	 */

	iput(inode);
	if (atomic_long_dec_and_test(&landlock_superblock(sb)->inode_refs))
		wake_up_var(&landlock_superblock(sb)->inode_refs);
}

static const struct landlock_object_underops landlock_fs_underops = {
	.release = release_inode
};

/* Ruleset management */

static struct landlock_object *get_inode_object(struct inode *const inode)
{
	struct landlock_object *object, *new_object;
	struct landlock_inode_security *inode_sec = landlock_inode(inode);

	rcu_read_lock();
retry:
	object = rcu_dereference(inode_sec->object);
	if (object) {
		if (likely(refcount_inc_not_zero(&object->usage))) {
			rcu_read_unlock();
			return object;
		}
		/*
		 * We are racing with release_inode(), the object is going
		 * away.  Wait for release_inode(), then retry.
		 */
		spin_lock(&object->lock);
		spin_unlock(&object->lock);
		goto retry;
	}
	rcu_read_unlock();

	/*
	 * If there is no object tied to @inode, then create a new one (without
	 * holding any locks).
	 */
	new_object = landlock_create_object(&landlock_fs_underops, inode);
	if (IS_ERR(new_object))
		return new_object;

	/*
	 * Protects against concurrent calls to get_inode_object() or
	 * hook_sb_delete().
	 */
	spin_lock(&inode->i_lock);
	if (unlikely(rcu_access_pointer(inode_sec->object))) {
		/* Someone else just created the object, bail out and retry. */
		spin_unlock(&inode->i_lock);
		kfree(new_object);

		rcu_read_lock();
		goto retry;
	}

	/*
	 * @inode will be released by hook_sb_delete() on its superblock
	 * shutdown, or by release_inode() when no more ruleset references the
	 * related object.
	 */
	ihold(inode);
	rcu_assign_pointer(inode_sec->object, new_object);
	spin_unlock(&inode->i_lock);
	return new_object;
}

/* All access rights that can be tied to files. */
/* clang-format off */
#define ACCESS_FILE ( \
	LANDLOCK_ACCESS_FS_EXECUTE | \
	LANDLOCK_ACCESS_FS_WRITE_FILE | \
	LANDLOCK_ACCESS_FS_READ_FILE | \
	LANDLOCK_ACCESS_FS_TRUNCATE)
/* clang-format on */

/*
 * All access rights that are denied by default, whether or not they are
 * handled by a ruleset/layer.  This must be ORed with all
 * ruleset->fs_access_masks[] entries when we need to get the absolute handled
 * access masks.
 */
/* clang-format off */
#define ACCESS_INITIALLY_DENIED ( \
	LANDLOCK_ACCESS_FS_REFER)
/* clang-format on */

/*
 * @path: Should have been checked by get_path_from_fd().
 */
int landlock_append_fs_rule(struct landlock_ruleset *const ruleset,
			    const struct path *const path,
			    access_mask_t access_rights)
{
	int err;
	struct landlock_object *object;

	/* Files only get access rights that make sense. */
	if (!d_is_dir(path->dentry) &&
	    (access_rights | ACCESS_FILE) != ACCESS_FILE)
		return -EINVAL;
	if (WARN_ON_ONCE(ruleset->num_layers != 1))
		return -EINVAL;

	/* Transforms relative access rights to absolute ones. */
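	/*
	 * Implementation note: a layer implicitly allows every access right
	 * it does not handle, except those in ACCESS_INITIALLY_DENIED.  ORing
	 * the rule with all unhandled rights therefore makes the stored mask
	 * absolute.  E.g. with a single layer handling only
	 * LANDLOCK_ACCESS_FS_READ_FILE, a rule granting nothing still records
	 * every right other than READ_FILE and REFER as allowed.
	 */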
	access_rights |=
		LANDLOCK_MASK_ACCESS_FS &
		~(ruleset->fs_access_masks[0] | ACCESS_INITIALLY_DENIED);
	object = get_inode_object(d_backing_inode(path->dentry));
	if (IS_ERR(object))
		return PTR_ERR(object);
	mutex_lock(&ruleset->lock);
	err = landlock_insert_rule(ruleset, object, access_rights);
	mutex_unlock(&ruleset->lock);
	/*
	 * No need to check for an error because landlock_insert_rule()
	 * increments the refcount for the new object if needed.
	 */
	landlock_put_object(object);
	return err;
}

/* Access-control management */

/*
 * The lifetime of the returned rule is tied to @domain.
 *
 * Returns NULL if no rule is found or if @dentry is negative.
 */
static inline const struct landlock_rule *
find_rule(const struct landlock_ruleset *const domain,
	  const struct dentry *const dentry)
{
	const struct landlock_rule *rule;
	const struct inode *inode;

	/* Ignores nonexistent leaves. */
	if (d_is_negative(dentry))
		return NULL;

	inode = d_backing_inode(dentry);
	rcu_read_lock();
	rule = landlock_find_rule(
		domain, rcu_dereference(landlock_inode(inode)->object));
	rcu_read_unlock();
	return rule;
}

/*
 * @layer_masks is read and may be updated according to the access request and
 * the matching rule.
 *
 * Returns true if the request is allowed (i.e. relevant layer masks for the
 * request are empty).
 */
static inline bool
unmask_layers(const struct landlock_rule *const rule,
	      const access_mask_t access_request,
	      layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS])
{
	size_t layer_level;

	if (!access_request || !layer_masks)
		return true;
	if (!rule)
		return false;

	/*
	 * An access is granted if, for each policy layer, at least one rule
	 * encountered on the pathwalk grants the requested access, regardless
	 * of its position in the layer stack.  We must then check the
	 * remaining layers for each inode, from the first added layer to the
	 * last one.  When there are multiple requested accesses, the full set
	 * may not be granted by a single rule within a policy layer, but by
	 * the union (binary OR) of multiple rules.
	 * E.g. /a/b <execute> + /a <read> => /a/b <execute + read>
	 */
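	/*
	 * Illustration (values chosen for this example): with two layers that
	 * both handle READ_FILE and no matching rule seen yet,
	 * (*layer_masks)[READ_FILE] is 0b11.  A rule from layer 1 granting
	 * READ_FILE clears bit 0 (BIT_ULL(level - 1)), leaving 0b10: the
	 * access stays denied until another rule on the pathwalk clears the
	 * layer-2 bit as well.
	 */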
	for (layer_level = 0; layer_level < rule->num_layers; layer_level++) {
		const struct landlock_layer *const layer =
			&rule->layers[layer_level];
		const layer_mask_t layer_bit = BIT_ULL(layer->level - 1);
		const unsigned long access_req = access_request;
		unsigned long access_bit;
		bool is_empty;

		/*
		 * Records in @layer_masks which layer grants access to each
		 * requested access.
		 */
		is_empty = true;
		for_each_set_bit(access_bit, &access_req,
				 ARRAY_SIZE(*layer_masks)) {
			if (layer->access & BIT_ULL(access_bit))
				(*layer_masks)[access_bit] &= ~layer_bit;
			is_empty = is_empty && !(*layer_masks)[access_bit];
		}
		if (is_empty)
			return true;
	}
	return false;
}

/*
 * Allows access to pseudo filesystems that will never be mountable (e.g.
 * sockfs, pipefs), but which may still be reachable through
 * /proc/<pid>/fd/<file-descriptor>.
 */
static inline bool is_nouser_or_private(const struct dentry *dentry)
{
	return (dentry->d_sb->s_flags & SB_NOUSER) ||
	       (d_is_positive(dentry) &&
		unlikely(IS_PRIVATE(d_backing_inode(dentry))));
}

static inline access_mask_t
get_handled_accesses(const struct landlock_ruleset *const domain)
{
	access_mask_t access_dom = ACCESS_INITIALLY_DENIED;
	size_t layer_level;

	for (layer_level = 0; layer_level < domain->num_layers; layer_level++)
		access_dom |= domain->fs_access_masks[layer_level];
	return access_dom & LANDLOCK_MASK_ACCESS_FS;
}

/**
 * init_layer_masks - Initialize layer masks from an access request
 *
 * Populates @layer_masks such that for each access right in @access_request,
 * the bits for all the layers are set where this access right is handled.
 *
 * @domain: The domain that defines the current restrictions.
 * @access_request: The requested access rights to check.
 * @layer_masks: The layer masks to populate.
 *
 * Returns: An access mask where each access right bit is set which is handled
 * in any of the active layers in @domain.
 */
static inline access_mask_t
init_layer_masks(const struct landlock_ruleset *const domain,
		 const access_mask_t access_request,
		 layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS])
{
	access_mask_t handled_accesses = 0;
	size_t layer_level;

	memset(layer_masks, 0, sizeof(*layer_masks));
	/* An empty access request can happen because of O_WRONLY | O_RDWR. */
	if (!access_request)
		return 0;

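	/*
	 * Illustration (example domain): with two layers where layer 0
	 * handles READ_FILE and WRITE_FILE while layer 1 handles only
	 * READ_FILE, requesting both rights yields
	 * (*layer_masks)[READ_FILE] == 0b11, (*layer_masks)[WRITE_FILE] ==
	 * 0b01, and a return value of READ_FILE | WRITE_FILE.  The
	 * ACCESS_INITIALLY_DENIED rights (i.e. REFER) are artificially
	 * handled by every layer, cf. the loop below.
	 */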
	/* Saves all handled accesses per layer. */
	for (layer_level = 0; layer_level < domain->num_layers; layer_level++) {
		const unsigned long access_req = access_request;
		unsigned long access_bit;

		for_each_set_bit(access_bit, &access_req,
				 ARRAY_SIZE(*layer_masks)) {
			/*
			 * Artificially handles all access rights that are
			 * initially denied by default.
			 */
			if (BIT_ULL(access_bit) &
			    (domain->fs_access_masks[layer_level] |
			     ACCESS_INITIALLY_DENIED)) {
				(*layer_masks)[access_bit] |=
					BIT_ULL(layer_level);
				handled_accesses |= BIT_ULL(access_bit);
			}
		}
	}
	return handled_accesses;
}

/*
 * Checks that a destination file hierarchy has at least as many restrictions
 * as a source file hierarchy.  This is only used for link and rename actions.
 *
 * @layer_masks_child2: Optional child masks.
 */
static inline bool no_more_access(
	const layer_mask_t (*const layer_masks_parent1)[LANDLOCK_NUM_ACCESS_FS],
	const layer_mask_t (*const layer_masks_child1)[LANDLOCK_NUM_ACCESS_FS],
	const bool child1_is_directory,
	const layer_mask_t (*const layer_masks_parent2)[LANDLOCK_NUM_ACCESS_FS],
	const layer_mask_t (*const layer_masks_child2)[LANDLOCK_NUM_ACCESS_FS],
	const bool child2_is_directory)
{
	unsigned long access_bit;

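	/*
	 * Reminder: a set bit in a layer mask means that the access is still
	 * denied by that layer.  "restrictions(a) >= restrictions(b)" thus
	 * translates to the bitwise subset check (b | a) == a, where b is the
	 * effective child mask (parent & child, i.e. inherited denials minus
	 * child exceptions).
	 */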
	for (access_bit = 0; access_bit < ARRAY_SIZE(*layer_masks_parent2);
	     access_bit++) {
		/* Ignores accesses that only make sense for directories. */
		const bool is_file_access =
			!!(BIT_ULL(access_bit) & ACCESS_FILE);

		if (child1_is_directory || is_file_access) {
			/*
			 * Checks if the destination restrictions are a
			 * superset of the source ones (i.e. inherited access
			 * rights without child exceptions):
			 * restrictions(parent2) >= restrictions(child1)
			 */
			if ((((*layer_masks_parent1)[access_bit] &
			      (*layer_masks_child1)[access_bit]) |
			     (*layer_masks_parent2)[access_bit]) !=
			    (*layer_masks_parent2)[access_bit])
				return false;
		}

		if (!layer_masks_child2)
			continue;
		if (child2_is_directory || is_file_access) {
			/*
			 * Checks inverted restrictions for RENAME_EXCHANGE:
			 * restrictions(parent1) >= restrictions(child2)
			 */
			if ((((*layer_masks_parent2)[access_bit] &
			      (*layer_masks_child2)[access_bit]) |
			     (*layer_masks_parent1)[access_bit]) !=
			    (*layer_masks_parent1)[access_bit])
				return false;
		}
	}
	return true;
}

/*
 * Removes @layer_masks accesses that are not requested.
 *
 * Returns true if the request is allowed, false otherwise.
 */
static inline bool
scope_to_request(const access_mask_t access_request,
		 layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS])
{
	const unsigned long access_req = access_request;
	unsigned long access_bit;

	if (WARN_ON_ONCE(!layer_masks))
		return true;

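	/*
	 * Note the idiom: for_each_clear_bit() walks the *unrequested* access
	 * bits, so only the requested accesses keep their layer masks; the
	 * final memchr_inv() then reports whether any denial remains.
	 */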
	for_each_clear_bit(access_bit, &access_req, ARRAY_SIZE(*layer_masks))
		(*layer_masks)[access_bit] = 0;
	return !memchr_inv(layer_masks, 0, sizeof(*layer_masks));
}

/*
 * Returns true if there is at least one access right other than
 * LANDLOCK_ACCESS_FS_REFER.
 */
static inline bool
is_eacces(const layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS],
	  const access_mask_t access_request)
{
	unsigned long access_bit;
	/* LANDLOCK_ACCESS_FS_REFER alone must return -EXDEV. */
	const unsigned long access_check = access_request &
					   ~LANDLOCK_ACCESS_FS_REFER;

	if (!layer_masks)
		return false;

	for_each_set_bit(access_bit, &access_check, ARRAY_SIZE(*layer_masks)) {
		if ((*layer_masks)[access_bit])
			return true;
	}
	return false;
}

/**
 * is_access_to_paths_allowed - Check accesses for requests with a common path
 *
 * @domain: Domain to check against.
 * @path: File hierarchy to walk through.
 * @access_request_parent1: Accesses to check, once @layer_masks_parent1 is
 *     equal to @layer_masks_parent2 (if any).  This is tied to the unique
 *     requested path for most actions, or the source in case of a refer action
 *     (i.e. rename or link), or the source and destination in case of
 *     RENAME_EXCHANGE.
 * @layer_masks_parent1: Pointer to a matrix of layer masks per access
 *     masks, identifying the layers that forbid a specific access.  Bits from
 *     this matrix can be unset according to the @path walk.  An empty matrix
 *     means that @domain allows all possible Landlock accesses (i.e. not only
 *     those identified by @access_request_parent1).  This matrix can
 *     initially refer to domain layer masks and, when the accesses for the
 *     destination and source are the same, to requested layer masks.
 * @dentry_child1: Dentry to the initial child of the parent1 path.  This
 *     pointer must be NULL for non-refer actions (i.e. not link nor rename).
 * @access_request_parent2: Similar to @access_request_parent1 but for a
 *     request involving a source and a destination.  This refers to the
 *     destination, except in case of RENAME_EXCHANGE where it also refers to
 *     the source.  Must be set to 0 when using a simple path request.
 * @layer_masks_parent2: Similar to @layer_masks_parent1 but for a refer
 *     action.  This must be NULL otherwise.
 * @dentry_child2: Dentry to the initial child of the parent2 path.  This
 *     pointer is only set for RENAME_EXCHANGE actions and must be NULL
 *     otherwise.
 *
 * This helper first checks that the destination has a superset of restrictions
 * compared to the source (if any) for a common path.  Because of
 * RENAME_EXCHANGE actions, source and destinations may be swapped.  It then
 * checks that the collected accesses and the remaining ones are enough to
 * allow the request.
 *
 * Returns:
 * - true if the access request is granted;
 * - false otherwise.
 */
static bool is_access_to_paths_allowed(
	const struct landlock_ruleset *const domain,
	const struct path *const path,
	const access_mask_t access_request_parent1,
	layer_mask_t (*const layer_masks_parent1)[LANDLOCK_NUM_ACCESS_FS],
	const struct dentry *const dentry_child1,
	const access_mask_t access_request_parent2,
	layer_mask_t (*const layer_masks_parent2)[LANDLOCK_NUM_ACCESS_FS],
	const struct dentry *const dentry_child2)
{
	bool allowed_parent1 = false, allowed_parent2 = false, is_dom_check,
	     child1_is_directory = true, child2_is_directory = true;
	struct path walker_path;
	access_mask_t access_masked_parent1, access_masked_parent2;
	layer_mask_t _layer_masks_child1[LANDLOCK_NUM_ACCESS_FS],
		_layer_masks_child2[LANDLOCK_NUM_ACCESS_FS];
	layer_mask_t(*layer_masks_child1)[LANDLOCK_NUM_ACCESS_FS] = NULL,
		(*layer_masks_child2)[LANDLOCK_NUM_ACCESS_FS] = NULL;

	if (!access_request_parent1 && !access_request_parent2)
		return true;
	if (WARN_ON_ONCE(!domain || !path))
		return true;
	if (is_nouser_or_private(path->dentry))
		return true;
	if (WARN_ON_ONCE(domain->num_layers < 1 || !layer_masks_parent1))
		return false;

	if (unlikely(layer_masks_parent2)) {
		if (WARN_ON_ONCE(!dentry_child1))
			return false;
		/*
		 * For a double request, first check for potential privilege
		 * escalation by looking at domain handled accesses (which are
		 * a superset of the meaningful requested accesses).
		 */
		access_masked_parent1 = access_masked_parent2 =
			get_handled_accesses(domain);
		is_dom_check = true;
	} else {
		if (WARN_ON_ONCE(dentry_child1 || dentry_child2))
			return false;
		/* For a simple request, only check for requested accesses. */
		access_masked_parent1 = access_request_parent1;
		access_masked_parent2 = access_request_parent2;
		is_dom_check = false;
	}

	if (unlikely(dentry_child1)) {
		unmask_layers(find_rule(domain, dentry_child1),
			      init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS,
					       &_layer_masks_child1),
			      &_layer_masks_child1);
		layer_masks_child1 = &_layer_masks_child1;
		child1_is_directory = d_is_dir(dentry_child1);
	}
	if (unlikely(dentry_child2)) {
		unmask_layers(find_rule(domain, dentry_child2),
			      init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS,
					       &_layer_masks_child2),
			      &_layer_masks_child2);
		layer_masks_child2 = &_layer_masks_child2;
		child2_is_directory = d_is_dir(dentry_child2);
	}

	walker_path = *path;
	path_get(&walker_path);
	/*
	 * We need to walk through all the hierarchy to not miss any relevant
	 * restriction.
	 */
	while (true) {
		struct dentry *parent_dentry;
		const struct landlock_rule *rule;

		/*
		 * If all the accesses allowed on the destination are already
		 * allowed on the source, or equivalently, if there are at
		 * least as many restrictions on the destination as on the
		 * source, then we can safely refer files from the source to
		 * the destination without risking a privilege escalation.
		 * This also applies in the case of RENAME_EXCHANGE, which
		 * implies checks in both directions.  This is crucial for
		 * standalone multilayered security policies.  Furthermore,
		 * this helps prevent policy writers from shooting themselves
		 * in the foot.
		 */
		if (unlikely(is_dom_check &&
			     no_more_access(
				     layer_masks_parent1, layer_masks_child1,
				     child1_is_directory, layer_masks_parent2,
				     layer_masks_child2,
				     child2_is_directory))) {
			allowed_parent1 = scope_to_request(
				access_request_parent1, layer_masks_parent1);
			allowed_parent2 = scope_to_request(
				access_request_parent2, layer_masks_parent2);

			/* Stops when all accesses are granted. */
			if (allowed_parent1 && allowed_parent2)
				break;

			/*
			 * Now, downgrades the remaining checks from domain
			 * handled accesses to requested accesses.
			 */
			is_dom_check = false;
			access_masked_parent1 = access_request_parent1;
			access_masked_parent2 = access_request_parent2;
		}

		rule = find_rule(domain, walker_path.dentry);
		allowed_parent1 = unmask_layers(rule, access_masked_parent1,
						layer_masks_parent1);
		allowed_parent2 = unmask_layers(rule, access_masked_parent2,
						layer_masks_parent2);

		/* Stops when a rule from each layer grants access. */
		if (allowed_parent1 && allowed_parent2)
			break;

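		/*
		 * Crossing mount boundaries: follow_up() moves @walker_path to
		 * the mount point of the current mount in the parent mount; it
		 * returns 0 once the real root of the mount tree is reached.
		 */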
jump_up:
		if (walker_path.dentry == walker_path.mnt->mnt_root) {
			if (follow_up(&walker_path)) {
				/* Ignores hidden mount points. */
				goto jump_up;
			} else {
				/*
				 * Stops at the real root.  Denies access
				 * because not all layers have granted access.
				 */
				break;
			}
		}
		if (unlikely(IS_ROOT(walker_path.dentry))) {
			/*
			 * Stops at disconnected root directories.  Only allows
			 * access to internal filesystems (e.g. nsfs, which is
			 * reachable through /proc/<pid>/ns/<namespace>).
			 */
			allowed_parent1 = allowed_parent2 =
				!!(walker_path.mnt->mnt_flags & MNT_INTERNAL);
			break;
		}
		parent_dentry = dget_parent(walker_path.dentry);
		dput(walker_path.dentry);
		walker_path.dentry = parent_dentry;
	}
	path_put(&walker_path);

	return allowed_parent1 && allowed_parent2;
}

static inline int check_access_path(const struct landlock_ruleset *const domain,
				    const struct path *const path,
				    access_mask_t access_request)
{
	layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {};

	access_request = init_layer_masks(domain, access_request, &layer_masks);
	if (is_access_to_paths_allowed(domain, path, access_request,
				       &layer_masks, NULL, 0, NULL, NULL))
		return 0;
	return -EACCES;
}

static inline int current_check_access_path(const struct path *const path,
					    const access_mask_t access_request)
{
	const struct landlock_ruleset *const dom =
		landlock_get_current_domain();

	if (!dom)
		return 0;
	return check_access_path(dom, path, access_request);
}

static inline access_mask_t get_mode_access(const umode_t mode)
{
	switch (mode & S_IFMT) {
	case S_IFLNK:
		return LANDLOCK_ACCESS_FS_MAKE_SYM;
	case 0:
		/* A zero mode translates to S_IFREG. */
	case S_IFREG:
		return LANDLOCK_ACCESS_FS_MAKE_REG;
	case S_IFDIR:
		return LANDLOCK_ACCESS_FS_MAKE_DIR;
	case S_IFCHR:
		return LANDLOCK_ACCESS_FS_MAKE_CHAR;
	case S_IFBLK:
		return LANDLOCK_ACCESS_FS_MAKE_BLOCK;
	case S_IFIFO:
		return LANDLOCK_ACCESS_FS_MAKE_FIFO;
	case S_IFSOCK:
		return LANDLOCK_ACCESS_FS_MAKE_SOCK;
	default:
		WARN_ON_ONCE(1);
		return 0;
	}
}

static inline access_mask_t maybe_remove(const struct dentry *const dentry)
{
	if (d_is_negative(dentry))
		return 0;
	return d_is_dir(dentry) ? LANDLOCK_ACCESS_FS_REMOVE_DIR :
				  LANDLOCK_ACCESS_FS_REMOVE_FILE;
}

/**
 * collect_domain_accesses - Walk through a file path and collect accesses
 *
 * @domain: Domain to check against.
 * @mnt_root: Last directory to check.
 * @dir: Directory to start the walk from.
 * @layer_masks_dom: Where to store the collected accesses.
 *
 * This helper is useful to begin a path walk from the @dir directory to a
 * @mnt_root directory used as a mount point.  This mount point is the common
 * ancestor between the source and the destination of a renamed or linked
 * file.  While walking from @dir to @mnt_root, we record all the domain's
 * allowed accesses in @layer_masks_dom.
 *
 * This is similar to is_access_to_paths_allowed() but much simpler because it
 * only handles walking on the same mount point and only checks one set of
 * accesses.
 *
 * Returns:
 * - true if all the domain access rights are allowed for @dir;
 * - false if the walk reached @mnt_root.
 */
static bool collect_domain_accesses(
	const struct landlock_ruleset *const domain,
	const struct dentry *const mnt_root, struct dentry *dir,
	layer_mask_t (*const layer_masks_dom)[LANDLOCK_NUM_ACCESS_FS])
{
	unsigned long access_dom;
	bool ret = false;

	if (WARN_ON_ONCE(!domain || !mnt_root || !dir || !layer_masks_dom))
		return true;
	if (is_nouser_or_private(dir))
		return true;

	access_dom = init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS,
				      layer_masks_dom);

	dget(dir);
	while (true) {
		struct dentry *parent_dentry;

		/* Gets all layers allowing all domain accesses. */
		if (unmask_layers(find_rule(domain, dir), access_dom,
				  layer_masks_dom)) {
			/*
			 * Stops when all handled accesses are allowed by at
			 * least one rule in each layer.
			 */
			ret = true;
			break;
		}

		/* We should not reach a root other than @mnt_root. */
		if (dir == mnt_root || WARN_ON_ONCE(IS_ROOT(dir)))
			break;

		parent_dentry = dget_parent(dir);
		dput(dir);
		dir = parent_dentry;
	}
	dput(dir);
	return ret;
}

/**
 * current_check_refer_path - Check if a rename or link action is allowed
 *
 * @old_dentry: File or directory requested to be moved or linked.
 * @new_dir: Destination parent directory.
 * @new_dentry: Destination file or directory.
 * @removable: Set to true if it is a rename operation.
 * @exchange: Set to true if it is a rename operation with RENAME_EXCHANGE.
 *
 * Because of its unprivileged constraints, Landlock relies on file hierarchies
 * (and not only inodes) to tie access rights to files.  Being able to link or
 * rename a file hierarchy brings some challenges.  Indeed, moving or linking a
 * file (i.e. creating a new reference to an inode) can have an impact on the
 * actions allowed for a set of files if it changes its parent directory (i.e.
 * reparenting).
 *
 * To avoid trivial access right bypasses, Landlock first checks if the file or
 * directory requested to be moved would gain new access rights inherited from
 * its new hierarchy.  Before returning any error, Landlock then checks that
 * the parent source hierarchy and the destination hierarchy would allow the
 * link or rename action.  If it is not the case, an error with EACCES is
 * returned to inform user space that there is no way to remove or create the
 * requested source file type.  If it should be allowed but the new inherited
 * access rights would be greater than the source access rights, then the
 * kernel returns an error with EXDEV.  Prioritizing EACCES over EXDEV enables
 * user space to abort the whole operation if there is no way to do it, or to
 * manually copy the source to the destination if this remains allowed, e.g.
 * because file creation is allowed on the destination directory but not direct
 * linking.
 *
 * To achieve this goal, the kernel needs to compare two file hierarchies: the
 * one identifying the source file or directory (including itself), and the
 * destination one.  This can be seen as a multilayer partial ordering problem.
 * The kernel walks through these paths and collects in a matrix the access
 * rights that are denied per layer.  These matrices are then compared to see
 * if the destination one has more (or the same) restrictions as the source
 * one.  If this is the case, the requested action will not return EXDEV, which
 * doesn't mean the action is allowed.  The parent hierarchy of the source
 * (i.e. parent directory), and the destination hierarchy must also be checked
 * to verify that they explicitly allow such action (i.e. referencing,
 * creation and potentially removal rights).  The kernel implementation is then
 * required to rely on potentially four matrices of access rights: one for the
 * source file or directory (i.e. the child), potentially another one for the
 * other source/destination (in case of RENAME_EXCHANGE), one for the source
 * parent hierarchy and a last one for the destination hierarchy.  These
 * ephemeral matrices take some space on the stack, which limits the number of
 * layers to a deemed reasonable number: 16.
 *
 * Returns:
 * - 0 if access is allowed;
 * - -EXDEV if @old_dentry would inherit new access rights from @new_dir;
 * - -EACCES if file removal or creation is denied.
 */
static int current_check_refer_path(struct dentry *const old_dentry,
				    const struct path *const new_dir,
				    struct dentry *const new_dentry,
				    const bool removable, const bool exchange)
{
	const struct landlock_ruleset *const dom =
		landlock_get_current_domain();
	bool allow_parent1, allow_parent2;
	access_mask_t access_request_parent1, access_request_parent2;
	struct path mnt_dir;
	layer_mask_t layer_masks_parent1[LANDLOCK_NUM_ACCESS_FS],
		layer_masks_parent2[LANDLOCK_NUM_ACCESS_FS];

	if (!dom)
		return 0;
	if (WARN_ON_ONCE(dom->num_layers < 1))
		return -EACCES;
	if (unlikely(d_is_negative(old_dentry)))
		return -ENOENT;
	if (exchange) {
		if (unlikely(d_is_negative(new_dentry)))
			return -ENOENT;
		access_request_parent1 =
			get_mode_access(d_backing_inode(new_dentry)->i_mode);
	} else {
		access_request_parent1 = 0;
	}
	access_request_parent2 =
		get_mode_access(d_backing_inode(old_dentry)->i_mode);
	if (removable) {
		access_request_parent1 |= maybe_remove(old_dentry);
		access_request_parent2 |= maybe_remove(new_dentry);
	}

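	/*
	 * Illustration (not from the original sources): renaming a regular
	 * file from directory A to directory B therefore requires
	 * LANDLOCK_ACCESS_FS_REMOVE_FILE on A, LANDLOCK_ACCESS_FS_MAKE_REG on
	 * B (plus a remove right on B if the destination already exists),
	 * and, when reparenting, LANDLOCK_ACCESS_FS_REFER on both
	 * hierarchies.
	 */
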
	/* The mount points are the same for old and new paths, cf. EXDEV. */
	if (old_dentry->d_parent == new_dir->dentry) {
		/*
		 * The LANDLOCK_ACCESS_FS_REFER access right is not required
		 * for a same-directory refer action (i.e. no reparenting).
		 */
		access_request_parent1 = init_layer_masks(
			dom, access_request_parent1 | access_request_parent2,
			&layer_masks_parent1);
		if (is_access_to_paths_allowed(
			    dom, new_dir, access_request_parent1,
			    &layer_masks_parent1, NULL, 0, NULL, NULL))
			return 0;
		return -EACCES;
	}

	access_request_parent1 |= LANDLOCK_ACCESS_FS_REFER;
	access_request_parent2 |= LANDLOCK_ACCESS_FS_REFER;

	/* Saves the common mount point. */
	mnt_dir.mnt = new_dir->mnt;
	mnt_dir.dentry = new_dir->mnt->mnt_root;

	/* new_dir->dentry is equal to new_dentry->d_parent */
	allow_parent1 = collect_domain_accesses(dom, mnt_dir.dentry,
						old_dentry->d_parent,
						&layer_masks_parent1);
	allow_parent2 = collect_domain_accesses(
		dom, mnt_dir.dentry, new_dir->dentry, &layer_masks_parent2);

	if (allow_parent1 && allow_parent2)
		return 0;

	/*
	 * To be able to compare source and destination domain access rights,
	 * take into account the @old_dentry access rights aggregated with its
	 * parent access rights.  This will be useful to compare with the
	 * destination parent access rights.
	 */
	if (is_access_to_paths_allowed(
		    dom, &mnt_dir, access_request_parent1, &layer_masks_parent1,
		    old_dentry, access_request_parent2, &layer_masks_parent2,
		    exchange ? new_dentry : NULL))
		return 0;

	/*
	 * This prioritizes EACCES over EXDEV for all actions, including
	 * renames with RENAME_EXCHANGE.
	 */
	if (likely(is_eacces(&layer_masks_parent1, access_request_parent1) ||
		   is_eacces(&layer_masks_parent2, access_request_parent2)))
		return -EACCES;

	/*
	 * Gracefully forbids reparenting if the destination directory
	 * hierarchy is not a superset of restrictions of the source directory
	 * hierarchy, or if LANDLOCK_ACCESS_FS_REFER is not allowed by the
	 * source or the destination.
	 */
	return -EXDEV;
}

/* Inode hooks */

static void hook_inode_free_security(struct inode *const inode)
{
	/*
	 * All inodes must already have been untied from their object by
	 * release_inode() or hook_sb_delete().
	 */
	WARN_ON_ONCE(landlock_inode(inode)->object);
}

/* Super-block hooks */

/*
 * Release the inodes used in a security policy.
 *
 * Cf. fsnotify_unmount_inodes() and invalidate_inodes()
 */
static void hook_sb_delete(struct super_block *const sb)
{
	struct inode *inode, *prev_inode = NULL;

	if (!landlock_initialized)
		return;

	spin_lock(&sb->s_inode_list_lock);
	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
		struct landlock_object *object;

		/* Only handles referenced inodes. */
		if (!atomic_read(&inode->i_count))
			continue;

		/*
		 * Protects against concurrent modification of inode (e.g.
		 * from get_inode_object()).
		 */
		spin_lock(&inode->i_lock);
		/*
		 * Checks I_FREEING and I_WILL_FREE to protect against a race
		 * condition when release_inode() just called iput(), which
		 * could lead to a NULL dereference of inode->security or a
		 * second call to iput() for the same Landlock object.  Also
		 * checks I_NEW because such an inode cannot be tied to an
		 * object.
		 */
		if (inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW)) {
			spin_unlock(&inode->i_lock);
			continue;
		}

		rcu_read_lock();
		object = rcu_dereference(landlock_inode(inode)->object);
		if (!object) {
			rcu_read_unlock();
			spin_unlock(&inode->i_lock);
			continue;
		}
		/* Keeps a reference to this inode until the next loop walk. */
		__iget(inode);
		spin_unlock(&inode->i_lock);

		/*
		 * If there is no concurrent release_inode() ongoing, then we
		 * are in charge of calling iput() on this inode, otherwise we
		 * will just wait for it to finish.
		 */
		spin_lock(&object->lock);
		if (object->underobj == inode) {
			object->underobj = NULL;
			spin_unlock(&object->lock);
			rcu_read_unlock();

			/*
			 * Because object->underobj was not NULL,
			 * release_inode() and get_inode_object() guarantee
			 * that it is safe to reset
			 * landlock_inode(inode)->object while it is not NULL.
			 * It is therefore not necessary to lock inode->i_lock.
			 */
			rcu_assign_pointer(landlock_inode(inode)->object, NULL);
			/*
			 * At this point, we own the ihold() reference that was
			 * originally set up by get_inode_object() and the
			 * __iget() reference that we just set in this loop
			 * walk.  Therefore the following call to iput() will
			 * neither sleep nor drop the inode because there are
			 * now at least two references to it.
			 */
			iput(inode);
		} else {
			spin_unlock(&object->lock);
			rcu_read_unlock();
		}

		if (prev_inode) {
			/*
			 * At this point, we still own the __iget() reference
			 * that we just set in this loop walk.  Therefore we
			 * can drop the list lock and know that the inode won't
			 * disappear from under us until the next loop walk.
			 */
			spin_unlock(&sb->s_inode_list_lock);
			/*
			 * We can now actually put the inode reference from the
			 * previous loop walk, which is not needed anymore.
			 */
			iput(prev_inode);
			cond_resched();
			spin_lock(&sb->s_inode_list_lock);
		}
		prev_inode = inode;
	}
	spin_unlock(&sb->s_inode_list_lock);

	/* Puts the inode reference from the last loop walk, if any. */
	if (prev_inode)
		iput(prev_inode);
	/* Waits for pending iput() in release_inode(). */
	wait_var_event(&landlock_superblock(sb)->inode_refs,
		       !atomic_long_read(&landlock_superblock(sb)->inode_refs));
}

/*
 * Because a Landlock security policy is defined according to the filesystem
 * topology (i.e. the mount namespace), changing it may grant access to files
 * not previously allowed.
 *
 * To keep things simple, deny any filesystem topology modification by
 * landlocked processes.  Non-landlocked processes may still change the
 * namespace of a landlocked process, but this kind of threat must be handled
 * by a system-wide access-control security policy.
 *
 * This could be lifted in the future if Landlock can safely handle mount
 * namespace updates requested by a landlocked process.  Indeed, we could
 * update the current domain (which is currently read-only) by taking into
 * account the accesses of the source and the destination of a new mount point.
 * However, it would also require making all the child domains dynamically
 * inherit these new constraints.  Anyway, for backward compatibility reasons,
 * a dedicated user space option would be required (e.g. as a ruleset flag).
 */
static int hook_sb_mount(const char *const dev_name,
			 const struct path *const path, const char *const type,
			 const unsigned long flags, void *const data)
{
	if (!landlock_get_current_domain())
		return 0;
	return -EPERM;
}

static int hook_move_mount(const struct path *const from_path,
			   const struct path *const to_path)
{
	if (!landlock_get_current_domain())
		return 0;
	return -EPERM;
}

/*
 * Removing a mount point may reveal a previously hidden file hierarchy and
 * thereby grant access to files that were previously forbidden.
 */
static int hook_sb_umount(struct vfsmount *const mnt, const int flags)
{
	if (!landlock_get_current_domain())
		return 0;
	return -EPERM;
}

static int hook_sb_remount(struct super_block *const sb, void *const mnt_opts)
{
	if (!landlock_get_current_domain())
		return 0;
	return -EPERM;
}

/*
 * pivot_root(2), like mount(2), changes the current mount namespace.  It must
 * therefore be forbidden for a landlocked process.
 *
 * However, chroot(2) may be allowed because it only changes the relative root
 * directory of the current process.  Moreover, it can be used to restrict the
 * view of the filesystem.
 */
static int hook_sb_pivotroot(const struct path *const old_path,
			     const struct path *const new_path)
{
	if (!landlock_get_current_domain())
		return 0;
	return -EPERM;
}

/* Path hooks */

static int hook_path_link(struct dentry *const old_dentry,
			  const struct path *const new_dir,
			  struct dentry *const new_dentry)
{
	return current_check_refer_path(old_dentry, new_dir, new_dentry, false,
					false);
}

static int hook_path_rename(const struct path *const old_dir,
			    struct dentry *const old_dentry,
			    const struct path *const new_dir,
			    struct dentry *const new_dentry,
			    const unsigned int flags)
{
	/* old_dir refers to old_dentry->d_parent and new_dir->mnt */
	return current_check_refer_path(old_dentry, new_dir, new_dentry, true,
					!!(flags & RENAME_EXCHANGE));
}

static int hook_path_mkdir(const struct path *const dir,
			   struct dentry *const dentry, const umode_t mode)
{
	return current_check_access_path(dir, LANDLOCK_ACCESS_FS_MAKE_DIR);
}

static int hook_path_mknod(const struct path *const dir,
			   struct dentry *const dentry, const umode_t mode,
			   const unsigned int dev)
{
	const struct landlock_ruleset *const dom =
		landlock_get_current_domain();

	if (!dom)
		return 0;
	return check_access_path(dom, dir, get_mode_access(mode));
}

static int hook_path_symlink(const struct path *const dir,
			     struct dentry *const dentry,
			     const char *const old_name)
{
	return current_check_access_path(dir, LANDLOCK_ACCESS_FS_MAKE_SYM);
}

static int hook_path_unlink(const struct path *const dir,
			    struct dentry *const dentry)
{
	return current_check_access_path(dir, LANDLOCK_ACCESS_FS_REMOVE_FILE);
}

static int hook_path_rmdir(const struct path *const dir,
			   struct dentry *const dentry)
{
	return current_check_access_path(dir, LANDLOCK_ACCESS_FS_REMOVE_DIR);
}

static int hook_path_truncate(const struct path *const path)
{
	return current_check_access_path(path, LANDLOCK_ACCESS_FS_TRUNCATE);
}

/* File hooks */

/**
 * get_required_file_open_access - Get access needed to open a file
 *
 * @file: File being opened.
 *
 * Returns the access rights that are required for opening the given file,
 * depending on the file type and open mode.
 */
static inline access_mask_t
get_required_file_open_access(const struct file *const file)
{
	access_mask_t access = 0;

	if (file->f_mode & FMODE_READ) {
		/* A directory can only be opened in read mode. */
		if (S_ISDIR(file_inode(file)->i_mode))
			return LANDLOCK_ACCESS_FS_READ_DIR;
		access = LANDLOCK_ACCESS_FS_READ_FILE;
	}
	if (file->f_mode & FMODE_WRITE)
		access |= LANDLOCK_ACCESS_FS_WRITE_FILE;
	/* __FMODE_EXEC is indeed part of f_flags, not f_mode. */
	if (file->f_flags & __FMODE_EXEC)
		access |= LANDLOCK_ACCESS_FS_EXECUTE;
	return access;
}

static int hook_file_alloc_security(struct file *const file)
{
	/*
	 * Grants all access rights, even if most of them are not checked later
	 * on; this is more consistent.
	 *
	 * Notably, file descriptors for regular files can also be acquired
	 * without going through the file_open hook, for example when using
	 * memfd_create(2).
	 */
	landlock_file(file)->allowed_access = LANDLOCK_MASK_ACCESS_FS;
	return 0;
}

static int hook_file_open(struct file *const file)
{
	layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {};
	access_mask_t open_access_request, full_access_request, allowed_access;
	const access_mask_t optional_access = LANDLOCK_ACCESS_FS_TRUNCATE;
	const struct landlock_ruleset *const dom =
		landlock_get_current_domain();

	if (!dom)
		return 0;

	/*
	 * Because a file may be opened with O_PATH,
	 * get_required_file_open_access() may return 0.  This case will be
	 * handled in a future Landlock evolution.
	 */
	open_access_request = get_required_file_open_access(file);

	/*
	 * We look up more access than what we immediately need for open(), so
	 * that we can later authorize operations on opened files.
	 */
	full_access_request = open_access_request | optional_access;

	if (is_access_to_paths_allowed(
		    dom, &file->f_path,
		    init_layer_masks(dom, full_access_request, &layer_masks),
		    &layer_masks, NULL, 0, NULL, NULL)) {
		allowed_access = full_access_request;
	} else {
		unsigned long access_bit;
		const unsigned long access_req = full_access_request;

		/*
		 * Calculate the actual allowed access rights from layer_masks.
		 * Add each access right to allowed_access which has not been
		 * vetoed by any layer.
		 */
		allowed_access = 0;
		for_each_set_bit(access_bit, &access_req,
				 ARRAY_SIZE(layer_masks)) {
			if (!layer_masks[access_bit])
				allowed_access |= BIT_ULL(access_bit);
		}
	}

	/*
	 * For operations on already opened files (i.e. ftruncate()), it is the
	 * access rights at the time of open() which decide whether the
	 * operation is permitted.  Therefore, we record the relevant subset of
	 * file access rights in the opened struct file.
	 */
	landlock_file(file)->allowed_access = allowed_access;

	if ((open_access_request & allowed_access) == open_access_request)
		return 0;

	return -EACCES;
}

static int hook_file_truncate(struct file *const file)
{
	/*
	 * Allows truncation if the truncate right was available at the time of
	 * opening the file, for consistency with the access checks done for
	 * read, write and execute operations.
	 *
	 * Note: For checks done based on the file's Landlock allowed access, we
	 * enforce them independently of whether the current thread is in a
	 * Landlock domain, so that open files passed between independent
	 * processes retain their behaviour.
	 */
	if (landlock_file(file)->allowed_access & LANDLOCK_ACCESS_FS_TRUNCATE)
		return 0;
	return -EACCES;
}

static struct security_hook_list landlock_hooks[] __lsm_ro_after_init = {
	LSM_HOOK_INIT(inode_free_security, hook_inode_free_security),

	LSM_HOOK_INIT(sb_delete, hook_sb_delete),
	LSM_HOOK_INIT(sb_mount, hook_sb_mount),
	LSM_HOOK_INIT(move_mount, hook_move_mount),
	LSM_HOOK_INIT(sb_umount, hook_sb_umount),
	LSM_HOOK_INIT(sb_remount, hook_sb_remount),
	LSM_HOOK_INIT(sb_pivotroot, hook_sb_pivotroot),

	LSM_HOOK_INIT(path_link, hook_path_link),
	LSM_HOOK_INIT(path_rename, hook_path_rename),
	LSM_HOOK_INIT(path_mkdir, hook_path_mkdir),
	LSM_HOOK_INIT(path_mknod, hook_path_mknod),
	LSM_HOOK_INIT(path_symlink, hook_path_symlink),
	LSM_HOOK_INIT(path_unlink, hook_path_unlink),
	LSM_HOOK_INIT(path_rmdir, hook_path_rmdir),
	LSM_HOOK_INIT(path_truncate, hook_path_truncate),

	LSM_HOOK_INIT(file_alloc_security, hook_file_alloc_security),
	LSM_HOOK_INIT(file_open, hook_file_open),
	LSM_HOOK_INIT(file_truncate, hook_file_truncate),
};

__init void landlock_add_fs_hooks(void)
{
	security_add_hooks(landlock_hooks, ARRAY_SIZE(landlock_hooks),
			   LANDLOCK_NAME);
}