btrfs: qgroup: remove outdated TODO comments
[linux-2.6-block.git] / include / linux / fs.h
CommitLineData
b2441318 1/* SPDX-License-Identifier: GPL-2.0 */
1da177e4
LT
2#ifndef _LINUX_FS_H
3#define _LINUX_FS_H
4
1da177e4 5#include <linux/linkage.h>
5dd43ce2 6#include <linux/wait_bit.h>
1da177e4
LT
7#include <linux/kdev_t.h>
8#include <linux/dcache.h>
3f8206d4 9#include <linux/path.h>
1da177e4
LT
10#include <linux/stat.h>
11#include <linux/cache.h>
1da177e4 12#include <linux/list.h>
bc3b14cb 13#include <linux/list_lru.h>
4f5e65a1 14#include <linux/llist.h>
1da177e4 15#include <linux/radix-tree.h>
b93b0163 16#include <linux/xarray.h>
6b2dbba8 17#include <linux/rbtree.h>
1da177e4 18#include <linux/init.h>
914e2637 19#include <linux/pid.h>
187f1882 20#include <linux/bug.h>
1b1dcc1b 21#include <linux/mutex.h>
c8c06efa 22#include <linux/rwsem.h>
baabda26 23#include <linux/mm_types.h>
3bd858ab 24#include <linux/capability.h>
6188e10d 25#include <linux/semaphore.h>
c75b1d94 26#include <linux/fcntl.h>
ceb5bdc2 27#include <linux/rculist_bl.h>
07b8ce1e 28#include <linux/atomic.h>
83aeeada 29#include <linux/shrinker.h>
c1aab02d 30#include <linux/migrate_mode.h>
92361636 31#include <linux/uidgid.h>
5accdf82 32#include <linux/lockdep.h>
c2b1ad80 33#include <linux/percpu-rwsem.h>
853b39a7 34#include <linux/workqueue.h>
fceef393 35#include <linux/delayed_call.h>
85787090 36#include <linux/uuid.h>
5660e13d 37#include <linux/errseq.h>
d9a08a9e 38#include <linux/ioprio.h>
bbe7449e 39#include <linux/fs_types.h>
f1fffbd4
RV
40#include <linux/build_bug.h>
41#include <linux/stddef.h>
a6435940 42#include <linux/mount.h>
e6c9a714 43#include <linux/cred.h>
a793d79e 44#include <linux/mnt_idmapping.h>
1da177e4 45
1da177e4 46#include <asm/byteorder.h>
607ca46e 47#include <uapi/linux/fs.h>
1da177e4 48
b83ae6d4 49struct backing_dev_info;
52ebea74 50struct bdi_writeback;
2f8b5444 51struct bio;
5a72e899 52struct io_comp_batch;
a5694255 53struct export_operations;
10c5db28 54struct fiemap_extent_info;
a885c8c4 55struct hd_geometry;
1da177e4 56struct iovec;
92198f7e 57struct kiocb;
57cc7215 58struct kobject;
1da177e4
LT
59struct pipe_inode_info;
60struct poll_table_struct;
61struct kstatfs;
62struct vm_area_struct;
63struct vfsmount;
745ca247 64struct cred;
a509bc1a 65struct swap_info_struct;
55985dd7 66struct seq_file;
7b7a8665 67struct workqueue_struct;
3b93f911 68struct iov_iter;
0b81d077
JK
69struct fscrypt_info;
70struct fscrypt_operations;
5585f2af
EB
71struct fsverity_info;
72struct fsverity_operations;
f3a09c92 73struct fs_context;
d7167b14 74struct fs_parameter_spec;
4c5b4799 75struct fileattr;
1da177e4 76
74bf17cf 77extern void __init inode_init(void);
1da177e4 78extern void __init inode_init_early(void);
4248b0da
MG
79extern void __init files_init(void);
80extern void __init files_maxfiles_init(void);
1da177e4 81
518de9b3 82extern unsigned long get_max_files(void);
9b80a184 83extern unsigned int sysctl_nr_open;
dded4f4d 84
ddef7ed2
CH
85typedef __kernel_rwf_t rwf_t;
86
1da177e4
LT
87struct buffer_head;
88typedef int (get_block_t)(struct inode *inode, sector_t iblock,
89 struct buffer_head *bh_result, int create);
187372a3 90typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
7b7a8665 91 ssize_t bytes, void *private);
1da177e4 92
bbc1096a
DH
93#define MAY_EXEC 0x00000001
94#define MAY_WRITE 0x00000002
95#define MAY_READ 0x00000004
96#define MAY_APPEND 0x00000008
97#define MAY_ACCESS 0x00000010
98#define MAY_OPEN 0x00000020
99#define MAY_CHDIR 0x00000040
100/* called from RCU mode, don't block */
101#define MAY_NOT_BLOCK 0x00000080
102
103/*
104 * flags in file.f_mode. Note that FMODE_READ and FMODE_WRITE must correspond
75abe329 105 * to O_WRONLY and O_RDWR via the strange trick in do_dentry_open()
bbc1096a
DH
106 */
107
108/* file is open for reading */
109#define FMODE_READ ((__force fmode_t)0x1)
110/* file is open for writing */
111#define FMODE_WRITE ((__force fmode_t)0x2)
112/* file is seekable */
113#define FMODE_LSEEK ((__force fmode_t)0x4)
114/* file can be accessed using pread */
115#define FMODE_PREAD ((__force fmode_t)0x8)
116/* file can be accessed using pwrite */
117#define FMODE_PWRITE ((__force fmode_t)0x10)
118/* File is opened for execution with sys_execve / sys_uselib */
119#define FMODE_EXEC ((__force fmode_t)0x20)
120/* File is opened with O_NDELAY (only set for block devices) */
121#define FMODE_NDELAY ((__force fmode_t)0x40)
122/* File is opened with O_EXCL (only set for block devices) */
123#define FMODE_EXCL ((__force fmode_t)0x80)
/* File is opened using open(.., 3, ..) and is writeable only for ioctls
   (special hack for floppy.c) */
126#define FMODE_WRITE_IOCTL ((__force fmode_t)0x100)
127/* 32bit hashes as llseek() offset (for directories) */
128#define FMODE_32BITHASH ((__force fmode_t)0x200)
129/* 64bit hashes as llseek() offset (for directories) */
130#define FMODE_64BITHASH ((__force fmode_t)0x400)
131
132/*
133 * Don't update ctime and mtime.
134 *
135 * Currently a special hack for the XFS open_by_handle ioctl, but we'll
136 * hopefully graduate it to a proper O_CMTIME flag supported by open(2) soon.
137 */
138#define FMODE_NOCMTIME ((__force fmode_t)0x800)
139
140/* Expect random access pattern */
141#define FMODE_RANDOM ((__force fmode_t)0x1000)
142
bbcd53c9 143/* File is huge (eg. /dev/mem): treat loff_t as unsigned */
bbc1096a
DH
144#define FMODE_UNSIGNED_OFFSET ((__force fmode_t)0x2000)
145
146/* File is opened with O_PATH; almost nothing can be done with it */
147#define FMODE_PATH ((__force fmode_t)0x4000)
148
2be7d348
LT
149/* File needs atomic accesses to f_pos */
150#define FMODE_ATOMIC_POS ((__force fmode_t)0x8000)
83f936c7
AV
151/* Write access to underlying fs */
152#define FMODE_WRITER ((__force fmode_t)0x10000)
7f7f25e8
AV
153/* Has read method(s) */
154#define FMODE_CAN_READ ((__force fmode_t)0x20000)
155/* Has write method(s) */
156#define FMODE_CAN_WRITE ((__force fmode_t)0x40000)
9c225f26 157
f5d11409 158#define FMODE_OPENED ((__force fmode_t)0x80000)
73a09dd9 159#define FMODE_CREATED ((__force fmode_t)0x100000)
f5d11409 160
10dce8af
KS
161/* File is stream-like */
162#define FMODE_STREAM ((__force fmode_t)0x200000)
163
bbc1096a 164/* File was opened by fanotify and shouldn't generate fanotify events */
75069f2b 165#define FMODE_NONOTIFY ((__force fmode_t)0x4000000)
bbc1096a 166
91f9943e 167/* File is capable of returning -EAGAIN if I/O will block */
a07b2000
AV
168#define FMODE_NOWAIT ((__force fmode_t)0x8000000)
169
170/* File represents mount that needs unmounting */
171#define FMODE_NEED_UNMOUNT ((__force fmode_t)0x10000000)
b745fafa 172
d3b1084d 173/* File does not contribute to nr_files count */
a07b2000 174#define FMODE_NOACCOUNT ((__force fmode_t)0x20000000)
d3b1084d 175
c2a25ec0
JA
176/* File supports async buffered reads */
177#define FMODE_BUF_RASYNC ((__force fmode_t)0x40000000)
178
1da177e4
LT
179/*
180 * Attribute flags. These should be or-ed together to figure out what
181 * has been changed!
182 */
9767d749
MS
183#define ATTR_MODE (1 << 0)
184#define ATTR_UID (1 << 1)
185#define ATTR_GID (1 << 2)
186#define ATTR_SIZE (1 << 3)
187#define ATTR_ATIME (1 << 4)
188#define ATTR_MTIME (1 << 5)
189#define ATTR_CTIME (1 << 6)
190#define ATTR_ATIME_SET (1 << 7)
191#define ATTR_MTIME_SET (1 << 8)
192#define ATTR_FORCE (1 << 9) /* Not a change, but a change it */
9767d749
MS
193#define ATTR_KILL_SUID (1 << 11)
194#define ATTR_KILL_SGID (1 << 12)
195#define ATTR_FILE (1 << 13)
196#define ATTR_KILL_PRIV (1 << 14)
197#define ATTR_OPEN (1 << 15) /* Truncating from open(O_TRUNC) */
198#define ATTR_TIMES_SET (1 << 16)
f2b20f6e 199#define ATTR_TOUCH (1 << 17)
1da177e4 200
787fb6bc
MS
201/*
202 * Whiteout is represented by a char device. The following constants define the
203 * mode and device number to use.
204 */
205#define WHITEOUT_MODE 0
206#define WHITEOUT_DEV 0
207
1da177e4
LT
208/*
209 * This is the Inode Attributes structure, used for notify_change(). It
210 * uses the above definitions as flags, to know which values have changed.
211 * Also, in this manner, a Filesystem can look at only the values it cares
212 * about. Basically, these are the attributes that the VFS layer can
213 * request to change from the FS layer.
214 *
215 * Derek Atkins <warlord@MIT.EDU> 94-10-20
216 */
217struct iattr {
218 unsigned int ia_valid;
219 umode_t ia_mode;
92361636
EB
220 kuid_t ia_uid;
221 kgid_t ia_gid;
1da177e4 222 loff_t ia_size;
95582b00
DD
223 struct timespec64 ia_atime;
224 struct timespec64 ia_mtime;
225 struct timespec64 ia_ctime;
cc4e69de
MS
226
227 /*
25985edc 228 * Not an attribute, but an auxiliary info for filesystems wanting to
cc4e69de
MS
229 * implement an ftruncate() like method. NOTE: filesystem should
230 * check for (ia_valid & ATTR_FILE), and not for (ia_file != NULL).
231 */
232 struct file *ia_file;
1da177e4
LT
233};
234
1da177e4
LT
235/*
236 * Includes for diskquotas.
237 */
238#include <linux/quota.h>
239
69c433ed
MS
240/*
241 * Maximum number of layers of fs stack. Needs to be limited to
242 * prevent kernel stack overflow
243 */
244#define FILESYSTEM_MAX_STACK_DEPTH 2
245
994fc28c
ZB
246/**
247 * enum positive_aop_returns - aop return codes with specific semantics
248 *
249 * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has
250 * completed, that the page is still locked, and
251 * should be considered active. The VM uses this hint
252 * to return the page to the active list -- it won't
253 * be a candidate for writeback again in the near
254 * future. Other callers must be careful to unlock
255 * the page if they get this return. Returned by
256 * writepage();
257 *
258 * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has
259 * unlocked it and the page might have been truncated.
260 * The caller should back up to acquiring a new page and
261 * trying again. The aop will be taking reasonable
262 * precautions not to livelock. If the caller held a page
263 * reference, it should drop it before retrying. Returned
55144768 264 * by readpage().
994fc28c
ZB
265 *
266 * address_space_operation functions return these large constants to indicate
267 * special semantics to the caller. These are much larger than the bytes in a
268 * page to allow for functions that return the number of bytes operated on in a
269 * given page.
270 */
271
enum positive_aop_returns {
	/* writeback completed, page still locked and to be treated as active */
	AOP_WRITEPAGE_ACTIVATE	= 0x80000,
	/* page was unlocked and may have been truncated; caller should retry */
	AOP_TRUNCATED_PAGE	= 0x80001,
};
276
c718a975
TH
277#define AOP_FLAG_CONT_EXPAND 0x0001 /* called from cont_expand */
278#define AOP_FLAG_NOFS 0x0002 /* used by filesystem to direct
54566b2c
NP
279 * helper code (eg buffer layer)
280 * to clear GFP_FS from alloc */
afddba49 281
1da177e4
LT
282/*
283 * oh the beauties of C type declarations.
284 */
285struct page;
286struct address_space;
287struct writeback_control;
8151b4c8 288struct readahead_control;
1da177e4 289
c75b1d94
JA
290/*
291 * Write life time hint values.
69448867 292 * Stored in struct inode as u8.
c75b1d94
JA
293 */
294enum rw_hint {
295 WRITE_LIFE_NOT_SET = 0,
296 WRITE_LIFE_NONE = RWH_WRITE_LIFE_NONE,
297 WRITE_LIFE_SHORT = RWH_WRITE_LIFE_SHORT,
298 WRITE_LIFE_MEDIUM = RWH_WRITE_LIFE_MEDIUM,
299 WRITE_LIFE_LONG = RWH_WRITE_LIFE_LONG,
300 WRITE_LIFE_EXTREME = RWH_WRITE_LIFE_EXTREME,
301};
302
ce71bfea
JA
/*
 * Match RWF_* bits to IOCB bits.
 *
 * Each expansion is wrapped in parentheses so the cast expression stays a
 * single operand wherever the macro is used.
 */
#define IOCB_HIPRI		((__force int) RWF_HIPRI)
#define IOCB_DSYNC		((__force int) RWF_DSYNC)
#define IOCB_SYNC		((__force int) RWF_SYNC)
#define IOCB_NOWAIT		((__force int) RWF_NOWAIT)
#define IOCB_APPEND		((__force int) RWF_APPEND)

/* non-RWF related bits - start at 16 */
#define IOCB_EVENTFD		(1 << 16)
#define IOCB_DIRECT		(1 << 17)
#define IOCB_WRITE		(1 << 18)
/* iocb->ki_waitq is valid */
#define IOCB_WAITQ		(1 << 19)
#define IOCB_NOIO		(1 << 20)
/* can use bio alloc cache */
#define IOCB_ALLOC_CACHE	(1 << 21)
e2e40f2c
CH
319
320struct kiocb {
321 struct file *ki_filp;
84c4e1f8
LT
322
323 /* The 'ki_filp' pointer is shared in a union for aio */
324 randomized_struct_fields_start
325
e2e40f2c 326 loff_t ki_pos;
6b19b766 327 void (*ki_complete)(struct kiocb *iocb, long ret);
e2e40f2c
CH
328 void *private;
329 int ki_flags;
fc28724d 330 u16 ki_hint;
d9a08a9e 331 u16 ki_ioprio; /* See linux/ioprio.h */
3e08773c 332 struct wait_page_queue *ki_waitq; /* for async buffered IO */
84c4e1f8
LT
333 randomized_struct_fields_end
334};
e2e40f2c
CH
335
336static inline bool is_sync_kiocb(struct kiocb *kiocb)
337{
338 return kiocb->ki_complete == NULL;
339}
340
8ab22b9a
HH
341/*
342 * "descriptor" for what we're up to with a read.
343 * This allows us to use the same read code yet
344 * have multiple different users of the data that
345 * we read from a file.
346 *
347 * The simplest case just copies the data to user
348 * mode.
349 */
350typedef struct {
351 size_t written;
352 size_t count;
353 union {
354 char __user *buf;
355 void *data;
356 } arg;
357 int error;
358} read_descriptor_t;
359
360typedef int (*read_actor_t)(read_descriptor_t *, struct page *,
361 unsigned long, unsigned long);
2f718ffc 362
1da177e4
LT
363struct address_space_operations {
364 int (*writepage)(struct page *page, struct writeback_control *wbc);
365 int (*readpage)(struct file *, struct page *);
1da177e4
LT
366
367 /* Write back some dirty pages from this mapping. */
368 int (*writepages)(struct address_space *, struct writeback_control *);
369
4741c9fd 370 /* Set a page dirty. Return true if this dirtied it */
1da177e4
LT
371 int (*set_page_dirty)(struct page *page);
372
74c8164e
JA
373 /*
374 * Reads in the requested pages. Unlike ->readpage(), this is
375 * PURELY used for read-ahead!.
376 */
1da177e4
LT
377 int (*readpages)(struct file *filp, struct address_space *mapping,
378 struct list_head *pages, unsigned nr_pages);
8151b4c8 379 void (*readahead)(struct readahead_control *);
1da177e4 380
afddba49
NP
381 int (*write_begin)(struct file *, struct address_space *mapping,
382 loff_t pos, unsigned len, unsigned flags,
383 struct page **pagep, void **fsdata);
384 int (*write_end)(struct file *, struct address_space *mapping,
385 loff_t pos, unsigned len, unsigned copied,
386 struct page *page, void *fsdata);
387
1da177e4
LT
388 /* Unfortunately this kludge is needed for FIBMAP. Don't use it */
389 sector_t (*bmap)(struct address_space *, sector_t);
d47992f8 390 void (*invalidatepage) (struct page *, unsigned int, unsigned int);
27496a8c 391 int (*releasepage) (struct page *, gfp_t);
6072d13c 392 void (*freepage)(struct page *);
c8b8e32d 393 ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter);
b969c4ab 394 /*
ef277c73
TC
395 * migrate the contents of a page to the specified target. If
396 * migrate_mode is MIGRATE_ASYNC, it must not block.
b969c4ab 397 */
2d1db3b1 398 int (*migratepage) (struct address_space *,
a6bc32b8 399 struct page *, struct page *, enum migrate_mode);
bda807d4
MK
400 bool (*isolate_page)(struct page *, isolate_mode_t);
401 void (*putback_page)(struct page *);
e3db7691 402 int (*launder_page) (struct page *);
c186afb4 403 int (*is_partially_uptodate) (struct page *, unsigned long,
8ab22b9a 404 unsigned long);
b4597226 405 void (*is_dirty_writeback) (struct page *, bool *, bool *);
25718736 406 int (*error_remove_page)(struct address_space *, struct page *);
62c230bc
MG
407
408 /* swapfile support */
a509bc1a
MG
409 int (*swap_activate)(struct swap_info_struct *sis, struct file *file,
410 sector_t *span);
411 void (*swap_deactivate)(struct file *file);
1da177e4
LT
412};
413
7dcda1c9
JA
414extern const struct address_space_operations empty_aops;
415
afddba49
NP
416/*
417 * pagecache_write_begin/pagecache_write_end must be used by general code
418 * to write into the pagecache.
419 */
420int pagecache_write_begin(struct file *, struct address_space *mapping,
421 loff_t pos, unsigned len, unsigned flags,
422 struct page **pagep, void **fsdata);
423
424int pagecache_write_end(struct file *, struct address_space *mapping,
425 loff_t pos, unsigned len, unsigned copied,
426 struct page *page, void *fsdata);
427
eb797a8e
MW
428/**
429 * struct address_space - Contents of a cacheable, mappable object.
430 * @host: Owner, either the inode or the block_device.
431 * @i_pages: Cached pages.
730633f0
JK
432 * @invalidate_lock: Guards coherency between page cache contents and
433 * file offset->disk block mappings in the filesystem during invalidates.
434 * It is also used to block modification of page cache contents through
435 * memory mappings.
eb797a8e
MW
436 * @gfp_mask: Memory allocation flags to use for allocating pages.
437 * @i_mmap_writable: Number of VM_SHARED mappings.
09d91cda 438 * @nr_thps: Number of THPs in the pagecache (non-shmem only).
eb797a8e
MW
439 * @i_mmap: Tree of private and shared mappings.
440 * @i_mmap_rwsem: Protects @i_mmap and @i_mmap_writable.
441 * @nrpages: Number of page entries, protected by the i_pages lock.
eb797a8e
MW
442 * @writeback_index: Writeback starts here.
443 * @a_ops: Methods.
444 * @flags: Error bits and flags (AS_*).
445 * @wb_err: The most recent error which has occurred.
446 * @private_lock: For use by the owner of the address_space.
447 * @private_list: For use by the owner of the address_space.
448 * @private_data: For use by the owner of the address_space.
449 */
1da177e4 450struct address_space {
eb797a8e
MW
451 struct inode *host;
452 struct xarray i_pages;
730633f0 453 struct rw_semaphore invalidate_lock;
eb797a8e
MW
454 gfp_t gfp_mask;
455 atomic_t i_mmap_writable;
09d91cda
SL
456#ifdef CONFIG_READ_ONLY_THP_FOR_FS
457 /* number of thp, only for non-shmem files */
458 atomic_t nr_thps;
459#endif
eb797a8e
MW
460 struct rb_root_cached i_mmap;
461 struct rw_semaphore i_mmap_rwsem;
462 unsigned long nrpages;
eb797a8e
MW
463 pgoff_t writeback_index;
464 const struct address_space_operations *a_ops;
465 unsigned long flags;
5660e13d 466 errseq_t wb_err;
eb797a8e
MW
467 spinlock_t private_lock;
468 struct list_head private_list;
469 void *private_data;
3859a271 470} __attribute__((aligned(sizeof(long)))) __randomize_layout;
1da177e4
LT
471 /*
472 * On most architectures that alignment is already the case; but
25985edc 473 * must be enforced here for CRIS, to let the least significant bit
1da177e4
LT
474 * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON.
475 */
1da177e4 476
ff9c745b
MW
477/* XArray tags, for tagging dirty and writeback pages in the pagecache. */
478#define PAGECACHE_TAG_DIRTY XA_MARK_0
479#define PAGECACHE_TAG_WRITEBACK XA_MARK_1
480#define PAGECACHE_TAG_TOWRITE XA_MARK_2
481
1da177e4 482/*
ff9c745b 483 * Returns true if any of the pages in the mapping are marked with the tag.
1da177e4 484 */
ff9c745b
MW
485static inline bool mapping_tagged(struct address_space *mapping, xa_mark_t tag)
486{
487 return xa_marked(&mapping->i_pages, tag);
488}
1da177e4 489
8b28f621
DB
490static inline void i_mmap_lock_write(struct address_space *mapping)
491{
c8c06efa 492 down_write(&mapping->i_mmap_rwsem);
8b28f621
DB
493}
494
c0d0381a
MK
495static inline int i_mmap_trylock_write(struct address_space *mapping)
496{
497 return down_write_trylock(&mapping->i_mmap_rwsem);
498}
499
8b28f621
DB
500static inline void i_mmap_unlock_write(struct address_space *mapping)
501{
c8c06efa 502 up_write(&mapping->i_mmap_rwsem);
8b28f621
DB
503}
504
3dec0ba0
DB
505static inline void i_mmap_lock_read(struct address_space *mapping)
506{
507 down_read(&mapping->i_mmap_rwsem);
508}
509
510static inline void i_mmap_unlock_read(struct address_space *mapping)
511{
512 up_read(&mapping->i_mmap_rwsem);
513}
514
34ae204f
MK
515static inline void i_mmap_assert_locked(struct address_space *mapping)
516{
517 lockdep_assert_held(&mapping->i_mmap_rwsem);
518}
519
520static inline void i_mmap_assert_write_locked(struct address_space *mapping)
521{
522 lockdep_assert_held_write(&mapping->i_mmap_rwsem);
523}
524
1da177e4
LT
525/*
526 * Might pages of this file be mapped into userspace?
527 */
528static inline int mapping_mapped(struct address_space *mapping)
529{
f808c13f 530 return !RB_EMPTY_ROOT(&mapping->i_mmap.rb_root);
1da177e4
LT
531}
532
533/*
534 * Might pages of this file have been modified in userspace?
45e55300 535 * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap
1da177e4
LT
536 * marks vma as VM_SHARED if it is shared, and the file was opened for
537 * writing i.e. vma may be mprotected writable even if now readonly.
4bb5f5d9
DH
538 *
539 * If i_mmap_writable is negative, no new writable mappings are allowed. You
540 * can only deny writable mappings, if none exists right now.
1da177e4
LT
541 */
542static inline int mapping_writably_mapped(struct address_space *mapping)
543{
4bb5f5d9
DH
544 return atomic_read(&mapping->i_mmap_writable) > 0;
545}
546
547static inline int mapping_map_writable(struct address_space *mapping)
548{
549 return atomic_inc_unless_negative(&mapping->i_mmap_writable) ?
550 0 : -EPERM;
551}
552
553static inline void mapping_unmap_writable(struct address_space *mapping)
554{
555 atomic_dec(&mapping->i_mmap_writable);
556}
557
558static inline int mapping_deny_writable(struct address_space *mapping)
559{
560 return atomic_dec_unless_positive(&mapping->i_mmap_writable) ?
561 0 : -EBUSY;
562}
563
564static inline void mapping_allow_writable(struct address_space *mapping)
565{
566 atomic_inc(&mapping->i_mmap_writable);
1da177e4
LT
567}
568
569/*
570 * Use sequence counter to get consistent i_size on 32-bit processors.
571 */
572#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
573#include <linux/seqlock.h>
574#define __NEED_I_SIZE_ORDERED
575#define i_size_ordered_init(inode) seqcount_init(&inode->i_size_seqcount)
576#else
577#define i_size_ordered_init(inode) do { } while (0)
578#endif
579
f19d4a8f
AV
580struct posix_acl;
581#define ACL_NOT_CACHED ((void *)(-1))
332f606b
MS
582/*
583 * ACL_DONT_CACHE is for stacked filesystems, that rely on underlying fs to
584 * cache the ACL. This also means that ->get_acl() can be called in RCU mode
585 * with the LOOKUP_RCU flag.
586 */
2a3a2a3f 587#define ACL_DONT_CACHE ((void *)(-3))
f19d4a8f 588
b8a7a3a6
AG
/*
 * Build a sentinel value from @task: its address advanced by one byte.
 * is_uncached_acl() recognises such values by their set low bit.
 */
static inline struct posix_acl *
uncached_acl_sentinel(struct task_struct *task)
{
	void *raw = task;

	return raw + 1;
}
594
595static inline bool
596is_uncached_acl(struct posix_acl *acl)
597{
598 return (long)acl & 1;
599}
600
3ddcd056
LT
601#define IOP_FASTPERM 0x0001
602#define IOP_LOOKUP 0x0002
603#define IOP_NOFOLLOW 0x0004
d0a5b995 604#define IOP_XATTR 0x0008
76fca90e 605#define IOP_DEFAULT_READLINK 0x0010
3ddcd056 606
9dd813c1
JK
607struct fsnotify_mark_connector;
608
3ddcd056
LT
609/*
610 * Keep mostly read-only and often accessed (especially for
611 * the RCU path lookup and 'stat' data) fields at the beginning
612 * of the 'struct inode'
613 */
1da177e4 614struct inode {
44a7d7a8 615 umode_t i_mode;
3ddcd056 616 unsigned short i_opflags;
92361636
EB
617 kuid_t i_uid;
618 kgid_t i_gid;
3ddcd056
LT
619 unsigned int i_flags;
620
621#ifdef CONFIG_FS_POSIX_ACL
622 struct posix_acl *i_acl;
623 struct posix_acl *i_default_acl;
624#endif
625
44a7d7a8
NP
626 const struct inode_operations *i_op;
627 struct super_block *i_sb;
3ddcd056 628 struct address_space *i_mapping;
44a7d7a8 629
13e12d14
LT
630#ifdef CONFIG_SECURITY
631 void *i_security;
632#endif
44a7d7a8 633
3ddcd056
LT
634 /* Stat data, not accessed from path walking */
635 unsigned long i_ino;
a78ef704
MS
636 /*
637 * Filesystems may only read i_nlink directly. They shall use the
638 * following functions for modification:
639 *
640 * (set|clear|inc|drop)_nlink
641 * inode_(inc|dec)_link_count
642 */
643 union {
644 const unsigned int i_nlink;
645 unsigned int __i_nlink;
646 };
3ddcd056 647 dev_t i_rdev;
2f9d3df8 648 loff_t i_size;
95582b00
DD
649 struct timespec64 i_atime;
650 struct timespec64 i_mtime;
651 struct timespec64 i_ctime;
6cdbb0ef
TT
652 spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
653 unsigned short i_bytes;
69448867
AG
654 u8 i_blkbits;
655 u8 i_write_hint;
3ddcd056
LT
656 blkcnt_t i_blocks;
657
658#ifdef __NEED_I_SIZE_ORDERED
659 seqcount_t i_size_seqcount;
660#endif
661
662 /* Misc */
663 unsigned long i_state;
9902af79 664 struct rw_semaphore i_rwsem;
13e12d14 665
44a7d7a8 666 unsigned long dirtied_when; /* jiffies of first dirtying */
a2f48706 667 unsigned long dirtied_time_when;
44a7d7a8 668
1da177e4 669 struct hlist_node i_hash;
c7f54084 670 struct list_head i_io_list; /* backing dev IO list */
52ebea74
TH
671#ifdef CONFIG_CGROUP_WRITEBACK
672 struct bdi_writeback *i_wb; /* the associated cgroup wb */
2a814908
TH
673
674 /* foreign inode detection, see wbc_detach_inode() */
675 int i_wb_frn_winner;
676 u16 i_wb_frn_avg_time;
677 u16 i_wb_frn_history;
52ebea74 678#endif
7ccf19a8 679 struct list_head i_lru; /* inode LRU list */
1da177e4 680 struct list_head i_sb_list;
6c60d2b5 681 struct list_head i_wb_list; /* backing dev writeback list */
fa0d7e3d 682 union {
b3d9b7a3 683 struct hlist_head i_dentry;
fa0d7e3d
NP
684 struct rcu_head i_rcu;
685 };
f02a9ad1 686 atomic64_t i_version;
8019ad13 687 atomic64_t i_sequence; /* see futex */
2f9d3df8 688 atomic_t i_count;
bd5fe6c5 689 atomic_t i_dio_count;
6cdbb0ef 690 atomic_t i_writecount;
387e3746 691#if defined(CONFIG_IMA) || defined(CONFIG_FILE_LOCKING)
d984ea60
MZ
692 atomic_t i_readcount; /* struct files open RO */
693#endif
fdb0da89
AV
694 union {
695 const struct file_operations *i_fop; /* former ->i_op->default_file_ops */
696 void (*free_inode)(struct inode *);
697 };
4a075e39 698 struct file_lock_context *i_flctx;
1da177e4 699 struct address_space i_data;
1da177e4 700 struct list_head i_devices;
4c154168
TT
701 union {
702 struct pipe_inode_info *i_pipe;
577c4eb0 703 struct cdev *i_cdev;
61ba64fc 704 char *i_link;
84e710da 705 unsigned i_dir_seq;
4c154168 706 };
1da177e4
LT
707
708 __u32 i_generation;
709
3be25f49
EP
710#ifdef CONFIG_FSNOTIFY
711 __u32 i_fsnotify_mask; /* all events this inode cares about */
08991e83 712 struct fsnotify_mark_connector __rcu *i_fsnotify_marks;
0eeca283
RL
713#endif
714
643fa961 715#ifdef CONFIG_FS_ENCRYPTION
0b81d077
JK
716 struct fscrypt_info *i_crypt_info;
717#endif
718
5585f2af
EB
719#ifdef CONFIG_FS_VERITY
720 struct fsverity_info *i_verity_info;
721#endif
722
8e18e294 723 void *i_private; /* fs or device private pointer */
3859a271 724} __randomize_layout;
1da177e4 725
50e17c00
DD
726struct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode);
727
93407472
FF
728static inline unsigned int i_blocksize(const struct inode *node)
729{
730 return (1 << node->i_blkbits);
731}
732
1d3382cb
AV
733static inline int inode_unhashed(struct inode *inode)
734{
735 return hlist_unhashed(&inode->i_hash);
736}
737
5bef9151
AV
738/*
739 * __mark_inode_dirty expects inodes to be hashed. Since we don't
740 * want special inodes in the fileset inode space, we make them
741 * appear hashed, but do not put on any lists. hlist_del()
742 * will work fine and require no locking.
743 */
744static inline void inode_fake_hash(struct inode *inode)
745{
746 hlist_add_fake(&inode->i_hash);
747}
748
f2eace23
IM
/*
 * inode->i_rwsem nesting subclasses for the lock validator:
 *
 * 0: the object of the current VFS operation
 * 1: parent
 * 2: child/target
 * 3: xattr
 * 4: second non-directory
 * 5: second parent (when locking independent directories in rename)
 *
 * I_MUTEX_NONDIR2 is for certain operations (such as rename) which lock two
 * non-directories at once.
 *
 * The locking order between these classes is
 * parent[2] -> child -> grandchild -> normal -> xattr -> second non-directory
 */
enum inode_i_mutex_lock_class {
	I_MUTEX_NORMAL,		/* 0: the object of the current VFS operation */
	I_MUTEX_PARENT,		/* 1: parent */
	I_MUTEX_CHILD,		/* 2: child/target */
	I_MUTEX_XATTR,		/* 3: xattr */
	I_MUTEX_NONDIR2,	/* 4: second non-directory */
	I_MUTEX_PARENT2,	/* 5: second parent */
};
774
5955102c
AV
775static inline void inode_lock(struct inode *inode)
776{
9902af79 777 down_write(&inode->i_rwsem);
5955102c
AV
778}
779
780static inline void inode_unlock(struct inode *inode)
781{
9902af79
AV
782 up_write(&inode->i_rwsem);
783}
784
785static inline void inode_lock_shared(struct inode *inode)
786{
787 down_read(&inode->i_rwsem);
788}
789
790static inline void inode_unlock_shared(struct inode *inode)
791{
792 up_read(&inode->i_rwsem);
5955102c
AV
793}
794
795static inline int inode_trylock(struct inode *inode)
796{
9902af79
AV
797 return down_write_trylock(&inode->i_rwsem);
798}
799
800static inline int inode_trylock_shared(struct inode *inode)
801{
802 return down_read_trylock(&inode->i_rwsem);
5955102c
AV
803}
804
805static inline int inode_is_locked(struct inode *inode)
806{
9902af79 807 return rwsem_is_locked(&inode->i_rwsem);
5955102c
AV
808}
809
810static inline void inode_lock_nested(struct inode *inode, unsigned subclass)
811{
9902af79 812 down_write_nested(&inode->i_rwsem, subclass);
5955102c
AV
813}
814
01c2e13d
DW
815static inline void inode_lock_shared_nested(struct inode *inode, unsigned subclass)
816{
817 down_read_nested(&inode->i_rwsem, subclass);
818}
819
730633f0
JK
820static inline void filemap_invalidate_lock(struct address_space *mapping)
821{
822 down_write(&mapping->invalidate_lock);
823}
824
825static inline void filemap_invalidate_unlock(struct address_space *mapping)
826{
827 up_write(&mapping->invalidate_lock);
828}
829
830static inline void filemap_invalidate_lock_shared(struct address_space *mapping)
831{
832 down_read(&mapping->invalidate_lock);
833}
834
835static inline int filemap_invalidate_trylock_shared(
836 struct address_space *mapping)
837{
838 return down_read_trylock(&mapping->invalidate_lock);
839}
840
841static inline void filemap_invalidate_unlock_shared(
842 struct address_space *mapping)
843{
844 up_read(&mapping->invalidate_lock);
845}
846
375e289e
BF
847void lock_two_nondirectories(struct inode *, struct inode*);
848void unlock_two_nondirectories(struct inode *, struct inode*);
849
7506ae6a
JK
850void filemap_invalidate_lock_two(struct address_space *mapping1,
851 struct address_space *mapping2);
852void filemap_invalidate_unlock_two(struct address_space *mapping1,
853 struct address_space *mapping2);
854
855
1da177e4
LT
/*
 * NOTE: in a 32bit arch with a preemptable kernel and
 * an UP compile the i_size_read/write must be atomic
 * with respect to the local cpu (unlike with preempt disabled),
 * but they don't need to be atomic with respect to other cpus like in
 * true SMP (so they need to either locally disable irq around
 * the read or for example on x86 they can be still implemented as a
 * cmpxchg8b without the need of the lock prefix). For SMP compiles
 * and 64bit archs it makes no difference if preempt is enabled or not.
 */
48ed214d 866static inline loff_t i_size_read(const struct inode *inode)
1da177e4
LT
867{
868#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
869 loff_t i_size;
870 unsigned int seq;
871
872 do {
873 seq = read_seqcount_begin(&inode->i_size_seqcount);
874 i_size = inode->i_size;
875 } while (read_seqcount_retry(&inode->i_size_seqcount, seq));
876 return i_size;
2496396f 877#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
1da177e4
LT
878 loff_t i_size;
879
880 preempt_disable();
881 i_size = inode->i_size;
882 preempt_enable();
883 return i_size;
884#else
885 return inode->i_size;
886#endif
887}
888
7762f5a0
MS
/*
 * NOTE: unlike i_size_read(), i_size_write() does need locking around it
 * (normally i_rwsem), otherwise on 32bit/SMP an update of i_size_seqcount
 * can be lost, resulting in subsequent i_size_read() calls spinning forever.
 */
1da177e4
LT
894static inline void i_size_write(struct inode *inode, loff_t i_size)
895{
896#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
74e3d1e1 897 preempt_disable();
1da177e4
LT
898 write_seqcount_begin(&inode->i_size_seqcount);
899 inode->i_size = i_size;
900 write_seqcount_end(&inode->i_size_seqcount);
74e3d1e1 901 preempt_enable();
2496396f 902#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
1da177e4
LT
903 preempt_disable();
904 inode->i_size = i_size;
905 preempt_enable();
906#else
907 inode->i_size = i_size;
908#endif
909}
910
48ed214d 911static inline unsigned iminor(const struct inode *inode)
1da177e4
LT
912{
913 return MINOR(inode->i_rdev);
914}
915
48ed214d 916static inline unsigned imajor(const struct inode *inode)
1da177e4
LT
917{
918 return MAJOR(inode->i_rdev);
919}
920
1da177e4
LT
921struct fown_struct {
922 rwlock_t lock; /* protects pid, uid, euid fields */
609d7fa9
EB
923 struct pid *pid; /* pid or -pgrp where SIGIO should be sent */
924 enum pid_type pid_type; /* Kind of process group SIGIO should be sent to */
92361636 925 kuid_t uid, euid; /* uid/euid of process setting the owner */
1da177e4
LT
926 int signum; /* posix.1b rt signal to be delivered on IO */
927};
928
c790fbf2
MWO
929/**
930 * struct file_ra_state - Track a file's readahead state.
931 * @start: Where the most recent readahead started.
932 * @size: Number of pages read in the most recent readahead.
933 * @async_size: Start next readahead when this many pages are left.
934 * @ra_pages: Maximum size of a readahead request.
935 * @mmap_miss: How many mmap accesses missed in the page cache.
936 * @prev_pos: The last byte in the most recent read request.
1da177e4
LT
937 */
938struct file_ra_state {
c790fbf2
MWO
939 pgoff_t start;
940 unsigned int size;
941 unsigned int async_size;
942 unsigned int ra_pages;
943 unsigned int mmap_miss;
944 loff_t prev_pos;
1da177e4 945};
1da177e4 946
5ce1110b
FW
947/*
948 * Check if @index falls in the readahead windows.
949 */
950static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
951{
f9acc8c7
FW
952 return (index >= ra->start &&
953 index < ra->start + ra->size);
5ce1110b
FW
954}
955
1da177e4 956struct file {
2f512016 957 union {
4f5e65a1 958 struct llist_node fu_llist;
2f512016
ED
959 struct rcu_head fu_rcuhead;
960 } f_u;
0f7fc9e4 961 struct path f_path;
dd37978c 962 struct inode *f_inode; /* cached value */
99ac48f5 963 const struct file_operations *f_op;
ef3d0fd2
AK
964
965 /*
319c1517 966 * Protects f_ep, f_flags.
ef3d0fd2
AK
967 * Must not be taken from IRQ context.
968 */
969 spinlock_t f_lock;
c75b1d94 970 enum rw_hint f_write_hint;
516e0cc5 971 atomic_long_t f_count;
1da177e4 972 unsigned int f_flags;
aeb5d727 973 fmode_t f_mode;
9c225f26 974 struct mutex f_pos_lock;
1da177e4
LT
975 loff_t f_pos;
976 struct fown_struct f_owner;
d76b0d9b 977 const struct cred *f_cred;
1da177e4
LT
978 struct file_ra_state f_ra;
979
2b47c361 980 u64 f_version;
50462062 981#ifdef CONFIG_SECURITY
1da177e4 982 void *f_security;
50462062 983#endif
1da177e4
LT
984 /* needed for tty driver, and maybe others */
985 void *private_data;
986
987#ifdef CONFIG_EPOLL
988 /* Used by fs/eventpoll.c to link all the hooks to this file */
319c1517 989 struct hlist_head *f_ep;
1da177e4
LT
990#endif /* #ifdef CONFIG_EPOLL */
991 struct address_space *f_mapping;
5660e13d 992 errseq_t f_wb_err;
735e4ae5 993 errseq_t f_sb_err; /* for syncfs */
3859a271
KC
994} __randomize_layout
995 __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */
1da177e4 996
990d6c2d
AK
997struct file_handle {
998 __u32 handle_bytes;
999 int handle_type;
1000 /* file identifier */
9d82973e 1001 unsigned char f_handle[];
990d6c2d
AK
1002};
1003
cb0942b8
AV
1004static inline struct file *get_file(struct file *f)
1005{
1006 atomic_long_inc(&f->f_count);
1007 return f;
1008}
091141a4
JA
/*
 * get_file_rcu_many() adds @cnt to (x)->f_count via
 * atomic_long_add_unless() with 0 as the excluded value and evaluates to
 * that call's result; get_file_rcu() is the single-reference form.
 * file_count() reads the current f_count.
 */
#define get_file_rcu_many(x, cnt)	\
	atomic_long_add_unless(&(x)->f_count, (cnt), 0)
#define get_file_rcu(x)		get_file_rcu_many((x), 1)
#define file_count(x)		atomic_long_read(&(x)->f_count)
1da177e4
LT
1013
1014#define MAX_NON_LFS ((1UL<<31) - 1)
1015
1016/* Page cache limit. The filesystems should put that into their s_maxbytes
1017 limits, otherwise bad things can happen in VM. */
1018#if BITS_PER_LONG==32
0cc3b0ec 1019#define MAX_LFS_FILESIZE ((loff_t)ULONG_MAX << PAGE_SHIFT)
1da177e4 1020#elif BITS_PER_LONG==64
0cc3b0ec 1021#define MAX_LFS_FILESIZE ((loff_t)LLONG_MAX)
1da177e4
LT
1022#endif
1023
1024#define FL_POSIX 1
1025#define FL_FLOCK 2
617588d5 1026#define FL_DELEG 4 /* NFSv4 delegation */
1da177e4 1027#define FL_ACCESS 8 /* not trying to lock, just looking */
f475ae95 1028#define FL_EXISTS 16 /* when unlocking, test for existence */
1da177e4 1029#define FL_LEASE 32 /* lease held on this file */
75e1fcc0 1030#define FL_CLOSE 64 /* unlock on close */
1da177e4 1031#define FL_SLEEP 128 /* A blocking lock */
778fc546
BF
1032#define FL_DOWNGRADE_PENDING 256 /* Lease is being downgraded */
1033#define FL_UNLOCK_PENDING 512 /* Lease is being broken */
cff2fce5 1034#define FL_OFDLCK 1024 /* lock is "owned" by struct file */
11afe9f7 1035#define FL_LAYOUT 2048 /* outstanding pNFS layout */
bb0a55bb 1036#define FL_RECLAIM 4096 /* reclaiming from a reboot server */
1da177e4 1037
50f2112c
BC
1038#define FL_CLOSE_POSIX (FL_POSIX | FL_CLOSE)
1039
bde74e4b
MS
1040/*
1041 * Special return value from posix_lock_file() and vfs_lock_file() for
1042 * asynchronous locking.
1043 */
1044#define FILE_LOCK_DEFERRED 1
1045
7ca76311 1046/* legacy typedef, should eventually be removed */
17fa388d 1047typedef void *fl_owner_t;
1da177e4 1048
a7231a97
JL
1049struct file_lock;
1050
/* Filesystem callbacks attached to a file_lock through ->fl_ops. */
struct file_lock_operations {
	void (*fl_copy_lock)(struct file_lock *, struct file_lock *);
	void (*fl_release_private)(struct file_lock *);
};
1055
1056struct lock_manager_operations {
cae80b30
JL
1057 fl_owner_t (*lm_get_owner)(fl_owner_t);
1058 void (*lm_put_owner)(fl_owner_t);
8fb47a4f 1059 void (*lm_notify)(struct file_lock *); /* unblock callback */
d0449b90 1060 int (*lm_grant)(struct file_lock *, int);
4d01b7f5 1061 bool (*lm_break)(struct file_lock *);
7448cc37 1062 int (*lm_change)(struct file_lock *, int, struct list_head *);
1c7dd2ff 1063 void (*lm_setup)(struct file_lock *, void **);
28df3d15 1064 bool (*lm_breaker_owns_lease)(struct file_lock *);
1da177e4
LT
1065};
1066
af558e33
BF
1067struct lock_manager {
1068 struct list_head list;
c87fb4a3
BF
1069 /*
1070 * NFSv4 and up also want opens blocked during the grace period;
1071 * NLM doesn't care:
1072 */
1073 bool block_opens;
af558e33
BF
1074};
1075
5ccb0066
SK
1076struct net;
1077void locks_start_grace(struct net *, struct lock_manager *);
af558e33 1078void locks_end_grace(struct lock_manager *);
003278e4
CL
1079bool locks_in_grace(struct net *);
1080bool opens_in_grace(struct net *);
af558e33 1081
1da177e4
LT
1082/* that will die - we need it for nfs_lock_info */
1083#include <linux/nfs_fs_i.h>
1084
1cb36012
JL
1085/*
1086 * struct file_lock represents a generic "file lock". It's used to represent
1087 * POSIX byte range locks, BSD (flock) locks, and leases. It's important to
1088 * note that the same struct is used to represent both a request for a lock and
1089 * the lock itself, but the same object is never used for both.
1090 *
1091 * FIXME: should we create a separate "struct lock_request" to help distinguish
1092 * these two uses?
1093 *
8116bf4c 1094 * The varous i_flctx lists are ordered by:
1cb36012 1095 *
8116bf4c
JL
1096 * 1) lock owner
1097 * 2) lock range start
1098 * 3) lock range end
1cb36012
JL
1099 *
1100 * Obviously, the last two criteria only matter for POSIX locks.
1101 */
1da177e4 1102struct file_lock {
ada5c1da 1103 struct file_lock *fl_blocker; /* The lock, that is blocking us */
6dee60f6 1104 struct list_head fl_list; /* link into file_lock_context */
139ca04e 1105 struct hlist_node fl_link; /* node in global lists */
ada5c1da
N
1106 struct list_head fl_blocked_requests; /* list of requests with
1107 * ->fl_blocker pointing here
1108 */
1109 struct list_head fl_blocked_member; /* node in
1110 * ->fl_blocker->fl_blocked_requests
1111 */
1da177e4 1112 fl_owner_t fl_owner;
710b7216 1113 unsigned int fl_flags;
afc1246f 1114 unsigned char fl_type;
1da177e4 1115 unsigned int fl_pid;
7012b02a 1116 int fl_link_cpu; /* what cpu's list is this on? */
1da177e4
LT
1117 wait_queue_head_t fl_wait;
1118 struct file *fl_file;
1da177e4
LT
1119 loff_t fl_start;
1120 loff_t fl_end;
1121
1122 struct fasync_struct * fl_fasync; /* for lease break notifications */
778fc546
BF
1123 /* for lease breaks: */
1124 unsigned long fl_break_time;
1125 unsigned long fl_downgrade_time;
1da177e4 1126
6aed6285 1127 const struct file_lock_operations *fl_ops; /* Callbacks for filesystems */
7b021967 1128 const struct lock_manager_operations *fl_lmops; /* Callbacks for lockmanagers */
1da177e4
LT
1129 union {
1130 struct nfs_lock_info nfs_fl;
8d0a8a9d 1131 struct nfs4_lock_info nfs4_fl;
e8d6c554
DH
1132 struct {
1133 struct list_head link; /* link in AFS vnode's pending_locks list */
1134 int state; /* state of grant or error if -ve */
d4696601 1135 unsigned int debug_id;
e8d6c554 1136 } afs;
1da177e4 1137 } fl_u;
3859a271 1138} __randomize_layout;
1da177e4 1139
4a075e39 1140struct file_lock_context {
6109c850 1141 spinlock_t flc_lock;
4a075e39 1142 struct list_head flc_flock;
bd61e0a9 1143 struct list_head flc_posix;
8634b51f 1144 struct list_head flc_lease;
4a075e39
JL
1145};
1146
1da177e4
LT
/*
 * The following constants reflect the upper bound of the file/locking space.
 *
 * INT_LIMIT(x) evaluates to the largest positive value of the signed
 * integer type @x (which must be no wider than 64 bits). The value is
 * built in unsigned arithmetic and then cast: shifting a signed 1 into the
 * sign bit is undefined behaviour, and for types narrower than int the
 * integer promotions would otherwise yield a wrong (negative) result.
 */
#ifndef OFFSET_MAX
#define INT_LIMIT(x)	((x)((1ULL << (sizeof(x) * 8 - 1)) - 1))
#define OFFSET_MAX	INT_LIMIT(loff_t)
#define OFFT_OFFSET_MAX	INT_LIMIT(off_t)
#endif
1153
bfcd17a6
TP
1154extern void send_sigio(struct fown_struct *fown, int fd, int band);
1155
de2a4a50 1156#define locks_inode(f) file_inode(f)
c568d683 1157
bfcd17a6 1158#ifdef CONFIG_FILE_LOCKING
a75d30c7 1159extern int fcntl_getlk(struct file *, unsigned int, struct flock *);
c293621b 1160extern int fcntl_setlk(unsigned int, struct file *, unsigned int,
a75d30c7 1161 struct flock *);
1da177e4
LT
1162
1163#if BITS_PER_LONG == 32
a75d30c7 1164extern int fcntl_getlk64(struct file *, unsigned int, struct flock64 *);
c293621b 1165extern int fcntl_setlk64(unsigned int, struct file *, unsigned int,
a75d30c7 1166 struct flock64 *);
1da177e4
LT
1167#endif
1168
1da177e4
LT
1169extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg);
1170extern int fcntl_getlease(struct file *filp);
1171
1172/* fs/locks.c */
f27a0fe0 1173void locks_free_lock_context(struct inode *inode);
05fa3135 1174void locks_free_lock(struct file_lock *fl);
1da177e4 1175extern void locks_init_lock(struct file_lock *);
c5b1f0d9 1176extern struct file_lock * locks_alloc_lock(void);
1da177e4 1177extern void locks_copy_lock(struct file_lock *, struct file_lock *);
3fe0fff1 1178extern void locks_copy_conflock(struct file_lock *, struct file_lock *);
1da177e4 1179extern void locks_remove_posix(struct file *, fl_owner_t);
78ed8a13 1180extern void locks_remove_file(struct file *);
a9e61e25 1181extern void locks_release_private(struct file_lock *);
6d34ac19 1182extern void posix_test_lock(struct file *, struct file_lock *);
150b3934 1183extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *);
cb03f94f 1184extern int locks_delete_block(struct file_lock *);
3ee17abd 1185extern int vfs_test_lock(struct file *, struct file_lock *);
150b3934 1186extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *);
9b9d2ab4 1187extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl);
e55c34a6 1188extern int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl);
df4e8d2c 1189extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int type);
95582b00 1190extern void lease_get_mtime(struct inode *, struct timespec64 *time);
e6f5c789
JL
1191extern int generic_setlease(struct file *, long, struct file_lock **, void **priv);
1192extern int vfs_setlease(struct file *, long, struct file_lock **, void **);
7448cc37 1193extern int lease_modify(struct file_lock *, int, struct list_head *);
18f6622e
JL
1194
1195struct notifier_block;
1196extern int lease_register_notifier(struct notifier_block *);
1197extern void lease_unregister_notifier(struct notifier_block *);
1198
6c8c9031
AV
1199struct files_struct;
1200extern void show_fd_locks(struct seq_file *f,
1201 struct file *filp, struct files_struct *files);
bfcd17a6 1202#else /* !CONFIG_FILE_LOCKING */
c1e62b8f
JL
/*
 * Stub for !CONFIG_FILE_LOCKING: always fails with -EINVAL.
 *
 * @user has no __user annotation so that the stub matches the
 * CONFIG_FILE_LOCKING prototype and the 64-bit stubs, which all take a
 * plain struct pointer.
 */
static inline int fcntl_getlk(struct file *file, unsigned int cmd,
			      struct flock *user)
{
	return -EINVAL;
}
1208
/*
 * Stub for !CONFIG_FILE_LOCKING: always fails with -EACCES.
 *
 * @user has no __user annotation so that the stub matches the
 * CONFIG_FILE_LOCKING prototype and the 64-bit stubs, which all take a
 * plain struct pointer.
 */
static inline int fcntl_setlk(unsigned int fd, struct file *file,
			      unsigned int cmd, struct flock *user)
{
	return -EACCES;
}
1214
bfcd17a6 1215#if BITS_PER_LONG == 32
/* Stub for !CONFIG_FILE_LOCKING on 32-bit: always fails with -EINVAL. */
static inline int fcntl_getlk64(struct file *file, unsigned int cmd,
				struct flock64 *user)
{
	return -EINVAL;
}
1221
/* Stub for !CONFIG_FILE_LOCKING on 32-bit: always fails with -EACCES. */
static inline int fcntl_setlk64(unsigned int fd, struct file *file,
				unsigned int cmd, struct flock64 *user)
{
	return -EACCES;
}
bfcd17a6 1227#endif
c2aca5e5
SW
/* Stub for !CONFIG_FILE_LOCKING: setting a lease always fails with -EINVAL. */
static inline int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
{
	return -EINVAL;
}
1232
/* Stub for !CONFIG_FILE_LOCKING: always reports F_UNLCK. */
static inline int fcntl_getlease(struct file *filp)
{
	return F_UNLCK;
}
1237
/* Stub for !CONFIG_FILE_LOCKING: does nothing. */
static inline void locks_free_lock_context(struct inode *inode)
{
}
1242
c2aca5e5
SW
/* Stub for !CONFIG_FILE_LOCKING: does nothing. */
static inline void locks_init_lock(struct file_lock *fl)
{
}
1247
/* Stub for !CONFIG_FILE_LOCKING: does nothing. */
static inline void locks_copy_conflock(struct file_lock *new, struct file_lock *fl)
{
}
1252
/* Stub for !CONFIG_FILE_LOCKING: does nothing. */
static inline void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
{
}
1257
1258static inline void locks_remove_posix(struct file *filp, fl_owner_t owner)
1259{
1260 return;
1261}
1262
/* Stub for !CONFIG_FILE_LOCKING: does nothing. */
static inline void locks_remove_file(struct file *filp)
{
}
1267
/* Stub for !CONFIG_FILE_LOCKING: does nothing and leaves @fl untouched. */
static inline void posix_test_lock(struct file *filp, struct file_lock *fl)
{
}
1272
/* Stub for !CONFIG_FILE_LOCKING: always fails with -ENOLCK. */
static inline int posix_lock_file(struct file *filp, struct file_lock *fl,
				  struct file_lock *conflock)
{
	return -ENOLCK;
}
1278
/* Stub for !CONFIG_FILE_LOCKING: always returns -ENOENT. */
static inline int locks_delete_block(struct file_lock *waiter)
{
	return -ENOENT;
}
1283
/* Stub for !CONFIG_FILE_LOCKING: always returns 0. */
static inline int vfs_test_lock(struct file *filp, struct file_lock *fl)
{
	return 0;
}
1288
/* Stub for !CONFIG_FILE_LOCKING: always fails with -ENOLCK. */
static inline int vfs_lock_file(struct file *filp, unsigned int cmd,
				struct file_lock *fl, struct file_lock *conf)
{
	return -ENOLCK;
}
1294
/* Stub for !CONFIG_FILE_LOCKING: always returns 0. */
static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
{
	return 0;
}
1299
e55c34a6
BC
/* Stub for !CONFIG_FILE_LOCKING: always fails with -ENOLCK. */
static inline int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl)
{
	return -ENOLCK;
}
1304
/* Stub for !CONFIG_FILE_LOCKING: always returns 0. */
static inline int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
{
	return 0;
}
1309
95582b00
DD
/* Stub for !CONFIG_FILE_LOCKING: does nothing and leaves @time untouched. */
static inline void lease_get_mtime(struct inode *inode,
				   struct timespec64 *time)
{
}
1315
/* Stub for !CONFIG_FILE_LOCKING: always fails with -EINVAL. */
static inline int generic_setlease(struct file *filp, long arg,
				   struct file_lock **flp, void **priv)
{
	return -EINVAL;
}
1321
/* Stub for !CONFIG_FILE_LOCKING: always fails with -EINVAL. */
static inline int vfs_setlease(struct file *filp, long arg,
			       struct file_lock **lease, void **priv)
{
	return -EINVAL;
}
1327
/* Stub for !CONFIG_FILE_LOCKING: always fails with -EINVAL. */
static inline int lease_modify(struct file_lock *fl, int arg,
			       struct list_head *dispose)
{
	return -EINVAL;
}
6c8c9031
AV
1333
struct files_struct;

/* Stub for !CONFIG_FILE_LOCKING: does nothing. */
static inline void show_fd_locks(struct seq_file *f,
				 struct file *filp, struct files_struct *files)
{
}
bfcd17a6
TP
1337#endif /* !CONFIG_FILE_LOCKING */
1338
ee296d7c
JL
1339static inline struct inode *file_inode(const struct file *f)
1340{
1341 return f->f_inode;
1342}
1343
d101a125
MS
1344static inline struct dentry *file_dentry(const struct file *file)
1345{
fb16043b 1346 return d_real(file->f_path.dentry, file_inode(file));
d101a125
MS
1347}
1348
e55c34a6
BC
/*
 * Apply @fl to the inode behind @filp (as given by locks_inode()) via
 * locks_lock_inode_wait() and return its result.
 */
static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl)
{
	struct inode *inode = locks_inode(filp);

	return locks_lock_inode_wait(inode, fl);
}
1353
1da177e4 1354struct fasync_struct {
7a107c0f 1355 rwlock_t fa_lock;
989a2979
ED
1356 int magic;
1357 int fa_fd;
1358 struct fasync_struct *fa_next; /* singly linked list */
1359 struct file *fa_file;
1360 struct rcu_head fa_rcu;
1da177e4
LT
1361};
1362
1363#define FASYNC_MAGIC 0x4601
1364
1365/* SMP safe fasync helpers: */
1366extern int fasync_helper(int, struct file *, int, struct fasync_struct **);
f7347ce4
LT
1367extern struct fasync_struct *fasync_insert_entry(int, struct file *, struct fasync_struct **, struct fasync_struct *);
1368extern int fasync_remove_entry(struct file *, struct fasync_struct **);
1369extern struct fasync_struct *fasync_alloc(void);
1370extern void fasync_free(struct fasync_struct *);
1371
1da177e4
LT
1372/* can be called from interrupts */
1373extern void kill_fasync(struct fasync_struct **, int, int);
1da177e4 1374
e0b93edd 1375extern void __f_setown(struct file *filp, struct pid *, enum pid_type, int force);
393cc3f5 1376extern int f_setown(struct file *filp, unsigned long arg, int force);
1da177e4 1377extern void f_delown(struct file *filp);
609d7fa9 1378extern pid_t f_getown(struct file *filp);
1da177e4
LT
1379extern int send_sigurg(struct fown_struct *fown);
1380
e462ec50
DH
1381/*
1382 * sb->s_flags. Note that these mirror the equivalent MS_* flags where
1383 * represented in both.
1384 */
1385#define SB_RDONLY 1 /* Mount read-only */
1386#define SB_NOSUID 2 /* Ignore suid and sgid bits */
1387#define SB_NODEV 4 /* Disallow access to device special files */
1388#define SB_NOEXEC 8 /* Disallow program execution */
1389#define SB_SYNCHRONOUS 16 /* Writes are synced at once */
1390#define SB_MANDLOCK 64 /* Allow mandatory locks on an FS */
1391#define SB_DIRSYNC 128 /* Directory modifications are synchronous */
1392#define SB_NOATIME 1024 /* Do not update access times. */
1393#define SB_NODIRATIME 2048 /* Do not update directory access times */
1394#define SB_SILENT 32768
1395#define SB_POSIXACL (1<<16) /* VFS does not apply the umask */
457e7a13 1396#define SB_INLINECRYPT (1<<17) /* Use blk-crypto for encrypted files */
e462ec50
DH
1397#define SB_KERNMOUNT (1<<22) /* this is a kern_mount call */
1398#define SB_I_VERSION (1<<23) /* Update inode I_version field */
1399#define SB_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */
1400
1401/* These sb flags are internal to the kernel */
1402#define SB_SUBMOUNT (1<<26)
8d0347f6 1403#define SB_FORCE (1<<27)
e462ec50
DH
1404#define SB_NOSEC (1<<28)
1405#define SB_BORN (1<<29)
1406#define SB_ACTIVE (1<<30)
1407#define SB_NOUSER (1<<31)
1408
c843843e
DR
/* These flags relate to encoding and casefolding */
#define SB_ENC_STRICT_MODE_FL	(1 << 0)

/*
 * Evaluates to nonzero when @sb has SB_ENC_STRICT_MODE_FL set in
 * s_encoding_flags.  @sb is parenthesised so that any pointer expression
 * (e.g. "base + i" or "&array[i]") can be passed safely.
 */
#define sb_has_strict_encoding(sb) \
	((sb)->s_encoding_flags & SB_ENC_STRICT_MODE_FL)
1414
1da177e4
LT
/*
 * Umount options
 */

#define MNT_FORCE	0x00000001	/* Attempt to forcibly umount */
#define MNT_DETACH	0x00000002	/* Just detach from the tree */
#define MNT_EXPIRE	0x00000004	/* Mark for expiry */
#define UMOUNT_NOFOLLOW	0x00000008	/* Don't follow symlink on umount */
#define UMOUNT_UNUSED	0x80000000	/* Flag guaranteed to be unused */
1da177e4 1424
46b15caa
TH
1425/* sb->s_iflags */
1426#define SB_I_CGROUPWB 0x00000001 /* cgroup-aware writeback enabled */
90f8572b 1427#define SB_I_NOEXEC 0x00000002 /* Ignore executables on this fs */
a2982cc9 1428#define SB_I_NODEV 0x00000004 /* Ignore devices on this fs */
1cb039f3 1429#define SB_I_STABLE_WRITES 0x00000008 /* don't modify blks until WB is done */
1da177e4 1430
8654df4e
EB
1431/* sb->s_iflags to limit user namespace mounts */
1432#define SB_I_USERNS_VISIBLE 0x00000010 /* fstype already mounted */
57b56ac6
MZ
1433#define SB_I_IMA_UNVERIFIABLE_SIGNATURE 0x00000020
1434#define SB_I_UNTRUSTED_MOUNTER 0x00000040
1da177e4 1435
32b1924b 1436#define SB_I_SKIP_SYNC 0x00000100 /* Skip superblock at global sync */
0b3ea092 1437#define SB_I_PERSB_BDI 0x00000200 /* has a per-sb bdi */
32b1924b 1438
5accdf82
JK
/* Possible states of 'frozen' field */
enum {
	SB_UNFROZEN		= 0,	/* FS is unfrozen */
	SB_FREEZE_WRITE		= 1,	/* Writes, dir ops, ioctls frozen */
	SB_FREEZE_PAGEFAULT	= 2,	/* Page faults stopped as well */
	SB_FREEZE_FS		= 3,	/* For internal FS use (e.g. to stop
					 * internal threads if needed) */
	SB_FREEZE_COMPLETE	= 4,	/* ->freeze_fs finished successfully */
};

/* Number of freeze levels below SB_FREEZE_COMPLETE; sizes sb_writers.rw_sem. */
#define SB_FREEZE_LEVELS (SB_FREEZE_COMPLETE - 1)
1450
1451struct sb_writers {
8129ed29 1452 int frozen; /* Is sb frozen? */
60b49885 1453 wait_queue_head_t wait_unfrozen; /* wait for thaw */
8129ed29 1454 struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS];
5accdf82
JK
1455};
1456
1da177e4
LT
1457struct super_block {
1458 struct list_head s_list; /* Keep this first */
1459 dev_t s_dev; /* search index; _not_ kdev_t */
270ba5f7
RK
1460 unsigned char s_blocksize_bits;
1461 unsigned long s_blocksize;
42cb56ae 1462 loff_t s_maxbytes; /* Max file size */
1da177e4 1463 struct file_system_type *s_type;
ee9b6d61 1464 const struct super_operations *s_op;
61e225dc 1465 const struct dquot_operations *dq_op;
0d54b217 1466 const struct quotactl_ops *s_qcop;
39655164 1467 const struct export_operations *s_export_op;
1da177e4 1468 unsigned long s_flags;
46b15caa 1469 unsigned long s_iflags; /* internal SB_I_* flags */
1da177e4
LT
1470 unsigned long s_magic;
1471 struct dentry *s_root;
1472 struct rw_semaphore s_umount;
1da177e4 1473 int s_count;
1da177e4 1474 atomic_t s_active;
50462062 1475#ifdef CONFIG_SECURITY
1da177e4 1476 void *s_security;
50462062 1477#endif
bb435453 1478 const struct xattr_handler **s_xattr;
643fa961 1479#ifdef CONFIG_FS_ENCRYPTION
0b81d077 1480 const struct fscrypt_operations *s_cop;
22d94f49 1481 struct key *s_master_keys; /* master crypto keys in use */
5585f2af
EB
1482#endif
1483#ifdef CONFIG_FS_VERITY
1484 const struct fsverity_operations *s_vop;
c843843e 1485#endif
5298d4bf 1486#if IS_ENABLED(CONFIG_UNICODE)
c843843e
DR
1487 struct unicode_map *s_encoding;
1488 __u16 s_encoding_flags;
bbbc3fb6 1489#endif
f1ee6162 1490 struct hlist_bl_head s_roots; /* alternate root dentries for NFS */
39f7c4db 1491 struct list_head s_mounts; /* list of mounts; _not_ for fs use */
1da177e4 1492 struct block_device *s_bdev;
32a88aa1 1493 struct backing_dev_info *s_bdi;
acaebfd8 1494 struct mtd_info *s_mtd;
a5166169 1495 struct hlist_node s_instances;
2c5f648a 1496 unsigned int s_quota_types; /* Bitmask of supported quota types */
1da177e4
LT
1497 struct quota_info s_dquot; /* Diskquota specific options */
1498
5accdf82 1499 struct sb_writers s_writers;
1da177e4 1500
99c228a9
AG
1501 /*
1502 * Keep s_fs_info, s_time_gran, s_fsnotify_mask, and
1503 * s_fsnotify_marks together for cache efficiency. They are frequently
1504 * accessed and rarely modified.
1505 */
1506 void *s_fs_info; /* Filesystem private info */
1507
1508 /* Granularity of c/m/atime in ns (cannot be worse than a second) */
1509 u32 s_time_gran;
188d20bc
DD
1510 /* Time limits for c/m/atime in seconds */
1511 time64_t s_time_min;
1512 time64_t s_time_max;
99c228a9
AG
1513#ifdef CONFIG_FSNOTIFY
1514 __u32 s_fsnotify_mask;
1515 struct fsnotify_mark_connector __rcu *s_fsnotify_marks;
1516#endif
1517
85787090
CH
1518 char s_id[32]; /* Informational name */
1519 uuid_t s_uuid; /* UUID */
1da177e4 1520
8de52778 1521 unsigned int s_max_links;
30c40d2c 1522 fmode_t s_mode;
1da177e4
LT
1523
1524 /*
1525 * The next field is for VFS *only*. No filesystems have any business
1526 * even looking at it. You had been warned.
1527 */
a11f3a05 1528 struct mutex s_vfs_rename_mutex; /* Kludge */
1da177e4 1529
79c0b2df
MS
1530 /*
1531 * Filesystem subtype. If non-empty the filesystem type field
1532 * in /proc/mounts will be "type.subtype"
1533 */
a0c9a8b8 1534 const char *s_subtype;
b3b304a2 1535
c8aebb0c 1536 const struct dentry_operations *s_d_op; /* default d_op for dentries */
9fdfdcf1 1537
b0d40c92 1538 struct shrinker s_shrink; /* per-sb shrinker handle */
4ed5e82f 1539
7ada4db8
MS
1540 /* Number of inodes with nlink == 0 but still referenced */
1541 atomic_long_t s_remove_count;
1542
ec44610f
AG
1543 /*
1544 * Number of inode/mount/sb objects that are being watched, note that
1545 * inodes objects are currently double-accounted.
1546 */
1547 atomic_long_t s_fsnotify_connectors;
721fb6fb 1548
4ed5e82f
MS
1549 /* Being remounted read-only */
1550 int s_readonly_remount;
7b7a8665 1551
735e4ae5
JL
1552 /* per-sb errseq_t for reporting writeback errors via syncfs */
1553 errseq_t s_wb_err;
1554
7b7a8665
CH
1555 /* AIO completions deferred from interrupt context */
1556 struct workqueue_struct *s_dio_done_wq;
215752fc 1557 struct hlist_head s_pins;
f6041567 1558
6e4eab57
EB
1559 /*
1560 * Owning user namespace and default context in which to
1561 * interpret filesystem uids, gids, quotas, device nodes,
1562 * xattrs and security labels.
1563 */
1564 struct user_namespace *s_user_ns;
1565
f6041567 1566 /*
7d10f70f
WL
1567 * The list_lru structure is essentially just a pointer to a table
1568 * of per-node lru lists, each of which has its own spinlock.
1569 * There is no need to put them into separate cachelines.
f6041567 1570 */
7d10f70f
WL
1571 struct list_lru s_dentry_lru;
1572 struct list_lru s_inode_lru;
e2fec7c3 1573 struct rcu_head rcu;
853b39a7 1574 struct work_struct destroy_work;
69c433ed 1575
e97fedb9 1576 struct mutex s_sync_lock; /* sync serialisation lock */
69c433ed
MS
1577
1578 /*
1579 * Indicates how deep in a filesystem stack this SB is
1580 */
1581 int s_stack_depth;
74278da9
DC
1582
1583 /* s_inode_list_lock protects s_inodes */
1584 spinlock_t s_inode_list_lock ____cacheline_aligned_in_smp;
1585 struct list_head s_inodes; /* all inodes */
6c60d2b5
DC
1586
1587 spinlock_t s_inode_wblist_lock;
1588 struct list_head s_inodes_wb; /* writeback inodes */
3859a271 1589} __randomize_layout;
1da177e4 1590
a1ec9040
CB
1591static inline struct user_namespace *i_user_ns(const struct inode *inode)
1592{
1593 return inode->i_sb->s_user_ns;
1594}
1595
81754357
SF
1596/* Helper functions so that in most cases filesystems will
1597 * not need to deal directly with kuid_t and kgid_t and can
1598 * instead deal with the raw numeric values that are stored
1599 * in the filesystem.
1600 */
1601static inline uid_t i_uid_read(const struct inode *inode)
1602{
a1ec9040 1603 return from_kuid(i_user_ns(inode), inode->i_uid);
81754357
SF
1604}
1605
1606static inline gid_t i_gid_read(const struct inode *inode)
1607{
a1ec9040 1608 return from_kgid(i_user_ns(inode), inode->i_gid);
81754357
SF
1609}
1610
1611static inline void i_uid_write(struct inode *inode, uid_t uid)
1612{
a1ec9040 1613 inode->i_uid = make_kuid(i_user_ns(inode), uid);
81754357
SF
1614}
1615
1616static inline void i_gid_write(struct inode *inode, gid_t gid)
1617{
a1ec9040 1618 inode->i_gid = make_kgid(i_user_ns(inode), gid);
81754357
SF
1619}
1620
1bd66c1a
CB
1621/**
1622 * i_uid_into_mnt - map an inode's i_uid down into a mnt_userns
1623 * @mnt_userns: user namespace of the mount the inode was found from
1624 * @inode: inode to map
1625 *
1626 * Return: the inode's i_uid mapped down according to @mnt_userns.
1627 * If the inode's i_uid has no mapping INVALID_UID is returned.
1628 */
e6c9a714
CB
1629static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns,
1630 const struct inode *inode)
1631{
bd303368 1632 return mapped_kuid_fs(mnt_userns, i_user_ns(inode), inode->i_uid);
e6c9a714
CB
1633}
1634
1bd66c1a
CB
1635/**
1636 * i_gid_into_mnt - map an inode's i_gid down into a mnt_userns
1637 * @mnt_userns: user namespace of the mount the inode was found from
1638 * @inode: inode to map
1639 *
1640 * Return: the inode's i_gid mapped down according to @mnt_userns.
1641 * If the inode's i_gid has no mapping INVALID_GID is returned.
1642 */
e6c9a714
CB
1643static inline kgid_t i_gid_into_mnt(struct user_namespace *mnt_userns,
1644 const struct inode *inode)
1645{
bd303368 1646 return mapped_kgid_fs(mnt_userns, i_user_ns(inode), inode->i_gid);
e6c9a714
CB
1647}
1648
db998553
CB
1649/**
1650 * inode_fsuid_set - initialize inode's i_uid field with callers fsuid
1651 * @inode: inode to initialize
1652 * @mnt_userns: user namespace of the mount the inode was found from
1653 *
1654 * Initialize the i_uid field of @inode. If the inode was found/created via
1655 * an idmapped mount map the caller's fsuid according to @mnt_users.
1656 */
1657static inline void inode_fsuid_set(struct inode *inode,
1658 struct user_namespace *mnt_userns)
1659{
bd303368 1660 inode->i_uid = mapped_fsuid(mnt_userns, i_user_ns(inode));
db998553
CB
1661}
1662
1663/**
1664 * inode_fsgid_set - initialize inode's i_gid field with callers fsgid
1665 * @inode: inode to initialize
1666 * @mnt_userns: user namespace of the mount the inode was found from
1667 *
1668 * Initialize the i_gid field of @inode. If the inode was found/created via
1669 * an idmapped mount map the caller's fsgid according to @mnt_users.
1670 */
1671static inline void inode_fsgid_set(struct inode *inode,
1672 struct user_namespace *mnt_userns)
1673{
bd303368 1674 inode->i_gid = mapped_fsgid(mnt_userns, i_user_ns(inode));
db998553
CB
1675}
1676
8e538913
CB
1677/**
1678 * fsuidgid_has_mapping() - check whether caller's fsuid/fsgid is mapped
1679 * @sb: the superblock we want a mapping in
1680 * @mnt_userns: user namespace of the relevant mount
1681 *
1682 * Check whether the caller's fsuid and fsgid have a valid mapping in the
1683 * s_user_ns of the superblock @sb. If the caller is on an idmapped mount map
1684 * the caller's fsuid and fsgid according to the @mnt_userns first.
1685 *
1686 * Return: true if fsuid and fsgid is mapped, false if not.
1687 */
1688static inline bool fsuidgid_has_mapping(struct super_block *sb,
1689 struct user_namespace *mnt_userns)
1690{
476860b3
CB
1691 struct user_namespace *fs_userns = sb->s_user_ns;
1692 kuid_t kuid;
1693 kgid_t kgid;
1694
bd303368 1695 kuid = mapped_fsuid(mnt_userns, fs_userns);
476860b3
CB
1696 if (!uid_valid(kuid))
1697 return false;
bd303368 1698 kgid = mapped_fsgid(mnt_userns, fs_userns);
476860b3
CB
1699 if (!gid_valid(kgid))
1700 return false;
1701 return kuid_has_mapping(fs_userns, kuid) &&
1702 kgid_has_mapping(fs_userns, kgid);
8e538913
CB
1703}
1704
95582b00 1705extern struct timespec64 current_time(struct inode *inode);
1da177e4
LT
1706
1707/*
1708 * Snapshotting support.
1709 */
1da177e4 1710
9b852342
DW
1711/*
1712 * These are internal functions, please use sb_start_{write,pagefault,intwrite}
1713 * instead.
1714 */
1715static inline void __sb_end_write(struct super_block *sb, int level)
1716{
1717 percpu_up_read(sb->s_writers.rw_sem + level-1);
1718}
1719
1720static inline void __sb_start_write(struct super_block *sb, int level)
1721{
1722 percpu_down_read(sb->s_writers.rw_sem + level - 1);
1723}
1724
1725static inline bool __sb_start_write_trylock(struct super_block *sb, int level)
1726{
1727 return percpu_down_read_trylock(sb->s_writers.rw_sem + level - 1);
1728}
5accdf82 1729
/*
 * Pass the rwsem for freeze level @lev to percpu_rwsem_acquire() /
 * percpu_rwsem_release(), tagged with the current instruction pointer.
 */
#define __sb_writers_acquired(sb, lev)	\
	percpu_rwsem_acquire((sb)->s_writers.rw_sem + ((lev) - 1), 1, _THIS_IP_)
#define __sb_writers_release(sb, lev)	\
	percpu_rwsem_release((sb)->s_writers.rw_sem + ((lev) - 1), 1, _THIS_IP_)
bee9182d 1734
5accdf82
JK
1735/**
1736 * sb_end_write - drop write access to a superblock
1737 * @sb: the super we wrote to
1738 *
1739 * Decrement number of writers to the filesystem. Wake up possible waiters
1740 * wanting to freeze the filesystem.
1741 */
1742static inline void sb_end_write(struct super_block *sb)
1743{
1744 __sb_end_write(sb, SB_FREEZE_WRITE);
1745}
1746
1747/**
1748 * sb_end_pagefault - drop write access to a superblock from a page fault
1749 * @sb: the super we wrote to
1750 *
1751 * Decrement number of processes handling write page fault to the filesystem.
1752 * Wake up possible waiters wanting to freeze the filesystem.
1753 */
1754static inline void sb_end_pagefault(struct super_block *sb)
1755{
1756 __sb_end_write(sb, SB_FREEZE_PAGEFAULT);
1757}
1758
1759/**
1760 * sb_end_intwrite - drop write access to a superblock for internal fs purposes
1761 * @sb: the super we wrote to
1762 *
1763 * Decrement fs-internal number of writers to the filesystem. Wake up possible
1764 * waiters wanting to freeze the filesystem.
1765 */
1766static inline void sb_end_intwrite(struct super_block *sb)
1767{
1768 __sb_end_write(sb, SB_FREEZE_FS);
1769}
1770
1771/**
1772 * sb_start_write - get write access to a superblock
1773 * @sb: the super we write to
1774 *
1775 * When a process wants to write data or metadata to a file system (i.e. dirty
1776 * a page or an inode), it should embed the operation in a sb_start_write() -
1777 * sb_end_write() pair to get exclusion against file system freezing. This
1778 * function increments number of writers preventing freezing. If the file
1779 * system is already frozen, the function waits until the file system is
1780 * thawed.
1781 *
1782 * Since freeze protection behaves as a lock, users have to preserve
1783 * ordering of freeze protection and other filesystem locks. Generally,
1784 * freeze protection should be the outermost lock. In particular, we have:
1785 *
1786 * sb_start_write
1787 * -> i_mutex (write path, truncate, directory ops, ...)
1788 * -> s_umount (freeze_super, thaw_super)
1789 */
1790static inline void sb_start_write(struct super_block *sb)
1791{
8a3c84b6 1792 __sb_start_write(sb, SB_FREEZE_WRITE);
5accdf82
JK
1793}
1794
8a3c84b6 1795static inline bool sb_start_write_trylock(struct super_block *sb)
5accdf82 1796{
8a3c84b6 1797 return __sb_start_write_trylock(sb, SB_FREEZE_WRITE);
5accdf82
JK
1798}
1799
1800/**
1801 * sb_start_pagefault - get write access to a superblock from a page fault
1802 * @sb: the super we write to
1803 *
1804 * When a process starts handling write page fault, it should embed the
1805 * operation into sb_start_pagefault() - sb_end_pagefault() pair to get
1806 * exclusion against file system freezing. This is needed since the page fault
1807 * is going to dirty a page. This function increments number of running page
1808 * faults preventing freezing. If the file system is already frozen, the
1809 * function waits until the file system is thawed.
1810 *
1811 * Since page fault freeze protection behaves as a lock, users have to preserve
1812 * ordering of freeze protection and other filesystem locks. It is advised to
c1e8d7c6 1813 * put sb_start_pagefault() close to mmap_lock in lock ordering. Page fault
5accdf82
JK
1814 * handling code implies lock dependency:
1815 *
c1e8d7c6 1816 * mmap_lock
5accdf82
JK
1817 * -> sb_start_pagefault
1818 */
1819static inline void sb_start_pagefault(struct super_block *sb)
1820{
8a3c84b6 1821 __sb_start_write(sb, SB_FREEZE_PAGEFAULT);
5accdf82
JK
1822}
1823
39015399 1824/**
5accdf82
JK
1825 * sb_start_intwrite - get write access to a superblock for internal fs purposes
1826 * @sb: the super we write to
1827 *
1828 * This is the third level of protection against filesystem freezing. It is
1829 * free for use by a filesystem. The only requirement is that it must rank
1830 * below sb_start_pagefault.
1831 *
1832 * For example filesystem can call sb_start_intwrite() when starting a
1833 * transaction which somewhat eases handling of freezing for internal sources
1834 * of filesystem changes (internal fs threads, discarding preallocation on file
1835 * close, etc.).
1836 */
1837static inline void sb_start_intwrite(struct super_block *sb)
1838{
8a3c84b6 1839 __sb_start_write(sb, SB_FREEZE_FS);
5accdf82 1840}
1da177e4 1841
8a3c84b6 1842static inline bool sb_start_intwrite_trylock(struct super_block *sb)
0c8e3fe3 1843{
8a3c84b6 1844 return __sb_start_write_trylock(sb, SB_FREEZE_FS);
0c8e3fe3
AG
1845}
1846
21cb47be
CB
1847bool inode_owner_or_capable(struct user_namespace *mnt_userns,
1848 const struct inode *inode);
3bd858ab 1849
1da177e4
LT
1850/*
1851 * VFS helper functions.
1852 */
6521f891
CB
1853int vfs_create(struct user_namespace *, struct inode *,
1854 struct dentry *, umode_t, bool);
1855int vfs_mkdir(struct user_namespace *, struct inode *,
1856 struct dentry *, umode_t);
1857int vfs_mknod(struct user_namespace *, struct inode *, struct dentry *,
1858 umode_t, dev_t);
1859int vfs_symlink(struct user_namespace *, struct inode *,
1860 struct dentry *, const char *);
1861int vfs_link(struct dentry *, struct user_namespace *, struct inode *,
1862 struct dentry *, struct inode **);
1863int vfs_rmdir(struct user_namespace *, struct inode *, struct dentry *);
1864int vfs_unlink(struct user_namespace *, struct inode *, struct dentry *,
1865 struct inode **);
9fe61450 1866
92cb01c7
CB
/**
 * struct renamedata - everything vfs_rename() needs to perform a rename
 * @old_mnt_userns:    user namespace of the mount the source inode was
 *                     found from
 * @old_dir:           parent of the source
 * @old_dentry:        the source
 * @new_mnt_userns:    user namespace of the mount the destination inode
 *                     was found from
 * @new_dir:           parent of the destination
 * @new_dentry:        the destination
 * @delegated_inode:   returns an inode needing a delegation break
 * @flags:             rename flags
 */
struct renamedata {
	/* source side */
	struct user_namespace	*old_mnt_userns;
	struct inode		*old_dir;
	struct dentry		*old_dentry;

	/* destination side */
	struct user_namespace	*new_mnt_userns;
	struct inode		*new_dir;
	struct dentry		*new_dentry;

	struct inode		**delegated_inode;
	unsigned int		flags;
} __randomize_layout;
a3c751a5 1888
9fe61450 1889int vfs_rename(struct renamedata *);
a3c751a5 1890
6521f891
CB
1891static inline int vfs_whiteout(struct user_namespace *mnt_userns,
1892 struct inode *dir, struct dentry *dentry)
a3c751a5 1893{
6521f891
CB
1894 return vfs_mknod(mnt_userns, dir, dentry, S_IFCHR | WHITEOUT_MODE,
1895 WHITEOUT_DEV);
a3c751a5 1896}
1da177e4 1897
6521f891
CB
1898struct dentry *vfs_tmpfile(struct user_namespace *mnt_userns,
1899 struct dentry *dentry, umode_t mode, int open_flag);
af7bd4dc 1900
8e6c848e
AV
1901int vfs_mkobj(struct dentry *, umode_t,
1902 int (*f)(struct dentry *, umode_t, void *),
1903 void *);
1904
c04011fe 1905int vfs_fchown(struct file *file, uid_t user, gid_t group);
9e96c8c0 1906int vfs_fchmod(struct file *file, umode_t mode);
fd5ad30c 1907int vfs_utimes(const struct path *path, struct timespec64 *times);
c04011fe 1908
9df6702a
MS
1909extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
1910
2952db0f
AB
1911#ifdef CONFIG_COMPAT
1912extern long compat_ptr_ioctl(struct file *file, unsigned int cmd,
1913 unsigned long arg);
1914#else
1915#define compat_ptr_ioctl NULL
1916#endif
1917
8c744fb8
CH
1918/*
1919 * VFS file helper functions.
1920 */
21cb47be
CB
1921void inode_init_owner(struct user_namespace *mnt_userns, struct inode *inode,
1922 const struct inode *dir, umode_t mode);
a2982cc9 1923extern bool may_open_dev(const struct path *path);
c4b929b8 1924
1da177e4
LT
1925/*
1926 * This is the "filldir" function type, used by readdir() to let
1927 * the kernel specify what kind of dirent layout it wants to have.
1928 * This allows the kernel to read directories into kernel space or
1929 * to have different dirent layouts depending on the binary type.
1930 */
ac7576f4
MS
1931struct dir_context;
1932typedef int (*filldir_t)(struct dir_context *, const char *, int, loff_t, u64,
1933 unsigned);
1934
5c0ba4e0 1935struct dir_context {
a09acf4b 1936 filldir_t actor;
bb6f619b 1937 loff_t pos;
5c0ba4e0 1938};
bb6f619b 1939
b4caecd4
CH
1940/*
1941 * These flags let !MMU mmap() govern direct device mapping vs immediate
1942 * copying more easily for MAP_PRIVATE, especially for ROM filesystems.
1943 *
1944 * NOMMU_MAP_COPY: Copy can be mapped (MAP_PRIVATE)
1945 * NOMMU_MAP_DIRECT: Can be mapped directly (MAP_SHARED)
1946 * NOMMU_MAP_READ: Can be mapped for reading
1947 * NOMMU_MAP_WRITE: Can be mapped for writing
1948 * NOMMU_MAP_EXEC: Can be mapped for execution
1949 */
/* Two flags with their own values ... */
#define NOMMU_MAP_COPY		0x00000001
#define NOMMU_MAP_DIRECT	0x00000008
/* ... and three that alias the VM_MAY* bits. */
#define NOMMU_MAP_READ		VM_MAYREAD
#define NOMMU_MAP_WRITE		VM_MAYWRITE
#define NOMMU_MAP_EXEC		VM_MAYEXEC

/* Union of the three VM_MAY*-aliased flags above. */
#define NOMMU_VMFLAGS	\
	(NOMMU_MAP_READ | NOMMU_MAP_WRITE | NOMMU_MAP_EXEC)
1958
2e5dfc99
DW
1959/*
1960 * These flags control the behavior of the remap_file_range function pointer.
1961 * If it is called with len == 0 that means "remap to end of source file".
5c437fa2 1962 * See Documentation/filesystems/vfs.rst for more details about this call.
2e5dfc99
DW
1963 *
1964 * REMAP_FILE_DEDUP: only remap if contents identical (i.e. deduplicate)
eca3654e 1965 * REMAP_FILE_CAN_SHORTEN: caller can handle a shortened request
2e5dfc99
DW
1966 */
#define REMAP_FILE_DEDUP		(1 << 0)
#define REMAP_FILE_CAN_SHORTEN		(1 << 1)

/*
 * Flags in this set tell the implementation that the caller accepts a
 * change to some aspect of how the remap operation behaves.  The
 * implementation is responsible for making those changes; the vfs remap
 * helper functions can take advantage of them.  The category exists to
 * preserve the quirky behavior of the hoisted btrfs clone/dedupe ioctls.
 */
#define REMAP_FILE_ADVISORY		(REMAP_FILE_CAN_SHORTEN)
b4caecd4 1978
293bc982
AV
1979struct iov_iter;
1980
1da177e4
LT
1981struct file_operations {
1982 struct module *owner;
1983 loff_t (*llseek) (struct file *, loff_t, int);
1984 ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
1da177e4 1985 ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
293bc982
AV
1986 ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
1987 ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
5a72e899
JA
1988 int (*iopoll)(struct kiocb *kiocb, struct io_comp_batch *,
1989 unsigned int flags);
bb6f619b 1990 int (*iterate) (struct file *, struct dir_context *);
61922694 1991 int (*iterate_shared) (struct file *, struct dir_context *);
a3f8683b 1992 __poll_t (*poll) (struct file *, struct poll_table_struct *);
1da177e4
LT
1993 long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
1994 long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
1995 int (*mmap) (struct file *, struct vm_area_struct *);
1c972597 1996 unsigned long mmap_supported_flags;
1da177e4 1997 int (*open) (struct inode *, struct file *);
75e1fcc0 1998 int (*flush) (struct file *, fl_owner_t id);
1da177e4 1999 int (*release) (struct inode *, struct file *);
02c24a82 2000 int (*fsync) (struct file *, loff_t, loff_t, int datasync);
1da177e4
LT
2001 int (*fasync) (int, struct file *, int);
2002 int (*lock) (struct file *, int, struct file_lock *);
1da177e4
LT
2003 ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
2004 unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
2005 int (*check_flags)(int);
1da177e4 2006 int (*flock) (struct file *, int, struct file_lock *);
cbb7e577
JA
2007 ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
2008 ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
e6f5c789 2009 int (*setlease)(struct file *, long, struct file_lock **, void **);
2fe17c10
CH
2010 long (*fallocate)(struct file *file, int mode, loff_t offset,
2011 loff_t len);
a3816ab0 2012 void (*show_fdinfo)(struct seq_file *m, struct file *f);
b4caecd4
CH
2013#ifndef CONFIG_MMU
2014 unsigned (*mmap_capabilities)(struct file *);
2015#endif
04b38d60
CH
2016 ssize_t (*copy_file_range)(struct file *, loff_t, struct file *,
2017 loff_t, size_t, unsigned int);
42ec3d4c
DW
2018 loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in,
2019 struct file *file_out, loff_t pos_out,
2020 loff_t len, unsigned int remap_flags);
45cd0faa 2021 int (*fadvise)(struct file *, loff_t, loff_t, int);
3859a271 2022} __randomize_layout;
1da177e4
LT
2023
2024struct inode_operations {
00cd8dd3 2025 struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
fceef393 2026 const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *);
549c7297 2027 int (*permission) (struct user_namespace *, struct inode *, int);
0cad6246 2028 struct posix_acl * (*get_acl)(struct inode *, int, bool);
44a7d7a8
NP
2029
2030 int (*readlink) (struct dentry *, char __user *,int);
44a7d7a8 2031
549c7297
CB
2032 int (*create) (struct user_namespace *, struct inode *,struct dentry *,
2033 umode_t, bool);
1da177e4
LT
2034 int (*link) (struct dentry *,struct inode *,struct dentry *);
2035 int (*unlink) (struct inode *,struct dentry *);
549c7297
CB
2036 int (*symlink) (struct user_namespace *, struct inode *,struct dentry *,
2037 const char *);
2038 int (*mkdir) (struct user_namespace *, struct inode *,struct dentry *,
2039 umode_t);
1da177e4 2040 int (*rmdir) (struct inode *,struct dentry *);
549c7297
CB
2041 int (*mknod) (struct user_namespace *, struct inode *,struct dentry *,
2042 umode_t,dev_t);
2043 int (*rename) (struct user_namespace *, struct inode *, struct dentry *,
520c8b16 2044 struct inode *, struct dentry *, unsigned int);
549c7297
CB
2045 int (*setattr) (struct user_namespace *, struct dentry *,
2046 struct iattr *);
2047 int (*getattr) (struct user_namespace *, const struct path *,
2048 struct kstat *, u32, unsigned int);
1da177e4 2049 ssize_t (*listxattr) (struct dentry *, char *, size_t);
c4b929b8
MF
2050 int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
2051 u64 len);
95582b00 2052 int (*update_time)(struct inode *, struct timespec64 *, int);
d9585277 2053 int (*atomic_open)(struct inode *, struct dentry *,
30d90494 2054 struct file *, unsigned open_flag,
44907d79 2055 umode_t create_mode);
549c7297
CB
2056 int (*tmpfile) (struct user_namespace *, struct inode *,
2057 struct dentry *, umode_t);
2058 int (*set_acl)(struct user_namespace *, struct inode *,
2059 struct posix_acl *, int);
4c5b4799
MS
2060 int (*fileattr_set)(struct user_namespace *mnt_userns,
2061 struct dentry *dentry, struct fileattr *fa);
2062 int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa);
44a7d7a8 2063} ____cacheline_aligned;
1da177e4 2064
bb7462b6
MS
2065static inline ssize_t call_read_iter(struct file *file, struct kiocb *kio,
2066 struct iov_iter *iter)
2067{
2068 return file->f_op->read_iter(kio, iter);
2069}
2070
2071static inline ssize_t call_write_iter(struct file *file, struct kiocb *kio,
2072 struct iov_iter *iter)
2073{
2074 return file->f_op->write_iter(kio, iter);
2075}
2076
f74ac015
MS
2077static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
2078{
2079 return file->f_op->mmap(file, vma);
2080}
2081
1da177e4
LT
2082extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
2083extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
29732938
ZB
2084extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
2085 loff_t, size_t, unsigned int);
f16acc9d
DC
2086extern ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in,
2087 struct file *file_out, loff_t pos_out,
2088 size_t len, unsigned int flags);
a83ab01a
DW
2089extern int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
2090 struct file *file_out, loff_t pos_out,
42ec3d4c
DW
2091 loff_t *count,
2092 unsigned int remap_flags);
2093extern loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
2094 struct file *file_out, loff_t pos_out,
452ce659 2095 loff_t len, unsigned int remap_flags);
42ec3d4c
DW
2096extern loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
2097 struct file *file_out, loff_t pos_out,
452ce659 2098 loff_t len, unsigned int remap_flags);
54dbc151
DW
2099extern int vfs_dedupe_file_range(struct file *file,
2100 struct file_dedupe_range *same);
42ec3d4c
DW
2101extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
2102 struct file *dst_file, loff_t dst_pos,
df365836 2103 loff_t len, unsigned int remap_flags);
f1825366 2104
1da177e4 2105
1da177e4
LT
/*
 * Table of per-superblock callbacks.  Member order is kept exactly as
 * declared; only layout and comments differ.
 */
struct super_operations {
	/* Callbacks that produce or dispose of an inode. */
	struct inode *(*alloc_inode)(struct super_block *sb);
	void (*destroy_inode)(struct inode *);
	void (*free_inode)(struct inode *);

	void (*dirty_inode)(struct inode *, int flags);
	int (*write_inode)(struct inode *, struct writeback_control *wbc);
	int (*drop_inode)(struct inode *);
	void (*evict_inode)(struct inode *);
	void (*put_super)(struct super_block *);
	int (*sync_fs)(struct super_block *sb, int wait);
	int (*freeze_super)(struct super_block *);
	int (*freeze_fs)(struct super_block *);
	int (*thaw_super)(struct super_block *);
	int (*unfreeze_fs)(struct super_block *);
	int (*statfs)(struct dentry *, struct kstatfs *);
	int (*remount_fs)(struct super_block *, int *, char *);
	void (*umount_begin)(struct super_block *);

	/* Callbacks that write into a seq_file. */
	int (*show_options)(struct seq_file *, struct dentry *);
	int (*show_devname)(struct seq_file *, struct dentry *);
	int (*show_path)(struct seq_file *, struct dentry *);
	int (*show_stats)(struct seq_file *, struct dentry *);
#ifdef CONFIG_QUOTA
	/* Only present when CONFIG_QUOTA is set. */
	ssize_t (*quota_read)(struct super_block *, int, char *, size_t,
			      loff_t);
	ssize_t (*quota_write)(struct super_block *, int, const char *,
			       size_t, loff_t);
	struct dquot **(*get_dquots)(struct inode *);
#endif
	/* Callbacks taking a shrink_control. */
	long (*nr_cached_objects)(struct super_block *,
				  struct shrink_control *);
	long (*free_cached_objects)(struct super_block *,
				    struct shrink_control *);
};
2139
bbc1096a
DH
2140/*
2141 * Inode flags - they have no relation to superblock flags now
2142 */
6414e9b0
EB
#define S_SYNC		(1 << 0)	/* Writes are synced at once */
#define S_NOATIME	(1 << 1)	/* Do not update access times */
#define S_APPEND	(1 << 2)	/* Append-only file */
#define S_IMMUTABLE	(1 << 3)	/* Immutable file */
#define S_DEAD		(1 << 4)	/* removed, but still open directory */
#define S_NOQUOTA	(1 << 5)	/* Inode is not counted to quota */
#define S_DIRSYNC	(1 << 6)	/* Directory modifications are synchronous */
#define S_NOCMTIME	(1 << 7)	/* Do not update file c/mtime */
#define S_SWAPFILE	(1 << 8)	/* Do not truncate: swapon got its bmaps */
#define S_PRIVATE	(1 << 9)	/* Inode is fs-internal */
#define S_IMA		(1 << 10)	/* Inode has an associated IMA struct */
#define S_AUTOMOUNT	(1 << 11)	/* Automount/referral quasi-directory */
#define S_NOSEC		(1 << 12)	/* no suid or xattr security attributes */
/* Bit 13 is only a real flag when CONFIG_FS_DAX is set. */
#ifdef CONFIG_FS_DAX
#define S_DAX		(1 << 13)	/* Direct Access, avoiding the page cache */
#else
#define S_DAX		0		/* Make all the DAX code disappear */
#endif
#define S_ENCRYPTED	(1 << 14)	/* Encrypted file (using fs/crypto/) */
#define S_CASEFOLD	(1 << 15)	/* Casefolded file */
#define S_VERITY	(1 << 16)	/* Verity file (using fs/verity/) */
#define S_KERNEL_FILE	(1 << 17)	/* File is in use by the kernel (eg. fs/cachefiles) */
bbc1096a
DH
2165
2166/*
2167 * Note that nosuid etc flags are inode-specific: setting some file-system
2168 * flags just means all the inodes inherit those flags by default. It might be
2169 * possible to override it selectively if you really wanted to with some
2170 * ioctl() that is not currently implemented.
2171 *
e462ec50 2172 * Exception: SB_RDONLY is always applied to the entire file system.
bbc1096a
DH
2173 *
2174 * Unfortunately, it is possible to change a filesystem's flags with it mounted
2175 * with files in use. This means that all of the inodes will not have their
2176 * i_flags updated. Hence, i_flags no longer inherit the superblock mount
2177 * flags, so these have to be checked separately. -- rmk@arm.uk.linux.org
2178 */
2179#define __IS_FLG(inode, flg) ((inode)->i_sb->s_flags & (flg))
2180
1751e8a6 2181static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags & SB_RDONLY; }
/* Predicates that consult the superblock (alone or with i_flags). */
#define IS_RDONLY(inode)	sb_rdonly((inode)->i_sb)
#define IS_SYNC(inode)		(__IS_FLG(inode, SB_SYNCHRONOUS) || \
				 ((inode)->i_flags & S_SYNC))
#define IS_DIRSYNC(inode)	(__IS_FLG(inode, SB_SYNCHRONOUS|SB_DIRSYNC) || \
				 ((inode)->i_flags & (S_SYNC|S_DIRSYNC)))
#define IS_MANDLOCK(inode)	__IS_FLG(inode, SB_MANDLOCK)
#define IS_NOATIME(inode)	__IS_FLG(inode, SB_RDONLY|SB_NOATIME)
#define IS_I_VERSION(inode)	__IS_FLG(inode, SB_I_VERSION)

#define IS_NOQUOTA(inode)	((inode)->i_flags & S_NOQUOTA)
#define IS_APPEND(inode)	((inode)->i_flags & S_APPEND)
#define IS_IMMUTABLE(inode)	((inode)->i_flags & S_IMMUTABLE)
#define IS_POSIXACL(inode)	__IS_FLG(inode, SB_POSIXACL)

/* Predicates that consult only the inode's own i_flags. */
#define IS_DEADDIR(inode)	((inode)->i_flags & S_DEAD)
#define IS_NOCMTIME(inode)	((inode)->i_flags & S_NOCMTIME)
#define IS_SWAPFILE(inode)	((inode)->i_flags & S_SWAPFILE)
#define IS_PRIVATE(inode)	((inode)->i_flags & S_PRIVATE)
#define IS_IMA(inode)		((inode)->i_flags & S_IMA)
#define IS_AUTOMOUNT(inode)	((inode)->i_flags & S_AUTOMOUNT)
#define IS_NOSEC(inode)		((inode)->i_flags & S_NOSEC)
#define IS_DAX(inode)		((inode)->i_flags & S_DAX)
#define IS_ENCRYPTED(inode)	((inode)->i_flags & S_ENCRYPTED)
#define IS_CASEFOLDED(inode)	((inode)->i_flags & S_CASEFOLD)
#define IS_VERITY(inode)	((inode)->i_flags & S_VERITY)
bbc1096a 2207
787fb6bc
MS
/*
 * True when @inode is a character device (per S_ISCHR on i_mode) whose
 * i_rdev equals WHITEOUT_DEV.
 *
 * @inode is parenthesized at every use so that any pointer expression,
 * e.g. "&ino" or "p + 1", can be passed; previously the first use expanded
 * to "inode->i_mode", which mis-parses for such arguments.
 */
#define IS_WHITEOUT(inode)	(S_ISCHR((inode)->i_mode) && \
				 (inode)->i_rdev == WHITEOUT_DEV)
2210
ba73d987
CB
2211static inline bool HAS_UNMAPPED_ID(struct user_namespace *mnt_userns,
2212 struct inode *inode)
0bd23d09 2213{
ba73d987
CB
2214 return !uid_valid(i_uid_into_mnt(mnt_userns, inode)) ||
2215 !gid_valid(i_gid_into_mnt(mnt_userns, inode));
0bd23d09
EB
2216}
2217
c75b1d94
JA
2218static inline enum rw_hint file_write_hint(struct file *file)
2219{
2220 if (file->f_write_hint != WRITE_LIFE_NOT_SET)
2221 return file->f_write_hint;
2222
2223 return file_inode(file)->i_write_hint;
2224}
2225
2226static inline int iocb_flags(struct file *file);
2227
fc28724d
AM
2228static inline u16 ki_hint_validate(enum rw_hint hint)
2229{
2230 typeof(((struct kiocb *)0)->ki_hint) max_hint = -1;
2231
2232 if (hint <= max_hint)
2233 return hint;
2234 return 0;
2235}
2236
c75b1d94
JA
2237static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
2238{
2239 *kiocb = (struct kiocb) {
2240 .ki_filp = filp,
2241 .ki_flags = iocb_flags(filp),
fc28724d 2242 .ki_hint = ki_hint_validate(file_write_hint(filp)),
20578bdf 2243 .ki_ioprio = get_current_ioprio(),
c75b1d94
JA
2244 };
2245}
2246
5dcdc43e
JX
2247static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
2248 struct file *filp)
2249{
2250 *kiocb = (struct kiocb) {
2251 .ki_filp = filp,
2252 .ki_flags = kiocb_src->ki_flags,
2253 .ki_hint = kiocb_src->ki_hint,
2254 .ki_ioprio = kiocb_src->ki_ioprio,
2255 .ki_pos = kiocb_src->ki_pos,
2256 };
2257}
2258
1c0eeaf5 2259/*
250df6ed 2260 * Inode state bits. Protected by inode->i_lock
1c0eeaf5 2261 *
1e9d6333
EB
2262 * Four bits determine the dirty state of the inode: I_DIRTY_SYNC,
2263 * I_DIRTY_DATASYNC, I_DIRTY_PAGES, and I_DIRTY_TIME.
1c0eeaf5
JE
2264 *
2265 * Four bits define the lifetime of an inode. Initially, inodes are I_NEW,
2266 * until that flag is cleared. I_WILL_FREE, I_FREEING and I_CLEAR are set at
2267 * various stages of removing an inode.
2268 *
eaff8079 2269 * Two bits are used for locking and completion notification, I_NEW and I_SYNC.
1c0eeaf5 2270 *
e7ca2d41 2271 * I_DIRTY_SYNC Inode is dirty, but doesn't have to be written on
1e9d6333
EB
2272 * fdatasync() (unless I_DIRTY_DATASYNC is also set).
2273 * Timestamp updates are the usual cause.
2274 * I_DIRTY_DATASYNC Data-related inode changes pending. We keep track of
000cb48e
JK
2275 * these changes separately from I_DIRTY_SYNC so that we
2276 * don't have to write inode on fdatasync() when only
1e9d6333 2277 * e.g. the timestamps have changed.
1c0eeaf5 2278 * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean.
1e9d6333
EB
2279 * I_DIRTY_TIME The inode itself only has dirty timestamps, and the
2280 * lazytime mount option is enabled. We keep track of this
2281 * separately from I_DIRTY_SYNC in order to implement
2282 * lazytime. This gets cleared if I_DIRTY_INODE
2283 * (I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) gets set. I.e.
2284 * either I_DIRTY_TIME *or* I_DIRTY_INODE can be set in
2285 * i_state, but not both. I_DIRTY_PAGES may still be set.
eaff8079
CH
2286 * I_NEW Serves as both a mutex and completion notification.
2287 * New inodes set I_NEW. If two processes both create
2288 * the same inode, one of them will release its inode and
2289 * wait for I_NEW to be released before returning.
2290 * Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can
2291 * also cause waiting on I_NEW, without I_NEW actually
2292 * being set. find_inode() uses this to prevent returning
2293 * nearly-dead inodes.
1c0eeaf5
JE
2294 * I_WILL_FREE Must be set when calling write_inode_now() if i_count
2295 * is zero. I_FREEING must be set when I_WILL_FREE is
2296 * cleared.
2297 * I_FREEING Set when inode is about to be freed but still has dirty
2298 * pages or buffers attached or the inode itself is still
2299 * dirty.
dbd5768f
JK
2300 * I_CLEAR Added by clear_inode(). In this state the inode is
2301 * clean and can be destroyed. Inode keeps I_FREEING.
1c0eeaf5
JE
2302 *
2303 * Inodes that are I_WILL_FREE, I_FREEING or I_CLEAR are
2304 * prohibited for many purposes. iget() must wait for
2305 * the inode to be completely released, then create it
2306 * anew. Other functions will just ignore such inodes,
eaff8079 2307 * if appropriate. I_NEW is used for waiting.
1c0eeaf5 2308 *
169ebd90
JK
2309 * I_SYNC Writeback of inode is running. The bit is set during
2310 * data writeback, and cleared with a wakeup on the bit
2311 * address once it is done. The bit is also used to pin
2312 * the inode in memory for flusher thread.
1c0eeaf5 2313 *
bd5fe6c5
CH
2314 * I_REFERENCED Marks the inode as recently referenced on the LRU list.
2315 *
2316 * I_DIO_WAKEUP Never set. Only used as a key for wait_on_bit().
2317 *
682aa8e1
TH
2318 * I_WB_SWITCH Cgroup bdi_writeback switching in progress. Used to
2319 * synchronize competing switching instances and to tell
b93b0163 2320 * wb stat updates to grab the i_pages lock. See
a9519def 2321 * inode_switch_wbs_work_fn() for details.
682aa8e1 2322 *
ad0af710
AG
2323 * I_OVL_INUSE Used by overlayfs to get exclusive ownership on upper
2324 * and work dirs among overlayfs mounts.
2325 *
c2b6d621
AV
2326 * I_CREATING New object's inode in the middle of setting up.
2327 *
dae2f8ed
IW
2328 * I_DONTCACHE Evict inode as soon as it is not used anymore.
2329 *
5afced3b
JK
2330 * I_SYNC_QUEUED Inode is queued in b_io or b_more_io writeback lists.
2331 * Used to detect that mark_inode_dirty() should not move
2332 * inode between dirty lists.
2333 *
08276bda
DH
2334 * I_PINNING_FSCACHE_WB Inode is pinning an fscache object for writeback.
2335 *
1c0eeaf5 2336 * Q: What is the difference between I_WILL_FREE and I_FREEING?
1c0eeaf5 2337 */
9e38d86f
NP
/*
 * __I_NEW, __I_SYNC and __I_DIO_WAKEUP name a bit *number*; the matching
 * I_* macro is the mask built from it.  Bit 12 is not assigned here.
 */
#define I_DIRTY_SYNC		(1 << 0)
#define I_DIRTY_DATASYNC	(1 << 1)
#define I_DIRTY_PAGES		(1 << 2)
#define __I_NEW			3
#define I_NEW			(1 << __I_NEW)
#define I_WILL_FREE		(1 << 4)
#define I_FREEING		(1 << 5)
#define I_CLEAR			(1 << 6)
#define __I_SYNC		7
#define I_SYNC			(1 << __I_SYNC)
#define I_REFERENCED		(1 << 8)
#define __I_DIO_WAKEUP		9
#define I_DIO_WAKEUP		(1 << __I_DIO_WAKEUP)
#define I_LINKABLE		(1 << 10)
#define I_DIRTY_TIME		(1 << 11)
#define I_WB_SWITCH		(1 << 13)
#define I_OVL_INUSE		(1 << 14)
#define I_CREATING		(1 << 15)
#define I_DONTCACHE		(1 << 16)
#define I_SYNC_QUEUED		(1 << 17)
#define I_PINNING_FSCACHE_WB	(1 << 18)

/* Composite masks over the dirty bits. */
#define I_DIRTY_INODE	(I_DIRTY_SYNC | I_DIRTY_DATASYNC)
#define I_DIRTY		(I_DIRTY_INODE | I_DIRTY_PAGES)
#define I_DIRTY_ALL	(I_DIRTY | I_DIRTY_TIME)
1da177e4
LT
2363
2364extern void __mark_inode_dirty(struct inode *, int);
2365static inline void mark_inode_dirty(struct inode *inode)
2366{
2367 __mark_inode_dirty(inode, I_DIRTY);
2368}
2369
2370static inline void mark_inode_dirty_sync(struct inode *inode)
2371{
2372 __mark_inode_dirty(inode, I_DIRTY_SYNC);
2373}
2374
ed296c6c
EB
2375/*
2376 * Returns true if the given inode itself only has dirty timestamps (its pages
2377 * may still be dirty) and isn't currently being allocated or freed.
2378 * Filesystems should call this if when writing an inode when lazytime is
2379 * enabled, they want to opportunistically write the timestamps of other inodes
2380 * located very nearby on-disk, e.g. in the same inode block. This returns true
2381 * if the given inode is in need of such an opportunistic update. Requires
2382 * i_lock, or at least later re-checking under i_lock.
2383 */
2384static inline bool inode_is_dirtytime_only(struct inode *inode)
2385{
2386 return (inode->i_state & (I_DIRTY_TIME | I_NEW |
2387 I_FREEING | I_WILL_FREE)) == I_DIRTY_TIME;
2388}
2389
7ada4db8
MS
2390extern void inc_nlink(struct inode *inode);
2391extern void drop_nlink(struct inode *inode);
2392extern void clear_nlink(struct inode *inode);
2393extern void set_nlink(struct inode *inode, unsigned int nlink);
d8c76e6f
DH
2394
/* Bump @inode's link count via inc_nlink(), then mark the inode dirty. */
static inline void inode_inc_link_count(struct inode *inode)
{
	inc_nlink(inode);
	mark_inode_dirty(inode);
}
2400
9a53c3a7
DH
/* Drop @inode's link count via drop_nlink(), then mark the inode dirty. */
static inline void inode_dec_link_count(struct inode *inode)
{
	drop_nlink(inode);
	mark_inode_dirty(inode);
}
2406
c3b2da31
JB
/* One bit per item; values may be OR-ed together. */
enum file_time_flags {
	S_ATIME		= 1 << 0,
	S_MTIME		= 1 << 1,
	S_CTIME		= 1 << 2,
	S_VERSION	= 1 << 3,
};
2413
c6718543 2414extern bool atime_needs_update(const struct path *, struct inode *);
badcf2b7 2415extern void touch_atime(const struct path *);
e60feb44
JB
2416int inode_update_time(struct inode *inode, struct timespec64 *time, int flags);
2417
1da177e4
LT
2418static inline void file_accessed(struct file *file)
2419{
2420 if (!(file->f_flags & O_NOATIME))
68ac1234 2421 touch_atime(&file->f_path);
1da177e4
LT
2422}
2423
e38f7f53
AG
2424extern int file_modified(struct file *file);
2425
c3765016 2426int sync_inode_metadata(struct inode *inode, int wait);
1da177e4 2427
1da177e4
LT
2428struct file_system_type {
2429 const char *name;
2430 int fs_flags;
bbc1096a
DH
2431#define FS_REQUIRES_DEV 1
2432#define FS_BINARY_MOUNTDATA 2
2433#define FS_HAS_SUBTYPE 4
0c55cfc4 2434#define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */
0b3b094a 2435#define FS_DISALLOW_NOTIFY_PERM 16 /* Disable fanotify permission events */
a6435940 2436#define FS_ALLOW_IDMAP 32 /* FS has been updated to handle vfs idmappings. */
bbc1096a 2437#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */
f3a09c92 2438 int (*init_fs_context)(struct fs_context *);
d7167b14 2439 const struct fs_parameter_spec *parameters;
c96e41e9
AV
2440 struct dentry *(*mount) (struct file_system_type *, int,
2441 const char *, void *);
1da177e4
LT
2442 void (*kill_sb) (struct super_block *);
2443 struct module *owner;
2444 struct file_system_type * next;
a5166169 2445 struct hlist_head fs_supers;
d475fd42 2446
cf516249 2447 struct lock_class_key s_lock_key;
897c6ff9 2448 struct lock_class_key s_umount_key;
51ee049e 2449 struct lock_class_key s_vfs_rename_key;
5accdf82 2450 struct lock_class_key s_writers_key[SB_FREEZE_LEVELS];
d475fd42
PZ
2451
2452 struct lock_class_key i_lock_key;
2453 struct lock_class_key i_mutex_key;
730633f0 2454 struct lock_class_key invalidate_lock_key;
14358e6d 2455 struct lock_class_key i_mutex_dir_key;
1da177e4
LT
2456};
2457
7f78e035
EB
2458#define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME)
2459
152a0836
AV
2460extern struct dentry *mount_bdev(struct file_system_type *fs_type,
2461 int flags, const char *dev_name, void *data,
2462 int (*fill_super)(struct super_block *, void *, int));
fc14f2fe
AV
2463extern struct dentry *mount_single(struct file_system_type *fs_type,
2464 int flags, void *data,
2465 int (*fill_super)(struct super_block *, void *, int));
3c26ff6e
AV
2466extern struct dentry *mount_nodev(struct file_system_type *fs_type,
2467 int flags, void *data,
2468 int (*fill_super)(struct super_block *, void *, int));
ea441d11 2469extern struct dentry *mount_subtree(struct vfsmount *mnt, const char *path);
1da177e4
LT
2470void generic_shutdown_super(struct super_block *sb);
2471void kill_block_super(struct super_block *sb);
2472void kill_anon_super(struct super_block *sb);
2473void kill_litter_super(struct super_block *sb);
2474void deactivate_super(struct super_block *sb);
74dbbdd7 2475void deactivate_locked_super(struct super_block *sb);
1da177e4 2476int set_anon_super(struct super_block *s, void *data);
cb50b348 2477int set_anon_super_fc(struct super_block *s, struct fs_context *fc);
0ee5dc67
AV
2478int get_anon_bdev(dev_t *);
2479void free_anon_bdev(dev_t);
cb50b348
AV
2480struct super_block *sget_fc(struct fs_context *fc,
2481 int (*test)(struct super_block *, struct fs_context *),
2482 int (*set)(struct super_block *, struct fs_context *));
1da177e4
LT
2483struct super_block *sget(struct file_system_type *type,
2484 int (*test)(struct super_block *,void *),
2485 int (*set)(struct super_block *,void *),
9249e17f 2486 int flags, void *data);
1da177e4
LT
2487
2488/* Alas, no aliases. Too much hassle with bringing module.h everywhere */
/*
 * fops_get(): evaluates to @fops when it is non-NULL and try_module_get() on
 * its ->owner succeeds, otherwise to NULL.
 * fops_put(): calls module_put() on @fops->owner when @fops is non-NULL.
 *
 * Both macros expand @fops more than once, so pass an expression without
 * side effects.
 */
#define fops_get(fops) \
	((fops) && try_module_get((fops)->owner) ? (fops) : NULL)
#define fops_put(fops) \
	do { \
		if (fops) \
			module_put((fops)->owner); \
	} while(0)
e84f9e57
AV
/*
 * This one is to be used *ONLY* from ->open() instances.
 * fops must be non-NULL, pinned down *and* module dependencies
 * should be sufficient to pin the caller down as well.
 *
 * Releases the old ->f_op through fops_put(), stores @fops in its place and
 * trips BUG_ON() when the stored pointer is NULL.  @f and @fops are each
 * evaluated exactly once.
 */
#define replace_fops(f, fops) \
	do {	\
		struct file *__file = (f); \
		fops_put(__file->f_op); \
		__file->f_op = (fops); \
		BUG_ON(!__file->f_op); \
	} while(0)
1da177e4
LT
2504
2505extern int register_filesystem(struct file_system_type *);
2506extern int unregister_filesystem(struct file_system_type *);
d911b458 2507extern struct vfsmount *kern_mount(struct file_system_type *);
423e0ab0 2508extern void kern_unmount(struct vfsmount *mnt);
1da177e4
LT
2509extern int may_umount_tree(struct vfsmount *);
2510extern int may_umount(struct vfsmount *);
5e6123f3
SL
2511extern long do_mount(const char *, const char __user *,
2512 const char *, unsigned long, void *);
ca71cf71 2513extern struct vfsmount *collect_mounts(const struct path *);
8aec0809 2514extern void drop_collected_mounts(struct vfsmount *);
1f707137
AV
2515extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *,
2516 struct vfsmount *);
f0bb5aaf 2517extern int vfs_statfs(const struct path *, struct kstatfs *);
c8b91acc
AV
2518extern int user_statfs(const char __user *, struct kstatfs *);
2519extern int fd_statfs(int, struct kstatfs *);
18e9e510
JB
2520extern int freeze_super(struct super_block *super);
2521extern int thaw_super(struct super_block *super);
02125a82 2522extern bool our_mnt(struct vfsmount *mnt);
fca39346
JK
2523extern __printf(2, 3)
2524int super_setup_bdi_name(struct super_block *sb, char *fmt, ...);
2525extern int super_setup_bdi(struct super_block *sb);
1da177e4 2526
ce3b0f8d
AV
2527extern int current_umask(void);
2528
5a14696c
BF
2529extern void ihold(struct inode * inode);
2530extern void iput(struct inode *);
95582b00 2531extern int generic_update_time(struct inode *, struct timespec64 *, int);
5a14696c 2532
f87fd4c2 2533/* /sys/fs */
00d26666 2534extern struct kobject *fs_kobj;
f87fd4c2 2535
09cbfeaf 2536#define MAX_RW_COUNT (INT_MAX & PAGE_MASK)
bfcd17a6 2537
9e8925b6 2538#ifdef CONFIG_FILE_LOCKING
1da177e4
LT
2539static inline int break_lease(struct inode *inode, unsigned int mode)
2540{
24cbe784
JL
2541 /*
2542 * Since this check is lockless, we must ensure that any refcounts
8116bf4c
JL
2543 * taken are done before checking i_flctx->flc_lease. Otherwise, we
2544 * could end up racing with tasks trying to set a new lease on this
2545 * file.
24cbe784
JL
2546 */
2547 smp_mb();
8634b51f 2548 if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease))
df4e8d2c 2549 return __break_lease(inode, mode, FL_LEASE);
1da177e4
LT
2550 return 0;
2551}
df4e8d2c
BF
2552
2553static inline int break_deleg(struct inode *inode, unsigned int mode)
2554{
962bd40b
JL
2555 /*
2556 * Since this check is lockless, we must ensure that any refcounts
8116bf4c
JL
2557 * taken are done before checking i_flctx->flc_lease. Otherwise, we
2558 * could end up racing with tasks trying to set a new lease on this
2559 * file.
962bd40b
JL
2560 */
2561 smp_mb();
8634b51f 2562 if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease))
df4e8d2c 2563 return __break_lease(inode, mode, FL_DELEG);
1da177e4
LT
2564 return 0;
2565}
df4e8d2c 2566
5a14696c
BF
2567static inline int try_break_deleg(struct inode *inode, struct inode **delegated_inode)
2568{
2569 int ret;
2570
2571 ret = break_deleg(inode, O_WRONLY|O_NONBLOCK);
2572 if (ret == -EWOULDBLOCK && delegated_inode) {
2573 *delegated_inode = inode;
2574 ihold(inode);
2575 }
2576 return ret;
2577}
2578
2579static inline int break_deleg_wait(struct inode **delegated_inode)
2580{
2581 int ret;
2582
2583 ret = break_deleg(*delegated_inode, O_WRONLY);
2584 iput(*delegated_inode);
2585 *delegated_inode = NULL;
2586 return ret;
2587}
2588
11afe9f7
CH
2589static inline int break_layout(struct inode *inode, bool wait)
2590{
2591 smp_mb();
2592 if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease))
2593 return __break_lease(inode,
2594 wait ? O_WRONLY : O_WRONLY | O_NONBLOCK,
2595 FL_LAYOUT);
2596 return 0;
2597}
2598
bfcd17a6 2599#else /* !CONFIG_FILE_LOCKING */
af5df566
SW
/* !CONFIG_FILE_LOCKING stub: nothing to break, always reports success. */
static inline int break_lease(struct inode *inode, unsigned int mode)
{
	return 0;
}
2604
df4e8d2c
BF
/* !CONFIG_FILE_LOCKING stub: nothing to break, always reports success. */
static inline int break_deleg(struct inode *inode, unsigned int mode)
{
	return 0;
}
5a14696c
BF
2609
/*
 * !CONFIG_FILE_LOCKING stub: always returns 0 and never touches
 * @delegated_inode.
 */
static inline int try_break_deleg(struct inode *inode, struct inode **delegated_inode)
{
	return 0;
}
2614
/*
 * !CONFIG_FILE_LOCKING stub.  try_break_deleg() in this configuration never
 * stores a delegated inode, so getting here is treated as a bug.
 */
static inline int break_deleg_wait(struct inode **delegated_inode)
{
	BUG();
	return 0;
}
2620
11afe9f7
CH
/* !CONFIG_FILE_LOCKING stub: nothing to break, always reports success. */
static inline int break_layout(struct inode *inode, bool wait)
{
	return 0;
}
2625
bfcd17a6 2626#endif /* CONFIG_FILE_LOCKING */
1da177e4
LT
2627
2628/* fs/open.c */
adb5c247 2629struct audit_names;
91a27b2a 2630struct filename {
adb5c247
JL
2631 const char *name; /* pointer to actual string */
2632 const __user char *uptr; /* original userland pointer */
55422d0b 2633 int refcnt;
1c949843 2634 struct audit_names *aname;
fd2f7cb5 2635 const char iname[];
91a27b2a 2636};
f1fffbd4 2637static_assert(offsetof(struct filename, iname) % sizeof(long) == 0);
1da177e4 2638
a6435940
CB
2639static inline struct user_namespace *file_mnt_user_ns(struct file *file)
2640{
2641 return mnt_user_ns(file->f_path.mnt);
2642}
bb49e9e7
CB
2643
2644/**
2645 * is_idmapped_mnt - check whether a mount is mapped
2646 * @mnt: the mount to check
2647 *
bd303368
CB
2648 * If @mnt has an idmapping attached different from the
2649 * filesystem's idmapping then @mnt is mapped.
bb49e9e7
CB
2650 *
2651 * Return: true if mount is mapped, false if not.
2652 */
2653static inline bool is_idmapped_mnt(const struct vfsmount *mnt)
2654{
bd303368 2655 return mnt_user_ns(mnt) != mnt->mnt_sb->s_user_ns;
bb49e9e7
CB
2656}
2657
7df818b2 2658extern long vfs_truncate(const struct path *, loff_t);
643fe55a
CB
2659int do_truncate(struct user_namespace *, struct dentry *, loff_t start,
2660 unsigned int time_attrs, struct file *filp);
72c72bdf 2661extern int vfs_fallocate(struct file *file, int mode, loff_t offset,
3e63cbb1 2662 loff_t len);
8e8a1407 2663extern long do_sys_open(int dfd, const char __user *filename, int flags,
a218d0fd 2664 umode_t mode);
669abf4e 2665extern struct file *file_open_name(struct filename *, int, umode_t);
a218d0fd 2666extern struct file *filp_open(const char *, int, umode_t);
ffb37ca3 2667extern struct file *file_open_root(const struct path *,
378c6520 2668 const char *, int, umode_t);
ffb37ca3
AV
2669static inline struct file *file_open_root_mnt(struct vfsmount *mnt,
2670 const char *name, int flags, umode_t mode)
2671{
2672 return file_open_root(&(struct path){.mnt = mnt, .dentry = mnt->mnt_root},
2673 name, flags, mode);
2674}
765927b2 2675extern struct file * dentry_open(const struct path *, int, const struct cred *);
2abc77af
AV
2676extern struct file * open_with_fake_path(const struct path *, int,
2677 struct inode*, const struct cred *);
19f391eb
AV
2678static inline struct file *file_clone_open(struct file *file)
2679{
2680 return dentry_open(&file->f_path, file->f_flags, file->f_cred);
2681}
1da177e4 2682extern int filp_close(struct file *, fl_owner_t id);
91a27b2a 2683
51f39a1f 2684extern struct filename *getname_flags(const char __user *, int, int *);
8228e2c3 2685extern struct filename *getname_uflags(const char __user *, int);
91a27b2a 2686extern struct filename *getname(const char __user *);
c4ad8f98 2687extern struct filename *getname_kernel(const char *);
55422d0b 2688extern void putname(struct filename *name);
91a27b2a 2689
30d90494 2690extern int finish_open(struct file *file, struct dentry *dentry,
be12af3e 2691 int (*open)(struct inode *, struct file *));
e45198a6 2692extern int finish_no_open(struct file *file, struct dentry *dentry);
1da177e4
LT
2693
2694/* fs/dcache.c */
2695extern void __init vfs_caches_init_early(void);
4248b0da 2696extern void __init vfs_caches_init(void);
1da177e4 2697
b86c089b
CL
2698extern struct kmem_cache *names_cachep;
2699
/*
 * __getname(): allocate one object from names_cachep with GFP_KERNEL.
 * __putname(): give @name back to names_cachep (cast away any qualifiers).
 */
#define __getname()		kmem_cache_alloc(names_cachep, GFP_KERNEL)
#define __putname(name)		kmem_cache_free(names_cachep, (void *)(name))
1da177e4 2702
a212b105 2703extern struct super_block *blockdev_superblock;
a212b105
TH
2704static inline bool sb_is_blkdev_sb(struct super_block *sb)
2705{
dd0dca22 2706 return IS_ENABLED(CONFIG_BLOCK) && sb == blockdev_superblock;
47e4491b 2707}
5c0d6b60 2708
b818f09e 2709void emergency_thaw_all(void);
60b0680f 2710extern int sync_filesystem(struct super_block *);
9361401e 2711extern const struct file_operations def_blk_fops;
4b6f5d20 2712extern const struct file_operations def_chr_fops;
1da177e4
LT
2713
2714/* fs/char_dev.c */
#define CHRDEV_MAJOR_MAX		512
/* Marks the bottom of the first segment of free char majors */
#define CHRDEV_MAJOR_DYN_END		234
/* Marks the top and bottom of the second segment of free char majors */
#define CHRDEV_MAJOR_DYN_EXT_START	511
#define CHRDEV_MAJOR_DYN_EXT_END	384
2721
1da177e4
LT
2722extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *);
2723extern int register_chrdev_region(dev_t, unsigned, const char *);
1905b1bf
TH
2724extern int __register_chrdev(unsigned int major, unsigned int baseminor,
2725 unsigned int count, const char *name,
2726 const struct file_operations *fops);
2727extern void __unregister_chrdev(unsigned int major, unsigned int baseminor,
2728 unsigned int count, const char *name);
1da177e4 2729extern void unregister_chrdev_region(dev_t, unsigned);
68eef3b4 2730extern void chrdev_show(struct seq_file *,off_t);
1da177e4 2731
1905b1bf
TH
/*
 * Shorthand for __register_chrdev() with baseminor 0 and a count of 256.
 * Returns whatever __register_chrdev() returns.
 */
static inline int register_chrdev(unsigned int major, const char *name,
				  const struct file_operations *fops)
{
	const unsigned int first_minor = 0;
	const unsigned int nr_minors = 256;

	return __register_chrdev(major, first_minor, nr_minors, name, fops);
}
2737
/* Shorthand for __unregister_chrdev() with baseminor 0 and a count of 256. */
static inline void unregister_chrdev(unsigned int major, const char *name)
{
	const unsigned int first_minor = 0;
	const unsigned int nr_minors = 256;

	__unregister_chrdev(major, first_minor, nr_minors, name);
}
2742
1da177e4
LT
2743extern void init_special_inode(struct inode *, umode_t, dev_t);
2744
2745/* Invalid inode operations -- fs/bad_inode.c */
2746extern void make_bad_inode(struct inode *);
0e3ef1fe 2747extern bool is_bad_inode(struct inode *);
1da177e4 2748
1da177e4
LT
2749unsigned long invalidate_mapping_pages(struct address_space *mapping,
2750 pgoff_t start, pgoff_t end);
54bc4855 2751
eb1d7a65
YS
2752void invalidate_mapping_pagevec(struct address_space *mapping,
2753 pgoff_t start, pgoff_t end,
2754 unsigned long *nr_pagevec);
2755
1da177e4
LT
2756static inline void invalidate_remote_inode(struct inode *inode)
2757{
2758 if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
2759 S_ISLNK(inode->i_mode))
fc0ecff6 2760 invalidate_mapping_pages(inode->i_mapping, 0, -1);
1da177e4
LT
2761}
2762extern int invalidate_inode_pages2(struct address_space *mapping);
2763extern int invalidate_inode_pages2_range(struct address_space *mapping,
2764 pgoff_t start, pgoff_t end);
2765extern int write_inode_now(struct inode *, int);
2766extern int filemap_fdatawrite(struct address_space *);
2767extern int filemap_flush(struct address_space *);
76341cab 2768extern int filemap_fdatawait_keep_errors(struct address_space *mapping);
d3bccb6f
JK
2769extern int filemap_fdatawait_range(struct address_space *, loff_t lstart,
2770 loff_t lend);
aa0bfcd9
RZ
2771extern int filemap_fdatawait_range_keep_errors(struct address_space *mapping,
2772 loff_t start_byte, loff_t end_byte);
ffb959bb
JL
2773
2774static inline int filemap_fdatawait(struct address_space *mapping)
2775{
2776 return filemap_fdatawait_range(mapping, 0, LLONG_MAX);
2777}
2778
7fc9e472
GR
2779extern bool filemap_range_has_page(struct address_space *, loff_t lstart,
2780 loff_t lend);
1da177e4
LT
2781extern int filemap_write_and_wait_range(struct address_space *mapping,
2782 loff_t lstart, loff_t lend);
ebcf28e1
AM
2783extern int __filemap_fdatawrite_range(struct address_space *mapping,
2784 loff_t start, loff_t end, int sync_mode);
f4c0a0fd
JK
2785extern int filemap_fdatawrite_range(struct address_space *mapping,
2786 loff_t start, loff_t end);
d72d9e2a 2787extern int filemap_check_errors(struct address_space *mapping);
5660e13d 2788extern void __filemap_set_wb_err(struct address_space *mapping, int err);
5a798493
JB
2789int filemap_fdatawrite_wbc(struct address_space *mapping,
2790 struct writeback_control *wbc);
a823e458 2791
ddf8f376
IW
2792static inline int filemap_write_and_wait(struct address_space *mapping)
2793{
2794 return filemap_write_and_wait_range(mapping, 0, LLONG_MAX);
2795}
2796
a823e458
JL
2797extern int __must_check file_fdatawait_range(struct file *file, loff_t lstart,
2798 loff_t lend);
5660e13d
JL
2799extern int __must_check file_check_and_advance_wb_err(struct file *file);
2800extern int __must_check file_write_and_wait_range(struct file *file,
2801 loff_t start, loff_t end);
2802
a823e458
JL
2803static inline int file_write_and_wait(struct file *file)
2804{
2805 return file_write_and_wait_range(file, 0, LLONG_MAX);
2806}
2807
5660e13d
JL
/**
 * filemap_set_wb_err - set a writeback error on an address_space
 * @mapping: mapping in which to set writeback error
 * @err: error to be set in mapping
 *
 * When writeback fails in some way, we must record that error so that
 * userspace can be informed when fsync and the like are called.  We endeavor
 * to report errors on any file that was open at the time of the error.  Some
 * internal callers also need to know when writeback errors have occurred.
 *
 * When a writeback error occurs, most filesystems will want to call
 * filemap_set_wb_err to record the error in the mapping so that it will be
 * automatically reported whenever fsync is called on the file.
 *
 * A zero @err is a no-op; only a non-zero value reaches
 * __filemap_set_wb_err().
 */
static inline void filemap_set_wb_err(struct address_space *mapping, int err)
{
	/* Fastpath for common case of no error */
	if (likely(!err))
		return;

	__filemap_set_wb_err(mapping, err);
}
2828
2829/**
c9dff084 2830 * filemap_check_wb_err - has an error occurred since the mark was sampled?
5660e13d
JL
2831 * @mapping: mapping to check for writeback errors
2832 * @since: previously-sampled errseq_t
2833 *
2834 * Grab the errseq_t value from the mapping, and see if it has changed "since"
2835 * the given value was sampled.
2836 *
2837 * If it has then report the latest error set, otherwise return 0.
2838 */
2839static inline int filemap_check_wb_err(struct address_space *mapping,
2840 errseq_t since)
2841{
2842 return errseq_check(&mapping->wb_err, since);
2843}
2844
2845/**
2846 * filemap_sample_wb_err - sample the current errseq_t to test for later errors
2847 * @mapping: mapping to be sampled
2848 *
2849 * Writeback errors are always reported relative to a particular sample point
2850 * in the past. This function provides those sample points.
2851 */
2852static inline errseq_t filemap_sample_wb_err(struct address_space *mapping)
2853{
2854 return errseq_sample(&mapping->wb_err);
2855}
2856
735e4ae5
JL
2857/**
2858 * file_sample_sb_err - sample the current errseq_t to test for later errors
21b9cb34 2859 * @file: file pointer to be sampled
735e4ae5
JL
2860 *
2861 * Grab the most current superblock-level errseq_t value for the given
2862 * struct file.
2863 */
2864static inline errseq_t file_sample_sb_err(struct file *file)
2865{
2866 return errseq_sample(&file->f_path.dentry->d_sb->s_wb_err);
2867}
2868
8018ab05
CH
2869extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end,
2870 int datasync);
2871extern int vfs_fsync(struct file *file, int datasync);
e2592217 2872
22f96b38
JA
2873extern int sync_file_range(struct file *file, loff_t offset, loff_t nbytes,
2874 unsigned int flags);
2875
e2592217
CH
2876/*
2877 * Sync the bytes written if this was a synchronous write. Expect ki_pos
2878 * to already be updated for the write, and will return either the amount
2879 * of bytes passed in, or an error if syncing the file failed.
2880 */
2881static inline ssize_t generic_write_sync(struct kiocb *iocb, ssize_t count)
2882{
2883 if (iocb->ki_flags & IOCB_DSYNC) {
2884 int ret = vfs_fsync_range(iocb->ki_filp,
2885 iocb->ki_pos - count, iocb->ki_pos - 1,
2886 (iocb->ki_flags & IOCB_SYNC) ? 0 : 1);
2887 if (ret)
2888 return ret;
2889 }
2890
2891 return count;
d311d79d 2892}
e2592217 2893
1da177e4
LT
2894extern void emergency_sync(void);
2895extern void emergency_remount(void);
30460e1e 2896
9361401e 2897#ifdef CONFIG_BLOCK
30460e1e
CM
2898extern int bmap(struct inode *inode, sector_t *block);
2899#else
2900static inline int bmap(struct inode *inode, sector_t *block)
2901{
2902 return -EINVAL;
2903}
9361401e 2904#endif
30460e1e 2905
2f221d6f
CB
2906int notify_change(struct user_namespace *, struct dentry *,
2907 struct iattr *, struct inode **);
47291baa
CB
2908int inode_permission(struct user_namespace *, struct inode *, int);
2909int generic_permission(struct user_namespace *, struct inode *, int);
02f92b38
CB
/*
 * inode_permission() on @file's inode, using the user namespace of the
 * mount the file was opened through.
 */
static inline int file_permission(struct file *file, int mask)
{
	struct user_namespace *mnt_userns = file_mnt_user_ns(file);

	return inode_permission(mnt_userns, file_inode(file), mask);
}
2915static inline int path_permission(const struct path *path, int mask)
2916{
47291baa
CB
2917 return inode_permission(mnt_user_ns(path->mnt),
2918 d_inode(path->dentry), mask);
02f92b38 2919}
ba73d987
CB
2920int __check_sticky(struct user_namespace *mnt_userns, struct inode *dir,
2921 struct inode *inode);
1da177e4 2922
f696a365
MS
2923static inline bool execute_ok(struct inode *inode)
2924{
2925 return (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode);
2926}
2927
6e3e2c43
AV
2928static inline bool inode_wrong_type(const struct inode *inode, umode_t mode)
2929{
2930 return (inode->i_mode ^ mode) & S_IFMT;
2931}
2932
8d71db4f
AV
2933static inline void file_start_write(struct file *file)
2934{
2935 if (!S_ISREG(file_inode(file)->i_mode))
2936 return;
8a3c84b6 2937 sb_start_write(file_inode(file)->i_sb);
8d71db4f
AV
2938}
2939
5ae98f15
JK
2940static inline bool file_start_write_trylock(struct file *file)
2941{
2942 if (!S_ISREG(file_inode(file)->i_mode))
2943 return true;
8a3c84b6 2944 return sb_start_write_trylock(file_inode(file)->i_sb);
5ae98f15
JK
2945}
2946
8d71db4f
AV
2947static inline void file_end_write(struct file *file)
2948{
2949 if (!S_ISREG(file_inode(file)->i_mode))
2950 return;
2951 __sb_end_write(file_inode(file)->i_sb, SB_FREEZE_WRITE);
2952}
bfe219d3 2953
07b8ce1e 2954/*
592ca09b
DH
2955 * This is used for regular files where some users -- especially the
2956 * currently executed binary in a process, previously handled via
2957 * VM_DENYWRITE -- cannot handle concurrent write (and maybe mmap
2958 * read-write shared) accesses.
2959 *
07b8ce1e
AV
2960 * get_write_access() gets write permission for a file.
2961 * put_write_access() releases this write permission.
592ca09b
DH
2962 * deny_write_access() denies write access to a file.
2963 * allow_write_access() re-enables write access to a file.
2964 *
2965 * The i_writecount field of an inode can have the following values:
2966 * 0: no write access, no denied write access
2967 * < 0: (-i_writecount) users that denied write access to the file.
2968 * > 0: (i_writecount) users that have write access to the file.
07b8ce1e
AV
2969 *
2970 * Normally we operate on that counter with atomic_{inc,dec} and it's safe
2971 * except for the cases where we don't hold i_writecount yet. Then we need to
2972 * use {get,deny}_write_access() - these functions check the sign and refuse
2973 * to do the change if sign is wrong.
2974 */
2975static inline int get_write_access(struct inode *inode)
2976{
2977 return atomic_inc_unless_negative(&inode->i_writecount) ? 0 : -ETXTBSY;
2978}
2979static inline int deny_write_access(struct file *file)
2980{
496ad9aa 2981 struct inode *inode = file_inode(file);
07b8ce1e
AV
2982 return atomic_dec_unless_positive(&inode->i_writecount) ? 0 : -ETXTBSY;
2983}
1da177e4
LT
2984static inline void put_write_access(struct inode * inode)
2985{
2986 atomic_dec(&inode->i_writecount);
2987}
2988static inline void allow_write_access(struct file *file)
2989{
2990 if (file)
496ad9aa 2991 atomic_inc(&file_inode(file)->i_writecount);
1da177e4 2992}
f1fe29b4
DH
2993static inline bool inode_is_open_for_write(const struct inode *inode)
2994{
2995 return atomic_read(&inode->i_writecount) > 0;
2996}
2997
#if defined(CONFIG_IMA) || defined(CONFIG_FILE_LOCKING)
/* Decrement @inode's i_readcount; BUG_ON() fires if it already reads zero. */
static inline void i_readcount_dec(struct inode *inode)
{
	BUG_ON(atomic_read(&inode->i_readcount) == 0);
	atomic_dec(&inode->i_readcount);
}
/* Increment @inode's i_readcount. */
static inline void i_readcount_inc(struct inode *inode)
{
	atomic_inc(&inode->i_readcount);
}
#else
/* Neither CONFIG_IMA nor CONFIG_FILE_LOCKING: both helpers are no-ops. */
static inline void i_readcount_dec(struct inode *inode)
{
}
static inline void i_readcount_inc(struct inode *inode)
{
}
#endif
ed8cae8b 3018extern int do_pipe_flags(int *, int);
1da177e4 3019
bdd1d2d3 3020extern ssize_t kernel_read(struct file *, void *, size_t, loff_t *);
61a707c5 3021ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos);
e13ec939 3022extern ssize_t kernel_write(struct file *, const void *, size_t, loff_t *);
73e18f7c 3023extern ssize_t __kernel_write(struct file *, const void *, size_t, loff_t *);
1da177e4
LT
3024extern struct file * open_exec(const char *);
3025
3026/* fs/dcache.c -- generic fs support functions */
a6e5787f 3027extern bool is_subdir(struct dentry *, struct dentry *);
640eb7e7 3028extern bool path_is_under(const struct path *, const struct path *);
1da177e4 3029
9bf39ab2
MS
3030extern char *file_path(struct file *, char *, int);
3031
1da177e4
LT
3032#include <linux/err.h>
3033
3034/* needed for stackable file system support */
965c8e59 3035extern loff_t default_llseek(struct file *file, loff_t offset, int whence);
1da177e4 3036
965c8e59 3037extern loff_t vfs_llseek(struct file *file, loff_t offset, int whence);
1da177e4 3038
54e34621 3039extern int inode_init_always(struct super_block *, struct inode *);
1da177e4 3040extern void inode_init_once(struct inode *);
2aa15890 3041extern void address_space_init_once(struct address_space *mapping);
1da177e4
LT
3042extern struct inode * igrab(struct inode *);
3043extern ino_t iunique(struct super_block *, ino_t);
3044extern int inode_needs_sync(struct inode *inode);
45321ac5 3045extern int generic_delete_inode(struct inode *inode);
9bcb4b73
AV
3046static inline int generic_drop_inode(struct inode *inode)
3047{
88149082 3048 return !inode->i_nlink || inode_unhashed(inode);
9bcb4b73 3049}
2c567af4 3050extern void d_mark_dontcache(struct inode *inode);
1da177e4 3051
88bd5121
AA
3052extern struct inode *ilookup5_nowait(struct super_block *sb,
3053 unsigned long hashval, int (*test)(struct inode *, void *),
3054 void *data);
1da177e4
LT
3055extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
3056 int (*test)(struct inode *, void *), void *data);
3057extern struct inode *ilookup(struct super_block *sb, unsigned long ino);
3058
80ea09a0
MS
3059extern struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
3060 int (*test)(struct inode *, void *),
3061 int (*set)(struct inode *, void *),
3062 void *data);
1da177e4
LT
3063extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *);
3064extern struct inode * iget_locked(struct super_block *, unsigned long);
fe032c42
TT
3065extern struct inode *find_inode_nowait(struct super_block *,
3066 unsigned long,
3067 int (*match)(struct inode *,
3068 unsigned long, void *),
3069 void *data);
3f19b2ab
DH
3070extern struct inode *find_inode_rcu(struct super_block *, unsigned long,
3071 int (*)(struct inode *, void *), void *);
3072extern struct inode *find_inode_by_ino_rcu(struct super_block *, unsigned long);
261bca86
AV
3073extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *);
3074extern int insert_inode_locked(struct inode *);
e096d0c7
JB
#ifdef CONFIG_DEBUG_LOCK_ALLOC
extern void lockdep_annotate_inode_mutex_key(struct inode *inode);
#else
/*
 * Without CONFIG_DEBUG_LOCK_ALLOC this is an empty stub.  No ';' follows the
 * body: a stray semicolon there is an empty file-scope declaration, which
 * ISO C does not allow and pedantic builds warn about.
 */
static inline void lockdep_annotate_inode_mutex_key(struct inode *inode) { }
#endif
1da177e4 3080extern void unlock_new_inode(struct inode *);
c2b6d621 3081extern void discard_new_inode(struct inode *);
85fe4025 3082extern unsigned int get_next_ino(void);
799ea9e9 3083extern void evict_inodes(struct super_block *sb);
3e9d80a8 3084void dump_mapping(const struct address_space *);
1da177e4 3085
e809d5f0
CD
3086/*
3087 * Userspace may rely on the the inode number being non-zero. For example, glibc
3088 * simply ignores files with zero i_ino in unlink() and other places.
3089 *
3090 * As an additional complication, if userspace was compiled with
3091 * _FILE_OFFSET_BITS=32 on a 64-bit kernel we'll only end up reading out the
3092 * lower 32 bits, so we need to check that those aren't zero explicitly. With
3093 * _FILE_OFFSET_BITS=64, this may cause some harmless false-negatives, but
3094 * better safe than sorry.
3095 */
3096static inline bool is_zero_ino(ino_t ino)
3097{
3098 return (u32)ino == 0;
3099}
3100
1da177e4 3101extern void __iget(struct inode * inode);
b46980fe 3102extern void iget_failed(struct inode *);
dbd5768f 3103extern void clear_inode(struct inode *);
2e00c97e 3104extern void __destroy_inode(struct inode *);
a209dfc7
ED
3105extern struct inode *new_inode_pseudo(struct super_block *sb);
3106extern struct inode *new_inode(struct super_block *sb);
ff0c7d15 3107extern void free_inode_nonrcu(struct inode *inode);
01de85e0 3108extern int should_remove_suid(struct dentry *);
5fa8e0a1 3109extern int file_remove_privs(struct file *);
1da177e4
LT
3110
3111extern void __insert_inode_hash(struct inode *, unsigned long hashval);
646ec461
CH
3112static inline void insert_inode_hash(struct inode *inode)
3113{
1da177e4
LT
3114 __insert_inode_hash(inode, inode->i_ino);
3115}
f2ee7abf
ED
3116
3117extern void __remove_inode_hash(struct inode *);
3118static inline void remove_inode_hash(struct inode *inode)
3119{
cbedaac6 3120 if (!inode_unhashed(inode) && !hlist_fake(&inode->i_hash))
f2ee7abf
ED
3121 __remove_inode_hash(inode);
3122}
3123
646ec461 3124extern void inode_sb_list_add(struct inode *inode);
51b8c1fe 3125extern void inode_add_lru(struct inode *inode);
1da177e4 3126
1da177e4
LT
3127extern int sb_set_blocksize(struct super_block *, int);
3128extern int sb_min_blocksize(struct super_block *, int);
3129
3130extern int generic_file_mmap(struct file *, struct vm_area_struct *);
3131extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
3309dd04 3132extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *);
02e83f46
DW
3133extern int generic_write_check_limits(struct file *file, loff_t pos,
3134 loff_t *count);
a3171351 3135extern int generic_file_rw_checks(struct file *file_in, struct file *file_out);
87fa0f3e
CH
3136ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *to,
3137 ssize_t already_read);
ed978a81 3138extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
8174202b 3139extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
8174202b 3140extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
1af5bb49 3141extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *);
3b93f911 3142extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t);
88e6faef 3143
18e9710e 3144ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
ddef7ed2 3145 rwf_t flags);
abbb6589 3146ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
ddef7ed2 3147 rwf_t flags);
5dcdc43e
JX
3148ssize_t vfs_iocb_iter_read(struct file *file, struct kiocb *iocb,
3149 struct iov_iter *iter);
3150ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb,
3151 struct iov_iter *iter);
dbe4e192 3152
88e6faef 3153/* fs/splice.c */
cbb7e577 3154extern ssize_t generic_file_splice_read(struct file *, loff_t *,
88e6faef 3155 struct pipe_inode_info *, size_t, unsigned int);
8d020765 3156extern ssize_t iter_file_splice_write(struct pipe_inode_info *,
cbb7e577 3157 struct file *, loff_t *, size_t, unsigned int);
88e6faef 3158extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
cbb7e577 3159 struct file *out, loff_t *, size_t len, unsigned int flags);
1c118596
MS
3160extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
3161 loff_t *opos, size_t len, unsigned int flags);
3162
88e6faef 3163
1da177e4
LT
3164extern void
3165file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
965c8e59
AM
3166extern loff_t noop_llseek(struct file *file, loff_t offset, int whence);
3167extern loff_t no_llseek(struct file *file, loff_t offset, int whence);
46a1c2c7 3168extern loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize);
965c8e59 3169extern loff_t generic_file_llseek(struct file *file, loff_t offset, int whence);
5760495a 3170extern loff_t generic_file_llseek_size(struct file *file, loff_t offset,
965c8e59 3171 int whence, loff_t maxsize, loff_t eof);
1bf9d14d
AV
3172extern loff_t fixed_size_llseek(struct file *file, loff_t offset,
3173 int whence, loff_t size);
b25472f9
AV
3174extern loff_t no_seek_end_llseek_size(struct file *, loff_t, int, loff_t);
3175extern loff_t no_seek_end_llseek(struct file *, loff_t, int);
1da177e4
LT
3176extern int generic_file_open(struct inode * inode, struct file * filp);
3177extern int nonseekable_open(struct inode * inode, struct file * filp);
10dce8af 3178extern int stream_open(struct inode * inode, struct file * filp);
1da177e4 3179
9361401e 3180#ifdef CONFIG_BLOCK
8a4c1e42 3181typedef void (dio_submit_t)(struct bio *bio, struct inode *inode,
facd07b0 3182 loff_t file_offset);
1da177e4
LT
3183
/* Flag bits for the @flags argument of __blockdev_direct_IO(). */
enum {
	/* need locking between buffered and direct access */
	DIO_LOCKING	= 1 << 0,

	/* filesystem does not support filling holes */
	DIO_SKIP_HOLES	= 1 << 1,
};
3191
17f8c842
OS
3192ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
3193 struct block_device *bdev, struct iov_iter *iter,
c8b8e32d 3194 get_block_t get_block,
17f8c842
OS
3195 dio_iodone_t end_io, dio_submit_t submit_io,
3196 int flags);
7bb46a67 3197
17f8c842
OS
3198static inline ssize_t blockdev_direct_IO(struct kiocb *iocb,
3199 struct inode *inode,
c8b8e32d 3200 struct iov_iter *iter,
17f8c842 3201 get_block_t get_block)
1da177e4 3202{
17f8c842 3203 return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
c8b8e32d 3204 get_block, NULL, NULL, DIO_LOCKING | DIO_SKIP_HOLES);
1da177e4 3205}
9361401e 3206#endif
1da177e4 3207
1d59d61f 3208void inode_dio_wait(struct inode *inode);
fe0f07d0 3209
39015399 3210/**
fe0f07d0
JA
3211 * inode_dio_begin - signal start of a direct I/O requests
3212 * @inode: inode the direct I/O happens on
3213 *
3214 * This is called once we've finished processing a direct I/O request,
3215 * and is used to wake up callers waiting for direct I/O to be quiesced.
3216 */
3217static inline void inode_dio_begin(struct inode *inode)
3218{
3219 atomic_inc(&inode->i_dio_count);
3220}
3221
39015399 3222/**
fe0f07d0
JA
3223 * inode_dio_end - signal finish of a direct I/O requests
3224 * @inode: inode the direct I/O happens on
3225 *
3226 * This is called once we've finished processing a direct I/O request,
3227 * and is used to wake up callers waiting for direct I/O to be quiesced.
3228 */
3229static inline void inode_dio_end(struct inode *inode)
3230{
3231 if (atomic_dec_and_test(&inode->i_dio_count))
3232 wake_up_bit(&inode->i_state, __I_DIO_WAKEUP);
3233}
1d59d61f 3234
a92853b6
KK
3235/*
3236 * Warn about a page cache invalidation failure during a direct I/O write.
3237 */
3238void dio_warn_stale_pagecache(struct file *filp);
3239
5f16f322
TT
3240extern void inode_set_flags(struct inode *inode, unsigned int flags,
3241 unsigned int mask);
3242
4b6f5d20 3243extern const struct file_operations generic_ro_fops;
1da177e4
LT
3244
3245#define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m))
3246
5d826c84 3247extern int readlink_copy(char __user *, int, const char *);
1da177e4 3248extern int page_readlink(struct dentry *, char __user *, int);
fceef393
AV
3249extern const char *page_get_link(struct dentry *, struct inode *,
3250 struct delayed_call *);
3251extern void page_put_link(void *);
0adb25d2 3252extern int __page_symlink(struct inode *inode, const char *symname, int len,
54566b2c 3253 int nofs);
1da177e4 3254extern int page_symlink(struct inode *inode, const char *symname, int len);
c5ef1c42 3255extern const struct inode_operations page_symlink_inode_operations;
fceef393 3256extern void kfree_link(void *);
0d56a451 3257void generic_fillattr(struct user_namespace *, struct inode *, struct kstat *);
4f911138 3258void generic_fill_statx_attr(struct inode *inode, struct kstat *stat);
a528d35e
DH
3259extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int);
3260extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int);
b462707e 3261void __inode_add_bytes(struct inode *inode, loff_t bytes);
1da177e4 3262void inode_add_bytes(struct inode *inode, loff_t bytes);
1c8924eb 3263void __inode_sub_bytes(struct inode *inode, loff_t bytes);
1da177e4 3264void inode_sub_bytes(struct inode *inode, loff_t bytes);
f4a8116a
JK
3265static inline loff_t __inode_get_bytes(struct inode *inode)
3266{
3267 return (((loff_t)inode->i_blocks) << 9) + inode->i_bytes;
3268}
1da177e4
LT
3269loff_t inode_get_bytes(struct inode *inode);
3270void inode_set_bytes(struct inode *inode, loff_t bytes);
fceef393
AV
3271const char *simple_get_link(struct dentry *, struct inode *,
3272 struct delayed_call *);
61ba64fc 3273extern const struct inode_operations simple_symlink_inode_operations;
1da177e4 3274
5c0ba4e0 3275extern int iterate_dir(struct file *, struct dir_context *);
1da177e4 3276
09f1bde4
CH
3277int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat,
3278 int flags);
da9aa5d9 3279int vfs_fstat(int fd, struct kstat *stat);
a528d35e
DH
3280
3281static inline int vfs_stat(const char __user *filename, struct kstat *stat)
3282{
0b2c6693 3283 return vfs_fstatat(AT_FDCWD, filename, stat, 0);
a528d35e
DH
3284}
3285static inline int vfs_lstat(const char __user *name, struct kstat *stat)
3286{
0b2c6693 3287 return vfs_fstatat(AT_FDCWD, name, stat, AT_SYMLINK_NOFOLLOW);
a528d35e 3288}
a528d35e 3289
d60874cd 3290extern const char *vfs_get_link(struct dentry *, struct delayed_call *);
fd4a0edf 3291extern int vfs_readlink(struct dentry *, char __user *, int);
1da177e4 3292
ee416bcd 3293extern struct file_system_type *get_filesystem(struct file_system_type *fs);
c18479fe 3294extern void put_filesystem(struct file_system_type *fs);
1da177e4
LT
3295extern struct file_system_type *get_fs_type(const char *name);
3296extern struct super_block *get_super(struct block_device *);
4504230a 3297extern struct super_block *get_active_super(struct block_device *bdev);
1da177e4 3298extern void drop_super(struct super_block *sb);
ba6379f7 3299extern void drop_super_exclusive(struct super_block *sb);
01a05b33 3300extern void iterate_supers(void (*)(struct super_block *, void *), void *);
43e15cdb
AV
3301extern void iterate_supers_type(struct file_system_type *,
3302 void (*)(struct super_block *, void *), void *);
1da177e4
LT
3303
3304extern int dcache_dir_open(struct inode *, struct file *);
3305extern int dcache_dir_close(struct inode *, struct file *);
3306extern loff_t dcache_dir_lseek(struct file *, loff_t, int);
5f99f4e7 3307extern int dcache_readdir(struct file *, struct dir_context *);
549c7297
CB
3308extern int simple_setattr(struct user_namespace *, struct dentry *,
3309 struct iattr *);
3310extern int simple_getattr(struct user_namespace *, const struct path *,
3311 struct kstat *, u32, unsigned int);
726c3342 3312extern int simple_statfs(struct dentry *, struct kstatfs *);
20955e89 3313extern int simple_open(struct inode *inode, struct file *file);
1da177e4
LT
3314extern int simple_link(struct dentry *, struct inode *, struct dentry *);
3315extern int simple_unlink(struct inode *, struct dentry *);
3316extern int simple_rmdir(struct inode *, struct dentry *);
6429e463
LB
3317extern int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry,
3318 struct inode *new_dir, struct dentry *new_dentry);
549c7297
CB
3319extern int simple_rename(struct user_namespace *, struct inode *,
3320 struct dentry *, struct inode *, struct dentry *,
3321 unsigned int);
a3d1e7eb
AV
3322extern void simple_recursive_removal(struct dentry *,
3323 void (*callback)(struct dentry *));
02c24a82 3324extern int noop_fsync(struct file *, loff_t, loff_t, int);
f44c7763
DW
3325extern void noop_invalidatepage(struct page *page, unsigned int offset,
3326 unsigned int length);
3327extern ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter);
1da177e4 3328extern int simple_empty(struct dentry *);
afddba49
NP
3329extern int simple_write_begin(struct file *file, struct address_space *mapping,
3330 loff_t pos, unsigned len, unsigned flags,
3331 struct page **pagep, void **fsdata);
c1e3dbe9 3332extern const struct address_space_operations ram_aops;
b26d4cd3 3333extern int always_delete_dentry(const struct dentry *);
6987843f 3334extern struct inode *alloc_anon_inode(struct super_block *);
e6f5c789 3335extern int simple_nosetlease(struct file *, long, struct file_lock **, void **);
b26d4cd3 3336extern const struct dentry_operations simple_dentry_operations;
1da177e4 3337
00cd8dd3 3338extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags);
1da177e4 3339extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *);
4b6f5d20 3340extern const struct file_operations simple_dir_operations;
c5ef1c42 3341extern const struct inode_operations simple_dir_inode_operations;
fbabfd0f
EB
3342extern void make_empty_dir_inode(struct inode *inode);
3343extern bool is_empty_dir_inode(struct inode *inode);
cda37124 3344struct tree_descr { const char *name; const struct file_operations *ops; int mode; };
1da177e4 3345struct dentry *d_alloc_name(struct dentry *, const char *);
cda37124
EB
3346extern int simple_fill_super(struct super_block *, unsigned long,
3347 const struct tree_descr *);
1f5ce9e9 3348extern int simple_pin_fs(struct file_system_type *, struct vfsmount **mount, int *count);
1da177e4
LT
3349extern void simple_release_fs(struct vfsmount **mount, int *count);
3350
93b07113
AM
3351extern ssize_t simple_read_from_buffer(void __user *to, size_t count,
3352 loff_t *ppos, const void *from, size_t available);
6a727b43
JS
3353extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
3354 const void __user *from, size_t count);
1da177e4 3355
ac13a829 3356extern int __generic_file_fsync(struct file *, loff_t, loff_t, int);
02c24a82 3357extern int generic_file_fsync(struct file *, loff_t, loff_t, int);
d5aacad5 3358
30ca22c7
PL
3359extern int generic_check_addressable(unsigned, u64);
3360
608af703 3361extern void generic_set_encrypted_ci_d_ops(struct dentry *dentry);
c843843e 3362
e965f963 3363#ifdef CONFIG_MIGRATION
2d1db3b1 3364extern int buffer_migrate_page(struct address_space *,
a6bc32b8
MG
3365 struct page *, struct page *,
3366 enum migrate_mode);
89cb0888
JK
3367extern int buffer_migrate_page_norefs(struct address_space *,
3368 struct page *, struct page *,
3369 enum migrate_mode);
e965f963
CL
3370#else
3371#define buffer_migrate_page NULL
89cb0888 3372#define buffer_migrate_page_norefs NULL
e965f963
CL
3373#endif
3374
7bb698f0
AG
3375int may_setattr(struct user_namespace *mnt_userns, struct inode *inode,
3376 unsigned int ia_valid);
2f221d6f 3377int setattr_prepare(struct user_namespace *, struct dentry *, struct iattr *);
25d9e2d1 3378extern int inode_newsize_ok(const struct inode *, loff_t offset);
2f221d6f
CB
3379void setattr_copy(struct user_namespace *, struct inode *inode,
3380 const struct iattr *attr);
1da177e4 3381
c3b2da31 3382extern int file_update_time(struct file *file);
1da177e4 3383
f05a3849 3384static inline bool vma_is_dax(const struct vm_area_struct *vma)
baabda26
DW
3385{
3386 return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host);
3387}
3388
2bb6d283
DW
3389static inline bool vma_is_fsdax(struct vm_area_struct *vma)
3390{
3391 struct inode *inode;
3392
52650c8b 3393 if (!IS_ENABLED(CONFIG_FS_DAX) || !vma->vm_file)
2bb6d283
DW
3394 return false;
3395 if (!vma_is_dax(vma))
3396 return false;
3397 inode = file_inode(vma->vm_file);
230f5a89 3398 if (S_ISCHR(inode->i_mode))
2bb6d283
DW
3399 return false; /* device-dax */
3400 return true;
3401}
3402
2ba48ce5
AV
3403static inline int iocb_flags(struct file *file)
3404{
3405 int res = 0;
3406 if (file->f_flags & O_APPEND)
3407 res |= IOCB_APPEND;
efbe3c24 3408 if (file->f_flags & O_DIRECT)
2ba48ce5 3409 res |= IOCB_DIRECT;
dde0c2e7
CH
3410 if ((file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host))
3411 res |= IOCB_DSYNC;
3412 if (file->f_flags & __O_SYNC)
3413 res |= IOCB_SYNC;
2ba48ce5
AV
3414 return res;
3415}
3416
ddef7ed2 3417static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags)
fdd2f5b7 3418{
1752f0ad
PB
3419 int kiocb_flags = 0;
3420
ce71bfea
JA
3421 /* make sure there's no overlap between RWF and private IOCB flags */
3422 BUILD_BUG_ON((__force int) RWF_SUPPORTED & IOCB_EVENTFD);
3423
1752f0ad
PB
3424 if (!flags)
3425 return 0;
fdd2f5b7
GR
3426 if (unlikely(flags & ~RWF_SUPPORTED))
3427 return -EOPNOTSUPP;
3428
b745fafa 3429 if (flags & RWF_NOWAIT) {
91f9943e 3430 if (!(ki->ki_filp->f_mode & FMODE_NOWAIT))
b745fafa 3431 return -EOPNOTSUPP;
ce71bfea 3432 kiocb_flags |= IOCB_NOIO;
b745fafa 3433 }
ce71bfea 3434 kiocb_flags |= (__force int) (flags & RWF_SUPPORTED);
fdd2f5b7 3435 if (flags & RWF_SYNC)
ce71bfea 3436 kiocb_flags |= IOCB_DSYNC;
1752f0ad
PB
3437
3438 ki->ki_flags |= kiocb_flags;
fdd2f5b7
GR
3439 return 0;
3440}
3441
1da177e4
LT
3442static inline ino_t parent_ino(struct dentry *dentry)
3443{
3444 ino_t res;
3445
b5c84bf6
NP
3446 /*
3447 * Don't strictly need d_lock here? If the parent ino could change
3448 * then surely we'd have a deeper race in the caller?
3449 */
1da177e4
LT
3450 spin_lock(&dentry->d_lock);
3451 res = dentry->d_parent->d_inode->i_ino;
3452 spin_unlock(&dentry->d_lock);
3453 return res;
3454}
3455
1da177e4
LT
3456/* Transaction based IO helpers */
3457
/*
 * An argresp is stored in an allocated page and holds the
 * size of the argument or response, along with its content
 */
struct simple_transaction_argresp {
	ssize_t	size;	/* size of the argument or response */
	char	data[];	/* its content, as a flexible array member */
};
3466
3467#define SIMPLE_TRANSACTION_LIMIT (PAGE_SIZE - sizeof(struct simple_transaction_argresp))
3468
3469char *simple_transaction_get(struct file *file, const char __user *buf,
3470 size_t size);
3471ssize_t simple_transaction_read(struct file *file, char __user *buf,
3472 size_t size, loff_t *pos);
3473int simple_transaction_release(struct inode *inode, struct file *file);
3474
76791ab2 3475void simple_transaction_set(struct file *file, size_t n);
1da177e4 3476
acaefc25
AB
3477/*
3478 * simple attribute files
3479 *
3480 * These attributes behave similar to those in sysfs:
3481 *
3482 * Writing to an attribute immediately sets a value, an open file can be
3483 * written to multiple times.
3484 *
3485 * Reading from an attribute creates a buffer from the value that might get
3486 * read with multiple read calls. When the attribute has been read
3487 * completely, no further read calls are possible until the file is opened
3488 * again.
3489 *
3490 * All attributes contain a text representation of a numeric value
3491 * that is accessed with the get() and set() functions.
3492 */
/*
 * DEFINE_SIMPLE_ATTRIBUTE - emit an open handler and a file_operations table
 *
 * Expands to a static __fops##_open() that passes @__fmt through
 * __simple_attr_check_format() and then calls simple_attr_open(), followed
 * by a static const struct file_operations named @__fops wired to the
 * simple_attr_* helpers.  The expansion ends without a semicolon; the user
 * of the macro supplies it.
 */
#define DEFINE_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt)		\
static int __fops ## _open(struct inode *inode, struct file *file)	\
{									\
	__simple_attr_check_format(__fmt, 0ull);			\
	return simple_attr_open(inode, file, __get, __set, __fmt);	\
}									\
static const struct file_operations __fops = {				\
	.owner	 = THIS_MODULE,						\
	.open	 = __fops ## _open,					\
	.release = simple_attr_release,					\
	.read	 = simple_attr_read,					\
	.write	 = simple_attr_write,					\
	.llseek	 = generic_file_llseek,					\
}
acaefc25 3507
b9075fa9
JP
3508static inline __printf(1, 2)
3509void __simple_attr_check_format(const char *fmt, ...)
acaefc25
AB
3510{
3511 /* don't do anything, just let the compiler check the arguments; */
3512}
3513
3514int simple_attr_open(struct inode *inode, struct file *file,
8b88b099 3515 int (*get)(void *, u64 *), int (*set)(void *, u64),
acaefc25 3516 const char *fmt);
74bedc4d 3517int simple_attr_release(struct inode *inode, struct file *file);
acaefc25
AB
3518ssize_t simple_attr_read(struct file *file, char __user *buf,
3519 size_t len, loff_t *ppos);
3520ssize_t simple_attr_write(struct file *file, const char __user *buf,
3521 size_t len, loff_t *ppos);
3522
4be28540 3523struct ctl_table;
6e7c1770 3524int __init list_bdev_fs_names(char *buf, size_t size);
62239ac2 3525
3cd90ea4 3526#define __FMODE_EXEC ((__force int) FMODE_EXEC)
1a44bc8c
NK
3527#define __FMODE_NONOTIFY ((__force int) FMODE_NONOTIFY)
3528
6d125529 3529#define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE])
ecf081d1 3530#define OPEN_FMODE(flag) ((__force fmode_t)(((flag + 1) & O_ACCMODE) | \
1a44bc8c 3531 (flag & __FMODE_NONOTIFY)))
5300990c 3532
d37177ba 3533static inline bool is_sxid(umode_t mode)
69b45732
AK
3534{
3535 return (mode & S_ISUID) || ((mode & S_ISGID) && (mode & S_IXGRP));
3536}
3537
ba73d987
CB
3538static inline int check_sticky(struct user_namespace *mnt_userns,
3539 struct inode *dir, struct inode *inode)
cbdf35bc
MS
3540{
3541 if (!(dir->i_mode & S_ISVTX))
3542 return 0;
3543
ba73d987 3544 return __check_sticky(mnt_userns, dir, inode);
cbdf35bc
MS
3545}
3546
69b45732
AK
3547static inline void inode_has_no_xattr(struct inode *inode)
3548{
e462ec50 3549 if (!is_sxid(inode->i_mode) && (inode->i_sb->s_flags & SB_NOSEC))
69b45732
AK
3550 inode->i_flags |= S_NOSEC;
3551}
3552
a7400222
AV
3553static inline bool is_root_inode(struct inode *inode)
3554{
3555 return inode == inode->i_sb->s_root->d_inode;
3556}
3557
5f99f4e7
AV
3558static inline bool dir_emit(struct dir_context *ctx,
3559 const char *name, int namelen,
3560 u64 ino, unsigned type)
3561{
3562 return ctx->actor(ctx, name, namelen, ctx->pos, ino, type) == 0;
3563}
3564static inline bool dir_emit_dot(struct file *file, struct dir_context *ctx)
3565{
3566 return ctx->actor(ctx, ".", 1, ctx->pos,
3567 file->f_path.dentry->d_inode->i_ino, DT_DIR) == 0;
3568}
3569static inline bool dir_emit_dotdot(struct file *file, struct dir_context *ctx)
3570{
3571 return ctx->actor(ctx, "..", 2, ctx->pos,
3572 parent_ino(file->f_path.dentry), DT_DIR) == 0;
3573}
3574static inline bool dir_emit_dots(struct file *file, struct dir_context *ctx)
3575{
3576 if (ctx->pos == 0) {
3577 if (!dir_emit_dot(file, ctx))
3578 return false;
3579 ctx->pos = 1;
3580 }
3581 if (ctx->pos == 1) {
3582 if (!dir_emit_dotdot(file, ctx))
3583 return false;
3584 ctx->pos = 2;
3585 }
3586 return true;
3587}
5ded75ec
AV
/*
 * dir_relax - drop and immediately retake the inode lock
 *
 * Returns true if @inode is not IS_DEADDIR() once the lock is held again.
 */
static inline bool dir_relax(struct inode *inode)
{
	bool alive;

	inode_unlock(inode);
	inode_lock(inode);

	alive = !IS_DEADDIR(inode);
	return alive;
}
5f99f4e7 3594
ae05327a
AV
/*
 * dir_relax_shared - drop and immediately retake the shared inode lock
 *
 * Returns true if @inode is not IS_DEADDIR() once the lock is held again.
 */
static inline bool dir_relax_shared(struct inode *inode)
{
	bool alive;

	inode_unlock_shared(inode);
	inode_lock_shared(inode);

	alive = !IS_DEADDIR(inode);
	return alive;
}
3601
90f8572b 3602extern bool path_noexec(const struct path *path);
21fc61c7 3603extern void inode_nohighmem(struct inode *inode);
90f8572b 3604
45cd0faa
AG
3605/* mm/fadvise.c */
3606extern int vfs_fadvise(struct file *file, loff_t offset, loff_t len,
3607 int advice);
cf1ea059
JK
3608extern int generic_fadvise(struct file *file, loff_t offset, loff_t len,
3609 int advice);
45cd0faa 3610
dc617f29
DW
3611/*
3612 * Flush file data before changing attributes. Caller must hold any locks
3613 * required to prevent further writes to this file until we're done setting
3614 * flags.
3615 */
3616static inline int inode_drain_writes(struct inode *inode)
3617{
3618 inode_dio_wait(inode);
3619 return filemap_write_and_wait(inode->i_mapping);
3620}
3621
1da177e4 3622#endif /* _LINUX_FS_H */