/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_PAGEMAP_H
#define _LINUX_PAGEMAP_H

/*
 * Copyright 1995 Linus Torvalds
 */
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/list.h>
#include <linux/highmem.h>
#include <linux/compiler.h>
#include <linux/uaccess.h>
#include <linux/gfp.h>
#include <linux/bitops.h>
#include <linux/hardirq.h> /* for in_interrupt() */
#include <linux/hugetlb_inline.h>

struct folio_batch;

unsigned long invalidate_mapping_pages(struct address_space *mapping,
					pgoff_t start, pgoff_t end);

static inline void invalidate_remote_inode(struct inode *inode)
{
	if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
	    S_ISLNK(inode->i_mode))
		invalidate_mapping_pages(inode->i_mapping, 0, -1);
}

int invalidate_inode_pages2(struct address_space *mapping);
int invalidate_inode_pages2_range(struct address_space *mapping,
		pgoff_t start, pgoff_t end);
int write_inode_now(struct inode *, int sync);
int filemap_fdatawrite(struct address_space *);
int filemap_flush(struct address_space *);
int filemap_fdatawait_keep_errors(struct address_space *mapping);
int filemap_fdatawait_range(struct address_space *, loff_t lstart, loff_t lend);
int filemap_fdatawait_range_keep_errors(struct address_space *mapping,
		loff_t start_byte, loff_t end_byte);

static inline int filemap_fdatawait(struct address_space *mapping)
{
	return filemap_fdatawait_range(mapping, 0, LLONG_MAX);
}
bool filemap_range_has_page(struct address_space *, loff_t lstart, loff_t lend);
int filemap_write_and_wait_range(struct address_space *mapping,
		loff_t lstart, loff_t lend);
int __filemap_fdatawrite_range(struct address_space *mapping,
		loff_t start, loff_t end, int sync_mode);
int filemap_fdatawrite_range(struct address_space *mapping,
		loff_t start, loff_t end);
int filemap_check_errors(struct address_space *mapping);
void __filemap_set_wb_err(struct address_space *mapping, int err);
int filemap_fdatawrite_wbc(struct address_space *mapping,
			   struct writeback_control *wbc);

static inline int filemap_write_and_wait(struct address_space *mapping)
{
	return filemap_write_and_wait_range(mapping, 0, LLONG_MAX);
}
/**
 * filemap_set_wb_err - set a writeback error on an address_space
 * @mapping: mapping in which to set writeback error
 * @err: error to be set in mapping
 *
 * When writeback fails in some way, we must record that error so that
 * userspace can be informed when fsync and the like are called.  We endeavor
 * to report errors on any file that was open at the time of the error.  Some
 * internal callers also need to know when writeback errors have occurred.
 *
 * When a writeback error occurs, most filesystems will want to call
 * filemap_set_wb_err to record the error in the mapping so that it will be
 * automatically reported whenever fsync is called on the file.
 */
static inline void filemap_set_wb_err(struct address_space *mapping, int err)
{
	/* Fastpath for common case of no error */
	if (unlikely(err))
		__filemap_set_wb_err(mapping, err);
}

/**
 * filemap_check_wb_err - has an error occurred since the mark was sampled?
 * @mapping: mapping to check for writeback errors
 * @since: previously-sampled errseq_t
 *
 * Grab the errseq_t value from the mapping, and see if it has changed "since"
 * the given value was sampled.
 *
 * If it has then report the latest error set, otherwise return 0.
 */
static inline int filemap_check_wb_err(struct address_space *mapping,
					errseq_t since)
{
	return errseq_check(&mapping->wb_err, since);
}
/**
 * filemap_sample_wb_err - sample the current errseq_t to test for later errors
 * @mapping: mapping to be sampled
 *
 * Writeback errors are always reported relative to a particular sample point
 * in the past. This function provides those sample points.
 */
static inline errseq_t filemap_sample_wb_err(struct address_space *mapping)
{
	return errseq_sample(&mapping->wb_err);
}
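
/*
 * Example (illustrative, not part of this header): an internal caller
 * that needs to know whether writeback failed during some window can
 * bracket the operation with a sample/check pair:
 *
 *	errseq_t since = filemap_sample_wb_err(mapping);
 *
 *	... kick off and wait for writeback ...
 *
 *	err = filemap_check_wb_err(mapping, since);
 *
 * A non-zero return from filemap_check_wb_err() means an error was
 * recorded in the mapping after the sample point.
 */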
/**
 * file_sample_sb_err - sample the current errseq_t to test for later errors
 * @file: file pointer to be sampled
 *
 * Grab the most current superblock-level errseq_t value for the given
 * struct file.
 */
static inline errseq_t file_sample_sb_err(struct file *file)
{
	return errseq_sample(&file->f_path.dentry->d_sb->s_wb_err);
}

/*
 * Flush file data before changing attributes.  Caller must hold any locks
 * required to prevent further writes to this file until we're done setting
 * flags.
 */
static inline int inode_drain_writes(struct inode *inode)
{
	inode_dio_wait(inode);
	return filemap_write_and_wait(inode->i_mapping);
}

static inline bool mapping_empty(struct address_space *mapping)
{
	return xa_empty(&mapping->i_pages);
}
/*
 * mapping_shrinkable - test if page cache state allows inode reclaim
 * @mapping: the page cache mapping
 *
 * This checks the mapping's cache state for the purpose of inode
 * reclaim and LRU management.
 *
 * The caller is expected to hold the i_lock, but is not required to
 * hold the i_pages lock, which usually protects cache state.  That's
 * because the i_lock and the list_lru lock that protect the inode and
 * its LRU state don't nest inside the irq-safe i_pages lock.
 *
 * Cache deletions are performed under the i_lock, which ensures that
 * when an inode goes empty, it will reliably get queued on the LRU.
 *
 * Cache additions do not acquire the i_lock and may race with this
 * check, in which case we'll report the inode as shrinkable when it
 * has cache pages.  This is okay: the shrinker also checks the
 * refcount and the referenced bit, which will be elevated or set in
 * the process of adding new cache pages to an inode.
 */
static inline bool mapping_shrinkable(struct address_space *mapping)
{
	void *head;

	/*
	 * On highmem systems, there could be lowmem pressure from the
	 * inodes before there is highmem pressure from the page
	 * cache.  Make inodes shrinkable regardless of cache state.
	 */
	if (IS_ENABLED(CONFIG_HIGHMEM))
		return true;

	/* Cache completely empty?  Shrink away. */
	head = rcu_access_pointer(mapping->i_pages.xa_head);
	if (!head)
		return true;

	/*
	 * The xarray stores single offset-0 entries directly in the
	 * head pointer, which allows non-resident page cache entries
	 * to escape the shadow shrinker's list of xarray nodes.  The
	 * inode shrinker needs to pick them up under memory pressure.
	 */
	if (!xa_is_node(head) && xa_is_value(head))
		return true;

	return false;
}
/*
 * Bits in mapping->flags.
 */
enum mapping_flags {
	AS_EIO		= 0,	/* IO error on async write */
	AS_ENOSPC	= 1,	/* ENOSPC on async write */
	AS_MM_ALL_LOCKS	= 2,	/* under mm_take_all_locks() */
	AS_UNEVICTABLE	= 3,	/* e.g., ramdisk, SHM_LOCK */
	AS_EXITING	= 4,	/* final truncate in progress */
	/* writeback related tags are not used */
	AS_NO_WRITEBACK_TAGS = 5,
	AS_LARGE_FOLIO_SUPPORT = 6,
};
/**
 * mapping_set_error - record a writeback error in the address_space
 * @mapping: the mapping in which an error should be set
 * @error: the error to set in the mapping
 *
 * When writeback fails in some way, we must record that error so that
 * userspace can be informed when fsync and the like are called.  We endeavor
 * to report errors on any file that was open at the time of the error.  Some
 * internal callers also need to know when writeback errors have occurred.
 *
 * When a writeback error occurs, most filesystems will want to call
 * mapping_set_error to record the error in the mapping so that it can be
 * reported when the application calls fsync(2).
 */
static inline void mapping_set_error(struct address_space *mapping, int error)
{
	if (likely(!error))
		return;

	/* Record in wb_err for checkers using errseq_t based tracking */
	__filemap_set_wb_err(mapping, error);

	/* Record it in superblock */
	if (mapping->host)
		errseq_set(&mapping->host->i_sb->s_wb_err, error);

	/* Record it in flags for now, for legacy callers */
	if (error == -ENOSPC)
		set_bit(AS_ENOSPC, &mapping->flags);
	else
		set_bit(AS_EIO, &mapping->flags);
}
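
/*
 * Example (illustrative, not part of this header): a filesystem's
 * writeback completion path typically records failures like this, so
 * that a later fsync() on any open file sees the error.  myfs_write_endio
 * is a hypothetical name:
 *
 *	static void myfs_write_endio(struct bio *bio)
 *	{
 *		struct address_space *mapping = ...;
 *
 *		if (bio->bi_status)
 *			mapping_set_error(mapping,
 *					blk_status_to_errno(bio->bi_status));
 *		...
 *	}
 */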
static inline void mapping_set_unevictable(struct address_space *mapping)
{
	set_bit(AS_UNEVICTABLE, &mapping->flags);
}

static inline void mapping_clear_unevictable(struct address_space *mapping)
{
	clear_bit(AS_UNEVICTABLE, &mapping->flags);
}

static inline bool mapping_unevictable(struct address_space *mapping)
{
	return mapping && test_bit(AS_UNEVICTABLE, &mapping->flags);
}

static inline void mapping_set_exiting(struct address_space *mapping)
{
	set_bit(AS_EXITING, &mapping->flags);
}

static inline int mapping_exiting(struct address_space *mapping)
{
	return test_bit(AS_EXITING, &mapping->flags);
}

static inline void mapping_set_no_writeback_tags(struct address_space *mapping)
{
	set_bit(AS_NO_WRITEBACK_TAGS, &mapping->flags);
}

static inline int mapping_use_writeback_tags(struct address_space *mapping)
{
	return !test_bit(AS_NO_WRITEBACK_TAGS, &mapping->flags);
}

static inline gfp_t mapping_gfp_mask(struct address_space *mapping)
{
	return mapping->gfp_mask;
}

/* Restricts the given gfp_mask to what the mapping allows. */
static inline gfp_t mapping_gfp_constraint(struct address_space *mapping,
		gfp_t gfp_mask)
{
	return mapping_gfp_mask(mapping) & gfp_mask;
}

/*
 * This is non-atomic.  Only to be used before the mapping is activated.
 * Probably needs a barrier...
 */
static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
{
	m->gfp_mask = mask;
}
/**
 * mapping_set_large_folios() - Indicate the file supports large folios.
 * @mapping: The file.
 *
 * The filesystem should call this function in its inode constructor to
 * indicate that the VFS can use large folios to cache the contents of
 * the file.
 *
 * Context: This should not be called while the inode is active as it
 * is non-atomic.
 */
static inline void mapping_set_large_folios(struct address_space *mapping)
{
	__set_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags);
}

/*
 * Large folio support currently depends on THP.  These dependencies are
 * being worked on but are not yet fixed.
 */
static inline bool mapping_large_folio_support(struct address_space *mapping)
{
	return IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
		test_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags);
}
static inline int filemap_nr_thps(struct address_space *mapping)
{
#ifdef CONFIG_READ_ONLY_THP_FOR_FS
	return atomic_read(&mapping->nr_thps);
#else
	return 0;
#endif
}

static inline void filemap_nr_thps_inc(struct address_space *mapping)
{
#ifdef CONFIG_READ_ONLY_THP_FOR_FS
	if (!mapping_large_folio_support(mapping))
		atomic_inc(&mapping->nr_thps);
#else
	WARN_ON_ONCE(mapping_large_folio_support(mapping) == 0);
#endif
}

static inline void filemap_nr_thps_dec(struct address_space *mapping)
{
#ifdef CONFIG_READ_ONLY_THP_FOR_FS
	if (!mapping_large_folio_support(mapping))
		atomic_dec(&mapping->nr_thps);
#else
	WARN_ON_ONCE(mapping_large_folio_support(mapping) == 0);
#endif
}

struct address_space *page_mapping(struct page *);
struct address_space *folio_mapping(struct folio *);
struct address_space *swapcache_mapping(struct folio *);
/**
 * folio_file_mapping - Find the mapping this folio belongs to.
 * @folio: The folio.
 *
 * For folios which are in the page cache, return the mapping that this
 * page belongs to.  Folios in the swap cache return the mapping of the
 * swap file or swap device where the data is stored.  This is different
 * from the mapping returned by folio_mapping().  The only reason to
 * use it is if, like NFS, you return 0 from ->activate_swapfile.
 *
 * Do not call this for folios which aren't in the page cache or swap cache.
 */
static inline struct address_space *folio_file_mapping(struct folio *folio)
{
	if (unlikely(folio_test_swapcache(folio)))
		return swapcache_mapping(folio);

	return folio->mapping;
}

static inline struct address_space *page_file_mapping(struct page *page)
{
	return folio_file_mapping(page_folio(page));
}

/*
 * For file cache pages, return the address_space, otherwise return NULL
 */
static inline struct address_space *page_mapping_file(struct page *page)
{
	struct folio *folio = page_folio(page);

	if (unlikely(folio_test_swapcache(folio)))
		return NULL;
	return folio_mapping(folio);
}

/**
 * folio_inode - Get the host inode for this folio.
 * @folio: The folio.
 *
 * For folios which are in the page cache, return the inode that this folio
 * belongs to.
 *
 * Do not call this for folios which aren't in the page cache.
 */
static inline struct inode *folio_inode(struct folio *folio)
{
	return folio->mapping->host;
}
/**
 * folio_attach_private - Attach private data to a folio.
 * @folio: Folio to attach data to.
 * @data: Data to attach to folio.
 *
 * Attaching private data to a folio increments the page's reference count.
 * The data must be detached before the folio will be freed.
 */
static inline void folio_attach_private(struct folio *folio, void *data)
{
	folio_get(folio);
	folio->private = data;
	folio_set_private(folio);
}

/**
 * folio_change_private - Change private data on a folio.
 * @folio: Folio to change the data on.
 * @data: Data to set on the folio.
 *
 * Change the private data attached to a folio and return the old
 * data.  The page must previously have had data attached and the data
 * must be detached before the folio will be freed.
 *
 * Return: Data that was previously attached to the folio.
 */
static inline void *folio_change_private(struct folio *folio, void *data)
{
	void *old = folio_get_private(folio);

	folio->private = data;
	return old;
}

/**
 * folio_detach_private - Detach private data from a folio.
 * @folio: Folio to detach data from.
 *
 * Removes the data that was previously attached to the folio and decrements
 * the refcount on the page.
 *
 * Return: Data that was attached to the folio.
 */
static inline void *folio_detach_private(struct folio *folio)
{
	void *data = folio_get_private(folio);

	if (!folio_test_private(folio))
		return NULL;
	folio_clear_private(folio);
	folio->private = NULL;
	folio_put(folio);

	return data;
}

static inline void attach_page_private(struct page *page, void *data)
{
	folio_attach_private(page_folio(page), data);
}

static inline void *detach_page_private(struct page *page)
{
	return folio_detach_private(page_folio(page));
}
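
/*
 * Example (illustrative, not part of this header): a filesystem that
 * hangs per-folio state off the page cache attaches it while filling
 * the folio and detaches it in its release path.  struct myfs_fstate
 * is a hypothetical type:
 *
 *	struct myfs_fstate *fs = kzalloc(sizeof(*fs), GFP_KERNEL);
 *
 *	folio_attach_private(folio, fs);	// takes a folio reference
 *	...
 *	fs = folio_detach_private(folio);	// drops that reference
 *	kfree(fs);
 */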
#ifdef CONFIG_NUMA
struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order);
#else
static inline struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order)
{
	return folio_alloc(gfp, order);
}
#endif

static inline struct page *__page_cache_alloc(gfp_t gfp)
{
	return &filemap_alloc_folio(gfp, 0)->page;
}

static inline struct page *page_cache_alloc(struct address_space *x)
{
	return __page_cache_alloc(mapping_gfp_mask(x));
}

static inline gfp_t readahead_gfp_mask(struct address_space *x)
{
	return mapping_gfp_mask(x) | __GFP_NORETRY | __GFP_NOWARN;
}

typedef int filler_t(struct file *, struct folio *);

pgoff_t page_cache_next_miss(struct address_space *mapping,
			     pgoff_t index, unsigned long max_scan);
pgoff_t page_cache_prev_miss(struct address_space *mapping,
			     pgoff_t index, unsigned long max_scan);
#define FGP_ACCESSED		0x00000001
#define FGP_LOCK		0x00000002
#define FGP_CREAT		0x00000004
#define FGP_WRITE		0x00000008
#define FGP_NOFS		0x00000010
#define FGP_NOWAIT		0x00000020
#define FGP_FOR_MMAP		0x00000040
#define FGP_HEAD		0x00000080
#define FGP_ENTRY		0x00000100
#define FGP_STABLE		0x00000200

struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
		int fgp_flags, gfp_t gfp);
struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
		int fgp_flags, gfp_t gfp);
/**
 * filemap_get_folio - Find and get a folio.
 * @mapping: The address_space to search.
 * @index: The page index.
 *
 * Looks up the page cache entry at @mapping & @index.  If a folio is
 * present, it is returned with an increased refcount.
 *
 * Otherwise, %NULL is returned.
 */
static inline struct folio *filemap_get_folio(struct address_space *mapping,
					pgoff_t index)
{
	return __filemap_get_folio(mapping, index, 0, 0);
}

/**
 * filemap_lock_folio - Find and lock a folio.
 * @mapping: The address_space to search.
 * @index: The page index.
 *
 * Looks up the page cache entry at @mapping & @index.  If a folio is
 * present, it is returned locked with an increased refcount.
 *
 * Context: May sleep.
 * Return: A folio or %NULL if there is no folio in the cache for this
 * index.  Will not return a shadow, swap or DAX entry.
 */
static inline struct folio *filemap_lock_folio(struct address_space *mapping,
					pgoff_t index)
{
	return __filemap_get_folio(mapping, index, FGP_LOCK, 0);
}
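
/*
 * Example (illustrative, not part of this header): a typical caller
 * checks for a cache miss and pairs the lock with an unlock and the
 * implicit reference with a put:
 *
 *	struct folio *folio = filemap_lock_folio(mapping, index);
 *
 *	if (folio) {
 *		... inspect or modify the locked folio ...
 *		folio_unlock(folio);
 *		folio_put(folio);
 *	}
 */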
/**
 * find_get_page - find and get a page reference
 * @mapping: the address_space to search
 * @offset: the page index
 *
 * Looks up the page cache slot at @mapping & @offset.  If there is a
 * page cache page, it is returned with an increased refcount.
 *
 * Otherwise, %NULL is returned.
 */
static inline struct page *find_get_page(struct address_space *mapping,
					pgoff_t offset)
{
	return pagecache_get_page(mapping, offset, 0, 0);
}

static inline struct page *find_get_page_flags(struct address_space *mapping,
					pgoff_t offset, int fgp_flags)
{
	return pagecache_get_page(mapping, offset, fgp_flags, 0);
}

/**
 * find_lock_page - locate, pin and lock a pagecache page
 * @mapping: the address_space to search
 * @index: the page index
 *
 * Looks up the page cache entry at @mapping & @index.  If there is a
 * page cache page, it is returned locked and with an increased
 * refcount.
 *
 * Context: May sleep.
 * Return: A struct page or %NULL if there is no page in the cache for this
 * index.
 */
static inline struct page *find_lock_page(struct address_space *mapping,
					pgoff_t index)
{
	return pagecache_get_page(mapping, index, FGP_LOCK, 0);
}
/**
 * find_or_create_page - locate or add a pagecache page
 * @mapping: the page's address_space
 * @index: the page's index into the mapping
 * @gfp_mask: page allocation mode
 *
 * Looks up the page cache slot at @mapping & @index.  If there is a
 * page cache page, it is returned locked and with an increased
 * refcount.
 *
 * If the page is not present, a new page is allocated using @gfp_mask
 * and added to the page cache and the VM's LRU list.  The page is
 * returned locked and with an increased refcount.
 *
 * On memory exhaustion, %NULL is returned.
 *
 * find_or_create_page() may sleep, even if @gfp_mask specifies an
 * atomic allocation!
 */
static inline struct page *find_or_create_page(struct address_space *mapping,
					pgoff_t index, gfp_t gfp_mask)
{
	return pagecache_get_page(mapping, index,
					FGP_LOCK|FGP_ACCESSED|FGP_CREAT,
					gfp_mask);
}
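
/*
 * Example (illustrative, not part of this header): callers that must
 * not recurse into the filesystem during allocation typically mask
 * the mapping's GFP flags before calling:
 *
 *	struct page *page = find_or_create_page(mapping, index,
 *			mapping_gfp_constraint(mapping, ~__GFP_FS));
 *
 *	if (!page)
 *		return -ENOMEM;
 *	... page is locked with an elevated refcount ...
 *	unlock_page(page);
 *	put_page(page);
 */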
/**
 * grab_cache_page_nowait - returns locked page at given index in given cache
 * @mapping: target address_space
 * @index: the page index
 *
 * Same as grab_cache_page(), but do not wait if the page is unavailable.
 * This is intended for speculative data generators, where the data can
 * be regenerated if the page couldn't be grabbed.  This routine should
 * be safe to call while holding the lock for another page.
 *
 * Clear __GFP_FS when allocating the page to avoid recursion into the fs
 * and deadlock against the caller's locked page.
 */
static inline struct page *grab_cache_page_nowait(struct address_space *mapping,
				pgoff_t index)
{
	return pagecache_get_page(mapping, index,
			FGP_LOCK|FGP_CREAT|FGP_NOFS|FGP_NOWAIT,
			mapping_gfp_mask(mapping));
}

#define swapcache_index(folio)	__page_file_index(&(folio)->page)
/**
 * folio_index - File index of a folio.
 * @folio: The folio.
 *
 * For a folio which is either in the page cache or the swap cache,
 * return its index within the address_space it belongs to.  If you know
 * the folio is definitely in the page cache, you can look at the folio's
 * index directly.
 *
 * Return: The index (offset in units of pages) of a folio in its file.
 */
static inline pgoff_t folio_index(struct folio *folio)
{
	if (unlikely(folio_test_swapcache(folio)))
		return swapcache_index(folio);
	return folio->index;
}

/**
 * folio_next_index - Get the index of the next folio.
 * @folio: The current folio.
 *
 * Return: The index of the folio which follows this folio in the file.
 */
static inline pgoff_t folio_next_index(struct folio *folio)
{
	return folio->index + folio_nr_pages(folio);
}
/**
 * folio_file_page - The page for a particular index.
 * @folio: The folio which contains this index.
 * @index: The index we want to look up.
 *
 * Sometimes after looking up a folio in the page cache, we need to
 * obtain the specific page for an index (eg a page fault).
 *
 * Return: The page containing the file data for this index.
 */
static inline struct page *folio_file_page(struct folio *folio, pgoff_t index)
{
	/* HugeTLBfs indexes the page cache in units of hpage_size */
	if (folio_test_hugetlb(folio))
		return &folio->page;
	return folio_page(folio, index & (folio_nr_pages(folio) - 1));
}

/**
 * folio_contains - Does this folio contain this index?
 * @folio: The folio.
 * @index: The page index within the file.
 *
 * Context: The caller should have the page locked in order to prevent
 * (eg) shmem from moving the page between the page cache and swap cache
 * and changing its index in the middle of the operation.
 * Return: true or false.
 */
static inline bool folio_contains(struct folio *folio, pgoff_t index)
{
	/* HugeTLBfs indexes the page cache in units of hpage_size */
	if (folio_test_hugetlb(folio))
		return folio->index == index;
	return index - folio_index(folio) < folio_nr_pages(folio);
}

/*
 * Given the page we found in the page cache, return the page corresponding
 * to this index in the file
 */
static inline struct page *find_subpage(struct page *head, pgoff_t index)
{
	/* HugeTLBfs wants the head page regardless */
	if (PageHuge(head))
		return head;

	return head + (index & (thp_nr_pages(head) - 1));
}
unsigned filemap_get_folios(struct address_space *mapping, pgoff_t *start,
		pgoff_t end, struct folio_batch *fbatch);
unsigned filemap_get_folios_contig(struct address_space *mapping,
		pgoff_t *start, pgoff_t end, struct folio_batch *fbatch);
unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
			pgoff_t end, xa_mark_t tag, unsigned int nr_pages,
			struct page **pages);
static inline unsigned find_get_pages_tag(struct address_space *mapping,
			pgoff_t *index, xa_mark_t tag, unsigned int nr_pages,
			struct page **pages)
{
	return find_get_pages_range_tag(mapping, index, (pgoff_t)-1, tag,
					nr_pages, pages);
}

struct page *grab_cache_page_write_begin(struct address_space *mapping,
			pgoff_t index);

/*
 * Returns locked page at given index in given cache, creating it if needed.
 */
static inline struct page *grab_cache_page(struct address_space *mapping,
						pgoff_t index)
{
	return find_or_create_page(mapping, index, mapping_gfp_mask(mapping));
}
struct folio *read_cache_folio(struct address_space *, pgoff_t index,
		filler_t *filler, struct file *file);
struct page *read_cache_page(struct address_space *, pgoff_t index,
		filler_t *filler, struct file *file);
extern struct page *read_cache_page_gfp(struct address_space *mapping,
				pgoff_t index, gfp_t gfp_mask);

static inline struct page *read_mapping_page(struct address_space *mapping,
				pgoff_t index, struct file *file)
{
	return read_cache_page(mapping, index, NULL, file);
}

static inline struct folio *read_mapping_folio(struct address_space *mapping,
				pgoff_t index, struct file *file)
{
	return read_cache_folio(mapping, index, NULL, file);
}
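
/*
 * Example (illustrative, not part of this header): read_mapping_folio()
 * returns an ERR_PTR() on failure rather than NULL, so callers check it
 * with IS_ERR():
 *
 *	struct folio *folio = read_mapping_folio(mapping, index, file);
 *
 *	if (IS_ERR(folio))
 *		return PTR_ERR(folio);
 *	... folio is uptodate with an elevated refcount ...
 *	folio_put(folio);
 */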
/*
 * Get index of the page within radix-tree (but not for hugetlb pages).
 * (TODO: remove once hugetlb pages will have ->index in PAGE_SIZE)
 */
static inline pgoff_t page_to_index(struct page *page)
{
	struct page *head;

	if (likely(!PageTransTail(page)))
		return page->index;

	head = compound_head(page);
	/*
	 * We don't initialize ->index for tail pages: calculate based on
	 * head page
	 */
	return head->index + page - head;
}

extern pgoff_t hugetlb_basepage_index(struct page *page);

/*
 * Get the offset in PAGE_SIZE (even for hugetlb pages).
 * (TODO: hugetlb pages should have ->index in PAGE_SIZE)
 */
static inline pgoff_t page_to_pgoff(struct page *page)
{
	if (unlikely(PageHuge(page)))
		return hugetlb_basepage_index(page);
	return page_to_index(page);
}

/*
 * Return byte-offset into filesystem object for page.
 */
static inline loff_t page_offset(struct page *page)
{
	return ((loff_t)page->index) << PAGE_SHIFT;
}

static inline loff_t page_file_offset(struct page *page)
{
	return ((loff_t)page_index(page)) << PAGE_SHIFT;
}
/**
 * folio_pos - Returns the byte position of this folio in its file.
 * @folio: The folio.
 */
static inline loff_t folio_pos(struct folio *folio)
{
	return page_offset(&folio->page);
}

/**
 * folio_file_pos - Returns the byte position of this folio in its file.
 * @folio: The folio.
 *
 * This differs from folio_pos() for folios which belong to a swap file.
 * NFS is the only filesystem today which needs to use folio_file_pos().
 */
static inline loff_t folio_file_pos(struct folio *folio)
{
	return page_file_offset(&folio->page);
}

/*
 * Get the offset in PAGE_SIZE (even for hugetlb folios).
 * (TODO: hugetlb folios should have ->index in PAGE_SIZE)
 */
static inline pgoff_t folio_pgoff(struct folio *folio)
{
	if (unlikely(folio_test_hugetlb(folio)))
		return hugetlb_basepage_index(&folio->page);
	return folio->index;
}

extern pgoff_t linear_hugepage_index(struct vm_area_struct *vma,
				     unsigned long address);

static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
					unsigned long address)
{
	pgoff_t pgoff;

	if (unlikely(is_vm_hugetlb_page(vma)))
		return linear_hugepage_index(vma, address);
	pgoff = (address - vma->vm_start) >> PAGE_SHIFT;
	pgoff += vma->vm_pgoff;
	return pgoff;
}
struct wait_page_key {
	struct folio *folio;
	int bit_nr;
	int page_match;
};

struct wait_page_queue {
	struct folio *folio;
	int bit_nr;
	wait_queue_entry_t wait;
};

static inline bool wake_page_match(struct wait_page_queue *wait_page,
				  struct wait_page_key *key)
{
	if (wait_page->folio != key->folio)
		return false;
	key->page_match = 1;

	if (wait_page->bit_nr != key->bit_nr)
		return false;

	return true;
}

void __folio_lock(struct folio *folio);
int __folio_lock_killable(struct folio *folio);
bool __folio_lock_or_retry(struct folio *folio, struct mm_struct *mm,
				unsigned int flags);
void unlock_page(struct page *page);
void folio_unlock(struct folio *folio);
/**
 * folio_trylock() - Attempt to lock a folio.
 * @folio: The folio to attempt to lock.
 *
 * Sometimes it is undesirable to wait for a folio to be unlocked (eg
 * when the locks are being taken in the wrong order, or if making
 * progress through a batch of folios is more important than processing
 * them in order).  Usually folio_lock() is the correct function to call.
 *
 * Context: Any context.
 * Return: Whether the lock was successfully acquired.
 */
static inline bool folio_trylock(struct folio *folio)
{
	return likely(!test_and_set_bit_lock(PG_locked, folio_flags(folio, 0)));
}

/*
 * Return true if the page was successfully locked
 */
static inline int trylock_page(struct page *page)
{
	return folio_trylock(page_folio(page));
}
/**
 * folio_lock() - Lock this folio.
 * @folio: The folio to lock.
 *
 * The folio lock protects against many things, probably more than it
 * should.  It is primarily held while a folio is being brought uptodate,
 * either from its backing file or from swap.  It is also held while a
 * folio is being truncated from its address_space, so holding the lock
 * is sufficient to keep folio->mapping stable.
 *
 * The folio lock is also held while write() is modifying the page to
 * provide POSIX atomicity guarantees (as long as the write does not
 * cross a page boundary).  Other modifications to the data in the folio
 * do not hold the folio lock and can race with writes, eg DMA and stores
 * from userspace.
 *
 * Context: May sleep.  If you need to acquire the locks of two or
 * more folios, they must be in order of ascending index, if they are
 * in the same address_space.  If they are in different address_spaces,
 * acquire the lock of the folio which belongs to the address_space which
 * has the lowest address in memory first.
 */
static inline void folio_lock(struct folio *folio)
{
	might_sleep();
	if (!folio_trylock(folio))
		__folio_lock(folio);
}

/**
 * lock_page() - Lock the folio containing this page.
 * @page: The page to lock.
 *
 * See folio_lock() for a description of what the lock protects.
 * This is a legacy function and new code should probably use folio_lock()
 * instead.
 *
 * Context: May sleep.  Pages in the same folio share a lock, so do not
 * attempt to lock two pages which share a folio.
 */
static inline void lock_page(struct page *page)
{
	struct folio *folio;

	might_sleep();
	folio = page_folio(page);
	if (!folio_trylock(folio))
		__folio_lock(folio);
}
/**
 * folio_lock_killable() - Lock this folio, interruptible by a fatal signal.
 * @folio: The folio to lock.
 *
 * Attempts to lock the folio, like folio_lock(), except that the sleep
 * to acquire the lock is interruptible by a fatal signal.
 *
 * Context: May sleep; see folio_lock().
 * Return: 0 if the lock was acquired; -EINTR if a fatal signal was received.
 */
static inline int folio_lock_killable(struct folio *folio)
{
	might_sleep();
	if (!folio_trylock(folio))
		return __folio_lock_killable(folio);
	return 0;
}
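
/*
 * Example (illustrative, not part of this header): paths that run on
 * behalf of a user task usually prefer the killable variant so that a
 * fatal signal does not stay blocked on the folio lock:
 *
 *	int err = folio_lock_killable(folio);
 *
 *	if (err)
 *		return err;	// -EINTR: task was fatally signalled
 *	... folio is locked ...
 *	folio_unlock(folio);
 */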
/*
 * lock_page_killable is like lock_page but can be interrupted by fatal
 * signals.  It returns 0 if it locked the page and -EINTR if it was
 * killed while waiting.
 */
static inline int lock_page_killable(struct page *page)
{
	return folio_lock_killable(page_folio(page));
}

/*
 * folio_lock_or_retry - Lock the folio, unless this would block and the
 * caller indicated that it can handle a retry.
 *
 * Return value and mmap_lock implications depend on flags; see
 * __folio_lock_or_retry().
 */
static inline bool folio_lock_or_retry(struct folio *folio,
		struct mm_struct *mm, unsigned int flags)
{
	might_sleep();
	return folio_trylock(folio) || __folio_lock_or_retry(folio, mm, flags);
}

/*
 * This is exported only for folio_wait_locked/folio_wait_writeback, etc.,
 * and should not be used directly.
 */
void folio_wait_bit(struct folio *folio, int bit_nr);
int folio_wait_bit_killable(struct folio *folio, int bit_nr);
/*
 * Wait for a folio to be unlocked.
 *
 * This must be called with the caller "holding" the folio,
 * ie with increased folio reference count so that the folio won't
 * go away during the wait.
 */
static inline void folio_wait_locked(struct folio *folio)
{
	if (folio_test_locked(folio))
		folio_wait_bit(folio, PG_locked);
}

static inline int folio_wait_locked_killable(struct folio *folio)
{
	if (!folio_test_locked(folio))
		return 0;
	return folio_wait_bit_killable(folio, PG_locked);
}

static inline void wait_on_page_locked(struct page *page)
{
	folio_wait_locked(page_folio(page));
}

static inline int wait_on_page_locked_killable(struct page *page)
{
	return folio_wait_locked_killable(page_folio(page));
}
void wait_on_page_writeback(struct page *page);
void folio_wait_writeback(struct folio *folio);
int folio_wait_writeback_killable(struct folio *folio);
void end_page_writeback(struct page *page);
void folio_end_writeback(struct folio *folio);
void wait_for_stable_page(struct page *page);
void folio_wait_stable(struct folio *folio);
void __folio_mark_dirty(struct folio *folio, struct address_space *, int warn);
static inline void __set_page_dirty(struct page *page,
		struct address_space *mapping, int warn)
{
	__folio_mark_dirty(page_folio(page), mapping, warn);
}
void folio_account_cleaned(struct folio *folio, struct bdi_writeback *wb);
void __folio_cancel_dirty(struct folio *folio);
static inline void folio_cancel_dirty(struct folio *folio)
{
	/* Avoid atomic ops, locking, etc. when not actually needed. */
	if (folio_test_dirty(folio))
		__folio_cancel_dirty(folio);
}
bool folio_clear_dirty_for_io(struct folio *folio);
bool clear_page_dirty_for_io(struct page *page);
void folio_invalidate(struct folio *folio, size_t offset, size_t length);
int __must_check folio_write_one(struct folio *folio);
static inline int __must_check write_one_page(struct page *page)
{
	return folio_write_one(page_folio(page));
}

int __set_page_dirty_nobuffers(struct page *page);
bool noop_dirty_folio(struct address_space *mapping, struct folio *folio);
#ifdef CONFIG_MIGRATION
int filemap_migrate_folio(struct address_space *mapping, struct folio *dst,
		struct folio *src, enum migrate_mode mode);
#else
#define filemap_migrate_folio NULL
#endif
void page_endio(struct page *page, bool is_write, int err);

void folio_end_private_2(struct folio *folio);
void folio_wait_private_2(struct folio *folio);
int folio_wait_private_2_killable(struct folio *folio);

/*
 * Add an arbitrary waiter to a folio's wait queue
 */
void folio_add_wait_queue(struct folio *folio, wait_queue_entry_t *waiter);

/*
 * Fault in userspace address range.
 */
size_t fault_in_writeable(char __user *uaddr, size_t size);
size_t fault_in_subpage_writeable(char __user *uaddr, size_t size);
size_t fault_in_safe_writeable(const char __user *uaddr, size_t size);
size_t fault_in_readable(const char __user *uaddr, size_t size);
int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
		pgoff_t index, gfp_t gfp);
int filemap_add_folio(struct address_space *mapping, struct folio *folio,
		pgoff_t index, gfp_t gfp);
void filemap_remove_folio(struct folio *folio);
void delete_from_page_cache(struct page *page);
void __filemap_remove_folio(struct folio *folio, void *shadow);
void replace_page_cache_page(struct page *old, struct page *new);
void delete_from_page_cache_batch(struct address_space *mapping,
				  struct folio_batch *fbatch);
int try_to_release_page(struct page *page, gfp_t gfp);
bool filemap_release_folio(struct folio *folio, gfp_t gfp);
loff_t mapping_seek_hole_data(struct address_space *, loff_t start, loff_t end,
		int whence);

/* Must be non-static for BPF error injection */
int __filemap_add_folio(struct address_space *mapping, struct folio *folio,
		pgoff_t index, gfp_t gfp, void **shadowp);

bool filemap_range_has_writeback(struct address_space *mapping,
				 loff_t start_byte, loff_t end_byte);
/**
 * filemap_range_needs_writeback - check if range potentially needs writeback
 * @mapping:           address space within which to check
 * @start_byte:        offset in bytes where the range starts
 * @end_byte:          offset in bytes where the range ends (inclusive)
 *
 * Find at least one page in the range supplied, usually used to check if
 * direct writing in this range will trigger a writeback.  Used by O_DIRECT
 * read/write with IOCB_NOWAIT, to see if the caller needs to do
 * filemap_write_and_wait_range() before proceeding.
 *
 * Return: %true if the caller should do filemap_write_and_wait_range() before
 * doing O_DIRECT to a page in this range, %false otherwise.
 */
static inline bool filemap_range_needs_writeback(struct address_space *mapping,
						 loff_t start_byte,
						 loff_t end_byte)
{
	if (!mapping->nrpages)
		return false;
	if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) &&
	    !mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
		return false;
	return filemap_range_has_writeback(mapping, start_byte, end_byte);
}
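
/*
 * Example (illustrative, not part of this header): an O_DIRECT write
 * path honouring IOCB_NOWAIT can use this to bail out instead of
 * blocking on writeback:
 *
 *	if (iocb->ki_flags & IOCB_NOWAIT) {
 *		if (filemap_range_needs_writeback(mapping, pos,
 *						  pos + count - 1))
 *			return -EAGAIN;
 *	} else {
 *		... filemap_write_and_wait_range(mapping, pos,
 *						 pos + count - 1) ...
 *	}
 */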
/**
 * struct readahead_control - Describes a readahead request.
 *
 * A readahead request is for consecutive pages.  Filesystems which
 * implement the ->readahead method should call readahead_page() or
 * readahead_page_batch() in a loop and attempt to start I/O against
 * each page in the request.
 *
 * Most of the fields in this struct are private and should be accessed
 * by the functions below.
 *
 * @file: The file, used primarily by network filesystems for authentication.
 *	  May be NULL if invoked internally by the filesystem.
 * @mapping: Readahead this filesystem object.
 * @ra: File readahead state.  May be NULL.
 */
struct readahead_control {
	struct file *file;
	struct address_space *mapping;
	struct file_ra_state *ra;
/* private: use the readahead_* accessors instead */
	pgoff_t _index;
	unsigned int _nr_pages;
	unsigned int _batch_count;
};

#define DEFINE_READAHEAD(ractl, f, r, m, i)				\
	struct readahead_control ractl = {				\
		.file = f,						\
		.mapping = m,						\
		.ra = r,						\
		._index = i,						\
	}

#define VM_READAHEAD_PAGES	(SZ_128K / PAGE_SIZE)

void page_cache_ra_unbounded(struct readahead_control *,
		unsigned long nr_to_read, unsigned long lookahead_count);
void page_cache_sync_ra(struct readahead_control *, unsigned long req_count);
void page_cache_async_ra(struct readahead_control *, struct folio *,
		unsigned long req_count);
void readahead_expand(struct readahead_control *ractl,
		      loff_t new_start, size_t new_len);
/**
 * page_cache_sync_readahead - generic file readahead
 * @mapping: address_space which holds the pagecache and I/O vectors
 * @ra: file_ra_state which holds the readahead state
 * @file: Used by the filesystem for authentication.
 * @index: Index of first page to be read.
 * @req_count: Total number of pages being read by the caller.
 *
 * page_cache_sync_readahead() should be called when a cache miss happened:
 * it will submit the read.  The readahead logic may decide to piggyback more
 * pages onto the read request if access patterns suggest it will improve
 * performance.
 */
static inline
void page_cache_sync_readahead(struct address_space *mapping,
		struct file_ra_state *ra, struct file *file, pgoff_t index,
		unsigned long req_count)
{
	DEFINE_READAHEAD(ractl, file, ra, mapping, index);
	page_cache_sync_ra(&ractl, req_count);
}

/**
 * page_cache_async_readahead - file readahead for marked pages
 * @mapping: address_space which holds the pagecache and I/O vectors
 * @ra: file_ra_state which holds the readahead state
 * @file: Used by the filesystem for authentication.
 * @folio: The folio at @index which triggered the readahead call.
 * @index: Index of first page to be read.
 * @req_count: Total number of pages being read by the caller.
 *
 * page_cache_async_readahead() should be called when a page is used which
 * is marked as PageReadahead; this is a marker to suggest that the application
 * has used up enough of the readahead window that we should start pulling in
 * more pages.
 */
static inline
void page_cache_async_readahead(struct address_space *mapping,
		struct file_ra_state *ra, struct file *file,
		struct folio *folio, pgoff_t index, unsigned long req_count)
{
	DEFINE_READAHEAD(ractl, file, ra, mapping, index);
	page_cache_async_ra(&ractl, folio, req_count);
}
static inline struct folio *__readahead_folio(struct readahead_control *ractl)
{
	struct folio *folio;

	BUG_ON(ractl->_batch_count > ractl->_nr_pages);
	ractl->_nr_pages -= ractl->_batch_count;
	ractl->_index += ractl->_batch_count;

	if (!ractl->_nr_pages) {
		ractl->_batch_count = 0;
		return NULL;
	}

	folio = xa_load(&ractl->mapping->i_pages, ractl->_index);
	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
	ractl->_batch_count = folio_nr_pages(folio);

	return folio;
}
/**
 * readahead_page - Get the next page to read.
 * @ractl: The current readahead request.
 *
 * Context: The page is locked and has an elevated refcount.  The caller
 * should decrease the refcount once the page has been submitted for I/O
 * and unlock the page once all I/O to that page has completed.
 * Return: A pointer to the next page, or %NULL if we are done.
 */
static inline struct page *readahead_page(struct readahead_control *ractl)
{
	struct folio *folio = __readahead_folio(ractl);

	return &folio->page;
}
/**
 * readahead_folio - Get the next folio to read.
 * @ractl: The current readahead request.
 *
 * Context: The folio is locked.  The caller should unlock the folio once
 * all I/O to that folio has completed.
 * Return: A pointer to the next folio, or %NULL if we are done.
 */
static inline struct folio *readahead_folio(struct readahead_control *ractl)
{
	struct folio *folio = __readahead_folio(ractl);

	if (folio)
		folio_put(folio);
	return folio;
}
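
/*
 * Example (illustrative, not part of this header): a filesystem's
 * ->readahead implementation typically consumes the request one folio
 * at a time.  myfs_readahead is a hypothetical name:
 *
 *	static void myfs_readahead(struct readahead_control *ractl)
 *	{
 *		struct folio *folio;
 *
 *		while ((folio = readahead_folio(ractl))) {
 *			... start read I/O; the completion path
 *			    unlocks the folio ...
 *		}
 *	}
 */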
static inline unsigned int __readahead_batch(struct readahead_control *rac,
		struct page **array, unsigned int array_sz)
{
	unsigned int i = 0;
	XA_STATE(xas, &rac->mapping->i_pages, 0);
	struct page *page;

	BUG_ON(rac->_batch_count > rac->_nr_pages);
	rac->_nr_pages -= rac->_batch_count;
	rac->_index += rac->_batch_count;
	rac->_batch_count = 0;

	xas_set(&xas, rac->_index);
	rcu_read_lock();
	xas_for_each(&xas, page, rac->_index + rac->_nr_pages - 1) {
		if (xas_retry(&xas, page))
			continue;
		VM_BUG_ON_PAGE(!PageLocked(page), page);
		VM_BUG_ON_PAGE(PageTail(page), page);
		array[i++] = page;
		rac->_batch_count += thp_nr_pages(page);
		if (i == array_sz)
			break;
	}
	rcu_read_unlock();

	return i;
}
/**
 * readahead_page_batch - Get a batch of pages to read.
 * @rac: The current readahead request.
 * @array: An array of pointers to struct page.
 *
 * Context: The pages are locked and have an elevated refcount.  The caller
 * should decrease the refcount once the page has been submitted for I/O
 * and unlock the page once all I/O to that page has completed.
 * Return: The number of pages placed in the array.  0 indicates the request
 * is complete.
 */
#define readahead_page_batch(rac, array)				\
	__readahead_batch(rac, array, ARRAY_SIZE(array))
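
/*
 * Example (illustrative, not part of this header): drivers that submit
 * I/O in fixed-size chunks can drain the request batch by batch:
 *
 *	struct page *pages[16];
 *	unsigned int i, nr;
 *
 *	while ((nr = readahead_page_batch(ractl, pages))) {
 *		for (i = 0; i < nr; i++)
 *			... submit pages[i] for read I/O ...
 *	}
 */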
/**
 * readahead_pos - The byte offset into the file of this readahead request.
 * @rac: The readahead request.
 */
static inline loff_t readahead_pos(struct readahead_control *rac)
{
	return (loff_t)rac->_index * PAGE_SIZE;
}

/**
 * readahead_length - The number of bytes in this readahead request.
 * @rac: The readahead request.
 */
static inline size_t readahead_length(struct readahead_control *rac)
{
	return rac->_nr_pages * PAGE_SIZE;
}

/**
 * readahead_index - The index of the first page in this readahead request.
 * @rac: The readahead request.
 */
static inline pgoff_t readahead_index(struct readahead_control *rac)
{
	return rac->_index;
}

/**
 * readahead_count - The number of pages in this readahead request.
 * @rac: The readahead request.
 */
static inline unsigned int readahead_count(struct readahead_control *rac)
{
	return rac->_nr_pages;
}

/**
 * readahead_batch_length - The number of bytes in the current batch.
 * @rac: The readahead request.
 */
static inline size_t readahead_batch_length(struct readahead_control *rac)
{
	return rac->_batch_count * PAGE_SIZE;
}

static inline unsigned long dir_pages(struct inode *inode)
{
	return (unsigned long)(inode->i_size + PAGE_SIZE - 1) >>
			       PAGE_SHIFT;
}
/**
 * folio_mkwrite_check_truncate - check if folio was truncated
 * @folio: the folio to check
 * @inode: the inode to check the folio against
 *
 * Return: the number of bytes in the folio up to EOF,
 * or -EFAULT if the folio was truncated.
 */
static inline ssize_t folio_mkwrite_check_truncate(struct folio *folio,
					      struct inode *inode)
{
	loff_t size = i_size_read(inode);
	pgoff_t index = size >> PAGE_SHIFT;
	size_t offset = offset_in_folio(folio, size);

	if (!folio->mapping)
		return -EFAULT;

	/* folio is wholly inside EOF */
	if (folio_next_index(folio) - 1 < index)
		return folio_size(folio);
	/* folio is wholly past EOF */
	if (folio->index > index || !offset)
		return -EFAULT;
	/* folio is partially inside EOF */
	return offset;
}
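
/*
 * Example (illustrative, not part of this header): a ->page_mkwrite
 * handler can use this to size the dirtied range and to catch a racing
 * truncate after it has locked the folio:
 *
 *	folio_lock(folio);
 *	ret = folio_mkwrite_check_truncate(folio, inode);
 *	if (ret < 0) {
 *		folio_unlock(folio);
 *		return VM_FAULT_NOPAGE;	// folio was truncated
 *	}
 *	... mark bytes [0, ret) of the folio dirty ...
 */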
/**
 * page_mkwrite_check_truncate - check if page was truncated
 * @page: the page to check
 * @inode: the inode to check the page against
 *
 * Returns the number of bytes in the page up to EOF,
 * or -EFAULT if the page was truncated.
 */
static inline int page_mkwrite_check_truncate(struct page *page,
					      struct inode *inode)
{
	loff_t size = i_size_read(inode);
	pgoff_t index = size >> PAGE_SHIFT;
	int offset = offset_in_page(size);

	if (page->mapping != inode->i_mapping)
		return -EFAULT;

	/* page is wholly inside EOF */
	if (page->index < index)
		return PAGE_SIZE;
	/* page is wholly past EOF */
	if (page->index > index || !offset)
		return -EFAULT;
	/* page is partially inside EOF */
	return offset;
}
/**
 * i_blocks_per_folio - How many blocks fit in this folio.
 * @inode: The inode which contains the blocks.
 * @folio: The folio.
 *
 * If the block size is larger than the size of this folio, return zero.
 *
 * Context: The caller should hold a refcount on the folio to prevent it
 * from being split.
 * Return: The number of filesystem blocks covered by this folio.
 */
static inline
unsigned int i_blocks_per_folio(struct inode *inode, struct folio *folio)
{
	return folio_size(folio) >> inode->i_blkbits;
}

static inline
unsigned int i_blocks_per_page(struct inode *inode, struct page *page)
{
	return i_blocks_per_folio(inode, page_folio(page));
}
#endif /* _LINUX_PAGEMAP_H */