Merge tag 'memblock-v5.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rppt...
[linux-block.git] / mm / swapfile.c
index a9a04a5360d9a037b778bbbd67ea4dca991664e9..1e07d1c776f2ae328e5a8ef0194c38fa144368ad 100644 (file)
@@ -100,11 +100,10 @@ atomic_t nr_rotate_swap = ATOMIC_INIT(0);
 
 static struct swap_info_struct *swap_type_to_swap_info(int type)
 {
-       if (type >= READ_ONCE(nr_swapfiles))
+       if (type >= MAX_SWAPFILES)
                return NULL;
 
-       smp_rmb();      /* Pairs with smp_wmb in alloc_swap_info. */
-       return READ_ONCE(swap_info[type]);
+       return READ_ONCE(swap_info[type]); /* rcu_dereference() */
 }
 
 static inline unsigned char swap_count(unsigned char ent)
@@ -453,10 +452,10 @@ static void swap_cluster_schedule_discard(struct swap_info_struct *si,
                unsigned int idx)
 {
        /*
-        * If scan_swap_map() can't find a free cluster, it will check
+        * If scan_swap_map_slots() can't find a free cluster, it will check
         * si->swap_map directly. To make sure the discarding cluster isn't
-        * taken by scan_swap_map(), mark the swap entries bad (occupied). It
-        * will be cleared after discard
+        * taken by scan_swap_map_slots(), mark the swap entries bad (occupied).
+        * It will be cleared after discard.
         */
        memset(si->swap_map + idx * SWAPFILE_CLUSTER,
                        SWAP_MAP_BAD, SWAPFILE_CLUSTER);
@@ -589,7 +588,7 @@ static void dec_cluster_info_page(struct swap_info_struct *p,
 }
 
 /*
- * It's possible scan_swap_map() uses a free cluster in the middle of free
+ * It's possible scan_swap_map_slots() uses a free cluster in the middle of free
  * cluster list. Avoiding such abuse to avoid list corruption.
  */
 static bool
@@ -1037,21 +1036,6 @@ static void swap_free_cluster(struct swap_info_struct *si, unsigned long idx)
        swap_range_free(si, offset, SWAPFILE_CLUSTER);
 }
 
-static unsigned long scan_swap_map(struct swap_info_struct *si,
-                                  unsigned char usage)
-{
-       swp_entry_t entry;
-       int n_ret;
-
-       n_ret = scan_swap_map_slots(si, usage, 1, &entry);
-
-       if (n_ret)
-               return swp_offset(entry);
-       else
-               return 0;
-
-}
-
 int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_size)
 {
        unsigned long size = swap_entry_size(entry_size);
@@ -1114,14 +1098,14 @@ start_over:
 nextsi:
                /*
                 * if we got here, it's likely that si was almost full before,
-                * and since scan_swap_map() can drop the si->lock, multiple
-                * callers probably all tried to get a page from the same si
-                * and it filled up before we could get one; or, the si filled
-                * up between us dropping swap_avail_lock and taking si->lock.
-                * Since we dropped the swap_avail_lock, the swap_avail_head
-                * list may have been modified; so if next is still in the
-                * swap_avail_head list then try it, otherwise start over
-                * if we have not gotten any slots.
+                * and since scan_swap_map_slots() can drop the si->lock,
+                * multiple callers probably all tried to get a page from the
+                * same si and it filled up before we could get one; or, the si
+                * filled up between us dropping swap_avail_lock and taking
+                * si->lock. Since we dropped the swap_avail_lock, the
+                * swap_avail_head list may have been modified; so if next is
+                * still in the swap_avail_head list then try it, otherwise
+                * start over if we have not gotten any slots.
                 */
                if (plist_node_empty(&next->avail_lists[node]))
                        goto start_over;
@@ -1137,30 +1121,6 @@ noswap:
        return n_ret;
 }
 
-/* The only caller of this function is now suspend routine */
-swp_entry_t get_swap_page_of_type(int type)
-{
-       struct swap_info_struct *si = swap_type_to_swap_info(type);
-       pgoff_t offset;
-
-       if (!si)
-               goto fail;
-
-       spin_lock(&si->lock);
-       if (si->flags & SWP_WRITEOK) {
-               /* This is called for allocating swap entry, not cache */
-               offset = scan_swap_map(si, 1);
-               if (offset) {
-                       atomic_long_dec(&nr_swap_pages);
-                       spin_unlock(&si->lock);
-                       return swp_entry(type, offset);
-               }
-       }
-       spin_unlock(&si->lock);
-fail:
-       return (swp_entry_t) {0};
-}
-
 static struct swap_info_struct *__swap_info_get(swp_entry_t entry)
 {
        struct swap_info_struct *p;
@@ -1812,6 +1772,24 @@ int free_swap_and_cache(swp_entry_t entry)
 }
 
 #ifdef CONFIG_HIBERNATION
+
+swp_entry_t get_swap_page_of_type(int type)
+{
+       struct swap_info_struct *si = swap_type_to_swap_info(type);
+       swp_entry_t entry = {0};
+
+       if (!si)
+               goto fail;
+
+       /* This is called for allocating a swap entry, not cache */
+       spin_lock(&si->lock);
+       if ((si->flags & SWP_WRITEOK) && scan_swap_map_slots(si, 1, 1, &entry))
+               atomic_long_dec(&nr_swap_pages);
+       spin_unlock(&si->lock);
+fail:
+       return entry;
+}
+
 /*
  * Find the swap type that corresponds to given device (if any).
  *
@@ -2649,7 +2627,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
        spin_lock(&p->lock);
        drain_mmlist();
 
-       /* wait for anyone still in scan_swap_map */
+       /* wait for anyone still in scan_swap_map_slots() */
        p->highest_bit = 0;             /* cuts scans short */
        while (p->flags >= SWP_SCANNING) {
                spin_unlock(&p->lock);
@@ -2884,14 +2862,12 @@ static struct swap_info_struct *alloc_swap_info(void)
        }
        if (type >= nr_swapfiles) {
                p->type = type;
-               WRITE_ONCE(swap_info[type], p);
                /*
-                * Write swap_info[type] before nr_swapfiles, in case a
-                * racing procfs swap_start() or swap_next() is reading them.
-                * (We never shrink nr_swapfiles, we never free this entry.)
+                * Publish the swap_info_struct after initializing it.
+                * Note that kvzalloc() above zeroes all its fields.
                 */
-               smp_wmb();
-               WRITE_ONCE(nr_swapfiles, nr_swapfiles + 1);
+               smp_store_release(&swap_info[type], p); /* rcu_assign_pointer() */
+               nr_swapfiles++;
        } else {
                defer = p;
                p = swap_info[type];
@@ -2991,7 +2967,7 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
                return 0;
        }
 
-       /* swap partition endianess hack... */
+       /* swap partition endianness hack... */
        if (swab32(swap_header->info.version) == 1) {
                swab32s(&swap_header->info.version);
                swab32s(&swap_header->info.last_page);