Merge branch 'akpm' (patches from Andrew)
author     Linus Torvalds <torvalds@linux-foundation.org>
           Sat, 3 Nov 2018 17:21:43 +0000 (10:21 -0700)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Sat, 3 Nov 2018 17:21:43 +0000 (10:21 -0700)
Merge more updates from Andrew Morton:

 - more ocfs2 work

 - various leftovers

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  memory_hotplug: cond_resched in __remove_pages
  bfs: add sanity check at bfs_fill_super()
  kernel/sysctl.c: remove duplicated include
  kernel/kexec_file.c: remove some duplicated includes
  mm, thp: consolidate THP gfp handling into alloc_hugepage_direct_gfpmask
  ocfs2: fix clusters leak in ocfs2_defrag_extent()
  ocfs2: dlmglue: clean up timestamp handling
  ocfs2: don't put and assign NULL to a bh allocated outside
  ocfs2: fix a misuse of brelse after failing ocfs2_check_dir_entry
  ocfs2: don't use iocb when EIOCBQUEUED returns
  ocfs2: without quota support, avoid calling quota recovery
  ocfs2: remove ocfs2_is_o2cb_active()
  mm: thp: relax __GFP_THISNODE for MADV_HUGEPAGE mappings
  include/linux/notifier.h: SRCU: fix ctags
  mm: handle no memcg case in memcg_kmem_charge() properly

19 files changed:
fs/bfs/inode.c
fs/ocfs2/buffer_head_io.c
fs/ocfs2/dir.c
fs/ocfs2/dlmglue.c
fs/ocfs2/file.c
fs/ocfs2/journal.c
fs/ocfs2/move_extents.c
fs/ocfs2/stackglue.c
fs/ocfs2/stackglue.h
include/linux/gfp.h
include/linux/mempolicy.h
include/linux/notifier.h
kernel/kexec_file.c
kernel/sysctl.c
mm/huge_memory.c
mm/memcontrol.c
mm/memory_hotplug.c
mm/mempolicy.c
mm/shmem.c

diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 9a69392f1fb375c4c16b07332248edf7da57c9b6..d81c148682e715a9f0ed4937185dd3b5538a9603 100644
@@ -350,7 +350,8 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
 
        s->s_magic = BFS_MAGIC;
 
-       if (le32_to_cpu(bfs_sb->s_start) > le32_to_cpu(bfs_sb->s_end)) {
+       if (le32_to_cpu(bfs_sb->s_start) > le32_to_cpu(bfs_sb->s_end) ||
+           le32_to_cpu(bfs_sb->s_start) < BFS_BSIZE) {
                printf("Superblock is corrupted\n");
                goto out1;
        }
@@ -359,9 +360,11 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
                                        sizeof(struct bfs_inode)
                                        + BFS_ROOT_INO - 1;
        imap_len = (info->si_lasti / 8) + 1;
-       info->si_imap = kzalloc(imap_len, GFP_KERNEL);
-       if (!info->si_imap)
+       info->si_imap = kzalloc(imap_len, GFP_KERNEL | __GFP_NOWARN);
+       if (!info->si_imap) {
+               printf("Cannot allocate %u bytes\n", imap_len);
                goto out1;
+       }
        for (i = 0; i < BFS_ROOT_INO; i++)
                set_bit(i, info->si_imap);
 
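The added check rejects images whose s_start either exceeds s_end or points inside the superblock area, before imap_len is derived from those fields. A minimal userspace sketch of the same validation, assuming BFS_BSIZE is 512 as in fs/bfs/bfs.h:

    #include <stdint.h>
    #include <stdio.h>

    #define BFS_BSIZE 512  /* assumed BFS block size */

    /* Mirror of the new bfs_fill_super() check: the data area must not
     * start before the first block, nor end before it starts. */
    static int bfs_sb_sane(uint32_t s_start, uint32_t s_end)
    {
        return s_start <= s_end && s_start >= BFS_BSIZE;
    }

    int main(void)
    {
        printf("%d\n", bfs_sb_sane(2048, 65536));  /* 1: plausible layout */
        printf("%d\n", bfs_sb_sane(0, 65536));     /* 0: s_start inside the superblock */
        printf("%d\n", bfs_sb_sane(70000, 65536)); /* 0: start past end */
        return 0;
    }
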
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index 1d098c3c00e023540d6f0665720390647945af58..4ebbd57cbf8460da741860a4e657ab5fa60f6d7b 100644
@@ -99,25 +99,34 @@ out:
        return ret;
 }
 
+/* Caller must provide a bhs[] whose entries are either all NULL or all
+ * non-NULL, so that a read failure is easier to handle.
+ */
 int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
                           unsigned int nr, struct buffer_head *bhs[])
 {
        int status = 0;
        unsigned int i;
        struct buffer_head *bh;
+       int new_bh = 0;
 
        trace_ocfs2_read_blocks_sync((unsigned long long)block, nr);
 
        if (!nr)
                goto bail;
 
+       /* Don't put a buffer head and reassign it to NULL if it was
+        * allocated outside, since the caller can't be aware of this
+        * alteration!
+        */
+       new_bh = (bhs[0] == NULL);
+
        for (i = 0 ; i < nr ; i++) {
                if (bhs[i] == NULL) {
                        bhs[i] = sb_getblk(osb->sb, block++);
                        if (bhs[i] == NULL) {
                                status = -ENOMEM;
                                mlog_errno(status);
-                               goto bail;
+                               break;
                        }
                }
                bh = bhs[i];
@@ -158,9 +167,26 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
                submit_bh(REQ_OP_READ, 0, bh);
        }
 
+read_failure:
        for (i = nr; i > 0; i--) {
                bh = bhs[i - 1];
 
+               if (unlikely(status)) {
+                       if (new_bh && bh) {
+                               /* If a middle bh fails, let the previous
+                                * bh finish its read and then put it to
+                                * avoid a bh leak
+                                */
+                               if (!buffer_jbd(bh))
+                                       wait_on_buffer(bh);
+                               put_bh(bh);
+                               bhs[i - 1] = NULL;
+                       } else if (bh && buffer_uptodate(bh)) {
+                               clear_buffer_uptodate(bh);
+                       }
+                       continue;
+               }
+
                /* No need to wait on the buffer if it's managed by JBD. */
                if (!buffer_jbd(bh))
                        wait_on_buffer(bh);
@@ -170,8 +196,7 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
                         * so we can safely record this and loop back
                         * to cleanup the other buffers. */
                        status = -EIO;
-                       put_bh(bh);
-                       bhs[i - 1] = NULL;
+                       goto read_failure;
                }
        }
 
@@ -179,6 +204,9 @@ bail:
        return status;
 }
 
+/* Caller must provide a bhs[] whose entries are either all NULL or all
+ * non-NULL, so that a read failure is easier to handle.
+ */
 int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
                      struct buffer_head *bhs[], int flags,
                      int (*validate)(struct super_block *sb,
@@ -188,6 +216,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
        int i, ignore_cache = 0;
        struct buffer_head *bh;
        struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
+       int new_bh = 0;
 
        trace_ocfs2_read_blocks_begin(ci, (unsigned long long)block, nr, flags);
 
@@ -213,6 +242,11 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
                goto bail;
        }
 
+       /* Don't put a buffer head and reassign it to NULL if it was
+        * allocated outside, since the caller can't be aware of this
+        * alteration!
+        */
+       new_bh = (bhs[0] == NULL);
+
        ocfs2_metadata_cache_io_lock(ci);
        for (i = 0 ; i < nr ; i++) {
                if (bhs[i] == NULL) {
@@ -221,7 +255,8 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
                                ocfs2_metadata_cache_io_unlock(ci);
                                status = -ENOMEM;
                                mlog_errno(status);
-                               goto bail;
+                               /* Don't forget to put previous bh! */
+                               break;
                        }
                }
                bh = bhs[i];
@@ -316,16 +351,27 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
                }
        }
 
-       status = 0;
-
+read_failure:
        for (i = (nr - 1); i >= 0; i--) {
                bh = bhs[i];
 
                if (!(flags & OCFS2_BH_READAHEAD)) {
-                       if (status) {
-                               /* Clear the rest of the buffers on error */
-                               put_bh(bh);
-                               bhs[i] = NULL;
+                       if (unlikely(status)) {
+                               /* Clear the buffers on error, including
+                                * those that were read successfully
+                                */
+                               if (new_bh && bh) {
+                                       /* If a middle bh fails, let the
+                                        * previous bh finish its read and
+                                        * then put it to avoid a bh leak
+                                        */
+                                       if (!buffer_jbd(bh))
+                                               wait_on_buffer(bh);
+                                       put_bh(bh);
+                                       bhs[i] = NULL;
+                               } else if (bh && buffer_uptodate(bh)) {
+                                       clear_buffer_uptodate(bh);
+                               }
                                continue;
                        }
                        /* We know this can't have changed as we hold the
@@ -343,9 +389,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
                                 * uptodate. */
                                status = -EIO;
                                clear_buffer_needs_validate(bh);
-                               put_bh(bh);
-                               bhs[i] = NULL;
-                               continue;
+                               goto read_failure;
                        }
 
                        if (buffer_needs_validate(bh)) {
@@ -355,11 +399,8 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
                                BUG_ON(buffer_jbd(bh));
                                clear_buffer_needs_validate(bh);
                                status = validate(sb, bh);
-                               if (status) {
-                                       put_bh(bh);
-                                       bhs[i] = NULL;
-                                       continue;
-                               }
+                               if (status)
+                                       goto read_failure;
                        }
                }
 
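The read_failure path encodes an ownership rule: buffers this function allocated itself (new_bh) are drained and released, while caller-supplied buffers are only invalidated, since putting them would surprise the caller. A hedged userspace analogue of that rule, with stand-in types rather than the kernel buffer_head API:

    #include <stdlib.h>

    struct buf { int uptodate; };

    /* Stand-in for wait_on_buffer(): let any in-flight read finish
     * before the buffer is released. */
    static void drain(struct buf *b) { (void)b; }

    static void cleanup_on_error(struct buf **bufs, int nr, int we_allocated)
    {
        for (int i = nr - 1; i >= 0; i--) {
            if (!bufs[i])
                continue;
            if (we_allocated) {
                drain(bufs[i]);        /* never free a buffer mid-I/O */
                free(bufs[i]);
                bufs[i] = NULL;        /* caller sees a clean array */
            } else {
                bufs[i]->uptodate = 0; /* caller owns it: only invalidate */
            }
        }
    }

    int main(void)
    {
        struct buf *bufs[2] = { calloc(1, sizeof(struct buf)), NULL };
        cleanup_on_error(bufs, 2, 1);
        return 0;
    }
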
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index b048d4fa3959081bd1a857f0283d398b84515752..c121abbdfc7dbcfb28675aa7e62a4cb9a70633a1 100644
@@ -1897,8 +1897,7 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
                                /* On error, skip the f_pos to the
                                   next block. */
                                ctx->pos = (ctx->pos | (sb->s_blocksize - 1)) + 1;
-                               brelse(bh);
-                               continue;
+                               break;
                        }
                        if (le64_to_cpu(de->inode)) {
                                unsigned char d_type = DT_UNKNOWN;
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 933aac5da193415643b34a33e14db4fdb6fc29b5..7c835824247eb7a64446467b03080f07f67aa90e 100644
@@ -2123,10 +2123,10 @@ static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
 
 /* LVB only has room for 64 bits of time here so we pack it for
  * now. */
-static u64 ocfs2_pack_timespec(struct timespec *spec)
+static u64 ocfs2_pack_timespec(struct timespec64 *spec)
 {
        u64 res;
-       u64 sec = spec->tv_sec;
+       u64 sec = clamp_t(time64_t, spec->tv_sec, 0, 0x3ffffffffull);
        u32 nsec = spec->tv_nsec;
 
        res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK);
@@ -2142,7 +2142,6 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
        struct ocfs2_inode_info *oi = OCFS2_I(inode);
        struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
        struct ocfs2_meta_lvb *lvb;
-       struct timespec ts;
 
        lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
 
@@ -2163,15 +2162,12 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
        lvb->lvb_igid      = cpu_to_be32(i_gid_read(inode));
        lvb->lvb_imode     = cpu_to_be16(inode->i_mode);
        lvb->lvb_inlink    = cpu_to_be16(inode->i_nlink);
-       ts = timespec64_to_timespec(inode->i_atime);
        lvb->lvb_iatime_packed  =
-               cpu_to_be64(ocfs2_pack_timespec(&ts));
-       ts = timespec64_to_timespec(inode->i_ctime);
+               cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime));
        lvb->lvb_ictime_packed =
-               cpu_to_be64(ocfs2_pack_timespec(&ts));
-       ts = timespec64_to_timespec(inode->i_mtime);
+               cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime));
        lvb->lvb_imtime_packed =
-               cpu_to_be64(ocfs2_pack_timespec(&ts));
+               cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime));
        lvb->lvb_iattr    = cpu_to_be32(oi->ip_attr);
        lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features);
        lvb->lvb_igeneration = cpu_to_be32(inode->i_generation);
@@ -2180,7 +2176,7 @@ out:
        mlog_meta_lvb(0, lockres);
 }
 
-static void ocfs2_unpack_timespec(struct timespec *spec,
+static void ocfs2_unpack_timespec(struct timespec64 *spec,
                                  u64 packed_time)
 {
        spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT;
@@ -2189,7 +2185,6 @@ static void ocfs2_unpack_timespec(struct timespec *spec,
 
 static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
 {
-       struct timespec ts;
        struct ocfs2_inode_info *oi = OCFS2_I(inode);
        struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
        struct ocfs2_meta_lvb *lvb;
@@ -2217,15 +2212,12 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
        i_gid_write(inode, be32_to_cpu(lvb->lvb_igid));
        inode->i_mode    = be16_to_cpu(lvb->lvb_imode);
        set_nlink(inode, be16_to_cpu(lvb->lvb_inlink));
-       ocfs2_unpack_timespec(&ts,
+       ocfs2_unpack_timespec(&inode->i_atime,
                              be64_to_cpu(lvb->lvb_iatime_packed));
-       inode->i_atime = timespec_to_timespec64(ts);
-       ocfs2_unpack_timespec(&ts,
+       ocfs2_unpack_timespec(&inode->i_mtime,
                              be64_to_cpu(lvb->lvb_imtime_packed));
-       inode->i_mtime = timespec_to_timespec64(ts);
-       ocfs2_unpack_timespec(&ts,
+       ocfs2_unpack_timespec(&inode->i_ctime,
                              be64_to_cpu(lvb->lvb_ictime_packed));
-       inode->i_ctime = timespec_to_timespec64(ts);
        spin_unlock(&oi->ip_lock);
 }
 
@@ -3603,7 +3595,7 @@ static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
         * we can recover correctly from node failure. Otherwise, we may get
         * invalid LVB in LKB, but without DLM_SBF_VALNOTVALID being set.
         */
-       if (!ocfs2_is_o2cb_active() &&
+       if (ocfs2_userspace_stack(osb) &&
            lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
                lvb = 1;
 
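The timestamp hunks above move the LVB helpers to timespec64 and clamp seconds to the 34 bits the packed format can carry (0x3ffffffff, good until roughly the year 2514). A userspace sketch of the pack/unpack round trip; the 30-bit nanosecond field mirrors what dlmglue.c's OCFS2_SEC_SHIFT/OCFS2_NSEC_MASK imply and should be treated as an assumption:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define SEC_SHIFT 30                      /* assumed OCFS2_SEC_SHIFT */
    #define NSEC_MASK ((1ULL << SEC_SHIFT) - 1)
    #define SEC_MAX   0x3ffffffffULL          /* 2^34 - 1, as in the clamp */

    static uint64_t pack_ts(int64_t sec, uint32_t nsec)
    {
        uint64_t s = sec < 0 ? 0 :
                     sec > (int64_t)SEC_MAX ? SEC_MAX : (uint64_t)sec;
        return (s << SEC_SHIFT) | (nsec & NSEC_MASK);
    }

    static void unpack_ts(uint64_t packed, int64_t *sec, uint32_t *nsec)
    {
        *sec  = (int64_t)(packed >> SEC_SHIFT);
        *nsec = (uint32_t)(packed & NSEC_MASK);
    }

    int main(void)
    {
        int64_t s; uint32_t n;
        unpack_ts(pack_ts(1541265703, 123456789), &s, &n);
        printf("%" PRId64 ".%09u\n", s, n); /* in-range values round-trip */
        return 0;
    }
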
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index fe570824b9913b94d945ba6b1395547b879c517d..d640c5f8a85da8fc4ba030b295db0233b377ea7f 100644
@@ -2343,7 +2343,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
 
        written = __generic_file_write_iter(iocb, from);
        /* buffered aio wouldn't have proper lock coverage today */
-       BUG_ON(written == -EIOCBQUEUED && !(iocb->ki_flags & IOCB_DIRECT));
+       BUG_ON(written == -EIOCBQUEUED && !direct_io);
 
        /*
         * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io
@@ -2463,7 +2463,7 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
        trace_generic_file_read_iter_ret(ret);
 
        /* buffered aio wouldn't have proper lock coverage today */
-       BUG_ON(ret == -EIOCBQUEUED && !(iocb->ki_flags & IOCB_DIRECT));
+       BUG_ON(ret == -EIOCBQUEUED && !direct_io);
 
        /* see ocfs2_file_write_iter */
        if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) {
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index bd3475694e83a06501a055e73fd1403f81123eef..b63c97f4318e063889fe1ca203d19092c1abbedf 100644
@@ -1378,15 +1378,23 @@ static int __ocfs2_recovery_thread(void *arg)
        int rm_quota_used = 0, i;
        struct ocfs2_quota_recovery *qrec;
 
+       /* Whether quota is supported on this volume. */
+       int quota_enabled = OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb,
+                       OCFS2_FEATURE_RO_COMPAT_USRQUOTA)
+               || OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb,
+                       OCFS2_FEATURE_RO_COMPAT_GRPQUOTA);
+
        status = ocfs2_wait_on_mount(osb);
        if (status < 0) {
                goto bail;
        }
 
-       rm_quota = kcalloc(osb->max_slots, sizeof(int), GFP_NOFS);
-       if (!rm_quota) {
-               status = -ENOMEM;
-               goto bail;
+       if (quota_enabled) {
+               rm_quota = kcalloc(osb->max_slots, sizeof(int), GFP_NOFS);
+               if (!rm_quota) {
+                       status = -ENOMEM;
+                       goto bail;
+               }
        }
 restart:
        status = ocfs2_super_lock(osb, 1);
@@ -1422,9 +1430,14 @@ restart:
                 * then quota usage would be out of sync until some node takes
                 * the slot. So we remember which nodes need quota recovery
                 * and when everything else is done, we recover quotas. */
-               for (i = 0; i < rm_quota_used && rm_quota[i] != slot_num; i++);
-               if (i == rm_quota_used)
-                       rm_quota[rm_quota_used++] = slot_num;
+               if (quota_enabled) {
+                       for (i = 0; i < rm_quota_used
+                                       && rm_quota[i] != slot_num; i++)
+                               ;
+
+                       if (i == rm_quota_used)
+                               rm_quota[rm_quota_used++] = slot_num;
+               }
 
                status = ocfs2_recover_node(osb, node_num, slot_num);
 skip_recovery:
@@ -1452,16 +1465,19 @@ skip_recovery:
        /* Now it is right time to recover quotas... We have to do this under
         * superblock lock so that no one can start using the slot (and crash)
         * before we recover it */
-       for (i = 0; i < rm_quota_used; i++) {
-               qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]);
-               if (IS_ERR(qrec)) {
-                       status = PTR_ERR(qrec);
-                       mlog_errno(status);
-                       continue;
+       if (quota_enabled) {
+               for (i = 0; i < rm_quota_used; i++) {
+                       qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]);
+                       if (IS_ERR(qrec)) {
+                               status = PTR_ERR(qrec);
+                               mlog_errno(status);
+                               continue;
+                       }
+                       ocfs2_queue_recovery_completion(osb->journal,
+                                       rm_quota[i],
+                                       NULL, NULL, qrec,
+                                       ORPHAN_NEED_TRUNCATE);
                }
-               ocfs2_queue_recovery_completion(osb->journal, rm_quota[i],
-                                               NULL, NULL, qrec,
-                                               ORPHAN_NEED_TRUNCATE);
        }
 
        ocfs2_super_unlock(osb, 1);
@@ -1483,7 +1499,8 @@ bail:
 
        mutex_unlock(&osb->recovery_lock);
 
-       kfree(rm_quota);
+       if (quota_enabled)
+               kfree(rm_quota);
 
        /* no one is calling kthread_stop() for us so the kthread() api
         * requires that we call do_exit().  And it isn't exported, but
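
The journal change computes quota_enabled once from the RO_COMPAT feature bits and uses it to skip both the rm_quota allocation and the recovery passes on volumes without quota support. A hedged sketch of the gating pattern with illustrative names (not the ocfs2 API); note that kfree(NULL), like free(NULL), is a no-op, so the guard around the final free is belt-and-braces rather than required:

    #include <stdbool.h>
    #include <stdlib.h>

    #define FEAT_USRQUOTA 0x1 /* illustrative stand-ins for the  */
    #define FEAT_GRPQUOTA 0x2 /* OCFS2_FEATURE_RO_COMPAT_* bits  */

    struct sb { unsigned ro_compat; };

    static int recovery_thread(struct sb *sb, int max_slots)
    {
        bool quota_enabled =
            sb->ro_compat & (FEAT_USRQUOTA | FEAT_GRPQUOTA);
        int *rm_quota = NULL;

        if (quota_enabled) {
            rm_quota = calloc(max_slots, sizeof(int));
            if (!rm_quota)
                return -1; /* -ENOMEM in the kernel */
        }

        /* ... per-node recovery; slots are recorded in rm_quota and
         * quota recovery runs only when quota_enabled ... */

        free(rm_quota); /* safe even when NULL */
        return 0;
    }

    int main(void)
    {
        struct sb sb = { .ro_compat = FEAT_USRQUOTA };
        return recovery_thread(&sb, 8);
    }
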
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 7eb3b0a6347ef74990589ac47f5b4823441c8135..3f1685d7d43bf37a26161bfaeaaeefef3831820b 100644
@@ -25,6 +25,7 @@
 #include "ocfs2_ioctl.h"
 
 #include "alloc.h"
+#include "localalloc.h"
 #include "aops.h"
 #include "dlmglue.h"
 #include "extent_map.h"
@@ -233,6 +234,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
        struct ocfs2_refcount_tree *ref_tree = NULL;
        u32 new_phys_cpos, new_len;
        u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
+       int need_free = 0;
 
        if ((ext_flags & OCFS2_EXT_REFCOUNTED) && *len) {
                BUG_ON(!ocfs2_is_refcount_inode(inode));
@@ -308,6 +310,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
                if (!partial) {
                        context->range->me_flags &= ~OCFS2_MOVE_EXT_FL_COMPLETE;
                        ret = -ENOSPC;
+                       need_free = 1;
                        goto out_commit;
                }
        }
@@ -332,6 +335,20 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
                mlog_errno(ret);
 
 out_commit:
+       if (need_free && context->data_ac) {
+               struct ocfs2_alloc_context *data_ac = context->data_ac;
+
+               if (context->data_ac->ac_which == OCFS2_AC_USE_LOCAL)
+                       ocfs2_free_local_alloc_bits(osb, handle, data_ac,
+                                       new_phys_cpos, new_len);
+               else
+                       ocfs2_free_clusters(handle,
+                                       data_ac->ac_inode,
+                                       data_ac->ac_bh,
+                                       ocfs2_clusters_to_blocks(osb->sb, new_phys_cpos),
+                                       new_len);
+       }
+
        ocfs2_commit_trans(osb, handle);
 
 out_unlock_mutex:
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index d6c350ba25b96ec9886cdc11b46a94fb17769261..c4b029c43464e0d14424a8a9af216d9168ca4bc9 100644
@@ -48,12 +48,6 @@ static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl";
  */
 static struct ocfs2_stack_plugin *active_stack;
 
-inline int ocfs2_is_o2cb_active(void)
-{
-       return !strcmp(active_stack->sp_name, OCFS2_STACK_PLUGIN_O2CB);
-}
-EXPORT_SYMBOL_GPL(ocfs2_is_o2cb_active);
-
 static struct ocfs2_stack_plugin *ocfs2_stack_lookup(const char *name)
 {
        struct ocfs2_stack_plugin *p;
diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h
index e3036e1790e86da7b4e13dcb0b8c88e3f19b6d50..f2dce10fae543c254dcb4e6628d357b60a3ac16c 100644
@@ -298,9 +298,6 @@ void ocfs2_stack_glue_set_max_proto_version(struct ocfs2_protocol_version *max_p
 int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin);
 void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin);
 
-/* In ocfs2_downconvert_lock(), we need to know which stack we are using */
-int ocfs2_is_o2cb_active(void);
-
 extern struct kset *ocfs2_kset;
 
 #endif  /* STACKGLUE_H */
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 24bcc5eec6b409ec379602156a94d74373ec2633..76f8db0b0e715c016cc00cb95aa9a269f12c075d 100644
@@ -510,22 +510,18 @@ alloc_pages(gfp_t gfp_mask, unsigned int order)
 }
 extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
                        struct vm_area_struct *vma, unsigned long addr,
-                       int node, bool hugepage);
-#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
-       alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true)
+                       int node);
 #else
 #define alloc_pages(gfp_mask, order) \
                alloc_pages_node(numa_node_id(), gfp_mask, order)
-#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\
-       alloc_pages(gfp_mask, order)
-#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
+#define alloc_pages_vma(gfp_mask, order, vma, addr, node)\
        alloc_pages(gfp_mask, order)
 #endif
 #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
 #define alloc_page_vma(gfp_mask, vma, addr)                    \
-       alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false)
+       alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id())
 #define alloc_page_vma_node(gfp_mask, vma, addr, node)         \
-       alloc_pages_vma(gfp_mask, 0, vma, addr, node, false)
+       alloc_pages_vma(gfp_mask, 0, vma, addr, node)
 
 extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
 extern unsigned long get_zeroed_page(gfp_t gfp_mask);
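
With the hugepage argument and the alloc_hugepage_vma() wrapper gone, THP call sites pass the node explicitly and any node preference travels in the gfp mask instead. The new call shape (kernel context, as the mm/huge_memory.c hunks below use it):

    gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
    page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, vma, haddr,
                           numa_node_id());

Folding the bool into the mask removes the special-cased __GFP_THISNODE path that mm/mempolicy.c previously applied behind the caller's back.
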
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 5228c62af41659bb7d5ae0e7db00969b9f16ef73..bac395f1d00a0f9691b12ec6841f2401a10ca4fc 100644
@@ -139,6 +139,8 @@ struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,
 struct mempolicy *get_task_policy(struct task_struct *p);
 struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
                unsigned long addr);
+struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
+                                               unsigned long addr);
 bool vma_policy_mof(struct vm_area_struct *vma);
 
 extern void numa_default_policy(void);
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index f35c7bf7614302ee51f0896258a3dfc08d5224f8..0096a05395e380a35fe25a6329e05953eb66ab66 100644
@@ -122,8 +122,7 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
 
 #ifdef CONFIG_TREE_SRCU
 #define _SRCU_NOTIFIER_HEAD(name, mod)                         \
-       static DEFINE_PER_CPU(struct srcu_data,                 \
-                       name##_head_srcu_data);                 \
+       static DEFINE_PER_CPU(struct srcu_data, name##_head_srcu_data); \
        mod struct srcu_notifier_head name =                    \
                        SRCU_NOTIFIER_INIT(name, name##_head_srcu_data)
 
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index c6a3b6851372c480005d4f053757ba02ad101d8f..35cf0ad29718ffdb0a35dd9c8b2313645c91c6fd 100644
@@ -25,8 +25,6 @@
 #include <linux/elf.h>
 #include <linux/elfcore.h>
 #include <linux/kernel.h>
-#include <linux/kexec.h>
-#include <linux/slab.h>
 #include <linux/syscalls.h>
 #include <linux/vmalloc.h>
 #include "kexec_internal.h"
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 3ae223f7b5dfabaf929c66941fc0b6693e9b063a..5fc724e4e454c3304ecaebe7c868eb622f784eb8 100644
@@ -66,7 +66,6 @@
 #include <linux/kexec.h>
 #include <linux/bpf.h>
 #include <linux/mount.h>
-#include <linux/pipe_fs_i.h>
 
 #include <linux/uaccess.h>
 #include <asm/processor.h>
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 4e4ef8fa479d53b7ee7c4c8fcb86985acb790c8a..55478ab3c83be372f9fa4d654f16e32ffdeb1e29 100644
@@ -629,21 +629,40 @@ release:
  *         available
  * never: never stall for any thp allocation
  */
-static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma)
+static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma, unsigned long addr)
 {
        const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE);
+       gfp_t this_node = 0;
+
+#ifdef CONFIG_NUMA
+       struct mempolicy *pol;
+       /*
+        * __GFP_THISNODE is used only when __GFP_DIRECT_RECLAIM is not
+        * specified, to express a general desire to stay on the current
+        * node for optimistic allocation attempts. If the defrag mode
+        * and/or madvise hint requires direct reclaim, then we prefer to
+        * fall back to other nodes rather than do node reclaim, because that
+        * can lead to excessive reclaim even though there is free memory
+        * on other nodes. We expect that NUMA preferences are specified
+        * by memory policies.
+        */
+       pol = get_vma_policy(vma, addr);
+       if (pol->mode != MPOL_BIND)
+               this_node = __GFP_THISNODE;
+       mpol_cond_put(pol);
+#endif
 
        if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
                return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY);
        if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags))
-               return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM;
+               return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM | this_node;
        if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags))
                return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM :
-                                                            __GFP_KSWAPD_RECLAIM);
+                                                            __GFP_KSWAPD_RECLAIM | this_node);
        if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags))
                return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM :
-                                                            0);
-       return GFP_TRANSHUGE_LIGHT;
+                                                            this_node);
+       return GFP_TRANSHUGE_LIGHT | this_node;
 }
 
 /* Caller must hold page table lock. */
@@ -715,8 +734,8 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
                        pte_free(vma->vm_mm, pgtable);
                return ret;
        }
-       gfp = alloc_hugepage_direct_gfpmask(vma);
-       page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
+       gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
+       page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, vma, haddr, numa_node_id());
        if (unlikely(!page)) {
                count_vm_event(THP_FAULT_FALLBACK);
                return VM_FAULT_FALLBACK;
@@ -1286,8 +1305,9 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
 alloc:
        if (transparent_hugepage_enabled(vma) &&
            !transparent_hugepage_debug_cow()) {
-               huge_gfp = alloc_hugepage_direct_gfpmask(vma);
-               new_page = alloc_hugepage_vma(huge_gfp, vma, haddr, HPAGE_PMD_ORDER);
+               huge_gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
+               new_page = alloc_pages_vma(huge_gfp, HPAGE_PMD_ORDER, vma,
+                               haddr, numa_node_id());
        } else
                new_page = NULL;
 
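The consolidated helper reads as a small decision table: __GFP_THISNODE is ORed in only on paths that do not imply direct reclaim for this vma, and only when the memory policy is not MPOL_BIND. A userspace sketch of that table with illustrative flag values (not the kernel's gfp bits):

    #include <stdio.h>

    /* Illustrative flag values, not the kernel's gfp bits. */
    #define F_DIRECT_RECLAIM 0x1
    #define F_KSWAPD_RECLAIM 0x2
    #define F_THISNODE       0x4

    enum defrag { ALWAYS, DEFER, DEFER_MADVISE, MADVISE, NEVER };

    /* Decision table of alloc_hugepage_direct_gfpmask() after the patch
     * (__GFP_NORETRY and the TRANSHUGE/LIGHT base masks omitted):
     * __GFP_THISNODE rides along only where direct reclaim is not used. */
    static unsigned thp_gfp(enum defrag mode, int madvised, int mpol_bind)
    {
        unsigned this_node = mpol_bind ? 0 : F_THISNODE;

        switch (mode) {
        case ALWAYS:
            return F_DIRECT_RECLAIM;
        case DEFER:
            return F_KSWAPD_RECLAIM | this_node;
        case DEFER_MADVISE:
            return madvised ? F_DIRECT_RECLAIM
                            : F_KSWAPD_RECLAIM | this_node;
        case MADVISE:
            return madvised ? F_DIRECT_RECLAIM : this_node;
        default: /* NEVER */
            return this_node;
        }
    }

    int main(void)
    {
        /* MADV_HUGEPAGE vma: direct reclaim allowed, node not pinned. */
        printf("%#x\n", thp_gfp(MADVISE, 1, 0));
        return 0;
    }
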
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 54920cbc46bfdcb87b0a4ae3e6b4538596b6bd58..6e1469b80cb7d57d3cdf01ef9c917a21ece0b7fa 100644
@@ -2593,7 +2593,7 @@ int memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
        struct mem_cgroup *memcg;
        int ret = 0;
 
-       if (memcg_kmem_bypass())
+       if (mem_cgroup_disabled() || memcg_kmem_bypass())
                return 0;
 
        memcg = get_mem_cgroup_from_current();
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 61972da38d93cb54d6f1088df186ed20bb0f98bb..2b2b3ccbbfb5768a3d6b530799ebf5c4c3129688 100644
@@ -586,6 +586,7 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
        for (i = 0; i < sections_to_remove; i++) {
                unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
 
+               cond_resched();
                ret = __remove_section(zone, __pfn_to_section(pfn), map_offset,
                                altmap);
                map_offset = 0;
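
The one-line memory_hotplug fix is the standard idiom for loops whose trip count scales with machine size: under voluntary preemption, a multi-terabyte hot-remove would otherwise run through thousands of sections without a scheduling point. A hedged userspace analogue of the idiom (sched_yield() standing in for cond_resched(), which is nearly free when nothing is waiting):

    #include <sched.h>

    /* Yield periodically inside a long loop so other runnable tasks are
     * not starved; cond_resched() plays this role in the kernel. */
    static void process_all(long nr_items)
    {
        for (long i = 0; i < nr_items; i++) {
            sched_yield(); /* scheduling point each iteration */
            /* ... per-item work, e.g. tearing down one memory section ... */
        }
    }

    int main(void) { process_all(1L << 20); return 0; }
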
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index cfd26d7e61a17f9c5fd260b85778058aa04b83e2..5837a067124d895f38f6039d9e3739f0a0874fc0 100644
@@ -1116,8 +1116,8 @@ static struct page *new_page(struct page *page, unsigned long start)
        } else if (PageTransHuge(page)) {
                struct page *thp;
 
-               thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address,
-                                        HPAGE_PMD_ORDER);
+               thp = alloc_pages_vma(GFP_TRANSHUGE, HPAGE_PMD_ORDER, vma,
+                               address, numa_node_id());
                if (!thp)
                        return NULL;
                prep_transhuge_page(thp);
@@ -1662,7 +1662,7 @@ struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
  * freeing by another task.  It is the caller's responsibility to free the
  * extra reference for shared policies.
  */
-static struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
+struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
                                                unsigned long addr)
 {
        struct mempolicy *pol = __get_vma_policy(vma, addr);
@@ -2011,7 +2011,6 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  *     @vma:  Pointer to VMA or NULL if not available.
  *     @addr: Virtual Address of the allocation. Must be inside the VMA.
  *     @node: Which node to prefer for allocation (modulo policy).
- *     @hugepage: for hugepages try only the preferred node if possible
  *
  *     This function allocates a page from the kernel page pool and applies
  *     a NUMA policy associated with the VMA or the current process.
@@ -2022,7 +2021,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  */
 struct page *
 alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
-               unsigned long addr, int node, bool hugepage)
+               unsigned long addr, int node)
 {
        struct mempolicy *pol;
        struct page *page;
@@ -2040,32 +2039,6 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
                goto out;
        }
 
-       if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) {
-               int hpage_node = node;
-
-               /*
-                * For hugepage allocation and non-interleave policy which
-                * allows the current node (or other explicitly preferred
-                * node) we only try to allocate from the current/preferred
-                * node and don't fall back to other nodes, as the cost of
-                * remote accesses would likely offset THP benefits.
-                *
-                * If the policy is interleave, or does not allow the current
-                * node in its nodemask, we allocate the standard way.
-                */
-               if (pol->mode == MPOL_PREFERRED &&
-                                               !(pol->flags & MPOL_F_LOCAL))
-                       hpage_node = pol->v.preferred_node;
-
-               nmask = policy_nodemask(gfp, pol);
-               if (!nmask || node_isset(hpage_node, *nmask)) {
-                       mpol_cond_put(pol);
-                       page = __alloc_pages_node(hpage_node,
-                                               gfp | __GFP_THISNODE, order);
-                       goto out;
-               }
-       }
-
        nmask = policy_nodemask(gfp, pol);
        preferred_nid = policy_node(gfp, pol, node);
        page = __alloc_pages_nodemask(gfp, order, preferred_nid, nmask);
diff --git a/mm/shmem.c b/mm/shmem.c
index 56bf122e0bb4ddf7b57548e7e4b4a33bbdf9a9ab..ea26d7a0342d77ac67f47e813a73f125c873a1e5 100644
@@ -1435,7 +1435,7 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp,
 
        shmem_pseudo_vma_init(&pvma, info, hindex);
        page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN,
-                       HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), true);
+                       HPAGE_PMD_ORDER, &pvma, 0, numa_node_id());
        shmem_pseudo_vma_destroy(&pvma);
        if (page)
                prep_transhuge_page(page);